Python || Pdf Merge Using PyPdf

The following is a simple pdf file merger program which utilizes the “pyPdf” library to manipulate pdf files. This program has the ability to merge entire selected pdf files together, and save the selected files into one single new pdf file.
REQUIRED KNOWLEDGE FOR THIS PROGRAM
PyPdf - What Is It?
How To Create Executable Python Programs
Display The Time In Python
Metadata With PyPdf
Pdf Merge Executable File - Click Here To Download
This program first asks the user to place the pdf file(s) they wish to merge into a specified folder. The default input folder is titled “Files To Merge.” After the input pdf file(s) have been placed into the specified input folder, the program prompts the user to select which file(s) they wish to merge together. As soon as the input pdf file(s) have been selected, the file merging begins, with the files being saved to the output pdf file in the exact same order as specified by the user. As soon as the file merging is complete, the single merged pdf file is saved into an output folder titled “Completed Merged Files.”
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
# =============================================================================
# Author: K Perkins
# Date: Aug 5, 2013
# Taken From: http://programmingnotes.org/
# File: PdfMerge.py
# Description: This is a simple program utilizing the pyPdf library to
# manipulate pdf files. This program has the ability to merge entire
# selected pdf files together, and save the selected files into one
# single new pdf file.
# =============================================================================
import datetime
import os
import platform
import re
import sys

from pyPdf.pdf import PdfFileWriter, PdfFileReader
from pyPdf.generic import NameObject, createStringObject

# ---- START GLOBAL VARIABLES ---- #
INPUT_FILE_FOLDER = "Files To Merge"
OUTPUT_FILE_FOLDER = "Completed Merged Files"
OUTPUT_FILE_NAME = "Merged File.pdf"
PDF_PRODUCER = "KENNETH'S PDF MERGER"

# Determine the platform; the environment variable holding the login name
# differs between Windows and everything else. May be None if it is unset.
if platform.system() == "Windows":
    CURRENT_USER = os.environ.get("USERNAME")
else:
    CURRENT_USER = os.environ.get("USER")

# One selection token: a number, optionally followed by "-<number>" to form
# an inclusive range. Any other character acts as a separator.
_SELECTION_TOKEN = re.compile(r"(\d+)(?:-+(\d+))?")

_SEPARATOR = "\n----------------------------------------------------------"
_PRESS_ENTER = "\nPlease press ENTER to continue..."
# ---- END GLOBAL VARIABLES ---- #


def DoesFileExists(fileName, fileFolder):
    """Return True if ``fileName`` is an existing file inside ``fileFolder``."""
    return os.path.isfile(os.path.join(fileFolder, fileName))


def DoesFolderExist(fileFolder):
    """Ensure ``fileFolder`` exists, creating it when it is missing.

    Returns True if the path already existed, False if it had to be created.
    """
    if os.path.exists(fileFolder):
        return True
    os.makedirs(fileFolder)
    return False


def CheckOutFileDigits(outfile):
    """Return True if ``outfile`` ends with ")" (i.e. carries a copy suffix).

    An empty string returns False instead of raising IndexError.
    """
    return outfile.endswith(")")


def GetPageNumbers(pageRange):
    """Parse a selection such as "1,2,5,56-100,241" into a list of integers.

    Single numbers are appended as-is and "a-b" expands to every integer
    from a to b inclusive (a descending range yields nothing). Numbers are
    returned in the order written, duplicates included. Every token is
    parsed independently, so "1-2-3" gives [1, 2, 3] and "1-,2" gives
    [1, 2] rather than leaking state from one token into the next.
    """
    fileNumbers = []
    for token in _SELECTION_TOKEN.finditer(pageRange):
        first = int(token.group(1))
        if token.group(2) is None:
            fileNumbers.append(first)
        else:
            fileNumbers.extend(range(first, int(token.group(2)) + 1))
    return fileNumbers


def DisplayFiles(files):
    """Print ``files`` as a table with 1-based index numbers."""
    print("Index # ||\tFile Name\n" + "-----------------------------------")
    for numFiles, name in enumerate(files, 1):
        print("(%d) \t ||\t%s" % (numFiles, name))


def GetFileName(index, files):
    """Return the entry of ``files`` at the 0-based position ``index``."""
    return files[index]


def Cls():
    """Clear the console screen using the platform's shell command."""
    os.system("cls" if platform.system() == "Windows" else "clear")


def GetFiles():
    """Block until the input folder contains at least one entry.

    Repeatedly tells the user where the input folder is and waits for ENTER.
    """
    while not os.listdir(INPUT_FILE_FOLDER):
        print(("** NOTE: To continue, please place the file(s) that you wish to "
               "\nmerge inside the \"%s\" folder located in:")
              % (INPUT_FILE_FOLDER))
        # os.path.join supplies the separator between the two path parts.
        print("\n%s" % (os.path.join(os.getcwd(), INPUT_FILE_FOLDER)))
        input(_PRESS_ENTER)
        Cls()  # clear the console screen


def _IsPdfName(fileName):
    """Return True if ``fileName`` has a .pdf extension, ignoring case."""
    return fileName.lower().endswith(".pdf")


def _SelectFilesToMerge():
    """Prompt until the user has selected at least one pdf file.

    Returns the selected file names (relative to INPUT_FILE_FOLDER) in the
    order the user listed them.
    """
    files = []
    while not files:
        Cls()
        fileNumbers = []
        available = []
        hasInvalidIndex = False

        while not fileNumbers:
            # MAKE SURE THERE ARE FILES IN THE INPUT FOLDER
            if not os.listdir(INPUT_FILE_FOLDER):
                GetFiles()

            # One sorted snapshot per prompt, so the index shown to the user
            # always maps to the same file when it is looked up below.
            available = sorted(os.listdir(INPUT_FILE_FOLDER))

            print(("\nThese are the files thats currently located in "
                   "the \"%s\" folder..\n") % (INPUT_FILE_FOLDER))
            DisplayFiles(available)

            # ASK THE USER FOR FILES NAMES/INDEXES
            print("\nPlease enter the index numbers of the files that "
                  "you wish to merge together:")
            print("Example: 1,2,5,56-100,241")
            pageRange = input(">> ").replace(" ", "")
            print("\nYou have selected to merge the file(s): %s" % (pageRange))

            # Keep only the indexes that exist; this is recomputed on every
            # attempt, so rejects from a previous attempt cannot leak in.
            requested = GetPageNumbers(pageRange)
            fileNumbers = [x for x in requested if 1 <= x <= len(available)]
            hasInvalidIndex = len(fileNumbers) != len(requested)

            if not fileNumbers:
                print(_SEPARATOR)
                print("\n** ERROR: No files have been selected to be merged!")
                input(_PRESS_ENTER)
                Cls()

        files = [GetFileName(x - 1, available) for x in fileNumbers]

        if hasInvalidIndex:
            print(_SEPARATOR)
            print("\n** ERROR: The folder \"%s\" only contains %d files!\n"
                  % (INPUT_FILE_FOLDER, len(available)))
            print("Invalid file index numbers have been detected")
            input(_PRESS_ENTER)

        print(_SEPARATOR)
        print("\nThe following files have been selected to be merged!\n")
        DisplayFiles(files)

        # CHECK IF THE SELECTED INPUT FILES ARE PDF FILES. This is tracked
        # separately from the invalid-index check above so that a bad index
        # alone does not trigger the "not pdf files" error.
        notPdf = [x for x in files if not _IsPdfName(x)]
        if notPdf:
            print("\n** ERROR: Sorry, but the files listed below are "
                  "not pdf files and cannot be merged..\n")
            for index, name in enumerate(notPdf, 1):
                print("(%d) \t\"%s\"" % (index, name))
            files = [x for x in files if _IsPdfName(x)]
            if not files:
                input(_PRESS_ENTER)

    return files


def _BuildOutputFileName(firstInputName):
    """Return an output file name that is not yet used in the output folder.

    The name is "<first input without extension> - <OUTPUT_FILE_NAME>"; if
    that exists, " (2)", " (3)", ... is inserted before the extension until
    a free name is found. Counting with an integer keeps the suffix correct
    past single digits.
    """
    stem = os.path.splitext(firstInputName)[0]
    baseName, extension = os.path.splitext(stem + " - " + OUTPUT_FILE_NAME)
    candidate = baseName + extension
    copyNumber = 2
    while DoesFileExists(candidate, OUTPUT_FILE_FOLDER):
        candidate = "%s (%d)%s" % (baseName, copyNumber, extension)
        copyNumber += 1
    return candidate


def main():
    """Interactively merge the selected pdf files into one output pdf."""
    output_pdfFile = PdfFileWriter()

    # CHECK TO SEE IF INPUT/OUTPUT FOLDERS EXIST, CREATE THEM IF THEY DONT
    DoesFolderExist(INPUT_FILE_FOLDER)
    DoesFolderExist(OUTPUT_FILE_FOLDER)

    # GET FILE NAMES FROM THE USER TO MERGE TOGETHER
    files = _SelectFilesToMerge()

    input(_PRESS_ENTER)
    print(_SEPARATOR + "\n")

    # The input streams are kept open until the output has been written
    # (the pages added to the writer come from these readers), and are
    # closed afterwards even if merging or writing fails.
    inputStreams = []
    try:
        # IF ALL IS OK, MERGE THE EXISITING FILES TOGETHER
        for fileName in files:
            filePath = os.path.join(INPUT_FILE_FOLDER, fileName)
            inputStream = open(filePath, "rb")
            inputStreams.append(inputStream)
            input_pdfFile = PdfFileReader(inputStream)
            pageCount = input_pdfFile.getNumPages()
            print("%s has %d pages." % (fileName, pageCount))
            for y in range(pageCount):
                output_pdfFile.addPage(input_pdfFile.getPage(y))

        # CONSTRUCT AN OUTPUT FILENAME
        outfileName = _BuildOutputFileName(files[0])

        # GET THE NUMBER OF PAGES IN THE OUTPUT PDF FILE
        numPages = output_pdfFile.getNumPages()

        # SAVE OUTPUT FILE TO THE OUTPUT FOLDER
        now = datetime.datetime.now()
        timestamp = now.strftime("CREATED: %m/%d/%Y, %I:%M:%S %p")
        infoDict = output_pdfFile._info.getObject()
        infoDict.update({
            NameObject('/Title'): createStringObject(outfileName),
            # CURRENT_USER is None when the environment variable is unset.
            NameObject('/Author'): createStringObject(CURRENT_USER or "Unknown"),
            NameObject('/Subject'): createStringObject(timestamp),
            NameObject('/Creator'): createStringObject(PDF_PRODUCER)
        })

        filePath = os.path.join(OUTPUT_FILE_FOLDER, outfileName)
        with open(filePath, "wb") as outputStream:
            output_pdfFile.write(outputStream)
    finally:
        for inputStream in inputStreams:
            inputStream.close()

    # DISPLAY FINAL MESSAGE TO USER
    print("\n\"%s\" has been created and contains %d total page(s)"
          % (outfileName, numPages))
    print("\nThis file is located in the following directory:\n"
          "\n%s" % (os.path.join(os.getcwd(), OUTPUT_FILE_FOLDER)))
    input(_PRESS_ENTER)


if __name__ == "__main__":
    main()
# http://programmingnotes.org/
QUICK NOTES:
The highlighted lines are sections of interest to look out for.
The code is heavily commented, so no further insight is necessary. If you have any questions, feel free to leave a comment below.
Click here to download a Windows executable file demonstrating the above use.
Leave a Reply