Python || Pdf Merge Using PyPdf
The following is a simple pdf file merger program which utilizes the “pyPdf” library to manipulate pdf files. This program has the ability to merge entire selected pdf files together, and save the selected files into one single new pdf file.
REQUIRED KNOWLEDGE FOR THIS PROGRAM
PyPdf - What Is It?
How To Create Executable Python Programs
Display The Time In Python
Metadata With PyPdf
Pdf Merge Executable File - Click Here To Download
This program first asks the user to place the pdf file(s) they wish to merge into a specified folder. The default input folder is titled “Files To Merge.” After the input pdf file(s) have been placed into the specified input folder, the program prompts the user to select which file(s) they wish to merge together. As soon as the input pdf file(s) have been selected, the file merging begins, with the files being saved to the output pdf file in the exact same order as specified by the user. As soon as the file merging is complete, the single merged pdf file is saved into an output folder titled “Completed Merged Files.”
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 |
# =============================================================================
#    Author: K Perkins
#    Date:   Aug 5, 2013
#    Taken From: http://programmingnotes.org/
#    File:  PdfMerge.py
#    Description: This is a simple program utilizing the pyPdf library to
#      manipulate pdf files. This program has the ability to merge entire
#      selected pdf files together, and save the selected files into one
#      single new pdf file.
# =============================================================================
import sys
import os
import datetime
import platform
import re

# NOTE: the pyPdf imports live inside main() so that the helper functions in
# this module can be imported (and tested) without pyPdf being installed.

# ---- START GLOBAL VARIABLES ---- #
INPUT_FILE_FOLDER = "Files To Merge"
OUTPUT_FILE_FOLDER = "Completed Merged Files"
OUTPUT_FILE_NAME = "Merged File.pdf"
PDF_PRODUCER = "KENNETH'S PDF MERGER"

# Determine the platform; fall back to a placeholder so the value is always
# a string (it is written into the pdf metadata as the author).
if platform.system() == "Windows":
    CURRENT_USER = os.environ.get("USERNAME") or "Unknown"
else:
    CURRENT_USER = os.environ.get("USER") or "Unknown"

# Matches either a single number ("5") or an inclusive range ("56-100").
_PAGE_TOKEN = re.compile(r"(\d+)(?:\s*-\s*(\d+))?")
# ---- END GLOBAL VARIABLES ---- #


def DoesFileExists(fileName, fileFolder):
    """Return True if ``fileName`` is an existing file inside ``fileFolder``."""
    return os.path.isfile(os.path.join(fileFolder, fileName))


def DoesFolderExist(fileFolder):
    """Make sure ``fileFolder`` exists, creating it when necessary.

    Returns True if the folder was already there, False if it had to be
    created by this call.
    """
    if os.path.isdir(fileFolder):
        return True
    os.makedirs(fileFolder)
    return False


def CheckOutFileDigits(outfile):
    """Return True if ``outfile`` ends with ")" (i.e. carries a "(n)" suffix).

    An empty string returns False instead of raising.
    """
    return outfile.endswith(")")


def GetPageNumbers(pageRange):
    """Parse a selection string such as "1,2,5,56-100,241" into integers.

    Single numbers are returned as-is and "a-b" ranges are expanded
    inclusively, in the order they were typed. Any other character acts as a
    separator. A descending range ("5-3") expands to nothing, and a dangling
    dash ("4-") is treated as the single number before it.
    """
    fileNumbers = []
    for match in _PAGE_TOKEN.finditer(pageRange):
        first = int(match.group(1))
        if match.group(2) is None:
            fileNumbers.append(first)
        else:
            fileNumbers.extend(range(first, int(match.group(2)) + 1))
    return fileNumbers


def DisplayFiles(files):
    """Print ``files`` as a 1-based, numbered table."""
    print("Index # ||\tFile Name\n" +
          "-----------------------------------")
    for number, name in enumerate(files, 1):
        print("(%d) \t ||\t%s" % (number, name))


def GetFileName(index, files):
    """Return the entry of ``files`` at the 0-based position ``index``."""
    return files[index]


def Cls():
    """Clear the console screen."""
    os.system("cls" if platform.system() == "Windows" else "clear")


def GetFiles():
    """Block until the input folder contains at least one file."""
    while not os.listdir(INPUT_FILE_FOLDER):
        print("** NOTE: To continue, please place the file(s) that you wish to " +
              "\nmerge inside the \"%s\" folder located in:" % (INPUT_FILE_FOLDER))
        print("\n%s" % (os.path.abspath(INPUT_FILE_FOLDER)))
        input("\nPlease press ENTER to continue...")
        Cls()  # clear the console screen


def _IsPdf(fileName):
    """Return True if ``fileName`` has a .pdf extension (any letter case)."""
    return fileName.lower().endswith(".pdf")


def _UniqueOutputName(fileName, fileFolder):
    """Return ``fileName``, or "name (n).pdf" with the first free n >= 2."""
    if not DoesFileExists(fileName, fileFolder):
        return fileName
    stem, extension = os.path.splitext(fileName)
    count = 2
    while DoesFileExists("%s (%d)%s" % (stem, count, extension), fileFolder):
        count += 1
    return "%s (%d)%s" % (stem, count, extension)


def _SelectFiles():
    """Ask the user which files to merge and return the chosen pdf names.

    Keeps prompting until at least one valid index is entered. The returned
    list is empty when none of the chosen files is a pdf, in which case the
    caller is expected to ask again.
    """
    fileNumbers = []
    available = []
    invalidIndex = False

    while not fileNumbers:
        # MAKE SURE THERE ARE FILES IN THE INPUT FOLDER
        if not os.listdir(INPUT_FILE_FOLDER):
            GetFiles()

        # Read the folder once and sort it, so the index shown on screen is
        # guaranteed to refer to the same file when it is looked up below.
        available = sorted(os.listdir(INPUT_FILE_FOLDER))

        print("\nThese are the files thats currently located in " +
              "the \"%s\" folder..\n" % (INPUT_FILE_FOLDER))
        DisplayFiles(available)

        # ASK THE USER FOR FILES NAMES/INDEXES
        print("\nPlease enter the index numbers of the files that " +
              "you wish to merge together:")
        print("Example: 1,2,5,56-100,241")
        pageRange = input(">> ").replace(" ", "")
        print("\nYou have selected to merge the file(s): %s" % (pageRange))

        # DROP ALL FILE INDEX NUMBERS THAT DONT EXIST IN THE FOLDER
        requested = GetPageNumbers(pageRange)
        fileNumbers = [x for x in requested if 1 <= x <= len(available)]
        invalidIndex = len(fileNumbers) != len(requested)

        if not fileNumbers:
            print("\n----------------------------------------------------------")
            print("\n** ERROR: No files have been selected to be merged!")
            input("\nPlease press ENTER to continue...")
            Cls()

    if invalidIndex:
        print("\n----------------------------------------------------------")
        print("\n** ERROR: The folder \"%s\" only contains %d files!\n"
              % (INPUT_FILE_FOLDER, len(available)))
        print("Invalid file index numbers have been detected")
        input("\nPlease press ENTER to continue...")

    files = [GetFileName(x - 1, available) for x in fileNumbers]
    print("\n----------------------------------------------------------")
    print("\nThe following files have been selected to be merged!\n")
    DisplayFiles(files)

    # CHECK IF THE SELECTED INPUT FILES ARE PDF FILES
    rejected = [x for x in files if not _IsPdf(x)]
    if rejected:
        print("\n** ERROR: Sorry, but the files listed below are " +
              "not pdf files and cannot be merged..\n")
        for index, name in enumerate(rejected, 1):
            print("(%d) \t\"%s\"" % (index, name))
        files = [x for x in files if _IsPdf(x)]
        if not files:
            input("\nPlease press ENTER to continue...")

    return files


def main():
    """Interactively merge user-selected pdf files into one output pdf."""
    from pyPdf.pdf import PdfFileWriter, PdfFileReader
    from pyPdf.generic import NameObject, createStringObject

    # CHECK TO SEE IF INPUT/OUTPUT FOLDERS EXIST, CREATE THEM IF THEY DONT
    DoesFolderExist(INPUT_FILE_FOLDER)
    DoesFolderExist(OUTPUT_FILE_FOLDER)

    # GET FILE NAMES FROM THE USER TO MERGE TOGETHER
    files = []
    while not files:
        Cls()
        files = _SelectFiles()

    input("\nPlease press ENTER to continue...")
    print("\n----------------------------------------------------------\n")

    output_pdfFile = PdfFileWriter()
    # The input files are kept open until the merged file is written,
    # because the page objects added below are still backed by them.
    openInputs = []
    try:
        # MERGE THE EXISITING FILES TOGETHER
        for name in files:
            handle = open(os.path.join(INPUT_FILE_FOLDER, name), "rb")
            openInputs.append(handle)
            input_pdfFile = PdfFileReader(handle)
            pageCount = input_pdfFile.getNumPages()
            print("%s has %d pages." % (name, pageCount))
            for y in range(pageCount):
                output_pdfFile.addPage(input_pdfFile.getPage(y))

        # CONSTRUCT AN OUTPUT FILENAME THAT DOESNT OVERWRITE AN OLDER MERGE
        outfileName = _UniqueOutputName(
            os.path.splitext(files[0])[0] + " - " + OUTPUT_FILE_NAME,
            OUTPUT_FILE_FOLDER)

        # GET THE NUMBER OF PAGES IN THE OUTPUT PDF FILE
        numPages = output_pdfFile.getNumPages()

        # SAVE OUTPUT FILE TO THE OUTPUT FOLDER
        created = datetime.datetime.now().strftime(
            "CREATED: %m/%d/%Y, %I:%M:%S %p")
        infoDict = output_pdfFile._info.getObject()
        infoDict.update({
            NameObject('/Title'): createStringObject(outfileName),
            NameObject('/Author'): createStringObject(CURRENT_USER),
            NameObject('/Subject'): createStringObject(created),
            NameObject('/Creator'): createStringObject(PDF_PRODUCER)
        })

        with open(os.path.join(OUTPUT_FILE_FOLDER, outfileName), "wb") as outputStream:
            output_pdfFile.write(outputStream)
    finally:
        for handle in openInputs:
            handle.close()

    # DISPLAY FINAL MESSAGE TO USER
    print("\n\"%s\" has been created and contains %d total page(s)"
          % (outfileName, numPages))
    print("\nThis file is located in the following directory:\n" +
          "\n%s" % (os.path.abspath(OUTPUT_FILE_FOLDER)))
    input("\nPlease press ENTER to continue...")


if __name__ == "__main__":
    main()

# http://programmingnotes.org/
QUICK NOTES:
The highlighted lines are sections of interest to look out for.
The code is heavily commented, so no further insight is necessary. If you have any questions, feel free to leave a comment below.
Click here to download a Windows executable file demonstrating the above use.
Leave a Reply