合并 pdf

  1. """
  2. 合并多个 pdf 到一个,使用方式:放到当前文件夹下运行
  3. """
  4. import os.path
  5. from pyPdf import PdfFileReader, PdfFileWriter # pip install pyPdf
  6. def get_pdf_files(dst_dir):
  7. paths = []
  8. for root, dirs, files in os.walk(dst_dir):
  9. for filespath in files:
  10. if filespath.endswith('.pdf'): # pdf file
  11. abspath = os.path.join(root, filespath)
  12. paths.append(abspath)
  13. return paths
  14. ##########################合并同一个文件夹下所有PDF文件########################
  15. def merge_pdf(dst_dir, outfile, sort=True):
  16. output = PdfFileWriter()
  17. curpage = 0
  18. pdf_paths = sorted(get_pdf_files(dst_dir)) if sort else get_pdf_files(dst_dir)
  19. for each in pdf_paths:
  20. print(each)
  21. reader = PdfFileReader(file(each, "rb"))
  22. # 如果pdf文件已经加密,必须首先解密才能使用pyPdf
  23. if reader.isEncrypted == True:
  24. reader.decrypt("map")
  25. # 获得源pdf文件中页面总数
  26. page_count = reader.getNumPages()
  27. curpage += page_count
  28. print(page_count)
  29. for iPage in range(0, page_count):
  30. output.addPage(reader.getPage(iPage))
  31. print("All Pages Number:" + str(curpage))
  32. outputStream = file(dst_dir + outfile, "wb")
  33. output.write(outputStream)
  34. outputStream.close()
  35. def main():
  36. merged = "all.pdf"
  37. merge_pdf("./", merged)
  38. if __name__ == '__main__':
  39. main()