"""Build a reimbursement document from invoice / payment / purchase images.

Every piece of evidence is an image whose file name starts with a two-digit
group number followed by a type letter (``a`` invoice, ``b`` payment record,
``c`` purchase record).  The script converts PDF and JPG/JPEG inputs to PNG,
lays the PNGs out in ``output.docx`` (one group per page) and finally
converts that document to ``output.pdf``.
"""
import os
from copy import copy

import fitz
from docx import Document
from docx.shared import Inches
from docx2pdf import convert
from PIL import Image


def convert_img(path=None):
    """Convert every PDF and JPG/JPEG file found in *path* to PNG.

    Args:
        path: Directory to scan.  ``None`` (the default) means the current
            working directory, resolved when the function is called.

    PDF files whose name contains ``"output"`` are skipped so that a
    previously generated ``output.pdf`` is not turned into an input image.
    The PNG files are written next to their sources, in *path*.  A file that
    cannot be converted is reported on stdout and skipped.
    """
    if path is None:
        path = os.path.abspath('.')

    print("正在扫描并处理文件中...")
    for file in os.listdir(path):
        lowered = file.lower()
        if lowered.endswith('.pdf') and "output" not in file:
            # pdf2img returns 1 on failure and None on success.
            if pdf2img(path, file, out_dir=path):
                print("PDF 转换失败!")
        elif lowered.endswith(('.jpg', '.jpeg')):
            # splitext keeps dots that are part of the base name.
            target = os.path.join(path, os.path.splitext(file)[0] + ".png")
            try:
                with Image.open(os.path.join(path, file)) as img:
                    img.save(target, "PNG")
            except OSError:
                # Pillow reports unreadable/unwritable images as OSError.
                print("JPG/JPEG 转换失败!")
    print("转换完毕!")


def _names_follow_convention(names, types="abc"):
    """Return True if sorted *names* follow the ``NNx`` naming sequence.

    The expected sequence is 01a, 01b, 01c, 02a, 02b, ...: the first two
    characters are the 1-based group number and the third character cycles
    through *types*.  A name whose prefix is not a number is invalid.
    """
    expected_group = 1
    for index, name in enumerate(names):
        try:
            group = int(name[0:2])
        except ValueError:
            return False
        if group != expected_group or name[2:3] != types[index % len(types)]:
            return False
        # After the last type of a group, move on to the next group.
        if (index + 1) % len(types) == 0:
            expected_group += 1
    return True


def gen_filelist(path=None):
    """Return the sorted PNG file names in *path* to be put in the document.

    Args:
        path: Directory to scan.  ``None`` (the default) means the current
            working directory, resolved when the function is called.

    Returns:
        The sorted list of PNG file names (without directory), or an empty
        list when any PNG name breaks the naming convention.
    """
    if path is None:
        path = os.path.abspath('.')

    print("创建文件列表中...")
    file_list = sorted(
        name for name in os.listdir(path) if name.lower().endswith(".png")
    )
    if not _names_follow_convention(file_list):
        return []

    print("创建完毕!将要加入文档的文件如下:")
    for file in file_list:
        print(file)
    return file_list


def gen_docx():
    """Generate ``output.docx`` from the PNG files in the working directory.

    Each group of three pictures fills one page: the first picture on top,
    the second and third side by side in a two-column table below it.

    Returns:
        True if the document was written, False if the file list was empty
        or invalid (in which case nothing is saved).
    """
    file_list = gen_filelist()
    if not file_list:
        print("请检查文件命名是否正确!")
        return False

    input("按回车键确认...")
    print("生成 .docx 文档中...")

    doc = Document()
    table = None
    total = len(file_list)
    for pic_count, filename in enumerate(file_list, start=1):
        slot = pic_count % 3
        if slot == 1:
            doc.add_picture(filename, height=Inches(2.5))
            table = doc.add_table(rows=1, cols=2)
        elif slot == 2:
            left_cell = table.cell(0, 0)
            left_cell.paragraphs[0].add_run().add_picture(
                filename, height=Inches(5.0)
            )
        else:
            right_cell = table.cell(0, 1)
            right_cell.paragraphs[0].add_run().add_picture(
                filename, height=Inches(5.0)
            )
            # Start a new page after each complete group except the last.
            if pic_count != total:
                doc.add_page_break()

    doc.save("output.docx")
    print("生成完毕!")
    return True


def pdf2img(pdf_path, pdf_name, zoom_x=3, zoom_y=3, out_dir=None):
    """Render a single-page PDF to a PNG file with the same base name.

    Args:
        pdf_path: Directory containing the PDF.
        pdf_name: File name of the PDF inside *pdf_path*.
        zoom_x: Horizontal zoom factor used for rendering.
        zoom_y: Vertical zoom factor used for rendering.
        out_dir: Directory for the PNG.  ``None`` (the default) writes the
            PNG relative to the current working directory.

    Returns:
        1 if the PDF does not contain exactly one page, otherwise None.
    """
    png_name = os.path.splitext(pdf_name)[0] + '.png'
    if out_dir is not None:
        png_name = os.path.join(out_dir, png_name)

    # The context manager closes the document on every exit path.
    with fitz.open(os.path.join(pdf_path, pdf_name)) as doc:
        if len(doc) != 1:
            print("PDF 文件只能包含一页!")
            return 1
        for page in doc:
            # Render the page to a pixmap and store it as PNG.
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom_x, zoom_y))
            pix.save(png_name)
    return None


if __name__ == "__main__":
    print("在使用该脚本前,请将发票、付款记录、购买记录按以下规则命名:")
    print("在文件名前添加[两位数字][类型]")
    print("数字代表的是第几份材料,类型:发票a,付款记录b,购买记录c")
    print("如:02b小公仔付款.png 代表第二份材料中的付款记录")
    input("按回车键开始...")
    convert_img()
    # Only convert to PDF when a document was actually written.
    if gen_docx():
        print("生成 .pdf 文件中...")
        convert("output.docx", "output.pdf")
        print("生成完毕!")