"""Build a printable document from per-invoice folders.

Every sub-folder of the working directory is expected to hold the files of
one invoice.  PDFs and JPEGs in each folder are converted to PNG, the PNGs are
laid out in ``output.docx`` (invoice on top, the two other images side by side
below it), and the document is finally converted to ``output.pdf``.  All PNGs
generated along the way are deleted again when the script finishes or aborts.
"""
import os
import sys

import fitz
from docx import Document
from docx.shared import Inches
from docx2pdf import convert
from PIL import Image

# Paths of files generated by this script; done() deletes them on exit.
# Defined at module level so the functions also work when imported.
gened_list = []

# File-name prefixes of PNGs rendered from PDFs.  The capitalised variant
# marks the PNG that gen_docx() places first (the invoice).
_PDF_PREFIX = "pdf"
_INVOICE_PREFIX = "Pdf"


def done():
    """Delete every generated file, wait for the user and terminate."""
    print("正在移除多余生成文件...")
    for gen in gened_list:
        try:
            os.remove(gen)
        except FileNotFoundError:
            # The path was registered before a save that never completed.
            pass
    gened_list.clear()
    print("移除完毕!")
    input("按任意键退出...")
    # sys.exit() instead of the site-provided exit(), which is not
    # guaranteed to exist (e.g. in frozen executables).
    sys.exit()


def deal_file_loss(path, dir_name):
    """Report that folder *dir_name* holds too few files, then quit.

    *path* is accepted for interface compatibility and is not used.
    """
    print(f" {dir_name} 文件夹中文件个数不符,请检查!")
    done()


def convert_all(path=os.path.abspath('.')):
    """Convert the PDFs/JPEGs of every sub-folder of *path* to PNG.

    Terminates the program through done() when a folder holds fewer than
    three files or when a conversion fails.
    """
    print("正在扫描并处理文件中...")
    for item in os.listdir(path):
        folder = os.path.join(path, item)
        # Test the joined path so the check also works when *path* is not
        # the current working directory.
        if not os.path.isdir(folder):
            continue
        if len(os.listdir(folder)) < 3:
            deal_file_loss(path, item)
        result = convert_img(item, folder)
        if result == 1:
            print("PDF 转换失败!")
            done()
        elif result == 2:
            print("JPG/JPEG 转换失败!")
            done()
    print("转换完毕!")


def _pdf_png_path(folder, pdf_name, prefix):
    """Return the PNG path used for *pdf_name* (a ``*.pdf`` file name)."""
    return os.path.join(folder, prefix + pdf_name[:-4] + ".png")


def _mark_invoice(folder, pdf_name):
    """Rename the PNG rendered from *pdf_name* so it carries the invoice prefix."""
    plain = _pdf_png_path(folder, pdf_name, _PDF_PREFIX)
    marked = _pdf_png_path(folder, pdf_name, _INVOICE_PREFIX)
    os.rename(plain, marked)
    # Keep the clean-up list in sync with the new file name.
    gened_list.remove(plain)
    gened_list.append(marked)


def _choose_invoice(dir_name, pdf_list):
    """Ask the user which PDF of *pdf_list* is the invoice and return its name."""
    print(f"注意到文件夹 {dir_name} 中有多个 .pdf 文件:")
    for number, pdf_name in enumerate(pdf_list, start=1):  # options are 1-based
        print(f"{number} - {pdf_name}")
    while True:
        answer = input("请选择发票文件(输入选项前的阿拉伯数字):").strip()
        # isdecimal() rather than isdigit(): the latter accepts characters
        # such as superscripts that int() cannot parse.
        if not answer.isdecimal():
            print("请输入数字!")
            continue
        index = int(answer) - 1  # back to a 0-based index
        if 0 <= index < len(pdf_list):
            return pdf_list[index]
        print("请选择正确的序号!")


def convert_img(dir_name, path=os.path.abspath('.')):
    """Convert every PDF and JPEG inside *path* to PNG.

    *path* must be the absolute path of the folder; *dir_name* is only used
    in messages.  When the folder holds several PDFs the user is asked which
    one is the invoice; a single PDF is taken to be the invoice.

    Returns 0 on success, 1 when a PDF could not be converted and 2 when a
    JPEG could not be converted.
    """
    pdf_list = []
    for file in os.listdir(path):
        # Lower-case only for the extension test (handles ".PDF", ".JPG");
        # the real name is kept for file access so this also works on
        # case-sensitive file systems.
        lowered = file.lower()
        if lowered.endswith('.pdf'):
            if pdf2img(path, file):
                return 1
            pdf_list.append(file)
        elif lowered.endswith(('.jpg', '.jpeg')):
            img_path = os.path.join(path, os.path.splitext(file)[0] + ".png")
            # Register first so a partially written file is cleaned up too.
            gened_list.append(img_path)
            try:
                with Image.open(os.path.join(path, file)) as img:
                    img.save(img_path, "PNG")
            except OSError:
                # Unreadable image or a mode PNG cannot store.
                return 2

    if len(pdf_list) == 1:
        _mark_invoice(path, pdf_list[0])
    elif len(pdf_list) > 1:
        _mark_invoice(path, _choose_invoice(dir_name, pdf_list))
    return 0


def gen_filelist(path=os.path.abspath('.')):
    """Return ``{folder name: [png file names]}`` for the sub-folders of *path*.

    Terminates the program through done() when a folder does not hold
    exactly three PNG files.
    """
    print("创建文件夹列表中...")
    file_list = {}
    for item in os.listdir(path):
        folder = os.path.join(path, item)
        if not os.path.isdir(folder):
            continue
        # Match on the extension, not on ".png" appearing anywhere in the name.
        pngs = [name for name in os.listdir(folder)
                if name.lower().endswith(".png")]
        if len(pngs) != 3:
            print(f"在{item}文件夹发现错误:png文件个数不符")
            done()
        file_list[item] = pngs

    print("创建完毕!将要加入文档的文件如下:")
    for folder_name, pngs in file_list.items():
        print('- ' + folder_name)
        print(' ' + ''.join(name + ", " for name in pngs))
    print(f"共有 {len(file_list)} 组文件")
    return file_list


def gen_docx(path=os.path.abspath('.')):
    """Write ``output.docx`` with one page per invoice folder below *path*.

    Each page shows the invoice image on top and the two remaining images
    side by side in a two-column table.  The user is re-prompted until at
    least one folder is found.
    """
    while True:
        file_list = gen_filelist(path)
        input("按回车键确认...")
        print("生成 .docx 文档中...")
        if file_list:
            break
        print("请检查文件命名是否正确!")

    doc = Document()
    last = len(file_list) - 1
    for position, (folder_name, pngs) in enumerate(file_list.items()):
        folder = os.path.join(path, folder_name)
        # Stable sort: the invoice (marked by its prefix) goes first and the
        # other images keep their order.  Avoids mutating the list while
        # iterating over it.
        ordered = sorted(
            pngs, key=lambda name: not name.startswith(_INVOICE_PREFIX))
        doc.add_picture(os.path.join(folder, ordered[0]), height=Inches(2.5))
        table = doc.add_table(rows=1, cols=2)
        for column, name in enumerate(ordered[1:3]):
            run = table.cell(0, column).paragraphs[0].add_run()
            run.add_picture(os.path.join(folder, name), height=Inches(5.0))
        if position != last:  # no trailing blank page
            doc.add_page_break()

    doc.save("output.docx")
    print("生成完毕!")


def pdf2img(pdf_path, pdf_name, zoom_x=3, zoom_y=3):
    """Render the single page of a PDF to a PNG next to it.

    The PNG is named ``"pdf" + <pdf stem> + ".png"`` and registered in
    ``gened_list``.  *zoom_x* / *zoom_y* scale the rendering.

    Returns 0 on success and 1 when the PDF does not have exactly one page.
    """
    doc = fitz.open(os.path.join(pdf_path, pdf_name))
    try:
        if len(doc) != 1:
            print("PDF 文件只能包含一页!")
            return 1
        pix = doc[0].get_pixmap(matrix=fitz.Matrix(zoom_x, zoom_y))
        png_path = _pdf_png_path(pdf_path, pdf_name, _PDF_PREFIX)
        gened_list.append(png_path)
        pix.save(png_path)
    finally:
        # Always release the file, including on the early return above.
        doc.close()
    return 0


def main():
    """Run the full pipeline: convert images, build the .docx, export the .pdf."""
    print("在使用该脚本前,请保证程序所在的文件夹中仅包含程序、发票文件夹")
    print("并且确保每个发票文件夹内只有三个文件")
    input("按回车键开始...")
    convert_all()
    gen_docx()
    print("生成 .pdf 文件中...")
    convert("output.docx", "output.pdf")
    print("生成完毕!")
    done()


if __name__ == "__main__":
    main()