From 134ae9a6f626bebc92575367f5c63847f50d77b9 Mon Sep 17 00:00:00 2001 From: Dawn_Ocean <1785590531@qq.com> Date: Fri, 20 Oct 2023 15:13:40 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=20pdf=20=E8=BD=AC=E6=8D=A2?= =?UTF-8?q?=20bug=EF=BC=9B=E7=AE=80=E5=8C=96=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 77 +++++++++++++++++++++++++++------------------------------ 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/main.py b/main.py index 1ad4c2a..a772c1c 100644 --- a/main.py +++ b/main.py @@ -1,36 +1,41 @@ from docx import Document -from docx.shared import Inches, Pt +from docx.shared import Inches + +from copy import copy from PIL import Image import fitz -import pdfkit +from docx2pdf import convert import os -types = "abc" - -def convert(path = os.path.abspath('.')): +def convert_img(path = os.path.abspath('.')): while True: - for _, _, files in os.walk(path): - for filename in files: - if ".pdf" in filename: - result = pdf2img(path, filename) - if result: # 1 -> Error - continue - if ".jpg" in filename or ".jpeg" in filename: - img = Image.open(filename) - img.save(filename.split('.')[0] + ".png", "PNG") + print("正在扫描并处理文件中...") + files = os.listdir('.') + for file in files: + if file.endswith('.pdf') and "output" not in file: + result = pdf2img(path, file) + if result: # 1 -> Error + print("PDF 转换失败!") + continue + elif file.endswith('.jpg') or file.endswith('.jpeg'): + img = Image.open(file) + img.save(file.split('.')[0] + ".png", "PNG") + print("JPG/JPEG 转换失败!") + print("转换完毕!") break -def gen_filelist(pages, path = os.path.abspath('.')): +def gen_filelist(path = os.path.abspath('.')): types = "abc" - file_list = [] - for _, _, files in os.walk(path): - for filename in files: - if ".png" in filename: - file_list.append(filename) + print("创建文件列表中...") + file_list = os.listdir('.') + file_list_cp = copy(file_list) # 创建浅拷贝 + for filename in file_list_cp: + if ".png" not in filename: + file_list.remove(filename) file_list.sort() page_curr = 1 type_curr = 0 @@ -41,20 +46,17 @@ def gen_filelist(pages, path = os.path.abspath('.')): type_curr += 1 if type_curr % 3 == 0: page_curr += 1 + print("创建完毕!将要加入文档的文件如下:") + for file in file_list: + print(file) return file_list def gen_docx(): while True: doc = Document() - print() - pages = input("输入你要生成的材料页数(对应的 3 份图片为 1 页):") - if pages.isdigit(): - pages = int(pages) - else: - print("请输入一个数字!") - continue - file_list = gen_filelist(pages) - print(file_list) + file_list = gen_filelist() + input("按回车键确认...") + print("生成 .docx 文档中...") if len(file_list) != 0: pic_count = 0 for filename in file_list: @@ -73,6 +75,7 @@ def gen_docx(): else: print("请检查文件命名是否正确!") doc.save("output.docx") + print("生成完毕!") break def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3): @@ -85,21 +88,15 @@ def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3): pix.save(pdf_name[:-4] + '.png') # 将图像存储为PNG格式 doc.close() # 关闭文档 -def docx2pdf(doc_file = "output.docx", html_file = "output.html"): - doc = Document(doc_file) - full_text = "" - for para in doc.paragraphs: - full_text += para.text + "\n" - with open(html_file, "w", encoding="utf-8") as file: - file.write(full_text) - pdfkit.from_file(html_file, "output.pdf") - if __name__ == "__main__": print("在使用该脚本前,请将发票、付款记录、购买记录按以下规则命名:") print("在文件名前添加[两位数字][类型]") print("数字代表的是第几份材料,类型:发票a,付款记录b,购买记录c") print("如:02b小公仔付款.png 代表第二份材料中的付款记录") - convert() + input("按回车键开始...") + convert_img() gen_docx() - docx2pdf() + print("生成 .pdf 文件中...") + convert("output.docx", "output.pdf") + print("生成完毕!") \ No newline at end of file