修复 pdf 转换 bug；简化代码

2023-10-20 15:13:40 +08:00 · 2023-10-20 15:13:40 +08:00 · 134ae9a6f6
parent 87306a18ee
commit 134ae9a6f6
1 changed file with 37 additions and 40 deletions
--- a/main.py
+++ b/main.py
@@ -1,36 +1,41 @@
 from docx import Document
-from docx.shared import Inches, Pt
+from docx.shared import Inches
+
+from copy import copy

 from PIL import Image

 import fitz

-import pdfkit
+from docx2pdf import convert

 import os

-types = "abc"
-
-def convert(path = os.path.abspath('.')):
+def convert_img(path = os.path.abspath('.')):
    while True:
-        for _, _, files in os.walk(path):
-            for filename in files:
-                if ".pdf" in filename:
-                    result = pdf2img(path, filename)
-                    if result:                  # 1 -> Error
-                        continue
-                if ".jpg" in filename or ".jpeg" in filename:
-                    img = Image.open(filename)
-                    img.save(filename.split('.')[0] + ".png", "PNG")
+        print("正在扫描并处理文件中...")
+        files = os.listdir('.')
+        for file in files:
+            if file.endswith('.pdf') and "output" not in file:
+                result = pdf2img(path, file)
+                if result:                  # 1 -> Error
+                    print("PDF 转换失败！")
+                    continue
+            elif file.endswith('.jpg') or file.endswith('.jpeg'):
+                img = Image.open(file)
+                img.save(file.split('.')[0] + ".png", "PNG")
+                print("JPG/JPEG 转换失败！")
+        print("转换完毕！")
        break

-def gen_filelist(pages, path = os.path.abspath('.')):
+def gen_filelist(path = os.path.abspath('.')):
    types = "abc"
-    file_list = []
-    for _, _, files in os.walk(path):
-            for filename in files:
-                if ".png" in filename:
-                    file_list.append(filename)
+    print("创建文件列表中...")
+    file_list = os.listdir('.')
+    file_list_cp = copy(file_list)              # 创建浅拷贝
+    for filename in file_list_cp:
+        if ".png" not in filename:
+            file_list.remove(filename)
    file_list.sort()
    page_curr = 1
    type_curr = 0
@@ -41,20 +46,17 @@ def gen_filelist(pages, path = os.path.abspath('.')):
            type_curr += 1
            if type_curr % 3 == 0:
                page_curr += 1
+    print("创建完毕！将要加入文档的文件如下：")
+    for file in file_list:
+        print(file)
    return file_list

 def gen_docx():
    while True:
        doc = Document()
-        print()
-        pages = input("输入你要生成的材料页数（对应的 3 份图片为 1 页）：")
-        if pages.isdigit():
-            pages = int(pages)
-        else:
-            print("请输入一个数字！")
-            continue
-        file_list = gen_filelist(pages)
-        print(file_list)
+        file_list = gen_filelist()
+        input("按回车键确认...")
+        print("生成 .docx 文档中...")
        if len(file_list) != 0:
            pic_count = 0
            for filename in file_list:
@@ -73,6 +75,7 @@ def gen_docx():
        else:
            print("请检查文件命名是否正确！")
        doc.save("output.docx")
+        print("生成完毕！")
        break
    
 def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
@@ -85,21 +88,15 @@ def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
        pix.save(pdf_name[:-4] + '.png')  # 将图像存储为PNG格式
    doc.close()  # 关闭文档
    
-def docx2pdf(doc_file = "output.docx", html_file = "output.html"):
-    doc = Document(doc_file)
-    full_text = ""
-    for para in doc.paragraphs:
-        full_text += para.text + "\n"
-    with open(html_file, "w", encoding="utf-8") as file:
-        file.write(full_text)
-    pdfkit.from_file(html_file, "output.pdf")
-    
 if __name__ == "__main__":
    print("在使用该脚本前，请将发票、付款记录、购买记录按以下规则命名：")
    print("在文件名前添加[两位数字][类型]")
    print("数字代表的是第几份材料，类型：发票a，付款记录b，购买记录c")
    print("如：02b小公仔付款.png 代表第二份材料中的付款记录")
-    convert()
+    input("按回车键开始...")
+    convert_img()
    gen_docx()
-    docx2pdf()
+    print("生成 .pdf 文件中...")
+    convert("output.docx", "output.pdf")
+    print("生成完毕！")