修复 pdf 转换 bug；简化代码

2023-10-20 15:13:40 +08:00 · 2023-10-20 15:13:40 +08:00 · 134ae9a6f6
parent 87306a18ee
commit 134ae9a6f6
1 changed file with 37 additions and 40 deletions
--- a/main.py
+++ b/main.py
@@ -1,36 +1,41 @@
 from docx import Document
-from docx.shared import Inches, Pt
+from docx.shared import Inches
 from copy import copy
 from PIL import Image
 import fitz
-import pdfkit
+from docx2pdf import convert
 import os
-types = "abc"
+def convert_img(path = os.path.abspath('.')):
 def convert(path = os.path.abspath('.')):
    while True:
-        for _, _, files in os.walk(path):
+        print("正在扫描并处理文件中...")
-            for filename in files:
+        files = os.listdir('.')
-                if ".pdf" in filename:
+        for file in files:
-                    result = pdf2img(path, filename)
+            if file.endswith('.pdf') and "output" not in file:
-                    if result:                  # 1 -> Error
+                result = pdf2img(path, file)
-                        continue
+                if result:                  # 1 -> Error
-                if ".jpg" in filename or ".jpeg" in filename:
+                    print("PDF 转换失败！")
-                    img = Image.open(filename)
+                    continue
-                    img.save(filename.split('.')[0] + ".png", "PNG")
+            elif file.endswith('.jpg') or file.endswith('.jpeg'):
                img = Image.open(file)
                img.save(file.split('.')[0] + ".png", "PNG")
                print("JPG/JPEG 转换失败！")
        print("转换完毕！")
        break
-def gen_filelist(pages, path = os.path.abspath('.')):
+def gen_filelist(path = os.path.abspath('.')):
    types = "abc"
-    file_list = []
+    print("创建文件列表中...")
-    for _, _, files in os.walk(path):
+    file_list = os.listdir('.')
-            for filename in files:
+    file_list_cp = copy(file_list)              # 创建浅拷贝
-                if ".png" in filename:
+    for filename in file_list_cp:
-                    file_list.append(filename)
+        if ".png" not in filename:
            file_list.remove(filename)
    file_list.sort()
    page_curr = 1
    type_curr = 0
@@ -41,20 +46,17 @@ def gen_filelist(pages, path = os.path.abspath('.')):
            type_curr += 1
            if type_curr % 3 == 0:
                page_curr += 1
    print("创建完毕！将要加入文档的文件如下：")
    for file in file_list:
        print(file)
    return file_list
 def gen_docx():
    while True:
        doc = Document()
-        print()
+        file_list = gen_filelist()
-        pages = input("输入你要生成的材料页数（对应的 3 份图片为 1 页）：")
+        input("按回车键确认...")
-        if pages.isdigit():
+        print("生成 .docx 文档中...")
            pages = int(pages)
        else:
            print("请输入一个数字！")
            continue
        file_list = gen_filelist(pages)
        print(file_list)
        if len(file_list) != 0:
            pic_count = 0
            for filename in file_list:
@@ -73,6 +75,7 @@ def gen_docx():
        else:
            print("请检查文件命名是否正确！")
        doc.save("output.docx")
        print("生成完毕！")
        break
 def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
@@ -85,21 +88,15 @@ def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
        pix.save(pdf_name[:-4] + '.png')  # 将图像存储为PNG格式
    doc.close()  # 关闭文档
 def docx2pdf(doc_file = "output.docx", html_file = "output.html", pdf_file = "output.pdf"):
    """Convert the paragraph text of a .docx file to PDF via an intermediate HTML file.

    Only the ``text`` of each paragraph in ``doc.paragraphs`` is carried over;
    nothing else from the document is read by this function.

    Args:
        doc_file: Path of the .docx document to read.
        html_file: Path of the intermediate HTML file; it is overwritten.
        pdf_file: Path of the PDF to produce. Defaults to "output.pdf", the
            value that used to be hard-coded.
    """
    # Local import so the block does not depend on the file's import section.
    from html import escape

    doc = Document(doc_file)
    # Each paragraph is escaped and wrapped in <p>: raw text dropped into an
    # HTML file has its line breaks collapsed by the renderer, and any '<' or
    # '&' in the text would be parsed as markup.
    body = "\n".join(
        "<p>" + escape(para.text) + "</p>" for para in doc.paragraphs
    )
    # The file is written as UTF-8, so declare that encoding in the page
    # instead of leaving the renderer to guess it.
    page = (
        "<!DOCTYPE html>\n"
        "<html>\n"
        "<head><meta charset=\"utf-8\"></head>\n"
        "<body>\n" + body + "\n</body>\n"
        "</html>\n"
    )
    with open(html_file, "w", encoding="utf-8") as file:
        file.write(page)
    pdfkit.from_file(html_file, pdf_file)
 if __name__ == "__main__":
    print("在使用该脚本前，请将发票、付款记录、购买记录按以下规则命名：")
    print("在文件名前添加[两位数字][类型]")
    print("数字代表的是第几份材料，类型：发票a，付款记录b，购买记录c")
    print("如：02b小公仔付款.png 代表第二份材料中的付款记录")
-    convert()
+    input("按回车键开始...")
    convert_img()
    gen_docx()
-    docx2pdf()
+    print("生成 .pdf 文件中...")
    convert("output.docx", "output.pdf")
    print("生成完毕！")