From 134ae9a6f626bebc92575367f5c63847f50d77b9 Mon Sep 17 00:00:00 2001
From: Dawn_Ocean <1785590531@qq.com>
Date: Fri, 20 Oct 2023 15:13:40 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=20pdf=20=E8=BD=AC=E6=8D=A2?=
 =?UTF-8?q?=20bug=EF=BC=9B=E7=AE=80=E5=8C=96=E4=BB=A3=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 main.py | 77 +++++++++++++++++++++++++++------------------------------
 1 file changed, 37 insertions(+), 40 deletions(-)

diff --git a/main.py b/main.py
index 1ad4c2a..a772c1c 100644
--- a/main.py
+++ b/main.py
@@ -1,36 +1,41 @@
 from docx import Document
-from docx.shared import Inches, Pt
+from docx.shared import Inches
+
+from copy import copy
 
 from PIL import Image
 
 import fitz
 
-import pdfkit
+from docx2pdf import convert
 
 import os
 
-types = "abc"
-
-def convert(path = os.path.abspath('.')):
+def convert_img(path = os.path.abspath('.')):
+    """Convert PDFs and JPG/JPEG images in the working directory to PNG."""
     while True:
-        for _, _, files in os.walk(path):
-            for filename in files:
-                if ".pdf" in filename:
-                    result = pdf2img(path, filename)
-                    if result:                  # 1 -> Error
-                        continue
-                if ".jpg" in filename or ".jpeg" in filename:
-                    img = Image.open(filename)
-                    img.save(filename.split('.')[0] + ".png", "PNG")
+        print("正在扫描并处理文件中...")
+        for file in os.listdir('.'):  # scans the CWD; `path` is only forwarded to pdf2img
+            if file.endswith('.pdf') and "output" not in file:  # skip names like the generated output.pdf
+                result = pdf2img(path, file)
+                if result:                  # 1 -> Error
+                    print("PDF 转换失败！")
+                    continue
+            elif file.endswith(('.jpg', '.jpeg')):
+                with Image.open(file) as img:  # close the source image once it has been saved
+                    img.save(os.path.splitext(file)[0] + ".png", "PNG")  # keep dots inside the stem
+                print("JPG/JPEG 转换完成！")  # reached only after a successful save
+        print("转换完毕！")
         break
 
-def gen_filelist(pages, path = os.path.abspath('.')):
+def gen_filelist(path = os.path.abspath('.')):
     types = "abc"
-    file_list = []
-    for _, _, files in os.walk(path):
-            for filename in files:
-                if ".png" in filename:
-                    file_list.append(filename)
+    print("创建文件列表中...")
+    file_list = os.listdir('.')
+    file_list_cp = copy(file_list)              # 创建浅拷贝
+    for filename in file_list_cp:
+        if ".png" not in filename:
+            file_list.remove(filename)
     file_list.sort()
     page_curr = 1
     type_curr = 0
@@ -41,20 +46,17 @@ def gen_filelist(pages, path = os.path.abspath('.')):
             type_curr += 1
             if type_curr % 3 == 0:
                 page_curr += 1
+    print("创建完毕！将要加入文档的文件如下：")
+    for file in file_list:
+        print(file)
     return file_list
 
 def gen_docx():
     while True:
         doc = Document()
-        print()
-        pages = input("输入你要生成的材料页数（对应的 3 份图片为 1 页）：")
-        if pages.isdigit():
-            pages = int(pages)
-        else:
-            print("请输入一个数字！")
-            continue
-        file_list = gen_filelist(pages)
-        print(file_list)
+        file_list = gen_filelist()
+        input("按回车键确认...")
+        print("生成 .docx 文档中...")
         if len(file_list) != 0:
             pic_count = 0
             for filename in file_list:
@@ -73,6 +75,7 @@ def gen_docx():
         else:
             print("请检查文件命名是否正确！")
         doc.save("output.docx")
+        print("生成完毕！")
         break
     
 def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
@@ -85,21 +88,15 @@ def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
         pix.save(pdf_name[:-4] + '.png')  # 将图像存储为PNG格式
     doc.close()  # 关闭文档
     
-def docx2pdf(doc_file = "output.docx", html_file = "output.html"):
-    doc = Document(doc_file)
-    full_text = ""
-    for para in doc.paragraphs:
-        full_text += para.text + "\n"
-    with open(html_file, "w", encoding="utf-8") as file:
-        file.write(full_text)
-    pdfkit.from_file(html_file, "output.pdf")
-    
 if __name__ == "__main__":
     print("在使用该脚本前，请将发票、付款记录、购买记录按以下规则命名：")
     print("在文件名前添加[两位数字][类型]")
     print("数字代表的是第几份材料，类型：发票a，付款记录b，购买记录c")
     print("如：02b小公仔付款.png 代表第二份材料中的付款记录")
-    convert()
+    input("按回车键开始...")  # wait for the user to press Enter before any file is processed
+    convert_img()  # scan for PDF/JPG/JPEG inputs and write PNG copies
     gen_docx()
-    docx2pdf()
+    print("生成 .pdf 文件中...")
+    convert("output.docx", "output.pdf")  # `convert` is docx2pdf.convert, imported at the top of main.py
+    print("生成完毕！")
     
\ No newline at end of file