增加了一页仅两张图片生成文档的功能

在生成完毕或出现错误后会删除新产生的文件
增加了文件夹中存在多个 .pdf 文件，选择发票文件的功能
2023-10-22 23:59:12 +08:00 · 2023-10-22 22:13:25 +08:00 · 2023-10-22 21:42:57 +08:00 · 2023-10-22 21:18:40 +08:00 · 2023-10-22 20:15:19 +08:00 · 2023-10-22 20:08:09 +08:00
3 changed files with 132 additions and 77 deletions
--- a/README.md
+++ b/README.md
@ -2,22 +2,18 @@
 ## 功能
-将当前文件夹中以`发票-付款记录-购买记录`为一组的三张图片放在`.docx`文件的同一页面。同时会生成相应的`.pdf`文件。
+将当前文件夹中所有的子文件夹以`发票-付款记录-购买记录`为一组的三张图片放在`.docx`文件的同一页面。同时会生成相应的`.pdf`文件。
 会将`.pdf`、`.jpg`、`.jpeg`文件自动转换为`.png`文件再插入。
 ## 使用方法
-1. 将`.exe`文件与待处理图片放在同一个文件夹。
+1. 将`.exe`文件与所有包含待处理图片的**子文件夹**放在同一个文件夹。
 2. 运行`.exe`文件，根据提示进行操作。
-2. 文件夹中所有图片按照分组命名：【两位数字】【类型】【其余内容】。
+## 注意事项
-   从01开始按顺序命名序号，同组的文件序号相同，类型：发票a，付款记录b，购买记录c。
+1. 图片文件不得以 "pdf" 三个字母开头。
   例如：“02b公仔付款.png” -> 第二组的付款记录图片。
   **一位数请在开头加上 0，如 02、06。**
 3. 运行`.exe`文件，根据提示进行操作。
 ## 所需包（已打包在`.exe`中）
--- a/main.py
+++ b/main.py
@ -1,82 +1,137 @@
 from docx import Document
 from docx.shared import Inches
 from copy import copy
 from PIL import Image
 from copy import copy
 import fitz
 from docx2pdf import convert
 from sys import exit
 import os
-def convert_img(path = os.path.abspath('.')):
+def done(exit_code = 0):
-    while True:
+    print("正在移除多余生成文件...")
-        print("正在扫描并处理文件中...")
+    for gen in gened_list:
-        files = os.listdir('.')
+        os.remove(gen)
-        for file in files:
+    print("移除完毕！")
-            if file.endswith('.pdf') and "output" not in file:
+    input("按回车键退出...")
-                result = pdf2img(path, file)
+    exit(exit_code)
-                if result:                  # 1 -> Error
+
-                    print("PDF 转换失败！")
+def convert_all(path = os.path.abspath('.')):
-                    continue
+    print("正在扫描并处理文件中...")
-            elif file.endswith('.jpg') or file.endswith('.jpeg'):
+    items = os.listdir(path)
-                img = Image.open(file)
+    for item in items:
-                img.save(file.split('.')[0] + ".png", "PNG")
+        if os.path.isdir(item):
            result = convert_img(item, path + '\\' + item)
            if result == 1:
                print("PDF 转换失败！")
                done(1)
            elif result == 2:
                print("JPG/JPEG 转换失败！")
-        print("转换完毕！")
+                done(1)
-        break
+    print("转换完毕！")
 """传入绝对路径"""
 def convert_img(dir_name, path = os.path.abspath('.')):
    """Convert every PDF/JPG/JPEG file found in one folder into PNG images.

    PDF files are rendered through ``pdf2img``; JPG/JPEG files are re-saved
    as PNG next to the original.  Every PNG created here is recorded in the
    module-level ``gened_list``.  When the folder holds more than one PDF,
    the user is asked which one is the invoice, and that PDF's PNG is
    renamed from the ``pdf`` prefix to ``Pdf``.

    Args:
        dir_name: Folder name, used only in the message shown to the user.
        path: Directory to scan; the caller passes an absolute path.

    Returns:
        1 if a PDF could not be converted, 2 if a JPG/JPEG could not be
        converted, ``None`` on success.
    """
    pdf_list = []
    for file in os.listdir(path):
        # Lower-case only for the extension check (handles ".PNG", ".JPG", ...);
        # the real file name is kept for every path that is built below.
        lowered = file.lower()
        if lowered.endswith('.pdf'):
            if pdf2img(path, file):     # truthy result (1) -> error
                return 1
            pdf_list.append(file)
        elif lowered.endswith(('.jpg', '.jpeg')):
            # rfind keeps dots inside the file name from truncating the stem.
            img_path = path + '\\' + file[:file.rfind('.')] + ".png"
            try:
                # The context manager closes the source image handle.
                with Image.open(path + '\\' + file) as img:
                    img.save(img_path, "PNG")
            except OSError:
                # Report the failure through the return code the caller checks
                # for, instead of crashing with a traceback.
                return 2
            gened_list.append(img_path)
    if len(pdf_list) > 1:
        print(f"注意到文件夹 {dir_name} 中有多个 .pdf 文件：")
        # Options are shown 1-based and converted back to 0-based below.
        for number, pdf_name in enumerate(pdf_list, start = 1):
            print(f"{number} - {pdf_name}")
        while True:
            invoice_index = input("请选择发票文件（输入选项前的阿拉伯数字）：")
            # isdecimal() only accepts characters that int() can parse;
            # isdigit() also accepts e.g. superscripts, which int() rejects.
            if not invoice_index.isdecimal():
                print("请输入数字！")
                continue
            invoice_int = int(invoice_index) - 1
            if not 0 <= invoice_int < len(pdf_list):
                print("请选择正确的序号!")
                continue
            stem = pdf_list[invoice_int][:-4]
            # These strings must match what pdf2img stored in gened_list,
            # so they are built with the same '\\' concatenation.
            invoice_path = path + '\\' + "pdf" + stem + ".png"
            invoice_renamed = path + '\\' + 'Pdf' + stem + ".png"
            os.rename(invoice_path, invoice_renamed)    # tag "pdf" -> "Pdf"
            # pdf2img appends the path once per rendered page, so drop every
            # occurrence of the old name, not just the first one.
            gened_list[:] = [p for p in gened_list if p != invoice_path]
            gened_list.append(invoice_renamed)
            break
 def gen_filelist(path = os.path.abspath('.')):
-    types = "abc"
+    print("创建文件夹列表中...")
-    print("创建文件列表中...")
+    dir_list = []
-    file_list = os.listdir('.')
+    item_list = os.listdir('.')
-    file_list_cp = copy(file_list)              # 创建浅拷贝
+    for item in item_list:
-    for filename in file_list_cp:
+        if os.path.isdir(item):
-        if ".png" not in filename:
+            dir_list.append(item)
-            file_list.remove(filename)
+    file_list = {}
-    file_list.sort()
+    for dir in dir_list:
-    page_curr = 1
+        file_list[dir] = os.listdir(path + '\\' + dir)
-    type_curr = 0
+    for dir, dir_file in file_list.items():
-    for filename in file_list:
+        dir_file_copy = copy(dir_file)
-        if int(filename[0:2]) != page_curr or filename[2] != types[type_curr % 3]:
+        for file in dir_file_copy:
-            return []
+            if ".png" not in file.lower():
-        else:
+                dir_file.remove(file)
-            type_curr += 1
+        if len(dir_file) != 3:
-            if type_curr % 3 == 0:
+            print(f"在{dir}文件夹发现错误：png文件个数不符")
-                page_curr += 1
+            for file in dir_file:
                print('- ' + file)
            if len(dir_file) == 2:          # 说明有两个文件
                option = input("文件夹仅有两个文件，仍继续生成？（y/N）").lower()
                if option != 'y':
                    done(1)                     
    print("创建完毕！将要加入文档的文件如下：")
-    for file in file_list:
+    for dir, dir_file in file_list.items():
-        print(file)
+        print('- ' + dir)
        print('    ', end = '')
        for file in dir_file:
            print(file, end = ", ")
        print()
    print(f"共有 {len(list(file_list.items()))} 组文件")
    return file_list
-def gen_docx():
+def gen_docx(path = os.path.abspath('.')):
-    while True:
+    doc = Document()
-        doc = Document()
+    file_list = gen_filelist()
-        file_list = gen_filelist()
+    input("按回车键确认...")
-        input("按回车键确认...")
+    print("生成 .docx 文档中...")
-        print("生成 .docx 文档中...")
+    if len(file_list) != 0:
-        if len(file_list) != 0:
+        for dir, dir_file in file_list.items():
-            pic_count = 0
+            parent_path = path + '\\' + dir + '\\'
-            for filename in file_list:
+            for file in dir_file:
-                pic_count += 1
+                if file[0:3] == "Pdf":
-                if pic_count % 3 == 1:
+                    dir_file.remove(file)
-                    doc.add_picture(filename, height = Inches(2.5))
+                    dir_file.insert(0, file)                            # 将发票文件放在第一个位置
-                    table = doc.add_table(rows = 1, cols = 2)
+            doc.add_picture(parent_path + dir_file[0], height = Inches(2.5))
-                elif pic_count % 3 == 2:
+            table = doc.add_table(rows = 1, cols = 2)
-                    cell1 = table.cell(0, 0)
+            cell1 = table.cell(0, 0)
-                    cell1.paragraphs[0].add_run().add_picture(filename, height = Inches(5.0))
+            cell1.paragraphs[0].add_run().add_picture(parent_path + dir_file[1], height = Inches(5.0))
-                elif pic_count % 3 == 0:
+            cell2 = table.cell(0, 1)
-                    cell2 = table.cell(0, 1)
+            if len(dir_file) == 3:
-                    cell2.paragraphs[0].add_run().add_picture(filename, height = Inches(5.0))
+                cell2.paragraphs[0].add_run().add_picture(parent_path + dir_file[2], height = Inches(5.0))
-                    if pic_count != len(file_list):
+            if (dir, dir_file) != list(file_list.items())[-1]:          # dict.items() 返回值需先转换为列表，才能索引
-                        doc.add_page_break()
+                doc.add_page_break()
-        else:
+    else:
-            print("请检查文件命名是否正确！")
+        print("请检查文件结构是否正确！")
-        doc.save("output.docx")
+        done(1)
-        print("生成完毕！")
+    doc.save("output.docx")
-        break
+    print("生成完毕！")
 def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
    doc = fitz.open(pdf_path + "\\" + pdf_name)  # 打开文档
@ -85,18 +140,22 @@ def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
        return 1
    for page in doc:  # 遍历页面
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom_x, zoom_y))  # 将页面渲染为图片
-        pix.save(pdf_name[:-4] + '.png')  # 将图像存储为PNG格式
+        png_path = pdf_path + '\\' + "pdf" + pdf_name[:-4] + ".png"
        gened_list.append(png_path)
        pix.save(png_path)  # 将图像存储为PNG格式
    doc.close()  # 关闭文档
 if __name__ == "__main__":
-    print("在使用该脚本前，请将发票、付款记录、购买记录按以下规则命名：")
+    gened_list = [] # 新创建的文件列表，在出错时或者生成完毕时进行删除
-    print("在文件名前添加[两位数字][类型]")
+    print("在使用该脚本前，请保证程序所在的文件夹中仅包含程序、发票文件夹")
-    print("数字代表的是第几份材料，类型：发票a，付款记录b，购买记录c")
+    print("并且确保每个发票文件夹内只有三个文件")
    print("如：02b小公仔付款.png 代表第二份材料中的付款记录")
    input("按回车键开始...")
-    convert_img()
+    convert_all()
    gen_docx()
    print("生成 .pdf 文件中...")
    convert("output.docx", "output.pdf")
    print("生成完毕！")
    done(0)
--- a/报销程序.exe
+++ b/报销程序.exe
Author	SHA1	Message	Date
Dawn_Ocean	7291dc1dbb	增加了一页仅两张图片生成文档的功能	2023-10-22 23:59:12 +08:00
Dawn_Ocean	245c56f335	在生成完毕或出现错误后会删除新产生的文件	2023-10-22 22:13:25 +08:00
Dawn_Ocean	5d7f8c0772	增加了文件夹中存在多个 .pdf 文件，选择发票文件的功能	2023-10-22 21:42:57 +08:00
Dawn_Ocean	bd63d92c50	增加了文件夹中存在多个 .pdf 文件，选择发票文件的功能	2023-10-22 21:18:40 +08:00
Dawn_Ocean	d1b679a1db	修复了文件名中的点号'.'对文件扩展的影响	2023-10-22 20:15:19 +08:00
Dawn_Ocean	6e3b1d7ddc	修复了.png等文件的大小写问题	2023-10-22 20:08:09 +08:00
Dawn_Ocean	83f79474b3	修改功能为读取当前文件夹下所有子文件夹	2023-10-20 20:55:56 +08:00