修改功能为读取当前文件夹下所有子文件夹

2023-10-20 20:55:56 +08:00 · 2023-10-20 20:55:56 +08:00 · 83f79474b3
parent e486557838
commit 83f79474b3
3 changed files with 79 additions and 66 deletions
--- a/README.md
+++ b/README.md
@ -2,22 +2,18 @@
 ## 功能
-将当前文件夹中以`发票-付款记录-购买记录`为一组的三张图片放在`.docx`文件的同一页面。同时会生成相应的`.pdf`文件。
+将当前文件夹中所有的子文件夹以`发票-付款记录-购买记录`为一组的三张图片放在`.docx`文件的同一页面。同时会生成相应的`.pdf`文件。
 会将`.pdf`、`.jpg`、`.jpeg`文件自动转换为`.png`文件再插入。
 ## 使用方法
-1. 将`.exe`文件与待处理图片放在同一个文件夹。
+1. 将`.exe`文件与所有包含待处理图片的**子文件夹**放在同一个文件夹。
 2. 运行`.exe`文件，根据提示进行操作。
-2. 文件夹中所有图片按照分组命名：【两位数字】【类型】【其余内容】。
+## 注意事项
-   从01开始按顺序命名序号，同组的文件序号相同，类型：发票a，付款记录b，购买记录c。
+1. 图片文件不得以 "pdf" 三个字母开头。
   例如：“02b公仔付款.png” -> 第二组的付款记录图片。
   **一位数请在开头加上 0，如 02、06。**
 3. 运行`.exe`文件，根据提示进行操作。
 ## 所需包（已打包在`.exe`中）
--- a/main.py
+++ b/main.py
@ -1,79 +1,98 @@
 from docx import Document
 from docx.shared import Inches
 from copy import copy
 from PIL import Image
 from copy import copy
 import fitz
 from docx2pdf import convert
 import os
-def convert_img(path = os.path.abspath('.')):
+def convert_all(path = os.path.abspath('.')):
-    while True:
+    print("正在扫描并处理文件中...")
-        print("正在扫描并处理文件中...")
+    items = os.listdir(path)
-        files = os.listdir('.')
+    for item in items:
-        for file in files:
+        if os.path.isdir(item):
-            if file.endswith('.pdf') and "output" not in file:
+            result = convert_img(path + '\\' + item)
-                result = pdf2img(path, file)
+            if result == 1:
-                if result:                  # 1 -> Error
+                print("PDF 转换失败！")
-                    print("PDF 转换失败！")
+                return
-                    continue
+            elif result == 2:
            elif file.endswith('.jpg') or file.endswith('.jpeg'):
                img = Image.open(file)
                img.save(file.split('.')[0] + ".png", "PNG")
                print("JPG/JPEG 转换失败！")
-        print("转换完毕！")
+                return
-        break
+    print("转换完毕！")
 """传入绝对路径"""
 def convert_img(path = os.path.abspath('.')):
     """Convert the PDF and JPG/JPEG files directly inside *path* to PNG.

     Only the entries returned by ``os.listdir(path)`` are examined; nested
     folders are not descended into.

     Args:
         path: Absolute path of the folder whose files should be converted.

     Returns:
         0 if every candidate file was converted (or there was nothing to
         convert), 1 if ``pdf2img`` reported a failure, 2 if a JPG/JPEG file
         could not be opened or written as PNG.
     """
     for file in os.listdir(path):
         if file.endswith('.pdf'):
             if pdf2img(path, file):         # truthy result (1) -> rendering failed
                 return 1
         elif file.endswith(('.jpg', '.jpeg')):
             source = os.path.join(path, file)
             # splitext strips only the final extension, so "a.b.jpg" becomes
             # "a.b.png" instead of being truncated to "a.png".
             target = os.path.join(path, os.path.splitext(file)[0] + ".png")
             try:
                 # The context manager releases the file handle once saved.
                 with Image.open(source) as img:
                     img.save(target, "PNG")
             except (OSError, ValueError):
                 # Unreadable/corrupt image or a mode PNG cannot store; report
                 # it through the status code the caller checks for.
                 return 2
     return 0
 def gen_filelist(path = os.path.abspath('.')):
-    types = "abc"
+    print("创建文件夹列表中...")
-    print("创建文件列表中...")
+    dir_list = []
-    file_list = os.listdir('.')
+    item_list = os.listdir('.')
-    file_list_cp = copy(file_list)              # 创建浅拷贝
+    for item in item_list:
-    for filename in file_list_cp:
+        if os.path.isdir(item):
-        if ".png" not in filename:
+            dir_list.append(item)
-            file_list.remove(filename)
+    file_list = {}
-    file_list.sort()
+    for dir in dir_list:
-    page_curr = 1
+        file_list[dir] = os.listdir(path + '\\' + dir)
-    type_curr = 0
+    for dir, dir_file in file_list.items():
-    for filename in file_list:
+        png_count = 0
-        if int(filename[0:2]) != page_curr or filename[2] != types[type_curr % 3]:
+        dir_file_copy = copy(dir_file)
-            return []
+        for file in dir_file_copy:
-        else:
+            if ".png" in file:
-            type_curr += 1
+                png_count += 1
-            if type_curr % 3 == 0:
+            else:
-                page_curr += 1
+                dir_file.remove(file)
        if png_count != 3:
            print(f"在{dir}文件夹发现错误：文件个数不符")
            return
    print("创建完毕！将要加入文档的文件如下：")
-    for file in file_list:
+    for dir, dir_file in file_list.items():
-        print(file)
+        print('- ' + dir)
        print('    ', end = '')
        for file in dir_file:
            print(file, end = ", ")
        print()
    print(f"共有 {len(list(file_list.items()))} 组文件")
    return file_list
-def gen_docx():
+def gen_docx(path = os.path.abspath('.')):
    while True:
        doc = Document()
        file_list = gen_filelist()
        input("按回车键确认...")
        print("生成 .docx 文档中...")
        if len(file_list) != 0:
-            pic_count = 0
+            for dir, dir_file in file_list.items():
-            for filename in file_list:
+                parent_path = path + '\\' + dir + '\\'
-                pic_count += 1
+                for file in dir_file:
-                if pic_count % 3 == 1:
+                    if file[0:3] == "pdf":
-                    doc.add_picture(filename, height = Inches(2.5))
+                        dir_file.remove(file)
-                    table = doc.add_table(rows = 1, cols = 2)
+                        dir_file.insert(0, file)
-                elif pic_count % 3 == 2:
+                doc.add_picture(parent_path + dir_file[0], height = Inches(2.5))
-                    cell1 = table.cell(0, 0)
+                table = doc.add_table(rows = 1, cols = 2)
-                    cell1.paragraphs[0].add_run().add_picture(filename, height = Inches(5.0))
+                cell1 = table.cell(0, 0)
-                elif pic_count % 3 == 0:
+                cell1.paragraphs[0].add_run().add_picture(parent_path + dir_file[1], height = Inches(5.0))
-                    cell2 = table.cell(0, 1)
+                cell2 = table.cell(0, 1)
-                    cell2.paragraphs[0].add_run().add_picture(filename, height = Inches(5.0))
+                cell2.paragraphs[0].add_run().add_picture(parent_path + dir_file[2], height = Inches(5.0))
-                    if pic_count != len(file_list):
+                if (dir, dir_file) != list(file_list.items())[-1]:          # dict.items() 返回值需先转换为列表，才能索引
-                        doc.add_page_break()
+                    doc.add_page_break()
        else:
            print("请检查文件命名是否正确！")
            continue
        doc.save("output.docx")
        print("生成完毕！")
        break
@ -85,16 +104,14 @@ def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
        return 1
    for page in doc:  # 遍历页面
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom_x, zoom_y))  # 将页面渲染为图片
-        pix.save(pdf_name[:-4] + '.png')  # 将图像存储为PNG格式
+        pix.save(pdf_path + '\\' + "pdf" + pdf_name[:-4] + ".png")  # 将图像存储为PNG格式
    doc.close()  # 关闭文档
 if __name__ == "__main__":
-    print("在使用该脚本前，请将发票、付款记录、购买记录按以下规则命名：")
+    print("在使用该脚本前，请保证程序所在的文件夹中仅包含程序、发票文件夹")
-    print("在文件名前添加[两位数字][类型]")
+    print("并且确保每个发票文件夹内只有三个文件：发票为.pdf文件，其他为图片")
    print("数字代表的是第几份材料，类型：发票a，付款记录b，购买记录c")
    print("如：02b小公仔付款.png 代表第二份材料中的付款记录")
    input("按回车键开始...")
-    convert_img()
+    convert_all()
    gen_docx()
    print("生成 .pdf 文件中...")
    convert("output.docx", "output.pdf")
--- a/报销程序（改版）.exe
+++ b/报销程序（改版）.exe