修改功能为读取当前文件夹下所有子文件夹

2023-10-20 20:55:56 +08:00 · 2023-10-20 20:55:56 +08:00 · 83f79474b3
parent e486557838
commit 83f79474b3
3 changed files with 79 additions and 66 deletions
--- a/README.md
+++ b/README.md
@@ -2,22 +2,18 @@

 ## 功能

-将当前文件夹中以`发票-付款记录-购买记录`为一组的三张图片放在`.docx`文件的同一页面。同时会生成相应的`.pdf`文件。
+将当前文件夹下每个子文件夹中以`发票-付款记录-购买记录`为一组的三张图片放在`.docx`文件的同一页面。同时会生成相应的`.pdf`文件。

 会将`.pdf`、`.jpg`、`.jpeg`文件自动转换为`.png`文件再插入。

 ## 使用方法

-1. 将`.exe`文件与待处理图片放在同一个文件夹。
+1. 将`.exe`文件与所有包含待处理图片的**子文件夹**放在同一个文件夹。
+2. 运行`.exe`文件，根据提示进行操作。

-2. 文件夹中所有图片按照分组命名：【两位数字】【类型】【其余内容】。
+## 注意事项

-   从01开始按顺序命名序号，同组的文件序号相同，类型：发票a，付款记录b，购买记录c。
-   例如：“02b公仔付款.png” -> 第二组的付款记录图片。
-
-   **一位数请在开头加上 0，如 02、06。**
-
-3. 运行`.exe`文件，根据提示进行操作。
+1. 图片文件不得以 "pdf" 三个字母开头。

 ## 所需包（已打包在`.exe`中）

--- a/main.py
+++ b/main.py
@@ -1,79 +1,98 @@
 from docx import Document
 from docx.shared import Inches

-from copy import copy
-
 from PIL import Image

+from copy import copy
+
 import fitz

 from docx2pdf import convert

 import os

-def convert_img(path = os.path.abspath('.')):
-    while True:
+def convert_all(path = os.path.abspath('.')):
    print("正在扫描并处理文件中...")
-        files = os.listdir('.')
+    items = os.listdir(path)
+    for item in items:
+        if os.path.isdir(item):
+            result = convert_img(path + '\\' + item)
+            if result == 1:
+                print("PDF 转换失败！")
+                return
+            elif result == 2:
+                print("JPG/JPEG 转换失败！")
+                return
+    print("转换完毕！")
+
+"""传入绝对路径"""
+def convert_img(path = os.path.abspath('.')):
+    files = os.listdir(path)
    for file in files:
-            if file.endswith('.pdf') and "output" not in file:
+        if file.endswith('.pdf'):
            result = pdf2img(path, file)
            if result:                  # 1 -> Error
-                    print("PDF 转换失败！")
-                    continue
+                return 1
        elif file.endswith('.jpg') or file.endswith('.jpeg'):
-                img = Image.open(file)
-                img.save(file.split('.')[0] + ".png", "PNG")
-                print("JPG/JPEG 转换失败！")
-        print("转换完毕！")
-        break
+            img = Image.open(path + '\\' + file)
+            img.save(path + '\\' + file.split('.')[0] + ".png", "PNG")
+

 def gen_filelist(path = os.path.abspath('.')):
-    types = "abc"
-    print("创建文件列表中...")
-    file_list = os.listdir('.')
-    file_list_cp = copy(file_list)              # 创建浅拷贝
-    for filename in file_list_cp:
-        if ".png" not in filename:
-            file_list.remove(filename)
-    file_list.sort()
-    page_curr = 1
-    type_curr = 0
-    for filename in file_list:
-        if int(filename[0:2]) != page_curr or filename[2] != types[type_curr % 3]:
-            return []
+    print("创建文件夹列表中...")
+    dir_list = []
+    item_list = os.listdir('.')
+    for item in item_list:
+        if os.path.isdir(item):
+            dir_list.append(item)
+    file_list = {}
+    for dir in dir_list:
+        file_list[dir] = os.listdir(path + '\\' + dir)
+    for dir, dir_file in file_list.items():
+        png_count = 0
+        dir_file_copy = copy(dir_file)
+        for file in dir_file_copy:
+            if ".png" in file:
+                png_count += 1
            else:
-            type_curr += 1
-            if type_curr % 3 == 0:
-                page_curr += 1
+                dir_file.remove(file)
+        if png_count != 3:
+            print(f"在{dir}文件夹发现错误：文件个数不符")
+            return
    print("创建完毕！将要加入文档的文件如下：")
-    for file in file_list:
-        print(file)
+    for dir, dir_file in file_list.items():
+        print('- ' + dir)
+        print('    ', end = '')
+        for file in dir_file:
+            print(file, end = ", ")
+        print()
+    print(f"共有 {len(list(file_list.items()))} 组文件")
    return file_list

-def gen_docx():
+def gen_docx(path = os.path.abspath('.')):
    while True:
        doc = Document()
        file_list = gen_filelist()
        input("按回车键确认...")
        print("生成 .docx 文档中...")
        if len(file_list) != 0:
-            pic_count = 0
-            for filename in file_list:
-                pic_count += 1
-                if pic_count % 3 == 1:
-                    doc.add_picture(filename, height = Inches(2.5))
+            for dir, dir_file in file_list.items():
+                parent_path = path + '\\' + dir + '\\'
+                for file in dir_file:
+                    if file[0:3] == "pdf":
+                        dir_file.remove(file)
+                        dir_file.insert(0, file)
+                doc.add_picture(parent_path + dir_file[0], height = Inches(2.5))
                table = doc.add_table(rows = 1, cols = 2)
-                elif pic_count % 3 == 2:
                cell1 = table.cell(0, 0)
-                    cell1.paragraphs[0].add_run().add_picture(filename, height = Inches(5.0))
-                elif pic_count % 3 == 0:
+                cell1.paragraphs[0].add_run().add_picture(parent_path + dir_file[1], height = Inches(5.0))
                cell2 = table.cell(0, 1)
-                    cell2.paragraphs[0].add_run().add_picture(filename, height = Inches(5.0))
-                    if pic_count != len(file_list):
+                cell2.paragraphs[0].add_run().add_picture(parent_path + dir_file[2], height = Inches(5.0))
+                if (dir, dir_file) != list(file_list.items())[-1]:          # dict.items() 返回值需先转换为列表，才能索引
                    doc.add_page_break()
        else:
            print("请检查文件命名是否正确！")
+            continue
        doc.save("output.docx")
        print("生成完毕！")
        break
@@ -85,16 +104,14 @@ def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
        return 1
    for page in doc:  # 遍历页面
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom_x, zoom_y))  # 将页面渲染为图片
-        pix.save(pdf_name[:-4] + '.png')  # 将图像存储为PNG格式
+        pix.save(pdf_path + '\\' + "pdf" + pdf_name[:-4] + ".png")  # 将图像存储为PNG格式
    doc.close()  # 关闭文档
    
 if __name__ == "__main__":
-    print("在使用该脚本前，请将发票、付款记录、购买记录按以下规则命名：")
-    print("在文件名前添加[两位数字][类型]")
-    print("数字代表的是第几份材料，类型：发票a，付款记录b，购买记录c")
-    print("如：02b小公仔付款.png 代表第二份材料中的付款记录")
+    print("在使用该脚本前，请保证程序所在的文件夹中仅包含程序、发票文件夹")
+    print("并且确保每个发票文件夹内只有三个文件：发票为.pdf文件，其他为图片")
    input("按回车键开始...")
-    convert_img()
+    convert_all()
    gen_docx()
    print("生成 .pdf 文件中...")
    convert("output.docx", "output.pdf")
--- a/报销程序（改版）.exe
+++ b/报销程序（改版）.exe