# ZJUEVA-Reimburse/main.py

from docx import Document
from docx.shared import Inches

from copy import copy

from PIL import Image

import fitz

from docx2pdf import convert

import os

def convert_img(path = os.path.abspath('.')):
    """Convert the PDF and JPG/JPEG files found in ``path`` to PNG images.

    PDF files are handed to ``pdf2img``; any PDF whose name contains
    "output" is skipped so a previously generated ``output.pdf`` is not
    processed again. JPG/JPEG files are re-encoded with Pillow and saved
    under the same base name with a ``.png`` extension, using a relative
    file name (i.e. in the current working directory).

    Extensions are matched case-insensitively. A failure on one file is
    reported on stdout and does not stop the remaining files from being
    processed.

    Args:
        path: Directory to scan for input files. Defaults to the working
            directory at the time this module was imported.
    """
    print("正在扫描并处理文件中...")
    for file in os.listdir(path):
        lowered = file.lower()
        if lowered.endswith('.pdf') and "output" not in file:
            # pdf2img returns a truthy value (1) when it rejects the file.
            if pdf2img(path, file):
                print("PDF 转换失败！")
        elif lowered.endswith(('.jpg', '.jpeg')):
            # splitext keeps base names that themselves contain dots intact.
            target = os.path.splitext(file)[0] + ".png"
            try:
                with Image.open(os.path.join(path, file)) as img:
                    img.save(target, "PNG")
            except (OSError, ValueError):
                # Only report a failure when Pillow actually raised.
                print("JPG/JPEG 转换失败！")
    print("转换完毕！")

def gen_filelist(path = os.path.abspath('.')):
    """Collect the PNG files of the working directory and validate their names.

    Every PNG must be named ``<two-digit number><a|b|c>...png``. After
    sorting, the files must form the exact sequence 01a, 01b, 01c, 02a, ...
    with no gaps; the sequence may stop part-way through a group.

    Args:
        path: Accepted for interface compatibility but not used. The scan
            always targets the current working directory, because
            ``gen_docx`` loads the returned names relative to it.

    Returns:
        The sorted list of PNG file names, or an empty list when there are
        no PNG files or when any name breaks the naming sequence (including
        names whose first two characters are not a number).
    """
    types = "abc"
    print("创建文件列表中...")
    # Match on the real extension so names like "x.png.bak" are not picked up.
    file_list = sorted(
        name for name in os.listdir('.') if name.endswith(".png")
    )
    page_curr = 1
    type_curr = 0
    for filename in file_list:
        try:
            page = int(filename[0:2])
        except ValueError:
            # A non-numeric prefix is a naming error, not a crash.
            return []
        if page != page_curr or filename[2] != types[type_curr % 3]:
            return []
        type_curr += 1
        if type_curr % 3 == 0:
            # A full a/b/c group is complete; expect the next number.
            page_curr += 1
    print("创建完毕！将要加入文档的文件如下：")
    for file in file_list:
        print(file)
    return file_list

def gen_docx():
    """Build ``output.docx`` from the validated PNG list.

    Pictures are consumed in groups of three: the first is placed on its
    own at a height of 2.5 inches, the second and third go side by side
    into a one-row, two-column table at a height of 5 inches. A page break
    follows each completed group except when it ends the list.

    The user is asked to confirm with Enter before generation starts. When
    the file list is empty a hint about file naming is printed; the
    document is saved in either case.
    """
    document = Document()
    pictures = gen_filelist()
    input("按回车键确认...")
    print("生成 .docx 文档中...")
    if not pictures:
        print("请检查文件命名是否正确！")
    else:
        total = len(pictures)
        for position, picture in enumerate(pictures, start=1):
            slot = position % 3
            if slot == 1:
                # First picture of a group, followed by the table that
                # will hold the other two.
                document.add_picture(picture, height=Inches(2.5))
                table = document.add_table(rows=1, cols=2)
            elif slot == 2:
                left_cell = table.cell(0, 0)
                left_run = left_cell.paragraphs[0].add_run()
                left_run.add_picture(picture, height=Inches(5.0))
            else:
                right_cell = table.cell(0, 1)
                right_run = right_cell.paragraphs[0].add_run()
                right_run.add_picture(picture, height=Inches(5.0))
                # No trailing blank page after the final group.
                if position != total:
                    document.add_page_break()
    document.save("output.docx")
    print("生成完毕！")

def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
    """Render a single-page PDF to a PNG file.

    The PNG is saved under the PDF's base name with a ``.png`` extension,
    using a relative file name (i.e. in the current working directory).

    Args:
        pdf_path: Directory that contains the PDF.
        pdf_name: File name of the PDF inside ``pdf_path``.
        zoom_x: Horizontal scale factor passed to ``fitz.Matrix``.
        zoom_y: Vertical scale factor passed to ``fitz.Matrix``.

    Returns:
        1 when the PDF does not contain exactly one page (nothing is
        written); None on success.
    """
    # os.path.join instead of a hard-coded "\\" keeps this portable.
    doc = fitz.open(os.path.join(pdf_path, pdf_name))
    try:
        if len(doc) != 1:
            print("PDF 文件只能包含一页！")
            return 1
        # Exactly one page is guaranteed here, so render page 0 directly.
        pix = doc[0].get_pixmap(matrix=fitz.Matrix(zoom_x, zoom_y))
        pix.save(os.path.splitext(pdf_name)[0] + '.png')
    finally:
        # Close the document on every path, including the early return.
        doc.close()

if __name__ == "__main__":
    # Explain the required file-naming scheme before touching any file.
    usage_lines = (
        "在使用该脚本前，请将发票、付款记录、购买记录按以下规则命名：",
        "在文件名前添加[两位数字][类型]",
        "数字代表的是第几份材料，类型：发票a，付款记录b，购买记录c",
        "如：02b小公仔付款.png 代表第二份材料中的付款记录",
    )
    for usage_line in usage_lines:
        print(usage_line)
    input("按回车键开始...")

    # Pipeline: normalise inputs to PNG, build the .docx, export to PDF.
    convert_img()
    gen_docx()
    print("生成 .pdf 文件中...")
    convert("output.docx", "output.pdf")
    print("生成完毕！")