# ZJUEVA-Reimburse/main.py

from docx import Document
from docx.shared import Inches, Pt

from PIL import Image

import fitz
import os

# Document-type letters used in file names, in order:
# a = invoice, b = payment record, c = purchase record (see the usage text
# printed by the script entry point).
types = "abc"

def convert(path = os.path.abspath('.')):
    """Convert every PDF and JPEG found under *path* into a PNG image.

    The directory tree rooted at *path* is walked recursively. Files are
    selected by their (case-insensitive) extension:

    * ``.pdf``  -> rendered through ``pdf2img``; a failure there is reported
      by ``pdf2img`` itself and the file is simply skipped.
    * ``.jpg`` / ``.jpeg`` -> re-encoded as PNG with Pillow.

    The resulting PNG is written under a relative name (the source file name
    with its extension replaced by ``.png``), i.e. into the current working
    directory.

    Args:
        path: Root directory to scan. Defaults to the working directory at
            the time this module was imported.
    """
    for root, _, files in os.walk(path):
        for filename in files:
            lowered = filename.lower()
            if lowered.endswith(".pdf"):
                # Pass the directory the file actually lives in, not the
                # walk root, so PDFs in sub-directories can be opened.
                pdf2img(root, filename)
            elif lowered.endswith((".jpg", ".jpeg")):
                # splitext keeps dots inside the stem ("02b.shop.jpg" ->
                # "02b.shop.png") instead of truncating at the first dot.
                png_name = os.path.splitext(filename)[0] + ".png"
                with Image.open(os.path.join(root, filename)) as img:
                    img.save(png_name, "PNG")

def gen_filelist(pages, path = os.path.abspath('.')):
    """Collect the PNG files under *path* and validate their naming scheme.

    Every PNG file name must start with a two-digit page number followed by
    a type letter. After sorting, the files must form complete, consecutive
    groups ``01a, 01b, 01c, 02a, ...`` (the last group may be incomplete).

    Args:
        pages: Number of pages requested by the user. Accepted for interface
            compatibility; it is not used for validation.
        path: Root directory to scan recursively. Defaults to the working
            directory at the time this module was imported.

    Returns:
        The sorted list of PNG file names (without directory part), or an
        empty list if any name violates the scheme -- including names whose
        first two characters are not a number.
    """
    type_order = "abc"
    file_list = sorted(
        filename
        for _, _, files in os.walk(path)
        for filename in files
        if filename.lower().endswith(".png")
    )
    for index, filename in enumerate(file_list):
        # Position in the sorted list fully determines the expected prefix.
        page_index, type_index = divmod(index, len(type_order))
        try:
            page = int(filename[0:2])
        except ValueError:
            # A stray PNG without a numeric prefix is a naming error, not a
            # reason to crash.
            return []
        if page != page_index + 1 or filename[2] != type_order[type_index]:
            return []
    return file_list

def gen_docx():
    """Interactively build ``output.docx`` from the validated PNG files.

    The user is prompted for a page count until a decimal number is entered.
    The PNG list from ``gen_filelist`` is then laid out three images per
    page: the first image of each group is placed on its own at 2.5 inches
    high, the second and third go side by side into a one-row, two-column
    table at 5.0 inches high. A page break separates consecutive groups.

    If ``gen_filelist`` rejects the file names, an error message is printed
    and no document is written, so an earlier ``output.docx`` is not
    overwritten with an empty file.
    """
    while True:
        print()
        pages = input("输入你要生成的材料页数（对应的 3 份图片为 1 页）：")
        # isdecimal rather than isdigit: isdigit also accepts characters
        # such as superscript digits that int() refuses to parse.
        if pages.isdecimal():
            break
        print("请输入一个数字！")
    pages = int(pages)

    file_list = gen_filelist(pages)
    print(file_list)
    if not file_list:
        print("请检查文件命名是否正确！")
        return

    doc = Document()
    table = None
    total = len(file_list)
    for pic_count, filename in enumerate(file_list, start=1):
        slot = pic_count % 3
        if slot == 1:
            # First image of a group, followed by the table that will hold
            # the remaining two images.
            doc.add_picture(filename, height = Inches(2.5))
            table = doc.add_table(rows = 1, cols = 2)
        elif slot == 2:
            left_cell = table.cell(0, 0)
            left_cell.paragraphs[0].add_run().add_picture(filename, height = Inches(5.0))
        else:
            right_cell = table.cell(0, 1)
            right_cell.paragraphs[0].add_run().add_picture(filename, height = Inches(5.0))
            # No trailing blank page after the final group.
            if pic_count != total:
                doc.add_page_break()
    doc.save("output.docx")

def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
    """Render a single-page PDF to a PNG file.

    The PNG is saved under a relative name (``pdf_name`` with its extension
    replaced by ``.png``), i.e. into the current working directory.

    Args:
        pdf_path: Directory that contains the PDF.
        pdf_name: File name of the PDF inside *pdf_path*.
        zoom_x: Horizontal scale factor passed to ``fitz.Matrix``.
        zoom_y: Vertical scale factor passed to ``fitz.Matrix``.

    Returns:
        ``1`` if the PDF does not contain exactly one page (a message is
        printed and nothing is written); ``None`` on success.
    """
    # os.path.join instead of a hard-coded backslash keeps this portable.
    doc = fitz.open(os.path.join(pdf_path, pdf_name))  # open the document
    try:
        if len(doc) != 1:
            print("PDF 文件只能包含一页！")
            return 1
        png_name = os.path.splitext(pdf_name)[0] + '.png'
        matrix = fitz.Matrix(zoom_x, zoom_y)
        for page in doc:  # exactly one page at this point
            pix = page.get_pixmap(matrix=matrix)  # render the page to an image
            pix.save(png_name)  # store the image as PNG
        return None
    finally:
        # Close the document on every path, including the early return.
        doc.close()

if __name__ == "__main__":
    # Print the file-naming rules first, then run conversion followed by
    # document generation.
    usage_lines = (
        "在使用该脚本前，请将发票、付款记录、购买记录按以下规则命名：",
        "在文件名前添加[两位数字][类型]",
        "数字代表的是第几份材料，类型：发票a，付款记录b，购买记录c",
        "如：02b小公仔付款.png 代表第二份材料中的付款记录",
    )
    for usage_line in usage_lines:
        print(usage_line)
    convert()
    gen_docx()