ZJUEVA-Reimburse/main.py

102 lines
3.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

from docx import Document
from docx.shared import Inches
from copy import copy
from PIL import Image
import fitz
from docx2pdf import convert
import os
def convert_img(path = os.path.abspath('.')):
    """Convert the PDF and JPG/JPEG files found in ``path`` to PNG images.

    PDF files are rendered through ``pdf2img``; files whose name contains
    "output" are skipped. JPG/JPEG files are re-encoded with Pillow. Source
    files are read from ``path`` and the resulting PNG files are written to
    the current working directory, which is also where ``pdf2img`` writes.

    Args:
        path: Directory to scan for source files.
    """
    print("正在扫描并处理文件中...")
    for file in os.listdir(path):
        if file.endswith('.pdf') and "output" not in file:
            # pdf2img returns a truthy value (1) when it rejects the file.
            if pdf2img(path, file):
                print("PDF 转换失败!")
        elif file.endswith(('.jpg', '.jpeg')):
            # splitext only strips the extension, so dots inside the stem
            # (e.g. "01a.invoice.jpg") are kept in the PNG name.
            target = os.path.splitext(file)[0] + ".png"
            try:
                # The context manager releases the file handle after saving.
                with Image.open(os.path.join(path, file)) as img:
                    img.save(target, "PNG")
            except (OSError, ValueError):
                # Report a failure only when the conversion really failed.
                print("JPG/JPEG 转换失败!")
    print("转换完毕!")
def gen_filelist(path = os.path.abspath('.')):
    """Collect and validate the PNG files that go into the document.

    The PNG files in ``path`` are sorted by name and must form the sequence
    01a, 01b, 01c, 02a, 02b, ...: the first two characters are the 1-based
    material number and the third character cycles through "a", "b", "c".

    Args:
        path: Directory to scan for PNG files.

    Returns:
        The sorted list of PNG file names (without directory), or an empty
        list when any name breaks the expected sequence or when no PNG file
        is present.
    """
    types = "abc"
    print("创建文件列表中...")
    file_list = sorted(
        name for name in os.listdir(path) if name.endswith(".png")
    )
    for index, filename in enumerate(file_list):
        # Position in the sorted list fixes the expected page and type.
        page_offset, type_index = divmod(index, len(types))
        try:
            page = int(filename[0:2])
        except ValueError:
            # A PNG without a numeric prefix is a naming error, not a crash.
            return []
        if page != page_offset + 1 or filename[2] != types[type_index]:
            return []
    print("创建完毕!将要加入文档的文件如下:")
    for file in file_list:
        print(file)
    return file_list
def gen_docx():
    """Build ``output.docx`` from the pictures returned by ``gen_filelist``.

    Pictures are consumed in groups of three: the first is placed on its own
    at a height of 2.5 inches, the second and third go side by side into a
    one-row, two-column table at a height of 5 inches. A page break follows
    every completed group except the last one. When the picture list is
    empty a naming hint is printed; the document is saved in either case.
    """
    document = Document()
    pictures = gen_filelist()
    input("按回车键确认...")
    print("生成 .docx 文档中...")
    if not pictures:
        print("请检查文件命名是否正确!")
    else:
        total = len(pictures)
        for position, picture in enumerate(pictures, start=1):
            slot = position % 3
            if slot == 1:
                # First picture of a group, followed by the table that
                # holds the remaining two.
                document.add_picture(picture, height=Inches(2.5))
                table = document.add_table(rows=1, cols=2)
                continue
            # Second picture goes to the left cell, third to the right one.
            column = 0 if slot == 2 else 1
            cell = table.cell(0, column)
            cell.paragraphs[0].add_run().add_picture(picture, height=Inches(5.0))
            if slot == 0 and position != total:
                document.add_page_break()
    document.save("output.docx")
    print("生成完毕!")
def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
    """Render a single-page PDF to a PNG file in the current working directory.

    The PNG keeps the PDF's file name with the extension replaced by
    ``.png``.

    Args:
        pdf_path: Directory that contains the PDF.
        pdf_name: File name of the PDF inside ``pdf_path``.
        zoom_x: Horizontal scale factor passed to the render matrix.
        zoom_y: Vertical scale factor passed to the render matrix.

    Returns:
        1 when the PDF does not contain exactly one page (nothing is
        written), otherwise None.
    """
    # os.path.join picks the right separator on every platform.
    doc = fitz.open(os.path.join(pdf_path, pdf_name))
    try:
        if len(doc) != 1:
            print("PDF 文件只能包含一页!")
            return 1
        pix = doc.load_page(0).get_pixmap(matrix=fitz.Matrix(zoom_x, zoom_y))
        pix.save(os.path.splitext(pdf_name)[0] + '.png')
        return None
    finally:
        # Close the document on the early-return path as well.
        doc.close()
if __name__ == "__main__":
    # Show the file naming convention before anything is processed.
    for hint in (
        "在使用该脚本前,请将发票、付款记录、购买记录按以下规则命名:",
        "在文件名前添加[两位数字][类型]",
        "数字代表的是第几份材料类型发票a付款记录b购买记录c",
        "02b小公仔付款.png 代表第二份材料中的付款记录",
    ):
        print(hint)
    input("按回车键开始...")

    # Pipeline: source files -> PNG images -> output.docx -> output.pdf
    convert_img()
    gen_docx()
    print("生成 .pdf 文件中...")
    convert("output.docx", "output.pdf")
    print("生成完毕!")