ZJUEVA-Reimburse/main.py

185 lines
7.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

from docx import Document
from docx.shared import Inches
from PIL import Image
from copy import copy
import fitz
from docx2pdf import convert
from sys import exit
import os
def done(exit_code = 0):
    """Delete the generated intermediate files, wait for Enter, then exit.

    Reads the module-level ``gened_list`` of generated file paths.

    Args:
        exit_code: Status handed to ``sys.exit`` once cleanup is finished.
    """
    print("正在移除多余生成文件...")
    for gen in gened_list:
        try:
            os.remove(gen)
        except FileNotFoundError:
            # A path can be registered before its file is actually written
            # (or the file may already be gone); cleanup must still reach
            # the exit call below.
            continue
    print("移除完毕!")
    input("按回车键退出...")
    exit(exit_code)
def convert_all(path = os.path.abspath('.')):
    """Convert the PDF/JPEG files in every sub-folder of *path* to PNG.

    Args:
        path: Directory whose immediate sub-folders are processed. The
            default value is computed once, when the function is defined.

    When ``convert_img`` reports an error code, a message is printed and
    ``done(1)`` is called.
    """
    print("正在扫描并处理文件中...")
    for item in os.listdir(path):
        item_path = os.path.join(path, item)
        # Test the joined path: a bare name would be resolved against the
        # current working directory rather than against *path*.
        if not os.path.isdir(item_path):
            continue
        result = convert_img(item, item_path)
        if result == 1:
            print("PDF 转换失败!")
            done(1)
        elif result == 2:
            print("JPG/JPEG 转换失败!")
            done(1)
    print("转换完毕!")
"""传入绝对路径"""
def _find_invoice(pdf_list):
    """Return the first PDF name containing an invoice keyword, else None."""
    # Keyword order matters: every name is checked for "发票" before any
    # name is checked for "dzfp".
    for keyword in ("发票", "dzfp"):
        for name in pdf_list:
            if keyword in name.lower():
                return name
    return None


def _ask_for_invoice(dir_name, pdf_list):
    """Ask the user which PDF in *pdf_list* is the invoice; return its name."""
    print(f"注意到文件夹 {dir_name} 中有多个 .pdf 文件:")
    for number, name in enumerate(pdf_list, start=1):  # shown 1-based
        print(f"{number} - {name}")
    while True:
        answer = input("请选择发票文件(输入选项前的阿拉伯数字):")
        # isdecimal() only accepts characters that int() can parse;
        # isdigit() also accepts e.g. superscript digits, which int() rejects.
        if not answer.isdecimal():
            print("请输入数字!")
            continue
        choice = int(answer) - 1  # back to a 0-based index
        if 0 <= choice < len(pdf_list):
            return pdf_list[choice]
        print("请选择正确的序号!")


def _mark_as_invoice(path, pdf_name):
    """Rename the PNG rendered from *pdf_name* from "pdf..." to "Pdf..."."""
    stem = pdf_name[:-4]  # strip ".pdf"
    invoice_path = path + os.sep + "pdf" + stem + ".png"
    invoice_renamed = path + os.sep + 'Pdf' + stem + ".png"
    os.rename(invoice_path, invoice_renamed)
    # Keep the cleanup list in sync with the new file name.
    gened_list.remove(invoice_path)
    gened_list.append(invoice_renamed)


def convert_img(dir_name, path = os.path.abspath('.')):
    """Convert every PDF and JPG/JPEG file inside *path* to PNG.

    PDFs are rendered through ``pdf2img``; JPEGs are re-saved as PNG next to
    the original. Each created JPEG-derived PNG is appended to the
    module-level ``gened_list``. When the folder holds more than one PDF,
    the PNG of the invoice PDF gets its "pdf" prefix changed to "Pdf"; the
    invoice is detected by keyword or, failing that, chosen by the user.

    Args:
        dir_name: Folder name, used only in the message shown to the user.
        path: Absolute path of the folder to process.

    Returns:
        1 if a PDF could not be converted, 2 if a JPG/JPEG could not be
        converted, otherwise None.
    """
    pdf_list = []
    for file in os.listdir(path):
        # Lowercase only for the extension test (handles .PDF, .JPG, ...);
        # the real name is kept so the file can be opened on
        # case-sensitive filesystems.
        lowered = file.lower()
        if lowered.endswith('.pdf'):
            if pdf2img(path, file):  # truthy -> error
                return 1
            pdf_list.append(file)
        elif lowered.endswith(('.jpg', '.jpeg')):
            img_path = path + os.sep + file[:file.rfind('.')] + ".png"
            try:
                with Image.open(path + os.sep + file) as img:
                    img.save(img_path, "PNG")
            except OSError:
                # Unreadable or unwritable image: report the JPG/JPEG
                # error code to the caller.
                return 2
            gened_list.append(img_path)
    if len(pdf_list) > 1:
        invoice = _find_invoice(pdf_list)
        if invoice is None:
            invoice = _ask_for_invoice(dir_name, pdf_list)
        _mark_as_invoice(path, invoice)
def gen_filelist(path = os.path.abspath('.')):
    """Map every sub-folder of *path* to the PNG files it contains.

    A warning is printed for each folder whose PNG count is neither 2 nor 3,
    followed by a summary of all folders and their PNG files.

    Args:
        path: Directory whose immediate sub-folders are scanned. The default
            value is computed once, when the function is defined.

    Returns:
        dict mapping folder name -> list of PNG file names in that folder.
    """
    print("创建文件夹列表中...")
    file_list = {}
    for item in os.listdir(path):
        folder = os.path.join(path, item)
        # Resolve against *path*, not the current working directory.
        if not os.path.isdir(folder):
            continue
        # Match on the extension only, so names such as "x.png.bak" are
        # not mistaken for images.
        pngs = [name for name in os.listdir(folder)
                if name.lower().endswith(".png")]
        file_list[item] = pngs
        if len(pngs) not in (2, 3):
            print(f"{item}文件夹发现错误png文件个数不符")
            for name in pngs:
                print('- ' + name)
    print("创建完毕!将要加入文档的文件如下:")
    for folder_name, pngs in file_list.items():
        print('- ' + folder_name)
        print(' ', end = '')
        for name in pngs:
            print(name, end = ", ")
        print()
    print(f"共有 {len(file_list)} 组文件")
    return file_list
def _invoice_first(files):
    """Return *files* reordered: "Pdf..." names, then "pdf..." names, then the rest.

    Within each prefixed group, later names come first (move-to-front
    order); the remaining names keep their relative order.
    """
    invoices = [name for name in files if name.startswith("Pdf")]
    other_pdfs = [name for name in files if name.startswith("pdf")]
    rest = [name for name in files if not name.startswith(("Pdf", "pdf"))]
    return invoices[::-1] + other_pdfs[::-1] + rest


def gen_docx(path = os.path.abspath('.')):
    """Build ``output.docx`` with one page per folder of PNG files.

    For each folder the first picture (the invoice, when one is marked with
    the "Pdf" prefix) is placed at the top; the second and, if the folder
    has exactly three PNGs, the third are placed side by side in a
    one-row, two-column table below it.

    Args:
        path: Directory holding the per-invoice sub-folders. The default
            value is computed once, when the function is defined.

    Calls ``done(1)`` when no folder is found or when a folder has fewer
    than two PNG files.
    """
    doc = Document()
    file_list = gen_filelist(path)  # scan the same directory we read from
    input("按回车键确认...")
    print("生成 .docx 文档中...")
    if not file_list:
        print("请检查文件结构是否正确!")
        done(1)
    last_index = len(file_list) - 1
    for index, (folder, dir_file) in enumerate(file_list.items()):
        ordered = _invoice_first(dir_file)  # invoice picture goes first
        if len(ordered) < 2:
            # The layout needs a top picture and at least one table picture.
            print("请检查文件结构是否正确!")
            done(1)
        parent_path = path + os.sep + folder + os.sep
        doc.add_picture(parent_path + ordered[0], height = Inches(4.2))
        table = doc.add_table(rows = 1, cols = 2)
        left_cell = table.cell(0, 0)
        left_cell.paragraphs[0].add_run().add_picture(parent_path + ordered[1], height = Inches(4.0))
        if len(ordered) == 3:
            right_cell = table.cell(0, 1)
            right_cell.paragraphs[0].add_run().add_picture(parent_path + ordered[2], height = Inches(4.0))
        if index != last_index:  # no trailing blank page after the last folder
            doc.add_page_break()
    doc.save("output.docx")
    print("生成完毕!")
def pdf2img(pdf_path, pdf_name, zoom_x = 7, zoom_y = 7):
    """Render a single-page PDF to ``pdf<name>.png`` in the same folder.

    The PNG path is appended to the module-level ``gened_list``. If the PDF
    name contains "行程" and the rendered image is taller than wide, the
    image is rotated by 270 degrees and saved again.

    Args:
        pdf_path: Folder containing the PDF.
        pdf_name: File name of the PDF (expected to end in ".pdf").
        zoom_x: Horizontal zoom factor used for rendering.
        zoom_y: Vertical zoom factor used for rendering.

    Returns:
        1 if the PDF does not contain exactly one page, otherwise None.
    """
    source = pdf_path + os.sep + pdf_name
    png_path = pdf_path + os.sep + "pdf" + pdf_name[:-4] + ".png"
    doc = fitz.open(source)  # open the document
    try:
        if len(doc) != 1:
            print(source + "出错")
            print("PDF 文件只能包含一页!")
            return 1
        for page in doc:  # exactly one page at this point
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom_x, zoom_y))  # render the page
            gened_list.append(png_path)
            pix.save(png_path)  # store the rendering as PNG
    finally:
        # Close the document on every path, including the early return.
        doc.close()
    if "行程" in pdf_name:
        rotated = None
        with Image.open(png_path) as img:
            if img.height > img.width:
                rotated = img.rotate(270, expand=True)
        # Save only after the source file handle has been released.
        if rotated is not None:
            rotated.save(png_path)
# Files created by this script; they are deleted on error or once generation
# has finished. Defined at module level so the functions above also work
# when this file is imported instead of run as a script.
gened_list = []

if __name__ == "__main__":
    import traceback

    print("在使用该脚本前,请保证程序所在的文件夹中仅包含程序、发票文件夹")
    print("并且确保每个发票文件夹内只有三个文件")
    input("按回车键开始...")
    try:
        convert_all()
        gen_docx()
        print("生成 .pdf 文件中...")
        convert("output.docx", "output.pdf")
        print("生成完毕!")
    except Exception:
        # Unexpected failure: show it, then still remove the generated
        # files. SystemExit raised by done() is not an Exception subclass,
        # so it passes through this handler.
        traceback.print_exc()
        done(1)
    done(0)