forked from Dawn_Ocean/ZJUEVA-Reimburse
修复 pdf 转换 bug;简化代码
parent
87306a18ee
commit
134ae9a6f6
77
main.py
77
main.py
|
|
@ -1,36 +1,41 @@
|
|||
from docx import Document
|
||||
from docx.shared import Inches, Pt
|
||||
from docx.shared import Inches
|
||||
|
||||
from copy import copy
|
||||
|
||||
from PIL import Image
|
||||
|
||||
import fitz
|
||||
|
||||
import pdfkit
|
||||
from docx2pdf import convert
|
||||
|
||||
import os
|
||||
|
||||
types = "abc"
|
||||
|
||||
def convert(path = os.path.abspath('.')):
|
||||
def convert_img(path = os.path.abspath('.')):
|
||||
while True:
|
||||
for _, _, files in os.walk(path):
|
||||
for filename in files:
|
||||
if ".pdf" in filename:
|
||||
result = pdf2img(path, filename)
|
||||
if result: # 1 -> Error
|
||||
continue
|
||||
if ".jpg" in filename or ".jpeg" in filename:
|
||||
img = Image.open(filename)
|
||||
img.save(filename.split('.')[0] + ".png", "PNG")
|
||||
print("正在扫描并处理文件中...")
|
||||
files = os.listdir('.')
|
||||
for file in files:
|
||||
if file.endswith('.pdf') and "output" not in file:
|
||||
result = pdf2img(path, file)
|
||||
if result: # 1 -> Error
|
||||
print("PDF 转换失败!")
|
||||
continue
|
||||
elif file.endswith('.jpg') or file.endswith('.jpeg'):
|
||||
img = Image.open(file)
|
||||
img.save(file.split('.')[0] + ".png", "PNG")
|
||||
print("JPG/JPEG 转换失败!")
|
||||
print("转换完毕!")
|
||||
break
|
||||
|
||||
def gen_filelist(pages, path = os.path.abspath('.')):
|
||||
def gen_filelist(path = os.path.abspath('.')):
|
||||
types = "abc"
|
||||
file_list = []
|
||||
for _, _, files in os.walk(path):
|
||||
for filename in files:
|
||||
if ".png" in filename:
|
||||
file_list.append(filename)
|
||||
print("创建文件列表中...")
|
||||
file_list = os.listdir('.')
|
||||
file_list_cp = copy(file_list) # 创建浅拷贝
|
||||
for filename in file_list_cp:
|
||||
if ".png" not in filename:
|
||||
file_list.remove(filename)
|
||||
file_list.sort()
|
||||
page_curr = 1
|
||||
type_curr = 0
|
||||
|
|
@ -41,20 +46,17 @@ def gen_filelist(pages, path = os.path.abspath('.')):
|
|||
type_curr += 1
|
||||
if type_curr % 3 == 0:
|
||||
page_curr += 1
|
||||
print("创建完毕!将要加入文档的文件如下:")
|
||||
for file in file_list:
|
||||
print(file)
|
||||
return file_list
|
||||
|
||||
def gen_docx():
|
||||
while True:
|
||||
doc = Document()
|
||||
print()
|
||||
pages = input("输入你要生成的材料页数(对应的 3 份图片为 1 页):")
|
||||
if pages.isdigit():
|
||||
pages = int(pages)
|
||||
else:
|
||||
print("请输入一个数字!")
|
||||
continue
|
||||
file_list = gen_filelist(pages)
|
||||
print(file_list)
|
||||
file_list = gen_filelist()
|
||||
input("按回车键确认...")
|
||||
print("生成 .docx 文档中...")
|
||||
if len(file_list) != 0:
|
||||
pic_count = 0
|
||||
for filename in file_list:
|
||||
|
|
@ -73,6 +75,7 @@ def gen_docx():
|
|||
else:
|
||||
print("请检查文件命名是否正确!")
|
||||
doc.save("output.docx")
|
||||
print("生成完毕!")
|
||||
break
|
||||
|
||||
def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
|
||||
|
|
@ -85,21 +88,15 @@ def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
|
|||
pix.save(pdf_name[:-4] + '.png') # 将图像存储为PNG格式
|
||||
doc.close() # 关闭文档
|
||||
|
||||
def docx2pdf(doc_file = "output.docx", html_file = "output.html", pdf_file = "output.pdf"):
    """Export the paragraph text of a .docx file to PDF via an HTML file.

    Only the text of each paragraph (``para.text``) is written out; nothing
    else from the document is read by this function.

    Args:
        doc_file: Path of the .docx document to read.
        html_file: Path of the intermediate HTML file (created or overwritten).
        pdf_file: Path of the PDF to produce. Defaults to "output.pdf", the
            path that was previously hard-coded.
    """
    from html import escape  # local import keeps this block self-contained

    doc = Document(doc_file)

    # One <p> per paragraph so paragraph breaks survive HTML rendering, and
    # escape the text so characters such as <, > and & are shown literally
    # instead of being parsed as markup.
    body = "\n".join(
        "<p>" + escape(para.text) + "</p>" for para in doc.paragraphs
    )

    with open(html_file, "w", encoding="utf-8") as file:
        # Declare the charset explicitly: the file is written as UTF-8, and
        # the renderer should not have to guess that for non-ASCII text.
        file.write(
            "<!DOCTYPE html>\n"
            "<html>\n"
            "<head><meta charset=\"utf-8\"></head>\n"
            "<body>\n"
            + body
            + "\n</body>\n"
            "</html>\n"
        )

    pdfkit.from_file(html_file, pdf_file)
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("在使用该脚本前,请将发票、付款记录、购买记录按以下规则命名:")
|
||||
print("在文件名前添加[两位数字][类型]")
|
||||
print("数字代表的是第几份材料,类型:发票a,付款记录b,购买记录c")
|
||||
print("如:02b小公仔付款.png 代表第二份材料中的付款记录")
|
||||
convert()
|
||||
input("按回车键开始...")
|
||||
convert_img()
|
||||
gen_docx()
|
||||
docx2pdf()
|
||||
print("生成 .pdf 文件中...")
|
||||
convert("output.docx", "output.pdf")
|
||||
print("生成完毕!")
|
||||
|
||||
Loading…
Reference in New Issue