forked from Dawn_Ocean/ZJUEVA-Reimburse
修复 pdf 转换 bug;简化代码
parent
87306a18ee
commit
134ae9a6f6
73
main.py
73
main.py
|
|
@ -1,36 +1,41 @@
|
||||||
from docx import Document
|
from docx import Document
|
||||||
from docx.shared import Inches, Pt
|
from docx.shared import Inches
|
||||||
|
|
||||||
|
from copy import copy
|
||||||
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
import fitz
|
import fitz
|
||||||
|
|
||||||
import pdfkit
|
from docx2pdf import convert
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
types = "abc"
|
def convert_img(path = os.path.abspath('.')):
|
||||||
|
|
||||||
def convert(path = os.path.abspath('.')):
|
|
||||||
while True:
|
while True:
|
||||||
for _, _, files in os.walk(path):
|
print("正在扫描并处理文件中...")
|
||||||
for filename in files:
|
files = os.listdir('.')
|
||||||
if ".pdf" in filename:
|
for file in files:
|
||||||
result = pdf2img(path, filename)
|
if file.endswith('.pdf') and "output" not in file:
|
||||||
|
result = pdf2img(path, file)
|
||||||
if result: # 1 -> Error
|
if result: # 1 -> Error
|
||||||
|
print("PDF 转换失败!")
|
||||||
continue
|
continue
|
||||||
if ".jpg" in filename or ".jpeg" in filename:
|
elif file.endswith('.jpg') or file.endswith('.jpeg'):
|
||||||
img = Image.open(filename)
|
img = Image.open(file)
|
||||||
img.save(filename.split('.')[0] + ".png", "PNG")
|
img.save(file.split('.')[0] + ".png", "PNG")
|
||||||
|
print("JPG/JPEG 转换失败!")
|
||||||
|
print("转换完毕!")
|
||||||
break
|
break
|
||||||
|
|
||||||
def gen_filelist(pages, path = os.path.abspath('.')):
|
def gen_filelist(path = os.path.abspath('.')):
|
||||||
types = "abc"
|
types = "abc"
|
||||||
file_list = []
|
print("创建文件列表中...")
|
||||||
for _, _, files in os.walk(path):
|
file_list = os.listdir('.')
|
||||||
for filename in files:
|
file_list_cp = copy(file_list) # 创建浅拷贝
|
||||||
if ".png" in filename:
|
for filename in file_list_cp:
|
||||||
file_list.append(filename)
|
if ".png" not in filename:
|
||||||
|
file_list.remove(filename)
|
||||||
file_list.sort()
|
file_list.sort()
|
||||||
page_curr = 1
|
page_curr = 1
|
||||||
type_curr = 0
|
type_curr = 0
|
||||||
|
|
@ -41,20 +46,17 @@ def gen_filelist(pages, path = os.path.abspath('.')):
|
||||||
type_curr += 1
|
type_curr += 1
|
||||||
if type_curr % 3 == 0:
|
if type_curr % 3 == 0:
|
||||||
page_curr += 1
|
page_curr += 1
|
||||||
|
print("创建完毕!将要加入文档的文件如下:")
|
||||||
|
for file in file_list:
|
||||||
|
print(file)
|
||||||
return file_list
|
return file_list
|
||||||
|
|
||||||
def gen_docx():
|
def gen_docx():
|
||||||
while True:
|
while True:
|
||||||
doc = Document()
|
doc = Document()
|
||||||
print()
|
file_list = gen_filelist()
|
||||||
pages = input("输入你要生成的材料页数(对应的 3 份图片为 1 页):")
|
input("按回车键确认...")
|
||||||
if pages.isdigit():
|
print("生成 .docx 文档中...")
|
||||||
pages = int(pages)
|
|
||||||
else:
|
|
||||||
print("请输入一个数字!")
|
|
||||||
continue
|
|
||||||
file_list = gen_filelist(pages)
|
|
||||||
print(file_list)
|
|
||||||
if len(file_list) != 0:
|
if len(file_list) != 0:
|
||||||
pic_count = 0
|
pic_count = 0
|
||||||
for filename in file_list:
|
for filename in file_list:
|
||||||
|
|
@ -73,6 +75,7 @@ def gen_docx():
|
||||||
else:
|
else:
|
||||||
print("请检查文件命名是否正确!")
|
print("请检查文件命名是否正确!")
|
||||||
doc.save("output.docx")
|
doc.save("output.docx")
|
||||||
|
print("生成完毕!")
|
||||||
break
|
break
|
||||||
|
|
||||||
def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
|
def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
|
||||||
|
|
@ -85,21 +88,15 @@ def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
|
||||||
pix.save(pdf_name[:-4] + '.png') # 将图像存储为PNG格式
|
pix.save(pdf_name[:-4] + '.png') # 将图像存储为PNG格式
|
||||||
doc.close() # 关闭文档
|
doc.close() # 关闭文档
|
||||||
|
|
||||||
def docx2pdf(doc_file = "output.docx", html_file = "output.html"):
|
|
||||||
doc = Document(doc_file)
|
|
||||||
full_text = ""
|
|
||||||
for para in doc.paragraphs:
|
|
||||||
full_text += para.text + "\n"
|
|
||||||
with open(html_file, "w", encoding="utf-8") as file:
|
|
||||||
file.write(full_text)
|
|
||||||
pdfkit.from_file(html_file, "output.pdf")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
print("在使用该脚本前,请将发票、付款记录、购买记录按以下规则命名:")
|
print("在使用该脚本前,请将发票、付款记录、购买记录按以下规则命名:")
|
||||||
print("在文件名前添加[两位数字][类型]")
|
print("在文件名前添加[两位数字][类型]")
|
||||||
print("数字代表的是第几份材料,类型:发票a,付款记录b,购买记录c")
|
print("数字代表的是第几份材料,类型:发票a,付款记录b,购买记录c")
|
||||||
print("如:02b小公仔付款.png 代表第二份材料中的付款记录")
|
print("如:02b小公仔付款.png 代表第二份材料中的付款记录")
|
||||||
convert()
|
input("按回车键开始...")
|
||||||
|
convert_img()
|
||||||
gen_docx()
|
gen_docx()
|
||||||
docx2pdf()
|
print("生成 .pdf 文件中...")
|
||||||
|
convert("output.docx", "output.pdf")
|
||||||
|
print("生成完毕!")
|
||||||
|
|
||||||
Loading…
Reference in New Issue