修复 pdf 转换 bug;简化代码

main
Dawn_Ocean 2023-10-20 15:13:40 +08:00
parent 87306a18ee
commit 134ae9a6f6
1 changed file with 37 additions and 40 deletions

73
main.py
View File

@ -1,36 +1,41 @@
from docx import Document
from docx.shared import Inches, Pt
from docx.shared import Inches
from copy import copy
from PIL import Image
import fitz
import pdfkit
from docx2pdf import convert
import os
types = "abc"
def convert(path = os.path.abspath('.')):
def convert_img(path = os.path.abspath('.')):
while True:
for _, _, files in os.walk(path):
for filename in files:
if ".pdf" in filename:
result = pdf2img(path, filename)
print("正在扫描并处理文件中...")
files = os.listdir('.')
for file in files:
if file.endswith('.pdf') and "output" not in file:
result = pdf2img(path, file)
if result: # 1 -> Error
print("PDF 转换失败!")
continue
if ".jpg" in filename or ".jpeg" in filename:
img = Image.open(filename)
img.save(filename.split('.')[0] + ".png", "PNG")
elif file.endswith('.jpg') or file.endswith('.jpeg'):
img = Image.open(file)
img.save(file.split('.')[0] + ".png", "PNG")
print("JPG/JPEG 转换失败!")
print("转换完毕!")
break
def gen_filelist(pages, path = os.path.abspath('.')):
def gen_filelist(path = os.path.abspath('.')):
types = "abc"
file_list = []
for _, _, files in os.walk(path):
for filename in files:
if ".png" in filename:
file_list.append(filename)
print("创建文件列表中...")
file_list = os.listdir('.')
file_list_cp = copy(file_list) # 创建浅拷贝
for filename in file_list_cp:
if ".png" not in filename:
file_list.remove(filename)
file_list.sort()
page_curr = 1
type_curr = 0
@ -41,20 +46,17 @@ def gen_filelist(pages, path = os.path.abspath('.')):
type_curr += 1
if type_curr % 3 == 0:
page_curr += 1
print("创建完毕!将要加入文档的文件如下:")
for file in file_list:
print(file)
return file_list
def gen_docx():
while True:
doc = Document()
print()
pages = input("输入你要生成的材料页数(对应的 3 份图片为 1 页):")
if pages.isdigit():
pages = int(pages)
else:
print("请输入一个数字!")
continue
file_list = gen_filelist(pages)
print(file_list)
file_list = gen_filelist()
input("按回车键确认...")
print("生成 .docx 文档中...")
if len(file_list) != 0:
pic_count = 0
for filename in file_list:
@ -73,6 +75,7 @@ def gen_docx():
else:
print("请检查文件命名是否正确!")
doc.save("output.docx")
print("生成完毕!")
break
def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
@ -85,21 +88,15 @@ def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
pix.save(pdf_name[:-4] + '.png') # 将图像存储为PNG格式
doc.close() # 关闭文档
def docx2pdf(doc_file = "output.docx", html_file = "output.html"):
doc = Document(doc_file)
full_text = ""
for para in doc.paragraphs:
full_text += para.text + "\n"
with open(html_file, "w", encoding="utf-8") as file:
file.write(full_text)
pdfkit.from_file(html_file, "output.pdf")
if __name__ == "__main__":
print("在使用该脚本前,请将发票、付款记录、购买记录按以下规则命名:")
print("在文件名前添加[两位数字][类型]")
print("数字代表的是第几份材料类型发票a付款记录b购买记录c")
print("02b小公仔付款.png 代表第二份材料中的付款记录")
convert()
input("按回车键开始...")
convert_img()
gen_docx()
docx2pdf()
print("生成 .pdf 文件中...")
convert("output.docx", "output.pdf")
print("生成完毕!")