修复 pdf 转换 bug;简化代码

main
Dawn_Ocean 2023-10-20 15:13:40 +08:00
parent 87306a18ee
commit 134ae9a6f6
1 changed files with 37 additions and 40 deletions

77
main.py
View File

@ -1,36 +1,41 @@
from docx import Document from docx import Document
from docx.shared import Inches, Pt from docx.shared import Inches
from copy import copy
from PIL import Image from PIL import Image
import fitz import fitz
import pdfkit from docx2pdf import convert
import os import os
types = "abc" def convert_img(path = os.path.abspath('.')):
def convert(path = os.path.abspath('.')):
while True: while True:
for _, _, files in os.walk(path): print("正在扫描并处理文件中...")
for filename in files: files = os.listdir('.')
if ".pdf" in filename: for file in files:
result = pdf2img(path, filename) if file.endswith('.pdf') and "output" not in file:
if result: # 1 -> Error result = pdf2img(path, file)
continue if result: # 1 -> Error
if ".jpg" in filename or ".jpeg" in filename: print("PDF 转换失败!")
img = Image.open(filename) continue
img.save(filename.split('.')[0] + ".png", "PNG") elif file.endswith('.jpg') or file.endswith('.jpeg'):
img = Image.open(file)
img.save(file.split('.')[0] + ".png", "PNG")
print("JPG/JPEG 转换失败!")
print("转换完毕!")
break break
def gen_filelist(pages, path = os.path.abspath('.')): def gen_filelist(path = os.path.abspath('.')):
types = "abc" types = "abc"
file_list = [] print("创建文件列表中...")
for _, _, files in os.walk(path): file_list = os.listdir('.')
for filename in files: file_list_cp = copy(file_list) # 创建浅拷贝
if ".png" in filename: for filename in file_list_cp:
file_list.append(filename) if ".png" not in filename:
file_list.remove(filename)
file_list.sort() file_list.sort()
page_curr = 1 page_curr = 1
type_curr = 0 type_curr = 0
@ -41,20 +46,17 @@ def gen_filelist(pages, path = os.path.abspath('.')):
type_curr += 1 type_curr += 1
if type_curr % 3 == 0: if type_curr % 3 == 0:
page_curr += 1 page_curr += 1
print("创建完毕!将要加入文档的文件如下:")
for file in file_list:
print(file)
return file_list return file_list
def gen_docx(): def gen_docx():
while True: while True:
doc = Document() doc = Document()
print() file_list = gen_filelist()
pages = input("输入你要生成的材料页数(对应的 3 份图片为 1 页):") input("按回车键确认...")
if pages.isdigit(): print("生成 .docx 文档中...")
pages = int(pages)
else:
print("请输入一个数字!")
continue
file_list = gen_filelist(pages)
print(file_list)
if len(file_list) != 0: if len(file_list) != 0:
pic_count = 0 pic_count = 0
for filename in file_list: for filename in file_list:
@ -73,6 +75,7 @@ def gen_docx():
else: else:
print("请检查文件命名是否正确!") print("请检查文件命名是否正确!")
doc.save("output.docx") doc.save("output.docx")
print("生成完毕!")
break break
def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3): def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
@ -85,21 +88,15 @@ def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
pix.save(pdf_name[:-4] + '.png') # 将图像存储为PNG格式 pix.save(pdf_name[:-4] + '.png') # 将图像存储为PNG格式
doc.close() # 关闭文档 doc.close() # 关闭文档
def docx2pdf(doc_file = "output.docx", html_file = "output.html"):
doc = Document(doc_file)
full_text = ""
for para in doc.paragraphs:
full_text += para.text + "\n"
with open(html_file, "w", encoding="utf-8") as file:
file.write(full_text)
pdfkit.from_file(html_file, "output.pdf")
if __name__ == "__main__": if __name__ == "__main__":
print("在使用该脚本前,请将发票、付款记录、购买记录按以下规则命名:") print("在使用该脚本前,请将发票、付款记录、购买记录按以下规则命名:")
print("在文件名前添加[两位数字][类型]") print("在文件名前添加[两位数字][类型]")
print("数字代表的是第几份材料类型发票a付款记录b购买记录c") print("数字代表的是第几份材料类型发票a付款记录b购买记录c")
print("02b小公仔付款.png 代表第二份材料中的付款记录") print("02b小公仔付款.png 代表第二份材料中的付款记录")
convert() input("按回车键开始...")
convert_img()
gen_docx() gen_docx()
docx2pdf() print("生成 .pdf 文件中...")
convert("output.docx", "output.pdf")
print("生成完毕!")