"""Assemble reimbursement scans into a Word document, three pictures per page.

Recovered from the whitespace-collapsed patch of ``main.py`` in commit
35993e2270bccc39d49bb07507206e3b66f30644 ("first commit").

Workflow when run as a script:

1. ``convert`` turns every single-page PDF and every JPEG in the working
   directory into a PNG.
2. ``gen_docx`` asks how many materials there are, validates the PNG names
   with ``gen_filelist`` and writes ``output.docx``.

File naming rule: ``<two-digit material number><type letter><anything>.png``
where the type letter is ``a`` (invoice), ``b`` (payment record) or ``c``
(purchase record), e.g. ``02b_payment.png``.
"""
import os

# NOTE: python-docx, Pillow and PyMuPDF are imported inside the functions that
# need them, so the pure filename-validation logic can be imported (and
# tested) on a machine where those packages are not installed.

# Type letters, in the order the three scans of one material must appear.
types = "abc"

_PDF_EXT = ".pdf"
_JPEG_EXTS = (".jpg", ".jpeg")
_PNG_EXT = ".png"
# Width of the numeric material prefix required by the naming rule ("02b...").
_PREFIX_DIGITS = 2


def _files_in(path):
    """Return the sorted names of the regular files directly inside *path*."""
    return sorted(
        name
        for name in os.listdir(path)
        if os.path.isfile(os.path.join(path, name))
    )


def convert(path=os.path.abspath('.')):
    """Convert every PDF and JPEG directly inside *path* to a PNG beside it.

    Only the top level of *path* is scanned: the rest of the script addresses
    files by bare name, so files in sub-directories could never be opened
    correctly. A PDF that cannot be converted (more than one page) is reported
    by ``pdf2img`` and skipped.

    Args:
        path: Directory to scan. Defaults to the working directory at import
            time.
    """
    from PIL import Image

    for filename in _files_in(path):
        stem, ext = os.path.splitext(filename)
        ext = ext.lower()
        if ext == _PDF_EXT:
            # A non-zero result only means "skipped"; pdf2img already printed why.
            pdf2img(path, filename, out_dir=path)
        elif ext in _JPEG_EXTS:
            # The context manager releases the file handle after saving.
            with Image.open(os.path.join(path, filename)) as img:
                img.save(os.path.join(path, stem + _PNG_EXT), "PNG")


def gen_filelist(pages, path=os.path.abspath('.')):
    """Return the PNG names in *path* if they form *pages* complete materials.

    The PNG files directly inside *path*, sorted by name, must be exactly
    ``01a…, 01b…, 01c…, 02a…, …`` up to material number *pages*: a two-digit
    material number followed by the type letter from ``types``.

    Args:
        pages: Number of materials expected (three pictures each).
        path: Directory to scan. Defaults to the working directory at import
            time.

    Returns:
        The sorted list of bare file names, or ``[]`` when the count or any
        name does not match the expected sequence.
    """
    file_list = [
        name for name in _files_in(path) if name.lower().endswith(_PNG_EXT)
    ]
    per_page = len(types)
    if len(file_list) != pages * per_page:
        return []

    for index, filename in enumerate(file_list):
        page_index, type_index = divmod(index, per_page)
        prefix = filename[:_PREFIX_DIGITS]
        kind = filename[_PREFIX_DIGITS:_PREFIX_DIGITS + 1]
        # Reject malformed prefixes instead of letting int() raise.
        if not (prefix.isascii() and prefix.isdigit()):
            return []
        if int(prefix) != page_index + 1 or kind != types[type_index]:
            return []
    return file_list


def gen_docx():
    """Prompt for the material count and write the pictures to ``output.docx``.

    The prompt repeats until a non-negative integer is entered. If the PNG
    files in the working directory do not match the naming rule, an error is
    printed and no document is written. Otherwise every picture is added at a
    height of 2.5 inches, with a page break after each group of three except
    the last.
    """
    from docx import Document
    from docx.shared import Inches

    while True:
        print()
        answer = input("输入你要生成的材料页数(对应的 3 份图片为 1 页):")
        # isascii() rules out characters such as "²" that pass isdigit() but
        # make int() raise.
        if answer.isascii() and answer.isdigit():
            break
        print("请输入一个数字!")

    file_list = gen_filelist(int(answer))
    print(file_list)
    if not file_list:
        print("请检查文件命名是否正确!")
        return

    doc = Document()
    per_page = len(types)
    for count, filename in enumerate(file_list, start=1):
        doc.add_picture(filename, height=Inches(2.5))
        if count % per_page == 0 and count != len(file_list):
            doc.add_page_break()
    doc.save("output.docx")


def pdf2img(pdf_path, pdf_name, zoom_x=3, zoom_y=3, out_dir=None):
    """Render a single-page PDF to a PNG with the same base name.

    Args:
        pdf_path: Directory containing the PDF.
        pdf_name: File name of the PDF inside *pdf_path*.
        zoom_x: Horizontal zoom factor used for rendering.
        zoom_y: Vertical zoom factor used for rendering.
        out_dir: Directory for the PNG. ``None`` (the default) writes it
            relative to the current working directory.

    Returns:
        ``1`` if the PDF does not contain exactly one page (nothing is
        written), otherwise ``None``.
    """
    import fitz  # PyMuPDF

    png_name = os.path.splitext(pdf_name)[0] + _PNG_EXT
    if out_dir is not None:
        png_name = os.path.join(out_dir, png_name)

    doc = fitz.open(os.path.join(pdf_path, pdf_name))  # open the document
    try:
        if len(doc) != 1:
            print("PDF 文件只能包含一页!")
            return 1
        # Render the only page to a bitmap at the requested zoom.
        pix = doc.load_page(0).get_pixmap(matrix=fitz.Matrix(zoom_x, zoom_y))
        pix.save(png_name)  # the ".png" suffix selects the output format
    finally:
        doc.close()  # also runs on the early error return
    return None


if __name__ == "__main__":
    print("在使用该脚本前,请将发票、付款记录、购买记录按以下规则命名:")
    print("在文件名前添加[两位数字][类型]")
    print("数字代表的是第几份材料,类型:发票a,付款记录b,购买记录c")
    print("如:02b小公仔付款.png 代表第二份材料中的付款记录")
    convert()
    gen_docx()