"""Assemble reimbursement material images into a Word document.

Workflow (see the ``__main__`` block):

1. ``convert`` turns single-page PDFs and JPEG images into PNG files
   written to the current working directory.
2. ``gen_docx`` asks for a page count, validates the PNG file names and
   writes ``output.docx`` with three pictures per page.

Expected file naming: two digits (material number, starting at 01)
followed by a type letter from ``types`` (a, b, c), e.g. ``02b....png``.
"""
from docx import Document
from docx.shared import Inches, Pt

from PIL import Image

import fitz
import os

# Type letters expected, in order, within each material (page).
types = "abc"


def convert(path=None):
    """Convert every PDF / JPEG found under *path* to a PNG file.

    Args:
        path: Directory to scan (recursively, via ``os.walk``). Defaults
            to the current working directory, resolved at call time.

    The PNG files are written to the current working directory, named
    after the source file with its extension replaced by ``.png``.
    PDFs that ``pdf2img`` rejects are skipped.
    """
    if path is None:
        path = os.path.abspath('.')
    for root, _, files in os.walk(path):
        for filename in files:
            stem, ext = os.path.splitext(filename)
            ext = ext.lower()
            if ext == ".pdf":
                # Use the directory the file was actually found in, not the
                # top-level path, so nested files are opened correctly.
                pdf2img(root, filename)
            elif ext in (".jpg", ".jpeg"):
                with Image.open(os.path.join(root, filename)) as img:
                    img.save(stem + ".png", "PNG")


def gen_filelist(pages, path=None):
    """Return the sorted PNG file names in *path* if they follow the naming rule.

    The sorted names must form the sequence 01a, 01b, 01c, 02a, ...:
    characters 0-1 are the two-digit material number and character 2 is
    the type letter taken from ``types``.

    Args:
        pages: Page count entered by the user. Currently not used for
            validation; kept for interface compatibility.
        path: Directory to list (top level only). Defaults to the current
            working directory, resolved at call time.

    Returns:
        The sorted list of PNG file names, or ``[]`` if any name breaks
        the expected sequence.
    """
    if path is None:
        path = os.path.abspath('.')
    # Only the top level is listed: the returned names carry no directory
    # part, so files from subdirectories could not be opened by the caller.
    _, _, files = next(os.walk(path), (path, [], []))
    file_list = sorted(f for f in files if f.lower().endswith(".png"))

    for index, filename in enumerate(file_list):
        expected_page = index // len(types) + 1
        expected_type = types[index % len(types)]
        number = filename[:2]
        # isdecimal() guards int() against non-numeric prefixes; slicing
        # [2:3] avoids an IndexError on very short names.
        if (
            not number.isdecimal()
            or int(number) != expected_page
            or filename[2:3] != expected_type
        ):
            return []
    return file_list


def gen_docx():
    """Interactively build ``output.docx`` from the PNG files in the cwd.

    Prompts until a numeric page count is entered, then validates the
    file names with ``gen_filelist``. On success every picture is added
    at a height of 2.5 inches with a page break after each group of
    three (except the last) and the document is saved. On a naming
    error a message is printed and nothing is written, so an existing
    ``output.docx`` is not replaced by an empty document.
    """
    while True:
        print()
        pages = input("输入你要生成的材料页数(对应的 3 份图片为 1 页):")
        if not pages.isdecimal():
            print("请输入一个数字!")
            continue

        file_list = gen_filelist(int(pages))
        print(file_list)
        if not file_list:
            print("请检查文件命名是否正确!")
            return

        doc = Document()
        total = len(file_list)
        for count, filename in enumerate(file_list, start=1):
            doc.add_picture(filename, height=Inches(2.5))
            # Start a new page after every full group of three pictures,
            # but do not leave a trailing blank page at the end.
            if count % 3 == 0 and count != total:
                doc.add_page_break()
        doc.save("output.docx")
        return


def pdf2img(pdf_path, pdf_name, zoom_x=3, zoom_y=3):
    """Render a single-page PDF to a PNG in the current working directory.

    Args:
        pdf_path: Directory containing the PDF.
        pdf_name: File name of the PDF inside *pdf_path*.
        zoom_x: Horizontal zoom factor passed to ``fitz.Matrix``.
        zoom_y: Vertical zoom factor passed to ``fitz.Matrix``.

    Returns:
        ``1`` if the PDF does not contain exactly one page (nothing is
        written), otherwise ``None``.
    """
    doc = fitz.open(os.path.join(pdf_path, pdf_name))  # open the document
    try:
        if len(doc) != 1:
            print("PDF 文件只能包含一页!")
            return 1
        # Render the only page to a pixmap and store it as PNG.
        pix = doc[0].get_pixmap(matrix=fitz.Matrix(zoom_x, zoom_y))
        pix.save(os.path.splitext(pdf_name)[0] + '.png')
    finally:
        doc.close()  # always release the document, even on early return


if __name__ == "__main__":
    print("在使用该脚本前,请将发票、付款记录、购买记录按以下规则命名:")
    print("在文件名前添加[两位数字][类型]")
    print("数字代表的是第几份材料,类型:发票a,付款记录b,购买记录c")
    print("如:02b小公仔付款.png 代表第二份材料中的付款记录")
    convert()
    gen_docx()