forked from Dawn_Ocean/ZJUEVA-Reimburse
Compare commits
7 Commits
| Author | SHA1 | Date |
|---|---|---|
|
|
7291dc1dbb | |
|
|
245c56f335 | |
|
|
5d7f8c0772 | |
|
|
bd63d92c50 | |
|
|
d1b679a1db | |
|
|
6e3b1d7ddc | |
|
|
83f79474b3 |
14
README.md
14
README.md
|
|
@ -2,22 +2,18 @@
|
||||||
|
|
||||||
## 功能
|
## 功能
|
||||||
|
|
||||||
将当前文件夹中以`发票-付款记录-购买记录`为一组的三张图片放在`.docx`文件的同一页面。同时会生成相应的`.pdf`文件。
|
将当前文件夹中所有的子文件夹以`发票-付款记录-购买记录`为一组的三张图片放在`.docx`文件的同一页面。同时会生成相应的`.pdf`文件。
|
||||||
|
|
||||||
会将`.pdf`、`.jpg`、`.jpeg`文件自动转换为`.png`文件再插入。
|
会将`.pdf`、`.jpg`、`.jpeg`文件自动转换为`.png`文件再插入。
|
||||||
|
|
||||||
## 使用方法
|
## 使用方法
|
||||||
|
|
||||||
1. 将`.exe`文件与待处理图片放在同一个文件夹。
|
1. 将`.exe`文件与所有包含待处理图片的**子文件夹**放在同一个文件夹。
|
||||||
|
2. 运行`.exe`文件,根据提示进行操作。
|
||||||
|
|
||||||
2. 文件夹中所有图片按照分组命名:【两位数字】【类型】【其余内容】。
|
## 注意事项
|
||||||
|
|
||||||
从01开始按顺序命名序号,同组的文件序号相同,类型:发票a,付款记录b,购买记录c。
|
1. 图片文件不得以 "pdf" 三个字母开头。
|
||||||
例如:“02b公仔付款.png” -> 第二组的付款记录图片。
|
|
||||||
|
|
||||||
**一位数请在开头加上 0,如 02、06。**
|
|
||||||
|
|
||||||
3. 运行`.exe`文件,根据提示进行操作。
|
|
||||||
|
|
||||||
## 所需包(已打包在`.exe`中)
|
## 所需包(已打包在`.exe`中)
|
||||||
|
|
||||||
|
|
|
||||||
195
main.py
195
main.py
|
|
@ -1,82 +1,137 @@
|
||||||
from docx import Document
|
from docx import Document
|
||||||
from docx.shared import Inches
|
from docx.shared import Inches
|
||||||
|
|
||||||
from copy import copy
|
|
||||||
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
|
from copy import copy
|
||||||
|
|
||||||
import fitz
|
import fitz
|
||||||
|
|
||||||
from docx2pdf import convert
|
from docx2pdf import convert
|
||||||
|
|
||||||
|
from sys import exit
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
def convert_img(path = os.path.abspath('.')):
|
def done(exit_code = 0):
|
||||||
while True:
|
print("正在移除多余生成文件...")
|
||||||
print("正在扫描并处理文件中...")
|
for gen in gened_list:
|
||||||
files = os.listdir('.')
|
os.remove(gen)
|
||||||
for file in files:
|
print("移除完毕!")
|
||||||
if file.endswith('.pdf') and "output" not in file:
|
input("按回车键退出...")
|
||||||
result = pdf2img(path, file)
|
exit(exit_code)
|
||||||
if result: # 1 -> Error
|
|
||||||
print("PDF 转换失败!")
|
def convert_all(path = os.path.abspath('.')):
|
||||||
continue
|
print("正在扫描并处理文件中...")
|
||||||
elif file.endswith('.jpg') or file.endswith('.jpeg'):
|
items = os.listdir(path)
|
||||||
img = Image.open(file)
|
for item in items:
|
||||||
img.save(file.split('.')[0] + ".png", "PNG")
|
if os.path.isdir(item):
|
||||||
|
result = convert_img(item, path + '\\' + item)
|
||||||
|
if result == 1:
|
||||||
|
print("PDF 转换失败!")
|
||||||
|
done(1)
|
||||||
|
elif result == 2:
|
||||||
print("JPG/JPEG 转换失败!")
|
print("JPG/JPEG 转换失败!")
|
||||||
print("转换完毕!")
|
done(1)
|
||||||
break
|
print("转换完毕!")
|
||||||
|
|
||||||
|
"""传入绝对路径"""
|
||||||
|
def convert_img(dir_name, path = os.path.abspath('.')):
|
||||||
|
files = os.listdir(path)
|
||||||
|
pdf_list = []
|
||||||
|
for file in files:
|
||||||
|
file = file.lower() # 排除 .PNG 等带来的问题
|
||||||
|
if file.endswith('.pdf'):
|
||||||
|
result = pdf2img(path, file)
|
||||||
|
if result: # 1 -> Error
|
||||||
|
return 1
|
||||||
|
pdf_list.append(file)
|
||||||
|
elif file.endswith('.jpg') or file.endswith('.jpeg'):
|
||||||
|
img = Image.open(path + '\\' + file)
|
||||||
|
dot_index = file.rfind('.')
|
||||||
|
img_path = path + '\\' + file[:dot_index] + ".png"
|
||||||
|
img.save(img_path, "PNG")
|
||||||
|
gened_list.append(img_path)
|
||||||
|
if len(pdf_list) > 1:
|
||||||
|
print(f"注意到文件夹 {dir_name} 中有多个 .pdf 文件:")
|
||||||
|
for i in range(len(pdf_list)): # 打印 pdf 文件和选项
|
||||||
|
print(f"{i + 1} - {pdf_list[i]}") # 索引从 0 改为 1
|
||||||
|
while True:
|
||||||
|
invoice_index = input("请选择发票文件(输入选项前的阿拉伯数字):")
|
||||||
|
if not invoice_index.isdigit():
|
||||||
|
print("请输入数字!")
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
invoice_int = int(invoice_index) - 1 # 索引从 1 改为 0
|
||||||
|
if invoice_int < len(pdf_list) and invoice_int >= 0:
|
||||||
|
invoice_path = path + '\\' + "pdf" + pdf_list[invoice_int][:-4] + ".png"
|
||||||
|
invoice_renamed = path + '\\' + 'Pdf' + pdf_list[invoice_int][:-4] + ".png"
|
||||||
|
os.rename(invoice_path, invoice_renamed) # 将 "pdf" 标签变为 "Pdf"
|
||||||
|
gened_list.remove(invoice_path)
|
||||||
|
gened_list.append(invoice_renamed)
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
print("请选择正确的序号!")
|
||||||
|
|
||||||
def gen_filelist(path = os.path.abspath('.')):
|
def gen_filelist(path = os.path.abspath('.')):
|
||||||
types = "abc"
|
print("创建文件夹列表中...")
|
||||||
print("创建文件列表中...")
|
dir_list = []
|
||||||
file_list = os.listdir('.')
|
item_list = os.listdir('.')
|
||||||
file_list_cp = copy(file_list) # 创建浅拷贝
|
for item in item_list:
|
||||||
for filename in file_list_cp:
|
if os.path.isdir(item):
|
||||||
if ".png" not in filename:
|
dir_list.append(item)
|
||||||
file_list.remove(filename)
|
file_list = {}
|
||||||
file_list.sort()
|
for dir in dir_list:
|
||||||
page_curr = 1
|
file_list[dir] = os.listdir(path + '\\' + dir)
|
||||||
type_curr = 0
|
for dir, dir_file in file_list.items():
|
||||||
for filename in file_list:
|
dir_file_copy = copy(dir_file)
|
||||||
if int(filename[0:2]) != page_curr or filename[2] != types[type_curr % 3]:
|
for file in dir_file_copy:
|
||||||
return []
|
if ".png" not in file.lower():
|
||||||
else:
|
dir_file.remove(file)
|
||||||
type_curr += 1
|
if len(dir_file) != 3:
|
||||||
if type_curr % 3 == 0:
|
print(f"在{dir}文件夹发现错误:png文件个数不符")
|
||||||
page_curr += 1
|
for file in dir_file:
|
||||||
|
print('- ' + file)
|
||||||
|
if len(dir_file) == 2: # 说明有两个文件
|
||||||
|
option = input("文件夹仅有两个文件,仍继续生成?(y/N)").lower()
|
||||||
|
if option != 'y':
|
||||||
|
done(1)
|
||||||
print("创建完毕!将要加入文档的文件如下:")
|
print("创建完毕!将要加入文档的文件如下:")
|
||||||
for file in file_list:
|
for dir, dir_file in file_list.items():
|
||||||
print(file)
|
print('- ' + dir)
|
||||||
|
print(' ', end = '')
|
||||||
|
for file in dir_file:
|
||||||
|
print(file, end = ", ")
|
||||||
|
print()
|
||||||
|
print(f"共有 {len(list(file_list.items()))} 组文件")
|
||||||
return file_list
|
return file_list
|
||||||
|
|
||||||
def gen_docx():
|
def gen_docx(path = os.path.abspath('.')):
|
||||||
while True:
|
doc = Document()
|
||||||
doc = Document()
|
file_list = gen_filelist()
|
||||||
file_list = gen_filelist()
|
input("按回车键确认...")
|
||||||
input("按回车键确认...")
|
print("生成 .docx 文档中...")
|
||||||
print("生成 .docx 文档中...")
|
if len(file_list) != 0:
|
||||||
if len(file_list) != 0:
|
for dir, dir_file in file_list.items():
|
||||||
pic_count = 0
|
parent_path = path + '\\' + dir + '\\'
|
||||||
for filename in file_list:
|
for file in dir_file:
|
||||||
pic_count += 1
|
if file[0:3] == "Pdf":
|
||||||
if pic_count % 3 == 1:
|
dir_file.remove(file)
|
||||||
doc.add_picture(filename, height = Inches(2.5))
|
dir_file.insert(0, file) # 将发票文件放在第一个位置
|
||||||
table = doc.add_table(rows = 1, cols = 2)
|
doc.add_picture(parent_path + dir_file[0], height = Inches(2.5))
|
||||||
elif pic_count % 3 == 2:
|
table = doc.add_table(rows = 1, cols = 2)
|
||||||
cell1 = table.cell(0, 0)
|
cell1 = table.cell(0, 0)
|
||||||
cell1.paragraphs[0].add_run().add_picture(filename, height = Inches(5.0))
|
cell1.paragraphs[0].add_run().add_picture(parent_path + dir_file[1], height = Inches(5.0))
|
||||||
elif pic_count % 3 == 0:
|
cell2 = table.cell(0, 1)
|
||||||
cell2 = table.cell(0, 1)
|
if len(dir_file) == 3:
|
||||||
cell2.paragraphs[0].add_run().add_picture(filename, height = Inches(5.0))
|
cell2.paragraphs[0].add_run().add_picture(parent_path + dir_file[2], height = Inches(5.0))
|
||||||
if pic_count != len(file_list):
|
if (dir, dir_file) != list(file_list.items())[-1]: # dict.items() 返回值需先转换为列表,才能索引
|
||||||
doc.add_page_break()
|
doc.add_page_break()
|
||||||
else:
|
else:
|
||||||
print("请检查文件命名是否正确!")
|
print("请检查文件结构是否正确!")
|
||||||
doc.save("output.docx")
|
done(1)
|
||||||
print("生成完毕!")
|
doc.save("output.docx")
|
||||||
break
|
print("生成完毕!")
|
||||||
|
|
||||||
def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
|
def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
|
||||||
doc = fitz.open(pdf_path + "\\" + pdf_name) # 打开文档
|
doc = fitz.open(pdf_path + "\\" + pdf_name) # 打开文档
|
||||||
|
|
@ -85,18 +140,22 @@ def pdf2img(pdf_path, pdf_name, zoom_x = 3, zoom_y = 3):
|
||||||
return 1
|
return 1
|
||||||
for page in doc: # 遍历页面
|
for page in doc: # 遍历页面
|
||||||
pix = page.get_pixmap(matrix=fitz.Matrix(zoom_x, zoom_y)) # 将页面渲染为图片
|
pix = page.get_pixmap(matrix=fitz.Matrix(zoom_x, zoom_y)) # 将页面渲染为图片
|
||||||
pix.save(pdf_name[:-4] + '.png') # 将图像存储为PNG格式
|
png_path = pdf_path + '\\' + "pdf" + pdf_name[:-4] + ".png"
|
||||||
|
gened_list.append(png_path)
|
||||||
|
pix.save(png_path) # 将图像存储为PNG格式
|
||||||
doc.close() # 关闭文档
|
doc.close() # 关闭文档
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
print("在使用该脚本前,请将发票、付款记录、购买记录按以下规则命名:")
|
gened_list = [] # 新创建的文件列表,在出错时或者生成完毕时进行删除
|
||||||
print("在文件名前添加[两位数字][类型]")
|
print("在使用该脚本前,请保证程序所在的文件夹中仅包含程序、发票文件夹")
|
||||||
print("数字代表的是第几份材料,类型:发票a,付款记录b,购买记录c")
|
print("并且确保每个发票文件夹内只有三个文件")
|
||||||
print("如:02b小公仔付款.png 代表第二份材料中的付款记录")
|
|
||||||
input("按回车键开始...")
|
input("按回车键开始...")
|
||||||
convert_img()
|
convert_all()
|
||||||
gen_docx()
|
gen_docx()
|
||||||
print("生成 .pdf 文件中...")
|
print("生成 .pdf 文件中...")
|
||||||
convert("output.docx", "output.pdf")
|
convert("output.docx", "output.pdf")
|
||||||
print("生成完毕!")
|
print("生成完毕!")
|
||||||
|
done(0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Binary file not shown.
Loading…
Reference in New Issue