diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..32bcdf1 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/test \ No newline at end of file diff --git a/README.md b/README.md index 420c068..2ebabfb 100644 --- a/README.md +++ b/README.md @@ -7,4 +7,5 @@ ## 所需包(已打包在`.exe`中) - requests -- pandas \ No newline at end of file +- pandas +- openpyxl \ No newline at end of file diff --git a/main.py b/main.py index 865fc3e..a206426 100644 --- a/main.py +++ b/main.py @@ -2,6 +2,7 @@ from pandas import read_excel from requests import get from sys import exit import os +import numpy as np def download(url, pic_path_name): format_file = url.split(".")[-1].lower() @@ -14,9 +15,11 @@ def download(url, pic_path_name): f.write(result.content) if __name__ == "__main__": + skip_str = ["(空)", "(跳过)", "" ,float('nan')] #遇到这几种字符串的时候跳过 dir_path = input("请输入 .xls / .xlsx 文件所在文件夹的绝对或相对路径:") for root, _, files in os.walk(dir_path): for file in files: + print(file.lower()) if file.lower().endswith('.xls') or file.lower().endswith('.xlsx'): excel_path = os.path.join(root, file) df = read_excel(excel_path) @@ -24,20 +27,34 @@ if __name__ == "__main__": dir_name = file.split('.')[0] os.mkdir(dir_name) for one in range(len(df)): - path_name = './' + dir_name + '/' + df["3、购买日期"][one] + " 【" + str(df["4、金额"][one]) + "】 " + df["5、物品"][one] + " " + df["1、您的姓名:"][one] + path_name = './' + dir_name + '/' + str(df["2、购买日期 格式:年+月+日 如 2025.10.19"][one]) + " 【" + str(df["4、金额"][one]) + "】 " + df["3、购买物品"][one] + " " + df["1、你的姓名"][one] if not os.path.exists(path_name): os.mkdir(path_name) # 处理购买记录截图 - buy_prt_sc = df["6、购买记录截图:"][one] - if buy_prt_sc != "(跳过)": - download(buy_prt_sc, "./" + path_name + "/购买记录截图") + buy_prt_sc = df["7、订单截图(如淘宝订单截图)"][one] + if buy_prt_sc not in skip_str and isinstance(buy_prt_sc, str): + download(buy_prt_sc, "./" + path_name + "/订单截图") + else: + print(f"第{one + 1}个购买记录截图被跳过") # 处理支付记录截图 - pay_prt_sc = df["7、支付记录截图:"][one] - if pay_prt_sc != "(跳过)": + pay_prt_sc = df["6、支付记录(如微信支付/支付宝支付记录截图)"][one] + if pay_prt_sc not in skip_str and isinstance(pay_prt_sc, str): download(pay_prt_sc, "./" + path_name + "/支付记录截图") - # 处理发票图片 - pdf = df["9、发票图片:"][one] - if pdf != "(空)": - download(pdf, "./" + path_name + "/发票图片") + else: + print(f"第{one + 1}个支付截图记录被跳过") + + # 处理发票 + invoice = df["8、发票(注意一定要上传pdf格式!)"][one] + if invoice not in skip_str and isinstance(invoice, str): + download(invoice, "./" + path_name + "/发票") + else: + print(f"第{one + 1}个发票被跳过") + # 处理行程单 + itinerary = df["9、电子行程单(pdf)"][one] + if itinerary not in skip_str and isinstance(itinerary, str): + download(itinerary, "./" + path_name + "/行程单") + else: + print(f"第{one + 1}个行程单被跳过") + print(f"共计{len(df)}, 已完成{one + 1}") print(file + " 处理完毕!") \ No newline at end of file