You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
RuoYi-Vue/batchimport/batchprocessfile.py

74 lines
2.8 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import pandas as pd
import os
import sys
def process_single_file(reference_file_path, file_path, output_folder):
    """
    Normalize a single Excel file against a reference workbook and save it.

    The workbook at ``file_path`` is read, its column labels are replaced by
    those of the reference workbook, a trailing data-source attribution row is
    dropped when present, the '首发上市日' column (if any) is reduced to its
    date part, and the result is written to ``output_folder`` under the same
    file name.

    :param reference_file_path: path of the reference workbook; only its header row is used
    :param file_path: path of the workbook to process
    :param output_folder: folder that receives the processed workbook
    :raises ValueError: if the workbook's column count differs from the reference header
    """
    footer_marker = '数据来源东方财富Choice数据'
    date_column = '首发上市日'

    # Read the reference workbook to obtain the target header
    header = pd.read_excel(reference_file_path, parse_dates=False).columns

    # Read the workbook to process
    df2 = pd.read_excel(file_path, parse_dates=False)

    # Fail with a message that names the offending file instead of pandas'
    # generic "Length mismatch" error
    if len(df2.columns) != len(header):
        raise ValueError(
            f"Column count mismatch for {file_path}: "
            f"expected {len(header)}, got {len(df2.columns)}"
        )
    df2.columns = header

    # Drop the last row when its first cell holds the data-source attribution
    if not df2.empty and df2.iloc[-1, 0] == footer_marker:
        df2 = df2.iloc[:-1].copy()

    # Keep only the date part of the listing-date column. The column is
    # converted first because the .dt accessor raises AttributeError on a
    # non-datetime (e.g. text) column; unparseable cells become NaT.
    if date_column in df2.columns:
        parsed = pd.to_datetime(df2[date_column], errors='coerce')
        df2[date_column] = parsed.dt.normalize()

    # Save under the original file name inside the output folder
    output_file_path = os.path.join(output_folder, os.path.basename(file_path))
    df2.to_excel(output_file_path, index=False)
def batch_process_files(reference_file_path, input_folder, output_folder):
    """
    Process every matching Excel file found directly inside a folder.

    A file is processed when it is a regular file, its name contains
    '动量原始股全部A股', and its extension is .xlsx or .xls (compared
    case-insensitively). Excel lock files (names starting with '~$') are
    skipped. Each match is handed to ``process_single_file`` and a
    confirmation line is printed.

    :param reference_file_path: path of the reference workbook that supplies the header
    :param input_folder: folder containing the workbooks to process
    :param output_folder: folder that receives the processed workbooks (created if missing)
    """
    name_marker = '动量原始股全部A股'
    excel_suffixes = ('.xlsx', '.xls')

    # Make sure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Sorted so the processing order (and the log output) is reproducible
    for file_name in sorted(os.listdir(input_folder)):
        # Excel drops a "~$<name>" owner file next to an open workbook; it
        # matches the naming rule but is not a readable workbook
        if file_name.startswith('~$'):
            continue
        if name_marker not in file_name:
            continue
        if not file_name.lower().endswith(excel_suffixes):
            continue

        file_path = os.path.join(input_folder, file_name)
        if not os.path.isfile(file_path):
            continue

        process_single_file(reference_file_path, file_path, output_folder)
        print(f"Processed file: {file_name} successfully.")
# C:\Users\winds\Desktop\batch_process_xls\model.xlsx C:\Users\winds\Desktop\batch_process_xls\inputfile C:\Users\winds\Desktop\batch_process_xls\outputfile
if __name__ == "__main__":
    # Exactly three arguments are required after the script name
    if len(sys.argv) != 4:
        # Show the name this script was actually invoked with rather than a
        # hard-coded file name that can drift from the real one
        script_name = os.path.basename(sys.argv[0])
        print(f"Usage: python {script_name} reference_file_path input_folder output_folder")
        sys.exit(1)

    reference_file_path, input_folder, output_folder = sys.argv[1:4]
    batch_process_files(reference_file_path, input_folder, output_folder)
    print("Batch processing completed successfully.")