from sqlalchemy import create_engine
import pandas as pd
import os

# Database connection settings (overridable via environment variables)
username = os.getenv('DB_USERNAME', 'whu')
password = os.getenv('DB_PASSWORD', '09093f4e6b33ddd')
host = os.getenv('DB_HOST', '222.130.26.206')
database = os.getenv('DB_DATABASE', 'ws_data')
port = int(os.getenv('DB_PORT', '4000'))

database_url = f'mysql+pymysql://{username}:{password}@{host}:{port}/{database}?charset=utf8mb4'
engine = create_engine(database_url)

start_time = "2025-08-01 00:01:00"
end_time = "2025-09-10 00:00:00"

query = """
SELECT *
FROM dc_item_history_data_92
WHERE item_name IN (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
  AND h_time >= %s
  AND h_time <= %s
"""

params = (
    "C.M.RO1_FT_JS@out",    # RO1 reverse-osmosis feed water flow
    "C.M.RO2_FT_JS@out",    # RO2 reverse-osmosis feed water flow
    "C.M.RO3_FT_JS@out",    # RO3 reverse-osmosis feed water flow
    "C.M.RO4_FT_JS@out",    # RO4 reverse-osmosis feed water flow
    "C.M.RO_TT_ZJS@out",    # RO total feed water temperature
    'C.M.RO_Cond_ZJS@out',  # RO total feed water conductivity
    'C.M.RO_Cond_ZCS@out',  # RO total product water conductivity
    'C.M.RO1_DB@DPT_1',     # RO1 stage-1 differential pressure
    'C.M.RO1_DB@DPT_2',     # RO1 stage-2 differential pressure
    'C.M.RO2_DB@DPT_1',     # RO2 stage-1 differential pressure
    'C.M.RO2_DB@DPT_2',     # RO2 stage-2 differential pressure
    'C.M.RO3_DB@DPT_1',     # RO3 stage-1 differential pressure
    'C.M.RO3_DB@DPT_2',     # RO3 stage-2 differential pressure
    'C.M.RO4_DB@DPT_1',     # RO4 stage-1 differential pressure
    'C.M.RO4_DB@DPT_2',     # RO4 stage-2 differential pressure
    start_time,
    end_time,
)

data_origin = pd.read_sql(query, engine, params=params)
engine.dispose()

# Long-to-wide: one column per tag, one row per timestamp
data = data_origin.pivot_table(index='h_time', columns='item_name', values='val')
data = data.reset_index()

# Define the standard time series (one timestamp per second); naming the index
# keeps the timestamp column called 'h_time' after reset_index below
standard_times = pd.date_range(start=start_time, end=end_time, freq='s', name='h_time')

# Align to the standard time series
data_aligned = data.set_index('h_time').reindex(standard_times).reset_index()

# Forward-fill missing values
data_filled = data_aligned.ffill()

# Downsample: keep every 60th row (1 s -> 1 min)
data_sampled = data_filled.iloc[::60, :]

# Export the data
current_directory = os.getcwd()
parent_directory = os.path.dirname(current_directory)
folder_name = 'datasets_xishan'
folder_path = os.path.join(parent_directory, folder_name)

# Ensure the output folder exists
os.makedirs(folder_path, exist_ok=True)

# Split the output across multiple CSV files
chunk_size = 500000
file_count = 8  # file counter; the first file written this run is data_9.csv,
                # presumably continuing a series from earlier exports
for start in range(0, len(data_sampled), chunk_size):
    end = min(start + chunk_size, len(data_sampled))
    chunk = data_sampled.iloc[start:end]
    # Generate a unique file name
    file_name = f'data_{file_count + 1}.csv'
    file_path = os.path.join(folder_path, file_name)
    chunk.to_csv(file_path, index=False, encoding='utf_8_sig')
    file_count += 1
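
# --- Optional sanity check: a minimal sketch added for illustration, not part of
# the original pipeline. It reads the exported chunks back and reports per-file
# row counts and time ranges so the export can be verified by eye. It assumes the
# data_*.csv naming and the folder_path defined above; only standard glob and
# pandas calls are used.
import glob

check_files = sorted(glob.glob(os.path.join(folder_path, 'data_*.csv')))
total_rows = 0
for f in check_files:
    df = pd.read_csv(f, parse_dates=['h_time'])
    total_rows += len(df)
    print(f"{os.path.basename(f)}: {len(df)} rows, "
          f"{df['h_time'].min()} -> {df['h_time'].max()}")
print(f"{len(check_files)} files, {total_rows} rows total "
      f"(this run exported {len(data_sampled)} rows)")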