```python
from sqlalchemy import create_engine
import pandas as pd
import os

# Database connection settings (overridable via environment variables)
username = os.getenv('DB_USERNAME', 'whu')
password = os.getenv('DB_PASSWORD', '09093f4e6b33ddd')
host = os.getenv('DB_HOST', '222.130.26.206')
database = os.getenv('DB_DATABASE', 'ws_data')
port = int(os.getenv('DB_PORT', '4000'))

database_url = f'mysql+pymysql://{username}:{password}@{host}:{port}/{database}?charset=utf8mb4'
engine = create_engine(database_url)

# Query window
start_time = "2025-08-01 00:01:00"
end_time = "2025-09-10 00:00:00"

query = """
SELECT *
FROM dc_item_history_data_92
WHERE item_name IN (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
  AND h_time >= %s
  AND h_time <= %s
"""
params = (
    "C.M.RO1_FT_JS@out",    # RO1 reverse-osmosis feed flow
    "C.M.RO2_FT_JS@out",    # RO2 reverse-osmosis feed flow
    "C.M.RO3_FT_JS@out",    # RO3 reverse-osmosis feed flow
    "C.M.RO4_FT_JS@out",    # RO4 reverse-osmosis feed flow
    "C.M.RO_TT_ZJS@out",    # RO total feed temperature
    "C.M.RO_Cond_ZJS@out",  # RO total feed conductivity
    "C.M.RO_Cond_ZCS@out",  # RO total permeate conductivity
    "C.M.RO1_DB@DPT_1",     # RO1 stage-1 differential pressure
    "C.M.RO1_DB@DPT_2",     # RO1 stage-2 differential pressure
    "C.M.RO2_DB@DPT_1",     # RO2 stage-1 differential pressure
    "C.M.RO2_DB@DPT_2",     # RO2 stage-2 differential pressure
    "C.M.RO3_DB@DPT_1",     # RO3 stage-1 differential pressure
    "C.M.RO3_DB@DPT_2",     # RO3 stage-2 differential pressure
    "C.M.RO4_DB@DPT_1",     # RO4 stage-1 differential pressure
    "C.M.RO4_DB@DPT_2",     # RO4 stage-2 differential pressure
    start_time,
    end_time,
)

data_origin = pd.read_sql(query, engine, params=params)
engine.dispose()

# Pivot from long format (one row per tag reading) to wide format (one column per tag)
data = data_origin.pivot_table(index='h_time', columns='item_name', values='val')
data = data.reset_index()

# Define the standard time series (one timestamp per second)
standard_times = pd.date_range(start=start_time, end=end_time, freq='s')

# Align to the standard time series, keeping 'h_time' as the timestamp column name
data_aligned = (
    data.set_index('h_time')
        .reindex(standard_times)
        .rename_axis('h_time')
        .reset_index()
)

# Forward-fill missing values
data_filled = data_aligned.ffill()

# Downsample: keep every 60th row (1 s data -> 1 min data)
data_sampled = data_filled.iloc[::60, :]

# Data export
current_directory = os.getcwd()
parent_directory = os.path.dirname(current_directory)
folder_name = 'datasets_xishan'
folder_path = os.path.join(parent_directory, folder_name)

# Make sure the output folder exists
os.makedirs(folder_path, exist_ok=True)

# Split the output across multiple files
chunk_size = 500000
file_count = 8  # file counter (the first file written will be data_9.csv)
for start in range(0, len(data_sampled), chunk_size):
    end = min(start + chunk_size, len(data_sampled))
    chunk = data_sampled.iloc[start:end]

    # Build a unique file name for this chunk
    file_name = f'data_{file_count + 1}.csv'
    file_path = os.path.join(folder_path, file_name)
    chunk.to_csv(file_path, index=False, encoding='utf_8_sig')

    file_count += 1
```
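If the exported chunks need to be used later, they can be stitched back into a single frame. The sketch below is a minimal example, not part of the export script itself; it assumes the files were written by the code above into `datasets_xishan` with names matching `data_*.csv`, and that `h_time` is the timestamp column to parse. The numeric sort key is only needed because plain lexicographic order would put `data_10.csv` before `data_9.csv`.

```python
import glob
import os

import pandas as pd

# Same output folder layout as the export script above
folder_path = os.path.join(os.path.dirname(os.getcwd()), "datasets_xishan")


def chunk_index(path: str) -> int:
    # Extract the numeric suffix from names like data_12.csv so files sort numerically
    name = os.path.splitext(os.path.basename(path))[0]
    return int(name.split("_")[-1])


# Collect the chunk files in numeric order
csv_files = sorted(glob.glob(os.path.join(folder_path, "data_*.csv")), key=chunk_index)

# Read each chunk and concatenate, parsing h_time as datetimes
frames = [pd.read_csv(f, parse_dates=["h_time"]) for f in csv_files]
data_all = pd.concat(frames, ignore_index=True)

print(data_all.shape)
```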