"""Export RO (reverse-osmosis) sensor history from MySQL to chunked CSV files.

Pulls 15 item series from ``dc_item_history_data_92`` for a fixed time window,
pivots them to one column per item, aligns them to a 1-second grid,
forward-fills gaps, down-samples to 1-minute resolution, and writes the result
in chunks under ``../datasets_xishan/``.
"""
from sqlalchemy import create_engine
import pandas as pd
import os

# Connection settings come from the environment.
# SECURITY NOTE(review): the fallback defaults below are real-looking
# credentials checked into source -- they should be removed and supplied
# exclusively via environment / secret management.
username = os.getenv('DB_USERNAME', 'whu')
password = os.getenv('DB_PASSWORD', '09093f4e6b33ddd')
host = os.getenv('DB_HOST', '222.130.26.206')
database = os.getenv('DB_DATABASE', 'ws_data')
port = int(os.getenv('DB_PORT', '4000'))

database_url = f'mysql+pymysql://{username}:{password}@{host}:{port}/{database}?charset=utf8mb4'
engine = create_engine(database_url)

# Export window; both bounds are inclusive, matching the SQL predicates below.
start_time = "2025-08-01 00:01:00"
end_time = "2025-09-10 00:00:00"

# Parameterized query: 15 item-name placeholders plus the two time bounds.
# NOTE(review): %s-style placeholders through pd.read_sql match the pymysql
# driver's paramstyle; under SQLAlchemy 2.x this call may need
# sqlalchemy.text() with named binds instead -- verify against the installed
# versions.
query = """
SELECT *
FROM dc_item_history_data_92
WHERE item_name in (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
AND h_time >= %s
AND h_time <= %s
"""
params = (
    "C.M.RO1_FT_JS@out",    # RO1 feed-water flow
    "C.M.RO2_FT_JS@out",    # RO2 feed-water flow
    "C.M.RO3_FT_JS@out",    # RO3 feed-water flow
    "C.M.RO4_FT_JS@out",    # RO4 feed-water flow
    "C.M.RO_TT_ZJS@out",    # RO total feed-water temperature
    'C.M.RO_Cond_ZJS@out',  # RO total feed-water conductivity
    'C.M.RO_Cond_ZCS@out',  # RO total product-water conductivity
    'C.M.RO1_DB@DPT_1',     # RO1 stage-1 differential pressure
    'C.M.RO1_DB@DPT_2',     # RO1 stage-2 differential pressure
    'C.M.RO2_DB@DPT_1',     # RO2 stage-1 differential pressure
    'C.M.RO2_DB@DPT_2',     # RO2 stage-2 differential pressure
    'C.M.RO3_DB@DPT_1',     # RO3 stage-1 differential pressure
    'C.M.RO3_DB@DPT_2',     # RO3 stage-2 differential pressure
    'C.M.RO4_DB@DPT_1',     # RO4 stage-1 differential pressure
    'C.M.RO4_DB@DPT_2',     # RO4 stage-2 differential pressure
    start_time,
    end_time,
)

data_origin = pd.read_sql(query, engine, params=params)
engine.dispose()  # release pooled connections once the fetch is done

# Reshape to one row per timestamp, one column per item (values from 'val').
data = data_origin.pivot_table(index='h_time', columns='item_name', values='val')
data = data.reset_index()

# Reference time axis: one timestamp per second over the export window.
standard_times = pd.date_range(start=start_time, end=end_time, freq='s')
# Align to the reference axis; seconds absent from the data become NaN.
data_aligned = data.set_index('h_time').reindex(standard_times).reset_index()
# Forward-fill missing values from the last known reading.
data_filled = data_aligned.ffill()
# Keep every 60th row -> 1-minute resolution.
data_sampled = data_filled.iloc[::60, :]

# Output directory: sibling of the current working directory.
current_directory = os.getcwd()
parent_directory = os.path.dirname(current_directory)
folder_name = 'datasets_xishan'
folder_path = os.path.join(parent_directory, folder_name)
# exist_ok=True avoids the check-then-create race of os.path.exists().
os.makedirs(folder_path, exist_ok=True)

# Split the output across multiple CSV files of at most chunk_size rows.
chunk_size = 500000
# NOTE(review): the counter starts at 8 and filenames use file_count + 1, so
# the first file written is data_9.csv -- confirm this offset is intentional
# (e.g. continuing an earlier export series).
file_count = 8
for start in range(0, len(data_sampled), chunk_size):
    end = min(start + chunk_size, len(data_sampled))
    chunk = data_sampled.iloc[start:end]
    # Unique file name per chunk.
    file_name = f'data_{file_count + 1}.csv'
    file_path = os.path.join(folder_path, file_name)
    # utf_8_sig writes a BOM so Excel opens the CSV with correct encoding.
    chunk.to_csv(file_path, index=False, encoding='utf_8_sig')
    file_count += 1