|
@@ -10,7 +10,7 @@ DB_HOST = "222.130.26.206"
|
|
|
DB_PORT = 4000
|
|
DB_PORT = 4000
|
|
|
|
|
|
|
|
# 时间配置
|
|
# 时间配置
|
|
|
-START_TIME = datetime(2025, 6, 11, 0, 0, 0)
|
|
|
|
|
|
|
+START_TIME = datetime(2025, 4, 1, 0, 0, 0)
|
|
|
END_TIME = datetime(2026, 3, 1, 6, 0, 0)
|
|
END_TIME = datetime(2026, 3, 1, 6, 0, 0)
|
|
|
BOUNDARY = datetime(2025, 3, 25, 0, 0, 0)
|
|
BOUNDARY = datetime(2025, 3, 25, 0, 0, 0)
|
|
|
|
|
|
|
@@ -28,26 +28,25 @@ DELETE_PERIODS = [(pd.to_datetime(s), pd.to_datetime(e)) for s, e in DELETE_PERI
|
|
|
UNITS = [1, 2]
|
|
UNITS = [1, 2]
|
|
|
|
|
|
|
|
BASE_VARIABLES = [
|
|
BASE_VARIABLES = [
|
|
|
- "ns=3;s={}#UF_JSFLOW_O", # 进水流量
|
|
|
|
|
- "ns=3;s={}#UF_JSPRESS_O", # 进水压力
|
|
|
|
|
- "ns=3;s=UF{}_SSD_KMYC", # 跨膜压差
|
|
|
|
|
- "ns=3;s=UF{}_STEP", # 步序/控制字
|
|
|
|
|
- "ns=3;s=ZZ_{}#UFBWB_POWER", # 反洗泵功率
|
|
|
|
|
|
|
+ "AR.{}#UF_JSFLOW_O", # 进水流量
|
|
|
|
|
+ "AR.{}#UF_JSPRESS_O", # 进水压力
|
|
|
|
|
+ "AR.UF{}_SSD_KMYC", # 跨膜压差
|
|
|
|
|
+ "AR.UF{}_STEP", # 步序/控制字
|
|
|
|
|
+ "AR.ZZ_{}#UFBWB_POWER" # 反洗泵功率
|
|
|
]
|
|
]
|
|
|
SYSTEM_VARIABLES = [
|
|
SYSTEM_VARIABLES = [
|
|
|
- "ns=3;s=ZJS_TEMP_O", # 进水温度
|
|
|
|
|
- "ns=3;s=RO_JSORP_O", # 总产水ORP
|
|
|
|
|
- "ns=3;s=RO_JSPH_O", # 总产水PH
|
|
|
|
|
- "ns=3;s=RO_JSDD_O", # 总产水电导
|
|
|
|
|
- "ns=3;s=CN_LEVEL_O", # 次钠液位
|
|
|
|
|
- "ns=3;s=S_LEVEL_O", # 酸液位
|
|
|
|
|
- "ns=3;s=J_LEVEL_O", # 碱液位
|
|
|
|
|
- "ns=3;s=ZZ_UFGSB_POWER", # 超滤供水泵功率
|
|
|
|
|
-
|
|
|
|
|
|
|
+ "AR.ZJS_TEMP_O", # 进水温度
|
|
|
|
|
+ "AR.RO_JSORP_O", # 总产水ORP
|
|
|
|
|
+ "AR.RO_JSPH_O", # 总产水PH
|
|
|
|
|
+ "AR.RO_JSDD_O", # 总产水电导
|
|
|
|
|
+ "AR.CN_LEVEL_O", # 次钠液位
|
|
|
|
|
+ "AR.S_LEVEL_O", # 酸液位
|
|
|
|
|
+ "AR.J_LEVEL_O", # 碱液位
|
|
|
|
|
+ # "AR.ZZ_UFGSB_POWER", # 超滤供水泵功率
|
|
|
]
|
|
]
|
|
|
|
|
|
|
|
# 输出目录
|
|
# 输出目录
|
|
|
-BASE_OUTPUT_DIR = "../datasets/UF_lankao_data"
|
|
|
|
|
|
|
+BASE_OUTPUT_DIR = "../datasets/UF_anzhen_data"
|
|
|
PROCESSED_OUTPUT_DIR = os.path.join(BASE_OUTPUT_DIR, "raw")
|
|
PROCESSED_OUTPUT_DIR = os.path.join(BASE_OUTPUT_DIR, "raw")
|
|
|
|
|
|
|
|
# 创建目录
|
|
# 创建目录
|
|
@@ -109,7 +108,7 @@ def fetch_valve_aggregated(name, start, end, engine, interval_minutes=1):
|
|
|
h_time,
|
|
h_time,
|
|
|
val,
|
|
val,
|
|
|
FLOOR(UNIX_TIMESTAMP(h_time) / {interval_seconds}) AS time_group
|
|
FLOOR(UNIX_TIMESTAMP(h_time) / {interval_seconds}) AS time_group
|
|
|
- FROM dc_item_history_data_1451
|
|
|
|
|
|
|
+ FROM dc_item_history_data_1181
|
|
|
WHERE item_name = :name
|
|
WHERE item_name = :name
|
|
|
AND h_time BETWEEN :st AND :et
|
|
AND h_time BETWEEN :st AND :et
|
|
|
AND val IS NOT NULL
|
|
AND val IS NOT NULL
|
|
@@ -137,7 +136,7 @@ def fetch_special_data(sensor, start, end, boundary, engine_test, engine_prod):
|
|
|
"""
|
|
"""
|
|
|
sql = text("""
|
|
sql = text("""
|
|
|
SELECT h_time AS time, val
|
|
SELECT h_time AS time, val
|
|
|
- FROM dc_item_history_data_1451
|
|
|
|
|
|
|
+ FROM dc_item_history_data_1181
|
|
|
WHERE item_name = :name
|
|
WHERE item_name = :name
|
|
|
AND h_time BETWEEN :st
|
|
AND h_time BETWEEN :st
|
|
|
AND :et
|
|
AND :et
|
|
@@ -165,6 +164,7 @@ def fetch_special_data(sensor, start, end, boundary, engine_test, engine_prod):
|
|
|
return pd.DataFrame()
|
|
return pd.DataFrame()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+# ---------- 传感器数据查询函数(只获取聚合数据)----------
|
|
|
# ---------- 传感器数据查询函数(只获取聚合数据)----------
|
|
# ---------- 传感器数据查询函数(只获取聚合数据)----------
|
|
|
def fetch_sensor_data(sensor_names, start_time, end_time, boundary, engine_test, engine_prod):
|
|
def fetch_sensor_data(sensor_names, start_time, end_time, boundary, engine_test, engine_prod):
|
|
|
"""
|
|
"""
|
|
@@ -183,7 +183,17 @@ def fetch_sensor_data(sensor_names, start_time, end_time, boundary, engine_test,
|
|
|
|
|
|
|
|
print(f"\n识别到 {len(special_vars)} 个离散变量, {len(continuous_vars)} 个连续变量")
|
|
print(f"\n识别到 {len(special_vars)} 个离散变量, {len(continuous_vars)} 个连续变量")
|
|
|
|
|
|
|
|
- all_data = []
|
|
|
|
|
|
|
+ # 3. 创建完整的时间网格(整分钟)- 先创建
|
|
|
|
|
+ print(f"\n创建完整时间网格...")
|
|
|
|
|
+ time_grid = pd.date_range(
|
|
|
|
|
+ start=start_time.replace(second=0, microsecond=0),
|
|
|
|
|
+ end=end_time.replace(second=0, microsecond=0),
|
|
|
|
|
+ freq='1min'
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ # 创建以时间为索引的DataFrame
|
|
|
|
|
+ merged_df = pd.DataFrame(index=time_grid)
|
|
|
|
|
+ print(f"时间网格: {len(time_grid)} 个时间点")
|
|
|
|
|
|
|
|
# 1. 处理连续变量(按分钟聚合,时间对齐到整分钟)
|
|
# 1. 处理连续变量(按分钟聚合,时间对齐到整分钟)
|
|
|
if continuous_vars:
|
|
if continuous_vars:
|
|
@@ -201,12 +211,14 @@ def fetch_sensor_data(sensor_names, start_time, end_time, boundary, engine_test,
|
|
|
df = pd.concat([df1, df2], ignore_index=True)
|
|
df = pd.concat([df1, df2], ignore_index=True)
|
|
|
|
|
|
|
|
if not df.empty:
|
|
if not df.empty:
|
|
|
- # 确保时间戳是整分钟
|
|
|
|
|
|
|
+ # 确保时间戳是整分钟并设为索引
|
|
|
df['time'] = pd.to_datetime(df['time']).dt.floor('1min')
|
|
df['time'] = pd.to_datetime(df['time']).dt.floor('1min')
|
|
|
- # 按时间去重(同一分钟可能有多个聚合结果)
|
|
|
|
|
df = df.drop_duplicates(subset=['time'], keep='first')
|
|
df = df.drop_duplicates(subset=['time'], keep='first')
|
|
|
- df_renamed = df.rename(columns={'val': sensor})
|
|
|
|
|
- all_data.append(df_renamed[['time', sensor]])
|
|
|
|
|
|
|
+ df = df.set_index('time')
|
|
|
|
|
+
|
|
|
|
|
+ # 添加到合并DataFrame
|
|
|
|
|
+ merged_df[sensor] = df['val']
|
|
|
|
|
+ print(f" ✓ {sensor}: {len(df)} 条记录")
|
|
|
else:
|
|
else:
|
|
|
print(f" ⚠ {sensor}: 无数据")
|
|
print(f" ⚠ {sensor}: 无数据")
|
|
|
|
|
|
|
@@ -214,7 +226,7 @@ def fetch_sensor_data(sensor_names, start_time, end_time, boundary, engine_test,
|
|
|
print(f" ⚠ {sensor}: 处理失败 - {str(e)}")
|
|
print(f" ⚠ {sensor}: 处理失败 - {str(e)}")
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
- # 2. 处理离散变量(保持原始变化点,但标记时间)
|
|
|
|
|
|
|
+ # 2. 处理离散变量(保持原始变化点)
|
|
|
if special_vars:
|
|
if special_vars:
|
|
|
print("\n获取离散变量数据(原始变化点):")
|
|
print("\n获取离散变量数据(原始变化点):")
|
|
|
for sensor in special_vars:
|
|
for sensor in special_vars:
|
|
@@ -223,12 +235,16 @@ def fetch_sensor_data(sensor_names, start_time, end_time, boundary, engine_test,
|
|
|
df = fetch_special_data(sensor, start_time, end_time, boundary, engine_test, engine_prod)
|
|
df = fetch_special_data(sensor, start_time, end_time, boundary, engine_test, engine_prod)
|
|
|
|
|
|
|
|
if not df.empty:
|
|
if not df.empty:
|
|
|
- # 将时间戳对齐到分钟,用于后续扩展
|
|
|
|
|
- df['time_min'] = df['time'].dt.floor('1min')
|
|
|
|
|
- # 重命名值列
|
|
|
|
|
- df_renamed = df.rename(columns={'val': sensor})
|
|
|
|
|
- all_data.append(df_renamed[['time_min', sensor]].rename(columns={'time_min': 'time'}))
|
|
|
|
|
- print(f" ✓ {sensor}: {len(df)} 个变化点")
|
|
|
|
|
|
|
+ # 创建分钟级的重采样
|
|
|
|
|
+ df['time'] = pd.to_datetime(df['time'])
|
|
|
|
|
+ df = df.set_index('time')
|
|
|
|
|
+
|
|
|
|
|
+ # 重采样到分钟,对于离散变量使用前向填充
|
|
|
|
|
+ df_resampled = df.resample('1min').ffill()
|
|
|
|
|
+
|
|
|
|
|
+ # 添加到合并DataFrame
|
|
|
|
|
+ merged_df[sensor] = df_resampled['val']
|
|
|
|
|
+ print(f" ✓ {sensor}: {len(df)} 个原始点 -> {len(df_resampled)} 个分钟点")
|
|
|
else:
|
|
else:
|
|
|
print(f" ⚠ {sensor}: 无数据")
|
|
print(f" ⚠ {sensor}: 无数据")
|
|
|
|
|
|
|
@@ -236,35 +252,16 @@ def fetch_sensor_data(sensor_names, start_time, end_time, boundary, engine_test,
|
|
|
print(f" ⚠ {sensor}: 处理失败 - {str(e)}")
|
|
print(f" ⚠ {sensor}: 处理失败 - {str(e)}")
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
- if not all_data:
|
|
|
|
|
|
|
+ if merged_df.empty or len(merged_df.columns) == 0:
|
|
|
print("\n❌ 未获取到任何传感器数据")
|
|
print("\n❌ 未获取到任何传感器数据")
|
|
|
return pd.DataFrame()
|
|
return pd.DataFrame()
|
|
|
|
|
|
|
|
- # 3. 创建完整的时间网格(整分钟)
|
|
|
|
|
- print(f"\n创建完整时间网格...")
|
|
|
|
|
- time_grid = pd.date_range(
|
|
|
|
|
- start=start_time.replace(second=0, microsecond=0), # 对齐到秒0
|
|
|
|
|
- end=end_time.replace(second=0, microsecond=0),
|
|
|
|
|
- freq='1min'
|
|
|
|
|
- )
|
|
|
|
|
- merged_df = pd.DataFrame({'time': time_grid})
|
|
|
|
|
- print(f"时间网格: {len(time_grid)} 个时间点")
|
|
|
|
|
-
|
|
|
|
|
- # 4. 逐个合并数据
|
|
|
|
|
- print(f"\n开始合并 {len(all_data)} 个传感器的数据...")
|
|
|
|
|
- for df in all_data:
|
|
|
|
|
- df['time'] = pd.to_datetime(df['time']).dt.floor('1min')
|
|
|
|
|
- # 对于步序变量,需要先扩展填充
|
|
|
|
|
- sensor_name = df.columns[1] # 获取传感器名称
|
|
|
|
|
- if sensor_name in step_vars:
|
|
|
|
|
- # 步序变量:前向填充
|
|
|
|
|
- merged_df = merged_df.merge(df, on='time', how='left')
|
|
|
|
|
- merged_df[sensor_name] = merged_df[sensor_name].fillna(method='ffill')
|
|
|
|
|
- else:
|
|
|
|
|
- # 连续变量:直接合并
|
|
|
|
|
- merged_df = merged_df.merge(df, on='time', how='left')
|
|
|
|
|
|
|
+ # 重置索引,将时间变为列
|
|
|
|
|
+ merged_df = merged_df.reset_index()
|
|
|
|
|
+ merged_df = merged_df.rename(columns={'index': 'time'})
|
|
|
|
|
|
|
|
- print(f"合并完成,共 {len(merged_df)} 条时间记录")
|
|
|
|
|
|
|
+ print(f"\n合并完成,共 {len(merged_df)} 条时间记录 × {len(merged_df.columns) - 1} 个传感器")
|
|
|
|
|
+ print(f"数据框形状: {merged_df.shape}")
|
|
|
|
|
|
|
|
# 5. 删除黑名单时段
|
|
# 5. 删除黑名单时段
|
|
|
print("\n删除黑名单时段...")
|
|
print("\n删除黑名单时段...")
|
|
@@ -281,7 +278,6 @@ def fetch_sensor_data(sensor_names, start_time, end_time, boundary, engine_test,
|
|
|
return merged_df
|
|
return merged_df
|
|
|
|
|
|
|
|
|
|
|
|
|
-
|
|
|
|
|
# ---------- 数据后处理函数(填充空值)----------
|
|
# ---------- 数据后处理函数(填充空值)----------
|
|
|
def post_process_data(df, continuous_vars, step_vars):
|
|
def post_process_data(df, continuous_vars, step_vars):
|
|
|
"""
|
|
"""
|