label.py 2.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. import numpy as np
  2. import pandas as pd
# =============================
# Event identification and segmentation
# =============================
  6. class UFEventClassifier:
  7. def __init__(self, unit_name, inlet_codes, physical_codes, chemical_codes, ctrl_col):
  8. self.unit = unit_name
  9. self.inlet_min, self.inlet_max = inlet_codes
  10. self.physical_min, self.physical_max = physical_codes
  11. self.chemical_min, self.chemical_max = chemical_codes
  12. self.ctrl_col = ctrl_col
  13. def classify(self, df):
  14. df = df.copy()
  15. df["event_type"] = "other"
  16. df.loc[(df[self.ctrl_col] >= self.inlet_min) & (df[self.ctrl_col] <= self.inlet_max), "event_type"] = "inlet"
  17. df.loc[(df[self.ctrl_col] >= self.physical_min) & (df[self.ctrl_col] <= self.physical_max), "event_type"] = "bw_phys"
  18. df.loc[(df[self.ctrl_col] >= self.chemical_min) & (df[self.ctrl_col] <= self.chemical_max), "event_type"] = "bw_chem"
  19. return df
  20. def segment(self, df, inlet_types=None):
  21. """
  22. 将连续的事件划分为段
  23. Args:
  24. df: 输入数据框
  25. inlet_types: 被视为进水的事件类型列表,默认为 ["inlet", "other"]
  26. """
  27. if inlet_types is None:
  28. inlet_types = ["inlet", "other"] # 默认包括inlet和other
  29. df = df.copy()
  30. df["segment_id"] = np.nan
  31. seg_id = 0
  32. in_inlet = False
  33. for i, evt in enumerate(df["event_type"]):
  34. if evt in inlet_types: # 使用可配置的事件类型列表
  35. if not in_inlet:
  36. seg_id += 1
  37. in_inlet = True
  38. df.loc[i, "segment_id"] = seg_id
  39. else:
  40. in_inlet = False
  41. df = df[df["segment_id"].notna()].copy()
  42. df["segment_id"] = df["segment_id"].astype(int)
  43. return df
  44. class PostBackwashInletMarker:
  45. """
  46. 标记反冲洗事件后的前 N 个进水点
  47. """
  48. def __init__(self, n_points=10):
  49. self.n_points = n_points
  50. self.label_col = "post_bw_inlet" # 新标记列
  51. def mark(self, df: pd.DataFrame) -> pd.DataFrame:
  52. df = df.copy()
  53. # 确保 event_type 清洗干净,避免 object array 卡死
  54. df['event_type'] = (
  55. df['event_type']
  56. .astype(str)
  57. .str.strip()
  58. .fillna('')
  59. )
  60. df[self.label_col] = False
  61. # 找出所有反冲洗事件索引
  62. bw_idx = df.index[df['event_type'].isin(['bw_phys', 'bw_chem'])]
  63. # 预先计算 inlet mask,避免多次 object-level 比较
  64. inlet_mask = (df['event_type'] == 'inlet')
  65. for idx in bw_idx:
  66. # 只看 idx 之后的 inlet
  67. candidate_idx = df.index[(df.index > idx) & inlet_mask]
  68. post_idx = candidate_idx[: self.n_points]
  69. df.loc[post_idx, self.label_col] = True
  70. return df