label.py 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. import numpy as np
  2. import pandas as pd
  3. # =============================
  4. # 事件识别和划分
  5. # =============================
  6. class UFEventClassifier:
  7. def __init__(self, unit_name, inlet_codes, physical_code, chemical_code):
  8. self.unit = unit_name
  9. self.inlet_codes = inlet_codes
  10. self.physical_code = physical_code
  11. self.chemical_code = chemical_code
  12. self.ctrl_col = f"C.M.{unit_name}_DB@word_control"
  13. def classify(self, df):
  14. df = df.copy()
  15. df["event_type"] = "other"
  16. df.loc[df[self.ctrl_col].isin(self.inlet_codes), "event_type"] = "inlet"
  17. df.loc[(df[self.ctrl_col] >= self.physical_code - 5) &(df[self.ctrl_col] <= self.physical_code + 5),"event_type"] = "bw_phys"
  18. df.loc[(df[self.ctrl_col] >= self.chemical_code - 5) &(df[self.ctrl_col] <= self.chemical_code + 5),"event_type"] = "bw_chem"
  19. return df
  20. def segment(self, df):
  21. df = df.copy()
  22. df["segment_id"] = np.nan
  23. seg_id = 0
  24. in_inlet = False
  25. for i, evt in enumerate(df["event_type"]):
  26. if evt == "inlet":
  27. if not in_inlet:
  28. seg_id += 1
  29. in_inlet = True
  30. df.loc[i, "segment_id"] = seg_id
  31. else:
  32. in_inlet = False
  33. df = df[df["segment_id"].notna()].copy()
  34. df["segment_id"] = df["segment_id"].astype(int)
  35. return df
  36. class PostBackwashInletMarker:
  37. """
  38. 标记反冲洗事件后的前 N 个进水点
  39. """
  40. def __init__(self, n_points=10):
  41. self.n_points = n_points
  42. self.label_col = "post_bw_inlet" # 新标记列
  43. def mark(self, df: pd.DataFrame) -> pd.DataFrame:
  44. df = df.copy()
  45. # 确保 event_type 清洗干净,避免 object array 卡死
  46. df['event_type'] = (
  47. df['event_type']
  48. .astype(str)
  49. .str.strip()
  50. .fillna('')
  51. )
  52. df[self.label_col] = False
  53. # 找出所有反冲洗事件索引
  54. bw_idx = df.index[df['event_type'].isin(['bw_phys', 'bw_chem'])]
  55. # 预先计算 inlet mask,避免多次 object-level 比较
  56. inlet_mask = (df['event_type'] == 'inlet')
  57. for idx in bw_idx:
  58. # 只看 idx 之后的 inlet
  59. candidate_idx = df.index[(df.index > idx) & inlet_mask]
  60. post_idx = candidate_idx[: self.n_points]
  61. df.loc[post_idx, self.label_col] = True
  62. return df