fit.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. # cycle_analysis.py
  2. import numpy as np
  3. import pandas as pd
  4. from scipy.optimize import minimize, curve_fit
  5. from sklearn.metrics import r2_score
  6. from datetime import timedelta
  7. class ChemicalCycleSegmenter:
  8. """
  9. 根据 event_type == 'bw_chem' 划分化学周期。
  10. 每两个化学反冲洗之间所有 inlet 稳定段属于一个化学周期。
  11. 若化学周期时间长度 > max_hours,则标记为无效周期。
  12. """
  13. def __init__(self, max_hours=60):
  14. self.max_hours = max_hours
  15. def assign_cycles(self, df_unit, stable_segments):
  16. """
  17. df_unit:含 time, event_type
  18. stable_segments:列表,每段有 segment_id, time 等
  19. 返回 cycles: dict(cycle_id -> { segments:[], valid:True/False, start_time/end_time })
  20. """
  21. # 找出所有化学反冲洗的位置
  22. ceb_times = df_unit.loc[df_unit["event_type"] == "bw_chem", "time"].sort_values().to_list()
  23. cycles = {}
  24. if len(ceb_times) < 2:
  25. return cycles
  26. for i in range(len(ceb_times) - 1):
  27. cycle_id = i + 1
  28. t_start = ceb_times[i]
  29. t_end = ceb_times[i + 1]
  30. # 该周期内的稳定 segment
  31. segs = []
  32. for seg in stable_segments:
  33. if seg["time"].iloc[0] >= t_start and seg["time"].iloc[-1] <= t_end:
  34. segs.append(seg)
  35. if len(segs) == 0:
  36. continue
  37. # 判断周期有效性(长度 <= max_hours)
  38. hours = (t_end - t_start).total_seconds() / 3600
  39. valid = hours <= self.max_hours
  40. cycles[cycle_id] = {
  41. "segments": segs,
  42. "start": t_start,
  43. "end": t_end,
  44. "hours": hours,
  45. "valid": valid
  46. }
  47. # 回写到段中
  48. for seg in segs:
  49. seg["chem_cycle_id"] = cycle_id
  50. seg["chem_cycle_valid"] = valid
  51. return cycles
  52. class ShortTermCycleFoulingFitter:
  53. """
  54. 对一个化学周期内所有 inlet 段统一拟合短期污染增长速率 nuK。
  55. """
  56. def __init__(self, unit):
  57. self.unit = unit
  58. @staticmethod
  59. def _is_invalid(x):
  60. return (
  61. x is None
  62. or np.any(pd.isna(x))
  63. or np.any(np.isinf(x))
  64. or np.any(np.abs(x) > 1e20)
  65. )
  66. def fit_cycle(self, segments):
  67. # ============================
  68. # 1. 初步检查
  69. # ============================
  70. if len(segments) == 0:
  71. return np.nan, np.nan
  72. try:
  73. df = pd.concat(segments, ignore_index=True).copy()
  74. except Exception:
  75. return np.nan, np.nan
  76. required_cols = ["time", f"{self.unit}_R_scaled", f"{self.unit}_J"]
  77. if any(col not in df.columns for col in required_cols):
  78. return np.nan, np.nan
  79. # 去重、排序
  80. df = df.sort_values("time").drop_duplicates(subset=["time"])
  81. if len(df) < 3:
  82. return np.nan, np.nan
  83. # ============================
  84. # 2. 计算 t
  85. # ============================
  86. try:
  87. df["t"] = (df["time"] - df["time"].iloc[0]).dt.total_seconds()
  88. except Exception:
  89. return np.nan, np.nan
  90. # 舍弃 t <= 0
  91. df = df[df["t"] > 0].copy()
  92. if len(df) < 2:
  93. return np.nan, np.nan
  94. # 取数据
  95. R = df[f"{self.unit}_R_scaled"].astype(float).values
  96. J = df[f"{self.unit}_J"].astype(float).values
  97. t = df["t"].astype(float).values
  98. R0 = R[0]
  99. # ============================
  100. # 3. 输入检查
  101. # ============================
  102. if any(self._is_invalid(x) for x in [R, J, t, R0]):
  103. return np.nan, np.nan
  104. # 防止 J=0 或极端 t
  105. if np.all(J == 0):
  106. return np.nan, np.nan
  107. # ============================
  108. # 4. 定义鲁棒损失(Huber)
  109. # ============================
  110. delta = 1.0 # Huber 阈值,可视需要调节
  111. def huber_loss(residual):
  112. abs_r = np.abs(residual)
  113. quadratic = np.minimum(abs_r, delta)
  114. linear = abs_r - quadratic
  115. return 0.5 * quadratic**2 + delta * linear
  116. def loss(nuk):
  117. pred = R0 + nuk * J * t
  118. residual = R - pred
  119. return np.sum(huber_loss(residual))
  120. # ============================
  121. # 5. 优化求解
  122. # ============================
  123. try:
  124. res = minimize(
  125. loss,
  126. x0=[1e-6],
  127. bounds=[(0, None)],
  128. options={"maxiter": 500, "disp": False}
  129. )
  130. except Exception:
  131. return np.nan, np.nan
  132. # 若优化失败
  133. if not res.success:
  134. return np.nan, np.nan
  135. nuk_opt = float(res.x[0])
  136. # ============================
  137. # 6. 计算预测值与 R2(保证无 NaN/Inf)
  138. # ============================
  139. pred = R0 + nuk_opt * J * t
  140. if self._is_invalid(pred):
  141. return nuk_opt, np.nan
  142. try:
  143. r2 = r2_score(R, pred)
  144. except Exception:
  145. r2 = np.nan
  146. return nuk_opt, r2
  147. class LongTermFoulingFitter:
  148. def __init__(self, unit):
  149. self.unit = unit
  150. @staticmethod
  151. def _power_law(t, a, b, R0):
  152. return R0 + a * np.power(t, b)
  153. @staticmethod
  154. def _is_invalid(x):
  155. return (
  156. x is None
  157. or np.any(pd.isna(x))
  158. or np.any(np.isinf(x))
  159. or np.any(np.abs(x) > 1e20) # 防止极端爆炸
  160. )
  161. def fit_cycle(self, segments):
  162. # 1. 空数据
  163. if not segments:
  164. return np.nan, np.nan, np.nan
  165. # 2. 合并
  166. try:
  167. df = pd.concat(segments, ignore_index=True).copy()
  168. except Exception:
  169. return np.nan, np.nan, np.nan
  170. if "time" not in df.columns or "R_scaled_start" not in df.columns:
  171. return np.nan, np.nan, np.nan
  172. df = df.sort_values("time").drop_duplicates(subset=["time"])
  173. if len(df) < 3:
  174. return np.nan, np.nan, np.nan
  175. # 3. 计算 t
  176. t0 = df["time"].iloc[0]
  177. df["t"] = (df["time"] - t0).dt.total_seconds()
  178. df_valid = df[df["t"] > 0].copy()
  179. if len(df_valid) < 2:
  180. return np.nan, np.nan, np.nan
  181. t = df_valid["t"].values.astype(float)
  182. R = df_valid["R_scaled_start"].values.astype(float)
  183. R0 = df["R_scaled_start"].iloc[0]
  184. # 4. 输入检查
  185. if self._is_invalid(t) or self._is_invalid(R) or self._is_invalid(R0):
  186. return np.nan, np.nan, np.nan
  187. # 5. 非线性拟合
  188. try:
  189. popt, _ = curve_fit(
  190. lambda tt, a, b: self._power_law(tt, a, b, R0),
  191. t, R,
  192. p0=(0.01, 1.0),
  193. maxfev=8000
  194. )
  195. a, b = popt
  196. # 检查结果是否爆炸
  197. if self._is_invalid(a) or self._is_invalid(b):
  198. return np.nan, np.nan, np.nan
  199. pred = self._power_law(t, a, b, R0)
  200. # 再检查预测值
  201. if self._is_invalid(pred):
  202. return np.nan, np.nan, np.nan
  203. r2 = r2_score(R, pred)
  204. # 再检查 r2
  205. if self._is_invalid(r2):
  206. return np.nan, np.nan, np.nan
  207. return a, b, r2
  208. except Exception:
  209. return np.nan, np.nan, np.nan
  210. class ChemicalBackwashCleaner:
  211. """
  212. 根据有效化学周期计算 CEB 去除膜阻力:
  213. ΔR = 上周期末 R_end - 下周期初 R_start
  214. """
  215. def __init__(self, unit):
  216. self.unit = unit
  217. def compute_removal(self, cycles):
  218. ids = sorted(cycles.keys())
  219. for i in range(len(ids) - 1):
  220. cid1 = ids[i]
  221. cid2 = ids[i + 1]
  222. c1 = cycles[cid1]
  223. c2 = cycles[cid2]
  224. if not (c1["valid"] and c2["valid"]):
  225. c1["R_removed"] = None
  226. continue
  227. # 上周期末:最后段的 R_end
  228. seg_last = c1["segments"][-1]
  229. R_end = seg_last[f"R_scaled_end"]
  230. # 下周期初:第一段的 R_start
  231. seg_first = c2["segments"][0]
  232. R_start = seg_first[f"R_scaled_start"]
  233. c1["R_removed"] = R_end - R_start
  234. # 最后一个周期无 CEB 去除值
  235. if ids:
  236. cycles[ids[-1]]["R_removed"] = None
  237. return cycles