fit.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. # cycle_analysis.py
  2. import numpy as np
  3. import pandas as pd
  4. from scipy.optimize import minimize, curve_fit
  5. from sklearn.metrics import r2_score
  6. from datetime import timedelta
  7. class ChemicalCycleSegmenter:
  8. """
  9. 根据 event_type == 'bw_chem' 划分化学周期。
  10. 每两个化学反冲洗之间所有 inlet 稳定段属于一个化学周期。
  11. 若化学周期时间长度 > max_hours,则标记为无效周期。
  12. """
  13. def __init__(self, max_hours=60):
  14. self.max_hours = max_hours
  15. def assign_cycles(self, df_unit, stable_segments):
  16. """
  17. df_unit:含 time, event_type
  18. stable_segments:列表,每段有 segment_id, time 等
  19. 返回 cycles: dict(cycle_id -> { segments:[], valid:True/False, start_time/end_time })
  20. """
  21. # 找出所有化学反冲洗的位置
  22. ceb_times = df_unit.loc[df_unit["event_type"] == "bw_chem", "time"].sort_values().to_list()
  23. cycles = {}
  24. if len(ceb_times) < 2:
  25. return cycles
  26. for i in range(len(ceb_times) - 1):
  27. cycle_id = i + 1
  28. t_start = ceb_times[i]
  29. t_end = ceb_times[i + 1]
  30. # 该周期内的稳定 segment
  31. segs = []
  32. for seg in stable_segments:
  33. if seg["time"].iloc[0] >= t_start and seg["time"].iloc[-1] <= t_end:
  34. segs.append(seg)
  35. if len(segs) == 0:
  36. continue
  37. # 判断周期有效性(长度 <= max_hours)
  38. hours = (t_end - t_start).total_seconds() / 3600
  39. valid = hours <= self.max_hours
  40. cycles[cycle_id] = {
  41. "segments": segs,
  42. "start": t_start,
  43. "end": t_end,
  44. "hours": hours,
  45. "valid": valid
  46. }
  47. # 回写到段中
  48. for seg in segs:
  49. seg["chem_cycle_id"] = cycle_id
  50. seg["chem_cycle_valid"] = valid
  51. return cycles
  52. class ShortTermCycleFoulingFitter:
  53. """
  54. 在一个化学周期内:
  55. - 每个 segment 有独立的 R0 和 t
  56. - 所有 segment 共享同一个短期污染速率 νK
  57. """
  58. def __init__(self, unit):
  59. self.unit = unit
  60. @staticmethod
  61. def _is_invalid(x):
  62. return (
  63. x is None
  64. or np.any(pd.isna(x))
  65. or np.any(np.isinf(x))
  66. or np.any(np.abs(x) > 1e20)
  67. )
  68. def fit_cycle(self, segments):
  69. # ============================
  70. # 1. 初步检查
  71. # ============================
  72. if not segments:
  73. return np.nan, np.nan
  74. R_col = f"{self.unit}_R_scaled"
  75. J_col = f"{self.unit}_J"
  76. # ============================
  77. # 2. 预处理每个 segment
  78. # ============================
  79. seg_data = []
  80. for seg in segments:
  81. if not {"time", R_col, J_col}.issubset(seg.columns):
  82. continue
  83. seg = seg.sort_values("time").drop_duplicates(subset=["time"])
  84. if len(seg) < 2:
  85. continue
  86. if not pd.api.types.is_datetime64_any_dtype(seg["time"]):
  87. seg["time"] = pd.to_datetime(seg["time"], errors="coerce")
  88. seg = seg.dropna(subset=["time"])
  89. if len(seg) == 0:
  90. # 该段完全无效,直接跳过
  91. continue
  92. # 局部时间(秒)
  93. try:
  94. t = (seg["time"] - seg["time"].iloc[0]).dt.total_seconds().astype(float)
  95. except Exception:
  96. continue
  97. # 丢弃 t <= 0
  98. mask = t > 0
  99. if mask.sum() < 1:
  100. continue
  101. R = seg.loc[mask, R_col].astype(float).values
  102. J = seg.loc[mask, J_col].astype(float).values
  103. t = t[mask].values
  104. R0 = float(seg[R_col].iloc[0])
  105. # 数值与物理检查
  106. if any(self._is_invalid(x) for x in [R, J, t, R0]):
  107. continue
  108. if np.all(J == 0):
  109. continue
  110. seg_data.append((R, J, t, R0))
  111. if len(seg_data) == 0:
  112. return np.nan, np.nan
  113. # ============================
  114. # 3. 定义 Huber 损失
  115. # ============================
  116. delta = 1.0
  117. def huber_loss(res):
  118. abs_r = np.abs(res)
  119. quad = np.minimum(abs_r, delta)
  120. lin = abs_r - quad
  121. return 0.5 * quad**2 + delta * lin
  122. # ============================
  123. # 4. 定义 cycle 级 loss(共享 νK)
  124. # ============================
  125. def loss(nuk):
  126. nuk = float(nuk[0])
  127. total = 0.0
  128. for R, J, t, R0 in seg_data:
  129. pred = R0 + nuk * J * t
  130. res = R - pred
  131. total += np.sum(huber_loss(res))
  132. return total
  133. # ============================
  134. # 5. 优化求解 νK
  135. # ============================
  136. try:
  137. res = minimize(
  138. loss,
  139. x0=[1e-6],
  140. bounds=[(0.0, None)],
  141. options={"maxiter": 500, "disp": False}
  142. )
  143. except Exception:
  144. return np.nan, np.nan
  145. if not res.success:
  146. return np.nan, np.nan
  147. nuk_opt = float(res.x[0])
  148. # ============================
  149. # 6. 计算整体 R²(跨 segment)
  150. # ============================
  151. R_all = []
  152. pred_all = []
  153. for R, J, t, R0 in seg_data:
  154. pred = R0 + nuk_opt * J * t
  155. if self._is_invalid(pred):
  156. continue
  157. R_all.append(R)
  158. pred_all.append(pred)
  159. if not R_all:
  160. return nuk_opt, np.nan
  161. R_all = np.concatenate(R_all)
  162. pred_all = np.concatenate(pred_all)
  163. try:
  164. r2 = r2_score(R_all, pred_all)
  165. except Exception:
  166. r2 = np.nan
  167. return nuk_opt, r2
  168. class LongTermFoulingFitter:
  169. """
  170. 长期 fouling 拟合:
  171. - 使用每个 segment 起点的“稳健不可逆阻力估计”
  172. - 拟合不可逆阻力随 cycle 内累计时间的增长
  173. """
  174. def __init__(self, unit, max_hours=60.0):
  175. self.unit = unit
  176. self.max_hours = max_hours
  177. @staticmethod
  178. def _power_law(t, a, b, R0):
  179. return R0 + a * np.power(t, b)
  180. @staticmethod
  181. def _is_invalid(x):
  182. return (
  183. x is None
  184. or np.any(pd.isna(x))
  185. or np.any(np.isinf(x))
  186. or np.any(np.abs(x) > 1e20)
  187. )
  188. def _robust_segment_R0(self, seg):
  189. """
  190. 计算单个 segment 的稳健不可逆阻力:
  191. 1. 优先使用 post_bw_inlet == True 的行
  192. 2. 否则使用前 10 行
  193. """
  194. R_col = "R_scaled_start"
  195. if "post_bw_inlet" in seg.columns:
  196. mask = seg["post_bw_inlet"] == True
  197. if mask.any():
  198. R_vals = seg.loc[mask, R_col].astype(float).values
  199. if not self._is_invalid(R_vals):
  200. return float(np.mean(R_vals))
  201. # fallback:前 10 行
  202. R_vals = seg[R_col].iloc[:10].astype(float).values
  203. if not self._is_invalid(R_vals):
  204. return float(np.mean(R_vals))
  205. return None
  206. def fit_cycle(self, segments):
  207. # ============================
  208. # 1. 空检查
  209. # ============================
  210. if not segments:
  211. return np.nan, np.nan, np.nan
  212. T_list = []
  213. R_list = []
  214. # cycle 起点
  215. try:
  216. cycle_start = min(seg["time"].iloc[0] for seg in segments)
  217. except Exception:
  218. return np.nan, np.nan, np.nan
  219. R0_cycle = None
  220. for seg in segments:
  221. if "time" not in seg.columns or "R_scaled_start" not in seg.columns:
  222. continue
  223. seg = seg.sort_values("time")
  224. if len(seg) < 1:
  225. continue
  226. t0 = seg["time"].iloc[0]
  227. # ===== 稳健 R0 =====
  228. R0 = self._robust_segment_R0(seg)
  229. if R0 is None:
  230. continue
  231. # cycle 起点 R0
  232. if t0 == cycle_start:
  233. R0_cycle = R0
  234. # 累计时间(小时)
  235. T = (t0 - cycle_start).total_seconds() / 3600.0
  236. if T < 0 or T > self.max_hours:
  237. continue
  238. if self._is_invalid(T) or self._is_invalid(R0):
  239. continue
  240. T_list.append(T)
  241. R_list.append(R0)
  242. # 至少 3 个点
  243. if len(T_list) < 3 or R0_cycle is None:
  244. return np.nan, np.nan, np.nan
  245. T = np.asarray(T_list, dtype=float)
  246. R = np.asarray(R_list, dtype=float)
  247. if self._is_invalid(T) or self._is_invalid(R):
  248. return np.nan, np.nan, np.nan
  249. # ============================
  250. # 2. 幂律拟合(b ∈ [0, 3])
  251. # ============================
  252. try:
  253. popt, _ = curve_fit(
  254. lambda tt, a, b: self._power_law(tt, a, b, R0_cycle),
  255. T,
  256. R,
  257. p0=(0.1, 1.0),
  258. bounds=([0.0, 0.0], [np.inf, 3.0]),
  259. maxfev=8000
  260. )
  261. a, b = popt
  262. except Exception:
  263. return np.nan, np.nan, np.nan
  264. if self._is_invalid(a) or self._is_invalid(b):
  265. return np.nan, np.nan, np.nan
  266. # ============================
  267. # 3. 拟合质量
  268. # ============================
  269. pred = self._power_law(T, a, b, R0_cycle)
  270. if self._is_invalid(pred):
  271. return np.nan, np.nan, np.nan
  272. try:
  273. r2 = r2_score(R, pred)
  274. except Exception:
  275. r2 = np.nan
  276. return a, b, r2
  277. class ChemicalBackwashCleaner:
  278. """
  279. 根据有效化学周期计算 CEB 去除膜阻力:
  280. ΔR = 上周期末 R_end - 下周期初 R_start
  281. """
  282. def __init__(self, unit):
  283. self.unit = unit
  284. def compute_removal(self, cycles):
  285. ids = sorted(cycles.keys())
  286. for i in range(len(ids) - 1):
  287. cid1 = ids[i]
  288. cid2 = ids[i + 1]
  289. c1 = cycles[cid1]
  290. c2 = cycles[cid2]
  291. if not (c1["valid"] and c2["valid"]):
  292. c1["R_removed"] = None
  293. continue
  294. # 上周期末:最后段的 R_end
  295. seg_last = c1["segments"][-1]
  296. R_end = seg_last[f"R_scaled_end"]
  297. # 下周期初:第一段的 R_start
  298. seg_first = c2["segments"][0]
  299. R_start = seg_first[f"R_scaled_start"]
  300. c1["R_removed"] = R_end - R_start
  301. # 最后一个周期无 CEB 去除值
  302. if ids:
  303. cycles[ids[-1]]["R_removed"] = None
  304. return cycles