run_this.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380
  1. import pandas as pd
  2. import numpy as np
  3. import os
  4. from datetime import datetime
  5. from sklearn.linear_model import LinearRegression
  6. # ============================================================
  7. # 路径配置
  8. # ============================================================
  9. BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
  10. FEATURE_PATH = f"{BASE_DIR}/use_data/test_cycle_sample.csv"
  11. HIST_PATHS = {
  12. "DPT_1": f"{BASE_DIR}/use_data/cycle_rv_seg1_results.csv",
  13. "DPT_2": f"{BASE_DIR}/use_data/cycle_rv_seg2_results.csv"
  14. }
  15. # ============================================================
  16. # 粘度模型(温度修正)
  17. # ============================================================
  18. def xishan_viscosity(T):
  19. """
  20. 根据温度计算水的动力粘度(Pa·s)
  21. """
  22. x = (T + 273.15) / 300
  23. factor = 890 / (
  24. 280.68 * x ** -1.9 +
  25. 511.45 * x ** -7.7 +
  26. 61.131 * x ** -19.6 +
  27. 0.45903 * x ** -40
  28. )
  29. return 0.00089 / factor
  30. # ============================================================
  31. # 字段映射(适配不同机组)
  32. # ============================================================
  33. def get_unit_cols(unit, dpt_type="DPT_1"):
  34. dpt_map = {
  35. "DPT_1": "C.M.{}_DB@DPT_1",
  36. "DPT_2": "C.M.{}_DB@DPT_2"
  37. }
  38. return {
  39. "flux": f"{unit}_FluxF",
  40. "dpt": dpt_map[dpt_type].format(unit)
  41. }
  42. # ============================================================
  43. # 膜阻力计算
  44. # ============================================================
  45. def compute_resistance(df, unit, dpt_type="DPT_1"):
  46. cols = get_unit_cols(unit, dpt_type)
  47. flux = df[cols["flux"]]
  48. dpt = df[cols["dpt"]]
  49. mu = xishan_viscosity(df["C.M.RO_TT_ZJS@out"])
  50. df["R1"] = dpt * 3.6e12 / (mu * flux)
  51. return df
  52. # ============================================================
  53. # 累积产水量计算
  54. # ============================================================
  55. def compute_V(df, unit, dpt_type="DPT_1"):
  56. cols = get_unit_cols(unit, dpt_type)
  57. flux = df[cols["flux"]].values
  58. dt = 300
  59. J = flux * (1e-3 / 3600)
  60. V = np.zeros(len(df))
  61. for i in range(1, len(df)):
  62. V[i] = V[i - 1] + J[i - 1] * dt
  63. df["V"] = V
  64. return df
  65. # ============================================================
  66. # 当前周期拟合(幂律模型)
  67. # ============================================================
  68. def fit_model(df, R_col):
  69. """
  70. 拟合 R = a * V^b
  71. """
  72. df = df.dropna(subset=[R_col, "V"])
  73. df = df[(df[R_col] > 0) & (df["V"] > 0)]
  74. if len(df) < 10:
  75. return None
  76. V = df["V"].values
  77. R = df[R_col].values / 1e12
  78. X = np.log(V + 1e-9).reshape(-1, 1)
  79. y = np.log(R + 1e-9)
  80. model = LinearRegression()
  81. model.fit(X, y)
  82. return model
  83. # ============================================================
  84. # 读取历史周期库
  85. # ============================================================
  86. def load_history(dpt_type="DPT_1"):
  87. """
  88. 根据压差类型加载对应历史库
  89. """
  90. return pd.read_csv(HIST_PATHS[dpt_type])
  91. # ============================================================
  92. # 当前周期特征提取
  93. # ============================================================
  94. def extract_features(df, unit, dpt_type="DPT_1"):
  95. cols = get_unit_cols(unit, dpt_type)
  96. return {
  97. "flux_mean": df[cols["flux"]].mean(),
  98. "cond_mean": df["C.M.RO_Cond_ZJS@out"].mean(),
  99. "ph_mean": df["C.M.RO_PH_ZJS@out"].mean(),
  100. "orp_mean": df["C.M.RO_ORP_ZJS@out"].mean(),
  101. "temp_mean": df["C.M.RO_TT_ZJS@out"].mean()
  102. }
  103. # ============================================================
  104. # 历史周期匹配(KNN + 加权)
  105. # ============================================================
  106. def match_history(hist_df, feat, unit, k=5):
  107. """
  108. 在历史周期中寻找最相似的 k 个周期
  109. 并加权得到 a_hist, b_hist
  110. """
  111. df = hist_df[hist_df["unit"] == unit].copy()
  112. feature_cols = ["flux_mean", "cond_mean", "ph_mean", "orp_mean", "temp_mean"]
  113. X_hist = df[feature_cols].values
  114. x_now = np.array([feat[c] for c in feature_cols]).reshape(1, -1)
  115. # 标准化
  116. mean = X_hist.mean(axis=0)
  117. std = X_hist.std(axis=0) + 1e-9
  118. Xn = (X_hist - mean) / std
  119. xn = (x_now - mean) / std
  120. # 欧氏距离
  121. dist = np.linalg.norm(Xn - xn, axis=1)
  122. df["dist"] = dist
  123. df = df.sort_values("dist").head(k)
  124. # 权重
  125. w = 1 / (df["dist"].values + 1e-6)
  126. w = w / w.sum()
  127. a_hist = np.sum(w * df["a"].values)
  128. b_hist = np.sum(w * df["b"].values)
  129. return a_hist, b_hist
  130. # ============================================================
  131. # 根据 a, b 解析求解 V_limit
  132. # ============================================================
  133. def solve_v_limit_ab(a, b, R_limit):
  134. """
  135. 解 R_limit = a * V^b
  136. """
  137. if abs(b) < 1e-12:
  138. return np.inf
  139. V_limit = (R_limit / 1e12 / a) ** (1 / b)
  140. return V_limit
  141. # ============================================================
  142. # 单机组处理(核心函数)
  143. # ============================================================
  144. def process_unit(unit, df_new, TMP_limit=0.23, dpt_type="DPT_1"):
  145. df = df_new.copy()
  146. # =========================
  147. # 计算 R 和 V
  148. # =========================
  149. df = compute_resistance(df, unit, dpt_type)
  150. df = compute_V(df, unit, dpt_type)
  151. # =========================
  152. # 当前周期时长(天)
  153. # =========================
  154. days = max(
  155. (df["time"].max() - df["time"].min()).total_seconds() / 86400,
  156. 1e-6
  157. )
  158. # =========================
  159. # 当前状态
  160. # =========================
  161. mu_avg = xishan_viscosity(df["C.M.RO_TT_ZJS@out"]).mean()
  162. cols = get_unit_cols(unit, dpt_type)
  163. flux_avg = df[cols["flux"]].mean()
  164. V_now = df["V"].iloc[-1]
  165. R_now = df["R1"].iloc[-1]
  166. V_daily = V_now / days
  167. # =========================
  168. # 当前周期拟合
  169. # =========================
  170. model = fit_model(df, "R1")
  171. if model is None:
  172. return None
  173. a_cur = np.exp(model.intercept_)
  174. b_cur = model.coef_[0]
  175. # =========================
  176. # 历史参数(仅用于修正)
  177. # =========================
  178. hist_df = load_history(dpt_type)
  179. feat = extract_features(df, unit, dpt_type)
  180. a_hist, b_hist= match_history(hist_df, feat, unit)
  181. # =========================
  182. # 时间权重(只依赖周期长度)
  183. # 60天后完全关闭历史
  184. # =========================
  185. w = max(0.0, 1.0 - days / 60.0)
  186. w = w ** 2 # 加速衰减(关键)
  187. # =========================
  188. # 数值保护
  189. # =========================
  190. a_cur_safe = max(a_cur, 1e-12)
  191. b_cur_safe = np.sign(b_cur) * max(abs(b_cur), 1e-6)
  192. # =========================
  193. # a 修正(对数空间,更稳定)
  194. # 最大允许 ±20%
  195. # =========================
  196. log_a_cur = np.log(a_cur_safe)
  197. log_a_hist = np.log(max(a_hist, 1e-12))
  198. delta_log_a = w * (log_a_hist - log_a_cur)
  199. delta_log_a = np.clip(delta_log_a, -0.2, 0.2)
  200. a_final = np.exp(log_a_cur + delta_log_a)
  201. # =========================
  202. # b 修正(比例修正)
  203. # 最大允许 ±15%(更严格)
  204. # =========================
  205. delta_b = w * (b_hist - b_cur) / (abs(b_cur) + 1e-6)
  206. delta_b = np.clip(delta_b, -0.15, 0.15)
  207. b_final = b_cur * (1 + delta_b)
  208. # =========================
  209. # 物理约束(防止异常)
  210. # =========================
  211. b_final = np.clip(b_final, 0.01, 3.0)
  212. # =========================
  213. # TMP → R_limit
  214. # =========================
  215. R_limit = TMP_limit * 3.6e12 / (mu_avg * flux_avg)
  216. # =========================
  217. # 求解 V_limit
  218. # =========================
  219. V_limit = solve_v_limit_ab(a_final, b_final, R_limit)
  220. # 数值安全限制
  221. if (not np.isfinite(V_limit)) or (V_limit <= 0):
  222. V_limit = V_now
  223. V_limit = max(V_limit, V_now)
  224. # 剩余时间计算
  225. remaining_days = (V_limit - V_now) / max(V_daily, 1e-9)
  226. # 异常检测
  227. if remaining_days <= 0 or not np.isfinite(remaining_days):
  228. cols = get_unit_cols(unit, dpt_type)
  229. return {
  230. "unit": unit,
  231. "status": "ERROR",
  232. "error_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
  233. "error_var": cols["dpt"], # 当前使用的压差变量
  234. "V_now": V_now,
  235. "V_limit": V_limit,
  236. "a_final": a_final,
  237. "b_final": b_final
  238. }
  239. # 正常情况
  240. remaining_days = min(remaining_days, 90 - days)
  241. return {
  242. "unit": unit,
  243. "R_now": R_now,
  244. "V_now": V_now,
  245. "R_limit": R_limit,
  246. "V_limit": V_limit,
  247. "remaining_days": remaining_days,
  248. "a_final": a_final,
  249. "b_final": b_final,
  250. }
  251. # ============================================================
  252. # 主函数
  253. # ============================================================
  254. def main(units, TMP_limit=0.21, dpt_type="DPT_1"):
  255. df_new = pd.read_csv(FEATURE_PATH)
  256. df_new["time"] = pd.to_datetime(df_new["time"])
  257. results = []
  258. for u in units:
  259. res = process_unit(u, df_new, TMP_limit, dpt_type)
  260. if res is None:
  261. results.append({
  262. "unit": u,
  263. "status": "ERROR",
  264. "error_time": None,
  265. "error_var": None,
  266. "error_type": "MODEL_FIT_FAILED"
  267. })
  268. continue
  269. if "status" in res and res["status"] == "ERROR":
  270. results.append(res)
  271. else:
  272. res["status"] = "OK"
  273. results.append(res)
  274. return pd.DataFrame(results)
  275. # ============================================================
  276. # 运行入口
  277. # ============================================================
  278. if __name__ == "__main__":
  279. units_to_run = ["RO4"]
  280. out = main(["RO4"], TMP_limit=0.19, dpt_type="DPT_2")
  281. # 防止空结果
  282. if out is None or len(out) == 0:
  283. print("无有效结果")
  284. else:
  285. row = out.iloc[0]
  286. if row["status"] == "ERROR":
  287. print("系统异常:")
  288. print(f"时间: {row['error_time']}")
  289. print(f"变量: {row['error_var']}")
  290. else:
  291. print(f"该机组预计下次CIP为:{row['remaining_days']:.2f} 天后")