import pandas as pd import numpy as np import os from datetime import datetime from sklearn.linear_model import LinearRegression # ============================================================ # 路径配置 # ============================================================ BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) FEATURE_PATH = f"{BASE_DIR}/use_data/test_cycle_sample.csv" HIST_PATHS = { "DPT_1": f"{BASE_DIR}/use_data/cycle_rv_seg1_results.csv", "DPT_2": f"{BASE_DIR}/use_data/cycle_rv_seg2_results.csv" } # ============================================================ # 粘度模型(温度修正) # ============================================================ def xishan_viscosity(T): """ 根据温度计算水的动力粘度(Pa·s) """ x = (T + 273.15) / 300 factor = 890 / ( 280.68 * x ** -1.9 + 511.45 * x ** -7.7 + 61.131 * x ** -19.6 + 0.45903 * x ** -40 ) return 0.00089 / factor # ============================================================ # 字段映射(适配不同机组) # ============================================================ def get_unit_cols(unit, dpt_type="DPT_1"): dpt_map = { "DPT_1": "C.M.{}_DB@DPT_1", "DPT_2": "C.M.{}_DB@DPT_2" } return { "flux": f"{unit}_FluxF", "dpt": dpt_map[dpt_type].format(unit) } # ============================================================ # 膜阻力计算 # ============================================================ def compute_resistance(df, unit, dpt_type="DPT_1"): cols = get_unit_cols(unit, dpt_type) flux = df[cols["flux"]] dpt = df[cols["dpt"]] mu = xishan_viscosity(df["C.M.RO_TT_ZJS@out"]) df["R1"] = dpt * 3.6e12 / (mu * flux) return df # ============================================================ # 累积产水量计算 # ============================================================ def compute_V(df, unit, dpt_type="DPT_1"): cols = get_unit_cols(unit, dpt_type) flux = df[cols["flux"]].values dt = 300 J = flux * (1e-3 / 3600) V = np.zeros(len(df)) for i in range(1, len(df)): V[i] = V[i - 1] + J[i - 1] * dt df["V"] = V return df # ============================================================ # 当前周期拟合(幂律模型) # ============================================================ def fit_model(df, R_col): """ 拟合 R = a * V^b """ df = df.dropna(subset=[R_col, "V"]) df = df[(df[R_col] > 0) & (df["V"] > 0)] if len(df) < 10: return None V = df["V"].values R = df[R_col].values / 1e12 X = np.log(V + 1e-9).reshape(-1, 1) y = np.log(R + 1e-9) model = LinearRegression() model.fit(X, y) return model # ============================================================ # 读取历史周期库 # ============================================================ def load_history(dpt_type="DPT_1"): """ 根据压差类型加载对应历史库 """ return pd.read_csv(HIST_PATHS[dpt_type]) # ============================================================ # 当前周期特征提取 # ============================================================ def extract_features(df, unit, dpt_type="DPT_1"): cols = get_unit_cols(unit, dpt_type) return { "flux_mean": df[cols["flux"]].mean(), "cond_mean": df["C.M.RO_Cond_ZJS@out"].mean(), "ph_mean": df["C.M.RO_PH_ZJS@out"].mean(), "orp_mean": df["C.M.RO_ORP_ZJS@out"].mean(), "temp_mean": df["C.M.RO_TT_ZJS@out"].mean() } # ============================================================ # 历史周期匹配(KNN + 加权) # ============================================================ def match_history(hist_df, feat, unit, k=5): """ 在历史周期中寻找最相似的 k 个周期 并加权得到 a_hist, b_hist """ df = hist_df[hist_df["unit"] == unit].copy() feature_cols = ["flux_mean", "cond_mean", "ph_mean", "orp_mean", "temp_mean"] X_hist = df[feature_cols].values x_now = np.array([feat[c] for c in feature_cols]).reshape(1, -1) # 标准化 mean = X_hist.mean(axis=0) std = X_hist.std(axis=0) + 1e-9 Xn = (X_hist - mean) / std xn = (x_now - mean) / std # 欧氏距离 dist = np.linalg.norm(Xn - xn, axis=1) df["dist"] = dist df = df.sort_values("dist").head(k) # 权重 w = 1 / (df["dist"].values + 1e-6) w = w / w.sum() a_hist = np.sum(w * df["a"].values) b_hist = np.sum(w * df["b"].values) return a_hist, b_hist # ============================================================ # 根据 a, b 解析求解 V_limit # ============================================================ def solve_v_limit_ab(a, b, R_limit): """ 解 R_limit = a * V^b """ if abs(b) < 1e-12: return np.inf V_limit = (R_limit / 1e12 / a) ** (1 / b) return V_limit # ============================================================ # 单机组处理(核心函数) # ============================================================ def process_unit(unit, df_new, TMP_limit=0.23, dpt_type="DPT_1"): df = df_new.copy() # ========================= # 计算 R 和 V # ========================= df = compute_resistance(df, unit, dpt_type) df = compute_V(df, unit, dpt_type) # ========================= # 当前周期时长(天) # ========================= days = max( (df["time"].max() - df["time"].min()).total_seconds() / 86400, 1e-6 ) # ========================= # 当前状态 # ========================= mu_avg = xishan_viscosity(df["C.M.RO_TT_ZJS@out"]).mean() cols = get_unit_cols(unit, dpt_type) flux_avg = df[cols["flux"]].mean() V_now = df["V"].iloc[-1] R_now = df["R1"].iloc[-1] V_daily = V_now / days # ========================= # 当前周期拟合 # ========================= model = fit_model(df, "R1") if model is None: return None a_cur = np.exp(model.intercept_) b_cur = model.coef_[0] # ========================= # 历史参数(仅用于修正) # ========================= hist_df = load_history(dpt_type) feat = extract_features(df, unit, dpt_type) a_hist, b_hist= match_history(hist_df, feat, unit) # ========================= # 时间权重(只依赖周期长度) # 60天后完全关闭历史 # ========================= w = max(0.0, 1.0 - days / 60.0) w = w ** 2 # 加速衰减(关键) # ========================= # 数值保护 # ========================= a_cur_safe = max(a_cur, 1e-12) b_cur_safe = np.sign(b_cur) * max(abs(b_cur), 1e-6) # ========================= # a 修正(对数空间,更稳定) # 最大允许 ±20% # ========================= log_a_cur = np.log(a_cur_safe) log_a_hist = np.log(max(a_hist, 1e-12)) delta_log_a = w * (log_a_hist - log_a_cur) delta_log_a = np.clip(delta_log_a, -0.2, 0.2) a_final = np.exp(log_a_cur + delta_log_a) # ========================= # b 修正(比例修正) # 最大允许 ±15%(更严格) # ========================= delta_b = w * (b_hist - b_cur) / (abs(b_cur) + 1e-6) delta_b = np.clip(delta_b, -0.15, 0.15) b_final = b_cur * (1 + delta_b) # ========================= # 物理约束(防止异常) # ========================= b_final = np.clip(b_final, 0.01, 3.0) # ========================= # TMP → R_limit # ========================= R_limit = TMP_limit * 3.6e12 / (mu_avg * flux_avg) # ========================= # 求解 V_limit # ========================= V_limit = solve_v_limit_ab(a_final, b_final, R_limit) # 数值安全限制 if (not np.isfinite(V_limit)) or (V_limit <= 0): V_limit = V_now V_limit = max(V_limit, V_now) # 剩余时间计算 remaining_days = (V_limit - V_now) / max(V_daily, 1e-9) # 异常检测 if remaining_days <= 0 or not np.isfinite(remaining_days): cols = get_unit_cols(unit, dpt_type) return { "unit": unit, "status": "ERROR", "error_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "error_var": cols["dpt"], # 当前使用的压差变量 "V_now": V_now, "V_limit": V_limit, "a_final": a_final, "b_final": b_final } # 正常情况 remaining_days = min(remaining_days, 90 - days) return { "unit": unit, "R_now": R_now, "V_now": V_now, "R_limit": R_limit, "V_limit": V_limit, "remaining_days": remaining_days, "a_final": a_final, "b_final": b_final, } # ============================================================ # 主函数 # ============================================================ def main(units, TMP_limit=0.21, dpt_type="DPT_1"): df_new = pd.read_csv(FEATURE_PATH) df_new["time"] = pd.to_datetime(df_new["time"]) results = [] for u in units: res = process_unit(u, df_new, TMP_limit, dpt_type) if res is None: results.append({ "unit": u, "status": "ERROR", "error_time": None, "error_var": None, "error_type": "MODEL_FIT_FAILED" }) continue if "status" in res and res["status"] == "ERROR": results.append(res) else: res["status"] = "OK" results.append(res) return pd.DataFrame(results) # ============================================================ # 运行入口 # ============================================================ if __name__ == "__main__": units_to_run = ["RO4"] out = main(["RO4"], TMP_limit=0.19, dpt_type="DPT_2") # 防止空结果 if out is None or len(out) == 0: print("无有效结果") else: row = out.iloc[0] if row["status"] == "ERROR": print("系统异常:") print(f"时间: {row['error_time']}") print(f"变量: {row['error_var']}") else: print(f"该机组预计下次CIP为:{row['remaining_days']:.2f} 天后")