| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380 |
- import pandas as pd
- import numpy as np
- import os
- from datetime import datetime
- from sklearn.linear_model import LinearRegression
- # ============================================================
- # 路径配置
- # ============================================================
- BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
- FEATURE_PATH = f"{BASE_DIR}/use_data/test_cycle_sample.csv"
- HIST_PATHS = {
- "DPT_1": f"{BASE_DIR}/use_data/cycle_rv_seg1_results.csv",
- "DPT_2": f"{BASE_DIR}/use_data/cycle_rv_seg2_results.csv"
- }
- # ============================================================
- # 粘度模型(温度修正)
- # ============================================================
- def xishan_viscosity(T):
- """
- 根据温度计算水的动力粘度(Pa·s)
- """
- x = (T + 273.15) / 300
- factor = 890 / (
- 280.68 * x ** -1.9 +
- 511.45 * x ** -7.7 +
- 61.131 * x ** -19.6 +
- 0.45903 * x ** -40
- )
- return 0.00089 / factor
- # ============================================================
- # 字段映射(适配不同机组)
- # ============================================================
- def get_unit_cols(unit, dpt_type="DPT_1"):
- dpt_map = {
- "DPT_1": "C.M.{}_DB@DPT_1",
- "DPT_2": "C.M.{}_DB@DPT_2"
- }
- return {
- "flux": f"{unit}_FluxF",
- "dpt": dpt_map[dpt_type].format(unit)
- }
- # ============================================================
- # 膜阻力计算
- # ============================================================
- def compute_resistance(df, unit, dpt_type="DPT_1"):
- cols = get_unit_cols(unit, dpt_type)
- flux = df[cols["flux"]]
- dpt = df[cols["dpt"]]
- mu = xishan_viscosity(df["C.M.RO_TT_ZJS@out"])
- df["R1"] = dpt * 3.6e12 / (mu * flux)
- return df
- # ============================================================
- # 累积产水量计算
- # ============================================================
- def compute_V(df, unit, dpt_type="DPT_1"):
- cols = get_unit_cols(unit, dpt_type)
- flux = df[cols["flux"]].values
- dt = 300
- J = flux * (1e-3 / 3600)
- V = np.zeros(len(df))
- for i in range(1, len(df)):
- V[i] = V[i - 1] + J[i - 1] * dt
- df["V"] = V
- return df
- # ============================================================
- # 当前周期拟合(幂律模型)
- # ============================================================
- def fit_model(df, R_col):
- """
- 拟合 R = a * V^b
- """
- df = df.dropna(subset=[R_col, "V"])
- df = df[(df[R_col] > 0) & (df["V"] > 0)]
- if len(df) < 10:
- return None
- V = df["V"].values
- R = df[R_col].values / 1e12
- X = np.log(V + 1e-9).reshape(-1, 1)
- y = np.log(R + 1e-9)
- model = LinearRegression()
- model.fit(X, y)
- return model
- # ============================================================
- # 读取历史周期库
- # ============================================================
- def load_history(dpt_type="DPT_1"):
- """
- 根据压差类型加载对应历史库
- """
- return pd.read_csv(HIST_PATHS[dpt_type])
- # ============================================================
- # 当前周期特征提取
- # ============================================================
- def extract_features(df, unit, dpt_type="DPT_1"):
- cols = get_unit_cols(unit, dpt_type)
- return {
- "flux_mean": df[cols["flux"]].mean(),
- "cond_mean": df["C.M.RO_Cond_ZJS@out"].mean(),
- "ph_mean": df["C.M.RO_PH_ZJS@out"].mean(),
- "orp_mean": df["C.M.RO_ORP_ZJS@out"].mean(),
- "temp_mean": df["C.M.RO_TT_ZJS@out"].mean()
- }
- # ============================================================
- # 历史周期匹配(KNN + 加权)
- # ============================================================
- def match_history(hist_df, feat, unit, k=5):
- """
- 在历史周期中寻找最相似的 k 个周期
- 并加权得到 a_hist, b_hist
- """
- df = hist_df[hist_df["unit"] == unit].copy()
- feature_cols = ["flux_mean", "cond_mean", "ph_mean", "orp_mean", "temp_mean"]
- X_hist = df[feature_cols].values
- x_now = np.array([feat[c] for c in feature_cols]).reshape(1, -1)
- # 标准化
- mean = X_hist.mean(axis=0)
- std = X_hist.std(axis=0) + 1e-9
- Xn = (X_hist - mean) / std
- xn = (x_now - mean) / std
- # 欧氏距离
- dist = np.linalg.norm(Xn - xn, axis=1)
- df["dist"] = dist
- df = df.sort_values("dist").head(k)
- # 权重
- w = 1 / (df["dist"].values + 1e-6)
- w = w / w.sum()
- a_hist = np.sum(w * df["a"].values)
- b_hist = np.sum(w * df["b"].values)
- return a_hist, b_hist
- # ============================================================
- # 根据 a, b 解析求解 V_limit
- # ============================================================
- def solve_v_limit_ab(a, b, R_limit):
- """
- 解 R_limit = a * V^b
- """
- if abs(b) < 1e-12:
- return np.inf
- V_limit = (R_limit / 1e12 / a) ** (1 / b)
- return V_limit
- # ============================================================
- # 单机组处理(核心函数)
- # ============================================================
- def process_unit(unit, df_new, TMP_limit=0.23, dpt_type="DPT_1"):
- df = df_new.copy()
- # =========================
- # 计算 R 和 V
- # =========================
- df = compute_resistance(df, unit, dpt_type)
- df = compute_V(df, unit, dpt_type)
- # =========================
- # 当前周期时长(天)
- # =========================
- days = max(
- (df["time"].max() - df["time"].min()).total_seconds() / 86400,
- 1e-6
- )
- # =========================
- # 当前状态
- # =========================
- mu_avg = xishan_viscosity(df["C.M.RO_TT_ZJS@out"]).mean()
- cols = get_unit_cols(unit, dpt_type)
- flux_avg = df[cols["flux"]].mean()
- V_now = df["V"].iloc[-1]
- R_now = df["R1"].iloc[-1]
- V_daily = V_now / days
- # =========================
- # 当前周期拟合
- # =========================
- model = fit_model(df, "R1")
- if model is None:
- return None
- a_cur = np.exp(model.intercept_)
- b_cur = model.coef_[0]
- # =========================
- # 历史参数(仅用于修正)
- # =========================
- hist_df = load_history(dpt_type)
- feat = extract_features(df, unit, dpt_type)
- a_hist, b_hist= match_history(hist_df, feat, unit)
- # =========================
- # 时间权重(只依赖周期长度)
- # 60天后完全关闭历史
- # =========================
- w = max(0.0, 1.0 - days / 60.0)
- w = w ** 2 # 加速衰减(关键)
- # =========================
- # 数值保护
- # =========================
- a_cur_safe = max(a_cur, 1e-12)
- b_cur_safe = np.sign(b_cur) * max(abs(b_cur), 1e-6)
- # =========================
- # a 修正(对数空间,更稳定)
- # 最大允许 ±20%
- # =========================
- log_a_cur = np.log(a_cur_safe)
- log_a_hist = np.log(max(a_hist, 1e-12))
- delta_log_a = w * (log_a_hist - log_a_cur)
- delta_log_a = np.clip(delta_log_a, -0.2, 0.2)
- a_final = np.exp(log_a_cur + delta_log_a)
- # =========================
- # b 修正(比例修正)
- # 最大允许 ±15%(更严格)
- # =========================
- delta_b = w * (b_hist - b_cur) / (abs(b_cur) + 1e-6)
- delta_b = np.clip(delta_b, -0.15, 0.15)
- b_final = b_cur * (1 + delta_b)
- # =========================
- # 物理约束(防止异常)
- # =========================
- b_final = np.clip(b_final, 0.01, 3.0)
- # =========================
- # TMP → R_limit
- # =========================
- R_limit = TMP_limit * 3.6e12 / (mu_avg * flux_avg)
- # =========================
- # 求解 V_limit
- # =========================
- V_limit = solve_v_limit_ab(a_final, b_final, R_limit)
- # 数值安全限制
- if (not np.isfinite(V_limit)) or (V_limit <= 0):
- V_limit = V_now
- V_limit = max(V_limit, V_now)
- # 剩余时间计算
- remaining_days = (V_limit - V_now) / max(V_daily, 1e-9)
- # 异常检测
- if remaining_days <= 0 or not np.isfinite(remaining_days):
- cols = get_unit_cols(unit, dpt_type)
- return {
- "unit": unit,
- "status": "ERROR",
- "error_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
- "error_var": cols["dpt"], # 当前使用的压差变量
- "V_now": V_now,
- "V_limit": V_limit,
- "a_final": a_final,
- "b_final": b_final
- }
- # 正常情况
- remaining_days = min(remaining_days, 90 - days)
- return {
- "unit": unit,
- "R_now": R_now,
- "V_now": V_now,
- "R_limit": R_limit,
- "V_limit": V_limit,
- "remaining_days": remaining_days,
- "a_final": a_final,
- "b_final": b_final,
- }
- # ============================================================
- # 主函数
- # ============================================================
- def main(units, TMP_limit=0.21, dpt_type="DPT_1"):
- df_new = pd.read_csv(FEATURE_PATH)
- df_new["time"] = pd.to_datetime(df_new["time"])
- results = []
- for u in units:
- res = process_unit(u, df_new, TMP_limit, dpt_type)
- if res is None:
- results.append({
- "unit": u,
- "status": "ERROR",
- "error_time": None,
- "error_var": None,
- "error_type": "MODEL_FIT_FAILED"
- })
- continue
- if "status" in res and res["status"] == "ERROR":
- results.append(res)
- else:
- res["status"] = "OK"
- results.append(res)
- return pd.DataFrame(results)
- # ============================================================
- # 运行入口
- # ============================================================
- if __name__ == "__main__":
- units_to_run = ["RO4"]
- out = main(["RO4"], TMP_limit=0.19, dpt_type="DPT_2")
- # 防止空结果
- if out is None or len(out) == 0:
- print("无有效结果")
- else:
- row = out.iloc[0]
- if row["status"] == "ERROR":
- print("系统异常:")
- print(f"时间: {row['error_time']}")
- print(f"变量: {row['error_var']}")
- else:
- print(f"该机组预计下次CIP为:{row['remaining_days']:.2f} 天后")
|