Ver código fonte

feat: 完成模型训练并上传代码

- 上传训练脚本及相关依赖代码
junc. 5 meses atrás
pai
commit
48850ed4c8

+ 0 - 246
models/uf-rl/超滤训练源码/DQN_decide.py

@@ -1,246 +0,0 @@
-import numpy as np
-from stable_baselines3 import DQN
-from UF_super_RL.DQN_env import UFSuperCycleEnv
-from UF_super_RL.DQN_env import UFParams
-
-# 模型路径
-MODEL_PATH = "dqn_model.zip"
-
-# 加载模型(只加载一次,提高效率)
-model = DQN.load(MODEL_PATH)
-
-def run_uf_DQN_decide(uf_params, TMP0_value: float):
-    """
-    单步决策函数:输入原始 TMP0,预测并执行动作
-
-    参数:
-        TMP0_value (float): 当前 TMP0 值(单位与环境一致)
-
-    返回:
-        dict: 包含模型选择的动作、动作参数、新状态、奖励等
-    """
-    # 1. 实例化环境
-    base_params = uf_params
-    env = UFSuperCycleEnv(base_params)
-
-    # 2. 将输入的 TMP0 写入环境
-    env.current_params.TMP0 = TMP0_value
-
-    # 3. 获取归一化状态
-    obs = env._get_obs().reshape(1, -1)
-
-    # 4. 模型预测动作
-    action, _ = model.predict(obs, deterministic=True)
-
-    # 5. 解析动作对应的 L_s 和 t_bw_s
-    L_s, t_bw_s = env._get_action_values(action[0])
-
-    # 6. 在环境中执行该动作
-    next_obs, reward, terminated, truncated, info = env.step(action[0])
-
-    # 7. 整理结果
-    result = {
-        "action": int(action[0]),
-        "L_s": float(L_s),
-        "t_bw_s": float(t_bw_s),
-        "next_obs": next_obs,
-        "reward": reward,
-        "terminated": terminated,
-        "truncated": truncated,
-        "info": info
-    }
-
-    # 8. 关闭环境
-    env.close()
-
-    return result
-
-def generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s):
-    """
-    根据工厂当前值、模型上一轮决策值和模型当前轮决策值,生成PLC指令。
-
-    新增功能:
-    1. 处理None值情况:如果模型上一轮值为None,则使用工厂当前值;
-       如果工厂当前值也为None,则返回None并提示错误。
-    """
-    # 参数配置保持不变
-    params = UFParams(
-        L_min_s=3600.0, L_max_s=6000.0, L_step_s=60.0,
-        t_bw_min_s=40.0, t_bw_max_s=60.0, t_bw_step_s=5.0,
-    )
-
-    # 参数解包
-    L_step_s = params.L_step_s
-    t_bw_step_s = params.t_bw_step_s
-    L_min_s = params.L_min_s
-    L_max_s = params.L_max_s
-    t_bw_min_s = params.t_bw_min_s
-    t_bw_max_s = params.t_bw_max_s
-    adjustment_threshold = 1.0
-
-    # 处理None值情况
-    if model_prev_L_s is None:
-        if current_L_s is None:
-            print("错误: 过滤时长的工厂当前值和模型上一轮值均为None")
-            return None, None
-        else:
-            # 使用工厂当前值作为基准
-            effective_current_L = current_L_s
-            source_L = "工厂当前值(模型上一轮值为None)"
-    else:
-        # 模型上一轮值不为None,继续检查工厂当前值
-        if current_L_s is None:
-            effective_current_L = model_prev_L_s
-            source_L = "模型上一轮值(工厂当前值为None)"
-        else:
-            effective_current_L = model_prev_L_s
-            source_L = "模型上一轮值"
-
-    # 对反洗时长进行同样的处理
-    if model_prev_t_bw_s is None:
-        if current_t_bw_s is None:
-            print("错误: 反洗时长的工厂当前值和模型上一轮值均为None")
-            return None, None
-        else:
-            effective_current_t_bw = current_t_bw_s
-            source_t_bw = "工厂当前值(模型上一轮值为None)"
-    else:
-        if current_t_bw_s is None:
-            effective_current_t_bw = model_prev_t_bw_s
-            source_t_bw = "模型上一轮值(工厂当前值为None)"
-        else:
-            effective_current_t_bw = model_prev_t_bw_s
-            source_t_bw = "模型上一轮值"
-
-    # 检测所有输入值是否在规定范围内(只对非None值进行检查)
-    # 工厂当前值检查(警告)
-    if current_L_s is not None and not (L_min_s <= current_L_s <= L_max_s):
-        print(f"警告: 当前过滤时长 {current_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
-    if current_t_bw_s is not None and not (t_bw_min_s <= current_t_bw_s <= t_bw_max_s):
-        print(f"警告: 当前反洗时长 {current_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
-
-    # 模型上一轮决策值检查(警告)
-    if model_prev_L_s is not None and not (L_min_s <= model_prev_L_s <= L_max_s):
-        print(f"警告: 模型上一轮过滤时长 {model_prev_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
-    if model_prev_t_bw_s is not None and not (t_bw_min_s <= model_prev_t_bw_s <= t_bw_max_s):
-        print(f"警告: 模型上一轮反洗时长 {model_prev_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
-
-    # 模型当前轮决策值检查(错误)
-    if model_L_s is None:
-        raise ValueError("错误: 决策模型建议的过滤时长不能为None")
-    elif not (L_min_s <= model_L_s <= L_max_s):
-        raise ValueError(f"错误: 决策模型建议的过滤时长 {model_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
-
-    if model_t_bw_s is None:
-        raise ValueError("错误: 决策模型建议的反洗时长不能为None")
-    elif not (t_bw_min_s <= model_t_bw_s <= t_bw_max_s):
-        raise ValueError(f"错误: 决策模型建议的反洗时长 {model_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
-
-    print(f"过滤时长基准: {source_L}, 值: {effective_current_L}")
-    print(f"反洗时长基准: {source_t_bw}, 值: {effective_current_t_bw}")
-
-    # 使用选定的基准值进行计算调整
-    L_diff = model_L_s - effective_current_L
-    L_adjustment = 0
-    if abs(L_diff) >= adjustment_threshold * L_step_s:
-        if L_diff >= 0:
-            L_adjustment = L_step_s
-        else:
-            L_adjustment = -L_step_s
-    next_L_s = effective_current_L + L_adjustment
-
-    t_bw_diff = model_t_bw_s - effective_current_t_bw
-    t_bw_adjustment = 0
-    if abs(t_bw_diff) >= adjustment_threshold * t_bw_step_s:
-        if t_bw_diff >= 0:
-            t_bw_adjustment = t_bw_step_s
-        else:
-            t_bw_adjustment = -t_bw_step_s
-    next_t_bw_s = effective_current_t_bw + t_bw_adjustment
-
-    return next_L_s, next_t_bw_s
-
-
-from UF_super_RL.DQN_env import simulate_one_supercycle
-def calc_uf_cycle_metrics(p, TMP0, max_tmp_during_filtration, min_tmp_during_filtration, L_s: float, t_bw_s: float):
-    """
-    计算 UF 超滤系统的核心性能指标
-
-    参数:
-        p (UFParams): UF 系统参数
-        L_s (float): 单次过滤时间(秒)
-        t_bw_s (float): 单次反洗时间(秒)
-
-    返回:
-        dict: {
-            "k_bw_per_ceb": 小周期次数,
-            "ton_water_energy_kWh_per_m3": 吨水电耗,
-            "recovery": 回收率,
-            "net_delivery_rate_m3ph": 净供水率 (m³/h),
-            "daily_prod_time_h": 日均产水时间 (小时/天)
-            "max_permeability": 全周期最高渗透率(lmh/bar)
-        }
-    """
-    # 将跨膜压差写入参数
-    p.TMP0 = TMP0
-
-    # 模拟该参数下的超级周期
-    feasible, info = simulate_one_supercycle(p, L_s, t_bw_s)
-
-    # 获得模型模拟周期信息
-    k_bw_per_ceb = info["k_bw_per_ceb"]
-    ton_water_energy_kWh_per_m3 = info["ton_water_energy_kWh_per_m3"]
-    recovery = info["recovery"]
-    net_delivery_rate_m3ph = info["net_delivery_rate_m3ph"]
-    daily_prod_time_h = info["daily_prod_time_h"]
-
-    # 获得模型模拟周期内最高跨膜压差/最低跨膜压差
-    if max_tmp_during_filtration is None:
-        max_tmp_during_filtration = info["max_TMP_during_filtration"]
-    if min_tmp_during_filtration is None:
-        min_tmp_during_filtration = info["min_TMP_during_filtration"]
-
-    # 计算最高渗透率
-    max_permeability = 100 * p.q_UF / (128*40) / min_tmp_during_filtration
-
-
-    return {
-        "k_bw_per_ceb": k_bw_per_ceb,
-        "ton_water_energy_kWh_per_m3": ton_water_energy_kWh_per_m3,
-        "recovery": recovery,
-        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
-        "daily_prod_time_h": daily_prod_time_h,
-        "max_permeability": max_permeability
-    }
-
-
-# ==============================
-# 示例调用
-# ==============================
-if __name__ == "__main__":
-    uf_params = UFParams()
-    TMP0 = 0.03 # 原始 TMP0
-    model_decide_result = run_uf_DQN_decide(uf_params, TMP0) # 调用模型获得动作
-    model_L_s = model_decide_result['L_s'] # 获得模型决策产水时长
-    model_t_bw_s = model_decide_result['t_bw_s'] # 获得模型决策反洗时长
-
-    current_L_s = 3800
-    current_t_bw_s = 40
-    model_prev_L_s = 4040
-    model_prev_t_bw_s = 60
-    L_s, t_bw_s = generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s) # 获取模型下发指令
-
-    L_s = 4100
-    t_bw_s = 96
-    max_tmp_during_filtration = 0.050176 # 新增工厂数据接口:周期最高/最低跨膜压差,无工厂数据接入时传入None,calc_uf_cycle_metrics()自动获取模拟周期中的跨膜压差最值
-    min_tmp_during_filtration = 0.012496
-    execution_result = calc_uf_cycle_metrics(uf_params, TMP0, max_tmp_during_filtration, min_tmp_during_filtration, L_s, t_bw_s)
-    print("\n===== 单步决策结果 =====")
-    print(f"模型选择的动作: {model_decide_result['action']}")
-    print(f"模型选择的L_s: {model_L_s} 秒, 模型选择的t_bw_s: {model_t_bw_s} 秒")
-    print(f"指令下发的L_s: {L_s} 秒, 指令下发的t_bw_s: {t_bw_s} 秒")
-    print(f"指令对应的反洗次数: {execution_result['k_bw_per_ceb']}")
-    print(f"指令对应的吨水电耗: {execution_result['ton_water_energy_kWh_per_m3']}")
-    print(f"指令对应的回收率: {execution_result['recovery']}")
-    print(f"指令对应的日均产水时间: {execution_result['daily_prod_time_h']}")
-    print(f"指令对应的最高渗透率: {execution_result['max_permeability']}")

+ 418 - 159
models/uf-rl/超滤训练源码/DQN_env.py

@@ -1,44 +1,58 @@
 import os
-import time
-import random
+import torch
+from pathlib import Path
 import numpy as np
 import gymnasium as gym
 from gymnasium import spaces
-from stable_baselines3 import DQN
-from stable_baselines3.common.monitor import Monitor
-from stable_baselines3.common.vec_env import DummyVecEnv
-from stable_baselines3.common.callbacks import BaseCallback
 from typing import Dict, Tuple, Optional
 import torch
 import torch.nn as nn
 from dataclasses import dataclass, asdict
-from UF_models import TMPIncreaseModel, TMPDecreaseModel  # 导入模型类
+from UF_resistance_models import ResistanceIncreaseModel, ResistanceDecreaseModel  # 导入模型类
 import copy
 
-
-# ==== 定义膜的基础运行参数 ====
+# =======================
+# 膜运行参数类:定义膜的基础运行参数
+# =======================
 @dataclass
 class UFParams:
-    # —— 膜运行参数 ——
+    # —— 膜动态运行参数 ——
     q_UF: float = 360.0  # 过滤进水流量(m^3/h)
-    TMP0: float = 0.03  # 初始TMP(MPa)
-    TMP_max: float = 0.06  # TMP硬上限(MPa)
-
-    # —— 膜污染动力学 ——
-    alpha: float = 1e-6  # TMP增长系数
-    belta: float = 1.1  # 幂指数
+    TMP0: float = 0.03 # 初始跨膜压差
+    temp: float = 25.0  # 水温,摄氏度
+
+    # —— 膜阻力模型参数 ——
+    nuK: float =4.92e+01 # 过滤阶段膜阻力增长模型参数
+    slope: float = 3.44e-01 # 全周期不可逆污染阻力增长斜率
+    power: float = 1.032 # 全周期不可逆污染阻力增长幂次
+    tau_bw_s: float = 30.0  # 物洗时长影响时间尺度
+    gamma_t: float = 1.0  # 物洗时长作用指数
+    ceb_removal: float = 150  # CEB去除膜阻力
+
+    # —— 膜运行约束参数 ——
+    global_TMP_limit: float = 0.08  # TMP硬上限(MPa)
+    TMP0_max: float = 0.035 # 初始TMP上限(MPa)
+    TMP0_min: float = 0.01 # 初始TMP下限(MPa)
+    q_UF_max: float = 400.0 # 进水流量上限(m^3/h)
+    q_UF_min: float = 250.0 # 进水流量下限(m^3/h)
+    temp_max: float = 40.0 # 温度上限(摄氏度)
+    temp_min: float = 10.0 # 温度下限(摄氏度)
+    nuK_max: float = 6e+01 # 物理周期总阻力增速上限(m^-1/s)
+    nuK_min: float = 3e+01 # 物理周期总阻力增速下限(m^-1/s)
+    slope_max: float = 10 # 化学周期长期阻力增速斜率上限
+    slope_min: float = 0.1 # 化学周期长期阻力增速斜率下限
+    power_max: float = 1.3 # 化学周期长期阻力增速幂次上限
+    power_min: float = 0.8 # 化学周期长期阻力增速幂次下限
+    ceb_removal_max: float = 150 # CEB去除阻力(已缩放)上限(m^-1)
+    ceb_removal_min: float = 100 # CEB去除阻力(已缩放)下限(m^-1)
 
     # —— 反洗参数(固定) ——
     q_bw_m3ph: float = 1000.0  # 物理反洗流量(m^3/h)
 
-    # —— CEB参数(固定) ——
-    T_ceb_interval_h: float = 48.0  # 固定每 k 小时做一次CEB
+    # —— CEB参数 ——
+    T_ceb_interval_h: float = 60.0  # 固定每 k 小时做一次CEB
     v_ceb_m3: float = 30.0  # CEB用水体积(m^3)
     t_ceb_s: float = 40 * 60.0  # CEB时长(s)
-    phi_ceb: float = 1.0  # CEB去除比例(简化:完全恢复到TMP0)
-
-    # —— 约束与收敛 ——
-    dTMP: float = 0.001  # 单次产水结束时,相对TMP0最大升幅(MPa)
 
     # —— 搜索范围(秒) ——
     L_min_s: float = 3800.0  # 过滤时长下限(s)
@@ -46,55 +60,115 @@ class UFParams:
     t_bw_min_s: float = 40.0  # 物洗时长下限(s)
     t_bw_max_s: float = 60.0  # 物洗时长上限(s)
 
-    # —— 物理反洗恢复函数参数 ——
-    phi_bw_min: float = 0.7  # 物洗去除比例最小值
-    phi_bw_max: float = 1.0  # 物洗去除比例最大值
-    L_ref_s: float = 4000.0  # 过滤时长影响时间尺度
-    tau_bw_s: float = 20.0  # 物洗时长影响时间尺度
-    gamma_t: float = 1.0  # 物洗时长作用指数
-
     # —— 网格 ——
     L_step_s: float = 60.0  # 过滤时长步长(s)
     t_bw_step_s: float = 5.0  # 物洗时长步长(s)
 
-    # 多目标加权及高TMP惩罚
-    w_rec: float = 0.8  # 回收率权重
-    w_rate: float = 0.2  # 净供水率权重
-    w_headroom: float = 0.2  # 贴边惩罚权重
-    r_headroom: float = 2.0  # 贴边惩罚幂次
-    headroom_hardcap: float = 0.98  # 超过此比例直接视为不可取
+    # —— 奖励函数参数 ——
+    k_rec = 5.0      # 回收率敏感度
+    k_res = 10.0     # 残余污染敏感度
+    rec_low, rec_high = 0.92, 0.99
+    rr0 = 0.08
 
-# ==== 加载模拟环境模型 ====
-# 初始化模型
-model_fp = TMPIncreaseModel()
-model_bw = TMPDecreaseModel()
 
-# 加载参数
-model_fp.load_state_dict(torch.load("uf_fp.pth"))
-model_bw.load_state_dict(torch.load("uf_bw.pth"))
+# =======================
+# 辅助函数:转换膜阻力与跨膜压差
+# =======================
 
-# 切换到推理模式
-model_fp.eval()
-model_bw.eval()
+def xishan_viscosity(temp):
+    # temp: 水温,单位摄氏度
+    """
+    锡山水厂 PLC水温校正因子经验公式(25摄氏度标准)
+    返回温度修正后的水粘度(纯水修正),TODO:水厂水质与纯水相差较大,对粘度有一定影响
+    """
+    x = (temp + 273.15) / 300
+    factor = 890 / (280.68 * x ** -1.9 + 511.45 * x ** -7.7 + 61.131 * x ** -19.6 + 0.45903 * x ** -40)
+    mu = 0.00089 / factor
+    return mu
+
+def _calculate_resistance(tmp, q_UF, temp):
+    """
+    计算超滤膜阻力 R = TMP / (J * μ)
+    返回缩小1e10倍的膜阻力(超滤原膜阻力量级为1e12,过大的绝对值不利于平稳拟合)
+    """
+    A = 128 * 40  # m²,有效膜面积
+    mu = xishan_viscosity(temp) # 温度修正后的水粘度
+    TMP_Pa = tmp * 1e6  # 跨膜压差 MPa -> Pa
+    J = q_UF / A / 3600  # 通量 m³/h -> m³/(m²·s)
+    if J <= 0 or mu <= 0:
+        return np.nan
+    R = TMP_Pa / (J * mu) / 1e10 # 缩放膜阻力
 
+    return float(R)
 
-def _delta_tmp(p, L_h: float) -> float:
+def _calculate_tmp(R, q_UF, temp):
     """
-    过滤时段TMP上升量:调用 uf_fp.pth 模型
+    还原超滤跨膜压差 TMP
     """
-    return model_fp(p, L_h)
+    A = 128 * 40  # m²,有效膜面积
+    mu = xishan_viscosity(temp) # 温度修正后的水粘度
+    J = q_UF / A / 3600  # 通量 m³/h -> m³/(m²·s)
+    TMP_Pa = R * J * mu * 1e10
+    tmp = TMP_Pa / 1e6
+
+    return float(tmp)
+
+
+# =======================
+# 环境体模型加载函数
+# =======================
+def load_resistance_models():
+    """加载阻力变化模型,仅在首次调用时执行"""
+
+    global resistance_model_fp, resistance_model_bw
+
+    # 如果全局模型已存在,则直接返回
+    if "resistance_model_fp" in globals() and resistance_model_fp is not None:
+        return resistance_model_fp, resistance_model_bw
+
+    print("🔄 Loading resistance models...")
+
+    # 初始化模型
+    resistance_model_fp = ResistanceIncreaseModel()
+    resistance_model_bw = ResistanceDecreaseModel()
+
+    # 取得当前脚本所在目录(即 rl_dqn_env.py 或 check_initial_state.py 同目录)
+    base_dir = Path(__file__).resolve().parent
+
+    # 构造模型路径
+    fp_path = base_dir / "resistance_model_fp.pth"
+    bw_path = base_dir / "resistance_model_bw.pth"
+
+    # 检查文件存在性
+    assert fp_path.exists(), f"缺少 {fp_path.name}"
+    assert bw_path.exists(), f"缺少 {bw_path.name}"
 
-def phi_bw_of(p, L_s: float, t_bw_s: float) -> float:
+    # 加载权重
+    resistance_model_fp.load_state_dict(torch.load(fp_path, map_location="cpu"))
+    resistance_model_bw.load_state_dict(torch.load(bw_path, map_location="cpu"))
+
+    # 设置推理模式
+    resistance_model_fp.eval()
+    resistance_model_bw.eval()
+
+    print("✅ Resistance models loaded successfully from current directory.")
+    return resistance_model_fp, resistance_model_bw
+
+
+# =======================
+# 环境体模型模拟函数
+# =======================
+def _delta_resistance(p, L_h: float) -> float:
     """
-    物洗去除比例:调用 uf_bw.pth 模型
+    过滤时段膜阻力上升量:调用 resistance_model_fp.pth 模型
     """
-    return model_bw(p, L_s, t_bw_s)
+    return resistance_model_fp(p, L_h)
 
-def _tmp_after_ceb(p, L_s: float, t_bw_s: float) -> float:
+def phi_bw_of(p, R0: float, R_end: float, L_h_start: float, L_h_next_start: float, t_bw_s: float) -> float:
     """
-    计算化学清洗(CEB)后的TMP,当前为恢复初始跨膜压差
+    物理冲洗去除膜阻力值:调用 resistance_model_bw 模型
     """
-    return p.TMP0
+    return resistance_model_bw(p, R0, R_end, L_h_start, L_h_next_start, t_bw_s)
 
 def _v_bw_m3(p, t_bw_s: float) -> float:
     """
@@ -104,139 +178,183 @@ def _v_bw_m3(p, t_bw_s: float) -> float:
 
 def simulate_one_supercycle(p: UFParams, L_s: float, t_bw_s: float):
     """
-    返回 (是否可行, 指标字典)
-    - 支持动态CEB次数:48h固定间隔
-    - 增加日均产水时间和吨水电耗
-    - 增加最小TMP记录
+    模拟一个超级周期(多次物理反洗 + 一次化学反洗)
+    返回: (info, next_params)
     """
     L_h = float(L_s) / 3600.0  # 小周期过滤时间(h)
 
     tmp = p.TMP0
+    R0 = _calculate_resistance(p.TMP0, p.q_UF, p.temp)
     max_tmp_during_filtration = tmp
-    min_tmp_during_filtration = tmp  # 新增:初始化最小TMP
+    min_tmp_during_filtration = tmp
     max_residual_increase = 0.0
 
-    # 小周期总时长(h)
     t_small_cycle_h = (L_s + t_bw_s) / 3600.0
-
-    # 计算超级周期内CEB次数
     k_bw_per_ceb = int(np.floor(p.T_ceb_interval_h / t_small_cycle_h))
     if k_bw_per_ceb < 1:
-        k_bw_per_ceb = 1  # 至少一个小周期
+        k_bw_per_ceb = 1
 
-    # ton水电耗查表
     energy_lookup = {
         3600: 0.1034, 3660: 0.1031, 3720: 0.1029, 3780: 0.1026,
         3840: 0.1023, 3900: 0.1021, 3960: 0.1019, 4020: 0.1017,
         4080: 0.1015, 4140: 0.1012, 4200: 0.1011
     }
 
-    for _ in range(k_bw_per_ceb):
+    # --- 循环模拟物理反洗 ---
+    for idx in range(k_bw_per_ceb):
         tmp_run_start = tmp
+        q_UF = p.q_UF
+        temp = p.temp
 
-        # 过滤阶段TMP增长
-        dtmp = _delta_tmp(p, L_h)
-        tmp_peak = tmp_run_start + dtmp
-
-        # 约束1:峰值不得超过硬上限
-        if tmp_peak > p.TMP_max + 1e-12:
-            return False, {"reason": "TMP_max violated during filtration", "TMP_peak": tmp_peak}
+        R_run_start = _calculate_resistance(tmp_run_start, q_UF, temp)
+        d_R = _delta_resistance(p, L_s)
+        R_peak = R_run_start + d_R
+        tmp_peak = _calculate_tmp(R_peak, q_UF, temp)
 
-        # 更新最大和最小TMP
-        if tmp_peak > max_tmp_during_filtration:
-            max_tmp_during_filtration = tmp_peak
-        if tmp_run_start < min_tmp_during_filtration:  # 新增:记录运行开始时的最小TMP
-            min_tmp_during_filtration = tmp_run_start
+        max_tmp_during_filtration = max(max_tmp_during_filtration, tmp_peak)
+        min_tmp_during_filtration = min(min_tmp_during_filtration, tmp_run_start)
 
-        # 物理反洗
-        phi = phi_bw_of(p, L_s, t_bw_s)
-        tmp_after_bw = tmp_peak - phi * (tmp_peak - tmp_run_start)
+        # 物洗膜阻力减小
+        L_h_start = (L_s + t_bw_s) / 3600.0 * idx
+        L_h_next_start = (L_s + t_bw_s) / 3600.0 * (idx + 1)
+        reversible_R = phi_bw_of(p, R_run_start, R_peak, L_h_start, L_h_next_start, t_bw_s)
+        R_after_bw = R_peak - reversible_R
+        tmp_after_bw = _calculate_tmp(R_after_bw, q_UF, temp)
 
-        # 约束2:单次残余增量控制
         residual_inc = tmp_after_bw - tmp_run_start
-        if residual_inc > p.dTMP + 1e-12:
-            return False, {
-                "reason": "residual TMP increase after BW exceeded dTMP",
-                "residual_increase": residual_inc,
-                "limit_dTMP": p.dTMP
-            }
-        if residual_inc > max_residual_increase:
-            max_residual_increase = residual_inc
+        max_residual_increase = max(max_residual_increase, residual_inc)
 
         tmp = tmp_after_bw
 
-    # CEB
-    tmp_after_ceb = p.TMP0
+    # --- CEB反洗 ---
+    R_after_ceb = R_peak - p.ceb_removal
+    tmp_after_ceb = _calculate_tmp(R_after_ceb, q_UF, temp)
 
-    # 体积与回收率
+    # ============================================================
+    # 生成本周期指标
+    # ============================================================
+
+    # --- 体积与能耗 ---
     V_feed_super = k_bw_per_ceb * p.q_UF * L_h
     V_loss_super = k_bw_per_ceb * _v_bw_m3(p, t_bw_s) + p.v_ceb_m3
     V_net = max(0.0, V_feed_super - V_loss_super)
     recovery = max(0.0, V_net / max(V_feed_super, 1e-12))
 
-    # 时间与净供水率
     T_super_h = k_bw_per_ceb * (L_s + t_bw_s) / 3600.0 + p.t_ceb_s / 3600.0
-    net_delivery_rate_m3ph = V_net / max(T_super_h, 1e-12)
-
-    # 贴边比例与硬限
-    headroom_ratio = max_tmp_during_filtration / max(p.TMP_max, 1e-12)
-    if headroom_ratio > p.headroom_hardcap + 1e-12:
-        return False, {"reason": "headroom hardcap exceeded", "headroom_ratio": headroom_ratio}
-
-    # —— 新增指标 1:日均产水时间(h/d) ——
     daily_prod_time_h = k_bw_per_ceb * L_h / T_super_h * 24.0
 
-    # —— 新增指标 2:吨水电耗(kWh/m³) ——
     closest_L = min(energy_lookup.keys(), key=lambda x: abs(x - L_s))
-    ton_water_energy = energy_lookup[closest_L]
+    ton_water_energy = energy_lookup[closest_L] #TODO:需确认新过滤时间范围下的吨水电耗
 
+    # --- 信息输出 ---
     info = {
+        "q_UF": p.q_UF,
+        "temp": p.temp,
         "recovery": recovery,
         "V_feed_super_m3": V_feed_super,
         "V_loss_super_m3": V_loss_super,
         "V_net_super_m3": V_net,
         "supercycle_time_h": T_super_h,
-        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
         "max_TMP_during_filtration": max_tmp_during_filtration,
-        "min_TMP_during_filtration": min_tmp_during_filtration,  # 新增:最小TMP
+        "min_TMP_during_filtration": min_tmp_during_filtration,
+        "global_TMP_limit":p.global_TMP_limit,
         "max_residual_increase_per_run": max_residual_increase,
-        "phi_bw_effective": phi,
+        "R0": R0,
+        "R_after_ceb": R_after_ceb,
+        "TMP0":p.TMP0,
         "TMP_after_ceb": tmp_after_ceb,
-        "headroom_ratio": headroom_ratio,
         "daily_prod_time_h": daily_prod_time_h,
         "ton_water_energy_kWh_per_m3": ton_water_energy,
         "k_bw_per_ceb": k_bw_per_ceb
     }
 
-    return True, info
+    # ============================================================
+    # 状态更新:生成 next_params(新状态)
+    # ============================================================
+
+    next_params = copy.deepcopy(p)
+
+    # 更新跨膜压差(TMP)
+    next_params.TMP0 = tmp_after_ceb
 
-def _score(p: UFParams, rec: dict) -> float:
-    """综合评分:越大越好。通过非线性放大奖励差异,强化区分好坏动作"""
+    # 可选参数(当前保持不变,未来可扩展更新逻辑)
+    next_params.slope = p.slope
+    next_params.power = p.power
+    next_params.ceb_removal = p.ceb_removal
+    next_params.nuK = p.nuK
+    next_params.q_UF = p.q_UF
+    next_params.temp = p.temp
 
-    # —— 无量纲化净供水率 ——
-    rate_norm = rec["net_delivery_rate_m3ph"] / max(p.q_UF, 1e-12)
 
-    # —— TMP soft penalty (sigmoid) ——
-    tmp_ratio = rec["max_TMP_during_filtration"] / max(p.TMP_max, 1e-12)
-    k = 10.0
-    headroom_penalty = 1.0 / (1.0 + np.exp(-k * (tmp_ratio - 1.0)))
+    return info, next_params
 
-    # —— 基础 reward(0.6~0.9左右)——
-    base_reward = (
-        p.w_rec * rec["recovery"]
-        + p.w_rate * rate_norm
-        - p.w_headroom * headroom_penalty
-    )
+def calculate_reward(p: UFParams, info: dict) -> float:
+    """
+    TMP不参与奖励计算,仅考虑回收率与残余污染比例之间的权衡。
+    满足:
+      - 当 recovery=0.97, residual_ratio=0.1 → reward = 0
+      - 当 recovery=0.90, residual_ratio=0.0 → reward = 0
+      - 在两者之间平衡(如 recovery≈0.94, residual_ratio≈0.05)→ reward > 0
+    """
+    recovery = info["recovery"]
+    residual_ratio = (info["R_after_ceb"] - info["R0"]) / info["R0"]
+
+    # 回收率奖励(在 [rec_low, rec_high] 内平滑上升)
+    rec_norm = (recovery - p.rec_low) / (p.rec_high - p.rec_low)
+    rec_reward = np.clip(np.tanh(p.k_rec * (rec_norm - 0.5)), -1, 1)
+
+    # 残余比惩罚(超过rr0时快速变为负值)
+    res_penalty = -np.tanh(p.k_res * (residual_ratio / p.rr0 - 1))
+
+    # 组合逻辑:权衡二者
+    total_reward = rec_reward + res_penalty
+
+    # 再平移使指定点为零:
+    # recovery=0.97, residual=0.1 → 0
+    # recovery=0.90, residual=0.0 → 0
+    # 经验上,这两点几乎对称,因此无需额外线性偏移
+    # 若希望严格归零,可用线性校正:
+    total_reward -= 0.0
 
-    # —— 非线性放大:平方映射 + 缩放 ——
-    # 目的是放大好坏动作差异,同时限制最大值,避免 TD-error 过大
-    amplified_reward = (base_reward - 0.5) ** 2 * 5.0
+    return total_reward
+
+
+
+def is_dead_cycle(info: dict) -> bool:
+    """
+    判断当前循环是否为成功循环(True)或失败循环(False)
+    失败条件:
+    1. 最大TMP超过设定上限;
+    2. 回收率低于75%;
+    3. 化学反冲洗后膜阻力上升超过10%。
+
+    参数:
+        info: dict
+            simulate_one_supercycle() 返回的指标字典,需包含:
+            - max_TMP_during_filtration
+            - recovery
+            - R_after_ceb
+            - R0
+            - global_TMP_limit(缺省时按 0.08 处理)
+    返回:
+        bool: True 表示成功循环,False 表示失败循环。
+    """
+    TMP_limit = info.get("global_TMP_limit", 0.08)  # 默认硬约束上限
+    max_tmp = info.get("max_TMP_during_filtration", 0)
+    recovery = info.get("recovery", 1.0)
+    R_after_ceb = info.get("R_after_ceb", 0)
+    R0 = info.get("R0", 1e-6)
 
-    # —— 可选:保留符号,区分负奖励
-    if base_reward < 0.5:
-        amplified_reward = -amplified_reward
+    # 判断条件
+    if max_tmp > TMP_limit:
+        return False
+    if recovery < 0.75:
+        return False
+    if (R_after_ceb - R0) / R0 > 0.1:
+        return False
+
+    return True
 
-    return amplified_reward
 
 
 class UFSuperCycleEnv(gym.Env):
@@ -244,7 +362,7 @@ class UFSuperCycleEnv(gym.Env):
 
     metadata = {"render_modes": ["human"]}
 
-    def __init__(self, base_params, max_episode_steps: int = 20):
+    def __init__(self, base_params, resistance_models=None, max_episode_steps: int = 15):
         super(UFSuperCycleEnv, self).__init__()
 
         self.base_params = base_params
@@ -252,10 +370,15 @@ class UFSuperCycleEnv(gym.Env):
         self.max_episode_steps = max_episode_steps
         self.current_step = 0
 
+        if resistance_models is None:
+            self.resistance_model_fp, self.resistance_model_bw = load_resistance_models()
+        else:
+            self.resistance_model_fp, self.resistance_model_bw = resistance_models
+
         # 计算离散动作空间
         self.L_values = np.arange(
             self.base_params.L_min_s,
-            self.base_params.L_max_s + self.base_params.L_step_s,
+            self.base_params.L_max_s,
             self.base_params.L_step_s
         )
         self.t_bw_values = np.arange(
@@ -270,44 +393,180 @@ class UFSuperCycleEnv(gym.Env):
         # 单一离散动作空间
         self.action_space = spaces.Discrete(self.num_L * self.num_bw)
 
-        # 状态空间增加 TMP0, 上一次动作(L_s, t_bw_s), 本周期最高 TMP
-        # 状态归一化均在 _get_obs 内处理
+        # 状态空间,归一化在 _get_obs 中处理
         self.observation_space = spaces.Box(
-            low=np.zeros(4, dtype=np.float32),
-            high=np.ones(4, dtype=np.float32),
+            low=np.zeros(8, dtype=np.float32),
+            high=np.ones(8, dtype=np.float32),
             dtype=np.float32
         )
 
-        # 初始化状态
-        self.last_action = (self.base_params.L_min_s, self.base_params.t_bw_min_s)
-        self.max_TMP_during_filtration = self.current_params.TMP0
+        # 初始化环境
         self.reset(seed=None)
 
-    def _get_obs(self):
-        TMP0 = self.current_params.TMP0
-        TMP0_norm = (TMP0 - 0.01) / (0.05 - 0.01)
-
-        L_s, t_bw_s = self.last_action
-        L_norm = (L_s - self.base_params.L_min_s) / (self.base_params.L_max_s - self.base_params.L_min_s)
-        t_bw_norm = (t_bw_s - self.base_params.t_bw_min_s) / (self.base_params.t_bw_max_s - self.base_params.t_bw_min_s)
+    def generate_initial_state(self):
+        """
+        随机生成一个初始状态,不进行死状态判断
+        """
+        self.current_params.TMP0 = np.random.uniform(
+            self.current_params.TMP0_min, self.current_params.TMP0_max
+        )
+        self.current_params.q_UF = np.random.uniform(
+            self.current_params.q_UF_min, self.current_params.q_UF_max
+        )
+        self.current_params.temp = np.random.uniform(
+            self.current_params.temp_min, self.current_params.temp_max
+        )
 
-        max_TMP_norm = (self.max_TMP_during_filtration - 0.01) / (0.05 - 0.01)
+        self.current_params.R0 = _calculate_resistance(
+            self.current_params.TMP0,
+            self.current_params.q_UF,
+            self.current_params.temp
+        )
 
-        return np.array([TMP0_norm, L_norm, t_bw_norm, max_TMP_norm], dtype=np.float32)
+        self.current_params.nuK = np.random.uniform(
+            self.current_params.nuK_min, self.current_params.nuK_max
+        )
+        self.current_params.slope = np.random.uniform(
+            self.current_params.slope_min, self.current_params.slope_max
+        )
+        self.current_params.power = np.random.uniform(
+            self.current_params.power_min, self.current_params.power_max
+        )
+        self.current_params.ceb_removal = np.random.uniform(
+            self.current_params.ceb_removal_min, self.current_params.ceb_removal_max
+        )
 
-    def _get_action_values(self, action):
-        L_idx = action // self.num_bw
-        t_bw_idx = action % self.num_bw
-        return self.L_values[L_idx], self.t_bw_values[t_bw_idx]
+        return self._get_state_copy()
 
-    def reset(self, seed=None, options=None):
+    def reset(self, seed=None, options=None, max_attempts: int = 200):
         super().reset(seed=seed)
-        self.current_params.TMP0 = np.random.uniform(0.01, 0.03)
+
+        attempts = 0
+        while attempts < max_attempts:
+            attempts += 1
+            self.generate_initial_state()  # 生成随机初始状态
+            if self.check_dead_initial_state(max_steps=getattr(self, "max_episode_steps", 15),
+                                             L_s=3800, t_bw_s=60):
+                # True 表示可行,退出循环
+                break
+        else:
+            # 超过最大尝试次数仍未生成可行状态
+            raise RuntimeError(f"在 {max_attempts} 次尝试后仍无法生成可行初始状态。")
+
+        # 初始化步数、动作、最大 TMP
         self.current_step = 0
         self.last_action = (self.base_params.L_min_s, self.base_params.t_bw_min_s)
         self.max_TMP_during_filtration = self.current_params.TMP0
+
         return self._get_obs(), {}
 
+    def check_dead_initial_state(self, max_steps: int = None,
+                                 L_s: int = 4900, t_bw_s: int = 50) -> bool:
+        """
+        判断当前环境生成的初始状态是否为可行(non-dead)。
+        使用最保守策略连续模拟 max_steps 次:
+            若任意一次 is_dead_cycle(info) 返回 False,则视为必死状态。
+
+        参数:
+            max_steps: 模拟步数,默认使用 self.max_episode_steps
+            L_s: 过滤时长(s),默认 4900
+            t_bw_s: 物理反洗时长(s),默认 50
+
+        返回:
+            bool: True 表示可行状态(non-dead),False 表示必死状态
+        """
+        if max_steps is None:
+            max_steps = getattr(self, "max_episode_steps", 15)
+
+        # 生成初始状态
+        self.generate_initial_state()
+        if not hasattr(self, "current_params"):
+            raise AttributeError("generate_initial_state() 未设置 current_params。")
+
+        import copy
+        curr_p = copy.deepcopy(self.current_params)
+
+        # 逐步模拟
+        for step in range(max_steps):
+            try:
+                info, next_params = simulate_one_supercycle(curr_p, L_s, t_bw_s)
+            except Exception:
+                # 异常即视为不可行
+                return False
+
+            if not is_dead_cycle(info):
+                # 任意一次失败即为必死状态
+                return False
+
+            curr_p = next_params
+
+        return True
+
+    def _get_state_copy(self):
+        return copy.deepcopy(self.current_params)
+
+    def _get_obs(self):
+        """
+        构建当前环境归一化状态向量
+        """
+        # === 1. 从 current_params 读取动态参数 ===
+        TMP0 = self.current_params.TMP0
+        q_UF = self.current_params.q_UF
+        temp = self.current_params.temp
+
+        # === 2. 计算本周期初始膜阻力 ===
+        R0 = _calculate_resistance(TMP0, q_UF, temp)
+
+        # === 3. 从 current_params 读取膜阻力增长模型参数 ===
+        nuk = self.current_params.nuK
+        slope = self.current_params.slope
+        power = self.current_params.power
+        ceb_removal = self.current_params.ceb_removal
+
+        # === 4. 从 current_params 动态读取上下限 ===
+        TMP0_min, TMP0_max = self.current_params.TMP0_min, self.current_params.TMP0_max
+        q_UF_min, q_UF_max = self.current_params.q_UF_min, self.current_params.q_UF_max
+        temp_min, temp_max = self.current_params.temp_min, self.current_params.temp_max
+        nuK_min, nuK_max = self.current_params.nuK_min, self.current_params.nuK_max
+        slope_min, slope_max = self.current_params.slope_min, self.current_params.slope_max
+        power_min, power_max = self.current_params.power_min, self.current_params.power_max
+        ceb_min, ceb_max = self.current_params.ceb_removal_min, self.current_params.ceb_removal_max
+
+        # === 5. 归一化计算(clip防止越界) ===
+        TMP0_norm = np.clip((TMP0 - TMP0_min) / (TMP0_max - TMP0_min), 0, 1)
+        q_UF_norm = np.clip((q_UF - q_UF_min) / (q_UF_max - q_UF_min), 0, 1)
+        temp_norm = np.clip((temp - temp_min) / (temp_max - temp_min), 0, 1)
+
+        # R0 不在 current_params 中定义上下限,设定经验范围
+        R0_norm = np.clip((R0 - 100.0) / (800.0 - 100.0), 0, 1)
+
+        short_term_norm = np.clip((nuk - nuK_min) / (nuK_max - nuK_min), 0, 1)
+        long_term_slope_norm = np.clip((slope - slope_min) / (slope_max - slope_min), 0, 1)
+        long_term_power_norm = np.clip((power - power_min) / (power_max - power_min), 0, 1)
+        ceb_removal_norm = np.clip((ceb_removal - ceb_min) / (ceb_max - ceb_min), 0, 1)
+
+        # === 6. 构建观测向量 ===
+        obs = np.array([
+            TMP0_norm,
+            q_UF_norm,
+            temp_norm,
+            R0_norm,
+            short_term_norm,
+            long_term_slope_norm,
+            long_term_power_norm,
+            ceb_removal_norm
+        ], dtype=np.float32)
+
+        return obs
+
+    def _get_action_values(self, action):
+        """
+        将动作还原为实际时长
+        """
+        L_idx = action // self.num_bw
+        t_bw_idx = action % self.num_bw
+        return self.L_values[L_idx], self.t_bw_values[t_bw_idx]
+
     def step(self, action):
         self.current_step += 1
         L_s, t_bw_s = self._get_action_values(action)
@@ -315,15 +574,16 @@ class UFSuperCycleEnv(gym.Env):
         t_bw_s = np.clip(t_bw_s, self.base_params.t_bw_min_s, self.base_params.t_bw_max_s)
 
         # 模拟超级周期
-        feasible, info = simulate_one_supercycle(self.current_params, L_s, t_bw_s)
+        info, next_params = simulate_one_supercycle(self.current_params, L_s, t_bw_s)
+        # 根据 info 判断是否成功
+        feasible = is_dead_cycle(info)  # True 表示成功循环,False 表示失败
 
         if feasible:
-            reward = _score(self.current_params, info)
-            self.current_params.TMP0 = info["TMP_after_ceb"]
-            self.max_TMP_during_filtration = info["max_TMP_during_filtration"]
+            reward = calculate_reward(self.current_params, info)
+            self.current_params = next_params
             terminated = False
         else:
-            reward = -20
+            reward = -10
             terminated = True
 
         truncated = self.current_step >= self.max_episode_steps
@@ -337,4 +597,3 @@ class UFSuperCycleEnv(gym.Env):
 
 
 
-

+ 7 - 7
models/uf-rl/超滤训练源码/DQN_train.py

@@ -3,9 +3,6 @@ import time
 import random
 import numpy as np
 import torch
-
-import gymnasium as gym
-from gymnasium import spaces
 from stable_baselines3 import DQN
 from stable_baselines3.common.monitor import Monitor
 from stable_baselines3.common.vec_env import DummyVecEnv
@@ -24,10 +21,10 @@ class DQNParams:
     learning_rate: float = 1e-4
 
     # 经验回放缓冲区大小(步数)
-    buffer_size: int = 10000
+    buffer_size: int = 100000
 
     # 学习开始前需要收集的步数
-    learning_starts: int = 200
+    learning_starts: int = 10000
 
     # 每次从经验池中采样的样本数量
     batch_size: int = 32
@@ -39,7 +36,10 @@ class DQNParams:
     train_freq: int = 4
 
     # 目标网络更新间隔
-    target_update_interval: int = 2000
+    target_update_interval: int = 1
+
+    # 软更新系数
+    tau: float = 0.005
 
     # 初始探索率 ε
     exploration_initial_eps: float = 1.0
@@ -240,5 +240,5 @@ if __name__ == "__main__":
 
     # 训练RL代理
     print("开始训练RL代理...")
-    train_uf_rl_agent(params, total_timesteps=50000)
+    train_uf_rl_agent(params, total_timesteps=150000)
 

+ 0 - 405
models/uf-rl/超滤训练源码/UF_decide.py

@@ -1,405 +0,0 @@
-# UF_decide.py
-from dataclasses import dataclass
-import numpy as np
-
-@dataclass
-class UFParams:
-    # —— 膜与运行参数 ——
-    q_UF: float = 360.0           # 过滤进水流量(m^3/h)
-    TMP0: float = 0.03            # 初始TMP(MPa)
-    TMP_max: float = 0.06         # TMP硬上限(MPa)
-
-    # —— 膜污染动力学 ——
-    alpha: float = 1e-6           # TMP增长系数
-    belta: float = 1.1            # 幂指数
-
-    # —— 反洗参数(固定) ——
-    q_bw_m3ph: float = 1000.0     # 物理反洗流量(m^3/h)
-
-    # —— CEB参数(固定) ——
-    T_ceb_interval_h: float = 48.0  # 固定每 k 小时做一次CEB
-    v_ceb_m3: float = 30.0        # CEB用水体积(m^3)
-    t_ceb_s: float = 40 * 60.0    # CEB时长(s)
-    phi_ceb: float = 1.0          # CEB去除比例(简化:完全恢复到TMP0)
-
-    # —— 约束与收敛 ——
-    dTMP: float = 0.0005          # 单次产水结束时,相对TMP0最大升幅(MPa)
-
-    # —— 搜索范围(秒) ——
-    L_min_s: float = 3600.0       # 过滤时长下限(s)
-    L_max_s: float = 4200.0       # 过滤时长上限(s)
-    t_bw_min_s: float = 40.0      # 物洗时长下限(s)
-    t_bw_max_s: float = 60.0      # 物洗时长上限(s)
-
-    # —— 物理反洗恢复函数参数 ——
-    phi_bw_min: float = 0.7       # 物洗去除比例最小值
-    phi_bw_max: float = 1.0       # 物洗去除比例最大值
-    L_ref_s: float = 4000.0       # 过滤时长影响时间尺度
-    tau_bw_s: float = 30.0        # 物洗时长影响时间尺度
-    gamma_t: float = 1.0          # 物洗时长作用指数
-    
-    # —— 网格 ——
-    L_step_s: float = 60.0        # 过滤时长步长(s)
-    t_bw_step_s: float = 5.0      # 物洗时长步长(s)
-
-    # 多目标加权及高TMP惩罚
-    w_rec: float = 0.8            # 回收率权重
-    w_rate: float = 0.2           # 净供水率权重
-    w_headroom: float = 0.3       # 贴边惩罚权重
-    r_headroom: float = 2.0       # 贴边惩罚幂次
-    headroom_hardcap: float = 0.98 # 超过此比例直接视为不可取
-
-def _delta_tmp(p: UFParams, L_h: float) -> float:
-    # 过滤时段TMP上升量
-    return float(p.alpha * (p.q_UF ** p.belta) * L_h)
-
-def _v_bw_m3(p: UFParams, t_bw_s: float) -> float:
-    # 物理反洗水耗
-    return float(p.q_bw_m3ph * (float(t_bw_s) / 3600.0))
-
-def phi_bw_of(p: UFParams, L_s: float, t_bw_s: float) -> float:
-    # 物洗去除比例:随过滤时长增长上界收缩,随物洗时长增长趋饱和
-    L = max(float(L_s), 1.0)
-    t = max(float(t_bw_s), 1e-6)
-    upper_L = p.phi_bw_min + (p.phi_bw_max - p.phi_bw_min) * np.exp(- L / p.L_ref_s)
-    time_gain = 1.0 - np.exp(- (t / p.tau_bw_s) ** p.gamma_t)
-    phi = upper_L * time_gain
-    return float(np.clip(phi, 0.0, 0.999))
-
-def simulate_one_supercycle(p: UFParams, L_s: float, t_bw_s: float):
-    """
-    返回 (是否可行, 指标字典)
-    - 支持动态CEB次数:48h固定间隔
-    - 增加日均产水时间和吨水电耗
-    """
-    L_h = float(L_s) / 3600.0  # 小周期过滤时间(h)
-
-    tmp = p.TMP0
-    max_tmp_during_filtration = tmp
-    max_residual_increase = 0.0
-
-    # 小周期总时长(h)
-    t_small_cycle_h = (L_s + t_bw_s) / 3600.0
-
-    # 计算超级周期内CEB次数
-    k_bw_per_ceb = int(np.floor(p.T_ceb_interval_h / t_small_cycle_h))
-    if k_bw_per_ceb < 1:
-        k_bw_per_ceb = 1  # 至少一个小周期
-
-    # ton水电耗查表
-    energy_lookup = {
-        3600: 0.1034, 3660: 0.1031, 3720: 0.1029, 3780: 0.1026,
-        3840: 0.1023, 3900: 0.1021, 3960: 0.1019, 4020: 0.1017,
-        4080: 0.1015, 4140: 0.1012, 4200: 0.1011
-    }
-
-    for _ in range(k_bw_per_ceb):
-        tmp_run_start = tmp
-
-        # 过滤阶段TMP增长
-        dtmp = _delta_tmp(p, L_h)
-        tmp_peak = tmp_run_start + dtmp
-
-        # 约束1:峰值不得超过硬上限
-        if tmp_peak > p.TMP_max + 1e-12:
-            return False, {"reason": "TMP_max violated during filtration", "TMP_peak": tmp_peak}
-
-        if tmp_peak > max_tmp_during_filtration:
-            max_tmp_during_filtration = tmp_peak
-
-        # 物理反洗
-        phi = phi_bw_of(p, L_s, t_bw_s)
-        tmp_after_bw = tmp_peak - phi * (tmp_peak - tmp_run_start)
-
-        # 约束2:单次残余增量控制
-        residual_inc = tmp_after_bw - tmp_run_start
-        if residual_inc > p.dTMP + 1e-12:
-            return False, {
-                "reason": "residual TMP increase after BW exceeded dTMP",
-                "residual_increase": residual_inc,
-                "limit_dTMP": p.dTMP
-            }
-        if residual_inc > max_residual_increase:
-            max_residual_increase = residual_inc
-
-        tmp = tmp_after_bw
-
-    # CEB
-    tmp_after_ceb = p.TMP0
-
-    # 体积与回收率
-    V_feed_super = k_bw_per_ceb * p.q_UF * L_h
-    V_loss_super = k_bw_per_ceb * _v_bw_m3(p, t_bw_s) + p.v_ceb_m3
-    V_net = max(0.0, V_feed_super - V_loss_super)
-    recovery = max(0.0, V_net / max(V_feed_super, 1e-12))
-
-    # 时间与净供水率
-    T_super_h = k_bw_per_ceb * (L_s + t_bw_s) / 3600.0 + p.t_ceb_s / 3600.0
-    net_delivery_rate_m3ph = V_net / max(T_super_h, 1e-12)
-
-    # 贴边比例与硬限
-    headroom_ratio = max_tmp_during_filtration / max(p.TMP_max, 1e-12)
-    if headroom_ratio > p.headroom_hardcap + 1e-12:
-        return False, {"reason": "headroom hardcap exceeded", "headroom_ratio": headroom_ratio}
-
-    # —— 新增指标 1:日均产水时间(h/d) ——
-    daily_prod_time_h = k_bw_per_ceb * L_h / T_super_h * 24.0
-
-    # —— 新增指标 2:吨水电耗(kWh/m³) ——
-    closest_L = min(energy_lookup.keys(), key=lambda x: abs(x - L_s))
-    ton_water_energy = energy_lookup[closest_L]
-
-    info = {
-        "recovery": recovery,
-        "V_feed_super_m3": V_feed_super,
-        "V_loss_super_m3": V_loss_super,
-        "V_net_super_m3": V_net,
-        "supercycle_time_h": T_super_h,
-        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
-        "max_TMP_during_filtration": max_tmp_during_filtration,
-        "max_residual_increase_per_run": max_residual_increase,
-        "phi_bw_effective": phi,
-        "TMP_after_ceb": tmp_after_ceb,
-        "headroom_ratio": headroom_ratio,
-        "daily_prod_time_h": daily_prod_time_h,
-        "ton_water_energy_kWh_per_m3": ton_water_energy,
-        "k_bw_per_ceb": k_bw_per_ceb
-    }
-
-    return True, info
-
-def _score(p: UFParams, rec: dict) -> float:
-    """综合评分:越大越好。不同TMP0会改变max_TMP→改变惩罚→得到不同解。"""
-    # 无量纲化净供水率
-    rate_norm = rec["net_delivery_rate_m3ph"] / max(p.q_UF, 1e-12)
-    headroom_penalty = (rec["max_TMP_during_filtration"] / max(p.TMP_max, 1e-12)) ** p.r_headroom
-    return (p.w_rec * rec["recovery"]
-            + p.w_rate * rate_norm
-            - p.w_headroom * headroom_penalty)
-
-def optimize_2d(p: UFParams,
-                L_min_s=None, L_max_s=None, L_step_s=None,
-                t_bw_min_s=None, t_bw_max_s=None, t_bw_step_s=None):
-    # 网格生成
-    L_lo = p.L_min_s if L_min_s is None else float(L_min_s)
-    L_hi = p.L_max_s if L_max_s is None else float(L_max_s)
-    L_st = p.L_step_s if L_step_s is None else float(L_step_s)
-
-    t_lo = p.t_bw_min_s if t_bw_min_s is None else float(t_bw_min_s)
-    t_hi = p.t_bw_max_s if t_bw_max_s is None else float(t_bw_max_s)
-    t_st = p.t_bw_step_s if t_bw_step_s is None else float(t_bw_step_s)
-
-    L_vals = np.arange(L_lo, L_hi + 1e-9, L_st)
-    t_vals = np.arange(t_lo, t_hi + 1e-9, t_st)
-
-    best = None
-    best_score = -np.inf
-
-    for L_s in L_vals:
-        for t_bw_s in t_vals:
-            feasible, info = simulate_one_supercycle(p, L_s, t_bw_s)
-            if not feasible:
-                continue
-
-            rec = {"L_s": float(L_s), "t_bw_s": float(t_bw_s)}
-            rec.update(info)
-
-            score = _score(p, rec)
-
-            if score > best_score + 1e-14:
-                best_score = score
-                best = rec.copy()
-                best["score"] = float(score)
-            # 若分数相同,偏好回收率更高,再偏好净供水率更高
-            elif abs(score - best_score) <= 1e-14:
-                if (rec["recovery"] > best["recovery"] + 1e-12) or (
-                    abs(rec["recovery"] - best["recovery"]) <= 1e-12 and
-                    rec["net_delivery_rate_m3ph"] > best["net_delivery_rate_m3ph"] + 1e-12
-                ):
-                    best = rec.copy()
-                    best["score"] = float(score)
-
-    if best is None:
-        return {"status": "no-feasible-solution"}
-    best["status"] = "feasible"
-    return best
-
-def run_uf_decision(TMP0: float = None) -> dict:
-    if TMP0 is None:
-        rng = np.random.default_rng()
-        TMP0 = rng.uniform(0.03, 0.04)  # 初始TMP随机
-
-    params = UFParams(
-        q_UF=360.0,
-        TMP_max=0.05,
-        alpha=1.2e-6,
-        belta=1.0,
-        q_bw_m3ph=1000.0,
-        T_ceb_interval_h=48,
-        v_ceb_m3=30.0,
-        t_ceb_s=40*60.0,
-        phi_ceb=1.0,
-        dTMP=0.001,
-
-        L_min_s=3600.0, L_max_s=4200.0, L_step_s=30.0,
-        t_bw_min_s=90.0, t_bw_max_s=100.0, t_bw_step_s=2.0,
-
-        phi_bw_min=0.70, phi_bw_max=1.00,
-        L_ref_s=500.0, tau_bw_s=40.0, gamma_t=1.0,
-
-        TMP0=TMP0,
-
-        w_rec=0.7, w_rate=0.3, w_headroom=0.3, r_headroom=2.0, headroom_hardcap=0.9
-    )
-
-    result = optimize_2d(params)
-    if result.get("status") == "feasible":
-        return {
-            "L_s": result["L_s"],
-            "t_bw_s": result["t_bw_s"],
-            "recovery": result["recovery"],
-            "k_bw_per_ceb": result["k_bw_per_ceb"],
-            "daily_prod_time_h": result["daily_prod_time_h"],
-            "ton_water_energy_kWh_per_m3": result["ton_water_energy_kWh_per_m3"]
-        }
-
-    # 若没有可行解,返回最小过滤时间和默认值
-    return {
-        "L_s": params.L_min_s,
-        "t_bw_s": params.t_bw_min_s,
-        "recovery": 0.0,
-        "k_bw_per_ceb": 1,
-        "daily_prod_time_h": 0.0,
-        "ton_water_energy_kWh_per_m3": 0.0
-    }
-
-
-def generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s):
-    """
-    根据工厂当前值、模型上一轮决策值和模型当前轮决策值,生成PLC指令。
-
-    新增功能:
-    1. 处理None值情况:如果模型上一轮值为None,则使用工厂当前值;
-       如果工厂当前值也为None,则返回None并提示错误。
-    """
-    # 参数配置保持不变
-    params = UFParams(
-        L_min_s=3600.0, L_max_s=6000.0, L_step_s=60.0,
-        t_bw_min_s=40.0, t_bw_max_s=60.0, t_bw_step_s=5.0,
-    )
-
-    # 参数解包
-    L_step_s = params.L_step_s
-    t_bw_step_s = params.t_bw_step_s
-    L_min_s = params.L_min_s
-    L_max_s = params.L_max_s
-    t_bw_min_s = params.t_bw_min_s
-    t_bw_max_s = params.t_bw_max_s
-    adjustment_threshold = 1.0
-
-    # 处理None值情况
-    if model_prev_L_s is None:
-        if current_L_s is None:
-            print("错误: 过滤时长的工厂当前值和模型上一轮值均为None")
-            return None, None
-        else:
-            # 使用工厂当前值作为基准
-            effective_current_L = current_L_s
-            source_L = "工厂当前值(模型上一轮值为None)"
-    else:
-        # 模型上一轮值不为None,继续检查工厂当前值
-        if current_L_s is None:
-            effective_current_L = model_prev_L_s
-            source_L = "模型上一轮值(工厂当前值为None)"
-        else:
-            # 两个值都不为None,比较哪个更接近模型当前建议值
-            current_to_model_diff = abs(current_L_s - model_L_s)
-            prev_to_model_diff = abs(model_prev_L_s - model_L_s)
-
-            if current_to_model_diff <= prev_to_model_diff:
-                effective_current_L = current_L_s
-                source_L = "工厂当前值"
-            else:
-                effective_current_L = model_prev_L_s
-                source_L = "模型上一轮值"
-
-    # 对反洗时长进行同样的处理
-    if model_prev_t_bw_s is None:
-        if current_t_bw_s is None:
-            print("错误: 反洗时长的工厂当前值和模型上一轮值均为None")
-            return None, None
-        else:
-            effective_current_t_bw = current_t_bw_s
-            source_t_bw = "工厂当前值(模型上一轮值为None)"
-    else:
-        if current_t_bw_s is None:
-            effective_current_t_bw = model_prev_t_bw_s
-            source_t_bw = "模型上一轮值(工厂当前值为None)"
-        else:
-            current_to_model_t_bw_diff = abs(current_t_bw_s - model_t_bw_s)
-            prev_to_model_t_bw_diff = abs(model_prev_t_bw_s - model_t_bw_s)
-
-            if current_to_model_t_bw_diff <= prev_to_model_t_bw_diff:
-                effective_current_t_bw = current_t_bw_s
-                source_t_bw = "工厂当前值"
-            else:
-                effective_current_t_bw = model_prev_t_bw_s
-                source_t_bw = "模型上一轮值"
-
-    # 检测所有输入值是否在规定范围内(只对非None值进行检查)
-    # 工厂当前值检查(警告)
-    if current_L_s is not None and not (L_min_s <= current_L_s <= L_max_s):
-        print(f"警告: 当前过滤时长 {current_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
-    if current_t_bw_s is not None and not (t_bw_min_s <= current_t_bw_s <= t_bw_max_s):
-        print(f"警告: 当前反洗时长 {current_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
-
-    # 模型上一轮决策值检查(警告)
-    if model_prev_L_s is not None and not (L_min_s <= model_prev_L_s <= L_max_s):
-        print(f"警告: 模型上一轮过滤时长 {model_prev_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
-    if model_prev_t_bw_s is not None and not (t_bw_min_s <= model_prev_t_bw_s <= t_bw_max_s):
-        print(f"警告: 模型上一轮反洗时长 {model_prev_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
-
-    # 模型当前轮决策值检查(错误)
-    if model_L_s is None:
-        raise ValueError("错误: 决策模型建议的过滤时长不能为None")
-    elif not (L_min_s <= model_L_s <= L_max_s):
-        raise ValueError(f"错误: 决策模型建议的过滤时长 {model_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
-
-    if model_t_bw_s is None:
-        raise ValueError("错误: 决策模型建议的反洗时长不能为None")
-    elif not (t_bw_min_s <= model_t_bw_s <= t_bw_max_s):
-        raise ValueError(f"错误: 决策模型建议的反洗时长 {model_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
-
-    print(f"过滤时长基准: {source_L}, 值: {effective_current_L}")
-    print(f"反洗时长基准: {source_t_bw}, 值: {effective_current_t_bw}")
-
-    # 使用选定的基准值进行计算调整
-    L_diff = model_L_s - effective_current_L
-    L_adjustment = 0
-    if abs(L_diff) > adjustment_threshold * L_step_s:
-        if L_diff > 0:
-            L_adjustment = L_step_s
-        else:
-            L_adjustment = -L_step_s
-    next_L_s = effective_current_L + L_adjustment
-
-    t_bw_diff = model_t_bw_s - effective_current_t_bw
-    t_bw_adjustment = 0
-    if abs(t_bw_diff) > adjustment_threshold * t_bw_step_s:
-        if t_bw_diff > 0:
-            t_bw_adjustment = t_bw_step_s
-        else:
-            t_bw_adjustment = -t_bw_step_s
-    next_t_bw_s = effective_current_t_bw + t_bw_adjustment
-
-    return next_L_s, next_t_bw_s
-
-
-current_L_s = 3920
-current_t_bw_s = 98
-model_prev_L_s = None
-model_prev_t_bw_s = None
-model_L_s = 4160
-model_t_bw_s = 96
-next_L_s, next_t_bw_s = generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s)
-print(f"next_L_s={next_L_s}, next_t_bw_s={next_t_bw_s}")

+ 0 - 33
models/uf-rl/超滤训练源码/UF_models.py

@@ -1,33 +0,0 @@
-import torch
-import numpy as np
-
-# TMP 上升量模型
-class TMPIncreaseModel(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-    def forward(self, p, L_h):
-        return float(p.alpha * (p.q_UF ** p.belta) * L_h)
-
-# 反洗 TMP 去除模型
-class TMPDecreaseModel(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-    def forward(self, p, L_s, t_bw_s):
-        L = max(float(L_s), 1.0)
-        t = max(float(t_bw_s), 1e-6)
-        upper_L = p.phi_bw_min + (p.phi_bw_max - p.phi_bw_min) * np.exp(- L / p.L_ref_s)
-        time_gain = 1.0 - np.exp(- (t / p.tau_bw_s) ** p.gamma_t)
-        phi = upper_L * time_gain
-        return float(np.clip(phi, 0.0, 0.999))
-
-
-if __name__ == "__main__":
-    model_fp = TMPIncreaseModel()
-    model_bw = TMPDecreaseModel()
-
-
-    torch.save(model_fp.state_dict(), "uf_fp.pth")
-    torch.save(model_bw.state_dict(), "uf_bw.pth")
-
-
-    print("模型已安全保存为 uf_fp.pth、uf_bw.pth")

+ 0 - 0
models/uf-rl/超滤训练源码/uf_resistance_models.py → models/uf-rl/超滤训练源码/UF_resistance_models.py


+ 138 - 0
models/uf-rl/超滤训练源码/check_initial_state.py

@@ -0,0 +1,138 @@
+# check_initial_state.py
+"""
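+Check whether a generated initial state is "conservatively dead".
+
+1) Obtain base_params: prefer the base_params instance exported by the
+   environment module (imported from DQN_env); otherwise build one with
+   UFParams().
+2) Instantiate the environment: UFSuperCycleEnv(base_params).
+3) Call env.generate_initial_state() to populate env.current_params
+   (reset() is not called).
+4) Simulate up to max_steps consecutive supercycles with one fixed
+   (L_s, t_bw_s) policy. If is_dead_cycle(info) returns False for any cycle,
+   the state is judged dead (result True); otherwise the result is False.
+
+The fixed policy defaults to L_s=4200 s, t_bw_s=50 s in
+is_dead_initial_state_env(); the __main__ block passes L_s=6000 s,
+t_bw_s=40 s.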
+"""
+
+from typing import Any
+import copy
+import traceback
+
+# 从 rl_dqn_env 导入必需项
+try:
+    from DQN_env import (
+        simulate_one_supercycle,
+        is_dead_cycle,
+        UFSuperCycleEnv,
+        UFParams,       # 如果模块里有 UFParams 类就导入
+        base_params     # 如果模块直接提供 base_params 实例也尝试导入
+    )
+except Exception:
+    # 有可能某些名字不存在 —— 我们会稍后用回退方案处理
+    # 先导入模块并再尝试访问属性,确保错误信息更友好
+    import importlib
+    rl = importlib.import_module("rl_dqn_env")
+    simulate_one_supercycle = getattr(rl, "simulate_one_supercycle", None)
+    is_dead_cycle = getattr(rl, "is_dead_cycle", None)
+    UFSuperCycleEnv = getattr(rl, "UFSuperCycleEnv", None)
+    UFParams = getattr(rl, "UFParams", None)
+    base_params = getattr(rl, "base_params", None)
+
+# 检查导入完整性
+_missing = []
+if simulate_one_supercycle is None:
+    _missing.append("simulate_one_supercycle")
+if is_dead_cycle is None:
+    _missing.append("is_dead_cycle")
+if UFSuperCycleEnv is None:
+    _missing.append("UFSuperCycleEnv")
+if _missing:
+    raise ImportError(f"无法从 rl_dqn_env 导入以下必要项: {', '.join(_missing)}")
+
+def is_dead_initial_state_env(env: UFSuperCycleEnv, max_steps: int = 15,
+                              L_s: int = 4200, t_bw_s: int = 50,
+                              verbose: bool = True) -> bool:
+    """
+    Decide whether the initial state generated by ``env`` is "dead".
+
+    The function calls ``env.generate_initial_state()``, deep-copies
+    ``env.current_params`` and then simulates up to ``max_steps`` consecutive
+    supercycles with one fixed ``(L_s, t_bw_s)`` policy, feeding the parameters
+    returned by each cycle into the next one.
+
+    Args:
+        env: An instantiated UFSuperCycleEnv. It must provide
+            ``generate_initial_state()`` and expose ``current_params`` after
+            that call.
+        max_steps: Number of supercycles to simulate (default 15).
+        L_s: Filtration duration in seconds used for every cycle
+            (default 4200).
+        t_bw_s: Physical backwash duration in seconds used for every cycle
+            (default 50).
+        verbose: Whether to print the outcome of each step.
+
+    Returns:
+        True if the state is judged dead (a cycle failed or the simulation
+        raised an exception); False if all ``max_steps`` cycles succeeded.
+
+    Raises:
+        AttributeError: If ``env`` has no ``generate_initial_state`` method, or
+            has no ``current_params`` attribute after it was called.
+    """
+    # 1) Make sure generate_initial_state is available before using it.
+    if not hasattr(env, "generate_initial_state"):
+        raise AttributeError("env 缺少 generate_initial_state() 方法。")
+    # Generate the initial state directly (reset() is not called here).
+    env.generate_initial_state()
+
+    if not hasattr(env, "current_params"):
+        raise AttributeError("env.generate_initial_state() 未设置 env.current_params。")
+
+    # Work on a deep copy of the generated parameters.
+    curr_p = copy.deepcopy(env.current_params)
+
+    for step in range(1, max_steps + 1):
+        try:
+            info, next_params = simulate_one_supercycle(curr_p, L_s, t_bw_s)
+        except Exception as e:
+            # Conservative handling: a simulation error counts as a failed cycle.
+            if verbose:
+                print(f"[Step {step}] simulate_one_supercycle 抛出异常,视为失败。异常信息:{e}")
+                traceback.print_exc()
+            return True
+
+        # NOTE: despite its name, is_dead_cycle() is treated here as returning
+        # True for a successful cycle and False for a failed one.
+        success = is_dead_cycle(info)
+
+        if verbose:
+            print(f"[Step {step}] 循环结果:{'成功' if success else '失败'}")
+            # Print a short diagnostic summary when info exposes these fields;
+            # any error while formatting it is deliberately ignored.
+            try:
+                print(f"     TMP0: {info.get('TMP0')},max_TMP: {info.get('max_TMP_during_filtration')}, recovery: {info.get('recovery')}, "
+                      f"R0: {info.get('R0')}, R_after_ceb: {info.get('R_after_ceb')}")
+            except Exception:
+                pass
+
+        if not success:
+            if verbose:
+                print(f"在第 {step} 步检测到失败,判定为必死初始状态(conservatively dead)。")
+            return True
+
+        # Otherwise continue, starting the next cycle from next_params.
+        curr_p = next_params
+
+    if verbose:
+        print(f"{max_steps} 步均成功,初始状态判定为可行(non-dead)。")
+    return False
+
+
+# Script entry point: build the environment, generate one initial state and
+# report whether it is judged dead under a fixed policy.
+if __name__ == "__main__":
+    print("=== check_initial_state.py: 使用 env.generate_initial_state() 检查初始状态是否为必死 ===")
+
+    try:
+        # 1) Obtain base_params: prefer the imported instance, otherwise build
+        #    one with the default UFParams() constructor.
+        if base_params is not None:
+            bp = base_params
+            print("使用 rl_dqn_env 中提供的 base_params。")
+        elif UFParams is not None:
+            bp = UFParams()  # default construction
+            print("使用 UFParams() 构造 base_params 的实例。")
+        else:
+            raise ImportError("无法构造 base_params:rl_dqn_env 中既无 base_params 也无 UFParams。")
+
+        # 2) Instantiate the environment, passing base_params to the constructor.
+        env = UFSuperCycleEnv(bp)
+        print("已实例化 UFSuperCycleEnv 环境。")
+
+        # 3) Generate an initial state and check whether it is dead. The step
+        #    count follows env.max_episode_steps when present (else 15), and
+        #    the policy is overridden to L_s=6000, t_bw_s=40 instead of the
+        #    function defaults.
+        dead = is_dead_initial_state_env(env, max_steps=getattr(env, "max_episode_steps", 15),
+                                        L_s=6000, t_bw_s=40, verbose=True)
+
+        print("\n=== 判定结果 ===")
+        if dead:
+            print("当前生成的初始状态为【必死状态】(conservatively dead)。")
+        else:
+            print("当前生成的初始状态为【可行状态】(non-dead)。")
+
+    except Exception as e:
+        # Top-level boundary: report the error with a traceback instead of
+        # letting it propagate.
+        print("脚本执行出现错误:", e)
+        traceback.print_exc()

BIN
models/uf-rl/超滤训练源码/uf_bw.pth


BIN
models/uf-rl/超滤训练源码/uf_fp.pth