Procházet zdrojové kódy

feat: 完成模型训练并上传代码

- 上传训练脚本及相关依赖代码
junc. před 5 měsíci
rodič
revize
48850ed4c8

+ 0 - 246
models/uf-rl/超滤训练源码/DQN_decide.py

@@ -1,246 +0,0 @@
-import numpy as np
-from stable_baselines3 import DQN
-from UF_super_RL.DQN_env import UFSuperCycleEnv
-from UF_super_RL.DQN_env import UFParams
-
-# 模型路径
-MODEL_PATH = "dqn_model.zip"
-
-# 加载模型(只加载一次,提高效率)
-model = DQN.load(MODEL_PATH)
-
-def run_uf_DQN_decide(uf_params, TMP0_value: float):
-    """
-    单步决策函数:输入原始 TMP0,预测并执行动作
-
-    参数:
-        TMP0_value (float): 当前 TMP0 值(单位与环境一致)
-
-    返回:
-        dict: 包含模型选择的动作、动作参数、新状态、奖励等
-    """
-    # 1. 实例化环境
-    base_params = uf_params
-    env = UFSuperCycleEnv(base_params)
-
-    # 2. 将输入的 TMP0 写入环境
-    env.current_params.TMP0 = TMP0_value
-
-    # 3. 获取归一化状态
-    obs = env._get_obs().reshape(1, -1)
-
-    # 4. 模型预测动作
-    action, _ = model.predict(obs, deterministic=True)
-
-    # 5. 解析动作对应的 L_s 和 t_bw_s
-    L_s, t_bw_s = env._get_action_values(action[0])
-
-    # 6. 在环境中执行该动作
-    next_obs, reward, terminated, truncated, info = env.step(action[0])
-
-    # 7. 整理结果
-    result = {
-        "action": int(action[0]),
-        "L_s": float(L_s),
-        "t_bw_s": float(t_bw_s),
-        "next_obs": next_obs,
-        "reward": reward,
-        "terminated": terminated,
-        "truncated": truncated,
-        "info": info
-    }
-
-    # 8. 关闭环境
-    env.close()
-
-    return result
-
-def generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s):
-    """
-    根据工厂当前值、模型上一轮决策值和模型当前轮决策值,生成PLC指令。
-
-    新增功能:
-    1. 处理None值情况:如果模型上一轮值为None,则使用工厂当前值;
-       如果工厂当前值也为None,则返回None并提示错误。
-    """
-    # 参数配置保持不变
-    params = UFParams(
-        L_min_s=3600.0, L_max_s=6000.0, L_step_s=60.0,
-        t_bw_min_s=40.0, t_bw_max_s=60.0, t_bw_step_s=5.0,
-    )
-
-    # 参数解包
-    L_step_s = params.L_step_s
-    t_bw_step_s = params.t_bw_step_s
-    L_min_s = params.L_min_s
-    L_max_s = params.L_max_s
-    t_bw_min_s = params.t_bw_min_s
-    t_bw_max_s = params.t_bw_max_s
-    adjustment_threshold = 1.0
-
-    # 处理None值情况
-    if model_prev_L_s is None:
-        if current_L_s is None:
-            print("错误: 过滤时长的工厂当前值和模型上一轮值均为None")
-            return None, None
-        else:
-            # 使用工厂当前值作为基准
-            effective_current_L = current_L_s
-            source_L = "工厂当前值(模型上一轮值为None)"
-    else:
-        # 模型上一轮值不为None,继续检查工厂当前值
-        if current_L_s is None:
-            effective_current_L = model_prev_L_s
-            source_L = "模型上一轮值(工厂当前值为None)"
-        else:
-            effective_current_L = model_prev_L_s
-            source_L = "模型上一轮值"
-
-    # 对反洗时长进行同样的处理
-    if model_prev_t_bw_s is None:
-        if current_t_bw_s is None:
-            print("错误: 反洗时长的工厂当前值和模型上一轮值均为None")
-            return None, None
-        else:
-            effective_current_t_bw = current_t_bw_s
-            source_t_bw = "工厂当前值(模型上一轮值为None)"
-    else:
-        if current_t_bw_s is None:
-            effective_current_t_bw = model_prev_t_bw_s
-            source_t_bw = "模型上一轮值(工厂当前值为None)"
-        else:
-            effective_current_t_bw = model_prev_t_bw_s
-            source_t_bw = "模型上一轮值"
-
-    # 检测所有输入值是否在规定范围内(只对非None值进行检查)
-    # 工厂当前值检查(警告)
-    if current_L_s is not None and not (L_min_s <= current_L_s <= L_max_s):
-        print(f"警告: 当前过滤时长 {current_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
-    if current_t_bw_s is not None and not (t_bw_min_s <= current_t_bw_s <= t_bw_max_s):
-        print(f"警告: 当前反洗时长 {current_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
-
-    # 模型上一轮决策值检查(警告)
-    if model_prev_L_s is not None and not (L_min_s <= model_prev_L_s <= L_max_s):
-        print(f"警告: 模型上一轮过滤时长 {model_prev_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
-    if model_prev_t_bw_s is not None and not (t_bw_min_s <= model_prev_t_bw_s <= t_bw_max_s):
-        print(f"警告: 模型上一轮反洗时长 {model_prev_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
-
-    # 模型当前轮决策值检查(错误)
-    if model_L_s is None:
-        raise ValueError("错误: 决策模型建议的过滤时长不能为None")
-    elif not (L_min_s <= model_L_s <= L_max_s):
-        raise ValueError(f"错误: 决策模型建议的过滤时长 {model_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
-
-    if model_t_bw_s is None:
-        raise ValueError("错误: 决策模型建议的反洗时长不能为None")
-    elif not (t_bw_min_s <= model_t_bw_s <= t_bw_max_s):
-        raise ValueError(f"错误: 决策模型建议的反洗时长 {model_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
-
-    print(f"过滤时长基准: {source_L}, 值: {effective_current_L}")
-    print(f"反洗时长基准: {source_t_bw}, 值: {effective_current_t_bw}")
-
-    # 使用选定的基准值进行计算调整
-    L_diff = model_L_s - effective_current_L
-    L_adjustment = 0
-    if abs(L_diff) >= adjustment_threshold * L_step_s:
-        if L_diff >= 0:
-            L_adjustment = L_step_s
-        else:
-            L_adjustment = -L_step_s
-    next_L_s = effective_current_L + L_adjustment
-
-    t_bw_diff = model_t_bw_s - effective_current_t_bw
-    t_bw_adjustment = 0
-    if abs(t_bw_diff) >= adjustment_threshold * t_bw_step_s:
-        if t_bw_diff >= 0:
-            t_bw_adjustment = t_bw_step_s
-        else:
-            t_bw_adjustment = -t_bw_step_s
-    next_t_bw_s = effective_current_t_bw + t_bw_adjustment
-
-    return next_L_s, next_t_bw_s
-
-
-from UF_super_RL.DQN_env import simulate_one_supercycle
-def calc_uf_cycle_metrics(p, TMP0, max_tmp_during_filtration, min_tmp_during_filtration, L_s: float, t_bw_s: float):
-    """
-    计算 UF 超滤系统的核心性能指标
-
-    参数:
-        p (UFParams): UF 系统参数
-        L_s (float): 单次过滤时间(秒)
-        t_bw_s (float): 单次反洗时间(秒)
-
-    返回:
-        dict: {
-            "k_bw_per_ceb": 小周期次数,
-            "ton_water_energy_kWh_per_m3": 吨水电耗,
-            "recovery": 回收率,
-            "net_delivery_rate_m3ph": 净供水率 (m³/h),
-            "daily_prod_time_h": 日均产水时间 (小时/天)
-            "max_permeability": 全周期最高渗透率(lmh/bar)
-        }
-    """
-    # 将跨膜压差写入参数
-    p.TMP0 = TMP0
-
-    # 模拟该参数下的超级周期
-    feasible, info = simulate_one_supercycle(p, L_s, t_bw_s)
-
-    # 获得模型模拟周期信息
-    k_bw_per_ceb = info["k_bw_per_ceb"]
-    ton_water_energy_kWh_per_m3 = info["ton_water_energy_kWh_per_m3"]
-    recovery = info["recovery"]
-    net_delivery_rate_m3ph = info["net_delivery_rate_m3ph"]
-    daily_prod_time_h = info["daily_prod_time_h"]
-
-    # 获得模型模拟周期内最高跨膜压差/最低跨膜压差
-    if max_tmp_during_filtration is None:
-        max_tmp_during_filtration = info["max_TMP_during_filtration"]
-    if min_tmp_during_filtration is None:
-        min_tmp_during_filtration = info["min_TMP_during_filtration"]
-
-    # 计算最高渗透率
-    max_permeability = 100 * p.q_UF / (128*40) / min_tmp_during_filtration
-
-
-    return {
-        "k_bw_per_ceb": k_bw_per_ceb,
-        "ton_water_energy_kWh_per_m3": ton_water_energy_kWh_per_m3,
-        "recovery": recovery,
-        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
-        "daily_prod_time_h": daily_prod_time_h,
-        "max_permeability": max_permeability
-    }
-
-
-# ==============================
-# 示例调用
-# ==============================
-if __name__ == "__main__":
-    uf_params = UFParams()
-    TMP0 = 0.03 # 原始 TMP0
-    model_decide_result = run_uf_DQN_decide(uf_params, TMP0) # 调用模型获得动作
-    model_L_s = model_decide_result['L_s'] # 获得模型决策产水时长
-    model_t_bw_s = model_decide_result['t_bw_s'] # 获得模型决策反洗时长
-
-    current_L_s = 3800
-    current_t_bw_s = 40
-    model_prev_L_s = 4040
-    model_prev_t_bw_s = 60
-    L_s, t_bw_s = generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s) # 获取模型下发指令
-
-    L_s = 4100
-    t_bw_s = 96
-    max_tmp_during_filtration = 0.050176 # 新增工厂数据接口:周期最高/最低跨膜压差,无工厂数据接入时传入None,calc_uf_cycle_metrics()自动获取模拟周期中的跨膜压差最值
-    min_tmp_during_filtration = 0.012496
-    execution_result = calc_uf_cycle_metrics(uf_params, TMP0, max_tmp_during_filtration, min_tmp_during_filtration, L_s, t_bw_s)
-    print("\n===== 单步决策结果 =====")
-    print(f"模型选择的动作: {model_decide_result['action']}")
-    print(f"模型选择的L_s: {model_L_s} 秒, 模型选择的t_bw_s: {model_t_bw_s} 秒")
-    print(f"指令下发的L_s: {L_s} 秒, 指令下发的t_bw_s: {t_bw_s} 秒")
-    print(f"指令对应的反洗次数: {execution_result['k_bw_per_ceb']}")
-    print(f"指令对应的吨水电耗: {execution_result['ton_water_energy_kWh_per_m3']}")
-    print(f"指令对应的回收率: {execution_result['recovery']}")
-    print(f"指令对应的日均产水时间: {execution_result['daily_prod_time_h']}")
-    print(f"指令对应的最高渗透率: {execution_result['max_permeability']}")

+ 418 - 159
models/uf-rl/超滤训练源码/DQN_env.py

@@ -1,44 +1,58 @@
 import os
 import os
-import time
-import random
+import torch
+from pathlib import Path
 import numpy as np
 import numpy as np
 import gymnasium as gym
 import gymnasium as gym
 from gymnasium import spaces
 from gymnasium import spaces
-from stable_baselines3 import DQN
-from stable_baselines3.common.monitor import Monitor
-from stable_baselines3.common.vec_env import DummyVecEnv
-from stable_baselines3.common.callbacks import BaseCallback
 from typing import Dict, Tuple, Optional
 from typing import Dict, Tuple, Optional
 import torch
 import torch
 import torch.nn as nn
 import torch.nn as nn
 from dataclasses import dataclass, asdict
 from dataclasses import dataclass, asdict
-from UF_models import TMPIncreaseModel, TMPDecreaseModel  # 导入模型类
+from UF_resistance_models import ResistanceIncreaseModel, ResistanceDecreaseModel  # 导入模型类
 import copy
 import copy
 
 
-
-# ==== 定义膜的基础运行参数 ====
+# =======================
+# 膜运行参数类:定义膜的基础运行参数
+# =======================
 @dataclass
 @dataclass
 class UFParams:
 class UFParams:
-    # —— 膜运行参数 ——
+    # —— 膜动态运行参数 ——
     q_UF: float = 360.0  # 过滤进水流量(m^3/h)
     q_UF: float = 360.0  # 过滤进水流量(m^3/h)
-    TMP0: float = 0.03  # 初始TMP(MPa)
-    TMP_max: float = 0.06  # TMP硬上限(MPa)
-
-    # —— 膜污染动力学 ——
-    alpha: float = 1e-6  # TMP增长系数
-    belta: float = 1.1  # 幂指数
+    TMP0: float = 0.03 # 初始跨膜压差
+    temp: float = 25.0  # 水温,摄氏度
+
+    # —— 膜阻力模型参数 ——
+    nuK: float =4.92e+01 # 过滤阶段膜阻力增长模型参数
+    slope: float = 3.44e-01 # 全周期不可逆污染阻力增长斜率
+    power: float = 1.032 # 全周期不可逆污染阻力增长幂次
+    tau_bw_s: float = 30.0  # 物洗时长影响时间尺度
+    gamma_t: float = 1.0  # 物洗时长作用指数
+    ceb_removal: float = 150  # CEB去除膜阻力
+
+    # —— 膜运行约束参数 ——
+    global_TMP_limit: float = 0.08  # TMP硬上限(MPa)
+    TMP0_max: float = 0.035 # 初始TMP上限(MPa)
+    TMP0_min: float = 0.01 # 初始TMP下限(MPa)
+    q_UF_max: float = 400.0 # 进水流量上限(m^3/h)
+    q_UF_min: float = 250.0 # 进水流量下限(m^3/h)
+    temp_max: float = 40.0 # 温度上限(摄氏度)
+    temp_min: float = 10.0 # 温度下限(摄氏度)
+    nuK_max: float = 6e+01 # 物理周期总阻力增速上限(m^-1/s)
+    nuK_min: float = 3e+01 # 物理周期总阻力增速下限(m^-1/s)
+    slope_max: float = 10 # 化学周期长期阻力增速斜率上限
+    slope_min: float = 0.1 # 化学周期长期阻力增速斜率下限
+    power_max: float = 1.3 # 化学周期长期阻力增速幂次上限
+    power_min: float = 0.8 # 化学周期长期阻力增速幂次下限
+    ceb_removal_max: float = 150 # CEB去除阻力(已缩放)上限(m^-1)
+    ceb_removal_min: float = 100 # CEB去除阻力(已缩放)下限(m^-1)
 
 
     # —— 反洗参数(固定) ——
     # —— 反洗参数(固定) ——
     q_bw_m3ph: float = 1000.0  # 物理反洗流量(m^3/h)
     q_bw_m3ph: float = 1000.0  # 物理反洗流量(m^3/h)
 
 
-    # —— CEB参数(固定) ——
-    T_ceb_interval_h: float = 48.0  # 固定每 k 小时做一次CEB
+    # —— CEB参数 ——
+    T_ceb_interval_h: float = 60.0  # 固定每 k 小时做一次CEB
     v_ceb_m3: float = 30.0  # CEB用水体积(m^3)
     v_ceb_m3: float = 30.0  # CEB用水体积(m^3)
     t_ceb_s: float = 40 * 60.0  # CEB时长(s)
     t_ceb_s: float = 40 * 60.0  # CEB时长(s)
-    phi_ceb: float = 1.0  # CEB去除比例(简化:完全恢复到TMP0)
-
-    # —— 约束与收敛 ——
-    dTMP: float = 0.001  # 单次产水结束时,相对TMP0最大升幅(MPa)
 
 
     # —— 搜索范围(秒) ——
     # —— 搜索范围(秒) ——
     L_min_s: float = 3800.0  # 过滤时长下限(s)
     L_min_s: float = 3800.0  # 过滤时长下限(s)
@@ -46,55 +60,115 @@ class UFParams:
     t_bw_min_s: float = 40.0  # 物洗时长下限(s)
     t_bw_min_s: float = 40.0  # 物洗时长下限(s)
     t_bw_max_s: float = 60.0  # 物洗时长上限(s)
     t_bw_max_s: float = 60.0  # 物洗时长上限(s)
 
 
-    # —— 物理反洗恢复函数参数 ——
-    phi_bw_min: float = 0.7  # 物洗去除比例最小值
-    phi_bw_max: float = 1.0  # 物洗去除比例最大值
-    L_ref_s: float = 4000.0  # 过滤时长影响时间尺度
-    tau_bw_s: float = 20.0  # 物洗时长影响时间尺度
-    gamma_t: float = 1.0  # 物洗时长作用指数
-
     # —— 网格 ——
     # —— 网格 ——
     L_step_s: float = 60.0  # 过滤时长步长(s)
     L_step_s: float = 60.0  # 过滤时长步长(s)
     t_bw_step_s: float = 5.0  # 物洗时长步长(s)
     t_bw_step_s: float = 5.0  # 物洗时长步长(s)
 
 
-    # 多目标加权及高TMP惩罚
-    w_rec: float = 0.8  # 回收率权重
-    w_rate: float = 0.2  # 净供水率权重
-    w_headroom: float = 0.2  # 贴边惩罚权重
-    r_headroom: float = 2.0  # 贴边惩罚幂次
-    headroom_hardcap: float = 0.98  # 超过此比例直接视为不可取
+    # —— 奖励函数参数 ——
+    k_rec = 5.0      # 回收率敏感度
+    k_res = 10.0     # 残余污染敏感度
+    rec_low, rec_high = 0.92, 0.99
+    rr0 = 0.08
 
 
-# ==== 加载模拟环境模型 ====
-# 初始化模型
-model_fp = TMPIncreaseModel()
-model_bw = TMPDecreaseModel()
 
 
-# 加载参数
-model_fp.load_state_dict(torch.load("uf_fp.pth"))
-model_bw.load_state_dict(torch.load("uf_bw.pth"))
+# =======================
+# 辅助函数:转换膜阻力与跨膜压差
+# =======================
 
 
-# 切换到推理模式
-model_fp.eval()
-model_bw.eval()
+def xishan_viscosity(temp):
+    # temp: 水温,单位摄氏度
+    """
+    锡山水厂 PLC水温校正因子经验公式(25摄氏度标准)
+    返回温度修正后的水粘度(纯水修正),TODO:水厂水质与纯水相差较大,对粘度有一定影响
+    """
+    x = (temp + 273.15) / 300
+    factor = 890 / (280.68 * x ** -1.9 + 511.45 * x ** -7.7 + 61.131 * x ** -19.6 + 0.45903 * x ** -40)
+    mu = 0.00089 / factor
+    return mu
+
+def _calculate_resistance(tmp, q_UF, temp):
+    """
+    计算超滤膜阻力 R = TMP / (J * μ)
+    返回缩小 1e10 倍的膜阻力(超滤原膜阻力量级为 1e12,过大的绝对值不利于平稳拟合)
+    """
+    A = 128 * 40  # m²,有效膜面积
+    mu = xishan_viscosity(temp) # 温度修正后的水粘度
+    TMP_Pa = tmp * 1e6  # 跨膜压差 MPa -> Pa
+    J = q_UF / A / 3600  # 通量 m³/h -> m³/(m²·s)
+    if J <= 0 or mu <= 0:
+        return np.nan
+    R = TMP_Pa / (J * mu) / 1e10 # 缩放膜阻力
 
 
+    return float(R)
 
 
-def _delta_tmp(p, L_h: float) -> float:
+def _calculate_tmp(R, q_UF, temp):
     """
     """
-    过滤时段TMP上升量:调用 uf_fp.pth 模型
+    还原超滤跨膜压差 TMP
     """
     """
-    return model_fp(p, L_h)
+    A = 128 * 40  # m²,有效膜面积
+    mu = xishan_viscosity(temp) # 温度修正后的水粘度
+    J = q_UF / A / 3600  # 通量 m³/h -> m³/(m²·s)
+    TMP_Pa = R * J * mu * 1e10
+    tmp = TMP_Pa / 1e6
+
+    return float(tmp)
+
+
+# =======================
+# 环境体模型加载函数
+# =======================
+def load_resistance_models():
+    """加载阻力变化模型,仅在首次调用时执行"""
+
+    global resistance_model_fp, resistance_model_bw
+
+    # 如果全局模型已存在,则直接返回
+    if "resistance_model_fp" in globals() and resistance_model_fp is not None:
+        return resistance_model_fp, resistance_model_bw
+
+    print("🔄 Loading resistance models...")
+
+    # 初始化模型
+    resistance_model_fp = ResistanceIncreaseModel()
+    resistance_model_bw = ResistanceDecreaseModel()
+
+    # 取得当前脚本所在目录(即 rl_dqn_env.py 或 check_initial_state.py 同目录)
+    base_dir = Path(__file__).resolve().parent
+
+    # 构造模型路径
+    fp_path = base_dir / "resistance_model_fp.pth"
+    bw_path = base_dir / "resistance_model_bw.pth"
+
+    # 检查文件存在性
+    assert fp_path.exists(), f"缺少 {fp_path.name}"
+    assert bw_path.exists(), f"缺少 {bw_path.name}"
 
 
-def phi_bw_of(p, L_s: float, t_bw_s: float) -> float:
+    # 加载权重
+    resistance_model_fp.load_state_dict(torch.load(fp_path, map_location="cpu"))
+    resistance_model_bw.load_state_dict(torch.load(bw_path, map_location="cpu"))
+
+    # 设置推理模式
+    resistance_model_fp.eval()
+    resistance_model_bw.eval()
+
+    print("✅ Resistance models loaded successfully from current directory.")
+    return resistance_model_fp, resistance_model_bw
+
+
+# =======================
+# 环境体模型模拟函数
+# =======================
+def _delta_resistance(p, L_h: float) -> float:
     """
     """
-    物洗去除比例:调用 uf_bw.pth 模型
+    过滤时段膜阻力上升量:调用 resistance_model_fp.pth 模型
     """
     """
-    return model_bw(p, L_s, t_bw_s)
+    return resistance_model_fp(p, L_h)
 
 
-def _tmp_after_ceb(p, L_s: float, t_bw_s: float) -> float:
+def phi_bw_of(p, R0: float, R_end: float, L_h_start: float, L_h_next_start: float, t_bw_s: float) -> float:
     """
     """
-    计算化学清洗(CEB)后的TMP,当前为恢复初始跨膜压差
+    物理冲洗去除膜阻力值:调用 resistance_model_bw 模型
     """
     """
-    return p.TMP0
+    return resistance_model_bw(p, R0, R_end, L_h_start, L_h_next_start, t_bw_s)
 
 
 def _v_bw_m3(p, t_bw_s: float) -> float:
 def _v_bw_m3(p, t_bw_s: float) -> float:
     """
     """
@@ -104,139 +178,183 @@ def _v_bw_m3(p, t_bw_s: float) -> float:
 
 
 def simulate_one_supercycle(p: UFParams, L_s: float, t_bw_s: float):
 def simulate_one_supercycle(p: UFParams, L_s: float, t_bw_s: float):
     """
     """
-    返回 (是否可行, 指标字典)
-    - 支持动态CEB次数:48h固定间隔
-    - 增加日均产水时间和吨水电耗
-    - 增加最小TMP记录
+    模拟一个超级周期(多次物理反洗 + 一次化学反洗)
+    返回: (info, next_params)
     """
     """
     L_h = float(L_s) / 3600.0  # 小周期过滤时间(h)
     L_h = float(L_s) / 3600.0  # 小周期过滤时间(h)
 
 
     tmp = p.TMP0
     tmp = p.TMP0
+    R0 = _calculate_resistance(p.TMP0, p.q_UF, p.temp)
     max_tmp_during_filtration = tmp
     max_tmp_during_filtration = tmp
-    min_tmp_during_filtration = tmp  # 新增:初始化最小TMP
+    min_tmp_during_filtration = tmp
     max_residual_increase = 0.0
     max_residual_increase = 0.0
 
 
-    # 小周期总时长(h)
     t_small_cycle_h = (L_s + t_bw_s) / 3600.0
     t_small_cycle_h = (L_s + t_bw_s) / 3600.0
-
-    # 计算超级周期内CEB次数
     k_bw_per_ceb = int(np.floor(p.T_ceb_interval_h / t_small_cycle_h))
     k_bw_per_ceb = int(np.floor(p.T_ceb_interval_h / t_small_cycle_h))
     if k_bw_per_ceb < 1:
     if k_bw_per_ceb < 1:
-        k_bw_per_ceb = 1  # 至少一个小周期
+        k_bw_per_ceb = 1
 
 
-    # ton水电耗查表
     energy_lookup = {
     energy_lookup = {
         3600: 0.1034, 3660: 0.1031, 3720: 0.1029, 3780: 0.1026,
         3600: 0.1034, 3660: 0.1031, 3720: 0.1029, 3780: 0.1026,
         3840: 0.1023, 3900: 0.1021, 3960: 0.1019, 4020: 0.1017,
         3840: 0.1023, 3900: 0.1021, 3960: 0.1019, 4020: 0.1017,
         4080: 0.1015, 4140: 0.1012, 4200: 0.1011
         4080: 0.1015, 4140: 0.1012, 4200: 0.1011
     }
     }
 
 
-    for _ in range(k_bw_per_ceb):
+    # --- 循环模拟物理反洗 ---
+    for idx in range(k_bw_per_ceb):
         tmp_run_start = tmp
         tmp_run_start = tmp
+        q_UF = p.q_UF
+        temp = p.temp
 
 
-        # 过滤阶段TMP增长
-        dtmp = _delta_tmp(p, L_h)
-        tmp_peak = tmp_run_start + dtmp
-
-        # 约束1:峰值不得超过硬上限
-        if tmp_peak > p.TMP_max + 1e-12:
-            return False, {"reason": "TMP_max violated during filtration", "TMP_peak": tmp_peak}
+        R_run_start = _calculate_resistance(tmp_run_start, q_UF, temp)
+        d_R = _delta_resistance(p, L_s)
+        R_peak = R_run_start + d_R
+        tmp_peak = _calculate_tmp(R_peak, q_UF, temp)
 
 
-        # 更新最大和最小TMP
-        if tmp_peak > max_tmp_during_filtration:
-            max_tmp_during_filtration = tmp_peak
-        if tmp_run_start < min_tmp_during_filtration:  # 新增:记录运行开始时的最小TMP
-            min_tmp_during_filtration = tmp_run_start
+        max_tmp_during_filtration = max(max_tmp_during_filtration, tmp_peak)
+        min_tmp_during_filtration = min(min_tmp_during_filtration, tmp_run_start)
 
 
-        # 物理反洗
-        phi = phi_bw_of(p, L_s, t_bw_s)
-        tmp_after_bw = tmp_peak - phi * (tmp_peak - tmp_run_start)
+        # 物洗膜阻力减小
+        L_h_start = (L_s + t_bw_s) / 3600.0 * idx
+        L_h_next_start = (L_s + t_bw_s) / 3600.0 * (idx + 1)
+        reversible_R = phi_bw_of(p, R_run_start, R_peak, L_h_start, L_h_next_start, t_bw_s)
+        R_after_bw = R_peak - reversible_R
+        tmp_after_bw = _calculate_tmp(R_after_bw, q_UF, temp)
 
 
-        # 约束2:单次残余增量控制
         residual_inc = tmp_after_bw - tmp_run_start
         residual_inc = tmp_after_bw - tmp_run_start
-        if residual_inc > p.dTMP + 1e-12:
-            return False, {
-                "reason": "residual TMP increase after BW exceeded dTMP",
-                "residual_increase": residual_inc,
-                "limit_dTMP": p.dTMP
-            }
-        if residual_inc > max_residual_increase:
-            max_residual_increase = residual_inc
+        max_residual_increase = max(max_residual_increase, residual_inc)
 
 
         tmp = tmp_after_bw
         tmp = tmp_after_bw
 
 
-    # CEB
-    tmp_after_ceb = p.TMP0
+    # --- CEB反洗 ---
+    R_after_ceb = R_peak - p.ceb_removal
+    tmp_after_ceb = _calculate_tmp(R_after_ceb, q_UF, temp)
 
 
-    # 体积与回收率
+    # ============================================================
+    # 生成本周期指标
+    # ============================================================
+
+    # --- 体积与能耗 ---
     V_feed_super = k_bw_per_ceb * p.q_UF * L_h
     V_feed_super = k_bw_per_ceb * p.q_UF * L_h
     V_loss_super = k_bw_per_ceb * _v_bw_m3(p, t_bw_s) + p.v_ceb_m3
     V_loss_super = k_bw_per_ceb * _v_bw_m3(p, t_bw_s) + p.v_ceb_m3
     V_net = max(0.0, V_feed_super - V_loss_super)
     V_net = max(0.0, V_feed_super - V_loss_super)
     recovery = max(0.0, V_net / max(V_feed_super, 1e-12))
     recovery = max(0.0, V_net / max(V_feed_super, 1e-12))
 
 
-    # 时间与净供水率
     T_super_h = k_bw_per_ceb * (L_s + t_bw_s) / 3600.0 + p.t_ceb_s / 3600.0
     T_super_h = k_bw_per_ceb * (L_s + t_bw_s) / 3600.0 + p.t_ceb_s / 3600.0
-    net_delivery_rate_m3ph = V_net / max(T_super_h, 1e-12)
-
-    # 贴边比例与硬限
-    headroom_ratio = max_tmp_during_filtration / max(p.TMP_max, 1e-12)
-    if headroom_ratio > p.headroom_hardcap + 1e-12:
-        return False, {"reason": "headroom hardcap exceeded", "headroom_ratio": headroom_ratio}
-
-    # —— 新增指标 1:日均产水时间(h/d) ——
     daily_prod_time_h = k_bw_per_ceb * L_h / T_super_h * 24.0
     daily_prod_time_h = k_bw_per_ceb * L_h / T_super_h * 24.0
 
 
-    # —— 新增指标 2:吨水电耗(kWh/m³) ——
     closest_L = min(energy_lookup.keys(), key=lambda x: abs(x - L_s))
     closest_L = min(energy_lookup.keys(), key=lambda x: abs(x - L_s))
-    ton_water_energy = energy_lookup[closest_L]
+    ton_water_energy = energy_lookup[closest_L] #TODO:需确认新过滤时间范围下的吨水电耗
 
 
+    # --- 信息输出 ---
     info = {
     info = {
+        "q_UF": p.q_UF,
+        "temp": p.temp,
         "recovery": recovery,
         "recovery": recovery,
         "V_feed_super_m3": V_feed_super,
         "V_feed_super_m3": V_feed_super,
         "V_loss_super_m3": V_loss_super,
         "V_loss_super_m3": V_loss_super,
         "V_net_super_m3": V_net,
         "V_net_super_m3": V_net,
         "supercycle_time_h": T_super_h,
         "supercycle_time_h": T_super_h,
-        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
         "max_TMP_during_filtration": max_tmp_during_filtration,
         "max_TMP_during_filtration": max_tmp_during_filtration,
-        "min_TMP_during_filtration": min_tmp_during_filtration,  # 新增:最小TMP
+        "min_TMP_during_filtration": min_tmp_during_filtration,
+        "global_TMP_limit":p.global_TMP_limit,
         "max_residual_increase_per_run": max_residual_increase,
         "max_residual_increase_per_run": max_residual_increase,
-        "phi_bw_effective": phi,
+        "R0": R0,
+        "R_after_ceb": R_after_ceb,
+        "TMP0":p.TMP0,
         "TMP_after_ceb": tmp_after_ceb,
         "TMP_after_ceb": tmp_after_ceb,
-        "headroom_ratio": headroom_ratio,
         "daily_prod_time_h": daily_prod_time_h,
         "daily_prod_time_h": daily_prod_time_h,
         "ton_water_energy_kWh_per_m3": ton_water_energy,
         "ton_water_energy_kWh_per_m3": ton_water_energy,
         "k_bw_per_ceb": k_bw_per_ceb
         "k_bw_per_ceb": k_bw_per_ceb
     }
     }
 
 
-    return True, info
+    # ============================================================
+    # 状态更新:生成 next_params(新状态)
+    # ============================================================
+
+    next_params = copy.deepcopy(p)
+
+    # 更新跨膜压差(TMP)
+    next_params.TMP0 = tmp_after_ceb
 
 
-def _score(p: UFParams, rec: dict) -> float:
-    """综合评分:越大越好。通过非线性放大奖励差异,强化区分好坏动作"""
+    # 可选参数(当前保持不变,未来可扩展更新逻辑)
+    next_params.slope = p.slope
+    next_params.power = p.power
+    next_params.ceb_removal = p.ceb_removal
+    next_params.nuK = p.nuK
+    next_params.q_UF = p.q_UF
+    next_params.temp = p.temp
 
 
-    # —— 无量纲化净供水率 ——
-    rate_norm = rec["net_delivery_rate_m3ph"] / max(p.q_UF, 1e-12)
 
 
-    # —— TMP soft penalty (sigmoid) ——
-    tmp_ratio = rec["max_TMP_during_filtration"] / max(p.TMP_max, 1e-12)
-    k = 10.0
-    headroom_penalty = 1.0 / (1.0 + np.exp(-k * (tmp_ratio - 1.0)))
+    return info, next_params
 
 
-    # —— 基础 reward(0.6~0.9左右)——
-    base_reward = (
-        p.w_rec * rec["recovery"]
-        + p.w_rate * rate_norm
-        - p.w_headroom * headroom_penalty
-    )
+def calculate_reward(p: UFParams, info: dict) -> float:
+    """
+    TMP不参与奖励计算,仅考虑回收率与残余污染比例之间的权衡。
+    满足:
+      - 当 recovery=0.97, residual_ratio=0.1 → reward ≈ -0.2(按默认参数 k_rec=5、k_res=10、rr0=0.08、rec_low=0.92、rec_high=0.99 计算,仅近似为 0,并非严格为 0)
+      - 当 recovery=0.90, residual_ratio=0.0 → reward = 0
+      - 在两者之间平衡(如 recovery≈0.94, residual_ratio≈0.05)→ reward > 0
+    """
+    recovery = info["recovery"]
+    residual_ratio = (info["R_after_ceb"] - info["R0"]) / info["R0"]
+
+    # 回收率奖励(在 [rec_low, rec_high] 内平滑上升)
+    rec_norm = (recovery - p.rec_low) / (p.rec_high - p.rec_low)
+    rec_reward = np.clip(np.tanh(p.k_rec * (rec_norm - 0.5)), -1, 1)
+
+    # 残余比惩罚(超过rr0时快速变为负值)
+    res_penalty = -np.tanh(p.k_res * (residual_ratio / p.rr0 - 1))
+
+    # 组合逻辑:权衡二者
+    total_reward = rec_reward + res_penalty
+
+    # 再平移使指定点为零:
+    # recovery=0.97, residual=0.1 → 0
+    # recovery=0.90, residual=0.0 → 0
+    # 经验上,这两点几乎对称,因此无需额外线性偏移
+    # 若希望严格归零,可用线性校正:
+    total_reward -= 0.0
 
 
-    # —— 非线性放大:平方映射 + 缩放 ——
-    # 目的是放大好坏动作差异,同时限制最大值,避免 TD-error 过大
-    amplified_reward = (base_reward - 0.5) ** 2 * 5.0
+    return total_reward
+
+
+
+def is_dead_cycle(info: dict) -> bool:
+    """
+    判断当前循环是否为成功循环(True)或失败循环(False)。注意:尽管函数名为 is_dead_cycle,返回 True 表示循环成功(未触发任何失败条件)。
+    失败条件:
+    1. 最大TMP超过设定上限;
+    2. 回收率低于75%;
+    3. 化学反冲洗后膜阻力上升超过10%。
+
+    参数:
+        info: dict
+            simulate_one_supercycle() 返回的指标字典,需包含:
+            - max_TMP_during_filtration
+            - recovery
+            - R_after_ceb
+            - R0(缺失时默认取 1e-6)
+            - global_TMP_limit(缺失时默认取 0.08)
+    返回:
+        bool: True 表示成功循环,False 表示失败循环。
+    """
+    TMP_limit = info.get("global_TMP_limit", 0.08)  # 默认硬约束上限
+    max_tmp = info.get("max_TMP_during_filtration", 0)
+    recovery = info.get("recovery", 1.0)
+    R_after_ceb = info.get("R_after_ceb", 0)
+    R0 = info.get("R0", 1e-6)
 
 
-    # —— 可选:保留符号,区分负奖励
-    if base_reward < 0.5:
-        amplified_reward = -amplified_reward
+    # 判断条件
+    if max_tmp > TMP_limit:
+        return False
+    if recovery < 0.75:
+        return False
+    if (R_after_ceb - R0) / R0 > 0.1:
+        return False
+
+    return True
 
 
-    return amplified_reward
 
 
 
 
 class UFSuperCycleEnv(gym.Env):
 class UFSuperCycleEnv(gym.Env):
@@ -244,7 +362,7 @@ class UFSuperCycleEnv(gym.Env):
 
 
     metadata = {"render_modes": ["human"]}
     metadata = {"render_modes": ["human"]}
 
 
-    def __init__(self, base_params, max_episode_steps: int = 20):
+    def __init__(self, base_params, resistance_models=None, max_episode_steps: int = 15):
         super(UFSuperCycleEnv, self).__init__()
         super(UFSuperCycleEnv, self).__init__()
 
 
         self.base_params = base_params
         self.base_params = base_params
@@ -252,10 +370,15 @@ class UFSuperCycleEnv(gym.Env):
         self.max_episode_steps = max_episode_steps
         self.max_episode_steps = max_episode_steps
         self.current_step = 0
         self.current_step = 0
 
 
+        if resistance_models is None:
+            self.resistance_model_fp, self.resistance_model_bw = load_resistance_models()
+        else:
+            self.resistance_model_fp, self.resistance_model_bw = resistance_models
+
         # 计算离散动作空间
         # 计算离散动作空间
         self.L_values = np.arange(
         self.L_values = np.arange(
             self.base_params.L_min_s,
             self.base_params.L_min_s,
-            self.base_params.L_max_s + self.base_params.L_step_s,
+            self.base_params.L_max_s,
             self.base_params.L_step_s
             self.base_params.L_step_s
         )
         )
         self.t_bw_values = np.arange(
         self.t_bw_values = np.arange(
@@ -270,44 +393,180 @@ class UFSuperCycleEnv(gym.Env):
         # 单一离散动作空间
         # 单一离散动作空间
         self.action_space = spaces.Discrete(self.num_L * self.num_bw)
         self.action_space = spaces.Discrete(self.num_L * self.num_bw)
 
 
-        # 状态空间增加 TMP0, 上一次动作(L_s, t_bw_s), 本周期最高 TMP
-        # 状态归一化均在 _get_obs 内处理
+        # 状态空间,归一化在 _get_obs 中处理
         self.observation_space = spaces.Box(
         self.observation_space = spaces.Box(
-            low=np.zeros(4, dtype=np.float32),
-            high=np.ones(4, dtype=np.float32),
+            low=np.zeros(8, dtype=np.float32),
+            high=np.ones(8, dtype=np.float32),
             dtype=np.float32
             dtype=np.float32
         )
         )
 
 
-        # 初始化状态
-        self.last_action = (self.base_params.L_min_s, self.base_params.t_bw_min_s)
-        self.max_TMP_during_filtration = self.current_params.TMP0
+        # 初始化环境
         self.reset(seed=None)
         self.reset(seed=None)
 
 
-    def _get_obs(self):
-        TMP0 = self.current_params.TMP0
-        TMP0_norm = (TMP0 - 0.01) / (0.05 - 0.01)
-
-        L_s, t_bw_s = self.last_action
-        L_norm = (L_s - self.base_params.L_min_s) / (self.base_params.L_max_s - self.base_params.L_min_s)
-        t_bw_norm = (t_bw_s - self.base_params.t_bw_min_s) / (self.base_params.t_bw_max_s - self.base_params.t_bw_min_s)
+    def generate_initial_state(self):
+        """
+        随机生成一个初始状态,不进行死状态判断
+        """
+        self.current_params.TMP0 = np.random.uniform(
+            self.current_params.TMP0_min, self.current_params.TMP0_max
+        )
+        self.current_params.q_UF = np.random.uniform(
+            self.current_params.q_UF_min, self.current_params.q_UF_max
+        )
+        self.current_params.temp = np.random.uniform(
+            self.current_params.temp_min, self.current_params.temp_max
+        )
 
 
-        max_TMP_norm = (self.max_TMP_during_filtration - 0.01) / (0.05 - 0.01)
+        self.current_params.R0 = _calculate_resistance(
+            self.current_params.TMP0,
+            self.current_params.q_UF,
+            self.current_params.temp
+        )
 
 
-        return np.array([TMP0_norm, L_norm, t_bw_norm, max_TMP_norm], dtype=np.float32)
+        self.current_params.nuK = np.random.uniform(
+            self.current_params.nuK_min, self.current_params.nuK_max
+        )
+        self.current_params.slope = np.random.uniform(
+            self.current_params.slope_min, self.current_params.slope_max
+        )
+        self.current_params.power = np.random.uniform(
+            self.current_params.power_min, self.current_params.power_max
+        )
+        self.current_params.ceb_removal = np.random.uniform(
+            self.current_params.ceb_removal_min, self.current_params.ceb_removal_max
+        )
 
 
-    def _get_action_values(self, action):
-        L_idx = action // self.num_bw
-        t_bw_idx = action % self.num_bw
-        return self.L_values[L_idx], self.t_bw_values[t_bw_idx]
+        return self._get_state_copy()
 
 
-    def reset(self, seed=None, options=None):
+    def reset(self, seed=None, options=None, max_attempts: int = 200):
         super().reset(seed=seed)
         super().reset(seed=seed)
-        self.current_params.TMP0 = np.random.uniform(0.01, 0.03)
+
+        attempts = 0
+        while attempts < max_attempts:
+            attempts += 1
+            self.generate_initial_state()  # 生成随机初始状态
+            if self.check_dead_initial_state(max_steps=getattr(self, "max_episode_steps", 15),
+                                             L_s=3800, t_bw_s=60):
+                # True 表示可行,退出循环
+                break
+        else:
+            # 超过最大尝试次数仍未生成可行状态
+            raise RuntimeError(f"在 {max_attempts} 次尝试后仍无法生成可行初始状态。")
+
+        # 初始化步数、动作、最大 TMP
         self.current_step = 0
         self.current_step = 0
         self.last_action = (self.base_params.L_min_s, self.base_params.t_bw_min_s)
         self.last_action = (self.base_params.L_min_s, self.base_params.t_bw_min_s)
         self.max_TMP_during_filtration = self.current_params.TMP0
         self.max_TMP_during_filtration = self.current_params.TMP0
+
         return self._get_obs(), {}
         return self._get_obs(), {}
 
 
+    def check_dead_initial_state(self, max_steps: int = None,
+                                 L_s: int = 4900, t_bw_s: int = 50) -> bool:
+        """
+        判断当前环境生成的初始状态是否为可行(non-dead)。
+        使用最保守策略连续模拟 max_steps 次:
+            若任意一次 is_dead_cycle(info) 返回 False,则视为必死状态。
+
+        参数:
+            max_steps: 模拟步数,默认使用 self.max_episode_steps
+            L_s: 过滤时长(s),默认 4900
+            t_bw_s: 物理反洗时长(s),默认 50
+
+        返回:
+            bool: True 表示可行状态(non-dead),False 表示必死状态
+        """
+        if max_steps is None:
+            max_steps = getattr(self, "max_episode_steps", 15)
+
+        # 生成初始状态
+        self.generate_initial_state()
+        if not hasattr(self, "current_params"):
+            raise AttributeError("generate_initial_state() 未设置 current_params。")
+
+        import copy
+        curr_p = copy.deepcopy(self.current_params)
+
+        # 逐步模拟
+        for step in range(max_steps):
+            try:
+                info, next_params = simulate_one_supercycle(curr_p, L_s, t_bw_s)
+            except Exception:
+                # 异常即视为不可行
+                return False
+
+            if not is_dead_cycle(info):
+                # 任意一次失败即为必死状态
+                return False
+
+            curr_p = next_params
+
+        return True
+
+    def _get_state_copy(self):
+        return copy.deepcopy(self.current_params)
+
+    def _get_obs(self):
+        """
+        构建当前环境归一化状态向量
+        """
+        # === 1. 从 current_params 读取动态参数 ===
+        TMP0 = self.current_params.TMP0
+        q_UF = self.current_params.q_UF
+        temp = self.current_params.temp
+
+        # === 2. 计算本周期初始膜阻力 ===
+        R0 = _calculate_resistance(TMP0, q_UF, temp)
+
+        # === 3. 从 current_params 读取膜阻力增长模型参数 ===
+        nuk = self.current_params.nuK
+        slope = self.current_params.slope
+        power = self.current_params.power
+        ceb_removal = self.current_params.ceb_removal
+
+        # === 4. 从 current_params 动态读取上下限 ===
+        TMP0_min, TMP0_max = self.current_params.TMP0_min, self.current_params.TMP0_max
+        q_UF_min, q_UF_max = self.current_params.q_UF_min, self.current_params.q_UF_max
+        temp_min, temp_max = self.current_params.temp_min, self.current_params.temp_max
+        nuK_min, nuK_max = self.current_params.nuK_min, self.current_params.nuK_max
+        slope_min, slope_max = self.current_params.slope_min, self.current_params.slope_max
+        power_min, power_max = self.current_params.power_min, self.current_params.power_max
+        ceb_min, ceb_max = self.current_params.ceb_removal_min, self.current_params.ceb_removal_max
+
+        # === 5. 归一化计算(clip防止越界) ===
+        TMP0_norm = np.clip((TMP0 - TMP0_min) / (TMP0_max - TMP0_min), 0, 1)
+        q_UF_norm = np.clip((q_UF - q_UF_min) / (q_UF_max - q_UF_min), 0, 1)
+        temp_norm = np.clip((temp - temp_min) / (temp_max - temp_min), 0, 1)
+
+        # R0 不在 current_params 中定义上下限,设定经验范围
+        R0_norm = np.clip((R0 - 100.0) / (800.0 - 100.0), 0, 1)
+
+        short_term_norm = np.clip((nuk - nuK_min) / (nuK_max - nuK_min), 0, 1)
+        long_term_slope_norm = np.clip((slope - slope_min) / (slope_max - slope_min), 0, 1)
+        long_term_power_norm = np.clip((power - power_min) / (power_max - power_min), 0, 1)
+        ceb_removal_norm = np.clip((ceb_removal - ceb_min) / (ceb_max - ceb_min), 0, 1)
+
+        # === 6. 构建观测向量 ===
+        obs = np.array([
+            TMP0_norm,
+            q_UF_norm,
+            temp_norm,
+            R0_norm,
+            short_term_norm,
+            long_term_slope_norm,
+            long_term_power_norm,
+            ceb_removal_norm
+        ], dtype=np.float32)
+
+        return obs
+
+    def _get_action_values(self, action):
+        """
+        将动作还原为实际时长
+        """
+        L_idx = action // self.num_bw
+        t_bw_idx = action % self.num_bw
+        return self.L_values[L_idx], self.t_bw_values[t_bw_idx]
+
     def step(self, action):
     def step(self, action):
         self.current_step += 1
         self.current_step += 1
         L_s, t_bw_s = self._get_action_values(action)
         L_s, t_bw_s = self._get_action_values(action)
@@ -315,15 +574,16 @@ class UFSuperCycleEnv(gym.Env):
         t_bw_s = np.clip(t_bw_s, self.base_params.t_bw_min_s, self.base_params.t_bw_max_s)
         t_bw_s = np.clip(t_bw_s, self.base_params.t_bw_min_s, self.base_params.t_bw_max_s)
 
 
         # 模拟超级周期
         # 模拟超级周期
-        feasible, info = simulate_one_supercycle(self.current_params, L_s, t_bw_s)
+        info, next_params = simulate_one_supercycle(self.current_params, L_s, t_bw_s)
+        # 根据 info 判断是否成功
+        feasible = is_dead_cycle(info)  # True 表示成功循环,False 表示失败
 
 
         if feasible:
         if feasible:
-            reward = _score(self.current_params, info)
-            self.current_params.TMP0 = info["TMP_after_ceb"]
-            self.max_TMP_during_filtration = info["max_TMP_during_filtration"]
+            reward = calculate_reward(self.current_params, info)
+            self.current_params = next_params
             terminated = False
             terminated = False
         else:
         else:
-            reward = -20
+            reward = -10
             terminated = True
             terminated = True
 
 
         truncated = self.current_step >= self.max_episode_steps
         truncated = self.current_step >= self.max_episode_steps
@@ -337,4 +597,3 @@ class UFSuperCycleEnv(gym.Env):
 
 
 
 
 
 
-

+ 7 - 7
models/uf-rl/超滤训练源码/DQN_train.py

@@ -3,9 +3,6 @@ import time
 import random
 import random
 import numpy as np
 import numpy as np
 import torch
 import torch
-
-import gymnasium as gym
-from gymnasium import spaces
 from stable_baselines3 import DQN
 from stable_baselines3 import DQN
 from stable_baselines3.common.monitor import Monitor
 from stable_baselines3.common.monitor import Monitor
 from stable_baselines3.common.vec_env import DummyVecEnv
 from stable_baselines3.common.vec_env import DummyVecEnv
@@ -24,10 +21,10 @@ class DQNParams:
     learning_rate: float = 1e-4
     learning_rate: float = 1e-4
 
 
     # 经验回放缓冲区大小(步数)
     # 经验回放缓冲区大小(步数)
-    buffer_size: int = 10000
+    buffer_size: int = 100000
 
 
     # 学习开始前需要收集的步数
     # 学习开始前需要收集的步数
-    learning_starts: int = 200
+    learning_starts: int = 10000
 
 
     # 每次从经验池中采样的样本数量
     # 每次从经验池中采样的样本数量
     batch_size: int = 32
     batch_size: int = 32
@@ -39,7 +36,10 @@ class DQNParams:
     train_freq: int = 4
     train_freq: int = 4
 
 
     # 目标网络更新间隔
     # 目标网络更新间隔
-    target_update_interval: int = 2000
+    target_update_interval: int = 1
+
+    # 软更新系数
+    tau: float = 0.005
 
 
     # 初始探索率 ε
     # 初始探索率 ε
     exploration_initial_eps: float = 1.0
     exploration_initial_eps: float = 1.0
@@ -240,5 +240,5 @@ if __name__ == "__main__":
 
 
     # 训练RL代理
     # 训练RL代理
     print("开始训练RL代理...")
     print("开始训练RL代理...")
-    train_uf_rl_agent(params, total_timesteps=50000)
+    train_uf_rl_agent(params, total_timesteps=150000)
 
 

+ 0 - 405
models/uf-rl/超滤训练源码/UF_decide.py

@@ -1,405 +0,0 @@
-# UF_decide.py
-from dataclasses import dataclass
-import numpy as np
-
-@dataclass
-class UFParams:
-    # —— 膜与运行参数 ——
-    q_UF: float = 360.0           # 过滤进水流量(m^3/h)
-    TMP0: float = 0.03            # 初始TMP(MPa)
-    TMP_max: float = 0.06         # TMP硬上限(MPa)
-
-    # —— 膜污染动力学 ——
-    alpha: float = 1e-6           # TMP增长系数
-    belta: float = 1.1            # 幂指数
-
-    # —— 反洗参数(固定) ——
-    q_bw_m3ph: float = 1000.0     # 物理反洗流量(m^3/h)
-
-    # —— CEB参数(固定) ——
-    T_ceb_interval_h: float = 48.0  # 固定每 k 小时做一次CEB
-    v_ceb_m3: float = 30.0        # CEB用水体积(m^3)
-    t_ceb_s: float = 40 * 60.0    # CEB时长(s)
-    phi_ceb: float = 1.0          # CEB去除比例(简化:完全恢复到TMP0)
-
-    # —— 约束与收敛 ——
-    dTMP: float = 0.0005          # 单次产水结束时,相对TMP0最大升幅(MPa)
-
-    # —— 搜索范围(秒) ——
-    L_min_s: float = 3600.0       # 过滤时长下限(s)
-    L_max_s: float = 4200.0       # 过滤时长上限(s)
-    t_bw_min_s: float = 40.0      # 物洗时长下限(s)
-    t_bw_max_s: float = 60.0      # 物洗时长上限(s)
-
-    # —— 物理反洗恢复函数参数 ——
-    phi_bw_min: float = 0.7       # 物洗去除比例最小值
-    phi_bw_max: float = 1.0       # 物洗去除比例最大值
-    L_ref_s: float = 4000.0       # 过滤时长影响时间尺度
-    tau_bw_s: float = 30.0        # 物洗时长影响时间尺度
-    gamma_t: float = 1.0          # 物洗时长作用指数
-    
-    # —— 网格 ——
-    L_step_s: float = 60.0        # 过滤时长步长(s)
-    t_bw_step_s: float = 5.0      # 物洗时长步长(s)
-
-    # 多目标加权及高TMP惩罚
-    w_rec: float = 0.8            # 回收率权重
-    w_rate: float = 0.2           # 净供水率权重
-    w_headroom: float = 0.3       # 贴边惩罚权重
-    r_headroom: float = 2.0       # 贴边惩罚幂次
-    headroom_hardcap: float = 0.98 # 超过此比例直接视为不可取
-
-def _delta_tmp(p: UFParams, L_h: float) -> float:
-    # 过滤时段TMP上升量
-    return float(p.alpha * (p.q_UF ** p.belta) * L_h)
-
-def _v_bw_m3(p: UFParams, t_bw_s: float) -> float:
-    # 物理反洗水耗
-    return float(p.q_bw_m3ph * (float(t_bw_s) / 3600.0))
-
-def phi_bw_of(p: UFParams, L_s: float, t_bw_s: float) -> float:
-    # 物洗去除比例:随过滤时长增长上界收缩,随物洗时长增长趋饱和
-    L = max(float(L_s), 1.0)
-    t = max(float(t_bw_s), 1e-6)
-    upper_L = p.phi_bw_min + (p.phi_bw_max - p.phi_bw_min) * np.exp(- L / p.L_ref_s)
-    time_gain = 1.0 - np.exp(- (t / p.tau_bw_s) ** p.gamma_t)
-    phi = upper_L * time_gain
-    return float(np.clip(phi, 0.0, 0.999))
-
-def simulate_one_supercycle(p: UFParams, L_s: float, t_bw_s: float):
-    """
-    返回 (是否可行, 指标字典)
-    - 支持动态CEB次数:48h固定间隔
-    - 增加日均产水时间和吨水电耗
-    """
-    L_h = float(L_s) / 3600.0  # 小周期过滤时间(h)
-
-    tmp = p.TMP0
-    max_tmp_during_filtration = tmp
-    max_residual_increase = 0.0
-
-    # 小周期总时长(h)
-    t_small_cycle_h = (L_s + t_bw_s) / 3600.0
-
-    # 计算超级周期内CEB次数
-    k_bw_per_ceb = int(np.floor(p.T_ceb_interval_h / t_small_cycle_h))
-    if k_bw_per_ceb < 1:
-        k_bw_per_ceb = 1  # 至少一个小周期
-
-    # ton水电耗查表
-    energy_lookup = {
-        3600: 0.1034, 3660: 0.1031, 3720: 0.1029, 3780: 0.1026,
-        3840: 0.1023, 3900: 0.1021, 3960: 0.1019, 4020: 0.1017,
-        4080: 0.1015, 4140: 0.1012, 4200: 0.1011
-    }
-
-    for _ in range(k_bw_per_ceb):
-        tmp_run_start = tmp
-
-        # 过滤阶段TMP增长
-        dtmp = _delta_tmp(p, L_h)
-        tmp_peak = tmp_run_start + dtmp
-
-        # 约束1:峰值不得超过硬上限
-        if tmp_peak > p.TMP_max + 1e-12:
-            return False, {"reason": "TMP_max violated during filtration", "TMP_peak": tmp_peak}
-
-        if tmp_peak > max_tmp_during_filtration:
-            max_tmp_during_filtration = tmp_peak
-
-        # 物理反洗
-        phi = phi_bw_of(p, L_s, t_bw_s)
-        tmp_after_bw = tmp_peak - phi * (tmp_peak - tmp_run_start)
-
-        # 约束2:单次残余增量控制
-        residual_inc = tmp_after_bw - tmp_run_start
-        if residual_inc > p.dTMP + 1e-12:
-            return False, {
-                "reason": "residual TMP increase after BW exceeded dTMP",
-                "residual_increase": residual_inc,
-                "limit_dTMP": p.dTMP
-            }
-        if residual_inc > max_residual_increase:
-            max_residual_increase = residual_inc
-
-        tmp = tmp_after_bw
-
-    # CEB
-    tmp_after_ceb = p.TMP0
-
-    # 体积与回收率
-    V_feed_super = k_bw_per_ceb * p.q_UF * L_h
-    V_loss_super = k_bw_per_ceb * _v_bw_m3(p, t_bw_s) + p.v_ceb_m3
-    V_net = max(0.0, V_feed_super - V_loss_super)
-    recovery = max(0.0, V_net / max(V_feed_super, 1e-12))
-
-    # 时间与净供水率
-    T_super_h = k_bw_per_ceb * (L_s + t_bw_s) / 3600.0 + p.t_ceb_s / 3600.0
-    net_delivery_rate_m3ph = V_net / max(T_super_h, 1e-12)
-
-    # 贴边比例与硬限
-    headroom_ratio = max_tmp_during_filtration / max(p.TMP_max, 1e-12)
-    if headroom_ratio > p.headroom_hardcap + 1e-12:
-        return False, {"reason": "headroom hardcap exceeded", "headroom_ratio": headroom_ratio}
-
-    # —— 新增指标 1:日均产水时间(h/d) ——
-    daily_prod_time_h = k_bw_per_ceb * L_h / T_super_h * 24.0
-
-    # —— 新增指标 2:吨水电耗(kWh/m³) ——
-    closest_L = min(energy_lookup.keys(), key=lambda x: abs(x - L_s))
-    ton_water_energy = energy_lookup[closest_L]
-
-    info = {
-        "recovery": recovery,
-        "V_feed_super_m3": V_feed_super,
-        "V_loss_super_m3": V_loss_super,
-        "V_net_super_m3": V_net,
-        "supercycle_time_h": T_super_h,
-        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
-        "max_TMP_during_filtration": max_tmp_during_filtration,
-        "max_residual_increase_per_run": max_residual_increase,
-        "phi_bw_effective": phi,
-        "TMP_after_ceb": tmp_after_ceb,
-        "headroom_ratio": headroom_ratio,
-        "daily_prod_time_h": daily_prod_time_h,
-        "ton_water_energy_kWh_per_m3": ton_water_energy,
-        "k_bw_per_ceb": k_bw_per_ceb
-    }
-
-    return True, info
-
-def _score(p: UFParams, rec: dict) -> float:
-    """综合评分:越大越好。不同TMP0会改变max_TMP→改变惩罚→得到不同解。"""
-    # 无量纲化净供水率
-    rate_norm = rec["net_delivery_rate_m3ph"] / max(p.q_UF, 1e-12)
-    headroom_penalty = (rec["max_TMP_during_filtration"] / max(p.TMP_max, 1e-12)) ** p.r_headroom
-    return (p.w_rec * rec["recovery"]
-            + p.w_rate * rate_norm
-            - p.w_headroom * headroom_penalty)
-
-def optimize_2d(p: UFParams,
-                L_min_s=None, L_max_s=None, L_step_s=None,
-                t_bw_min_s=None, t_bw_max_s=None, t_bw_step_s=None):
-    # 网格生成
-    L_lo = p.L_min_s if L_min_s is None else float(L_min_s)
-    L_hi = p.L_max_s if L_max_s is None else float(L_max_s)
-    L_st = p.L_step_s if L_step_s is None else float(L_step_s)
-
-    t_lo = p.t_bw_min_s if t_bw_min_s is None else float(t_bw_min_s)
-    t_hi = p.t_bw_max_s if t_bw_max_s is None else float(t_bw_max_s)
-    t_st = p.t_bw_step_s if t_bw_step_s is None else float(t_bw_step_s)
-
-    L_vals = np.arange(L_lo, L_hi + 1e-9, L_st)
-    t_vals = np.arange(t_lo, t_hi + 1e-9, t_st)
-
-    best = None
-    best_score = -np.inf
-
-    for L_s in L_vals:
-        for t_bw_s in t_vals:
-            feasible, info = simulate_one_supercycle(p, L_s, t_bw_s)
-            if not feasible:
-                continue
-
-            rec = {"L_s": float(L_s), "t_bw_s": float(t_bw_s)}
-            rec.update(info)
-
-            score = _score(p, rec)
-
-            if score > best_score + 1e-14:
-                best_score = score
-                best = rec.copy()
-                best["score"] = float(score)
-            # 若分数相同,偏好回收率更高,再偏好净供水率更高
-            elif abs(score - best_score) <= 1e-14:
-                if (rec["recovery"] > best["recovery"] + 1e-12) or (
-                    abs(rec["recovery"] - best["recovery"]) <= 1e-12 and
-                    rec["net_delivery_rate_m3ph"] > best["net_delivery_rate_m3ph"] + 1e-12
-                ):
-                    best = rec.copy()
-                    best["score"] = float(score)
-
-    if best is None:
-        return {"status": "no-feasible-solution"}
-    best["status"] = "feasible"
-    return best
-
-def run_uf_decision(TMP0: float = None) -> dict:
-    if TMP0 is None:
-        rng = np.random.default_rng()
-        TMP0 = rng.uniform(0.03, 0.04)  # 初始TMP随机
-
-    params = UFParams(
-        q_UF=360.0,
-        TMP_max=0.05,
-        alpha=1.2e-6,
-        belta=1.0,
-        q_bw_m3ph=1000.0,
-        T_ceb_interval_h=48,
-        v_ceb_m3=30.0,
-        t_ceb_s=40*60.0,
-        phi_ceb=1.0,
-        dTMP=0.001,
-
-        L_min_s=3600.0, L_max_s=4200.0, L_step_s=30.0,
-        t_bw_min_s=90.0, t_bw_max_s=100.0, t_bw_step_s=2.0,
-
-        phi_bw_min=0.70, phi_bw_max=1.00,
-        L_ref_s=500.0, tau_bw_s=40.0, gamma_t=1.0,
-
-        TMP0=TMP0,
-
-        w_rec=0.7, w_rate=0.3, w_headroom=0.3, r_headroom=2.0, headroom_hardcap=0.9
-    )
-
-    result = optimize_2d(params)
-    if result.get("status") == "feasible":
-        return {
-            "L_s": result["L_s"],
-            "t_bw_s": result["t_bw_s"],
-            "recovery": result["recovery"],
-            "k_bw_per_ceb": result["k_bw_per_ceb"],
-            "daily_prod_time_h": result["daily_prod_time_h"],
-            "ton_water_energy_kWh_per_m3": result["ton_water_energy_kWh_per_m3"]
-        }
-
-    # 若没有可行解,返回最小过滤时间和默认值
-    return {
-        "L_s": params.L_min_s,
-        "t_bw_s": params.t_bw_min_s,
-        "recovery": 0.0,
-        "k_bw_per_ceb": 1,
-        "daily_prod_time_h": 0.0,
-        "ton_water_energy_kWh_per_m3": 0.0
-    }
-
-
-def generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s):
-    """
-    根据工厂当前值、模型上一轮决策值和模型当前轮决策值,生成PLC指令。
-
-    新增功能:
-    1. 处理None值情况:如果模型上一轮值为None,则使用工厂当前值;
-       如果工厂当前值也为None,则返回None并提示错误。
-    """
-    # 参数配置保持不变
-    params = UFParams(
-        L_min_s=3600.0, L_max_s=6000.0, L_step_s=60.0,
-        t_bw_min_s=40.0, t_bw_max_s=60.0, t_bw_step_s=5.0,
-    )
-
-    # 参数解包
-    L_step_s = params.L_step_s
-    t_bw_step_s = params.t_bw_step_s
-    L_min_s = params.L_min_s
-    L_max_s = params.L_max_s
-    t_bw_min_s = params.t_bw_min_s
-    t_bw_max_s = params.t_bw_max_s
-    adjustment_threshold = 1.0
-
-    # 处理None值情况
-    if model_prev_L_s is None:
-        if current_L_s is None:
-            print("错误: 过滤时长的工厂当前值和模型上一轮值均为None")
-            return None, None
-        else:
-            # 使用工厂当前值作为基准
-            effective_current_L = current_L_s
-            source_L = "工厂当前值(模型上一轮值为None)"
-    else:
-        # 模型上一轮值不为None,继续检查工厂当前值
-        if current_L_s is None:
-            effective_current_L = model_prev_L_s
-            source_L = "模型上一轮值(工厂当前值为None)"
-        else:
-            # 两个值都不为None,比较哪个更接近模型当前建议值
-            current_to_model_diff = abs(current_L_s - model_L_s)
-            prev_to_model_diff = abs(model_prev_L_s - model_L_s)
-
-            if current_to_model_diff <= prev_to_model_diff:
-                effective_current_L = current_L_s
-                source_L = "工厂当前值"
-            else:
-                effective_current_L = model_prev_L_s
-                source_L = "模型上一轮值"
-
-    # 对反洗时长进行同样的处理
-    if model_prev_t_bw_s is None:
-        if current_t_bw_s is None:
-            print("错误: 反洗时长的工厂当前值和模型上一轮值均为None")
-            return None, None
-        else:
-            effective_current_t_bw = current_t_bw_s
-            source_t_bw = "工厂当前值(模型上一轮值为None)"
-    else:
-        if current_t_bw_s is None:
-            effective_current_t_bw = model_prev_t_bw_s
-            source_t_bw = "模型上一轮值(工厂当前值为None)"
-        else:
-            current_to_model_t_bw_diff = abs(current_t_bw_s - model_t_bw_s)
-            prev_to_model_t_bw_diff = abs(model_prev_t_bw_s - model_t_bw_s)
-
-            if current_to_model_t_bw_diff <= prev_to_model_t_bw_diff:
-                effective_current_t_bw = current_t_bw_s
-                source_t_bw = "工厂当前值"
-            else:
-                effective_current_t_bw = model_prev_t_bw_s
-                source_t_bw = "模型上一轮值"
-
-    # 检测所有输入值是否在规定范围内(只对非None值进行检查)
-    # 工厂当前值检查(警告)
-    if current_L_s is not None and not (L_min_s <= current_L_s <= L_max_s):
-        print(f"警告: 当前过滤时长 {current_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
-    if current_t_bw_s is not None and not (t_bw_min_s <= current_t_bw_s <= t_bw_max_s):
-        print(f"警告: 当前反洗时长 {current_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
-
-    # 模型上一轮决策值检查(警告)
-    if model_prev_L_s is not None and not (L_min_s <= model_prev_L_s <= L_max_s):
-        print(f"警告: 模型上一轮过滤时长 {model_prev_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
-    if model_prev_t_bw_s is not None and not (t_bw_min_s <= model_prev_t_bw_s <= t_bw_max_s):
-        print(f"警告: 模型上一轮反洗时长 {model_prev_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
-
-    # 模型当前轮决策值检查(错误)
-    if model_L_s is None:
-        raise ValueError("错误: 决策模型建议的过滤时长不能为None")
-    elif not (L_min_s <= model_L_s <= L_max_s):
-        raise ValueError(f"错误: 决策模型建议的过滤时长 {model_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
-
-    if model_t_bw_s is None:
-        raise ValueError("错误: 决策模型建议的反洗时长不能为None")
-    elif not (t_bw_min_s <= model_t_bw_s <= t_bw_max_s):
-        raise ValueError(f"错误: 决策模型建议的反洗时长 {model_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
-
-    print(f"过滤时长基准: {source_L}, 值: {effective_current_L}")
-    print(f"反洗时长基准: {source_t_bw}, 值: {effective_current_t_bw}")
-
-    # 使用选定的基准值进行计算调整
-    L_diff = model_L_s - effective_current_L
-    L_adjustment = 0
-    if abs(L_diff) > adjustment_threshold * L_step_s:
-        if L_diff > 0:
-            L_adjustment = L_step_s
-        else:
-            L_adjustment = -L_step_s
-    next_L_s = effective_current_L + L_adjustment
-
-    t_bw_diff = model_t_bw_s - effective_current_t_bw
-    t_bw_adjustment = 0
-    if abs(t_bw_diff) > adjustment_threshold * t_bw_step_s:
-        if t_bw_diff > 0:
-            t_bw_adjustment = t_bw_step_s
-        else:
-            t_bw_adjustment = -t_bw_step_s
-    next_t_bw_s = effective_current_t_bw + t_bw_adjustment
-
-    return next_L_s, next_t_bw_s
-
-
-current_L_s = 3920
-current_t_bw_s = 98
-model_prev_L_s = None
-model_prev_t_bw_s = None
-model_L_s = 4160
-model_t_bw_s = 96
-next_L_s, next_t_bw_s = generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s)
-print(f"next_L_s={next_L_s}, next_t_bw_s={next_t_bw_s}")

+ 0 - 33
models/uf-rl/超滤训练源码/UF_models.py

@@ -1,33 +0,0 @@
-import torch
-import numpy as np
-
-# TMP 上升量模型
-class TMPIncreaseModel(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-    def forward(self, p, L_h):
-        return float(p.alpha * (p.q_UF ** p.belta) * L_h)
-
-# 反洗 TMP 去除模型
-class TMPDecreaseModel(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-    def forward(self, p, L_s, t_bw_s):
-        L = max(float(L_s), 1.0)
-        t = max(float(t_bw_s), 1e-6)
-        upper_L = p.phi_bw_min + (p.phi_bw_max - p.phi_bw_min) * np.exp(- L / p.L_ref_s)
-        time_gain = 1.0 - np.exp(- (t / p.tau_bw_s) ** p.gamma_t)
-        phi = upper_L * time_gain
-        return float(np.clip(phi, 0.0, 0.999))
-
-
-if __name__ == "__main__":
-    model_fp = TMPIncreaseModel()
-    model_bw = TMPDecreaseModel()
-
-
-    torch.save(model_fp.state_dict(), "uf_fp.pth")
-    torch.save(model_bw.state_dict(), "uf_bw.pth")
-
-
-    print("模型已安全保存为 uf_fp.pth、uf_bw.pth")

+ 0 - 0
models/uf-rl/超滤训练源码/uf_resistance_models.py → models/uf-rl/超滤训练源码/UF_resistance_models.py


+ 138 - 0
models/uf-rl/超滤训练源码/check_initial_state.py

@@ -0,0 +1,138 @@
+# check_initial_state.py
+"""
+检查初始状态是否为“必死状态”(conservatively dead):
+1) 实例化 base_params(优先使用 DQN_env 中提供的 base_params 或 UFParams;导入失败时回退到 rl_dqn_env)
+2) 实例化环境类 UFSuperCycleEnv(base_params)
+3) 调用 env.generate_initial_state() 生成 env.current_params(不调用 reset())
+4) 用固定策略(函数默认 L_s=4200s, t_bw_s=50s;脚本入口传入 L_s=6000s, t_bw_s=40s)连续模拟 max_steps 次,
+   若任意一次 is_dead_cycle(info) 返回 False 则判定为必死(返回 True),否则返回 False。
+"""
+
+from typing import Any
+import copy
+import traceback
+
+# 优先从 DQN_env 导入必需项;失败时回退到 rl_dqn_env(见下方 except 分支)
+try:
+    from DQN_env import (
+        simulate_one_supercycle,
+        is_dead_cycle,
+        UFSuperCycleEnv,
+        UFParams,       # 如果模块里有 UFParams 类就导入
+        base_params     # 如果模块直接提供 base_params 实例也尝试导入
+    )
+except Exception:
+    # 有可能某些名字不存在 —— 我们会稍后用回退方案处理
+    # 先导入模块并再尝试访问属性,确保错误信息更友好
+    import importlib
+    rl = importlib.import_module("rl_dqn_env")
+    simulate_one_supercycle = getattr(rl, "simulate_one_supercycle", None)
+    is_dead_cycle = getattr(rl, "is_dead_cycle", None)
+    UFSuperCycleEnv = getattr(rl, "UFSuperCycleEnv", None)
+    UFParams = getattr(rl, "UFParams", None)
+    base_params = getattr(rl, "base_params", None)
+
+# 检查导入完整性
+_missing = []
+if simulate_one_supercycle is None:
+    _missing.append("simulate_one_supercycle")
+if is_dead_cycle is None:
+    _missing.append("is_dead_cycle")
+if UFSuperCycleEnv is None:
+    _missing.append("UFSuperCycleEnv")
+if _missing:
+    raise ImportError(f"无法从 rl_dqn_env 导入以下必要项: {', '.join(_missing)}")
+
+def is_dead_initial_state_env(env: UFSuperCycleEnv, max_steps: int = 15,
+                              L_s: int = 4200, t_bw_s: int = 50,
+                              verbose: bool = True) -> bool:
+    """
+    使用 env.current_params 作为初始状态判断是否为必死状态(保守策略)。
+
+    参数:
+        env: 已实例化的 UFSuperCycleEnv(必须包含 generate_initial_state() 与 current_params)
+        max_steps: 模拟步数(默认 15)
+        L_s: 过滤时长(s),默认 4200
+        t_bw_s: 物理反洗时长(s),默认 50
+        verbose: 是否打印每步结果
+
+    返回:
+        True 表示必死(conservatively dead)
+        False 表示可行
+    """
+    # 1) 确保 env 有 current_params,并且 generate_initial_state 可用
+    if not hasattr(env, "generate_initial_state"):
+        raise AttributeError("env 缺少 generate_initial_state() 方法。")
+    # 生成初始状态(不会调用 reset)
+    env.generate_initial_state()
+
+    if not hasattr(env, "current_params"):
+        raise AttributeError("env.generate_initial_state() 未设置 env.current_params。")
+
+    curr_p = copy.deepcopy(env.current_params)
+
+    for step in range(1, max_steps + 1):
+        try:
+            info, next_params = simulate_one_supercycle(curr_p, L_s, t_bw_s)
+        except Exception as e:
+            # 如果 simulate 出错,把异常视为“失败”(保守处理)
+            if verbose:
+                print(f"[Step {step}] simulate_one_supercycle 抛出异常,视为失败。异常信息:{e}")
+                traceback.print_exc()
+            return True
+
+        success = is_dead_cycle(info)  # True 表示成功循环
+
+        if verbose:
+            print(f"[Step {step}] 循环结果:{'成功' if success else '失败'}")
+            # 如果 info 中有关键诊断字段,打印简要信息
+            try:
+                print(f"     TMP0: {info.get('TMP0')},max_TMP: {info.get('max_TMP_during_filtration')}, recovery: {info.get('recovery')}, "
+                      f"R0: {info.get('R0')}, R_after_ceb: {info.get('R_after_ceb')}")
+            except Exception:
+                pass
+
+        if not success:
+            if verbose:
+                print(f"在第 {step} 步检测到失败,判定为必死初始状态(conservatively dead)。")
+            return True
+
+        # 否则继续,用 next_params 作为下一步起始参数
+        curr_p = next_params
+
+    if verbose:
+        print(f"{max_steps} 步均成功,初始状态判定为可行(non-dead)。")
+    return False
+
+
+if __name__ == "__main__":
+    print("=== check_initial_state.py: 使用 env.generate_initial_state() 检查初始状态是否为必死 ===")
+
+    try:
+        # 1) 构造 base_params
+        if base_params is not None:
+            bp = base_params
+            print("使用 rl_dqn_env 中提供的 base_params。")
+        elif UFParams is not None:
+            bp = UFParams()  # 使用默认构造
+            print("使用 UFParams() 构造 base_params 的实例。")
+        else:
+            raise ImportError("无法构造 base_params:rl_dqn_env 中既无 base_params 也无 UFParams。")
+
+        # 2) 实例化环境类(将 base_params 传入构造器)
+        env = UFSuperCycleEnv(bp)
+        print("已实例化 UFSuperCycleEnv 环境。")
+
+        # 3) 调用 env.generate_initial_state() 并检查 env.current_params 是否为必死
+        dead = is_dead_initial_state_env(env, max_steps=getattr(env, "max_episode_steps", 15),
+                                        L_s=6000, t_bw_s=40, verbose=True)
+
+        print("\n=== 判定结果 ===")
+        if dead:
+            print("当前生成的初始状态为【必死状态】(conservatively dead)。")
+        else:
+            print("当前生成的初始状态为【可行状态】(non-dead)。")
+
+    except Exception as e:
+        print("脚本执行出现错误:", e)
+        traceback.print_exc()

binární
models/uf-rl/超滤训练源码/uf_bw.pth


binární
models/uf-rl/超滤训练源码/uf_fp.pth