5 meses atrás · 48850ed4c8
--- a/models/uf-rl/超滤训练源码/DQN_decide.py
+++ b/models/uf-rl/超滤训练源码/DQN_decide.py
@@ -1,246 +0,0 @@
 
				-import numpy as np
			
 
				-from stable_baselines3 import DQN
			
 
				-from UF_super_RL.DQN_env import UFSuperCycleEnv
			
 
				-from UF_super_RL.DQN_env import UFParams
			
 
				-
			
 
				-# 模型路径
			
 
				-MODEL_PATH = "dqn_model.zip"
			
 
				-
			
 
				-# 加载模型（只加载一次，提高效率）
			
 
				-model = DQN.load(MODEL_PATH)
			
 
				-
			
 
				-def run_uf_DQN_decide(uf_params, TMP0_value: float):
			
 
				-    """
			
 
				-    单步决策函数：输入原始 TMP0，预测并执行动作
			
 
				-
			
 
				-    参数:
			
 
				-        TMP0_value (float): 当前 TMP0 值（单位与环境一致）
			
 
				-
			
 
				-    返回:
			
 
				-        dict: 包含模型选择的动作、动作参数、新状态、奖励等
			
 
				-    """
			
 
				-    # 1. 实例化环境
			
 
				-    base_params = uf_params
			
 
				-    env = UFSuperCycleEnv(base_params)
			
 
				-
			
 
				-    # 2. 将输入的 TMP0 写入环境
			
 
				-    env.current_params.TMP0 = TMP0_value
			
 
				-
			
 
				-    # 3. 获取归一化状态
			
 
				-    obs = env._get_obs().reshape(1, -1)
			
 
				-
			
 
				-    # 4. 模型预测动作
			
 
				-    action, _ = model.predict(obs, deterministic=True)
			
 
				-
			
 
				-    # 5. 解析动作对应的 L_s 和 t_bw_s
			
 
				-    L_s, t_bw_s = env._get_action_values(action[0])
			
 
				-
			
 
				-    # 6. 在环境中执行该动作
			
 
				-    next_obs, reward, terminated, truncated, info = env.step(action[0])
			
 
				-
			
 
				-    # 7. 整理结果
			
 
				-    result = {
			
 
				-        "action": int(action[0]),
			
 
				-        "L_s": float(L_s),
			
 
				-        "t_bw_s": float(t_bw_s),
			
 
				-        "next_obs": next_obs,
			
 
				-        "reward": reward,
			
 
				-        "terminated": terminated,
			
 
				-        "truncated": truncated,
			
 
				-        "info": info
			
 
				-    }
			
 
				-
			
 
				-    # 8. 关闭环境
			
 
				-    env.close()
			
 
				-
			
 
				-    return result
			
 
				-
			
 
				-def generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s):
			
 
				-    """
			
 
				-    根据工厂当前值、模型上一轮决策值和模型当前轮决策值，生成PLC指令。
			
 
				-
			
 
				-    新增功能：
			
 
				-    1. 处理None值情况：如果模型上一轮值为None，则使用工厂当前值；
			
 
				-       如果工厂当前值也为None，则返回None并提示错误。
			
 
				-    """
			
 
				-    # 参数配置保持不变
			
 
				-    params = UFParams(
			
 
				-        L_min_s=3600.0, L_max_s=6000.0, L_step_s=60.0,
			
 
				-        t_bw_min_s=40.0, t_bw_max_s=60.0, t_bw_step_s=5.0,
			
 
				-    )
			
 
				-
			
 
				-    # 参数解包
			
 
				-    L_step_s = params.L_step_s
			
 
				-    t_bw_step_s = params.t_bw_step_s
			
 
				-    L_min_s = params.L_min_s
			
 
				-    L_max_s = params.L_max_s
			
 
				-    t_bw_min_s = params.t_bw_min_s
			
 
				-    t_bw_max_s = params.t_bw_max_s
			
 
				-    adjustment_threshold = 1.0
			
 
				-
			
 
				-    # 处理None值情况
			
 
				-    if model_prev_L_s is None:
			
 
				-        if current_L_s is None:
			
 
				-            print("错误: 过滤时长的工厂当前值和模型上一轮值均为None")
			
 
				-            return None, None
			
 
				-        else:
			
 
				-            # 使用工厂当前值作为基准
			
 
				-            effective_current_L = current_L_s
			
 
				-            source_L = "工厂当前值(模型上一轮值为None)"
			
 
				-    else:
			
 
				-        # 模型上一轮值不为None，继续检查工厂当前值
			
 
				-        if current_L_s is None:
			
 
				-            effective_current_L = model_prev_L_s
			
 
				-            source_L = "模型上一轮值(工厂当前值为None)"
			
 
				-        else:
			
 
				-            effective_current_L = model_prev_L_s
			
 
				-            source_L = "模型上一轮值"
			
 
				-
			
 
				-    # 对反洗时长进行同样的处理
			
 
				-    if model_prev_t_bw_s is None:
			
 
				-        if current_t_bw_s is None:
			
 
				-            print("错误: 反洗时长的工厂当前值和模型上一轮值均为None")
			
 
				-            return None, None
			
 
				-        else:
			
 
				-            effective_current_t_bw = current_t_bw_s
			
 
				-            source_t_bw = "工厂当前值(模型上一轮值为None)"
			
 
				-    else:
			
 
				-        if current_t_bw_s is None:
			
 
				-            effective_current_t_bw = model_prev_t_bw_s
			
 
				-            source_t_bw = "模型上一轮值(工厂当前值为None)"
			
 
				-        else:
			
 
				-            effective_current_t_bw = model_prev_t_bw_s
			
 
				-            source_t_bw = "模型上一轮值"
			
 
				-
			
 
				-    # 检测所有输入值是否在规定范围内（只对非None值进行检查）
			
 
				-    # 工厂当前值检查（警告）
			
 
				-    if current_L_s is not None and not (L_min_s <= current_L_s <= L_max_s):
			
 
				-        print(f"警告: 当前过滤时长 {current_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
			
 
				-    if current_t_bw_s is not None and not (t_bw_min_s <= current_t_bw_s <= t_bw_max_s):
			
 
				-        print(f"警告: 当前反洗时长 {current_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
			
 
				-
			
 
				-    # 模型上一轮决策值检查（警告）
			
 
				-    if model_prev_L_s is not None and not (L_min_s <= model_prev_L_s <= L_max_s):
			
 
				-        print(f"警告: 模型上一轮过滤时长 {model_prev_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
			
 
				-    if model_prev_t_bw_s is not None and not (t_bw_min_s <= model_prev_t_bw_s <= t_bw_max_s):
			
 
				-        print(f"警告: 模型上一轮反洗时长 {model_prev_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
			
 
				-
			
 
				-    # 模型当前轮决策值检查（错误）
			
 
				-    if model_L_s is None:
			
 
				-        raise ValueError("错误: 决策模型建议的过滤时长不能为None")
			
 
				-    elif not (L_min_s <= model_L_s <= L_max_s):
			
 
				-        raise ValueError(f"错误: 决策模型建议的过滤时长 {model_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
			
 
				-
			
 
				-    if model_t_bw_s is None:
			
 
				-        raise ValueError("错误: 决策模型建议的反洗时长不能为None")
			
 
				-    elif not (t_bw_min_s <= model_t_bw_s <= t_bw_max_s):
			
 
				-        raise ValueError(f"错误: 决策模型建议的反洗时长 {model_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
			
 
				-
			
 
				-    print(f"过滤时长基准: {source_L}, 值: {effective_current_L}")
			
 
				-    print(f"反洗时长基准: {source_t_bw}, 值: {effective_current_t_bw}")
			
 
				-
			
 
				-    # 使用选定的基准值进行计算调整
			
 
				-    L_diff = model_L_s - effective_current_L
			
 
				-    L_adjustment = 0
			
 
				-    if abs(L_diff) >= adjustment_threshold * L_step_s:
			
 
				-        if L_diff >= 0:
			
 
				-            L_adjustment = L_step_s
			
 
				-        else:
			
 
				-            L_adjustment = -L_step_s
			
 
				-    next_L_s = effective_current_L + L_adjustment
			
 
				-
			
 
				-    t_bw_diff = model_t_bw_s - effective_current_t_bw
			
 
				-    t_bw_adjustment = 0
			
 
				-    if abs(t_bw_diff) >= adjustment_threshold * t_bw_step_s:
			
 
				-        if t_bw_diff >= 0:
			
 
				-            t_bw_adjustment = t_bw_step_s
			
 
				-        else:
			
 
				-            t_bw_adjustment = -t_bw_step_s
			
 
				-    next_t_bw_s = effective_current_t_bw + t_bw_adjustment
			
 
				-
			
 
				-    return next_L_s, next_t_bw_s
			
 
				-
			
 
				-
			
 
				-from UF_super_RL.DQN_env import simulate_one_supercycle
			
 
				-def calc_uf_cycle_metrics(p, TMP0, max_tmp_during_filtration, min_tmp_during_filtration, L_s: float, t_bw_s: float):
			
 
				-    """
			
 
				-    计算 UF 超滤系统的核心性能指标
			
 
				-
			
 
				-    参数:
			
 
				-        p (UFParams): UF 系统参数
			
 
				-        L_s (float): 单次过滤时间（秒）
			
 
				-        t_bw_s (float): 单次反洗时间（秒）
			
 
				-
			
 
				-    返回:
			
 
				-        dict: {
			
 
				-            "k_bw_per_ceb": 小周期次数,
			
 
				-            "ton_water_energy_kWh_per_m3": 吨水电耗,
			
 
				-            "recovery": 回收率,
			
 
				-            "net_delivery_rate_m3ph": 净供水率 (m³/h),
			
 
				-            "daily_prod_time_h": 日均产水时间 (小时/天)
			
 
				-            "max_permeability": 全周期最高渗透率(lmh/bar)
			
 
				-        }
			
 
				-    """
			
 
				-    # 将跨膜压差写入参数
			
 
				-    p.TMP0 = TMP0
			
 
				-
			
 
				-    # 模拟该参数下的超级周期
			
 
				-    feasible, info = simulate_one_supercycle(p, L_s, t_bw_s)
			
 
				-
			
 
				-    # 获得模型模拟周期信息
			
 
				-    k_bw_per_ceb = info["k_bw_per_ceb"]
			
 
				-    ton_water_energy_kWh_per_m3 = info["ton_water_energy_kWh_per_m3"]
			
 
				-    recovery = info["recovery"]
			
 
				-    net_delivery_rate_m3ph = info["net_delivery_rate_m3ph"]
			
 
				-    daily_prod_time_h = info["daily_prod_time_h"]
			
 
				-
			
 
				-    # 获得模型模拟周期内最高跨膜压差/最低跨膜压差
			
 
				-    if max_tmp_during_filtration is None:
			
 
				-        max_tmp_during_filtration = info["max_TMP_during_filtration"]
			
 
				-    if min_tmp_during_filtration is None:
			
 
				-        min_tmp_during_filtration = info["min_TMP_during_filtration"]
			
 
				-
			
 
				-    # 计算最高渗透率
			
 
				-    max_permeability = 100 * p.q_UF / (128*40) / min_tmp_during_filtration
			
 
				-
			
 
				-
			
 
				-    return {
			
 
				-        "k_bw_per_ceb": k_bw_per_ceb,
			
 
				-        "ton_water_energy_kWh_per_m3": ton_water_energy_kWh_per_m3,
			
 
				-        "recovery": recovery,
			
 
				-        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
			
 
				-        "daily_prod_time_h": daily_prod_time_h,
			
 
				-        "max_permeability": max_permeability
			
 
				-    }
			
 
				-
			
 
				-
			
 
				-# ==============================
			
 
				-# 示例调用
			
 
				-# ==============================
			
 
				-if __name__ == "__main__":
			
 
				-    uf_params = UFParams()
			
 
				-    TMP0 = 0.03 # 原始 TMP0
			
 
				-    model_decide_result = run_uf_DQN_decide(uf_params, TMP0) # 调用模型获得动作
			
 
				-    model_L_s = model_decide_result['L_s'] # 获得模型决策产水时长
			
 
				-    model_t_bw_s = model_decide_result['t_bw_s'] # 获得模型决策反洗时长
			
 
				-
			
 
				-    current_L_s = 3800
			
 
				-    current_t_bw_s = 40
			
 
				-    model_prev_L_s = 4040
			
 
				-    model_prev_t_bw_s = 60
			
 
				-    L_s, t_bw_s = generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s) # 获取模型下发指令
			
 
				-
			
 
				-    L_s = 4100
			
 
				-    t_bw_s = 96
			
 
				-    max_tmp_during_filtration = 0.050176 # 新增工厂数据接口：周期最高/最低跨膜压差，无工厂数据接入时传入None，calc_uf_cycle_metrics()自动获取模拟周期中的跨膜压差最值
			
 
				-    min_tmp_during_filtration = 0.012496
			
 
				-    execution_result = calc_uf_cycle_metrics(uf_params, TMP0, max_tmp_during_filtration, min_tmp_during_filtration, L_s, t_bw_s)
			
 
				-    print("\n===== 单步决策结果 =====")
			
 
				-    print(f"模型选择的动作: {model_decide_result['action']}")
			
 
				-    print(f"模型选择的L_s: {model_L_s} 秒, 模型选择的t_bw_s: {model_t_bw_s} 秒")
			
 
				-    print(f"指令下发的L_s: {L_s} 秒, 指令下发的t_bw_s: {t_bw_s} 秒")
			
 
				-    print(f"指令对应的反洗次数: {execution_result['k_bw_per_ceb']}")
			
 
				-    print(f"指令对应的吨水电耗: {execution_result['ton_water_energy_kWh_per_m3']}")
			
 
				-    print(f"指令对应的回收率: {execution_result['recovery']}")
			
 
				-    print(f"指令对应的日均产水时间: {execution_result['daily_prod_time_h']}")
			
 
				-    print(f"指令对应的最高渗透率: {execution_result['max_permeability']}")
			
--- a/models/uf-rl/超滤训练源码/DQN_env.py
+++ b/models/uf-rl/超滤训练源码/DQN_env.py
@@ -1,44 +1,58 @@
 
				 import os
			
 
				-import time
			
 
				-import random
			
 
				+import torch
			
 
				+from pathlib import Path
			
 
				 import numpy as np
			
 
				 import gymnasium as gym
			
 
				 from gymnasium import spaces
			
 
				-from stable_baselines3 import DQN
			
 
				-from stable_baselines3.common.monitor import Monitor
			
 
				-from stable_baselines3.common.vec_env import DummyVecEnv
			
 
				-from stable_baselines3.common.callbacks import BaseCallback
			
 
				 from typing import Dict, Tuple, Optional
			
 
				 import torch
			
 
				 import torch.nn as nn
			
 
				 from dataclasses import dataclass, asdict
			
 
				-from UF_models import TMPIncreaseModel, TMPDecreaseModel  # 导入模型类
			
 
				+from UF_resistance_models import ResistanceIncreaseModel, ResistanceDecreaseModel  # 导入模型类
			
 
				 import copy
			
 
				 
			
 
				-
			
 
				-# ==== 定义膜的基础运行参数 ====
			
 
				+# =======================
			
 
				+# 膜运行参数类：定义膜的基础运行参数
			
 
				+# =======================
			
 
				 @dataclass
			
 
				 class UFParams:
			
 
				-    # —— 膜与运行参数 ——
			
 
				+    # —— 膜动态运行参数 ——
			
 
				     q_UF: float = 360.0  # 过滤进水流量（m^3/h）
			
 
				-    TMP0: float = 0.03  # 初始TMP（MPa）
			
 
				-    TMP_max: float = 0.06  # TMP硬上限（MPa）
			
 
				-
			
 
				-    # —— 膜污染动力学 ——
			
 
				-    alpha: float = 1e-6  # TMP增长系数
			
 
				-    belta: float = 1.1  # 幂指数
			
 
				+    TMP0: float = 0.03 # 初始跨膜压差（MPa）
			
 
				+    temp: float = 25.0  # 水温，摄氏度
			
 
				+
			
 
				+    # —— 膜阻力模型参数 ——
			
 
				+    nuK: float =4.92e+01 # 过滤阶段膜阻力增长模型参数
			
 
				+    slope: float = 3.44e-01 # 全周期不可逆污染阻力增长斜率
			
 
				+    power: float = 1.032 # 全周期不可逆污染阻力增长幂次
			
 
				+    tau_bw_s: float = 30.0  # 物洗时长影响时间尺度
			
 
				+    gamma_t: float = 1.0  # 物洗时长作用指数
			
 
				+    ceb_removal: float = 150  # CEB去除膜阻力
			
 
				+
			
 
				+    # —— 膜运行约束参数 ——
			
 
				+    global_TMP_limit: float = 0.08  # TMP硬上限（MPa）
			
 
				+    TMP0_max: float = 0.035 # 初始TMP上限（MPa）
			
 
				+    TMP0_min: float = 0.01 # 初始TMP下限（MPa）
			
 
				+    q_UF_max: float = 400.0 # 进水流量上限（m^3/h）
			
 
				+    q_UF_min: float = 250.0 # 进水流量下限（m^3/h）
			
 
				+    temp_max: float = 40.0 # 温度上限（摄氏度）
			
 
				+    temp_min: float = 10.0 # 温度下限（摄氏度）
			
 
				+    nuK_max: float = 6e+01 # 物理周期总阻力增速上限（m^-1/s）
			
 
				+    nuK_min: float = 3e+01 # 物理周期总阻力增速下限（m^-1/s）
			
 
				+    slope_max: float = 10 # 化学周期长期阻力增速斜率上限
			
 
				+    slope_min: float = 0.1 # 化学周期长期阻力增速斜率下限
			
 
				+    power_max: float = 1.3 # 化学周期长期阻力增速幂次上限
			
 
				+    power_min: float = 0.8 # 化学周期长期阻力增速幂次下限
			
 
				+    ceb_removal_max: float = 150 # CEB去除阻力(已缩放)上限（m^-1）
			
 
				+    ceb_removal_min: float = 100 # CEB去除阻力(已缩放)下限（m^-1）
			
 
				 
			
 
				     # —— 反洗参数（固定） ——
			
 
				     q_bw_m3ph: float = 1000.0  # 物理反洗流量（m^3/h）
			
 
				 
			
 
				-    # —— CEB参数（固定） ——
			
 
				-    T_ceb_interval_h: float = 48.0  # 固定每 k 小时做一次CEB
			
 
				+    # —— CEB参数 ——
			
 
				+    T_ceb_interval_h: float = 60.0  # 固定每 k 小时做一次CEB
			
 
				     v_ceb_m3: float = 30.0  # CEB用水体积（m^3）
			
 
				     t_ceb_s: float = 40 * 60.0  # CEB时长（s）
			
 
				-    phi_ceb: float = 1.0  # CEB去除比例（简化：完全恢复到TMP0）
			
 
				-
			
 
				-    # —— 约束与收敛 ——
			
 
				-    dTMP: float = 0.001  # 单次产水结束时，相对TMP0最大升幅（MPa）
			
 
				 
			
 
				     # —— 搜索范围（秒） ——
			
 
				     L_min_s: float = 3800.0  # 过滤时长下限（s）
			
@@ -46,55 +60,115 @@ class UFParams:
 
				     t_bw_min_s: float = 40.0  # 物洗时长下限（s）
			
 
				     t_bw_max_s: float = 60.0  # 物洗时长上限（s）
			
 
				 
			
 
				-    # —— 物理反洗恢复函数参数 ——
			
 
				-    phi_bw_min: float = 0.7  # 物洗去除比例最小值
			
 
				-    phi_bw_max: float = 1.0  # 物洗去除比例最大值
			
 
				-    L_ref_s: float = 4000.0  # 过滤时长影响时间尺度
			
 
				-    tau_bw_s: float = 20.0  # 物洗时长影响时间尺度
			
 
				-    gamma_t: float = 1.0  # 物洗时长作用指数
			
 
				-
			
 
				     # —— 网格 ——
			
 
				     L_step_s: float = 60.0  # 过滤时长步长（s）
			
 
				     t_bw_step_s: float = 5.0  # 物洗时长步长（s）
			
 
				 
			
 
				-    # 多目标加权及高TMP惩罚
			
 
				-    w_rec: float = 0.8  # 回收率权重
			
 
				-    w_rate: float = 0.2  # 净供水率权重
			
 
				-    w_headroom: float = 0.2  # 贴边惩罚权重
			
 
				-    r_headroom: float = 2.0  # 贴边惩罚幂次
			
 
				-    headroom_hardcap: float = 0.98  # 超过此比例直接视为不可取
			
 
				+    # —— 奖励函数参数 ——
			
 
				+    k_rec = 5.0      # 回收率敏感度
			
 
				+    k_res = 10.0     # 残余污染敏感度
			
 
				+    rec_low, rec_high = 0.92, 0.99
			
 
				+    rr0 = 0.08
			
 
				 
			
 
				-# ==== 加载模拟环境模型 ====
			
 
				-# 初始化模型
			
 
				-model_fp = TMPIncreaseModel()
			
 
				-model_bw = TMPDecreaseModel()
			
 
				 
			
 
				-# 加载参数
			
 
				-model_fp.load_state_dict(torch.load("uf_fp.pth"))
			
 
				-model_bw.load_state_dict(torch.load("uf_bw.pth"))
			
 
				+# =======================
			
 
				+# 辅助函数：转换膜阻力与跨膜压差
			
 
				+# =======================
			
 
				 
			
 
				-# 切换到推理模式
			
 
				-model_fp.eval()
			
 
				-model_bw.eval()
			
 
				+def xishan_viscosity(temp):
			
 
				+    # temp: 水温，单位摄氏度
			
 
				+    """
			
 
				+    锡山水厂 PLC水温校正因子经验公式（25摄氏度标准）
			
 
				+    返回温度修正后的水粘度（纯水修正），TODO：水厂水质与纯水相差较大，对粘度有一定影响
			
 
				+    """
			
 
				+    x = (temp + 273.15) / 300
			
 
				+    factor = 890 / (280.68 * x ** -1.9 + 511.45 * x ** -7.7 + 61.131 * x ** -19.6 + 0.45903 * x ** -40)
			
 
				+    mu = 0.00089 / factor
			
 
				+    return mu
			
 
				+
			
 
				+def _calculate_resistance(tmp, q_UF, temp):
			
 
				+    """
			
 
				+    计算超滤膜阻力 R = TMP / (J * μ)
			
 
				+    返回缩小 1e10 倍的膜阻力（超滤原膜阻力量级为 1e12，绝对值过大不利于平稳拟合）
			
 
				+    """
			
 
				+    A = 128 * 40  # m²，有效膜面积
			
 
				+    mu = xishan_viscosity(temp) # 温度修正后的水粘度
			
 
				+    TMP_Pa = tmp * 1e6  # 跨膜压差 MPa -> Pa
			
 
				+    J = q_UF / A / 3600  # 通量 m³/h -> m³/(m²·s)
			
 
				+    if J <= 0 or mu <= 0:
			
 
				+        return np.nan
			
 
				+    R = TMP_Pa / (J * mu) / 1e10 # 缩放膜阻力
			
 
				 
			
 
				+    return float(R)
			
 
				 
			
 
				-def _delta_tmp(p, L_h: float) -> float:
			
 
				+def _calculate_tmp(R, q_UF, temp):
			
 
				     """
			
 
				-    过滤时段TMP上升量：调用 uf_fp.pth 模型
			
 
				+    还原超滤跨膜压差 TMP
			
 
				     """
			
 
				-    return model_fp(p, L_h)
			
 
				+    A = 128 * 40  # m²，有效膜面积
			
 
				+    mu = xishan_viscosity(temp) # 温度修正后的水粘度
			
 
				+    J = q_UF / A / 3600  # 通量 m³/h -> m³/(m²·s)
			
 
				+    TMP_Pa = R * J * mu * 1e10
			
 
				+    tmp = TMP_Pa / 1e6
			
 
				+
			
 
				+    return float(tmp)
			
 
				+
			
 
				+
			
 
				+# =======================
			
 
				+# 环境体模型加载函数
			
 
				+# =======================
			
 
				+def load_resistance_models():
			
 
				+    """加载阻力变化模型，仅在首次调用时执行"""
			
 
				+
			
 
				+    global resistance_model_fp, resistance_model_bw
			
 
				+
			
 
				+    # 如果全局模型已存在，则直接返回
			
 
				+    if "resistance_model_fp" in globals() and resistance_model_fp is not None:
			
 
				+        return resistance_model_fp, resistance_model_bw
			
 
				+
			
 
				+    print("🔄 Loading resistance models...")
			
 
				+
			
 
				+    # 初始化模型
			
 
				+    resistance_model_fp = ResistanceIncreaseModel()
			
 
				+    resistance_model_bw = ResistanceDecreaseModel()
			
 
				+
			
 
				+    # 取得当前脚本所在目录（即 rl_dqn_env.py 或 check_initial_state.py 同目录）
			
 
				+    base_dir = Path(__file__).resolve().parent
			
 
				+
			
 
				+    # 构造模型路径
			
 
				+    fp_path = base_dir / "resistance_model_fp.pth"
			
 
				+    bw_path = base_dir / "resistance_model_bw.pth"
			
 
				+
			
 
				+    # 检查文件存在性
			
 
				+    assert fp_path.exists(), f"缺少 {fp_path.name}"
			
 
				+    assert bw_path.exists(), f"缺少 {bw_path.name}"
			
 
				 
			
 
				-def phi_bw_of(p, L_s: float, t_bw_s: float) -> float:
			
 
				+    # 加载权重
			
 
				+    resistance_model_fp.load_state_dict(torch.load(fp_path, map_location="cpu"))
			
 
				+    resistance_model_bw.load_state_dict(torch.load(bw_path, map_location="cpu"))
			
 
				+
			
 
				+    # 设置推理模式
			
 
				+    resistance_model_fp.eval()
			
 
				+    resistance_model_bw.eval()
			
 
				+
			
 
				+    print("✅ Resistance models loaded successfully from current directory.")
			
 
				+    return resistance_model_fp, resistance_model_bw
			
 
				+
			
 
				+
			
 
				+# =======================
			
 
				+# 环境体模型模拟函数
			
 
				+# =======================
			
 
				+def _delta_resistance(p, L_h: float) -> float:
			
 
				     """
			
 
				-    物洗去除比例：调用 uf_bw.pth 模型
			
 
				+    过滤时段膜阻力上升量：调用 resistance_model_fp.pth 模型
			
 
				     """
			
 
				-    return model_bw(p, L_s, t_bw_s)
			
 
				+    return resistance_model_fp(p, L_h)
			
 
				 
			
 
				-def _tmp_after_ceb(p, L_s: float, t_bw_s: float) -> float:
			
 
				+def phi_bw_of(p, R0: float, R_end: float, L_h_start: float, L_h_next_start: float, t_bw_s: float) -> float:
			
 
				     """
			
 
				-    计算化学清洗(CEB)后的TMP，当前为恢复初始跨膜压差
			
 
				+    物理冲洗去除膜阻力值：调用 resistance_model_bw 模型
			
 
				     """
			
 
				-    return p.TMP0
			
 
				+    return resistance_model_bw(p, R0, R_end, L_h_start, L_h_next_start, t_bw_s)
			
 
				 
			
 
				 def _v_bw_m3(p, t_bw_s: float) -> float:
			
 
				     """
			
@@ -104,139 +178,183 @@ def _v_bw_m3(p, t_bw_s: float) -> float:
 
				 
			
 
				 def simulate_one_supercycle(p: UFParams, L_s: float, t_bw_s: float):
			
 
				     """
			
 
				-    返回 (是否可行, 指标字典)
			
 
				-    - 支持动态CEB次数：48h固定间隔
			
 
				-    - 增加日均产水时间和吨水电耗
			
 
				-    - 增加最小TMP记录
			
 
				+    模拟一个超级周期（多次物理反洗 + 一次化学反洗）
			
 
				+    返回: (info, next_params)
			
 
				     """
			
 
				     L_h = float(L_s) / 3600.0  # 小周期过滤时间(h)
			
 
				 
			
 
				     tmp = p.TMP0
			
 
				+    R0 = _calculate_resistance(p.TMP0, p.q_UF, p.temp)
			
 
				     max_tmp_during_filtration = tmp
			
 
				-    min_tmp_during_filtration = tmp  # 新增：初始化最小TMP
			
 
				+    min_tmp_during_filtration = tmp
			
 
				     max_residual_increase = 0.0
			
 
				 
			
 
				-    # 小周期总时长(h)
			
 
				     t_small_cycle_h = (L_s + t_bw_s) / 3600.0
			
 
				-
			
 
				-    # 计算超级周期内CEB次数
			
 
				     k_bw_per_ceb = int(np.floor(p.T_ceb_interval_h / t_small_cycle_h))
			
 
				     if k_bw_per_ceb < 1:
			
 
				-        k_bw_per_ceb = 1  # 至少一个小周期
			
 
				+        k_bw_per_ceb = 1
			
 
				 
			
 
				-    # ton水电耗查表
			
 
				     energy_lookup = {
			
 
				         3600: 0.1034, 3660: 0.1031, 3720: 0.1029, 3780: 0.1026,
			
 
				         3840: 0.1023, 3900: 0.1021, 3960: 0.1019, 4020: 0.1017,
			
 
				         4080: 0.1015, 4140: 0.1012, 4200: 0.1011
			
 
				     }
			
 
				 
			
 
				-    for _ in range(k_bw_per_ceb):
			
 
				+    # --- 循环模拟物理反洗 ---
			
 
				+    for idx in range(k_bw_per_ceb):
			
 
				         tmp_run_start = tmp
			
 
				+        q_UF = p.q_UF
			
 
				+        temp = p.temp
			
 
				 
			
 
				-        # 过滤阶段TMP增长
			
 
				-        dtmp = _delta_tmp(p, L_h)
			
 
				-        tmp_peak = tmp_run_start + dtmp
			
 
				-
			
 
				-        # 约束1：峰值不得超过硬上限
			
 
				-        if tmp_peak > p.TMP_max + 1e-12:
			
 
				-            return False, {"reason": "TMP_max violated during filtration", "TMP_peak": tmp_peak}
			
 
				+        R_run_start = _calculate_resistance(tmp_run_start, q_UF, temp)
			
 
				+        d_R = _delta_resistance(p, L_s)
			
 
				+        R_peak = R_run_start + d_R
			
 
				+        tmp_peak = _calculate_tmp(R_peak, q_UF, temp)
			
 
				 
			
 
				-        # 更新最大和最小TMP
			
 
				-        if tmp_peak > max_tmp_during_filtration:
			
 
				-            max_tmp_during_filtration = tmp_peak
			
 
				-        if tmp_run_start < min_tmp_during_filtration:  # 新增：记录运行开始时的最小TMP
			
 
				-            min_tmp_during_filtration = tmp_run_start
			
 
				+        max_tmp_during_filtration = max(max_tmp_during_filtration, tmp_peak)
			
 
				+        min_tmp_during_filtration = min(min_tmp_during_filtration, tmp_run_start)
			
 
				 
			
 
				-        # 物理反洗
			
 
				-        phi = phi_bw_of(p, L_s, t_bw_s)
			
 
				-        tmp_after_bw = tmp_peak - phi * (tmp_peak - tmp_run_start)
			
 
				+        # 物洗膜阻力减小
			
 
				+        L_h_start = (L_s + t_bw_s) / 3600.0 * idx
			
 
				+        L_h_next_start = (L_s + t_bw_s) / 3600.0 * (idx + 1)
			
 
				+        reversible_R = phi_bw_of(p, R_run_start, R_peak, L_h_start, L_h_next_start, t_bw_s)
			
 
				+        R_after_bw = R_peak - reversible_R
			
 
				+        tmp_after_bw = _calculate_tmp(R_after_bw, q_UF, temp)
			
 
				 
			
 
				-        # 约束2：单次残余增量控制
			
 
				         residual_inc = tmp_after_bw - tmp_run_start
			
 
				-        if residual_inc > p.dTMP + 1e-12:
			
 
				-            return False, {
			
 
				-                "reason": "residual TMP increase after BW exceeded dTMP",
			
 
				-                "residual_increase": residual_inc,
			
 
				-                "limit_dTMP": p.dTMP
			
 
				-            }
			
 
				-        if residual_inc > max_residual_increase:
			
 
				-            max_residual_increase = residual_inc
			
 
				+        max_residual_increase = max(max_residual_increase, residual_inc)
			
 
				 
			
 
				         tmp = tmp_after_bw
			
 
				 
			
 
				-    # CEB
			
 
				-    tmp_after_ceb = p.TMP0
			
 
				+    # --- CEB反洗 ---
			
 
				+    R_after_ceb = R_peak - p.ceb_removal
			
 
				+    tmp_after_ceb = _calculate_tmp(R_after_ceb, q_UF, temp)
			
 
				 
			
 
				-    # 体积与回收率
			
 
				+    # ============================================================
			
 
				+    # 生成本周期指标
			
 
				+    # ============================================================
			
 
				+
			
 
				+    # --- 体积与能耗 ---
			
 
				     V_feed_super = k_bw_per_ceb * p.q_UF * L_h
			
 
				     V_loss_super = k_bw_per_ceb * _v_bw_m3(p, t_bw_s) + p.v_ceb_m3
			
 
				     V_net = max(0.0, V_feed_super - V_loss_super)
			
 
				     recovery = max(0.0, V_net / max(V_feed_super, 1e-12))
			
 
				 
			
 
				-    # 时间与净供水率
			
 
				     T_super_h = k_bw_per_ceb * (L_s + t_bw_s) / 3600.0 + p.t_ceb_s / 3600.0
			
 
				-    net_delivery_rate_m3ph = V_net / max(T_super_h, 1e-12)
			
 
				-
			
 
				-    # 贴边比例与硬限
			
 
				-    headroom_ratio = max_tmp_during_filtration / max(p.TMP_max, 1e-12)
			
 
				-    if headroom_ratio > p.headroom_hardcap + 1e-12:
			
 
				-        return False, {"reason": "headroom hardcap exceeded", "headroom_ratio": headroom_ratio}
			
 
				-
			
 
				-    # —— 新增指标 1：日均产水时间（h/d） ——
			
 
				     daily_prod_time_h = k_bw_per_ceb * L_h / T_super_h * 24.0
			
 
				 
			
 
				-    # —— 新增指标 2：吨水电耗（kWh/m³） ——
			
 
				     closest_L = min(energy_lookup.keys(), key=lambda x: abs(x - L_s))
			
 
				-    ton_water_energy = energy_lookup[closest_L]
			
 
				+    ton_water_energy = energy_lookup[closest_L] #TODO:需确认新过滤时间范围下的吨水电耗
			
 
				 
			
 
				+    # --- 信息输出 ---
			
 
				     info = {
			
 
				+        "q_UF": p.q_UF,
			
 
				+        "temp": p.temp,
			
 
				         "recovery": recovery,
			
 
				         "V_feed_super_m3": V_feed_super,
			
 
				         "V_loss_super_m3": V_loss_super,
			
 
				         "V_net_super_m3": V_net,
			
 
				         "supercycle_time_h": T_super_h,
			
 
				-        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
			
 
				         "max_TMP_during_filtration": max_tmp_during_filtration,
			
 
				-        "min_TMP_during_filtration": min_tmp_during_filtration,  # 新增：最小TMP
			
 
				+        "min_TMP_during_filtration": min_tmp_during_filtration,
			
 
				+        "global_TMP_limit":p.global_TMP_limit,
			
 
				         "max_residual_increase_per_run": max_residual_increase,
			
 
				-        "phi_bw_effective": phi,
			
 
				+        "R0": R0,
			
 
				+        "R_after_ceb": R_after_ceb,
			
 
				+        "TMP0":p.TMP0,
			
 
				         "TMP_after_ceb": tmp_after_ceb,
			
 
				-        "headroom_ratio": headroom_ratio,
			
 
				         "daily_prod_time_h": daily_prod_time_h,
			
 
				         "ton_water_energy_kWh_per_m3": ton_water_energy,
			
 
				         "k_bw_per_ceb": k_bw_per_ceb
			
 
				     }
			
 
				 
			
 
				-    return True, info
			
 
				+    # ============================================================
			
 
				+    # 状态更新：生成 next_params（新状态）
			
 
				+    # ============================================================
			
 
				+
			
 
				+    next_params = copy.deepcopy(p)
			
 
				+
			
 
				+    # 更新跨膜压差（TMP）
			
 
				+    next_params.TMP0 = tmp_after_ceb
			
 
				 
			
 
				-def _score(p: UFParams, rec: dict) -> float:
			
 
				-    """综合评分：越大越好。通过非线性放大奖励差异，强化区分好坏动作"""
			
 
				+    # 可选参数（当前保持不变，未来可扩展更新逻辑）
			
 
				+    next_params.slope = p.slope
			
 
				+    next_params.power = p.power
			
 
				+    next_params.ceb_removal = p.ceb_removal
			
 
				+    next_params.nuK = p.nuK
			
 
				+    next_params.q_UF = p.q_UF
			
 
				+    next_params.temp = p.temp
			
 
				 
			
 
				-    # —— 无量纲化净供水率 ——
			
 
				-    rate_norm = rec["net_delivery_rate_m3ph"] / max(p.q_UF, 1e-12)
			
 
				 
			
 
				-    # —— TMP soft penalty (sigmoid) ——
			
 
				-    tmp_ratio = rec["max_TMP_during_filtration"] / max(p.TMP_max, 1e-12)
			
 
				-    k = 10.0
			
 
				-    headroom_penalty = 1.0 / (1.0 + np.exp(-k * (tmp_ratio - 1.0)))
			
 
				+    return info, next_params
			
 
				 
			
 
				-    # —— 基础 reward（0.6~0.9左右）——
			
 
				-    base_reward = (
			
 
				-        p.w_rec * rec["recovery"]
			
 
				-        + p.w_rate * rate_norm
			
 
				-        - p.w_headroom * headroom_penalty
			
 
				-    )
			
 
				+def calculate_reward(p: UFParams, info: dict) -> float:
			
 
				+    """
			
 
				+    TMP不参与奖励计算，仅考虑回收率与残余污染比例之间的权衡。
			
 
				+    设计目标（按 UFParams 默认奖励参数计算仅近似成立：第一条实际约为 -0.20，第二条约为 0）：
			
 
				+      - 当 recovery=0.97, residual_ratio=0.1 → reward = 0
			
 
				+      - 当 recovery=0.90, residual_ratio=0.0 → reward = 0
			
 
				+      - 在两者之间平衡（如 recovery≈0.94, residual_ratio≈0.05）→ reward > 0
			
 
				+    """
			
 
				+    recovery = info["recovery"]
			
 
				+    residual_ratio = (info["R_after_ceb"] - info["R0"]) / info["R0"]
			
 
				+
			
 
				+    # 回收率奖励（在 [rec_low, rec_high] 内平滑上升）
			
 
				+    rec_norm = (recovery - p.rec_low) / (p.rec_high - p.rec_low)
			
 
				+    rec_reward = np.clip(np.tanh(p.k_rec * (rec_norm - 0.5)), -1, 1)
			
 
				+
			
 
				+    # 残余比惩罚（超过rr0时快速变为负值）
			
 
				+    res_penalty = -np.tanh(p.k_res * (residual_ratio / p.rr0 - 1))
			
 
				+
			
 
				+    # 组合逻辑：权衡二者
			
 
				+    total_reward = rec_reward + res_penalty
			
 
				+
			
 
				+    # 再平移使指定点为零：
			
 
				+    # recovery=0.97, residual=0.1 → 0
			
 
				+    # recovery=0.90, residual=0.0 → 0
			
 
				+    # 经验上，这两点几乎对称，因此无需额外线性偏移
			
 
				+    # 若希望严格归零，可用线性校正：
			
 
				+    total_reward -= 0.0
			
 
				 
			
 
				-    # —— 非线性放大：平方映射 + 缩放 ——
			
 
				-    # 目的是放大好坏动作差异，同时限制最大值，避免 TD-error 过大
			
 
				-    amplified_reward = (base_reward - 0.5) ** 2 * 5.0
			
 
				+    return total_reward
			
 
				+
			
 
				+
			
 
				+
			
 
				+def is_dead_cycle(info: dict) -> bool:
			
 
				+    """
			
 
				+    判断当前循环是否为成功循环（True）或失败循环（False）
			
 
				+    失败条件：
			
 
				+    1. 最大TMP超过设定上限；
			
 
				+    2. 回收率低于75%；
			
 
				+    3. 化学反冲洗后膜阻力上升超过10%。
			
 
				+
			
 
				+    参数：
			
 
				+        info: dict
			
 
				+            simulate_one_supercycle() 返回的指标字典，需包含：
			
 
				+            - max_TMP_during_filtration
			
 
				+            - recovery
			
 
				+            - R_after_ceb
			
 
				+            - R0
			
 
				+            - global_TMP_limit（缺失时默认取 0.08）
			
 
				+    返回：
			
 
				+        bool: True 表示成功循环，False 表示失败循环（注意：返回值与函数名 is_dead_cycle 的字面含义相反）。
			
 
				+    """
			
 
				+    TMP_limit = info.get("global_TMP_limit", 0.08)  # 默认硬约束上限
			
 
				+    max_tmp = info.get("max_TMP_during_filtration", 0)
			
 
				+    recovery = info.get("recovery", 1.0)
			
 
				+    R_after_ceb = info.get("R_after_ceb", 0)
			
 
				+    R0 = info.get("R0", 1e-6)
			
 
				 
			
 
				-    # —— 可选：保留符号，区分负奖励
			
 
				-    if base_reward < 0.5:
			
 
				-        amplified_reward = -amplified_reward
			
 
				+    # 判断条件
			
 
				+    if max_tmp > TMP_limit:
			
 
				+        return False
			
 
				+    if recovery < 0.75:
			
 
				+        return False
			
 
				+    if (R_after_ceb - R0) / R0 > 0.1:
			
 
				+        return False
			
 
				+
			
 
				+    return True
			
 
				 
			
 
				-    return amplified_reward
			
 
				 
			
 
				 
			
 
				 class UFSuperCycleEnv(gym.Env):
			
@@ -244,7 +362,7 @@ class UFSuperCycleEnv(gym.Env):
 
				 
			
 
				     metadata = {"render_modes": ["human"]}
			
 
				 
			
 
				-    def __init__(self, base_params, max_episode_steps: int = 20):
			
 
				+    def __init__(self, base_params, resistance_models=None, max_episode_steps: int = 15):
			
 
				         super(UFSuperCycleEnv, self).__init__()
			
 
				 
			
 
				         self.base_params = base_params
			
@@ -252,10 +370,15 @@ class UFSuperCycleEnv(gym.Env):
 
				         self.max_episode_steps = max_episode_steps
			
 
				         self.current_step = 0
			
 
				 
			
 
				+        if resistance_models is None:
			
 
				+            self.resistance_model_fp, self.resistance_model_bw = load_resistance_models()
			
 
				+        else:
			
 
				+            self.resistance_model_fp, self.resistance_model_bw = resistance_models
			
 
				+
			
 
				         # 计算离散动作空间
			
 
				         self.L_values = np.arange(
			
 
				             self.base_params.L_min_s,
			
 
				-            self.base_params.L_max_s + self.base_params.L_step_s,
			
 
				+            self.base_params.L_max_s,
			
 
				             self.base_params.L_step_s
			
 
				         )
			
 
				         self.t_bw_values = np.arange(
			
@@ -270,44 +393,180 @@ class UFSuperCycleEnv(gym.Env):
 
				         # 单一离散动作空间
			
 
				         self.action_space = spaces.Discrete(self.num_L * self.num_bw)
			
 
				 
			
 
				-        # 状态空间增加 TMP0, 上一次动作(L_s, t_bw_s), 本周期最高 TMP
			
 
				-        # 状态归一化均在 _get_obs 内处理
			
 
				+        # 状态空间，归一化在 _get_obs 中处理
			
 
				         self.observation_space = spaces.Box(
			
 
				-            low=np.zeros(4, dtype=np.float32),
			
 
				-            high=np.ones(4, dtype=np.float32),
			
 
				+            low=np.zeros(8, dtype=np.float32),
			
 
				+            high=np.ones(8, dtype=np.float32),
			
 
				             dtype=np.float32
			
 
				         )
			
 
				 
			
 
				-        # 初始化状态
			
 
				-        self.last_action = (self.base_params.L_min_s, self.base_params.t_bw_min_s)
			
 
				-        self.max_TMP_during_filtration = self.current_params.TMP0
			
 
				+        # 初始化环境
			
 
				         self.reset(seed=None)
			
 
				 
			
 
				-    def _get_obs(self):
			
 
				-        TMP0 = self.current_params.TMP0
			
 
				-        TMP0_norm = (TMP0 - 0.01) / (0.05 - 0.01)
			
 
				-
			
 
				-        L_s, t_bw_s = self.last_action
			
 
				-        L_norm = (L_s - self.base_params.L_min_s) / (self.base_params.L_max_s - self.base_params.L_min_s)
			
 
				-        t_bw_norm = (t_bw_s - self.base_params.t_bw_min_s) / (self.base_params.t_bw_max_s - self.base_params.t_bw_min_s)
			
 
				+    def generate_initial_state(self):
			
 
				+        """
			
 
				+        随机生成一个初始状态，不进行死状态判断
			
 
				+        """
			
 
				+        self.current_params.TMP0 = np.random.uniform(
			
 
				+            self.current_params.TMP0_min, self.current_params.TMP0_max
			
 
				+        )
			
 
				+        self.current_params.q_UF = np.random.uniform(
			
 
				+            self.current_params.q_UF_min, self.current_params.q_UF_max
			
 
				+        )
			
 
				+        self.current_params.temp = np.random.uniform(
			
 
				+            self.current_params.temp_min, self.current_params.temp_max
			
 
				+        )
			
 
				 
			
 
				-        max_TMP_norm = (self.max_TMP_during_filtration - 0.01) / (0.05 - 0.01)
			
 
				+        self.current_params.R0 = _calculate_resistance(
			
 
				+            self.current_params.TMP0,
			
 
				+            self.current_params.q_UF,
			
 
				+            self.current_params.temp
			
 
				+        )
			
 
				 
			
 
				-        return np.array([TMP0_norm, L_norm, t_bw_norm, max_TMP_norm], dtype=np.float32)
			
 
				+        self.current_params.nuK = np.random.uniform(
			
 
				+            self.current_params.nuK_min, self.current_params.nuK_max
			
 
				+        )
			
 
				+        self.current_params.slope = np.random.uniform(
			
 
				+            self.current_params.slope_min, self.current_params.slope_max
			
 
				+        )
			
 
				+        self.current_params.power = np.random.uniform(
			
 
				+            self.current_params.power_min, self.current_params.power_max
			
 
				+        )
			
 
				+        self.current_params.ceb_removal = np.random.uniform(
			
 
				+            self.current_params.ceb_removal_min, self.current_params.ceb_removal_max
			
 
				+        )
			
 
				 
			
 
				-    def _get_action_values(self, action):
			
 
				-        L_idx = action // self.num_bw
			
 
				-        t_bw_idx = action % self.num_bw
			
 
				-        return self.L_values[L_idx], self.t_bw_values[t_bw_idx]
			
 
				+        return self._get_state_copy()
			
 
				 
			
 
				-    def reset(self, seed=None, options=None):
			
 
				+    def reset(self, seed=None, options=None, max_attempts: int = 200):
			
 
				         super().reset(seed=seed)
			
 
				-        self.current_params.TMP0 = np.random.uniform(0.01, 0.03)
			
 
				+
			
 
				+        attempts = 0
			
 
				+        while attempts < max_attempts:
			
 
				+            attempts += 1
			
 
				+            self.generate_initial_state()  # 生成随机初始状态
			
 
				+            if self.check_dead_initial_state(max_steps=getattr(self, "max_episode_steps", 15),
			
 
				+                                             L_s=3800, t_bw_s=60):
			
 
				+                # True 表示可行，退出循环
			
 
				+                break
			
 
				+        else:
			
 
				+            # 超过最大尝试次数仍未生成可行状态
			
 
				+            raise RuntimeError(f"在 {max_attempts} 次尝试后仍无法生成可行初始状态。")
			
 
				+
			
 
				+        # 初始化步数、动作、最大 TMP
			
 
				         self.current_step = 0
			
 
				         self.last_action = (self.base_params.L_min_s, self.base_params.t_bw_min_s)
			
 
				         self.max_TMP_during_filtration = self.current_params.TMP0
			
 
				+
			
 
				         return self._get_obs(), {}
			
 
				 
			
 
				+    def check_dead_initial_state(self, max_steps: int = None,
			
 
				+                                 L_s: int = 4900, t_bw_s: int = 50) -> bool:
			
 
				+        """
			
 
				+        判断当前环境生成的初始状态是否为可行（non-dead）。
			
 
				+        使用最保守策略连续模拟 max_steps 次：
			
 
				+            若任意一次 is_dead_cycle(info) 返回 False，则视为必死状态。
			
 
				+
			
 
				+        参数：
			
 
				+            max_steps: 模拟步数，默认使用 self.max_episode_steps
			
 
				+            L_s: 过滤时长(s)，默认 4900
			
 
				+            t_bw_s: 物理反洗时长(s)，默认 50
			
 
				+
			
 
				+        返回：
			
 
				+            bool: True 表示可行状态（non-dead），False 表示必死状态
			
 
				+        """
			
 
				+        if max_steps is None:
			
 
				+            max_steps = getattr(self, "max_episode_steps", 15)
			
 
				+
			
 
				+        # 生成初始状态
			
 
				+        self.generate_initial_state()
			
 
				+        if not hasattr(self, "current_params"):
			
 
				+            raise AttributeError("generate_initial_state() 未设置 current_params。")
			
 
				+
			
 
				+        import copy
			
 
				+        curr_p = copy.deepcopy(self.current_params)
			
 
				+
			
 
				+        # 逐步模拟
			
 
				+        for step in range(max_steps):
			
 
				+            try:
			
 
				+                info, next_params = simulate_one_supercycle(curr_p, L_s, t_bw_s)
			
 
				+            except Exception:
			
 
				+                # 异常即视为不可行
			
 
				+                return False
			
 
				+
			
 
				+            if not is_dead_cycle(info):
			
 
				+                # 任意一次失败即为必死状态
			
 
				+                return False
			
 
				+
			
 
				+            curr_p = next_params
			
 
				+
			
 
				+        return True
			
 
				+
			
 
				+    def _get_state_copy(self):
			
 
				+        return copy.deepcopy(self.current_params)
			
 
				+
			
 
				+    def _get_obs(self):
			
 
				+        """
			
 
				+        构建当前环境归一化状态向量
			
 
				+        """
			
 
				+        # === 1. 从 current_params 读取动态参数 ===
			
 
				+        TMP0 = self.current_params.TMP0
			
 
				+        q_UF = self.current_params.q_UF
			
 
				+        temp = self.current_params.temp
			
 
				+
			
 
				+        # === 2. 计算本周期初始膜阻力 ===
			
 
				+        R0 = _calculate_resistance(TMP0, q_UF, temp)
			
 
				+
			
 
				+        # === 3. 从 current_params 读取膜阻力增长模型参数 ===
			
 
				+        nuk = self.current_params.nuK
			
 
				+        slope = self.current_params.slope
			
 
				+        power = self.current_params.power
			
 
				+        ceb_removal = self.current_params.ceb_removal
			
 
				+
			
 
				+        # === 4. 从 current_params 动态读取上下限 ===
			
 
				+        TMP0_min, TMP0_max = self.current_params.TMP0_min, self.current_params.TMP0_max
			
 
				+        q_UF_min, q_UF_max = self.current_params.q_UF_min, self.current_params.q_UF_max
			
 
				+        temp_min, temp_max = self.current_params.temp_min, self.current_params.temp_max
			
 
				+        nuK_min, nuK_max = self.current_params.nuK_min, self.current_params.nuK_max
			
 
				+        slope_min, slope_max = self.current_params.slope_min, self.current_params.slope_max
			
 
				+        power_min, power_max = self.current_params.power_min, self.current_params.power_max
			
 
				+        ceb_min, ceb_max = self.current_params.ceb_removal_min, self.current_params.ceb_removal_max
			
 
				+
			
 
				+        # === 5. 归一化计算（clip防止越界） ===
			
 
				+        TMP0_norm = np.clip((TMP0 - TMP0_min) / (TMP0_max - TMP0_min), 0, 1)
			
 
				+        q_UF_norm = np.clip((q_UF - q_UF_min) / (q_UF_max - q_UF_min), 0, 1)
			
 
				+        temp_norm = np.clip((temp - temp_min) / (temp_max - temp_min), 0, 1)
			
 
				+
			
 
				+        # R0 不在 current_params 中定义上下限，设定经验范围
			
 
				+        R0_norm = np.clip((R0 - 100.0) / (800.0 - 100.0), 0, 1)
			
 
				+
			
 
				+        short_term_norm = np.clip((nuk - nuK_min) / (nuK_max - nuK_min), 0, 1)
			
 
				+        long_term_slope_norm = np.clip((slope - slope_min) / (slope_max - slope_min), 0, 1)
			
 
				+        long_term_power_norm = np.clip((power - power_min) / (power_max - power_min), 0, 1)
			
 
				+        ceb_removal_norm = np.clip((ceb_removal - ceb_min) / (ceb_max - ceb_min), 0, 1)
			
 
				+
			
 
				+        # === 6. 构建观测向量 ===
			
 
				+        obs = np.array([
			
 
				+            TMP0_norm,
			
 
				+            q_UF_norm,
			
 
				+            temp_norm,
			
 
				+            R0_norm,
			
 
				+            short_term_norm,
			
 
				+            long_term_slope_norm,
			
 
				+            long_term_power_norm,
			
 
				+            ceb_removal_norm
			
 
				+        ], dtype=np.float32)
			
 
				+
			
 
				+        return obs
			
 
				+
			
 
				+    def _get_action_values(self, action):
			
 
				+        """
			
 
				+        将动作还原为实际时长
			
 
				+        """
			
 
				+        L_idx = action // self.num_bw
			
 
				+        t_bw_idx = action % self.num_bw
			
 
				+        return self.L_values[L_idx], self.t_bw_values[t_bw_idx]
			
 
				+
			
 
				     def step(self, action):
			
 
				         self.current_step += 1
			
 
				         L_s, t_bw_s = self._get_action_values(action)
			
@@ -315,15 +574,16 @@ class UFSuperCycleEnv(gym.Env):
 
				         t_bw_s = np.clip(t_bw_s, self.base_params.t_bw_min_s, self.base_params.t_bw_max_s)
			
 
				 
			
 
				         # 模拟超级周期
			
 
				-        feasible, info = simulate_one_supercycle(self.current_params, L_s, t_bw_s)
			
 
				+        info, next_params = simulate_one_supercycle(self.current_params, L_s, t_bw_s)
			
 
				+        # 根据 info 判断是否成功
			
 
				+        feasible = is_dead_cycle(info)  # True 表示成功循环，False 表示失败
			
 
				 
			
 
				         if feasible:
			
 
				-            reward = _score(self.current_params, info)
			
 
				-            self.current_params.TMP0 = info["TMP_after_ceb"]
			
 
				-            self.max_TMP_during_filtration = info["max_TMP_during_filtration"]
			
 
				+            reward = calculate_reward(self.current_params, info)
			
 
				+            self.current_params = next_params
			
 
				             terminated = False
			
 
				         else:
			
 
				-            reward = -20
			
 
				+            reward = -10
			
 
				             terminated = True
			
 
				 
			
 
				         truncated = self.current_step >= self.max_episode_steps
			
@@ -337,4 +597,3 @@ class UFSuperCycleEnv(gym.Env):
 
				 
			
 
				 
			
 
				 
			
 
				-
			
--- a/models/uf-rl/超滤训练源码/DQN_train.py
+++ b/models/uf-rl/超滤训练源码/DQN_train.py
@@ -3,9 +3,6 @@ import time
 
				 import random
			
 
				 import numpy as np
			
 
				 import torch
			
 
				-
			
 
				-import gymnasium as gym
			
 
				-from gymnasium import spaces
			
 
				 from stable_baselines3 import DQN
			
 
				 from stable_baselines3.common.monitor import Monitor
			
 
				 from stable_baselines3.common.vec_env import DummyVecEnv
			
@@ -24,10 +21,10 @@ class DQNParams:
 
				     learning_rate: float = 1e-4
			
 
				 
			
 
				     # 经验回放缓冲区大小（步数）
			
 
				-    buffer_size: int = 10000
			
 
				+    buffer_size: int = 100000
			
 
				 
			
 
				     # 学习开始前需要收集的步数
			
 
				-    learning_starts: int = 200
			
 
				+    learning_starts: int = 10000
			
 
				 
			
 
				     # 每次从经验池中采样的样本数量
			
 
				     batch_size: int = 32
			
@@ -39,7 +36,10 @@ class DQNParams:
 
				     train_freq: int = 4
			
 
				 
			
 
				     # 目标网络更新间隔
			
 
				-    target_update_interval: int = 2000
			
 
				+    target_update_interval: int = 1
			
 
				+
			
 
				+    # 软更新系数
			
 
				+    tau: float = 0.005
			
 
				 
			
 
				     # 初始探索率 ε
			
 
				     exploration_initial_eps: float = 1.0
			
@@ -240,5 +240,5 @@ if __name__ == "__main__":
 
				 
			
 
				     # 训练RL代理
			
 
				     print("开始训练RL代理...")
			
 
				-    train_uf_rl_agent(params, total_timesteps=50000)
			
 
				+    train_uf_rl_agent(params, total_timesteps=150000)
			
 
				 
			
--- a/models/uf-rl/超滤训练源码/UF_decide.py
+++ b/models/uf-rl/超滤训练源码/UF_decide.py
@@ -1,405 +0,0 @@
 
				-# UF_decide.py
			
 
				-from dataclasses import dataclass
			
 
				-import numpy as np
			
 
				-
			
 
				-@dataclass
			
 
				-class UFParams:
			
 
				-    # —— 膜与运行参数 ——
			
 
				-    q_UF: float = 360.0           # 过滤进水流量（m^3/h）
			
 
				-    TMP0: float = 0.03            # 初始TMP（MPa）
			
 
				-    TMP_max: float = 0.06         # TMP硬上限（MPa）
			
 
				-
			
 
				-    # —— 膜污染动力学 ——
			
 
				-    alpha: float = 1e-6           # TMP增长系数
			
 
				-    belta: float = 1.1            # 幂指数
			
 
				-
			
 
				-    # —— 反洗参数（固定） ——
			
 
				-    q_bw_m3ph: float = 1000.0     # 物理反洗流量（m^3/h）
			
 
				-
			
 
				-    # —— CEB参数（固定） ——
			
 
				-    T_ceb_interval_h: float = 48.0  # 固定每 k 小时做一次CEB
			
 
				-    v_ceb_m3: float = 30.0        # CEB用水体积（m^3）
			
 
				-    t_ceb_s: float = 40 * 60.0    # CEB时长（s）
			
 
				-    phi_ceb: float = 1.0          # CEB去除比例（简化：完全恢复到TMP0）
			
 
				-
			
 
				-    # —— 约束与收敛 ——
			
 
				-    dTMP: float = 0.0005          # 单次产水结束时，相对TMP0最大升幅（MPa）
			
 
				-
			
 
				-    # —— 搜索范围（秒） ——
			
 
				-    L_min_s: float = 3600.0       # 过滤时长下限（s）
			
 
				-    L_max_s: float = 4200.0       # 过滤时长上限（s）
			
 
				-    t_bw_min_s: float = 40.0      # 物洗时长下限（s）
			
 
				-    t_bw_max_s: float = 60.0      # 物洗时长上限（s）
			
 
				-
			
 
				-    # —— 物理反洗恢复函数参数 ——
			
 
				-    phi_bw_min: float = 0.7       # 物洗去除比例最小值
			
 
				-    phi_bw_max: float = 1.0       # 物洗去除比例最大值
			
 
				-    L_ref_s: float = 4000.0       # 过滤时长影响时间尺度
			
 
				-    tau_bw_s: float = 30.0        # 物洗时长影响时间尺度
			
 
				-    gamma_t: float = 1.0          # 物洗时长作用指数
			
 
				-    
			
 
				-    # —— 网格 ——
			
 
				-    L_step_s: float = 60.0        # 过滤时长步长（s）
			
 
				-    t_bw_step_s: float = 5.0      # 物洗时长步长（s）
			
 
				-
			
 
				-    # 多目标加权及高TMP惩罚
			
 
				-    w_rec: float = 0.8            # 回收率权重
			
 
				-    w_rate: float = 0.2           # 净供水率权重
			
 
				-    w_headroom: float = 0.3       # 贴边惩罚权重
			
 
				-    r_headroom: float = 2.0       # 贴边惩罚幂次
			
 
				-    headroom_hardcap: float = 0.98 # 超过此比例直接视为不可取
			
 
				-
			
 
				-def _delta_tmp(p: UFParams, L_h: float) -> float:
			
 
				-    # 过滤时段TMP上升量
			
 
				-    return float(p.alpha * (p.q_UF ** p.belta) * L_h)
			
 
				-
			
 
				-def _v_bw_m3(p: UFParams, t_bw_s: float) -> float:
			
 
				-    # 物理反洗水耗
			
 
				-    return float(p.q_bw_m3ph * (float(t_bw_s) / 3600.0))
			
 
				-
			
 
				-def phi_bw_of(p: UFParams, L_s: float, t_bw_s: float) -> float:
			
 
				-    # 物洗去除比例：随过滤时长增长上界收缩，随物洗时长增长趋饱和
			
 
				-    L = max(float(L_s), 1.0)
			
 
				-    t = max(float(t_bw_s), 1e-6)
			
 
				-    upper_L = p.phi_bw_min + (p.phi_bw_max - p.phi_bw_min) * np.exp(- L / p.L_ref_s)
			
 
				-    time_gain = 1.0 - np.exp(- (t / p.tau_bw_s) ** p.gamma_t)
			
 
				-    phi = upper_L * time_gain
			
 
				-    return float(np.clip(phi, 0.0, 0.999))
			
 
				-
			
 
				-def simulate_one_supercycle(p: UFParams, L_s: float, t_bw_s: float):
			
 
				-    """
			
 
				-    返回 (是否可行, 指标字典)
			
 
				-    - 支持动态CEB次数：48h固定间隔
			
 
				-    - 增加日均产水时间和吨水电耗
			
 
				-    """
			
 
				-    L_h = float(L_s) / 3600.0  # 小周期过滤时间(h)
			
 
				-
			
 
				-    tmp = p.TMP0
			
 
				-    max_tmp_during_filtration = tmp
			
 
				-    max_residual_increase = 0.0
			
 
				-
			
 
				-    # 小周期总时长(h)
			
 
				-    t_small_cycle_h = (L_s + t_bw_s) / 3600.0
			
 
				-
			
 
				-    # 计算超级周期内CEB次数
			
 
				-    k_bw_per_ceb = int(np.floor(p.T_ceb_interval_h / t_small_cycle_h))
			
 
				-    if k_bw_per_ceb < 1:
			
 
				-        k_bw_per_ceb = 1  # 至少一个小周期
			
 
				-
			
 
				-    # ton水电耗查表
			
 
				-    energy_lookup = {
			
 
				-        3600: 0.1034, 3660: 0.1031, 3720: 0.1029, 3780: 0.1026,
			
 
				-        3840: 0.1023, 3900: 0.1021, 3960: 0.1019, 4020: 0.1017,
			
 
				-        4080: 0.1015, 4140: 0.1012, 4200: 0.1011
			
 
				-    }
			
 
				-
			
 
				-    for _ in range(k_bw_per_ceb):
			
 
				-        tmp_run_start = tmp
			
 
				-
			
 
				-        # 过滤阶段TMP增长
			
 
				-        dtmp = _delta_tmp(p, L_h)
			
 
				-        tmp_peak = tmp_run_start + dtmp
			
 
				-
			
 
				-        # 约束1：峰值不得超过硬上限
			
 
				-        if tmp_peak > p.TMP_max + 1e-12:
			
 
				-            return False, {"reason": "TMP_max violated during filtration", "TMP_peak": tmp_peak}
			
 
				-
			
 
				-        if tmp_peak > max_tmp_during_filtration:
			
 
				-            max_tmp_during_filtration = tmp_peak
			
 
				-
			
 
				-        # 物理反洗
			
 
				-        phi = phi_bw_of(p, L_s, t_bw_s)
			
 
				-        tmp_after_bw = tmp_peak - phi * (tmp_peak - tmp_run_start)
			
 
				-
			
 
				-        # 约束2：单次残余增量控制
			
 
				-        residual_inc = tmp_after_bw - tmp_run_start
			
 
				-        if residual_inc > p.dTMP + 1e-12:
			
 
				-            return False, {
			
 
				-                "reason": "residual TMP increase after BW exceeded dTMP",
			
 
				-                "residual_increase": residual_inc,
			
 
				-                "limit_dTMP": p.dTMP
			
 
				-            }
			
 
				-        if residual_inc > max_residual_increase:
			
 
				-            max_residual_increase = residual_inc
			
 
				-
			
 
				-        tmp = tmp_after_bw
			
 
				-
			
 
				-    # CEB
			
 
				-    tmp_after_ceb = p.TMP0
			
 
				-
			
 
				-    # 体积与回收率
			
 
				-    V_feed_super = k_bw_per_ceb * p.q_UF * L_h
			
 
				-    V_loss_super = k_bw_per_ceb * _v_bw_m3(p, t_bw_s) + p.v_ceb_m3
			
 
				-    V_net = max(0.0, V_feed_super - V_loss_super)
			
 
				-    recovery = max(0.0, V_net / max(V_feed_super, 1e-12))
			
 
				-
			
 
				-    # 时间与净供水率
			
 
				-    T_super_h = k_bw_per_ceb * (L_s + t_bw_s) / 3600.0 + p.t_ceb_s / 3600.0
			
 
				-    net_delivery_rate_m3ph = V_net / max(T_super_h, 1e-12)
			
 
				-
			
 
				-    # 贴边比例与硬限
			
 
				-    headroom_ratio = max_tmp_during_filtration / max(p.TMP_max, 1e-12)
			
 
				-    if headroom_ratio > p.headroom_hardcap + 1e-12:
			
 
				-        return False, {"reason": "headroom hardcap exceeded", "headroom_ratio": headroom_ratio}
			
 
				-
			
 
				-    # —— 新增指标 1：日均产水时间（h/d） ——
			
 
				-    daily_prod_time_h = k_bw_per_ceb * L_h / T_super_h * 24.0
			
 
				-
			
 
				-    # —— 新增指标 2：吨水电耗（kWh/m³） ——
			
 
				-    closest_L = min(energy_lookup.keys(), key=lambda x: abs(x - L_s))
			
 
				-    ton_water_energy = energy_lookup[closest_L]
			
 
				-
			
 
				-    info = {
			
 
				-        "recovery": recovery,
			
 
				-        "V_feed_super_m3": V_feed_super,
			
 
				-        "V_loss_super_m3": V_loss_super,
			
 
				-        "V_net_super_m3": V_net,
			
 
				-        "supercycle_time_h": T_super_h,
			
 
				-        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
			
 
				-        "max_TMP_during_filtration": max_tmp_during_filtration,
			
 
				-        "max_residual_increase_per_run": max_residual_increase,
			
 
				-        "phi_bw_effective": phi,
			
 
				-        "TMP_after_ceb": tmp_after_ceb,
			
 
				-        "headroom_ratio": headroom_ratio,
			
 
				-        "daily_prod_time_h": daily_prod_time_h,
			
 
				-        "ton_water_energy_kWh_per_m3": ton_water_energy,
			
 
				-        "k_bw_per_ceb": k_bw_per_ceb
			
 
				-    }
			
 
				-
			
 
				-    return True, info
			
 
				-
			
 
				-def _score(p: UFParams, rec: dict) -> float:
			
 
				-    """综合评分：越大越好。不同TMP0会改变max_TMP→改变惩罚→得到不同解。"""
			
 
				-    # 无量纲化净供水率
			
 
				-    rate_norm = rec["net_delivery_rate_m3ph"] / max(p.q_UF, 1e-12)
			
 
				-    headroom_penalty = (rec["max_TMP_during_filtration"] / max(p.TMP_max, 1e-12)) ** p.r_headroom
			
 
				-    return (p.w_rec * rec["recovery"]
			
 
				-            + p.w_rate * rate_norm
			
 
				-            - p.w_headroom * headroom_penalty)
			
 
				-
			
 
				-def optimize_2d(p: UFParams,
			
 
				-                L_min_s=None, L_max_s=None, L_step_s=None,
			
 
				-                t_bw_min_s=None, t_bw_max_s=None, t_bw_step_s=None):
			
 
				-    # 网格生成
			
 
				-    L_lo = p.L_min_s if L_min_s is None else float(L_min_s)
			
 
				-    L_hi = p.L_max_s if L_max_s is None else float(L_max_s)
			
 
				-    L_st = p.L_step_s if L_step_s is None else float(L_step_s)
			
 
				-
			
 
				-    t_lo = p.t_bw_min_s if t_bw_min_s is None else float(t_bw_min_s)
			
 
				-    t_hi = p.t_bw_max_s if t_bw_max_s is None else float(t_bw_max_s)
			
 
				-    t_st = p.t_bw_step_s if t_bw_step_s is None else float(t_bw_step_s)
			
 
				-
			
 
				-    L_vals = np.arange(L_lo, L_hi + 1e-9, L_st)
			
 
				-    t_vals = np.arange(t_lo, t_hi + 1e-9, t_st)
			
 
				-
			
 
				-    best = None
			
 
				-    best_score = -np.inf
			
 
				-
			
 
				-    for L_s in L_vals:
			
 
				-        for t_bw_s in t_vals:
			
 
				-            feasible, info = simulate_one_supercycle(p, L_s, t_bw_s)
			
 
				-            if not feasible:
			
 
				-                continue
			
 
				-
			
 
				-            rec = {"L_s": float(L_s), "t_bw_s": float(t_bw_s)}
			
 
				-            rec.update(info)
			
 
				-
			
 
				-            score = _score(p, rec)
			
 
				-
			
 
				-            if score > best_score + 1e-14:
			
 
				-                best_score = score
			
 
				-                best = rec.copy()
			
 
				-                best["score"] = float(score)
			
 
				-            # 若分数相同，偏好回收率更高，再偏好净供水率更高
			
 
				-            elif abs(score - best_score) <= 1e-14:
			
 
				-                if (rec["recovery"] > best["recovery"] + 1e-12) or (
			
 
				-                    abs(rec["recovery"] - best["recovery"]) <= 1e-12 and
			
 
				-                    rec["net_delivery_rate_m3ph"] > best["net_delivery_rate_m3ph"] + 1e-12
			
 
				-                ):
			
 
				-                    best = rec.copy()
			
 
				-                    best["score"] = float(score)
			
 
				-
			
 
				-    if best is None:
			
 
				-        return {"status": "no-feasible-solution"}
			
 
				-    best["status"] = "feasible"
			
 
				-    return best
			
 
				-
			
 
				-def run_uf_decision(TMP0: float = None) -> dict:
			
 
				-    if TMP0 is None:
			
 
				-        rng = np.random.default_rng()
			
 
				-        TMP0 = rng.uniform(0.03, 0.04)  # 初始TMP随机
			
 
				-
			
 
				-    params = UFParams(
			
 
				-        q_UF=360.0,
			
 
				-        TMP_max=0.05,
			
 
				-        alpha=1.2e-6,
			
 
				-        belta=1.0,
			
 
				-        q_bw_m3ph=1000.0,
			
 
				-        T_ceb_interval_h=48,
			
 
				-        v_ceb_m3=30.0,
			
 
				-        t_ceb_s=40*60.0,
			
 
				-        phi_ceb=1.0,
			
 
				-        dTMP=0.001,
			
 
				-
			
 
				-        L_min_s=3600.0, L_max_s=4200.0, L_step_s=30.0,
			
 
				-        t_bw_min_s=90.0, t_bw_max_s=100.0, t_bw_step_s=2.0,
			
 
				-
			
 
				-        phi_bw_min=0.70, phi_bw_max=1.00,
			
 
				-        L_ref_s=500.0, tau_bw_s=40.0, gamma_t=1.0,
			
 
				-
			
 
				-        TMP0=TMP0,
			
 
				-
			
 
				-        w_rec=0.7, w_rate=0.3, w_headroom=0.3, r_headroom=2.0, headroom_hardcap=0.9
			
 
				-    )
			
 
				-
			
 
				-    result = optimize_2d(params)
			
 
				-    if result.get("status") == "feasible":
			
 
				-        return {
			
 
				-            "L_s": result["L_s"],
			
 
				-            "t_bw_s": result["t_bw_s"],
			
 
				-            "recovery": result["recovery"],
			
 
				-            "k_bw_per_ceb": result["k_bw_per_ceb"],
			
 
				-            "daily_prod_time_h": result["daily_prod_time_h"],
			
 
				-            "ton_water_energy_kWh_per_m3": result["ton_water_energy_kWh_per_m3"]
			
 
				-        }
			
 
				-
			
 
				-    # 若没有可行解，返回最小过滤时间和默认值
			
 
				-    return {
			
 
				-        "L_s": params.L_min_s,
			
 
				-        "t_bw_s": params.t_bw_min_s,
			
 
				-        "recovery": 0.0,
			
 
				-        "k_bw_per_ceb": 1,
			
 
				-        "daily_prod_time_h": 0.0,
			
 
				-        "ton_water_energy_kWh_per_m3": 0.0
			
 
				-    }
			
 
				-
			
 
				-
			
 
				-def generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s):
			
 
				-    """
			
 
				-    根据工厂当前值、模型上一轮决策值和模型当前轮决策值，生成PLC指令。
			
 
				-
			
 
				-    新增功能：
			
 
				-    1. 处理None值情况：如果模型上一轮值为None，则使用工厂当前值；
			
 
				-       如果工厂当前值也为None，则返回None并提示错误。
			
 
				-    """
			
 
				-    # 参数配置保持不变
			
 
				-    params = UFParams(
			
 
				-        L_min_s=3600.0, L_max_s=6000.0, L_step_s=60.0,
			
 
				-        t_bw_min_s=40.0, t_bw_max_s=60.0, t_bw_step_s=5.0,
			
 
				-    )
			
 
				-
			
 
				-    # 参数解包
			
 
				-    L_step_s = params.L_step_s
			
 
				-    t_bw_step_s = params.t_bw_step_s
			
 
				-    L_min_s = params.L_min_s
			
 
				-    L_max_s = params.L_max_s
			
 
				-    t_bw_min_s = params.t_bw_min_s
			
 
				-    t_bw_max_s = params.t_bw_max_s
			
 
				-    adjustment_threshold = 1.0
			
 
				-
			
 
				-    # 处理None值情况
			
 
				-    if model_prev_L_s is None:
			
 
				-        if current_L_s is None:
			
 
				-            print("错误: 过滤时长的工厂当前值和模型上一轮值均为None")
			
 
				-            return None, None
			
 
				-        else:
			
 
				-            # 使用工厂当前值作为基准
			
 
				-            effective_current_L = current_L_s
			
 
				-            source_L = "工厂当前值(模型上一轮值为None)"
			
 
				-    else:
			
 
				-        # 模型上一轮值不为None，继续检查工厂当前值
			
 
				-        if current_L_s is None:
			
 
				-            effective_current_L = model_prev_L_s
			
 
				-            source_L = "模型上一轮值(工厂当前值为None)"
			
 
				-        else:
			
 
				-            # 两个值都不为None，比较哪个更接近模型当前建议值
			
 
				-            current_to_model_diff = abs(current_L_s - model_L_s)
			
 
				-            prev_to_model_diff = abs(model_prev_L_s - model_L_s)
			
 
				-
			
 
				-            if current_to_model_diff <= prev_to_model_diff:
			
 
				-                effective_current_L = current_L_s
			
 
				-                source_L = "工厂当前值"
			
 
				-            else:
			
 
				-                effective_current_L = model_prev_L_s
			
 
				-                source_L = "模型上一轮值"
			
 
				-
			
 
				-    # 对反洗时长进行同样的处理
			
 
				-    if model_prev_t_bw_s is None:
			
 
				-        if current_t_bw_s is None:
			
 
				-            print("错误: 反洗时长的工厂当前值和模型上一轮值均为None")
			
 
				-            return None, None
			
 
				-        else:
			
 
				-            effective_current_t_bw = current_t_bw_s
			
 
				-            source_t_bw = "工厂当前值(模型上一轮值为None)"
			
 
				-    else:
			
 
				-        if current_t_bw_s is None:
			
 
				-            effective_current_t_bw = model_prev_t_bw_s
			
 
				-            source_t_bw = "模型上一轮值(工厂当前值为None)"
			
 
				-        else:
			
 
				-            current_to_model_t_bw_diff = abs(current_t_bw_s - model_t_bw_s)
			
 
				-            prev_to_model_t_bw_diff = abs(model_prev_t_bw_s - model_t_bw_s)
			
 
				-
			
 
				-            if current_to_model_t_bw_diff <= prev_to_model_t_bw_diff:
			
 
				-                effective_current_t_bw = current_t_bw_s
			
 
				-                source_t_bw = "工厂当前值"
			
 
				-            else:
			
 
				-                effective_current_t_bw = model_prev_t_bw_s
			
 
				-                source_t_bw = "模型上一轮值"
			
 
				-
			
 
				-    # 检测所有输入值是否在规定范围内（只对非None值进行检查）
			
 
				-    # 工厂当前值检查（警告）
			
 
				-    if current_L_s is not None and not (L_min_s <= current_L_s <= L_max_s):
			
 
				-        print(f"警告: 当前过滤时长 {current_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
			
 
				-    if current_t_bw_s is not None and not (t_bw_min_s <= current_t_bw_s <= t_bw_max_s):
			
 
				-        print(f"警告: 当前反洗时长 {current_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
			
 
				-
			
 
				-    # 模型上一轮决策值检查（警告）
			
 
				-    if model_prev_L_s is not None and not (L_min_s <= model_prev_L_s <= L_max_s):
			
 
				-        print(f"警告: 模型上一轮过滤时长 {model_prev_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
			
 
				-    if model_prev_t_bw_s is not None and not (t_bw_min_s <= model_prev_t_bw_s <= t_bw_max_s):
			
 
				-        print(f"警告: 模型上一轮反洗时长 {model_prev_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
			
 
				-
			
 
				-    # 模型当前轮决策值检查（错误）
			
 
				-    if model_L_s is None:
			
 
				-        raise ValueError("错误: 决策模型建议的过滤时长不能为None")
			
 
				-    elif not (L_min_s <= model_L_s <= L_max_s):
			
 
				-        raise ValueError(f"错误: 决策模型建议的过滤时长 {model_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
			
 
				-
			
 
				-    if model_t_bw_s is None:
			
 
				-        raise ValueError("错误: 决策模型建议的反洗时长不能为None")
			
 
				-    elif not (t_bw_min_s <= model_t_bw_s <= t_bw_max_s):
			
 
				-        raise ValueError(f"错误: 决策模型建议的反洗时长 {model_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
			
 
				-
			
 
				-    print(f"过滤时长基准: {source_L}, 值: {effective_current_L}")
			
 
				-    print(f"反洗时长基准: {source_t_bw}, 值: {effective_current_t_bw}")
			
 
				-
			
 
				-    # 使用选定的基准值进行计算调整
			
 
				-    L_diff = model_L_s - effective_current_L
			
 
				-    L_adjustment = 0
			
 
				-    if abs(L_diff) > adjustment_threshold * L_step_s:
			
 
				-        if L_diff > 0:
			
 
				-            L_adjustment = L_step_s
			
 
				-        else:
			
 
				-            L_adjustment = -L_step_s
			
 
				-    next_L_s = effective_current_L + L_adjustment
			
 
				-
			
 
				-    t_bw_diff = model_t_bw_s - effective_current_t_bw
			
 
				-    t_bw_adjustment = 0
			
 
				-    if abs(t_bw_diff) > adjustment_threshold * t_bw_step_s:
			
 
				-        if t_bw_diff > 0:
			
 
				-            t_bw_adjustment = t_bw_step_s
			
 
				-        else:
			
 
				-            t_bw_adjustment = -t_bw_step_s
			
 
				-    next_t_bw_s = effective_current_t_bw + t_bw_adjustment
			
 
				-
			
 
				-    return next_L_s, next_t_bw_s
			
 
				-
			
 
				-
			
 
				-current_L_s = 3920
			
 
				-current_t_bw_s = 98
			
 
				-model_prev_L_s = None
			
 
				-model_prev_t_bw_s = None
			
 
				-model_L_s = 4160
			
 
				-model_t_bw_s = 96
			
 
				-next_L_s, next_t_bw_s = generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s)
			
 
				-print(f"next_L_s={next_L_s}, next_t_bw_s={next_t_bw_s}")
			
--- a/models/uf-rl/超滤训练源码/UF_models.py
+++ b/models/uf-rl/超滤训练源码/UF_models.py
@@ -1,33 +0,0 @@
 
				-import torch
			
 
				-import numpy as np
			
 
				-
			
 
				-# TMP 上升量模型
			
 
				-class TMPIncreaseModel(torch.nn.Module):
			
 
				-    def __init__(self):
			
 
				-        super().__init__()
			
 
				-    def forward(self, p, L_h):
			
 
				-        return float(p.alpha * (p.q_UF ** p.belta) * L_h)
			
 
				-
			
 
				-# 反洗 TMP 去除模型
			
 
				-class TMPDecreaseModel(torch.nn.Module):
			
 
				-    def __init__(self):
			
 
				-        super().__init__()
			
 
				-    def forward(self, p, L_s, t_bw_s):
			
 
				-        L = max(float(L_s), 1.0)
			
 
				-        t = max(float(t_bw_s), 1e-6)
			
 
				-        upper_L = p.phi_bw_min + (p.phi_bw_max - p.phi_bw_min) * np.exp(- L / p.L_ref_s)
			
 
				-        time_gain = 1.0 - np.exp(- (t / p.tau_bw_s) ** p.gamma_t)
			
 
				-        phi = upper_L * time_gain
			
 
				-        return float(np.clip(phi, 0.0, 0.999))
			
 
				-
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    model_fp = TMPIncreaseModel()
			
 
				-    model_bw = TMPDecreaseModel()
			
 
				-
			
 
				-
			
 
				-    torch.save(model_fp.state_dict(), "uf_fp.pth")
			
 
				-    torch.save(model_bw.state_dict(), "uf_bw.pth")
			
 
				-
			
 
				-
			
 
				-    print("模型已安全保存为 uf_fp.pth、uf_bw.pth")
			
--- a/models/uf-rl/超滤训练源码/UF_resistance_models.py
+++ b/models/uf-rl/超滤训练源码/UF_resistance_models.py
--- a/models/uf-rl/超滤训练源码/check_initial_state.py
+++ b/models/uf-rl/超滤训练源码/check_initial_state.py
@@ -0,0 +1,138 @@
 
				+# check_initial_state.py
			
 
				+"""
			
 
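				+Check whether an initial state is a "must-die" state (conservatively dead):
			
 
				+1) Build base_params (prefer an imported base_params instance from DQN_env / rl_dqn_env, otherwise construct UFParams()).
			
 
				+2) Instantiate the environment class UFSuperCycleEnv(base_params).
			
 
				+3) Call env.generate_initial_state() to populate env.current_params (reset() is not called).
			
 
				+4) Simulate max_steps consecutive cycles with a fixed policy (defaults L_s=4200 s, t_bw_s=50 s; the __main__ block passes 6000 s / 40 s);
			
 
				+   if any is_dead_cycle(info) call returns False the state is judged must-die (return True); otherwise return False.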
			
 
				+"""
			
 
				+
			
 
				+from typing import Any
			
 
				+import copy
			
 
				+import traceback
			
 
				+
			
 
				+# Import the required names from DQN_env (falling back to rl_dqn_env below)
			
 
				+try:
			
 
				+    from DQN_env import (
			
 
				+        simulate_one_supercycle,
			
 
				+        is_dead_cycle,
			
 
				+        UFSuperCycleEnv,
			
 
				+        UFParams,       # optional later on, but a missing name here fails the whole import and triggers the fallback
			
 
				+        base_params     # module-level base_params instance; same caveat as above
			
 
				+    )
			
 
				+except Exception:
			
 
				+    # Some names may be missing -- the fallback below resolves each one individually
			
 
				+    # Import the module first, then probe each attribute so the error message is friendlier
			
 
				+    import importlib
			
 
				+    rl = importlib.import_module("rl_dqn_env")
			
 
				+    simulate_one_supercycle = getattr(rl, "simulate_one_supercycle", None)
			
 
				+    is_dead_cycle = getattr(rl, "is_dead_cycle", None)
			
 
				+    UFSuperCycleEnv = getattr(rl, "UFSuperCycleEnv", None)
			
 
				+    UFParams = getattr(rl, "UFParams", None)
			
 
				+    base_params = getattr(rl, "base_params", None)
			
 
				+
			
 
				+# Verify that the mandatory names resolved (UFParams and base_params are allowed to be None)
			
 
				+_missing = []
			
 
				+if simulate_one_supercycle is None:
			
 
				+    _missing.append("simulate_one_supercycle")
			
 
				+if is_dead_cycle is None:
			
 
				+    _missing.append("is_dead_cycle")
			
 
				+if UFSuperCycleEnv is None:
			
 
				+    _missing.append("UFSuperCycleEnv")
			
 
				+if _missing:
			
 
				+    raise ImportError(f"无法从 rl_dqn_env 导入以下必要项: {', '.join(_missing)}")
			
 
				+
			
 
				+def is_dead_initial_state_env(env: UFSuperCycleEnv, max_steps: int = 15,
			
 
				+                              L_s: int = 4200, t_bw_s: int = 50,
			
 
				+                              verbose: bool = True) -> bool:
			
 
				+    """
			
 
				+    Judge whether env.current_params, taken as the initial state, is a must-die state (fixed conservative policy).
			
 
				+
			
 
				+    Args:
			
 
				+        env: an instantiated UFSuperCycleEnv (must provide generate_initial_state() and current_params)
			
 
				+        max_steps: number of simulated steps (default 15)
			
 
				+        L_s: filtration duration in seconds (default 4200; the original note cited 3600 as the conservative value)
			
 
				+        t_bw_s: physical backwash duration in seconds (default 50; the original note cited 60 as the conservative value)
			
 
				+        verbose: whether to print the result of each step
			
 
				+
			
 
				+    Returns:
			
 
				+        True if the state is must-die (conservatively dead), including when the simulation raises
			
 
				+        False if all max_steps cycles succeed (feasible)
			
 
				+    """
			
 
				+    # 1) Make sure env exposes generate_initial_state() and, after calling it, current_params
			
 
				+    if not hasattr(env, "generate_initial_state"):
			
 
				+        raise AttributeError("env 缺少 generate_initial_state() 方法。")
			
 
				+    # Generate the initial state (reset() is not called)
			
 
				+    env.generate_initial_state()
			
 
				+
			
 
				+    if not hasattr(env, "current_params"):
			
 
				+        raise AttributeError("env.generate_initial_state() 未设置 env.current_params。")
			
 
				+
			
 
				+    curr_p = copy.deepcopy(env.current_params)
			
 
				+
			
 
				+    for step in range(1, max_steps + 1):
			
 
				+        try:
			
 
				+            info, next_params = simulate_one_supercycle(curr_p, L_s, t_bw_s)
			
 
				+        except Exception as e:
			
 
				+            # Treat an exception raised by the simulation as a failure (conservative handling)
			
 
				+            if verbose:
			
 
				+                print(f"[Step {step}] simulate_one_supercycle 抛出异常，视为失败。异常信息：{e}")
			
 
				+                traceback.print_exc()
			
 
				+            return True
			
 
				+
			
 
				+        success = is_dead_cycle(info)  # True means the cycle succeeded (NOTE(review): at odds with the helper's name -- confirm)
			
 
				+
			
 
				+        if verbose:
			
 
				+            print(f"[Step {step}] 循环结果：{'成功' if success else '失败'}")
			
 
				+            # Print a brief summary of the key diagnostic fields if info carries them
			
 
				+            try:
			
 
				+                print(f"     TMP0: {info.get('TMP0')},max_TMP: {info.get('max_TMP_during_filtration')}, recovery: {info.get('recovery')}, "
			
 
				+                      f"R0: {info.get('R0')}, R_after_ceb: {info.get('R_after_ceb')}")
			
 
				+            except Exception:
			
 
				+                pass
			
 
				+
			
 
				+        if not success:
			
 
				+            if verbose:
			
 
				+                print(f"在第 {step} 步检测到失败，判定为必死初始状态（conservatively dead）。")
			
 
				+            return True
			
 
				+
			
 
				+        # Otherwise continue, using next_params as the starting parameters of the next step
			
 
				+        curr_p = next_params
			
 
				+
			
 
				+    if verbose:
			
 
				+        print(f"{max_steps} 步均成功，初始状态判定为可行（non-dead）。")
			
 
				+    return False
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    print("=== check_initial_state.py: 使用 env.generate_initial_state() 检查初始状态是否为必死 ===")
			
 
				+
			
 
				+    try:
			
 
				+        # 1) Build base_params: prefer the imported instance, else default-construct UFParams
			
 
				+        if base_params is not None:
			
 
				+            bp = base_params
			
 
				+            print("使用 rl_dqn_env 中提供的 base_params。")
			
 
				+        elif UFParams is not None:
			
 
				+            bp = UFParams()  # default construction
			
 
				+            print("使用 UFParams() 构造 base_params 的实例。")
			
 
				+        else:
			
 
				+            raise ImportError("无法构造 base_params：rl_dqn_env 中既无 base_params 也无 UFParams。")
			
 
				+
			
 
				+        # 2) Instantiate the environment class (base_params is passed to the constructor)
			
 
				+        env = UFSuperCycleEnv(bp)
			
 
				+        print("已实例化 UFSuperCycleEnv 环境。")
			
 
				+
			
 
				+        # 3) Generate an initial state through the env and check whether env.current_params is must-die
			
 
				+        dead = is_dead_initial_state_env(env, max_steps=getattr(env, "max_episode_steps", 15),
			
 
				+                                        L_s=6000, t_bw_s=40, verbose=True)
			
 
				+
			
 
				+        print("\n=== 判定结果 ===")
			
 
				+        if dead:
			
 
				+            print("当前生成的初始状态为【必死状态】（conservatively dead）。")
			
 
				+        else:
			
 
				+            print("当前生成的初始状态为【可行状态】（non-dead）。")
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        print("脚本执行出现错误：", e)
			
 
				+        traceback.print_exc()
			
--- a/models/uf-rl/超滤训练源码/uf_bw.pth
+++ b/models/uf-rl/超滤训练源码/uf_bw.pth
--- a/models/uf-rl/超滤训练源码/uf_fp.pth
+++ b/models/uf-rl/超滤训练源码/uf_fp.pth