před 5 měsíci · 48850ed4c8
--- a/models/uf-rl/超滤训练源码/DQN_decide.py
+++ b/models/uf-rl/超滤训练源码/DQN_decide.py
@@ -1,246 +0,0 @@
 
															-import numpy as np
														
 
															-from stable_baselines3 import DQN
														
 
															-from UF_super_RL.DQN_env import UFSuperCycleEnv
														
 
															-from UF_super_RL.DQN_env import UFParams
														
 
															-
														
 
															-# 模型路径
														
 
															-MODEL_PATH = "dqn_model.zip"
														
 
															-
														
 
															-# 加载模型（只加载一次，提高效率）
														
 
															-model = DQN.load(MODEL_PATH)
														
 
															-
														
 
															-def run_uf_DQN_decide(uf_params, TMP0_value: float):
														
 
															-    """
														
 
															-    单步决策函数：输入原始 TMP0，预测并执行动作
														
 
															-
														
 
															-    参数:
														
 
															-        TMP0_value (float): 当前 TMP0 值（单位与环境一致）
														
 
															-
														
 
															-    返回:
														
 
															-        dict: 包含模型选择的动作、动作参数、新状态、奖励等
														
 
															-    """
														
 
															-    # 1. 实例化环境
														
 
															-    base_params = uf_params
														
 
															-    env = UFSuperCycleEnv(base_params)
														
 
															-
														
 
															-    # 2. 将输入的 TMP0 写入环境
														
 
															-    env.current_params.TMP0 = TMP0_value
														
 
															-
														
 
															-    # 3. 获取归一化状态
														
 
															-    obs = env._get_obs().reshape(1, -1)
														
 
															-
														
 
															-    # 4. 模型预测动作
														
 
															-    action, _ = model.predict(obs, deterministic=True)
														
 
															-
														
 
															-    # 5. 解析动作对应的 L_s 和 t_bw_s
														
 
															-    L_s, t_bw_s = env._get_action_values(action[0])
														
 
															-
														
 
															-    # 6. 在环境中执行该动作
														
 
															-    next_obs, reward, terminated, truncated, info = env.step(action[0])
														
 
															-
														
 
															-    # 7. 整理结果
														
 
															-    result = {
														
 
															-        "action": int(action[0]),
														
 
															-        "L_s": float(L_s),
														
 
															-        "t_bw_s": float(t_bw_s),
														
 
															-        "next_obs": next_obs,
														
 
															-        "reward": reward,
														
 
															-        "terminated": terminated,
														
 
															-        "truncated": truncated,
														
 
															-        "info": info
														
 
															-    }
														
 
															-
														
 
															-    # 8. 关闭环境
														
 
															-    env.close()
														
 
															-
														
 
															-    return result
														
 
															-
														
 
															-def generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s):
														
 
															-    """
														
 
															-    根据工厂当前值、模型上一轮决策值和模型当前轮决策值，生成PLC指令。
														
 
															-
														
 
															-    新增功能：
														
 
															-    1. 处理None值情况：如果模型上一轮值为None，则使用工厂当前值；
														
 
															-       如果工厂当前值也为None，则返回None并提示错误。
														
 
															-    """
														
 
															-    # 参数配置保持不变
														
 
															-    params = UFParams(
														
 
															-        L_min_s=3600.0, L_max_s=6000.0, L_step_s=60.0,
														
 
															-        t_bw_min_s=40.0, t_bw_max_s=60.0, t_bw_step_s=5.0,
														
 
															-    )
														
 
															-
														
 
															-    # 参数解包
														
 
															-    L_step_s = params.L_step_s
														
 
															-    t_bw_step_s = params.t_bw_step_s
														
 
															-    L_min_s = params.L_min_s
														
 
															-    L_max_s = params.L_max_s
														
 
															-    t_bw_min_s = params.t_bw_min_s
														
 
															-    t_bw_max_s = params.t_bw_max_s
														
 
															-    adjustment_threshold = 1.0
														
 
															-
														
 
															-    # 处理None值情况
														
 
															-    if model_prev_L_s is None:
														
 
															-        if current_L_s is None:
														
 
															-            print("错误: 过滤时长的工厂当前值和模型上一轮值均为None")
														
 
															-            return None, None
														
 
															-        else:
														
 
															-            # 使用工厂当前值作为基准
														
 
															-            effective_current_L = current_L_s
														
 
															-            source_L = "工厂当前值(模型上一轮值为None)"
														
 
															-    else:
														
 
															-        # 模型上一轮值不为None，继续检查工厂当前值
														
 
															-        if current_L_s is None:
														
 
															-            effective_current_L = model_prev_L_s
														
 
															-            source_L = "模型上一轮值(工厂当前值为None)"
														
 
															-        else:
														
 
															-            effective_current_L = model_prev_L_s
														
 
															-            source_L = "模型上一轮值"
														
 
															-
														
 
															-    # 对反洗时长进行同样的处理
														
 
															-    if model_prev_t_bw_s is None:
														
 
															-        if current_t_bw_s is None:
														
 
															-            print("错误: 反洗时长的工厂当前值和模型上一轮值均为None")
														
 
															-            return None, None
														
 
															-        else:
														
 
															-            effective_current_t_bw = current_t_bw_s
														
 
															-            source_t_bw = "工厂当前值(模型上一轮值为None)"
														
 
															-    else:
														
 
															-        if current_t_bw_s is None:
														
 
															-            effective_current_t_bw = model_prev_t_bw_s
														
 
															-            source_t_bw = "模型上一轮值(工厂当前值为None)"
														
 
															-        else:
														
 
															-            effective_current_t_bw = model_prev_t_bw_s
														
 
															-            source_t_bw = "模型上一轮值"
														
 
															-
														
 
															-    # 检测所有输入值是否在规定范围内（只对非None值进行检查）
														
 
															-    # 工厂当前值检查（警告）
														
 
															-    if current_L_s is not None and not (L_min_s <= current_L_s <= L_max_s):
														
 
															-        print(f"警告: 当前过滤时长 {current_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
														
 
															-    if current_t_bw_s is not None and not (t_bw_min_s <= current_t_bw_s <= t_bw_max_s):
														
 
															-        print(f"警告: 当前反洗时长 {current_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
														
 
															-
														
 
															-    # 模型上一轮决策值检查（警告）
														
 
															-    if model_prev_L_s is not None and not (L_min_s <= model_prev_L_s <= L_max_s):
														
 
															-        print(f"警告: 模型上一轮过滤时长 {model_prev_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
														
 
															-    if model_prev_t_bw_s is not None and not (t_bw_min_s <= model_prev_t_bw_s <= t_bw_max_s):
														
 
															-        print(f"警告: 模型上一轮反洗时长 {model_prev_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
														
 
															-
														
 
															-    # 模型当前轮决策值检查（错误）
														
 
															-    if model_L_s is None:
														
 
															-        raise ValueError("错误: 决策模型建议的过滤时长不能为None")
														
 
															-    elif not (L_min_s <= model_L_s <= L_max_s):
														
 
															-        raise ValueError(f"错误: 决策模型建议的过滤时长 {model_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
														
 
															-
														
 
															-    if model_t_bw_s is None:
														
 
															-        raise ValueError("错误: 决策模型建议的反洗时长不能为None")
														
 
															-    elif not (t_bw_min_s <= model_t_bw_s <= t_bw_max_s):
														
 
															-        raise ValueError(f"错误: 决策模型建议的反洗时长 {model_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
														
 
															-
														
 
															-    print(f"过滤时长基准: {source_L}, 值: {effective_current_L}")
														
 
															-    print(f"反洗时长基准: {source_t_bw}, 值: {effective_current_t_bw}")
														
 
															-
														
 
															-    # 使用选定的基准值进行计算调整
														
 
															-    L_diff = model_L_s - effective_current_L
														
 
															-    L_adjustment = 0
														
 
															-    if abs(L_diff) >= adjustment_threshold * L_step_s:
														
 
															-        if L_diff >= 0:
														
 
															-            L_adjustment = L_step_s
														
 
															-        else:
														
 
															-            L_adjustment = -L_step_s
														
 
															-    next_L_s = effective_current_L + L_adjustment
														
 
															-
														
 
															-    t_bw_diff = model_t_bw_s - effective_current_t_bw
														
 
															-    t_bw_adjustment = 0
														
 
															-    if abs(t_bw_diff) >= adjustment_threshold * t_bw_step_s:
														
 
															-        if t_bw_diff >= 0:
														
 
															-            t_bw_adjustment = t_bw_step_s
														
 
															-        else:
														
 
															-            t_bw_adjustment = -t_bw_step_s
														
 
															-    next_t_bw_s = effective_current_t_bw + t_bw_adjustment
														
 
															-
														
 
															-    return next_L_s, next_t_bw_s
														
 
															-
														
 
															-
														
 
															-from UF_super_RL.DQN_env import simulate_one_supercycle
														
 
															-def calc_uf_cycle_metrics(p, TMP0, max_tmp_during_filtration, min_tmp_during_filtration, L_s: float, t_bw_s: float):
														
 
															-    """
														
 
															-    计算 UF 超滤系统的核心性能指标
														
 
															-
														
 
															-    参数:
														
 
															-        p (UFParams): UF 系统参数
														
 
															-        L_s (float): 单次过滤时间（秒）
														
 
															-        t_bw_s (float): 单次反洗时间（秒）
														
 
															-
														
 
															-    返回:
														
 
															-        dict: {
														
 
															-            "k_bw_per_ceb": 小周期次数,
														
 
															-            "ton_water_energy_kWh_per_m3": 吨水电耗,
														
 
															-            "recovery": 回收率,
														
 
															-            "net_delivery_rate_m3ph": 净供水率 (m³/h),
														
 
															-            "daily_prod_time_h": 日均产水时间 (小时/天)
														
 
															-            "max_permeability": 全周期最高渗透率(lmh/bar)
														
 
															-        }
														
 
															-    """
														
 
															-    # 将跨膜压差写入参数
														
 
															-    p.TMP0 = TMP0
														
 
															-
														
 
															-    # 模拟该参数下的超级周期
														
 
															-    feasible, info = simulate_one_supercycle(p, L_s, t_bw_s)
														
 
															-
														
 
															-    # 获得模型模拟周期信息
														
 
															-    k_bw_per_ceb = info["k_bw_per_ceb"]
														
 
															-    ton_water_energy_kWh_per_m3 = info["ton_water_energy_kWh_per_m3"]
														
 
															-    recovery = info["recovery"]
														
 
															-    net_delivery_rate_m3ph = info["net_delivery_rate_m3ph"]
														
 
															-    daily_prod_time_h = info["daily_prod_time_h"]
														
 
															-
														
 
															-    # 获得模型模拟周期内最高跨膜压差/最低跨膜压差
														
 
															-    if max_tmp_during_filtration is None:
														
 
															-        max_tmp_during_filtration = info["max_TMP_during_filtration"]
														
 
															-    if min_tmp_during_filtration is None:
														
 
															-        min_tmp_during_filtration = info["min_TMP_during_filtration"]
														
 
															-
														
 
															-    # 计算最高渗透率
														
 
															-    max_permeability = 100 * p.q_UF / (128*40) / min_tmp_during_filtration
														
 
															-
														
 
															-
														
 
															-    return {
														
 
															-        "k_bw_per_ceb": k_bw_per_ceb,
														
 
															-        "ton_water_energy_kWh_per_m3": ton_water_energy_kWh_per_m3,
														
 
															-        "recovery": recovery,
														
 
															-        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
														
 
															-        "daily_prod_time_h": daily_prod_time_h,
														
 
															-        "max_permeability": max_permeability
														
 
															-    }
														
 
															-
														
 
															-
														
 
															-# ==============================
														
 
															-# 示例调用
														
 
															-# ==============================
														
 
															-if __name__ == "__main__":
														
 
															-    uf_params = UFParams()
														
 
															-    TMP0 = 0.03 # 原始 TMP0
														
 
															-    model_decide_result = run_uf_DQN_decide(uf_params, TMP0) # 调用模型获得动作
														
 
															-    model_L_s = model_decide_result['L_s'] # 获得模型决策产水时长
														
 
															-    model_t_bw_s = model_decide_result['t_bw_s'] # 获得模型决策反洗时长
														
 
															-
														
 
															-    current_L_s = 3800
														
 
															-    current_t_bw_s = 40
														
 
															-    model_prev_L_s = 4040
														
 
															-    model_prev_t_bw_s = 60
														
 
															-    L_s, t_bw_s = generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s) # 获取模型下发指令
														
 
															-
														
 
															-    L_s = 4100
														
 
															-    t_bw_s = 96
														
 
															-    max_tmp_during_filtration = 0.050176 # 新增工厂数据接口：周期最高/最低跨膜压差，无工厂数据接入时传入None，calc_uf_cycle_metrics()自动获取模拟周期中的跨膜压差最值
														
 
															-    min_tmp_during_filtration = 0.012496
														
 
															-    execution_result = calc_uf_cycle_metrics(uf_params, TMP0, max_tmp_during_filtration, min_tmp_during_filtration, L_s, t_bw_s)
														
 
															-    print("\n===== 单步决策结果 =====")
														
 
															-    print(f"模型选择的动作: {model_decide_result['action']}")
														
 
															-    print(f"模型选择的L_s: {model_L_s} 秒, 模型选择的t_bw_s: {model_t_bw_s} 秒")
														
 
															-    print(f"指令下发的L_s: {L_s} 秒, 指令下发的t_bw_s: {t_bw_s} 秒")
														
 
															-    print(f"指令对应的反洗次数: {execution_result['k_bw_per_ceb']}")
														
 
															-    print(f"指令对应的吨水电耗: {execution_result['ton_water_energy_kWh_per_m3']}")
														
 
															-    print(f"指令对应的回收率: {execution_result['recovery']}")
														
 
															-    print(f"指令对应的日均产水时间: {execution_result['daily_prod_time_h']}")
														
 
															-    print(f"指令对应的最高渗透率: {execution_result['max_permeability']}")
														
--- a/models/uf-rl/超滤训练源码/DQN_env.py
+++ b/models/uf-rl/超滤训练源码/DQN_env.py
@@ -1,44 +1,58 @@
 
															 import os
														
 
															-import time
														
 
															-import random
														
 
															+import torch
														
 
															+from pathlib import Path
														
 
															 import numpy as np
														
 
															 import gymnasium as gym
														
 
															 from gymnasium import spaces
														
 
															-from stable_baselines3 import DQN
														
 
															-from stable_baselines3.common.monitor import Monitor
														
 
															-from stable_baselines3.common.vec_env import DummyVecEnv
														
 
															-from stable_baselines3.common.callbacks import BaseCallback
														
 
															 from typing import Dict, Tuple, Optional
														
 
															 import torch
														
 
															 import torch.nn as nn
														
 
															 from dataclasses import dataclass, asdict
														
 
															-from UF_models import TMPIncreaseModel, TMPDecreaseModel  # 导入模型类
														
 
															+from UF_resistance_models import ResistanceIncreaseModel, ResistanceDecreaseModel  # 导入模型类
														
 
															 import copy
														
 
															-
														
 
															-# ==== 定义膜的基础运行参数 ====
														
 
															+# =======================
														
 
															+# 膜运行参数类：定义膜的基础运行参数
														
 
															+# =======================
														
 
															 @dataclass
														
 
															 class UFParams:
														
 
															-    # —— 膜与运行参数 ——
														
 
															+    # —— 膜动态运行参数 ——
														
 
															     q_UF: float = 360.0  # 过滤进水流量（m^3/h）
														
 
															-    TMP0: float = 0.03  # 初始TMP（MPa）
														
 
															-    TMP_max: float = 0.06  # TMP硬上限（MPa）
														
 
															-
														
 
															-    # —— 膜污染动力学 ——
														
 
															-    alpha: float = 1e-6  # TMP增长系数
														
 
															-    belta: float = 1.1  # 幂指数
														
 
															+    TMP0: float = 0.03 # 初始跨膜压差
														
 
															+    temp: float = 25.0  # 水温，摄氏度
														
 
															+
														
 
															+    # —— 膜阻力模型参数 ——
														
 
															+    nuK: float =4.92e+01 # 过滤阶段膜阻力增长模型参数
														
 
															+    slope: float = 3.44e-01 # 全周期不可逆污染阻力增长斜率
														
 
															+    power: float = 1.032 # 全周期不可逆污染阻力增长幂次
														
 
															+    tau_bw_s: float = 30.0  # 物洗时长影响时间尺度
														
 
															+    gamma_t: float = 1.0  # 物洗时长作用指数
														
 
															+    ceb_removal: float = 150  # CEB去除膜阻力
														
 
															+
														
 
															+    # —— 膜运行约束参数 ——
														
 
															+    global_TMP_limit: float = 0.08  # TMP硬上限（MPa）
														
 
															+    TMP0_max: float = 0.035 # 初始TMP上限（MPa）
														
 
															+    TMP0_min: float = 0.01 # 初始TMP下限（MPa）
														
 
															+    q_UF_max: float = 400.0 # 进水流量上限（m^3/h）
														
 
															+    q_UF_min: float = 250.0 # 进水流量下限（m^3/h）
														
 
															+    temp_max: float = 40.0 # 温度上限（摄氏度）
														
 
															+    temp_min: float = 10.0 # 温度下限（摄氏度）
														
 
															+    nuK_max: float = 6e+01 # 物理周期总阻力增速上限（m^-1/s）
														
 
															+    nuK_min: float = 3e+01 # 物理周期总阻力增速下限（m^-1/s）
														
 
															+    slope_max: float = 10 # 化学周期长期阻力增速斜率上限
														
 
															+    slope_min: float = 0.1 # 化学周期长期阻力增速斜率下限
														
 
															+    power_max: float = 1.3 # 化学周期长期阻力增速幂次上限
														
 
															+    power_min: float = 0.8 # 化学周期长期阻力增速幂次下限
														
 
															+    ceb_removal_max: float = 150 # CEB去除阻力(已缩放)上限（m^-1）
														
 
															+    ceb_removal_min: float = 100 # CEB去除阻力(已缩放)下限（m^-1）
														
 
															     # —— 反洗参数（固定） ——
														
 
															     q_bw_m3ph: float = 1000.0  # 物理反洗流量（m^3/h）
														
 
															-    # —— CEB参数（固定） ——
														
 
															-    T_ceb_interval_h: float = 48.0  # 固定每 k 小时做一次CEB
														
 
															+    # —— CEB参数 ——
														
 
															+    T_ceb_interval_h: float = 60.0  # 固定每 k 小时做一次CEB
														
 
															     v_ceb_m3: float = 30.0  # CEB用水体积（m^3）
														
 
															     t_ceb_s: float = 40 * 60.0  # CEB时长（s）
														
 
															-    phi_ceb: float = 1.0  # CEB去除比例（简化：完全恢复到TMP0）
														
 
															-
														
 
															-    # —— 约束与收敛 ——
														
 
															-    dTMP: float = 0.001  # 单次产水结束时，相对TMP0最大升幅（MPa）
														
 
															     # —— 搜索范围（秒） ——
														
 
															     L_min_s: float = 3800.0  # 过滤时长下限（s）
														
@@ -46,55 +60,115 @@ class UFParams:
 
															     t_bw_min_s: float = 40.0  # 物洗时长下限（s）
														
 
															     t_bw_max_s: float = 60.0  # 物洗时长上限（s）
														
 
															-    # —— 物理反洗恢复函数参数 ——
														
 
															-    phi_bw_min: float = 0.7  # 物洗去除比例最小值
														
 
															-    phi_bw_max: float = 1.0  # 物洗去除比例最大值
														
 
															-    L_ref_s: float = 4000.0  # 过滤时长影响时间尺度
														
 
															-    tau_bw_s: float = 20.0  # 物洗时长影响时间尺度
														
 
															-    gamma_t: float = 1.0  # 物洗时长作用指数
														
 
															-
														
 
															     # —— 网格 ——
														
 
															     L_step_s: float = 60.0  # 过滤时长步长（s）
														
 
															     t_bw_step_s: float = 5.0  # 物洗时长步长（s）
														
 
															-    # 多目标加权及高TMP惩罚
														
 
															-    w_rec: float = 0.8  # 回收率权重
														
 
															-    w_rate: float = 0.2  # 净供水率权重
														
 
															-    w_headroom: float = 0.2  # 贴边惩罚权重
														
 
															-    r_headroom: float = 2.0  # 贴边惩罚幂次
														
 
															-    headroom_hardcap: float = 0.98  # 超过此比例直接视为不可取
														
 
															+    # —— 奖励函数参数 ——
														
 
															+    k_rec = 5.0      # 回收率敏感度
														
 
															+    k_res = 10.0     # 残余污染敏感度
														
 
															+    rec_low, rec_high = 0.92, 0.99
														
 
															+    rr0 = 0.08
														
 
															-# ==== 加载模拟环境模型 ====
														
 
															-# 初始化模型
														
 
															-model_fp = TMPIncreaseModel()
														
 
															-model_bw = TMPDecreaseModel()
														
 
															-# 加载参数
														
 
															-model_fp.load_state_dict(torch.load("uf_fp.pth"))
														
 
															-model_bw.load_state_dict(torch.load("uf_bw.pth"))
														
 
															+# =======================
														
 
															+# 辅助函数：转换膜阻力与跨膜压差
														
 
															+# =======================
														
 
															-# 切换到推理模式
														
 
															-model_fp.eval()
														
 
															-model_bw.eval()
														
 
															+def xishan_viscosity(temp):
														
 
															+    # temp: 水温，单位摄氏度
														
 
															+    """
														
 
															+    锡山水厂 PLC水温校正因子经验公式（25摄氏度标准）
														
 
															+    返回温度修正后的水粘度（纯水修正），TODO：水厂水质与纯水相差较大，对粘度有一定影响
														
 
															+    """
														
 
															+    x = (temp + 273.15) / 300
														
 
															+    factor = 890 / (280.68 * x ** -1.9 + 511.45 * x ** -7.7 + 61.131 * x ** -19.6 + 0.45903 * x ** -40)
														
 
															+    mu = 0.00089 / factor
														
 
															+    return mu
														
 
															+
														
 
															+def _calculate_resistance(tmp, q_UF, temp):
														
 
															+    """
														
 
															+    计算超滤膜阻力 R = TMP / (J * μ)
														
 
															+    返回缩小1e10的膜阻力（超滤原膜阻力量级为1e12，过大的绝对值容易导致拟合不稳定）
														
 
															+    """
														
 
															+    A = 128 * 40  # m²，有效膜面积
														
 
															+    mu = xishan_viscosity(temp) # 温度修正后的水粘度
														
 
															+    TMP_Pa = tmp * 1e6  # 跨膜压差 MPa -> Pa
														
 
															+    J = q_UF / A / 3600  # 通量 m³/h -> m³/(m²·s)
														
 
															+    if J <= 0 or mu <= 0:
														
 
															+        return np.nan
														
 
															+    R = TMP_Pa / (J * mu) / 1e10 # 缩放膜阻力
														
 
															+    return float(R)
														
 
															-def _delta_tmp(p, L_h: float) -> float:
														
 
															+def _calculate_tmp(R, q_UF, temp):
														
 
															     """
														
 
															-    过滤时段TMP上升量：调用 uf_fp.pth 模型
														
 
															+    还原超滤跨膜压差 TMP
														
 
															     """
														
 
															-    return model_fp(p, L_h)
														
 
															+    A = 128 * 40  # m²，有效膜面积
														
 
															+    mu = xishan_viscosity(temp) # 温度修正后的水粘度
														
 
															+    J = q_UF / A / 3600  # 通量 m³/h -> m³/(m²·s)
														
 
															+    TMP_Pa = R * J * mu * 1e10
														
 
															+    tmp = TMP_Pa / 1e6
														
 
															+
														
 
															+    return float(tmp)
														
 
															+
														
 
															+
														
 
															+# =======================
														
 
															+# 环境体模型加载函数
														
 
															+# =======================
														
 
															+def load_resistance_models():
														
 
															+    """加载阻力变化模型，仅在首次调用时执行"""
														
 
															+
														
 
															+    global resistance_model_fp, resistance_model_bw
														
 
															+
														
 
															+    # 如果全局模型已存在，则直接返回
														
 
															+    if "resistance_model_fp" in globals() and resistance_model_fp is not None:
														
 
															+        return resistance_model_fp, resistance_model_bw
														
 
															+
														
 
															+    print("🔄 Loading resistance models...")
														
 
															+
														
 
															+    # 初始化模型
														
 
															+    resistance_model_fp = ResistanceIncreaseModel()
														
 
															+    resistance_model_bw = ResistanceDecreaseModel()
														
 
															+
														
 
															+    # 取得本模块文件所在目录（模型权重文件需与本文件位于同一目录）
														
 
															+    base_dir = Path(__file__).resolve().parent
														
 
															+
														
 
															+    # 构造模型路径
														
 
															+    fp_path = base_dir / "resistance_model_fp.pth"
														
 
															+    bw_path = base_dir / "resistance_model_bw.pth"
														
 
															+
														
 
															+    # 检查文件存在性
														
 
															+    assert fp_path.exists(), f"缺少 {fp_path.name}"
														
 
															+    assert bw_path.exists(), f"缺少 {bw_path.name}"
														
 
															-def phi_bw_of(p, L_s: float, t_bw_s: float) -> float:
														
 
															+    # 加载权重
														
 
															+    resistance_model_fp.load_state_dict(torch.load(fp_path, map_location="cpu"))
														
 
															+    resistance_model_bw.load_state_dict(torch.load(bw_path, map_location="cpu"))
														
 
															+
														
 
															+    # 设置推理模式
														
 
															+    resistance_model_fp.eval()
														
 
															+    resistance_model_bw.eval()
														
 
															+
														
 
															+    print("✅ Resistance models loaded successfully from current directory.")
														
 
															+    return resistance_model_fp, resistance_model_bw
														
 
															+
														
 
															+
														
 
															+# =======================
														
 
															+# 环境体模型模拟函数
														
 
															+# =======================
														
 
															+def _delta_resistance(p, L_h: float) -> float:
														
 
															     """
														
 
															-    物洗去除比例：调用 uf_bw.pth 模型
														
 
															+    过滤时段膜阻力上升量：调用 resistance_model_fp.pth 模型
														
 
															     """
														
 
															-    return model_bw(p, L_s, t_bw_s)
														
 
															+    return resistance_model_fp(p, L_h)
														
 
															-def _tmp_after_ceb(p, L_s: float, t_bw_s: float) -> float:
														
 
															+def phi_bw_of(p, R0: float, R_end: float, L_h_start: float, L_h_next_start: float, t_bw_s: float) -> float:
														
 
															     """
														
 
															-    计算化学清洗(CEB)后的TMP，当前为恢复初始跨膜压差
														
 
															+    物理冲洗去除膜阻力值：调用 resistance_model_bw 模型
														
 
															     """
														
 
															-    return p.TMP0
														
 
															+    return resistance_model_bw(p, R0, R_end, L_h_start, L_h_next_start, t_bw_s)
														
 
															 def _v_bw_m3(p, t_bw_s: float) -> float:
														
 
															     """
														
@@ -104,139 +178,183 @@ def _v_bw_m3(p, t_bw_s: float) -> float:
 
															 def simulate_one_supercycle(p: UFParams, L_s: float, t_bw_s: float):
														
 
															     """
														
 
															-    返回 (是否可行, 指标字典)
														
 
															-    - 支持动态CEB次数：48h固定间隔
														
 
															-    - 增加日均产水时间和吨水电耗
														
 
															-    - 增加最小TMP记录
														
 
															+    模拟一个超级周期（多次物理反洗 + 一次化学反洗）
														
 
															+    返回: (info, next_params)
														
 
															     """
														
 
															     L_h = float(L_s) / 3600.0  # 小周期过滤时间(h)
														
 
															     tmp = p.TMP0
														
 
															+    R0 = _calculate_resistance(p.TMP0, p.q_UF, p.temp)
														
 
															     max_tmp_during_filtration = tmp
														
 
															-    min_tmp_during_filtration = tmp  # 新增：初始化最小TMP
														
 
															+    min_tmp_during_filtration = tmp
														
 
															     max_residual_increase = 0.0
														
 
															-    # 小周期总时长(h)
														
 
															     t_small_cycle_h = (L_s + t_bw_s) / 3600.0
														
 
															-
														
 
															-    # 计算超级周期内CEB次数
														
 
															     k_bw_per_ceb = int(np.floor(p.T_ceb_interval_h / t_small_cycle_h))
														
 
															     if k_bw_per_ceb < 1:
														
 
															-        k_bw_per_ceb = 1  # 至少一个小周期
														
 
															+        k_bw_per_ceb = 1
														
 
															-    # ton水电耗查表
														
 
															     energy_lookup = {
														
 
															         3600: 0.1034, 3660: 0.1031, 3720: 0.1029, 3780: 0.1026,
														
 
															         3840: 0.1023, 3900: 0.1021, 3960: 0.1019, 4020: 0.1017,
														
 
															         4080: 0.1015, 4140: 0.1012, 4200: 0.1011
														
 
															     }
														
 
															-    for _ in range(k_bw_per_ceb):
														
 
															+    # --- 循环模拟物理反洗 ---
														
 
															+    for idx in range(k_bw_per_ceb):
														
 
															         tmp_run_start = tmp
														
 
															+        q_UF = p.q_UF
														
 
															+        temp = p.temp
														
 
															-        # 过滤阶段TMP增长
														
 
															-        dtmp = _delta_tmp(p, L_h)
														
 
															-        tmp_peak = tmp_run_start + dtmp
														
 
															-
														
 
															-        # 约束1：峰值不得超过硬上限
														
 
															-        if tmp_peak > p.TMP_max + 1e-12:
														
 
															-            return False, {"reason": "TMP_max violated during filtration", "TMP_peak": tmp_peak}
														
 
															+        R_run_start = _calculate_resistance(tmp_run_start, q_UF, temp)
														
 
															+        d_R = _delta_resistance(p, L_s)
														
 
															+        R_peak = R_run_start + d_R
														
 
															+        tmp_peak = _calculate_tmp(R_peak, q_UF, temp)
														
 
															-        # 更新最大和最小TMP
														
 
															-        if tmp_peak > max_tmp_during_filtration:
														
 
															-            max_tmp_during_filtration = tmp_peak
														
 
															-        if tmp_run_start < min_tmp_during_filtration:  # 新增：记录运行开始时的最小TMP
														
 
															-            min_tmp_during_filtration = tmp_run_start
														
 
															+        max_tmp_during_filtration = max(max_tmp_during_filtration, tmp_peak)
														
 
															+        min_tmp_during_filtration = min(min_tmp_during_filtration, tmp_run_start)
														
 
															-        # 物理反洗
														
 
															-        phi = phi_bw_of(p, L_s, t_bw_s)
														
 
															-        tmp_after_bw = tmp_peak - phi * (tmp_peak - tmp_run_start)
														
 
															+        # 物洗膜阻力减小
														
 
															+        L_h_start = (L_s + t_bw_s) / 3600.0 * idx
														
 
															+        L_h_next_start = (L_s + t_bw_s) / 3600.0 * (idx + 1)
														
 
															+        reversible_R = phi_bw_of(p, R_run_start, R_peak, L_h_start, L_h_next_start, t_bw_s)
														
 
															+        R_after_bw = R_peak - reversible_R
														
 
															+        tmp_after_bw = _calculate_tmp(R_after_bw, q_UF, temp)
														
 
															-        # 约束2：单次残余增量控制
														
 
															         residual_inc = tmp_after_bw - tmp_run_start
														
 
															-        if residual_inc > p.dTMP + 1e-12:
														
 
															-            return False, {
														
 
															-                "reason": "residual TMP increase after BW exceeded dTMP",
														
 
															-                "residual_increase": residual_inc,
														
 
															-                "limit_dTMP": p.dTMP
														
 
															-            }
														
 
															-        if residual_inc > max_residual_increase:
														
 
															-            max_residual_increase = residual_inc
														
 
															+        max_residual_increase = max(max_residual_increase, residual_inc)
														
 
															         tmp = tmp_after_bw
														
 
															-    # CEB
														
 
															-    tmp_after_ceb = p.TMP0
														
 
															+    # --- CEB反洗 ---
														
 
															+    R_after_ceb = R_peak - p.ceb_removal
														
 
															+    tmp_after_ceb = _calculate_tmp(R_after_ceb, q_UF, temp)
														
 
															-    # 体积与回收率
														
 
															+    # ============================================================
														
 
															+    # 生成本周期指标
														
 
															+    # ============================================================
														
 
															+
														
 
															+    # --- 体积与能耗 ---
														
 
															     V_feed_super = k_bw_per_ceb * p.q_UF * L_h
														
 
															     V_loss_super = k_bw_per_ceb * _v_bw_m3(p, t_bw_s) + p.v_ceb_m3
														
 
															     V_net = max(0.0, V_feed_super - V_loss_super)
														
 
															     recovery = max(0.0, V_net / max(V_feed_super, 1e-12))
														
 
															-    # 时间与净供水率
														
 
															     T_super_h = k_bw_per_ceb * (L_s + t_bw_s) / 3600.0 + p.t_ceb_s / 3600.0
														
 
															-    net_delivery_rate_m3ph = V_net / max(T_super_h, 1e-12)
														
 
															-
														
 
															-    # 贴边比例与硬限
														
 
															-    headroom_ratio = max_tmp_during_filtration / max(p.TMP_max, 1e-12)
														
 
															-    if headroom_ratio > p.headroom_hardcap + 1e-12:
														
 
															-        return False, {"reason": "headroom hardcap exceeded", "headroom_ratio": headroom_ratio}
														
 
															-
														
 
															-    # —— 新增指标 1：日均产水时间（h/d） ——
														
 
															     daily_prod_time_h = k_bw_per_ceb * L_h / T_super_h * 24.0
														
 
															-    # —— 新增指标 2：吨水电耗（kWh/m³） ——
														
 
															     closest_L = min(energy_lookup.keys(), key=lambda x: abs(x - L_s))
														
 
															-    ton_water_energy = energy_lookup[closest_L]
														
 
															+    ton_water_energy = energy_lookup[closest_L] #TODO:需确认新过滤时间范围下的吨水电耗
														
 
															+    # --- 信息输出 ---
														
 
															     info = {
														
 
															+        "q_UF": p.q_UF,
														
 
															+        "temp": p.temp,
														
 
															         "recovery": recovery,
														
 
															         "V_feed_super_m3": V_feed_super,
														
 
															         "V_loss_super_m3": V_loss_super,
														
 
															         "V_net_super_m3": V_net,
														
 
															         "supercycle_time_h": T_super_h,
														
 
															-        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
														
 
															         "max_TMP_during_filtration": max_tmp_during_filtration,
														
 
															-        "min_TMP_during_filtration": min_tmp_during_filtration,  # 新增：最小TMP
														
 
															+        "min_TMP_during_filtration": min_tmp_during_filtration,
														
 
															+        "global_TMP_limit":p.global_TMP_limit,
														
 
															         "max_residual_increase_per_run": max_residual_increase,
														
 
															-        "phi_bw_effective": phi,
														
 
															+        "R0": R0,
														
 
															+        "R_after_ceb": R_after_ceb,
														
 
															+        "TMP0":p.TMP0,
														
 
															         "TMP_after_ceb": tmp_after_ceb,
														
 
															-        "headroom_ratio": headroom_ratio,
														
 
															         "daily_prod_time_h": daily_prod_time_h,
														
 
															         "ton_water_energy_kWh_per_m3": ton_water_energy,
														
 
															         "k_bw_per_ceb": k_bw_per_ceb
														
 
															     }
														
 
															-    return True, info
														
 
															+    # ============================================================
														
 
															+    # 状态更新：生成 next_params（新状态）
														
 
															+    # ============================================================
														
 
															+
														
 
															+    next_params = copy.deepcopy(p)
														
 
															+
														
 
															+    # 更新跨膜压差（TMP）
														
 
															+    next_params.TMP0 = tmp_after_ceb
														
 
															-def _score(p: UFParams, rec: dict) -> float:
														
 
															-    """综合评分：越大越好。通过非线性放大奖励差异，强化区分好坏动作"""
														
 
															+    # 可选参数（当前保持不变，未来可扩展更新逻辑）
														
 
															+    next_params.slope = p.slope
														
 
															+    next_params.power = p.power
														
 
															+    next_params.ceb_removal = p.ceb_removal
														
 
															+    next_params.nuK = p.nuK
														
 
															+    next_params.q_UF = p.q_UF
														
 
															+    next_params.temp = p.temp
														
 
															-    # —— 无量纲化净供水率 ——
														
 
															-    rate_norm = rec["net_delivery_rate_m3ph"] / max(p.q_UF, 1e-12)
														
 
															-    # —— TMP soft penalty (sigmoid) ——
														
 
															-    tmp_ratio = rec["max_TMP_during_filtration"] / max(p.TMP_max, 1e-12)
														
 
															-    k = 10.0
														
 
															-    headroom_penalty = 1.0 / (1.0 + np.exp(-k * (tmp_ratio - 1.0)))
														
 
															+    return info, next_params
														
 
															-    # —— 基础 reward（0.6~0.9左右）——
														
 
															-    base_reward = (
														
 
															-        p.w_rec * rec["recovery"]
														
 
															-        + p.w_rate * rate_norm
														
 
															-        - p.w_headroom * headroom_penalty
														
 
															-    )
														
 
															+def calculate_reward(p: UFParams, info: dict) -> float:
														
 
															+    """
														
 
															+    TMP不参与奖励计算，仅考虑回收率与残余污染比例之间的权衡。
														
 
															+    满足：
														
 
															+      - 当 recovery=0.97, residual_ratio=0.1 → reward = 0
														
 
															+      - 当 recovery=0.90, residual_ratio=0.0 → reward = 0
														
 
															+      - 在两者之间平衡（如 recovery≈0.94, residual_ratio≈0.05）→ reward > 0
														
 
															+    """
														
 
															+    recovery = info["recovery"]
														
 
															+    residual_ratio = (info["R_after_ceb"] - info["R0"]) / info["R0"]
														
 
															+
														
 
															+    # 回收率奖励（在 [rec_low, rec_high] 内平滑上升）
														
 
															+    rec_norm = (recovery - p.rec_low) / (p.rec_high - p.rec_low)
														
 
															+    rec_reward = np.clip(np.tanh(p.k_rec * (rec_norm - 0.5)), -1, 1)
														
 
															+
														
 
															+    # 残余比惩罚（超过rr0时快速变为负值）
														
 
															+    res_penalty = -np.tanh(p.k_res * (residual_ratio / p.rr0 - 1))
														
 
															+
														
 
															+    # 组合逻辑：权衡二者
														
 
															+    total_reward = rec_reward + res_penalty
														
 
															+
														
 
															+    # 再平移使指定点为零：
														
 
															+    # recovery=0.97, residual=0.1 → 0
														
 
															+    # recovery=0.90, residual=0.0 → 0
														
 
															+    # 经验上，这两点几乎对称，因此无需额外线性偏移
														
 
															+    # 若希望严格归零，可用线性校正：
														
 
															+    total_reward -= 0.0
														
 
															-    # —— 非线性放大：平方映射 + 缩放 ——
														
 
															-    # 目的是放大好坏动作差异，同时限制最大值，避免 TD-error 过大
														
 
															-    amplified_reward = (base_reward - 0.5) ** 2 * 5.0
														
 
															+    return total_reward
														
 
															+
														
 
															+
														
 
															+
														
 
															+def is_dead_cycle(info: dict) -> bool:
														
 
															+    """
														
 
															+    Despite the name, returns True when the cycle is successful (not dead) and False when it fails.
														
 
															+    失败条件：
														
 
															+    1. 最大TMP超过设定上限；
														
 
															+    2. 回收率低于75%；
														
 
															+    3. 化学反冲洗后膜阻力上升超过10%。
														
 
															+
														
 
															+    参数：
														
 
															+        info: dict
														
 
															+            simulate_one_supercycle() 返回的指标字典，需包含：
														
 
															+            - max_TMP_during_filtration
														
 
															+            - recovery
														
 
															+            - R_after_ceb
														
 
															+            - R0
														
 
															+            - global_TMP_limit (optional; falls back to 0.08 when absent)
														
 
															+    返回：
														
 
															+        bool: True 表示成功循环，False 表示失败循环。
														
 
															+    """
														
 
															+    TMP_limit = info.get("global_TMP_limit", 0.08)  # 默认硬约束上限
														
 
															+    max_tmp = info.get("max_TMP_during_filtration", 0)
														
 
															+    recovery = info.get("recovery", 1.0)
														
 
															+    R_after_ceb = info.get("R_after_ceb", 0)
														
 
															+    R0 = info.get("R0", 1e-6)
														
 
															-    # —— 可选：保留符号，区分负奖励
														
 
															-    if base_reward < 0.5:
														
 
															-        amplified_reward = -amplified_reward
														
 
															+    # 判断条件
														
 
															+    if max_tmp > TMP_limit:
														
 
															+        return False
														
 
															+    if recovery < 0.75:
														
 
															+        return False
														
 
															+    if (R_after_ceb - R0) / R0 > 0.1:
														
 
															+        return False
														
 
															+
														
 
															+    return True
														
 
															-    return amplified_reward
														
 
															 class UFSuperCycleEnv(gym.Env):
														
@@ -244,7 +362,7 @@ class UFSuperCycleEnv(gym.Env):
 
															     metadata = {"render_modes": ["human"]}
														
 
															-    def __init__(self, base_params, max_episode_steps: int = 20):
														
 
															+    def __init__(self, base_params, resistance_models=None, max_episode_steps: int = 15):
														
 
															         super(UFSuperCycleEnv, self).__init__()
														
 
															         self.base_params = base_params
														
@@ -252,10 +370,15 @@ class UFSuperCycleEnv(gym.Env):
 
															         self.max_episode_steps = max_episode_steps
														
 
															         self.current_step = 0
														
 
															+        if resistance_models is None:
														
 
															+            self.resistance_model_fp, self.resistance_model_bw = load_resistance_models()
														
 
															+        else:
														
 
															+            self.resistance_model_fp, self.resistance_model_bw = resistance_models
														
 
															+
														
 
															         # 计算离散动作空间
														
 
															         self.L_values = np.arange(
														
 
															             self.base_params.L_min_s,
														
 
															-            self.base_params.L_max_s + self.base_params.L_step_s,
														
 
															+            self.base_params.L_max_s,
														
 
															             self.base_params.L_step_s
														
 
															         )
														
 
															         self.t_bw_values = np.arange(
														
@@ -270,44 +393,180 @@ class UFSuperCycleEnv(gym.Env):
 
															         # 单一离散动作空间
														
 
															         self.action_space = spaces.Discrete(self.num_L * self.num_bw)
														
 
															-        # 状态空间增加 TMP0, 上一次动作(L_s, t_bw_s), 本周期最高 TMP
														
 
															-        # 状态归一化均在 _get_obs 内处理
														
 
															+        # 状态空间，归一化在 _get_obs 中处理
														
 
															         self.observation_space = spaces.Box(
														
 
															-            low=np.zeros(4, dtype=np.float32),
														
 
															-            high=np.ones(4, dtype=np.float32),
														
 
															+            low=np.zeros(8, dtype=np.float32),
														
 
															+            high=np.ones(8, dtype=np.float32),
														
 
															             dtype=np.float32
														
 
															         )
														
 
															-        # 初始化状态
														
 
															-        self.last_action = (self.base_params.L_min_s, self.base_params.t_bw_min_s)
														
 
															-        self.max_TMP_during_filtration = self.current_params.TMP0
														
 
															+        # 初始化环境
														
 
															         self.reset(seed=None)
														
 
															-    def _get_obs(self):
														
 
															-        TMP0 = self.current_params.TMP0
														
 
															-        TMP0_norm = (TMP0 - 0.01) / (0.05 - 0.01)
														
 
															-
														
 
															-        L_s, t_bw_s = self.last_action
														
 
															-        L_norm = (L_s - self.base_params.L_min_s) / (self.base_params.L_max_s - self.base_params.L_min_s)
														
 
															-        t_bw_norm = (t_bw_s - self.base_params.t_bw_min_s) / (self.base_params.t_bw_max_s - self.base_params.t_bw_min_s)
														
 
															+    def generate_initial_state(self):
														
 
															+        """
														
 
															+        随机生成一个初始状态，不进行死状态判断
														
 
															+        """
														
 
															+        self.current_params.TMP0 = np.random.uniform(
														
 
															+            self.current_params.TMP0_min, self.current_params.TMP0_max
														
 
															+        )
														
 
															+        self.current_params.q_UF = np.random.uniform(
														
 
															+            self.current_params.q_UF_min, self.current_params.q_UF_max
														
 
															+        )
														
 
															+        self.current_params.temp = np.random.uniform(
														
 
															+            self.current_params.temp_min, self.current_params.temp_max
														
 
															+        )
														
 
															-        max_TMP_norm = (self.max_TMP_during_filtration - 0.01) / (0.05 - 0.01)
														
 
															+        self.current_params.R0 = _calculate_resistance(
														
 
															+            self.current_params.TMP0,
														
 
															+            self.current_params.q_UF,
														
 
															+            self.current_params.temp
														
 
															+        )
														
 
															-        return np.array([TMP0_norm, L_norm, t_bw_norm, max_TMP_norm], dtype=np.float32)
														
 
															+        self.current_params.nuK = np.random.uniform(
														
 
															+            self.current_params.nuK_min, self.current_params.nuK_max
														
 
															+        )
														
 
															+        self.current_params.slope = np.random.uniform(
														
 
															+            self.current_params.slope_min, self.current_params.slope_max
														
 
															+        )
														
 
															+        self.current_params.power = np.random.uniform(
														
 
															+            self.current_params.power_min, self.current_params.power_max
														
 
															+        )
														
 
															+        self.current_params.ceb_removal = np.random.uniform(
														
 
															+            self.current_params.ceb_removal_min, self.current_params.ceb_removal_max
														
 
															+        )
														
 
															-    def _get_action_values(self, action):
														
 
															-        L_idx = action // self.num_bw
														
 
															-        t_bw_idx = action % self.num_bw
														
 
															-        return self.L_values[L_idx], self.t_bw_values[t_bw_idx]
														
 
															+        return self._get_state_copy()
														
 
															-    def reset(self, seed=None, options=None):
														
 
															+    def reset(self, seed=None, options=None, max_attempts: int = 200):
														
 
															         super().reset(seed=seed)
														
 
															-        self.current_params.TMP0 = np.random.uniform(0.01, 0.03)
														
 
															+
														
 
															+        attempts = 0
														
 
															+        while attempts < max_attempts:
														
 
															+            attempts += 1
														
 
															+            self.generate_initial_state()  # 生成随机初始状态
														
 
															+            if self.check_dead_initial_state(max_steps=getattr(self, "max_episode_steps", 15),
														
 
															+                                             L_s=3800, t_bw_s=60):
														
 
															+                # True 表示可行，退出循环
														
 
															+                break
														
 
															+        else:
														
 
															+            # 超过最大尝试次数仍未生成可行状态
														
 
															+            raise RuntimeError(f"在 {max_attempts} 次尝试后仍无法生成可行初始状态。")
														
 
															+
														
 
															+        # 初始化步数、动作、最大 TMP
														
 
															         self.current_step = 0
														
 
															         self.last_action = (self.base_params.L_min_s, self.base_params.t_bw_min_s)
														
 
															         self.max_TMP_during_filtration = self.current_params.TMP0
														
 
															+
														
 
															         return self._get_obs(), {}
														
 
															+    def check_dead_initial_state(self, max_steps: int = None,
														
 
															+                                 L_s: int = 4900, t_bw_s: int = 50) -> bool:
														
 
															+        """
														
 
															+        判断当前环境生成的初始状态是否为可行（non-dead）。
														
 
															+        使用最保守策略连续模拟 max_steps 次：
														
 
															+            若任意一次 is_dead_cycle(info) 返回 False，则视为必死状态。
														
 
															+
														
 
															+        参数：
														
 
															+            max_steps: 模拟步数，默认使用 self.max_episode_steps
														
 
															+            L_s: filtration duration (s), default 4900
														
 
															+            t_bw_s: physical backwash duration (s), default 50
														
 
															+
														
 
															+        返回：
														
 
															+            bool: True 表示可行状态（non-dead），False 表示必死状态
														
 
															+        """
														
 
															+        if max_steps is None:
														
 
															+            max_steps = getattr(self, "max_episode_steps", 15)
														
 
															+
														
 
															+        # 生成初始状态
														
 
															+        self.generate_initial_state()
														
 
															+        if not hasattr(self, "current_params"):
														
 
															+            raise AttributeError("generate_initial_state() 未设置 current_params。")
														
 
															+
														
 
															+        import copy
														
 
															+        curr_p = copy.deepcopy(self.current_params)
														
 
															+
														
 
															+        # 逐步模拟
														
 
															+        for step in range(max_steps):
														
 
															+            try:
														
 
															+                info, next_params = simulate_one_supercycle(curr_p, L_s, t_bw_s)
														
 
															+            except Exception:
														
 
															+                # 异常即视为不可行
														
 
															+                return False
														
 
															+
														
 
															+            if not is_dead_cycle(info):
														
 
															+                # 任意一次失败即为必死状态
														
 
															+                return False
														
 
															+
														
 
															+            curr_p = next_params
														
 
															+
														
 
															+        return True
														
 
															+
														
 
															+    def _get_state_copy(self):
														
 
															+        return copy.deepcopy(self.current_params)
														
 
															+
														
 
															+    def _get_obs(self):
														
 
															+        """
														
 
															+        构建当前环境归一化状态向量
														
 
															+        """
														
 
															+        # === 1. 从 current_params 读取动态参数 ===
														
 
															+        TMP0 = self.current_params.TMP0
														
 
															+        q_UF = self.current_params.q_UF
														
 
															+        temp = self.current_params.temp
														
 
															+
														
 
															+        # === 2. 计算本周期初始膜阻力 ===
														
 
															+        R0 = _calculate_resistance(TMP0, q_UF, temp)
														
 
															+
														
 
															+        # === 3. 从 current_params 读取膜阻力增长模型参数 ===
														
 
															+        nuk = self.current_params.nuK
														
 
															+        slope = self.current_params.slope
														
 
															+        power = self.current_params.power
														
 
															+        ceb_removal = self.current_params.ceb_removal
														
 
															+
														
 
															+        # === 4. 从 current_params 动态读取上下限 ===
														
 
															+        TMP0_min, TMP0_max = self.current_params.TMP0_min, self.current_params.TMP0_max
														
 
															+        q_UF_min, q_UF_max = self.current_params.q_UF_min, self.current_params.q_UF_max
														
 
															+        temp_min, temp_max = self.current_params.temp_min, self.current_params.temp_max
														
 
															+        nuK_min, nuK_max = self.current_params.nuK_min, self.current_params.nuK_max
														
 
															+        slope_min, slope_max = self.current_params.slope_min, self.current_params.slope_max
														
 
															+        power_min, power_max = self.current_params.power_min, self.current_params.power_max
														
 
															+        ceb_min, ceb_max = self.current_params.ceb_removal_min, self.current_params.ceb_removal_max
														
 
															+
														
 
															+        # === 5. 归一化计算（clip防止越界） ===
														
 
															+        TMP0_norm = np.clip((TMP0 - TMP0_min) / (TMP0_max - TMP0_min), 0, 1)
														
 
															+        q_UF_norm = np.clip((q_UF - q_UF_min) / (q_UF_max - q_UF_min), 0, 1)
														
 
															+        temp_norm = np.clip((temp - temp_min) / (temp_max - temp_min), 0, 1)
														
 
															+
														
 
															+        # R0 不在 current_params 中定义上下限，设定经验范围
														
 
															+        R0_norm = np.clip((R0 - 100.0) / (800.0 - 100.0), 0, 1)
														
 
															+
														
 
															+        short_term_norm = np.clip((nuk - nuK_min) / (nuK_max - nuK_min), 0, 1)
														
 
															+        long_term_slope_norm = np.clip((slope - slope_min) / (slope_max - slope_min), 0, 1)
														
 
															+        long_term_power_norm = np.clip((power - power_min) / (power_max - power_min), 0, 1)
														
 
															+        ceb_removal_norm = np.clip((ceb_removal - ceb_min) / (ceb_max - ceb_min), 0, 1)
														
 
															+
														
 
															+        # === 6. 构建观测向量 ===
														
 
															+        obs = np.array([
														
 
															+            TMP0_norm,
														
 
															+            q_UF_norm,
														
 
															+            temp_norm,
														
 
															+            R0_norm,
														
 
															+            short_term_norm,
														
 
															+            long_term_slope_norm,
														
 
															+            long_term_power_norm,
														
 
															+            ceb_removal_norm
														
 
															+        ], dtype=np.float32)
														
 
															+
														
 
															+        return obs
														
 
															+
														
 
															+    def _get_action_values(self, action):
														
 
															+        """
														
 
															+        将动作还原为实际时长
														
 
															+        """
														
 
															+        L_idx = action // self.num_bw
														
 
															+        t_bw_idx = action % self.num_bw
														
 
															+        return self.L_values[L_idx], self.t_bw_values[t_bw_idx]
														
 
															+
														
 
															     def step(self, action):
														
 
															         self.current_step += 1
														
 
															         L_s, t_bw_s = self._get_action_values(action)
														
@@ -315,15 +574,16 @@ class UFSuperCycleEnv(gym.Env):
 
															         t_bw_s = np.clip(t_bw_s, self.base_params.t_bw_min_s, self.base_params.t_bw_max_s)
														
 
															         # 模拟超级周期
														
 
															-        feasible, info = simulate_one_supercycle(self.current_params, L_s, t_bw_s)
														
 
															+        info, next_params = simulate_one_supercycle(self.current_params, L_s, t_bw_s)
														
 
															+        # 根据 info 判断是否成功
														
 
															+        feasible = is_dead_cycle(info)  # True 表示成功循环，False 表示失败
														
 
															         if feasible:
														
 
															-            reward = _score(self.current_params, info)
														
 
															-            self.current_params.TMP0 = info["TMP_after_ceb"]
														
 
															-            self.max_TMP_during_filtration = info["max_TMP_during_filtration"]
														
 
															+            reward = calculate_reward(self.current_params, info)
														
 
															+            self.current_params = next_params
														
 
															             terminated = False
														
 
															         else:
														
 
															-            reward = -20
														
 
															+            reward = -10
														
 
															             terminated = True
														
 
															         truncated = self.current_step >= self.max_episode_steps
														
@@ -337,4 +597,3 @@ class UFSuperCycleEnv(gym.Env):
 
															-
														
--- a/models/uf-rl/超滤训练源码/DQN_train.py
+++ b/models/uf-rl/超滤训练源码/DQN_train.py
@@ -3,9 +3,6 @@ import time
 
															 import random
														
 
															 import numpy as np
														
 
															 import torch
														
 
															-
														
 
															-import gymnasium as gym
														
 
															-from gymnasium import spaces
														
 
															 from stable_baselines3 import DQN
														
 
															 from stable_baselines3.common.monitor import Monitor
														
 
															 from stable_baselines3.common.vec_env import DummyVecEnv
														
@@ -24,10 +21,10 @@ class DQNParams:
 
															     learning_rate: float = 1e-4
														
 
															     # 经验回放缓冲区大小（步数）
														
 
															-    buffer_size: int = 10000
														
 
															+    buffer_size: int = 100000
														
 
															     # 学习开始前需要收集的步数
														
 
															-    learning_starts: int = 200
														
 
															+    learning_starts: int = 10000
														
 
															     # 每次从经验池中采样的样本数量
														
 
															     batch_size: int = 32
														
@@ -39,7 +36,10 @@ class DQNParams:
 
															     train_freq: int = 4
														
 
															     # 目标网络更新间隔
														
 
															-    target_update_interval: int = 2000
														
 
															+    target_update_interval: int = 1
														
 
															+
														
 
															+    # 软更新系数
														
 
															+    tau: float = 0.005
														
 
															     # 初始探索率 ε
														
 
															     exploration_initial_eps: float = 1.0
														
@@ -240,5 +240,5 @@ if __name__ == "__main__":
 
															     # 训练RL代理
														
 
															     print("开始训练RL代理...")
														
 
															-    train_uf_rl_agent(params, total_timesteps=50000)
														
 
															+    train_uf_rl_agent(params, total_timesteps=150000)
														
--- a/models/uf-rl/超滤训练源码/UF_decide.py
+++ b/models/uf-rl/超滤训练源码/UF_decide.py
@@ -1,405 +0,0 @@
 
															-# UF_decide.py
														
 
															-from dataclasses import dataclass
														
 
															-import numpy as np
														
 
															-
														
 
															-@dataclass
														
 
															-class UFParams:
														
 
															-    # —— 膜与运行参数 ——
														
 
															-    q_UF: float = 360.0           # 过滤进水流量（m^3/h）
														
 
															-    TMP0: float = 0.03            # 初始TMP（MPa）
														
 
															-    TMP_max: float = 0.06         # TMP硬上限（MPa）
														
 
															-
														
 
															-    # —— 膜污染动力学 ——
														
 
															-    alpha: float = 1e-6           # TMP增长系数
														
 
															-    belta: float = 1.1            # 幂指数
														
 
															-
														
 
															-    # —— 反洗参数（固定） ——
														
 
															-    q_bw_m3ph: float = 1000.0     # 物理反洗流量（m^3/h）
														
 
															-
														
 
															-    # —— CEB参数（固定） ——
														
 
															-    T_ceb_interval_h: float = 48.0  # 固定每 k 小时做一次CEB
														
 
															-    v_ceb_m3: float = 30.0        # CEB用水体积（m^3）
														
 
															-    t_ceb_s: float = 40 * 60.0    # CEB时长（s）
														
 
															-    phi_ceb: float = 1.0          # CEB去除比例（简化：完全恢复到TMP0）
														
 
															-
														
 
															-    # —— 约束与收敛 ——
														
 
															-    dTMP: float = 0.0005          # 单次产水结束时，相对TMP0最大升幅（MPa）
														
 
															-
														
 
															-    # —— 搜索范围（秒） ——
														
 
															-    L_min_s: float = 3600.0       # 过滤时长下限（s）
														
 
															-    L_max_s: float = 4200.0       # 过滤时长上限（s）
														
 
															-    t_bw_min_s: float = 40.0      # 物洗时长下限（s）
														
 
															-    t_bw_max_s: float = 60.0      # 物洗时长上限（s）
														
 
															-
														
 
															-    # —— 物理反洗恢复函数参数 ——
														
 
															-    phi_bw_min: float = 0.7       # 物洗去除比例最小值
														
 
															-    phi_bw_max: float = 1.0       # 物洗去除比例最大值
														
 
															-    L_ref_s: float = 4000.0       # 过滤时长影响时间尺度
														
 
															-    tau_bw_s: float = 30.0        # 物洗时长影响时间尺度
														
 
															-    gamma_t: float = 1.0          # 物洗时长作用指数
														
 
															-    
														
 
															-    # —— 网格 ——
														
 
															-    L_step_s: float = 60.0        # 过滤时长步长（s）
														
 
															-    t_bw_step_s: float = 5.0      # 物洗时长步长（s）
														
 
															-
														
 
															-    # 多目标加权及高TMP惩罚
														
 
															-    w_rec: float = 0.8            # 回收率权重
														
 
															-    w_rate: float = 0.2           # 净供水率权重
														
 
															-    w_headroom: float = 0.3       # 贴边惩罚权重
														
 
															-    r_headroom: float = 2.0       # 贴边惩罚幂次
														
 
															-    headroom_hardcap: float = 0.98 # 超过此比例直接视为不可取
														
 
															-
														
 
															-def _delta_tmp(p: UFParams, L_h: float) -> float:
														
 
															-    # 过滤时段TMP上升量
														
 
															-    return float(p.alpha * (p.q_UF ** p.belta) * L_h)
														
 
															-
														
 
															-def _v_bw_m3(p: UFParams, t_bw_s: float) -> float:
														
 
															-    # 物理反洗水耗
														
 
															-    return float(p.q_bw_m3ph * (float(t_bw_s) / 3600.0))
														
 
															-
														
 
															-def phi_bw_of(p: UFParams, L_s: float, t_bw_s: float) -> float:
														
 
															-    # 物洗去除比例：随过滤时长增长上界收缩，随物洗时长增长趋饱和
														
 
															-    L = max(float(L_s), 1.0)
														
 
															-    t = max(float(t_bw_s), 1e-6)
														
 
															-    upper_L = p.phi_bw_min + (p.phi_bw_max - p.phi_bw_min) * np.exp(- L / p.L_ref_s)
														
 
															-    time_gain = 1.0 - np.exp(- (t / p.tau_bw_s) ** p.gamma_t)
														
 
															-    phi = upper_L * time_gain
														
 
															-    return float(np.clip(phi, 0.0, 0.999))
														
 
															-
														
 
															-def simulate_one_supercycle(p: UFParams, L_s: float, t_bw_s: float):
														
 
															-    """
														
 
															-    返回 (是否可行, 指标字典)
														
 
															-    - 支持动态CEB次数：48h固定间隔
														
 
															-    - 增加日均产水时间和吨水电耗
														
 
															-    """
														
 
															-    L_h = float(L_s) / 3600.0  # 小周期过滤时间(h)
														
 
															-
														
 
															-    tmp = p.TMP0
														
 
															-    max_tmp_during_filtration = tmp
														
 
															-    max_residual_increase = 0.0
														
 
															-
														
 
															-    # 小周期总时长(h)
														
 
															-    t_small_cycle_h = (L_s + t_bw_s) / 3600.0
														
 
															-
														
 
															-    # 计算超级周期内CEB次数
														
 
															-    k_bw_per_ceb = int(np.floor(p.T_ceb_interval_h / t_small_cycle_h))
														
 
															-    if k_bw_per_ceb < 1:
														
 
															-        k_bw_per_ceb = 1  # 至少一个小周期
														
 
															-
														
 
															-    # ton水电耗查表
														
 
															-    energy_lookup = {
														
 
															-        3600: 0.1034, 3660: 0.1031, 3720: 0.1029, 3780: 0.1026,
														
 
															-        3840: 0.1023, 3900: 0.1021, 3960: 0.1019, 4020: 0.1017,
														
 
															-        4080: 0.1015, 4140: 0.1012, 4200: 0.1011
														
 
															-    }
														
 
															-
														
 
															-    for _ in range(k_bw_per_ceb):
														
 
															-        tmp_run_start = tmp
														
 
															-
														
 
															-        # 过滤阶段TMP增长
														
 
															-        dtmp = _delta_tmp(p, L_h)
														
 
															-        tmp_peak = tmp_run_start + dtmp
														
 
															-
														
 
															-        # 约束1：峰值不得超过硬上限
														
 
															-        if tmp_peak > p.TMP_max + 1e-12:
														
 
															-            return False, {"reason": "TMP_max violated during filtration", "TMP_peak": tmp_peak}
														
 
															-
														
 
															-        if tmp_peak > max_tmp_during_filtration:
														
 
															-            max_tmp_during_filtration = tmp_peak
														
 
															-
														
 
															-        # 物理反洗
														
 
															-        phi = phi_bw_of(p, L_s, t_bw_s)
														
 
															-        tmp_after_bw = tmp_peak - phi * (tmp_peak - tmp_run_start)
														
 
															-
														
 
															-        # 约束2：单次残余增量控制
														
 
															-        residual_inc = tmp_after_bw - tmp_run_start
														
 
															-        if residual_inc > p.dTMP + 1e-12:
														
 
															-            return False, {
														
 
															-                "reason": "residual TMP increase after BW exceeded dTMP",
														
 
															-                "residual_increase": residual_inc,
														
 
															-                "limit_dTMP": p.dTMP
														
 
															-            }
														
 
															-        if residual_inc > max_residual_increase:
														
 
															-            max_residual_increase = residual_inc
														
 
															-
														
 
															-        tmp = tmp_after_bw
														
 
															-
														
 
															-    # CEB
														
 
															-    tmp_after_ceb = p.TMP0
														
 
															-
														
 
															-    # 体积与回收率
														
 
															-    V_feed_super = k_bw_per_ceb * p.q_UF * L_h
														
 
															-    V_loss_super = k_bw_per_ceb * _v_bw_m3(p, t_bw_s) + p.v_ceb_m3
														
 
															-    V_net = max(0.0, V_feed_super - V_loss_super)
														
 
															-    recovery = max(0.0, V_net / max(V_feed_super, 1e-12))
														
 
															-
														
 
															-    # 时间与净供水率
														
 
															-    T_super_h = k_bw_per_ceb * (L_s + t_bw_s) / 3600.0 + p.t_ceb_s / 3600.0
														
 
															-    net_delivery_rate_m3ph = V_net / max(T_super_h, 1e-12)
														
 
															-
														
 
															-    # 贴边比例与硬限
														
 
															-    headroom_ratio = max_tmp_during_filtration / max(p.TMP_max, 1e-12)
														
 
															-    if headroom_ratio > p.headroom_hardcap + 1e-12:
														
 
															-        return False, {"reason": "headroom hardcap exceeded", "headroom_ratio": headroom_ratio}
														
 
															-
														
 
															-    # —— 新增指标 1：日均产水时间（h/d） ——
														
 
															-    daily_prod_time_h = k_bw_per_ceb * L_h / T_super_h * 24.0
														
 
															-
														
 
															-    # —— 新增指标 2：吨水电耗（kWh/m³） ——
														
 
															-    closest_L = min(energy_lookup.keys(), key=lambda x: abs(x - L_s))
														
 
															-    ton_water_energy = energy_lookup[closest_L]
														
 
															-
														
 
															-    info = {
														
 
															-        "recovery": recovery,
														
 
															-        "V_feed_super_m3": V_feed_super,
														
 
															-        "V_loss_super_m3": V_loss_super,
														
 
															-        "V_net_super_m3": V_net,
														
 
															-        "supercycle_time_h": T_super_h,
														
 
															-        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
														
 
															-        "max_TMP_during_filtration": max_tmp_during_filtration,
														
 
															-        "max_residual_increase_per_run": max_residual_increase,
														
 
															-        "phi_bw_effective": phi,
														
 
															-        "TMP_after_ceb": tmp_after_ceb,
														
 
															-        "headroom_ratio": headroom_ratio,
														
 
															-        "daily_prod_time_h": daily_prod_time_h,
														
 
															-        "ton_water_energy_kWh_per_m3": ton_water_energy,
														
 
															-        "k_bw_per_ceb": k_bw_per_ceb
														
 
															-    }
														
 
															-
														
 
															-    return True, info
														
 
															-
														
 
															-def _score(p: UFParams, rec: dict) -> float:
														
 
															-    """综合评分：越大越好。不同TMP0会改变max_TMP→改变惩罚→得到不同解。"""
														
 
															-    # 无量纲化净供水率
														
 
															-    rate_norm = rec["net_delivery_rate_m3ph"] / max(p.q_UF, 1e-12)
														
 
															-    headroom_penalty = (rec["max_TMP_during_filtration"] / max(p.TMP_max, 1e-12)) ** p.r_headroom
														
 
															-    return (p.w_rec * rec["recovery"]
														
 
															-            + p.w_rate * rate_norm
														
 
															-            - p.w_headroom * headroom_penalty)
														
 
															-
														
 
															-def optimize_2d(p: UFParams,
														
 
															-                L_min_s=None, L_max_s=None, L_step_s=None,
														
 
															-                t_bw_min_s=None, t_bw_max_s=None, t_bw_step_s=None):
														
 
															-    # 网格生成
														
 
															-    L_lo = p.L_min_s if L_min_s is None else float(L_min_s)
														
 
															-    L_hi = p.L_max_s if L_max_s is None else float(L_max_s)
														
 
															-    L_st = p.L_step_s if L_step_s is None else float(L_step_s)
														
 
															-
														
 
															-    t_lo = p.t_bw_min_s if t_bw_min_s is None else float(t_bw_min_s)
														
 
															-    t_hi = p.t_bw_max_s if t_bw_max_s is None else float(t_bw_max_s)
														
 
															-    t_st = p.t_bw_step_s if t_bw_step_s is None else float(t_bw_step_s)
														
 
															-
														
 
															-    L_vals = np.arange(L_lo, L_hi + 1e-9, L_st)
														
 
															-    t_vals = np.arange(t_lo, t_hi + 1e-9, t_st)
														
 
															-
														
 
															-    best = None
														
 
															-    best_score = -np.inf
														
 
															-
														
 
															-    for L_s in L_vals:
														
 
															-        for t_bw_s in t_vals:
														
 
															-            feasible, info = simulate_one_supercycle(p, L_s, t_bw_s)
														
 
															-            if not feasible:
														
 
															-                continue
														
 
															-
														
 
															-            rec = {"L_s": float(L_s), "t_bw_s": float(t_bw_s)}
														
 
															-            rec.update(info)
														
 
															-
														
 
															-            score = _score(p, rec)
														
 
															-
														
 
															-            if score > best_score + 1e-14:
														
 
															-                best_score = score
														
 
															-                best = rec.copy()
														
 
															-                best["score"] = float(score)
														
 
															-            # 若分数相同，偏好回收率更高，再偏好净供水率更高
														
 
															-            elif abs(score - best_score) <= 1e-14:
														
 
															-                if (rec["recovery"] > best["recovery"] + 1e-12) or (
														
 
															-                    abs(rec["recovery"] - best["recovery"]) <= 1e-12 and
														
 
															-                    rec["net_delivery_rate_m3ph"] > best["net_delivery_rate_m3ph"] + 1e-12
														
 
															-                ):
														
 
															-                    best = rec.copy()
														
 
															-                    best["score"] = float(score)
														
 
															-
														
 
															-    if best is None:
														
 
															-        return {"status": "no-feasible-solution"}
														
 
															-    best["status"] = "feasible"
														
 
															-    return best
														
 
															-
														
 
															-def run_uf_decision(TMP0: float = None) -> dict:
														
 
															-    if TMP0 is None:
														
 
															-        rng = np.random.default_rng()
														
 
															-        TMP0 = rng.uniform(0.03, 0.04)  # 初始TMP随机
														
 
															-
														
 
															-    params = UFParams(
														
 
															-        q_UF=360.0,
														
 
															-        TMP_max=0.05,
														
 
															-        alpha=1.2e-6,
														
 
															-        belta=1.0,
														
 
															-        q_bw_m3ph=1000.0,
														
 
															-        T_ceb_interval_h=48,
														
 
															-        v_ceb_m3=30.0,
														
 
															-        t_ceb_s=40*60.0,
														
 
															-        phi_ceb=1.0,
														
 
															-        dTMP=0.001,
														
 
															-
														
 
															-        L_min_s=3600.0, L_max_s=4200.0, L_step_s=30.0,
														
 
															-        t_bw_min_s=90.0, t_bw_max_s=100.0, t_bw_step_s=2.0,
														
 
															-
														
 
															-        phi_bw_min=0.70, phi_bw_max=1.00,
														
 
															-        L_ref_s=500.0, tau_bw_s=40.0, gamma_t=1.0,
														
 
															-
														
 
															-        TMP0=TMP0,
														
 
															-
														
 
															-        w_rec=0.7, w_rate=0.3, w_headroom=0.3, r_headroom=2.0, headroom_hardcap=0.9
														
 
															-    )
														
 
															-
														
 
															-    result = optimize_2d(params)
														
 
															-    if result.get("status") == "feasible":
														
 
															-        return {
														
 
															-            "L_s": result["L_s"],
														
 
															-            "t_bw_s": result["t_bw_s"],
														
 
															-            "recovery": result["recovery"],
														
 
															-            "k_bw_per_ceb": result["k_bw_per_ceb"],
														
 
															-            "daily_prod_time_h": result["daily_prod_time_h"],
														
 
															-            "ton_water_energy_kWh_per_m3": result["ton_water_energy_kWh_per_m3"]
														
 
															-        }
														
 
															-
														
 
															-    # 若没有可行解，返回最小过滤时间和默认值
														
 
															-    return {
														
 
															-        "L_s": params.L_min_s,
														
 
															-        "t_bw_s": params.t_bw_min_s,
														
 
															-        "recovery": 0.0,
														
 
															-        "k_bw_per_ceb": 1,
														
 
															-        "daily_prod_time_h": 0.0,
														
 
															-        "ton_water_energy_kWh_per_m3": 0.0
														
 
															-    }
														
 
															-
														
 
															-
														
 
															-def generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s):
														
 
															-    """
														
 
															-    根据工厂当前值、模型上一轮决策值和模型当前轮决策值，生成PLC指令。
														
 
															-
														
 
															-    新增功能：
														
 
															-    1. 处理None值情况：如果模型上一轮值为None，则使用工厂当前值；
														
 
															-       如果工厂当前值也为None，则返回None并提示错误。
														
 
															-    """
														
 
															-    # 参数配置保持不变
														
 
															-    params = UFParams(
														
 
															-        L_min_s=3600.0, L_max_s=6000.0, L_step_s=60.0,
														
 
															-        t_bw_min_s=40.0, t_bw_max_s=60.0, t_bw_step_s=5.0,
														
 
															-    )
														
 
															-
														
 
															-    # 参数解包
														
 
															-    L_step_s = params.L_step_s
														
 
															-    t_bw_step_s = params.t_bw_step_s
														
 
															-    L_min_s = params.L_min_s
														
 
															-    L_max_s = params.L_max_s
														
 
															-    t_bw_min_s = params.t_bw_min_s
														
 
															-    t_bw_max_s = params.t_bw_max_s
														
 
															-    adjustment_threshold = 1.0
														
 
															-
														
 
															-    # 处理None值情况
														
 
															-    if model_prev_L_s is None:
														
 
															-        if current_L_s is None:
														
 
															-            print("错误: 过滤时长的工厂当前值和模型上一轮值均为None")
														
 
															-            return None, None
														
 
															-        else:
														
 
															-            # 使用工厂当前值作为基准
														
 
															-            effective_current_L = current_L_s
														
 
															-            source_L = "工厂当前值(模型上一轮值为None)"
														
 
															-    else:
														
 
															-        # 模型上一轮值不为None，继续检查工厂当前值
														
 
															-        if current_L_s is None:
														
 
															-            effective_current_L = model_prev_L_s
														
 
															-            source_L = "模型上一轮值(工厂当前值为None)"
														
 
															-        else:
														
 
															-            # 两个值都不为None，比较哪个更接近模型当前建议值
														
 
															-            current_to_model_diff = abs(current_L_s - model_L_s)
														
 
															-            prev_to_model_diff = abs(model_prev_L_s - model_L_s)
														
 
															-
														
 
															-            if current_to_model_diff <= prev_to_model_diff:
														
 
															-                effective_current_L = current_L_s
														
 
															-                source_L = "工厂当前值"
														
 
															-            else:
														
 
															-                effective_current_L = model_prev_L_s
														
 
															-                source_L = "模型上一轮值"
														
 
															-
														
 
															-    # 对反洗时长进行同样的处理
														
 
															-    if model_prev_t_bw_s is None:
														
 
															-        if current_t_bw_s is None:
														
 
															-            print("错误: 反洗时长的工厂当前值和模型上一轮值均为None")
														
 
															-            return None, None
														
 
															-        else:
														
 
															-            effective_current_t_bw = current_t_bw_s
														
 
															-            source_t_bw = "工厂当前值(模型上一轮值为None)"
														
 
															-    else:
														
 
															-        if current_t_bw_s is None:
														
 
															-            effective_current_t_bw = model_prev_t_bw_s
														
 
															-            source_t_bw = "模型上一轮值(工厂当前值为None)"
														
 
															-        else:
														
 
															-            current_to_model_t_bw_diff = abs(current_t_bw_s - model_t_bw_s)
														
 
															-            prev_to_model_t_bw_diff = abs(model_prev_t_bw_s - model_t_bw_s)
														
 
															-
														
 
															-            if current_to_model_t_bw_diff <= prev_to_model_t_bw_diff:
														
 
															-                effective_current_t_bw = current_t_bw_s
														
 
															-                source_t_bw = "工厂当前值"
														
 
															-            else:
														
 
															-                effective_current_t_bw = model_prev_t_bw_s
														
 
															-                source_t_bw = "模型上一轮值"
														
 
															-
														
 
															-    # 检测所有输入值是否在规定范围内（只对非None值进行检查）
														
 
															-    # 工厂当前值检查（警告）
														
 
															-    if current_L_s is not None and not (L_min_s <= current_L_s <= L_max_s):
														
 
															-        print(f"警告: 当前过滤时长 {current_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
														
 
															-    if current_t_bw_s is not None and not (t_bw_min_s <= current_t_bw_s <= t_bw_max_s):
														
 
															-        print(f"警告: 当前反洗时长 {current_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
														
 
															-
														
 
															-    # 模型上一轮决策值检查（警告）
														
 
															-    if model_prev_L_s is not None and not (L_min_s <= model_prev_L_s <= L_max_s):
														
 
															-        print(f"警告: 模型上一轮过滤时长 {model_prev_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
														
 
															-    if model_prev_t_bw_s is not None and not (t_bw_min_s <= model_prev_t_bw_s <= t_bw_max_s):
														
 
															-        print(f"警告: 模型上一轮反洗时长 {model_prev_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
														
 
															-
														
 
															-    # 模型当前轮决策值检查（错误）
														
 
															-    if model_L_s is None:
														
 
															-        raise ValueError("错误: 决策模型建议的过滤时长不能为None")
														
 
															-    elif not (L_min_s <= model_L_s <= L_max_s):
														
 
															-        raise ValueError(f"错误: 决策模型建议的过滤时长 {model_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
														
 
															-
														
 
															-    if model_t_bw_s is None:
														
 
															-        raise ValueError("错误: 决策模型建议的反洗时长不能为None")
														
 
															-    elif not (t_bw_min_s <= model_t_bw_s <= t_bw_max_s):
														
 
															-        raise ValueError(f"错误: 决策模型建议的反洗时长 {model_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
														
 
															-
														
 
															-    print(f"过滤时长基准: {source_L}, 值: {effective_current_L}")
														
 
															-    print(f"反洗时长基准: {source_t_bw}, 值: {effective_current_t_bw}")
														
 
															-
														
 
															-    # 使用选定的基准值进行计算调整
														
 
															-    L_diff = model_L_s - effective_current_L
														
 
															-    L_adjustment = 0
														
 
															-    if abs(L_diff) > adjustment_threshold * L_step_s:
														
 
															-        if L_diff > 0:
														
 
															-            L_adjustment = L_step_s
														
 
															-        else:
														
 
															-            L_adjustment = -L_step_s
														
 
															-    next_L_s = effective_current_L + L_adjustment
														
 
															-
														
 
															-    t_bw_diff = model_t_bw_s - effective_current_t_bw
														
 
															-    t_bw_adjustment = 0
														
 
															-    if abs(t_bw_diff) > adjustment_threshold * t_bw_step_s:
														
 
															-        if t_bw_diff > 0:
														
 
															-            t_bw_adjustment = t_bw_step_s
														
 
															-        else:
														
 
															-            t_bw_adjustment = -t_bw_step_s
														
 
															-    next_t_bw_s = effective_current_t_bw + t_bw_adjustment
														
 
															-
														
 
															-    return next_L_s, next_t_bw_s
														
 
															-
														
 
															-
														
 
															-current_L_s = 3920
														
 
															-current_t_bw_s = 98
														
 
															-model_prev_L_s = None
														
 
															-model_prev_t_bw_s = None
														
 
															-model_L_s = 4160
														
 
															-model_t_bw_s = 96
														
 
															-next_L_s, next_t_bw_s = generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s)
														
 
															-print(f"next_L_s={next_L_s}, next_t_bw_s={next_t_bw_s}")
														
--- a/models/uf-rl/超滤训练源码/UF_models.py
+++ b/models/uf-rl/超滤训练源码/UF_models.py
@@ -1,33 +0,0 @@
 
															-import torch
														
 
															-import numpy as np
														
 
															-
														
 
															-# TMP 上升量模型
														
 
															-class TMPIncreaseModel(torch.nn.Module):
														
 
															-    def __init__(self):
														
 
															-        super().__init__()
														
 
															-    def forward(self, p, L_h):
														
 
															-        return float(p.alpha * (p.q_UF ** p.belta) * L_h)
														
 
															-
														
 
															-# 反洗 TMP 去除模型
														
 
															-class TMPDecreaseModel(torch.nn.Module):
														
 
															-    def __init__(self):
														
 
															-        super().__init__()
														
 
															-    def forward(self, p, L_s, t_bw_s):
														
 
															-        L = max(float(L_s), 1.0)
														
 
															-        t = max(float(t_bw_s), 1e-6)
														
 
															-        upper_L = p.phi_bw_min + (p.phi_bw_max - p.phi_bw_min) * np.exp(- L / p.L_ref_s)
														
 
															-        time_gain = 1.0 - np.exp(- (t / p.tau_bw_s) ** p.gamma_t)
														
 
															-        phi = upper_L * time_gain
														
 
															-        return float(np.clip(phi, 0.0, 0.999))
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    model_fp = TMPIncreaseModel()
														
 
															-    model_bw = TMPDecreaseModel()
														
 
															-
														
 
															-
														
 
															-    torch.save(model_fp.state_dict(), "uf_fp.pth")
														
 
															-    torch.save(model_bw.state_dict(), "uf_bw.pth")
														
 
															-
														
 
															-
														
 
															-    print("模型已安全保存为 uf_fp.pth、uf_bw.pth")
														
--- a/models/uf-rl/超滤训练源码/UF_resistance_models.py
+++ b/models/uf-rl/超滤训练源码/UF_resistance_models.py
--- a/models/uf-rl/超滤训练源码/check_initial_state.py
+++ b/models/uf-rl/超滤训练源码/check_initial_state.py
@@ -0,0 +1,138 @@
 
															+# check_initial_state.py
														
 
															+"""
														
 
															+检查初始状态是否为“必死状态”（conservatively dead）：
														
 
															+1) 实例化 base_params（优先使用 rl_dqn_env 中提供的 base_params 或 UFParams）
														
 
															+2) 实例化环境类 UFSuperCycleEnv(base_params)
														
 
															+3) 调用 env.generate_initial_state() 生成 env.current_params（不调用 reset()）
														
 
															+4) 用固定策略 (函数默认 L_s=4200s, t_bw_s=50s；脚本入口使用 L_s=6000s, t_bw_s=40s) 连续模拟 max_steps 次，
														
 
															+   若任意一次 is_dead_cycle(info) 返回 False 则判定为必死（返回 True），否则返回 False。
														
 
															+"""
														
 
															+
														
 
															+from typing import Any
														
 
															+import copy
														
 
															+import traceback
														
 
															+
														
 
															+# 从 rl_dqn_env 导入必需项
														
 
															+try:
														
 
															+    from DQN_env import (
														
 
															+        simulate_one_supercycle,
														
 
															+        is_dead_cycle,
														
 
															+        UFSuperCycleEnv,
														
 
															+        UFParams,       # 如果模块里有 UFParams 类就导入
														
 
															+        base_params     # 如果模块直接提供 base_params 实例也尝试导入
														
 
															+    )
														
 
															+except Exception:
														
 
															+    # 有可能某些名字不存在 —— 我们会稍后用回退方案处理
														
 
															+    # 先导入模块并再尝试访问属性，确保错误信息更友好
														
 
															+    import importlib
														
 
															+    rl = importlib.import_module("rl_dqn_env")
														
 
															+    simulate_one_supercycle = getattr(rl, "simulate_one_supercycle", None)
														
 
															+    is_dead_cycle = getattr(rl, "is_dead_cycle", None)
														
 
															+    UFSuperCycleEnv = getattr(rl, "UFSuperCycleEnv", None)
														
 
															+    UFParams = getattr(rl, "UFParams", None)
														
 
															+    base_params = getattr(rl, "base_params", None)
														
 
															+
														
 
															+# 检查导入完整性
														
 
															+_missing = []
														
 
															+if simulate_one_supercycle is None:
														
 
															+    _missing.append("simulate_one_supercycle")
														
 
															+if is_dead_cycle is None:
														
 
															+    _missing.append("is_dead_cycle")
														
 
															+if UFSuperCycleEnv is None:
														
 
															+    _missing.append("UFSuperCycleEnv")
														
 
															+if _missing:
														
 
															+    raise ImportError(f"无法从 rl_dqn_env 导入以下必要项: {', '.join(_missing)}")
														
 
															+
														
 
															+def is_dead_initial_state_env(env: UFSuperCycleEnv, max_steps: int = 15,
														
 
															+                              L_s: int = 4200, t_bw_s: int = 50,
														
 
															+                              verbose: bool = True) -> bool:
														
 
															+    """
														
 
															+    使用 env.current_params 作为初始状态判断是否为必死状态（保守策略）。
														
 
															+
														
 
															+    参数:
														
 
															+        env: 已实例化的 UFSuperCycleEnv（必须包含 generate_initial_state() 与 current_params）
														
 
															+        max_steps: 模拟步数（默认 15）
														
 
															+        L_s: 过滤时长（s），保守值 3600
														
 
															+        t_bw_s: 物理反洗时长（s），保守值 60
														
 
															+        verbose: 是否打印每步结果
														
 
															+
														
 
															+    返回:
														
 
															+        True 表示必死（conservatively dead）
														
 
															+        False 表示可行
														
 
															+    """
														
 
															+    # 1) 确保 env 有 current_params，并且 generate_initial_state 可用
														
 
															+    if not hasattr(env, "generate_initial_state"):
														
 
															+        raise AttributeError("env 缺少 generate_initial_state() 方法。")
														
 
															+    # 生成初始状态（不会调用 reset）
														
 
															+    env.generate_initial_state()
														
 
															+
														
 
															+    if not hasattr(env, "current_params"):
														
 
															+        raise AttributeError("env.generate_initial_state() 未设置 env.current_params。")
														
 
															+
														
 
															+    curr_p = copy.deepcopy(env.current_params)
														
 
															+
														
 
															+    for step in range(1, max_steps + 1):
														
 
															+        try:
														
 
															+            info, next_params = simulate_one_supercycle(curr_p, L_s, t_bw_s)
														
 
															+        except Exception as e:
														
 
															+            # 如果 simulate 出错，把异常视为“失败”（保守处理）
														
 
															+            if verbose:
														
 
															+                print(f"[Step {step}] simulate_one_supercycle 抛出异常，视为失败。异常信息：{e}")
														
 
															+                traceback.print_exc()
														
 
															+            return True
														
 
															+
														
 
															+        success = is_dead_cycle(info)  # True 表示成功循环
														
 
															+
														
 
															+        if verbose:
														
 
															+            print(f"[Step {step}] 循环结果：{'成功' if success else '失败'}")
														
 
															+            # 如果 info 中有关键诊断字段，打印简要信息
														
 
															+            try:
														
 
															+                print(f"     TMP0: {info.get('TMP0')},max_TMP: {info.get('max_TMP_during_filtration')}, recovery: {info.get('recovery')}, "
														
 
															+                      f"R0: {info.get('R0')}, R_after_ceb: {info.get('R_after_ceb')}")
														
 
															+            except Exception:
														
 
															+                pass
														
 
															+
														
 
															+        if not success:
														
 
															+            if verbose:
														
 
															+                print(f"在第 {step} 步检测到失败，判定为必死初始状态（conservatively dead）。")
														
 
															+            return True
														
 
															+
														
 
															+        # 否则继续，用 next_params 作为下一步起始参数
														
 
															+        curr_p = next_params
														
 
															+
														
 
															+    if verbose:
														
 
															+        print(f"{max_steps} 步均成功，初始状态判定为可行（non-dead）。")
														
 
															+    return False
														
 
															+
														
 
															+
														
 
															+if __name__ == "__main__":
														
 
															+    print("=== check_initial_state.py: 使用 env.generate_initial_state() 检查初始状态是否为必死 ===")
														
 
															+
														
 
															+    try:
														
 
															+        # 1) 构造 base_params
														
 
															+        if base_params is not None:
														
 
															+            bp = base_params
														
 
															+            print("使用 rl_dqn_env 中提供的 base_params。")
														
 
															+        elif UFParams is not None:
														
 
															+            bp = UFParams()  # 使用默认构造
														
 
															+            print("使用 UFParams() 构造 base_params 的实例。")
														
 
															+        else:
														
 
															+            raise ImportError("无法构造 base_params：rl_dqn_env 中既无 base_params 也无 UFParams。")
														
 
															+
														
 
															+        # 2) 实例化环境类（将 base_params 传入构造器）
														
 
															+        env = UFSuperCycleEnv(bp)
														
 
															+        print("已实例化 UFSuperCycleEnv 环境。")
														
 
															+
														
 
															+        # 3) 调用 env.generate_initial_state() 并检查 env.current_params 是否为必死
														
 
															+        dead = is_dead_initial_state_env(env, max_steps=getattr(env, "max_episode_steps", 15),
														
 
															+                                        L_s=6000, t_bw_s=40, verbose=True)
														
 
															+
														
 
															+        print("\n=== 判定结果 ===")
														
 
															+        if dead:
														
 
															+            print("当前生成的初始状态为【必死状态】（conservatively dead）。")
														
 
															+        else:
														
 
															+            print("当前生成的初始状态为【可行状态】（non-dead）。")
														
 
															+
														
 
															+    except Exception as e:
														
 
															+        print("脚本执行出现错误：", e)
														
 
															+        traceback.print_exc()
														
--- a/models/uf-rl/超滤训练源码/uf_bw.pth
+++ b/models/uf-rl/超滤训练源码/uf_bw.pth
--- a/models/uf-rl/超滤训练源码/uf_fp.pth
+++ b/models/uf-rl/超滤训练源码/uf_fp.pth