5 сар өмнө · d869d1eb00
--- a/models/pressure-predictor/gat-lstm_model/20min/__init__.py
+++ b/models/pressure-predictor/gat-lstm_model/20min/__init__.py
@@ -13,3 +13,4 @@ from .predict import Predictor
 
				 
			
 
				 __all__ = ['Predictor']
			
 
				 
			
 
				+
			
--- a/models/pressure-predictor/gat-lstm_model/20min/predict.py
+++ b/models/pressure-predictor/gat-lstm_model/20min/predict.py
@@ -40,7 +40,7 @@ except ImportError:
 
				     import time
			
 
				     
			
 
				     def setup_logger(name, level='INFO', log_file=None, format_type='colored', max_bytes=10485760, backup_count=5):
			
 
				-        """简化版logger设置"""
			
 
				+        """logger设置"""
			
 
				         logger = logging.getLogger(name)
			
 
				         logger.setLevel(getattr(logging, level))
			
 
				         
			
@@ -80,7 +80,7 @@ except ImportError:
 
				         return wrapper
			
 
				     
			
 
				     class Config:
			
 
				-        """简化版配置类"""
			
 
				+        """配置类"""
			
 
				         def __init__(self, config_file):
			
 
				             with open(config_file, 'r', encoding='utf-8') as f:
			
 
				                 self.config = yaml.safe_load(f)
			
--- a/models/pressure-predictor/gat-lstm_model/90day/__init__.py
+++ b/models/pressure-predictor/gat-lstm_model/90day/__init__.py
@@ -13,3 +13,4 @@ from .predict import Predictor
 
				 
			
 
				 __all__ = ['Predictor']
			
 
				 
			
 
				+
			
--- a/models/pressure-predictor/gat-lstm_model/api_main.py
+++ b/models/pressure-predictor/gat-lstm_model/api_main.py
@@ -27,19 +27,20 @@ os.makedirs(log_dir, exist_ok=True)
 
				 data_save_dir = os.path.join(base_dir, 'received_data')
			
 
				 os.makedirs(data_save_dir, exist_ok=True)
			
 
				 
			
 
				-logging.basicConfig(
			
 
				-    level=logging.INFO,
			
 
				-    format='%(asctime)s - %(levelname)s - %(message)s',
			
 
				-    handlers=[
			
 
				-        RotatingFileHandler(
			
 
				-            os.path.join(log_dir, "api.log"),
			
 
				-            maxBytes=2 * 1024 * 1024,
			
 
				-            backupCount=5,
			
 
				-            encoding='utf-8'
			
 
				-        ),
			
 
				-        logging.StreamHandler()
			
 
				-    ]
			
 
				-)
			
 
				+if not logging.getLogger().handlers:  # 只在无handler时配置
			
 
				+    logging.basicConfig(
			
 
				+        level=logging.INFO,
			
 
				+        format='%(asctime)s - %(levelname)s - %(message)s',
			
 
				+        handlers=[
			
 
				+            RotatingFileHandler(
			
 
				+                os.path.join(log_dir, "api.log"),
			
 
				+                maxBytes=2 * 1024 * 1024,
			
 
				+                backupCount=5,
			
 
				+                encoding='utf-8'
			
 
				+            ),
			
 
				+            logging.StreamHandler()
			
 
				+        ]
			
 
				+    )
			
 
				 logger = logging.getLogger(__name__)
			
 
				 
			
 
				 # --- 添加当前目录到Python路径 ---
			
--- a/models/pressure-predictor/gat-lstm_model/requirements.txt
+++ b/models/pressure-predictor/gat-lstm_model/requirements.txt
@@ -31,3 +31,4 @@ pyyaml>=6.0
 
				 
			
 
				 
			
 
				 
			
 
				+
			
--- a/models/pressure-predictor/gat-lstm_model/shared/__init__.py
+++ b/models/pressure-predictor/gat-lstm_model/shared/__init__.py
@@ -10,3 +10,4 @@
 
				 
			
 
				 __version__ = '1.0.0'
			
 
				 
			
 
				+
			
--- a/models/pressure-predictor/gat-lstm_model/shared/args.py
+++ b/models/pressure-predictor/gat-lstm_model/shared/args.py
@@ -58,3 +58,4 @@ def lstm_args_parser():
 
				     
			
 
				     return args
			
 
				 
			
 
				+
			
--- a/models/pressure-predictor/gat-lstm_model/shared/data_preprocessor.py
+++ b/models/pressure-predictor/gat-lstm_model/shared/data_preprocessor.py
@@ -307,3 +307,4 @@ class DataPreprocessor:
 
				     
			
 
				         return data_loader
			
 
				 
			
 
				+
			
--- a/models/pressure-predictor/gat-lstm_model/shared/data_trainer.py
+++ b/models/pressure-predictor/gat-lstm_model/shared/data_trainer.py
@@ -265,3 +265,4 @@ class Trainer:
 
				         
			
 
				         return r2_scores, rmse_scores, mape_scores
			
 
				 
			
 
				+
			
--- a/models/pressure-predictor/gat-lstm_model/shared/gat_lstm.py
+++ b/models/pressure-predictor/gat-lstm_model/shared/gat_lstm.py
@@ -101,3 +101,4 @@ class GAT_LSTM(nn.Module):
 
				             outputs.append(model(x))  # 每个输出为[batch, output_size]
			
 
				         return torch.cat(outputs, dim=1)  # 拼接后[batch, output_size * labels_num]
			
 
				 
			
 
				+
			
--- a/models/uf-rl/DQN_decide.py
+++ b/models/uf-rl/DQN_decide.py
@@ -1,246 +0,0 @@
 
				-import numpy as np
			
 
				-from stable_baselines3 import DQN
			
 
				-from UF_super_RL.DQN_env import UFSuperCycleEnv
			
 
				-from UF_super_RL.DQN_env import UFParams
			
 
				-
			
 
				-# 模型路径
			
 
				-MODEL_PATH = "dqn_model.zip"
			
 
				-
			
 
				-# 加载模型（只加载一次，提高效率）
			
 
				-model = DQN.load(MODEL_PATH)
			
 
				-
			
 
				-def run_uf_DQN_decide(uf_params, TMP0_value: float):
			
 
				-    """
			
 
				-    单步决策函数：输入原始 TMP0，预测并执行动作
			
 
				-
			
 
				-    参数:
			
 
				-        TMP0_value (float): 当前 TMP0 值（单位与环境一致）
			
 
				-
			
 
				-    返回:
			
 
				-        dict: 包含模型选择的动作、动作参数、新状态、奖励等
			
 
				-    """
			
 
				-    # 1. 实例化环境
			
 
				-    base_params = uf_params
			
 
				-    env = UFSuperCycleEnv(base_params)
			
 
				-
			
 
				-    # 2. 将输入的 TMP0 写入环境
			
 
				-    env.current_params.TMP0 = TMP0_value
			
 
				-
			
 
				-    # 3. 获取归一化状态
			
 
				-    obs = env._get_obs().reshape(1, -1)
			
 
				-
			
 
				-    # 4. 模型预测动作
			
 
				-    action, _ = model.predict(obs, deterministic=True)
			
 
				-
			
 
				-    # 5. 解析动作对应的 L_s 和 t_bw_s
			
 
				-    L_s, t_bw_s = env._get_action_values(action[0])
			
 
				-
			
 
				-    # 6. 在环境中执行该动作
			
 
				-    next_obs, reward, terminated, truncated, info = env.step(action[0])
			
 
				-
			
 
				-    # 7. 整理结果
			
 
				-    result = {
			
 
				-        "action": int(action[0]),
			
 
				-        "L_s": float(L_s),
			
 
				-        "t_bw_s": float(t_bw_s),
			
 
				-        "next_obs": next_obs,
			
 
				-        "reward": reward,
			
 
				-        "terminated": terminated,
			
 
				-        "truncated": truncated,
			
 
				-        "info": info
			
 
				-    }
			
 
				-
			
 
				-    # 8. 关闭环境
			
 
				-    env.close()
			
 
				-
			
 
				-    return result
			
 
				-
			
 
				-def generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s):
			
 
				-    """
			
 
				-    根据工厂当前值、模型上一轮决策值和模型当前轮决策值，生成PLC指令。
			
 
				-
			
 
				-    新增功能：
			
 
				-    1. 处理None值情况：如果模型上一轮值为None，则使用工厂当前值；
			
 
				-       如果工厂当前值也为None，则返回None并提示错误。
			
 
				-    """
			
 
				-    # 参数配置保持不变
			
 
				-    params = UFParams(
			
 
				-        L_min_s=3600.0, L_max_s=6000.0, L_step_s=60.0,
			
 
				-        t_bw_min_s=40.0, t_bw_max_s=60.0, t_bw_step_s=5.0,
			
 
				-    )
			
 
				-
			
 
				-    # 参数解包
			
 
				-    L_step_s = params.L_step_s
			
 
				-    t_bw_step_s = params.t_bw_step_s
			
 
				-    L_min_s = params.L_min_s
			
 
				-    L_max_s = params.L_max_s
			
 
				-    t_bw_min_s = params.t_bw_min_s
			
 
				-    t_bw_max_s = params.t_bw_max_s
			
 
				-    adjustment_threshold = 1.0
			
 
				-
			
 
				-    # 处理None值情况
			
 
				-    if model_prev_L_s is None:
			
 
				-        if current_L_s is None:
			
 
				-            print("错误: 过滤时长的工厂当前值和模型上一轮值均为None")
			
 
				-            return None, None
			
 
				-        else:
			
 
				-            # 使用工厂当前值作为基准
			
 
				-            effective_current_L = current_L_s
			
 
				-            source_L = "工厂当前值(模型上一轮值为None)"
			
 
				-    else:
			
 
				-        # 模型上一轮值不为None，继续检查工厂当前值
			
 
				-        if current_L_s is None:
			
 
				-            effective_current_L = model_prev_L_s
			
 
				-            source_L = "模型上一轮值(工厂当前值为None)"
			
 
				-        else:
			
 
				-            effective_current_L = model_prev_L_s
			
 
				-            source_L = "模型上一轮值"
			
 
				-
			
 
				-    # 对反洗时长进行同样的处理
			
 
				-    if model_prev_t_bw_s is None:
			
 
				-        if current_t_bw_s is None:
			
 
				-            print("错误: 反洗时长的工厂当前值和模型上一轮值均为None")
			
 
				-            return None, None
			
 
				-        else:
			
 
				-            effective_current_t_bw = current_t_bw_s
			
 
				-            source_t_bw = "工厂当前值(模型上一轮值为None)"
			
 
				-    else:
			
 
				-        if current_t_bw_s is None:
			
 
				-            effective_current_t_bw = model_prev_t_bw_s
			
 
				-            source_t_bw = "模型上一轮值(工厂当前值为None)"
			
 
				-        else:
			
 
				-            effective_current_t_bw = model_prev_t_bw_s
			
 
				-            source_t_bw = "模型上一轮值"
			
 
				-
			
 
				-    # 检测所有输入值是否在规定范围内（只对非None值进行检查）
			
 
				-    # 工厂当前值检查（警告）
			
 
				-    if current_L_s is not None and not (L_min_s <= current_L_s <= L_max_s):
			
 
				-        print(f"警告: 当前过滤时长 {current_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
			
 
				-    if current_t_bw_s is not None and not (t_bw_min_s <= current_t_bw_s <= t_bw_max_s):
			
 
				-        print(f"警告: 当前反洗时长 {current_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
			
 
				-
			
 
				-    # 模型上一轮决策值检查（警告）
			
 
				-    if model_prev_L_s is not None and not (L_min_s <= model_prev_L_s <= L_max_s):
			
 
				-        print(f"警告: 模型上一轮过滤时长 {model_prev_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
			
 
				-    if model_prev_t_bw_s is not None and not (t_bw_min_s <= model_prev_t_bw_s <= t_bw_max_s):
			
 
				-        print(f"警告: 模型上一轮反洗时长 {model_prev_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
			
 
				-
			
 
				-    # 模型当前轮决策值检查（错误）
			
 
				-    if model_L_s is None:
			
 
				-        raise ValueError("错误: 决策模型建议的过滤时长不能为None")
			
 
				-    elif not (L_min_s <= model_L_s <= L_max_s):
			
 
				-        raise ValueError(f"错误: 决策模型建议的过滤时长 {model_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
			
 
				-
			
 
				-    if model_t_bw_s is None:
			
 
				-        raise ValueError("错误: 决策模型建议的反洗时长不能为None")
			
 
				-    elif not (t_bw_min_s <= model_t_bw_s <= t_bw_max_s):
			
 
				-        raise ValueError(f"错误: 决策模型建议的反洗时长 {model_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
			
 
				-
			
 
				-    print(f"过滤时长基准: {source_L}, 值: {effective_current_L}")
			
 
				-    print(f"反洗时长基准: {source_t_bw}, 值: {effective_current_t_bw}")
			
 
				-
			
 
				-    # 使用选定的基准值进行计算调整
			
 
				-    L_diff = model_L_s - effective_current_L
			
 
				-    L_adjustment = 0
			
 
				-    if abs(L_diff) >= adjustment_threshold * L_step_s:
			
 
				-        if L_diff >= 0:
			
 
				-            L_adjustment = L_step_s
			
 
				-        else:
			
 
				-            L_adjustment = -L_step_s
			
 
				-    next_L_s = effective_current_L + L_adjustment
			
 
				-
			
 
				-    t_bw_diff = model_t_bw_s - effective_current_t_bw
			
 
				-    t_bw_adjustment = 0
			
 
				-    if abs(t_bw_diff) >= adjustment_threshold * t_bw_step_s:
			
 
				-        if t_bw_diff >= 0:
			
 
				-            t_bw_adjustment = t_bw_step_s
			
 
				-        else:
			
 
				-            t_bw_adjustment = -t_bw_step_s
			
 
				-    next_t_bw_s = effective_current_t_bw + t_bw_adjustment
			
 
				-
			
 
				-    return next_L_s, next_t_bw_s
			
 
				-
			
 
				-
			
 
				-from UF_super_RL.DQN_env import simulate_one_supercycle
			
 
				-def calc_uf_cycle_metrics(p, TMP0, max_tmp_during_filtration, min_tmp_during_filtration, L_s: float, t_bw_s: float):
			
 
				-    """
			
 
				-    计算 UF 超滤系统的核心性能指标
			
 
				-
			
 
				-    参数:
			
 
				-        p (UFParams): UF 系统参数
			
 
				-        L_s (float): 单次过滤时间（秒）
			
 
				-        t_bw_s (float): 单次反洗时间（秒）
			
 
				-
			
 
				-    返回:
			
 
				-        dict: {
			
 
				-            "k_bw_per_ceb": 小周期次数,
			
 
				-            "ton_water_energy_kWh_per_m3": 吨水电耗,
			
 
				-            "recovery": 回收率,
			
 
				-            "net_delivery_rate_m3ph": 净供水率 (m³/h),
			
 
				-            "daily_prod_time_h": 日均产水时间 (小时/天)
			
 
				-            "max_permeability": 全周期最高渗透率(lmh/bar)
			
 
				-        }
			
 
				-    """
			
 
				-    # 将跨膜压差写入参数
			
 
				-    p.TMP0 = TMP0
			
 
				-
			
 
				-    # 模拟该参数下的超级周期
			
 
				-    feasible, info = simulate_one_supercycle(p, L_s, t_bw_s)
			
 
				-
			
 
				-    # 获得模型模拟周期信息
			
 
				-    k_bw_per_ceb = info["k_bw_per_ceb"]
			
 
				-    ton_water_energy_kWh_per_m3 = info["ton_water_energy_kWh_per_m3"]
			
 
				-    recovery = info["recovery"]
			
 
				-    net_delivery_rate_m3ph = info["net_delivery_rate_m3ph"]
			
 
				-    daily_prod_time_h = info["daily_prod_time_h"]
			
 
				-
			
 
				-    # 获得模型模拟周期内最高跨膜压差/最低跨膜压差
			
 
				-    if max_tmp_during_filtration is None:
			
 
				-        max_tmp_during_filtration = info["max_TMP_during_filtration"]
			
 
				-    if min_tmp_during_filtration is None:
			
 
				-        min_tmp_during_filtration = info["min_TMP_during_filtration"]
			
 
				-
			
 
				-    # 计算最高渗透率
			
 
				-    max_permeability = 100 * p.q_UF / (128*40) / min_tmp_during_filtration
			
 
				-
			
 
				-
			
 
				-    return {
			
 
				-        "k_bw_per_ceb": k_bw_per_ceb,
			
 
				-        "ton_water_energy_kWh_per_m3": ton_water_energy_kWh_per_m3,
			
 
				-        "recovery": recovery,
			
 
				-        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
			
 
				-        "daily_prod_time_h": daily_prod_time_h,
			
 
				-        "max_permeability": max_permeability
			
 
				-    }
			
 
				-
			
 
				-
			
 
				-# ==============================
			
 
				-# 示例调用
			
 
				-# ==============================
			
 
				-if __name__ == "__main__":
			
 
				-    uf_params = UFParams()
			
 
				-    TMP0 = 0.03 # 原始 TMP0
			
 
				-    model_decide_result = run_uf_DQN_decide(uf_params, TMP0) # 调用模型获得动作
			
 
				-    model_L_s = model_decide_result['L_s'] # 获得模型决策产水时长
			
 
				-    model_t_bw_s = model_decide_result['t_bw_s'] # 获得模型决策反洗时长
			
 
				-
			
 
				-    current_L_s = 3800
			
 
				-    current_t_bw_s = 40
			
 
				-    model_prev_L_s = 4040
			
 
				-    model_prev_t_bw_s = 60
			
 
				-    L_s, t_bw_s = generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s) # 获取模型下发指令
			
 
				-
			
 
				-    L_s = 4100
			
 
				-    t_bw_s = 96
			
 
				-    max_tmp_during_filtration = 0.050176 # 新增工厂数据接口：周期最高/最低跨膜压差，无工厂数据接入时传入None，calc_uf_cycle_metrics()自动获取模拟周期中的跨膜压差最值
			
 
				-    min_tmp_during_filtration = 0.012496
			
 
				-    execution_result = calc_uf_cycle_metrics(uf_params, TMP0, max_tmp_during_filtration, min_tmp_during_filtration, L_s, t_bw_s)
			
 
				-    print("\n===== 单步决策结果 =====")
			
 
				-    print(f"模型选择的动作: {model_decide_result['action']}")
			
 
				-    print(f"模型选择的L_s: {model_L_s} 秒, 模型选择的t_bw_s: {model_t_bw_s} 秒")
			
 
				-    print(f"指令下发的L_s: {L_s} 秒, 指令下发的t_bw_s: {t_bw_s} 秒")
			
 
				-    print(f"指令对应的反洗次数: {execution_result['k_bw_per_ceb']}")
			
 
				-    print(f"指令对应的吨水电耗: {execution_result['ton_water_energy_kWh_per_m3']}")
			
 
				-    print(f"指令对应的回收率: {execution_result['recovery']}")
			
 
				-    print(f"指令对应的日均产水时间: {execution_result['daily_prod_time_h']}")
			
 
				-    print(f"指令对应的最高渗透率: {execution_result['max_permeability']}")
			
--- a/models/uf-rl/DQN_env.py
+++ b/models/uf-rl/DQN_env.py
@@ -1,340 +0,0 @@
 
				-import os
			
 
				-import time
			
 
				-import random
			
 
				-import numpy as np
			
 
				-import gymnasium as gym
			
 
				-from gymnasium import spaces
			
 
				-from stable_baselines3 import DQN
			
 
				-from stable_baselines3.common.monitor import Monitor
			
 
				-from stable_baselines3.common.vec_env import DummyVecEnv
			
 
				-from stable_baselines3.common.callbacks import BaseCallback
			
 
				-from typing import Dict, Tuple, Optional
			
 
				-import torch
			
 
				-import torch.nn as nn
			
 
				-from dataclasses import dataclass, asdict
			
 
				-from UF_models import TMPIncreaseModel, TMPDecreaseModel  # 导入模型类
			
 
				-import copy
			
 
				-
			
 
				-
			
 
				-# ==== 定义膜的基础运行参数 ====
			
 
				-@dataclass
			
 
				-class UFParams:
			
 
				-    # —— 膜与运行参数 ——
			
 
				-    q_UF: float = 360.0  # 过滤进水流量（m^3/h）
			
 
				-    TMP0: float = 0.03  # 初始TMP（MPa）
			
 
				-    TMP_max: float = 0.06  # TMP硬上限（MPa）
			
 
				-
			
 
				-    # —— 膜污染动力学 ——
			
 
				-    alpha: float = 1e-6  # TMP增长系数
			
 
				-    belta: float = 1.1  # 幂指数
			
 
				-
			
 
				-    # —— 反洗参数（固定） ——
			
 
				-    q_bw_m3ph: float = 1000.0  # 物理反洗流量（m^3/h）
			
 
				-
			
 
				-    # —— CEB参数（固定） ——
			
 
				-    T_ceb_interval_h: float = 48.0  # 固定每 k 小时做一次CEB
			
 
				-    v_ceb_m3: float = 30.0  # CEB用水体积（m^3）
			
 
				-    t_ceb_s: float = 40 * 60.0  # CEB时长（s）
			
 
				-    phi_ceb: float = 1.0  # CEB去除比例（简化：完全恢复到TMP0）
			
 
				-
			
 
				-    # —— 约束与收敛 ——
			
 
				-    dTMP: float = 0.001  # 单次产水结束时，相对TMP0最大升幅（MPa）
			
 
				-
			
 
				-    # —— 搜索范围（秒） ——
			
 
				-    L_min_s: float = 3800.0  # 过滤时长下限（s）
			
 
				-    L_max_s: float = 6000.0  # 过滤时长上限（s）
			
 
				-    t_bw_min_s: float = 40.0  # 物洗时长下限（s）
			
 
				-    t_bw_max_s: float = 60.0  # 物洗时长上限（s）
			
 
				-
			
 
				-    # —— 物理反洗恢复函数参数 ——
			
 
				-    phi_bw_min: float = 0.7  # 物洗去除比例最小值
			
 
				-    phi_bw_max: float = 1.0  # 物洗去除比例最大值
			
 
				-    L_ref_s: float = 4000.0  # 过滤时长影响时间尺度
			
 
				-    tau_bw_s: float = 20.0  # 物洗时长影响时间尺度
			
 
				-    gamma_t: float = 1.0  # 物洗时长作用指数
			
 
				-
			
 
				-    # —— 网格 ——
			
 
				-    L_step_s: float = 60.0  # 过滤时长步长（s）
			
 
				-    t_bw_step_s: float = 5.0  # 物洗时长步长（s）
			
 
				-
			
 
				-    # 多目标加权及高TMP惩罚
			
 
				-    w_rec: float = 0.8  # 回收率权重
			
 
				-    w_rate: float = 0.2  # 净供水率权重
			
 
				-    w_headroom: float = 0.2  # 贴边惩罚权重
			
 
				-    r_headroom: float = 2.0  # 贴边惩罚幂次
			
 
				-    headroom_hardcap: float = 0.98  # 超过此比例直接视为不可取
			
 
				-
			
 
				-# ==== 加载模拟环境模型 ====
			
 
				-# 初始化模型
			
 
				-model_fp = TMPIncreaseModel()
			
 
				-model_bw = TMPDecreaseModel()
			
 
				-
			
 
				-# 加载参数
			
 
				-model_fp.load_state_dict(torch.load("uf_fp.pth"))
			
 
				-model_bw.load_state_dict(torch.load("uf_bw.pth"))
			
 
				-
			
 
				-# 切换到推理模式
			
 
				-model_fp.eval()
			
 
				-model_bw.eval()
			
 
				-
			
 
				-
			
 
				-def _delta_tmp(p, L_h: float) -> float:
			
 
				-    """
			
 
				-    过滤时段TMP上升量：调用 uf_fp.pth 模型
			
 
				-    """
			
 
				-    return model_fp(p, L_h)
			
 
				-
			
 
				-def phi_bw_of(p, L_s: float, t_bw_s: float) -> float:
			
 
				-    """
			
 
				-    物洗去除比例：调用 uf_bw.pth 模型
			
 
				-    """
			
 
				-    return model_bw(p, L_s, t_bw_s)
			
 
				-
			
 
				-def _tmp_after_ceb(p, L_s: float, t_bw_s: float) -> float:
			
 
				-    """
			
 
				-    计算化学清洗(CEB)后的TMP，当前为恢复初始跨膜压差
			
 
				-    """
			
 
				-    return p.TMP0
			
 
				-
			
 
				-def _v_bw_m3(p, t_bw_s: float) -> float:
			
 
				-    """
			
 
				-    物理反洗水耗
			
 
				-    """
			
 
				-    return float(p.q_bw_m3ph * (float(t_bw_s) / 3600.0))
			
 
				-
			
 
				-def simulate_one_supercycle(p: UFParams, L_s: float, t_bw_s: float):
			
 
				-    """
			
 
				-    返回 (是否可行, 指标字典)
			
 
				-    - 支持动态CEB次数：48h固定间隔
			
 
				-    - 增加日均产水时间和吨水电耗
			
 
				-    - 增加最小TMP记录
			
 
				-    """
			
 
				-    L_h = float(L_s) / 3600.0  # 小周期过滤时间(h)
			
 
				-
			
 
				-    tmp = p.TMP0
			
 
				-    max_tmp_during_filtration = tmp
			
 
				-    min_tmp_during_filtration = tmp  # 新增：初始化最小TMP
			
 
				-    max_residual_increase = 0.0
			
 
				-
			
 
				-    # 小周期总时长(h)
			
 
				-    t_small_cycle_h = (L_s + t_bw_s) / 3600.0
			
 
				-
			
 
				-    # 计算超级周期内CEB次数
			
 
				-    k_bw_per_ceb = int(np.floor(p.T_ceb_interval_h / t_small_cycle_h))
			
 
				-    if k_bw_per_ceb < 1:
			
 
				-        k_bw_per_ceb = 1  # 至少一个小周期
			
 
				-
			
 
				-    # ton水电耗查表
			
 
				-    energy_lookup = {
			
 
				-        3600: 0.1034, 3660: 0.1031, 3720: 0.1029, 3780: 0.1026,
			
 
				-        3840: 0.1023, 3900: 0.1021, 3960: 0.1019, 4020: 0.1017,
			
 
				-        4080: 0.1015, 4140: 0.1012, 4200: 0.1011
			
 
				-    }
			
 
				-
			
 
				-    for _ in range(k_bw_per_ceb):
			
 
				-        tmp_run_start = tmp
			
 
				-
			
 
				-        # 过滤阶段TMP增长
			
 
				-        dtmp = _delta_tmp(p, L_h)
			
 
				-        tmp_peak = tmp_run_start + dtmp
			
 
				-
			
 
				-        # 约束1：峰值不得超过硬上限
			
 
				-        if tmp_peak > p.TMP_max + 1e-12:
			
 
				-            return False, {"reason": "TMP_max violated during filtration", "TMP_peak": tmp_peak}
			
 
				-
			
 
				-        # 更新最大和最小TMP
			
 
				-        if tmp_peak > max_tmp_during_filtration:
			
 
				-            max_tmp_during_filtration = tmp_peak
			
 
				-        if tmp_run_start < min_tmp_during_filtration:  # 新增：记录运行开始时的最小TMP
			
 
				-            min_tmp_during_filtration = tmp_run_start
			
 
				-
			
 
				-        # 物理反洗
			
 
				-        phi = phi_bw_of(p, L_s, t_bw_s)
			
 
				-        tmp_after_bw = tmp_peak - phi * (tmp_peak - tmp_run_start)
			
 
				-
			
 
				-        # 约束2：单次残余增量控制
			
 
				-        residual_inc = tmp_after_bw - tmp_run_start
			
 
				-        if residual_inc > p.dTMP + 1e-12:
			
 
				-            return False, {
			
 
				-                "reason": "residual TMP increase after BW exceeded dTMP",
			
 
				-                "residual_increase": residual_inc,
			
 
				-                "limit_dTMP": p.dTMP
			
 
				-            }
			
 
				-        if residual_inc > max_residual_increase:
			
 
				-            max_residual_increase = residual_inc
			
 
				-
			
 
				-        tmp = tmp_after_bw
			
 
				-
			
 
				-    # CEB
			
 
				-    tmp_after_ceb = p.TMP0
			
 
				-
			
 
				-    # 体积与回收率
			
 
				-    V_feed_super = k_bw_per_ceb * p.q_UF * L_h
			
 
				-    V_loss_super = k_bw_per_ceb * _v_bw_m3(p, t_bw_s) + p.v_ceb_m3
			
 
				-    V_net = max(0.0, V_feed_super - V_loss_super)
			
 
				-    recovery = max(0.0, V_net / max(V_feed_super, 1e-12))
			
 
				-
			
 
				-    # 时间与净供水率
			
 
				-    T_super_h = k_bw_per_ceb * (L_s + t_bw_s) / 3600.0 + p.t_ceb_s / 3600.0
			
 
				-    net_delivery_rate_m3ph = V_net / max(T_super_h, 1e-12)
			
 
				-
			
 
				-    # 贴边比例与硬限
			
 
				-    headroom_ratio = max_tmp_during_filtration / max(p.TMP_max, 1e-12)
			
 
				-    if headroom_ratio > p.headroom_hardcap + 1e-12:
			
 
				-        return False, {"reason": "headroom hardcap exceeded", "headroom_ratio": headroom_ratio}
			
 
				-
			
 
				-    # —— 新增指标 1：日均产水时间（h/d） ——
			
 
				-    daily_prod_time_h = k_bw_per_ceb * L_h / T_super_h * 24.0
			
 
				-
			
 
				-    # —— 新增指标 2：吨水电耗（kWh/m³） ——
			
 
				-    closest_L = min(energy_lookup.keys(), key=lambda x: abs(x - L_s))
			
 
				-    ton_water_energy = energy_lookup[closest_L]
			
 
				-
			
 
				-    info = {
			
 
				-        "recovery": recovery,
			
 
				-        "V_feed_super_m3": V_feed_super,
			
 
				-        "V_loss_super_m3": V_loss_super,
			
 
				-        "V_net_super_m3": V_net,
			
 
				-        "supercycle_time_h": T_super_h,
			
 
				-        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
			
 
				-        "max_TMP_during_filtration": max_tmp_during_filtration,
			
 
				-        "min_TMP_during_filtration": min_tmp_during_filtration,  # 新增：最小TMP
			
 
				-        "max_residual_increase_per_run": max_residual_increase,
			
 
				-        "phi_bw_effective": phi,
			
 
				-        "TMP_after_ceb": tmp_after_ceb,
			
 
				-        "headroom_ratio": headroom_ratio,
			
 
				-        "daily_prod_time_h": daily_prod_time_h,
			
 
				-        "ton_water_energy_kWh_per_m3": ton_water_energy,
			
 
				-        "k_bw_per_ceb": k_bw_per_ceb
			
 
				-    }
			
 
				-
			
 
				-    return True, info
			
 
				-
			
 
				-def _score(p: UFParams, rec: dict) -> float:
			
 
				-    """综合评分：越大越好。通过非线性放大奖励差异，强化区分好坏动作"""
			
 
				-
			
 
				-    # —— 无量纲化净供水率 ——
			
 
				-    rate_norm = rec["net_delivery_rate_m3ph"] / max(p.q_UF, 1e-12)
			
 
				-
			
 
				-    # —— TMP soft penalty (sigmoid) ——
			
 
				-    tmp_ratio = rec["max_TMP_during_filtration"] / max(p.TMP_max, 1e-12)
			
 
				-    k = 10.0
			
 
				-    headroom_penalty = 1.0 / (1.0 + np.exp(-k * (tmp_ratio - 1.0)))
			
 
				-
			
 
				-    # —— 基础 reward（0.6~0.9左右）——
			
 
				-    base_reward = (
			
 
				-        p.w_rec * rec["recovery"]
			
 
				-        + p.w_rate * rate_norm
			
 
				-        - p.w_headroom * headroom_penalty
			
 
				-    )
			
 
				-
			
 
				-    # —— 非线性放大：平方映射 + 缩放 ——
			
 
				-    # 目的是放大好坏动作差异，同时限制最大值，避免 TD-error 过大
			
 
				-    amplified_reward = (base_reward - 0.5) ** 2 * 5.0
			
 
				-
			
 
				-    # —— 可选：保留符号，区分负奖励
			
 
				-    if base_reward < 0.5:
			
 
				-        amplified_reward = -amplified_reward
			
 
				-
			
 
				-    return amplified_reward
			
 
				-
			
 
				-
			
 
				-class UFSuperCycleEnv(gym.Env):
			
 
				-    """超滤系统环境（超级周期级别决策）"""
			
 
				-
			
 
				-    metadata = {"render_modes": ["human"]}
			
 
				-
			
 
				-    def __init__(self, base_params, max_episode_steps: int = 20):
			
 
				-        super(UFSuperCycleEnv, self).__init__()
			
 
				-
			
 
				-        self.base_params = base_params
			
 
				-        self.current_params = copy.deepcopy(base_params)
			
 
				-        self.max_episode_steps = max_episode_steps
			
 
				-        self.current_step = 0
			
 
				-
			
 
				-        # 计算离散动作空间
			
 
				-        self.L_values = np.arange(
			
 
				-            self.base_params.L_min_s,
			
 
				-            self.base_params.L_max_s + self.base_params.L_step_s,
			
 
				-            self.base_params.L_step_s
			
 
				-        )
			
 
				-        self.t_bw_values = np.arange(
			
 
				-            self.base_params.t_bw_min_s,
			
 
				-            self.base_params.t_bw_max_s + self.base_params.t_bw_step_s,
			
 
				-            self.base_params.t_bw_step_s
			
 
				-        )
			
 
				-
			
 
				-        self.num_L = len(self.L_values)
			
 
				-        self.num_bw = len(self.t_bw_values)
			
 
				-
			
 
				-        # 单一离散动作空间
			
 
				-        self.action_space = spaces.Discrete(self.num_L * self.num_bw)
			
 
				-
			
 
				-        # 状态空间增加 TMP0, 上一次动作(L_s, t_bw_s), 本周期最高 TMP
			
 
				-        # 状态归一化均在 _get_obs 内处理
			
 
				-        self.observation_space = spaces.Box(
			
 
				-            low=np.zeros(4, dtype=np.float32),
			
 
				-            high=np.ones(4, dtype=np.float32),
			
 
				-            dtype=np.float32
			
 
				-        )
			
 
				-
			
 
				-        # 初始化状态
			
 
				-        self.last_action = (self.base_params.L_min_s, self.base_params.t_bw_min_s)
			
 
				-        self.max_TMP_during_filtration = self.current_params.TMP0
			
 
				-        self.reset(seed=None)
			
 
				-
			
 
				-    def _get_obs(self):
			
 
				-        TMP0 = self.current_params.TMP0
			
 
				-        TMP0_norm = (TMP0 - 0.01) / (0.05 - 0.01)
			
 
				-
			
 
				-        L_s, t_bw_s = self.last_action
			
 
				-        L_norm = (L_s - self.base_params.L_min_s) / (self.base_params.L_max_s - self.base_params.L_min_s)
			
 
				-        t_bw_norm = (t_bw_s - self.base_params.t_bw_min_s) / (self.base_params.t_bw_max_s - self.base_params.t_bw_min_s)
			
 
				-
			
 
				-        max_TMP_norm = (self.max_TMP_during_filtration - 0.01) / (0.05 - 0.01)
			
 
				-
			
 
				-        return np.array([TMP0_norm, L_norm, t_bw_norm, max_TMP_norm], dtype=np.float32)
			
 
				-
			
 
				-    def _get_action_values(self, action):
			
 
				-        L_idx = action // self.num_bw
			
 
				-        t_bw_idx = action % self.num_bw
			
 
				-        return self.L_values[L_idx], self.t_bw_values[t_bw_idx]
			
 
				-
			
 
				-    def reset(self, seed=None, options=None):
			
 
				-        super().reset(seed=seed)
			
 
				-        self.current_params.TMP0 = np.random.uniform(0.01, 0.03)
			
 
				-        self.current_step = 0
			
 
				-        self.last_action = (self.base_params.L_min_s, self.base_params.t_bw_min_s)
			
 
				-        self.max_TMP_during_filtration = self.current_params.TMP0
			
 
				-        return self._get_obs(), {}
			
 
				-
			
 
				-    def step(self, action):
			
 
				-        self.current_step += 1
			
 
				-        L_s, t_bw_s = self._get_action_values(action)
			
 
				-        L_s = np.clip(L_s, self.base_params.L_min_s, self.base_params.L_max_s)
			
 
				-        t_bw_s = np.clip(t_bw_s, self.base_params.t_bw_min_s, self.base_params.t_bw_max_s)
			
 
				-
			
 
				-        # 模拟超级周期
			
 
				-        feasible, info = simulate_one_supercycle(self.current_params, L_s, t_bw_s)
			
 
				-
			
 
				-        if feasible:
			
 
				-            reward = _score(self.current_params, info)
			
 
				-            self.current_params.TMP0 = info["TMP_after_ceb"]
			
 
				-            self.max_TMP_during_filtration = info["max_TMP_during_filtration"]
			
 
				-            terminated = False
			
 
				-        else:
			
 
				-            reward = -20
			
 
				-            terminated = True
			
 
				-
			
 
				-        truncated = self.current_step >= self.max_episode_steps
			
 
				-        self.last_action = (L_s, t_bw_s)
			
 
				-        next_obs = self._get_obs()
			
 
				-
			
 
				-        info["feasible"] = feasible
			
 
				-        info["step"] = self.current_step
			
 
				-
			
 
				-        return next_obs, reward, terminated, truncated, info
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
--- a/models/uf-rl/DQN_train.py
+++ b/models/uf-rl/DQN_train.py
@@ -1,244 +0,0 @@
 
				-import os
			
 
				-import time
			
 
				-import random
			
 
				-import numpy as np
			
 
				-import torch
			
 
				-
			
 
				-import gymnasium as gym
			
 
				-from gymnasium import spaces
			
 
				-from stable_baselines3 import DQN
			
 
				-from stable_baselines3.common.monitor import Monitor
			
 
				-from stable_baselines3.common.vec_env import DummyVecEnv
			
 
				-from stable_baselines3.common.callbacks import BaseCallback
			
 
				-
			
 
				-from DQN_env import UFParams, UFSuperCycleEnv
			
 
				-
			
 
				-
			
 
				-# ==== 定义强化学习超参数 ====
			
 
				-class DQNParams:
			
 
				-    """
			
 
				-    DQN 超参数定义类
			
 
				-    用于统一管理模型训练参数
			
 
				-    """
			
 
				-    # 学习率，控制神经网络更新步长
			
 
				-    learning_rate: float = 1e-4
			
 
				-
			
 
				-    # 经验回放缓冲区大小（步数）
			
 
				-    buffer_size: int = 10000
			
 
				-
			
 
				-    # 学习开始前需要收集的步数
			
 
				-    learning_starts: int = 200
			
 
				-
			
 
				-    # 每次从经验池中采样的样本数量
			
 
				-    batch_size: int = 32
			
 
				-
			
 
				-    # 折扣因子，越接近1越重视长期奖励
			
 
				-    gamma: float = 0.95
			
 
				-
			
 
				-    # 每隔多少步训练一次
			
 
				-    train_freq: int = 4
			
 
				-
			
 
				-    # 目标网络更新间隔
			
 
				-    target_update_interval: int = 2000
			
 
				-
			
 
				-    # 初始探索率 ε
			
 
				-    exploration_initial_eps: float = 1.0
			
 
				-
			
 
				-    # 从初始ε衰减到最终ε所占的训练比例
			
 
				-    exploration_fraction: float = 0.3
			
 
				-
			
 
				-    # 最终探索率 ε
			
 
				-    exploration_final_eps: float = 0.02
			
 
				-
			
 
				-    # 日志备注（用于区分不同实验）
			
 
				-    remark: str = "default"
			
 
				-
			
 
				-class UFEpisodeRecorder:
			
 
				-    """记录episode中的决策和结果"""
			
 
				-
			
 
				-    def __init__(self):
			
 
				-        self.episode_data = []
			
 
				-        self.current_episode = []
			
 
				-
			
 
				-    def record_step(self, obs, action, reward, done, info):
			
 
				-        """记录单步信息"""
			
 
				-        step_data = {
			
 
				-            "obs": obs.copy(),
			
 
				-            "action": action.copy(),
			
 
				-            "reward": reward,
			
 
				-            "done": done,
			
 
				-            "info": info.copy() if info else {}
			
 
				-        }
			
 
				-        self.current_episode.append(step_data)
			
 
				-
			
 
				-        if done:
			
 
				-            self.episode_data.append(self.current_episode)
			
 
				-            self.current_episode = []
			
 
				-
			
 
				-    def get_episode_stats(self, episode_idx=-1):
			
 
				-        """获取episode统计信息"""
			
 
				-        if not self.episode_data:
			
 
				-            return {}
			
 
				-
			
 
				-        episode = self.episode_data[episode_idx]
			
 
				-        total_reward = sum(step["reward"] for step in episode)
			
 
				-        avg_recovery = np.mean([step["info"].get("recovery", 0) for step in episode if "recovery" in step["info"]])
			
 
				-        feasible_steps = sum(1 for step in episode if step["info"].get("feasible", False))
			
 
				-
			
 
				-        return {
			
 
				-            "total_reward": total_reward,
			
 
				-            "avg_recovery": avg_recovery,
			
 
				-            "feasible_steps": feasible_steps,
			
 
				-            "total_steps": len(episode)
			
 
				-        }
			
 
				-
			
 
				-
			
 
				-# ==== 定义强化学习训练回调器 ====
			
 
				-class UFTrainingCallback(BaseCallback):
			
 
				-    """
			
 
				-    强化学习训练回调，用于记录每一步的数据到 recorder。
			
 
				-    1. 不依赖环境内部 last_* 属性
			
 
				-    2. 使用环境接口提供的 obs、actions、rewards、dones、infos
			
 
				-    3. 自动处理 episode 结束时的统计
			
 
				-    """
			
 
				-
			
 
				-    def __init__(self, recorder, verbose=0):
			
 
				-        super(UFTrainingCallback, self).__init__(verbose)
			
 
				-        self.recorder = recorder
			
 
				-
			
 
				-    def _on_step(self) -> bool:
			
 
				-        try:
			
 
				-            new_obs = self.locals.get("new_obs")
			
 
				-            actions = self.locals.get("actions")
			
 
				-            rewards = self.locals.get("rewards")
			
 
				-            dones = self.locals.get("dones")
			
 
				-            infos = self.locals.get("infos")
			
 
				-
			
 
				-            if len(new_obs) > 0:
			
 
				-                step_obs = new_obs[0]
			
 
				-                step_action = actions[0] if actions is not None else None
			
 
				-                step_reward = rewards[0] if rewards is not None else 0.0
			
 
				-                step_done = dones[0] if dones is not None else False
			
 
				-                step_info = infos[0] if infos is not None else {}
			
 
				-
			
 
				-                # 打印当前 step 的信息
			
 
				-                if self.verbose:
			
 
				-                    print(f"[Step {self.num_timesteps}] 动作={step_action}, 奖励={step_reward:.3f}, Done={step_done}")
			
 
				-
			
 
				-                # 记录数据
			
 
				-                self.recorder.record_step(
			
 
				-                    obs=step_obs,
			
 
				-                    action=step_action,
			
 
				-                    reward=step_reward,
			
 
				-                    done=step_done,
			
 
				-                    info=step_info,
			
 
				-                )
			
 
				-
			
 
				-        except Exception as e:
			
 
				-            if self.verbose:
			
 
				-                print(f"[Callback Error] {e}")
			
 
				-
			
 
				-        return True
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-class DQNTrainer:
			
 
				-    def __init__(self, env, params, callback=None):
			
 
				-        self.env = env
			
 
				-        self.params = params
			
 
				-        self.callback = callback
			
 
				-        self.log_dir = self._create_log_dir()
			
 
				-        self.model = self._create_model()
			
 
				-
			
 
				-    def _create_log_dir(self):
			
 
				-        # 创建训练日志
			
 
				-        timestamp = time.strftime("%Y%m%d-%H%M%S")
			
 
				-        log_name = (
			
 
				-            f"DQN_lr{self.params.learning_rate}_buf{self.params.buffer_size}_bs{self.params.batch_size}"
			
 
				-            f"_gamma{self.params.gamma}_exp{self.params.exploration_fraction}"
			
 
				-            f"_{self.params.remark}_{timestamp}"
			
 
				-        )
			
 
				-        log_dir = os.path.join("./uf_dqn_tensorboard", log_name)
			
 
				-        os.makedirs(log_dir, exist_ok=True)
			
 
				-        return log_dir
			
 
				-
			
 
				-    def _create_model(self):
			
 
				-        return DQN(
			
 
				-            policy="MlpPolicy",
			
 
				-            env=self.env,
			
 
				-            learning_rate=self.params.learning_rate,
			
 
				-            buffer_size=self.params.buffer_size,
			
 
				-            learning_starts=self.params.learning_starts,
			
 
				-            batch_size=self.params.batch_size,
			
 
				-            gamma=self.params.gamma,
			
 
				-            train_freq=self.params.train_freq,
			
 
				-            target_update_interval=1,
			
 
				-            tau=0.005,
			
 
				-            exploration_initial_eps=self.params.exploration_initial_eps,
			
 
				-            exploration_fraction=self.params.exploration_fraction,
			
 
				-            exploration_final_eps=self.params.exploration_final_eps,
			
 
				-            verbose=1,
			
 
				-            tensorboard_log=self.log_dir
			
 
				-        )
			
 
				-
			
 
				-    def train(self, total_timesteps: int):
			
 
				-        if self.callback:
			
 
				-            self.model.learn(total_timesteps=total_timesteps, callback=self.callback)
			
 
				-        else:
			
 
				-            self.model.learn(total_timesteps=total_timesteps)
			
 
				-        print(f"模型训练完成，日志保存在：{self.log_dir}")
			
 
				-
			
 
				-    def save(self, path=None):
			
 
				-        if path is None:
			
 
				-            path = os.path.join(self.log_dir, "dqn_model.zip")
			
 
				-        self.model.save(path)
			
 
				-        print(f"模型已保存到：{path}")
			
 
				-
			
 
				-    def load(self, path):
			
 
				-        self.model = DQN.load(path, env=self.env)
			
 
				-        print(f"模型已从 {path} 加载")
			
 
				-
			
 
				-
			
 
				-def set_global_seed(seed: int):
			
 
				-    """固定全局随机种子，保证训练可复现"""
			
 
				-    random.seed(seed)
			
 
				-    np.random.seed(seed)
			
 
				-    torch.manual_seed(seed)
			
 
				-    torch.cuda.manual_seed_all(seed)  # 如果使用GPU
			
 
				-    torch.backends.cudnn.deterministic = True
			
 
				-    torch.backends.cudnn.benchmark = False
			
 
				-
			
 
				-
			
 
				-def train_uf_rl_agent(params: UFParams, total_timesteps: int = 10000, seed: int = 2025):
			
 
				-    set_global_seed(seed)
			
 
				-    recorder = UFEpisodeRecorder()
			
 
				-    callback = UFTrainingCallback(recorder, verbose=1)
			
 
				-
			
 
				-    def make_env():
			
 
				-        env = UFSuperCycleEnv(params)
			
 
				-        env = Monitor(env)
			
 
				-        return env
			
 
				-
			
 
				-    env = DummyVecEnv([make_env])
			
 
				-
			
 
				-    dqn_params = DQNParams()
			
 
				-    trainer = DQNTrainer(env, dqn_params, callback=callback)
			
 
				-    trainer.train(total_timesteps)
			
 
				-    trainer.save()
			
 
				-
			
 
				-    stats = callback.recorder.get_episode_stats()
			
 
				-    print(f"训练完成 - 总奖励: {stats.get('total_reward', 0):.2f}, 平均回收率: {stats.get('avg_recovery', 0):.3f}")
			
 
				-
			
 
				-    return trainer.model
			
 
				-
			
 
				-
			
 
				-# 训练
			
 
				-if __name__ == "__main__":
			
 
				-    # 初始化参数
			
 
				-    params = UFParams()
			
 
				-
			
 
				-    # 训练RL代理
			
 
				-    print("开始训练RL代理...")
			
 
				-    train_uf_rl_agent(params, total_timesteps=50000)
			
 
				-
			
--- a/models/uf-rl/README.md
+++ b/models/uf-rl/README.md
@@ -1,500 +0,0 @@
 
				-# UF超滤系统强化学习决策模型训练逻辑说明
			
 
				-
			
 
				-## 模型概述
			
 
				-
			
 
				-这是一个基于**深度强化学习（DQN）**的超滤系统运行参数优化模型。不同于前两个"预测模型"，这个模型的目标是**决策**：在给定当前跨膜压差（TMP）的情况下，自动决定最优的产水时长和反洗时长。
			
 
				-
			
 
				-**核心问题**：如何平衡产水量、回收率、能耗和膜寿命？
			
 
				-
			
 
				-## 问题背景
			
 
				-
			
 
				-### 超滤运行周期
			
 
				-
			
 
				-超滤系统运行遵循"小周期"模式：
			
 
				-```
			
 
				-[产水L秒] → [反洗t_bw秒] → [产水L秒] → [反洗t_bw秒] → ... → [化学清洗CEB]
			
 
				-```
			
 
				-
			
 
				-- **产水阶段**：过滤原水，TMP逐渐升高（膜污染）
			
 
				-- **反洗阶段**：反向冲洗，TMP部分恢复
			
 
				-- **化学清洗（CEB）**：每48小时一次，TMP完全恢复
			
 
				-
			
 
				-### 决策难题
			
 
				-
			
 
				-**调节杠杆**：
			
 
				-- `L_s`：单次产水时长（3600-6000秒）
			
 
				-- `t_bw_s`：单次反洗时长（40-60秒）
			
 
				-
			
 
				-**矛盾目标**：
			
 
				-1. **产水量↑**：希望L_s长、t_bw_s短（多产水、少反洗）
			
 
				-2. **回收率↑**：希望t_bw_s短（减少反洗水耗）
			
 
				-3. **膜保护↓**：希望L_s短、t_bw_s长（频繁反洗、TMP不升太高）
			
 
				-4. **能耗↓**：产水时间越长，单位吨水的泵能耗越低
			
 
				-
			
 
				-**传统方法**：人工经验+固定参数，难以在复杂约束下找到最优解  
			
 
				-**强化学习方法**：让AI自己探索，学习在不同TMP下的最佳决策
			
 
				-
			
 
				-## 核心思路：强化学习框架
			
 
				-
			
 
				-### 1. 强化学习是什么？
			
 
				-
			
 
				-把决策问题想象成玩游戏：
			
 
				-```
			
 
				-游戏状态（TMP）→ AI选择动作（L_s, t_bw_s）→ 执行动作 → 获得奖励（回收率、净供水率）→ 新状态（TMP更新）
			
 
				-```
			
 
				-
			
 
				-AI通过**反复试错**，学习哪些动作能获得高奖励。
			
 
				-
			
 
				-### 2. Markov决策过程（MDP）建模
			
 
				-
			
 
				-#### 状态（State）
			
 
				-```python
			
 
				-state = [
			
 
				-    TMP0_normalized,           # 当前初始TMP（归一化到0-1）
			
 
				-    last_L_s_normalized,       # 上一次产水时长（归一化）
			
 
				-    last_t_bw_s_normalized,    # 上一次反洗时长（归一化）
			
 
				-    max_TMP_normalized         # 本周期最高TMP（归一化）
			
 
				-]
			
 
				-```
			
 
				-**4维状态向量**描述当前系统状态
			
 
				-
			
 
				-#### 动作（Action）
			
 
				-```python
			
 
				-# 离散动作空间：L_s × t_bw_s的网格
			
 
				-L_s范围：3800-6000秒，步长60秒 → 37个选项
			
 
				-t_bw_s范围：40-60秒，步长5秒 → 5个选项
			
 
				-
			
 
				-总动作数 = 37 × 5 = 185个
			
 
				-```
			
 
				-
			
 
				-每个动作对应一个`(L_s, t_bw_s)`组合
			
 
				-
			
 
				-#### 奖励（Reward）
			
 
				-```python
			
 
				-# 多目标加权奖励
			
 
				-reward = 0.8 × recovery           # 回收率（主要目标）
			
 
				-       + 0.2 × rate_normalized    # 净供水率
			
 
				-       - 0.2 × headroom_penalty   # TMP贴边惩罚
			
 
				-```
			
 
				-
			
 
				-**奖励设计原则**：
			
 
				-- 高回收率 → 高奖励
			
 
				-- 高净供水率 → 高奖励
			
 
				-- TMP接近上限 → 负奖励（膜风险）
			
 
				-- 违反约束 → 大负奖励（-20）
			
 
				-
			
 
				-#### 状态转移
			
 
				-```python
			
 
				-# 模拟器：根据物理模型计算下一个状态
			
 
				-def simulate_one_supercycle(TMP0, L_s, t_bw_s):
			
 
				-    # 1. 计算产水阶段TMP上升
			
 
				-    delta_TMP = model_fp(L_s)  # 调用TMP增长模型
			
 
				-    TMP_peak = TMP0 + delta_TMP
			
 
				-    
			
 
				-    # 2. 计算反洗恢复
			
 
				-    phi = model_bw(L_s, t_bw_s)  # 调用反洗恢复模型
			
 
				-    TMP_after_bw = TMP_peak - phi × (TMP_peak - TMP0)
			
 
				-    
			
 
				-    # 3. 多次小周期后CEB
			
 
				-    TMP_new = TMP0  # 化学清洗后完全恢复
			
 
				-    
			
 
				-    # 4. 计算指标
			
 
				-    recovery = (产水 - 反洗水耗 - CEB水耗) / 产水
			
 
				-    net_rate = 净产水 / 总时间
			
 
				-    
			
 
				-    return TMP_new, recovery, net_rate, ...
			
 
				-```
			
 
				-
			
 
				-## DQN算法详解
			
 
				-
			
 
				-### 什么是DQN？
			
 
				-
			
 
				-**Deep Q-Network（深度Q网络）**：
			
 
				-- 用神经网络估计**Q值函数**：`Q(state, action) = 预期累积奖励`
			
 
				-- 最优策略：在每个状态选择Q值最大的动作
			
 
				-
			
 
				-```
			
 
				-状态 → [神经网络] → 每个动作的Q值 → 选择最大Q值的动作
			
 
				-```
			
 
				-
			
 
				-### 神经网络结构
			
 
				-
			
 
				-```python
			
 
				-# Stable-Baselines3的MlpPolicy默认结构
			
 
				-输入层：4维状态
			
 
				-隐藏层1：64神经元 + ReLU
			
 
				-隐藏层2：64神经元 + ReLU
			
 
				-输出层：185个动作的Q值
			
 
				-```
			
 
				-
			
 
				-### 训练流程（`DQN_train.py`）
			
 
				-
			
 
				-#### 1. 经验回放（Experience Replay）
			
 
				-```python
			
 
				-buffer_size = 10000  # 存储10000条经验
			
 
				-
			
 
				-# 交互过程
			
 
				-for step in range(total_timesteps):
			
 
				-    action = model.select_action(state)        # ε-贪心选择动作
			
 
				-    next_state, reward = env.step(action)      # 执行动作
			
 
				-    buffer.store(state, action, reward, next_state)  # 存入缓冲区
			
 
				-    
			
 
				-    # 从缓冲区随机采样训练
			
 
				-    if step > learning_starts:
			
 
				-        batch = buffer.sample(batch_size=32)
			
 
				-        model.train_on_batch(batch)
			
 
				-```
			
 
				-
			
 
				-**为什么需要经验回放？**
			
 
				-- 打破数据相关性（连续状态往往相似）
			
 
				-- 提高样本利用效率（同一条经验可多次使用）
			
 
				-
			
 
				-#### 2. ε-贪心探索
			
 
				-```python
			
 
				-# 随机探索 vs 利用已学知识
			
 
				-if random() < epsilon:
			
 
				-    action = random_action()   # 探索：随机选
			
 
				-else:
			
 
				-    action = argmax(Q(state))  # 利用：选Q值最大的
			
 
				-
			
 
				-# epsilon从1.0衰减到0.02
			
 
				-epsilon = 1.0 → 0.8 → ... → 0.02
			
 
				-```
			
 
				-
			
 
				-**探索-利用权衡**：
			
 
				-- 初期多探索（发现好动作）
			
 
				-- 后期多利用（稳定在最优策略）
			
 
				-
			
 
				-#### 3. 目标网络（Target Network）
			
 
				-```python
			
 
				-# 两个网络：当前网络 + 目标网络
			
 
				-Q_current(state, action)  # 每步更新
			
 
				-Q_target(next_state, a')   # 每2000步同步一次
			
 
				-
			
 
				-# TD误差
			
 
				-loss = MSE(Q_current(s,a), reward + γ × max(Q_target(s', a')))
			
 
				-```
			
 
				-
			
 
				-**为什么需要目标网络？**
			
 
				-- 稳定训练（避免"追逐移动目标"问题）
			
 
				-- 减少Q值估计的震荡
			
 
				-
			
 
				-#### 4. 训练超参数
			
 
				-
			
 
				-```python
			
 
				-class DQNParams:
			
 
				-    learning_rate = 1e-4          # 学习率
			
 
				-    buffer_size = 10000           # 经验池大小
			
 
				-    learning_starts = 200         # 200步后开始学习
			
 
				-    batch_size = 32               # 每次训练32个样本
			
 
				-    gamma = 0.95                  # 折扣因子（重视长期奖励）
			
 
				-    train_freq = 4                # 每4步训练一次
			
 
				-    target_update_interval = 2000 # 每2000步更新目标网络
			
 
				-    exploration_fraction = 0.3    # 前30%训练时间用于探索
			
 
				-    exploration_final_eps = 0.02  # 最终保留2%探索
			
 
				-```
			
 
				-
			
 
				-## 模拟环境（`DQN_env.py`）
			
 
				-
			
 
				-### UFSuperCycleEnv类
			
 
				-
			
 
				-```python
			
 
				-class UFSuperCycleEnv(gym.Env):
			
 
				-    def reset(self):
			
 
				-        # 重置环境：随机初始TMP
			
 
				-        self.TMP0 = random.uniform(0.01, 0.03)
			
 
				-        return self._get_obs()
			
 
				-    
			
 
				-    def step(self, action):
			
 
				-        # 执行动作
			
 
				-        L_s, t_bw_s = self._decode_action(action)
			
 
				-        
			
 
				-        # 调用模拟器
			
 
				-        feasible, info = simulate_one_supercycle(self.TMP0, L_s, t_bw_s)
			
 
				-        
			
 
				-        if feasible:
			
 
				-            reward = _score(info)  # 计算奖励
			
 
				-            self.TMP0 = info["TMP_after_ceb"]  # 更新TMP
			
 
				-            done = False
			
 
				-        else:
			
 
				-            reward = -20  # 违反约束，大负奖励
			
 
				-            done = True   # episode终止
			
 
				-        
			
 
				-        return next_state, reward, done, info
			
 
				-```
			
 
				-
			
 
				-### 约束检查
			
 
				-
			
 
				-```python
			
 
				-# 硬约束1：TMP峰值不得超过0.06 MPa
			
 
				-if TMP_peak > 0.06:
			
 
				-    return False
			
 
				-
			
 
				-# 硬约束2：单次残余增量不得超过0.001 MPa
			
 
				-if (TMP_after_bw - TMP0) > 0.001:
			
 
				-    return False
			
 
				-
			
 
				-# 硬约束3：TMP不得超过上限的98%
			
 
				-if TMP_peak / TMP_max > 0.98:
			
 
				-    return False
			
 
				-```
			
 
				-
			
 
				-### 物理模型集成
			
 
				-
			
 
				-```python
			
 
				-# TMP增长模型（uf_fp.pth）
			
 
				-def _delta_tmp(L_h):
			
 
				-    return model_fp(params, L_h)  # 产水时长 → TMP增量
			
 
				-
			
 
				-# 反洗恢复模型（uf_bw.pth）
			
 
				-def phi_bw_of(L_s, t_bw_s):
			
 
				-    return model_bw(params, L_s, t_bw_s)  # (产水时长, 反洗时长) → 恢复比例
			
 
				-```
			
 
				-
			
 
				-这两个模型是基于数据拟合或物理建模得到的。
			
 
				-
			
 
				-## 决策使用（`DQN_decide.py`）
			
 
				-
			
 
				-### 单步决策接口
			
 
				-
			
 
				-```python
			
 
				-def run_uf_DQN_decide(uf_params, TMP0_value):
			
 
				-    # 1. 创建环境
			
 
				-    env = UFSuperCycleEnv(uf_params)
			
 
				-    env.current_params.TMP0 = TMP0_value  # 设置当前TMP
			
 
				-    
			
 
				-    # 2. 加载训练好的模型
			
 
				-    model = DQN.load("dqn_model.zip")
			
 
				-    
			
 
				-    # 3. 预测动作（确定性，不探索）
			
 
				-    action, _ = model.predict(state, deterministic=True)
			
 
				-    
			
 
				-    # 4. 解码动作
			
 
				-    L_s, t_bw_s = decode_action(action)
			
 
				-    
			
 
				-    return {
			
 
				-        "action": action,
			
 
				-        "L_s": L_s,
			
 
				-        "t_bw_s": t_bw_s,
			
 
				-        "expected_recovery": info["recovery"],
			
 
				-        ...
			
 
				-    }
			
 
				-```
			
 
				-
			
 
				-### PLC指令生成
			
 
				-
			
 
				-为了避免频繁大幅调整（工艺稳定性），使用**渐进式调整**：
			
 
				-
			
 
				-```python
			
 
				-def generate_plc_instructions(current, model_prev, model_current):
			
 
				-    # 计算差异
			
 
				-    diff = model_current - effective_current
			
 
				-    
			
 
				-    # 渐进调整：每次只调整一个步长
			
 
				-    if abs(diff) >= threshold:
			
 
				-        adjustment = +step_size if diff > 0 else -step_size
			
 
				-    else:
			
 
				-        adjustment = 0
			
 
				-    
			
 
				-    next_value = effective_current + adjustment
			
 
				-    return next_value
			
 
				-```
			
 
				-
			
 
				-**示例**：
			
 
				-```
			
 
				-当前L_s = 4000秒
			
 
				-模型建议 = 4300秒
			
 
				-步长 = 60秒
			
 
				-
			
 
				-第1轮下发：4060秒（+60）
			
 
				-第2轮下发：4120秒（+60）
			
 
				-...
			
 
				-第5轮下发：4300秒（到达目标）
			
 
				-```
			
 
				-
			
 
				-## 性能指标计算（`DQN_decide.py`）
			
 
				-
			
 
				-```python
			
 
				-def calc_uf_cycle_metrics(TMP0, L_s, t_bw_s):
			
 
				-    # 模拟一个超级周期
			
 
				-    feasible, info = simulate_one_supercycle(params, L_s, t_bw_s)
			
 
				-    
			
 
				-    return {
			
 
				-        "k_bw_per_ceb": 小周期次数,
			
 
				-        "recovery": 回收率,
			
 
				-        "net_delivery_rate_m3ph": 净供水率（m³/h）,
			
 
				-        "daily_prod_time_h": 日均产水时间（h/天）,
			
 
				-        "ton_water_energy_kWh_per_m3": 吨水电耗（kWh/m³）,
			
 
				-        "max_permeability": 最高渗透率（lmh/bar）
			
 
				-    }
			
 
				-```
			
 
				-
			
 
				-## 文件结构说明
			
 
				-
			
 
				-```
			
 
				-uf-rl/
			
 
				-├── DQN_train.py         # 强化学习训练脚本（DQN算法）
			
 
				-├── DQN_env.py           # 模拟环境（MDP定义、物理模拟）
			
 
				-├── DQN_decide.py        # 决策接口（加载模型、生成指令）
			
 
				-├── UF_decide.py         # 传统优化方法（网格搜索，用于对比）
			
 
				-├── UF_models.py         # 物理模型定义（TMP增长、反洗恢复）
			
 
				-├── uf_fp.pth            # TMP增长模型权重
			
 
				-├── uf_bw.pth            # 反洗恢复模型权重
			
 
				-└── dqn_model.zip        # 训练好的DQN模型
			
 
				-```
			
 
				-
			
 
				-## 训练流程总结
			
 
				-
			
 
				-```mermaid
			
 
				-graph LR
			
 
				-    A[初始化环境] --> B[随机初始TMP]
			
 
				-    B --> C{ε-贪心选择动作}
			
 
				-    C -->|探索| D[随机动作]
			
 
				-    C -->|利用| E[Q值最大动作]
			
 
				-    D --> F[模拟执行]
			
 
				-    E --> F
			
 
				-    F --> G{约束检查}
			
 
				-    G -->|可行| H[计算奖励]
			
 
				-    G -->|不可行| I[负奖励-20]
			
 
				-    H --> J[存入经验池]
			
 
				-    I --> J
			
 
				-    J --> K{达到学习步数?}
			
 
				-    K -->|是| L[采样训练]
			
 
				-    K -->|否| M[继续交互]
			
 
				-    L --> N{episode结束?}
			
 
				-    M --> N
			
 
				-    N -->|否| C
			
 
				-    N -->|是| B
			
 
				-```
			
 
				-
			
 
				-## 与传统方法对比
			
 
				-
			
 
				-### 传统网格搜索（`UF_decide.py`）
			
 
				-
			
 
				-```python
			
 
				-# 穷举所有(L_s, t_bw_s)组合
			
 
				-for L_s in [3600, 3660, ..., 4200]:
			
 
				-    for t_bw_s in [90, 92, ..., 100]:
			
 
				-        feasible, metrics = simulate(L_s, t_bw_s)
			
 
				-        if feasible and score > best_score:
			
 
				-            best = (L_s, t_bw_s)
			
 
				-```
			
 
				-
			
 
				-**优点**：简单、可解释、保证找到网格上的最优解  
			
 
				-**缺点**：
			
 
				-- 计算量大（数百次模拟）
			
 
				-- 参数空间离散化（可能错过真正最优点）
			
 
				-- 无法泛化（每个TMP都要重新搜索）
			
 
				-
			
 
				-### 强化学习（DQN）
			
 
				-
			
 
				-**优点**：
			
 
				-- 训练后推理快（一次前向传播）
			
 
				-- 能泛化到不同TMP（学到状态-动作映射）
			
 
				-- 可处理更复杂的状态（如历史趋势）
			
 
				-
			
 
				-**缺点**：
			
 
				-- 训练耗时（需要大量交互）
			
 
				-- 黑盒性（难以解释为何选择某动作）
			
 
				-- 性能受模拟器精度影响
			
 
				-
			
 
				-## 训练建议
			
 
				-
			
 
				-### 提升策略性能
			
 
				-
			
 
				-1. **改进奖励设计**：
			
 
				-   ```python
			
 
				-   # 添加渗透率奖励
			
 
				-   reward += 0.1 × permeability
			
 
				-   
			
 
				-   # 添加稳定性奖励（动作变化小）
			
 
				-   reward -= 0.05 × |action - last_action|
			
 
				-   ```
			
 
				-
			
 
				-2. **增加状态信息**：
			
 
				-   ```python
			
 
				-   state = [
			
 
				-       TMP0, last_L, last_t_bw, max_TMP,
			
 
				-       water_quality,  # 水质指标
			
 
				-       days_since_ceb, # 距上次CEB天数
			
 
				-       ...
			
 
				-   ]
			
 
				-   ```
			
 
				-
			
 
				-3. **课程学习（Curriculum Learning）**：
			
 
				-   ```python
			
 
				-   # 阶段1：简单场景（TMP变化小）
			
 
				-   env.TMP_range = [0.025, 0.035]
			
 
				-   train(10000 steps)
			
 
				-   
			
 
				-   # 阶段2：中等场景
			
 
				-   env.TMP_range = [0.01, 0.04]
			
 
				-   train(20000 steps)
			
 
				-   
			
 
				-   # 阶段3：困难场景（全范围）
			
 
				-   env.TMP_range = [0.01, 0.05]
			
 
				-   train(20000 steps)
			
 
				-   ```
			
 
				-
			
 
				-### 加速训练
			
 
				-
			
 
				-```python
			
 
				-# 1. 减少训练步数
			
 
				-total_timesteps = 10000  # 从50000降到10000
			
 
				-
			
 
				-# 2. 增大batch_size（如果内存足够）
			
 
				-batch_size = 64
			
 
				-
			
 
				-# 3. 调高learning_rate（小心不稳定）
			
 
				-learning_rate = 5e-4
			
 
				-
			
 
				-# 4. 预训练：从传统方法生成初始数据
			
 
				-buffer.load_from_grid_search()
			
 
				-```
			
 
				-
			
 
				-## 常见问题
			
 
				-
			
 
				-**Q：为什么用强化学习而不是监督学习？**  
			
 
				-A：监督学习需要"正确答案"标签，但这里没有标准答案（最优策略本身就是要学习的）。强化学习通过奖励信号自己探索最优策略。
			
 
				-
			
 
				-**Q：模拟器不准确怎么办？**  
			
 
				-A：这是强化学习最大风险。解决方法：
			
 
				-- 用真实数据校准模拟器
			
 
				-- Sim-to-Real迁移（在真实系统上微调）
			
 
				-- 保守策略（加大安全裕度）
			
 
				-
			
 
				-**Q：能否用于在线学习？**  
			
 
				-A：可以，但需谨慎：
			
 
				-- 设置安全约束（避免危险动作）
			
 
				-- 分阶段部署（先离线验证）
			
 
				-- 人工监督（关键决策需人工确认）
			
 
				-
			
 
				-**Q：为什么动作空间是离散的？**  
			
 
				-A：DQN擅长离散动作（每个动作一个Q值）。如果需要连续动作，可用DDPG、SAC等算法。
			
 
				-
			
 
				-**Q：如何评估策略好坏？**  
			
 
				-A：
			
 
				-- 离线：在验证集上计算平均回收率、净供水率
			
 
				-- 在线：实际运行后对比历史数据
			
 
				-- 对比基线：与传统固定参数、网格搜索比较
			
 
				-
			
 
				-## 未来优化方向
			
 
				-
			
 
				-1. **多智能体协同**：多个UF模组联合优化
			
 
				-2. **分层强化学习**：高层决策策略，低层决策参数
			
 
				-3. **模型预测控制（MPC）集成**：结合物理模型和学习策略
			
 
				-4. **安全强化学习**：硬约束保证（Safety RL）
			
 
				-5. **离线强化学习**：仅用历史数据训练（Offline RL）
			
 
				-
			
 
				-## 总结
			
 
				-
			
 
				-UF-RL模型是一个**决策优化系统**，通过深度强化学习学习在不同跨膜压差下的最优运行策略。相比传统方法：
			
 
				-- **更智能**：能适应不同状态，无需人工调参
			
 
				-- **更高效**：训练后推理快速
			
 
				-- **更全面**：平衡多个矛盾目标
			
 
				-
			
 
				-但同时也需要：
			
 
				-- **准确的模拟器**：保证学到的策略有效
			
 
				-- **充分的训练**：探索足够多的状态-动作组合
			
 
				-- **谨慎的部署**：实际应用前充分验证
			
 
				-
			
--- a/models/uf-rl/UF_decide.py
+++ b/models/uf-rl/UF_decide.py
@@ -1,405 +0,0 @@
 
				-# UF_decide.py
			
 
				-from dataclasses import dataclass
			
 
				-import numpy as np
			
 
				-
			
 
				-@dataclass
			
 
				-class UFParams:
			
 
				-    # —— 膜与运行参数 ——
			
 
				-    q_UF: float = 360.0           # 过滤进水流量（m^3/h）
			
 
				-    TMP0: float = 0.03            # 初始TMP（MPa）
			
 
				-    TMP_max: float = 0.06         # TMP硬上限（MPa）
			
 
				-
			
 
				-    # —— 膜污染动力学 ——
			
 
				-    alpha: float = 1e-6           # TMP增长系数
			
 
				-    belta: float = 1.1            # 幂指数
			
 
				-
			
 
				-    # —— 反洗参数（固定） ——
			
 
				-    q_bw_m3ph: float = 1000.0     # 物理反洗流量（m^3/h）
			
 
				-
			
 
				-    # —— CEB参数（固定） ——
			
 
				-    T_ceb_interval_h: float = 48.0  # 固定每 k 小时做一次CEB
			
 
				-    v_ceb_m3: float = 30.0        # CEB用水体积（m^3）
			
 
				-    t_ceb_s: float = 40 * 60.0    # CEB时长（s）
			
 
				-    phi_ceb: float = 1.0          # CEB去除比例（简化：完全恢复到TMP0）
			
 
				-
			
 
				-    # —— 约束与收敛 ——
			
 
				-    dTMP: float = 0.0005          # 单次产水结束时，相对TMP0最大升幅（MPa）
			
 
				-
			
 
				-    # —— 搜索范围（秒） ——
			
 
				-    L_min_s: float = 3600.0       # 过滤时长下限（s）
			
 
				-    L_max_s: float = 4200.0       # 过滤时长上限（s）
			
 
				-    t_bw_min_s: float = 40.0      # 物洗时长下限（s）
			
 
				-    t_bw_max_s: float = 60.0      # 物洗时长上限（s）
			
 
				-
			
 
				-    # —— 物理反洗恢复函数参数 ——
			
 
				-    phi_bw_min: float = 0.7       # 物洗去除比例最小值
			
 
				-    phi_bw_max: float = 1.0       # 物洗去除比例最大值
			
 
				-    L_ref_s: float = 4000.0       # 过滤时长影响时间尺度
			
 
				-    tau_bw_s: float = 30.0        # 物洗时长影响时间尺度
			
 
				-    gamma_t: float = 1.0          # 物洗时长作用指数
			
 
				-    
			
 
				-    # —— 网格 ——
			
 
				-    L_step_s: float = 60.0        # 过滤时长步长（s）
			
 
				-    t_bw_step_s: float = 5.0      # 物洗时长步长（s）
			
 
				-
			
 
				-    # 多目标加权及高TMP惩罚
			
 
				-    w_rec: float = 0.8            # 回收率权重
			
 
				-    w_rate: float = 0.2           # 净供水率权重
			
 
				-    w_headroom: float = 0.3       # 贴边惩罚权重
			
 
				-    r_headroom: float = 2.0       # 贴边惩罚幂次
			
 
				-    headroom_hardcap: float = 0.98 # 超过此比例直接视为不可取
			
 
				-
			
 
				-def _delta_tmp(p: UFParams, L_h: float) -> float:
			
 
				-    # 过滤时段TMP上升量
			
 
				-    return float(p.alpha * (p.q_UF ** p.belta) * L_h)
			
 
				-
			
 
				-def _v_bw_m3(p: UFParams, t_bw_s: float) -> float:
			
 
				-    # 物理反洗水耗
			
 
				-    return float(p.q_bw_m3ph * (float(t_bw_s) / 3600.0))
			
 
				-
			
 
				-def phi_bw_of(p: UFParams, L_s: float, t_bw_s: float) -> float:
			
 
				-    # 物洗去除比例：随过滤时长增长上界收缩，随物洗时长增长趋饱和
			
 
				-    L = max(float(L_s), 1.0)
			
 
				-    t = max(float(t_bw_s), 1e-6)
			
 
				-    upper_L = p.phi_bw_min + (p.phi_bw_max - p.phi_bw_min) * np.exp(- L / p.L_ref_s)
			
 
				-    time_gain = 1.0 - np.exp(- (t / p.tau_bw_s) ** p.gamma_t)
			
 
				-    phi = upper_L * time_gain
			
 
				-    return float(np.clip(phi, 0.0, 0.999))
			
 
				-
			
 
				-def simulate_one_supercycle(p: UFParams, L_s: float, t_bw_s: float):
			
 
				-    """
			
 
				-    返回 (是否可行, 指标字典)
			
 
				-    - 支持动态CEB次数：48h固定间隔
			
 
				-    - 增加日均产水时间和吨水电耗
			
 
				-    """
			
 
				-    L_h = float(L_s) / 3600.0  # 小周期过滤时间(h)
			
 
				-
			
 
				-    tmp = p.TMP0
			
 
				-    max_tmp_during_filtration = tmp
			
 
				-    max_residual_increase = 0.0
			
 
				-
			
 
				-    # 小周期总时长(h)
			
 
				-    t_small_cycle_h = (L_s + t_bw_s) / 3600.0
			
 
				-
			
 
				-    # 计算超级周期内CEB次数
			
 
				-    k_bw_per_ceb = int(np.floor(p.T_ceb_interval_h / t_small_cycle_h))
			
 
				-    if k_bw_per_ceb < 1:
			
 
				-        k_bw_per_ceb = 1  # 至少一个小周期
			
 
				-
			
 
				-    # ton水电耗查表
			
 
				-    energy_lookup = {
			
 
				-        3600: 0.1034, 3660: 0.1031, 3720: 0.1029, 3780: 0.1026,
			
 
				-        3840: 0.1023, 3900: 0.1021, 3960: 0.1019, 4020: 0.1017,
			
 
				-        4080: 0.1015, 4140: 0.1012, 4200: 0.1011
			
 
				-    }
			
 
				-
			
 
				-    for _ in range(k_bw_per_ceb):
			
 
				-        tmp_run_start = tmp
			
 
				-
			
 
				-        # 过滤阶段TMP增长
			
 
				-        dtmp = _delta_tmp(p, L_h)
			
 
				-        tmp_peak = tmp_run_start + dtmp
			
 
				-
			
 
				-        # 约束1：峰值不得超过硬上限
			
 
				-        if tmp_peak > p.TMP_max + 1e-12:
			
 
				-            return False, {"reason": "TMP_max violated during filtration", "TMP_peak": tmp_peak}
			
 
				-
			
 
				-        if tmp_peak > max_tmp_during_filtration:
			
 
				-            max_tmp_during_filtration = tmp_peak
			
 
				-
			
 
				-        # 物理反洗
			
 
				-        phi = phi_bw_of(p, L_s, t_bw_s)
			
 
				-        tmp_after_bw = tmp_peak - phi * (tmp_peak - tmp_run_start)
			
 
				-
			
 
				-        # 约束2：单次残余增量控制
			
 
				-        residual_inc = tmp_after_bw - tmp_run_start
			
 
				-        if residual_inc > p.dTMP + 1e-12:
			
 
				-            return False, {
			
 
				-                "reason": "residual TMP increase after BW exceeded dTMP",
			
 
				-                "residual_increase": residual_inc,
			
 
				-                "limit_dTMP": p.dTMP
			
 
				-            }
			
 
				-        if residual_inc > max_residual_increase:
			
 
				-            max_residual_increase = residual_inc
			
 
				-
			
 
				-        tmp = tmp_after_bw
			
 
				-
			
 
				-    # CEB
			
 
				-    tmp_after_ceb = p.TMP0
			
 
				-
			
 
				-    # 体积与回收率
			
 
				-    V_feed_super = k_bw_per_ceb * p.q_UF * L_h
			
 
				-    V_loss_super = k_bw_per_ceb * _v_bw_m3(p, t_bw_s) + p.v_ceb_m3
			
 
				-    V_net = max(0.0, V_feed_super - V_loss_super)
			
 
				-    recovery = max(0.0, V_net / max(V_feed_super, 1e-12))
			
 
				-
			
 
				-    # 时间与净供水率
			
 
				-    T_super_h = k_bw_per_ceb * (L_s + t_bw_s) / 3600.0 + p.t_ceb_s / 3600.0
			
 
				-    net_delivery_rate_m3ph = V_net / max(T_super_h, 1e-12)
			
 
				-
			
 
				-    # 贴边比例与硬限
			
 
				-    headroom_ratio = max_tmp_during_filtration / max(p.TMP_max, 1e-12)
			
 
				-    if headroom_ratio > p.headroom_hardcap + 1e-12:
			
 
				-        return False, {"reason": "headroom hardcap exceeded", "headroom_ratio": headroom_ratio}
			
 
				-
			
 
				-    # —— 新增指标 1：日均产水时间（h/d） ——
			
 
				-    daily_prod_time_h = k_bw_per_ceb * L_h / T_super_h * 24.0
			
 
				-
			
 
				-    # —— 新增指标 2：吨水电耗（kWh/m³） ——
			
 
				-    closest_L = min(energy_lookup.keys(), key=lambda x: abs(x - L_s))
			
 
				-    ton_water_energy = energy_lookup[closest_L]
			
 
				-
			
 
				-    info = {
			
 
				-        "recovery": recovery,
			
 
				-        "V_feed_super_m3": V_feed_super,
			
 
				-        "V_loss_super_m3": V_loss_super,
			
 
				-        "V_net_super_m3": V_net,
			
 
				-        "supercycle_time_h": T_super_h,
			
 
				-        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
			
 
				-        "max_TMP_during_filtration": max_tmp_during_filtration,
			
 
				-        "max_residual_increase_per_run": max_residual_increase,
			
 
				-        "phi_bw_effective": phi,
			
 
				-        "TMP_after_ceb": tmp_after_ceb,
			
 
				-        "headroom_ratio": headroom_ratio,
			
 
				-        "daily_prod_time_h": daily_prod_time_h,
			
 
				-        "ton_water_energy_kWh_per_m3": ton_water_energy,
			
 
				-        "k_bw_per_ceb": k_bw_per_ceb
			
 
				-    }
			
 
				-
			
 
				-    return True, info
			
 
				-
			
 
				-def _score(p: UFParams, rec: dict) -> float:
			
 
				-    """综合评分：越大越好。不同TMP0会改变max_TMP→改变惩罚→得到不同解。"""
			
 
				-    # 无量纲化净供水率
			
 
				-    rate_norm = rec["net_delivery_rate_m3ph"] / max(p.q_UF, 1e-12)
			
 
				-    headroom_penalty = (rec["max_TMP_during_filtration"] / max(p.TMP_max, 1e-12)) ** p.r_headroom
			
 
				-    return (p.w_rec * rec["recovery"]
			
 
				-            + p.w_rate * rate_norm
			
 
				-            - p.w_headroom * headroom_penalty)
			
 
				-
			
 
				-def optimize_2d(p: UFParams,
			
 
				-                L_min_s=None, L_max_s=None, L_step_s=None,
			
 
				-                t_bw_min_s=None, t_bw_max_s=None, t_bw_step_s=None):
			
 
				-    # 网格生成
			
 
				-    L_lo = p.L_min_s if L_min_s is None else float(L_min_s)
			
 
				-    L_hi = p.L_max_s if L_max_s is None else float(L_max_s)
			
 
				-    L_st = p.L_step_s if L_step_s is None else float(L_step_s)
			
 
				-
			
 
				-    t_lo = p.t_bw_min_s if t_bw_min_s is None else float(t_bw_min_s)
			
 
				-    t_hi = p.t_bw_max_s if t_bw_max_s is None else float(t_bw_max_s)
			
 
				-    t_st = p.t_bw_step_s if t_bw_step_s is None else float(t_bw_step_s)
			
 
				-
			
 
				-    L_vals = np.arange(L_lo, L_hi + 1e-9, L_st)
			
 
				-    t_vals = np.arange(t_lo, t_hi + 1e-9, t_st)
			
 
				-
			
 
				-    best = None
			
 
				-    best_score = -np.inf
			
 
				-
			
 
				-    for L_s in L_vals:
			
 
				-        for t_bw_s in t_vals:
			
 
				-            feasible, info = simulate_one_supercycle(p, L_s, t_bw_s)
			
 
				-            if not feasible:
			
 
				-                continue
			
 
				-
			
 
				-            rec = {"L_s": float(L_s), "t_bw_s": float(t_bw_s)}
			
 
				-            rec.update(info)
			
 
				-
			
 
				-            score = _score(p, rec)
			
 
				-
			
 
				-            if score > best_score + 1e-14:
			
 
				-                best_score = score
			
 
				-                best = rec.copy()
			
 
				-                best["score"] = float(score)
			
 
				-            # 若分数相同，偏好回收率更高，再偏好净供水率更高
			
 
				-            elif abs(score - best_score) <= 1e-14:
			
 
				-                if (rec["recovery"] > best["recovery"] + 1e-12) or (
			
 
				-                    abs(rec["recovery"] - best["recovery"]) <= 1e-12 and
			
 
				-                    rec["net_delivery_rate_m3ph"] > best["net_delivery_rate_m3ph"] + 1e-12
			
 
				-                ):
			
 
				-                    best = rec.copy()
			
 
				-                    best["score"] = float(score)
			
 
				-
			
 
				-    if best is None:
			
 
				-        return {"status": "no-feasible-solution"}
			
 
				-    best["status"] = "feasible"
			
 
				-    return best
			
 
				-
			
 
				-def run_uf_decision(TMP0: float = None) -> dict:
			
 
				-    if TMP0 is None:
			
 
				-        rng = np.random.default_rng()
			
 
				-        TMP0 = rng.uniform(0.03, 0.04)  # 初始TMP随机
			
 
				-
			
 
				-    params = UFParams(
			
 
				-        q_UF=360.0,
			
 
				-        TMP_max=0.05,
			
 
				-        alpha=1.2e-6,
			
 
				-        belta=1.0,
			
 
				-        q_bw_m3ph=1000.0,
			
 
				-        T_ceb_interval_h=48,
			
 
				-        v_ceb_m3=30.0,
			
 
				-        t_ceb_s=40*60.0,
			
 
				-        phi_ceb=1.0,
			
 
				-        dTMP=0.001,
			
 
				-
			
 
				-        L_min_s=3600.0, L_max_s=4200.0, L_step_s=30.0,
			
 
				-        t_bw_min_s=90.0, t_bw_max_s=100.0, t_bw_step_s=2.0,
			
 
				-
			
 
				-        phi_bw_min=0.70, phi_bw_max=1.00,
			
 
				-        L_ref_s=500.0, tau_bw_s=40.0, gamma_t=1.0,
			
 
				-
			
 
				-        TMP0=TMP0,
			
 
				-
			
 
				-        w_rec=0.7, w_rate=0.3, w_headroom=0.3, r_headroom=2.0, headroom_hardcap=0.9
			
 
				-    )
			
 
				-
			
 
				-    result = optimize_2d(params)
			
 
				-    if result.get("status") == "feasible":
			
 
				-        return {
			
 
				-            "L_s": result["L_s"],
			
 
				-            "t_bw_s": result["t_bw_s"],
			
 
				-            "recovery": result["recovery"],
			
 
				-            "k_bw_per_ceb": result["k_bw_per_ceb"],
			
 
				-            "daily_prod_time_h": result["daily_prod_time_h"],
			
 
				-            "ton_water_energy_kWh_per_m3": result["ton_water_energy_kWh_per_m3"]
			
 
				-        }
			
 
				-
			
 
				-    # 若没有可行解，返回最小过滤时间和默认值
			
 
				-    return {
			
 
				-        "L_s": params.L_min_s,
			
 
				-        "t_bw_s": params.t_bw_min_s,
			
 
				-        "recovery": 0.0,
			
 
				-        "k_bw_per_ceb": 1,
			
 
				-        "daily_prod_time_h": 0.0,
			
 
				-        "ton_water_energy_kWh_per_m3": 0.0
			
 
				-    }
			
 
				-
			
 
				-
			
 
				-def generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s):
			
 
				-    """
			
 
				-    根据工厂当前值、模型上一轮决策值和模型当前轮决策值，生成PLC指令。
			
 
				-
			
 
				-    新增功能：
			
 
				-    1. 处理None值情况：如果模型上一轮值为None，则使用工厂当前值；
			
 
				-       如果工厂当前值也为None，则返回None并提示错误。
			
 
				-    """
			
 
				-    # 参数配置保持不变
			
 
				-    params = UFParams(
			
 
				-        L_min_s=3600.0, L_max_s=6000.0, L_step_s=60.0,
			
 
				-        t_bw_min_s=40.0, t_bw_max_s=60.0, t_bw_step_s=5.0,
			
 
				-    )
			
 
				-
			
 
				-    # 参数解包
			
 
				-    L_step_s = params.L_step_s
			
 
				-    t_bw_step_s = params.t_bw_step_s
			
 
				-    L_min_s = params.L_min_s
			
 
				-    L_max_s = params.L_max_s
			
 
				-    t_bw_min_s = params.t_bw_min_s
			
 
				-    t_bw_max_s = params.t_bw_max_s
			
 
				-    adjustment_threshold = 1.0
			
 
				-
			
 
				-    # 处理None值情况
			
 
				-    if model_prev_L_s is None:
			
 
				-        if current_L_s is None:
			
 
				-            print("错误: 过滤时长的工厂当前值和模型上一轮值均为None")
			
 
				-            return None, None
			
 
				-        else:
			
 
				-            # 使用工厂当前值作为基准
			
 
				-            effective_current_L = current_L_s
			
 
				-            source_L = "工厂当前值(模型上一轮值为None)"
			
 
				-    else:
			
 
				-        # 模型上一轮值不为None，继续检查工厂当前值
			
 
				-        if current_L_s is None:
			
 
				-            effective_current_L = model_prev_L_s
			
 
				-            source_L = "模型上一轮值(工厂当前值为None)"
			
 
				-        else:
			
 
				-            # 两个值都不为None，比较哪个更接近模型当前建议值
			
 
				-            current_to_model_diff = abs(current_L_s - model_L_s)
			
 
				-            prev_to_model_diff = abs(model_prev_L_s - model_L_s)
			
 
				-
			
 
				-            if current_to_model_diff <= prev_to_model_diff:
			
 
				-                effective_current_L = current_L_s
			
 
				-                source_L = "工厂当前值"
			
 
				-            else:
			
 
				-                effective_current_L = model_prev_L_s
			
 
				-                source_L = "模型上一轮值"
			
 
				-
			
 
				-    # 对反洗时长进行同样的处理
			
 
				-    if model_prev_t_bw_s is None:
			
 
				-        if current_t_bw_s is None:
			
 
				-            print("错误: 反洗时长的工厂当前值和模型上一轮值均为None")
			
 
				-            return None, None
			
 
				-        else:
			
 
				-            effective_current_t_bw = current_t_bw_s
			
 
				-            source_t_bw = "工厂当前值(模型上一轮值为None)"
			
 
				-    else:
			
 
				-        if current_t_bw_s is None:
			
 
				-            effective_current_t_bw = model_prev_t_bw_s
			
 
				-            source_t_bw = "模型上一轮值(工厂当前值为None)"
			
 
				-        else:
			
 
				-            current_to_model_t_bw_diff = abs(current_t_bw_s - model_t_bw_s)
			
 
				-            prev_to_model_t_bw_diff = abs(model_prev_t_bw_s - model_t_bw_s)
			
 
				-
			
 
				-            if current_to_model_t_bw_diff <= prev_to_model_t_bw_diff:
			
 
				-                effective_current_t_bw = current_t_bw_s
			
 
				-                source_t_bw = "工厂当前值"
			
 
				-            else:
			
 
				-                effective_current_t_bw = model_prev_t_bw_s
			
 
				-                source_t_bw = "模型上一轮值"
			
 
				-
			
 
				-    # 检测所有输入值是否在规定范围内（只对非None值进行检查）
			
 
				-    # 工厂当前值检查（警告）
			
 
				-    if current_L_s is not None and not (L_min_s <= current_L_s <= L_max_s):
			
 
				-        print(f"警告: 当前过滤时长 {current_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
			
 
				-    if current_t_bw_s is not None and not (t_bw_min_s <= current_t_bw_s <= t_bw_max_s):
			
 
				-        print(f"警告: 当前反洗时长 {current_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
			
 
				-
			
 
				-    # 模型上一轮决策值检查（警告）
			
 
				-    if model_prev_L_s is not None and not (L_min_s <= model_prev_L_s <= L_max_s):
			
 
				-        print(f"警告: 模型上一轮过滤时长 {model_prev_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
			
 
				-    if model_prev_t_bw_s is not None and not (t_bw_min_s <= model_prev_t_bw_s <= t_bw_max_s):
			
 
				-        print(f"警告: 模型上一轮反洗时长 {model_prev_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
			
 
				-
			
 
				-    # 模型当前轮决策值检查（错误）
			
 
				-    if model_L_s is None:
			
 
				-        raise ValueError("错误: 决策模型建议的过滤时长不能为None")
			
 
				-    elif not (L_min_s <= model_L_s <= L_max_s):
			
 
				-        raise ValueError(f"错误: 决策模型建议的过滤时长 {model_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
			
 
				-
			
 
				-    if model_t_bw_s is None:
			
 
				-        raise ValueError("错误: 决策模型建议的反洗时长不能为None")
			
 
				-    elif not (t_bw_min_s <= model_t_bw_s <= t_bw_max_s):
			
 
				-        raise ValueError(f"错误: 决策模型建议的反洗时长 {model_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
			
 
				-
			
 
				-    print(f"过滤时长基准: {source_L}, 值: {effective_current_L}")
			
 
				-    print(f"反洗时长基准: {source_t_bw}, 值: {effective_current_t_bw}")
			
 
				-
			
 
				-    # 使用选定的基准值进行计算调整
			
 
				-    L_diff = model_L_s - effective_current_L
			
 
				-    L_adjustment = 0
			
 
				-    if abs(L_diff) > adjustment_threshold * L_step_s:
			
 
				-        if L_diff > 0:
			
 
				-            L_adjustment = L_step_s
			
 
				-        else:
			
 
				-            L_adjustment = -L_step_s
			
 
				-    next_L_s = effective_current_L + L_adjustment
			
 
				-
			
 
				-    t_bw_diff = model_t_bw_s - effective_current_t_bw
			
 
				-    t_bw_adjustment = 0
			
 
				-    if abs(t_bw_diff) > adjustment_threshold * t_bw_step_s:
			
 
				-        if t_bw_diff > 0:
			
 
				-            t_bw_adjustment = t_bw_step_s
			
 
				-        else:
			
 
				-            t_bw_adjustment = -t_bw_step_s
			
 
				-    next_t_bw_s = effective_current_t_bw + t_bw_adjustment
			
 
				-
			
 
				-    return next_L_s, next_t_bw_s
			
 
				-
			
 
				-
			
 
				-current_L_s = 3920
			
 
				-current_t_bw_s = 98
			
 
				-model_prev_L_s = None
			
 
				-model_prev_t_bw_s = None
			
 
				-model_L_s = 4160
			
 
				-model_t_bw_s = 96
			
 
				-next_L_s, next_t_bw_s = generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s)
			
 
				-print(f"next_L_s={next_L_s}, next_t_bw_s={next_t_bw_s}")
			
--- a/models/uf-rl/UF_models.py
+++ b/models/uf-rl/UF_models.py
@@ -1,33 +0,0 @@
 
				-import torch
			
 
				-import numpy as np
			
 
				-
			
 
				-# TMP 上升量模型
			
 
				-class TMPIncreaseModel(torch.nn.Module):
			
 
				-    def __init__(self):
			
 
				-        super().__init__()
			
 
				-    def forward(self, p, L_h):
			
 
				-        return float(p.alpha * (p.q_UF ** p.belta) * L_h)
			
 
				-
			
 
				-# 反洗 TMP 去除模型
			
 
				-class TMPDecreaseModel(torch.nn.Module):
			
 
				-    def __init__(self):
			
 
				-        super().__init__()
			
 
				-    def forward(self, p, L_s, t_bw_s):
			
 
				-        L = max(float(L_s), 1.0)
			
 
				-        t = max(float(t_bw_s), 1e-6)
			
 
				-        upper_L = p.phi_bw_min + (p.phi_bw_max - p.phi_bw_min) * np.exp(- L / p.L_ref_s)
			
 
				-        time_gain = 1.0 - np.exp(- (t / p.tau_bw_s) ** p.gamma_t)
			
 
				-        phi = upper_L * time_gain
			
 
				-        return float(np.clip(phi, 0.0, 0.999))
			
 
				-
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    model_fp = TMPIncreaseModel()
			
 
				-    model_bw = TMPDecreaseModel()
			
 
				-
			
 
				-
			
 
				-    torch.save(model_fp.state_dict(), "uf_fp.pth")
			
 
				-    torch.save(model_bw.state_dict(), "uf_bw.pth")
			
 
				-
			
 
				-
			
 
				-    print("模型已安全保存为 uf_fp.pth、uf_bw.pth")
			
--- a/models/uf-rl/resistance_model_bw.pth
+++ b/models/uf-rl/resistance_model_bw.pth
--- a/models/uf-rl/resistance_model_fp.pth
+++ b/models/uf-rl/resistance_model_fp.pth
--- a/models/uf-rl/uf_bw.pth
+++ b/models/uf-rl/uf_bw.pth
--- a/models/uf-rl/uf_fp.pth
+++ b/models/uf-rl/uf_fp.pth
--- a/models/uf-rl/uf_resistance_models.py
+++ b/models/uf-rl/uf_resistance_models.py
@@ -1,61 +0,0 @@
 
				-import torch
			
 
				-import numpy as np
			
 
				-
			
 
				-# ===== 膜阻力上升模型 =====
			
 
				-class ResistanceIncreaseModel(torch.nn.Module):
			
 
				-    def __init__(self):
			
 
				-        super().__init__()
			
 
				-
			
 
				-    def forward(self, p, L_s):
			
 
				-        """
			
 
				-        计算膜阻力上升量 ΔR
			
 
				-        """
			
 
				-        A = 128 * 40.0
			
 
				-        J = p.q_UF / A / 3600
			
 
				-        # 膜阻力上升模型(已缩放)
			
 
				-        dR = p.nuK * J * L_s
			
 
				-        return float(dR)
			
 
				-
			
 
				-
			
 
				-# ===== 膜阻力下降模型 =====
			
 
				-class ResistanceDecreaseModel(torch.nn.Module):
			
 
				-    def __init__(self):
			
 
				-        super().__init__()
			
 
				-
			
 
				-    def forward(self, p, R0, R_end, L_h_start, L_h_next_start, t_bw_s):
			
 
				-        """
			
 
				-        计算物理反冲洗污染去除比例（受反洗时间影响），最大可去除的可逆膜阻力（受过滤时间影响）
			
 
				-        """
			
 
				-
			
 
				-        # 计算单次不可逆膜阻力（线性依赖于进水时间）
			
 
				-        # 周期起点和下次起点的理论阻力
			
 
				-        R_start = R0 + p.slope * (L_h_start ** p.power)
			
 
				-        R_next_start = R0 + p.slope * (L_h_next_start ** p.power)
			
 
				-
			
 
				-        # 不可逆污染（反洗后残余增加量）
			
 
				-        irreversible_R = max(R_next_start - R_start, 0.0)
			
 
				-
			
 
				-        # 本周期的总污染增长量
			
 
				-        total_increase = max(R_end - R_start, 0.0)
			
 
				-
			
 
				-        # 可逆污染量 = 本周期总增长 - 不可逆残留
			
 
				-        reversible_R = max(total_increase - irreversible_R, 0.0)
			
 
				-
			
 
				-        # 时间因子：反洗时间越长，效果越充分
			
 
				-        time_gain = 1.0 - np.exp(- (t_bw_s / p.tau_bw_s))
			
 
				-
			
 
				-        # 实际去除的膜阻力（随机在可去除区间内，乘以时间因子）
			
 
				-        dR_bw = reversible_R * time_gain
			
 
				-
			
 
				-        return float(np.clip(dR_bw, 0.0, reversible_R))
			
 
				-
			
 
				-
			
 
				-# ===== 主程序 =====
			
 
				-if __name__ == "__main__":
			
 
				-    model_fp = ResistanceIncreaseModel()
			
 
				-    model_bw = ResistanceDecreaseModel()
			
 
				-
			
 
				-    torch.save(model_fp.state_dict(), "resistance_model_fp.pth")
			
 
				-    torch.save(model_bw.state_dict(), "resistance_model_bw.pth")
			
 
				-
			
 
				-    print("模型已安全保存为 resistance_model_fp.pth、resistance_model_bw.pth")