Эх сурвалжийг харах

1:拆分超滤训练和开发版本

wmy 5 сар өмнө
parent
commit
d869d1eb00

+ 1 - 0
models/pressure-predictor/gat-lstm_model/20min/__init__.py

@@ -13,3 +13,4 @@ from .predict import Predictor
 
 __all__ = ['Predictor']
 
+

+ 2 - 2
models/pressure-predictor/gat-lstm_model/20min/predict.py

@@ -40,7 +40,7 @@ except ImportError:
     import time
     
     def setup_logger(name, level='INFO', log_file=None, format_type='colored', max_bytes=10485760, backup_count=5):
-        """简化版logger设置"""
+        """logger设置"""
         logger = logging.getLogger(name)
         logger.setLevel(getattr(logging, level))
         
@@ -80,7 +80,7 @@ except ImportError:
         return wrapper
     
     class Config:
-        """简化版配置类"""
+        """配置类"""
         def __init__(self, config_file):
             with open(config_file, 'r', encoding='utf-8') as f:
                 self.config = yaml.safe_load(f)

+ 1 - 0
models/pressure-predictor/gat-lstm_model/90day/__init__.py

@@ -13,3 +13,4 @@ from .predict import Predictor
 
 __all__ = ['Predictor']
 
+

+ 14 - 13
models/pressure-predictor/gat-lstm_model/api_main.py

@@ -27,19 +27,20 @@ os.makedirs(log_dir, exist_ok=True)
 data_save_dir = os.path.join(base_dir, 'received_data')
 os.makedirs(data_save_dir, exist_ok=True)
 
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s',
-    handlers=[
-        RotatingFileHandler(
-            os.path.join(log_dir, "api.log"),
-            maxBytes=2 * 1024 * 1024,
-            backupCount=5,
-            encoding='utf-8'
-        ),
-        logging.StreamHandler()
-    ]
-)
+if not logging.getLogger().handlers:  # 只在无handler时配置
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(levelname)s - %(message)s',
+        handlers=[
+            RotatingFileHandler(
+                os.path.join(log_dir, "api.log"),
+                maxBytes=2 * 1024 * 1024,
+                backupCount=5,
+                encoding='utf-8'
+            ),
+            logging.StreamHandler()
+        ]
+    )
 logger = logging.getLogger(__name__)
 
 # --- 添加当前目录到Python路径 ---

+ 1 - 0
models/pressure-predictor/gat-lstm_model/requirements.txt

@@ -31,3 +31,4 @@ pyyaml>=6.0
 
 
 
+

+ 1 - 0
models/pressure-predictor/gat-lstm_model/shared/__init__.py

@@ -10,3 +10,4 @@
 
 __version__ = '1.0.0'
 
+

+ 1 - 0
models/pressure-predictor/gat-lstm_model/shared/args.py

@@ -58,3 +58,4 @@ def lstm_args_parser():
     
     return args
 
+

+ 1 - 0
models/pressure-predictor/gat-lstm_model/shared/data_preprocessor.py

@@ -307,3 +307,4 @@ class DataPreprocessor:
     
         return data_loader
 
+

+ 1 - 0
models/pressure-predictor/gat-lstm_model/shared/data_trainer.py

@@ -265,3 +265,4 @@ class Trainer:
         
         return r2_scores, rmse_scores, mape_scores
 
+

+ 1 - 0
models/pressure-predictor/gat-lstm_model/shared/gat_lstm.py

@@ -101,3 +101,4 @@ class GAT_LSTM(nn.Module):
             outputs.append(model(x))  # 每个输出为[batch, output_size]
         return torch.cat(outputs, dim=1)  # 拼接后[batch, output_size * labels_num]
 
+

+ 0 - 246
models/uf-rl/DQN_decide.py

@@ -1,246 +0,0 @@
-import numpy as np
-from stable_baselines3 import DQN
-from UF_super_RL.DQN_env import UFSuperCycleEnv
-from UF_super_RL.DQN_env import UFParams
-
-# 模型路径
-MODEL_PATH = "dqn_model.zip"
-
-# 加载模型(只加载一次,提高效率)
-model = DQN.load(MODEL_PATH)
-
-def run_uf_DQN_decide(uf_params, TMP0_value: float):
-    """
-    单步决策函数:输入原始 TMP0,预测并执行动作
-
-    参数:
-        TMP0_value (float): 当前 TMP0 值(单位与环境一致)
-
-    返回:
-        dict: 包含模型选择的动作、动作参数、新状态、奖励等
-    """
-    # 1. 实例化环境
-    base_params = uf_params
-    env = UFSuperCycleEnv(base_params)
-
-    # 2. 将输入的 TMP0 写入环境
-    env.current_params.TMP0 = TMP0_value
-
-    # 3. 获取归一化状态
-    obs = env._get_obs().reshape(1, -1)
-
-    # 4. 模型预测动作
-    action, _ = model.predict(obs, deterministic=True)
-
-    # 5. 解析动作对应的 L_s 和 t_bw_s
-    L_s, t_bw_s = env._get_action_values(action[0])
-
-    # 6. 在环境中执行该动作
-    next_obs, reward, terminated, truncated, info = env.step(action[0])
-
-    # 7. 整理结果
-    result = {
-        "action": int(action[0]),
-        "L_s": float(L_s),
-        "t_bw_s": float(t_bw_s),
-        "next_obs": next_obs,
-        "reward": reward,
-        "terminated": terminated,
-        "truncated": truncated,
-        "info": info
-    }
-
-    # 8. 关闭环境
-    env.close()
-
-    return result
-
-def generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s):
-    """
-    根据工厂当前值、模型上一轮决策值和模型当前轮决策值,生成PLC指令。
-
-    新增功能:
-    1. 处理None值情况:如果模型上一轮值为None,则使用工厂当前值;
-       如果工厂当前值也为None,则返回None并提示错误。
-    """
-    # 参数配置保持不变
-    params = UFParams(
-        L_min_s=3600.0, L_max_s=6000.0, L_step_s=60.0,
-        t_bw_min_s=40.0, t_bw_max_s=60.0, t_bw_step_s=5.0,
-    )
-
-    # 参数解包
-    L_step_s = params.L_step_s
-    t_bw_step_s = params.t_bw_step_s
-    L_min_s = params.L_min_s
-    L_max_s = params.L_max_s
-    t_bw_min_s = params.t_bw_min_s
-    t_bw_max_s = params.t_bw_max_s
-    adjustment_threshold = 1.0
-
-    # 处理None值情况
-    if model_prev_L_s is None:
-        if current_L_s is None:
-            print("错误: 过滤时长的工厂当前值和模型上一轮值均为None")
-            return None, None
-        else:
-            # 使用工厂当前值作为基准
-            effective_current_L = current_L_s
-            source_L = "工厂当前值(模型上一轮值为None)"
-    else:
-        # 模型上一轮值不为None,继续检查工厂当前值
-        if current_L_s is None:
-            effective_current_L = model_prev_L_s
-            source_L = "模型上一轮值(工厂当前值为None)"
-        else:
-            effective_current_L = model_prev_L_s
-            source_L = "模型上一轮值"
-
-    # 对反洗时长进行同样的处理
-    if model_prev_t_bw_s is None:
-        if current_t_bw_s is None:
-            print("错误: 反洗时长的工厂当前值和模型上一轮值均为None")
-            return None, None
-        else:
-            effective_current_t_bw = current_t_bw_s
-            source_t_bw = "工厂当前值(模型上一轮值为None)"
-    else:
-        if current_t_bw_s is None:
-            effective_current_t_bw = model_prev_t_bw_s
-            source_t_bw = "模型上一轮值(工厂当前值为None)"
-        else:
-            effective_current_t_bw = model_prev_t_bw_s
-            source_t_bw = "模型上一轮值"
-
-    # 检测所有输入值是否在规定范围内(只对非None值进行检查)
-    # 工厂当前值检查(警告)
-    if current_L_s is not None and not (L_min_s <= current_L_s <= L_max_s):
-        print(f"警告: 当前过滤时长 {current_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
-    if current_t_bw_s is not None and not (t_bw_min_s <= current_t_bw_s <= t_bw_max_s):
-        print(f"警告: 当前反洗时长 {current_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
-
-    # 模型上一轮决策值检查(警告)
-    if model_prev_L_s is not None and not (L_min_s <= model_prev_L_s <= L_max_s):
-        print(f"警告: 模型上一轮过滤时长 {model_prev_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
-    if model_prev_t_bw_s is not None and not (t_bw_min_s <= model_prev_t_bw_s <= t_bw_max_s):
-        print(f"警告: 模型上一轮反洗时长 {model_prev_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
-
-    # 模型当前轮决策值检查(错误)
-    if model_L_s is None:
-        raise ValueError("错误: 决策模型建议的过滤时长不能为None")
-    elif not (L_min_s <= model_L_s <= L_max_s):
-        raise ValueError(f"错误: 决策模型建议的过滤时长 {model_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
-
-    if model_t_bw_s is None:
-        raise ValueError("错误: 决策模型建议的反洗时长不能为None")
-    elif not (t_bw_min_s <= model_t_bw_s <= t_bw_max_s):
-        raise ValueError(f"错误: 决策模型建议的反洗时长 {model_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
-
-    print(f"过滤时长基准: {source_L}, 值: {effective_current_L}")
-    print(f"反洗时长基准: {source_t_bw}, 值: {effective_current_t_bw}")
-
-    # 使用选定的基准值进行计算调整
-    L_diff = model_L_s - effective_current_L
-    L_adjustment = 0
-    if abs(L_diff) >= adjustment_threshold * L_step_s:
-        if L_diff >= 0:
-            L_adjustment = L_step_s
-        else:
-            L_adjustment = -L_step_s
-    next_L_s = effective_current_L + L_adjustment
-
-    t_bw_diff = model_t_bw_s - effective_current_t_bw
-    t_bw_adjustment = 0
-    if abs(t_bw_diff) >= adjustment_threshold * t_bw_step_s:
-        if t_bw_diff >= 0:
-            t_bw_adjustment = t_bw_step_s
-        else:
-            t_bw_adjustment = -t_bw_step_s
-    next_t_bw_s = effective_current_t_bw + t_bw_adjustment
-
-    return next_L_s, next_t_bw_s
-
-
-from UF_super_RL.DQN_env import simulate_one_supercycle
-def calc_uf_cycle_metrics(p, TMP0, max_tmp_during_filtration, min_tmp_during_filtration, L_s: float, t_bw_s: float):
-    """
-    计算 UF 超滤系统的核心性能指标
-
-    参数:
-        p (UFParams): UF 系统参数
-        L_s (float): 单次过滤时间(秒)
-        t_bw_s (float): 单次反洗时间(秒)
-
-    返回:
-        dict: {
-            "k_bw_per_ceb": 小周期次数,
-            "ton_water_energy_kWh_per_m3": 吨水电耗,
-            "recovery": 回收率,
-            "net_delivery_rate_m3ph": 净供水率 (m³/h),
-            "daily_prod_time_h": 日均产水时间 (小时/天)
-            "max_permeability": 全周期最高渗透率(lmh/bar)
-        }
-    """
-    # 将跨膜压差写入参数
-    p.TMP0 = TMP0
-
-    # 模拟该参数下的超级周期
-    feasible, info = simulate_one_supercycle(p, L_s, t_bw_s)
-
-    # 获得模型模拟周期信息
-    k_bw_per_ceb = info["k_bw_per_ceb"]
-    ton_water_energy_kWh_per_m3 = info["ton_water_energy_kWh_per_m3"]
-    recovery = info["recovery"]
-    net_delivery_rate_m3ph = info["net_delivery_rate_m3ph"]
-    daily_prod_time_h = info["daily_prod_time_h"]
-
-    # 获得模型模拟周期内最高跨膜压差/最低跨膜压差
-    if max_tmp_during_filtration is None:
-        max_tmp_during_filtration = info["max_TMP_during_filtration"]
-    if min_tmp_during_filtration is None:
-        min_tmp_during_filtration = info["min_TMP_during_filtration"]
-
-    # 计算最高渗透率
-    max_permeability = 100 * p.q_UF / (128*40) / min_tmp_during_filtration
-
-
-    return {
-        "k_bw_per_ceb": k_bw_per_ceb,
-        "ton_water_energy_kWh_per_m3": ton_water_energy_kWh_per_m3,
-        "recovery": recovery,
-        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
-        "daily_prod_time_h": daily_prod_time_h,
-        "max_permeability": max_permeability
-    }
-
-
-# ==============================
-# 示例调用
-# ==============================
-if __name__ == "__main__":
-    uf_params = UFParams()
-    TMP0 = 0.03 # 原始 TMP0
-    model_decide_result = run_uf_DQN_decide(uf_params, TMP0) # 调用模型获得动作
-    model_L_s = model_decide_result['L_s'] # 获得模型决策产水时长
-    model_t_bw_s = model_decide_result['t_bw_s'] # 获得模型决策反洗时长
-
-    current_L_s = 3800
-    current_t_bw_s = 40
-    model_prev_L_s = 4040
-    model_prev_t_bw_s = 60
-    L_s, t_bw_s = generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s) # 获取模型下发指令
-
-    L_s = 4100
-    t_bw_s = 96
-    max_tmp_during_filtration = 0.050176 # 新增工厂数据接口:周期最高/最低跨膜压差,无工厂数据接入时传入None,calc_uf_cycle_metrics()自动获取模拟周期中的跨膜压差最值
-    min_tmp_during_filtration = 0.012496
-    execution_result = calc_uf_cycle_metrics(uf_params, TMP0, max_tmp_during_filtration, min_tmp_during_filtration, L_s, t_bw_s)
-    print("\n===== 单步决策结果 =====")
-    print(f"模型选择的动作: {model_decide_result['action']}")
-    print(f"模型选择的L_s: {model_L_s} 秒, 模型选择的t_bw_s: {model_t_bw_s} 秒")
-    print(f"指令下发的L_s: {L_s} 秒, 指令下发的t_bw_s: {t_bw_s} 秒")
-    print(f"指令对应的反洗次数: {execution_result['k_bw_per_ceb']}")
-    print(f"指令对应的吨水电耗: {execution_result['ton_water_energy_kWh_per_m3']}")
-    print(f"指令对应的回收率: {execution_result['recovery']}")
-    print(f"指令对应的日均产水时间: {execution_result['daily_prod_time_h']}")
-    print(f"指令对应的最高渗透率: {execution_result['max_permeability']}")

+ 0 - 340
models/uf-rl/DQN_env.py

@@ -1,340 +0,0 @@
-import os
-import time
-import random
-import numpy as np
-import gymnasium as gym
-from gymnasium import spaces
-from stable_baselines3 import DQN
-from stable_baselines3.common.monitor import Monitor
-from stable_baselines3.common.vec_env import DummyVecEnv
-from stable_baselines3.common.callbacks import BaseCallback
-from typing import Dict, Tuple, Optional
-import torch
-import torch.nn as nn
-from dataclasses import dataclass, asdict
-from UF_models import TMPIncreaseModel, TMPDecreaseModel  # 导入模型类
-import copy
-
-
-# ==== 定义膜的基础运行参数 ====
-@dataclass
-class UFParams:
-    # —— 膜与运行参数 ——
-    q_UF: float = 360.0  # 过滤进水流量(m^3/h)
-    TMP0: float = 0.03  # 初始TMP(MPa)
-    TMP_max: float = 0.06  # TMP硬上限(MPa)
-
-    # —— 膜污染动力学 ——
-    alpha: float = 1e-6  # TMP增长系数
-    belta: float = 1.1  # 幂指数
-
-    # —— 反洗参数(固定) ——
-    q_bw_m3ph: float = 1000.0  # 物理反洗流量(m^3/h)
-
-    # —— CEB参数(固定) ——
-    T_ceb_interval_h: float = 48.0  # 固定每 k 小时做一次CEB
-    v_ceb_m3: float = 30.0  # CEB用水体积(m^3)
-    t_ceb_s: float = 40 * 60.0  # CEB时长(s)
-    phi_ceb: float = 1.0  # CEB去除比例(简化:完全恢复到TMP0)
-
-    # —— 约束与收敛 ——
-    dTMP: float = 0.001  # 单次产水结束时,相对TMP0最大升幅(MPa)
-
-    # —— 搜索范围(秒) ——
-    L_min_s: float = 3800.0  # 过滤时长下限(s)
-    L_max_s: float = 6000.0  # 过滤时长上限(s)
-    t_bw_min_s: float = 40.0  # 物洗时长下限(s)
-    t_bw_max_s: float = 60.0  # 物洗时长上限(s)
-
-    # —— 物理反洗恢复函数参数 ——
-    phi_bw_min: float = 0.7  # 物洗去除比例最小值
-    phi_bw_max: float = 1.0  # 物洗去除比例最大值
-    L_ref_s: float = 4000.0  # 过滤时长影响时间尺度
-    tau_bw_s: float = 20.0  # 物洗时长影响时间尺度
-    gamma_t: float = 1.0  # 物洗时长作用指数
-
-    # —— 网格 ——
-    L_step_s: float = 60.0  # 过滤时长步长(s)
-    t_bw_step_s: float = 5.0  # 物洗时长步长(s)
-
-    # 多目标加权及高TMP惩罚
-    w_rec: float = 0.8  # 回收率权重
-    w_rate: float = 0.2  # 净供水率权重
-    w_headroom: float = 0.2  # 贴边惩罚权重
-    r_headroom: float = 2.0  # 贴边惩罚幂次
-    headroom_hardcap: float = 0.98  # 超过此比例直接视为不可取
-
-# ==== 加载模拟环境模型 ====
-# 初始化模型
-model_fp = TMPIncreaseModel()
-model_bw = TMPDecreaseModel()
-
-# 加载参数
-model_fp.load_state_dict(torch.load("uf_fp.pth"))
-model_bw.load_state_dict(torch.load("uf_bw.pth"))
-
-# 切换到推理模式
-model_fp.eval()
-model_bw.eval()
-
-
-def _delta_tmp(p, L_h: float) -> float:
-    """
-    过滤时段TMP上升量:调用 uf_fp.pth 模型
-    """
-    return model_fp(p, L_h)
-
-def phi_bw_of(p, L_s: float, t_bw_s: float) -> float:
-    """
-    物洗去除比例:调用 uf_bw.pth 模型
-    """
-    return model_bw(p, L_s, t_bw_s)
-
-def _tmp_after_ceb(p, L_s: float, t_bw_s: float) -> float:
-    """
-    计算化学清洗(CEB)后的TMP,当前为恢复初始跨膜压差
-    """
-    return p.TMP0
-
-def _v_bw_m3(p, t_bw_s: float) -> float:
-    """
-    物理反洗水耗
-    """
-    return float(p.q_bw_m3ph * (float(t_bw_s) / 3600.0))
-
-def simulate_one_supercycle(p: UFParams, L_s: float, t_bw_s: float):
-    """
-    返回 (是否可行, 指标字典)
-    - 支持动态CEB次数:48h固定间隔
-    - 增加日均产水时间和吨水电耗
-    - 增加最小TMP记录
-    """
-    L_h = float(L_s) / 3600.0  # 小周期过滤时间(h)
-
-    tmp = p.TMP0
-    max_tmp_during_filtration = tmp
-    min_tmp_during_filtration = tmp  # 新增:初始化最小TMP
-    max_residual_increase = 0.0
-
-    # 小周期总时长(h)
-    t_small_cycle_h = (L_s + t_bw_s) / 3600.0
-
-    # 计算超级周期内CEB次数
-    k_bw_per_ceb = int(np.floor(p.T_ceb_interval_h / t_small_cycle_h))
-    if k_bw_per_ceb < 1:
-        k_bw_per_ceb = 1  # 至少一个小周期
-
-    # ton水电耗查表
-    energy_lookup = {
-        3600: 0.1034, 3660: 0.1031, 3720: 0.1029, 3780: 0.1026,
-        3840: 0.1023, 3900: 0.1021, 3960: 0.1019, 4020: 0.1017,
-        4080: 0.1015, 4140: 0.1012, 4200: 0.1011
-    }
-
-    for _ in range(k_bw_per_ceb):
-        tmp_run_start = tmp
-
-        # 过滤阶段TMP增长
-        dtmp = _delta_tmp(p, L_h)
-        tmp_peak = tmp_run_start + dtmp
-
-        # 约束1:峰值不得超过硬上限
-        if tmp_peak > p.TMP_max + 1e-12:
-            return False, {"reason": "TMP_max violated during filtration", "TMP_peak": tmp_peak}
-
-        # 更新最大和最小TMP
-        if tmp_peak > max_tmp_during_filtration:
-            max_tmp_during_filtration = tmp_peak
-        if tmp_run_start < min_tmp_during_filtration:  # 新增:记录运行开始时的最小TMP
-            min_tmp_during_filtration = tmp_run_start
-
-        # 物理反洗
-        phi = phi_bw_of(p, L_s, t_bw_s)
-        tmp_after_bw = tmp_peak - phi * (tmp_peak - tmp_run_start)
-
-        # 约束2:单次残余增量控制
-        residual_inc = tmp_after_bw - tmp_run_start
-        if residual_inc > p.dTMP + 1e-12:
-            return False, {
-                "reason": "residual TMP increase after BW exceeded dTMP",
-                "residual_increase": residual_inc,
-                "limit_dTMP": p.dTMP
-            }
-        if residual_inc > max_residual_increase:
-            max_residual_increase = residual_inc
-
-        tmp = tmp_after_bw
-
-    # CEB
-    tmp_after_ceb = p.TMP0
-
-    # 体积与回收率
-    V_feed_super = k_bw_per_ceb * p.q_UF * L_h
-    V_loss_super = k_bw_per_ceb * _v_bw_m3(p, t_bw_s) + p.v_ceb_m3
-    V_net = max(0.0, V_feed_super - V_loss_super)
-    recovery = max(0.0, V_net / max(V_feed_super, 1e-12))
-
-    # 时间与净供水率
-    T_super_h = k_bw_per_ceb * (L_s + t_bw_s) / 3600.0 + p.t_ceb_s / 3600.0
-    net_delivery_rate_m3ph = V_net / max(T_super_h, 1e-12)
-
-    # 贴边比例与硬限
-    headroom_ratio = max_tmp_during_filtration / max(p.TMP_max, 1e-12)
-    if headroom_ratio > p.headroom_hardcap + 1e-12:
-        return False, {"reason": "headroom hardcap exceeded", "headroom_ratio": headroom_ratio}
-
-    # —— 新增指标 1:日均产水时间(h/d) ——
-    daily_prod_time_h = k_bw_per_ceb * L_h / T_super_h * 24.0
-
-    # —— 新增指标 2:吨水电耗(kWh/m³) ——
-    closest_L = min(energy_lookup.keys(), key=lambda x: abs(x - L_s))
-    ton_water_energy = energy_lookup[closest_L]
-
-    info = {
-        "recovery": recovery,
-        "V_feed_super_m3": V_feed_super,
-        "V_loss_super_m3": V_loss_super,
-        "V_net_super_m3": V_net,
-        "supercycle_time_h": T_super_h,
-        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
-        "max_TMP_during_filtration": max_tmp_during_filtration,
-        "min_TMP_during_filtration": min_tmp_during_filtration,  # 新增:最小TMP
-        "max_residual_increase_per_run": max_residual_increase,
-        "phi_bw_effective": phi,
-        "TMP_after_ceb": tmp_after_ceb,
-        "headroom_ratio": headroom_ratio,
-        "daily_prod_time_h": daily_prod_time_h,
-        "ton_water_energy_kWh_per_m3": ton_water_energy,
-        "k_bw_per_ceb": k_bw_per_ceb
-    }
-
-    return True, info
-
-def _score(p: UFParams, rec: dict) -> float:
-    """综合评分:越大越好。通过非线性放大奖励差异,强化区分好坏动作"""
-
-    # —— 无量纲化净供水率 ——
-    rate_norm = rec["net_delivery_rate_m3ph"] / max(p.q_UF, 1e-12)
-
-    # —— TMP soft penalty (sigmoid) ——
-    tmp_ratio = rec["max_TMP_during_filtration"] / max(p.TMP_max, 1e-12)
-    k = 10.0
-    headroom_penalty = 1.0 / (1.0 + np.exp(-k * (tmp_ratio - 1.0)))
-
-    # —— 基础 reward(0.6~0.9左右)——
-    base_reward = (
-        p.w_rec * rec["recovery"]
-        + p.w_rate * rate_norm
-        - p.w_headroom * headroom_penalty
-    )
-
-    # —— 非线性放大:平方映射 + 缩放 ——
-    # 目的是放大好坏动作差异,同时限制最大值,避免 TD-error 过大
-    amplified_reward = (base_reward - 0.5) ** 2 * 5.0
-
-    # —— 可选:保留符号,区分负奖励
-    if base_reward < 0.5:
-        amplified_reward = -amplified_reward
-
-    return amplified_reward
-
-
-class UFSuperCycleEnv(gym.Env):
-    """超滤系统环境(超级周期级别决策)"""
-
-    metadata = {"render_modes": ["human"]}
-
-    def __init__(self, base_params, max_episode_steps: int = 20):
-        super(UFSuperCycleEnv, self).__init__()
-
-        self.base_params = base_params
-        self.current_params = copy.deepcopy(base_params)
-        self.max_episode_steps = max_episode_steps
-        self.current_step = 0
-
-        # 计算离散动作空间
-        self.L_values = np.arange(
-            self.base_params.L_min_s,
-            self.base_params.L_max_s + self.base_params.L_step_s,
-            self.base_params.L_step_s
-        )
-        self.t_bw_values = np.arange(
-            self.base_params.t_bw_min_s,
-            self.base_params.t_bw_max_s + self.base_params.t_bw_step_s,
-            self.base_params.t_bw_step_s
-        )
-
-        self.num_L = len(self.L_values)
-        self.num_bw = len(self.t_bw_values)
-
-        # 单一离散动作空间
-        self.action_space = spaces.Discrete(self.num_L * self.num_bw)
-
-        # 状态空间增加 TMP0, 上一次动作(L_s, t_bw_s), 本周期最高 TMP
-        # 状态归一化均在 _get_obs 内处理
-        self.observation_space = spaces.Box(
-            low=np.zeros(4, dtype=np.float32),
-            high=np.ones(4, dtype=np.float32),
-            dtype=np.float32
-        )
-
-        # 初始化状态
-        self.last_action = (self.base_params.L_min_s, self.base_params.t_bw_min_s)
-        self.max_TMP_during_filtration = self.current_params.TMP0
-        self.reset(seed=None)
-
-    def _get_obs(self):
-        TMP0 = self.current_params.TMP0
-        TMP0_norm = (TMP0 - 0.01) / (0.05 - 0.01)
-
-        L_s, t_bw_s = self.last_action
-        L_norm = (L_s - self.base_params.L_min_s) / (self.base_params.L_max_s - self.base_params.L_min_s)
-        t_bw_norm = (t_bw_s - self.base_params.t_bw_min_s) / (self.base_params.t_bw_max_s - self.base_params.t_bw_min_s)
-
-        max_TMP_norm = (self.max_TMP_during_filtration - 0.01) / (0.05 - 0.01)
-
-        return np.array([TMP0_norm, L_norm, t_bw_norm, max_TMP_norm], dtype=np.float32)
-
-    def _get_action_values(self, action):
-        L_idx = action // self.num_bw
-        t_bw_idx = action % self.num_bw
-        return self.L_values[L_idx], self.t_bw_values[t_bw_idx]
-
-    def reset(self, seed=None, options=None):
-        super().reset(seed=seed)
-        self.current_params.TMP0 = np.random.uniform(0.01, 0.03)
-        self.current_step = 0
-        self.last_action = (self.base_params.L_min_s, self.base_params.t_bw_min_s)
-        self.max_TMP_during_filtration = self.current_params.TMP0
-        return self._get_obs(), {}
-
-    def step(self, action):
-        self.current_step += 1
-        L_s, t_bw_s = self._get_action_values(action)
-        L_s = np.clip(L_s, self.base_params.L_min_s, self.base_params.L_max_s)
-        t_bw_s = np.clip(t_bw_s, self.base_params.t_bw_min_s, self.base_params.t_bw_max_s)
-
-        # 模拟超级周期
-        feasible, info = simulate_one_supercycle(self.current_params, L_s, t_bw_s)
-
-        if feasible:
-            reward = _score(self.current_params, info)
-            self.current_params.TMP0 = info["TMP_after_ceb"]
-            self.max_TMP_during_filtration = info["max_TMP_during_filtration"]
-            terminated = False
-        else:
-            reward = -20
-            terminated = True
-
-        truncated = self.current_step >= self.max_episode_steps
-        self.last_action = (L_s, t_bw_s)
-        next_obs = self._get_obs()
-
-        info["feasible"] = feasible
-        info["step"] = self.current_step
-
-        return next_obs, reward, terminated, truncated, info
-
-
-
-

+ 0 - 244
models/uf-rl/DQN_train.py

@@ -1,244 +0,0 @@
-import os
-import time
-import random
-import numpy as np
-import torch
-
-import gymnasium as gym
-from gymnasium import spaces
-from stable_baselines3 import DQN
-from stable_baselines3.common.monitor import Monitor
-from stable_baselines3.common.vec_env import DummyVecEnv
-from stable_baselines3.common.callbacks import BaseCallback
-
-from DQN_env import UFParams, UFSuperCycleEnv
-
-
-# ==== 定义强化学习超参数 ====
-class DQNParams:
-    """
-    DQN 超参数定义类
-    用于统一管理模型训练参数
-    """
-    # 学习率,控制神经网络更新步长
-    learning_rate: float = 1e-4
-
-    # 经验回放缓冲区大小(步数)
-    buffer_size: int = 10000
-
-    # 学习开始前需要收集的步数
-    learning_starts: int = 200
-
-    # 每次从经验池中采样的样本数量
-    batch_size: int = 32
-
-    # 折扣因子,越接近1越重视长期奖励
-    gamma: float = 0.95
-
-    # 每隔多少步训练一次
-    train_freq: int = 4
-
-    # 目标网络更新间隔
-    target_update_interval: int = 2000
-
-    # 初始探索率 ε
-    exploration_initial_eps: float = 1.0
-
-    # 从初始ε衰减到最终ε所占的训练比例
-    exploration_fraction: float = 0.3
-
-    # 最终探索率 ε
-    exploration_final_eps: float = 0.02
-
-    # 日志备注(用于区分不同实验)
-    remark: str = "default"
-
-class UFEpisodeRecorder:
-    """记录episode中的决策和结果"""
-
-    def __init__(self):
-        self.episode_data = []
-        self.current_episode = []
-
-    def record_step(self, obs, action, reward, done, info):
-        """记录单步信息"""
-        step_data = {
-            "obs": obs.copy(),
-            "action": action.copy(),
-            "reward": reward,
-            "done": done,
-            "info": info.copy() if info else {}
-        }
-        self.current_episode.append(step_data)
-
-        if done:
-            self.episode_data.append(self.current_episode)
-            self.current_episode = []
-
-    def get_episode_stats(self, episode_idx=-1):
-        """获取episode统计信息"""
-        if not self.episode_data:
-            return {}
-
-        episode = self.episode_data[episode_idx]
-        total_reward = sum(step["reward"] for step in episode)
-        avg_recovery = np.mean([step["info"].get("recovery", 0) for step in episode if "recovery" in step["info"]])
-        feasible_steps = sum(1 for step in episode if step["info"].get("feasible", False))
-
-        return {
-            "total_reward": total_reward,
-            "avg_recovery": avg_recovery,
-            "feasible_steps": feasible_steps,
-            "total_steps": len(episode)
-        }
-
-
-# ==== 定义强化学习训练回调器 ====
-class UFTrainingCallback(BaseCallback):
-    """
-    强化学习训练回调,用于记录每一步的数据到 recorder。
-    1. 不依赖环境内部 last_* 属性
-    2. 使用环境接口提供的 obs、actions、rewards、dones、infos
-    3. 自动处理 episode 结束时的统计
-    """
-
-    def __init__(self, recorder, verbose=0):
-        super(UFTrainingCallback, self).__init__(verbose)
-        self.recorder = recorder
-
-    def _on_step(self) -> bool:
-        try:
-            new_obs = self.locals.get("new_obs")
-            actions = self.locals.get("actions")
-            rewards = self.locals.get("rewards")
-            dones = self.locals.get("dones")
-            infos = self.locals.get("infos")
-
-            if len(new_obs) > 0:
-                step_obs = new_obs[0]
-                step_action = actions[0] if actions is not None else None
-                step_reward = rewards[0] if rewards is not None else 0.0
-                step_done = dones[0] if dones is not None else False
-                step_info = infos[0] if infos is not None else {}
-
-                # 打印当前 step 的信息
-                if self.verbose:
-                    print(f"[Step {self.num_timesteps}] 动作={step_action}, 奖励={step_reward:.3f}, Done={step_done}")
-
-                # 记录数据
-                self.recorder.record_step(
-                    obs=step_obs,
-                    action=step_action,
-                    reward=step_reward,
-                    done=step_done,
-                    info=step_info,
-                )
-
-        except Exception as e:
-            if self.verbose:
-                print(f"[Callback Error] {e}")
-
-        return True
-
-
-
-
-class DQNTrainer:
-    def __init__(self, env, params, callback=None):
-        self.env = env
-        self.params = params
-        self.callback = callback
-        self.log_dir = self._create_log_dir()
-        self.model = self._create_model()
-
-    def _create_log_dir(self):
-        # 创建训练日志
-        timestamp = time.strftime("%Y%m%d-%H%M%S")
-        log_name = (
-            f"DQN_lr{self.params.learning_rate}_buf{self.params.buffer_size}_bs{self.params.batch_size}"
-            f"_gamma{self.params.gamma}_exp{self.params.exploration_fraction}"
-            f"_{self.params.remark}_{timestamp}"
-        )
-        log_dir = os.path.join("./uf_dqn_tensorboard", log_name)
-        os.makedirs(log_dir, exist_ok=True)
-        return log_dir
-
-    def _create_model(self):
-        return DQN(
-            policy="MlpPolicy",
-            env=self.env,
-            learning_rate=self.params.learning_rate,
-            buffer_size=self.params.buffer_size,
-            learning_starts=self.params.learning_starts,
-            batch_size=self.params.batch_size,
-            gamma=self.params.gamma,
-            train_freq=self.params.train_freq,
-            target_update_interval=1,
-            tau=0.005,
-            exploration_initial_eps=self.params.exploration_initial_eps,
-            exploration_fraction=self.params.exploration_fraction,
-            exploration_final_eps=self.params.exploration_final_eps,
-            verbose=1,
-            tensorboard_log=self.log_dir
-        )
-
-    def train(self, total_timesteps: int):
-        if self.callback:
-            self.model.learn(total_timesteps=total_timesteps, callback=self.callback)
-        else:
-            self.model.learn(total_timesteps=total_timesteps)
-        print(f"模型训练完成,日志保存在:{self.log_dir}")
-
-    def save(self, path=None):
-        if path is None:
-            path = os.path.join(self.log_dir, "dqn_model.zip")
-        self.model.save(path)
-        print(f"模型已保存到:{path}")
-
-    def load(self, path):
-        self.model = DQN.load(path, env=self.env)
-        print(f"模型已从 {path} 加载")
-
-
-def set_global_seed(seed: int):
-    """固定全局随机种子,保证训练可复现"""
-    random.seed(seed)
-    np.random.seed(seed)
-    torch.manual_seed(seed)
-    torch.cuda.manual_seed_all(seed)  # 如果使用GPU
-    torch.backends.cudnn.deterministic = True
-    torch.backends.cudnn.benchmark = False
-
-
-def train_uf_rl_agent(params: UFParams, total_timesteps: int = 10000, seed: int = 2025):
-    set_global_seed(seed)
-    recorder = UFEpisodeRecorder()
-    callback = UFTrainingCallback(recorder, verbose=1)
-
-    def make_env():
-        env = UFSuperCycleEnv(params)
-        env = Monitor(env)
-        return env
-
-    env = DummyVecEnv([make_env])
-
-    dqn_params = DQNParams()
-    trainer = DQNTrainer(env, dqn_params, callback=callback)
-    trainer.train(total_timesteps)
-    trainer.save()
-
-    stats = callback.recorder.get_episode_stats()
-    print(f"训练完成 - 总奖励: {stats.get('total_reward', 0):.2f}, 平均回收率: {stats.get('avg_recovery', 0):.3f}")
-
-    return trainer.model
-
-
-# 训练
-if __name__ == "__main__":
-    # 初始化参数
-    params = UFParams()
-
-    # 训练RL代理
-    print("开始训练RL代理...")
-    train_uf_rl_agent(params, total_timesteps=50000)
-

+ 0 - 500
models/uf-rl/README.md

@@ -1,500 +0,0 @@
-# UF超滤系统强化学习决策模型训练逻辑说明
-
-## 模型概述
-
-这是一个基于**深度强化学习(DQN)**的超滤系统运行参数优化模型。不同于前两个"预测模型",这个模型的目标是**决策**:在给定当前跨膜压差(TMP)的情况下,自动决定最优的产水时长和反洗时长。
-
-**核心问题**:如何平衡产水量、回收率、能耗和膜寿命?
-
-## 问题背景
-
-### 超滤运行周期
-
-超滤系统运行遵循"小周期"模式:
-```
-[产水L秒] → [反洗t_bw秒] → [产水L秒] → [反洗t_bw秒] → ... → [化学清洗CEB]
-```
-
-- **产水阶段**:过滤原水,TMP逐渐升高(膜污染)
-- **反洗阶段**:反向冲洗,TMP部分恢复
-- **化学清洗(CEB)**:每48小时一次,TMP完全恢复
-
-### 决策难题
-
-**调节杠杆**:
-- `L_s`:单次产水时长(3600-6000秒)
-- `t_bw_s`:单次反洗时长(40-60秒)
-
-**矛盾目标**:
-1. **产水量↑**:希望L_s长、t_bw_s短(多产水、少反洗)
-2. **回收率↑**:希望t_bw_s短(减少反洗水耗)
-3. **膜污染↓**(保护膜寿命):希望L_s短、t_bw_s长(频繁反洗、TMP不升太高)
-4. **能耗↓**:产水时间越长,单位吨水的泵能耗越低
-
-**传统方法**:人工经验+固定参数,难以在复杂约束下找到最优解  
-**强化学习方法**:让AI自己探索,学习在不同TMP下的最佳决策
-
-## 核心思路:强化学习框架
-
-### 1. 强化学习是什么?
-
-把决策问题想象成玩游戏:
-```
-游戏状态(TMP)→ AI选择动作(L_s, t_bw_s)→ 执行动作 → 获得奖励(回收率、净供水率)→ 新状态(TMP更新)
-```
-
-AI通过**反复试错**,学习哪些动作能获得高奖励。
-
-### 2. Markov决策过程(MDP)建模
-
-#### 状态(State)
-```python
-state = [
-    TMP0_normalized,           # 当前初始TMP(归一化到0-1)
-    last_L_s_normalized,       # 上一次产水时长(归一化)
-    last_t_bw_s_normalized,    # 上一次反洗时长(归一化)
-    max_TMP_normalized         # 本周期最高TMP(归一化)
-]
-```
-**4维状态向量**描述当前系统状态
-
-#### 动作(Action)
-```python
-# 离散动作空间:L_s × t_bw_s的网格
-L_s范围:3800-6000秒,步长60秒 → 37个选项
-t_bw_s范围:40-60秒,步长5秒 → 5个选项
-
-总动作数 = 37 × 5 = 185个
-```
-
-每个动作对应一个`(L_s, t_bw_s)`组合
-
-#### 奖励(Reward)
-```python
-# 多目标加权奖励
-reward = 0.8 × recovery           # 回收率(主要目标)
-       + 0.2 × rate_normalized    # 净供水率
-       - 0.2 × headroom_penalty   # TMP贴边惩罚
-```
-
-**奖励设计原则**:
-- 高回收率 → 高奖励
-- 高净供水率 → 高奖励
-- TMP接近上限 → 负奖励(膜风险)
-- 违反约束 → 大负奖励(-20)
-
-#### 状态转移
-```python
-# 模拟器:根据物理模型计算下一个状态
-def simulate_one_supercycle(TMP0, L_s, t_bw_s):
-    # 1. 计算产水阶段TMP上升
-    delta_TMP = model_fp(L_s)  # 调用TMP增长模型
-    TMP_peak = TMP0 + delta_TMP
-    
-    # 2. 计算反洗恢复
-    phi = model_bw(L_s, t_bw_s)  # 调用反洗恢复模型
-    TMP_after_bw = TMP_peak - phi × (TMP_peak - TMP0)
-    
-    # 3. 多次小周期后CEB
-    TMP_new = TMP0  # 化学清洗后完全恢复
-    
-    # 4. 计算指标
-    recovery = (产水 - 反洗水耗 - CEB水耗) / 产水
-    net_rate = 净产水 / 总时间
-    
-    return TMP_new, recovery, net_rate, ...
-```
-
-## DQN算法详解
-
-### 什么是DQN?
-
-**Deep Q-Network(深度Q网络)**:
-- 用神经网络估计**Q值函数**:`Q(state, action) = 预期累积奖励`
-- 最优策略:在每个状态选择Q值最大的动作
-
-```
-状态 → [神经网络] → 每个动作的Q值 → 选择最大Q值的动作
-```
-
-### 神经网络结构
-
-```python
-# Stable-Baselines3的MlpPolicy默认结构
-输入层:4维状态
-隐藏层1:64神经元 + ReLU
-隐藏层2:64神经元 + ReLU
-输出层:185个动作的Q值
-```
-
-### 训练流程(`DQN_train.py`)
-
-#### 1. 经验回放(Experience Replay)
-```python
-buffer_size = 10000  # 存储10000条经验
-
-# 交互过程
-for step in range(total_timesteps):
-    action = model.select_action(state)        # ε-贪心选择动作
-    next_state, reward = env.step(action)      # 执行动作
-    buffer.store(state, action, reward, next_state)  # 存入缓冲区
-    
-    # 从缓冲区随机采样训练
-    if step > learning_starts:
-        batch = buffer.sample(batch_size=32)
-        model.train_on_batch(batch)
-```
-
-**为什么需要经验回放?**
-- 打破数据相关性(连续状态往往相似)
-- 提高样本利用效率(同一条经验可多次使用)
-
-#### 2. ε-贪心探索
-```python
-# 随机探索 vs 利用已学知识
-if random() < epsilon:
-    action = random_action()   # 探索:随机选
-else:
-    action = argmax(Q(state))  # 利用:选Q值最大的
-
-# epsilon从1.0衰减到0.02
-epsilon = 1.0 → 0.8 → ... → 0.02
-```
-
-**探索-利用权衡**:
-- 初期多探索(发现好动作)
-- 后期多利用(稳定在最优策略)
-
-#### 3. 目标网络(Target Network)
-```python
-# 两个网络:当前网络 + 目标网络
-Q_current(state, action)  # 每步更新
-Q_target(next_state, a')   # 每2000步同步一次
-
-# TD误差
-loss = MSE(Q_current(s,a), reward + γ × max(Q_target(s', a')))
-```
-
-**为什么需要目标网络?**
-- 稳定训练(避免"追逐移动目标"问题)
-- 减少Q值估计的震荡
-
-#### 4. 训练超参数
-
-```python
-class DQNParams:
-    learning_rate = 1e-4          # 学习率
-    buffer_size = 10000           # 经验池大小
-    learning_starts = 200         # 200步后开始学习
-    batch_size = 32               # 每次训练32个样本
-    gamma = 0.95                  # 折扣因子(重视长期奖励)
-    train_freq = 4                # 每4步训练一次
-    target_update_interval = 2000 # 每2000步更新目标网络
-    exploration_fraction = 0.3    # ε在前30%的训练步数内从初始值衰减到最终值
-    exploration_final_eps = 0.02  # 最终保留2%探索
-```
-
-## 模拟环境(`DQN_env.py`)
-
-### UFSuperCycleEnv类
-
-```python
-class UFSuperCycleEnv(gym.Env):
-    def reset(self):
-        # 重置环境:随机初始TMP
-        self.TMP0 = random.uniform(0.01, 0.03)
-        return self._get_obs()
-    
-    def step(self, action):
-        # 执行动作
-        L_s, t_bw_s = self._decode_action(action)
-        
-        # 调用模拟器
-        feasible, info = simulate_one_supercycle(self.TMP0, L_s, t_bw_s)
-        
-        if feasible:
-            reward = _score(info)  # 计算奖励
-            self.TMP0 = info["TMP_after_ceb"]  # 更新TMP
-            done = False
-        else:
-            reward = -20  # 违反约束,大负奖励
-            done = True   # episode终止
-        
-        return self._get_obs(), reward, done, info
-```
-
-### 约束检查
-
-```python
-# 硬约束1:TMP峰值不得超过0.06 MPa
-if TMP_peak > 0.06:
-    return False
-
-# 硬约束2:单次残余增量不得超过0.001 MPa
-if (TMP_after_bw - TMP0) > 0.001:
-    return False
-
-# 硬约束3:TMP不得超过上限的98%
-if TMP_peak / TMP_max > 0.98:
-    return False
-```
-
-### 物理模型集成
-
-```python
-# TMP增长模型(uf_fp.pth)
-def _delta_tmp(L_h):
-    return model_fp(params, L_h)  # 产水时长 → TMP增量
-
-# 反洗恢复模型(uf_bw.pth)
-def phi_bw_of(L_s, t_bw_s):
-    return model_bw(params, L_s, t_bw_s)  # (产水时长, 反洗时长) → 恢复比例
-```
-
-这两个模型是基于数据拟合或物理建模得到的。
-
-## 决策使用(`DQN_decide.py`)
-
-### 单步决策接口
-
-```python
-def run_uf_DQN_decide(uf_params, TMP0_value):
-    # 1. 创建环境
-    env = UFSuperCycleEnv(uf_params)
-    env.current_params.TMP0 = TMP0_value  # 设置当前TMP
-    
-    # 2. 加载训练好的模型
-    model = DQN.load("dqn_model.zip")
-    
-    # 3. 预测动作(确定性,不探索)
-    action, _ = model.predict(state, deterministic=True)
-    
-    # 4. 解码动作
-    L_s, t_bw_s = decode_action(action)
-    
-    return {
-        "action": action,
-        "L_s": L_s,
-        "t_bw_s": t_bw_s,
-        "expected_recovery": info["recovery"],
-        ...
-    }
-```
-
-### PLC指令生成
-
-为了避免频繁大幅调整(工艺稳定性),使用**渐进式调整**:
-
-```python
-def generate_plc_instructions(current, model_prev, model_current):
-    # 计算差异
-    diff = model_current - effective_current
-    
-    # 渐进调整:每次只调整一个步长
-    if abs(diff) >= threshold:
-        adjustment = +step_size if diff > 0 else -step_size
-    else:
-        adjustment = 0
-    
-    next_value = effective_current + adjustment
-    return next_value
-```
-
-**示例**:
-```
-当前L_s = 4000秒
-模型建议 = 4300秒
-步长 = 60秒
-
-第1轮下发:4060秒(+60)
-第2轮下发:4120秒(+60)
-...
-第5轮下发:4300秒(到达目标)
-```
-
-## 性能指标计算(`DQN_decide.py`)
-
-```python
-def calc_uf_cycle_metrics(TMP0, L_s, t_bw_s):
-    # 模拟一个超级周期
-    feasible, info = simulate_one_supercycle(params, L_s, t_bw_s)
-    
-    return {
-        "k_bw_per_ceb": 小周期次数,
-        "recovery": 回收率,
-        "net_delivery_rate_m3ph": 净供水率(m³/h),
-        "daily_prod_time_h": 日均产水时间(h/天),
-        "ton_water_energy_kWh_per_m3": 吨水电耗(kWh/m³),
-        "max_permeability": 最高渗透率(lmh/bar)
-    }
-```
-
-## 文件结构说明
-
-```
-uf-rl/
-├── DQN_train.py         # 强化学习训练脚本(DQN算法)
-├── DQN_env.py           # 模拟环境(MDP定义、物理模拟)
-├── DQN_decide.py        # 决策接口(加载模型、生成指令)
-├── UF_decide.py         # 传统优化方法(网格搜索,用于对比)
-├── UF_models.py         # 物理模型定义(TMP增长、反洗恢复)
-├── uf_fp.pth            # TMP增长模型权重
-├── uf_bw.pth            # 反洗恢复模型权重
-└── dqn_model.zip        # 训练好的DQN模型
-```
-
-## 训练流程总结
-
-```mermaid
-graph LR
-    A[初始化环境] --> B[随机初始TMP]
-    B --> C{ε-贪心选择动作}
-    C -->|探索| D[随机动作]
-    C -->|利用| E[Q值最大动作]
-    D --> F[模拟执行]
-    E --> F
-    F --> G{约束检查}
-    G -->|可行| H[计算奖励]
-    G -->|不可行| I[负奖励-20]
-    H --> J[存入经验池]
-    I --> J
-    J --> K{达到学习步数?}
-    K -->|是| L[采样训练]
-    K -->|否| M[继续交互]
-    L --> N{episode结束?}
-    M --> N
-    N -->|否| C
-    N -->|是| B
-```
-
-## 与传统方法对比
-
-### 传统网格搜索(`UF_decide.py`)
-
-```python
-# 穷举所有(L_s, t_bw_s)组合
-for L_s in [3600, 3660, ..., 4200]:
-    for t_bw_s in [90, 92, ..., 100]:
-        feasible, metrics = simulate(L_s, t_bw_s)
-        if feasible and score > best_score:
-            best = (L_s, t_bw_s)
-```
-
-**优点**:简单、可解释、保证找到网格上的最优解  
-**缺点**:
-- 计算量大(数百次模拟)
-- 参数空间离散化(可能错过真正最优点)
-- 无法泛化(每个TMP都要重新搜索)
-
-### 强化学习(DQN)
-
-**优点**:
-- 训练后推理快(一次前向传播)
-- 能泛化到不同TMP(学到状态-动作映射)
-- 可处理更复杂的状态(如历史趋势)
-
-**缺点**:
-- 训练耗时(需要大量交互)
-- 黑盒性(难以解释为何选择某动作)
-- 性能受模拟器精度影响
-
-## 训练建议
-
-### 提升策略性能
-
-1. **改进奖励设计**:
-   ```python
-   # 添加渗透率奖励
-   reward += 0.1 × permeability
-   
-   # 添加稳定性奖励(动作变化小)
-   reward -= 0.05 × |action - last_action|
-   ```
-
-2. **增加状态信息**:
-   ```python
-   state = [
-       TMP0, last_L, last_t_bw, max_TMP,
-       water_quality,  # 水质指标
-       days_since_ceb, # 距上次CEB天数
-       ...
-   ]
-   ```
-
-3. **课程学习(Curriculum Learning)**:
-   ```python
-   # 阶段1:简单场景(TMP变化小)
-   env.TMP_range = [0.025, 0.035]
-   train(10000 steps)
-   
-   # 阶段2:中等场景
-   env.TMP_range = [0.01, 0.04]
-   train(20000 steps)
-   
-   # 阶段3:困难场景(全范围)
-   env.TMP_range = [0.01, 0.05]
-   train(20000 steps)
-   ```
-
-### 加速训练
-
-```python
-# 1. 减少训练步数
-total_timesteps = 10000  # 从50000降到10000
-
-# 2. 增大batch_size(如果内存足够)
-batch_size = 64
-
-# 3. 调高learning_rate(小心不稳定)
-learning_rate = 5e-4
-
-# 4. 预训练:从传统方法生成初始数据
-buffer.load_from_grid_search()
-```
-
-## 常见问题
-
-**Q:为什么用强化学习而不是监督学习?**  
-A:监督学习需要"正确答案"标签,但这里没有标准答案(最优策略本身就是要学习的)。强化学习通过奖励信号自己探索最优策略。
-
-**Q:模拟器不准确怎么办?**  
-A:这是强化学习最大风险。解决方法:
-- 用真实数据校准模拟器
-- Sim-to-Real迁移(在真实系统上微调)
-- 保守策略(加大安全裕度)
-
-**Q:能否用于在线学习?**  
-A:可以,但需谨慎:
-- 设置安全约束(避免危险动作)
-- 分阶段部署(先离线验证)
-- 人工监督(关键决策需人工确认)
-
-**Q:为什么动作空间是离散的?**  
-A:DQN擅长离散动作(每个动作一个Q值)。如果需要连续动作,可用DDPG、SAC等算法。
-
-**Q:如何评估策略好坏?**  
-A:
-- 离线:在验证集上计算平均回收率、净供水率
-- 在线:实际运行后对比历史数据
-- 对比基线:与传统固定参数、网格搜索比较
-
-## 未来优化方向
-
-1. **多智能体协同**:多个UF模组联合优化
-2. **分层强化学习**:高层决策策略,低层决策参数
-3. **模型预测控制(MPC)集成**:结合物理模型和学习策略
-4. **安全强化学习**:硬约束保证(Safety RL)
-5. **离线强化学习**:仅用历史数据训练(Offline RL)
-
-## 总结
-
-UF-RL模型是一个**决策优化系统**,通过深度强化学习学习在不同跨膜压差下的最优运行策略。相比传统方法:
-- **更智能**:能适应不同状态,无需人工调参
-- **更高效**:训练后推理快速
-- **更全面**:平衡多个矛盾目标
-
-但同时也需要:
-- **准确的模拟器**:保证学到的策略有效
-- **充分的训练**:探索足够多的状态-动作组合
-- **谨慎的部署**:实际应用前充分验证
-

+ 0 - 405
models/uf-rl/UF_decide.py

@@ -1,405 +0,0 @@
-# UF_decide.py
-from dataclasses import dataclass
-import numpy as np
-
-@dataclass
-class UFParams:
-    """Parameter bundle for the UF (ultrafiltration) super-cycle simulation and grid search."""
-
-    # -- Membrane and operating parameters --
-    q_UF: float = 360.0           # filtration feed flow (m^3/h)
-    TMP0: float = 0.03            # initial TMP (MPa)
-    TMP_max: float = 0.06         # hard upper limit on TMP (MPa)
-
-    # -- Membrane fouling kinetics --
-    alpha: float = 1e-6           # TMP growth coefficient
-    belta: float = 1.1            # power exponent (presumably "beta"; name kept as-is)
-
-    # -- Backwash parameters (fixed) --
-    q_bw_m3ph: float = 1000.0     # physical backwash flow (m^3/h)
-
-    # -- CEB parameters (fixed) --
-    T_ceb_interval_h: float = 48.0  # a CEB is performed at a fixed interval of this many hours
-    v_ceb_m3: float = 30.0        # water volume used by one CEB (m^3)
-    t_ceb_s: float = 40 * 60.0    # CEB duration (s)
-    phi_ceb: float = 1.0          # CEB removal ratio (simplified: full recovery to TMP0)
-
-    # -- Constraints and convergence --
-    dTMP: float = 0.0005          # max rise relative to TMP0 at the end of a single filtration run (MPa)
-
-    # -- Search range (seconds) --
-    L_min_s: float = 3600.0       # lower bound of the filtration duration (s)
-    L_max_s: float = 4200.0       # upper bound of the filtration duration (s)
-    t_bw_min_s: float = 40.0      # lower bound of the physical-wash duration (s)
-    t_bw_max_s: float = 60.0      # upper bound of the physical-wash duration (s)
-
-    # -- Parameters of the physical-backwash recovery function --
-    phi_bw_min: float = 0.7       # minimum physical-wash removal ratio
-    phi_bw_max: float = 1.0       # maximum physical-wash removal ratio
-    L_ref_s: float = 4000.0       # time scale for the effect of the filtration duration
-    tau_bw_s: float = 30.0        # time scale for the effect of the physical-wash duration
-    gamma_t: float = 1.0          # exponent on the physical-wash duration effect
-    
-    # -- Grid --
-    L_step_s: float = 60.0        # filtration duration step (s)
-    t_bw_step_s: float = 5.0      # physical-wash duration step (s)
-
-    # Multi-objective weights and high-TMP penalty
-    w_rec: float = 0.8            # weight of the recovery rate
-    w_rate: float = 0.2           # weight of the net delivery rate
-    w_headroom: float = 0.3       # weight of the near-limit (headroom) penalty
-    r_headroom: float = 2.0       # exponent of the near-limit penalty
-    headroom_hardcap: float = 0.98 # ratios above this are treated as infeasible
-
-def _delta_tmp(p: UFParams, L_h: float) -> float:
-    # TMP rise over one filtration period: alpha * q_UF**belta * L_h
-    return float(p.alpha * (p.q_UF ** p.belta) * L_h)
-
-def _v_bw_m3(p: UFParams, t_bw_s: float) -> float:
-    # Water consumed by one physical backwash: flow (m^3/h) times duration converted from s to h
-    return float(p.q_bw_m3ph * (float(t_bw_s) / 3600.0))
-
-def phi_bw_of(p: UFParams, L_s: float, t_bw_s: float) -> float:
-    # Physical-wash removal ratio: its upper bound shrinks as the filtration
-    # duration grows, and it saturates as the physical-wash duration grows.
-    # Inputs are floored (L >= 1.0, t >= 1e-6) and the result is clipped to [0.0, 0.999].
-    L = max(float(L_s), 1.0)
-    t = max(float(t_bw_s), 1e-6)
-    upper_L = p.phi_bw_min + (p.phi_bw_max - p.phi_bw_min) * np.exp(- L / p.L_ref_s)
-    time_gain = 1.0 - np.exp(- (t / p.tau_bw_s) ** p.gamma_t)
-    phi = upper_L * time_gain
-    return float(np.clip(phi, 0.0, 0.999))
-
-def simulate_one_supercycle(p: UFParams, L_s: float, t_bw_s: float):
-    """
-    Simulate one super-cycle (repeated filtration + backwash runs, then a CEB).
-
-    Returns (feasible, metrics_dict):
-    - the number of small cycles is derived from the fixed CEB interval
-      (`p.T_ceb_interval_h`, 48 h by default)
-    - the metrics include daily production time and energy per ton of water
-    - when a constraint is violated, returns (False, dict with a "reason" key)
-    """
-    L_h = float(L_s) / 3600.0  # filtration time of one small cycle (h)
-
-    tmp = p.TMP0
-    max_tmp_during_filtration = tmp
-    max_residual_increase = 0.0
-
-    # Total duration of one small cycle (h)
-    t_small_cycle_h = (L_s + t_bw_s) / 3600.0
-
-    # Number of small cycles that fit into one CEB interval
-    k_bw_per_ceb = int(np.floor(p.T_ceb_interval_h / t_small_cycle_h))
-    if k_bw_per_ceb < 1:
-        k_bw_per_ceb = 1  # at least one small cycle
-
-    # Lookup table: energy per ton of water, keyed by filtration duration (s)
-    energy_lookup = {
-        3600: 0.1034, 3660: 0.1031, 3720: 0.1029, 3780: 0.1026,
-        3840: 0.1023, 3900: 0.1021, 3960: 0.1019, 4020: 0.1017,
-        4080: 0.1015, 4140: 0.1012, 4200: 0.1011
-    }
-
-    for _ in range(k_bw_per_ceb):
-        tmp_run_start = tmp
-
-        # TMP growth during the filtration phase
-        dtmp = _delta_tmp(p, L_h)
-        tmp_peak = tmp_run_start + dtmp
-
-        # Constraint 1: the peak must not exceed the hard upper limit
-        if tmp_peak > p.TMP_max + 1e-12:
-            return False, {"reason": "TMP_max violated during filtration", "TMP_peak": tmp_peak}
-
-        if tmp_peak > max_tmp_during_filtration:
-            max_tmp_during_filtration = tmp_peak
-
-        # Physical backwash
-        phi = phi_bw_of(p, L_s, t_bw_s)
-        tmp_after_bw = tmp_peak - phi * (tmp_peak - tmp_run_start)
-
-        # Constraint 2: limit on the residual increase of a single run
-        residual_inc = tmp_after_bw - tmp_run_start
-        if residual_inc > p.dTMP + 1e-12:
-            return False, {
-                "reason": "residual TMP increase after BW exceeded dTMP",
-                "residual_increase": residual_inc,
-                "limit_dTMP": p.dTMP
-            }
-        if residual_inc > max_residual_increase:
-            max_residual_increase = residual_inc
-
-        tmp = tmp_after_bw
-
-    # CEB: TMP is reset to TMP0
-    tmp_after_ceb = p.TMP0
-
-    # Volumes and recovery rate
-    V_feed_super = k_bw_per_ceb * p.q_UF * L_h
-    V_loss_super = k_bw_per_ceb * _v_bw_m3(p, t_bw_s) + p.v_ceb_m3
-    V_net = max(0.0, V_feed_super - V_loss_super)
-    recovery = max(0.0, V_net / max(V_feed_super, 1e-12))
-
-    # Time and net delivery rate
-    T_super_h = k_bw_per_ceb * (L_s + t_bw_s) / 3600.0 + p.t_ceb_s / 3600.0
-    net_delivery_rate_m3ph = V_net / max(T_super_h, 1e-12)
-
-    # Near-limit ratio and its hard cap
-    headroom_ratio = max_tmp_during_filtration / max(p.TMP_max, 1e-12)
-    if headroom_ratio > p.headroom_hardcap + 1e-12:
-        return False, {"reason": "headroom hardcap exceeded", "headroom_ratio": headroom_ratio}
-
-    # -- Added metric 1: daily production time (h/d) --
-    daily_prod_time_h = k_bw_per_ceb * L_h / T_super_h * 24.0
-
-    # -- Added metric 2: energy per ton of water (kWh/m^3) --
-    # Uses the table entry whose key is nearest to L_s.
-    closest_L = min(energy_lookup.keys(), key=lambda x: abs(x - L_s))
-    ton_water_energy = energy_lookup[closest_L]
-
-    info = {
-        "recovery": recovery,
-        "V_feed_super_m3": V_feed_super,
-        "V_loss_super_m3": V_loss_super,
-        "V_net_super_m3": V_net,
-        "supercycle_time_h": T_super_h,
-        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
-        "max_TMP_during_filtration": max_tmp_during_filtration,
-        "max_residual_increase_per_run": max_residual_increase,
-        "phi_bw_effective": phi,
-        "TMP_after_ceb": tmp_after_ceb,
-        "headroom_ratio": headroom_ratio,
-        "daily_prod_time_h": daily_prod_time_h,
-        "ton_water_energy_kWh_per_m3": ton_water_energy,
-        "k_bw_per_ceb": k_bw_per_ceb
-    }
-
-    return True, info
-
-def _score(p: UFParams, rec: dict) -> float:
-    """Composite score; larger is better.
-
-    A different TMP0 changes max_TMP, which changes the penalty and therefore
-    leads to a different solution.
-    """
-    # Make the net delivery rate dimensionless by dividing by the feed flow
-    rate_norm = rec["net_delivery_rate_m3ph"] / max(p.q_UF, 1e-12)
-    headroom_penalty = (rec["max_TMP_during_filtration"] / max(p.TMP_max, 1e-12)) ** p.r_headroom
-    return (p.w_rec * rec["recovery"]
-            + p.w_rate * rate_norm
-            - p.w_headroom * headroom_penalty)
-
-def optimize_2d(p: UFParams,
-                L_min_s=None, L_max_s=None, L_step_s=None,
-                t_bw_min_s=None, t_bw_max_s=None, t_bw_step_s=None):
-    """Grid-search (L_s, t_bw_s) and return the best feasible record.
-
-    Each bound/step argument left as None falls back to the matching field of `p`.
-
-    Returns:
-        The best record (simulation metrics plus "L_s", "t_bw_s", "score" and
-        "status": "feasible"), or {"status": "no-feasible-solution"} when no
-        grid point is feasible.
-    """
-    # Build the grid
-    L_lo = p.L_min_s if L_min_s is None else float(L_min_s)
-    L_hi = p.L_max_s if L_max_s is None else float(L_max_s)
-    L_st = p.L_step_s if L_step_s is None else float(L_step_s)
-
-    t_lo = p.t_bw_min_s if t_bw_min_s is None else float(t_bw_min_s)
-    t_hi = p.t_bw_max_s if t_bw_max_s is None else float(t_bw_max_s)
-    t_st = p.t_bw_step_s if t_bw_step_s is None else float(t_bw_step_s)
-
-    # The small epsilon on the stop value is there to keep the upper bound in the grid.
-    L_vals = np.arange(L_lo, L_hi + 1e-9, L_st)
-    t_vals = np.arange(t_lo, t_hi + 1e-9, t_st)
-
-    best = None
-    best_score = -np.inf
-
-    for L_s in L_vals:
-        for t_bw_s in t_vals:
-            feasible, info = simulate_one_supercycle(p, L_s, t_bw_s)
-            if not feasible:
-                continue
-
-            rec = {"L_s": float(L_s), "t_bw_s": float(t_bw_s)}
-            rec.update(info)
-
-            score = _score(p, rec)
-
-            if score > best_score + 1e-14:
-                best_score = score
-                best = rec.copy()
-                best["score"] = float(score)
-            # On equal scores prefer the higher recovery, then the higher net delivery rate
-            elif abs(score - best_score) <= 1e-14:
-                if (rec["recovery"] > best["recovery"] + 1e-12) or (
-                    abs(rec["recovery"] - best["recovery"]) <= 1e-12 and
-                    rec["net_delivery_rate_m3ph"] > best["net_delivery_rate_m3ph"] + 1e-12
-                ):
-                    best = rec.copy()
-                    best["score"] = float(score)
-
-    if best is None:
-        return {"status": "no-feasible-solution"}
-    best["status"] = "feasible"
-    return best
-
-def run_uf_decision(TMP0: float = None) -> dict:
-    """Run the grid search with a hard-coded parameter set and return the chosen setpoints.
-
-    When `TMP0` is None it is drawn uniformly from [0.03, 0.04).
-
-    Returns:
-        A dict with "L_s", "t_bw_s", "recovery", "k_bw_per_ceb",
-        "daily_prod_time_h" and "ton_water_energy_kWh_per_m3". When no feasible
-        solution exists, the lower search bounds and placeholder metric values
-        are returned instead.
-    """
-    if TMP0 is None:
-        rng = np.random.default_rng()
-        TMP0 = rng.uniform(0.03, 0.04)  # random initial TMP
-
-    params = UFParams(
-        q_UF=360.0,
-        TMP_max=0.05,
-        alpha=1.2e-6,
-        belta=1.0,
-        q_bw_m3ph=1000.0,
-        T_ceb_interval_h=48,
-        v_ceb_m3=30.0,
-        t_ceb_s=40*60.0,
-        phi_ceb=1.0,
-        dTMP=0.001,
-
-        L_min_s=3600.0, L_max_s=4200.0, L_step_s=30.0,
-        t_bw_min_s=90.0, t_bw_max_s=100.0, t_bw_step_s=2.0,
-
-        phi_bw_min=0.70, phi_bw_max=1.00,
-        L_ref_s=500.0, tau_bw_s=40.0, gamma_t=1.0,
-
-        TMP0=TMP0,
-
-        w_rec=0.7, w_rate=0.3, w_headroom=0.3, r_headroom=2.0, headroom_hardcap=0.9
-    )
-
-    result = optimize_2d(params)
-    if result.get("status") == "feasible":
-        return {
-            "L_s": result["L_s"],
-            "t_bw_s": result["t_bw_s"],
-            "recovery": result["recovery"],
-            "k_bw_per_ceb": result["k_bw_per_ceb"],
-            "daily_prod_time_h": result["daily_prod_time_h"],
-            "ton_water_energy_kWh_per_m3": result["ton_water_energy_kWh_per_m3"]
-        }
-
-    # No feasible solution: return the minimum filtration time and default values
-    return {
-        "L_s": params.L_min_s,
-        "t_bw_s": params.t_bw_min_s,
-        "recovery": 0.0,
-        "k_bw_per_ceb": 1,
-        "daily_prod_time_h": 0.0,
-        "ton_water_energy_kWh_per_m3": 0.0
-    }
-
-
-def generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s):
-    """
-    Generate PLC setpoints from the plant's current values, the model's
-    previous-round decision and the model's current-round decision.
-
-    Handling of None values:
-    1. If the model's previous-round value is None, the plant's current value is used;
-       if the plant's current value is None as well, an error is printed and
-       (None, None) is returned.
-
-    When both candidates are available, the one closer to the model's current
-    recommendation becomes the baseline (ties go to the plant value). The
-    baseline is then moved by at most one grid step towards the recommendation,
-    and only when the gap is strictly larger than `adjustment_threshold` steps.
-
-    Returns:
-        (next_L_s, next_t_bw_s), or (None, None) when no baseline is available.
-
-    Raises:
-        ValueError: if `model_L_s` / `model_t_bw_s` is None or outside the
-            hard-coded bounds below.
-
-    NOTE(review): the model's current values are used in `abs(... - model_L_s)`
-    / `abs(... - model_t_bw_s)` before their None check further down, so a None
-    there raises TypeError instead of the intended ValueError whenever both the
-    plant value and the previous model value are present.
-    """
-    # Parameter configuration is kept unchanged
-    params = UFParams(
-        L_min_s=3600.0, L_max_s=6000.0, L_step_s=60.0,
-        t_bw_min_s=40.0, t_bw_max_s=60.0, t_bw_step_s=5.0,
-    )
-
-    # Unpack the parameters
-    L_step_s = params.L_step_s
-    t_bw_step_s = params.t_bw_step_s
-    L_min_s = params.L_min_s
-    L_max_s = params.L_max_s
-    t_bw_min_s = params.t_bw_min_s
-    t_bw_max_s = params.t_bw_max_s
-    adjustment_threshold = 1.0
-
-    # Handle None values
-    if model_prev_L_s is None:
-        if current_L_s is None:
-            print("错误: 过滤时长的工厂当前值和模型上一轮值均为None")
-            return None, None
-        else:
-            # Use the plant's current value as the baseline
-            effective_current_L = current_L_s
-            source_L = "工厂当前值(模型上一轮值为None)"
-    else:
-        # The model's previous-round value is not None; now check the plant's current value
-        if current_L_s is None:
-            effective_current_L = model_prev_L_s
-            source_L = "模型上一轮值(工厂当前值为None)"
-        else:
-            # Neither is None: pick whichever is closer to the model's current recommendation
-            current_to_model_diff = abs(current_L_s - model_L_s)
-            prev_to_model_diff = abs(model_prev_L_s - model_L_s)
-
-            if current_to_model_diff <= prev_to_model_diff:
-                effective_current_L = current_L_s
-                source_L = "工厂当前值"
-            else:
-                effective_current_L = model_prev_L_s
-                source_L = "模型上一轮值"
-
-    # Apply the same handling to the backwash duration
-    if model_prev_t_bw_s is None:
-        if current_t_bw_s is None:
-            print("错误: 反洗时长的工厂当前值和模型上一轮值均为None")
-            return None, None
-        else:
-            effective_current_t_bw = current_t_bw_s
-            source_t_bw = "工厂当前值(模型上一轮值为None)"
-    else:
-        if current_t_bw_s is None:
-            effective_current_t_bw = model_prev_t_bw_s
-            source_t_bw = "模型上一轮值(工厂当前值为None)"
-        else:
-            current_to_model_t_bw_diff = abs(current_t_bw_s - model_t_bw_s)
-            prev_to_model_t_bw_diff = abs(model_prev_t_bw_s - model_t_bw_s)
-
-            if current_to_model_t_bw_diff <= prev_to_model_t_bw_diff:
-                effective_current_t_bw = current_t_bw_s
-                source_t_bw = "工厂当前值"
-            else:
-                effective_current_t_bw = model_prev_t_bw_s
-                source_t_bw = "模型上一轮值"
-
-    # Check that every input lies within the allowed range (only non-None values are checked)
-    # Plant current values (warning only)
-    if current_L_s is not None and not (L_min_s <= current_L_s <= L_max_s):
-        print(f"警告: 当前过滤时长 {current_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
-    if current_t_bw_s is not None and not (t_bw_min_s <= current_t_bw_s <= t_bw_max_s):
-        print(f"警告: 当前反洗时长 {current_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
-
-    # Model previous-round decision values (warning only)
-    if model_prev_L_s is not None and not (L_min_s <= model_prev_L_s <= L_max_s):
-        print(f"警告: 模型上一轮过滤时长 {model_prev_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
-    if model_prev_t_bw_s is not None and not (t_bw_min_s <= model_prev_t_bw_s <= t_bw_max_s):
-        print(f"警告: 模型上一轮反洗时长 {model_prev_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
-
-    # Model current-round decision values (error)
-    if model_L_s is None:
-        raise ValueError("错误: 决策模型建议的过滤时长不能为None")
-    elif not (L_min_s <= model_L_s <= L_max_s):
-        raise ValueError(f"错误: 决策模型建议的过滤时长 {model_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
-
-    if model_t_bw_s is None:
-        raise ValueError("错误: 决策模型建议的反洗时长不能为None")
-    elif not (t_bw_min_s <= model_t_bw_s <= t_bw_max_s):
-        raise ValueError(f"错误: 决策模型建议的反洗时长 {model_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
-
-    print(f"过滤时长基准: {source_L}, 值: {effective_current_L}")
-    print(f"反洗时长基准: {source_t_bw}, 值: {effective_current_t_bw}")
-
-    # Compute the adjustment from the selected baseline
-    L_diff = model_L_s - effective_current_L
-    L_adjustment = 0
-    if abs(L_diff) > adjustment_threshold * L_step_s:
-        if L_diff > 0:
-            L_adjustment = L_step_s
-        else:
-            L_adjustment = -L_step_s
-    next_L_s = effective_current_L + L_adjustment
-
-    t_bw_diff = model_t_bw_s - effective_current_t_bw
-    t_bw_adjustment = 0
-    if abs(t_bw_diff) > adjustment_threshold * t_bw_step_s:
-        if t_bw_diff > 0:
-            t_bw_adjustment = t_bw_step_s
-        else:
-            t_bw_adjustment = -t_bw_step_s
-    next_t_bw_s = effective_current_t_bw + t_bw_adjustment
-
-    return next_L_s, next_t_bw_s
-
-
-# Module-level demo call; it is not guarded by `if __name__ == "__main__"`,
-# so it runs whenever this module is imported.
-# NOTE(review): generate_plc_instructions hard-codes the backwash range to
-# [40.0, 60.0] s, so model_t_bw_s = 96 fails its range check and the call
-# below raises ValueError.
-current_L_s = 3920
-current_t_bw_s = 98
-model_prev_L_s = None
-model_prev_t_bw_s = None
-model_L_s = 4160
-model_t_bw_s = 96
-next_L_s, next_t_bw_s = generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s)
-print(f"next_L_s={next_L_s}, next_t_bw_s={next_t_bw_s}")

+ 0 - 33
models/uf-rl/UF_models.py

@@ -1,33 +0,0 @@
-import torch
-import numpy as np
-
-# TMP rise model
-class TMPIncreaseModel(torch.nn.Module):
-    # __init__ registers no parameters or buffers; forward is a closed-form
-    # formula over the fields of `p`.
-    def __init__(self):
-        super().__init__()
-    def forward(self, p, L_h):
-        # TMP rise for a filtration time L_h: alpha * q_UF**belta * L_h, returned as a Python float
-        return float(p.alpha * (p.q_UF ** p.belta) * L_h)
-
-# Backwash TMP removal model
-class TMPDecreaseModel(torch.nn.Module):
-    # __init__ registers no parameters or buffers; forward is a closed-form
-    # formula over the fields of `p`.
-    def __init__(self):
-        super().__init__()
-    def forward(self, p, L_s, t_bw_s):
-        # Removal ratio = (upper bound decaying with L_s) * (gain saturating with t_bw_s).
-        # Inputs are floored (L >= 1.0, t >= 1e-6); the result is clipped to [0.0, 0.999].
-        L = max(float(L_s), 1.0)
-        t = max(float(t_bw_s), 1e-6)
-        upper_L = p.phi_bw_min + (p.phi_bw_max - p.phi_bw_min) * np.exp(- L / p.L_ref_s)
-        time_gain = 1.0 - np.exp(- (t / p.tau_bw_s) ** p.gamma_t)
-        phi = upper_L * time_gain
-        return float(np.clip(phi, 0.0, 0.999))
-
-
-if __name__ == "__main__":
-    # Instantiate both models and write their state_dicts to the working directory.
-    # NOTE(review): neither class registers parameters in __init__, so the
-    # saved files presumably hold no weights - confirm.
-    model_fp = TMPIncreaseModel()
-    model_bw = TMPDecreaseModel()
-
-
-    torch.save(model_fp.state_dict(), "uf_fp.pth")
-    torch.save(model_bw.state_dict(), "uf_bw.pth")
-
-
-    print("模型已安全保存为 uf_fp.pth、uf_bw.pth")

BIN
models/uf-rl/resistance_model_bw.pth


BIN
models/uf-rl/resistance_model_fp.pth


BIN
models/uf-rl/uf_bw.pth


BIN
models/uf-rl/uf_fp.pth


+ 0 - 61
models/uf-rl/uf_resistance_models.py

@@ -1,61 +0,0 @@
-import torch
-import numpy as np
-
-# ===== Membrane resistance rise model =====
-class ResistanceIncreaseModel(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-
-    def forward(self, p, L_s):
-        """
-        Compute the membrane resistance rise dR as p.nuK * J * L_s.
-        """
-        A = 128 * 40.0  # presumably the total membrane area - TODO confirm meaning and units
-        J = p.q_UF / A / 3600  # flow divided by A and by 3600 (presumably a per-second flux)
-        # Membrane resistance rise model (already scaled)
-        dR = p.nuK * J * L_s
-        return float(dR)
-
-
-# ===== Membrane resistance decrease model =====
-class ResistanceDecreaseModel(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-
-    def forward(self, p, R0, R_end, L_h_start, L_h_next_start, t_bw_s):
-        """
-        Compute the resistance removed by a physical backwash: the removal
-        fraction depends on the backwash time, and the maximum removable
-        (reversible) resistance depends on the filtration time.
-
-        Returns the removed resistance as a float, clipped to [0, reversible_R].
-        """
-
-        # Irreversible resistance of a single cycle. The original note calls it
-        # linear in feed time; the code applies the power law slope * L ** power.
-        # Theoretical resistance at the start of this cycle and of the next one
-        R_start = R0 + p.slope * (L_h_start ** p.power)
-        R_next_start = R0 + p.slope * (L_h_next_start ** p.power)
-
-        # Irreversible fouling (residual increase left after the backwash)
-        irreversible_R = max(R_next_start - R_start, 0.0)
-
-        # Total fouling growth in this cycle
-        total_increase = max(R_end - R_start, 0.0)
-
-        # Reversible fouling = total growth in this cycle - irreversible residue
-        reversible_R = max(total_increase - irreversible_R, 0.0)
-
-        # Time factor: the longer the backwash, the more complete its effect
-        time_gain = 1.0 - np.exp(- (t_bw_s / p.tau_bw_s))
-
-        # Resistance actually removed: the reversible part times the time factor.
-        # (The original comment mentions randomness, but no random term appears here.)
-        dR_bw = reversible_R * time_gain
-
-        return float(np.clip(dR_bw, 0.0, reversible_R))
-
-
-# ===== Main program =====
-if __name__ == "__main__":
-    # Instantiate both models and write their state_dicts to the working directory.
-    model_fp = ResistanceIncreaseModel()
-    model_bw = ResistanceDecreaseModel()
-
-    torch.save(model_fp.state_dict(), "resistance_model_fp.pth")
-    torch.save(model_bw.state_dict(), "resistance_model_bw.pth")
-
-    print("模型已安全保存为 resistance_model_fp.pth、resistance_model_bw.pth")