Explorar o código

feat: 上传基于膜污染奖励函数设计的强化学习模型测试版本
- 基于跨膜压差硬上限0.08MPa和CEB效果评估计算奖励
- 为reset中初始状态生成增加了物理限制
- 修正了物理反洗模型计算的错误
- 增加了只调控进水时间的模型版本

junc_WHU hai 4 meses
pai
achega
bf12237bf4

+ 1 - 1
models/uf-rl/超滤训练源码/DQN_env.py

@@ -135,7 +135,7 @@ class UFParams:
     q_bw_m3ph: float = 1000.0
     # 物理反洗流量(m³/h)
     # 说明:反洗流量通常为正常过滤流量的 2-3 倍
-    fixed_t_bw_s = 60 # 固定物理反洗时间
+
 
     # ========== CEB 化学反洗参数 ==========
     T_ceb_interval_h: float = 48.0

+ 1 - 1
models/uf-rl/超滤训练源码/DQN_train.py

@@ -515,7 +515,7 @@ if __name__ == "__main__":
     训练脚本入口
     
     使用方法:
-        python DQN_train.py
+        python fixed_DQN_train.py
     
     训练参数:
         - total_timesteps=150000: 总训练步数

BIN=BIN
models/uf-rl/超滤训练源码/model/DQN_1/events.out.tfevents.1764511457.DESKTOP-L4D89R1.22304.0


+ 0 - 138
models/uf-rl/进水动作版超滤训练源码/check_initial_state.py

@@ -1,138 +0,0 @@
-# check_initial_state.py
-"""
-检查初始状态是否为“必死状态”(conservatively dead):
-1) 实例化 base_params(优先使用 rl_dqn_env 中提供的 base_params 或 UFParams)
-2) 实例化环境类 UFSuperCycleEnv(base_params)
-3) 调用 env.generate_initial_state() 生成 env.current_params(不调用 reset())
-4) 用最保守策略 (L_s=3600s, t_bw_s=60s) 连续模拟 max_steps 次,
-   若任意一次 is_dead_cycle(info) 返回 False 则判定为必死(返回 True),否则返回 False。
-"""
-
-from typing import Any
-import copy
-import traceback
-
-# 从 rl_dqn_env 导入必需项
-try:
-    from DQN_env import (
-        simulate_one_supercycle,
-        is_dead_cycle,
-        UFSuperCycleEnv,
-        UFParams,       # 如果模块里有 UFParams 类就导入
-        base_params     # 如果模块直接提供 base_params 实例也尝试导入
-    )
-except Exception:
-    # 有可能某些名字不存在 —— 我们会稍后用回退方案处理
-    # 先导入模块并再尝试访问属性,确保错误信息更友好
-    import importlib
-    rl = importlib.import_module("rl_dqn_env")
-    simulate_one_supercycle = getattr(rl, "simulate_one_supercycle", None)
-    is_dead_cycle = getattr(rl, "is_dead_cycle", None)
-    UFSuperCycleEnv = getattr(rl, "UFSuperCycleEnv", None)
-    UFParams = getattr(rl, "UFParams", None)
-    base_params = getattr(rl, "base_params", None)
-
-# 检查导入完整性
-_missing = []
-if simulate_one_supercycle is None:
-    _missing.append("simulate_one_supercycle")
-if is_dead_cycle is None:
-    _missing.append("is_dead_cycle")
-if UFSuperCycleEnv is None:
-    _missing.append("UFSuperCycleEnv")
-if _missing:
-    raise ImportError(f"无法从 rl_dqn_env 导入以下必要项: {', '.join(_missing)}")
-
-def is_dead_initial_state_env(env: UFSuperCycleEnv, max_steps: int = 15,
-                              L_s: int = 4200, t_bw_s: int = 50,
-                              verbose: bool = True) -> bool:
-    """
-    使用 env.current_params 作为初始状态判断是否为必死状态(保守策略)。
-
-    参数:
-        env: 已实例化的 UFSuperCycleEnv(必须包含 generate_initial_state() 与 current_params)
-        max_steps: 模拟步数(默认 15)
-        L_s: 过滤时长(s),保守值 3600
-        t_bw_s: 物理反洗时长(s),保守值 60
-        verbose: 是否打印每步结果
-
-    返回:
-        True 表示必死(conservatively dead)
-        False 表示可行
-    """
-    # 1) 确保 env 有 current_params,并且 generate_initial_state 可用
-    if not hasattr(env, "generate_initial_state"):
-        raise AttributeError("env 缺少 generate_initial_state() 方法。")
-    # 生成初始状态(不会调用 reset)
-    env.generate_initial_state()
-
-    if not hasattr(env, "current_params"):
-        raise AttributeError("env.generate_initial_state() 未设置 env.current_params。")
-
-    curr_p = copy.deepcopy(env.current_params)
-
-    for step in range(1, max_steps + 1):
-        try:
-            info, next_params = simulate_one_supercycle(curr_p, L_s, t_bw_s)
-        except Exception as e:
-            # 如果 simulate 出错,把异常视为“失败”(保守处理)
-            if verbose:
-                print(f"[Step {step}] simulate_one_supercycle 抛出异常,视为失败。异常信息:{e}")
-                traceback.print_exc()
-            return True
-
-        success = is_dead_cycle(info)  # True 表示成功循环
-
-        if verbose:
-            print(f"[Step {step}] 循环结果:{'成功' if success else '失败'}")
-            # 如果 info 中有关键诊断字段,打印简要信息
-            try:
-                print(f"     TMP0: {info.get('TMP0')},max_TMP: {info.get('max_TMP_during_filtration')}, recovery: {info.get('recovery')}, "
-                      f"R0: {info.get('R0')}, R_after_ceb: {info.get('R_after_ceb')}")
-            except Exception:
-                pass
-
-        if not success:
-            if verbose:
-                print(f"在第 {step} 步检测到失败,判定为必死初始状态(conservatively dead)。")
-            return True
-
-        # 否则继续,用 next_params 作为下一步起始参数
-        curr_p = next_params
-
-    if verbose:
-        print(f"{max_steps} 步均成功,初始状态判定为可行(non-dead)。")
-    return False
-
-
-if __name__ == "__main__":
-    print("=== check_initial_state.py: 使用 env.generate_initial_state() 检查初始状态是否为必死 ===")
-
-    try:
-        # 1) 构造 base_params
-        if base_params is not None:
-            bp = base_params
-            print("使用 rl_dqn_env 中提供的 base_params。")
-        elif UFParams is not None:
-            bp = UFParams()  # 使用默认构造
-            print("使用 UFParams() 构造 base_params 的实例。")
-        else:
-            raise ImportError("无法构造 base_params:rl_dqn_env 中既无 base_params 也无 UFParams。")
-
-        # 2) 实例化环境类(将 base_params 传入构造器)
-        env = UFSuperCycleEnv(bp)
-        print("已实例化 UFSuperCycleEnv 环境。")
-
-        # 3) 调用 env.generate_initial_state() 并检查 env.current_params 是否为必死
-        dead = is_dead_initial_state_env(env, max_steps=getattr(env, "max_episode_steps", 15),
-                                        L_s=6000, t_bw_s=40, verbose=True)
-
-        print("\n=== 判定结果 ===")
-        if dead:
-            print("当前生成的初始状态为【必死状态】(conservatively dead)。")
-        else:
-            print("当前生成的初始状态为【可行状态】(non-dead)。")
-
-    except Exception as e:
-        print("脚本执行出现错误:", e)
-        traceback.print_exc()

+ 233 - 0
models/uf-rl/进水动作版超滤训练源码/fixed_DQN_decide.py

@@ -0,0 +1,233 @@
+import numpy as np
+from stable_baselines3 import DQN
+from fixed_DQN_env import UFSuperCycleEnv, UFParams
+from fixed_DQN_env import simulate_one_supercycle
+
+# 模型路径
+MODEL_PATH = "model/dqn_model.zip"
+
+# 加载模型(只加载一次,提高效率)
+model = DQN.load(MODEL_PATH)
+
+def run_uf_DQN_decide(uf_params, TMP0_value: float):
+    """
+    单步决策函数(新版):
+    当前模型只输出进水时间 L_s,不输出反洗时间。
+    """
+
+    # 1. 初始化环境
+    env = UFSuperCycleEnv(uf_params)
+
+    # 2. 设置 TMP0
+    env.current_params.TMP0 = TMP0_value
+
+    # 3. 获取观察(归一化)
+    obs = env._get_obs().reshape(1, -1)
+
+    # 4. 模型预测动作
+    action, _ = model.predict(obs, deterministic=True)
+
+    # 5. 新模型动作只返回 L_s
+    L_s = env._get_action_values(action[0])   # 单值
+
+    # 6. 在环境中执行动作
+    next_obs, reward, terminated, truncated, info = env.step(action[0])
+
+    # 7. 返回结构化结果
+    return {
+        "action": int(action[0]),
+        "L_s": float(L_s),
+        "t_bw_s": None,       # 保留字段但固定为 None
+        "next_obs": next_obs,
+        "reward": reward,
+        "terminated": terminated,
+        "truncated": truncated,
+        "info": info
+    }
+
+def generate_plc_instructions(current_L_s, model_prev_L_s, model_L_s):
+    """
+    根据工厂当前值、模型上一轮决策值和模型当前轮决策值,生成PLC指令。
+
+    新增功能:
+    1. 处理None值情况:如果模型上一轮值为None,则使用工厂当前值;
+       如果工厂当前值也为None,则返回None并提示错误。
+    """
+    # 参数配置保持不变
+    params = UFParams(
+        L_min_s=3600.0, L_max_s=4800.0, L_step_s=60.0,
+    )
+
+    # 参数解包
+    L_step_s = params.L_step_s
+    L_min_s = params.L_min_s
+    L_max_s = params.L_max_s
+    adjustment_threshold = 1.0
+
+    # 处理None值情况
+    if model_prev_L_s is None:
+        if current_L_s is None:
+            print("错误: 过滤时长的工厂当前值和模型上一轮值均为None")
+            return None, None
+        else:
+            # 使用工厂当前值作为基准
+            effective_current_L = current_L_s
+            source_L = "工厂当前值(模型上一轮值为None)"
+    else:
+        # 模型上一轮值不为None,继续检查工厂当前值
+        if current_L_s is None:
+            effective_current_L = model_prev_L_s
+            source_L = "模型上一轮值(工厂当前值为None)"
+        else:
+            effective_current_L = model_prev_L_s
+            source_L = "模型上一轮值"
+
+
+    # 检测所有输入值是否在规定范围内(只对非None值进行检查)
+    # 工厂当前值检查(警告)
+    if current_L_s is not None and not (L_min_s <= current_L_s <= L_max_s):
+        print(f"警告: 当前过滤时长 {current_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
+
+    # 模型上一轮决策值检查(警告)
+    if model_prev_L_s is not None and not (L_min_s <= model_prev_L_s <= L_max_s):
+        print(f"警告: 模型上一轮过滤时长 {model_prev_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
+
+    # 模型当前轮决策值检查(错误)
+    if model_L_s is None:
+        raise ValueError("错误: 决策模型建议的过滤时长不能为None")
+    elif not (L_min_s <= model_L_s <= L_max_s):
+        raise ValueError(f"错误: 决策模型建议的过滤时长 {model_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
+
+    print(f"过滤时长基准: {source_L}, 值: {effective_current_L}")
+
+    # 使用选定的基准值进行计算调整
+    L_diff = model_L_s - effective_current_L
+    L_adjustment = 0
+    if abs(L_diff) >= adjustment_threshold * L_step_s:
+        if L_diff >= 0:
+            L_adjustment = L_step_s
+        else:
+            L_adjustment = -L_step_s
+    next_L_s = effective_current_L + L_adjustment
+
+    return next_L_s
+
+
+def calc_uf_cycle_metrics(p, TMP0, max_tmp_during_filtration, min_tmp_during_filtration, L_s: float, t_bw_s):
+    """
+    计算 UF 超滤系统的核心性能指标
+
+    参数:
+        p (UFParams): UF 系统参数
+        L_s (float): 单次过滤时间(秒)
+        t_bw_s (float): 单次反洗时间(秒)
+
+    返回:
+        dict: {
+            "k_bw_per_ceb": 小周期次数,
+            "ton_water_energy_kWh_per_m3": 吨水电耗,
+            "recovery": 回收率,
+            "net_delivery_rate_m3ph": 净供水率 (m³/h),
+            "daily_prod_time_h": 日均产水时间 (小时/天)
+            "max_permeability": 全周期最高渗透率(lmh/bar)
+        }
+    """
+    # 将跨膜压差写入参数
+    p.TMP0 = TMP0
+
+    # 模拟该参数下的超级周期
+    info, next_params = simulate_one_supercycle(p, L_s, t_bw_s)
+
+    # 获得模型模拟周期信息
+    k_bw_per_ceb = info["k_bw_per_ceb"]
+    ton_water_energy_kWh_per_m3 = info["ton_water_energy_kWh_per_m3"]
+    recovery = info["recovery"]
+    daily_prod_time_h = info["daily_prod_time_h"]
+
+    # 获得模型模拟周期内最高跨膜压差/最低跨膜压差
+    if max_tmp_during_filtration is None:
+        max_tmp_during_filtration = info["max_TMP_during_filtration"]
+    if min_tmp_during_filtration is None:
+        min_tmp_during_filtration = info["min_TMP_during_filtration"]
+
+    # 计算最高渗透率
+    max_permeability = 100 * p.q_UF / (128*40) / min_tmp_during_filtration
+
+
+    return {
+        "k_bw_per_ceb": k_bw_per_ceb,
+        "ton_water_energy_kWh_per_m3": ton_water_energy_kWh_per_m3,
+        "recovery": recovery,
+        "daily_prod_time_h": daily_prod_time_h,
+        "max_permeability": max_permeability
+    }
+
+
+# ==============================
+# 示例调用
+# ==============================
+if __name__ == "__main__":
+    # -------------------------
+    # 1. 初始化参数
+    # -------------------------
+    uf_params = UFParams()
+    TMP0 = 0.01   # 原始跨膜压差
+
+    # -------------------------
+    # 2. 调用模型做一次决策(只输出 L_s)
+    # -------------------------
+    model_decide_result = run_uf_DQN_decide(uf_params, TMP0)
+    model_L_s = model_decide_result["L_s"]     # 只输出 L_s
+    print(f"模型决策进水时长 L_s = {model_L_s}")
+
+    # -------------------------
+    # 3. 工厂当前值 + 模型上一轮值(示例值)
+    # -------------------------
+    current_L_s = 3800
+    model_prev_L_s = 4040
+
+    # -------------------------
+    # 4. 生成 PLC 指令(新版仅 L_s)
+    # -------------------------
+    plc_L_s = generate_plc_instructions(current_L_s,model_prev_L_s,model_L_s)
+
+    print(f"PLC 指令 L_s = {plc_L_s}")
+
+    # -------------------------
+    # 5. 反洗时长由工厂参数决定/固定值
+    #    (新模型不输出 t_bw_s)
+    # -------------------------
+    plc_t_bw_s = uf_params.fixed_t_bw_s
+
+    # -------------------------
+    # 6. 工厂 TMP 最大/最小(可为空 None)
+    # -------------------------
+    max_tmp_during_filtration = 0.050176
+    min_tmp_during_filtration = 0.012496
+
+    # -------------------------
+    # 7. 计算周期指标(模型动作实际效果)
+    # -------------------------
+    execution_result = calc_uf_cycle_metrics(
+        p=uf_params,
+        TMP0=TMP0,
+        max_tmp_during_filtration=max_tmp_during_filtration,
+        min_tmp_during_filtration=min_tmp_during_filtration,
+        L_s=plc_L_s,
+        t_bw_s=plc_t_bw_s   # 仍需要反洗时长参数
+    )
+
+    # -------------------------
+    # 8. 打印结果
+    # -------------------------
+    print("\n===== 单步决策结果 =====")
+    print(f"模型动作编号: {model_decide_result['action']}")
+    print(f"模型选择的 L_s: {model_L_s} 秒")
+    print(f"PLC 下发 L_s: {plc_L_s} 秒")
+    print(f"PLC 下发反洗时长(固定) t_bw_s: {plc_t_bw_s} 秒")
+
+    print(f"周期对应的反洗次数: {execution_result['k_bw_per_ceb']}")
+    print(f"吨水电耗: {execution_result['ton_water_energy_kWh_per_m3']}")
+    print(f"回收率: {execution_result['recovery']}")
+    print(f"日均产水时间: {execution_result['daily_prod_time_h']}")
+    print(f"最高渗透率: {execution_result['max_permeability']}")

+ 29 - 50
models/uf-rl/进水动作版超滤训练源码/DQN_env.py → models/uf-rl/进水动作版超滤训练源码/fixed_DQN_env.py

@@ -135,6 +135,7 @@ class UFParams:
     q_bw_m3ph: float = 1000.0
     # 物理反洗流量(m³/h)
     # 说明:反洗流量通常为正常过滤流量的 2-3 倍
+    fixed_t_bw_s = 60 # 固定物理反洗时间
 
     # ========== CEB 化学反洗参数 ==========
     T_ceb_interval_h: float = 48.0
@@ -776,28 +777,12 @@ class UFSuperCycleEnv(gym.Env):
             # 使用预加载的模型(用于并行环境避免重复加载)
             self.resistance_model_fp, self.resistance_model_bw = resistance_models
 
-        # ========== 构建离散动作空间 ==========
+        # ========== 构建进水时间离散动作空间 ==========
         # 过滤时长候选值(例:3800, 3860, 3920, ..., 5940, 6000秒)
-        self.L_values = np.arange(
-            self.base_params.L_min_s,
-            self.base_params.L_max_s,
-            self.base_params.L_step_s
-        )
-        
-        # 反洗时长候选值(例:40, 45, 50, 55, 60秒)
-        self.t_bw_values = np.arange(
-            self.base_params.t_bw_min_s,
-            self.base_params.t_bw_max_s + self.base_params.t_bw_step_s,  # +step确保包含上限
-            self.base_params.t_bw_step_s
-        )
-
-        self.num_L = len(self.L_values)      # 过滤时长选项数
-        self.num_bw = len(self.t_bw_values)  # 反洗时长选项数
-
-        # 定义单一离散动作空间(笛卡尔积编码)
-        # 动作编号 action = L_idx × num_bw + t_bw_idx
-        # 例:num_L=37, num_bw=5 → 总动作数=185
-        self.action_space = spaces.Discrete(self.num_L * self.num_bw)
+        self.L_values = np.arange( self.base_params.L_min_s,
+                                   self.base_params.L_max_s,
+                                   self.base_params.L_step_s )
+        self.action_space = spaces.Discrete(len(self.L_values))
 
         # ========== 定义状态空间 ==========
         # 8维连续状态,归一化到 [0, 1]
@@ -1008,19 +993,17 @@ class UFSuperCycleEnv(gym.Env):
 
         return obs
 
-    def _get_action_values(self, action):
-        """
-        将动作还原为实际时长
-        """
-        L_idx = action // self.num_bw
-        t_bw_idx = action % self.num_bw
-        return self.L_values[L_idx], self.t_bw_values[t_bw_idx]
+    def _get_action_values(self, action: int):
+        """ 新版动作解释函数: action 只对应一个 L_s,不再包含 t_bw_s。 """
+
+        L_s = self.L_values[action]
+        return L_s
 
     def step(self, action):
         self.current_step += 1
-        L_s, t_bw_s = self._get_action_values(action)
+        L_s= self._get_action_values(action)
         L_s = np.clip(L_s, self.base_params.L_min_s, self.base_params.L_max_s)
-        t_bw_s = np.clip(t_bw_s, self.base_params.t_bw_min_s, self.base_params.t_bw_max_s)
+        t_bw_s = self.current_params.fixed_t_bw_s
 
         # 模拟超级周期
         info, next_params = simulate_one_supercycle(self.current_params, L_s, t_bw_s)
@@ -1034,7 +1017,7 @@ class UFSuperCycleEnv(gym.Env):
             terminated = False
         else:
             # 中途失败惩罚
-            reward = -10
+            reward = -20
             terminated = True
 
         # 判断是否到达最大步数
@@ -1046,25 +1029,21 @@ class UFSuperCycleEnv(gym.Env):
         info["feasible"] = feasible
         info["step"] = self.current_step
 
-        # # ===================== 测试终末奖励:鼓励 TMP 接近初始状态 =====================
-        # # 仅在 episode 自然结束(满步但未提前失败)时触发
-        # if truncated and not terminated:
-        #     TMP_initial = self.TMP0  # reset 时记录的初始 TMP
-        #     TMP_final = next_obs[0]  # next_obs 提供的最终 TMP
-        #
-        #     delta_ratio = abs((TMP_final - TMP_initial) / TMP_initial)
-        #
-        #     alpha = 4.0  # TMP 偏差敏感度
-        #     gamma = 5.0  # 奖励幅度
-        #     stability_reward = gamma * (np.exp(-alpha * delta_ratio) - 1) # 量级在0到-5之间
-        #
-        #     reward += stability_reward
-        #     terminated = True  # episode 正式结束
-
-        # # ===================== 测试结果 =====================
-        # 增加该奖励后强化学习依然能保证奖励收敛,但是损失函数在2-3之间反复震荡,无法降低,见reward_test&loss_test
-        # 原设想是只能听在大额偏移发生前能通过该奖励学习到提前减小偏移步伐,但是实际训练时该惩罚反复被触发
-        # 推测是终末的大额奖惩无法有效传递回过往时间步引导智能体学习,可能由于状态中缺少预测值,智能体会将其观测为不可控事件,暂时不添加该奖励,TODO:等待优化
+        # ===================== 测试终末奖励:鼓励 TMP 接近初始状态 =====================
+        # 仅在 episode 自然结束(满步但未提前失败)时触发
+        if truncated and not terminated:
+            TMP_initial = self.TMP0  # reset 时记录的初始 TMP
+            TMP_final = next_obs[0]  # next_obs 提供的最终 TMP
+
+            delta_ratio = abs((TMP_final - TMP_initial) / TMP_initial)
+
+            alpha = 4.0  # TMP 偏差敏感度
+            gamma = 5.0  # 奖励幅度
+            stability_reward = gamma * (np.exp(-alpha * delta_ratio) - 1) # 量级在0到-5之间
+
+            reward += stability_reward
+            terminated = True  # episode 正式结束
+
 
         return next_obs, reward, terminated, truncated, info
 

+ 3 - 3
models/uf-rl/进水动作版超滤训练源码/DQN_train.py → models/uf-rl/进水动作版超滤训练源码/fixed_DQN_train.py

@@ -32,7 +32,7 @@ from stable_baselines3.common.monitor import Monitor
 from stable_baselines3.common.vec_env import DummyVecEnv
 from stable_baselines3.common.callbacks import BaseCallback
 
-from DQN_env import UFParams, UFSuperCycleEnv
+from fixed_DQN_env import UFParams, UFSuperCycleEnv
 
 
 # ==================== DQN超参数配置类 ====================
@@ -515,7 +515,7 @@ if __name__ == "__main__":
     训练脚本入口
     
     使用方法:
-        python DQN_train.py
+        python fixed_DQN_train.py
     
     训练参数:
         - total_timesteps=150000: 总训练步数
@@ -530,7 +530,7 @@ if __name__ == "__main__":
     params = UFParams()
     
     # 执行训练
-    train_uf_rl_agent(params, total_timesteps=300000)
+    train_uf_rl_agent(params, total_timesteps=200000)
     
     print("\n🎉 训练流程全部完成!")
 

BIN=BIN
models/uf-rl/进水动作版超滤训练源码/model/DQN_1/events.out.tfevents.1764511457.DESKTOP-L4D89R1.22304.0


BIN=BIN
models/uf-rl/进水动作版超滤训练源码/model/dqn_model.zip