Bläddra i källkod

feat:更新了带异常接口的超滤脚本,run_dqn_decide.py新增输入units_to_run

junc_WHU 19 timmar sedan
förälder
incheckning
3f0532b1e6

+ 4 - 1
models/uf-rl/anzhen/env_config.yaml

@@ -69,10 +69,13 @@ UFPhysicsParams:
     4740: 0.0995
     4800: 0.0993
 
-  p_feed_kw: 9.0
+  p_feed_kw_min: 5.0
+  p_feed_kw_max: 15.0
   p_bw_kw: 30.0
+
   dose_min: 0.05
   dose_max: 0.15
+  dose_area: 0.56
 
 
 UFActionSpec:

+ 2 - 1
models/uf-rl/env/env_params.py

@@ -270,7 +270,8 @@ class UFPhysicsParams:
     })
 
     # 实际吨水电耗计算指标
-    p_feed_kw: float = 19.0
+    p_feed_kw_min: float = 15.0
+    p_feed_kw_max: float = 20.0
     p_bw_kw: float = 15.0
 
     # 实际吨水药耗计算指标

+ 29 - 15
models/uf-rl/env/uf_env.py

@@ -375,13 +375,18 @@ class UFSuperCycleEnv(gym.Env):
         # ==============================
         # TMP 状态惩罚
         # ==============================
+        # ==============================
+        # TMP 状态惩罚(第一阶段优化版)
+        # ==============================
 
         tmp = info["max_TMP_during_filtration"]
         tmp_soft = self.reward_params.global_TMP_soft_limit
         tmp_hard = self.reward_params.global_TMP_hard_limit
 
+        HARD_PENALTY = 2.0  # 替代原来的5
+
         if self.tmp_over_limit_flag:
-            tmp_state_penalty = -self.reward_params.w_tmp_hard
+            tmp_state_penalty = -HARD_PENALTY
 
         elif tmp <= tmp_soft:
             tmp_state_penalty = 0.0
@@ -390,26 +395,27 @@ class UFSuperCycleEnv(gym.Env):
             x = (tmp - tmp_soft) / (tmp_hard - tmp_soft)
 
             tmp_state_penalty = -self.reward_params.w_tmp * (
-                    x ** self.reward_params.p
+                    x ** self.reward_params.p  # p建议=4
             )
 
         else:
-            tmp_state_penalty = -self.reward_params.w_tmp_hard
+            tmp_state_penalty = -HARD_PENALTY
 
         # ==============================
-        # TMP 趋势惩罚
+        # TMP 趋势惩罚(加死区)
         # ==============================
 
         tmp_trend_penalty = 0.0
 
         if info_next is not None:
-            delta_tmp = (
-                    info_next["max_TMP_during_filtration"] - tmp
-            )
+            delta_tmp = info_next["max_TMP_during_filtration"] - tmp
 
-            # 只惩罚TMP上升
             delta_tmp = max(delta_tmp, 0)
 
+            # 死区(关键)
+            if delta_tmp < 0.001:
+                delta_tmp = 0.0
+
             tmp_trend_penalty = -self.reward_params.w_trend * delta_tmp
 
         tmp_penalty = tmp_state_penalty + tmp_trend_penalty
@@ -417,16 +423,22 @@ class UFSuperCycleEnv(gym.Env):
         # ==============================
         # 残余污染惩罚
         # ==============================
+        delta_R = info["delta_R"]
+        delta_R_allow = info["delta_R_allow"]
 
-        residual_ratio = info["residual_ratio"]
+        # 归一化
+        ratio = delta_R / (delta_R_allow + 1e-6)
 
-        ref_residual = 1 / self.max_episode_steps
+        # 基础响应
+        k_res = self.reward_params.k_res  # 2~4
+        base = np.tanh(-k_res * ratio)
 
-        res_penalty = np.tanh(
-            self.reward_params.k_res *
-            (1 - residual_ratio / ref_residual)
-        )
+        # 污染阶段调制
+        c = 50  # 控制“污染敏感度”
+        severity = np.tanh(c / (delta_R_allow + 1e-6))
 
+        # 最终奖励
+        res_penalty = base * (1 + 0.5 * severity)
         # ==============================
         # 经济成本(电耗 + 药耗)
         # ==============================
@@ -459,10 +471,12 @@ class UFSuperCycleEnv(gym.Env):
 
         total_reward = (
                 econ_reward
-                + res_penalty
+                + 1.5 * res_penalty
                 + tmp_penalty
         )
 
+        total_reward += 1
+
         return (
             total_reward,
             tmp_penalty,

+ 2 - 1
models/uf-rl/env/uf_physics.py

@@ -354,7 +354,7 @@ class UFPhysicsModel:
 
         # 参考吨水电耗(从查找表获取最接近的值)
         # 从物理参数类中获取查找表
-        closest_L = min(self.p.energy_lookup.keys(), key=lambda x: abs(x - L_s))
+        closest_L = min(self.p.energy_lookup.keys(), key=lambda x: abs(float(x) - L_s))
         refer_ton_water_energy = self.p.energy_lookup[closest_L]  # [kWh/m³]
 
         # 实际吨水电耗计算
@@ -440,6 +440,7 @@ class UFPhysicsModel:
             "R_after_ceb": R_after_ceb,  # CEB后膜阻力
             "max_residual_increase_per_run": max_residual_increase,  # 最大残余污染增量
             "delta_R_allow": delta_R_allow,  # 污染允许增长空间
+            "delta_R": delta_R, # 污染上升值
             "residual_ratio" : residual_ratio, # 污染上升比例
 
             # 能耗指标

+ 4 - 1
models/uf-rl/lankao/env_config.yaml

@@ -69,10 +69,13 @@ UFPhysicsParams:
     4740: 0.0995
     4800: 0.0993
 
-  p_feed_kw: 15.0
+  p_feed_kw_min: 15.0
+  p_feed_kw_max: 20.0
   p_bw_kw: 25.0
+
   dose_min: 0.10
   dose_max: 0.20
+  dose_area: 0.56
 
 
 UFActionSpec:

+ 8 - 5
models/uf-rl/longting/env_config.yaml

@@ -70,10 +70,13 @@ UFPhysicsParams:
     4740: 0.0995
     4800: 0.0993
 
-  p_feed_kw:  18.0
-  p_bw_kw:  20.0
+  p_feed_kw_min: 16.0
+  p_feed_kw_max: 19.0
+  p_bw_kw: 20.0
+
   dose_min:  0.10
   dose_max:  0.20
+  dose_area: 0.56
 
 
 
@@ -99,11 +102,11 @@ UFRewardParams:
   w_trend: 1.0
 
   # ===== 经济成本 =====
-  k_cost: 3.0
+  k_cost: 1.0
   chemical_price: 13.0
   energy_price: 0.667
-  cost_low: 0.10
-  cost_high:  0.15
+  cost_low: 0.11
+  cost_high:  0.14
   w_cost: 1.0
 
   # ===== 残余污染 =====

+ 56 - 5
models/uf-rl/rl_model/DQN/uf_decide/run_dqn_decide.py

@@ -46,6 +46,52 @@ def build_physics(IS_TIMES, phys_params,state_bounds):
     )
     return physics
 
+
+def check_state_bounds(current_state, state_bounds, unit_name):
+    """
+    Report whether the current UF state lies inside the configured bounds.
+
+    TMP, q_UF and temp are tested in that order against the closed ranges
+    [TMP0_min, TMP0_max], [q_UF_min, q_UF_max] and [temp_min, temp_max];
+    the first reading that is not inside its range ends the check.
+
+    Args:
+        current_state: object exposing ``TMP``, ``q_UF`` and ``temp``.
+        state_bounds: object exposing the six ``*_min`` / ``*_max`` limits
+            named above.
+        unit_name: unit identifier (e.g. "UF1"); used as the prefix of the
+            reported feature name.
+
+    Returns:
+        dict | None: ``{"error_time": str, "error_feature": str}`` when a
+        reading is out of range, otherwise ``None``.  ``error_time`` is the
+        naive local time of this call and ``error_feature`` is always
+        ``f"{unit_name}Per"``, whichever reading failed.
+    """
+    from datetime import datetime
+
+    # Timestamp is taken once, up front, and shared by every failure path.
+    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+    # (state attribute, lower-limit attribute, upper-limit attribute)
+    range_checks = (
+        ("TMP", "TMP0_min", "TMP0_max"),
+        ("q_UF", "q_UF_min", "q_UF_max"),
+        ("temp", "temp_min", "temp_max"),
+    )
+
+    for reading_name, low_name, high_name in range_checks:
+        # Limits are fetched lazily so a later pair is never touched once an
+        # earlier reading has already failed.
+        low = getattr(state_bounds, low_name)
+        high = getattr(state_bounds, high_name)
+        # "not (low <= x <= high)" also flags NaN, unlike "x < low or x > high".
+        if not (low <= getattr(current_state, reading_name) <= high):
+            return {"error_time": stamp, "error_feature": f"{unit_name}Per"}
+
+    return None
+
+
 def generate_plc_instructions(action_spec,current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s):
     """
     根据工厂当前值、模型上一轮决策值和模型当前轮决策值,生成PLC指令。
@@ -227,9 +273,10 @@ def run_dqn_decide(
 # ==============================
 if __name__ == "__main__":
 
-    MODEL_PATH = UF_RL_ROOT / "xishan" / "48h_dqn_model.zip"
-    ENV_CONFIG_PATH = UF_RL_ROOT / "xishan" / "env_config.yaml"
-    TMP0 = 0.019  # 原始 TMP0
+    MODEL_PATH = UF_RL_ROOT / "anzhen" / "48h_dqn_model.zip"
+    ENV_CONFIG_PATH = UF_RL_ROOT / "anzhen" / "env_config.yaml"
+    units_to_run = ["UF1"] # 新增输入:本次调用的机组对象名
+    TMP0 = 0.07  # 原始 TMP0
     q_UF = 300 # 进水流量
     temp = 20.0 #进水温度
     IS_TIMES = False # 新增指定变量,表示CEB间隔为时间控制/次数控制,T表示48次bw一次CEB,F表示48h一次CEB
@@ -249,6 +296,12 @@ if __name__ == "__main__":
 
     physics = build_physics(IS_TIMES, phys_params,state_bounds)
 
+    # ========== 异常检查(仅检查,不中断,后续归一化时将异常状态强制归一化至上下限) ==========
+    for unit_name in units_to_run:
+        error_result = check_state_bounds(current_state, state_bounds, unit_name)
+        if error_result:
+            print(f"错误发生时间: {error_result['error_time']};错误特征量:{error_result['error_feature']}")
+
     action_id, model_L_s, model_t_bw_s = run_dqn_decide(
         model_path=MODEL_PATH,
         physics=physics,
@@ -265,8 +318,6 @@ if __name__ == "__main__":
     L_s, t_bw_s = generate_plc_instructions(action_spec, current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s,
                                             model_t_bw_s)  # 获取模型下发指令
 
-    L_s = 4100
-    t_bw_s = 96
     max_tmp_during_filtration = 0.050176 # 新增工厂数据接口:周期最高/最低跨膜压差,无工厂数据接入时传入None,calc_uf_cycle_metrics()自动获取模拟周期中的跨膜压差最值
     min_tmp_during_filtration = 0.012496
     execution_result = calc_uf_cycle_metrics(current_state, max_tmp_during_filtration, min_tmp_during_filtration, L_s, t_bw_s)

+ 7 - 4
models/uf-rl/xishan/env_config.yaml

@@ -70,10 +70,13 @@ UFPhysicsParams:
     4740: 0.0995
     4800: 0.0993
 
-  p_feed_kw: 25.0
-  p_bw_kw: 30.0
-  dose_min: 0.05
-  dose_max: 0.15
+  p_feed_kw_min: 15.0
+  p_feed_kw_max: 20.0
+  p_bw_kw: 15.0
+
+  dose_min: 0.10
+  dose_max: 0.30
+  dose_area: 0.56
 
 
 UFActionSpec: