Prechádzať zdrojové kódy

feat: 1. 重新训练了锡山与龙亭的模型;2. 优化了膜阻力相关的奖励计算,并更新了兰考和安镇的模型

junc_WHU 3 týždňov pred
rodič
commit
f989f54839

BIN
models/uf-rl/anzhen/48h_dqn_model.zip


+ 2 - 2
models/uf-rl/env/uf_env.py

@@ -422,9 +422,9 @@ class UFSuperCycleEnv(gym.Env):
 
         ref_residual = 1 / self.max_episode_steps
 
-        res_penalty = -np.tanh(
+        res_penalty = np.tanh(
             self.reward_params.k_res *
-            (residual_ratio / ref_residual - 1)
+            (1 - residual_ratio / ref_residual)
         )
 
         # ==============================

+ 1 - 1
models/uf-rl/env/uf_physics.py

@@ -398,7 +398,7 @@ class UFPhysicsModel:
             1e-6
         )
         if delta_R_allow > 50:
-            residual_ratio = delta_R / delta_R_allow
+            residual_ratio = max(delta_R / delta_R_allow, 0.0)
         else:
             residual_ratio = 1.0
 

BIN
models/uf-rl/lankao/48h_dqn_model.zip


BIN
models/uf-rl/longting/48h_dqn_model.zip


+ 2 - 2
models/uf-rl/longting/env_config.yaml

@@ -102,8 +102,8 @@ UFRewardParams:
   k_cost: 3.0
   chemical_price: 13.0
   energy_price: 0.667
-  cost_low: 0.06
-  cost_high:  0.10
+  cost_low: 0.10
+  cost_high:  0.15
   w_cost: 1.0
 
   # ===== 残余污染 =====

+ 4 - 2
models/uf-rl/rl_model/DQN/uf_train/dqn_trainer.py

@@ -14,7 +14,7 @@ class DQNTrainer:
     - 在测试集环境上评估策略
     """
 
-    def __init__(self, env, params, callback=None,PROJECT_ROOT=None):
+    def __init__(self, env, params, callback=None,PROJECT_ROOT=None,DIR_NAME=None):
         """
         初始化训练器
 
@@ -27,9 +27,11 @@ class DQNTrainer:
         self.params = params
         self.callback = callback
         self.PROJECT_ROOT = PROJECT_ROOT
+        self.dir_name = DIR_NAME
         self.log_dir = self._create_log_dir()  # 创建TensorBoard日志目录
         self.model = self._create_model()      # 创建DQN模型
 
+
     # ------------------- 私有方法 -------------------
     def _create_log_dir(self):
         """
@@ -54,7 +56,7 @@ class DQNTrainer:
 
         # 4️⃣ 固定日志存放位置:PROJECT_ROOT/model_result/uf_dqn_tensorboard/<DIR_NAME>
         # 假设在 run_dqn_train.py 中定义了 PROJECT_ROOT = "models/uf-rl"
-        base_dir = os.path.join(self.PROJECT_ROOT, "model_result", "uf_dqn_tensorboard","anzhen48h")
+        base_dir = os.path.join(self.PROJECT_ROOT, "model_result", "uf_dqn_tensorboard",self.dir_name)
         os.makedirs(base_dir, exist_ok=True)
 
         # 5️⃣ 完整日志目录路径

+ 3 - 1
models/uf-rl/rl_model/DQN/uf_train/run_dqn_train.py

@@ -178,7 +178,8 @@ def main():
         env=train_env,
         params=dqn_params,
         callback=callback,
-        PROJECT_ROOT=PROJECT_ROOT
+        PROJECT_ROOT=PROJECT_ROOT,
+        DIR_NAME=DIR_NAME,
     )
 
 
@@ -297,5 +298,6 @@ if __name__ == "__main__":
 
     ENV_CONFIG_PATH = PROJECT_ROOT / "anzhen" / "env_config.yaml"
     MODEL_CONFIG_PATH = PROJECT_ROOT / "anzhen" / "dqn_config.yaml"
+    DIR_NAME = "anzhen48h"
 
     main()

BIN
models/uf-rl/xishan/48h_dqn_model.zip


BIN
models/uf-rl/xishan/48times_dqn_model.zip


+ 18 - 14
models/uf-rl/xishan/env_config.yaml

@@ -70,6 +70,11 @@ UFPhysicsParams:
     4740: 0.0995
     4800: 0.0993
 
+  p_feed_kw: 25.0
+  p_bw_kw: 30.0
+  dose_min: 0.05
+  dose_max: 0.15
+
 
 UFActionSpec:
   L_min_s: 3800.0
@@ -81,6 +86,7 @@ UFActionSpec:
 
 
 UFRewardParams:
+  # ===== TMP 安全与惩罚 =====
   global_TMP_hard_limit: 0.08
   global_TMP_soft_limit: 0.06
   w_tmp_hard: 5.0
@@ -88,20 +94,18 @@ UFRewardParams:
   p: 3.0
   w_trend: 1.0
 
-  k_rec: 5.0
-  rec_low: 0.92
-  rec_high: 0.99
-  w_rec: 1.0
-
-  k_res: 10.0
-  residual_ref_ratio: null    # ✅ Python None 的正确表示
-  w_res: 2.0
+  # ===== 经济成本 =====
+  k_cost: 3.0
+  chemical_price: 13.0
+  energy_price: 0.667
+  cost_low: 0.08
+  cost_high:  0.12
+  w_cost: 1.0
 
-  k_energy: 5.0
-  energy_low: 0.0993
-  energy_high: 0.1034
-  energy_ref: 0.1011
-  w_energy: 1.0
+  # ===== 残余污染 =====
+  k_res: 3.0
+  residual_ref_ratio: null
+  w_res: 1.0
 
 
 UFStateBounds:
@@ -123,5 +127,5 @@ UFStateBounds:
   power_min: 0.4
   power_max: 2.2
 
-  ceb_removal_min: 40.0
+  ceb_removal_min: 60.0
   ceb_removal_max: 250.0