před 1 měsícem · 3dcc843ad2
--- a/models/uf-rl/uf_data_process/fit.py
+++ b/models/uf-rl/uf_data_process/fit.py
@@ -105,6 +105,13 @@ class ShortTermCycleFoulingFitter:
 
															             if len(seg) < 2:
														
 
															                 continue
														
 
															+            if not pd.api.types.is_datetime64_any_dtype(seg["time"]):
														
 
															+                seg["time"] = pd.to_datetime(seg["time"], errors="coerce")
														
 
															+            seg = seg.dropna(subset=["time"])
														
 
															+            if len(seg) == 0:
														
 
															+                # 该段完全无效，直接跳过
														
 
															+                continue
														
 
															+
														
 
															             # 局部时间（秒）
														
 
															             try:
														
 
															                 t = (seg["time"] - seg["time"].iloc[0]).dt.total_seconds().astype(float)
														
--- a/models/uf-rl/uf_data_process/plot.py
+++ b/models/uf-rl/uf_data_process/plot.py
@@ -0,0 +1,93 @@
 
															+import os
														
 
															+import glob
														
 
															+import pandas as pd
														
 
															+import numpy as np
														
 
															+import matplotlib.pyplot as plt
														
 
															+from matplotlib.font_manager import FontProperties
														
 
															+
														
 
															+# ===================== 配置 =====================
														
 
															+data_dir = r"E:\Greentech\models\uf-rl\datasets\processed\segments"
														
 
															+target_col = "cycle_long_r2"
														
 
															+
														
 
															+# ===================== 中文字体设置 =====================
														
 
															+# 注意：这里使用 SimHei 字体，可显示中文
														
 
															+font = FontProperties(fname=r"C:\Windows\Fonts\simhei.ttf", size=12)
														
 
															+
														
 
															+# ===================== 读取所有 CSV =====================
														
 
															+all_files = glob.glob(os.path.join(data_dir, "*.csv"))
														
 
															+
														
 
															+values = []
														
 
															+
														
 
															+for file in all_files:
														
 
															+    try:
														
 
															+        df = pd.read_csv(file)
														
 
															+        if target_col in df.columns:
														
 
															+            vals = df[target_col].dropna().values
														
 
															+            values.append(vals)
														
 
															+    except Exception as e:
														
 
															+        print(f"读取失败: {file}, 错误: {e}")
														
 
															+
														
 
															+# 合并所有数据
														
 
															+if len(values) == 0:
														
 
															+    raise ValueError("未在任何 CSV 中找到有效的 cycle_long_R2 数据")
														
 
															+
														
 
															+data = np.concatenate(values)
														
 
															+total_count = len(data)
														
 
															+
														
 
															+# ===================== 定义区间 =====================
														
 
															+bins = [
														
 
															+    -np.inf,
														
 
															+    0.0,
														
 
															+    0.5,
														
 
															+    0.6,
														
 
															+    0.7,
														
 
															+    0.8,
														
 
															+    0.9,
														
 
															+    1.0
														
 
															+]
														
 
															+
														
 
															+labels = [
														
 
															+    "<0",
														
 
															+    "0 – 0.5",
														
 
															+    "0.5 – 0.6",
														
 
															+    "0.6 – 0.7",
														
 
															+    "0.7 – 0.8",
														
 
															+    "0.8 – 0.9",
														
 
															+    "0.9 – 1.0"
														
 
															+]
														
 
															+
														
 
															+# ===================== 统计分布 =====================
														
 
															+counts = pd.cut(
														
 
															+    data,
														
 
															+    bins=bins,
														
 
															+    labels=labels,
														
 
															+    right=True,
														
 
															+    include_lowest=True
														
 
															+).value_counts().sort_index()
														
 
															+
														
 
															+ratios = counts / total_count * 100
														
 
															+
														
 
															+# ===================== 输出结果 =====================
														
 
															+result = pd.DataFrame({
														
 
															+    "样本数": counts,
														
 
															+    "占比 (%)": ratios.round(2)
														
 
															+})
														
 
															+
														
 
															+print(f"\n总样本数: {total_count}\n")
														
 
															+print(result)
														
 
															+
														
 
															+# ===================== 绘制柱状图 =====================
														
 
															+plt.figure(figsize=(10, 6))
														
 
															+plt.bar(labels, ratios, color='skyblue', edgecolor='black')
														
 
															+plt.title("cycle_long_R2 数据分布柱状图", fontproperties=font)
														
 
															+plt.xlabel("区间", fontproperties=font)
														
 
															+plt.ylabel("占比 (%)", fontproperties=font)
														
 
															+plt.ylim(0, 100)
														
 
															+plt.grid(axis='y', linestyle='--', alpha=0.7)
														
 
															+
														
 
															+# 在柱子上显示百分比
														
 
															+for i, v in enumerate(ratios):
														
 
															+    plt.text(i, v + 1, f"{v:.1f}%", ha='center', va='bottom', fontsize=10, fontproperties=font)
														
 
															+
														
 
															+plt.tight_layout()
														
 
															+plt.show()
														
--- a/models/uf-rl/uf_data_process/run_ufdata_pipeline.py
+++ b/models/uf-rl/uf_data_process/run_ufdata_pipeline.py
@@ -1,7 +1,9 @@
 
															 import os
														
 
															 import sys
														
 
															+from pathlib import Path
														
 
															-SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
														
 
															+THIS_FILE = Path(__file__).resolve()
														
 
															+UF_RL_ROOT = THIS_FILE.parents[1]
														
 
															 from load import UFConfigLoader
														
 
															 from pipeline import UFAnalysisPipeline
														
@@ -13,8 +15,8 @@ def main():
 
															     print("=====================================")
														
 
															     # 1. 加载配置文件
														
 
															-    config_path = os.path.join(SCRIPT_DIR, "uf_analyze_config.yaml")
														
 
															-    cfg = UFConfigLoader(config_path)
														
 
															+    CONFIG_PATH = UF_RL_ROOT / "config" / "uf_analyze_config.yaml"
														
 
															+    cfg = UFConfigLoader(CONFIG_PATH)
														
 
															     # 2. 创建 pipeline
														
 
															     pipeline = UFAnalysisPipeline(cfg)
														
--- a/models/uf-rl/uf_train/env/reset_plot.py
+++ b/models/uf-rl/uf_train/env/reset_plot.py
@@ -0,0 +1,58 @@
 
															+import numpy as np
														
 
															+import matplotlib.pyplot as plt
														
 
															+
														
 
															+# 训练进度
														
 
															+progress = np.linspace(0, 1, 200)
														
 
															+
														
 
															+# -----------------------------
														
 
															+# 参数设置（可调）
														
 
															+# -----------------------------
														
 
															+alpha = 0.5    # 虚拟工况最终最大占比
														
 
															+k = 10.0       # Sigmoid 陡峭程度
														
 
															+p0 = 0.7       # 虚拟工况启动拐点
														
 
															+beta = 0.5     # 扰动工况线性增长系数
														
 
															+
														
 
															+# -----------------------------
														
 
															+# 权重定义
														
 
															+# -----------------------------
														
 
															+# 虚拟工况（非线性，后期快速增长）
														
 
															+w_virtual = alpha / (1.0 + np.exp(-k * (progress - p0)))
														
 
															+
														
 
															+# 扰动工况（线性增长）
														
 
															+w_perturb = beta * progress
														
 
															+
														
 
															+# 真实工况（剩余比例）
														
 
															+w_real = 1.0 - w_virtual - w_perturb
														
 
															+w_real = np.clip(w_real, 0.0, 1.0)  # 数值安全
														
 
															+
														
 
															+# -----------------------------
														
 
															+# 扰动幅度
														
 
															+# -----------------------------
														
 
															+perturb_scale = 0.02 + 0.04 * progress
														
 
															+
														
 
															+# -----------------------------
														
 
															+# 绘图
														
 
															+# -----------------------------
														
 
															+fig, ax1 = plt.subplots(figsize=(8, 5))
														
 
															+
														
 
															+ax1.plot(progress, w_real, label="w_real", linewidth=2)
														
 
															+ax1.plot(progress, w_perturb, label="w_perturb", linewidth=2)
														
 
															+ax1.plot(progress, w_virtual, label="w_virtual", linewidth=2)
														
 
															+
														
 
															+ax1.set_xlabel("Training Progress")
														
 
															+ax1.set_ylabel("Sampling Weights")
														
 
															+ax1.set_ylim(0, 1.05)
														
 
															+ax1.grid(True, linestyle="--", alpha=0.5)
														
 
															+ax1.legend(loc="upper left")
														
 
															+
														
 
															+# 第二纵轴：扰动幅度
														
 
															+ax2 = ax1.twinx()
														
 
															+ax2.plot(progress, perturb_scale, label="perturb_scale",
														
 
															+         linestyle="--", linewidth=2)
														
 
															+ax2.set_ylabel("Perturb Scale")
														
 
															+ax2.set_ylim(0, 0.07)
														
 
															+ax2.legend(loc="upper right")
														
 
															+
														
 
															+plt.title("Progressive Reset Sampling Strategy")
														
 
															+plt.tight_layout()
														
 
															+plt.show()
														
--- a/models/uf-rl/uf_train/rl_model/DQN/model/reward.png
+++ b/models/uf-rl/uf_train/rl_model/DQN/model/reward.png
--- a/models/uf-rl/uf_train/rl_model/DQN/run_dqn_train.py
+++ b/models/uf-rl/uf_train/rl_model/DQN/run_dqn_train.py
@@ -175,23 +175,51 @@ def main():
 
															     # ========================================================
														
 
															     print("\n[Eval] Start validation rollout")
														
 
															+    TMP0_min = 0.01
														
 
															+    TMP0_max = 0.08
														
 
															+
														
 
															     rewards = []
														
 
															-    for _ in range(len(val_pool)):
														
 
															+    # ---------- 用于可视化的容器（只记录第一个 episode） ----------
														
 
															+    vis_tmp_series = []
														
 
															+    vis_action_series = []
														
 
															+
														
 
															+    for ep_idx in range(len(val_pool)):
														
 
															         obs = val_env.reset()
														
 
															         episode_reward = 0.0
														
 
															-        for _ in range(10):
														
 
															+        for step in range(10):
														
 
															+            # ====================================================
														
 
															+            # 可视化：只记录第一个 validation episode
														
 
															+            # ====================================================
														
 
															+            if ep_idx == 0:
														
 
															+                TMP0_norm = obs[0]
														
 
															+                TMP0 = (
														
 
															+                        TMP0_norm * (TMP0_max - TMP0_min)
														
 
															+                        + TMP0_min
														
 
															+                )
														
 
															+                vis_tmp_series.append(TMP0)
														
 
															+
														
 
															+            # ---------------- 策略决策 ----------------
														
 
															             action, _ = trainer.model.predict(
														
 
															                 obs, deterministic=True
														
 
															             )
														
 
															+
														
 
															+            if ep_idx == 0:
														
 
															+                vis_action_series.append(action[0])
														
 
															+
														
 
															+            # ---------------- 环境推进 ----------------
														
 
															             obs, reward, done, _ = val_env.step(action)
														
 
															             episode_reward += reward[0]
														
 
															+
														
 
															             if done:
														
 
															                 break
														
 
															         rewards.append(episode_reward)
														
 
															+    # ========================================================
														
 
															+    # 验证结果保存
														
 
															+    # ========================================================
														
 
															     rewards = np.asarray(rewards)
														
 
															     save_path = Path(trainer.log_dir) / "val_rewards.npy"
														
@@ -200,6 +228,39 @@ def main():
 
															     print(f"[Eval] Saved to {save_path}")
														
 
															     print(f"[Eval] Mean reward = {rewards.mean():.3f}")
														
 
															+    # ========================================================
														
 
															+    # 可视化（第一个 validation episode）
														
 
															+    # ========================================================
														
 
															+    import matplotlib.pyplot as plt
														
 
															+
														
 
															+    vis_tmp_series = np.asarray(vis_tmp_series)
														
 
															+    vis_action_series = np.asarray(vis_action_series)
														
 
															+    steps = np.arange(len(vis_tmp_series))
														
 
															+
														
 
															+    # ---------- TMP 曲线 ----------
														
 
															+    plt.figure()
														
 
															+    plt.plot(steps, vis_tmp_series, marker="o")
														
 
															+    plt.axhline(
														
 
															+        TMP0_max,
														
 
															+        linestyle="--",
														
 
															+        label="TMP Upper Limit"
														
 
															+    )
														
 
															+    plt.xlabel("Step")
														
 
															+    plt.ylabel("TMP (MPa)")
														
 
															+    plt.title("Validation Episode TMP Evolution")
														
 
															+    plt.legend()
														
 
															+    plt.grid(True)
														
 
															+    plt.show()
														
 
															+
														
 
															+    # ---------- Action 曲线 ----------
														
 
															+    plt.figure()
														
 
															+    plt.plot(steps, vis_action_series, marker="o")
														
 
															+    plt.xlabel("Step")
														
 
															+    plt.ylabel("Action")
														
 
															+    plt.title("Validation Episode Action Output")
														
 
															+    plt.grid(True)
														
 
															+    plt.show()
														
 
															+
														
 
															 # ============================================================
														
 
															 # 入口
														
--- a/models/uf-rl/uf_train/rl_model/common/train_entry.py
+++ b/models/uf-rl/uf_train/rl_model/common/train_entry.py
@@ -0,0 +1,186 @@
 
															+"""
														
 
															+通用强化学习训练入口（当前绑定 DQN，实现已验证）
														
 
															+仅负责：
														
 
															+- 构造环境
														
 
															+- 构造 Trainer
														
 
															+- 启动训练并保存模型
														
 
															+"""
														
 
															+
														
 
															+import random
														
 
															+from pathlib import Path
														
 
															+import numpy as np
														
 
															+import torch
														
 
															+
														
 
															+# ============================================================
														
 
															+# 1. 路径解析
														
 
															+# ============================================================
														
 
															+CURRENT_DIR = Path(__file__).resolve().parent
														
 
															+PROJECT_ROOT = CURRENT_DIR.parents[2]   # uf_train / uf-rl
														
 
															+
														
 
															+
														
 
															+# ============================================================
														
 
															+# 2. 导入：数据 / 环境
														
 
															+# ============================================================
														
 
															+from uf_train.data_to_rl.data_splitter import ResetStatePoolLoader
														
 
															+
														
 
															+from uf_train.env.uf_resistance_models_load import load_resistance_models
														
 
															+from uf_train.env.uf_physics import UFPhysicsModel
														
 
															+from uf_train.env.env_params import (
														
 
															+    UFPhysicsParams,
														
 
															+    UFStateBounds,
														
 
															+    UFRewardParams,
														
 
															+    UFActionSpec,
														
 
															+)
														
 
															+from uf_train.env.uf_env import UFSuperCycleEnv
														
 
															+
														
 
															+from uf_train.env.env_visual import UFEpisodeRecorder, UFTrainingCallback
														
 
															+
														
 
															+
														
 
															+# ============================================================
														
 
															+# 3. 导入：算法（当前为 DQN）
														
 
															+# ============================================================
														
 
															+from uf_train.rl_model.DQN.dqn_params import DQNParams
														
 
															+from uf_train.rl_model.DQN.dqn_trainer import DQNTrainer
														
 
															+
														
 
															+
														
 
															+# ============================================================
														
 
															+# 4. SB3 VecEnv
														
 
															+# ============================================================
														
 
															+from stable_baselines3.common.monitor import Monitor
														
 
															+from stable_baselines3.common.vec_env import DummyVecEnv
														
 
															+
														
 
															+
														
 
															+# ============================================================
														
 
															+# 5. 随机种子
														
 
															+# ============================================================
														
 
															+def set_global_seed(seed: int):
														
 
															+    random.seed(seed)
														
 
															+    np.random.seed(seed)
														
 
															+    torch.manual_seed(seed)
														
 
															+    torch.cuda.manual_seed_all(seed)
														
 
															+
														
 
															+    torch.backends.cudnn.deterministic = True
														
 
															+    torch.backends.cudnn.benchmark = False
														
 
															+
														
 
															+    print(f"[Seed] Global random seed = {seed}")
														
 
															+
														
 
															+
														
 
															+# ============================================================
														
 
															+# 6. Reset State Pool 加载
														
 
															+# ============================================================
														
 
															+def load_reset_state_pool():
														
 
															+    loader = ResetStatePoolLoader(
														
 
															+        csv_path=RESET_STATE_CSV,
														
 
															+        train_ratio=0.8,
														
 
															+        shuffle=True,
														
 
															+        random_state=RANDOM_SEED,
														
 
															+    )
														
 
															+
														
 
															+    train_pool, _ = loader.split()
														
 
															+
														
 
															+    print("[Data] Reset state pool loaded")
														
 
															+    print(f"       Train pool size: {len(train_pool)}")
														
 
															+
														
 
															+    return train_pool
														
 
															+
														
 
															+
														
 
															+# ============================================================
														
 
															+# 7. 环境构造函数
														
 
															+# ============================================================
														
 
															+def make_env(
														
 
															+    physics: UFPhysicsModel,
														
 
															+    reward_params: UFRewardParams,
														
 
															+    action_spec: UFActionSpec,
														
 
															+    statebounds: UFStateBounds,
														
 
															+    reset_state_pool,
														
 
															+    seed: int,
														
 
															+):
														
 
															+    def _init():
														
 
															+        env = UFSuperCycleEnv(
														
 
															+            physics=physics,
														
 
															+            reward_params=reward_params,
														
 
															+            action_spec=action_spec,
														
 
															+            statebounds=statebounds,
														
 
															+            real_state_pool=reset_state_pool,
														
 
															+            RANDOM_SEED=seed,
														
 
															+        )
														
 
															+        env.action_space.seed(seed)
														
 
															+        env.observation_space.seed(seed)
														
 
															+        return Monitor(env)
														
 
															+
														
 
															+    return _init
														
 
															+
														
 
															+
														
 
															+# ============================================================
														
 
															+# 8. 主训练流程
														
 
															+# ============================================================
														
 
															+def main():
														
 
															+    # ---------- Seed ----------
														
 
															+    set_global_seed(RANDOM_SEED)
														
 
															+
														
 
															+    # ---------- Reset states ----------
														
 
															+    train_pool = load_reset_state_pool()
														
 
															+
														
 
															+    # ---------- Resistance models ----------
														
 
															+    phys_params = UFPhysicsParams()
														
 
															+    res_fp, res_bw = load_resistance_models(phys_params)
														
 
															+
														
 
															+    # ---------- Physics ----------
														
 
															+    physics_model = UFPhysicsModel(
														
 
															+        phys_params=phys_params,
														
 
															+        resistance_model_fp=res_fp,
														
 
															+        resistance_model_bw=res_bw,
														
 
															+    )
														
 
															+
														
 
															+    # ---------- RL specs ----------
														
 
															+    reward_params = UFRewardParams()
														
 
															+    action_spec = UFActionSpec()
														
 
															+    state_bounds = UFStateBounds()
														
 
															+
														
 
															+    # ---------- Training Env ----------
														
 
															+    train_env = DummyVecEnv([
														
 
															+        make_env(
														
 
															+            physics_model,
														
 
															+            reward_params,
														
 
															+            action_spec,
														
 
															+            state_bounds,
														
 
															+            train_pool,
														
 
															+            RANDOM_SEED,
														
 
															+        )
														
 
															+    ])
														
 
															+
														
 
															+    # ---------- Callback ----------
														
 
															+    recorder = UFEpisodeRecorder()
														
 
															+    callback = UFTrainingCallback(recorder, verbose=1)
														
 
															+
														
 
															+    # ---------- Trainer ----------
														
 
															+    algo_params = DQNParams(remark="uf_dqn_train_only")
														
 
															+
														
 
															+    trainer = DQNTrainer(
														
 
															+        env=train_env,
														
 
															+        params=algo_params,
														
 
															+        callback=callback,
														
 
															+        PROJECT_ROOT=PROJECT_ROOT,
														
 
															+    )
														
 
															+
														
 
															+    # ---------- Training ----------
														
 
															+    print("\n[Train] Start training")
														
 
															+    trainer.train(total_timesteps=TOTAL_TIMESTEPS)
														
 
															+    trainer.save()
														
 
															+
														
 
															+    print("[Train] Finished")
														
 
															+
														
 
															+
														
 
															+# ============================================================
														
 
															+# 9. 入口
														
 
															+# ============================================================
														
 
															+if __name__ == "__main__":
														
 
															+    RANDOM_SEED = 2025
														
 
															+    TOTAL_TIMESTEPS = 1_500_000
														
 
															+
														
 
															+    RESET_STATE_CSV = (
														
 
															+        PROJECT_ROOT
														
 
															+        / "datasets/rl_ready/output/reset_state_pool.csv"
														
 
															+    )
														
 
															+
														
 
															+    main()