junc_WHU 1 ay önce
ebeveyn
işleme
3dcc843ad2

+ 7 - 0
models/uf-rl/uf_data_process/fit.py

@@ -105,6 +105,13 @@ class ShortTermCycleFoulingFitter:
             if len(seg) < 2:
                 continue
 
+            if not pd.api.types.is_datetime64_any_dtype(seg["time"]):
+                seg["time"] = pd.to_datetime(seg["time"], errors="coerce")
+            seg = seg.dropna(subset=["time"])
+            if len(seg) == 0:
+                # 该段完全无效,直接跳过
+                continue
+
             # 局部时间(秒)
             try:
                 t = (seg["time"] - seg["time"].iloc[0]).dt.total_seconds().astype(float)

+ 93 - 0
models/uf-rl/uf_data_process/plot.py

@@ -0,0 +1,93 @@
import os
import glob

import numpy as np
import pandas as pd

# ===================== Configuration =====================
# Directory holding the per-segment CSV exports, and the column to analyse.
data_dir = r"E:\Greentech\models\uf-rl\datasets\processed\segments"
target_col = "cycle_long_r2"

# ===================== Binning scheme =====================
# pd.cut with right=True yields right-closed intervals, so the first bucket
# is (-inf, 0] and contains 0.0 exactly.  The labels use half-open interval
# notation to match what pd.cut actually does (the previous "<0" label
# wrongly suggested that 0.0 was excluded from the first bucket).
BIN_EDGES = [-np.inf, 0.0, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
BIN_LABELS = [
    "≤ 0",
    "(0, 0.5]",
    "(0.5, 0.6]",
    "(0.6, 0.7]",
    "(0.7, 0.8]",
    "(0.8, 0.9]",
    "(0.9, 1.0]",
]


def load_target_values(directory: str, column: str) -> np.ndarray:
    """Collect all non-NaN values of *column* from every CSV in *directory*.

    Files that fail to parse are reported and skipped; files lacking the
    column are silently ignored.

    Raises:
        ValueError: if no usable values were found in any file.
    """
    chunks = []
    for path in glob.glob(os.path.join(directory, "*.csv")):
        try:
            frame = pd.read_csv(path)
        except Exception as exc:  # best-effort scan: report and continue
            print(f"读取失败: {path}, 错误: {exc}")
            continue
        if column in frame.columns:
            vals = frame[column].dropna().to_numpy()
            if vals.size:  # skip files that contribute nothing
                chunks.append(vals)
    if not chunks:
        # Message uses the actual column name (was inconsistently cased).
        raise ValueError(f"未在任何 CSV 中找到有效的 {column} 数据")
    return np.concatenate(chunks)


def summarize_distribution(data):
    """Return (counts, ratios) of *data* over BIN_EDGES / BIN_LABELS.

    counts is a Series indexed by BIN_LABELS (all bins present, zeros
    included); ratios is the same index with percentages of len(data).
    """
    cats = pd.cut(
        np.asarray(data, dtype=float),
        bins=BIN_EDGES,
        labels=BIN_LABELS,
        right=True,           # intervals are (a, b]
        include_lowest=True,  # first interval closed on the left as well
    )
    counts = pd.Series(cats).value_counts().sort_index()
    ratios = counts / len(data) * 100.0
    return counts, ratios


def plot_distribution(ratios) -> None:
    """Draw the percentage bar chart (blocks on plt.show())."""
    # Imported lazily so the statistics above work on headless machines.
    import matplotlib.pyplot as plt
    from matplotlib.font_manager import FontProperties

    # SimHei renders the Chinese labels; the font path is Windows-specific.
    font = FontProperties(fname=r"C:\Windows\Fonts\simhei.ttf", size=12)

    plt.figure(figsize=(10, 6))
    plt.bar(BIN_LABELS, ratios, color="skyblue", edgecolor="black")
    plt.title(f"{target_col} 数据分布柱状图", fontproperties=font)
    plt.xlabel("区间", fontproperties=font)
    plt.ylabel("占比 (%)", fontproperties=font)
    plt.ylim(0, 100)
    plt.grid(axis="y", linestyle="--", alpha=0.7)

    # Annotate each bar with its percentage.
    for i, v in enumerate(ratios):
        plt.text(i, v + 1, f"{v:.1f}%", ha="center", va="bottom",
                 fontsize=10, fontproperties=font)

    plt.tight_layout()
    plt.show()


def main() -> None:
    """Load values, print the distribution table, then show the chart."""
    data = load_target_values(data_dir, target_col)
    counts, ratios = summarize_distribution(data)

    result = pd.DataFrame({
        "样本数": counts,
        "占比 (%)": ratios.round(2),
    })
    print(f"\n总样本数: {len(data)}\n")
    print(result)

    plot_distribution(ratios)


if __name__ == "__main__":
    main()

+ 5 - 3
models/uf-rl/uf_data_process/run_ufdata_pipeline.py

@@ -1,7 +1,9 @@
 import os
 import sys
+from pathlib import Path
 
-SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+THIS_FILE = Path(__file__).resolve()
+UF_RL_ROOT = THIS_FILE.parents[1]
 
 from load import UFConfigLoader
 from pipeline import UFAnalysisPipeline
@@ -13,8 +15,8 @@ def main():
     print("=====================================")
 
     # 1. 加载配置文件
-    config_path = os.path.join(SCRIPT_DIR, "uf_analyze_config.yaml")
-    cfg = UFConfigLoader(config_path)
+    CONFIG_PATH = UF_RL_ROOT / "config" / "uf_analyze_config.yaml"
+    cfg = UFConfigLoader(CONFIG_PATH)
 
     # 2. 创建 pipeline
     pipeline = UFAnalysisPipeline(cfg)

+ 58 - 0
models/uf-rl/uf_train/env/reset_plot.py

@@ -0,0 +1,58 @@
import numpy as np

# -----------------------------
# Tunable schedule parameters
# -----------------------------
ALPHA = 0.5   # final maximum share of virtual operating conditions
K = 10.0      # sigmoid steepness
P0 = 0.7      # training-progress inflection point for virtual sampling
BETA = 0.5    # linear growth rate of perturbed conditions


def sampling_weights(progress, alpha=ALPHA, k=K, p0=P0, beta=BETA):
    """Return (w_real, w_perturb, w_virtual) for *progress* in [0, 1].

    w_virtual grows along a sigmoid (fast late in training), w_perturb
    grows linearly, and w_real is the remainder, clipped to [0, 1] for
    numerical safety.  Defaults reproduce the original demo constants.
    """
    progress = np.asarray(progress, dtype=float)
    w_virtual = alpha / (1.0 + np.exp(-k * (progress - p0)))
    w_perturb = beta * progress
    w_real = np.clip(1.0 - w_virtual - w_perturb, 0.0, 1.0)
    return w_real, w_perturb, w_virtual


def perturb_schedule(progress):
    """Perturbation amplitude: linear ramp from 0.02 to 0.06."""
    return 0.02 + 0.04 * np.asarray(progress, dtype=float)


def main() -> None:
    """Plot the progressive reset-sampling strategy over training progress."""
    # Imported here so the schedule functions stay importable headlessly;
    # previously the whole script ran (and plotted) at import time.
    import matplotlib.pyplot as plt

    progress = np.linspace(0, 1, 200)
    w_real, w_perturb, w_virtual = sampling_weights(progress)
    perturb_scale = perturb_schedule(progress)

    fig, ax1 = plt.subplots(figsize=(8, 5))
    ax1.plot(progress, w_real, label="w_real", linewidth=2)
    ax1.plot(progress, w_perturb, label="w_perturb", linewidth=2)
    ax1.plot(progress, w_virtual, label="w_virtual", linewidth=2)
    ax1.set_xlabel("Training Progress")
    ax1.set_ylabel("Sampling Weights")
    ax1.set_ylim(0, 1.05)
    ax1.grid(True, linestyle="--", alpha=0.5)
    ax1.legend(loc="upper left")

    # Secondary axis: perturbation amplitude.
    ax2 = ax1.twinx()
    ax2.plot(progress, perturb_scale, label="perturb_scale",
             linestyle="--", linewidth=2)
    ax2.set_ylabel("Perturb Scale")
    ax2.set_ylim(0, 0.07)
    ax2.legend(loc="upper right")

    plt.title("Progressive Reset Sampling Strategy")
    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    main()

BIN
models/uf-rl/uf_train/rl_model/DQN/model/reward.png


+ 63 - 2
models/uf-rl/uf_train/rl_model/DQN/run_dqn_train.py

@@ -175,23 +175,51 @@ def main():
     # ========================================================
     print("\n[Eval] Start validation rollout")
 
+    TMP0_min = 0.01
+    TMP0_max = 0.08
+
     rewards = []
 
-    for _ in range(len(val_pool)):
+    # ---------- 用于可视化的容器(只记录第一个 episode) ----------
+    vis_tmp_series = []
+    vis_action_series = []
+
+    for ep_idx in range(len(val_pool)):
         obs = val_env.reset()
         episode_reward = 0.0
 
-        for _ in range(10):
+        for step in range(10):
+            # ====================================================
+            # 可视化:只记录第一个 validation episode
+            # ====================================================
+            if ep_idx == 0:
+                TMP0_norm = obs[0]
+                TMP0 = (
+                        TMP0_norm * (TMP0_max - TMP0_min)
+                        + TMP0_min
+                )
+                vis_tmp_series.append(TMP0)
+
+            # ---------------- 策略决策 ----------------
             action, _ = trainer.model.predict(
                 obs, deterministic=True
             )
+
+            if ep_idx == 0:
+                vis_action_series.append(action[0])
+
+            # ---------------- 环境推进 ----------------
             obs, reward, done, _ = val_env.step(action)
             episode_reward += reward[0]
+
             if done:
                 break
 
         rewards.append(episode_reward)
 
+    # ========================================================
+    # 验证结果保存
+    # ========================================================
     rewards = np.asarray(rewards)
 
     save_path = Path(trainer.log_dir) / "val_rewards.npy"
@@ -200,6 +228,39 @@ def main():
     print(f"[Eval] Saved to {save_path}")
     print(f"[Eval] Mean reward = {rewards.mean():.3f}")
 
+    # ========================================================
+    # 可视化(第一个 validation episode)
+    # ========================================================
+    import matplotlib.pyplot as plt
+
+    vis_tmp_series = np.asarray(vis_tmp_series)
+    vis_action_series = np.asarray(vis_action_series)
+    steps = np.arange(len(vis_tmp_series))
+
+    # ---------- TMP 曲线 ----------
+    plt.figure()
+    plt.plot(steps, vis_tmp_series, marker="o")
+    plt.axhline(
+        TMP0_max,
+        linestyle="--",
+        label="TMP Upper Limit"
+    )
+    plt.xlabel("Step")
+    plt.ylabel("TMP (MPa)")
+    plt.title("Validation Episode TMP Evolution")
+    plt.legend()
+    plt.grid(True)
+    plt.show()
+
+    # ---------- Action 曲线 ----------
+    plt.figure()
+    plt.plot(steps, vis_action_series, marker="o")
+    plt.xlabel("Step")
+    plt.ylabel("Action")
+    plt.title("Validation Episode Action Output")
+    plt.grid(True)
+    plt.show()
+
 
 # ============================================================
 # 入口

+ 186 - 0
models/uf-rl/uf_train/rl_model/common/train_entry.py

@@ -0,0 +1,186 @@
"""
Generic reinforcement-learning training entry point (currently bound to DQN).

Responsibilities only:
- build the environment
- build the trainer
- run training and save the model
"""

import random
from pathlib import Path

import numpy as np
import torch

# ============================================================
# 1. Path resolution and run configuration
# ============================================================
CURRENT_DIR = Path(__file__).resolve().parent
# parents[2] of .../uf_train/rl_model/common is the uf-rl project root
# -- NOTE(review): confirm depth if this file is moved.
PROJECT_ROOT = CURRENT_DIR.parents[2]

# Hoisted to module scope: load_reset_state_pool() and main() read these
# globals, so defining them only under `if __name__ == "__main__"` would
# raise NameError whenever this module is imported and main() is called.
RANDOM_SEED = 2025
TOTAL_TIMESTEPS = 1_500_000
RESET_STATE_CSV = (
    PROJECT_ROOT
    / "datasets/rl_ready/output/reset_state_pool.csv"
)


# ============================================================
# 2. Imports: data / environment
# ============================================================
from uf_train.data_to_rl.data_splitter import ResetStatePoolLoader

from uf_train.env.uf_resistance_models_load import load_resistance_models
from uf_train.env.uf_physics import UFPhysicsModel
from uf_train.env.env_params import (
    UFPhysicsParams,
    UFStateBounds,
    UFRewardParams,
    UFActionSpec,
)
from uf_train.env.uf_env import UFSuperCycleEnv

from uf_train.env.env_visual import UFEpisodeRecorder, UFTrainingCallback


# ============================================================
# 3. Imports: algorithm (currently DQN)
# ============================================================
from uf_train.rl_model.DQN.dqn_params import DQNParams
from uf_train.rl_model.DQN.dqn_trainer import DQNTrainer


# ============================================================
# 4. SB3 VecEnv
# ============================================================
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv


# ============================================================
# 5. Random seeding
# ============================================================
def set_global_seed(seed: int) -> None:
    """Seed the python, numpy and torch RNGs and force deterministic cuDNN."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Determinism over speed: needed for reproducible training runs.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    print(f"[Seed] Global random seed = {seed}")


# ============================================================
# 6. Reset-state pool loading
# ============================================================
def load_reset_state_pool():
    """Load RESET_STATE_CSV and return the training split of reset states."""
    loader = ResetStatePoolLoader(
        csv_path=RESET_STATE_CSV,
        train_ratio=0.8,
        shuffle=True,
        random_state=RANDOM_SEED,
    )

    # Validation split is discarded here; training uses only the train pool.
    train_pool, _ = loader.split()

    print("[Data] Reset state pool loaded")
    print(f"       Train pool size: {len(train_pool)}")

    return train_pool


# ============================================================
# 7. Environment factory
# ============================================================
def make_env(
    physics: UFPhysicsModel,
    reward_params: UFRewardParams,
    action_spec: UFActionSpec,
    statebounds: UFStateBounds,
    reset_state_pool,
    seed: int,
):
    """Return a zero-arg factory building a seeded, Monitor-wrapped env.

    The closure shape is what DummyVecEnv expects for its env_fns list.
    """
    def _init():
        env = UFSuperCycleEnv(
            physics=physics,
            reward_params=reward_params,
            action_spec=action_spec,
            statebounds=statebounds,
            real_state_pool=reset_state_pool,
            RANDOM_SEED=seed,
        )
        # Seed the spaces too so sampled actions/observations reproduce.
        env.action_space.seed(seed)
        env.observation_space.seed(seed)
        return Monitor(env)

    return _init


# ============================================================
# 8. Main training flow
# ============================================================
def main():
    """Wire data, physics, env, callback and trainer, then train and save."""
    # ---------- Seed ----------
    set_global_seed(RANDOM_SEED)

    # ---------- Reset states ----------
    train_pool = load_reset_state_pool()

    # ---------- Resistance models ----------
    phys_params = UFPhysicsParams()
    res_fp, res_bw = load_resistance_models(phys_params)

    # ---------- Physics ----------
    physics_model = UFPhysicsModel(
        phys_params=phys_params,
        resistance_model_fp=res_fp,
        resistance_model_bw=res_bw,
    )

    # ---------- RL specs ----------
    reward_params = UFRewardParams()
    action_spec = UFActionSpec()
    state_bounds = UFStateBounds()

    # ---------- Training Env ----------
    train_env = DummyVecEnv([
        make_env(
            physics_model,
            reward_params,
            action_spec,
            state_bounds,
            train_pool,
            RANDOM_SEED,
        )
    ])

    # ---------- Callback ----------
    recorder = UFEpisodeRecorder()
    callback = UFTrainingCallback(recorder, verbose=1)

    # ---------- Trainer ----------
    algo_params = DQNParams(remark="uf_dqn_train_only")

    trainer = DQNTrainer(
        env=train_env,
        params=algo_params,
        callback=callback,
        PROJECT_ROOT=PROJECT_ROOT,
    )

    # ---------- Training ----------
    print("\n[Train] Start training")
    trainer.train(total_timesteps=TOTAL_TIMESTEPS)
    trainer.save()

    print("[Train] Finished")


# ============================================================
# 9. Entry point
# ============================================================
if __name__ == "__main__":
    main()