junc_WHU 1 ay önce
ebeveyn
işleme
3dcc843ad2

+ 7 - 0
models/uf-rl/uf_data_process/fit.py

@@ -105,6 +105,13 @@ class ShortTermCycleFoulingFitter:
             if len(seg) < 2:
                 continue
 
+            if not pd.api.types.is_datetime64_any_dtype(seg["time"]):
+                seg["time"] = pd.to_datetime(seg["time"], errors="coerce")
+            seg = seg.dropna(subset=["time"])
+            if len(seg) == 0:
+                # 该段完全无效,直接跳过
+                continue
+
             # 局部时间(秒)
             try:
                 t = (seg["time"] - seg["time"].iloc[0]).dt.total_seconds().astype(float)

+ 93 - 0
models/uf-rl/uf_data_process/plot.py

@@ -0,0 +1,93 @@
import os
import glob

import numpy as np
import pandas as pd

# ===================== Configuration =====================
# Directory holding the per-segment CSV exports, and the column to analyse.
data_dir = r"E:\Greentech\models\uf-rl\datasets\processed\segments"
target_col = "cycle_long_r2"

# ===================== Binning scheme =====================
# pd.cut with right=True yields right-closed intervals, so the first bucket
# is (-inf, 0] and contains 0.0 exactly.  The labels use half-open interval
# notation to match what pd.cut actually does (the previous "<0" label
# wrongly suggested that 0.0 was excluded from the first bucket).
BIN_EDGES = [-np.inf, 0.0, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
BIN_LABELS = [
    "≤ 0",
    "(0, 0.5]",
    "(0.5, 0.6]",
    "(0.6, 0.7]",
    "(0.7, 0.8]",
    "(0.8, 0.9]",
    "(0.9, 1.0]",
]


def load_target_values(directory: str, column: str) -> np.ndarray:
    """Collect all non-NaN values of *column* from every CSV in *directory*.

    Files that fail to parse are reported and skipped; files lacking the
    column are silently ignored.

    Raises:
        ValueError: if no usable values were found in any file.
    """
    chunks = []
    for path in glob.glob(os.path.join(directory, "*.csv")):
        try:
            frame = pd.read_csv(path)
        except Exception as exc:  # best-effort scan: report and continue
            print(f"读取失败: {path}, 错误: {exc}")
            continue
        if column in frame.columns:
            vals = frame[column].dropna().to_numpy()
            if vals.size:  # skip files that contribute nothing
                chunks.append(vals)
    if not chunks:
        # Message uses the actual column name (was inconsistently cased).
        raise ValueError(f"未在任何 CSV 中找到有效的 {column} 数据")
    return np.concatenate(chunks)


def summarize_distribution(data):
    """Return (counts, ratios) of *data* over BIN_EDGES / BIN_LABELS.

    counts is a Series indexed by BIN_LABELS (all bins present, zeros
    included); ratios is the same index with percentages of len(data).
    """
    cats = pd.cut(
        np.asarray(data, dtype=float),
        bins=BIN_EDGES,
        labels=BIN_LABELS,
        right=True,           # intervals are (a, b]
        include_lowest=True,  # first interval closed on the left as well
    )
    counts = pd.Series(cats).value_counts().sort_index()
    ratios = counts / len(data) * 100.0
    return counts, ratios


def plot_distribution(ratios) -> None:
    """Draw the percentage bar chart (blocks on plt.show())."""
    # Imported lazily so the statistics above work on headless machines.
    import matplotlib.pyplot as plt
    from matplotlib.font_manager import FontProperties

    # SimHei renders the Chinese labels; the font path is Windows-specific.
    font = FontProperties(fname=r"C:\Windows\Fonts\simhei.ttf", size=12)

    plt.figure(figsize=(10, 6))
    plt.bar(BIN_LABELS, ratios, color="skyblue", edgecolor="black")
    plt.title(f"{target_col} 数据分布柱状图", fontproperties=font)
    plt.xlabel("区间", fontproperties=font)
    plt.ylabel("占比 (%)", fontproperties=font)
    plt.ylim(0, 100)
    plt.grid(axis="y", linestyle="--", alpha=0.7)

    # Annotate each bar with its percentage.
    for i, v in enumerate(ratios):
        plt.text(i, v + 1, f"{v:.1f}%", ha="center", va="bottom",
                 fontsize=10, fontproperties=font)

    plt.tight_layout()
    plt.show()


def main() -> None:
    """Load values, print the distribution table, then show the chart."""
    data = load_target_values(data_dir, target_col)
    counts, ratios = summarize_distribution(data)

    result = pd.DataFrame({
        "样本数": counts,
        "占比 (%)": ratios.round(2),
    })
    print(f"\n总样本数: {len(data)}\n")
    print(result)

    plot_distribution(ratios)


if __name__ == "__main__":
    main()

+ 5 - 3
models/uf-rl/uf_data_process/run_ufdata_pipeline.py

@@ -1,7 +1,9 @@
 import os
 import sys
+from pathlib import Path
 
-SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+THIS_FILE = Path(__file__).resolve()
+UF_RL_ROOT = THIS_FILE.parents[1]
 
 from load import UFConfigLoader
 from pipeline import UFAnalysisPipeline
@@ -13,8 +15,8 @@ def main():
     print("=====================================")
 
     # 1. 加载配置文件
-    config_path = os.path.join(SCRIPT_DIR, "uf_analyze_config.yaml")
-    cfg = UFConfigLoader(config_path)
+    CONFIG_PATH = UF_RL_ROOT / "config" / "uf_analyze_config.yaml"
+    cfg = UFConfigLoader(CONFIG_PATH)
 
     # 2. 创建 pipeline
     pipeline = UFAnalysisPipeline(cfg)

+ 58 - 0
models/uf-rl/uf_train/env/reset_plot.py

@@ -0,0 +1,58 @@
import numpy as np

# -----------------------------
# Tunable schedule parameters
# -----------------------------
ALPHA = 0.5   # final maximum share of virtual operating conditions
K = 10.0      # sigmoid steepness
P0 = 0.7      # training-progress inflection point for virtual sampling
BETA = 0.5    # linear growth rate of perturbed conditions


def sampling_weights(progress, alpha=ALPHA, k=K, p0=P0, beta=BETA):
    """Return (w_real, w_perturb, w_virtual) for *progress* in [0, 1].

    w_virtual grows along a sigmoid (fast late in training), w_perturb
    grows linearly, and w_real is the remainder, clipped to [0, 1] for
    numerical safety.  Defaults reproduce the original demo constants.
    """
    progress = np.asarray(progress, dtype=float)
    w_virtual = alpha / (1.0 + np.exp(-k * (progress - p0)))
    w_perturb = beta * progress
    w_real = np.clip(1.0 - w_virtual - w_perturb, 0.0, 1.0)
    return w_real, w_perturb, w_virtual


def perturb_schedule(progress):
    """Perturbation amplitude: linear ramp from 0.02 to 0.06."""
    return 0.02 + 0.04 * np.asarray(progress, dtype=float)


def main() -> None:
    """Plot the progressive reset-sampling strategy over training progress."""
    # Imported here so the schedule functions stay importable headlessly;
    # previously the whole script ran (and plotted) at import time.
    import matplotlib.pyplot as plt

    progress = np.linspace(0, 1, 200)
    w_real, w_perturb, w_virtual = sampling_weights(progress)
    perturb_scale = perturb_schedule(progress)

    fig, ax1 = plt.subplots(figsize=(8, 5))
    ax1.plot(progress, w_real, label="w_real", linewidth=2)
    ax1.plot(progress, w_perturb, label="w_perturb", linewidth=2)
    ax1.plot(progress, w_virtual, label="w_virtual", linewidth=2)
    ax1.set_xlabel("Training Progress")
    ax1.set_ylabel("Sampling Weights")
    ax1.set_ylim(0, 1.05)
    ax1.grid(True, linestyle="--", alpha=0.5)
    ax1.legend(loc="upper left")

    # Secondary axis: perturbation amplitude.
    ax2 = ax1.twinx()
    ax2.plot(progress, perturb_scale, label="perturb_scale",
             linestyle="--", linewidth=2)
    ax2.set_ylabel("Perturb Scale")
    ax2.set_ylim(0, 0.07)
    ax2.legend(loc="upper right")

    plt.title("Progressive Reset Sampling Strategy")
    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    main()

BIN
models/uf-rl/uf_train/rl_model/DQN/model/reward.png


+ 63 - 2
models/uf-rl/uf_train/rl_model/DQN/run_dqn_train.py

@@ -175,23 +175,51 @@ def main():
     # ========================================================
     print("\n[Eval] Start validation rollout")
 
+    TMP0_min = 0.01
+    TMP0_max = 0.08
+
     rewards = []
 
-    for _ in range(len(val_pool)):
+    # ---------- 用于可视化的容器(只记录第一个 episode) ----------
+    vis_tmp_series = []
+    vis_action_series = []
+
+    for ep_idx in range(len(val_pool)):
         obs = val_env.reset()
         episode_reward = 0.0
 
-        for _ in range(10):
+        for step in range(10):
+            # ====================================================
+            # 可视化:只记录第一个 validation episode
+            # ====================================================
+            if ep_idx == 0:
+                TMP0_norm = obs[0]
+                TMP0 = (
+                        TMP0_norm * (TMP0_max - TMP0_min)
+                        + TMP0_min
+                )
+                vis_tmp_series.append(TMP0)
+
+            # ---------------- 策略决策 ----------------
             action, _ = trainer.model.predict(
                 obs, deterministic=True
             )
+
+            if ep_idx == 0:
+                vis_action_series.append(action[0])
+
+            # ---------------- 环境推进 ----------------
             obs, reward, done, _ = val_env.step(action)
             episode_reward += reward[0]
+
             if done:
                 break
 
         rewards.append(episode_reward)
 
+    # ========================================================
+    # 验证结果保存
+    # ========================================================
     rewards = np.asarray(rewards)
 
     save_path = Path(trainer.log_dir) / "val_rewards.npy"
@@ -200,6 +228,39 @@ def main():
     print(f"[Eval] Saved to {save_path}")
     print(f"[Eval] Mean reward = {rewards.mean():.3f}")
 
+    # ========================================================
+    # 可视化(第一个 validation episode)
+    # ========================================================
+    import matplotlib.pyplot as plt
+
+    vis_tmp_series = np.asarray(vis_tmp_series)
+    vis_action_series = np.asarray(vis_action_series)
+    steps = np.arange(len(vis_tmp_series))
+
+    # ---------- TMP 曲线 ----------
+    plt.figure()
+    plt.plot(steps, vis_tmp_series, marker="o")
+    plt.axhline(
+        TMP0_max,
+        linestyle="--",
+        label="TMP Upper Limit"
+    )
+    plt.xlabel("Step")
+    plt.ylabel("TMP (MPa)")
+    plt.title("Validation Episode TMP Evolution")
+    plt.legend()
+    plt.grid(True)
+    plt.show()
+
+    # ---------- Action 曲线 ----------
+    plt.figure()
+    plt.plot(steps, vis_action_series, marker="o")
+    plt.xlabel("Step")
+    plt.ylabel("Action")
+    plt.title("Validation Episode Action Output")
+    plt.grid(True)
+    plt.show()
+
 
 # ============================================================
 # 入口

+ 186 - 0
models/uf-rl/uf_train/rl_model/common/train_entry.py

@@ -0,0 +1,186 @@
"""
Generic reinforcement-learning training entry point (currently bound to DQN).

Responsibilities only:
- build the environment
- build the trainer
- run training and save the model
"""

import random
from pathlib import Path

import numpy as np
import torch

# ============================================================
# 1. Path resolution and run configuration
# ============================================================
CURRENT_DIR = Path(__file__).resolve().parent
# parents[2] of .../uf_train/rl_model/common is the uf-rl project root
# -- NOTE(review): confirm depth if this file is moved.
PROJECT_ROOT = CURRENT_DIR.parents[2]

# Hoisted to module scope: load_reset_state_pool() and main() read these
# globals, so defining them only under `if __name__ == "__main__"` would
# raise NameError whenever this module is imported and main() is called.
RANDOM_SEED = 2025
TOTAL_TIMESTEPS = 1_500_000
RESET_STATE_CSV = (
    PROJECT_ROOT
    / "datasets/rl_ready/output/reset_state_pool.csv"
)


# ============================================================
# 2. Imports: data / environment
# ============================================================
from uf_train.data_to_rl.data_splitter import ResetStatePoolLoader

from uf_train.env.uf_resistance_models_load import load_resistance_models
from uf_train.env.uf_physics import UFPhysicsModel
from uf_train.env.env_params import (
    UFPhysicsParams,
    UFStateBounds,
    UFRewardParams,
    UFActionSpec,
)
from uf_train.env.uf_env import UFSuperCycleEnv

from uf_train.env.env_visual import UFEpisodeRecorder, UFTrainingCallback


# ============================================================
# 3. Imports: algorithm (currently DQN)
# ============================================================
from uf_train.rl_model.DQN.dqn_params import DQNParams
from uf_train.rl_model.DQN.dqn_trainer import DQNTrainer


# ============================================================
# 4. SB3 VecEnv
# ============================================================
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv


# ============================================================
# 5. Random seeding
# ============================================================
def set_global_seed(seed: int) -> None:
    """Seed the python, numpy and torch RNGs and force deterministic cuDNN."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Determinism over speed: needed for reproducible training runs.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    print(f"[Seed] Global random seed = {seed}")


# ============================================================
# 6. Reset-state pool loading
# ============================================================
def load_reset_state_pool():
    """Load RESET_STATE_CSV and return the training split of reset states."""
    loader = ResetStatePoolLoader(
        csv_path=RESET_STATE_CSV,
        train_ratio=0.8,
        shuffle=True,
        random_state=RANDOM_SEED,
    )

    # Validation split is discarded here; training uses only the train pool.
    train_pool, _ = loader.split()

    print("[Data] Reset state pool loaded")
    print(f"       Train pool size: {len(train_pool)}")

    return train_pool


# ============================================================
# 7. Environment factory
# ============================================================
def make_env(
    physics: UFPhysicsModel,
    reward_params: UFRewardParams,
    action_spec: UFActionSpec,
    statebounds: UFStateBounds,
    reset_state_pool,
    seed: int,
):
    """Return a zero-arg factory building a seeded, Monitor-wrapped env.

    The closure shape is what DummyVecEnv expects for its env_fns list.
    """
    def _init():
        env = UFSuperCycleEnv(
            physics=physics,
            reward_params=reward_params,
            action_spec=action_spec,
            statebounds=statebounds,
            real_state_pool=reset_state_pool,
            RANDOM_SEED=seed,
        )
        # Seed the spaces too so sampled actions/observations reproduce.
        env.action_space.seed(seed)
        env.observation_space.seed(seed)
        return Monitor(env)

    return _init


# ============================================================
# 8. Main training flow
# ============================================================
def main():
    """Wire data, physics, env, callback and trainer, then train and save."""
    # ---------- Seed ----------
    set_global_seed(RANDOM_SEED)

    # ---------- Reset states ----------
    train_pool = load_reset_state_pool()

    # ---------- Resistance models ----------
    phys_params = UFPhysicsParams()
    res_fp, res_bw = load_resistance_models(phys_params)

    # ---------- Physics ----------
    physics_model = UFPhysicsModel(
        phys_params=phys_params,
        resistance_model_fp=res_fp,
        resistance_model_bw=res_bw,
    )

    # ---------- RL specs ----------
    reward_params = UFRewardParams()
    action_spec = UFActionSpec()
    state_bounds = UFStateBounds()

    # ---------- Training Env ----------
    train_env = DummyVecEnv([
        make_env(
            physics_model,
            reward_params,
            action_spec,
            state_bounds,
            train_pool,
            RANDOM_SEED,
        )
    ])

    # ---------- Callback ----------
    recorder = UFEpisodeRecorder()
    callback = UFTrainingCallback(recorder, verbose=1)

    # ---------- Trainer ----------
    algo_params = DQNParams(remark="uf_dqn_train_only")

    trainer = DQNTrainer(
        env=train_env,
        params=algo_params,
        callback=callback,
        PROJECT_ROOT=PROJECT_ROOT,
    )

    # ---------- Training ----------
    print("\n[Train] Start training")
    trainer.train(total_timesteps=TOTAL_TIMESTEPS)
    trainer.save()

    print("[Train] Finished")


# ============================================================
# 9. Entry point
# ============================================================
if __name__ == "__main__":
    main()