Upload files to 'models/causal-inference'

zhanghao 4 months ago
parent
commit
e90766ee9f

BIN
models/causal-inference/features_scaler.joblib


BIN
models/causal-inference/policy.optimizer.pth


BIN
models/causal-inference/policy.pth


+ 188 - 0
models/causal-inference/rl_optimizer.py

@@ -0,0 +1,188 @@
+import torch
+import numpy as np
+import gymnasium as gym
+from gymnasium import spaces
+from stable_baselines3 import PPO
+from stable_baselines3.common.callbacks import BaseCallback
+import torch.optim as optim
+from gat import GAT
+from data_trainer import DataTrainer
+
+class GATEnv(gym.Env):
+    metadata = {'render_modes': ['human'], 'render_fps': 4}
+    
+    def __init__(self, preprocessor, train_loader, val_loader, adj, args, logger):
+        super().__init__()
+        self.preprocessor = preprocessor
+        self.train_loader = train_loader
+        self.val_loader = val_loader
+        # use the specified device (GPU supported)
+        self.eval_device = torch.device(args.device)
+        self.adj = adj.to(self.eval_device)
+        self.args = args
+        self.logger = logger
+        
+        self.action_space = spaces.Box(
+            low=np.array([1e-5, 32, 2, 0.1], dtype=np.float32),
+            high=np.array([1e-2, 128, 8, 0.5], dtype=np.float32),
+            shape=(4,),
+            dtype=np.float32
+        )
+        self.observation_space = spaces.Box(
+            low=np.array([1e-5, 32, 2, 0.1, 0], dtype=np.float32),
+            high=np.array([1e-2, 128, 8, 0.5, 100], dtype=np.float32),
+            shape=(5,),
+            dtype=np.float32
+        )
+        self.best_val_loss = float('inf')
+        self.current_step = 0
+        self.max_steps = args.rl_max_steps
+        self.render_mode = None
+    
+    def reset(self, seed=None, options=None):
+        super().reset(seed=seed)
+        self.current_step = 0
+        self.best_val_loss = float('inf')
+        self.current_state = np.array([
+            float(self.args.lr),
+            float(self.args.hidden_dim),
+            float(self.args.num_heads),
+            float(self.args.dropout),
+            10.0
+        ], dtype=np.float32)
+        return self.current_state, {}
+    
+    def step(self, action):
+        self.current_step += 1
+        lr = float(action[0])
+        hidden_dim = int(round(float(action[1])))
+        num_heads = int(round(float(action[2])))
+        dropout = float(action[3])
+        
+        hidden_dim = max(32, min(128, hidden_dim))
+        num_heads = max(2, min(8, num_heads))
+        dropout = max(0.1, min(0.5, dropout))
+        
+        # build and evaluate the candidate model on the specified device (GPU supported)
+        model = GAT(
+            nfeat=1,
+            nhid=hidden_dim,
+            noutput=self.args.num_targets,
+            dropout=dropout,
+            nheads=num_heads,
+            alpha=0.2
+        ).to(self.eval_device)
+        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=self.args.weight_decay)
+        
+        # use the specified device
+        rl_args = self.args
+        trainer = DataTrainer(model, rl_args, self.preprocessor, optimizer, logger=self.logger)
+        val_loss = self._short_evaluate(trainer)
+        
+        reward = 1.0 / (1.0 + val_loss)
+        if val_loss < self.best_val_loss:
+            reward += 0.5
+            self.best_val_loss = val_loss
+        
+        self.current_state = np.array([lr, hidden_dim, num_heads, dropout, val_loss], dtype=np.float32)
+        terminated = False
+        truncated = self.current_step >= self.max_steps  # time-limit endings are truncations under the Gymnasium API
+        return self.current_state, float(reward), terminated, truncated, {}
+    
+    def _short_evaluate(self, trainer):
+        """
+        Key speedup: approximate with only a few batches so a single env.step() completes in milliseconds to seconds.
+        """
+        # train on 1 batch, twice, to produce a usable gradient signal
+        for _ in range(2):
+            trainer.train_epoch(self.train_loader, self.adj, max_batches=1)
+        # validate on 2 batches to reduce variance
+        val_loss, _ = trainer.validate(self.val_loader, self.adj, max_batches=2)
+        return float(val_loss)
+    
+    def render(self):
+        if self.render_mode == 'human':
+            print(f"[RL] Step: {self.current_step}, Best Val Loss: {self.best_val_loss:.6f}")
+    
+    def close(self):
+        pass
+
+class TrainingCallback(BaseCallback):
+    def __init__(self, verbose=0, print_every=100):
+        super().__init__(verbose)
+        self.print_every = print_every
+    
+    def _on_step(self) -> bool:
+        # BaseCallback.logger is not a logging.Logger; use print or logger.record instead
+        if self.n_calls % self.print_every == 0:
+            # some SB3 versions do not expose a 'rewards' key in self.locals; guard defensively
+            rew = None
+            try:
+                r = self.locals.get('rewards', None)
+                if r is not None:
+                    rew = float(r[0])
+            except Exception:
+                pass
+            print(f"[RL] timesteps={self.num_timesteps} calls={self.n_calls} reward={rew}")
+        return True
+
+class RLOptimizer:
+    def __init__(self, args, preprocessor, train_loader, val_loader, adj, logger):
+        self.args = args
+        self.preprocessor = preprocessor
+        self.train_loader = train_loader
+        self.val_loader = val_loader
+        self.adj = adj
+        self.logger = logger
+    
+    def optimize(self):
+        env = GATEnv(
+            self.preprocessor, self.train_loader, self.val_loader,
+            self.adj, self.args, self.logger
+        )
+        
+        # key: shrink the PPO rollout and training config so a single rollout does not take too long
+        model = PPO(
+            "MlpPolicy",
+            env,
+            verbose=1,
+            learning_rate=3e-4,
+            n_steps=32,       # was 2048 -> 32
+            batch_size=32,    # was 64 -> 32
+            n_epochs=1,       # was 10 -> 1
+            gamma=0.99,
+            gae_lambda=0.95,
+            clip_range=0.2,
+            ent_coef=0.01,
+            device=self.args.device  # use the specified device (GPU supported)
+        )
+        
+        self.logger.info("Training the reinforcement learning agent...")
+        callback = TrainingCallback(verbose=1, print_every=100)
+        model.learn(total_timesteps=self.args.rl_timesteps, callback=callback)
+        model.save("gat_ppo_agent")
+        
+        # evaluate and pick the best action
+        self.logger.info("Searching for the best hyperparameter combination...")
+        best_reward = -1.0
+        best_action = None
+        eval_env = GATEnv(
+            self.preprocessor, self.train_loader, self.val_loader,
+            self.adj, self.args, self.logger
+        )
+        for _ in range(self.args.rl_eval_episodes):
+            obs, _ = eval_env.reset()
+            action, _ = model.predict(obs, deterministic=True)
+            _, reward, _, _, _ = eval_env.step(action)
+            if reward > best_reward:
+                best_reward = reward
+                best_action = action
+        
+        best_hparams = {
+            'lr': float(best_action[0]),
+            'hidden_dim': int(round(float(best_action[1]))),
+            'num_heads': int(round(float(best_action[2]))),
+            'dropout': float(best_action[3])
+        }
+        self.logger.info(f"\nBest hyperparameters: {best_hparams}")
+        return best_hparams

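A note on the reward shaping in GATEnv.step() above: the base reward 1.0 / (1.0 + val_loss) maps any non-negative validation loss into (0, 1], rewarding lower loss, and a flat +0.5 bonus is paid whenever a step sets a new best. Working the code's own formula through one concrete value:

    val_loss = 0.25               ->  reward = 1.0 / (1.0 + 0.25) = 0.8
    new best (0.25 < prior best)  ->  reward = 0.8 + 0.5 = 1.3

Since rewards are therefore always positive, the best_reward = -1.0 initialization in RLOptimizer.optimize() guarantees the first evaluated action is adopted as a baseline.
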
BIN
models/causal-inference/targets_scaler.joblib
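
For reference, a minimal wiring sketch for RLOptimizer. This is illustrative only: the preprocessor, train_loader, val_loader, and adj objects are assumed to come from the surrounding data pipeline (the DataTrainer/GAT setup this module imports), and the field values below are placeholders, not values taken from the commit.

    import logging
    from types import SimpleNamespace
    from rl_optimizer import RLOptimizer

    # Field names mirror what GATEnv and RLOptimizer read; values are placeholders.
    args = SimpleNamespace(
        lr=1e-3, hidden_dim=64, num_heads=4, dropout=0.3,  # seeds GATEnv.reset()
        weight_decay=5e-4, num_targets=1, device="cuda",
        rl_max_steps=5,        # episode length enforced in GATEnv.step()
        rl_timesteps=256,      # PPO budget passed to model.learn()
        rl_eval_episodes=10,   # greedy rollouts used to pick best_hparams
    )

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("rl_optimizer")

    # preprocessor, train_loader, val_loader, adj: assumed pipeline objects
    rl_opt = RLOptimizer(args, preprocessor, train_loader, val_loader, adj, logger)
    best_hparams = rl_opt.optimize()
    # -> {'lr': ..., 'hidden_dim': ..., 'num_heads': ..., 'dropout': ...}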