---
# ==================== DQN hyperparameter configuration ====================
# Flat mapping of hyperparameters for a DQN training run. Every setting must
# sit on its own line: a YAML comment extends to the end of the line, so a
# key placed after a `#` on the same line is never parsed.
# NOTE(review): the key names look like Stable-Baselines3 `DQN` constructor
# arguments; confirm against the code that loads this file.

# ===== Neural network parameters =====
# Keep both the decimal point and the signed exponent: YAML 1.1 loaders
# (e.g. PyYAML) only resolve this form as a float; `1e-4` becomes a string.
learning_rate: 1.0e-4

# ===== Experience replay parameters =====
buffer_size: 100000
learning_starts: 10000
batch_size: 32

# ===== Core reinforcement learning parameters =====
gamma: 0.95
train_freq: 4

# ===== Target network update parameters =====
# NOTE(review): interval 1 with a small tau presumably means a soft update
# on every step; verify that this is the intended schedule.
target_update_interval: 1
tau: 0.005

# ===== Exploration strategy (epsilon-greedy) =====
exploration_initial_eps: 1.0
exploration_fraction: 0.3
exploration_final_eps: 0.02

# ===== Experiment identifier =====
remark: "default"