DQN_env.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. import os
  2. import time
  3. import random
  4. import numpy as np
  5. import gymnasium as gym
  6. from gymnasium import spaces
  7. from stable_baselines3 import DQN
  8. from stable_baselines3.common.monitor import Monitor
  9. from stable_baselines3.common.vec_env import DummyVecEnv
  10. from stable_baselines3.common.callbacks import BaseCallback
  11. from typing import Dict, Tuple, Optional
  12. import torch
  13. import torch.nn as nn
  14. from dataclasses import dataclass, asdict
  15. from UF_models import TMPIncreaseModel, TMPDecreaseModel # 导入模型类
  16. import copy
  17. # ==== 定义膜的基础运行参数 ====
  18. @dataclass
  19. class UFParams:
  20. # —— 膜与运行参数 ——
  21. q_UF: float = 360.0 # 过滤进水流量(m^3/h)
  22. TMP0: float = 0.03 # 初始TMP(MPa)
  23. TMP_max: float = 0.06 # TMP硬上限(MPa)
  24. # —— 膜污染动力学 ——
  25. alpha: float = 1e-6 # TMP增长系数
  26. belta: float = 1.1 # 幂指数
  27. # —— 反洗参数(固定) ——
  28. q_bw_m3ph: float = 1000.0 # 物理反洗流量(m^3/h)
  29. # —— CEB参数(固定) ——
  30. T_ceb_interval_h: float = 48.0 # 固定每 k 小时做一次CEB
  31. v_ceb_m3: float = 30.0 # CEB用水体积(m^3)
  32. t_ceb_s: float = 40 * 60.0 # CEB时长(s)
  33. phi_ceb: float = 1.0 # CEB去除比例(简化:完全恢复到TMP0)
  34. # —— 约束与收敛 ——
  35. dTMP: float = 0.001 # 单次产水结束时,相对TMP0最大升幅(MPa)
  36. # —— 搜索范围(秒) ——
  37. L_min_s: float = 3800.0 # 过滤时长下限(s)
  38. L_max_s: float = 6000.0 # 过滤时长上限(s)
  39. t_bw_min_s: float = 40.0 # 物洗时长下限(s)
  40. t_bw_max_s: float = 60.0 # 物洗时长上限(s)
  41. # —— 物理反洗恢复函数参数 ——
  42. phi_bw_min: float = 0.7 # 物洗去除比例最小值
  43. phi_bw_max: float = 1.0 # 物洗去除比例最大值
  44. L_ref_s: float = 4000.0 # 过滤时长影响时间尺度
  45. tau_bw_s: float = 20.0 # 物洗时长影响时间尺度
  46. gamma_t: float = 1.0 # 物洗时长作用指数
  47. # —— 网格 ——
  48. L_step_s: float = 60.0 # 过滤时长步长(s)
  49. t_bw_step_s: float = 5.0 # 物洗时长步长(s)
  50. # 多目标加权及高TMP惩罚
  51. w_rec: float = 0.8 # 回收率权重
  52. w_rate: float = 0.2 # 净供水率权重
  53. w_headroom: float = 0.2 # 贴边惩罚权重
  54. r_headroom: float = 2.0 # 贴边惩罚幂次
  55. headroom_hardcap: float = 0.98 # 超过此比例直接视为不可取
  56. # ==== 加载模拟环境模型 ====
  57. # 初始化模型
  58. model_fp = TMPIncreaseModel()
  59. model_bw = TMPDecreaseModel()
  60. # 加载参数
  61. model_fp.load_state_dict(torch.load("uf_fp.pth"))
  62. model_bw.load_state_dict(torch.load("uf_bw.pth"))
  63. # 切换到推理模式
  64. model_fp.eval()
  65. model_bw.eval()
  66. def _delta_tmp(p, L_h: float) -> float:
  67. """
  68. 过滤时段TMP上升量:调用 uf_fp.pth 模型
  69. """
  70. return model_fp(p, L_h)
  71. def phi_bw_of(p, L_s: float, t_bw_s: float) -> float:
  72. """
  73. 物洗去除比例:调用 uf_bw.pth 模型
  74. """
  75. return model_bw(p, L_s, t_bw_s)
  76. def _tmp_after_ceb(p, L_s: float, t_bw_s: float) -> float:
  77. """
  78. 计算化学清洗(CEB)后的TMP,当前为恢复初始跨膜压差
  79. """
  80. return p.TMP0
  81. def _v_bw_m3(p, t_bw_s: float) -> float:
  82. """
  83. 物理反洗水耗
  84. """
  85. return float(p.q_bw_m3ph * (float(t_bw_s) / 3600.0))
  86. def simulate_one_supercycle(p: UFParams, L_s: float, t_bw_s: float):
  87. """
  88. 返回 (是否可行, 指标字典)
  89. - 支持动态CEB次数:48h固定间隔
  90. - 增加日均产水时间和吨水电耗
  91. - 增加最小TMP记录
  92. """
  93. L_h = float(L_s) / 3600.0 # 小周期过滤时间(h)
  94. tmp = p.TMP0
  95. max_tmp_during_filtration = tmp
  96. min_tmp_during_filtration = tmp # 新增:初始化最小TMP
  97. max_residual_increase = 0.0
  98. # 小周期总时长(h)
  99. t_small_cycle_h = (L_s + t_bw_s) / 3600.0
  100. # 计算超级周期内CEB次数
  101. k_bw_per_ceb = int(np.floor(p.T_ceb_interval_h / t_small_cycle_h))
  102. if k_bw_per_ceb < 1:
  103. k_bw_per_ceb = 1 # 至少一个小周期
  104. # ton水电耗查表
  105. energy_lookup = {
  106. 3600: 0.1034, 3660: 0.1031, 3720: 0.1029, 3780: 0.1026,
  107. 3840: 0.1023, 3900: 0.1021, 3960: 0.1019, 4020: 0.1017,
  108. 4080: 0.1015, 4140: 0.1012, 4200: 0.1011
  109. }
  110. for _ in range(k_bw_per_ceb):
  111. tmp_run_start = tmp
  112. # 过滤阶段TMP增长
  113. dtmp = _delta_tmp(p, L_h)
  114. tmp_peak = tmp_run_start + dtmp
  115. # 约束1:峰值不得超过硬上限
  116. if tmp_peak > p.TMP_max + 1e-12:
  117. return False, {"reason": "TMP_max violated during filtration", "TMP_peak": tmp_peak}
  118. # 更新最大和最小TMP
  119. if tmp_peak > max_tmp_during_filtration:
  120. max_tmp_during_filtration = tmp_peak
  121. if tmp_run_start < min_tmp_during_filtration: # 新增:记录运行开始时的最小TMP
  122. min_tmp_during_filtration = tmp_run_start
  123. # 物理反洗
  124. phi = phi_bw_of(p, L_s, t_bw_s)
  125. tmp_after_bw = tmp_peak - phi * (tmp_peak - tmp_run_start)
  126. # 约束2:单次残余增量控制
  127. residual_inc = tmp_after_bw - tmp_run_start
  128. if residual_inc > p.dTMP + 1e-12:
  129. return False, {
  130. "reason": "residual TMP increase after BW exceeded dTMP",
  131. "residual_increase": residual_inc,
  132. "limit_dTMP": p.dTMP
  133. }
  134. if residual_inc > max_residual_increase:
  135. max_residual_increase = residual_inc
  136. tmp = tmp_after_bw
  137. # CEB
  138. tmp_after_ceb = p.TMP0
  139. # 体积与回收率
  140. V_feed_super = k_bw_per_ceb * p.q_UF * L_h
  141. V_loss_super = k_bw_per_ceb * _v_bw_m3(p, t_bw_s) + p.v_ceb_m3
  142. V_net = max(0.0, V_feed_super - V_loss_super)
  143. recovery = max(0.0, V_net / max(V_feed_super, 1e-12))
  144. # 时间与净供水率
  145. T_super_h = k_bw_per_ceb * (L_s + t_bw_s) / 3600.0 + p.t_ceb_s / 3600.0
  146. net_delivery_rate_m3ph = V_net / max(T_super_h, 1e-12)
  147. # 贴边比例与硬限
  148. headroom_ratio = max_tmp_during_filtration / max(p.TMP_max, 1e-12)
  149. if headroom_ratio > p.headroom_hardcap + 1e-12:
  150. return False, {"reason": "headroom hardcap exceeded", "headroom_ratio": headroom_ratio}
  151. # —— 新增指标 1:日均产水时间(h/d) ——
  152. daily_prod_time_h = k_bw_per_ceb * L_h / T_super_h * 24.0
  153. # —— 新增指标 2:吨水电耗(kWh/m³) ——
  154. closest_L = min(energy_lookup.keys(), key=lambda x: abs(x - L_s))
  155. ton_water_energy = energy_lookup[closest_L]
  156. info = {
  157. "recovery": recovery,
  158. "V_feed_super_m3": V_feed_super,
  159. "V_loss_super_m3": V_loss_super,
  160. "V_net_super_m3": V_net,
  161. "supercycle_time_h": T_super_h,
  162. "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
  163. "max_TMP_during_filtration": max_tmp_during_filtration,
  164. "min_TMP_during_filtration": min_tmp_during_filtration, # 新增:最小TMP
  165. "max_residual_increase_per_run": max_residual_increase,
  166. "phi_bw_effective": phi,
  167. "TMP_after_ceb": tmp_after_ceb,
  168. "headroom_ratio": headroom_ratio,
  169. "daily_prod_time_h": daily_prod_time_h,
  170. "ton_water_energy_kWh_per_m3": ton_water_energy,
  171. "k_bw_per_ceb": k_bw_per_ceb
  172. }
  173. return True, info
  174. def _score(p: UFParams, rec: dict) -> float:
  175. """综合评分:越大越好。通过非线性放大奖励差异,强化区分好坏动作"""
  176. # —— 无量纲化净供水率 ——
  177. rate_norm = rec["net_delivery_rate_m3ph"] / max(p.q_UF, 1e-12)
  178. # —— TMP soft penalty (sigmoid) ——
  179. tmp_ratio = rec["max_TMP_during_filtration"] / max(p.TMP_max, 1e-12)
  180. k = 10.0
  181. headroom_penalty = 1.0 / (1.0 + np.exp(-k * (tmp_ratio - 1.0)))
  182. # —— 基础 reward(0.6~0.9左右)——
  183. base_reward = (
  184. p.w_rec * rec["recovery"]
  185. + p.w_rate * rate_norm
  186. - p.w_headroom * headroom_penalty
  187. )
  188. # —— 非线性放大:平方映射 + 缩放 ——
  189. # 目的是放大好坏动作差异,同时限制最大值,避免 TD-error 过大
  190. amplified_reward = (base_reward - 0.5) ** 2 * 5.0
  191. # —— 可选:保留符号,区分负奖励
  192. if base_reward < 0.5:
  193. amplified_reward = -amplified_reward
  194. return amplified_reward
  195. class UFSuperCycleEnv(gym.Env):
  196. """超滤系统环境(超级周期级别决策)"""
  197. metadata = {"render_modes": ["human"]}
  198. def __init__(self, base_params, max_episode_steps: int = 20):
  199. super(UFSuperCycleEnv, self).__init__()
  200. self.base_params = base_params
  201. self.current_params = copy.deepcopy(base_params)
  202. self.max_episode_steps = max_episode_steps
  203. self.current_step = 0
  204. # 计算离散动作空间
  205. self.L_values = np.arange(
  206. self.base_params.L_min_s,
  207. self.base_params.L_max_s + self.base_params.L_step_s,
  208. self.base_params.L_step_s
  209. )
  210. self.t_bw_values = np.arange(
  211. self.base_params.t_bw_min_s,
  212. self.base_params.t_bw_max_s + self.base_params.t_bw_step_s,
  213. self.base_params.t_bw_step_s
  214. )
  215. self.num_L = len(self.L_values)
  216. self.num_bw = len(self.t_bw_values)
  217. # 单一离散动作空间
  218. self.action_space = spaces.Discrete(self.num_L * self.num_bw)
  219. # 状态空间增加 TMP0, 上一次动作(L_s, t_bw_s), 本周期最高 TMP
  220. # 状态归一化均在 _get_obs 内处理
  221. self.observation_space = spaces.Box(
  222. low=np.zeros(4, dtype=np.float32),
  223. high=np.ones(4, dtype=np.float32),
  224. dtype=np.float32
  225. )
  226. # 初始化状态
  227. self.last_action = (self.base_params.L_min_s, self.base_params.t_bw_min_s)
  228. self.max_TMP_during_filtration = self.current_params.TMP0
  229. self.reset(seed=None)
  230. def _get_obs(self):
  231. TMP0 = self.current_params.TMP0
  232. TMP0_norm = (TMP0 - 0.01) / (0.05 - 0.01)
  233. L_s, t_bw_s = self.last_action
  234. L_norm = (L_s - self.base_params.L_min_s) / (self.base_params.L_max_s - self.base_params.L_min_s)
  235. t_bw_norm = (t_bw_s - self.base_params.t_bw_min_s) / (self.base_params.t_bw_max_s - self.base_params.t_bw_min_s)
  236. max_TMP_norm = (self.max_TMP_during_filtration - 0.01) / (0.05 - 0.01)
  237. return np.array([TMP0_norm, L_norm, t_bw_norm, max_TMP_norm], dtype=np.float32)
  238. def _get_action_values(self, action):
  239. L_idx = action // self.num_bw
  240. t_bw_idx = action % self.num_bw
  241. return self.L_values[L_idx], self.t_bw_values[t_bw_idx]
  242. def reset(self, seed=None, options=None):
  243. super().reset(seed=seed)
  244. self.current_params.TMP0 = np.random.uniform(0.01, 0.03)
  245. self.current_step = 0
  246. self.last_action = (self.base_params.L_min_s, self.base_params.t_bw_min_s)
  247. self.max_TMP_during_filtration = self.current_params.TMP0
  248. return self._get_obs(), {}
  249. def step(self, action):
  250. self.current_step += 1
  251. L_s, t_bw_s = self._get_action_values(action)
  252. L_s = np.clip(L_s, self.base_params.L_min_s, self.base_params.L_max_s)
  253. t_bw_s = np.clip(t_bw_s, self.base_params.t_bw_min_s, self.base_params.t_bw_max_s)
  254. # 模拟超级周期
  255. feasible, info = simulate_one_supercycle(self.current_params, L_s, t_bw_s)
  256. if feasible:
  257. reward = _score(self.current_params, info)
  258. self.current_params.TMP0 = info["TMP_after_ceb"]
  259. self.max_TMP_during_filtration = info["max_TMP_during_filtration"]
  260. terminated = False
  261. else:
  262. reward = -20
  263. terminated = True
  264. truncated = self.current_step >= self.max_episode_steps
  265. self.last_action = (L_s, t_bw_s)
  266. next_obs = self._get_obs()
  267. info["feasible"] = feasible
  268. info["step"] = self.current_step
  269. return next_obs, reward, terminated, truncated, info