|
@@ -97,10 +97,14 @@ class UFParams:
|
|
|
# ========== 膜运行约束参数 ==========
|
|
# ========== 膜运行约束参数 ==========
|
|
|
# 定义各运行参数的物理约束和安全限制
|
|
# 定义各运行参数的物理约束和安全限制
|
|
|
|
|
|
|
|
- global_TMP_limit: float = 0.08
|
|
|
|
|
|
|
+ global_TMP_hard_limit: float = 0.08
|
|
|
# TMP 硬上限(MPa)
|
|
# TMP 硬上限(MPa)
|
|
|
# 说明:超过此值将导致episode失败,需立即停机
|
|
# 说明:超过此值将导致episode失败,需立即停机
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
|
|
+ global_TMP_soft_limit: float = 0.06
|
|
|
|
|
+ # TMP 软上限 (MPa)
|
|
|
|
|
+ # 说明:此上限用于指导奖励函数中膜阻力的允许上升值,TMP 越接近该上限,系统对膜阻力上升的控制越严格
|
|
|
|
|
+
|
|
|
# --- 初始TMP约束 ---
|
|
# --- 初始TMP约束 ---
|
|
|
TMP0_max: float = 0.035 # 初始TMP上限(MPa)
|
|
TMP0_max: float = 0.035 # 初始TMP上限(MPa)
|
|
|
TMP0_min: float = 0.01 # 初始TMP下限(MPa)
|
|
TMP0_min: float = 0.01 # 初始TMP下限(MPa)
|
|
@@ -128,38 +132,38 @@ class UFParams:
|
|
|
ceb_removal_min: float = 100 # CEB去除阻力下限(缩放后)
|
|
ceb_removal_min: float = 100 # CEB去除阻力下限(缩放后)
|
|
|
|
|
|
|
|
# ========== 反洗参数(固定配置) ==========
|
|
# ========== 反洗参数(固定配置) ==========
|
|
|
- q_bw_m3ph: float = 1000.0
|
|
|
|
|
|
|
+ q_bw_m3ph: float = 1000.0
|
|
|
# 物理反洗流量(m³/h)
|
|
# 物理反洗流量(m³/h)
|
|
|
# 说明:反洗流量通常为正常过滤流量的 2-3 倍
|
|
# 说明:反洗流量通常为正常过滤流量的 2-3 倍
|
|
|
|
|
|
|
|
# ========== CEB 化学反洗参数 ==========
|
|
# ========== CEB 化学反洗参数 ==========
|
|
|
- T_ceb_interval_h: float = 60.0
|
|
|
|
|
|
|
+ T_ceb_interval_h: float = 60.0
|
|
|
# CEB 间隔时间(小时)
|
|
# CEB 间隔时间(小时)
|
|
|
# 说明:每运行约 60 小时执行一次化学增强反洗
|
|
# 说明:每运行约 60 小时执行一次化学增强反洗
|
|
|
-
|
|
|
|
|
- v_ceb_m3: float = 30.0
|
|
|
|
|
|
|
+
|
|
|
|
|
+ v_ceb_m3: float = 30.0
|
|
|
# CEB 用水体积(m³)
|
|
# CEB 用水体积(m³)
|
|
|
-
|
|
|
|
|
- t_ceb_s: float = 40 * 60.0
|
|
|
|
|
|
|
+
|
|
|
|
|
+ t_ceb_s: float = 40 * 60.0
|
|
|
# CEB 时长(秒,这里为 40 分钟)
|
|
# CEB 时长(秒,这里为 40 分钟)
|
|
|
|
|
|
|
|
# ========== 强化学习动作空间搜索范围 ==========
|
|
# ========== 强化学习动作空间搜索范围 ==========
|
|
|
# 定义智能体可选择的动作范围(离散化)
|
|
# 定义智能体可选择的动作范围(离散化)
|
|
|
-
|
|
|
|
|
- L_min_s: float = 3800.0 # 过滤时长下限(秒,约 63 分钟)
|
|
|
|
|
- L_max_s: float = 6000.0 # 过滤时长上限(秒,约 100 分钟)
|
|
|
|
|
|
|
+
|
|
|
|
|
+ L_min_s: float = 3800.0 # 过滤时长下限(秒,约 63 分钟)
|
|
|
|
|
+ L_max_s: float = 4800.0 # 过滤时长上限(秒,约 80 分钟)
|
|
|
t_bw_min_s: float = 40.0 # 物理反洗时长下限(秒)
|
|
t_bw_min_s: float = 40.0 # 物理反洗时长下限(秒)
|
|
|
t_bw_max_s: float = 60.0 # 物理反洗时长上限(秒)
|
|
t_bw_max_s: float = 60.0 # 物理反洗时长上限(秒)
|
|
|
|
|
|
|
|
# ========== 动作离散化网格 ==========
|
|
# ========== 动作离散化网格 ==========
|
|
|
- L_step_s: float = 60.0 # 过滤时长步长(秒)
|
|
|
|
|
|
|
+ L_step_s: float = 60.0 # 过滤时长步长(秒)
|
|
|
t_bw_step_s: float = 5.0 # 物理反洗时长步长(秒)
|
|
t_bw_step_s: float = 5.0 # 物理反洗时长步长(秒)
|
|
|
|
|
|
|
|
# ========== 奖励函数参数 ==========
|
|
# ========== 奖励函数参数 ==========
|
|
|
- k_rec = 5.0 # 回收率敏感度系数(控制回收率奖励的陡峭程度)
|
|
|
|
|
- k_res = 10.0 # 残余污染敏感度系数(控制污染惩罚的陡峭程度)
|
|
|
|
|
|
|
+ k_rec = 5.0 # 回收率敏感度系数(控制回收率奖励的陡峭程度)
|
|
|
|
|
+ k_res = 10.0 # 残余污染敏感度系数(控制污染惩罚的陡峭程度)
|
|
|
rec_low, rec_high = 0.92, 0.99 # 回收率的正常范围
|
|
rec_low, rec_high = 0.92, 0.99 # 回收率的正常范围
|
|
|
- rr0 = 0.08 # 残余污染比例的参考值
|
|
|
|
|
|
|
+ rr0 = 0.08 # 残余污染比例的参考值
|
|
|
|
|
|
|
|
|
|
|
|
|
# ==================== 辅助函数:膜阻力与跨膜压差转换 ====================
|
|
# ==================== 辅助函数:膜阻力与跨膜压差转换 ====================
|
|
@@ -454,12 +458,14 @@ def simulate_one_supercycle(p: UFParams, L_s: float, t_bw_s: float):
|
|
|
k_bw_per_ceb = 1 # 至少包含1个小周期
|
|
k_bw_per_ceb = 1 # 至少包含1个小周期
|
|
|
|
|
|
|
|
# ========== 吨水电耗查找表 ==========
|
|
# ========== 吨水电耗查找表 ==========
|
|
|
- # TODO: 需根据实际过滤时间范围更新此表
|
|
|
|
|
# 键:过滤时长(秒),值:吨水电耗(kWh/m³)
|
|
# 键:过滤时长(秒),值:吨水电耗(kWh/m³)
|
|
|
energy_lookup = {
|
|
energy_lookup = {
|
|
|
3600: 0.1034, 3660: 0.1031, 3720: 0.1029, 3780: 0.1026,
|
|
3600: 0.1034, 3660: 0.1031, 3720: 0.1029, 3780: 0.1026,
|
|
|
3840: 0.1023, 3900: 0.1021, 3960: 0.1019, 4020: 0.1017,
|
|
3840: 0.1023, 3900: 0.1021, 3960: 0.1019, 4020: 0.1017,
|
|
|
- 4080: 0.1015, 4140: 0.1012, 4200: 0.1011
|
|
|
|
|
|
|
+ 4080: 0.1015, 4140: 0.1012, 4200: 0.1011, 4260: 0.1008,
|
|
|
|
|
+ 4320: 0.1007, 4380: 0.1005, 4440: 0.1003, 4500: 0.1001,
|
|
|
|
|
+ 4560: 0.0999, 4620: 0.0998, 4680: 0.0996, 4740: 0.0995,
|
|
|
|
|
+ 4800: 0.0993,
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
# ========== 循环模拟每个小周期(过滤 + 物理反洗) ==========
|
|
# ========== 循环模拟每个小周期(过滤 + 物理反洗) ==========
|
|
@@ -531,7 +537,11 @@ def simulate_one_supercycle(p: UFParams, L_s: float, t_bw_s: float):
|
|
|
# 吨水电耗(从查找表获取最接近的值)
|
|
# 吨水电耗(从查找表获取最接近的值)
|
|
|
closest_L = min(energy_lookup.keys(), key=lambda x: abs(x - L_s))
|
|
closest_L = min(energy_lookup.keys(), key=lambda x: abs(x - L_s))
|
|
|
ton_water_energy = energy_lookup[closest_L] # [kWh/m³]
|
|
ton_water_energy = energy_lookup[closest_L] # [kWh/m³]
|
|
|
- # TODO: 需根据实际过滤时间范围更新电耗查找表
|
|
|
|
|
|
|
+
|
|
|
|
|
+ # ===== 新指标:膜阻力允许上升空间 =====
|
|
|
|
|
+ R_max = _calculate_resistance(max_tmp_during_filtration, p.q_UF, p.temp)
|
|
|
|
|
+ R_soft_limit = _calculate_resistance(p.global_TMP_soft_limit, p.q_UF, p.temp)
|
|
|
|
|
+ delta_R_allow = max(R_soft_limit - R_max, 1e-6) # 供奖励函数使用的“污染允许增长空间”
|
|
|
|
|
|
|
|
# ========== 构建性能指标字典 ==========
|
|
# ========== 构建性能指标字典 ==========
|
|
|
info = {
|
|
info = {
|
|
@@ -553,7 +563,7 @@ def simulate_one_supercycle(p: UFParams, L_s: float, t_bw_s: float):
|
|
|
# TMP指标
|
|
# TMP指标
|
|
|
"max_TMP_during_filtration": max_tmp_during_filtration, # 周期内最大TMP
|
|
"max_TMP_during_filtration": max_tmp_during_filtration, # 周期内最大TMP
|
|
|
"min_TMP_during_filtration": min_tmp_during_filtration, # 周期内最小TMP
|
|
"min_TMP_during_filtration": min_tmp_during_filtration, # 周期内最小TMP
|
|
|
- "global_TMP_limit": p.global_TMP_limit, # TMP限制
|
|
|
|
|
|
|
+ "global_TMP_limit": p.global_TMP_hard_limit, # TMP限制
|
|
|
"TMP0": p.TMP0, # 周期初始TMP
|
|
"TMP0": p.TMP0, # 周期初始TMP
|
|
|
"TMP_after_ceb": tmp_after_ceb, # CEB后TMP
|
|
"TMP_after_ceb": tmp_after_ceb, # CEB后TMP
|
|
|
|
|
|
|
@@ -561,6 +571,7 @@ def simulate_one_supercycle(p: UFParams, L_s: float, t_bw_s: float):
|
|
|
"R0": R0, # 周期初始膜阻力
|
|
"R0": R0, # 周期初始膜阻力
|
|
|
"R_after_ceb": R_after_ceb, # CEB后膜阻力
|
|
"R_after_ceb": R_after_ceb, # CEB后膜阻力
|
|
|
"max_residual_increase_per_run": max_residual_increase, # 最大残余污染增量
|
|
"max_residual_increase_per_run": max_residual_increase, # 最大残余污染增量
|
|
|
|
|
+ "delta_R_allow": delta_R_allow, # 污染允许增长空间
|
|
|
|
|
|
|
|
# 能耗指标
|
|
# 能耗指标
|
|
|
"ton_water_energy_kWh_per_m3": ton_water_energy, # 吨水电耗
|
|
"ton_water_energy_kWh_per_m3": ton_water_energy, # 吨水电耗
|
|
@@ -618,9 +629,9 @@ def calculate_reward(p: UFParams, info: dict) -> float:
|
|
|
# ========== 提取性能指标 ==========
|
|
# ========== 提取性能指标 ==========
|
|
|
recovery = info["recovery"] # 回收率 [0-1]
|
|
recovery = info["recovery"] # 回收率 [0-1]
|
|
|
|
|
|
|
|
- # 残余污染比例 = (CEB后阻力 - 初始阻力) / 初始阻力
|
|
|
|
|
- # 反映本超级周期后的净污染累积
|
|
|
|
|
- residual_ratio = (info["R_after_ceb"] - info["R0"]) / info["R0"]
|
|
|
|
|
|
|
+ # 污染比例:实际上升的阻力 / 允许上升的阻力
|
|
|
|
|
+ # 允许上升的阻力值 = TMP 软上限对应的膜阻力 - 周期内最大 TMP 对应的膜阻力(即 info["delta_R_allow"])
|
|
|
|
|
+ residual_ratio = (info["R_after_ceb"] - info["R0"]) / info["delta_R_allow"]
|
|
|
|
|
|
|
|
# ========== 回收率奖励项 ==========
|
|
# ========== 回收率奖励项 ==========
|
|
|
# 将回收率归一化到 [0, 1] 区间(基于预期范围)
|
|
# 将回收率归一化到 [0, 1] 区间(基于预期范围)
|
|
@@ -633,7 +644,7 @@ def calculate_reward(p: UFParams, info: dict) -> float:
|
|
|
# - k_rec 控制曲线陡峭程度,越大变化越陡
|
|
# - k_rec 控制曲线陡峭程度,越大变化越陡
|
|
|
rec_reward = np.clip(np.tanh(p.k_rec * (rec_norm - 0.5)), -1, 1)
|
|
rec_reward = np.clip(np.tanh(p.k_rec * (rec_norm - 0.5)), -1, 1)
|
|
|
|
|
|
|
|
- # ========== 残余污染惩罚项 ==========
|
|
|
|
|
|
|
+ # ========== 污染惩罚项 ==========
|
|
|
# 使用 tanh 函数构建惩罚曲线
|
|
# 使用 tanh 函数构建惩罚曲线
|
|
|
# - residual_ratio < rr0 时,res_penalty > 0(奖励低污染)
|
|
# - residual_ratio < rr0 时,res_penalty > 0(奖励低污染)
|
|
|
# - residual_ratio > rr0 时,res_penalty < 0(惩罚高污染)
|
|
# - residual_ratio > rr0 时,res_penalty < 0(惩罚高污染)
|
|
@@ -683,7 +694,8 @@ def is_dead_cycle(info: dict) -> bool:
|
|
|
max_tmp = info.get("max_TMP_during_filtration", 0) # 周期内最大TMP
|
|
max_tmp = info.get("max_TMP_during_filtration", 0) # 周期内最大TMP
|
|
|
recovery = info.get("recovery", 1.0) # 回收率
|
|
recovery = info.get("recovery", 1.0) # 回收率
|
|
|
R_after_ceb = info.get("R_after_ceb", 0) # CEB后膜阻力
|
|
R_after_ceb = info.get("R_after_ceb", 0) # CEB后膜阻力
|
|
|
- R0 = info.get("R0", 1e-6) # 初始膜阻力(加小值避免除零)
|
|
|
|
|
|
|
+ R0 = info.get("R0", 1e-6) # 初始膜阻力
|
|
|
|
|
+ delta_R_allow = info.get("delta_R_allow", 1e-6) # 允许上升的膜阻力(加小值避免除零)
|
|
|
|
|
|
|
|
# ========== 失败条件检查 ==========
|
|
# ========== 失败条件检查 ==========
|
|
|
# 条件1:TMP超限
|
|
# 条件1:TMP超限
|
|
@@ -694,9 +706,9 @@ def is_dead_cycle(info: dict) -> bool:
|
|
|
if recovery < 0.75:
|
|
if recovery < 0.75:
|
|
|
return False # 失败
|
|
return False # 失败
|
|
|
|
|
|
|
|
- # 条件3:残余污染增长过快
|
|
|
|
|
- residual_increase_ratio = (R_after_ceb - R0) / R0
|
|
|
|
|
- if residual_increase_ratio > 0.1:
|
|
|
|
|
|
|
+ # 条件3:污染增长量超过容许范围
|
|
|
|
|
+ residual_increase = (R_after_ceb - R0) / delta_R_allow
|
|
|
|
|
+ if residual_increase > 1/15:
|
|
|
return False # 失败
|
|
return False # 失败
|
|
|
|
|
|
|
|
# 所有条件通过
|
|
# 所有条件通过
|
|
@@ -843,7 +855,7 @@ class UFSuperCycleEnv(gym.Env):
|
|
|
attempts += 1
|
|
attempts += 1
|
|
|
self.generate_initial_state() # 生成随机初始状态
|
|
self.generate_initial_state() # 生成随机初始状态
|
|
|
if self.check_dead_initial_state(max_steps=getattr(self, "max_episode_steps", 15),
|
|
if self.check_dead_initial_state(max_steps=getattr(self, "max_episode_steps", 15),
|
|
|
- L_s=3800, t_bw_s=60):
|
|
|
|
|
|
|
+ L_s=4900, t_bw_s=50):
|
|
|
# True 表示可行,退出循环
|
|
# True 表示可行,退出循环
|
|
|
break
|
|
break
|
|
|
else:
|
|
else:
|
|
@@ -855,10 +867,13 @@ class UFSuperCycleEnv(gym.Env):
|
|
|
self.last_action = (self.base_params.L_min_s, self.base_params.t_bw_min_s)
|
|
self.last_action = (self.base_params.L_min_s, self.base_params.t_bw_min_s)
|
|
|
self.max_TMP_during_filtration = self.current_params.TMP0
|
|
self.max_TMP_during_filtration = self.current_params.TMP0
|
|
|
|
|
|
|
|
|
|
+ # 记录本episode初始TMP,用于终末奖励
|
|
|
|
|
+ self.TMP0 = self.current_params.TMP0
|
|
|
|
|
+
|
|
|
return self._get_obs(), {}
|
|
return self._get_obs(), {}
|
|
|
|
|
|
|
|
def check_dead_initial_state(self, max_steps: int = None,
|
|
def check_dead_initial_state(self, max_steps: int = None,
|
|
|
- L_s: int = 4900, t_bw_s: int = 50) -> bool:
|
|
|
|
|
|
|
+ L_s: int = 3800, t_bw_s: int = 60) -> bool:
|
|
|
"""
|
|
"""
|
|
|
判断当前环境生成的初始状态是否为可行(non-dead)。
|
|
判断当前环境生成的初始状态是否为可行(non-dead)。
|
|
|
使用最保守策略连续模拟 max_steps 次:
|
|
使用最保守策略连续模拟 max_steps 次:
|
|
@@ -921,7 +936,7 @@ class UFSuperCycleEnv(gym.Env):
|
|
|
ceb_removal = self.current_params.ceb_removal
|
|
ceb_removal = self.current_params.ceb_removal
|
|
|
|
|
|
|
|
# === 4. 从 current_params 动态读取上下限 ===
|
|
# === 4. 从 current_params 动态读取上下限 ===
|
|
|
- TMP0_min, TMP0_max = self.current_params.TMP0_min, self.current_params.TMP0_max
|
|
|
|
|
|
|
+ TMP0_min, TMP0_max = self.current_params.TMP0_min, self.current_params.global_TMP_hard_limit
|
|
|
q_UF_min, q_UF_max = self.current_params.q_UF_min, self.current_params.q_UF_max
|
|
q_UF_min, q_UF_max = self.current_params.q_UF_min, self.current_params.q_UF_max
|
|
|
temp_min, temp_max = self.current_params.temp_min, self.current_params.temp_max
|
|
temp_min, temp_max = self.current_params.temp_min, self.current_params.temp_max
|
|
|
nuK_min, nuK_max = self.current_params.nuK_min, self.current_params.nuK_max
|
|
nuK_min, nuK_max = self.current_params.nuK_min, self.current_params.nuK_max
|
|
@@ -976,21 +991,41 @@ class UFSuperCycleEnv(gym.Env):
|
|
|
feasible = is_dead_cycle(info) # True 表示成功循环,False 表示失败
|
|
feasible = is_dead_cycle(info) # True 表示成功循环,False 表示失败
|
|
|
|
|
|
|
|
if feasible:
|
|
if feasible:
|
|
|
|
|
+ # 每步奖励
|
|
|
reward = calculate_reward(self.current_params, info)
|
|
reward = calculate_reward(self.current_params, info)
|
|
|
self.current_params = next_params
|
|
self.current_params = next_params
|
|
|
terminated = False
|
|
terminated = False
|
|
|
else:
|
|
else:
|
|
|
- reward = -10
|
|
|
|
|
|
|
+ # 中途失败惩罚
|
|
|
|
|
+ reward = -20
|
|
|
terminated = True
|
|
terminated = True
|
|
|
|
|
|
|
|
|
|
+ # 判断是否到达最大步数
|
|
|
truncated = self.current_step >= self.max_episode_steps
|
|
truncated = self.current_step >= self.max_episode_steps
|
|
|
|
|
+
|
|
|
self.last_action = (L_s, t_bw_s)
|
|
self.last_action = (L_s, t_bw_s)
|
|
|
next_obs = self._get_obs()
|
|
next_obs = self._get_obs()
|
|
|
|
|
|
|
|
info["feasible"] = feasible
|
|
info["feasible"] = feasible
|
|
|
info["step"] = self.current_step
|
|
info["step"] = self.current_step
|
|
|
|
|
|
|
|
|
|
+ # ===================== 终末奖励:鼓励 TMP 接近初始状态 =====================
|
|
|
|
|
+ # 仅在 episode 自然结束(满步但未提前失败)时触发
|
|
|
|
|
+ if truncated and not terminated:
|
|
|
|
|
+ TMP_initial = self.TMP0 # reset 时记录的初始 TMP
|
|
|
|
|
+ TMP_final = next_obs[self.obs_index["TMP0"]] # next_obs 提供的最终 TMP
|
|
|
|
|
+
|
|
|
|
|
+ delta_ratio = abs((TMP_final - TMP_initial) / TMP_initial)
|
|
|
|
|
+
|
|
|
|
|
+ alpha = 4.0 # TMP 偏差敏感度
|
|
|
|
|
+ gamma = 2.0 # 奖励幅度
|
|
|
|
|
+ stability_reward = gamma * (np.exp(-alpha * delta_ratio) - 1)
|
|
|
|
|
+
|
|
|
|
|
+ reward += stability_reward
|
|
|
|
|
+ terminated = True # episode 正式结束
|
|
|
|
|
+
|
|
|
return next_obs, reward, terminated, truncated, info
|
|
return next_obs, reward, terminated, truncated, info
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+
|