# detection.py - rule-based anomaly detection and plotting for UF/RO water-treatment sensor data
  1. import os
  2. import re
  3. import pandas as pd
  4. import numpy as np
  5. import matplotlib.pyplot as plt
  6. from matplotlib.font_manager import FontProperties
  7. # -------------------------- 彻底解决字体警告问题 --------------------------
  8. def setup_chinese_font():
  9. try:
  10. # 尝试加载Windows常见中文字体
  11. font = FontProperties(fname="C:/Windows/Fonts/simhei.ttf") # 黑体
  12. plt.rcParams["font.family"] = font.get_name()
  13. except:
  14. try:
  15. # 尝试加载macOS常见中文字体
  16. font = FontProperties(fname="/System/Library/Fonts/PingFang.ttc") # 苹方
  17. plt.rcParams["font.family"] = font.get_name()
  18. except:
  19. try:
  20. # 尝试加载Linux常见中文字体
  21. font = FontProperties(fname="/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf")
  22. plt.rcParams["font.family"] = font.get_name()
  23. except:
  24. # 无中文字体时使用默认英文无衬线字体(无警告)
  25. plt.rcParams["font.family"] = ["DejaVu Sans", "sans-serif"]
  26. plt.rcParams["axes.unicode_minus"] = False # 解决负号显示问题
  27. # 初始化字体配置(无警告)
  28. setup_chinese_font()
  29. # -------------------------- 核心配置:需画图的传感器白名单(新增4个水质传感器) --------------------------
  30. PLOT_WHITELIST = [
  31. # 1. 跨膜压差(4个)
  32. "C.M.UF1_DB@press_PV",
  33. "C.M.UF2_DB@press_PV",
  34. "C.M.UF3_DB@press_PV",
  35. "C.M.UF4_DB@press_PV",
  36. # 2. 一段压差(4个)
  37. "C.M.RO1_DB@DPT_1",
  38. "C.M.RO2_DB@DPT_1",
  39. "C.M.RO3_DB@DPT_1",
  40. "C.M.RO4_DB@DPT_1",
  41. # 3. 二段压差(4个)
  42. "C.M.RO1_DB@DPT_2",
  43. "C.M.RO2_DB@DPT_2",
  44. "C.M.RO3_DB@DPT_2",
  45. "C.M.RO4_DB@DPT_2",
  46. # 4. 总进水电导(1个)
  47. "C.M.RO_Cond_ZJS@out",
  48. # 5. 产水电导(4个)
  49. "C.M.RO1_Cond_CS@out",
  50. "C.M.RO2_Cond_CS@out",
  51. "C.M.RO3_Cond_CS@out",
  52. "C.M.RO4_Cond_CS@out",
  53. # 6. 新增需画图的水质传感器(4个)
  54. "C.M.RO_PH_ZJS@out", # 进水pH:不在6-9为异常
  55. "C.M.UF_Tur_ZJS@out", # 进水浊度:大于10为异常
  56. "C.M.UF_Tur_ZCS@out", # 产水浊度:大于0.15为异常
  57. "C.M.PH_WGS@out" # 外供水pH:不在6-10为异常(已修改阈值)
  58. ]
  59. # -------------------------- 传感器异常检测类 --------------------------
  60. class DirectSensorAnomalyDetector:
  61. """直接传感器异常检测器类:包含压差、电导、水质传感器画图,标准按要求配置"""
  62. def __init__(self, data_dir="datasets_xishan", result_dir="direct_detection_results"):
  63. self.data_dir = data_dir # 数据文件存放目录
  64. self.result_dir = result_dir # 结果保存目录
  65. self.fig_dir = os.path.join(result_dir, "figures") # 图表保存目录
  66. os.makedirs(self.result_dir, exist_ok=True)
  67. os.makedirs(self.fig_dir, exist_ok=True) # 自动创建图表目录
  68. # 核心配置:仅保留有明确阈值的传感器(无阈值的已删除)
  69. self.sensor_config = self._build_sensor_config()
  70. # 存储结果
  71. self.results = {
  72. "raw_data": {}, # 原始数据(仅用于统计)
  73. "valid_data": {}, # 筛选后的有效数据(核心分析数据)
  74. "anomalies": {}, # 异常标记(True=异常)
  75. "threshold_summary": [] # 阈值总结列表
  76. }
  77. def _build_sensor_config(self):
  78. """构建传感器配置:包含所有明确阈值,重点确保新增水质传感器标准正确"""
  79. config = {}
  80. # 1. 超滤(UF)相关传感器 - 跨膜压差标准:超过0.06Mpa为异常
  81. uf_vars = [
  82. "C.M.UF1_DB@press_PV", # 跨膜压差:超过0.06Mpa为异常
  83. "C.M.UF2_DB@press_PV",
  84. "C.M.UF3_DB@press_PV",
  85. "C.M.UF4_DB@press_PV",
  86. "C.M.UF1_FT_JS@out", # 进水流量:低于基准80%或110%-120%
  87. "C.M.UF2_FT_JS@out",
  88. "C.M.UF3_FT_JS@out",
  89. "C.M.UF4_FT_JS@out",
  90. "UF1_FluxF", # 膜通量:49.5-60.5lmh
  91. "UF2_FluxF",
  92. "UF3_FluxF",
  93. "UF4_FluxF",
  94. "UF1Per", # 渗透率:247.5-302.5lmh/bar
  95. "UF2Per",
  96. "UF3Per",
  97. "UF4Per",
  98. "C.M.UF1_DW@press_PV", # 反洗压差:偏离正常±10%-20%
  99. "C.M.UF2_DW@press_PV",
  100. "C.M.UF3_DW@press_PV",
  101. "C.M.UF4_DW@press_PV",
  102. "C.M.UF1_PT_CS@out", # 产水压力:偏离正常±10%-20%
  103. "C.M.UF2_PT_CS@out",
  104. "C.M.UF3_PT_CS@out",
  105. "C.M.UF4_PT_CS@out",
  106. "C.M.UF_FT_ZCS@out", # 总产水流量:偏离正常±10%-20%
  107. "C.M.UF_PT_ZCS@out", # 总产水压力:偏离正常±10%-20%
  108. "C.M.UF_PT_ZJS@out", # 总进水压力:偏离正常±10%-20%
  109. "C.M.UF_FT_FX@out" # 反洗流量:偏离正常±10%-20%
  110. ]
  111. for var in uf_vars:
  112. if "DB@press_PV" in var and "DW" not in var: # 跨膜压差(非反洗)
  113. desc = "跨膜压差:超过0.06Mpa为异常"
  114. elif "FluxF" in var:
  115. desc = "膜运行通量:49.5-60.5lmh"
  116. elif "FT_JS" in var:
  117. desc = "进水流量:低于基准80%或110%-120%"
  118. elif "Per" in var:
  119. desc = "渗透率:247.5-302.5lmh/bar"
  120. else:
  121. desc = "正常范围±10%-20%"
  122. config[var] = {"source": "UFExcel", "desc": desc}
  123. # 2. 反渗透(RO)相关传感器 - 段间压差、电导标准按要求配置
  124. ro_vars = [
  125. "C.M.RO1_DB@DPT_1", # 一段压差:超过0.3Mpa为异常(已修改阈值)
  126. "C.M.RO2_DB@DPT_1",
  127. "C.M.RO3_DB@DPT_1",
  128. "C.M.RO4_DB@DPT_1",
  129. "C.M.RO1_DB@DPT_2", # 二段压差:超过0.3Mpa为异常(已修改阈值)
  130. "C.M.RO2_DB@DPT_2",
  131. "C.M.RO3_DB@DPT_2",
  132. "C.M.RO4_DB@DPT_2",
  133. "C.M.RO1_PT_JS@out", # 一段进水压力:偏离正常±10%-20%
  134. "C.M.RO2_PT_JS@out",
  135. "C.M.RO3_PT_JS@out",
  136. "C.M.RO4_PT_JS@out",
  137. "C.M.RO1_FT_NS@out", # 浓水流量:偏离正常±10%-20%
  138. "C.M.RO2_FT_NS@out",
  139. "C.M.RO3_FT_NS@out",
  140. "C.M.RO4_FT_NS@out",
  141. "C.M.RO_Cond_ZJS@out", # 总进水电导:超过4000μs/cm为异常
  142. "C.M.RO1_Cond_CS@out", # 产水电导:超过250μs/cm为异常
  143. "C.M.RO2_Cond_CS@out",
  144. "C.M.RO3_Cond_CS@out",
  145. "C.M.RO4_Cond_CS@out",
  146. "RO1_FluxF", # 膜通量:19.8-24.2lmh
  147. "RO2_FluxF",
  148. "RO3_FluxF",
  149. "RO4_FluxF",
  150. "RO1HSL", # 回收率:70%-80%
  151. "RO2HSL",
  152. "RO3HSL",
  153. "RO4HSL",
  154. "RO1_TYL", # 脱盐率:≥97%
  155. "RO2_TYL",
  156. "RO3_TYL",
  157. "RO4_TYL",
  158. "C.M.RO_PT_ZCS@out", # 总产水压力:偏离正常±10%-20%
  159. "C.M.RO1_PT_CS@out", # 产水压力:偏离正常±10%-20%
  160. "C.M.RO2_PT_CS@out",
  161. "C.M.RO3_PT_CS@out",
  162. "C.M.RO4_PT_CS@out"
  163. ]
  164. for var in ro_vars:
  165. if "DB@DPT_1" in var or "DB@DPT_2" in var: # 一段/二段压差(已修改阈值)
  166. desc = "段间压差:超过0.3Mpa为异常" # 原阈值0.12Mpa修改为0.3Mpa
  167. elif "Cond_ZJS" in var: # 总进水电导
  168. desc = "总进水电导:超过4000μs/cm为异常"
  169. elif "Cond_CS" in var: # 产水电导
  170. desc = "产水电导:超过250μs/cm为异常"
  171. elif "HSL" in var:
  172. desc = "回收率:70%-80%"
  173. elif "TYL" in var:
  174. desc = "脱盐率:≥97%"
  175. elif "FluxF" in var:
  176. desc = "膜运行通量:19.8-24.2lmh"
  177. else:
  178. desc = "正常范围±10%-20%"
  179. config[var] = {"source": "ROExcel", "desc": desc}
  180. # 3. 水质类传感器 - 重点确保新增4个传感器标准正确
  181. water_quality_vars = {
  182. "C.M.RO_TT_ZJS@out": "水温:5-35℃",
  183. "C.M.RO_PH_ZJS@out": "进水PH:不在6.0-9.0为异常", # 新增画图
  184. "C.M.RO_ORP_ZJS@out": "进水ORP:±300mv",
  185. "C.M.UF_Tur_ZJS@out": "进水浊度:大于10NTU为异常", # 新增画图
  186. "C.M.UF_Tur_ZCS@out": "产水浊度:大于0.15NTU为异常", # 新增画图
  187. "C.M.ZH_PH@out": "中和池PH:6.0-9.0",
  188. "C.M.ZH_ORP@out": "中和池ORP:±300mv",
  189. "C.M.PH_CIP@out": "CIP清洗液PH:酸1.5-3.0/碱10-12.5",
  190. "C.M.PH_WGS@out": "外供水PH:不在6.0-10.0为异常" # 原阈值6.0-9.0修改为6.0-10.0
  191. }
  192. for var, desc in water_quality_vars.items():
  193. config[var] = {"source": "Excel水质规则", "desc": desc}
  194. # 4. 液位类传感器 - 标准不变
  195. level_vars = {
  196. "C.M.LT_JSC@out": "超滤原水池液位:3.0-5.0m",
  197. "C.M.LT_FXSC@out": "反洗水池液位:3.0-5.0m",
  198. "C.M.LT_QSC@out": "清水池液位:3.0-5.0m",
  199. "C.M.LT_ZHC@out": "中和池液位:3.0-5.0m",
  200. "C.M.LT_HCl@out": "盐酸药箱液位:0.2-1.1m",
  201. "C.M.LT_NaOH@out": "氢氧化钠药箱液位:0.2-1.1m",
  202. "C.M.LT_NaClO@out": "次氯酸钠药箱液位:0.2-1.1m",
  203. "C.M.LT_PAC@out": "絮凝剂药箱液位:0.2-1.1m",
  204. "C.M.LT_HYJ1@out": "还原剂药箱液位:0.2-1.1m",
  205. "C.M.LT_HYJ2@out": "还原剂药箱液位:0.2-1.1m",
  206. "C.M.LT_ZGJ@out": "阻垢剂药箱液位:0.2-1.1m",
  207. "C.M.LT_SJJ@out": "杀菌剂药箱液位:0.2-1.1m"
  208. }
  209. for var, desc in level_vars.items():
  210. config[var] = {"source": "Excel液位规则", "desc": desc}
  211. # 5. 泵/风机频率传感器 - 标准不变
  212. frequency_vars = [
  213. "C.M.UF_GSB1_fre@out", # 运行频率:35-65Hz
  214. "C.M.UF_GSB2_fre@out",
  215. "C.M.UF_GSB3_fre@out",
  216. "C.M.UF_GSB4_fre@out",
  217. "C.M.UF_FXB1_fre@out",
  218. "C.M.UF_FXB2_fre@out",
  219. "C.M.UF_FXB3_fre@out",
  220. "C.M.RO_GYB1_fre@out",
  221. "C.M.RO_GYB2_fre@out",
  222. "C.M.RO_GYB3_fre@out",
  223. "C.M.RO_GYB4_fre@out",
  224. "C.M.RO_DJB1_fre@out",
  225. "C.M.RO_DJB2_fre@out",
  226. "C.M.RO_DJB3_fre@out",
  227. "C.M.RO_DJB4_fre@out",
  228. "C.M.RO_WGB1_fre@out",
  229. "C.M.RO_WGB2_fre@out",
  230. "C.M.RO_WGB3_fre@out",
  231. "C.M.CIP_QXB1_fre@out",
  232. "C.M.CIP_QXB2_fre@out",
  233. "C.M.JYB2_ZGJ1_fre@out",
  234. "C.M.JYB2_ZGJ2_fre@out",
  235. "C.M.JYB2_ZGJ3_fre@out",
  236. "C.M.JYB2_ZGJ4_fre@out"
  237. ]
  238. for var in frequency_vars:
  239. config[var] = {
  240. "source": "设备默认规则",
  241. "desc": "运行频率:35-65Hz"
  242. }
  243. # 6. 补充异常规则变量 - 标准不变
  244. supplemental_vars = {
  245. "water_in": "超滤总进水量:≥600",
  246. "C.M.FT_ZJS@out": "总进水流量:≥600",
  247. "RO_TCHFlow": "RO总产水流量:≥550",
  248. "RO1_CSFlow": "RO1产水流量:≥150",
  249. "RO2_CSFlow": "RO2产水流量:≥150",
  250. "RO3_CSFlow": "RO3产水流量:≥150",
  251. "RO4_CSFlow": "RO4产水流量:≥150",
  252. "C.M.UF_ORP_ZCS@out": "超滤总产水ORP:≤100",
  253. "C.M.UF_PH_ZCS@out": "超滤总产水PH:6.0-9.0",
  254. "C.M.PT_KYJ@out": "压缩空气压力:≥0.5",
  255. "C.M.UF_Cl_ZCS@out": "超滤总产水余氯:≤0",
  256. "RO_TotalFlow": "RO总进水流量:≥700",
  257. "QSWGB_Flow": "清水外供泵出水流量:≥200"
  258. }
  259. for var, desc in supplemental_vars.items():
  260. config[var] = {"source": "补充规则", "desc": desc}
  261. return config
  262. def filter_valid_data(self, raw_data):
  263. """筛选有效数据:剔除NaN、负值、无穷大(无效数据)"""
  264. valid_data = np.where(
  265. (raw_data > 0) & (~np.isnan(raw_data)) & (~np.isinf(raw_data)),
  266. raw_data,
  267. np.nan # 无效数据标记为NaN(画图时自动不显示)
  268. )
  269. return valid_data
  270. def _parse_threshold(self, desc):
  271. """解析阈值描述:支持超过阈值、范围、小于阈值等所有场景"""
  272. # 处理"超过X为异常"格式(跨膜压差、段间压差、电导)
  273. over_match = re.search(r'超过(\d+\.?\d*)', desc)
  274. if over_match:
  275. return ("over", float(over_match.group(1)))
  276. # 处理"不在X-Y为异常"格式(pH值)
  277. not_range_match = re.search(r'不在(\d+\.?\d*)-(\d+\.?\d*)', desc)
  278. if not_range_match:
  279. return ("not_range", float(not_range_match.group(1)), float(not_range_match.group(2)))
  280. # 处理"大于X为异常"格式(浊度)
  281. gt_match = re.search(r'大于(\d+\.?\d*)', desc)
  282. if gt_match:
  283. return ("gt", float(gt_match.group(1)))
  284. # 处理范围格式(A-B)
  285. range_match = re.search(r'(\d+\.?\d*)-(\d+\.?\d*)', desc)
  286. if range_match:
  287. return (float(range_match.group(1)), float(range_match.group(2)))
  288. # 处理"<X"格式
  289. lt_match = re.search(r'<(\d+\.?\d*)', desc)
  290. if lt_match:
  291. return (0.0, float(lt_match.group(1)) - 1e-9)
  292. # 处理"≥X"格式
  293. ge_match = re.search(r'≥(\d+\.?\d*)', desc)
  294. if ge_match:
  295. return (float(ge_match.group(1)), float('inf'))
  296. # 处理"≤X"格式
  297. le_match = re.search(r'≤(\d+\.?\d*)', desc)
  298. if le_match:
  299. return (float('-inf'), float(le_match.group(1)))
  300. # 处理"±X"格式
  301. pm_match = re.search(r'±(\d+\.?\d*)', desc)
  302. if pm_match:
  303. val = float(pm_match.group(1))
  304. return (-val, val)
  305. # 处理双范围(酸A-B/碱C-D)
  306. double_range_match = re.search(r'酸(\d+\.?\d*)-(\d+\.?\d*)/碱(\d+\.?\d*)-(\d+\.?\d*)', desc)
  307. if double_range_match:
  308. a, b, c, d = map(float, double_range_match.groups())
  309. return ((a, b), (c, d))
  310. # 处理百分比范围(低于基准80%或110%-120%)
  311. if "低于基准80%" in desc and "110%-120%" in desc:
  312. return ("baseline_80_110_120",)
  313. # 处理±10%-20%
  314. if "±10%" in desc or "±20%" in desc:
  315. return ("percent_range", desc)
  316. return (None, None)
  317. def rule_based_detect(self, var_name, valid_data):
  318. """基于规则的异常检测:支持新增水质传感器的异常逻辑"""
  319. config = self.sensor_config[var_name]
  320. desc = config["desc"]
  321. thresholds = self._parse_threshold(desc)
  322. # 跳过无效数据(NaN)
  323. valid_mask = ~np.isnan(valid_data)
  324. anomalies = np.zeros_like(valid_data, dtype=bool)
  325. if not np.any(valid_mask):
  326. print(f" 变量 {var_name} 无有效数据,跳过检测")
  327. return anomalies
  328. # 1. 超过X为异常(跨膜压差、段间压差、电导)
  329. if isinstance(thresholds, tuple) and thresholds[0] == "over":
  330. threshold_val = thresholds[1]
  331. anomalies[valid_mask] = valid_data[valid_mask] > threshold_val
  332. # 2. 不在X-Y为异常(pH值)
  333. elif isinstance(thresholds, tuple) and thresholds[0] == "not_range":
  334. low, high = thresholds[1], thresholds[2]
  335. anomalies[valid_mask] = (valid_data[valid_mask] < low) | (valid_data[valid_mask] > high)
  336. # 3. 大于X为异常(浊度)
  337. elif isinstance(thresholds, tuple) and thresholds[0] == "gt":
  338. threshold_val = thresholds[1]
  339. anomalies[valid_mask] = valid_data[valid_mask] > threshold_val
  340. # 4. 双范围处理(CIP清洗液PH)
  341. elif isinstance(thresholds, tuple) and len(thresholds) == 2 and all(isinstance(t, tuple) for t in thresholds):
  342. (a, b), (c, d) = thresholds
  343. normal = ((valid_data >= a) & (valid_data <= b)) | ((valid_data >= c) & (valid_data <= d))
  344. anomalies[valid_mask] = ~normal[valid_mask]
  345. # 5. 基准百分比范围(低于80%或110%-120%)
  346. elif thresholds == ("baseline_80_110_120",):
  347. baseline = np.percentile(valid_data[valid_mask], 50)
  348. low_threshold = baseline * 0.8
  349. high_low = baseline * 1.1
  350. high_high = baseline * 1.2
  351. anomalies[valid_mask] = (valid_data[valid_mask] < low_threshold) | \
  352. ((valid_data[valid_mask] >= high_low) & (valid_data[valid_mask] <= high_high))
  353. # 6. 百分比范围(±10%-20%)
  354. elif isinstance(thresholds, tuple) and thresholds[0] == "percent_range":
  355. percent = 0.1 if "±10%" in config["desc"] else 0.2
  356. mean_val = np.mean(valid_data[valid_mask])
  357. low = mean_val * (1 - percent)
  358. high = mean_val * (1 + percent)
  359. anomalies[valid_mask] = (valid_data[valid_mask] < low) | (valid_data[valid_mask] > high)
  360. # 7. 普通范围处理
  361. elif thresholds[0] is not None and thresholds[1] is not None:
  362. lower, upper = thresholds
  363. anomalies[valid_mask] = (valid_data[valid_mask] < lower) | (valid_data[valid_mask] > upper)
  364. return anomalies
  365. def load_raw_data(self):
  366. """加载所有CSV数据,仅保留有明确阈值的传感器"""
  367. file_paths = [os.path.join(self.data_dir, f"data_process_{i}.csv") for i in range(1, 105)]
  368. existing_files = [p for p in file_paths if os.path.exists(p)]
  369. if not existing_files:
  370. raise FileNotFoundError(f" 未在 {self.data_dir} 目录找到数据文件(需命名为data_process_1~104.csv)")
  371. print(f"找到 {len(existing_files)} 个数据文件,加载中...")
  372. for file in existing_files:
  373. try:
  374. df = pd.read_csv(file).iloc[:, 1:] # 剔除第一列时间列
  375. for var_name in self.sensor_config.keys():
  376. if var_name not in df.columns:
  377. continue
  378. # 合并原始数据
  379. raw_data = df[var_name].values
  380. if var_name not in self.results["raw_data"]:
  381. self.results["raw_data"][var_name] = []
  382. self.results["raw_data"][var_name].append(raw_data)
  383. except Exception as e:
  384. print(f" 加载文件 {os.path.basename(file)} 出错:{str(e)},跳过")
  385. # 转换为numpy数组
  386. for var_name in self.results["raw_data"]:
  387. self.results["raw_data"][var_name] = np.concatenate(self.results["raw_data"][var_name])
  388. def plot_anomalies(self, var_name):
  389. """画图:有效数据+异常数据+阈值线(适配所有新增传感器类型)"""
  390. valid_data = self.results["valid_data"][var_name]
  391. anomalies = self.results["anomalies"][var_name]
  392. config = self.sensor_config[var_name]
  393. # 生成时间轴(按数据点顺序,单位:小时)
  394. time = np.arange(len(valid_data)) / 60 # 假设1分钟1个数据点,转换为小时
  395. # 筛选有效数据和异常数据的索引
  396. valid_mask = ~np.isnan(valid_data)
  397. anomaly_mask = anomalies & valid_mask
  398. # 创建图表
  399. plt.figure(figsize=(12, 6))
  400. # 1. 绘制有效数据(蓝色实线)
  401. plt.plot(time[valid_mask], valid_data[valid_mask], 'b-', alpha=0.8, label='有效数据')
  402. # 2. 绘制异常数据(红色圆点,突出显示)- 不显示异常点个数
  403. if np.any(anomaly_mask):
  404. plt.scatter(time[anomaly_mask], valid_data[anomaly_mask],
  405. color='red', s=20, zorder=5, label='异常点') # 修改此处,去掉个数统计
  406. # 3. 绘制阈值线(根据阈值类型适配)
  407. thresholds = self._parse_threshold(config["desc"])
  408. if isinstance(thresholds, tuple):
  409. # 超过X为异常(红色虚线)
  410. if thresholds[0] == "over":
  411. threshold_val = thresholds[1]
  412. plt.axhline(y=threshold_val, color='red', linestyle='--', alpha=0.8,
  413. label=f'异常阈值:{threshold_val}(超过为异常)')
  414. # 不在X-Y为异常(绿色虚线:上下限)
  415. elif thresholds[0] == "not_range":
  416. low, high = thresholds[1], thresholds[2]
  417. plt.axhline(y=low, color='green', linestyle='--', alpha=0.8, label=f'正常下限:{low}')
  418. plt.axhline(y=high, color='green', linestyle='--', alpha=0.8, label=f'正常上限:{high}')
  419. plt.fill_between(time, low, high, alpha=0.1, color='green', label='正常范围')
  420. # 大于X为异常(红色虚线)
  421. elif thresholds[0] == "gt":
  422. threshold_val = thresholds[1]
  423. plt.axhline(y=threshold_val, color='red', linestyle='--', alpha=0.8,
  424. label=f'异常阈值:{threshold_val}(大于为异常)')
  425. # 双范围(CIP清洗液PH)
  426. elif len(thresholds) == 2 and all(isinstance(t, tuple) for t in thresholds):
  427. (a, b), (c, d) = thresholds
  428. plt.axhline(y=a, color='g', linestyle='--', alpha=0.6, label=f'酸下限:{a}')
  429. plt.axhline(y=b, color='g', linestyle='--', alpha=0.6, label=f'酸上限:{b}')
  430. plt.axhline(y=c, color='g', linestyle='--', alpha=0.6, label=f'碱下限:{c}')
  431. plt.axhline(y=d, color='g', linestyle='--', alpha=0.6, label=f'碱上限:{d}')
  432. # 普通范围
  433. elif thresholds[0] is not None and thresholds[1] is not None:
  434. lower, upper = thresholds
  435. if lower != float('-inf'):
  436. plt.axhline(y=lower, color='g', linestyle='--', alpha=0.6, label=f'正常下限:{lower}')
  437. if upper != float('inf'):
  438. plt.axhline(y=upper, color='g', linestyle='--', alpha=0.6, label=f'正常上限:{upper}')
  439. # 百分比范围
  440. elif thresholds[0] == "percent_range":
  441. percent = 0.1 if "±10%" in config["desc"] else 0.2
  442. mean_val = np.mean(valid_data[valid_mask])
  443. low = mean_val * (1 - percent)
  444. high = mean_val * (1 + percent)
  445. plt.axhline(y=low, color='g', linestyle='--', alpha=0.6, label=f'正常下限:{low:.2f}')
  446. plt.axhline(y=high, color='g', linestyle='--', alpha=0.6, label=f'正常上限:{high:.2f}')
  447. # 图表美化
  448. plt.title(f'{var_name}\n异常标准:{config["desc"]}', fontsize=12, pad=20)
  449. plt.xlabel('时间(小时)', fontsize=10)
  450. plt.ylabel('数值', fontsize=10)
  451. plt.legend(loc='upper right', fontsize=9)
  452. plt.grid(alpha=0.3)
  453. plt.tight_layout()
  454. # 保存图表
  455. fig_path = os.path.join(self.fig_dir, f'{var_name}_异常检测图.png')
  456. plt.savefig(fig_path, dpi=300, bbox_inches='tight', facecolor='white')
  457. plt.close()
  458. print(f" 图表保存:{fig_path}")
  459. def run_detection(self):
  460. """完整检测流程:加载数据→筛选有效数据→异常检测→指定传感器画图→生成报告"""
  461. # 1. 加载原始数据
  462. self.load_raw_data()
  463. # 2. 逐传感器处理
  464. processed_count = 0
  465. abnormal_count = 0
  466. plotted_count = 0 # 统计画图的传感器数量
  467. for var_name, config in self.sensor_config.items():
  468. if var_name not in self.results["raw_data"]:
  469. print(f"变量 {var_name} 无数据,跳过")
  470. continue
  471. print(f"\n 处理传感器:{var_name}")
  472. raw_data = self.results["raw_data"][var_name]
  473. # 3. 筛选有效数据
  474. valid_data = self.filter_valid_data(raw_data)
  475. self.results["valid_data"][var_name] = valid_data
  476. valid_count = np.sum(~np.isnan(valid_data))
  477. print(f" 有效数据量:{valid_count}/{len(raw_data)}")
  478. if valid_count == 0:
  479. continue
  480. # 4. 异常检测
  481. anomalies = self.rule_based_detect(var_name, valid_data)
  482. self.results["anomalies"][var_name] = anomalies
  483. processed_count += 1
  484. # 统计异常
  485. anomaly_count = np.sum(anomalies)
  486. anomaly_ratio = (anomaly_count / valid_count) * 100 if valid_count > 0 else 0.0
  487. if anomaly_count > 0:
  488. abnormal_count += 1
  489. print(f" 异常数据:{anomaly_count} 条(占比 {anomaly_ratio:.2f}%)")
  490. else:
  491. print(f" 无异常数据")
  492. # 5. 仅对白名单中的传感器画图(包含新增的4个水质传感器)
  493. if var_name in PLOT_WHITELIST:
  494. self.plot_anomalies(var_name)
  495. plotted_count += 1
  496. # 记录总结
  497. self.results["threshold_summary"].append({
  498. "传感器名称": var_name.split("@")[0] if "@" in var_name else var_name,
  499. "变量标识": var_name,
  500. "阈值来源": config["source"],
  501. "异常标准": config["desc"],
  502. "原始数据总量": len(raw_data),
  503. "有效数据量": valid_count,
  504. "异常数据量": anomaly_count,
  505. "异常比例(%)": round(anomaly_ratio, 2),
  506. "是否画图": "是" if var_name in PLOT_WHITELIST else "否"
  507. })
  508. # 6. 生成总结报告
  509. self._generate_summary_report(processed_count, abnormal_count, plotted_count)
  510. return self.results
  511. def _generate_summary_report(self, processed_count, abnormal_count, plotted_count):
  512. """生成检测总结报告"""
  513. summary_df = pd.DataFrame(self.results["threshold_summary"])
  514. summary_path = os.path.join(self.result_dir, "传感器异常检测总结报告.csv")
  515. summary_df.to_csv(summary_path, index=False, encoding="utf-8-sig")
  516. # 打印汇总信息
  517. total_sensors = len(self.sensor_config)
  518. print(f"\n" + "="*60)
  519. print(f" 检测总结")
  520. print(f"="*60)
  521. print(f"总传感器数量(有明确阈值):{total_sensors}")
  522. print(f"成功处理的传感器:{processed_count}")
  523. print(f"存在异常的传感器:{abnormal_count}")
  524. print(f"生成图表的传感器:{plotted_count}(压差类+电导类+水质类)")
  525. print(f" - 跨膜压差:4个")
  526. print(f" - 一段/二段压差:8个")
  527. print(f" - 电导类:5个")
  528. print(f" - 水质类(pH+浊度):4个")
  529. print(f"结果目录:{os.path.abspath(self.result_dir)}")
  530. print(f"图表目录:{os.path.abspath(self.fig_dir)}")
  531. print(f"总结报告:{os.path.abspath(summary_path)}")
  532. print(f"="*60)
  533. if __name__ == "__main__":
  534. try:
  535. # 初始化检测器并执行
  536. detector = DirectSensorAnomalyDetector(
  537. data_dir="datasets_xishan", # 数据目录(需自行确保存在)
  538. result_dir="direct_detection_results" # 结果保存目录
  539. )
  540. detector.run_detection()
  541. print("\n 检测完成!已生成压差、电导、水质传感器的异常检测图表")
  542. except Exception as e:
  543. print(f"\n 检测出错:{str(e)}")