wmy 2 nedēļas atpakaļ
vecāks
revīzija
33c6123101
35 mainīti faili ar 442 papildinājumiem un 315 dzēšanām
  1. 12 0
      README.md
  2. 16 6
      auto_training/data_cleanup.py
  3. 111 246
      auto_training/incremental_trainer.py
  4. 2 2
      config/auto_training.yaml
  5. 1 1
      config/config_manager.py
  6. BIN
      config/pickup_config.db
  7. BIN
      config/pickup_config.db-shm
  8. 0 0
      config/pickup_config.db-wal
  9. BIN
      config/yaml_backup/db_output/pickup_config_anzhen.db
  10. BIN
      config/yaml_backup/db_output/pickup_config_jianding.db
  11. BIN
      config/yaml_backup/db_output/pickup_config_longting.db
  12. BIN
      config/yaml_backup/db_output/pickup_config_longting.db-shm
  13. 0 0
      config/yaml_backup/db_output/pickup_config_longting.db-wal
  14. BIN
      config/yaml_backup/db_output/pickup_config_xishan.db
  15. BIN
      config/yaml_backup/db_output/pickup_config_xishan.db-shm
  16. 0 0
      config/yaml_backup/db_output/pickup_config_xishan.db-wal
  17. BIN
      config/yaml_backup/db_output/pickup_config_yancheng.db
  18. 12 1
      config/yaml_backup/rtsp_config_anzhen.yaml
  19. 1 1
      config/yaml_backup/rtsp_config_jianding.yaml
  20. 44 1
      config/yaml_backup/rtsp_config_longting.yaml
  21. 46 1
      config/yaml_backup/rtsp_config_xishan.yaml
  22. 1 1
      config/yaml_backup/rtsp_config_yancheng.yaml
  23. 1 1
      core/pump_state_monitor.py
  24. BIN
      models/LT-2/ae_model.pth
  25. BIN
      models/LT-2/global_scale.npy
  26. BIN
      models/LT-2/thresholds/threshold_default.npy
  27. BIN
      models/LT-5/ae_model.pth
  28. BIN
      models/LT-5/global_scale.npy
  29. BIN
      models/LT-5/thresholds/threshold_default.npy
  30. 31 2
      predictor/multi_model_predictor.py
  31. 21 10
      predictor/utils.py
  32. 1 0
      requirements.txt
  33. 37 20
      run_with_auto_training.py
  34. 104 21
      start.sh
  35. 1 1
      tool/migrate_yaml_to_db.py

+ 12 - 0
README.md

@@ -146,3 +146,15 @@ deploy_pickup/
 ├── tool/migrate_yaml_to_db.py # YAML → DB 迁移
 └── data/                      # 运行时音频
 ```
+# 启用 NPU 推理步骤
+# 未来在 BM1684X 服务器上启用时:
+# 1. 导出 ONNX
+# python tool/convert_to_bmodel.py --all
+# 2. 安装 TPU-MLIR 后生成 BModel
+# python tool/convert_to_bmodel.py --all --with-bmodel --quantize fp16
+# 3. 取消 multi_model_predictor.py 中的注释
+#    - import BM1684XEngine, is_bm1684x_available
+#    - self.bm_engine = self._load_bmodel()
+#    - _load_bmodel() 方法
+# 4. 修改 _compute_reconstruction_error() 中
+#    判断 device_predictor.bm_engine 是否存在,优先调用 NPU 推理

+ 16 - 6
auto_training/data_cleanup.py

@@ -24,10 +24,18 @@ logger = logging.getLogger('DataCleanup')
 class DataCleaner:
     """数据清理器"""
     
-    def __init__(self, config_file: Path):
-        """初始化清理器"""
-        self.config_file = config_file
-        self.config = self._load_config()
+    def __init__(self, config_file: Path = None, config: dict = None):
+        # 支持两种初始化方式:
+        # 1. 传 config dict(从数据库读取后直接传入,主程序使用)
+        # 2. 传 config_file YAML 路径(命令行独立运行使用)
+        if config is not None:
+            self.config = config
+            self.config_file = None
+        elif config_file is not None:
+            self.config_file = config_file
+            self.config = self._load_config()
+        else:
+            raise ValueError("必须提供 config_file 或 config 之一")
         
         # 路径配置
         self.deploy_root = Path(__file__).parent.parent
@@ -37,7 +45,7 @@ class DataCleaner:
         self.logs_dir = self.deploy_root / "logs"
     
     def _load_config(self):
-        """加载配置"""
+        # 从 YAML 文件加载配置(仅 config_file 模式使用)
         with open(self.config_file, 'r', encoding='utf-8') as f:
             return yaml.safe_load(f)
     
@@ -60,12 +68,14 @@ class DataCleaner:
                 continue
             
             for date_dir in device_dir.iterdir():
-                if not date_dir.is_dir() or date_dir.name == "current":
+                # current: 正在写入的目录; verified_normal: 核查确认的正常音频(增训用)
+                if not date_dir.is_dir() or date_dir.name in ("current", "verified_normal"):
                     continue
                 
                 # 检查日期
                 if date_dir.name < cutoff_date:
                     if date_dir.exists():
+                        # rglob 递归统计所有子目录(normal/ + pump_transition/)中的音频
                         for f in date_dir.rglob("*.wav"):
                             total_size += f.stat().st_size
                             total_deleted += 1

+ 111 - 246
auto_training/incremental_trainer.py

@@ -57,15 +57,18 @@ class IncrementalTrainer:
     2. 增量训练:使用运行中采集的数据,对已有模型微调(兼容旧逻辑)
     """
 
-    def __init__(self, config_file: Path):
-        """
-        初始化训练器
-
-        Args:
-            config_file: auto_training.yaml 配置文件路径
-        """
-        self.config_file = config_file
-        self.config = self._load_config()
+    def __init__(self, config_file: Path = None, config: dict = None):
+        # 支持两种初始化方式:
+        # 1. 传 config dict(从数据库读取后直接传入,主程序使用)
+        # 2. 传 config_file YAML 路径(standalone_train.py 等独立工具使用)
+        if config is not None:
+            self.config = config
+            self.config_file = None
+        elif config_file is not None:
+            self.config_file = config_file
+            self.config = self._load_config()
+        else:
+            raise ValueError("必须提供 config_file 或 config 之一")
 
         # 路径配置
         self.deploy_root = Path(__file__).parent.parent
@@ -88,7 +91,7 @@ class IncrementalTrainer:
         self.cold_start_mode = False
 
     def _load_config(self) -> Dict:
-        # 加载配置文件
+        # 从 YAML 文件加载配置(仅 config_file 模式使用)
         with open(self.config_file, 'r', encoding='utf-8') as f:
             return yaml.safe_load(f)
 
@@ -192,30 +195,43 @@ class IncrementalTrainer:
             device_code = device_dir.name
             audio_files = []
 
-            # 冷启动模式:收集所有已归档日期目录的数据(跳过 current/)
+            # 冷启动模式:收集所有已归档日期目录的正常音频(跳过 current/)
             if self.cold_start_mode:
                 # 注意:跳过 current/ 目录,因其中可能包含 FFmpeg 正在写入的不完整文件
                 for sub_dir in device_dir.iterdir():
                     if sub_dir.is_dir() and sub_dir.name.isdigit() and len(sub_dir.name) == 8:
+                        # 新结构:从 {date}/normal/ 子目录读取
+                        normal_dir = sub_dir / "normal"
+                        if normal_dir.exists():
+                            audio_files.extend(list(normal_dir.glob("*.wav")))
+                            audio_files.extend(list(normal_dir.glob("*.mp4")))
+                        # 兼容旧结构:日期目录下直接存放的音频文件
                         audio_files.extend(list(sub_dir.glob("*.wav")))
                         audio_files.extend(list(sub_dir.glob("*.mp4")))
             else:
-                # 正常模式:只收集指定日期的目录
+                # 正常模式:只收集指定日期的正常音频
                 date_dir = device_dir / target_date
                 if date_dir.exists():
+                    # 新结构:从 {date}/normal/ 子目录读取
+                    normal_dir = date_dir / "normal"
+                    if normal_dir.exists():
+                        audio_files.extend(list(normal_dir.glob("*.wav")))
+                        audio_files.extend(list(normal_dir.glob("*.mp4")))
+                    # 兼容旧结构:日期目录下直接存放的音频文件
                     audio_files.extend(list(date_dir.glob("*.wav")))
                     audio_files.extend(list(date_dir.glob("*.mp4")))
 
-            # 加上 verified_normal 目录
+            # 加上 verified_normal 目录(单独收集,不参与采样和质量预筛)
             verified_dir = device_dir / "verified_normal"
+            verified_files = []
             if verified_dir.exists():
-                audio_files.extend(list(verified_dir.glob("*.wav")))
-                audio_files.extend(list(verified_dir.glob("*.mp4")))
+                verified_files.extend(list(verified_dir.glob("*.wav")))
+                verified_files.extend(list(verified_dir.glob("*.mp4")))
 
-            # 去重
+            # 去重(仅日期目录音频)
             audio_files = list(set(audio_files))
 
-            # 数据质量预筛:过滤能量/频谱异常的音频
+            # 数据质量预筛:仅对日期目录音频过滤,verified_normal 已经人工确认,跳过
             if audio_files and not self.cold_start_mode:
                 before_count = len(audio_files)
                 audio_files = self._filter_audio_quality(audio_files, device_code)
@@ -223,7 +239,7 @@ class IncrementalTrainer:
                 if filtered > 0:
                     logger.info(f"  {device_code}: 质量预筛过滤 {filtered} 个异常音频")
 
-            # 随机采样(如果配置了采样时长
+            # 随机采样(仅对日期目录音频采样,verified_normal 不参与)
             if sample_hours > 0 and audio_files:
                 files_needed = int(sample_hours * 3600 / 60)
                 if len(audio_files) > files_needed:
@@ -234,6 +250,11 @@ class IncrementalTrainer:
             else:
                 logger.info(f"  {device_code}: {len(audio_files)} 个音频")
 
+            # 合并 verified_normal(采样后追加,保证全量参与训练)
+            if verified_files:
+                audio_files.extend(verified_files)
+                logger.info(f"  {device_code}: +{len(verified_files)} 个核查确认音频(verified_normal)")
+
             if audio_files:
                 device_files[device_code] = audio_files
 
@@ -302,8 +323,7 @@ class IncrementalTrainer:
     # ========================================
 
     def _extract_mel_for_device(self, device_code: str,
-                                wav_files: List[Path],
-                                inherit_scale: bool = False
+                                wav_files: List[Path]
                                 ) -> Tuple[Optional[Path], Optional[Tuple[float, float]]]:
         """
         为单个设备提取 Mel 特征并计算独立的 Min-Max 标准化参数
@@ -315,7 +335,6 @@ class IncrementalTrainer:
         Args:
             device_code: 设备编码
             wav_files: 该设备的音频文件列表
-            inherit_scale: 增量训练时是否继承已部署的 scale 参数
 
         Returns:
             (mel_dir, (global_min, global_max)),失败返回 (None, None)
@@ -369,18 +388,6 @@ class IncrementalTrainer:
             logger.warning(f"  {device_code}: 无有效数据")
             return None, None
 
-        # 增量训练时,用已部署的 scale 做 EMA 平滑,避免剧烈偏移
-        if inherit_scale:
-            old_scale = self._load_deployed_scale(device_code)
-            if old_scale is not None:
-                ema_alpha = 0.3  # 新数据权重
-                old_min, old_max = old_scale
-                global_min = ema_alpha * global_min + (1 - ema_alpha) * old_min
-                global_max = ema_alpha * global_max + (1 - ema_alpha) * old_max
-                logger.info(f"  {device_code}: scale EMA 融合 | "
-                            f"old=[{old_min:.4f}, {old_max:.4f}] → "
-                            f"new=[{global_min:.4f}, {global_max:.4f}]")
-
         logger.info(f"  {device_code}: {patch_count} patches | "
                     f"min={global_min:.4f} max={global_max:.4f}")
 
@@ -396,20 +403,7 @@ class IncrementalTrainer:
 
         return device_mel_dir, (global_min, global_max)
 
-    def _load_deployed_scale(self, device_code: str) -> Optional[Tuple[float, float]]:
-        """加载已部署的 global_scale.npy,用于增量训练时的 scale 继承"""
-        scale_path = self.model_root / device_code / "global_scale.npy"
-        if not scale_path.exists():
-            return None
-        try:
-            scale = np.load(scale_path)
-            return float(scale[0]), float(scale[1])
-        except Exception as e:
-            logger.warning(f"加载旧 scale 失败: {device_code} | {e}")
-            return None
-
-    def prepare_mel_features_per_device(self, device_files: Dict[str, List[Path]],
-                                        inherit_scale: bool = False
+    def prepare_mel_features_per_device(self, device_files: Dict[str, List[Path]]
                                         ) -> Dict[str, Tuple[Path, Tuple[float, float]]]:
         """
         为每个设备独立提取 Mel 特征
@@ -418,7 +412,6 @@ class IncrementalTrainer:
 
         Args:
             device_files: {device_code: [wav_files]}
-            inherit_scale: 增量训练时传 True,将新旧 scale 做 EMA 融合
 
         Returns:
             {device_code: (mel_dir, (global_min, global_max))}
@@ -433,9 +426,7 @@ class IncrementalTrainer:
         device_results = {}
 
         for device_code, wav_files in device_files.items():
-            mel_dir, scale = self._extract_mel_for_device(
-                device_code, wav_files, inherit_scale=inherit_scale
-            )
+            mel_dir, scale = self._extract_mel_for_device(device_code, wav_files)
             if mel_dir is not None:
                 device_results[device_code] = (mel_dir, scale)
 
@@ -451,8 +442,8 @@ class IncrementalTrainer:
     # ========================================
 
     def _select_training_device(self) -> torch.device:
-        # 智能选择训练设备:GPU/NPU 显存充足则使用,否则回退 CPU
-        # 训练配置中可通过 training_device 强制指定 (auto/cpu/cuda/npu)
+        # 智能选择训练设备:GPU 显存充足则使用,否则回退 CPU
+        # 训练配置中可通过 training_device 强制指定 (auto/cpu/cuda)
         training_cfg = self.config['auto_training']['incremental']
         forced_device = training_cfg.get('training_device', 'auto')
 
@@ -465,14 +456,8 @@ class IncrementalTrainer:
                 return torch.device('cuda')
             logger.warning("配置指定 CUDA 但不可用,回退 CPU")
             return torch.device('cpu')
-        if forced_device == 'npu':
-            if self._npu_available():
-                return torch.device('npu')
-            logger.warning("配置指定 NPU 但不可用,回退 CPU")
-            return torch.device('cpu')
 
-        # auto 模式:依次检测 CUDA → NPU → CPU
-        # 1. 检测 CUDA
+        # auto 模式:检测 CUDA → CPU
         if torch.cuda.is_available():
             try:
                 free_mem = torch.cuda.mem_get_info()[0] / (1024 * 1024)
@@ -486,32 +471,9 @@ class IncrementalTrainer:
             except Exception as e:
                 logger.warning(f"CUDA 显存检测失败: {e}")
 
-        # 2. 检测 NPU (华为昇腾)
-        if self._npu_available():
-            try:
-                free_mem = torch.npu.mem_get_info()[0] / (1024 * 1024)
-                min_gpu_mem_mb = training_cfg.get('min_gpu_mem_mb', 512)
-                if free_mem >= min_gpu_mem_mb:
-                    logger.info(f"训练设备: NPU(空闲显存 {free_mem:.0f}MB)")
-                    return torch.device('npu')
-                logger.info(
-                    f"NPU 空闲显存不足 ({free_mem:.0f}MB < {min_gpu_mem_mb}MB)"
-                )
-            except Exception as e:
-                logger.warning(f"NPU 显存检测失败: {e},回退 CPU")
-
         logger.info("训练设备: CPU")
         return torch.device('cpu')
 
-    @staticmethod
-    def _npu_available() -> bool:
-        """检查华为昇腾 NPU 是否可用"""
-        try:
-            import torch_npu  # noqa: F401
-            return torch.npu.is_available()
-        except ImportError:
-            return False
-
     def _run_training_loop(self, device_code: str, model: nn.Module,
                            train_loader, val_loader, epochs: int, lr: float,
                            device: torch.device) -> Tuple[nn.Module, float]:
@@ -522,8 +484,8 @@ class IncrementalTrainer:
         optimizer = torch.optim.Adam(model.parameters(), lr=lr)
         criterion = nn.MSELoss()
 
-        # AMP 混合精度(GPU/NPU 生效,减少约 40% 显存占用)
-        use_amp = device.type in ('cuda', 'npu')
+        # AMP 混合精度(GPU 生效,减少约 40% 显存占用)
+        use_amp = device.type == 'cuda'
         scaler = torch.amp.GradScaler(device.type) if use_amp else None
 
         # 早停配置
@@ -608,11 +570,9 @@ class IncrementalTrainer:
                            f"最终轮数={actual_epochs}/{epochs} | Loss={avg_loss:.6f}")
                 break
 
-        # 训练后清理加速器缓存
+        # 训练后清理 GPU 缓存
         if device.type == 'cuda':
             torch.cuda.empty_cache()
-        elif device.type == 'npu':
-            torch.npu.empty_cache()
 
         if actual_epochs < epochs:
             logger.info(f"  [{device_code}] 早停节省 {epochs - actual_epochs} 轮训练")
@@ -631,15 +591,11 @@ class IncrementalTrainer:
         # 智能选择训练设备
         device = self._select_training_device()
 
-        # 训练前清理加速器缓存,释放推理残留的显存碎片
+        # 训练前清理 GPU 缓存,释放推理残留的显存碎片
         if device.type == 'cuda':
             torch.cuda.empty_cache()
             import gc
             gc.collect()
-        elif device.type == 'npu':
-            torch.npu.empty_cache()
-            import gc
-            gc.collect()
 
         model = ConvAutoencoder()
 
@@ -684,26 +640,22 @@ class IncrementalTrainer:
             logger.info(f"  数据量不足20,跳过验证集划分(共{len(dataset)}样本)")
 
         # 尝试在选定设备上训练
-        if device.type in ('cuda', 'npu'):
+        if device.type == 'cuda':
             try:
                 return self._run_training_loop(
                     device_code, model, train_loader, val_loader,
                     epochs, lr, device
                 )
             except (torch.cuda.OutOfMemoryError, RuntimeError) as e:
-                # GPU/NPU OOM -> 清理显存后回退 CPU 重试
+                # GPU OOM -> 清理显存后回退 CPU 重试
                 if 'out of memory' not in str(e).lower() and isinstance(e, RuntimeError):
                     raise  # 非 OOM 的 RuntimeError 不拦截
                 logger.warning(
-                    f"  [{device_code}] {device.type.upper()} OOM,"
-                    f"清理显存后回退 CPU 训练"
+                    f"  [{device_code}] CUDA OOM,清理显存后回退 CPU 训练"
                 )
                 import gc
                 gc.collect()
-                if device.type == 'cuda':
-                    torch.cuda.empty_cache()
-                elif device.type == 'npu':
-                    torch.npu.empty_cache()
+                torch.cuda.empty_cache()
                 # 模型可能处于脏状态,重新初始化
                 model = ConvAutoencoder()
                 if not from_scratch:
@@ -812,6 +764,31 @@ class IncrementalTrainer:
 
         return threshold
 
+    def _eval_model_error(self, model: nn.Module, mel_dir: Path) -> float:
+        """在验证数据上计算模型的平均重建误差,用于新旧模型对比"""
+        device = next(model.parameters()).device
+        model.eval()
+
+        dataset = MelNPYDataset(mel_dir)
+        if len(dataset) == 0:
+            return float('inf')
+
+        dataloader = torch.utils.data.DataLoader(
+            dataset, batch_size=64, shuffle=False
+        )
+
+        all_errors = []
+        with torch.no_grad():
+            for batch in dataloader:
+                batch = batch.to(device)
+                output = model(batch)
+                output = align_to_target(output, batch)
+                mse = torch.mean((output - batch) ** 2, dim=[1, 2, 3])
+                all_errors.append(mse.cpu().numpy())
+
+        errors = np.concatenate(all_errors)
+        return float(np.mean(errors))
+
     # ========================================
     # 全量训练入口
     # ========================================
@@ -954,7 +931,6 @@ class IncrementalTrainer:
                   else self.config['auto_training']['incremental']['learning_rate'])
 
             from_scratch = self.cold_start_mode
-            inherit_scale = not self.cold_start_mode
 
             model_cfg = self.config['auto_training']['model']
             rollback_enabled = model_cfg.get('rollback_on_degradation', True)
@@ -973,7 +949,7 @@ class IncrementalTrainer:
                 try:
                     # ── 4a. 单设备特征提取 ──
                     mel_dir, scale = self._extract_mel_for_device(
-                        device_code, wav_files, inherit_scale=inherit_scale
+                        device_code, wav_files
                     )
                     if mel_dir is None:
                         logger.warning(f"{device_code}: 特征提取无有效数据,跳过")
@@ -989,52 +965,49 @@ class IncrementalTrainer:
                         logger.error(f"{device_code}: 形状验证失败,跳过部署")
                         continue
 
-                    # ── 4d. 损失退化检测(增量训练时生效) ──
+                    # ── 4d. 新旧模型对比(增量训练时生效) ──
+                    # 在相同验证数据上比较新旧模型的重建误差,新模型更差则跳过部署
                     if rollback_enabled and not self.cold_start_mode:
-                        old_threshold = self._get_old_threshold(device_code)
-                        if old_threshold and old_threshold > 0:
-                            if final_loss > old_threshold * rollback_factor:
+                        old_model_path = self.model_root / device_code / "ae_model.pth"
+                        if old_model_path.exists():
+                            new_avg_err = self._eval_model_error(model, mel_dir)
+                            old_model = ConvAutoencoder()
+                            old_model.load_state_dict(
+                                torch.load(old_model_path, map_location='cpu')
+                            )
+                            old_avg_err = self._eval_model_error(old_model, mel_dir)
+
+                            logger.info(
+                                f"  {device_code}: 新旧模型对比 | "
+                                f"旧模型误差={old_avg_err:.6f} 新模型误差={new_avg_err:.6f}"
+                            )
+
+                            if new_avg_err > old_avg_err * rollback_factor:
                                 logger.warning(
-                                    f"{device_code}: 损失退化检测触发 | "
-                                    f"训练损失={final_loss:.6f} > "
-                                    f"旧阈值={old_threshold:.6f} × {rollback_factor} = "
-                                    f"{old_threshold * rollback_factor:.6f}"
+                                    f"{device_code}: 新模型退化 | "
+                                    f"新={new_avg_err:.6f} > 旧={old_avg_err:.6f} × {rollback_factor},跳过部署"
                                 )
                                 degraded_count += 1
                                 continue
 
-                    # ── 4e. 阈值偏移检测 + 部署 ──
+                    # ── 4e. 部署 ──
                     if model_cfg.get('auto_deploy', True):
-                        if rollback_enabled and not self.cold_start_mode:
-                            old_threshold = self._get_old_threshold(device_code)
-                            if old_threshold and old_threshold > 0:
-                                new_threshold = self._compute_threshold(model, mel_dir)
-                                drift_ratio = abs(new_threshold - old_threshold) / old_threshold
-                                # 记录阈值变化趋势(用于长期漂移监控)
-                                self._log_threshold_history(
-                                    device_code, target_date,
-                                    old_threshold, new_threshold, final_loss
-                                )
-                                if drift_ratio > 0.3:
-                                    logger.warning(
-                                        f"{device_code}: 阈值偏移告警 | "
-                                        f"旧={old_threshold:.6f} → "
-                                        f"新={new_threshold:.6f} | "
-                                        f"偏移={drift_ratio:.1%}"
-                                    )
-                                    if drift_ratio > 1.0:
-                                        logger.warning(
-                                            f"{device_code}: 阈值偏移过大"
-                                            f"(>{drift_ratio:.0%}),跳过部署"
-                                        )
-                                        degraded_count += 1
-                                        continue
                         self.deploy_device_model(device_code, model, scale, mel_dir)
 
                     success_count += 1
                     logger.info(f"{device_code}: 训练+部署完成 | loss={final_loss:.6f}")
 
-                    # ── 4f. 即时通知该设备模型重载 ──
+                    # ── 4f. 清理已参与训练的 verified_normal 目录 ──
+                    # 核查确认的音频已被模型吸收,训练后清空释放磁盘空间
+                    verified_dir = self.audio_root / device_code / "verified_normal"
+                    if verified_dir.exists():
+                        v_count = len(list(verified_dir.glob("*")))
+                        if v_count > 0:
+                            shutil.rmtree(verified_dir)
+                            verified_dir.mkdir(parents=True, exist_ok=True)
+                            logger.info(f"{device_code}: 已清理 verified_normal ({v_count} 个文件)")
+
+                    # ── 4g. 即时通知该设备模型重载 ──
                     if on_device_trained:
                         try:
                             on_device_trained(device_code)
@@ -1045,7 +1018,7 @@ class IncrementalTrainer:
                     logger.error(f"{device_code}: 训练失败 | {e}", exc_info=True)
 
                 finally:
-                    # ── 4g. 清理该设备的临时 Mel 文件,释放磁盘空间 ──
+                    # ── 4h. 清理该设备的临时 Mel 文件,释放磁盘空间 ──
                     device_mel_dir = self.temp_mel_dir / device_code
                     if device_mel_dir.exists():
                         shutil.rmtree(device_mel_dir)
@@ -1064,9 +1037,6 @@ class IncrementalTrainer:
                     f"{success_count} 个设备部署成功"
                 )
 
-            # 6. 更新分类器基线
-            self._update_classifier_baseline(device_files)
-
             logger.info("=" * 60)
             logger.info(f"增量训练完成: {success_count}/{device_count} 个设备成功")
             if degraded_count > 0:
@@ -1086,60 +1056,6 @@ class IncrementalTrainer:
     # 辅助方法
     # ========================================
 
-    def _get_old_threshold(self, device_code: str) -> float:
-        """
-        读取设备当前已部署的阈值(训练前的旧阈值)
-
-        用于损失退化校验:新模型的训练损失不应远超旧阈值。
-        阈值文件路径: models/{device_code}/thresholds/threshold_{device_code}.npy
-
-        返回:
-            阈值浮点数,文件不存在时返回 0.0
-        """
-        threshold_file = self.model_root / device_code / "thresholds" / f"threshold_{device_code}.npy"
-        if not threshold_file.exists():
-            return 0.0
-        try:
-            data = np.load(threshold_file)
-            return float(data.flat[0])
-        except Exception as e:
-            logger.warning(f"读取旧阈值失败: {device_code} | {e}")
-            return 0.0
-
-    def _log_threshold_history(self, device_code: str, date_str: str,
-                                old_threshold: float, new_threshold: float,
-                                train_loss: float):
-        """
-        记录阈值变化历史到 CSV,用于监控模型长期漂移趋势
-
-        文件路径: logs/threshold_history.csv
-        格式: date,device_code,old_threshold,new_threshold,drift_ratio,train_loss
-        """
-        import csv
-
-        log_dir = self.deploy_root / "logs"
-        log_dir.mkdir(parents=True, exist_ok=True)
-        csv_path = log_dir / "threshold_history.csv"
-
-        drift_ratio = (new_threshold - old_threshold) / old_threshold if old_threshold > 0 else 0.0
-        write_header = not csv_path.exists()
-
-        try:
-            with open(csv_path, 'a', newline='', encoding='utf-8') as f:
-                writer = csv.writer(f)
-                if write_header:
-                    writer.writerow([
-                        'date', 'device_code', 'old_threshold', 'new_threshold',
-                        'drift_ratio', 'train_loss'
-                    ])
-                writer.writerow([
-                    date_str, device_code,
-                    f"{old_threshold:.8f}", f"{new_threshold:.8f}",
-                    f"{drift_ratio:.4f}", f"{train_loss:.8f}"
-                ])
-        except Exception as e:
-            logger.warning(f"写入阈值历史失败: {e}")
-
     def _validate_model(self, model: nn.Module) -> bool:
         # 验证模型输出形状是否合理
         if not self.config['auto_training']['validation']['enabled']:
@@ -1235,57 +1151,6 @@ class IncrementalTrainer:
         logger.info(f"恢复完成: {restored} 个设备")
         return restored > 0
 
-    def _update_classifier_baseline(self, device_files: Dict[str, List[Path]]):
-        # 从训练数据计算并更新分类器基线
-        logger.info("更新分类器基线")
-
-        try:
-            import librosa
-            from core.anomaly_classifier import AnomalyClassifier
-
-            classifier = AnomalyClassifier()
-
-            all_files = []
-            for files in device_files.values():
-                all_files.extend(files)
-
-            if not all_files:
-                logger.warning("无音频文件,跳过基线更新")
-                return
-
-            sample_files = random.sample(all_files, min(50, len(all_files)))
-
-            all_features = []
-            for wav_file in sample_files:
-                try:
-                    y, _ = librosa.load(str(wav_file), sr=CFG.SR, mono=True)
-                    if len(y) < CFG.SR:
-                        continue
-                    features = classifier.extract_features(y, sr=CFG.SR)
-                    if features:
-                        all_features.append(features)
-                except Exception:
-                    continue
-
-            if not all_features:
-                logger.warning("无法提取特征,跳过基线更新")
-                return
-
-            baseline = {}
-            keys = all_features[0].keys()
-            for key in keys:
-                if key == 'has_periodic':
-                    values = [f[key] for f in all_features]
-                    baseline[key] = sum(values) > len(values) / 2
-                else:
-                    values = [f[key] for f in all_features]
-                    baseline[key] = float(np.mean(values))
-
-            classifier.save_baseline(baseline)
-            logger.info(f"  基线已更新 (样本数: {len(all_features)})")
-
-        except Exception as e:
-            logger.warning(f"更新基线失败: {e}")
 
 
 def main():

+ 2 - 2
config/auto_training.yaml

@@ -25,8 +25,8 @@ auto_training:
     learning_rate: 0.0001       # 学习率
     batch_size: 32              # 批大小(降低显存占用)
     early_stop_patience: 5      # 早停耐心值:连续N轮loss无改善则停止
-    training_device: cpu           # 训练设备选择:auto(自动检测显存)/cpu/cuda/npu
-                                    # 低配服务器推荐 cpu,模型小(~214KB) CPU训练30epoch耗时可接受
+    training_device: cpu           # 训练设备选择:auto(自动检测GPU显存)/cpu/cuda
+                                    # 低配服务器推荐 cpu,模型小(~192KB) CPU训练30epoch耗时可接受
     min_gpu_mem_mb: 512          # auto模式下,GPU空闲显存低于此值(MB)时回退CPU
     
   # 模型管理

+ 1 - 1
config/config_manager.py

@@ -49,7 +49,7 @@ class ConfigManager:
         config['plants'] = self._build_plants_list()
 
         # 2. 组装系统级配置(audio, prediction, push_notification, scada_api, human_detection)
-        for section in ['audio', 'prediction', 'push_notification', 'scada_api', 'human_detection']:
+        for section in ['audio', 'prediction', 'push_notification', 'scada_api', 'human_detection', 'auto_training']:
             config[section] = self._get_section_config(section)
 
         return config

BIN
config/pickup_config.db


BIN
config/pickup_config.db-shm


+ 0 - 0
config/pickup_config.db-wal


BIN
config/yaml_backup/db_output/pickup_config_anzhen.db


BIN
config/yaml_backup/db_output/pickup_config_jianding.db


BIN
config/yaml_backup/db_output/pickup_config_longting.db


BIN
config/yaml_backup/db_output/pickup_config_longting.db-shm


+ 0 - 0
config/yaml_backup/db_output/pickup_config_longting.db-wal


BIN
config/yaml_backup/db_output/pickup_config_xishan.db


BIN
config/yaml_backup/db_output/pickup_config_xishan.db-shm


+ 0 - 0
config/yaml_backup/db_output/pickup_config_xishan.db-wal


BIN
config/yaml_backup/db_output/pickup_config_yancheng.db


+ 12 - 1
config/yaml_backup/rtsp_config_anzhen.yaml

@@ -83,10 +83,21 @@ push_notification:
     window_seconds: 300
     min_devices: 2
 
+
+  # ----------------------------------------------------------
+  # 项目模式调度(参观/检修/调试模式下自动暂停异响检测)
+  # ----------------------------------------------------------
+  project_mode:
+    base_url: http://120.55.44.4:8900    # 平台 API 根地址
+    poll_interval: 60                     # 查询间隔(秒)
+    request_timeout: 10                   # 请求超时(秒)
+
+  # ----------
+
 scada_api:
   enabled: true
   base_url: http://120.55.44.4:8900/api/v1/jinke-cloud/db/device/history-data
-  realtime_url: http://47.96.12.136:8788/api/v1/jinke-cloud/device/current-data
+  realtime_url: http://120.55.44.4:8900/api/v1/jinke-cloud/device/current-data
   jwt_token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJJRCI6NywiVXNlcm5hbWUiOiJhZG1pbiIsIkRlcCI6IjEzNSIsImV4cCI6MTc3NjExOTExNCwiaXNzIjoiZ2luLWJsb2cifQ.0HTtzHZjyd2mHo8VCy8icYROxmntRMuQhyoZsAYRL_M
   timeout: 10
 

+ 1 - 1
config/yaml_backup/rtsp_config_jianding.yaml

@@ -82,7 +82,7 @@ push_notification:
 scada_api:
   enabled: true
   base_url: http://120.55.44.4:8900/api/v1/jinke-cloud/db/device/history-data
-  realtime_url: http://47.96.12.136:8788/api/v1/jinke-cloud/device/current-data
+  realtime_url: http://120.55.44.4:8900/api/v1/jinke-cloud/device/current-data
   jwt_token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJJRCI6NywiVXNlcm5hbWUiOiJhZG1pbiIsIkRlcCI6IjEzNSIsImV4cCI6MTc3NjExOTExNCwiaXNzIjoiZ2luLWJsb2cifQ.0HTtzHZjyd2mHo8VCy8icYROxmntRMuQhyoZsAYRL_M
   timeout: 10
 

+ 44 - 1
config/yaml_backup/rtsp_config_longting.yaml

@@ -105,13 +105,21 @@ push_notification:
     window_seconds: 300
     min_devices: 2
 
+# ----------------------------------------------------------
+# 项目模式调度(参观/检修/调试模式下自动暂停异响检测)
+# ----------------------------------------------------------
+project_mode:
+  base_url: http://120.55.44.4:8900    # 平台 API 根地址
+  poll_interval: 60                     # 查询间隔(秒)
+  request_timeout: 10                   # 请求超时(秒)
+
 # ----------------------------------------------------------
 # SCADA/PLC 接口
 # ----------------------------------------------------------
 scada_api:
   enabled: true
   base_url: http://120.55.44.4:8900/api/v1/jinke-cloud/db/device/history-data
-  realtime_url: http://47.96.12.136:8788/api/v1/jinke-cloud/device/current-data
+  realtime_url: http://120.55.44.4:8900/api/v1/jinke-cloud/device/current-data
   jwt_token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJJRCI6NywiVXNlcm5hbWUiOiJhZG1pbiIsIkRlcCI6IjEzNSIsImV4cCI6MTc3NjExOTExNCwiaXNzIjoiZ2luLWJsb2cifQ.0HTtzHZjyd2mHo8VCy8icYROxmntRMuQhyoZsAYRL_M
   timeout: 10
 
@@ -122,3 +130,38 @@ human_detection:
   enabled: false
   db_path: /data/human_detector/detection_status.db
   cooldown_minutes: 5
+
+# ----------------------------------------------------------
+# 自动增量训练
+# ----------------------------------------------------------
+auto_training:
+  enabled: true                      # 总开关(启用自动增训)
+  data:
+    keep_normal_days: 7               # 正常音频保留天数
+    keep_anomaly_days: -1             # 异常音频保留天数(-1=永久)
+    cleanup_time: "00:00"             # 每日清理时间(0点)
+  incremental:
+    enabled: true
+    schedule_time: "02:00"            # 每日训练时间
+    use_days_ago: 1                   # 使用N天前的数据(1=昨天)
+    sample_hours: 1                   # 随机采样时长(小时),0=使用全部
+    min_samples: 50                   # 最少样本数,不足则跳过
+    epochs: 30                        # 训练轮数(配合早停,实际通常更少)
+    learning_rate: 0.0001             # 学习率
+    batch_size: 32                    # 批大小(降低显存占用)
+    early_stop_patience: 5            # 早停耐心值:连续N轮loss无改善则停止
+    training_device: auto
+    min_gpu_mem_mb: 512               # auto模式下GPU空闲显存低于此值(MB)时回退CPU
+  model:
+    backup_before_train: true         # 训练前备份
+    keep_backups: 7                   # 保留备份数量
+    auto_deploy: true                 # 自动部署新模型
+    update_thresholds: true           # 训练后更新阈值npy
+    rollback_on_degradation: true     # 训练后损失异常时自动回滚到备份
+    rollback_factor: 2.0              # 新模型损失 > 旧阈值 * 此因子则判定为退化
+  validation:
+    enabled: true
+  cold_start:
+    enabled: true
+    wait_hours: 2                     # 等待收集数据时长
+    min_samples: 100                  # 最少样本数

+ 46 - 1
config/yaml_backup/rtsp_config_xishan.yaml

@@ -121,13 +121,21 @@ push_notification:
     window_seconds: 300              # 聚合窗口(秒)
     min_devices: 2                   # 至少 2 台设备同时异常才触发聚合告警
 
+# ----------------------------------------------------------
+# 项目模式调度(参观/检修/调试模式下自动暂停异响检测)
+# ----------------------------------------------------------
+project_mode:
+  base_url: http://120.55.44.4:8900    # 平台 API 根地址
+  poll_interval: 60                     # 查询间隔(秒)
+  request_timeout: 10                   # 请求超时(秒)
+
 # ----------------------------------------------------------
 # SCADA/PLC 接口(泵状态查询)
 # ----------------------------------------------------------
 scada_api:
   enabled: true                      # 是否启用 PLC 查询(false 时用音频能量判断启停)
   base_url: http://120.55.44.4:8900/api/v1/jinke-cloud/db/device/history-data    # 历史数据接口
-  realtime_url: http://47.96.12.136:8788/api/v1/jinke-cloud/device/current-data  # 实时数据接口
+  realtime_url: http://120.55.44.4:8900/api/v1/jinke-cloud/device/current-data  # 实时数据接口
   jwt_token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJJRCI6NywiVXNlcm5hbWUiOiJhZG1pbiIsIkRlcCI6IjEzNSIsImV4cCI6MTc3NjExOTExNCwiaXNzIjoiZ2luLWJsb2cifQ.0HTtzHZjyd2mHo8VCy8icYROxmntRMuQhyoZsAYRL_M
   timeout: 10                        # 查询超时(秒)
 
@@ -138,3 +146,40 @@ human_detection:
   enabled: false                     # 是否启用(需要独立的人体检测服务)
   db_path: /data/human_detector/detection_status.db  # 人体检测状态 DB 路径
   cooldown_minutes: 5                # 检测到有人后抑制告警的时间(分钟)
+
+
+# ----------------------------------------------------------
+# 自动增量训练
+# ----------------------------------------------------------
+auto_training:
+  enabled: true                      # 总开关(启用自动增训)
+  data:
+    keep_normal_days: 7               # 正常音频保留天数
+    keep_anomaly_days: -1             # 异常音频保留天数(-1=永久)
+    cleanup_time: "00:00"             # 每日清理时间(0点)
+  incremental:
+    enabled: true
+    schedule_time: "18:00"            # 每日训练时间
+    use_days_ago: 1                   # 使用N天前的数据(1=昨天)
+    sample_hours: 1                   # 随机采样时长(小时),0=使用全部
+    min_samples: 50                   # 最少样本数,不足则跳过
+    epochs: 30                        # 训练轮数(配合早停,实际通常更少)
+    learning_rate: 0.0001             # 学习率
+    batch_size: 32                    # 批大小(降低显存占用)
+    early_stop_patience: 5            # 早停耐心值:连续N轮loss无改善则停止
+    training_device: auto
+    min_gpu_mem_mb: 512               # auto模式下GPU空闲显存低于此值(MB)时回退CPU
+  model:
+    backup_before_train: true         # 训练前备份
+    keep_backups: 7                   # 保留备份数量
+    auto_deploy: true                 # 自动部署新模型
+    update_thresholds: true           # 训练后更新阈值npy
+    rollback_on_degradation: true     # 训练后损失异常时自动回滚到备份
+    rollback_factor: 2.0              # 新模型损失 > 旧阈值 * 此因子则判定为退化
+  validation:
+    enabled: true
+  cold_start:
+    enabled: true
+    wait_hours: 2                     # 等待收集数据时长
+    min_samples: 100                  # 最少样本数
+

+ 1 - 1
config/yaml_backup/rtsp_config_yancheng.yaml

@@ -86,7 +86,7 @@ push_notification:
 scada_api:
   enabled: true
   base_url: http://120.55.44.4:8900/api/v1/jinke-cloud/db/device/history-data
-  realtime_url: http://47.96.12.136:8788/api/v1/jinke-cloud/device/current-data
+  realtime_url: http://120.55.44.4:8900/api/v1/jinke-cloud/device/current-data
   jwt_token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJJRCI6NywiVXNlcm5hbWUiOiJhZG1pbiIsIkRlcCI6IjEzNSIsImV4cCI6MTc3NjExOTExNCwiaXNzIjoiZ2luLWJsb2cifQ.0HTtzHZjyd2mHo8VCy8icYROxmntRMuQhyoZsAYRL_M
   timeout: 10
 

+ 1 - 1
core/pump_state_monitor.py

@@ -11,7 +11,7 @@ pump_state_monitor.py - 泵状态监控模块
 from pump_state_monitor import PumpStateMonitor
 
 monitor = PumpStateMonitor(
-    scada_url="http://47.96.12.136:8788/api/v1/jinke-cloud/device/current-data",
+    scada_url="http://120.55.44.4:8900/api/v1/jinke-cloud/device/current-data",
     scada_jwt="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJJRCI6NywiVXNlcm5hbWUiOiJhZG1pbiIsIkRlcCI6IjEzNSIsImV4cCI6MTc3NjExOTExNCwiaXNzIjoiZ2luLWJsb2cifQ.0HTtzHZjyd2mHo8VCy8icYROxmntRMuQhyoZsAYRL_M",
     project_id=92,
     transition_window_minutes=15

BIN
models/LT-2/ae_model.pth


BIN
models/LT-2/global_scale.npy


BIN
models/LT-2/thresholds/threshold_default.npy


BIN
models/LT-5/ae_model.pth


BIN
models/LT-5/global_scale.npy


BIN
models/LT-5/thresholds/threshold_default.npy


+ 31 - 2
predictor/multi_model_predictor.py

@@ -24,6 +24,10 @@ from .config import CFG, DeployConfig
 from .model_def import ConvAutoencoder
 from .utils import get_device
 
+# --- BM1684X NPU 推理适配 ---
+# 运行时自动检测:仅当设备目录存在 .bmodel 且 BM1684X 硬件可用时才走 NPU(需先用 convert_to_bmodel.py 生成 .bmodel)
+from .bm1684x_engine import BM1684XEngine, is_bm1684x_available
+
 logger = logging.getLogger('MultiModelPredictor')
 
 
@@ -52,11 +56,18 @@ class DevicePredictor:
         # 阈值(标量)
         self.threshold = self._load_threshold()
 
+        # --- BM1684X NPU 推理适配(运行时自动检测,不可用时回退 PyTorch) ---
+        # 如果 .bmodel 文件存在且 BM1684X 硬件可用,优先使用 NPU 推理
+        # 启用后 self.bm_engine 不为 None,推理时调用 bm_engine.predict() 而非 self.model()
+        self.bm_engine = None
+        self.bm_engine = self._load_bmodel()
+
         # 记录文件 mtime(用于热加载检测)
         self._model_mtime = self._get_mtime(self.model_path)
         self._scale_mtime = self._get_mtime(self.scale_path)
 
-        logger.info(f"设备 {device_code} 模型加载完成 | 目录: {model_subdir} | "
+        engine_type = "BM1684X BModel" if self.bm_engine else "PyTorch (.pth)"
+        logger.info(f"设备 {device_code} 模型加载完成 | 引擎: {engine_type} | 目录: {model_subdir} | "
                     f"阈值: {self.threshold:.6f}")
 
     def _get_mtime(self, path: Path) -> float:
@@ -65,7 +76,7 @@ class DevicePredictor:
             return os.path.getmtime(path)
         except OSError:
             return 0.0
-
+
     def has_files_changed(self) -> bool:
         # 检查模型或标准化参数文件是否有更新
         new_model_mtime = self._get_mtime(self.model_path)
@@ -83,6 +94,24 @@ class DevicePredictor:
         model.eval()
         return model
 
+    # --- BM1684X NPU 推理适配 ---
+    # 存在 .bmodel 且硬件可用时返回 NPU 引擎,否则返回 None 回退 PyTorch
+    def _load_bmodel(self):
+        """尝试加载 BModel,成功返回 BM1684XEngine 实例,否则返回 None"""
+        bmodel_path = self.model_dir / "ae_model.bmodel"
+        if not bmodel_path.exists():
+            return None
+        if not is_bm1684x_available():
+            logger.info(f"设备 {self.device_code}: 存在 .bmodel 但 BM1684X 不可用,回退 PyTorch")
+            return None
+        try:
+            engine = BM1684XEngine(str(bmodel_path))
+            logger.info(f"设备 {self.device_code}: 已加载 BM1684X BModel 推理引擎")
+            return engine
+        except Exception as e:
+            logger.warning(f"设备 {self.device_code}: BModel 加载失败,回退 PyTorch | {e}")
+            return None
+
     def _load_scale(self) -> Tuple[float, float]:
         # 加载 Min-Max 标准化参数 [min, max]
         if not self.scale_path.exists():

+ 21 - 10
predictor/utils.py

@@ -32,25 +32,36 @@ def ensure_dirs():
 
 def get_device():
     """
-    获取可用的计算设备
+    获取可用的 PyTorch 计算设备
 
-    优先级: CUDA > NPU (华为昇腾) > CPU
+    优先级: CUDA > CPU
+
+    注意: BM1684X (算能) 不走此函数,它不是 PyTorch 后端。
+    BM1684X 推理由 bm1684x_engine.py 中的 sophon.sail 独立处理。
 
     返回:
-        str: "cuda", "npu" 或 "cpu"
+        str: "cuda" 或 "cpu"
     """
     if torch.cuda.is_available():
         return "cuda"
-    # 华为昇腾 NPU
-    try:
-        import torch_npu  # noqa: F401
-        if torch.npu.is_available():
-            return "npu"
-    except ImportError:
-        pass
     return "cpu"
 
 
+# --- BM1684X NPU 推理适配 ---
+# BM1684X 不是 PyTorch 后端,不能通过 get_device() 返回。
+# 以下为硬件可用性检测函数的参考实现(仅注释保留);
+# 实际实现位于 bm1684x_engine.py,multi_model_predictor 已从该模块导入 is_bm1684x_available。
+#
+# def is_bm1684x_available() -> bool:
+#     """检测 BM1684X 硬件是否可用(SDK + 设备节点)"""
+#     try:
+#         import sophon.sail  # noqa: F401
+#     except ImportError:
+#         return False
+#     import glob
+#     return len(glob.glob("/dev/bm-sophon*")) > 0
+
+
 def align_to_target(pred, target):
     """
     将预测tensor对齐到目标tensor的尺寸

+ 1 - 0
requirements.txt

@@ -8,6 +8,7 @@
 
 # 深度学习
 torch>=2.0.0              # 模型推理
+onnxruntime>=1.14.0       # ONNX 模型验证 / BModel 转换前精度检查
 
 # 数值计算
 numpy>=1.23.0,<2.0        # PyTorch 2.x 不兼容 NumPy 2.x

+ 37 - 20
run_with_auto_training.py

@@ -27,12 +27,13 @@ sys.path.insert(0, str(Path(__file__).parent))
 try:
     from apscheduler.schedulers.background import BackgroundScheduler
     from apscheduler.triggers.cron import CronTrigger
-    import yaml
 except ImportError:
     print("错误:缺少依赖库")
-    print("请运行:pip install apscheduler pyyaml")
+    print("请运行:pip install apscheduler")
     sys.exit(1)
 
+from config.config_manager import ConfigManager
+
 
 def setup_logging():
     # 配置日志系统(按文件大小轮转),与 run_pickup_monitor.py 共用同一日志文件
@@ -90,6 +91,7 @@ class ColdStartManager:
         self.model_root = deploy_root / "models"
         # 音频数据根目录
         self.audio_root = deploy_root / "data" / "audio"
+        # self.audio_root = "/Volumes/mo/水厂正常音频/龙亭"
 
         # 冷启动配置
         cold_start_cfg = config.get('auto_training', {}).get('cold_start', {})
@@ -175,6 +177,11 @@ class ColdStartManager:
             total_samples = 0
             for sub_dir in device_dir.iterdir():
                 if sub_dir.is_dir():
+                    # 新结构:{date}/normal/ 子目录
+                    normal_dir = sub_dir / "normal"
+                    if normal_dir.exists():
+                        total_samples += len(list(normal_dir.glob("*.wav")))
+                    # 兼容旧结构 + current 目录:直接存放的 wav
                     total_samples += len(list(sub_dir.glob("*.wav")))
 
             if total_samples < self.min_samples:
@@ -203,8 +210,8 @@ class ColdStartManager:
         try:
             from auto_training.incremental_trainer import IncrementalTrainer
 
-            config_file = self.deploy_root / "config" / "auto_training.yaml"
-            trainer = IncrementalTrainer(config_file)
+            # 从当前内存中的配置 dict 初始化训练器(配置来源为数据库)
+            trainer = IncrementalTrainer(config=self.config)
 
             # 冷启动模式:收集所有目录的数据,用全量训练
             trainer.cold_start_mode = True
@@ -243,10 +250,27 @@ class IntegratedSystem:
 
     def __init__(self):
         self.deploy_root = Path(__file__).parent
-        self.auto_config_file = self.deploy_root / "config" / "auto_training.yaml"
 
-        # 加载自动训练配置
-        self.auto_config = self._load_yaml(self.auto_config_file)
+        # =========================================================================
+        # 配置加载来源:自动检测
+        #   优先使用 YAML(config/rtsp_config.yaml 存在时)
+        #   否则使用 SQLite 数据库
+        # =========================================================================
+        yaml_path = self.deploy_root / "config" / "rtsp_config.yaml"
+
+        if yaml_path.exists():
+            import yaml
+            with open(yaml_path, 'r', encoding='utf-8') as f:
+                full_config = yaml.safe_load(f)
+            self.full_yaml_config = full_config
+            self.auto_config = {'auto_training': full_config.get('auto_training', {})}
+            logger.info(f"已从 YAML ({yaml_path.name}) 加载配置")
+        else:
+            self.full_yaml_config = None
+            mgr = ConfigManager()
+            self.auto_config = {'auto_training': mgr.get_system_config('auto_training')}
+            mgr.close()
+            logger.info(f"已从数据库加载 auto_training 配置 ({len(self.auto_config.get('auto_training', {}))} 项)")
 
         # 运行时对象
         self.scheduler = None
@@ -254,15 +278,6 @@ class IntegratedSystem:
         self.cold_start_manager = None
         self.cold_start_thread = None
 
-    def _load_yaml(self, config_file: Path) -> dict:
-        # 加载 YAML 配置文件,不存在时返回空字典
-        if not config_file.exists():
-            logger.warning(f"配置文件不存在: {config_file}")
-            return {}
-
-        with open(config_file, 'r', encoding='utf-8') as f:
-            return yaml.safe_load(f) or {}
-
     def _check_and_handle_cold_start(self) -> bool:
         """
         检查并处理冷启动
@@ -406,7 +421,8 @@ class IntegratedSystem:
             logger.info("定时任务触发:增量训练开始")
 
             from auto_training.incremental_trainer import IncrementalTrainer
-            trainer = IncrementalTrainer(self.auto_config_file)
+            # 传 config dict 而非 YAML 路径,配置来源为数据库
+            trainer = IncrementalTrainer(config=self.auto_config)
             success = trainer.run_daily_training(
                 on_device_trained=self._reload_single_device
             )
@@ -425,7 +441,8 @@ class IntegratedSystem:
             logger.info("定时任务触发:数据清理开始")
             from auto_training.data_cleanup import DataCleaner
 
-            cleaner = DataCleaner(self.auto_config_file)
+            # 传 config dict 而非 YAML 路径,配置来源为数据库
+            cleaner = DataCleaner(config=self.auto_config)
             cleaner.run_cleanup()
         except Exception as e:
             logger.error(f"数据清理异常: {e}", exc_info=True)
@@ -439,7 +456,7 @@ class IntegratedSystem:
         # 1. 创建 PickupMonitoringSystem(会初始化 multi_predictor + 注册设备)
         logger.info("初始化监控系统...")
         from run_pickup_monitor import PickupMonitoringSystem
-        self.pickup_system = PickupMonitoringSystem()
+        self.pickup_system = PickupMonitoringSystem(yaml_config=self.full_yaml_config)
 
         # 2. 检查冷启动(需要在 pickup_system 初始化之后,因为需要设备注册信息)
         is_cold_start = self._check_and_handle_cold_start()
@@ -447,7 +464,7 @@ class IntegratedSystem:
         # 3. 设置定时任务
         self._setup_auto_training_tasks()
 
-        # 4. 覆盖信号处理(确保优雅关闭 scheduler)
+        # 4. 覆盖信号处理(确保关闭 scheduler)
         signal.signal(signal.SIGINT, self._signal_handler)
         signal.signal(signal.SIGTERM, self._signal_handler)
 

+ 104 - 21
start.sh

@@ -21,6 +21,24 @@ cd "$(dirname "$0")"
 
 # PID文件路径
 PID_FILE="logs/pid.txt"
+STARTUP_TIMEOUT=5
+HEALTH_CHECK_INTERVAL=1
+
+# ========================================
+# 函数:按PID精确清理PID文件
+# ========================================
+cleanup_pid_file_if_matches() {
+    local expected_pid="$1"
+    if [ ! -f "$PID_FILE" ]; then
+        return 0
+    fi
+
+    local current_pid
+    current_pid=$(cat "$PID_FILE" 2>/dev/null)
+    if [ -z "$expected_pid" ] || [ "$current_pid" = "$expected_pid" ]; then
+        rm -f "$PID_FILE"
+    fi
+}
 
 # ========================================
 # 函数:激活conda环境
@@ -34,16 +52,42 @@ activate_conda() {
     fi
 }
 
+# ========================================
+# 函数:检查PID是否为当前服务进程
+# ========================================
+is_expected_process() {
+    local pid="$1"
+    if [ -z "$pid" ]; then
+        return 1
+    fi
+
+    if ! ps -p "$pid" > /dev/null 2>&1; then
+        return 1
+    fi
+
+    local command
+    command=$(ps -p "$pid" -o command= 2>/dev/null)
+    case "$command" in
+        *"run_with_auto_training.py"*)
+            return 0
+            ;;
+        *)
+            return 1
+            ;;
+    esac
+}
+
 # ========================================
 # 函数:检查进程是否运行
 # ========================================
 is_running() {
     if [ -f "$PID_FILE" ]; then
         PID=$(cat "$PID_FILE")
-        # 检查进程是否存在
-        if ps -p "$PID" > /dev/null 2>&1; then
+        # 不仅检查PID是否存在,还要确认是本服务进程,避免PID复用误判
+        if is_expected_process "$PID"; then
             return 0  # 运行中
         fi
+        cleanup_pid_file_if_matches "$PID"
     fi
     return 1  # 未运行
 }
@@ -59,6 +103,43 @@ get_pid() {
     fi
 }
 
+# ========================================
+# 函数:等待服务稳定启动
+# ========================================
+wait_for_service_ready() {
+    local pid="$1"
+    local elapsed=0
+
+    while [ "$elapsed" -lt "$STARTUP_TIMEOUT" ]; do
+        if ! is_expected_process "$pid"; then
+            return 1
+        fi
+        sleep "$HEALTH_CHECK_INTERVAL"
+        elapsed=$((elapsed + HEALTH_CHECK_INTERVAL))
+    done
+
+    return 0
+}
+
+# ========================================
+# 函数:后台监控PID,进程退出后自动清理PID文件
+# ========================================
+spawn_pid_watcher() {
+    local watched_pid="$1"
+    nohup bash -c '
+        watched_pid="$1"
+        pid_file="$2"
+
+        while ps -p "$watched_pid" > /dev/null 2>&1; do
+            sleep 2
+        done
+
+        if [ -f "$pid_file" ] && [ "$(cat "$pid_file" 2>/dev/null)" = "$watched_pid" ]; then
+            rm -f "$pid_file"
+        fi
+    ' _ "$watched_pid" "$PID_FILE" > /dev/null 2>&1 &
+}
+
 # ========================================
 # 函数:启动服务
 # ========================================
@@ -78,16 +159,17 @@ start_service() {
         echo "错误: run_with_auto_training.py 不存在"
         exit 1
     fi
-    
-    if [ ! -f "config/pickup_config.db" ]; then
-        echo "错误: config/pickup_config.db 不存在"
-        echo "请先运行迁移脚本: python tool/migrate_yaml_to_db.py"
+
+    # 检查配置文件(YAML 或 DB 至少存在一个)
+    if [ ! -f "config/pickup_config.db" ] && [ ! -f "config/rtsp_config.yaml" ]; then
+        echo "错误: 找不到配置文件"
+        echo "需要 config/pickup_config.db 或 config/rtsp_config.yaml 之一"
         exit 1
     fi
-    
+
     # 创建日志目录
     mkdir -p logs
-    
+
     # 启动服务
     echo "后台运行模式..."
     # stdout/stderr 丢弃,所有日志由 RotatingFileHandler 写入 logs/system.log
@@ -95,9 +177,9 @@ start_service() {
     PID=$!
     echo $PID > "$PID_FILE"
     
-    # 等待1秒检查是否正常启动
-    sleep 1
-    if ps -p "$PID" > /dev/null 2>&1; then
+    # 等待一段观察窗口,避免“刚启动1秒就退出”仍被误判为成功
+    if wait_for_service_ready "$PID"; then
+        spawn_pid_watcher "$PID"
         echo "服务启动成功, PID: $PID"
         echo "日志文件: logs/system.log"
         echo ""
@@ -106,7 +188,7 @@ start_service() {
         echo "重启服务: ./start.sh restart"
     else
         echo "服务启动失败,请检查日志: logs/system.log"
-        rm -f "$PID_FILE"
+        cleanup_pid_file_if_matches "$PID"
         return 1
     fi
 }
@@ -117,7 +199,7 @@ start_service() {
 stop_service() {
     if ! is_running; then
         echo "服务未运行"
-        rm -f "$PID_FILE"
+        cleanup_pid_file_if_matches ""
         return 0
     fi
     
@@ -140,7 +222,7 @@ stop_service() {
         echo "等待进程结束... ($WAIT_COUNT/10)"
     done
     
-    rm -f "$PID_FILE"
+    cleanup_pid_file_if_matches "$PID"
     echo "服务已停止"
 }
 
@@ -185,7 +267,7 @@ show_status() {
         echo "状态: 未运行"
         if [ -f "$PID_FILE" ]; then
             echo "注意: PID文件存在但进程已停止,可能是异常退出"
-            rm -f "$PID_FILE"
+            cleanup_pid_file_if_matches ""
         fi
     fi
 }
@@ -209,16 +291,17 @@ run_foreground() {
         echo "错误: run_with_auto_training.py 不存在"
         exit 1
     fi
-    
-    if [ ! -f "config/pickup_config.db" ]; then
-        echo "错误: config/pickup_config.db 不存在"
-        echo "请先运行迁移脚本: python tool/migrate_yaml_to_db.py"
+
+    # 检查配置文件(YAML 或 DB 至少存在一个)
+    if [ ! -f "config/pickup_config.db" ] && [ ! -f "config/rtsp_config.yaml" ]; then
+        echo "错误: 找不到配置文件"
+        echo "需要 config/pickup_config.db 或 config/rtsp_config.yaml 之一"
         exit 1
     fi
-    
+
     # 创建日志目录
     mkdir -p logs
-    
+
     echo "前台运行模式..."
     python run_with_auto_training.py
 }

+ 1 - 1
tool/migrate_yaml_to_db.py

@@ -125,7 +125,7 @@ def migrate_yaml_to_db(yaml_path: str, db_path: str = None, force: bool = False)
     # ========================================
     # 2. 迁移系统级配置
     # ========================================
-    system_sections = ['audio', 'prediction', 'push_notification', 'scada_api', 'human_detection']
+    system_sections = ['audio', 'prediction', 'push_notification', 'scada_api', 'human_detection', 'auto_training']
 
     for section in system_sections:
         section_data = config.get(section, {})