data_cleanup.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. data_cleanup.py
  5. ---------------
  6. 每日数据清理任务
  7. 功能:
  8. 1. 删除过期的正常音频(超过keep_normal_days天)
  9. 2. 异常音频永久保留(keep_anomaly_days=-1时不删除)
  10. 3. 删除过期的日志文件
  11. """
  12. import sys
  13. import logging
  14. import shutil
  15. from pathlib import Path
  16. from datetime import datetime, timedelta
  17. import yaml
  18. logger = logging.getLogger('DataCleanup')
  19. class DataCleaner:
  20. """数据清理器"""
  21. def __init__(self, config_file: Path):
  22. """初始化清理器"""
  23. self.config_file = config_file
  24. self.config = self._load_config()
  25. # 路径配置
  26. self.deploy_root = Path(__file__).parent.parent
  27. self.audio_root = self.deploy_root / "data" / "audio"
  28. self.anomaly_root = self.deploy_root / "data" / "anomaly_detected"
  29. self.backup_dir = self.deploy_root / "models" / "backups"
  30. self.logs_dir = self.deploy_root / "logs"
  31. def _load_config(self):
  32. """加载配置"""
  33. with open(self.config_file, 'r', encoding='utf-8') as f:
  34. return yaml.safe_load(f)
  35. def cleanup_old_normal_audio(self):
  36. """清理过期的正常音频"""
  37. keep_days = self.config['auto_training']['data']['keep_normal_days']
  38. cutoff_date = (datetime.now() - timedelta(days=keep_days)).strftime('%Y%m%d')
  39. logger.info(f"清理 {cutoff_date} 之前的正常音频(保留{keep_days}天)")
  40. total_deleted = 0
  41. total_size = 0
  42. if not self.audio_root.exists():
  43. return
  44. # deploy_pickup使用设备目录结构: audio/{device_code}/{date}/*.wav
  45. for device_dir in self.audio_root.iterdir():
  46. if not device_dir.is_dir():
  47. continue
  48. for date_dir in device_dir.iterdir():
  49. if not date_dir.is_dir() or date_dir.name == "current":
  50. continue
  51. # 检查日期
  52. if date_dir.name < cutoff_date:
  53. if date_dir.exists():
  54. for f in date_dir.rglob("*.wav"):
  55. total_size += f.stat().st_size
  56. total_deleted += 1
  57. shutil.rmtree(date_dir)
  58. logger.info(f"已删除: {device_dir.name}/{date_dir.name}")
  59. logger.info(f"正常音频清理完成: 删除 {total_deleted} 个文件, 释放 {total_size / 1e6:.2f} MB")
  60. def cleanup_old_anomaly_audio(self):
  61. """清理过期的异常音频(-1表示永久保留)"""
  62. keep_days = self.config['auto_training']['data']['keep_anomaly_days']
  63. if keep_days < 0:
  64. logger.info("异常音频配置为永久保留,跳过清理")
  65. return
  66. cutoff_date = (datetime.now() - timedelta(days=keep_days)).strftime('%Y%m%d')
  67. logger.info(f"清理 {cutoff_date} 之前的异常音频(保留{keep_days}天)")
  68. total_deleted = 0
  69. total_size = 0
  70. if not self.anomaly_root.exists():
  71. return
  72. for date_dir in self.anomaly_root.iterdir():
  73. if not date_dir.is_dir():
  74. continue
  75. if date_dir.name < cutoff_date:
  76. for f in date_dir.glob("*.wav"):
  77. total_size += f.stat().st_size
  78. total_deleted += 1
  79. shutil.rmtree(date_dir)
  80. logger.info(f"已删除: anomaly/{date_dir.name}")
  81. logger.info(f"异常音频清理完成: 删除 {total_deleted} 个文件, 释放 {total_size / 1e6:.2f} MB")
  82. def cleanup_old_logs(self):
  83. """清理过期的日志文件"""
  84. logs_config = self.config['auto_training'].get('logs', {})
  85. keep_days = logs_config.get('keep_days', 30)
  86. cutoff_date = datetime.now() - timedelta(days=keep_days)
  87. logger.info(f"清理 {keep_days} 天前的日志文件")
  88. total_deleted = 0
  89. total_size = 0
  90. if not self.logs_dir.exists():
  91. return
  92. for log_file in self.logs_dir.glob("*.log"):
  93. try:
  94. mtime = datetime.fromtimestamp(log_file.stat().st_mtime)
  95. if mtime < cutoff_date:
  96. total_size += log_file.stat().st_size
  97. log_file.unlink()
  98. total_deleted += 1
  99. logger.info(f"已删除日志: {log_file.name}")
  100. except Exception as e:
  101. logger.warning(f"删除日志失败: {log_file.name} | {e}")
  102. logger.info(f"日志清理完成: 删除 {total_deleted} 个文件, 释放 {total_size / 1e6:.2f} MB")
  103. def run_cleanup(self):
  104. """执行清理(主入口)"""
  105. try:
  106. logger.info("=" * 70)
  107. logger.info("开始每日数据清理")
  108. logger.info("=" * 70)
  109. self.cleanup_old_normal_audio()
  110. self.cleanup_old_anomaly_audio()
  111. self.cleanup_old_logs()
  112. logger.info("数据清理完成")
  113. return True
  114. except Exception as e:
  115. logger.error(f"数据清理失败: {e}", exc_info=True)
  116. return False
  117. def main():
  118. """命令行入口"""
  119. logging.basicConfig(
  120. level=logging.INFO,
  121. format='%(asctime)s | %(levelname)-8s | %(message)s',
  122. datefmt='%Y-%m-%d %H:%M:%S'
  123. )
  124. config_file = Path(__file__).parent.parent / "config" / "auto_training.yaml"
  125. cleaner = DataCleaner(config_file)
  126. success = cleaner.run_cleanup()
  127. sys.exit(0 if success else 1)
# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()