start.sh 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450
  1. #!/bin/bash
  2. # ========================================
  3. # 拾音器异响检测系统启动脚本
  4. # ========================================
  5. #
  6. # 使用方法:
  7. # ./start.sh # 前台运行
  8. # ./start.sh -d # 后台运行
  9. # ./start.sh --daemon # 后台运行
  10. # ./start.sh stop # 停止服务
  11. # ./start.sh restart # 重启服务
  12. # ./start.sh status # 查看状态
  13. #
  14. # 日志文件:
  15. # 前台运行:直接输出到控制台
  16. # 后台运行:logs/system.log(RotatingFileHandler 自动轮转)
  17. #
  # Run from the directory that contains this script so that the relative
  # paths used below (logs/, config/, *.py) resolve correctly. Abort if the
  # directory cannot be entered; otherwise PID and log files would be
  # created relative to whatever directory the caller happened to be in.
  cd -- "$(dirname "$0")" || {
    echo "错误: 无法切换到脚本所在目录" >&2
    exit 1
  }
  # Path of the PID file that records the main service process.
  PID_FILE="logs/pid.txt"
  # Start-up observation window and the step between two health checks.
  # HEALTH_CHECK_INTERVAL is handed to `sleep`, so both values are in seconds.
  STARTUP_TIMEOUT=5
  HEALTH_CHECK_INTERVAL=1
  # ========================================
  # Function: remove the PID file only when it records the given PID
  # ========================================
  # Globals:   PID_FILE (read)
  # Arguments: $1 - PID expected inside the file; an empty value removes
  #                 the file unconditionally
  # Returns:   0 when the file is absent or is left in place, otherwise
  #            the status of `rm -f`
  cleanup_pid_file_if_matches() {
    local expected_pid="$1"

    # Nothing to clean up.
    [ -f "$PID_FILE" ] || return 0

    local recorded_pid
    recorded_pid=$(cat "$PID_FILE" 2>/dev/null)

    # A specific PID was requested and the file records a different one:
    # leave the file alone.
    if [ -n "$expected_pid" ] && [ "$recorded_pid" != "$expected_pid" ]; then
      return 0
    fi

    rm -f "$PID_FILE"
  }
  # ========================================
  # Function: activate the conda environment
  # ========================================
  # Activates the `pump_asd` environment when a `conda` command is available.
  # Returns: 0 when conda is not installed (nothing to do) or the environment
  #          was activated; 1 when conda is present but activation failed.
  #          A warning is written to stderr in the failure case and the
  #          success message is NOT printed.
  activate_conda() {
    # No conda on this machine: keep using whatever python is on PATH.
    command -v conda > /dev/null 2>&1 || return 0

    local conda_base conda_init
    if ! conda_base=$(conda info --base) || [ -z "$conda_base" ]; then
      echo "警告: 无法获取 conda 安装路径, 未激活 conda 环境" >&2
      return 1
    fi

    # Quoted so that an installation path containing spaces still works.
    conda_init="${conda_base}/etc/profile.d/conda.sh"
    if [ ! -f "$conda_init" ]; then
      echo "警告: 找不到 ${conda_init}, 未激活 conda 环境" >&2
      return 1
    fi

    # shellcheck source=/dev/null
    source "$conda_init"

    if ! conda activate pump_asd; then
      echo "警告: 无法激活 conda 环境: pump_asd" >&2
      return 1
    fi
    echo "已激活 conda 环境: pump_asd"
  }
  # ========================================
  # Function: check whether a PID belongs to this service
  # ========================================
  # Arguments: $1 - PID to inspect
  # Returns:   0 when the PID exists and its command line contains
  #            "run_with_auto_training.py"; 1 otherwise (empty PID, no such
  #            process, or a different command).
  is_expected_process() {
    local pid="$1"

    [ -n "$pid" ] || return 1
    ps -p "$pid" > /dev/null 2>&1 || return 1

    local cmdline
    cmdline=$(ps -p "$pid" -o command= 2>/dev/null)

    # Substring match on the full command line of the process.
    if [[ "$cmdline" == *"run_with_auto_training.py"* ]]; then
      return 0
    fi
    return 1
  }
  # ========================================
  # Function: check whether the main service is running
  # ========================================
  # Globals: PID_FILE (read)
  # Returns: 0 when the PID file exists and the recorded PID is this
  #          service; 1 otherwise. A PID file that points to a dead or
  #          unrelated process is removed as a side effect.
  is_running() {
    [ -f "$PID_FILE" ] || return 1

    # Kept local so that callers' own PID variables are not overwritten.
    # stderr is silenced because the file may vanish between the check
    # above and this read (the background PID watcher removes it).
    local recorded_pid
    recorded_pid=$(cat "$PID_FILE" 2>/dev/null)

    # Existence of the PID alone is not enough: confirm it is this service
    # to avoid a false positive after PID reuse.
    if is_expected_process "$recorded_pid"; then
      return 0
    fi

    cleanup_pid_file_if_matches "$recorded_pid"
    return 1
  }
  # ========================================
  # Function: print the recorded PID
  # ========================================
  # Globals: PID_FILE (read)
  # Outputs: the content of the PID file, or an empty line when the file
  #          does not exist.
  get_pid() {
    if [ ! -f "$PID_FILE" ]; then
      echo ""
      return
    fi
    cat "$PID_FILE"
  }
  # ========================================
  # Function: wait until the service has started and stays up
  # ========================================
  # Polls the process every HEALTH_CHECK_INTERVAL seconds until
  # STARTUP_TIMEOUT seconds have elapsed, then checks it one last time.
  # Globals:   STARTUP_TIMEOUT, HEALTH_CHECK_INTERVAL (read)
  # Arguments: $1 - PID of the freshly started service
  # Returns:   0 when the process was the expected service at every check,
  #            including the final one; 1 as soon as a check fails.
  wait_for_service_ready() {
    local pid="$1"
    local elapsed=0

    # A non-positive or non-integer interval would never advance `elapsed`
    # (endless loop) or break the arithmetic below; fall back to 1 second.
    local interval="$HEALTH_CHECK_INTERVAL"
    if ! [ "$interval" -gt 0 ] 2>/dev/null; then
      interval=1
    fi

    while [ "$elapsed" -lt "$STARTUP_TIMEOUT" ]; do
      is_expected_process "$pid" || return 1
      sleep "$interval"
      elapsed=$((elapsed + interval))
    done

    # Final check after the last sleep: without it a process that exits
    # during the last interval (or any process when the timeout is 0)
    # would be reported as started successfully.
    if is_expected_process "$pid"; then
      return 0
    fi
    return 1
  }
  # ========================================
  # Function: watch a PID in the background and clean up the PID file
  #           once that process has exited
  # ========================================
  # Globals:   PID_FILE (read; passed to the watcher as its 2nd argument)
  # Arguments: $1 - PID to watch
  # The watcher is a detached `bash -c` process started through nohup with
  # its output discarded. It polls `ps` every 2 seconds and removes the PID
  # file only if the file still records the watched PID.
  spawn_pid_watcher() {
    local watched_pid="$1"

    # Script executed by the detached watcher; $1 = PID, $2 = PID file.
    local watcher_script='
      target_pid="$1"
      target_file="$2"
      while ps -p "$target_pid" > /dev/null 2>&1; do
        sleep 2
      done
      [ -f "$target_file" ] || exit 0
      if [ "$(cat "$target_file" 2>/dev/null)" = "$target_pid" ]; then
        rm -f "$target_file"
      fi
    '

    nohup bash -c "$watcher_script" _ "$watched_pid" "$PID_FILE" > /dev/null 2>&1 &
  }
  # ========================================
  # Function: start the service in the background
  # ========================================
  # Globals: PID_FILE (read; the file is written)
  # Outputs: progress messages on stdout, error messages on stderr
  # Returns: 0 when the service survived the start-up observation window;
  #          1 when it is already running, a prerequisite is missing, or
  #          the process did not stay up.
  start_service() {
    # Refuse to start a second instance.
    if is_running; then
      echo "服务已在运行中, PID: $(get_pid)"
      echo "如需重启,请使用: ./start.sh restart"
      return 1
    fi

    activate_conda

    # Pre-flight checks. Failures are reported on stderr and use `return`
    # like every other failure path of this function.
    if [ ! -f "run_with_auto_training.py" ]; then
      echo "错误: run_with_auto_training.py 不存在" >&2
      return 1
    fi
    # At least one of the two configuration sources (DB or YAML) must exist.
    if [ ! -f "config/pickup_config.db" ] && [ ! -f "config/rtsp_config.yaml" ]; then
      echo "错误: 找不到配置文件" >&2
      echo "需要 config/pickup_config.db 或 config/rtsp_config.yaml 之一" >&2
      return 1
    fi

    # The PID file lives in logs/, so the directory must be creatable.
    if ! mkdir -p logs; then
      echo "错误: 无法创建日志目录 logs" >&2
      return 1
    fi

    echo "后台运行模式..."
    # stdout/stderr are discarded here; the messages below point the user
    # at logs/system.log for the service's own logging.
    nohup python run_with_auto_training.py > /dev/null 2>&1 &
    local service_pid=$!
    echo "$service_pid" > "$PID_FILE"

    # Observe the process for a while so that a service that dies right
    # after launch is not reported as started.
    if ! wait_for_service_ready "$service_pid"; then
      echo "服务启动失败,请检查日志: logs/system.log" >&2
      cleanup_pid_file_if_matches "$service_pid"
      return 1
    fi

    spawn_pid_watcher "$service_pid"
    echo "服务启动成功, PID: $service_pid"
    echo "日志文件: logs/system.log"
    # Start the auxiliary workers (controlled by configuration switches).
    start_auxiliary_workers
    echo ""
    echo "查看日志: tail -f logs/system.log"
    echo "停止服务: ./start.sh stop"
    echo "重启服务: ./start.sh restart"
  }
  # ========================================
  # Auxiliary workers (upload worker + model sync): start
  # ========================================
  UPLOAD_PID_FILE="logs/upload_worker.pid"
  MODEL_SYNC_PID_FILE="logs/model_sync.pid"

  # Helper: succeed when SECTION in YAML_FILE has `enabled: true`.
  # Arguments: $1 - YAML file, $2 - section key (e.g. cloud_sync)
  # Comment lines and blank lines are ignored. `enabled:` is accepted as a
  # direct child anywhere inside the section (not only on the line right
  # after the key), or inline on the key's own line.
  # NOTE(review): this is a line-based scan, not a YAML parser.
  _aux_section_enabled() {
    local yaml_file="$1"
    local section="$2"
    awk -v section="$section" '
      function indent_of(line,   stripped) {
        stripped = line
        sub(/^[ \t]+/, "", stripped)
        return length(line) - length(stripped)
      }
      /^[ \t]*#/ { next }
      /^[ \t]*$/ { next }
      in_section {
        depth = indent_of($0)
        if (depth > section_indent) {
          if (child_indent < 0) child_indent = depth
          if (depth == child_indent && $0 ~ /^[ \t]*enabled:[ \t]*[Tt]rue/) {
            found = 1
            exit
          }
          next
        }
        in_section = 0
      }
      $0 ~ ("^[ \t]*" section ":") {
        if ($0 ~ /enabled:[ \t]*[Tt]rue/) { found = 1; exit }
        in_section = 1
        section_indent = indent_of($0)
        child_indent = -1
      }
      END { exit(found ? 0 : 1) }
    ' "$yaml_file"
  }

  # Helper: launch one low-priority worker and record its PID.
  # Arguments: $1 - worker script, $2 - PID file, $3 - label for messages
  # Does nothing when the script is missing. Skips the launch when the PID
  # file already points to a live process running the same script, so a
  # repeated start does not create a duplicate worker.
  _launch_aux_worker() {
    local script="$1"
    local pid_file="$2"
    local label="$3"

    [ -f "$script" ] || return 0

    local old_pid
    old_pid=$(cat "$pid_file" 2>/dev/null)
    if [ -n "$old_pid" ]; then
      case "$(ps -p "$old_pid" -o command= 2>/dev/null)" in
        *"$script"*)
          echo "${label}已在运行, PID: $old_pid"
          return 0
          ;;
      esac
    fi

    nohup nice -n 19 python "$script" > /dev/null 2>&1 &
    local worker_pid=$!
    echo "$worker_pid" > "$pid_file"
    echo "${label}已启动, PID: $worker_pid"
  }

  # Starts the auxiliary workers whose switch is enabled in
  # config/rtsp_config.yaml. Returns 0 (also when the YAML file is absent).
  # Globals: UPLOAD_PID_FILE, MODEL_SYNC_PID_FILE (read)
  start_auxiliary_workers() {
    local yaml_file="config/rtsp_config.yaml"
    [ -f "$yaml_file" ] || return 0

    # cloud_sync.enabled -> upload worker
    if _aux_section_enabled "$yaml_file" "cloud_sync"; then
      _launch_aux_worker "run_upload_worker.py" "$UPLOAD_PID_FILE" "上传Worker"
    fi
    # model_sync.enabled -> model synchronisation
    if _aux_section_enabled "$yaml_file" "model_sync"; then
      _launch_aux_worker "run_model_sync.py" "$MODEL_SYNC_PID_FILE" "模型同步"
    fi
    return 0
  }
  # Helper: stop one auxiliary worker and remove its PID file.
  # Arguments: $1 - PID file, $2 - worker script name, $3 - label for messages
  # The signal is sent only when the recorded PID still runs the given
  # script; after PID reuse the number may belong to an unrelated process,
  # which must not be killed. The PID file is removed in every case.
  _stop_aux_worker() {
    local pid_file="$1"
    local script="$2"
    local label="$3"

    [ -f "$pid_file" ] || return 0

    local worker_pid
    worker_pid=$(cat "$pid_file" 2>/dev/null)
    if [ -n "$worker_pid" ]; then
      case "$(ps -p "$worker_pid" -o command= 2>/dev/null)" in
        *"$script"*)
          kill "$worker_pid" 2>/dev/null
          echo "${label}已停止, PID: $worker_pid"
          ;;
      esac
    fi
    rm -f "$pid_file"
  }

  # Stops the upload worker and the model-sync process (if recorded) and
  # removes their PID files.
  # Globals: UPLOAD_PID_FILE, MODEL_SYNC_PID_FILE (read)
  stop_auxiliary_workers() {
    _stop_aux_worker "$UPLOAD_PID_FILE" "run_upload_worker.py" "上传Worker"
    _stop_aux_worker "$MODEL_SYNC_PID_FILE" "run_model_sync.py" "模型同步"
  }
  # ========================================
  # Function: stop the service
  # ========================================
  # Stops the auxiliary workers first, then sends SIGTERM to the main
  # service and polls once per second; after 10 polls with the process
  # still present it sends SIGKILL. The PID file is cleaned up at the end.
  # Globals: PID, WAIT_COUNT (written)
  # Returns: 0
  stop_service() {
    stop_auxiliary_workers

    if is_running; then
      PID=$(get_pid)
      echo "正在停止主服务, PID: $PID"

      # Graceful stop request (SIGTERM).
      kill "$PID" 2>/dev/null

      WAIT_COUNT=0
      while :; do
        # Process is gone: done waiting.
        ps -p "$PID" > /dev/null 2>&1 || break

        if (( WAIT_COUNT >= 10 )); then
          echo "进程未响应,强制终止..."
          kill -9 "$PID" 2>/dev/null
          break
        fi

        sleep 1
        WAIT_COUNT=$((WAIT_COUNT + 1))
        echo "等待进程结束... ($WAIT_COUNT/10)"
      done

      cleanup_pid_file_if_matches "$PID"
      echo "服务已停止"
    else
      echo "主服务未运行"
      # An empty argument removes any leftover PID file unconditionally.
      cleanup_pid_file_if_matches ""
      return 0
    fi
  }
  # ========================================
  # Function: restart the service
  # ========================================
  # Prints a banner, stops the service, pauses 2 seconds and starts it
  # again. Returns: the status of start_service.
  restart_service() {
    local banner="=========================================="
    printf '%s\n' "$banner" "重启拾音器异响检测服务" "$banner"

    stop_service
    printf '\n'
    # Short pause between stop and start so resources can be released.
    sleep 2
    start_service
  }
  # Helper: print the one-line status of an auxiliary worker.
  # Arguments: $1 - PID file, $2 - label for the message
  _report_aux_status() {
    local pid_file="$1"
    local label="$2"

    if [ ! -f "$pid_file" ]; then
      echo " ${label}: 未启动"
      return 0
    fi

    local worker_pid
    worker_pid=$(cat "$pid_file" 2>/dev/null)
    if [ -n "$worker_pid" ] && ps -p "$worker_pid" > /dev/null 2>&1; then
      echo " ${label}: 运行中 (PID: $worker_pid)"
    else
      echo " ${label}: 已停止"
    fi
  }

  # ========================================
  # Function: show the service status
  # ========================================
  # Prints the state of the main service (with process details and the last
  # 10 log lines when running) followed by the state of both auxiliary
  # workers.
  # Globals: PID_FILE, UPLOAD_PID_FILE, MODEL_SYNC_PID_FILE (read)
  show_status() {
    echo "=========================================="
    echo "拾音器异响检测服务状态"
    echo "=========================================="

    # is_running removes a stale PID file as a side effect, so remember
    # whether one existed BEFORE calling it; otherwise the abnormal-exit
    # notice below could never be shown.
    local had_pid_file=0
    if [ -f "$PID_FILE" ]; then
      had_pid_file=1
    fi

    if is_running; then
      local main_pid
      main_pid=$(get_pid)
      echo "状态: 运行中"
      echo "PID: $main_pid"
      echo ""
      # Process details: header line plus the process itself.
      echo "进程详情:"
      ps -p "$main_pid" -o pid,ppid,user,%cpu,%mem,etime,command | head -2
      echo ""
      # Tail of the service log.
      echo "最近10行日志:"
      echo "------------------------------------------"
      tail -10 logs/system.log 2>/dev/null || echo "(无日志)"
    else
      echo "主服务状态: 未运行"
      if [ "$had_pid_file" -eq 1 ]; then
        echo "注意: PID文件存在但进程已停止,可能是异常退出"
        cleanup_pid_file_if_matches ""
      fi
    fi

    # Auxiliary workers.
    echo ""
    echo "辅助进程:"
    _report_aux_status "$UPLOAD_PID_FILE" "上传Worker"
    _report_aux_status "$MODEL_SYNC_PID_FILE" "模型同步"
  }
  # ========================================
  # Function: run the service in the foreground
  # ========================================
  # Returns: 1 when a background instance is already running; otherwise
  #          the exit status of the python process. Exits the script with
  #          status 1 when the entry script or every configuration file
  #          is missing.
  run_foreground() {
    # A background instance must be stopped first.
    if is_running; then
      echo "服务已在后台运行中, PID: $(get_pid)"
      echo "请先停止: ./start.sh stop"
      return 1
    fi

    activate_conda

    # The entry script is required.
    [ -f "run_with_auto_training.py" ] || {
      echo "错误: run_with_auto_training.py 不存在"
      exit 1
    }

    # At least one configuration source (DB or YAML) is required.
    [ -f "config/pickup_config.db" ] || [ -f "config/rtsp_config.yaml" ] || {
      echo "错误: 找不到配置文件"
      echo "需要 config/pickup_config.db 或 config/rtsp_config.yaml 之一"
      exit 1
    }

    # Make sure the log directory exists.
    mkdir -p logs

    echo "前台运行模式..."
    python run_with_auto_training.py
  }
  # ========================================
  # Function: print the usage text
  # ========================================
  # Outputs: the help text on stdout, one argument per output line.
  show_help() {
    printf '%s\n' \
      "拾音器异响检测系统 - 启动脚本" \
      "" \
      "用法: ./start.sh [命令]" \
      "" \
      "命令:" \
      " (无参数) 前台运行" \
      " -d, --daemon 后台运行" \
      " start 后台启动服务" \
      " stop 停止服务" \
      " restart 重启服务" \
      " status 查看服务状态" \
      " help 显示帮助信息" \
      "" \
      "示例:" \
      " ./start.sh -d # 后台启动" \
      " ./start.sh restart # 重启服务" \
      " ./start.sh status # 查看状态"
  }
  # ========================================
  # Entry point: dispatch on the first command-line argument
  # ========================================
  # No argument runs the service in the foreground; an unknown command
  # prints the help text and exits with status 1. Additional arguments
  # are ignored.
  main() {
    local action="$1"

    case "$action" in
      "")
        run_foreground
        ;;
      start | -d | --daemon)
        start_service
        ;;
      stop)
        stop_service
        ;;
      restart)
        restart_service
        ;;
      status)
        show_status
        ;;
      help | --help | -h)
        show_help
        ;;
      *)
        echo "未知命令: $action"
        echo ""
        show_help
        exit 1
        ;;
    esac
  }

  main "$@"