get_items_distribution_from_database.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. import sys
  2. sys.path.append("..")
  3. from Database.database_ import Database, DatabaseParam
  4. import config
  5. import os
  6. import csv
  7. from utils.tools import fmt_date
  8. def add_stat_info():
  9. # 读取字段文件
  10. file_path = os.path.join(config.ALL_ITEMS_FILE_DIR, config.ALL_ITEMS_FILE_NAME)
  11. if not os.path.exists(file_path):
  12. raise RuntimeError('文件不存在 ', file_path)
  13. # 格式化起始结束日期
  14. start_date, end_date = fmt_date(
  15. start_year=config.CHECK_YEAR_START, end_year=config.CHECK_YEAR_END,start_month=config.CHECK_MONTH_START,end_month=config.CHECK_MONTH_END,start_day=config.CHECK_DAY_START,end_day=config.CHECK_DAY_END,
  16. start_hour=config.CHECK_HOUR_START, end_hour=config.CHECK_HOUR_END, start_minute=config.CHECK_MINUTE_START, end_minute=config.CHECK_MINUTE_END, start_second=config.CHECK_SECONDS_START, end_second=config.CHECK_SECONDS_END)
  17. print(f'统计起始日期:{start_date};结束日期:{end_date}')
  18. # 连接数据库
  19. db_param = DatabaseParam(
  20. db_host=config.DB_HOST,
  21. db_user=config.DB_USER,
  22. db_password=config.DB_PASSWORD,
  23. db_name=config.DB_NAME,
  24. db_port=config.DB_PORT)
  25. # 数据库操作应在内部
  26. with Database(db_param) as db: # 连接数据库
  27. with open(file_path, 'r', encoding='utf-8') as file_handler:
  28. csv_reader = csv.reader(file_handler) # all_items文件读取器
  29. # 先读一行头
  30. if config.INCLUDE_HEAD:
  31. try:
  32. head = next(csv_reader)
  33. except StopIteration:
  34. print(f'{file_path} 文件为空')
  35. # 读标签
  36. try:
  37. label = next(csv_reader)
  38. except StopIteration:
  39. print(f'{file_path} 文件为空')
  40. # 添加字段
  41. label += ['记录数', '最小时间', '最大时间']
  42. stat_file_path = os.path.join(config.STATISTICS_FILE_DIR,config.STATISTICS_FILE_NAME)
  43. if os.path.exists(stat_file_path):
  44. print('清理历史文件:',stat_file_path)
  45. os.remove(stat_file_path)
  46. with open(stat_file_path, 'w', encoding='utf-8', newline='') as file_handler_stat:
  47. # 写入头
  48. csv.writer(file_handler_stat).writerow(label)
  49. # 获取数据库所有的字段
  50. sql = f"""SELECT DISTINCT item_name FROM {config.DB_SHEET_NAME} WHERE project_id = {config.PROJECT_ID}"""
  51. db.db_cursor.execute(sql)
  52. db_items = [item[0].strip() for item in db.db_cursor.fetchall()]
  53. # 逐行查询数据库并写入文件
  54. print('正在统计... ', end='')
  55. for num, row in enumerate(csv_reader): # all_items文件读取器
  56. data_code = row[1].strip() # 表格中的数据编码
  57. if not data_code in db_items: continue # 如果字段不在数据库,那么就直接跳过
  58. # 统计数量
  59. sql = f"""SELECT COUNT(*) FROM {config.DB_SHEET_NAME} WHERE item_name = '{data_code}' AND project_id = '{config.PROJECT_ID}' AND h_time >= '{start_date}' AND h_time <= '{end_date}'"""
  60. db.db_cursor.execute(sql)
  61. query_count = db.db_cursor.fetchone()[0]
  62. row.append(query_count)
  63. # 统计最小时间
  64. sql = f"""SELECT MIN(h_time) FROM {config.DB_SHEET_NAME} WHERE item_name = '{data_code}' AND project_id = '{config.PROJECT_ID}' AND h_time >= '{start_date}' AND h_time <= '{end_date}'"""
  65. db.db_cursor.execute(sql)
  66. query_min_date = db.db_cursor.fetchone()[0]
  67. row.append(query_min_date)
  68. # 统计最大时间
  69. sql = f"""SELECT MAX(h_time) FROM {config.DB_SHEET_NAME} WHERE item_name = '{data_code}' AND project_id = '{config.PROJECT_ID}' AND h_time >= '{start_date}' AND h_time <= '{end_date}'"""
  70. db.db_cursor.execute(sql)
  71. query_max_date = db.db_cursor.fetchone()[0]
  72. row.append(query_max_date)
  73. csv.writer(file_handler_stat).writerow(row)
  74. print('.', end='')
  75. print('\n统计完成,文件保存至:',stat_file_path)
  76. if __name__ == '__main__':
  77. # 从数据库中统计各字段数据量
  78. add_stat_info()