# get_all_items.py
  1. import sys
  2. sys.path.append("..")
  3. import config
  4. import os
  5. import requests
  6. import time
  7. import csv
  8. from datetime import datetime
  9. import shutil
  10. import json
  11. class DataHelper:
  12. """采用爬虫方式,动态获取smart-water网站某项目的各传感器数据库标签和对应的中文名称
  13. 项目代码 :92, 锡山中荷污水再生水项目
  14. """
  15. def __init__(self,
  16. project_id = config.PROJECT_ID,
  17. username = config.USERNAME,
  18. password = config.PASSWORD,
  19. dep_id = config.DEP_ID,
  20. base_url = config.BASE_URL,
  21. out_path = config.ALL_ITEMS_FILE_DIR,
  22. out_file_name = config.ALL_ITEMS_FILE_NAME,
  23. save_path_final = config.ALL_ITEMS_FILE_PATH,
  24. max_pages = config.MAX_PAGES,
  25. page_size = config.PAGE_SIZE,
  26. include_head = config.INCLUDE_HEAD
  27. ):
  28. print('开始获取项目所有的数据编号...')
  29. self.username = username
  30. self.password = password
  31. self.dep_id = dep_id
  32. self.project_id = project_id
  33. self.BASE_URL = base_url #smart-water 网站首页
  34. self.out_path = out_path
  35. self.out_file_name = out_file_name
  36. self.max_pages = int(max_pages)
  37. self.page_size = int(page_size)
  38. self.token = None
  39. self.include_head = include_head
  40. self.save_path_tem = os.path.join(self.out_path,'tem_' + self.out_file_name)
  41. self.save_path_final = save_path_final
  42. self.start_time = time.time()
  43. self.end_time = self.start_time
  44. # 清理上一次执行的结果文件
  45. if os.path.exists(self.save_path_tem) or os.path.exists(self.save_path_final):
  46. print(f'清理缓存文件...')
  47. if os.path.exists(self.save_path_final):
  48. os.remove(self.save_path_final)
  49. print(f'清理 {self.save_path_final}')
  50. if os.path.exists(self.save_path_tem):
  51. os.remove(self.save_path_tem)
  52. print(f'清理 {self.save_path_tem}')
  53. def login_smart_water(self):
  54. login_url = f"{self.BASE_URL}/api/v2/user/login" # smart-water 登陆页面
  55. login_headers = { # 登陆请求头
  56. "Accept": "application/json",
  57. "Content-Type": "application/json;charset=utf-8",
  58. "Cookie": "lang=zh-CN",
  59. "Origin": self.BASE_URL,
  60. "Referer": f"{self.BASE_URL}/",
  61. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36"
  62. }
  63. login_params = { # 请求参数
  64. "username": self.username,
  65. "password": self.password,
  66. "type": "account",
  67. "DepId": self.dep_id # 部门ID
  68. }
  69. try:
  70. # 尝试登陆
  71. response = requests.post(login_url, json=login_params, headers=login_headers)
  72. response.raise_for_status() # 检查HTTP错误
  73. data = response.json()
  74. token = data['data']['token']
  75. self.token = token if token != '' else None
  76. if self.token is not None:
  77. print(f'{self.username} 登陆成功! \n获取token {self.token}')
  78. else:
  79. print(f'{self.username} 登陆失败!')
  80. except requests.exceptions.HTTPError as errh:
  81. print("HTTP Error:", errh)
  82. except requests.exceptions.ConnectionError as errc:
  83. print("Error Connecting:", errc)
  84. except requests.exceptions.Timeout as errt:
  85. print("Timeout Error:", errt)
  86. except requests.exceptions.RequestException as err:
  87. print("OOps: Something Else", err)
  88. return None
  89. @staticmethod
  90. def write_file(handler, data: list):
  91. write_cnt = 0
  92. for label in data:
  93. # '名称', '编码', '单位' , '精度', '是否枚举', '设备号'
  94. csv.writer(handler).writerow([label['ItemAlias'], label['ItemName'], label['ItemUnit'], label['ItemPrecise'], int(label['IsBool']), label['DeviceCode']])
  95. write_cnt += 1
  96. return write_cnt
  97. @staticmethod
  98. def format_chinese_datetime(dt=None):
  99. """格式化日期时间为中文格式"""
  100. if dt is None:
  101. dt = datetime.now()
  102. # 提取日期时间各部分
  103. year = dt.year
  104. month = dt.month
  105. day = dt.day
  106. hour = dt.hour
  107. minute = dt.minute
  108. # 格式化为中文
  109. return f"{year}年{month}月{day}日 {hour:02d}:{minute:02d}"
  110. def get_all_label(self):
  111. if self.token is None:
  112. self.login_smart_water()
  113. label_url = f"{self.BASE_URL}/api/v1/config/device-realtime-plc-item/list/{self.project_id}" # 数据抓取页面
  114. headers = {
  115. 'Accept': '*/*',
  116. 'Accept-Encoding': 'gzip, deflate',
  117. 'Accept-Language': 'zh-CN,zh;q=0.9',
  118. 'Connection': 'keep-alive',
  119. 'Cookie': 'lang=zh-CN',
  120. 'Host': '120.55.44.4:8900',
  121. 'JWT-TOKEN': self.token,
  122. 'Referer': 'http://120.55.44.4:8900/',
  123. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36'
  124. }
  125. with requests.Session() as session:
  126. session.headers.update(headers)
  127. failed_cnt = 0
  128. # 爬取每个页面
  129. with open(self.save_path_tem, mode='a', encoding='utf-8', newline='') as file_handler:
  130. # 按照'名称', '编码', '单位' , '精度', '设备号' 格式保存数据
  131. print('准备写入数据...')
  132. csv.writer(file_handler).writerow(['名称', '编码', '单位' , '精度', '是否枚举', '设备号'])
  133. pages = 1
  134. total_write_cnt = 0
  135. while pages <= self.max_pages:
  136. try:
  137. params = {
  138. 'currentPage': f'{pages}',
  139. 'pageSize': f'{self.page_size}',
  140. 'ProjectId': self.project_id,
  141. 'time': int(time.time() * 1000)
  142. }
  143. response = session.get(label_url, params=params)
  144. response.raise_for_status()
  145. result = response.json()
  146. if result.get('code') == 603: # token 过期就重新登录一次
  147. self.login_smart_water()
  148. headers['JWT-TOKEN'] = self.token
  149. session.headers.update(headers)
  150. if result.get('code') == 200:
  151. print(f'时间:{params['time']} 页码:{params['currentPage']}, 网页数据获取成功, 写入文件')
  152. label_list = result['data']['list']
  153. total_write_cnt += self.write_file(file_handler, label_list)
  154. pages += 1
  155. except requests.exceptions.HTTPError as errh:
  156. print("HTTP Error:", errh)
  157. failed_cnt += 1
  158. except requests.exceptions.ConnectionError as errc:
  159. print("Error Connecting:", errc)
  160. failed_cnt += 1
  161. except requests.exceptions.Timeout as errt:
  162. print("Timeout Error:", errt)
  163. failed_cnt += 1
  164. except requests.exceptions.RequestException as err:
  165. print("OOps: Something Else", err)
  166. failed_cnt += 1
  167. finally:
  168. if failed_cnt >= 3 :
  169. print('失败次数达到3次, 自动退出!')
  170. break
  171. print(f'数据写入完成,写入网页数量为{pages - 1}页,{total_write_cnt}条数据记录!')
  172. # 写最终文件
  173. self.end_time = time.time()
  174. total_time = round(self.end_time - self.start_time, 2)
  175. current_date = self.format_chinese_datetime()
  176. stat_info = f"# 项目编号: {self.project_id}, 获取日期: {current_date}, 总记录数量: {total_write_cnt}, 总耗时: {total_time}s"
  177. with open(self.save_path_tem, mode='r', encoding='utf-8') as file_handler:
  178. with open(self.save_path_final, mode='w', encoding='utf-8', newline='') as final_file_handler:
  179. if self.include_head: final_file_handler.write(stat_info + '\n')
  180. # 复制临时文件内容到最终文件
  181. shutil.copyfileobj(file_handler, final_file_handler)
  182. os.unlink(self.save_path_tem)
  183. print('all-items文件写入成功:',self.save_path_final)
  184. def get_name_code_transfer(self):
  185. """生成code和name之间的转换文件"""
  186. total_name_to_code = {'name_2_code': {},
  187. 'code_2_name': {},
  188. 'len': 0}
  189. if not os.path.exists(self.save_path_final):
  190. raise RuntimeError('文件不存在:', self.save_path_final)
  191. file_path_out = config.TRANSFER_JSON_NAME
  192. # file_path_out = self.save_path_final[:-4] + '_name_code_transfer.json'
  193. if os.path.exists(file_path_out):
  194. print('清理历史文件:', file_path_out)
  195. os.remove(file_path_out)
  196. with open(self.save_path_final, 'r', encoding='utf-8') as file_handler:
  197. csv_reader = csv.reader(file_handler)
  198. if self.include_head:
  199. try:
  200. next(csv_reader)
  201. except StopIteration:
  202. pass
  203. try:
  204. next(csv_reader)
  205. except StopIteration:
  206. pass
  207. for row in csv_reader:
  208. total_name_to_code.get('name_2_code').update({row[0].strip(): row[1].strip()})
  209. total_name_to_code['len'] += 1
  210. total_name_to_code.get('code_2_name').update({v: k for k, v in total_name_to_code.get('name_2_code').items()})
  211. with open(file_path_out, 'w', encoding="utf-8",newline='') as f:
  212. json.dump(total_name_to_code, f, ensure_ascii=False, indent=4)
  213. print('name-code字典文件写入成功:',file_path_out)
  214. if __name__ == '__main__':
  215. # 从智慧水萝卜网站获取数据库中的数据字段英文编号和中文名称
  216. dh = DataHelper()
  217. dh.get_all_label()
  218. # 生成code-name字典文件
  219. dh.get_name_code_transfer()