Forráskód Böngészése

提交所有代码

jiyuhang 4 hónapja
szülő
commit
334373eeac

+ 334 - 0
intent_description_template.py

@@ -0,0 +1,334 @@
+# Intent description templates. Descriptions belonging to the same intent should be
+# highly similar/relevant to each other, descriptions of different intents should stay
+# dissimilar; queries that do not reach the threshold all go to the search branch.
+# Dictionary keys are self-assigned intent ids.
+# NOTE: the recogniser flattens these descriptions in order and persists them in
+# intent_index.faiss, so that file must be regenerated whenever an entry is added,
+# removed or reordered.
+template = {
+    # Water-plant questions
+    "1":["查询当前、今天、昨天或过去某个时间段的水厂产水水质数据,如外供水电导率、外供水PH、进水氨氮、进水COD、进水总氮、进水水温、进水总磷等产水水质参数等。",
+         "查询水厂的某个水质数据",
+         "水厂今天进水COD、悬浮物、氨氮、总氮、总磷、是多少?",
+         "水厂今天进水浊度、电导率、电导、温度是多少?",
+         "水厂今天产水COD、悬浮物、氨氮、总氮、总磷是多少?",
+         "水厂今天产水浊度、电导率、电导、温度是多少?",
+         "查询水厂今天的外供水电导率、外供水PH、进水氨氮、进水COD、进水总氮、进水水温、进水总磷等产水水质"],
+    "2":["查询当前、今天、昨天或过去某个时间段的水厂电耗、能耗等信息,如累计用电量等",
+         "查询水厂电量或能耗",
+         "水厂那台设备用电量最高?什么设备用电量最高?",],
+    "4":["查询当前、今天、昨天或过去某个时间段的水厂用水量和进水产水流量相关的数据,如总进水量(进水总量)、总出水量(出水总量)、总进水流量或总出水流量。",
+         "查询水厂进出水量和流量",
+         "今天水厂进水量是多少?",
+         "水厂今天的产水量是多少",
+         "水厂今天的浓水产水量是多少?"],
+    "5":["查询当前、今天、昨天或过去某个时间段的水厂药耗情况,如氢氧化钠、盐酸、次氯酸钠、阻垢剂、凝絮剂、杀菌剂等药剂的用药量",
+         "查询水厂药剂药耗情况"],
+    "6":["看看水厂综合工况",
+         "水厂整体工况怎么样?",
+         "水厂综合工况怎么样?",
+         "看看水厂超滤、反渗透和水泵的工况"],
+    "7":["查询水厂漏水检测记录",
+         "当前有哪些管道或区域处于漏水报警状态?",
+         "看看管道有没有漏水的地方。",
+         "水厂漏水了吗?有没有漏水的情况呢?"],
+    "8":["查水厂今日水质数据、进水、产水水质情况等信息"],  # already merged into intent 1
+    "9":["查水厂过去历史时期,如昨天、上周等水质数据、进水、产水水质情况等信息"],  # already merged into intent 8
+    "10":["打开水厂视频监控",
+          "打开监控",
+          "我想回放昨天下午三点在污泥处理区的监控录像,该怎么操作?",
+          "能不能同时显示多个摄像头的画面?或者设置画面自动轮巡查看?",
+          "我想看一下集成在三维厂区模型上的视频。"],
+    "11":["查看水厂的数字孪生BIM模型"], # linked to the digital-twin BIM model
+    "13":["打开水厂的scada软件页面。",
+          "打开水厂的scada组态页面。"],
+    "15":["打开AI运营管理模块,看看今天水厂运营的怎么样,运营情况如何。",
+          "今天水厂的运营情况怎么样?"],
+    "17":["你今天为水厂做了哪些工作呢?今天都干什么了?",
+          "今天水厂发了多少自控指令呢?",
+          "今天水厂派了多少工单呢?"],
+    "18":["打开水厂智慧在线巡检页面,进行系统自检。",
+          "看看水厂的巡检自检情况。",
+          "水厂现在有没有异常?"],
+    "19":["打开水厂门禁记录",
+          "打开水厂门禁管理模块"],
+    "20":["打开报警页面。",
+          "打开安防报警模块"],
+    # The comma after the second description was missing, which silently merged the
+    # second and third descriptions into a single string.
+    "21":["打开水厂的水质管理页面模块,查询近期产水水质的详情,了解过去一段时间的水质统计数据。",
+          "最近一段时间水厂的水质情况怎么样?是否达标?",
+          "统计分析最近水厂的水质情况"], # linked to the water-quality management module
+    "22":["打开水厂的水量管理页面模块,查询近期水量的详情,了解过去一段时间的水量统计数据。",
+          "最近一段时间水厂水量怎么样?是否达标?",
+          "统计分析最近一段时间的水厂水量"],  # linked to the water-volume management page
+    "23":["打开水厂的能耗电耗管理页面模块,查询近期能耗情况,了解产水电耗和用电量的统计数据。",
+          "最近一段时间水厂的电耗用电量是多少?",
+          "统计分析最近一段时间的电耗用电量"],  # linked to the energy-consumption management page
+    "24":["打开水厂的药耗管理页面模块,查询近期药耗情况,了解过去一段时间药耗量情况。",
+          "最近一段时间水厂的药耗情况怎么样?",
+          "统计分析最近水厂的药耗情况"],  # linked to the chemical-consumption management page
+    "25":["查询水厂水泵、泵组的工况和实时运行情况",  # linked to pump working conditions
+          "水厂水泵运行情况、运行的怎么样?",
+          "看看水厂水泵的效率怎么样。",
+          "看看水厂水泵泵组的效果怎么样"],
+    "26":["打开水厂超滤工况",
+          "水厂超滤运行得怎么样?",
+          "水厂超滤情况如何?效果怎么样?"],  # linked to ultrafiltration working conditions
+    "27":["打开水厂反渗透工况",  # linked to reverse-osmosis working conditions
+          "水厂反渗透运行的怎么样?",
+          "水厂反渗透情况如何?效果怎么样?"],
+    "28":["打开AI工况管理页面,看看工况得了多少分?",
+          "给水厂工况评个分吧。",
+          "给水厂运行工况打分"],  # linked to AI working conditions; easily confused with intent 6 (overall working conditions)
+    "29":["打开水厂的成本管理模块,查询水厂的吨水电和吨水药等经营成本。",  # linked to the cost-management module
+          "目前水厂的吨水电成本是多少?",
+          "目前水厂的吨水药成本是多少?",
+          "水厂运营成本如何?",
+          "分析水厂成本情况"],
+    "30":["打开水厂工单管理页面",],
+    "31":["水厂安装了哪些设备和仪器?请列出设备清单。",
+          "水厂安装了哪些仪器、电器自控设备、阀?",
+          "水厂安装了哪些膜设备、加药装置、泵?"],
+    "32":["查看水厂设备维修记录、维修情况。",
+          "水厂哪些设备维修过?"],
+    "33":["查看水厂设备保养计划。",
+          "水厂现在有哪些设备需要保养?"],
+    "34":["查看水厂设备的保养情况和保养记录。",
+          "水厂目前保养过哪些设备?"],
+    "35":["打开照明设备控制模块",
+          "水厂照明设备的当前状态"],
+    "36":["打开空调设备控制模块",
+          "水厂空调当前状态"],
+    "37":["打开文档管理模块",
+          "下载水厂年度计划模板",
+          "打开危险化学物品文档",
+          "打开风险管理文档",
+          "打开安全讲座文档",
+          "打开应急演练文档",
+          "打开应急预案文档",
+          "打开安全操作手册",
+          "打开水厂安全文档"],
+    "38":["介绍水厂详情,了解水厂规模、地点、建设背景、执行标准、项目人员、建设单位。",
+          "介绍水厂,水厂简介"],
+    "39":["水厂用了哪些工艺?介绍工艺信息",
+          "介绍水厂采用了什么工艺"],
+    "40":["打开水厂运营报表页面",
+          "打开水厂运营记录"],
+    "41":["打开水厂绩效管理页面",
+          "水厂个人绩效评分记录"],
+    "42":["打开水厂库存报告",
+          "打开水厂备品总览",],
+    "43":["打开水厂库存管理",
+          "看看水厂库存情况"],
+    "44":["打开水厂入库记录",
+          "打开水厂入库管理"],
+    "45":["打开水厂出库记录",
+          "打开水厂出库管理"],
+    "46":["打开水厂报废记录",
+          "打开水厂报废管理"],
+    "47":["打开水厂盘点记录",
+          "打开水厂盘点管理"],
+    "48":["打开水厂工单配置"],
+    "49":["打开AI综合分析面板,查看水厂各方面分析数据。",
+          "打开AI驾驶舱",
+          "打开AI数据驾驶舱",
+          "打开水厂驾驶舱"],
+    "50":["打开区域驾驶舱"],
+    "51":["打开水厂仪表管理页面"],
+    "53":["打开水厂工艺模型"],
+    "54":["打开水厂视觉模型",
+          "打开水厂视觉识别模型"],
+    "55":["打开水厂听觉模型",
+          "打开水厂异响识别模型"],
+    "56":["打开红外模型", "打开红外感知模型"],
+    "57":["打开膜系统稳定性模型"],
+    "58":["查询超滤UF曲线或者反渗透RO曲线"],  # needs further sub-classification
+    "59":["打开水厂日报"],
+    "60":["打开水厂功能菜单"],
+    "61":["打开水厂生产绩效"],
+    "62":["打开水厂生产效率"],
+    "63":["打开水厂设备与环境异常"],
+    "64":["打开水厂资产管理"],
+    "66":["打开水厂报告管理",
+          "帮我输出水厂的运营报告"],
+    "67":["打开水厂温湿度烟感",
+          "水厂的温度湿度怎么样?",
+          "看看水厂的温度和湿度情况"],
+    "68":["打开水厂工单记录"],
+    "69":["打开水厂工单状态",
+          "打开水厂工单统计"],
+    "70":["打开水厂工单绩效"],
+    "71":["打开水厂生产报告"],
+    "72":["打开水厂财务报告"],
+    "73":["打开水厂人力绩效"],
+    "74":["打开水厂数字资产"],
+    "75":["打开水厂实物资产"],
+    "76":["打开水厂工艺监控",
+          "看看水厂的工艺监控数据"],
+    "78":["查询流量、压差曲线、压力曲线、进水或产水电导曲线、渗透率曲线、脱盐率曲线",],  # open question: can this be merged with intent 58?
+
+
+    # Fixed Q&A
+    # The comma after "如何添加阻垢剂?" was missing, which silently merged it with the
+    # following description into a single string.
+    "0": ["泵的日常保养需要做哪些步骤?",
+          "能给一份详细的泵保养规程吗?",
+          "泵运行噪音很大,该怎么检查和保养?",
+          "添加絮凝剂、还原剂或阻垢剂的具体操作步骤是什么?包括药剂配制、投加方法和注意事项。",
+          "添加絮凝剂的具体步骤是什么?",
+          "如何添加絮凝剂?",
+          "添加阻垢剂的具体步骤是什么?",
+          "如何添加阻垢剂?",
+          "添加盐酸的步骤是什么?",
+          "添加氢氧化钠的步骤是什么?",
+          "添加次氯酸钠的步骤是什么?"],
+    # User feedback
+    "100": ["这个设计我觉得不太合理,某个功能有问题,应该从某个方面加以改进。",
+            "反馈一个bug,某个功能使用的时候出现了一些问题。",
+            "你们的产品做得很好,希望后续能增加多语言支持",
+            "我想给你提个意见"],
+
+    # Company-related questions
+    "101": ["查询金科环境企业相关的问题,如发展历程、核心技术、新水岛产品介绍、超滤反渗透产品参数、业务范围介绍,具备哪些资质、企业文化、公司制度、公司新闻大事件等",
+            "介绍一下金科环境。",
+            "了解金科环境企业相关的问题",
+            "金科环境的产品有哪些",
+            "金科环境的公司制度是什么?",
+            "金科环境的公司情况、介绍、业务、资质是什么?",
+            "水萝卜是什么?新水岛是什么?两者有什么关系?"],
+    "102": ["查看新水岛项目总览。",
+            "打开新水岛项目管理页面",
+            "查看新水岛项目概况",
+            "打开新水岛项目统一管理模块",],
+
+
+    # Face recognition
+    "201": ["对人脸识别功能进行控制和操作,如打开或关闭人脸识别、查看有没有人在场", ],
+    # Prize quiz
+    "202": ["打开有奖问答",
+            "关闭有奖问答",
+            "关掉有奖问答",
+            "开启有奖问答"],
+
+    # Wastewater-treatment process questions
+    "301":["询问污水处理工艺问题,技术原理细节,工艺细节。",
+          "介绍双膜工艺技术原理。",
+          "膜系统稳定性如何提升?",
+          "视觉模型能识别哪些工艺问题?",
+          "针对目前饮用水安全,给水厂采用什么处理工艺应对?",
+          "难降解有机废水采用哪种工艺处理?是否有专利技术?",
+          "生化接触氧化工艺处理高硬度废水如何防止污堵和结垢?",
+          "在饮用水深度处理中,如何保障膜系统运行稳定,抗污染能力强?"],
+
+    # Panel control
+    "401": ["关闭或打开左侧面板",
+            "关闭或打开左边栏",
+            "隐藏左面板或左边栏"],
+
+    # Self-introduction
+    "501":["你是谁?请介绍一下你自己。",
+          "询问水萝卜是什么呢?",
+          "你能做什么,有哪些功能?"],
+
+
+}
+
+# Maps each self-assigned intent id to its major class and sub class, and records whether
+# the intent needs further sub-classification and whether it is tied to another id so
+# both are handled identically.
+# Format:
+# parent is the major class and child the sub class; leaf=True means no further
+# sub-classification is needed, leaf=False means it is; related_id=None means the intent
+# is handled independently, otherwise it follows the same branch as the referenced id.
+# alias is the label reported for the intent.
+intent_code = {
+    # Water-plant questions
+    "1":{"parent":1,"child":1,"leaf":True,"related_id":None, "alias":"PLC查询_by_bge"},
+    "2":{"parent":1,"child":2,"leaf":True,"related_id":None, "alias":"电耗查询_by_bge"},
+    "3":{"parent":1,"child":3,"leaf":True,"related_id":None, "alias":"不存在_by_bge"},  # does not exist
+    "4":{"parent":1,"child":4,"leaf":True,"related_id":None, "alias":"水量查询_by_bge"},
+    "5":{"parent":1,"child":5,"leaf":True,"related_id":None, "alias":"药耗查询_by_bge"},
+    "6":{"parent":1,"child":6,"leaf":True,"related_id":None, "alias":"工况查询_by_bge"},
+    "7":{"parent":1,"child":7,"leaf":True,"related_id":None, "alias":"漏水查询_by_bge"},
+    "8":{"parent":1,"child":8,"leaf":True,"related_id":None, "alias":"今日水质查询_by_bge"},   # deprecated
+    "9":{"parent":1,"child":9,"leaf":True,"related_id":None, "alias":"历史水质查询_by_bge"},  # deprecated
+    "10":{"parent":1,"child":10,"leaf":True,"related_id":None, "alias":"视频监控查询_by_bge"},
+    "11":{"parent":1,"child":11,"leaf":True,"related_id":None, "alias":"数字孪生查询_by_bge"},
+    "13":{"parent":1,"child":13,"leaf":True,"related_id":None, "alias":"组态查询_by_bge"},
+    "15":{"parent":1,"child":15,"leaf":True,"related_id":None, "alias":"运营查询_by_bge"},
+    "17":{"parent":1,"child":17,"leaf":True,"related_id":None, "alias":"工作查询_by_bge"},
+    "18":{"parent":1,"child":18,"leaf":True,"related_id":None, "alias":"在线检测_by_bge"},
+    "19":{"parent":1,"child":19,"leaf":True,"related_id":None, "alias":"门禁查询_by_bge"},
+    "20":{"parent":1,"child":20,"leaf":True,"related_id":None, "alias":"安防报警查询_by_bge"},
+    "21":{"parent":1,"child":21,"leaf":True,"related_id":None, "alias":"水质模块查询_by_bge"},
+    "22":{"parent":1,"child":22,"leaf":True,"related_id":None, "alias":"水量模块查询_by_bge"},
+    "23":{"parent":1,"child":23,"leaf":True,"related_id":None, "alias":"电耗模块查询_by_bge"},
+    "24":{"parent":1,"child":24,"leaf":True,"related_id":None, "alias":"药耗模块查询_by_bge"},
+    # NOTE(review): the alias below reads "does not exist", yet the template dict defines
+    # pump-condition descriptions for id "25" - confirm which one is intended.
+    "25":{"parent":1,"child":25,"leaf":True,"related_id":None, "alias":"不存在_by_bge"},
+    "26":{"parent":1,"child":26,"leaf":True,"related_id":None, "alias":"超滤工况查询_by_bge"},
+    "27":{"parent":1,"child":27,"leaf":True,"related_id":None, "alias":"反渗透工况查询_by_bge"},
+    "28":{"parent":1,"child":28,"leaf":True,"related_id":None, "alias":"工况管理查询_by_bge"},
+    "29":{"parent":1,"child":29,"leaf":True,"related_id":None, "alias":"成本模块查询_by_bge"},
+    "30":{"parent":1,"child":30,"leaf":True,"related_id":None, "alias":"工单查询_by_bge"},
+    "31":{"parent":1,"child":31,"leaf":True,"related_id":None, "alias":"设备信息查询_by_bge"},
+    "32":{"parent":1,"child":32,"leaf":True,"related_id":None, "alias":"维修记录查询_by_bge"},
+    "33":{"parent":1,"child":33,"leaf":True,"related_id":None, "alias":"保养计划查询_by_bge"},
+    "34":{"parent":1,"child":34,"leaf":True,"related_id":None, "alias":"保养记录查询_by_bge"},
+    "35":{"parent":1,"child":35,"leaf":True,"related_id":None, "alias":"照明查询_by_bge"},
+    "36":{"parent":1,"child":36,"leaf":True,"related_id":None, "alias":"空调查询_by_bge"},
+    "37":{"parent":1,"child":37,"leaf":True,"related_id":None, "alias":"文档查询_by_bge"},
+    "38":{"parent":1,"child":38,"leaf":True,"related_id":None, "alias":"水厂模块查询_by_bge"},
+    "39":{"parent":1,"child":39,"leaf":True,"related_id":None, "alias":"水厂工艺查询_by_bge"},
+    "40":{"parent":1,"child":40,"leaf":True,"related_id":None, "alias":"报表查询_by_bge"},
+    "41":{"parent":1,"child":41,"leaf":True,"related_id":None, "alias":"绩效查询_by_bge"},
+    "42":{"parent":1,"child":42,"leaf":True,"related_id":None, "alias":"库存模块查询_by_bge"},
+    "43":{"parent":1,"child":43,"leaf":True,"related_id":None, "alias":"库存列表查询_by_bge"},
+    "44":{"parent":1,"child":44,"leaf":True,"related_id":None, "alias":"入库查询_by_bge"},
+    "45":{"parent":1,"child":45,"leaf":True,"related_id":None, "alias":"出库查询_by_bge"},
+    "46":{"parent":1,"child":46,"leaf":True,"related_id":None, "alias":"报废查询_by_bge"},
+    "47":{"parent":1,"child":47,"leaf":True,"related_id":None, "alias":"盘点查询_by_bge"},
+    "48":{"parent":1,"child":48,"leaf":True,"related_id":None, "alias":"工单配置查询_by_bge"},
+    "49":{"parent":1,"child":49,"leaf":True,"related_id":None, "alias":"AI分析查询_by_bge"},
+    "50":{"parent":1,"child":50,"leaf":True,"related_id":None, "alias":"区域驾驶舱查询_by_bge"},
+    "51":{"parent":1,"child":51,"leaf":True,"related_id":None, "alias":"仪表查询_by_bge"},
+    "52":{"parent":1,"child":52,"leaf":True,"related_id":None, "alias":"AI管理查询_by_bge"},
+    "53":{"parent":1,"child":53,"leaf":True,"related_id":None, "alias":"工艺模型查询_by_bge"},
+    "54":{"parent":1,"child":54,"leaf":True,"related_id":None, "alias":"视觉模型查询_by_bge"},
+    "55":{"parent":1,"child":55,"leaf":True,"related_id":None, "alias":"听觉模型查询_by_bge"},
+    "56":{"parent":1,"child":56,"leaf":True,"related_id":None, "alias":"红外模型查询_by_bge"},
+    "57":{"parent":1,"child":57,"leaf":True,"related_id":None, "alias":"膜系统查询_by_bge"},
+    "58":{"parent":1,"child":58,"leaf":True,"related_id":None, "alias":"数据曲线查询_by_bge"},
+    "59":{"parent":1,"child":59,"leaf":True,"related_id":None, "alias":"日报查询_by_bge"},
+    "60":{"parent":1,"child":60,"leaf":True,"related_id":None, "alias":"功能菜单查询_by_bge"},
+    "61":{"parent":1,"child":61,"leaf":True,"related_id":None, "alias":"生产绩效查询_by_bge"},
+    "62":{"parent":1,"child":62,"leaf":True,"related_id":None, "alias":"生产效率查询_by_bge"},
+    "63":{"parent":1,"child":63,"leaf":True,"related_id":None, "alias":"环境查询_by_bge"},
+    "64":{"parent":1,"child":64,"leaf":True,"related_id":None, "alias":"资产管理查询_by_bge"},
+    "66":{"parent":1,"child":66,"leaf":True,"related_id":None, "alias":"报告管理查询_by_bge"},
+    "67":{"parent":1,"child":67,"leaf":True,"related_id":None, "alias":"温湿传感器查询_by_bge"},
+    "68":{"parent":1,"child":68,"leaf":True,"related_id":None, "alias":"工单记录查询_by_bge"},
+    "69":{"parent":1,"child":69,"leaf":True,"related_id":None, "alias":"工单统计查询_by_bge"},
+    "70":{"parent":1,"child":70,"leaf":True,"related_id":None, "alias":"工单绩效查询_by_bge"},
+    "71":{"parent":1,"child":71,"leaf":True,"related_id":None, "alias":"生产报告查询_by_bge"},
+    "72":{"parent":1,"child":72,"leaf":True,"related_id":None, "alias":"财务报告查询_by_bge"},
+    "73":{"parent":1,"child":73,"leaf":True,"related_id":None, "alias":"人力绩效查询_by_bge"},
+    "74":{"parent":1,"child":74,"leaf":True,"related_id":None, "alias":"数字资产查询_by_bge"},
+    "75":{"parent":1,"child":75,"leaf":True,"related_id":None, "alias":"实物资产查询_by_bge"},
+    "76":{"parent":1,"child":76,"leaf":True,"related_id":None, "alias":"工艺监控查询_by_bge"},
+    "77":{"parent":1,"child":77,"leaf":True,"related_id":None, "alias":"不存在_by_bge"},
+    "78":{"parent":1,"child":78,"leaf":True,"related_id":None, "alias":"曲线查询_by_bge"},
+
+    # Fixed Q&A
+    "0":{"parent":0,"child":9999,"leaf":True,"related_id":None, "alias":"操作规范固定问答_by_bge"},
+    # Feedback and suggestions
+    "100":{"parent":0,"child":9999,"leaf":True,"related_id":None, "alias":"意见反馈_by_bge"},
+
+    # Company-related questions
+    "101":{"parent":2,"child":99,"leaf":True, "related_id":None, "alias":"企业运营_by_bge"},
+    "102":{"parent":2,"child":77,"leaf":True, "related_id":None, "alias":"新水岛项目查询_by_bge"},
+
+    # Face recognition
+    "201":{"parent":5,"child":9999,"leaf":True,"related_id":None, "alias":"人脸识别_by_bge"},
+    # Prize quiz
+    "202":{"parent":5,"child":99,"leaf":True,"related_id":None, "alias":"快速有奖问答_by_bge"},
+
+    # Wastewater-treatment process questions
+    "301":{"parent":4,"child":9999,"leaf":True,"related_id":None, "alias":"水处理工艺_by_bge"},
+
+    # Panel operations
+    "401":{"parent":3,"child":9999,"leaf":True,"related_id":None, "alias":"面板操作_by_bge"},
+
+    # Assistant self-introduction
+    "501":{"parent":10,"child":9999,"leaf":True,"related_id":None, "alias":"自我介绍_by_bge"},
+
+    # Other questions: should not be matched through a template but decided by the
+    # relevance threshold instead
+    "9999":{"parent":6,"child":9999,"leaf":True,"related_id":None, "alias":"其他问题_by_bge"}
+
+}

BIN
intent_index.faiss


+ 57 - 0
main.py

@@ -0,0 +1,57 @@
+from patch_intent_cls import recognizer_bge, quick_answer_q_a_v2
+
+if __name__ == '__main__':
+    # Command-line demo: read one query, recognise its intent and print the result dict.
+
+    # Rerank scores below this value mean that no template is a credible match.
+    confidence_threshold = 0.1
+
+    question = input("请输入查询语句:")
+    # pick_out returns two parallel lists ordered by rerank score, best match first.
+    user_intent_conf, user_intent = recognizer_bge.pick_out(question, top_k=4)
+    target_intent = user_intent[0]  # best-matching intent
+    target_intent_conf = user_intent_conf[0]  # its confidence
+
+    if target_intent_conf < confidence_threshold:
+        # Not credible: report the generic "other question" class (6 / 9999). This is
+        # decided first so the prize-quiz parsing is not run for a result that would be
+        # overwritten anyway.
+        results = {
+            'intent': "6",
+            'metric_type': "9999",
+            'confidence': str(1 - target_intent_conf),
+            'metric': "其他问题",
+            'status': "无",
+        }
+    else:
+        results = {
+            'intent': str(target_intent.get('parent')),  # major class
+            'metric_type': str(target_intent.get('child')),  # sub class
+            'confidence': str(target_intent_conf),
+            'metric': target_intent.get('alias'),
+            'status': "无",
+        }
+        # The prize-quiz intent (5 / 99) reports the requested open/close action as metric.
+        if target_intent.get('parent') == 5 and target_intent.get('child') == 99:
+            results['metric'] = quick_answer_q_a_v2(question)
+
+    # Without this the computed result would be discarded when the script exits.
+    print(results)

+ 127 - 0
patch_intent_cls.py

@@ -0,0 +1,127 @@
+import os
+script_dir = os.path.dirname(os.path.abspath(__file__))
+import sys
+sys.path.append(script_dir)
+from intent_description_template import template, intent_code
+from remote_model import RemoteBGEModel
+import torch
+import faiss
+import numpy as np
+import re
+
+
+class IntentRecognizer:
+    """Retrieve-then-rerank intent recogniser backed by the remote BGE services.
+
+    Every template description is embedded into a FAISS inner-product index. A query is
+    embedded the same way, its nearest descriptions are retrieved, and the candidates
+    are re-scored by the remote reranker.
+    """
+
+    def __init__(self):
+        # description text -> self-assigned intent id
+        self.template_meta = {}
+        for k, v in template.items():
+            for desc in v:
+                self.template_meta[desc] = k
+        # Position i in this list corresponds to vector i in the FAISS index.
+        self.template_meta_list = list(self.template_meta.keys())
+        # Remote model client
+        self.model = RemoteBGEModel('dev')
+        # Warm-up calls against both remote endpoints
+        print("模型预热中...")
+        self.model.encode(["这是一段预热文字,首次推理通过预测保证后续推理的稳定性和性能。"])
+        self.model.compute_score([("这是一段预热文字,首次推理通过预测保证后续推理的稳定性和性能。",
+                                 "这是一段预热文字,首次推理通过预测保证后续推理的稳定性和性能。")])
+        self.print_gpu()
+        # Load the persisted index. An index whose size differs from the template list
+        # was built from other templates, so its positions would map to the wrong
+        # descriptions; rebuild it in that case.
+        self.database_index = None
+        database_path = os.path.join(script_dir, "intent_index.faiss")
+        if os.path.exists(database_path):
+            stored_index = faiss.read_index(database_path)
+            if stored_index.ntotal == len(self.template_meta_list):
+                self.database_index = stored_index
+        if self.database_index is None:
+            self.database_index = self._build_index(database_path)
+
+    def _build_index(self, database_path):
+        """Embed every template description, build the index and write it to database_path.
+
+        Raises:
+            RuntimeError: if the remote embedding service returns nothing for a description.
+        """
+        embeddings = []
+        # The remote embedding model is called one description at a time.
+        for temp in self.template_meta_list:
+            encoded = self.model.encode([temp], normalize=True)
+            # Check before indexing: subscripting a None response would raise a
+            # TypeError and hide the real cause.
+            if encoded is None:
+                raise RuntimeError('构建意图描述模板库时发生异常,embeddings不能存在None')
+            embeddings.append(encoded[0])
+        # FAISS needs a 2-D float32 matrix.
+        embeddings = np.array(embeddings, dtype=np.float32)
+        index = faiss.IndexFlatIP(embeddings.shape[1])  # inner-product index
+        index.add(embeddings)
+        faiss.write_index(index, database_path)  # save for future use
+        return index
+
+    @staticmethod
+    def print_gpu():
+        """Print allocated/reserved CUDA memory in GB when a GPU is available."""
+        if torch.cuda.is_available():
+            print(f"allocated:{torch.cuda.memory_allocated()/1024**3:.2f}GB", end=' ')
+            print(f"reserved: {torch.cuda.memory_reserved()/1024**3:.2f}GB")
+
+    # Inference stage
+    def pick_out(self, query, top_k):
+        """Return the best matching intents for query.
+
+        Args:
+            query: user question text.
+            top_k: number of candidates to retrieve; capped at the index size.
+
+        Returns:
+            (confidence, result): two parallel lists ordered by rerank score, highest
+            first. result holds the intent_code entries, confidence the rerank scores.
+
+        Raises:
+            RuntimeError: if the remote embedding service returns nothing for the query.
+        """
+        # FAISS pads missing neighbours with index -1, which would silently select the
+        # last template, so never request more candidates than the index holds.
+        top_k = min(top_k, self.database_index.ntotal)
+        encoded = self.model.encode([query], normalize=True)
+        if encoded is None:
+            raise RuntimeError('查询语句向量化失败,远程embedding服务未返回结果')
+        # The query embedding must be a 2-D float32 matrix with a single row.
+        query_embedding = np.array(encoded, dtype=np.float32)
+        faiss.normalize_L2(query_embedding)
+        distances, indices = self.database_index.search(query_embedding, top_k)
+        candidates = [self.template_meta_list[idx] for idx in indices[0]]
+        # Re-score the retrieved descriptions with the remote reranker.
+        score = self.model.compute_score([(query, desc) for desc in candidates])
+        # (vector similarity, description, rerank score), best rerank score first
+        ranked = sorted(zip(distances[0], candidates, score), key=lambda item: item[2], reverse=True)
+        print("***检索结果***:")
+        for i in range(len(ranked) - 1, -1, -1):
+            similarity, description, relevance = ranked[i]
+            intent_id = self.template_meta.get(description)
+            print(
+                f"***{i} 相关度:{relevance:.2f} "
+                f"相似度:{similarity:.2f} "
+                f"意图:{intent_id} "
+                f"{intent_code.get(intent_id).get('alias')} "
+                f"关联:{description}"
+            )
+
+        # Map each ranked description to its intent_code entry.
+        result = [intent_code[self.template_meta[description]] for _, description, _ in ranked]
+        confidence = [relevance for _, _, relevance in ranked]
+
+        return confidence, result
+
+
+def quick_answer_q_a_v2(question, is_english=0):
+    """Decide whether a prize-quiz request asks to open or to close the quiz.
+
+    Args:
+        question: user question text.
+        is_english: 0 to use the Chinese patterns, anything else for the English ones.
+
+    Returns:
+        "openQandA", "closeQandA", or '' when neither pattern matches. When both match,
+        "closeQandA" wins.
+    """
+    if is_english == 0:
+        open_pattern = r"(开启|打开).*?问答"
+        close_pattern = r"(关闭|关掉).*?问答"
+    else:
+        question = question.strip().lower()
+        # The second alternative used to require a literal "enable" directly before
+        # "award-winning q&a". The leading verb has already consumed that word, so
+        # "enable award-winning Q&A" could never match; the word is optional now.
+        target = r'(?:quiz\W+with\W+prizes|(?:enable\W+)?award\W*-\W*winning\W+q\W*&\W*a)'
+        open_pattern = r'\b(?:open|show me|enable)\b\W*.*?' + target
+        close_pattern = r'\b(?:close|disable)\b\W*.*?' + target
+
+    # Close is tested first because it takes precedence when both patterns match.
+    if re.search(close_pattern, question):
+        return "closeQandA"
+    if re.search(open_pattern, question):
+        return "openQandA"
+    return ''
+
+# Singleton: one recogniser instance is created when this module is imported.
+recognizer_bge = IntentRecognizer()

+ 118 - 0
patch_intent_cls_local.py

@@ -0,0 +1,118 @@
+# 从本地加载模型进行推理,bge和reranker均使用本地模型
+from FlagEmbedding import FlagAutoModel, FlagReranker
+from intent_description_template import template, intent_code
+import torch
+import os
+import faiss
+import numpy as np
+import re
+
+script_dir = os.path.dirname(os.path.abspath(__file__))
+
+
+class IntentRecognizer:
+    """Retrieve-then-rerank intent recogniser using locally loaded BGE models.
+
+    Every template description is embedded into a FAISS inner-product index. A query is
+    embedded the same way, its nearest descriptions are retrieved, and the candidates
+    are re-scored by the local reranker.
+    """
+
+    def __init__(self):
+        # description text -> self-assigned intent id
+        self.template_meta = {}
+        for k, v in template.items():
+            for desc in v:
+                self.template_meta[desc] = k
+        # Position i in this list corresponds to vector i in the FAISS index.
+        self.template_meta_list = list(self.template_meta.keys())
+        # Load the embedding model and the reranker from directories next to this file
+        self.model = FlagAutoModel.from_finetuned(os.path.join(script_dir, "bge-m3"),
+                                             query_instruction_for_retrieval="为这个句子生成表示以用于检索相关文章:",
+                                             local_files_only=True,
+                                             use_fp16=True,
+                                             pooling_method="cls",
+                                             devices=["cuda:0"])
+        self.reranker = FlagReranker(os.path.join(script_dir, 'bge-reranker-v2-m3'), use_fp16=True, local_files_only=True, devices=["cuda:0"])
+        # Warm-up inference on both models
+        print("模型预热中...")
+        self.model.encode(["这是一段预热文字,首次推理通过预测保证后续推理的稳定性和性能。"])
+        self.reranker.compute_score([("这是一段预热文字,首次推理通过预测保证后续推理的稳定性和性能。",
+                                 "这是一段预热文字,首次推理通过预测保证后续推理的稳定性和性能。")])
+        self.print_gpu()
+        # Load the persisted index. An index whose size differs from the template list
+        # was built from other templates, so its positions would map to the wrong
+        # descriptions; rebuild it in that case.
+        self.database_index = None
+        database_path = os.path.join(script_dir, "intent_index.faiss")
+        if os.path.exists(database_path):
+            stored_index = faiss.read_index(database_path)
+            if stored_index.ntotal == len(self.template_meta_list):
+                self.database_index = stored_index
+        if self.database_index is None:
+            self.database_index = self._build_index(database_path)
+
+    def _build_index(self, database_path):
+        """Embed every template description, build the index and write it to database_path."""
+        # FAISS needs a 2-D float32 matrix; only the dense vectors are used.
+        embeddings = self.model.encode(self.template_meta_list)['dense_vecs'].astype(np.float32)
+        faiss.normalize_L2(embeddings)  # L2-normalise in place
+        index = faiss.IndexFlatIP(embeddings.shape[1])  # inner-product index
+        index.add(embeddings)
+        faiss.write_index(index, database_path)  # save for future use
+        return index
+
+    @staticmethod
+    def print_gpu():
+        """Print allocated/reserved CUDA memory in GB when a GPU is available."""
+        if torch.cuda.is_available():
+            print(f"allocated:{torch.cuda.memory_allocated()/1024**3:.2f}GB", end=' ')
+            print(f"reserved: {torch.cuda.memory_reserved()/1024**3:.2f}GB")
+
+    # Inference stage
+    def pick_out(self, query, top_k):
+        """Return the best matching intents for query.
+
+        Args:
+            query: user question text.
+            top_k: number of candidates to retrieve; capped at the index size.
+
+        Returns:
+            (confidence, result): two parallel lists ordered by rerank score, highest
+            first. result holds the intent_code entries, confidence the rerank scores.
+        """
+        # FAISS pads missing neighbours with index -1, which would silently select the
+        # last template, so never request more candidates than the index holds.
+        top_k = min(top_k, self.database_index.ntotal)
+        # The query embedding must be a 2-D float32 matrix with a single row.
+        query_embedding = self.model.encode([query])['dense_vecs'].astype(np.float32)
+        faiss.normalize_L2(query_embedding)
+        distances, indices = self.database_index.search(query_embedding, top_k)
+        candidates = [self.template_meta_list[idx] for idx in indices[0]]
+        score = self.reranker.compute_score([(query, desc) for desc in candidates], normalize=True)
+        # (vector similarity, description, rerank score), best rerank score first
+        ranked = sorted(zip(distances[0], candidates, score), key=lambda item: item[2], reverse=True)
+        print("***检索结果***:")
+        for i in range(len(ranked) - 1, -1, -1):
+            similarity, description, relevance = ranked[i]
+            print(
+                f"***{i} 相关度:{relevance:.2f} 相似度:{similarity:.2f} 意图:{self.template_meta.get(description)} 关联:{description}")
+
+        # Map each ranked description to its intent_code entry.
+        result = [intent_code[self.template_meta[description]] for _, description, _ in ranked]
+        confidence = [relevance for _, _, relevance in ranked]
+
+        return confidence, result
+
+
+def quick_answer_q_a_v2(question, is_english=0):
+    """Decide whether a prize-quiz request asks to open or to close the quiz.
+
+    Args:
+        question: user question text.
+        is_english: 0 to use the Chinese patterns, anything else for the English ones.
+
+    Returns:
+        "openQandA", "closeQandA", or '' when neither pattern matches. When both match,
+        "closeQandA" wins.
+    """
+    if is_english == 0:
+        open_pattern = r"(开启|打开).*?问答"
+        close_pattern = r"(关闭|关掉).*?问答"
+    else:
+        question = question.strip().lower()
+        # The second alternative used to require a literal "enable" directly before
+        # "award-winning q&a". The leading verb has already consumed that word, so
+        # "enable award-winning Q&A" could never match; the word is optional now.
+        target = r'(?:quiz\W+with\W+prizes|(?:enable\W+)?award\W*-\W*winning\W+q\W*&\W*a)'
+        open_pattern = r'\b(?:open|show me|enable)\b\W*.*?' + target
+        close_pattern = r'\b(?:close|disable)\b\W*.*?' + target
+
+    # Close is tested first because it takes precedence when both patterns match.
+    if re.search(close_pattern, question):
+        return "closeQandA"
+    if re.search(open_pattern, question):
+        return "openQandA"
+    return ''
+
+# Singleton: one recogniser instance is created when this module is imported.
+recognizer_bge = IntentRecognizer()

+ 131 - 0
remote_model.py

@@ -0,0 +1,131 @@
+import requests
+from typing import List, Tuple, Optional
+import os
+import json
+import time
+import numpy as np
+from FlagEmbedding import FlagAutoModel, FlagReranker
+script_dir = os.path.dirname(os.path.abspath(__file__))
+
+class RemoteBGEModel:
+    """HTTP client for the remote bge-m3 embedding service and bge-reranker service.
+
+    The service URLs are read from url_config.json, located next to this file.
+    """
+
+    def __init__(self, branch:str='dev', timeout:int=3, max_retries:int=3):
+        """Select the service URLs for branch and store the request settings.
+
+        Args:
+            branch: one of dev, test, master, main or local (case-insensitive).
+            timeout: per-request timeout in seconds.
+            max_retries: maximum number of attempts per request.
+
+        Raises:
+            ValueError: if branch is not one of the accepted names.
+            FileNotFoundError: if url_config.json does not exist.
+        """
+        self.branch = branch.strip().lower()
+        if self.branch not in ('dev', 'test', 'master', 'main', 'local'):
+            raise ValueError("Param 'branch' must be dev, test, master, main or local",branch)
+
+        self.url_file = os.path.join(script_dir, 'url_config.json')
+        self.embedding_url, self.reranker_url = self.load_url()
+        self.timeout = timeout
+        self.max_retries = max_retries
+        # Request headers
+        self.headers = {"Content-Type": "application/json"}
+
+    def load_url(self):
+        """Return (embedding URL, reranker URL) for the configured branch."""
+        if not os.path.exists(self.url_file):
+            raise FileNotFoundError("File not exist", self.url_file)
+        # Read the JSON configuration file
+        with open(self.url_file, 'r', encoding='utf-8') as f:
+            json_data = json.load(f)
+        # dev/test share one pair of URLs, main/master another; anything else is local.
+        if self.branch in ('dev', 'test'):
+            prefix = 'dev'
+        elif self.branch in ('main', 'master'):
+            prefix = 'master'
+        else:
+            prefix = 'local'
+        embed_url = json_data[f'{prefix}_embed_url'] + '/embed'
+        rerank_url = json_data[f'{prefix}_reranker_url'] + '/rerank'
+        return embed_url, rerank_url
+
+    def _access_remote_model(self, url:str, data:dict):
+        """POST data as JSON to url, retrying up to max_retries times.
+
+        Returns:
+            The decoded JSON response wrapped in a numpy array, or None when every
+            attempt failed or returned a non-200 status.
+        """
+        time.sleep(0.08)  # avoid calling the endpoint too frequently
+        for attempt in range(self.max_retries):
+            try:
+                # The configured timeout is now applied; without it a stalled
+                # connection would block forever.
+                response = requests.post(url=url, headers=self.headers, json=data,
+                                         timeout=self.timeout)
+                if response.status_code == 200:
+                    return np.array(response.json())
+                print('请求embedding模型失败', f'HTTP {response.status_code}')
+            except (requests.RequestException, ValueError) as e:
+                # Network failure or undecodable body: log and fall through to the next
+                # attempt. Returning here would defeat the retry loop.
+                print('请求embedding模型失败', e)
+            if attempt + 1 < self.max_retries:
+                time.sleep(1)
+        return None
+
+    def encode(self,texts: List[str], normalize: bool = True):
+        """Embed texts with the remote bge-m3 service.
+
+        Args:
+            texts: non-empty list of strings (a single string is passed through as is).
+            normalize: forwarded to the service as the "normalize" field.
+
+        Returns:
+            The service response as a numpy array, or None if the request failed.
+
+        Raises:
+            TypeError: if texts is neither a list nor a string.
+            ValueError: if texts is an empty list or contains a non-string element.
+        """
+        if not isinstance(texts, (list, str)):
+            raise TypeError("Text must be list or string",texts)
+        if isinstance(texts, list):
+            if not texts:
+                raise ValueError("Text must not be empty",texts)
+            for i, content in enumerate(texts):
+                if not isinstance(content, str):
+                    raise ValueError(f"Text elements must be str, pos:{i}, content:{content}")
+        data = {"inputs":texts, "normalize":normalize}
+
+        return self._access_remote_model(
+            url=self.embedding_url,
+            data=data
+        )
+
+    def compute_score(self, pairs: List[Tuple[str, str]]):
+        """Score (query, text) pairs with the remote bge-reranker service.
+
+        Args:
+            pairs: non-empty list of (query, text) string pairs sharing the same query.
+
+        Returns:
+            List of scores in the same order as pairs.
+
+        Raises:
+            TypeError: if pairs is not a list or a pair element is not a string.
+            ValueError: if pairs is empty, a pair does not have two elements, or the
+                pairs do not all share the same query.
+            RuntimeError: if the reranker service returned no result.
+        """
+        if not isinstance(pairs, list):
+            raise TypeError("Pairs must be list",pairs)
+
+        if not pairs:
+            raise ValueError("Pairs must not be empty",pairs)
+
+        for i, pair in enumerate(pairs):
+            if len(pair) != 2:
+                raise ValueError(f"Every pair must have exactly two elements, pos:{i}", pairs)
+            j, k = pair
+            if not isinstance(j, str) or not isinstance(k, str):
+                raise TypeError(f"Elements of every pair must be str, pos:{i}, ({j}, {k})")
+        # Only one query is sent to the service, so every pair must share it. All pairs
+        # are checked; the earlier stepped loop looked at the first three only.
+        query = pairs[0][0]
+        if any(q != query for q, _ in pairs):
+            raise ValueError("Pairs must have the same query", pairs)
+        texts = [t for _, t in pairs]
+        data = {
+            "query": query,
+            "texts": texts
+        }
+
+        res = self._access_remote_model(
+            url=self.reranker_url,
+            data=data
+        )
+        if res is None:
+            raise RuntimeError("Reranker service returned no result", self.reranker_url)
+        # Restore the input order using the "index" field of each result.
+        score = [item["score"] for item in sorted(res, key=lambda x: x["index"])]
+        return score
+
+
+if __name__ == "__main__":
+    # Manual smoke test: call the embedding endpoint and the reranker endpoint once each.
+    # A local FlagReranker run with normalize=True on the same pairs can serve as a
+    # reference when comparing scores.
+    timeout, max_retries = 3, 3
+    bge_model = RemoteBGEModel('dev', timeout, max_retries)
+    t = bge_model.encode(["hello"], normalize=True)
+    probe_query = "你好呀我的名字叫做汤姆"
+    probe_texts = [
+        "今天世界杯中国得了冠军",
+        "你好呀我的名字叫做山姆",
+        "你好呀我的名字叫做汤姆?",
+        "我今天非常的开心,你呢?",
+    ]
+    tt = bge_model.compute_score([(probe_query, text) for text in probe_texts])

BIN
template_description.xlsx


+ 50 - 0
template_relevant_map.py

@@ -0,0 +1,50 @@
+from intent_description_template import *
+from FlagEmbedding import FlagAutoModel, FlagReranker
+from intent_description_template import template
+import torch
+import os
+import faiss
+import numpy as np
+import pandas as pd
+
+# Flatten the templates into (sequence number, intent id, description) rows, numbered from 1.
+template_list = [
+    (str(seq), intent_id, description)
+    for seq, (intent_id, description) in enumerate(
+        ((key, text) for key, texts in template.items() for text in texts), start=1)
+]
+
+
+def print_gpu():
+    """Print allocated/reserved CUDA memory in GB when a GPU is available."""
+    if not torch.cuda.is_available():
+        return
+    gib = 1024**3
+    print(f"allocated:{torch.cuda.memory_allocated()/gib:.2f}GB", end=' ')
+    print(f"reserved: {torch.cuda.memory_reserved()/gib:.2f}GB")
+
+
+# Load the reranker
+reranker = FlagReranker('./bge-reranker-v2-m3', use_fp16=True, local_files_only=True,devices=["cuda:0"])
+# Warm-up inference
+warmup_text = "这是一段预热文字,首次推理通过预测保证后续推理的稳定性和性能。"
+reranker.compute_score([(warmup_text, warmup_text)])
+print_gpu()
+
+# Relevance matrix: row 0 and column 0 hold the sequence numbers, cell (i, j) holds the
+# rerank score of description i used as query against description j.
+size = len(template_list)
+mat = np.zeros([size + 1, size + 1])
+header = np.arange(size + 1)
+mat[:, 0] = header
+mat[0, :] = header
+for row, (_, _, query_text) in enumerate(template_list, start=1):
+    query_template_pairs = [(query_text, other_text) for _, _, other_text in template_list]
+    relevant_score = reranker.compute_score(query_template_pairs, normalize=True)
+    print(f"完成{row}行...")
+    # Fill the row
+    for col in range(1, size + 1):
+        mat[row, col] = relevant_score[col - 1]
+
+
+# Save the matrix to Excel without row index or column header
+pd.DataFrame(mat).to_excel('template_similarity_matrix.xlsx', index=False, header=False, float_format='%.2f')
+
+# Save the numbered description list with a header row
+pd.DataFrame([("序号", "意图", "描述")] + template_list).to_excel('template_description.xlsx', index=False, header=False)

BIN
template_similarity_matrix.xlsx


+ 8 - 0
url_config.json

@@ -0,0 +1,8 @@
+{
+  "dev_embed_url": "http://101.200.76.30:8002",
+  "dev_reranker_url": "http://101.200.76.30:8003",
+  "master_embed_url": "http://101.200.76.30:8002",
+  "master_reranker_url": "http://101.200.76.30:8003",
+  "local_embed_url": "http://101.200.76.30:8002",
+  "local_reranker_url": "http://101.200.76.30:8003"
+}