Ver Fonte

初始化提交

jiyuhang há 3 meses atrás
commit
95f871f0e6
48 ficheiros alterados com 1344 adições e 0 exclusões
  1. 3 0
      .gitignore
  2. 464 0
      create_pcl_match_dictionary.py
  3. 30 0
      item_del_tool.py
  4. 0 0
      plc_dictionary/1181_plc_dictionary/1181_dict_level_1.json
  5. 0 0
      plc_dictionary/1181_plc_dictionary/1181_dict_level_2.json
  6. 0 0
      plc_dictionary/1181_plc_dictionary/1181_dict_name_2_code.json
  7. BIN
      plc_dictionary/1181_plc_dictionary/1181_knowledge.faiss
  8. BIN
      plc_dictionary/1181_plc_dictionary/1181_点位.xlsx
  9. 0 0
      plc_dictionary/1202_plc_dictionary/1202_dict_level_1.json
  10. 0 0
      plc_dictionary/1202_plc_dictionary/1202_dict_level_2.json
  11. 0 0
      plc_dictionary/1202_plc_dictionary/1202_dict_name_2_code.json
  12. BIN
      plc_dictionary/1202_plc_dictionary/1202_knowledge.faiss
  13. BIN
      plc_dictionary/1202_plc_dictionary/1202_点位.xlsx
  14. 0 0
      plc_dictionary/1450_plc_dictionary/1450_dict_level_1.json
  15. 0 0
      plc_dictionary/1450_plc_dictionary/1450_dict_level_2.json
  16. 0 0
      plc_dictionary/1450_plc_dictionary/1450_dict_name_2_code.json
  17. BIN
      plc_dictionary/1450_plc_dictionary/1450_knowledge.faiss
  18. BIN
      plc_dictionary/1450_plc_dictionary/1450_点位.xlsx
  19. 0 0
      plc_dictionary/92_plc_dictionary/92_dict_level_1.json
  20. 0 0
      plc_dictionary/92_plc_dictionary/92_dict_level_2.json
  21. 0 0
      plc_dictionary/92_plc_dictionary/92_dict_name_2_code.json
  22. BIN
      plc_dictionary/92_plc_dictionary/92_knowledge.faiss
  23. BIN
      plc_dictionary/92_plc_dictionary/92_点位.xlsx
  24. 335 0
      plclib.py
  25. BIN
      plc点位原始文件/1181_点位_原始.xlsx
  26. BIN
      plc点位原始文件/1202_点位_原始.xlsx
  27. BIN
      plc点位原始文件/1450_点位_原始.xlsx
  28. BIN
      plc点位原始文件/92_点位_原始.xlsx
  29. 10 0
      readme
  30. 132 0
      remote_model.py
  31. 8 0
      url_config.json
  32. 1 0
      user_maintain_dictionary/equivalent_words/dict_equivalent_wordmap.json
  33. 9 0
      user_maintain_dictionary/equivalent_words/equivalent_wordmap.txt
  34. 40 0
      user_maintain_dictionary/jieba_words/A综合.txt
  35. 9 0
      user_maintain_dictionary/jieba_words/B产水率.txt
  36. 40 0
      user_maintain_dictionary/jieba_words/B回收率.txt
  37. 15 0
      user_maintain_dictionary/jieba_words/B电导.txt
  38. 5 0
      user_maintain_dictionary/jieba_words/B脱盐率.txt
  39. 18 0
      user_maintain_dictionary/jieba_words/C膜渗透率.txt
  40. 53 0
      user_maintain_dictionary/jieba_words/C膜通量.txt
  41. 9 0
      user_maintain_dictionary/jieba_words/C跨膜压差.txt
  42. 21 0
      user_maintain_dictionary/jieba_words/产水压力.txt
  43. 30 0
      user_maintain_dictionary/jieba_words/产水流量.txt
  44. 27 0
      user_maintain_dictionary/jieba_words/段压差.txt
  45. 18 0
      user_maintain_dictionary/jieba_words/浓水压力.txt
  46. 6 0
      user_maintain_dictionary/jieba_words/浓水流量.txt
  47. 37 0
      user_maintain_dictionary/jieba_words/进水压力.txt
  48. 24 0
      user_maintain_dictionary/jieba_words/进水流量.txt

+ 3 - 0
.gitignore

@@ -0,0 +1,3 @@
+__pycache__/
+*.pyc
+.idea/

+ 464 - 0
create_pcl_match_dictionary.py

@@ -0,0 +1,464 @@
+import os
+
+from sympy.solvers.diophantine.diophantine import equivalent
+
+script_dir = os.path.dirname(os.path.abspath(__file__))
+import sys
+sys.path.append(script_dir)
+import pandas as pd
+import jieba
+import jieba.posseg as pseg
+import re
+import numpy as np
+import json
+import textdistance
+import faiss
+from remote_model import RemoteBGEModel
+
+
+class PLCMatch:
+    """通过关键词+语义相似度的方式,从用户输入中匹配PLC点位"""
    def __init__(self, project_id: int):
        """Build all lookup structures for one plant: the name->code map, the
        jieba user vocabulary, the level-1/level-2 dictionaries, the
        equivalent-word (synonym) map and the FAISS knowledge base.

        NOTE(review): constructing this class touches the filesystem and the
        remote BGE model service; it is not a cheap object to create.
        """
        # Plant (water-works) id, kept as a string for path construction.
        self.project_id = str(project_id)
        # Absolute directory of this script.
        self.script_dir = os.path.dirname(os.path.abspath(__file__)) # script's absolute path
        # Root directory holding this plant's dictionary artefacts.
        self.plc_dict_root_dir = os.path.join(self.script_dir, f'plc_dictionary/{self.project_id}_plc_dictionary')
        # Read the PLC point file and build the name->code mapping.
        self.name_2_code_dict = self.__read_pcl()

        # Load the user-maintained word lists into the jieba vocabulary.
        user_dictionary_dir = os.path.join(self.script_dir, 'user_maintain_dictionary', 'jieba_words')
        user_dict_list = [os.path.join(user_dictionary_dir, _) for _ in os.listdir(user_dictionary_dir) if _.split('.')[-1] == 'txt']  # user dictionaries
        self.user_dict_list = user_dict_list
        self.__load_user_dict()

        # Level-2 dictionary: Chinese-only key -> list of full field names.
        self.dict_level_2 = self.__make_level_two_dictionary()

        # Level-1 dictionary: shared nz word -> list of level-2 keys.
        self.dict_level_1 = self.__make_level_one_dictionary()

        # Equivalent-word (synonym) mapping table.
        self.equivalent_wordmap_txt = os.path.join(self.script_dir,'user_maintain_dictionary','equivalent_words', 'equivalent_wordmap.txt')
        self.dict_equivalent_wordmap = self.__construct_equivalent_wordmap()

        # Knowledge base built from the Chinese field names of the PLC database.
        # Loads the remote bge-m3 / bge-reranker models.
        self.plc_database_name_template_list = list(self.name_2_code_dict.keys())
        self.model = RemoteBGEModel('dev')
        self.knowledge = self.__load_faiss_database()
+
+
+    def __load_faiss_database(self):
+        """从本地加载向量数据库"""
+        # 水厂的数据库字段知识库
+        faiss_path = os.path.join(self.plc_dict_root_dir, f'{self.project_id}_knowledge.faiss')
+        # 尝试从本地加载
+        if os.path.exists(faiss_path):
+            print('PLC点位查询功能从本地加载点位字段向量知识库...')
+            return faiss.read_index(faiss_path)
+
+        # 如果不存在就尝试重新创建
+        # 首先,我们需要拿到数据库的点位名称,可以直接从name-code映射字典当中获取
+        plc_database_name_template_list = self.plc_database_name_template_list
+        # 调用远程embedding模型,one by one 地处理,远程模型通过配置参数进行归一化
+        embeddings = [self.model.encode([temp], normalize=True)[0] for temp in plc_database_name_template_list]
+        for _ in embeddings:
+            if _ is None:
+                raise RuntimeError('为plc数据库中文字段构建向量数据库时发生异常,embeddings不能存在None')
+        # 要求embeddings是一个二维矩阵,类型为float32
+        embeddings = np.array(embeddings, dtype=np.float32)
+        # 创建 FAISS 索引
+        dimension = embeddings[0].shape[0]
+        local_faiss = faiss.IndexFlatIP(dimension)  # 建立内积索引
+        local_faiss.add(embeddings)  # 添加索引
+        # 保存未来使用
+        faiss.write_index(local_faiss, faiss_path)
+        return local_faiss
+
+
    def __read_pcl(self):
        """
        Read the PLC point spreadsheet and build the name->code dictionary,
        caching the result as JSON inside the plant directory.
        :return: dict mapping normalised Chinese field name -> PLC code.
        """
        # Path of the cached name->code mapping.
        dict_name2code_path = os.path.join(self.plc_dict_root_dir, f'{self.project_id}_dict_name_2_code.json')
        # Prefer the cached mapping on disk.
        if os.path.exists(dict_name2code_path):
            with open(dict_name2code_path, 'r', encoding='utf-8') as f:
                dict_name2code = json.load(f)
            return dict_name2code

        # No cache: rebuild from the point spreadsheet.
        # The spreadsheet must exist for this plant.
        pcl_file_path = os.path.join(self.plc_dict_root_dir, f'{self.project_id}_点位.xlsx') # point file path
        if not os.path.exists(pcl_file_path):
            raise FileNotFoundError(f'{pcl_file_path} does not exist')
        # Load the points.
        points = pd.read_excel(pcl_file_path)
        # Column order assumed to be name | code -- TODO confirm for every plant's file.
        column_label_alias, column_label_code = points.columns.tolist()
        # Chinese name / English code columns.
        names = points.loc[:, column_label_alias].to_numpy()
        codes = points.loc[:, column_label_code].to_numpy()
        # Align naming style to the Xishan/Zhonghe convention: 超滤->UF,
        # 反渗透->RO, then rewrite 1#UF / 1#RO as UF1 / RO1.
        names = [s.replace('超滤','UF').replace('反渗透','RO') for s in names]
        names = [self.field_align(s) for s in names]
        # Name -> code dictionary.
        dict_name2code = dict(zip(names, codes))
        # Cache the mapping for later runs.
        with open(dict_name2code_path, 'w', encoding='utf-8') as f:
            json.dump(dict_name2code, f, ensure_ascii=False)
        return dict_name2code
+
+    def __load_user_dict(self):
+        """加载用户词典,添加到jieba词库"""
+        # 删除
+        jieba.del_word('反渗透')
+        jieba.del_word('超滤')
+        for user_dict_txt in self.user_dict_list:
+            # 检查文件是否存在
+            if not os.path.exists(user_dict_txt):
+                raise FileNotFoundError(f'{user_dict_txt} does not exist')
+            # 检查文件后缀名是否合法
+            if os.path.splitext(user_dict_txt)[1] != '.txt':
+                continue
+            # 分词库加载用户字典
+            jieba.load_userdict(user_dict_txt)
+
+    def __construct_equivalent_wordmap(self):
+        """构建等价词汇映射表,等价词汇的使用方式是将备查词的所有等效说法都纳入备查序列,从而保证了搜索的高召回率"""
+        # 检查文件是否存在
+        equivalent_wordmap_path = os.path.join(self.script_dir, 'user_maintain_dictionary','equivalent_words', 'dict_equivalent_wordmap.json')
+        if os.path.exists(equivalent_wordmap_path):
+            with open(equivalent_wordmap_path, 'r', encoding='utf-8') as f:
+                equivalent_wordmap = json.load(f)
+            return equivalent_wordmap
+        # 如果本地不存在等价词典json文件,那么就尝试创建
+        if not os.path.exists(self.equivalent_wordmap_txt):
+            raise FileNotFoundError(f'{self.equivalent_wordmap_txt} does not exist')
+
+        with open(self.equivalent_wordmap_txt, 'r', encoding='utf-8') as f:
+            all_lines = [_.strip() for _ in f.readlines()]
+        # 创建等价词汇映射表
+        dict_equi_wordmap = {}
+        for line in all_lines:
+            split_list = line.split('=')
+            for i in range(len(split_list)):
+                dict_equi_wordmap[split_list[i]] = split_list
+        with open(equivalent_wordmap_path, 'w', encoding='utf-8') as f:
+            json.dump(dict_equi_wordmap,f,ensure_ascii=False)
+        return dict_equi_wordmap
+
+    def __make_level_two_dictionary(self):
+        """创建二级字典,对点位所有字段进行正则匹配中文,将中文一样的字段聚合为同一个字典键值对,键为正则提取的中文字符"""
+        group_dict = {}
+        # 尝试从本地加载二级字典
+        dict_level2_dict_path = os.path.join(self.plc_dict_root_dir, f'{self.project_id}_dict_level_2.json')
+        if os.path.exists(dict_level2_dict_path):
+            with open(dict_level2_dict_path, 'r', encoding='utf-8') as f:
+                group_dict = json.load(f)
+            return group_dict
+
+        if self.name_2_code_dict is None:
+            raise ValueError(f'name_2_code_dict is None', self.name_2_code_dict)
+        data = self.name_2_code_dict.keys()
+
+        # 创建二级字典
+        for item in data:
+            k = re.sub(r'[^\u4e00-\u9fa5]', '', item)
+            # 处理没有汉字的字段
+            if k == '':
+                k = "无"
+            if k not in group_dict.keys():
+                group_dict[k] = [item]
+            else:
+                group_dict[k].append(item)
+
+        # 保存二级字典到本地
+        with open(dict_level2_dict_path, 'w', encoding='utf-8') as f:
+            json.dump(group_dict, f, ensure_ascii=False)
+        return group_dict
+
+    @staticmethod
+    def cut_compair(arr_a: str, arr_b: str, condition='nz') -> str:
+        """
+        :param condition: 词性
+        :param arr_a:
+        :param arr_b:
+        :return: 第一个相同nz词
+        """
+        # a: w1,f1  w2,f2  w3, f3
+        # b: w1,f1  w2,f2  w3, f3
+
+        cut_arr_a = [list(_) for _ in pseg.lcut(arr_a)]
+        cut_arr_b = [list(_) for _ in pseg.lcut(arr_b)]
+        for i in range(len(cut_arr_a)):
+            for j in range(i, len(cut_arr_b)):
+                # 只比较nz词性
+                if cut_arr_a[i][1] != condition or cut_arr_b[j][1] != condition:
+                    continue
+                if cut_arr_a[i][0] == cut_arr_b[j][0] and cut_arr_a[i][1] == cut_arr_b[j][1]:
+                    return cut_arr_a[i][0]
+        return ''
+
+    def __make_level_one_dictionary(self):
+        """创建一级字典"""
+        group_dict = {}  # 存放二次分组的结果
+        # 尝试从本地加载一级字典
+        dict_level_1_path = os.path.join(self.plc_dict_root_dir, f'{self.project_id}_dict_level_1.json')
+        if os.path.exists(dict_level_1_path):
+            with open(dict_level_1_path, 'r', encoding='utf-8') as f:
+                group_dict = json.load(f)
+            return group_dict
+
+        if self.dict_level_2.keys() is None:
+            raise ValueError(f'dict_lev2 is None', self.dict_level_2)
+        # 提取二级字典的所有key
+        data = self.dict_level_2.keys()
+
+        # 如果不存在就重新生成一级字典
+        # 根据用户词典进行分词,筛选出所有带nz词的字段
+        no_nz_list = []  # 没有nz词的字段
+        nz_list = []  # 有nz词的字段
+        for item in data:
+            # 判断是否存在nz名词
+            is_exist_n = False
+            for w, f in pseg.lcut(item):
+                if f == 'nz':  # 查看词性
+                    is_exist_n = True
+                    break
+            if is_exist_n:  # 存在词
+                nz_list.append(item)
+            else:  # 不存在nz词
+                no_nz_list.append(item)
+
+        # 聚合具有相同nz名词的字段
+        while len(nz_list) > 0:
+            pos = [1 for _ in range(len(nz_list))]  # 0表示不被取,1表示需要被取,默认都要被取,用来更新nz_list给下次判断使用
+            pos[0] = 0  # 标记第一个单词为不需要处理
+            for i in range(len(nz_list)):
+                # 查看是否存在相同的nz词
+                same_nz_word = self.cut_compair(nz_list[0], nz_list[i])
+                if same_nz_word:
+                    # 执行聚合
+                    if same_nz_word not in group_dict.keys():
+                        # 首次聚合,与自身比较,创建自身类别
+                        group_dict[same_nz_word] = [nz_list[i]]
+                    else:
+                        group_dict[same_nz_word].append(nz_list[i])
+
+                    pos[i] = 0
+            # 处理完一趟就要变更nz_list
+            nz_list = np.array(nz_list)[np.array(pos, dtype=np.bool)].tolist()
+
+        # 聚合不包含nz的名词, 单独占一个类别
+        for item in no_nz_list:
+            group_dict[item] = [item]
+
+        with open(dict_level_1_path, 'w', encoding='utf-8') as f:
+            json.dump(group_dict, f, ensure_ascii=False)
+
+        return group_dict
+
+    @staticmethod
+    def field_align(input_str:str)->str:
+        """按照锡山中荷命名规范对齐字段,1#UF替换为UF1,1#RO替换为RO1,保持统一"""
+        sources_uf = re.findall(r'\d+#UF', input_str, re.IGNORECASE)  # 匹配1#UF
+        sources_ro = re.findall(r'\d+#RO', input_str, re.IGNORECASE)  # 匹配1#RO
+        sources = sources_uf + sources_ro
+        for sou in sources:
+            number_, flag_ = sou.split('#')
+            input_str = input_str.replace(sou, flag_.upper() + number_) # 统一转为大写
+        return input_str
+
+    @ staticmethod
+    def quicksort_up_part(arr:list, start:int, end:int)-> int:
+        """升序排序"""
+        # 双指针
+        low = start
+        high = end
+        pivot = arr[start][1] # 基准值
+        # 大数放在基准值右边,小数放在基准值左边
+        while low < high:
+            # 先从右向左找比基准值小的
+            while low< high and arr[high][1] >= pivot:
+                high -= 1
+            # 此时high指向值小于基准值,交换
+            if low < high:
+                arr[low], arr[high] = arr[high], arr[low]
+                low +=1
+            # 现在开始从左向右找,比基准值大的数
+            while low < high and arr[low][1] <= pivot:
+                low += 1
+            # 此时low指向值大于基准值,交换
+            if low < high:
+                arr[high], arr[low] = arr[low], arr[high]
+                high -= 1
+        return low
+
+
    def quicksort_up(self, arr:list, start:int, end:int):
        """In-place ascending quicksort of arr[start:end+1], keyed on each
        tuple's second element.

        NOTE(review): first-element pivot gives O(n) recursion depth on
        already-sorted input -- acceptable for small candidate lists, but
        confirm list sizes stay modest.
        """
        if start >= end:
            return
        # Partition once to place the pivot.
        mid = self.quicksort_up_part(arr, start, end)
        # Sort the left part.
        self.quicksort_up(arr, start, mid - 1)
        # Sort the right part.
        self.quicksort_up(arr, mid + 1, end)
+
+    def words_similarity_score_sorted(self, query:str, candidates:list)->list:
+        """计算输入语句与候选词的相似度并按照相似度分值进行排序"""
+        # 选择算法(示例使用Levenshtein,归一化到0-1)
+        candidates = candidates.copy()
+        jarowinkler = textdistance.JaroWinkler()
+        key_score_list = [(candidate, jarowinkler.normalized_similarity(query, candidate)) for candidate in candidates]
+        self.quicksort_up(key_score_list, 0, len(key_score_list) - 1)  # 升序排序
+        key_sorted_list = [tuple_element[0] for tuple_element in key_score_list]  # 取出key
+        key_sorted_list = key_sorted_list[::-1]  # 反转,变为降序
+        return key_sorted_list
+
    def words_similarity_score_sorted_v2(self, query:str, candidates:list)->list:
        """Rank `candidates` by descending relevance to `query` using the
        remote reranker model."""
        # The remote reranker scores every (query, candidate) pair.
        n = len(candidates)  # number of candidates
        group_query = [(query, i) for i in candidates]
        score = self.model.compute_score(group_query)
        key_score_list = [(candidates[i], score[i]) for i in range(n)]
        self.quicksort_up(key_score_list, 0, len(key_score_list) - 1)  # ascending sort
        key_sorted_list = [tuple_element[0] for tuple_element in key_score_list]  # keep the words only
        key_sorted_list = key_sorted_list[::-1]  # reverse to descending
        return key_sorted_list
+
+    def match_v2_on(self, promt: str,is_agent:bool=False):
+        """
+        模糊匹配v2
+        :param is_agent:
+        :param promt:
+        :return:
+        """
+        print("=" * 50)
+        # 命名风格转换
+        print("原始查询:", promt)
+        promt = promt.replace('超滤', 'UF').replace('反渗透', 'RO').replace('号', '#').replace('组', '#')
+        promt = self.field_align(promt)
+        print("转换查询:", promt)
+        # 输入分词
+        nz_words = []
+        for w, f in pseg.lcut(promt):
+            print(f'{w}({f})', end="")
+            if f == 'nz':
+                nz_words.append(w)
+        print('\n备查nz词:', nz_words)
+
+        # 处理专有名词的等价词,为了保证高召回率,我们将备查词的所有等价说法都放入备查序列
+        equivalent_words = []
+        for nz_idx, nz in enumerate(nz_words):
+            # 首先判断nz词是否在等价词汇表中,如果不在根本无法替换
+            if nz in self.dict_equivalent_wordmap.keys():
+                # 然后把等价的说法都添加进去就好了
+                equivalent_words = self.dict_equivalent_wordmap.get(nz, [])
+        if equivalent_words:
+            nz_words += equivalent_words
+            nz_words = list(set(nz_words))
+        print('等价备查nz词:', nz_words)
+        del equivalent_words
+
+        # 进行一级查询,根据nz词是否包含于词典
+        query_level_one = []
+        for i in range(len(nz_words)):  # 为第i个nz词进行初次匹配
+            result = []
+            # 如果nz词包含在一级词典中就算匹配成功
+            for dict_level_1_key in self.dict_level_1.keys():
+                if nz_words[i] in dict_level_1_key:  # 如果nz词包含在一级词典内
+                    result+= self.dict_level_1.get(dict_level_1_key)
+            query_level_one.append(result)  # 放入一级查询结果中
+
+        # 进行二级查询
+        query_level_two = []
+        for idx_nz, i_nz_query_result in enumerate(query_level_one):  # 遍历每个nz词的查询结果
+            result = []  # 为第i个nz词进行二次匹配
+            # 如果第i个nz词一级查询不为空
+            if i_nz_query_result: # 第i个nz词的查询结果list
+                for res_word_level_one in i_nz_query_result:
+                    if res_word_level_one in self.dict_level_2.keys():
+                        result += self.dict_level_2.get(res_word_level_one)  # self.dict_level_2的value本身就是字典,所以用+=拼接
+            # 虽然一级查询失败,但是并不意味着映射词典里没有,因为一级词典忽略英文。
+            else:  # 如果一级查询失败,就直接在name2code字典中查询
+                if nz_words[idx_nz] in self.name_2_code_dict.keys():# 如果第i个nz词在2级词典,就直接添加到结果中
+                    result.append(nz_words[idx_nz])
+            # 如果第i个nz词的一级查询结果为空,则添加空列表占位
+            query_level_two.append(result)
+
+        # 常规精确匹配结束,如果匹配成功,结构为二维列表,否则为空列表
+        matched_keys = query_level_two  # 获取已匹配的字段
+        # 备查词合并,我们约定所有备查词进行统一的查询,后面怎么用这些结果取决于外部的应用,对于agent模式,将会输出许多结果,对月非agent只会输出概率最高的结果
+        tem_matched_keys = []
+        for item in matched_keys:
+            tem_matched_keys += item
+        matched_keys = [list(set(tem_matched_keys))]
+        del tem_matched_keys
+
+        # 如果精确匹配失败,没有匹配到任何结果则按照语义进行模糊匹配,返回满足条件的置信度最高的结果
+        # if not nz_words or ([] in matched_keys):
+        # 比起手动维护词典,我们更相信语义相似度
+        top_k = 5
+        confi = 0.2 # 置信度阈值
+        print(f'进入模糊匹配,召回Top:{top_k} 置信度阈值:{confi}...')
+        # 调用远程bge-m3模型进行embedding
+        query_embedding = np.array(self.model.encode([promt], normalize=True), dtype=np.float32) # 要求query_embedding是一个二维矩阵,形状为(1, 1024)
+        distances, indices = self.knowledge.search(query_embedding, top_k)
+        group_query = [(promt, self.plc_database_name_template_list[indices[0][i]]) for i in range(top_k)]
+        # 我们更愿意相信bge,因此把词典关键词匹配的结果一并放进去重排序
+        group_query_manuel = [(promt, k) for keys in matched_keys for k in keys]
+        group_query += group_query_manuel
+        del group_query_manuel
+        group_query = list(set(group_query))  # 去重
+        # 调用远程bge-reranker模型
+        score = self.model.compute_score(group_query)
+        rerank_result = sorted([(group_query[i][1], score[i]) for i in range(len(group_query))], key=lambda x: x[1], reverse=True)
+        print(F'打印前top{top_k}候选词结果:', rerank_result[:top_k])
+        print(f'首元素模糊匹配到{rerank_result[0][0]}, 置信度为{rerank_result[0][1]}')
+        # matched_keys 为最终结果,保持形状为二维列表
+        matched_keys = [[i[0] for i in rerank_result]]
+        # 每个匹配结果的置信度
+        matched_keys_score = [[i[1] for i in rerank_result]]
+
+        # 为结果创建映射字典
+        result_list = []
+        for i_nz_keys in matched_keys:
+            result_list.append([{key: self.name_2_code_dict.get(key)} for key in i_nz_keys])
+        print(f"查询到{len([_ for _ in result_list if _])}个结果:")
+
+        if not is_agent:
+            # 非agent模式每个匹配结果只取第一个元素的英文
+            tem_list = []
+            for res in result_list:
+                if res:
+                    for k, v in res[0].items():  # 每个nz词的查询结果都是一个list,每个list可能包含多个字典
+                        tem_list.append(f'{k}:{v}')
+            result_list = tem_list
+            print('以非agent模式返回:', result_list)
+            return result_list
+
+        print('以agent模式返回:', result_list)
+        print('='*50)
+        return result_list, matched_keys_score
+
if __name__ == '__main__':
    # Smoke test: build the matcher for one plant and run a sample query.
    # NOTE(review): requires the plant's dictionary files and access to the
    # remote BGE model service.
    pj = 92  # plant id whose PLC points are queried
    pcl_helper = PLCMatch(project_id=pj)
    # Example user input.
    my_promt = "我想要查询锡山中荷进水电导率"
    # query_res = pcl_helper.match_v2_on(my_promt, is_agent=True)
    query_res = pcl_helper.match_v2_on(my_promt, is_agent=False)

    pass
+
+
+

+ 30 - 0
item_del_tool.py

@@ -0,0 +1,30 @@
import pandas as pd

# One-off cleaning tool: drop control/alarm-style points from a plant's raw
# PLC point spreadsheet and write the cleaned copy next to it.
project_id = 1450
xlsx_file = f'./plc点位原始文件/{project_id}_点位_原始.xlsx'
df_xlsx = pd.read_excel(xlsx_file)
# Any row whose item_alias contains one of these keywords is discarded.
del_list = [
    '相电压', '相电流', '启动操作', '停止操作',
    '备用', '中间值,不用读', '报警', '联动标志', '排水显示',
    '校准开关', '功率因数', '开关', '设定', '是否', '起泵', '停泵',
    '高限设置', '低限设置', '手自动', '关操作', '关到位', '开操作', '开到位', '打开/关闭', '远程',
    '运行电流', '不确定', '未知', '复位', '需要',
    '准备好', '信息清除', '提醒字', '控制字', '设置频率',
    '循环', '启停', '故障字', '投入/切除', '定频控制', '控制模式', '选择', '开机/停止', '杀菌步序',
    '加药阀', '搅拌器', '定频率', '设置', '手动/自动', '按钮',
    '跳转', '紧急', '启动停止', '申请', '应答', '允许', '排队', '启动/停止', '打开', '关闭',
    '手动开', '手动关', '自动关闭', '手动启动', '自动启动', '手动停止', '泵启动',
    '入栈', '出栈', '按键', '信号类型', 'flag', 'Flag', 'FLAG',
]
# Keep only the rows whose alias contains none of the keywords.
reserve_idx = [
    index
    for index, column in df_xlsx.iterrows()
    if not any(key_word in column.loc['item_alias'] for key_word in del_list)
]
new_df = df_xlsx.iloc[reserve_idx, :].reset_index(drop=True)
# Write the cleaned spreadsheet (same name without the _原始 suffix).
new_df.to_excel(xlsx_file.replace('_原始.', '.'), index=False)

Diff do ficheiro suprimidas por serem muito extensas
+ 0 - 0
plc_dictionary/1181_plc_dictionary/1181_dict_level_1.json


Diff do ficheiro suprimidas por serem muito extensas
+ 0 - 0
plc_dictionary/1181_plc_dictionary/1181_dict_level_2.json


Diff do ficheiro suprimidas por serem muito extensas
+ 0 - 0
plc_dictionary/1181_plc_dictionary/1181_dict_name_2_code.json


BIN
plc_dictionary/1181_plc_dictionary/1181_knowledge.faiss


BIN
plc_dictionary/1181_plc_dictionary/1181_点位.xlsx


Diff do ficheiro suprimidas por serem muito extensas
+ 0 - 0
plc_dictionary/1202_plc_dictionary/1202_dict_level_1.json


Diff do ficheiro suprimidas por serem muito extensas
+ 0 - 0
plc_dictionary/1202_plc_dictionary/1202_dict_level_2.json


Diff do ficheiro suprimidas por serem muito extensas
+ 0 - 0
plc_dictionary/1202_plc_dictionary/1202_dict_name_2_code.json


BIN
plc_dictionary/1202_plc_dictionary/1202_knowledge.faiss


BIN
plc_dictionary/1202_plc_dictionary/1202_点位.xlsx


Diff do ficheiro suprimidas por serem muito extensas
+ 0 - 0
plc_dictionary/1450_plc_dictionary/1450_dict_level_1.json


Diff do ficheiro suprimidas por serem muito extensas
+ 0 - 0
plc_dictionary/1450_plc_dictionary/1450_dict_level_2.json


Diff do ficheiro suprimidas por serem muito extensas
+ 0 - 0
plc_dictionary/1450_plc_dictionary/1450_dict_name_2_code.json


BIN
plc_dictionary/1450_plc_dictionary/1450_knowledge.faiss


BIN
plc_dictionary/1450_plc_dictionary/1450_点位.xlsx


Diff do ficheiro suprimidas por serem muito extensas
+ 0 - 0
plc_dictionary/92_plc_dictionary/92_dict_level_1.json


Diff do ficheiro suprimidas por serem muito extensas
+ 0 - 0
plc_dictionary/92_plc_dictionary/92_dict_level_2.json


Diff do ficheiro suprimidas por serem muito extensas
+ 0 - 0
plc_dictionary/92_plc_dictionary/92_dict_name_2_code.json


BIN
plc_dictionary/92_plc_dictionary/92_knowledge.faiss


BIN
plc_dictionary/92_plc_dictionary/92_点位.xlsx


+ 335 - 0
plclib.py

@@ -0,0 +1,335 @@
+import os
+script_dir = os.path.dirname(os.path.abspath(__file__))
+import sys
+sys.path.append(script_dir)
+import jieba
+import jieba.posseg as pseg
+import re
+import os
+import json
+import textdistance
+import warnings
+import numpy as np
+import faiss
+from remote_model import RemoteBGEModel
+
+
+class PLCLib:
+
    def __init__(self):
        """Runtime matcher that only loads pre-built dictionary artefacts
        (PLCMatch in create_pcl_match_dictionary.py is the builder).
        Original note: per-plant caching is still to be implemented.
        """
        # Per-plant state; populated by load().
        self.project_id = None
        self.plc_dict_root_dir = None
        self.name_2_code_dict = None
        self.plc_database_name_template_list = None
        self.dict_equivalent_wordmap = None
        self.dict_level_2 = None
        self.dict_level_1 = None
        self.user_dict_list = None
        self.knowledge = None

        # Remote bge-m3 / bge-reranker models.
        self.model = RemoteBGEModel('dev')

        # Load the user dictionaries into jieba; independent of the plant id.
        self.script_dir = os.path.dirname(os.path.abspath(__file__)) # script's absolute path
        user_dictionary_dir = os.path.join(self.script_dir, 'user_maintain_dictionary', 'jieba_words')
        if not os.path.exists(user_dictionary_dir):
            warnings.warn(f'用户分词词典不存在,严重影响匹配成功率,请检查路径{user_dictionary_dir}是否存在!', UserWarning)
        else:
            self.user_dict_list = [os.path.join(user_dictionary_dir, _) for _ in os.listdir(user_dictionary_dir) if _.split('.')[-1] == 'txt']  # user dictionaries
            self.__load_user_dict()
+
+
    def load(self, project_id):
        """Load every pre-built dictionary and the FAISS index for one plant.

        Must be called before querying; each loader raises FileNotFoundError
        when its cached artefact is missing (they are produced by the
        builder script, not here).
        """
        self.project_id = project_id
        self.plc_dict_root_dir = os.path.join(self.script_dir, 'plc_dictionary',f'{self.project_id}_plc_dictionary')
        # name -> PLC code mapping.
        self.name_2_code_dict = self.__read_pcl()
        self.plc_database_name_template_list = list(self.name_2_code_dict.keys())
        # Equivalent-word (synonym) table.
        self.dict_equivalent_wordmap = self.__construct_equivalent_wordmap()
        # Level-2 dictionary.
        self.dict_level_2 =self.__make_level_two_dictionary()
        # Level-1 dictionary.
        self.dict_level_1 = self.__make_level_one_dictionary()
        # Local FAISS knowledge base.
        self.knowledge = self.__load_faiss_database()
+
+    def __load_faiss_database(self):
+        """从本地加载向量数据库"""
+        # 水厂的数据库字段知识库
+        faiss_path = os.path.join(self.plc_dict_root_dir, f'{self.project_id}_knowledge.faiss')
+        # 尝试从本地加载
+        if os.path.exists(faiss_path):
+            print('PLC点位查询功能从本地加载点位字段向量知识库...')
+            local_faiss = faiss.read_index(faiss_path)
+        else:
+            raise FileNotFoundError('file not found!', faiss_path)
+        return local_faiss
+
+
+    @staticmethod
+    def field_align(input_str:str)->str:
+        """按照锡山中荷命名规范对齐字段,1#UF替换为UF1,1#RO替换为RO1,保持统一"""
+        sources_uf = re.findall(r'\d+#UF', input_str, re.IGNORECASE)  # 匹配1#UF
+        sources_ro = re.findall(r'\d+#RO', input_str, re.IGNORECASE)  # 匹配1#RO
+        sources = sources_uf + sources_ro
+        for sou in sources:
+            number_, flag_ = sou.split('#')
+            input_str = input_str.replace(sou, flag_.upper() + number_) # 统一转为大写
+        return input_str
+
+    def __construct_equivalent_wordmap(self):
+        """构建等价词汇映射表"""
+        # 检查文件是否存在
+        equivalent_wordmap_path = os.path.join(self.script_dir, 'user_maintain_dictionary','equivalent_words', 'dict_equivalent_wordmap.json')
+        if os.path.exists(equivalent_wordmap_path):
+            with open(equivalent_wordmap_path, 'r', encoding='utf-8') as f:
+                equivalent_wordmap = json.load(f)
+        else:
+            raise FileNotFoundError('file not found!', equivalent_wordmap_path)
+        return equivalent_wordmap
+
+    def __make_level_one_dictionary(self):
+        """创建一级字典"""
+        # 尝试从本地加载一级字典
+        dict_level_1_path = os.path.join(self.plc_dict_root_dir, f'{self.project_id}_dict_level_1.json')
+        if os.path.exists(dict_level_1_path):
+            with open(dict_level_1_path, 'r', encoding='utf-8') as f:
+                group_dict = json.load(f)
+        else:
+            raise FileNotFoundError('file not found!', dict_level_1_path)
+        return group_dict
+
+    def __make_level_two_dictionary(self):
+        """创建二级字典,对点位所有字段进行正则匹配中文,将中文一样的字段聚合为同一个字典键值对,键为正则提取的中文字符"""
+        # 尝试从本地加载二级字典
+        dict_level2_dict_path = os.path.join(self.plc_dict_root_dir, f'{self.project_id}_dict_level_2.json')
+        if os.path.exists(dict_level2_dict_path):
+            with open(dict_level2_dict_path, 'r', encoding='utf-8') as f:
+                group_dict = json.load(f)
+        else:
+            raise FileNotFoundError('file not found!', dict_level2_dict_path)
+        return group_dict
+
+    def __read_pcl(self):
+        """
+        读取pcl文件,生成name2code词典
+        :return:
+        """
+        # 尝试从本地加载name-code映射字典
+        dict_name2code_path = os.path.join(self.plc_dict_root_dir, f'{self.project_id}_dict_name_2_code.json')
+        if os.path.exists(dict_name2code_path):
+            with open(dict_name2code_path, 'r', encoding='utf-8') as f:
+                dict_name2code = json.load(f)
+        else:
+            raise FileNotFoundError('file not found!', dict_name2code_path)
+        return dict_name2code
+
+    def __load_user_dict(self):
+        """加载用户词典,添加到jieba词库"""
+        # 删除
+        jieba.del_word('反渗透')
+        jieba.del_word('超滤')
+        for user_dict_txt in self.user_dict_list:
+            # # 检查文件是否存在
+            # if not os.path.exists(user_dict_txt):
+            #     raise FileNotFoundError(f'{user_dict_txt} does not exist')
+            # # 检查文件后缀名是否合法
+            # if os.path.splitext(user_dict_txt)[1] != '.txt':
+            #     continue
+            # 分词库加载用户字典
+            jieba.load_userdict(user_dict_txt)
+    @ staticmethod
+    def quicksort_up_part(arr:list, start:int, end:int)-> int:
+        """升序排序"""
+        # 双指针
+        low = start
+        high = end
+        pivot = arr[start][1] # 基准值
+        # 大数放在基准值右边,小数放在基准值左边
+        while low < high:
+            # 先从右向左找比基准值小的
+            while low< high and arr[high][1] >= pivot:
+                high -= 1
+            # 此时high指向值小于基准值,交换
+            if low < high:
+                arr[low], arr[high] = arr[high], arr[low]
+                low +=1
+            # 现在开始从左向右找,比基准值大的数
+            while low < high and arr[low][1] <= pivot:
+                low += 1
+            # 此时low指向值大于基准值,交换
+            if low < high:
+                arr[high], arr[low] = arr[low], arr[high]
+                high -= 1
+        return low
    def quicksort_up(self, arr:list, start:int, end:int):
        """In-place ascending quicksort of arr[start:end+1], keyed on each
        tuple's second element."""
        if start >= end:
            return
        # Partition once to place the pivot.
        mid = self.quicksort_up_part(arr, start, end)
        # Sort the left part.
        self.quicksort_up(arr, start, mid - 1)
        # Sort the right part.
        self.quicksort_up(arr, mid + 1, end)
+    def words_similarity_score_sorted(self, query:str, candidates:list)->list:
+        """计算输入语句与候选词的相似度并按照相似度分值进行排序"""
+        # 选择算法(示例使用Levenshtein,归一化到0-1)
+        candidates = candidates.copy()
+        jarowinkler = textdistance.JaroWinkler()
+        key_score_list = [(candidate, jarowinkler.normalized_similarity(query, candidate)) for candidate in candidates]
+        self.quicksort_up(key_score_list, 0, len(key_score_list) - 1)  # 升序排序
+        key_sorted_list = [tuple_element[0] for tuple_element in key_score_list]  # 取出key
+        key_sorted_list = key_sorted_list[::-1]  # 反转,变为降序
+        return key_sorted_list
+
+    def words_similarity_score_sorted_v2(self, query:str, candidates:list)->list:
+        """通过rerank的方式为候选词进行相似度排序"""
+        # 调用远程reranker模型
+        n = len(candidates)  # 候选词数量
+        group_query = [(query, i) for i in candidates]
+        score = self.model.compute_score(group_query)
+        key_score_list = [(candidates[i], score[i]) for i in range(n)]
+        self.quicksort_up(key_score_list, 0, len(key_score_list) - 1)  # 升序排序
+        key_sorted_list = [tuple_element[0] for tuple_element in key_score_list]  # 取出key
+        key_sorted_list = key_sorted_list[::-1]  # 反转,变为降序
+        return key_sorted_list
+
    def query(self, promt, is_agent:bool=False):
        """Match a natural-language query against PLC point names (copied from PLCMatch_match_v2_on)."""
        """
        Fuzzy matching, v2.
        :param is_agent: when True return all candidates plus confidences; otherwise only top codes
        :param promt: the user's natural-language query (note: parameter name is a typo of "prompt")
        :return: see the bottom of this function for the two return shapes
        """
        print("=" * 50)
        # Normalize naming conventions (Chinese terms -> PLC-style abbreviations).
        print("原始查询:", promt)
        promt = promt.replace('超滤', 'UF').replace('反渗透', 'RO').replace('号', '#').replace('组', '#')
        promt = self.field_align(promt)
        print("转换查询:", promt)
        # Segment the query; keep only the 'nz' (proper-noun) tokens as lookup words.
        nz_words = []
        for w, f in pseg.lcut(promt):
            print(f'{w}({f})', end="")
            if f == 'nz':
                nz_words.append(w)
        print('\n备查nz词:', nz_words)

        # Expand proper nouns with their equivalent phrasings: to keep recall high,
        # every equivalent wording of a lookup word joins the lookup list.
        equivalent_words = []
        for nz_idx, nz in enumerate(nz_words):
            # Only words present in the equivalence map can be expanded at all.
            if nz in self.dict_equivalent_wordmap.keys():
                # NOTE(review): plain assignment overwrites on every hit, so only the
                # LAST matching nz word's equivalents survive — confirm whether
                # `+=` was intended here.
                equivalent_words = self.dict_equivalent_wordmap.get(nz, [])
        if equivalent_words:
            nz_words += equivalent_words
            nz_words = list(set(nz_words))
        print('等价备查nz词:', nz_words)
        del equivalent_words

        # Level-1 lookup: an nz word matches if it is a substring of a level-1 key.
        query_level_one = []
        for i in range(len(nz_words)):  # first-pass match for the i-th nz word
            result = []
            # Containment in a level-1 dictionary key counts as a match.
            for dict_level_1_key in self.dict_level_1.keys():
                if nz_words[i] in dict_level_1_key:  # nz word contained in a level-1 key
                    result+= self.dict_level_1.get(dict_level_1_key)
            query_level_one.append(result)  # collect the level-1 result

        # Level-2 lookup.
        query_level_two = []
        for idx_nz, i_nz_query_result in enumerate(query_level_one):  # per-nz-word level-1 results
            result = []  # second-pass match for the i-th nz word
            # Level-1 lookup succeeded for this nz word.
            if i_nz_query_result: # list of level-1 hits for this nz word
                for res_word_level_one in i_nz_query_result:
                    if res_word_level_one in self.dict_level_2.keys():
                        result += self.dict_level_2.get(res_word_level_one)  # dict_level_2 values are collections, concatenated with +=
            # A level-1 miss does not mean the mapping dictionary lacks the word,
            # because the level-1 dictionary ignores English.
            else:  # fall back to a direct lookup in the name2code dictionary
                if nz_words[idx_nz] in self.name_2_code_dict.keys():# the nz word is itself a level-2 key
                    result.append(nz_words[idx_nz])
            # Append (possibly empty) result as a placeholder for this nz word.
            query_level_two.append(result)

        # Exact matching done: a 2-D list on success, an empty list otherwise.
        matched_keys = query_level_two  # fields matched so far
        # Merge all candidate words into one joint lookup; how the results are used is
        # up to the caller: agent mode returns many results, non-agent mode only the
        # highest-confidence one.
        tem_matched_keys = []
        for item in matched_keys:
            tem_matched_keys += item
        matched_keys = [list(set(tem_matched_keys))]
        del tem_matched_keys

        # If exact matching found nothing, fall back to semantic fuzzy matching and
        # return the highest-confidence results.
        # if not nz_words or ([] in matched_keys):
        # Semantic similarity is trusted over the hand-maintained dictionary.
        top_k = 5
        confi = 0.2 # confidence threshold
        print(f'进入模糊匹配,召回Top:{top_k} 置信度阈值:{confi}...')
        # Embed the query with the remote bge-m3 model.
        query_embedding = np.array(self.model.encode([promt], normalize=True), dtype=np.float32) # query_embedding must be a 2-D matrix of shape (1, 1024)
        distances, indices = self.knowledge.search(query_embedding, top_k)
        group_query = [(promt, self.plc_database_name_template_list[indices[0][i]]) for i in range(top_k)]
        # bge is trusted more, so the dictionary keyword matches are thrown into the
        # rerank pool alongside the vector-recall candidates.
        group_query_manuel = [(promt, k) for keys in matched_keys for k in keys]
        group_query += group_query_manuel
        del group_query_manuel
        group_query = list(set(group_query))  # de-duplicate
        # Score all candidates with the remote bge-reranker model.
        score = self.model.compute_score(group_query)
        rerank_result = sorted([(group_query[i][1], score[i]) for i in range(len(group_query))], key=lambda x: x[1], reverse=True)
        print(F'打印前top{top_k}候选词结果:', rerank_result[:top_k])
        print(f'首元素模糊匹配到{rerank_result[0][0]}, 置信度为{rerank_result[0][1]}')
        # matched_keys is the final result, kept as a 2-D list.
        matched_keys = [[i[0] for i in rerank_result]]
        # Confidence score for every matched key, same shape as matched_keys.
        matched_keys_score = [[i[1] for i in rerank_result]]

        # Build the {name: code} mapping for each result.
        result_list = []
        for i_nz_keys in matched_keys:
            result_list.append([{key: self.name_2_code_dict.get(key)} for key in i_nz_keys])
        print(f"查询到{len([_ for _ in result_list if _])}个结果:")

        if not is_agent:
            # Non-agent mode keeps only the first element's name:code string per match.
            tem_list = []
            for res in result_list:
                if res:
                    for k, v in res[0].items():  # each result is a list that may hold several single-entry dicts
                        tem_list.append(f'{k}:{v}')
            result_list = tem_list
            print('以非agent模式返回:', result_list)
            return result_list

        print('以agent模式返回:', result_list)
        print('='*50)
        return result_list, matched_keys_score
+
# Step 1: instantiate once at import time (module-level singleton).
helper = PLCLib()
+
if __name__ == '__main__':
    # demo
    # Step 2: load the database for a given water plant (plant id 92 here).
    helper.load(92)
    # Step 3: match a query against the loaded plant's points.
    # helper.query("查询RO1回收率、RO2回收率、...")
    helper.query("查询中荷水厂产水电导率", is_agent=False)
    # Agent mode
    # Output format: list, [RO1 recovery result, RO2 recovery result, ...]
    # RO1 recovery result: list, [{'RO1回收率': 'RO1HSL'}]
    # RO2 recovery result: list, [{'RO2回收率': 'RO2HSL'}]
    # ...
    # Full result format: [[{'RO1回收率': 'RO1HSL'}], [{'RO2回收率': 'RO2HSL'}]]
    # Non-agent mode takes the first element of each result and returns the English code directly.
    #

BIN
plc点位原始文件/1181_点位_原始.xlsx


BIN
plc点位原始文件/1202_点位_原始.xlsx


BIN
plc点位原始文件/1450_点位_原始.xlsx


BIN
plc点位原始文件/92_点位_原始.xlsx


+ 10 - 0
readme

@@ -0,0 +1,10 @@
+create_level_query_dict.py 创建二级查询字典
+item_del_tool.py 字段剔除工具
+user_dictionary.txt 用户分词词典
+user_level_1.json 1级词典,一轮查询
+user_level_2.json 2级词典,二轮查询
+
+注意:如果数据库字段中包含用户词典中的词,那么……(TODO:此句原文未写完,待补充说明)
+
+
+version:2025年11月7日15点12分

+ 132 - 0
remote_model.py

@@ -0,0 +1,132 @@
+# version: 2025.12.04
+import requests
+from typing import List, Tuple, Optional
+import os
+import json
+import time
+import numpy as np
+from FlagEmbedding import FlagAutoModel, FlagReranker
+script_dir = os.path.dirname(os.path.abspath(__file__))
+
class RemoteBGEModel:
    """Thin HTTP client for the remote BGE embedding (bge-m3) and reranker services.

    Service URLs are read from url_config.json next to this file and selected
    by the deployment branch ('dev'/'test', 'main'/'master', or 'local').
    """

    def __init__(self, branch: str = 'dev', timeout: int = 3, max_retries: int = 3):
        """
        :param branch: deployment branch selecting which URLs to use
        :param timeout: per-request timeout in seconds
        :param max_retries: number of attempts before giving up on a request
        :raises ValueError: on an unknown branch name
        """
        self.branch = branch.strip().lower()
        if self.branch not in ('dev', 'test', 'master', 'main', 'local'):
            # BUG FIX: the old message omitted 'local', which is accepted.
            raise ValueError("Param 'branch' must be dev, test, master, main or local", branch)

        self.url_file = os.path.join(script_dir, 'url_config.json')
        self.embedding_url, self.reranker_url = self.load_url()
        self.timeout = timeout
        self.max_retries = max_retries
        # Shared request headers for both services.
        self.headers = {"Content-Type": "application/json"}

    def load_url(self):
        """Read url_config.json and return (embed_url, rerank_url) for the branch.

        :raises FileNotFoundError: when the config file is missing
        """
        if not os.path.exists(self.url_file):
            raise FileNotFoundError("File not exist", self.url_file)
        with open(self.url_file, 'r', encoding='utf-8') as f:
            json_data = json.load(f)
        # Map the branch to the config-key prefix.
        if self.branch in ('dev', 'test'):
            prefix = 'dev'
        elif self.branch in ('main', 'master'):
            prefix = 'master'
        else:
            prefix = 'local'
        embed_url = json_data[prefix + '_embed_url'] + '/embed'
        rerank_url = json_data[prefix + '_reranker_url'] + '/rerank'
        return embed_url, rerank_url

    def _access_remote_model(self, url: str, data: dict):
        """POST *data* to *url* with retries; return the JSON payload as ndarray, or None.

        Retries up to self.max_retries times on network errors and non-200
        responses, sleeping 1 second between attempts.
        """
        time.sleep(0.08)  # throttle to avoid hammering the remote service
        for attempt in range(self.max_retries):
            try:
                # BUG FIX: self.timeout was stored but never passed to requests.post,
                # so requests could hang indefinitely.
                response = requests.post(url=url, headers=self.headers, json=data,
                                         timeout=self.timeout)
                if response.status_code == 200:
                    return np.array(response.json())
            except Exception as e:
                # BUG FIX: the original returned None inside this handler, so
                # max_retries never had any effect after the first exception.
                print('请求embedding模型失败', e)
            if attempt + 1 < self.max_retries:
                time.sleep(1)  # back off before the next attempt
        return None

    def encode(self, texts: List[str], normalize: bool = True):
        """Embed *texts* with the remote bge-m3 model.

        :param texts: a string or a non-empty list of strings
        :param normalize: whether the service should L2-normalize embeddings
        :return: ndarray of embeddings, or None when the request failed
        :raises TypeError: texts is neither a list nor a string
        :raises ValueError: texts is an empty list or contains non-strings
        """
        if not isinstance(texts, (list, str)):
            raise TypeError("Text must be list or string", texts)
        if isinstance(texts, list):
            if not texts:
                raise ValueError("Text must not be empty", texts)
            for i, content in enumerate(texts):
                if not isinstance(content, str):
                    # BUG FIX: the old message said "must not be empty" for a type error.
                    raise ValueError(f"Text element must be a string, pos:{i}, content:{content}")
        data = {"inputs": texts, "normalize": normalize}

        return self._access_remote_model(
            url=self.embedding_url,
            data=data
        )

    def compute_score(self, pairs: List[Tuple[str, str]]):
        """Score (query, text) pairs with the remote bge-reranker, preserving input order.

        All pairs must share the same query string.
        :return: list of float scores aligned with *pairs*
        :raises TypeError: pairs is not a list, or a pair element is not a str
        :raises ValueError: pairs is empty, malformed, or queries differ
        """
        if not isinstance(pairs, list):
            raise TypeError("Pairs must be list", pairs)
        if not pairs:
            raise ValueError("Pairs must not be empty", pairs)
        if len(pairs[0]) != 2:
            # BUG FIX: the original reused the "must not be empty" message here.
            raise ValueError("Every pair must have exactly two elements", pairs)
        for i, (q, t) in enumerate(pairs):
            if not isinstance(q, str) or not isinstance(t, str):
                # BUG FIX: the old message was inverted ("must not be str").
                raise TypeError(f"Elements of every pair must be str, pos:{i}, ({q}, {t})")
        # BUG FIX: the original index arithmetic only compared the first three
        # queries; verify that every pair carries the same query instead.
        query = pairs[0][0]
        if any(q != query for q, _ in pairs):
            raise ValueError("Pairs must have the same query", pairs)
        texts = [t for _, t in pairs]
        data = {
            "query": query,  # the bge-reranker accepts an empty query field
            "texts": texts
        }

        # Fetch the rerank result.
        res = self._access_remote_model(
            url=self.reranker_url,
            data=data
        )
        # Restore the original input order before extracting the scores.
        score = [item["score"] for item in sorted(res, key=lambda x: x["index"])]
        return score
+
+
+if __name__ == "__main__":
+    timeout = 3
+    max_retries = 3
+    bge_model = RemoteBGEModel('dev', timeout, max_retries)
+    t = bge_model.encode(["hello"], normalize=True)
+    tt = bge_model.compute_score([("你好呀我的名字叫做汤姆","今天世界杯中国得了冠军"),
+                                  ("你好呀我的名字叫做汤姆","你好呀我的名字叫做山姆"),
+                                  ("你好呀我的名字叫做汤姆","你好呀我的名字叫做汤姆?"),
+                                  ("你好呀我的名字叫做汤姆","我今天非常的开心,你呢?")])
+    # reranker = FlagReranker(os.path.join(script_dir, 'bge-reranker-v2-m3'), use_fp16=True, local_files_only=True,
+    #                              devices=["cuda:0"])
+    # ttt = reranker.compute_score([("你好呀我的名字叫做汤姆","今天世界杯中国得了冠军"),
+    #                               ("你好呀我的名字叫做汤姆","你好呀我的名字叫做山姆"),
+    #                               ("你好呀我的名字叫做汤姆","你好呀我的名字叫做汤姆?"),
+    #                               ("你好呀我的名字叫做汤姆","我今天非常的开心,你呢?")], normalize=True)
+    pass

+ 8 - 0
url_config.json

@@ -0,0 +1,8 @@
+{
+  "dev_embed_url": "http://101.200.76.30:8002",
+  "dev_reranker_url": "http://101.200.76.30:8003",
+  "master_embed_url": "http://101.200.76.30:8002",
+  "master_reranker_url": "http://101.200.76.30:8003",
+  "local_embed_url": "http://101.200.76.30:8002",
+  "local_reranker_url": "http://101.200.76.30:8003"
+}

+ 1 - 0
user_maintain_dictionary/equivalent_words/dict_equivalent_wordmap.json

@@ -0,0 +1 @@
+{"总回收率": ["总回收率", "回收率"], "回收率": ["总回收率", "回收率"], "总进水量": ["总进水量", "进水量"], "进水量": ["总进水量", "进水量"], "总产水电导": ["总产水电导", "产水电导"], "产水电导": ["总产水电导", "产水电导"], "总进水电导": ["总进水电导", "进水电导"], "进水电导": ["总进水电导", "进水电导"], "总产水压力": ["总产水压力", "产水压力"], "产水压力": ["总产水压力", "产水压力"], "总产水流量": ["总产水流量", "产水流量"], "产水流量": ["总产水流量", "产水流量"], "总进水流量": ["总进水流量", "进水流量"], "进水流量": ["总进水流量", "进水流量"], "电导": ["电导", "电导率"], "电导率": ["电导", "电导率"], "": [""]}

+ 9 - 0
user_maintain_dictionary/equivalent_words/equivalent_wordmap.txt

@@ -0,0 +1,9 @@
+总回收率=回收率
+总进水量=进水量
+总产水电导=产水电导
+总进水电导=进水电导
+总产水压力=产水压力
+总产水流量=产水流量
+总进水流量=进水流量
+电导=电导率
+

+ 40 - 0
user_maintain_dictionary/jieba_words/A综合.txt

@@ -0,0 +1,40 @@
+ph 1000 nz
+CIP 1000 nz
+液位 1000 nz
+温度 1000 nz
+UF进水浊度 1000 nz
+UF产水浊度 1000 nz
+
+供水泵 1000 nz
+UF泵 1000 nz
+反洗泵 1000 nz
+UF反洗水泵 1000 nz
+清水外供泵 1000 nz
+清洗水泵 1000 nz
+高压泵 1000 nz
+加药泵 1000 nz
+段间泵 1000 nz
+卸料泵 1000 nz
+
+反洗膜通量 1000 nz
+清洗膜通量 1000 nz
+
+自清洗过滤器 1000 nz
+脱碳风机 1000 nz
+
+还原剂 1000 nz
+阻垢剂 1000 nz
+絮凝剂 1000 nz
+盐酸 1000 nz
+
+清水池 1000 nz
+中和池 1000 nz
+
+水温校正因子 1000 nz
+
+外供水 1000 nz
+UF总产水 1000 nz
+进水池 1000 nz
+反洗水池 1000 nz
+中荷废水 1000 nz
+

+ 9 - 0
user_maintain_dictionary/jieba_words/B产水率.txt

@@ -0,0 +1,9 @@
+产水率 1000 nz
+RO1产水率 1000 nz
+RO2产水率 1000 nz
+RO3产水率 1000 nz
+RO4产水率 1000 nz
+RO5产水率 1000 nz
+RO6产水率 1000 nz
+RO7产水率 1000 nz
+RO8产水率 1000 nz

+ 40 - 0
user_maintain_dictionary/jieba_words/B回收率.txt

@@ -0,0 +1,40 @@
+回收率 1000 nz
+产水率 1000 nz
+运行回收率 1000 nz
+RO回收率 1000 nz
+RO总回收率 1000 nz
+UF回收率 1000 nz
+UF总回收率 1000 nz
+RO1回收率 1000 nz
+RO2回收率 1000 nz
+RO3回收率 1000 nz
+RO4回收率 1000 nz
+RO5回收率 1000 nz
+RO6回收率 1000 nz
+RO7回收率 1000 nz
+RO8回收率 1000 nz
+RO1运行回收率 1000 nz
+RO2运行回收率 1000 nz
+RO3运行回收率 1000 nz
+RO4运行回收率 1000 nz
+RO5运行回收率 1000 nz
+RO6运行回收率 1000 nz
+RO7运行回收率 1000 nz
+RO8运行回收率 1000 nz
+UF1回收率 1000 nz
+UF2回收率 1000 nz
+UF3回收率 1000 nz
+UF4回收率 1000 nz
+UF5回收率 1000 nz
+UF6回收率 1000 nz
+UF7回收率 1000 nz
+UF8回收率 1000 nz
+UF1运行回收率 1000 nz
+UF2运行回收率 1000 nz
+UF3运行回收率 1000 nz
+UF4运行回收率 1000 nz
+UF5运行回收率 1000 nz
+UF6运行回收率 1000 nz
+UF7运行回收率 1000 nz
+UF8运行回收率 1000 nz
+

+ 15 - 0
user_maintain_dictionary/jieba_words/B电导.txt

@@ -0,0 +1,15 @@
+电导 1000 nz
+电导率 1000 nz
+产水电导 1000 nz
+RO1产水电导 1000 nz
+RO2产水电导 1000 nz
+RO3产水电导 1000 nz
+RO4产水电导 1000 nz
+RO5产水电导 1000 nz
+RO6产水电导 1000 nz
+RO7产水电导 1000 nz
+RO8产水电导 1000 nz
+RO总产水电导 1000 nz
+RO产水电导 1000 nz
+RO总进水电导 1000 nz
+RO进水电导 1000 nz

+ 5 - 0
user_maintain_dictionary/jieba_words/B脱盐率.txt

@@ -0,0 +1,5 @@
+脱盐率 1000 nz
+RO1脱盐率 1000 nz
+RO2脱盐率 1000 nz
+RO3脱盐率 1000 nz
+RO4脱盐率 1000 nz

+ 18 - 0
user_maintain_dictionary/jieba_words/C膜渗透率.txt

@@ -0,0 +1,18 @@
+膜渗透率 1000 nz
+渗透率 1000 nz
+UF1膜渗透率 1000 nz
+UF2膜渗透率 1000 nz
+UF3膜渗透率 1000 nz
+UF4膜渗透率 1000 nz
+UF5膜渗透率 1000 nz
+UF6膜渗透率 1000 nz
+UF7膜渗透率 1000 nz
+UF8膜渗透率 1000 nz
+UF1渗透率 1000 nz
+UF2渗透率 1000 nz
+UF3渗透率 1000 nz
+UF4渗透率 1000 nz
+UF5渗透率 1000 nz
+UF6渗透率 1000 nz
+UF7渗透率 1000 nz
+UF8渗透率 1000 nz

+ 53 - 0
user_maintain_dictionary/jieba_words/C膜通量.txt

@@ -0,0 +1,53 @@
+膜通量 1000 nz
+膜运行通量 1000 nz
+UF1膜运行通量 1000 nz
+UF2膜运行通量 1000 nz
+UF3膜运行通量 1000 nz
+UF4膜运行通量 1000 nz
+UF5膜运行通量 1000 nz
+UF6膜运行通量 1000 nz
+UF7膜运行通量 1000 nz
+UF8膜运行通量 1000 nz
+RO1膜运行通量 1000 nz
+RO2膜运行通量 1000 nz
+RO3膜运行通量 1000 nz
+RO4膜运行通量 1000 nz
+RO5膜运行通量 1000 nz
+RO6膜运行通量 1000 nz
+RO7膜运行通量 1000 nz
+RO8膜运行通量 1000 nz
+通量 1000 nz
+UF1通量 1000 nz
+UF2通量 1000 nz
+UF3通量 1000 nz
+UF4通量 1000 nz
+UF5通量 1000 nz
+UF6通量 1000 nz
+UF7通量 1000 nz
+UF8通量 1000 nz
+RO1通量 1000 nz
+RO2通量 1000 nz
+RO3通量 1000 nz
+RO4通量 1000 nz
+RO5通量 1000 nz
+RO6通量 1000 nz
+RO7通量 1000 nz
+RO8通量 1000 nz
+UF1膜通量 1000 nz
+UF2膜通量 1000 nz
+UF3膜通量 1000 nz
+UF4膜通量 1000 nz
+UF5膜通量 1000 nz
+UF6膜通量 1000 nz
+UF7膜通量 1000 nz
+UF8膜通量 1000 nz
+RO1膜通量 1000 nz
+RO2膜通量 1000 nz
+RO3膜通量 1000 nz
+RO4膜通量 1000 nz
+RO5膜通量 1000 nz
+RO6膜通量 1000 nz
+RO7膜通量 1000 nz
+RO8膜通量 1000 nz
+
+

+ 9 - 0
user_maintain_dictionary/jieba_words/C跨膜压差.txt

@@ -0,0 +1,9 @@
+跨膜压差 1000 nz
+UF1跨膜压差 1000 nz
+UF2跨膜压差 1000 nz
+UF3跨膜压差 1000 nz
+UF4跨膜压差 1000 nz
+UF5跨膜压差 1000 nz
+UF6跨膜压差 1000 nz
+UF7跨膜压差 1000 nz
+UF8跨膜压差 1000 nz

+ 21 - 0
user_maintain_dictionary/jieba_words/产水压力.txt

@@ -0,0 +1,21 @@
+产水压力 1000
+RO总产水压力 1000 nz
+RO产水压力 1000 nz
+RO1产水压力 1000 nz
+RO2产水压力 1000 nz
+RO3产水压力 1000 nz
+RO4产水压力 1000 nz
+RO5产水压力 1000 nz
+RO6产水压力 1000 nz
+RO7产水压力 1000 nz
+RO8产水压力 1000 nz
+UF总产水压力 1000 nz
+UF产水压力 1000 nz
+UF1产水压力 1000 nz
+UF2产水压力 1000 nz
+UF3产水压力 1000 nz
+UF4产水压力 1000 nz
+UF5产水压力 1000 nz
+UF6产水压力 1000 nz
+UF7产水压力 1000 nz
+UF8产水压力 1000 nz

+ 30 - 0
user_maintain_dictionary/jieba_words/产水流量.txt

@@ -0,0 +1,30 @@
+产水流量 1000 nz
+UF产水流量 1000 nz
+UF总产水流量 1000 nz
+RO产水流量 1000 nz
+RO总产水流量 1000 nz
+RO4一二段产水流量比值 1000 nz
+RO3一二段产水流量比值 1000 nz
+RO2一二段产水流量比值 1000 nz
+RO1一二段产水流量比值 1000 nz
+RO5一二段产水流量比值 1000 nz
+RO6一二段产水流量比值 1000 nz
+RO7一二段产水流量比值 1000 nz
+RO8一二段产水流量比值 1000 nz
+RO1产水流量 1000 nz
+RO2产水流量 1000 nz
+RO3产水流量 1000 nz
+RO4产水流量 1000 nz
+RO5产水流量 1000 nz
+RO6产水流量 1000 nz
+RO7产水流量 1000 nz
+RO8产水流量 1000 nz
+RO1反渗透二段产水流量 1000 nz
+RO2反渗透二段产水流量 1000 nz
+RO3反渗透二段产水流量 1000 nz
+RO4反渗透二段产水流量 1000 nz
+RO5反渗透二段产水流量 1000 nz
+RO6反渗透二段产水流量 1000 nz
+RO7反渗透二段产水流量 1000 nz
+RO8反渗透二段产水流量 1000 nz
+

+ 27 - 0
user_maintain_dictionary/jieba_words/段压差.txt

@@ -0,0 +1,27 @@
+一段压差 1000 nz
+二段压差 1000 nz
+三段压差 1000 nz
+RO1一段压差 1000 nz
+RO1二段压差 1000 nz
+RO1三段压差 1000 nz
+RO2一段压差 1000 nz
+RO2二段压差 1000 nz
+RO2三段压差 1000 nz
+RO3一段压差 1000 nz
+RO3二段压差 1000 nz
+RO3三段压差 1000 nz
+RO4一段压差 1000 nz
+RO4二段压差 1000 nz
+RO4三段压差 1000 nz
+RO5一段压差 1000 nz
+RO5二段压差 1000 nz
+RO5三段压差 1000 nz
+RO6一段压差 1000 nz
+RO6二段压差 1000 nz
+RO6三段压差 1000 nz
+RO7一段压差 1000 nz
+RO7二段压差 1000 nz
+RO7三段压差 1000 nz
+RO8一段压差 1000 nz
+RO8二段压差 1000 nz
+RO8三段压差 1000 nz

+ 18 - 0
user_maintain_dictionary/jieba_words/浓水压力.txt

@@ -0,0 +1,18 @@
+浓水压力 1000 nz
+RO1一段浓水压力 1000 nz
+RO1二段浓水压力 1000 nz
+RO2一段浓水压力 1000 nz
+RO2二段浓水压力 1000 nz
+RO3一段浓水压力 1000 nz
+RO3二段浓水压力 1000 nz
+RO4一段浓水压力 1000 nz
+RO4二段浓水压力 1000 nz
+RO5一段浓水压力 1000 nz
+RO5二段浓水压力 1000 nz
+RO6一段浓水压力 1000 nz
+RO6二段浓水压力 1000 nz
+RO7一段浓水压力 1000 nz
+RO7二段浓水压力 1000 nz
+RO8一段浓水压力 1000 nz
+RO8二段浓水压力 1000 nz
+

+ 6 - 0
user_maintain_dictionary/jieba_words/浓水流量.txt

@@ -0,0 +1,6 @@
+浓水流量 1000 nz
+RO浓水流量 1000 nz
+RO1反渗透浓水流量 1000 nz
+RO2反渗透浓水流量 1000 nz
+RO3反渗透浓水流量 1000 nz
+RO4反渗透浓水流量 1000 nz

+ 37 - 0
user_maintain_dictionary/jieba_words/进水压力.txt

@@ -0,0 +1,37 @@
+进水压力 1000 nz
+一段进水压力 1000 nz
+二段进水压力 1000 nz
+三段进水压力 1000 nz
+RO1一段进水压力 1000 nz
+RO2一段进水压力 1000 nz
+RO3一段进水压力 1000 nz
+RO4一段进水压力 1000 nz
+RO5一段进水压力 1000 nz
+RO6一段进水压力 1000 nz
+RO7一段进水压力 1000 nz
+RO8一段进水压力 1000 nz
+RO1二段进水压力 1000 nz
+RO2二段进水压力 1000 nz
+RO3二段进水压力 1000 nz
+RO4二段进水压力 1000 nz
+RO5二段进水压力 1000 nz
+RO6二段进水压力 1000 nz
+RO7二段进水压力 1000 nz
+RO8二段进水压力 1000 nz
+RO1三段进水压力 1000 nz
+RO2三段进水压力 1000 nz
+RO3三段进水压力 1000 nz
+RO4三段进水压力 1000 nz
+RO5三段进水压力 1000 nz
+RO6三段进水压力 1000 nz
+RO7三段进水压力 1000 nz
+RO8三段进水压力 1000 nz
+
+UF1进水压力 1000 nz
+UF2进水压力 1000 nz
+UF3进水压力 1000 nz
+UF4进水压力 1000 nz
+UF5进水压力 1000 nz
+UF6进水压力 1000 nz
+UF7进水压力 1000 nz
+UF8进水压力 1000 nz

+ 24 - 0
user_maintain_dictionary/jieba_words/进水流量.txt

@@ -0,0 +1,24 @@
+UF总进水量 1000 nz
+UF进水量 1000 nz
+进水流量 1000 nz
+RO总进水流量 1000 nz
+RO进水流量 1000 nz
+总进水流量 1000 nz
+UF总进水流量 1000 nz
+UF进水流量 1000 nz
+RO1反渗透进水流量 1000 nz
+RO2反渗透进水流量 1000 nz
+RO3反渗透进水流量 1000 nz
+RO4反渗透进水流量 1000 nz
+RO5反渗透进水流量 1000 nz
+RO6反渗透进水流量 1000 nz
+RO7反渗透进水流量 1000 nz
+RO8反渗透进水流量 1000 nz
+UF1进水流量 1000 nz
+UF2进水流量 1000 nz
+UF3进水流量 1000 nz
+UF4进水流量 1000 nz
+UF5进水流量 1000 nz
+UF6进水流量 1000 nz
+UF7进水流量 1000 nz
+UF8进水流量 1000 nz

Alguns ficheiros não foram mostrados porque muitos ficheiros mudaram neste diff