# Constants and compiled regular expressions for knowledge-base text handling.
# NOTE(review): the consumers of these names are not part of this file section;
# comments describing intended use are inferred from the names — confirm
# against the call sites.

from __future__ import annotations

import re

# Substrings that hint a query is asking for a direct, factual answer
# (definition, limit, procedure, amount, ...). Presumably matched against the
# user query with substring tests — verify against the caller.
KNOWLEDGE_DIRECT_ANSWER_HINTS = (
    "是什么",
    "介绍",
    "说明",
    "概述",
    "标准",
    "限额",
    "流程",
    "条件",
    "规则",
    "怎么",
    "如何",
    "哪些",
    "需要",
    "是否",
    "区别",
    "范围",
    "额度",
    "金额",
    "多少",
    "多少钱",
    "上限",
)

# Low-information query words. Presumably dropped when extracting search
# terms from a query — verify against the caller.
KNOWLEDGE_QUERY_STOPWORDS = {
    "什么",
    "多少",
    "哪些",
    "怎么",
    "如何",
    "请问",
    "一下",
    "关于",
    "规定",
    "标准",
    "可以",
    "是否",
    "一个",
    "哪些人",
    "目前",
    "当前",
    "一下子",
}

# Numeric limits; judging by the names they cap the number of query terms,
# direct-evidence snippets and model hits respectively — TODO confirm.
MAX_KNOWLEDGE_QUERY_TERMS = 12
MAX_KNOWLEDGE_DIRECT_EVIDENCE = 4
MAX_KNOWLEDGE_MODEL_HITS = 5

# Matches a whole line that looks like a section heading. Group 1 is the
# entire line. Each alternative is commented below.
#
# The bracketed-numeral alternatives must use *literal* brackets: an
# unescaped ASCII "(...)" is a regex group, which would make every line that
# merely starts with a Chinese numeral (e.g. "一个普通的句子") count as a
# heading. Full-width brackets are ordinary characters and need no escaping.
KNOWLEDGE_SECTION_HEADING_PATTERN = re.compile(
    r"^("
    r"#\s*.+"  # Markdown-style heading: "# Title"
    r"|##\s*.+"  # "## Title"
    r"|###\s*.+"  # "### Title"
    r"|第[一二三四五六七八九十百零0-9]+[部分章节条]\s*.*"  # "第一章 ...", "第3条 ..."
    r"|[一二三四五六七八九十]+、.*"  # "一、..."
    r"|（[一二三四五六七八九十]+）.*"  # full-width brackets: "（一）..."
    r"|\([一二三四五六七八九十]+\).*"  # ASCII brackets: "(一)..."
    r")$"
)

# Matches a bullet list line: "-", "*" or "•", at least one whitespace
# character, then non-empty text.
KNOWLEDGE_LIST_ITEM_PATTERN = re.compile(r"^[-*•]\s+.+$")

# Matches a numbered list line: "1." / "1)" / "1、", a bracketed Chinese or
# Arabic numeral with either ASCII or full-width brackets ("(一)", "（2）"),
# or a circled digit ①-⑩, followed by non-empty text.
KNOWLEDGE_NUMBERED_ITEM_PATTERN = re.compile(
    r"^(?:"
    r"(?:\d+[.)、])"
    r"|(?:[(（][一二三四五六七八九十百零0-9]+[)）])"
    r"|[①②③④⑤⑥⑦⑧⑨⑩]"
    r")\s*.+$"
)

# Matches a line starting with an article label such as "第十二条"; group 1
# captures the label itself, without any trailing text.
KNOWLEDGE_ARTICLE_PATTERN = re.compile(r"^(第[一二三四五六七八九十百零0-9]+条)\s*.*$")