- user_agent 拆分 application/locations/knowledge/response/review 四个子模块,接入申请位置语义与关联草稿分支 - steward planner/runtime/slot/plan_builder 决策链路重构,travel_reimbursement_calculator/orchestrator_expense_query 适配 - ocr/document_preview/document_intelligence/receipt_folder 复用预览与资产缓存,expense_claim_draft_flow/application_handoff 适配 - pyproject.toml 新增依赖,paddleocr bootstrap 脚本与 server_start.sh 调整 - 更新差旅/交通/通信等财务规则表,同步 document_intelligence/ocr/receipt_folder/user_agent 等测试
736 lines
28 KiB
Python
736 lines
28 KiB
Python
from __future__ import annotations
|
||
|
||
import re
|
||
from typing import Any
|
||
|
||
from app.schemas.user_agent import UserAgentCitation, UserAgentRequest
|
||
from app.services.user_agent_knowledge_helpers import UserAgentKnowledgeHelpersMixin
|
||
from app.services.user_agent_knowledge_constants import (
|
||
KNOWLEDGE_ARTICLE_PATTERN,
|
||
KNOWLEDGE_DIRECT_ANSWER_HINTS,
|
||
KNOWLEDGE_LIST_ITEM_PATTERN,
|
||
KNOWLEDGE_NUMBERED_ITEM_PATTERN,
|
||
KNOWLEDGE_QUERY_STOPWORDS,
|
||
KNOWLEDGE_SECTION_HEADING_PATTERN,
|
||
MAX_KNOWLEDGE_DIRECT_EVIDENCE,
|
||
MAX_KNOWLEDGE_MODEL_HITS,
|
||
MAX_KNOWLEDGE_QUERY_TERMS,
|
||
)
|
||
|
||
|
||
class UserAgentKnowledgeMixin(UserAgentKnowledgeHelpersMixin):
|
||
@staticmethod
|
||
def _build_model_tool_payload(
    tool_payload: dict[str, Any],
    *,
    question: str | None = None,
) -> dict[str, Any]:
    """Return a shallow copy of ``tool_payload`` with a compact ``hits`` list.

    The hits come from ``_select_knowledge_model_hits``; non-dict entries are
    dropped. Each kept hit is reduced to a fixed set of string fields (content
    capped at 1200 characters) plus at most five tags and three evidence
    entries.

    Args:
        tool_payload: Raw tool result; a falsy value yields an otherwise
            empty copy.
        question: Optional user question forwarded to the hit selector.

    Returns:
        A new dict; the input mapping itself is not modified.
    """
    result = dict(tool_payload or {})
    selected_hits = UserAgentKnowledgeMixin._select_knowledge_model_hits(
        tool_payload,
        question=question,
    )

    def _text(hit: dict[str, Any], field: str) -> str:
        # Missing or falsy values collapse to an empty string.
        return str(hit.get(field) or "").strip()

    result["hits"] = [
        {
            "title": _text(hit, "title"),
            "document_name": _text(hit, "document_name"),
            "excerpt": _text(hit, "excerpt"),
            "content": _text(hit, "content")[:1200],
            "tags": list(hit.get("tags") or [])[:5],
            "evidence": list(hit.get("evidence") or [])[:3],
            "code": _text(hit, "code"),
        }
        for hit in selected_hits
        if isinstance(hit, dict)
    ]
    return result
|
||
|
||
@staticmethod
|
||
def _build_knowledge_evidence_blocks(
    tool_payload: dict[str, Any],
    *,
    question: str | None = None,
) -> str:
    """Render the first three selected hits as numbered, fenced text blocks.

    Hits that are not dicts or have no content are skipped. Numbering follows
    the hit's position in the selection, so a skipped hit leaves a gap.

    Args:
        tool_payload: Raw tool result passed to the hit selector.
        question: Optional user question forwarded to the hit selector.

    Returns:
        The blocks joined by blank lines, or ``""`` when nothing qualifies.
    """
    top_hits = UserAgentKnowledgeMixin._select_knowledge_model_hits(
        tool_payload,
        question=question,
    )[:3]

    rendered: list[str] = []
    for position, hit in enumerate(top_hits, start=1):
        if not isinstance(hit, dict):
            continue
        label = str(hit.get("title") or hit.get("document_name") or f"证据 {position}").strip()
        reference = str(hit.get("code") or "").strip()
        body = str(hit.get("content") or "").strip()
        if not body:
            continue
        suffix = f" ({reference})" if reference else ""
        header = f"[证据 {position}] {label}" + suffix
        # Each block's content is capped at 1200 characters.
        rendered.append("\n".join([header, "```text", body[:1200], "```"]))
    return "\n\n".join(rendered)
|
||
|
||
def _build_fast_knowledge_answer(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
*,
|
||
citations: list[UserAgentCitation],
|
||
) -> str | None:
|
||
if payload.ontology.scenario != "knowledge":
|
||
return None
|
||
if str(payload.tool_payload.get("result_type") or "").strip() != "knowledge_search":
|
||
return None
|
||
|
||
evidence_items = self._build_knowledge_answer_evidence(payload)
|
||
if not evidence_items:
|
||
return None
|
||
|
||
question = self._resolve_knowledge_question(payload)
|
||
if not self._should_use_direct_knowledge_answer(question, evidence_items):
|
||
return None
|
||
|
||
return self._render_knowledge_direct_answer(
|
||
payload,
|
||
citations=citations,
|
||
evidence_items=evidence_items,
|
||
)
|
||
|
||
|
||
def _render_knowledge_direct_answer(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
*,
|
||
citations: list[UserAgentCitation],
|
||
evidence_items: list[dict[str, Any]],
|
||
) -> str | None:
|
||
if not evidence_items:
|
||
return None
|
||
|
||
title = str(
|
||
(citations[0].title if citations else "")
|
||
or evidence_items[0].get("title")
|
||
or "相关制度"
|
||
).strip()
|
||
user_name = str(payload.context_json.get("name") or "").strip()
|
||
question = self._resolve_knowledge_question(payload)
|
||
query_terms = self._extract_knowledge_query_terms(question)
|
||
ordered_evidence_items = self._prioritize_knowledge_evidence_items(question, evidence_items)
|
||
primary_item = ordered_evidence_items[0]
|
||
primary_heading = self._format_knowledge_heading_label(
|
||
str(primary_item.get("heading") or "").strip()
|
||
)
|
||
primary_lines = self._collect_direct_knowledge_answer_lines(
|
||
ordered_evidence_items,
|
||
query_terms=query_terms,
|
||
)
|
||
|
||
lines: list[str] = []
|
||
if user_name:
|
||
lines.append(f"{user_name},您好。")
|
||
source_prefix = f"根据《{title}》"
|
||
if primary_heading:
|
||
source_prefix = f"{source_prefix}({primary_heading})"
|
||
|
||
conclusion_lines: list[str] = []
|
||
evidence_lines: list[str] = []
|
||
if str(primary_item.get("kind") or "") == "table":
|
||
table_content = str(primary_item.get("content") or "")
|
||
if self._question_requests_broad_knowledge_table(question):
|
||
table_preview = table_content.strip()
|
||
else:
|
||
table_preview = self._extract_relevant_table_preview(
|
||
table_content,
|
||
query_terms,
|
||
preferred_terms=self._build_knowledge_table_preferred_terms(payload),
|
||
)
|
||
table_summary = self._summarize_knowledge_table_preview(table_preview)
|
||
conclusion_lines.append(f"{source_prefix},{table_summary}")
|
||
evidence_lines.append(table_preview)
|
||
else:
|
||
if not primary_lines:
|
||
summary = self._summarize_knowledge_evidence_content(primary_item, query_terms)
|
||
conclusion_lines.append(
|
||
f"{source_prefix},当前能直接确认的是:"
|
||
f"{summary}"
|
||
)
|
||
elif len(primary_lines) == 1:
|
||
conclusion_lines.append(f"{source_prefix},当前能直接确认的是:{primary_lines[0].strip()}")
|
||
evidence_lines.extend(primary_lines)
|
||
else:
|
||
subject = self._build_knowledge_answer_subject(question, primary_heading)
|
||
summary = self._summarize_knowledge_lines_conclusion(
|
||
primary_lines,
|
||
heading=subject,
|
||
)
|
||
if summary:
|
||
conclusion_lines.append(f"{source_prefix},{summary}")
|
||
else:
|
||
conclusion_lines.append(f"{source_prefix},当前能直接确认的是:")
|
||
evidence_lines.extend(primary_lines)
|
||
|
||
notes: list[str] = []
|
||
location_note = self._build_missing_location_grounding_note(question, evidence_items)
|
||
if location_note:
|
||
notes.append(location_note)
|
||
if self._question_requires_explicit_condition(question) and not self._answer_evidence_has_numeric_or_condition(evidence_items):
|
||
notes.append("当前命中的证据更偏规则说明或流程约束,还没有直接给出可立即套用的数值或完整条件。")
|
||
|
||
self._append_markdown_section(lines, "结论", conclusion_lines)
|
||
self._append_markdown_section(lines, "依据", evidence_lines)
|
||
if notes:
|
||
self._append_markdown_section(lines, "说明", [f"- {note}" for note in notes])
|
||
|
||
return "\n".join(line for line in lines if line is not None).strip()
|
||
|
||
|
||
@staticmethod
|
||
def _append_markdown_section(lines: list[str], title: str, body_lines: list[str]) -> None:
|
||
cleaned = [str(line or "").rstrip() for line in body_lines if str(line or "").strip()]
|
||
if not cleaned:
|
||
return
|
||
if lines and lines[-1] != "":
|
||
lines.append("")
|
||
lines.append(f"## {title}")
|
||
lines.append("")
|
||
lines.extend(cleaned)
|
||
|
||
|
||
@staticmethod
|
||
def _build_knowledge_answer_subject(question: str, heading: str = "") -> str:
|
||
clean_heading = str(heading or "").strip()
|
||
if clean_heading and not any(
|
||
marker in clean_heading
|
||
for marker in ("问答线索补充", "结构化表格补充", "重点章节摘录", "章节导航")
|
||
):
|
||
return clean_heading
|
||
|
||
normalized = re.sub(r"\s+", "", str(question or "").strip())
|
||
normalized = re.sub(r"[??。.!!]+$", "", normalized)
|
||
normalized = re.sub(r"(是什么|有哪些|是多少|如何|怎么|吗|呢)$", "", normalized)
|
||
return normalized.strip("::,,。.")
|
||
|
||
|
||
@staticmethod
|
||
def _build_knowledge_table_preferred_terms(payload: UserAgentRequest) -> list[str]:
|
||
terms: list[str] = []
|
||
context = payload.context_json or {}
|
||
for key in ("grade", "position", "job_grade", "rank", "level"):
|
||
value = str(context.get(key) or "").strip()
|
||
if value and value not in terms:
|
||
terms.append(value)
|
||
|
||
grade_match = re.fullmatch(r"[Pp](\d{1,2})", str(context.get("grade") or "").strip())
|
||
if grade_match:
|
||
grade = int(grade_match.group(1))
|
||
for start in range(max(0, grade - 4), grade + 1):
|
||
for end in range(grade, min(12, grade + 4) + 1):
|
||
if start >= end:
|
||
continue
|
||
for separator in ("~", "~", "-", "至"):
|
||
term = f"P{start}{separator}P{end}"
|
||
if term not in terms:
|
||
terms.append(term)
|
||
return terms
|
||
|
||
|
||
@staticmethod
|
||
def _resolve_knowledge_question(payload: UserAgentRequest) -> str:
|
||
return str(payload.context_json.get("user_input_text") or payload.message or "").strip()
|
||
|
||
|
||
@staticmethod
|
||
def _looks_like_structured_knowledge_query(question: str) -> bool:
|
||
normalized = str(question or "").strip()
|
||
if not normalized:
|
||
return False
|
||
return any(keyword in normalized for keyword in KNOWLEDGE_DIRECT_ANSWER_HINTS)
|
||
|
||
|
||
def _should_use_direct_knowledge_answer(
|
||
self,
|
||
question: str,
|
||
evidence_items: list[dict[str, Any]],
|
||
) -> bool:
|
||
if not evidence_items:
|
||
return False
|
||
if self._looks_like_structured_knowledge_query(question):
|
||
return True
|
||
return str(evidence_items[0].get("kind") or "") in {"table", "kv", "list", "clause"}
|
||
|
||
|
||
def _build_knowledge_answer_evidence(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
) -> list[dict[str, Any]]:
|
||
question = self._resolve_knowledge_question(payload)
|
||
query_terms = self._extract_knowledge_query_terms(question)
|
||
candidates: list[dict[str, Any]] = []
|
||
|
||
for hit in self._select_knowledge_model_hits(
|
||
payload.tool_payload,
|
||
question=question,
|
||
):
|
||
if not isinstance(hit, dict):
|
||
continue
|
||
candidates.extend(self._extract_knowledge_evidence_candidates(hit, query_terms))
|
||
|
||
deduped: list[dict[str, Any]] = []
|
||
seen: set[tuple[str, str, str]] = set()
|
||
ranked_candidates = sorted(
|
||
candidates,
|
||
key=lambda value: (
|
||
float(value.get("score") or 0),
|
||
-len(str(value.get("content") or "")),
|
||
),
|
||
reverse=True,
|
||
)
|
||
top_score = float(ranked_candidates[0].get("score") or 0) if ranked_candidates else 0.0
|
||
|
||
for item in ranked_candidates:
|
||
score = float(item.get("score") or 0)
|
||
if deduped and score < max(6.0, top_score - 14):
|
||
continue
|
||
key = (
|
||
str(item.get("title") or "").strip(),
|
||
str(item.get("heading") or "").strip(),
|
||
self._clean_knowledge_segment_text(str(item.get("content") or ""))[:180],
|
||
)
|
||
if key in seen:
|
||
continue
|
||
seen.add(key)
|
||
deduped.append(item)
|
||
if len(deduped) >= MAX_KNOWLEDGE_DIRECT_EVIDENCE:
|
||
break
|
||
return deduped
|
||
|
||
|
||
def _extract_knowledge_evidence_candidates(
|
||
self,
|
||
hit: dict[str, Any],
|
||
query_terms: list[str],
|
||
) -> list[dict[str, Any]]:
|
||
title = str(hit.get("title") or hit.get("document_name") or "相关制度").strip()
|
||
content = str(hit.get("content") or "").strip()
|
||
if not content:
|
||
return []
|
||
|
||
raw_candidates = self._merge_knowledge_lead_in_segments(
|
||
self._split_knowledge_hit_into_segments(content)
|
||
)
|
||
candidates: list[dict[str, Any]] = []
|
||
for item in raw_candidates:
|
||
score = self._score_knowledge_evidence_candidate(item, query_terms)
|
||
if query_terms and score <= 0:
|
||
continue
|
||
normalized = dict(item)
|
||
normalized["title"] = title
|
||
normalized["score"] = score
|
||
candidates.append(normalized)
|
||
|
||
if candidates:
|
||
return candidates
|
||
|
||
fallback_text = str(hit.get("excerpt") or "").strip() or self._extract_excerpt(content)
|
||
if not fallback_text:
|
||
return []
|
||
return [
|
||
{
|
||
"title": title,
|
||
"heading": "",
|
||
"kind": "paragraph",
|
||
"content": fallback_text,
|
||
"score": 1,
|
||
}
|
||
]
|
||
|
||
|
||
def _merge_knowledge_lead_in_segments(
|
||
self,
|
||
segments: list[dict[str, str]],
|
||
) -> list[dict[str, str]]:
|
||
if not segments:
|
||
return []
|
||
|
||
merged: list[dict[str, str]] = []
|
||
index = 0
|
||
while index < len(segments):
|
||
current = dict(segments[index])
|
||
if not self._is_knowledge_lead_in_segment(current):
|
||
merged.append(current)
|
||
index += 1
|
||
continue
|
||
|
||
base_heading = str(current.get("heading") or "").strip()
|
||
current_marker = self._extract_knowledge_marker_family(str(current.get("content") or ""))
|
||
follow_segments: list[dict[str, str]] = []
|
||
next_index = index + 1
|
||
|
||
while next_index < len(segments):
|
||
candidate = segments[next_index]
|
||
if str(candidate.get("heading") or "").strip() != base_heading:
|
||
break
|
||
|
||
candidate_kind = str(candidate.get("kind") or "").strip()
|
||
candidate_content = str(candidate.get("content") or "").strip()
|
||
candidate_marker = self._extract_knowledge_marker_family(candidate_content)
|
||
if not candidate_content or candidate_kind == "table":
|
||
break
|
||
if current_marker and candidate_marker == current_marker:
|
||
break
|
||
if self._is_knowledge_lead_in_segment(candidate) and follow_segments:
|
||
break
|
||
if candidate_kind not in {"list", "paragraph", "kv", "clause"}:
|
||
break
|
||
|
||
follow_segments.append(candidate)
|
||
next_index += 1
|
||
if len(follow_segments) >= 4:
|
||
break
|
||
if candidate_kind == "paragraph" and len(candidate_content) >= 200:
|
||
break
|
||
|
||
if follow_segments:
|
||
current["content"] = "\n".join(
|
||
[str(current.get("content") or "").strip()]
|
||
+ [str(item.get("content") or "").strip() for item in follow_segments]
|
||
)
|
||
if any(str(item.get("kind") or "").strip() == "list" for item in follow_segments):
|
||
current["kind"] = "list"
|
||
merged.append(current)
|
||
index = next_index
|
||
continue
|
||
|
||
merged.append(current)
|
||
index += 1
|
||
|
||
return merged
|
||
|
||
|
||
def _split_knowledge_hit_into_segments(self, content: str) -> list[dict[str, str]]:
|
||
segments: list[dict[str, str]] = []
|
||
markdown_headings: list[str] = []
|
||
section_heading = ""
|
||
paragraph_lines: list[str] = []
|
||
table_lines: list[str] = []
|
||
|
||
def current_heading() -> str:
|
||
heading_parts = [item for item in markdown_headings if item]
|
||
if section_heading:
|
||
heading_parts.append(section_heading)
|
||
return " > ".join(heading_parts)
|
||
|
||
def flush_paragraph() -> None:
|
||
nonlocal paragraph_lines
|
||
if not paragraph_lines:
|
||
return
|
||
merged = " ".join(line.strip() for line in paragraph_lines if line.strip()).strip()
|
||
paragraph_lines = []
|
||
if merged:
|
||
segments.append(
|
||
{
|
||
"heading": current_heading(),
|
||
"kind": "paragraph",
|
||
"content": merged,
|
||
}
|
||
)
|
||
|
||
def flush_table() -> None:
|
||
nonlocal table_lines
|
||
if not table_lines:
|
||
return
|
||
merged = "\n".join(line.rstrip() for line in table_lines if line.strip()).strip()
|
||
table_lines = []
|
||
if merged:
|
||
segments.append(
|
||
{
|
||
"heading": current_heading(),
|
||
"kind": "table",
|
||
"content": merged,
|
||
}
|
||
)
|
||
|
||
for raw_line in str(content or "").replace("\r\n", "\n").replace("\r", "\n").splitlines():
|
||
line = raw_line.rstrip()
|
||
stripped = line.strip()
|
||
|
||
if not stripped:
|
||
flush_paragraph()
|
||
flush_table()
|
||
continue
|
||
|
||
markdown_heading_match = re.match(r"^(#{1,6})\s+(.+)$", stripped)
|
||
if markdown_heading_match:
|
||
flush_paragraph()
|
||
flush_table()
|
||
level = len(markdown_heading_match.group(1))
|
||
heading_text = markdown_heading_match.group(2).strip()
|
||
markdown_headings = markdown_headings[: max(0, level - 1)]
|
||
markdown_headings.append(heading_text)
|
||
section_heading = ""
|
||
continue
|
||
|
||
if KNOWLEDGE_SECTION_HEADING_PATTERN.match(stripped) and len(stripped) <= 90:
|
||
flush_paragraph()
|
||
flush_table()
|
||
section_heading = stripped.lstrip("#").strip()
|
||
continue
|
||
|
||
if stripped.count("|") >= 2 and "|" in stripped:
|
||
flush_paragraph()
|
||
table_lines.append(stripped)
|
||
continue
|
||
|
||
flush_table()
|
||
|
||
if KNOWLEDGE_LIST_ITEM_PATTERN.match(stripped):
|
||
flush_paragraph()
|
||
segments.append(
|
||
{
|
||
"heading": current_heading(),
|
||
"kind": "list",
|
||
"content": stripped,
|
||
}
|
||
)
|
||
continue
|
||
|
||
if KNOWLEDGE_NUMBERED_ITEM_PATTERN.match(stripped):
|
||
flush_paragraph()
|
||
segments.append(
|
||
{
|
||
"heading": current_heading(),
|
||
"kind": "list",
|
||
"content": stripped,
|
||
}
|
||
)
|
||
continue
|
||
|
||
if KNOWLEDGE_ARTICLE_PATTERN.match(stripped):
|
||
flush_paragraph()
|
||
segments.append(
|
||
{
|
||
"heading": current_heading(),
|
||
"kind": "clause",
|
||
"content": stripped,
|
||
}
|
||
)
|
||
continue
|
||
|
||
if (":" in stripped or ":" in stripped) and len(stripped) <= 180:
|
||
flush_paragraph()
|
||
segments.append(
|
||
{
|
||
"heading": current_heading(),
|
||
"kind": "kv",
|
||
"content": stripped,
|
||
}
|
||
)
|
||
continue
|
||
|
||
paragraph_lines.append(stripped)
|
||
|
||
flush_paragraph()
|
||
flush_table()
|
||
return segments
|
||
|
||
|
||
def _render_knowledge_evidence_text(self, item: dict[str, Any]) -> str:
|
||
lines = self._split_clean_knowledge_lines(
|
||
str(item.get("content") or ""),
|
||
preserve_marker=True,
|
||
)
|
||
if not lines:
|
||
return ""
|
||
if len(lines) == 1:
|
||
return self._clean_knowledge_segment_text(lines[0])
|
||
return "\n".join(f" {line}" for line in lines)
|
||
|
||
|
||
def _collect_direct_knowledge_answer_lines(
|
||
self,
|
||
ordered_evidence_items: list[dict[str, Any]],
|
||
*,
|
||
query_terms: list[str] | None = None,
|
||
) -> list[str]:
|
||
if not ordered_evidence_items:
|
||
return []
|
||
|
||
primary_item = ordered_evidence_items[0]
|
||
primary_title = str(primary_item.get("title") or "").strip()
|
||
primary_heading = str(primary_item.get("heading") or "").strip()
|
||
primary_kind = str(primary_item.get("kind") or "").strip()
|
||
|
||
related_items = [primary_item]
|
||
if primary_kind != "table":
|
||
for item in ordered_evidence_items[1:]:
|
||
if len(related_items) >= 3:
|
||
break
|
||
if str(item.get("kind") or "").strip() != primary_kind:
|
||
continue
|
||
if str(item.get("title") or "").strip() != primary_title:
|
||
continue
|
||
if str(item.get("heading") or "").strip() != primary_heading:
|
||
continue
|
||
related_items.append(item)
|
||
|
||
lines: list[str] = []
|
||
seen: set[str] = set()
|
||
for item in related_items:
|
||
item_kind = str(item.get("kind") or "").strip()
|
||
item_content = str(item.get("content") or "")
|
||
if item_kind == "paragraph" or self._has_inline_numbered_knowledge_items(item_content):
|
||
rendered = self._focus_knowledge_segment_content(
|
||
item_content,
|
||
query_terms or [],
|
||
)
|
||
rendered_lines = self._split_inline_numbered_knowledge_items(rendered)
|
||
else:
|
||
rendered = self._render_knowledge_evidence_text(item)
|
||
rendered_lines = rendered.splitlines()
|
||
for line in rendered_lines:
|
||
normalized = str(line or "").strip()
|
||
if not normalized or normalized in seen:
|
||
continue
|
||
seen.add(normalized)
|
||
lines.append(line)
|
||
return lines
|
||
|
||
|
||
def _summarize_knowledge_evidence_content(
|
||
self,
|
||
item: dict[str, Any],
|
||
query_terms: list[str],
|
||
) -> str:
|
||
kind = str(item.get("kind") or "").strip()
|
||
content = str(item.get("content") or "").strip()
|
||
if kind == "table":
|
||
preview = self._extract_relevant_table_preview(content, query_terms)
|
||
preview_rows = [line for line in preview.splitlines() if line.strip()][:4]
|
||
if len(preview_rows) >= 3:
|
||
return "当前命中的直接依据是一张与问题强相关的标准表,已摘出最相关的表头和行。"
|
||
return "当前命中的直接依据是一张与问题强相关的标准表。"
|
||
lines = self._split_clean_knowledge_lines(content, preserve_marker=True)
|
||
if len(lines) >= 2:
|
||
return self._clean_knowledge_segment_text(f"{lines[0]} {' '.join(lines[1:4])}")
|
||
return self._clean_knowledge_segment_text(content)
|
||
|
||
|
||
def _build_missing_location_grounding_note(
|
||
self,
|
||
question: str,
|
||
evidence_items: list[dict[str, Any]],
|
||
) -> str:
|
||
location = self._extract_query_location(question)
|
||
if not location:
|
||
return ""
|
||
|
||
haystack = "\n".join(
|
||
str(item.get("heading") or "") + "\n" + str(item.get("content") or "")
|
||
for item in evidence_items
|
||
)
|
||
if location in haystack:
|
||
return ""
|
||
return (
|
||
f"当前命中的制度依据没有直接写出“{location}”对应的地区档位或映射关系,"
|
||
"因此不能直接把它套用到表格中的某一列。"
|
||
)
|
||
|
||
|
||
def _build_knowledge_search_answer(
|
||
self,
|
||
payload: UserAgentRequest,
|
||
citations: list[UserAgentCitation],
|
||
) -> str:
|
||
hits = [item for item in list(payload.tool_payload.get("hits") or []) if isinstance(item, dict)]
|
||
evidence_items = self._build_knowledge_answer_evidence(payload)
|
||
primary_citation = citations[0] if citations else None
|
||
title = str(
|
||
(primary_citation.title if primary_citation else "")
|
||
or (hits[0].get("title") if hits else "")
|
||
or "相关制度"
|
||
).strip()
|
||
user_name = str(payload.context_json.get("name") or "").strip()
|
||
answer_lines: list[str] = []
|
||
if user_name:
|
||
answer_lines.append(f"{user_name},您好。")
|
||
if not hits:
|
||
self._append_markdown_section(
|
||
answer_lines,
|
||
"结论",
|
||
[f"当前没有拿到可用于回答这个问题的《{title}》知识库命中。"],
|
||
)
|
||
self._append_markdown_section(
|
||
answer_lines,
|
||
"说明",
|
||
["- 我不会用相似主题或外部常识硬凑答案;请补充更具体的关键词后再试一次。"],
|
||
)
|
||
return "\n".join(answer_lines).strip()
|
||
|
||
evidence_lines: list[str] = []
|
||
for item in evidence_items[:3]:
|
||
heading = str(item.get("heading") or "").strip()
|
||
if "表格行级检索线索" in heading:
|
||
heading = heading.replace("表格行级检索线索", "").strip(" >")
|
||
heading_text = f"({heading})" if heading else ""
|
||
item_title = item.get("title") or title
|
||
if str(item.get("kind") or "") == "table":
|
||
preview = self._extract_relevant_table_preview(
|
||
str(item.get("content") or ""),
|
||
self._extract_knowledge_query_terms(self._resolve_knowledge_question(payload)),
|
||
)
|
||
evidence_lines.append(f"- **《{item_title}》** {heading_text}\n{preview}")
|
||
continue
|
||
rendered = self._render_knowledge_evidence_text(item)
|
||
if rendered:
|
||
if "\n" in rendered:
|
||
evidence_lines.append(f"- **《{item_title}》** {heading_text}\n{rendered}")
|
||
else:
|
||
evidence_lines.append(f"- **《{item_title}》** {heading_text}\n {rendered}")
|
||
|
||
if not evidence_lines:
|
||
for item in hits[:2]:
|
||
item_title = str(item.get("title") or item.get("document_name") or "相关制度").strip()
|
||
excerpt = (
|
||
str(item.get("excerpt") or "").strip()
|
||
or self._extract_excerpt(str(item.get("content") or ""))
|
||
)
|
||
if not excerpt:
|
||
continue
|
||
evidence_lines.append(f"- **《{item_title}》**:{excerpt}")
|
||
|
||
if not evidence_lines:
|
||
self._append_markdown_section(
|
||
answer_lines,
|
||
"结论",
|
||
[f"当前《{title}》里可用于回答这个问题的关键条款还不够明确。"],
|
||
)
|
||
self._append_markdown_section(
|
||
answer_lines,
|
||
"说明",
|
||
["- 请补充费用类型、适用地区、职级或具体业务场景,我会继续帮您缩小范围。"],
|
||
)
|
||
return "\n".join(answer_lines).strip()
|
||
|
||
self._append_markdown_section(
|
||
answer_lines,
|
||
"结论",
|
||
["我先根据当前制度依据给出可以确认的部分。"],
|
||
)
|
||
self._append_markdown_section(answer_lines, "依据", evidence_lines)
|
||
self._append_markdown_section(
|
||
answer_lines,
|
||
"说明",
|
||
["- 以上只使用当前命中的知识库证据;没有在证据中出现的适用条件或金额,我不会替您默认补齐。"],
|
||
)
|
||
return "\n".join(answer_lines).strip()
|
||
|