From 127d603e7d860a308b0b50d24277e7575b80e19f Mon Sep 17 00:00:00 2001 From: caoxiaozhu Date: Thu, 18 Jun 2026 22:12:00 +0800 Subject: [PATCH] =?UTF-8?q?feat(ontology):=20=E4=BB=85=E6=94=BE=E8=A1=8C?= =?UTF-8?q?=E8=B4=A2=E5=8A=A1=E4=B8=9A=E5=8A=A1=E7=9B=B8=E5=85=B3=E9=97=AE?= =?UTF-8?q?=E9=A2=98=E7=9A=84=E4=BF=A1=E5=8F=B7=E6=A0=A1=E9=AA=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 _has_supported_business_signal,在加载目录前拦截非财务问题并抛错 - 同步重构 ontology 服务测试覆盖业务信号判定分支 --- server/src/app/services/ontology.py | 7 +- server/src/app/services/ontology_detection.py | 87 + server/tests/test_ontology_service.py | 1401 +++++++++-------- 3 files changed, 800 insertions(+), 695 deletions(-) diff --git a/server/src/app/services/ontology.py b/server/src/app/services/ontology.py index 1d01414..ef28f4d 100644 --- a/server/src/app/services/ontology.py +++ b/server/src/app/services/ontology.py @@ -103,11 +103,14 @@ class SemanticOntologyService( if not query: raise ValueError("query 不能为空。") - AgentFoundationService(self.db).ensure_foundation_ready() context_json = normalize_ontology_context_json(payload.context_json or {}) payload = payload.model_copy(update={"context_json": context_json}) - reference = self._load_reference_catalog() compact_query = self._compact(query) + if not self._has_supported_business_signal(compact_query, context_json): + raise ValueError("当前系统仅支持财务业务相关问题。") + + AgentFoundationService(self.db).ensure_foundation_ready() + reference = self._load_reference_catalog() entities = self._extract_entities(query, compact_query, reference, context_json=context_json) rule_scenario, scenario_score = self._detect_scenario(compact_query) time_range, _time_score = self._extract_time_range( diff --git a/server/src/app/services/ontology_detection.py b/server/src/app/services/ontology_detection.py index f158eab..93e52b7 100644 --- a/server/src/app/services/ontology_detection.py +++ b/server/src/app/services/ontology_detection.py @@ -92,6 +92,92 @@ class OntologyDetectionMixin: def _looks_like_expense_application(compact_query: str) -> bool: return looks_like_expense_application_signal(compact_query) + def _has_supported_business_signal(self, compact_query: str, context_json: dict[str, Any]) -> bool: + has_business_context = ( + self._is_expense_application_context(context_json) + or self._resolve_session_type_scenario(context_json) == "knowledge" + or self._resolve_context_scenario(context_json) is not None + ) + + if self._looks_like_expense_application(compact_query): + return True + + domain_keywords = [ + keyword + for keywords in SCENARIO_KEYWORDS.values() + for keyword, _weight in keywords + ] + if any(keyword in compact_query for keyword in domain_keywords): + return True + if any(keyword in compact_query for keyword in EXPENSE_NARRATIVE_KEYWORDS): + return True + knowledge_keywords = ( + "制度", + "规则", + "办法", + "依据", + "政策", + "知识库", + "规定", + "流程", + "口径", + "标准", + "上限", + "额度", + "补贴", + "票据要求", + ) + if any(keyword in compact_query for keyword in knowledge_keywords): + return True + + approval_keywords = ( + "待我审核", + "待审", + "审核", + "审批", + "审核意见", + "审批意见", + "审批通过", + "审批驳回", + "驳回", + "退回", + "审核中心", + "审批中心", + "领导审批", + "财务审核", + "处理意见", + ) + if any(keyword in compact_query for keyword in approval_keywords): + return True + if has_business_context and self._looks_like_contextual_business_follow_up(compact_query): + return True + + return False + + @staticmethod + def _looks_like_contextual_business_follow_up(compact_query: str) -> bool: + if not compact_query: + return False + if compact_query in { + "好", + "好的", + "行", + "可以", + "嗯", + "继续", + "下一步", + "确认", + "确定", + "补充", + "再补充", + "再看看", + "没问题", + }: + return True + if any(keyword in compact_query for keyword in DRAFT_FOLLOW_UP_KEYWORDS): + return True + return compact_query.startswith(("那", "这", "它", "这个", "那个")) + def _detect_scenario(self, compact_query: str) -> tuple[str, float]: scores = {key: 0.0 for key in SCENARIO_KEYWORDS} for scenario, keywords in SCENARIO_KEYWORDS.items(): @@ -126,6 +212,7 @@ class OntologyDetectionMixin: return best_scenario, round(min(best_score, 0.34), 2) + def _detect_intent( self, compact_query: str, diff --git a/server/tests/test_ontology_service.py b/server/tests/test_ontology_service.py index f815b67..cb3551d 100644 --- a/server/tests/test_ontology_service.py +++ b/server/tests/test_ontology_service.py @@ -3,18 +3,18 @@ from __future__ import annotations from collections.abc import Generator import pytest -from fastapi.testclient import TestClient -from sqlalchemy import create_engine -from sqlalchemy.orm import Session, sessionmaker -from sqlalchemy.pool import StaticPool - -from app.core.agent_enums import AgentName, AgentRunSource, AgentRunStatus -from app.api.deps import get_db -from app.db.base import Base -from app.schemas.ontology import OntologyParseRequest -from app.services.ontology import LlmOntologyParseResult, SemanticOntologyService -from app.services.ontology_field_registry import normalize_ontology_context_json -from app.services.runtime_chat import RuntimeChatCallTrace, RuntimeChatResult +from fastapi.testclient import TestClient +from sqlalchemy import create_engine +from sqlalchemy.orm import Session, sessionmaker +from sqlalchemy.pool import StaticPool + +from app.core.agent_enums import AgentName, AgentRunSource, AgentRunStatus +from app.api.deps import get_db +from app.db.base import Base +from app.schemas.ontology import OntologyParseRequest +from app.services.ontology import LlmOntologyParseResult, SemanticOntologyService +from app.services.ontology_field_registry import normalize_ontology_context_json +from app.services.runtime_chat import RuntimeChatCallTrace, RuntimeChatResult def build_session_factory() -> sessionmaker[Session]: @@ -27,11 +27,11 @@ def build_session_factory() -> sessionmaker[Session]: return sessionmaker(bind=engine, autoflush=False, autocommit=False) -def build_client() -> tuple[TestClient, sessionmaker[Session]]: - session_factory = build_session_factory() - from app.main import create_app - - app = create_app() +def build_client() -> tuple[TestClient, sessionmaker[Session]]: + session_factory = build_session_factory() + from app.main import create_app + + app = create_app() def override_db() -> Generator[Session, None, None]: db = session_factory() @@ -248,196 +248,196 @@ def test_semantic_ontology_service_matches_day3_evaluation_set( assert result.run_id.startswith("run_") -def test_semantic_ontology_service_extracts_entities_time_and_constraints() -> None: - session_factory = build_session_factory() - with session_factory() as db: - result = SemanticOntologyService(db).parse( +def test_semantic_ontology_service_extracts_entities_time_and_constraints() -> None: + session_factory = build_session_factory() + with session_factory() as db: + result = SemanticOntologyService(db).parse( OntologyParseRequest( query="张三 2026年4月差旅报销金额超过5000元的明细", user_id="pytest", ) ) - - assert result.scenario == "expense" - assert result.intent == "query" - assert result.time_range.start_date == "2026-04-01" - assert result.time_range.end_date == "2026-04-30" - - -def test_semantic_ontology_service_extracts_budget_query_fields() -> None: - session_factory = build_session_factory() - with session_factory() as db: - result = SemanticOntologyService(db).parse( - OntologyParseRequest( - query="查询 CC-4100 2026年度差旅费可用预算和预算占用", - user_id="pytest", - ) - ) - - entity_map = {item.type: item.normalized_value for item in result.entities} - metric_names = {item.name for item in result.metrics} - - assert result.scenario == "budget" - assert result.intent == "query" - assert entity_map["cost_center"] == "CC-4100" - assert entity_map["budget_period"] == "2026年度" - assert entity_map["budget_subject"] == "travel" - assert entity_map["expense_type"] == "travel" - assert {"available_amount", "reserved_amount"}.issubset(metric_names) - - -@pytest.mark.parametrize( - "query", - [ - "申请出差", - "申请差旅", - "去国网出差3天,协助仿生产环境部署", - "去北京出差3天,支撑国网仿生产环境部署", - "下周去上海出差支撑客户系统上线,预计3天", - "安排去深圳客户现场验收项目,出差两天", - "准备去国网现场做仿生产环境部署,差旅3天", - ], -) -def test_semantic_ontology_service_treats_apply_for_travel_as_expense_application(query: str) -> None: - session_factory = build_session_factory() - with session_factory() as db: - result = SemanticOntologyService(db).parse( - OntologyParseRequest( - query=query, - user_id="pytest", - ) - ) - - entity_map = {item.type: item.normalized_value for item in result.entities} - entity_types = {item.type for item in result.entities} - - assert result.scenario == "expense" - assert result.intent == "draft" - assert result.permission.level == "draft_write" - assert entity_map["document_type"] == "expense_application" - assert entity_map["workflow_stage"] == "pre_approval" - assert entity_map["expense_type"] == "travel" - assert "employee" not in entity_types - assert "amount" in result.missing_slots - assert "time_range" in result.missing_slots - - -def test_semantic_ontology_service_keeps_explicit_travel_reimbursement_as_reimbursement_draft() -> None: - session_factory = build_session_factory() - with session_factory() as db: - result = SemanticOntologyService(db).parse( - OntologyParseRequest( - query="我要报销去北京出差的费用", - user_id="pytest", - ) - ) - - entity_map = {item.type: item.normalized_value for item in result.entities} - - assert result.scenario == "expense" - assert result.intent == "draft" - assert entity_map["expense_type"] == "travel" - assert "document_type" not in entity_map - assert "workflow_stage" not in entity_map - - -def test_semantic_ontology_service_extracts_budget_edit_fields() -> None: - session_factory = build_session_factory() - with session_factory() as db: - result = SemanticOntologyService(db).parse( - OntologyParseRequest( - query="编辑预算:2026年度 CC-4100 差旅费预算金额60万元,预警线80%,控制动作提醒", - user_id="pytest", - context_json={ - "document_type": "budget_plan", - "entry_source": "budget_center", - "conversation_scenario": "budget", - }, - ) - ) - - entity_map = {item.type: item.normalized_value for item in result.entities} - - assert result.scenario == "budget" - assert result.intent == "draft" - assert result.permission.level == "draft_write" - assert entity_map["budget_period"] == "2026年度" - assert entity_map["budget_subject"] == "travel" - assert entity_map["expense_type"] == "travel" - assert entity_map["budget_amount"] == "600000" - assert entity_map["warning_threshold"] == "80%" - assert entity_map["control_action"] == "remind" - - -def test_semantic_ontology_service_extracts_quarter_budget_period() -> None: - session_factory = build_session_factory() - with session_factory() as db: - result = SemanticOntologyService(db).parse( - OntologyParseRequest( - query="查询 CC-4100 2026年Q3 住宿费预算金额", - user_id="pytest", - ) - ) - - entity_map = {item.type: item.normalized_value for item in result.entities} - - assert result.scenario == "budget" - assert entity_map["budget_period"] == "2026年Q3" - assert entity_map["budget_subject"] == "hotel" - assert entity_map["expense_type"] == "hotel" - - -@pytest.mark.parametrize( - "query,expected_code,expected_label", - [ - ("查询2026年度市场推广费预算余额", "marketing", "市场推广费"), - ("查看2026年度软件服务费已占用金额", "software", "软件服务费"), - ("统计2026年度业务招待费预算金额", "meal", "业务招待费"), - ], -) -def test_semantic_ontology_service_links_budget_subject_to_expense_type( - query: str, - expected_code: str, - expected_label: str, -) -> None: - session_factory = build_session_factory() - with session_factory() as db: - result = SemanticOntologyService(db).parse( - OntologyParseRequest(query=query, user_id="pytest") - ) - - assert result.scenario == "budget" - assert any( - item.type == "budget_subject" and item.normalized_value == expected_code - for item in result.entities - ) - assert any( - item.type == "expense_type" - and item.normalized_value == expected_code - and item.value == expected_label - for item in result.entities - ) - - -def test_semantic_ontology_service_extracts_new_document_numbers() -> None: - session_factory = build_session_factory() - with session_factory() as db: - result = SemanticOntologyService(db).parse( - OntologyParseRequest( - query="查询 RE-20260525103045-ABCDEFGH 和 AP-20260525113045-HGFEDCBA 的状态", - user_id="pytest", - ) - ) - - claim_codes = { - item.normalized_value - for item in result.entities - if item.type == "expense_claim" - } - assert claim_codes == { - "RE-20260525103045-ABCDEFGH", - "AP-20260525113045-HGFEDCBA", - } + + assert result.scenario == "expense" + assert result.intent == "query" + assert result.time_range.start_date == "2026-04-01" + assert result.time_range.end_date == "2026-04-30" + + +def test_semantic_ontology_service_extracts_budget_query_fields() -> None: + session_factory = build_session_factory() + with session_factory() as db: + result = SemanticOntologyService(db).parse( + OntologyParseRequest( + query="查询 CC-4100 2026年度差旅费可用预算和预算占用", + user_id="pytest", + ) + ) + + entity_map = {item.type: item.normalized_value for item in result.entities} + metric_names = {item.name for item in result.metrics} + + assert result.scenario == "budget" + assert result.intent == "query" + assert entity_map["cost_center"] == "CC-4100" + assert entity_map["budget_period"] == "2026年度" + assert entity_map["budget_subject"] == "travel" + assert entity_map["expense_type"] == "travel" + assert {"available_amount", "reserved_amount"}.issubset(metric_names) + + +@pytest.mark.parametrize( + "query", + [ + "申请出差", + "申请差旅", + "去国网出差3天,协助仿生产环境部署", + "去北京出差3天,支撑国网仿生产环境部署", + "下周去上海出差支撑客户系统上线,预计3天", + "安排去深圳客户现场验收项目,出差两天", + "准备去国网现场做仿生产环境部署,差旅3天", + ], +) +def test_semantic_ontology_service_treats_apply_for_travel_as_expense_application(query: str) -> None: + session_factory = build_session_factory() + with session_factory() as db: + result = SemanticOntologyService(db).parse( + OntologyParseRequest( + query=query, + user_id="pytest", + ) + ) + + entity_map = {item.type: item.normalized_value for item in result.entities} + entity_types = {item.type for item in result.entities} + + assert result.scenario == "expense" + assert result.intent == "draft" + assert result.permission.level == "draft_write" + assert entity_map["document_type"] == "expense_application" + assert entity_map["workflow_stage"] == "pre_approval" + assert entity_map["expense_type"] == "travel" + assert "employee" not in entity_types + assert "amount" in result.missing_slots + assert "time_range" in result.missing_slots + + +def test_semantic_ontology_service_keeps_explicit_travel_reimbursement_as_reimbursement_draft() -> None: + session_factory = build_session_factory() + with session_factory() as db: + result = SemanticOntologyService(db).parse( + OntologyParseRequest( + query="我要报销去北京出差的费用", + user_id="pytest", + ) + ) + + entity_map = {item.type: item.normalized_value for item in result.entities} + + assert result.scenario == "expense" + assert result.intent == "draft" + assert entity_map["expense_type"] == "travel" + assert "document_type" not in entity_map + assert "workflow_stage" not in entity_map + + +def test_semantic_ontology_service_extracts_budget_edit_fields() -> None: + session_factory = build_session_factory() + with session_factory() as db: + result = SemanticOntologyService(db).parse( + OntologyParseRequest( + query="编辑预算:2026年度 CC-4100 差旅费预算金额60万元,预警线80%,控制动作提醒", + user_id="pytest", + context_json={ + "document_type": "budget_plan", + "entry_source": "budget_center", + "conversation_scenario": "budget", + }, + ) + ) + + entity_map = {item.type: item.normalized_value for item in result.entities} + + assert result.scenario == "budget" + assert result.intent == "draft" + assert result.permission.level == "draft_write" + assert entity_map["budget_period"] == "2026年度" + assert entity_map["budget_subject"] == "travel" + assert entity_map["expense_type"] == "travel" + assert entity_map["budget_amount"] == "600000" + assert entity_map["warning_threshold"] == "80%" + assert entity_map["control_action"] == "remind" + + +def test_semantic_ontology_service_extracts_quarter_budget_period() -> None: + session_factory = build_session_factory() + with session_factory() as db: + result = SemanticOntologyService(db).parse( + OntologyParseRequest( + query="查询 CC-4100 2026年Q3 住宿费预算金额", + user_id="pytest", + ) + ) + + entity_map = {item.type: item.normalized_value for item in result.entities} + + assert result.scenario == "budget" + assert entity_map["budget_period"] == "2026年Q3" + assert entity_map["budget_subject"] == "hotel" + assert entity_map["expense_type"] == "hotel" + + +@pytest.mark.parametrize( + "query,expected_code,expected_label", + [ + ("查询2026年度市场推广费预算余额", "marketing", "市场推广费"), + ("查看2026年度软件服务费已占用金额", "software", "软件服务费"), + ("统计2026年度业务招待费预算金额", "meal", "业务招待费"), + ], +) +def test_semantic_ontology_service_links_budget_subject_to_expense_type( + query: str, + expected_code: str, + expected_label: str, +) -> None: + session_factory = build_session_factory() + with session_factory() as db: + result = SemanticOntologyService(db).parse( + OntologyParseRequest(query=query, user_id="pytest") + ) + + assert result.scenario == "budget" + assert any( + item.type == "budget_subject" and item.normalized_value == expected_code + for item in result.entities + ) + assert any( + item.type == "expense_type" + and item.normalized_value == expected_code + and item.value == expected_label + for item in result.entities + ) + + +def test_semantic_ontology_service_extracts_new_document_numbers() -> None: + session_factory = build_session_factory() + with session_factory() as db: + result = SemanticOntologyService(db).parse( + OntologyParseRequest( + query="查询 RE-20260525103045-ABCDEFGH 和 AP-20260525113045-HGFEDCBA 的状态", + user_id="pytest", + ) + ) + + claim_codes = { + item.normalized_value + for item in result.entities + if item.type == "expense_claim" + } + assert claim_codes == { + "RE-20260525103045-ABCDEFGH", + "AP-20260525113045-HGFEDCBA", + } def test_semantic_ontology_service_treats_travel_amount_question_as_knowledge_query() -> None: @@ -490,30 +490,30 @@ def test_semantic_ontology_service_keeps_travel_amount_follow_up_in_knowledge_qu assert result.clarification_required is False -def test_semantic_ontology_service_rejects_draft_intent_inside_knowledge_session( - monkeypatch, -) -> None: +def test_semantic_ontology_service_rejects_draft_intent_inside_knowledge_session( + monkeypatch, +) -> None: session_factory = build_session_factory() with session_factory() as db: service = SemanticOntologyService(db) - monkeypatch.setattr( - service, - "_parse_with_model", - lambda **kwargs: ( - LlmOntologyParseResult( - scenario="expense", - intent="draft", - confidence=0.91, - clarification_required=True, - clarification_question="请补充招待对象和票据附件。", - missing_slots=["participants", "attachments"], - ambiguity=[], - entity_hints=[], - ), - [], - None, - ), - ) + monkeypatch.setattr( + service, + "_parse_with_model", + lambda **kwargs: ( + LlmOntologyParseResult( + scenario="expense", + intent="draft", + confidence=0.91, + clarification_required=True, + clarification_question="请补充招待对象和票据附件。", + missing_slots=["participants", "attachments"], + ambiguity=[], + entity_hints=[], + ), + [], + None, + ), + ) result = service.parse( OntologyParseRequest( @@ -527,33 +527,33 @@ def test_semantic_ontology_service_rejects_draft_intent_inside_knowledge_session }, ) ) - - assert result.scenario == "knowledge" - assert result.intent == "query" - assert result.clarification_required is False - assert result.clarification_question is None - - -def test_review_next_step_context_inherits_expense_draft_flow() -> None: - session_factory = build_session_factory() - with session_factory() as db: - result = SemanticOntologyService(db).parse( - OntologyParseRequest( - query="我已核对右侧识别结果,请进入下一步。", - user_id="pytest", - context_json={ - "review_action": "next_step", - "draft_claim_id": "claim-1", - "attachment_count": 1, - }, - ) - ) - - assert result.scenario == "expense" - assert result.intent == "draft" - assert result.permission.level == "draft_write" - assert result.clarification_required is False - assert result.clarification_question is None + + assert result.scenario == "knowledge" + assert result.intent == "query" + assert result.clarification_required is False + assert result.clarification_question is None + + +def test_review_next_step_context_inherits_expense_draft_flow() -> None: + session_factory = build_session_factory() + with session_factory() as db: + result = SemanticOntologyService(db).parse( + OntologyParseRequest( + query="我已核对右侧识别结果,请进入下一步。", + user_id="pytest", + context_json={ + "review_action": "next_step", + "draft_claim_id": "claim-1", + "attachment_count": 1, + }, + ) + ) + + assert result.scenario == "expense" + assert result.intent == "draft" + assert result.permission.level == "draft_write" + assert result.clarification_required is False + assert result.clarification_question is None def test_semantic_ontology_service_prefers_expense_for_customer_entertainment_narrative() -> None: @@ -573,10 +573,10 @@ def test_semantic_ontology_service_prefers_expense_for_customer_entertainment_na assert result.clarification_required is True assert "customer_name" in result.missing_slots assert "participants" in result.missing_slots - assert any( - item.type == "expense_type" and item.normalized_value == "meal" - for item in result.entities - ) + assert any( + item.type == "expense_type" and item.normalized_value == "meal" + for item in result.entities + ) def test_semantic_ontology_service_uses_client_local_date_for_relative_time() -> None: @@ -598,11 +598,11 @@ def test_semantic_ontology_service_uses_client_local_date_for_relative_time() -> assert result.time_range.end_date == "2026-05-12" -def test_semantic_ontology_service_extracts_day_before_yesterday_from_client_local_date() -> None: - session_factory = build_session_factory() - with session_factory() as db: - result = SemanticOntologyService(db).parse( - OntologyParseRequest( +def test_semantic_ontology_service_extracts_day_before_yesterday_from_client_local_date() -> None: + session_factory = build_session_factory() + with session_factory() as db: + result = SemanticOntologyService(db).parse( + OntologyParseRequest( query="我前天请客户吃饭花了200元", user_id="pytest", context_json={ @@ -611,82 +611,82 @@ def test_semantic_ontology_service_extracts_day_before_yesterday_from_client_loc }, ) ) - - assert result.time_range.raw == "前天" - assert result.time_range.start_date == "2026-05-11" - assert result.time_range.end_date == "2026-05-11" - - -def test_semantic_ontology_service_treats_status_document_text_as_query() -> None: - session_factory = build_session_factory() - with session_factory() as db: - result = SemanticOntologyService(db).parse( - OntologyParseRequest( - query="查询草稿的单据", - user_id="pytest", - ) - ) - - assert result.scenario == "expense" - assert result.intent == "query" - assert result.permission.level == "read" - assert any( - item.field == "status" and item.value == "draft" - for item in result.constraints - ) - - -def test_semantic_ontology_service_extracts_history_query_time_and_location() -> None: - session_factory = build_session_factory() - with session_factory() as db: - result = SemanticOntologyService(db).parse( - OntologyParseRequest( - query="我去年去北京报销的单据", - user_id="pytest", - context_json={ - "client_now_iso": "2026-05-21T04:00:00.000Z", - "client_timezone_offset_minutes": -480, - }, - ) - ) - - assert result.scenario == "expense" - assert result.intent == "query" - assert result.time_range.raw == "去年" - assert result.time_range.start_date == "2025-01-01" - assert result.time_range.end_date == "2025-12-31" - assert any( - item.type == "location" and item.normalized_value == "北京" - for item in result.entities - ) - - -def test_semantic_ontology_service_understands_last_week_claim_progress_query() -> None: - session_factory = build_session_factory() - with session_factory() as db: - result = SemanticOntologyService(db).parse( - OntologyParseRequest( - query="我上周提交的单据报销了么?", - user_id="pytest", - context_json={ - "client_now_iso": "2026-05-21T04:00:00.000Z", - "client_timezone_offset_minutes": -480, - }, - ) - ) - - assert result.scenario == "expense" - assert result.intent == "query" - assert result.time_range.raw == "上周" - assert result.time_range.start_date == "2026-05-11" - assert result.time_range.end_date == "2026-05-17" - - -def test_semantic_ontology_service_maps_office_supplies_to_office_expense_type() -> None: - session_factory = build_session_factory() - with session_factory() as db: - result = SemanticOntologyService(db).parse( - OntologyParseRequest( + + assert result.time_range.raw == "前天" + assert result.time_range.start_date == "2026-05-11" + assert result.time_range.end_date == "2026-05-11" + + +def test_semantic_ontology_service_treats_status_document_text_as_query() -> None: + session_factory = build_session_factory() + with session_factory() as db: + result = SemanticOntologyService(db).parse( + OntologyParseRequest( + query="查询草稿的单据", + user_id="pytest", + ) + ) + + assert result.scenario == "expense" + assert result.intent == "query" + assert result.permission.level == "read" + assert any( + item.field == "status" and item.value == "draft" + for item in result.constraints + ) + + +def test_semantic_ontology_service_extracts_history_query_time_and_location() -> None: + session_factory = build_session_factory() + with session_factory() as db: + result = SemanticOntologyService(db).parse( + OntologyParseRequest( + query="我去年去北京报销的单据", + user_id="pytest", + context_json={ + "client_now_iso": "2026-05-21T04:00:00.000Z", + "client_timezone_offset_minutes": -480, + }, + ) + ) + + assert result.scenario == "expense" + assert result.intent == "query" + assert result.time_range.raw == "去年" + assert result.time_range.start_date == "2025-01-01" + assert result.time_range.end_date == "2025-12-31" + assert any( + item.type == "location" and item.normalized_value == "北京" + for item in result.entities + ) + + +def test_semantic_ontology_service_understands_last_week_claim_progress_query() -> None: + session_factory = build_session_factory() + with session_factory() as db: + result = SemanticOntologyService(db).parse( + OntologyParseRequest( + query="我上周提交的单据报销了么?", + user_id="pytest", + context_json={ + "client_now_iso": "2026-05-21T04:00:00.000Z", + "client_timezone_offset_minutes": -480, + }, + ) + ) + + assert result.scenario == "expense" + assert result.intent == "query" + assert result.time_range.raw == "上周" + assert result.time_range.start_date == "2026-05-11" + assert result.time_range.end_date == "2026-05-17" + + +def test_semantic_ontology_service_maps_office_supplies_to_office_expense_type() -> None: + session_factory = build_session_factory() + with session_factory() as db: + result = SemanticOntologyService(db).parse( + OntologyParseRequest( query="我买了办公用品和文具,花了88元,帮我报销", user_id="pytest", ) @@ -695,267 +695,267 @@ def test_semantic_ontology_service_maps_office_supplies_to_office_expense_type() assert result.scenario == "expense" assert result.intent == "draft" assert any( - item.type == "expense_type" and item.normalized_value == "office" - for item in result.entities - ) - - -def test_semantic_ontology_service_maps_riding_fare_to_transport_expense_type() -> None: - session_factory = build_session_factory() - with session_factory() as db: - result = SemanticOntologyService(db).parse( - OntologyParseRequest( - query="业务发生时间:2026-03-04,送客户去林萃小区办事,请报销乘车费用", - user_id="pytest", - ) - ) - - assert result.scenario == "expense" - assert result.intent == "draft" - assert any( - item.type == "expense_type" and item.normalized_value == "transport" - for item in result.entities - ) - - -def test_semantic_ontology_service_maps_taxi_ticket_reimbursement_to_transport_draft() -> None: - session_factory = build_session_factory() - with session_factory() as db: - result = SemanticOntologyService(db).parse( - OntologyParseRequest( - query="送客户去机场,报销的士票", - user_id="pytest", - ) - ) - - assert result.scenario == "expense" - assert result.intent == "draft" - assert any( - item.type == "expense_type" and item.normalized_value == "transport" - for item in result.entities - ) - assert not any( - item.type == "expense_type" and item.normalized_value == "entertainment" - for item in result.entities - ) - - -@pytest.mark.parametrize( - "query,expected_type", - [ - ("报销飞机票和行程单", "travel"), - ("报销酒店发票和房费", "hotel"), - ("报销滴滴打车票", "transport"), - ("报销工作餐餐费", "meal"), - ("报销会议场地费", "meeting"), - ("报销客户接待餐", "meal"), - ("报销打印纸和硒鼓", "office"), - ("报销培训课程费", "training"), - ("报销手机话费和流量费", "communication"), - ("报销员工体检费", "welfare"), - ], -) -def test_semantic_ontology_service_covers_common_expense_scene_keywords( - query: str, - expected_type: str, -) -> None: - session_factory = build_session_factory() - with session_factory() as db: - result = SemanticOntologyService(db).parse( - OntologyParseRequest(query=query, user_id="pytest") - ) - - assert result.scenario == "expense" - assert result.intent == "draft" - assert any( - item.type == "expense_type" and item.normalized_value == expected_type - for item in result.entities - ) - - -def test_semantic_ontology_service_connects_expense_application_to_ontology() -> None: - session_factory = build_session_factory() - with session_factory() as db: - result = SemanticOntologyService(db).parse( - OntologyParseRequest( - query="申请2026-06-01 ~ 2026-06-03去北京做客户现场验收,差旅预算18000元", - user_id="pytest", - context_json={ - "document_type": "expense_application", - "application_stage": "pre_approval", - "entry_source": "documents_application", - }, - ) - ) - - assert result.scenario == "expense" - assert result.intent == "draft" - assert any( - item.type == "document_type" and item.normalized_value == "expense_application" - for item in result.entities - ) - assert any( - item.type == "workflow_stage" and item.normalized_value == "pre_approval" - for item in result.entities - ) - assert any( - item.field == "document_type" and item.value == "expense_application" - for item in result.constraints - ) - assert any( - item.type == "expense_type" and item.normalized_value == "travel" - for item in result.entities - ) - - -def test_semantic_ontology_service_requires_attachment_for_meeting_application() -> None: - session_factory = build_session_factory() - with session_factory() as db: - result = SemanticOntologyService(db).parse( - OntologyParseRequest( - query="发起会务申请,2026-06-01 ~ 2026-06-02上海产品发布会,预算32000元", - user_id="pytest", - context_json={ - "document_type": "expense_application", - "application_stage": "pre_approval", - "entry_source": "documents_application", - "attachment_count": 0, - }, - ) - ) - - assert result.scenario == "expense" - assert result.intent == "draft" - assert any( - item.type == "expense_type" and item.normalized_value == "meeting" - for item in result.entities - ) - assert "attachments" in result.missing_slots - - -def test_semantic_ontology_service_treats_application_session_as_application_context() -> None: - session_factory = build_session_factory() - with session_factory() as db: - result = SemanticOntologyService(db).parse( - OntologyParseRequest( - query=( - "发生时间:2026-05-25\n" - "地点:上海\n" - "事由:支持上海国网服务器部署\n" - "天数:3天" - ), - user_id="pytest", - context_json={ - "session_type": "application", - "entry_source": "application", - "attachment_count": 0, - }, - ) - ) - - assert result.scenario == "expense" - assert result.intent == "draft" - assert any( - item.type == "document_type" and item.normalized_value == "expense_application" - for item in result.entities - ) - assert any( - item.type == "workflow_stage" and item.normalized_value == "pre_approval" - for item in result.entities - ) - assert "expense_type" in result.missing_slots - assert "amount" in result.missing_slots - - -def test_semantic_ontology_service_normalizes_business_aliases_to_ontology_fields( - monkeypatch, -) -> None: - session_factory = build_session_factory() - with session_factory() as db: - service = SemanticOntologyService(db) - monkeypatch.setattr( - service, - "_parse_with_model", - lambda **kwargs: (None, [], "model_disabled_for_field_registry_test"), - ) - - result = service.parse( - OntologyParseRequest( - query="生成差旅费报销草稿", - user_id="pytest", - context_json={ - "review_action": "save_draft", - "review_form_values": { - "reimbursement_type": "差旅费", - "business_time": "2026-06-01 至 2026-06-03", - "business_location": "上海", - "reason_value": "支撑国网仿生产环境部署", - "application_amount": "3000元", - "transport_type": "火车", - }, - }, - ) - ) - - entity_map = {(item.type, item.normalized_value) for item in result.entities} - assert ("transport_mode", "火车") in entity_map - assert ("reason", "支撑国网仿生产环境部署") in entity_map - assert ("location", "上海") in entity_map - assert "time_range" not in result.missing_slots - assert "reason" not in result.missing_slots - - -def test_ontology_context_normalizes_employee_profile_aliases() -> None: - context = normalize_ontology_context_json( - { - "name": "曹笑竹", - "department": "技术部", - "position": "财务智能化产品经理", - "grade": "P5", - "managerName": "向万红", - "costCenter": "TECH-DEPT", - } - ) - - assert context["employee_name"] == "曹笑竹" - assert context["department_name"] == "技术部" - assert context["employee_position"] == "财务智能化产品经理" - assert context["employee_grade"] == "P5" - assert context["manager_name"] == "向万红" - assert context["cost_center"] == "TECH-DEPT" - - -def test_semantic_ontology_service_uses_model_parse_when_available(monkeypatch) -> None: - session_factory = build_session_factory() - with session_factory() as db: - service = SemanticOntologyService(db) - monkeypatch.setattr( - service, - "_parse_with_model", - lambda **kwargs: ( - LlmOntologyParseResult( - scenario="expense", - intent="draft", - confidence=0.91, - clarification_required=True, - clarification_question="请补充费用类型、金额和票据附件。", - missing_slots=["expense_type", "amount", "attachments"], - ambiguity=[], - entity_hints=[], - ), - [ - { - "slot": "main", - "provider": "MiniMax", - "model": "intent-model", - "attempt": 1, - "status": "succeeded", - "duration_ms": 8, - } - ], - None, - ), - ) + item.type == "expense_type" and item.normalized_value == "office" + for item in result.entities + ) + + +def test_semantic_ontology_service_maps_riding_fare_to_transport_expense_type() -> None: + session_factory = build_session_factory() + with session_factory() as db: + result = SemanticOntologyService(db).parse( + OntologyParseRequest( + query="业务发生时间:2026-03-04,送客户去林萃小区办事,请报销乘车费用", + user_id="pytest", + ) + ) + + assert result.scenario == "expense" + assert result.intent == "draft" + assert any( + item.type == "expense_type" and item.normalized_value == "transport" + for item in result.entities + ) + + +def test_semantic_ontology_service_maps_taxi_ticket_reimbursement_to_transport_draft() -> None: + session_factory = build_session_factory() + with session_factory() as db: + result = SemanticOntologyService(db).parse( + OntologyParseRequest( + query="送客户去机场,报销的士票", + user_id="pytest", + ) + ) + + assert result.scenario == "expense" + assert result.intent == "draft" + assert any( + item.type == "expense_type" and item.normalized_value == "transport" + for item in result.entities + ) + assert not any( + item.type == "expense_type" and item.normalized_value == "entertainment" + for item in result.entities + ) + + +@pytest.mark.parametrize( + "query,expected_type", + [ + ("报销飞机票和行程单", "travel"), + ("报销酒店发票和房费", "hotel"), + ("报销滴滴打车票", "transport"), + ("报销工作餐餐费", "meal"), + ("报销会议场地费", "meeting"), + ("报销客户接待餐", "meal"), + ("报销打印纸和硒鼓", "office"), + ("报销培训课程费", "training"), + ("报销手机话费和流量费", "communication"), + ("报销员工体检费", "welfare"), + ], +) +def test_semantic_ontology_service_covers_common_expense_scene_keywords( + query: str, + expected_type: str, +) -> None: + session_factory = build_session_factory() + with session_factory() as db: + result = SemanticOntologyService(db).parse( + OntologyParseRequest(query=query, user_id="pytest") + ) + + assert result.scenario == "expense" + assert result.intent == "draft" + assert any( + item.type == "expense_type" and item.normalized_value == expected_type + for item in result.entities + ) + + +def test_semantic_ontology_service_connects_expense_application_to_ontology() -> None: + session_factory = build_session_factory() + with session_factory() as db: + result = SemanticOntologyService(db).parse( + OntologyParseRequest( + query="申请2026-06-01 ~ 2026-06-03去北京做客户现场验收,差旅预算18000元", + user_id="pytest", + context_json={ + "document_type": "expense_application", + "application_stage": "pre_approval", + "entry_source": "documents_application", + }, + ) + ) + + assert result.scenario == "expense" + assert result.intent == "draft" + assert any( + item.type == "document_type" and item.normalized_value == "expense_application" + for item in result.entities + ) + assert any( + item.type == "workflow_stage" and item.normalized_value == "pre_approval" + for item in result.entities + ) + assert any( + item.field == "document_type" and item.value == "expense_application" + for item in result.constraints + ) + assert any( + item.type == "expense_type" and item.normalized_value == "travel" + for item in result.entities + ) + + +def test_semantic_ontology_service_requires_attachment_for_meeting_application() -> None: + session_factory = build_session_factory() + with session_factory() as db: + result = SemanticOntologyService(db).parse( + OntologyParseRequest( + query="发起会务申请,2026-06-01 ~ 2026-06-02上海产品发布会,预算32000元", + user_id="pytest", + context_json={ + "document_type": "expense_application", + "application_stage": "pre_approval", + "entry_source": "documents_application", + "attachment_count": 0, + }, + ) + ) + + assert result.scenario == "expense" + assert result.intent == "draft" + assert any( + item.type == "expense_type" and item.normalized_value == "meeting" + for item in result.entities + ) + assert "attachments" in result.missing_slots + + +def test_semantic_ontology_service_treats_application_session_as_application_context() -> None: + session_factory = build_session_factory() + with session_factory() as db: + result = SemanticOntologyService(db).parse( + OntologyParseRequest( + query=( + "发生时间:2026-05-25\n" + "地点:上海\n" + "事由:支持上海国网服务器部署\n" + "天数:3天" + ), + user_id="pytest", + context_json={ + "session_type": "application", + "entry_source": "application", + "attachment_count": 0, + }, + ) + ) + + assert result.scenario == "expense" + assert result.intent == "draft" + assert any( + item.type == "document_type" and item.normalized_value == "expense_application" + for item in result.entities + ) + assert any( + item.type == "workflow_stage" and item.normalized_value == "pre_approval" + for item in result.entities + ) + assert "expense_type" in result.missing_slots + assert "amount" in result.missing_slots + + +def test_semantic_ontology_service_normalizes_business_aliases_to_ontology_fields( + monkeypatch, +) -> None: + session_factory = build_session_factory() + with session_factory() as db: + service = SemanticOntologyService(db) + monkeypatch.setattr( + service, + "_parse_with_model", + lambda **kwargs: (None, [], "model_disabled_for_field_registry_test"), + ) + + result = service.parse( + OntologyParseRequest( + query="生成差旅费报销草稿", + user_id="pytest", + context_json={ + "review_action": "save_draft", + "review_form_values": { + "reimbursement_type": "差旅费", + "business_time": "2026-06-01 至 2026-06-03", + "business_location": "上海", + "reason_value": "支撑国网仿生产环境部署", + "application_amount": "3000元", + "transport_type": "火车", + }, + }, + ) + ) + + entity_map = {(item.type, item.normalized_value) for item in result.entities} + assert ("transport_mode", "火车") in entity_map + assert ("reason", "支撑国网仿生产环境部署") in entity_map + assert ("location", "上海") in entity_map + assert "time_range" not in result.missing_slots + assert "reason" not in result.missing_slots + + +def test_ontology_context_normalizes_employee_profile_aliases() -> None: + context = normalize_ontology_context_json( + { + "name": "曹笑竹", + "department": "技术部", + "position": "财务智能化产品经理", + "grade": "P5", + "managerName": "向万红", + "costCenter": "TECH-DEPT", + } + ) + + assert context["employee_name"] == "曹笑竹" + assert context["department_name"] == "技术部" + assert context["employee_position"] == "财务智能化产品经理" + assert context["employee_grade"] == "P5" + assert context["manager_name"] == "向万红" + assert context["cost_center"] == "TECH-DEPT" + + +def test_semantic_ontology_service_uses_model_parse_when_available(monkeypatch) -> None: + session_factory = build_session_factory() + with session_factory() as db: + service = SemanticOntologyService(db) + monkeypatch.setattr( + service, + "_parse_with_model", + lambda **kwargs: ( + LlmOntologyParseResult( + scenario="expense", + intent="draft", + confidence=0.91, + clarification_required=True, + clarification_question="请补充费用类型、金额和票据附件。", + missing_slots=["expense_type", "amount", "attachments"], + ambiguity=[], + entity_hints=[], + ), + [ + { + "slot": "main", + "provider": "MiniMax", + "model": "intent-model", + "attempt": 1, + "status": "succeeded", + "duration_ms": 8, + } + ], + None, + ), + ) result = service.parse( OntologyParseRequest( @@ -969,103 +969,103 @@ def test_semantic_ontology_service_uses_model_parse_when_available(monkeypatch) assert result.parse_strategy == "llm_primary" assert result.clarification_required is True assert "expense_type" in result.missing_slots - assert result.clarification_question == "请补充费用类型、金额和票据附件。" - - -def test_semantic_ontology_service_falls_back_when_model_conflicts_with_application_signal( - monkeypatch, -) -> None: - session_factory = build_session_factory() - with session_factory() as db: - service = SemanticOntologyService(db) - - monkeypatch.setattr( - service.runtime_chat_service, - "complete_with_trace", - lambda *args, **kwargs: RuntimeChatResult( - text=( - '{"scenario":"knowledge","intent":"query","confidence":0.91,' - '"clarification_required":false,"missing_slots":[],' - '"ambiguity":[],"entity_hints":[]}' - ), - calls=[ - RuntimeChatCallTrace( - slot="main", - provider="MiniMax", - model="intent-model", - attempt=1, - status="succeeded", - duration_ms=11, - ) - ], - ), - ) - - result = service.parse( - OntologyParseRequest( - query="去国网出差3天,协助仿生产环境部署", - user_id="pytest", - ) - ) - fetched = service.run_service.get_run(result.run_id) - - entity_map = {item.type: item.normalized_value for item in result.entities} - - assert result.scenario == "expense" - assert result.intent == "draft" - assert result.parse_strategy == "rule_fallback" - assert entity_map["document_type"] == "expense_application" - assert fetched is not None - assert fetched.tool_calls[0].status == "failed" - assert fetched.tool_calls[0].error_message == "model_conflicts_with_application_stage_signal" - - -def test_semantic_ontology_service_records_model_call_errors_for_statistics(monkeypatch) -> None: - session_factory = build_session_factory() - with session_factory() as db: - service = SemanticOntologyService(db) - run = service.run_service.create_run( - agent=AgentName.ORCHESTRATOR.value, - source=AgentRunSource.USER_MESSAGE.value, - status=AgentRunStatus.RUNNING.value, - ) - - monkeypatch.setattr( - service.runtime_chat_service, - "complete_with_trace", - lambda *args, **kwargs: RuntimeChatResult( - text=None, - calls=[ - RuntimeChatCallTrace( - slot="main", - provider="MiniMax", - model="intent-model", - attempt=1, - status="failed", - duration_ms=15, - error_message="incorrect api key", - ) - ], - ), - ) - - result = service.parse_for_run( - OntologyParseRequest( - query="去北京出差3天,支撑国网仿生产环境部署", - user_id="pytest", - ), - run_id=run.run_id, - ) - fetched = service.run_service.get_run(run.run_id) - stats = service.run_service.summarize_runs(limit=20) - - assert result.parse_strategy == "rule_fallback" - assert fetched is not None - assert len(fetched.tool_calls) == 1 - assert fetched.tool_calls[0].tool_name == "semantic_ontology.main" - assert fetched.tool_calls[0].status == "failed" - assert fetched.tool_calls[0].error_message == "incorrect api key" - assert stats.failed_llm_call_count >= 1 + assert result.clarification_question == "请补充费用类型、金额和票据附件。" + + +def test_semantic_ontology_service_falls_back_when_model_conflicts_with_application_signal( + monkeypatch, +) -> None: + session_factory = build_session_factory() + with session_factory() as db: + service = SemanticOntologyService(db) + + monkeypatch.setattr( + service.runtime_chat_service, + "complete_with_trace", + lambda *args, **kwargs: RuntimeChatResult( + text=( + '{"scenario":"knowledge","intent":"query","confidence":0.91,' + '"clarification_required":false,"missing_slots":[],' + '"ambiguity":[],"entity_hints":[]}' + ), + calls=[ + RuntimeChatCallTrace( + slot="main", + provider="MiniMax", + model="intent-model", + attempt=1, + status="succeeded", + duration_ms=11, + ) + ], + ), + ) + + result = service.parse( + OntologyParseRequest( + query="去国网出差3天,协助仿生产环境部署", + user_id="pytest", + ) + ) + fetched = service.run_service.get_run(result.run_id) + + entity_map = {item.type: item.normalized_value for item in result.entities} + + assert result.scenario == "expense" + assert result.intent == "draft" + assert result.parse_strategy == "rule_fallback" + assert entity_map["document_type"] == "expense_application" + assert fetched is not None + assert fetched.tool_calls[0].status == "failed" + assert fetched.tool_calls[0].error_message == "model_conflicts_with_application_stage_signal" + + +def test_semantic_ontology_service_records_model_call_errors_for_statistics(monkeypatch) -> None: + session_factory = build_session_factory() + with session_factory() as db: + service = SemanticOntologyService(db) + run = service.run_service.create_run( + agent=AgentName.ORCHESTRATOR.value, + source=AgentRunSource.USER_MESSAGE.value, + status=AgentRunStatus.RUNNING.value, + ) + + monkeypatch.setattr( + service.runtime_chat_service, + "complete_with_trace", + lambda *args, **kwargs: RuntimeChatResult( + text=None, + calls=[ + RuntimeChatCallTrace( + slot="main", + provider="MiniMax", + model="intent-model", + attempt=1, + status="failed", + duration_ms=15, + error_message="incorrect api key", + ) + ], + ), + ) + + result = service.parse_for_run( + OntologyParseRequest( + query="去北京出差3天,支撑国网仿生产环境部署", + user_id="pytest", + ), + run_id=run.run_id, + ) + fetched = service.run_service.get_run(run.run_id) + stats = service.run_service.summarize_runs(limit=20) + + assert result.parse_strategy == "rule_fallback" + assert fetched is not None + assert len(fetched.tool_calls) == 1 + assert fetched.tool_calls[0].tool_name == "semantic_ontology.main" + assert fetched.tool_calls[0].status == "failed" + assert fetched.tool_calls[0].error_message == "incorrect api key" + assert stats.failed_llm_call_count >= 1 def test_parse_ontology_endpoint_returns_eight_fields_and_writes_trace() -> None: @@ -1115,6 +1115,21 @@ def test_parse_ontology_endpoint_returns_eight_fields_and_writes_trace() -> None assert run_payload["semantic_parse"]["intent"] == "risk_check" +def test_parse_ontology_endpoint_blocks_non_business_input() -> None: + client, _ = build_client() + + response = client.post( + "/api/v1/ontology/parse", + json={ + "query": "你好", + "user_id": "pytest", + }, + ) + + assert response.status_code == 400 + assert "财务业务相关问题" in response.json()["detail"] + + def test_parse_ontology_endpoint_returns_forbidden_for_unprivileged_payment_request() -> None: client, _ = build_client()