feat(backend): add ontology and orchestrator API endpoints

New endpoints: - server/src/app/api/v1/endpoints/ontology.py: ontology API - server/src/app/api/v1/endpoints/orchestrator.py: orchestrator API New schemas: - server/src/app/schemas/ontology.py: ontology data schemas - server/src/app/schemas/orchestrator.py: orchestrator data schemas - server/src/app/schemas/user_agent.py: user agent data schemas New services: - server/src/app/services/ontology.py: ontology business logic - server/src/app/services/orchestrator.py: orchestrator business logic - server/src/app/services/runtime_chat.py: runtime chat service - server/src/app/services/user_agent.py: user agent service New tests: - server/tests/test_ontology_service.py - server/tests/test_orchestrator_service.py - server/tests/test_user_agent_service.py
2026-05-12 01:24:39 +00:00
parent 19da459bb3
commit 22d47cbf2b
12 changed files with 4262 additions and 0 deletions
--- a/server/src/app/api/v1/endpoints/ontology.py
+++ b/server/src/app/api/v1/endpoints/ontology.py
@@ -0,0 +1,36 @@
+from __future__ import annotations
+
+from typing import Annotated
+
+from fastapi import APIRouter, Depends, HTTPException, status
+from sqlalchemy.orm import Session
+
+from app.api.deps import get_db
+from app.schemas.common import ErrorResponse
+from app.schemas.ontology import OntologyParseRequest, OntologyParseResult
+from app.services.ontology import SemanticOntologyService
+
+router = APIRouter(prefix="/ontology")
+DbSession = Annotated[Session, Depends(get_db)]
+
+
+@router.post(
+    "/parse",
+    response_model=OntologyParseResult,
+    summary="解析自然语言为语义本体",
+    description=(
+        "把自然语言问题解析成 Day 3 约定的 8 个核心字段，"
+        "并写入 AgentRun 与 SemanticParseLog。"
+    ),
+    responses={
+        status.HTTP_400_BAD_REQUEST: {
+            "model": ErrorResponse,
+            "description": "请求缺少有效 query 或解析请求格式不合法。",
+        }
+    },
+)
+def parse_ontology(payload: OntologyParseRequest, db: DbSession) -> OntologyParseResult:
+    try:
+        return SemanticOntologyService(db).parse(payload)
+    except ValueError as exc:
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc
--- a/server/src/app/api/v1/endpoints/orchestrator.py
+++ b/server/src/app/api/v1/endpoints/orchestrator.py
@@ -0,0 +1,33 @@
+from __future__ import annotations
+
+from typing import Annotated
+
+from fastapi import APIRouter, Depends, HTTPException, status
+from sqlalchemy.orm import Session
+
+from app.api.deps import get_db
+from app.schemas.common import ErrorResponse
+from app.schemas.orchestrator import OrchestratorRequest, OrchestratorResponse
+from app.services.orchestrator import OrchestratorService
+
+router = APIRouter(prefix="/orchestrator")
+DbSession = Annotated[Session, Depends(get_db)]
+
+
+@router.post(
+    "/run",
+    response_model=OrchestratorResponse,
+    summary="运行 Orchestrator 统一调度",
+    description="统一接收用户消息、定时任务和系统事件，完成语义解析、权限判断、路由和占位执行。",
+    responses={
+        status.HTTP_400_BAD_REQUEST: {
+            "model": ErrorResponse,
+            "description": "请求缺少 message 或 task_id，无法启动调度。",
+        }
+    },
+)
+def run_orchestrator(payload: OrchestratorRequest, db: DbSession) -> OrchestratorResponse:
+    try:
+        return OrchestratorService(db).run(payload)
+    except ValueError as exc:
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc
--- a/server/src/app/schemas/ontology.py
+++ b/server/src/app/schemas/ontology.py
@@ -0,0 +1,116 @@
+from __future__ import annotations
+
+from typing import Any, Literal
+
+from pydantic import BaseModel, ConfigDict, Field
+
+OntologyScenario = Literal[
+    "expense",
+    "accounts_receivable",
+    "accounts_payable",
+    "knowledge",
+    "unknown",
+]
+OntologyIntent = Literal["query", "explain", "compare", "risk_check", "draft", "operate"]
+OntologyPermissionLevel = Literal["read", "draft_write", "approval_required", "forbidden"]
+OntologyParseStrategy = Literal["llm_primary", "rule_fallback"]
+
+
+class OntologyEntity(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    type: str = Field(description="业务对象类型，例如 employee / customer / vendor。")
+    value: str = Field(description="从原始问题中提取的对象值。")
+    normalized_value: str = Field(description="标准化后的对象值。")
+    role: str = Field(default="target", description="对象角色，例如 target / filter / threshold。")
+    confidence: float = Field(default=0.0, ge=0.0, le=1.0, description="字段级置信度。")
+
+
+class OntologyTimeRange(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    raw: str = Field(default="", description="命中的原始时间表达。")
+    start_date: str | None = Field(default=None, description="ISO 格式起始日期。")
+    end_date: str | None = Field(default=None, description="ISO 格式结束日期。")
+    granularity: str | None = Field(
+        default=None,
+        description="day / week / month / quarter / year。",
+    )
+
+
+class OntologyMetric(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    name: str = Field(description="指标名，例如 amount / count / overdue。")
+    aggregation: str | None = Field(default=None, description="sum / count / max 等聚合口径。")
+    unit: str | None = Field(default=None, description="金额、数量等单位。")
+    sort: str | None = Field(default=None, description="asc / desc 排序方向。")
+    top_n: int | None = Field(default=None, ge=1, description="Top N 口径。")
+
+
+class OntologyConstraint(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    field: str = Field(description="约束字段，例如 department / status / amount。")
+    operator: str = Field(description="操作符，例如 = / > / < / desc。")
+    value: str | int | float | bool = Field(description="约束值。")
+    currency: str | None = Field(default=None, description="金额类约束使用的币种。")
+
+
+class OntologyPermission(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    level: OntologyPermissionLevel = Field(default="read", description="动作权限等级。")
+    allowed: bool = Field(default=True, description="是否可直接执行当前动作。")
+    reason: str = Field(default="", description="权限判断原因。")
+
+
+class OntologyFieldError(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    field: str = Field(description="发生问题的字段。")
+    code: str = Field(description="错误码。")
+    message: str = Field(description="面向前端展示的说明。")
+
+
+class OntologyParseRequest(BaseModel):
+    query: str = Field(min_length=1, description="自然语言问题。")
+    user_id: str | None = Field(default=None, description="当前请求用户 ID。")
+    context_json: dict[str, Any] = Field(
+        default_factory=dict,
+        description="用户上下文，例如角色、部门、是否管理员。",
+    )
+
+
+class OntologyParseResult(BaseModel):
+    scenario: OntologyScenario = Field(default="unknown", description="业务场景。")
+    intent: OntologyIntent = Field(default="query", description="用户意图。")
+    entities: list[OntologyEntity] = Field(default_factory=list, description="业务对象列表。")
+    time_range: OntologyTimeRange = Field(
+        default_factory=OntologyTimeRange,
+        description="时间范围。",
+    )
+    metrics: list[OntologyMetric] = Field(default_factory=list, description="指标解析结果。")
+    constraints: list[OntologyConstraint] = Field(
+        default_factory=list,
+        description="过滤、阈值、排序等约束。",
+    )
+    risk_flags: list[str] = Field(default_factory=list, description="风险信号列表。")
+    permission: OntologyPermission = Field(
+        default_factory=OntologyPermission,
+        description="权限结果。",
+    )
+    confidence: float = Field(default=0.0, ge=0.0, le=1.0, description="整体置信度。")
+    missing_slots: list[str] = Field(default_factory=list, description="继续处理所缺少的关键槽位。")
+    ambiguity: list[str] = Field(default_factory=list, description="当前识别中的潜在歧义。")
+    parse_strategy: OntologyParseStrategy = Field(
+        default="rule_fallback",
+        description="本次语义解析使用的主策略。",
+    )
+    clarification_required: bool = Field(default=False, description="是否需要追问。")
+    clarification_question: str | None = Field(default=None, description="推荐追问问题。")
+    run_id: str = Field(description="关联的 AgentRun.run_id。")
+    field_errors: list[OntologyFieldError] = Field(
+        default_factory=list,
+        description="字段级错误或提示。",
+    )
--- a/server/src/app/schemas/orchestrator.py
+++ b/server/src/app/schemas/orchestrator.py
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+from typing import Any, Literal
+
+from pydantic import BaseModel, Field
+
+OrchestratorSource = Literal["user_message", "schedule", "system_event"]
+OrchestratorAgent = Literal["user_agent", "hermes"]
+OrchestratorStatus = Literal["succeeded", "blocked", "failed"]
+
+
+class OrchestratorRequest(BaseModel):
+    source: OrchestratorSource = Field(description="请求来源。")
+    user_id: str | None = Field(default=None, description="当前用户 ID，任务触发可为空。")
+    message: str | None = Field(default=None, description="用户消息或任务描述。")
+    task_id: str | None = Field(default=None, description="任务资产 ID，schedule 触发时优先使用。")
+    context_json: dict[str, Any] = Field(
+        default_factory=dict,
+        description="用户上下文、测试开关或调用方附加信息。",
+    )
+
+
+class OrchestratorTraceSummary(BaseModel):
+    scenario: str = Field(description="语义场景。")
+    intent: str = Field(description="语义意图。")
+    tool_count: int = Field(default=0, ge=0, description="工具调用总数。")
+    failed_tool_count: int = Field(default=0, ge=0, description="失败工具调用数量。")
+    selected_capability_codes: list[str] = Field(
+        default_factory=list,
+        description="本次路由命中的能力编码。",
+    )
+    degraded: bool = Field(default=False, description="是否发生降级。")
+
+
+class OrchestratorResponse(BaseModel):
+    run_id: str = Field(description="本次运行的唯一 run_id。")
+    selected_agent: OrchestratorAgent | None = Field(
+        default=None,
+        description="最终路由到的下游 Agent。",
+    )
+    route_reason: str = Field(description="路由原因摘要。")
+    permission_level: str = Field(description="权限级别。")
+    status: OrchestratorStatus = Field(description="最终运行状态。")
+    result: dict[str, Any] = Field(default_factory=dict, description="对前端可直接展示的最小结果。")
+    requires_confirmation: bool = Field(default=False, description="是否需要用户或管理员确认。")
+    trace_summary: OrchestratorTraceSummary = Field(description="简化后的 Trace 摘要。")
--- a/server/src/app/schemas/user_agent.py
+++ b/server/src/app/schemas/user_agent.py
@@ -0,0 +1,58 @@
+from __future__ import annotations
+
+from typing import Any, Literal
+
+from pydantic import BaseModel, Field
+
+from app.schemas.ontology import OntologyParseResult
+
+UserAgentCitationType = Literal["rule", "knowledge"]
+
+
+class UserAgentCitation(BaseModel):
+    source_type: UserAgentCitationType = Field(description="引用来源类型。")
+    code: str = Field(description="来源编码。")
+    title: str = Field(description="来源标题。")
+    version: str | None = Field(default=None, description="引用版本。")
+    updated_at: str | None = Field(default=None, description="来源更新时间。")
+    excerpt: str | None = Field(default=None, description="面向用户展示的引用摘要。")
+
+
+class UserAgentSuggestedAction(BaseModel):
+    label: str = Field(description="建议动作文案。")
+    action_type: str = Field(description="动作类型，例如 open_detail / create_draft。")
+    description: str = Field(default="", description="动作说明。")
+
+
+class UserAgentDraftPayload(BaseModel):
+    draft_type: str = Field(description="草稿类型。")
+    title: str = Field(description="草稿标题。")
+    body: str = Field(description="草稿正文。")
+    confirmation_required: bool = Field(default=True, description="是否需要人工确认。")
+
+
+class UserAgentRequest(BaseModel):
+    run_id: str = Field(description="关联的 AgentRun.run_id。")
+    user_id: str | None = Field(default=None, description="当前请求用户 ID。")
+    message: str = Field(description="原始用户问题。")
+    ontology: OntologyParseResult = Field(description="语义解析结果。")
+    context_json: dict[str, Any] = Field(default_factory=dict, description="附加上下文。")
+    tool_payload: dict[str, Any] = Field(default_factory=dict, description="工具返回的原始结果。")
+    selected_capability_codes: list[str] = Field(
+        default_factory=list,
+        description="本次命中的能力编码。",
+    )
+    degraded: bool = Field(default=False, description="当前是否发生降级。")
+    requires_confirmation: bool = Field(default=False, description="是否要求确认。")
+
+
+class UserAgentResponse(BaseModel):
+    answer: str = Field(description="面向用户展示的自然语言回答。")
+    citations: list[UserAgentCitation] = Field(default_factory=list, description="规则或知识引用。")
+    suggested_actions: list[UserAgentSuggestedAction] = Field(
+        default_factory=list,
+        description="建议的下一步动作。",
+    )
+    draft_payload: UserAgentDraftPayload | None = Field(default=None, description="可选草稿内容。")
+    risk_flags: list[str] = Field(default_factory=list, description="本次回答关联的风险标签。")
+    requires_confirmation: bool = Field(default=False, description="是否需要人工确认。")
--- a/server/src/app/services/ontology.py
+++ b/server/src/app/services/ontology.py
--- a/server/src/app/services/orchestrator.py
+++ b/server/src/app/services/orchestrator.py
@@ -0,0 +1,887 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import UTC, datetime
+from time import perf_counter
+from typing import Any
+
+from sqlalchemy import func, select
+from sqlalchemy.orm import Session
+
+from app.core.agent_enums import (
+    AgentAssetStatus,
+    AgentAssetType,
+    AgentName,
+    AgentPermissionLevel,
+    AgentRunSource,
+    AgentRunStatus,
+    AgentToolType,
+)
+from app.core.logging import get_logger
+from app.models.financial_record import (
+    AccountsPayableRecord,
+    AccountsReceivableRecord,
+    ExpenseClaim,
+)
+from app.schemas.agent_asset import AgentAssetListItem, AgentAssetRead
+from app.schemas.ontology import OntologyParseRequest, OntologyParseResult
+from app.schemas.orchestrator import (
+    OrchestratorRequest,
+    OrchestratorResponse,
+    OrchestratorTraceSummary,
+)
+from app.schemas.user_agent import UserAgentRequest, UserAgentResponse
+from app.services.agent_assets import AgentAssetService
+from app.services.agent_foundation import AgentFoundationService
+from app.services.agent_runs import AgentRunService
+from app.services.ontology import SemanticOntologyService
+from app.services.user_agent import UserAgentService
+
+logger = get_logger("app.services.orchestrator")
+
+SCENARIO_TO_DOMAIN = {
+    "expense": "expense",
+    "accounts_receivable": "ar",
+    "accounts_payable": "ap",
+    "knowledge": "knowledge",
+    "unknown": "system",
+}
+
+
+@dataclass(slots=True)
+class ExecutionOutcome:
+    status: str
+    result: dict[str, Any]
+    degraded: bool
+    tool_count: int
+    failed_tool_count: int
+
+
+class OrchestratorService:
+    def __init__(self, db: Session) -> None:
+        self.db = db
+        self.asset_service = AgentAssetService(db)
+        self.run_service = AgentRunService(db)
+        self.ontology_service = SemanticOntologyService(db)
+        self.user_agent_service = UserAgentService(db)
+
+    def run(self, payload: OrchestratorRequest) -> OrchestratorResponse:
+        AgentFoundationService(self.db).ensure_foundation_ready()
+        route_json: dict[str, Any] = {
+            "orchestrated_by": AgentName.ORCHESTRATOR.value,
+            "stage": "created",
+        }
+        run = self.run_service.create_run(
+            agent=AgentName.ORCHESTRATOR.value,
+            source=payload.source,
+            user_id=payload.user_id,
+            task_id=payload.task_id,
+            ontology_json={},
+            route_json=route_json,
+            permission_level=AgentPermissionLevel.READ.value,
+            status=AgentRunStatus.RUNNING.value,
+            result_summary="Orchestrator 已接收请求。",
+        )
+
+        try:
+            message, task_asset = self._resolve_message(payload)
+            ontology = self.ontology_service.parse_for_run(
+                OntologyParseRequest(
+                    query=message,
+                    user_id=payload.user_id,
+                    context_json=payload.context_json,
+                ),
+                run_id=run.run_id,
+            )
+            if payload.context_json.get("simulate_orchestrator_exception"):
+                raise RuntimeError("simulated orchestrator exception")
+            selected_agent, route_reason = self._select_agent(payload, ontology)
+            capabilities = self._select_capabilities(
+                payload=payload,
+                ontology=ontology,
+                task_asset=task_asset,
+            )
+            selected_capability_codes = self._flatten_capability_codes(capabilities)
+            requires_confirmation = (
+                ontology.permission.level == AgentPermissionLevel.APPROVAL_REQUIRED.value
+            )
+
+            route_json = {
+                "orchestrated_by": AgentName.ORCHESTRATOR.value,
+                "stage": "routed",
+                "selected_agent": selected_agent,
+                "route_reason": route_reason,
+                "selected_capability_codes": selected_capability_codes,
+                "ontology_run_id": ontology.run_id,
+            }
+
+            if ontology.permission.level == AgentPermissionLevel.FORBIDDEN.value:
+                outcome = ExecutionOutcome(
+                    status=AgentRunStatus.BLOCKED.value,
+                    result={
+                        "message": ontology.permission.reason,
+                        "clarification_question": ontology.clarification_question,
+                        "degraded": False,
+                    },
+                    degraded=False,
+                    tool_count=0,
+                    failed_tool_count=0,
+                )
+                selected_agent = None
+                route_reason = "permission_forbidden"
+                route_json["stage"] = "blocked"
+                route_json["route_reason"] = route_reason
+            elif ontology.clarification_required:
+                outcome = ExecutionOutcome(
+                    status=AgentRunStatus.BLOCKED.value,
+                    result={
+                        "message": ontology.clarification_question or "需要补充更多上下文。",
+                        "clarification_required": True,
+                        "missing_slots": ontology.missing_slots,
+                        "ambiguity": ontology.ambiguity,
+                        "parse_strategy": ontology.parse_strategy,
+                        "degraded": False,
+                    },
+                    degraded=False,
+                    tool_count=0,
+                    failed_tool_count=0,
+                )
+                route_reason = "clarification_required"
+                route_json["stage"] = "clarification"
+                route_json["route_reason"] = route_reason
+            elif selected_agent == AgentName.HERMES.value:
+                outcome = self._execute_hermes(
+                    payload=payload,
+                    run_id=run.run_id,
+                    ontology=ontology,
+                    capabilities=capabilities,
+                    requires_confirmation=requires_confirmation,
+                    task_asset=task_asset,
+                )
+            else:
+                outcome = self._execute_user_agent(
+                    payload=payload,
+                    run_id=run.run_id,
+                    ontology=ontology,
+                    capabilities=capabilities,
+                    requires_confirmation=requires_confirmation,
+                )
+
+            final_status = (
+                AgentRunStatus.BLOCKED.value
+                if requires_confirmation
+                and outcome.status == AgentRunStatus.SUCCEEDED.value
+                and ontology.permission.level == AgentPermissionLevel.APPROVAL_REQUIRED.value
+                else outcome.status
+            )
+            result_message = (
+                str(outcome.result.get("message", "")).strip()
+                or "Orchestrator 执行完成。"
+            )
+            self.run_service.update_run(
+                run.run_id,
+                agent=selected_agent or AgentName.ORCHESTRATOR.value,
+                ontology_json=self._build_ontology_json(ontology),
+                route_json={
+                    **route_json,
+                    "requires_confirmation": requires_confirmation,
+                    "degraded": outcome.degraded,
+                },
+                permission_level=ontology.permission.level,
+                status=final_status,
+                result_summary=result_message,
+                error_message=None,
+                finished_at=datetime.now(UTC),
+            )
+            return OrchestratorResponse(
+                run_id=run.run_id,
+                selected_agent=selected_agent,
+                route_reason=route_reason,
+                permission_level=ontology.permission.level,
+                status=self._normalize_response_status(final_status),
+                result=outcome.result,
+                requires_confirmation=requires_confirmation,
+                trace_summary=OrchestratorTraceSummary(
+                    scenario=ontology.scenario,
+                    intent=ontology.intent,
+                    tool_count=outcome.tool_count,
+                    failed_tool_count=outcome.failed_tool_count,
+                    selected_capability_codes=selected_capability_codes,
+                    degraded=outcome.degraded,
+                ),
+            )
+        except Exception as exc:
+            logger.exception("Orchestrator run failed run_id=%s", run.run_id)
+            self.run_service.update_run(
+                run.run_id,
+                agent=AgentName.ORCHESTRATOR.value,
+                route_json={**route_json, "stage": "failed"},
+                status=AgentRunStatus.FAILED.value,
+                result_summary="Orchestrator 执行失败。",
+                error_message=str(exc),
+                finished_at=datetime.now(UTC),
+            )
+            return OrchestratorResponse(
+                run_id=run.run_id,
+                selected_agent=None,
+                route_reason="orchestrator_exception",
+                permission_level=AgentPermissionLevel.READ.value,
+                status="failed",
+                result={"message": f"Orchestrator 执行失败：{exc}"},
+                requires_confirmation=False,
+                trace_summary=OrchestratorTraceSummary(
+                    scenario="unknown",
+                    intent="query",
+                    tool_count=0,
+                    failed_tool_count=0,
+                    selected_capability_codes=[],
+                    degraded=False,
+                ),
+            )
+
+    def _resolve_message(
+        self,
+        payload: OrchestratorRequest,
+    ) -> tuple[str, AgentAssetRead | None]:
+        task_asset = None
+        if payload.task_id:
+            task_asset = self.asset_service.get_asset(payload.task_id)
+
+        if payload.message and payload.message.strip():
+            return payload.message.strip(), task_asset
+
+        if task_asset is not None:
+            description = str(task_asset.description or "").strip()
+            scenario_text = " ".join(str(item) for item in task_asset.scenario_json)
+            message = f"{task_asset.name} {description} {scenario_text}".strip()
+            return message, task_asset
+
+        if payload.source == AgentRunSource.SCHEDULE.value:
+            return "定时风险巡检任务", task_asset
+
+        raise ValueError("message 或 task_id 至少需要提供一个。")
+
+    @staticmethod
+    def _select_agent(
+        payload: OrchestratorRequest,
+        ontology: OntologyParseResult,
+    ) -> tuple[str, str]:
+        if payload.source == AgentRunSource.SCHEDULE.value:
+            return AgentName.HERMES.value, "schedule_source_defaults_to_hermes"
+        if payload.source == AgentRunSource.SYSTEM_EVENT.value and ontology.intent == "risk_check":
+            return AgentName.HERMES.value, "system_event_risk_check_routes_to_hermes"
+        if ontology.intent == "risk_check" and payload.source == AgentRunSource.SCHEDULE.value:
+            return AgentName.HERMES.value, "scheduled_risk_check_routes_to_hermes"
+        if ontology.intent in {"query", "explain", "draft", "compare", "operate"}:
+            return AgentName.USER_AGENT.value, f"{ontology.intent}_routes_to_user_agent"
+        return AgentName.USER_AGENT.value, "user_message_defaults_to_user_agent"
+
+    def _select_capabilities(
+        self,
+        *,
+        payload: OrchestratorRequest,
+        ontology: OntologyParseResult,
+        task_asset: AgentAssetRead | None,
+    ) -> dict[str, list[AgentAssetListItem | AgentAssetRead]]:
+        domain_value = SCENARIO_TO_DOMAIN.get(ontology.scenario)
+        rules = self._rank_assets(
+            self.asset_service.list_assets(
+                asset_type=AgentAssetType.RULE.value,
+                status=AgentAssetStatus.ACTIVE.value,
+                domain=domain_value if domain_value not in {"knowledge", "system"} else None,
+            ),
+            ontology,
+        )
+        skills = self._rank_assets(
+            self.asset_service.list_assets(
+                asset_type=AgentAssetType.SKILL.value,
+                status=AgentAssetStatus.ACTIVE.value,
+                domain=domain_value if domain_value not in {"system"} else None,
+            ),
+            ontology,
+        )
+        mcps = self._rank_assets(
+            self.asset_service.list_assets(
+                asset_type=AgentAssetType.MCP.value,
+                status=AgentAssetStatus.ACTIVE.value,
+            ),
+            ontology,
+        )
+        tasks: list[AgentAssetListItem | AgentAssetRead] = []
+        if task_asset is not None and task_asset.status == AgentAssetStatus.ACTIVE.value:
+            tasks.append(task_asset)
+        elif payload.source == AgentRunSource.SCHEDULE.value:
+            tasks = self._rank_assets(
+                self.asset_service.list_assets(
+                    asset_type=AgentAssetType.TASK.value,
+                    status=AgentAssetStatus.ACTIVE.value,
+                ),
+                ontology,
+            )
+
+        return {
+            "rules": rules,
+            "skills": skills,
+            "mcps": mcps,
+            "tasks": tasks,
+        }
+
+    def _execute_user_agent(
+        self,
+        *,
+        payload: OrchestratorRequest,
+        run_id: str,
+        ontology: OntologyParseResult,
+        capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]],
+        requires_confirmation: bool,
+    ) -> ExecutionOutcome:
+        selected_capability_codes = self._flatten_capability_codes(capabilities)
+        if requires_confirmation:
+            response, degraded = self._invoke_tool(
+                run_id=run_id,
+                tool_type=AgentToolType.LLM.value,
+                tool_name="user_agent.confirmation_placeholder",
+                request_json={
+                    "message": payload.message,
+                    "permission_level": ontology.permission.level,
+                },
+                context_json=payload.context_json,
+                executor=lambda: {
+                    "confirmation_title": "操作需要确认",
+                    "message": f"{ontology.permission.reason} 当前仅返回确认摘要，不直接执行动作。",
+                },
+                fallback_factory=lambda exc: {
+                    "confirmation_title": "操作需要确认",
+                    "message": f"确认摘要生成失败，已阻断自动执行：{exc}",
+                },
+            )
+            return ExecutionOutcome(
+                status=AgentRunStatus.BLOCKED.value,
+                result={**response, "degraded": degraded},
+                degraded=degraded,
+                tool_count=1,
+                failed_tool_count=1 if degraded else 0,
+            )
+
+        next_step = self._resolve_next_step(ontology, payload.source)
+        if next_step == "query_database":
+            tool_payload, degraded = self._invoke_tool(
+                run_id=run_id,
+                tool_type=AgentToolType.DATABASE.value,
+                tool_name=self._database_tool_name(ontology.scenario),
+                request_json=self._build_ontology_json(ontology),
+                context_json=payload.context_json,
+                executor=lambda: self._build_database_answer(ontology),
+                fallback_factory=lambda exc: {
+                    "message": f"数据库查询暂时不可用，已返回降级说明：{exc}",
+                    "degraded": True,
+                },
+            )
+            result = self._build_user_agent_result(
+                self.user_agent_service.respond(
+                    UserAgentRequest(
+                        run_id=run_id,
+                        user_id=payload.user_id,
+                        message=payload.message or "",
+                        ontology=ontology,
+                        context_json=payload.context_json,
+                        tool_payload=tool_payload,
+                        selected_capability_codes=selected_capability_codes,
+                        degraded=degraded,
+                        requires_confirmation=requires_confirmation,
+                    )
+                ),
+                degraded=degraded,
+            )
+            return ExecutionOutcome(
+                status=AgentRunStatus.SUCCEEDED.value,
+                result=result,
+                degraded=degraded,
+                tool_count=1,
+                failed_tool_count=1 if degraded else 0,
+            )
+
+        if next_step == "search_knowledge":
+            tool_payload, degraded = self._invoke_tool(
+                run_id=run_id,
+                tool_type=AgentToolType.DATABASE.value,
+                tool_name="knowledge.search",
+                request_json=self._build_ontology_json(ontology),
+                context_json=payload.context_json,
+                executor=lambda: self._build_knowledge_answer(ontology, capabilities),
+                fallback_factory=lambda exc: {
+                    "message": f"知识检索暂时不可用，建议稍后重试：{exc}",
+                    "degraded": True,
+                },
+            )
+            result = self._build_user_agent_result(
+                self.user_agent_service.respond(
+                    UserAgentRequest(
+                        run_id=run_id,
+                        user_id=payload.user_id,
+                        message=payload.message or "",
+                        ontology=ontology,
+                        context_json=payload.context_json,
+                        tool_payload=tool_payload,
+                        selected_capability_codes=selected_capability_codes,
+                        degraded=degraded,
+                        requires_confirmation=requires_confirmation,
+                    )
+                ),
+                degraded=degraded,
+            )
+            return ExecutionOutcome(
+                status=AgentRunStatus.SUCCEEDED.value,
+                result=result,
+                degraded=degraded,
+                tool_count=1,
+                failed_tool_count=1 if degraded else 0,
+            )
+
+        if next_step == "run_rule":
+            tool_payload, degraded = self._invoke_tool(
+                run_id=run_id,
+                tool_type=AgentToolType.RULE_ENGINE.value,
+                tool_name=self._rule_tool_name(capabilities),
+                request_json=self._build_ontology_json(ontology),
+                context_json=payload.context_json,
+                executor=lambda: self._build_rule_answer(ontology),
+                fallback_factory=lambda exc: {
+                    "message": f"规则检查暂时不可用，已返回人工复核建议：{exc}",
+                    "degraded": True,
+                },
+            )
+            result = self._build_user_agent_result(
+                self.user_agent_service.respond(
+                    UserAgentRequest(
+                        run_id=run_id,
+                        user_id=payload.user_id,
+                        message=payload.message or "",
+                        ontology=ontology,
+                        context_json=payload.context_json,
+                        tool_payload=tool_payload,
+                        selected_capability_codes=selected_capability_codes,
+                        degraded=degraded,
+                        requires_confirmation=requires_confirmation,
+                    )
+                ),
+                degraded=degraded,
+            )
+            return ExecutionOutcome(
+                status=AgentRunStatus.SUCCEEDED.value,
+                result=result,
+                degraded=degraded,
+                tool_count=1,
+                failed_tool_count=1 if degraded else 0,
+            )
+
+        tool_payload, degraded = self._invoke_tool(
+            run_id=run_id,
+            tool_type=AgentToolType.LLM.value,
+            tool_name="user_agent.draft_placeholder",
+            request_json=self._build_ontology_json(ontology),
+            context_json=payload.context_json,
+            executor=lambda: {
+                "message": (
+                    f"已生成 {ontology.scenario} 场景草稿，"
+                    "占位能力后续由 Day 5 User Agent 接管。"
+                ),
+                "draft_only": True,
+            },
+            fallback_factory=lambda exc: {
+                "message": f"草稿生成暂时不可用，请稍后再试：{exc}",
+                "degraded": True,
+            },
+        )
+        result = self._build_user_agent_result(
+            self.user_agent_service.respond(
+                UserAgentRequest(
+                    run_id=run_id,
+                    user_id=payload.user_id,
+                    message=payload.message or "",
+                    ontology=ontology,
+                    context_json=payload.context_json,
+                    tool_payload=tool_payload,
+                    selected_capability_codes=selected_capability_codes,
+                    degraded=degraded,
+                    requires_confirmation=requires_confirmation,
+                )
+            ),
+            degraded=degraded,
+        )
+        return ExecutionOutcome(
+            status=AgentRunStatus.SUCCEEDED.value,
+            result=result,
+            degraded=degraded,
+            tool_count=1,
+            failed_tool_count=1 if degraded else 0,
+        )
+
+    def _execute_hermes(
+        self,
+        *,
+        payload: OrchestratorRequest,
+        run_id: str,
+        ontology: OntologyParseResult,
+        capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]],
+        requires_confirmation: bool,
+        task_asset: AgentAssetRead | None,
+    ) -> ExecutionOutcome:
+        if requires_confirmation:
+            return ExecutionOutcome(
+                status=AgentRunStatus.BLOCKED.value,
+                result={
+                    "message": "Hermes 不会自动执行需要确认的高风险动作，已阻断。",
+                    "degraded": False,
+                },
+                degraded=False,
+                tool_count=0,
+                failed_tool_count=0,
+            )
+
+        rule_response, rule_degraded = self._invoke_tool(
+            run_id=run_id,
+            tool_type=AgentToolType.RULE_ENGINE.value,
+            tool_name=self._rule_tool_name(capabilities),
+            request_json=self._build_ontology_json(ontology),
+            context_json=payload.context_json,
+            executor=lambda: self._build_rule_answer(ontology),
+            fallback_factory=lambda exc: {
+                "message": f"规则巡检失败，已降级为待人工复核：{exc}",
+                "degraded": True,
+            },
+        )
+        mcp_response, mcp_degraded = self._invoke_tool(
+            run_id=run_id,
+            tool_type=AgentToolType.MCP.value,
+            tool_name=self._mcp_tool_name(capabilities),
+            request_json={
+                "task_code": task_asset.code if task_asset is not None else "",
+                "scenario": ontology.scenario,
+            },
+            context_json=payload.context_json,
+            executor=lambda: self._build_mcp_answer(task_asset, ontology),
+            fallback_factory=lambda exc: {
+                "message": f"MCP 调用失败，已使用缓存快照降级：{exc}",
+                "fallback": "used_cached_snapshot",
+            },
+        )
+        degraded = rule_degraded or mcp_degraded
+        failed_tool_count = int(rule_degraded) + int(mcp_degraded)
+        result = {
+            "message": self._build_hermes_message(
+                task_asset=task_asset,
+                ontology=ontology,
+                rule_response=rule_response,
+                mcp_response=mcp_response,
+                degraded=degraded,
+            ),
+            "report_type": task_asset.code if task_asset is not None else "hermes_runtime",
+            "degraded": degraded,
+        }
+        return ExecutionOutcome(
+            status=AgentRunStatus.SUCCEEDED.value,
+            result=result,
+            degraded=degraded,
+            tool_count=2,
+            failed_tool_count=failed_tool_count,
+        )
+
+    @staticmethod
+    def _resolve_next_step(ontology: OntologyParseResult, source: str) -> str:
+        if ontology.clarification_required:
+            return "ask_clarification"
+        if ontology.intent == "draft":
+            return "create_draft"
+        if ontology.scenario == "knowledge" or ontology.intent == "explain":
+            return "search_knowledge"
+        if ontology.intent == "risk_check" or source == AgentRunSource.SCHEDULE.value:
+            return "run_rule"
+        if ontology.intent in {"query", "compare"}:
+            return "query_database"
+        return "create_draft"
+
+    @staticmethod
+    def _flatten_capability_codes(
+        capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]],
+    ) -> list[str]:
+        codes: list[str] = []
+        for items in capabilities.values():
+            for item in items[:2]:
+                if item.code not in codes:
+                    codes.append(item.code)
+        return codes
+
+    def _rank_assets(
+        self,
+        items: list[AgentAssetListItem],
+        ontology: OntologyParseResult,
+    ) -> list[AgentAssetListItem]:
+        def score(item: AgentAssetListItem) -> tuple[int, str]:
+            item_tags = {str(value) for value in item.scenario_json or []}
+            weight = 0
+            if ontology.scenario in item_tags:
+                weight += 3
+            if ontology.intent in item_tags:
+                weight += 2
+            for risk_flag in ontology.risk_flags:
+                if risk_flag in item_tags:
+                    weight += 4
+            return weight, item.code
+
+        ranked = sorted(items, key=score, reverse=True)
+        if not ranked:
+            return []
+        scored = [item for item in ranked if score(item)[0] > 0]
+        return scored or ranked[:1]
+
+    def _invoke_tool(
+        self,
+        *,
+        run_id: str,
+        tool_type: str,
+        tool_name: str,
+        request_json: dict[str, Any],
+        context_json: dict[str, Any],
+        executor,
+        fallback_factory,
+    ) -> tuple[dict[str, Any], bool]:
+        started = perf_counter()
+        try:
+            self._maybe_raise_simulated_failure(tool_type, context_json)
+            response = executor()
+            duration_ms = int((perf_counter() - started) * 1000)
+            self.run_service.record_tool_call(
+                run_id=run_id,
+                tool_type=tool_type,
+                tool_name=tool_name,
+                request_json=request_json,
+                response_json=response,
+                status="succeeded",
+                duration_ms=duration_ms,
+            )
+            return response, False
+        except Exception as exc:
+            duration_ms = int((perf_counter() - started) * 1000)
+            response = fallback_factory(exc)
+            self.run_service.record_tool_call(
+                run_id=run_id,
+                tool_type=tool_type,
+                tool_name=tool_name,
+                request_json=request_json,
+                response_json=response,
+                status="failed",
+                duration_ms=duration_ms,
+                error_message=str(exc),
+            )
+            return response, True
+
+    @staticmethod
+    def _maybe_raise_simulated_failure(tool_type: str, context_json: dict[str, Any]) -> None:
+        expected = str(context_json.get("simulate_tool_failure") or "").strip().lower()
+        if not expected:
+            return
+        if expected == tool_type.lower():
+            raise RuntimeError(f"simulated {tool_type} failure")
+
+    def _build_database_answer(self, ontology: OntologyParseResult) -> dict[str, Any]:
+        if ontology.scenario == "expense":
+            count_stmt = select(func.count()).select_from(ExpenseClaim)
+            amount_stmt = select(
+                func.coalesce(func.sum(ExpenseClaim.amount), 0)
+            ).select_from(ExpenseClaim)
+            employee_names = [
+                item.normalized_value
+                for item in ontology.entities
+                if item.type == "employee"
+            ]
+            if employee_names:
+                count_stmt = count_stmt.where(ExpenseClaim.employee_name.in_(employee_names))
+                amount_stmt = amount_stmt.where(ExpenseClaim.employee_name.in_(employee_names))
+            total_count = int(self.db.scalar(count_stmt) or 0)
+            total_amount = float(self.db.scalar(amount_stmt) or 0)
+            return {
+                "record_count": total_count,
+                "total_amount": round(total_amount, 2),
+            }
+
+        if ontology.scenario == "accounts_receivable":
+            total_count = int(
+                self.db.scalar(
+                    select(func.count()).select_from(AccountsReceivableRecord)
+                )
+                or 0
+            )
+            total_amount = float(
+                self.db.scalar(
+                    select(func.coalesce(func.sum(AccountsReceivableRecord.amount_outstanding), 0))
+                )
+                or 0
+            )
+            return {
+                "record_count": total_count,
+                "outstanding_amount": round(total_amount, 2),
+            }
+
+        total_count = int(
+            self.db.scalar(select(func.count()).select_from(AccountsPayableRecord))
+            or 0
+        )
+        total_amount = float(
+            self.db.scalar(
+                select(func.coalesce(func.sum(AccountsPayableRecord.amount_outstanding), 0))
+            )
+            or 0
+        )
+        return {
+            "record_count": total_count,
+            "outstanding_amount": round(total_amount, 2),
+        }
+
+    @staticmethod
+    def _build_user_query_result(
+        ontology: OntologyParseResult,
+        response: dict[str, Any],
+    ) -> dict[str, Any]:
+        if ontology.scenario == "expense":
+            return {
+                "message": (
+                    f"已路由到 User Agent，占位查询结果：命中 {response['record_count']} 笔报销，"
+                    f"金额合计 {response['total_amount']} 元。"
+                ),
+                "data": response,
+            }
+        if ontology.scenario == "accounts_receivable":
+            return {
+                "message": (
+                    f"已路由到 User Agent，占位查询结果：命中 {response['record_count']} 条应收，"
+                    f"未回款金额 {response['outstanding_amount']} 元。"
+                ),
+                "data": response,
+            }
+        return {
+            "message": (
+                f"已路由到 User Agent，占位查询结果：命中 {response['record_count']} 条应付，"
+                f"待付金额 {response['outstanding_amount']} 元。"
+            ),
+            "data": response,
+        }
+
+    @staticmethod
+    def _build_user_agent_result(
+        response: UserAgentResponse,
+        *,
+        degraded: bool,
+    ) -> dict[str, Any]:
+        result = {
+            "message": response.answer,
+            "answer": response.answer,
+            "citations": [item.model_dump() for item in response.citations],
+            "suggested_actions": [item.model_dump() for item in response.suggested_actions],
+            "risk_flags": response.risk_flags,
+            "requires_confirmation": response.requires_confirmation,
+            "degraded": degraded,
+        }
+        if response.draft_payload is not None:
+            result["draft_payload"] = response.draft_payload.model_dump()
+        return result
+
+    @staticmethod
+    def _build_knowledge_answer(
+        ontology: OntologyParseResult,
+        capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]],
+    ) -> dict[str, Any]:
+        referenced = [item.code for item in capabilities["rules"][:1]] or [
+            "knowledge.policy.default"
+        ]
+        return {
+            "message": f"已路由到 User Agent，占位知识结果：建议先查看 {', '.join(referenced)}。",
+            "references": referenced,
+        }
+
+    @staticmethod
+    def _build_rule_answer(ontology: OntologyParseResult) -> dict[str, Any]:
+        risk_text = (
+            "、".join(ontology.risk_flags)
+            if ontology.risk_flags
+            else "未识别到明确风险标签"
+        )
+        return {
+            "message": f"已完成占位规则检查，风险标签：{risk_text}。",
+            "risk_flags": ontology.risk_flags,
+        }
+
+    @staticmethod
+    def _build_mcp_answer(
+        task_asset: AgentAssetRead | None,
+        ontology: OntologyParseResult,
+    ) -> dict[str, Any]:
+        return {
+            "message": (
+                f"已调用占位 MCP 快照，任务={task_asset.code if task_asset else 'none'}，"
+                f"scenario={ontology.scenario}。"
+            ),
+            "snapshot": "stubbed",
+        }
+
+    @staticmethod
+    def _build_hermes_message(
+        *,
+        task_asset: AgentAssetRead | None,
+        ontology: OntologyParseResult,
+        rule_response: dict[str, Any],
+        mcp_response: dict[str, Any],
+        degraded: bool,
+    ) -> str:
+        task_code = task_asset.code if task_asset is not None else "task.unspecified"
+        suffix = "，其中部分能力已降级。" if degraded else "。"
+        return (
+            f"Hermes 占位执行完成：任务 {task_code}，"
+            f"场景 {ontology.scenario}，规则结果={rule_response.get('message', '')}，"
+            f"MCP 结果={mcp_response.get('message', '')}{suffix}"
+        )
+
+    @staticmethod
+    def _database_tool_name(scenario: str) -> str:
+        if scenario == "expense":
+            return "database.expense_claims.lookup"
+        if scenario == "accounts_receivable":
+            return "database.accounts_receivable.lookup"
+        return "database.accounts_payable.lookup"
+
+    @staticmethod
+    def _rule_tool_name(
+        capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]],
+    ) -> str:
+        if capabilities["rules"]:
+            return capabilities["rules"][0].code
+        return "rule_engine.default_risk_check"
+
+    @staticmethod
+    def _mcp_tool_name(
+        capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]],
+    ) -> str:
+        if capabilities["mcps"]:
+            return capabilities["mcps"][0].code
+        return "mcp.default_snapshot"
+
+    @staticmethod
+    def _build_ontology_json(ontology: OntologyParseResult) -> dict[str, Any]:
+        return {
+            "scenario": ontology.scenario,
+            "intent": ontology.intent,
+            "entities": [item.model_dump() for item in ontology.entities],
+            "time_range": ontology.time_range.model_dump(),
+            "metrics": [item.model_dump() for item in ontology.metrics],
+            "constraints": [item.model_dump() for item in ontology.constraints],
+            "risk_flags": ontology.risk_flags,
+            "permission": ontology.permission.model_dump(),
+        }
+
+    @staticmethod
+    def _normalize_response_status(status: str) -> str:
+        if status == AgentRunStatus.FAILED.value:
+            return "failed"
+        if status == AgentRunStatus.BLOCKED.value:
+            return "blocked"
+        return "succeeded"
--- a/server/src/app/services/runtime_chat.py
+++ b/server/src/app/services/runtime_chat.py
@@ -0,0 +1,252 @@
+from __future__ import annotations
+
+from http import HTTPStatus
+from typing import Any
+
+from sqlalchemy.orm import Session
+
+from app.core.logging import get_logger
+from app.services.model_connectivity import (
+    AZURE_API_VERSION,
+    ConnectivityCheckError,
+    _build_azure_deployment_base,
+    _build_headers,
+    _ensure_path,
+    _normalize_endpoint,
+    _send_json_request,
+)
+from app.services.settings import SettingsService
+
+logger = get_logger("app.services.runtime_chat")
+
+
+class RuntimeChatService:
+    def __init__(self, db: Session) -> None:
+        self.db = db
+        self.settings_service = SettingsService(db)
+
+    def complete(
+        self,
+        messages: list[dict[str, str]],
+        *,
+        slot_priority: tuple[str, ...] = ("main", "backup"),
+        max_tokens: int = 500,
+        temperature: float = 0.2,
+    ) -> str | None:
+        for slot in slot_priority:
+            config = self._load_chat_slot(slot)
+            if config is None:
+                continue
+
+            try:
+                response_text = self._request_chat_completion(
+                    config,
+                    messages,
+                    max_tokens=max_tokens,
+                    temperature=temperature,
+                )
+            except Exception as exc:
+                logger.warning(
+                    "Runtime chat request failed slot=%s provider=%s: %s",
+                    slot,
+                    config["provider"],
+                    exc,
+                )
+                continue
+
+            if response_text:
+                return response_text.strip()
+
+        return None
+
+    def _load_chat_slot(self, slot: str) -> dict[str, str] | None:
+        try:
+            config = self.settings_service.get_runtime_model_config(slot)
+        except ValueError:
+            return None
+
+        if config["capability"] != "chat":
+            return None
+
+        provider = str(config["provider"] or "").strip()
+        endpoint = str(config["endpoint"] or "").strip()
+        model = str(config["model"] or "").strip()
+        api_key = str(config["apiKey"] or "").strip()
+
+        if not provider or not endpoint or not model:
+            return None
+
+        if provider != "Ollama" and not api_key:
+            logger.info("Skip runtime chat slot=%s because api key is empty", slot)
+            return None
+
+        return {
+            "slot": slot,
+            "provider": provider,
+            "endpoint": endpoint,
+            "model": model,
+            "apiKey": api_key,
+        }
+
+    def _request_chat_completion(
+        self,
+        config: dict[str, str],
+        messages: list[dict[str, str]],
+        *,
+        max_tokens: int,
+        temperature: float,
+    ) -> str:
+        provider = config["provider"]
+        endpoint = config["endpoint"]
+        model = config["model"]
+        api_key = config["apiKey"]
+
+        if provider == "Azure OpenAI":
+            return self._request_azure_openai(
+                endpoint=endpoint,
+                model=model,
+                api_key=api_key,
+                messages=messages,
+                max_tokens=max_tokens,
+                temperature=temperature,
+            )
+
+        if provider == "Ollama":
+            return self._request_ollama(
+                endpoint=endpoint,
+                model=model,
+                api_key=api_key,
+                messages=messages,
+                max_tokens=max_tokens,
+                temperature=temperature,
+            )
+
+        return self._request_openai_compatible(
+            endpoint=endpoint,
+            model=model,
+            api_key=api_key,
+            messages=messages,
+            max_tokens=max_tokens,
+            temperature=temperature,
+        )
+
+    def _request_openai_compatible(
+        self,
+        *,
+        endpoint: str,
+        model: str,
+        api_key: str,
+        messages: list[dict[str, str]],
+        max_tokens: int,
+        temperature: float,
+    ) -> str:
+        url = _ensure_path(_normalize_endpoint(endpoint), "chat/completions")
+        status_code, payload = _send_json_request(
+            "POST",
+            url,
+            headers=_build_headers(api_key=api_key, use_bearer=True),
+            payload={
+                "model": model,
+                "messages": messages,
+                "max_tokens": max_tokens,
+                "temperature": temperature,
+            },
+        )
+        if status_code >= HTTPStatus.BAD_REQUEST:
+            raise ConnectivityCheckError(
+                f"模型接口返回异常状态 {status_code}。",
+                status_code=status_code,
+            )
+        return self._extract_openai_text(payload)
+
+    def _request_ollama(
+        self,
+        *,
+        endpoint: str,
+        model: str,
+        api_key: str,
+        messages: list[dict[str, str]],
+        max_tokens: int,
+        temperature: float,
+    ) -> str:
+        url = _ensure_path(_normalize_endpoint(endpoint), "api/chat")
+        status_code, payload = _send_json_request(
+            "POST",
+            url,
+            headers=_build_headers(api_key=api_key, use_bearer=False),
+            payload={
+                "model": model,
+                "messages": messages,
+                "stream": False,
+                "options": {
+                    "num_predict": max_tokens,
+                    "temperature": temperature,
+                },
+            },
+        )
+        if status_code >= HTTPStatus.BAD_REQUEST:
+            raise ConnectivityCheckError(
+                f"Ollama 返回异常状态 {status_code}。",
+                status_code=status_code,
+            )
+        return str((payload or {}).get("message", {}).get("content", "")).strip()
+
+    def _request_azure_openai(
+        self,
+        *,
+        endpoint: str,
+        model: str,
+        api_key: str,
+        messages: list[dict[str, str]],
+        max_tokens: int,
+        temperature: float,
+    ) -> str:
+        deployment_base = _build_azure_deployment_base(endpoint, model)
+        url = f"{deployment_base}/chat/completions?api-version={AZURE_API_VERSION}"
+        status_code, payload = _send_json_request(
+            "POST",
+            url,
+            headers=_build_headers(api_key=api_key, use_bearer=False, use_api_key=True),
+            payload={
+                "messages": messages,
+                "max_tokens": max_tokens,
+                "temperature": temperature,
+            },
+        )
+        if status_code >= HTTPStatus.BAD_REQUEST:
+            raise ConnectivityCheckError(
+                f"Azure OpenAI 返回异常状态 {status_code}。",
+                status_code=status_code,
+            )
+        return self._extract_openai_text(payload)
+
+    @staticmethod
+    def _extract_openai_text(payload: Any) -> str:
+        if not isinstance(payload, dict):
+            return ""
+
+        choices = payload.get("choices")
+        if not isinstance(choices, list) or not choices:
+            return ""
+
+        first_choice = choices[0]
+        if not isinstance(first_choice, dict):
+            return ""
+
+        message = first_choice.get("message")
+        if isinstance(message, dict):
+            content = message.get("content", "")
+            if isinstance(content, str):
+                return content.strip()
+            if isinstance(content, list):
+                parts: list[str] = []
+                for item in content:
+                    if isinstance(item, dict) and item.get("type") == "text":
+                        parts.append(str(item.get("text", "")))
+                return "\n".join(part.strip() for part in parts if part.strip()).strip()
+
+        text = first_choice.get("text")
+        if isinstance(text, str):
+            return text.strip()
+
+        return ""
--- a/server/src/app/services/user_agent.py
+++ b/server/src/app/services/user_agent.py
@@ -0,0 +1,547 @@
+from __future__ import annotations
+
+import json
+import re
+
+from sqlalchemy.orm import Session
+
+from app.core.agent_enums import AgentAssetStatus, AgentAssetType
+from app.schemas.agent_asset import AgentAssetListItem
+from app.schemas.user_agent import (
+    UserAgentCitation,
+    UserAgentDraftPayload,
+    UserAgentRequest,
+    UserAgentResponse,
+    UserAgentSuggestedAction,
+)
+from app.services.agent_assets import AgentAssetService
+from app.services.agent_foundation import AgentFoundationService
+from app.services.runtime_chat import RuntimeChatService
+
+SCENARIO_LABELS = {
+    "expense": "报销",
+    "accounts_receivable": "应收",
+    "accounts_payable": "应付",
+    "knowledge": "知识",
+    "unknown": "通用",
+}
+
+RISK_REASON_MAP = {
+    "duplicate_expense": "检测到同员工、同金额或近似单据存在重复提交迹象。",
+    "amount_over_limit": "金额超过当前制度或预算阈值，需要补充例外说明。",
+    "invoice_anomaly": "票据或附件完整性不满足当前规则要求，需要补件或人工复核。",
+    "ar_overdue": "应收账款已出现逾期，存在回款延迟风险。",
+    "ap_overdue": "应付付款已出现逾期，可能影响供应商履约或合作关系。",
+}
+
+GENERIC_EXPENSE_PROMPTS = {
+    "报销",
+    "我要报销",
+    "我想报销",
+    "帮我报销",
+    "我要申请报销",
+    "发起报销",
+    "提交报销",
+}
+
+EXPLICIT_DRAFT_KEYWORDS = ("生成", "草稿", "起草", "创建", "发起", "准备")
+
+EXPENSE_TYPE_LABELS = {
+    "travel": "差旅",
+    "hotel": "住宿",
+    "transport": "交通",
+    "meal": "餐费",
+    "meeting": "会务",
+    "entertainment": "招待",
+}
+
+
+class UserAgentService:
+    def __init__(self, db: Session) -> None:
+        self.db = db
+        self.asset_service = AgentAssetService(db)
+        self.runtime_chat_service = RuntimeChatService(db)
+
+    def respond(self, payload: UserAgentRequest) -> UserAgentResponse:
+        AgentFoundationService(self.db).ensure_foundation_ready()
+        citations = self._build_rule_citations(payload)
+        suggested_actions = self._build_suggested_actions(payload)
+        risk_flags = self._resolve_risk_flags(payload)
+        draft_payload = (
+            self._build_draft_payload(payload)
+            if payload.ontology.intent == "draft"
+            else None
+        )
+
+        if payload.degraded and payload.tool_payload.get("message"):
+            return UserAgentResponse(
+                answer=str(payload.tool_payload["message"]),
+                citations=citations,
+                suggested_actions=suggested_actions,
+                risk_flags=risk_flags,
+                requires_confirmation=payload.requires_confirmation,
+            )
+
+        guided_answer = self._build_guided_answer(payload)
+        if guided_answer:
+            return UserAgentResponse(
+                answer=guided_answer,
+                citations=citations,
+                suggested_actions=suggested_actions,
+                draft_payload=draft_payload,
+                risk_flags=risk_flags,
+                requires_confirmation=payload.requires_confirmation,
+            )
+
+        fallback_answer = self._build_fallback_answer(
+            payload,
+            citations=citations,
+            draft_payload=draft_payload,
+        )
+        answer = self._generate_answer_with_model(
+            payload,
+            citations=citations,
+            suggested_actions=suggested_actions,
+            risk_flags=risk_flags,
+            draft_payload=draft_payload,
+            fallback_answer=fallback_answer,
+        )
+
+        return UserAgentResponse(
+            answer=answer or fallback_answer,
+            citations=citations,
+            suggested_actions=suggested_actions,
+            draft_payload=draft_payload,
+            risk_flags=risk_flags,
+            requires_confirmation=payload.requires_confirmation,
+        )
+
+    def _build_fallback_answer(
+        self,
+        payload: UserAgentRequest,
+        *,
+        citations: list[UserAgentCitation],
+        draft_payload: UserAgentDraftPayload | None,
+    ) -> str:
+        if payload.ontology.intent in {"query", "compare"}:
+            return self._build_query_answer(payload)
+
+        if payload.ontology.intent == "risk_check":
+            return self._build_risk_answer(payload, citations)
+
+        if payload.ontology.intent == "draft" and draft_payload is not None:
+            return (
+                f"已生成 {draft_payload.title}，当前仅返回待人工确认的草稿内容，"
+                "仍需人工确认后再进入正式流程。"
+            )
+
+        return self._build_explain_answer(payload, citations)
+
+    def _build_guided_answer(self, payload: UserAgentRequest) -> str | None:
+        if not self._is_generic_expense_prompt(payload):
+            return self._build_implicit_expense_draft_guidance(payload)
+
+        attachment_names = self._resolve_attachment_names(payload)
+        attachment_hint = ""
+        if attachment_names:
+            attachment_hint = (
+                f" 我已带入 {len(attachment_names)} 份附件名称，但目前还不能直接读取附件内容，"
+                "仍需要你补充关键信息。"
+            )
+
+        return (
+            "可以帮你发起报销。请补充费用类型、发生时间、金额、事由和相关对象，"
+            "或者直接上传票据附件，我再继续帮你判断能否报、缺什么材料以及生成报销草稿。"
+            f"{attachment_hint}"
+        )
+
+    def _build_implicit_expense_draft_guidance(
+        self,
+        payload: UserAgentRequest,
+    ) -> str | None:
+        if not self._is_implicit_expense_draft_request(payload):
+            return None
+
+        amount_text = next(
+            (item.value for item in payload.ontology.entities if item.type == "amount"),
+            "",
+        )
+        expense_type = next(
+            (
+                EXPENSE_TYPE_LABELS.get(item.normalized_value, item.value)
+                for item in payload.ontology.entities
+                if item.type == "expense_type"
+            ),
+            "报销",
+        )
+        time_text = payload.ontology.time_range.raw or "本次"
+        amount_hint = f"，金额 {amount_text}" if amount_text else ""
+
+        return (
+            f"已识别到一笔{time_text}的{expense_type}支出{amount_hint}。"
+            "如果要继续生成报销草稿，还需要补充客户单位、参与人员、费用明细和票据附件。"
+            "你也可以继续上传发票或图片，我会把这些信息带入后续对话。"
+        )
+
+    def _generate_answer_with_model(
+        self,
+        payload: UserAgentRequest,
+        *,
+        citations: list[UserAgentCitation],
+        suggested_actions: list[UserAgentSuggestedAction],
+        risk_flags: list[str],
+        draft_payload: UserAgentDraftPayload | None,
+        fallback_answer: str,
+    ) -> str | None:
+        messages = self._build_model_messages(
+            payload,
+            citations=citations,
+            suggested_actions=suggested_actions,
+            risk_flags=risk_flags,
+            draft_payload=draft_payload,
+            fallback_answer=fallback_answer,
+        )
+        return self._sanitize_model_answer(
+            self.runtime_chat_service.complete(
+                messages,
+                max_tokens=420,
+                temperature=0.2,
+            )
+        )
+
+    def _sanitize_model_answer(self, answer: str | None) -> str | None:
+        if not answer:
+            return None
+
+        cleaned = re.sub(r"<think>.*?</think>", "", answer, flags=re.DOTALL | re.IGNORECASE)
+        cleaned = cleaned.strip()
+        return cleaned or None
+
+    def _build_model_messages(
+        self,
+        payload: UserAgentRequest,
+        *,
+        citations: list[UserAgentCitation],
+        suggested_actions: list[UserAgentSuggestedAction],
+        risk_flags: list[str],
+        draft_payload: UserAgentDraftPayload | None,
+        fallback_answer: str,
+    ) -> list[dict[str, str]]:
+        facts = {
+            "run_id": payload.run_id,
+            "user_message": payload.message,
+            "ontology": payload.ontology.model_dump(mode="json"),
+            "context": {
+                "entry_source": payload.context_json.get("entry_source"),
+                "user_name": payload.context_json.get("name"),
+                "user_role": payload.context_json.get("role"),
+                "request_context": payload.context_json.get("request_context"),
+                "attachment_count": payload.context_json.get("attachment_count"),
+                "attachment_names": self._resolve_attachment_names(payload),
+            },
+            "tool_payload": payload.tool_payload,
+            "citations": [item.model_dump(mode="json") for item in citations],
+            "suggested_actions": [
+                item.model_dump(mode="json") for item in suggested_actions
+            ],
+            "risk_flags": risk_flags,
+            "draft_payload": (
+                draft_payload.model_dump(mode="json")
+                if draft_payload is not None
+                else None
+            ),
+            "selected_capability_codes": payload.selected_capability_codes,
+            "requires_confirmation": payload.requires_confirmation,
+            "fallback_answer": fallback_answer,
+        }
+
+        system_prompt = (
+            "你是企业财务共享场景中的中文智能助手，负责和最终用户直接对话。"
+            "你只能基于提供的事实回答，不能编造制度、流程结果或附件内容。"
+            "如果用户问题很笼统，例如“我要报销”，优先告诉用户你可以协助什么，"
+            "并明确要求补充费用类型、金额、时间、事由、参与对象或上传票据。"
+            "如果上下文里只有附件名称，必须明确说明你只拿到了附件名称，"
+            "不能假装已看过图片、PDF 或发票内容。"
+            "不要声称已经提交、审批、付款、入账或真正执行了任何动作；如果只是建议、草稿或待确认，要明确说清楚。"
+            "若给出了风险标签、制度引用或建议动作，可以简洁吸收进回答，但不要新增未提供的事实。"
+            "只输出最终给用户看的自然语言，不要输出 JSON、Markdown、标题、"
+            "<think> 标签或任何中间推理。"
+            "使用简体中文，控制在 2 到 4 句。"
+        )
+        user_prompt = (
+            "请根据以下事实生成最终答复，优先保持准确、具体、可执行：\n"
+            f"{json.dumps(facts, ensure_ascii=False, indent=2)}"
+        )
+        return [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
+        ]
+
+    def _build_query_answer(self, payload: UserAgentRequest) -> str:
+        scenario = payload.ontology.scenario
+        data = payload.tool_payload
+        subject = self._resolve_subject(payload)
+
+        if scenario == "expense":
+            record_count = int(data.get("record_count") or 0)
+            total_amount = float(data.get("total_amount") or 0)
+            return (
+                f"{subject}共命中 {record_count} 笔报销，金额合计 {total_amount:.2f} 元。"
+                "如需继续处理，可以查看明细或生成处理意见草稿。"
+            )
+
+        if scenario == "accounts_receivable":
+            record_count = int(data.get("record_count") or 0)
+            outstanding_amount = float(data.get("outstanding_amount") or 0)
+            return (
+                f"{subject}共命中 {record_count} 条应收，未回款金额 {outstanding_amount:.2f} 元。"
+                "建议结合账龄和客户分布继续排查逾期风险。"
+            )
+
+        if scenario == "accounts_payable":
+            record_count = int(data.get("record_count") or 0)
+            outstanding_amount = float(data.get("outstanding_amount") or 0)
+            return (
+                f"{subject}共命中 {record_count} 条应付，待付金额 {outstanding_amount:.2f} 元。"
+                "如需推进动作，建议先生成付款建议草稿并发起人工确认。"
+            )
+
+        return "已完成当前查询，但暂时没有更多结构化结果可展示。"
+
+    def _build_explain_answer(
+        self,
+        payload: UserAgentRequest,
+        citations: list[UserAgentCitation],
+    ) -> str:
+        if citations:
+            titles = "、".join(item.title for item in citations[:2])
+            summary = citations[0].excerpt or "请结合制度全文进一步确认。"
+            return f"已检索到相关依据：{titles}。核心说明：{summary}"
+
+        return (
+            f"当前还没有与“{SCENARIO_LABELS.get(payload.ontology.scenario, '当前问题')}”"
+            "强匹配的已上线规则引用，建议先人工复核或补充更具体的单据上下文。"
+        )
+
+    def _build_risk_answer(
+        self,
+        payload: UserAgentRequest,
+        citations: list[UserAgentCitation],
+    ) -> str:
+        risk_flags = self._resolve_risk_flags(payload)
+        if not risk_flags:
+            return "当前未识别到明确风险标签，建议继续查看原始明细或补充更多上下文。"
+
+        reasons = [RISK_REASON_MAP.get(flag, f"{flag} 需要人工进一步确认。") for flag in risk_flags]
+        citation_text = (
+            f" 参考规则：{'、'.join(item.title for item in citations[:2])}。"
+            if citations
+            else ""
+        )
+        return (
+            f"本次识别到 {len(risk_flags)} 类风险：{'、'.join(risk_flags)}。"
+            f"触发原因：{'；'.join(reasons)}。"
+            "建议先复核明细、附件和审批链，再决定是否继续处理。"
+            f"{citation_text}"
+        )
+
+    def _build_draft_payload(self, payload: UserAgentRequest) -> UserAgentDraftPayload:
+        scenario_label = SCENARIO_LABELS.get(payload.ontology.scenario, "业务")
+        subject = self._resolve_subject(payload)
+        title = f"{scenario_label}处理意见草稿"
+        body = (
+            f"主题：{subject}\n"
+            "结论：已根据当前语义解析结果生成草稿，尚未自动执行。\n"
+            "建议：请先核对明细、规则命中和所需附件，再由人工确认是否提交正式流程。\n"
+            f"原始问题：{payload.message}"
+        )
+        return UserAgentDraftPayload(
+            draft_type=payload.ontology.scenario,
+            title=title,
+            body=body,
+            confirmation_required=True,
+        )
+
+    def _build_suggested_actions(
+        self,
+        payload: UserAgentRequest,
+    ) -> list[UserAgentSuggestedAction]:
+        if self._is_generic_expense_prompt(payload):
+            return [
+                UserAgentSuggestedAction(
+                    label="上传票据",
+                    action_type="ask_clarification",
+                    description="上传发票、行程单或付款截图，继续识别报销内容。",
+                ),
+                UserAgentSuggestedAction(
+                    label="补充报销信息",
+                    action_type="ask_clarification",
+                    description="补充费用类型、金额、时间和事由后继续处理。",
+                ),
+            ]
+
+        if payload.ontology.intent in {"query", "compare"}:
+            return [
+                UserAgentSuggestedAction(
+                    label="查看明细",
+                    action_type="open_detail",
+                    description="继续查看命中记录和过滤条件。",
+                ),
+                UserAgentSuggestedAction(
+                    label="生成处理意见",
+                    action_type="create_draft",
+                    description="把当前查询结果整理成可确认草稿。",
+                ),
+            ]
+
+        if payload.ontology.intent == "risk_check":
+            return [
+                UserAgentSuggestedAction(
+                    label="人工复核风险",
+                    action_type="manual_review",
+                    description="优先检查明细、附件和规则命中原因。",
+                ),
+                UserAgentSuggestedAction(
+                    label="生成整改建议",
+                    action_type="create_draft",
+                    description="把风险说明整理成处理意见草稿。",
+                ),
+            ]
+
+        if payload.ontology.intent == "draft":
+            return [
+                UserAgentSuggestedAction(
+                    label="复制草稿",
+                    action_type="copy_draft",
+                    description="复制当前草稿后交由人工确认。",
+                ),
+                UserAgentSuggestedAction(
+                    label="补充上下文",
+                    action_type="ask_clarification",
+                    description="补充单据编号、客户或供应商信息以完善草稿。",
+                ),
+            ]
+
+        return [
+            UserAgentSuggestedAction(
+                label="查看规则全文",
+                action_type="open_rule",
+                description="继续查看引用规则或知识内容。",
+            ),
+            UserAgentSuggestedAction(
+                label="补充问题上下文",
+                action_type="ask_clarification",
+                description="补充业务对象、时间或单据范围，提升回答准确度。",
+            ),
+        ]
+
+    def _build_rule_citations(self, payload: UserAgentRequest) -> list[UserAgentCitation]:
+        domain = self._resolve_domain(payload.ontology.scenario)
+        items = self.asset_service.list_assets(
+            asset_type=AgentAssetType.RULE.value,
+            status=AgentAssetStatus.ACTIVE.value,
+            domain=domain,
+        )
+        ranked = self._rank_rule_assets(items, payload)
+        citations: list[UserAgentCitation] = []
+        for item in ranked[:2]:
+            detail = self.asset_service.get_asset(item.id)
+            if detail is None:
+                continue
+            excerpt = self._extract_excerpt(str(detail.current_version_content or ""))
+            citations.append(
+                UserAgentCitation(
+                    source_type="rule",
+                    code=detail.code,
+                    title=detail.name,
+                    version=detail.current_version,
+                    updated_at=detail.updated_at.date().isoformat(),
+                    excerpt=excerpt,
+                )
+            )
+        return citations
+
+    @staticmethod
+    def _resolve_risk_flags(payload: UserAgentRequest) -> list[str]:
+        tool_flags = payload.tool_payload.get("risk_flags")
+        if isinstance(tool_flags, list) and tool_flags:
+            return [str(item) for item in tool_flags]
+        return [str(item) for item in payload.ontology.risk_flags]
+
+    @staticmethod
+    def _resolve_subject(payload: UserAgentRequest) -> str:
+        named_entities = [
+            item.value
+            for item in payload.ontology.entities
+            if item.type in {"employee", "customer", "vendor", "project"}
+        ]
+        if named_entities:
+            return f"{'、'.join(named_entities)} 相关数据"
+        return f"{SCENARIO_LABELS.get(payload.ontology.scenario, '当前')}场景数据"
+
+    @staticmethod
+    def _is_generic_expense_prompt(payload: UserAgentRequest) -> bool:
+        if payload.ontology.scenario != "expense":
+            return False
+        normalized_message = re.sub(r"\s+", "", payload.message)
+        return normalized_message in GENERIC_EXPENSE_PROMPTS
+
+    @staticmethod
+    def _is_implicit_expense_draft_request(payload: UserAgentRequest) -> bool:
+        if payload.ontology.scenario != "expense" or payload.ontology.intent != "draft":
+            return False
+
+        compact_message = re.sub(r"\s+", "", payload.message)
+        if any(keyword in compact_message for keyword in EXPLICIT_DRAFT_KEYWORDS):
+            return False
+
+        return True
+
+    @staticmethod
+    def _resolve_attachment_names(payload: UserAgentRequest) -> list[str]:
+        names = payload.context_json.get("attachment_names")
+        if not isinstance(names, list):
+            return []
+        return [str(name) for name in names if str(name).strip()]
+
+    @staticmethod
+    def _resolve_domain(scenario: str) -> str | None:
+        if scenario == "expense":
+            return "expense"
+        if scenario == "accounts_receivable":
+            return "ar"
+        if scenario == "accounts_payable":
+            return "ap"
+        return None
+
+    @staticmethod
+    def _rank_rule_assets(
+        items: list[AgentAssetListItem],
+        payload: UserAgentRequest,
+    ) -> list[AgentAssetListItem]:
+        def score(item: AgentAssetListItem) -> tuple[int, str]:
+            tags = {str(value) for value in item.scenario_json or []}
+            weight = 0
+            if payload.ontology.scenario in tags:
+                weight += 3
+            if payload.ontology.intent in tags:
+                weight += 2
+            for risk_flag in payload.ontology.risk_flags:
+                if risk_flag in tags:
+                    weight += 4
+            return weight, item.code
+
+        ranked = sorted(items, key=score, reverse=True)
+        return [item for item in ranked if score(item)[0] > 0]
+
+    @staticmethod
+    def _extract_excerpt(content: str) -> str:
+        lines = [line.strip() for line in str(content).splitlines() if line.strip()]
+        cleaned: list[str] = []
+        for line in lines:
+            normalized = re.sub(r"^[#>\-\*\d\.\s`]+", "", line).strip()
+            if normalized:
+                cleaned.append(normalized)
+            if len(cleaned) >= 2:
+                break
+        return "；".join(cleaned[:2])
--- a/server/tests/test_ontology_service.py
+++ b/server/tests/test_ontology_service.py
@@ -0,0 +1,397 @@
+from __future__ import annotations
+
+from collections.abc import Generator
+
+import pytest
+from fastapi.testclient import TestClient
+from sqlalchemy import create_engine
+from sqlalchemy.orm import Session, sessionmaker
+from sqlalchemy.pool import StaticPool
+
+from app.api.deps import get_db
+from app.db.base import Base
+from app.main import create_app
+from app.schemas.ontology import OntologyParseRequest
+from app.services.ontology import LlmOntologyParseResult, SemanticOntologyService
+
+
+def build_session_factory() -> sessionmaker[Session]:
+    engine = create_engine(
+        "sqlite+pysqlite:///:memory:",
+        connect_args={"check_same_thread": False},
+        poolclass=StaticPool,
+    )
+    Base.metadata.create_all(bind=engine)
+    return sessionmaker(bind=engine, autoflush=False, autocommit=False)
+
+
+def build_client() -> tuple[TestClient, sessionmaker[Session]]:
+    session_factory = build_session_factory()
+    app = create_app()
+
+    def override_db() -> Generator[Session, None, None]:
+        db = session_factory()
+        try:
+            yield db
+        finally:
+            db.close()
+
+    app.dependency_overrides[get_db] = override_db
+    return TestClient(app), session_factory
+
+
+EVALUATION_CASES = [
+    pytest.param(
+        "查一下本周报销超标风险",
+        "expense",
+        "risk_check",
+        "read",
+        {},
+        id="expense-risk-check",
+    ),
+    pytest.param(
+        "张三 4 月差旅报销金额是多少",
+        "expense",
+        "query",
+        "read",
+        {},
+        id="expense-query-employee-month",
+    ),
+    pytest.param(
+        "为什么酒店超标报销不能直接通过",
+        "expense",
+        "explain",
+        "read",
+        {},
+        id="expense-explain-policy",
+    ),
+    pytest.param(
+        "列出金额最高的10笔报销",
+        "expense",
+        "query",
+        "read",
+        {},
+        id="expense-topn-query",
+    ),
+    pytest.param(
+        "帮我生成张三4月差旅报销草稿",
+        "expense",
+        "draft",
+        "draft_write",
+        {},
+        id="expense-draft",
+    ),
+    pytest.param(
+        "我今天去客户现场，招待了客户，花销了1000元",
+        "expense",
+        "draft",
+        "draft_write",
+        {},
+        id="expense-narrative-draft",
+    ),
+    pytest.param(
+        "客户 A 这个月还有多少应收",
+        "accounts_receivable",
+        "query",
+        "read",
+        {},
+        id="ar-query-customer-month",
+    ),
+    pytest.param(
+        "对比客户A和客户B本月应收差异",
+        "accounts_receivable",
+        "compare",
+        "read",
+        {},
+        id="ar-compare-customers",
+    ),
+    pytest.param(
+        "检查客户B逾期应收风险",
+        "accounts_receivable",
+        "risk_check",
+        "read",
+        {},
+        id="ar-risk-check",
+    ),
+    pytest.param(
+        "生成客户A回款跟进草稿",
+        "accounts_receivable",
+        "draft",
+        "draft_write",
+        {},
+        id="ar-draft",
+    ),
+    pytest.param(
+        "查询客户B账龄明细",
+        "accounts_receivable",
+        "query",
+        "read",
+        {},
+        id="ar-aging-query",
+    ),
+    pytest.param(
+        "供应商 B 明天要付多少钱",
+        "accounts_payable",
+        "query",
+        "read",
+        {},
+        id="ap-query-vendor-tomorrow",
+    ),
+    pytest.param(
+        "对比供应商A和供应商B本月应付差异",
+        "accounts_payable",
+        "compare",
+        "read",
+        {},
+        id="ap-compare-vendors",
+    ),
+    pytest.param(
+        "检查供应商B逾期付款风险",
+        "accounts_payable",
+        "risk_check",
+        "read",
+        {},
+        id="ap-risk-check",
+    ),
+    pytest.param(
+        "生成供应商A付款沟通草稿",
+        "accounts_payable",
+        "draft",
+        "draft_write",
+        {},
+        id="ap-draft",
+    ),
+    pytest.param(
+        "帮我安排付款给供应商B",
+        "accounts_payable",
+        "operate",
+        "approval_required",
+        {"role_codes": ["finance"]},
+        id="ap-operate-approval-required",
+    ),
+    pytest.param(
+        "公司财务制度在哪里看",
+        "knowledge",
+        "query",
+        "read",
+        {},
+        id="knowledge-query",
+    ),
+    pytest.param(
+        "规则中心的审核依据是什么",
+        "knowledge",
+        "explain",
+        "read",
+        {},
+        id="knowledge-explain",
+    ),
+    pytest.param(
+        "知识库里有没有双人复核制度",
+        "knowledge",
+        "query",
+        "read",
+        {},
+        id="knowledge-query-library",
+    ),
+    pytest.param(
+        "帮我直接付款给供应商B",
+        "accounts_payable",
+        "operate",
+        "forbidden",
+        {"role_codes": ["user"]},
+        id="forbidden-direct-payment",
+    ),
+    pytest.param(
+        "帮我上线付款双人复核规则",
+        "accounts_payable",
+        "operate",
+        "forbidden",
+        {"role_codes": ["user"]},
+        id="forbidden-activate-rule",
+    ),
+    pytest.param(
+        "帮我删除今天的报销记录",
+        "expense",
+        "operate",
+        "forbidden",
+        {"role_codes": ["user"]},
+        id="forbidden-delete-expense",
+    ),
+]
+
+
+@pytest.mark.parametrize("query,scenario,intent,permission,context_json", EVALUATION_CASES)
+def test_semantic_ontology_service_matches_day3_evaluation_set(
+    query: str,
+    scenario: str,
+    intent: str,
+    permission: str,
+    context_json: dict,
+) -> None:
+    session_factory = build_session_factory()
+    with session_factory() as db:
+        result = SemanticOntologyService(db).parse(
+            OntologyParseRequest(
+                query=query,
+                user_id="pytest",
+                context_json=context_json,
+            )
+        )
+
+        assert result.scenario == scenario
+        assert result.intent == intent
+        assert result.permission.level == permission
+        assert result.run_id.startswith("run_")
+
+
+def test_semantic_ontology_service_extracts_entities_time_and_constraints() -> None:
+    session_factory = build_session_factory()
+    with session_factory() as db:
+        result = SemanticOntologyService(db).parse(
+            OntologyParseRequest(
+                query="张三 2026年4月差旅报销金额超过5000元的明细",
+                user_id="pytest",
+            )
+        )
+
+        assert result.scenario == "expense"
+        assert result.intent == "query"
+        assert result.time_range.start_date == "2026-04-01"
+        assert result.time_range.end_date == "2026-04-30"
+        assert any(
+            item.type == "employee" and item.normalized_value == "张三"
+            for item in result.entities
+        )
+        assert any(
+            item.type == "expense_type" and item.normalized_value == "travel"
+            for item in result.entities
+        )
+        assert any(
+            item.field == "amount" and item.operator == ">" and item.value == 5000
+            for item in result.constraints
+        )
+
+
+def test_semantic_ontology_service_prefers_expense_for_customer_entertainment_narrative() -> None:
+    session_factory = build_session_factory()
+    with session_factory() as db:
+        result = SemanticOntologyService(db).parse(
+            OntologyParseRequest(
+                query="我今天去客户现场，招待了客户，花销了1000元",
+                user_id="pytest",
+            )
+        )
+
+        assert result.scenario == "expense"
+        assert result.intent == "draft"
+        assert result.permission.level == "draft_write"
+        assert result.time_range.raw == "今天"
+        assert result.clarification_required is True
+        assert "customer_name" in result.missing_slots
+        assert "participants" in result.missing_slots
+        assert any(
+            item.type == "expense_type" and item.normalized_value == "entertainment"
+            for item in result.entities
+        )
+
+
+def test_semantic_ontology_service_uses_model_parse_when_available(monkeypatch) -> None:
+    session_factory = build_session_factory()
+    with session_factory() as db:
+        service = SemanticOntologyService(db)
+        monkeypatch.setattr(
+            service,
+            "_parse_with_model",
+            lambda **kwargs: LlmOntologyParseResult(
+                scenario="expense",
+                intent="draft",
+                confidence=0.91,
+                clarification_required=True,
+                clarification_question="请补充费用类型、金额和票据附件。",
+                missing_slots=["expense_type", "amount", "attachments"],
+                ambiguity=[],
+                entity_hints=[],
+            ),
+        )
+
+        result = service.parse(
+            OntologyParseRequest(
+                query="我要报销",
+                user_id="pytest",
+            )
+        )
+
+        assert result.scenario == "expense"
+        assert result.intent == "draft"
+        assert result.parse_strategy == "llm_primary"
+        assert result.clarification_required is True
+        assert "expense_type" in result.missing_slots
+        assert result.clarification_question == "请补充费用类型、金额和票据附件。"
+
+
+def test_parse_ontology_endpoint_returns_eight_fields_and_writes_trace() -> None:
+    client, _ = build_client()
+
+    response = client.post(
+        "/api/v1/ontology/parse",
+        json={
+            "query": "查一下本周报销超标风险",
+            "user_id": "pytest",
+            "context_json": {"role_codes": ["finance"]},
+        },
+    )
+
+    assert response.status_code == 200
+    payload = response.json()
+    assert payload["scenario"] == "expense"
+    assert payload["intent"] == "risk_check"
+    assert payload["permission"]["level"] == "read"
+    assert payload["run_id"].startswith("run_")
+    assert set(payload) >= {
+        "scenario",
+        "intent",
+        "entities",
+        "time_range",
+        "metrics",
+        "constraints",
+        "risk_flags",
+        "permission",
+        "confidence",
+        "missing_slots",
+        "ambiguity",
+        "parse_strategy",
+        "clarification_required",
+        "clarification_question",
+        "run_id",
+        "field_errors",
+    }
+
+    run_response = client.get(f"/api/v1/agent-runs/{payload['run_id']}")
+
+    assert run_response.status_code == 200
+    run_payload = run_response.json()
+    assert run_payload["ontology_json"]["scenario"] == "expense"
+    assert run_payload["ontology_json"]["intent"] == "risk_check"
+    assert run_payload["semantic_parse"]["scenario"] == "expense"
+    assert run_payload["semantic_parse"]["intent"] == "risk_check"
+
+
+def test_parse_ontology_endpoint_returns_forbidden_for_unprivileged_payment_request() -> None:
+    client, _ = build_client()
+
+    response = client.post(
+        "/api/v1/ontology/parse",
+        json={
+            "query": "帮我直接付款给供应商B",
+            "user_id": "pytest",
+            "context_json": {"role_codes": ["user"]},
+        },
+    )
+
+    assert response.status_code == 200
+    payload = response.json()
+    assert payload["scenario"] == "accounts_payable"
+    assert payload["intent"] == "operate"
+    assert payload["permission"]["level"] == "forbidden"
+    assert payload["clarification_required"] is True
+    assert payload["field_errors"]
--- a/server/tests/test_orchestrator_service.py
+++ b/server/tests/test_orchestrator_service.py
@@ -0,0 +1,241 @@
+from __future__ import annotations
+
+from collections.abc import Generator
+
+from fastapi.testclient import TestClient
+from sqlalchemy import create_engine
+from sqlalchemy.orm import Session, sessionmaker
+from sqlalchemy.pool import StaticPool
+
+from app.api.deps import get_db
+from app.db.base import Base
+from app.main import create_app
+from app.services.agent_assets import AgentAssetService
+
+
+def build_client() -> tuple[TestClient, sessionmaker[Session]]:
+    engine = create_engine(
+        "sqlite+pysqlite:///:memory:",
+        connect_args={"check_same_thread": False},
+        poolclass=StaticPool,
+    )
+    Base.metadata.create_all(bind=engine)
+    session_factory = sessionmaker(bind=engine, autoflush=False, autocommit=False)
+    app = create_app()
+
+    def override_db() -> Generator[Session, None, None]:
+        db = session_factory()
+        try:
+            yield db
+        finally:
+            db.close()
+
+    app.dependency_overrides[get_db] = override_db
+    return TestClient(app), session_factory
+
+
+def test_orchestrator_routes_user_query_to_user_agent() -> None:
+    client, _ = build_client()
+
+    response = client.post(
+        "/api/v1/orchestrator/run",
+        json={
+            "source": "user_message",
+            "user_id": "pytest",
+            "message": "客户A这个月还有多少应收",
+            "context_json": {"role_codes": ["finance"]},
+        },
+    )
+
+    assert response.status_code == 200
+    payload = response.json()
+    assert payload["selected_agent"] == "user_agent"
+    assert payload["permission_level"] == "read"
+    assert payload["status"] == "succeeded"
+    assert payload["result"]["answer"]
+    assert payload["result"]["suggested_actions"]
+    assert payload["trace_summary"]["tool_count"] >= 1
+
+    run_detail = client.get(f"/api/v1/agent-runs/{payload['run_id']}").json()
+    assert run_detail["agent"] == "user_agent"
+    assert run_detail["route_json"]["selected_agent"] == "user_agent"
+    assert run_detail["semantic_parse"]["scenario"] == "accounts_receivable"
+    assert run_detail["tool_calls"][0]["tool_type"] == "database"
+
+
+def test_orchestrator_routes_schedule_to_hermes() -> None:
+    client, session_factory = build_client()
+
+    with session_factory() as db:
+        task = next(
+            item
+            for item in AgentAssetService(db).list_assets(asset_type="task", status="active")
+            if item.code == "task.hermes.daily_risk_scan"
+        )
+
+    response = client.post(
+        "/api/v1/orchestrator/run",
+        json={
+            "source": "schedule",
+            "task_id": task.id,
+            "context_json": {"role_codes": ["finance"]},
+        },
+    )
+
+    assert response.status_code == 200
+    payload = response.json()
+    assert payload["selected_agent"] == "hermes"
+    assert payload["status"] == "succeeded"
+    assert payload["trace_summary"]["tool_count"] == 2
+
+    run_detail = client.get(f"/api/v1/agent-runs/{payload['run_id']}").json()
+    assert run_detail["agent"] == "hermes"
+    assert run_detail["route_json"]["selected_agent"] == "hermes"
+    assert len(run_detail["tool_calls"]) == 2
+
+
+def test_orchestrator_forbidden_request_does_not_call_downstream_agent() -> None:
+    client, _ = build_client()
+
+    response = client.post(
+        "/api/v1/orchestrator/run",
+        json={
+            "source": "user_message",
+            "user_id": "pytest",
+            "message": "帮我直接付款给供应商B",
+            "context_json": {"role_codes": ["user"]},
+        },
+    )
+
+    assert response.status_code == 200
+    payload = response.json()
+    assert payload["selected_agent"] is None
+    assert payload["permission_level"] == "forbidden"
+    assert payload["status"] == "blocked"
+    assert payload["trace_summary"]["tool_count"] == 0
+
+    run_detail = client.get(f"/api/v1/agent-runs/{payload['run_id']}").json()
+    assert run_detail["agent"] == "orchestrator"
+    assert run_detail["tool_calls"] == []
+
+
+def test_orchestrator_approval_required_returns_confirmation_result() -> None:
+    client, _ = build_client()
+
+    response = client.post(
+        "/api/v1/orchestrator/run",
+        json={
+            "source": "user_message",
+            "user_id": "pytest",
+            "message": "帮我安排付款给供应商B",
+            "context_json": {"role_codes": ["finance"]},
+        },
+    )
+
+    assert response.status_code == 200
+    payload = response.json()
+    assert payload["selected_agent"] == "user_agent"
+    assert payload["permission_level"] == "approval_required"
+    assert payload["requires_confirmation"] is True
+    assert payload["status"] == "blocked"
+    assert "确认" in payload["result"]["message"]
+
+
+def test_orchestrator_user_agent_draft_returns_structured_payload() -> None:
+    client, _ = build_client()
+
+    response = client.post(
+        "/api/v1/orchestrator/run",
+        json={
+            "source": "user_message",
+            "user_id": "pytest",
+            "message": "帮我生成张三4月差旅报销草稿",
+            "context_json": {"role_codes": ["finance"]},
+        },
+    )
+
+    assert response.status_code == 200
+    payload = response.json()
+    assert payload["selected_agent"] == "user_agent"
+    assert payload["status"] == "succeeded"
+    assert payload["result"]["draft_payload"]["confirmation_required"] is True
+    assert payload["result"]["suggested_actions"]
+
+
+def test_orchestrator_treats_expense_narrative_as_draft_instead_of_ar_query() -> None:
+    client, _ = build_client()
+
+    response = client.post(
+        "/api/v1/orchestrator/run",
+        json={
+            "source": "user_message",
+            "user_id": "pytest",
+            "message": "我今天去客户现场，招待了客户，花销了1000元",
+            "context_json": {"role_codes": ["finance"]},
+        },
+    )
+
+    assert response.status_code == 200
+    payload = response.json()
+    assert payload["selected_agent"] == "user_agent"
+    assert payload["permission_level"] == "draft_write"
+    assert payload["status"] == "blocked"
+    assert payload["route_reason"] == "clarification_required"
+    assert payload["trace_summary"]["scenario"] == "expense"
+    assert payload["trace_summary"]["intent"] == "draft"
+    assert payload["trace_summary"]["tool_count"] == 0
+    assert "应收场景数据" not in payload["result"]["message"]
+    assert "请补充" in payload["result"]["message"]
+
+
+def test_orchestrator_tool_failure_is_logged_and_degraded() -> None:
+    client, _ = build_client()
+
+    response = client.post(
+        "/api/v1/orchestrator/run",
+        json={
+            "source": "user_message",
+            "user_id": "pytest",
+            "message": "查一下本周报销金额",
+            "context_json": {
+                "role_codes": ["finance"],
+                "simulate_tool_failure": "database",
+            },
+        },
+    )
+
+    assert response.status_code == 200
+    payload = response.json()
+    assert payload["selected_agent"] == "user_agent"
+    assert payload["status"] == "succeeded"
+    assert payload["trace_summary"]["failed_tool_count"] == 1
+    assert payload["trace_summary"]["degraded"] is True
+
+    run_detail = client.get(f"/api/v1/agent-runs/{payload['run_id']}").json()
+    assert run_detail["tool_calls"][0]["status"] == "failed"
+    assert "simulated database failure" in run_detail["tool_calls"][0]["error_message"]
+
+
+def test_orchestrator_exception_is_written_to_agent_run() -> None:
+    client, _ = build_client()
+
+    response = client.post(
+        "/api/v1/orchestrator/run",
+        json={
+            "source": "user_message",
+            "user_id": "pytest",
+            "message": "查一下本周报销金额",
+            "context_json": {
+                "role_codes": ["finance"],
+                "simulate_orchestrator_exception": True,
+            },
+        },
+    )
+
+    assert response.status_code == 200
+    payload = response.json()
+    assert payload["status"] == "failed"
+
+    run_detail = client.get(f"/api/v1/agent-runs/{payload['run_id']}").json()
+    assert run_detail["status"] == "failed"
+    assert "simulated orchestrator exception" in run_detail["error_message"]
--- a/server/tests/test_user_agent_service.py
+++ b/server/tests/test_user_agent_service.py
@@ -0,0 +1,179 @@
+from __future__ import annotations
+
+from sqlalchemy import create_engine
+from sqlalchemy.orm import Session, sessionmaker
+from sqlalchemy.pool import StaticPool
+
+from app.db.base import Base
+from app.schemas.ontology import OntologyParseRequest
+from app.schemas.user_agent import UserAgentRequest
+from app.services.ontology import SemanticOntologyService
+from app.services.user_agent import UserAgentService
+
+
+def build_session_factory() -> sessionmaker[Session]:
+    engine = create_engine(
+        "sqlite+pysqlite:///:memory:",
+        connect_args={"check_same_thread": False},
+        poolclass=StaticPool,
+    )
+    Base.metadata.create_all(bind=engine)
+    return sessionmaker(bind=engine, autoflush=False, autocommit=False)
+
+
+def test_user_agent_query_returns_readable_answer_and_actions() -> None:
+    session_factory = build_session_factory()
+    with session_factory() as db:
+        ontology = SemanticOntologyService(db).parse(
+            OntologyParseRequest(
+                query="张三 4 月差旅报销金额是多少",
+                user_id="pytest",
+            )
+        )
+        response = UserAgentService(db).respond(
+            UserAgentRequest(
+                run_id=ontology.run_id,
+                user_id="pytest",
+                message="张三 4 月差旅报销金额是多少",
+                ontology=ontology,
+                tool_payload={"record_count": 2, "total_amount": 8800.0},
+            )
+        )
+
+        assert "8800.00" in response.answer
+        assert len(response.suggested_actions) >= 1
+
+
+def test_user_agent_prefers_runtime_model_answer_when_available(monkeypatch) -> None:
+    session_factory = build_session_factory()
+    with session_factory() as db:
+        ontology = SemanticOntologyService(db).parse(
+            OntologyParseRequest(
+                query="张三 4 月差旅报销金额是多少",
+                user_id="pytest",
+            )
+        )
+        service = UserAgentService(db)
+        monkeypatch.setattr(
+            service,
+            "_generate_answer_with_model",
+            lambda *args, **kwargs: "这是模型回答",
+        )
+
+        response = service.respond(
+            UserAgentRequest(
+                run_id=ontology.run_id,
+                user_id="pytest",
+                message="张三 4 月差旅报销金额是多少",
+                ontology=ontology,
+                tool_payload={"record_count": 2, "total_amount": 8800.0},
+            )
+        )
+
+        assert response.answer == "这是模型回答"
+
+
+def test_user_agent_sanitizes_model_thinking_blocks() -> None:
+    session_factory = build_session_factory()
+    with session_factory() as db:
+        service = UserAgentService(db)
+
+        assert (
+            service._sanitize_model_answer("<think>内部推理</think>\n最终答复")
+            == "最终答复"
+        )
+
+
+def test_user_agent_guides_generic_expense_request() -> None:
+    session_factory = build_session_factory()
+    with session_factory() as db:
+        ontology = SemanticOntologyService(db).parse(
+            OntologyParseRequest(
+                query="我要报销",
+                user_id="pytest",
+            )
+        )
+        response = UserAgentService(db).respond(
+            UserAgentRequest(
+                run_id=ontology.run_id,
+                user_id="pytest",
+                message="我要报销",
+                ontology=ontology,
+                tool_payload={"record_count": 9, "total_amount": 12345.0},
+            )
+        )
+
+        assert "补充费用类型" in response.answer
+        assert "上传票据" in response.answer
+
+
+def test_user_agent_guides_implicit_expense_draft_request() -> None:
+    session_factory = build_session_factory()
+    with session_factory() as db:
+        ontology = SemanticOntologyService(db).parse(
+            OntologyParseRequest(
+                query="我今天去客户现场，招待了客户，花销了1000元",
+                user_id="pytest",
+            )
+        )
+        response = UserAgentService(db).respond(
+            UserAgentRequest(
+                run_id=ontology.run_id,
+                user_id="pytest",
+                message="我今天去客户现场，招待了客户，花销了1000元",
+                ontology=ontology,
+                tool_payload={"draft_only": True},
+            )
+        )
+
+        assert "1000元" in response.answer
+        assert "票据附件" in response.answer
+        assert "报销草稿" in response.answer
+
+
+def test_user_agent_risk_response_includes_rule_citations() -> None:
+    session_factory = build_session_factory()
+    with session_factory() as db:
+        ontology = SemanticOntologyService(db).parse(
+            OntologyParseRequest(
+                query="检查重复报销风险",
+                user_id="pytest",
+            )
+        )
+        response = UserAgentService(db).respond(
+            UserAgentRequest(
+                run_id=ontology.run_id,
+                user_id="pytest",
+                message="检查重复报销风险",
+                ontology=ontology,
+                tool_payload={"risk_flags": ["duplicate_expense"]},
+            )
+        )
+
+        assert response.risk_flags == ["duplicate_expense"]
+        assert any(item.source_type == "rule" for item in response.citations)
+        assert "duplicate_expense" in response.answer
+
+
+def test_user_agent_draft_returns_structured_payload() -> None:
+    session_factory = build_session_factory()
+    with session_factory() as db:
+        ontology = SemanticOntologyService(db).parse(
+            OntologyParseRequest(
+                query="帮我生成张三4月差旅报销草稿",
+                user_id="pytest",
+            )
+        )
+        response = UserAgentService(db).respond(
+            UserAgentRequest(
+                run_id=ontology.run_id,
+                user_id="pytest",
+                message="帮我生成张三4月差旅报销草稿",
+                ontology=ontology,
+                tool_payload={"draft_only": True},
+            )
+        )
+
+        assert response.draft_payload is not None
+        assert response.draft_payload.confirmation_required is True
+        assert "待人工确认" in response.answer