- 重构报销状态注册表、审批流路由与平台风险标记 - 完善管家意图规划器与模型计划构建器全链路 - 新增 OCR Worker 脚本、数据库会话管理与通知状态 - 优化文档中心、日志视图、预算中心与员工管理交互 - 增强工作台摘要、图标资源与全局主题样式 - 补充审批路由、状态注册、OCR 服务与管家规划器测试覆盖
577 lines
21 KiB
Python
577 lines
21 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import uuid
|
|
from datetime import UTC, datetime, timedelta
|
|
from typing import Any
|
|
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.core.config import get_settings
|
|
from app.core.agent_enums import AgentName, AgentPermissionLevel, AgentRunStatus
|
|
from app.core.logging import get_logger
|
|
from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog
|
|
from app.repositories.agent_run import AgentRunRepository
|
|
from app.schemas.agent_run import (
|
|
AgentRunRead,
|
|
AgentRunStatsRead,
|
|
AgentToolCallRead,
|
|
SemanticParseRead,
|
|
)
|
|
from app.services.agent_foundation import AgentFoundationService
|
|
from app.services.knowledge_ingest_log import enrich_knowledge_ingest_route_json
|
|
|
|
logger = get_logger("app.services.agent_runs")
|
|
|
|
KNOWLEDGE_SYNC_HEARTBEAT_TIMEOUT = timedelta(minutes=30)
|
|
KNOWLEDGE_SYNC_JOB_TYPES = {"knowledge_index_sync", "llm_wiki_sync"}
|
|
LIST_ROUTE_FIELDS = (
|
|
("route_job_type", "job_type"),
|
|
("route_task_type", "task_type"),
|
|
("route_task_code", "task_code"),
|
|
("route_task_name", "task_name"),
|
|
("route_task_title", "task_title"),
|
|
("route_asset_name", "asset_name"),
|
|
("route_selected_agent", "selected_agent"),
|
|
("route_phase", "phase"),
|
|
("route_stage", "stage"),
|
|
("route_report_type", "report_type"),
|
|
("route_snapshot_key", "snapshot_key"),
|
|
("route_folder", "folder"),
|
|
("route_heartbeat_at", "heartbeat_at"),
|
|
)
|
|
LIST_ONTOLOGY_FIELDS = (
|
|
("ontology_scenario", "scenario"),
|
|
("ontology_intent", "intent"),
|
|
("ontology_parse_strategy", "parse_strategy"),
|
|
)
|
|
LIST_PROGRESS_FIELDS = {
|
|
"percent",
|
|
"total_documents",
|
|
"completed_documents",
|
|
"failed_documents",
|
|
"skipped_documents",
|
|
"current_stage",
|
|
}
|
|
|
|
|
|
class AgentRunService:
|
|
def __init__(self, db: Session) -> None:
|
|
self.db = db
|
|
self.repository = AgentRunRepository(db)
|
|
|
|
def list_runs(
|
|
self,
|
|
*,
|
|
agent: str | None = None,
|
|
status: str | None = None,
|
|
source: str | None = None,
|
|
limit: int = 20,
|
|
) -> list[AgentRunRead]:
|
|
self._ensure_ready()
|
|
self._reconcile_stale_knowledge_index_runs()
|
|
rows = self.repository.list_light(
|
|
agent=agent,
|
|
status=status,
|
|
source=source,
|
|
limit=limit,
|
|
)
|
|
tool_calls_by_run_id = self._group_light_tool_calls(
|
|
self.repository.list_light_tool_calls([str(item["run_id"]) for item in rows])
|
|
)
|
|
return [
|
|
self._serialize_run_list_item(
|
|
item,
|
|
tool_calls_by_run_id.get(str(item["run_id"]), []),
|
|
)
|
|
for item in rows
|
|
]
|
|
|
|
def get_run(self, run_id: str) -> AgentRunRead | None:
|
|
self._ensure_ready()
|
|
self._reconcile_stale_knowledge_index_runs(target_run_id=run_id)
|
|
run = self.repository.get_by_run_id(run_id)
|
|
if run is None:
|
|
return None
|
|
return self._serialize_run(run, enrich_knowledge_ingest=True)
|
|
|
|
def summarize_runs(
|
|
self,
|
|
*,
|
|
agent: str | None = None,
|
|
status: str | None = None,
|
|
source: str | None = None,
|
|
limit: int = 200,
|
|
) -> AgentRunStatsRead:
|
|
self._ensure_ready()
|
|
self._reconcile_stale_knowledge_index_runs()
|
|
runs = self.repository.list(agent=agent, status=status, source=source, limit=limit)
|
|
agents: dict[str, int] = {}
|
|
statuses: dict[str, int] = {}
|
|
tool_statuses: dict[str, int] = {}
|
|
tool_call_count = 0
|
|
failed_tool_call_count = 0
|
|
llm_call_count = 0
|
|
failed_llm_call_count = 0
|
|
model_fallback_count = 0
|
|
model_guardrail_count = 0
|
|
recent_errors: list[dict[str, Any]] = []
|
|
|
|
for run in runs:
|
|
agents[run.agent] = agents.get(run.agent, 0) + 1
|
|
statuses[run.status] = statuses.get(run.status, 0) + 1
|
|
ontology_json = run.ontology_json or {}
|
|
if ontology_json.get("parse_strategy") == "rule_fallback":
|
|
model_fallback_count += 1
|
|
model_summary = ontology_json.get("model_invocation_summary")
|
|
if isinstance(model_summary, dict) and model_summary.get("model_guardrail_reason"):
|
|
model_guardrail_count += 1
|
|
if run.status == AgentRunStatus.FAILED.value and run.error_message:
|
|
recent_errors.append(
|
|
{
|
|
"run_id": run.run_id,
|
|
"agent": run.agent,
|
|
"stage": (run.route_json or {}).get("stage"),
|
|
"message": run.error_message,
|
|
}
|
|
)
|
|
|
|
for tool_call in run.tool_calls:
|
|
tool_call_count += 1
|
|
tool_statuses[tool_call.status] = tool_statuses.get(tool_call.status, 0) + 1
|
|
failed = tool_call.status == "failed"
|
|
if failed:
|
|
failed_tool_call_count += 1
|
|
if tool_call.tool_type == "llm":
|
|
llm_call_count += 1
|
|
if failed:
|
|
failed_llm_call_count += 1
|
|
if tool_call.error_message:
|
|
recent_errors.append(
|
|
{
|
|
"run_id": run.run_id,
|
|
"agent": run.agent,
|
|
"tool_name": tool_call.tool_name,
|
|
"tool_type": tool_call.tool_type,
|
|
"message": tool_call.error_message,
|
|
}
|
|
)
|
|
|
|
return AgentRunStatsRead(
|
|
window_limit=limit,
|
|
total_runs=len(runs),
|
|
succeeded_runs=statuses.get(AgentRunStatus.SUCCEEDED.value, 0),
|
|
blocked_runs=statuses.get(AgentRunStatus.BLOCKED.value, 0),
|
|
failed_runs=statuses.get(AgentRunStatus.FAILED.value, 0),
|
|
tool_call_count=tool_call_count,
|
|
failed_tool_call_count=failed_tool_call_count,
|
|
llm_call_count=llm_call_count,
|
|
failed_llm_call_count=failed_llm_call_count,
|
|
model_fallback_count=model_fallback_count,
|
|
model_guardrail_count=model_guardrail_count,
|
|
agents=agents,
|
|
statuses=statuses,
|
|
tool_statuses=tool_statuses,
|
|
recent_errors=recent_errors[:10],
|
|
)
|
|
|
|
def create_run(
|
|
self,
|
|
*,
|
|
agent: str,
|
|
source: str,
|
|
user_id: str | None = None,
|
|
task_id: str | None = None,
|
|
ontology_json: dict[str, Any] | None = None,
|
|
route_json: dict[str, Any] | None = None,
|
|
permission_level: str = AgentPermissionLevel.READ.value,
|
|
status: str = AgentRunStatus.RUNNING.value,
|
|
result_summary: str | None = None,
|
|
error_message: str | None = None,
|
|
started_at: datetime | None = None,
|
|
finished_at: datetime | None = None,
|
|
) -> AgentRunRead:
|
|
self._ensure_ready()
|
|
run = AgentRun(
|
|
run_id=f"run_{uuid.uuid4().hex[:16]}",
|
|
agent=agent,
|
|
source=source,
|
|
user_id=user_id,
|
|
task_id=task_id,
|
|
ontology_json=ontology_json or {},
|
|
route_json=route_json or {},
|
|
permission_level=permission_level,
|
|
status=status,
|
|
result_summary=result_summary,
|
|
error_message=error_message,
|
|
started_at=started_at or datetime.now(UTC),
|
|
finished_at=finished_at,
|
|
)
|
|
created = self.repository.create_run(run)
|
|
logger.info("Created agent run id=%s run_id=%s", created.id, created.run_id)
|
|
return self._serialize_run(created)
|
|
|
|
def update_run(
|
|
self,
|
|
run_id: str,
|
|
*,
|
|
agent: str | None = None,
|
|
ontology_json: dict[str, Any] | None = None,
|
|
route_json: dict[str, Any] | None = None,
|
|
permission_level: str | None = None,
|
|
status: str | None = None,
|
|
result_summary: str | None = None,
|
|
error_message: str | None = None,
|
|
finished_at: datetime | None = None,
|
|
) -> AgentRunRead:
|
|
self._ensure_ready()
|
|
run = self.repository.get_by_run_id(run_id)
|
|
if run is None:
|
|
raise LookupError("Run not found")
|
|
|
|
if agent is not None:
|
|
run.agent = agent
|
|
if ontology_json is not None:
|
|
run.ontology_json = ontology_json
|
|
if route_json is not None:
|
|
run.route_json = route_json
|
|
if permission_level is not None:
|
|
run.permission_level = permission_level
|
|
if status is not None:
|
|
run.status = status
|
|
if result_summary is not None:
|
|
run.result_summary = result_summary
|
|
if error_message is not None:
|
|
run.error_message = error_message
|
|
if finished_at is not None:
|
|
run.finished_at = finished_at
|
|
|
|
updated = self.repository.save_run(run)
|
|
logger.info("Updated agent run run_id=%s status=%s", updated.run_id, updated.status)
|
|
return self._serialize_run(updated)
|
|
|
|
def merge_route_json(
|
|
self,
|
|
run_id: str,
|
|
route_patch: dict[str, Any],
|
|
*,
|
|
status: str | None = None,
|
|
result_summary: str | None = None,
|
|
error_message: str | None = None,
|
|
finished_at: datetime | None = None,
|
|
) -> AgentRunRead:
|
|
self._ensure_ready()
|
|
run = self.repository.get_by_run_id(run_id)
|
|
if run is None:
|
|
raise LookupError("Run not found")
|
|
|
|
route_json = dict(run.route_json or {})
|
|
route_json.update(route_patch or {})
|
|
run.route_json = route_json
|
|
|
|
if status is not None:
|
|
run.status = status
|
|
if result_summary is not None:
|
|
run.result_summary = result_summary
|
|
if error_message is not None:
|
|
run.error_message = error_message
|
|
if finished_at is not None:
|
|
run.finished_at = finished_at
|
|
|
|
updated = self.repository.save_run(run)
|
|
logger.info("Merged route_json for agent run run_id=%s status=%s", updated.run_id, updated.status)
|
|
return self._serialize_run(updated)
|
|
|
|
def record_tool_call(
|
|
self,
|
|
*,
|
|
run_id: str,
|
|
tool_type: str,
|
|
tool_name: str,
|
|
request_json: dict[str, Any] | None = None,
|
|
response_json: dict[str, Any] | None = None,
|
|
status: str,
|
|
duration_ms: int = 0,
|
|
error_message: str | None = None,
|
|
) -> AgentToolCallRead:
|
|
self._ensure_ready()
|
|
tool_call = AgentToolCall(
|
|
run_id=run_id,
|
|
tool_type=tool_type,
|
|
tool_name=tool_name,
|
|
request_json=request_json or {},
|
|
response_json=response_json or {},
|
|
status=status,
|
|
duration_ms=duration_ms,
|
|
error_message=error_message,
|
|
)
|
|
created = self.repository.create_tool_call(tool_call)
|
|
logger.info("Recorded tool call run_id=%s tool=%s", run_id, tool_name)
|
|
return AgentToolCallRead.model_validate(created)
|
|
|
|
def update_tool_call(
|
|
self,
|
|
tool_call_id: str,
|
|
*,
|
|
request_json: dict[str, Any] | None = None,
|
|
response_json: dict[str, Any] | None = None,
|
|
status: str | None = None,
|
|
duration_ms: int | None = None,
|
|
error_message: str | None = None,
|
|
) -> AgentToolCallRead:
|
|
self._ensure_ready()
|
|
tool_call = self.repository.get_tool_call(tool_call_id)
|
|
if tool_call is None:
|
|
raise LookupError("Tool call not found")
|
|
|
|
if request_json is not None:
|
|
tool_call.request_json = request_json
|
|
if response_json is not None:
|
|
tool_call.response_json = response_json
|
|
if status is not None:
|
|
tool_call.status = status
|
|
if duration_ms is not None:
|
|
tool_call.duration_ms = duration_ms
|
|
tool_call.error_message = error_message
|
|
|
|
updated = self.repository.save_tool_call(tool_call)
|
|
logger.info("Updated tool call id=%s status=%s", updated.id, updated.status)
|
|
return AgentToolCallRead.model_validate(updated)
|
|
|
|
def record_semantic_parse(
|
|
self,
|
|
*,
|
|
run_id: str,
|
|
user_id: str | None,
|
|
raw_query: str,
|
|
scenario: str,
|
|
intent: str,
|
|
entities_json: list[Any] | None = None,
|
|
time_range_json: dict[str, Any] | None = None,
|
|
metrics_json: list[Any] | None = None,
|
|
constraints_json: list[Any] | None = None,
|
|
risk_flags_json: list[Any] | None = None,
|
|
permission_json: dict[str, Any] | None = None,
|
|
confidence: float = 0.0,
|
|
) -> SemanticParseRead:
|
|
self._ensure_ready()
|
|
semantic_parse = SemanticParseLog(
|
|
run_id=run_id,
|
|
user_id=user_id,
|
|
raw_query=raw_query,
|
|
scenario=scenario,
|
|
intent=intent,
|
|
entities_json=entities_json or [],
|
|
time_range_json=time_range_json or {},
|
|
metrics_json=metrics_json or [],
|
|
constraints_json=constraints_json or [],
|
|
risk_flags_json=risk_flags_json or [],
|
|
permission_json=permission_json or {},
|
|
confidence=confidence,
|
|
)
|
|
created = self.repository.create_semantic_parse(semantic_parse)
|
|
logger.info(
|
|
"Recorded semantic parse run_id=%s scenario=%s intent=%s", run_id, scenario, intent
|
|
)
|
|
return SemanticParseRead.model_validate(created)
|
|
|
|
def _ensure_ready(self) -> None:
|
|
AgentFoundationService(self.db).ensure_foundation_ready()
|
|
|
|
def _reconcile_stale_knowledge_index_runs(self, *, target_run_id: str | None = None) -> None:
|
|
runs = self.repository.list(
|
|
agent=AgentName.HERMES.value,
|
|
status=AgentRunStatus.RUNNING.value,
|
|
limit=200,
|
|
)
|
|
now = datetime.now(UTC)
|
|
|
|
for run in runs:
|
|
if target_run_id is not None and run.run_id != target_run_id:
|
|
continue
|
|
|
|
route_json = dict(run.route_json or {})
|
|
if str(route_json.get("job_type") or "").strip() not in KNOWLEDGE_SYNC_JOB_TYPES:
|
|
continue
|
|
|
|
heartbeat_at = self._parse_heartbeat_time(
|
|
str(route_json.get("heartbeat_at") or "").strip()
|
|
)
|
|
last_seen_at = heartbeat_at or run.started_at
|
|
if last_seen_at.tzinfo is None:
|
|
last_seen_at = last_seen_at.replace(tzinfo=UTC)
|
|
|
|
if now - last_seen_at <= KNOWLEDGE_SYNC_HEARTBEAT_TIMEOUT:
|
|
continue
|
|
|
|
stale_document_ids = [
|
|
str(document_id).strip()
|
|
for document_id in list(route_json.get("requested_document_ids") or [])
|
|
if str(document_id).strip()
|
|
]
|
|
if stale_document_ids:
|
|
from app.services.knowledge import (
|
|
KNOWLEDGE_INGEST_STATUS_FAILED,
|
|
KnowledgeService,
|
|
)
|
|
|
|
KnowledgeService(db=self.db).set_document_ingest_statuses(
|
|
stale_document_ids,
|
|
KNOWLEDGE_INGEST_STATUS_FAILED,
|
|
agent_run_id=run.run_id,
|
|
)
|
|
|
|
route_json.update(
|
|
{
|
|
"phase": "stale_failed",
|
|
"heartbeat_at": now.isoformat(),
|
|
}
|
|
)
|
|
run.route_json = route_json
|
|
run.status = AgentRunStatus.FAILED.value
|
|
run.result_summary = "知识归纳任务长时间无心跳,系统已自动标记失败。"
|
|
run.error_message = "Knowledge index heartbeat timed out."
|
|
run.finished_at = now
|
|
self.repository.save_run(run)
|
|
logger.warning("Marked stale knowledge index run as failed run_id=%s", run.run_id)
|
|
|
|
@staticmethod
|
|
def _parse_heartbeat_time(raw_value: str) -> datetime | None:
|
|
normalized = str(raw_value or "").strip()
|
|
if not normalized:
|
|
return None
|
|
try:
|
|
return datetime.fromisoformat(normalized)
|
|
except ValueError:
|
|
return None
|
|
|
|
def _serialize_run(
|
|
self,
|
|
run: AgentRun,
|
|
*,
|
|
enrich_knowledge_ingest: bool = False,
|
|
) -> AgentRunRead:
|
|
semantic_parse = run.semantic_parse_logs[0] if run.semantic_parse_logs else None
|
|
route_json = run.route_json
|
|
if enrich_knowledge_ingest:
|
|
route_json = enrich_knowledge_ingest_route_json(
|
|
dict(run.route_json or {}),
|
|
storage_root=get_settings().resolved_storage_root_dir,
|
|
)
|
|
return AgentRunRead(
|
|
id=run.id,
|
|
run_id=run.run_id,
|
|
agent=run.agent,
|
|
source=run.source,
|
|
user_id=run.user_id,
|
|
task_id=run.task_id,
|
|
ontology_json=run.ontology_json,
|
|
route_json=route_json,
|
|
permission_level=run.permission_level,
|
|
status=run.status,
|
|
result_summary=run.result_summary,
|
|
error_message=run.error_message,
|
|
started_at=run.started_at,
|
|
finished_at=run.finished_at,
|
|
tool_calls=[AgentToolCallRead.model_validate(item) for item in run.tool_calls],
|
|
semantic_parse=SemanticParseRead.model_validate(semantic_parse)
|
|
if semantic_parse
|
|
else None,
|
|
)
|
|
|
|
def _serialize_run_list_item(
|
|
self,
|
|
row: dict[str, Any],
|
|
tool_calls: list[dict[str, Any]],
|
|
) -> AgentRunRead:
|
|
return AgentRunRead(
|
|
id=str(row["id"]),
|
|
run_id=str(row["run_id"]),
|
|
agent=str(row["agent"]),
|
|
source=str(row["source"]),
|
|
user_id=row.get("user_id"),
|
|
task_id=row.get("task_id"),
|
|
ontology_json=self._build_list_ontology_json(row),
|
|
route_json=self._build_list_route_json(row),
|
|
permission_level=str(row["permission_level"]),
|
|
status=str(row["status"]),
|
|
result_summary=row.get("result_summary"),
|
|
error_message=row.get("error_message"),
|
|
started_at=row["started_at"],
|
|
finished_at=row.get("finished_at"),
|
|
tool_calls=[self._serialize_light_tool_call(item) for item in tool_calls],
|
|
semantic_parse=None,
|
|
)
|
|
|
|
def _build_list_route_json(self, row: dict[str, Any]) -> dict[str, Any]:
|
|
payload: dict[str, Any] = {}
|
|
for source_key, target_key in LIST_ROUTE_FIELDS:
|
|
self._set_if_present(payload, target_key, row.get(source_key))
|
|
|
|
progress = self._coerce_json_object(row.get("route_progress"))
|
|
compact_progress = {
|
|
key: value
|
|
for key, value in progress.items()
|
|
if key in LIST_PROGRESS_FIELDS and self._is_scalar_json_value(value)
|
|
}
|
|
if compact_progress:
|
|
payload["progress"] = compact_progress
|
|
return payload
|
|
|
|
def _build_list_ontology_json(self, row: dict[str, Any]) -> dict[str, Any]:
|
|
payload: dict[str, Any] = {}
|
|
for source_key, target_key in LIST_ONTOLOGY_FIELDS:
|
|
self._set_if_present(payload, target_key, row.get(source_key))
|
|
return payload
|
|
|
|
def _serialize_light_tool_call(self, row: dict[str, Any]) -> AgentToolCallRead:
|
|
return AgentToolCallRead(
|
|
id=str(row["id"]),
|
|
run_id=str(row["run_id"]),
|
|
tool_type=str(row["tool_type"]),
|
|
tool_name=str(row["tool_name"]),
|
|
request_json={},
|
|
response_json={},
|
|
status=str(row["status"]),
|
|
duration_ms=int(row.get("duration_ms") or 0),
|
|
error_message=row.get("error_message"),
|
|
created_at=row["created_at"],
|
|
)
|
|
|
|
@staticmethod
|
|
def _group_light_tool_calls(
|
|
tool_calls: list[dict[str, Any]],
|
|
) -> dict[str, list[dict[str, Any]]]:
|
|
grouped: dict[str, list[dict[str, Any]]] = {}
|
|
for tool_call in tool_calls:
|
|
grouped.setdefault(str(tool_call.get("run_id") or ""), []).append(tool_call)
|
|
return grouped
|
|
|
|
@staticmethod
|
|
def _coerce_json_object(value: Any) -> dict[str, Any]:
|
|
if isinstance(value, dict):
|
|
return value
|
|
if isinstance(value, str):
|
|
normalized = value.strip()
|
|
if normalized.startswith("{") and normalized.endswith("}"):
|
|
try:
|
|
loaded = json.loads(normalized)
|
|
except json.JSONDecodeError:
|
|
return {}
|
|
return loaded if isinstance(loaded, dict) else {}
|
|
return {}
|
|
|
|
@staticmethod
|
|
def _set_if_present(payload: dict[str, Any], key: str, value: Any) -> None:
|
|
if value is None:
|
|
return
|
|
if isinstance(value, str) and not value.strip():
|
|
return
|
|
if not AgentRunService._is_scalar_json_value(value):
|
|
return
|
|
payload[key] = value
|
|
|
|
@staticmethod
|
|
def _is_scalar_json_value(value: Any) -> bool:
|
|
return value is None or isinstance(value, str | int | float | bool)
|