feat: add Jarvis agent verification foundation

Add Day 1 agent runtime foundations with task and event schemas, verifier support, capability metadata, graph event tracing, and regression coverage while preserving the direct execution path.
This commit is contained in:
2026-04-03 15:18:08 +08:00
parent 4972b4e6b1
commit aa0ef0fbea
14 changed files with 867 additions and 17 deletions

View File

@@ -6,6 +6,7 @@ import asyncio
import json
import logging
import re
from uuid import uuid4
from typing import Any, Literal, cast
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage, ToolMessage
@@ -19,10 +20,13 @@ from app.agents.prompts import (
MASTER_SYSTEM_PROMPT,
SCHEDULE_PLANNER_SYSTEM_PROMPT,
)
from app.agents.registry import load_builtin_registry_indexes
from app.agents.schemas.event import AgentEvent
from app.agents.skill_registry import build_skill_context
from app.agents.state import AgentRole, AgentState
from app.agents.tools import SUB_COMMANDER_TOOLSETS
from app.agents.tools.time_reasoning import normalize_tool_time_arguments
from app.agents.verifier import apply_verification_verdict, verify_task_result
from app.services.llm_service import (
create_llm_from_config,
default_provider_capabilities,
@@ -632,6 +636,76 @@ def _conversation_history_messages(state: AgentState) -> list[BaseMessage]:
return [message for message in history if getattr(message, "type", "") != "system"]
def _append_event_trace(
    state: AgentState,
    event_type: str,
    *,
    payload: dict[str, Any] | None = None,
    severity: str = "info",
    task_id: str | None = None,
) -> None:
    """Append one serialized ``AgentEvent`` to ``state["event_trace"]``.

    Args:
        state: Agent state mapping; read for ``conversation_id``,
            ``current_agent``, ``current_sub_commander`` and the existing
            ``event_trace``, then updated with the extended trace.
        event_type: Event type string, passed to ``AgentEvent`` unchecked
            (it is cast to ``Any`` for the type checker only).
        payload: Optional event payload; a falsy value becomes ``{}``.
        severity: Severity string, passed to ``AgentEvent`` unchecked.
        task_id: Optional task identifier stored on the event.
    """
    # A missing or falsy conversation id collapses to None instead of "".
    conversation_text = str(state.get("conversation_id") or "")
    event_payload = payload or {}

    trace_event = AgentEvent(
        event_id=f"evt-{uuid4()}",
        event_type=cast(Any, event_type),
        conversation_id=conversation_text or None,
        agent_id=_role_value(state.get("current_agent")),
        sub_commander_id=state.get("current_sub_commander"),
        task_id=task_id,
        payload=event_payload,
        severity=cast(Any, severity),
    )

    # Build a new list rather than mutating the one already held by the state.
    trace = list(state.get("event_trace") or [])
    trace.append(trace_event.model_dump(mode="json"))
    state["event_trace"] = trace
def _capability_manifest_for_tool(tool_name: str):
    """Return the capability entry registered for ``tool_name``, if any.

    The tool name is first mapped to a capability id through
    ``capability_id_by_tool_name``; that id is then looked up in
    ``capability_by_id``. ``None`` is returned when the tool name has no
    capability id; otherwise the result is whatever the second lookup yields.
    """
    registry = load_builtin_registry_indexes()
    resolved_id = registry.capability_id_by_tool_name.get(tool_name)
    return None if resolved_id is None else registry.capability_by_id.get(resolved_id)
def _build_verifier_hints(state: AgentState, tool_name: str, result: Any) -> dict[str, Any]:
    """Assemble verifier hints for one tool invocation.

    Args:
        state: Agent state; accepted for interface compatibility and not
            read by this function.
        tool_name: Name of the tool that was invoked.
        result: Raw tool result; only its stringified first 200 characters
            are kept, under ``"result_preview"``.

    Returns:
        A dict with the tool name, the ``.value`` of the capability's
        ``permission_class`` and ``side_effect_scope`` (``None`` when the
        attribute or its ``.value`` is absent), three boolean capability
        flags (``False`` when absent), and the result preview.
    """
    manifest = _capability_manifest_for_tool(tool_name)

    def _value_of(attribute: str) -> Any:
        # getattr with defaults keeps this safe when manifest is None.
        return getattr(getattr(manifest, attribute, None), "value", None)

    hints: dict[str, Any] = {
        "tool_name": tool_name,
        "permission_class": _value_of("permission_class"),
        "side_effect_scope": _value_of("side_effect_scope"),
    }
    for flag_name in ("requires_confirmation", "supports_retry", "safe_for_parallel_use"):
        hints[flag_name] = bool(getattr(manifest, flag_name, False))
    hints["result_preview"] = _stringify_message_content(result)[:200]
    return hints
def _update_task_result_summary(state: AgentState, tool_summaries: list[dict[str, Any]]) -> None:
if not tool_summaries:
return
previous_summary = state.get("task_result_summary") or {}
previous_tools = previous_summary.get("tools") or []
merged_tools = [*previous_tools, *tool_summaries]
summary = {
"tool_count": len(merged_tools),
"tools": merged_tools,
"created_count": sum(int(item.get("created_count") or 0) for item in merged_tools),
"created_entity_types": [
entity_type
for item in merged_tools
for entity_type in item.get("created_entity_types") or []
if entity_type
],
"stop_reason": state.get("stop_reason"),
}
state["task_result_summary"] = summary
state["action_results"] = [*(state.get("action_results") or []), summary]
def _record_sub_commander(state: AgentState, role: AgentRole, sub_commander: str, user_query: str) -> None:
state["current_agent"] = role.value
state["current_sub_commander"] = sub_commander
@@ -889,6 +963,8 @@ async def _execute_tool_calls(
result_lines: list[str] = []
created_entities: list[dict[str, str]] = []
tool_messages: list[ToolMessage] = []
verifier_hints_by_tool: list[dict[str, Any]] = []
tool_summaries: list[dict[str, Any]] = []
for call in tool_calls:
tool_name = call["name"]
@@ -897,6 +973,13 @@ async def _execute_tool_calls(
if tool is None:
raise ValueError(f"Tool not found: {tool_name}")
_append_event_trace(
state,
"agent.tool.start",
payload={"tool_name": tool_name, "args": normalized_args},
task_id=str(call.get("id") or "") or None,
)
try:
if hasattr(tool, "ainvoke"):
result = await tool.ainvoke(normalized_args)
@@ -905,6 +988,13 @@ async def _execute_tool_calls(
except Exception as exc:
logger.exception("Tool execution failed: %s args=%s", tool_name, normalized_args)
result = f"工具执行失败: {exc}"
_append_event_trace(
state,
"agent.error",
payload={"tool_name": tool_name, "args": normalized_args, "error": str(exc)},
severity="error",
task_id=str(call.get("id") or "") or None,
)
normalized_call = {
"id": call.get("id"),
@@ -914,6 +1004,27 @@ async def _execute_tool_calls(
}
normalized_calls.append(normalized_call)
result_lines.append(f"[{tool_name}] {result}")
verifier_hints = _build_verifier_hints(state, tool_name, result)
verifier_hints_by_tool.append(verifier_hints)
tool_outcome = {
"tool_name": tool_name,
"args": normalized_args,
"result_preview": _stringify_message_content(result)[:200],
"verifier_hints": verifier_hints,
}
state["tool_outcomes"] = [*(state.get("tool_outcomes") or []), tool_outcome]
_append_event_trace(
state,
"agent.tool.result",
payload={
"tool_name": tool_name,
"args": normalized_args,
"result_preview": _stringify_message_content(result)[:200],
"verification": verifier_hints,
},
severity="error" if _tool_result_indicates_failure(result) else "info",
task_id=str(call.get("id") or "") or None,
)
tool_messages.append(
ToolMessage(
content=_stringify_message_content(result),
@@ -922,9 +1033,21 @@ async def _execute_tool_calls(
)
)
entity = _classify_created_entity(tool_name)
call_created_entities: list[dict[str, str]] = []
if entity and not _tool_result_indicates_failure(result):
created_entities.append(entity)
call_created_entities.append(entity)
tool_summaries.append(
{
"tool_name": tool_name,
"result_preview": _stringify_message_content(result)[:200],
"created_entity_types": [entity.get("type") for entity in call_created_entities if entity.get("type")],
"created_count": len(call_created_entities),
}
)
state["verifier_hints"] = {"tools": verifier_hints_by_tool}
_update_task_result_summary(state, tool_summaries)
return normalized_calls, "\n".join(result_lines), created_entities, tool_messages
@@ -1127,6 +1250,43 @@ async def _run_sub_commander(
if summary_target:
state[_summary_state_key(summary_target)] = state.get("final_response")
task_result_summary = state.get("task_result_summary")
tool_outcomes = list(state.get("tool_outcomes") or [])
has_tool_failure = any(
_tool_result_indicates_failure(outcome.get("result_preview"))
for outcome in tool_outcomes
)
verifier_input = {
"summary": state.get("final_response") or (task_result_summary or {}).get("tools"),
"evidence": tool_outcomes,
"success": bool(tool_outcomes or state.get("final_response")) and not has_tool_failure,
}
_append_event_trace(
state,
"agent.verify.started",
payload={
"summary_present": bool(verifier_input["summary"]),
"evidence_count": len(verifier_input["evidence"]),
},
)
verdict = verify_task_result(
summary=state.get("final_response"),
evidence=tool_outcomes,
result=verifier_input,
)
updated_state = apply_verification_verdict(state, verdict)
state.update(updated_state)
_append_event_trace(
state,
"agent.verify.completed",
payload={
"status": verdict.status,
"summary": verdict.summary,
"evidence_count": len(verdict.evidence),
},
severity="error" if verdict.status == "failed" else "info",
)
final_response_text = state.get("final_response")
if not state.get("clarification_needed") and final_response_text:
_clear_clarification_context(state)
@@ -1355,6 +1515,7 @@ def get_agent_graph(callbacks: list | None = None):
__all__ = [
"_build_verifier_hints",
"_choose_sub_commander",
"_parse_json_action",
"_route_agent_from_user_query",