feat: add Jarvis agent verification foundation
Add Day 1 agent runtime foundations with task and event schemas, verifier support, capability metadata, graph event tracing, and regression coverage while preserving the direct execution path.
This commit is contained in:
@@ -6,6 +6,7 @@ import asyncio
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from uuid import uuid4
|
||||
from typing import Any, Literal, cast
|
||||
|
||||
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage, ToolMessage
|
||||
@@ -19,10 +20,13 @@ from app.agents.prompts import (
|
||||
MASTER_SYSTEM_PROMPT,
|
||||
SCHEDULE_PLANNER_SYSTEM_PROMPT,
|
||||
)
|
||||
from app.agents.registry import load_builtin_registry_indexes
|
||||
from app.agents.schemas.event import AgentEvent
|
||||
from app.agents.skill_registry import build_skill_context
|
||||
from app.agents.state import AgentRole, AgentState
|
||||
from app.agents.tools import SUB_COMMANDER_TOOLSETS
|
||||
from app.agents.tools.time_reasoning import normalize_tool_time_arguments
|
||||
from app.agents.verifier import apply_verification_verdict, verify_task_result
|
||||
from app.services.llm_service import (
|
||||
create_llm_from_config,
|
||||
default_provider_capabilities,
|
||||
@@ -632,6 +636,76 @@ def _conversation_history_messages(state: AgentState) -> list[BaseMessage]:
|
||||
return [message for message in history if getattr(message, "type", "") != "system"]
|
||||
|
||||
|
||||
def _append_event_trace(
    state: AgentState,
    event_type: str,
    *,
    payload: dict[str, Any] | None = None,
    severity: str = "info",
    task_id: str | None = None,
) -> None:
    """Record one agent event at the end of ``state["event_trace"]``.

    An ``AgentEvent`` is built with a fresh ``evt-<uuid4>`` identifier and
    context read from ``state`` (conversation id, current agent, current
    sub-commander). Its JSON-mode dump is stored in a newly built list that
    replaces ``state["event_trace"]``; the previous list is not mutated.

    Args:
        state: Agent state that is read for context and updated in place.
        event_type: Event type string passed through to ``AgentEvent``.
        payload: Optional event payload; a falsy value becomes ``{}``.
        severity: Severity string passed through to ``AgentEvent``.
        task_id: Optional task identifier attached to the event.
    """
    event_fields: dict[str, Any] = {
        "event_id": f"evt-{uuid4()}",
        # NOTE(review): the casts presumably silence narrower field types
        # declared on AgentEvent -- confirm against the schema.
        "event_type": cast(Any, event_type),
        # A missing or empty conversation id is recorded as None.
        "conversation_id": str(state.get("conversation_id") or "") or None,
        "agent_id": _role_value(state.get("current_agent")),
        "sub_commander_id": state.get("current_sub_commander"),
        "task_id": task_id,
        "payload": payload or {},
        "severity": cast(Any, severity),
    }
    event = AgentEvent(**event_fields)

    trace = list(state.get("event_trace") or [])
    trace.append(event.model_dump(mode="json"))
    state["event_trace"] = trace
|
||||
|
||||
|
||||
def _capability_manifest_for_tool(tool_name: str):
    """Look up the capability entry registered for ``tool_name``.

    The built-in registry indexes are loaded, the tool name is mapped to a
    capability id, and that id is resolved through ``capability_by_id``.

    Returns:
        ``None`` when the tool name has no capability id; otherwise whatever
        ``capability_by_id.get`` yields for that id (which may also be
        ``None``).
    """
    registry = load_builtin_registry_indexes()
    capability_id = registry.capability_id_by_tool_name.get(tool_name)
    return None if capability_id is None else registry.capability_by_id.get(capability_id)
|
||||
|
||||
|
||||
def _build_verifier_hints(state: AgentState, tool_name: str, result: Any) -> dict[str, Any]:
    """Assemble capability metadata and a result preview for one tool call.

    Every capability-derived field falls back to a neutral value (``None``
    for the two ``.value`` lookups, ``False`` for the flags) when the tool
    has no capability entry or the attribute is absent.

    Args:
        state: Agent state; not read by this function.
        tool_name: Name of the tool that was invoked.
        result: Raw tool result; only its first 200 stringified characters
            are kept.

    Returns:
        A dict with the tool name, permission class, side-effect scope,
        three boolean capability flags and ``result_preview``.
    """
    capability = _capability_manifest_for_tool(tool_name)

    def _value_of(attribute: str) -> Any:
        # Reads ``capability.<attribute>.value``, tolerating a missing link.
        return getattr(getattr(capability, attribute, None), "value", None)

    flag_names = ("requires_confirmation", "supports_retry", "safe_for_parallel_use")
    return {
        "tool_name": tool_name,
        "permission_class": _value_of("permission_class"),
        "side_effect_scope": _value_of("side_effect_scope"),
        **{name: bool(getattr(capability, name, False)) for name in flag_names},
        "result_preview": _stringify_message_content(result)[:200],
    }
|
||||
|
||||
|
||||
def _update_task_result_summary(state: AgentState, tool_summaries: list[dict[str, Any]]) -> None:
    """Fold new per-tool summaries into the task summary kept on ``state``.

    Does nothing when ``tool_summaries`` is empty. Otherwise the new entries
    are appended after the tools already stored under
    ``state["task_result_summary"]["tools"]``, the totals are recomputed
    over the combined list, and the resulting summary is both stored as
    ``state["task_result_summary"]`` and appended to a fresh copy of
    ``state["action_results"]``.

    Args:
        state: Agent state that is read and updated in place.
        tool_summaries: Summaries for the tool calls that were just run.
    """
    if not tool_summaries:
        return

    earlier_tools = (state.get("task_result_summary") or {}).get("tools") or []
    all_tools = list(earlier_tools)
    all_tools.extend(tool_summaries)

    created_total = 0
    for entry in all_tools:
        # A missing or falsy count contributes zero.
        created_total += int(entry.get("created_count") or 0)

    entity_types: list[Any] = []
    for entry in all_tools:
        # Falsy entity types are dropped; duplicates are kept.
        entity_types.extend(kind for kind in (entry.get("created_entity_types") or []) if kind)

    summary = {
        "tool_count": len(all_tools),
        "tools": all_tools,
        "created_count": created_total,
        "created_entity_types": entity_types,
        "stop_reason": state.get("stop_reason"),
    }
    state["task_result_summary"] = summary

    results = list(state.get("action_results") or [])
    results.append(summary)
    state["action_results"] = results
|
||||
|
||||
|
||||
def _record_sub_commander(state: AgentState, role: AgentRole, sub_commander: str, user_query: str) -> None:
|
||||
state["current_agent"] = role.value
|
||||
state["current_sub_commander"] = sub_commander
|
||||
@@ -889,6 +963,8 @@ async def _execute_tool_calls(
|
||||
result_lines: list[str] = []
|
||||
created_entities: list[dict[str, str]] = []
|
||||
tool_messages: list[ToolMessage] = []
|
||||
verifier_hints_by_tool: list[dict[str, Any]] = []
|
||||
tool_summaries: list[dict[str, Any]] = []
|
||||
|
||||
for call in tool_calls:
|
||||
tool_name = call["name"]
|
||||
@@ -897,6 +973,13 @@ async def _execute_tool_calls(
|
||||
if tool is None:
|
||||
raise ValueError(f"Tool not found: {tool_name}")
|
||||
|
||||
_append_event_trace(
|
||||
state,
|
||||
"agent.tool.start",
|
||||
payload={"tool_name": tool_name, "args": normalized_args},
|
||||
task_id=str(call.get("id") or "") or None,
|
||||
)
|
||||
|
||||
try:
|
||||
if hasattr(tool, "ainvoke"):
|
||||
result = await tool.ainvoke(normalized_args)
|
||||
@@ -905,6 +988,13 @@ async def _execute_tool_calls(
|
||||
except Exception as exc:
|
||||
logger.exception("Tool execution failed: %s args=%s", tool_name, normalized_args)
|
||||
result = f"工具执行失败: {exc}"
|
||||
_append_event_trace(
|
||||
state,
|
||||
"agent.error",
|
||||
payload={"tool_name": tool_name, "args": normalized_args, "error": str(exc)},
|
||||
severity="error",
|
||||
task_id=str(call.get("id") or "") or None,
|
||||
)
|
||||
|
||||
normalized_call = {
|
||||
"id": call.get("id"),
|
||||
@@ -914,6 +1004,27 @@ async def _execute_tool_calls(
|
||||
}
|
||||
normalized_calls.append(normalized_call)
|
||||
result_lines.append(f"[{tool_name}] {result}")
|
||||
verifier_hints = _build_verifier_hints(state, tool_name, result)
|
||||
verifier_hints_by_tool.append(verifier_hints)
|
||||
tool_outcome = {
|
||||
"tool_name": tool_name,
|
||||
"args": normalized_args,
|
||||
"result_preview": _stringify_message_content(result)[:200],
|
||||
"verifier_hints": verifier_hints,
|
||||
}
|
||||
state["tool_outcomes"] = [*(state.get("tool_outcomes") or []), tool_outcome]
|
||||
_append_event_trace(
|
||||
state,
|
||||
"agent.tool.result",
|
||||
payload={
|
||||
"tool_name": tool_name,
|
||||
"args": normalized_args,
|
||||
"result_preview": _stringify_message_content(result)[:200],
|
||||
"verification": verifier_hints,
|
||||
},
|
||||
severity="error" if _tool_result_indicates_failure(result) else "info",
|
||||
task_id=str(call.get("id") or "") or None,
|
||||
)
|
||||
tool_messages.append(
|
||||
ToolMessage(
|
||||
content=_stringify_message_content(result),
|
||||
@@ -922,9 +1033,21 @@ async def _execute_tool_calls(
|
||||
)
|
||||
)
|
||||
entity = _classify_created_entity(tool_name)
|
||||
call_created_entities: list[dict[str, str]] = []
|
||||
if entity and not _tool_result_indicates_failure(result):
|
||||
created_entities.append(entity)
|
||||
call_created_entities.append(entity)
|
||||
tool_summaries.append(
|
||||
{
|
||||
"tool_name": tool_name,
|
||||
"result_preview": _stringify_message_content(result)[:200],
|
||||
"created_entity_types": [entity.get("type") for entity in call_created_entities if entity.get("type")],
|
||||
"created_count": len(call_created_entities),
|
||||
}
|
||||
)
|
||||
|
||||
state["verifier_hints"] = {"tools": verifier_hints_by_tool}
|
||||
_update_task_result_summary(state, tool_summaries)
|
||||
return normalized_calls, "\n".join(result_lines), created_entities, tool_messages
|
||||
|
||||
|
||||
@@ -1127,6 +1250,43 @@ async def _run_sub_commander(
|
||||
if summary_target:
|
||||
state[_summary_state_key(summary_target)] = state.get("final_response")
|
||||
|
||||
task_result_summary = state.get("task_result_summary")
|
||||
tool_outcomes = list(state.get("tool_outcomes") or [])
|
||||
has_tool_failure = any(
|
||||
_tool_result_indicates_failure(outcome.get("result_preview"))
|
||||
for outcome in tool_outcomes
|
||||
)
|
||||
verifier_input = {
|
||||
"summary": state.get("final_response") or (task_result_summary or {}).get("tools"),
|
||||
"evidence": tool_outcomes,
|
||||
"success": bool(tool_outcomes or state.get("final_response")) and not has_tool_failure,
|
||||
}
|
||||
_append_event_trace(
|
||||
state,
|
||||
"agent.verify.started",
|
||||
payload={
|
||||
"summary_present": bool(verifier_input["summary"]),
|
||||
"evidence_count": len(verifier_input["evidence"]),
|
||||
},
|
||||
)
|
||||
verdict = verify_task_result(
|
||||
summary=state.get("final_response"),
|
||||
evidence=tool_outcomes,
|
||||
result=verifier_input,
|
||||
)
|
||||
updated_state = apply_verification_verdict(state, verdict)
|
||||
state.update(updated_state)
|
||||
_append_event_trace(
|
||||
state,
|
||||
"agent.verify.completed",
|
||||
payload={
|
||||
"status": verdict.status,
|
||||
"summary": verdict.summary,
|
||||
"evidence_count": len(verdict.evidence),
|
||||
},
|
||||
severity="error" if verdict.status == "failed" else "info",
|
||||
)
|
||||
|
||||
final_response_text = state.get("final_response")
|
||||
if not state.get("clarification_needed") and final_response_text:
|
||||
_clear_clarification_context(state)
|
||||
@@ -1355,6 +1515,7 @@ def get_agent_graph(callbacks: list | None = None):
|
||||
|
||||
|
||||
__all__ = [
|
||||
"_build_verifier_hints",
|
||||
"_choose_sub_commander",
|
||||
"_parse_json_action",
|
||||
"_route_agent_from_user_query",
|
||||
|
||||
Reference in New Issue
Block a user