feat: add Jarvis agent verification foundation

Add Day 1 agent runtime foundations with task and event schemas, verifier support, capability metadata, graph event tracing, and regression coverage while preserving the direct execution path.
This commit is contained in:
2026-04-03 15:18:08 +08:00
parent 4972b4e6b1
commit aa0ef0fbea
14 changed files with 867 additions and 17 deletions

View File

@@ -6,6 +6,7 @@ import asyncio
import json
import logging
import re
from uuid import uuid4
from typing import Any, Literal, cast
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage, ToolMessage
@@ -19,10 +20,13 @@ from app.agents.prompts import (
MASTER_SYSTEM_PROMPT,
SCHEDULE_PLANNER_SYSTEM_PROMPT,
)
from app.agents.registry import load_builtin_registry_indexes
from app.agents.schemas.event import AgentEvent
from app.agents.skill_registry import build_skill_context
from app.agents.state import AgentRole, AgentState
from app.agents.tools import SUB_COMMANDER_TOOLSETS
from app.agents.tools.time_reasoning import normalize_tool_time_arguments
from app.agents.verifier import apply_verification_verdict, verify_task_result
from app.services.llm_service import (
create_llm_from_config,
default_provider_capabilities,
@@ -632,6 +636,76 @@ def _conversation_history_messages(state: AgentState) -> list[BaseMessage]:
return [message for message in history if getattr(message, "type", "") != "system"]
def _append_event_trace(
    state: AgentState,
    event_type: str,
    *,
    payload: dict[str, Any] | None = None,
    severity: str = "info",
    task_id: str | None = None,
) -> None:
    """Append one serialized ``AgentEvent`` to ``state["event_trace"]``.

    The event gets a fresh ``evt-<uuid4>`` id and is stamped with the
    conversation, agent and sub-commander currently recorded in ``state``.
    The trace list is rebuilt (not mutated in place) and each entry is
    stored as the JSON-mode dump of the event model.

    Args:
        state: Agent state that supplies context and receives the trace.
        event_type: Event type name, forwarded to ``AgentEvent``.
        payload: Optional event details; a falsy value is stored as ``{}``.
        severity: Severity label forwarded to ``AgentEvent``.
        task_id: Optional identifier tying the event to a task or tool call.
    """
    # A missing or empty conversation id is recorded as None, never as "".
    conversation_id = str(state.get("conversation_id") or "") or None
    event = AgentEvent(
        event_id=f"evt-{uuid4()}",
        event_type=cast(Any, event_type),
        conversation_id=conversation_id,
        agent_id=_role_value(state.get("current_agent")),
        sub_commander_id=state.get("current_sub_commander"),
        task_id=task_id,
        payload=payload or {},
        severity=cast(Any, severity),
    )
    earlier_events = state.get("event_trace") or []
    state["event_trace"] = [*earlier_events, event.model_dump(mode="json")]
def _capability_manifest_for_tool(tool_name: str):
    """Return the built-in capability manifest for ``tool_name``.

    Resolves the tool name to a capability id through the cached registry
    indexes, then looks the manifest up by that id. Returns ``None`` when
    the tool name has no capability id or the id has no manifest.
    """
    registry = load_builtin_registry_indexes()
    capability_id = registry.capability_id_by_tool_name.get(tool_name)
    if capability_id is not None:
        return registry.capability_by_id.get(capability_id)
    return None
def _build_verifier_hints(state: AgentState, tool_name: str, result: Any) -> dict[str, Any]:
    """Describe a tool call for the verifier using its capability metadata.

    Args:
        state: Current agent state (not read by this helper).
        tool_name: Name of the tool that was executed.
        result: Raw tool result; only a 200-character preview is kept.

    Returns:
        A dict with the tool name, the ``.value`` of the manifest's
        permission class and side-effect scope (``None`` when unavailable),
        three boolean capability flags (``False`` when the tool has no
        manifest) and ``result_preview``.
    """
    manifest = _capability_manifest_for_tool(tool_name)

    def enum_value(attribute: str) -> Any:
        # Tolerates both a missing manifest and a missing attribute.
        return getattr(getattr(manifest, attribute, None), "value", None)

    def flag(attribute: str) -> bool:
        return bool(getattr(manifest, attribute, False))

    hints: dict[str, Any] = {"tool_name": tool_name}
    hints["permission_class"] = enum_value("permission_class")
    hints["side_effect_scope"] = enum_value("side_effect_scope")
    hints["requires_confirmation"] = flag("requires_confirmation")
    hints["supports_retry"] = flag("supports_retry")
    hints["safe_for_parallel_use"] = flag("safe_for_parallel_use")
    hints["result_preview"] = _stringify_message_content(result)[:200]
    return hints
def _update_task_result_summary(state: AgentState, tool_summaries: list[dict[str, Any]]) -> None:
    """Merge new per-tool summaries into ``state["task_result_summary"]``.

    The new summaries are appended to any tools already recorded, and the
    aggregate counters are recomputed over the merged list. The resulting
    summary replaces ``state["task_result_summary"]`` and is also appended
    to ``state["action_results"]``. Does nothing when ``tool_summaries`` is
    empty.

    Args:
        state: Agent state to update in place.
        tool_summaries: Summaries produced by the current batch of tool calls.
    """
    if not tool_summaries:
        return

    prior_summary = state.get("task_result_summary") or {}
    all_tools = [*(prior_summary.get("tools") or []), *tool_summaries]

    created_total = 0
    for tool_summary in all_tools:
        created_total += int(tool_summary.get("created_count") or 0)

    entity_types = []
    for tool_summary in all_tools:
        for entity_type in tool_summary.get("created_entity_types") or []:
            if entity_type:
                entity_types.append(entity_type)

    merged_summary = {
        "tool_count": len(all_tools),
        "tools": all_tools,
        "created_count": created_total,
        "created_entity_types": entity_types,
        "stop_reason": state.get("stop_reason"),
    }
    state["task_result_summary"] = merged_summary
    state["action_results"] = [*(state.get("action_results") or []), merged_summary]
def _record_sub_commander(state: AgentState, role: AgentRole, sub_commander: str, user_query: str) -> None:
state["current_agent"] = role.value
state["current_sub_commander"] = sub_commander
@@ -889,6 +963,8 @@ async def _execute_tool_calls(
result_lines: list[str] = []
created_entities: list[dict[str, str]] = []
tool_messages: list[ToolMessage] = []
verifier_hints_by_tool: list[dict[str, Any]] = []
tool_summaries: list[dict[str, Any]] = []
for call in tool_calls:
tool_name = call["name"]
@@ -897,6 +973,13 @@ async def _execute_tool_calls(
if tool is None:
raise ValueError(f"Tool not found: {tool_name}")
_append_event_trace(
state,
"agent.tool.start",
payload={"tool_name": tool_name, "args": normalized_args},
task_id=str(call.get("id") or "") or None,
)
try:
if hasattr(tool, "ainvoke"):
result = await tool.ainvoke(normalized_args)
@@ -905,6 +988,13 @@ async def _execute_tool_calls(
except Exception as exc:
logger.exception("Tool execution failed: %s args=%s", tool_name, normalized_args)
result = f"工具执行失败: {exc}"
_append_event_trace(
state,
"agent.error",
payload={"tool_name": tool_name, "args": normalized_args, "error": str(exc)},
severity="error",
task_id=str(call.get("id") or "") or None,
)
normalized_call = {
"id": call.get("id"),
@@ -914,6 +1004,27 @@ async def _execute_tool_calls(
}
normalized_calls.append(normalized_call)
result_lines.append(f"[{tool_name}] {result}")
verifier_hints = _build_verifier_hints(state, tool_name, result)
verifier_hints_by_tool.append(verifier_hints)
tool_outcome = {
"tool_name": tool_name,
"args": normalized_args,
"result_preview": _stringify_message_content(result)[:200],
"verifier_hints": verifier_hints,
}
state["tool_outcomes"] = [*(state.get("tool_outcomes") or []), tool_outcome]
_append_event_trace(
state,
"agent.tool.result",
payload={
"tool_name": tool_name,
"args": normalized_args,
"result_preview": _stringify_message_content(result)[:200],
"verification": verifier_hints,
},
severity="error" if _tool_result_indicates_failure(result) else "info",
task_id=str(call.get("id") or "") or None,
)
tool_messages.append(
ToolMessage(
content=_stringify_message_content(result),
@@ -922,9 +1033,21 @@ async def _execute_tool_calls(
)
)
entity = _classify_created_entity(tool_name)
call_created_entities: list[dict[str, str]] = []
if entity and not _tool_result_indicates_failure(result):
created_entities.append(entity)
call_created_entities.append(entity)
tool_summaries.append(
{
"tool_name": tool_name,
"result_preview": _stringify_message_content(result)[:200],
"created_entity_types": [entity.get("type") for entity in call_created_entities if entity.get("type")],
"created_count": len(call_created_entities),
}
)
state["verifier_hints"] = {"tools": verifier_hints_by_tool}
_update_task_result_summary(state, tool_summaries)
return normalized_calls, "\n".join(result_lines), created_entities, tool_messages
@@ -1127,6 +1250,43 @@ async def _run_sub_commander(
if summary_target:
state[_summary_state_key(summary_target)] = state.get("final_response")
task_result_summary = state.get("task_result_summary")
tool_outcomes = list(state.get("tool_outcomes") or [])
has_tool_failure = any(
_tool_result_indicates_failure(outcome.get("result_preview"))
for outcome in tool_outcomes
)
verifier_input = {
"summary": state.get("final_response") or (task_result_summary or {}).get("tools"),
"evidence": tool_outcomes,
"success": bool(tool_outcomes or state.get("final_response")) and not has_tool_failure,
}
_append_event_trace(
state,
"agent.verify.started",
payload={
"summary_present": bool(verifier_input["summary"]),
"evidence_count": len(verifier_input["evidence"]),
},
)
verdict = verify_task_result(
summary=state.get("final_response"),
evidence=tool_outcomes,
result=verifier_input,
)
updated_state = apply_verification_verdict(state, verdict)
state.update(updated_state)
_append_event_trace(
state,
"agent.verify.completed",
payload={
"status": verdict.status,
"summary": verdict.summary,
"evidence_count": len(verdict.evidence),
},
severity="error" if verdict.status == "failed" else "info",
)
final_response_text = state.get("final_response")
if not state.get("clarification_needed") and final_response_text:
_clear_clarification_context(state)
@@ -1355,6 +1515,7 @@ def get_agent_graph(callbacks: list | None = None):
__all__ = [
"_build_verifier_hints",
"_choose_sub_commander",
"_parse_json_action",
"_route_agent_from_user_query",

View File

@@ -324,6 +324,19 @@ ANALYST_INSIGHTS_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
"""
# System prompt for the verifier persona. It restricts the model to one of the
# three verdicts (passed / failed / skipped) plus a one-sentence summary, asks
# it to keep key evidence, and tells it to prefer "skipped" when the available
# information cannot prove success or failure.
VERIFIER_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
你是 Jarvis 的验证官,负责对执行结果做最小但明确的核验。
## 你的职责:
- 只输出 passed、failed、skipped 三种验证结论之一
- 用一句话总结验证判断
- 如有证据,保留关键证据点
- 当信息不足以证明成功或失败时,优先判定为 skipped
- 不重写执行方案,不扩展无关建议
"""
JSON_ACTION_FALLBACK_PROMPT = """你当前运行在 JSON action fallback 模式。
你的输出必须满足以下规则:

View File

@@ -1,11 +1,19 @@
"""Registry manifest models and validation helpers."""
from functools import lru_cache
from app.agents.registry.indexes import RegistryIndexes, build_registry_indexes
from app.agents.registry.loader import RegistryBundle, load_builtin_registry_bundle
@lru_cache(maxsize=1)
def load_builtin_registry_indexes() -> RegistryIndexes:
    """Return the lookup indexes for the built-in registry bundle.

    The result is memoized, so every call returns the same
    ``RegistryIndexes`` object; callers should treat it as read-only.
    """
    builtin_bundle = load_builtin_registry_bundle()
    return build_registry_indexes(builtin_bundle)
__all__ = [
"RegistryBundle",
"RegistryIndexes",
"build_registry_indexes",
"load_builtin_registry_bundle",
"load_builtin_registry_indexes",
]

View File

@@ -2,6 +2,8 @@ from app.agents.prompts import SUB_COMMANDER_PROMPTS_BY_KEY
from app.agents.registry.models import (
AgentManifest,
CapabilityManifest,
PermissionClass,
SideEffectScope,
SpecialistTemplateManifest,
SubCommanderManifest,
)
@@ -89,10 +91,150 @@ _capability_tool_names = tuple(
)
)
# Metadata profile shared by tools that only read local data.
_READ_ONLY_CAPABILITY_METADATA: dict[str, object] = {
    "permission_class": PermissionClass.READ,
    "side_effect_scope": SideEffectScope.NONE,
    "supports_retry": True,
    "idempotent": True,
    "safe_for_parallel_use": True,
    "requires_confirmation": False,
}

# Metadata profile shared by tools that create or modify local state.
_LOCAL_WRITE_CAPABILITY_METADATA: dict[str, object] = {
    "permission_class": PermissionClass.WRITE,
    "side_effect_scope": SideEffectScope.LOCAL_STATE,
    "supports_retry": False,
    "idempotent": False,
    "safe_for_parallel_use": False,
    "requires_confirmation": True,
}

_READ_ONLY_TOOL_NAMES: tuple[str, ...] = (
    "get_tasks",
    "get_schedule_day",
    "resolve_time_expression",
    "search_knowledge",
    "hybrid_search",
    "get_knowledge_graph_context",
    "get_forum_posts",
    "scan_forum_for_instructions",
)

_LOCAL_WRITE_TOOL_NAMES: tuple[str, ...] = (
    "create_task",
    "update_task_status",
    "create_todo",
    "create_schedule_task",
    "create_reminder",
    "create_goal",
    "create_forum_post",
    "build_knowledge_graph",
)

# Per-tool capability metadata, keyed by runtime tool name. Every tool gets its
# own dict copy so later mutation of one entry cannot leak into another.
_CAPABILITY_METADATA_BY_TOOL_NAME: dict[str, dict[str, object]] = {
    **{name: dict(_READ_ONLY_CAPABILITY_METADATA) for name in _READ_ONLY_TOOL_NAMES},
    # web_search behaves like a read-only tool but reaches out over the network.
    "web_search": {
        **_READ_ONLY_CAPABILITY_METADATA,
        "permission_class": PermissionClass.EXTERNAL,
        "side_effect_scope": SideEffectScope.NETWORK,
    },
    **{name: dict(_LOCAL_WRITE_CAPABILITY_METADATA) for name in _LOCAL_WRITE_TOOL_NAMES},
}
# One manifest per runtime tool name, using the tool name as the capability id.
# Tools without an entry in the metadata table fall back to the field defaults
# declared on CapabilityManifest.
BUILTIN_CAPABILITY_MANIFESTS: tuple[CapabilityManifest, ...] = tuple(
    CapabilityManifest(
        capability_id=name,
        tool_name=name,
        **_CAPABILITY_METADATA_BY_TOOL_NAME.get(name, {}),
    )
    for name in _capability_tool_names
)

View File

@@ -1,6 +1,21 @@
from enum import Enum
from pydantic import BaseModel
class PermissionClass(str, Enum):
    """Permission category attached to a capability manifest.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    READ = "read"
    WRITE = "write"
    EXTERNAL = "external"
class SideEffectScope(str, Enum):
    """Where a capability's side effects land, as declared on its manifest.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    NONE = "none"
    LOCAL_STATE = "local_state"
    DB_WRITE = "db_write"
    NETWORK = "network"
class AgentManifest(BaseModel):
agent_id: str
display_name: str
@@ -23,6 +38,12 @@ class SubCommanderManifest(BaseModel):
class CapabilityManifest(BaseModel):
    """Registry entry describing one tool-backed capability.

    Only ``capability_id`` and ``tool_name`` are required. The remaining
    fields are execution metadata whose defaults describe a read-only
    capability with no side effects and every boolean flag switched off.
    """

    # Identity.
    capability_id: str
    tool_name: str

    # Permission and side-effect classification.
    permission_class: PermissionClass = PermissionClass.READ
    side_effect_scope: SideEffectScope = SideEffectScope.NONE

    # Execution flags; all default to False.
    supports_retry: bool = False
    idempotent: bool = False
    safe_for_parallel_use: bool = False
    requires_confirmation: bool = False
class SpecialistTemplateManifest(BaseModel):

View File

@@ -0,0 +1,10 @@
from app.agents.schemas.event import AgentEvent
from app.agents.schemas.task import AgentTask, TaskResult, TaskLifecycleStatus, VerificationStatus
__all__ = [
"AgentEvent",
"AgentTask",
"TaskLifecycleStatus",
"TaskResult",
"VerificationStatus",
]

View File

@@ -0,0 +1,28 @@
from __future__ import annotations
from datetime import datetime, timezone
from typing import Any, Literal
from pydantic import BaseModel, Field
# Closed set of event type names accepted by AgentEvent.event_type.
AgentEventType = Literal[
"agent.tool.start",
"agent.tool.result",
"agent.verify.started",
"agent.verify.completed",
"agent.error",
]
# Closed set of severity labels accepted by AgentEvent.severity.
AgentEventSeverity = Literal["info", "warning", "error"]
def _utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


class AgentEvent(BaseModel):
    """One entry of the agent event trace.

    ``event_id`` and ``event_type`` are required. ``timestamp`` defaults to
    the UTC time at construction, ``payload`` to an empty dict and
    ``severity`` to ``"info"``; the remaining context ids are optional.
    """

    event_id: str
    event_type: AgentEventType
    timestamp: datetime = Field(default_factory=_utc_now)

    # Optional context identifying where the event originated.
    conversation_id: str | None = None
    agent_id: str | None = None
    sub_commander_id: str | None = None
    task_id: str | None = None

    payload: dict[str, Any] = Field(default_factory=dict)
    severity: AgentEventSeverity = "info"

View File

@@ -0,0 +1,32 @@
from __future__ import annotations
from datetime import datetime, timezone
from typing import Any, Literal
from pydantic import BaseModel, Field
# Lifecycle states of an AgentTask (AgentTask.status).
TaskLifecycleStatus = Literal["pending", "in_progress", "completed", "failed", "blocked"]
# Verification verdicts (TaskResult.status); kept separate from the task lifecycle states.
VerificationStatus = Literal["passed", "failed", "skipped"]
def _utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


class AgentTask(BaseModel):
    """A unit of agent work together with its lifecycle state and evidence.

    ``task_id`` and ``title`` are required. ``status`` starts as
    ``"pending"``, both evidence lists start empty, and ``created_at`` /
    ``updated_at`` each default to the UTC time at construction.
    """

    task_id: str
    title: str
    status: TaskLifecycleStatus = "pending"

    # Optional ownership and intent.
    owner_agent_id: str | None = None
    role: str | None = None
    goal: str | None = None

    # Evidence that is expected versus evidence that was collected.
    expected_evidence: list[dict[str, Any]] = Field(default_factory=list)
    evidence: list[dict[str, Any]] = Field(default_factory=list)
    result_summary: str | None = None

    created_at: datetime = Field(default_factory=_utc_now)
    updated_at: datetime = Field(default_factory=_utc_now)
class TaskResult(BaseModel):
    """Outcome recorded for a task.

    ``status`` is a verification verdict (passed / failed / skipped), not a
    task lifecycle state. ``evidence`` defaults to an empty list; ``summary``
    and ``output_data`` are optional.
    """

    task_id: str
    status: VerificationStatus

    summary: str | None = None
    evidence: list[dict[str, Any]] = Field(default_factory=list)
    output_data: dict[str, Any] | None = None

View File

@@ -1,7 +1,9 @@
from dataclasses import dataclass
from enum import Enum
from typing import Annotated, Any, TypedDict
from typing import Annotated, Any, Literal, TypedDict
from app.agents.schemas.event import AgentEvent
from app.agents.schemas.task import AgentTask, TaskResult, VerificationStatus
from langchain_core.messages import BaseMessage
from langgraph.graph.message import add_messages
@@ -27,6 +29,7 @@ class AgentState(TypedDict):
user_id: str
conversation_id: str
execution_mode: Literal["direct", "delegated", "verified"]
current_agent: str | None
next_step: str | None
active_agents: list[AgentRole]
@@ -34,14 +37,24 @@ class AgentState(TypedDict):
active_sub_commanders: list[str]
sub_commander_trace: list[dict[str, Any]]
agent_trace: list[str]
event_trace: list[AgentEvent | dict[str, Any]]
pending_tasks: list[dict[str, Any]]
completed_tasks: list[dict[str, Any]]
active_tasks: list[AgentTask | dict[str, Any]]
task_results: list[TaskResult | dict[str, Any]]
tool_calls: list[dict[str, Any]]
last_tool_result: str | None
action_results: list[dict[str, Any]]
created_entities: list[dict[str, Any]]
tool_outcomes: list[dict[str, Any]]
task_result_summary: dict[str, Any] | None
verifier_hints: dict[str, Any] | None
verification_status: VerificationStatus | None
verification_summary: str | None
verification_evidence: list[dict[str, Any]]
budget_state: dict[str, Any] | None
tool_strategy_used: str | None
tool_round_count: int
@@ -89,6 +102,7 @@ def initial_state(user_id: str, conversation_id: str) -> AgentState:
messages=[],
user_id=user_id,
conversation_id=conversation_id,
execution_mode="direct",
current_agent=AgentRole.MASTER.value,
next_step=None,
active_agents=[AgentRole.MASTER],
@@ -96,13 +110,22 @@ def initial_state(user_id: str, conversation_id: str) -> AgentState:
active_sub_commanders=[],
sub_commander_trace=[],
agent_trace=[AgentRole.MASTER.value],
event_trace=[],
pending_tasks=[],
completed_tasks=[],
active_tasks=[],
task_results=[],
tool_calls=[],
last_tool_result=None,
action_results=[],
created_entities=[],
tool_outcomes=[],
task_result_summary=None,
verifier_hints=None,
verification_status=None,
verification_summary=None,
verification_evidence=[],
budget_state=None,
tool_strategy_used=None,
tool_round_count=0,
max_tool_rounds=2,

View File

@@ -0,0 +1,60 @@
from __future__ import annotations
from typing import Any
from pydantic import BaseModel, Field
from app.agents.schemas.task import AgentTask, TaskResult, VerificationStatus
from app.agents.state import AgentState
class VerificationVerdict(BaseModel):
    """Result of verifying a task: a status, an optional summary and evidence."""

    status: VerificationStatus

    # Both are optional; evidence defaults to a fresh empty list.
    summary: str | None = None
    evidence: list[dict[str, Any]] = Field(default_factory=list)
def _coerce_summary(value: Any) -> str | None:
    """Normalize a summary candidate to the ``str | None`` the verdict model accepts."""
    if value is None or isinstance(value, str):
        return value
    # Callers may hand over structured data (e.g. a list of tool summaries)
    # as a fallback summary. Stringify it instead of letting model validation
    # fail; empty non-string values carry no information and become None.
    return str(value) if value else None


def verify_task_result(
    *,
    task: AgentTask | dict[str, Any] | None = None,
    result: TaskResult | dict[str, Any] | None = None,
    summary: str | None = None,
    evidence: list[dict[str, Any]] | None = None,
    status: VerificationStatus | None = None,
) -> VerificationVerdict:
    """Derive a verification verdict from whatever task/result data is available.

    Summary and evidence are each taken from the first truthy source among the
    explicit argument, ``result`` and ``task``. A non-string summary is
    converted with ``str()`` so it can never fail verdict validation.

    The status is chosen in this order:

    1. the explicit ``status`` argument;
    2. ``result["status"]`` when it is one of passed / failed / skipped;
    3. ``result["success"]`` when it is exactly ``True`` or ``False``;
    4. ``"skipped"`` when there is a summary or evidence but no verdict signal;
    5. ``"failed"`` with a fixed explanatory summary when nothing was supplied.

    Args:
        task: Task model or plain dict providing fallback summary/evidence.
        result: Result model or plain dict; may carry ``status``/``success``.
        summary: Explicit summary, preferred over ``result`` and ``task``.
        evidence: Explicit evidence, preferred over ``result`` and ``task``.
        status: Explicit verdict that bypasses all inference.

    Returns:
        The resulting ``VerificationVerdict``.
    """
    normalized_result = result.model_dump() if isinstance(result, TaskResult) else dict(result or {})
    normalized_task = task.model_dump() if isinstance(task, AgentTask) else dict(task or {})

    normalized_summary = _coerce_summary(
        summary or normalized_result.get("summary") or normalized_task.get("result_summary")
    )
    normalized_evidence = list(
        evidence or normalized_result.get("evidence") or normalized_task.get("evidence") or []
    )

    if status is not None:
        return VerificationVerdict(status=status, summary=normalized_summary, evidence=normalized_evidence)

    result_status = normalized_result.get("status")
    success = normalized_result.get("success")

    # Tuple membership (not a set) so an unhashable status value cannot raise.
    if result_status in ("passed", "failed", "skipped"):
        inferred_status = result_status
    elif success is True:
        inferred_status = "passed"
    elif success is False:
        inferred_status = "failed"
    elif normalized_summary or normalized_evidence:
        # Something was reported, but nothing states whether it succeeded.
        inferred_status = "skipped"
    else:
        inferred_status = "failed"
        normalized_summary = "No verification input available."

    return VerificationVerdict(
        status=inferred_status,
        summary=normalized_summary,
        evidence=normalized_evidence,
    )
def apply_verification_verdict(state: AgentState, verdict: VerificationVerdict) -> AgentState:
    """Return a copy of ``state`` carrying the verdict's status, summary and evidence.

    The input ``state`` is not modified; the evidence list is copied so the
    returned state does not share it with ``verdict``.
    """
    verification_fields = {
        "verification_status": verdict.status,
        "verification_summary": verdict.summary,
        "verification_evidence": list(verdict.evidence),
    }
    return AgentState(**{**state, **verification_fields})
__all__ = ["VerificationVerdict", "apply_verification_verdict", "verify_task_result"]

View File

@@ -8,6 +8,7 @@ import app.agents.graph as graph_module
from langchain_core.messages import AIMessage, HumanMessage
from app.agents.graph import (
_build_verifier_hints,
_choose_sub_commander,
_execute_tool_calls,
_parse_json_action,
@@ -29,29 +30,46 @@ def _base_state(message: str, user_llm_config: dict | None = None) -> dict:
'messages': [HumanMessage(content=message)],
'user_id': 'u1',
'conversation_id': 'c1',
'current_agent': AgentRole.MASTER,
'execution_mode': 'direct',
'current_agent': AgentRole.MASTER.value,
'next_step': None,
'active_agents': [AgentRole.MASTER],
'current_sub_commander': None,
'active_sub_commanders': [],
'sub_commander_trace': [],
'agent_trace': [AgentRole.MASTER.value],
'event_trace': [],
'pending_tasks': [],
'completed_tasks': [],
'active_tasks': [],
'task_results': [],
'tool_calls': [],
'last_tool_result': None,
'action_results': [],
'created_entities': [],
'tool_outcomes': [],
'task_result_summary': None,
'verifier_hints': None,
'verification_status': None,
'verification_summary': None,
'verification_evidence': [],
'budget_state': None,
'tool_strategy_used': None,
'tool_round_count': 0,
'max_tool_rounds': 2,
'retry_count': 0,
'max_retries': 1,
'iteration_count': 0,
'max_iterations': 3,
'routing_hops': 0,
'max_routing_hops': 2,
'terminated_due_to_loop_guard': False,
'retrieval_trace': [],
'stop_reason': None,
'clarification_needed': False,
'clarification_question': None,
'provider_capabilities': None,
'fallback_parse_error': None,
'should_respond': True,
'knowledge_context': None,
'graph_context': None,
'schedule_context_summary': None,
@@ -59,11 +77,17 @@ def _base_state(message: str, user_llm_config: dict | None = None) -> dict:
'plan_steps': [],
'analysis_report': None,
'final_response': None,
'should_respond': True,
'memory_context': None,
'current_datetime_context': 'CURRENT_TIME: 2026-03-28T12:00:00+08:00',
'current_datetime_reference': {'current_time_iso': '2026-03-28T12:00:00+08:00', 'current_date_iso': '2026-03-28', 'timezone': 'UTC'},
'turn_context': None,
'routing_decision': None,
'continuity_state': None,
'pending_action': None,
'last_completed_action': None,
'clarification_context': None,
'user_llm_config': user_llm_config,
'provider_capabilities': None,
}
@@ -258,6 +282,7 @@ def test_initial_state_sets_structured_continuity_defaults():
assert state['pending_action'] is None
assert state['last_completed_action'] is None
assert state['clarification_context'] is None
assert state['event_trace'] == []
assert state['tool_outcomes'] == []
@@ -322,6 +347,7 @@ async def test_planner_node_clears_next_step_after_consuming_routed_turn(monkeyp
assert result['final_response'] is not None
async def test_master_node_returns_stable_reply_for_simple_greeting(monkeypatch):
monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: FailIfCalledLLM())
state = {
@@ -1062,8 +1088,147 @@ async def test_master_node_returns_stable_reply_for_capability_question(monkeypa
assert getattr(result['messages'][-1], 'content', '') == result['final_response']
def test_choose_sub_commander_routes_schedule_requests_to_schedule_planning():
    """A weekly planning request is routed to the schedule_planning sub-commander."""
    chosen = _choose_sub_commander(AgentRole.SCHEDULE_PLANNER, '帮我安排一下这周计划')
    assert chosen == 'schedule_planning'
def test_build_verifier_hints_uses_capability_metadata():
    """Hints for create_reminder mirror its write-capability manifest metadata."""
    hints = _build_verifier_hints(_base_state('明天提醒我开会'), 'create_reminder', '提醒创建成功')

    assert hints['tool_name'] == 'create_reminder'
    assert hints['permission_class'] == 'write'
    assert hints['side_effect_scope'] == 'local_state'

    # Boolean flags are checked by identity, not mere truthiness.
    assert hints['requires_confirmation'] is True
    for disabled_flag in ('supports_retry', 'safe_for_parallel_use'):
        assert hints[disabled_flag] is False

    assert '提醒创建成功' in hints['result_preview']
async def test_execute_tool_calls_records_schema_events_and_aggregate_summaries(monkeypatch):
    """One successful tool call fills verifier hints, summaries, outcomes and events.

    Each piece of state is pinned once with an exact-equality assertion; the
    weaker partial checks that merely restated those assertions were removed.
    """
    tool = FakeTool('create_reminder', '提醒创建成功: 开会 @ 2026-03-29 09:00')
    state = _base_state('test')

    normalized_calls, tool_result, created_entities, tool_messages = await _execute_tool_calls(
        [{'id': 'task-1', 'name': 'create_reminder', 'args': {'title': '开会', 'reminder_at': '2026-03-29T09:00:00'}}],
        [tool],
        state,
    )

    # Return values.
    assert normalized_calls[0]['name'] == 'create_reminder'
    assert tool_result.startswith('[create_reminder]')
    assert created_entities == [{'type': 'reminder', 'tool': 'create_reminder'}]
    assert len(tool_messages) == 1

    # Verifier hints derived from capability metadata.
    assert state['verifier_hints'] == {
        'tools': [
            {
                'tool_name': 'create_reminder',
                'permission_class': 'write',
                'side_effect_scope': 'local_state',
                'requires_confirmation': True,
                'supports_retry': False,
                'safe_for_parallel_use': False,
                'result_preview': '提醒创建成功: 开会 @ 2026-03-29 09:00',
            }
        ]
    }

    # Aggregate summary, mirrored as the latest action result.
    assert state['task_result_summary'] == {
        'tool_count': 1,
        'tools': [
            {
                'tool_name': 'create_reminder',
                'result_preview': '提醒创建成功: 开会 @ 2026-03-29 09:00',
                'created_entity_types': ['reminder'],
                'created_count': 1,
            }
        ],
        'created_count': 1,
        'created_entity_types': ['reminder'],
        'stop_reason': None,
    }
    assert state['action_results'][-1] == state['task_result_summary']
    assert state['tool_outcomes'][0]['tool_name'] == 'create_reminder'

    # Event trace: exactly one start/result pair with full context.
    assert [event['event_type'] for event in state['event_trace']] == [
        'agent.tool.start',
        'agent.tool.result',
    ]
    assert state['event_trace'][-1]['payload']['verification']['tool_name'] == 'create_reminder'
    assert all('event_id' in event for event in state['event_trace'])
    assert all('timestamp' in event for event in state['event_trace'])
    assert all(event['conversation_id'] == 'c1' for event in state['event_trace'])
    assert all(event['agent_id'] == AgentRole.MASTER.value for event in state['event_trace'])
    assert all(event['task_id'] == 'task-1' for event in state['event_trace'])
async def test_execute_tool_calls_aggregates_multiple_tool_turns_without_overwrite(monkeypatch):
    """Two tool calls in one batch are both kept in hints, summary, outcomes and events."""
    state = _base_state('test')
    tools = [
        FakeTool('create_reminder', '提醒创建成功: 开会 @ 2026-03-29 09:00'),
        FakeTool('web_search', '成功搜索到 2 条网页结果'),
    ]
    calls = [
        {'id': 'task-1', 'name': 'create_reminder', 'args': {'title': '开会', 'reminder_at': '2026-03-29T09:00:00'}},
        {'id': 'task-2', 'name': 'web_search', 'args': {'query': 'Jarvis 最新模型更新'}},
    ]

    normalized_calls, tool_result, created_entities, tool_messages = await _execute_tool_calls(calls, tools, state)

    expected_hints = {
        'tools': [
            {
                'tool_name': 'create_reminder',
                'permission_class': 'write',
                'side_effect_scope': 'local_state',
                'requires_confirmation': True,
                'supports_retry': False,
                'safe_for_parallel_use': False,
                'result_preview': '提醒创建成功: 开会 @ 2026-03-29 09:00',
            },
            {
                'tool_name': 'web_search',
                'permission_class': 'external',
                'side_effect_scope': 'network',
                'requires_confirmation': False,
                'supports_retry': True,
                'safe_for_parallel_use': True,
                'result_preview': '成功搜索到 2 条网页结果',
            },
        ]
    }
    expected_summary = {
        'tool_count': 2,
        'tools': [
            {
                'tool_name': 'create_reminder',
                'result_preview': '提醒创建成功: 开会 @ 2026-03-29 09:00',
                'created_entity_types': ['reminder'],
                'created_count': 1,
            },
            {
                'tool_name': 'web_search',
                'result_preview': '成功搜索到 2 条网页结果',
                'created_entity_types': [],
                'created_count': 0,
            },
        ],
        'created_count': 1,
        'created_entity_types': ['reminder'],
        'stop_reason': None,
    }

    assert [call['name'] for call in normalized_calls] == ['create_reminder', 'web_search']
    assert tool_result == '[create_reminder] 提醒创建成功: 开会 @ 2026-03-29 09:00\n[web_search] 成功搜索到 2 条网页结果'
    # Only the reminder tool creates an entity; the search does not.
    assert created_entities == [{'type': 'reminder', 'tool': 'create_reminder'}]
    assert [message.name for message in tool_messages] == ['create_reminder', 'web_search']
    assert state['verifier_hints'] == expected_hints
    assert state['task_result_summary'] == expected_summary
    assert len(state['tool_outcomes']) == 2
    assert [event['event_type'] for event in state['event_trace']] == [
        'agent.tool.start',
        'agent.tool.result',
        'agent.tool.start',
        'agent.tool.result',
    ]
def test_choose_sub_commander_routes_focus_requests_to_schedule_analysis():

View File

@@ -5,11 +5,13 @@ from app.agents.prompts import (
SUB_COMMANDER_PROMPTS_BY_KEY,
TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY,
)
from app.agents.registry import build_registry_indexes, load_builtin_registry_bundle
from app.agents.registry import build_registry_indexes, load_builtin_registry_bundle, load_builtin_registry_indexes
from app.agents.registry.indexes import summarize_registry_indexes
from app.agents.registry.models import (
AgentManifest,
CapabilityManifest,
PermissionClass,
SideEffectScope,
SpecialistTemplateManifest,
SubCommanderManifest,
)
@@ -251,17 +253,34 @@ def test_builtin_capabilities_reference_actual_runtime_tool_names() -> None:
assert manifest_tool_names == expected_tool_names
def test_builtin_sub_commander_capabilities_match_runtime_toolsets() -> None:
capabilities_by_tool_name = {
manifest.tool_name: manifest.capability_id for manifest in BUILTIN_CAPABILITY_MANIFESTS
}
def test_builtin_capability_metadata_distinguishes_read_and_write_surfaces() -> None:
capability_by_id = {manifest.capability_id: manifest for manifest in BUILTIN_CAPABILITY_MANIFESTS}
for sub_commander in BUILTIN_SUB_COMMANDER_MANIFESTS:
expected_capability_ids = {
capabilities_by_tool_name[tool.name]
for tool in SUB_COMMANDER_TOOLSETS[sub_commander.sub_commander_id]
}
assert set(sub_commander.capability_ids) == expected_capability_ids
assert capability_by_id["get_tasks"].permission_class == PermissionClass.READ
assert capability_by_id["get_tasks"].side_effect_scope == SideEffectScope.NONE
assert capability_by_id["get_tasks"].supports_retry is True
assert capability_by_id["get_tasks"].idempotent is True
assert capability_by_id["get_tasks"].safe_for_parallel_use is True
assert capability_by_id["get_tasks"].requires_confirmation is False
assert capability_by_id["create_reminder"].permission_class == PermissionClass.WRITE
assert capability_by_id["create_reminder"].side_effect_scope == SideEffectScope.LOCAL_STATE
assert capability_by_id["create_reminder"].supports_retry is False
assert capability_by_id["create_reminder"].idempotent is False
assert capability_by_id["create_reminder"].safe_for_parallel_use is False
assert capability_by_id["create_reminder"].requires_confirmation is True
assert capability_by_id["web_search"].permission_class == PermissionClass.EXTERNAL
assert capability_by_id["web_search"].side_effect_scope == SideEffectScope.NETWORK
def test_load_builtin_registry_indexes_is_cached_and_matches_bundle_indexes() -> None:
    """The cached loader returns one shared object equivalent to a fresh build."""
    first_load = load_builtin_registry_indexes()
    fresh_build = build_registry_indexes(load_builtin_registry_bundle())

    # Identity, not just equality: the second call must hit the cache.
    assert load_builtin_registry_indexes() is first_load
    assert first_load.capability_id_by_tool_name == fresh_build.capability_id_by_tool_name
    assert first_load.capability_by_id["create_reminder"].requires_confirmation is True
def test_builtin_manifests_form_a_valid_registry_bundle() -> None:

View File

@@ -0,0 +1,66 @@
from app.agents.schemas.event import AgentEvent
from app.agents.schemas.task import AgentTask
from app.agents.verifier import verify_task_result
def test_agent_task_accepts_day1_fields():
    """AgentTask stores every Day 1 field exactly as supplied."""
    supplied = {
        "task_id": "task-1",
        "title": "Verify foundation",
        "status": "in_progress",
        "owner_agent_id": "executor",
        "role": "verifier",
        "goal": "check output",
        "expected_evidence": [{"type": "assertion"}],
        "evidence": [{"type": "log"}],
        "result_summary": "running",
    }

    task = AgentTask(**supplied)

    checked_fields = (
        "task_id",
        "owner_agent_id",
        "status",
        "expected_evidence",
        "evidence",
        "result_summary",
    )
    for field_name in checked_fields:
        assert getattr(task, field_name) == supplied[field_name]
def test_agent_event_accepts_day1_fields():
    """AgentEvent stores every Day 1 field exactly as supplied."""
    supplied = {
        "event_id": "evt-1",
        "event_type": "agent.verify.completed",
        "conversation_id": "conv-1",
        "agent_id": "executor",
        "sub_commander_id": "executor_tasks",
        "task_id": "task-1",
        "payload": {"status": "passed"},
        "severity": "info",
    }

    event = AgentEvent(**supplied)

    for field_name in ("event_id", "event_type", "conversation_id", "payload", "severity"):
        assert getattr(event, field_name) == supplied[field_name]
def test_verifier_verdict_is_separate_from_task_lifecycle_status():
    """A blocked task with only a summary yields 'skipped', not its lifecycle status."""
    blocked_task = AgentTask(
        task_id="task-1",
        title="Verify",
        status="blocked",
        result_summary="waiting",
    )

    verdict = verify_task_result(task=blocked_task)

    assert (verdict.status, verdict.summary) == ("skipped", "waiting")
def test_verifier_prefers_explicit_result_success_signal():
    """An explicit ``success: True`` in the result maps to a 'passed' verdict."""
    reported = {"success": True, "summary": "all checks passed"}

    verdict = verify_task_result(result=reported)

    assert (verdict.status, verdict.summary) == ("passed", "all checks passed")
def test_verifier_fails_when_no_verification_input_exists():
    """With no inputs at all the verifier fails and explains why."""
    empty_verdict = verify_task_result()

    assert (empty_verdict.status, empty_verdict.summary) == (
        "failed",
        "No verification input available.",
    )