feat: add Jarvis agent verification foundation

Add Day 1 agent runtime foundations with task and event schemas, verifier support, capability metadata, graph event tracing, and regression coverage while preserving the direct execution path.
This commit is contained in:
2026-04-03 15:18:08 +08:00
parent 4972b4e6b1
commit aa0ef0fbea
14 changed files with 867 additions and 17 deletions

View File

@@ -8,6 +8,7 @@ import app.agents.graph as graph_module
from langchain_core.messages import AIMessage, HumanMessage
from app.agents.graph import (
_build_verifier_hints,
_choose_sub_commander,
_execute_tool_calls,
_parse_json_action,
@@ -29,29 +30,46 @@ def _base_state(message: str, user_llm_config: dict | None = None) -> dict:
'messages': [HumanMessage(content=message)],
'user_id': 'u1',
'conversation_id': 'c1',
'current_agent': AgentRole.MASTER,
'execution_mode': 'direct',
'current_agent': AgentRole.MASTER.value,
'next_step': None,
'active_agents': [AgentRole.MASTER],
'current_sub_commander': None,
'active_sub_commanders': [],
'sub_commander_trace': [],
'agent_trace': [AgentRole.MASTER.value],
'event_trace': [],
'pending_tasks': [],
'completed_tasks': [],
'active_tasks': [],
'task_results': [],
'tool_calls': [],
'last_tool_result': None,
'action_results': [],
'created_entities': [],
'tool_outcomes': [],
'task_result_summary': None,
'verifier_hints': None,
'verification_status': None,
'verification_summary': None,
'verification_evidence': [],
'budget_state': None,
'tool_strategy_used': None,
'tool_round_count': 0,
'max_tool_rounds': 2,
'retry_count': 0,
'max_retries': 1,
'iteration_count': 0,
'max_iterations': 3,
'routing_hops': 0,
'max_routing_hops': 2,
'terminated_due_to_loop_guard': False,
'retrieval_trace': [],
'stop_reason': None,
'clarification_needed': False,
'clarification_question': None,
'provider_capabilities': None,
'fallback_parse_error': None,
'should_respond': True,
'knowledge_context': None,
'graph_context': None,
'schedule_context_summary': None,
@@ -59,11 +77,17 @@ def _base_state(message: str, user_llm_config: dict | None = None) -> dict:
'plan_steps': [],
'analysis_report': None,
'final_response': None,
'should_respond': True,
'memory_context': None,
'current_datetime_context': 'CURRENT_TIME: 2026-03-28T12:00:00+08:00',
'current_datetime_reference': {'current_time_iso': '2026-03-28T12:00:00+08:00', 'current_date_iso': '2026-03-28', 'timezone': 'UTC'},
'turn_context': None,
'routing_decision': None,
'continuity_state': None,
'pending_action': None,
'last_completed_action': None,
'clarification_context': None,
'user_llm_config': user_llm_config,
'provider_capabilities': None,
}
@@ -258,6 +282,7 @@ def test_initial_state_sets_structured_continuity_defaults():
assert state['pending_action'] is None
assert state['last_completed_action'] is None
assert state['clarification_context'] is None
assert state['event_trace'] == []
assert state['tool_outcomes'] == []
@@ -322,6 +347,7 @@ async def test_planner_node_clears_next_step_after_consuming_routed_turn(monkeyp
assert result['final_response'] is not None
async def test_master_node_returns_stable_reply_for_simple_greeting(monkeypatch):
monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: FailIfCalledLLM())
state = {
@@ -1062,8 +1088,147 @@ async def test_master_node_returns_stable_reply_for_capability_question(monkeypa
assert getattr(result['messages'][-1], 'content', '') == result['final_response']
def test_choose_sub_commander_routes_schedule_requests_to_schedule_planning():
    """A schedule-planner request resolves to the ``schedule_planning`` sub-commander."""
    selected = _choose_sub_commander(AgentRole.SCHEDULE_PLANNER, '帮我安排一下这周计划')
    assert selected == 'schedule_planning'
def test_build_verifier_hints_uses_capability_metadata():
    """Verifier hints for ``create_reminder`` expose the expected capability metadata.

    Covers the tool name, permission class, side-effect scope, the three
    boolean flags, and the presence of the raw tool output in the preview.
    """
    tool_output = '提醒创建成功'
    hints = _build_verifier_hints(_base_state('明天提醒我开会'), 'create_reminder', tool_output)

    expected_labels = {
        'tool_name': 'create_reminder',
        'permission_class': 'write',
        'side_effect_scope': 'local_state',
    }
    for key, label in expected_labels.items():
        assert hints[key] == label

    # Flags are checked by identity so a merely truthy/falsy value does not pass.
    expected_flags = {
        'requires_confirmation': True,
        'supports_retry': False,
        'safe_for_parallel_use': False,
    }
    for key, flag in expected_flags.items():
        assert hints[key] is flag

    assert tool_output in hints['result_preview']
async def test_execute_tool_calls_records_schema_events_and_aggregate_summaries(monkeypatch):
    """Check state bookkeeping after a single ``create_reminder`` tool call.

    Runs ``_execute_tool_calls`` with one fake tool and verifies the returned
    values plus the ``verifier_hints``, ``task_result_summary``,
    ``action_results``, ``tool_outcomes`` and ``event_trace`` entries that end
    up on ``state``.

    ``monkeypatch`` is not used in the body; it stays in the signature so the
    test's fixture requirements are unchanged.
    """
    tool = FakeTool('create_reminder', '提醒创建成功: 开会 @ 2026-03-29 09:00')
    state = _base_state('test')

    normalized_calls, tool_result, created_entities, tool_messages = await _execute_tool_calls(
        [{'id': 'task-1', 'name': 'create_reminder', 'args': {'title': '开会', 'reminder_at': '2026-03-29T09:00:00'}}],
        [tool],
        state,
    )

    # Values returned to the caller.
    assert normalized_calls[0]['name'] == 'create_reminder'
    assert tool_result.startswith('[create_reminder]')
    assert created_entities == [{'type': 'reminder', 'tool': 'create_reminder'}]
    assert len(tool_messages) == 1

    # Whole-structure comparisons: they pin every key, so no separate per-key
    # spot checks on these structures are needed.
    assert state['verifier_hints'] == {
        'tools': [
            {
                'tool_name': 'create_reminder',
                'permission_class': 'write',
                'side_effect_scope': 'local_state',
                'requires_confirmation': True,
                'supports_retry': False,
                'safe_for_parallel_use': False,
                'result_preview': '提醒创建成功: 开会 @ 2026-03-29 09:00',
            }
        ]
    }
    assert state['task_result_summary'] == {
        'tool_count': 1,
        'tools': [
            {
                'tool_name': 'create_reminder',
                'result_preview': '提醒创建成功: 开会 @ 2026-03-29 09:00',
                'created_entity_types': ['reminder'],
                'created_count': 1,
            }
        ],
        'created_count': 1,
        'created_entity_types': ['reminder'],
        'stop_reason': None,
    }
    assert state['action_results'][-1] == state['task_result_summary']
    assert state['tool_outcomes'][0]['tool_name'] == 'create_reminder'

    # Exactly one start/result pair, in that order.
    assert [event['event_type'] for event in state['event_trace']] == [
        'agent.tool.start',
        'agent.tool.result',
    ]
    assert state['event_trace'][-1]['payload']['verification']['tool_name'] == 'create_reminder'

    # Every event carries the envelope fields and the identifiers of this call.
    assert all('event_id' in event for event in state['event_trace'])
    assert all('timestamp' in event for event in state['event_trace'])
    assert all(event['conversation_id'] == 'c1' for event in state['event_trace'])
    assert all(event['agent_id'] == AgentRole.MASTER.value for event in state['event_trace'])
    assert all(event['task_id'] == 'task-1' for event in state['event_trace'])
async def test_execute_tool_calls_aggregates_multiple_tool_turns_without_overwrite(monkeypatch):
    """Two tool calls in one batch both show up in the aggregated state.

    Executes a ``create_reminder`` call followed by a ``web_search`` call and
    checks that the returned values, ``verifier_hints``,
    ``task_result_summary``, ``tool_outcomes`` and ``event_trace`` contain an
    entry for each call, in call order.
    """
    available_tools = [
        FakeTool('create_reminder', '提醒创建成功: 开会 @ 2026-03-29 09:00'),
        FakeTool('web_search', '成功搜索到 2 条网页结果'),
    ]
    state = _base_state('test')
    requested_calls = [
        {'id': 'task-1', 'name': 'create_reminder', 'args': {'title': '开会', 'reminder_at': '2026-03-29T09:00:00'}},
        {'id': 'task-2', 'name': 'web_search', 'args': {'query': 'Jarvis 最新模型更新'}},
    ]

    outcome = await _execute_tool_calls(requested_calls, available_tools, state)
    normalized_calls, tool_result, created_entities, tool_messages = outcome

    expected_hints = {
        'tools': [
            {
                'tool_name': 'create_reminder',
                'permission_class': 'write',
                'side_effect_scope': 'local_state',
                'requires_confirmation': True,
                'supports_retry': False,
                'safe_for_parallel_use': False,
                'result_preview': '提醒创建成功: 开会 @ 2026-03-29 09:00',
            },
            {
                'tool_name': 'web_search',
                'permission_class': 'external',
                'side_effect_scope': 'network',
                'requires_confirmation': False,
                'supports_retry': True,
                'safe_for_parallel_use': True,
                'result_preview': '成功搜索到 2 条网页结果',
            },
        ]
    }
    expected_summary = {
        'tool_count': 2,
        'tools': [
            {
                'tool_name': 'create_reminder',
                'result_preview': '提醒创建成功: 开会 @ 2026-03-29 09:00',
                'created_entity_types': ['reminder'],
                'created_count': 1,
            },
            {
                'tool_name': 'web_search',
                'result_preview': '成功搜索到 2 条网页结果',
                'created_entity_types': [],
                'created_count': 0,
            },
        ],
        'created_count': 1,
        'created_entity_types': ['reminder'],
        'stop_reason': None,
    }

    # Returned values keep the order in which the calls were requested.
    assert [entry['name'] for entry in normalized_calls] == ['create_reminder', 'web_search']
    assert tool_result == '[create_reminder] 提醒创建成功: 开会 @ 2026-03-29 09:00\n[web_search] 成功搜索到 2 条网页结果'
    assert created_entities == [{'type': 'reminder', 'tool': 'create_reminder'}]
    assert [msg.name for msg in tool_messages] == ['create_reminder', 'web_search']

    # State aggregates hold one entry per call.
    assert state['verifier_hints'] == expected_hints
    assert state['task_result_summary'] == expected_summary
    assert len(state['tool_outcomes']) == 2

    # One start/result pair per call.
    recorded_types = [entry['event_type'] for entry in state['event_trace']]
    assert recorded_types == ['agent.tool.start', 'agent.tool.result'] * 2
def test_choose_sub_commander_routes_focus_requests_to_schedule_analysis():

View File

@@ -5,11 +5,13 @@ from app.agents.prompts import (
SUB_COMMANDER_PROMPTS_BY_KEY,
TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY,
)
from app.agents.registry import build_registry_indexes, load_builtin_registry_bundle
from app.agents.registry import build_registry_indexes, load_builtin_registry_bundle, load_builtin_registry_indexes
from app.agents.registry.indexes import summarize_registry_indexes
from app.agents.registry.models import (
AgentManifest,
CapabilityManifest,
PermissionClass,
SideEffectScope,
SpecialistTemplateManifest,
SubCommanderManifest,
)
@@ -251,17 +253,34 @@ def test_builtin_capabilities_reference_actual_runtime_tool_names() -> None:
assert manifest_tool_names == expected_tool_names
def test_builtin_sub_commander_capabilities_match_runtime_toolsets() -> None:
capabilities_by_tool_name = {
manifest.tool_name: manifest.capability_id for manifest in BUILTIN_CAPABILITY_MANIFESTS
}
def test_builtin_capability_metadata_distinguishes_read_and_write_surfaces() -> None:
capability_by_id = {manifest.capability_id: manifest for manifest in BUILTIN_CAPABILITY_MANIFESTS}
for sub_commander in BUILTIN_SUB_COMMANDER_MANIFESTS:
expected_capability_ids = {
capabilities_by_tool_name[tool.name]
for tool in SUB_COMMANDER_TOOLSETS[sub_commander.sub_commander_id]
}
assert set(sub_commander.capability_ids) == expected_capability_ids
assert capability_by_id["get_tasks"].permission_class == PermissionClass.READ
assert capability_by_id["get_tasks"].side_effect_scope == SideEffectScope.NONE
assert capability_by_id["get_tasks"].supports_retry is True
assert capability_by_id["get_tasks"].idempotent is True
assert capability_by_id["get_tasks"].safe_for_parallel_use is True
assert capability_by_id["get_tasks"].requires_confirmation is False
assert capability_by_id["create_reminder"].permission_class == PermissionClass.WRITE
assert capability_by_id["create_reminder"].side_effect_scope == SideEffectScope.LOCAL_STATE
assert capability_by_id["create_reminder"].supports_retry is False
assert capability_by_id["create_reminder"].idempotent is False
assert capability_by_id["create_reminder"].safe_for_parallel_use is False
assert capability_by_id["create_reminder"].requires_confirmation is True
assert capability_by_id["web_search"].permission_class == PermissionClass.EXTERNAL
assert capability_by_id["web_search"].side_effect_scope == SideEffectScope.NETWORK
def test_load_builtin_registry_indexes_is_cached_and_matches_bundle_indexes() -> None:
    """Repeated loads return the same indexes object, and it agrees with a fresh build."""
    first_load = load_builtin_registry_indexes()
    freshly_built = build_registry_indexes(load_builtin_registry_bundle())

    # Identity, not equality: a second load must hand back the very same object.
    second_load = load_builtin_registry_indexes()
    assert first_load is second_load

    assert first_load.capability_id_by_tool_name == freshly_built.capability_id_by_tool_name

    reminder_capability = first_load.capability_by_id["create_reminder"]
    assert reminder_capability.requires_confirmation is True
def test_builtin_manifests_form_a_valid_registry_bundle() -> None:

View File

@@ -0,0 +1,66 @@
from app.agents.schemas.event import AgentEvent
from app.agents.schemas.task import AgentTask
from app.agents.verifier import verify_task_result
def test_agent_task_accepts_day1_fields():
    """``AgentTask`` can be built with the Day 1 fields and reads them back as given."""
    day1_fields = {
        "task_id": "task-1",
        "title": "Verify foundation",
        "status": "in_progress",
        "owner_agent_id": "executor",
        "role": "verifier",
        "goal": "check output",
        "expected_evidence": [{"type": "assertion"}],
        "evidence": [{"type": "log"}],
        "result_summary": "running",
    }
    task = AgentTask(**day1_fields)

    # Expectations use their own literals rather than the constructor inputs,
    # so the comparison does not depend on the objects that were passed in.
    expected_by_attribute = {
        "task_id": "task-1",
        "owner_agent_id": "executor",
        "status": "in_progress",
        "expected_evidence": [{"type": "assertion"}],
        "evidence": [{"type": "log"}],
        "result_summary": "running",
    }
    for attribute, expected in expected_by_attribute.items():
        assert getattr(task, attribute) == expected
def test_agent_event_accepts_day1_fields():
    """``AgentEvent`` can be built with the Day 1 fields and reads them back as given."""
    day1_fields = {
        "event_id": "evt-1",
        "event_type": "agent.verify.completed",
        "conversation_id": "conv-1",
        "agent_id": "executor",
        "sub_commander_id": "executor_tasks",
        "task_id": "task-1",
        "payload": {"status": "passed"},
        "severity": "info",
    }
    event = AgentEvent(**day1_fields)

    # Expectations use their own literals rather than the constructor inputs,
    # so the comparison does not depend on the objects that were passed in.
    expected_by_attribute = {
        "event_id": "evt-1",
        "event_type": "agent.verify.completed",
        "conversation_id": "conv-1",
        "payload": {"status": "passed"},
        "severity": "info",
    }
    for attribute, expected in expected_by_attribute.items():
        assert getattr(event, attribute) == expected
def test_verifier_verdict_is_separate_from_task_lifecycle_status():
    """A task in lifecycle status ``blocked`` gets the verdict ``skipped``, not ``blocked``."""
    blocked_task = AgentTask(
        task_id="task-1",
        title="Verify",
        status="blocked",
        result_summary="waiting",
    )

    verdict = verify_task_result(task=blocked_task)

    assert verdict.status == "skipped"
    assert verdict.summary == "waiting"
def test_verifier_prefers_explicit_result_success_signal():
    """A result dict with ``success=True`` yields a ``passed`` verdict carrying its summary."""
    successful_result = {"success": True, "summary": "all checks passed"}

    verdict = verify_task_result(result=successful_result)

    assert verdict.status == "passed"
    assert verdict.summary == "all checks passed"
def test_verifier_fails_when_no_verification_input_exists():
    """Calling the verifier with no arguments yields a ``failed`` verdict with a fixed message."""
    verdict = verify_task_result()

    observed = (verdict.status, verdict.summary)
    assert observed[0] == "failed"
    assert observed[1] == "No verification input available."