feat: add agent registry manifests and coverage

Introduce a manifest-backed agent registry surface and align graph tests with the new runtime-prompt and tool-indexing behavior.
2026-04-02 14:34:26 +08:00
parent e9ba8597e9
commit 4251a79062
12 changed files with 1111 additions and 423 deletions
--- a/backend/tests/backend/app/agents/test_graph.py
+++ b/backend/tests/backend/app/agents/test_graph.py
@@ -1,470 +1,291 @@
+from pathlib import Path
 from types import SimpleNamespace
+import sys

-from langchain_core.messages import AIMessage, HumanMessage
+WORKTREE_ROOT = Path(__file__).resolve().parents[4]
+if str(WORKTREE_ROOT) not in sys.path:
+    sys.path.insert(0, str(WORKTREE_ROOT))
+for module_name in list(sys.modules):
+    if module_name == "app" or module_name.startswith("app."):
+        del sys.modules[module_name]
+
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
+from langgraph.graph import END

 from app.agents.graph import (
-    _choose_sub_commander,
-    _parse_json_action,
-    _route_agent_from_user_query,
-    _run_sub_commander,
+    JSON_ACTION_FALLBACK_PROMPT,
+    _get_role_tools,
+    call_agent_llm,
+    execute_tools_node,
    master_node,
+    route_after_agent,
+    route_master,
 )
-from app.agents.tools.time_reasoning import resolve_time_expression
 from app.agents.state import AgentRole
+from app.agents.tools import SUB_COMMANDER_TOOLSETS
+from app.agents.prompts import MASTER_SYSTEM_PROMPT


-
-
-def _base_state(message: str, user_llm_config: dict | None = None) -> dict:
+def _base_state(message: str = "帮我安排今天的重点") -> dict:
    return {
-        'messages': [HumanMessage(content=message)],
-        'user_id': 'u1',
-        'conversation_id': 'c1',
-        'current_agent': AgentRole.MASTER,
-        'active_agents': [AgentRole.MASTER],
-        'current_sub_commander': None,
-        'active_sub_commanders': [],
-        'sub_commander_trace': [],
-        'pending_tasks': [],
-        'completed_tasks': [],
-        'tool_calls': [],
-        'last_tool_result': None,
-        'action_results': [],
-        'created_entities': [],
-        'tool_strategy_used': None,
-        'provider_capabilities': None,
-        'fallback_parse_error': None,
-        'knowledge_context': None,
-        'graph_context': None,
-        'schedule_context_summary': None,
-        'plan': None,
-        'plan_steps': [],
-        'analysis_report': None,
-        'final_response': None,
-        'should_respond': True,
-        'memory_context': None,
-        'current_datetime_context': 'CURRENT_TIME: 2026-03-28T12:00:00+08:00',
-        'current_datetime_reference': {'current_time_iso': '2026-03-28T12:00:00+08:00', 'current_date_iso': '2026-03-28', 'timezone': 'UTC'},
-        'user_llm_config': user_llm_config,
+        "messages": [HumanMessage(content=message)],
+        "user_id": "u1",
+        "conversation_id": "c1",
+        "current_agent": AgentRole.MASTER.value,
+        "next_step": None,
+        "agent_trace": [AgentRole.MASTER.value],
+        "pending_tasks": [],
+        "completed_tasks": [],
+        "created_entities": [],
+        "knowledge_context": None,
+        "schedule_context_summary": None,
+        "analysis_report": None,
+        "final_response": None,
+        "memory_context": None,
+        "current_datetime_context": None,
+        "user_llm_config": None,
+        "provider_capabilities": None,
    }


-class FakeFallbackLLM:
-    def __init__(self, first_content: str, followup_content: str = '已创建提醒：开会，时间为 2026-03-29 09:00（按当前时间理解为“明天早上9点”）。'):
-        self.first_content = first_content
-        self.followup_content = followup_content
-        self.calls = 0
-
-    async def ainvoke(self, messages):
-        self.calls += 1
-        if self.calls == 1:
-            return AIMessage(content=self.first_content)
-        return AIMessage(content=self.followup_content)
-
-    def bind_tools(self, tools):
-        raise AssertionError('bind_tools should not be called in JSON fallback mode')
-
-
-class FakeNativeBoundLLM:
-    async def ainvoke(self, messages):
-        return AIMessage(
-            content='',
-            tool_calls=[
-                {
-                    'id': 'call_1',
-                    'name': 'create_reminder',
-                    'args': {'title': '开会', 'reminder_at': '明天 09:00'},
-                }
-            ],
-        )
-
-
-class FakeNativeLLM:
-    def __init__(self):
-        self.bound = FakeNativeBoundLLM()
-        self.tool_binding_count = 0
-        self.calls = 0
-        self._jarvis_provider_capabilities = SimpleNamespace(provider='openai', supports_native_tools=True, preferred_tool_strategy='native')
-
-    def bind_tools(self, tools):
-        self.tool_binding_count += 1
-        return self.bound
-
-    async def ainvoke(self, messages):
-        self.calls += 1
-        return AIMessage(content='已创建提醒：开会，时间为 2026-03-29 09:00（按当前时间理解为“明天早上9点”）。')
-
-
-class FakeTool:
-    def __init__(self, name: str, result: str):
-        self.name = name
-        self.result = result
-        self.invocations: list[dict] = []
-
-    def invoke(self, args: dict):
-        self.invocations.append(args)
-        return self.result
-
-
-class CapturingLLM:
-    def __init__(self, content: str = '{"mode":"final","final_response":"好的。"}'):
-        self.content = content
-        self.messages = None
-        self._jarvis_provider_capabilities = SimpleNamespace(provider='ollama', supports_native_tools=False, preferred_tool_strategy='json_fallback')
-
-    async def ainvoke(self, messages):
-        self.messages = messages
-        return AIMessage(content=self.content)
-
-
 class FailIfCalledLLM:
    async def ainvoke(self, messages):
-        raise AssertionError('LLM should not be called for simple greetings')
+        raise AssertionError("LLM should not be called for greeting fast-path")


-async def test_master_node_returns_stable_reply_for_simple_greeting(monkeypatch):
-    monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: FailIfCalledLLM())
+class StaticResponseLLM:
+    def __init__(self, response: AIMessage):
+        self.response = response
+        self.messages = None

-    state = {
-        'messages': [HumanMessage(content='你好')],
-        'user_id': 'u1',
-        'conversation_id': 'c1',
-        'current_agent': AgentRole.MASTER,
-        'active_agents': [AgentRole.MASTER],
-        'pending_tasks': [],
-        'completed_tasks': [],
-        'tool_calls': [],
-        'last_tool_result': None,
-        'knowledge_context': None,
-        'graph_context': None,
-        'plan': None,
-        'plan_steps': [],
-        'analysis_report': None,
-        'final_response': None,
-        'should_respond': True,
-        'memory_context': None,
-        'user_llm_config': None,
-    }
-
-    result = await master_node(state)
-
-    assert result['final_response'] == '您好。我在。\n\n您把问题给我，我先帮您收束重点，再往下推。'
-    assert result['current_agent'] == AgentRole.MASTER
-    assert result['active_agents'] == [AgentRole.MASTER]
+    async def ainvoke(self, messages):
+        self.messages = messages
+        return self.response


-async def test_master_node_returns_stable_reply_for_identity_question(monkeypatch):
-    monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: FailIfCalledLLM())
+class CaptureFallbackLLM:
+    def __init__(self, response: AIMessage):
+        self.response = response
+        self.messages = None
+        self.bind_tools_called = False

-    state = {
-        'messages': [HumanMessage(content='你是谁')],
-        'user_id': 'u1',
-        'conversation_id': 'c1',
-        'current_agent': AgentRole.MASTER,
-        'active_agents': [AgentRole.MASTER],
-        'pending_tasks': [],
-        'completed_tasks': [],
-        'tool_calls': [],
-        'last_tool_result': None,
-        'knowledge_context': None,
-        'graph_context': None,
-        'plan': None,
-        'plan_steps': [],
-        'analysis_report': None,
-        'final_response': None,
-        'should_respond': True,
-        'memory_context': None,
-        'user_llm_config': None,
-    }
+    async def ainvoke(self, messages):
+        self.messages = messages
+        return self.response

-    result = await master_node(state)
-
-    assert result['final_response'] == '我是 Jarvis。\n\n比起做一个泛泛的助手，我更像您的判断型协作伙伴：帮您看清问题、压缩路径、把事情往前推进。'
-    assert result['current_agent'] == AgentRole.MASTER
-    assert result['active_agents'] == [AgentRole.MASTER]
+    def bind_tools(self, tools):
+        self.bind_tools_called = True
+        raise AssertionError("bind_tools should not be used when native tools are unsupported")


-async def test_master_node_returns_stable_reply_for_identity_question_with_punctuation(monkeypatch):
-    monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: FailIfCalledLLM())
+class AsyncFakeTool:
+    def __init__(self, name: str, result: str):
+        self.name = name
+        self.result = result
+        self.calls: list[dict] = []

-    state = {
-        'messages': [HumanMessage(content='你是谁？')],
-        'user_id': 'u1',
-        'conversation_id': 'c1',
-        'current_agent': AgentRole.MASTER,
-        'active_agents': [AgentRole.MASTER],
-        'pending_tasks': [],
-        'completed_tasks': [],
-        'tool_calls': [],
-        'last_tool_result': None,
-        'knowledge_context': None,
-        'graph_context': None,
-        'plan': None,
-        'plan_steps': [],
-        'analysis_report': None,
-        'final_response': None,
-        'should_respond': True,
-        'memory_context': None,
-        'user_llm_config': None,
-    }
-
-    result = await master_node(state)
-
-    assert result['final_response'] == '我是 Jarvis。\n\n比起做一个泛泛的助手，我更像您的判断型协作伙伴：帮您看清问题、压缩路径、把事情往前推进。'
-    assert result['current_agent'] == AgentRole.MASTER
-    assert result['active_agents'] == [AgentRole.MASTER]
+    async def ainvoke(self, args: dict):
+        self.calls.append(args)
+        return self.result


-async def test_master_node_returns_stable_reply_for_identity_question_with_particle(monkeypatch):
-    monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: FailIfCalledLLM())
+class SyncFakeTool:
+    def __init__(self, name: str, result: str):
+        self.name = name
+        self.result = result
+        self.calls: list[dict] = []

-    state = {
-        'messages': [HumanMessage(content='你是谁啊')],
-        'user_id': 'u1',
-        'conversation_id': 'c1',
-        'current_agent': AgentRole.MASTER,
-        'active_agents': [AgentRole.MASTER],
-        'pending_tasks': [],
-        'completed_tasks': [],
-        'tool_calls': [],
-        'last_tool_result': None,
-        'knowledge_context': None,
-        'graph_context': None,
-        'plan': None,
-        'plan_steps': [],
-        'analysis_report': None,
-        'final_response': None,
-        'should_respond': True,
-        'memory_context': None,
-        'user_llm_config': None,
-    }
-
-    result = await master_node(state)
-
-    assert result['final_response'] == '我是 Jarvis。\n\n比起做一个泛泛的助手，我更像您的判断型协作伙伴：帮您看清问题、压缩路径、把事情往前推进。'
-    assert result['current_agent'] == AgentRole.MASTER
-    assert result['active_agents'] == [AgentRole.MASTER]
+    def invoke(self, args: dict):
+        self.calls.append(args)
+        return self.result


-async def test_master_node_returns_stable_reply_for_capability_question(monkeypatch):
-    monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: FailIfCalledLLM())
+async def test_master_node_greeting_fast_path_returns_stable_reply_without_llm(monkeypatch):
+    monkeypatch.setattr("app.agents.graph._get_llm_for_state", lambda state: (FailIfCalledLLM(), SimpleNamespace()))

-    state = {
-        'messages': [HumanMessage(content='你能做什么')],
-        'user_id': 'u1',
-        'conversation_id': 'c1',
-        'current_agent': AgentRole.MASTER,
-        'active_agents': [AgentRole.MASTER],
-        'pending_tasks': [],
-        'completed_tasks': [],
-        'tool_calls': [],
-        'last_tool_result': None,
-        'knowledge_context': None,
-        'graph_context': None,
-        'plan': None,
-        'plan_steps': [],
-        'analysis_report': None,
-        'final_response': None,
-        'should_respond': True,
-        'memory_context': None,
-        'user_llm_config': None,
-    }
+    result = await master_node(_base_state("你好"))

-    result = await master_node(state)
-
-    assert result['final_response'] == '主要做三件事。\n- 帮您判断：看问题本质、梳理取舍、给出方向\n- 帮您收束：把复杂内容理顺，把重点拎出来\n- 帮您推进：拆任务、定步骤、把下一步变清楚\n\n如果您现在有具体目标，我可以直接进入处理。'
-    assert result['current_agent'] == AgentRole.MASTER
-    assert result['active_agents'] == [AgentRole.MASTER]
+    assert result["final_response"] == "您好。我在。\n\n您把问题给我，我先帮您收束重点，再往下推。"
+    assert result["messages"][0].content == "您好。我在。"


-def test_choose_sub_commander_routes_schedule_requests_to_schedule_planning():
-    assert _choose_sub_commander(AgentRole.SCHEDULE_PLANNER, '帮我安排一下这周计划') == 'schedule_planning'
-
-
-def test_choose_sub_commander_routes_focus_requests_to_schedule_analysis():
-    assert _choose_sub_commander(AgentRole.SCHEDULE_PLANNER, '基于最近对话帮我判断该聚焦什么') == 'schedule_analysis'
-
-
-def test_route_agent_from_user_query_routes_knowledge_requests_to_librarian():
-    assert _route_agent_from_user_query('帮我搜索知识库里的项目资料') == AgentRole.LIBRARIAN
-
-
-def test_route_agent_from_user_query_routes_schedule_requests_to_schedule_planner():
-    assert _route_agent_from_user_query('明天提醒我开会') == AgentRole.SCHEDULE_PLANNER
-
-
-def test_route_agent_from_user_query_routes_explicit_month_day_milestone_to_schedule_planner():
-    assert _route_agent_from_user_query('3月29日，对话系统交付节点') == AgentRole.SCHEDULE_PLANNER
-
-
-def test_choose_sub_commander_routes_explicit_month_day_milestone_to_schedule_planning():
-    assert _choose_sub_commander(AgentRole.SCHEDULE_PLANNER, '3月29日，对话系统交付节点') == 'schedule_planning'
-
-
-
-
-def test_parse_json_action_extracts_tool_calls_from_fenced_json():
-    parsed = _parse_json_action(
-        '```json\n{"mode":"tool_call","tool_calls":[{"name":"create_reminder","arguments":{"title":"开会","reminder_at":"明天 09:00"}}]}\n```',
-        ['create_reminder'],
+async def test_master_node_routes_to_agent_when_llm_returns_role_name(monkeypatch):
+    llm = StaticResponseLLM(AIMessage(content="schedule_planner"))
+    monkeypatch.setattr(
+        "app.agents.graph._get_llm_for_state",
+        lambda state: (llm, SimpleNamespace(provider="test", supports_native_tools=True)),
    )

-    assert parsed == {
-        'mode': 'tool_call',
-        'tool_calls': [
-            {
-                'name': 'create_reminder',
-                'args': {'title': '开会', 'reminder_at': '明天 09:00'},
-                'reason': None,
-            }
+    state = _base_state("帮我安排这周重点")
+    result = await master_node(state)
+
+    assert result["current_agent"] == AgentRole.SCHEDULE_PLANNER.value
+    assert result["agent_trace"] == [AgentRole.MASTER.value, AgentRole.SCHEDULE_PLANNER.value]
+    assert result["messages"][0].content == f"已分发至 {AgentRole.SCHEDULE_PLANNER.value} 处理。"
+    assert isinstance(llm.messages[0], SystemMessage)
+    assert MASTER_SYSTEM_PROMPT in llm.messages[0].content
+
+
+async def test_master_node_returns_final_response_when_llm_answers_directly(monkeypatch):
+    response = AIMessage(content="我建议先收束需求，再拆执行步骤。")
+    llm = StaticResponseLLM(response)
+    monkeypatch.setattr(
+        "app.agents.graph._get_llm_for_state",
+        lambda state: (llm, SimpleNamespace(provider="test", supports_native_tools=True)),
+    )
+
+    result = await master_node(_base_state("现在应该怎么推进这个项目？"))
+
+    assert result["final_response"] == response.content
+    assert result["messages"] == [response]
+
+
+def test_route_after_agent_sends_tool_calls_to_tools_node():
+    state = _base_state()
+    state["messages"] = [AIMessage(content="", tool_calls=[{"id": "1", "name": "create_task", "args": {}}])]
+
+    assert route_after_agent(state) == "tools"
+
+
+def test_route_after_agent_ends_when_no_tool_calls_exist():
+    state = _base_state()
+    state["messages"] = [AIMessage(content="done")]
+
+    assert route_after_agent(state) == END
+
+
+def test_route_master_ends_when_final_response_exists():
+    state = _base_state()
+    state["final_response"] = "done"
+    state["current_agent"] = AgentRole.EXECUTOR.value
+
+    assert route_master(state) == END
+
+
+def test_route_master_returns_current_agent_when_more_work_remains():
+    state = _base_state()
+    state["current_agent"] = AgentRole.LIBRARIAN.value
+
+    assert route_master(state) == AgentRole.LIBRARIAN.value
+
+
+def test_get_role_tools_returns_expected_semantic_tool_sets():
+    expected_by_role = {
+        AgentRole.SCHEDULE_PLANNER: [
+            "get_schedule_day",
+            "get_tasks",
+            "resolve_time_expression",
+            "create_todo",
+            "create_schedule_task",
+            "create_reminder",
+            "create_goal",
+        ],
+        AgentRole.EXECUTOR: [
+            "get_tasks",
+            "create_task",
+            "update_task_status",
+            "resolve_time_expression",
+            "create_todo",
+            "create_schedule_task",
+            "create_reminder",
+            "create_goal",
+            "get_forum_posts",
+            "create_forum_post",
+            "scan_forum_for_instructions",
+        ],
+        AgentRole.LIBRARIAN: [
+            "search_knowledge",
+            "hybrid_search",
+            "web_search",
+            "get_knowledge_graph_context",
+            "build_knowledge_graph",
+        ],
+        AgentRole.ANALYST: [
+            "get_tasks",
+            "get_forum_posts",
+            "scan_forum_for_instructions",
+            "search_knowledge",
+            "hybrid_search",
+            "web_search",
        ],
    }

-
-def test_parse_json_action_returns_none_for_invalid_or_unknown_payload():
-    assert _parse_json_action('not json', ['create_reminder']) is None
-    assert _parse_json_action('{"mode":"tool_call","tool_calls":[{"name":"unknown","arguments":{}}]}', ['create_reminder']) is None
+    for role, expected_tool_names in expected_by_role.items():
+        actual_tools = _get_role_tools(role)
+        actual_tool_names = [tool.name for tool in actual_tools]
+        assert actual_tool_names == expected_tool_names
+        assert len(actual_tool_names) == len(set(actual_tool_names))


-def test_parse_json_action_tolerates_prefix_and_suffix_text():
-    parsed = _parse_json_action(
-        '好的，下面是 JSON：\n```json\n{"mode":"tool_call","tool_calls":[{"name":"create_reminder","arguments":{"title":"开会","reminder_at":"明天 09:00"}}]}\n```\n谢谢',
-        ['create_reminder'],
-    )
-    assert parsed is not None
-    assert parsed['mode'] == 'tool_call'
-    assert parsed['tool_calls'][0]['name'] == 'create_reminder'
+async def test_execute_tools_node_executes_tool_calls_and_tracks_created_entities(monkeypatch):
+    create_tool = AsyncFakeTool("create_task", "created task 123")
+    read_tool = SyncFakeTool("get_tasks", "[]")

-
-def test_parse_json_action_accepts_parameters_alias_for_tool_calls():
-    parsed = _parse_json_action(
-        '{"mode":"tool_call","tool_calls":[{"name":"create_reminder","parameters":{"title":"收被子","reminder_at":"2026-03-29T09:00:00+08:00"}}]}',
-        ['create_reminder'],
+    monkeypatch.setattr("app.agents.graph.ALL_TOOLS", [create_tool, read_tool])
+    monkeypatch.setattr(
+        "app.agents.graph.normalize_tool_time_arguments",
+        lambda tool_name, tool_args, current_datetime_context: {**tool_args, "normalized": True},
    )

-    assert parsed == {
-        'mode': 'tool_call',
-        'tool_calls': [
-            {
-                'name': 'create_reminder',
-                'args': {'title': '收被子', 'reminder_at': '2026-03-29T09:00:00+08:00'},
-                'reason': None,
-            }
-        ],
-    }
+    state = _base_state()
+    state["created_entities"] = [{"tool": "existing", "result": "already there"}]
+    state["current_datetime_context"] = "2026-04-02T09:00:00+08:00"
+    state["messages"] = [
+        AIMessage(
+            content="",
+            tool_calls=[
+                {"id": "tool-1", "name": "create_task", "args": {"title": "Write tests"}},
+                {"id": "tool-2", "name": "get_tasks", "args": {"status": "open"}},
+            ],
+        )
+    ]
+
+    result = await execute_tools_node(state)
+
+    assert create_tool.calls == [{"title": "Write tests", "normalized": True}]
+    assert read_tool.calls == [{"status": "open", "normalized": True}]
+    assert [type(message) for message in result["messages"]] == [ToolMessage, ToolMessage]
+    assert result["messages"][0].tool_call_id == "tool-1"
+    assert result["messages"][0].name == "create_task"
+    assert result["messages"][0].content == "created task 123"
+    assert result["messages"][1].tool_call_id == "tool-2"
+    assert result["messages"][1].name == "get_tasks"
+    assert result["messages"][1].content == "[]"
+    assert result["created_entities"] == [
+        {"tool": "existing", "result": "already there"},
+        {"tool": "create_task", "result": "created task 123"},
+    ]


-async def test_run_sub_commander_uses_json_fallback_for_non_native_provider(monkeypatch):
-    fake_llm = FakeFallbackLLM(
-        '{"mode":"tool_call","tool_calls":[{"name":"create_reminder","arguments":{"title":"开会","reminder_at":"明天 09:00"}}]}'
+async def test_call_agent_llm_includes_context_messages_and_uses_json_fallback(monkeypatch):
+    llm = CaptureFallbackLLM(AIMessage(content='{"mode":"final","final_response":"好的。"}'))
+    capabilities = SimpleNamespace(
+        provider="ollama",
+        supports_native_tools=False,
+        preferred_tool_strategy="json_fallback",
    )
-    fake_tool = FakeTool('create_reminder', '成功创建 reminder: 开会 @ 明天 09:00')
+    fake_tools = [SimpleNamespace(name="create_reminder"), SimpleNamespace(name="get_tasks")]

-    monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: fake_llm)
-    monkeypatch.setitem(
-        __import__('app.agents.graph', fromlist=['SUB_COMMANDER_TOOLSETS']).SUB_COMMANDER_TOOLSETS,
-        'schedule_planning',
-        [fake_tool],
-    )
+    monkeypatch.setattr("app.agents.graph._get_llm_for_state", lambda state: (llm, capabilities))
+    monkeypatch.setattr("app.agents.graph._get_role_tools", lambda role: fake_tools)
+    monkeypatch.setattr("app.agents.graph.build_skill_context", lambda role_key: "技能上下文: 先判断，再执行")

-    state = _base_state('明天 9 点提醒我开会', {'provider': 'ollama', 'model': 'qwen2.5'})
-    state['current_agent'] = AgentRole.SCHEDULE_PLANNER
+    state = _base_state("明天提醒我开会")
+    state["messages"] = [HumanMessage(content="明天提醒我开会")]
+    state["current_datetime_context"] = "CURRENT_TIME: 2026-04-02T09:00:00+08:00"
+    state["memory_context"] = "用户偏好早上处理深度工作。"

-    result = await _run_sub_commander(
-        state,
-        AgentRole.SCHEDULE_PLANNER,
-        'manager prompt',
-        '明天 9 点提醒我开会',
-        use_tools=True,
-    )
+    result = await call_agent_llm(state, AgentRole.EXECUTOR, "executor system prompt")

-    assert result['tool_strategy_used'] == 'json_fallback'
-    assert fake_tool.invocations == [{'title': '开会', 'reminder_at': '2026-03-29T09:00:00'}]
-    assert result['tool_calls'][0]['name'] == 'create_reminder'
-    assert result['created_entities'][0]['type'] == 'reminder'
-    assert result['fallback_parse_error'] is None
-    assert result['final_response'] == '已创建提醒：开会，时间为 2026-03-29 09:00（按当前时间理解为“明天早上9点”）。'
+    assert result["messages"][0].content == '{"mode":"final","final_response":"好的。"}'
+    assert llm.bind_tools_called is False
+    assert llm.messages is not None

-
-async def test_run_sub_commander_includes_current_datetime_context_in_system_messages(monkeypatch):
-    fake_llm = CapturingLLM('{"mode":"final","final_response":"好的。"}')
-    monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: fake_llm)
-
-    state = _base_state('明天 9 点提醒我开会', {'provider': 'ollama', 'model': 'qwen2.5'})
-    state['current_agent'] = AgentRole.SCHEDULE_PLANNER
-    state['current_datetime_context'] = 'CURRENT_TIME: 2026-03-28T12:00:00+08:00'
-
-    await _run_sub_commander(
-        state,
-        AgentRole.SCHEDULE_PLANNER,
-        'manager prompt',
-        '明天 9 点提醒我开会',
-        use_tools=True,
-    )
-
-    assert fake_llm.messages is not None
-    assert any(
-        getattr(m, 'type', None) == 'system' and 'CURRENT_TIME:' in str(getattr(m, 'content', ''))
-        for m in fake_llm.messages
-    )
-
-
-async def test_run_sub_commander_uses_web_search_in_json_fallback(monkeypatch):
-    fake_llm = FakeFallbackLLM(
-        '{"mode":"tool_call","tool_calls":[{"name":"web_search","arguments":{"query":"Jarvis 最新模型更新","top_k":2}}]}',
-        '我查了外部网页，下面是最新结果摘要。',
-    )
-    fake_tool = FakeTool('web_search', '成功搜索到 2 条网页结果')
-
-    monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: fake_llm)
-    monkeypatch.setitem(
-        __import__('app.agents.graph', fromlist=['SUB_COMMANDER_TOOLSETS']).SUB_COMMANDER_TOOLSETS,
-        'librarian_retrieval',
-        [fake_tool],
-    )
-
-    state = _base_state('帮我上网查一下 Jarvis 最新模型更新', {'provider': 'ollama', 'model': 'qwen2.5'})
-    state['current_agent'] = AgentRole.LIBRARIAN
-
-    result = await _run_sub_commander(
-        state,
-        AgentRole.LIBRARIAN,
-        'manager prompt',
-        '帮我上网查一下 Jarvis 最新模型更新',
-        use_tools=True,
-        summary_target='knowledge_context',
-    )
-
-    assert result['tool_strategy_used'] == 'json_fallback'
-    assert fake_tool.invocations == [{'query': 'Jarvis 最新模型更新', 'top_k': 2}]
-    assert result['tool_calls'][0]['name'] == 'web_search'
-    assert result['last_tool_result'] == '[web_search] 成功搜索到 2 条网页结果'
-    assert result['final_response'] == '我查了外部网页，下面是最新结果摘要。'
-
-
-    fake_llm = FakeNativeLLM()
-    fake_tool = FakeTool('create_reminder', '成功创建 reminder: 开会 @ 明天 09:00')
-
-    monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: fake_llm)
-    monkeypatch.setitem(
-        __import__('app.agents.graph', fromlist=['SUB_COMMANDER_TOOLSETS']).SUB_COMMANDER_TOOLSETS,
-        'schedule_planning',
-        [fake_tool],
-    )
-
-    state = _base_state('明天 9 点提醒我开会', {'provider': 'openai', 'model': 'gpt-4o'})
-    state['current_agent'] = AgentRole.SCHEDULE_PLANNER
-
-    result = await _run_sub_commander(
-        state,
-        AgentRole.SCHEDULE_PLANNER,
-        'manager prompt',
-        '明天 9 点提醒我开会',
-        use_tools=True,
-    )
-
-    assert result['tool_strategy_used'] == 'native'
-    assert fake_llm.tool_binding_count == 1
-    assert fake_tool.invocations == [{'title': '开会', 'reminder_at': '2026-03-29T09:00:00'}]
-    assert result['created_entities'][0]['type'] == 'reminder'
-    assert result['final_response'] == '已创建提醒：开会，时间为 2026-03-29 09:00（按当前时间理解为“明天早上9点”）。'
+    system_contents = [message.content for message in llm.messages if isinstance(message, SystemMessage)]
+    assert "executor system prompt" in system_contents[0]
+    assert any("当前时间上下文: CURRENT_TIME: 2026-04-02T09:00:00+08:00" == content for content in system_contents)
+    assert any("长期记忆上下文: 用户偏好早上处理深度工作。" == content for content in system_contents)
+    assert any("技能上下文: 先判断，再执行" == content for content in system_contents)
+    assert any(content == JSON_ACTION_FALLBACK_PROMPT for content in system_contents)
+    assert any(content == "本次可用工具列表: create_reminder, get_tasks" for content in system_contents)
+    assert any(isinstance(message, HumanMessage) and message.content == "明天提醒我开会" for message in llm.messages)
--- a/backend/tests/backend/app/agents/test_registry.py
+++ b/backend/tests/backend/app/agents/test_registry.py
@@ -0,0 +1,360 @@
+import pytest
+from collections.abc import Mapping
+
+from app.agents.prompts import (
+    SUB_COMMANDER_PROMPTS_BY_KEY,
+    TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY,
+)
+from app.agents.registry import build_registry_indexes, load_builtin_registry_bundle
+from app.agents.registry.indexes import summarize_registry_indexes
+from app.agents.registry.models import (
+    AgentManifest,
+    CapabilityManifest,
+    SpecialistTemplateManifest,
+    SubCommanderManifest,
+)
+from app.agents.registry.validator import validate_registry_bundle
+from app.agents.registry.builtins import (
+    BUILTIN_AGENT_MANIFESTS,
+    BUILTIN_CAPABILITY_MANIFESTS,
+    BUILTIN_SPECIALIST_TEMPLATE_MANIFESTS,
+    BUILTIN_SUB_COMMANDER_MANIFESTS,
+)
+from app.agents.state import AgentRole
+from app.agents.tools import SUB_COMMANDER_TOOLSETS
+
+
+def make_agent(
+    agent_id: str = "master",
+    *,
+    display_name: str = "Master",
+    role_value: str = "master",
+    system_prompt_key: str = "master",
+    default_sub_commanders: list[str] | None = None,
+) -> AgentManifest:
+    """Build an ``AgentManifest`` fixture with ``"master"`` defaults.
+
+    ``routing_hints`` is always ``["route"]``; a missing or empty
+    ``default_sub_commanders`` argument is passed on as a new empty list.
+    """
+    linked_sub_commanders = default_sub_commanders if default_sub_commanders else []
+    manifest_fields = {
+        "agent_id": agent_id,
+        "display_name": display_name,
+        "role_value": role_value,
+        "system_prompt_key": system_prompt_key,
+        "routing_hints": ["route"],
+        "default_sub_commanders": linked_sub_commanders,
+    }
+    return AgentManifest(**manifest_fields)
+
+
+def make_sub_commander(
+    sub_commander_id: str = "planner",
+    *,
+    parent_agent_id: str = "master",
+    capability_ids: list[str] | None = None,
+) -> SubCommanderManifest:
+    """Build a ``SubCommanderManifest`` fixture with a fixed prompt text.
+
+    A missing or empty ``capability_ids`` argument is passed on as a new
+    empty list.
+    """
+    granted_capabilities = capability_ids if capability_ids else []
+    return SubCommanderManifest(
+        sub_commander_id=sub_commander_id,
+        parent_agent_id=parent_agent_id,
+        prompt_text="Plan the work.",
+        capability_ids=granted_capabilities,
+    )
+
+
+def make_capability(capability_id: str = "calendar") -> CapabilityManifest:
+    """Build a ``CapabilityManifest`` whose tool name is ``<capability_id>_tool``."""
+    derived_tool_name = f"{capability_id}_tool"
+    return CapabilityManifest(
+        capability_id=capability_id,
+        tool_name=derived_tool_name,
+    )
+
+
+def make_specialist_template(
+    template_id: str = "researcher",
+    *,
+    allowed_capability_ids: list[str] | None = None,
+) -> SpecialistTemplateManifest:
+    """Build a ``SpecialistTemplateManifest`` fixture named "Researcher".
+
+    ``allowed_capability_ids`` is forwarded unchanged, so ``None`` stays
+    ``None`` rather than becoming an empty list.
+    """
+    template_fields = {
+        "template_id": template_id,
+        "display_name": "Researcher",
+        "description": "Research specialist",
+        "allowed_capability_ids": allowed_capability_ids,
+    }
+    return SpecialistTemplateManifest(**template_fields)
+
+
+def test_validate_registry_bundle_accepts_valid_bundle() -> None:
+    """A bundle whose references all resolve validates without raising."""
+    master = make_agent(default_sub_commanders=["planner"])
+    planner = make_sub_commander(capability_ids=["calendar"])
+    calendar = make_capability()
+    researcher = make_specialist_template(allowed_capability_ids=["calendar"])
+
+    validate_registry_bundle(
+        agents=[master],
+        sub_commanders=[planner],
+        capabilities=[calendar],
+        specialist_templates=[researcher],
+    )
+
+
+def test_validate_registry_bundle_rejects_duplicate_agent_ids() -> None:
+    """Two agent manifests sharing the default ``master`` id are rejected."""
+    original = make_agent(default_sub_commanders=["planner"])
+    # Differs in every field except the agent id, which stays at the default.
+    clashing = make_agent(
+        display_name="Duplicate Master",
+        role_value="master_duplicate",
+        system_prompt_key="master_duplicate",
+    )
+    duplicated_agents = [original, clashing]
+
+    with pytest.raises(ValueError, match="duplicate agent id: master"):
+        validate_registry_bundle(
+            agents=duplicated_agents,
+            sub_commanders=[],
+            capabilities=[],
+            specialist_templates=[],
+        )
+
+
+def test_validate_registry_bundle_rejects_duplicate_sub_commander_ids() -> None:
+    """Two sub commanders with the default ``planner`` id are rejected."""
+    with pytest.raises(ValueError, match="duplicate sub commander id: planner"):
+        parent = make_agent()
+        first_planner = make_sub_commander()
+        second_planner = make_sub_commander()
+        validate_registry_bundle(
+            agents=[parent],
+            sub_commanders=[first_planner, second_planner],
+            capabilities=[],
+            specialist_templates=[],
+        )
+
+
+def test_validate_registry_bundle_rejects_duplicate_capability_ids() -> None:
+    """Two capabilities with the default ``calendar`` id are rejected."""
+    with pytest.raises(ValueError, match="duplicate capability id: calendar"):
+        first_calendar = make_capability()
+        second_calendar = make_capability()
+        validate_registry_bundle(
+            agents=[],
+            sub_commanders=[],
+            capabilities=[first_calendar, second_calendar],
+            specialist_templates=[],
+        )
+
+
+def test_validate_registry_bundle_rejects_duplicate_template_ids() -> None:
+    """Two specialist templates with the default ``researcher`` id are rejected."""
+    with pytest.raises(ValueError, match="duplicate template id: researcher"):
+        first_template = make_specialist_template()
+        second_template = make_specialist_template()
+        validate_registry_bundle(
+            agents=[],
+            sub_commanders=[],
+            capabilities=[],
+            specialist_templates=[first_template, second_template],
+        )
+
+
+def test_validate_registry_bundle_rejects_unknown_sub_commander_parent_agent_ids() -> None:
+    """A sub commander naming a parent agent absent from the bundle is rejected."""
+    orphan = make_sub_commander(parent_agent_id="missing-agent")
+    orphaned_sub_commanders = [orphan]
+
+    with pytest.raises(ValueError, match="unknown parent agent id: missing-agent"):
+        validate_registry_bundle(
+            agents=[],
+            sub_commanders=orphaned_sub_commanders,
+            capabilities=[],
+            specialist_templates=[],
+        )
+
+
+def test_validate_registry_bundle_rejects_unknown_sub_commander_capability_references() -> None:
+    """A sub commander referencing ``search`` while only ``calendar`` exists is rejected."""
+    with pytest.raises(ValueError, match="unknown capability id: search"):
+        master = make_agent(default_sub_commanders=["planner"])
+        planner = make_sub_commander(capability_ids=["search"])
+        calendar = make_capability()
+        validate_registry_bundle(
+            agents=[master],
+            sub_commanders=[planner],
+            capabilities=[calendar],
+            specialist_templates=[],
+        )
+
+
+def test_validate_registry_bundle_rejects_unknown_specialist_template_capability_references() -> None:
+    """A specialist template allowing a capability absent from the bundle is rejected."""
+    with pytest.raises(ValueError, match="unknown capability id: missing-capability"):
+        calendar = make_capability()
+        dangling_template = make_specialist_template(
+            allowed_capability_ids=["missing-capability"]
+        )
+        validate_registry_bundle(
+            agents=[],
+            sub_commanders=[],
+            capabilities=[calendar],
+            specialist_templates=[dangling_template],
+        )
+
+
+def test_registry_bundle_agent_roles_match_runtime_agent_role_enum_values() -> None:
+    """Builtin agent ids and role values both equal the set of ``AgentRole`` values."""
+    bundle = load_builtin_registry_bundle()
+    indexes = build_registry_indexes(bundle)
+
+    indexed_agent_ids = set(indexes.agent_by_id)
+    assert indexed_agent_ids == {role.value for role in AgentRole}
+
+    manifest_role_values = {agent.role_value for agent in bundle.agents}
+    assert manifest_role_values == {role.value for role in AgentRole}
+
+
+def test_registry_bundle_agent_system_prompt_keys_match_runtime_top_level_prompt_surface() -> None:
+    """Agent prompt keys line up one-to-one with ``TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY``."""
+    bundle = load_builtin_registry_bundle()
+    indexes = build_registry_indexes(bundle)
+
+    # Each role maps to a prompt key equal to its own value, when such a key exists.
+    expected_prompt_keys_by_agent_id = {}
+    for role in AgentRole:
+        if role.value in TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY:
+            expected_prompt_keys_by_agent_id[role.value] = role.value
+
+    assert set(TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY) == {role.value for role in AgentRole}
+    assert indexes.agent_prompt_key_by_id == expected_prompt_keys_by_agent_id
+
+    # Resolve prompts only after the key checks above, so a missing key fails there first.
+    prompt_by_agent_id = {}
+    for agent in bundle.agents:
+        prompt_by_agent_id[agent.agent_id] = TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY[agent.system_prompt_key]
+    prompt_by_role_value = {}
+    for role in AgentRole:
+        prompt_by_role_value[role.value] = TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY[role.value]
+    assert prompt_by_agent_id == prompt_by_role_value
+
+
+def test_registry_bundle_skill_context_keys_match_graph_role_derivation_rule() -> None:
+    """Skill context keys equal each role value with ``"agent_"`` removed."""
+    bundle = load_builtin_registry_bundle()
+    indexes = build_registry_indexes(bundle)
+
+    expected_skill_context_keys = {}
+    for role in AgentRole:
+        expected_skill_context_keys[role.value] = role.value.replace("agent_", "")
+
+    assert indexes.skill_context_key_by_agent_id == expected_skill_context_keys
+
+    manifest_skill_context_keys = {
+        agent.agent_id: agent.skill_context_key for agent in bundle.agents
+    }
+    assert manifest_skill_context_keys == expected_skill_context_keys
+
+
+def test_registry_bundle_sub_commander_prompt_texts_match_runtime_prompt_map() -> None:
+    """Sub commander ids, prompt keys and prompt texts mirror ``SUB_COMMANDER_PROMPTS_BY_KEY``."""
+    bundle = load_builtin_registry_bundle()
+    indexes = build_registry_indexes(bundle)
+
+    assert set(indexes.sub_commander_by_id) == set(SUB_COMMANDER_PROMPTS_BY_KEY)
+
+    # Every runtime prompt key is expected to map to itself.
+    identity_prompt_keys = {}
+    for sub_commander_id in SUB_COMMANDER_PROMPTS_BY_KEY:
+        identity_prompt_keys[sub_commander_id] = sub_commander_id
+    assert indexes.sub_commander_prompt_key_by_id == identity_prompt_keys
+
+    manifest_prompt_texts = {}
+    for sub_commander in bundle.sub_commanders:
+        manifest_prompt_texts[sub_commander.sub_commander_id] = sub_commander.prompt_text
+    assert manifest_prompt_texts == SUB_COMMANDER_PROMPTS_BY_KEY
+
+
+def test_registry_bundle_sub_commander_tool_membership_and_order_match_runtime_toolsets() -> None:
+    """Capability ids per sub commander equal the runtime tool names, in order."""
+    bundle = load_builtin_registry_bundle()
+    indexes = build_registry_indexes(bundle)
+
+    assert set(indexes.sub_commander_by_id) == set(SUB_COMMANDER_TOOLSETS)
+
+    # Tuples keep ordering significant, unlike a set comparison.
+    runtime_tool_names = {}
+    for sub_commander_id, tools in SUB_COMMANDER_TOOLSETS.items():
+        runtime_tool_names[sub_commander_id] = tuple(tool.name for tool in tools)
+
+    assert indexes.capability_ids_by_sub_commander_id == runtime_tool_names
+
+    manifest_capability_ids = {}
+    for sub_commander in bundle.sub_commanders:
+        manifest_capability_ids[sub_commander.sub_commander_id] = tuple(
+            sub_commander.capability_ids
+        )
+    assert manifest_capability_ids == runtime_tool_names
+
+
+def test_builtin_capabilities_reference_actual_runtime_tool_names() -> None:
+    """Builtin capability tool names equal the union of all runtime toolset tool names."""
+    expected_tool_names = set()
+    for tools in SUB_COMMANDER_TOOLSETS.values():
+        expected_tool_names.update(tool.name for tool in tools)
+
+    manifest_tool_names = set()
+    for manifest in BUILTIN_CAPABILITY_MANIFESTS:
+        manifest_tool_names.add(manifest.tool_name)
+
+    assert manifest_tool_names == expected_tool_names
+
+
+def test_builtin_sub_commander_capabilities_match_runtime_toolsets() -> None:
+    """Each builtin sub commander's capability set matches its runtime toolset."""
+    capabilities_by_tool_name = {}
+    for manifest in BUILTIN_CAPABILITY_MANIFESTS:
+        capabilities_by_tool_name[manifest.tool_name] = manifest.capability_id
+
+    for sub_commander in BUILTIN_SUB_COMMANDER_MANIFESTS:
+        runtime_tools = SUB_COMMANDER_TOOLSETS[sub_commander.sub_commander_id]
+        # Translate runtime tool names into capability ids before comparing.
+        expected_capability_ids = {
+            capabilities_by_tool_name[tool.name] for tool in runtime_tools
+        }
+        declared_capability_ids = set(sub_commander.capability_ids)
+        assert declared_capability_ids == expected_capability_ids
+
+
+def test_builtin_manifests_form_a_valid_registry_bundle() -> None:
+    """The four builtin manifest collections validate together without raising."""
+    builtin_agents = list(BUILTIN_AGENT_MANIFESTS)
+    builtin_sub_commanders = list(BUILTIN_SUB_COMMANDER_MANIFESTS)
+    builtin_capabilities = list(BUILTIN_CAPABILITY_MANIFESTS)
+    builtin_templates = list(BUILTIN_SPECIALIST_TEMPLATE_MANIFESTS)
+
+    validate_registry_bundle(
+        agents=builtin_agents,
+        sub_commanders=builtin_sub_commanders,
+        capabilities=builtin_capabilities,
+        specialist_templates=builtin_templates,
+    )
+
+
+def test_load_builtin_registry_bundle_returns_non_empty_manifest_sets() -> None:
+    """The loaded bundle has agents, sub commanders and capabilities.
+
+    Specialist templates are only required to be a tuple; it may be empty.
+    """
+    bundle = load_builtin_registry_bundle()
+
+    for required_field in ("agents", "sub_commanders", "capabilities"):
+        assert getattr(bundle, required_field)
+
+    templates = bundle.specialist_templates
+    assert isinstance(templates, tuple)
+
+
+def test_build_registry_indexes_exposes_manifest_lookups_by_id() -> None:
+    """Each by-id index is keyed by exactly the ids present in the bundle."""
+    bundle = load_builtin_registry_bundle()
+
+    indexes = build_registry_indexes(bundle)
+
+    # Presence checks first; the template index may be empty, so only its type is checked.
+    assert indexes.agent_by_id
+    assert indexes.sub_commander_by_id
+    assert indexes.capability_by_id
+    assert isinstance(indexes.specialist_template_by_id, Mapping)
+
+    bundled_agent_ids = {agent.agent_id for agent in bundle.agents}
+    assert set(indexes.agent_by_id) == bundled_agent_ids
+
+    bundled_sub_commander_ids = {
+        sub_commander.sub_commander_id for sub_commander in bundle.sub_commanders
+    }
+    assert set(indexes.sub_commander_by_id) == bundled_sub_commander_ids
+
+    bundled_capability_ids = {
+        capability.capability_id for capability in bundle.capabilities
+    }
+    assert set(indexes.capability_by_id) == bundled_capability_ids
+
+    bundled_template_ids = {
+        template.template_id for template in bundle.specialist_templates
+    }
+    assert set(indexes.specialist_template_by_id) == bundled_template_ids
+
+
+def test_summarize_registry_indexes_returns_read_only_debug_counts() -> None:
+    """The summary reports one count per manifest collection in the bundle."""
+    bundle = load_builtin_registry_bundle()
+    indexes = build_registry_indexes(bundle)
+
+    summary = summarize_registry_indexes(indexes)
+
+    expected_counts = {
+        "agent_count": len(bundle.agents),
+        "sub_commander_count": len(bundle.sub_commanders),
+        "capability_count": len(bundle.capabilities),
+        "specialist_template_count": len(bundle.specialist_templates),
+    }
+    assert summary == expected_counts
+
+
+def test_build_registry_indexes_exposes_prompt_keys_skill_context_keys_and_capability_mappings() -> None:
+    """The derived index mappings agree with the manifests they are built from."""
+    bundle = load_builtin_registry_bundle()
+
+    indexes = build_registry_indexes(bundle)
+
+    def prompt_keys_of(manifests):
+        # Map each agent id to the system prompt key its manifest declares.
+        return {manifest.agent_id: manifest.system_prompt_key for manifest in manifests}
+
+    assert indexes.agent_prompt_key_by_id == prompt_keys_of(bundle.agents)
+    assert indexes.agent_prompt_key_by_id == prompt_keys_of(BUILTIN_AGENT_MANIFESTS)
+    assert set(indexes.agent_prompt_key_by_id.values()) == set(TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY)
+
+    # Sub commander prompt keys are expected to equal the sub commander ids themselves.
+    sub_commander_ids = [
+        sub_commander.sub_commander_id for sub_commander in bundle.sub_commanders
+    ]
+    assert indexes.sub_commander_prompt_key_by_id == dict(zip(sub_commander_ids, sub_commander_ids))
+    assert set(indexes.sub_commander_prompt_key_by_id.values()) == set(sub_commander_ids)
+
+    # Agents without a skill context key are expected to be absent from the index.
+    declared_skill_context_keys = {}
+    for agent in bundle.agents:
+        if agent.skill_context_key is not None:
+            declared_skill_context_keys[agent.agent_id] = agent.skill_context_key
+    assert indexes.skill_context_key_by_agent_id == declared_skill_context_keys
+
+    declared_capability_ids = {}
+    for sub_commander in bundle.sub_commanders:
+        declared_capability_ids[sub_commander.sub_commander_id] = tuple(
+            sub_commander.capability_ids
+        )
+    assert indexes.capability_ids_by_sub_commander_id == declared_capability_ids
+
+
+def test_validate_registry_bundle_accepts_loaded_builtin_registry_bundle() -> None:
+    """The bundle returned by the builtin loader validates without raising."""
+    bundle = load_builtin_registry_bundle()
+
+    # The validator is given fresh lists copied from the bundle's collections.
+    loaded_agents = list(bundle.agents)
+    loaded_sub_commanders = list(bundle.sub_commanders)
+    loaded_capabilities = list(bundle.capabilities)
+    loaded_templates = list(bundle.specialist_templates)
+
+    validate_registry_bundle(
+        agents=loaded_agents,
+        sub_commanders=loaded_sub_commanders,
+        capabilities=loaded_capabilities,
+        specialist_templates=loaded_templates,
+    )
+
+
+def test_phase_one_still_declares_specialist_template_surface_even_if_runtime_is_deferred() -> None:
+    """The builtin specialist template collection exists and is a tuple."""
+    declared_templates = BUILTIN_SPECIALIST_TEMPLATE_MANIFESTS
+    assert isinstance(declared_templates, tuple)