feat: add agent registry manifests and coverage

Introduce a manifest-backed agent registry surface and align graph tests with the new runtime prompt and tool indexing behavior.
This commit is contained in:
2026-04-02 14:34:26 +08:00
parent e9ba8597e9
commit 4251a79062
12 changed files with 1111 additions and 423 deletions

View File

@@ -1,470 +1,291 @@
from pathlib import Path
from types import SimpleNamespace
import sys
from langchain_core.messages import AIMessage, HumanMessage
# Put the worktree root at the front of sys.path and forget any already-imported
# ``app`` modules (presumably so the ``app`` imports that follow are re-imported
# from this worktree -- confirm against the repository layout).
WORKTREE_ROOT = Path(__file__).resolve().parents[4]
_worktree_entry = str(WORKTREE_ROOT)
if _worktree_entry not in sys.path:
    sys.path.insert(0, _worktree_entry)
# Snapshot the matching names first: sys.modules must not change size while iterated.
for module_name in [name for name in sys.modules if name == "app" or name.startswith("app.")]:
    del sys.modules[module_name]
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
from langgraph.graph import END
from app.agents.graph import (
_choose_sub_commander,
_parse_json_action,
_route_agent_from_user_query,
_run_sub_commander,
JSON_ACTION_FALLBACK_PROMPT,
_get_role_tools,
call_agent_llm,
execute_tools_node,
master_node,
route_after_agent,
route_master,
)
from app.agents.tools.time_reasoning import resolve_time_expression
from app.agents.state import AgentRole
from app.agents.tools import SUB_COMMANDER_TOOLSETS
from app.agents.prompts import MASTER_SYSTEM_PROMPT
def _base_state(message: str, user_llm_config: dict | None = None) -> dict:
def _base_state(message: str = "帮我安排今天的重点") -> dict:
return {
'messages': [HumanMessage(content=message)],
'user_id': 'u1',
'conversation_id': 'c1',
'current_agent': AgentRole.MASTER,
'active_agents': [AgentRole.MASTER],
'current_sub_commander': None,
'active_sub_commanders': [],
'sub_commander_trace': [],
'pending_tasks': [],
'completed_tasks': [],
'tool_calls': [],
'last_tool_result': None,
'action_results': [],
'created_entities': [],
'tool_strategy_used': None,
'provider_capabilities': None,
'fallback_parse_error': None,
'knowledge_context': None,
'graph_context': None,
'schedule_context_summary': None,
'plan': None,
'plan_steps': [],
'analysis_report': None,
'final_response': None,
'should_respond': True,
'memory_context': None,
'current_datetime_context': 'CURRENT_TIME: 2026-03-28T12:00:00+08:00',
'current_datetime_reference': {'current_time_iso': '2026-03-28T12:00:00+08:00', 'current_date_iso': '2026-03-28', 'timezone': 'UTC'},
'user_llm_config': user_llm_config,
"messages": [HumanMessage(content=message)],
"user_id": "u1",
"conversation_id": "c1",
"current_agent": AgentRole.MASTER.value,
"next_step": None,
"agent_trace": [AgentRole.MASTER.value],
"pending_tasks": [],
"completed_tasks": [],
"created_entities": [],
"knowledge_context": None,
"schedule_context_summary": None,
"analysis_report": None,
"final_response": None,
"memory_context": None,
"current_datetime_context": None,
"user_llm_config": None,
"provider_capabilities": None,
}
class FakeFallbackLLM:
    """LLM stub for the JSON-fallback path.

    The first ``ainvoke`` call answers with ``first_content``; every later call
    answers with ``followup_content``. ``bind_tools`` always fails the test.
    """

    def __init__(self, first_content: str, followup_content: str = '已创建提醒:开会,时间为 2026-03-29 09:00按当前时间理解为“明天早上9点”'):
        self.calls = 0  # number of ainvoke() calls seen so far
        self.first_content = first_content
        self.followup_content = followup_content

    async def ainvoke(self, messages):
        """Return the scripted reply for this call; ``messages`` is ignored."""
        self.calls += 1
        reply = self.first_content if self.calls == 1 else self.followup_content
        return AIMessage(content=reply)

    def bind_tools(self, tools):
        """Fail loudly: this stub must never have tools bound to it."""
        raise AssertionError('bind_tools should not be called in JSON fallback mode')
class FakeNativeBoundLLM:
    """Tool-bound LLM stub that always requests one ``create_reminder`` call."""

    async def ainvoke(self, messages):
        """Return an empty-content AI message carrying a single tool call."""
        reminder_call = {
            'id': 'call_1',
            'name': 'create_reminder',
            'args': {'title': '开会', 'reminder_at': '明天 09:00'},
        }
        return AIMessage(content='', tool_calls=[reminder_call])
class FakeNativeLLM:
    """LLM stub that advertises native tool support and counts how it is used."""

    def __init__(self):
        self.calls = 0  # direct ainvoke() calls on the unbound model
        self.tool_binding_count = 0  # bind_tools() calls
        self.bound = FakeNativeBoundLLM()
        self._jarvis_provider_capabilities = SimpleNamespace(
            provider='openai',
            supports_native_tools=True,
            preferred_tool_strategy='native',
        )

    def bind_tools(self, tools):
        """Record the binding and hand back the shared tool-bound stub."""
        self.tool_binding_count += 1
        return self.bound

    async def ainvoke(self, messages):
        """Count the call and return the fixed confirmation message."""
        self.calls += 1
        return AIMessage(content='已创建提醒:开会,时间为 2026-03-29 09:00按当前时间理解为“明天早上9点”')
class FakeTool:
    """Synchronous tool stub that records its arguments and returns a canned result."""

    def __init__(self, name: str, result: str):
        self.invocations: list[dict] = []  # argument dicts, in call order
        self.name = name
        self.result = result

    def invoke(self, args: dict):
        """Remember ``args`` and return the preconfigured result."""
        self.invocations.append(args)
        return self.result
class CapturingLLM:
    """LLM stub that keeps the last message list it received and replies with fixed content."""

    def __init__(self, content: str = '{"mode":"final","final_response":"好的。"}'):
        self.messages = None  # set by ainvoke()
        self.content = content
        self._jarvis_provider_capabilities = SimpleNamespace(
            provider='ollama',
            supports_native_tools=False,
            preferred_tool_strategy='json_fallback',
        )

    async def ainvoke(self, messages):
        """Store ``messages`` for later inspection and return the canned reply."""
        self.messages = messages
        return AIMessage(content=self.content)
class FailIfCalledLLM:
async def ainvoke(self, messages):
raise AssertionError('LLM should not be called for simple greetings')
raise AssertionError("LLM should not be called for greeting fast-path")
async def test_master_node_returns_stable_reply_for_simple_greeting(monkeypatch):
monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: FailIfCalledLLM())
class StaticResponseLLM:
def __init__(self, response: AIMessage):
self.response = response
self.messages = None
state = {
'messages': [HumanMessage(content='你好')],
'user_id': 'u1',
'conversation_id': 'c1',
'current_agent': AgentRole.MASTER,
'active_agents': [AgentRole.MASTER],
'pending_tasks': [],
'completed_tasks': [],
'tool_calls': [],
'last_tool_result': None,
'knowledge_context': None,
'graph_context': None,
'plan': None,
'plan_steps': [],
'analysis_report': None,
'final_response': None,
'should_respond': True,
'memory_context': None,
'user_llm_config': None,
}
result = await master_node(state)
assert result['final_response'] == '您好。我在。\n\n您把问题给我,我先帮您收束重点,再往下推。'
assert result['current_agent'] == AgentRole.MASTER
assert result['active_agents'] == [AgentRole.MASTER]
async def ainvoke(self, messages):
self.messages = messages
return self.response
async def test_master_node_returns_stable_reply_for_identity_question(monkeypatch):
monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: FailIfCalledLLM())
class CaptureFallbackLLM:
def __init__(self, response: AIMessage):
self.response = response
self.messages = None
self.bind_tools_called = False
state = {
'messages': [HumanMessage(content='你是谁')],
'user_id': 'u1',
'conversation_id': 'c1',
'current_agent': AgentRole.MASTER,
'active_agents': [AgentRole.MASTER],
'pending_tasks': [],
'completed_tasks': [],
'tool_calls': [],
'last_tool_result': None,
'knowledge_context': None,
'graph_context': None,
'plan': None,
'plan_steps': [],
'analysis_report': None,
'final_response': None,
'should_respond': True,
'memory_context': None,
'user_llm_config': None,
}
async def ainvoke(self, messages):
self.messages = messages
return self.response
result = await master_node(state)
assert result['final_response'] == '我是 Jarvis。\n\n比起做一个泛泛的助手,我更像您的判断型协作伙伴:帮您看清问题、压缩路径、把事情往前推进。'
assert result['current_agent'] == AgentRole.MASTER
assert result['active_agents'] == [AgentRole.MASTER]
def bind_tools(self, tools):
self.bind_tools_called = True
raise AssertionError("bind_tools should not be used when native tools are unsupported")
async def test_master_node_returns_stable_reply_for_identity_question_with_punctuation(monkeypatch):
monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: FailIfCalledLLM())
class AsyncFakeTool:
def __init__(self, name: str, result: str):
self.name = name
self.result = result
self.calls: list[dict] = []
state = {
'messages': [HumanMessage(content='你是谁?')],
'user_id': 'u1',
'conversation_id': 'c1',
'current_agent': AgentRole.MASTER,
'active_agents': [AgentRole.MASTER],
'pending_tasks': [],
'completed_tasks': [],
'tool_calls': [],
'last_tool_result': None,
'knowledge_context': None,
'graph_context': None,
'plan': None,
'plan_steps': [],
'analysis_report': None,
'final_response': None,
'should_respond': True,
'memory_context': None,
'user_llm_config': None,
}
result = await master_node(state)
assert result['final_response'] == '我是 Jarvis。\n\n比起做一个泛泛的助手,我更像您的判断型协作伙伴:帮您看清问题、压缩路径、把事情往前推进。'
assert result['current_agent'] == AgentRole.MASTER
assert result['active_agents'] == [AgentRole.MASTER]
async def ainvoke(self, args: dict):
self.calls.append(args)
return self.result
async def test_master_node_returns_stable_reply_for_identity_question_with_particle(monkeypatch):
monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: FailIfCalledLLM())
class SyncFakeTool:
def __init__(self, name: str, result: str):
self.name = name
self.result = result
self.calls: list[dict] = []
state = {
'messages': [HumanMessage(content='你是谁啊')],
'user_id': 'u1',
'conversation_id': 'c1',
'current_agent': AgentRole.MASTER,
'active_agents': [AgentRole.MASTER],
'pending_tasks': [],
'completed_tasks': [],
'tool_calls': [],
'last_tool_result': None,
'knowledge_context': None,
'graph_context': None,
'plan': None,
'plan_steps': [],
'analysis_report': None,
'final_response': None,
'should_respond': True,
'memory_context': None,
'user_llm_config': None,
}
result = await master_node(state)
assert result['final_response'] == '我是 Jarvis。\n\n比起做一个泛泛的助手,我更像您的判断型协作伙伴:帮您看清问题、压缩路径、把事情往前推进。'
assert result['current_agent'] == AgentRole.MASTER
assert result['active_agents'] == [AgentRole.MASTER]
def invoke(self, args: dict):
self.calls.append(args)
return self.result
async def test_master_node_returns_stable_reply_for_capability_question(monkeypatch):
monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: FailIfCalledLLM())
async def test_master_node_greeting_fast_path_returns_stable_reply_without_llm(monkeypatch):
monkeypatch.setattr("app.agents.graph._get_llm_for_state", lambda state: (FailIfCalledLLM(), SimpleNamespace()))
state = {
'messages': [HumanMessage(content='你能做什么')],
'user_id': 'u1',
'conversation_id': 'c1',
'current_agent': AgentRole.MASTER,
'active_agents': [AgentRole.MASTER],
'pending_tasks': [],
'completed_tasks': [],
'tool_calls': [],
'last_tool_result': None,
'knowledge_context': None,
'graph_context': None,
'plan': None,
'plan_steps': [],
'analysis_report': None,
'final_response': None,
'should_respond': True,
'memory_context': None,
'user_llm_config': None,
}
result = await master_node(_base_state("你好"))
result = await master_node(state)
assert result['final_response'] == '主要做三件事。\n- 帮您判断:看问题本质、梳理取舍、给出方向\n- 帮您收束:把复杂内容理顺,把重点拎出来\n- 帮您推进:拆任务、定步骤、把下一步变清楚\n\n如果您现在有具体目标,我可以直接进入处理。'
assert result['current_agent'] == AgentRole.MASTER
assert result['active_agents'] == [AgentRole.MASTER]
assert result["final_response"] == "您好。我在。\n\n您把问题给我,我先帮您收束重点,再往下推。"
assert result["messages"][0].content == "您好。我在。"
def test_choose_sub_commander_routes_schedule_requests_to_schedule_planning():
    """A plan-this-week request under the schedule planner picks ``schedule_planning``."""
    chosen = _choose_sub_commander(AgentRole.SCHEDULE_PLANNER, '帮我安排一下这周计划')
    assert chosen == 'schedule_planning'
def test_choose_sub_commander_routes_focus_requests_to_schedule_analysis():
    """A what-should-I-focus-on request under the schedule planner picks ``schedule_analysis``."""
    chosen = _choose_sub_commander(AgentRole.SCHEDULE_PLANNER, '基于最近对话帮我判断该聚焦什么')
    assert chosen == 'schedule_analysis'
def test_route_agent_from_user_query_routes_knowledge_requests_to_librarian():
    """A knowledge-base search query is routed to the librarian role."""
    routed_role = _route_agent_from_user_query('帮我搜索知识库里的项目资料')
    assert routed_role == AgentRole.LIBRARIAN
def test_route_agent_from_user_query_routes_schedule_requests_to_schedule_planner():
    """A remind-me-tomorrow query is routed to the schedule planner role."""
    routed_role = _route_agent_from_user_query('明天提醒我开会')
    assert routed_role == AgentRole.SCHEDULE_PLANNER
def test_route_agent_from_user_query_routes_explicit_month_day_milestone_to_schedule_planner():
    """A query that starts with an explicit month/day date is routed to the schedule planner."""
    routed_role = _route_agent_from_user_query('3月29日对话系统交付节点')
    assert routed_role == AgentRole.SCHEDULE_PLANNER
def test_choose_sub_commander_routes_explicit_month_day_milestone_to_schedule_planning():
    """An explicit month/day milestone under the schedule planner picks ``schedule_planning``."""
    chosen = _choose_sub_commander(AgentRole.SCHEDULE_PLANNER, '3月29日对话系统交付节点')
    assert chosen == 'schedule_planning'
def test_parse_json_action_extracts_tool_calls_from_fenced_json():
parsed = _parse_json_action(
'```json\n{"mode":"tool_call","tool_calls":[{"name":"create_reminder","arguments":{"title":"开会","reminder_at":"明天 09:00"}}]}\n```',
['create_reminder'],
async def test_master_node_routes_to_agent_when_llm_returns_role_name(monkeypatch):
llm = StaticResponseLLM(AIMessage(content="schedule_planner"))
monkeypatch.setattr(
"app.agents.graph._get_llm_for_state",
lambda state: (llm, SimpleNamespace(provider="test", supports_native_tools=True)),
)
assert parsed == {
'mode': 'tool_call',
'tool_calls': [
{
'name': 'create_reminder',
'args': {'title': '开会', 'reminder_at': '明天 09:00'},
'reason': None,
}
state = _base_state("帮我安排这周重点")
result = await master_node(state)
assert result["current_agent"] == AgentRole.SCHEDULE_PLANNER.value
assert result["agent_trace"] == [AgentRole.MASTER.value, AgentRole.SCHEDULE_PLANNER.value]
assert result["messages"][0].content == f"已分发至 {AgentRole.SCHEDULE_PLANNER.value} 处理。"
assert isinstance(llm.messages[0], SystemMessage)
assert MASTER_SYSTEM_PROMPT in llm.messages[0].content
async def test_master_node_returns_final_response_when_llm_answers_directly(monkeypatch):
    """The LLM's reply is returned as both ``final_response`` and the sole message."""
    direct_answer = AIMessage(content="我建议先收束需求,再拆执行步骤。")
    stub_llm = StaticResponseLLM(direct_answer)

    def fake_get_llm_for_state(state):
        # Same (llm, capabilities) pair shape the patched helper is given elsewhere in this file.
        return stub_llm, SimpleNamespace(provider="test", supports_native_tools=True)

    monkeypatch.setattr("app.agents.graph._get_llm_for_state", fake_get_llm_for_state)

    outcome = await master_node(_base_state("现在应该怎么推进这个项目?"))

    assert outcome["final_response"] == direct_answer.content
    assert outcome["messages"] == [direct_answer]
def test_route_after_agent_sends_tool_calls_to_tools_node():
    """A message list holding one AI message with a tool call routes to ``"tools"``."""
    pending_call = {"id": "1", "name": "create_task", "args": {}}
    state = _base_state()
    state["messages"] = [AIMessage(content="", tool_calls=[pending_call])]
    next_node = route_after_agent(state)
    assert next_node == "tools"
def test_route_after_agent_ends_when_no_tool_calls_exist():
    """A message list holding one AI message without tool calls routes to ``END``."""
    state = _base_state()
    state["messages"] = [AIMessage(content="done")]
    next_node = route_after_agent(state)
    assert next_node == END
def test_route_master_ends_when_final_response_exists():
    """With ``final_response`` set, routing returns ``END`` even though ``current_agent`` is the executor."""
    state = _base_state()
    state.update(final_response="done", current_agent=AgentRole.EXECUTOR.value)
    assert route_master(state) == END
def test_route_master_returns_current_agent_when_more_work_remains():
    """With the base state's ``final_response`` left unset, routing returns the current agent's value."""
    librarian = AgentRole.LIBRARIAN.value
    state = _base_state()
    state["current_agent"] = librarian
    assert route_master(state) == librarian
def test_get_role_tools_returns_expected_semantic_tool_sets():
expected_by_role = {
AgentRole.SCHEDULE_PLANNER: [
"get_schedule_day",
"get_tasks",
"resolve_time_expression",
"create_todo",
"create_schedule_task",
"create_reminder",
"create_goal",
],
AgentRole.EXECUTOR: [
"get_tasks",
"create_task",
"update_task_status",
"resolve_time_expression",
"create_todo",
"create_schedule_task",
"create_reminder",
"create_goal",
"get_forum_posts",
"create_forum_post",
"scan_forum_for_instructions",
],
AgentRole.LIBRARIAN: [
"search_knowledge",
"hybrid_search",
"web_search",
"get_knowledge_graph_context",
"build_knowledge_graph",
],
AgentRole.ANALYST: [
"get_tasks",
"get_forum_posts",
"scan_forum_for_instructions",
"search_knowledge",
"hybrid_search",
"web_search",
],
}
def test_parse_json_action_returns_none_for_invalid_or_unknown_payload():
assert _parse_json_action('not json', ['create_reminder']) is None
assert _parse_json_action('{"mode":"tool_call","tool_calls":[{"name":"unknown","arguments":{}}]}', ['create_reminder']) is None
for role, expected_tool_names in expected_by_role.items():
actual_tools = _get_role_tools(role)
actual_tool_names = [tool.name for tool in actual_tools]
assert actual_tool_names == expected_tool_names
assert len(actual_tool_names) == len(set(actual_tool_names))
def test_parse_json_action_tolerates_prefix_and_suffix_text():
    """A fenced JSON action is still parsed when extra text precedes and follows the fence."""
    raw_reply = '好的,下面是 JSON\n```json\n{"mode":"tool_call","tool_calls":[{"name":"create_reminder","arguments":{"title":"开会","reminder_at":"明天 09:00"}}]}\n```\n谢谢'
    allowed_tool_names = ['create_reminder']

    parsed = _parse_json_action(raw_reply, allowed_tool_names)

    assert parsed is not None
    assert parsed['mode'] == 'tool_call'
    first_call = parsed['tool_calls'][0]
    assert first_call['name'] == 'create_reminder'
async def test_execute_tools_node_executes_tool_calls_and_tracks_created_entities(monkeypatch):
create_tool = AsyncFakeTool("create_task", "created task 123")
read_tool = SyncFakeTool("get_tasks", "[]")
def test_parse_json_action_accepts_parameters_alias_for_tool_calls():
parsed = _parse_json_action(
'{"mode":"tool_call","tool_calls":[{"name":"create_reminder","parameters":{"title":"收被子","reminder_at":"2026-03-29T09:00:00+08:00"}}]}',
['create_reminder'],
monkeypatch.setattr("app.agents.graph.ALL_TOOLS", [create_tool, read_tool])
monkeypatch.setattr(
"app.agents.graph.normalize_tool_time_arguments",
lambda tool_name, tool_args, current_datetime_context: {**tool_args, "normalized": True},
)
assert parsed == {
'mode': 'tool_call',
'tool_calls': [
{
'name': 'create_reminder',
'args': {'title': '收被子', 'reminder_at': '2026-03-29T09:00:00+08:00'},
'reason': None,
}
],
}
state = _base_state()
state["created_entities"] = [{"tool": "existing", "result": "already there"}]
state["current_datetime_context"] = "2026-04-02T09:00:00+08:00"
state["messages"] = [
AIMessage(
content="",
tool_calls=[
{"id": "tool-1", "name": "create_task", "args": {"title": "Write tests"}},
{"id": "tool-2", "name": "get_tasks", "args": {"status": "open"}},
],
)
]
result = await execute_tools_node(state)
assert create_tool.calls == [{"title": "Write tests", "normalized": True}]
assert read_tool.calls == [{"status": "open", "normalized": True}]
assert [type(message) for message in result["messages"]] == [ToolMessage, ToolMessage]
assert result["messages"][0].tool_call_id == "tool-1"
assert result["messages"][0].name == "create_task"
assert result["messages"][0].content == "created task 123"
assert result["messages"][1].tool_call_id == "tool-2"
assert result["messages"][1].name == "get_tasks"
assert result["messages"][1].content == "[]"
assert result["created_entities"] == [
{"tool": "existing", "result": "already there"},
{"tool": "create_task", "result": "created task 123"},
]
async def test_run_sub_commander_uses_json_fallback_for_non_native_provider(monkeypatch):
fake_llm = FakeFallbackLLM(
'{"mode":"tool_call","tool_calls":[{"name":"create_reminder","arguments":{"title":"开会","reminder_at":"明天 09:00"}}]}'
async def test_call_agent_llm_includes_context_messages_and_uses_json_fallback(monkeypatch):
llm = CaptureFallbackLLM(AIMessage(content='{"mode":"final","final_response":"好的。"}'))
capabilities = SimpleNamespace(
provider="ollama",
supports_native_tools=False,
preferred_tool_strategy="json_fallback",
)
fake_tool = FakeTool('create_reminder', '成功创建 reminder: 开会 @ 明天 09:00')
fake_tools = [SimpleNamespace(name="create_reminder"), SimpleNamespace(name="get_tasks")]
monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: fake_llm)
monkeypatch.setitem(
__import__('app.agents.graph', fromlist=['SUB_COMMANDER_TOOLSETS']).SUB_COMMANDER_TOOLSETS,
'schedule_planning',
[fake_tool],
)
monkeypatch.setattr("app.agents.graph._get_llm_for_state", lambda state: (llm, capabilities))
monkeypatch.setattr("app.agents.graph._get_role_tools", lambda role: fake_tools)
monkeypatch.setattr("app.agents.graph.build_skill_context", lambda role_key: "技能上下文: 先判断,再执行")
state = _base_state('明天 9 点提醒我开会', {'provider': 'ollama', 'model': 'qwen2.5'})
state['current_agent'] = AgentRole.SCHEDULE_PLANNER
state = _base_state("明天提醒我开会")
state["messages"] = [HumanMessage(content="明天提醒我开会")]
state["current_datetime_context"] = "CURRENT_TIME: 2026-04-02T09:00:00+08:00"
state["memory_context"] = "用户偏好早上处理深度工作。"
result = await _run_sub_commander(
state,
AgentRole.SCHEDULE_PLANNER,
'manager prompt',
'明天 9 点提醒我开会',
use_tools=True,
)
result = await call_agent_llm(state, AgentRole.EXECUTOR, "executor system prompt")
assert result['tool_strategy_used'] == 'json_fallback'
assert fake_tool.invocations == [{'title': '开会', 'reminder_at': '2026-03-29T09:00:00'}]
assert result['tool_calls'][0]['name'] == 'create_reminder'
assert result['created_entities'][0]['type'] == 'reminder'
assert result['fallback_parse_error'] is None
assert result['final_response'] == '已创建提醒:开会,时间为 2026-03-29 09:00按当前时间理解为“明天早上9点”'
assert result["messages"][0].content == '{"mode":"final","final_response":"好的。"}'
assert llm.bind_tools_called is False
assert llm.messages is not None
async def test_run_sub_commander_includes_current_datetime_context_in_system_messages(monkeypatch):
    """At least one system message sent to the LLM contains the ``CURRENT_TIME:`` marker."""
    user_request = '明天 9 点提醒我开会'
    fake_llm = CapturingLLM('{"mode":"final","final_response":"好的。"}')
    monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: fake_llm)

    state = _base_state(user_request, {'provider': 'ollama', 'model': 'qwen2.5'})
    state['current_agent'] = AgentRole.SCHEDULE_PLANNER
    state['current_datetime_context'] = 'CURRENT_TIME: 2026-03-28T12:00:00+08:00'

    await _run_sub_commander(
        state,
        AgentRole.SCHEDULE_PLANNER,
        'manager prompt',
        user_request,
        use_tools=True,
    )

    assert fake_llm.messages is not None
    # Only messages whose ``type`` is 'system' count; their content is read defensively.
    system_texts = [
        str(getattr(sent, 'content', ''))
        for sent in fake_llm.messages
        if getattr(sent, 'type', None) == 'system'
    ]
    assert any('CURRENT_TIME:' in text for text in system_texts)
async def test_run_sub_commander_uses_web_search_in_json_fallback(monkeypatch):
    """A JSON-fallback reply requesting ``web_search`` invokes the tool and returns the follow-up reply."""
    user_request = '帮我上网查一下 Jarvis 最新模型更新'
    tool_call_reply = '{"mode":"tool_call","tool_calls":[{"name":"web_search","arguments":{"query":"Jarvis 最新模型更新","top_k":2}}]}'
    followup_reply = '我查了外部网页,下面是最新结果摘要。'
    fake_llm = FakeFallbackLLM(tool_call_reply, followup_reply)
    fake_tool = FakeTool('web_search', '成功搜索到 2 条网页结果')

    monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: fake_llm)
    # Patch the toolset mapping object held by the graph module.
    graph_module = __import__('app.agents.graph', fromlist=['SUB_COMMANDER_TOOLSETS'])
    monkeypatch.setitem(graph_module.SUB_COMMANDER_TOOLSETS, 'librarian_retrieval', [fake_tool])

    state = _base_state(user_request, {'provider': 'ollama', 'model': 'qwen2.5'})
    state['current_agent'] = AgentRole.LIBRARIAN

    result = await _run_sub_commander(
        state,
        AgentRole.LIBRARIAN,
        'manager prompt',
        user_request,
        use_tools=True,
        summary_target='knowledge_context',
    )

    assert result['tool_strategy_used'] == 'json_fallback'
    assert fake_tool.invocations == [{'query': 'Jarvis 最新模型更新', 'top_k': 2}]
    first_call = result['tool_calls'][0]
    assert first_call['name'] == 'web_search'
    assert result['last_tool_result'] == '[web_search] 成功搜索到 2 条网页结果'
    assert result['final_response'] == followup_reply
fake_llm = FakeNativeLLM()
fake_tool = FakeTool('create_reminder', '成功创建 reminder: 开会 @ 明天 09:00')
monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: fake_llm)
monkeypatch.setitem(
__import__('app.agents.graph', fromlist=['SUB_COMMANDER_TOOLSETS']).SUB_COMMANDER_TOOLSETS,
'schedule_planning',
[fake_tool],
)
state = _base_state('明天 9 点提醒我开会', {'provider': 'openai', 'model': 'gpt-4o'})
state['current_agent'] = AgentRole.SCHEDULE_PLANNER
result = await _run_sub_commander(
state,
AgentRole.SCHEDULE_PLANNER,
'manager prompt',
'明天 9 点提醒我开会',
use_tools=True,
)
assert result['tool_strategy_used'] == 'native'
assert fake_llm.tool_binding_count == 1
assert fake_tool.invocations == [{'title': '开会', 'reminder_at': '2026-03-29T09:00:00'}]
assert result['created_entities'][0]['type'] == 'reminder'
assert result['final_response'] == '已创建提醒:开会,时间为 2026-03-29 09:00按当前时间理解为“明天早上9点”'
system_contents = [message.content for message in llm.messages if isinstance(message, SystemMessage)]
assert "executor system prompt" in system_contents[0]
assert any("当前时间上下文: CURRENT_TIME: 2026-04-02T09:00:00+08:00" == content for content in system_contents)
assert any("长期记忆上下文: 用户偏好早上处理深度工作。" == content for content in system_contents)
assert any("技能上下文: 先判断,再执行" == content for content in system_contents)
assert any(content == JSON_ACTION_FALLBACK_PROMPT for content in system_contents)
assert any(content == "本次可用工具列表: create_reminder, get_tasks" for content in system_contents)
assert any(isinstance(message, HumanMessage) and message.content == "明天提醒我开会" for message in llm.messages)

View File

@@ -0,0 +1,360 @@
import pytest
from collections.abc import Mapping
from app.agents.prompts import (
SUB_COMMANDER_PROMPTS_BY_KEY,
TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY,
)
from app.agents.registry import build_registry_indexes, load_builtin_registry_bundle
from app.agents.registry.indexes import summarize_registry_indexes
from app.agents.registry.models import (
AgentManifest,
CapabilityManifest,
SpecialistTemplateManifest,
SubCommanderManifest,
)
from app.agents.registry.validator import validate_registry_bundle
from app.agents.registry.builtins import (
BUILTIN_AGENT_MANIFESTS,
BUILTIN_CAPABILITY_MANIFESTS,
BUILTIN_SPECIALIST_TEMPLATE_MANIFESTS,
BUILTIN_SUB_COMMANDER_MANIFESTS,
)
from app.agents.state import AgentRole
from app.agents.tools import SUB_COMMANDER_TOOLSETS
def make_agent(
    agent_id: str = "master",
    *,
    display_name: str = "Master",
    role_value: str = "master",
    system_prompt_key: str = "master",
    default_sub_commanders: list[str] | None = None,
) -> AgentManifest:
    """Build an ``AgentManifest`` for tests, defaulting every field to a "master" agent.

    ``routing_hints`` is always ``["route"]``; a missing or empty
    ``default_sub_commanders`` becomes a fresh empty list.
    """
    sub_commander_ids = default_sub_commanders if default_sub_commanders else []
    manifest_fields = {
        "agent_id": agent_id,
        "display_name": display_name,
        "role_value": role_value,
        "system_prompt_key": system_prompt_key,
        "routing_hints": ["route"],
        "default_sub_commanders": sub_commander_ids,
    }
    return AgentManifest(**manifest_fields)
def make_sub_commander(
    sub_commander_id: str = "planner",
    *,
    parent_agent_id: str = "master",
    capability_ids: list[str] | None = None,
) -> SubCommanderManifest:
    """Build a ``SubCommanderManifest`` for tests with a fixed prompt text.

    A missing or empty ``capability_ids`` becomes a fresh empty list.
    """
    linked_capabilities = capability_ids if capability_ids else []
    return SubCommanderManifest(
        sub_commander_id=sub_commander_id,
        parent_agent_id=parent_agent_id,
        prompt_text="Plan the work.",
        capability_ids=linked_capabilities,
    )
def make_capability(capability_id: str = "calendar") -> CapabilityManifest:
    """Build a ``CapabilityManifest`` whose tool name is ``<capability_id>_tool``."""
    derived_tool_name = "{}_tool".format(capability_id)
    return CapabilityManifest(capability_id=capability_id, tool_name=derived_tool_name)
def make_specialist_template(
    template_id: str = "researcher",
    *,
    allowed_capability_ids: list[str] | None = None,
) -> SpecialistTemplateManifest:
    """Build a ``SpecialistTemplateManifest`` for tests with fixed name and description.

    ``allowed_capability_ids`` is forwarded unchanged, including ``None``.
    """
    template_fields = {
        "template_id": template_id,
        "display_name": "Researcher",
        "description": "Research specialist",
        "allowed_capability_ids": allowed_capability_ids,
    }
    return SpecialistTemplateManifest(**template_fields)
def test_validate_registry_bundle_accepts_valid_bundle() -> None:
    """One agent, sub commander, capability and template built from the helper defaults validate without raising."""
    agent = make_agent(default_sub_commanders=["planner"])
    sub_commander = make_sub_commander(capability_ids=["calendar"])
    capability = make_capability()
    template = make_specialist_template(allowed_capability_ids=["calendar"])

    validate_registry_bundle(
        agents=[agent],
        sub_commanders=[sub_commander],
        capabilities=[capability],
        specialist_templates=[template],
    )
def test_validate_registry_bundle_rejects_duplicate_agent_ids() -> None:
    """Two agents sharing the default id ``master`` are rejected, even when their other fields differ."""
    first_agent = make_agent(default_sub_commanders=["planner"])
    second_agent = make_agent(
        display_name="Duplicate Master",
        role_value="master_duplicate",
        system_prompt_key="master_duplicate",
    )
    expected_error = "duplicate agent id: master"

    with pytest.raises(ValueError, match=expected_error):
        validate_registry_bundle(
            agents=[first_agent, second_agent],
            sub_commanders=[],
            capabilities=[],
            specialist_templates=[],
        )
def test_validate_registry_bundle_rejects_duplicate_sub_commander_ids() -> None:
    """Two sub commanders sharing the default id ``planner`` are rejected."""
    expected_error = "duplicate sub commander id: planner"
    with pytest.raises(ValueError, match=expected_error):
        validate_registry_bundle(
            agents=[make_agent()],
            sub_commanders=[make_sub_commander() for _ in range(2)],
            capabilities=[],
            specialist_templates=[],
        )
def test_validate_registry_bundle_rejects_duplicate_capability_ids() -> None:
    """Two capabilities sharing the default id ``calendar`` are rejected."""
    expected_error = "duplicate capability id: calendar"
    with pytest.raises(ValueError, match=expected_error):
        validate_registry_bundle(
            agents=[],
            sub_commanders=[],
            capabilities=[make_capability() for _ in range(2)],
            specialist_templates=[],
        )
def test_validate_registry_bundle_rejects_duplicate_template_ids() -> None:
    """Two specialist templates sharing the default id ``researcher`` are rejected."""
    expected_error = "duplicate template id: researcher"
    with pytest.raises(ValueError, match=expected_error):
        validate_registry_bundle(
            agents=[],
            sub_commanders=[],
            capabilities=[],
            specialist_templates=[make_specialist_template() for _ in range(2)],
        )
def test_validate_registry_bundle_rejects_unknown_sub_commander_parent_agent_ids() -> None:
    """A sub commander whose parent agent is not in the bundle is rejected."""
    orphan = make_sub_commander(parent_agent_id="missing-agent")
    expected_error = "unknown parent agent id: missing-agent"

    with pytest.raises(ValueError, match=expected_error):
        validate_registry_bundle(
            agents=[],
            sub_commanders=[orphan],
            capabilities=[],
            specialist_templates=[],
        )
def test_validate_registry_bundle_rejects_unknown_sub_commander_capability_references() -> None:
    """A sub commander referencing ``search`` is rejected when only ``calendar`` is declared."""
    expected_error = "unknown capability id: search"
    with pytest.raises(ValueError, match=expected_error):
        validate_registry_bundle(
            agents=[make_agent(default_sub_commanders=["planner"])],
            sub_commanders=[make_sub_commander(capability_ids=["search"])],
            capabilities=[make_capability()],
            specialist_templates=[],
        )
def test_validate_registry_bundle_rejects_unknown_specialist_template_capability_references() -> None:
    """A specialist template allowing an undeclared capability id is rejected."""
    expected_error = "unknown capability id: missing-capability"
    with pytest.raises(ValueError, match=expected_error):
        validate_registry_bundle(
            agents=[],
            sub_commanders=[],
            capabilities=[make_capability()],
            specialist_templates=[
                make_specialist_template(allowed_capability_ids=["missing-capability"]),
            ],
        )
def test_registry_bundle_agent_roles_match_runtime_agent_role_enum_values() -> None:
    """Built-in agent ids and role values are exactly the ``AgentRole`` enum values."""
    bundle = load_builtin_registry_bundle()
    indexes = build_registry_indexes(bundle)

    indexed_agent_ids = set(indexes.agent_by_id)
    assert indexed_agent_ids == {role.value for role in AgentRole}

    manifest_role_values = {agent.role_value for agent in bundle.agents}
    assert manifest_role_values == {role.value for role in AgentRole}
def test_registry_bundle_agent_system_prompt_keys_match_runtime_top_level_prompt_surface() -> None:
    """Each built-in agent's prompt key equals its role value and resolves to that role's prompt."""
    bundle = load_builtin_registry_bundle()
    indexes = build_registry_indexes(bundle)

    expected_prompt_keys_by_agent_id = {}
    for role in AgentRole:
        if role.value in TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY:
            expected_prompt_keys_by_agent_id[role.value] = role.value

    assert set(TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY) == {role.value for role in AgentRole}
    assert indexes.agent_prompt_key_by_id == expected_prompt_keys_by_agent_id

    # Resolve prompts through the manifests and through the enum; both views must agree.
    prompts_via_manifests = {
        agent.agent_id: TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY[agent.system_prompt_key]
        for agent in bundle.agents
    }
    prompts_via_roles = {
        role.value: TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY[role.value]
        for role in AgentRole
    }
    assert prompts_via_manifests == prompts_via_roles
def test_registry_bundle_skill_context_keys_match_graph_role_derivation_rule() -> None:
    """Skill-context keys equal the role value with every ``agent_`` substring removed."""
    bundle = load_builtin_registry_bundle()
    indexes = build_registry_indexes(bundle)

    expected_skill_context_keys = {}
    for role in AgentRole:
        expected_skill_context_keys[role.value] = role.value.replace("agent_", "")

    assert indexes.skill_context_key_by_agent_id == expected_skill_context_keys

    keys_from_manifests = {agent.agent_id: agent.skill_context_key for agent in bundle.agents}
    assert keys_from_manifests == expected_skill_context_keys
def test_registry_bundle_sub_commander_prompt_texts_match_runtime_prompt_map() -> None:
    """Built-in sub commander ids, prompt keys and prompt texts mirror ``SUB_COMMANDER_PROMPTS_BY_KEY``."""
    bundle = load_builtin_registry_bundle()
    indexes = build_registry_indexes(bundle)

    assert set(indexes.sub_commander_by_id) == set(SUB_COMMANDER_PROMPTS_BY_KEY)

    identity_prompt_keys = {prompt_key: prompt_key for prompt_key in SUB_COMMANDER_PROMPTS_BY_KEY}
    assert indexes.sub_commander_prompt_key_by_id == identity_prompt_keys

    prompt_text_by_id = {
        manifest.sub_commander_id: manifest.prompt_text
        for manifest in bundle.sub_commanders
    }
    assert prompt_text_by_id == SUB_COMMANDER_PROMPTS_BY_KEY
def test_registry_bundle_sub_commander_tool_membership_and_order_match_runtime_toolsets() -> None:
    """Each sub commander's capability ids equal its runtime tool names, in the same order."""
    bundle = load_builtin_registry_bundle()
    indexes = build_registry_indexes(bundle)

    assert set(indexes.sub_commander_by_id) == set(SUB_COMMANDER_TOOLSETS)

    # Tuples keep the comparison order-sensitive.
    runtime_tool_names_by_id = {
        sub_commander_id: tuple(tool.name for tool in tools)
        for sub_commander_id, tools in SUB_COMMANDER_TOOLSETS.items()
    }
    assert indexes.capability_ids_by_sub_commander_id == runtime_tool_names_by_id

    manifest_capability_ids_by_id = {
        manifest.sub_commander_id: tuple(manifest.capability_ids)
        for manifest in bundle.sub_commanders
    }
    assert manifest_capability_ids_by_id == runtime_tool_names_by_id
def test_builtin_capabilities_reference_actual_runtime_tool_names() -> None:
    """The capability manifests' tool names are exactly the names used across all runtime toolsets."""
    expected_tool_names = set()
    for tools in SUB_COMMANDER_TOOLSETS.values():
        expected_tool_names.update(tool.name for tool in tools)

    manifest_tool_names = set()
    for manifest in BUILTIN_CAPABILITY_MANIFESTS:
        manifest_tool_names.add(manifest.tool_name)

    assert manifest_tool_names == expected_tool_names
def test_builtin_sub_commander_capabilities_match_runtime_toolsets() -> None:
    """Each built-in sub commander lists the capability ids of its runtime tools (order ignored)."""
    capability_id_by_tool_name = {}
    for manifest in BUILTIN_CAPABILITY_MANIFESTS:
        capability_id_by_tool_name[manifest.tool_name] = manifest.capability_id

    for sub_commander in BUILTIN_SUB_COMMANDER_MANIFESTS:
        runtime_tools = SUB_COMMANDER_TOOLSETS[sub_commander.sub_commander_id]
        expected_capability_ids = {capability_id_by_tool_name[tool.name] for tool in runtime_tools}
        assert set(sub_commander.capability_ids) == expected_capability_ids
def test_builtin_manifests_form_a_valid_registry_bundle() -> None:
    """The four built-in manifest collections pass validation when supplied as lists."""
    bundle_kwargs = {
        "agents": [*BUILTIN_AGENT_MANIFESTS],
        "sub_commanders": [*BUILTIN_SUB_COMMANDER_MANIFESTS],
        "capabilities": [*BUILTIN_CAPABILITY_MANIFESTS],
        "specialist_templates": [*BUILTIN_SPECIALIST_TEMPLATE_MANIFESTS],
    }
    validate_registry_bundle(**bundle_kwargs)
def test_load_builtin_registry_bundle_returns_non_empty_manifest_sets() -> None:
    """The built-in bundle has agents, sub commanders and capabilities; templates need only be a tuple."""
    loaded = load_builtin_registry_bundle()

    assert loaded.agents
    assert loaded.sub_commanders
    assert loaded.capabilities

    templates = loaded.specialist_templates
    assert isinstance(templates, tuple)
def test_build_registry_indexes_exposes_manifest_lookups_by_id() -> None:
    """Every by-id index is keyed by exactly the ids present in the bundle."""
    bundle = load_builtin_registry_bundle()
    indexes = build_registry_indexes(bundle)

    assert indexes.agent_by_id
    assert indexes.sub_commander_by_id
    assert indexes.capability_by_id
    # The template index may be empty, so only its type is checked.
    assert isinstance(indexes.specialist_template_by_id, Mapping)

    agent_ids = {agent.agent_id for agent in bundle.agents}
    assert set(indexes.agent_by_id) == agent_ids

    sub_commander_ids = {manifest.sub_commander_id for manifest in bundle.sub_commanders}
    assert set(indexes.sub_commander_by_id) == sub_commander_ids

    capability_ids = {capability.capability_id for capability in bundle.capabilities}
    assert set(indexes.capability_by_id) == capability_ids

    template_ids = {template.template_id for template in bundle.specialist_templates}
    assert set(indexes.specialist_template_by_id) == template_ids
def test_summarize_registry_indexes_returns_read_only_debug_counts() -> None:
    """The summary reports one count per manifest kind, equal to the bundle's collection sizes."""
    bundle = load_builtin_registry_bundle()
    indexes = build_registry_indexes(bundle)

    expected_counts = {
        "agent_count": len(bundle.agents),
        "sub_commander_count": len(bundle.sub_commanders),
        "capability_count": len(bundle.capabilities),
        "specialist_template_count": len(bundle.specialist_templates),
    }
    summary = summarize_registry_indexes(indexes)
    assert summary == expected_counts
def test_build_registry_indexes_exposes_prompt_keys_skill_context_keys_and_capability_mappings() -> None:
    """The derived prompt-key, skill-context and capability indexes agree with the manifests."""
    bundle = load_builtin_registry_bundle()
    indexes = build_registry_indexes(bundle)

    # Agent prompt keys: checked against the loaded bundle and the raw built-in manifests.
    prompt_keys_from_bundle = {agent.agent_id: agent.system_prompt_key for agent in bundle.agents}
    assert indexes.agent_prompt_key_by_id == prompt_keys_from_bundle
    prompt_keys_from_builtins = {
        agent.agent_id: agent.system_prompt_key for agent in BUILTIN_AGENT_MANIFESTS
    }
    assert indexes.agent_prompt_key_by_id == prompt_keys_from_builtins
    assert set(indexes.agent_prompt_key_by_id.values()) == set(TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY)

    # Sub commander prompt keys are expected to equal the sub commander ids.
    identity_prompt_keys = {
        manifest.sub_commander_id: manifest.sub_commander_id
        for manifest in bundle.sub_commanders
    }
    assert indexes.sub_commander_prompt_key_by_id == identity_prompt_keys
    sub_commander_ids = {manifest.sub_commander_id for manifest in bundle.sub_commanders}
    assert set(indexes.sub_commander_prompt_key_by_id.values()) == sub_commander_ids

    # Agents without a skill-context key are expected to be absent from that index.
    skill_context_keys = {
        agent.agent_id: agent.skill_context_key
        for agent in bundle.agents
        if agent.skill_context_key is not None
    }
    assert indexes.skill_context_key_by_agent_id == skill_context_keys

    capability_ids_by_id = {
        manifest.sub_commander_id: tuple(manifest.capability_ids)
        for manifest in bundle.sub_commanders
    }
    assert indexes.capability_ids_by_sub_commander_id == capability_ids_by_id
def test_validate_registry_bundle_accepts_loaded_builtin_registry_bundle() -> None:
    """The bundle returned by the loader passes validation when its collections are supplied as lists."""
    loaded = load_builtin_registry_bundle()
    bundle_kwargs = {
        "agents": [*loaded.agents],
        "sub_commanders": [*loaded.sub_commanders],
        "capabilities": [*loaded.capabilities],
        "specialist_templates": [*loaded.specialist_templates],
    }
    validate_registry_bundle(**bundle_kwargs)
def test_phase_one_still_declares_specialist_template_surface_even_if_runtime_is_deferred() -> None:
    """The built-in specialist template collection exists and is a tuple (possibly empty)."""
    declared_templates = BUILTIN_SPECIALIST_TEMPLATE_MANIFESTS
    assert isinstance(declared_templates, tuple)