test(backend): add tests for orchestration and learning runtimes
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
@@ -314,6 +314,22 @@ class FailIfCalledLLM:
|
|||||||
raise AssertionError('LLM should not be called for simple greetings')
|
raise AssertionError('LLM should not be called for simple greetings')
|
||||||
|
|
||||||
|
|
||||||
|
class InternalMarkupRecoveryLLM:
    """Scripted fake LLM that replays canned responses in order.

    Each ``ainvoke`` call returns the next entry of ``responses``; once the
    script is exhausted, the last entry is returned again. The instance also
    exposes a ``_jarvis_provider_capabilities`` namespace with native tools
    disabled and ``json_fallback`` as the preferred tool strategy.
    """

    def __init__(self, responses: list[str]):
        self._jarvis_provider_capabilities = SimpleNamespace(
            provider='minimax',
            supports_native_tools=False,
            preferred_tool_strategy='json_fallback',
        )
        # Number of ``ainvoke`` calls observed so far.
        self.calls = 0
        self.responses = responses

    async def ainvoke(self, messages):
        """Return the scripted reply for this call; ``messages`` is not inspected."""
        self.calls += 1
        position = self.calls - 1
        last_position = len(self.responses) - 1
        # Clamp to the final scripted response once the script runs out.
        if position > last_position:
            position = last_position
        return AIMessage(content=self.responses[position])
|
||||||
|
|
||||||
|
|
||||||
def test_initial_state_sets_structured_continuity_defaults():
|
def test_initial_state_sets_structured_continuity_defaults():
|
||||||
state = initial_state('u1', 'c1')
|
state = initial_state('u1', 'c1')
|
||||||
|
|
||||||
@@ -2047,6 +2063,75 @@ async def test_run_sub_commander_uses_web_search_in_json_fallback(monkeypatch):
|
|||||||
assert result['final_response'] == '我查了外部网页,下面是最新结果摘要。'
|
assert result['final_response'] == '我查了外部网页,下面是最新结果摘要。'
|
||||||
|
|
||||||
|
|
||||||
|
async def test_run_sub_commander_recovers_from_internal_tool_markup_after_tool_round(monkeypatch):
    """A reply that leaks internal tool-call markup is retried until a clean answer arrives.

    Scripted LLM replies: (1) a JSON-fallback ``web_search`` tool call,
    (2) a reply containing ``<minimax:tool_call>`` markup, (3) a clean answer.
    With ``max_retries`` set to 1 the test expects three LLM calls, a single
    tool invocation and the clean answer as ``final_response``.
    """
    # Plain function-scope import instead of a bare ``__import__`` call.
    import app.agents.graph as graph_module

    fake_llm = InternalMarkupRecoveryLLM([
        '{"mode":"tool_call","tool_calls":[{"name":"web_search","arguments":{"query":"武汉 介绍","top_k":2}}]}',
        '我来让知识管理员为你整理武汉的详细介绍。\n\n分发说明:这个问题需要调用知识库信息,由 librarian(知识管理员)处理最合适。\n<minimax:tool_call>\n<invoke name="librarian">\n<parameter name="info_type">city_introduction</parameter>\n<parameter name="parameters">{"city":"武汉","word_count":2000,"language":"zh-CN"}</parameter>\n</invoke>\n</minimax:tool_call>',
        '武汉是湖北省省会,位于长江与汉江交汇处,是中部重要的交通、科教和工业中心。',
    ])
    fake_tool = FakeTool('web_search', 'found 2 web results')

    monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: fake_llm)
    # Swap the librarian toolset for the fake tool; monkeypatch restores it afterwards.
    monkeypatch.setitem(
        graph_module.SUB_COMMANDER_TOOLSETS,
        'librarian_retrieval',
        [fake_tool],
    )

    state = _base_state('请介绍一下武汉', {'provider': 'openai', 'model': 'MiniMax-M2.7-highspeed', 'base_url': 'https://api.minimaxi.com/v1'})
    state['current_agent'] = AgentRole.LIBRARIAN
    state['max_retries'] = 1

    result = await _run_sub_commander(
        state,
        AgentRole.LIBRARIAN,
        'manager prompt',
        '请介绍一下武汉',
        use_tools=True,
        summary_target='knowledge_context',
    )

    assert fake_llm.calls == 3
    assert fake_tool.invocations == [{'query': '武汉 介绍', 'top_k': 2}]
    assert result['fallback_parse_error'] is None
    assert '<invoke name=' not in result['final_response']
    assert '分发说明' not in result['final_response']
    assert result['final_response'] == '武汉是湖北省省会,位于长江与汉江交汇处,是中部重要的交通、科教和工业中心。'
|
||||||
|
|
||||||
|
|
||||||
|
async def test_run_sub_commander_falls_back_to_tool_summary_when_internal_markup_persists(monkeypatch):
    """When markup keeps leaking and no retries remain, the tool output is summarised.

    Scripted LLM replies: (1) a JSON-fallback ``web_search`` tool call,
    (2) a reply containing ``<minimax:tool_call>`` markup. With
    ``max_retries`` set to 0 the test expects two LLM calls, the
    ``internal_tool_markup`` parse error and a final response built from the
    tool result.
    """
    # Plain function-scope import instead of a bare ``__import__`` call.
    import app.agents.graph as graph_module

    fake_llm = InternalMarkupRecoveryLLM([
        '{"mode":"tool_call","tool_calls":[{"name":"web_search","arguments":{"query":"武汉 介绍","top_k":2}}]}',
        '分发说明:交给 librarian。\n<minimax:tool_call><invoke name="librarian"></invoke></minimax:tool_call>',
    ])
    fake_tool = FakeTool('web_search', 'found 2 web results')

    monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: fake_llm)
    # Swap the librarian toolset for the fake tool; monkeypatch restores it afterwards.
    monkeypatch.setitem(
        graph_module.SUB_COMMANDER_TOOLSETS,
        'librarian_retrieval',
        [fake_tool],
    )

    state = _base_state('请介绍一下武汉', {'provider': 'openai', 'model': 'MiniMax-M2.7-highspeed', 'base_url': 'https://api.minimaxi.com/v1'})
    state['current_agent'] = AgentRole.LIBRARIAN
    state['max_retries'] = 0

    result = await _run_sub_commander(
        state,
        AgentRole.LIBRARIAN,
        'manager prompt',
        '请介绍一下武汉',
        use_tools=True,
        summary_target='knowledge_context',
    )

    assert fake_llm.calls == 2
    assert result['fallback_parse_error'] == 'internal_tool_markup'
    assert result['final_response'] == '我已经完成检索,直接给您可用信息:\n\nfound 2 web results'
    assert '<invoke name=' not in result['final_response']
|
||||||
|
|
||||||
|
|
||||||
async def test_run_sub_commander_supports_multiple_json_fallback_tool_rounds(monkeypatch):
|
async def test_run_sub_commander_supports_multiple_json_fallback_tool_rounds(monkeypatch):
|
||||||
fake_llm = TripleResponseFallbackLLM([
|
fake_llm = TripleResponseFallbackLLM([
|
||||||
'{"mode":"tool_call","tool_calls":[{"name":"web_search","arguments":{"query":"Jarvis 最新模型更新","top_k":2}}]}',
|
'{"mode":"tool_call","tool_calls":[{"name":"web_search","arguments":{"query":"Jarvis 最新模型更新","top_k":2}}]}',
|
||||||
|
|||||||
138
backend/tests/backend/app/agents/test_result_merge_runtime.py
Normal file
138
backend/tests/backend/app/agents/test_result_merge_runtime.py
Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
import app.agents.graph as graph_module
|
||||||
|
from app.agents.orchestration.result_merge import merge_task_results
|
||||||
|
from app.agents.schemas.task import AgentTask
|
||||||
|
from app.agents.state import AgentRole, initial_state
|
||||||
|
|
||||||
|
|
||||||
|
def test_merge_task_results_marks_conflict_for_distinct_completed_summaries():
    """Two completed results with different summaries are reported as conflicted.

    The test also pins the ``rank_by_evidence_count`` strategy and expects the
    summary of the result listing two evidence items to be the resolved one.
    """
    librarian_result = {
        "task_id": "task-1",
        "status": "completed",
        "summary": "结论 A",
        "evidence": [{"type": "source"}],
        "owner_agent_id": "librarian",
    }
    analyst_result = {
        "task_id": "task-2",
        "status": "completed",
        "summary": "结论 B",
        "evidence": [{"type": "analysis"}, {"type": "analysis"}],
        "owner_agent_id": "analyst",
    }

    report = merge_task_results([librarian_result, analyst_result])

    assert report.status == "conflicted"
    assert "multiple_distinct_completed_summaries" in report.conflict_flags
    assert report.resolution_strategy == "rank_by_evidence_count"
    assert report.resolved_summary == "结论 B"
|
||||||
|
|
||||||
|
|
||||||
|
def test_verify_collaboration_results_persists_merge_and_verification_reports():
    """Verifying two agreeing results stores both reports and emits both events.

    Both tasks complete with the same summary, so the test expects a
    ``merged`` merge report, a ``passed`` verification report, and the
    ``agent.merge.completed`` / ``agent.verify.completed`` trace events.
    """
    state = initial_state("u1", "c1")
    librarian = AgentRole.LIBRARIAN.value
    analyst = AgentRole.ANALYST.value

    evidence_task = AgentTask(
        task_id="task-1",
        title="收集证据",
        role=librarian,
        owner_agent_id=librarian,
        goal="检索资料",
        expected_evidence=[{"type": "evidence"}],
    )
    analysis_task = AgentTask(
        task_id="task-2",
        title="给出分析",
        role=analyst,
        owner_agent_id=analyst,
        goal="分析风险",
        expected_evidence=[{"type": "analysis"}],
    )
    results = [
        {
            "task_id": "task-1",
            "status": "completed",
            "summary": "证据显示风险中等",
            "evidence": [{"type": "evidence"}],
            "owner_agent_id": librarian,
        },
        {
            "task_id": "task-2",
            "status": "completed",
            "summary": "证据显示风险中等",
            "evidence": [{"type": "analysis"}],
            "owner_agent_id": analyst,
        },
    ]

    graph_module._verify_collaboration_results(
        state,
        [evidence_task, analysis_task],
        task_results=results,
    )

    merge_report = state["merge_report"]
    assert merge_report is not None
    assert merge_report["status"] == "merged"

    verification_report = state["verification_report"]
    assert verification_report is not None
    assert verification_report["status"] == "passed"

    event_types = [entry["event_type"] for entry in state["event_trace"]]
    assert "agent.merge.completed" in event_types
    assert "agent.verify.completed" in event_types
|
||||||
|
|
||||||
|
|
||||||
|
def test_serial_fallback_response_is_used_when_merge_report_requires_fallback():
    """A failed verification with a fallback merge switches to the serial response.

    One task completes and one fails. After verification the test itself
    applies the serial fallback and appends the rollback event, then checks
    the resulting response text and event trace.
    """
    state = initial_state("u1", "c1")
    librarian = AgentRole.LIBRARIAN.value
    analyst = AgentRole.ANALYST.value

    tasks = [
        AgentTask(
            task_id="task-1",
            title="收集证据",
            role=librarian,
            owner_agent_id=librarian,
            goal="检索资料",
            expected_evidence=[{"type": "evidence"}],
        ),
        AgentTask(
            task_id="task-2",
            title="给出分析",
            role=analyst,
            owner_agent_id=analyst,
            goal="分析风险",
            expected_evidence=[{"type": "analysis"}],
        ),
    ]
    completed_result = {
        "task_id": "task-1",
        "status": "completed",
        "summary": "已确认可用证据",
        "evidence": [{"type": "evidence"}],
        "owner_agent_id": librarian,
    }
    failed_result = {
        "task_id": "task-2",
        "status": "failed",
        "summary": "分析失败",
        "evidence": [{"type": "analysis"}],
        "owner_agent_id": analyst,
    }
    state["task_results"] = [completed_result, failed_result]
    state["final_response"] = "原始协作汇总"

    graph_module._verify_collaboration_results(state, tasks, state["task_results"])

    # NOTE(review): the fallback is applied conditionally; when the condition
    # is false the assertions below fail because the original response stays.
    needs_fallback = (
        state["verification_status"] == "failed"
        and state["merge_report"]["fallback_used"]
    )
    if needs_fallback:
        fallback_text = graph_module._build_serial_fallback_response(
            "先查资料再分析",
            state["task_results"],
            state["merge_report"],
        )
        state["final_response"] = fallback_text
        graph_module._append_event_trace(
            state,
            "agent.rollback.triggered",
            payload={"layer": "collaboration_runtime", "reason": "merge_fallback_used"},
            severity="warning",
        )

    assert "切回保守收敛路径" in state["final_response"]
    event_types = [entry["event_type"] for entry in state["event_trace"]]
    assert "agent.rollback.triggered" in event_types
|
||||||
170
backend/tests/backend/app/agents/test_runtime_context.py
Normal file
170
backend/tests/backend/app/agents/test_runtime_context.py
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
from langchain_core.messages import HumanMessage
|
||||||
|
|
||||||
|
import app.agents.graph as graph_module
|
||||||
|
from app.agents.graph import master_node
|
||||||
|
from app.agents.learning.retrospector import build_session_retrospective
|
||||||
|
from app.agents.schemas.orchestration import (
|
||||||
|
RuntimeRequestContext,
|
||||||
|
assess_parallel_worthiness,
|
||||||
|
render_runtime_request_context_summary,
|
||||||
|
)
|
||||||
|
from app.agents.schemas.skills import SkillShortlistEntry
|
||||||
|
from app.agents.state import initial_state
|
||||||
|
|
||||||
|
|
||||||
|
def test_runtime_request_context_summary_renders_parallel_and_shortlists():
    """The rendered summary names the heading, the runtime mode and the shortlisted skill."""
    query = "先分析需求,再查资料,同时整理成计划"
    worthiness = assess_parallel_worthiness(
        query,
        retrospective_count=1,
        skill_count=2,
    )
    retrospective_entry = {
        "task_type": "analysis",
        "summary": "上次先检索再分析,结果更稳。",
    }
    skill_entry = SkillShortlistEntry(
        skill_name="weekly-planning",
        score=0.82,
        rationale="命中计划关键词",
        injection_mode="summary",
    )
    context = RuntimeRequestContext(
        user_id="u1",
        session_id="c1",
        query_text=query,
        recalled_memories=["最近偏好结构化输出"],
        retrospective_shortlist=[retrospective_entry],
        shortlisted_skills=["weekly-planning"],
        skill_shortlist=[skill_entry],
        parallel_worthiness=worthiness,
        recommended_runtime_mode="collaboration",
    )

    summary = render_runtime_request_context_summary(context)

    for fragment in ("Runtime Request Context", "collaboration", "weekly-planning"):
        assert fragment in summary
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_session_retrospective_captures_skill_and_history_context():
    """The retrospective carries user, mode, skill names, shortlist count and outcome."""
    final_state = {
        "execution_mode": "collaboration",
        "current_agent": "analyst",
        "verification_status": "passed",
        "verification_summary": "ok",
        "final_response": "已经给出建议",
        "skill_shortlist": [{"skill_name": "weekly-planning"}],
        "event_trace": [{"event_type": "agent.execution.decided", "agent_id": "master"}],
        "verification_evidence": [{"type": "verification"}],
        "completed_tasks": [{"task_id": "t1", "title": "收集信息", "status": "completed"}],
        "retrospective_shortlist": [{"summary": "上次周计划拆解有效"}],
        "parallel_worthiness": {"score": 0.6},
    }
    request_context = {
        "user_id": "u1",
        "recommended_runtime_mode": "collaboration",
    }

    retrospective = build_session_retrospective(
        request_id="resp-1",
        session_id="conv-1",
        user_query="帮我分析并安排下周任务",
        state=final_state,
        runtime_context=request_context,
    )

    assert retrospective.user_id == "u1"
    assert retrospective.execution_mode == "collaboration"
    assert retrospective.used_skill_names == ["weekly-planning"]
    assert retrospective.context_snapshot["retrospective_shortlist_count"] == 1
    assert retrospective.outcome == "completed"
|
||||||
|
|
||||||
|
|
||||||
|
async def test_master_node_records_execution_decision_and_skill_shortlist_event():
    """``master_node`` stores an execution decision and emits the four planning events."""
    shortlisted_skill = {
        "skill_name": "research-synthesis",
        "score": 0.73,
        "injection_mode": "summary",
    }
    request_context = {
        "request_id": "req-1",
        "recommended_runtime_mode": "direct",
        "parallel_worthiness": {
            "preferred_mode": "direct",
            "score": 0.2,
            "estimated_subtasks": 1,
        },
    }
    task_graph = {
        "graph_id": "graph-1",
        "nodes": [{"node_id": "task-1", "title": "收集证据", "role": "librarian"}],
        "entry_node_ids": ["task-1"],
        "max_parallelism": 1,
    }

    state = initial_state("u1", "c1")
    state["messages"] = [HumanMessage(content="帮我查一下资料并分析重点")]
    state["skill_shortlist"] = [shortlisted_skill]
    state["runtime_request_context"] = request_context
    state["task_graph"] = task_graph

    result = await master_node(state)

    assert result["execution_decision"] is not None
    event_types = [entry["event_type"] for entry in result["event_trace"]]
    expected_events = (
        "agent.parallel.assessed",
        "agent.skill.shortlisted",
        "agent.task_graph.built",
        "agent.execution.decided",
    )
    for expected in expected_events:
        assert expected in event_types
|
||||||
|
|
||||||
|
|
||||||
|
async def test_master_node_records_rollback_event_when_parallel_task_graph_flag_is_disabled(monkeypatch):
    """A disabled parallel-task-graph flag yields an ``agent.rollback.triggered`` event.

    The state prefers parallel execution while ``ENABLE_PARALLEL_TASK_GRAPH``
    is off; the collaboration flow is stubbed so only ``master_node``'s own
    decision logic runs.
    """
    async def fake_collaboration_flow(state, _user_query):
        state["execution_mode"] = "collaboration"
        state["final_response"] = "collaboration skipped in test"
        return state

    # Patch through monkeypatch so the real flow is restored after this test;
    # a plain attribute assignment would leak the stub into later tests.
    monkeypatch.setattr(graph_module, "_run_collaboration_flow", fake_collaboration_flow)

    state = initial_state("u1", "c1")
    state["messages"] = [HumanMessage(content="先查资料再分析风险再安排计划")]
    state["feature_flags"] = {"ENABLE_PARALLEL_TASK_GRAPH": False}
    state["parallel_worthiness"] = {
        "preferred_mode": "parallel",
        "score": 0.8,
        "estimated_subtasks": 3,
    }
    state["runtime_request_context"] = {
        "request_id": "req-2",
        "user_id": "u1",
        "session_id": "c1",
        "recommended_runtime_mode": "collaboration",
    }

    result = await master_node(state)

    event_types = [item["event_type"] for item in result["event_trace"]]
    assert "agent.rollback.triggered" in event_types
|
||||||
|
|
||||||
|
|
||||||
|
async def test_master_node_direct_mode_baseline_still_returns_simple_response():
    """A simple greeting is handled in ``direct`` mode and produces a response."""
    greeting = HumanMessage(content="你好")
    state = initial_state("u1", "c1")
    state["messages"] = [greeting]

    outcome = await master_node(state)

    assert outcome["execution_mode"] == "direct"
    assert outcome["final_response"] is not None
|
||||||
|
|
||||||
|
|
||||||
|
async def test_master_node_collaboration_mode_baseline_still_respects_complex_request(monkeypatch):
    """A multi-step request is routed to the (stubbed) collaboration flow.

    The collaboration flow is replaced by a stub that marks the state, so the
    test only checks that ``master_node`` returns what that flow produced.
    """
    async def fake_collaboration_flow(state, _user_query):
        state["execution_mode"] = "collaboration"
        state["final_response"] = "collaboration baseline ok"
        return state

    # Use the monkeypatch fixture so the stub is removed after the test;
    # a plain attribute assignment would leak into later tests.
    monkeypatch.setattr(graph_module, "_run_collaboration_flow", fake_collaboration_flow)

    state = initial_state("u1", "c1")
    state["messages"] = [HumanMessage(content="先查资料,再分析风险,再安排计划")]

    result = await master_node(state)

    assert result["execution_mode"] == "collaboration"
    assert result["final_response"] == "collaboration baseline ok"
|
||||||
66
backend/tests/backend/app/agents/test_scheduler_runtime.py
Normal file
66
backend/tests/backend/app/agents/test_scheduler_runtime.py
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
from langchain_core.messages import HumanMessage
|
||||||
|
|
||||||
|
import app.agents.graph as graph_module
|
||||||
|
from app.agents.orchestration.scheduler import build_subtask_specs
|
||||||
|
from app.agents.orchestration.task_graph import build_bounded_task_graph
|
||||||
|
from app.agents.schemas.orchestration import TaskGraph, assess_parallel_worthiness
|
||||||
|
from app.agents.state import initial_state
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_subtask_specs_keeps_dependencies_and_contract_fields():
    """Every spec carries the graph id, a dict context slice and an output schema.

    At least one spec must also declare dependencies.
    """
    query = "先查资料、再分析风险、再安排下周计划"
    worthiness = assess_parallel_worthiness(
        query,
        retrospective_count=2,
        skill_count=1,
    )
    task_graph = build_bounded_task_graph(
        query_text=query,
        parallel_worthiness=worthiness,
    )

    specs = build_subtask_specs(task_graph, query_text=query)

    assert specs
    assert all(item.parent_run_id == task_graph.graph_id for item in specs)
    assert all(isinstance(item.context_slice, dict) for item in specs)
    assert all(item.expected_output_schema for item in specs)
    assert any(item.dependencies for item in specs)
|
||||||
|
|
||||||
|
|
||||||
|
async def test_run_collaboration_flow_uses_task_graph_plan_and_records_subtask_events(monkeypatch):
    """The collaboration flow schedules subtasks from the task graph and traces them.

    ``_run_sub_commander`` is stubbed to write a deterministic response, so the
    test checks scheduling, subtask start/complete events and task results.
    """
    async def fake_run_sub_commander(
        state,
        assigned_role,
        _system_prompt,
        task_goal,
        **_kwargs,
    ):
        state["final_response"] = f"{assigned_role.value} handled: {task_goal}"
        return state

    monkeypatch.setattr(graph_module, "_run_sub_commander", fake_run_sub_commander)

    query = "先查资料、再分析风险、再安排下周计划"
    state = initial_state("u1", "c1")
    state["messages"] = [HumanMessage(content=query)]
    state["current_datetime_context"] = "CURRENT_TIME: 2026-03-28T12:00:00+08:00"

    worthiness = assess_parallel_worthiness(
        query,
        retrospective_count=2,
        skill_count=1,
    )
    built_graph = build_bounded_task_graph(
        query_text=query,
        parallel_worthiness=worthiness,
    )
    # Round-trip through the schema so the state holds the validated JSON form.
    graph_payload = built_graph.model_dump(mode="json")
    state["task_graph"] = TaskGraph.model_validate(graph_payload).model_dump(mode="json")

    result = await graph_module._run_collaboration_flow(state, query)

    assert result["scheduled_subtasks"]
    event_types = [entry["event_type"] for entry in result["event_trace"]]
    assert "agent.subtask.started" in event_types
    assert "agent.subtask.completed" in event_types
    assert result["task_results"]
|
||||||
59
backend/tests/backend/app/agents/test_task_graph_runtime.py
Normal file
59
backend/tests/backend/app/agents/test_task_graph_runtime.py
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
from app.agents.orchestration.task_graph import build_bounded_task_graph
|
||||||
|
from app.agents.schemas.orchestration import RuntimeRequestContext, assess_parallel_worthiness, render_runtime_request_context_summary
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_bounded_task_graph_creates_independent_nodes_and_merge_node():
    """The graph has entry nodes, a parallel node before the last one, and a final master node."""
    query = "先查资料、再分析风险、再安排下周计划"
    worthiness = assess_parallel_worthiness(
        query,
        retrospective_count=2,
        skill_count=1,
    )

    graph = build_bounded_task_graph(
        query_text=query,
        parallel_worthiness=worthiness,
    )

    assert graph is not None
    assert len(graph.nodes) >= 2
    assert graph.entry_node_ids

    leading_nodes = graph.nodes[:-1]
    final_node = graph.nodes[-1]
    assert any(node.execution_mode == "parallel" for node in leading_nodes)
    assert final_node.role == "master"
|
||||||
|
|
||||||
|
|
||||||
|
def test_runtime_request_context_summary_renders_task_graph():
    """A context that carries a task graph renders the graph section and its parallelism cap."""
    query = "先查资料、再分析风险、再安排下周计划"
    worthiness = assess_parallel_worthiness(
        query,
        retrospective_count=1,
        skill_count=1,
    )
    task_graph = build_bounded_task_graph(
        query_text=query,
        parallel_worthiness=worthiness,
    )
    context = RuntimeRequestContext(
        user_id="u1",
        session_id="c1",
        query_text=query,
        parallel_worthiness=worthiness,
        task_graph=task_graph,
        recommended_runtime_mode="collaboration",
    )

    rendered = render_runtime_request_context_summary(context)

    assert "任务图" in rendered
    assert "max_parallelism" in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_runtime_request_context_summary_renders_assembly_metrics():
    """Assembly metrics on the context surface as a timing line in the summary."""
    metrics = {"total_ms": 12.3}
    context = RuntimeRequestContext(
        user_id="u1",
        session_id="c1",
        query_text="帮我分析一下资料",
        assembly_metrics=metrics,
    )

    rendered = render_runtime_request_context_summary(context)

    assert "上下文装配耗时" in rendered
|
||||||
@@ -503,7 +503,9 @@ async def test_visibility_tools_returns_governance_metadata_and_usage_counts(vis
|
|||||||
payload = response.json()
|
payload = response.json()
|
||||||
assert payload['total_tools'] >= 1
|
assert payload['total_tools'] >= 1
|
||||||
assert payload['used_tools'] >= 1
|
assert payload['used_tools'] >= 1
|
||||||
search_tool = next(item for item in payload['items'] if item['tool_name'] == 'search_web')
|
search_tool = next(
|
||||||
|
item for item in payload['items'] if item['tool_name'] in {'search_web', 'web_search'}
|
||||||
|
)
|
||||||
assert search_tool['permission_class'] == 'external'
|
assert search_tool['permission_class'] == 'external'
|
||||||
assert search_tool['side_effect_scope'] == 'network'
|
assert search_tool['side_effect_scope'] == 'network'
|
||||||
assert search_tool['usage_count'] == 1
|
assert search_tool['usage_count'] == 1
|
||||||
@@ -516,6 +518,26 @@ async def test_visibility_tools_returns_governance_metadata_and_usage_counts(vis
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_visibility_debug_returns_observability_and_learning_views(visibility_env):
    """The debug endpoint answers 200 and includes every observability/learning section."""
    app, ids = visibility_env
    conversation_id = ids['conversation_id']
    transport = ASGITransport(app=app)

    async with AsyncClient(transport=transport, base_url='http://testserver') as client:
        response = await client.get(
            '/api/agents/visibility/debug',
            params={'conversation_id': conversation_id},
        )

    assert response.status_code == 200
    payload = response.json()
    assert payload['conversation_id'] == conversation_id

    expected_sections = (
        'observability',
        'skill_shortlist',
        'retrospective_shortlist',
        'recent_retrospectives',
        'recent_learning_artifacts',
    )
    for section in expected_sections:
        assert section in payload
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_visibility_events_reject_invalid_datetime(visibility_env):
|
async def test_visibility_events_reject_invalid_datetime(visibility_env):
|
||||||
app, ids = visibility_env
|
app, ids = visibility_env
|
||||||
|
|||||||
@@ -73,3 +73,25 @@ async def test_list_conversations_succeeds_when_agent_state_column_was_missing(c
|
|||||||
assert len(payload) == 1
|
assert len(payload) == 1
|
||||||
assert payload[0]['title'] == 'Existing conversation'
|
assert payload[0]['title'] == 'Existing conversation'
|
||||||
assert payload[0]['message_count'] == 3
|
assert payload[0]['message_count'] == 3
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_chat_stream_emits_error_event_when_agent_service_fails_before_stream_starts(
    conversation_env,
    monkeypatch,
):
    """A failure raised by ``AgentService.chat`` is reported as an ``error`` stream event.

    The endpoint is still expected to answer 200, with the error name and
    message carried in the response body.
    """
    async def exploding_chat(*args, **kwargs):
        raise RuntimeError('stream boot failed')

    monkeypatch.setattr('app.routers.conversation.AgentService.chat', exploding_chat)

    transport = ASGITransport(app=conversation_env)
    async with AsyncClient(transport=transport, base_url='http://testserver') as client:
        response = await client.post(
            '/api/conversations/chat/stream',
            json={'message': 'hello'},
        )

    assert response.status_code == 200
    body = response.text
    assert 'event: error' in body
    assert 'stream boot failed' in body
|
||||||
|
|||||||
115
backend/tests/backend/app/test_database_schema_bootstrap.py
Normal file
115
backend/tests/backend/app/test_database_schema_bootstrap.py
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
import pytest
|
||||||
|
from sqlalchemy import text
|
||||||
|
from sqlalchemy.ext.asyncio import create_async_engine
|
||||||
|
|
||||||
|
from app.database import ensure_learning_artifact_tables, ensure_memory_columns, ensure_skill_columns
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_ensure_memory_columns_adds_importance_tracking_fields_for_existing_user_memories_table(tmp_path):
    """``ensure_memory_columns`` adds the tracking columns to a legacy ``user_memories`` table.

    A table with only the legacy columns is created in a temporary SQLite
    file, then the column set is compared before and after the call.
    """
    db_path = tmp_path / 'test_user_memories.db'
    engine = create_async_engine(f"sqlite+aiosqlite:///{db_path}", future=True)

    # Dispose the engine even when an assertion fails, so the connection pool
    # is not left open for the rest of the test session.
    try:
        async with engine.begin() as conn:
            await conn.execute(text(
                '''
                CREATE TABLE user_memories (
                    id VARCHAR(36) PRIMARY KEY,
                    user_id VARCHAR(36) NOT NULL,
                    memory_type VARCHAR(50) NOT NULL,
                    content TEXT NOT NULL,
                    importance INTEGER,
                    is_recalled BOOLEAN,
                    recall_count INTEGER,
                    source_conversation_id VARCHAR(36),
                    extracted_at DATETIME,
                    last_recalled_at DATETIME,
                    created_at DATETIME,
                    updated_at DATETIME
                )
                '''
            ))
            result = await conn.execute(text("PRAGMA table_info(user_memories)"))
            # Row index 1 of PRAGMA table_info is the column name.
            columns_before = {row[1] for row in result.fetchall()}
            assert 'frequency_count' not in columns_before
            assert 'importance_score' not in columns_before
            assert 'decay_score' not in columns_before

            await ensure_memory_columns(conn)

            result = await conn.execute(text("PRAGMA table_info(user_memories)"))
            columns_after = {row[1] for row in result.fetchall()}
            assert 'frequency_count' in columns_after
            assert 'emotion_tags' in columns_after
            assert 'importance_score' in columns_after
            assert 'importance_level' in columns_after
            assert 'associated_topics' in columns_after
            assert 'decay_score' in columns_after
            assert 'is_archived' in columns_after
            assert 'last_accessed_at' in columns_after
            assert 'archive_at' in columns_after
    finally:
        await engine.dispose()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_ensure_skill_columns_adds_lifecycle_fields_for_existing_skills_table(tmp_path):
    """``ensure_skill_columns`` adds the lifecycle columns to a legacy ``skills`` table.

    A table with only the legacy columns is created in a temporary SQLite
    file, then the column set is compared before and after the call.
    """
    db_path = tmp_path / 'test_skills.db'
    engine = create_async_engine(f"sqlite+aiosqlite:///{db_path}", future=True)

    # Dispose the engine even when an assertion fails, so the connection pool
    # is not left open for the rest of the test session.
    try:
        async with engine.begin() as conn:
            await conn.execute(text(
                '''
                CREATE TABLE skills (
                    id VARCHAR(36) PRIMARY KEY,
                    name VARCHAR(100) NOT NULL,
                    description TEXT,
                    instructions TEXT NOT NULL,
                    agent_type VARCHAR(50) NOT NULL,
                    visibility VARCHAR(20),
                    is_active BOOLEAN,
                    owner_id VARCHAR(36),
                    created_at DATETIME,
                    updated_at DATETIME
                )
                '''
            ))
            result = await conn.execute(text("PRAGMA table_info(skills)"))
            # Row index 1 of PRAGMA table_info is the column name.
            columns_before = {row[1] for row in result.fetchall()}
            assert 'status' not in columns_before
            assert 'effectiveness' not in columns_before

            await ensure_skill_columns(conn)

            result = await conn.execute(text("PRAGMA table_info(skills)"))
            columns_after = {row[1] for row in result.fetchall()}
            assert 'status' in columns_after
            assert 'scope' in columns_after
            assert 'effectiveness' in columns_after
            assert 'review_after' in columns_after
            assert 'activation_count' in columns_after
            assert 'last_activated_at' in columns_after
    finally:
        await engine.dispose()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_ensure_learning_artifact_tables_creates_table_and_indexes(tmp_path):
    """``ensure_learning_artifact_tables`` creates the table and its two indexes.

    Runs against an empty temporary SQLite file and inspects the result via
    ``PRAGMA table_info`` and ``PRAGMA index_list``.
    """
    db_path = tmp_path / 'test_learning_artifacts.db'
    engine = create_async_engine(f"sqlite+aiosqlite:///{db_path}", future=True)

    # Dispose the engine even when an assertion fails, so the connection pool
    # is not left open for the rest of the test session.
    try:
        async with engine.begin() as conn:
            await ensure_learning_artifact_tables(conn)

            result = await conn.execute(text("PRAGMA table_info(learning_artifacts)"))
            # Row index 1 of PRAGMA table_info is the column name.
            columns = {row[1] for row in result.fetchall()}
            assert 'artifact_type' in columns
            assert 'artifact_key' in columns
            assert 'summary_text' in columns
            assert 'payload' in columns

            indexes = await conn.execute(text("PRAGMA index_list(learning_artifacts)"))
            # Row index 1 of PRAGMA index_list is the index name.
            index_names = {row[1] for row in indexes.fetchall()}
            assert 'ix_learning_artifacts_user_id' in index_names
            assert 'ix_learning_artifacts_artifact_type' in index_names
    finally:
        await engine.dispose()
|
||||||
@@ -54,6 +54,9 @@ async def skill_env(tmp_path, monkeypatch):
|
|||||||
required_context=[],
|
required_context=[],
|
||||||
visibility='private',
|
visibility='private',
|
||||||
is_active=True,
|
is_active=True,
|
||||||
|
status='active',
|
||||||
|
scope=['schedule_planner'],
|
||||||
|
effectiveness=0.88,
|
||||||
owner_id=user.id,
|
owner_id=user.id,
|
||||||
),
|
),
|
||||||
Skill(
|
Skill(
|
||||||
@@ -65,6 +68,9 @@ async def skill_env(tmp_path, monkeypatch):
|
|||||||
required_context=[],
|
required_context=[],
|
||||||
visibility='private',
|
visibility='private',
|
||||||
is_active=True,
|
is_active=True,
|
||||||
|
status='shadow',
|
||||||
|
scope=['executor'],
|
||||||
|
effectiveness=0.41,
|
||||||
owner_id=user.id,
|
owner_id=user.id,
|
||||||
),
|
),
|
||||||
Skill(
|
Skill(
|
||||||
@@ -76,6 +82,8 @@ async def skill_env(tmp_path, monkeypatch):
|
|||||||
required_context=[],
|
required_context=[],
|
||||||
visibility='private',
|
visibility='private',
|
||||||
is_active=True,
|
is_active=True,
|
||||||
|
status='active',
|
||||||
|
scope=['schedule_planner'],
|
||||||
owner_id=other_user.id,
|
owner_id=other_user.id,
|
||||||
),
|
),
|
||||||
])
|
])
|
||||||
@@ -188,3 +196,9 @@ async def test_list_skills_without_agent_type_returns_current_user_skills(skill_
|
|||||||
assert all(isinstance(item['updated_at'], str) for item in payload)
|
assert all(isinstance(item['updated_at'], str) for item in payload)
|
||||||
assert all('is_builtin' in item for item in payload)
|
assert all('is_builtin' in item for item in payload)
|
||||||
assert all(item['is_builtin'] is False for item in payload)
|
assert all(item['is_builtin'] is False for item in payload)
|
||||||
|
assert all('status' in item for item in payload)
|
||||||
|
assert all('scope' in item for item in payload)
|
||||||
|
assert any(item['status'] == 'shadow' for item in payload)
|
||||||
|
executor = next(item for item in payload if item['name'] == 'Executor skill')
|
||||||
|
assert executor['scope'] == ['executor']
|
||||||
|
assert executor['effectiveness'] == 0.41
|
||||||
|
|||||||
130
backend/tests/backend/app/test_system_router.py
Normal file
130
backend/tests/backend/app/test_system_router.py
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
import httpx
|
||||||
|
import pytest
|
||||||
|
from httpx import ASGITransport, AsyncClient
|
||||||
|
|
||||||
|
from app.main import app
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_system_config_returns_location_and_weather(monkeypatch):
    """GET /api/system/config should answer 200 with the payload produced by the service.

    ``SystemService.get_config`` is replaced with a stub, so only the router
    layer is exercised here.
    """
    expected_payload = {
        'location': 'wuhan',
        'weather_code': 3,
        'weather_summary': 'Overcast 22°C',
    }

    async def fake_get_config(self):
        # Hand back a copy so the expected dict cannot be mutated downstream.
        return dict(expected_payload)

    monkeypatch.setattr('app.routers.system.SystemService.get_config', fake_get_config)

    # Drive the ASGI app in-process; no real network socket is opened.
    asgi_transport = ASGITransport(app=app)
    async with AsyncClient(transport=asgi_transport, base_url='http://testserver') as client:
        response = await client.get('/api/system/config')

    assert response.status_code == 200
    assert response.json() == expected_payload
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_system_config_gracefully_returns_unavailable_weather(monkeypatch):
    """GET /api/system/config should still answer 200 when the service reports no weather.

    The stubbed service returns ``weather_code=None`` together with the
    'Weather unavailable' summary; the test asserts the endpoint returns
    exactly that payload.
    """
    expected_payload = {
        'location': 'wuhan',
        'weather_code': None,
        'weather_summary': 'Weather unavailable',
    }

    async def fake_get_config(self):
        # Hand back a copy so the expected dict cannot be mutated downstream.
        return dict(expected_payload)

    monkeypatch.setattr('app.routers.system.SystemService.get_config', fake_get_config)

    # Drive the ASGI app in-process; no real network socket is opened.
    asgi_transport = ASGITransport(app=app)
    async with AsyncClient(transport=asgi_transport, base_url='http://testserver') as client:
        response = await client.get('/api/system/config')

    assert response.status_code == 200
    assert response.json() == expected_payload
|
||||||
|
|
||||||
|
|
||||||
|
class FakeWeatherResponse:
    """Minimal stand-in for the ``httpx.Response`` returned by the weather lookup.

    Only the surface used by these tests is provided: ``status_code``,
    ``raise_for_status()`` and ``json()``.
    """

    def __init__(self, payload: dict, status_code: int = 200):
        self._payload = payload
        self.status_code = status_code

    def raise_for_status(self):
        """Raise ``httpx.HTTPStatusError`` for status codes >= 400, else return ``self``.

        Returning the response mirrors ``httpx.Response.raise_for_status`` and
        keeps chained calls such as ``resp.raise_for_status().json()`` working
        against the fake.
        """
        if self.status_code >= 400:
            # One request object is shared by the error and its synthetic response.
            request = httpx.Request('GET', 'https://wttr.in/wuhan?format=j1')
            raise httpx.HTTPStatusError(
                'request failed',
                request=request,
                response=httpx.Response(self.status_code, request=request),
            )
        return self

    def json(self):
        """Return the payload supplied at construction time."""
        return self._payload
|
||||||
|
|
||||||
|
|
||||||
|
class FakeAsyncClient:
    """Async-context-manager test double used in place of ``httpx.AsyncClient``.

    Configure it with either a canned ``response`` to return from ``get`` or an
    ``error`` to raise from it. Any other constructor keywords are accepted and
    ignored.
    """

    def __init__(self, *, response=None, error=None, **kwargs):
        self._response = response
        self._error = error

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc, tb):
        # Returning False lets exceptions raised inside the ``async with`` propagate.
        return False

    async def get(self, url, *, params=None, **kwargs):
        """Raise the configured error if one was given, else return the canned response.

        ``url``, ``params`` and any extra request keywords (``timeout``,
        ``headers``, ...) are accepted and ignored, so the fake does not fail
        with ``TypeError`` when the code under test passes more than ``params``.
        """
        if self._error is not None:
            raise self._error
        return self._response
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_system_service_get_config_fetches_weather(monkeypatch):
    """``SystemService.get_config`` should turn a successful weather reply into code + summary.

    The HTTP client is replaced with ``FakeAsyncClient`` so that the service
    receives a canned reply carrying weatherCode '61' and temp_C '18'.
    """

    def build_client(**kwargs):
        # A fresh canned response per client, exactly as the service would get
        # from constructing a new AsyncClient.
        canned = FakeWeatherResponse({'current_condition': [{'weatherCode': '61', 'temp_C': '18'}]})
        return FakeAsyncClient(response=canned, **kwargs)

    monkeypatch.setattr('app.services.system_service.httpx.AsyncClient', build_client)

    # Imported only after the client has been patched, as in the original ordering.
    from app.services.system_service import SystemService

    service = SystemService()
    monkeypatch.setattr(service._settings, 'LOCATION', 'wuhan')

    payload = await service.get_config()

    expected_payload = {
        'location': 'wuhan',
        'weather_code': 61,
        'weather_summary': 'Rain 18°C',
    }
    assert payload == expected_payload
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_system_service_get_config_handles_weather_failure(monkeypatch):
    """``SystemService.get_config`` should fall back to 'Weather unavailable' on a timeout.

    The HTTP client is replaced with a ``FakeAsyncClient`` whose ``get`` raises
    ``httpx.TimeoutException``.
    """

    def build_client(**kwargs):
        # Every client built by the service fails its request with a timeout.
        return FakeAsyncClient(error=httpx.TimeoutException('timed out'), **kwargs)

    monkeypatch.setattr('app.services.system_service.httpx.AsyncClient', build_client)

    # Imported only after the client has been patched, as in the original ordering.
    from app.services.system_service import SystemService

    service = SystemService()
    monkeypatch.setattr(service._settings, 'LOCATION', 'wuhan')

    payload = await service.get_config()

    expected_payload = {
        'location': 'wuhan',
        'weather_code': None,
        'weather_summary': 'Weather unavailable',
    }
    assert payload == expected_payload
|
||||||
Reference in New Issue
Block a user