From 52fb619084fbf5a8a02bfa0d0946725c07588946 Mon Sep 17 00:00:00 2001
From: "WIN-JHFT4D3SIVT\\caoxiaozhu" <leocaoxiaozhu@163.com>
Date: Wed, 8 Apr 2026 00:12:50 +0800
Subject: [PATCH] test(backend): add tests for orchestration and learning
 runtimes

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
---
 .../tests/backend/app/agents/test_graph.py    |  85 +++++++++
 .../app/agents/test_result_merge_runtime.py   | 138 ++++++++++++++
 .../app/agents/test_runtime_context.py        | 170 ++++++++++++++++++
 .../app/agents/test_scheduler_runtime.py      |  66 +++++++
 .../app/agents/test_task_graph_runtime.py     |  59 ++++++
 .../backend/app/agents/test_visibility_api.py |  24 ++-
 .../backend/app/test_conversation_router.py   |  22 +++
 .../app/test_database_schema_bootstrap.py     | 115 ++++++++++++
 .../tests/backend/app/test_skill_router.py    |  14 ++
 .../tests/backend/app/test_system_router.py   | 130 ++++++++++++++
 10 files changed, 822 insertions(+), 1 deletion(-)
 create mode 100644 backend/tests/backend/app/agents/test_result_merge_runtime.py
 create mode 100644 backend/tests/backend/app/agents/test_runtime_context.py
 create mode 100644 backend/tests/backend/app/agents/test_scheduler_runtime.py
 create mode 100644 backend/tests/backend/app/agents/test_task_graph_runtime.py
 create mode 100644 backend/tests/backend/app/test_database_schema_bootstrap.py
 create mode 100644 backend/tests/backend/app/test_system_router.py
diff --git a/backend/tests/backend/app/agents/test_graph.py b/backend/tests/backend/app/agents/test_graph.py
index 1d955c0..11b0485 100644
--- a/backend/tests/backend/app/agents/test_graph.py
+++ b/backend/tests/backend/app/agents/test_graph.py
@@ -314,6 +314,22 @@ class FailIfCalledLLM:
         raise AssertionError('LLM should not be called for simple greetings')
 
 
+class InternalMarkupRecoveryLLM:  # test double: replays scripted responses in call order
+    def __init__(self, responses: list[str]):
+        self.responses = responses
+        self.calls = 0  # number of ainvoke() calls made so far
+        self._jarvis_provider_capabilities = SimpleNamespace(  # NOTE(review): presumably read by graph code — confirm
+            provider='minimax',
+            supports_native_tools=False,
+            preferred_tool_strategy='json_fallback',
+        )
+
+    async def ainvoke(self, messages):
+        self.calls += 1
+        index = min(self.calls - 1, len(self.responses) - 1)  # clamp: extra calls replay the last response
+        return AIMessage(content=self.responses[index])
+
+
 def test_initial_state_sets_structured_continuity_defaults():
     state = initial_state('u1', 'c1')
 
@@ -2047,6 +2063,75 @@ async def test_run_sub_commander_uses_web_search_in_json_fallback(monkeypatch):
     assert result['final_response'] == '我查了外部网页，下面是最新结果摘要。'
 
 
+async def test_run_sub_commander_recovers_from_internal_tool_markup_after_tool_round(monkeypatch):
+    fake_llm = InternalMarkupRecoveryLLM([
+        '{"mode":"tool_call","tool_calls":[{"name":"web_search","arguments":{"query":"武汉 介绍","top_k":2}}]}',
+        '我来让知识管理员为你整理武汉的详细介绍。\n\n分发说明：这个问题需要调用知识库信息，由 librarian（知识管理员）处理最合适。\n<minimax:tool_call>\n<invoke name="librarian">\n<parameter name="info_type">city_introduction</parameter>\n<parameter name="parameters">{"city":"武汉","word_count":2000,"language":"zh-CN"}</parameter>\n</invoke>\n</minimax:tool_call>',
+        '武汉是湖北省省会，位于长江与汉江交汇处，是中部重要的交通、科教和工业中心。',
+    ])
+    fake_tool = FakeTool('web_search', 'found 2 web results')
+
+    monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: fake_llm)
+    monkeypatch.setitem(
+        __import__('app.agents.graph', fromlist=['SUB_COMMANDER_TOOLSETS']).SUB_COMMANDER_TOOLSETS,
+        'librarian_retrieval',
+        [fake_tool],
+    )
+
+    state = _base_state('请介绍一下武汉', {'provider': 'openai', 'model': 'MiniMax-M2.7-highspeed', 'base_url': 'https://api.minimaxi.com/v1'})
+    state['current_agent'] = AgentRole.LIBRARIAN
+    state['max_retries'] = 1
+
+    result = await _run_sub_commander(
+        state,
+        AgentRole.LIBRARIAN,
+        'manager prompt',
+        '请介绍一下武汉',
+        use_tools=True,
+        summary_target='knowledge_context',
+    )
+
+    assert fake_llm.calls == 3
+    assert fake_tool.invocations == [{'query': '武汉 介绍', 'top_k': 2}]
+    assert result['fallback_parse_error'] is None
+    assert '<invoke name=' not in result['final_response']
+    assert '分发说明' not in result['final_response']
+    assert result['final_response'] == '武汉是湖北省省会，位于长江与汉江交汇处，是中部重要的交通、科教和工业中心。'
+
+
+async def test_run_sub_commander_falls_back_to_tool_summary_when_internal_markup_persists(monkeypatch):
+    fake_llm = InternalMarkupRecoveryLLM([
+        '{"mode":"tool_call","tool_calls":[{"name":"web_search","arguments":{"query":"武汉 介绍","top_k":2}}]}',
+        '分发说明：交给 librarian。\n<minimax:tool_call><invoke name="librarian"></invoke></minimax:tool_call>',
+    ])
+    fake_tool = FakeTool('web_search', 'found 2 web results')
+
+    monkeypatch.setattr('app.agents.graph._get_llm_for_state', lambda state: fake_llm)
+    monkeypatch.setitem(
+        __import__('app.agents.graph', fromlist=['SUB_COMMANDER_TOOLSETS']).SUB_COMMANDER_TOOLSETS,
+        'librarian_retrieval',
+        [fake_tool],
+    )
+
+    state = _base_state('请介绍一下武汉', {'provider': 'openai', 'model': 'MiniMax-M2.7-highspeed', 'base_url': 'https://api.minimaxi.com/v1'})
+    state['current_agent'] = AgentRole.LIBRARIAN
+    state['max_retries'] = 0
+
+    result = await _run_sub_commander(
+        state,
+        AgentRole.LIBRARIAN,
+        'manager prompt',
+        '请介绍一下武汉',
+        use_tools=True,
+        summary_target='knowledge_context',
+    )
+
+    assert fake_llm.calls == 2
+    assert result['fallback_parse_error'] == 'internal_tool_markup'
+    assert result['final_response'] == '我已经完成检索，直接给您可用信息：\n\nfound 2 web results'
+    assert '<invoke name=' not in result['final_response']
+
+
 async def test_run_sub_commander_supports_multiple_json_fallback_tool_rounds(monkeypatch):
     fake_llm = TripleResponseFallbackLLM([
         '{"mode":"tool_call","tool_calls":[{"name":"web_search","arguments":{"query":"Jarvis 最新模型更新","top_k":2}}]}',
diff --git a/backend/tests/backend/app/agents/test_result_merge_runtime.py b/backend/tests/backend/app/agents/test_result_merge_runtime.py
new file mode 100644
index 0000000..777c83b
--- /dev/null
+++ b/backend/tests/backend/app/agents/test_result_merge_runtime.py
@@ -0,0 +1,138 @@
+import app.agents.graph as graph_module
+from app.agents.orchestration.result_merge import merge_task_results
+from app.agents.schemas.task import AgentTask
+from app.agents.state import AgentRole, initial_state
+
+
+def test_merge_task_results_marks_conflict_for_distinct_completed_summaries():
+    report = merge_task_results(
+        [
+            {
+                "task_id": "task-1",
+                "status": "completed",
+                "summary": "结论 A",
+                "evidence": [{"type": "source"}],
+                "owner_agent_id": "librarian",
+            },
+            {
+                "task_id": "task-2",
+                "status": "completed",
+                "summary": "结论 B",
+                "evidence": [{"type": "analysis"}, {"type": "analysis"}],
+                "owner_agent_id": "analyst",
+            },
+        ]
+    )
+
+    assert report.status == "conflicted"
+    assert "multiple_distinct_completed_summaries" in report.conflict_flags
+    assert report.resolution_strategy == "rank_by_evidence_count"
+    assert report.resolved_summary == "结论 B"
+
+
+def test_verify_collaboration_results_persists_merge_and_verification_reports():
+    state = initial_state("u1", "c1")
+    tasks = [
+        AgentTask(
+            task_id="task-1",
+            title="收集证据",
+            role=AgentRole.LIBRARIAN.value,
+            owner_agent_id=AgentRole.LIBRARIAN.value,
+            goal="检索资料",
+            expected_evidence=[{"type": "evidence"}],
+        ),
+        AgentTask(
+            task_id="task-2",
+            title="给出分析",
+            role=AgentRole.ANALYST.value,
+            owner_agent_id=AgentRole.ANALYST.value,
+            goal="分析风险",
+            expected_evidence=[{"type": "analysis"}],
+        ),
+    ]
+
+    graph_module._verify_collaboration_results(
+        state,
+        tasks,
+        task_results=[
+            {
+                "task_id": "task-1",
+                "status": "completed",
+                "summary": "证据显示风险中等",
+                "evidence": [{"type": "evidence"}],
+                "owner_agent_id": AgentRole.LIBRARIAN.value,
+            },
+            {
+                "task_id": "task-2",
+                "status": "completed",
+                "summary": "证据显示风险中等",
+                "evidence": [{"type": "analysis"}],
+                "owner_agent_id": AgentRole.ANALYST.value,
+            },
+        ],
+    )
+
+    assert state["merge_report"] is not None
+    assert state["merge_report"]["status"] == "merged"
+    assert state["verification_report"] is not None
+    assert state["verification_report"]["status"] == "passed"
+    event_types = [item["event_type"] for item in state["event_trace"]]
+    assert "agent.merge.completed" in event_types
+    assert "agent.verify.completed" in event_types
+
+
+def test_serial_fallback_response_is_used_when_merge_report_requires_fallback():
+    state = initial_state("u1", "c1")
+    tasks = [
+        AgentTask(
+            task_id="task-1",
+            title="收集证据",
+            role=AgentRole.LIBRARIAN.value,
+            owner_agent_id=AgentRole.LIBRARIAN.value,
+            goal="检索资料",
+            expected_evidence=[{"type": "evidence"}],
+        ),
+        AgentTask(
+            task_id="task-2",
+            title="给出分析",
+            role=AgentRole.ANALYST.value,
+            owner_agent_id=AgentRole.ANALYST.value,
+            goal="分析风险",
+            expected_evidence=[{"type": "analysis"}],
+        ),
+    ]
+    state["task_results"] = [
+        {
+            "task_id": "task-1",
+            "status": "completed",
+            "summary": "已确认可用证据",
+            "evidence": [{"type": "evidence"}],
+            "owner_agent_id": AgentRole.LIBRARIAN.value,
+        },
+        {
+            "task_id": "task-2",
+            "status": "failed",
+            "summary": "分析失败",
+            "evidence": [{"type": "analysis"}],
+            "owner_agent_id": AgentRole.ANALYST.value,
+        },
+    ]
+    state["final_response"] = "原始协作汇总"
+    # NOTE(review): the fallback and rollback event below are applied by this test itself — confirm that is intended.
+    graph_module._verify_collaboration_results(state, tasks, state["task_results"])
+    if state["verification_status"] == "failed" and state["merge_report"]["fallback_used"]:
+        state["final_response"] = graph_module._build_serial_fallback_response(
+            "先查资料再分析",
+            state["task_results"],
+            state["merge_report"],
+        )
+        graph_module._append_event_trace(
+            state,
+            "agent.rollback.triggered",
+            payload={"layer": "collaboration_runtime", "reason": "merge_fallback_used"},
+            severity="warning",
+        )
+
+    assert "切回保守收敛路径" in state["final_response"]
+    event_types = [item["event_type"] for item in state["event_trace"]]
+    assert "agent.rollback.triggered" in event_types
diff --git a/backend/tests/backend/app/agents/test_runtime_context.py b/backend/tests/backend/app/agents/test_runtime_context.py
new file mode 100644
index 0000000..b18822b
--- /dev/null
+++ b/backend/tests/backend/app/agents/test_runtime_context.py
@@ -0,0 +1,170 @@
+from langchain_core.messages import HumanMessage
+
+import app.agents.graph as graph_module
+from app.agents.graph import master_node
+from app.agents.learning.retrospector import build_session_retrospective
+from app.agents.schemas.orchestration import (
+    RuntimeRequestContext,
+    assess_parallel_worthiness,
+    render_runtime_request_context_summary,
+)
+from app.agents.schemas.skills import SkillShortlistEntry
+from app.agents.state import initial_state
+
+
+def test_runtime_request_context_summary_renders_parallel_and_shortlists():
+    worthiness = assess_parallel_worthiness(
+        "先分析需求，再查资料，同时整理成计划",
+        retrospective_count=1,
+        skill_count=2,
+    )
+    context = RuntimeRequestContext(
+        user_id="u1",
+        session_id="c1",
+        query_text="先分析需求，再查资料，同时整理成计划",
+        recalled_memories=["最近偏好结构化输出"],
+        retrospective_shortlist=[
+            {
+                "task_type": "analysis",
+                "summary": "上次先检索再分析，结果更稳。",
+            }
+        ],
+        shortlisted_skills=["weekly-planning"],
+        skill_shortlist=[
+            SkillShortlistEntry(
+                skill_name="weekly-planning",
+                score=0.82,
+                rationale="命中计划关键词",
+                injection_mode="summary",
+            )
+        ],
+        parallel_worthiness=worthiness,
+        recommended_runtime_mode="collaboration",
+    )
+
+    summary = render_runtime_request_context_summary(context)
+
+    assert "Runtime Request Context" in summary
+    assert "collaboration" in summary
+    assert "weekly-planning" in summary
+
+
+def test_build_session_retrospective_captures_skill_and_history_context():
+    retrospective = build_session_retrospective(
+        request_id="resp-1",
+        session_id="conv-1",
+        user_query="帮我分析并安排下周任务",
+        state={
+            "execution_mode": "collaboration",
+            "current_agent": "analyst",
+            "verification_status": "passed",
+            "verification_summary": "ok",
+            "final_response": "已经给出建议",
+            "skill_shortlist": [{"skill_name": "weekly-planning"}],
+            "event_trace": [{"event_type": "agent.execution.decided", "agent_id": "master"}],
+            "verification_evidence": [{"type": "verification"}],
+            "completed_tasks": [{"task_id": "t1", "title": "收集信息", "status": "completed"}],
+            "retrospective_shortlist": [{"summary": "上次周计划拆解有效"}],
+            "parallel_worthiness": {"score": 0.6},
+        },
+        runtime_context={
+            "user_id": "u1",
+            "recommended_runtime_mode": "collaboration",
+        },
+    )
+
+    assert retrospective.user_id == "u1"
+    assert retrospective.execution_mode == "collaboration"
+    assert retrospective.used_skill_names == ["weekly-planning"]
+    assert retrospective.context_snapshot["retrospective_shortlist_count"] == 1
+    assert retrospective.outcome == "completed"
+
+
+async def test_master_node_records_execution_decision_and_skill_shortlist_event():
+    state = initial_state("u1", "c1")
+    state["messages"] = [HumanMessage(content="帮我查一下资料并分析重点")]
+    state["skill_shortlist"] = [
+        {
+            "skill_name": "research-synthesis",
+            "score": 0.73,
+            "injection_mode": "summary",
+        }
+    ]
+    state["runtime_request_context"] = {
+        "request_id": "req-1",
+        "recommended_runtime_mode": "direct",
+        "parallel_worthiness": {
+            "preferred_mode": "direct",
+            "score": 0.2,
+            "estimated_subtasks": 1,
+        },
+    }
+    state["task_graph"] = {
+        "graph_id": "graph-1",
+        "nodes": [{"node_id": "task-1", "title": "收集证据", "role": "librarian"}],
+        "entry_node_ids": ["task-1"],
+        "max_parallelism": 1,
+    }
+
+    result = await master_node(state)
+
+    assert result["execution_decision"] is not None
+    event_types = [item["event_type"] for item in result["event_trace"]]
+    assert "agent.parallel.assessed" in event_types
+    assert "agent.skill.shortlisted" in event_types
+    assert "agent.task_graph.built" in event_types
+    assert "agent.execution.decided" in event_types
+
+
+async def test_master_node_records_rollback_event_when_parallel_task_graph_flag_is_disabled(monkeypatch):
+    async def fake_collaboration_flow(state, _user_query):
+        state["execution_mode"] = "collaboration"
+        state["final_response"] = "collaboration skipped in test"
+        return state
+    # Stub via monkeypatch so the real _run_collaboration_flow is restored when this test ends.
+    monkeypatch.setattr(graph_module, "_run_collaboration_flow", fake_collaboration_flow)
+    state = initial_state("u1", "c1")
+    state["messages"] = [HumanMessage(content="先查资料再分析风险再安排计划")]
+    state["feature_flags"] = {"ENABLE_PARALLEL_TASK_GRAPH": False}
+    state["parallel_worthiness"] = {
+        "preferred_mode": "parallel",
+        "score": 0.8,
+        "estimated_subtasks": 3,
+    }
+    state["runtime_request_context"] = {
+        "request_id": "req-2",
+        "user_id": "u1",
+        "session_id": "c1",
+        "recommended_runtime_mode": "collaboration",
+    }
+
+    result = await master_node(state)
+
+    event_types = [item["event_type"] for item in result["event_trace"]]
+    assert "agent.rollback.triggered" in event_types
+
+
+async def test_master_node_direct_mode_baseline_still_returns_simple_response():
+    state = initial_state("u1", "c1")
+    state["messages"] = [HumanMessage(content="你好")]
+
+    result = await master_node(state)
+
+    assert result["execution_mode"] == "direct"
+    assert result["final_response"] is not None
+
+
+async def test_master_node_collaboration_mode_baseline_still_respects_complex_request(monkeypatch):
+    async def fake_collaboration_flow(state, _user_query):
+        state["execution_mode"] = "collaboration"
+        state["final_response"] = "collaboration baseline ok"
+        return state
+    # Use the monkeypatch fixture (not bare assignment) so the stub cannot leak into later tests.
+    monkeypatch.setattr(graph_module, "_run_collaboration_flow", fake_collaboration_flow)
+    state = initial_state("u1", "c1")
+    state["messages"] = [HumanMessage(content="先查资料，再分析风险，再安排计划")]
+
+    result = await master_node(state)
+
+    assert result["execution_mode"] == "collaboration"
+    assert result["final_response"] == "collaboration baseline ok"
diff --git a/backend/tests/backend/app/agents/test_scheduler_runtime.py b/backend/tests/backend/app/agents/test_scheduler_runtime.py
new file mode 100644
index 0000000..0c667f4
--- /dev/null
+++ b/backend/tests/backend/app/agents/test_scheduler_runtime.py
@@ -0,0 +1,66 @@
+from langchain_core.messages import HumanMessage
+
+import app.agents.graph as graph_module
+from app.agents.orchestration.scheduler import build_subtask_specs
+from app.agents.orchestration.task_graph import build_bounded_task_graph
+from app.agents.schemas.orchestration import TaskGraph, assess_parallel_worthiness
+from app.agents.state import initial_state
+
+
+def test_build_subtask_specs_keeps_dependencies_and_contract_fields():
+    worthiness = assess_parallel_worthiness(
+        "先查资料、再分析风险、再安排下周计划",
+        retrospective_count=2,
+        skill_count=1,
+    )
+    task_graph = build_bounded_task_graph(
+        query_text="先查资料、再分析风险、再安排下周计划",
+        parallel_worthiness=worthiness,
+    )
+
+    specs = build_subtask_specs(task_graph, query_text="先查资料、再分析风险、再安排下周计划")
+
+    assert specs
+    assert all(spec.parent_run_id == task_graph.graph_id for spec in specs)
+    assert all(isinstance(spec.context_slice, dict) for spec in specs)
+    assert all(spec.expected_output_schema for spec in specs)
+    assert any(spec.dependencies for spec in specs)
+
+
+async def test_run_collaboration_flow_uses_task_graph_plan_and_records_subtask_events(monkeypatch):
+    async def fake_run_sub_commander(
+        state,
+        assigned_role,
+        _system_prompt,
+        task_goal,
+        **_kwargs,
+    ):
+        state["final_response"] = f"{assigned_role.value} handled: {task_goal}"
+        return state
+
+    monkeypatch.setattr(graph_module, "_run_sub_commander", fake_run_sub_commander)
+
+    state = initial_state("u1", "c1")
+    state["messages"] = [HumanMessage(content="先查资料、再分析风险、再安排下周计划")]
+    state["current_datetime_context"] = "CURRENT_TIME: 2026-03-28T12:00:00+08:00"
+    state["task_graph"] = TaskGraph.model_validate(
+        build_bounded_task_graph(
+            query_text="先查资料、再分析风险、再安排下周计划",
+            parallel_worthiness=assess_parallel_worthiness(
+                "先查资料、再分析风险、再安排下周计划",
+                retrospective_count=2,
+                skill_count=1,
+            ),
+        ).model_dump(mode="json")
+    ).model_dump(mode="json")
+
+    result = await graph_module._run_collaboration_flow(
+        state,
+        "先查资料、再分析风险、再安排下周计划",
+    )
+
+    assert result["scheduled_subtasks"]
+    event_types = [item["event_type"] for item in result["event_trace"]]
+    assert "agent.subtask.started" in event_types
+    assert "agent.subtask.completed" in event_types
+    assert result["task_results"]
diff --git a/backend/tests/backend/app/agents/test_task_graph_runtime.py b/backend/tests/backend/app/agents/test_task_graph_runtime.py
new file mode 100644
index 0000000..b3abed6
--- /dev/null
+++ b/backend/tests/backend/app/agents/test_task_graph_runtime.py
@@ -0,0 +1,59 @@
+from app.agents.orchestration.task_graph import build_bounded_task_graph
+from app.agents.schemas.orchestration import RuntimeRequestContext, assess_parallel_worthiness, render_runtime_request_context_summary
+
+
+def test_build_bounded_task_graph_creates_independent_nodes_and_merge_node():
+    worthiness = assess_parallel_worthiness(
+        "先查资料、再分析风险、再安排下周计划",
+        retrospective_count=2,
+        skill_count=1,
+    )
+
+    graph = build_bounded_task_graph(
+        query_text="先查资料、再分析风险、再安排下周计划",
+        parallel_worthiness=worthiness,
+    )
+
+    assert graph is not None
+    assert len(graph.nodes) >= 2
+    assert graph.entry_node_ids
+    assert any(node.execution_mode == "parallel" for node in graph.nodes[:-1])
+    assert graph.nodes[-1].role == "master"
+
+
+def test_runtime_request_context_summary_renders_task_graph():
+    worthiness = assess_parallel_worthiness(
+        "先查资料、再分析风险、再安排下周计划",
+        retrospective_count=1,
+        skill_count=1,
+    )
+    task_graph = build_bounded_task_graph(
+        query_text="先查资料、再分析风险、再安排下周计划",
+        parallel_worthiness=worthiness,
+    )
+    context = RuntimeRequestContext(
+        user_id="u1",
+        session_id="c1",
+        query_text="先查资料、再分析风险、再安排下周计划",
+        parallel_worthiness=worthiness,
+        task_graph=task_graph,
+        recommended_runtime_mode="collaboration",
+    )
+
+    summary = render_runtime_request_context_summary(context)
+
+    assert "任务图" in summary
+    assert "max_parallelism" in summary
+
+
+def test_runtime_request_context_summary_renders_assembly_metrics():
+    context = RuntimeRequestContext(
+        user_id="u1",
+        session_id="c1",
+        query_text="帮我分析一下资料",
+        assembly_metrics={"total_ms": 12.3},
+    )
+
+    summary = render_runtime_request_context_summary(context)
+
+    assert "上下文装配耗时" in summary
diff --git a/backend/tests/backend/app/agents/test_visibility_api.py b/backend/tests/backend/app/agents/test_visibility_api.py
index 6775d43..6ea94a6 100644
--- a/backend/tests/backend/app/agents/test_visibility_api.py
+++ b/backend/tests/backend/app/agents/test_visibility_api.py
@@ -503,7 +503,9 @@ async def test_visibility_tools_returns_governance_metadata_and_usage_counts(vis
     payload = response.json()
     assert payload['total_tools'] >= 1
     assert payload['used_tools'] >= 1
-    search_tool = next(item for item in payload['items'] if item['tool_name'] == 'search_web')
+    search_tool = next(
+        item for item in payload['items'] if item['tool_name'] in {'search_web', 'web_search'}
+    )
     assert search_tool['permission_class'] == 'external'
     assert search_tool['side_effect_scope'] == 'network'
     assert search_tool['usage_count'] == 1
@@ -516,6 +518,26 @@ async def test_visibility_tools_returns_governance_metadata_and_usage_counts(vis
     ]
 
 
+@pytest.mark.asyncio
+async def test_visibility_debug_returns_observability_and_learning_views(visibility_env):
+    app, ids = visibility_env
+    transport = ASGITransport(app=app)
+    async with AsyncClient(transport=transport, base_url='http://testserver') as client:
+        response = await client.get(
+            '/api/agents/visibility/debug',
+            params={'conversation_id': ids['conversation_id']},
+        )
+
+    assert response.status_code == 200
+    payload = response.json()
+    assert payload['conversation_id'] == ids['conversation_id']
+    assert 'observability' in payload
+    assert 'skill_shortlist' in payload
+    assert 'retrospective_shortlist' in payload
+    assert 'recent_retrospectives' in payload
+    assert 'recent_learning_artifacts' in payload
+
+
 @pytest.mark.asyncio
 async def test_visibility_events_reject_invalid_datetime(visibility_env):
     app, ids = visibility_env
diff --git a/backend/tests/backend/app/test_conversation_router.py b/backend/tests/backend/app/test_conversation_router.py
index f157beb..49afa16 100644
--- a/backend/tests/backend/app/test_conversation_router.py
+++ b/backend/tests/backend/app/test_conversation_router.py
@@ -73,3 +73,25 @@ async def test_list_conversations_succeeds_when_agent_state_column_was_missing(c
     assert len(payload) == 1
     assert payload[0]['title'] == 'Existing conversation'
     assert payload[0]['message_count'] == 3
+
+
+@pytest.mark.asyncio
+async def test_chat_stream_emits_error_event_when_agent_service_fails_before_stream_starts(
+    conversation_env,
+    monkeypatch,
+):
+    async def fail_chat(*args, **kwargs):
+        raise RuntimeError('stream boot failed')
+
+    monkeypatch.setattr('app.routers.conversation.AgentService.chat', fail_chat)
+
+    transport = ASGITransport(app=conversation_env)
+    async with AsyncClient(transport=transport, base_url='http://testserver') as client:
+        response = await client.post(
+            '/api/conversations/chat/stream',
+            json={'message': 'hello'},
+        )
+
+    assert response.status_code == 200
+    assert 'event: error' in response.text
+    assert 'stream boot failed' in response.text
diff --git a/backend/tests/backend/app/test_database_schema_bootstrap.py b/backend/tests/backend/app/test_database_schema_bootstrap.py
new file mode 100644
index 0000000..a8ff6d7
--- /dev/null
+++ b/backend/tests/backend/app/test_database_schema_bootstrap.py
@@ -0,0 +1,115 @@
+import pytest
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import create_async_engine
+
+from app.database import ensure_learning_artifact_tables, ensure_memory_columns, ensure_skill_columns
+
+
+@pytest.mark.anyio
+async def test_ensure_memory_columns_adds_importance_tracking_fields_for_existing_user_memories_table(tmp_path):
+    db_path = tmp_path / 'test_user_memories.db'
+    engine = create_async_engine(f"sqlite+aiosqlite:///{db_path}", future=True)
+
+    async with engine.begin() as conn:
+        await conn.execute(text(
+            '''
+            CREATE TABLE user_memories (
+                id VARCHAR(36) PRIMARY KEY,
+                user_id VARCHAR(36) NOT NULL,
+                memory_type VARCHAR(50) NOT NULL,
+                content TEXT NOT NULL,
+                importance INTEGER,
+                is_recalled BOOLEAN,
+                recall_count INTEGER,
+                source_conversation_id VARCHAR(36),
+                extracted_at DATETIME,
+                last_recalled_at DATETIME,
+                created_at DATETIME,
+                updated_at DATETIME
+            )
+            '''
+        ))
+        result = await conn.execute(text("PRAGMA table_info(user_memories)"))
+        columns_before = {row[1] for row in result.fetchall()}
+        assert 'frequency_count' not in columns_before
+        assert 'importance_score' not in columns_before
+        assert 'decay_score' not in columns_before
+
+        await ensure_memory_columns(conn)
+
+        result = await conn.execute(text("PRAGMA table_info(user_memories)"))
+        columns_after = {row[1] for row in result.fetchall()}
+        assert 'frequency_count' in columns_after
+        assert 'emotion_tags' in columns_after
+        assert 'importance_score' in columns_after
+        assert 'importance_level' in columns_after
+        assert 'associated_topics' in columns_after
+        assert 'decay_score' in columns_after
+        assert 'is_archived' in columns_after
+        assert 'last_accessed_at' in columns_after
+        assert 'archive_at' in columns_after
+
+    await engine.dispose()
+
+
+@pytest.mark.anyio
+async def test_ensure_skill_columns_adds_lifecycle_fields_for_existing_skills_table(tmp_path):
+    db_path = tmp_path / 'test_skills.db'
+    engine = create_async_engine(f"sqlite+aiosqlite:///{db_path}", future=True)
+
+    async with engine.begin() as conn:
+        await conn.execute(text(
+            '''
+            CREATE TABLE skills (
+                id VARCHAR(36) PRIMARY KEY,
+                name VARCHAR(100) NOT NULL,
+                description TEXT,
+                instructions TEXT NOT NULL,
+                agent_type VARCHAR(50) NOT NULL,
+                visibility VARCHAR(20),
+                is_active BOOLEAN,
+                owner_id VARCHAR(36),
+                created_at DATETIME,
+                updated_at DATETIME
+            )
+            '''
+        ))
+        result = await conn.execute(text("PRAGMA table_info(skills)"))
+        columns_before = {row[1] for row in result.fetchall()}
+        assert 'status' not in columns_before
+        assert 'effectiveness' not in columns_before
+
+        await ensure_skill_columns(conn)
+
+        result = await conn.execute(text("PRAGMA table_info(skills)"))
+        columns_after = {row[1] for row in result.fetchall()}
+        assert 'status' in columns_after
+        assert 'scope' in columns_after
+        assert 'effectiveness' in columns_after
+        assert 'review_after' in columns_after
+        assert 'activation_count' in columns_after
+        assert 'last_activated_at' in columns_after
+
+    await engine.dispose()
+
+
+@pytest.mark.anyio
+async def test_ensure_learning_artifact_tables_creates_table_and_indexes(tmp_path):
+    db_path = tmp_path / 'test_learning_artifacts.db'
+    engine = create_async_engine(f"sqlite+aiosqlite:///{db_path}", future=True)
+
+    async with engine.begin() as conn:
+        await ensure_learning_artifact_tables(conn)
+        result = await conn.execute(text("PRAGMA table_info(learning_artifacts)"))
+        columns = {row[1] for row in result.fetchall()}
+        assert 'artifact_type' in columns
+        assert 'artifact_key' in columns
+        assert 'summary_text' in columns
+        assert 'payload' in columns
+
+        indexes = await conn.execute(text("PRAGMA index_list(learning_artifacts)"))
+        index_names = {row[1] for row in indexes.fetchall()}
+        assert 'ix_learning_artifacts_user_id' in index_names
+        assert 'ix_learning_artifacts_artifact_type' in index_names
+
+    await engine.dispose()
diff --git a/backend/tests/backend/app/test_skill_router.py b/backend/tests/backend/app/test_skill_router.py
index 617ecc5..c621e3d 100644
--- a/backend/tests/backend/app/test_skill_router.py
+++ b/backend/tests/backend/app/test_skill_router.py
@@ -54,6 +54,9 @@ async def skill_env(tmp_path, monkeypatch):
                 required_context=[],
                 visibility='private',
                 is_active=True,
+                status='active',
+                scope=['schedule_planner'],
+                effectiveness=0.88,
                 owner_id=user.id,
             ),
             Skill(
@@ -65,6 +68,9 @@ async def skill_env(tmp_path, monkeypatch):
                 required_context=[],
                 visibility='private',
                 is_active=True,
+                status='shadow',
+                scope=['executor'],
+                effectiveness=0.41,
                 owner_id=user.id,
             ),
             Skill(
@@ -76,6 +82,8 @@ async def skill_env(tmp_path, monkeypatch):
                 required_context=[],
                 visibility='private',
                 is_active=True,
+                status='active',
+                scope=['schedule_planner'],
                 owner_id=other_user.id,
             ),
         ])
@@ -188,3 +196,9 @@ async def test_list_skills_without_agent_type_returns_current_user_skills(skill_
     assert all(isinstance(item['updated_at'], str) for item in payload)
     assert all('is_builtin' in item for item in payload)
     assert all(item['is_builtin'] is False for item in payload)
+    assert all('status' in item for item in payload)
+    assert all('scope' in item for item in payload)
+    assert any(item['status'] == 'shadow' for item in payload)
+    executor = next(item for item in payload if item['name'] == 'Executor skill')
+    assert executor['scope'] == ['executor']
+    assert executor['effectiveness'] == 0.41
diff --git a/backend/tests/backend/app/test_system_router.py b/backend/tests/backend/app/test_system_router.py
new file mode 100644
index 0000000..7679866
--- /dev/null
+++ b/backend/tests/backend/app/test_system_router.py
@@ -0,0 +1,130 @@
+import httpx
+import pytest
+from httpx import ASGITransport, AsyncClient
+
+from app.main import app
+
+
+@pytest.mark.asyncio
+async def test_system_config_returns_location_and_weather(monkeypatch):
+    """GET /api/system/config returns 200 with the payload from SystemService.get_config."""
+    expected = {
+        'location': 'wuhan',
+        'weather_code': 3,
+        'weather_summary': 'Overcast 22°C',
+    }
+
+    async def stub_get_config(self):
+        return dict(expected)
+
+    # Replace the service method so the route answers with the fixed payload above.
+    monkeypatch.setattr('app.routers.system.SystemService.get_config', stub_get_config)
+    transport = ASGITransport(app=app)
+
+    async with AsyncClient(transport=transport, base_url='http://testserver') as http:
+        response = await http.get('/api/system/config')
+
+    assert response.status_code == 200
+    assert response.json() == expected
+
+
+@pytest.mark.asyncio
+async def test_system_config_gracefully_returns_unavailable_weather(monkeypatch):
+    """GET /api/system/config still returns 200 when the service reports unavailable weather."""
+    expected = {
+        'location': 'wuhan',
+        'weather_code': None,
+        'weather_summary': 'Weather unavailable',
+    }
+
+    async def stub_get_config(self):
+        return dict(expected)
+
+    # Replace the service method so the route answers with the fixed payload above.
+    monkeypatch.setattr('app.routers.system.SystemService.get_config', stub_get_config)
+    transport = ASGITransport(app=app)
+
+    async with AsyncClient(transport=transport, base_url='http://testserver') as http:
+        response = await http.get('/api/system/config')
+
+    assert response.status_code == 200
+    assert response.json() == expected
+
+
+class FakeWeatherResponse:
+    """Fake HTTP response exposing status_code, raise_for_status() and json()."""
+
+    def __init__(self, payload: dict, status_code: int = 200):
+        self._payload = payload
+        self.status_code = status_code
+
+    def raise_for_status(self):
+        if self.status_code >= 400:
+            request = httpx.Request('GET', 'https://wttr.in/wuhan?format=j1')  # reused below
+            response = httpx.Response(self.status_code, request=request)
+            raise httpx.HTTPStatusError('request failed', request=request, response=response)
+
+    def json(self):
+        return self._payload
+
+
+class FakeAsyncClient:
+    """Async client fake: get() raises the preset error or returns the preset response."""
+    def __init__(self, *, response=None, error=None, **kwargs):
+        self._response, self._error = response, error
+
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, exc_type, exc, tb):
+        return False
+
+    async def get(self, url, *, params=None):
+        if self._error is None:
+            return self._response
+        raise self._error
+
+
+@pytest.mark.asyncio
+async def test_system_service_get_config_fetches_weather(monkeypatch):
+    """get_config() turns a stubbed weather reading into weather_code and weather_summary."""
+
+    def make_client(**kwargs):
+        reading = {'current_condition': [{'weatherCode': '61', 'temp_C': '18'}]}
+        return FakeAsyncClient(response=FakeWeatherResponse(reading), **kwargs)
+
+    monkeypatch.setattr('app.services.system_service.httpx.AsyncClient', make_client)
+
+    from app.services.system_service import SystemService
+
+    system_service = SystemService()
+    monkeypatch.setattr(system_service._settings, 'LOCATION', 'wuhan')
+
+    config = await system_service.get_config()
+
+    assert config == {
+        'location': 'wuhan',
+        'weather_code': 61,
+        'weather_summary': 'Rain 18°C',
+    }
+
+
+@pytest.mark.asyncio
+async def test_system_service_get_config_handles_weather_failure(monkeypatch):
+    """get_config() falls back to 'Weather unavailable' when the HTTP client times out."""
+
+    def make_client(**kwargs):
+        return FakeAsyncClient(error=httpx.TimeoutException('timed out'), **kwargs)
+
+    monkeypatch.setattr('app.services.system_service.httpx.AsyncClient', make_client)
+
+    from app.services.system_service import SystemService
+
+    system_service = SystemService()
+    monkeypatch.setattr(system_service._settings, 'LOCATION', 'wuhan')
+
+    assert await system_service.get_config() == {
+        'location': 'wuhan',
+        'weather_code': None,
+        'weather_summary': 'Weather unavailable',
+    }