feat: add agent visibility APIs and harden runtime verification

Add Day 4 visibility endpoints and response models, strengthen collaboration/task verification behavior, and patch conversation schema startup migration for agent_state compatibility. Extend backend regression coverage for runtime schemas, verifier behavior, visibility APIs, router auth, and legacy conversation list loading.
2026-04-04 00:56:03 +08:00
parent aa0ef0fbea
commit a7b6b5eb90
24 changed files with 2986 additions and 111 deletions
--- a/backend/app/agents/graph.py
+++ b/backend/app/agents/graph.py
--- a/backend/app/agents/prompts.py
+++ b/backend/app/agents/prompts.py
@@ -324,6 +324,25 @@ ANALYST_INSIGHTS_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
 """


+COORDINATOR_SYSTEM_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
+
+你是 Jarvis 的协作协调官，负责把复杂请求收束成最小受控协作，而不是放任系统进入自由 swarm。
+
+## 你的职责:
+- 先判断当前请求是否真的需要拆解；不需要时应明确建议继续走 direct
+- 只有在明显多步骤、跨领域、需要多角色配合时，才拆成 2~4 个子任务
+- 每个子任务必须清晰写出 `title`、`role`、`goal`、`expected_evidence`
+- 角色建议只能来自现有 top-level agent：`schedule_planner`、`librarian`、`analyst`、`executor`
+- 汇总时基于子任务结果回收，不依赖单点硬编码拼接
+
+## 边界:
+- 禁止无限递归拆分
+- 禁止创建新的 runtime agent / worker
+- 禁止把一个简单请求硬拆成多个空泛步骤
+- 如果证据不足、子任务未闭环，必须把风险明确暴露出来
+"""
+
+
 VERIFIER_PROMPT = f"""{JARVIS_PERSONA_PROMPT}

 你是 Jarvis 的验证官，负责对执行结果做最小但明确的核验。
--- a/backend/app/agents/registry/builtins.py
+++ b/backend/app/agents/registry/builtins.py
@@ -57,6 +57,19 @@ TOP_LEVEL_AGENT_ROUTING_HINTS: dict[str, tuple[str, ...]] = {
    ),
 }

+TOP_LEVEL_AGENT_ALLOWED_SPAWN_ROLES: dict[str, tuple[str, ...]] = {
+    AgentRole.MASTER.value: (
+        AgentRole.SCHEDULE_PLANNER.value,
+        AgentRole.EXECUTOR.value,
+        AgentRole.LIBRARIAN.value,
+        AgentRole.ANALYST.value,
+    ),
+    AgentRole.SCHEDULE_PLANNER.value: (AgentRole.SCHEDULE_PLANNER.value,),
+    AgentRole.EXECUTOR.value: (AgentRole.EXECUTOR.value,),
+    AgentRole.LIBRARIAN.value: (AgentRole.LIBRARIAN.value,),
+    AgentRole.ANALYST.value: (AgentRole.ANALYST.value,),
+}
+
 SUB_COMMANDER_PARENT_AGENT_IDS: dict[str, str] = {
    "schedule_analysis": AgentRole.SCHEDULE_PLANNER.value,
    "schedule_planning": AgentRole.SCHEDULE_PLANNER.value,
@@ -77,6 +90,8 @@ BUILTIN_AGENT_MANIFESTS: tuple[AgentManifest, ...] = tuple(
        system_prompt_key=role.value,
        routing_hints=list(TOP_LEVEL_AGENT_ROUTING_HINTS[role.value]),
        default_sub_commanders=list(TOP_LEVEL_AGENT_DEFAULT_SUB_COMMANDERS[role.value]),
+        can_spawn_children=bool(TOP_LEVEL_AGENT_ALLOWED_SPAWN_ROLES[role.value]),
+        allowed_spawn_role_values=list(TOP_LEVEL_AGENT_ALLOWED_SPAWN_ROLES[role.value]),
        skill_context_key=role.value.replace("agent_", ""),
    )
    for role in AgentRole
--- a/backend/app/agents/registry/indexes.py
+++ b/backend/app/agents/registry/indexes.py
@@ -16,6 +16,7 @@ from app.agents.registry.models import (
 @dataclass(frozen=True)
 class RegistryIndexes:
    agent_by_id: Mapping[str, AgentManifest]
+    agent_by_role_value: Mapping[str, AgentManifest]
    sub_commander_by_id: Mapping[str, SubCommanderManifest]
    capability_by_id: Mapping[str, CapabilityManifest]
    specialist_template_by_id: Mapping[str, SpecialistTemplateManifest]
@@ -24,6 +25,7 @@ class RegistryIndexes:
    skill_context_key_by_agent_id: Mapping[str, str]
    capability_id_by_tool_name: Mapping[str, str]
    capability_ids_by_sub_commander_id: Mapping[str, tuple[str, ...]]
+    spawnable_role_values_by_agent_id: Mapping[str, tuple[str, ...]]


 def summarize_registry_indexes(indexes: RegistryIndexes) -> dict[str, int]:
@@ -50,6 +52,9 @@ def build_registry_indexes(bundle: RegistryBundle) -> RegistryIndexes:

    return RegistryIndexes(
        agent_by_id=MappingProxyType(agent_by_id),
+        agent_by_role_value=MappingProxyType({
+            agent.role_value: agent for agent in bundle.agents
+        }),
        sub_commander_by_id=MappingProxyType(sub_commander_by_id),
        capability_by_id=MappingProxyType(capability_by_id),
        specialist_template_by_id=MappingProxyType(specialist_template_by_id),
@@ -73,4 +78,9 @@ def build_registry_indexes(bundle: RegistryBundle) -> RegistryIndexes:
            sub_commander.sub_commander_id: tuple(sub_commander.capability_ids)
            for sub_commander in bundle.sub_commanders
        }),
+        spawnable_role_values_by_agent_id=MappingProxyType({
+            agent.agent_id: tuple(agent.allowed_spawn_role_values)
+            for agent in bundle.agents
+            if agent.can_spawn_children and agent.allowed_spawn_role_values
+        }),
    )
--- a/backend/app/agents/registry/models.py
+++ b/backend/app/agents/registry/models.py
@@ -1,6 +1,6 @@
 from enum import Enum

-from pydantic import BaseModel
+from pydantic import BaseModel, Field


 class PermissionClass(str, Enum):
@@ -23,6 +23,8 @@ class AgentManifest(BaseModel):
    system_prompt_key: str
    routing_hints: list[str]
    default_sub_commanders: list[str]
+    can_spawn_children: bool = False
+    allowed_spawn_role_values: list[str] = Field(default_factory=list)
    skill_context_key: str | None = None
    continuity_policy: str | None = None
    clarification_policy: str | None = None
--- a/backend/app/agents/schemas/__init__.py
+++ b/backend/app/agents/schemas/__init__.py
@@ -1,10 +1,25 @@
 from app.agents.schemas.event import AgentEvent
-from app.agents.schemas.task import AgentTask, TaskResult, TaskLifecycleStatus, VerificationStatus
+from app.agents.schemas.message import AgentMessage
+from app.agents.schemas.task import (
+    AgentTask,
+    CollaborationBudget,
+    InterruptRecord,
+    RecoveryRecord,
+    TaskLifecycleStatus,
+    TaskResult,
+    TaskResultStatus,
+    VerificationStatus,
+)

 __all__ = [
    "AgentEvent",
+    "AgentMessage",
    "AgentTask",
+    "CollaborationBudget",
+    "InterruptRecord",
+    "RecoveryRecord",
    "TaskLifecycleStatus",
    "TaskResult",
+    "TaskResultStatus",
    "VerificationStatus",
 ]
--- a/backend/app/agents/schemas/event.py
+++ b/backend/app/agents/schemas/event.py
@@ -11,6 +11,18 @@ AgentEventType = Literal[
    "agent.tool.result",
    "agent.verify.started",
    "agent.verify.completed",
+    "agent.created",
+    "agent.spawn.blocked",
+    "agent.message.sent",
+    "agent.message.received",
+    "agent.interrupt.requested",
+    "agent.interrupt.completed",
+    "agent.recovery.started",
+    "agent.recovery.completed",
+    "agent.task.interrupted",
+    "agent.task.recovered",
+    "agent.task.reassigned",
+    "agent.collaboration.budget.updated",
    "agent.error",
 ]
 AgentEventSeverity = Literal["info", "warning", "error"]
@@ -24,5 +36,11 @@ class AgentEvent(BaseModel):
    agent_id: str | None = None
    sub_commander_id: str | None = None
    task_id: str | None = None
+    parent_task_id: str | None = None
+    child_task_id: str | None = None
+    thread_id: str | None = None
+    message_id: str | None = None
+    interrupt_id: str | None = None
+    recovery_id: str | None = None
    payload: dict[str, Any] = Field(default_factory=dict)
    severity: AgentEventSeverity = "info"
--- a/backend/app/agents/schemas/message.py
+++ b/backend/app/agents/schemas/message.py
@@ -0,0 +1,29 @@
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import Any, Literal
+
+from pydantic import BaseModel, Field
+
+
+AgentMessageType = Literal[
+    "task_request",
+    "task_update",
+    "handoff",
+    "verification_request",
+    "verification_feedback",
+    "interrupt_notice",
+]
+
+
+class AgentMessage(BaseModel):
+    message_id: str
+    thread_id: str
+    from_agent_id: str
+    to_agent_id: str
+    task_id: str | None = None
+    reply_to_message_id: str | None = None
+    message_type: AgentMessageType = "task_update"
+    content_summary: str
+    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
+    payload: dict[str, Any] = Field(default_factory=dict)
--- a/backend/app/agents/schemas/task.py
+++ b/backend/app/agents/schemas/task.py
@@ -8,6 +8,41 @@ from pydantic import BaseModel, Field

 TaskLifecycleStatus = Literal["pending", "in_progress", "completed", "failed", "blocked"]
 VerificationStatus = Literal["passed", "failed", "skipped"]
+TaskResultStatus = Literal["completed", "failed", "blocked", "passed", "skipped"]
+InterruptStatus = Literal["requested", "acknowledged", "resolved"]
+BudgetMode = Literal["direct", "collaboration"]
+
+
+class InterruptRecord(BaseModel):
+    interrupt_id: str
+    reason: str
+    status: InterruptStatus = "requested"
+    requested_by: str | None = None
+    source_event_id: str | None = None
+    requested_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
+    payload: dict[str, Any] = Field(default_factory=dict)
+
+
+class RecoveryRecord(BaseModel):
+    recovery_id: str
+    source_interrupt_id: str | None = None
+    strategy: str | None = None
+    resumed_from_task_id: str | None = None
+    resumed_from_thread_id: str | None = None
+    recovered_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
+    payload: dict[str, Any] = Field(default_factory=dict)
+
+
+class CollaborationBudget(BaseModel):
+    mode: BudgetMode = "direct"
+    max_parallel_tasks: int | None = None
+    remaining_parallel_tasks: int | None = None
+    max_tool_calls: int | None = None
+    remaining_tool_calls: int | None = None
+    max_iterations: int | None = None
+    remaining_iterations: int | None = None
+    escalation_threshold: int | None = None
+    metadata: dict[str, Any] = Field(default_factory=dict)


 class AgentTask(BaseModel):
@@ -17,8 +52,16 @@ class AgentTask(BaseModel):
    owner_agent_id: str | None = None
    role: str | None = None
    goal: str | None = None
+    parent_task_id: str | None = None
+    child_task_ids: list[str] = Field(default_factory=list)
+    thread_id: str | None = None
+    message_id: str | None = None
+    message_index: int | None = None
    expected_evidence: list[dict[str, Any]] = Field(default_factory=list)
    evidence: list[dict[str, Any]] = Field(default_factory=list)
+    interrupt_records: list[InterruptRecord | dict[str, Any]] = Field(default_factory=list)
+    recovery_records: list[RecoveryRecord | dict[str, Any]] = Field(default_factory=list)
+    collaboration_budget: CollaborationBudget | dict[str, Any] | None = None
    result_summary: str | None = None
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
@@ -26,7 +69,17 @@ class AgentTask(BaseModel):

 class TaskResult(BaseModel):
    task_id: str
-    status: VerificationStatus
+    status: TaskResultStatus
    summary: str | None = None
    evidence: list[dict[str, Any]] = Field(default_factory=list)
+    owner_agent_id: str | None = None
+    parent_task_id: str | None = None
+    child_task_ids: list[str] = Field(default_factory=list)
+    thread_id: str | None = None
+    message_id: str | None = None
+    message_index: int | None = None
+    interrupt_records: list[InterruptRecord | dict[str, Any]] = Field(default_factory=list)
+    recovery_records: list[RecoveryRecord | dict[str, Any]] = Field(default_factory=list)
+    budget_snapshot: CollaborationBudget | dict[str, Any] | None = None
+    next_action: str | None = None
    output_data: dict[str, Any] | None = None
--- a/backend/app/agents/state.py
+++ b/backend/app/agents/state.py
@@ -3,8 +3,9 @@ from enum import Enum
 from typing import Annotated, Any, Literal, TypedDict

 from app.agents.schemas.event import AgentEvent
-from app.agents.schemas.task import AgentTask, TaskResult, VerificationStatus
-from langchain_core.messages import BaseMessage
+from app.agents.schemas.message import AgentMessage
+from app.agents.schemas.task import AgentTask, CollaborationBudget, InterruptRecord, RecoveryRecord, TaskResult, VerificationStatus
+from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
 from langgraph.graph.message import add_messages


@@ -24,12 +25,27 @@ class ConversationTurn:
    model: str | None = None


+def turn_to_message(turn: ConversationTurn) -> BaseMessage:
+    if turn.role == "user":
+        return HumanMessage(content=turn.content)
+    return AIMessage(content=turn.content)
+
+
 class AgentState(TypedDict):
    messages: Annotated[list[BaseMessage], add_messages]
    user_id: str
    conversation_id: str
+    parent_conversation_id: str | None
+    thread_id: str | None
+    last_message_id: str | None
+    message_sequence: int
+    agent_id: str | None
+    parent_agent_id: str | None
+    root_agent_id: str | None
+    collaboration_depth: int
+    spawned_agent_ids: list[str]

-    execution_mode: Literal["direct", "delegated", "verified"]
+    execution_mode: Literal["direct", "collaboration", "delegated", "verified"]
    current_agent: str | None
    next_step: str | None
    active_agents: list[AgentRole]
@@ -38,11 +54,16 @@ class AgentState(TypedDict):
    sub_commander_trace: list[dict[str, Any]]
    agent_trace: list[str]
    event_trace: list[AgentEvent | dict[str, Any]]
+    message_trace: list[AgentMessage | dict[str, Any]]

    pending_tasks: list[dict[str, Any]]
    completed_tasks: list[dict[str, Any]]
    active_tasks: list[AgentTask | dict[str, Any]]
    task_results: list[TaskResult | dict[str, Any]]
+    task_hierarchy: dict[str, list[str]]
+    interrupted_tasks: list[InterruptRecord | dict[str, Any]]
+    recovery_trace: list[RecoveryRecord | dict[str, Any]]
+    recovery_points: list[dict[str, Any]]
    tool_calls: list[dict[str, Any]]
    last_tool_result: str | None
    action_results: list[dict[str, Any]]
@@ -54,7 +75,8 @@ class AgentState(TypedDict):
    verification_status: VerificationStatus | None
    verification_summary: str | None
    verification_evidence: list[dict[str, Any]]
-    budget_state: dict[str, Any] | None
+    budget_state: CollaborationBudget | dict[str, Any] | None
+    collaboration_budget_history: list[CollaborationBudget | dict[str, Any]]

    tool_strategy_used: str | None
    tool_round_count: int
@@ -102,6 +124,15 @@ def initial_state(user_id: str, conversation_id: str) -> AgentState:
        messages=[],
        user_id=user_id,
        conversation_id=conversation_id,
+        parent_conversation_id=None,
+        thread_id=None,
+        last_message_id=None,
+        message_sequence=0,
+        agent_id=AgentRole.MASTER.value,
+        parent_agent_id=None,
+        root_agent_id=AgentRole.MASTER.value,
+        collaboration_depth=0,
+        spawned_agent_ids=[],
        execution_mode="direct",
        current_agent=AgentRole.MASTER.value,
        next_step=None,
@@ -111,10 +142,15 @@ def initial_state(user_id: str, conversation_id: str) -> AgentState:
        sub_commander_trace=[],
        agent_trace=[AgentRole.MASTER.value],
        event_trace=[],
+        message_trace=[],
        pending_tasks=[],
        completed_tasks=[],
        active_tasks=[],
        task_results=[],
+        task_hierarchy={},
+        interrupted_tasks=[],
+        recovery_trace=[],
+        recovery_points=[],
        tool_calls=[],
        last_tool_result=None,
        action_results=[],
@@ -126,6 +162,7 @@ def initial_state(user_id: str, conversation_id: str) -> AgentState:
        verification_summary=None,
        verification_evidence=[],
        budget_state=None,
+        collaboration_budget_history=[],
        tool_strategy_used=None,
        tool_round_count=0,
        max_tool_rounds=2,
--- a/backend/app/agents/verifier.py
+++ b/backend/app/agents/verifier.py
@@ -1,10 +1,10 @@
 from __future__ import annotations

-from typing import Any
+from typing import Any, cast

 from pydantic import BaseModel, Field

-from app.agents.schemas.task import AgentTask, TaskResult, VerificationStatus
+from app.agents.schemas.task import AgentTask, TaskResult, TaskResultStatus, VerificationStatus
 from app.agents.state import AgentState


@@ -14,6 +14,34 @@ class VerificationVerdict(BaseModel):
    evidence: list[dict[str, Any]] = Field(default_factory=list)


+def normalize_task_result(
+    task_result: TaskResult | dict[str, Any],
+    *,
+    default_task_id: str | None = None,
+) -> TaskResult:
+    payload = task_result.model_dump(mode="json") if isinstance(task_result, TaskResult) else dict(task_result or {})
+    normalized_status = payload.get("status")
+    if normalized_status not in {"completed", "failed", "blocked", "passed", "skipped"}:
+        normalized_status = "failed"
+    return TaskResult(
+        task_id=str(payload.get("task_id") or default_task_id or "unknown-task"),
+        status=cast(TaskResultStatus, normalized_status),
+        summary=payload.get("summary"),
+        evidence=list(payload.get("evidence") or []),
+        owner_agent_id=payload.get("owner_agent_id"),
+        parent_task_id=payload.get("parent_task_id"),
+        child_task_ids=list(payload.get("child_task_ids") or []),
+        thread_id=payload.get("thread_id"),
+        message_id=payload.get("message_id"),
+        message_index=payload.get("message_index") if isinstance(payload.get("message_index"), int) else None,
+        interrupt_records=list(payload.get("interrupt_records") or []),
+        recovery_records=list(payload.get("recovery_records") or []),
+        budget_snapshot=payload.get("budget_snapshot") if isinstance(payload.get("budget_snapshot"), dict) else None,
+        next_action=payload.get("next_action"),
+        output_data=payload.get("output_data") if isinstance(payload.get("output_data"), dict) else None,
+    )
+
+
 def verify_task_result(
    *,
    task: AgentTask | dict[str, Any] | None = None,
@@ -30,8 +58,13 @@ def verify_task_result(
    if status is not None:
        return VerificationVerdict(status=status, summary=normalized_summary, evidence=normalized_evidence)

-    if normalized_result.get("status") in {"passed", "failed", "skipped"}:
-        inferred_status = normalized_result["status"]
+    normalized_status = normalized_result.get("status")
+    if normalized_status in {"passed", "failed", "skipped"}:
+        inferred_status = normalized_status
+    elif normalized_status == "completed":
+        inferred_status = "passed"
+    elif normalized_status == "blocked":
+        inferred_status = "skipped"
    elif normalized_result.get("success") is True:
        inferred_status = "passed"
    elif normalized_result.get("success") is False:
@@ -57,4 +90,4 @@ def apply_verification_verdict(state: AgentState, verdict: VerificationVerdict)
    return AgentState(**next_state)


-__all__ = ["VerificationVerdict", "apply_verification_verdict", "verify_task_result"]
+__all__ = ["VerificationVerdict", "apply_verification_verdict", "normalize_task_result", "verify_task_result"]