feat: add agent visibility APIs and harden runtime verification

Add Day 4 visibility endpoints and response models, strengthen collaboration/task verification behavior, and patch conversation schema startup migration for agent_state compatibility. Extend backend regression coverage for runtime schemas, verifier behavior, visibility APIs, router auth, and legacy conversation list loading.
This commit is contained in:
2026-04-04 00:56:03 +08:00
parent aa0ef0fbea
commit a7b6b5eb90
24 changed files with 2986 additions and 111 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -324,6 +324,25 @@ ANALYST_INSIGHTS_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
"""
# System prompt for the collaboration coordinator role. It starts with the
# shared Jarvis persona prompt and then instructs the model (in Chinese) to:
#   - first decide whether the request needs decomposition at all, and
#     recommend staying on the "direct" path when it does not;
#   - split only clearly multi-step, cross-domain requests into 2~4 sub-tasks,
#     each stating `title`, `role`, `goal` and `expected_evidence`;
#   - suggest roles only from the existing top-level agents
#     (`schedule_planner`, `librarian`, `analyst`, `executor`);
#   - never recurse without bound, never create new runtime agents/workers,
#     and surface the risk when evidence is missing or a sub-task is not closed.
COORDINATOR_SYSTEM_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
你是 Jarvis 的协作协调官,负责把复杂请求收束成最小受控协作,而不是放任系统进入自由 swarm。
## 你的职责:
- 先判断当前请求是否真的需要拆解;不需要时应明确建议继续走 direct
- 只有在明显多步骤、跨领域、需要多角色配合时,才拆成 2~4 个子任务
- 每个子任务必须清晰写出 `title`、`role`、`goal`、`expected_evidence`
- 角色建议只能来自现有 top-level agent`schedule_planner`、`librarian`、`analyst`、`executor`
- 汇总时基于子任务结果回收,不依赖单点硬编码拼接
## 边界:
- 禁止无限递归拆分
- 禁止创建新的 runtime agent / worker
- 禁止把一个简单请求硬拆成多个空泛步骤
- 如果证据不足、子任务未闭环,必须把风险明确暴露出来
"""
VERIFIER_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
你是 Jarvis 的验证官,负责对执行结果做最小但明确的核验。

View File

@@ -57,6 +57,19 @@ TOP_LEVEL_AGENT_ROUTING_HINTS: dict[str, tuple[str, ...]] = {
),
}
# Role values the master agent may spawn. Each of these roles is, in turn,
# only allowed to spawn children of its own role.
_DELEGATE_ROLE_VALUES: tuple[str, ...] = (
    AgentRole.SCHEDULE_PLANNER.value,
    AgentRole.EXECUTOR.value,
    AgentRole.LIBRARIAN.value,
    AgentRole.ANALYST.value,
)

# Maps every top-level agent role value to the role values it may spawn.
# Key order is: master first, then the delegate roles in declaration order.
TOP_LEVEL_AGENT_ALLOWED_SPAWN_ROLES: dict[str, tuple[str, ...]] = {
    AgentRole.MASTER.value: _DELEGATE_ROLE_VALUES,
    **{role_value: (role_value,) for role_value in _DELEGATE_ROLE_VALUES},
}
SUB_COMMANDER_PARENT_AGENT_IDS: dict[str, str] = {
"schedule_analysis": AgentRole.SCHEDULE_PLANNER.value,
"schedule_planning": AgentRole.SCHEDULE_PLANNER.value,
@@ -77,6 +90,8 @@ BUILTIN_AGENT_MANIFESTS: tuple[AgentManifest, ...] = tuple(
system_prompt_key=role.value,
routing_hints=list(TOP_LEVEL_AGENT_ROUTING_HINTS[role.value]),
default_sub_commanders=list(TOP_LEVEL_AGENT_DEFAULT_SUB_COMMANDERS[role.value]),
can_spawn_children=bool(TOP_LEVEL_AGENT_ALLOWED_SPAWN_ROLES[role.value]),
allowed_spawn_role_values=list(TOP_LEVEL_AGENT_ALLOWED_SPAWN_ROLES[role.value]),
skill_context_key=role.value.replace("agent_", ""),
)
for role in AgentRole

View File

@@ -16,6 +16,7 @@ from app.agents.registry.models import (
@dataclass(frozen=True)
class RegistryIndexes:
agent_by_id: Mapping[str, AgentManifest]
agent_by_role_value: Mapping[str, AgentManifest]
sub_commander_by_id: Mapping[str, SubCommanderManifest]
capability_by_id: Mapping[str, CapabilityManifest]
specialist_template_by_id: Mapping[str, SpecialistTemplateManifest]
@@ -24,6 +25,7 @@ class RegistryIndexes:
skill_context_key_by_agent_id: Mapping[str, str]
capability_id_by_tool_name: Mapping[str, str]
capability_ids_by_sub_commander_id: Mapping[str, tuple[str, ...]]
spawnable_role_values_by_agent_id: Mapping[str, tuple[str, ...]]
def summarize_registry_indexes(indexes: RegistryIndexes) -> dict[str, int]:
@@ -50,6 +52,9 @@ def build_registry_indexes(bundle: RegistryBundle) -> RegistryIndexes:
return RegistryIndexes(
agent_by_id=MappingProxyType(agent_by_id),
agent_by_role_value=MappingProxyType({
agent.role_value: agent for agent in bundle.agents
}),
sub_commander_by_id=MappingProxyType(sub_commander_by_id),
capability_by_id=MappingProxyType(capability_by_id),
specialist_template_by_id=MappingProxyType(specialist_template_by_id),
@@ -73,4 +78,9 @@ def build_registry_indexes(bundle: RegistryBundle) -> RegistryIndexes:
sub_commander.sub_commander_id: tuple(sub_commander.capability_ids)
for sub_commander in bundle.sub_commanders
}),
spawnable_role_values_by_agent_id=MappingProxyType({
agent.agent_id: tuple(agent.allowed_spawn_role_values)
for agent in bundle.agents
if agent.can_spawn_children and agent.allowed_spawn_role_values
}),
)

View File

@@ -1,6 +1,6 @@
from enum import Enum
from pydantic import BaseModel
from pydantic import BaseModel, Field
class PermissionClass(str, Enum):
@@ -23,6 +23,8 @@ class AgentManifest(BaseModel):
system_prompt_key: str
routing_hints: list[str]
default_sub_commanders: list[str]
can_spawn_children: bool = False
allowed_spawn_role_values: list[str] = Field(default_factory=list)
skill_context_key: str | None = None
continuity_policy: str | None = None
clarification_policy: str | None = None

View File

@@ -1,10 +1,25 @@
from app.agents.schemas.event import AgentEvent
from app.agents.schemas.task import AgentTask, TaskResult, TaskLifecycleStatus, VerificationStatus
from app.agents.schemas.message import AgentMessage
from app.agents.schemas.task import (
AgentTask,
CollaborationBudget,
InterruptRecord,
RecoveryRecord,
TaskLifecycleStatus,
TaskResult,
TaskResultStatus,
VerificationStatus,
)
# Public names re-exported by this schemas package: the event model, the
# agent-to-agent message model, and the task models with their status aliases.
# Entries are kept in alphabetical order.
__all__ = [
    "AgentEvent",
    "AgentMessage",
    "AgentTask",
    "CollaborationBudget",
    "InterruptRecord",
    "RecoveryRecord",
    "TaskLifecycleStatus",
    "TaskResult",
    "TaskResultStatus",
    "VerificationStatus",
]

View File

@@ -11,6 +11,18 @@ AgentEventType = Literal[
"agent.tool.result",
"agent.verify.started",
"agent.verify.completed",
"agent.created",
"agent.spawn.blocked",
"agent.message.sent",
"agent.message.received",
"agent.interrupt.requested",
"agent.interrupt.completed",
"agent.recovery.started",
"agent.recovery.completed",
"agent.task.interrupted",
"agent.task.recovered",
"agent.task.reassigned",
"agent.collaboration.budget.updated",
"agent.error",
]
AgentEventSeverity = Literal["info", "warning", "error"]
@@ -24,5 +36,11 @@ class AgentEvent(BaseModel):
agent_id: str | None = None
sub_commander_id: str | None = None
task_id: str | None = None
parent_task_id: str | None = None
child_task_id: str | None = None
thread_id: str | None = None
message_id: str | None = None
interrupt_id: str | None = None
recovery_id: str | None = None
payload: dict[str, Any] = Field(default_factory=dict)
severity: AgentEventSeverity = "info"

View File

@@ -0,0 +1,29 @@
from __future__ import annotations
from datetime import datetime, timezone
from typing import Any, Literal
from pydantic import BaseModel, Field
# Closed set of message kinds accepted by AgentMessage.message_type.
AgentMessageType = Literal[
    "task_request",
    "task_update",
    "handoff",
    "verification_request",
    "verification_feedback",
    "interrupt_notice",
]
class AgentMessage(BaseModel):
    """One message sent from one agent to another within a thread.

    ``message_id``, ``thread_id``, ``from_agent_id``, ``to_agent_id`` and
    ``content_summary`` are required. ``message_type`` defaults to
    ``"task_update"``, ``created_at`` to the current UTC time, and ``payload``
    to a fresh empty dict.
    """

    message_id: str
    thread_id: str
    from_agent_id: str  # sending agent
    to_agent_id: str  # receiving agent
    task_id: str | None = None
    # NOTE(review): presumably the message_id this message answers — confirm.
    reply_to_message_id: str | None = None
    message_type: AgentMessageType = "task_update"
    content_summary: str
    # Timezone-aware timestamp; the factory runs per instance.
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    payload: dict[str, Any] = Field(default_factory=dict)

View File

@@ -8,6 +8,41 @@ from pydantic import BaseModel, Field
# Allowed task lifecycle status strings.
TaskLifecycleStatus = Literal["pending", "in_progress", "completed", "failed", "blocked"]
# Allowed verification outcome strings.
VerificationStatus = Literal["passed", "failed", "skipped"]
# Status strings accepted on a TaskResult: the lifecycle values other than
# "pending"/"in_progress", together with every VerificationStatus value.
TaskResultStatus = Literal["completed", "failed", "blocked", "passed", "skipped"]
# Allowed states of an InterruptRecord.
InterruptStatus = Literal["requested", "acknowledged", "resolved"]
# Allowed values of CollaborationBudget.mode.
BudgetMode = Literal["direct", "collaboration"]
class InterruptRecord(BaseModel):
    """Record describing one interrupt request.

    Only ``interrupt_id`` and ``reason`` are required. ``status`` starts as
    ``"requested"``, ``requested_at`` defaults to the current UTC time, and
    ``payload`` to a fresh empty dict.
    """

    interrupt_id: str
    reason: str
    status: InterruptStatus = "requested"
    requested_by: str | None = None
    # NOTE(review): presumably the id of the event that triggered the
    # interrupt — confirm against the emitting code.
    source_event_id: str | None = None
    # Timezone-aware timestamp; the factory runs per instance.
    requested_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    payload: dict[str, Any] = Field(default_factory=dict)
class RecoveryRecord(BaseModel):
    """Record describing one recovery step.

    Only ``recovery_id`` is required. ``recovered_at`` defaults to the current
    UTC time and ``payload`` to a fresh empty dict; every other field defaults
    to ``None``.
    """

    recovery_id: str
    # NOTE(review): presumably matches InterruptRecord.interrupt_id — confirm.
    source_interrupt_id: str | None = None
    strategy: str | None = None
    resumed_from_task_id: str | None = None
    resumed_from_thread_id: str | None = None
    # Timezone-aware timestamp; the factory runs per instance.
    recovered_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    payload: dict[str, Any] = Field(default_factory=dict)
class CollaborationBudget(BaseModel):
    """Budget limits and remaining counters for a run.

    Every field is optional: ``mode`` defaults to ``"direct"``, ``metadata`` to
    a fresh empty dict, and each numeric limit/counter to ``None``. The model
    itself does not enforce any relation between a ``max_*`` field and its
    ``remaining_*`` counterpart.
    """

    mode: BudgetMode = "direct"
    max_parallel_tasks: int | None = None
    remaining_parallel_tasks: int | None = None
    max_tool_calls: int | None = None
    remaining_tool_calls: int | None = None
    max_iterations: int | None = None
    remaining_iterations: int | None = None
    # NOTE(review): unit and comparison semantics are not defined here —
    # confirm with the code that consumes this threshold.
    escalation_threshold: int | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)
class AgentTask(BaseModel):
@@ -17,8 +52,16 @@ class AgentTask(BaseModel):
owner_agent_id: str | None = None
role: str | None = None
goal: str | None = None
parent_task_id: str | None = None
child_task_ids: list[str] = Field(default_factory=list)
thread_id: str | None = None
message_id: str | None = None
message_index: int | None = None
expected_evidence: list[dict[str, Any]] = Field(default_factory=list)
evidence: list[dict[str, Any]] = Field(default_factory=list)
interrupt_records: list[InterruptRecord | dict[str, Any]] = Field(default_factory=list)
recovery_records: list[RecoveryRecord | dict[str, Any]] = Field(default_factory=list)
collaboration_budget: CollaborationBudget | dict[str, Any] | None = None
result_summary: str | None = None
created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
@@ -26,7 +69,17 @@ class AgentTask(BaseModel):
class TaskResult(BaseModel):
task_id: str
status: VerificationStatus
status: TaskResultStatus
summary: str | None = None
evidence: list[dict[str, Any]] = Field(default_factory=list)
owner_agent_id: str | None = None
parent_task_id: str | None = None
child_task_ids: list[str] = Field(default_factory=list)
thread_id: str | None = None
message_id: str | None = None
message_index: int | None = None
interrupt_records: list[InterruptRecord | dict[str, Any]] = Field(default_factory=list)
recovery_records: list[RecoveryRecord | dict[str, Any]] = Field(default_factory=list)
budget_snapshot: CollaborationBudget | dict[str, Any] | None = None
next_action: str | None = None
output_data: dict[str, Any] | None = None

View File

@@ -3,8 +3,9 @@ from enum import Enum
from typing import Annotated, Any, Literal, TypedDict
from app.agents.schemas.event import AgentEvent
from app.agents.schemas.task import AgentTask, TaskResult, VerificationStatus
from langchain_core.messages import BaseMessage
from app.agents.schemas.message import AgentMessage
from app.agents.schemas.task import AgentTask, CollaborationBudget, InterruptRecord, RecoveryRecord, TaskResult, VerificationStatus
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from langgraph.graph.message import add_messages
@@ -24,12 +25,27 @@ class ConversationTurn:
model: str | None = None
def turn_to_message(turn: ConversationTurn) -> BaseMessage:
    """Convert a stored conversation turn into a LangChain message.

    A turn whose ``role`` is ``"user"`` becomes a ``HumanMessage``; a turn with
    any other role becomes an ``AIMessage``. Only ``turn.content`` is carried
    over into the message.
    """
    message_cls = HumanMessage if turn.role == "user" else AIMessage
    return message_cls(content=turn.content)
class AgentState(TypedDict):
messages: Annotated[list[BaseMessage], add_messages]
user_id: str
conversation_id: str
parent_conversation_id: str | None
thread_id: str | None
last_message_id: str | None
message_sequence: int
agent_id: str | None
parent_agent_id: str | None
root_agent_id: str | None
collaboration_depth: int
spawned_agent_ids: list[str]
execution_mode: Literal["direct", "delegated", "verified"]
execution_mode: Literal["direct", "collaboration", "delegated", "verified"]
current_agent: str | None
next_step: str | None
active_agents: list[AgentRole]
@@ -38,11 +54,16 @@ class AgentState(TypedDict):
sub_commander_trace: list[dict[str, Any]]
agent_trace: list[str]
event_trace: list[AgentEvent | dict[str, Any]]
message_trace: list[AgentMessage | dict[str, Any]]
pending_tasks: list[dict[str, Any]]
completed_tasks: list[dict[str, Any]]
active_tasks: list[AgentTask | dict[str, Any]]
task_results: list[TaskResult | dict[str, Any]]
task_hierarchy: dict[str, list[str]]
interrupted_tasks: list[InterruptRecord | dict[str, Any]]
recovery_trace: list[RecoveryRecord | dict[str, Any]]
recovery_points: list[dict[str, Any]]
tool_calls: list[dict[str, Any]]
last_tool_result: str | None
action_results: list[dict[str, Any]]
@@ -54,7 +75,8 @@ class AgentState(TypedDict):
verification_status: VerificationStatus | None
verification_summary: str | None
verification_evidence: list[dict[str, Any]]
budget_state: dict[str, Any] | None
budget_state: CollaborationBudget | dict[str, Any] | None
collaboration_budget_history: list[CollaborationBudget | dict[str, Any]]
tool_strategy_used: str | None
tool_round_count: int
@@ -102,6 +124,15 @@ def initial_state(user_id: str, conversation_id: str) -> AgentState:
messages=[],
user_id=user_id,
conversation_id=conversation_id,
parent_conversation_id=None,
thread_id=None,
last_message_id=None,
message_sequence=0,
agent_id=AgentRole.MASTER.value,
parent_agent_id=None,
root_agent_id=AgentRole.MASTER.value,
collaboration_depth=0,
spawned_agent_ids=[],
execution_mode="direct",
current_agent=AgentRole.MASTER.value,
next_step=None,
@@ -111,10 +142,15 @@ def initial_state(user_id: str, conversation_id: str) -> AgentState:
sub_commander_trace=[],
agent_trace=[AgentRole.MASTER.value],
event_trace=[],
message_trace=[],
pending_tasks=[],
completed_tasks=[],
active_tasks=[],
task_results=[],
task_hierarchy={},
interrupted_tasks=[],
recovery_trace=[],
recovery_points=[],
tool_calls=[],
last_tool_result=None,
action_results=[],
@@ -126,6 +162,7 @@ def initial_state(user_id: str, conversation_id: str) -> AgentState:
verification_summary=None,
verification_evidence=[],
budget_state=None,
collaboration_budget_history=[],
tool_strategy_used=None,
tool_round_count=0,
max_tool_rounds=2,

View File

@@ -1,10 +1,10 @@
from __future__ import annotations
from typing import Any
from typing import Any, cast
from pydantic import BaseModel, Field
from app.agents.schemas.task import AgentTask, TaskResult, VerificationStatus
from app.agents.schemas.task import AgentTask, TaskResult, TaskResultStatus, VerificationStatus
from app.agents.state import AgentState
@@ -14,6 +14,34 @@ class VerificationVerdict(BaseModel):
evidence: list[dict[str, Any]] = Field(default_factory=list)
def normalize_task_result(
    task_result: TaskResult | dict[str, Any],
    *,
    default_task_id: str | None = None,
) -> TaskResult:
    """Coerce a ``TaskResult`` or a loosely-shaped dict into a ``TaskResult``.

    Args:
        task_result: An existing ``TaskResult`` (dumped in JSON mode first) or
            a plain mapping; a falsy value is treated as an empty mapping.
        default_task_id: Used as ``task_id`` when the payload has none. When
            both are missing, ``"unknown-task"`` is used.

    Returns:
        A new ``TaskResult``. An unrecognized or missing ``status`` becomes
        ``"failed"``; missing list fields become empty lists; a
        ``message_index`` that is not an int, or an ``output_data`` that is
        not a dict, becomes ``None``.
    """
    if isinstance(task_result, TaskResult):
        payload = task_result.model_dump(mode="json")
    else:
        payload = dict(task_result or {})

    # Check the type before the membership test: probing a set with an
    # unhashable value (e.g. a list or dict from a malformed payload) raises
    # TypeError, whereas this normalizer is expected to fall back to "failed".
    raw_status = payload.get("status")
    if isinstance(raw_status, str) and raw_status in {"completed", "failed", "blocked", "passed", "skipped"}:
        normalized_status = raw_status
    else:
        normalized_status = "failed"

    message_index = payload.get("message_index")
    if not isinstance(message_index, int):
        message_index = None

    # A dict payload may carry the budget as a model instance rather than a
    # dict; dump it so it is preserved the same way as when the input is a
    # TaskResult, instead of being silently discarded.
    budget_snapshot = payload.get("budget_snapshot")
    if isinstance(budget_snapshot, BaseModel):
        budget_snapshot = budget_snapshot.model_dump(mode="json")
    elif not isinstance(budget_snapshot, dict):
        budget_snapshot = None

    output_data = payload.get("output_data")
    if not isinstance(output_data, dict):
        output_data = None

    return TaskResult(
        task_id=str(payload.get("task_id") or default_task_id or "unknown-task"),
        status=cast(TaskResultStatus, normalized_status),
        summary=payload.get("summary"),
        evidence=list(payload.get("evidence") or []),
        owner_agent_id=payload.get("owner_agent_id"),
        parent_task_id=payload.get("parent_task_id"),
        child_task_ids=list(payload.get("child_task_ids") or []),
        thread_id=payload.get("thread_id"),
        message_id=payload.get("message_id"),
        message_index=message_index,
        interrupt_records=list(payload.get("interrupt_records") or []),
        recovery_records=list(payload.get("recovery_records") or []),
        budget_snapshot=budget_snapshot,
        next_action=payload.get("next_action"),
        output_data=output_data,
    )
def verify_task_result(
*,
task: AgentTask | dict[str, Any] | None = None,
@@ -30,8 +58,13 @@ def verify_task_result(
if status is not None:
return VerificationVerdict(status=status, summary=normalized_summary, evidence=normalized_evidence)
if normalized_result.get("status") in {"passed", "failed", "skipped"}:
inferred_status = normalized_result["status"]
normalized_status = normalized_result.get("status")
if normalized_status in {"passed", "failed", "skipped"}:
inferred_status = normalized_status
elif normalized_status == "completed":
inferred_status = "passed"
elif normalized_status == "blocked":
inferred_status = "skipped"
elif normalized_result.get("success") is True:
inferred_status = "passed"
elif normalized_result.get("success") is False:
@@ -57,4 +90,4 @@ def apply_verification_verdict(state: AgentState, verdict: VerificationVerdict)
return AgentState(**next_state)
__all__ = ["VerificationVerdict", "apply_verification_verdict", "verify_task_result"]
__all__ = ["VerificationVerdict", "apply_verification_verdict", "normalize_task_result", "verify_task_result"]