feat: add agent visibility APIs and harden runtime verification
Add Day 4 visibility endpoints and response models, strengthen collaboration/task verification behavior, and patch conversation schema startup migration for agent_state compatibility. Extend backend regression coverage for runtime schemas, verifier behavior, visibility APIs, router auth, and legacy conversation list loading.
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -324,6 +324,25 @@ ANALYST_INSIGHTS_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
|
||||
"""
|
||||
|
||||
|
||||
COORDINATOR_SYSTEM_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
|
||||
|
||||
你是 Jarvis 的协作协调官,负责把复杂请求收束成最小受控协作,而不是放任系统进入自由 swarm。
|
||||
|
||||
## 你的职责:
|
||||
- 先判断当前请求是否真的需要拆解;不需要时应明确建议继续走 direct
|
||||
- 只有在明显多步骤、跨领域、需要多角色配合时,才拆成 2~4 个子任务
|
||||
- 每个子任务必须清晰写出 `title`、`role`、`goal`、`expected_evidence`
|
||||
- 角色建议只能来自现有 top-level agent:`schedule_planner`、`librarian`、`analyst`、`executor`
|
||||
- 汇总时基于子任务结果回收,不依赖单点硬编码拼接
|
||||
|
||||
## 边界:
|
||||
- 禁止无限递归拆分
|
||||
- 禁止创建新的 runtime agent / worker
|
||||
- 禁止把一个简单请求硬拆成多个空泛步骤
|
||||
- 如果证据不足、子任务未闭环,必须把风险明确暴露出来
|
||||
"""
|
||||
|
||||
|
||||
VERIFIER_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
|
||||
|
||||
你是 Jarvis 的验证官,负责对执行结果做最小但明确的核验。
|
||||
|
||||
@@ -57,6 +57,19 @@ TOP_LEVEL_AGENT_ROUTING_HINTS: dict[str, tuple[str, ...]] = {
|
||||
),
|
||||
}
|
||||
|
||||
# Spawn allow-list keyed by top-level agent id. The master agent may spawn any
# of the four specialist roles; each specialist may only spawn its own role.
_SPECIALIST_SPAWN_ROLE_VALUES: tuple[str, ...] = (
    AgentRole.SCHEDULE_PLANNER.value,
    AgentRole.EXECUTOR.value,
    AgentRole.LIBRARIAN.value,
    AgentRole.ANALYST.value,
)

TOP_LEVEL_AGENT_ALLOWED_SPAWN_ROLES: dict[str, tuple[str, ...]] = {
    AgentRole.MASTER.value: _SPECIALIST_SPAWN_ROLE_VALUES,
    # One self-only entry per specialist, in the same order as the tuple above.
    **{
        role_value: (role_value,)
        for role_value in _SPECIALIST_SPAWN_ROLE_VALUES
    },
}
|
||||
|
||||
SUB_COMMANDER_PARENT_AGENT_IDS: dict[str, str] = {
|
||||
"schedule_analysis": AgentRole.SCHEDULE_PLANNER.value,
|
||||
"schedule_planning": AgentRole.SCHEDULE_PLANNER.value,
|
||||
@@ -77,6 +90,8 @@ BUILTIN_AGENT_MANIFESTS: tuple[AgentManifest, ...] = tuple(
|
||||
system_prompt_key=role.value,
|
||||
routing_hints=list(TOP_LEVEL_AGENT_ROUTING_HINTS[role.value]),
|
||||
default_sub_commanders=list(TOP_LEVEL_AGENT_DEFAULT_SUB_COMMANDERS[role.value]),
|
||||
can_spawn_children=bool(TOP_LEVEL_AGENT_ALLOWED_SPAWN_ROLES[role.value]),
|
||||
allowed_spawn_role_values=list(TOP_LEVEL_AGENT_ALLOWED_SPAWN_ROLES[role.value]),
|
||||
skill_context_key=role.value.replace("agent_", ""),
|
||||
)
|
||||
for role in AgentRole
|
||||
|
||||
@@ -16,6 +16,7 @@ from app.agents.registry.models import (
|
||||
@dataclass(frozen=True)
|
||||
class RegistryIndexes:
|
||||
agent_by_id: Mapping[str, AgentManifest]
|
||||
agent_by_role_value: Mapping[str, AgentManifest]
|
||||
sub_commander_by_id: Mapping[str, SubCommanderManifest]
|
||||
capability_by_id: Mapping[str, CapabilityManifest]
|
||||
specialist_template_by_id: Mapping[str, SpecialistTemplateManifest]
|
||||
@@ -24,6 +25,7 @@ class RegistryIndexes:
|
||||
skill_context_key_by_agent_id: Mapping[str, str]
|
||||
capability_id_by_tool_name: Mapping[str, str]
|
||||
capability_ids_by_sub_commander_id: Mapping[str, tuple[str, ...]]
|
||||
spawnable_role_values_by_agent_id: Mapping[str, tuple[str, ...]]
|
||||
|
||||
|
||||
def summarize_registry_indexes(indexes: RegistryIndexes) -> dict[str, int]:
|
||||
@@ -50,6 +52,9 @@ def build_registry_indexes(bundle: RegistryBundle) -> RegistryIndexes:
|
||||
|
||||
return RegistryIndexes(
|
||||
agent_by_id=MappingProxyType(agent_by_id),
|
||||
agent_by_role_value=MappingProxyType({
|
||||
agent.role_value: agent for agent in bundle.agents
|
||||
}),
|
||||
sub_commander_by_id=MappingProxyType(sub_commander_by_id),
|
||||
capability_by_id=MappingProxyType(capability_by_id),
|
||||
specialist_template_by_id=MappingProxyType(specialist_template_by_id),
|
||||
@@ -73,4 +78,9 @@ def build_registry_indexes(bundle: RegistryBundle) -> RegistryIndexes:
|
||||
sub_commander.sub_commander_id: tuple(sub_commander.capability_ids)
|
||||
for sub_commander in bundle.sub_commanders
|
||||
}),
|
||||
spawnable_role_values_by_agent_id=MappingProxyType({
|
||||
agent.agent_id: tuple(agent.allowed_spawn_role_values)
|
||||
for agent in bundle.agents
|
||||
if agent.can_spawn_children and agent.allowed_spawn_role_values
|
||||
}),
|
||||
)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class PermissionClass(str, Enum):
|
||||
@@ -23,6 +23,8 @@ class AgentManifest(BaseModel):
|
||||
system_prompt_key: str
|
||||
routing_hints: list[str]
|
||||
default_sub_commanders: list[str]
|
||||
can_spawn_children: bool = False
|
||||
allowed_spawn_role_values: list[str] = Field(default_factory=list)
|
||||
skill_context_key: str | None = None
|
||||
continuity_policy: str | None = None
|
||||
clarification_policy: str | None = None
|
||||
|
||||
@@ -1,10 +1,25 @@
|
||||
from app.agents.schemas.event import AgentEvent
|
||||
from app.agents.schemas.task import AgentTask, TaskResult, TaskLifecycleStatus, VerificationStatus
|
||||
from app.agents.schemas.message import AgentMessage
|
||||
from app.agents.schemas.task import (
|
||||
AgentTask,
|
||||
CollaborationBudget,
|
||||
InterruptRecord,
|
||||
RecoveryRecord,
|
||||
TaskLifecycleStatus,
|
||||
TaskResult,
|
||||
TaskResultStatus,
|
||||
VerificationStatus,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"AgentEvent",
|
||||
"AgentMessage",
|
||||
"AgentTask",
|
||||
"CollaborationBudget",
|
||||
"InterruptRecord",
|
||||
"RecoveryRecord",
|
||||
"TaskLifecycleStatus",
|
||||
"TaskResult",
|
||||
"TaskResultStatus",
|
||||
"VerificationStatus",
|
||||
]
|
||||
|
||||
@@ -11,6 +11,18 @@ AgentEventType = Literal[
|
||||
"agent.tool.result",
|
||||
"agent.verify.started",
|
||||
"agent.verify.completed",
|
||||
"agent.created",
|
||||
"agent.spawn.blocked",
|
||||
"agent.message.sent",
|
||||
"agent.message.received",
|
||||
"agent.interrupt.requested",
|
||||
"agent.interrupt.completed",
|
||||
"agent.recovery.started",
|
||||
"agent.recovery.completed",
|
||||
"agent.task.interrupted",
|
||||
"agent.task.recovered",
|
||||
"agent.task.reassigned",
|
||||
"agent.collaboration.budget.updated",
|
||||
"agent.error",
|
||||
]
|
||||
AgentEventSeverity = Literal["info", "warning", "error"]
|
||||
@@ -24,5 +36,11 @@ class AgentEvent(BaseModel):
|
||||
agent_id: str | None = None
|
||||
sub_commander_id: str | None = None
|
||||
task_id: str | None = None
|
||||
parent_task_id: str | None = None
|
||||
child_task_id: str | None = None
|
||||
thread_id: str | None = None
|
||||
message_id: str | None = None
|
||||
interrupt_id: str | None = None
|
||||
recovery_id: str | None = None
|
||||
payload: dict[str, Any] = Field(default_factory=dict)
|
||||
severity: AgentEventSeverity = "info"
|
||||
|
||||
29
backend/app/agents/schemas/message.py
Normal file
29
backend/app/agents/schemas/message.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
AgentMessageType = Literal[
|
||||
"task_request",
|
||||
"task_update",
|
||||
"handoff",
|
||||
"verification_request",
|
||||
"verification_feedback",
|
||||
"interrupt_notice",
|
||||
]
|
||||
|
||||
|
||||
# One message sent from one agent to another inside a thread.
class AgentMessage(BaseModel):
    # Required identifiers for the message and the thread it belongs to.
    message_id: str
    thread_id: str

    # Sender and recipient agent ids.
    from_agent_id: str
    to_agent_id: str

    # Optional links to a task and to the message being replied to.
    task_id: str | None = None
    reply_to_message_id: str | None = None

    # Kind of message; "task_update" when the caller does not specify one.
    message_type: AgentMessageType = "task_update"

    # Required short description of the message content.
    content_summary: str

    # Timezone-aware UTC timestamp taken when the instance is created.
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc),
    )

    # Free-form extra data; each instance gets its own empty dict by default.
    payload: dict[str, Any] = Field(default_factory=dict)
|
||||
@@ -8,6 +8,41 @@ from pydantic import BaseModel, Field
|
||||
|
||||
TaskLifecycleStatus = Literal["pending", "in_progress", "completed", "failed", "blocked"]
|
||||
VerificationStatus = Literal["passed", "failed", "skipped"]
|
||||
TaskResultStatus = Literal["completed", "failed", "blocked", "passed", "skipped"]
|
||||
InterruptStatus = Literal["requested", "acknowledged", "resolved"]
|
||||
BudgetMode = Literal["direct", "collaboration"]
|
||||
|
||||
|
||||
# Record of a single interrupt: its id, reason, status and origin.
class InterruptRecord(BaseModel):
    # Required id and human-readable reason.
    interrupt_id: str
    reason: str

    # One of the InterruptStatus literals; new records start as "requested".
    status: InterruptStatus = "requested"

    # Optional provenance: who asked for the interrupt and which event it
    # came from.
    requested_by: str | None = None
    source_event_id: str | None = None

    # Timezone-aware UTC timestamp taken when the instance is created.
    requested_at: datetime = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc),
    )

    # Free-form extra data; each instance gets its own empty dict by default.
    payload: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
# Record of a single recovery, optionally referencing a source interrupt and
# the task / thread that execution resumed from.
class RecoveryRecord(BaseModel):
    # Required id of this recovery.
    recovery_id: str

    # Optional reference to an interrupt record id.
    source_interrupt_id: str | None = None

    # Optional free-text strategy label (no fixed vocabulary is declared here).
    strategy: str | None = None

    # Optional ids of the task and thread that were resumed.
    resumed_from_task_id: str | None = None
    resumed_from_thread_id: str | None = None

    # Timezone-aware UTC timestamp taken when the instance is created.
    recovered_at: datetime = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc),
    )

    # Free-form extra data; each instance gets its own empty dict by default.
    payload: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
# Budget counters for a run. Every limit comes as a max_* / remaining_* pair
# and all of them are optional.
# NOTE(review): None presumably means "limit not tracked" - confirm with callers.
class CollaborationBudget(BaseModel):
    # "direct" or "collaboration"; defaults to "direct".
    mode: BudgetMode = "direct"

    # Parallel-task allowance.
    max_parallel_tasks: int | None = None
    remaining_parallel_tasks: int | None = None

    # Tool-call allowance.
    max_tool_calls: int | None = None
    remaining_tool_calls: int | None = None

    # Iteration allowance.
    max_iterations: int | None = None
    remaining_iterations: int | None = None

    # NOTE(review): the unit and trigger of this threshold are not visible
    # here - confirm against the code that reads it.
    escalation_threshold: int | None = None

    # Free-form extra data; each instance gets its own empty dict by default.
    metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class AgentTask(BaseModel):
|
||||
@@ -17,8 +52,16 @@ class AgentTask(BaseModel):
|
||||
owner_agent_id: str | None = None
|
||||
role: str | None = None
|
||||
goal: str | None = None
|
||||
parent_task_id: str | None = None
|
||||
child_task_ids: list[str] = Field(default_factory=list)
|
||||
thread_id: str | None = None
|
||||
message_id: str | None = None
|
||||
message_index: int | None = None
|
||||
expected_evidence: list[dict[str, Any]] = Field(default_factory=list)
|
||||
evidence: list[dict[str, Any]] = Field(default_factory=list)
|
||||
interrupt_records: list[InterruptRecord | dict[str, Any]] = Field(default_factory=list)
|
||||
recovery_records: list[RecoveryRecord | dict[str, Any]] = Field(default_factory=list)
|
||||
collaboration_budget: CollaborationBudget | dict[str, Any] | None = None
|
||||
result_summary: str | None = None
|
||||
created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
|
||||
updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
|
||||
@@ -26,7 +69,17 @@ class AgentTask(BaseModel):
|
||||
|
||||
class TaskResult(BaseModel):
|
||||
task_id: str
|
||||
status: VerificationStatus
|
||||
status: TaskResultStatus
|
||||
summary: str | None = None
|
||||
evidence: list[dict[str, Any]] = Field(default_factory=list)
|
||||
owner_agent_id: str | None = None
|
||||
parent_task_id: str | None = None
|
||||
child_task_ids: list[str] = Field(default_factory=list)
|
||||
thread_id: str | None = None
|
||||
message_id: str | None = None
|
||||
message_index: int | None = None
|
||||
interrupt_records: list[InterruptRecord | dict[str, Any]] = Field(default_factory=list)
|
||||
recovery_records: list[RecoveryRecord | dict[str, Any]] = Field(default_factory=list)
|
||||
budget_snapshot: CollaborationBudget | dict[str, Any] | None = None
|
||||
next_action: str | None = None
|
||||
output_data: dict[str, Any] | None = None
|
||||
|
||||
@@ -3,8 +3,9 @@ from enum import Enum
|
||||
from typing import Annotated, Any, Literal, TypedDict
|
||||
|
||||
from app.agents.schemas.event import AgentEvent
|
||||
from app.agents.schemas.task import AgentTask, TaskResult, VerificationStatus
|
||||
from langchain_core.messages import BaseMessage
|
||||
from app.agents.schemas.message import AgentMessage
|
||||
from app.agents.schemas.task import AgentTask, CollaborationBudget, InterruptRecord, RecoveryRecord, TaskResult, VerificationStatus
|
||||
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
|
||||
from langgraph.graph.message import add_messages
|
||||
|
||||
|
||||
@@ -24,12 +25,27 @@ class ConversationTurn:
|
||||
model: str | None = None
|
||||
|
||||
|
||||
def turn_to_message(turn: ConversationTurn) -> BaseMessage:
    """Convert a conversation turn into a LangChain message object.

    A turn whose role is exactly ``"user"`` becomes a ``HumanMessage``; any
    other role becomes an ``AIMessage``. Only ``turn.content`` is copied.
    """
    message_cls = HumanMessage if turn.role == "user" else AIMessage
    return message_cls(content=turn.content)
|
||||
|
||||
|
||||
class AgentState(TypedDict):
|
||||
messages: Annotated[list[BaseMessage], add_messages]
|
||||
user_id: str
|
||||
conversation_id: str
|
||||
parent_conversation_id: str | None
|
||||
thread_id: str | None
|
||||
last_message_id: str | None
|
||||
message_sequence: int
|
||||
agent_id: str | None
|
||||
parent_agent_id: str | None
|
||||
root_agent_id: str | None
|
||||
collaboration_depth: int
|
||||
spawned_agent_ids: list[str]
|
||||
|
||||
execution_mode: Literal["direct", "delegated", "verified"]
|
||||
execution_mode: Literal["direct", "collaboration", "delegated", "verified"]
|
||||
current_agent: str | None
|
||||
next_step: str | None
|
||||
active_agents: list[AgentRole]
|
||||
@@ -38,11 +54,16 @@ class AgentState(TypedDict):
|
||||
sub_commander_trace: list[dict[str, Any]]
|
||||
agent_trace: list[str]
|
||||
event_trace: list[AgentEvent | dict[str, Any]]
|
||||
message_trace: list[AgentMessage | dict[str, Any]]
|
||||
|
||||
pending_tasks: list[dict[str, Any]]
|
||||
completed_tasks: list[dict[str, Any]]
|
||||
active_tasks: list[AgentTask | dict[str, Any]]
|
||||
task_results: list[TaskResult | dict[str, Any]]
|
||||
task_hierarchy: dict[str, list[str]]
|
||||
interrupted_tasks: list[InterruptRecord | dict[str, Any]]
|
||||
recovery_trace: list[RecoveryRecord | dict[str, Any]]
|
||||
recovery_points: list[dict[str, Any]]
|
||||
tool_calls: list[dict[str, Any]]
|
||||
last_tool_result: str | None
|
||||
action_results: list[dict[str, Any]]
|
||||
@@ -54,7 +75,8 @@ class AgentState(TypedDict):
|
||||
verification_status: VerificationStatus | None
|
||||
verification_summary: str | None
|
||||
verification_evidence: list[dict[str, Any]]
|
||||
budget_state: dict[str, Any] | None
|
||||
budget_state: CollaborationBudget | dict[str, Any] | None
|
||||
collaboration_budget_history: list[CollaborationBudget | dict[str, Any]]
|
||||
|
||||
tool_strategy_used: str | None
|
||||
tool_round_count: int
|
||||
@@ -102,6 +124,15 @@ def initial_state(user_id: str, conversation_id: str) -> AgentState:
|
||||
messages=[],
|
||||
user_id=user_id,
|
||||
conversation_id=conversation_id,
|
||||
parent_conversation_id=None,
|
||||
thread_id=None,
|
||||
last_message_id=None,
|
||||
message_sequence=0,
|
||||
agent_id=AgentRole.MASTER.value,
|
||||
parent_agent_id=None,
|
||||
root_agent_id=AgentRole.MASTER.value,
|
||||
collaboration_depth=0,
|
||||
spawned_agent_ids=[],
|
||||
execution_mode="direct",
|
||||
current_agent=AgentRole.MASTER.value,
|
||||
next_step=None,
|
||||
@@ -111,10 +142,15 @@ def initial_state(user_id: str, conversation_id: str) -> AgentState:
|
||||
sub_commander_trace=[],
|
||||
agent_trace=[AgentRole.MASTER.value],
|
||||
event_trace=[],
|
||||
message_trace=[],
|
||||
pending_tasks=[],
|
||||
completed_tasks=[],
|
||||
active_tasks=[],
|
||||
task_results=[],
|
||||
task_hierarchy={},
|
||||
interrupted_tasks=[],
|
||||
recovery_trace=[],
|
||||
recovery_points=[],
|
||||
tool_calls=[],
|
||||
last_tool_result=None,
|
||||
action_results=[],
|
||||
@@ -126,6 +162,7 @@ def initial_state(user_id: str, conversation_id: str) -> AgentState:
|
||||
verification_summary=None,
|
||||
verification_evidence=[],
|
||||
budget_state=None,
|
||||
collaboration_budget_history=[],
|
||||
tool_strategy_used=None,
|
||||
tool_round_count=0,
|
||||
max_tool_rounds=2,
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
from typing import Any, cast
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from app.agents.schemas.task import AgentTask, TaskResult, VerificationStatus
|
||||
from app.agents.schemas.task import AgentTask, TaskResult, TaskResultStatus, VerificationStatus
|
||||
from app.agents.state import AgentState
|
||||
|
||||
|
||||
@@ -14,6 +14,34 @@ class VerificationVerdict(BaseModel):
|
||||
evidence: list[dict[str, Any]] = Field(default_factory=list)
|
||||
|
||||
|
||||
def normalize_task_result(
    task_result: TaskResult | dict[str, Any],
    *,
    default_task_id: str | None = None,
) -> TaskResult:
    """Coerce a task result (model or loose dict) into a ``TaskResult``.

    Args:
        task_result: A ``TaskResult`` instance or a dict using the same keys.
            A falsy value is treated as an empty dict.
        default_task_id: Task id used when the payload has no truthy
            ``task_id``; ``"unknown-task"`` is the final fallback.

    Returns:
        A newly built ``TaskResult``. A status that is missing, not a string,
        or outside the known result statuses is normalized to ``"failed"``.
        ``message_index`` and ``output_data`` are reset to ``None`` when they
        do not have the expected type. ``budget_snapshot`` is kept when it is
        a dict or a pydantic model (serialized to a dict) and is ``None``
        otherwise.
    """
    if isinstance(task_result, TaskResult):
        payload = task_result.model_dump(mode="json")
    else:
        payload = dict(task_result or {})

    # Check the type before the membership test: an unhashable status (for
    # example a list or dict) would otherwise raise TypeError instead of
    # falling back to "failed".
    normalized_status = payload.get("status")
    if not (
        isinstance(normalized_status, str)
        and normalized_status in {"completed", "failed", "blocked", "passed", "skipped"}
    ):
        normalized_status = "failed"

    # A dict payload may carry the budget as a model instance rather than a
    # plain dict; serialize it instead of silently discarding it.
    budget_snapshot = payload.get("budget_snapshot")
    if isinstance(budget_snapshot, BaseModel):
        budget_snapshot = budget_snapshot.model_dump(mode="json")
    elif not isinstance(budget_snapshot, dict):
        budget_snapshot = None

    message_index = payload.get("message_index")
    output_data = payload.get("output_data")

    return TaskResult(
        task_id=str(payload.get("task_id") or default_task_id or "unknown-task"),
        status=cast(TaskResultStatus, normalized_status),
        summary=payload.get("summary"),
        evidence=list(payload.get("evidence") or []),
        owner_agent_id=payload.get("owner_agent_id"),
        parent_task_id=payload.get("parent_task_id"),
        child_task_ids=list(payload.get("child_task_ids") or []),
        thread_id=payload.get("thread_id"),
        message_id=payload.get("message_id"),
        message_index=message_index if isinstance(message_index, int) else None,
        interrupt_records=list(payload.get("interrupt_records") or []),
        recovery_records=list(payload.get("recovery_records") or []),
        budget_snapshot=budget_snapshot,
        next_action=payload.get("next_action"),
        output_data=output_data if isinstance(output_data, dict) else None,
    )
|
||||
|
||||
|
||||
def verify_task_result(
|
||||
*,
|
||||
task: AgentTask | dict[str, Any] | None = None,
|
||||
@@ -30,8 +58,13 @@ def verify_task_result(
|
||||
if status is not None:
|
||||
return VerificationVerdict(status=status, summary=normalized_summary, evidence=normalized_evidence)
|
||||
|
||||
if normalized_result.get("status") in {"passed", "failed", "skipped"}:
|
||||
inferred_status = normalized_result["status"]
|
||||
normalized_status = normalized_result.get("status")
|
||||
if normalized_status in {"passed", "failed", "skipped"}:
|
||||
inferred_status = normalized_status
|
||||
elif normalized_status == "completed":
|
||||
inferred_status = "passed"
|
||||
elif normalized_status == "blocked":
|
||||
inferred_status = "skipped"
|
||||
elif normalized_result.get("success") is True:
|
||||
inferred_status = "passed"
|
||||
elif normalized_result.get("success") is False:
|
||||
@@ -57,4 +90,4 @@ def apply_verification_verdict(state: AgentState, verdict: VerificationVerdict)
|
||||
return AgentState(**next_state)
|
||||
|
||||
|
||||
__all__ = ["VerificationVerdict", "apply_verification_verdict", "verify_task_result"]
|
||||
__all__ = ["VerificationVerdict", "apply_verification_verdict", "normalize_task_result", "verify_task_result"]
|
||||
|
||||
Reference in New Issue
Block a user