feat: add Jarvis agent verification foundation
Add Day 1 agent runtime foundations with task and event schemas, verifier support, capability metadata, graph event tracing, and regression coverage while preserving the direct execution path.
This commit is contained in:
@@ -6,6 +6,7 @@ import asyncio
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from uuid import uuid4
|
||||
from typing import Any, Literal, cast
|
||||
|
||||
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage, ToolMessage
|
||||
@@ -19,10 +20,13 @@ from app.agents.prompts import (
|
||||
MASTER_SYSTEM_PROMPT,
|
||||
SCHEDULE_PLANNER_SYSTEM_PROMPT,
|
||||
)
|
||||
from app.agents.registry import load_builtin_registry_indexes
|
||||
from app.agents.schemas.event import AgentEvent
|
||||
from app.agents.skill_registry import build_skill_context
|
||||
from app.agents.state import AgentRole, AgentState
|
||||
from app.agents.tools import SUB_COMMANDER_TOOLSETS
|
||||
from app.agents.tools.time_reasoning import normalize_tool_time_arguments
|
||||
from app.agents.verifier import apply_verification_verdict, verify_task_result
|
||||
from app.services.llm_service import (
|
||||
create_llm_from_config,
|
||||
default_provider_capabilities,
|
||||
@@ -632,6 +636,76 @@ def _conversation_history_messages(state: AgentState) -> list[BaseMessage]:
|
||||
return [message for message in history if getattr(message, "type", "") != "system"]
|
||||
|
||||
|
||||
def _append_event_trace(
    state: AgentState,
    event_type: str,
    *,
    payload: dict[str, Any] | None = None,
    severity: str = "info",
    task_id: str | None = None,
) -> None:
    """Record one agent event at the end of ``state["event_trace"]``.

    An ``AgentEvent`` is built from the current state (conversation id,
    current agent, current sub-commander) plus the supplied arguments, then
    stored in its JSON-mode dumped form.

    Args:
        state: Agent state; its ``event_trace`` entry is replaced with a new
            list that ends with the new event.
        event_type: Event type name handed to ``AgentEvent``.
        payload: Optional event details; a falsy value is stored as ``{}``.
        severity: Severity label handed to ``AgentEvent``.
        task_id: Optional identifier tying the event to a task or tool call.
    """
    # An empty or missing conversation id is recorded as None, not "".
    conversation_id = str(state.get("conversation_id") or "")

    record = AgentEvent(
        event_id=f"evt-{uuid4()}",
        # The casts only satisfy the static type checker; the values are
        # passed to the model unchanged.
        event_type=cast(Any, event_type),
        conversation_id=conversation_id if conversation_id else None,
        agent_id=_role_value(state.get("current_agent")),
        sub_commander_id=state.get("current_sub_commander"),
        task_id=task_id,
        payload=payload or {},
        severity=cast(Any, severity),
    )

    # Build a fresh list rather than appending in place, so a previously
    # shared trace list is never mutated.
    history = list(state.get("event_trace") or [])
    history.append(record.model_dump(mode="json"))
    state["event_trace"] = history
|
||||
|
||||
|
||||
def _capability_manifest_for_tool(tool_name: str):
    """Return the builtin capability manifest registered for ``tool_name``.

    The tool name is first mapped to a capability id, then the id to its
    manifest. ``None`` is returned when either lookup finds nothing.
    """
    registry = load_builtin_registry_indexes()
    capability_id = registry.capability_id_by_tool_name.get(tool_name)
    return None if capability_id is None else registry.capability_by_id.get(capability_id)
|
||||
|
||||
|
||||
def _build_verifier_hints(state: AgentState, tool_name: str, result: Any) -> dict[str, Any]:
    """Assemble capability metadata and a result preview for one tool call.

    Args:
        state: Agent state. Not read by this function at present.
        tool_name: Name of the tool that was invoked.
        result: Raw tool result; only a truncated preview of it is kept.

    Returns:
        A dict with the tool name, the ``.value`` of the manifest's
        permission class and side-effect scope (``None`` when unavailable),
        three boolean capability flags (``False`` when the tool has no
        manifest), and the first 200 items of the stringified result.
    """
    manifest = _capability_manifest_for_tool(tool_name)

    def _enum_value(attribute: str) -> Any:
        # Missing manifest or missing attribute both collapse to None.
        return getattr(getattr(manifest, attribute, None), "value", None)

    def _flag(attribute: str) -> bool:
        return bool(getattr(manifest, attribute, False))

    hints: dict[str, Any] = {"tool_name": tool_name}
    hints["permission_class"] = _enum_value("permission_class")
    hints["side_effect_scope"] = _enum_value("side_effect_scope")
    hints["requires_confirmation"] = _flag("requires_confirmation")
    hints["supports_retry"] = _flag("supports_retry")
    hints["safe_for_parallel_use"] = _flag("safe_for_parallel_use")
    hints["result_preview"] = _stringify_message_content(result)[:200]
    return hints
|
||||
|
||||
|
||||
def _update_task_result_summary(state: AgentState, tool_summaries: list[dict[str, Any]]) -> None:
|
||||
if not tool_summaries:
|
||||
return
|
||||
|
||||
previous_summary = state.get("task_result_summary") or {}
|
||||
previous_tools = previous_summary.get("tools") or []
|
||||
merged_tools = [*previous_tools, *tool_summaries]
|
||||
summary = {
|
||||
"tool_count": len(merged_tools),
|
||||
"tools": merged_tools,
|
||||
"created_count": sum(int(item.get("created_count") or 0) for item in merged_tools),
|
||||
"created_entity_types": [
|
||||
entity_type
|
||||
for item in merged_tools
|
||||
for entity_type in item.get("created_entity_types") or []
|
||||
if entity_type
|
||||
],
|
||||
"stop_reason": state.get("stop_reason"),
|
||||
}
|
||||
state["task_result_summary"] = summary
|
||||
state["action_results"] = [*(state.get("action_results") or []), summary]
|
||||
|
||||
|
||||
def _record_sub_commander(state: AgentState, role: AgentRole, sub_commander: str, user_query: str) -> None:
|
||||
state["current_agent"] = role.value
|
||||
state["current_sub_commander"] = sub_commander
|
||||
@@ -889,6 +963,8 @@ async def _execute_tool_calls(
|
||||
result_lines: list[str] = []
|
||||
created_entities: list[dict[str, str]] = []
|
||||
tool_messages: list[ToolMessage] = []
|
||||
verifier_hints_by_tool: list[dict[str, Any]] = []
|
||||
tool_summaries: list[dict[str, Any]] = []
|
||||
|
||||
for call in tool_calls:
|
||||
tool_name = call["name"]
|
||||
@@ -897,6 +973,13 @@ async def _execute_tool_calls(
|
||||
if tool is None:
|
||||
raise ValueError(f"Tool not found: {tool_name}")
|
||||
|
||||
_append_event_trace(
|
||||
state,
|
||||
"agent.tool.start",
|
||||
payload={"tool_name": tool_name, "args": normalized_args},
|
||||
task_id=str(call.get("id") or "") or None,
|
||||
)
|
||||
|
||||
try:
|
||||
if hasattr(tool, "ainvoke"):
|
||||
result = await tool.ainvoke(normalized_args)
|
||||
@@ -905,6 +988,13 @@ async def _execute_tool_calls(
|
||||
except Exception as exc:
|
||||
logger.exception("Tool execution failed: %s args=%s", tool_name, normalized_args)
|
||||
result = f"工具执行失败: {exc}"
|
||||
_append_event_trace(
|
||||
state,
|
||||
"agent.error",
|
||||
payload={"tool_name": tool_name, "args": normalized_args, "error": str(exc)},
|
||||
severity="error",
|
||||
task_id=str(call.get("id") or "") or None,
|
||||
)
|
||||
|
||||
normalized_call = {
|
||||
"id": call.get("id"),
|
||||
@@ -914,6 +1004,27 @@ async def _execute_tool_calls(
|
||||
}
|
||||
normalized_calls.append(normalized_call)
|
||||
result_lines.append(f"[{tool_name}] {result}")
|
||||
verifier_hints = _build_verifier_hints(state, tool_name, result)
|
||||
verifier_hints_by_tool.append(verifier_hints)
|
||||
tool_outcome = {
|
||||
"tool_name": tool_name,
|
||||
"args": normalized_args,
|
||||
"result_preview": _stringify_message_content(result)[:200],
|
||||
"verifier_hints": verifier_hints,
|
||||
}
|
||||
state["tool_outcomes"] = [*(state.get("tool_outcomes") or []), tool_outcome]
|
||||
_append_event_trace(
|
||||
state,
|
||||
"agent.tool.result",
|
||||
payload={
|
||||
"tool_name": tool_name,
|
||||
"args": normalized_args,
|
||||
"result_preview": _stringify_message_content(result)[:200],
|
||||
"verification": verifier_hints,
|
||||
},
|
||||
severity="error" if _tool_result_indicates_failure(result) else "info",
|
||||
task_id=str(call.get("id") or "") or None,
|
||||
)
|
||||
tool_messages.append(
|
||||
ToolMessage(
|
||||
content=_stringify_message_content(result),
|
||||
@@ -922,9 +1033,21 @@ async def _execute_tool_calls(
|
||||
)
|
||||
)
|
||||
entity = _classify_created_entity(tool_name)
|
||||
call_created_entities: list[dict[str, str]] = []
|
||||
if entity and not _tool_result_indicates_failure(result):
|
||||
created_entities.append(entity)
|
||||
call_created_entities.append(entity)
|
||||
tool_summaries.append(
|
||||
{
|
||||
"tool_name": tool_name,
|
||||
"result_preview": _stringify_message_content(result)[:200],
|
||||
"created_entity_types": [entity.get("type") for entity in call_created_entities if entity.get("type")],
|
||||
"created_count": len(call_created_entities),
|
||||
}
|
||||
)
|
||||
|
||||
state["verifier_hints"] = {"tools": verifier_hints_by_tool}
|
||||
_update_task_result_summary(state, tool_summaries)
|
||||
return normalized_calls, "\n".join(result_lines), created_entities, tool_messages
|
||||
|
||||
|
||||
@@ -1127,6 +1250,43 @@ async def _run_sub_commander(
|
||||
if summary_target:
|
||||
state[_summary_state_key(summary_target)] = state.get("final_response")
|
||||
|
||||
task_result_summary = state.get("task_result_summary")
|
||||
tool_outcomes = list(state.get("tool_outcomes") or [])
|
||||
has_tool_failure = any(
|
||||
_tool_result_indicates_failure(outcome.get("result_preview"))
|
||||
for outcome in tool_outcomes
|
||||
)
|
||||
verifier_input = {
|
||||
"summary": state.get("final_response") or (task_result_summary or {}).get("tools"),
|
||||
"evidence": tool_outcomes,
|
||||
"success": bool(tool_outcomes or state.get("final_response")) and not has_tool_failure,
|
||||
}
|
||||
_append_event_trace(
|
||||
state,
|
||||
"agent.verify.started",
|
||||
payload={
|
||||
"summary_present": bool(verifier_input["summary"]),
|
||||
"evidence_count": len(verifier_input["evidence"]),
|
||||
},
|
||||
)
|
||||
verdict = verify_task_result(
|
||||
summary=state.get("final_response"),
|
||||
evidence=tool_outcomes,
|
||||
result=verifier_input,
|
||||
)
|
||||
updated_state = apply_verification_verdict(state, verdict)
|
||||
state.update(updated_state)
|
||||
_append_event_trace(
|
||||
state,
|
||||
"agent.verify.completed",
|
||||
payload={
|
||||
"status": verdict.status,
|
||||
"summary": verdict.summary,
|
||||
"evidence_count": len(verdict.evidence),
|
||||
},
|
||||
severity="error" if verdict.status == "failed" else "info",
|
||||
)
|
||||
|
||||
final_response_text = state.get("final_response")
|
||||
if not state.get("clarification_needed") and final_response_text:
|
||||
_clear_clarification_context(state)
|
||||
@@ -1355,6 +1515,7 @@ def get_agent_graph(callbacks: list | None = None):
|
||||
|
||||
|
||||
__all__ = [
|
||||
"_build_verifier_hints",
|
||||
"_choose_sub_commander",
|
||||
"_parse_json_action",
|
||||
"_route_agent_from_user_query",
|
||||
|
||||
@@ -324,6 +324,19 @@ ANALYST_INSIGHTS_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
|
||||
"""
|
||||
|
||||
|
||||
VERIFIER_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
|
||||
|
||||
你是 Jarvis 的验证官,负责对执行结果做最小但明确的核验。
|
||||
|
||||
## 你的职责:
|
||||
- 只输出 passed、failed、skipped 三种验证结论之一
|
||||
- 用一句话总结验证判断
|
||||
- 如有证据,保留关键证据点
|
||||
- 当信息不足以证明成功或失败时,优先判定为 skipped
|
||||
- 不重写执行方案,不扩展无关建议
|
||||
"""
|
||||
|
||||
|
||||
JSON_ACTION_FALLBACK_PROMPT = """你当前运行在 JSON action fallback 模式。
|
||||
|
||||
你的输出必须满足以下规则:
|
||||
|
||||
@@ -1,11 +1,19 @@
|
||||
"""Registry manifest models and validation helpers."""
|
||||
|
||||
from functools import lru_cache
|
||||
|
||||
from app.agents.registry.indexes import RegistryIndexes, build_registry_indexes
|
||||
from app.agents.registry.loader import RegistryBundle, load_builtin_registry_bundle
|
||||
|
||||
@lru_cache(maxsize=1)
def load_builtin_registry_indexes() -> RegistryIndexes:
    """Return the lookup indexes built from the builtin registry bundle.

    The result is memoized by ``lru_cache``, so the bundle is loaded and
    indexed on the first call and the same object is returned afterwards.
    """
    bundle = load_builtin_registry_bundle()
    return build_registry_indexes(bundle)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"RegistryBundle",
|
||||
"RegistryIndexes",
|
||||
"build_registry_indexes",
|
||||
"load_builtin_registry_bundle",
|
||||
"load_builtin_registry_indexes",
|
||||
]
|
||||
|
||||
@@ -2,6 +2,8 @@ from app.agents.prompts import SUB_COMMANDER_PROMPTS_BY_KEY
|
||||
from app.agents.registry.models import (
|
||||
AgentManifest,
|
||||
CapabilityManifest,
|
||||
PermissionClass,
|
||||
SideEffectScope,
|
||||
SpecialistTemplateManifest,
|
||||
SubCommanderManifest,
|
||||
)
|
||||
@@ -89,10 +91,150 @@ _capability_tool_names = tuple(
|
||||
)
|
||||
)
|
||||
|
||||
# Metadata shared by every tool that only reads data.
_READ_ONLY_CAPABILITY_METADATA: dict[str, object] = {
    "permission_class": PermissionClass.READ,
    "side_effect_scope": SideEffectScope.NONE,
    "supports_retry": True,
    "idempotent": True,
    "safe_for_parallel_use": True,
    "requires_confirmation": False,
}

# web_search carries the read-only flags but is tagged EXTERNAL / NETWORK.
_EXTERNAL_NETWORK_CAPABILITY_METADATA: dict[str, object] = {
    **_READ_ONLY_CAPABILITY_METADATA,
    "permission_class": PermissionClass.EXTERNAL,
    "side_effect_scope": SideEffectScope.NETWORK,
}

# Metadata shared by every tool that writes local state.
_LOCAL_WRITE_CAPABILITY_METADATA: dict[str, object] = {
    "permission_class": PermissionClass.WRITE,
    "side_effect_scope": SideEffectScope.LOCAL_STATE,
    "supports_retry": False,
    "idempotent": False,
    "safe_for_parallel_use": False,
    "requires_confirmation": True,
}

_READ_ONLY_TOOL_NAMES: tuple[str, ...] = (
    "get_tasks",
    "get_schedule_day",
    "resolve_time_expression",
    "search_knowledge",
    "hybrid_search",
    "get_knowledge_graph_context",
    "get_forum_posts",
    "scan_forum_for_instructions",
)

_LOCAL_WRITE_TOOL_NAMES: tuple[str, ...] = (
    "create_task",
    "update_task_status",
    "create_todo",
    "create_schedule_task",
    "create_reminder",
    "create_goal",
    "create_forum_post",
    "build_knowledge_graph",
)

# Capability metadata keyed by tool name. Each tool gets its own dict copy so
# that mutating one entry never leaks into another.
_CAPABILITY_METADATA_BY_TOOL_NAME: dict[str, dict[str, object]] = {
    **{name: dict(_READ_ONLY_CAPABILITY_METADATA) for name in _READ_ONLY_TOOL_NAMES},
    "web_search": dict(_EXTERNAL_NETWORK_CAPABILITY_METADATA),
    **{name: dict(_LOCAL_WRITE_CAPABILITY_METADATA) for name in _LOCAL_WRITE_TOOL_NAMES},
}
|
||||
|
||||
def _builtin_capability_manifest(tool_name: str) -> CapabilityManifest:
    """Build the manifest for one builtin tool, using its name as capability id."""
    # NOTE(review): a tool missing from the metadata table receives no extra
    # keyword arguments here, so its manifest relies entirely on
    # CapabilityManifest's own field defaults.
    metadata = _CAPABILITY_METADATA_BY_TOOL_NAME.get(tool_name, {})
    return CapabilityManifest(capability_id=tool_name, tool_name=tool_name, **metadata)


# One manifest per builtin capability tool name, in the same order.
BUILTIN_CAPABILITY_MANIFESTS: tuple[CapabilityManifest, ...] = tuple(
    map(_builtin_capability_manifest, _capability_tool_names)
)
|
||||
|
||||
@@ -1,6 +1,21 @@
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class PermissionClass(str, Enum):
    """Permission category attached to a capability manifest.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value.
    """

    READ = "read"
    WRITE = "write"
    EXTERNAL = "external"
|
||||
|
||||
|
||||
class SideEffectScope(str, Enum):
    """Side-effect scope label attached to a capability manifest.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value.
    """

    NONE = "none"
    LOCAL_STATE = "local_state"
    DB_WRITE = "db_write"
    NETWORK = "network"
|
||||
|
||||
|
||||
class AgentManifest(BaseModel):
|
||||
agent_id: str
|
||||
display_name: str
|
||||
@@ -23,6 +38,12 @@ class SubCommanderManifest(BaseModel):
|
||||
class CapabilityManifest(BaseModel):
    """Registry entry describing one capability and the tool behind it.

    Only the two identifiers are required. Every metadata field has a
    default: READ permission, no side effects, and all flags off.
    """

    # Identity.
    capability_id: str
    tool_name: str

    # Classification.
    permission_class: PermissionClass = PermissionClass.READ
    side_effect_scope: SideEffectScope = SideEffectScope.NONE

    # Execution flags; all default to False.
    supports_retry: bool = False
    idempotent: bool = False
    safe_for_parallel_use: bool = False
    requires_confirmation: bool = False
|
||||
|
||||
|
||||
class SpecialistTemplateManifest(BaseModel):
|
||||
|
||||
10
backend/app/agents/schemas/__init__.py
Normal file
10
backend/app/agents/schemas/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
from app.agents.schemas.event import AgentEvent
|
||||
from app.agents.schemas.task import AgentTask, TaskResult, TaskLifecycleStatus, VerificationStatus
|
||||
|
||||
__all__ = [
|
||||
"AgentEvent",
|
||||
"AgentTask",
|
||||
"TaskLifecycleStatus",
|
||||
"TaskResult",
|
||||
"VerificationStatus",
|
||||
]
|
||||
28
backend/app/agents/schemas/event.py
Normal file
28
backend/app/agents/schemas/event.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
AgentEventType = Literal[
|
||||
"agent.tool.start",
|
||||
"agent.tool.result",
|
||||
"agent.verify.started",
|
||||
"agent.verify.completed",
|
||||
"agent.error",
|
||||
]
|
||||
AgentEventSeverity = Literal["info", "warning", "error"]
|
||||
|
||||
|
||||
def _event_timestamp_now() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


class AgentEvent(BaseModel):
    """A single entry in an agent run's event trace.

    ``event_id`` and ``event_type`` are required. The remaining fields are
    optional context about where the event came from.
    """

    event_id: str
    event_type: AgentEventType

    # Filled in with the current UTC time when not supplied.
    timestamp: datetime = Field(default_factory=_event_timestamp_now)

    # Optional identifiers locating the event within a run.
    conversation_id: str | None = None
    agent_id: str | None = None
    sub_commander_id: str | None = None
    task_id: str | None = None

    # Free-form event details; a fresh empty dict per instance by default.
    payload: dict[str, Any] = Field(default_factory=dict)
    severity: AgentEventSeverity = "info"
|
||||
32
backend/app/agents/schemas/task.py
Normal file
32
backend/app/agents/schemas/task.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
TaskLifecycleStatus = Literal["pending", "in_progress", "completed", "failed", "blocked"]
|
||||
VerificationStatus = Literal["passed", "failed", "skipped"]
|
||||
|
||||
|
||||
def _task_timestamp_now() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


class AgentTask(BaseModel):
    """A unit of agent work together with its lifecycle state and evidence.

    ``task_id`` and ``title`` are required. A new task starts as
    ``"pending"`` with empty evidence lists.
    """

    task_id: str
    title: str
    status: TaskLifecycleStatus = "pending"

    # Optional descriptive fields.
    owner_agent_id: str | None = None
    role: str | None = None
    goal: str | None = None

    # Evidence expected up front and evidence actually collected; each
    # instance gets its own fresh list.
    expected_evidence: list[dict[str, Any]] = Field(default_factory=list)
    evidence: list[dict[str, Any]] = Field(default_factory=list)
    result_summary: str | None = None

    # Both timestamps default to the current UTC time. Each default is
    # evaluated independently, so the two values may differ slightly.
    created_at: datetime = Field(default_factory=_task_timestamp_now)
    updated_at: datetime = Field(default_factory=_task_timestamp_now)
|
||||
|
||||
|
||||
class TaskResult(BaseModel):
    """Outcome recorded for a task, expressed as a verification status.

    ``task_id`` and ``status`` are required. Summary, evidence and output
    data are optional.
    """

    task_id: str
    status: VerificationStatus

    summary: str | None = None
    # A fresh empty list per instance when no evidence is supplied.
    evidence: list[dict[str, Any]] = Field(default_factory=list)
    output_data: dict[str, Any] | None = None
|
||||
@@ -1,7 +1,9 @@
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Annotated, Any, TypedDict
|
||||
from typing import Annotated, Any, Literal, TypedDict
|
||||
|
||||
from app.agents.schemas.event import AgentEvent
|
||||
from app.agents.schemas.task import AgentTask, TaskResult, VerificationStatus
|
||||
from langchain_core.messages import BaseMessage
|
||||
from langgraph.graph.message import add_messages
|
||||
|
||||
@@ -27,6 +29,7 @@ class AgentState(TypedDict):
|
||||
user_id: str
|
||||
conversation_id: str
|
||||
|
||||
execution_mode: Literal["direct", "delegated", "verified"]
|
||||
current_agent: str | None
|
||||
next_step: str | None
|
||||
active_agents: list[AgentRole]
|
||||
@@ -34,14 +37,24 @@ class AgentState(TypedDict):
|
||||
active_sub_commanders: list[str]
|
||||
sub_commander_trace: list[dict[str, Any]]
|
||||
agent_trace: list[str]
|
||||
event_trace: list[AgentEvent | dict[str, Any]]
|
||||
|
||||
pending_tasks: list[dict[str, Any]]
|
||||
completed_tasks: list[dict[str, Any]]
|
||||
active_tasks: list[AgentTask | dict[str, Any]]
|
||||
task_results: list[TaskResult | dict[str, Any]]
|
||||
tool_calls: list[dict[str, Any]]
|
||||
last_tool_result: str | None
|
||||
action_results: list[dict[str, Any]]
|
||||
created_entities: list[dict[str, Any]]
|
||||
tool_outcomes: list[dict[str, Any]]
|
||||
task_result_summary: dict[str, Any] | None
|
||||
verifier_hints: dict[str, Any] | None
|
||||
|
||||
verification_status: VerificationStatus | None
|
||||
verification_summary: str | None
|
||||
verification_evidence: list[dict[str, Any]]
|
||||
budget_state: dict[str, Any] | None
|
||||
|
||||
tool_strategy_used: str | None
|
||||
tool_round_count: int
|
||||
@@ -89,6 +102,7 @@ def initial_state(user_id: str, conversation_id: str) -> AgentState:
|
||||
messages=[],
|
||||
user_id=user_id,
|
||||
conversation_id=conversation_id,
|
||||
execution_mode="direct",
|
||||
current_agent=AgentRole.MASTER.value,
|
||||
next_step=None,
|
||||
active_agents=[AgentRole.MASTER],
|
||||
@@ -96,13 +110,22 @@ def initial_state(user_id: str, conversation_id: str) -> AgentState:
|
||||
active_sub_commanders=[],
|
||||
sub_commander_trace=[],
|
||||
agent_trace=[AgentRole.MASTER.value],
|
||||
event_trace=[],
|
||||
pending_tasks=[],
|
||||
completed_tasks=[],
|
||||
active_tasks=[],
|
||||
task_results=[],
|
||||
tool_calls=[],
|
||||
last_tool_result=None,
|
||||
action_results=[],
|
||||
created_entities=[],
|
||||
tool_outcomes=[],
|
||||
task_result_summary=None,
|
||||
verifier_hints=None,
|
||||
verification_status=None,
|
||||
verification_summary=None,
|
||||
verification_evidence=[],
|
||||
budget_state=None,
|
||||
tool_strategy_used=None,
|
||||
tool_round_count=0,
|
||||
max_tool_rounds=2,
|
||||
|
||||
60
backend/app/agents/verifier.py
Normal file
60
backend/app/agents/verifier.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from app.agents.schemas.task import AgentTask, TaskResult, VerificationStatus
|
||||
from app.agents.state import AgentState
|
||||
|
||||
|
||||
class VerificationVerdict(BaseModel):
    """Result of verifying a task: a status, an optional summary, and evidence."""

    status: VerificationStatus
    summary: str | None = None
    # A fresh empty list per instance when no evidence is supplied.
    evidence: list[dict[str, Any]] = Field(default_factory=list)
|
||||
|
||||
|
||||
def _coerce_verification_summary(value: Any) -> str | None:
    """Return ``value`` in a form accepted by ``VerificationVerdict.summary``.

    Strings and ``None`` pass through unchanged. Any other falsy value
    becomes ``None``. Any other truthy value (for example a list of tool
    summaries) is rendered as JSON text, falling back to ``str()`` when it
    cannot be serialized.
    """
    if value is None or isinstance(value, str):
        return value
    if not value:
        return None

    import json

    try:
        # ensure_ascii=False keeps non-ASCII text readable in the summary.
        return json.dumps(value, ensure_ascii=False, default=str)
    except (TypeError, ValueError):
        return str(value)


def verify_task_result(
    *,
    task: AgentTask | dict[str, Any] | None = None,
    result: TaskResult | dict[str, Any] | None = None,
    summary: str | None = None,
    evidence: list[dict[str, Any]] | None = None,
    status: VerificationStatus | None = None,
) -> VerificationVerdict:
    """Derive a verification verdict from whatever task information is given.

    Summary and evidence are each taken from the first truthy source among
    the explicit argument, ``result``, and ``task``. A non-string summary
    found in ``result`` or ``task`` is converted to text so that building
    the verdict cannot fail on it.

    The status is chosen in this order:

    1. the explicit ``status`` argument, when not ``None``;
    2. ``result["status"]``, when it is ``"passed"``, ``"failed"`` or
       ``"skipped"``;
    3. ``"passed"`` / ``"failed"`` when ``result["success"]`` is exactly
       ``True`` / ``False``;
    4. ``"skipped"`` when there is any summary or evidence;
    5. otherwise ``"failed"``, with the summary
       ``"No verification input available."``.

    Args:
        task: Task model or plain dict; supplies ``result_summary`` and
            ``evidence`` as last-resort fallbacks.
        result: Result model or plain dict; may supply ``status``,
            ``success``, ``summary`` and ``evidence``.
        summary: Explicit summary text.
        evidence: Explicit evidence entries.
        status: Explicit status that bypasses inference.

    Returns:
        The resulting ``VerificationVerdict``.
    """
    normalized_result = result.model_dump() if isinstance(result, TaskResult) else dict(result or {})
    normalized_task = task.model_dump() if isinstance(task, AgentTask) else dict(task or {})

    # Callers may place structured data under "summary" (e.g. a list of tool
    # summaries); coerce it so the str-typed verdict field accepts it.
    normalized_summary = _coerce_verification_summary(
        summary or normalized_result.get("summary") or normalized_task.get("result_summary")
    )
    normalized_evidence = list(
        evidence or normalized_result.get("evidence") or normalized_task.get("evidence") or []
    )

    if status is not None:
        return VerificationVerdict(status=status, summary=normalized_summary, evidence=normalized_evidence)

    result_status = normalized_result.get("status")
    result_success = normalized_result.get("success")

    # Tuple membership (not a set) so an unhashable status value is simply
    # treated as "not a known status" instead of raising TypeError.
    if result_status in ("passed", "failed", "skipped"):
        inferred_status = result_status
    elif result_success is True:
        inferred_status = "passed"
    elif result_success is False:
        inferred_status = "failed"
    elif normalized_summary or normalized_evidence:
        inferred_status = "skipped"
    else:
        inferred_status = "failed"
        normalized_summary = "No verification input available."

    return VerificationVerdict(
        status=inferred_status,
        summary=normalized_summary,
        evidence=normalized_evidence,
    )
|
||||
|
||||
|
||||
def apply_verification_verdict(state: AgentState, verdict: VerificationVerdict) -> AgentState:
    """Return a copy of ``state`` with the verdict's verification fields set.

    The input ``state`` is not modified. The returned state carries the
    verdict's status and summary, plus a new list holding its evidence.
    """
    verification_fields = {
        "verification_status": verdict.status,
        "verification_summary": verdict.summary,
        # Copy so later changes to the verdict's list do not reach the state.
        "verification_evidence": list(verdict.evidence),
    }
    return AgentState(**{**state, **verification_fields})
|
||||
|
||||
|
||||
__all__ = ["VerificationVerdict", "apply_verification_verdict", "verify_task_result"]
|
||||
Reference in New Issue
Block a user