feat(agents): Phase 8.4-10.5 built-in plugins, bundled skills, coordinator

This commit is contained in:
2026-04-04 23:24:34 +08:00
parent 88955ed550
commit d18167826e
105 changed files with 14780 additions and 15685 deletions

View File

@@ -12,6 +12,12 @@ from typing import Any, Literal, cast
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage, ToolMessage
from langgraph.graph import END, StateGraph
from app.agents.isolation import (
WorktreeIsolationError,
prepare_session_isolation,
prepare_worktree_isolation,
select_isolation_strategy,
)
from app.agents.prompts import (
ANALYST_SYSTEM_PROMPT,
COORDINATOR_SYSTEM_PROMPT,
@@ -22,6 +28,12 @@ from app.agents.prompts import (
SCHEDULE_PLANNER_SYSTEM_PROMPT,
)
from app.agents.registry import load_builtin_registry_indexes
from app.agents.runtime_metrics import (
coerce_cost_thresholds,
estimate_token_cost,
extract_token_usage,
is_cost_budget_warning,
)
from app.agents.schemas.event import AgentEvent
from app.agents.schemas.message import AgentMessage
from app.agents.schemas.task import AgentTask, CollaborationBudget, InterruptRecord, RecoveryRecord, TaskResult
@@ -193,6 +205,175 @@ def _get_state_int(state: AgentState, key: str) -> int:
return value if isinstance(value, int) else 0
def _clear_isolation_state(state: AgentState) -> None:
state["isolation_mode"] = "none"
state["isolation_id"] = None
state["isolation_workspace_path"] = None
state["isolation_parent_conversation_id"] = None
state["isolation_metadata"] = {}
def _apply_isolation_payload(state: AgentState, payload: dict[str, Any]) -> None:
state["isolation_mode"] = str(payload.get("mode") or "none")
state["isolation_id"] = str(payload.get("isolation_id") or "") or None
state["isolation_workspace_path"] = str(payload.get("workspace_path") or "") or None
state["isolation_parent_conversation_id"] = str(payload.get("parent_conversation_id") or "") or None
state["isolation_metadata"] = dict(payload.get("metadata") or {})
def _prepare_isolation_context(
    state: AgentState,
    *,
    role: AgentRole,
    sub_commander: str,
    user_query: str,
    toolset: list[Any],
) -> None:
    """Select an isolation strategy for this run and record it on ``state``.

    ``select_isolation_strategy`` is called with the user query, the ``name``
    of every tool in ``toolset``, the role value, and the state's
    ``execution_mode`` (``"direct"`` when unset or falsy).

    * mode ``"none"``: the isolation fields on ``state`` are cleared.
    * mode ``"session"``: ``prepare_session_isolation`` builds the payload.
    * any other mode: ``prepare_worktree_isolation`` is attempted. If it raises
      ``WorktreeIsolationError``, a session payload is prepared instead, its
      metadata is tagged with ``fallback_reason`` / ``fallback_from``, and an
      ``agent.isolation.fallback`` warning event is appended.

    Every path ends by appending one ``agent.isolation.selected`` event.
    """
    tool_names = [tool.name for tool in toolset]
    decision = select_isolation_strategy(
        user_query=user_query,
        tool_names=tool_names,
        role_value=role.value,
        execution_mode=str(state.get("execution_mode") or "direct"),
    )

    if decision.mode == "none":
        _clear_isolation_state(state)
        selected_payload = {"mode": "none", "reason": decision.reason, "tool_names": tool_names}
    else:
        # Both preparation helpers take the same keyword arguments.
        prepare_kwargs = {
            "state": state,
            "decision": decision,
            "role_value": role.value,
            "sub_commander": sub_commander,
        }
        if decision.mode == "session":
            selected_payload = prepare_session_isolation(**prepare_kwargs)
        else:
            try:
                selected_payload = prepare_worktree_isolation(**prepare_kwargs)
            except WorktreeIsolationError as exc:
                failure_reason = str(exc)
                selected_payload = prepare_session_isolation(**prepare_kwargs)
                # Tag the session payload so the downgrade stays visible.
                tagged_metadata = dict(selected_payload.get("metadata") or {})
                tagged_metadata["fallback_reason"] = failure_reason
                tagged_metadata["fallback_from"] = "worktree"
                selected_payload["metadata"] = tagged_metadata
                _append_event_trace(
                    state,
                    "agent.isolation.fallback",
                    payload={
                        "requested_mode": "worktree",
                        "fallback_mode": "session",
                        "reason": failure_reason,
                        "tool_names": tool_names,
                    },
                    severity="warning",
                )
        _apply_isolation_payload(state, selected_payload)

    _append_event_trace(
        state,
        "agent.isolation.selected",
        payload=selected_payload,
    )
def _record_response_usage(state: AgentState, response: Any) -> None:
    """Fold the token usage of one LLM ``response`` into the cost counters on ``state``.

    Does nothing when ``extract_token_usage`` reports no input and no output
    tokens. Otherwise it:

    1. adds the deltas to ``input_tokens`` / ``output_tokens`` and recomputes
       ``estimated_cost`` from the new totals;
    2. stores the coerced ``cost_thresholds`` and the resulting
       ``budget_warning`` flag;
    3. updates this agent's entry in ``cost_by_agent`` (the agent is
       ``agent_id``, else ``current_agent``, else the master role value);
    4. appends an ``agent.cost.updated`` event, plus an ``agent.cost.warning``
       event the first time the warning flag flips from false to true.
    """
    input_delta, output_delta = extract_token_usage(response)
    if not (input_delta or output_delta):
        return

    # Run-wide totals.
    total_input = int(state.get("input_tokens") or 0) + input_delta
    total_output = int(state.get("output_tokens") or 0) + output_delta
    state["input_tokens"] = total_input
    state["output_tokens"] = total_output
    state["estimated_cost"] = estimate_token_cost(total_input, total_output)

    thresholds = coerce_cost_thresholds(state.get("cost_thresholds"))
    state["cost_thresholds"] = thresholds
    budget_warning = is_cost_budget_warning(
        total_input,
        total_output,
        state.get("estimated_cost"),
        thresholds,
    )
    # Read the old flag before overwriting it so the transition is detectable.
    was_warning = bool(state.get("budget_warning") or False)
    state["budget_warning"] = budget_warning

    # Per-agent totals, built on copies so the previous mapping is not mutated.
    agent_id = str(state.get("agent_id") or state.get("current_agent") or AgentRole.MASTER.value)
    ledger = {}
    for known_agent, totals in dict(state.get("cost_by_agent") or {}).items():
        ledger[known_agent] = dict(totals)
    prior = dict(ledger.get(agent_id) or {})
    agent_input = int(prior.get("input_tokens") or 0) + input_delta
    agent_output = int(prior.get("output_tokens") or 0) + output_delta
    agent_cost = estimate_token_cost(agent_input, agent_output)
    ledger[agent_id] = {
        "agent_id": agent_id,
        "input_tokens": agent_input,
        "output_tokens": agent_output,
        "total_tokens": agent_input + agent_output,
        "estimated_cost": agent_cost,
        "budget_warning": is_cost_budget_warning(
            agent_input,
            agent_output,
            agent_cost,
            thresholds,
        ),
    }
    state["cost_by_agent"] = ledger

    update_payload = {
        "agent_id": agent_id,
        "input_tokens_delta": input_delta,
        "output_tokens_delta": output_delta,
        "input_tokens": total_input,
        "output_tokens": total_output,
        "estimated_cost": state.get("estimated_cost"),
        "budget_warning": budget_warning,
    }
    _append_event_trace(state, "agent.cost.updated", payload=update_payload)

    if was_warning or not budget_warning:
        return
    warning_payload = {
        "thresholds": thresholds,
        "input_tokens": total_input,
        "output_tokens": total_output,
        "estimated_cost": state.get("estimated_cost"),
    }
    _append_event_trace(
        state,
        "agent.cost.warning",
        payload=warning_payload,
        severity="warning",
    )
def _role_values() -> set[str]:
    """Return the set of ``value`` attributes of every ``AgentRole`` member."""
    values: set[str] = set()
    for member in AgentRole:
        values.add(member.value)
    return values
@@ -1120,6 +1301,43 @@ def _append_event_trace(
]
def _set_phase(state: AgentState, phase: str, *, reason: str, payload: dict[str, Any] | None = None) -> None:
if state.get("current_phase") == phase:
return
state["current_phase"] = phase
state["phase_history"] = [
*(state.get("phase_history") or []),
{
"phase": phase,
"reason": reason,
**({"payload": payload} if payload else {}),
},
]
_append_event_trace(
state,
"agent.phase.changed",
payload={"phase": phase, "reason": reason, **(payload or {})},
)
def _record_checkpoint(state: AgentState, checkpoint: str, *, reason: str, payload: dict[str, Any] | None = None) -> None:
    """Record ``checkpoint`` as the current checkpoint and log it.

    Args:
        state: Mutable agent state.
        checkpoint: Name of the checkpoint being recorded.
        reason: Why the checkpoint was recorded.
        payload: Optional extra data. When truthy it is nested under
            ``"payload"`` in the history entry and merged flat into the event
            payload.

    Unlike a phase change, repeated checkpoints are not de-duplicated: every
    call sets ``current_checkpoint``, appends an entry (tagged with the
    state's ``current_phase``) to ``checkpoint_history`` and appends an
    ``agent.checkpoint.recorded`` event.
    """
    state["current_checkpoint"] = checkpoint
    active_phase = state.get("current_phase")

    entry = {"checkpoint": checkpoint, "phase": active_phase, "reason": reason}
    if payload:
        entry["payload"] = payload
    history = list(state.get("checkpoint_history") or [])
    history.append(entry)
    state["checkpoint_history"] = history

    # The event carries the payload keys flattened next to the fixed fields.
    event_payload = {"checkpoint": checkpoint, "phase": active_phase, "reason": reason}
    event_payload.update(payload or {})
    _append_event_trace(
        state,
        "agent.checkpoint.recorded",
        payload=event_payload,
    )
def _capability_manifest_for_tool(tool_name: str):
indexes = load_builtin_registry_indexes()
capability_id = indexes.capability_id_by_tool_name.get(tool_name)
@@ -1488,6 +1706,10 @@ async def _execute_tool_calls(
"args": normalized_args,
"result_preview": _stringify_message_content(result)[:200],
"verifier_hints": verifier_hints,
"isolation": {
"mode": state.get("isolation_mode"),
"workspace_path": state.get("isolation_workspace_path"),
},
}
state["tool_outcomes"] = [*(state.get("tool_outcomes") or []), tool_outcome]
_append_event_trace(
@@ -1549,6 +1771,13 @@ async def _run_sub_commander(
_record_sub_commander(state, role, sub_commander, user_query)
toolset = SUB_COMMANDER_TOOLSETS.get(sub_commander, []) if use_tools else []
_prepare_isolation_context(
state,
role=role,
sub_commander=sub_commander,
user_query=user_query,
toolset=toolset,
)
if (
role == AgentRole.EXECUTOR
and _is_short_confirmation(user_query)
@@ -1583,6 +1812,7 @@ async def _run_sub_commander(
if _guard_sub_commander_budget(state, "iteration_count", "max_iterations", "max_iterations_exceeded"):
state["iteration_count"] = int(state.get("iteration_count") or 0) + 1
response = await _invoke_llm(llm, working_messages)
_record_response_usage(state, response)
state["final_response"] = _stringify_message_content(response.content)
elif capabilities.supports_native_tools:
state["tool_strategy_used"] = "native"
@@ -1592,6 +1822,7 @@ async def _run_sub_commander(
break
state["iteration_count"] = int(state.get("iteration_count") or 0) + 1
response = await _invoke_llm(bound_llm, working_messages)
_record_response_usage(state, response)
tool_calls = getattr(response, "tool_calls", None) or []
if tool_calls:
if not _guard_sub_commander_budget(state, "tool_round_count", "max_tool_rounds", "max_tool_rounds_exceeded"):
@@ -1653,6 +1884,7 @@ async def _run_sub_commander(
*([retry_instruction] if retry_instruction else []),
],
)
_record_response_usage(state, response)
response_text = _stringify_message_content(response.content)
parsed = _parse_json_action(response_text, allowed_tools)
if parsed is None and response_text.strip() and state.get("tool_round_count"):
@@ -1804,6 +2036,27 @@ def _build_task_evidence(state: AgentState, start_index: int) -> list[dict[str,
else:
evidence = []
if state.get("isolation_mode") and state.get("isolation_mode") != "none":
evidence.append(
{
"type": "isolation",
"mode": state.get("isolation_mode"),
"workspace_path": state.get("isolation_workspace_path"),
"metadata": dict(state.get("isolation_metadata") or {}),
}
)
if state.get("input_tokens") or state.get("output_tokens"):
evidence.append(
{
"type": "cost",
"input_tokens": int(state.get("input_tokens") or 0),
"output_tokens": int(state.get("output_tokens") or 0),
"estimated_cost": state.get("estimated_cost"),
"budget_warning": bool(state.get("budget_warning") or False),
}
)
if state.get("verification_status") or state.get("verification_summary"):
evidence.append(
{
@@ -1846,6 +2099,10 @@ def _collect_task_result(task: AgentTask, state: AgentState, start_tool_index: i
"role": task.role,
"sub_commander": state.get("current_sub_commander"),
"verification_status": state.get("verification_status"),
"isolation_mode": state.get("isolation_mode"),
"isolation_workspace_path": state.get("isolation_workspace_path"),
"estimated_cost": state.get("estimated_cost"),
"budget_warning": bool(state.get("budget_warning") or False),
},
)
@@ -1959,10 +2216,15 @@ def _verify_collaboration_results(
async def _run_collaboration_flow(state: AgentState, user_query: str) -> AgentState:
_set_phase(state, "phase_2_controlled_collaboration", reason="collaboration_flow_started")
_record_checkpoint(state, "collaboration.tasks_planning", reason="collaboration_flow_started")
tasks = _build_collaboration_tasks(user_query)
if len(tasks) < 2:
state["execution_mode"] = "direct"
state["routing_decision"] = {"mode": "direct", "reason": "collaboration_plan_fell_back"}
_record_checkpoint(state, "collaboration.fallback_to_direct", reason="insufficient_tasks", payload={"task_count": len(tasks)})
_set_phase(state, "phase_1_routing", reason="collaboration_flow_abandoned", payload={"task_count": len(tasks)})
_record_checkpoint(state, "routing.direct_resumed", reason="collaboration_flow_abandoned", payload={"task_count": len(tasks)})
return state
base_history = list(state.get("messages", []))
@@ -1988,12 +2250,15 @@ async def _run_collaboration_flow(state: AgentState, user_query: str) -> AgentSt
payload=budget_snapshot,
)
state["active_tasks"] = [task.model_dump(mode="json") for task in tasks]
_record_checkpoint(state, "collaboration.tasks_ready", reason="tasks_built", payload={"task_count": len(tasks)})
parent_task_id = next((task.parent_task_id for task in tasks if task.parent_task_id), None) or "root"
state["task_hierarchy"] = {parent_task_id: [task.task_id for task in tasks]}
state["task_results"] = []
state["next_step"] = None
_set_phase(state, "phase_3_dynamic_collaboration", reason="collaboration_workers_dispatch")
for task in tasks:
_record_checkpoint(state, "collaboration.task_dispatch", reason="dispatch_task", payload={"task_id": task.task_id, "role": task.role})
state["current_agent"] = AgentRole.MASTER.value
state["agent_id"] = coordinator_agent_id
state["parent_agent_id"] = None
@@ -2046,6 +2311,7 @@ async def _run_collaboration_flow(state: AgentState, user_query: str) -> AgentSt
)
task_result = _collect_task_result(task, state, start_tool_index)
_record_checkpoint(state, "collaboration.task_result_collected", reason="task_finished", payload={"task_id": task.task_id, "status": task_result.status})
_append_message_trace(
state,
from_agent_id=child_agent_id,
@@ -2077,6 +2343,8 @@ async def _run_collaboration_flow(state: AgentState, user_query: str) -> AgentSt
state["root_agent_id"] = root_agent_id
state["collaboration_depth"] = 0
state["final_response"] = _build_collaboration_final_response(state.get("task_results") or [])
_set_phase(state, "phase_4_visibility_and_verification", reason="collaboration_verification_started")
_record_checkpoint(state, "collaboration.verification_started", reason="before_verify")
_append_event_trace(
state,
"agent.verify.started",
@@ -2096,6 +2364,7 @@ async def _run_collaboration_flow(state: AgentState, user_query: str) -> AgentSt
},
severity="error" if state.get("verification_status") == "failed" else "info",
)
_record_checkpoint(state, "collaboration.completed", reason="collaboration_flow_finished", payload={"verification_status": state.get("verification_status")})
state["messages"] = [*base_history, AIMessage(content=state["final_response"])]
state["should_respond"] = True
return state
@@ -2114,6 +2383,8 @@ def _stop_due_to_loop_guard(state: AgentState) -> AgentState:
async def master_node(state: AgentState) -> AgentState:
_maybe_reset_turn_budgets(state)
_set_phase(state, "phase_1_routing", reason="master_node_entered")
_record_checkpoint(state, "routing.master_entered", reason="master_node_entered")
user_messages = _filter_user_messages(state["messages"])
user_query = _stringify_message_content(user_messages[-1].content).strip() if user_messages else ""
@@ -2179,6 +2450,7 @@ async def master_node(state: AgentState) -> AgentState:
llm = _get_llm_for_state(state)
response = await _invoke_llm(llm, [SystemMessage(content=MASTER_SYSTEM_PROMPT), *state["messages"]])
_record_response_usage(state, response)
content = _stringify_message_content(response.content).strip()
routed_agent = _route_agent_from_user_query(content)