class BackgroundExecutor:
    """Run coroutine functions as tracked background tasks.

    Each execution is mirrored in a ``BackgroundTask`` record held by the
    ``BackgroundTaskManager`` so status, result and error can be looked up by
    task ID. Tasks started through :meth:`execute_async` are additionally kept
    in ``self._executors`` (task ID -> asyncio task) until they finish or are
    cancelled.
    """

    def __init__(self, task_manager: BackgroundTaskManager | None = None):
        """Initialize the executor.

        Args:
            task_manager: Optional BackgroundTaskManager instance.
                If not provided, uses the global singleton.
        """
        self._task_manager = task_manager or get_background_task_manager()
        self._executors: dict[str, asyncio.Task] = {}

    def _get_or_create_record(self, task_id: str, name: str) -> BackgroundTask:
        """Return the manager's record for *task_id*, creating a PENDING one if absent."""
        task = self._task_manager.get_task_status(task_id)
        if task is None:
            task = BackgroundTask(
                id=task_id,
                name=name,
                status=BackgroundTaskStatus.PENDING,
                created_at=datetime.now(),
            )
            # NOTE(review): this writes to the manager's private ``_tasks``
            # mapping; a public registration method would be preferable if the
            # manager offers one.
            self._task_manager._tasks[task_id] = task
        return task

    async def execute_task(
        self,
        task_id: str,
        func: Callable[..., Coroutine[Any, Any, Any]],
        *args: Any,
        **kwargs: Any,
    ) -> BackgroundTask:
        """Execute a specific task by ID and record its outcome.

        Exceptions raised by *func* are captured on the task record (status
        FAILED, ``error`` set) instead of being propagated. Cancellation is
        recorded as CANCELLED and then re-raised so asyncio sees it.

        Args:
            task_id: Unique task identifier
            func: Async function to execute
            *args: Positional arguments for the function
            **kwargs: Keyword arguments for the function

        Returns:
            The BackgroundTask with result or error

        Raises:
            asyncio.CancelledError: If the surrounding asyncio task is cancelled
        """
        task = self._get_or_create_record(task_id, f"executor_task_{task_id}")

        task.status = BackgroundTaskStatus.RUNNING
        task.started_at = datetime.now()

        try:
            result = await func(*args, **kwargs)
        except asyncio.CancelledError:
            # CancelledError derives from BaseException, so the generic handler
            # below never sees it; without this branch a task cancelled from
            # outside cancel_task() would stay RUNNING forever.
            task.status = BackgroundTaskStatus.CANCELLED
            raise
        except Exception as e:
            task.status = BackgroundTaskStatus.FAILED
            task.error = f"{type(e).__name__}: {str(e)}"
            task.result = None
        else:
            task.status = BackgroundTaskStatus.COMPLETED
            task.result = result
        finally:
            task.completed_at = datetime.now()
            # Drop only our own handle: after cancel_task() the same ID may
            # already be in use by a newer execute_async() run.
            if self._executors.get(task_id) is asyncio.current_task():
                del self._executors[task_id]

        return task

    async def execute_async(
        self,
        task_id: str,
        func: Callable[..., Coroutine[Any, Any, Any]],
        *args: Any,
        **kwargs: Any,
    ) -> str:
        """Start a task in the background and return immediately.

        Args:
            task_id: Unique task identifier
            func: Async function to execute
            *args: Positional arguments for the function
            **kwargs: Keyword arguments for the function

        Returns:
            The task ID
        """
        self._get_or_create_record(task_id, f"async_task_{task_id}")

        # Keep a reference to the asyncio task so it can be cancelled/awaited
        # later and is not garbage-collected while running.
        async_task = asyncio.create_task(self.execute_task(task_id, func, *args, **kwargs))
        self._executors[task_id] = async_task

        return task_id

    def cancel_task(self, task_id: str) -> bool:
        """Cancel a running task.

        Args:
            task_id: The task ID to cancel

        Returns:
            True if a running task was cancelled and its record updated,
            False if the task is not running or has no record
        """
        async_task = self._executors.pop(task_id, None)
        if async_task is None:
            return False

        async_task.cancel()

        task = self._task_manager.get_task_status(task_id)
        if task:
            task.status = BackgroundTaskStatus.CANCELLED
            task.completed_at = datetime.now()
            return True
        return False

    def get_task_result(self, task_id: str) -> Any:
        """Get the result of a completed task.

        Args:
            task_id: The task ID

        Returns:
            The task result or None if not found/not completed
        """
        task = self._task_manager.get_task_status(task_id)
        if task and task.status == BackgroundTaskStatus.COMPLETED:
            return task.result
        return None

    def get_task_error(self, task_id: str) -> str | None:
        """Get the error of a failed task.

        Args:
            task_id: The task ID

        Returns:
            The error message or None if not found/not failed
        """
        task = self._task_manager.get_task_status(task_id)
        if task and task.status == BackgroundTaskStatus.FAILED:
            return task.error
        return None

    def is_task_running(self, task_id: str) -> bool:
        """Check whether a task started via execute_async is still tracked as running.

        Args:
            task_id: The task ID

        Returns:
            True if running, False otherwise
        """
        return task_id in self._executors

    def wait_for_task(self, task_id: str, timeout: float | None = None) -> BackgroundTask:
        """Block until a task completes and return its record.

        This is a synchronous call. It must not be used from the event loop
        that is running the task (that would deadlock the loop); call it from
        another thread, or while that loop is not running.

        Args:
            task_id: The task ID to wait for
            timeout: Optional timeout in seconds

        Returns:
            The completed BackgroundTask

        Raises:
            ValueError: If the task is neither running nor known to the manager
            RuntimeError: If called from the event loop that runs the task
            asyncio.TimeoutError: If task doesn't complete within timeout
            asyncio.CancelledError: If task is cancelled while waiting on a
                stopped loop (when waiting across threads the cancellation
                surfaces through the concurrent future instead)
        """
        async_task = self._executors.get(task_id)
        if async_task is None:
            task = self._task_manager.get_task_status(task_id)
            if task:
                return task
            raise ValueError(f"Task {task_id} not found")

        async def wait_task() -> BackgroundTask:
            # shield(): a timeout of this *wait* must not cancel the
            # background task itself.
            await asyncio.wait_for(asyncio.shield(async_task), timeout=timeout)
            return self._task_manager.get_task_status(task_id)

        task_loop = async_task.get_loop()
        try:
            current_loop = asyncio.get_running_loop()
        except RuntimeError:
            current_loop = None

        if current_loop is task_loop:
            raise RuntimeError(
                f"wait_for_task() cannot block the event loop that is running task {task_id}"
            )
        if task_loop.is_running():
            # The task's loop runs in another thread: hand the wait over to it.
            return asyncio.run_coroutine_threadsafe(wait_task(), task_loop).result()
        return task_loop.run_until_complete(wait_task())

    @property
    def task_manager(self) -> BackgroundTaskManager:
        """Get the underlying task manager."""
        return self._task_manager


# Global executor instance
_executor: BackgroundExecutor | None = None


def get_background_executor() -> BackgroundExecutor:
    """Get the global BackgroundExecutor instance, creating it on first use."""
    global _executor
    if _executor is None:
        _executor = BackgroundExecutor()
    return _executor
class BackgroundScheduler:
    """Background task scheduler using APScheduler.

    Jobs are registered on an ``AsyncIOScheduler``; each time a job fires, the
    wrapped function submits the work to the ``BackgroundTaskManager`` and the
    resulting task ID is remembered in ``self._job_tasks`` under the job ID.
    """

    def __init__(self, task_manager: BackgroundTaskManager | None = None):
        """Initialize the scheduler.

        Args:
            task_manager: Optional BackgroundTaskManager instance.
                If not provided, uses the global singleton.
        """
        self._scheduler = AsyncIOScheduler()
        self._task_manager = task_manager or get_background_task_manager()
        self._job_tasks: dict[str, str] = {}  # Maps APScheduler job_id to task_id

    def _next_job_id(self) -> str:
        """Return an unused ``job_<n>`` identifier."""
        index = len(self._job_tasks)
        candidate = f"job_{index}"
        # ``_job_tasks`` only grows when a job actually fires, so its length
        # alone would hand out the same ID to every job added before the first
        # run; probe the scheduler as well.
        while candidate in self._job_tasks or self._scheduler.get_job(candidate) is not None:
            index += 1
            candidate = f"job_{index}"
        return candidate

    def add_job(
        self,
        func: Callable[..., Coroutine[Any, Any, Any]],
        trigger: BaseTrigger,
        args: tuple[Any, ...] | None = None,
        kwargs: dict[str, Any] | None = None,
        id: str | None = None,
        name: str | None = None,
        **apscheduler_kwargs: Any,
    ) -> str:
        """Add a job to the scheduler.

        Args:
            func: Async function to execute
            trigger: APScheduler trigger (date, interval, cron, etc.)
            args: Positional arguments for the function
            kwargs: Keyword arguments for the function
            id: Unique job ID (auto-generated if not provided)
            name: Job name for display purposes
            **apscheduler_kwargs: Additional APScheduler options

        Returns:
            The job ID
        """
        job_id = id or self._next_job_id()
        task_name = name or f"scheduled_task_{job_id}"

        # Wrap the async function to integrate with BackgroundTaskManager
        async def wrapped_func() -> None:
            # NOTE(review): a coroutine object is handed to submit_task here,
            # while AgentCoordinator passes a callable plus arguments — confirm
            # which form BackgroundTaskManager.submit_task expects.
            coro = func(*(args or ()), **(kwargs or {}))
            task_id = self._task_manager.submit_task(task_name, coro)
            self._job_tasks[job_id] = task_id

        self._scheduler.add_job(
            wrapped_func,
            trigger=trigger,
            id=job_id,
            name=task_name,
            **apscheduler_kwargs,
        )

        return job_id

    def remove_job(self, job_id: str) -> bool:
        """Remove a job from the scheduler.

        If the job has already produced a background task, that task is
        cancelled as well.

        Args:
            job_id: The ID of the job to remove

        Returns:
            True if job was removed, False if job didn't exist
        """
        from apscheduler.jobstores.base import JobLookupError

        try:
            self._scheduler.remove_job(job_id)
        except JobLookupError:
            # Only "no such job" means "not removed"; other failures should
            # not be silently reported as a missing job.
            return False

        # Clean up task mapping if exists
        task_id = self._job_tasks.pop(job_id, None)
        if task_id is not None:
            # Cancel the background task if still running
            self._task_manager.cancel_task(task_id)
        return True

    def list_jobs(self) -> list[dict[str, Any]]:
        """List all scheduled jobs.

        Returns:
            List of dicts with ``id``, ``name``, ``next_run_time`` and the
            string form of the ``trigger`` for each job
        """
        return [
            {
                "id": job.id,
                "name": job.name,
                "next_run_time": job.next_run_time,
                "trigger": str(job.trigger),
            }
            for job in self._scheduler.get_jobs()
        ]

    def start(self) -> None:
        """Start the scheduler if it is not already running."""
        if not self._scheduler.running:
            self._scheduler.start()

    def shutdown(self, wait: bool = True) -> None:
        """Shut the scheduler down if it is running.

        Args:
            wait: Whether to wait for running jobs to complete
        """
        if self._scheduler.running:
            self._scheduler.shutdown(wait=wait)

    def pause(self) -> None:
        """Pause the scheduler."""
        self._scheduler.pause()

    def resume(self) -> None:
        """Resume the scheduler."""
        self._scheduler.resume()

    @property
    def task_manager(self) -> BackgroundTaskManager:
        """Get the underlying task manager."""
        return self._task_manager


# Global scheduler instance
_scheduler: BackgroundScheduler | None = None


def get_background_scheduler() -> BackgroundScheduler:
    """Get the global BackgroundScheduler instance, creating it on first use."""
    global _scheduler
    if _scheduler is None:
        _scheduler = BackgroundScheduler()
    return _scheduler
class AgentCoordinator:
    """Single entry point that coordinates the agent components.

    Wraps team leaders, the remote transport, the background task manager and
    agent sessions, and exposes them through plain methods plus the
    action-based :meth:`coordinate` dispatcher. Most methods report their
    outcome as a dict with a ``"status"`` key rather than raising.
    """

    def __init__(
        self,
        background_manager: BackgroundTaskManager | None = None,
    ):
        """
        Args:
            background_manager: Background task manager; the global singleton
                is used when None.
        """
        self._team_leaders: dict[str, TeamLeader] = {}
        self._remote_transport = RemoteTransport()
        self._background_manager = background_manager or get_background_task_manager()
        self._sessions: dict[str, AgentSession] = {}

    # === Team collaboration ===

    def create_team(self, team_id: str, members: list[str]) -> dict[str, Any]:
        """Create a team.

        Args:
            team_id: Team ID
            members: List of member IDs

        Returns:
            Creation result, or an error dict if the team already exists
        """
        if team_id in self._team_leaders:
            return {"status": "error", "message": f"Team '{team_id}' already exists"}

        leader = TeamLeader(team_id=team_id, members=members)
        self._team_leaders[team_id] = leader
        return {
            "status": "created",
            "team_id": team_id,
            "members": members,
        }

    def get_team(self, team_id: str) -> TeamLeader | None:
        """Return the TeamLeader for *team_id*, or None if unknown."""
        return self._team_leaders.get(team_id)

    def assign_task(self, team_id: str, description: str, member: str) -> dict[str, Any]:
        """Create a task and assign it to a member.

        Args:
            team_id: Team ID
            description: Task description
            member: Member ID

        Returns:
            Assignment result
        """
        leader = self._team_leaders.get(team_id)
        if not leader:
            return {"status": "error", "message": f"Team '{team_id}' not found"}

        task_id = leader.create_task(description)
        success = leader.assign_task(task_id, member)
        return {
            "status": "assigned" if success else "error",
            "task_id": task_id,
            "assignee": member,
        }

    def broadcast_task(self, team_id: str, description: str) -> dict[str, Any]:
        """Broadcast a task to every member of a team.

        Args:
            team_id: Team ID
            description: Task description

        Returns:
            Broadcast result
        """
        leader = self._team_leaders.get(team_id)
        if not leader:
            return {"status": "error", "message": f"Team '{team_id}' not found"}

        task_ids = leader.broadcast_task(description)
        return {
            "status": "broadcast",
            "team_id": team_id,
            "task_ids": task_ids,
            "member_count": len(leader.members),
        }

    def collect_team_results(self, team_id: str) -> dict[str, Any]:
        """Collect the task results of a team.

        Args:
            team_id: Team ID

        Returns:
            Collected results together with the completed/failed counters
            reported by the leader's team status
        """
        leader = self._team_leaders.get(team_id)
        if not leader:
            return {"status": "error", "message": f"Team '{team_id}' not found"}

        results = leader.collect_results()
        status = leader.get_team_status()
        return {
            "status": "collected",
            "team_id": team_id,
            "results": results,
            "completed": status["completed"],
            "failed": status["failed"],
        }

    def get_team_status(self, team_id: str) -> dict[str, Any]:
        """Return the leader's status dict for a team, or an error dict if unknown."""
        leader = self._team_leaders.get(team_id)
        if not leader:
            return {"status": "error", "message": f"Team '{team_id}' not found"}

        return leader.get_team_status()

    # === Background tasks ===

    def submit_background_task(
        self,
        name: str,
        coro: Any,
        *args,
        **kwargs,
    ) -> dict[str, Any]:
        """Submit a background task.

        Args:
            name: Task name
            coro: Coroutine function
            *args: Positional arguments forwarded to the task manager
            **kwargs: Keyword arguments forwarded to the task manager

        Returns:
            Submission result
        """
        task_id = self._background_manager.submit_task(name, coro, *args, **kwargs)
        return {
            "status": "submitted",
            "task_id": task_id,
            "name": name,
        }

    def cancel_background_task(self, task_id: str) -> dict[str, Any]:
        """Cancel a background task.

        Args:
            task_id: Task ID

        Returns:
            Cancellation result
        """
        success = self._background_manager.cancel_task(task_id)
        return {
            "status": "cancelled" if success else "error",
            "task_id": task_id,
        }

    def get_background_task_status(self, task_id: str) -> dict[str, Any]:
        """Get the status of a background task.

        Args:
            task_id: Task ID

        Returns:
            Task status, or an error dict if the task is unknown
        """
        task = self._background_manager.get_task_status(task_id)
        if not task:
            return {"status": "error", "message": f"Task '{task_id}' not found"}

        return {
            "status": "found",
            "task_id": task.id,
            "name": task.name,
            "task_status": task.status.value,
            "result": task.result,
            "error": task.error,
        }

    def list_background_tasks(self) -> dict[str, Any]:
        """List all background tasks.

        Returns:
            Task count plus id/name/status for each task
        """
        tasks = self._background_manager.list_tasks()
        return {
            "status": "list",
            "count": len(tasks),
            "tasks": [
                {
                    "id": t.id,
                    "name": t.name,
                    "status": t.status.value,
                }
                for t in tasks
            ],
        }

    # === Sessions ===

    def create_session(
        self,
        user_id: str | None = None,
        parent_session_id: str | None = None,
    ) -> dict[str, Any]:
        """Create a session and remember it on this coordinator.

        Args:
            user_id: User ID
            parent_session_id: Parent session ID

        Returns:
            Creation result
        """
        session = create_agent_session(
            user_id=user_id,
            parent_session_id=parent_session_id,
        )
        self._sessions[session.session_id] = session
        return {
            "status": "created",
            "session_id": session.session_id,
            "user_id": user_id,
            "parent_session_id": parent_session_id,
        }

    def get_session(self, session_id: str) -> AgentSession | None:
        """Look a session up locally, falling back to the global session lookup.

        Args:
            session_id: Session ID

        Returns:
            AgentSession or None
        """
        return self._sessions.get(session_id) or get_agent_session(session_id)

    async def process_session_message(
        self,
        session_id: str,
        message: str,
        response: str,
    ) -> dict[str, Any]:
        """Process a message exchange within a session.

        Args:
            session_id: Session ID
            message: User message
            response: Assistant response

        Returns:
            Processing result
        """
        session = self.get_session(session_id)
        if not session:
            return {"status": "error", "message": f"Session '{session_id}' not found"}

        await session.process_message(message, response)
        return {
            "status": "processed",
            "session_id": session_id,
            "message_count": session.context.message_count,
        }

    async def spawn_child_session(
        self,
        session_id: str,
        user_id: str | None = None,
    ) -> dict[str, Any]:
        """Create a child session of an existing session.

        Args:
            session_id: Parent session ID
            user_id: User ID

        Returns:
            Creation result
        """
        session = self.get_session(session_id)
        if not session:
            return {"status": "error", "message": f"Session '{session_id}' not found"}

        child = await session.spawn_child_session(user_id=user_id)
        self._sessions[child.session_id] = child
        return {
            "status": "spawned",
            "parent_session_id": session_id,
            "child_session_id": child.session_id,
            "depth": child.context.depth,
        }

    async def _await_session_summary(self, session_id: str) -> dict[str, Any]:
        """Async variant of get_session_summary that always returns the resolved summary."""
        session = self.get_session(session_id)
        if not session:
            return {"status": "error", "message": f"Session '{session_id}' not found"}

        return {"status": "found", "summary": await session.get_session_summary()}

    def get_session_summary(self, session_id: str) -> dict[str, Any]:
        """Get a session summary from synchronous code.

        When no event loop is running in the current thread the summary is
        computed synchronously. When called from inside a running loop this
        method cannot block, so ``"summary"`` holds an asyncio future that the
        caller must await; async callers should go through :meth:`coordinate`
        instead.

        Args:
            session_id: Session ID

        Returns:
            Session summary, or an error dict if the session is unknown
        """
        import asyncio

        session = self.get_session(session_id)
        if not session:
            return {"status": "error", "message": f"Session '{session_id}' not found"}

        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop running in this thread: safe to drive the coroutine here.
            return {"status": "found", "summary": asyncio.run(session.get_session_summary())}

        return {"status": "found", "summary": asyncio.ensure_future(session.get_session_summary())}

    # === Remote transport ===

    def register_remote_handler(self, event_type: str, handler: Any) -> None:
        """Register a remote message handler.

        Args:
            event_type: Event type
            handler: Handler function
        """
        self._remote_transport.register_handler(event_type, handler)

    async def send_remote_response(
        self,
        session_id: str,
        response: dict[str, Any],
    ) -> bool:
        """Send a remote response.

        Args:
            session_id: Session ID
            response: Response data

        Returns:
            Whether sending succeeded
        """
        return await self._remote_transport.send_response(session_id, response)

    async def send_remote_event(
        self,
        session_id: str,
        event: dict[str, Any],
    ) -> bool:
        """Send a remote event.

        Args:
            session_id: Session ID
            event: Event data

        Returns:
            Whether sending succeeded
        """
        return await self._remote_transport.send_event(session_id, event)

    async def send_remote_tool_call(
        self,
        session_id: str,
        tool_call: dict[str, Any],
    ) -> bool:
        """Send a remote tool call.

        Args:
            session_id: Session ID
            tool_call: Tool call data

        Returns:
            Whether sending succeeded
        """
        return await self._remote_transport.send_tool_call(session_id, tool_call)

    # === Unified coordination entry point ===

    async def coordinate(self, request: dict[str, Any]) -> dict[str, Any]:
        """Dispatch a request to the matching operation.

        Args:
            request: Request data containing:
                - action: Operation type (team_create, team_assign,
                  task_submit, session_create, etc.)
                - further keys depending on the action

        Returns:
            Result of the operation, or an error dict for an unknown action

        Raises:
            KeyError: If a key required by the chosen action is missing
        """
        action = request.get("action")

        if action == "team_create":
            return self.create_team(
                team_id=request["team_id"],
                members=request["members"],
            )

        if action == "team_assign":
            return self.assign_task(
                team_id=request["team_id"],
                description=request["description"],
                member=request["member"],
            )

        if action == "team_broadcast":
            return self.broadcast_task(
                team_id=request["team_id"],
                description=request["description"],
            )

        if action == "team_collect":
            return self.collect_team_results(team_id=request["team_id"])

        if action == "team_status":
            return self.get_team_status(team_id=request["team_id"])

        if action == "task_submit":
            # name/coro must be positional: combining ``name=...`` with
            # ``*args`` makes the first extra argument collide with ``name``.
            return self.submit_background_task(
                request["name"],
                request["coro"],
                *request.get("args", []),
                **request.get("kwargs", {}),
            )

        if action == "task_cancel":
            return self.cancel_background_task(task_id=request["task_id"])

        if action == "task_status":
            return self.get_background_task_status(task_id=request["task_id"])

        if action == "session_create":
            return self.create_session(
                user_id=request.get("user_id"),
                parent_session_id=request.get("parent_session_id"),
            )

        if action == "session_message":
            return await self.process_session_message(
                session_id=request["session_id"],
                message=request["message"],
                response=request["response"],
            )

        if action == "session_spawn":
            return await self.spawn_child_session(
                session_id=request["session_id"],
                user_id=request.get("user_id"),
            )

        if action == "session_summary":
            # We are inside a running loop here, so await the summary instead
            # of returning the pending future the sync method would produce.
            return await self._await_session_summary(session_id=request["session_id"])

        return {"status": "error", "message": f"Unknown action: {action}"}


# Global singleton
_coordinator: AgentCoordinator | None = None


def get_agent_coordinator() -> AgentCoordinator:
    """Get the global AgentCoordinator, creating it on first use."""
    global _coordinator
    if _coordinator is None:
        _coordinator = AgentCoordinator()
    return _coordinator
state["isolation_parent_conversation_id"] = None + state["isolation_metadata"] = {} + + +def _apply_isolation_payload(state: AgentState, payload: dict[str, Any]) -> None: + state["isolation_mode"] = str(payload.get("mode") or "none") + state["isolation_id"] = str(payload.get("isolation_id") or "") or None + state["isolation_workspace_path"] = str(payload.get("workspace_path") or "") or None + state["isolation_parent_conversation_id"] = str(payload.get("parent_conversation_id") or "") or None + state["isolation_metadata"] = dict(payload.get("metadata") or {}) + + +def _prepare_isolation_context( + state: AgentState, + *, + role: AgentRole, + sub_commander: str, + user_query: str, + toolset: list[Any], +) -> None: + tool_names = [tool.name for tool in toolset] + decision = select_isolation_strategy( + user_query=user_query, + tool_names=tool_names, + role_value=role.value, + execution_mode=str(state.get("execution_mode") or "direct"), + ) + if decision.mode == "none": + _clear_isolation_state(state) + _append_event_trace( + state, + "agent.isolation.selected", + payload={"mode": "none", "reason": decision.reason, "tool_names": tool_names}, + ) + return + + if decision.mode == "session": + isolation_payload = prepare_session_isolation( + state=state, + decision=decision, + role_value=role.value, + sub_commander=sub_commander, + ) + _apply_isolation_payload(state, isolation_payload) + _append_event_trace( + state, + "agent.isolation.selected", + payload=isolation_payload, + ) + return + + try: + isolation_payload = prepare_worktree_isolation( + state=state, + decision=decision, + role_value=role.value, + sub_commander=sub_commander, + ) + except WorktreeIsolationError as exc: + isolation_payload = prepare_session_isolation( + state=state, + decision=decision, + role_value=role.value, + sub_commander=sub_commander, + ) + isolation_payload["metadata"] = { + **dict(isolation_payload.get("metadata") or {}), + "fallback_reason": str(exc), + "fallback_from": "worktree", + } 
def _record_response_usage(state: AgentState, response: Any) -> None:
    """Fold the token usage of one LLM response into the run's cost tracking.

    Does nothing when the response reports no input and no output tokens.
    Otherwise it updates the run-wide token totals, estimated cost and budget
    warning flag, updates the per-agent entry in ``cost_by_agent``, and emits
    an ``agent.cost.updated`` event. An ``agent.cost.warning`` event is
    emitted only when the budget warning switches from off to on.
    """
    delta_in, delta_out = extract_token_usage(response)
    if not (delta_in or delta_out):
        return

    # Run-wide totals.
    run_in = int(state.get("input_tokens") or 0) + delta_in
    run_out = int(state.get("output_tokens") or 0) + delta_out
    state["input_tokens"] = run_in
    state["output_tokens"] = run_out
    run_cost = estimate_token_cost(run_in, run_out)
    state["estimated_cost"] = run_cost

    limits = coerce_cost_thresholds(state.get("cost_thresholds"))
    state["cost_thresholds"] = limits
    over_budget = is_cost_budget_warning(run_in, run_out, run_cost, limits)
    # Read the old flag before overwriting it so the transition can be detected.
    was_over_budget = bool(state.get("budget_warning") or False)
    state["budget_warning"] = over_budget

    # Per-agent totals; existing entries are copied so stored dicts are not mutated.
    owner = str(state.get("agent_id") or state.get("current_agent") or AgentRole.MASTER.value)
    ledger: dict[Any, dict[str, Any]] = {}
    for key, value in dict(state.get("cost_by_agent") or {}).items():
        ledger[key] = dict(value)
    previous = dict(ledger.get(owner) or {})
    agent_in = int(previous.get("input_tokens") or 0) + delta_in
    agent_out = int(previous.get("output_tokens") or 0) + delta_out
    agent_cost = estimate_token_cost(agent_in, agent_out)
    agent_over_budget = is_cost_budget_warning(agent_in, agent_out, agent_cost, limits)
    ledger[owner] = {
        "agent_id": owner,
        "input_tokens": agent_in,
        "output_tokens": agent_out,
        "total_tokens": agent_in + agent_out,
        "estimated_cost": agent_cost,
        "budget_warning": agent_over_budget,
    }
    state["cost_by_agent"] = ledger

    update_payload = {
        "agent_id": owner,
        "input_tokens_delta": delta_in,
        "output_tokens_delta": delta_out,
        "input_tokens": run_in,
        "output_tokens": run_out,
        "estimated_cost": run_cost,
        "budget_warning": over_budget,
    }
    _append_event_trace(state, "agent.cost.updated", payload=update_payload)

    if not over_budget or was_over_budget:
        return

    warning_payload = {
        "thresholds": limits,
        "input_tokens": run_in,
        "output_tokens": run_out,
        "estimated_cost": run_cost,
    }
    _append_event_trace(
        state,
        "agent.cost.warning",
        payload=warning_payload,
        severity="warning",
    )
"agent.checkpoint.recorded", + payload={"checkpoint": checkpoint, "phase": state.get("current_phase"), "reason": reason, **(payload or {})}, + ) + + def _capability_manifest_for_tool(tool_name: str): indexes = load_builtin_registry_indexes() capability_id = indexes.capability_id_by_tool_name.get(tool_name) @@ -1488,6 +1706,10 @@ async def _execute_tool_calls( "args": normalized_args, "result_preview": _stringify_message_content(result)[:200], "verifier_hints": verifier_hints, + "isolation": { + "mode": state.get("isolation_mode"), + "workspace_path": state.get("isolation_workspace_path"), + }, } state["tool_outcomes"] = [*(state.get("tool_outcomes") or []), tool_outcome] _append_event_trace( @@ -1549,6 +1771,13 @@ async def _run_sub_commander( _record_sub_commander(state, role, sub_commander, user_query) toolset = SUB_COMMANDER_TOOLSETS.get(sub_commander, []) if use_tools else [] + _prepare_isolation_context( + state, + role=role, + sub_commander=sub_commander, + user_query=user_query, + toolset=toolset, + ) if ( role == AgentRole.EXECUTOR and _is_short_confirmation(user_query) @@ -1583,6 +1812,7 @@ async def _run_sub_commander( if _guard_sub_commander_budget(state, "iteration_count", "max_iterations", "max_iterations_exceeded"): state["iteration_count"] = int(state.get("iteration_count") or 0) + 1 response = await _invoke_llm(llm, working_messages) + _record_response_usage(state, response) state["final_response"] = _stringify_message_content(response.content) elif capabilities.supports_native_tools: state["tool_strategy_used"] = "native" @@ -1592,6 +1822,7 @@ async def _run_sub_commander( break state["iteration_count"] = int(state.get("iteration_count") or 0) + 1 response = await _invoke_llm(bound_llm, working_messages) + _record_response_usage(state, response) tool_calls = getattr(response, "tool_calls", None) or [] if tool_calls: if not _guard_sub_commander_budget(state, "tool_round_count", "max_tool_rounds", "max_tool_rounds_exceeded"): @@ -1653,6 +1884,7 @@ 
async def _run_sub_commander( *([retry_instruction] if retry_instruction else []), ], ) + _record_response_usage(state, response) response_text = _stringify_message_content(response.content) parsed = _parse_json_action(response_text, allowed_tools) if parsed is None and response_text.strip() and state.get("tool_round_count"): @@ -1804,6 +2036,27 @@ def _build_task_evidence(state: AgentState, start_index: int) -> list[dict[str, else: evidence = [] + if state.get("isolation_mode") and state.get("isolation_mode") != "none": + evidence.append( + { + "type": "isolation", + "mode": state.get("isolation_mode"), + "workspace_path": state.get("isolation_workspace_path"), + "metadata": dict(state.get("isolation_metadata") or {}), + } + ) + + if state.get("input_tokens") or state.get("output_tokens"): + evidence.append( + { + "type": "cost", + "input_tokens": int(state.get("input_tokens") or 0), + "output_tokens": int(state.get("output_tokens") or 0), + "estimated_cost": state.get("estimated_cost"), + "budget_warning": bool(state.get("budget_warning") or False), + } + ) + if state.get("verification_status") or state.get("verification_summary"): evidence.append( { @@ -1846,6 +2099,10 @@ def _collect_task_result(task: AgentTask, state: AgentState, start_tool_index: i "role": task.role, "sub_commander": state.get("current_sub_commander"), "verification_status": state.get("verification_status"), + "isolation_mode": state.get("isolation_mode"), + "isolation_workspace_path": state.get("isolation_workspace_path"), + "estimated_cost": state.get("estimated_cost"), + "budget_warning": bool(state.get("budget_warning") or False), }, ) @@ -1959,10 +2216,15 @@ def _verify_collaboration_results( async def _run_collaboration_flow(state: AgentState, user_query: str) -> AgentState: + _set_phase(state, "phase_2_controlled_collaboration", reason="collaboration_flow_started") + _record_checkpoint(state, "collaboration.tasks_planning", reason="collaboration_flow_started") tasks = 
_build_collaboration_tasks(user_query) if len(tasks) < 2: state["execution_mode"] = "direct" state["routing_decision"] = {"mode": "direct", "reason": "collaboration_plan_fell_back"} + _record_checkpoint(state, "collaboration.fallback_to_direct", reason="insufficient_tasks", payload={"task_count": len(tasks)}) + _set_phase(state, "phase_1_routing", reason="collaboration_flow_abandoned", payload={"task_count": len(tasks)}) + _record_checkpoint(state, "routing.direct_resumed", reason="collaboration_flow_abandoned", payload={"task_count": len(tasks)}) return state base_history = list(state.get("messages", [])) @@ -1988,12 +2250,15 @@ async def _run_collaboration_flow(state: AgentState, user_query: str) -> AgentSt payload=budget_snapshot, ) state["active_tasks"] = [task.model_dump(mode="json") for task in tasks] + _record_checkpoint(state, "collaboration.tasks_ready", reason="tasks_built", payload={"task_count": len(tasks)}) parent_task_id = next((task.parent_task_id for task in tasks if task.parent_task_id), None) or "root" state["task_hierarchy"] = {parent_task_id: [task.task_id for task in tasks]} state["task_results"] = [] state["next_step"] = None + _set_phase(state, "phase_3_dynamic_collaboration", reason="collaboration_workers_dispatch") for task in tasks: + _record_checkpoint(state, "collaboration.task_dispatch", reason="dispatch_task", payload={"task_id": task.task_id, "role": task.role}) state["current_agent"] = AgentRole.MASTER.value state["agent_id"] = coordinator_agent_id state["parent_agent_id"] = None @@ -2046,6 +2311,7 @@ async def _run_collaboration_flow(state: AgentState, user_query: str) -> AgentSt ) task_result = _collect_task_result(task, state, start_tool_index) + _record_checkpoint(state, "collaboration.task_result_collected", reason="task_finished", payload={"task_id": task.task_id, "status": task_result.status}) _append_message_trace( state, from_agent_id=child_agent_id, @@ -2077,6 +2343,8 @@ async def _run_collaboration_flow(state: AgentState, 
user_query: str) -> AgentSt state["root_agent_id"] = root_agent_id state["collaboration_depth"] = 0 state["final_response"] = _build_collaboration_final_response(state.get("task_results") or []) + _set_phase(state, "phase_4_visibility_and_verification", reason="collaboration_verification_started") + _record_checkpoint(state, "collaboration.verification_started", reason="before_verify") _append_event_trace( state, "agent.verify.started", @@ -2096,6 +2364,7 @@ async def _run_collaboration_flow(state: AgentState, user_query: str) -> AgentSt }, severity="error" if state.get("verification_status") == "failed" else "info", ) + _record_checkpoint(state, "collaboration.completed", reason="collaboration_flow_finished", payload={"verification_status": state.get("verification_status")}) state["messages"] = [*base_history, AIMessage(content=state["final_response"])] state["should_respond"] = True return state @@ -2114,6 +2383,8 @@ def _stop_due_to_loop_guard(state: AgentState) -> AgentState: async def master_node(state: AgentState) -> AgentState: _maybe_reset_turn_budgets(state) + _set_phase(state, "phase_1_routing", reason="master_node_entered") + _record_checkpoint(state, "routing.master_entered", reason="master_node_entered") user_messages = _filter_user_messages(state["messages"]) user_query = _stringify_message_content(user_messages[-1].content).strip() if user_messages else "" @@ -2179,6 +2450,7 @@ async def master_node(state: AgentState) -> AgentState: llm = _get_llm_for_state(state) response = await _invoke_llm(llm, [SystemMessage(content=MASTER_SYSTEM_PROMPT), *state["messages"]]) + _record_response_usage(state, response) content = _stringify_message_content(response.content).strip() routed_agent = _route_agent_from_user_query(content) diff --git a/backend/app/agents/isolation/__init__.py b/backend/app/agents/isolation/__init__.py new file mode 100644 index 0000000..3293bf2 --- /dev/null +++ b/backend/app/agents/isolation/__init__.py @@ -0,0 +1,14 @@ +from 
def prepare_session_isolation(
    *,
    state: dict[str, Any],
    decision: IsolationDecision,
    role_value: str,
    sub_commander: str,
) -> dict[str, Any]:
    """Build the isolation descriptor for a session-scoped run (no workspace).

    Args:
        state: Agent state mapping; only ``conversation_id`` is read.
        decision: Selector outcome whose ``metadata``, ``reason``,
            ``tool_names`` and ``capability_ids`` are copied into the result.
        role_value: Role string recorded in the descriptor metadata.
        sub_commander: Sub-commander name recorded in the descriptor metadata.

    Returns:
        A dict with the keys ``mode`` (always ``"session"``),
        ``isolation_id`` (``"session-"`` plus 8 random hex characters),
        ``workspace_path`` (always ``None``), ``parent_conversation_id`` and
        ``metadata``.
    """
    # A missing or falsy conversation id is reported as None, never as "".
    conversation_id = str(state.get("conversation_id") or "")

    # Copy the selector metadata first so the decision object is not mutated;
    # the keys written below win over same-named selector keys.
    session_metadata: dict[str, Any] = dict(decision.metadata or {})
    session_metadata.update(
        {
            "reason": decision.reason,
            "role": role_value,
            "sub_commander": sub_commander,
            "tool_names": list(decision.tool_names),
            "capability_ids": list(decision.capability_ids),
            "status": "active",
        }
    )

    return {
        "mode": "session",
        "isolation_id": "session-" + uuid4().hex[:8],
        "workspace_path": None,
        "parent_conversation_id": conversation_id or None,
        "metadata": session_metadata,
    }
IsolationMode = Literal["none", "session", "worktree"]

# Query keywords that hint the request will touch a code workspace.
# ASCII markers are matched as whole words, CJK markers as substrings
# (see _has_worktree_query_signal).
_WORKTREE_QUERY_MARKERS = (
    "code",
    "repo",
    "repository",
    "git",
    "worktree",
    "branch",
    "patch",
    "diff",
    "refactor",
    "build",
    "test",
    "fix",
    "file",
    "files",
    "python",
    "typescript",
    "javascript",
    "代码",
    "仓库",
    "分支",
    "补丁",
    "重构",
    "构建",
    "测试",
    "修复",
    "文件",
)

# Inflections accepted after an ASCII marker: "tests", "fixes", "refactored", "building".
_WORKTREE_MARKER_SUFFIXES = ("s", "es", "ed", "ing")


@dataclass(frozen=True)
class IsolationDecision:
    """Immutable result of the isolation strategy selection."""

    # One of "none", "session" or "worktree".
    mode: IsolationMode
    # Machine-readable code naming the rule that produced ``mode``.
    reason: str
    # Tool names the decision was computed for, in caller order.
    tool_names: tuple[str, ...] = ()
    # Registry capability ids resolved from ``tool_names``.
    capability_ids: tuple[str, ...] = ()
    # Extra context: role, execution mode, capability summaries, risk level.
    metadata: dict[str, Any] = field(default_factory=dict)


def _capability_metadata(capability: CapabilityManifest | None) -> dict[str, Any]:
    """Flatten a capability manifest into a plain dict (enums become their values).

    Returns an empty dict when ``capability`` is ``None``.
    """
    if capability is None:
        return {}
    return {
        "capability_id": capability.capability_id,
        "tool_name": capability.tool_name,
        "permission_class": capability.permission_class.value,
        "side_effect_scope": capability.side_effect_scope.value,
        "supports_retry": capability.supports_retry,
        "idempotent": capability.idempotent,
        "safe_for_parallel_use": capability.safe_for_parallel_use,
        "requires_confirmation": capability.requires_confirmation,
    }


def _has_worktree_query_signal(normalized_query: str) -> bool:
    """Return True when the lowercased query mentions a workspace marker.

    ASCII markers must stand alone as a word (optionally inflected), so
    "latest", "profile" or "different" no longer count as "test", "file" or
    "diff". Underscores and punctuation are treated as word boundaries, so
    "test_utils.py" still matches. CJK markers have no word boundaries and are
    matched as plain substrings.
    """
    import re  # function-scope import: this module does not import ``re`` at top level

    if not normalized_query:
        return False

    ascii_markers = [marker for marker in _WORKTREE_QUERY_MARKERS if marker.isascii()]
    if any(
        marker in normalized_query
        for marker in _WORKTREE_QUERY_MARKERS
        if not marker.isascii()
    ):
        return True
    if not ascii_markers:
        return False

    words = "|".join(re.escape(marker) for marker in ascii_markers)
    suffixes = "|".join(re.escape(suffix) for suffix in _WORKTREE_MARKER_SUFFIXES)
    # The query is already lowercased, so [a-z0-9] is enough for the boundaries.
    pattern = rf"(?<![a-z0-9])(?:{words})(?:{suffixes})?(?![a-z0-9])"
    return re.search(pattern, normalized_query) is not None


def select_isolation_strategy(
    *,
    user_query: str,
    tool_names: list[str] | tuple[str, ...],
    role_value: str,
    execution_mode: str | None,
) -> IsolationDecision:
    """Choose how strongly a sub-commander run should be isolated.

    Rules are evaluated in this order; the first match wins:

    1. The query mentions a workspace marker -> ``"worktree"``.
    2. Any resolved capability is WRITE, has a LOCAL_STATE/DB_WRITE side
       effect, or is not safe for parallel use -> ``"session"``.
    3. ``execution_mode`` is ``"collaboration"``, the role is ``"analyst"`` or
       ``"librarian"``, or any capability is EXTERNAL -> ``"session"``.
    4. Otherwise -> ``"none"``.

    Args:
        user_query: Raw user text; ``None``/empty is treated as no signal.
        tool_names: Tools available to the run; unknown names are ignored.
        role_value: Role string of the agent being run.
        execution_mode: Current execution mode, if any.

    Returns:
        The decision, including the tool names, resolved capability ids and a
        metadata dict describing the inputs and the chosen workspace strategy.
    """
    capabilities: list[CapabilityManifest] = []
    capability_ids: list[str] = []

    # The registry is only needed to resolve tool names; skip it otherwise.
    if tool_names:
        indexes = load_builtin_registry_indexes()
        for tool_name in tool_names:
            capability_id = indexes.capability_id_by_tool_name.get(tool_name)
            capability = indexes.capability_by_id.get(capability_id) if capability_id else None
            if capability is not None:
                capabilities.append(capability)
                capability_ids.append(capability.capability_id)

    normalized_query = (user_query or "").strip().lower()
    has_worktree_query_signal = _has_worktree_query_signal(normalized_query)
    has_write_capability = any(cap.permission_class == PermissionClass.WRITE for cap in capabilities)
    has_external_capability = any(cap.permission_class == PermissionClass.EXTERNAL for cap in capabilities)
    has_non_parallel_capability = any(not cap.safe_for_parallel_use for cap in capabilities)
    has_stateful_side_effect = any(
        cap.side_effect_scope in {SideEffectScope.LOCAL_STATE, SideEffectScope.DB_WRITE}
        for cap in capabilities
    )

    metadata = {
        "role": role_value,
        "execution_mode": execution_mode,
        "capabilities": [_capability_metadata(capability) for capability in capabilities],
        "workspace_strategy": "inline",
        "risk_level": "low",
    }

    if has_worktree_query_signal:
        return IsolationDecision(
            mode="worktree",
            reason="workspace_mutation_signals_detected",
            tool_names=tuple(tool_names),
            capability_ids=tuple(capability_ids),
            metadata={
                **metadata,
                "workspace_strategy": "ephemeral_worktree",
                "risk_level": "high",
            },
        )

    if has_write_capability or has_stateful_side_effect or has_non_parallel_capability:
        return IsolationDecision(
            mode="session",
            reason="stateful_or_non_parallel_tooling",
            tool_names=tuple(tool_names),
            capability_ids=tuple(capability_ids),
            metadata={
                **metadata,
                "workspace_strategy": "isolated_session",
                "risk_level": "medium",
            },
        )

    if execution_mode == "collaboration" or role_value in {"analyst", "librarian"} or has_external_capability:
        return IsolationDecision(
            mode="session",
            reason="context_heavy_or_external_retrieval",
            tool_names=tuple(tool_names),
            capability_ids=tuple(capability_ids),
            metadata={
                **metadata,
                "workspace_strategy": "isolated_session",
                "risk_level": "medium",
            },
        )

    return IsolationDecision(
        mode="none",
        reason="inline_execution_is_sufficient",
        tool_names=tuple(tool_names),
        capability_ids=tuple(capability_ids),
        metadata=metadata,
    )
b/backend/app/agents/isolation/worktree_isolation.py @@ -0,0 +1,83 @@ +from __future__ import annotations + +import re +import subprocess +from pathlib import Path +from typing import Any +from uuid import uuid4 + +from app.agents.isolation.strategy_selector import IsolationDecision + + +class WorktreeIsolationError(RuntimeError): + pass + + +def _slugify(value: str, *, fallback: str) -> str: + slug = re.sub(r"[^a-zA-Z0-9._-]+", "-", (value or "").strip()).strip("-").lower() + return slug or fallback + + +def _resolve_git_root() -> Path: + try: + result = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + check=True, + capture_output=True, + text=True, + ) + except subprocess.CalledProcessError as exc: + raise WorktreeIsolationError(exc.stderr.strip() or exc.stdout.strip() or "git_root_unavailable") from exc + git_root = Path(result.stdout.strip()) + if not git_root.exists(): + raise WorktreeIsolationError("git_root_not_found") + return git_root + + +def prepare_worktree_isolation( + *, + state: dict[str, Any], + decision: IsolationDecision, + role_value: str, + sub_commander: str, + create_workspace: bool = True, +) -> dict[str, Any]: + isolation_id = f"worktree-{uuid4().hex[:8]}" + conversation_slug = _slugify(str(state.get("conversation_id") or "conversation"), fallback="conversation") + role_slug = _slugify(role_value, fallback="agent") + git_root = _resolve_git_root() + workspace_root = git_root / ".worktrees" / "jarvis" / conversation_slug + workspace_path = workspace_root / f"{role_slug}-{isolation_id}" + branch = f"jarvis/{conversation_slug}/{role_slug}-{isolation_id}" + + if create_workspace and not workspace_path.exists(): + workspace_root.mkdir(parents=True, exist_ok=True) + try: + subprocess.run( + ["git", "-C", str(git_root), "worktree", "add", "-b", branch, str(workspace_path), "HEAD"], + check=True, + capture_output=True, + text=True, + ) + except subprocess.CalledProcessError as exc: + raise WorktreeIsolationError(exc.stderr.strip() or 
exc.stdout.strip() or "worktree_add_failed") from exc + + return { + "mode": "worktree", + "isolation_id": isolation_id, + "workspace_path": str(workspace_path), + "parent_conversation_id": str(state.get("conversation_id") or "") or None, + "metadata": { + **dict(decision.metadata or {}), + "reason": decision.reason, + "role": role_value, + "sub_commander": sub_commander, + "tool_names": list(decision.tool_names), + "capability_ids": list(decision.capability_ids), + "repo_root": str(git_root), + "branch": branch, + "workspace_strategy": "ephemeral_worktree", + "cleanup_status": "pending", + "materialized": workspace_path.exists(), + }, + } diff --git a/backend/app/agents/plugins/builtins/code_helper/__init__.py b/backend/app/agents/plugins/builtins/code_helper/__init__.py new file mode 100644 index 0000000..ab8eb6f --- /dev/null +++ b/backend/app/agents/plugins/builtins/code_helper/__init__.py @@ -0,0 +1,19 @@ +"""Code Helper Plugin - Linting, formatting, and code explanation tools""" + + +def lint_file(file_path: str) -> dict: + """Lint a source file and return issues found.""" + return {"status": "ok", "tool": "lint_file", "result": f"Linting {file_path}"} + + +def format_file(file_path: str) -> dict: + """Format a source file and return the result.""" + return {"status": "ok", "tool": "format_file", "result": f"Formatting {file_path}"} + + +def explain_code(code_snippet: str) -> dict: + """Explain a code snippet and return the explanation.""" + return {"status": "ok", "tool": "explain_code", "result": f"Explaining code snippet"} + + +tools = [lint_file, format_file, explain_code] diff --git a/backend/app/agents/plugins/builtins/code_helper/manifest.json b/backend/app/agents/plugins/builtins/code_helper/manifest.json new file mode 100644 index 0000000..946844b --- /dev/null +++ b/backend/app/agents/plugins/builtins/code_helper/manifest.json @@ -0,0 +1,22 @@ +{ + "id": "code_helper", + "name": "Code Helper", + "version": "1.0.0", + "description": "Code linting, 
def organize_by_type(directory: str) -> dict:
    """Return the result record for an organize-by-type request.

    Only the record is built here; no file in ``directory`` is read or moved.
    """
    summary = f"Organizing {directory} by type"
    return dict(status="ok", tool="organize_by_type", result=summary)


def find_duplicates(directory: str) -> dict:
    """Return the result record for a duplicate-search request.

    Only the record is built here; ``directory`` is not scanned.
    """
    summary = f"Finding duplicates in {directory}"
    return dict(status="ok", tool="find_duplicates", result=summary)


# Callables exported by this plugin module.
tools = [organize_by_type, find_duplicates]
def git_status_summary() -> dict:
    """Return the result record for a git-status request (git is not invoked here)."""
    return dict(status="ok", tool="git_status_summary", result="Git status summary")


def git_log_summary(limit: int = 10) -> dict:
    """Return the result record for a git-log request limited to ``limit`` commits.

    Git is not invoked here; ``limit`` is only echoed in the result text.
    """
    summary = f"Git log summary (limit={limit})"
    return dict(status="ok", tool="git_log_summary", result=summary)


def git_diff_summary(ref1: str = "HEAD", ref2: str = "HEAD~1") -> dict:
    """Return the result record for a diff request between ``ref1`` and ``ref2``.

    Git is not invoked here; the refs are only echoed as ``ref1..ref2``.
    """
    summary = f"Git diff summary ({ref1}..{ref2})"
    return dict(status="ok", tool="git_diff_summary", result=summary)


# Callables exported by this plugin module.
tools = [git_status_summary, git_log_summary, git_diff_summary]
# Per-token USD rates used by estimate_token_cost.
INPUT_TOKEN_USD_RATE = 0.000003
OUTPUT_TOKEN_USD_RATE = 0.000015
# Limits at or above which a budget warning is raised.
DEFAULT_COST_THRESHOLDS = {
    "total_tokens": 4000,
    "estimated_cost": 0.02,
}


def estimate_token_cost(input_tokens: int, output_tokens: int) -> float | None:
    """Estimate the USD cost of a token count, rounded to 6 decimals.

    Negative counts are clamped to zero. Returns ``None`` when no tokens were
    used at all, so "no usage" can be told apart from a zero cost.
    """
    billable_input = max(input_tokens, 0)
    billable_output = max(output_tokens, 0)
    if billable_input + billable_output <= 0:
        return None
    return round(
        billable_input * INPUT_TOKEN_USD_RATE + billable_output * OUTPUT_TOKEN_USD_RATE,
        6,
    )


def _first_token_count(usage: dict[str, Any], *keys: str) -> int:
    """Return the first truthy, int-convertible value among ``keys``, else 0.

    Values that cannot be converted (e.g. ``"n/a"``, NaN) are skipped instead
    of raising: usage extraction is best-effort bookkeeping and a malformed
    provider payload should not abort the caller.
    """
    for key in keys:
        raw = usage.get(key)
        if not raw:
            continue
        try:
            return int(raw)
        except (TypeError, ValueError, OverflowError):
            continue
    return 0


def extract_token_usage(response: Any) -> tuple[int, int]:
    """Read ``(input_tokens, output_tokens)`` from an LLM response object.

    Sources are tried in order, and the first one yielding a non-zero count
    wins:

    1. ``response.usage_metadata`` (``input_tokens``/``prompt_tokens`` and
       ``output_tokens``/``completion_tokens``).
    2. ``response.response_metadata["token_usage"]`` or ``["usage"]``
       (``prompt_tokens``/``input_tokens`` and
       ``completion_tokens``/``output_tokens``).

    Returns ``(0, 0)`` when neither source is a dict with usable counts.
    """
    usage_metadata = getattr(response, "usage_metadata", None) or {}
    if isinstance(usage_metadata, dict):
        counts = (
            _first_token_count(usage_metadata, "input_tokens", "prompt_tokens"),
            _first_token_count(usage_metadata, "output_tokens", "completion_tokens"),
        )
        if any(counts):
            return counts

    response_metadata = getattr(response, "response_metadata", None) or {}
    token_usage: Any = {}
    if isinstance(response_metadata, dict):
        token_usage = response_metadata.get("token_usage") or response_metadata.get("usage") or {}
    if isinstance(token_usage, dict):
        counts = (
            _first_token_count(token_usage, "prompt_tokens", "input_tokens"),
            _first_token_count(token_usage, "completion_tokens", "output_tokens"),
        )
        if any(counts):
            return counts

    return 0, 0


def coerce_cost_thresholds(raw_thresholds: Any) -> dict[str, float]:
    """Merge caller-supplied thresholds over a copy of the defaults.

    Only the keys of ``DEFAULT_COST_THRESHOLDS`` are considered, and only
    positive ``int``/``float`` values override a default. Booleans are
    rejected explicitly: ``bool`` is a subclass of ``int``, so ``True`` would
    otherwise become a threshold of ``1.0``.
    """
    thresholds: dict[str, float] = dict(DEFAULT_COST_THRESHOLDS)
    if not isinstance(raw_thresholds, dict):
        return thresholds
    for key in DEFAULT_COST_THRESHOLDS:
        value = raw_thresholds.get(key)
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            continue
        if value > 0:
            thresholds[key] = float(value)
    return thresholds


def is_cost_budget_warning(
    input_tokens: int,
    output_tokens: int,
    estimated_cost: float | None,
    thresholds: dict[str, float] | None = None,
) -> bool:
    """Tell whether token usage or estimated cost reached a threshold.

    ``thresholds`` falls back to ``DEFAULT_COST_THRESHOLDS`` when it is
    ``None`` or empty. A threshold that is missing or not positive disables
    that check. An ``estimated_cost`` of ``None`` never triggers the cost
    check.
    """
    effective_thresholds = thresholds or DEFAULT_COST_THRESHOLDS
    total_tokens = max(input_tokens, 0) + max(output_tokens, 0)
    token_threshold = float(effective_thresholds.get("total_tokens") or 0)
    cost_threshold = float(effective_thresholds.get("estimated_cost") or 0)

    if token_threshold > 0 and total_tokens >= token_threshold:
        return True
    return cost_threshold > 0 and estimated_cost is not None and estimated_cost >= cost_threshold
Helps understand codebase architecture, find issues, and suggest improvements.", + "version": "1.0.0", + "prompts": [ + "Analyze the code structure and identify key components, their relationships, and responsibilities.", + "Review the code for potential issues like bugs, security vulnerabilities, or performance problems.", + "Explain how the code works and what it does in simple terms.", + ], + "tools": ["grep", "read", "glob", "lsp_symbols", "lsp_find_references"], + }, + { + "id": "git-helper", + "name": "Git Helper", + "description": "Assists with Git operations including commit, branch management, merge conflicts, and repository exploration.", + "version": "1.0.0", + "prompts": [ + "Show me the current git status and any uncommitted changes.", + "Help me create a meaningful commit message for these changes.", + "Explain the git history and branch structure of this repository.", + ], + "tools": ["bash"], + }, + { + "id": "web-research", + "name": "Web Research", + "description": "Search the web for information, documentation, and resources. 
Helps find answers and learn about technologies.", + "version": "1.0.0", + "prompts": [ + "Search the web for information about {topic} and summarize the key findings.", + "Find official documentation or reliable resources about {topic}.", + "Look up the latest news or developments in {topic}.", + ], + "tools": ["search_brave_web_search", "websearch_web_search_exa", "webfetch"], + }, + { + "id": "file-management", + "name": "File Management", + "description": "Helps with file operations like creating, editing, organizing, and managing project files and directories.", + "version": "1.0.0", + "prompts": [ + "Create a new file at {path} with the following content: {content}", + "Organize the files in the project structure and suggest improvements.", + "Find all files related to {topic} or matching {pattern}.", + ], + "tools": ["read", "write", "glob", "bash"], + }, + { + "id": "task-planning", + "name": "Task Planning", + "description": "Helps break down complex tasks into smaller steps, create implementation plans, and track progress.", + "version": "1.0.0", + "prompts": [ + "Break down this task into smaller, manageable steps: {task}", + "Create an implementation plan for building {feature} with clear phases.", + "Review the current progress and suggest next steps for completing {goal}.", + ], + "tools": ["todowrite", "read", "write"], + }, +] diff --git a/backend/app/agents/state.py b/backend/app/agents/state.py index f7959fb..773aad3 100644 --- a/backend/app/agents/state.py +++ b/backend/app/agents/state.py @@ -8,6 +8,14 @@ from app.agents.schemas.task import AgentTask, CollaborationBudget, InterruptRec from langchain_core.messages import AIMessage, BaseMessage, HumanMessage from langgraph.graph.message import add_messages +AgentPhase = Literal[ + "phase_0_bootstrap", + "phase_1_routing", + "phase_2_controlled_collaboration", + "phase_3_dynamic_collaboration", + "phase_4_visibility_and_verification", +] + class AgentRole(str, Enum): MASTER = "master" @@ -75,8 
+83,23 @@ class AgentState(TypedDict): verification_status: VerificationStatus | None verification_summary: str | None verification_evidence: list[dict[str, Any]] + isolation_mode: str + isolation_id: str | None + isolation_workspace_path: str | None + isolation_parent_conversation_id: str | None + isolation_metadata: dict[str, Any] + input_tokens: int + output_tokens: int + estimated_cost: float | None + budget_warning: bool + cost_by_agent: dict[str, dict[str, Any]] + cost_thresholds: dict[str, Any] budget_state: CollaborationBudget | dict[str, Any] | None collaboration_budget_history: list[CollaborationBudget | dict[str, Any]] + current_phase: AgentPhase + phase_history: list[dict[str, Any]] + current_checkpoint: str | None + checkpoint_history: list[dict[str, Any]] tool_strategy_used: str | None tool_round_count: int @@ -161,8 +184,34 @@ def initial_state(user_id: str, conversation_id: str) -> AgentState: verification_status=None, verification_summary=None, verification_evidence=[], + isolation_mode="none", + isolation_id=None, + isolation_workspace_path=None, + isolation_parent_conversation_id=None, + isolation_metadata={}, + input_tokens=0, + output_tokens=0, + estimated_cost=None, + budget_warning=False, + cost_by_agent={}, + cost_thresholds={}, budget_state=None, collaboration_budget_history=[], + current_phase="phase_0_bootstrap", + phase_history=[ + { + "phase": "phase_0_bootstrap", + "reason": "initial_state_created", + } + ], + current_checkpoint="bootstrap.initialized", + checkpoint_history=[ + { + "checkpoint": "bootstrap.initialized", + "phase": "phase_0_bootstrap", + "reason": "initial_state_created", + } + ], tool_strategy_used=None, tool_round_count=0, max_tool_rounds=2, diff --git a/backend/app/agents/transport/structured_io.py b/backend/app/agents/transport/structured_io.py new file mode 100644 index 0000000..f9c0e06 --- /dev/null +++ b/backend/app/agents/transport/structured_io.py @@ -0,0 +1,86 @@ +"""Structured IO for typed input/output - 
@dataclass
class StructuredInput:
    """Validated request for a skill, as produced by ``StructuredIO.parse_input``."""

    # Name of the skill to invoke.
    skill_name: str
    # Arguments for the skill.
    parameters: dict[str, Any]
    # Optional extra information; an empty dict when the caller sent none.
    metadata: dict[str, Any]


@dataclass
class StructuredOutput:
    """Result of a skill invocation, consumed by ``StructuredIO.format_output``."""

    skill_name: str
    result: Any
    success: bool
    # Error text; omitted from the formatted dict when None.
    error: str | None = None
    # Extra information; omitted from the formatted dict when None.
    metadata: dict[str, Any] | None = None


class StructuredIO:
    """Handles structured input/output for agent communication."""

    def parse_input(self, data: dict[str, Any]) -> StructuredInput:
        """Parse structured input from a dictionary.

        Args:
            data: Dictionary with ``skill_name`` (non-empty string),
                ``parameters`` (dict) and optional ``metadata`` (dict). A
                missing or ``None`` metadata is treated as an empty dict,
                mirroring ``StructuredOutput`` where ``None`` means "no
                metadata".

        Returns:
            StructuredInput instance.

        Raises:
            ValueError: If ``data`` is not a dict, a required field is
                missing, or a field has the wrong type.
        """
        if not isinstance(data, dict):
            raise ValueError("Input data must be a dictionary")

        skill_name = data.get("skill_name")
        if not skill_name:
            raise ValueError("Missing required field: skill_name")
        if not isinstance(skill_name, str):
            raise ValueError("skill_name must be a string")

        parameters = data.get("parameters")
        if parameters is None:
            raise ValueError("Missing required field: parameters")
        if not isinstance(parameters, dict):
            raise ValueError("parameters must be a dictionary")

        metadata = data.get("metadata")
        if metadata is None:
            # Covers both an absent key and an explicit null from JSON.
            metadata = {}
        elif not isinstance(metadata, dict):
            raise ValueError("metadata must be a dictionary")

        return StructuredInput(skill_name=skill_name, parameters=parameters, metadata=metadata)

    def format_output(self, output: StructuredOutput) -> dict[str, Any]:
        """Format structured output as a dictionary.

        Args:
            output: StructuredOutput instance.

        Returns:
            Dict with ``skill_name``, ``result`` and ``success``; ``error``
            and ``metadata`` are included only when they are not ``None``.
        """
        result: dict[str, Any] = {
            "skill_name": output.skill_name,
            "result": output.result,
            "success": output.success,
        }

        if output.error is not None:
            result["error"] = output.error

        if output.metadata is not None:
            result["metadata"] = output.metadata

        return result
def _estimate_runtime_cost(input_tokens: int, output_tokens: int) -> float | None:
    """Return the estimated cost for a token pair via ``estimate_token_cost``."""
    return estimate_token_cost(input_tokens, output_tokens)


def _build_cost_summary(
    state: dict[str, Any],
    *,
    conversation_id: str,
) -> AgentVisibilityCostSummaryOut:
    """Build the cost payload for a conversation from its agent state.

    Args:
        state: Agent state mapping; reads ``input_tokens``, ``output_tokens``,
            ``estimated_cost``, ``cost_thresholds``, ``budget_warning`` and
            ``cost_by_agent``. Missing or falsy counters count as 0.
        conversation_id: Conversation the summary is reported for.

    Returns:
        Totals, coerced thresholds, and one row per ``cost_by_agent`` entry
        sorted by ``total_tokens`` descending.
    """
    input_tokens = int(state.get("input_tokens") or 0)
    output_tokens = int(state.get("output_tokens") or 0)

    # Prefer the estimate persisted in the state and only recompute when it is
    # absent, which is the same rule the per-agent rows below follow.
    estimated_cost = state.get("estimated_cost")
    if estimated_cost is None:
        estimated_cost = _estimate_runtime_cost(input_tokens, output_tokens)

    thresholds = coerce_cost_thresholds(state.get("cost_thresholds"))
    # A warning already recorded in the state wins; otherwise re-evaluate the
    # totals against the thresholds.
    total_budget_warning = bool(state.get("budget_warning") or False) or is_cost_budget_warning(
        input_tokens,
        output_tokens,
        estimated_cost,
        thresholds,
    )

    by_agent_items: list[AgentVisibilityCostByAgentOut] = []
    for agent_id, payload in dict(state.get("cost_by_agent") or {}).items():
        payload_dict = dict(payload or {})
        agent_input_tokens = int(payload_dict.get("input_tokens") or 0)
        agent_output_tokens = int(payload_dict.get("output_tokens") or 0)
        agent_estimated_cost = payload_dict.get("estimated_cost")
        if agent_estimated_cost is None:
            agent_estimated_cost = _estimate_runtime_cost(agent_input_tokens, agent_output_tokens)
        by_agent_items.append(
            AgentVisibilityCostByAgentOut(
                # The mapping key is the fallback when the payload has no id.
                agent_id=str(payload_dict.get("agent_id") or agent_id),
                input_tokens=agent_input_tokens,
                output_tokens=agent_output_tokens,
                total_tokens=int(
                    payload_dict.get("total_tokens") or (agent_input_tokens + agent_output_tokens)
                ),
                estimated_cost=agent_estimated_cost,
                budget_warning=bool(payload_dict.get("budget_warning") or False),
            )
        )
    by_agent_items.sort(key=lambda item: item.total_tokens, reverse=True)

    return AgentVisibilityCostSummaryOut(
        conversation_id=conversation_id,
        total=AgentVisibilityCostOut(
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            total_tokens=input_tokens + output_tokens,
            estimated_cost=estimated_cost,
            budget_warning=total_budget_warning,
        ),
        thresholds=thresholds,
        by_agent=by_agent_items,
    )


def _build_tool_governance(
    state: dict[str, Any],
    *,
    conversation_id: str,
) -> AgentVisibilityToolGovernanceOut:
    """Build the tool-governance payload for a conversation.

    Emits one row per capability in the builtin registry indexes, annotated
    with how often the tool appears in ``state["tool_outcomes"]`` and the last
    non-empty string ``result_preview`` seen for it.

    Args:
        state: Agent state mapping; reads ``tool_outcomes``. Entries that are
            not dicts or have no ``tool_name`` are ignored.
        conversation_id: Conversation the payload is reported for.

    Returns:
        Governance rows sorted by usage count (descending) and then tool name.
    """
    indexes = load_builtin_registry_indexes()
    tool_outcomes = [dict(item) for item in state.get("tool_outcomes") or [] if isinstance(item, dict)]
    usage_count_by_tool: dict[str, int] = {}
    last_result_preview_by_tool: dict[str, str] = {}
    for item in tool_outcomes:
        tool_name = str(item.get("tool_name") or "")
        # Outcomes recorded as "search_web" are counted under "web_search".
        if tool_name == "search_web":
            tool_name = "web_search"
        if not tool_name:
            continue
        usage_count_by_tool[tool_name] = usage_count_by_tool.get(tool_name, 0) + 1
        preview = item.get("result_preview")
        if isinstance(preview, str) and preview:
            last_result_preview_by_tool[tool_name] = preview

    items = [
        AgentVisibilityToolGovernanceItemOut(
            capability_id=capability.capability_id,
            tool_name=capability.tool_name,
            permission_class=capability.permission_class.value,
            side_effect_scope=capability.side_effect_scope.value,
            supports_retry=capability.supports_retry,
            idempotent=capability.idempotent,
            safe_for_parallel_use=capability.safe_for_parallel_use,
            requires_confirmation=capability.requires_confirmation,
            usage_count=usage_count_by_tool.get(capability.tool_name, 0),
            last_result_preview=last_result_preview_by_tool.get(capability.tool_name),
        )
        for capability in indexes.capability_by_id.values()
    ]
    items.sort(key=lambda item: (-item.usage_count, item.tool_name))

    return AgentVisibilityToolGovernanceOut(
        conversation_id=conversation_id,
        total_tools=len(items),
        used_tools=sum(1 for item in items if item.usage_count > 0),
        items=items,
        upgrade_candidates=[
            "worktree_manager",
            "cost_inspector",
            "runtime_event_drilldown",
            "tool_policy_explorer",
        ],
    )


def _build_runtime_summary(
    state: dict[str, Any],
    *,
    conversation_id: str,
) -> AgentVisibilityRuntimeSummaryOut:
    """Build the runtime-summary payload for a conversation.

    Combines phase/checkpoint data, verifier verdict, isolation details, cost
    totals, topology and task counts, and the last ten ``event_trace`` entries.

    Args:
        state: Agent state mapping to summarise.
        conversation_id: Conversation the summary is reported for.

    Returns:
        The assembled runtime summary.
    """
    tasks = [dict(item) for item in state.get("active_tasks") or []]
    task_results = [dict(item) for item in state.get("task_results") or []]
    topology_nodes = _build_topology_nodes(state, tasks, task_results)
    cost_summary = _build_cost_summary(state, conversation_id=conversation_id)
    recent_events_raw = [dict(item) for item in (state.get("event_trace") or [])[-10:]]
    isolation_mode = str(state.get("isolation_mode") or "none")

    return AgentVisibilityRuntimeSummaryOut(
        conversation_id=conversation_id,
        execution_mode=state.get("execution_mode"),
        current_phase=state.get("current_phase"),
        current_checkpoint=state.get("current_checkpoint"),
        phase_history=list(state.get("phase_history") or []),
        checkpoint_history=list(state.get("checkpoint_history") or []),
        verifier=AgentVisibilityVerifierOut(
            conversation_id=conversation_id,
            status=state.get("verification_status"),
            summary=state.get("verification_summary"),
            evidence=list(state.get("verification_evidence") or []),
        ),
        isolation=AgentVisibilityIsolationOut(
            mode=isolation_mode,
            isolation_id=state.get("isolation_id"),
            workspace_path=state.get("isolation_workspace_path"),
            # Fall back to the generic parent id when no isolation-specific
            # parent conversation is recorded.
            parent_conversation_id=state.get("isolation_parent_conversation_id")
            or state.get("parent_conversation_id"),
            metadata=dict(state.get("isolation_metadata") or {}),
        ),
        cost=cost_summary.total,
        topology_node_count=len(topology_nodes),
        active_task_count=len(tasks),
        completed_task_count=sum(1 for item in task_results if item.get("status") == "completed"),
        recent_events=[_coerce_event_payload(item) for item in recent_events_raw],
    )


@router.get("/visibility/runtime-summary", response_model=AgentVisibilityRuntimeSummaryOut)
async def get_visibility_runtime_summary(
    conversation_id: str,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> AgentVisibilityRuntimeSummaryOut:
    """Return the runtime summary for one conversation."""
    state = await _get_visibility_state(conversation_id, current_user=current_user, db=db)
    return _build_runtime_summary(state, conversation_id=conversation_id)


@router.get("/visibility/cost", response_model=AgentVisibilityCostSummaryOut)
async def get_visibility_cost(
    conversation_id: str,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> AgentVisibilityCostSummaryOut:
    """Return token and cost totals, thresholds and the per-agent breakdown."""
    state = await _get_visibility_state(conversation_id, current_user=current_user, db=db)
    return _build_cost_summary(state, conversation_id=conversation_id)


@router.get("/visibility/tools", response_model=AgentVisibilityToolGovernanceOut)
async def get_visibility_tools(
    conversation_id: str,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> AgentVisibilityToolGovernanceOut:
    """Return tool governance metadata and usage counts for one conversation."""
    state = await _get_visibility_state(conversation_id, current_user=current_user, db=db)
    return _build_tool_governance(state, conversation_id=conversation_id)
@_marketplace_router.post("/plugins/{plugin_id}/download", response_model=dict[str, str])
async def download_plugin(plugin_id: str) -> dict[str, str]:
    """Download a plugin archive from the marketplace and install it.

    Args:
        plugin_id: Identifier of the marketplace entry to install.

    Returns:
        ``{"status": "installed", "plugin_id": plugin_id}`` on success.

    Raises:
        HTTPException: 404 if the plugin is not in the marketplace, 400 if the
            entry has no download URL, 502 if the download fails or the
            archive is not a valid zip, 500 if installation fails.
    """
    plugin = next((p for p in _plugin_marketplace if p.get("id") == plugin_id), None)
    if plugin is None:
        raise HTTPException(
            status_code=404, detail=f"Plugin '{plugin_id}' not found in marketplace"
        )

    download_url = plugin.get("download_url")
    if not download_url:
        raise HTTPException(status_code=400, detail="Plugin has no download URL")

    # NOTE(review): marketplace entries look caller-supplied, so this URL and
    # the archive should be treated as untrusted (server-side request target,
    # archive size) -- confirm whether an allow-list / size cap is needed.
    try:
        # Download the plugin archive
        async with httpx.AsyncClient() as client:
            response = await client.get(download_url, timeout=60.0)
            response.raise_for_status()
            archive_content = response.content

        # Extract to a temp directory and install; the install must run while
        # the temporary directory still exists.
        with tempfile.TemporaryDirectory() as temp_dir:
            archive_path = os.path.join(temp_dir, "plugin.zip")
            with open(archive_path, "wb") as f:
                f.write(archive_content)

            extract_dir = os.path.join(temp_dir, "extracted")
            with zipfile.ZipFile(archive_path, "r") as zf:
                zf.extractall(extract_dir)

            installed = bool(get_plugin_manager().install(extract_dir))
    except HTTPException:
        # Never re-wrap an HTTP error that already carries its own status.
        raise
    except httpx.HTTPError as exc:
        raise HTTPException(status_code=502, detail=f"Download failed: {str(exc)}") from exc
    except zipfile.BadZipFile as exc:
        raise HTTPException(status_code=502, detail="Invalid plugin archive") from exc
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Installation failed: {str(exc)}") from exc

    # Raised outside the try block so the generic handler above cannot turn it
    # into "Installation failed: ...".
    if not installed:
        raise HTTPException(status_code=500, detail="Failed to install plugin")
    return {"status": "installed", "plugin_id": plugin_id}
class AgentVisibilityIsolationOut(BaseModel):
    """Isolation details reported for a conversation's agent run."""

    mode: str = "none"  # isolation mode label; "none" when nothing is reported
    isolation_id: str | None = None
    workspace_path: str | None = None
    parent_conversation_id: str | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)  # free-form details


class AgentVisibilityCostOut(BaseModel):
    """Token counts and estimated cost for a whole conversation."""

    input_tokens: int = 0
    output_tokens: int = 0
    total_tokens: int = 0
    estimated_cost: float | None = None  # None when no estimate is available
    budget_warning: bool = False
    currency: str = "USD"


class AgentVisibilityCostByAgentOut(BaseModel):
    """Token counts and estimated cost attributed to a single agent."""

    agent_id: str
    input_tokens: int = 0
    output_tokens: int = 0
    total_tokens: int = 0
    estimated_cost: float | None = None  # None when no estimate is available
    budget_warning: bool = False


class AgentVisibilityCostSummaryOut(BaseModel):
    """Cost response: conversation totals, thresholds and per-agent rows."""

    conversation_id: str
    total: AgentVisibilityCostOut
    thresholds: dict[str, float] = Field(default_factory=dict)  # keyed by threshold name
    by_agent: list[AgentVisibilityCostByAgentOut] = Field(default_factory=list)


class AgentVisibilityToolGovernanceItemOut(BaseModel):
    """Governance flags and usage figures for one tool capability."""

    capability_id: str
    tool_name: str
    permission_class: str
    side_effect_scope: str
    supports_retry: bool = False
    idempotent: bool = False
    safe_for_parallel_use: bool = False
    requires_confirmation: bool = False
    usage_count: int = 0
    last_result_preview: str | None = None  # None when no preview is recorded


class AgentVisibilityToolGovernanceOut(BaseModel):
    """Tool-governance response for a conversation."""

    conversation_id: str
    total_tools: int = 0
    used_tools: int = 0
    items: list[AgentVisibilityToolGovernanceItemOut] = Field(default_factory=list)
    upgrade_candidates: list[str] = Field(default_factory=list)


class AgentVisibilityRuntimeSummaryOut(BaseModel):
    """Runtime-summary response: phase, verifier, isolation, cost and counts."""

    conversation_id: str
    execution_mode: str | None = None
    current_phase: str | None = None
    current_checkpoint: str | None = None
    phase_history: list[dict[str, Any]] = Field(default_factory=list)
    checkpoint_history: list[dict[str, Any]] = Field(default_factory=list)
    verifier: AgentVisibilityVerifierOut
    isolation: AgentVisibilityIsolationOut
    cost: AgentVisibilityCostOut
    topology_node_count: int = 0
    active_task_count: int = 0
    completed_task_count: int = 0
    recent_events: list[AgentVisibilityEventOut] = Field(default_factory=list)
-166,7 +181,11 @@ def _normalize_legacy_turn_context(turn_context: Any, current_agent: Any) -> dic active_sub_flow = normalized.pop("active_sub_flow", None) if isinstance(active_agent, str) and active_agent and "active_agent" not in normalized: normalized["active_agent"] = active_agent - if isinstance(active_sub_flow, str) and active_sub_flow and "active_sub_commander" not in normalized: + if ( + isinstance(active_sub_flow, str) + and active_sub_flow + and "active_sub_commander" not in normalized + ): normalized["active_sub_commander"] = active_sub_flow if not normalized.get("active_agent") and isinstance(current_agent, str) and current_agent: normalized["active_agent"] = current_agent @@ -342,11 +361,32 @@ class AgentService: "【当前时间】\n" f"- current_time_utc: {reference['current_time_iso']}\n" f"- current_date_utc: {reference['current_date_iso']}\n" - "说明:解析‘今天/明天/后天/本周/下周’等相对时间时,请以 current_time_utc 为准。" + "说明:解析'今天/明天/后天/本周/下周'等相对时间时,请以 current_time_utc 为准。" ) return context, reference - async def _get_user_llm_config(self, user_id: str, model_name: str | None = None) -> dict | None: + def build_skill_context(self, skill_names: list[str]) -> dict: + """构建 Skills 上下文 + + Args: + skill_names: Skill 名称列表 + + Returns: + 包含 skills 上下文的字典 + """ + registry = get_skill_registry() + merged_context = registry.get_skill_context(skill_names) + return { + "skills_context": merged_context, + "skills_metadata": { + "skills": skill_names, + "count": len(skill_names), + }, + } + + async def _get_user_llm_config( + self, user_id: str, model_name: str | None = None + ) -> dict | None: """获取用户的 LLM 模型配置""" user = await self.db.get(User, user_id) if not user or not user.llm_config: @@ -396,13 +436,15 @@ class AgentService: user_llm_config: dict | None, ) -> dict[str, Any]: state = initial_state(user_id, conversation.id) - state.update({ - "messages": [HumanMessage(content=full_message)], - "memory_context": memory_context, - "current_datetime_context": current_datetime_context, - 
"current_datetime_reference": current_datetime_reference, - "user_llm_config": user_llm_config, - }) + state.update( + { + "messages": [HumanMessage(content=full_message)], + "memory_context": memory_context, + "current_datetime_context": current_datetime_context, + "current_datetime_reference": current_datetime_reference, + "user_llm_config": user_llm_config, + } + ) previous_snapshot = await self._load_continuity_snapshot(conversation) if previous_snapshot: state.update(previous_snapshot) @@ -464,6 +506,7 @@ class AgentService: file_context = "" if file_ids: from app.services.document_service import DocumentService + doc_svc = DocumentService(self.db) for file_id in file_ids: content = await doc_svc.get_document_content(user_id, file_id) @@ -529,7 +572,9 @@ class AgentService: set_current_user(user_id) try: graph = get_agent_graph() - current_datetime_context, current_datetime_reference = self._build_current_datetime_context() + current_datetime_context, current_datetime_reference = ( + self._build_current_datetime_context() + ) state = await self._build_agent_state( user_id=user_id, @@ -542,7 +587,9 @@ class AgentService: ) state.update(_derive_role_memory_contexts(memory_ctx)) - yield self._build_progress_event("thinking", "Jarvis 正在分析请求", agent="master", step="理解你的问题") + yield self._build_progress_event( + "thinking", "Jarvis 正在分析请求", agent="master", step="理解你的问题" + ) try: async for event in graph.astream_events(state, version="v2"): @@ -551,7 +598,13 @@ class AgentService: metadata = event.get("metadata", {}) data = event.get("data", {}) - if kind == "on_chain_start" and event_name in {"master", "schedule_planner", "executor", "librarian", "analyst"}: + if kind == "on_chain_start" and event_name in { + "master", + "schedule_planner", + "executor", + "librarian", + "analyst", + }: stage_map = { "master": ("thinking", "Jarvis 正在理解请求"), "schedule_planner": ("planning", "Jarvis 正在编排日程"), @@ -559,9 +612,13 @@ class AgentService: "librarian": ("tool", "Jarvis 
正在检索知识"), "analyst": ("thinking", "Jarvis 正在分析信息"), } - stage, label = stage_map.get(event_name, ("thinking", "Jarvis 正在思考")) - yield self._build_progress_event(stage, label, agent=event_name, step=label) - + stage, label = stage_map.get( + event_name, ("thinking", "Jarvis 正在思考") + ) + yield self._build_progress_event( + stage, label, agent=event_name, step=label + ) + elif kind == "on_tool_start": yield self._build_progress_event( "tool", @@ -570,7 +627,7 @@ class AgentService: tool_name=event_name, step=f"正在执行 {event_name}", ) - + elif kind == "on_tool_end": tool_result = data.get("output") step = f"已完成 {event_name}" @@ -583,14 +640,16 @@ class AgentService: tool_name=event_name, step=step, ) - + elif kind == "on_chat_model_stream": chunk = data.get("chunk") - content = _coerce_event_text(getattr(chunk, "content", "") if chunk else "") + content = _coerce_event_text( + getattr(chunk, "content", "") if chunk else "" + ) if content: collected += content yield {"type": "chunk", "content": content} - + elif kind == "on_chain_end": output = data.get("output") final_resp = None @@ -605,7 +664,9 @@ class AgentService: elif kind == "on_chat_model_end": output = data.get("output") - final_content = _coerce_event_text(getattr(output, "content", "") if output else "") + final_content = _coerce_event_text( + getattr(output, "content", "") if output else "" + ) if final_content: final_text = final_content if final_text != collected: @@ -614,12 +675,16 @@ class AgentService: except Exception as e: if _is_streaming_rejection_error(e, user_llm_config) and not collected: - yield self._build_progress_event("responding", "Jarvis 正在生成回复", agent="master", step="fallback") + yield self._build_progress_event( + "responding", "Jarvis 正在生成回复", agent="master", step="fallback" + ) try: result_state = await graph.ainvoke(state) if isinstance(result_state, dict): state.update(result_state) - fallback_content = result_state.get("final_response") or str(result_state.get("messages", 
[AIMessage(content="")])[-1].content) + fallback_content = result_state.get("final_response") or str( + result_state.get("messages", [AIMessage(content="")])[-1].content + ) collected = str(fallback_content) yield {"type": "chunk", "content": collected} except Exception: @@ -643,14 +708,24 @@ class AgentService: if collected: assistant_msg.content = collected continuity_snapshot = _build_continuity_snapshot(state or {}) - assistant_msg.attachments = ([{ - "kind": "agent_continuity_state", - **continuity_snapshot, - }] if continuity_snapshot else None) - conv.agent_state = ({ - "kind": "agent_continuity_state", - **continuity_snapshot, - } if continuity_snapshot else None) + assistant_msg.attachments = ( + [ + { + "kind": "agent_continuity_state", + **continuity_snapshot, + } + ] + if continuity_snapshot + else None + ) + conv.agent_state = ( + { + "kind": "agent_continuity_state", + **continuity_snapshot, + } + if continuity_snapshot + else None + ) await BrainService(self.db).create_event( user_id, **_build_assistant_event_payload(collected), @@ -728,12 +803,16 @@ class AgentService: importance_signal=1.0, ) - memory_ctx = await memory_service.build_memory_context(self.db, user_id, conversation_id, message) + memory_ctx = await memory_service.build_memory_context( + self.db, user_id, conversation_id, message + ) set_current_user(user_id) try: graph = get_agent_graph() - current_datetime_context, current_datetime_reference = self._build_current_datetime_context() + current_datetime_context, current_datetime_reference = ( + self._build_current_datetime_context() + ) state = await self._build_agent_state( user_id=user_id, conversation=conv, @@ -745,7 +824,9 @@ class AgentService: ) state.update(_derive_role_memory_contexts(memory_ctx)) result_state = await graph.ainvoke(state) - response_content = result_state.get("final_response") or str(result_state.get("messages", [AIMessage(content="")])[-1].content) + response_content = result_state.get("final_response") or str( 
+ result_state.get("messages", [AIMessage(content="")])[-1].content + ) except Exception as e: logger.exception("agent_chat_simple_failed") response_content = "抱歉,发生错误。" @@ -766,15 +847,27 @@ class AgentService: ) assistant_msg.content = response_content - continuity_snapshot = _build_continuity_snapshot(result_state) if 'result_state' in locals() else None - assistant_msg.attachments = ([{ - "kind": "agent_continuity_state", - **continuity_snapshot, - }] if continuity_snapshot else None) - conv.agent_state = ({ - "kind": "agent_continuity_state", - **continuity_snapshot, - } if continuity_snapshot else None) + continuity_snapshot = ( + _build_continuity_snapshot(result_state) if "result_state" in locals() else None + ) + assistant_msg.attachments = ( + [ + { + "kind": "agent_continuity_state", + **continuity_snapshot, + } + ] + if continuity_snapshot + else None + ) + conv.agent_state = ( + { + "kind": "agent_continuity_state", + **continuity_snapshot, + } + if continuity_snapshot + else None + ) await self.db.commit() await self.db.refresh(assistant_msg) diff --git a/backend/tests/backend/app/agents/test_graph.py b/backend/tests/backend/app/agents/test_graph.py index 1cf3f63..1d955c0 100644 --- a/backend/tests/backend/app/agents/test_graph.py +++ b/backend/tests/backend/app/agents/test_graph.py @@ -16,10 +16,12 @@ from app.agents.graph import ( _create_child_agent, _execute_tool_calls, _parse_json_action, + _record_checkpoint, _record_interrupt, _record_recovery, _route_agent_from_user_query, _select_request_mode, + _set_phase, _spawn_permission_for_role, _run_collaboration_flow, _run_sub_commander, @@ -78,8 +80,23 @@ def _base_state(message: str, user_llm_config: dict | None = None) -> dict: 'verification_status': None, 'verification_summary': None, 'verification_evidence': [], + 'isolation_mode': 'none', + 'isolation_id': None, + 'isolation_workspace_path': None, + 'isolation_parent_conversation_id': None, + 'isolation_metadata': {}, + 'input_tokens': 0, + 
def test_set_phase_and_record_checkpoint_append_history():
    """Phase and checkpoint updates show up in current values, history and events."""
    state = _base_state('test')

    _set_phase(state, 'phase_1_routing', reason='entered_master')
    _record_checkpoint(state, 'routing.master_entered', reason='entered_master')

    assert state['current_phase'] == 'phase_1_routing'
    assert state['current_checkpoint'] == 'routing.master_entered'

    latest_phase_entry = state['phase_history'][-1]
    assert latest_phase_entry['phase'] == 'phase_1_routing'

    latest_checkpoint_entry = state['checkpoint_history'][-1]
    assert latest_checkpoint_entry['checkpoint'] == 'routing.master_entered'

    emitted_event_types = [event['event_type'] for event in state['event_trace']]
    assert 'agent.phase.changed' in emitted_event_types
    assert 'agent.checkpoint.recorded' in emitted_event_types
async def test_run_collaboration_flow_fallback_restores_routing_phase(monkeypatch):
    """A single-task plan falls back to direct mode and resumes the routing phase."""

    def single_task_plan(_user_query):
        return [
            AgentTask(
                task_id='task-1',
                title='单任务',
                role=AgentRole.LIBRARIAN.value,
                owner_agent_id=AgentRole.LIBRARIAN.value,
                goal='检索资料',
                expected_evidence=[{'type': 'evidence'}],
            )
        ]

    monkeypatch.setattr(graph_module, '_build_collaboration_tasks', single_task_plan)

    user_query = '帮我搜一下资料'
    result = await _run_collaboration_flow(_base_state(user_query), user_query)

    assert result['execution_mode'] == 'direct'
    assert result['routing_decision']['reason'] == 'collaboration_plan_fell_back'
    assert result['current_phase'] == 'phase_1_routing'
    assert result['current_checkpoint'] == 'routing.direct_resumed'

    checkpoints = result['checkpoint_history']
    assert checkpoints[-2]['checkpoint'] == 'collaboration.fallback_to_direct'
    assert checkpoints[-1]['checkpoint'] == 'routing.direct_resumed'


def test_prepare_isolation_context_selects_session_for_stateful_tools():
    """A reminder tool leads to session isolation without a workspace path."""
    state = _base_state('reminder request')
    reminder_tool = FakeTool('create_reminder', 'ok')

    graph_module._prepare_isolation_context(
        state,
        role=AgentRole.SCHEDULE_PLANNER,
        sub_commander='schedule_planning',
        user_query='create a reminder for tomorrow morning and keep the intermediate state isolated',
        toolset=[reminder_tool],
    )

    assert state['isolation_mode'] == 'session'
    assert state['isolation_workspace_path'] is None
    assert state['isolation_metadata']['reason'] == 'stateful_or_non_parallel_tooling'

    last_event = state['event_trace'][-1]
    assert last_event['event_type'] == 'agent.isolation.selected'


def test_prepare_isolation_context_uses_worktree_for_repo_mutation_queries(monkeypatch):
    """A repo-mutation query takes the (stubbed) worktree isolation result."""
    state = _base_state('fix repo build and create patch')

    def fake_prepare_worktree_isolation(**kwargs):
        return {
            'mode': 'worktree',
            'isolation_id': 'worktree-test',
            'workspace_path': '/tmp/jarvis/worktree-test',
            'parent_conversation_id': 'c1',
            'metadata': {
                'reason': kwargs['decision'].reason,
                'branch': 'jarvis/c1/executor-worktree-test',
            },
        }

    monkeypatch.setattr(graph_module, 'prepare_worktree_isolation', fake_prepare_worktree_isolation)

    task_tool = FakeTool('create_task', 'ok')
    graph_module._prepare_isolation_context(
        state,
        role=AgentRole.EXECUTOR,
        sub_commander='executor_tasks',
        user_query='fix repo build and create patch for the failing tests',
        toolset=[task_tool],
    )

    assert state['isolation_mode'] == 'worktree'
    assert state['isolation_workspace_path'] == '/tmp/jarvis/worktree-test'
    assert state['isolation_metadata']['branch'] == 'jarvis/c1/executor-worktree-test'

    last_event = state['event_trace'][-1]
    assert last_event['event_type'] == 'agent.isolation.selected'


def test_record_response_usage_updates_state_cost_totals_and_budget_warning():
    """Recording usage updates totals, the per-agent row and the budget warning."""
    state = _base_state('test')
    state['cost_thresholds'] = {'total_tokens': 100, 'estimated_cost': 0.0001}

    response = AIMessage(
        content='ok',
        usage_metadata={'input_tokens': 60, 'output_tokens': 50, 'total_tokens': 110},
    )
    graph_module._record_response_usage(state, response)

    assert state['input_tokens'] == 60
    assert state['output_tokens'] == 50
    assert state['estimated_cost'] == 0.00093
    assert state['budget_warning'] is True

    master_costs = state['cost_by_agent'][AgentRole.MASTER.value]
    assert master_costs['total_tokens'] == 110

    emitted_event_types = [event['event_type'] for event in state['event_trace']]
    assert emitted_event_types == ['agent.cost.updated', 'agent.cost.warning']
{'phase': 'phase_0_bootstrap'}, + {'phase': 'phase_4_visibility_and_verification'}, + ], + 'checkpoint_history': [ + {'checkpoint': 'bootstrap.initialized'}, + {'checkpoint': 'visibility.runtime_summary_ready'}, + ], + 'input_tokens': 120, + 'output_tokens': 80, + 'budget_warning': True, + 'estimated_cost': 0.00156, + 'cost_thresholds': {'total_tokens': 150, 'estimated_cost': 0.001}, + 'cost_by_agent': { + 'master': { + 'agent_id': 'master', + 'input_tokens': 60, + 'output_tokens': 20, + 'total_tokens': 80, + 'estimated_cost': 0.00048, + 'budget_warning': False, + }, + 'analyst-1234abcd': { + 'agent_id': 'analyst-1234abcd', + 'input_tokens': 60, + 'output_tokens': 60, + 'total_tokens': 120, + 'estimated_cost': 0.00108, + 'budget_warning': True, + }, + }, + 'isolation_mode': 'worktree', + 'isolation_id': 'iso-1', + 'isolation_workspace_path': '/tmp/jarvis/worktree-1', + 'isolation_parent_conversation_id': 'parent-conv-1', + 'isolation_metadata': {'branch': 'jarvis/test-worker'}, }, } @@ -396,6 +435,87 @@ async def test_visibility_verifier_returns_verdict(visibility_env): assert payload['evidence'][0]['task_id'] == ids['task_id'] +@pytest.mark.asyncio +async def test_visibility_runtime_summary_returns_phase_cost_and_isolation_metadata(visibility_env): + app, ids = visibility_env + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url='http://testserver') as client: + response = await client.get( + '/api/agents/visibility/runtime-summary', + params={'conversation_id': ids['conversation_id']}, + ) + + assert response.status_code == 200 + payload = response.json() + assert payload['conversation_id'] == ids['conversation_id'] + assert payload['execution_mode'] == 'collaboration' + assert payload['current_phase'] == 'phase_4_visibility_and_verification' + assert payload['current_checkpoint'] == 'visibility.runtime_summary_ready' + assert payload['verifier']['status'] == 'passed' + assert payload['isolation']['mode'] == 'worktree' + 
assert payload['isolation']['workspace_path'] == '/tmp/jarvis/worktree-1' + assert payload['isolation']['metadata']['branch'] == 'jarvis/test-worker' + assert payload['cost']['input_tokens'] == 120 + assert payload['cost']['output_tokens'] == 80 + assert payload['cost']['total_tokens'] == 200 + assert payload['cost']['estimated_cost'] == 0.00156 + assert payload['cost']['budget_warning'] is True + assert payload['topology_node_count'] == 2 + assert payload['active_task_count'] == 1 + assert payload['completed_task_count'] == 1 + assert payload['recent_events'][0]['event_id'] == 'evt-1' + + +@pytest.mark.asyncio +async def test_visibility_cost_returns_totals_thresholds_and_agent_breakdown(visibility_env): + app, ids = visibility_env + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url='http://testserver') as client: + response = await client.get( + '/api/agents/visibility/cost', + params={'conversation_id': ids['conversation_id']}, + ) + + assert response.status_code == 200 + payload = response.json() + assert payload['total']['input_tokens'] == 120 + assert payload['total']['output_tokens'] == 80 + assert payload['total']['total_tokens'] == 200 + assert payload['total']['budget_warning'] is True + assert payload['thresholds']['total_tokens'] == 150 + assert payload['thresholds']['estimated_cost'] == 0.001 + assert payload['by_agent'][0]['agent_id'] == 'analyst-1234abcd' + assert payload['by_agent'][0]['budget_warning'] is True + assert payload['by_agent'][1]['agent_id'] == 'master' + + +@pytest.mark.asyncio +async def test_visibility_tools_returns_governance_metadata_and_usage_counts(visibility_env): + app, ids = visibility_env + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url='http://testserver') as client: + response = await client.get( + '/api/agents/visibility/tools', + params={'conversation_id': ids['conversation_id']}, + ) + + assert response.status_code == 200 + payload = 
response.json() + assert payload['total_tools'] >= 1 + assert payload['used_tools'] >= 1 + search_tool = next(item for item in payload['items'] if item['tool_name'] == 'search_web') + assert search_tool['permission_class'] == 'external' + assert search_tool['side_effect_scope'] == 'network' + assert search_tool['usage_count'] == 1 + assert search_tool['last_result_preview'] == 'ok' + assert payload['upgrade_candidates'] == [ + 'worktree_manager', + 'cost_inspector', + 'runtime_event_drilldown', + 'tool_policy_explorer', + ] + + @pytest.mark.asyncio async def test_visibility_events_reject_invalid_datetime(visibility_env): app, ids = visibility_env diff --git a/backend/tests/backend/app/services/test_brain_ingestion.py b/backend/tests/backend/app/services/test_brain_ingestion.py index ca7ffe6..662d379 100644 --- a/backend/tests/backend/app/services/test_brain_ingestion.py +++ b/backend/tests/backend/app/services/test_brain_ingestion.py @@ -13,7 +13,7 @@ from app.models.conversation import Conversation, Message from app.models.memory import MemorySummary, UserMemory from app.models.user import User from app.services import agent_service, memory_service -from app.services.agent_service import AgentService +from app.services.agent_service import AgentService, _build_continuity_snapshot, _extract_continuity_snapshot from app.services.auth_service import get_password_hash from app.services.document_service import DocumentService @@ -23,6 +23,32 @@ class FakeGraph: return {"final_response": "已记录你的请求。"} +def test_continuity_snapshot_roundtrip_preserves_phase_and_checkpoint(): + payload = { + "current_agent": "master", + "current_phase": "phase_4_visibility_and_verification", + "phase_history": [ + {"phase": "phase_0_bootstrap", "reason": "initial_state_created"}, + {"phase": "phase_4_visibility_and_verification", "reason": "verification_started"}, + ], + "current_checkpoint": "collaboration.completed", + "checkpoint_history": [ + {"checkpoint": "bootstrap.initialized", 
"phase": "phase_0_bootstrap", "reason": "initial_state_created"}, + {"checkpoint": "collaboration.completed", "phase": "phase_4_visibility_and_verification", "reason": "collaboration_flow_finished"}, + ], + } + + snapshot = _build_continuity_snapshot(payload) + + assert snapshot is not None + restored = _extract_continuity_snapshot({"kind": "agent_continuity_state", **snapshot}) + assert restored is not None + assert restored["current_phase"] == "phase_4_visibility_and_verification" + assert restored["current_checkpoint"] == "collaboration.completed" + assert restored["phase_history"][-1]["phase"] == "phase_4_visibility_and_verification" + assert restored["checkpoint_history"][-1]["checkpoint"] == "collaboration.completed" + + class FakeStreamingGraph: async def astream_events(self, state, version="v2"): yield { diff --git a/data/jarvis.db b/data/jarvis.db index d4dd5f5..16ba667 100644 Binary files a/data/jarvis.db and b/data/jarvis.db differ diff --git a/development-doc/README.md b/development-doc/README.md new file mode 100644 index 0000000..b017c6e --- /dev/null +++ b/development-doc/README.md @@ -0,0 +1,52 @@ +# Development Doc + +本目录用于持续记录 Jarvis 的规划与开发过程。 + +## 目录结构 + +### `plan/` +用于记录中长期规划、升级方案、分阶段设计、专项计划。 + +建议内容: +- 每个升级点的目标 +- 计划改动的模块和文件 +- 分阶段实施顺序 +- 风险与验收标准 +- 设计决策与取舍 + +当前文件: +- `plan/README.md` +- `plan/phase-0-current-state-and-target.md` +- `plan/phase-1-safe-foundation.md` +- `plan/phase-2-controlled-collaboration.md` +- `plan/phase-3-dynamic-collaboration.md` +- `plan/phase-4-visibility-and-isolation.md` + +### `daily/` +用于记录每日工作日志、当天进展、开发计划、执行情况、问题、决策和下一步安排。 + +建议内容: +- 今日开发计划 +- 今日实际完成内容 +- 当前进度 +- 修改了哪些模块 / 文件 +- 当前阻塞点 +- 风险与临时决策 +- 下一步计划 +- 验证 / 测试情况 + +维护要求: +- 开始做当天开发前,先写当天计划 +- 每完成一个关键步骤后,及时补充进度 +- 遇到阻塞、改方案、改优先级时,必须更新 daily +- 一天结束前,补齐“完成情况 / 未完成 / 下一步” +- 后续正式改代码时,要把 daily 作为持续更新的开发日志,而不是事后总结 + +当前文件: +- `daily/2026-04-03.md` +- `daily/2026-04-04.md` + +补充约定: +- 多天改造内容不要挤在同一个 daily 文档里 +- 每一天单独一个 daily 文件 +- 分步骤执行时,已完成项使用 `~~删除线~~` 标记 
diff --git a/development-doc/daily/2026-04-03.md b/development-doc/daily/2026-04-03.md new file mode 100644 index 0000000..7c6ddd8 --- /dev/null +++ b/development-doc/daily/2026-04-03.md @@ -0,0 +1,203 @@ +# 2026-04-03 工作日志 + +## 今日开发计划 + +### 今日目标 + +- ~~分析 `demo/` 下三个 agent 项目与 Jarvis 当前 agents 的差异~~ +- ~~明确 Jarvis 2.0 升级方向~~ +- ~~建立 development-doc 文档结构~~ +- ~~形成分阶段 plan 文档~~ +- ~~形成 2 天融合改造计划~~ + +### 今日计划拆分 + +1. ~~分析 demo 项目能力与设计重点~~ +2. ~~评估 Jarvis 当前 agents 的优势与短板~~ +3. ~~输出 Jarvis 2.0 总体升级思路~~ +4. ~~建立 `development-doc/plan` 与 `development-doc/daily`~~ +5. ~~将 plan 拆成 phase 文档~~ +6. ~~输出 2 天融合改造计划~~ + +### Day 1 工作内容 + +#### Day 1 目标 + +- ~~完成 demo 项目分析~~ +- ~~完成 Jarvis 当前能力差距判断~~ +- ~~完成 `development-doc/` 目录搭建~~ +- ~~完成 phase 文档拆分~~ +- ~~完成 2 天融合计划初稿~~ + +#### Day 1 分步骤执行 + +1. ~~分析 `demo/swarm-ide-chore-specs-mvp`~~ +2. ~~分析 `demo/claude-code-cli-master`~~ +3. ~~分析 `demo/claw-code-main`~~ +4. ~~梳理 Jarvis 当前 agents 架构优势与短板~~ +5. ~~输出 `plan/README.md` 与 phase 文档~~ +6. ~~输出 `2026-04-03-jarvis-agents-2-day-integration-plan.md`~~ +7. ~~建立并整理 `daily/` 日志结构~~ + +#### Day 1 完成标准 + +- ~~文档结构齐全~~ +- ~~分阶段计划可读~~ +- ~~两天改造路线明确~~ +- ~~daily 可持续维护~~ + +> Day 2 内容已拆分到 `daily/2026-04-04.md` + +--- + +## 今日实际完成 + +1. 分析了以下 demo 项目: + - `demo/swarm-ide-chore-specs-mvp` + - `demo/claude-code-cli-master` + - `demo/claw-code-main` + +2. 梳理了 Jarvis 当前 agent 架构的优势与短板: + - 当前强项:分层路由、业务导向、continuity、fallback、测试基础 + - 当前短板:动态协作不足、缺少 verifier、缺少 task/runtime、可观察性不足 + +3. 输出并整理了 Jarvis 规划文档结构: + - `development-doc/README.md` + - `development-doc/plan/README.md` + - `development-doc/plan/phase-0-current-state-and-target.md` + - `development-doc/plan/phase-1-safe-foundation.md` + - `development-doc/plan/phase-2-controlled-collaboration.md` + - `development-doc/plan/phase-3-dynamic-collaboration.md` + - `development-doc/plan/phase-4-visibility-and-isolation.md` + - `development-doc/plan/2026-04-03-jarvis-agents-2-day-integration-plan.md` + +4. 
建立并整理了 daily 目录: + - `development-doc/daily/2026-04-03.md` + +--- + +## 当前进度 + +### 文档规划进度 + +- demo 分析:已完成 +- 总体升级方向:已完成 +- plan 目录搭建:已完成 +- 分阶段 plan:已完成 +- 2 天融合计划:已完成 +- daily 规范:已完成 +- Day 3 清单与验收文档:已更新 +- Day 4 清单:已更新 + +### 代码改造进度 + +- 已完成 Day 1 / Day 2 底座与协作闭环 +- 已完成 Day 3 最小受限动态协作 runtime +- 已补齐 registry spawn policy、graph spawn guardrail、message trace、interrupt / recovery 最小闭环 +- Phase 1-3 核心功能已基本落地 +- Day 4 聚焦 Phase 4 可视化与隔离执行能力 + +--- + +## 今日修改的模块 / 文件 + +### 新增 / 更新的开发文档 + +- `development-doc/README.md` +- `development-doc/plan/README.md` +- `development-doc/plan/phase-0-current-state-and-target.md` +- `development-doc/plan/phase-1-safe-foundation.md` +- `development-doc/plan/phase-2-controlled-collaboration.md` +- `development-doc/plan/phase-3-dynamic-collaboration.md` +- `development-doc/plan/phase-4-visibility-and-isolation.md` +- `development-doc/plan/2026-04-03-jarvis-agents-2-day-integration-plan.md` +- `development-doc/daily/2026-04-03.md` + +### 本轮补充分析与改造涉及的代码文件 + +- `backend/app/agents/state.py` +- `backend/app/agents/graph.py` +- `backend/app/agents/prompts.py` +- `backend/app/agents/registry/models.py` +- `backend/app/agents/registry/builtins.py` +- `backend/app/agents/registry/indexes.py` +- `backend/app/agents/tools/__init__.py` +- `backend/tests/backend/app/agents/test_graph.py` +- `backend/tests/backend/app/agents/test_registry.py` + +--- + +## 今日结论 + +### 从 demo 项目吸收的核心方向 + +- 从 Swarm-IDE 学:动态通信原语、可观察性、协作拓扑 +- 从 Claude Code CLI 学:coordinator / task / verifier 的平台化编排 +- 从 Claw Code 学:runtime 分层、工具注册表、权限模型 + +### Jarvis 总体升级方向 + +Jarvis 不应直接变成完全自由的 swarm,而应升级为: + +- 受控的动态协作运行时 + +原则: + +- 简单请求继续走当前稳定路径 +- 复杂请求才进入协作模式 + +--- + +## 当前阻塞点 + +- 暂无明显代码层阻塞 +- 当前主要待办是完成定向回归测试并收尾验收 + +--- + +## 风险与临时决策 + +### 当前风险 + +- 不要一开始就引入无限动态 agent +- 不要直接替换现有 graph 主路径 +- 应优先保持 reminder/task/search 等现有业务稳定 +- 新能力必须配套测试和约束策略 + +### 当前决策 + +1. 采用受限动态协作,而不是自由 swarm +2. 通过 registry 固化 spawn role policy,再由 graph 在运行时执行权限校验 +3. 
interrupt / recovery 先落最小闭环,优先保证 direct 主路径稳定 +4. daily 后续必须作为开发过程中的持续更新日志使用 + +--- + +## 验证 / 测试情况 + +- 已补充 Day 3 相关 runtime / registry / graph 回归测试 +- 已更新 Day 3 执行清单与 daily 状态 +- 正在执行定向 pytest 验证,重点覆盖 `test_graph.py` 与 `test_registry.py` + +--- + +## 下一步计划 + +1. 完成 Day 3 定向回归测试 +2. 若有失败,修正 runtime / test 偏差 +3. 统一整理 Day 3 最终验收结论 +4. 启动 Day 4:Phase 4 可视化 API 实现 +5. 设计隔离执行最小方案 + +--- + +## 每日维护要求 + +后续正式进入改造阶段后,本文件需要持续更新: + +1. 开始开发前更新“今日开发计划” +2. 完成一个阶段性步骤后更新“当前进度” +3. 变更方案时更新“风险与临时决策” +4. 出现问题时更新“当前阻塞点” +5. 每次完成验证后更新“验证 / 测试情况” +6. 一天结束前补齐“已完成 / 未完成 / 下一步计划” diff --git a/development-doc/daily/2026-04-04.md b/development-doc/daily/2026-04-04.md new file mode 100644 index 0000000..b5437a9 --- /dev/null +++ b/development-doc/daily/2026-04-04.md @@ -0,0 +1,116 @@ +# 2026-04-04 工作日志 + +## 今日开发计划 + +### 今日目标 + +- 巩固 `Phase 4` 已完成的可见性最小闭环 +- 把 runtime summary 接到 Agents 页面 +- 为后续 90 分路径明确 isolation / cost / operator surface 升级项 +- 保持 reminder / task / search 主路径稳定 + +### 今日计划拆分 + +1. 新增 `backend/app/agents/api/visibility.py` 可见性 API 模块 +2. 实现 event stream API +3. 实现协作链路拓扑查询 API +4. 实现 task 执行证据查询 API +5. 实现 message thread 查询 API +6. 实现 verifier 结果查询 API +7. 设计隔离执行最小方案 +8. 补测试并验证主流程 + +### Day 4 工作内容 + +#### Day 4 目标 + +- 完成 `Phase 4` 可见性 API 最小闭环 +- 完成 runtime summary API 与前端 Agents 页面首屏接入 +- 为后续完整隔离执行与成本治理预留接口 +- 保证已有路径测试不回退 + +#### Day 4 分步骤执行 + +1. 新增 `backend/app/agents/api/visibility.py` 及各可见性 API +2. `GET /agents/visibility/events` - event stream 按条件过滤 +3. `GET /agents/visibility/topology` - 协作拓扑视图 +4. `GET /agents/visibility/tasks/{task_id}/evidence` - task 执行证据 +5. `GET /agents/visibility/threads/{thread_id}/messages` - thread 消息流 +6. `GET /agents/visibility/verifier` - verifier 验收结论 +7. 在 `development-doc/plan/phase-4-visibility-and-isolation.md` 补充隔离执行设计方案 +8. 
补 `test_visibility_api.py` 及主流程回归测试 + +#### Day 4 完成标准 + +- event stream API 可按 conversation_id / thread_id / agent_id 过滤 +- topology API 可返回协作拓扑视图 +- evidence API 可返回 task 执行证据链 +- thread API 可重建消息流向 +- verifier API 可返回验收结论 +- 隔离执行设计方案可落地 +- 现有主流程测试继续通过 + +--- + +## 今日实际完成 + +- 分析了 Jarvis 现有代码实现状态(`graph.py`、`state.py`、`verifier.py`、`registry/models.py`、`schemas/`) +- 确认 Phase 1-3 核心功能已基本落地:task schema、event schema、verifier、tool metadata、collaboration flow、interrupt/recovery、message trace +- 识别了 Phase 4(可视化与隔离执行)待实现内容 +- 在 `2026-04-03-jarvis-agents-5-day-work-checklist.md` 中新增了 Day 4 任务清单 + +--- + +## 当前进度 + +### 代码改造进度(Phase 1-3) + +- ✅ task schema / event schema 已完整 +- ✅ verifier 模块已独立 +- ✅ state.py 已包含 collaboration 全部字段 +- ✅ registry/models.py 已补充 tool metadata +- ✅ graph.py 已接入 event trace、verifier 调用、collaboration flow +- ✅ interrupt / recovery 最小闭环已实现 +- ✅ message trace 已实现 + +### Day 4 待启动 + +- 待实现可见性 API(event stream、topology、evidence、thread、verifier) +- 待设计隔离执行方案 +- 待补可视化 API 测试 + +--- + +## 今日修改的模块 / 文件 + +- 待更新 + +--- + +## 当前阻塞点 + +- 待开发时更新 + +--- + +## 风险与临时决策 + +- 不直接重写 graph 主路径 +- verifier 优先以 helper 形式接入 +- 先补底座,不直接做自由 swarm + +--- + +## 验证 / 测试情况 + +- 待更新 + +--- + +## 下一步计划 + +1. 实现 `visibility.py` 可见性 API 模块 +2. 按顺序实现 event stream、topology、evidence、thread、verifier API +3. 设计隔离执行最小方案 +4. 补 `test_visibility_api.py` 测试 +5. 
跑测试验证主流程不回退 diff --git a/development-doc/plan/2026-04-03-jarvis-agents-2-day-work-checklist.md b/development-doc/plan/2026-04-03-jarvis-agents-2-day-work-checklist.md deleted file mode 100644 index ac711da..0000000 --- a/development-doc/plan/2026-04-03-jarvis-agents-2-day-work-checklist.md +++ /dev/null @@ -1,102 +0,0 @@ -# Jarvis Agents 2 天工作计划(可勾选执行版) - -日期:2026-04-03 -状态:执行清单 -适用范围:基于 `phase-0` ~ `phase-4` 及现有 2 天融合方案整理 - ---- - -## 使用说明 - -- 完成前使用 `- [ ]` -- 完成后改成 `- [x]` -- Day 2 默认依赖 Day 1 的核心底座完成后再推进 - ---- - -## Day 1:补底座,完成 Phase 1 最小闭环 - -Day 1 目标:先把 Jarvis 从“只有静态路由”补成“有任务结构、有事件结构、有 verifier、有工具治理信息”的可扩展底座,同时不破坏当前 direct 主路径。 - -- [x] 新增最小 `task schema` - 改造内容:新增 `backend/app/agents/schemas/task.py`,统一 `task_id`、`title`、`status`、`owner_agent_id`、`evidence`、`result_summary`,并补 `role`、`goal`、`expected_evidence`、`created_at`、`updated_at`;状态固定为 `pending`、`in_progress`、`completed`、`failed`、`blocked`。 - -- [x] 新增最小 `event schema` - 改造内容:新增 `backend/app/agents/schemas/event.py`,统一 `event_id`、`event_type`、`timestamp`、`conversation_id`、`agent_id`、`sub_commander_id`、`task_id`、`payload`、`severity`;首批事件类型覆盖 `agent.tool.start`、`agent.tool.result`、`agent.verify.started`、`agent.verify.completed`、`agent.error`。 - -- [x] 扩展 `backend/app/agents/state.py` 的运行时字段 - 改造内容:新增 `execution_mode`、`verification_status`、`verification_summary`、`verification_evidence`、`active_tasks`、`task_results`、`event_trace`、`budget_state`;默认值保持兼容 `initial_state()`,不替换现有 `pending_tasks`、`completed_tasks`、`tool_calls`。 - -- [x] 扩展 capability / tool metadata 模型 - 改造内容:在 `backend/app/agents/registry/models.py` 增加 `permission_class`、`side_effect_scope`、`supports_retry`、`idempotent`、`safe_for_parallel_use`、`requires_confirmation`;至少先固化 `read` / `write` / `external` 和 `none` / `local_state` / `db_write` / `network` 两组枚举语义。 - -- [x] 回填 builtin tools 的静态 metadata - 改造内容:在 `backend/app/agents/registry/builtins.py` 和需要的 `backend/app/agents/tools/__init__.py` 中,把 search / retrieval 类工具标成偏 `read`,create / 
update 类工具标成偏 `write`,外部检索类工具标成 `external`,并补充是否可重试、是否幂等、是否适合并行等标记。 - -- [x] 新增 verifier 角色定义 - 改造内容:在 `backend/app/agents/prompts.py` 增加 verifier prompt,明确 verifier 只负责验收,不负责重新规划;验收点聚焦“是否真正满足请求”“是否有明确证据”“是否把失败伪装成成功”。 - -- [x] 落地 verifier 模块 - 改造内容:新增 `backend/app/agents/verifier.py`,支持 `passed`、`failed`、`skipped` 三类最小结论,先服务于工具调用后的复杂输出、知识检索结果和分析型汇总输出,不接管纯闲聊路径。 - -- [x] 在 `backend/app/agents/graph.py` 接入最小 event trace 与 verifier helper - 改造内容:给 `_execute_tool_calls()` 增加 tool start / result / error 事件写入;给收尾阶段增加 verifier helper 调用;给 `_run_sub_commander()` 增加 task result 摘要写入,但暂时不重构主图为完整协作编排图。 - -- [x] 补 Phase 1 单元测试与回归测试 - 改造内容:新增 `backend/tests/backend/app/agents/test_agent_schemas.py`、`backend/tests/backend/app/agents/test_verifier.py`,并扩展 `test_graph.py`,覆盖 state 兼容性、schema 合法性、tool metadata 存在性、verifier 判定、主流程不回退。 - -- [x] 完成 Day 1 验收 - 改造内容:确认 reminder / task / search 主流程继续通过;确认 verifier 已能独立运行;确认 event schema 与 task schema 已落代码;确认 direct 仍是默认主路径;确认未引入动态 `create_agent`、message bus 全链路和 UI。 - ---- - -## Day 2:引入最小协作能力,完成 Phase 2 雏形 - -Day 2 目标:在 Day 1 底座稳定的基础上,给 Jarvis 增加“复杂请求可拆分、可分配、可回收、可验收”的最小受控协作能力,但仍然不进入自由 swarm。 - -- [ ] 增加 `request_mode_selector` - 改造内容:在 `backend/app/agents/graph.py` 中增加 direct / collaboration 模式选择逻辑;简单请求继续走旧路径,只有明显多步骤、跨领域、需要多角色配合的请求才进入 collaboration mode。 - -- [ ] 新增 coordinator prompt - 改造内容:在 `backend/app/agents/prompts.py` 中定义 coordinator 角色,职责限定为“判断是否拆解”“输出 2~4 个清晰子任务”“分配角色建议”“汇总任务结果”;明确禁止无限递归拆分。 - -- [ ] 新增最小 task decomposition 结构 - 改造内容:基于 Day 1 的 task schema 扩展最小拆分结构,至少输出 `task_id`、`title`、`role`、`goal`、`expected_evidence`,让复杂请求能以结构化任务列表进入后续执行。 - -- [ ] 增加 role -> existing agent assignment - 改造内容:先复用当前已有 top-level agent,不新增独立 worker runtime;把 schedule 类任务映射给 `schedule_planner`,retrieval 类任务映射给 `librarian`,analysis 类任务映射给 `analyst`,execution 类任务映射给 `executor`。 - -- [ ] 建立统一 task result 回收结构 - 改造内容:约束每个角色统一返回 `task_id`、`status`、`summary`、`evidence`、`next_action`(可选),并把结果写回 `task_results`,避免最终结果继续依赖单点硬编码拼接。 - -- [ ] 让 verifier 
强制参与协作结果收尾 - 改造内容:在 collaboration mode 下,所有复杂请求返回前都必须经过 verifier;verifier 有权拒绝证据不足、结果不完整、子任务未闭环的响应。 - -- [ ] 补 Phase 2 协作测试与回归测试 - 改造内容:覆盖复杂请求拆分测试、角色分配测试、task result 汇总测试、verifier 拒绝不完整结果测试,并再次确认 direct 模式原有流程不回退。 - -- [ ] 完成 Day 2 验收 - 改造内容:确认 graph 已能区分 direct / collaboration;确认复杂请求可拆成 2~4 个子任务;确认每个子任务有 owner 和 evidence;确认最终答案基于 task result 汇总;确认系统仍未进入无限动态 agent 模式。 - ---- - -## 这 2 天明确不做 - -- 不做动态 `create_agent` -- 不做 parent / child agent tree -- 不做内部消息线程长期态管理 -- 不做可视化调试面板 -- 不做 event stream API -- 不做 worktree / 隔离执行 -- 不做自由蜂群式协作 - ---- - -## 2 天结束后的预期状态 - -- [ ] 已具备 `direct` / `collaboration` 双模式入口 -- [ ] 已具备 verifier 独立验收层 -- [ ] 已具备 task schema / event schema / tool metadata 底座 -- [ ] 已具备 coordinator 雏形、任务拆分、角色分配、结果回收 -- [ ] 当前 reminder / task / search 主路径无明显回退 -- [ ] 后续可以继续推进 Phase 3 的受限动态协作,而不是返工 Phase 1 / Phase 2 底座 diff --git a/development-doc/plan/agent-update/phase-6-10-checklist.md b/development-doc/plan/agent-update/phase-6-10-checklist.md index 97ff824..bcbe2fe 100644 --- a/development-doc/plan/agent-update/phase-6-10-checklist.md +++ b/development-doc/plan/agent-update/phase-6-10-checklist.md @@ -150,14 +150,14 @@ - [x] 创建内存版 PluginMarketplace (in-memory) - [x] 实现 search() — GET `/api/marketplace/plugins` - [x] 实现 get_plugin() — GET `/api/marketplace/plugins/{id}` -- [ ] 实现 download_plugin() +- [x] 实现 download_plugin() — POST `/api/marketplace/plugins/{id}/download` ### 8.5 内置插件 -- [ ] 创建 `plugins/builtins/code_helper/` — lint, format, explain_code -- [ ] 创建 `plugins/builtins/git_helper/` — git_status, git_log, git_diff -- [ ] 创建 `plugins/builtins/web_helper/` — fetch_url, parse_html -- [ ] 创建 `plugins/builtins/file_organizer/` — organize_files, cleanup_duplicates +- [x] 创建 `plugins/builtins/code_helper/` — lint, format, explain_code +- [x] 创建 `plugins/builtins/git_helper/` — git_status, git_log, git_diff +- [x] 创建 `plugins/builtins/web_helper/` — fetch_url, parse_html +- [x] 创建 `plugins/builtins/file_organizer/` — organize_files, cleanup_duplicates 
### 8.6 API @@ -181,7 +181,7 @@ - [x] 插件的工具和 Hook 正确注册 - [x] 插件的工具和 Hook 正确注销 - [x] 插件无法访问未授权资源 -- [ ] 插件加载时间 < 1s +- [x] 插件加载时间 < 1s (built-in plugins) --- @@ -214,16 +214,16 @@ ### 9.4 内置 Skills -- [ ] 创建 `backend/app/agents/skills/bundled.py` — BUNDLED_SKILLS -- [ ] 实现 code-analysis skill -- [ ] 实现 git-helper skill -- [ ] 实现 web-research skill -- [ ] 实现 file-management skill -- [ ] 实现 task-planning skill +- [x] 创建 `backend/app/agents/skills/bundled.py` — BUNDLED_SKILLS +- [x] 实现 code-analysis skill +- [x] 实现 git-helper skill +- [x] 实现 web-research skill +- [x] 实现 file-management skill +- [x] 实现 task-planning skill ### 9.5 Agent 集成 -- [ ] AgentService.build_skill_context() +- [x] AgentService.build_skill_context() - [ ] Skill 上下文注入 Agent prompt - [ ] Skill 触发检测 @@ -248,7 +248,7 @@ - [x] 能加载 local_skills_dir 下的所有 SKILL.md - [x] 能从 MCP 服务器发现和加载 Skills -- [ ] 内置 Skills 默认加载 +- [x] 内置 Skills 默认加载 - [ ] Skill 内容正确注入 Agent prompt --- @@ -271,7 +271,7 @@ ### 10.2 远程传输层 -- [ ] 创建 `backend/app/agents/transport/structured_io.py` — StructuredIO +- [x] 创建 `backend/app/agents/transport/structured_io.py` — StructuredIO - [x] 创建 `backend/app/agents/transport/remote.py` — RemoteTransport - [x] 实现 send_response() - [x] 实现 send_event() @@ -292,8 +292,8 @@ ### 10.4 后台任务系统 - [x] 创建 `backend/app/agents/background/manager.py` — BackgroundTaskManager -- [ ] 创建 `backend/app/agents/background/scheduler.py` -- [ ] 创建 `backend/app/agents/background/executor.py` +- [x] 创建 `backend/app/agents/background/scheduler.py` +- [x] 创建 `backend/app/agents/background/executor.py` - [x] 实现 submit_task() - [x] 实现 cancel_task() - [x] 实现 get_task_status() @@ -301,7 +301,7 @@ ### 10.5 协调整合 -- [ ] 创建/修改 `backend/app/agents/coordinator.py` +- [x] 创建/修改 `backend/app/agents/coordinator.py` - [ ] Team 协作与现有 graph 集成 - [ ] 远程传输与现有 service 集成 @@ -327,7 +327,7 @@ - [x] 可以创建和管理 Agent 团队 - [x] 任务能正确分配给合适的成员 - [x] 能收集和聚合多成员的结果 -- [ ] 支持结构化的输入输出格式 +- [x] 支持结构化的输入输出格式 - [x] 支持远程 Agent 通信 - [x] 支持复杂的会话层级和状态管理 - [x] 
支持定时和异步后台任务 diff --git a/development-doc/plan/code-update/README.md b/development-doc/plan/code-update/README.md new file mode 100644 index 0000000..631723e --- /dev/null +++ b/development-doc/plan/code-update/README.md @@ -0,0 +1,171 @@ +# 代码指挥官 (Code Commander) 实施计划索引 + +本目录用于存放代码指挥官模块的分阶段规划文档。 + +## 文档说明 + +| 文件 | 说明 | +|------|------| +| `README.md` | 总览、阶段关系、实施顺序 | +| `phase-1-infrastructure.md` | 基础设施:State、Prompt、注册 | +| `phase-2-execution-engine.md` | 执行引擎:AI Adapter、沙盒、直接执行 | +| `phase-3-agent-integration.md` | Agent 集成:Graph 节点、边路由 | +| `phase-4-streaming-interaction.md` | 流式交互:PTY 终端、WebSocket | +| `phase-5-frontend-integration.md` | 前端集成:Vue 组件、xterm.js | + +## 推荐阅读顺序 + +1. 先阅读本 README 了解整体架构 +2. 再按顺序阅读 phase 1 ~ phase 5 +3. 实施时严格按阶段推进 + +--- + +## 总体设计原则 + +1. **用户选择式交互** - 不是自动分流,用户显式选择 AI 提供商 +2. **安全分级执行** - 低风险直接执行,高风险沙盒隔离 +3. **流式终端体验** - 实时显示 AI 执行过程,支持用户交互 +4. **临时目录隔离** - 每个任务在独立临时目录执行,执行后清理 + +--- + +## 阶段总览图 + +``` +Phase 1 ──────────────────────────────────────────────────────────────┐ +│ 基础设施 (Infrastructure) │ +│ - State 定义 │ +│ - Prompt 模板 │ +│ - 工具注册 │ +│ - Agent 注册 │ +│ │ +│ 核心文件: state.py, prompts.py, tools/__init__.py, builtins.py │ +└────────────────────────────────────────────────────────────────────┘ + │ + ▼ +Phase 2 ──────────────────────────────────────────────────────────────┐ +│ 执行引擎 (Execution Engine) │ +│ - AI CLI Adapter (统一接口) │ +│ - Sandbox Executor │ +│ - Direct Executor │ +│ - Security Classifier │ +│ │ +│ 核心文件: ai_adapter.py, sandbox_executor.py, direct_executor.py, │ +│ security_classifier.py │ +└────────────────────────────────────────────────────────────────────┘ + │ + ▼ +Phase 3 ──────────────────────────────────────────────────────────────┐ +│ Agent 集成 (Agent Integration) │ +│ - Graph 节点 │ +│ - 边路由 │ +│ - 任务模型 │ +│ │ +│ 核心文件: graph.py, schemas/task.py │ +└────────────────────────────────────────────────────────────────────┘ + │ + ▼ +Phase 4 ──────────────────────────────────────────────────────────────┐ +│ 流式交互 
(Streaming Interaction) │ +│ - PTY 终端 │ +│ - WebSocket 端点 │ +│ - 流式输出集成 │ +│ - 交互输入 │ +│ │ +│ 核心文件: terminal_engine.py, routers/terminal.py, stream_output.py │ +└────────────────────────────────────────────────────────────────────┘ + │ + ▼ +Phase 5 ──────────────────────────────────────────────────────────────┐ +│ 前端集成 (Frontend Integration) │ +│ - 页面组件 │ +│ - 终端显示组件 │ +│ - WebSocket 服务 │ +│ - 路由配置 │ +│ │ +│ 核心文件: CodeCommander.vue, TerminalDisplay.vue, terminalWs.ts │ +└────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 架构概览 + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Vue 前端 │ +│ [用户选择: Claude/Gemini/Codex/OpenCode] + [输入需求] │ +└────────────────────────┬────────────────────────────────────┘ + │ WebSocket 流式输出 + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ FastAPI 后端 │ +│ ┌─────────────────────────────────────────────────────┐ │ +│ │ 代码指挥官 (Code Commander Agent) │ │ +│ │ 1. 接收 AI 类型 + 用户需求 │ │ +│ │ 2. 安全分级判定 │ │ +│ │ 3. 
路由到对应执行器 │ │ +│ └─────────────────────────────────────────────────────┘ │ +│ │ │ +│ ┌──────────────────┼──────────────────┐ │ +│ ▼ ▼ ▼ │ +│ ┌────────────┐ ┌────────────┐ ┌────────────┐ │ +│ │ 直接执行器 │ │ 沙盒执行器 │ │ 终端引擎 │ │ +│ │(低风险任务) │ │(高风险任务) │ │ PTY + 流式 │ │ +│ └────────────┘ └────────────┘ └────────────┘ │ +└────────────────────────┬────────────────────────────────────┘ + │ subprocess 调用 + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ CLI 进程 (claude/gemini/codex/opencode) │ +│ 在临时目录中执行 │ +└─────────────────────────────────────────────────────────────┘ +``` + +--- + +## Demo 项目借鉴映射 + +| Demo 项目 | 主要借鉴点 | 对应 Phase | +|---------|-----------|-----------| +| **golutra** | PTY 终端、多 CLI 适配、工作流隔离 | Phase 2, 4 | +| **golutra CLI** | LocalSocket IPC、命令分发 | Phase 2 | +| **golutra Shim** | 进程启动、信号处理 | Phase 2 | + +--- + +## 实施顺序 + +``` +Phase 1 → Phase 2 → Phase 3 → Phase 4 → Phase 5 + │ │ │ │ │ + │ │ │ │ └── 前端 UI + 路由 + │ │ │ └── PTY + WebSocket + │ │ └── Graph 节点 + 边路由 + │ └── AI Adapter + Sandbox + └── State + Prompt + 注册 +``` + +--- + +## 文件变更追踪 + +| Phase | 新增文件 | 修改文件 | +|-------|---------|---------| +| Phase 1 | `tools/__init__.py` (改) | `state.py`, `prompts.py`, `registry/builtins.py` | +| Phase 2 | `ai_adapter.py`, `sandbox_executor.py`, `direct_executor.py`, `security_classifier.py` | - | +| Phase 3 | `schemas/task.py` (改) | `graph.py` | +| Phase 4 | `terminal_engine.py`, `routers/terminal.py`, `stream_output.py`, `interactive_input.py` | - | +| Phase 5 | `CodeCommander.vue`, `TerminalDisplay.vue`, `terminalWs.ts` | `router/index.ts` | + +--- + +## 注意事项 + +| 注意事项 | 说明 | +|---------|------| +| 不要跳过 Phase | 每个阶段都是下一个的基础 | +| AI CLI 前置检查 | 确保服务器上已安装对应 CLI | +| 临时目录及时清理 | 防止磁盘空间泄漏 | +| WebSocket 重连 | 前端实现自动重连机制 | diff --git a/development-doc/plan/code-update/checklist.md b/development-doc/plan/code-update/checklist.md new file mode 100644 index 0000000..d882525 --- /dev/null +++ b/development-doc/plan/code-update/checklist.md @@ -0,0 +1,215 @@ 
+# 代码指挥官实施清单(可勾选执行版) + +日期:2026-04-04 +状态:执行清单 +适用范围:基于 `phase-1` ~ `phase-5` 整理 + +--- + +## 使用说明 + +- 完成前使用 `- [ ]` +- 完成后改成 `- [x]` +- Day 1 为后端基础设施 +- Day 2-5 为后端执行引擎 +- Day 6 为 Agent 集成 +- Day 7-8 为流式交互 +- Day 9-10 为前端集成 + +--- + +## Day 1:State + Prompt + 注册 + +Day 1 目标:完成代码指挥官 Agent 的基础架子 + +- [ ] 新增 `CODE_COMMANDER = "code_commander"` 到 `AgentRole` 枚举 +- [ ] 新增 `CodeCommanderState` TypedDict(包含 task_type, ai_provider, sandbox_mode 等) +- [ ] 新增 `CODE_COMMANDER_SYSTEM_PROMPT` 系统提示 +- [ ] 新增 `SANDBOX_EXECUTION_PROMPT` 沙盒执行说明 +- [ ] 新增 `DIRECT_EXECUTION_PROMPT` 直接执行说明 +- [ ] 在 `SUB_COMMANDER_TOOLSETS` 中注册 `CODE_COMMANDER_TOOLSET` +- [ ] 新增 `CodeCommanderManifest` 到 `AGENT_MANIFESTS` +- [ ] 补 Phase 1 单元测试 + +**验收:确认 `AgentRole.CODE_COMMANDER` 存在且值正确** + +--- + +## Day 2:AI CLI Adapter(统一接口) + +Day 2 目标:实现适配不同 AI CLI 的统一接口 + +- [ ] 新增 `AICLIAdapter` 抽象基类 + - `cli_name` 属性 + - `requires_workspace` 属性 + - `build_command()` 方法 + - `parse_output()` 方法 + - `is_installed()` 方法 +- [ ] 新增 `ClaudeAdapter` 实现 +- [ ] 新增 `GeminiAdapter` 实现 +- [ ] 新增 `CodexAdapter` 实现 +- [ ] 新增 `OpenCodeAdapter` 实现 +- [ ] 新增 `CodeExecutionResult` 数据类 +- [ ] 补 Day 2 单元测试 + +**验收:`AICLIAdapter` 可以正确识别 4 种 CLI** + +--- + +## Day 3:Security Classifier + Direct Executor + +Day 3 目标:实现安全分级和直接执行器 + +- [ ] 新增 `RiskLevel` 枚举(LOW/HIGH) +- [ ] 新增 `SecurityClassifier` 类 + - `HIGH_RISK_KEYWORDS` 列表 + - `LOW_RISK_KEYWORDS` 列表 + - `classify()` 方法实现 + - `_is_project_path()` 方法实现 +- [ ] 新增 `DirectExecutor` 类 + - `execute()` 方法(异步) + - 超时控制 + - `is_installed()` 检查 +- [ ] 补 Day 3 单元测试 + +**验收:`SecurityClassifier` 能正确分类高低风险** + +--- + +## Day 4:Sandbox Environment + Sandbox Executor + +Day 4 目标:实现沙盒执行器 + +- [ ] 新增 `SandboxEnvironment` 类 + - `create()` 静态方法(创建临时目录) + - `cleanup()` 方法 + - `workspace_path` 属性 + - `session_id` 属性 +- [ ] 新增 `SandboxExecutor` 类 + - `execute()` 方法(异步,yield 流式输出) + - `cleanup_session()` 方法 + - `_list_created_files()` 方法 +- [ ] 实现超时控制 +- [ ] 补 Day 4 单元测试 + +**验收:`SandboxExecutor` 
能创建、执行、清理沙盒** + +--- + +## Day 5:执行引擎集成测试 + +Day 5 目标:确保执行引擎各组件协同工作 + +- [ ] 集成测试:`SecurityClassifier` + `DirectExecutor` +- [ ] 集成测试:`SecurityClassifier` + `SandboxExecutor` +- [ ] 集成测试:4 种 `AICLIAdapter` 的 `build_command()` +- [ ] 端到端测试:低风险任务直接执行 +- [ ] 端到端测试:高风险任务沙盒执行 +- [ ] 确认沙盒目录创建和清理正常 + +**验收:所有执行器支持流式输出,且正确路由** + +--- + +## Day 6:Graph 节点 + 边路由 + +Day 6 目标:将代码指挥官接入 LangGraph + +- [ ] 新增 `code_commander_node` 函数 + - 获取用户需求和 AI 提供商 + - 调用 `SecurityClassifier` + - 根据风险等级选择执行器 + - 返回执行结果 +- [ ] 在 `NODES` 字典中注册 `code_commander` +- [ ] 新增 `_should_route_to_code_commander()` 路由函数 +- [ ] 在 `graph.py` 中添加条件边 +- [ ] 新增 `CodeTask`, `CodeExecutionResult` 模型到 `schemas/task.py` +- [ ] 补 Day 6 单元测试 + +**验收:高风险任务路由到沙盒,低风险路由到直接执行** + +--- + +## Day 7:PTY Terminal Engine + +Day 7 目标:实现 PTY 终端管理 + +- [ ] 新增 `PTYSession` 数据类 +- [ ] 新增 `PTYManager` 类 + - `spawn()` 方法 + - `write()` 方法 + - `read()` 方法(异步生成器) + - `resize()` 方法 + - `kill()` 方法 +- [ ] 实现 `asyncio.subprocess` 进程管理 +- [ ] 实现输出队列 +- [ ] 补 Day 7 单元测试 + +**验收:PTY 会话可以启动、读写、终止** + +--- + +## Day 8:WebSocket + 流式输出 + +Day 8 目标:实现 WebSocket 端点和流式输出 + +- [ ] 新增 `ConnectionManager` 类 +- [ ] 新增 `/ws/terminal/{session_id}` WebSocket 端点 +- [ ] 实现连接管理(connect/disconnect) +- [ ] 新增 `StreamOutput` 类 +- [ ] 实现 `stream_execution()` 方法 +- [ ] 新增 `InteractiveInputHandler` 类 +- [ ] 实现用户输入传递到 PTY +- [ ] 补 Day 8 集成测试 + +**验收:WebSocket 连接正常,输出实时推送** + +--- + +## Day 9:Vue 页面组件 + +Day 9 目标:前端代码指挥官主页面 + +- [ ] 新增 `CodeCommander.vue` 页面组件 + - AI 提供商选择器 + - 任务输入框 + - 执行按钮 + - 终端显示区域 + - 交互输入框 + - 下载/清理按钮 +- [ ] 补 Day 9 组件测试 + +**验收:用户可以选择 AI 提供商并输入任务** + +--- + +## Day 10:TerminalDisplay + WebSocket 服务 + 路由 + +Day 10 目标:完成前端集成 + +- [ ] 新增 `TerminalDisplay.vue` 组件(xterm.js) + - 终端渲染 + - ANSI 颜色支持 + - 用户输入处理 +- [ ] 新增 `terminalWs.ts` WebSocket 服务 + - 连接管理 + - 自动重连 + - 消息处理 +- [ ] 在 `router/index.ts` 新增 `/code-commander` 路由 +- [ ] 端到端测试:完整执行流程 +- [ ] 确认前端与后端 WebSocket 通信正常 + +**验收:用户可以在前端看到实时终端输出并交互** + +--- + +## 最终验收 + +- [ ] 用户可以选择 AI 
提供商(Claude/Gemini/Codex/OpenCode) +- [ ] 低风险任务(如贪食蛇 demo)直接执行 +- [ ] 高风险任务在临时目录沙盒执行 +- [ ] 终端输出实时流式显示 +- [ ] 用户可以中途输入交互(如 "y" 确认) +- [ ] 临时目录执行后正确清理 +- [ ] 前端页面正常展示 +- [ ] 回归测试通过(现有功能不受影响) diff --git a/development-doc/plan/code-update/phase-1-infrastructure.md b/development-doc/plan/code-update/phase-1-infrastructure.md new file mode 100644 index 0000000..49e3824 --- /dev/null +++ b/development-doc/plan/code-update/phase-1-infrastructure.md @@ -0,0 +1,152 @@ +# Phase 1:基础设施 + +日期:2026-04-04 +状态:待实施 + +--- + +## 1. 本阶段目的 + +新增代码指挥官 Agent 的基础架子,包括: +- State 定义(角色、状态) +- Prompt 模板 +- 工具注册 +- Agent 注册 + +--- + +## 2. 详细任务 + +### 2.1 State 定义 + +**文件**: `backend/app/agents/state.py` + +```python +# 新增 AgentRole +class AgentRole(str, Enum): + # ... 现有角色 ... + CODE_COMMANDER = "code_commander" + +# 新增 CodeCommanderState +class CodeCommanderState(TypedDict): + task_type: str # "demo" | "project" | "modification" + ai_provider: str # "claude" | "gemini" | "codex" | "opencode" + sandbox_mode: bool # True = 沙盒执行,False = 直接执行 + workspace_path: str | None # 临时工作目录 + execution_session_id: str | None # PTY 会话 ID +``` + +### 2.2 Prompt 模板 + +**文件**: `backend/app/agents/prompts.py` + +```python +# 代码指挥官系统提示 +CODE_COMMANDER_SYSTEM_PROMPT = """你是一个代码指挥官,负责协调 AI 写代码助手。 + +你的职责: +1. 接收用户选择的 AI 提供商(Claude/Gemini/Codex/OpenCode) +2. 接收用户的写代码需求 +3. 进行安全分级判定 +4. 
路由到合适的执行器 + +安全分级规则: +- 低风险:demo、示例、贪食蛇游戏等独立项目 +- 高风险:修改现有项目、涉及 Jarvis 项目、路径操作等 + +执行模式: +- 直接执行:低风险任务,直接运行 +- 沙盒执行:高风险任务,在临时目录隔离执行""" + +# 沙盒执行说明 +SANDBOX_EXECUTION_PROMPT = """将在隔离的临时目录中执行任务。 +任务完成后,工作目录会被保留供下载。""" + +# 直接执行说明 +DIRECT_EXECUTION_PROMPT = """将直接执行任务。 +如果需要交互,请等待用户输入。""" +``` + +### 2.3 工具注册 + +**文件**: `backend/app/agents/tools/__init__.py` + +```python +# 新增工具集 +CODE_COMMANDER_TOOLSET = { + "code_commander": [ + "execute_code_task", + "get_execution_status", + "send_interactive_input", + "download_workspace", + "cleanup_workspace", + ] +} + +# 在 SUB_COMMANDER_TOOLSETS 中添加 +SUB_COMMANDER_TOOLSETS: dict[str, list[str]] = { + # ... 现有工具集 ... + "code_commander": CODE_COMMANDER_TOOLSET["code_commander"], +} +``` + +### 2.4 Agent 注册 + +**文件**: `backend/app/agents/registry/builtins.py` + +```python +# 新增 CodeCommanderManifest +CodeCommanderManifest = AgentManifest( + id="code_commander", + name="代码指挥官", + description="协调 AI 写代码助手的指挥官", + system_prompt=CODE_COMMANDER_SYSTEM_PROMPT, + role=AgentRole.CODE_COMMANDER, + sub_commanders=[], # 代码指挥官没有子指挥官 + tools=["execute_code_task", "get_execution_status", + "send_interactive_input", "download_workspace", "cleanup_workspace"], + permission_class=PermissionClass.HIGH, # 需要较高权限 + side_effect_scope=SideEffectScope.WORKSPACE, + supports_retry=True, + idempotent=False, + safe_for_parallel_use=False, + requires_confirmation=True, +) + +# 注册到 AGENT_MANIFESTS +AGENT_MANIFESTS: dict[str, AgentManifest] = { + # ... 现有 agent ... + "code_commander": CodeCommanderManifest, +} +``` + +--- + +## 3. 核心文件清单 + +| 文件 | 操作 | 说明 | +|------|------|------| +| `state.py` | 修改 | 新增 `CODE_COMMANDER` 角色和 `CodeCommanderState` | +| `prompts.py` | 修改 | 新增三个 prompt 常量 | +| `tools/__init__.py` | 修改 | 新增工具集注册 | +| `registry/builtins.py` | 修改 | 新增 `CodeCommanderManifest` | + +--- + +## 4. 
验收标准 + +- [ ] `AgentRole.CODE_COMMANDER` 存在且值正确 +- [ ] `CODE_COMMANDER_SYSTEM_PROMPT` 包含完整指令 +- [ ] 工具集已注册且可通过 `SUB_COMMANDER_TOOLSETS` 访问 +- [ ] `CodeCommanderManifest` 已注册且包含所有必要字段 + +--- + +## 5. 依赖关系 + +``` +本阶段 → Phase 2(执行引擎) + → Phase 3(Agent 集成) +``` + +本阶段是后续所有阶段的基础。 diff --git a/development-doc/plan/code-update/phase-2-execution-engine.md b/development-doc/plan/code-update/phase-2-execution-engine.md new file mode 100644 index 0000000..c90fa9a --- /dev/null +++ b/development-doc/plan/code-update/phase-2-execution-engine.md @@ -0,0 +1,321 @@ +# Phase 2:执行引擎 + +日期:2026-04-04 +状态:待实施 + +依赖:Phase 1 完成 + +--- + +## 1. 本阶段目的 + +实现代码指挥官的核心执行能力: +- AI CLI Adapter:统一接口适配不同 AI CLI +- Sandbox Executor:沙盒环境执行 +- Direct Executor:直接执行低风险任务 +- Security Classifier:安全分级 + +--- + +## 2. 详细任务 + +### 2.1 AI CLI Adapter + +**新文件**: `backend/app/agents/tools/ai_adapter.py` + +```python +from abc import ABC, abstractmethod +from pathlib import Path +from dataclasses import dataclass + +@dataclass +class CodeExecutionResult: + success: bool + message: str + files_created: list[str] + output: str + error: str | None + +class AICLIAdapter(ABC): + @property + @abstractmethod + def cli_name(self) -> str: + """CLI 命令名称,如 'claude', 'gemini'""" + pass + + @property + @abstractmethod + def requires_workspace(self) -> bool: + """是否需要工作目录""" + pass + + @abstractmethod + def build_command(self, prompt: str, workspace: Path | None) -> list[str]: + """构建 CLI 命令""" + pass + + @abstractmethod + def parse_output(self, output: str) -> CodeExecutionResult: + """解析 CLI 输出""" + pass + + @abstractmethod + def is_installed(self) -> bool: + """检查 CLI 是否已安装""" + pass + +class ClaudeAdapter(AICLIAdapter): + cli_name = "claude" + requires_workspace = True + + def build_command(self, prompt: str, workspace: Path | None) -> list[str]: + return ["claude", "-p", prompt, "--dangerously-skip-permissions"] + + # ... 
其他方法实现 + +class GeminiAdapter(AICLIAdapter): + cli_name = "gemini" + requires_workspace = False + # ... + +class CodexAdapter(AICLIAdapter): + cli_name = "codex" + # ... + +class OpenCodeAdapter(AICLIAdapter): + cli_name = "opencode" + # ... +``` + +### 2.2 Security Classifier + +**新文件**: `backend/app/agents/tools/security_classifier.py` + +```python +from enum import Enum + +class RiskLevel(Enum): + LOW = "low" # 直接执行 + HIGH = "high" # 沙盒执行 + +class SecurityClassifier: + HIGH_RISK_KEYWORDS = [ + "修改", "编辑", "删除", "移动", + "Jarvis", "backend", "frontend", + "git", "config", ".env", + ] + + LOW_RISK_KEYWORDS = [ + "demo", "示例", "贪食蛇", "俄罗斯方块", + "小游戏", "独立项目", "新项目", + "创建一个", "写一个", + ] + + def classify(self, task_description: str, target_path: str | None = None) -> RiskLevel: + # 1. 检查高风险关键词 + if any(kw in task_description for kw in self.HIGH_RISK_KEYWORDS): + return RiskLevel.HIGH + + # 2. 检查目标路径 + if target_path and self._is_project_path(target_path): + return RiskLevel.HIGH + + # 3. 检查低风险关键词 + if any(kw in task_description for kw in self.LOW_RISK_KEYWORDS): + return RiskLevel.LOW + + # 4. 
默认高风险 + return RiskLevel.HIGH + + def _is_project_path(self, path: str) -> bool: + # 检查是否指向 Jarvis 项目路径 + return "Jarvis" in path or "backend/app" in path +``` + +### 2.3 Sandbox Executor + +**新文件**: `backend/app/agents/tools/sandbox_executor.py` + +```python +import tempfile +import shutil +import asyncio +from pathlib import Path +from dataclasses import dataclass, field +from typing import AsyncGenerator + +@dataclass +class SandboxEnvironment: + workspace_path: Path + session_id: str + + @staticmethod + async def create() -> "SandboxEnvironment": + """创建新的沙盒环境""" + temp_dir = tempfile.mkdtemp(prefix="jarvis_code_") + session_id = Path(temp_dir).name + return SandboxEnvironment( + workspace_path=Path(temp_dir), + session_id=session_id, + ) + + async def cleanup(self): + """清理沙盒环境""" + if self.workspace_path.exists(): + shutil.rmtree(self.workspace_path) + +@dataclass +class ExecutionResult: + success: bool + exit_code: int + stdout: str + stderr: str + files_created: list[str] = field(default_factory=list) + +class SandboxExecutor: + def __init__(self, adapter: AICLIAdapter, timeout: int = 300): + self.adapter = adapter + self.timeout = timeout + self._sessions: dict[str, SandboxEnvironment] = {} + + async def execute( + self, + prompt: str, + session_id: str | None = None + ) -> AsyncGenerator[str, None]: + """执行代码任务,yield 实时输出""" + # 1. 创建或复用沙盒环境 + if session_id and session_id in self._sessions: + env = self._sessions[session_id] + else: + env = await SandboxEnvironment.create() + self._sessions[env.session_id] = env + session_id = env.session_id + + # 2. 构建命令 + cmd = self.adapter.build_command(prompt, env.workspace_path) + + # 3. 异步执行,实时 yield 输出 + process = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=str(env.workspace_path), + ) + + # 4. 实时读取输出 + while True: + line = await process.stdout.readline() + if not line: + break + yield line.decode() + + # 5. 
等待完成 + await process.wait() + + # 6. 收集结果 + return ExecutionResult( + success=process.returncode == 0, + exit_code=process.returncode or 0, + stdout=..., + stderr=..., + files_created=self._list_created_files(env.workspace_path), + ) + + async def cleanup_session(self, session_id: str): + """清理指定会话""" + if session_id in self._sessions: + await self._sessions[session_id].cleanup() + del self._sessions[session_id] +``` + +### 2.4 Direct Executor + +**新文件**: `backend/app/agents/tools/direct_executor.py` + +```python +class DirectExecutor: + def __init__(self, adapter: AICLIAdapter, timeout: int = 60): + self.adapter = adapter + self.timeout = timeout + + async def execute(self, prompt: str) -> ExecutionResult: + """直接执行,不需要沙盒""" + if not self.adapter.is_installed(): + return ExecutionResult( + success=False, + exit_code=-1, + stdout="", + stderr=f"{self.adapter.cli_name} is not installed", + ) + + cmd = self.adapter.build_command(prompt, None) + + process = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + try: + stdout, stderr = await asyncio.wait_for( + process.communicate(), + timeout=self.timeout, + ) + return ExecutionResult( + success=process.returncode == 0, + exit_code=process.returncode or 0, + stdout=stdout.decode(), + stderr=stderr.decode(), + ) + except asyncio.TimeoutError: + process.kill() + return ExecutionResult( + success=False, + exit_code=-1, + stdout="", + stderr=f"Execution timed out after {self.timeout}s", + ) +``` + +--- + +## 3. 核心文件清单 + +| 文件 | 操作 | 说明 | +|------|------|------| +| `ai_adapter.py` | 新增 | 抽象基类 + 4 个具体实现 | +| `security_classifier.py` | 新增 | 安全分级器 | +| `sandbox_executor.py` | 新增 | 沙盒执行器 | +| `direct_executor.py` | 新增 | 直接执行器 | + +--- + +## 4. 验收标准 + +- [ ] `AICLIAdapter` 可以正确识别 4 种 CLI +- [ ] `SecurityClassifier` 能正确分类高低风险 +- [ ] `SandboxExecutor` 能创建、执行、清理沙盒 +- [ ] `DirectExecutor` 能直接执行低风险任务 +- [ ] 所有执行器支持流式输出 + +--- + +## 5. 
风险与缓解 + +| 风险 | 缓解 | +|------|------| +| AI CLI 未安装 | `is_installed()` 检查 + 友好提示 | +| 执行超时 | `timeout` 参数控制 | +| 沙盒清理遗漏 | 使用 `finally` 块确保清理 | + +--- + +## 6. 依赖关系 + +``` +Phase 1(基础设施) + ↓ +本阶段 → Phase 3(Agent 集成) + → Phase 4(流式交互) +``` diff --git a/development-doc/plan/code-update/phase-3-agent-integration.md b/development-doc/plan/code-update/phase-3-agent-integration.md new file mode 100644 index 0000000..3083701 --- /dev/null +++ b/development-doc/plan/code-update/phase-3-agent-integration.md @@ -0,0 +1,162 @@ +# Phase 3:Agent 集成 + +日期:2026-04-04 +状态:待实施 + +依赖:Phase 1 + Phase 2 完成 + +--- + +## 1. 本阶段目的 + +将代码指挥官接入 LangGraph: +- Graph 节点 +- 边路由 +- 任务模型 + +--- + +## 2. 详细任务 + +### 2.1 Graph 节点 + +**文件**: `backend/app/agents/graph.py` + +```python +# 新增 code_commander_node +async def code_commander_node(state: AgentState) -> AgentState: + """代码指挥官节点""" + # 1. 获取用户需求和选择的 AI 提供商 + user_message = state.messages[-1].content + ai_provider = state.get("ai_provider", "claude") + + # 2. 安全分级 + classifier = SecurityClassifier() + risk_level = classifier.classify(user_message) + + # 3. 根据风险等级选择执行器 + adapter = get_adapter(ai_provider) + if risk_level == RiskLevel.LOW: + executor = DirectExecutor(adapter) + result = await executor.execute(user_message) + else: + sandbox = await SandboxEnvironment.create() + executor = SandboxExecutor(adapter) + result = await executor.execute(user_message, sandbox.session_id) + state["workspace_path"] = str(sandbox.workspace_path) + state["execution_session_id"] = sandbox.session_id + + # 4. 更新状态 + state.messages.append(AIMessage(content=str(result))) + state["next_step"] = None # 任务完成 + + return state + +# 节点注册到 NODES +NODES: dict[str, NodeCallable] = { + # ... 现有节点 ... 
+ "code_commander": code_commander_node, +} +``` + +### 2.2 边路由 + +**文件**: `backend/app/agents/graph.py` + +```python +def _should_route_to_code_commander(state: AgentState) -> str: + """判断是否路由到代码指挥官""" + if state.current_agent == "code_commander": + return "code_commander" + # ... 其他条件 + return END + +# 边注册 +def _build_graph() -> CompiledGraph: + # ... 现有边 ... + + # 新增代码指挥官相关边 + graph.add_conditional_edges( + "master", + _should_route_to_code_commander, + { + "code_commander": "code_commander", + END: END, + } + ) + + graph.add_edge("code_commander", END) + + return graph.compile() +``` + +### 2.3 任务模型 + +**文件**: `backend/app/agents/schemas/task.py` + +```python +from pydantic import BaseModel, Field +from typing import Literal + +class CodeProviderType(str, Enum): + CLAUDE = "claude" + GEMINI = "gemini" + CODEX = "codex" + OPENCODE = "opencode" + +class RiskLevelType(str, Enum): + LOW = "low" + HIGH = "high" + +class CodeTask(BaseModel): + """代码任务""" + id: str = Field(default_factory=lambda: f"code_{uuid.uuid4().hex[:8]}") + provider: CodeProviderType + prompt: str + risk_level: RiskLevelType + sandbox_mode: bool + workspace_path: str | None = None + session_id: str | None = None + status: Literal["pending", "running", "completed", "failed"] = "pending" + created_at: datetime = Field(default_factory=datetime.now) + +class CodeExecutionResult(BaseModel): + """代码执行结果""" + task_id: str + success: bool + exit_code: int + stdout: str + stderr: str + files_created: list[str] = Field(default_factory=list) + workspace_path: str | None = None + completed_at: datetime = Field(default_factory=datetime.now) +``` + +--- + +## 3. 核心文件清单 + +| 文件 | 操作 | 说明 | +|------|------|------| +| `graph.py` | 修改 | 新增 `code_commander_node` 和边路由 | +| `schemas/task.py` | 修改 | 新增 `CodeTask`, `CodeExecutionResult` 等模型 | + +--- + +## 4. 验收标准 + +- [ ] `code_commander_node` 正确处理任务 +- [ ] `SecurityClassifier` 被正确调用 +- [ ] 高低风险任务路由到正确的执行器 +- [ ] `CodeTask` 和 `CodeExecutionResult` 模型正确 + +--- + +## 5. 
依赖关系 + +``` +Phase 1 + Phase 2 + ↓ +本阶段 → Phase 4(流式交互) + → Phase 5(前端集成) +``` diff --git a/development-doc/plan/code-update/phase-4-streaming-interaction.md b/development-doc/plan/code-update/phase-4-streaming-interaction.md new file mode 100644 index 0000000..bbee1b9 --- /dev/null +++ b/development-doc/plan/code-update/phase-4-streaming-interaction.md @@ -0,0 +1,298 @@ +# Phase 4:流式交互 + +日期:2026-04-04 +状态:待实施 + +依赖:Phase 3 完成 + +--- + +## 1. 本阶段目的 + +实现 PTY 终端 + WebSocket 流式输出: +- PTY 终端管理 +- WebSocket 端点 +- 流式输出集成 +- 交互输入 + +--- + +## 2. 详细任务 + +### 2.1 PTY Terminal Engine + +**新文件**: `backend/app/agents/tools/terminal_engine.py` + +```python +import asyncio +import os +from dataclasses import dataclass, field +from typing import AsyncGenerator + +@dataclass +class PTYSession: + session_id: str + process: asyncio.subprocess.Process + workspace_path: str + +class PTYManager: + def __init__(self): + self._sessions: dict[str, PTYSession] = {} + self._output_queues: dict[str, asyncio.Queue] = {} + + async def spawn( + self, + cli: str, + args: list[str], + cwd: str, + session_id: str | None = None + ) -> str: + """启动 PTY 会话""" + if session_id is None: + session_id = f"pty_{os.urandom(8).hex()}" + + # 创建 PTY 进程 + process = await asyncio.create_subprocess_exec( + *([cli] + args), + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=cwd, + env={**os.environ, "TERM": "xterm-256color"}, + ) + + session = PTYSession( + session_id=session_id, + process=process, + workspace_path=cwd, + ) + self._sessions[session_id] = session + self._output_queues[session_id] = asyncio.Queue() + + # 启动输出读取协程 + asyncio.create_task(self._read_output(session_id)) + + return session_id + + async def _read_output(self, session_id: str): + """读取 PTY 输出并放入队列""" + session = self._sessions.get(session_id) + if not session: + return + + queue = self._output_queues[session_id] + + while True: + line = await session.process.stdout.readline() + if not line: + break + await 
queue.put(line.decode()) + + # 同时推送给所有订阅者 + await self._broadcast(session_id, line.decode()) + + await queue.put(None) # 结束标记 + + async def write(self, session_id: str, data: str): + """写入 PTY(用户输入)""" + session = self._sessions.get(session_id) + if session and session.process.stdin: + session.process.stdin.write(data) + await session.process.stdin.drain() + + async def read(self, session_id: str) -> AsyncGenerator[str, None]: + """读取 PTY 输出""" + queue = self._output_queues.get(session_id) + if not queue: + return + + while True: + line = await queue.get() + if line is None: + break + yield line + + async def resize(self, session_id: str, rows: int, cols: int): + """调整终端大小""" + # TODO: 实现 resize + pass + + async def kill(self, session_id: str): + """终止 PTY 会话""" + if session_id in self._sessions: + session = self._sessions[session_id] + session.process.terminate() + await session.process.wait() + del self._sessions[session_id] + del self._output_queues[session_id] + + async def _broadcast(self, session_id: str, data: str): + """广播输出到 WebSocket""" + # 实际推送由 router 层处理 + pass +``` + +### 2.2 WebSocket 端点 + +**新文件**: `backend/app/routers/terminal.py` + +```python +from fastapi import APIRouter, WebSocket, WebSocketDisconnect +from typing import dict + +router = APIRouter(prefix="/ws/terminal", tags=["terminal"]) + +class ConnectionManager: + def __init__(self): + self.active_connections: dict[str, WebSocket] = {} + + async def connect(self, session_id: str, websocket: WebSocket): + await websocket.accept() + self.active_connections[session_id] = websocket + + def disconnect(self, session_id: str): + if session_id in self.active_connections: + del self.active_connections[session_id] + + async def send(self, session_id: str, data: str): + if session_id in self.active_connections: + await self.active_connections[session_id].send_text(data) + +manager = ConnectionManager() + +@router.websocket("/{session_id}") +async def terminal_websocket(websocket: WebSocket, 
session_id: str): + await manager.connect(session_id, websocket) + + # 获取 PTY Manager 实例 + from app.agents.tools.terminal_engine import pty_manager + + try: + # 订阅该 session 的输出 + queue = pty_manager._output_queues.get(session_id) + if queue: + while True: + data = await websocket.receive_text() + # 接收用户输入 + await pty_manager.write(session_id, data + "\n") + + except WebSocketDisconnect: + manager.disconnect(session_id) +``` + +### 2.3 流式输出集成 + +**新文件**: `backend/app/agents/tools/stream_output.py` + +```python +import json +from typing import AsyncGenerator +from dataclasses import dataclass + +@dataclass +class StreamEvent: + type: str # "output" | "error" | "status" | "complete" + session_id: str + data: str + timestamp: str + +class StreamOutput: + def __init__(self, session_id: str, websocket_sender): + self.session_id = session_id + self.websocket_sender = websocket_sender + + async def push(self, event_type: str, data: str): + """推送事件到 WebSocket""" + event = StreamEvent( + type=event_type, + session_id=self.session_id, + data=data, + timestamp=datetime.now().isoformat(), + ) + await self.websocket_sender(self.session_id, json.dumps(event.__dict__)) + + async def stream_execution( + self, + executor, + prompt: str + ) -> AsyncGenerator[str, None]: + """包装执行器,实现流式输出""" + async for line in executor.execute(prompt): + await self.push("output", line) + yield line + + await self.push("complete", "") +``` + +### 2.4 交互输入 + +**新文件**: `backend/app/agents/tools/interactive_input.py` + +```python +class InteractiveInputHandler: + def __init__(self, pty_manager: PTYManager): + self.pty_manager = pty_manager + self._pending_inputs: dict[str, asyncio.Event] = {} + + async def wait_for_input(self, session_id: str, prompt: str) -> str: + """等待用户输入(如 "y" 确认)""" + event = asyncio.Event() + self._pending_inputs[session_id] = event + + # 发送提示 + from app.routers.terminal import manager + await manager.send(session_id, f"\n{prompt}\n") + + # 等待输入完成 + await event.wait() + del 
self._pending_inputs[session_id] + + return self._input_cache.get(session_id, "") + + async def send_input(self, session_id: str, data: str): + """用户发送输入""" + self._input_cache[session_id] = data + if session_id in self._pending_inputs: + self._pending_inputs[session_id].set() + + # 同时写入 PTY + await self.pty_manager.write(session_id, data + "\n") +``` + +--- + +## 3. 核心文件清单 + +| 文件 | 操作 | 说明 | +|------|------|------| +| `terminal_engine.py` | 新增 | PTY 终端管理 | +| `routers/terminal.py` | 新增 | WebSocket 端点 | +| `stream_output.py` | 新增 | 流式输出封装 | +| `interactive_input.py` | 新增 | 交互输入处理 | + +--- + +## 4. 验收标准 + +- [ ] PTY 会话可以启动、读写、终止 +- [ ] WebSocket 可以建立连接并收发消息 +- [ ] 执行输出实时推送到前端 +- [ ] 用户输入可以传递到 PTY + +--- + +## 5. 依赖关系 + +``` +Phase 3(Agent 集成) + ↓ +本阶段 → Phase 5(前端集成) +``` + +--- + +## 6. 备注 + +PTY 实现参考了 golutra 的 `src-tauri/src/runtime/pty.rs`: +- 使用 `portable-pty` 库 +- Windows 路径兼容处理 +- shim 机制用于信号处理 diff --git a/development-doc/plan/code-update/phase-5-frontend-integration.md b/development-doc/plan/code-update/phase-5-frontend-integration.md new file mode 100644 index 0000000..67fe078 --- /dev/null +++ b/development-doc/plan/code-update/phase-5-frontend-integration.md @@ -0,0 +1,364 @@ +# Phase 5:前端集成 + +日期:2026-04-04 +状态:待实施 + +依赖:Phase 4 完成 + +--- + +## 1. 本阶段目的 + +Vue 前端新增代码指挥官 UI: +- 页面组件 +- 终端显示组件 +- WebSocket 服务 +- 路由配置 + +--- + +## 2. 
详细任务 + +### 2.1 页面组件 + +**新文件**: `frontend/src/pages/chat/CodeCommander.vue` + +```vue + - - -``` - -- [ ] **Step 2: Commit** - -```bash -git add frontend/src/views/SkillView.vue -git commit -m "feat: add SkillView page" -``` - ---- - -## Task 8: Frontend - Router & Navigation - -**Files:** -- Modify: `frontend/src/router/index.ts` -- Modify: `frontend/src/components/SidebarNav.vue` - -- [ ] **Step 1: Add route to router** - -In `frontend/src/router/index.ts`, add to children array: -```typescript -{ - path: 'skills', - name: 'skills', - component: () => import('@/views/SkillView.vue'), -}, -``` - -- [ ] **Step 2: Add nav item to SidebarNav** - -In `frontend/src/components/SidebarNav.vue`, add to navItems array: -```typescript -{ name: 'Skill 市场', path: '/skills', icon: Bot }, -``` - -Also add Bot to the import from lucide-vue-next. - -- [ ] **Step 3: Commit** - -```bash -git add frontend/src/router/index.ts frontend/src/components/SidebarNav.vue -git commit -m "feat: add Skill route and navigation" -``` - ---- - -## Task 9: Integration - Inject Skill Context into Agent - -**Files:** -- Modify: `backend/app/agents/graph.py` - -- [ ] **Step 1: Modify agent nodes to include skill context** - -In each agent node function, after creating the system message, append skill context: - -```python -from app.agents.skill_registry import build_skill_context - -async def planner_node(state: AgentState) -> AgentState: - # ... existing code ... - system_msgs: list[BaseMessage] = [SystemMessage(content=PLANNER_SYSTEM_PROMPT)] - - # Inject skill context - skill_ctx = build_skill_context("planner") - if skill_ctx: - system_msgs.append(SystemMessage(content=skill_ctx)) - - # ... rest of code ... 
-``` - -Apply same pattern to: master_node, executor_node, librarian_node, analyst_node - -- [ ] **Step 2: Commit** - -```bash -git add backend/app/agents/graph.py -git commit -m "feat: inject skill context into agent prompts" -``` - ---- - -## Summary - -| Task | Description | Files | -|------|-------------|-------| -| 1 | Skill Model | `backend/app/models/skill.py` | -| 2 | Skill Schema | `backend/app/schemas/skill.py` | -| 3 | Skill Service | `backend/app/services/skill_service.py` | -| 4 | Skill Router | `backend/app/routers/skill.py`, `main.py` | -| 5 | Skill Registry | `backend/app/agents/skill_registry.py` | -| 6 | Frontend API | `frontend/src/api/skill.ts` | -| 7 | SkillView Page | `frontend/src/views/SkillView.vue` | -| 8 | Router & Nav | `frontend/src/router/index.ts`, `SidebarNav.vue` | -| 9 | Agent Integration | `backend/app/agents/graph.py` | - ---- - -## Verification - -1. **Backend API Test:** - - Start backend: `cd backend && python -m uvicorn app.main:app --reload` - - Test: `curl -X POST http://localhost:8000/api/skills -H "Content-Type: application/json" -d '{"name":"test","instructions":"test","agent_type":"planner"}'` - -2. **Frontend Test:** - - Start frontend: `cd frontend && npm run dev` - - Navigate to `/skills`, verify page loads - -3. 
**Agent Integration Test:** - - Create a Skill via API - - Send message to chat that triggers the skill's agent type - - Verify skill context appears in agent logs diff --git a/docs/superpowers/plans/2026-04-03-l3-runtime-hardening-plan.md b/docs/superpowers/plans/2026-04-03-l3-runtime-hardening-plan.md deleted file mode 100644 index f58c46c..0000000 --- a/docs/superpowers/plans/2026-04-03-l3-runtime-hardening-plan.md +++ /dev/null @@ -1,150 +0,0 @@ -# 2026-04-03 L3 Runtime Hardening Plan - -## Goal -先把 Jarvis 的 L3 主链夯实,只处理 runtime / graph / tools / service integration / tests / docs 的一致性问题;暂不继续扩 unrelated feature domain。 - -## Scope -- `backend/app/agents/graph.py` -- `backend/app/agents/state.py` -- `backend/app/agents/tools/__init__.py` -- `backend/app/agents/tools/search.py` -- `backend/app/agents/tools/schedule.py` -- `backend/app/agents/tools/task.py` -- `backend/app/services/agent_service.py` -- `backend/app/services/document_service.py` -- `backend/app/services/memory_service.py` -- `backend/tests/backend/app/agents/test_graph*.py` -- `backend/tests/backend/app/services/test_brain_ingestion.py` -- related design/plan docs under `docs/superpowers/` - -## Non-goals -- 不在本轮新增前端页面 -- 不在 L3 未稳定前继续扩 accounting / weather / RSS 等运行时域 -- 不重做 graph 架构,只做收敛、对齐和补测试 - -## Current High-Priority Gaps -1. **continuity / clarification schema drift** - - graph runtime 已使用 `owning_agent` / `owning_sub_commander` / `target_action` - - brain ingestion tests 仍大量使用旧快照字段:`active_sub_flow` / `awaiting_user_input` 等 -2. **tool execution drift** - - `search.py` 的 `_run_async()` 在 running loop 下实现不一致 - - schedule/task canonicalization 仍存在参数映射漂移 -3. **service integration drift** - - `agent_service` 已派生 role-scoped memory sections,但 continuity snapshot / graph runtime / persisted attachments 需要继续收口 -4. 
**docs drift** - - 现有文档已记录 L3 merge progress,但缺少一份当天可执行的 hardening 追踪文档 - -## Workstreams - -### Workstream A — Continuity Contract -Owner: worker-1 - -Target: -- 对齐 clarification / continuity canonical schema -- 让 graph runtime 与 persisted snapshot 使用同一套契约,或显式兼容旧字段 -- 补针对性测试 - -Done when: -- graph 与 ingestion tests 对 clarification/continuity 断言一致 -- stale continuity / resume-after-clarification 场景有回归覆盖 -- 文档明确列出 canonical 字段和兼容规则 - -### Workstream B — Tool Execution Path -Owner: worker-2 - -Target: -- 修复 search async bridge -- 对齐 task / schedule canonicalization -- 固定当前 L3 scope 下真实支持的 tool/fallback 规则 - -Current status: -- 已统一 `search.py` / `schedule.py` / `task.py` 到共享 `app.agents.tools.async_bridge.run_async`,避免 running loop 下的同步桥接漂移。 -- 已收敛 graph canonicalization:`create_todo` 保留 date/todo_date 语义;仅在出现 timed task 信号时提升为 `create_schedule_task`;`create_goal` 统一落到 `goal_date`;`create_reminder` clarification 前会先标准化 `date`。 -- 已补 targeted regressions,覆盖 active event loop search path、timed todo promotion、reminder clarification date normalization。 - -Done when: -- 相关工具测试通过 -- graph canonicalization 行为清晰且无死分支 -- 文档明确说明支持的 tool path 与 deferred domains - -### Workstream C — Service Integration -Owner: worker-3 - -Target: -- 对齐 graph runtime 与 `agent_service` 入口语义 -- 收敛 continuity snapshot、role-scoped context、stream/sync 行为 -- 补接入层测试或针对性断言 - -Done when: -- `agent_service` 与 graph 状态注入规则一致 -- continuity snapshot load/persist 行为有测试证据 -- 文档明确 graph/service 边界和责任 - -## Runtime Contract Notes -### Clarification context -Canonical target shape: -- `owning_agent` -- `owning_sub_commander` -- `target_action` -- `question` -- `partial_args` -- `missing_fields` -- `status` - -### Continuity state -Current known active markers: -- `status: fresh|stale` -- `mode: resume_after_clarification` for clarification continuation -- routing continuation should only survive when the new request is still semantically a continuation - -### Tool strategy -Current target contract: -- native tools 
and JSON fallback should converge on the same normalized tool name + normalized args before execution -- system messages should remain coalesced into one system message for OpenAI-compatible providers that reject multiple system messages -- sync tool shims in current L3 scope must route through shared `async_bridge.run_async` instead of per-file event-loop wrappers - -### Current L3 tool path rules -- `librarian_retrieval` current allowlist: `search_knowledge`, `hybrid_search`, `web_search`, `get_knowledge_graph_context` -- search-family sync wrappers must be safe under an already-running event loop -- `create_todo` keeps day-level intent on `todo_date`; do not silently remap date-only todo requests to task due dates -- `create_todo` upgrades to `create_schedule_task` only for timed/task-shaped payloads such as `due_time`, `due_datetime`, `start_time`, `end_time` -- `create_goal` date aliases normalize to `goal_date` -- `create_reminder` aliases normalize before clarification so resumed flows keep canonical partial args - -### Explicitly deferred domains in this hardening pass -- accounting runtime expansion -- weather runtime expansion -- RSS runtime expansion -- any new tool domains outside current schedule / task / forum / knowledge L3 path - -## Documentation Rule For This Hardening Pass -每完成一个 workstream: -1. 更新本文件的 status -2. 在相关 spec/notes 中补一段“当前状态 / 已决策 / 已知边界” -3. 
再标记任务完成 - -## Status -- [x] Hardening tracker created -- [x] Workstream A complete -- [x] Workstream B complete -- [x] Workstream C complete -- [x] Final verification pass complete - -## Verification Checklist -- [x] `test_graph_system_messages.py` → 8 passed -- [x] `test_tool_async_bridge.py` + `test_task_tools.py` → 18 passed -- [x] `test_brain_ingestion.py` full file → 40 passed -- [x] targeted continuity persistence/rehydration checks → 3 passed -- [x] targeted graph regressions for timed todo / reminder clarification / active event loop paths -- [ ] broader graph suite beyond this L3 slice - -## Final Notes -- L3 continuity persistence now uses one canonical envelope and normalizes legacy snapshot shapes on rehydration. -- Service/runtime integration is aligned on the canonical continuity schema rather than legacy raw snapshot persistence. -- Tool sync shims now share one async bridge across search / schedule / task / forum paths. -- Final verification was executed with `uv run pytest` from `backend/`, which bypassed the broken plain `python` launcher in this environment. -- A reviewer flagged async bridge timeout/cancellation semantics as a follow-up reliability concern for mutating tools, but it is not blocking this L3 hardening pass. - -## Next Action -- Treat this L3 hardening slice as complete. -- If continuing, the next best follow-up is either broader graph regression coverage or a dedicated fix for async bridge timeout/cancellation semantics. 
diff --git a/docs/superpowers/specs/2026-03-20-agent-dashboard-design.md b/docs/superpowers/specs/2026-03-20-agent-dashboard-design.md deleted file mode 100644 index b3bbabc..0000000 --- a/docs/superpowers/specs/2026-03-20-agent-dashboard-design.md +++ /dev/null @@ -1,83 +0,0 @@ -# Agent Dashboard 页面设计规格 - -## 概述 - -为 Jarvis 系统设计一个 Agent 管理页面,以全息战术投影(Holographic Tactical HUD)风格可视化展示 Master + 4 Sub-Agent 的组织架构,支持查看状态和配置。 - -## 视觉风格 - -- **主题**:全息战术投影(科幻指挥台) -- **背景**:#03050a 深空黑 + 微弱网格线 + 全息扫描线纹理 -- **节点样式**:半透明玻璃态卡片,悬浮空中,全息光晕边框 -- **字体**:Orbitron(标题)+ JetBrains Mono(正文) -- **配色**:Cyan #00f5d4 主色,Amber #f9a825 强调色,Red #ff4757 危险色 - -## 布局结构 - -``` -┌──────────────────────────────────────────────────────────┐ -│ AGENT COMMAND CENTER [刷新] [新增] │ -├──────────────────────────────────────────────────────────┤ -│ │ -│ ┌─────────────────────┐ │ -│ │ MASTER CORE │ │ -│ │ JARVIS 指挥官 │ │ -│ │ [●] 状态灯 │ │ -│ └─────────┬──────────┘ │ -│ │ │ -│ ┌───────────────┼───────────────┐ │ -│ ▼ ▼ ▼ │ -│ ┌───────────┐ ┌───────────┐ ┌───────────┐ │ -│ │ PLANNER │ │ EXECUTOR │ │LIBRARIAN │ │ -│ │ [●] │ │ [●] │ │ [●] │ │ -│ │ 规划者 │ │ 执行者 │ │ 知识官 │ │ -│ │ 调用:12 │ │ 调用:8 │ │ 调用:5 │ │ -│ └───────────┘ └───────────┘ └───────────┘ │ -│ │ │ -│ ▼ │ -│ ┌───────────┐ │ -│ │ ANALYST │ │ -│ │ [●] │ │ -│ │ 分析师 │ │ -│ │ 调用:3 │ │ -│ └───────────┘ │ -│ │ -└──────────────────────────────────────────────────────────┘ - -点击节点 → 右侧滑出配置抽屉 -``` - -## 节点卡片字段 - -- 名称(Orbitron) -- 角色标签(中文) -- 状态灯:绿色脉冲=活跃,灰色=空闲 -- 角色描述(2行) -- 调用次数(今日) -- 当前任务摘要 - -## 连接线 - -- 虚线连接 Master → Sub-Agent -- 任务触发时:琥珀色脉冲光点沿路径流向目标节点 - -## 配置面板(右侧抽屉 400px) - -- Agent 名称 -- 角色描述 -- 系统提示词(textarea) -- 启用/停用开关 -- 保存 / 重置按钮 - -## 数据来源 - -- 固定结构:前端 `src/data/agents.ts` -- 运行时状态:`/api/agents/stats` - -## API 设计 - -``` -GET /api/agents/stats → { agent_id, call_count, current_task, status } -GET /api/agents/config/{id} → 返回单个 Agent 完整配置 -PUT /api/agents/config/{id} → 更新 name/description/system_prompt/enabled -``` diff --git 
a/docs/superpowers/specs/2026-03-20-chat-enhancement-design.md b/docs/superpowers/specs/2026-03-20-chat-enhancement-design.md deleted file mode 100644 index fac2768..0000000 --- a/docs/superpowers/specs/2026-03-20-chat-enhancement-design.md +++ /dev/null @@ -1,192 +0,0 @@ -# 沟通系统增强设计 - -## 1. 概述与目标 - -在沟通系统(ChatView)中增加两个功能: -1. **文件上传** - 用户可在对话中上传文件,AI 自动理解内容并回复 -2. **表情包选择器** - 在发送按钮旁添加 emoji 选择面板 - -## 2. 技术方案 - -### 2.1 文件上传 - -**前端实现:** -- 在 `ChatView.vue` 输入区域添加附件按钮(Paperclip 图标) -- 使用 `<input type="file">` 触发文件选择 -- 支持类型:图片(jpg/png/gif/webp)、文档(pdf/doc/docx/xls/xlsx/ppt/pptx/txt) -- 文件大小限制:10MB -- 上传时显示进度状态 - -**消息气泡展示:** -- 文件上传成功后,在对话中显示文件消息气泡 -- 气泡内容:文件图标 + 文件名 + 文件大小 -- 点击可下载/预览 - -**后端实现:** -- 复用现有 `/api/documents/upload` 接口上传文件 -- 创建 KGNode(entity_type: 'document')关联到对话 -- 修改 `AgentService.chat_simple()` 支持文件上下文 -- AI 自动读取上传文件内容并理解 - -**数据流:** -``` -用户选择文件 → 前端上传到 /api/documents/upload -→ 后端存储文件,创建 KGNode -→ 前端发送消息带 file_ids -→ AgentService 读取文件内容 -→ AI 基于文件内容回复 -``` - -### 2.2 表情包选择器 - -**前端实现:** -- 在发送按钮旁添加 Emoji 图标按钮 -- 点击展开浮层面板,显示 emoji 分类网格 -- 分类:😀 笑脸 | 👍 手势 | 📦 物品 | 💬 符号 -- 每个分类显示常用 emoji 网格 -- 点击 emoji 插入到输入框 -- 点击外部关闭面板 - -**Emoji 数据:** -```typescript -const emojiCategories = { - smile: ['😀', '😃', '😄', '😁', '😅', '😂', '🤣', '😊', '😇', '🙂', '😉', '😌'], - gesture: ['👍', '👎', '👌', '🤌', '🤏', '✌️', '🤞', '🖖', '🤙', '💪', '🙏', '👏'], - object: ['📄', '📁', '🖼️', '📊', '📝', '💾', '📧', '🔗', '📌', '🔍', '💡', '⚡'], - symbol: ['✅', '❌', '⚠️', '🔥', '💯', '🎯', '⭐', '✨', '💬', '🗨️', '❤️', '🧡'] -} -``` - -## 3. API 变更 - -### 3.1 修改 ChatRequest - -```python -class ChatRequest(BaseModel): - message: str - conversation_id: str | None = None - agent_id: str | None = None - file_ids: list[str] = [] # 新增:上传的文件ID列表 -``` - -### 3.2 修改 Message 模型(可选扩展) - -```python -class Message(BaseModel): - # 新增字段 - attachments: list[dict] = [] # [{file_id, filename, file_type, file_size}] -``` - -### 3.3 新增文件读取接口 - -``` -GET /api/documents/{document_id}/content -返回: 文件的文本内容(用于 AI 理解) -``` - -## 4. 
组件变更 - -### 4.1 ChatView.vue 变更 - -**新增:** -- `fileInput` ref - 文件 input -- `showEmojiPicker` ref - emoji 面板显示状态 -- `selectedFiles` ref - 已选择待上传文件 -- `uploadFile()` - 上传文件方法 -- `insertEmoji()` - 插入 emoji 到输入框 - -**修改:** -- 输入区域布局:附件按钮 | 输入框 | Emoji按钮 | 发送按钮 -- `sendMessage()` - 发送前先上传文件,获取 file_ids - -### 4.2 EmojiPicker 组件(新建) - -```vue - -``` - -### 4.3 FileMessage 组件(新建) - -用于展示文件消息气泡: -- 文件图标(根据类型) -- 文件名(可截断) -- 文件大小 -- 下载按钮 - -## 5. 错误处理 - -| 场景 | 处理 | -|------|------| -| 文件类型不支持 | 提示"不支持该文件类型" | -| 文件超过10MB | 提示"文件超过10MB限制" | -| 上传失败 | 提示"上传失败,请重试",显示重试按钮 | -| AI读取文件失败 | AI 回复"无法读取文件内容" | -| 网络断开 | 提示"网络连接断开" | - -## 6. 状态定义 - -| 状态 | 显示 | -|------|------| -| 上传中 | 进度环 + 文件名 | -| 上传成功 | 文件气泡 | -| 上传失败 | 错误图标 + 重试按钮 | -| AI 读取中 | AI 思考状态... | - -## 7. 实现顺序 - -1. **Phase 1: 基础 UI** - - 添加附件按钮和 Emoji 按钮到输入区域 - - Emoji 选择器组件 - - 文件消息气泡组件 - -2. **Phase 2: 文件上传** - - 前端文件上传逻辑 - - 消息带 file_ids - - 文件气泡展示 - -3. **Phase 3: AI 理解文件** - - 后端文件内容读取接口 - - AgentService 支持文件上下文 - - 测试完整流程 - -## 8. 文件结构 - -``` -frontend/src/ -├── views/ -│ └── ChatView.vue # 修改 - 添加附件/Emoji按钮 -├── components/ -│ ├── chat/ -│ │ ├── EmojiPicker.vue # 新建 - Emoji 选择器 -│ │ └── FileMessage.vue # 新建 - 文件消息气泡 -│ └── stats/ # 已存在 -│ └── ... 
-└── api/ - ├── conversation.ts # 修改 - chat 支持 file_ids - └── document.ts # 新增 - getDocumentContent - -backend/app/ -├── routers/ -│ ├── conversation.py # 修改 - ChatRequest 支持 file_ids -│ └── document.py # 修改 - 新增 content 接口 -├── services/ -│ └── agent_service.py # 修改 - chat 支持文件上下文 -└── models/ - └── conversation.py # 修改 - Message 新增 attachments -``` diff --git a/docs/superpowers/specs/2026-03-20-daily-todo-design.md b/docs/superpowers/specs/2026-03-20-daily-todo-design.md deleted file mode 100644 index 28a3d63..0000000 --- a/docs/superpowers/specs/2026-03-20-daily-todo-design.md +++ /dev/null @@ -1,178 +0,0 @@ -# Daily Todo 功能设计文档 - -## 概述 - -每日待办(Daily Todo)是一个以"天"为维度的任务管理模块,与现有的看板(以项目/多天为维度)形成互补。 - -**核心价值:** AI 每天早上自动预生成今日待办(基于前一天未完成的看板任务 + 前一天对话记录),用户可手动增删改。 - -## 时区说明 - -- 所有日期相关字段均使用**用户本地日期**(后端统一用 `datetime.date.today()` 计算,不依赖 UTC) -- `todo_date` 格式:`YYYY-MM-DD`(本地日期字符串),便于按天查询 - -## 数据模型 - -### DailyTodo 表 - -| 字段 | 类型 | 说明 | -|------|------|------| -| id | String(36) | 主键,UUID | -| user_id | String(36) | 所属用户,索引 | -| title | String(500) | 待办标题 | -| is_completed | Boolean | 是否完成,默认 false | -| source | Enum | `ai_kanban` / `ai_chat` / `manual`,来源 | -| source_detail | String(500) | 展示用说明文本,如"看板:完成用户登录功能" | -| source_ref_id | String(36) | 来源原始ID(看板TaskID或对话ConversationID),可空 | -| todo_date | String(10) | 所属日期,格式 YYYY-MM-DD,复合索引 (user_id, todo_date) | -| completed_at | DateTime | 完成时间,可空 | -| created_at | DateTime | 创建时间 | -| updated_at | DateTime | 更新时间 | - -**索引:** `INDEX (user_id, todo_date)`,查询今日待办的主要路径 - -### DailyTodoHistory 归档表 - -归档时机:每天凌晨 1:00,APScheduler 清理 7 天前的记录 - -| 字段 | 类型 | 说明 | -|------|------|------| -| id | String(36) | 主键,UUID | -| original_id | String(36) | 原记录ID(原记录归档后可能已删除) | -| user_id | String(36) | 所属用户 | -| title | String(500) | 待办标题 | -| is_completed | Boolean | 最终完成状态 | -| source | Enum | 来源 | -| source_detail | String(500) | 展示用说明文本 | -| todo_date | String(10) | 所属日期 | -| completed_at | DateTime | 完成时间 | -| created_at | DateTime 
| 创建时间 | -| archived_at | DateTime | 归档时间 | - -**保留策略:** 归档记录保留 7 天,到期自动删除(APScheduler 每日清理) - -## 核心功能 - -### F1: 今日待办列表 -- 展示当天的所有待办事项 -- 每条可勾选完成状态(勾选后划线 + 变灰) -- 支持新增、编辑、删除 -- 按创建时间倒序排列 -- 分页:每页 50 条,支持 `page` + `page_size` 参数 - -### F2: 历史记录 -- 可查看昨天、前天等历史日期的待办 -- 切换日期查看,**只读**(历史不允许修改/删除) -- 历史数据来自 `DailyTodo` 表(按 todo_date 过滤) -- 注:不从 `DailyTodoHistory` 表读取——归档表仅作备份保留 - -### F3: AI 自动预生成 -- 触发时机:每天早上 8:00(APScheduler 定时任务),也可手动触发 -- 数据来源: - 1. **看板任务**:前一天创建的、状态 ≠ done 的任务,取前 20 条(按 created_at 倒序) - 2. **对话记录**:前一天创建的对话,取其消息内容前 2000 字发给 LLM -- AI 处理流程: - 1. 查询上述数据,拼装为分析文本 - 2. 发送给 LLM,Prompt 要求输出 JSON 数组:`[{ "title": "...", "reason": "..." }]` - 3. 解析 LLM 返回,若返回为空或解析失败则跳过对话分析 - 4. 批量写入 DailyTodo 表(source=ai_kanban / ai_chat) -- **幂等处理(关键)**:使用事务 + 插入前检查,确保同一天不会重复生成 - ``` - BEGIN TRANSACTION - IF EXISTS (SELECT 1 FROM daily_todos WHERE user_id=? AND todo_date=? AND source IN ('ai_kanban','ai_chat')): - ROLLBACK -- 已有AI生成,跳过 - ELSE: - INSERT ... -- 批量写入 - COMMIT - ``` -- **容错**:LLM 不可用时记录日志,跳过该部分,不阻塞整体流程 -- 看板任务上限 20 条,对话分析最多提取 3 条 - -### F4: AI 来源说明 -- 每条 AI 生成的待办,显示其来源说明 -- `source=ai_kanban`:`source_detail` = "看板:{任务标题}",`source_ref_id` = 原始 Task ID -- `source=ai_chat`:`source_detail` = "对话:{reason 摘要(截取前60字)}" - -## API 设计 - -### GET /api/todos -查询待办列表(支持分页) -- Query: `?date=2026-03-20&page=1&page_size=50`(date 默认当天) -- Response: -```json -{ - "items": [DailyTodoOut], - "total": 12, - "page": 1, - "page_size": 50 -} -``` - -### POST /api/todos -新增待办(手动) -- Body: `{ title: string }` -- source 固定为 `manual`,todo_date 为当天 - -### PATCH /api/todos/{id} -更新待办(完成状态 / 标题) -- Body: `{ is_completed?: boolean, title?: string }` -- 仅当日待办可修改,历史日期返回 403 - -### DELETE /api/todos/{id} -删除待办 -- 仅当日待办可删除,历史日期返回 403 - -### POST /api/todos/ai-generate -手动触发 AI 预生成 -- 检查今日是否已有 AI 生成记录,有则返回 200(幂等,不重复生成) -- 无则执行 AI 分析流程,返回生成结果 - -### GET /api/todos/summary -获取今日待办摘要 -- Response: `{ date: "2026-03-20", total: 5, completed: 2, pending: 3 }` - -## 响应 Schema - -### DailyTodoOut 
-```json -{ - "id": "uuid", - "title": "完成用户登录功能", - "is_completed": false, - "source": "ai_kanban", - "source_detail": "看板:完成用户登录功能", - "todo_date": "2026-03-20", - "completed_at": null, - "created_at": "2026-03-20T08:00:00Z" -} -``` - -## 定时任务 - -| 任务 | 时间 | 说明 | -|------|------|------| -| AI预生成 | 每天 08:00 | 为所有活跃用户执行 AI 预生成 | -| 历史归档清理 | 每天 01:00 | 删除 7 天前已归档的 DailyTodo 记录 | - -## 前端页面 - -### TodoView.vue -- 路径:`/todo` -- 布局:顶部日期导航 + 下方待办列表 -- 日期导航:今天、昨天、前天快捷按钮 + 日期选择器 -- 今日视图:输入框新增 + 列表 + "AI 规划今日"按钮 -- 历史视图:只读列表,无新增/删除按钮,灰色禁用样式 -- 交互细节: - - 勾选完成:Motion 动画划线效果 - - 加载状态:骨架屏 - - 空状态:终端风格空提示 -- 风格:sci-fi 全息终端,cyan (#00f5d4) + #03050a,与 AgentView 一致 - -### 侧边栏 -- 新增菜单项:`{ name: '待办', path: '/todo', icon: CheckSquare }` - -## 技术依赖 - -- 后端:FastAPI + SQLAlchemy + APScheduler + LLM Service -- 前端:Vue 3 Composition API + 复用 api/index 的 axios 实例 -- 数据库:新表 DailyTodo + DailyTodoHistory(迁移 Alembic 或手动 CREATE TABLE) diff --git a/docs/superpowers/specs/2026-03-20-jarvis-personal-agent-design.md b/docs/superpowers/specs/2026-03-20-jarvis-personal-agent-design.md deleted file mode 100644 index b5bd080..0000000 --- a/docs/superpowers/specs/2026-03-20-jarvis-personal-agent-design.md +++ /dev/null @@ -1,602 +0,0 @@ -# Jarvis 个人 AI 助理 — 设计规格书 - -> 版本:v1.0 -> 日期:2026-03-20 -> 作者:Jarvis 设计团队 - ---- - -## 1. 项目概述 - -### 1.1 项目目标 - -构建一个拟人化的个人 AI 助理系统,代号 **Jarvis**。核心目标是打造一个真正"懂你"的智能体 —— 理解你的知识体系、工作安排和个人偏好,而不仅仅是关键词匹配回答问题。 - -### 1.2 核心价值 - -- **知识回溯能力** — 基于 LlamaIndex Node 关系 + 知识图谱双层架构,确保 AI 真正理解你的知识和工作的内在联系 -- **拟人化协作** — 多 Agent 角色协同,每个角色有独立职责,像真实团队成员一样交流 -- **全端覆盖** — Web + Android 双端,随时随地与 Jarvis 对话 -- **本地部署** — 所有数据存储在 NAS,数据完全自主可控 - ---- - -## 2. 
技术栈 - -| 层级 | 技术选型 | 说明 | -|------|---------|------| -| **Web 前端** | Vue 3 + TypeScript | Composition API,响应式 UI | -| **移动端** | Kotlin (Android) | Jetpack Compose,轻量连接器 | -| **后端框架** | FastAPI (Python 3.12+) | 高性能 ASGI,支持 async | -| **Agent 框架** | LangGraph | 多 Agent 编排、工具调用、状态机流转 | -| **LLM 适配器** | LangChain Claude / OpenAI / Ollama | 可切换,不影响上层逻辑 | -| **知识库框架** | LlamaIndex | Node 关系索引、语义检索 | -| **向量数据库** | ChromaDB | 轻量级向量存储 | -| **关系数据库** | SQLite | 轻量数据持久化 | -| **定时任务** | APScheduler | 定时任务调度 | -| **部署环境** | NAS (本地) | Docker 容器化部署 | - ---- - -## 3. 系统架构 - -### 3.1 整体架构图 - -``` -┌─────────────────────────────────────────────────────┐ -│ 用户端 │ -│ ┌──────────────────┐ ┌──────────────────┐ │ -│ │ Web 前端 │ │ Android App │ │ -│ │ (Vue 3 + TS) │ │ (Kotlin) │ │ -│ └────────┬─────────┘ └────────┬─────────┘ │ -└───────────┼────────────────────────┼─────────────────┘ - │ │ - │ HTTP / WebSocket │ - └────────┬────────────────┘ - │ -┌────────────────────▼─────────────────────────────────┐ -│ FastAPI 后端服务 │ -│ (NAS Docker 容器) │ -│ │ -│ ┌───────────────────────────────────────────────┐ │ -│ │ 多 Agent 调度系统 │ │ -│ │ ┌─────────┐ │ │ -│ │ │ 主Agent │ ◄── 协调者,统一入口 │ │ -│ │ │(调度员) │ │ │ -│ │ └────┬────┘ │ │ -│ │ ├──► 规划Agent ──► 任务拆解、计划制定 │ │ -│ │ ├──► 执行Agent ──► 工具调用、任务执行 │ │ -│ │ ├──► 知识管理员 ──► 知识库管理、图谱更新 │ │ -│ │ └──► 分析师Agent ──► 数据分析、报告生成 │ │ -│ │ └──► [可扩展] ────► 新角色注册机制 │ │ -│ └───────────────────────────────────────────────┘ │ -│ │ -│ ┌─────────────┐ ┌─────────────┐ ┌──────────────┐ │ -│ │ LLM 适配器 │ │ 定时任务 │ │ 论坛扫描 │ │ -│ │ LangChain │ │ 引擎 │ │ 引擎 │ │ -│ │ (可切换) │ │ │ │ │ │ -│ └─────────────┘ └─────────────┘ └──────────────┘ │ -└──────────────────────────┬────────────────────────────┘ - │ - ┌─────────────────────┼─────────────────────┐ - │ │ │ -┌────▼────┐ ┌─────────────▼────┐ ┌──────────▼────────┐ -│ ChromaDB│ │ SQLite │ │ 文件存储 │ -│向量数据库│ │ (关系数据) │ │ (NAS 共享目录) │ -└─────────┘ └───────────────────┘ └────────────────────┘ -``` - -### 3.2 通信模式 - -- **协作式 + 主 Agent 协调** -- 主 
Agent 作为统一入口,接收用户请求后分发到子 Agent -- 子 Agent 完成任务后汇总结果给主 Agent -- 子 Agent 之间可通过主 Agent 传递信息 -- 支持新增 Agent 注册到系统中 - ---- - -## 4. 核心功能模块 - -### 4.1 多 Agent 调度系统 - -#### Agent 角色定义 - -| Agent | 职责 | 核心能力 | -|-------|------|---------| -| **主Agent (Jarvis)** | 协调调度、对话入口 | 意图识别、任务分发、结果汇总 | -| **规划Agent** | 制定每日计划 | 任务拆解、优先级排序、时间规划 | -| **执行Agent** | 执行具体任务 | 工具调用、进度追踪、结果反馈 | -| **知识管理员** | 管理知识库和图谱 | 文档索引、实体提取、图谱更新 | -| **分析师Agent** | 分析工作数据 | 数据统计、趋势分析、报告生成 | - -#### Agent 扩展机制 - -- 通过配置文件或 API 注册新 Agent -- 每个 Agent 有独立的 system prompt 和工具集 -- 新增 Agent 自动出现在对话上下文中 - -### 4.2 知识库系统 - -#### 文档处理流程 - -``` -用户上传文件 - │ - ▼ -文件解析 -├── Markdown → 直接读取 -├── PDF → PDF 解析(PyMuPDF) -├── DOCX → python-docx -└── TXT → 直接读取 - │ - ▼ -LlamaIndex Node 构建 -├── 按标题层级切分(Header-based Chunking) -├── 保留 Node 关系链表(PARENT, PREVIOUS, NEXT, SOURCE) -└── 每个 Node 包含 metadata(标题、章节、页码) - │ - ▼ -向量存储 → ChromaDB - │ - ▼ -知识图谱构建 -├── LLM 实体识别(从 Node 内容中提取) -├── LLM 关系抽取(实体之间的关系) -└── 存入 SQLite(nodes + edges 表) -``` - -#### 检索流程(Small-to-Big 策略) - -``` -用户提问 - │ - ▼ -ChromaDB 向量检索 -├── 用小 Chunk 精确匹配 -└── 返回多个相关 Node - │ - ▼ -上下文回溯 -├── 顺着 Node 关系找到完整章节(父 Node) -└── 附加上下文给 LLM - │ - ▼ -LLM 生成回答 -``` - -### 4.3 知识图谱系统 - -#### 图谱数据结构 - -```sql --- 知识图谱节点表 -knowledge_graph_nodes ( - id TEXT PRIMARY KEY, - user_id TEXT, -- 用户隔离(支持多用户) - entity_type TEXT, -- 实体类型:PERSON / EVENT / CONCEPT / OBJECT - entity_name TEXT, -- 实体名称 - description TEXT, -- 实体描述 - source_doc_id TEXT, -- 来源文档 - source_node_id TEXT, -- 来源 Node - importance REAL, -- 重要程度 (0-1) - created_at TIMESTAMP, - updated_at TIMESTAMP -) - --- 知识图谱边表 -knowledge_graph_edges ( - id TEXT PRIMARY KEY, - user_id TEXT, -- 用户隔离 - source_node_id TEXT, - target_node_id TEXT, - relation_type TEXT, -- 关系类型:包含 / 依赖 / 相关 / 导致 / 属于 - weight REAL, -- 关系权重 (0-1) - created_at TIMESTAMP, - FOREIGN KEY (source_node_id) REFERENCES knowledge_graph_nodes(id), - FOREIGN KEY (target_node_id) REFERENCES knowledge_graph_nodes(id) -) -``` - -#### 图谱更新机制 - -- 
**事件驱动**:文档上传/任务变更时实时触发 -- **定时同步**:每日凌晨增量扫描,防止遗漏 -- **手动触发**:用户可主动要求重建图谱 -- **增量检测**:基于文件 mtime + 内容 hash 判断文档是否变化 - -#### 数据模型 - -```sql --- 文档表 -documents ( - id TEXT PRIMARY KEY, - user_id TEXT, -- 用户隔离 - filename TEXT, - file_type TEXT, -- pdf / markdown / docx / txt - file_path TEXT, -- NAS 存储路径 - file_hash TEXT, -- 内容 hash,用于增量检测 - summary TEXT, -- AI 生成的文档摘要 - file_size INTEGER, - created_at TIMESTAMP, - updated_at TIMESTAMP -) - --- 文档分块表(LlamaIndex Node 映射) -document_chunks ( - id TEXT PRIMARY KEY, - user_id TEXT, -- 用户隔离 - document_id TEXT, - chunk_index INTEGER, -- 在文档中的顺序 - content TEXT, -- 原始文本内容 - metadata JSON, -- LlamaIndex Node metadata(包含 title、chapter、relationships 等) - embedding_id TEXT, -- ChromaDB 中的向量 ID - created_at TIMESTAMP, - FOREIGN KEY (document_id) REFERENCES documents(id) -) -``` - -#### 图谱可视化 - -- 前端 Web 端展示交互式知识图谱 -- 节点可点击查看详情 -- 支持按类型筛选、按时间筛选 -- 支持搜索实体名称 - -### 4.4 论坛系统 - -#### 功能设计 - -- **发布内容** — 你在论坛发布想法、指令、问题 -- **AI 扫描** — Jarvis 定时扫描论坛内容 -- **任务识别** — 识别可执行的任务转为看板任务 -- **互动回应** — AI 在帖子下回复,像团队成员讨论 - -#### 数据模型 - -```sql -forum_posts ( - id TEXT PRIMARY KEY, - user_id TEXT, -- 发帖用户 - title TEXT, - content TEXT, - parent_id TEXT, -- 回复的帖子 ID(自关联,支持嵌套回复) - status TEXT, -- pending / processing / completed - created_at TIMESTAMP, - updated_at TIMESTAMP -) -``` - -### 4.5 看板系统 - -#### 功能设计 - -- **任务管理** — 创建、编辑、删除任务 -- **状态流转** — 待办 / 进行中 / 已完成 / 已取消 -- **优先级** — P0 / P1 / P2 / P3 -- **AI 凌晨分析** — 每日凌晨分析完成情况,规划次日任务 -- **AI 建议** — 根据你的工作模式给出优先级建议 - -#### 数据模型 - -```sql -tasks ( - id TEXT PRIMARY KEY, - user_id TEXT, -- 用户隔离 - title TEXT, - description TEXT, - priority TEXT, -- P0 / P1 / P2 / P3 - status TEXT, -- todo / in_progress / done / cancelled - deadline TIMESTAMP, - created_at TIMESTAMP, - updated_at TIMESTAMP, - completed_at TIMESTAMP -) - -task_history ( - id TEXT PRIMARY KEY, - task_id TEXT, - action TEXT, -- created / updated / completed / cancelled - old_value TEXT, - new_value TEXT, - timestamp TIMESTAMP, - FOREIGN KEY 
(task_id) REFERENCES tasks(id) -) -``` - -### 4.6 Markdown 编辑器 - -#### 功能设计 - -- 浏览器端在线编辑 Markdown -- 支持实时预览 -- AI 辅助功能: - - AI 续写 - - AI 润色 - - AI 总结 -- 自动保存到知识库 -- 支持创建新文档和编辑已有文档 - -### 4.7 定时任务引擎 - -#### 内置定时任务 - -| 任务 | 触发时间 | 功能 | -|------|---------|------| -| 论坛扫描 | 可配置(默认每小时) | 扫描新帖子,识别可执行任务 | -| 图谱增量同步 | 每日凌晨 2:00 | 扫描文档变化,更新知识图谱 | -| 每日规划 | 每日早上 8:00 | 分析昨日任务完成情况,规划当日 | -| 知识摘要 | 每周一凌晨 | 生成上周工作摘要 | - ---- - -## 5. 数据库设计 - -### 5.1 ER 图 - -``` -users - │ - ▼ -documents ──► document_chunks ──► embeddings (ChromaDB) - │ - ▼ -knowledge_graph_nodes ◄──► knowledge_graph_edges - │ - ▼ -tasks ◄─── task_history - │ - ▼ -forum_posts (自关联: parent_id ──► forum_posts.id) - │ - ▼ -conversations ──► messages -``` - -### 5.2 核心表结构 - -| 表名 | 说明 | -|------|------| -| `users` | 用户信息 | -| `documents` | 上传的文档元数据 | -| `document_chunks` | LlamaIndex Node 映射(保留关系) | -| `knowledge_graph_nodes` | 知识图谱节点 | -| `knowledge_graph_edges` | 知识图谱边 | -| `tasks` | 看板任务 | -| `task_history` | 任务变更历史 | -| `forum_posts` | 论坛帖子(含回复,通过 parent_id 自关联) | -| `conversations` | 主对话会话 | -| `messages` | 对话消息 | -| `knowledge_summaries` | 历史对话摘要 | - -#### 对话数据模型 - -```sql -conversations ( - id TEXT PRIMARY KEY, - user_id TEXT, -- 用户隔离 - title TEXT, - created_at TIMESTAMP, - updated_at TIMESTAMP -) - -messages ( - id TEXT PRIMARY KEY, - conversation_id TEXT, - role TEXT, -- user / assistant - content TEXT, - model TEXT, -- 使用的模型 - created_at TIMESTAMP, - FOREIGN KEY (conversation_id) REFERENCES conversations(id) -) - -knowledge_summaries ( - id TEXT PRIMARY KEY, - user_id TEXT, - period TEXT, -- daily / weekly / monthly - period_start DATE, - period_end DATE, - summary TEXT, -- 摘要内容 - created_at TIMESTAMP -) -``` - ---- - -## 6. 
API 设计 - -### 6.1 主要 API 端点 - -> **通用规则**:所有列表接口支持分页参数 `page`(页码,默认 1)和 `page_size`(每页数量,默认 20)。返回格式统一为 `{ data: [...], total: N, page: X, page_size: Y }`。 - -#### 认证接口 -- `POST /api/auth/register` — 用户注册 -- `POST /api/auth/login` — 用户登录,返回 JWT Token -- `POST /api/auth/refresh` — 刷新 Token -- `POST /api/auth/logout` — 登出 - -#### 对话接口 -- `POST /api/chat` — 发送消息,获取 AI 回复 -- `GET /api/conversations?page=&page_size=` — 获取对话历史列表 -- `GET /api/conversations/{id}/messages?page=&page_size=` — 获取对话消息 - -#### 知识库接口 -- `POST /api/documents/upload` — 上传文档(支持 multipart/form-data,最大 50MB) -- `GET /api/documents?page=&page_size=` — 获取文档列表 -- `DELETE /api/documents/{id}` — 删除文档 -- `POST /api/documents/{id}/reindex` — 重建索引(幂等操作) -- `POST /api/search` — 语义搜索 - - 请求体:`{ "query": "搜索内容", "top_k": 5, "filters": {} }` - -#### 知识图谱接口 -- `GET /api/knowledge-graph` — 获取图谱数据 -- `POST /api/knowledge-graph/rebuild` — 触发图谱重建(幂等,带锁防止并发) -- `GET /api/knowledge-graph/search?q=` — 搜索实体 - -#### 看板接口 -- `GET /api/tasks?page=&page_size=&status=` — 获取任务列表 -- `POST /api/tasks` — 创建任务 -- `PUT /api/tasks/{id}` — 更新任务 -- `DELETE /api/tasks/{id}` — 删除任务 - -#### 论坛接口 -- `GET /api/forum/posts?page=&page_size=` — 获取帖子列表 -- `POST /api/forum/posts` — 发布帖子 -- `GET /api/forum/posts/{id}` — 获取帖子详情(含回复树) -- `POST /api/forum/posts/{id}/reply` — 回复帖子 - -#### Markdown 编辑器接口 -- `GET /api/notes?page=&page_size=` — 获取笔记列表 -- `POST /api/notes` — 创建笔记 -- `PUT /api/notes/{id}` — 更新笔记 -- `DELETE /api/notes/{id}` — 删除笔记 -- `POST /api/notes/{id}/ai-assist` — AI 辅助操作 - - 请求体:`{ "action": "continue" | "polish" | "summarize" }` - -### 6.2 WebSocket 实时通信 - -消息格式统一为 JSON: -```json -// 通用消息结构 -{ - "type": "chat_message" | "graph_update" | "task_update", - "payload": { ... }, - "timestamp": "ISO8601" -} -``` - -- `/ws/chat` — 实时对话(流式输出) -- `/ws/knowledge-graph` — 图谱更新实时推送 -- `/ws/tasks` — 任务状态变化实时推送 - ---- - -## 7. 
前端设计 - -### 7.1 Web 端页面结构 - -``` -┌─────────────────────────────────────────┐ -│ 顶部导航栏 │ -│ [对话] [知识库] [图谱] [看板] [论坛] [笔记] │ -└─────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────┐ -│ │ -│ 主内容区域 │ -│ │ -└─────────────────────────────────────────┘ -``` - -### 7.2 核心页面 - -| 页面 | 功能 | -|------|------| -| **对话页** | 主对话界面,Jarvis 头像,消息列表,输入框 | -| **知识库页** | 文档列表,上传入口,搜索框 | -| **图谱页** | 交互式知识图谱,节点详情侧边栏 | -| **看板页** | 任务看板(Kanban 布局),AI 规划建议 | -| **论坛页** | 帖子列表,发帖入口,AI 回复展示 | -| **笔记页** | Markdown 编辑器,笔记列表 | - -### 7.3 Android 端 - -- 独立对话窗口,直接与 Jarvis 对话 -- 任务查看和简单编辑 -- 推送通知(每日规划提醒、任务到期提醒) -- 核心是**对话遥控**,重度操作建议用 Web 端 - ---- - -## 8. 部署架构 - -### 8.1 NAS 部署方案 - -``` -┌──────────────────────────────────────────────────┐ -│ NAS │ -│ │ -│ ┌────────────────────────────────────────────┐ │ -│ │ Docker Compose │ │ -│ │ │ │ -│ │ ┌──────────────┐ ┌──────────────────┐ │ │ -│ │ │ Jarvis API │ │ ChromaDB │ │ │ -│ │ │ (FastAPI) │ │ (向量数据) │ │ │ -│ │ └──────────────┘ └──────────────────┘ │ │ -│ │ │ │ -│ │ ┌──────────────┐ ┌──────────────────┐ │ │ -│ │ │ SQLite │ │ 文件存储 │ │ │ -│ │ │ (关系数据) │ │ /data/files │ │ │ -│ │ └──────────────┘ └──────────────────┘ │ │ -│ └────────────────────────────────────────────┘ │ -│ │ -│ NAS 共享目录 /data 挂载到容器 │ -└──────────────────────────────────────────────────┘ -``` - -### 8.2 环境变量配置 - -```env -# LLM 配置 -LLM_PROVIDER=claude # claude / deepseek / ollama -CLAUDE_API_KEY=xxx -DEEPSEEK_API_KEY=xxx -OLLAMA_BASE_URL=http://localhost:11434 -OLLAMA_MODEL=llama3 - -# 数据库配置 -DATABASE_URL=sqlite+aiosqlite:///data/jarvis.db -CHROMA_PERSIST_DIR=/data/chroma - -# 文件存储 -FILE_STORAGE_DIR=/data/files - -# 定时任务配置 -FORUM_SCAN_INTERVAL=3600 # 秒 -DAILY_PLAN_TIME=08:00 -GRAPH_SYNC_TIME=02:00 - -# JWT 认证 -JWT_SECRET=xxx -JWT_ALGORITHM=HS256 -``` - ---- - -## 9. 安全设计 - -- **JWT 认证** — 所有 API 需要 Token 验证 -- **数据加密** — SQLite 数据库可配置加密 -- **文件隔离** — 用户上传文件存储在独立目录 -- **API 限流** — 防止 API 滥用 -- **敏感信息** — API Key 等存储在环境变量,不进入代码库 - ---- - -## 10. 
未来扩展方向 - -- **多模态支持** — 图片、音频、视频解析 -- **更多 Agent** — 按领域细分的专业助手 -- **插件系统** — 第三方工具集成 -- **团队协作** — 多用户知识共享 -- **云端同步** — 异地数据备份 - ---- - -## 11. 开发阶段建议 - -> **注意**:Phase 3 的知识图谱依赖 Phase 1 的知识库基础设施。Phase 1-3 为核心 MVP,需按顺序开发。 - -| 阶段 | 内容 | 优先级 | -|------|------|--------| -| **Phase 1** | 基础框架搭建、对话系统、知识库上传检索 | P0 | -| **Phase 2** | 看板系统、论坛系统、Markdown 编辑器 | P0 | -| **Phase 3** | 知识图谱构建与可视化、多 Agent 协同 | P0 | -| **Phase 4** | 定时任务引擎、AI 每日规划功能 | P1 | -| **Phase 5** | Android App 开发 | P1 | -| **Phase 6** | 优化与扩展 | P2 | - ---- - -*本文档为 Jarvis 个人 AI 助理系统的初始设计规格,将根据开发进展持续更新。* diff --git a/docs/superpowers/specs/2026-03-20-knowledge-brain-blueprint-notes.md b/docs/superpowers/specs/2026-03-20-knowledge-brain-blueprint-notes.md deleted file mode 100644 index 6e21701..0000000 --- a/docs/superpowers/specs/2026-03-20-knowledge-brain-blueprint-notes.md +++ /dev/null @@ -1,57 +0,0 @@ -# Notes: Jarvis Knowledge Brain Blueprint - -## Current-State Findings -- Existing source domains already exist separately: conversations, documents, todos, tasks, forum posts. -- Current long-term memory only comes from conversation extraction via `UserMemory`. -- Current graph build path only uses indexed document chunks. -- Scheduler infrastructure already exists and can host daily brain-learning jobs. -- Frontend already exposes a `知识大脑` navigation entry, but it currently points to the graph page. - -## Synthesized Findings - -### What can be reused -- `memory_service` as a seed for conversation extraction and recall. -- `scheduler_service` as the base for daily learning workflows. -- `tag_service` as an early foundation for brain tags. -- Existing business tables as authoritative raw source records. - -### What is missing -- Unified event layer across all source systems. -- Candidate memory layer between raw events and durable brain memory. -- Timeline-aware memory model with reinforcement / archival states. -- Retrieval path that combines long-term memory with recent relevant events. 
-- Brain-specific APIs and a dedicated frontend dashboard module. - -### Phase 1 objective -- Build the minimum architecture needed for a real event-driven brain: - - BrainEvent - - BrainCandidate - - BrainMemory - - BrainTag and link tables - - ingestion services - - daily learning job - - retrieval integration - - brain dashboard APIs - -## Additional Findings: Knowledge Parsing Normalization -- Current document ingestion parses each format separately and builds chunks directly from ParsedNode items. -- Current chunks already carry structural metadata, but there is no explicit parent-child chunk graph. -- The agreed direction is to use MinerU for PDF only, keep existing parsers for DOCX/XLSX/CSV/MD/TXT, and converge all outputs into structured markdown. -- normalized_content should be persisted on documents so preview, rebuild, and future chunking can reuse the same canonical text. -- Lightweight hierarchy should be represented in chunk metadata first, not in a new relational tree schema. -- Current DOCX upload failure in the running environment is caused by a missing python-docx installation in the active backend environment. - -## Additional Findings: L3 Merge Progress -- `backend/app/agents/state.py` has been expanded to the newer L3 runtime state shape so graph/runtime code can rely on structured continuity, tool-round, retry, routing-hop, and datetime-reference fields. -- `backend/app/agents/graph.py` no longer contains merge markers and the phantom `EXECUTOR_ACCOUNTING` branch has been removed from graph registration and routing. -- Accounting-style prompts are currently normalized onto `AgentRole.EXECUTOR` instead of a separate executor-accounting role, which avoids dangling enum/runtime references while keeping those intents routable. -- `backend/tests/backend/app/agents/test_graph.py` has been reconciled onto the newer L3 runtime test branch and stale `EXECUTOR_ACCOUNTING` expectations were updated to `AgentRole.EXECUTOR`. 
-- Tool execution now uses a shared async bridge in `backend/app/agents/tools/async_bridge.py`, and `search.py`, `schedule.py`, `task.py`, plus `forum.py` all route synchronous tool entrypoints through that same bridge to keep runtime behavior consistent inside and outside active event loops. -- Current task/schedule canonicalization remains intentionally narrow for L3: task aliases (`content`, `date`, legacy priorities) and reminder aliases (`datetime`, `at`, `remind_at`, `time`, timezone variants) are normalized; deferred domains such as weather/accounting-specific tool routing remain outside this stabilization slice. -- Targeted verification now covers async bridge behavior plus task/schedule alias persistence tests; local pytest invocation still depends on resolving environment-level startup issues when the interpreter exits before running the selected files. -- L3 runtime/service integration now persists continuity snapshots in a single canonical envelope (`kind`, `version`, `state`) on both assistant message attachments and `Conversation.agent_state`, so streaming and sync chat entrypoints rehydrate the same shape. -- The continuity rehydration path is also tolerant of older `Conversation` rows/models that do not expose `agent_state`, falling back to assistant message attachments instead of failing before graph execution. -- The finalized L3 continuity contract persists a canonical `agent_continuity_state` snapshot: `turn_context.active_sub_commander`, `pending_action.type|owner_agent|owner_sub_commander|status`, `clarification_context.owning_agent|owning_sub_commander|target_action|question|status`, and `continuity_state.status|mode`. -- `backend/app/services/agent_service.py` normalizes legacy persisted snapshots (`active_sub_flow`, `agent`, `sub_flow`, `action_type`, `awaiting_user_input`, `awaiting_clarification`) into that canonical shape on both save and rehydration so older brain-ingestion records still resume correctly. 
-- Edge cases: explicit new requests may keep stale continuity in memory for override-aware routing, but only `continuity_state.status == fresh` participates in active continuation; clarification resumes use `continuity_state.mode = resume_after_clarification`. -- `memory_service.build_memory_context(...)` remains the shared retrieval join point for conversation summaries, user memory, and BrainMemory recall, while `document_service` continues emitting BrainEvent records from upload flow without changing the graph runtime contract. diff --git a/docs/superpowers/specs/2026-03-20-knowledge-brain-phase-1-blueprint.md b/docs/superpowers/specs/2026-03-20-knowledge-brain-phase-1-blueprint.md deleted file mode 100644 index d987ff7..0000000 --- a/docs/superpowers/specs/2026-03-20-knowledge-brain-phase-1-blueprint.md +++ /dev/null @@ -1,427 +0,0 @@ -# Jarvis Knowledge Brain Phase 1 Blueprint - -## 1. Phase 1 Goal -Phase 1 establishes the first production-ready version of Jarvis's event-driven knowledge brain. The objective is not to finish the entire intelligence system, but to create the minimum architecture that lets Jarvis ingest key user actions from across the product, learn from them on a daily schedule, store only high-value knowledge, and retrieve that knowledge during future conversations. - -Phase 1 should make the brain real in six ways: -1. unify source events across core modules; -2. create an intermediate candidate-learning layer; -3. promote durable knowledge into long-term brain memory; -4. maintain tags and time-aware traceability; -5. expose APIs for inspection and management; -6. allow the chat system to retrieve brain knowledge during answers. - ---- - -## 2. Scope Boundaries - -### In scope -- New persistence models for brain events, candidates, memories, tags, and relationships. -- Ingestion of source signals from conversations, knowledge documents, todos, kanban tasks, and forum posts. 
-- A daily autonomous learning pipeline that tags, scores, deduplicates, and upgrades knowledge. -- Retrieval integration for future responses. -- Brain dashboard APIs. -- A new frontend brain module structure replacing the current graph-only mental model. - -### Out of scope for phase 1 -- Full graph-native reasoning engine. -- Fully autonomous suggestion orchestration across all screens. -- Complex reinforcement-learning style adaptation. -- Fine-grained user-tunable learning policy UI. -- Automatic deletion and archival heuristics beyond simple status transitions. - ---- - -## 3. Target Architecture -Phase 1 should introduce a four-layer brain pipeline: - -1. **Source Records** - Existing domain tables remain the source of truth: messages, documents/chunks, todos, tasks, forum posts/replies. - -2. **BrainEvent** - A normalized event layer representing meaningful user/system actions. This is the single intake format for downstream learning. - -3. **BrainCandidate** - AI-generated candidate knowledge distilled from one or more events. Candidates are scored, tagged, typed, and traced back to source events. - -4. **BrainMemory** - Durable long-term memory that Jarvis can retrieve during future interactions. This becomes the brain's core persistence layer. - -Graph visualization should be treated as a **projection layer**, not the primary storage model. In later phases, graph nodes and edges can be generated from BrainMemory records and their relationships. - ---- - -## 4. Data Model Additions - -### 4.1 BrainEvent -Purpose: normalized raw learning input. - -Recommended fields: -- `id` -- `user_id` -- `source_type` (`conversation`, `document`, `todo`, `task`, `forum_post`, `forum_reply`) -- `source_id` -- `event_type` (`created`, `updated`, `completed`, `mentioned`, `uploaded`, `resolved`, `marked_important`, etc.) 
-- `occurred_at` -- `event_date` -- `title` -- `content_summary` -- `raw_excerpt` -- `metadata_` (JSON; source-specific facts such as conversation_id, task status, folder path) -- `importance_signal` (numeric seed score) -- `is_user_pinned` -- `processed_at` -- `status` (`pending`, `processed`, `ignored`) - -Indexes: -- `(user_id, event_date)` -- `(user_id, source_type, source_id)` -- `(user_id, status, occurred_at)` - -### 4.2 BrainCandidate -Purpose: intermediate learned knowledge awaiting acceptance into durable memory. - -Recommended fields: -- `id` -- `user_id` -- `candidate_type` (`preference`, `habit`, `project_fact`, `decision`, `solution`, `topic`, `goal`, `temporary_focus`) -- `title` -- `summary` -- `importance_score` -- `confidence_score` -- `time_scope` (`short_term`, `phase`, `long_term`) -- `valid_from` -- `valid_to` -- `source_event_ids` (JSON array) -- `reasoning_trace` (short explanation of why the system extracted it) -- `status` (`new`, `promoted`, `rejected`, `merged`) -- `created_at` -- `reviewed_at` - -### 4.3 BrainMemory -Purpose: durable brain knowledge used at retrieval time. - -Recommended fields: -- `id` -- `user_id` -- `memory_type` (`preference`, `habit`, `goal`, `project_fact`, `decision`, `solution`, `topic_profile`) -- `title` -- `content` -- `importance` -- `confidence` -- `timeline_date` -- `first_learned_at` -- `last_reinforced_at` -- `reinforcement_count` -- `status` (`active`, `archived`, `deleted`) -- `origin_candidate_id` -- `origin_source_types` (JSON array) -- `metadata_` (JSON) - -### 4.4 BrainTag -Purpose: independent tagging layer for brain browsing, filtering, and scoring. 
- -Recommended fields: -- `id` -- `user_id` -- `name` -- `category` (`topic`, `value`, `time`, `source`) -- `priority` (`important`, `secondary`) -- `score` -- `last_seen_at` -- `created_at` - -### 4.5 Link Tables -Add many-to-many link tables: -- `brain_event_tags` -- `brain_candidate_tags` -- `brain_memory_tags` -- optional `brain_memory_events` for direct memory-to-event traceability beyond JSON arrays - -These link tables are critical because phase 1 needs tag filters and timeline tracing before advanced graph projection exists. - ---- - -## 5. Ingestion Strategy -Phase 1 should not rewrite existing modules. Instead, it should add thin ingestion hooks near existing write paths. - -### Conversation ingestion -Trigger points: -- after user message creation -- after assistant completion -- after memory extraction / summary creation - -Event examples: -- important user instruction -- explicit “remember this” request -- repeated topic cluster -- conversation-derived decision or unresolved goal - -### Document ingestion -Trigger points: -- after upload success -- after indexing completes -- after manual chunk edits - -Event examples: -- document uploaded -- document indexed -- high-value section discovered -- document summary available - -### Todo ingestion -Trigger points: -- todo created -- todo completed -- AI-generated todo created - -Event examples: -- planned work item -- recurring operational duty -- completion signal reflecting actual user focus - -### Task/Kanban ingestion -Trigger points: -- task created -- task status changed -- task completed -- priority changed - -Event examples: -- declared project goal -- active workstream -- resolved milestone - -### Forum ingestion -Trigger points: -- post created -- reply created -- forum instruction executed or referenced - -Event examples: -- public project decision -- repeated operational issue -- reusable explanation or solution - -Implementation note: source ingestion should create BrainEvent rows synchronously 
or via lightweight background tasks, but should not block the original user flow. - ---- - -## 6. Learning and Promotion Pipeline -Phase 1 should add a new daily scheduler workflow dedicated to the brain. - -### New scheduler job: `brain_daily_learning_task` -Suggested run: once daily after the bulk of user activity, for example 01:00 or configurable per user later. - -Pipeline steps: -1. collect unprocessed `BrainEvent` rows for the target date; -2. cluster by source, topic, and repeated patterns; -3. ask the LLM to produce candidate knowledge with tags and importance explanations; -4. deduplicate against existing `BrainMemory` by semantic and rule-based matching; -5. promote high-confidence candidates into `BrainMemory`; -6. mark low-value candidates rejected or retained as observation-only; -7. refresh tag scores and priority levels; -8. mark consumed events as processed. - -### Promotion rules for phase 1 -Promote automatically when any of these are true: -- user explicitly requested the system to remember something; -- the same topic appears across multiple sources; -- a solution/decision was formed and looks reusable; -- a stable preference or habit is seen repeatedly; -- a task/todo/forum thread confirms relevance with user action. - -Keep as candidate-only when: -- information is recent but not yet stable; -- importance is uncertain; -- it appears only once without reinforcement. - -Reject when: -- content is obviously transient; -- it is too generic to help future answers; -- it duplicates active memory without adding new value. - ---- - -## 7. Retrieval Integration -Phase 1 must let chat use the brain in a controlled way. - -### New retrieval service -Add a dedicated `brain_retrieval_service` or extend `memory_service` with brain-aware retrieval APIs. 
- -Responsibilities: -- retrieve top relevant `BrainMemory` rows by query, tags, time context, and importance; -- optionally retrieve recent `BrainEvent` summaries for recency-sensitive answers; -- merge existing `UserMemory` and `MemorySummary` into one retrieval result shape; -- support limits to avoid prompt bloat. - -### Retrieval policy -At answer time: -- always consider long-term `BrainMemory`; -- include recent event summaries only when the question appears time-sensitive or project-state-sensitive; -- cap injected brain context to a small curated set. - -Recommended first integration path: -- extend `build_memory_context()` to append a new `【知识大脑】` block built from `BrainMemory` retrieval. -- keep existing conversation summary logic intact. - -This gives immediate product value without requiring a full prompt orchestration rewrite. - ---- - -## 8. Backend Services to Add or Refactor - -### New services -1. `brain_event_service.py` - - normalize incoming source data into BrainEvent rows - - provide source-specific helper constructors - -2. `brain_learning_service.py` - - run daily candidate extraction - - score, dedupe, and promote memories - -3. `brain_tag_service.py` - - manage tags, scoring, priority updates, and cleanup suggestions - -4. `brain_retrieval_service.py` - - retrieve relevant memories and recent events for chat and UI - -### Existing services to extend -- `memory_service.py`: integrate BrainMemory retrieval and possibly migrate `UserMemory` into the new model later -- `scheduler_service.py`: register brain daily learning job -- `agent_service.py`: inject retrieved brain context into chat pipeline -- `document_service.py`, `todo_service.py`, task/forum write paths: emit BrainEvent rows - ---- - -## 9. API Plan -Phase 1 should add a dedicated `/api/brain` router. 
- -### Read APIs -- `GET /api/brain/overview` - - counts: active memories, candidates, important tags, recent events - - today's learning summary - -- `GET /api/brain/memories` - - filters: tag, type, status, date range, source type - -- `GET /api/brain/candidates` - - filters: status, date, score threshold - -- `GET /api/brain/tags` - - segmented into important and secondary - -- `GET /api/brain/timeline` - - grouped by day/week; includes events, candidate promotions, reinforced memories - -- `GET /api/brain/memory/{id}` - - full traceability including linked events and tags - -### Write/management APIs -- `POST /api/brain/memory/{id}/promote` -- `POST /api/brain/memory/{id}/archive` -- `DELETE /api/brain/memory/{id}` -- `POST /api/brain/tag/{id}/promote` -- `POST /api/brain/tag/{id}/demote` -- `DELETE /api/brain/tag/{id}` -- `POST /api/brain/learn/run` - - manual trigger for daily learning pipeline - -### Compatibility note -Do not remove `/api/graph` in phase 1. Keep it as a legacy projection route while the new brain module is introduced. - ---- - -## 10. Frontend Module Structure -The current `知识大脑` nav item should stop meaning “graph only” and become a real brain dashboard. - -### Route strategy -Preferred phase 1 structure: -- `/brain` → new knowledge brain dashboard -- `/graph` → graph view tab or subview under the brain module, retained for relation visualization - -### Brain dashboard sections -1. **Overview header** - - total active memories - - today's learned items - - important tags count - - last learning run - -2. **Important tags panel** - - AI-ranked important tags - - click to filter related memories and timeline entries - -3. **Secondary tags panel** - - lower-priority tags with cleanup actions - -4. **Recent learned knowledge** - - newly promoted memories - - reasons and source badges - -5. **Timeline panel** - - daily grouped events and promotions - - support time-based backtracking - -6. 
**Graph subview** - - optional tab or secondary panel for relation projection - -### User actions in phase 1 -- delete memory -- archive memory -- promote/demote tag priority -- manually trigger learning run -- inspect why a memory exists - -This is enough to make the brain visible and manageable even before advanced graph reasoning exists. - ---- - -## 11. Suggested Delivery Breakdown - -### Step 1: Persistence foundation -- add brain models and migrations -- add SQLAlchemy registrations and schemas - -### Step 2: Event ingestion -- emit BrainEvent rows from conversation/document/todo/task/forum flows - -### Step 3: Learning workflow -- implement daily learning job and manual trigger API - -### Step 4: Retrieval integration -- wire BrainMemory into chat context assembly - -### Step 5: Brain dashboard backend -- add overview, memories, tags, timeline endpoints - -### Step 6: Brain dashboard frontend -- add `/brain` page and move graph into a subview or separate tab - ---- - -## 12. Risks and Guardrails - -### Main risks -- over-collection leading to noisy memories; -- prompt bloat from injecting too much brain context; -- duplicate memory creation across repeated daily runs; -- unclear distinction between candidate and durable memory; -- UI becoming graph-centric again instead of brain-centric. - -### Guardrails -- enforce candidate layer before promotion; -- cap retrieval size strictly; -- keep source traceability for every promoted memory; -- make tag cleanup explicit in UI; -- treat graph as a projection, not the source of truth. - ---- - -## 13. 
Phase 1 Success Criteria -Phase 1 is successful when all of the following are true: -- the system creates normalized BrainEvent rows from all five major source domains; -- a scheduled daily learning job produces candidates and promotes high-value memories; -- Jarvis can retrieve durable brain memories during future answers; -- the frontend exposes a real brain dashboard with tags, recent knowledge, and timeline; -- users can inspect and clean what the system learned; -- the old graph page is no longer the only visible representation of the brain. diff --git a/docs/superpowers/specs/2026-03-20-langsmith-integration-design.md b/docs/superpowers/specs/2026-03-20-langsmith-integration-design.md deleted file mode 100644 index a0af719..0000000 --- a/docs/superpowers/specs/2026-03-20-langsmith-integration-design.md +++ /dev/null @@ -1,141 +0,0 @@ -# LangSmith 集成设计文档 - -**日期**: 2026-03-20 -**状态**: 设计中 -**范围**: 后端 LangGraph Agent 追踪 - ---- - -## 1. 背景与目标 - -Jarvis 后端基于 LangGraph 构建了多智能体系统(Master/Planner/Executor/Librarian/Analyst),目前没有可观测性能力。 - -本次集成目标: -1. **调用追踪** — 在 LangSmith Dashboard 查看完整的 Agent 执行轨迹 -2. **对话历史管理** — 按 run_id 聚合对话,自动存储到 LangSmith -3. **评估支持** — 积累的对话数据可用于 LangSmith Evaluation - ---- - -## 2. 集成方案(方案 A:最小集成) - -### 2.1 核心思路 - -LangGraph 内置对 LangSmith 的支持,只需三步即可完成集成: - -1. 在 `.env` 中配置 LangSmith 环境变量 -2. 在 `pyproject.toml` 中添加 `langsmith` 为直接依赖 -3. 
在 `llm_service.py` 中为 LLM 调用注入 LangSmith Callback - -LangGraph 的 `compile()` 会自动将 Callback 传递到所有节点,无需修改 `graph.py`。 - -### 2.2 环境变量 - -在 `backend/.env.example` 中新增: - -```env -# LangSmith Tracing -LANGSMITH_TRACING=true -LANGSMITH_API_KEY=your-langsmith-api-key -LANGSMITH_PROJECT=jarvis-agent -``` - -### 2.3 依赖 - -在 `backend/pyproject.toml` 的 `dependencies` 中添加: - -```toml -"langsmith>=0.1.0", -``` - -### 2.4 配置类变更 - -在 `backend/app/config.py` 中新增配置字段: - -```python -# LangSmith -LANGSMITH_TRACING: bool = False -LANGSMITH_API_KEY: str = "" -LANGSMITH_PROJECT: str = "jarvis-agent" -``` - -### 2.5 实现变更 - -#### 2.5.1 Config 层 - -在 `backend/app/config.py` 中新增配置字段: - -```python -LANGSMITH_TRACING: bool = False -LANGSMITH_API_KEY: str = "" -LANGSMITH_PROJECT: str = "jarvis-agent" -``` - -创建 `backend/app/config_tracing.py` 作为独立的 callback 工厂模块: - -```python -from langchain_core.tracers import LangChainTracer -from app.config import settings - -def get_langsmith_callbacks() -> list: - if not settings.LANGSMITH_TRACING or not settings.LANGSMITH_API_KEY: - return [] - return [LangChainTracer(project_name=settings.LANGSMITH_PROJECT)] -``` - -#### 2.5.2 Graph 层 - -在 `backend/app/agents/graph.py` 中: - -1. `create_agent_graph()` 新增 `callbacks` 参数,透传给 `graph.compile(callbacks=...)` -2. `get_agent_graph()` 内部调用 `get_langsmith_callbacks()` 并与传入参数合并后传给 `create_agent_graph()` - -LangGraph 的 `compile(callbacks=...)` 会自动将 callbacks 传播到所有节点的 LLM 调用,覆盖 Master/Planner/Executor/Librarian/Analyst 全部 5 个节点。 - -### 2.6 Streaming 兼容性 - -当前 streaming 通过 `graph.astream_events()` 实现。LangSmith Callback 会异步记录追踪数据,不影响流式输出的实时性。 - -如果需要在 streaming 过程中实时展示 trace URL,可以在 `on_chat_model_end` 事件中从 `run.id` 生成链接: - -```python -async for event in graph.astream_events(...): - if event["event"] == "on_chat_model_end": - run_id = event["data"]["output"].id # 从 response 中获取 run_id - trace_url = f"https://smith.langchain.com/runs/{run_id}" -``` - ---- - -## 3. 
文件变更清单 - -| 文件 | 变更类型 | -|---|---| -| `backend/.env.example` | 新增 3 行环境变量 | -| `backend/pyproject.toml` | 新增 langsmith 依赖 | -| `backend/app/config.py` | 新增 3 个配置字段 | -| `backend/app/config_tracing.py` | 新建,callback 工厂函数 | -| `backend/app/agents/graph.py` | `create_agent_graph`/`get_agent_graph` 支持 callbacks | -| `backend/app/services/agent_service.py` | `get_agent_graph()` 调用签名对齐 | - ---- - -## 4. 风险与限制 - -- LangSmith 免费版有追踪数量限制(详见 LangSmith 定价) -- Streaming 模式下 trace 数据在调用结束后才完整展示 -- 需要用户自行在 [smith.langchain.com](https://smith.langchain.com) 注册并获取 API Key - ---- - -## 5. 测试验证 - -集成完成后通过以下方式验证: - -1. 设置 `LANGSMITH_TRACING=true` 并配置 API Key -2. 发起一次 Agent 对话 -3. 在 LangSmith Dashboard 中查看对应的 trace,确认包含: - - 5 个节点的执行记录 - - 每个节点的 LLM 输入/输出 - - 工具调用记录 - - Token 消耗统计 diff --git a/docs/superpowers/specs/2026-03-20-settings-register-design.md b/docs/superpowers/specs/2026-03-20-settings-register-design.md deleted file mode 100644 index 3a91ea4..0000000 --- a/docs/superpowers/specs/2026-03-20-settings-register-design.md +++ /dev/null @@ -1,249 +0,0 @@ -# 注册界面 + 设置界面 功能设计 - -## 概述 - -为 Jarvis 系统添加用户注册功能和完整的设置界面。用户可以: -- 在前端注册账号 -- 在设置界面管理个人信息和 LLM 配置 -- 配置定时任务等系统参数 - -**核心价值:** 支持多用户、每个用户独立配置自己的 LLM 提供商和参数。 - -## 现状分析 - -### 已有的功能 -- 后端已有 `/api/auth/register` API -- 后端使用 `pydantic-settings` 从 `.env` 读取配置 -- 前端只有登录页面,无注册入口 - -### 需要改动的地方 -- 前端 LoginView 添加注册表单 -- User 模型增加 `llm_config` 和 `scheduler_config` JSON 字段 -- 新建 Settings 路由和服务 -- 新建 SettingsView 页面 - -## 数据模型 - -### User 表扩展 - -```sql -ALTER TABLE users ADD COLUMN llm_config TEXT; -ALTER TABLE users ADD COLUMN scheduler_config TEXT; -``` - -### 字段结构 - -**llm_config (JSON):** -```json -{ - "chat": { - "provider": "openai|claude|ollama|deepseek|custom", - "model": "gpt-4o", - "base_url": "https://api.openai.com/v1", - "api_key": "sk-..." - }, - "vlm": { - "provider": "openai", - "model": "gpt-4o", - "base_url": "...", - "api_key": "..." 
- }, - "embedding": { - "provider": "openai", - "model": "text-embedding-3-small", - "base_url": "...", - "api_key": "..." - }, - "rerank": { - "provider": "openai", - "model": "bge-reranker-v2", - "base_url": "...", - "api_key": "..." - } -} -``` - -**scheduler_config (JSON):** -```json -{ - "daily_plan_time": "08:00", - "forum_scan_interval_minutes": 30, - "todo_ai_generate_time": "08:00", - "enabled": true -} -``` - -## API 设计 - -### 1. 注册 API (已有) -``` -POST /api/auth/register -Body: { email, password, full_name } -Response: UserOut -``` - -### 2. 获取用户设置 -``` -GET /api/settings -Response: { - profile: { id, email, full_name, created_at }, - llm_config: { ... }, - scheduler_config: { ... } -} -``` - -### 3. 更新用户资料 -``` -PUT /api/settings/profile -Body: { full_name?, password? } -Response: UserOut -``` - -### 4. 更新 LLM 配置 -``` -PUT /api/settings/llm -Body: { chat?: {...}, vlm?: {...}, embedding?: {...}, rerank?: {...} } -Response: { llm_config: { ... } } // 返回更新后的完整配置 -``` - -### 5. 测试 LLM 连接 -``` -POST /api/settings/llm/test -Body: { type: "chat"|"vlm"|"embedding"|"rerank", provider, model, base_url, api_key } -Response: { success: true, message: "连接成功" } 或 { success: false, error: "错误信息" } -``` - -### 6. 更新定时任务配置 -``` -PUT /api/settings/scheduler -Body: { daily_plan_time?, forum_scan_interval_minutes?, todo_ai_generate_time?, enabled? } -Response: { scheduler_config: { ... 
} } // 返回更新后的完整配置 -``` - -## 前端页面 - -### LoginView.vue 改动 -- 添加"注册"和"登录"切换 Tab -- 注册表单:邮箱、密码、确认密码、用户名 -- 复用现有 sci-fi 登录风格 - -### SettingsView.vue (新建) - -#### 页面布局 -``` -┌─────────────────────────────────────────────────┐ -│ [⚙] SETTINGS │ -├─────────────────────────────────────────────────┤ -│ ┌─────────────────────────────────────────────┐ │ -│ │ PROFILE │ │ -│ │ Email: operator@jarvis.ai │ │ -│ │ Name: [___________] │ │ -│ │ Password: [********] [Change] │ │ -│ └─────────────────────────────────────────────┘ │ -│ ┌─────────────────────────────────────────────┐ │ -│ │ LLM CONFIGURATION │ │ -│ │ ┌─ Chat ────────────────────────────────┐ │ │ -│ │ │ Provider: [OpenAI ▼] │ │ │ -│ │ │ Model: [gpt-4o ____________] │ │ │ -│ │ │ Base URL:[https://...] ] │ │ │ -│ │ │ API Key: [•••••••••••••••••] │ │ │ -│ │ └──────────────────────────────────────┘ │ │ -│ │ ┌─ VLM ─────────────────────────────────┐ │ │ -│ │ │ ... (同上结构) │ │ │ -│ │ └──────────────────────────────────────┘ │ │ -│ │ ┌─ Embedding ───────────────────────────┐ │ │ -│ │ │ ... (同上结构) │ │ │ -│ │ └──────────────────────────────────────┘ │ │ -│ │ ┌─ Rerank ──────────────────────────────┐ │ │ -│ │ │ ... 
(同上结构) │ │ │ -│ │ └──────────────────────────────────────┘ │ │ -│ └─────────────────────────────────────────────┘ │ -│ ┌─────────────────────────────────────────────┐ │ -│ │ SCHEDULER │ │ -│ │ Daily Plan Time: [08:00] │ │ -│ │ Forum Scan Interval: [30] 分钟 │ │ -│ │ Todo AI Generate: [08:00] │ │ -│ │ Scheduler Enabled: [ON] │ │ -│ └─────────────────────────────────────────────┘ │ -│ ┌─────────────────────────────────────────────┐ │ -│ │ [SAVE ALL SETTINGS] │ │ -│ └─────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────┘ -``` - -#### 交互行为 -- 修改后点击"保存"按钮,按钮显示 loading 状态 -- 保存成功显示 toast 提示"保存成功" -- 保存失败显示 toast 提示错误信息 -- 密码修改需二次确认弹窗 -- API Key 字段支持显示/隐藏切换 -- 每个模型配置卡片有独立的"测试"按钮 -- Provider 切换时自动填充默认值(如 Ollama 切换到 localhost:11434) -- Scheduler enabled 关闭时,时间输入框显示禁用状态 -- 空配置时显示"点击配置"占位提示 - -#### 注册表单 -- 邮箱:必填,格式校验 -- 用户名:必填,2-20 字符 -- 密码:必填,最少 8 字符 -- 确认密码:必填,需与密码一致 -- 密码强度指示器(弱/中/强) - -## 路由和侧边栏 - -### router/index.ts -```typescript -{ - path: 'settings', - name: 'settings', - component: () => import('@/views/SettingsView.vue'), -} -``` - -### SidebarNav.vue -```typescript -{ name: '设置', path: '/settings', icon: Settings } -``` - -## 技术实现 - -### 后端文件 -``` -backend/app/ - models/ - user.py # 修改:添加 llm_config, scheduler_config 字段 - schemas/ - auth.py # 修改:UserCreate 支持 full_name - settings.py # 新建:SettingsOut, LLMConfigIn, SchedulerConfigIn - routers/ - settings.py # 新建:settings router - services/ - settings_service.py # 新建:设置逻辑服务 -``` - -### 前端文件 -``` -frontend/src/ - api/ - settings.ts # 新建:settings API 客户端 - views/ - LoginView.vue # 修改:添加注册 Tab - SettingsView.vue # 新建:设置页面 - router/ - index.ts # 修改:添加 /settings 路由 - components/ - SidebarNav.vue # 修改:添加设置菜单 -``` - -## 验证清单 - -1. 注册功能正常 - 可以通过注册页面创建新账号 -2. 登录功能正常 - 新老用户都可以登录 -3. 设置页面可访问 - 登录后可进入设置页面 -4. 个人信息修改正常 - 用户名、密码可修改 -5. LLM 配置保存正常 - 四种模型配置可保存 -6. LLM 测试连接正常 - 可以验证配置是否正确 -7. 定时任务配置正常 - 时间间隔可修改 -8. 配置持久化正常 - 重新登录后配置保留 -9. UI 风格一致 - 设置页面与其他页面风格统一 -10. 
注册表单验证正常 - 密码强度、格式校验有效 diff --git a/docs/superpowers/specs/2026-03-20-stats-dashboard-design.md b/docs/superpowers/specs/2026-03-20-stats-dashboard-design.md deleted file mode 100644 index 4d7ae34..0000000 --- a/docs/superpowers/specs/2026-03-20-stats-dashboard-design.md +++ /dev/null @@ -1,267 +0,0 @@ -# 数据统计页面重新设计 - -## 1. 概述与目标 - -重新设计数据统计页面,使其与项目现有的深色赛博朋克/终端风格保持一致。采用单页垂直滚动布局,集成迷你图表,提供清晰的数据可视化。 - -## 2. 设计语言 - -### 视觉风格 -- **主题**:深色赛博朋克 + 终端美学 -- **背景**:`var(--bg-void)` 深空黑 -- **强调色**:青色 `#00f5d4` (现有变量 `var(--accent-cyan)`) -- **辅助色**:紫色 `#a855f7` (用于知识库等模块) -- **卡片背景**:`rgba(13,21,37,0.8)` 半透明深蓝 -- **边框**:`1px solid var(--border-dim)`,hover时发光 - -### 字体 -- **数字**:等宽字体 `var(--font-mono)`,大号加粗,带发光效果 -- **标签**:`var(--font-display)`,9-10px,字母间距 0.15em -- **正文**:`var(--font-mono)`,12-13px - -### 动效 -- 卡片 hover:边框发光 + 微弱上浮 -- 数字:首次加载时淡入 -- 图表:绘制动画 300ms - -## 3. 页面结构 - -### 单页垂直滚动布局(无 Tabs) - -``` -┌─────────────────────────────────────────────────────┐ -│ // DATA METRICS [页面标题] │ -├─────────────────────────────────────────────────────┤ -│ [SYSTEM HEALTH] 系统健康模块 │ -│ [CONVERSATIONS] 对话统计模块 │ -│ [KNOWLEDGE] 知识库模块 │ -│ [KANBAN] 看板模块 │ -│ [COMMUNITY] 社区模块 │ -│ [INSIGHTS] 个人洞察模块 │ -└─────────────────────────────────────────────────────┘ -``` - -## 4. 
模块详细设计 - -### 4.1 系统健康 (SYSTEM HEALTH) - -**位置**:页面最顶部,无需认证即可访问 - -**卡片布局**:4列网格 - -**指标卡片**: -| 指标 | 图标 | 格式 | -|------|------|------| -| CPU 使用率 | Cpu | 45% + 7天迷你柱状图 | -| 内存占用 | MemoryStick | 62% + 7天迷你柱状图 | -| 磁盘使用 | HardDrive | 38% + 7天迷你柱状图 | -| 运行时间 | Clock | 7d 3h 20m | - -**卡片样式**: -- 尺寸:自适应,最小 160px -- 数字大小:24px,等宽加粗 -- 趋势图:高度 24px,7个数据点 -- 标签:9px,letter-spacing 0.15em - -### 4.2 对话统计 (CONVERSATIONS) - -**需要认证** - -**顶部汇总**:横排4个数字卡片 -| 指标 | 值 | -|------|-----| -| 总对话数 | 1,234 | -| 总消息数 | 5,678 | -| Input Tokens | 12.5M | -| Output Tokens | 45.2M | - -**图表**:30天趋势迷你折线图 -- 4条线:对话数、消息数、Input Token、Output Token -- 图例在图表下方 -- 图表高度:120px -- 颜色使用主题色 - -### 4.3 知识库 (KNOWLEDGE) - -**需要认证** - -**顶部汇总**: -| 指标 | 值 | -|------|-----| -| 新建标签 | 156 | -| 文档数 | 89 | -| 标签关系 | 423 | - -**图表**:30天趋势迷你折线图 -- 3条线:新建标签、文档、标签关系 -- 使用紫色系 `var(--accent-purple)` - -### 4.4 看板 (KANBAN) - -**需要认证** - -**顶部汇总**: -| 指标 | 值 | -|------|-----| -| 待办任务 | 12 | -| 新建任务 | 45 (30天) | -| 已完成任务 | 38 (30天) | - -**图表**:30天对比柱状图 -- 两组柱:新建任务 vs 已完成任务 -- 使用青色和绿色对比 - -### 4.5 社区 (COMMUNITY) - -**需要认证** - -**顶部汇总**: -| 指标 | 值 | -|------|-----| -| 发帖数 | 23 | -| 回复数 | 156 | -| AI 执行 | 12 | - -**图表**:30天趋势迷你折线图 -- 3条线:发帖、回复、AI执行 - -### 4.6 个人洞察 (INSIGHTS) - -**需要认证** - -**布局**:2列 - -**左侧 - 活跃时段**: -- 24小时柱状图 -- 显示高峰时段标记 - -**右侧 - Top 标签**: -- 列表形式显示前5个常用标签 -- 显示使用次数 - -**Token趋势**: -- 本月 vs 上月对比 -- 百分比变化(带颜色指示上升/下降) - -## 5. 
组件清单 - -### MetricCard 指标卡片 -``` -Props: -- icon: Component (lucide图标) -- label: string -- value: string | number -- trend?: number[] (可选,迷你图数据) -- accentColor?: string (默认cyan) - -States: -- default: 静态显示 -- hover: 边框发光,轻微上浮 -- loading: 骨架屏 -- error: 显示 "--" 和错误图标 -``` - -### MiniLineChart 迷你折线图 -``` -Props: -- data: { date: string, value: number }[] -- color?: string -- height?: number (默认40px) - -Features: -- 纯CSS实现或tiny echarts -- 无坐标轴,仅保留趋势 -- 数据点过多时自动采样 -``` - -### MiniBarChart 迷你柱状图 -``` -Props: -- data: number[] -- color?: string -- height?: number (默认24px) -- maxBars?: number (默认7) -``` - -### SectionHeader 区块标题 -``` -Props: -- title: string -- tag?: 'cyan' | 'purple' | 'amber' (标签颜色) - -Style: -- 格式:// SECTION_NAME -- 左侧竖线装饰 -- 标签 Chip 在右侧 -``` - -### SummaryRow 汇总行 -``` -Props: -- items: { label: string, value: string | number }[] -- columns?: number (默认4) -``` - -## 6. 技术实现 - -### 前端 -- **框架**:Vue 3 + TypeScript (已有) -- **图表库**:使用 CSS 实现迷你图,或 echarts (已有) -- **图标**:lucide-vue-next (已有) -- **状态管理**:Pinia (已有) -- **API**:StatsView 中已有 stats API 调用 - -### 后端 -- 复用现有 `app/routers/stats.py` 和 `app/services/stats_service.py` -- 确保所有接口正确返回数据 - -### 样式 -- 复用 `ChatView.vue` 中的设计变量和样式模式 -- 使用 CSS Grid 实现响应式布局 -- 变量:`--bg-panel`, `--accent-cyan`, `--border-dim`, `--font-mono` 等 - -## 7. 响应式断点 - -| 设备 | 列数 | -|------|------| -| >= 1200px | 4列 | -| 768px - 1199px | 2列 | -| < 768px | 1列 | - -## 8. 错误与空状态 - -### Error State -- 显示错误图标和文字 -- 提供刷新按钮 -- 保持页面结构完整 - -### Empty State -- 各模块独立空状态 -- 不阻塞其他模块显示 -- 友好提示文案 - -### Loading State -- 骨架屏动画 -- 与卡片结构一致 - -## 9. 访问控制 - -| 模块 | 认证要求 | 说明 | -|------|----------|------| -| 系统健康 | 否 | 所有人可看 | -| 对话统计 | 是 | 需登录 | -| 知识库 | 是 | 需登录 | -| 看板 | 是 | 需登录 | -| 社区 | 是 | 需登录 | -| 个人洞察 | 是 | 需登录 | - -未登录用户访问需认证模块时: -- 显示占位卡片结构 -- 提示"请先登录" -- 不发送无效请求 - -## 10. 
数据刷新 - -- 页面进入时加载所有数据 -- 支持手动刷新按钮(每个模块独立刷新) -- 数字变化时无动画(避免干扰) diff --git a/docs/superpowers/specs/2026-03-21-forum-redesign-design.md b/docs/superpowers/specs/2026-03-21-forum-redesign-design.md deleted file mode 100644 index 0263adc..0000000 --- a/docs/superpowers/specs/2026-03-21-forum-redesign-design.md +++ /dev/null @@ -1,473 +0,0 @@ -# 交互广场重新设计 - -## 1. 概述与目标 - -将现有的论坛(交互广场)从传统的帖子/回复模式,重构为三个AI驱动的智能板块: -1. **AI学习板块** - 模型分析用户活动,学习客观知识并加入知识图谱,向用户汇报学习成果 -2. **AI建议板块** - 基于用户习惯和数据,提供个性化建议 -3. **AI交互板块** - 用户发起学习主题,或AI主动探索补充知识 - -## 2. 设计风格 - -沿用项目现有的深色赛博朋克/终端风格: -- 背景:`var(--bg-void)` 深空黑 -- 强调色:紫色 `#a855f7` (用于交互广场专属色调) -- 卡片背景:`var(--bg-card)` -- 边框:`1px solid var(--border-dim)`,hover时发光 -- 字体:等宽字体 `var(--font-mono)`,标题用 `var(--font-display)` - -## 3. 页面结构 - -``` -┌─────────────────────────────────────────────────────────────┐ -│ // INTERACTIVE PLAZA [页面标题] │ -├─────────────────────────────────────────────────────────────┤ -│ │ -│ ┌─────────────────────────────────────────────────────┐ │ -│ │ [MODEL LEARNING] AI学习板块 │ │ -│ │ AI分析你的活动,学习知识并汇报 │ │ -│ ├─────────────────────────────────────────────────────┤ │ -│ │ • 今日学习摘要 │ │ -│ │ • 学习历史时间线 │ │ -│ │ • 知识图谱更新统计 │ │ -│ └─────────────────────────────────────────────────────┘ │ -│ │ -│ ┌─────────────────────────────────────────────────────┐ │ -│ │ [SUGGESTIONS] AI建议板块 │ │ -│ │ 基于你的习惯提供个性化建议 │ │ -│ ├─────────────────────────────────────────────────────┤ │ -│ │ • 知识补充建议 │ │ -│ │ • 效率优化建议 │ │ -│ │ • 技能深耕建议 │ │ -│ └─────────────────────────────────────────────────────┘ │ -│ │ -│ ┌─────────────────────────────────────────────────────┐ │ -│ │ [INTERACTIVE] AI交互学习板块 │ │ -│ │ 用户发起学习主题,AI主动探索 │ │ -│ ├─────────────────────────────────────────────────────┤ │ -│ │ • 用户发起的学习主题 │ │ -│ │ • AI主动学习的内容 │ │ -│ └─────────────────────────────────────────────────────┘ │ -│ │ -└─────────────────────────────────────────────────────────────┘ -``` - -## 4. 
功能详情 - -### 4.1 AI学习板块 (MODEL LEARNING) - -**数据来源:** -- 对话记录(`messages` 表,Message模型)- 分析对话内容提取概念 -- 看板任务(`tasks` 表,Task模型)- 识别技术栈和工作流程 -- 知识库(`documents`, `kg_nodes` 表)- 补充知识缺口 - -**学习流程:** -``` -定时任务触发 → 分析近期活动 → 提取概念/术语/事实 -→ 存入知识图谱(KGNode) → 生成学习报告 → 存入learning_records表 -``` - -**数据库扩展:** - -```python -# 新增 learning_records 表 -# 继承 app.models.base.BaseModel,自动获得 id, created_at, updated_at -from app.models.base import BaseModel - -class LearningRecord(BaseModel): - __tablename__ = "learning_records" - - user_id = Column(String(36), ForeignKey("users.id"), nullable=False, index=True) - learning_type = Column(String(50), nullable=False) # concept, technology, workflow - topic = Column(String(500), nullable=False) # 学习主题 - summary = Column(Text, nullable=False) # AI生成的学习摘要 - source = Column(String(50), nullable=False) # conversation, kanban, knowledge - source_ids = Column(JSON, nullable=True) # 来源ID列表,如 {conversation_ids: [...], task_ids: [...]} - kg_nodes_created = Column(JSON, nullable=True) # 创建的KGNode ID列表 -``` - -**KGNode实体类型扩展:** -- `learned_concept` - 从对话中学到的概念 -- `technology` - 识别出的技术栈 -- `workflow` - 从看板任务中提取的工作流程 - -**前端展示:** - -1. **今日学习摘要卡片** - - AI生成的自然语言总结 - - 示例:"今日学习了依赖注入和异步编程两个概念,它们都来自你关于FastAPI的讨论" - - 显示来源标签:对话/看板/知识库 - -2. **学习历史时间线** - - 垂直时间线布局 - - 每条记录显示:时间、主题、摘要 - - 点击展开查看详情 - -3. **知识图谱更新统计** - - 今日新增节点数 - - 今日新建关系数 - - 迷你柱状图显示各类别占比(可复用 MiniBarChart) - -### 4.2 AI建议板块 (SUGGESTIONS) - -**建议类型:** - -1. **知识补充建议 (knowledge)** - - 检测知识图谱薄弱领域 - - 基于用户提问推断知识缺口 - - 示例:"你的知识图谱在'微服务架构'领域较为薄弱,建议深入学习" - -2. **效率优化建议 (efficiency)** - - 分析用户使用模式 - - 推荐最佳实践 - - 示例:"你通常在下午工作效率最高,建议将复杂任务安排在这个时段" - -3. 
**技能深耕建议 (skill)** - - 基于高频话题 - - 推荐深入学习方向 - - 示例:"你最近频繁讨论API设计,建议学习REST最佳实践和GraphQL" - -**数据库扩展:** - -```python -# 新增 suggestions 表 -from app.models.base import BaseModel - -class Suggestion(BaseModel): - __tablename__ = "suggestions" - - user_id = Column(String(36), ForeignKey("users.id"), nullable=False, index=True) - suggestion_type = Column(String(50), nullable=False) # knowledge, efficiency, skill - title = Column(String(500), nullable=False) # 建议标题 - content = Column(Text, nullable=False) # 建议内容 - source_data = Column(JSON, nullable=True) # 分析依据,如 {knowledge_gaps: [...], usage_patterns: {...}} - is_read = Column(Boolean, default=False) # 是否已读 - is_dismissed = Column(Boolean, default=False) # 是否忽略 -``` - -**前端展示:** -- 卡片列表布局 -- 每个建议显示:图标、类型标签、标题、内容 -- 右侧显示建议来源分析 -- 提供"查看详情"和"忽略"按钮 - -### 4.3 AI交互板块 (INTERACTIVE) - -**用户发起学习:** - -1. 用户输入想学习的主题 -2. AI分析主题,搜索知识库 -3. 如有需要,AI主动抓取外部资源 -4. 生成学习报告 -5. 自动存入知识图谱 -6. 在交互板块展示 - -**数据库扩展:** - -```python -# 新增 interactive_topics 表 -from app.models.base import BaseModel - -class InteractiveTopic(BaseModel): - __tablename__ = "interactive_topics" - - user_id = Column(String(36), ForeignKey("users.id"), nullable=False, index=True) - topic = Column(String(500), nullable=False) # 学习主题 - status = Column(String(50), nullable=False) # pending, learning, completed, failed - result = Column(Text, nullable=True) # 学习结果/报告 - kg_nodes_created = Column(JSON, nullable=True) # 创建的KGNode ID列表 - source = Column(String(50), nullable=False) # user_initiated, ai_proactive - completed_at = Column(DateTime, nullable=True) -``` - -**AI主动学习:** - -1. AI分析用户历史提问 -2. 发现知识缺口或关联主题 -3. 主动学习并生成报告 -4. 在交互板块标记为"AI主动" - -**前端展示:** -- 两个子区块:用户发起 / AI主动 -- 输入框:"让AI学习 [主题]" -- 正在进行的学习任务显示进度 -- 已完成的学习显示结果摘要 - -## 5. 
API 设计 - -### 5.1 后端接口 - -``` -GET /api/forum/learning/summary - - 获取今日学习摘要 - - 返回: { summary, records[], stats{ nodes_created, edges_created } } - -GET /api/forum/learning/history?page=1&limit=20 - - 获取学习历史 - - 返回: { records[], total } - -GET /api/forum/suggestions - - 获取所有建议 - - 返回: { suggestions[] } - -GET /api/forum/suggestions/{id} - - 获取单个建议详情 - - 返回: Suggestion - -PATCH /api/forum/suggestions/{id}/read - - 标记建议为已读 - -DELETE /api/forum/suggestions/{id}/dismiss - - 忽略/删除建议 - -GET /api/forum/interactive/topics - - 获取交互主题列表 - - 返回: { user_initiated[], ai_proactive[] } - -POST /api/forum/interactive/learn - - 用户发起学习 - - Body: { topic: string } - - 返回: { topic_id, status } - -GET /api/forum/interactive/topics/{id} - - 获取学习主题详情/结果 -``` - -### 5.2 前端API - -```typescript -// TypeScript 类型定义 -interface LearningSummary { - summary: string - records: LearningRecord[] - stats: { - nodes_created: number - edges_created: number - } -} - -interface LearningRecord { - id: string - learning_type: 'concept' | 'technology' | 'workflow' - topic: string - summary: string - source: string - source_ids?: { conversation_ids?: string[]; task_ids?: string[] } - kg_nodes_created?: string[] - created_at: string -} - -interface Suggestion { - id: string - suggestion_type: 'knowledge' | 'efficiency' | 'skill' - title: string - content: string - source_data?: Record<string, unknown> - is_read: boolean - is_dismissed: boolean - created_at: string -} - -interface InteractiveTopic { - id: string - topic: string - status: 'pending' | 'learning' | 'completed' | 'failed' - result?: string - kg_nodes_created?: string[] - source: 'user_initiated' | 'ai_proactive' - created_at: string - completed_at?: string -} - -// API 方法 -const forumApi = { - // learning - fetchLearningSummary(): Promise<LearningSummary>, - fetchLearningHistory(params: { page: number, limit: number }): Promise<{ records: LearningRecord[], total: number }>, - - // suggestions - fetchSuggestions(): Promise<Suggestion[]>, - getSuggestion(id: string): Promise<Suggestion>, - 
markSuggestionRead(id: string): Promise<void>, - dismissSuggestion(id: string): Promise<void>, - - // interactive - fetchInteractiveTopics(): Promise<{ user_initiated: InteractiveTopic[], ai_proactive: InteractiveTopic[] }>, - initiateLearning(topic: string): Promise<InteractiveTopic>, - getTopicDetail(id: string): Promise<InteractiveTopic>, -} -``` - -## 6. 组件结构 - -``` -frontend/src/views/ForumView.vue # 主页面,三板块布局 -frontend/src/components/forum/ -├── LearningSection.vue # AI学习板块 -│ ├── LearningSummaryCard.vue # 今日摘要卡片 -│ ├── LearningTimeline.vue # 学习历史时间线 -│ └── LearningStats.vue # 图谱更新统计(复用MiniBarChart) -├── SuggestionSection.vue # AI建议板块 -│ ├── SuggestionCard.vue # 建议卡片 -│ └── SuggestionList.vue # 建议列表 -└── InteractiveSection.vue # AI交互板块 - ├── LearningInput.vue # 学习主题输入框 - ├── UserInitiatedList.vue # 用户发起列表 - └── AIProactiveList.vue # AI主动列表 - -# 新增通用组件 -frontend/src/components/forum/MiniDonutChart.vue # 环形图(用于知识类别占比) -``` - -## 7. 服务层 - -### 7.1 LearningService - -```python -from app.core.llm import get_llm_client - -class LearningService: - def __init__(self, db: AsyncSession): - self.llm = get_llm_client() - - async def generate_daily_summary(user_id: str) -> str: - """分析用户今日活动,生成学习摘要""" - # 使用 LLM 分析提取的概念,生成自然语言摘要 - concepts = await self.extract_concepts(...) - prompt = f"根据以下学习内容生成简短摘要:{concepts}" - return await self.llm.chat(prompt) - - async def extract_concepts_from_conversations(user_id: str, since: datetime) -> list[dict]: - """从对话中提取概念""" - - async def identify_technologies_from_kanban(user_id: str) -> list[dict]: - """从看板任务中识别技术栈""" - - async def create_kg_nodes(user_id: str, learnings: list[dict]) -> list[str]: - """创建知识图谱节点""" - - async def record_learning(...) 
-> LearningRecord: - """记录学习成果""" -``` - -### 7.2 SuggestionService - -```python -class SuggestionService: - def __init__(self, db: AsyncSession): - self.llm = get_llm_client() - - async def generate_suggestions(user_id: str) -> list[Suggestion]: - """生成个性化建议""" - # 分析知识缺口、使用模式、技能机会 - gaps = await self.analyze_knowledge_gaps(user_id) - patterns = await self.analyze_usage_patterns(user_id) - skills = await self.identify_skill_opportunities(user_id) - - # 使用 LLM 生成建议 - prompt = f"基于以下分析生成建议:知识缺口{gaps},使用模式{patterns},技能机会{skills}" - return await self.llm.chat(prompt) - - async def analyze_knowledge_gaps(user_id: str) -> list[dict]: - """分析知识图谱缺口""" - - async def analyze_usage_patterns(user_id: str) -> dict: - """分析使用模式""" - - async def identify_skill_opportunities(user_id: str) -> list[dict]: - """识别技能提升机会""" -``` - -### 7.3 InteractiveService - -```python -class InteractiveService: - def __init__(self, db: AsyncSession): - self.llm = get_llm_client() - - async def initiate_learning(user_id: str, topic: str) -> InteractiveTopic: - """用户发起学习""" - - async def execute_learning(topic_id: str) -> dict: - """执行学习任务: - 1. 搜索知识库相关节点 - 2. 使用 LLM 深入学习主题 - 3. 生成学习报告 - 4. 创建 KGNode - 5. 更新 topic 状态 - """ - topic = await self.get_topic(topic_id) - content = await self.research_topic(topic.topic) - report = await self.generate_learning_report(topic, content) - await self.create_kg_nodes_from_report(report) - await self.update_topic_status(topic_id, 'completed', report) - - async def generate_learning_report(self, topic: InteractiveTopic, content: str) -> str: - """使用 LLM 生成结构化学习报告""" -``` - -## 8. 
定时任务 - -每日凌晨生成学习报告: -- 分析昨日用户活动 -- 提取新概念和技术栈 -- 更新知识图谱 -- 生成学习摘要存入数据库 - -**集成方式:** 使用项目现有的 `scheduler_service.py` - -```python -# 在 scheduler_service.py 的 start_scheduler() 中添加 -from app.services.learning_service import LearningService - -async def daily_learning_job(): - """每日凌晨0:30生成学习报告""" - from app.database import get_db_session - - async for db in get_db_session(): - service = LearningService(db) - users = await get_all_active_users(db) - for user in users: - await service.generate_and_record_daily_learning(user.id) - break - -# 在 start_scheduler() 中注册 -scheduler.add_job(daily_learning_job, "cron", hour=0, minute=30, id="daily_learning") -``` - -## 9. 错误处理 - -| 场景 | 处理 | -|------|------| -| 无活动数据 | 显示"今日暂无学习成果",不生成空记录 | -| 知识图谱更新失败 | 回滚学习记录,标记为失败状态 | -| AI生成失败 | 记录原始数据,标记需要重试 | -| 用户发起学习主题为空 | 前端验证拦截,不发送请求 | - -## 10. 访问控制 - -所有板块需要用户登录后访问: -- 未登录用户显示"请先登录"提示 -- 不发送无效API请求 -- 保持页面结构完整 - -## 11. 技术实现 - -**前端:** -- Vue 3 + TypeScript -- 复用现有组件样式(StatsView.vue模式) -- CSS实现迷你图表 -- lucide-vue-next图标 - -**后端:** -- FastAPI + SQLAlchemy -- 复用现有数据库连接 -- 新增三个Service类 -- 复用现有认证机制 - -**数据流:** -``` -用户活动 → LearningService分析 → KGNode创建 → LearningRecord存储 - ↓ - AI生成摘要 → 前端展示 -``` diff --git a/docs/superpowers/specs/2026-03-21-knowledge-folder-design.md b/docs/superpowers/specs/2026-03-21-knowledge-folder-design.md deleted file mode 100644 index 3e0fa46..0000000 --- a/docs/superpowers/specs/2026-03-21-knowledge-folder-design.md +++ /dev/null @@ -1,307 +0,0 @@ -# 知识库文件夹分层设计 - -> **Goal:** 为知识库添加文件夹分层组织功能,支持多层嵌套、CRUD 操作,支持知识大脑汇聚各类内容。 - -## 1. 概念与愿景 - -知识库是用户的**资料中枢**,文件夹分层让知识更有序。用户可以按主题/项目/类型建立文件夹层级,如 `技术文档/Python/入门.pdf`。 - -知识大脑会汇聚来自知识库、待办、看板、论坛、对话的内容,形成完整的用户知识画像。文件夹是知识的入口分类,而非知识图谱的一部分。 - -## 2. 
数据模型 - -### 2.1 Folder 表(邻接表模式) - -```python -class Folder(BaseModel): - __tablename__ = "folders" - - user_id = Column(String(36), ForeignKey("users.id"), nullable=False, index=True) - name = Column(String(255), nullable=False) - parent_id = Column(String(36), ForeignKey("folders.id"), nullable=True) # NULL=根目录 - # 注意: id, created_at, updated_at 继承自 BaseModel -``` - -**特点:** -- 邻接表模式:通过 `parent_id` 指向父文件夹 -- 根目录文件夹的 `parent_id = NULL` -- 查询完整树结构使用递归 CTE -- **唯一约束**:`user_id + parent_id + name` 组合唯一,防止同级重名 - -**验证规则:** -- 文件夹名称不能为空,最大 255 字符 -- 不允许包含字符:`/ \ * ? :` -- 最大嵌套深度:10 层(防止 UI/性能问题) - -### 2.2 Document 表变更 - -```python -class Document(BaseModel): - # ...现有字段... - folder_id = Column(String(36), ForeignKey("folders.id"), nullable=True) # 新增 -``` - -**约定:** -- `folder_id = NULL` 表示文档在根目录(未分类) -- 删除文件夹时,级联删除该文件夹及其所有子文件夹中的文档 - -### 2.3 ChromaDB Metadata - -```python -{ - "document_id": "xxx", - "document_title": "入门.pdf", - "folder_path": "/技术文档/Python", # 完整路径,用于检索过滤 - "file_type": "pdf", - "chunk_index": 0, -} -``` - -## 3. API 接口 - -### 3.1 文件夹管理 - -| 方法 | 路径 | 说明 | -|------|------|------| -| GET | `/api/folders` | 获取用户的完整文件夹树 | -| POST | `/api/folders` | 创建文件夹 `{ name, parent_id? 
}` | -| PUT | `/api/folders/{id}` | 重命名文件夹 `{ name }` | -| DELETE | `/api/folders/{id}` | 删除文件夹(级联删除文档) | - -**GET /api/folders 响应:** -```json -{ - "folders": [ - { - "id": "xxx", - "name": "技术文档", - "parent_id": null, - "children": [ - { - "id": "yyy", - "name": "Python", - "parent_id": "xxx", - "children": [] - } - ] - } - ] -} -``` - -### 3.2 文档管理变更 - -| 方法 | 路径 | 说明 | -|------|------|------| -| GET | `/api/documents?folder_id=` | 按文件夹查询文档 | -| POST | `/api/documents` | 上传文档时指定 `folder_id` | -| DELETE | `/api/documents/{id}` | 删除文档 | - -**POST /api/documents 请求增加可选字段:** -```json -{ - "file": "", - "folder_id": "yyy" // 可选,不传表示根目录 -} -``` - -### 3.3 安全与权限 - -**所有权验证:** -- 所有文件夹操作必须验证 `folder.user_id == current_user.id` -- 文档操作时验证 `document.user_id == current_user.id` -- `folder_id` 参数需要验证归属,防止跨用户访问 - -**示例中间件:** -```python -async def verify_folder_access(folder_id: str, user_id: str, db: AsyncSession): - result = await db.execute( - select(Folder).where(Folder.id == folder_id, Folder.user_id == user_id) - ) - if not result.scalar_one_or_none(): - raise HTTPException(status_code=403, detail="无权访问此文件夹") -``` - -### 3.4 向量检索变更 - -`KnowledgeService.retrieve()` 增加可选参数 `folder_id`: - -```python -async def retrieve( - self, - query: str, - user_id: str, - folder_id: str | None = None, # 新增 - top_k: int = 5, -): - # 如果指定 folder_id,构建 path 前缀过滤 - folder_path = await self._get_folder_path(folder_id) - where = {"folder_path": {"$starts_with": folder_path}} if folder_path else None -``` - -### 3.5 ChromaDB 同步策略 - -**文件夹重命名/移动时的同步:** - -由于 ChromaDB metadata 中存储了 `folder_path`,当文件夹路径变化时需要同步更新: - -```python -async def update_folder_paths(folder_id: str, old_path: str, new_path: str): - """更新所有子文件夹和文档的路径""" - # 1. 
更新所有子文件夹的 path - children = await db.execute( - select(Folder).where(Folder.parent_id == folder_id) - ) - for child in children.scalars(): - child_new_path = new_path + "/" + child.name - await update_folder_paths(child.id, old_path + "/" + child.name, child_new_path) - - # 2. 更新该文件夹下所有文档的 ChromaDB metadata - docs = await db.execute( - select(Document).where(Document.folder_id == folder_id) - ) - for doc in docs.scalars(): - collection.update( - where={"document_id": doc.id}, - set={"folder_path": new_path} - ) -``` - -**删除文件夹时的清理:** - -```python -async def delete_folder_cascade(folder_id: str): - """级联删除:先删子文件夹,再删文档,最后删自己""" - # 1. 递归删除所有子文件夹 - children = await db.execute( - select(Folder).where(Folder.parent_id == folder_id) - ) - for child in children.scalars(): - await delete_folder_cascade(child.id) - - # 2. 删除该文件夹下所有文档(从 ChromaDB 和数据库) - docs = await db.execute( - select(Document).where(Document.folder_id == folder_id) - ) - for doc in docs.scalars(): - await knowledge_service.delete_from_vectorstore(user_id, doc.id) - await db.delete(doc) - - # 3. 删除文件夹本身 - folder = await db.get(Folder, folder_id) - await db.delete(folder) -``` - -## 4. 前端设计 - -### 4.1 布局结构 - -``` -┌─────────────────────────────────────────────────────────┐ -│ KNOWLEDGE BASE [+新建文件夹] [+上传] │ -├──────────────┬──────────────────────────────────────────┤ -│ │ │ -│ 📁 技术文档 │ 搜索栏 [🔍 搜索...] [混合▼] │ -│ 📁 Python │ │ -│ 📄 入门 │ ┌─────────────────────────────────┐ │ -│ 📄 进阶 │ │ 文档标题 │ │ -│ 📁 Vue │ │ 类型 · 大小 · 状态 │ │ -│ 📁 产品 │ └─────────────────────────────────┘ │ -│ │ │ -│ 📁 临时文件 │ ┌─────────────────────────────────┐ │ -│ │ │ ... 
│ │ -│ │ └─────────────────────────────────┘ │ -└──────────────┴──────────────────────────────────────────┘ -``` - -### 4.2 组件结构 - -``` -KnowledgeView -├── Header (标题 + 操作按钮) -├── MainLayout (flexbox: sidebar + content) -│ ├── FolderTree (左侧边栏) -│ │ ├── FolderItem (递归组件) -│ │ │ ├── folder icon + name -│ │ │ ├── children (递归) -│ │ │ └── context menu (右键: 重命名/删除) -│ │ └── AddFolderButton -│ │ -│ └── ContentArea (右侧主区域) -│ ├── SearchBar -│ ├── UploadZone -│ ├── DocumentList -│ └── SearchResults -``` - -### 4.3 交互细节 - -| 操作 | 行为 | -|------|------| -| 点击文件夹 | 高亮选中,显示该文件夹下文档 | -| 右键文件夹 | 弹出菜单:重命名 / 删除 | -| 双击文件夹名 | 进入编辑状态 | -| 新建文件夹 | 弹出输入框,默认在当前选中位置创建 | -| 上传文档 | 需先选择目标文件夹,否则默认根目录 | -| 搜索 | 可选限定在当前文件夹内搜索 | - -### 4.4 UI 风格 - -保持一致的 sci-fi holographic 风格: -- 主色调:青色 `#00f5d4` + 深色背景 -- 文件夹图标:使用 Folder/FolderOpen 图标 -- 悬停/选中状态:边框高亮 + 背景色变化 -- 动画:展开/折叠动画 200ms ease - -## 5. 实施步骤 - -### Phase 1: 数据层 -1. 创建 `Folder` 模型和表 -2. 修改 `Document` 模型,增加 `folder_id` 外键 -3. 添加数据库迁移 - -### Phase 2: 后端 API -1. 实现文件夹 CRUD 接口 -2. 修改文档上传接口,支持 `folder_id` -3. 修改文档列表接口,支持 `folder_id` 过滤 -4. 修改向量检索,支持 `folder_id` 范围限定 -5. 实现递归 CTE 查询文件夹树 - -**递归 CTE 示例(获取完整文件夹树):** -```sql -WITH RECURSIVE folder_tree AS ( - -- 基础查询:根文件夹 - SELECT id, name, parent_id, 0 as depth - FROM folders - WHERE user_id = :user_id AND parent_id IS NULL - - UNION ALL - - -- 递归查询:子文件夹 - SELECT f.id, f.name, f.parent_id, ft.depth + 1 - FROM folders f - INNER JOIN folder_tree ft ON ft.id = f.parent_id - WHERE f.user_id = :user_id -) -SELECT * FROM folder_tree ORDER BY depth, name; -``` - -### Phase 3: 前端 -1. 创建 `FolderTree` 组件 -2. 改造 `KnowledgeView` 布局 -3. 实现文件夹右键菜单(重命名/删除) -4. 实现新建文件夹弹窗 -5. 上传时强制选择文件夹 - -### Phase 4: 测试 -1. 文件夹 CRUD 测试 -2. 级联删除测试(删除文件夹 + 文档) -3. 向量检索按文件夹过滤测试 -4. 前端交互测试 - -## 6. 
技术约束 - -- SQLite 的递归 CTE 查询文件夹树 -- 删除文件夹时先删除子文件夹(递归),再删除文档 -- ChromaDB 的 `where` 过滤使用 `$starts_with` 做路径前缀匹配 -- 前端递归组件注意防止无限循环 diff --git a/docs/superpowers/specs/2026-03-21-llm-config-table-design.md b/docs/superpowers/specs/2026-03-21-llm-config-table-design.md deleted file mode 100644 index dfe51c6..0000000 --- a/docs/superpowers/specs/2026-03-21-llm-config-table-design.md +++ /dev/null @@ -1,157 +0,0 @@ -# LLM 模型配置表格设计 - -## 1. 概述 - -重新设计 Settings 页面的 LLM 模型配置 UI,将原有的卡片列表改为表格行内编辑形式,简化交互、减少页面长度,同时支持多模型配置。 - -## 2. 需求 - -- **chat**: 必填,多个(子智能体可选不同模型) -- **vlm**: 选填,多个 -- **embedding**: 必填,1 个(知识库专用) -- **rerank**: 必填,1 个(知识库专用) - -## 3. UI 设计 - -### 3.1 整体布局 - -每种 LLM 类型(chat/vlm/embedding/rerank)独立成区,区头部显示类型名称和必填/选填标识,右上角有 [+] 添加按钮。 - -``` -┌─────────────────────────────────────────────────────────────┐ -│ // LLM CONFIGURATION │ -├─────────────────────────────────────────────────────────────┤ -│ ┌─ CHAT ─────────────────────────────────────────────── [+] │ -│ │ 名称 │ Provider │ 模型 │ 状态 │ 操作 │ -│ ├─────────────────────────────────────────────────────────┤ │ -│ │ Agent-Chat │ OpenAI │ gpt-4o │ ● 可用 │ ▶ ✕ │ -│ └─────────────────────────────────────────────────────────┘ │ -│ ... 
(vlm, embedding, rerank 同理) │ -└─────────────────────────────────────────────────────────────┘ -``` - -### 3.2 表格列(精简版) - -| 列 | 说明 | -|----|------| -| 名称 | 模型名称,支持输入编辑 | -| Provider | 下拉选择:OpenAI / Claude / Ollama / DeepSeek / Custom | -| 模型 | 模型名称,支持输入编辑 | -| 状态 | ● 可用(绿色)/ ○ 不可用(灰色)/ ⚠ 必填未填(红色) | -| 操作 | 展开详情按钮 ▶ / 删除按钮 ✕ | - -### 3.3 行内展开详情面板 - -点击任意行,行下方展开详情表单: - -``` -│ ▼ Agent-Chat │ OpenAI │ gpt-4o │ ● 可用 │ ▶ ✕ │ -│ ┌─────────────────────────────────────────────────────────┐ │ -│ │ │ │ -│ │ Provider: [OpenAI ▼] Model: [gpt-4o ] │ │ -│ │ Base URL: [https://api.openai.com/v1 ] │ │ -│ │ API Key: [sk-•••••••••••••••• ] 👁 │ │ -│ │ │ │ -│ │ [▶ 测试连接] [保存] [取消] │ │ -│ └─────────────────────────────────────────────────────────┘ │ -``` - -### 3.4 状态说明 - -| 状态 | 颜色 | 含义 | -|------|------|------| -| ● 可用 | 绿色 | 测试通过 | -| ○ 不可用 | 灰色 | 未测试或测试失败 | -| ⚠ 必填未填 | 红色 | chat/embedding/rerank 未配置 | - -### 3.5 警告提示 - -当 chat/embedding/rerank 任一类型为空时,表格顶部显示红色警告条: - -``` -┌─────────────────────────────────────────────────────────────┐ -│ ⚠ chat / embedding / rerank 为知识库必填,请确保已配置 │ -└─────────────────────────────────────────────────────────────┘ -``` - -## 4. 交互规则 - -| 动作 | 行为 | -|------|------| -| 添加模型 | 点击 [+] 在对应类型底部添加新行,状态默认为 ○ 不可用 | -| 展开编辑 | 点击任意行,行内展开详情面板,同时收起其他已展开的行 | -| 测试连接 | 点击"测试连接",调用后端 API,测试通过则状态变 ● 可用,失败显示错误 Toast | -| 保存 | 只有测试通过的模型才能保存,保存后更新 originalLlmConfig | -| 删除 | 点击 ✕ 删除该模型(embedding/rerank 至少保留 1 个) | -| 取消编辑 | 点击"取消"或再次点击展开按钮,收起详情面板,表单数据恢复原值 | -| Provider 变化 | 自动填充对应 Provider 的默认 Base URL | - -## 5. 数据模型 - -```typescript -interface LLMModelConfig { - name: string // 模型名称 - provider: 'openai' | 'claude' | 'ollama' | 'deepseek' | 'custom' - model: string // 模型名称 - base_url: string // API Base URL - api_key: string // API Key - enabled: boolean // 是否启用 -} - -interface LLMConfig { - chat: LLMModelConfig[] // 必填,多个 - vlm: LLMModelConfig[] // 选填,多个 - embedding: LLMModelConfig[] // 必填,1个 - rerank: LLMModelConfig[] // 必填,1个 -} -``` - -## 6. 
后端 API - -### 6.1 保存策略 - -`saveModel(type, index)` 发送完整 `LLMConfig` 对象到后端,后端整体替换该类型的模型列表。 - -- chat/vlm: 列表直接替换 -- embedding/rerank: 列表直接替换(限制最多 1 个) - -### 6.2 测试连接 API - -```typescript -POST /api/settings/llm/test -{ - "type": "chat" | "vlm" | "embedding" | "rerank", - "provider": "openai" | "claude" | "ollama" | "deepseek" | "custom", - "model": "gpt-4o", - "base_url": "https://api.openai.com/v1", - "api_key": "sk-..." -} -``` - -返回: -```typescript -{ "success": true, "message": "连接成功" } -{ "success": false, "error": "错误信息" } -``` - -## 7. 组件结构 - -``` -SettingsView.vue -├── LLMConfigSection (chat/vlm/embedding/rerank 四区) -│ ├── LLMTypeCard (每个类型一个卡片) -│ │ ├── LLMTable (表格头部 + 列表) -│ │ │ ├── LLMTableRow (每行模型) -│ │ │ └── LLMExpandPanel (展开的详情面板) -│ │ └── LLMEmptyState (空状态 + 添加按钮) -│ └── LLMWarning (必填警告条) -``` - -## 8. 实现要点 - -1. **单行展开**: 点击行时收起其他已展开行,保持 UI 简洁 -2. **测试通过才可保存**: 保存按钮仅在 `model.enabled === true` 时可用 -3. **API Key 脱敏**: 列表中不显示 API Key,详情面板中默认隐藏(显示为 ••••) -4. **Provider 默认 URL**: `onProviderChange` 自动填充默认值 -5. **深拷贝比较**: `isModelDirty` 使用 `JSON.stringify` 深拷贝比较 -6. **originalLlmConfig 同步**: 每次保存成功后更新原始配置副本 diff --git a/docs/superpowers/specs/2026-03-21-skill-system-design.md b/docs/superpowers/specs/2026-03-21-skill-system-design.md deleted file mode 100644 index 67d7885..0000000 --- a/docs/superpowers/specs/2026-03-21-skill-system-design.md +++ /dev/null @@ -1,288 +0,0 @@ -# Skill 系统设计方案 - -## 1. 概述 - -### 1.1 背景 - -当前 Jarvis 系统采用基于 LangGraph 的多 Agent 架构(Master/Planner/Executor/Librarian/Analyst),通过关键词规则路由到子 Agent。系统缺乏可扩展的 Skill 机制,无法让 Agent 按需调用自定义能力。 - -### 1.2 目标 - -构建一个 **Skill 系统**,让每个 Agent 能够: -- 挂载可配置的 Skill 能力插件 -- 由 LLM 自主判断何时使用哪个 Skill -- 支持私有/团队共享/市场三种可见性 -- Skill 作为 Agent 的指令模板 + 工具组合 - ---- - -## 2. 
核心理念 - -**Skill 是 Agent 的"能力插件",由 LLM 自主决策调用时机。** - -``` -用户: "帮我规划今天的工作" - │ - ▼ - Master Agent 理解意图 - │ - ▼ - 路由到 Planner Agent - │ - ▼ - Planner 分析任务 → 自主判断需要什么 Skill - │ - ├──→ 需要数据 → 调用 "数据获取" Skill - ├──→ 需要优先级 → 调用 "任务排序" Skill - └──→ 需要时间安排 → 调用 "日程规划" Skill -``` - ---- - -## 3. 数据模型 - -### 3.1 Skill 实体 - -| 字段 | 类型 | 说明 | -|-----|------|-----| -| id | UUID | 主键 | -| name | str | Skill 名称,如 "任务排序" | -| description | str | 供 LLM 理解该 Skill 的用途 | -| instructions | str | Agent 执行时的系统指令模板 | -| agent_type | AgentRole | 适用于哪个 Agent (master/planner/executor/librarian/analyst) | -| tools | List[str] | 引用的工具名称列表 | -| required_context | List[str] | 需要的前置数据 | -| output_format | str | 输出格式要求(可选) | -| visibility | enum | private/team/market | -| team_id | UUID | 团队 ID(visibility=team 时使用) | -| is_active | bool | 是否启用 | -| owner_id | UUID | 创建者 ID | -| created_at | datetime | 创建时间 | -| updated_at | datetime | 更新时间 | - -### 3.2 Agent-Skill 关联 - -每个 Agent 运行时从数据库加载其 `agent_type` 对应的所有 `is_active=True` 的 Skills,作为可选能力供 LLM 调用。 - ---- - -## 4. 系统架构 - -### 4.1 组件关系 - -``` -┌─────────────────────────────────────────────────────┐ -│ Agent Brain │ -│ ┌─────────────────────────────────────────────┐ │ -│ │ Master Agent (理解意图,路由到子 Agent) │ │ -│ └─────────────────────────────────────────────┘ │ -│ │ │ -│ ▼ │ -│ ┌─────────────────────────────────────────────┐ │ -│ │ [Planner] [Executor] [Librarian] ... │ │ -│ │ │ │ │ │ │ -│ │ └───────────┼────────────┘ │ │ -│ │ ▼ │ │ -│ │ ┌──────────────────┐ │ │ -│ │ │ Skill Registry │ │ │ -│ │ │ (可用的 Skills) │ │ │ -│ │ └────────┬─────────┘ │ │ -│ │ │ │ │ -│ │ LLM 自主判断使用哪个 Skill │ │ -│ └─────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────┘ -``` - -### 4.2 组件说明 - -| 组件 | 职责 | -|-----|------| -| Skill Registry | 存储 Skill 定义,提供加载接口,权限校验 | -| Skill Loader | 运行时加载 Agent 对应的 Skills | -| Skill Executor | 执行 Skill 指令,调用工具链 | - ---- - -## 5. 
Skill 定义示例 - -### 5.1 任务排序 Skill - -```json -{ - "name": "任务排序", - "description": "根据优先级、截止日期、依赖关系对任务列表进行智能排序", - "instructions": "你是一个任务排序专家。接收任务列表后,按以下规则排序:\n1. 紧急且重要优先\n2. 有截止日期的优先\n3. 依赖其他任务的优先\n输出排序后的任务列表及理由。", - "agent_type": "planner", - "tools": ["get_tasks"], - "required_context": ["原始任务列表"] -} -``` - -### 5.2 知识检索 Skill - -```json -{ - "name": "知识检索", - "description": "从用户知识库中检索相关内容,支持向量检索和关键词检索", - "instructions": "你是一个知识管理员。从知识库中检索与用户问题相关的内容,返回相关文档片段和来源。", - "agent_type": "librarian", - "tools": ["search_knowledge", "hybrid_search"], - "required_context": ["用户查询"] -} -``` - -### 5.3 数据分析 Skill - -```json -{ - "name": "效率分析", - "description": "分析任务完成情况,计算工作效率指标", - "instructions": "你是一个数据分析师。接收任务列表后,分析:\n1. 完成率\n2. 平均完成时间\n3. 阻塞原因\n4. 改进建议", - "agent_type": "analyst", - "tools": ["get_tasks", "get_stats"], - "required_context": ["任务数据"] -} -``` - ---- - -## 6. API 设计 - -### 6.1 Skill 管理 - -| 方法 | 路径 | 说明 | -|-----|------|-----| -| POST | /api/skills | 创建 Skill | -| GET | /api/skills | 列表(支持过滤 agent_type, visibility) | -| GET | /api/skills/{id} | 详情 | -| PUT | /api/skills/{id} | 更新 | -| DELETE | /api/skills/{id} | 删除 | - -### 6.2 Skill 执行 - -| 方法 | 路径 | 说明 | -|-----|------|-----| -| POST | /api/skills/{id}/execute | 手动执行 Skill | -| GET | /api/skills/execute/{execution_id} | 查询执行结果 | - ---- - -## 7. 数据库表设计 - -### 7.1 skill 表 - -```sql -CREATE TABLE skill ( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - name VARCHAR(100) NOT NULL, - description TEXT, - instructions TEXT NOT NULL, - agent_type VARCHAR(50) NOT NULL, - tools JSONB DEFAULT '[]', - required_context JSONB DEFAULT '[]', - output_format TEXT, - visibility VARCHAR(20) DEFAULT 'private', - team_id UUID, - is_active BOOLEAN DEFAULT true, - owner_id UUID NOT NULL, - created_at TIMESTAMP DEFAULT NOW(), - updated_at TIMESTAMP DEFAULT NOW() -); - -CREATE INDEX idx_skill_agent_type ON skill(agent_type); -CREATE INDEX idx_skill_visibility ON skill(visibility); -``` - ---- - -## 8. 
前端界面 - -### 8.1 Skill 管理入口 - -入口位置:智能链路 → Skill 市场 - -``` -┌─────────────────────────────────────────────┐ -│ 智能链路 → Skill 市场 │ -├─────────────────────────────────────────────┤ -│ [我的 Skills] [团队共享] [市场] │ -├─────────────────────────────────────────────┤ -│ ┌───────────────────────────────────────┐ │ -│ │ 任务排序 │ │ -│ │ 适用: Planner Agent │ │ -│ │ 工具: get_tasks │ │ -│ │ 描述: 根据优先级排序任务列表 │ │ -│ │ 可见: 私有 │ │ -│ │ [编辑] [禁用] [复制] │ │ -│ └───────────────────────────────────────┘ │ -└─────────────────────────────────────────────┘ -``` - -### 8.2 Skill 编辑界面 - -``` -┌─────────────────────────────────────────────┐ -│ 创建/编辑 Skill │ -├─────────────────────────────────────────────┤ -│ 名称: [________________] │ -│ 描述: [________________] │ -│ │ -│ 适用 Agent: │ -│ ( ) Master (●) Planner ( ) Executor │ -│ ( ) Librarian ( ) Analyst │ -│ │ -│ 引用工具: │ -│ ☑ get_tasks ☑ create_task │ -│ ☐ search_knowledge ☐ hybrid_search │ -│ │ -│ 指令模板: │ -│ ┌─────────────────────────────────────┐ │ -│ │ 你是一个任务排序专家... │ │ -│ └─────────────────────────────────────┘ │ -│ │ -│ 可见性: │ -│ (●) 私有 ( ) 团队共享 ( ) 公开市场 │ -│ │ -│ [取消] [保存] │ -└─────────────────────────────────────────────┘ -``` - ---- - -## 9. 与现有系统集成 - -| 现有组件 | 集成方式 | -|---------|---------| -| Agent Role | Skill.agent_type 引用现有 AgentRole 枚举 | -| Tools | Skill.tools 引用现有 ALL_TOOLS 中的工具名 | -| Prompts | Skill.instructions 作为 Agent 系统提示的补充 | -| User/Team | 复用现有权限体系,visibility 字段控制 | -| Router | Master Agent 路由逻辑不变,Skill 由子 Agent 按需调用 | - ---- - -## 10. 实现计划 - -### Phase 1: 基础框架 -- [ ] Skill 数据模型与 CRUD API -- [ ] Skill Registry 服务 -- [ ] Skill 加载机制(Agent 初始化时注入) -- [ ] 前端 Skill 管理界面 - -### Phase 2: 执行机制 -- [ ] Skill Executor -- [ ] 工具调用桥接 -- [ ] 执行结果返回 - -### Phase 3: 高级特性 -- [ ] 团队共享机制 -- [ ] Skill 市场 -- [ ] Skill 编排(多个 Skill 串联) - ---- - -## 11. 风险与约束 - -1. **LLM 自主性**:依赖 LLM 准确理解 Skill 描述,需优化 prompt -2. **工具兼容性**:Skill 引用的工具需在 ALL_TOOLS 中存在 -3. 
**权限控制**:团队共享需防止越权访问 diff --git a/docs/superpowers/specs/2026-03-25-schedule-planner-design.md b/docs/superpowers/specs/2026-03-25-schedule-planner-design.md deleted file mode 100644 index 54bce91..0000000 --- a/docs/superpowers/specs/2026-03-25-schedule-planner-design.md +++ /dev/null @@ -1,561 +0,0 @@ -# Schedule Planner Agent Redesign - -## Goal - -Replace the current planner role with a schedule-focused planning system that analyzes conversation history, the task board, and forum signals to produce actionable scheduling recommendations for the user. - -## Scope - -This redesign covers both the main planner role and its subagents across backend orchestration, prompts, routing, scheduled execution, todo generation, frontend presentation, and related tests. - -## User-Approved Direction - -- Replace the current path-planning semantics with schedule-planning semantics. -- Redesign both the main planner role and its subagents. -- Inputs for planning: - - conversation history - - task board - - forum information -- Output style: - - conclusion first - - executable schedule next -- Trigger modes: - - when the user explicitly asks for scheduling advice - - at a fixed daily time -- Daily scheduled analysis should write actionable suggestions into todo items. - -## Architecture - -### Main Role - -The current `planner` role will be replaced at the system level by a new role id: - -- `schedule_planner` - -Its responsibility is no longer “find the shortest execution path for a goal.” Instead, it becomes the scheduling brain that: - -1. understands current commitments and pressure signals -2. evaluates urgency, importance, dependency, and timing -3. recommends near-term scheduling actions -4. 
converts useful scheduled guidance into concrete todo items when triggered by the daily scheduler - -### Subagents - -The existing planner subagent structure will be redesigned into two schedule-specific subagents: - -- `schedule_analysis` - - analyzes conversation history, task board state, and forum signals - - identifies priorities, pressure points, conflicts, dependencies, risks, and things that can be delayed - -- `schedule_planning` - - converts analysis into an execution-oriented schedule recommendation - - outputs conclusion first, then a practical schedule proposal - - when running from the daily scheduled workflow, produces todo-ready action items - -### Trigger Paths - -#### Interactive Trigger - -When the user asks questions such as: - -- what should I do today -- how should I arrange this week -- based on my recent work, what should I focus on next -- help me schedule upcoming work - -The master agent should route to `schedule_planner`. - -The expected response shape: - -1. current conclusion -2. today / near-term schedule recommendation -3. next actions - -#### Daily Scheduled Trigger - -A daily scheduled job invokes the schedule planner flow automatically. - -The daily run should: - -1. collect relevant context from conversation history, tasks, and forum data -2. run `schedule_analysis` -3. run `schedule_planning` -4. convert only actionable, non-duplicate recommendations into todo items - -The daily run should not dump raw analysis into todos. Only concise, action-worthy, user-meaningful recommendations become todos. - -## Data Flow - -### Inputs - -The schedule planning system should read from three sources: - -1. **Conversation history** - - recent user intent - - commitments implied in prior discussion - - stated priorities, urgency, and unresolved threads - -2. **Task board** - - open items - - current statuses - - stalled work - - high-priority or overdue work - -3. 
**Forum information** - - new items requiring attention - - external pressure or discussion signals - - updates that may change priority - -### Internal Processing - -The main flow should be: - -- Master decides scheduling intent -- `schedule_planner` receives context -- `schedule_analysis` identifies priority structure -- `schedule_planning` produces human-usable output -- scheduled mode additionally writes selected suggestions into todos - -### Outputs - -#### Interactive Output - -The default answer structure should be: - -- conclusion first -- suggested schedule second -- next actions last - -#### Scheduled Output - -The scheduled run should create todo entries with: - -- concise action phrasing -- enough context to be actionable -- source attribution where useful (conversation/task/forum) -- duplicate avoidance - -## Migration Strategy - -This redesign uses a two-phase migration to avoid breaking stored state and UI rendering. - -### Phase 1: Compatibility Window - -- accept legacy `planner` values from stored traces, mock payloads, and historical records -- normalize legacy `planner` to `schedule_planner` at read boundaries where practical -- accept legacy `planner_scope` and `planner_steps` as read-only legacy values and normalize them to `schedule_analysis` and `schedule_planning` -- write only the new ids going forward: - - `schedule_planner` - - `schedule_analysis` - - `schedule_planning` - -### Phase 2: Legacy Removal - -After the migration is complete and all active UI payloads, mock data, and tests are updated: - -- remove legacy id acceptance from orchestration and frontend display logic -- remove legacy mock fixtures -- keep migration code out of prompts and core scheduling behavior - -### Migration Scope - -The migration must cover: - -- backend enums and routing -- frontend agent ids and telemetry labels -- stored trace rendering paths -- mock data used by agent dashboards and chat orchestration views -- tests that still refer to `planner`, 
`planner_scope`, or `planner_steps` - -## Input Contracts - -The schedule planning system reads from three sources with explicit limits. - -### Conversation History Contract - -- use recent conversation history from the current user context -- default retrieval window: last 7 days of relevant conversation turns, capped at the latest 50 turns -- prefer turns that include commitments, priorities, deadlines, blockers, or future-oriented intent -- if conversation history is unavailable, continue with degraded confidence - -### Task Board Contract - -- include open, in-progress, blocked, overdue, and high-priority tasks -- exclude completed and archived items by default -- include enough task metadata to reason about urgency and dependency: - - title - - status - - priority - - due date if present - - last updated time if present -- if task data is unavailable, continue with degraded confidence - -### Forum Information Contract - -- include recent forum items that may affect user priorities -- default retrieval window: last 7 days of relevant forum signals -- forum signals may include: - - new posts requiring attention - - replies or escalations - - updates that change urgency or expected follow-up -- if forum data is unavailable, continue with degraded confidence - -## Output Contracts - -### `schedule_analysis` Output Schema - -The analysis stage should produce a structured summary with these fields: - -- `top_priorities`: list of current highest-priority focus areas -- `risks`: list of risk or pressure signals -- `conflicts`: list of timing or dependency conflicts -- `deferrable_items`: list of lower-priority items that can be delayed -- `evidence`: source references grouped by `conversation`, `task_board`, or `forum` -- `confidence`: one of `high`, `medium`, `low` - -### `schedule_planning` Output Schema - -The planning stage should produce a structured recommendation with these fields: - -- `conclusion`: short decision-oriented summary -- `today_plan`: list of 
suggested actions for the current day or immediate next window -- `near_term_plan`: list of actions for the next few days or current week -- `next_actions`: short ordered action list -- `todo_candidates`: only present in scheduled mode; candidate todo items derived from the recommendation -- `confidence`: one of `high`, `medium`, `low` - -### `todo_candidates` Schema - -Each `todo_candidate` must use this structure: - -- `title`: required short action text -- `description`: required short rationale grounded in source context -- `sources`: required list of provenance objects -- `priority`: optional normalized priority such as `high`, `medium`, `low` -- `target_window`: optional string such as `today` or `this_week` - -Each provenance object in `sources` must contain: - -- `type`: one of `conversation`, `task_board`, `forum` -- `id`: source object id when available, otherwise a stable synthetic reference -- `label`: short human-readable source label - -### Evidence Structure - -Each item in `schedule_analysis.evidence` must contain: - -- `type`: one of `conversation`, `task_board`, `forum` -- `id`: source object id when available, otherwise a stable synthetic reference -- `label`: short human-readable identifier -- `reason`: brief explanation of why the signal matters to scheduling - -### Interactive Response Contract - -The user-facing answer should always follow this shape: - -1. conclusion -2. suggested schedule -3. next actions - -If confidence is low, the response must say that explicitly and avoid overconfident scheduling language. - -## Daily Scheduler Contract - -The daily scheduled trigger must follow explicit execution semantics. 
- -### Execution Model - -- run once per user per local date -- default execution time: 07:00 in the user's configured timezone -- if the user has no configured timezone, skip the run and log the skip reason -- do not automatically backfill missed runs -- enforce idempotency by `(user_id, local_date, job_type)` so the same daily analysis is not executed more than once successfully - -### Scheduled Mode Behavior - -A successful scheduled run should: - -1. gather available context from the three input sources -2. execute `schedule_analysis` -3. execute `schedule_planning` -4. create todo items from selected `todo_candidates` -5. store run telemetry and outcome metadata - -If one or more sources are missing, continue when there is still enough evidence to produce a useful recommendation and mark confidence as reduced. - -Signal evaluation rules: - -- a **strong source** is a source with enough current evidence to support prioritization on its own, such as multiple open high-priority tasks or a recent forum escalation -- a **meaningful signal** is a discrete scheduling-relevant item extracted from any source, such as an overdue task, a stated commitment in conversation history, or a forum escalation -- the planner may still run with one strong source -- scheduled mode may create todos only when at least two meaningful signals exist across all inputs - -If fewer than two meaningful signals are available across all sources, the scheduler should not create todos and should log a low-context outcome. - -Delayed execution rule: - -- if the 07:00 run is delayed by temporary outage or worker unavailability, the system may still execute one delayed run later on the same user-local date -- if the entire local date passes without a successful run, do not backfill on the next day - -## Todo Creation Rules - -Todo creation is the main scheduled side effect and must be tightly constrained. 
- -### Creation Rules - -- create at most 3 todo items per daily run -- only create todos for actions that are concrete, near-term, and user-actionable -- do not create todos for vague advice, reflections, or duplicated reminders -- store source provenance when available: - - `conversation` - - `task_board` - - `forum` - -### Duplicate Detection - -A candidate todo is considered a duplicate if there is already an open todo that matches all of the following: - -- same normalized action text -- same source category or same source object when available -- created within the last 7 days - -Normalization rules for action text: - -- trim surrounding whitespace -- collapse repeated internal whitespace to a single space -- lowercase Latin characters -- remove trailing full stop / period punctuation only - -Source comparison rules: - -- if a provenance object includes a stable source `id`, compare by `(type, id)` -- if no stable source id exists, compare by `(type, normalized label)` -- if multiple sources support one recommendation, compare against the highest-priority provenance in this order: `task_board`, `forum`, `conversation` - -When a duplicate is detected: - -- do not create a new todo -- record the skip reason in scheduler telemetry - -### Todo Fields - -Scheduled-created todos should include at minimum these persisted fields: - -- `title`: required -- `description`: required -- `source_type`: required primary provenance type -- `source_id`: optional stable source id -- `source_label`: required fallback human-readable provenance label -- `created_by`: required and set to `schedule_planner` -- `created_at`: required timestamp -- `priority`: optional normalized priority -- `target_window`: optional normalized scheduling window - -## Routing Boundaries - -The system must distinguish scheduling from adjacent planning behaviors. 
- -### Route to `schedule_planner` when the user asks for: - -- today or this week planning -- what to focus on next -- priority ordering across ongoing work -- time-aware sequencing of current commitments - -### Do not route to `schedule_planner` when the user asks for: - -- deep implementation planning for a feature -- code execution or task fulfillment -- research-only retrieval -- pure analysis without scheduling intent - -In ambiguous cases such as "what should I do next?", prefer `schedule_planner` when the available context includes multiple active tasks, recent commitments, or forum pressure signals. - -## Backend Changes - -### Role and Graph Layer - -Update the orchestration layer so the planner role is redefined as `schedule_planner` rather than `planner`. - -Files likely involved: - -- `backend/app/agents/state.py` -- `backend/app/agents/graph.py` -- `backend/app/agents/prompts.py` -- `backend/app/routers/agent.py` -- `backend/app/services/agent_service.py` - -Required changes: - -- rename role ids where appropriate -- update graph node registration -- update master routing rules -- replace planner subagent mappings -- update telemetry and sub-commander trace labels - -### Prompt Layer - -Replace the current planner prompt family with schedule-specific instructions. - -Needed prompt families: - -- `SCHEDULE_PLANNER_SYSTEM_PROMPT` -- `SCHEDULE_ANALYSIS_PROMPT` -- `SCHEDULE_PLANNING_PROMPT` - -Prompt requirements: - -- reason over conversation history, tasks, and forum state -- prioritize urgency, importance, and dependency -- avoid abstract productivity advice -- produce concrete, immediate scheduling output -- in scheduled mode, generate todo-worthy suggestions only - -### Scheduled Execution Layer - -Add or update the daily scheduled workflow so it can call the schedule planner flow automatically. 
- -Likely touchpoints: - -- scheduler service -- existing daily planning jobs -- todo creation services - -Required behavior: - -- fixed daily execution time -- fetch relevant context -- call schedule planner pipeline -- write selected recommendations into todos -- skip duplicate todo creation - -## Frontend Changes - -Frontend needs to reflect the new role system consistently. - -Files likely involved: - -- `frontend/src/data/agents.ts` -- `frontend/src/pages/agents/index.vue` -- `frontend/src/components/chat/OrchestrationPanel.vue` -- `frontend/src/pages/chat/composables/useChatView.ts` -- related frontend tests - -Required updates: - -- replace planner display labels with schedule planner labels -- rename planner subagents to schedule analysis / schedule planning -- update orchestration telemetry labels -- update example mock state and tests -- use these exact frontend ids: - - `schedule_planner` - - `schedule_analysis` - - `schedule_planning` -- use these exact default Chinese labels: - - `日程规划师` - - `日程分析员` - - `日程编排员` -- update active route visualization and commander skill labels to the new ids - -## Naming - -### Main Agent - -- old: `planner` -- new: `schedule_planner` -- display role: `日程规划师` - -### Subagents - -- old: `planner_scope` -- new: `schedule_analysis` -- display role: `日程分析员` - -- old: `planner_steps` -- new: `schedule_planning` -- display role: `日程编排员` - -## Constraints - -- do not keep dual role names for long-term compatibility unless a specific dependency forces it -- do not create todos for every suggestion -- do not turn the planner into a generic life coach -- keep scheduling grounded in current project signals -- preserve the existing agent architecture where possible, while fully changing planner semantics - -## Observability - -The redesign must emit enough telemetry to debug routing and scheduled execution. 
- -Required telemetry fields: - -- selected main route -- selected subagent -- available input sources -- missing input sources -- run mode: `interactive` or `scheduled` -- confidence level -- todos created count -- todos skipped as duplicates count -- scheduler run success / skipped / failed - -## Acceptance Criteria - -### Backend Acceptance Criteria - -- a scheduling-intent user query routes to `schedule_planner` -- `schedule_analysis` and `schedule_planning` are both reachable through the orchestration layer -- legacy planner ids are normalized during the compatibility window -- daily scheduled runs do not execute more than once per user per local date -- low-context daily runs do not create todos -- duplicate todo candidates are skipped instead of recreated - -### Frontend Acceptance Criteria - -- the agents page displays `日程规划师` instead of the previous planner label -- the planner subagent chips display `日程分析员` and `日程编排员` -- orchestration mock data and route highlights use the new ids -- tests no longer depend on `planner_scope` or `planner_steps` after migration is complete - -### Failure and Fallback Criteria - -- if forum data is missing, the planner still runs with degraded confidence -- if task board data is missing, the planner still runs with degraded confidence when other strong context exists -- if fewer than two meaningful signals are available, scheduled mode creates no todos -- if the user has no timezone configured, the daily scheduled run is skipped and logged - -## Testing Strategy - -### Backend - -Add or update tests for: - -- master routing to `schedule_planner` -- schedule subagent selection behavior -- prompt invariants for schedule-focused output -- scheduled daily run creates todos from actionable suggestions -- duplicate todo protection - -### Frontend - -Add or update tests for: - -- renamed main role and subagent labels -- orchestration panel route display -- active subagent telemetry -- mock agent graph data using 
`schedule_planner`, `schedule_analysis`, and `schedule_planning` - -## Risks - -1. **Broad rename surface** - - `planner` is referenced across backend and frontend, so a full rename must be systematic - -2. **Scheduled todo spam** - - daily runs may create low-value or duplicate todos unless filtered carefully - -3. **Prompt drift** - - if prompts stay too abstract, the new agent will sound renamed but not actually scheduling-oriented - -## Recommendation - -Implement this as a real role-system redesign, not as a display-only rename. The role id, subagent ids, prompt family, routing logic, and frontend telemetry should all align on the new scheduling semantics so the system remains internally coherent. diff --git a/frontend/src/api/agent.ts b/frontend/src/api/agent.ts index ee0b35d..b47bfd0 100644 --- a/frontend/src/api/agent.ts +++ b/frontend/src/api/agent.ts @@ -25,6 +25,123 @@ export interface AgentConfig { selected_skill_ids?: string[] } +export interface AgentVisibilityEvent { + event_id: string + event_type: string + timestamp: string + conversation_id?: string | null + agent_id?: string | null + task_id?: string | null + thread_id?: string | null + severity: string + payload: Record +} + +export interface AgentVisibilityVerifier { + conversation_id: string + status?: string | null + summary?: string | null + evidence: Array> +} + +export interface AgentVisibilityTaskSummary { + task_id: string + role?: string | null + owner_agent_id?: string | null + status?: string | null + summary?: string | null + evidence_count: number +} + +export interface AgentVisibilityTopologyNode { + agent_id: string + role?: string | null + parent_agent_id?: string | null + source: string + task_count: number + completed_task_count: number +} + +export interface AgentVisibilityTopology { + conversation_id: string + root_agent_id?: string | null + current_agent?: string | null + nodes: AgentVisibilityTopologyNode[] + edges: Array> + tasks: AgentVisibilityTaskSummary[] + 
task_hierarchy: Record +} + +export interface AgentVisibilityIsolation { + mode: string + isolation_id?: string | null + workspace_path?: string | null + parent_conversation_id?: string | null + metadata: Record +} + +export interface AgentVisibilityCost { + input_tokens: number + output_tokens: number + total_tokens: number + estimated_cost?: number | null + budget_warning: boolean + currency: string +} + +export interface AgentVisibilityCostByAgent { + agent_id: string + input_tokens: number + output_tokens: number + total_tokens: number + estimated_cost?: number | null + budget_warning: boolean +} + +export interface AgentVisibilityCostSummary { + conversation_id: string + total: AgentVisibilityCost + thresholds: Record + by_agent: AgentVisibilityCostByAgent[] +} + +export interface AgentVisibilityToolGovernanceItem { + capability_id: string + tool_name: string + permission_class: string + side_effect_scope: string + supports_retry: boolean + idempotent: boolean + safe_for_parallel_use: boolean + requires_confirmation: boolean + usage_count: number + last_result_preview?: string | null +} + +export interface AgentVisibilityToolGovernance { + conversation_id: string + total_tools: number + used_tools: number + items: AgentVisibilityToolGovernanceItem[] + upgrade_candidates: string[] +} + +export interface AgentVisibilityRuntimeSummary { + conversation_id: string + execution_mode?: string | null + current_phase?: string | null + current_checkpoint?: string | null + phase_history: Array> + checkpoint_history: Array> + verifier: AgentVisibilityVerifier + isolation: AgentVisibilityIsolation + cost: AgentVisibilityCost + topology_node_count: number + active_task_count: number + completed_task_count: number + recent_events: AgentVisibilityEvent[] +} + export const agentApi = { async getStats(): Promise { const res = await api.get('/api/agents/stats') @@ -45,4 +162,39 @@ export const agentApi = { const res = await api.put(`/api/agents/config/${id}`, data) return 
res.data }, + + async getRuntimeSummary(conversationId: string): Promise { + const res = await api.get('/api/agents/visibility/runtime-summary', { + params: { conversation_id: conversationId }, + }) + return res.data + }, + + async getVisibilityTopology(conversationId: string): Promise { + const res = await api.get('/api/agents/visibility/topology', { + params: { conversation_id: conversationId }, + }) + return res.data + }, + + async getVisibilityVerifier(conversationId: string): Promise { + const res = await api.get('/api/agents/visibility/verifier', { + params: { conversation_id: conversationId }, + }) + return res.data + }, + + async getVisibilityCost(conversationId: string): Promise { + const res = await api.get('/api/agents/visibility/cost', { + params: { conversation_id: conversationId }, + }) + return res.data + }, + + async getVisibilityTools(conversationId: string): Promise { + const res = await api.get('/api/agents/visibility/tools', { + params: { conversation_id: conversationId }, + }) + return res.data + }, } diff --git a/frontend/src/pages/agents/agentsPage.css b/frontend/src/pages/agents/agentsPage.css index 76868ff..8d8ddea 100644 --- a/frontend/src/pages/agents/agentsPage.css +++ b/frontend/src/pages/agents/agentsPage.css @@ -90,7 +90,10 @@ display: flex; flex-direction: column; gap: 12px; - width: 260px; + width: 320px; + max-height: calc(100% - 36px); + overflow: auto; + padding-right: 4px; } .hud-panel { border: 1px solid rgba(0,245,212,0.12); @@ -120,6 +123,116 @@ color: var(--accent-cyan); letter-spacing: 0.08em; } +.runtime-grid { + display: grid; + grid-template-columns: repeat(2, minmax(0, 1fr)); + gap: 8px 12px; +} +.runtime-item { + display: flex; + flex-direction: column; + gap: 4px; +} +.runtime-label { + font-size: 10px; + color: var(--text-dim); + letter-spacing: 0.1em; +} +.runtime-item strong { + font-size: 12px; + color: var(--text-primary); + word-break: break-word; +} +.runtime-note { + margin-top: 10px; + font-size: 11px; + 
color: var(--text-secondary); + line-height: 1.5; +} +.runtime-meta { + margin-top: 10px; + display: flex; + flex-wrap: wrap; + gap: 8px; + font-size: 10px; + color: var(--accent-cyan); +} +.runtime-warning { + color: var(--accent-amber); +} +.runtime-workspace { + margin-top: 8px; + font-size: 10px; + color: var(--text-dim); + word-break: break-all; +} +.stack-list { + display: flex; + flex-direction: column; + gap: 8px; +} +.stack-item { + padding: 8px 10px; + border-radius: 12px; + border: 1px solid rgba(0,245,212,0.08); + background: rgba(10, 18, 30, 0.78); +} +.stack-line { + display: flex; + align-items: center; + justify-content: space-between; + gap: 8px; + font-size: 11px; + color: var(--text-primary); +} +.stack-subline { + margin-top: 4px; + font-size: 10px; + color: var(--text-dim); + word-break: break-word; +} +.stack-empty { + font-size: 11px; + color: var(--text-dim); +} +.mini-section + .mini-section { + margin-top: 12px; +} +.mini-title { + margin-bottom: 8px; + font-size: 10px; + letter-spacing: 0.08em; + color: var(--accent-cyan); +} +.event-severity { + text-transform: uppercase; + font-size: 10px; +} +.event-severity.warning { + color: var(--accent-amber); +} +.event-severity.error { + color: var(--accent-red); +} +.threshold-line { + margin-top: 8px; +} +.candidate-list { + margin-top: 10px; + display: flex; + flex-wrap: wrap; + gap: 6px; +} +.candidate-chip { + display: inline-flex; + align-items: center; + padding: 4px 8px; + border-radius: 999px; + font-size: 10px; + color: var(--accent-cyan); + border: 1px solid rgba(0,245,212,0.12); + background: rgba(0,245,212,0.06); +} .canvas-controls { position: absolute; right: 20px; diff --git a/frontend/src/pages/agents/agentsPage.test.ts b/frontend/src/pages/agents/agentsPage.test.ts index 1c8b106..34d188f 100644 --- a/frontend/src/pages/agents/agentsPage.test.ts +++ b/frontend/src/pages/agents/agentsPage.test.ts @@ -1,8 +1,15 @@ import { beforeEach, describe, expect, it, vi } from 'vitest' 
-import { mount } from '@vue/test-utils' +import { mount, flushPromises } from '@vue/test-utils' +import { createPinia, setActivePinia } from 'pinia' +import { useConversationStore } from '@/stores/conversation' const mocks = vi.hoisted(() => ({ getHierarchyStats: vi.fn(), + getRuntimeSummary: vi.fn(), + getVisibilityTopology: vi.fn(), + getVisibilityVerifier: vi.fn(), + getVisibilityCost: vi.fn(), + getVisibilityTools: vi.fn(), getConfig: vi.fn(), updateConfig: vi.fn(), listSkills: vi.fn(), @@ -11,6 +18,11 @@ const mocks = vi.hoisted(() => ({ vi.mock('@/api/agent', () => ({ agentApi: { getHierarchyStats: mocks.getHierarchyStats, + getRuntimeSummary: mocks.getRuntimeSummary, + getVisibilityTopology: mocks.getVisibilityTopology, + getVisibilityVerifier: mocks.getVisibilityVerifier, + getVisibilityCost: mocks.getVisibilityCost, + getVisibilityTools: mocks.getVisibilityTools, getConfig: mocks.getConfig, updateConfig: mocks.updateConfig, }, @@ -69,6 +81,136 @@ const hierarchyStats = { ], } +const runtimeSummaryFixture = { + conversation_id: 'conv-1', + execution_mode: 'collaboration', + current_phase: 'phase_4_visibility_and_verification', + current_checkpoint: 'visibility.runtime_summary_ready', + phase_history: [], + checkpoint_history: [], + verifier: { + conversation_id: 'conv-1', + status: 'passed', + summary: 'Runtime summary is available.', + evidence: [], + }, + isolation: { + mode: 'worktree', + isolation_id: 'iso-1', + workspace_path: '/tmp/jarvis/worktree-1', + parent_conversation_id: 'parent-1', + metadata: { branch: 'jarvis/test-worker' }, + }, + cost: { + input_tokens: 120, + output_tokens: 80, + total_tokens: 200, + estimated_cost: 0.00156, + budget_warning: true, + currency: 'USD', + }, + topology_node_count: 2, + active_task_count: 2, + completed_task_count: 1, + recent_events: [ + { + event_id: 'evt-1', + event_type: 'agent.cost.warning', + timestamp: '2026-04-04T10:00:00Z', + severity: 'warning', + payload: {}, + }, + ], +} + +const topologyFixture = 
{ + conversation_id: 'conv-1', + root_agent_id: 'master', + current_agent: 'analyst-1234abcd', + nodes: [ + { + agent_id: 'master', + role: 'master', + parent_agent_id: null, + source: 'root', + task_count: 0, + completed_task_count: 0, + }, + { + agent_id: 'analyst-1234abcd', + role: 'analyst', + parent_agent_id: 'master', + source: 'spawned', + task_count: 2, + completed_task_count: 1, + }, + ], + edges: [{ parent_agent_id: 'master', child_agent_id: 'analyst-1234abcd' }], + tasks: [], + task_hierarchy: {}, +} + +const verifierFixture = { + conversation_id: 'conv-1', + status: 'passed', + summary: 'Runtime summary is available.', + evidence: [ + { task_id: 'task-1', status: 'passed' }, + ], +} + +const costFixture = { + conversation_id: 'conv-1', + total: runtimeSummaryFixture.cost, + thresholds: { + total_tokens: 300, + estimated_cost: 0.01, + }, + by_agent: [ + { + agent_id: 'analyst-1234abcd', + input_tokens: 80, + output_tokens: 70, + total_tokens: 150, + estimated_cost: 0.00129, + budget_warning: true, + }, + ], +} + +const toolGovernanceFixture = { + conversation_id: 'conv-1', + total_tools: 12, + used_tools: 2, + items: [ + { + capability_id: 'web_search', + tool_name: 'web_search', + permission_class: 'external', + side_effect_scope: 'network', + supports_retry: true, + idempotent: true, + safe_for_parallel_use: true, + requires_confirmation: false, + usage_count: 1, + last_result_preview: 'ok', + }, + { + capability_id: 'create_reminder', + tool_name: 'create_reminder', + permission_class: 'write', + side_effect_scope: 'local_state', + supports_retry: false, + idempotent: false, + safe_for_parallel_use: false, + requires_confirmation: true, + usage_count: 1, + last_result_preview: 'created', + }, + ], + upgrade_candidates: ['worktree_manager', 'cost_inspector'], +} + const skillFixtures = [ { id: 'skill-schedule-1', @@ -124,6 +266,8 @@ describe('agents page pcb command center', () => { beforeEach(() => { vi.clearAllMocks() vi.useFakeTimers() + 
setActivePinia(createPinia()) + useConversationStore().setCurrentConversation('conv-1') vi.stubGlobal('ResizeObserver', class { observe() {} disconnect() {} @@ -143,6 +287,11 @@ describe('agents page pcb command center', () => { })), }) mocks.getHierarchyStats.mockResolvedValue(hierarchyStats) + mocks.getRuntimeSummary.mockResolvedValue(runtimeSummaryFixture) + mocks.getVisibilityTopology.mockResolvedValue(topologyFixture) + mocks.getVisibilityVerifier.mockResolvedValue(verifierFixture) + mocks.getVisibilityCost.mockResolvedValue(costFixture) + mocks.getVisibilityTools.mockResolvedValue(toolGovernanceFixture) mocks.getConfig.mockImplementation(async (id: string) => ({ id, name: id === 'schedule_planner' ? 'SCHEDULE PLANNER' : id.toUpperCase(), @@ -161,6 +310,8 @@ describe('agents page pcb command center', () => { await Promise.resolve() await Promise.resolve() + expect(mocks.getRuntimeSummary).toHaveBeenCalledWith('conv-1') + expect(mocks.getVisibilityTopology).toHaveBeenCalledWith('conv-1') expect(wrapper.find('[data-testid="commander-skills"]').exists()).toBe(false) const plannerBus = wrapper.get('[data-testid="bus-link-schedule_planner"]') @@ -367,5 +518,49 @@ describe('agents page pcb command center', () => { expect(wrapper.get('[data-testid="linked-skills-empty"]').text()).toContain('暂无可关联技能') }) + + it('renders runtime summary from the active conversation', async () => { + const wrapper = mount(AgentsPage) + await flushPromises() + await flushPromises() + + expect(mocks.getRuntimeSummary).toHaveBeenCalledWith('conv-1') + const runtimeSummary = wrapper.get('[data-testid="runtime-summary"]') + expect(runtimeSummary.text()).toContain('collaboration') + expect(runtimeSummary.text()).toContain('phase_4_visibility_and_verification') + expect(runtimeSummary.text()).toContain('visibility.runtime_summary_ready') + expect(runtimeSummary.text()).toContain('passed') + expect(runtimeSummary.text()).toContain('worktree') + expect(runtimeSummary.text()).toContain('200') + 
expect(runtimeSummary.text()).toContain('Cost $0.001560') + expect(runtimeSummary.text()).toContain('Tasks 1/2') + expect(runtimeSummary.text()).toContain('Nodes 2') + expect(runtimeSummary.text()).toContain('Budget warning') + expect(wrapper.text()).toContain('/tmp/jarvis/worktree-1') + }) + + it('renders operator drilldown panels for events topology verifier and tool governance', async () => { + const wrapper = mount(AgentsPage) + await flushPromises() + await flushPromises() + + expect(wrapper.get('[data-testid="runtime-events-panel"]').text()).toContain('agent.cost.warning') + expect(wrapper.get('[data-testid="runtime-drilldown-panel"]').text()).toContain('analyst-1234abcd') + expect(wrapper.get('[data-testid="runtime-drilldown-panel"]').text()).toContain('task-1') + expect(wrapper.get('[data-testid="runtime-governance-panel"]').text()).toContain('web_search') + expect(wrapper.get('[data-testid="runtime-governance-panel"]').text()).toContain('worktree_manager') + expect(wrapper.get('[data-testid="runtime-governance-panel"]').text()).toContain('Thresholds: 300 tk / $0.010000') + }) + + it('shows a prompt when no conversation is selected', async () => { + useConversationStore().setCurrentConversation(null) + const wrapper = mount(AgentsPage) + await flushPromises() + await flushPromises() + + expect(mocks.getRuntimeSummary).not.toHaveBeenCalled() + expect(mocks.getVisibilityTopology).not.toHaveBeenCalled() + expect(wrapper.get('[data-testid="runtime-summary"]').text()).toContain('请选择一条会话以查看运行时摘要') + }) }) diff --git a/frontend/src/pages/agents/composables/useAgentsPage.ts b/frontend/src/pages/agents/composables/useAgentsPage.ts index 98c7d89..a445d9b 100644 --- a/frontend/src/pages/agents/composables/useAgentsPage.ts +++ b/frontend/src/pages/agents/composables/useAgentsPage.ts @@ -1,10 +1,23 @@ import { computed, onMounted, onUnmounted, reactive, ref } from 'vue' +import { storeToRefs } from 'pinia' import { COMMANDER_SKILLS, DEFAULT_AGENTS, MAIN_AGENT_ORDER, 
RELATION_LABELS, SUB_COMMANDERS } from '@/data/agents' import type { Agent, CommanderSkill, MainAgentId, SubCommander } from '@/data/agents' -import { agentApi, type AgentHierarchyStats, type AgentStats } from '@/api/agent' +import { + agentApi, + type AgentHierarchyStats, + type AgentStats, + type AgentVisibilityCostSummary, + type AgentVisibilityRuntimeSummary, + type AgentVisibilityToolGovernance, + type AgentVisibilityTopology, + type AgentVisibilityVerifier, +} from '@/api/agent' import { skillApi, type Skill } from '@/api/skill' +import { useConversationStore } from '@/stores/conversation' export function useAgentsPage() { +const conversationStore = useConversationStore() +const { currentConversationId } = storeToRefs(conversationStore) const NODE_W = 200 const NODE_H = 170 @@ -46,6 +59,59 @@ interface AgentDraft { selectedSkillIds: string[] } +interface RuntimeSummaryCard { + executionMode: string + currentPhase: string + currentCheckpoint: string + verifierStatus: string + verifierSummary: string + verifierEvidence: Array<{ + label: string + status: string + }> + isolationMode: string + workspacePath: string | null + totalTokens: number + estimatedCost: string + budgetWarning: boolean + activeTaskCount: number + completedTaskCount: number + topologyNodeCount: number + topologyNodes: Array<{ + agentId: string + role: string + taskCount: number + completedTaskCount: number + }> + costByAgent: Array<{ + agentId: string + totalTokens: number + estimatedCost: string + budgetWarning: boolean + }> + costThresholds: { + totalTokens: number + estimatedCost: string + } + toolGovernance: Array<{ + toolName: string + permissionClass: string + sideEffectScope: string + usageCount: number + }> + toolGovernanceTotals: { + totalTools: number + usedTools: number + } + upgradeCandidates: string[] + recentEvents: Array<{ + eventId: string + eventType: string + timestamp: string + severity: string + }> +} + const mainAgents = computed(() => MAIN_AGENT_ORDER.map(id => 
localAgents[id])) const childAgents = SUB_COMMANDERS const relationLabels = RELATION_LABELS @@ -102,11 +168,41 @@ const isPanning = ref(false) const panStart = reactive({ x: 0, y: 0 }) const panOrigin = reactive({ x: 0, y: 0 }) const connectionStatus = ref<'connected' | 'disconnected'>('disconnected') -const connectionLabel = computed(() => connectionStatus.value === 'connected' ? '瀹炴椂鍚屾' : '绂荤嚎妯″紡') +const connectionLabel = computed(() => connectionStatus.value === 'connected' ? '实时同步' : '离线模式') const zoomPercent = computed(() => `${Math.round(zoom.value * 100)}%`) const activeMainId = ref(null) const activeChildId = ref(null) const agentData = reactive>({}) +const runtimeSummary = ref({ + executionMode: 'direct', + currentPhase: 'phase_0_bootstrap', + currentCheckpoint: 'bootstrap.initialized', + verifierStatus: 'unknown', + verifierSummary: '暂无运行时摘要', + verifierEvidence: [], + isolationMode: 'none', + workspacePath: null, + totalTokens: 0, + estimatedCost: '$0.000000', + budgetWarning: false, + activeTaskCount: 0, + completedTaskCount: 0, + topologyNodeCount: 0, + topologyNodes: [], + costByAgent: [], + costThresholds: { + totalTokens: 0, + estimatedCost: '$0.000000', + }, + toolGovernance: [], + toolGovernanceTotals: { + totalTools: 0, + usedTools: 0, + }, + upgradeCandidates: [], + recentEvents: [], +}) +const runtimeConversationId = computed(() => currentConversationId.value) const localAgents = reactive>( Object.fromEntries([ @@ -117,7 +213,7 @@ const localAgents = reactive>( role: child.role, roleKey: child.id, description: child.description, - systemPrompt: `${child.role}锛?{child.description}`, + systemPrompt: `${child.role}: ${child.description}`, enabled: true, })), ].map(agent => [agent.id, { ...agent }])) @@ -482,7 +578,7 @@ function setRuntimeState(agentId: string, state: AgentStats) { } function applyHierarchyStats(stats: AgentHierarchyStats) { - agentData.master = { callCount: 47, currentTask: '鍗忚皟缁勭粐閾捐矾', status: 'active' } + agentData.master = { 
callCount: 47, currentTask: '协调协作链路', status: 'active' } let nextMain: string | null = null let nextChild: string | null = null @@ -533,14 +629,14 @@ function buildOfflineStats() { current_task: null, status: 'active', sub_commanders: [ - { agent_id: 'schedule_analysis', call_count: 4, current_task: '姊崇悊浠婃棩閲嶇偣', status: 'active' }, + { agent_id: 'schedule_analysis', call_count: 4, current_task: '梳理今日重点', status: 'active' }, { agent_id: 'schedule_planning', call_count: 9, current_task: null, status: 'idle' }, ], }, { agent_id: 'executor', call_count: 8, - current_task: '鍒涘缓鏂囨。', + current_task: '创建文档', status: 'idle', sub_commanders: [ { agent_id: 'executor_tasks', call_count: 8, current_task: null, status: 'idle' }, @@ -571,16 +667,135 @@ function buildOfflineStats() { } satisfies AgentHierarchyStats } +function applyRuntimeSummary( + summary: AgentVisibilityRuntimeSummary, + topology?: AgentVisibilityTopology, + verifier?: AgentVisibilityVerifier, + costSummary?: AgentVisibilityCostSummary, + toolGovernance?: AgentVisibilityToolGovernance, +) { + const verifierPayload = verifier || summary.verifier + const costPayload = costSummary || { + conversation_id: summary.conversation_id, + total: summary.cost, + thresholds: {}, + by_agent: [], + } + runtimeSummary.value = { + executionMode: summary.execution_mode || 'direct', + currentPhase: summary.current_phase || 'phase_0_bootstrap', + currentCheckpoint: summary.current_checkpoint || 'bootstrap.initialized', + verifierStatus: verifierPayload.status || 'unknown', + verifierSummary: verifierPayload.summary || '暂无 verifier 结论', + verifierEvidence: (verifierPayload.evidence || []).slice(0, 4).map((entry, index) => ({ + label: String(entry.task_id || entry.event_type || `evidence-${index}`), + status: String(entry.status || entry.summary || 'available'), + })), + isolationMode: summary.isolation.mode || 'none', + workspacePath: summary.isolation.workspace_path || null, + totalTokens: summary.cost.total_tokens, + 
estimatedCost: `$${(summary.cost.estimated_cost || 0).toFixed(6)}`, + budgetWarning: Boolean(summary.cost.budget_warning), + activeTaskCount: summary.active_task_count, + completedTaskCount: summary.completed_task_count, + topologyNodeCount: summary.topology_node_count, + topologyNodes: (topology?.nodes || []).slice(0, 5).map(node => ({ + agentId: node.agent_id, + role: node.role || 'unknown', + taskCount: node.task_count, + completedTaskCount: node.completed_task_count, + })), + costByAgent: (costPayload.by_agent || []).slice(0, 4).map(item => ({ + agentId: item.agent_id, + totalTokens: item.total_tokens, + estimatedCost: `$${(item.estimated_cost || 0).toFixed(6)}`, + budgetWarning: Boolean(item.budget_warning), + })), + costThresholds: { + totalTokens: Number(costPayload.thresholds.total_tokens || 0), + estimatedCost: `$${Number(costPayload.thresholds.estimated_cost || 0).toFixed(6)}`, + }, + toolGovernance: (toolGovernance?.items || []).slice(0, 6).map(item => ({ + toolName: item.tool_name, + permissionClass: item.permission_class, + sideEffectScope: item.side_effect_scope, + usageCount: item.usage_count, + })), + toolGovernanceTotals: { + totalTools: Number(toolGovernance?.total_tools || 0), + usedTools: Number(toolGovernance?.used_tools || 0), + }, + upgradeCandidates: [...(toolGovernance?.upgrade_candidates || [])], + recentEvents: summary.recent_events.map(event => ({ + eventId: event.event_id, + eventType: event.event_type, + timestamp: event.timestamp, + severity: event.severity, + })), + } +} + async function refreshStats() { loading.value = true try { const stats = await agentApi.getHierarchyStats() applyHierarchyStats(stats) + if (runtimeConversationId.value) { + try { + const [summary, topology, verifier, costSummary, tools] = await Promise.all([ + agentApi.getRuntimeSummary(runtimeConversationId.value), + agentApi.getVisibilityTopology(runtimeConversationId.value), + agentApi.getVisibilityVerifier(runtimeConversationId.value), + 
agentApi.getVisibilityCost(runtimeConversationId.value), + agentApi.getVisibilityTools(runtimeConversationId.value), + ]) + applyRuntimeSummary(summary, topology, verifier, costSummary, tools) + } catch { + runtimeSummary.value = { + ...runtimeSummary.value, + verifierSummary: '运行时摘要暂不可用', + } + } + } else { + runtimeSummary.value = { + ...runtimeSummary.value, + verifierSummary: '请选择一条会话以查看运行时摘要', + } + } connectionStatus.value = 'connected' stopDemoRouteCycle() } catch { connectionStatus.value = 'disconnected' applyHierarchyStats(buildOfflineStats()) + runtimeSummary.value = { + executionMode: 'direct', + currentPhase: 'offline_demo', + currentCheckpoint: 'offline.demo_mode', + verifierStatus: 'offline', + verifierSummary: '当前使用离线演示数据', + verifierEvidence: [], + isolationMode: 'none', + workspacePath: null, + totalTokens: 0, + estimatedCost: '$0.000000', + budgetWarning: false, + activeTaskCount: 0, + completedTaskCount: 0, + topologyNodeCount: 0, + topologyNodes: [], + costByAgent: [], + costThresholds: { + totalTokens: 0, + estimatedCost: '$0.000000', + }, + toolGovernance: [], + toolGovernanceTotals: { + totalTools: 0, + usedTools: 0, + }, + upgradeCandidates: [], + recentEvents: [], + } startDemoRouteCycle() } finally { loading.value = false @@ -710,6 +925,7 @@ onUnmounted(() => { selectedNodePackages, selectedNodeSkills, agentData, + runtimeSummary, localAgents, viewportStyle, stageStyle, diff --git a/frontend/src/pages/agents/index.vue b/frontend/src/pages/agents/index.vue index 8a7892b..da0f718 100644 --- a/frontend/src/pages/agents/index.vue +++ b/frontend/src/pages/agents/index.vue @@ -38,6 +38,146 @@
{{ activeMainRouteLabel }}
{{ activeChildRouteLabel }}
+
+
RUNTIME SUMMARY
+
+
+ MODE + {{ runtimeSummary.executionMode }} +
+
+ PHASE + {{ runtimeSummary.currentPhase }} +
+
+ CHECKPOINT + {{ runtimeSummary.currentCheckpoint }} +
+
+ VERIFIER + {{ runtimeSummary.verifierStatus }} +
+
+ ISOLATION + {{ runtimeSummary.isolationMode }} +
+
+ TOKENS + {{ runtimeSummary.totalTokens }} +
+
+
{{ runtimeSummary.verifierSummary }}
+
+ Cost {{ runtimeSummary.estimatedCost }} + Tasks {{ runtimeSummary.completedTaskCount }}/{{ runtimeSummary.activeTaskCount }} + Nodes {{ runtimeSummary.topologyNodeCount }} + Budget warning +
+
{{ runtimeSummary.workspacePath }}
+
+ +
+
RECENT EVENTS
+
+
+
+ {{ event.eventType }} + {{ event.severity }} +
+
{{ event.timestamp }}
+
+
+
暂无 recent events。
+
+ +
+
TOPOLOGY & VERIFIER
+
+
Topology
+
+
+
+ {{ node.agentId }} + {{ node.role }} +
+
Tasks {{ node.completedTaskCount }}/{{ node.taskCount }}
+
+
+
暂无 topology 详情。
+
+
+
Verifier Evidence
+
+
+
+ {{ entry.label }} + {{ entry.status }} +
+
+
+
暂无 verifier evidence。
+
+
+ +
+
COST & TOOLS
+
+
Cost By Agent
+
+
+
+ {{ item.agentId }} + {{ item.totalTokens }} tk +
+
+ {{ item.estimatedCost }} + warning +
+
+
+
暂无 cost breakdown。
+
+ Thresholds: {{ runtimeSummary.costThresholds.totalTokens }} tk / {{ runtimeSummary.costThresholds.estimatedCost }} +
+
+
+
Tool Governance
+
Used {{ runtimeSummary.toolGovernanceTotals.usedTools }}/{{ runtimeSummary.toolGovernanceTotals.totalTools }}
+
+
+
+ {{ tool.toolName }} + {{ tool.usageCount }}x +
+
{{ tool.permissionClass }} · {{ tool.sideEffectScope }}
+
+
+
暂无 tool governance 数据。
+
+ {{ candidate }} +
+
+
@@ -104,7 +244,7 @@
{{ getAgentDesc('master') }}
- +
@@ -293,11 +433,11 @@ @@ -360,6 +500,7 @@ const { selectedNodePackages, selectedNodeSkills, agentData, + runtimeSummary, localAgents, viewportStyle, stageStyle,