feat: 增强 Agent 意图识别和上下文管理

- 新增 intent_router.py 意图路由模块 - 优化 context.py 上下文管理 - 增强 loop.py Agent 运行循环 - 更新 memory.py 记忆模块 - 修复 builtin.py 工具函数 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-15 21:44:00 +08:00
parent d72c6a3f25
commit 0e0f988264
5 changed files with 493 additions and 8 deletions
--- a/core/agents/agent/context.py
+++ b/core/agents/agent/context.py
@@ -36,6 +36,22 @@ Your workspace is at: {workspace_path}
 - Be helpful and concise
 - Think step by step when needed
 - Ask for clarification when the request is ambiguous
 ## Tool Usage Guidelines
 **IMPORTANT**: Only use tools when explicitly requested by the user:
 **Use tools for**:
 - Searching the web for current information
 - Executing code or commands
 - Reading or writing files
 - Performing calculations
 **DO NOT use tools for**:
 - Simple questions and greetings (e.g., "介绍一下武汉", "你好", "什么是AI")
 - General knowledge that you already know
 - Conversational responses
 For simple informational questions, respond directly from your knowledge without calling any tools.
 """
    def build_messages(
--- a/core/agents/agent/intent_router.py
+++ b/core/agents/agent/intent_router.py
@@ -0,0 +1,278 @@
 """Intent recognition system for routing user requests."""
 import json
 import logging
 import re
 from enum import Enum
 from typing import Any
 logger = logging.getLogger(__name__)
 class IntentType(Enum):
    """Closed set of intent categories a user message can be classified as.

    Each member's value is the lowercase string label for that category.
    """

    # Plain Q&A or small talk; no tools are needed.
    SIMPLE = "simple"
    # Needs an external tool (search, code execution, files, etc.).
    TOOL = "tool"
    # Needs a specific domain skill.
    SKILL = "skill"
    # Needs several agents collaborating.
    TEAM = "team"
    # The intent could not be determined.
    UNKNOWN = "unknown"
 # Prompt template used for LLM-based intent classification. Its only
 # placeholder is {message}, which is filled in with str.format(); the model is
 # asked to answer with a single label: simple, tool, skill or team.
 INTENT_PROMPT = """Analyze the user's message and classify their intent.
 Intent Types:
 - simple: General knowledge questions, greetings, casual conversation, simple Q&A
  Examples: "你好", "介绍一下武汉", "什么是AI", "今天天气怎么样"
 - tool: Requires external tools - web search, code execution, file operations, calculations
  Examples: "搜索最新的AI新闻", "帮我运行这段代码", "读取文件内容", "计算这个表达式"
 - skill: Requires specific domain skill (coding, design, analysis, etc.)
  Examples: "用Python写一个排序算法", "分析这段代码的性能", "创建一个网页"
 - team: Requires multiple agents working together
  Examples: "让设计agent和开发agent一起完成这个任务", "创建一个团队来完成这个项目"
 Guidelines:
 - For greetings and simple questions, prefer "simple"
 - Only use "tool" when user explicitly asks for search, execution, or file operations
 - "introduce Wuhan" in Chinese is general knowledge - prefer "simple" unless user specifically asks for latest/current information
 - If ambiguous, prefer "simple" to avoid unnecessary tool calls
 User message: {message}
 Respond with only the intent type (simple/tool/skill/team), no explanation:"""
 class IntentRecognizer:
    """Classify a user message into an IntentType.

    Cheap keyword heuristics run first. When they cannot decide and an LLM
    provider was supplied, the LLM is asked to classify the message; with no
    provider the result defaults to IntentType.SIMPLE.
    """

    # Keyword tables for the heuristic fast path. All entries are lowercase
    # because they are compared against the lowercased message. Pure-ASCII
    # entries are matched on word boundaries (see _contains_keyword) so that
    # e.g. "hi" does not fire inside "this"; entries containing CJK characters
    # are matched as plain substrings.
    _GREETINGS = ("你好", "hello", "hi", "嗨", "您好", "hey")
    _SIMPLE_PATTERNS = (
        "什么是", "什么叫",
        "介绍一下", "请介绍",
        "解释一下", "解释",
        "怎么样", "好不好",
        "是什么意思",
        "who are", "what is", "what's",
        "tell me about",
    )
    # A "simple" question that also asks for fresh information may need search.
    _FRESHNESS_KEYWORDS = ("最新", "现在", "current", "latest", "实时")
    _TOOL_PATTERNS = (
        "搜索", "查找", "search",
        "执行", "运行", "run",
        "计算", "calculate",
        "帮我写代码", "write code",
        "读取", "read file",
        "创建文件", "write file",
    )
    _TEAM_PATTERNS = (
        "团队", "协作", "多个agent",
        "team", "collaborate", "一起",
    )
    _SKILL_PATTERNS = (
        "用python", "用java", "用js",
        "写一个算法", "实现",
        "创建一个", "开发",
        "分析", "优化",
    )

    def __init__(self, llm_provider=None):
        """Initialize intent recognizer.

        Args:
            llm_provider: LLM provider used when heuristics cannot decide.
                May be None, in which case only heuristics are used.
        """
        self._llm_provider = llm_provider
        # Placeholder for caching recent results; nothing in this class reads
        # or writes it yet.
        self._cache = {}

    def recognize(
        self,
        message: str,
        available_tools: list[str] | None = None,
        available_skills: list[str] | None = None,
    ) -> IntentType:
        """Recognize user intent.

        Args:
            message: User message.
            available_tools: List of available tool names. Accepted for
                interface compatibility; not consulted by this method.
            available_skills: List of available skill names. Accepted for
                interface compatibility; not consulted by this method.

        Returns:
            The heuristic result when it is conclusive, otherwise the LLM
            result when a provider is configured, otherwise IntentType.SIMPLE.
        """
        # Fast path: keyword heuristics, no network call.
        intent = self._heuristic_recognition(message)
        if intent != IntentType.UNKNOWN:
            logger.info(
                "Intent recognized (heuristic): %s for message: %s...",
                intent.value,
                message[:50],
            )
            return intent

        # Slow path: ask the LLM, if one was provided.
        if self._llm_provider:
            return self._llm_recognition(message)

        # No LLM available: default to the cheapest route.
        return IntentType.SIMPLE

    @staticmethod
    def _contains_keyword(text: str, keywords, whole_word: bool = False) -> bool:
        """Return True if any keyword occurs in ``text``.

        Args:
            text: Lowercased text to search.
            keywords: Iterable of lowercase keywords.
            whole_word: For ASCII keywords, also require that the keyword is
                not followed by a letter or digit (so "hi" does not match
                "history").

        Returns:
            True on the first match, False if nothing matches.

        ASCII keywords must start at a word boundary, so "search" matches
        "searching" but not "research". Keywords containing non-ASCII (CJK)
        characters are matched as substrings because such text has no
        separators between words.
        """
        for keyword in keywords:
            if not keyword.isascii():
                if keyword in text:
                    return True
                continue
            # Explicit [a-z0-9] look-arounds instead of \b: \b treats CJK
            # characters as word characters, which would reject "hi你好".
            pattern = r"(?<![a-z0-9])" + re.escape(keyword)
            if whole_word:
                pattern += r"(?![a-z0-9])"
            if re.search(pattern, text):
                return True
        return False

    def _heuristic_recognition(self, message: str) -> IntentType:
        """Fast heuristic-based intent recognition.

        Args:
            message: User message.

        Returns:
            Recognized intent, or IntentType.UNKNOWN when no rule applies.
        """
        if not message:
            return IntentType.UNKNOWN

        message_lower = message.lower().strip()
        has = self._contains_keyword

        # Short messages containing a greeting word.
        if len(message_lower) < 20 and has(message_lower, self._GREETINGS, whole_word=True):
            return IntentType.SIMPLE

        # Simple question phrasing that does not require tools...
        if has(message_lower, self._SIMPLE_PATTERNS):
            # ...unless the user asks for current/latest/real-time information,
            # which might need a web search; leave that decision to the LLM.
            if has(message_lower, self._FRESHNESS_KEYWORDS):
                return IntentType.UNKNOWN
            return IntentType.SIMPLE

        # Explicit tool requests.
        if has(message_lower, self._TOOL_PATTERNS):
            return IntentType.TOOL

        # Team requests are checked before skill requests: team phrasing such
        # as "创建一个团队" or "开发agent一起" also contains generic skill
        # keywords ("创建一个", "开发") and would otherwise be routed to SKILL.
        if has(message_lower, self._TEAM_PATTERNS):
            return IntentType.TEAM

        if has(message_lower, self._SKILL_PATTERNS):
            return IntentType.SKILL

        return IntentType.UNKNOWN

    def _llm_recognition(self, message: str) -> IntentType:
        """LLM-based intent recognition.

        Args:
            message: User message.

        Returns:
            The first of simple/tool/skill/team found in the model's reply;
            IntentType.SIMPLE when the reply contains none of them or when the
            call raises.
        """
        try:
            prompt = INTENT_PROMPT.format(message=message)
            # NOTE(review): chat() is called synchronously here. If the
            # provider's chat() is a coroutine function, this call yields no
            # usable response and the except branch below returns SIMPLE -
            # confirm against the provider API.
            response = self._llm_provider.chat(
                messages=[{"role": "user", "content": prompt}],
                max_tokens=50,
            )
            content = (response.content or "").strip().lower()

            # Check labels in a fixed priority order; the reply is expected to
            # be a single label but may carry extra words.
            for intent in (
                IntentType.SIMPLE,
                IntentType.TOOL,
                IntentType.SKILL,
                IntentType.TEAM,
            ):
                if intent.value in content:
                    return intent

            logger.warning("Unexpected intent response: %s", content)
            return IntentType.SIMPLE  # Default to simple
        except Exception as e:
            # Best effort: intent recognition must never break the chat flow.
            logger.error("LLM intent recognition failed: %s", e)
            return IntentType.SIMPLE  # Default to simple on error
 class IntentRouter:
    """Routes requests based on recognized intent."""

    def __init__(
        self,
        intent_recognizer: IntentRecognizer | None = None,
        use_llm_recognition: bool = True,
    ):
        """Initialize intent router.

        Args:
            intent_recognizer: Intent recognizer instance. When None, a
                recognizer without an LLM provider (heuristics only) is
                created so that route() always has something to call.
            use_llm_recognition: Whether to use LLM for complex cases.
                NOTE(review): this flag is stored but nothing in this class
                consults it - confirm the intended behaviour.
        """
        # Fall back to a default recognizer instead of storing None, which
        # made route() fail with AttributeError on a default-built router.
        if intent_recognizer is None:
            intent_recognizer = IntentRecognizer()
        self._recognizer = intent_recognizer
        self._use_llm = use_llm_recognition

    def route(
        self,
        message: str,
        available_tools: list[str] | None = None,
        available_skills: list[str] | None = None,
    ) -> dict[str, Any]:
        """Route the user message based on intent.

        Args:
            message: User message.
            available_tools: List of available tool names, forwarded to the
                recognizer.
            available_skills: List of available skill names, forwarded to the
                recognizer.

        Returns:
            A dict with keys "intent" (the IntentType value string), "action"
            (the action name for that intent) and "message" (the input
            message, unchanged).
        """
        intent = self._recognizer.recognize(
            message,
            available_tools,
            available_skills,
        )

        decision = {
            "intent": intent.value,
            "action": self._get_action(intent),
            "message": message,
        }

        logger.info("Routed message to %s: %s...", intent.value, message[:50])
        return decision

    def _get_action(self, intent: IntentType) -> str:
        """Get the action to take based on intent.

        Args:
            intent: Recognized intent type.

        Returns:
            Action name; "direct_response" for UNKNOWN and for any value not
            present in the table.
        """
        return {
            IntentType.SIMPLE: "direct_response",
            IntentType.TOOL: "execute_tools",
            IntentType.SKILL: "execute_skill",
            IntentType.TEAM: "team_collaboration",
            IntentType.UNKNOWN: "direct_response",  # Default to direct response
        }.get(intent, "direct_response")
 def create_intent_router(llm_provider=None) -> IntentRouter:
    """Build an IntentRouter backed by a freshly created IntentRecognizer.

    Args:
        llm_provider: LLM provider handed to the recognizer; may be None.

    Returns:
        An IntentRouter wrapping the new recognizer, with the router's other
        settings left at their defaults.
    """
    return IntentRouter(
        intent_recognizer=IntentRecognizer(llm_provider=llm_provider),
    )
--- a/core/agents/agent/loop.py
+++ b/core/agents/agent/loop.py
@@ -10,6 +10,7 @@ from typing import Any, Callable, Awaitable, AsyncGenerator
 from agents.agent.context import ContextBuilder
 from agents.agent.memory import AgentMemory
 from agents.agent.intent_router import IntentRouter, create_intent_router, IntentType
 from agents.llm import LLMProvider, LLMResponse, ProviderFactory
 from agents.tools import ToolRegistry
@@ -28,6 +29,7 @@ class AgentLoop:
        workspace: Path | None = None,
        max_iterations: int = 10,
        tools: ToolRegistry | None = None,
        enable_intent_routing: bool = True,
    ):
        """Initialize the agent loop.
@@ -37,16 +39,24 @@ class AgentLoop:
            workspace: Workspace directory for memory and configs
            max_iterations: Maximum tool call iterations
            tools: Tool registry (creates default if None)
            enable_intent_routing: Enable intent recognition and routing
        """
        self.provider = provider
        self.model = model
        self.workspace = workspace or Path.cwd()
        self.max_iterations = max_iterations
        self.tools = tools
        self.enable_intent_routing = enable_intent_routing
        self.context = ContextBuilder(self.workspace)
        self.memory = AgentMemory(self.workspace)
        # Initialize intent router
        if enable_intent_routing:
            self.intent_router = create_intent_router(llm_provider=provider)
        else:
            self.intent_router = None
    async def chat(
        self,
        message: str,
@@ -79,10 +89,43 @@ class AgentLoop:
        """
        history = history or []
        # Intent recognition and routing
        intent_decision = None
        if self.intent_router and not history:  # Only for first message in conversation
            try:
                tool_names = self.tools.tool_names if self.tools else []
                intent_decision = self.intent_router.route(
                    message=message,
                    available_tools=tool_names,
                )
                logger.info(f"Intent recognized: {intent_decision['intent']} -> {intent_decision['action']}")
                # For simple intent, respond directly without tool loop
                if intent_decision["intent"] == IntentType.SIMPLE.value:
                    # Build messages for direct response
                    messages = self.context.build_messages(
                        history=history,
                        current_message=message,
                    )
                    # Call LLM without tools
                    response = await self.provider.chat_with_retry(
                        messages=messages,
                        tools=None,  # No tools for simple requests
                        model=self.model,
                    )
                    content = self._strip_think(response.content) or "好的，让我来回答这个问题。"
                    # Save to history
                    self._save_history(session_key, messages, len(history))
                    return content
            except Exception as e:
                logger.warning(f"Intent routing failed: {e}, continuing with normal flow")
        # Load history from session if session_key is provided
        if session_key and session_key != "default":
            loaded_history = self.memory.get_history(session_key, max_messages=20)
            if loaded_history:
                # Merge any split assistant messages
                loaded_history = self._merge_history_messages(loaded_history)
                logger.info(f"Loaded {len(loaded_history)} messages from session history")
                # Merge loaded history with provided history (loaded takes precedence if empty)
                if not history:
@@ -155,10 +198,43 @@ class AgentLoop:
        """
        history = history or []
        # Intent recognition and routing
        intent_decision = None
        if self.intent_router and not history:  # Only for first message in conversation
            try:
                tool_names = self.tools.tool_names if self.tools else []
                intent_decision = self.intent_router.route(
                    message=message,
                    available_tools=tool_names,
                )
                logger.info(f"Intent recognized: {intent_decision['intent']} -> {intent_decision['action']}")
                # For simple intent, respond directly without tool loop
                if intent_decision["intent"] == IntentType.SIMPLE.value:
                    # Build messages for direct response
                    messages = self.context.build_messages(
                        history=history,
                        current_message=message,
                    )
                    # Call LLM without tools
                    response = await self.provider.chat_with_retry(
                        messages=messages,
                        tools=None,  # No tools for simple requests
                        model=self.model,
                    )
                    content = self._strip_think(response.content) or "好的，让我来回答这个问题。"
                    # Save to history
                    self._save_history(session_key, messages, len(history))
                    return content
            except Exception as e:
                logger.warning(f"Intent routing failed: {e}, continuing with normal flow")
        # Load history from session if session_key is provided
        if session_key and session_key != "default":
            loaded_history = self.memory.get_history(session_key, max_messages=20)
            if loaded_history:
                # Merge any split assistant messages
                loaded_history = self._merge_history_messages(loaded_history)
                logger.info(f"Loaded {len(loaded_history)} messages from session history")
                # Merge loaded history with provided history (loaded takes precedence if empty)
                if not history:
@@ -334,6 +410,28 @@ class AgentLoop:
        tool_defs = self.tools.get_definitions() if self.tools else []
        # Intent recognition - determine if tools are needed before first LLM call
        user_message = ""
        for msg in messages:
            if msg.get("role") == "user":
                user_message = msg.get("content", "")
                break
        # Apply intent recognition on first iteration
        if self.enable_intent_routing and self.intent_router and user_message:
            available_tools = [t.get("function", {}).get("name", "") for t in tool_defs] if tool_defs else []
            routing_decision = self.intent_router.route(
                user_message,
                available_tools=available_tools,
            )
            intent = routing_decision.get("intent", "simple")
            logger.info(f"Intent recognized: {intent} for message: {user_message[:50]}...")
            # If simple intent, don't pass tools to reduce unnecessary tool calls
            if intent == "simple":
                tool_defs = []
                logger.info("Simple intent detected - disabling tool definitions for this request")
        while iteration < self.max_iterations:
            iteration += 1
@@ -423,6 +521,28 @@ class AgentLoop:
        model = model or self.model
        tool_defs = self.tools.get_definitions() if self.tools else []
        # Intent recognition - determine if tools are needed before first LLM call
        user_message = ""
        for msg in initial_messages:
            if msg.get("role") == "user":
                user_message = msg.get("content", "")
                break
        # Apply intent recognition
        if self.enable_intent_routing and self.intent_router and user_message:
            available_tools = [t.get("function", {}).get("name", "") for t in tool_defs] if tool_defs else []
            routing_decision = self.intent_router.route(
                user_message,
                available_tools=available_tools,
            )
            intent = routing_decision.get("intent", "simple")
            logger.info(f"[stream] Intent recognized: {intent} for message: {user_message[:50]}...")
            # If simple intent, don't pass tools to reduce unnecessary tool calls
            if intent == "simple":
                tool_defs = []
                logger.info("[stream] Simple intent detected - disabling tool definitions")
        # First call to check for tool calls
        response = await provider.chat_with_retry(
            messages=initial_messages,
@@ -490,6 +610,55 @@ class AgentLoop:
            return f'{tc.name}("{val[:40]}...")' if len(val) > 40 else f'{tc.name}("{val}")'
        return ", ".join(_fmt(tc) for tc in tool_calls)
    @staticmethod
    def _merge_history_messages(messages: list[dict]) -> list[dict]:
        """Merge adjacent assistant messages that have content and tool_calls separately.
        When saving/loading history, assistant messages with both content and tool_calls
        might be split into multiple entries. This method merges them back together.
        Args:
            messages: List of message dictionaries
        Returns:
            Merged list of messages
        """
        if not messages:
            return messages
        merged = []
        i = 0
        while i < len(messages):
            current = messages[i].copy()
            # If current is an assistant message with tool_calls, check if next is
            # an assistant message with content (or vice versa)
            if current.get("role") == "assistant" and current.get("tool_calls"):
                # Look ahead for another assistant message to merge with
                j = i + 1
                while j < len(messages):
                    next_msg = messages[j]
                    if next_msg.get("role") == "assistant":
                        # Merge content
                        if next_msg.get("content") and not current.get("content"):
                            current["content"] = next_msg.get("content")
                        # Merge tool_calls (should already be in current)
                        if next_msg.get("tool_calls") and not current.get("tool_calls"):
                            current["tool_calls"] = next_msg.get("tool_calls")
                        j += 1
                    else:
                        break
                # If we merged multiple messages, skip them
                if j > i + 1:
                    logger.debug(f"Merged {j - i} assistant messages")
                i = j
            else:
                merged.append(current)
                i += 1
        return merged
    def _save_history(
        self,
        session_key: str,
@@ -510,13 +679,18 @@ class AgentLoop:
            if role == "user" and content:
                self.memory.add_to_history("user", str(content)[:1000], session_key)
            elif role == "assistant":
-                # Save assistant message content
+                # Build a combined message with content and tool_calls
                msg_data = {}
                if content:
-                    self.memory.add_to_history("assistant", str(content)[:1000], session_key)
+                    msg_data["content"] = str(content)[:1000]
                # Save tool_calls if present (needed for multi-turn tool calls)
                if m.get("tool_calls"):
-                    tool_calls_str = json.dumps(m.get("tool_calls", []))
+                    msg_data["tool_calls"] = m.get("tool_calls", [])
-                    self.memory.add_to_history("assistant", f"[tool_calls]{tool_calls_str}", session_key)
+
                # Save as a single JSON message with all data
                if msg_data:
                    msg_str = json.dumps(msg_data)
                    self.memory.add_to_history("assistant", msg_str, session_key)
            # Save tool results (needed for multi-turn conversations)
            elif role == "tool":
                tool_call_id = m.get("tool_call_id", "")
--- a/core/agents/agent/memory.py
+++ b/core/agents/agent/memory.py
@@ -537,7 +537,7 @@ class AgentMemory:
            except:
                pass
-        # Check if content contains tool_calls or tool_result markers
+        # Check if content contains tool_calls or tool_result markers, or is JSON
        # Format as Markdown (产品经理指定格式)
        entry_lines = [
            f"## 消息 {msg_count}",
@@ -553,7 +553,20 @@ class AgentMemory:
            entry_lines.append(f"工具结果: {content[len('[tool_result]'):]}")
            entry_lines.append(f"内容: ")
        else:
-            entry_lines.append(f"内容: {content}")
+            # Check if it's a JSON object (new format with content + tool_calls)
            try:
                data = json.loads(content)
                if isinstance(data, dict):
                    # New JSON format: might have content and/or tool_calls
                    if "content" in data:
                        entry_lines.append(f"内容: {data['content']}")
                    if "tool_calls" in data:
                        entry_lines.append(f"工具调用: {json.dumps(data['tool_calls'])}")
                else:
                    entry_lines.append(f"内容: {content}")
            except (json.JSONDecodeError, TypeError):
                # Not JSON, treat as regular content
                entry_lines.append(f"内容: {content}")
        entry = "\n".join(entry_lines) + "\n\n"
@@ -631,6 +644,9 @@ class AgentMemory:
                if line.startswith("工具调用:") and current_message is not None:
                    tool_calls_json = line.split(":", 1)[1].strip()
                    try:
                        # Set role if not already set
                        if not current_message.get("role"):
                            current_message["role"] = "assistant"
                        current_message["tool_calls"] = json.loads(tool_calls_json)
                    except json.JSONDecodeError:
                        pass
@@ -641,6 +657,7 @@ class AgentMemory:
                    tool_result_json = line.split(":", 1)[1].strip()
                    try:
                        tool_result = json.loads(tool_result_json)
                        current_message["role"] = "tool"  # Set role to tool
                        current_message["tool_call_id"] = tool_result.get("tool_call_id", "")
                        current_message["name"] = tool_result.get("name", "")
                        current_message["content"] = tool_result.get("content", "")
--- a/core/agents/tools/builtin.py
+++ b/core/agents/tools/builtin.py
@@ -275,7 +275,7 @@ class WebSearchTool(Tool):
    @property
    def description(self) -> str:
-        return "Search the web for information using a search engine."
+        return "Search the web for current information, real-time data, or information that is not in your training data. **Only use this when the user explicitly asks for** latest news, current events, real-time information, or specifically requests a web search. **DO NOT use for simple questions** like '介绍一下武汉', '什么是AI' - answer from your knowledge instead."
    @property
    def parameters(self) -> dict[str, Any]: