feat: 增强 Agent 意图识别和上下文管理

- 新增 intent_router.py 意图路由模块
- 优化 context.py 上下文管理
- 增强 loop.py Agent 运行循环
- 更新 memory.py 记忆模块
- 修复 builtin.py 工具函数

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-15 21:44:00 +08:00
parent d72c6a3f25
commit 0e0f988264
5 changed files with 493 additions and 8 deletions

View File

@@ -36,6 +36,22 @@ Your workspace is at: {workspace_path}
- Be helpful and concise - Be helpful and concise
- Think step by step when needed - Think step by step when needed
- Ask for clarification when the request is ambiguous - Ask for clarification when the request is ambiguous
## Tool Usage Guidelines
**IMPORTANT**: Only use tools when explicitly requested by the user:
**Use tools for**:
- Searching the web for current information
- Executing code or commands
- Reading or writing files
- Performing calculations
**DO NOT use tools for**:
- Simple questions and greetings (e.g., "介绍一下武汉", "你好", "什么是AI")
- General knowledge that you already know
- Conversational responses
For simple informational questions, respond directly from your knowledge without calling any tools.
""" """
def build_messages( def build_messages(

View File

@@ -0,0 +1,278 @@
"""Intent recognition system for routing user requests."""
import json
import logging
from enum import Enum
from typing import Any
logger = logging.getLogger(__name__)
class IntentType(Enum):
    """Closed set of categories a user message can be routed to."""

    # Plain question/answer that can be served without any tool.
    SIMPLE = "simple"
    # Requires an external tool (search, code execution, files, ...).
    TOOL = "tool"
    # Requires a specific domain skill.
    SKILL = "skill"
    # Requires several agents working together.
    TEAM = "team"
    # The intent could not be determined.
    UNKNOWN = "unknown"
# Prompt template used for LLM-based intent classification.
# `{message}` is the only placeholder and is filled in with `str.format`, so
# any literal brace added to this text later must be doubled (`{{` / `}}`).
# The model is asked to answer with a bare label (simple/tool/skill/team).
INTENT_PROMPT = """Analyze the user's message and classify their intent.
Intent Types:
- simple: General knowledge questions, greetings, casual conversation, simple Q&A
Examples: "你好", "介绍一下武汉", "什么是AI", "今天天气怎么样"
- tool: Requires external tools - web search, code execution, file operations, calculations
Examples: "搜索最新的AI新闻", "帮我运行这段代码", "读取文件内容", "计算这个表达式"
- skill: Requires specific domain skill (coding, design, analysis, etc.)
Examples: "用Python写一个排序算法", "分析这段代码的性能", "创建一个网页"
- team: Requires multiple agents working together
Examples: "让设计agent和开发agent一起完成这个任务", "创建一个团队来完成这个项目"
Guidelines:
- For greetings and simple questions, prefer "simple"
- Only use "tool" when user explicitly asks for search, execution, or file operations
- "introduce Wuhan" in Chinese is general knowledge - prefer "simple" unless user specifically asks for latest/current information
- If ambiguous, prefer "simple" to avoid unnecessary tool calls
User message: {message}
Respond with only the intent type (simple/tool/skill/team), no explanation:"""
class IntentRecognizer:
    """Recognize the intent of a user message so the request can be routed.

    Keyword heuristics are tried first as a fast path. When they cannot
    decide and an LLM provider was supplied, the provider is asked to
    classify the message. Without a provider, undecided messages default to
    ``IntentType.SIMPLE``.
    """

    # A greeting keyword only counts when the whole message is this short.
    _GREETING_MAX_LEN = 20
    # CJK text has no word boundaries, so substring matching is appropriate.
    _CJK_GREETINGS = ("你好", "您好")
    # ASCII greetings must match as whole words; as raw substrings "hi" would
    # fire inside "this" and "hey" inside "they".
    _ASCII_GREETINGS = ("hello", "hi", "hey")

    # Phrasings that normally indicate a knowledge question.
    _SIMPLE_PATTERNS = (
        "什么是", "什么叫",
        "介绍一下", "请介绍",
        "解释一下", "解释",
        "怎么样", "好不好",
        "是什么意思",
        "who are", "what is", "what's",
        "tell me about",
    )
    # Words signalling that up-to-date information is wanted, which the model
    # cannot answer from its own knowledge.
    _FRESHNESS_KEYWORDS = ("最新", "现在", "current", "latest", "实时")
    # Explicit requests for a tool.
    _TOOL_PATTERNS = (
        "搜索", "查找", "search",
        "执行", "运行", "run",
        "计算", "calculate",
        "帮我写代码", "write code",
        "读取", "read file",
        "创建文件", "write file",
    )
    _SKILL_PATTERNS = (
        "用python", "用java", "用js",
        "写一个算法", "实现",
        "创建一个", "开发",
        "分析", "优化",
    )
    _TEAM_PATTERNS = (
        "团队", "协作", "多个agent",
        "team", "collaborate", "一起",
    )
    # Labels the LLM is allowed to answer with, and the intent each maps to.
    _LLM_LABELS = (
        ("simple", IntentType.SIMPLE),
        ("tool", IntentType.TOOL),
        ("skill", IntentType.SKILL),
        ("team", IntentType.TEAM),
    )

    def __init__(self, llm_provider=None):
        """Initialize intent recognizer.

        Args:
            llm_provider: LLM provider for intent recognition. When omitted,
                only the keyword heuristics are used.
        """
        self._llm_provider = llm_provider
        # Placeholder for caching recent intents; nothing in this class reads
        # or writes it yet.
        self._cache = {}

    def recognize(
        self,
        message: str,
        available_tools: list[str] | None = None,
        available_skills: list[str] | None = None,
    ) -> IntentType:
        """Recognize user intent.

        Args:
            message: User message
            available_tools: List of available tool names (accepted for
                interface compatibility; not consulted by this method)
            available_skills: List of available skill names (accepted for
                interface compatibility; not consulted by this method)

        Returns:
            Recognized intent type. Never ``IntentType.UNKNOWN``: undecided
            messages go to the LLM if one is configured, else SIMPLE.
        """
        # Fast path: keyword heuristics.
        intent = self._heuristic_recognition(message)
        if intent != IntentType.UNKNOWN:
            logger.info(
                "Intent recognized (heuristic): %s for message: %s...",
                intent.value,
                message[:50],
            )
            return intent

        # Heuristics could not decide; ask the LLM when one is available.
        if self._llm_provider:
            return self._llm_recognition(message)

        # No LLM configured: fall back to a direct answer.
        return IntentType.SIMPLE

    @staticmethod
    def _contains_ascii_word(text: str, word: str) -> bool:
        """Return True if *word* occurs in *text* as a standalone ASCII word.

        An occurrence counts only when it is not directly preceded or
        followed by an ASCII letter or digit, so ``"hi"`` matches ``"hi!"``
        and ``"hi你好"`` but not ``"this"``.
        """

        def is_ascii_alnum(ch: str) -> bool:
            return ch.isascii() and ch.isalnum()

        start = text.find(word)
        while start != -1:
            end = start + len(word)
            left_ok = start == 0 or not is_ascii_alnum(text[start - 1])
            right_ok = end == len(text) or not is_ascii_alnum(text[end])
            if left_ok and right_ok:
                return True
            start = text.find(word, start + 1)
        return False

    def _heuristic_recognition(self, message: str) -> IntentType:
        """Fast heuristic-based intent recognition.

        Checks run in priority order: greeting, simple question, explicit
        tool request, skill request, team request.

        Args:
            message: User message

        Returns:
            Recognized intent or UNKNOWN
        """
        if not message:
            return IntentType.UNKNOWN

        text = message.lower().strip()

        # Greetings: only short messages, so a long request that merely
        # starts with "hello" is still examined by the checks below.
        if len(text) < self._GREETING_MAX_LEN and (
            any(g in text for g in self._CJK_GREETINGS)
            or any(self._contains_ascii_word(text, g) for g in self._ASCII_GREETINGS)
        ):
            return IntentType.SIMPLE

        # Simple question phrasing, unless fresh information is requested,
        # in which case a web search might be needed and the decision is
        # left to the caller's fallback.
        if any(p in text for p in self._SIMPLE_PATTERNS):
            if any(kw in text for kw in self._FRESHNESS_KEYWORDS):
                return IntentType.UNKNOWN
            return IntentType.SIMPLE

        if any(p in text for p in self._TOOL_PATTERNS):
            return IntentType.TOOL

        if any(p in text for p in self._SKILL_PATTERNS):
            return IntentType.SKILL

        if any(p in text for p in self._TEAM_PATTERNS):
            return IntentType.TEAM

        return IntentType.UNKNOWN

    def _llm_recognition(self, message: str) -> IntentType:
        """LLM-based intent recognition.

        Args:
            message: User message

        Returns:
            Recognized intent type; SIMPLE when the call fails or the reply
            contains none of the expected labels.
        """
        try:
            prompt = INTENT_PROMPT.format(message=message)
            # NOTE(review): `chat` is called synchronously here. If the
            # provider only exposes a coroutine API, `response` would be an
            # un-awaited coroutine and the except branch below would silently
            # degrade every request to SIMPLE - confirm the provider interface.
            response = self._llm_provider.chat(
                messages=[{"role": "user", "content": prompt}],
                max_tokens=50,
            )
            content = response.content.strip().lower()
        except Exception as e:
            # Best effort: classification must never break the request.
            logger.error("LLM intent recognition failed: %s", e)
            return IntentType.SIMPLE

        # The model is told to answer with the label only, so the label that
        # appears first is the answer. A fixed priority order would misread
        # replies such as "tool, not simple".
        hits = [
            (content.find(label), intent)
            for label, intent in self._LLM_LABELS
            if label in content
        ]
        if not hits:
            logger.warning("Unexpected intent response: %s", content)
            return IntentType.SIMPLE
        return min(hits, key=lambda hit: hit[0])[1]
class IntentRouter:
    """Routes requests based on recognized intent."""

    # Action name returned for each intent. UNKNOWN is answered directly,
    # the same as SIMPLE.
    _ACTIONS = {
        IntentType.SIMPLE: "direct_response",
        IntentType.TOOL: "execute_tools",
        IntentType.SKILL: "execute_skill",
        IntentType.TEAM: "team_collaboration",
        IntentType.UNKNOWN: "direct_response",
    }
    _DEFAULT_ACTION = "direct_response"

    def __init__(
        self,
        intent_recognizer: IntentRecognizer | None = None,
        use_llm_recognition: bool = True,
    ):
        """Initialize intent router.

        Args:
            intent_recognizer: Intent recognizer instance. When omitted, a
                recognizer without an LLM provider is created so that
                ``route`` always has something to call.
            use_llm_recognition: Whether to use LLM for complex cases
        """
        # Without this fallback `route` would raise AttributeError on None
        # whenever the router is built with default arguments.
        if intent_recognizer is None:
            intent_recognizer = IntentRecognizer()
        self._recognizer = intent_recognizer
        # NOTE(review): stored but not consulted anywhere in this class;
        # whether the LLM is used is decided by the recognizer itself.
        self._use_llm = use_llm_recognition

    def route(
        self,
        message: str,
        available_tools: list[str] | None = None,
        available_skills: list[str] | None = None,
    ) -> dict[str, Any]:
        """Route the user message based on intent.

        Args:
            message: User message
            available_tools: List of available tool names
            available_skills: List of available skill names

        Returns:
            Routing decision with keys ``"intent"`` (the intent's string
            value), ``"action"`` (suggested action name) and ``"message"``
            (the original message).
        """
        intent = self._recognizer.recognize(
            message,
            available_tools,
            available_skills,
        )

        decision = {
            "intent": intent.value,
            "action": self._get_action(intent),
            "message": message,
        }
        logger.info("Routed message to %s: %s...", intent.value, message[:50])
        return decision

    def _get_action(self, intent: IntentType) -> str:
        """Get the action to take based on intent.

        Args:
            intent: Recognized intent type

        Returns:
            Action name; ``"direct_response"`` for any unmapped intent.
        """
        return self._ACTIONS.get(intent, self._DEFAULT_ACTION)
def create_intent_router(llm_provider=None) -> IntentRouter:
    """Build an IntentRouter wired to a fresh IntentRecognizer.

    Args:
        llm_provider: LLM provider handed to the recognizer for intent
            recognition; may be None.

    Returns:
        IntentRouter instance using the newly created recognizer.
    """
    return IntentRouter(
        intent_recognizer=IntentRecognizer(llm_provider=llm_provider),
    )

View File

@@ -10,6 +10,7 @@ from typing import Any, Callable, Awaitable, AsyncGenerator
from agents.agent.context import ContextBuilder from agents.agent.context import ContextBuilder
from agents.agent.memory import AgentMemory from agents.agent.memory import AgentMemory
from agents.agent.intent_router import IntentRouter, create_intent_router, IntentType
from agents.llm import LLMProvider, LLMResponse, ProviderFactory from agents.llm import LLMProvider, LLMResponse, ProviderFactory
from agents.tools import ToolRegistry from agents.tools import ToolRegistry
@@ -28,6 +29,7 @@ class AgentLoop:
workspace: Path | None = None, workspace: Path | None = None,
max_iterations: int = 10, max_iterations: int = 10,
tools: ToolRegistry | None = None, tools: ToolRegistry | None = None,
enable_intent_routing: bool = True,
): ):
"""Initialize the agent loop. """Initialize the agent loop.
@@ -37,16 +39,24 @@ class AgentLoop:
workspace: Workspace directory for memory and configs workspace: Workspace directory for memory and configs
max_iterations: Maximum tool call iterations max_iterations: Maximum tool call iterations
tools: Tool registry (creates default if None) tools: Tool registry (creates default if None)
enable_intent_routing: Enable intent recognition and routing
""" """
self.provider = provider self.provider = provider
self.model = model self.model = model
self.workspace = workspace or Path.cwd() self.workspace = workspace or Path.cwd()
self.max_iterations = max_iterations self.max_iterations = max_iterations
self.tools = tools self.tools = tools
self.enable_intent_routing = enable_intent_routing
self.context = ContextBuilder(self.workspace) self.context = ContextBuilder(self.workspace)
self.memory = AgentMemory(self.workspace) self.memory = AgentMemory(self.workspace)
# Initialize intent router
if enable_intent_routing:
self.intent_router = create_intent_router(llm_provider=provider)
else:
self.intent_router = None
async def chat( async def chat(
self, self,
message: str, message: str,
@@ -79,10 +89,43 @@ class AgentLoop:
""" """
history = history or [] history = history or []
# Intent recognition and routing
intent_decision = None
if self.intent_router and not history: # Only for first message in conversation
try:
tool_names = self.tools.tool_names if self.tools else []
intent_decision = self.intent_router.route(
message=message,
available_tools=tool_names,
)
logger.info(f"Intent recognized: {intent_decision['intent']} -> {intent_decision['action']}")
# For simple intent, respond directly without tool loop
if intent_decision["intent"] == IntentType.SIMPLE.value:
# Build messages for direct response
messages = self.context.build_messages(
history=history,
current_message=message,
)
# Call LLM without tools
response = await self.provider.chat_with_retry(
messages=messages,
tools=None, # No tools for simple requests
model=self.model,
)
content = self._strip_think(response.content) or "好的,让我来回答这个问题。"
# Save to history
self._save_history(session_key, messages, len(history))
return content
except Exception as e:
logger.warning(f"Intent routing failed: {e}, continuing with normal flow")
# Load history from session if session_key is provided # Load history from session if session_key is provided
if session_key and session_key != "default": if session_key and session_key != "default":
loaded_history = self.memory.get_history(session_key, max_messages=20) loaded_history = self.memory.get_history(session_key, max_messages=20)
if loaded_history: if loaded_history:
# Merge any split assistant messages
loaded_history = self._merge_history_messages(loaded_history)
logger.info(f"Loaded {len(loaded_history)} messages from session history") logger.info(f"Loaded {len(loaded_history)} messages from session history")
# Merge loaded history with provided history (loaded takes precedence if empty) # Merge loaded history with provided history (loaded takes precedence if empty)
if not history: if not history:
@@ -155,10 +198,43 @@ class AgentLoop:
""" """
history = history or [] history = history or []
# Intent recognition and routing
intent_decision = None
if self.intent_router and not history: # Only for first message in conversation
try:
tool_names = self.tools.tool_names if self.tools else []
intent_decision = self.intent_router.route(
message=message,
available_tools=tool_names,
)
logger.info(f"Intent recognized: {intent_decision['intent']} -> {intent_decision['action']}")
# For simple intent, respond directly without tool loop
if intent_decision["intent"] == IntentType.SIMPLE.value:
# Build messages for direct response
messages = self.context.build_messages(
history=history,
current_message=message,
)
# Call LLM without tools
response = await self.provider.chat_with_retry(
messages=messages,
tools=None, # No tools for simple requests
model=self.model,
)
content = self._strip_think(response.content) or "好的,让我来回答这个问题。"
# Save to history
self._save_history(session_key, messages, len(history))
return content
except Exception as e:
logger.warning(f"Intent routing failed: {e}, continuing with normal flow")
# Load history from session if session_key is provided # Load history from session if session_key is provided
if session_key and session_key != "default": if session_key and session_key != "default":
loaded_history = self.memory.get_history(session_key, max_messages=20) loaded_history = self.memory.get_history(session_key, max_messages=20)
if loaded_history: if loaded_history:
# Merge any split assistant messages
loaded_history = self._merge_history_messages(loaded_history)
logger.info(f"Loaded {len(loaded_history)} messages from session history") logger.info(f"Loaded {len(loaded_history)} messages from session history")
# Merge loaded history with provided history (loaded takes precedence if empty) # Merge loaded history with provided history (loaded takes precedence if empty)
if not history: if not history:
@@ -334,6 +410,28 @@ class AgentLoop:
tool_defs = self.tools.get_definitions() if self.tools else [] tool_defs = self.tools.get_definitions() if self.tools else []
# Intent recognition - determine if tools are needed before first LLM call
user_message = ""
for msg in messages:
if msg.get("role") == "user":
user_message = msg.get("content", "")
break
# Apply intent recognition on first iteration
if self.enable_intent_routing and self.intent_router and user_message:
available_tools = [t.get("function", {}).get("name", "") for t in tool_defs] if tool_defs else []
routing_decision = self.intent_router.route(
user_message,
available_tools=available_tools,
)
intent = routing_decision.get("intent", "simple")
logger.info(f"Intent recognized: {intent} for message: {user_message[:50]}...")
# If simple intent, don't pass tools to reduce unnecessary tool calls
if intent == "simple":
tool_defs = []
logger.info("Simple intent detected - disabling tool definitions for this request")
while iteration < self.max_iterations: while iteration < self.max_iterations:
iteration += 1 iteration += 1
@@ -423,6 +521,28 @@ class AgentLoop:
model = model or self.model model = model or self.model
tool_defs = self.tools.get_definitions() if self.tools else [] tool_defs = self.tools.get_definitions() if self.tools else []
# Intent recognition - determine if tools are needed before first LLM call
user_message = ""
for msg in initial_messages:
if msg.get("role") == "user":
user_message = msg.get("content", "")
break
# Apply intent recognition
if self.enable_intent_routing and self.intent_router and user_message:
available_tools = [t.get("function", {}).get("name", "") for t in tool_defs] if tool_defs else []
routing_decision = self.intent_router.route(
user_message,
available_tools=available_tools,
)
intent = routing_decision.get("intent", "simple")
logger.info(f"[stream] Intent recognized: {intent} for message: {user_message[:50]}...")
# If simple intent, don't pass tools to reduce unnecessary tool calls
if intent == "simple":
tool_defs = []
logger.info("[stream] Simple intent detected - disabling tool definitions")
# First call to check for tool calls # First call to check for tool calls
response = await provider.chat_with_retry( response = await provider.chat_with_retry(
messages=initial_messages, messages=initial_messages,
@@ -490,6 +610,55 @@ class AgentLoop:
return f'{tc.name}("{val[:40]}...")' if len(val) > 40 else f'{tc.name}("{val}")' return f'{tc.name}("{val[:40]}...")' if len(val) > 40 else f'{tc.name}("{val}")'
return ", ".join(_fmt(tc) for tc in tool_calls) return ", ".join(_fmt(tc) for tc in tool_calls)
@staticmethod
def _merge_history_messages(messages: list[dict]) -> list[dict]:
    """Merge adjacent assistant messages that carry content and tool_calls separately.

    When saving/loading history, an assistant turn that had both content and
    tool_calls may come back as two consecutive assistant entries, one with
    only content and one with only tool_calls, in either order. Such a pair
    is folded back into a single message. All other messages are passed
    through unchanged and in order; nothing is dropped.

    Args:
        messages: List of message dictionaries

    Returns:
        Merged list of messages. Entries are shallow copies, so the input
        dictionaries are not mutated. An empty input is returned as is.
    """
    if not messages:
        return messages

    merged: list[dict] = []
    index = 0
    count = len(messages)
    while index < count:
        current = messages[index].copy()
        index += 1

        follower = messages[index] if index < count else None
        if (
            follower is not None
            and current.get("role") == "assistant"
            and follower.get("role") == "assistant"
        ):
            cur_content = current.get("content")
            cur_calls = current.get("tool_calls")
            nxt_content = follower.get("content")
            nxt_calls = follower.get("tool_calls")

            # Only fold strictly complementary halves. If both messages have
            # content, or both have tool_calls, they are distinct turns and
            # merging would silently lose one of them.
            absorbed = False
            if cur_calls and not cur_content and nxt_content and not nxt_calls:
                current["content"] = nxt_content
                absorbed = True
            elif cur_content and not cur_calls and nxt_calls and not nxt_content:
                current["tool_calls"] = nxt_calls
                absorbed = True

            if absorbed:
                index += 1  # the follower is now part of `current`
                logger.debug("Merged 2 assistant messages")

        # Every message, merged or not, must reach the output.
        merged.append(current)

    return merged
def _save_history( def _save_history(
self, self,
session_key: str, session_key: str,
@@ -510,13 +679,18 @@ class AgentLoop:
if role == "user" and content: if role == "user" and content:
self.memory.add_to_history("user", str(content)[:1000], session_key) self.memory.add_to_history("user", str(content)[:1000], session_key)
elif role == "assistant": elif role == "assistant":
# Save assistant message content # Build a combined message with content and tool_calls
msg_data = {}
if content: if content:
self.memory.add_to_history("assistant", str(content)[:1000], session_key) msg_data["content"] = str(content)[:1000]
# Save tool_calls if present (needed for multi-turn tool calls)
if m.get("tool_calls"): if m.get("tool_calls"):
tool_calls_str = json.dumps(m.get("tool_calls", [])) msg_data["tool_calls"] = m.get("tool_calls", [])
self.memory.add_to_history("assistant", f"[tool_calls]{tool_calls_str}", session_key)
# Save as a single JSON message with all data
if msg_data:
msg_str = json.dumps(msg_data)
self.memory.add_to_history("assistant", msg_str, session_key)
# Save tool results (needed for multi-turn conversations) # Save tool results (needed for multi-turn conversations)
elif role == "tool": elif role == "tool":
tool_call_id = m.get("tool_call_id", "") tool_call_id = m.get("tool_call_id", "")

View File

@@ -537,7 +537,7 @@ class AgentMemory:
except: except:
pass pass
# Check if content contains tool_calls or tool_result markers # Check if content contains tool_calls or tool_result markers, or is JSON
# Format as Markdown (产品经理指定格式) # Format as Markdown (产品经理指定格式)
entry_lines = [ entry_lines = [
f"## 消息 {msg_count}", f"## 消息 {msg_count}",
@@ -553,7 +553,20 @@ class AgentMemory:
entry_lines.append(f"工具结果: {content[len('[tool_result]'):]}") entry_lines.append(f"工具结果: {content[len('[tool_result]'):]}")
entry_lines.append(f"内容: ") entry_lines.append(f"内容: ")
else: else:
entry_lines.append(f"内容: {content}") # Check if it's a JSON object (new format with content + tool_calls)
try:
data = json.loads(content)
if isinstance(data, dict):
# New JSON format: might have content and/or tool_calls
if "content" in data:
entry_lines.append(f"内容: {data['content']}")
if "tool_calls" in data:
entry_lines.append(f"工具调用: {json.dumps(data['tool_calls'])}")
else:
entry_lines.append(f"内容: {content}")
except (json.JSONDecodeError, TypeError):
# Not JSON, treat as regular content
entry_lines.append(f"内容: {content}")
entry = "\n".join(entry_lines) + "\n\n" entry = "\n".join(entry_lines) + "\n\n"
@@ -631,6 +644,9 @@ class AgentMemory:
if line.startswith("工具调用:") and current_message is not None: if line.startswith("工具调用:") and current_message is not None:
tool_calls_json = line.split(":", 1)[1].strip() tool_calls_json = line.split(":", 1)[1].strip()
try: try:
# Set role if not already set
if not current_message.get("role"):
current_message["role"] = "assistant"
current_message["tool_calls"] = json.loads(tool_calls_json) current_message["tool_calls"] = json.loads(tool_calls_json)
except json.JSONDecodeError: except json.JSONDecodeError:
pass pass
@@ -641,6 +657,7 @@ class AgentMemory:
tool_result_json = line.split(":", 1)[1].strip() tool_result_json = line.split(":", 1)[1].strip()
try: try:
tool_result = json.loads(tool_result_json) tool_result = json.loads(tool_result_json)
current_message["role"] = "tool" # Set role to tool
current_message["tool_call_id"] = tool_result.get("tool_call_id", "") current_message["tool_call_id"] = tool_result.get("tool_call_id", "")
current_message["name"] = tool_result.get("name", "") current_message["name"] = tool_result.get("name", "")
current_message["content"] = tool_result.get("content", "") current_message["content"] = tool_result.get("content", "")

View File

@@ -275,7 +275,7 @@ class WebSearchTool(Tool):
@property @property
def description(self) -> str: def description(self) -> str:
return "Search the web for information using a search engine." return "Search the web for current information, real-time data, or information that is not in your training data. **Only use this when the user explicitly asks for** latest news, current events, real-time information, or specifically requests a web search. **DO NOT use for simple questions** like '介绍一下武汉', '什么是AI' - answer from your knowledge instead."
@property @property
def parameters(self) -> dict[str, Any]: def parameters(self) -> dict[str, Any]: