feat: 新增 agent/app/core 目录
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
233
agent/app/core/tools/impl/web.py
Normal file
233
agent/app/core/tools/impl/web.py
Normal file
@@ -0,0 +1,233 @@
|
||||
"""
|
||||
网页获取工具
|
||||
提供安全的网页内容抓取功能
|
||||
"""
|
||||
import httpx
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
|
||||
class WebToolConfig:
    """Static limits shared by the web tools in this module."""

    # Default per-request timeout, in seconds.
    REQUEST_TIMEOUT = 30
    # Largest response body accepted, in bytes (2 MiB).
    MAX_RESPONSE_SIZE = 2 * 1024 ** 2
    # Upper bound on the number of redirects followed for one request.
    MAX_REDIRECTS = 5
    # URL schemes the tools are meant to accept.
    ALLOWED_PROTOCOLS = ["http", "https"]
|
||||
|
||||
|
||||
async def web_fetch(
    url: str,
    method: str = "GET",
    params: Optional[Dict[str, Any]] = None,
    headers: Optional[Dict[str, str]] = None,
    body: Optional[str] = None,
    timeout: Optional[int] = None
) -> Dict[str, Any]:
    """
    Fetch a URL over HTTP(S) and return its content as a result dict.

    Args:
        url: Target URL; must start with ``http://`` or ``https://``
            (the scheme is matched case-insensitively).
        method: HTTP method passed to the client.
        params: Query parameters.
        headers: Request headers.
        body: Raw request body.
        timeout: Timeout in seconds; a falsy value selects
            ``WebToolConfig.REQUEST_TIMEOUT``.

    Returns:
        On success, a dict with ``success=True``, ``url``, ``status_code``,
        ``content_type``, ``headers`` and either ``data`` (parsed JSON) or
        ``content`` (response text). On failure, a dict with
        ``success=False`` and an ``error`` message. Errors are reported
        through the return value rather than raised.
    """
    timeout = timeout or WebToolConfig.REQUEST_TIMEOUT

    # Safety check: only plain HTTP(S) URLs are accepted. URL schemes are
    # case-insensitive, so compare against a lower-cased copy; a non-string
    # URL is rejected the same way instead of raising AttributeError.
    # NOTE(review): the target host is not checked against private/internal
    # address ranges here - confirm whether callers need SSRF protection.
    if not isinstance(url, str) or not url.lower().startswith(("http://", "https://")):
        return {
            "success": False,
            "error": "Only HTTP and HTTPS protocols are allowed"
        }

    try:
        async with httpx.AsyncClient(
            timeout=timeout,
            max_redirects=WebToolConfig.MAX_REDIRECTS,
            follow_redirects=True,
        ) as client:
            response = await client.request(
                method=method,
                url=url,
                params=params,
                headers=headers,
                content=body,
            )

            # The size limit is applied to the body after it has been
            # received; it bounds what is returned, not what is downloaded.
            size = len(response.content)
            if size > WebToolConfig.MAX_RESPONSE_SIZE:
                return {
                    "success": False,
                    "error": f"Response too large: {size} bytes (max {WebToolConfig.MAX_RESPONSE_SIZE})"
                }

            content_type = response.headers.get("content-type", "")

            # Prefer structured data when the server declares JSON.
            if "application/json" in content_type:
                try:
                    data = response.json()
                except ValueError:
                    # Declared JSON but not parseable (JSON and Unicode
                    # decode errors are ValueError subclasses): fall
                    # through and return the body as text instead.
                    pass
                else:
                    return {
                        "success": True,
                        "url": str(response.url),
                        "status_code": response.status_code,
                        "content_type": content_type,
                        "data": data,
                        "headers": dict(response.headers)
                    }

            # Plain-text result.
            return {
                "success": True,
                "url": str(response.url),
                "status_code": response.status_code,
                "content_type": content_type,
                "content": response.text[:WebToolConfig.MAX_RESPONSE_SIZE],
                "headers": dict(response.headers)
            }

    except httpx.TimeoutException:
        return {
            "success": False,
            "error": f"Request timeout ({timeout}s)"
        }
    except httpx.TooManyRedirects:
        # httpx raises TooManyRedirects once max_redirects is exceeded.
        return {
            "success": False,
            "error": "Too many redirects"
        }
    except httpx.InvalidURL:
        return {
            "success": False,
            "error": "Invalid URL"
        }
    except Exception as e:
        # Tool boundary: report any other failure to the caller as data.
        return {
            "success": False,
            "error": str(e)
        }
|
||||
|
||||
|
||||
def _iter_topic_entries(topics):
    """Yield every dict carrying a "Text" key, descending into nested "Topics" lists."""
    for item in topics:
        if not isinstance(item, dict):
            continue
        if "Text" in item:
            yield item
            continue
        # Entries without "Text" may be groups that hold their own
        # entries in a nested "Topics" list.
        nested = item.get("Topics")
        if isinstance(nested, list):
            yield from _iter_topic_entries(nested)


async def web_search(
    query: str,
    max_results: int = 5
) -> Dict[str, Any]:
    """
    Search the web through the DuckDuckGo JSON API.

    Args:
        query: Search keywords.
        max_results: Maximum number of results to return; zero or a
            negative value yields no results.

    Returns:
        On success, a dict with ``success=True``, ``query``, ``results``
        (a list of ``{"title", "content", "url"}`` dicts) and ``count``.
        On failure, a dict with ``success=False`` and an ``error`` message.
        Errors are reported through the return value rather than raised.
    """
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.get(
                "https://api.duckduckgo.com/",
                params={
                    "q": query,
                    "format": "json",
                    "no_html": 1,
                    "skip_disambig": 1
                }
            )

            if response.status_code != 200:
                return {
                    "success": False,
                    "error": f"Search API returned status {response.status_code}"
                }

            data = response.json()
            topics = data.get("RelatedTopics") if isinstance(data, dict) else None
            if not isinstance(topics, list):
                topics = []

            results = []
            # Apply the limit after filtering so that entries without
            # "Text" do not use up result slots.
            for entry in _iter_topic_entries(topics):
                if len(results) >= max_results:
                    break
                text = entry.get("Text", "")
                # The part before the first " - " serves as the title.
                head, sep, _ = text.partition(" - ")
                results.append({
                    "title": head if sep else "",
                    "content": text,
                    "url": entry.get("URL", "")
                })

            return {
                "success": True,
                "query": query,
                "results": results,
                "count": len(results)
            }

    except Exception as e:
        # Tool boundary: report any failure to the caller as data.
        return {
            "success": False,
            "error": str(e)
        }
|
||||
|
||||
|
||||
# 工具定义
|
||||
# Tool schema describing web_fetch and its parameters.
WEB_FETCH_TOOL = {
    "name": "web_fetch",
    "description": "Fetch content from a web URL. Supports GET, POST methods and can return JSON or text content.",
    "parameters": {
        "type": "object",
        "properties": {
            "url": {"type": "string", "description": "The URL to fetch"},
            "method": {"type": "string", "description": "HTTP method (GET, POST)", "default": "GET"},
            "params": {"type": "object", "description": "Query parameters"},
            "headers": {"type": "object", "description": "Request headers"},
            "body": {"type": "string", "description": "Request body (for POST)"},
            "timeout": {"type": "integer", "description": "Request timeout in seconds", "default": 30},
        },
        "required": ["url"],
    },
}
|
||||
|
||||
# Tool schema describing web_search and its parameters.
WEB_SEARCH_TOOL = {
    "name": "web_search",
    "description": "Search the web for information. Use this when you need to find current information or facts.",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "The search query"},
            "max_results": {"type": "integer", "description": "Maximum number of results to return", "default": 5},
        },
        "required": ["query"],
    },
}
|
||||
Reference in New Issue
Block a user