"""
网页获取工具
提供安全的网页内容抓取功能
"""
import httpx
from typing import Dict, Any, Optional


class WebToolConfig:
    """Configuration constants for the web tools."""

    # Request timeout, in seconds.
    REQUEST_TIMEOUT = 30

    # Maximum response size, in bytes (2 MiB).
    MAX_RESPONSE_SIZE = 2 * 1024 ** 2

    # Maximum number of redirects.
    MAX_REDIRECTS = 5

    # Allowed URL protocols.
    ALLOWED_PROTOCOLS = ["http", "https"]


async def web_fetch(
    url: str,
    method: str = "GET",
    params: Optional[Dict[str, Any]] = None,
    headers: Optional[Dict[str, str]] = None,
    body: Optional[str] = None,
    timeout: Optional[int] = None
) -> Dict[str, Any]:
    """
    Fetch the content of a web page.

    The response body is streamed and the download is aborted as soon as it
    grows past ``WebToolConfig.MAX_RESPONSE_SIZE``, so an oversized response
    is never fully buffered in memory.

    Args:
        url: Target URL; must start with ``http://`` or ``https://``
            (compared case-insensitively).
        method: HTTP method.
        params: Query parameters.
        headers: Request headers.
        body: Request body.
        timeout: Timeout in seconds; a falsy value (``None`` or ``0``)
            selects ``WebToolConfig.REQUEST_TIMEOUT``.

    Returns:
        A dict. On success it holds ``success=True``, ``url`` (final URL),
        ``status_code``, ``content_type``, ``headers`` and either ``data``
        (parsed JSON, when the content type contains ``application/json``
        and the body parses) or ``content`` (decoded text). On failure it
        holds ``success=False`` and an ``error`` message; errors are
        reported this way rather than raised.
    """
    import json  # function-scope import keeps the block self-contained

    timeout = timeout or WebToolConfig.REQUEST_TIMEOUT

    # Security check: protocol. URL schemes are case-insensitive, and a
    # non-string URL is rejected here instead of raising AttributeError.
    # NOTE(review): this only restricts the scheme; it does not stop requests
    # to internal/private hosts.
    if not isinstance(url, str) or not url.lower().startswith(("http://", "https://")):
        return {
            "success": False,
            "error": "Only HTTP and HTTPS protocols are allowed"
        }

    max_size = WebToolConfig.MAX_RESPONSE_SIZE

    try:
        async with httpx.AsyncClient(
            timeout=timeout,
            max_redirects=WebToolConfig.MAX_REDIRECTS,
            follow_redirects=True,
        ) as client:
            # Stream the response so the size limit is enforced while
            # downloading rather than after the whole body is in memory.
            async with client.stream(
                method=method,
                url=url,
                params=params,
                headers=headers,
                content=body,
            ) as response:
                chunks = []
                received = 0
                async for chunk in response.aiter_bytes():
                    received += len(chunk)
                    if received > max_size:
                        # Leaving the stream context closes the connection,
                        # discarding the rest of the body.
                        return {
                            "success": False,
                            "error": f"Response too large: exceeded limit of {max_size} bytes"
                        }
                    chunks.append(chunk)
                raw = b"".join(chunks)

                content_type = response.headers.get("content-type", "")

                # Try to parse JSON; a malformed body falls back to text.
                parsed_json = False
                payload: Any = None
                if "application/json" in content_type:
                    try:
                        payload = json.loads(raw)
                        parsed_json = True
                    except ValueError:
                        # JSONDecodeError / UnicodeDecodeError: not valid
                        # JSON despite the header, so return it as text.
                        parsed_json = False

                if not parsed_json:
                    encoding = response.encoding or "utf-8"
                    try:
                        payload = raw.decode(encoding, errors="replace")
                    except LookupError:
                        # Unknown charset name: decode as UTF-8 instead.
                        payload = raw.decode("utf-8", errors="replace")

                return {
                    "success": True,
                    "url": str(response.url),
                    "status_code": response.status_code,
                    "content_type": content_type,
                    "data" if parsed_json else "content": payload,
                    "headers": dict(response.headers)
                }

    except httpx.TimeoutException:
        return {
            "success": False,
            "error": f"Request timeout ({timeout}s)"
        }
    except httpx.TooManyRedirects:
        # Raised by httpx once max_redirects is exceeded.
        return {
            "success": False,
            "error": "Too many redirects"
        }
    except httpx.InvalidURL:
        return {
            "success": False,
            "error": "Invalid URL"
        }
    except Exception as e:
        # Tool boundary: report any other failure as an error result.
        return {
            "success": False,
            "error": str(e)
        }


async def web_search(
    query: str,
    max_results: int = 5
) -> Dict[str, Any]:
    """
    Search the web through the DuckDuckGo JSON API.

    Results are taken from the ``RelatedTopics`` field of the response.
    Grouped entries (those carrying a nested ``Topics`` list instead of a
    ``Text``) are flattened in order, and ``max_results`` is applied to the
    entries that actually produce a result.

    Args:
        query: Search keywords.
        max_results: Maximum number of results; values <= 0 yield no results.

    Returns:
        On success: ``success=True``, ``query``, ``results`` (a list of
        dicts with ``title``, ``content`` and ``url``) and ``count``.
        On failure: ``success=False`` and an ``error`` message; errors are
        reported this way rather than raised.
    """

    def _iter_topics(entries):
        """Yield every entry that has a "Text", descending into groups."""
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            if "Text" in entry:
                yield entry
            else:
                yield from _iter_topics(entry.get("Topics") or [])

    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.get(
                "https://api.duckduckgo.com/",
                params={
                    "q": query,
                    "format": "json",
                    "no_html": 1,
                    "skip_disambig": 1
                }
            )

        if response.status_code != 200:
            return {
                "success": False,
                "error": f"Search API returned status {response.status_code}"
            }

        data = response.json()
        # A non-dict payload simply has no related topics.
        topics = data.get("RelatedTopics") if isinstance(data, dict) else None

        results = []
        for topic in _iter_topics(topics or []):
            if len(results) >= max_results:
                break
            text = topic.get("Text", "")
            head, separator, _ = text.partition(" - ")
            results.append({
                "title": head if separator else "",
                "content": text,
                # The API exposes the link as "FirstURL"; "URL" is kept as
                # a fallback for backward compatibility.
                "url": topic.get("FirstURL") or topic.get("URL", "")
            })

        return {
            "success": True,
            "query": query,
            "results": results,
            "count": len(results)
        }

    except Exception as e:
        # Tool boundary: report any failure as an error result.
        return {
            "success": False,
            "error": str(e)
        }


# Tool definitions
# Schema describing the arguments accepted by the web_fetch tool.
WEB_FETCH_TOOL = {
    "name": "web_fetch",
    "description": (
        "Fetch content from a web URL. "
        "Supports GET, POST methods and can return JSON or text content."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "url": {"type": "string", "description": "The URL to fetch"},
            "method": {
                "type": "string",
                "description": "HTTP method (GET, POST)",
                "default": "GET",
            },
            "params": {"type": "object", "description": "Query parameters"},
            "headers": {"type": "object", "description": "Request headers"},
            "body": {"type": "string", "description": "Request body (for POST)"},
            "timeout": {
                "type": "integer",
                "description": "Request timeout in seconds",
                "default": 30,
            },
        },
        # Only the URL is mandatory; every other argument is optional.
        "required": ["url"],
    },
}

# Schema describing the arguments accepted by the web_search tool.
WEB_SEARCH_TOOL = {
    "name": "web_search",
    "description": (
        "Search the web for information. "
        "Use this when you need to find current information or facts."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "The search query"},
            "max_results": {
                "type": "integer",
                "description": "Maximum number of results to return",
                "default": 5,
            },
        },
        # Only the query is mandatory.
        "required": ["query"],
    },
}