209 lines
6.4 KiB
Python
209 lines
6.4 KiB
Python
"""
|
||
Agent 工具集 - 知识库 & 图谱相关
|
||
|
||
这些工具在 LangChain ToolNode 中被调用。
|
||
由于 LangChain 工具系统是同步的,内部用 run_in_executor 处理 async 逻辑。
|
||
"""
|
||
|
||
from concurrent.futures import ThreadPoolExecutor
|
||
import asyncio
|
||
|
||
from langchain_core.tools import tool
|
||
|
||
from app.agents.context import get_current_user
|
||
from app.database import async_session
|
||
|
||
_executor = ThreadPoolExecutor(max_workers=4)
|
||
|
||
|
||
def _run_async(coro, timeout: int = 30):
|
||
"""在同步上下文中运行 async 代码"""
|
||
try:
|
||
loop = asyncio.get_running_loop()
|
||
future = loop.run_in_executor(_executor, lambda: asyncio.run(coro))
|
||
return future.result(timeout=timeout)
|
||
except RuntimeError:
|
||
return asyncio.run(coro)
|
||
|
||
|
||
@tool
|
||
def search_knowledge(query: str, top_k: int = 5) -> str:
|
||
"""
|
||
搜索用户的私人知识库。根据查询返回最相关的文档片段,支持语义检索。
|
||
|
||
Args:
|
||
query: 搜索查询
|
||
top_k: 返回结果数量,默认5条
|
||
|
||
Returns:
|
||
包含相关文档片段和来源信息的格式化文本
|
||
"""
|
||
from app.services.knowledge_service import KnowledgeService
|
||
uid = get_current_user()
|
||
|
||
async def _search():
|
||
async with async_session() as db:
|
||
service = KnowledgeService(db, user_id=uid)
|
||
results = await service.retrieve(query, user_id=uid, top_k=top_k)
|
||
if not results:
|
||
return "未找到相关知识。知识库可能为空,或尝试用其他关键词搜索。"
|
||
texts = []
|
||
for i, r in enumerate(results, 1):
|
||
prev = f"\n上一段: {r.prev_chunk[:100]}..." if r.prev_chunk else ""
|
||
next_ = f"\n下一段: {r.next_chunk[:100]}..." if r.next_chunk else ""
|
||
texts.append(
|
||
f"[{i}] 来源: {r.document_title}\n"
|
||
f"相关度: {r.score:.2f}\n"
|
||
f"{prev}{next_}\n"
|
||
f"内容: {r.content[:300]}{'...' if len(r.content) > 300 else ''}"
|
||
)
|
||
return "\n\n---\n\n".join(texts)
|
||
|
||
try:
|
||
return _run_async(_search(), timeout=30)
|
||
except Exception as e:
|
||
return f"知识检索失败: {str(e)}"
|
||
|
||
|
||
@tool
|
||
def get_knowledge_graph_context(entity: str | None = None) -> str:
|
||
"""
|
||
获取用户知识图谱的上下文信息。
|
||
|
||
Args:
|
||
entity: 可选,指定要查询的实体名称。如果为空则返回整体图谱摘要。
|
||
|
||
Returns:
|
||
知识图谱节点和关系的描述
|
||
"""
|
||
from app.services.graph_service import GraphService
|
||
uid = get_current_user()
|
||
|
||
async def _get():
|
||
async with async_session() as db:
|
||
service = GraphService(db)
|
||
if entity:
|
||
return await service.get_entity_context(entity, uid)
|
||
return await service.get_graph_summary(uid)
|
||
|
||
try:
|
||
return _run_async(_get(), timeout=30)
|
||
except Exception as e:
|
||
return f"图谱查询失败: {str(e)}"
|
||
|
||
|
||
@tool
|
||
def build_knowledge_graph(document_ids: list[str] | None = None) -> str:
|
||
"""
|
||
从文档构建/更新知识图谱。
|
||
|
||
Args:
|
||
document_ids: 可选,指定要处理的文档ID列表。如果为空则处理所有文档。
|
||
|
||
Returns:
|
||
构建结果摘要
|
||
"""
|
||
from app.services.graph_service import GraphService
|
||
uid = get_current_user()
|
||
|
||
async def _build():
|
||
async with async_session() as db:
|
||
service = GraphService(db)
|
||
await service.build_graph(user_id=uid, document_ids=document_ids)
|
||
return "知识图谱构建完成"
|
||
|
||
try:
|
||
return _run_async(_build(), timeout=120)
|
||
except Exception as e:
|
||
return f"图谱构建失败: {str(e)}"
|
||
|
||
|
||
@tool
|
||
def hybrid_search(query: str, top_k: int = 5) -> str:
|
||
"""
|
||
混合搜索,结合向量语义检索和关键词匹配,返回最相关结果。
|
||
|
||
Args:
|
||
query: 搜索查询
|
||
top_k: 返回结果数量,默认5条
|
||
|
||
Returns:
|
||
混合检索结果
|
||
"""
|
||
from app.services.knowledge_service import KnowledgeService
|
||
uid = get_current_user()
|
||
|
||
async def _search():
|
||
async with async_session() as db:
|
||
service = KnowledgeService(db, user_id=uid)
|
||
results = await service.hybrid_search(query, user_id=uid, top_k=top_k)
|
||
if not results:
|
||
return "未找到相关知识。"
|
||
texts = []
|
||
for i, r in enumerate(results, 1):
|
||
texts.append(
|
||
f"[{i}] {r.document_title} (相关度: {r.score:.2f})\n"
|
||
f"{r.content[:200]}{'...' if len(r.content) > 200 else ''}"
|
||
)
|
||
return "\n\n---\n\n".join(texts)
|
||
|
||
try:
|
||
return _run_async(_search(), timeout=30)
|
||
except Exception as e:
|
||
return f"混合搜索失败: {str(e)}"
|
||
|
||
|
||
@tool
|
||
def web_search(query: str, top_k: int = 5) -> str:
|
||
"""
|
||
通过 SearxNG 搜索外部网页信息,返回标题、链接和摘要。
|
||
|
||
Args:
|
||
query: 搜索关键词
|
||
top_k: 返回结果数量,默认 5 条
|
||
|
||
Returns:
|
||
适合模型综合的网页结果文本
|
||
"""
|
||
from app.services.web_search_service import (
|
||
WebSearchConfigurationError,
|
||
WebSearchRequestError,
|
||
WebSearchService,
|
||
)
|
||
|
||
async def _search():
|
||
service = WebSearchService()
|
||
results = await service.search(query, limit=top_k)
|
||
if not results:
|
||
return "未找到相关网页结果。"
|
||
|
||
texts = []
|
||
for index, result in enumerate(results, 1):
|
||
source = f"\n来源: {result.source}" if result.source else ""
|
||
published_at = f"\n时间: {result.published_at}" if result.published_at else ""
|
||
snippet = result.snippet or "(无摘要)"
|
||
texts.append(
|
||
f"[{index}] {result.title}\n"
|
||
f"链接: {result.url}{source}{published_at}\n"
|
||
f"摘要: {snippet}"
|
||
)
|
||
return "\n\n---\n\n".join(texts)
|
||
|
||
try:
|
||
return _run_async(_search(), timeout=30)
|
||
except WebSearchConfigurationError as exc:
|
||
return f"网页搜索不可用: {exc}"
|
||
except WebSearchRequestError as exc:
|
||
return f"网页搜索失败: {exc}"
|
||
except Exception as exc:
|
||
return f"网页搜索失败: {exc}"
|
||
|
||
|
||
__all__ = [
|
||
"search_knowledge",
|
||
"get_knowledge_graph_context",
|
||
"build_knowledge_graph",
|
||
"hybrid_search",
|
||
"web_search",
|
||
]
|