Add brain memory services and APIs
Introduce the backend pieces for brain memory ingestion, routing, and system telemetry so the new knowledge workflows can project data into a brain view. The supporting tests lock in the new behavior and keep the expanded backend surface stable. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4,11 +4,8 @@
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, func
|
||||
from app.models.brain import BrainMemory, BrainTag
|
||||
from app.models.knowledge_graph import KGNode, KGEdge
|
||||
from app.models.document import Document, DocumentChunk
|
||||
from app.services.llm_service import get_llm
|
||||
from langchain_core.messages import HumanMessage
|
||||
import json
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -75,110 +72,93 @@ confidence: 0.0-1.0,表示推断置信度
|
||||
class GraphService:
|
||||
def __init__(self, db: AsyncSession):
|
||||
self.db = db
|
||||
self.llm = get_llm()
|
||||
|
||||
async def build_graph(self, user_id: str, document_ids: list[str] | None = None):
|
||||
"""
|
||||
从文档构建/更新知识图谱
|
||||
- 遍历所有 chunk
|
||||
- LLM 实体识别
|
||||
- LLM 关系抽取
|
||||
- 去重合并
|
||||
"""
|
||||
query = (
|
||||
select(DocumentChunk)
|
||||
.join(Document)
|
||||
.where(Document.user_id == user_id)
|
||||
.where(Document.is_indexed == True)
|
||||
"""从知识大脑投影图谱。"""
|
||||
existing_nodes_result = await self.db.execute(select(KGNode).where(KGNode.user_id == user_id))
|
||||
for node in existing_nodes_result.scalars().all():
|
||||
await self.db.delete(node)
|
||||
await self.db.flush()
|
||||
|
||||
memory_result = await self.db.execute(
|
||||
select(BrainMemory)
|
||||
.where(BrainMemory.user_id == user_id, BrainMemory.status == "active")
|
||||
.order_by(BrainMemory.importance.desc(), BrainMemory.created_at.desc())
|
||||
)
|
||||
if document_ids:
|
||||
query = query.where(DocumentChunk.document_id.in_(document_ids))
|
||||
memories = list(memory_result.scalars().all())
|
||||
|
||||
result = await self.db.execute(query)
|
||||
chunks = list(result.scalars().all())
|
||||
tag_result = await self.db.execute(
|
||||
select(BrainTag)
|
||||
.where(BrainTag.user_id == user_id)
|
||||
.order_by(BrainTag.score.desc(), BrainTag.created_at.desc())
|
||||
)
|
||||
tags = list(tag_result.scalars().all())
|
||||
|
||||
logger.info(f"[GraphService] 开始构建图谱,共 {len(chunks)} 个 chunks")
|
||||
logger.info(f"[GraphService] 开始从 brain 数据投影图谱,memories={len(memories)}, tags={len(tags)}")
|
||||
|
||||
for chunk in chunks:
|
||||
try:
|
||||
await self._process_chunk(chunk, user_id)
|
||||
except Exception as e:
|
||||
logger.error(f"[GraphService] 处理 chunk {chunk.id} 失败: {e}")
|
||||
continue
|
||||
|
||||
logger.info(f"[GraphService] 图谱构建完成")
|
||||
|
||||
async def _process_chunk(self, chunk: DocumentChunk, user_id: str):
|
||||
"""处理单个 chunk,提取实体和关系"""
|
||||
prompt = ENTITY_EXTRACTION_PROMPT.format(text=chunk.content[:2000])
|
||||
response = await self.llm.invoke([HumanMessage(content=prompt)])
|
||||
|
||||
try:
|
||||
data = json.loads(response.content)
|
||||
except json.JSONDecodeError:
|
||||
return
|
||||
|
||||
entities = data.get("entities", [])
|
||||
relations = data.get("relations", [])
|
||||
|
||||
if not entities:
|
||||
return
|
||||
|
||||
# 先查找已存在的节点
|
||||
existing_nodes = {}
|
||||
for entity_data in entities:
|
||||
name = entity_data["name"]
|
||||
result = await self.db.execute(
|
||||
select(KGNode)
|
||||
.where(KGNode.user_id == user_id)
|
||||
.where(KGNode.name == name)
|
||||
node_map: dict[str, KGNode] = {}
|
||||
for memory in memories:
|
||||
node = KGNode(
|
||||
user_id=user_id,
|
||||
name=memory.title,
|
||||
entity_type="memory",
|
||||
description=memory.content,
|
||||
properties_={
|
||||
"memory_type": memory.memory_type,
|
||||
"origin_source_types": memory.origin_source_types or [],
|
||||
},
|
||||
importance=min(max(memory.importance / 10, 0.1), 1.0),
|
||||
)
|
||||
node = result.scalar_one_or_none()
|
||||
if node:
|
||||
existing_nodes[name] = node
|
||||
self.db.add(node)
|
||||
await self.db.flush()
|
||||
node_map[f"memory:{memory.id}"] = node
|
||||
|
||||
# 插入新节点
|
||||
entity_map = {}
|
||||
for entity_data in entities:
|
||||
name = entity_data["name"]
|
||||
if name in existing_nodes:
|
||||
entity_map[name] = existing_nodes[name].id
|
||||
else:
|
||||
node = KGNode(
|
||||
user_id=user_id,
|
||||
name=name,
|
||||
entity_type=entity_data["type"],
|
||||
description=entity_data.get("description", ""),
|
||||
source_document_id=chunk.document_id,
|
||||
)
|
||||
self.db.add(node)
|
||||
await self.db.flush()
|
||||
entity_map[name] = node.id
|
||||
|
||||
# 插入关系(去重)
|
||||
for rel in relations:
|
||||
src, tgt = rel["source"], rel["target"]
|
||||
if src not in entity_map or tgt not in entity_map:
|
||||
continue
|
||||
|
||||
# 检查关系是否已存在
|
||||
result = await self.db.execute(
|
||||
select(KGEdge).where(
|
||||
KGEdge.source_id == entity_map[src],
|
||||
KGEdge.target_id == entity_map[tgt],
|
||||
KGEdge.relation_type == rel["relation_type"],
|
||||
)
|
||||
for tag in tags:
|
||||
node = KGNode(
|
||||
user_id=user_id,
|
||||
name=tag.name,
|
||||
entity_type="tag",
|
||||
description=f"{tag.category} / {tag.priority}",
|
||||
properties_={
|
||||
"category": tag.category,
|
||||
"priority": tag.priority,
|
||||
"score": tag.score,
|
||||
},
|
||||
importance=min(max(tag.score / 10, 0.1), 1.0),
|
||||
)
|
||||
existing = result.scalar_one_or_none()
|
||||
if not existing:
|
||||
edge = KGEdge(
|
||||
source_id=entity_map[src],
|
||||
target_id=entity_map[tgt],
|
||||
relation_type=rel["relation_type"],
|
||||
)
|
||||
self.db.add(edge)
|
||||
self.db.add(node)
|
||||
await self.db.flush()
|
||||
node_map[f"tag:{tag.id}"] = node
|
||||
|
||||
for memory in memories:
|
||||
memory_node = node_map.get(f"memory:{memory.id}")
|
||||
if not memory_node:
|
||||
continue
|
||||
memory_text = f"{memory.title} {memory.content}".lower()
|
||||
for tag in tags:
|
||||
if tag.name.lower() in memory_text:
|
||||
tag_node = node_map.get(f"tag:{tag.id}")
|
||||
if not tag_node:
|
||||
continue
|
||||
self.db.add(KGEdge(
|
||||
source_id=memory_node.id,
|
||||
target_id=tag_node.id,
|
||||
relation_type="tagged_with",
|
||||
weight=min(max(tag.score / 10, 0.1), 1.0),
|
||||
))
|
||||
|
||||
memory_nodes = [node_map[f"memory:{memory.id}"] for memory in memories if f"memory:{memory.id}" in node_map]
|
||||
for index, source_node in enumerate(memory_nodes):
|
||||
for target_node in memory_nodes[index + 1:]:
|
||||
self.db.add(KGEdge(
|
||||
source_id=source_node.id,
|
||||
target_id=target_node.id,
|
||||
relation_type="related_to",
|
||||
weight=0.5,
|
||||
))
|
||||
|
||||
await self.db.commit()
|
||||
logger.info("[GraphService] brain 图谱投影完成")
|
||||
|
||||
async def get_graph_summary(self, user_id: str) -> str:
|
||||
"""获取用户图谱的整体摘要"""
|
||||
|
||||
Reference in New Issue
Block a user