Add brain memory services and APIs

Introduce the backend pieces for brain memory ingestion, routing, and
system telemetry so the new knowledge workflows can project data into a
brain view. The supporting tests lock in the new behavior and keep the
expanded backend surface stable.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-22 13:47:34 +08:00
parent e3691b01bb
commit d2447ee635
28 changed files with 2278 additions and 197 deletions

View File

@@ -4,11 +4,8 @@
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, func
from app.models.brain import BrainMemory, BrainTag
from app.models.knowledge_graph import KGNode, KGEdge
from app.models.document import Document, DocumentChunk
from app.services.llm_service import get_llm
from langchain_core.messages import HumanMessage
import json
import logging
logger = logging.getLogger(__name__)
@@ -75,110 +72,93 @@ confidence: 0.0-1.0,表示推断置信度
class GraphService:
def __init__(self, db: AsyncSession):
self.db = db
self.llm = get_llm()
async def build_graph(self, user_id: str, document_ids: list[str] | None = None):
"""
从文档构建/更新知识图谱
- 遍历所有 chunk
- LLM 实体识别
- LLM 关系抽取
- 去重合并
"""
query = (
select(DocumentChunk)
.join(Document)
.where(Document.user_id == user_id)
.where(Document.is_indexed == True)
"""从知识大脑投影图谱。"""
existing_nodes_result = await self.db.execute(select(KGNode).where(KGNode.user_id == user_id))
for node in existing_nodes_result.scalars().all():
await self.db.delete(node)
await self.db.flush()
memory_result = await self.db.execute(
select(BrainMemory)
.where(BrainMemory.user_id == user_id, BrainMemory.status == "active")
.order_by(BrainMemory.importance.desc(), BrainMemory.created_at.desc())
)
if document_ids:
query = query.where(DocumentChunk.document_id.in_(document_ids))
memories = list(memory_result.scalars().all())
result = await self.db.execute(query)
chunks = list(result.scalars().all())
tag_result = await self.db.execute(
select(BrainTag)
.where(BrainTag.user_id == user_id)
.order_by(BrainTag.score.desc(), BrainTag.created_at.desc())
)
tags = list(tag_result.scalars().all())
logger.info(f"[GraphService] 开始构建图谱,共 {len(chunks)} 个 chunks")
logger.info(f"[GraphService] 开始从 brain 数据投影图谱memories={len(memories)}, tags={len(tags)}")
for chunk in chunks:
try:
await self._process_chunk(chunk, user_id)
except Exception as e:
logger.error(f"[GraphService] 处理 chunk {chunk.id} 失败: {e}")
continue
logger.info(f"[GraphService] 图谱构建完成")
async def _process_chunk(self, chunk: DocumentChunk, user_id: str):
"""处理单个 chunk提取实体和关系"""
prompt = ENTITY_EXTRACTION_PROMPT.format(text=chunk.content[:2000])
response = await self.llm.invoke([HumanMessage(content=prompt)])
try:
data = json.loads(response.content)
except json.JSONDecodeError:
return
entities = data.get("entities", [])
relations = data.get("relations", [])
if not entities:
return
# 先查找已存在的节点
existing_nodes = {}
for entity_data in entities:
name = entity_data["name"]
result = await self.db.execute(
select(KGNode)
.where(KGNode.user_id == user_id)
.where(KGNode.name == name)
node_map: dict[str, KGNode] = {}
for memory in memories:
node = KGNode(
user_id=user_id,
name=memory.title,
entity_type="memory",
description=memory.content,
properties_={
"memory_type": memory.memory_type,
"origin_source_types": memory.origin_source_types or [],
},
importance=min(max(memory.importance / 10, 0.1), 1.0),
)
node = result.scalar_one_or_none()
if node:
existing_nodes[name] = node
self.db.add(node)
await self.db.flush()
node_map[f"memory:{memory.id}"] = node
# 插入新节点
entity_map = {}
for entity_data in entities:
name = entity_data["name"]
if name in existing_nodes:
entity_map[name] = existing_nodes[name].id
else:
node = KGNode(
user_id=user_id,
name=name,
entity_type=entity_data["type"],
description=entity_data.get("description", ""),
source_document_id=chunk.document_id,
)
self.db.add(node)
await self.db.flush()
entity_map[name] = node.id
# 插入关系(去重)
for rel in relations:
src, tgt = rel["source"], rel["target"]
if src not in entity_map or tgt not in entity_map:
continue
# 检查关系是否已存在
result = await self.db.execute(
select(KGEdge).where(
KGEdge.source_id == entity_map[src],
KGEdge.target_id == entity_map[tgt],
KGEdge.relation_type == rel["relation_type"],
)
for tag in tags:
node = KGNode(
user_id=user_id,
name=tag.name,
entity_type="tag",
description=f"{tag.category} / {tag.priority}",
properties_={
"category": tag.category,
"priority": tag.priority,
"score": tag.score,
},
importance=min(max(tag.score / 10, 0.1), 1.0),
)
existing = result.scalar_one_or_none()
if not existing:
edge = KGEdge(
source_id=entity_map[src],
target_id=entity_map[tgt],
relation_type=rel["relation_type"],
)
self.db.add(edge)
self.db.add(node)
await self.db.flush()
node_map[f"tag:{tag.id}"] = node
for memory in memories:
memory_node = node_map.get(f"memory:{memory.id}")
if not memory_node:
continue
memory_text = f"{memory.title} {memory.content}".lower()
for tag in tags:
if tag.name.lower() in memory_text:
tag_node = node_map.get(f"tag:{tag.id}")
if not tag_node:
continue
self.db.add(KGEdge(
source_id=memory_node.id,
target_id=tag_node.id,
relation_type="tagged_with",
weight=min(max(tag.score / 10, 0.1), 1.0),
))
memory_nodes = [node_map[f"memory:{memory.id}"] for memory in memories if f"memory:{memory.id}" in node_map]
for index, source_node in enumerate(memory_nodes):
for target_node in memory_nodes[index + 1:]:
self.db.add(KGEdge(
source_id=source_node.id,
target_id=target_node.id,
relation_type="related_to",
weight=0.5,
))
await self.db.commit()
logger.info("[GraphService] brain 图谱投影完成")
async def get_graph_summary(self, user_id: str) -> str:
"""获取用户图谱的整体摘要"""