Add brain memory services and APIs

Introduce the backend pieces for brain memory ingestion, routing, and system telemetry so the new knowledge workflows can project data into a brain view. The supporting tests lock in the new behavior and keep the expanded backend surface stable. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-22 13:47:34 +08:00
parent e3691b01bb
commit d2447ee635
28 changed files with 2278 additions and 197 deletions
--- a/backend/app/services/graph_service.py
+++ b/backend/app/services/graph_service.py
@@ -4,11 +4,8 @@

 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy import select, func
+from app.models.brain import BrainMemory, BrainTag
 from app.models.knowledge_graph import KGNode, KGEdge
-from app.models.document import Document, DocumentChunk
-from app.services.llm_service import get_llm
-from langchain_core.messages import HumanMessage
-import json
 import logging

 logger = logging.getLogger(__name__)
@@ -75,110 +72,93 @@ confidence: 0.0-1.0，表示推断置信度
 class GraphService:
    def __init__(self, db: AsyncSession):
        self.db = db
-        self.llm = get_llm()

    async def build_graph(self, user_id: str, document_ids: list[str] | None = None):
-        """
-        从文档构建/更新知识图谱
-        - 遍历所有 chunk
-        - LLM 实体识别
-        - LLM 关系抽取
-        - 去重合并
-        """
-        query = (
-            select(DocumentChunk)
-            .join(Document)
-            .where(Document.user_id == user_id)
-            .where(Document.is_indexed == True)
+        """从知识大脑投影图谱。"""
+        existing_nodes_result = await self.db.execute(select(KGNode).where(KGNode.user_id == user_id))
+        for node in existing_nodes_result.scalars().all():
+            await self.db.delete(node)
+        await self.db.flush()
+
+        memory_result = await self.db.execute(
+            select(BrainMemory)
+            .where(BrainMemory.user_id == user_id, BrainMemory.status == "active")
+            .order_by(BrainMemory.importance.desc(), BrainMemory.created_at.desc())
        )
-        if document_ids:
-            query = query.where(DocumentChunk.document_id.in_(document_ids))
+        memories = list(memory_result.scalars().all())

-        result = await self.db.execute(query)
-        chunks = list(result.scalars().all())
+        tag_result = await self.db.execute(
+            select(BrainTag)
+            .where(BrainTag.user_id == user_id)
+            .order_by(BrainTag.score.desc(), BrainTag.created_at.desc())
+        )
+        tags = list(tag_result.scalars().all())

-        logger.info(f"[GraphService] 开始构建图谱，共 {len(chunks)} 个 chunks")
+        logger.info(f"[GraphService] 开始从 brain 数据投影图谱，memories={len(memories)}, tags={len(tags)}")

-        for chunk in chunks:
-            try:
-                await self._process_chunk(chunk, user_id)
-            except Exception as e:
-                logger.error(f"[GraphService] 处理 chunk {chunk.id} 失败: {e}")
-                continue
-
-        logger.info(f"[GraphService] 图谱构建完成")
-
-    async def _process_chunk(self, chunk: DocumentChunk, user_id: str):
-        """处理单个 chunk，提取实体和关系"""
-        prompt = ENTITY_EXTRACTION_PROMPT.format(text=chunk.content[:2000])
-        response = await self.llm.invoke([HumanMessage(content=prompt)])
-
-        try:
-            data = json.loads(response.content)
-        except json.JSONDecodeError:
-            return
-
-        entities = data.get("entities", [])
-        relations = data.get("relations", [])
-
-        if not entities:
-            return
-
-        # 先查找已存在的节点
-        existing_nodes = {}
-        for entity_data in entities:
-            name = entity_data["name"]
-            result = await self.db.execute(
-                select(KGNode)
-                .where(KGNode.user_id == user_id)
-                .where(KGNode.name == name)
+        node_map: dict[str, KGNode] = {}
+        for memory in memories:
+            node = KGNode(
+                user_id=user_id,
+                name=memory.title,
+                entity_type="memory",
+                description=memory.content,
+                properties_={
+                    "memory_type": memory.memory_type,
+                    "origin_source_types": memory.origin_source_types or [],
+                },
+                importance=min(max(memory.importance / 10, 0.1), 1.0),
            )
-            node = result.scalar_one_or_none()
-            if node:
-                existing_nodes[name] = node
+            self.db.add(node)
+            await self.db.flush()
+            node_map[f"memory:{memory.id}"] = node

-        # 插入新节点
-        entity_map = {}
-        for entity_data in entities:
-            name = entity_data["name"]
-            if name in existing_nodes:
-                entity_map[name] = existing_nodes[name].id
-            else:
-                node = KGNode(
-                    user_id=user_id,
-                    name=name,
-                    entity_type=entity_data["type"],
-                    description=entity_data.get("description", ""),
-                    source_document_id=chunk.document_id,
-                )
-                self.db.add(node)
-                await self.db.flush()
-                entity_map[name] = node.id
-
-        # 插入关系（去重）
-        for rel in relations:
-            src, tgt = rel["source"], rel["target"]
-            if src not in entity_map or tgt not in entity_map:
-                continue
-
-            # 检查关系是否已存在
-            result = await self.db.execute(
-                select(KGEdge).where(
-                    KGEdge.source_id == entity_map[src],
-                    KGEdge.target_id == entity_map[tgt],
-                    KGEdge.relation_type == rel["relation_type"],
-                )
+        for tag in tags:
+            node = KGNode(
+                user_id=user_id,
+                name=tag.name,
+                entity_type="tag",
+                description=f"{tag.category} / {tag.priority}",
+                properties_={
+                    "category": tag.category,
+                    "priority": tag.priority,
+                    "score": tag.score,
+                },
+                importance=min(max(tag.score / 10, 0.1), 1.0),
            )
-            existing = result.scalar_one_or_none()
-            if not existing:
-                edge = KGEdge(
-                    source_id=entity_map[src],
-                    target_id=entity_map[tgt],
-                    relation_type=rel["relation_type"],
-                )
-                self.db.add(edge)
+            self.db.add(node)
+            await self.db.flush()
+            node_map[f"tag:{tag.id}"] = node
+
+        for memory in memories:
+            memory_node = node_map.get(f"memory:{memory.id}")
+            if not memory_node:
+                continue
+            memory_text = f"{memory.title} {memory.content}".lower()
+            for tag in tags:
+                if tag.name.lower() in memory_text:
+                    tag_node = node_map.get(f"tag:{tag.id}")
+                    if not tag_node:
+                        continue
+                    self.db.add(KGEdge(
+                        source_id=memory_node.id,
+                        target_id=tag_node.id,
+                        relation_type="tagged_with",
+                        weight=min(max(tag.score / 10, 0.1), 1.0),
+                    ))
+
+        memory_nodes = [node_map[f"memory:{memory.id}"] for memory in memories if f"memory:{memory.id}" in node_map]
+        for index, source_node in enumerate(memory_nodes):
+            for target_node in memory_nodes[index + 1:]:
+                self.db.add(KGEdge(
+                    source_id=source_node.id,
+                    target_id=target_node.id,
+                    relation_type="related_to",
+                    weight=0.5,
+                ))

        await self.db.commit()
+        logger.info("[GraphService] brain 图谱投影完成")

    async def get_graph_summary(self, user_id: str) -> str:
        """获取用户图谱的整体摘要"""