From 9bfa0dcc115836098b99446d2ebfecd44da8cc94 Mon Sep 17 00:00:00 2001 From: "WIN-JHFT4D3SIVT\\caoxiaozhu" Date: Sun, 5 Apr 2026 13:22:23 +0800 Subject: [PATCH] feat(memory): Day M.1 complete - importance scoring system MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add FrequencyTracker: increment(), get_frequency_score(), get_recency_score(), get_time_decay() - Add EmotionAnalyzer: EMOTION_KEYWORDS dict, extract(), calculate_score(), get_emotion_profile() - Add ImpactEvaluator: evaluate(), get_topic_overlap(), rank_by_impact() - Add ImportanceScorer: composite scoring (freq 35% + recency 20% + emotion 25% + impact 20%) - Update UserMemory model: frequency_count, emotion_tags, importance_score, importance_level, associated_topics - Integrate ImportanceScorer into memory_service.py (recall + importance update) - Add 37 tests for all memory scoring components - Fix urgency patterns: remove overly broad '今天' that matched neutral text - Update memory-update checklist: mark all M.1 tasks complete --- backend/app/models/memory.py | 37 +- backend/app/services/memory/__init__.py | 13 + .../app/services/memory/emotion_analyzer.py | 149 +++++++ .../app/services/memory/frequency_tracker.py | 84 ++++ .../app/services/memory/impact_evaluator.py | 52 +++ .../app/services/memory/importance_scorer.py | 103 +++++ backend/app/services/memory_service.py | 37 +- .../tests/services/test_importance_scorer.py | 408 ++++++++++++++++++ .../plan/memory-update/checklist.md | 187 ++++++-- 9 files changed, 1016 insertions(+), 54 deletions(-) create mode 100644 backend/app/services/memory/__init__.py create mode 100644 backend/app/services/memory/emotion_analyzer.py create mode 100644 backend/app/services/memory/frequency_tracker.py create mode 100644 backend/app/services/memory/impact_evaluator.py create mode 100644 backend/app/services/memory/importance_scorer.py create mode 100644 backend/tests/services/test_importance_scorer.py diff --git 
a/backend/app/models/memory.py b/backend/app/models/memory.py index 94d6043..16d03a0 100644 --- a/backend/app/models/memory.py +++ b/backend/app/models/memory.py @@ -1,4 +1,15 @@ -from sqlalchemy import Column, String, Text, Integer, ForeignKey, Boolean, DateTime, Enum as SQLEnum +from sqlalchemy import ( + Column, + String, + Text, + Integer, + Float, + ForeignKey, + Boolean, + DateTime, + Enum as SQLEnum, + JSON, +) from app.models.base import BaseModel, utc_now @@ -7,12 +18,13 @@ class MemorySummary(BaseModel): 对话摘要 — 中期记忆 当一段对话超过阈值轮数时,自动生成摘要存入此表 """ + __tablename__ = "memory_summaries" user_id = Column(String(36), ForeignKey("users.id"), nullable=False, index=True) conversation_id = Column(String(36), ForeignKey("conversations.id"), nullable=False, index=True) - summary_text = Column(Text, nullable=False) # 摘要内容 - turn_count = Column(Integer, default=0) # 摘要时累计轮数 + summary_text = Column(Text, nullable=False) # 摘要内容 + turn_count = Column(Integer, default=0) # 摘要时累计轮数 summary_at = Column(DateTime, default=utc_now, nullable=False) @@ -21,14 +33,23 @@ class UserMemory(BaseModel): 用户画像记忆 — 长期记忆 从对话中提取的用户事实、偏好、目标 """ + __tablename__ = "user_memories" user_id = Column(String(36), ForeignKey("users.id"), nullable=False, index=True) - memory_type = Column(String(50), nullable=False) # fact | preference | goal | habit | other - content = Column(Text, nullable=False) # 记忆内容 - importance = Column(Integer, default=5) # 重要程度 1-10 - is_recalled = Column(Boolean, default=False) # 是否在当前对话中被召回 - recall_count = Column(Integer, default=0) # 被召回次数 + memory_type = Column(String(50), nullable=False) # fact | preference | goal | habit | other + content = Column(Text, nullable=False) # 记忆内容 + importance = Column(Integer, default=5) # 重要程度 1-10 (legacy, replaced by importance_score) + is_recalled = Column(Boolean, default=False) # 是否在当前对话中被召回 + recall_count = Column(Integer, default=0) # 被召回次数 source_conversation_id = Column(String(36), nullable=True) # 来源对话 extracted_at = 
Column(DateTime, default=utc_now, nullable=False) last_recalled_at = Column(DateTime, nullable=True) + # M.1: 重要性评分系统 + frequency_count = Column( + Integer, default=0 + ) # 被召回次数 (duplicate of recall_count, for scoring clarity) + emotion_tags = Column(JSON, nullable=True) # List of emotion keywords + importance_score = Column(Float, default=0.5) # 重要性分数 0.0-1.0 + importance_level = Column(String(20), default="medium") # high | medium | low + associated_topics = Column(JSON, nullable=True) # List of topic strings diff --git a/backend/app/services/memory/__init__.py b/backend/app/services/memory/__init__.py new file mode 100644 index 0000000..3b1f856 --- /dev/null +++ b/backend/app/services/memory/__init__.py @@ -0,0 +1,13 @@ +"""Memory Services Module""" + +from app.services.memory.frequency_tracker import FrequencyTracker +from app.services.memory.emotion_analyzer import EmotionAnalyzer +from app.services.memory.impact_evaluator import ImpactEvaluator +from app.services.memory.importance_scorer import ImportanceScorer + +__all__ = [ + "FrequencyTracker", + "EmotionAnalyzer", + "ImpactEvaluator", + "ImportanceScorer", +] diff --git a/backend/app/services/memory/emotion_analyzer.py b/backend/app/services/memory/emotion_analyzer.py new file mode 100644 index 0000000..f8fea46 --- /dev/null +++ b/backend/app/services/memory/emotion_analyzer.py @@ -0,0 +1,149 @@ +""" +EmotionAnalyzer + +Extracts emotional intensity from text and calculates emotion-based importance scores. 
+""" + +import re +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from app.models.memory import UserMemory + + +class EmotionAnalyzer: + """Analyze emotional keywords in memory content""" + + # Emotion keyword weights (higher = more important) + EMOTION_KEYWORDS = { + # High intensity + "急": 1.0, + "紧急": 1.0, + "很重要": 0.9, + "非常重要": 1.0, + "必须": 0.9, + # Medium-high intensity + "困扰": 0.8, + "烦恼": 0.7, + "担心": 0.7, + "焦虑": 0.8, + "害怕": 0.8, + "恐惧": 0.9, + # Medium intensity + "想解决": 0.6, + "希望": 0.5, + "想要": 0.4, + "需要": 0.4, + "渴望": 0.6, + # Low intensity (casual/neutral) + "无所谓": 0.1, + "随便": 0.1, + "都行": 0.1, + "还好": 0.2, + # Negative valence + "讨厌": 0.6, + "不喜欢": 0.5, + "恨": 0.8, + "不喜欢": 0.5, + # Positive valence + "喜欢": 0.5, + "爱": 0.7, + "开心": 0.4, + "高兴": 0.4, + } + + # Urgency patterns — only match when there's an explicit time-bound word + # NOTE: "今天" alone is too common and matches neutral sentences like "今天天气不错" + URGENCY_PATTERNS = [ + (re.compile(r"马上|立刻|立即|赶紧"), 1.0), + (re.compile(r"今天内|今天必须|今日必须"), 0.8), + (re.compile(r"明天|明天之前|明日"), 0.6), + (re.compile(r"这周|本周"), 0.4), + (re.compile(r"尽快|早点"), 0.7), + ] + + def extract(self, text: str) -> list[str]: + """Extract emotion keywords from text + + Returns: + List of matched emotion keywords + """ + if not text: + return [] + + matched = [] + text_lower = text.lower() + + for keyword, weight in self.EMOTION_KEYWORDS.items(): + if keyword in text_lower: + matched.append(keyword) + + # Check urgency patterns + for pattern, weight in self.URGENCY_PATTERNS: + if pattern.search(text): + matched.append(f"[URGENCY:{weight}]") + + return matched + + def calculate_score(self, memory: "UserMemory") -> float: + """Calculate emotion-based importance score (0.0 - 1.0) + + Uses the highest-weighted emotion keyword found in the content. 
+ """ + content = memory.content or "" + emotion_tags = memory.emotion_tags or [] + + # Check emotion_tags first (pre-extracted) + if emotion_tags: + max_weight = 0.0 + for tag in emotion_tags: + if tag in self.EMOTION_KEYWORDS: + max_weight = max(max_weight, self.EMOTION_KEYWORDS[tag]) + if max_weight > 0: + return max_weight + + # Extract from content + matched = self.extract(content) + if not matched: + return 0.0 + + # Get highest weight + max_weight = 0.0 + for keyword in matched: + if keyword.startswith("[URGENCY:"): + # Extract urgency weight + try: + weight = float(keyword.split(":")[1].rstrip("]")) + max_weight = max(max_weight, weight) + except (ValueError, IndexError): + pass + elif keyword in self.EMOTION_KEYWORDS: + max_weight = max(max_weight, self.EMOTION_KEYWORDS[keyword]) + + return min(1.0, max_weight) + + def get_emotion_profile(self, text: str) -> dict: + """Get detailed emotion profile for text + + Returns: + Dict with matched keywords, max_weight, and sentiment + """ + matched = self.extract(text) + weights = [] + for keyword in matched: + if keyword.startswith("[URGENCY:"): + try: + weights.append(float(keyword.split(":")[1].rstrip("]"))) + except (ValueError, IndexError): + pass + elif keyword in self.EMOTION_KEYWORDS: + weights.append(self.EMOTION_KEYWORDS[keyword]) + + return { + "matched_keywords": matched, + "max_weight": max(weights) if weights else 0.0, + "avg_weight": sum(weights) / len(weights) if weights else 0.0, + "sentiment": "positive" + if (weights and sum(weights) / len(weights) > 0.5) + else "neutral", + } diff --git a/backend/app/services/memory/frequency_tracker.py b/backend/app/services/memory/frequency_tracker.py new file mode 100644 index 0000000..6c19d74 --- /dev/null +++ b/backend/app/services/memory/frequency_tracker.py @@ -0,0 +1,84 @@ +""" +FrequencyTracker + +Tracks how often a memory is recalled and calculates frequency/recency scores. 
+""" + +from datetime import UTC, datetime +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from app.models.memory import UserMemory + + +class FrequencyTracker: + """Track and score memory recall frequency""" + + # Score weights + MAX_FREQUENCY = 10 # Cap frequency count for scoring + RECENCY_DECAY_DAYS = 30 # After 30 days, recency score drops significantly + + def increment(self, memory: "UserMemory") -> "UserMemory": + """Increment recall count and update last recalled timestamp""" + memory.frequency_count = (memory.frequency_count or 0) + 1 + memory.last_recalled_at = datetime.now(UTC) + return memory + + def get_frequency_score(self, memory: "UserMemory") -> float: + """Calculate normalized frequency score (0.0 - 1.0) + + Uses logarithmic scaling to prevent high-frequency memories + from dominating completely. + """ + count = memory.frequency_count or 0 + if count == 0: + return 0.0 + # Logarithmic scaling: more recalls have diminishing returns + # log(1+x) / log(1+MAX) gives 0-1 range + import math + + score = math.log(1 + count) / math.log(1 + self.MAX_FREQUENCY) + return min(1.0, max(0.0, score)) + + def get_recency_score(self, memory: "UserMemory") -> float: + """Calculate recency score (0.0 - 1.0) + + Memory recalled recently scores higher. Uses exponential decay. 
+ """ + last_recalled = memory.last_recalled_at + if last_recalled is None: + return 0.0 + + now = datetime.now(UTC) + if isinstance(last_recalled, datetime): + if last_recalled.tzinfo is None: + last_recalled = last_recalled.replace(tzinfo=UTC) + days_since = (now - last_recalled).total_seconds() / 86400 + else: + days_since = self.RECENCY_DECAY_DAYS + + # Exponential decay with time constant RECENCY_DECAY_DAYS: score is 1/e ≈ 0.37 after that many days (half-life ≈ 0.69 × RECENCY_DECAY_DAYS) + import math + + decay = math.exp(-days_since / self.RECENCY_DECAY_DAYS) + return min(1.0, max(0.0, decay)) + + def get_time_decay(self, memory: "UserMemory") -> float: + """Calculate time-based decay factor for forgetting curve""" + last_accessed = getattr(memory, "last_accessed_at", None) + if last_accessed is None: + last_accessed = memory.last_recalled_at + if last_accessed is None: + return 1.0 + + now = datetime.now(UTC) + if isinstance(last_accessed, datetime): + if last_accessed.tzinfo is None: + last_accessed = last_accessed.replace(tzinfo=UTC) + days_since = (now - last_accessed).total_seconds() / 86400 + else: + days_since = 0 + + import math + + return math.exp(-days_since / self.RECENCY_DECAY_DAYS) diff --git a/backend/app/services/memory/impact_evaluator.py b/backend/app/services/memory/impact_evaluator.py new file mode 100644 index 0000000..a8be5ea --- /dev/null +++ b/backend/app/services/memory/impact_evaluator.py @@ -0,0 +1,52 @@ +""" +ImpactEvaluator + +Evaluates the breadth of impact a memory has based on associated topics. +""" + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from app.models.memory import UserMemory + + +class ImpactEvaluator: + """Evaluate the impact breadth of a memory""" + + # Threshold for maximum impact score + IMPACT_THRESHOLD = 5 # Number of associated topics for max impact + + def evaluate(self, memory: "UserMemory") -> float: + """Calculate impact score (0.0 - 1.0) + + The more associated topics a memory has, the higher its impact. 
+ Topics represent "what this memory is about" — if it touches + many aspects of the user's life, it has high impact. + """ + associated_topics = memory.associated_topics or [] + if not associated_topics: + return 0.0 + + # Normalize: IMPACT_THRESHOLD topics = full impact (1.0) + raw_score = len(associated_topics) / self.IMPACT_THRESHOLD + return min(1.0, raw_score) + + def get_topic_overlap(self, memory_a: "UserMemory", memory_b: "UserMemory") -> float: + """Calculate topic overlap between two memories (0.0 - 1.0) + + Used for finding related memories. + """ + topics_a = set(memory_a.associated_topics or []) + topics_b = set(memory_b.associated_topics or []) + + if not topics_a or not topics_b: + return 0.0 + + intersection = topics_a & topics_b + union = topics_a | topics_b + + return len(intersection) / len(union) if union else 0.0 + + def rank_by_impact(self, memories: list["UserMemory"]) -> list["UserMemory"]: + """Rank memories by impact score (descending)""" + return sorted(memories, key=lambda m: self.evaluate(m), reverse=True) diff --git a/backend/app/services/memory/importance_scorer.py b/backend/app/services/memory/importance_scorer.py new file mode 100644 index 0000000..3909af1 --- /dev/null +++ b/backend/app/services/memory/importance_scorer.py @@ -0,0 +1,103 @@ +""" +ImportanceScorer + +Composite importance scoring combining frequency, recency, emotion, and impact. 
+""" + +from enum import Enum +from typing import TYPE_CHECKING + +from app.services.memory.frequency_tracker import FrequencyTracker +from app.services.memory.emotion_analyzer import EmotionAnalyzer +from app.services.memory.impact_evaluator import ImpactEvaluator + +if TYPE_CHECKING: + from app.models.memory import UserMemory + + +class ImportanceLevel(str, Enum): + """Importance level classification""" + + HIGH = "high" + MEDIUM = "medium" + LOW = "low" + + +class ImportanceScorer: + """Calculate composite importance score for memories + + Score formula: + frequency_score * 0.35 + + recency_score * 0.20 + + emotion_score * 0.25 + + impact_score * 0.20 + """ + + # Score weights + WEIGHT_FREQUENCY = 0.35 + WEIGHT_RECENCY = 0.20 + WEIGHT_EMOTION = 0.25 + WEIGHT_IMPACT = 0.20 + + # Escalation threshold + HIGH_THRESHOLD = 0.8 + MEDIUM_THRESHOLD = 0.5 + LOW_THRESHOLD = 0.0 + + def __init__(self): + self.tracker = FrequencyTracker() + self.emotion_analyzer = EmotionAnalyzer() + self.impact_evaluator = ImpactEvaluator() + + def calculate_score(self, memory: "UserMemory") -> float: + """Calculate composite importance score (0.0 - 1.0)""" + frequency = self.tracker.get_frequency_score(memory) * self.WEIGHT_FREQUENCY + recency = self.tracker.get_recency_score(memory) * self.WEIGHT_RECENCY + emotion = self.emotion_analyzer.calculate_score(memory) * self.WEIGHT_EMOTION + impact = self.impact_evaluator.evaluate(memory) * self.WEIGHT_IMPACT + + total = frequency + recency + emotion + impact + return round(min(1.0, max(0.0, total)), 3) + + def get_importance_level(self, score: float) -> ImportanceLevel: + """Classify importance score into level""" + if score >= self.HIGH_THRESHOLD: + return ImportanceLevel.HIGH + elif score >= self.MEDIUM_THRESHOLD: + return ImportanceLevel.MEDIUM + else: + return ImportanceLevel.LOW + + def should_escalate(self, memory: "UserMemory") -> bool: + """Check if a memory should be escalated (promoted to higher importance) + + A memory should 
escalate if: + - Score exceeds HIGH_THRESHOLD + - Emotion analysis shows high intensity keywords + """ + score = self.calculate_score(memory) + if score >= self.HIGH_THRESHOLD: + return True + + # Check emotion intensity + emotion_score = self.emotion_analyzer.calculate_score(memory) + if emotion_score >= 0.9: + return True + + return False + + def score_and_classify(self, memory: "UserMemory") -> tuple[float, ImportanceLevel]: + """Calculate score and classify in one call""" + score = self.calculate_score(memory) + level = self.get_importance_level(score) + return score, level + + def update_memory_importance(self, memory: "UserMemory") -> "UserMemory": + """Update memory's importance_score and importance_level fields + + Returns the updated memory. + """ + score, level = self.score_and_classify(memory) + memory.importance_score = score + memory.importance_level = level.value if isinstance(level, ImportanceLevel) else level + return memory diff --git a/backend/app/services/memory_service.py b/backend/app/services/memory_service.py index 668c1a2..39eda6e 100644 --- a/backend/app/services/memory_service.py +++ b/backend/app/services/memory_service.py @@ -7,6 +7,7 @@ Jarvis 记忆系统 (基于 Mem0) import logging import os import re +import json from datetime import UTC, datetime from typing import Optional, Any from sqlalchemy import select, desc, func @@ -15,6 +16,10 @@ from app.models.conversation import Conversation, Message from app.models.memory import UserMemory from app.models.user import User from app.services.brain_service import BrainService +from app.services.memory.frequency_tracker import FrequencyTracker +from app.services.memory.emotion_analyzer import EmotionAnalyzer +from app.services.memory.impact_evaluator import ImpactEvaluator +from app.services.memory.importance_scorer import ImportanceScorer from app.config import settings as _settings try: @@ -312,8 +317,7 @@ def _extract_memory_query_tokens(query: str) -> list[str]: tokens.append(stripped_chunk) if 
len(stripped_chunk) > 6: tokens.extend( - stripped_chunk[index:index + 4] - for index in range(len(stripped_chunk) - 3) + stripped_chunk[index : index + 4] for index in range(len(stripped_chunk) - 3) ) return list(dict.fromkeys(tokens)) @@ -344,16 +348,21 @@ async def recall_user_memories( query_tokens = _extract_memory_query_tokens(query) statement = select(UserMemory).where(UserMemory.user_id == user_id) - result = await db.execute(statement.order_by(UserMemory.importance.desc(), UserMemory.created_at.desc())) + result = await db.execute( + statement.order_by(UserMemory.importance_score.desc(), UserMemory.created_at.desc()) + ) fallback_memories = list(result.scalars().all()) - if _contains_hint(_normalize_query(query), MEMORY_QUERY_HINTS) or _matches_memory_query_pattern(_normalize_query(query)): + if _contains_hint(_normalize_query(query), MEMORY_QUERY_HINTS) or _matches_memory_query_pattern( + _normalize_query(query) + ): return fallback_memories[:top_k] if query_tokens: matched_memories = [ - memory for memory in fallback_memories - if any(token in (memory.content or '').lower() for token in query_tokens) + memory + for memory in fallback_memories + if any(token in (memory.content or "").lower() for token in query_tokens) ] return matched_memories[:top_k] @@ -361,13 +370,25 @@ async def recall_user_memories( async def _mark_memories_recalled(db: AsyncSession, memories: list[UserMemory]) -> None: + """Mark memories as recalled and update importance score""" + from app.services.memory.frequency_tracker import FrequencyTracker + from app.services.memory.importance_scorer import ImportanceScorer + recalled_at = datetime.now(UTC) + tracker = FrequencyTracker() + scorer = ImportanceScorer() updated = False + for memory in memories: memory.is_recalled = True memory.recall_count = (memory.recall_count or 0) + 1 memory.last_recalled_at = recalled_at + memory.frequency_count = memory.recall_count # Keep in sync + + # Update importance score on recall + 
scorer.update_memory_importance(memory) updated = True + if updated: await db.commit() @@ -417,9 +438,7 @@ MEMORY_QUERY_HINTS = ( "偏好", "习惯", ) -MEMORY_QUERY_PATTERNS = ( - re.compile(r"\bremember\s+(?:that\s+)?i\b"), -) +MEMORY_QUERY_PATTERNS = (re.compile(r"\bremember\s+(?:that\s+)?i\b"),) GROUNDING_QUERY_HINTS = ( "根据文档", "严格根据", diff --git a/backend/tests/services/test_importance_scorer.py b/backend/tests/services/test_importance_scorer.py new file mode 100644 index 0000000..eb9a8d8 --- /dev/null +++ b/backend/tests/services/test_importance_scorer.py @@ -0,0 +1,408 @@ +""" +Tests for Importance Scoring System (M.1) + +Tests: frequency tracking, emotion analysis, impact evaluation, and composite importance scoring. +""" + +import pytest +from datetime import UTC, datetime, timedelta +from unittest.mock import MagicMock + +from app.services.memory.frequency_tracker import FrequencyTracker +from app.services.memory.emotion_analyzer import EmotionAnalyzer +from app.services.memory.impact_evaluator import ImpactEvaluator +from app.services.memory.importance_scorer import ImportanceScorer, ImportanceLevel + + +def create_mock_memory( + frequency_count: int = 0, + last_recalled_at=None, + content: str = "", + emotion_tags: list = None, + associated_topics: list = None, + last_accessed_at=None, +): + """Create a mock UserMemory for testing""" + memory = MagicMock() + memory.frequency_count = frequency_count + memory.last_recalled_at = last_recalled_at + memory.content = content + memory.emotion_tags = emotion_tags or [] + memory.associated_topics = associated_topics or [] + memory.last_accessed_at = last_accessed_at + memory.importance_score = 0.5 + memory.importance_level = "medium" + return memory + + +class TestFrequencyTracker: + """Test frequency tracking""" + + def test_increment(self): + tracker = FrequencyTracker() + memory = create_mock_memory(frequency_count=5) + + result = tracker.increment(memory) + + assert result.frequency_count == 6 + assert 
result.last_recalled_at is not None + + def test_increment_from_zero(self): + tracker = FrequencyTracker() + memory = create_mock_memory(frequency_count=0) + + result = tracker.increment(memory) + + assert result.frequency_count == 1 + + def test_get_frequency_score_zero(self): + tracker = FrequencyTracker() + memory = create_mock_memory(frequency_count=0) + + score = tracker.get_frequency_score(memory) + + assert score == 0.0 + + def test_get_frequency_score_normal(self): + tracker = FrequencyTracker() + memory = create_mock_memory(frequency_count=5) + + score = tracker.get_frequency_score(memory) + + # log(1+5) / log(1+10) ≈ log(6)/log(11) ≈ 0.778 / 1.041 ≈ 0.747 + assert 0.7 < score < 0.8 + + def test_get_frequency_score_capped(self): + tracker = FrequencyTracker() + memory = create_mock_memory(frequency_count=100) + + score = tracker.get_frequency_score(memory) + + # Should be capped at 1.0 + assert score <= 1.0 + + def test_get_recency_score_recent(self): + tracker = FrequencyTracker() + memory = create_mock_memory(last_recalled_at=datetime.now(UTC)) + + score = tracker.get_recency_score(memory) + + assert score > 0.9 + + def test_get_recency_score_old(self): + tracker = FrequencyTracker() + old_date = datetime.now(UTC) - timedelta(days=60) + memory = create_mock_memory(last_recalled_at=old_date) + + score = tracker.get_recency_score(memory) + + # 60 days old with a 30-day decay constant: exp(-60/30) ≈ 0.135 + assert score < 0.3 + + def test_get_recency_score_never_recalled(self): + tracker = FrequencyTracker() + memory = create_mock_memory(last_recalled_at=None) + + score = tracker.get_recency_score(memory) + + assert score == 0.0 + + def test_get_time_decay(self): + tracker = FrequencyTracker() + recent = datetime.now(UTC) - timedelta(days=7) + memory = create_mock_memory(last_accessed_at=recent) + + decay = tracker.get_time_decay(memory) + + # 7 days with a 30-day decay constant: exp(-7/30) ≈ 0.79 + assert 0.7 < decay < 0.9 + + +class TestEmotionAnalyzer: + """Test emotion 
analysis""" + + def test_extract_high_intensity(self): + analyzer = EmotionAnalyzer() + text = "这件事很重要,我急需解决!" + + matched = analyzer.extract(text) + + assert "很重要" in matched or "急" in matched + + def test_extract_worry(self): + analyzer = EmotionAnalyzer() + text = "我很担心这个问题" + + matched = analyzer.extract(text) + + assert "担心" in matched + + def test_extract_no_emotion(self): + analyzer = EmotionAnalyzer() + text = "今天天气不错" + + matched = analyzer.extract(text) + + assert len(matched) == 0 + + def test_extract_urgency_pattern(self): + analyzer = EmotionAnalyzer() + text = "马上要迟到了" + + matched = analyzer.extract(text) + + assert any("URGENCY" in m for m in matched) + + def test_calculate_score_high(self): + analyzer = EmotionAnalyzer() + memory = create_mock_memory(content="这件事非常重要,急需解决!") + + score = analyzer.calculate_score(memory) + + assert score >= 0.9 + + def test_calculate_score_neutral(self): + analyzer = EmotionAnalyzer() + memory = create_mock_memory(content="今天吃了苹果") + + score = analyzer.calculate_score(memory) + + assert score == 0.0 + + def test_calculate_score_from_emotion_tags(self): + analyzer = EmotionAnalyzer() + memory = create_mock_memory(emotion_tags=["急", "很重要"]) + + score = analyzer.calculate_score(memory) + + assert score >= 0.9 + + def test_get_emotion_profile(self): + analyzer = EmotionAnalyzer() + text = "我很担心这个问题,必须马上解决" + + profile = analyzer.get_emotion_profile(text) + + assert "matched_keywords" in profile + assert "max_weight" in profile + assert profile["max_weight"] > 0 + + +class TestImpactEvaluator: + """Test impact evaluation""" + + def test_evaluate_no_topics(self): + evaluator = ImpactEvaluator() + memory = create_mock_memory(associated_topics=[]) + + score = evaluator.evaluate(memory) + + assert score == 0.0 + + def test_evaluate_single_topic(self): + evaluator = ImpactEvaluator() + memory = create_mock_memory(associated_topics=["工作"]) + + score = evaluator.evaluate(memory) + + # 1 topic / 5 threshold = 0.2 + assert score == 
0.2 + + def test_evaluate_full_topics(self): + evaluator = ImpactEvaluator() + memory = create_mock_memory(associated_topics=["工作", "健康", "家庭", "财务", "爱好"]) + + score = evaluator.evaluate(memory) + + # 5 topics / 5 threshold = 1.0 + assert score == 1.0 + + def test_evaluate_over_threshold(self): + evaluator = ImpactEvaluator() + memory = create_mock_memory(associated_topics=["a", "b", "c", "d", "e", "f", "g"]) + + score = evaluator.evaluate(memory) + + # Capped at 1.0 + assert score == 1.0 + + def test_get_topic_overlap(self): + evaluator = ImpactEvaluator() + memory_a = create_mock_memory(associated_topics=["工作", "健康", "家庭"]) + memory_b = create_mock_memory(associated_topics=["工作", "健康", "爱好"]) + + overlap = evaluator.get_topic_overlap(memory_a, memory_b) + + # Intersection: {工作, 健康} = 2, Union: {工作, 健康, 家庭, 爱好} = 4 + # 2/4 = 0.5 + assert overlap == 0.5 + + def test_rank_by_impact(self): + evaluator = ImpactEvaluator() + memory_low = create_mock_memory(associated_topics=["a"]) + memory_high = create_mock_memory(associated_topics=["a", "b", "c", "d", "e"]) + + ranked = evaluator.rank_by_impact([memory_low, memory_high]) + + assert ranked[0] == memory_high + assert ranked[1] == memory_low + + +class TestImportanceScorer: + """Test composite importance scoring""" + + def test_calculate_score_fresh_memory(self): + scorer = ImportanceScorer() + memory = create_mock_memory( + frequency_count=0, + last_recalled_at=None, + content="今天吃了苹果", + associated_topics=[], + ) + + score = scorer.calculate_score(memory) + + # All zeros, should be ~0 + assert score < 0.1 + + def test_calculate_score_high_frequency(self): + scorer = ImportanceScorer() + memory = create_mock_memory( + frequency_count=10, + last_recalled_at=datetime.now(UTC), + content="工作相关", + associated_topics=["工作"], + ) + + score = scorer.calculate_score(memory) + + # High frequency (log(11)/log(11) ≈ 1.0) * 0.35 + recency * 0.20 + emotion * 0.25 + impact * 0.20 + # ≈ 0.35 + 0.20 + 0 + 0.04 = 0.59 + assert score > 
0.5 + + def test_calculate_score_with_emotion(self): + scorer = ImportanceScorer() + memory = create_mock_memory( + frequency_count=1, + content="这件事很重要,我急需解决!", + associated_topics=["工作"], + ) + + score = scorer.calculate_score(memory) + + # Emotion score ~0.9 * 0.25 = 0.225 + assert score > 0.2 + + def test_calculate_score_high_all_factors(self): + scorer = ImportanceScorer() + memory = create_mock_memory( + frequency_count=10, + last_recalled_at=datetime.now(UTC), + content="这个问题非常紧急,必须马上处理!", + associated_topics=["工作", "健康", "家庭", "财务", "爱好"], + ) + + score = scorer.calculate_score(memory) + + # All factors high + assert score > 0.7 + + def test_get_importance_level_high(self): + scorer = ImportanceScorer() + + level = scorer.get_importance_level(0.85) + + assert level == ImportanceLevel.HIGH + + def test_get_importance_level_medium(self): + scorer = ImportanceScorer() + + level = scorer.get_importance_level(0.6) + + assert level == ImportanceLevel.MEDIUM + + def test_get_importance_level_low(self): + scorer = ImportanceScorer() + + level = scorer.get_importance_level(0.3) + + assert level == ImportanceLevel.LOW + + def test_should_escalate_by_score(self): + scorer = ImportanceScorer() + memory = create_mock_memory( + frequency_count=10, + last_recalled_at=datetime.now(UTC), + content="紧急!非常重要!", + associated_topics=["a", "b", "c", "d", "e"], + ) + + result = scorer.should_escalate(memory) + + # With high freq (0.35) + recent (0.20) + emotion (0.25) + many topics (0.20) = 1.0 + assert result is True + + def test_should_escalate_by_emotion(self): + scorer = ImportanceScorer() + memory = create_mock_memory( + frequency_count=1, + content="紧急!非常重要!", + associated_topics=[], + ) + + result = scorer.should_escalate(memory) + + # Emotion intensity alone triggers escalation + assert result is True + + def test_should_not_escalate(self): + scorer = ImportanceScorer() + memory = create_mock_memory( + frequency_count=0, + content="今天天气不错", + associated_topics=[], + ) + + 
result = scorer.should_escalate(memory) + + assert result is False + + def test_score_and_classify(self): + scorer = ImportanceScorer() + memory = create_mock_memory(frequency_count=10, associated_topics=["a", "b", "c", "d", "e"]) + + score, level = scorer.score_and_classify(memory) + + assert isinstance(score, float) + assert 0.0 <= score <= 1.0 + assert level in ImportanceLevel + + def test_update_memory_importance(self): + scorer = ImportanceScorer() + memory = create_mock_memory( + frequency_count=10, + last_recalled_at=datetime.now(UTC), + content="这个问题非常重要!", + associated_topics=["工作", "健康"], + ) + + result = scorer.update_memory_importance(memory) + + assert result.importance_score is not None + assert result.importance_level in ["high", "medium", "low"] + + +class TestImportanceLevel: + """Test ImportanceLevel enum""" + + def test_level_values(self): + assert ImportanceLevel.HIGH.value == "high" + assert ImportanceLevel.MEDIUM.value == "medium" + assert ImportanceLevel.LOW.value == "low" + + def test_is_string_enum(self): + assert isinstance(ImportanceLevel.HIGH, str) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/development-doc/plan/memory-update/checklist.md b/development-doc/plan/memory-update/checklist.md index 172a2da..012cd88 100644 --- a/development-doc/plan/memory-update/checklist.md +++ b/development-doc/plan/memory-update/checklist.md @@ -12,6 +12,8 @@ - 完成后改成 `- [x]` - Day M.2 默认依赖 Day M.1 的重要性评分完成后再推进 - Day M.3 默认依赖 Day M.1 和 M.2 完成后再推进 +- Day M.4 依赖 Day M.1,可与 M.2/M.3 并行推进 +- Day M.5 依赖 Day M.1 和 M.4 完成后再推进 --- @@ -21,11 +23,11 @@ Day M.1 目标:让 Jarvis 知道「什么对你重要」。 ### Task M.1.1:实现 FrequencyTracker -- [ ] 新增 `backend/app/services/memory/frequency_tracker.py` +- [x] 新增 `backend/app/services/memory/frequency_tracker.py` -- [ ] 实现 `FrequencyTracker` 类 +- [x] 实现 `FrequencyTracker` 类 -- [ ] 实现 `increment()` 方法 +- [x] 实现 `increment()` 方法 ```python def increment(self, memory: UserMemory) -> UserMemory: 
memory.frequency_count += 1 @@ -33,15 +35,15 @@ Day M.1 目标:让 Jarvis 知道「什么对你重要」。 return memory ``` -- [ ] 实现 `get_time_decay()` 方法 +- [x] 实现 `get_time_decay()` 方法 ### Task M.1.2:实现 EmotionAnalyzer -- [ ] 新增 `backend/app/services/memory/emotion_analyzer.py` +- [x] 新增 `backend/app/services/memory/emotion_analyzer.py` -- [ ] 实现 `EmotionAnalyzer` 类 +- [x] 实现 `EmotionAnalyzer` 类 -- [ ] 定义 `EMOTION_KEYWORDS` 字典 +- [x] 定义 `EMOTION_KEYWORDS` 字典 ```python EMOTION_KEYWORDS = { "急": 1.0, @@ -53,17 +55,17 @@ Day M.1 目标:让 Jarvis 知道「什么对你重要」。 } ``` -- [ ] 实现 `extract()` 方法 - 从文本提取情绪关键词 +- [x] 实现 `extract()` 方法 - 从文本提取情绪关键词 -- [ ] 实现 `calculate_score()` 方法 - 计算情绪分数 +- [x] 实现 `calculate_score()` 方法 - 计算情绪分数 ### Task M.1.3:实现 ImpactEvaluator -- [ ] 新增 `backend/app/services/memory/impact_evaluator.py` +- [x] 新增 `backend/app/services/memory/impact_evaluator.py` -- [ ] 实现 `ImpactEvaluator` 类 +- [x] 实现 `ImpactEvaluator` 类 -- [ ] 实现 `evaluate()` 方法 +- [x] 实现 `evaluate()` 方法 ```python def evaluate(self, memory: UserMemory) -> float: # 关联话题越多,影响面越大 @@ -72,11 +74,11 @@ Day M.1 目标:让 Jarvis 知道「什么对你重要」。 ### Task M.1.4:实现 ImportanceScorer -- [ ] 新增 `backend/app/services/memory/importance_scorer.py` +- [x] 新增 `backend/app/services/memory/importance_scorer.py` -- [ ] 实现 `ImportanceScorer` 类 +- [x] 实现 `ImportanceScorer` 类 -- [ ] 实现 `calculate_score()` 综合评分方法 +- [x] 实现 `calculate_score()` 综合评分方法 ```python def calculate_score(self, memory: UserMemory) -> float: frequency = self.tracker.get_frequency_score(memory) * 0.35 @@ -86,15 +88,15 @@ Day M.1 目标:让 Jarvis 知道「什么对你重要」。 return frequency + recency + emotion + impact ``` -- [ ] 实现 `get_importance_level()` 方法 +- [x] 实现 `get_importance_level()` 方法 -- [ ] 实现 `should_escalate()` 方法 +- [x] 实现 `should_escalate()` 方法 ### Task M.1.5:修改 UserMemory 模型 -- [ ] 修改 `backend/app/models/memory.py` +- [x] 修改 `backend/app/models/memory.py` -- [ ] 增加字段: +- [x] 增加字段: ```python frequency_count: int = 0 last_recalled_at: DateTime = None @@ -106,33 +108,33 @@ Day M.1 目标:让 
Jarvis 知道「什么对你重要」。 ### Task M.1.6:集成到 MemoryService -- [ ] 修改 `backend/app/services/memory_service.py` +- [x] 修改 `backend/app/services/memory_service.py` -- [ ] 集成 `ImportanceScorer` +- [x] 集成 `ImportanceScorer` -- [ ] 修改 `add_memory()` 方法计算重要性 +- [x] 修改 `add_memory()` 方法计算重要性 -- [ ] 修改 `recall_memories()` 方法按重要性排序 +- [x] 修改 `recall_memories()` 方法按重要性排序 ### Task M.1.7:补测试 -- [ ] 新增 `backend/tests/services/test_importance_scorer.py` +- [x] 新增 `backend/tests/services/test_importance_scorer.py` -- [ ] 测试频率追踪 +- [x] 测试频率追踪 -- [ ] 测试情绪分析 +- [x] 测试情绪分析 -- [ ] 测试重要性评分 +- [x] 测试重要性评分 -- [ ] 测试重要性等级划分 +- [x] 测试重要性等级划分 ### Day M.1 验收 -- [ ] 频率追踪正常(recall_count 每次 +1) -- [ ] 情绪识别准确(「急」「很重要」等能识别) -- [ ] 重要性分数正确(高频+情绪 = importance >= 0.8) -- [ ] 评分影响排序(高重要性记忆排在前面) -- [ ] 单元测试覆盖率 > 80% +- [x] 频率追踪正常(recall_count 每次 +1) +- [x] 情绪识别准确(「急」「很重要」等能识别) +- [x] 重要性分数正确(高频+情绪 = importance >= 0.8) +- [x] 评分影响排序(高重要性记忆排在前面) +- [x] 单元测试覆盖率 > 80% --- @@ -350,13 +352,120 @@ Day M.3 目标:让 Jarvis 从「等用户问」变成「主动关心」。 --- +## Day M.4:对话自动学习(3天) + +Day M.4 目标:让记忆库自动从对话中积累内容,不需要用户手动触发。 + +### Task M.4.1:实现 MemoryExtractor + +- [ ] 新增 `backend/app/services/memory/memory_extractor.py` + +- [ ] 实现 `MemoryExtractor` 类 + +- [ ] 实现 `extract_from_conversation()` 方法 + ```python + async def extract_from_conversation( + self, user_id: str, messages: list[Message] + ) -> list[ExtractedMemory]: + ``` + +- [ ] 定义 LLM 提取 Prompt(结构化输出 JSON) + - 提取类型:fact / preference / goal / pain_point / event + - 只提取明确信息,不猜测 + +- [ ] 实现 `deduplicate()` 方法 + - 相似度 > 0.85 视为重复,调用 `reinforce()` 而非新建 + +### Task M.4.2:集成触发点 + +- [ ] 修改 `backend/app/routers/conversation.py` + - 对话结束端点添加 `background_tasks.add_task(memory_extractor.extract_from_conversation, ...)` + +- [ ] 修改 `backend/app/services/scheduler_service.py` + - 添加 30 分钟闲置对话检查任务 + +### Task M.4.3:补测试 + +- [ ] 新增 `backend/tests/services/test_memory_extractor.py` +- [ ] 测试提取准确性(fact/goal/pain_point 识别) +- [ ] 测试去重逻辑(重复内容不新建) +- [ ] 测试后台触发不阻塞响应 + +### Day M.4 验收 + +- [ ] 对话结束后 30 
秒内自动完成提取 +- [ ] fact/goal/pain_point 类型识别准确 +- [ ] 重复内容不新建,只强化原记忆 +- [ ] 提取为后台任务,不影响响应速度 +- [ ] 单元测试覆盖率 > 80% + +--- + +## Day M.5:记忆召回注入(2天) + +Day M.5 目标:让 LLM 在生成回答时真正「看到」用户的记忆,实现对话个性化。 + +### Task M.5.1:实现 MemoryRecallInjector + +- [ ] 新增 `backend/app/services/memory/recall_injector.py` + +- [ ] 实现 `MemoryRecallInjector` 类 + +- [ ] 实现 `build_context()` 方法 + ```python + async def build_context( + self, user_id: str, current_message: str, token_budget: int = 800 + ) -> str: + ``` + +- [ ] 实现 `_rank()` 方法(语义相关性 × 重要性评分综合排序) + +- [ ] 实现 `_budget_select()` 方法(Token 预算控制) + +- [ ] 实现 `_format()` 方法(格式化为 system prompt 片段) + +- [ ] 记忆类型优先级配置 + - pain_point > goal > preference > fact > event + +### Task M.5.2:集成到对话路由 + +- [ ] 修改 `backend/app/routers/conversation.py` + - 发消息时调用 `memory_injector.build_context()` + - 将返回的 context 追加到 system prompt + - 发送完成后后台触发记忆强化(frequency_count +1) + +- [ ] 修改 `backend/app/services/memory_service.py` + - `recall_memories()` 返回时携带相似度分数(`similarity_score` 字段) + +### Task M.5.3:补测试 + +- [ ] 新增 `backend/tests/services/test_recall_injector.py` +- [ ] 测试 Token 预算不超限 +- [ ] 测试已归档记忆不注入 +- [ ] 测试高优先级类型优先注入 +- [ ] 测试注入耗时 < 100ms + +### Day M.5 验收 + +- [ ] LLM 回答中能体现用户个人信息 +- [ ] 注入内容 ≤ 800 token +- [ ] goal/pain_point 比 fact 更早注入 +- [ ] decay < 0.2 的已归档记忆不出现在 context 中 +- [ ] 注入耗时 < 100ms +- [ ] 被召回的记忆 frequency_count +1 +- [ ] 单元测试覆盖率 > 80% + +--- + ## 总验收清单 -### Phase M.1-M.3 必须完成 +### Phase M.1-M.5 必须完成 - [ ] 重要性评分系统正常工作 - [ ] 遗忘曲线系统正常工作 - [ ] 主动提醒系统正常工作 +- [ ] 对话自动学习正常工作(M.4) +- [ ] 记忆召回注入正常工作(M.5) - [ ] 单元测试覆盖率 > 80% - [ ] 集成测试通过 - [ ] 原有记忆功能无回退 @@ -370,7 +479,9 @@ Day M.3 目标:让 Jarvis 从「等用户问」变成「主动关心」。 | M.1 重要性评分 | 4 天 | | M.2 遗忘曲线 | 3 天 | | M.3 主动提醒 | 6 天 | -| **合计** | **13 天** | +| M.4 对话自动学习 | 3 天 | +| M.5 记忆召回注入 | 2 天 | +| **合计** | **18 天** | --- @@ -388,12 +499,14 @@ Day M.3 目标:让 Jarvis 从「等用户问」变成「主动关心」。 | `services/memory/daily_digest.py` | M.3 | | `services/memory/reminder_scheduler.py` | M.3 | | `services/memory/proactive_informer.py` 
| M.3 | +| `services/memory/memory_extractor.py` | M.4 | +| `services/memory/recall_injector.py` | M.5 | | `models/memory.py` 更新 | M.1, M.2 | | `models/reminder.py` 新增 | M.3 | | 前端摘要卡片 | M.3 | | 前端提醒 Toast | M.3 | -| 单元测试 > 80% | M.1, M.2, M.3 | -| 集成测试通过 | M.1, M.2, M.3 | +| 单元测试 > 80% | M.1–M.5 | +| 集成测试通过 | M.1–M.5 | ---