feat(memory): Day M.1 complete - importance scoring system

- Add FrequencyTracker: increment(), get_frequency_score(), get_recency_score(), get_time_decay() - Add EmotionAnalyzer: EMOTION_KEYWORDS dict, extract(), calculate_score(), get_emotion_profile() - Add ImpactEvaluator: evaluate(), get_topic_overlap(), rank_by_impact() - Add ImportanceScorer: composite scoring (freq 35% + recency 20% + emotion 25% + impact 20%) - Update UserMemory model: frequency_count, emotion_tags, importance_score, importance_level, associated_topics - Integrate ImportanceScorer into memory_service.py (recall + importance update) - Add 37 tests for all memory scoring components - Fix urgency patterns: remove overly broad '今天' that matched neutral text - Update memory-update checklist: mark all M.1 tasks complete
2026-04-05 13:22:23 +08:00
parent bfe3b6bb9d
commit 9bfa0dcc11
9 changed files with 1016 additions and 54 deletions
--- a/backend/app/services/memory/emotion_analyzer.py
+++ b/backend/app/services/memory/emotion_analyzer.py
@@ -0,0 +1,149 @@
+"""
+EmotionAnalyzer
+
+Extracts emotional intensity from text and calculates emotion-based importance scores.
+"""
+
+import re
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from app.models.memory import UserMemory
+
+
+class EmotionAnalyzer:
+    """Analyze emotional keywords in memory content"""
+
+    # Emotion keyword weights (higher = more important)
+    EMOTION_KEYWORDS = {
+        # High intensity
+        "急": 1.0,
+        "紧急": 1.0,
+        "很重要": 0.9,
+        "非常重要": 1.0,
+        "必须": 0.9,
+        # Medium-high intensity
+        "困扰": 0.8,
+        "烦恼": 0.7,
+        "担心": 0.7,
+        "焦虑": 0.8,
+        "害怕": 0.8,
+        "恐惧": 0.9,
+        # Medium intensity
+        "想解决": 0.6,
+        "希望": 0.5,
+        "想要": 0.4,
+        "需要": 0.4,
+        "渴望": 0.6,
+        # Low intensity (casual/neutral)
+        "无所谓": 0.1,
+        "随便": 0.1,
+        "都行": 0.1,
+        "还好": 0.2,
+        # Negative valence
+        "讨厌": 0.6,
+        "不喜欢": 0.5,
+        "恨": 0.8,
+        "不喜欢": 0.5,
+        # Positive valence
+        "喜欢": 0.5,
+        "爱": 0.7,
+        "开心": 0.4,
+        "高兴": 0.4,
+    }
+
+    # Urgency patterns — only match when there's an explicit time-bound word
+    # NOTE: "今天" alone is too common and matches neutral sentences like "今天天气不错"
+    URGENCY_PATTERNS = [
+        (re.compile(r"马上|立刻|立即|赶紧"), 1.0),
+        (re.compile(r"今天内|今天必须|今日必须"), 0.8),
+        (re.compile(r"明天|明天之前|明日"), 0.6),
+        (re.compile(r"这周|本周"), 0.4),
+        (re.compile(r"尽快|早点"), 0.7),
+    ]
+
+    def extract(self, text: str) -> list[str]:
+        """Extract emotion keywords from text
+
+        Returns:
+            List of matched emotion keywords
+        """
+        if not text:
+            return []
+
+        matched = []
+        text_lower = text.lower()
+
+        for keyword, weight in self.EMOTION_KEYWORDS.items():
+            if keyword in text_lower:
+                matched.append(keyword)
+
+        # Check urgency patterns
+        for pattern, weight in self.URGENCY_PATTERNS:
+            if pattern.search(text):
+                matched.append(f"[URGENCY:{weight}]")
+
+        return matched
+
+    def calculate_score(self, memory: "UserMemory") -> float:
+        """Calculate emotion-based importance score (0.0 - 1.0)
+
+        Uses the highest-weighted emotion keyword found in the content.
+        """
+        content = memory.content or ""
+        emotion_tags = memory.emotion_tags or []
+
+        # Check emotion_tags first (pre-extracted)
+        if emotion_tags:
+            max_weight = 0.0
+            for tag in emotion_tags:
+                if tag in self.EMOTION_KEYWORDS:
+                    max_weight = max(max_weight, self.EMOTION_KEYWORDS[tag])
+            if max_weight > 0:
+                return max_weight
+
+        # Extract from content
+        matched = self.extract(content)
+        if not matched:
+            return 0.0
+
+        # Get highest weight
+        max_weight = 0.0
+        for keyword in matched:
+            if keyword.startswith("[URGENCY:"):
+                # Extract urgency weight
+                try:
+                    weight = float(keyword.split(":")[1].rstrip("]"))
+                    max_weight = max(max_weight, weight)
+                except (ValueError, IndexError):
+                    pass
+            elif keyword in self.EMOTION_KEYWORDS:
+                max_weight = max(max_weight, self.EMOTION_KEYWORDS[keyword])
+
+        return min(1.0, max_weight)
+
+    def get_emotion_profile(self, text: str) -> dict:
+        """Get detailed emotion profile for text
+
+        Returns:
+            Dict with matched keywords, max_weight, and sentiment
+        """
+        matched = self.extract(text)
+        weights = []
+        for keyword in matched:
+            if keyword.startswith("[URGENCY:"):
+                try:
+                    weights.append(float(keyword.split(":")[1].rstrip("]")))
+                except (ValueError, IndexError):
+                    pass
+            elif keyword in self.EMOTION_KEYWORDS:
+                weights.append(self.EMOTION_KEYWORDS[keyword])
+
+        return {
+            "matched_keywords": matched,
+            "max_weight": max(weights) if weights else 0.0,
+            "avg_weight": sum(weights) / len(weights) if weights else 0.0,
+            "sentiment": "positive"
+            if (weights and sum(weights) / len(weights) > 0.5)
+            else "neutral",
+        }