- Add FrequencyTracker: increment(), get_frequency_score(), get_recency_score(), get_time_decay()
- Add EmotionAnalyzer: EMOTION_KEYWORDS dict, extract(), calculate_score(), get_emotion_profile()
- Add ImpactEvaluator: evaluate(), get_topic_overlap(), rank_by_impact()
- Add ImportanceScorer: composite scoring (freq 35% + recency 20% + emotion 25% + impact 20%)
- Update UserMemory model: frequency_count, emotion_tags, importance_score, importance_level, associated_topics
- Integrate ImportanceScorer into memory_service.py (recall + importance update)
- Add 37 tests for all memory scoring components
- Fix urgency patterns: remove overly broad '今天' that matched neutral text
- Update memory-update checklist: mark all M.1 tasks complete
150 lines
4.5 KiB
Python
150 lines
4.5 KiB
Python
"""
|
|
EmotionAnalyzer
|
|
|
|
Extracts emotional intensity from text and calculates emotion-based importance scores.
|
|
"""
|
|
|
|
import re
|
|
from typing import TYPE_CHECKING
|
|
|
|
if TYPE_CHECKING:
|
|
from app.models.memory import UserMemory
|
|
|
|
|
|
class EmotionAnalyzer:
    """Analyze emotional keywords in memory content.

    Text is scanned for the substrings listed in ``EMOTION_KEYWORDS`` and for
    the regexes in ``URGENCY_PATTERNS``. Each match carries a weight, and the
    highest weight found becomes the emotion-based importance score.
    """

    # Emotion keyword weights (higher = more important)
    EMOTION_KEYWORDS = {
        # High intensity
        "急": 1.0,
        "紧急": 1.0,
        "很重要": 0.9,
        "非常重要": 1.0,
        "必须": 0.9,
        # Medium-high intensity
        "困扰": 0.8,
        "烦恼": 0.7,
        "担心": 0.7,
        "焦虑": 0.8,
        "害怕": 0.8,
        "恐惧": 0.9,
        # Medium intensity
        "想解决": 0.6,
        "希望": 0.5,
        "想要": 0.4,
        "需要": 0.4,
        "渴望": 0.6,
        # Low intensity (casual/neutral)
        "无所谓": 0.1,
        "随便": 0.1,
        "都行": 0.1,
        "还好": 0.2,
        # Negative valence
        "讨厌": 0.6,
        "不喜欢": 0.5,
        "恨": 0.8,
        # Positive valence
        "喜欢": 0.5,
        "爱": 0.7,
        "开心": 0.4,
        "高兴": 0.4,
    }

    # Urgency patterns — only match when there's an explicit time-bound word
    # NOTE: "今天" alone is too common and matches neutral sentences like "今天天气不错"
    URGENCY_PATTERNS = [
        (re.compile(r"马上|立刻|立即|赶紧"), 1.0),
        (re.compile(r"今天内|今天必须|今日必须"), 0.8),
        (re.compile(r"明天|明天之前|明日"), 0.6),
        (re.compile(r"这周|本周"), 0.4),
        (re.compile(r"尽快|早点"), 0.7),
    ]

    # Opening marker of the synthetic tags extract() emits for urgency
    # matches, e.g. "[URGENCY:0.8]".
    _URGENCY_PREFIX = "[URGENCY:"

    @classmethod
    def _tag_weight(cls, tag):
        """Return the weight carried by one tag, or None if it has none.

        A tag is either a key of ``EMOTION_KEYWORDS`` or an urgency marker of
        the form ``[URGENCY:<float>]`` as produced by ``extract()``.
        Malformed urgency markers and unknown tags yield None.
        """
        if not isinstance(tag, str):
            return None
        if tag.startswith(cls._URGENCY_PREFIX):
            try:
                return float(tag.split(":")[1].rstrip("]"))
            except (ValueError, IndexError):
                return None
        return cls.EMOTION_KEYWORDS.get(tag)

    @classmethod
    def _max_weight(cls, tags):
        """Return the highest weight among ``tags`` (0.0 when none is known)."""
        weights = [w for w in map(cls._tag_weight, tags) if w is not None]
        # Seed with 0.0 so an empty or all-unknown tag list scores zero.
        return max([0.0, *weights])

    def extract(self, text: str) -> list[str]:
        """Extract emotion keywords from text.

        Keywords are matched as plain substrings, so overlapping keywords are
        all reported (e.g. text containing "不喜欢" also reports "喜欢").
        NOTE(review): confirm whether overlapping matches are intended.

        Args:
            text: Text to scan; empty or None yields an empty list.

        Returns:
            Matched emotion keywords in ``EMOTION_KEYWORDS`` order, followed
            by one ``[URGENCY:<weight>]`` marker per matching urgency pattern.
        """
        if not text:
            return []

        text_lower = text.lower()
        matched = [kw for kw in self.EMOTION_KEYWORDS if kw in text_lower]

        # Check urgency patterns
        matched.extend(
            f"[URGENCY:{weight}]"
            for pattern, weight in self.URGENCY_PATTERNS
            if pattern.search(text)
        )
        return matched

    def calculate_score(self, memory: "UserMemory") -> float:
        """Calculate emotion-based importance score (0.0 - 1.0).

        Uses the highest-weighted tag in ``memory.emotion_tags`` when any of
        them carries a positive weight. Otherwise it falls back to the
        highest-weighted keyword or urgency pattern found in
        ``memory.content``.

        Args:
            memory: Object exposing ``content`` and ``emotion_tags``; either
                may be None or empty.

        Returns:
            The highest weight found, capped at 1.0; 0.0 when nothing matches.
        """
        content = memory.content or ""
        emotion_tags = memory.emotion_tags or []

        # Check emotion_tags first (pre-extracted). Urgency markers count
        # here too, since extract() emits them alongside plain keywords.
        tag_score = self._max_weight(emotion_tags)
        if tag_score > 0:
            return min(1.0, tag_score)

        # No usable tag: extract from content instead.
        return min(1.0, self._max_weight(self.extract(content)))

    def get_emotion_profile(self, text: str) -> dict:
        """Get detailed emotion profile for text.

        Args:
            text: Text to analyze; empty or None yields an all-zero profile.

        Returns:
            Dict with ``matched_keywords`` (the output of ``extract``),
            ``max_weight``, ``avg_weight`` and ``sentiment``. ``sentiment`` is
            "positive" when the average weight exceeds 0.5, else "neutral".
            NOTE(review): the label is driven by intensity only, so strongly
            negative keywords such as "恨" are also reported as "positive";
            confirm this is intended.
        """
        matched = self.extract(text)
        weights = [w for w in map(self._tag_weight, matched) if w is not None]
        avg_weight = sum(weights) / len(weights) if weights else 0.0

        return {
            "matched_keywords": matched,
            "max_weight": max(weights) if weights else 0.0,
            "avg_weight": avg_weight,
            "sentiment": "positive" if avg_weight > 0.5 else "neutral",
        }
|