feat(memory): Day M.1 complete - importance scoring system

- Add FrequencyTracker: increment(), get_frequency_score(), get_recency_score(), get_time_decay()
- Add EmotionAnalyzer: EMOTION_KEYWORDS dict, extract(), calculate_score(), get_emotion_profile()
- Add ImpactEvaluator: evaluate(), get_topic_overlap(), rank_by_impact()
- Add ImportanceScorer: composite scoring (freq 35% + recency 20% + emotion 25% + impact 20%)
- Update UserMemory model: frequency_count, emotion_tags, importance_score, importance_level, associated_topics
- Integrate ImportanceScorer into memory_service.py (recall + importance update)
- Add 37 tests for all memory scoring components
- Fix urgency patterns: remove overly broad '今天' that matched neutral text
- Update memory-update checklist: mark all M.1 tasks complete
This commit is contained in:
2026-04-05 13:22:23 +08:00
parent bfe3b6bb9d
commit 9bfa0dcc11
9 changed files with 1016 additions and 54 deletions

View File

@@ -0,0 +1,13 @@
"""Memory Services Module"""
from app.services.memory.frequency_tracker import FrequencyTracker
from app.services.memory.emotion_analyzer import EmotionAnalyzer
from app.services.memory.impact_evaluator import ImpactEvaluator
from app.services.memory.importance_scorer import ImportanceScorer
# Public names exported by ``from app.services.memory import *``: the three
# component scorers plus the composite ImportanceScorer that combines them.
__all__ = [
    "FrequencyTracker",
    "EmotionAnalyzer",
    "ImpactEvaluator",
    "ImportanceScorer",
]

View File

@@ -0,0 +1,149 @@
"""
EmotionAnalyzer
Extracts emotional intensity from text and calculates emotion-based importance scores.
"""
import re
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from app.models.memory import UserMemory
class EmotionAnalyzer:
    """Score the emotional intensity of memory text.

    Emotion keywords are matched as plain substrings. Urgency phrases are
    matched with regular expressions and reported as synthetic
    ``[URGENCY:<weight>]`` tags so they can travel in the same list as the
    keywords.
    """

    # Keyword -> intensity weight in [0.0, 1.0]; a higher weight marks the
    # memory as more important.
    # NOTE(review): this table previously held three entries whose key was the
    # empty string (weights 1.0, 0.8 and 0.7). An empty string is a substring
    # of every text, so any non-empty content scored at least 0.7. Those
    # entries are dropped here, together with a duplicated "不喜欢" key.
    # TODO: restore the intended keywords if they were lost by an encoding
    # problem.
    EMOTION_KEYWORDS = {
        # High intensity
        "紧急": 1.0,
        "很重要": 0.9,
        "非常重要": 1.0,
        "必须": 0.9,
        # Medium-high intensity
        "困扰": 0.8,
        "烦恼": 0.7,
        "担心": 0.7,
        "焦虑": 0.8,
        "害怕": 0.8,
        "恐惧": 0.9,
        # Medium intensity
        "想解决": 0.6,
        "希望": 0.5,
        "想要": 0.4,
        "需要": 0.4,
        "渴望": 0.6,
        # Low intensity (casual/neutral)
        "无所谓": 0.1,
        "随便": 0.1,
        "都行": 0.1,
        "还好": 0.2,
        # Negative valence
        "讨厌": 0.6,
        "不喜欢": 0.5,
        # Positive valence
        "喜欢": 0.5,
        "开心": 0.4,
        "高兴": 0.4,
    }

    # Urgency patterns — only match when there's an explicit time-bound word.
    # NOTE: "今天" alone is too common and matches neutral sentences such as
    # "今天天气不错", so only the bounded forms are listed.
    URGENCY_PATTERNS = [
        (re.compile(r"马上|立刻|立即|赶紧"), 1.0),
        (re.compile(r"今天内|今天必须|今日必须"), 0.8),
        (re.compile(r"明天|明天之前|明日"), 0.6),
        (re.compile(r"这周|本周"), 0.4),
        (re.compile(r"尽快|早点"), 0.7),
    ]

    # Prefix of the synthetic tags that extract() emits for urgency matches.
    _URGENCY_PREFIX = "[URGENCY:"

    def _weights_for(self, tokens) -> list[float]:
        """Return the weight of every recognised token, skipping the rest.

        A token is recognised when it is an ``[URGENCY:<weight>]`` tag with a
        parseable weight, or a key of ``EMOTION_KEYWORDS``.
        """
        weights = []
        for token in tokens:
            if not isinstance(token, str):
                continue
            if token.startswith(self._URGENCY_PREFIX):
                raw = token[len(self._URGENCY_PREFIX):].rstrip("]")
                try:
                    weights.append(float(raw))
                except ValueError:
                    # Malformed urgency tag: ignore it rather than fail scoring.
                    continue
            elif token in self.EMOTION_KEYWORDS:
                weights.append(self.EMOTION_KEYWORDS[token])
        return weights

    def extract(self, text: str) -> list[str]:
        """Extract emotion keywords and urgency tags from ``text``.

        Returns:
            Matched keywords in ``EMOTION_KEYWORDS`` order, followed by one
            ``[URGENCY:<weight>]`` tag per matching urgency pattern. Empty
            when ``text`` is empty or nothing matches.
        """
        if not text:
            return []
        lowered = text.lower()
        # Skip empty keys defensively: "" is contained in every string.
        matched = [kw for kw in self.EMOTION_KEYWORDS if kw and kw in lowered]
        matched.extend(
            f"[URGENCY:{weight}]"
            for pattern, weight in self.URGENCY_PATTERNS
            if pattern.search(text)
        )
        return matched

    def calculate_score(self, memory: "UserMemory") -> float:
        """Calculate the emotion-based importance score (0.0 - 1.0).

        The pre-extracted ``memory.emotion_tags`` are consulted first; if they
        yield a positive weight, the highest one is returned. Otherwise the
        score is the highest weight found by running ``extract`` on
        ``memory.content``. Urgency tags are honoured in both paths.
        """
        tag_weights = self._weights_for(memory.emotion_tags or [])
        best_tag = max(tag_weights, default=0.0)
        if best_tag > 0:
            return min(1.0, best_tag)

        content_weights = self._weights_for(self.extract(memory.content or ""))
        return min(1.0, max(content_weights, default=0.0))

    def get_emotion_profile(self, text: str) -> dict:
        """Get a detailed emotion profile for ``text``.

        Returns:
            Dict with ``matched_keywords``, ``max_weight``, ``avg_weight`` and
            ``sentiment``.
        """
        matched = self.extract(text)
        weights = self._weights_for(matched)
        average = sum(weights) / len(weights) if weights else 0.0
        # NOTE(review): "sentiment" is derived from average intensity only, so
        # strongly negative text is also labelled "positive" — confirm intent.
        sentiment = "positive" if (weights and average > 0.5) else "neutral"
        return {
            "matched_keywords": matched,
            "max_weight": max(weights) if weights else 0.0,
            "avg_weight": average,
            "sentiment": sentiment,
        }

View File

@@ -0,0 +1,84 @@
"""
FrequencyTracker
Tracks how often a memory is recalled and calculates frequency/recency scores.
"""
from datetime import UTC, datetime
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from app.models.memory import UserMemory
class FrequencyTracker:
"""Track and score memory recall frequency"""
# Score weights
MAX_FREQUENCY = 10 # Cap frequency count for scoring
RECENCY_DECAY_DAYS = 30 # After 30 days, recency score drops significantly
def increment(self, memory: "UserMemory") -> "UserMemory":
"""Increment recall count and update last recalled timestamp"""
memory.frequency_count = (memory.frequency_count or 0) + 1
memory.last_recalled_at = datetime.now(UTC)
return memory
def get_frequency_score(self, memory: "UserMemory") -> float:
"""Calculate normalized frequency score (0.0 - 1.0)
Uses logarithmic scaling to prevent high-frequency memories
from dominating completely.
"""
count = memory.frequency_count or 0
if count == 0:
return 0.0
# Logarithmic scaling: more recalls have diminishing returns
# log(1+x) / log(1+MAX) gives 0-1 range
import math
score = math.log(1 + count) / math.log(1 + self.MAX_FREQUENCY)
return min(1.0, max(0.0, score))
def get_recency_score(self, memory: "UserMemory") -> float:
"""Calculate recency score (0.0 - 1.0)
Memory recalled recently scores higher. Uses exponential decay.
"""
last_recalled = memory.last_recalled_at
if last_recalled is None:
return 0.0
now = datetime.now(UTC)
if isinstance(last_recalled, datetime):
if last_recalled.tzinfo is None:
last_recalled = last_recalled.replace(tzinfo=UTC)
days_since = (now - last_recalled).total_seconds() / 86400
else:
days_since = self.RECENCY_DECAY_DAYS
# Exponential decay: half-life of RECENCY_DECAY_DAYS
import math
decay = math.exp(-days_since / self.RECENCY_DECAY_DAYS)
return min(1.0, max(0.0, decay))
def get_time_decay(self, memory: "UserMemory") -> float:
"""Calculate time-based decay factor for forgetting curve"""
last_accessed = getattr(memory, "last_accessed_at", None)
if last_accessed is None:
last_accessed = memory.last_recalled_at
if last_accessed is None:
return 1.0
now = datetime.now(UTC)
if isinstance(last_accessed, datetime):
if last_accessed.tzinfo is None:
last_accessed = last_accessed.replace(tzinfo=UTC)
days_since = (now - last_accessed).total_seconds() / 86400
else:
days_since = 0
import math
return math.exp(-days_since / self.RECENCY_DECAY_DAYS)

View File

@@ -0,0 +1,52 @@
"""
ImpactEvaluator
Evaluates the breadth of impact a memory has based on associated topics.
"""
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from app.models.memory import UserMemory
class ImpactEvaluator:
    """Measure how broadly a memory reaches, judged by its associated topics."""

    # Topic count at which the impact score saturates at 1.0.
    IMPACT_THRESHOLD = 5

    def evaluate(self, memory: "UserMemory") -> float:
        """Return the impact score of ``memory`` in the range 0.0 - 1.0.

        The score grows linearly with the number of associated topics and is
        capped once ``IMPACT_THRESHOLD`` topics are reached. A memory without
        topics scores 0.0.
        """
        topic_count = len(memory.associated_topics or [])
        if topic_count == 0:
            return 0.0
        return min(1.0, topic_count / self.IMPACT_THRESHOLD)

    def get_topic_overlap(self, memory_a: "UserMemory", memory_b: "UserMemory") -> float:
        """Return the share of topics two memories have in common (0.0 - 1.0).

        Computed as shared topics divided by all distinct topics across both
        memories; 0.0 when either memory has no topics.
        """
        left = set(memory_a.associated_topics or [])
        right = set(memory_b.associated_topics or [])
        if not (left and right):
            return 0.0
        shared = left.intersection(right)
        combined = left.union(right)
        return len(shared) / len(combined)

    def rank_by_impact(self, memories: list["UserMemory"]) -> list["UserMemory"]:
        """Return a new list of ``memories`` ordered from highest to lowest impact."""
        ranked = list(memories)
        ranked.sort(key=self.evaluate, reverse=True)
        return ranked

View File

@@ -0,0 +1,103 @@
"""
ImportanceScorer
Composite importance scoring combining frequency, recency, emotion, and impact.
"""
from enum import Enum
from typing import TYPE_CHECKING
from app.services.memory.frequency_tracker import FrequencyTracker
from app.services.memory.emotion_analyzer import EmotionAnalyzer
from app.services.memory.impact_evaluator import ImpactEvaluator
if TYPE_CHECKING:
from app.models.memory import UserMemory
class ImportanceLevel(str, Enum):
    """Importance tier assigned to a memory.

    Members also subclass ``str``, so each one compares equal to its
    lowercase string value.
    """

    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
class ImportanceScorer:
    """Blend frequency, recency, emotion and impact into one importance score.

    The composite is the weighted sum
        0.35 * frequency + 0.20 * recency + 0.25 * emotion + 0.20 * impact
    clamped to [0.0, 1.0] and rounded to three decimals.
    """

    # Component weights of the composite score.
    WEIGHT_FREQUENCY = 0.35
    WEIGHT_RECENCY = 0.20
    WEIGHT_EMOTION = 0.25
    WEIGHT_IMPACT = 0.20

    # Lower bounds of each importance level.
    HIGH_THRESHOLD = 0.8
    MEDIUM_THRESHOLD = 0.5
    LOW_THRESHOLD = 0.0  # not read by any method of this class

    def __init__(self):
        self.tracker = FrequencyTracker()
        self.emotion_analyzer = EmotionAnalyzer()
        self.impact_evaluator = ImpactEvaluator()

    def calculate_score(self, memory: "UserMemory") -> float:
        """Return the composite importance score of ``memory`` (0.0 - 1.0)."""
        weighted_frequency = self.WEIGHT_FREQUENCY * self.tracker.get_frequency_score(memory)
        weighted_recency = self.WEIGHT_RECENCY * self.tracker.get_recency_score(memory)
        weighted_emotion = self.WEIGHT_EMOTION * self.emotion_analyzer.calculate_score(memory)
        weighted_impact = self.WEIGHT_IMPACT * self.impact_evaluator.evaluate(memory)

        combined = weighted_frequency + weighted_recency + weighted_emotion + weighted_impact
        bounded = max(0.0, combined)
        bounded = min(1.0, bounded)
        return round(bounded, 3)

    def get_importance_level(self, score: float) -> ImportanceLevel:
        """Map ``score`` onto HIGH, MEDIUM or LOW using the class thresholds."""
        if score >= self.HIGH_THRESHOLD:
            return ImportanceLevel.HIGH
        if score >= self.MEDIUM_THRESHOLD:
            return ImportanceLevel.MEDIUM
        return ImportanceLevel.LOW

    def should_escalate(self, memory: "UserMemory") -> bool:
        """Tell whether ``memory`` should be promoted to higher importance.

        Escalation happens when the composite score reaches
        ``HIGH_THRESHOLD``, or failing that, when the emotion score alone is
        at least 0.9.
        """
        if self.calculate_score(memory) >= self.HIGH_THRESHOLD:
            return True
        # The composite is below the bar; very intense emotion still escalates.
        return self.emotion_analyzer.calculate_score(memory) >= 0.9

    def score_and_classify(self, memory: "UserMemory") -> tuple[float, ImportanceLevel]:
        """Return ``(score, level)`` for ``memory`` in a single call."""
        composite = self.calculate_score(memory)
        return composite, self.get_importance_level(composite)

    def update_memory_importance(self, memory: "UserMemory") -> "UserMemory":
        """Write the fresh score and level onto ``memory`` and return it.

        ``importance_level`` receives the enum's string value.
        """
        composite, tier = self.score_and_classify(memory)
        memory.importance_score = composite
        if isinstance(tier, ImportanceLevel):
            memory.importance_level = tier.value
        else:
            memory.importance_level = tier
        return memory

View File

@@ -7,6 +7,7 @@ Jarvis 记忆系统 (基于 Mem0)
import logging
import os
import re
import json
from datetime import UTC, datetime
from typing import Optional, Any
from sqlalchemy import select, desc, func
@@ -15,6 +16,10 @@ from app.models.conversation import Conversation, Message
from app.models.memory import UserMemory
from app.models.user import User
from app.services.brain_service import BrainService
from app.services.memory.frequency_tracker import FrequencyTracker
from app.services.memory.emotion_analyzer import EmotionAnalyzer
from app.services.memory.impact_evaluator import ImpactEvaluator
from app.services.memory.importance_scorer import ImportanceScorer
from app.config import settings as _settings
try:
@@ -312,8 +317,7 @@ def _extract_memory_query_tokens(query: str) -> list[str]:
tokens.append(stripped_chunk)
if len(stripped_chunk) > 6:
tokens.extend(
stripped_chunk[index:index + 4]
for index in range(len(stripped_chunk) - 3)
stripped_chunk[index : index + 4] for index in range(len(stripped_chunk) - 3)
)
return list(dict.fromkeys(tokens))
@@ -344,16 +348,21 @@ async def recall_user_memories(
query_tokens = _extract_memory_query_tokens(query)
statement = select(UserMemory).where(UserMemory.user_id == user_id)
result = await db.execute(statement.order_by(UserMemory.importance.desc(), UserMemory.created_at.desc()))
result = await db.execute(
statement.order_by(UserMemory.importance_score.desc(), UserMemory.created_at.desc())
)
fallback_memories = list(result.scalars().all())
if _contains_hint(_normalize_query(query), MEMORY_QUERY_HINTS) or _matches_memory_query_pattern(_normalize_query(query)):
if _contains_hint(_normalize_query(query), MEMORY_QUERY_HINTS) or _matches_memory_query_pattern(
_normalize_query(query)
):
return fallback_memories[:top_k]
if query_tokens:
matched_memories = [
memory for memory in fallback_memories
if any(token in (memory.content or '').lower() for token in query_tokens)
memory
for memory in fallback_memories
if any(token in (memory.content or "").lower() for token in query_tokens)
]
return matched_memories[:top_k]
@@ -361,13 +370,25 @@ async def recall_user_memories(
async def _mark_memories_recalled(db: AsyncSession, memories: list[UserMemory]) -> None:
    """Record a recall on each memory, refresh its importance, then commit.

    Every memory in ``memories`` is flagged as recalled, its ``recall_count``
    is incremented, ``frequency_count`` is set to the same value, and its
    importance score and level are recomputed. All memories share one recall
    timestamp. Nothing is committed when ``memories`` is empty.

    Args:
        db: Session used to commit the updated memories.
        memories: Memories that were just returned by a recall.
    """
    if not memories:
        return

    recalled_at = datetime.now(UTC)
    # ImportanceScorer comes from the module-level import; it builds its own
    # FrequencyTracker, so no separate tracker instance is needed here.
    scorer = ImportanceScorer()

    for memory in memories:
        memory.is_recalled = True
        memory.recall_count = (memory.recall_count or 0) + 1
        memory.last_recalled_at = recalled_at
        # Keep the scoring counter in sync with the recall counter before
        # the importance score is recomputed from it.
        memory.frequency_count = memory.recall_count
        scorer.update_memory_importance(memory)

    await db.commit()
@@ -417,9 +438,7 @@ MEMORY_QUERY_HINTS = (
"偏好",
"习惯",
)
MEMORY_QUERY_PATTERNS = (
re.compile(r"\bremember\s+(?:that\s+)?i\b"),
)
MEMORY_QUERY_PATTERNS = (re.compile(r"\bremember\s+(?:that\s+)?i\b"),)
GROUNDING_QUERY_HINTS = (
"根据文档",
"严格根据",