feat(skills): enhance skills system with matching and evaluation

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
2026-04-08 00:11:34 +08:00
parent 4ef7549efe
commit 72a60c698a
10 changed files with 374 additions and 16 deletions

View File

@@ -0,0 +1,38 @@
from __future__ import annotations
from datetime import datetime, timezone
from typing import Any, Literal
from pydantic import BaseModel, Field
SkillStatus = Literal["candidate", "shadow", "active", "deprecated", "retired"]
SkillInjectionMode = Literal["metadata_only", "summary", "full"]
class SkillShortlistEntry(BaseModel):
skill_name: str
source: str = "runtime"
source_id: str | None = None
status: SkillStatus = "active"
scope: list[str] = Field(default_factory=list)
effectiveness: float | None = None
score: float = 0.0
rationale: str | None = None
summary: str | None = None
matched_terms: list[str] = Field(default_factory=list)
injection_mode: SkillInjectionMode = "metadata_only"
metadata: dict[str, Any] = Field(default_factory=dict)
class SkillActivationRecord(BaseModel):
skill_name: str
source: str = "runtime"
source_id: str | None = None
status: SkillStatus = "active"
injection_mode: SkillInjectionMode = "metadata_only"
matched_terms: list[str] = Field(default_factory=list)
rationale: str | None = None
activated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
outcome: str | None = None
metadata: dict[str, Any] = Field(default_factory=dict)

View File

@@ -1,16 +1 @@
"""Skills 注册表 - Phase 9"""
from app.agents.skills.registry import SkillRegistry, get_skill_registry
from app.agents.skills.metadata import SkillMetadata
from app.agents.skills.loaders.local_loader import LocalSkillLoader
from app.agents.skills.loaders.plugin_loader import PluginSkillLoader
from app.agents.skills.mcp_builder import MCPSkillBuilder
__all__ = [
"SkillRegistry",
"SkillMetadata",
"LocalSkillLoader",
"PluginSkillLoader",
"MCPSkillBuilder",
"get_skill_registry",
]
"""Skill package."""

View File

@@ -0,0 +1,14 @@
from __future__ import annotations
from app.models.skill import Skill
def summarize_skill_effectiveness(skill: Skill) -> dict[str, object]:
return {
"name": skill.name,
"status": skill.status,
"effectiveness": skill.effectiveness,
"activation_count": skill.activation_count,
"candidate_count": getattr(skill, "candidate_count", 0),
"last_activated_at": skill.last_activated_at.isoformat() if skill.last_activated_at else None,
}

View File

@@ -0,0 +1,58 @@
from __future__ import annotations
from datetime import UTC, datetime, timedelta
from app.agents.schemas.learning import SessionRetrospective, SkillCandidate
from app.agents.skills.models import SkillLifecycleDecision
from app.services.skill_service import SkillService
class SkillPromotionEvaluator:
def __init__(self, db):
self.db = db
self.skill_service = SkillService(db)
async def sync_retrospective(
self,
*,
user_id: str,
retrospective: SessionRetrospective,
) -> list[SkillLifecycleDecision]:
decisions: list[SkillLifecycleDecision] = []
for candidate in retrospective.skill_candidates:
decisions.append(
await self.skill_service.upsert_learned_candidate(
user_id=user_id,
candidate=candidate,
primary_agent=retrospective.primary_agent,
evidence_refs=candidate.evidence_refs,
)
)
outcome_score = self._derive_outcome_score(retrospective)
for skill_name in retrospective.used_skill_names:
decision = await self.skill_service.record_activation_feedback(
user_id=user_id,
skill_name=skill_name,
outcome_score=outcome_score,
evidence_refs=retrospective.evidence_refs,
)
if decision is not None:
decisions.append(decision)
return decisions
@staticmethod
def _derive_outcome_score(retrospective: SessionRetrospective) -> float:
if retrospective.verification_status == "passed":
return 0.9
if retrospective.verification_status == "skipped":
return 0.55
if retrospective.verification_status == "failed":
return 0.15
return 0.7 if retrospective.outcome == "completed" else 0.2
def next_review_after(days: int = 7) -> datetime:
return datetime.now(UTC) + timedelta(days=days)

View File

@@ -0,0 +1,32 @@
from __future__ import annotations
import re
def extract_match_terms(text: str | None) -> list[str]:
source = (text or "").lower()
terms = [token for token in re.findall(r"[a-z0-9_]+", source) if len(token) >= 3]
for chunk in re.findall(r"[\u4e00-\u9fff]+", text or ""):
if len(chunk) >= 2:
terms.append(chunk)
if len(chunk) > 4:
for index in range(len(chunk) - 1):
terms.append(chunk[index : index + 2])
return list(dict.fromkeys(terms))
def score_text_match(query_text: str, *corpus_parts: str | None) -> tuple[float, list[str]]:
query_terms = extract_match_terms(query_text)
if not query_terms:
return 0.0, []
corpus = " ".join(part for part in corpus_parts if part).lower()
matched_terms = [term for term in query_terms if term and term in corpus]
if not matched_terms:
return 0.0, []
coverage = len(matched_terms) / max(len(query_terms), 1)
density = min(len(matched_terms), 4) / 4
return round(min(1.0, coverage * 0.7 + density * 0.3), 3), matched_terms

View File

@@ -20,6 +20,10 @@ class SkillMetadata:
source_id: str = "" # 来源 ID
enabled: bool = True # 是否启用
tools: list[str] = field(default_factory=list) # 关联的工具
status: str = "active" # candidate/shadow/active/deprecated/retired
scope: list[str] = field(default_factory=list)
effectiveness: float | None = None
review_after: str | None = None
def to_dict(self) -> dict[str, Any]:
return {
@@ -35,6 +39,10 @@ class SkillMetadata:
"source_id": self.source_id,
"enabled": self.enabled,
"tools": self.tools,
"status": self.status,
"scope": self.scope,
"effectiveness": self.effectiveness,
"review_after": self.review_after,
}
@classmethod

View File

@@ -0,0 +1,29 @@
from __future__ import annotations
from datetime import datetime
from typing import Literal
from pydantic import BaseModel, Field
SkillLifecycleAction = Literal[
"created_candidate",
"promoted_to_shadow",
"promoted_to_active",
"degraded_to_deprecated",
"retired",
"reactivated",
"feedback_recorded",
"no_change",
]
class SkillLifecycleDecision(BaseModel):
skill_name: str
action: SkillLifecycleAction
previous_status: str | None = None
new_status: str
reason: str
evidence_refs: list[dict[str, object]] = Field(default_factory=list)
confidence: float | None = None
review_after: datetime | None = None

View File

@@ -0,0 +1,27 @@
from __future__ import annotations
from app.agents.schemas.skills import SkillInjectionMode, SkillShortlistEntry
MAX_SUMMARY_CHARS = 120
def choose_injection_mode(score: float, summary_available: bool) -> SkillInjectionMode:
if score >= 0.75 and summary_available:
return "summary"
return "metadata_only"
def render_skill_shortlist_context(entries: list[SkillShortlistEntry]) -> str:
if not entries:
return ""
lines = ["[Task-Scoped Skills]"]
for entry in entries[:3]:
detail = entry.summary or "Relevant to the current request."
detail = detail[:MAX_SUMMARY_CHARS]
lines.append(f"- {entry.skill_name} | mode={entry.injection_mode} | score={entry.score:.2f}")
lines.append(f" {detail}")
if entry.matched_terms:
lines.append(f" matched_terms={', '.join(entry.matched_terms[:6])}")
return "\n".join(lines)

View File

@@ -0,0 +1,153 @@
from __future__ import annotations
from collections import OrderedDict
from app.agents.schemas.skills import SkillShortlistEntry
from app.agents.skills.matcher import score_text_match
from app.agents.skills.policy import choose_injection_mode, render_skill_shortlist_context
from app.agents.skills.registry import get_skill_registry
from app.services.skill_service import SkillService
class RuntimeSkillRetriever:
def __init__(self, db):
self.db = db
async def shortlist(
self,
*,
user_id: str,
query_text: str,
memory_context: str | None = None,
retrospectives: list[dict] | None = None,
include_learned: bool = True,
limit: int = 3,
) -> list[SkillShortlistEntry]:
deduped: "OrderedDict[str, SkillShortlistEntry]" = OrderedDict()
retrospective_text = "\n".join(
(item.get("summary") or item.get("summary_text") or "")
for item in (retrospectives or [])
if isinstance(item, dict)
)
service = SkillService(self.db)
for skill in await service.list_runtime_candidates(user_id, include_learned=include_learned):
score, matched_terms = score_text_match(
query_text,
skill.name,
skill.description,
skill.instructions,
retrospective_text,
memory_context,
)
if score <= 0:
continue
entry = SkillShortlistEntry(
skill_name=skill.name,
source="database",
source_id=skill.id,
scope=[skill.agent_type, skill.visibility],
status=skill.status,
effectiveness=skill.effectiveness,
score=score,
matched_terms=matched_terms,
rationale=(
"Shadow skill matched current request; keep metadata-only injection."
if skill.status == "shadow"
else "Matched against DB skill metadata and instructions."
),
summary=skill.description or (skill.instructions[:160] if skill.instructions else None),
injection_mode=(
"metadata_only"
if skill.status == "shadow"
else choose_injection_mode(score, bool(skill.description or skill.instructions))
),
)
self._upsert(deduped, entry)
registry = get_skill_registry()
if not registry.list_all():
try:
registry.load_all()
except Exception:
pass
for skill in registry.list_all():
score, matched_terms = score_text_match(
query_text,
skill.name,
skill.description,
" ".join(skill.tags),
" ".join(skill.triggers),
skill.content[:400],
retrospective_text,
memory_context,
)
if score <= 0:
continue
entry = SkillShortlistEntry(
skill_name=skill.name,
source=skill.source,
source_id=skill.source_id or skill.id,
scope=skill.scope or list(skill.tags),
status=skill.status,
effectiveness=skill.effectiveness,
score=score,
matched_terms=matched_terms,
rationale="Matched against local or external skill metadata.",
summary=skill.description or skill.content[:160],
injection_mode=choose_injection_mode(
score,
bool(skill.description or skill.content),
),
)
self._upsert(deduped, entry)
return sorted(deduped.values(), key=lambda item: item.score, reverse=True)[:limit]
@staticmethod
def _upsert(
deduped: "OrderedDict[str, SkillShortlistEntry]",
entry: SkillShortlistEntry,
) -> None:
existing = deduped.get(entry.skill_name)
if existing is None or existing.score < entry.score:
deduped[entry.skill_name] = entry
def build_shortlisted_skill_context(
shortlist: list[dict] | list[SkillShortlistEntry] | None,
*,
agent_type: str | None = None,
) -> str:
if not shortlist:
return ""
entries: list[SkillShortlistEntry] = []
for item in shortlist:
entry = item if isinstance(item, SkillShortlistEntry) else SkillShortlistEntry.model_validate(item)
if agent_type and entry.scope and agent_type not in entry.scope:
continue
entries.append(entry)
return render_skill_shortlist_context(entries)
async def shortlist_skills_for_request(
db,
*,
user_id: str,
user_query: str,
memory_context: str | None = None,
retrospectives: list[dict] | None = None,
include_learned: bool = True,
limit: int = 3,
) -> list[SkillShortlistEntry]:
return await RuntimeSkillRetriever(db).shortlist(
user_id=user_id,
query_text=user_query,
memory_context=memory_context,
retrospectives=retrospectives,
include_learned=include_learned,
limit=limit,
)

View File

@@ -14,6 +14,10 @@ class SkillCreate(BaseModel):
visibility: str = "private"
team_id: Optional[str] = None
is_active: bool = True
status: str = "active"
scope: list[str] = []
effectiveness: Optional[float] = None
review_after: Optional[datetime] = None
class SkillUpdate(BaseModel):
@@ -28,6 +32,10 @@ class SkillUpdate(BaseModel):
visibility: Optional[str] = None
team_id: Optional[str] = None
is_active: Optional[bool] = None
status: Optional[str] = None
scope: Optional[list[str]] = None
effectiveness: Optional[float] = None
review_after: Optional[datetime] = None
class SkillOut(BaseModel):
@@ -43,6 +51,12 @@ class SkillOut(BaseModel):
is_builtin: bool
team_id: Optional[str]
is_active: bool
status: str
scope: list[str]
effectiveness: Optional[float]
review_after: Optional[datetime]
activation_count: int
last_activated_at: Optional[datetime]
owner_id: str
created_at: datetime
updated_at: datetime