feat(skills): enhance skills system with matching and evaluation
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
38
backend/app/agents/schemas/skills.py
Normal file
38
backend/app/agents/schemas/skills.py
Normal file
@@ -0,0 +1,38 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
SkillStatus = Literal["candidate", "shadow", "active", "deprecated", "retired"]
|
||||
SkillInjectionMode = Literal["metadata_only", "summary", "full"]
|
||||
|
||||
|
||||
class SkillShortlistEntry(BaseModel):
    # One ranked skill candidate produced while shortlisting skills for a
    # request. Kept as comments (not a docstring) so the generated model
    # schema is unchanged.

    # --- identity -----------------------------------------------------
    skill_name: str
    # Where the entry came from; defaults to "runtime".
    source: str = "runtime"
    source_id: str | None = None

    # --- lifecycle / applicability -------------------------------------
    status: SkillStatus = "active"
    scope: list[str] = Field(default_factory=list)
    effectiveness: float | None = None

    # --- match result ---------------------------------------------------
    score: float = 0.0
    rationale: str | None = None
    summary: str | None = None
    matched_terms: list[str] = Field(default_factory=list)

    # --- injection ------------------------------------------------------
    injection_mode: SkillInjectionMode = "metadata_only"
    # Free-form extra data; each instance gets its own dict.
    metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class SkillActivationRecord(BaseModel):
    # Record of a single skill activation. Kept as comments (not a
    # docstring) so the generated model schema is unchanged.

    # --- identity -----------------------------------------------------
    skill_name: str
    # Where the skill came from; defaults to "runtime".
    source: str = "runtime"
    source_id: str | None = None

    # --- state at activation time ---------------------------------------
    status: SkillStatus = "active"
    injection_mode: SkillInjectionMode = "metadata_only"
    matched_terms: list[str] = Field(default_factory=list)
    rationale: str | None = None

    # Timezone-aware UTC timestamp taken when the record is instantiated
    # (unless the caller supplies one).
    activated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))

    # --- result ---------------------------------------------------------
    outcome: str | None = None
    # Free-form extra data; each instance gets its own dict.
    metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
@@ -1,16 +1 @@
|
||||
"""Skills 注册表 - Phase 9"""
|
||||
|
||||
from app.agents.skills.registry import SkillRegistry, get_skill_registry
|
||||
from app.agents.skills.metadata import SkillMetadata
|
||||
from app.agents.skills.loaders.local_loader import LocalSkillLoader
|
||||
from app.agents.skills.loaders.plugin_loader import PluginSkillLoader
|
||||
from app.agents.skills.mcp_builder import MCPSkillBuilder
|
||||
|
||||
__all__ = [
|
||||
"SkillRegistry",
|
||||
"SkillMetadata",
|
||||
"LocalSkillLoader",
|
||||
"PluginSkillLoader",
|
||||
"MCPSkillBuilder",
|
||||
"get_skill_registry",
|
||||
]
|
||||
"""Skill package."""
|
||||
|
||||
14
backend/app/agents/skills/effectiveness.py
Normal file
14
backend/app/agents/skills/effectiveness.py
Normal file
@@ -0,0 +1,14 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.models.skill import Skill
|
||||
|
||||
|
||||
def summarize_skill_effectiveness(skill: Skill) -> dict[str, object]:
    """Return a plain-dict snapshot of a skill's effectiveness counters.

    Args:
        skill: Object exposing ``name``, ``status``, ``effectiveness``,
            ``activation_count`` and ``last_activated_at``. A
            ``candidate_count`` attribute is optional.

    Returns:
        A dict with the keys ``name``, ``status``, ``effectiveness``,
        ``activation_count``, ``candidate_count`` (0 when the attribute is
        missing) and ``last_activated_at`` (ISO-8601 string, or ``None``
        when the skill has no activation timestamp).
    """
    snapshot: dict[str, object] = {
        "name": skill.name,
        "status": skill.status,
        "effectiveness": skill.effectiveness,
        "activation_count": skill.activation_count,
        # Not every skill object carries this counter; fall back to zero.
        "candidate_count": getattr(skill, "candidate_count", 0),
    }

    if skill.last_activated_at:
        snapshot["last_activated_at"] = skill.last_activated_at.isoformat()
    else:
        snapshot["last_activated_at"] = None

    return snapshot
|
||||
58
backend/app/agents/skills/evaluator.py
Normal file
58
backend/app/agents/skills/evaluator.py
Normal file
@@ -0,0 +1,58 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime, timedelta
|
||||
|
||||
from app.agents.schemas.learning import SessionRetrospective, SkillCandidate
|
||||
from app.agents.skills.models import SkillLifecycleDecision
|
||||
from app.services.skill_service import SkillService
|
||||
|
||||
|
||||
class SkillPromotionEvaluator:
    """Feed a session retrospective into the skill service.

    Two things are forwarded: the retrospective's skill candidates, and
    feedback for every skill the retrospective reports as used.
    """

    def __init__(self, db):
        self.db = db
        self.skill_service = SkillService(db)

    async def sync_retrospective(
        self,
        *,
        user_id: str,
        retrospective: SessionRetrospective,
    ) -> list[SkillLifecycleDecision]:
        """Apply a retrospective and collect the resulting decisions.

        Args:
            user_id: User the candidates and feedback are recorded for.
            retrospective: Source of ``skill_candidates``,
                ``used_skill_names``, ``primary_agent`` and
                ``evidence_refs``.

        Returns:
            Decisions from candidate upserts first (in candidate order),
            followed by the non-``None`` decisions from activation
            feedback (in ``used_skill_names`` order).
        """
        service = self.skill_service

        # Candidates are awaited one at a time, in order.
        collected: list[SkillLifecycleDecision] = [
            await service.upsert_learned_candidate(
                user_id=user_id,
                candidate=proposal,
                primary_agent=retrospective.primary_agent,
                evidence_refs=proposal.evidence_refs,
            )
            for proposal in retrospective.skill_candidates
        ]

        # A single score derived from the retrospective is shared by every
        # skill that was used in the session.
        session_score = self._derive_outcome_score(retrospective)
        for used_name in retrospective.used_skill_names:
            feedback = await service.record_activation_feedback(
                user_id=user_id,
                skill_name=used_name,
                outcome_score=session_score,
                evidence_refs=retrospective.evidence_refs,
            )
            if feedback is None:
                # The service returned nothing to report for this skill.
                continue
            collected.append(feedback)

        return collected

    @staticmethod
    def _derive_outcome_score(retrospective: SessionRetrospective) -> float:
        """Map a retrospective onto a fixed outcome score.

        ``verification_status`` takes precedence: "passed" -> 0.9,
        "skipped" -> 0.55, "failed" -> 0.15. For any other status the
        score is 0.7 when ``outcome`` is "completed" and 0.2 otherwise.
        """
        by_verification = (
            ("passed", 0.9),
            ("skipped", 0.55),
            ("failed", 0.15),
        )
        for status, score in by_verification:
            if retrospective.verification_status == status:
                return score

        if retrospective.outcome == "completed":
            return 0.7
        return 0.2
|
||||
|
||||
|
||||
def next_review_after(days: int = 7) -> datetime:
|
||||
return datetime.now(UTC) + timedelta(days=days)
|
||||
32
backend/app/agents/skills/matcher.py
Normal file
32
backend/app/agents/skills/matcher.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
|
||||
def extract_match_terms(text: str | None) -> list[str]:
    """Split text into de-duplicated match terms, in first-seen order.

    Two kinds of terms are produced:

    * lower-cased runs of ``[a-z0-9_]`` that are at least 3 characters long;
    * runs of characters in U+4E00..U+9FFF that are at least 2 characters
      long. Runs longer than 4 characters additionally contribute every
      adjacent two-character pair.

    Args:
        text: Input text; ``None`` is treated as the empty string.

    Returns:
        The terms with duplicates removed, ASCII terms first.
    """
    raw = text or ""

    collected = [
        word
        for word in re.findall(r"[a-z0-9_]+", raw.lower())
        if len(word) >= 3
    ]

    for run in re.findall(r"[\u4e00-\u9fff]+", raw):
        if len(run) < 2:
            continue
        collected.append(run)
        if len(run) > 4:
            # Sliding two-character window over the run.
            collected.extend(left + right for left, right in zip(run, run[1:]))

    # Order-preserving de-duplication.
    unique: list[str] = []
    seen: set[str] = set()
    for term in collected:
        if term not in seen:
            seen.add(term)
            unique.append(term)
    return unique
|
||||
|
||||
|
||||
def score_text_match(query_text: str, *corpus_parts: str | None) -> tuple[float, list[str]]:
    """Score how well a query's terms are covered by a corpus.

    The query is tokenised with ``extract_match_terms``; a term counts as
    matched when it occurs as a substring of the lower-cased, space-joined
    non-empty corpus parts.

    Args:
        query_text: Text whose terms are looked up.
        *corpus_parts: Text fragments forming the corpus; falsy parts are
            ignored.

    Returns:
        ``(score, matched_terms)``. The score is
        ``0.7 * coverage + 0.3 * density`` capped at 1.0 and rounded to 3
        decimals, where coverage is matched/total query terms and density
        is ``min(matched, 4) / 4``. ``(0.0, [])`` is returned when the query
        has no terms or nothing matches.
    """
    wanted = extract_match_terms(query_text)
    if not wanted:
        return 0.0, []

    haystack = " ".join(filter(None, corpus_parts)).lower()

    hits: list[str] = []
    for term in wanted:
        if term and term in haystack:
            hits.append(term)
    if not hits:
        return 0.0, []

    hit_count = len(hits)
    coverage = hit_count / max(len(wanted), 1)
    # Density saturates once four terms have matched.
    density = min(hit_count, 4) / 4
    combined = min(1.0, coverage * 0.7 + density * 0.3)
    return round(combined, 3), hits
|
||||
@@ -20,6 +20,10 @@ class SkillMetadata:
|
||||
source_id: str = "" # 来源 ID
|
||||
enabled: bool = True # 是否启用
|
||||
tools: list[str] = field(default_factory=list) # 关联的工具
|
||||
status: str = "active" # candidate/shadow/active/deprecated/retired
|
||||
scope: list[str] = field(default_factory=list)
|
||||
effectiveness: float | None = None
|
||||
review_after: str | None = None
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
@@ -35,6 +39,10 @@ class SkillMetadata:
|
||||
"source_id": self.source_id,
|
||||
"enabled": self.enabled,
|
||||
"tools": self.tools,
|
||||
"status": self.status,
|
||||
"scope": self.scope,
|
||||
"effectiveness": self.effectiveness,
|
||||
"review_after": self.review_after,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
|
||||
29
backend/app/agents/skills/models.py
Normal file
29
backend/app/agents/skills/models.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
SkillLifecycleAction = Literal[
|
||||
"created_candidate",
|
||||
"promoted_to_shadow",
|
||||
"promoted_to_active",
|
||||
"degraded_to_deprecated",
|
||||
"retired",
|
||||
"reactivated",
|
||||
"feedback_recorded",
|
||||
"no_change",
|
||||
]
|
||||
|
||||
|
||||
class SkillLifecycleDecision(BaseModel):
    # Outcome of one lifecycle evaluation for a skill. Kept as comments
    # (not a docstring) so the generated model schema is unchanged.

    # --- what was decided -----------------------------------------------
    skill_name: str
    action: SkillLifecycleAction

    # --- status transition ----------------------------------------------
    # ``previous_status`` is optional; ``new_status`` is always required.
    previous_status: str | None = None
    new_status: str

    # --- justification ----------------------------------------------------
    reason: str
    evidence_refs: list[dict[str, object]] = Field(default_factory=list)
    confidence: float | None = None

    # --- follow-up --------------------------------------------------------
    review_after: datetime | None = None
|
||||
27
backend/app/agents/skills/policy.py
Normal file
27
backend/app/agents/skills/policy.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.agents.schemas.skills import SkillInjectionMode, SkillShortlistEntry
|
||||
|
||||
MAX_SUMMARY_CHARS = 120
|
||||
|
||||
|
||||
def choose_injection_mode(score: float, summary_available: bool) -> SkillInjectionMode:
    """Pick how much of a skill to inject for a given match score.

    Args:
        score: Match score of the skill.
        summary_available: Whether there is summary text to inject.

    Returns:
        ``"summary"`` when the score is at least 0.75 and a summary is
        available; ``"metadata_only"`` in every other case.
    """
    return "summary" if (score >= 0.75 and summary_available) else "metadata_only"
|
||||
|
||||
|
||||
def render_skill_shortlist_context(entries: list[SkillShortlistEntry]) -> str:
    """Render shortlisted skills as a newline-joined text block.

    Only the first three entries are rendered. Each contributes a header
    line (name, injection mode, score to two decimals), a detail line
    (the entry's summary, or a generic sentence when it has none, cut to
    ``MAX_SUMMARY_CHARS`` characters) and, when present, a line listing up
    to six matched terms.

    Args:
        entries: Shortlist entries, already in the desired order.

    Returns:
        The rendered block starting with ``[Task-Scoped Skills]``, or an
        empty string when ``entries`` is empty.
    """
    if not entries:
        return ""

    rendered = ["[Task-Scoped Skills]"]
    for item in entries[:3]:
        blurb = (item.summary or "Relevant to the current request.")[:MAX_SUMMARY_CHARS]
        rendered.extend(
            (
                f"- {item.skill_name} | mode={item.injection_mode} | score={item.score:.2f}",
                f" {blurb}",
            )
        )
        if item.matched_terms:
            shown_terms = ", ".join(item.matched_terms[:6])
            rendered.append(f" matched_terms={shown_terms}")

    return "\n".join(rendered)
|
||||
153
backend/app/agents/skills/retriever.py
Normal file
153
backend/app/agents/skills/retriever.py
Normal file
@@ -0,0 +1,153 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
from app.agents.schemas.skills import SkillShortlistEntry
|
||||
from app.agents.skills.matcher import score_text_match
|
||||
from app.agents.skills.policy import choose_injection_mode, render_skill_shortlist_context
|
||||
from app.agents.skills.registry import get_skill_registry
|
||||
from app.services.skill_service import SkillService
|
||||
|
||||
|
||||
class RuntimeSkillRetriever:
    """Build a ranked shortlist of skills that textually match a request.

    Candidates come from two places: the skills ``SkillService`` returns for
    the user, and the skills held by the registry returned from
    ``get_skill_registry()``. Entries are de-duplicated by skill name,
    keeping the higher-scoring one.
    """

    def __init__(self, db):
        # Passed straight through to ``SkillService``.
        self.db = db

    async def shortlist(
        self,
        *,
        user_id: str,
        query_text: str,
        memory_context: str | None = None,
        retrospectives: list[dict] | None = None,
        include_learned: bool = True,
        limit: int = 3,
    ) -> list[SkillShortlistEntry]:
        """Return up to ``limit`` matching skills, best score first.

        Args:
            user_id: User whose database skills are considered.
            query_text: Request text the skills are matched against.
            memory_context: Optional extra text added to every skill's
                match corpus.
            retrospectives: Optional list of dicts; the ``summary`` (or
                ``summary_text``) of each dict is added to every skill's
                match corpus. Non-dict items are ignored.
            include_learned: Forwarded to
                ``SkillService.list_runtime_candidates``.
            limit: Maximum number of entries returned. Zero or a negative
                value yields an empty list.

        Returns:
            Shortlist entries with a positive score, sorted by descending
            score. Ties keep insertion order (database skills first).
        """
        # Local import: only needed on the registry-failure path below.
        import logging

        deduped: "OrderedDict[str, SkillShortlistEntry]" = OrderedDict()
        retrospective_text = "\n".join(
            (item.get("summary") or item.get("summary_text") or "")
            for item in (retrospectives or [])
            if isinstance(item, dict)
        )

        # NOTE(review): retrospective_text and memory_context are part of
        # every skill's corpus, so a query term found only there counts as
        # a match for every skill. Confirm this is intended.

        # --- database-backed skills --------------------------------------
        service = SkillService(self.db)
        for skill in await service.list_runtime_candidates(user_id, include_learned=include_learned):
            score, matched_terms = score_text_match(
                query_text,
                skill.name,
                skill.description,
                skill.instructions,
                retrospective_text,
                memory_context,
            )
            if score <= 0:
                continue

            is_shadow = skill.status == "shadow"
            entry = SkillShortlistEntry(
                skill_name=skill.name,
                source="database",
                source_id=skill.id,
                scope=[skill.agent_type, skill.visibility],
                status=skill.status,
                effectiveness=skill.effectiveness,
                score=score,
                matched_terms=matched_terms,
                rationale=(
                    "Shadow skill matched current request; keep metadata-only injection."
                    if is_shadow
                    else "Matched against DB skill metadata and instructions."
                ),
                summary=skill.description or (skill.instructions[:160] if skill.instructions else None),
                # Shadow skills are never injected beyond metadata.
                injection_mode=(
                    "metadata_only"
                    if is_shadow
                    else choose_injection_mode(score, bool(skill.description or skill.instructions))
                ),
            )
            self._upsert(deduped, entry)

        # --- registry-backed skills ----------------------------------------
        registry = get_skill_registry()
        if not registry.list_all():
            try:
                registry.load_all()
            except Exception:
                # Loading stays best-effort so a broken registry cannot
                # fail the request, but the failure is now recorded
                # instead of being silently discarded.
                logging.getLogger(__name__).warning(
                    "Skill registry load failed; continuing with the skills already available",
                    exc_info=True,
                )

        for skill in registry.list_all():
            score, matched_terms = score_text_match(
                query_text,
                skill.name,
                skill.description,
                " ".join(skill.tags),
                " ".join(skill.triggers),
                skill.content[:400],
                retrospective_text,
                memory_context,
            )
            if score <= 0:
                continue

            entry = SkillShortlistEntry(
                skill_name=skill.name,
                source=skill.source,
                source_id=skill.source_id or skill.id,
                # Tags stand in for the scope when none is declared.
                scope=skill.scope or list(skill.tags),
                status=skill.status,
                effectiveness=skill.effectiveness,
                score=score,
                matched_terms=matched_terms,
                rationale="Matched against local or external skill metadata.",
                summary=skill.description or skill.content[:160],
                injection_mode=choose_injection_mode(
                    score,
                    bool(skill.description or skill.content),
                ),
            )
            self._upsert(deduped, entry)

        ranked = sorted(deduped.values(), key=lambda item: item.score, reverse=True)
        # Clamp the limit: a negative value would otherwise slice from the
        # tail and drop that many of the lowest-ranked entries.
        return ranked[: max(limit, 0)]

    @staticmethod
    def _upsert(
        deduped: "OrderedDict[str, SkillShortlistEntry]",
        entry: SkillShortlistEntry,
    ) -> None:
        """Store ``entry`` unless an equal-or-better one has the same name.

        An existing entry is replaced only when its score is strictly lower
        than the new entry's score.
        """
        existing = deduped.get(entry.skill_name)
        if existing is None or existing.score < entry.score:
            deduped[entry.skill_name] = entry
|
||||
|
||||
|
||||
def build_shortlisted_skill_context(
    shortlist: list[dict] | list[SkillShortlistEntry] | None,
    *,
    agent_type: str | None = None,
) -> str:
    """Render a shortlist, optionally filtered for one agent type.

    Args:
        shortlist: Entries as ``SkillShortlistEntry`` objects or as raw
            data validated into that model. ``None`` or an empty list
            yields an empty string.
        agent_type: When given, entries with a non-empty ``scope`` that
            does not contain this value are dropped. Entries with an empty
            scope are always kept.

    Returns:
        The text produced by ``render_skill_shortlist_context`` for the
        surviving entries.
    """
    if not shortlist:
        return ""

    kept: list[SkillShortlistEntry] = []
    for raw in shortlist:
        if isinstance(raw, SkillShortlistEntry):
            candidate = raw
        else:
            candidate = SkillShortlistEntry.model_validate(raw)

        # Out of scope only when both a filter and a scope exist and the
        # scope does not include the requested agent type.
        if agent_type and candidate.scope and agent_type not in candidate.scope:
            continue
        kept.append(candidate)

    return render_skill_shortlist_context(kept)
|
||||
|
||||
|
||||
async def shortlist_skills_for_request(
    db,
    *,
    user_id: str,
    user_query: str,
    memory_context: str | None = None,
    retrospectives: list[dict] | None = None,
    include_learned: bool = True,
    limit: int = 3,
) -> list[SkillShortlistEntry]:
    """Shortlist skills for a request using a fresh ``RuntimeSkillRetriever``.

    Convenience wrapper: builds a retriever around ``db`` and forwards every
    argument to its ``shortlist`` method, passing ``user_query`` as
    ``query_text``.

    Args:
        db: Handle given to ``RuntimeSkillRetriever``.
        user_id: User the shortlist is built for.
        user_query: Request text to match skills against.
        memory_context: Optional extra matching text.
        retrospectives: Optional list of retrospective dicts.
        include_learned: Forwarded unchanged to the retriever.
        limit: Maximum number of entries to return.

    Returns:
        Whatever ``RuntimeSkillRetriever.shortlist`` returns.
    """
    retriever = RuntimeSkillRetriever(db)
    entries = await retriever.shortlist(
        user_id=user_id,
        query_text=user_query,
        memory_context=memory_context,
        retrospectives=retrospectives,
        include_learned=include_learned,
        limit=limit,
    )
    return entries
|
||||
@@ -14,6 +14,10 @@ class SkillCreate(BaseModel):
|
||||
visibility: str = "private"
|
||||
team_id: Optional[str] = None
|
||||
is_active: bool = True
|
||||
status: str = "active"
|
||||
scope: list[str] = []
|
||||
effectiveness: Optional[float] = None
|
||||
review_after: Optional[datetime] = None
|
||||
|
||||
|
||||
class SkillUpdate(BaseModel):
|
||||
@@ -28,6 +32,10 @@ class SkillUpdate(BaseModel):
|
||||
visibility: Optional[str] = None
|
||||
team_id: Optional[str] = None
|
||||
is_active: Optional[bool] = None
|
||||
status: Optional[str] = None
|
||||
scope: Optional[list[str]] = None
|
||||
effectiveness: Optional[float] = None
|
||||
review_after: Optional[datetime] = None
|
||||
|
||||
|
||||
class SkillOut(BaseModel):
|
||||
@@ -43,6 +51,12 @@ class SkillOut(BaseModel):
|
||||
is_builtin: bool
|
||||
team_id: Optional[str]
|
||||
is_active: bool
|
||||
status: str
|
||||
scope: list[str]
|
||||
effectiveness: Optional[float]
|
||||
review_after: Optional[datetime]
|
||||
activation_count: int
|
||||
last_activated_at: Optional[datetime]
|
||||
owner_id: str
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
Reference in New Issue
Block a user