# backend/app/agents/skills/matcher.py

from __future__ import annotations

import re


def extract_match_terms(text: str | None) -> list[str]:
    """Return the de-duplicated match terms found in *text*, in first-seen order.

    Two kinds of terms are collected:

    * Runs of ``a-z``, ``0-9`` and ``_`` taken from the lower-cased text,
      kept only when they are at least three characters long.
    * Runs of characters in the ``U+4E00``-``U+9FFF`` range that are at least
      two characters long. A run longer than four characters additionally
      contributes every overlapping two-character slice of itself.

    All alphanumeric terms come before all ideograph terms. ``None`` or an
    empty string yields an empty list.
    """
    raw = text or ""

    # A dict preserves insertion order, so its keys act as an ordered set.
    seen: dict[str, None] = {}

    for word in re.findall(r"[a-z0-9_]+", raw.lower()):
        if len(word) >= 3:
            seen.setdefault(word, None)

    for run in re.findall(r"[\u4e00-\u9fff]+", raw):
        size = len(run)
        if size < 2:
            continue
        seen.setdefault(run, None)
        if size <= 4:
            continue
        # Long runs are also broken into overlapping bigrams so that a
        # partial overlap with other text can still be detected.
        for start in range(size - 1):
            seen.setdefault(run[start : start + 2], None)

    return list(seen)


def score_text_match(query_text: str, *corpus_parts: str | None) -> tuple[float, list[str]]:
    """Score how well the terms of *query_text* are found in the corpus parts.

    The query is turned into terms with ``extract_match_terms``. The truthy
    corpus parts are joined with single spaces and lower-cased, and a term
    counts as matched when it occurs as a substring of that joined text.

    Returns:
        A ``(score, matched_terms)`` pair. ``score`` is rounded to three
        decimals and capped at ``1.0``; it blends the fraction of query terms
        that matched (weight 0.7) with the number of matches, saturating at
        four (weight 0.3). ``matched_terms`` keeps the query-term order.
        ``(0.0, [])`` is returned when the query yields no terms or nothing
        matches.
    """
    terms = extract_match_terms(query_text)
    if not terms:
        return 0.0, []

    haystack = " ".join(filter(None, corpus_parts)).lower()
    hits = [candidate for candidate in terms if candidate and candidate in haystack]
    if not hits:
        return 0.0, []

    hit_count = len(hits)
    coverage = hit_count / max(len(terms), 1)
    density = min(hit_count, 4) / 4
    blended = coverage * 0.7 + density * 0.3
    return round(min(1.0, blended), 3), hits
feat(skills): enhance skills system with matching and evaluation Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai> 2026-04-08 00:11:34 +08:00			`from __future__ import annotations`

			`import re`


			`def extract_match_terms(text: str \| None) -> list[str]:`
			`source = (text or "").lower()`
			`terms = [token for token in re.findall(r"[a-z0-9_]+", source) if len(token) >= 3]`

			`for chunk in re.findall(r"[\u4e00-\u9fff]+", text or ""):`
			`if len(chunk) >= 2:`
			`terms.append(chunk)`
			`if len(chunk) > 4:`
			`for index in range(len(chunk) - 1):`
			`terms.append(chunk[index : index + 2])`

			`return list(dict.fromkeys(terms))`


			`def score_text_match(query_text: str, *corpus_parts: str \| None) -> tuple[float, list[str]]:`
			`query_terms = extract_match_terms(query_text)`
			`if not query_terms:`
			`return 0.0, []`

			`corpus = " ".join(part for part in corpus_parts if part).lower()`
			`matched_terms = [term for term in query_terms if term and term in corpus]`
			`if not matched_terms:`
			`return 0.0, []`

			`coverage = len(matched_terms) / max(len(query_terms), 1)`
			`density = min(len(matched_terms), 4) / 4`
			`return round(min(1.0, coverage * 0.7 + density * 0.3), 3), matched_terms`