feat(flywheel): golden case 管理 API 与评测单测

- 新增 GoldenCaseCreate/Read、GoldenEvalRequest/Read schema
- 新增 3 个端点:创建 golden case、按规则列表、手动触发 golden 评测
  (不入门禁,供运营试跑)
- 单测 15 passed:单条 hit/severity 比对、集合 accuracy/precision/recall
  聚合、空集降级、100% 通过/失败拦截、feature flag、异常降级
- 回归 test_agent_asset_service 27 passed(1 个预存失败与本改动无关)
This commit is contained in:
caoxiaozhu
2026-07-03 14:38:43 +08:00
parent 67c3f30eb2
commit c7ba7bb453
3 changed files with 413 additions and 0 deletions

View File

@@ -43,6 +43,10 @@ from app.schemas.agent_asset import (
AgentAssetVersionCreate,
AgentAssetVersionRead,
AgentAssetVersionTimelineItemRead,
GoldenCaseCreate,
GoldenCaseRead,
GoldenEvalRead,
GoldenEvalRequest,
)
from app.schemas.common import ErrorResponse, PaginatedResponse
from app.services.agent_assets import AgentAssetService
@@ -923,3 +927,110 @@ def get_agent_asset_version_timeline(
return AgentAssetService(db).list_version_timeline(asset_id)
except Exception as exc:
_handle_asset_error(exc)
@router.post(
    "/risk-rules/golden-cases",
    response_model=GoldenCaseRead,
    status_code=status.HTTP_201_CREATED,
    summary="创建 golden set 黄金用例",
    description="为指定规则(或通用场景)创建一条回归用例,发布前作为门禁集执行。",
)
def create_golden_case(
    body: GoldenCaseCreate,
    _: RuleEditorUser,
    db: DbSession,
) -> GoldenCaseRead:
    """Create one golden-set case from the request body.

    The new row is always stored with ``status="active"`` and
    ``source="manual"``; all other columns are copied from ``body``.

    Args:
        body: Payload describing the case (key, rule code, scene, name,
            input values and the expected hit/severity).
        _: Authenticated user dependency; only used for access control.
        db: Database session used for the lookup and the insert.

    Returns:
        The persisted case converted to ``GoldenCaseRead``.

    Raises:
        HTTPException: 409 when a case with the same ``case_key`` already
            exists, whether detected before the insert or at commit time.
    """
    from app.models.golden_case import GoldenCase
    from sqlalchemy import select
    from sqlalchemy.exc import IntegrityError

    def _case_key_taken() -> bool:
        # Single place for the duplicate lookup so the pre-check and the
        # post-failure re-check cannot drift apart.
        found = db.scalar(select(GoldenCase).where(GoldenCase.case_key == body.case_key))
        return found is not None

    if _case_key_taken():
        raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail="case_key 已存在")

    case = GoldenCase(
        case_key=body.case_key,
        rule_code=body.rule_code,
        scene=body.scene,
        name=body.name,
        values_json=body.values,
        expected_hit=body.expected_hit,
        expected_severity=body.expected_severity,
        note=body.note,
        status="active",
        source="manual",
    )
    db.add(case)
    try:
        db.commit()
    except IntegrityError as exc:
        # The pre-check above is not atomic with the insert: a concurrent
        # request may have created the same case_key in between. Roll back so
        # the session is usable again, then report a conflict only if the key
        # really exists now; any other integrity failure is re-raised as-is.
        # NOTE(review): this only helps if the model declares a unique
        # constraint on case_key — confirm against app.models.golden_case.
        db.rollback()
        if _case_key_taken():
            raise HTTPException(
                status_code=status.HTTP_409_CONFLICT, detail="case_key 已存在"
            ) from exc
        raise
    except Exception:
        # Never leave a failed transaction open on the session.
        db.rollback()
        raise
    db.refresh(case)
    return _golden_case_read(case)
@router.get(
    "/risk-rules/{rule_code}/golden-cases",
    response_model=list[GoldenCaseRead],
    summary="列出规则的 golden 用例",
)
def list_golden_cases(
    rule_code: str,
    _: CurrentUser,
    db: DbSession,
) -> list[GoldenCaseRead]:
    """List the golden cases whose ``rule_code`` matches the path parameter.

    Args:
        rule_code: Rule code taken from the URL path.
        _: Authenticated user dependency; only used for access control.
        db: Database session used for the query.

    Returns:
        Matching cases converted to ``GoldenCaseRead``, ordered by
        ``created_at``. The list is empty when nothing matches.
    """
    from app.models.golden_case import GoldenCase
    from sqlalchemy import select

    statement = select(GoldenCase).where(GoldenCase.rule_code == rule_code)
    statement = statement.order_by(GoldenCase.created_at)
    rows = db.scalars(statement).all()
    return list(map(_golden_case_read, rows))
@router.post(
    "/{asset_id}/golden-eval",
    response_model=GoldenEvalRead,
    summary="手动触发 golden set 评测(不入门禁)",
    description="在当前规则版本上跑 golden 用例集,返回指标。门禁由 publish 时自动触发。",
)
def run_golden_eval(
    asset_id: str,
    body: GoldenEvalRequest,
    _: RuleReviewerUser,
    db: DbSession,
) -> GoldenEvalRead:
    """Run the golden-set evaluation for the rule behind ``asset_id``.

    The asset's ``config_json["rule_document"]["file_name"]`` names the
    manifest, which is read from the risk-rules library. The manifest's
    ``rule_code`` is then passed to ``RiskRuleGoldenEvaluator``.

    Args:
        asset_id: Identifier of the rule asset to evaluate.
        body: Evaluation request payload (see the review note in the body).
        _: Authenticated user dependency; only used for access control.
        db: Database session handed to the service and the evaluator.

    Returns:
        The evaluator's report converted to ``GoldenEvalRead``.

    Every exception raised inside the ``try`` block, including the
    ``LookupError``/``ValueError`` validation errors, is delegated to
    ``_handle_asset_error``.
    """
    from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
    from app.services.risk_rule_golden_evaluator import RiskRuleGoldenEvaluator

    try:
        # One service instance serves both the asset lookup and the library
        # manager; constructing it inside the try keeps constructor failures
        # on the same error-handling path as before.
        service = AgentAssetService(db)
        asset = service.get_asset(asset_id)
        if asset is None:
            raise LookupError("Asset not found")

        # Tolerate a missing or malformed config: anything that is not a dict
        # is treated as empty so the file_name check below reports the problem.
        config = asset.config_json if isinstance(asset.config_json, dict) else {}
        rule_document = config.get("rule_document")
        if not isinstance(rule_document, dict):
            rule_document = {}
        file_name = str(rule_document.get("file_name") or "").strip()
        if not file_name:
            raise ValueError("该规则没有可执行的 manifest 文件。")

        manifest = service.rule_library_manager.read_rule_library_json(
            library=RISK_RULES_LIBRARY,
            file_name=file_name,
        )
        rule_code = str(manifest.get("rule_code") or "").strip()
        if not rule_code:
            raise ValueError("manifest 缺少 rule_code。")

        # NOTE(review): body.version is accepted by the schema but is not
        # forwarded to the evaluator. The previous code computed a local
        # `version` from it and never used it, so the evaluation always runs
        # against the manifest currently stored in the library. Wire it
        # through if evaluating a specific version is intended.
        report = RiskRuleGoldenEvaluator().evaluate_for_rule(db, manifest, rule_code)
        return GoldenEvalRead(**report.to_dict())
    except Exception as exc:
        _handle_asset_error(exc)
def _golden_case_read(case) -> GoldenCaseRead:
    """Convert a stored golden-case record into its ``GoldenCaseRead`` schema.

    Falsy ``scene`` and ``name`` become ``""``, a falsy ``values_json``
    becomes ``{}``, and ``expected_hit`` is coerced to ``bool``. All other
    fields are copied through unchanged.
    """
    fields = {
        "id": case.id,
        "case_key": case.case_key,
        "rule_code": case.rule_code,
        "scene": case.scene or "",
        "name": case.name or "",
        "values": case.values_json or {},
        "expected_hit": bool(case.expected_hit),
        "expected_severity": case.expected_severity,
        "note": case.note,
        "status": case.status,
        "source": case.source,
    }
    return GoldenCaseRead(**fields)