2026-01-21 16:59:01 +08:00
|
|
|
|
<!DOCTYPE html>
|
|
|
|
|
|
<html lang="zh-CN">
|
|
|
|
|
|
<head>
|
|
|
|
|
|
<meta charset="UTF-8">
|
|
|
|
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
|
|
|
|
<title>添加评测维度 / 远光软件微调平台</title>
|
|
|
|
|
|
<script src="../lib/tailwindcss/tailwind.js"></script>
|
|
|
|
|
|
<link href="../lib/font-awesome/css/font-awesome.min.css" rel="stylesheet">
|
|
|
|
|
|
<style>
|
|
|
|
|
|
/* 侧边栏滑块动画 */
|
|
|
|
|
|
.sidebar-slider {
|
|
|
|
|
|
position: absolute;
|
|
|
|
|
|
width: 4px;
|
|
|
|
|
|
height: 0;
|
|
|
|
|
|
background-color: #1890ff;
|
|
|
|
|
|
border-radius: 0 2px 2px 0;
|
|
|
|
|
|
transition: top 0.3s cubic-bezier(0.4, 0, 0.2, 1),
|
|
|
|
|
|
height 0.3s cubic-bezier(0.4, 0, 0.2, 1);
|
|
|
|
|
|
pointer-events: none;
|
|
|
|
|
|
z-index: 10;
|
|
|
|
|
|
}
|
|
|
|
|
|
.sidebar-section-title {
|
|
|
|
|
|
padding: 0.5rem 1rem;
|
|
|
|
|
|
font-size: 0.75rem;
|
|
|
|
|
|
color: rgba(191, 203, 217, 0.7);
|
|
|
|
|
|
font-weight: 500;
|
|
|
|
|
|
text-transform: uppercase;
|
|
|
|
|
|
letter-spacing: 0.05em;
|
|
|
|
|
|
}
|
|
|
|
|
|
.nav-link:hover {
|
|
|
|
|
|
background-color: rgba(0, 21, 41, 0.2);
|
|
|
|
|
|
}
|
|
|
|
|
|
.nav-item-wrapper {
|
|
|
|
|
|
position: relative;
|
|
|
|
|
|
}
|
|
|
|
|
|
.nav-link {
|
|
|
|
|
|
position: relative;
|
|
|
|
|
|
z-index: 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
.form-input {
|
|
|
|
|
|
width: 100%;
|
|
|
|
|
|
padding: 0.5rem 0.75rem;
|
|
|
|
|
|
border: 1px solid #d1d5db;
|
|
|
|
|
|
border-radius: 0.5rem;
|
|
|
|
|
|
font-size: 0.875rem;
|
|
|
|
|
|
transition: border-color 0.2s, outline 0.2s;
|
|
|
|
|
|
}
|
|
|
|
|
|
.form-input:focus {
|
|
|
|
|
|
border-color: #1890ff;
|
|
|
|
|
|
outline: none;
|
|
|
|
|
|
box-shadow: 0 0 0 2px rgba(24, 144, 255, 0.2);
|
|
|
|
|
|
}
|
|
|
|
|
|
.form-label {
|
|
|
|
|
|
display: block;
|
|
|
|
|
|
font-size: 0.875rem;
|
|
|
|
|
|
font-weight: 500;
|
|
|
|
|
|
color: #374151;
|
|
|
|
|
|
margin-bottom: 0.25rem;
|
|
|
|
|
|
}
|
|
|
|
|
|
.form-select {
|
|
|
|
|
|
width: 100%;
|
|
|
|
|
|
padding: 0.5rem 0.75rem;
|
|
|
|
|
|
border: 1px solid #d1d5db;
|
|
|
|
|
|
border-radius: 0.5rem;
|
|
|
|
|
|
font-size: 0.875rem;
|
|
|
|
|
|
transition: border-color 0.2s, outline 0.2s;
|
|
|
|
|
|
appearance: none;
|
|
|
|
|
|
background-color: white;
|
|
|
|
|
|
background-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 20 20'%3e%3cpath stroke='%236b7280' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' d='M6 8l4 4 4-4'/%3e%3c/svg%3e");
|
|
|
|
|
|
background-position: right 0.5rem center;
|
|
|
|
|
|
background-repeat: no-repeat;
|
|
|
|
|
|
background-size: 1.5em 1.5em;
|
|
|
|
|
|
padding-right: 2.5rem;
|
|
|
|
|
|
}
|
|
|
|
|
|
.form-select:focus {
|
|
|
|
|
|
border-color: #1890ff;
|
|
|
|
|
|
outline: none;
|
|
|
|
|
|
box-shadow: 0 0 0 2px rgba(24, 144, 255, 0.2);
|
|
|
|
|
|
}
|
|
|
|
|
|
textarea.form-input {
|
|
|
|
|
|
resize: vertical;
|
|
|
|
|
|
min-height: 100px;
|
|
|
|
|
|
}
|
2026-01-22 10:41:06 +08:00
|
|
|
|
.radio-option, .checkbox-option {
|
2026-01-21 16:59:01 +08:00
|
|
|
|
display: flex;
|
|
|
|
|
|
align-items: center;
|
|
|
|
|
|
padding: 0.75rem 1rem;
|
|
|
|
|
|
border: 1px solid #d1d5db;
|
|
|
|
|
|
border-radius: 0.5rem;
|
|
|
|
|
|
cursor: pointer;
|
|
|
|
|
|
transition: all 0.2s;
|
|
|
|
|
|
}
|
2026-01-22 10:41:06 +08:00
|
|
|
|
.radio-option:hover, .checkbox-option:hover {
|
2026-01-21 16:59:01 +08:00
|
|
|
|
border-color: #1890ff;
|
|
|
|
|
|
}
|
2026-01-22 10:41:06 +08:00
|
|
|
|
.radio-option.selected, .checkbox-option.selected {
|
2026-01-21 16:59:01 +08:00
|
|
|
|
border-color: #1890ff;
|
|
|
|
|
|
background-color: rgba(24, 144, 255, 0.05);
|
|
|
|
|
|
}
|
2026-01-22 10:41:06 +08:00
|
|
|
|
.checkbox-option input[type="checkbox"] {
|
|
|
|
|
|
display: none;
|
|
|
|
|
|
}
|
2026-01-21 16:59:01 +08:00
|
|
|
|
.radio-option input {
|
|
|
|
|
|
margin-right: 0.75rem;
|
|
|
|
|
|
}
|
|
|
|
|
|
.slider-thumb {
|
|
|
|
|
|
-webkit-appearance: none;
|
|
|
|
|
|
appearance: none;
|
|
|
|
|
|
width: 100%;
|
|
|
|
|
|
height: 6px;
|
|
|
|
|
|
border-radius: 3px;
|
|
|
|
|
|
background: #e5e7eb;
|
|
|
|
|
|
outline: none;
|
|
|
|
|
|
background-image: linear-gradient(#1890ff, #1890ff);
|
|
|
|
|
|
background-repeat: no-repeat;
|
|
|
|
|
|
}
|
|
|
|
|
|
.slider-thumb::-webkit-slider-thumb {
|
|
|
|
|
|
-webkit-appearance: none;
|
|
|
|
|
|
appearance: none;
|
|
|
|
|
|
width: 18px;
|
|
|
|
|
|
height: 18px;
|
|
|
|
|
|
border-radius: 50%;
|
|
|
|
|
|
background: #1890ff;
|
|
|
|
|
|
cursor: pointer;
|
|
|
|
|
|
box-shadow: 0 2px 6px rgba(24, 144, 255, 0.3);
|
|
|
|
|
|
transition: transform 0.2s;
|
|
|
|
|
|
border: 2px solid #fff;
|
|
|
|
|
|
}
|
|
|
|
|
|
.slider-thumb::-webkit-slider-thumb:hover {
|
|
|
|
|
|
transform: scale(1.1);
|
|
|
|
|
|
}
|
2026-01-21 21:52:37 +08:00
|
|
|
|
/* Markdown 编辑器样式 */
|
|
|
|
|
|
.markdown-editor-wrapper {
|
|
|
|
|
|
position: relative;
|
|
|
|
|
|
width: 100%;
|
|
|
|
|
|
}
|
|
|
|
|
|
.markdown-editor {
|
|
|
|
|
|
width: 100%;
|
2026-01-21 21:57:06 +08:00
|
|
|
|
height: 350px;
|
2026-01-21 21:52:37 +08:00
|
|
|
|
font-family: 'Consolas', 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
|
|
|
|
|
|
font-size: 14px;
|
|
|
|
|
|
line-height: 1.6;
|
|
|
|
|
|
background: #fff;
|
|
|
|
|
|
border: 1px solid #d1d5db;
|
|
|
|
|
|
border-radius: 0.5rem;
|
|
|
|
|
|
padding: 0.75rem;
|
|
|
|
|
|
box-sizing: border-box;
|
|
|
|
|
|
overflow-y: auto;
|
|
|
|
|
|
outline: none;
|
|
|
|
|
|
white-space: pre-wrap;
|
|
|
|
|
|
word-wrap: break-word;
|
|
|
|
|
|
overflow-wrap: break-word;
|
|
|
|
|
|
}
|
|
|
|
|
|
.markdown-editor:focus {
|
|
|
|
|
|
border-color: #1890ff;
|
|
|
|
|
|
box-shadow: 0 0 0 2px rgba(24, 144, 255, 0.2);
|
|
|
|
|
|
}
|
|
|
|
|
|
/* placeholder 效果 */
|
|
|
|
|
|
.markdown-editor:empty::before {
|
|
|
|
|
|
content: attr(data-placeholder);
|
|
|
|
|
|
color: #9ca3af;
|
|
|
|
|
|
pointer-events: none;
|
|
|
|
|
|
}
|
|
|
|
|
|
/* Markdown 语法高亮 */
|
|
|
|
|
|
.md-heading1 { color: #1e40af; font-weight: bold; font-size: 1.25em; }
|
|
|
|
|
|
.md-heading2 { color: #1e40af; font-weight: bold; font-size: 1.1em; }
|
|
|
|
|
|
.md-heading3 { color: #1e40af; font-weight: bold; font-size: 1em; }
|
|
|
|
|
|
.md-heading4 { color: #1e40af; font-weight: bold; font-size: 0.9em; }
|
|
|
|
|
|
.md-heading5 { color: #1e40af; font-weight: bold; font-size: 0.85em; }
|
|
|
|
|
|
.md-heading6 { color: #1e40af; font-weight: bold; font-size: 0.8em; }
|
|
|
|
|
|
.md-bold { color: #dc2626; font-weight: 600; }
|
|
|
|
|
|
.md-italic { font-style: italic; }
|
|
|
|
|
|
.md-code { color: #059669; background: #f3f4f6; padding: 0.1em 0.4em; border-radius: 3px; font-size: 0.9em; }
|
|
|
|
|
|
.md-codeblock { color: #059669; background: #f3f4f6; padding: 0 0.5em; border-radius: 3px; font-size: 0.9em; line-height: 1.6; }
|
|
|
|
|
|
.md-link { color: #2563eb; text-decoration: underline; }
|
|
|
|
|
|
.md-list { color: #059669; }
|
|
|
|
|
|
.md-quote { color: #6b7280; font-style: italic; border-left: 3px solid #d1d5db; padding-left: 0.5em; }
|
2026-01-21 16:59:01 +08:00
|
|
|
|
.bg-primary { background-color: #1890ff; }
|
|
|
|
|
|
.text-primary { color: #1890ff; }
|
|
|
|
|
|
.border-primary { border-color: #1890ff; }
|
|
|
|
|
|
.text-danger { color: #f5222d; }
|
|
|
|
|
|
:root { --primary: #1890ff; --danger: #f5222d; --success: #52c41a; }
|
|
|
|
|
|
</style>
|
|
|
|
|
|
</head>
|
|
|
|
|
|
<body class="antialiased bg-gray-50 flex h-screen overflow-hidden">
|
|
|
|
|
|
<!-- 侧边导航 -->
|
|
|
|
|
|
<aside class="w-64 text-[#bfcbd9] flex-shrink-0 hidden md:block flex flex-col h-full" style="background-color: #001529;">
|
|
|
|
|
|
<!-- 平台LOGO区域 -->
|
|
|
|
|
|
<div class="pt-5 pb-3 border-b border-[#001529]/30 flex items-center justify-center pl-2">
|
|
|
|
|
|
<img src="../assets/logo/logo.png" alt="Logo" class="w-8 h-8 object-contain mr-2">
|
|
|
|
|
|
<span class="text-white font-medium text-base">远光软件微调平台</span>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
<!-- 导航主区域 -->
|
|
|
|
|
|
<nav class="flex-1 overflow-y-auto py-2 relative">
|
|
|
|
|
|
<!-- 滑块指示器 -->
|
|
|
|
|
|
<div class="sidebar-slider" id="sidebar-slider"></div>
|
|
|
|
|
|
|
|
|
|
|
|
<!-- 第一分区:模型服务 -->
|
|
|
|
|
|
<div class="sidebar-section-title">模型服务</div>
|
|
|
|
|
|
<div class="nav-item-wrapper">
|
|
|
|
|
|
<a href="main.html" data-page="fine-tune" class="nav-link flex items-center px-4 py-2.5 hover:bg-[#001529]/20 transition-colors">
|
|
|
|
|
|
<i class="fa fa-cogs w-5 text-center"></i>
|
|
|
|
|
|
<span class="ml-2">模型调优</span>
|
|
|
|
|
|
</a>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
<div class="nav-item-wrapper">
|
|
|
|
|
|
<a href="main.html?page=my-models" data-page="my-models" class="nav-link flex items-center px-4 py-2.5 hover:bg-[#001529]/20 transition-colors">
|
|
|
|
|
|
<i class="fa fa-database w-5 text-center"></i>
|
|
|
|
|
|
<span class="ml-2">我的模型</span>
|
|
|
|
|
|
</a>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
<div class="nav-item-wrapper">
|
|
|
|
|
|
<a href="main.html?page=model-eval" data-page="model-eval" class="nav-link flex items-center px-4 py-2.5 hover:bg-[#001529]/20 transition-colors">
|
|
|
|
|
|
<i class="fa fa-line-chart w-5 text-center"></i>
|
|
|
|
|
|
<span class="ml-2">模型评测</span>
|
|
|
|
|
|
</a>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
<div class="nav-item-wrapper">
|
|
|
|
|
|
<a href="main.html?page=model-compare" data-page="model-compare" class="nav-link flex items-center px-4 py-2.5 hover:bg-[#001529]/20 transition-colors">
|
|
|
|
|
|
<i class="fa fa-server w-5 text-center"></i>
|
|
|
|
|
|
<span class="ml-2">模型对比</span>
|
|
|
|
|
|
</a>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
<!-- 第二分区:资源管理 -->
|
|
|
|
|
|
<div class="sidebar-section-title mt-6">资源管理</div>
|
|
|
|
|
|
<div class="nav-item-wrapper">
|
|
|
|
|
|
<a href="main.html?page=model-manage" data-page="model-manage" class="nav-link flex items-center px-4 py-2.5 hover:bg-[#001529]/20 transition-colors">
|
|
|
|
|
|
<i class="fa fa-cube w-5 text-center"></i>
|
|
|
|
|
|
<span class="ml-2">模型管理</span>
|
|
|
|
|
|
</a>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
<div class="nav-item-wrapper">
|
|
|
|
|
|
<a href="main.html?page=dataset-manage" data-page="dataset-manage" class="nav-link flex items-center px-4 py-2.5 hover:bg-[#001529]/20 transition-colors">
|
|
|
|
|
|
<i class="fa fa-file-text w-5 text-center"></i>
|
|
|
|
|
|
<span class="ml-2">数据集管理</span>
|
|
|
|
|
|
</a>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
<div class="nav-item-wrapper">
|
|
|
|
|
|
<a href="main.html?page=data-generate" data-page="data-generate" class="nav-link flex items-center px-4 py-2.5 hover:bg-[#001529]/20 transition-colors">
|
|
|
|
|
|
<i class="fa fa-database w-5 text-center"></i>
|
|
|
|
|
|
<span class="ml-2">其他工具</span>
|
|
|
|
|
|
</a>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
<!-- 第三分区:系统设置 -->
|
|
|
|
|
|
<div class="sidebar-section-title mt-6">系统设置</div>
|
|
|
|
|
|
<div class="nav-item-wrapper">
|
|
|
|
|
|
<a href="main.html?page=config" data-page="config" class="nav-link flex items-center px-4 py-2.5 hover:bg-[#001529]/20 transition-colors">
|
|
|
|
|
|
<i class="fa fa-bar-chart w-5 text-center"></i>
|
|
|
|
|
|
<span class="ml-2">平台性能</span>
|
|
|
|
|
|
</a>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</nav>
|
|
|
|
|
|
|
|
|
|
|
|
<!-- 底部信息区域 -->
|
|
|
|
|
|
<div class="p-4 border-t border-[#001529]/30 text-xs mt-auto">
|
|
|
|
|
|
<div class="mb-2 text-[#bfcbd9]/80">默认业务空间</div>
|
|
|
|
|
|
<div class="flex items-center justify-between">
|
|
|
|
|
|
<span class="text-[#bfcbd9]">版本 v1.0.0</span>
|
|
|
|
|
|
<i class="fa fa-question-circle-o text-[#bfcbd9]/70"></i>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</aside>
|
|
|
|
|
|
|
|
|
|
|
|
<!-- 主内容区 -->
|
|
|
|
|
|
<div class="flex-1 flex flex-col overflow-hidden">
|
|
|
|
|
|
<!-- 顶部导航 -->
|
|
|
|
|
|
<header class="bg-white border-b border-gray-200 shadow-sm">
|
|
|
|
|
|
<div class="flex items-center justify-between px-6 h-14">
|
|
|
|
|
|
<div class="flex items-center space-x-4">
|
|
|
|
|
|
<a href="#" onclick="goBack()" class="text-gray-500 hover:text-gray-700 flex items-center">
|
|
|
|
|
|
<i class="fa fa-arrow-left"></i>
|
|
|
|
|
|
<span class="ml-1">上一步</span>
|
|
|
|
|
|
</a>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
<div class="flex items-center space-x-4">
|
|
|
|
|
|
<div class="relative group">
|
|
|
|
|
|
<img src="https://picsum.photos/id/1005/32/32" class="w-8 h-8 rounded-full cursor-pointer" alt="用户头像">
|
|
|
|
|
|
<div class="absolute right-0 top-full mt-2 bg-white rounded shadow-lg py-1 hidden group-hover:block border border-gray-100 min-w-[140px]">
|
|
|
|
|
|
<a href="login.html" class="block px-4 py-2 text-sm text-gray-700 hover:bg-gray-50 whitespace-nowrap">
|
|
|
|
|
|
<i class="fa fa-sign-out mr-1"></i>退出登录
|
|
|
|
|
|
</a>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</header>
|
|
|
|
|
|
|
|
|
|
|
|
<!-- 内容区域 -->
|
|
|
|
|
|
<main class="flex-1 overflow-y-auto p-6 bg-gray-50">
|
|
|
|
|
|
<!-- 页面标题 -->
|
|
|
|
|
|
<div class="bg-white rounded-lg shadow-sm w-full p-4 border-b border-gray-100 mb-4">
|
|
|
|
|
|
<div class="flex items-center text-sm">
|
|
|
|
|
|
<a href="main.html?page=model-eval" class="text-primary hover:underline">模型评测</a>
|
|
|
|
|
|
<span class="mx-2 text-gray-300">/</span>
|
|
|
|
|
|
<span class="text-gray-800 font-medium">添加维度</span>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
<!-- 表单内容 -->
|
|
|
|
|
|
<div class="bg-white rounded-lg shadow-sm w-full">
|
|
|
|
|
|
<div class="p-6">
|
|
|
|
|
|
<form id="dimensionForm">
|
|
|
|
|
|
<!-- 基本信息 -->
|
|
|
|
|
|
<div class="mb-6">
|
|
|
|
|
|
<h3 class="text-sm font-semibold text-gray-700 mb-4 pb-2 border-b border-gray-100">基本信息</h3>
|
|
|
|
|
|
<div class="space-y-4">
|
|
|
|
|
|
<div>
|
|
|
|
|
|
<label class="form-label">维度名称 <span class="text-red-500">*</span></label>
|
|
|
|
|
|
<input type="text" name="name" class="form-input" placeholder="请输入维度名称" maxlength="50" required>
|
|
|
|
|
|
<p class="text-xs text-gray-400 mt-1"><span id="nameCount">0</span> / 50</p>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
<div>
|
|
|
|
|
|
<label class="form-label">指标类型 <span class="text-red-500">*</span></label>
|
|
|
|
|
|
<select name="type" id="dimensionType" class="form-select" required>
|
|
|
|
|
|
<option value="">请选择指标类型</option>
|
|
|
|
|
|
<option value="classification">大模型评估-分类型</option>
|
|
|
|
|
|
<option value="metric">大模型评估-指标型</option>
|
2026-01-22 10:41:06 +08:00
|
|
|
|
<option value="text_similarity">规则评估-文本相似度</option>
|
2026-01-21 16:59:01 +08:00
|
|
|
|
</select>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
<div>
|
|
|
|
|
|
<label class="form-label">描述</label>
|
|
|
|
|
|
<textarea name="description" class="form-input" rows="3" placeholder="请输入维度描述" maxlength="200"></textarea>
|
|
|
|
|
|
<p class="text-xs text-gray-400 mt-1"><span id="descCount">0</span> / 200</p>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
<!-- 计算配置 -->
|
|
|
|
|
|
<div id="evalConfigSection" style="display: none; margin-bottom: 1.5rem;">
|
|
|
|
|
|
<h3 class="text-sm font-semibold text-gray-700 mb-4 pb-2 border-b border-gray-100">计算配置</h3>
|
|
|
|
|
|
<div class="space-y-4">
|
2026-01-22 10:41:06 +08:00
|
|
|
|
<div id="evalModelSection">
|
2026-01-21 16:59:01 +08:00
|
|
|
|
<label class="form-label">选择大模型 <span class="text-red-500">*</span></label>
|
|
|
|
|
|
<select name="eval_model" id="evalModel" class="form-select">
|
|
|
|
|
|
<option value="">请选择评估使用的大模型</option>
|
|
|
|
|
|
<option value="gpt-4">GPT-4</option>
|
|
|
|
|
|
<option value="claude-3">Claude-3</option>
|
|
|
|
|
|
<option value="ernie">文心一言</option>
|
|
|
|
|
|
<option value="qwen">通义千问</option>
|
|
|
|
|
|
<option value="chatglm">ChatGLM</option>
|
|
|
|
|
|
</select>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
<div>
|
|
|
|
|
|
<label class="form-label">评估方式 <span class="text-red-500">*</span></label>
|
2026-01-22 10:41:06 +08:00
|
|
|
|
<div class="grid grid-cols-2 md:grid-cols-3 xl:grid-cols-5 gap-3" id="evalMethodContainer">
|
2026-01-21 21:52:37 +08:00
|
|
|
|
<!-- 评估方式将通过JS动态生成 -->
|
2026-01-21 16:59:01 +08:00
|
|
|
|
</div>
|
|
|
|
|
|
</div>
|
2026-01-22 10:41:06 +08:00
|
|
|
|
<div id="evalPromptSection">
|
2026-01-21 21:52:37 +08:00
|
|
|
|
<label class="form-label flex items-center">
|
|
|
|
|
|
评估 Prompt <span class="text-red-500 ml-1">*</span>
|
|
|
|
|
|
</label>
|
|
|
|
|
|
<div class="markdown-editor-wrapper">
|
|
|
|
|
|
<!-- 隐藏域用于存储实际值 -->
|
|
|
|
|
|
<input type="hidden" name="eval_prompt" id="evalPromptValue">
|
|
|
|
|
|
<!-- 可编辑的 Markdown 编辑器 -->
|
|
|
|
|
|
<div id="evalPromptEditor" class="markdown-editor" contenteditable="true"
|
|
|
|
|
|
data-placeholder="请输入评估时使用的 Prompt"></div>
|
|
|
|
|
|
<button type="button" onclick="resetEvalPrompt()" class="absolute top-2 right-2 px-3 py-1 bg-gray-100 text-gray-600 text-xs rounded hover:bg-gray-200 transition-colors z-10">
|
|
|
|
|
|
恢复默认 Prompt
|
|
|
|
|
|
</button>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
<p class="text-xs text-gray-400 mt-1">定义大模型评估时使用的提示词,支持 Markdown 语法</p>
|
2026-01-21 16:59:01 +08:00
|
|
|
|
</div>
|
2026-01-22 10:41:06 +08:00
|
|
|
|
<!-- 规则评估特有配置(仅文本相似度类型显示) -->
|
|
|
|
|
|
<div id="textSimilarityConfig" style="display: none;">
|
|
|
|
|
|
<div class="grid grid-cols-2 gap-4">
|
|
|
|
|
|
<div>
|
|
|
|
|
|
<label class="form-label">BLEU n-gram 范围</label>
|
|
|
|
|
|
<select name="bleu_n" class="form-select mt-1">
|
|
|
|
|
|
<option value="1-4">1-4 (标准)</option>
|
|
|
|
|
|
<option value="1-3">1-3</option>
|
|
|
|
|
|
<option value="1-2">1-2</option>
|
|
|
|
|
|
<option value="2-4">2-4</option>
|
|
|
|
|
|
<option value="3-4">3-4</option>
|
|
|
|
|
|
<option value="4-4">仅 4</option>
|
|
|
|
|
|
</select>
|
|
|
|
|
|
<p class="text-xs text-gray-400 mt-1">计算精度时使用的 N-gram 范围</p>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
<div>
|
|
|
|
|
|
<label class="form-label">输出精度</label>
|
|
|
|
|
|
<select name="output_precision" class="form-select mt-1">
|
|
|
|
|
|
<option value="4">4 位小数 (0.0000)</option>
|
|
|
|
|
|
<option value="3" selected>3 位小数 (0.000)</option>
|
|
|
|
|
|
<option value="2">2 位小数 (0.00)</option>
|
|
|
|
|
|
<option value="1">1 位小数 (0.0)</option>
|
|
|
|
|
|
</select>
|
|
|
|
|
|
<p class="text-xs text-gray-400 mt-1">分数显示保留的小数位数</p>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</div>
|
2026-01-21 16:59:01 +08:00
|
|
|
|
<!-- 评分范围和通过阈值(仅指标型显示) -->
|
|
|
|
|
|
<div id="scoreConfigSection" style="display: none;">
|
|
|
|
|
|
<div class="mb-6">
|
|
|
|
|
|
<label class="form-label">评分范围</label>
|
|
|
|
|
|
<div class="flex items-center space-x-2">
|
|
|
|
|
|
<div class="flex items-center">
|
|
|
|
|
|
<span class="text-sm text-gray-500 mr-2">最小分数</span>
|
|
|
|
|
|
<input type="number" name="score_min" id="scoreMin" class="form-input w-28" value="0" min="0" step="1">
|
|
|
|
|
|
</div>
|
|
|
|
|
|
<span class="text-gray-400">-</span>
|
|
|
|
|
|
<div class="flex items-center">
|
|
|
|
|
|
<span class="text-sm text-gray-500 mr-2">最大分数</span>
|
|
|
|
|
|
<input type="number" name="score_max" id="scoreMax" class="form-input w-28" value="5" min="0" step="1">
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
<div>
|
|
|
|
|
|
<label class="form-label">通过阈值</label>
|
|
|
|
|
|
<div class="flex items-center space-x-3">
|
|
|
|
|
|
<input type="range" name="pass_threshold" id="passThreshold" class="slider-thumb w-64" min="0" max="5" step="0.5" value="3">
|
|
|
|
|
|
<input type="number" id="passThresholdValue" class="form-input w-20 text-center" min="0" max="5" step="0.5" value="3">
|
|
|
|
|
|
</div>
|
|
|
|
|
|
<p class="text-xs text-gray-400 mt-1">评分大于等于此值视为通过</p>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
<!-- 状态设置 -->
|
|
|
|
|
|
<div class="mb-6">
|
|
|
|
|
|
<h3 class="text-sm font-semibold text-gray-700 mb-4 pb-2 border-b border-gray-100">状态设置</h3>
|
|
|
|
|
|
<div class="space-y-4">
|
|
|
|
|
|
<div class="flex items-center">
|
|
|
|
|
|
<input type="checkbox" name="is_active" id="isActive" class="w-4 h-4 text-primary border-gray-300 rounded focus:ring-primary" checked>
|
|
|
|
|
|
<label for="isActive" class="ml-2 text-sm text-gray-700">启用该维度</label>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
<div class="flex items-center">
|
|
|
|
|
|
<input type="checkbox" name="is_default" id="isDefault" class="w-4 h-4 text-primary border-gray-300 rounded focus:ring-primary">
|
|
|
|
|
|
<label for="isDefault" class="ml-2 text-sm text-gray-700">设为默认评测维度</label>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
<!-- 底部按钮 -->
|
|
|
|
|
|
<div class="flex items-center justify-between pt-6 border-t border-gray-100">
|
|
|
|
|
|
<div class="flex items-center space-x-3">
|
|
|
|
|
|
<button type="button" onclick="submitForm()" class="px-4 py-2 bg-primary text-white rounded-lg text-sm hover:bg-primary/90 transition-colors">
|
|
|
|
|
|
保存维度
|
|
|
|
|
|
</button>
|
|
|
|
|
|
<button type="button" onclick="goBack()" class="px-4 py-2 bg-gray-200 text-gray-700 rounded-lg text-sm hover:bg-gray-300 transition-colors">
|
|
|
|
|
|
取消
|
|
|
|
|
|
</button>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</form>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
</main>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
<script>
|
|
|
|
|
|
/**
 * Build the API base URL from the location the page was loaded from.
 *
 * Reuses the page's protocol and hostname, and always targets port 8080
 * with the `/api` path prefix.
 *
 * @returns {string} URL of the form `<protocol>//<hostname>:8080/api`.
 */
const getApiBase = () => {
    const { protocol, hostname: host } = window.location;
    const origin = `${protocol}//${host}`;
    return `${origin}:8080/api`;
};
|
|
|
|
|
|
// API base URL, computed once while the script loads (see getApiBase).
const API_BASE = getApiBase();
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Navigate back to the model-evaluation list page.
 *
 * Used by the header "back" link and the form's cancel button.
 */
function goBack() {
    const listPageUrl = 'main.html?page=model-eval';
    window.location.href = listPageUrl;
}
|
|
|
|
|
|
|
2026-01-21 21:52:37 +08:00
|
|
|
|
/**
 * Render the evaluation-method options for the given dimension type.
 *
 * Rebuilds the content of `#evalMethodContainer` from `EVAL_METHODS[type]`
 * (the first method is pre-checked), wires each option's checkbox, and,
 * unless the type is `text_similarity`, loads the default prompt of the
 * first method into the prompt editor.
 *
 * @param {string} type - Dimension type selected in the form, e.g.
 *     'classification', 'metric' or 'text_similarity'.
 */
function renderEvalMethods(type) {
    const container = document.getElementById('evalMethodContainer');
    if (!container) return;

    const methods = EVAL_METHODS[type] || [];
    const firstMethod = methods[0]?.value || 'standard';
    // Rule-based evaluation has no prompt, so the editor is never touched for it.
    const isTextSimilarity = type === 'text_similarity';

    // Load the default prompt registered for `method` into the editor, then
    // refresh highlighting and the hidden form field. Unknown methods are ignored.
    const applyDefaultPrompt = (method) => {
        const promptEditor = document.getElementById('evalPromptEditor');
        if (!promptEditor) return;
        if (!Object.prototype.hasOwnProperty.call(EVAL_METHOD_PROMPTS, method)) return;
        promptEditor.innerText = EVAL_METHOD_PROMPTS[method];
        updateMarkdownHighlight();
        syncEditorContent();
    };

    container.innerHTML = methods.map((method, index) => {
        const selectedClass = index === 0 ? 'selected' : '';
        const checkedAttr = index === 0 ? 'checked' : '';
        return `
            <label class="checkbox-option ${selectedClass}" data-method="${method.value}">
                <input type="checkbox" name="eval_method" value="${method.value}" ${checkedAttr}>
                <div>
                    <div class="font-medium text-gray-800">${method.name}</div>
                    <div class="text-xs text-gray-500">${method.desc}</div>
                </div>
            </label>
        `;
    }).join('');

    // Listen for `change` on the checkbox rather than `click` on the label.
    // A click on a label is followed by a synthetic click on its checkbox that
    // bubbles back to the label, so a label-level click handler runs twice and
    // first sees the stale `checked` value. That made unchecking an option
    // re-apply its default prompt and overwrite the user's edits.
    container.querySelectorAll('.checkbox-option').forEach((option) => {
        const checkbox = option.querySelector('input[type="checkbox"]');
        if (!checkbox) return;
        checkbox.addEventListener('change', () => {
            option.classList.toggle('selected', checkbox.checked);
            // Only a newly checked method replaces the prompt.
            if (!isTextSimilarity && checkbox.checked) {
                applyDefaultPrompt(checkbox.value);
            }
        });
    });

    // Seed the editor with the prompt of the pre-checked first method.
    if (!isTextSimilarity) {
        applyDefaultPrompt(firstMethod);
    }
}
|
|
|
|
|
|
|
2026-01-21 16:59:01 +08:00
|
|
|
|
/**
 * Show or hide the "calculation config" area to match the selected
 * dimension type.
 *
 * - 'classification' / 'metric': shows the config area with the model and
 *   prompt sections, hides the text-similarity settings, and shows the
 *   score range/threshold section only for 'metric'.
 * - 'text_similarity': shows the config area with the text-similarity
 *   settings, hides the model, prompt and score sections.
 * - anything else: hides the whole config area and changes nothing else.
 *
 * For the first two cases the evaluation-method options are re-rendered.
 */
function toggleEvalConfig() {
    const byId = (id) => document.getElementById(id);

    const type = byId('dimensionType').value;
    const configSection = byId('evalConfigSection');
    const scoreSection = byId('scoreConfigSection');
    const evalModelSection = byId('evalModelSection');
    const evalPromptSection = byId('evalPromptSection');

    const usesLlm = type === 'classification' || type === 'metric';
    const usesRules = type === 'text_similarity';

    // No recognised type selected: collapse the area and leave the rest as is.
    if (!usesLlm && !usesRules) {
        configSection.style.display = 'none';
        return;
    }

    const displayFor = (visible) => (visible ? 'block' : 'none');

    configSection.style.display = 'block';
    // Score range and pass threshold only apply to the metric type.
    scoreSection.style.display = displayFor(type === 'metric');
    // Model and prompt are only needed for LLM-based evaluation.
    if (evalModelSection) evalModelSection.style.display = displayFor(usesLlm);
    if (evalPromptSection) evalPromptSection.style.display = displayFor(usesLlm);

    // Rule-specific settings are only shown for text similarity.
    const textSimConfig = byId('textSimilarityConfig');
    if (textSimConfig) textSimConfig.style.display = displayFor(usesRules);

    renderEvalMethods(type);
}
|
|
|
|
|
|
|
2026-01-21 21:52:37 +08:00
|
|
|
|
/**
 * Restore the prompt editor to the default prompt of the currently
 * selected evaluation method.
 *
 * Falls back to the 'standard' method when no checked option is found.
 * Does nothing when the editor is missing or no default prompt is
 * registered for the method.
 */
function resetEvalPrompt() {
    const promptEditor = document.getElementById('evalPromptEditor');
    const checkedInput = document.querySelector('.checkbox-option.selected input[name="eval_method"]:checked, .radio-option.selected input[name="eval_method"]:checked');

    let method = 'standard';
    if (checkedInput) {
        method = checkedInput.value;
    }

    if (!promptEditor || !EVAL_METHOD_PROMPTS.hasOwnProperty(method)) {
        return;
    }

    promptEditor.innerText = EVAL_METHOD_PROMPTS[method];
    // Re-highlight first, then mirror the new text into the hidden form field.
    updateMarkdownHighlight();
    syncEditorContent();
}
|
|
|
|
|
|
|
|
|
|
|
|
// ---- Markdown syntax highlighting helpers ----

/**
 * Escape a string for safe insertion as HTML text.
 *
 * Lets the browser do the escaping: the text is assigned to a detached
 * <div> as textContent and read back as serialized innerHTML.
 *
 * @param {string} text - Raw text to escape.
 * @returns {string} The text as the browser serializes it inside an element.
 */
function escapeHtml(text) {
    const holder = Object.assign(document.createElement('div'), { textContent: text });
    return holder.innerHTML;
}
|
|
|
|
|
|
|
|
|
|
|
|
// 保存选区位置
|
|
|
|
|
|
// Last selection range captured by saveSelection(); updateMarkdownHighlight()
// stores it here before rewriting the editor and passes it to restoreSelection().
let savedSelection = null;
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Capture the window's current selection as a cloned Range.
 *
 * @returns {Range|null} A clone of the first selection range, or null when
 *     the prompt editor is not in the page or nothing is selected.
 */
function saveSelection() {
    if (!document.getElementById('evalPromptEditor')) {
        return null;
    }
    const current = window.getSelection();
    return current.rangeCount > 0 ? current.getRangeAt(0).cloneRange() : null;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Make the given Range the window's only selection range.
 *
 * Does nothing when the prompt editor is not in the page or `range` is falsy.
 *
 * @param {Range|null} range - Range previously returned by saveSelection().
 */
function restoreSelection(range) {
    const editorPresent = Boolean(document.getElementById('evalPromptEditor'));
    if (editorPresent && range) {
        const current = window.getSelection();
        current.removeAllRanges();
        current.addRange(range);
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Copy the prompt editor's plain text into the hidden `eval_prompt` input
 * so the value is available alongside the other form fields.
 *
 * Does nothing when either element is missing.
 */
function syncEditorContent() {
    const [editor, hiddenInput] = ['evalPromptEditor', 'evalPromptValue']
        .map((id) => document.getElementById(id));
    if (!editor || !hiddenInput) {
        return;
    }
    hiddenInput.value = editor.innerText;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Re-render the prompt editor with Markdown syntax highlighting.
 *
 * Reads the editor's plain text, wraps recognised Markdown constructs in
 * `md-*` spans line by line, and writes the result back as HTML with the
 * lines joined by <br>. The Markdown markers themselves are kept inside the
 * spans, so reading `innerText` afterwards returns the same source text and
 * running the highlighter again gives the same result.
 *
 * Recognised constructs: headings (`#` to `######`, class `md-heading<level>`),
 * list items (`-` or `*`), block quotes (`>`), and inline code spans,
 * bold, italic and links.
 *
 * NOTE(review): the range captured before the rewrite points at nodes that the
 * innerHTML assignment replaces, so the caret may not return to its previous
 * position; confirm in the browser.
 */
function updateMarkdownHighlight() {
    const editor = document.getElementById('evalPromptEditor');
    if (!editor) return;

    // Escape user text before it is placed into innerHTML so typed markup is
    // shown literally instead of being parsed as HTML.
    const escapeText = (text) => text
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');

    // All inline constructs are matched by one alternation in a single pass, so
    // a later rule never re-scans markup inserted by an earlier one. The order
    // matters: fenced code before inline code, bold before italic.
    const inlinePattern = /(```[\s\S]*?```)|(`[^`]+`)|(\*\*.+?\*\*|__.+?__)|(\*[^*]+\*|_[^_]+_)|(\[[^\]]+\]\([^)]+\))/g;
    const highlightInline = (escaped) => escaped.replace(
        inlinePattern,
        (token, codeBlock, inlineCode, bold, italic) => {
            let cssClass = 'md-link';
            if (codeBlock) cssClass = 'md-codeblock';
            else if (inlineCode) cssClass = 'md-code';
            else if (bold) cssClass = 'md-bold';
            else if (italic) cssClass = 'md-italic';
            return `<span class="${cssClass}">${token}</span>`;
        }
    );

    // Highlight one line: detect its block-level prefix first, then apply the
    // inline rules to the remaining text.
    const highlightLine = (line) => {
        const escaped = escapeText(line);

        // Heading: must start at column 0; the class reflects the number of '#'.
        const heading = escaped.match(/^(#{1,6})(\s+)(.+)$/);
        if (heading) {
            const [, hashes, gap, title] = heading;
            return `<span class="md-heading${hashes.length}">${hashes}${gap}${highlightInline(title)}</span>`;
        }

        // List item: leading indentation stays inside the marker span; the
        // editor's `white-space: pre-wrap` style keeps the spaces visible.
        const listItem = escaped.match(/^(\s*)([-*])(\s+)(.+)$/);
        if (listItem) {
            const [, indent, marker, gap, text] = listItem;
            return `<span class="md-list">${indent}${marker}</span>${gap}${highlightInline(text)}`;
        }

        // Block quote: the leading '>' has already been escaped to '&gt;'.
        const quote = escaped.match(/^(&gt;\s*)(.+)$/);
        if (quote) {
            const [, marker, text] = quote;
            return `<span class="md-quote">${marker}${highlightInline(text)}</span>`;
        }

        return highlightInline(escaped);
    };

    // Capture the selection before the DOM is replaced.
    savedSelection = saveSelection();

    const html = editor.innerText.split('\n').map(highlightLine).join('<br>');
    editor.innerHTML = html;

    restoreSelection(savedSelection);
}
|
|
|
|
|
|
|
2026-01-21 16:59:01 +08:00
|
|
|
|
// 评估方式对应的默认 Prompt
|
|
|
|
|
|
const EVAL_METHOD_PROMPTS = {
|
2026-01-21 21:52:37 +08:00
|
|
|
|
// 分类型:标准匹配
|
2026-01-21 16:59:01 +08:00
|
|
|
|
standard: `# 角色
|
2026-01-21 21:52:37 +08:00
|
|
|
|
你是一位专业的对话评估专家,擅长根据提供的标准对助理在对话中的最终反应进行评估,并确定其是否[[Pass]]或[[Fail]]。
|
2026-01-21 16:59:01 +08:00
|
|
|
|
|
|
|
|
|
|
## 技能
|
2026-01-21 21:52:37 +08:00
|
|
|
|
### 技能 1: 对话回顾
|
|
|
|
|
|
- 通读整个对话以理解上下文和背景信息。
|
|
|
|
|
|
- 确保全面理解对话的意图和用户的需求。
|
|
|
|
|
|
|
|
|
|
|
|
### 技能 2: 最终回答识别
|
|
|
|
|
|
- 从对话中准确识别出助理给出的最后一个回答。
|
|
|
|
|
|
- 确保关注的是最终的回答,而不是中间的部分。
|
|
|
|
|
|
|
|
|
|
|
|
### 技能 3: 标准应用
|
|
|
|
|
|
- 仔细审查每个评价标准。
|
|
|
|
|
|
- 将助理的最终回答与标准的各个方面进行详细比较。
|
|
|
|
|
|
|
|
|
|
|
|
### 技能 4: 逐步推理
|
|
|
|
|
|
- 记录每个标准以及最终响应如何满足或不满足该标准。
|
|
|
|
|
|
- 提供详细的证据,并解释为什么最终答复满足或不满足期望。
|
|
|
|
|
|
|
|
|
|
|
|
### 技能 5: 结果判定
|
|
|
|
|
|
- 根据逐步推理的结果,判断最终回答是[[Pass]]或[[Fail]]
|
|
|
|
|
|
- 提供明确的结论并解释理由。
|
|
|
|
|
|
|
|
|
|
|
|
## 输出格式
|
|
|
|
|
|
以下列格式提供结果:
|
|
|
|
|
|
- **分步推理:**[详细推理在这里]
|
|
|
|
|
|
- **最终结果:**[[Pass]]或[[Fail]]
|
2026-01-21 16:59:01 +08:00
|
|
|
|
|
|
|
|
|
|
## 限制
|
2026-01-21 21:52:37 +08:00
|
|
|
|
- 只针对对话中的最终回答进行评估。
|
|
|
|
|
|
- 在评估过程中,确保遵循提供的标准,避免主观判断。
|
|
|
|
|
|
- 如果标准含糊不清,尽最大努力解释并记录所做的假设。
|
|
|
|
|
|
- 保持评估过程的客观性和公正性。
|
2026-01-21 16:59:01 +08:00
|
|
|
|
|
2026-01-21 21:52:37 +08:00
|
|
|
|
# 示例
|
2026-01-21 16:59:01 +08:00
|
|
|
|
|
2026-01-21 21:52:37 +08:00
|
|
|
|
**示例1:**
|
|
|
|
|
|
|
|
|
|
|
|
- **对话:**
|
|
|
|
|
|
- 用户:"你能告诉我明天的天气吗?"
|
|
|
|
|
|
- 助手:"是的,预计今天是晴天,最高气温25°C。"
|
|
|
|
|
|
- 用户:"下午会下雨吗?"
|
|
|
|
|
|
- 助手:"不,预报说今天下午不会下雨。"
|
|
|
|
|
|
|
|
|
|
|
|
- **最后回应:**"不,预报说今天下午不会下雨。"
|
|
|
|
|
|
- **标准:**
|
|
|
|
|
|
1. 提供晴朗的天气预报。
|
|
|
|
|
|
2. 直接回答用户的问题。
|
|
|
|
|
|
|
|
|
|
|
|
- **分步推理:**
|
|
|
|
|
|
- 直接回答用户关于下午下雨的问题。
|
|
|
|
|
|
- 它提供了一个明确的预报,说不会下雨。
|
|
|
|
|
|
|
|
|
|
|
|
- **最终结果:**[[Pass]]
|
|
|
|
|
|
|
|
|
|
|
|
**示例2:**
|
|
|
|
|
|
|
|
|
|
|
|
- **对话:**
|
|
|
|
|
|
- 用户:"明天办公室什么时候开门?"
|
|
|
|
|
|
- 助手:"办公时间通常是上午9点到下午5点。"
|
|
|
|
|
|
|
|
|
|
|
|
- **最后回应:**"办公时间通常是上午9点到下午5点。"
|
|
|
|
|
|
- **标准:**
|
|
|
|
|
|
1. 明确写明明天的营业时间。
|
|
|
|
|
|
2. 避免含糊不清的信息。
|
|
|
|
|
|
|
|
|
|
|
|
- **分步推理:**
|
|
|
|
|
|
- 回答中没有说明明天的具体开放时间;它使用了"通常",这是模糊的。
|
|
|
|
|
|
- 不符合具体的标准。
|
|
|
|
|
|
|
|
|
|
|
|
- **最终结果:**[[Fail]]
|
|
|
|
|
|
|
|
|
|
|
|
# 注意
|
|
|
|
|
|
- 考虑在以前的评估中可能没有遇到的新的或更新的标准。
|
|
|
|
|
|
- 如果标准含糊不清,尽最大努力解释并记录所做的假设。
|
|
|
|
|
|
|
|
|
|
|
|
# 对话
|
|
|
|
|
|
\${prompt}
|
|
|
|
|
|
|
|
|
|
|
|
# 最终输出
|
|
|
|
|
|
\${output}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 标准
|
|
|
|
|
|
|
|
|
|
|
|
1: 评估正确性: 检查文本中的信息是否准确无误。确认事实、数据和引用的准确性。校验语法、拼写和标点符号的正确性。
|
|
|
|
|
|
2: 评估完整性:确保文本涵盖了所有必要的信息。检查是否有遗漏的关键内容或细节。确认文本是否完整地回答了问题或满足了需求。
|
|
|
|
|
|
3: 评估流畅性:评价文本的阅读体验。检查句子结构是否合理,段落是否连贯。确保文本易于理解,没有冗余或重复的部分。
|
|
|
|
|
|
4: 评估逻辑性:检查文本的逻辑结构和推理过程。确认论点和论证之间的逻辑关系。检查是否存在逻辑漏洞或不一致之处。
|
|
|
|
|
|
5: 评估相关性:确保文本与主题或目标紧密相关。检查内容是否紧扣主题,没有偏离。确认文本中的信息是否对用户的需求有实际帮助。
|
|
|
|
|
|
6: 评估安全性:检查文本中是否存在潜在的安全风险。确认文本中没有包含敏感信息或个人隐私。检查是否存在可能引发法律或道德问题的内容。`,
|
|
|
|
|
|
// 分类型:情感分析
|
2026-01-21 16:59:01 +08:00
|
|
|
|
sentiment: `# 角色
|
|
|
|
|
|
你是一位情感分析专家,擅长识别和评估文本中的情感基调。你能够通过分析用户输入的信息内容和上下文,确定其语气是消极的、中性的还是积极的。
|
|
|
|
|
|
|
|
|
|
|
|
## 技能
|
|
|
|
|
|
### 技能1:情感关键字识别
|
|
|
|
|
|
- 识别表明情感或情绪的关键字或短语。
|
|
|
|
|
|
- 注意任何可能影响情绪基调的上下文线索。
|
|
|
|
|
|
|
|
|
|
|
|
### 技能2:详细推理
|
|
|
|
|
|
- 清楚地说明信息中的证据。
|
|
|
|
|
|
- 解释为什么每个证据都有助于结论。
|
|
|
|
|
|
- 确保推理彻底,以验证结论的正确性。
|
|
|
|
|
|
|
|
|
|
|
|
### 技能3:整体语境评估
|
|
|
|
|
|
- 考虑整体语境和用词来评估情绪。
|
|
|
|
|
|
- 将信息的情绪语气分为以下几个等级:消极、中性或积极。
|
|
|
|
|
|
|
|
|
|
|
|
## 限制
|
|
|
|
|
|
- 仅基于提供的文本内容进行情感分析。
|
|
|
|
|
|
- 不引入个人偏见或主观判断。
|
|
|
|
|
|
- 确保推理过程详细且逻辑严密。
|
|
|
|
|
|
|
|
|
|
|
|
## 输出格式
|
|
|
|
|
|
- 推理:[详细推理在这里]
|
|
|
|
|
|
- 结果:"消极"、"中性"或"积极"
|
|
|
|
|
|
|
|
|
|
|
|
## 示例
|
|
|
|
|
|
**输入:**"我对服务不满意。"
|
2026-01-21 21:52:37 +08:00
|
|
|
|
**输出:**
|
2026-01-21 16:59:01 +08:00
|
|
|
|
- 推理:"不满意"这个短语表示不满。在这条信息中没有任何积极的元素,而且上下文明确暗示了一种消极的体验。
|
|
|
|
|
|
- 结果:消极
|
|
|
|
|
|
|
|
|
|
|
|
**输入:**"这顿饭还行,没什么特别的,但也不错。"
|
2026-01-21 21:52:37 +08:00
|
|
|
|
**输出:**
|
2026-01-21 16:59:01 +08:00
|
|
|
|
- 推理:"还行"这个词表示一种中性的感觉。像"没什么特别的"和"也不错"这样的短语既没有强烈的积极情绪,也没有强烈的消极情绪。
|
|
|
|
|
|
- 结果:中性
|
|
|
|
|
|
|
|
|
|
|
|
**输入:**"我在活动中度过了一段美好的时光!"
|
2026-01-21 21:52:37 +08:00
|
|
|
|
**输出:**
|
2026-01-21 16:59:01 +08:00
|
|
|
|
- 推理:"美好"这个词是一个强有力的积极指标。上下文暗示了一次愉快的经历。
|
|
|
|
|
|
- 结果:积极
|
|
|
|
|
|
|
|
|
|
|
|
## 对话
|
|
|
|
|
|
输入: \${prompt}
|
|
|
|
|
|
输出: \${output}`,
|
2026-01-21 21:52:37 +08:00
|
|
|
|
// 数值型:综合评测
|
|
|
|
|
|
metric_standard: `# 角色
|
|
|
|
|
|
你是一位专业的对话评估专家,擅长在1到5的范围内评估助理在对话中的最终反应。你的评估基于详细的思维链推理,确保逐步推理和透明度。
|
|
|
|
|
|
|
|
|
|
|
|
## 技能
|
|
|
|
|
|
### 技能 1: 对话回顾
|
|
|
|
|
|
- 通读整个对话以理解上下文和背景信息。
|
|
|
|
|
|
- 确保全面理解对话的意图和用户的需求。
|
|
|
|
|
|
|
|
|
|
|
|
### 技能 2: 最终回答识别
|
|
|
|
|
|
- 从对话中准确识别出助理给出的最后一个回答。
|
|
|
|
|
|
- 确保关注的是最终的回答,而不是中间的部分。
|
|
|
|
|
|
|
|
|
|
|
|
### 技能 3: 标准应用
|
|
|
|
|
|
- 仔细审查每个评价标准。
|
|
|
|
|
|
- 将助理的最终回答与标准的各个方面进行详细比较。
|
|
|
|
|
|
|
|
|
|
|
|
### 技能 4: 逐步推理
|
|
|
|
|
|
- 记录每个标准以及最终响应如何满足或不满足该标准。
|
|
|
|
|
|
- 提供详细的证据,并解释为什么最终答复满足或不满足期望。
|
|
|
|
|
|
|
|
|
|
|
|
### 技能 5: 结果评分
|
|
|
|
|
|
- 根据逐步推理的结果,在1-5分范围内给出评分。
|
|
|
|
|
|
- 5分:完全满足所有标准
|
|
|
|
|
|
- 1分:完全不满足标准
|
|
|
|
|
|
- 提供明确的结论并解释理由。
|
|
|
|
|
|
|
|
|
|
|
|
## 输出格式
|
|
|
|
|
|
- **分步推理:**[详细推理在这里]
|
|
|
|
|
|
- **最终分数:**仅输出1-5之间的数字
|
|
|
|
|
|
|
|
|
|
|
|
## 限制
|
|
|
|
|
|
- 只针对对话中的最终回答进行评估。
|
|
|
|
|
|
- 在评估过程中,确保遵循提供的标准,避免主观判断。
|
|
|
|
|
|
- 如果标准含糊不清,尽最大努力解释并记录所做的假设。
|
|
|
|
|
|
- 保持评估过程的客观性和公正性。
|
|
|
|
|
|
|
|
|
|
|
|
# 对话
|
|
|
|
|
|
\${prompt}
|
|
|
|
|
|
|
|
|
|
|
|
# 最终输出
|
|
|
|
|
|
\${output}
|
|
|
|
|
|
|
|
|
|
|
|
# 标准
|
|
|
|
|
|
|
|
|
|
|
|
1. 评估正确性: 检查文本中的信息是否准确无误。确认事实、数据和引用的准确性。
|
|
|
|
|
|
2. 评估完整性: 确保文本涵盖了所有必要的信息。
|
|
|
|
|
|
3. 评估流畅性: 评价文本的阅读体验,检查句子结构是否合理。
|
|
|
|
|
|
4. 评估逻辑性: 检查文本的逻辑结构和推理过程。
|
|
|
|
|
|
5. 评估相关性: 确保文本与主题或目标紧密相关。`,
|
|
|
|
|
|
// 数值型:语义相似度
|
2026-01-21 16:59:01 +08:00
|
|
|
|
semantic: `# 角色
|
|
|
|
|
|
你是一名专业的评估专家,擅长在1到5的范围内评估给定输出与基本事实之间的相似程度。你的评估基于详细的思维链推理,确保逐步推理和透明度。
|
|
|
|
|
|
|
|
|
|
|
|
## 技能
|
|
|
|
|
|
### 技能 1: 识别关键要素
|
2026-01-21 21:52:37 +08:00
|
|
|
|
- 识别并列出在输出和基本事实中存在的关键要素。
|
|
|
|
|
|
- 确定句子中的主要信息和核心概念。
|
2026-01-21 16:59:01 +08:00
|
|
|
|
|
|
|
|
|
|
### 技能 2: 比较关键要素
|
2026-01-21 21:52:37 +08:00
|
|
|
|
- 比较这些关键元素,从内容和结构两方面来评估它们的异同。
|
|
|
|
|
|
- 分析句子的内容,包括描述的对象、动作和属性。
|
2026-01-21 16:59:01 +08:00
|
|
|
|
|
|
|
|
|
|
### 技能 3: 语义分析
|
2026-01-21 21:52:37 +08:00
|
|
|
|
- 分析输出和真实值所传达的语义,注意任何显著的偏差。
|
|
|
|
|
|
- 评估句子的整体意义和意图。
|
2026-01-21 16:59:01 +08:00
|
|
|
|
|
|
|
|
|
|
### 技能 4: 相似度分类
|
2026-01-21 21:52:37 +08:00
|
|
|
|
- 基于这些比较,根据定义的标准对相似程度进行分类:
|
|
|
|
|
|
- 5:高度相似-输出值和实际值几乎相同
|
|
|
|
|
|
- 4:有些相似-输出在很大程度上与ground truth相似
|
|
|
|
|
|
- 3:适度相似-有一些明显的差异,但核心本质是在输出中捕获的
|
|
|
|
|
|
- 2:稍微相似-输出只捕获地面真相的几个元素
|
|
|
|
|
|
- 1:不相似-输出与地面真实值明显不同
|
2026-01-21 16:59:01 +08:00
|
|
|
|
|
|
|
|
|
|
### 技能 5: 评分解释
|
2026-01-21 21:52:37 +08:00
|
|
|
|
- 写出为什么选择一个特定分数的原因,确保透明度和正确性。
|
2026-01-21 16:59:01 +08:00
|
|
|
|
|
2026-01-21 21:52:37 +08:00
|
|
|
|
## 输出格式
|
|
|
|
|
|
- 推理:[详细推理在这里]
|
|
|
|
|
|
- 分数:仅输出1-5之间的数字,不要有其他内容
|
2026-01-21 16:59:01 +08:00
|
|
|
|
|
|
|
|
|
|
## 对话
|
|
|
|
|
|
输入:\${prompt}
|
|
|
|
|
|
模型回答:\${output}
|
|
|
|
|
|
参考答案:\${completion}`,
|
2026-01-21 21:52:37 +08:00
|
|
|
|
// 自定义评分器
|
2026-01-21 16:59:01 +08:00
|
|
|
|
custom: ``
|
|
|
|
|
|
};
|
|
|
|
|
|
|
2026-01-21 21:52:37 +08:00
|
|
|
|
// Evaluation methods offered for each metric type.
// Keys are the metric-type values; every entry is { value, name, desc }.
const EVAL_METHODS = (() => {
    // Expand compact [value, name, desc] rows into fresh option objects.
    const toOptions = (rows) => rows.map(([value, name, desc]) => ({ value, name, desc }));

    return {
        classification: toOptions([
            ['standard', '标准匹配', '根据预设标准评估'],
            ['sentiment', '情感分析', '识别文本情感倾向'],
            ['custom', '自定义评分器', '自定义评估规则'],
        ]),
        metric: toOptions([
            ['metric_standard', '综合评测', '根据预设标准评分'],
            ['semantic', '语义相似度', '计算语义相似程度'],
            ['custom', '自定义评分器', '自定义评估规则'],
        ]),
        text_similarity: toOptions([
            ['bleu_4', 'BLEU-4', 'N-gram 精确度评估'],
            ['cosine', 'Cosine', '余弦相似度'],
            ['rouge_1', 'ROUGE-1', 'unigram 重叠评估'],
            ['rouge_2', 'ROUGE-2', 'bigram 重叠评估'],
            ['rouge_4', 'ROUGE-4', '4-gram 重叠评估'],
        ]),
    };
})();
|
|
|
|
|
|
|
2026-01-21 16:59:01 +08:00
|
|
|
|
// Page initialisation
/**
 * Wire up every interactive control of the page.
 *
 * Binds the metric-type dropdown, the prompt editor, the pass-threshold
 * slider and its numeric text box, the score-max input, the sidebar nav
 * links and the name/description character counters, then highlights the
 * current sidebar entry and calls toggleEvalConfig() once for the initial
 * state. Every element lookup is guarded, so a missing element disables only
 * its own feature instead of aborting the whole initialisation or throwing
 * later inside an event handler.
 */
function initPage() {
    // Metric-type dropdown: re-run toggleEvalConfig whenever the type changes.
    const dimensionType = document.getElementById('dimensionType');
    if (dimensionType) {
        dimensionType.addEventListener('change', toggleEvalConfig);
    }

    // Markdown prompt editor.
    const evalPromptEditor = document.getElementById('evalPromptEditor');
    if (evalPromptEditor) {
        // While typing, only mirror the content into the hidden field;
        // re-highlighting on every keystroke would make the caret jump.
        evalPromptEditor.addEventListener('input', () => {
            syncEditorContent();
        });

        // Re-highlight only once the user has stopped editing (focus left).
        evalPromptEditor.addEventListener('blur', () => {
            updateMarkdownHighlight();
        });

        // Initial content.
        updateMarkdownHighlight();
        syncEditorContent();
    }

    // Pass-threshold slider and its companion numeric text box.
    const passThresholdSlider = document.getElementById('passThreshold');
    const passThresholdValue = document.getElementById('passThresholdValue');

    // Paint the filled part of the slider track via its background-size.
    function updateSliderBackground(slider) {
        const min = parseFloat(slider.min) || 0;
        const max = parseFloat(slider.max) || 5;
        const value = parseFloat(slider.value) || 0;
        const range = max - min;
        // A zero or negative range would yield NaN/Infinity; show an empty track instead.
        const percent = range > 0 ? ((value - min) / range) * 100 : 0;
        slider.style.backgroundSize = percent + '% 100%';
    }

    if (passThresholdSlider && passThresholdValue) {
        // Initial track fill.
        updateSliderBackground(passThresholdSlider);

        // Dragging the slider updates the text box.
        passThresholdSlider.addEventListener('input', function() {
            passThresholdValue.value = parseFloat(this.value);
            updateSliderBackground(this);
        });

        // Typing in the text box updates the slider, clamped to [0, scoreMax].
        passThresholdValue.addEventListener('input', function() {
            let val = parseFloat(this.value);
            if (isNaN(val)) val = 0;
            const scoreMaxField = document.getElementById('scoreMax');
            const maxVal = (scoreMaxField && parseFloat(scoreMaxField.value)) || 5;
            if (val > maxVal) val = maxVal;
            if (val < 0) val = 0;
            passThresholdSlider.value = val;
            updateSliderBackground(passThresholdSlider);
        });
    }

    // Score-max input: keeps the slider's upper bound in sync.
    // Registered only when the slider exists, since the handler writes to it.
    const scoreMaxInput = document.getElementById('scoreMax');
    if (scoreMaxInput && passThresholdSlider) {
        scoreMaxInput.addEventListener('input', function() {
            const maxVal = parseFloat(this.value) || 5;
            passThresholdSlider.max = maxVal;
            // Pull the current threshold down if it now exceeds the maximum.
            if (parseFloat(passThresholdSlider.value) > maxVal) {
                passThresholdSlider.value = maxVal;
                if (passThresholdValue) {
                    passThresholdValue.value = maxVal.toFixed(1);
                }
            }
            updateSliderBackground(passThresholdSlider);
        });
    }

    // Sidebar navigation: links that do not point at this page navigate explicitly.
    document.querySelectorAll('.nav-link').forEach(link => {
        link.addEventListener('click', function(e) {
            if (!this.href.includes('model-dimension-create')) {
                e.preventDefault();
                window.location.href = this.href;
            }
        });
    });

    // Live character counters for the name and description fields.
    // The counter element is looked up on each event and skipped when absent.
    const bindCharCounter = (input, counterId) => {
        if (!input) return;
        input.addEventListener('input', () => {
            const counter = document.getElementById(counterId);
            if (counter) {
                counter.textContent = input.value.length;
            }
        });
    };
    bindCharCounter(document.querySelector('input[name="name"]'), 'nameCount');
    bindCharCounter(document.querySelector('textarea[name="description"]'), 'descCount');

    // Highlight the sidebar entry whose data-page matches this page.
    const currentPage = 'model-eval';
    document.querySelectorAll('.nav-link').forEach(link => {
        if (link.dataset.page === currentPage) {
            link.classList.add('bg-[#1890ff]/10', 'text-[#1890ff]');
            link.classList.remove('hover:bg-[#001529]/20', 'transition-colors');
        }
    });
    updateSidebarSlider();

    // Initialise the display state of the evaluation config section.
    toggleEvalConfig();
}
|
|
|
|
|
|
|
|
|
|
|
|
// Run initPage as soon as the DOM is usable: immediately when the document
// has already finished parsing, otherwise on DOMContentLoaded.
if (document.readyState !== 'loading') {
    initPage();
} else {
    document.addEventListener('DOMContentLoaded', initPage);
}
|
|
|
|
|
|
|
|
|
|
|
|
// Update the sidebar slider position
/**
 * Align the #sidebar-slider element with the active nav link's
 * .nav-item-wrapper by copying the wrapper's offsetTop / offsetHeight.
 * Does nothing when the slider, the active link or its wrapper is missing.
 */
function updateSidebarSlider() {
    const indicator = document.getElementById('sidebar-slider');
    if (!indicator) {
        return;
    }

    // The active link is identified by the highlight class (escaped for CSS).
    const current = document.querySelector('.nav-link.bg-\\[\\#1890ff\\]\\/10');
    if (!current) {
        return;
    }

    const host = current.closest('.nav-item-wrapper');
    if (!host) {
        return;
    }

    indicator.style.top = `${host.offsetTop}px`;
    indicator.style.height = `${host.offsetHeight}px`;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Submit the form
/**
 * Validate the #dimensionForm fields, build the request payload and POST it
 * as JSON to `${API_BASE}/model-eval/dimension`.
 *
 * Validation failures and request errors are reported through alert(); on a
 * response whose JSON has `code === 0` a success alert is shown and goBack()
 * is called. API_BASE and goBack are defined elsewhere in this file.
 *
 * @returns {Promise<void>} Resolves once the request finished or validation stopped it.
 */
async function submitForm() {
    const form = document.getElementById('dimensionForm');
    const formData = new FormData(form);

    // FormData.get() returns null for fields that are absent from the form
    // data set; normalise to a trimmed string so validation never throws.
    const field = (key) => {
        const raw = formData.get(key);
        return typeof raw === 'string' ? raw.trim() : '';
    };

    // Parse a float, falling back only when the input is not a number.
    // A plain `|| fallback` would also replace a legitimate 0.
    const numberOr = (raw, fallback) => {
        const parsed = parseFloat(raw);
        return Number.isNaN(parsed) ? fallback : parsed;
    };

    const name = field('name');
    const type = field('type');

    if (!name) {
        alert('请输入维度名称');
        return;
    }

    if (!type) {
        alert('请选择指标类型');
        return;
    }

    const isRuleBased = type === 'text_similarity';

    // Evaluation method: an array of checked values for rule-based metrics,
    // a single form value otherwise.
    let evalMethod;
    if (isRuleBased) {
        evalMethod = Array.from(document.querySelectorAll('input[name="eval_method"]:checked')).map(cb => cb.value);
        if (evalMethod.length === 0) {
            alert('请至少选择一个评估方式');
            return;
        }
    } else {
        evalMethod = formData.get('eval_method');
        // Model-based metrics need a model, a prompt and a method.
        if (type === 'classification' || type === 'metric') {
            if (!field('eval_model')) {
                alert('请选择评估使用的大模型');
                return;
            }
            if (!field('eval_prompt')) {
                alert('请输入评估 Prompt');
                return;
            }
            if (!evalMethod) {
                alert('请选择评估方式');
                return;
            }
        }
    }

    const data = {
        name: name,
        type: type,
        description: field('description'),
        eval_model: isRuleBased ? null : formData.get('eval_model'),
        eval_method: evalMethod,
        eval_prompt: isRuleBased ? null : field('eval_prompt'),
        is_active: formData.get('is_active') === 'on',
        is_default: formData.get('is_default') === 'on',
        create_time: new Date().toLocaleString('zh-CN', { hour12: false }).replace(/\//g, '-')
    };

    // Settings specific to rule-based evaluation.
    if (isRuleBased) {
        data.bleu_n = formData.get('bleu_n');
        const precision = Number.parseInt(formData.get('output_precision'), 10);
        data.output_precision = Number.isNaN(precision) ? 3 : precision;
    }

    // Numeric metrics carry a score range and a pass threshold.
    if (type === 'metric') {
        data.score_min = numberOr(formData.get('score_min'), 0);
        // An upper bound of 0 is not usable, so 0 and empty both fall back to 5.
        data.score_max = parseFloat(formData.get('score_max')) || 5;
        // 0 is a valid threshold and must not be replaced by the default.
        data.pass_threshold = numberOr(formData.get('pass_threshold'), 3);
    }

    try {
        const response = await fetch(`${API_BASE}/model-eval/dimension`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(data)
        });

        let result;
        try {
            result = await response.json();
        } catch (parseError) {
            // A non-JSON body on a failed request: the HTTP status says more
            // than the JSON parser's message.
            throw new Error(response.ok ? parseError.message : `HTTP ${response.status}`);
        }

        if (result && result.code === 0) {
            alert('评测维度创建成功!');
            goBack();
        } else {
            alert('创建失败: ' + ((result && result.message) || '未知错误'));
        }
    } catch (error) {
        alert('创建失败: ' + error.message);
    }
}
|
|
|
|
|
|
</script>
|
|
|
|
|
|
</body>
|
|
|
|
|
|
</html>
|