前端重构: - 删除旧的大体积 Vue 组件(HomeView, FileManage, TextSplit 等) - 删除旧的 composables(useFormatters, useModels, useProjects) - 新增 core/, page-logic/, pages/, shared/ 模块化目录结构 - 提取 CSS 到 styles/pages/ 目录 - 添加全局样式 variables.css 和 common.css 后端 API 更新: - chunks: 语义分割 API 增强 - files: 文件处理 API 更新 - models: 模型管理 API 更新 - questions: 问答管理 API 更新 - database: 数据库连接优化 - semantic_embedding: 语义嵌入服务优化 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
165 lines
6.8 KiB
Python
165 lines
6.8 KiB
Python
"""
|
|
Database Models for YG-Dataset
|
|
"""
|
|
from sqlalchemy import Column, String, Text, Integer, BigInteger, ForeignKey, JSON
|
|
from sqlalchemy.dialects.postgresql import UUID
|
|
from sqlalchemy.orm import relationship
|
|
from app.core.database import Base
|
|
from app.models.base import UUIDMixin, TimestampMixin
|
|
|
|
|
|
class Project(Base, UUIDMixin, TimestampMixin):
    """Top-level project record, stored in the ``projects`` table.

    Every child collection declared here uses ``cascade="all, delete-orphan"``,
    so the ORM deletes the dependent rows together with the project.
    """

    __tablename__ = "projects"

    # ---- Columns ----
    name = Column(
        String(255),
        nullable=False,
    )
    description = Column(Text)
    # Project kind. Values listed by the original author: qa, table, database.
    type = Column(
        String(50),
        default="qa",
    )

    # ---- Relationships (one project -> many children) ----
    files = relationship(
        "File",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    chunks = relationship(
        "Chunk",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    tags = relationship(
        "Tag",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    datasets = relationship(
        "Dataset",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    eval_datasets = relationship(
        "EvalDataset",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    model_configs = relationship(
        "ModelConfig",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    tasks = relationship(
        "Task",
        back_populates="project",
        cascade="all, delete-orphan",
    )
|
|
|
|
|
|
class File(Base, UUIDMixin, TimestampMixin):
    """Uploaded document belonging to a project (``files`` table)."""

    __tablename__ = "files"

    # ---- Columns ----
    # Owning project; the foreign key is declared with ON DELETE CASCADE.
    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    filename = Column(
        String(255),
        nullable=False,
    )
    # Document format. Values listed by the original author:
    # pdf, docx, xlsx, csv, epub, md, txt.
    file_type = Column(
        String(50),
        nullable=False,
    )
    file_path = Column(String(500))
    # File size in bytes.
    size = Column(BigInteger)
    # Processing state: pending, processing, completed, failed.
    status = Column(
        String(20),
        default="pending",
    )

    # ---- Relationships ----
    project = relationship(
        "Project",
        back_populates="files",
    )
    # Chunks produced from this file; deleted by the ORM along with the file.
    chunks = relationship(
        "Chunk",
        back_populates="file",
        cascade="all, delete-orphan",
    )
|
|
|
|
|
|
class Chunk(Base, UUIDMixin, TimestampMixin):
    """Piece of text produced by splitting (``chunks`` table)."""

    __tablename__ = "chunks"

    # ---- Columns ----
    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Source file; unlike project_id this column is not declared NOT NULL.
    file_id = Column(
        UUID(as_uuid=True),
        ForeignKey("files.id", ondelete="CASCADE"),
    )
    name = Column(String(255))
    content = Column(
        Text,
        nullable=False,
    )
    summary = Column(Text)
    word_count = Column(Integer)
    # Free-form JSON for additional info such as headings or page numbers.
    extra_data = Column(JSON)

    # ---- Relationships ----
    project = relationship(
        "Project",
        back_populates="chunks",
    )
    file = relationship(
        "File",
        back_populates="chunks",
    )
    # Both child collections are removed by the ORM together with the chunk.
    questions = relationship(
        "Question",
        back_populates="chunk",
        cascade="all, delete-orphan",
    )
    chunk_tags = relationship(
        "ChunkTag",
        back_populates="chunk",
        cascade="all, delete-orphan",
    )
|
|
|
|
|
|
class Tag(Base, UUIDMixin, TimestampMixin):
    """Tag/label used to categorize content within a project (``tags`` table).

    Tags can be nested: ``parent_id`` is a self-referential foreign key to
    ``tags.id``, exposed through the ``parent`` / ``children`` relationships.
    """

    __tablename__ = "tags"

    # ---- Columns ----
    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    label = Column(String(255), nullable=False)
    # Parent tag, if any (not declared NOT NULL).
    parent_id = Column(
        UUID(as_uuid=True),
        ForeignKey("tags.id", ondelete="CASCADE"),
    )
    color = Column(String(20))  # hex color code

    # ---- Relationships ----
    project = relationship("Project", back_populates="tags")
    # remote_side marks ``Tag.id`` as the "one" side of the self-join.
    # ``id`` is presumably supplied by UUIDMixin -- confirm.
    parent = relationship("Tag", remote_side="Tag.id", back_populates="children")
    children = relationship("Tag", back_populates="parent")
    # ChunkTag.tag_id is NOT NULL, so the association rows must be deleted
    # with the tag. Without a delete cascade the ORM tries to set tag_id to
    # NULL when a Tag is deleted, which violates that constraint. This mirrors
    # the cascade already configured on Chunk.chunk_tags.
    chunk_tags = relationship(
        "ChunkTag",
        back_populates="tag",
        cascade="all, delete-orphan",
    )
|
|
|
|
|
|
class ChunkTag(Base, UUIDMixin):
    """Association row linking one chunk to one tag (``chunk_tags`` table).

    Together these rows form the many-to-many link between chunks and tags.
    Unlike the other models in this module, this class does not mix in
    ``TimestampMixin``.
    """

    __tablename__ = "chunk_tags"

    # ---- Columns: both sides of the link are mandatory ----
    chunk_id = Column(
        UUID(as_uuid=True),
        ForeignKey("chunks.id", ondelete="CASCADE"),
        nullable=False,
    )
    tag_id = Column(
        UUID(as_uuid=True),
        ForeignKey("tags.id", ondelete="CASCADE"),
        nullable=False,
    )

    # ---- Relationships ----
    chunk = relationship(
        "Chunk",
        back_populates="chunk_tags",
    )
    tag = relationship(
        "Tag",
        back_populates="chunk_tags",
    )
|
|
|
|
|
|
class Question(Base, UUIDMixin, TimestampMixin):
    """Question with an optional answer, i.e. a QA pair (``questions`` table)."""

    __tablename__ = "questions"

    # ---- Columns ----
    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Chunk the question is attached to; not declared NOT NULL.
    chunk_id = Column(
        UUID(as_uuid=True),
        ForeignKey("chunks.id", ondelete="CASCADE"),
    )
    # Question text.
    content = Column(
        Text,
        nullable=False,
    )
    # Answer text.
    answer = Column(Text)
    # Category such as fact, summary, reasoning, etc.
    question_type = Column(String(50))
    # Origin of the question: manual or generated.
    source = Column(
        String(50),
        default="manual",
    )

    # ---- Relationships ----
    # One-directional: no back_populates is configured for this relationship.
    project = relationship("Project")
    chunk = relationship(
        "Chunk",
        back_populates="questions",
    )
|
|
|
|
|
|
class Dataset(Base, UUIDMixin, TimestampMixin):
    """Named dataset belonging to a project (``datasets`` table)."""

    __tablename__ = "datasets"

    # ---- Columns ----
    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    name = Column(
        String(255),
        nullable=False,
    )
    description = Column(Text)
    # Dataset flavour: qa, conversation, instruction.
    dataset_type = Column(String(50))
    # Free-form JSON payload.
    extra_data = Column(JSON)

    # ---- Relationships ----
    project = relationship(
        "Project",
        back_populates="datasets",
    )
|
|
|
|
|
|
class EvalDataset(Base, UUIDMixin, TimestampMixin):
    """Evaluation dataset belonging to a project (``eval_datasets`` table)."""

    __tablename__ = "eval_datasets"

    # ---- Columns ----
    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    name = Column(
        String(255),
        nullable=False,
    )
    # Question category: mixed, fact, reasoning.
    question_type = Column(String(50))
    # Free-form JSON payload.
    extra_data = Column(JSON)

    # ---- Relationships ----
    project = relationship(
        "Project",
        back_populates="eval_datasets",
    )
|
|
|
|
|
|
class ModelConfig(Base, UUIDMixin, TimestampMixin):
    """Connection settings for an LLM provider (``model_configs`` table)."""

    __tablename__ = "model_configs"

    # ---- Columns ----
    # Explicitly nullable: a config row is not required to reference a project.
    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=True,
    )
    # Provider identifier, e.g. minimax, glm, openai, ali.
    provider = Column(
        String(50),
        nullable=False,
    )
    # Kind of model: chat, vlm, embedding, rerank.
    model_type = Column(
        String(50),
        nullable=False,
        default="chat",
    )
    model_name = Column(String(100))
    api_key = Column(String(500))
    api_base = Column(String(500))
    # Stored as a string column with default "false", not as a Boolean.
    # NOTE(review): presumably holds "true"/"false" -- confirm against callers.
    is_default = Column(
        String(10),
        default="false",
    )
    # Result of the last connectivity check: untested, connected, disconnected.
    connection_status = Column(
        String(20),
        default="untested",
    )

    # ---- Relationships ----
    project = relationship(
        "Project",
        back_populates="model_configs",
    )
|
|
|
|
|
|
class Task(Base, UUIDMixin, TimestampMixin):
    """Background job record (``tasks`` table)."""

    __tablename__ = "tasks"

    # ---- Columns ----
    # Owning project; this column is not declared NOT NULL.
    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
    )
    # Kind of job: split, generate, eval, export.
    task_type = Column(String(50))
    # Lifecycle state: pending, running, completed, failed.
    status = Column(
        String(20),
        default="pending",
    )
    # Completion percentage, 0-100.
    progress = Column(
        Integer,
        default=0,
    )
    # JSON result payload.
    result = Column(JSON)
    # Error text.
    error = Column(Text)

    # ---- Relationships ----
    project = relationship(
        "Project",
        back_populates="tasks",
    )
|