Files
YG-Datasets/backend/app/models/models.py
Developer 6aa271c4f7 refactor: 前端架构重构 - 提取 CSS 和逻辑到独立模块
前端重构:
- 删除旧的大体积 Vue 组件(HomeView, FileManage, TextSplit 等)
- 删除旧的 composables(useFormatters, useModels, useProjects)
- 新增 core/, page-logic/, pages/, shared/ 模块化目录结构
- 提取 CSS 到 styles/pages/ 目录
- 添加全局样式 variables.css 和 common.css

后端 API 更新:
- chunks: 语义分割 API 增强
- files: 文件处理 API 更新
- models: 模型管理 API 更新
- questions: 问答管理 API 更新
- database: 数据库连接优化
- semantic_embedding: 语义嵌入服务优化

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-19 14:23:34 +08:00

165 lines
6.8 KiB
Python

"""
Database Models for YG-Dataset
"""
from sqlalchemy import Column, String, Text, Integer, BigInteger, ForeignKey, JSON
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import relationship
from app.core.database import Base
from app.models.base import UUIDMixin, TimestampMixin
class Project(Base, UUIDMixin, TimestampMixin):
    """Top-level container that owns every other record in this module.

    Each collection below is declared with ``cascade="all, delete-orphan"``,
    so the ORM deletes the child rows when the project is deleted through a
    session or when a child is removed from its collection.
    """

    __tablename__ = "projects"

    name = Column(String(255), nullable=False)
    description = Column(Text)
    # Project kind: qa, table, database.
    type = Column(String(50), default="qa")

    # One-to-many collections; each child model exposes the reverse side
    # through its own ``project`` attribute.
    files = relationship(
        "File",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    chunks = relationship(
        "Chunk",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    tags = relationship(
        "Tag",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    datasets = relationship(
        "Dataset",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    eval_datasets = relationship(
        "EvalDataset",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    model_configs = relationship(
        "ModelConfig",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    tasks = relationship(
        "Task",
        back_populates="project",
        cascade="all, delete-orphan",
    )
class File(Base, UUIDMixin, TimestampMixin):
    """Uploaded document that belongs to exactly one project."""

    __tablename__ = "files"

    # Owning project; the database removes the row when the project goes away.
    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    filename = Column(String(255), nullable=False)
    # Document format: pdf, docx, xlsx, csv, epub, md, txt.
    file_type = Column(String(50), nullable=False)
    file_path = Column(String(500))
    # File size in bytes.
    size = Column(BigInteger)
    # Processing state: pending, processing, completed, failed.
    status = Column(String(20), default="pending")

    # Relationships
    project = relationship("Project", back_populates="files")
    # Chunks produced from this file are deleted together with it by the ORM.
    chunks = relationship(
        "Chunk",
        back_populates="file",
        cascade="all, delete-orphan",
    )
class Chunk(Base, UUIDMixin, TimestampMixin):
    """Piece of text produced by splitting a document."""

    __tablename__ = "chunks"

    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Source file; the column is nullable, so a chunk may exist without one.
    file_id = Column(
        UUID(as_uuid=True),
        ForeignKey("files.id", ondelete="CASCADE"),
    )
    name = Column(String(255))
    content = Column(Text, nullable=False)
    summary = Column(Text)
    word_count = Column(Integer)
    # Free-form additional info such as headings or page numbers.
    extra_data = Column(JSON)

    # Relationships
    project = relationship("Project", back_populates="chunks")
    file = relationship("File", back_populates="chunks")
    # Questions and tag links are deleted by the ORM along with the chunk.
    questions = relationship(
        "Question",
        back_populates="chunk",
        cascade="all, delete-orphan",
    )
    chunk_tags = relationship(
        "ChunkTag",
        back_populates="chunk",
        cascade="all, delete-orphan",
    )
class Tag(Base, UUIDMixin, TimestampMixin):
    """Tag/label used to categorize content inside a project.

    Tags form a tree through the self-referential ``parent_id`` foreign key:
    ``parent`` is the many-to-one side and ``children`` the one-to-many side.
    """

    __tablename__ = "tags"

    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    label = Column(String(255), nullable=False)
    # Parent tag; NULL for a root tag. The FK asks the database to delete
    # child tags together with their parent.
    parent_id = Column(
        UUID(as_uuid=True),
        ForeignKey("tags.id", ondelete="CASCADE"),
    )
    # Hex color code.
    color = Column(String(20))

    # Relationships
    project = relationship("Project", back_populates="tags")
    parent = relationship("Tag", remote_side="Tag.id", back_populates="children")
    # Delete cascade mirrors the ON DELETE CASCADE declared on ``parent_id``.
    # Without it, deleting a tag through the session sets the children's
    # ``parent_id`` to NULL and leaves them behind as root tags.
    # ``delete-orphan`` is deliberately omitted so that moving a tag to the
    # root (``tag.parent = None``) does not delete it.
    children = relationship("Tag", back_populates="parent", cascade="all")
    # Link rows must be deleted with the tag: ``chunk_tags.tag_id`` is
    # declared NOT NULL, so the ORM's default behavior of nulling the foreign
    # key on delete would fail with an IntegrityError. This matches the
    # cascade already used by ``Chunk.chunk_tags``.
    chunk_tags = relationship(
        "ChunkTag",
        back_populates="tag",
        cascade="all, delete-orphan",
    )
class ChunkTag(Base, UUIDMixin):
    """Association row linking one chunk to one tag (many-to-many)."""

    # NOTE(review): no uniqueness on (chunk_id, tag_id) is declared in this
    # class, so the same chunk/tag pair could be linked more than once unless
    # that is enforced elsewhere - confirm.
    __tablename__ = "chunk_tags"

    chunk_id = Column(
        UUID(as_uuid=True),
        ForeignKey("chunks.id", ondelete="CASCADE"),
        nullable=False,
    )
    tag_id = Column(
        UUID(as_uuid=True),
        ForeignKey("tags.id", ondelete="CASCADE"),
        nullable=False,
    )

    # Relationships
    chunk = relationship("Chunk", back_populates="chunk_tags")
    tag = relationship("Tag", back_populates="chunk_tags")
class Question(Base, UUIDMixin, TimestampMixin):
    """Question/answer pair, optionally tied to the chunk it came from."""

    __tablename__ = "questions"

    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Originating chunk; nullable, so a question may stand alone.
    chunk_id = Column(
        UUID(as_uuid=True),
        ForeignKey("chunks.id", ondelete="CASCADE"),
    )
    # Question text.
    content = Column(Text, nullable=False)
    # Answer text.
    answer = Column(Text)
    # Category: fact, summary, reasoning, etc.
    question_type = Column(String(50))
    # Origin of the question: manual, generated.
    source = Column(String(50), default="manual")

    # Relationships
    # One-directional: no ``back_populates`` is configured on this side.
    project = relationship("Project")
    chunk = relationship("Chunk", back_populates="questions")
class Dataset(Base, UUIDMixin, TimestampMixin):
    """Named dataset belonging to a project."""

    __tablename__ = "datasets"

    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    name = Column(String(255), nullable=False)
    description = Column(Text)
    # Dataset flavor: qa, conversation, instruction.
    dataset_type = Column(String(50))
    # Free-form JSON payload; its schema is not defined in this module.
    extra_data = Column(JSON)

    # Relationships
    project = relationship("Project", back_populates="datasets")
class EvalDataset(Base, UUIDMixin, TimestampMixin):
    """Evaluation dataset belonging to a project."""

    __tablename__ = "eval_datasets"

    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    name = Column(String(255), nullable=False)
    # Question mix: mixed, fact, reasoning.
    question_type = Column(String(50))
    # Free-form JSON payload; its schema is not defined in this module.
    extra_data = Column(JSON)

    # Relationships
    project = relationship("Project", back_populates="eval_datasets")
class ModelConfig(Base, UUIDMixin, TimestampMixin):
    """Connection settings for one LLM provider/model."""

    __tablename__ = "model_configs"

    # Explicitly nullable: a configuration does not have to belong to a
    # project.
    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=True,
    )
    # Provider key: minimax, glm, openai, ali.
    provider = Column(String(50), nullable=False)
    # Model role: chat, vlm, embedding, rerank.
    model_type = Column(String(50), nullable=False, default="chat")
    model_name = Column(String(100))
    # NOTE(review): declared as a plain string column; confirm whether the
    # key is encrypted or otherwise protected before it is stored.
    api_key = Column(String(500))
    api_base = Column(String(500))
    # Flag kept as text (default "false") rather than a boolean column.
    # NOTE(review): presumably compared against "true"/"false" by callers -
    # confirm before changing the type.
    is_default = Column(String(10), default="false")
    # Connectivity check result: untested, connected, disconnected.
    connection_status = Column(String(20), default="untested")

    # Relationships
    project = relationship("Project", back_populates="model_configs")
class Task(Base, UUIDMixin, TimestampMixin):
    """Record tracking a background job."""

    __tablename__ = "tasks"

    # Nullable: a task is not required to reference a project.
    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
    )
    # Job kind: split, generate, eval, export.
    task_type = Column(String(50))
    # Lifecycle state: pending, running, completed, failed.
    status = Column(String(20), default="pending")
    # Completion percentage, 0-100.
    progress = Column(Integer, default=0)
    # JSON payload for the job outcome; its schema is not defined here.
    result = Column(JSON)
    # Error text for the job.
    error = Column(Text)

    # Relationships
    project = relationship("Project", back_populates="tasks")