Files
YG-Datasets/backend/app/models/models.py
Developer 7514e7e763 feat: 完善模型管理功能
- 新增模型 API 路由,支持 CRUD 和测试连接
- 支持 MiniMax、GLM、OpenAI Compatible 三种供应商
- 添加连接状态持久化 (untested/connected/disconnected)
- 修复 CORS 和数据库模型兼容性问题
- 前端 UI 优化:供应商默认 API 地址自动填充

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 23:02:43 +08:00

163 lines
6.6 KiB
Python

"""
Database Models for YG-Dataset
"""
from sqlalchemy import Column, String, Text, Integer, BigInteger, ForeignKey, JSON
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import relationship
from app.core.database import Base
from app.models.base import UUIDMixin, TimestampMixin
class Project(Base, UUIDMixin, TimestampMixin):
"""Project model"""
__tablename__ = "projects"
name = Column(String(255), nullable=False)
description = Column(Text)
# Relationships
files = relationship("File", back_populates="project", cascade="all, delete-orphan")
chunks = relationship("Chunk", back_populates="project", cascade="all, delete-orphan")
tags = relationship("Tag", back_populates="project", cascade="all, delete-orphan")
datasets = relationship("Dataset", back_populates="project", cascade="all, delete-orphan")
eval_datasets = relationship("EvalDataset", back_populates="project", cascade="all, delete-orphan")
model_configs = relationship("ModelConfig", back_populates="project", cascade="all, delete-orphan")
tasks = relationship("Task", back_populates="project", cascade="all, delete-orphan")
class File(Base, UUIDMixin, TimestampMixin):
"""File model for uploaded documents"""
__tablename__ = "files"
project_id = Column(UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE"), nullable=False)
filename = Column(String(255), nullable=False)
file_type = Column(String(50), nullable=False) # pdf, docx, xlsx, csv, epub, md, txt
file_path = Column(String(500))
size = Column(BigInteger) # file size in bytes
status = Column(String(20), default="pending") # pending, processing, completed, failed
# Relationships
project = relationship("Project", back_populates="files")
chunks = relationship("Chunk", back_populates="file", cascade="all, delete-orphan")
class Chunk(Base, UUIDMixin, TimestampMixin):
"""Text chunk model after splitting"""
__tablename__ = "chunks"
project_id = Column(UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE"), nullable=False)
file_id = Column(UUID(as_uuid=True), ForeignKey("files.id", ondelete="CASCADE"))
name = Column(String(255))
content = Column(Text, nullable=False)
summary = Column(Text)
word_count = Column(Integer)
extra_data = Column(JSON) # store additional info like headings, page numbers
# Relationships
project = relationship("Project", back_populates="chunks")
file = relationship("File", back_populates="chunks")
questions = relationship("Question", back_populates="chunk", cascade="all, delete-orphan")
chunk_tags = relationship("ChunkTag", back_populates="chunk", cascade="all, delete-orphan")
class Tag(Base, UUIDMixin, TimestampMixin):
"""Tag/Label model for categorizing content"""
__tablename__ = "tags"
project_id = Column(UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE"), nullable=False)
label = Column(String(255), nullable=False)
parent_id = Column(UUID(as_uuid=True), ForeignKey("tags.id", ondelete="CASCADE"))
color = Column(String(20)) # hex color code
# Relationships
project = relationship("Project", back_populates="tags")
parent = relationship("Tag", remote_side="Tag.id", back_populates="children")
children = relationship("Tag", back_populates="parent")
chunk_tags = relationship("ChunkTag", back_populates="tag")
class ChunkTag(Base, UUIDMixin):
"""Many-to-many relationship between chunks and tags"""
__tablename__ = "chunk_tags"
chunk_id = Column(UUID(as_uuid=True), ForeignKey("chunks.id", ondelete="CASCADE"), nullable=False)
tag_id = Column(UUID(as_uuid=True), ForeignKey("tags.id", ondelete="CASCADE"), nullable=False)
# Relationships
chunk = relationship("Chunk", back_populates="chunk_tags")
tag = relationship("Tag", back_populates="chunk_tags")
class Question(Base, UUIDMixin, TimestampMixin):
"""Question/QA pair model"""
__tablename__ = "questions"
project_id = Column(UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE"), nullable=False)
chunk_id = Column(UUID(as_uuid=True), ForeignKey("chunks.id", ondelete="CASCADE"))
content = Column(Text, nullable=False) # question content
answer = Column(Text) # answer content
question_type = Column(String(50)) # fact, summary, reasoning, etc.
source = Column(String(50), default="manual") # manual, generated
# Relationships
project = relationship("Project")
chunk = relationship("Chunk", back_populates="questions")
class Dataset(Base, UUIDMixin, TimestampMixin):
"""Dataset model"""
__tablename__ = "datasets"
project_id = Column(UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE"), nullable=False)
name = Column(String(255), nullable=False)
description = Column(Text)
dataset_type = Column(String(50)) # qa, conversation, instruction
extra_data = Column(JSON)
# Relationships
project = relationship("Project", back_populates="datasets")
class EvalDataset(Base, UUIDMixin, TimestampMixin):
"""Evaluation dataset model"""
__tablename__ = "eval_datasets"
project_id = Column(UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE"), nullable=False)
name = Column(String(255), nullable=False)
question_type = Column(String(50)) # mixed, fact, reasoning
extra_data = Column(JSON)
# Relationships
project = relationship("Project", back_populates="eval_datasets")
class ModelConfig(Base, UUIDMixin, TimestampMixin):
"""Model configuration for LLM providers"""
__tablename__ = "model_configs"
project_id = Column(UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE"), nullable=True)
provider = Column(String(50), nullable=False) # minimax, glm, openai
model_name = Column(String(100))
api_key = Column(String(500))
api_base = Column(String(500))
is_default = Column(String(10), default="false")
connection_status = Column(String(20), default="untested") # untested, connected, disconnected
# Relationships
project = relationship("Project", back_populates="model_configs")
class Task(Base, UUIDMixin, TimestampMixin):
"""Task model for background jobs"""
__tablename__ = "tasks"
project_id = Column(UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE"))
task_type = Column(String(50)) # split, generate, eval, export
status = Column(String(20), default="pending") # pending, running, completed, failed
progress = Column(Integer, default=0) # 0-100
result = Column(JSON)
error = Column(Text)
# Relationships
project = relationship("Project", back_populates="tasks")