Files
YG-Datasets/backend/app/models/models.py
Developer a1342b7634 feat: 完善前端功能,添加爬虫页面和项目分页
- 新增 CrawlerView 爬虫页面
- 完善 HomeView 分页展示(9个/页)
- 更新 ProjectCard 组件图标
- 优化 API 客户端和类型定义
- 重构样式文件结构到独立目录

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-18 10:45:32 +08:00

164 lines
6.7 KiB
Python

"""
Database Models for YG-Dataset
"""
from sqlalchemy import Column, String, Text, Integer, BigInteger, ForeignKey, JSON
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import relationship
from app.core.database import Base
from app.models.base import UUIDMixin, TimestampMixin
class Project(Base, UUIDMixin, TimestampMixin):
    """Top-level project record that owns the other models in this module.

    Every child collection declared here uses ``cascade="all, delete-orphan"``,
    so deleting a project through the ORM also deletes those children, and
    removing a child from its collection deletes that child.
    """

    __tablename__ = "projects"

    name = Column(String(255), nullable=False)
    description = Column(Text)
    # Expected values: "qa", "table", "database"; defaults to "qa".
    type = Column(String(50), default="qa")

    # Relationships (one project -> many children)
    files = relationship(
        "File",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    chunks = relationship(
        "Chunk",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    tags = relationship(
        "Tag",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    datasets = relationship(
        "Dataset",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    eval_datasets = relationship(
        "EvalDataset",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    model_configs = relationship(
        "ModelConfig",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    tasks = relationship(
        "Task",
        back_populates="project",
        cascade="all, delete-orphan",
    )
class File(Base, UUIDMixin, TimestampMixin):
    """An uploaded document belonging to a project.

    A file is the parent of the chunks linked to it; those chunks are
    deleted with the file (``cascade="all, delete-orphan"``).
    """

    __tablename__ = "files"

    # Owning project; required, and removed at the database level when the
    # project row is deleted (ON DELETE CASCADE).
    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    filename = Column(String(255), nullable=False)
    # Expected values: pdf, docx, xlsx, csv, epub, md, txt.
    file_type = Column(String(50), nullable=False)
    file_path = Column(String(500))
    # File size in bytes.
    size = Column(BigInteger)
    # Expected values: pending, processing, completed, failed.
    status = Column(String(20), default="pending")

    # Relationships
    project = relationship("Project", back_populates="files")
    chunks = relationship(
        "Chunk",
        back_populates="file",
        cascade="all, delete-orphan",
    )
class Chunk(Base, UUIDMixin, TimestampMixin):
    """A piece of text produced by splitting source content.

    A chunk always belongs to a project and may optionally reference the
    file it came from (``file_id`` is nullable). Its questions and tag links
    are deleted together with the chunk.
    """

    __tablename__ = "chunks"

    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    file_id = Column(
        UUID(as_uuid=True),
        ForeignKey("files.id", ondelete="CASCADE"),
    )
    name = Column(String(255))
    content = Column(Text, nullable=False)
    summary = Column(Text)
    word_count = Column(Integer)
    # Free-form JSON for additional info such as headings or page numbers.
    extra_data = Column(JSON)

    # Relationships
    project = relationship("Project", back_populates="chunks")
    file = relationship("File", back_populates="chunks")
    questions = relationship(
        "Question",
        back_populates="chunk",
        cascade="all, delete-orphan",
    )
    chunk_tags = relationship(
        "ChunkTag",
        back_populates="chunk",
        cascade="all, delete-orphan",
    )
class Tag(Base, UUIDMixin, TimestampMixin):
    """Tag/label used to categorize content within a project.

    Tags form a tree through the self-referential ``parent_id`` column:
    ``parent`` is the many-to-one side and ``children`` the one-to-many side.
    Tags are attached to chunks through ``ChunkTag`` link rows.
    """

    __tablename__ = "tags"

    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    label = Column(String(255), nullable=False)
    # Optional parent tag; NULL for a tag without a parent.
    parent_id = Column(
        UUID(as_uuid=True),
        ForeignKey("tags.id", ondelete="CASCADE"),
    )
    # Hex color code.
    color = Column(String(20))

    # Relationships
    project = relationship("Project", back_populates="tags")
    # remote_side marks this as the many-to-one direction of the
    # self-referential pair.
    parent = relationship("Tag", remote_side="Tag.id", back_populates="children")
    # NOTE(review): no delete cascade is configured here, so an ORM delete of
    # a parent tag nulls its children's parent_id rather than deleting them,
    # even though the foreign key declares ON DELETE CASCADE. Confirm which
    # behavior is intended before changing it.
    children = relationship("Tag", back_populates="parent")
    # The link rows must be deleted with the tag: chunk_tags.tag_id is
    # NOT NULL, so the ORM's default of nulling the foreign key on parent
    # delete would fail at flush. Same cascade as Chunk.chunk_tags.
    chunk_tags = relationship(
        "ChunkTag",
        back_populates="tag",
        cascade="all, delete-orphan",
    )
class ChunkTag(Base, UUIDMixin):
    """Association row linking one chunk to one tag (many-to-many).

    Both foreign keys are required and declare ON DELETE CASCADE, so the
    database removes the link when either side is deleted.
    """

    __tablename__ = "chunk_tags"

    chunk_id = Column(
        UUID(as_uuid=True),
        ForeignKey("chunks.id", ondelete="CASCADE"),
        nullable=False,
    )
    tag_id = Column(
        UUID(as_uuid=True),
        ForeignKey("tags.id", ondelete="CASCADE"),
        nullable=False,
    )

    # Relationships
    chunk = relationship("Chunk", back_populates="chunk_tags")
    tag = relationship("Tag", back_populates="chunk_tags")
class Question(Base, UUIDMixin, TimestampMixin):
    """A question with an optional answer (QA pair).

    A question always belongs to a project and may optionally be tied to
    the chunk it refers to (``chunk_id`` is nullable).
    """

    __tablename__ = "questions"

    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    chunk_id = Column(
        UUID(as_uuid=True),
        ForeignKey("chunks.id", ondelete="CASCADE"),
    )
    # Question text.
    content = Column(Text, nullable=False)
    # Answer text; may be empty.
    answer = Column(Text)
    # Examples: fact, summary, reasoning.
    question_type = Column(String(50))
    # Expected values: manual, generated.
    source = Column(String(50), default="manual")

    # Relationships
    # One-directional: no back_populates is given for the project side.
    project = relationship("Project")
    chunk = relationship("Chunk", back_populates="questions")
class Dataset(Base, UUIDMixin, TimestampMixin):
    """A named dataset belonging to a project."""

    __tablename__ = "datasets"

    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    name = Column(String(255), nullable=False)
    description = Column(Text)
    # Expected values: qa, conversation, instruction.
    dataset_type = Column(String(50))
    # Free-form JSON payload; its schema is not defined in this module.
    extra_data = Column(JSON)

    # Relationships
    project = relationship("Project", back_populates="datasets")
class EvalDataset(Base, UUIDMixin, TimestampMixin):
    """A named evaluation dataset belonging to a project."""

    __tablename__ = "eval_datasets"

    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    name = Column(String(255), nullable=False)
    # Expected values: mixed, fact, reasoning.
    question_type = Column(String(50))
    # Free-form JSON payload; its schema is not defined in this module.
    extra_data = Column(JSON)

    # Relationships
    project = relationship("Project", back_populates="eval_datasets")
class ModelConfig(Base, UUIDMixin, TimestampMixin):
    """Connection settings for an LLM provider.

    ``project_id`` is explicitly nullable, so a configuration can exist
    without being attached to a project.
    """

    __tablename__ = "model_configs"

    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=True,
    )
    # Examples: minimax, glm, openai.
    provider = Column(String(50), nullable=False)
    model_name = Column(String(100))
    # NOTE(review): stored in a plain string column; nothing in this model
    # encrypts the value -- confirm how secrets are protected elsewhere.
    api_key = Column(String(500))
    api_base = Column(String(500))
    # Boolean flag kept as text; the default is the string "false",
    # not a Python bool.
    is_default = Column(String(10), default="false")
    # Expected values: untested, connected, disconnected.
    connection_status = Column(String(20), default="untested")

    # Relationships
    project = relationship("Project", back_populates="model_configs")
class Task(Base, UUIDMixin, TimestampMixin):
    """Record tracking a background job and its outcome."""

    __tablename__ = "tasks"

    # Optional owning project (the column is nullable by default).
    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
    )
    # Expected values: split, generate, eval, export.
    task_type = Column(String(50))
    # Expected values: pending, running, completed, failed.
    status = Column(String(20), default="pending")
    # Completion percentage, 0-100; starts at 0.
    progress = Column(Integer, default=0)
    # JSON result payload; its schema is not defined in this module.
    result = Column(JSON)
    # Error text for the task, if any.
    error = Column(Text)

    # Relationships
    project = relationship("Project", back_populates="tasks")