first-update
This commit is contained in:
161
backend/app/models/models.py
Normal file
161
backend/app/models/models.py
Normal file
@@ -0,0 +1,161 @@
|
||||
"""
|
||||
Database Models for YG-Dataset
|
||||
"""
|
||||
from sqlalchemy import Column, String, Text, Integer, BigInteger, ForeignKey, JSON
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from sqlalchemy.orm import relationship
|
||||
from app.core.database import Base
|
||||
from app.models.base import UUIDMixin, TimestampMixin
|
||||
|
||||
|
||||
class Project(Base, UUIDMixin, TimestampMixin):
    """Root entity that groups files, chunks, tags, datasets and jobs.

    Every collection declared here uses ``cascade="all, delete-orphan"``,
    so deleting a project through the ORM (or removing a child from one of
    these collections) deletes the corresponding child objects as well.
    """

    __tablename__ = "projects"

    # ---- Columns -----------------------------------------------------------
    name = Column(String(255), nullable=False)
    description = Column(Text)

    # ---- One-to-many collections owned by the project ----------------------
    files = relationship(
        "File",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    chunks = relationship(
        "Chunk",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    tags = relationship(
        "Tag",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    datasets = relationship(
        "Dataset",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    eval_datasets = relationship(
        "EvalDataset",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    model_configs = relationship(
        "ModelConfig",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    tasks = relationship(
        "Task",
        back_populates="project",
        cascade="all, delete-orphan",
    )
|
||||
|
||||
|
||||
class File(Base, UUIDMixin, TimestampMixin):
    """An uploaded document that belongs to a project.

    A file is the parent of the chunks listed in ``chunks``; those chunks
    are deleted together with the file when it is removed through the ORM.
    """

    __tablename__ = "files"

    # Owning project (required); the foreign key is declared ON DELETE CASCADE.
    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    filename = Column(String(255), nullable=False)
    # One of: pdf, docx, xlsx, csv, epub, md, txt
    file_type = Column(String(50), nullable=False)
    file_path = Column(String(500))
    # File size in bytes
    size = Column(BigInteger)
    # One of: pending, processing, completed, failed
    status = Column(String(20), default="pending")

    # ---- Relationships -----------------------------------------------------
    project = relationship("Project", back_populates="files")
    chunks = relationship(
        "Chunk",
        back_populates="file",
        cascade="all, delete-orphan",
    )
|
||||
|
||||
|
||||
class Chunk(Base, UUIDMixin, TimestampMixin):
    """A piece of text produced by splitting a file.

    A chunk always belongs to a project and may optionally reference the
    file it came from. Questions and chunk-tag links attached to a chunk are
    deleted with it when it is removed through the ORM.
    """

    __tablename__ = "chunks"

    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Source file; nullable, so a chunk is not required to reference a file.
    file_id = Column(UUID(as_uuid=True), ForeignKey("files.id", ondelete="CASCADE"))
    name = Column(String(255))
    content = Column(Text, nullable=False)
    summary = Column(Text)
    word_count = Column(Integer)
    # ``metadata`` is a reserved attribute name on declarative classes (it
    # holds the MetaData registry); mapping a column under that name raises
    # InvalidRequestError when the class is defined. The Python attribute is
    # therefore ``metadata_`` while the database column keeps the name
    # "metadata". Stores additional info like headings, page numbers.
    metadata_ = Column("metadata", JSON)

    # ---- Relationships -----------------------------------------------------
    project = relationship("Project", back_populates="chunks")
    file = relationship("File", back_populates="chunks")
    questions = relationship(
        "Question",
        back_populates="chunk",
        cascade="all, delete-orphan",
    )
    chunk_tags = relationship(
        "ChunkTag",
        back_populates="chunk",
        cascade="all, delete-orphan",
    )
|
||||
|
||||
|
||||
class Tag(Base, UUIDMixin, TimestampMixin):
    """Tag/label used to categorize content within a project.

    Tags form a tree through the self-referential ``parent_id`` column:
    ``parent`` is the many-to-one side and ``children`` the one-to-many side.
    """

    __tablename__ = "tags"

    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    label = Column(String(255), nullable=False)
    # Parent tag; nullable, so a tag without a parent is allowed.
    parent_id = Column(UUID(as_uuid=True), ForeignKey("tags.id", ondelete="CASCADE"))
    color = Column(String(20))  # hex color code

    # ---- Relationships -----------------------------------------------------
    project = relationship("Project", back_populates="tags")
    # remote_side marks ``Tag.id`` as the "one" side of the self-reference.
    parent = relationship("Tag", remote_side="Tag.id", back_populates="children")
    children = relationship("Tag", back_populates="parent")
    # Without a delete cascade the ORM de-associates children on delete by
    # setting their foreign key to NULL, which violates the NOT NULL
    # constraint on ``chunk_tags.tag_id``. Cascade the delete instead, the
    # same way ``Chunk.chunk_tags`` does.
    chunk_tags = relationship(
        "ChunkTag",
        back_populates="tag",
        cascade="all, delete-orphan",
    )
|
||||
|
||||
|
||||
class ChunkTag(Base, UUIDMixin):
    """Association row linking one chunk to one tag (many-to-many)."""

    __tablename__ = "chunk_tags"

    # Both sides of the link are required and declared ON DELETE CASCADE.
    chunk_id = Column(
        UUID(as_uuid=True),
        ForeignKey("chunks.id", ondelete="CASCADE"),
        nullable=False,
    )
    tag_id = Column(
        UUID(as_uuid=True),
        ForeignKey("tags.id", ondelete="CASCADE"),
        nullable=False,
    )

    # ---- Relationships -----------------------------------------------------
    chunk = relationship("Chunk", back_populates="chunk_tags")
    tag = relationship("Tag", back_populates="chunk_tags")
|
||||
|
||||
|
||||
class Question(Base, UUIDMixin, TimestampMixin):
    """A question, optionally paired with its answer (QA pair).

    A question belongs to a project and may reference the chunk it relates
    to. The ``project`` relationship is one-directional: it declares no
    ``back_populates``.
    """

    __tablename__ = "questions"

    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Related chunk; nullable.
    chunk_id = Column(UUID(as_uuid=True), ForeignKey("chunks.id", ondelete="CASCADE"))
    # Question content
    content = Column(Text, nullable=False)
    # Answer content
    answer = Column(Text)
    # fact, summary, reasoning, etc.
    question_type = Column(String(50))
    # manual, generated
    source = Column(String(50), default="manual")

    # ---- Relationships -----------------------------------------------------
    project = relationship("Project")
    chunk = relationship("Chunk", back_populates="questions")
|
||||
|
||||
|
||||
class Dataset(Base, UUIDMixin, TimestampMixin):
    """A named dataset that belongs to a project."""

    __tablename__ = "datasets"

    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    name = Column(String(255), nullable=False)
    description = Column(Text)
    dataset_type = Column(String(50))  # qa, conversation, instruction
    # ``metadata`` is a reserved attribute name on declarative classes (it
    # holds the MetaData registry); mapping a column under that name raises
    # InvalidRequestError when the class is defined. The Python attribute is
    # therefore ``metadata_`` while the database column keeps the name
    # "metadata".
    metadata_ = Column("metadata", JSON)

    # ---- Relationships -----------------------------------------------------
    project = relationship("Project", back_populates="datasets")
|
||||
|
||||
|
||||
class EvalDataset(Base, UUIDMixin, TimestampMixin):
    """A named evaluation dataset that belongs to a project."""

    __tablename__ = "eval_datasets"

    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    name = Column(String(255), nullable=False)
    question_type = Column(String(50))  # mixed, fact, reasoning
    # ``metadata`` is a reserved attribute name on declarative classes (it
    # holds the MetaData registry); mapping a column under that name raises
    # InvalidRequestError when the class is defined. The Python attribute is
    # therefore ``metadata_`` while the database column keeps the name
    # "metadata".
    metadata_ = Column("metadata", JSON)

    # ---- Relationships -----------------------------------------------------
    project = relationship("Project", back_populates="eval_datasets")
|
||||
|
||||
|
||||
class ModelConfig(Base, UUIDMixin, TimestampMixin):
    """Per-project configuration of an LLM provider and model."""

    __tablename__ = "model_configs"

    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    # openai, anthropic, ollama, custom
    provider = Column(String(50), nullable=False)
    model_name = Column(String(100))
    # NOTE(review): declared as a plain String column; confirm whether the
    # key is encrypted or otherwise protected before it reaches this model.
    api_key = Column(String(500))
    api_base = Column(String(500))
    # NOTE(review): the flag is a String column defaulting to the text
    # "false", not a Boolean; presumably callers compare against
    # "true"/"false" strings, so confirm before changing the column type.
    is_default = Column(String(10), default="false")

    # ---- Relationships -----------------------------------------------------
    project = relationship("Project", back_populates="model_configs")
|
||||
|
||||
|
||||
class Task(Base, UUIDMixin, TimestampMixin):
    """Record of a background job and its current state."""

    __tablename__ = "tasks"

    # Owning project; unlike the other models here, this column is nullable.
    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
    )
    # split, generate, eval, export
    task_type = Column(String(50))
    # pending, running, completed, failed
    status = Column(String(20), default="pending")
    # 0-100
    progress = Column(Integer, default=0)
    result = Column(JSON)
    error = Column(Text)

    # ---- Relationships -----------------------------------------------------
    project = relationship("Project", back_populates="tasks")
|
||||
Reference in New Issue
Block a user