Files
JARVIS/backend/app/database.py

519 lines
20 KiB
Python

from collections.abc import AsyncGenerator
import os
import re
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from sqlalchemy.orm import DeclarativeBase
from app.config import settings
os.makedirs(settings.DATA_DIR, exist_ok=True)
engine = create_async_engine(
settings.DATABASE_URL,
echo=settings.DEBUG,
pool_pre_ping=True,
)
async_session = async_sessionmaker(
engine,
class_=AsyncSession,
expire_on_commit=False,
)
class Base(DeclarativeBase):
pass
async def get_db() -> AsyncGenerator[AsyncSession, None]:
async with async_session() as session:
yield session
async def init_db():
async with engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
await ensure_task_columns(conn)
await ensure_log_columns(conn)
await ensure_message_columns(conn)
await ensure_conversation_columns(conn)
await ensure_document_columns(conn)
await ensure_memory_columns(conn)
await ensure_user_columns(conn)
await ensure_forum_columns(conn)
await ensure_agent_columns(conn)
await ensure_skill_columns(conn)
await ensure_learning_artifact_tables(conn)
async def ensure_task_columns(conn):
rows = await _get_table_info(conn, 'tasks')
if not rows:
return
columns = {row[1] for row in rows}
required_columns = {
'source': "ALTER TABLE tasks ADD COLUMN source VARCHAR(32) DEFAULT 'manual' NOT NULL",
'conversation_id': "ALTER TABLE tasks ADD COLUMN conversation_id VARCHAR(36)",
'quadrant': "ALTER TABLE tasks ADD COLUMN quadrant VARCHAR(64)",
'assignee_type': "ALTER TABLE tasks ADD COLUMN assignee_type VARCHAR(32)",
'assignee_id': "ALTER TABLE tasks ADD COLUMN assignee_id VARCHAR(255)",
'dispatch_status': "ALTER TABLE tasks ADD COLUMN dispatch_status VARCHAR(32) DEFAULT 'idle' NOT NULL",
'dispatch_run_id': "ALTER TABLE tasks ADD COLUMN dispatch_run_id VARCHAR(64)",
'result_summary': "ALTER TABLE tasks ADD COLUMN result_summary TEXT",
'started_at': "ALTER TABLE tasks ADD COLUMN started_at DATETIME",
'last_synced_at': "ALTER TABLE tasks ADD COLUMN last_synced_at DATETIME",
}
for column, ddl in required_columns.items():
if column not in columns:
await conn.execute(text(ddl))
indexes = {
'ix_tasks_due_date': "CREATE INDEX IF NOT EXISTS ix_tasks_due_date ON tasks (due_date)",
'ix_tasks_source': "CREATE INDEX IF NOT EXISTS ix_tasks_source ON tasks (source)",
'ix_tasks_conversation_id': "CREATE INDEX IF NOT EXISTS ix_tasks_conversation_id ON tasks (conversation_id)",
'ix_tasks_quadrant': "CREATE INDEX IF NOT EXISTS ix_tasks_quadrant ON tasks (quadrant)",
'ix_tasks_assignee_type': "CREATE INDEX IF NOT EXISTS ix_tasks_assignee_type ON tasks (assignee_type)",
'ix_tasks_assignee_id': "CREATE INDEX IF NOT EXISTS ix_tasks_assignee_id ON tasks (assignee_id)",
'ix_tasks_dispatch_status': "CREATE INDEX IF NOT EXISTS ix_tasks_dispatch_status ON tasks (dispatch_status)",
'ix_tasks_dispatch_run_id': "CREATE INDEX IF NOT EXISTS ix_tasks_dispatch_run_id ON tasks (dispatch_run_id)",
}
for ddl in indexes.values():
await conn.execute(text(ddl))
history_rows = await _get_table_info(conn, 'task_histories')
if history_rows:
history_columns = {row[1] for row in history_rows}
if 'subtask_id' not in history_columns:
await conn.execute(text("ALTER TABLE task_histories ADD COLUMN subtask_id VARCHAR(36)"))
await conn.execute(text("CREATE INDEX IF NOT EXISTS ix_task_histories_subtask_id ON task_histories (subtask_id)"))
await conn.execute(
text(
"""
CREATE TABLE IF NOT EXISTS task_subtasks (
id VARCHAR(36) PRIMARY KEY,
created_at DATETIME NOT NULL,
updated_at DATETIME NOT NULL,
task_id VARCHAR(36) NOT NULL,
title VARCHAR(500) NOT NULL,
description TEXT,
status VARCHAR(32) NOT NULL DEFAULT 'todo',
order_index INTEGER NOT NULL DEFAULT 0,
assignee_type VARCHAR(32),
assignee_id VARCHAR(255),
dispatch_status VARCHAR(32) NOT NULL DEFAULT 'idle',
dispatch_run_id VARCHAR(64),
completed_at DATETIME,
FOREIGN KEY(task_id) REFERENCES tasks (id)
)
"""
)
)
subtask_rows = await _get_table_info(conn, 'task_subtasks')
subtask_columns = {row[1] for row in subtask_rows}
if 'result_summary' not in subtask_columns:
await conn.execute(text("ALTER TABLE task_subtasks ADD COLUMN result_summary TEXT"))
subtask_indexes = {
'ix_task_subtasks_task_id': "CREATE INDEX IF NOT EXISTS ix_task_subtasks_task_id ON task_subtasks (task_id)",
'ix_task_subtasks_status': "CREATE INDEX IF NOT EXISTS ix_task_subtasks_status ON task_subtasks (status)",
'ix_task_subtasks_order_index': "CREATE INDEX IF NOT EXISTS ix_task_subtasks_order_index ON task_subtasks (order_index)",
'ix_task_subtasks_assignee_type': "CREATE INDEX IF NOT EXISTS ix_task_subtasks_assignee_type ON task_subtasks (assignee_type)",
'ix_task_subtasks_assignee_id': "CREATE INDEX IF NOT EXISTS ix_task_subtasks_assignee_id ON task_subtasks (assignee_id)",
'ix_task_subtasks_dispatch_status': "CREATE INDEX IF NOT EXISTS ix_task_subtasks_dispatch_status ON task_subtasks (dispatch_status)",
'ix_task_subtasks_dispatch_run_id': "CREATE INDEX IF NOT EXISTS ix_task_subtasks_dispatch_run_id ON task_subtasks (dispatch_run_id)",
}
for ddl in subtask_indexes.values():
await conn.execute(text(ddl))
# Normalize legacy/invalid enum-like values to prevent ORM Enum decoding failures.
await conn.execute(
text(
"""
UPDATE tasks
SET source = 'manual'
WHERE source IS NULL
OR TRIM(source) = ''
OR source NOT IN ('manual','chat','schedule_center','today_status','commander')
"""
)
)
await conn.execute(
text(
"""
UPDATE tasks
SET status = 'todo'
WHERE status IS NULL
OR TRIM(status) = ''
OR status NOT IN ('todo','in_progress','done','cancelled')
"""
)
)
await conn.execute(
text(
"""
UPDATE tasks
SET priority = 'medium'
WHERE priority IS NULL
OR TRIM(priority) = ''
OR priority NOT IN ('low','medium','high','urgent')
"""
)
)
await conn.execute(
text(
"""
UPDATE tasks
SET quadrant = NULL
WHERE quadrant IS NOT NULL
AND (TRIM(quadrant) = '' OR quadrant NOT IN (
'urgent-important',
'not-urgent-important',
'urgent-not-important',
'not-urgent-not-important'
))
"""
)
)
await conn.execute(
text(
"""
UPDATE tasks
SET assignee_type = NULL
WHERE assignee_type IS NOT NULL
AND (TRIM(assignee_type) = '' OR assignee_type NOT IN (
'user','commander','agent','planner','executor','knowledge','analyst','coder','researcher'
))
"""
)
)
await conn.execute(
text(
"""
UPDATE tasks
SET dispatch_status = 'idle'
WHERE dispatch_status IS NULL
OR TRIM(dispatch_status) = ''
OR dispatch_status NOT IN ('idle','queued','running','completed','failed')
"""
)
)
await conn.execute(
text(
"""
UPDATE task_subtasks
SET status = 'todo'
WHERE status IS NULL
OR TRIM(status) = ''
OR status NOT IN ('todo','in_progress','done','cancelled')
"""
)
)
await conn.execute(
text(
"""
UPDATE task_subtasks
SET assignee_type = NULL
WHERE assignee_type IS NOT NULL
AND (TRIM(assignee_type) = '' OR assignee_type NOT IN (
'user','commander','agent','planner','executor','knowledge','analyst','coder','researcher'
))
"""
)
)
await conn.execute(
text(
"""
UPDATE task_subtasks
SET dispatch_status = 'idle'
WHERE dispatch_status IS NULL
OR TRIM(dispatch_status) = ''
OR dispatch_status NOT IN ('idle','queued','running','completed','failed')
"""
)
)
async def ensure_log_columns(conn):
result = await conn.execute(text("PRAGMA table_info(logs)"))
rows = result.fetchall()
if not rows:
return
columns = {row[1] for row in rows}
required_columns = {
"request_id": "ALTER TABLE logs ADD COLUMN request_id VARCHAR(64)",
"route": "ALTER TABLE logs ADD COLUMN route VARCHAR(255)",
"method": "ALTER TABLE logs ADD COLUMN method VARCHAR(16)",
"status_code": "ALTER TABLE logs ADD COLUMN status_code INTEGER",
"error_type": "ALTER TABLE logs ADD COLUMN error_type VARCHAR(100)",
"operation": "ALTER TABLE logs ADD COLUMN operation VARCHAR(100)",
}
for column, ddl in required_columns.items():
if column not in columns:
await conn.execute(text(ddl))
async def ensure_message_columns(conn):
result = await conn.execute(text("PRAGMA table_info(messages)"))
rows = result.fetchall()
if not rows:
return
columns = {row[1] for row in rows}
required_columns = {
"attachments": "ALTER TABLE messages ADD COLUMN attachments JSON",
}
for column, ddl in required_columns.items():
if column not in columns:
await conn.execute(text(ddl))
async def ensure_conversation_columns(conn):
rows = await _get_table_info(conn, 'conversations')
if not rows:
return
columns = {row[1] for row in rows}
required_columns = {
'agent_state': "ALTER TABLE conversations ADD COLUMN agent_state JSON",
}
for column, ddl in required_columns.items():
if column not in columns:
await conn.execute(text(ddl))
async def ensure_document_columns(conn):
result = await conn.execute(text("PRAGMA table_info(documents)"))
rows = result.fetchall()
if not rows:
return
columns = {row[1] for row in rows}
required_columns = {
"ingestion_status": "ALTER TABLE documents ADD COLUMN ingestion_status VARCHAR(50) DEFAULT 'uploaded' NOT NULL",
"ingestion_error": "ALTER TABLE documents ADD COLUMN ingestion_error TEXT",
"indexed_at": "ALTER TABLE documents ADD COLUMN indexed_at DATETIME",
"parser_version": "ALTER TABLE documents ADD COLUMN parser_version VARCHAR(50)",
"index_version": "ALTER TABLE documents ADD COLUMN index_version VARCHAR(50)",
"normalized_content": "ALTER TABLE documents ADD COLUMN normalized_content TEXT",
"normalized_format": "ALTER TABLE documents ADD COLUMN normalized_format VARCHAR(50)",
}
for column, ddl in required_columns.items():
if column not in columns:
await conn.execute(text(ddl))
async def ensure_memory_columns(conn):
rows = await _get_table_info(conn, 'user_memories')
if not rows:
return
columns = {row[1] for row in rows}
required_columns = {
'frequency_count': "ALTER TABLE user_memories ADD COLUMN frequency_count INTEGER DEFAULT 0",
'emotion_tags': "ALTER TABLE user_memories ADD COLUMN emotion_tags JSON",
'importance_score': "ALTER TABLE user_memories ADD COLUMN importance_score FLOAT DEFAULT 0.5",
'importance_level': "ALTER TABLE user_memories ADD COLUMN importance_level VARCHAR(20) DEFAULT 'medium'",
'associated_topics': "ALTER TABLE user_memories ADD COLUMN associated_topics JSON",
'decay_score': "ALTER TABLE user_memories ADD COLUMN decay_score FLOAT DEFAULT 1.0",
'is_archived': "ALTER TABLE user_memories ADD COLUMN is_archived BOOLEAN DEFAULT 0",
'last_accessed_at': "ALTER TABLE user_memories ADD COLUMN last_accessed_at DATETIME",
'archive_at': "ALTER TABLE user_memories ADD COLUMN archive_at DATETIME",
}
for column, ddl in required_columns.items():
if column not in columns:
await conn.execute(text(ddl))
async def ensure_user_columns(conn):
rows = await _get_table_info(conn, 'users')
if not rows:
return
columns = {row[1] for row in rows}
if 'username' not in columns:
await conn.execute(text("ALTER TABLE users ADD COLUMN username VARCHAR(255)"))
rows = await _get_table_info(conn, 'users')
await _backfill_usernames(conn)
username_row = next(row for row in rows if row[1] == 'username')
indexes = await _get_index_info(conn, 'users')
has_username_index = any(row[1] == 'ix_users_username' and row[2] == 1 for row in indexes)
has_email_index = any(row[1] == 'ix_users_email' and row[2] == 1 for row in indexes)
if username_row[3] != 1 or not has_username_index or not has_email_index:
await _rebuild_users_table(conn)
async def ensure_forum_columns(conn):
rows = await _get_table_info(conn, 'forum_posts')
if not rows:
return
columns = {row[1] for row in rows}
required_columns = {
"board": "ALTER TABLE forum_posts ADD COLUMN board VARCHAR(100) DEFAULT 'general' NOT NULL",
"is_pinned": "ALTER TABLE forum_posts ADD COLUMN is_pinned BOOLEAN DEFAULT 0 NOT NULL",
}
for column, ddl in required_columns.items():
if column not in columns:
await conn.execute(text(ddl))
indexes = await _get_index_info(conn, 'forum_posts')
index_names = {row[1] for row in indexes}
if 'ix_forum_posts_board' not in index_names:
await conn.execute(text("CREATE INDEX IF NOT EXISTS ix_forum_posts_board ON forum_posts (board)"))
async def ensure_agent_columns(conn):
rows = await _get_table_info(conn, 'agents')
if not rows:
return
columns = {row[1] for row in rows}
required_columns = {
'selected_skill_ids': "ALTER TABLE agents ADD COLUMN selected_skill_ids JSON DEFAULT '[]' NOT NULL",
}
for column, ddl in required_columns.items():
if column not in columns:
await conn.execute(text(ddl))
async def ensure_skill_columns(conn):
rows = await _get_table_info(conn, 'skills')
if not rows:
return
columns = {row[1] for row in rows}
required_columns = {
'required_context': "ALTER TABLE skills ADD COLUMN required_context JSON DEFAULT '[]' NOT NULL",
'output_format': "ALTER TABLE skills ADD COLUMN output_format TEXT",
'is_builtin': "ALTER TABLE skills ADD COLUMN is_builtin BOOLEAN DEFAULT 0 NOT NULL",
'team_id': "ALTER TABLE skills ADD COLUMN team_id VARCHAR(36)",
'status': "ALTER TABLE skills ADD COLUMN status VARCHAR(20) DEFAULT 'active' NOT NULL",
'scope': "ALTER TABLE skills ADD COLUMN scope JSON DEFAULT '[]' NOT NULL",
'effectiveness': "ALTER TABLE skills ADD COLUMN effectiveness FLOAT DEFAULT 0.0 NOT NULL",
'review_after': "ALTER TABLE skills ADD COLUMN review_after DATETIME",
'candidate_count': "ALTER TABLE skills ADD COLUMN candidate_count INTEGER DEFAULT 0 NOT NULL",
'candidate_source_hashes': "ALTER TABLE skills ADD COLUMN candidate_source_hashes JSON DEFAULT '[]' NOT NULL",
'activation_count': "ALTER TABLE skills ADD COLUMN activation_count INTEGER DEFAULT 0 NOT NULL",
'last_activated_at': "ALTER TABLE skills ADD COLUMN last_activated_at DATETIME",
}
for column, ddl in required_columns.items():
if column not in columns:
await conn.execute(text(ddl))
await conn.execute(text("UPDATE skills SET agent_type = 'schedule_planner' WHERE agent_type = 'planner'"))
builtin_names = [
'今日重点拆解',
'周计划编排',
'时间冲突分析',
'任务执行 SOP',
'外部交互推进',
'知识检索摘要',
'图谱沉淀策略',
'风险识别模板',
'趋势洞察模板',
]
for name in builtin_names:
await conn.execute(
text("UPDATE skills SET is_builtin = 1 WHERE name = :name"),
{'name': name},
)
async def ensure_learning_artifact_tables(conn):
await conn.execute(
text(
"""
CREATE TABLE IF NOT EXISTS learning_artifacts (
id VARCHAR(36) PRIMARY KEY,
created_at DATETIME NOT NULL,
updated_at DATETIME NOT NULL,
user_id VARCHAR(36) NOT NULL,
conversation_id VARCHAR(36) NOT NULL,
retrospective_id VARCHAR(36),
artifact_type VARCHAR(32) NOT NULL,
artifact_key VARCHAR(128),
summary_text TEXT NOT NULL,
payload JSON NOT NULL,
recorded_at DATETIME NOT NULL
)
"""
)
)
await conn.execute(
text(
"CREATE INDEX IF NOT EXISTS ix_learning_artifacts_user_id ON learning_artifacts (user_id)"
)
)
await conn.execute(
text(
"CREATE INDEX IF NOT EXISTS ix_learning_artifacts_conversation_id ON learning_artifacts (conversation_id)"
)
)
await conn.execute(
text(
"CREATE INDEX IF NOT EXISTS ix_learning_artifacts_retrospective_id ON learning_artifacts (retrospective_id)"
)
)
await conn.execute(
text(
"CREATE INDEX IF NOT EXISTS ix_learning_artifacts_artifact_type ON learning_artifacts (artifact_type)"
)
)
async def _backfill_usernames(conn):
result = await conn.execute(text("SELECT id, email, username FROM users ORDER BY created_at, id"))
users = result.fetchall()
seen_usernames: set[str] = set()
for user_id, email, username in users:
if username:
seen_usernames.add(username)
continue
base_username = _slugify_username((email or '').split('@', 1)[0])
candidate = base_username
suffix = 2
while candidate in seen_usernames:
candidate = f"{base_username}_{suffix}"
suffix += 1
await conn.execute(
text("UPDATE users SET username = :username WHERE id = :user_id AND username IS NULL"),
{"username": candidate, "user_id": user_id},
)
seen_usernames.add(candidate)
async def _rebuild_users_table(conn):
await conn.execute(text("CREATE TABLE users__new (id VARCHAR(36) PRIMARY KEY, username VARCHAR(255) NOT NULL, email VARCHAR(255) NOT NULL, hashed_password VARCHAR(255) NOT NULL, full_name VARCHAR(255), is_active BOOLEAN NOT NULL DEFAULT 1, is_superuser BOOLEAN NOT NULL DEFAULT 0, llm_config JSON, scheduler_config JSON, created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL)"))
await conn.execute(text("INSERT INTO users__new (id, username, email, hashed_password, full_name, is_active, is_superuser, llm_config, scheduler_config, created_at, updated_at) SELECT id, username, email, hashed_password, full_name, COALESCE(is_active, 1), COALESCE(is_superuser, 0), llm_config, scheduler_config, COALESCE(created_at, CURRENT_TIMESTAMP), COALESCE(updated_at, CURRENT_TIMESTAMP) FROM users"))
await conn.execute(text("DROP TABLE users"))
await conn.execute(text("ALTER TABLE users__new RENAME TO users"))
await conn.execute(text("CREATE UNIQUE INDEX IF NOT EXISTS ix_users_username ON users (username)"))
await conn.execute(text("CREATE UNIQUE INDEX IF NOT EXISTS ix_users_email ON users (email)"))
async def _get_table_info(conn, table_name: str):
result = await conn.execute(text(f"PRAGMA table_info({table_name})"))
return result.fetchall()
async def _get_index_info(conn, table_name: str):
result = await conn.execute(text(f"PRAGMA index_list({table_name})"))
return result.fetchall()
def _slugify_username(value: str) -> str:
normalized = re.sub(r'[^a-z0-9_]+', '_', value.strip().lower())
normalized = re.sub(r'_+', '_', normalized).strip('_')
return normalized or 'user'