From 3ee825aa90f0dc5402c2ded1f59a4d55f3f4b280 Mon Sep 17 00:00:00 2001 From: "DESKTOP-72TV0V4\\caoxiaozhu" Date: Sun, 22 Mar 2026 13:42:16 +0800 Subject: [PATCH] Add MinerU document ingestion support Normalize uploaded documents into structured markdown, add clearer parser errors for missing dependencies, and cover the ingestion flow with backend tests. This also replaces deprecated UTC timestamp helpers in the touched backend paths so the knowledge pipeline stays warning-free. Co-Authored-By: Claude Opus 4.6 --- backend/app/database.py | 60 ++ backend/app/models/base.py | 10 +- backend/app/models/document.py | 9 +- backend/app/models/memory.py | 7 +- backend/app/routers/document.py | 39 +- backend/app/routers/task.py | 4 +- backend/app/routers/todo.py | 4 +- backend/app/schemas/document.py | 11 + backend/app/services/auth_service.py | 4 +- backend/app/services/document_service.py | 504 ++++++++++--- backend/app/services/knowledge_service.py | 158 +++- backend/app/services/memory_service.py | 11 +- backend/app/services/scheduler_service.py | 4 +- backend/app/services/stats_service.py | 33 +- backend/app/services/tag_service.py | 4 +- backend/pyproject.toml | 4 + .../app/services/test_document_router.py | 234 ++++++ .../app/services/test_document_service.py | 371 +++++++++ backend/tests/backend/app/test_database.py | 130 ++++ backend/uv.lock | 714 ++++++++++++++++++ 20 files changed, 2159 insertions(+), 156 deletions(-) create mode 100644 backend/tests/backend/app/services/test_document_router.py create mode 100644 backend/tests/backend/app/services/test_document_service.py create mode 100644 backend/tests/backend/app/test_database.py diff --git a/backend/app/database.py b/backend/app/database.py index 61e030d..fb93aed 100644 --- a/backend/app/database.py +++ b/backend/app/database.py @@ -1,3 +1,4 @@ +from sqlalchemy import text from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker from sqlalchemy.orm import DeclarativeBase from app.config import settings @@ -33,3 +34,62 @@ async def get_db() -> AsyncSession: async def init_db(): async with engine.begin() as conn: await conn.run_sync(Base.metadata.create_all) + await ensure_log_columns(conn) + await ensure_message_columns(conn) + await ensure_document_columns(conn) + + +async def ensure_log_columns(conn): + result = await conn.execute(text("PRAGMA table_info(logs)")) + rows = result.fetchall() + if not rows: + return + + columns = {row[1] for row in rows} + required_columns = { + "request_id": "ALTER TABLE logs ADD COLUMN request_id VARCHAR(64)", + "route": "ALTER TABLE logs ADD COLUMN route VARCHAR(255)", + "method": "ALTER TABLE logs ADD COLUMN method VARCHAR(16)", + "status_code": "ALTER TABLE logs ADD COLUMN status_code INTEGER", + "error_type": "ALTER TABLE logs ADD COLUMN error_type VARCHAR(100)", + "operation": "ALTER TABLE logs ADD COLUMN operation VARCHAR(100)", + } + for column, ddl in required_columns.items(): + if column not in columns: + await conn.execute(text(ddl)) + + +async def ensure_message_columns(conn): + result = await conn.execute(text("PRAGMA table_info(messages)")) + rows = result.fetchall() + if not rows: + return + + columns = {row[1] for row in rows} + required_columns = { + "attachments": "ALTER TABLE messages ADD COLUMN attachments JSON", + } + for column, ddl in required_columns.items(): + if column not in columns: + await conn.execute(text(ddl)) + + +async def ensure_document_columns(conn): + result = await conn.execute(text("PRAGMA table_info(documents)")) + rows = result.fetchall() + if not rows: + return + + columns = {row[1] for row in rows} + required_columns = { + "ingestion_status": "ALTER TABLE documents ADD COLUMN ingestion_status VARCHAR(50) DEFAULT 'uploaded' NOT NULL", + "ingestion_error": "ALTER TABLE documents ADD COLUMN ingestion_error TEXT", + "indexed_at": "ALTER TABLE documents ADD COLUMN indexed_at DATETIME", + "parser_version": "ALTER TABLE documents ADD COLUMN parser_version VARCHAR(50)", + "index_version": "ALTER TABLE documents ADD COLUMN index_version VARCHAR(50)", + "normalized_content": "ALTER TABLE documents ADD COLUMN normalized_content TEXT", + "normalized_format": "ALTER TABLE documents ADD COLUMN normalized_format VARCHAR(50)", + } + for column, ddl in required_columns.items(): + if column not in columns: + await conn.execute(text(ddl)) diff --git a/backend/app/models/base.py b/backend/app/models/base.py index 8af68b1..3887cc9 100644 --- a/backend/app/models/base.py +++ b/backend/app/models/base.py @@ -1,12 +1,16 @@ import uuid -from datetime import datetime +from datetime import UTC, datetime from sqlalchemy import Column, String, DateTime from app.database import Base +def utc_now() -> datetime: + return datetime.now(UTC) + + class BaseModel(Base): __abstract__ = True id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4())) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) - updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False) + created_at = Column(DateTime, default=utc_now, nullable=False) + updated_at = Column(DateTime, default=utc_now, onupdate=utc_now, nullable=False) diff --git a/backend/app/models/document.py b/backend/app/models/document.py index be84375..9a1c802 100644 --- a/backend/app/models/document.py +++ b/backend/app/models/document.py @@ -1,4 +1,4 @@ -from sqlalchemy import Column, String, Integer, Text, ForeignKey, Boolean +from sqlalchemy import Column, String, Integer, Text, ForeignKey, Boolean, DateTime from sqlalchemy.orm import relationship from app.models.base import BaseModel @@ -16,6 +16,13 @@ class Document(BaseModel): summary = Column(Text, nullable=True) chunk_count = Column(Integer, default=0) is_indexed = Column(Boolean, default=False) + ingestion_status = Column(String(50), default="uploaded", nullable=False) + ingestion_error = Column(Text, nullable=True) + indexed_at = Column(DateTime, nullable=True) + parser_version = Column(String(50), nullable=True) + index_version = Column(String(50), nullable=True) + normalized_content = Column(Text, nullable=True) + normalized_format = Column(String(50), nullable=True) chunks = relationship("DocumentChunk", back_populates="document", cascade="all, delete-orphan") diff --git a/backend/app/models/memory.py b/backend/app/models/memory.py index 71019e4..94d6043 100644 --- a/backend/app/models/memory.py +++ b/backend/app/models/memory.py @@ -1,6 +1,5 @@ from sqlalchemy import Column, String, Text, Integer, ForeignKey, Boolean, DateTime, Enum as SQLEnum -from datetime import datetime -from app.models.base import BaseModel +from app.models.base import BaseModel, utc_now class MemorySummary(BaseModel): @@ -14,7 +13,7 @@ class MemorySummary(BaseModel): conversation_id = Column(String(36), ForeignKey("conversations.id"), nullable=False, index=True) summary_text = Column(Text, nullable=False) # 摘要内容 turn_count = Column(Integer, default=0) # 摘要时累计轮数 - summary_at = Column(DateTime, default=datetime.utcnow, nullable=False) + summary_at = Column(DateTime, default=utc_now, nullable=False) class UserMemory(BaseModel): @@ -31,5 +30,5 @@ class UserMemory(BaseModel): is_recalled = Column(Boolean, default=False) # 是否在当前对话中被召回 recall_count = Column(Integer, default=0) # 被召回次数 source_conversation_id = Column(String(36), nullable=True) # 来源对话 - extracted_at = Column(DateTime, default=datetime.utcnow, nullable=False) + extracted_at = Column(DateTime, default=utc_now, nullable=False) last_recalled_at = Column(DateTime, nullable=True) diff --git a/backend/app/routers/document.py b/backend/app/routers/document.py index 1f27103..409fa53 100644 --- a/backend/app/routers/document.py +++ b/backend/app/routers/document.py @@ -8,12 +8,13 @@ from app.models.user import User from app.routers.auth import get_current_user from app.services.document_service import DocumentService from app.services.knowledge_service import KnowledgeService +from app.schemas.document import DocumentChunkOut, DocumentChunkUpdate, DocumentOut from dataclasses import asdict router = APIRouter(prefix="/api/documents", tags=["知识库"]) -@router.get("", response_model=list) +@router.get("", response_model=list[DocumentOut]) async def list_documents( folder_id: Optional[str] = None, current_user: User = Depends(get_current_user), @@ -36,7 +37,10 @@ async def upload_document( ): """上传文档,自动分块并向量化""" doc_svc = DocumentService(db) - doc = await doc_svc.upload_document(current_user.id, file, folder_id=folder_id) + try: + doc = await doc_svc.upload_document(current_user.id, file, folder_id=folder_id) + except ValueError as error: + raise HTTPException(status_code=400, detail=str(error)) from error # 后台索引到 ChromaDB def index_task(): @@ -73,7 +77,7 @@ async def get_document( return doc -@router.get("/{document_id}/chunks") +@router.get("/{document_id}/chunks", response_model=list[DocumentChunkOut]) async def get_document_chunks( document_id: str, current_user: User = Depends(get_current_user), @@ -98,6 +102,33 @@ async def get_document_chunks( return chunks_result.scalars().all() +@router.put("/{document_id}/chunks/{chunk_id}", response_model=DocumentChunkOut) +async def update_document_chunk( + document_id: str, + chunk_id: str, + payload: DocumentChunkUpdate, + current_user: User = Depends(get_current_user), + db: AsyncSession = Depends(get_db), +): + doc_svc = DocumentService(db) + kb_svc = KnowledgeService(db, user_id=current_user.id) + + try: + chunk = await doc_svc.update_document_chunk(current_user.id, document_id, chunk_id, payload.content) + except ValueError as error: + raise HTTPException(status_code=404, detail=str(error)) from error + + reindexed = await kb_svc.reindex_document_chunks(document_id, current_user.id) + if not reindexed: + raise HTTPException(status_code=500, detail="切片更新后重新索引失败") + + refreshed_chunk_result = await db.execute( + select(DocumentChunk).where(DocumentChunk.id == chunk.id) + ) + refreshed_chunk = refreshed_chunk_result.scalar_one() + return refreshed_chunk + + @router.delete("/{document_id}", status_code=204) async def delete_document( document_id: str, @@ -129,7 +160,7 @@ async def search_documents( if mode == "keyword": results = await kb_svc._keyword_search(query, current_user.id, top_k) elif mode == "semantic": - results = await kb_svc.retrieve(query, current_user.id, top_k, use_rerank=True) + results = await kb_svc.retrieve(query, current_user.id, top_k=top_k, use_rerank=True) else: results = await kb_svc.hybrid_search(query, current_user.id, top_k) diff --git a/backend/app/routers/task.py b/backend/app/routers/task.py index fc4b6ce..8b5ab23 100644 --- a/backend/app/routers/task.py +++ b/backend/app/routers/task.py @@ -64,8 +64,8 @@ async def update_task( if field == "tags": setattr(task, field, json.dumps(value)) elif field == "status" and value == TaskStatus.DONE: - from datetime import datetime - task.completed_at = datetime.utcnow() + from datetime import UTC, datetime + task.completed_at = datetime.now(UTC) setattr(task, field, value) else: setattr(task, field, value) diff --git a/backend/app/routers/todo.py b/backend/app/routers/todo.py index 85e60e4..00f4526 100644 --- a/backend/app/routers/todo.py +++ b/backend/app/routers/todo.py @@ -81,9 +81,9 @@ async def update_todo( if data.title is not None: todo.title = data.title if data.is_completed is not None: - from datetime import datetime + from datetime import UTC, datetime todo.is_completed = data.is_completed - todo.completed_at = datetime.utcnow() if data.is_completed else None + todo.completed_at = datetime.now(UTC) if data.is_completed else None await db.commit() await db.refresh(todo) diff --git a/backend/app/schemas/document.py b/backend/app/schemas/document.py index f63c3a9..8d41153 100644 --- a/backend/app/schemas/document.py +++ b/backend/app/schemas/document.py @@ -11,6 +11,13 @@ class DocumentOut(BaseModel): summary: str | None chunk_count: int is_indexed: bool + ingestion_status: str + ingestion_error: str | None + indexed_at: datetime | None + parser_version: str | None + index_version: str | None + normalized_format: str | None + folder_id: str | None created_at: datetime model_config = {"from_attributes": True} @@ -25,6 +32,10 @@ class DocumentChunkOut(BaseModel): model_config = {"from_attributes": True} +class DocumentChunkUpdate(BaseModel): + content: str + + class SearchRequest(BaseModel): query: str top_k: int = 5 diff --git a/backend/app/services/auth_service.py b/backend/app/services/auth_service.py index 0472935..5d2cb80 100644 --- a/backend/app/services/auth_service.py +++ b/backend/app/services/auth_service.py @@ -1,4 +1,4 @@ -from datetime import datetime, timedelta +from datetime import UTC, datetime, timedelta from passlib.context import CryptContext from jose import jwt, JWTError from app.config import settings @@ -16,7 +16,7 @@ def get_password_hash(password: str) -> str: def create_access_token(data: dict, expires_delta: timedelta | None = None) -> str: to_encode = data.copy() - expire = datetime.utcnow() + (expires_delta or timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES)) + expire = datetime.now(UTC) + (expires_delta or timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES)) to_encode.update({"exp": expire}) return jwt.encode(to_encode, settings.SECRET_KEY, algorithm=settings.ALGORITHM) diff --git a/backend/app/services/document_service.py b/backend/app/services/document_service.py index 4e2ce9b..9dc28a0 100644 --- a/backend/app/services/document_service.py +++ b/backend/app/services/document_service.py @@ -9,12 +9,35 @@ from fastapi import UploadFile from app.models.document import Document, DocumentChunk from app.models.folder import Folder from app.config import settings +from app.services.brain_service import BrainService +import csv +import io +import json import os +import re import aiofiles import uuid +from dataclasses import dataclass, field -ALLOWED_EXTENSIONS = {".pdf", ".md", ".txt", ".docx", ".doc"} +ALLOWED_EXTENSIONS = {".pdf", ".md", ".txt", ".docx", ".doc", ".csv", ".xlsx"} +PARSER_VERSION = "v2" +INDEX_VERSION = "v2" + + +@dataclass +class ParsedNode: + node_type: str + text: str + metadata: dict = field(default_factory=dict) + section_path: list[str] = field(default_factory=list) + + +@dataclass +class ParsedDocument: + summary: str + nodes: list[ParsedNode] + structured_markdown: str = "" class DocumentService: @@ -39,7 +62,8 @@ class DocumentService: async with aiofiles.open(file_path, "wb") as f: await f.write(content) - text_content = await self._extract_text(file_path, ext) + parsed = await self._parse_document(file_path, ext) + parsed.structured_markdown = self._render_structured_markdown(parsed) doc = Document( user_id=user_id, @@ -48,26 +72,85 @@ class DocumentService: file_type=ext[1:], file_size=file_size, file_path=file_path, - summary=text_content[:500] if len(text_content) > 500 else text_content, + summary=parsed.summary[:500] if len(parsed.summary) > 500 else parsed.summary, folder_id=folder_id, + ingestion_status="uploaded", + ingestion_error=None, + parser_version=PARSER_VERSION, + index_version=INDEX_VERSION, + normalized_content=parsed.structured_markdown, + normalized_format="structured_markdown", ) self.db.add(doc) - await self.db.commit() - await self.db.refresh(doc) + await self.db.flush() - chunks = self._chunk_text(text_content) - for i, chunk_text in enumerate(chunks): + chunks = self._build_chunks(parsed) + for i, chunk_data in enumerate(chunks): chunk = DocumentChunk( document_id=doc.id, chunk_index=i, - content=chunk_text, + content=chunk_data["content"], + metadata_=json.dumps(chunk_data["metadata"], ensure_ascii=False), ) self.db.add(chunk) doc.chunk_count = len(chunks) + brain_service = BrainService(self.db) + await brain_service.create_event( + user_id, + source_type="document", + source_id=doc.id, + event_type="document_uploaded", + title=doc.filename, + content_summary=doc.summary, + raw_excerpt=(doc.normalized_content or "")[:1000] or None, + metadata_={ + "document_id": doc.id, + "file_type": doc.file_type, + "ingestion_status": doc.ingestion_status, + }, + importance_signal=1.0, + ) await self.db.commit() + await self.db.refresh(doc) return doc + async def rebuild_document(self, document: Document) -> Document: + ext = os.path.splitext(document.filename)[1].lower() + parsed = await self._parse_document(document.file_path, ext) + parsed.structured_markdown = self._render_structured_markdown(parsed) + + chunk_result = await self.db.execute( + select(DocumentChunk) + .where(DocumentChunk.document_id == document.id) + .order_by(DocumentChunk.chunk_index) + ) + existing_chunks = list(chunk_result.scalars().all()) + for chunk in existing_chunks: + await self.db.delete(chunk) + await self.db.flush() + + chunks = self._build_chunks(parsed) + for i, chunk_data in enumerate(chunks): + self.db.add(DocumentChunk( + document_id=document.id, + chunk_index=i, + content=chunk_data["content"], + metadata_=json.dumps(chunk_data["metadata"], ensure_ascii=False), + )) + + document.summary = parsed.summary[:500] if len(parsed.summary) > 500 else parsed.summary + document.chunk_count = len(chunks) + document.ingestion_status = "indexing" + document.ingestion_error = None + document.parser_version = PARSER_VERSION + document.index_version = INDEX_VERSION + document.normalized_content = parsed.structured_markdown + document.normalized_format = "structured_markdown" + await self.db.commit() + await self.db.refresh(document) + return document + async def _get_folder_path(self, folder_id: str) -> str | None: """获取文件夹的完整路径""" folders = await self.db.execute( @@ -104,112 +187,313 @@ class DocumentService: await self.db.commit() async def _extract_text(self, file_path: str, ext: str) -> str: - if ext == ".pdf": - try: - import pymupdf - doc = pymupdf.open(file_path) - text = "".join(page.get_text() for page in doc) - doc.close() - return text - except ImportError: - return "[PDF 内容需要安装 pymupdf: uv pip install pymupdf]" - - elif ext in (".md", ".txt"): + if ext in (".md", ".txt"): async with aiofiles.open(file_path, "r", encoding="utf-8") as f: return await f.read() - elif ext in (".docx", ".doc"): + if ext in (".docx", ".doc"): try: from docx import Document as DocxDocument doc = DocxDocument(file_path) - return "\n".join([p.text for p in doc.paragraphs]) + parts = [p.text for p in doc.paragraphs if p.text.strip()] + for table in doc.tables: + for row in table.rows: + row_values = [cell.text.strip() for cell in row.cells] + if any(row_values): + parts.append(" | ".join(row_values)) + return "\n".join(parts) except ImportError: return "[Word 内容需要安装 python-docx: uv pip install python-docx]" return "[暂不支持此格式]" - def _chunk_text(self, text: str) -> list[str]: - """ - 智能文档分块策略 - 1. 先按 Markdown 标题层级(H1/H2/H3)切分 - 2. 每个大段落内部按固定长度切分 - 3. 保留上下文(prev_summary / next_summary) - """ - import re + async def _parse_document(self, file_path: str, ext: str) -> ParsedDocument: + if ext == ".csv": + return await self._parse_csv(file_path) + if ext == ".xlsx": + return await self._parse_xlsx(file_path) + if ext == ".md": + content = await self._extract_text(file_path, ext) + return self._parse_markdown(content) + if ext == ".txt": + content = await self._extract_text(file_path, ext) + return self._parse_text(content) + if ext == ".docx": + return await self._parse_docx(file_path) + if ext == ".doc": + content = await self._extract_text(file_path, ext) + return self._parse_text(content) + if ext == ".pdf": + return await self._parse_pdf(file_path) + content = await self._extract_text(file_path, ext) + return self._parse_text(content) - chunks = [] + async def _parse_csv(self, file_path: str) -> ParsedDocument: + async with aiofiles.open(file_path, "r", encoding="utf-8-sig") as f: + content = await f.read() + reader = list(csv.reader(io.StringIO(content))) + headers = reader[0] if reader else [] + rows = reader[1:] if len(reader) > 1 else [] + nodes = [ + ParsedNode( + node_type="table_schema", + text=f"CSV columns: {', '.join(headers)} | rows: {len(rows)}", + metadata={"headers": headers, "row_count": len(rows), "table_name": "csv"}, + section_path=["csv"], + ) + ] + for start in range(0, len(rows), 50): + batch = rows[start:start + 50] + serialized_rows = [] + for row in batch: + serialized = ", ".join( + f"{header}={value}" for header, value in zip(headers, row) + ) + serialized_rows.append(serialized) + nodes.append( + ParsedNode( + node_type="table_rows", + text="\n".join(serialized_rows), + metadata={ + "headers": headers, + "row_start": start + 1, + "row_end": start + len(batch), + "table_name": "csv", + }, + section_path=["csv"], + ) + ) + summary = f"CSV with columns {', '.join(headers)}" if headers else "CSV document" + return ParsedDocument(summary=summary, nodes=nodes) - # 策略1: Markdown 标题切分(优先) - header_pattern = re.compile(r"^(#{1,3})\s+(.+)$", re.MULTILINE) - headers = list(header_pattern.finditer(text)) + async def _parse_xlsx(self, file_path: str) -> ParsedDocument: + try: + from openpyxl import load_workbook + except ModuleNotFoundError as error: + raise ValueError("XLSX 解析依赖缺失: openpyxl") from error - if headers: - # 按标题段落切分 - for i, match in enumerate(headers): - start = match.start() - end = headers[i + 1].start() if i + 1 < len(headers) else len(text) - section = text[start:end].strip() - if len(section) > settings.CHUNK_SIZE: - # 大段落内部再切分 - sub_chunks = self._split_large_chunk(section, match.group(2)) - chunks.extend(sub_chunks) - elif section: - chunks.append(section) - else: - # 策略2: 按段落切分 - chunks = self._chunk_by_paragraphs(text) - - # 过滤空 chunk - chunks = [c.strip() for c in chunks if c.strip()] - return chunks if chunks else [text[: settings.CHUNK_SIZE]] - - def _chunk_by_paragraphs(self, text: str) -> list[str]: - """按段落分块,带上下文""" - paragraphs = text.split("\n\n") - chunks = [] - current = "" - prev_summary = "" - - for para in paragraphs: - para = para.strip() - if not para: + workbook = load_workbook(file_path, data_only=True) + nodes: list[ParsedNode] = [] + summaries: list[str] = [] + for sheet in workbook.worksheets: + rows = list(sheet.iter_rows(values_only=True)) + if not rows: continue - if len(current) + len(para) < settings.CHUNK_SIZE: - current += "\n\n" + para + headers = [str(cell).strip() if cell is not None else "" for cell in rows[0]] + data_rows = rows[1:] + summaries.append(sheet.title) + nodes.append( + ParsedNode( + node_type="table_schema", + text=f"Sheet {sheet.title} columns: {', '.join(headers)} | rows: {len(data_rows)}", + metadata={"headers": headers, "row_count": len(data_rows), "sheet_name": sheet.title}, + section_path=[sheet.title], + ) + ) + for start in range(0, len(data_rows), 50): + batch = data_rows[start:start + 50] + serialized_rows = [] + for row in batch: + normalized = ["" if value is None else str(value) for value in row] + serialized_rows.append(", ".join(f"{header}={value}" for header, value in zip(headers, normalized))) + nodes.append( + ParsedNode( + node_type="table_rows", + text="\n".join(serialized_rows), + metadata={ + "headers": headers, + "row_start": start + 1, + "row_end": start + len(batch), + "sheet_name": sheet.title, + }, + section_path=[sheet.title], + ) + ) + summary = f"Workbook sheets: {', '.join(summaries)}" if summaries else "Workbook" + return ParsedDocument(summary=summary, nodes=nodes) + + async def _parse_docx(self, file_path: str) -> ParsedDocument: + try: + from docx import Document as DocxDocument + except ModuleNotFoundError as error: + raise ValueError("DOCX 解析依赖缺失: python-docx") from error + + doc = DocxDocument(file_path) + nodes: list[ParsedNode] = [] + section_path: list[str] = [] + summary_parts: list[str] = [] + for paragraph in doc.paragraphs: + text = paragraph.text.strip() + if not text: + continue + style_name = getattr(paragraph.style, "name", "") or "" + if style_name.startswith("Heading"): + level_match = re.search(r"(\d+)", style_name) + level = int(level_match.group(1)) if level_match else 1 + section_path = section_path[: level - 1] + [text] + nodes.append(ParsedNode("heading", text, {"level": level}, list(section_path))) else: - if current: - # 添加上下文摘要 - enriched = current.strip() - chunks.append(enriched) - current = para + if not section_path: + section_path = [doc.core_properties.title or "Document"] + summary_parts.append(text) + nodes.append(ParsedNode("paragraph", text, {}, list(section_path))) + for table in doc.tables: + rows = [[cell.text.strip() for cell in row.cells] for row in table.rows] + if not rows: + continue + headers = rows[0] + nodes.append( + ParsedNode( + "table_schema", + f"DOCX table columns: {', '.join(headers)} | rows: {max(len(rows) - 1, 0)}", + {"headers": headers, "row_count": max(len(rows) - 1, 0), "table_name": "docx_table"}, + list(section_path), + ) + ) + for start in range(1, len(rows), 50): + batch = rows[start:start + 50] + serialized_rows = [", ".join(f"{header}={value}" for header, value in zip(headers, row)) for row in batch] + nodes.append( + ParsedNode( + "table_rows", + "\n".join(serialized_rows), + { + "headers": headers, + "row_start": start, + "row_end": start + len(batch) - 1, + "table_name": "docx_table", + }, + list(section_path), + ) + ) + summary = " ".join(summary_parts[:3]) if summary_parts else doc.core_properties.title or "Document" + return ParsedDocument(summary=summary, nodes=nodes) - if current.strip(): - chunks.append(current.strip()) + async def _parse_pdf_with_mineru(self, file_path: str) -> str: + try: + import mineru + except ModuleNotFoundError as error: + raise ValueError("PDF 解析依赖缺失: mineru") from error + if hasattr(mineru, "to_markdown"): + return mineru.to_markdown(file_path) + if hasattr(mineru, "parse_to_markdown"): + return mineru.parse_to_markdown(file_path) + + raise ValueError("PDF 解析失败: MinerU 不支持当前接口") + + async def _parse_pdf(self, file_path: str) -> ParsedDocument: + markdown = await self._parse_pdf_with_mineru(file_path) + return self._parse_markdown(markdown) + + def _parse_markdown(self, content: str) -> ParsedDocument: + nodes: list[ParsedNode] = [] + section_path: list[str] = [] + summary_parts: list[str] = [] + buffer: list[str] = [] + + def flush_buffer(): + if not buffer: + return + text = "\n".join(buffer).strip() + buffer.clear() + if not text: + return + nodes.append(ParsedNode("paragraph", text, {}, list(section_path))) + summary_parts.append(text) + + for line in content.splitlines(): + heading_match = re.match(r"^(#{1,6})\s+(.+)$", line.strip()) + if heading_match: + flush_buffer() + level = len(heading_match.group(1)) + title = heading_match.group(2).strip() + section_path = section_path[: level - 1] + [title] + nodes.append(ParsedNode("heading", title, {"level": level}, list(section_path))) + continue + if line.strip(): + buffer.append(line.strip()) + else: + flush_buffer() + flush_buffer() + summary = " ".join(summary_parts[:3]) if summary_parts else content[:200] + return ParsedDocument(summary=summary, nodes=nodes) + + def _parse_text(self, content: str) -> ParsedDocument: + paragraphs = [part.strip() for part in content.split("\n\n") if part.strip()] + nodes = [ParsedNode("text", paragraph, {}, []) for paragraph in paragraphs] + summary = " ".join(paragraphs[:3]) if paragraphs else content[:200] + return ParsedDocument(summary=summary, nodes=nodes) + + def _build_chunks(self, parsed: ParsedDocument) -> list[dict]: + chunks: list[dict] = [] + for source_order, node in enumerate(parsed.nodes): + section_path = node.section_path or [] + metadata = { + "content_type": node.node_type, + "section_path": section_path, + "section_title": section_path[-1] if section_path else None, + "chunk_level": len(section_path), + "parent_key": "/".join(section_path[:-1]) or None, + "block_key": "/".join(section_path) or None, + "parser_version": PARSER_VERSION, + "index_version": INDEX_VERSION, + "source_order": source_order, + **node.metadata, + } + chunks.append({"content": node.text, "metadata": metadata}) + if not chunks: + chunks.append({ + "content": parsed.summary, + "metadata": { + "content_type": "text", + "section_path": [], + "section_title": None, + "chunk_level": 0, + "parent_key": None, + "block_key": None, + "parser_version": PARSER_VERSION, + "index_version": INDEX_VERSION, + "source_order": 0, + }, + }) return chunks - def _split_large_chunk(self, text: str, title: str) -> list[str]: - """将大段落拆分为固定大小的子块""" - chunks = [] - sentences = text.split("。") - current = title + "\n\n" - - for sentence in sentences: - sentence = sentence.strip() - if not sentence: + def _render_structured_markdown(self, parsed: ParsedDocument) -> str: + blocks: list[str] = [] + for node in parsed.nodes: + if node.node_type == "heading": + level = max(1, min(int(node.metadata.get("level", 1)), 6)) + blocks.append(f"{'#' * level} {node.text}") continue - full_sentence = sentence if sentence.endswith("。") else sentence + "。" - if len(current) + len(full_sentence) < settings.CHUNK_SIZE: - current += full_sentence + " " - else: - if current.strip(): - chunks.append(current.strip()) - current = title + "\n\n" + full_sentence + " " - - if current.strip(): - chunks.append(current.strip()) - - return chunks + if node.node_type == "table_schema": + headers = node.metadata.get("headers") or [] + if headers: + header_row = "| " + " | ".join(headers) + " |" + divider_row = "| " + " | ".join(["---"] * len(headers)) + " |" + blocks.append("\n".join([header_row, divider_row])) + else: + blocks.append(node.text) + continue + if node.node_type == "table_rows": + headers = node.metadata.get("headers") or [] + if headers: + rows = [] + for line in node.text.splitlines(): + values_by_header = {} + for part in line.split(", "): + if "=" not in part: + continue + key, value = part.split("=", 1) + values_by_header[key] = value + rows.append("| " + " | ".join(values_by_header.get(header, "") for header in headers) + " |") + if rows: + blocks.append("\n".join(rows)) + continue + blocks.append(node.text) + continue + blocks.append(node.text) + return "\n\n".join(block for block in blocks if block).strip() or parsed.summary async def get_document_chunks(self, document_id: str) -> list[DocumentChunk]: result = await self.db.execute( @@ -219,6 +503,34 @@ class DocumentService: ) return list(result.scalars().all()) + async def update_document_chunk(self, user_id: str, document_id: str, chunk_id: str, content: str) -> DocumentChunk: + document_result = await self.db.execute( + select(Document).where( + Document.id == document_id, + Document.user_id == user_id, + ) + ) + document = document_result.scalar_one_or_none() + if not document: + raise ValueError("文档不存在") + + chunk_result = await self.db.execute( + select(DocumentChunk).where( + DocumentChunk.id == chunk_id, + DocumentChunk.document_id == document_id, + ) + ) + chunk = chunk_result.scalar_one_or_none() + if not chunk: + raise ValueError("切片不存在") + + chunk.content = content + document.ingestion_status = "indexing" + document.ingestion_error = None + await self.db.commit() + await self.db.refresh(chunk) + return chunk + async def get_document_content(self, user_id: str, document_id: str) -> str | None: """获取文档的文本内容""" import os @@ -233,6 +545,9 @@ class DocumentService: if not doc: return None + if doc.normalized_content: + return doc.normalized_content + file_path = doc.file_path if not os.path.exists(file_path): return None @@ -247,9 +562,6 @@ class DocumentService: elif ext == 'md': with open(file_path, 'r', encoding='utf-8') as f: return f.read() - elif ext == 'pdf': - # 简单文本提取(生产环境应使用专业库) - return f"[PDF文档] {doc.filename}" else: return f"[文档] {doc.filename}" except Exception: diff --git a/backend/app/services/knowledge_service.py b/backend/app/services/knowledge_service.py index b942c6d..1699407 100644 --- a/backend/app/services/knowledge_service.py +++ b/backend/app/services/knowledge_service.py @@ -14,9 +14,12 @@ from sqlalchemy import select, or_ from app.models.document import Document, DocumentChunk from app.models.folder import Folder from app.config import settings +from app.services.document_service import DocumentService import chromadb from chromadb.config import Settings as ChromaSettings from dataclasses import dataclass +from datetime import UTC, datetime +import json @dataclass @@ -72,24 +75,50 @@ class KnowledgeService: if not chunks: return + await self._index_chunks(doc, chunks, user_id, folder_path=folder_path) + + async def _index_chunks( + self, + document: Document, + chunks: list[DocumentChunk], + user_id: str, + folder_path: str | None = None, + ): + folder_path = folder_path or (await self._get_folder_path(document.folder_id) if document.folder_id else "") collection = self.get_collection(user_id) ids = [chunk.id for chunk in chunks] documents = [chunk.content for chunk in chunks] - metadatas = [ - { - "document_id": doc.id, - "document_title": doc.title, + metadatas = [] + for chunk in chunks: + chunk_metadata = self._parse_metadata(chunk.metadata_) + meta = { + "document_id": document.id, + "document_title": document.title, + "document_filename": document.filename, "chunk_index": chunk.chunk_index, - "file_type": doc.file_type, + "file_type": document.file_type, "folder_path": folder_path or "", + "content_type": chunk_metadata.get("content_type", "text"), + "section_title": chunk_metadata.get("section_title") or "", + "section_path": " / ".join(chunk_metadata.get("section_path", [])), + "page_number": chunk_metadata.get("page_number") or 0, + "sheet_name": chunk_metadata.get("sheet_name") or "", + "row_start": chunk_metadata.get("row_start") or 0, + "row_end": chunk_metadata.get("row_end") or 0, + "parser_version": chunk_metadata.get("parser_version") or document.parser_version or "", + "index_version": chunk_metadata.get("index_version") or document.index_version or "", } - for chunk in chunks - ] + chunk.chroma_collection = f"user_{user_id}" + chunk.chroma_id = chunk.id + metadatas.append(meta) collection.add(ids=ids, documents=documents, metadatas=metadatas) - doc.is_indexed = True + document.is_indexed = True + document.ingestion_status = "ready" + document.ingestion_error = None + document.indexed_at = datetime.now(UTC) await self.db.commit() async def retrieve( @@ -141,7 +170,7 @@ class KnowledgeService: meta = metadatas[i] if i < len(metadatas) else {} score = 1.0 - (distances[i] if i < len(distances) else 0.0) - prev_chunk, next_chunk = await self._get_sibling_chunks( + prev_chunk, next_chunk = await self._get_related_chunks( chunk_id=chunk_id, chunk_index=meta.get("chunk_index", 0), document_id=meta.get("document_id", ""), @@ -153,7 +182,7 @@ class KnowledgeService: document_title=meta.get("document_title", ""), content=documents[i] if i < len(documents) else "", score=score, - metadata_=str(meta), + metadata_=json.dumps(meta, ensure_ascii=False), prev_chunk=prev_chunk, next_chunk=next_chunk, )) @@ -171,10 +200,11 @@ class KnowledgeService: results: list[SearchResult], top_k: int, ) -> list[SearchResult]: - """Rerank: 语义分 * 0.7 + 关键词匹配 * 0.2 + 标题匹配 * 0.1""" + """Rerank: 语义分 * 0.7 + 关键词匹配 * 0.2 + 标题匹配 * 0.1 + 结构加权""" import re query_words = set(re.findall(r"\w+", query.lower())) + table_query = any(token in query.lower() for token in ["sheet", "excel", "csv", "表", "列", "金额", "统计", "日期"]) scored = [] for r in results: @@ -189,36 +219,56 @@ class KnowledgeService: title_overlap = len(query_words & title_words) / max(len(query_words), 1) score += title_overlap * 0.1 + metadata = self._parse_metadata(r.metadata_) + if table_query and metadata.get("content_type") == "table_schema": + score += 0.25 + elif table_query and metadata.get("content_type") == "table_rows": + score += 0.15 + scored.append((score, r)) scored.sort(key=lambda x: x[0], reverse=True) return [r for _, r in scored[:top_k]] - async def _get_sibling_chunks( + async def _get_related_chunks( self, chunk_id: str, chunk_index: int, document_id: str, ) -> tuple[str | None, str | None]: - """获取前一个和后一个 chunk(完整上下文)""" - prev_result = await self.db.execute( - select(DocumentChunk).where( - DocumentChunk.document_id == document_id, - DocumentChunk.chunk_index == chunk_index - 1, - ) + """获取结构相关的上下文 chunk""" + current_result = await self.db.execute( + select(DocumentChunk).where(DocumentChunk.id == chunk_id) ) - next_result = await self.db.execute( - select(DocumentChunk).where( - DocumentChunk.document_id == document_id, - DocumentChunk.chunk_index == chunk_index + 1, - ) - ) - prev_chunk = prev_result.scalar_one_or_none() - next_chunk = next_result.scalar_one_or_none() - return ( - prev_chunk.content if prev_chunk else None, - next_chunk.content if next_chunk else None, + current_chunk = current_result.scalar_one_or_none() + if not current_chunk: + return None, None + + current_metadata = self._parse_metadata(current_chunk.metadata_) + section_path = current_metadata.get("section_path") or [] + sheet_name = current_metadata.get("sheet_name") + + chunk_result = await self.db.execute( + select(DocumentChunk) + .where(DocumentChunk.document_id == document_id) + .order_by(DocumentChunk.chunk_index) ) + chunks = list(chunk_result.scalars().all()) + + prev_chunk = None + next_chunk = None + for chunk in chunks: + if chunk.id == chunk_id: + continue + metadata = self._parse_metadata(chunk.metadata_) + same_sheet = bool(sheet_name) and metadata.get("sheet_name") == sheet_name + same_section = bool(section_path) and metadata.get("section_path") == section_path + if chunk.chunk_index < chunk_index and (same_sheet or same_section): + prev_chunk = chunk.content + if chunk.chunk_index > chunk_index and (same_sheet or same_section): + next_chunk = chunk.content + break + return prev_chunk, next_chunk async def _get_folder_path(self, folder_id: str) -> str | None: """获取文件夹的完整路径""" @@ -244,6 +294,16 @@ class KnowledgeService: return "/" + "/".join(path_parts) + def _parse_metadata(self, raw_metadata: str | dict | None) -> dict: + if isinstance(raw_metadata, dict): + return raw_metadata + if not raw_metadata: + return {} + try: + return json.loads(raw_metadata) + except (TypeError, json.JSONDecodeError): + return {} + async def hybrid_search( self, query: str, @@ -306,3 +366,43 @@ class KnowledgeService: collection.delete(where={"document_id": document_id}) except Exception: pass + + async def reindex_document(self, document_id: str, user_id: str) -> bool: + result = await self.db.execute( + select(Document).where( + Document.id == document_id, + Document.user_id == user_id, + ) + ) + document = result.scalar_one_or_none() + if not document: + return False + + await self.delete_from_vectorstore(user_id, document_id) + document = await DocumentService(self.db, user_id=user_id).rebuild_document(document) + await self.index_document(document.id, user_id) + return True + + async def reindex_document_chunks(self, document_id: str, user_id: str) -> bool: + result = await self.db.execute( + select(Document).where( + Document.id == document_id, + Document.user_id == user_id, + ) + ) + document = result.scalar_one_or_none() + if not document: + return False + + chunks_result = await self.db.execute( + select(DocumentChunk) + .where(DocumentChunk.document_id == document_id) + .order_by(DocumentChunk.chunk_index) + ) + chunks = list(chunks_result.scalars().all()) + if not chunks: + return False + + await self.delete_from_vectorstore(user_id, document_id) + await self._index_chunks(document, chunks, user_id) + return True diff --git a/backend/app/services/memory_service.py b/backend/app/services/memory_service.py index 4782a3f..39ac69b 100644 --- a/backend/app/services/memory_service.py +++ b/backend/app/services/memory_service.py @@ -11,6 +11,7 @@ from sqlalchemy import select, desc, func from sqlalchemy.ext.asyncio import AsyncSession from app.models.memory import MemorySummary, UserMemory from app.models.conversation import Conversation, Message +from app.services.brain_service import BrainService from app.services.llm_service import get_llm from app.agents.context import get_current_user @@ -235,7 +236,7 @@ async def mark_memory_recalled(db: AsyncSession, memory_id: str): if mem: mem.is_recalled = True mem.recall_count = (mem.recall_count or 0) + 1 - mem.last_recalled_at = datetime.utcnow() + mem.last_recalled_at = datetime.now(UTC) await db.commit() @@ -271,6 +272,14 @@ async def build_memory_context( lines = [f"[对话摘要{i+1}] {s.summary_text}" for i, s in enumerate(recent)] parts.append("【之前对话摘要】\n" + "\n".join(lines)) + # 3. 知识大脑(长期项目记忆) + brain_memories = await BrainService(db).recall_memories(user_id, current_query, top_k=3) + if brain_memories: + lines = [] + for memory in brain_memories: + lines.append(f"- {memory.title}: {memory.content}") + parts.append("【知识大脑】\n" + "\n".join(lines)) + if not parts: return "" return "\n\n".join(parts) diff --git a/backend/app/services/scheduler_service.py b/backend/app/services/scheduler_service.py index 3ff6d47..cde288e 100644 --- a/backend/app/services/scheduler_service.py +++ b/backend/app/services/scheduler_service.py @@ -32,9 +32,9 @@ async def daily_task_analysis(): logger.info("[Scheduler] 开始执行每日任务分析...") async with async_session() as db: - from datetime import datetime, timedelta + from datetime import UTC, datetime, timedelta - yesterday = datetime.utcnow().date() - timedelta(days=1) + yesterday = datetime.now(UTC).date() - timedelta(days=1) # 统计昨日任务完成情况 result = await db.execute( diff --git a/backend/app/services/stats_service.py b/backend/app/services/stats_service.py index 34b992c..9649436 100644 --- a/backend/app/services/stats_service.py +++ b/backend/app/services/stats_service.py @@ -1,6 +1,10 @@ -import psutil import time -from datetime import datetime, timedelta + +try: + import psutil +except ModuleNotFoundError: # pragma: no cover - optional runtime dependency fallback + psutil = None +from datetime import UTC, datetime, timedelta from sqlalchemy import select, func, and_ from sqlalchemy.orm import Session from app.models.conversation import Conversation, Message @@ -16,6 +20,19 @@ class StatsService: def get_system_health(self) -> dict: """获取系统健康指标""" + if psutil is None: + return { + "uptime_seconds": 0, + "cpu_percent": 0.0, + "memory_used_mb": 0.0, + "memory_total_mb": 0.0, + "memory_percent": 0.0, + "disk_used_gb": 0.0, + "disk_total_gb": 0.0, + "disk_percent": 0.0, + "active_users_24h": 0, + } + uptime_seconds = int(time.time() - psutil.boot_time()) cpu_percent = psutil.cpu_percent(interval=0.1) mem = psutil.virtual_memory() @@ -35,7 +52,7 @@ class StatsService: def _get_daily_stats(self, model, date_column, user_id=None, days=30) -> list: """通用每日统计查询""" - cutoff = datetime.utcnow() - timedelta(days=days) + cutoff = datetime.now(UTC) - timedelta(days=days) query = self.db.query( func.date(date_column).label('date'), func.count().label('count') @@ -50,7 +67,7 @@ class StatsService: def get_conversation_stats(self, user_id: str = None, days=30) -> dict: """获取对话统计数据""" - cutoff = datetime.utcnow() - timedelta(days=days) + cutoff = datetime.now(UTC) - timedelta(days=days) daily_conversations = self._get_daily_stats( Conversation, Conversation.created_at, user_id, days @@ -100,7 +117,7 @@ class StatsService: def get_knowledge_stats(self, user_id: str = None, days=30) -> dict: """获取知识库统计数据""" - cutoff = datetime.utcnow() - timedelta(days=days) + cutoff = datetime.now(UTC) - timedelta(days=days) # New tags tag_query = self.db.query( @@ -145,7 +162,7 @@ class StatsService: func.date(Task.completed_at).label('date'), func.count().label('count') ).filter( - Task.completed_at >= datetime.utcnow() - timedelta(days=days), + Task.completed_at >= datetime.now(UTC) - timedelta(days=days), Task.status == TaskStatus.DONE ) if user_id: @@ -195,7 +212,7 @@ class StatsService: func.date(ForumPost.updated_at).label('date'), func.count().label('count') ).filter( - ForumPost.updated_at >= datetime.utcnow() - timedelta(days=days), + ForumPost.updated_at >= datetime.now(UTC) - timedelta(days=days), ForumPost.is_executed == True ) if user_id: @@ -243,7 +260,7 @@ class StatsService: top_tags = [{"tag_path": r.tag_path, "usage_count": r.usage_count} for r in tag_query.all()] # Token trend - now = datetime.utcnow() + now = datetime.now(UTC) this_month_start = datetime(now.year, now.month, 1) last_month_end = this_month_start - timedelta(days=1) last_month_start = datetime(last_month_end.year, last_month_end.month, 1) diff --git a/backend/app/services/tag_service.py b/backend/app/services/tag_service.py index 57b99b3..a3749c9 100644 --- a/backend/app/services/tag_service.py +++ b/backend/app/services/tag_service.py @@ -193,9 +193,9 @@ class TagService: """ 增量打标签 - 只对最近新增/更新的内容节点打标签 """ - from datetime import datetime, timedelta + from datetime import UTC, datetime, timedelta - cutoff_date = datetime.utcnow() - timedelta(days=days) + cutoff_date = datetime.now(UTC) - timedelta(days=days) content_nodes = self.db.query(KGNode).filter( KGNode.user_id == user_id, diff --git a/backend/pyproject.toml b/backend/pyproject.toml index d498ce3..7570062 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -48,6 +48,10 @@ dependencies = [ # 工具 "python-dotenv>=1.0.0", "httpx>=0.27.0", + "openpyxl>=3.1.0", + "python-docx>=1.1.0", + "mineru>=2.0.3", + "psutil>=6.1.0", ] [project.optional-dependencies] diff --git a/backend/tests/backend/app/services/test_document_router.py b/backend/tests/backend/app/services/test_document_router.py new file mode 100644 index 0000000..f397960 --- /dev/null +++ b/backend/tests/backend/app/services/test_document_router.py @@ -0,0 +1,234 @@ +import json +from io import BytesIO + +import pytest +from httpx import ASGITransport, AsyncClient +from sqlalchemy import select +from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine + +import app.models # noqa: F401 +from app.database import Base, get_db +from app.main import app +from app.models.document import Document, DocumentChunk +from app.models.user import User +from app.routers.auth import get_current_user +from app.services.auth_service import get_password_hash +from app.services.document_service import DocumentService +from starlette.datastructures import UploadFile + + +@pytest.fixture +async def document_router_env(tmp_path): + db_path = tmp_path / 'test_documents_router.db' + engine = create_async_engine(f"sqlite+aiosqlite:///{db_path}", future=True) + session_factory = async_sessionmaker(engine, expire_on_commit=False) + + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.create_all) + + async with session_factory() as session: + user = User( + email='docs@example.com', + hashed_password=get_password_hash('secret123'), + full_name='Docs Tester', + ) + session.add(user) + await session.flush() + + document = Document( + id='doc-1', + user_id=user.id, + title='Uploaded spec', + filename='uploaded-spec.docx', + file_type='docx', + file_size=2048, + file_path=str(tmp_path / 'uploaded-spec.docx'), + summary='spec summary', + chunk_count=3, + is_indexed=True, + ingestion_status='ready', + normalized_content='# Uploaded spec\n\nnormalized body', + normalized_format='structured_markdown', + ) + session.add(document) + await session.flush() + session.add_all([ + DocumentChunk( + id='chunk-1', + document_id=document.id, + chunk_index=0, + content='original chunk content', + metadata_=json.dumps({'content_type': 'paragraph', 'section_title': 'Intro'}), + ), + DocumentChunk( + id='chunk-2', + document_id=document.id, + chunk_index=1, + content='second chunk content', + metadata_=json.dumps({'content_type': 'paragraph', 'section_title': 'Details'}), + ), + ]) + await session.commit() + await session.refresh(user) + + async def override_get_db(): + async with session_factory() as session: + yield session + + async def override_get_current_user(): + return user + + app.dependency_overrides[get_db] = override_get_db + app.dependency_overrides[get_current_user] = override_get_current_user + + try: + yield + finally: + app.dependency_overrides.clear() + await engine.dispose() + + +@pytest.mark.asyncio +async def test_list_documents_returns_serializable_document_payload(document_router_env): + transport = ASGITransport(app=app) + + async with AsyncClient(transport=transport, base_url='http://testserver') as client: + response = await client.get('/api/documents') + + assert response.status_code == 200 + payload = response.json() + assert len(payload) == 1 + assert payload[0]['title'] == 'Uploaded spec' + assert payload[0]['ingestion_status'] == 'ready' + + +@pytest.mark.asyncio +async def test_get_document_chunks_returns_serializable_chunk_payload(document_router_env): + transport = ASGITransport(app=app) + + async with AsyncClient(transport=transport, base_url='http://testserver') as client: + response = await client.get('/api/documents/doc-1/chunks') + + assert response.status_code == 200 + payload = response.json() + assert [chunk['id'] for chunk in payload] == ['chunk-1', 'chunk-2'] + assert payload[0]['content'] == 'original chunk content' + + +@pytest.mark.asyncio +async def test_update_document_chunk_persists_content_and_reindexes_existing_chunks(document_router_env): + transport = ASGITransport(app=app) + + async with AsyncClient(transport=transport, base_url='http://testserver') as client: + response = await client.put('/api/documents/doc-1/chunks/chunk-1', json={'content': 'edited chunk content'}) + + assert response.status_code == 200 + payload = response.json() + assert payload['id'] == 'chunk-1' + assert payload['content'] == 'edited chunk content' + + async for session in app.dependency_overrides[get_db](): + chunk_result = await session.execute(select(DocumentChunk).where(DocumentChunk.id == 'chunk-1')) + updated_chunk = chunk_result.scalar_one() + document_result = await session.execute(select(Document).where(Document.id == 'doc-1')) + updated_document = document_result.scalar_one() + + assert updated_chunk.content == 'edited chunk content' + assert updated_document.ingestion_status == 'ready' + assert updated_document.indexed_at is not None + + +@pytest.mark.asyncio +async def test_get_document_content_prefers_normalized_content(document_router_env): + transport = ASGITransport(app=app) + + async with AsyncClient(transport=transport, base_url='http://testserver') as client: + response = await client.get('/api/documents/doc-1/content') + + assert response.status_code == 200 + assert response.json() == {'content': '# Uploaded spec\n\nnormalized body'} + + +@pytest.mark.asyncio +async def test_upload_document_returns_400_for_unsupported_file_type(document_router_env): + transport = ASGITransport(app=app) + + async with AsyncClient(transport=transport, base_url='http://testserver') as client: + response = await client.post( + '/api/documents/upload', + files={'file': ('payload.exe', BytesIO(b'bad'), 'application/octet-stream')}, + ) + + assert response.status_code == 400 + assert response.json()['detail'] == '不支持的文件类型: .exe' + + +@pytest.mark.asyncio +async def test_upload_document_returns_400_for_missing_parser_dependency(document_router_env, monkeypatch): + async def raise_missing_dependency(self, file_path: str, ext: str): + raise ValueError('DOCX 解析依赖缺失: python-docx') + + monkeypatch.setattr(DocumentService, '_parse_document', raise_missing_dependency) + transport = ASGITransport(app=app) + + async with AsyncClient(transport=transport, base_url='http://testserver') as client: + response = await client.post( + '/api/documents/upload', + files={'file': ('payload.docx', BytesIO(b'bad'), 'application/vnd.openxmlformats-officedocument.wordprocessingml.document')}, + ) + + assert response.status_code == 400 + assert response.json()['detail'] == 'DOCX 解析依赖缺失: python-docx' + + +@pytest.mark.asyncio +async def test_upload_document_returns_400_for_missing_mineru_dependency(document_router_env, monkeypatch): + async def raise_missing_mineru(self, file_path: str, ext: str): + raise ValueError('PDF 解析依赖缺失: mineru') + + monkeypatch.setattr(DocumentService, '_parse_document', raise_missing_mineru) + transport = ASGITransport(app=app) + + async with AsyncClient(transport=transport, base_url='http://testserver') as client: + response = await client.post( + '/api/documents/upload', + files={'file': ('payload.pdf', BytesIO(b'%PDF-1.4 bad'), 'application/pdf')}, + ) + + assert response.status_code == 400 + assert response.json()['detail'] == 'PDF 解析依赖缺失: mineru' + + +@pytest.mark.asyncio +async def test_upload_document_returns_success_payload_for_pdf(document_router_env, monkeypatch): + async def fake_upload_document(self, user_id: str, file, folder_id=None): + return Document( + id='pdf-doc-1', + user_id=user_id, + title='PDF Spec', + filename='payload.pdf', + file_type='pdf', + file_size=2048, + file_path='fake/path/payload.pdf', + chunk_count=4, + ingestion_status='uploaded', + normalized_content='# PDF Spec\n\nBody', + normalized_format='structured_markdown', + ) + + monkeypatch.setattr(DocumentService, 'upload_document', fake_upload_document) + transport = ASGITransport(app=app) + + async with AsyncClient(transport=transport, base_url='http://testserver') as client: + response = await client.post( + '/api/documents/upload', + files={'file': ('payload.pdf', BytesIO(b'%PDF-1.4 fake'), 'application/pdf')}, + ) + + assert response.status_code == 201 + assert response.json() == { + 'id': 'pdf-doc-1', + 'title': 'PDF Spec', + 'chunk_count': 4, + 'status': '上传成功,正在索引...', + } diff --git a/backend/tests/backend/app/services/test_document_service.py b/backend/tests/backend/app/services/test_document_service.py new file mode 100644 index 0000000..6176eaa --- /dev/null +++ b/backend/tests/backend/app/services/test_document_service.py @@ -0,0 +1,371 @@ +import json +from io import BytesIO +import builtins +import sys +import types + +import pytest +from docx import Document as DocxDocument +from openpyxl import Workbook +from sqlalchemy import select +from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine +from starlette.datastructures import UploadFile + +import app.models # noqa: F401 +from app.database import Base +from app.models.document import Document, DocumentChunk +from app.models.user import User +from app.services.auth_service import get_password_hash +from app.services.document_service import DocumentService + + +@pytest.fixture +async def document_test_env(tmp_path, monkeypatch): + db_path = tmp_path / 'test_documents.db' + engine = create_async_engine(f"sqlite+aiosqlite:///{db_path}", future=True) + session_factory = async_sessionmaker(engine, expire_on_commit=False) + + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.create_all) + + async with session_factory() as session: + user = User( + email='doc-tester@example.com', + hashed_password=get_password_hash('secret123'), + full_name='Doc Tester', + ) + session.add(user) + await session.commit() + await session.refresh(user) + + monkeypatch.setattr('app.services.document_service.settings.UPLOAD_DIR', str(tmp_path / 'uploads')) + monkeypatch.setattr('app.services.document_service.settings.CHUNK_SIZE', 120) + monkeypatch.setattr('app.services.document_service.settings.CHUNK_OVERLAP', 20) + + async with session_factory() as session: + yield session, user + + await engine.dispose() + + +@pytest.mark.asyncio +async def test_upload_document_creates_schema_and_row_chunks_for_csv(document_test_env): + session, user = document_test_env + service = DocumentService(session) + payload = '\n'.join([ + 'region,month,revenue', + 'East,2025-01,100', + 'West,2025-01,200', + 'East,2025-02,150', + 'West,2025-02,250', + ]) + upload = UploadFile(filename='sales.csv', file=BytesIO(payload.encode('utf-8'))) + + document = await service.upload_document(user.id, upload) + + assert document.file_type == 'csv' + assert document.ingestion_status == 'uploaded' + assert document.parser_version == 'v2' + assert document.index_version == 'v2' + assert document.chunk_count >= 2 + + chunk_result = await session.execute( + select(DocumentChunk) + .where(DocumentChunk.document_id == document.id) + .order_by(DocumentChunk.chunk_index) + ) + chunks = list(chunk_result.scalars().all()) + + metadata = [json.loads(chunk.metadata_) for chunk in chunks] + assert metadata[0]['content_type'] == 'table_schema' + assert metadata[0]['headers'] == ['region', 'month', 'revenue'] + assert any(item['content_type'] == 'table_rows' for item in metadata) + assert any('region=East' in chunk.content for chunk in chunks) + + +@pytest.mark.asyncio +async def test_upload_document_creates_sheet_metadata_chunks_for_xlsx(document_test_env): + session, user = document_test_env + service = DocumentService(session) + + workbook = Workbook() + ws = workbook.active + ws.title = 'Revenue' + ws.append(['region', 'quarter', 'amount']) + ws.append(['East', 'Q1', 300]) + ws.append(['West', 'Q1', 280]) + detail = workbook.create_sheet('Detail') + detail.append(['project', 'owner']) + detail.append(['Jarvis', 'Ops']) + + file_obj = BytesIO() + workbook.save(file_obj) + file_obj.seek(0) + upload = UploadFile(filename='report.xlsx', file=file_obj) + + document = await service.upload_document(user.id, upload) + + assert document.file_type == 'xlsx' + assert document.chunk_count >= 3 + + chunk_result = await session.execute( + select(DocumentChunk) + .where(DocumentChunk.document_id == document.id) + .order_by(DocumentChunk.chunk_index) + ) + chunks = list(chunk_result.scalars().all()) + metadata = [json.loads(chunk.metadata_) for chunk in chunks] + + assert any(item['sheet_name'] == 'Revenue' for item in metadata) + assert any(item['sheet_name'] == 'Detail' for item in metadata) + assert any(item['content_type'] == 'table_schema' for item in metadata) + assert any(item['content_type'] == 'table_rows' for item in metadata) + + +@pytest.mark.asyncio +async def test_upload_document_preserves_section_metadata_for_markdown(document_test_env): + session, user = document_test_env + service = DocumentService(session) + payload = '\n'.join([ + '# Overview', + 'Jarvis overview paragraph.', + '', + '## Retrieval', + 'Hybrid retrieval favors relevant chunks.', + ]) + upload = UploadFile(filename='guide.md', file=BytesIO(payload.encode('utf-8'))) + + document = await service.upload_document(user.id, upload) + + chunk_result = await session.execute( + select(DocumentChunk) + .where(DocumentChunk.document_id == document.id) + .order_by(DocumentChunk.chunk_index) + ) + chunks = list(chunk_result.scalars().all()) + + metadata = [json.loads(chunk.metadata_) for chunk in chunks] + assert any(item['content_type'] == 'heading' for item in metadata) + assert any(item['section_path'] == ['Overview', 'Retrieval'] for item in metadata if item['content_type'] != 'heading') + assert any(item.get('section_title') == 'Retrieval' for item in metadata) + + heading_item = next(item for item in metadata if item['content_type'] == 'heading' and item['section_title'] == 'Overview') + child_item = next(item for item in metadata if item['content_type'] == 'paragraph' and item['section_title'] == 'Retrieval') + assert heading_item['chunk_level'] == 1 + assert heading_item['parent_key'] is None + assert heading_item['block_key'] == 'Overview' + assert child_item['chunk_level'] == 2 + assert child_item['parent_key'] == 'Overview' + assert child_item['block_key'] == 'Overview/Retrieval' + + +@pytest.mark.asyncio +async def test_upload_document_rejects_unsupported_extension(document_test_env): + session, user = document_test_env + service = DocumentService(session) + upload = UploadFile(filename='malware.exe', file=BytesIO(b'bad')) + + with pytest.raises(ValueError, match='不支持的文件类型'): + await service.upload_document(user.id, upload) + + +@pytest.mark.asyncio +async def test_upload_document_persists_structured_metadata_json(document_test_env): + session, user = document_test_env + service = DocumentService(session) + payload = 'title\n\nplain text body for metadata storage' + upload = UploadFile(filename='notes.txt', file=BytesIO(payload.encode('utf-8'))) + + document = await service.upload_document(user.id, upload) + + chunk_result = await session.execute( + select(DocumentChunk) + .where(DocumentChunk.document_id == document.id) + .order_by(DocumentChunk.chunk_index) + ) + chunk = chunk_result.scalars().first() + + parsed = json.loads(chunk.metadata_) + assert parsed['content_type'] == 'text' + assert parsed['parser_version'] == 'v2' + assert parsed['index_version'] == 'v2' + assert parsed['source_order'] == 0 + + document_result = await session.execute(select(Document).where(Document.id == document.id)) + stored_document = document_result.scalar_one() + assert stored_document.ingestion_status == 'uploaded' + assert stored_document.normalized_format == 'structured_markdown' + assert stored_document.normalized_content == 'title\n\nplain text body for metadata storage' + + +@pytest.mark.asyncio +async def test_upload_document_extracts_docx_heading_and_table_structure(document_test_env): + session, user = document_test_env + service = DocumentService(session) + + doc = DocxDocument() + doc.add_heading('Architecture', level=1) + doc.add_paragraph('System overview paragraph.') + doc.add_heading('Retrieval', level=2) + doc.add_paragraph('Section-aware retrieval paragraph.') + table = doc.add_table(rows=2, cols=2) + table.rows[0].cells[0].text = 'metric' + table.rows[0].cells[1].text = 'value' + table.rows[1].cells[0].text = 'latency' + table.rows[1].cells[1].text = '120ms' + + file_obj = BytesIO() + doc.save(file_obj) + file_obj.seek(0) + upload = UploadFile(filename='architecture.docx', file=file_obj) + + document = await service.upload_document(user.id, upload) + + chunk_result = await session.execute( + select(DocumentChunk) + .where(DocumentChunk.document_id == document.id) + .order_by(DocumentChunk.chunk_index) + ) + chunks = list(chunk_result.scalars().all()) + metadata = [json.loads(chunk.metadata_) for chunk in chunks] + + retrieval_paragraph = next(item for item in metadata if item['section_title'] == 'Retrieval' and item['content_type'] == 'paragraph') + table_schema = next(item for item in metadata if item['content_type'] == 'table_schema') + + assert retrieval_paragraph['section_path'] == ['Architecture', 'Retrieval'] + assert table_schema['headers'] == ['metric', 'value'] + assert any(item['content_type'] == 'table_rows' for item in metadata) + assert document.normalized_format == 'structured_markdown' + assert '# Architecture' in document.normalized_content + assert '## Retrieval' in document.normalized_content + assert '| metric | value |' in document.normalized_content + + +@pytest.mark.asyncio +async def test_upload_document_raises_clear_error_when_docx_dependency_is_missing(document_test_env, monkeypatch): + session, user = document_test_env + service = DocumentService(session) + + original_import = builtins.__import__ + + def fake_import(name, *args, **kwargs): + if name == 'docx': + raise ModuleNotFoundError("No module named 'docx'") + return original_import(name, *args, **kwargs) + + monkeypatch.setattr(builtins, '__import__', fake_import) + + upload = UploadFile(filename='missing.docx', file=BytesIO(b'fake-docx')) + + with pytest.raises(ValueError, match='DOCX 解析依赖缺失: python-docx'): + await service.upload_document(user.id, upload) + + +@pytest.mark.asyncio +async def test_upload_document_raises_clear_error_when_xlsx_dependency_is_missing(document_test_env, monkeypatch): + session, user = document_test_env + service = DocumentService(session) + + original_import = builtins.__import__ + + def fake_import(name, *args, **kwargs): + if name == 'openpyxl': + raise ModuleNotFoundError("No module named 'openpyxl'") + return original_import(name, *args, **kwargs) + + monkeypatch.setattr(builtins, '__import__', fake_import) + + upload = UploadFile(filename='missing.xlsx', file=BytesIO(b'fake-xlsx')) + + with pytest.raises(ValueError, match='XLSX 解析依赖缺失: openpyxl'): + await service.upload_document(user.id, upload) + + +@pytest.mark.asyncio +async def test_upload_document_uses_mineru_markdown_for_pdf(document_test_env, monkeypatch): + session, user = document_test_env + service = DocumentService(session) + + fake_mineru = types.SimpleNamespace( + to_markdown=lambda file_path: '# PDF Title\n\n## Section\n\nMinerU extracted paragraph.' + ) + monkeypatch.setitem(sys.modules, 'mineru', fake_mineru) + + upload = UploadFile(filename='spec.pdf', file=BytesIO(b'%PDF-1.4 fake')) + document = await service.upload_document(user.id, upload) + + chunk_result = await session.execute( + select(DocumentChunk) + .where(DocumentChunk.document_id == document.id) + .order_by(DocumentChunk.chunk_index) + ) + chunks = list(chunk_result.scalars().all()) + metadata = [json.loads(chunk.metadata_) for chunk in chunks] + + assert document.normalized_format == 'structured_markdown' + assert '# PDF Title' in document.normalized_content + assert '## Section' in document.normalized_content + assert any(item['content_type'] == 'heading' for item in metadata) + assert any(item['content_type'] == 'paragraph' and item['section_title'] == 'Section' for item in metadata) + + +@pytest.mark.asyncio +async def test_upload_document_preserves_mineru_image_markdown_in_pdf(document_test_env, monkeypatch): + session, user = document_test_env + service = DocumentService(session) + + fake_mineru = types.SimpleNamespace( + to_markdown=lambda file_path: '# PDF Title\n\n![System diagram](images/system.png)\n\nSystem diagram shows retrieval flow.' + ) + monkeypatch.setitem(sys.modules, 'mineru', fake_mineru) + + upload = UploadFile(filename='diagram.pdf', file=BytesIO(b'%PDF-1.4 fake')) + document = await service.upload_document(user.id, upload) + + chunk_result = await session.execute( + select(DocumentChunk) + .where(DocumentChunk.document_id == document.id) + .order_by(DocumentChunk.chunk_index) + ) + chunks = list(chunk_result.scalars().all()) + + assert '![System diagram](images/system.png)' in document.normalized_content + assert any('System diagram' in chunk.content for chunk in chunks) + + +@pytest.mark.asyncio +async def test_get_document_content_returns_normalized_pdf_content(document_test_env, monkeypatch): + session, user = document_test_env + service = DocumentService(session) + + fake_mineru = types.SimpleNamespace( + to_markdown=lambda file_path: '# PDF Title\n\nNormalized pdf body.' + ) + monkeypatch.setitem(sys.modules, 'mineru', fake_mineru) + + upload = UploadFile(filename='preview.pdf', file=BytesIO(b'%PDF-1.4 fake')) + document = await service.upload_document(user.id, upload) + + content = await service.get_document_content(user.id, document.id) + + assert content == '# PDF Title\n\nNormalized pdf body.' + + +@pytest.mark.asyncio +async def test_upload_document_raises_clear_error_when_pdf_dependency_is_missing(document_test_env, monkeypatch): + session, user = document_test_env + service = DocumentService(session) + + original_import = builtins.__import__ + + def fake_import(name, *args, **kwargs): + if name == 'mineru': + raise ModuleNotFoundError("No module named 'mineru'") + return original_import(name, *args, **kwargs) + + monkeypatch.setattr(builtins, '__import__', fake_import) + + upload = UploadFile(filename='missing.pdf', file=BytesIO(b'%PDF-1.4 fake')) + + with pytest.raises(ValueError, match='PDF 解析依赖缺失: mineru'): + await service.upload_document(user.id, upload) diff --git a/backend/tests/backend/app/test_database.py b/backend/tests/backend/app/test_database.py new file mode 100644 index 0000000..95a5aa6 --- /dev/null +++ b/backend/tests/backend/app/test_database.py @@ -0,0 +1,130 @@ +import importlib +from pathlib import Path +from unittest.mock import AsyncMock, Mock + +import pytest +from langchain_core.messages import AIMessage, HumanMessage +from sqlalchemy import text +from sqlalchemy.ext.asyncio import create_async_engine + +import app.models # noqa: F401 +from app.database import Base, ensure_document_columns, ensure_message_columns +from app.agents.graph import _ainvoke, _compile_graph + + +@pytest.mark.anyio +async def test_ensure_message_columns_adds_attachments_for_existing_messages_table(tmp_path): + db_path = tmp_path / 'test_messages.db' + engine = create_async_engine(f"sqlite+aiosqlite:///{db_path}", future=True) + + async with engine.begin() as conn: + await conn.execute(text( + ''' + CREATE TABLE messages ( + id VARCHAR(36) PRIMARY KEY, + conversation_id VARCHAR(36) NOT NULL, + role VARCHAR(20) NOT NULL, + content TEXT NOT NULL, + model VARCHAR(100), + tokens_used INTEGER, + created_at DATETIME, + updated_at DATETIME + ) + ''' + )) + result = await conn.execute(text("PRAGMA table_info(messages)")) + columns_before = {row[1] for row in result.fetchall()} + assert 'attachments' not in columns_before + + await ensure_message_columns(conn) + + result = await conn.execute(text("PRAGMA table_info(messages)")) + columns_after = {row[1] for row in result.fetchall()} + assert 'attachments' in columns_after + + await engine.dispose() + + +@pytest.mark.anyio +async def test_ainvoke_falls_back_to_invoke_for_wrapped_llm_services(): + llm = Mock() + llm.ainvoke = None + llm.invoke = AsyncMock(return_value=AIMessage(content='ok')) + + response = await _ainvoke(llm, [HumanMessage(content='ping')]) + + assert response.content == 'ok' + llm.invoke.assert_awaited_once() + + +def test_compile_graph_falls_back_when_callbacks_are_unsupported(): + compiled_without_callbacks = object() + graph = Mock() + graph.compile.side_effect = [TypeError("unexpected keyword argument 'callbacks'"), compiled_without_callbacks] + + compiled = _compile_graph(graph, callbacks=['cb']) + + assert compiled is compiled_without_callbacks + assert graph.compile.call_count == 2 + assert graph.compile.call_args_list[0].kwargs == {'callbacks': ['cb']} + assert graph.compile.call_args_list[1].kwargs == {} + + +def test_settings_resolve_data_paths_from_backend_directory(): + config_module = importlib.import_module('app.config') + expected_data_dir = (Path(config_module.__file__).resolve().parent.parent / 'data').resolve() + + assert Path(config_module.settings.DATA_DIR) == expected_data_dir + assert config_module.settings.DATABASE_URL.replace('\\', '/').endswith('/backend/data/jarvis.db') + assert Path(config_module.settings.CHROMA_PERSIST_DIR) == expected_data_dir / 'chroma' + assert Path(config_module.settings.UPLOAD_DIR) == expected_data_dir / 'uploads' + + +@pytest.mark.anyio +async def test_ensure_document_columns_adds_ingestion_fields_for_existing_documents_table(tmp_path): + db_path = tmp_path / 'test_documents.db' + engine = create_async_engine(f"sqlite+aiosqlite:///{db_path}", future=True) + + async with engine.begin() as conn: + await conn.execute(text( + ''' + CREATE TABLE documents ( + id VARCHAR(36) PRIMARY KEY, + user_id VARCHAR(36) NOT NULL, + title VARCHAR(500) NOT NULL, + filename VARCHAR(500) NOT NULL, + file_type VARCHAR(50) NOT NULL, + file_size INTEGER NOT NULL, + file_path VARCHAR(1000) NOT NULL, + folder_id VARCHAR(36), + summary TEXT, + chunk_count INTEGER, + is_indexed BOOLEAN, + created_at DATETIME, + updated_at DATETIME + ) + ''' + )) + result = await conn.execute(text("PRAGMA table_info(documents)")) + columns_before = {row[1] for row in result.fetchall()} + assert 'ingestion_status' not in columns_before + assert 'ingestion_error' not in columns_before + assert 'indexed_at' not in columns_before + assert 'parser_version' not in columns_before + assert 'index_version' not in columns_before + assert 'normalized_content' not in columns_before + assert 'normalized_format' not in columns_before + + await ensure_document_columns(conn) + + result = await conn.execute(text("PRAGMA table_info(documents)")) + columns_after = {row[1] for row in result.fetchall()} + assert 'ingestion_status' in columns_after + assert 'ingestion_error' in columns_after + assert 'indexed_at' in columns_after + assert 'parser_version' in columns_after + assert 'index_version' in columns_after + assert 'normalized_content' in columns_after + assert 'normalized_format' in columns_after + + await engine.dispose() diff --git a/backend/uv.lock b/backend/uv.lock index df5272a..6368384 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -217,6 +217,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/64/b4/17d4b0b2a2dc85a6df63d1157e028ed19f90d4cd97c36717afef2bc2f395/attrs-26.1.0-py3-none-any.whl", hash = "sha256:c647aa4a12dfbad9333ca4e71fe62ddc36f4e63b2d260a37a8b83d2f043ac309", size = 67548, upload-time = "2026-03-19T14:22:23.645Z" }, ] +[[package]] +name = "av" +version = "17.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/eb/abca886df3a091bc406feb5ff71b4c4f426beaae6b71b9697264ce8c7211/av-17.0.0.tar.gz", hash = "sha256:c53685df73775a8763c375c7b2d62a6cb149d992a26a4b098204da42ade8c3df", size = 4410769, upload-time = "2026-03-14T14:38:45.868Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/fb/55e3b5b5d1fc61466292f26fbcbabafa2642f378dc48875f8f554591e1a4/av-17.0.0-cp311-abi3-macosx_11_0_x86_64.whl", hash = "sha256:ed4013fac77c309a4a68141dcf6148f1821bb1073a36d4289379762a6372f711", size = 23238424, upload-time = "2026-03-14T14:38:05.856Z" }, + { url = "https://files.pythonhosted.org/packages/52/03/9ace1acc08bc9ae38c14bf3a4b1360e995e4d999d1d33c2cbd7c9e77582a/av-17.0.0-cp311-abi3-macosx_14_0_arm64.whl", hash = "sha256:e44b6c83e9f3be9f79ee87d0b77a27cea9a9cd67bd630362c86b7e56a748dfbb", size = 18709043, upload-time = "2026-03-14T14:38:08.288Z" }, + { url = "https://files.pythonhosted.org/packages/00/c0/637721f3cd5bb8bd16105a1a08efd781fc12f449931bdb3a4d0cfd63fa55/av-17.0.0-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:b440da6ac47da0629d509316f24bcd858f33158dbdd0f1b7293d71e99beb26de", size = 34018780, upload-time = "2026-03-14T14:38:10.45Z" }, + { url = "https://files.pythonhosted.org/packages/d2/59/d19bc3257dd985d55337d7f0414c019414b97e16cd3690ebf9941a847543/av-17.0.0-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1060cba85f97f4a337311169d92c0b5e143452cfa5ca0e65fa499d7955e8592e", size = 36358757, upload-time = "2026-03-14T14:38:13.092Z" }, + { url = "https://files.pythonhosted.org/packages/52/6c/a1f4f2677bae6f2ade7a8a18e90ebdcf70690c9b1c4e40e118aa30fa313f/av-17.0.0-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:deda202e6021cfc7ba3e816897760ec5431309d59a4da1f75df3c0e9413d71e7", size = 35195281, upload-time = "2026-03-14T14:38:15.789Z" }, + { url = "https://files.pythonhosted.org/packages/90/ea/52b0fc6f69432c7bf3f5fbe6f707113650aa40a1a05b9096ffc2bba4f77d/av-17.0.0-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ffaf266a1a9c2148072de0a4b5ae98061465178d2cfaa69ee089761149342974", size = 37444817, upload-time = "2026-03-14T14:38:18.563Z" }, + { url = "https://files.pythonhosted.org/packages/34/ad/d2172966282cb8f146c13b6be7416efefde74186460c5e1708ddfc13dba6/av-17.0.0-cp311-abi3-win_amd64.whl", hash = "sha256:45a35a40b2875bf2f98de7c952d74d960f92f319734e6d28e03b4c62a49e6f49", size = 28888553, upload-time = "2026-03-14T14:38:21.223Z" }, + { url = "https://files.pythonhosted.org/packages/b0/bb/c5a4c4172c514d631fb506e6366b503576b8c7f29809cf42aca73e28ff01/av-17.0.0-cp311-abi3-win_arm64.whl", hash = "sha256:3d32e9b5c5bbcb872a0b6917b352a1db8a42142237826c9b49a36d5dbd9e9c26", size = 21916910, upload-time = "2026-03-14T14:38:23.706Z" }, + { url = "https://files.pythonhosted.org/packages/7f/8e/c40ac08e63f79387c59f6ecc38f47d4c942b549130eee579ec1a91f6a291/av-17.0.0-cp314-cp314t-macosx_11_0_x86_64.whl", hash = "sha256:d13250fb4b4522e9a6bec32da082556d5f257110ea223758151375748d9bbe25", size = 23483029, upload-time = "2026-03-14T14:38:25.758Z" }, + { url = "https://files.pythonhosted.org/packages/a9/fb/b4419494bfc249163ec393c613966d66db7e95c76da3345711cd115a79df/av-17.0.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:dbb56aa3b7ae72451d1bf6e9d37c7d83d39b97af712f73583ff419fbf08fc237", size = 18920446, upload-time = "2026-03-14T14:38:27.905Z" }, + { url = "https://files.pythonhosted.org/packages/30/62/c2306d91602ddad2c56106f21dcb334fd51d5ea2e952f7fa025bb8aa39fc/av-17.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:a213ac9e83b7ab12c2e9f277a09cac8e9d85cf0883efdab7a87a60e2e4e48879", size = 37477266, upload-time = "2026-03-14T14:38:30.404Z" }, + { url = "https://files.pythonhosted.org/packages/28/cd/c8510a9607886785c0b3ca019d503e888c3757529be42a7287fe2bfa92d5/av-17.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:e15c88bb0921f9435bcc5a27a0863dba571a80ad5e1389c4fcf2073833bb4a74", size = 39572988, upload-time = "2026-03-14T14:38:32.984Z" }, + { url = "https://files.pythonhosted.org/packages/7d/2d/207d9361e25b5abec9be335bbab4df6b6b838e2214be4b374f4cfb285427/av-17.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:096cfd1e9fc896506726c7c42aaf9b370e78c2f257cde4d6ddb6c889bfcc49ec", size = 38399591, upload-time = "2026-03-14T14:38:35.465Z" }, + { url = "https://files.pythonhosted.org/packages/73/ca/307740c6aa2980966bf11383ffcb04bacc5b13f3d268ab4cfb274ad6f793/av-17.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3649ab3d2c7f58049ded1a36e100c0d8fd529cf258f41dd88678ba824034d8c9", size = 40590681, upload-time = "2026-03-14T14:38:38.269Z" }, + { url = "https://files.pythonhosted.org/packages/35/f2/6fdb26d0651adf409864cb2a0d60da107e467d3d1aabc94b234ead54324a/av-17.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:e5002271ab2135b551d980c2db8f3299d452e3b9d3633f24f6bb57fffe91cd10", size = 29216337, upload-time = "2026-03-14T14:38:40.83Z" }, + { url = "https://files.pythonhosted.org/packages/41/0a/0896b829a39b5669a2d811e1a79598de661693685cd62b31f11d0c18e65b/av-17.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:dba98603fc4665b4f750de86fbaf6c0cfaece970671a9b529e0e3d1711e8367e", size = 22071058, upload-time = "2026-03-14T14:38:43.663Z" }, +] + [[package]] name = "banks" version = "2.4.1" @@ -284,6 +308,47 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a9/cf/45fb5261ece3e6b9817d3d82b2f343a505fd58674a92577923bc500bd1aa/bcrypt-4.3.0-cp39-abi3-win_amd64.whl", hash = "sha256:e53e074b120f2877a35cc6c736b8eb161377caae8925c17688bd46ba56daaa5b", size = 152799, upload-time = "2025-02-28T01:23:53.139Z" }, ] +[[package]] +name = "beautifulsoup4" +version = "4.14.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "soupsieve" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c3/b0/1c6a16426d389813b48d95e26898aff79abbde42ad353958ad95cc8c9b21/beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86", size = 627737, upload-time = "2025-11-30T15:08:26.084Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", size = 107721, upload-time = "2025-11-30T15:08:24.087Z" }, +] + +[[package]] +name = "boto3" +version = "1.42.73" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, + { name = "jmespath" }, + { name = "s3transfer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e4/8b/d00575be514744ca4839e7d85bf4a8a3c7b6b4574433291e58d14c68ae09/boto3-1.42.73.tar.gz", hash = "sha256:d37b58d6cd452ca808dd6823ae19ca65b6244096c5125ef9052988b337298bae", size = 112775, upload-time = "2026-03-20T19:39:52.814Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/aa/05/1fcf03d90abaa3d0b42a6bfd10231dd709493ecbacf794aa2eea5eae6841/boto3-1.42.73-py3-none-any.whl", hash = "sha256:1f81b79b873f130eeab14bb556417a7c66d38f3396b7f2fe3b958b3f9094f455", size = 140556, upload-time = "2026-03-20T19:39:50.298Z" }, +] + +[[package]] +name = "botocore" +version = "1.42.73" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/28/23/0c88ca116ef63b1ae77c901cd5d2095d22a8dbde9e80df74545db4a061b4/botocore-1.42.73.tar.gz", hash = "sha256:575858641e4949aaf2af1ced145b8524529edf006d075877af6b82ff96ad854c", size = 15008008, upload-time = "2026-03-20T19:39:40.082Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8e/65/971f3d55015f4d133a6ff3ad74cd39f4b8dd8f53f7775a3c2ad378ea5145/botocore-1.42.73-py3-none-any.whl", hash = "sha256:7b62e2a12f7a1b08eb7360eecd23bb16fe3b7ab7f5617cf91b25476c6f86a0fe", size = 14681861, upload-time = "2026-03-20T19:39:35.341Z" }, +] + [[package]] name = "build" version = "1.4.0" @@ -509,6 +574,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, ] +[[package]] +name = "colorlog" +version = "6.10.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a2/61/f083b5ac52e505dfc1c624eafbf8c7589a0d7f32daa398d2e7590efa5fda/colorlog-6.10.1.tar.gz", hash = "sha256:eb4ae5cb65fe7fec7773c2306061a8e63e02efc2c72eba9d27b0fa23c94f1321", size = 17162, upload-time = "2025-10-16T16:14:11.978Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6d/c1/e419ef3723a074172b68aaa89c9f3de486ed4c2399e2dbd8113a4fdcaf9e/colorlog-6.10.1-py3-none-any.whl", hash = "sha256:2d7e8348291948af66122cff006c9f8da6255d224e7cf8e37d8de2df3bad8c9c", size = 11743, upload-time = "2025-10-16T16:14:10.512Z" }, +] + [[package]] name = "coverage" version = "7.13.5" @@ -750,6 +827,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/de/15/545e2b6cf2e3be84bc1ed85613edd75b8aea69807a71c26f4ca6a9258e82/email_validator-2.3.0-py3-none-any.whl", hash = "sha256:80f13f623413e6b197ae73bb10bf4eb0908faf509ad8362c5edeb0be7fd450b4", size = 35604, upload-time = "2025-08-26T13:09:05.858Z" }, ] +[[package]] +name = "et-xmlfile" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234, upload-time = "2024-10-25T17:25:40.039Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" }, +] + +[[package]] +name = "fast-langdetect" +version = "0.2.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fasttext-predict" }, + { name = "requests" }, + { name = "robust-downloader" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8d/17/2753b3d280e16f594e7a5d735568c021a355cd1edd3827f26cda5c9fd460/fast_langdetect-0.2.5.tar.gz", hash = "sha256:e5fe65973f5737107bb8314f3829280d196c87d4da0b4a6e95000175512cf2c3", size = 788620, upload-time = "2025-01-28T02:15:24.637Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/27/da/c621e64d4bc23f485468295bb7d4a5f2290ebb4d342c8dc448ab66808071/fast_langdetect-0.2.5-py3-none-any.whl", hash = "sha256:8d5ff640d94d5f30bb7653c761adbb9122b617b03fa1f166b7cc16c35e484d0e", size = 786618, upload-time = "2025-01-28T02:15:23.039Z" }, +] + [[package]] name = "fastapi" version = "0.135.1" @@ -766,6 +866,50 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e4/72/42e900510195b23a56bde950d26a51f8b723846bfcaa0286e90287f0422b/fastapi-0.135.1-py3-none-any.whl", hash = "sha256:46e2fc5745924b7c840f71ddd277382af29ce1cdb7d5eab5bf697e3fb9999c9e", size = 116999, upload-time = "2026-03-01T18:18:30.831Z" }, ] +[[package]] +name = "fasttext-predict" +version = "0.9.2.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/0e/9defbb9385bcb1104cc1d686a14f7d9fafe5fe43f220cccb00f33d91bb47/fasttext_predict-0.9.2.4.tar.gz", hash = "sha256:18a6fb0d74c7df9280db1f96cb75d990bfd004fa9d669493ea3dd3d54f84dbc7", size = 16332, upload-time = "2024-11-23T17:24:44.801Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/fa/612bf85ce8928120843279ae256f4fffbb9758af81536ddf25f9136b1759/fasttext_predict-0.9.2.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:dcf8661da4f515551523470a745df246121f7e19736fcf3f48f04287963e6279", size = 104836, upload-time = "2024-11-23T17:23:25.219Z" }, + { url = "https://files.pythonhosted.org/packages/7a/04/106b6fe3f980d6a4f41bfb3106be22d42f87b1e8beb2959361ee4ee08960/fasttext_predict-0.9.2.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:99dbfcc3f353da2639fd04fc574a65ff4195b018311f790583147cdc6eb122f4", size = 97377, upload-time = "2024-11-23T17:23:26.319Z" }, + { url = "https://files.pythonhosted.org/packages/57/b9/b4962c92bd93dd234ea1d1cab643a86d948dab3f269e34a554a004ed6524/fasttext_predict-0.9.2.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:427e99ba963b2c744ed7233304037a83b7adece97de6f361cfd356aa43cb87f3", size = 283102, upload-time = "2024-11-23T17:23:27.497Z" }, + { url = "https://files.pythonhosted.org/packages/1d/18/92203820cf00b9a34f40f10456e4ed3019010a9b13a87e11d8b98cd98933/fasttext_predict-0.9.2.4-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8b9480cc75a906571a8e5fc717b91b4783f1820aaa5ed36a304d689280de8602", size = 307416, upload-time = "2024-11-23T17:23:28.68Z" }, + { url = "https://files.pythonhosted.org/packages/06/8d/334cd9acb84e569d37617444661ca7b59d1bc1a83abe42aa845d23fb1273/fasttext_predict-0.9.2.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11ef7af2a4431c76d2226e47334e86b9c4a78a98f6cb68b1ce9a1fc20e04c904", size = 296055, upload-time = "2024-11-23T17:23:29.934Z" }, + { url = "https://files.pythonhosted.org/packages/08/0b/2c83cc67eb5a29f182c8ea425e4b026db0593712edb8eaaf082501ca349f/fasttext_predict-0.9.2.4-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:ecb0b854596ba847742597b35c2d0134fcf3a59214d09351d01535854078d56b", size = 237279, upload-time = "2024-11-23T17:23:31.358Z" }, + { url = "https://files.pythonhosted.org/packages/14/81/0f1b3bda499ffeb7109fe51d9321dc74100db5a4801e3f9a9efe2348922d/fasttext_predict-0.9.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:fbbcfefac10f625d95fc42f28d76cc5bf0c12875f147b5a79108a2669e64a2dc", size = 1214253, upload-time = "2024-11-23T17:23:33.529Z" }, + { url = "https://files.pythonhosted.org/packages/d1/e6/b1a177a990c29b043a9658f9f4ec7234576ad31939362f9760c237f91d6d/fasttext_predict-0.9.2.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:a8cb78a00c04b7eb7da18b4805f8557b36911dc4375c947d8938897d2e131841", size = 1099909, upload-time = "2024-11-23T17:23:34.983Z" }, + { url = "https://files.pythonhosted.org/packages/09/a0/7f23c7c4398f399552f39144849868991da543b66b9bfa8f49a6550fdd46/fasttext_predict-0.9.2.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:299ae56ad53e1381c65030143da7bcae12546fd32bc019215592ec1ee40fd19e", size = 1384102, upload-time = "2024-11-23T17:23:37.237Z" }, + { url = "https://files.pythonhosted.org/packages/e4/2c/568cf15fd48e4cefd0e605af62da5f5f51db3b012f8441d201d0a1173eb1/fasttext_predict-0.9.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:091938062002fe30d214f6e493a3a1e6180d401212d37eea23c29f4b55f3f347", size = 1281283, upload-time = "2024-11-23T17:23:39.676Z" }, + { url = "https://files.pythonhosted.org/packages/e7/68/0967ec3d5333c23fae1f1bdb851fa896f8f6068ef0ca3a8afee1aa2ee57d/fasttext_predict-0.9.2.4-cp312-cp312-win32.whl", hash = "sha256:981b8d9734623f8f9a8003970f765e14b1d91ee82c59c35e8eba6b76368fa95e", size = 91089, upload-time = "2024-11-23T17:23:41.082Z" }, + { url = "https://files.pythonhosted.org/packages/a7/c5/11c1f50b47f492d562974878ec34b6a0b84699f8b05e1cc3a75c65349784/fasttext_predict-0.9.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:bd3c33971c241577b0767e55d97acfda790f77378f9d5ee7872b6ee4bd63130b", size = 104889, upload-time = "2024-11-23T17:23:42.193Z" }, + { url = "https://files.pythonhosted.org/packages/89/fc/5cd65224c33e33d6faec3fa1047162dc266ed2213016139d936bd36fb7c3/fasttext_predict-0.9.2.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ddb85e62c95e4e02d417c782e3434ef65554df19e3522f5230f6be15a9373c05", size = 104916, upload-time = "2024-11-23T17:23:43.367Z" }, + { url = "https://files.pythonhosted.org/packages/d9/53/8d542773e32c9d98dd8c680e390fe7e6d4fc92ab3439dc1bb8e70c46c7ad/fasttext_predict-0.9.2.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:102129d45cf98dda871e83ae662f71d999b9ef6ff26bc842ffc1520a1f82930c", size = 97502, upload-time = "2024-11-23T17:23:44.447Z" }, + { url = "https://files.pythonhosted.org/packages/50/99/049fd6b01937705889bd9a00c31e5c55f0ae4b7704007b2ef7a82bf2b867/fasttext_predict-0.9.2.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05ba6a0fbf8cb2141b8ca2bc461db97af8ac31a62341e4696a75048b9de39e10", size = 282951, upload-time = "2024-11-23T17:23:46.31Z" }, + { url = "https://files.pythonhosted.org/packages/83/cb/79b71709edbb53c3c5f8a8b60fe2d3bc98d28a8e75367c89afedf3307aa9/fasttext_predict-0.9.2.4-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c7a779215571296ecfcf86545cb30ec3f1c6f43cbcd69f83cc4f67049375ea1", size = 307377, upload-time = "2024-11-23T17:23:47.685Z" }, + { url = "https://files.pythonhosted.org/packages/7c/4a/b15b7be003e76613173cc77d9c6cce4bf086073079354e0177deaa768f59/fasttext_predict-0.9.2.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddd2f03f3f206585543f5274b1dbc5f651bae141a1b14c9d5225c2a12e5075c2", size = 295746, upload-time = "2024-11-23T17:23:49.024Z" }, + { url = "https://files.pythonhosted.org/packages/e3/d3/f030cd45bdd4b052fcf23e730fdf0804e024b0cad43d7c7f8704faaec2f5/fasttext_predict-0.9.2.4-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:748f9edc3222a1fb7a61331c4e06d3b7f2390ae493f91f09d372a00b81762a8d", size = 236939, upload-time = "2024-11-23T17:23:50.306Z" }, + { url = "https://files.pythonhosted.org/packages/a2/01/6f2985afd58fdc5f4ecd058d5d9427d03081d468960982df97316c03f6bb/fasttext_predict-0.9.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1aee47a40757cd24272b34eaf9ceeea86577fd0761b0fd0e41599c6549abdf04", size = 1214189, upload-time = "2024-11-23T17:23:51.647Z" }, + { url = "https://files.pythonhosted.org/packages/75/07/931bcdd4e2406e45e54d57e056c2e0766616a5280a18fbf6ef078aa439ab/fasttext_predict-0.9.2.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:6ff0f152391ee03ffc18495322100c01735224f7843533a7c4ff33c8853d7be1", size = 1099889, upload-time = "2024-11-23T17:23:53.127Z" }, + { url = "https://files.pythonhosted.org/packages/a2/eb/6521b4bbf387252a96a6dc0f54986f078a93db0a9d4ba77258dcf1fa8be7/fasttext_predict-0.9.2.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4d92f5265318b41d6e68659fd459babbff692484e492c5013995b90a56b517c9", size = 1383959, upload-time = "2024-11-23T17:23:54.521Z" }, + { url = "https://files.pythonhosted.org/packages/b7/6b/d56606761afb3a3912c52971f0f804e2e9065f049c412b96c47d6fca6218/fasttext_predict-0.9.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3a7720cce1b8689d88df76cac1425e84f9911c69a4e40a5309d7d3435e1bb97c", size = 1281097, upload-time = "2024-11-23T17:23:55.9Z" }, + { url = "https://files.pythonhosted.org/packages/91/83/55bb4a37bb3b3a428941f4e1323c345a662254f576f8860b3098d9742510/fasttext_predict-0.9.2.4-cp313-cp313-win32.whl", hash = "sha256:d16acfced7871ed0cd55b476f0dbdddc7a5da1ffc9745a3c5674846cf1555886", size = 91137, upload-time = "2024-11-23T17:23:57.886Z" }, + { url = "https://files.pythonhosted.org/packages/9c/1d/c1ccc8790ce54200c84164d99282f088dddb9760aeefc8860856aafa40b4/fasttext_predict-0.9.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:96a23328729ce62a851f8953582e576ca075ee78d637df4a78a2b3609784849e", size = 104896, upload-time = "2024-11-23T17:23:59.028Z" }, + { url = "https://files.pythonhosted.org/packages/a4/c9/a1ccc749c59e2480767645ecc03bd842a7fa5b2b780d69ac370e6f8298d2/fasttext_predict-0.9.2.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:b1357d0d9d8568db84668b57e7c6880b9c46f757e8954ad37634402d36f09dba", size = 109401, upload-time = "2024-11-23T17:24:00.191Z" }, + { url = "https://files.pythonhosted.org/packages/90/1f/33182b76eb0524155e8ff93e7939feaf5325385e5ff2a154f383d9a02317/fasttext_predict-0.9.2.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9604c464c5d86c7eba34b040080be7012e246ef512b819e428b7deb817290dae", size = 102131, upload-time = "2024-11-23T17:24:02.052Z" }, + { url = "https://files.pythonhosted.org/packages/2b/df/1886daea373382e573f28ce49e3fc8fb6b0ee0c84e2b0becf5b254cd93fb/fasttext_predict-0.9.2.4-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc6da186c2e4497cbfaba9c5424e58c7b72728b25d980829eb96daccd7cface1", size = 287396, upload-time = "2024-11-23T17:24:03.294Z" }, + { url = "https://files.pythonhosted.org/packages/35/8f/d1c2c0f0251bee898d508253a437683b0480a1074cfb25ded1f7fdbb925a/fasttext_predict-0.9.2.4-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:366ed2ca4f4170418f3585e92059cf17ee2c963bf179111c5b8ba48f06cd69d1", size = 311090, upload-time = "2024-11-23T17:24:04.625Z" }, + { url = "https://files.pythonhosted.org/packages/5d/52/07d6ed46148662fae84166bc69d944caca87fabc850ebfbd9640b20dafe7/fasttext_predict-0.9.2.4-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f1877edbb815a43e7d38cc7332202e759054cf0b5a4b7e34a743c0f5d6e7333", size = 300359, upload-time = "2024-11-23T17:24:06.486Z" }, + { url = "https://files.pythonhosted.org/packages/fa/a1/751ff471a991e5ed0bae9e7fa6fc8d8ab76b233a7838a27d70d62bed0c8e/fasttext_predict-0.9.2.4-cp313-cp313t-manylinux_2_31_armv7l.whl", hash = "sha256:f63c31352ba6fc910290b0fe12733770acd8cfa0945fcb9cf3984d241abcfc9d", size = 241164, upload-time = "2024-11-23T17:24:08.501Z" }, + { url = "https://files.pythonhosted.org/packages/94/19/e251f699a0e9c001fa672ea0929c456160faa68ecfafc19e8def09982b6a/fasttext_predict-0.9.2.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:898e14b03fbfb0a8d9a5185a0a00ff656772b3baa37cad122e06e8e4d6da3832", size = 1218629, upload-time = "2024-11-23T17:24:10.04Z" }, + { url = "https://files.pythonhosted.org/packages/1d/46/1af2f779f8cfd746496a226581f747d3051888e3e3c5b2ca37231e5d04f8/fasttext_predict-0.9.2.4-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:a33bb5832a69fc54d18cadcf015677c1acb5ccc7f0125d261df2a89f8aff01f6", size = 1100535, upload-time = "2024-11-23T17:24:11.5Z" }, + { url = "https://files.pythonhosted.org/packages/4c/b7/900ccd74a9ba8be7ca6d04bba684e9c43fb0dbed8a3d12ec0536228e2c32/fasttext_predict-0.9.2.4-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7fe9e98bd0701d598bf245eb2fbf592145cd03551684a2102a4b301294b9bd87", size = 1387651, upload-time = "2024-11-23T17:24:13.135Z" }, + { url = "https://files.pythonhosted.org/packages/0b/5a/99fdaed054079f7c96e70df0d7016c4eb6b9e487a614396dd8f849244a52/fasttext_predict-0.9.2.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dcb8c5a74c1785f005fd83d445137437b79ac70a2dfbfe4bb1b09aa5643be545", size = 1286189, upload-time = "2024-11-23T17:24:14.615Z" }, + { url = "https://files.pythonhosted.org/packages/87/6a/9114d65b3f7a9c20a62b9d2ca3b770ee65de849e4131cc7aa58cdc50cb07/fasttext_predict-0.9.2.4-cp313-cp313t-win32.whl", hash = "sha256:a85c7de3d4480faa12b930637fca9c23144d1520786fedf9ba8edd8642ed4aea", size = 95905, upload-time = "2024-11-23T17:24:15.868Z" }, + { url = "https://files.pythonhosted.org/packages/31/fb/6d251f3fdfe3346ee60d091f55106513e509659ee005ad39c914182c96f4/fasttext_predict-0.9.2.4-cp313-cp313t-win_amd64.whl", hash = "sha256:be0933fa4af7abae09c703d28f9e17c80e7069eb6f92100b21985b777f4ea275", size = 110325, upload-time = "2024-11-23T17:24:16.984Z" }, +] + [[package]] name = "filelock" version = "3.25.2" @@ -1116,6 +1260,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, ] +[[package]] +name = "httpx-retries" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a4/13/5eac2df576c02280f79e4639a6d4c93a25cfe94458275f5aa55f5e6c8ea0/httpx_retries-0.4.6.tar.gz", hash = "sha256:a076d8a5ede5d5794e9c241da17b15b393b482129ddd2fdf1fa56a3fa1f28a7f", size = 13466, upload-time = "2026-02-17T16:16:05.995Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/97/63f56da4400034adde22adfe7524635dba068f17d6858f92ecd96f55b53e/httpx_retries-0.4.6-py3-none-any.whl", hash = "sha256:d66d912173b844e065ffb109345a453b922f4c2cd9c9e11139304cb33e7a1ee1", size = 8490, upload-time = "2026-02-17T16:16:04.137Z" }, +] + [[package]] name = "huggingface-hub" version = "1.7.2" @@ -1154,6 +1310,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, ] +[[package]] +name = "imageio" +version = "2.37.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "pillow" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/84/93bcd1300216ea50811cee96873b84a1bebf8d0489ffaf7f2a3756bab866/imageio-2.37.3.tar.gz", hash = "sha256:bbb37efbfc4c400fcd534b367b91fcd66d5da639aaa138034431a1c5e0a41451", size = 389673, upload-time = "2026-03-09T11:31:12.573Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/49/fa/391e437a34e55095173dca5f24070d89cbc233ff85bf1c29c93248c6588d/imageio-2.37.3-py3-none-any.whl", hash = "sha256:46f5bb8522cd421c0f5ae104d8268f569d856b29eb1a13b92829d1970f32c9f0", size = 317646, upload-time = "2026-03-09T11:31:10.771Z" }, +] + [[package]] name = "importlib-metadata" version = "8.7.1" @@ -1206,9 +1375,13 @@ dependencies = [ { name = "langsmith" }, { name = "llama-index" }, { name = "llama-index-vector-stores-chroma" }, + { name = "mineru" }, + { name = "openpyxl" }, { name = "passlib", extra = ["bcrypt"] }, + { name = "psutil" }, { name = "pydantic" }, { name = "pydantic-settings" }, + { name = "python-docx" }, { name = "python-dotenv" }, { name = "python-jose", extra = ["cryptography"] }, { name = "python-multipart" }, @@ -1246,14 +1419,18 @@ requires-dist = [ { name = "langsmith", specifier = ">=0.1.0" }, { name = "llama-index", specifier = ">=0.12.0" }, { name = "llama-index-vector-stores-chroma", specifier = ">=0.3.0" }, + { name = "mineru", specifier = ">=2.0.3" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.10.0" }, + { name = "openpyxl", specifier = ">=3.1.0" }, { name = "passlib", extras = ["bcrypt"], specifier = ">=1.7.4" }, { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=3.7.0" }, + { name = "psutil", specifier = ">=6.1.0" }, { name = "pydantic", specifier = ">=2.0.0" }, { name = "pydantic-settings", specifier = ">=2.0.0" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" }, { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.23.0" }, { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.1.0" }, + { name = "python-docx", specifier = ">=1.1.0" }, { name = "python-dotenv", specifier = ">=1.0.0" }, { name = "python-jose", extras = ["cryptography"], specifier = ">=3.3.0" }, { name = "python-multipart", specifier = ">=0.0.12" }, @@ -1344,6 +1521,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/67/8a/a342b2f0251f3dac4ca17618265d93bf244a2a4d089126e81e4c1056ac50/jiter-0.13.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bb00b6d26db67a05fe3e12c76edc75f32077fb51deed13822dc648fa373bc19", size = 343768, upload-time = "2026-02-02T12:37:55.055Z" }, ] +[[package]] +name = "jmespath" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" }, +] + [[package]] name = "joblib" version = "1.5.3" @@ -1353,6 +1539,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" }, ] +[[package]] +name = "json-repair" +version = "0.58.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/76/70/484f97a744d2614218a2b162004accda3f3c4ccc8c5d688712624567ebec/json_repair-0.58.6.tar.gz", hash = "sha256:aa740113a1c9dede4ba84c29aa8f81493253aede6f0e4edde9a560ec4b1d7762", size = 44804, upload-time = "2026-03-16T13:43:34.722Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/bb/c019ac05a6923c5776fa134c65e5b19d216ef17227618d93b1f608bc2806/json_repair-0.58.6-py3-none-any.whl", hash = "sha256:e438a1e4ea03179dfe9a05dfd738e678e888f1ea5b4a40398f8f220925df1c5c", size = 43482, upload-time = "2026-03-16T13:43:33.569Z" }, +] + [[package]] name = "jsonpatch" version = "1.33" @@ -1557,6 +1752,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1a/94/1f5d72655ab6534129540843776c40eff757387b88e798d8b3bf7e313fd4/langsmith-0.7.22-py3-none-any.whl", hash = "sha256:6e9d5148314d74e86748cb9d3898632cad0320c9323d95f70f969e5bc078eee4", size = 359927, upload-time = "2026-03-19T22:45:21.603Z" }, ] +[[package]] +name = "lazy-loader" +version = "0.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/49/ac/21a1f8aa3777f5658576777ea76bfb124b702c520bbe90edf4ae9915eafa/lazy_loader-0.5.tar.gz", hash = "sha256:717f9179a0dbed357012ddad50a5ad3d5e4d9a0b8712680d4e687f5e6e6ed9b3", size = 15294, upload-time = "2026-03-06T15:45:09.054Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/a1/8d812e53a5da1687abb10445275d41a8b13adb781bbf7196ddbcf8d88505/lazy_loader-0.5-py3-none-any.whl", hash = "sha256:ab0ea149e9c554d4ffeeb21105ac60bed7f3b4fd69b1d2360a4add51b170b005", size = 8044, upload-time = "2026-03-06T15:45:07.668Z" }, +] + [[package]] name = "librt" version = "0.8.1" @@ -1811,6 +2018,115 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8c/7c/203b7ffc633b9c0823f0d0701e361e002b93bf4e493f4c494d4bd5934c0b/llama_parse-0.5.20-py3-none-any.whl", hash = "sha256:9617edb3428d3218ea01f1708f0b6105f3ffef142fedbeb8c98d50082c37e226", size = 16163, upload-time = "2025-01-22T21:04:20.751Z" }, ] +[[package]] +name = "loguru" +version = "0.7.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "win32-setctime", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3a/05/a1dae3dffd1116099471c643b8924f5aa6524411dc6c63fdae648c4f1aca/loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6", size = 63559, upload-time = "2024-12-06T11:20:56.608Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" }, +] + +[[package]] +name = "lxml" +version = "6.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/88/262177de60548e5a2bfc46ad28232c9e9cbde697bd94132aeb80364675cb/lxml-6.0.2.tar.gz", hash = "sha256:cd79f3367bd74b317dda655dc8fcfa304d9eb6e4fb06b7168c5cf27f96e0cd62", size = 4073426, upload-time = "2025-09-22T04:04:59.287Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/c8/8ff2bc6b920c84355146cd1ab7d181bc543b89241cfb1ebee824a7c81457/lxml-6.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a59f5448ba2ceccd06995c95ea59a7674a10de0810f2ce90c9006f3cbc044456", size = 8661887, upload-time = "2025-09-22T04:01:17.265Z" }, + { url = "https://files.pythonhosted.org/packages/37/6f/9aae1008083bb501ef63284220ce81638332f9ccbfa53765b2b7502203cf/lxml-6.0.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e8113639f3296706fbac34a30813929e29247718e88173ad849f57ca59754924", size = 4667818, upload-time = "2025-09-22T04:01:19.688Z" }, + { url = "https://files.pythonhosted.org/packages/f1/ca/31fb37f99f37f1536c133476674c10b577e409c0a624384147653e38baf2/lxml-6.0.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a8bef9b9825fa8bc816a6e641bb67219489229ebc648be422af695f6e7a4fa7f", size = 4950807, upload-time = "2025-09-22T04:01:21.487Z" }, + { url = "https://files.pythonhosted.org/packages/da/87/f6cb9442e4bada8aab5ae7e1046264f62fdbeaa6e3f6211b93f4c0dd97f1/lxml-6.0.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:65ea18d710fd14e0186c2f973dc60bb52039a275f82d3c44a0e42b43440ea534", size = 5109179, upload-time = "2025-09-22T04:01:23.32Z" }, + { url = "https://files.pythonhosted.org/packages/c8/20/a7760713e65888db79bbae4f6146a6ae5c04e4a204a3c48896c408cd6ed2/lxml-6.0.2-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c371aa98126a0d4c739ca93ceffa0fd7a5d732e3ac66a46e74339acd4d334564", size = 5023044, upload-time = "2025-09-22T04:01:25.118Z" }, + { url = "https://files.pythonhosted.org/packages/a2/b0/7e64e0460fcb36471899f75831509098f3fd7cd02a3833ac517433cb4f8f/lxml-6.0.2-cp312-cp312-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:700efd30c0fa1a3581d80a748157397559396090a51d306ea59a70020223d16f", size = 5359685, upload-time = "2025-09-22T04:01:27.398Z" }, + { url = "https://files.pythonhosted.org/packages/b9/e1/e5df362e9ca4e2f48ed6411bd4b3a0ae737cc842e96877f5bf9428055ab4/lxml-6.0.2-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c33e66d44fe60e72397b487ee92e01da0d09ba2d66df8eae42d77b6d06e5eba0", size = 5654127, upload-time = "2025-09-22T04:01:29.629Z" }, + { url = "https://files.pythonhosted.org/packages/c6/d1/232b3309a02d60f11e71857778bfcd4acbdb86c07db8260caf7d008b08f8/lxml-6.0.2-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:90a345bbeaf9d0587a3aaffb7006aa39ccb6ff0e96a57286c0cb2fd1520ea192", size = 5253958, upload-time = "2025-09-22T04:01:31.535Z" }, + { url = "https://files.pythonhosted.org/packages/35/35/d955a070994725c4f7d80583a96cab9c107c57a125b20bb5f708fe941011/lxml-6.0.2-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:064fdadaf7a21af3ed1dcaa106b854077fbeada827c18f72aec9346847cd65d0", size = 4711541, upload-time = "2025-09-22T04:01:33.801Z" }, + { url = "https://files.pythonhosted.org/packages/1e/be/667d17363b38a78c4bd63cfd4b4632029fd68d2c2dc81f25ce9eb5224dd5/lxml-6.0.2-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fbc74f42c3525ac4ffa4b89cbdd00057b6196bcefe8bce794abd42d33a018092", size = 5267426, upload-time = "2025-09-22T04:01:35.639Z" }, + { url = "https://files.pythonhosted.org/packages/ea/47/62c70aa4a1c26569bc958c9ca86af2bb4e1f614e8c04fb2989833874f7ae/lxml-6.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6ddff43f702905a4e32bc24f3f2e2edfe0f8fde3277d481bffb709a4cced7a1f", size = 5064917, upload-time = "2025-09-22T04:01:37.448Z" }, + { url = "https://files.pythonhosted.org/packages/bd/55/6ceddaca353ebd0f1908ef712c597f8570cc9c58130dbb89903198e441fd/lxml-6.0.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6da5185951d72e6f5352166e3da7b0dc27aa70bd1090b0eb3f7f7212b53f1bb8", size = 4788795, upload-time = "2025-09-22T04:01:39.165Z" }, + { url = "https://files.pythonhosted.org/packages/cf/e8/fd63e15da5e3fd4c2146f8bbb3c14e94ab850589beab88e547b2dbce22e1/lxml-6.0.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:57a86e1ebb4020a38d295c04fc79603c7899e0df71588043eb218722dabc087f", size = 5676759, upload-time = "2025-09-22T04:01:41.506Z" }, + { url = "https://files.pythonhosted.org/packages/76/47/b3ec58dc5c374697f5ba37412cd2728f427d056315d124dd4b61da381877/lxml-6.0.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:2047d8234fe735ab77802ce5f2297e410ff40f5238aec569ad7c8e163d7b19a6", size = 5255666, upload-time = "2025-09-22T04:01:43.363Z" }, + { url = "https://files.pythonhosted.org/packages/19/93/03ba725df4c3d72afd9596eef4a37a837ce8e4806010569bedfcd2cb68fd/lxml-6.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6f91fd2b2ea15a6800c8e24418c0775a1694eefc011392da73bc6cef2623b322", size = 5277989, upload-time = "2025-09-22T04:01:45.215Z" }, + { url = "https://files.pythonhosted.org/packages/c6/80/c06de80bfce881d0ad738576f243911fccf992687ae09fd80b734712b39c/lxml-6.0.2-cp312-cp312-win32.whl", hash = "sha256:3ae2ce7d6fedfb3414a2b6c5e20b249c4c607f72cb8d2bb7cc9c6ec7c6f4e849", size = 3611456, upload-time = "2025-09-22T04:01:48.243Z" }, + { url = "https://files.pythonhosted.org/packages/f7/d7/0cdfb6c3e30893463fb3d1e52bc5f5f99684a03c29a0b6b605cfae879cd5/lxml-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:72c87e5ee4e58a8354fb9c7c84cbf95a1c8236c127a5d1b7683f04bed8361e1f", size = 4011793, upload-time = "2025-09-22T04:01:50.042Z" }, + { url = "https://files.pythonhosted.org/packages/ea/7b/93c73c67db235931527301ed3785f849c78991e2e34f3fd9a6663ffda4c5/lxml-6.0.2-cp312-cp312-win_arm64.whl", hash = "sha256:61cb10eeb95570153e0c0e554f58df92ecf5109f75eacad4a95baa709e26c3d6", size = 3672836, upload-time = "2025-09-22T04:01:52.145Z" }, + { url = "https://files.pythonhosted.org/packages/53/fd/4e8f0540608977aea078bf6d79f128e0e2c2bba8af1acf775c30baa70460/lxml-6.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9b33d21594afab46f37ae58dfadd06636f154923c4e8a4d754b0127554eb2e77", size = 8648494, upload-time = "2025-09-22T04:01:54.242Z" }, + { url = "https://files.pythonhosted.org/packages/5d/f4/2a94a3d3dfd6c6b433501b8d470a1960a20ecce93245cf2db1706adf6c19/lxml-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c8963287d7a4c5c9a432ff487c52e9c5618667179c18a204bdedb27310f022f", size = 4661146, upload-time = "2025-09-22T04:01:56.282Z" }, + { url = "https://files.pythonhosted.org/packages/25/2e/4efa677fa6b322013035d38016f6ae859d06cac67437ca7dc708a6af7028/lxml-6.0.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1941354d92699fb5ffe6ed7b32f9649e43c2feb4b97205f75866f7d21aa91452", size = 4946932, upload-time = "2025-09-22T04:01:58.989Z" }, + { url = "https://files.pythonhosted.org/packages/ce/0f/526e78a6d38d109fdbaa5049c62e1d32fdd70c75fb61c4eadf3045d3d124/lxml-6.0.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bb2f6ca0ae2d983ded09357b84af659c954722bbf04dea98030064996d156048", size = 5100060, upload-time = "2025-09-22T04:02:00.812Z" }, + { url = "https://files.pythonhosted.org/packages/81/76/99de58d81fa702cc0ea7edae4f4640416c2062813a00ff24bd70ac1d9c9b/lxml-6.0.2-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb2a12d704f180a902d7fa778c6d71f36ceb7b0d317f34cdc76a5d05aa1dd1df", size = 5019000, upload-time = "2025-09-22T04:02:02.671Z" }, + { url = "https://files.pythonhosted.org/packages/b5/35/9e57d25482bc9a9882cb0037fdb9cc18f4b79d85df94fa9d2a89562f1d25/lxml-6.0.2-cp313-cp313-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:6ec0e3f745021bfed19c456647f0298d60a24c9ff86d9d051f52b509663feeb1", size = 5348496, upload-time = "2025-09-22T04:02:04.904Z" }, + { url = "https://files.pythonhosted.org/packages/a6/8e/cb99bd0b83ccc3e8f0f528e9aa1f7a9965dfec08c617070c5db8d63a87ce/lxml-6.0.2-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:846ae9a12d54e368933b9759052d6206a9e8b250291109c48e350c1f1f49d916", size = 5643779, upload-time = "2025-09-22T04:02:06.689Z" }, + { url = "https://files.pythonhosted.org/packages/d0/34/9e591954939276bb679b73773836c6684c22e56d05980e31d52a9a8deb18/lxml-6.0.2-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef9266d2aa545d7374938fb5c484531ef5a2ec7f2d573e62f8ce722c735685fd", size = 5244072, upload-time = "2025-09-22T04:02:08.587Z" }, + { url = "https://files.pythonhosted.org/packages/8d/27/b29ff065f9aaca443ee377aff699714fcbffb371b4fce5ac4ca759e436d5/lxml-6.0.2-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:4077b7c79f31755df33b795dc12119cb557a0106bfdab0d2c2d97bd3cf3dffa6", size = 4718675, upload-time = "2025-09-22T04:02:10.783Z" }, + { url = "https://files.pythonhosted.org/packages/2b/9f/f756f9c2cd27caa1a6ef8c32ae47aadea697f5c2c6d07b0dae133c244fbe/lxml-6.0.2-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a7c5d5e5f1081955358533be077166ee97ed2571d6a66bdba6ec2f609a715d1a", size = 5255171, upload-time = "2025-09-22T04:02:12.631Z" }, + { url = "https://files.pythonhosted.org/packages/61/46/bb85ea42d2cb1bd8395484fd72f38e3389611aa496ac7772da9205bbda0e/lxml-6.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8f8d0cbd0674ee89863a523e6994ac25fd5be9c8486acfc3e5ccea679bad2679", size = 5057175, upload-time = "2025-09-22T04:02:14.718Z" }, + { url = "https://files.pythonhosted.org/packages/95/0c/443fc476dcc8e41577f0af70458c50fe299a97bb6b7505bb1ae09aa7f9ac/lxml-6.0.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:2cbcbf6d6e924c28f04a43f3b6f6e272312a090f269eff68a2982e13e5d57659", size = 4785688, upload-time = "2025-09-22T04:02:16.957Z" }, + { url = "https://files.pythonhosted.org/packages/48/78/6ef0b359d45bb9697bc5a626e1992fa5d27aa3f8004b137b2314793b50a0/lxml-6.0.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dfb874cfa53340009af6bdd7e54ebc0d21012a60a4e65d927c2e477112e63484", size = 5660655, upload-time = "2025-09-22T04:02:18.815Z" }, + { url = "https://files.pythonhosted.org/packages/ff/ea/e1d33808f386bc1339d08c0dcada6e4712d4ed8e93fcad5f057070b7988a/lxml-6.0.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:fb8dae0b6b8b7f9e96c26fdd8121522ce5de9bb5538010870bd538683d30e9a2", size = 5247695, upload-time = "2025-09-22T04:02:20.593Z" }, + { url = "https://files.pythonhosted.org/packages/4f/47/eba75dfd8183673725255247a603b4ad606f4ae657b60c6c145b381697da/lxml-6.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:358d9adae670b63e95bc59747c72f4dc97c9ec58881d4627fe0120da0f90d314", size = 5269841, upload-time = "2025-09-22T04:02:22.489Z" }, + { url = "https://files.pythonhosted.org/packages/76/04/5c5e2b8577bc936e219becb2e98cdb1aca14a4921a12995b9d0c523502ae/lxml-6.0.2-cp313-cp313-win32.whl", hash = "sha256:e8cd2415f372e7e5a789d743d133ae474290a90b9023197fd78f32e2dc6873e2", size = 3610700, upload-time = "2025-09-22T04:02:24.465Z" }, + { url = "https://files.pythonhosted.org/packages/fe/0a/4643ccc6bb8b143e9f9640aa54e38255f9d3b45feb2cbe7ae2ca47e8782e/lxml-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:b30d46379644fbfc3ab81f8f82ae4de55179414651f110a1514f0b1f8f6cb2d7", size = 4010347, upload-time = "2025-09-22T04:02:26.286Z" }, + { url = "https://files.pythonhosted.org/packages/31/ef/dcf1d29c3f530577f61e5fe2f1bd72929acf779953668a8a47a479ae6f26/lxml-6.0.2-cp313-cp313-win_arm64.whl", hash = "sha256:13dcecc9946dca97b11b7c40d29fba63b55ab4170d3c0cf8c0c164343b9bfdcf", size = 3671248, upload-time = "2025-09-22T04:02:27.918Z" }, + { url = "https://files.pythonhosted.org/packages/03/15/d4a377b385ab693ce97b472fe0c77c2b16ec79590e688b3ccc71fba19884/lxml-6.0.2-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:b0c732aa23de8f8aec23f4b580d1e52905ef468afb4abeafd3fec77042abb6fe", size = 8659801, upload-time = "2025-09-22T04:02:30.113Z" }, + { url = "https://files.pythonhosted.org/packages/c8/e8/c128e37589463668794d503afaeb003987373c5f94d667124ffd8078bbd9/lxml-6.0.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4468e3b83e10e0317a89a33d28f7aeba1caa4d1a6fd457d115dd4ffe90c5931d", size = 4659403, upload-time = "2025-09-22T04:02:32.119Z" }, + { url = "https://files.pythonhosted.org/packages/00/ce/74903904339decdf7da7847bb5741fc98a5451b42fc419a86c0c13d26fe2/lxml-6.0.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:abd44571493973bad4598a3be7e1d807ed45aa2adaf7ab92ab7c62609569b17d", size = 4966974, upload-time = "2025-09-22T04:02:34.155Z" }, + { url = "https://files.pythonhosted.org/packages/1f/d3/131dec79ce61c5567fecf82515bd9bc36395df42501b50f7f7f3bd065df0/lxml-6.0.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:370cd78d5855cfbffd57c422851f7d3864e6ae72d0da615fca4dad8c45d375a5", size = 5102953, upload-time = "2025-09-22T04:02:36.054Z" }, + { url = "https://files.pythonhosted.org/packages/3a/ea/a43ba9bb750d4ffdd885f2cd333572f5bb900cd2408b67fdda07e85978a0/lxml-6.0.2-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:901e3b4219fa04ef766885fb40fa516a71662a4c61b80c94d25336b4934b71c0", size = 5055054, upload-time = "2025-09-22T04:02:38.154Z" }, + { url = "https://files.pythonhosted.org/packages/60/23/6885b451636ae286c34628f70a7ed1fcc759f8d9ad382d132e1c8d3d9bfd/lxml-6.0.2-cp314-cp314-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:a4bf42d2e4cf52c28cc1812d62426b9503cdb0c87a6de81442626aa7d69707ba", size = 5352421, upload-time = "2025-09-22T04:02:40.413Z" }, + { url = "https://files.pythonhosted.org/packages/48/5b/fc2ddfc94ddbe3eebb8e9af6e3fd65e2feba4967f6a4e9683875c394c2d8/lxml-6.0.2-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2c7fdaa4d7c3d886a42534adec7cfac73860b89b4e5298752f60aa5984641a0", size = 5673684, upload-time = "2025-09-22T04:02:42.288Z" }, + { url = "https://files.pythonhosted.org/packages/29/9c/47293c58cc91769130fbf85531280e8cc7868f7fbb6d92f4670071b9cb3e/lxml-6.0.2-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98a5e1660dc7de2200b00d53fa00bcd3c35a3608c305d45a7bbcaf29fa16e83d", size = 5252463, upload-time = "2025-09-22T04:02:44.165Z" }, + { url = "https://files.pythonhosted.org/packages/9b/da/ba6eceb830c762b48e711ded880d7e3e89fc6c7323e587c36540b6b23c6b/lxml-6.0.2-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:dc051506c30b609238d79eda75ee9cab3e520570ec8219844a72a46020901e37", size = 4698437, upload-time = "2025-09-22T04:02:46.524Z" }, + { url = "https://files.pythonhosted.org/packages/a5/24/7be3f82cb7990b89118d944b619e53c656c97dc89c28cfb143fdb7cd6f4d/lxml-6.0.2-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8799481bbdd212470d17513a54d568f44416db01250f49449647b5ab5b5dccb9", size = 5269890, upload-time = "2025-09-22T04:02:48.812Z" }, + { url = "https://files.pythonhosted.org/packages/1b/bd/dcfb9ea1e16c665efd7538fc5d5c34071276ce9220e234217682e7d2c4a5/lxml-6.0.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9261bb77c2dab42f3ecd9103951aeca2c40277701eb7e912c545c1b16e0e4917", size = 5097185, upload-time = "2025-09-22T04:02:50.746Z" }, + { url = "https://files.pythonhosted.org/packages/21/04/a60b0ff9314736316f28316b694bccbbabe100f8483ad83852d77fc7468e/lxml-6.0.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:65ac4a01aba353cfa6d5725b95d7aed6356ddc0a3cd734de00124d285b04b64f", size = 4745895, upload-time = "2025-09-22T04:02:52.968Z" }, + { url = "https://files.pythonhosted.org/packages/d6/bd/7d54bd1846e5a310d9c715921c5faa71cf5c0853372adf78aee70c8d7aa2/lxml-6.0.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b22a07cbb82fea98f8a2fd814f3d1811ff9ed76d0fc6abc84eb21527596e7cc8", size = 5695246, upload-time = "2025-09-22T04:02:54.798Z" }, + { url = "https://files.pythonhosted.org/packages/fd/32/5643d6ab947bc371da21323acb2a6e603cedbe71cb4c99c8254289ab6f4e/lxml-6.0.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:d759cdd7f3e055d6bc8d9bec3ad905227b2e4c785dc16c372eb5b5e83123f48a", size = 5260797, upload-time = "2025-09-22T04:02:57.058Z" }, + { url = "https://files.pythonhosted.org/packages/33/da/34c1ec4cff1eea7d0b4cd44af8411806ed943141804ac9c5d565302afb78/lxml-6.0.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:945da35a48d193d27c188037a05fec5492937f66fb1958c24fc761fb9d40d43c", size = 5277404, upload-time = "2025-09-22T04:02:58.966Z" }, + { url = "https://files.pythonhosted.org/packages/82/57/4eca3e31e54dc89e2c3507e1cd411074a17565fa5ffc437c4ae0a00d439e/lxml-6.0.2-cp314-cp314-win32.whl", hash = "sha256:be3aaa60da67e6153eb15715cc2e19091af5dc75faef8b8a585aea372507384b", size = 3670072, upload-time = "2025-09-22T04:03:38.05Z" }, + { url = "https://files.pythonhosted.org/packages/e3/e0/c96cf13eccd20c9421ba910304dae0f619724dcf1702864fd59dd386404d/lxml-6.0.2-cp314-cp314-win_amd64.whl", hash = "sha256:fa25afbadead523f7001caf0c2382afd272c315a033a7b06336da2637d92d6ed", size = 4080617, upload-time = "2025-09-22T04:03:39.835Z" }, + { url = "https://files.pythonhosted.org/packages/d5/5d/b3f03e22b3d38d6f188ef044900a9b29b2fe0aebb94625ce9fe244011d34/lxml-6.0.2-cp314-cp314-win_arm64.whl", hash = "sha256:063eccf89df5b24e361b123e257e437f9e9878f425ee9aae3144c77faf6da6d8", size = 3754930, upload-time = "2025-09-22T04:03:41.565Z" }, + { url = "https://files.pythonhosted.org/packages/5e/5c/42c2c4c03554580708fc738d13414801f340c04c3eff90d8d2d227145275/lxml-6.0.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:6162a86d86893d63084faaf4ff937b3daea233e3682fb4474db07395794fa80d", size = 8910380, upload-time = "2025-09-22T04:03:01.645Z" }, + { url = "https://files.pythonhosted.org/packages/bf/4f/12df843e3e10d18d468a7557058f8d3733e8b6e12401f30b1ef29360740f/lxml-6.0.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:414aaa94e974e23a3e92e7ca5b97d10c0cf37b6481f50911032c69eeb3991bba", size = 4775632, upload-time = "2025-09-22T04:03:03.814Z" }, + { url = "https://files.pythonhosted.org/packages/e4/0c/9dc31e6c2d0d418483cbcb469d1f5a582a1cd00a1f4081953d44051f3c50/lxml-6.0.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48461bd21625458dd01e14e2c38dd0aea69addc3c4f960c30d9f59d7f93be601", size = 4975171, upload-time = "2025-09-22T04:03:05.651Z" }, + { url = "https://files.pythonhosted.org/packages/e7/2b/9b870c6ca24c841bdd887504808f0417aa9d8d564114689266f19ddf29c8/lxml-6.0.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:25fcc59afc57d527cfc78a58f40ab4c9b8fd096a9a3f964d2781ffb6eb33f4ed", size = 5110109, upload-time = "2025-09-22T04:03:07.452Z" }, + { url = "https://files.pythonhosted.org/packages/bf/0c/4f5f2a4dd319a178912751564471355d9019e220c20d7db3fb8307ed8582/lxml-6.0.2-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5179c60288204e6ddde3f774a93350177e08876eaf3ab78aa3a3649d43eb7d37", size = 5041061, upload-time = "2025-09-22T04:03:09.297Z" }, + { url = "https://files.pythonhosted.org/packages/12/64/554eed290365267671fe001a20d72d14f468ae4e6acef1e179b039436967/lxml-6.0.2-cp314-cp314t-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:967aab75434de148ec80597b75062d8123cadf2943fb4281f385141e18b21338", size = 5306233, upload-time = "2025-09-22T04:03:11.651Z" }, + { url = "https://files.pythonhosted.org/packages/7a/31/1d748aa275e71802ad9722df32a7a35034246b42c0ecdd8235412c3396ef/lxml-6.0.2-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d100fcc8930d697c6561156c6810ab4a508fb264c8b6779e6e61e2ed5e7558f9", size = 5604739, upload-time = "2025-09-22T04:03:13.592Z" }, + { url = "https://files.pythonhosted.org/packages/8f/41/2c11916bcac09ed561adccacceaedd2bf0e0b25b297ea92aab99fd03d0fa/lxml-6.0.2-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ca59e7e13e5981175b8b3e4ab84d7da57993eeff53c07764dcebda0d0e64ecd", size = 5225119, upload-time = "2025-09-22T04:03:15.408Z" }, + { url = "https://files.pythonhosted.org/packages/99/05/4e5c2873d8f17aa018e6afde417c80cc5d0c33be4854cce3ef5670c49367/lxml-6.0.2-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:957448ac63a42e2e49531b9d6c0fa449a1970dbc32467aaad46f11545be9af1d", size = 4633665, upload-time = "2025-09-22T04:03:17.262Z" }, + { url = "https://files.pythonhosted.org/packages/0f/c9/dcc2da1bebd6275cdc723b515f93edf548b82f36a5458cca3578bc899332/lxml-6.0.2-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b7fc49c37f1786284b12af63152fe1d0990722497e2d5817acfe7a877522f9a9", size = 5234997, upload-time = "2025-09-22T04:03:19.14Z" }, + { url = "https://files.pythonhosted.org/packages/9c/e2/5172e4e7468afca64a37b81dba152fc5d90e30f9c83c7c3213d6a02a5ce4/lxml-6.0.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e19e0643cc936a22e837f79d01a550678da8377d7d801a14487c10c34ee49c7e", size = 5090957, upload-time = "2025-09-22T04:03:21.436Z" }, + { url = "https://files.pythonhosted.org/packages/a5/b3/15461fd3e5cd4ddcb7938b87fc20b14ab113b92312fc97afe65cd7c85de1/lxml-6.0.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:1db01e5cf14345628e0cbe71067204db658e2fb8e51e7f33631f5f4735fefd8d", size = 4764372, upload-time = "2025-09-22T04:03:23.27Z" }, + { url = "https://files.pythonhosted.org/packages/05/33/f310b987c8bf9e61c4dd8e8035c416bd3230098f5e3cfa69fc4232de7059/lxml-6.0.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:875c6b5ab39ad5291588aed6925fac99d0097af0dd62f33c7b43736043d4a2ec", size = 5634653, upload-time = "2025-09-22T04:03:25.767Z" }, + { url = "https://files.pythonhosted.org/packages/70/ff/51c80e75e0bc9382158133bdcf4e339b5886c6ee2418b5199b3f1a61ed6d/lxml-6.0.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:cdcbed9ad19da81c480dfd6dd161886db6096083c9938ead313d94b30aadf272", size = 5233795, upload-time = "2025-09-22T04:03:27.62Z" }, + { url = "https://files.pythonhosted.org/packages/56/4d/4856e897df0d588789dd844dbed9d91782c4ef0b327f96ce53c807e13128/lxml-6.0.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:80dadc234ebc532e09be1975ff538d154a7fa61ea5031c03d25178855544728f", size = 5257023, upload-time = "2025-09-22T04:03:30.056Z" }, + { url = "https://files.pythonhosted.org/packages/0f/85/86766dfebfa87bea0ab78e9ff7a4b4b45225df4b4d3b8cc3c03c5cd68464/lxml-6.0.2-cp314-cp314t-win32.whl", hash = "sha256:da08e7bb297b04e893d91087df19638dc7a6bb858a954b0cc2b9f5053c922312", size = 3911420, upload-time = "2025-09-22T04:03:32.198Z" }, + { url = "https://files.pythonhosted.org/packages/fe/1a/b248b355834c8e32614650b8008c69ffeb0ceb149c793961dd8c0b991bb3/lxml-6.0.2-cp314-cp314t-win_amd64.whl", hash = "sha256:252a22982dca42f6155125ac76d3432e548a7625d56f5a273ee78a5057216eca", size = 4406837, upload-time = "2025-09-22T04:03:34.027Z" }, + { url = "https://files.pythonhosted.org/packages/92/aa/df863bcc39c5e0946263454aba394de8a9084dbaff8ad143846b0d844739/lxml-6.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:bb4c1847b303835d89d785a18801a883436cdfd5dc3d62947f9c49e24f0f5a2c", size = 3822205, upload-time = "2025-09-22T04:03:36.249Z" }, +] + +[[package]] +name = "magika" +version = "1.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "onnxruntime" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/79/ca/dfb30534be5ad84363e0e8ce08bc6e990ce0430aec1eaafb0633b4bb3f7f/magika-1.0.2.tar.gz", hash = "sha256:8ed912d8f14d044f43fdbd17d6bd2cbdd6e8b8246e89be49f6cd547053636677", size = 3041955, upload-time = "2026-02-25T16:07:03.805Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/46/b8180a34c64470e2f40a3676ef3284a32efd2b3598aa99946ee319eb66e8/magika-1.0.2-py3-none-any.whl", hash = "sha256:c50be7a6a7132ef1a92956694401aaf911bda8fc5e2a591092e0dac5b5865a8a", size = 2969547, upload-time = "2026-02-25T16:06:55.987Z" }, + { url = "https://files.pythonhosted.org/packages/38/f3/a65650c36a472fed1ca1c4868e567cf015c14c73a6bb5fa4a808932e0944/magika-1.0.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:1db8e2d57556e7244f5fce9cfd023aa0da05d204ea7313f3c75b32feab2bcd6d", size = 13811935, upload-time = "2026-02-25T16:06:57.589Z" }, + { url = "https://files.pythonhosted.org/packages/ba/9e/429608833917b7d4c4f7071a270bbca96821fb592e275d85bc9eae5a94c8/magika-1.0.2-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:df4706c18153431548b1d36c8ca11c8a8a415197dcc741281846c61ebfc94a5b", size = 15924817, upload-time = "2026-02-25T16:06:59.765Z" }, + { url = "https://files.pythonhosted.org/packages/1a/12/185a8822994a2f7b5e7d88d19a88d80637917bbb0a6f3f59a2564aabc125/magika-1.0.2-py3-none-win_amd64.whl", hash = "sha256:4937e876d55642423d6416e5db4e5ca7523ab7f855cbc5389efdeac1d149df04", size = 13099543, upload-time = "2026-02-25T16:07:01.942Z" }, +] + [[package]] name = "mako" version = "1.3.10" @@ -1919,6 +2235,56 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] +[[package]] +name = "mineru" +version = "2.7.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beautifulsoup4" }, + { name = "boto3" }, + { name = "click" }, + { name = "fast-langdetect" }, + { name = "httpx" }, + { name = "huggingface-hub" }, + { name = "json-repair" }, + { name = "loguru" }, + { name = "magika" }, + { name = "mineru-vl-utils" }, + { name = "modelscope" }, + { name = "numpy" }, + { name = "openai" }, + { name = "opencv-python" }, + { name = "pdfminer-six" }, + { name = "pdftext" }, + { name = "pillow" }, + { name = "pypdf" }, + { name = "pypdfium2" }, + { name = "qwen-vl-utils" }, + { name = "reportlab" }, + { name = "requests" }, + { name = "scikit-image" }, + { name = "tqdm" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/99/a4cf2751c4055d25d16e915215c031ae362bf4bf198efed4991161c6159e/mineru-2.7.6-py3-none-any.whl", hash = "sha256:b85ea4cef26397013e92dfdf3a32e68b422e233d27ba103a152843559fd7eb51", size = 1305425, upload-time = "2026-02-06T03:40:00.951Z" }, +] + +[[package]] +name = "mineru-vl-utils" +version = "0.1.22" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiofiles" }, + { name = "httpx" }, + { name = "httpx-retries" }, + { name = "loguru" }, + { name = "pillow" }, + { name = "pydantic" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/21/22/40fe2009c3effaaf054964e17e4ef80bb0c2becd290e9da06ea593aedc5d/mineru_vl_utils-0.1.22-py3-none-any.whl", hash = "sha256:75e6dbc2720eb0275717e6d7b6438aa1033716120aeab544c81c914b4189bdf2", size = 59498, upload-time = "2026-01-22T06:21:59.044Z" }, +] + [[package]] name = "mmh3" version = "5.2.1" @@ -2001,6 +2367,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/0f/59204bf136d1201f8d7884cfbaf7498c5b4674e87a4c693f9bde63741ce1/mmh3-5.2.1-cp314-cp314t-win_arm64.whl", hash = "sha256:dfd51b4c56b673dfbc43d7d27ef857dd91124801e2806c69bb45585ce0fa019b", size = 40391, upload-time = "2026-03-05T15:55:56.697Z" }, ] +[[package]] +name = "modelscope" +version = "1.35.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "packaging" }, + { name = "requests" }, + { name = "setuptools" }, + { name = "tqdm" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/fc/5822a2fa4f16054a74edf7949090cda86c8e80b3fa6e52d726a17caf2bb1/modelscope-1.35.1.tar.gz", hash = "sha256:b68eb8a8169f74766c3a7d8a95805714174d082d5568d6b281740536e7cc9f19", size = 4561746, upload-time = "2026-03-19T06:53:02.769Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/71/d3/c98f736bbb5739871214e567ef9a1f5fca65f10d1b7bdc5e1bd565d492cf/modelscope-1.35.1-py3-none-any.whl", hash = "sha256:364db742867988da6be0493e0b9c4fd3e13bb0f5dd230c0c928102775aeed375", size = 6053743, upload-time = "2026-03-19T06:52:59.37Z" }, +] + [[package]] name = "mpmath" version = "1.3.0" @@ -2328,6 +2711,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d0/b1/35b6f9c8cf9318e3dbb7146cc82dab4cf61182a8d5406fc9b50864362895/openai-2.29.0-py3-none-any.whl", hash = "sha256:b7c5de513c3286d17c5e29b92c4c98ceaf0d775244ac8159aeb1bddf840eb42a", size = 1141533, upload-time = "2026-03-17T17:53:47.348Z" }, ] +[[package]] +name = "opencv-python" +version = "4.13.0.92" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/6f/5a28fef4c4a382be06afe3938c64cc168223016fa520c5abaf37e8862aa5/opencv_python-4.13.0.92-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:caf60c071ec391ba51ed00a4a920f996d0b64e3e46068aac1f646b5de0326a19", size = 46247052, upload-time = "2026-02-05T07:01:25.046Z" }, + { url = "https://files.pythonhosted.org/packages/08/ac/6c98c44c650b8114a0fb901691351cfb3956d502e8e9b5cd27f4ee7fbf2f/opencv_python-4.13.0.92-cp37-abi3-macosx_14_0_x86_64.whl", hash = "sha256:5868a8c028a0b37561579bfb8ac1875babdc69546d236249fff296a8c010ccf9", size = 32568781, upload-time = "2026-02-05T07:01:41.379Z" }, + { url = "https://files.pythonhosted.org/packages/3e/51/82fed528b45173bf629fa44effb76dff8bc9f4eeaee759038362dfa60237/opencv_python-4.13.0.92-cp37-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0bc2596e68f972ca452d80f444bc404e08807d021fbba40df26b61b18e01838a", size = 47685527, upload-time = "2026-02-05T06:59:11.24Z" }, + { url = "https://files.pythonhosted.org/packages/db/07/90b34a8e2cf9c50fe8ed25cac9011cde0676b4d9d9c973751ac7616223a2/opencv_python-4.13.0.92-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:402033cddf9d294693094de5ef532339f14ce821da3ad7df7c9f6e8316da32cf", size = 70460872, upload-time = "2026-02-05T06:59:19.162Z" }, + { url = "https://files.pythonhosted.org/packages/02/6d/7a9cc719b3eaf4377b9c2e3edeb7ed3a81de41f96421510c0a169ca3cfd4/opencv_python-4.13.0.92-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:bccaabf9eb7f897ca61880ce2869dcd9b25b72129c28478e7f2a5e8dee945616", size = 46708208, upload-time = "2026-02-05T06:59:15.419Z" }, + { url = "https://files.pythonhosted.org/packages/fd/55/b3b49a1b97aabcfbbd6c7326df9cb0b6fa0c0aefa8e89d500939e04aa229/opencv_python-4.13.0.92-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:620d602b8f7d8b8dab5f4b99c6eb353e78d3fb8b0f53db1bd258bb1aa001c1d5", size = 72927042, upload-time = "2026-02-05T06:59:23.389Z" }, + { url = "https://files.pythonhosted.org/packages/fb/17/de5458312bcb07ddf434d7bfcb24bb52c59635ad58c6e7c751b48949b009/opencv_python-4.13.0.92-cp37-abi3-win32.whl", hash = "sha256:372fe164a3148ac1ca51e5f3ad0541a4a276452273f503441d718fab9c5e5f59", size = 30932638, upload-time = "2026-02-05T07:02:14.98Z" }, + { url = "https://files.pythonhosted.org/packages/e9/a5/1be1516390333ff9be3a9cb648c9f33df79d5096e5884b5df71a588af463/opencv_python-4.13.0.92-cp37-abi3-win_amd64.whl", hash = "sha256:423d934c9fafb91aad38edf26efb46da91ffbc05f3f59c4b0c72e699720706f5", size = 40212062, upload-time = "2026-02-05T07:02:12.724Z" }, +] + +[[package]] +name = "openpyxl" +version = "3.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "et-xmlfile" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464, upload-time = "2024-06-28T14:03:44.161Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910, upload-time = "2024-06-28T14:03:41.161Z" }, +] + [[package]] name = "opentelemetry-api" version = "1.40.0" @@ -2543,6 +2956,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ef/3c/2c197d226f9ea224a9ab8d197933f9da0ae0aac5b6e0f884e2b8d9c8e9f7/pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723", size = 55206, upload-time = "2026-01-27T03:59:45.137Z" }, ] +[[package]] +name = "pdfminer-six" +version = "20260107" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "charset-normalizer" }, + { name = "cryptography" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/34/a4/5cec1112009f0439a5ca6afa8ace321f0ab2f48da3255b7a1c8953014670/pdfminer_six-20260107.tar.gz", hash = "sha256:96bfd431e3577a55a0efd25676968ca4ce8fd5b53f14565f85716ff363889602", size = 8512094, upload-time = "2026-01-07T13:29:12.937Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/8b/28c4eaec9d6b036a52cb44720408f26b1a143ca9bce76cc19e8f5de00ab4/pdfminer_six-20260107-py3-none-any.whl", hash = "sha256:366585ba97e80dffa8f00cebe303d2f381884d8637af4ce422f1df3ef38111a9", size = 6592252, upload-time = "2026-01-07T13:29:10.742Z" }, +] + +[[package]] +name = "pdftext" +version = "0.6.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "pydantic" }, + { name = "pydantic-settings" }, + { name = "pypdfium2" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a9/7b/fe3205d44d6058932bbc785f0b9da2ed35b62e17479a8a7d2baca9df1cc6/pdftext-0.6.3.tar.gz", hash = "sha256:ab5c5dfe0f1fb78de1db837ccadac1ea41b07ce1890fead973c9a84cdaf54dec", size = 21968, upload-time = "2025-06-11T14:42:09.492Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/b9/4437bb89f04e57f48c96492a50d6168da5e201940de6620730d390449991/pdftext-0.6.3-py3-none-any.whl", hash = "sha256:528431ed8bdce39d74372cd3d27e8544af812f1f1adc81db229cf9fb48dacacb", size = 23693, upload-time = "2025-06-11T14:42:08.157Z" }, +] + [[package]] name = "pillow" version = "12.1.1" @@ -2745,6 +3186,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c4/72/02445137af02769918a93807b2b7890047c32bfb9f90371cbc12688819eb/protobuf-6.33.6-py3-none-any.whl", hash = "sha256:77179e006c476e69bf8e8ce866640091ec42e1beb80b213c3900006ecfba6901", size = 170656, upload-time = "2026-03-18T19:04:59.826Z" }, ] +[[package]] +name = "psutil" +version = "7.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/c6/d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/psutil-7.2.2.tar.gz", hash = "sha256:0746f5f8d406af344fd547f1c8daa5f5c33dbc293bb8d6a16d80b4bb88f59372", size = 493740, upload-time = "2026-01-28T18:14:54.428Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/08/510cbdb69c25a96f4ae523f733cdc963ae654904e8db864c07585ef99875/psutil-7.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2edccc433cbfa046b980b0df0171cd25bcaeb3a68fe9022db0979e7aa74a826b", size = 130595, upload-time = "2026-01-28T18:14:57.293Z" }, + { url = "https://files.pythonhosted.org/packages/d6/f5/97baea3fe7a5a9af7436301f85490905379b1c6f2dd51fe3ecf24b4c5fbf/psutil-7.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78c8603dcd9a04c7364f1a3e670cea95d51ee865e4efb3556a3a63adef958ea", size = 131082, upload-time = "2026-01-28T18:14:59.732Z" }, + { url = "https://files.pythonhosted.org/packages/37/d6/246513fbf9fa174af531f28412297dd05241d97a75911ac8febefa1a53c6/psutil-7.2.2-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1a571f2330c966c62aeda00dd24620425d4b0cc86881c89861fbc04549e5dc63", size = 181476, upload-time = "2026-01-28T18:15:01.884Z" }, + { url = "https://files.pythonhosted.org/packages/b8/b5/9182c9af3836cca61696dabe4fd1304e17bc56cb62f17439e1154f225dd3/psutil-7.2.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:917e891983ca3c1887b4ef36447b1e0873e70c933afc831c6b6da078ba474312", size = 184062, upload-time = "2026-01-28T18:15:04.436Z" }, + { url = "https://files.pythonhosted.org/packages/16/ba/0756dca669f5a9300d0cbcbfae9a4c30e446dfc7440ffe43ded5724bfd93/psutil-7.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:ab486563df44c17f5173621c7b198955bd6b613fb87c71c161f827d3fb149a9b", size = 139893, upload-time = "2026-01-28T18:15:06.378Z" }, + { url = "https://files.pythonhosted.org/packages/1c/61/8fa0e26f33623b49949346de05ec1ddaad02ed8ba64af45f40a147dbfa97/psutil-7.2.2-cp313-cp313t-win_arm64.whl", hash = "sha256:ae0aefdd8796a7737eccea863f80f81e468a1e4cf14d926bd9b6f5f2d5f90ca9", size = 135589, upload-time = "2026-01-28T18:15:08.03Z" }, + { url = "https://files.pythonhosted.org/packages/81/69/ef179ab5ca24f32acc1dac0c247fd6a13b501fd5534dbae0e05a1c48b66d/psutil-7.2.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:eed63d3b4d62449571547b60578c5b2c4bcccc5387148db46e0c2313dad0ee00", size = 130664, upload-time = "2026-01-28T18:15:09.469Z" }, + { url = "https://files.pythonhosted.org/packages/7b/64/665248b557a236d3fa9efc378d60d95ef56dd0a490c2cd37dafc7660d4a9/psutil-7.2.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7b6d09433a10592ce39b13d7be5a54fbac1d1228ed29abc880fb23df7cb694c9", size = 131087, upload-time = "2026-01-28T18:15:11.724Z" }, + { url = "https://files.pythonhosted.org/packages/d5/2e/e6782744700d6759ebce3043dcfa661fb61e2fb752b91cdeae9af12c2178/psutil-7.2.2-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fa4ecf83bcdf6e6c8f4449aff98eefb5d0604bf88cb883d7da3d8d2d909546a", size = 182383, upload-time = "2026-01-28T18:15:13.445Z" }, + { url = "https://files.pythonhosted.org/packages/57/49/0a41cefd10cb7505cdc04dab3eacf24c0c2cb158a998b8c7b1d27ee2c1f5/psutil-7.2.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e452c464a02e7dc7822a05d25db4cde564444a67e58539a00f929c51eddda0cf", size = 185210, upload-time = "2026-01-28T18:15:16.002Z" }, + { url = "https://files.pythonhosted.org/packages/dd/2c/ff9bfb544f283ba5f83ba725a3c5fec6d6b10b8f27ac1dc641c473dc390d/psutil-7.2.2-cp314-cp314t-win_amd64.whl", hash = "sha256:c7663d4e37f13e884d13994247449e9f8f574bc4655d509c3b95e9ec9e2b9dc1", size = 141228, upload-time = "2026-01-28T18:15:18.385Z" }, + { url = "https://files.pythonhosted.org/packages/f2/fc/f8d9c31db14fcec13748d373e668bc3bed94d9077dbc17fb0eebc073233c/psutil-7.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:11fe5a4f613759764e79c65cf11ebdf26e33d6dd34336f8a337aa2996d71c841", size = 136284, upload-time = "2026-01-28T18:15:19.912Z" }, + { url = "https://files.pythonhosted.org/packages/e7/36/5ee6e05c9bd427237b11b3937ad82bb8ad2752d72c6969314590dd0c2f6e/psutil-7.2.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ed0cace939114f62738d808fdcecd4c869222507e266e574799e9c0faa17d486", size = 129090, upload-time = "2026-01-28T18:15:22.168Z" }, + { url = "https://files.pythonhosted.org/packages/80/c4/f5af4c1ca8c1eeb2e92ccca14ce8effdeec651d5ab6053c589b074eda6e1/psutil-7.2.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a7b04c10f32cc88ab39cbf606e117fd74721c831c98a27dc04578deb0c16979", size = 129859, upload-time = "2026-01-28T18:15:23.795Z" }, + { url = "https://files.pythonhosted.org/packages/b5/70/5d8df3b09e25bce090399cf48e452d25c935ab72dad19406c77f4e828045/psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:076a2d2f923fd4821644f5ba89f059523da90dc9014e85f8e45a5774ca5bc6f9", size = 155560, upload-time = "2026-01-28T18:15:25.976Z" }, + { url = "https://files.pythonhosted.org/packages/63/65/37648c0c158dc222aba51c089eb3bdfa238e621674dc42d48706e639204f/psutil-7.2.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0726cecd84f9474419d67252add4ac0cd9811b04d61123054b9fb6f57df6e9e", size = 156997, upload-time = "2026-01-28T18:15:27.794Z" }, + { url = "https://files.pythonhosted.org/packages/8e/13/125093eadae863ce03c6ffdbae9929430d116a246ef69866dad94da3bfbc/psutil-7.2.2-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fd04ef36b4a6d599bbdb225dd1d3f51e00105f6d48a28f006da7f9822f2606d8", size = 148972, upload-time = "2026-01-28T18:15:29.342Z" }, + { url = "https://files.pythonhosted.org/packages/04/78/0acd37ca84ce3ddffaa92ef0f571e073faa6d8ff1f0559ab1272188ea2be/psutil-7.2.2-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b58fabe35e80b264a4e3bb23e6b96f9e45a3df7fb7eed419ac0e5947c61e47cc", size = 148266, upload-time = "2026-01-28T18:15:31.597Z" }, + { url = "https://files.pythonhosted.org/packages/b4/90/e2159492b5426be0c1fef7acba807a03511f97c5f86b3caeda6ad92351a7/psutil-7.2.2-cp37-abi3-win_amd64.whl", hash = "sha256:eb7e81434c8d223ec4a219b5fc1c47d0417b12be7ea866e24fb5ad6e84b3d988", size = 137737, upload-time = "2026-01-28T18:15:33.849Z" }, + { url = "https://files.pythonhosted.org/packages/8c/c7/7bb2e321574b10df20cbde462a94e2b71d05f9bbda251ef27d104668306a/psutil-7.2.2-cp37-abi3-win_arm64.whl", hash = "sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee", size = 134617, upload-time = "2026-01-28T18:15:36.514Z" }, +] + [[package]] name = "pyasn1" version = "0.6.3" @@ -2990,6 +3459,35 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, ] +[[package]] +name = "pypdf" +version = "6.9.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/fb/dc2e8cb006e80b0020ed20d8649106fe4274e82d8e756ad3e24ade19c0df/pypdf-6.9.1.tar.gz", hash = "sha256:ae052407d33d34de0c86c5c729be6d51010bf36e03035a8f23ab449bca52377d", size = 5311551, upload-time = "2026-03-17T10:46:07.876Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/f4/75543fa802b86e72f87e9395440fe1a89a6d149887e3e55745715c3352ac/pypdf-6.9.1-py3-none-any.whl", hash = "sha256:f35a6a022348fae47e092a908339a8f3dc993510c026bb39a96718fc7185e89f", size = 333661, upload-time = "2026-03-17T10:46:06.286Z" }, +] + +[[package]] +name = "pypdfium2" +version = "4.30.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/14/838b3ba247a0ba92e4df5d23f2bea9478edcfd72b78a39d6ca36ccd84ad2/pypdfium2-4.30.0.tar.gz", hash = "sha256:48b5b7e5566665bc1015b9d69c1ebabe21f6aee468b509531c3c8318eeee2e16", size = 140239, upload-time = "2024-05-09T18:33:17.552Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/9a/c8ff5cc352c1b60b0b97642ae734f51edbab6e28b45b4fcdfe5306ee3c83/pypdfium2-4.30.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:b33ceded0b6ff5b2b93bc1fe0ad4b71aa6b7e7bd5875f1ca0cdfb6ba6ac01aab", size = 2837254, upload-time = "2024-05-09T18:32:48.653Z" }, + { url = "https://files.pythonhosted.org/packages/21/8b/27d4d5409f3c76b985f4ee4afe147b606594411e15ac4dc1c3363c9a9810/pypdfium2-4.30.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:4e55689f4b06e2d2406203e771f78789bd4f190731b5d57383d05cf611d829de", size = 2707624, upload-time = "2024-05-09T18:32:51.458Z" }, + { url = "https://files.pythonhosted.org/packages/11/63/28a73ca17c24b41a205d658e177d68e198d7dde65a8c99c821d231b6ee3d/pypdfium2-4.30.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e6e50f5ce7f65a40a33d7c9edc39f23140c57e37144c2d6d9e9262a2a854854", size = 2793126, upload-time = "2024-05-09T18:32:53.581Z" }, + { url = "https://files.pythonhosted.org/packages/d1/96/53b3ebf0955edbd02ac6da16a818ecc65c939e98fdeb4e0958362bd385c8/pypdfium2-4.30.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3d0dd3ecaffd0b6dbda3da663220e705cb563918249bda26058c6036752ba3a2", size = 2591077, upload-time = "2024-05-09T18:32:55.99Z" }, + { url = "https://files.pythonhosted.org/packages/ec/ee/0394e56e7cab8b5b21f744d988400948ef71a9a892cbeb0b200d324ab2c7/pypdfium2-4.30.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cc3bf29b0db8c76cdfaac1ec1cde8edf211a7de7390fbf8934ad2aa9b4d6dfad", size = 2864431, upload-time = "2024-05-09T18:32:57.911Z" }, + { url = "https://files.pythonhosted.org/packages/65/cd/3f1edf20a0ef4a212a5e20a5900e64942c5a374473671ac0780eaa08ea80/pypdfium2-4.30.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1f78d2189e0ddf9ac2b7a9b9bd4f0c66f54d1389ff6c17e9fd9dc034d06eb3f", size = 2812008, upload-time = "2024-05-09T18:32:59.886Z" }, + { url = "https://files.pythonhosted.org/packages/c8/91/2d517db61845698f41a2a974de90762e50faeb529201c6b3574935969045/pypdfium2-4.30.0-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:5eda3641a2da7a7a0b2f4dbd71d706401a656fea521b6b6faa0675b15d31a163", size = 6181543, upload-time = "2024-05-09T18:33:02.597Z" }, + { url = "https://files.pythonhosted.org/packages/ba/c4/ed1315143a7a84b2c7616569dfb472473968d628f17c231c39e29ae9d780/pypdfium2-4.30.0-py3-none-musllinux_1_1_i686.whl", hash = "sha256:0dfa61421b5eb68e1188b0b2231e7ba35735aef2d867d86e48ee6cab6975195e", size = 6175911, upload-time = "2024-05-09T18:33:05.376Z" }, + { url = "https://files.pythonhosted.org/packages/7a/c4/9e62d03f414e0e3051c56d5943c3bf42aa9608ede4e19dc96438364e9e03/pypdfium2-4.30.0-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:f33bd79e7a09d5f7acca3b0b69ff6c8a488869a7fab48fdf400fec6e20b9c8be", size = 6267430, upload-time = "2024-05-09T18:33:08.067Z" }, + { url = "https://files.pythonhosted.org/packages/90/47/eda4904f715fb98561e34012826e883816945934a851745570521ec89520/pypdfium2-4.30.0-py3-none-win32.whl", hash = "sha256:ee2410f15d576d976c2ab2558c93d392a25fb9f6635e8dd0a8a3a5241b275e0e", size = 2775951, upload-time = "2024-05-09T18:33:10.567Z" }, + { url = "https://files.pythonhosted.org/packages/25/bd/56d9ec6b9f0fc4e0d95288759f3179f0fcd34b1a1526b75673d2f6d5196f/pypdfium2-4.30.0-py3-none-win_amd64.whl", hash = "sha256:90dbb2ac07be53219f56be09961eb95cf2473f834d01a42d901d13ccfad64b4c", size = 2892098, upload-time = "2024-05-09T18:33:13.107Z" }, + { url = "https://files.pythonhosted.org/packages/be/7a/097801205b991bc3115e8af1edb850d30aeaf0118520b016354cf5ccd3f6/pypdfium2-4.30.0-py3-none-win_arm64.whl", hash = "sha256:119b2969a6d6b1e8d55e99caaf05290294f2d0fe49c12a3f17102d01c441bd29", size = 2752118, upload-time = "2024-05-09T18:33:15.489Z" }, +] + [[package]] name = "pypika" version = "0.51.1" @@ -3076,6 +3574,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c2/3c/2005227cb951df502412de2fa781f800663cccbef8d90ec6f1b371ac2c0d/python_discovery-1.2.0-py3-none-any.whl", hash = "sha256:1e108f1bbe2ed0ef089823d28805d5ad32be8e734b86a5f212bf89b71c266e4a", size = 31524, upload-time = "2026-03-19T01:43:07.045Z" }, ] +[[package]] +name = "python-docx" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lxml" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a9/f7/eddfe33871520adab45aaa1a71f0402a2252050c14c7e3009446c8f4701c/python_docx-1.2.0.tar.gz", hash = "sha256:7bc9d7b7d8a69c9c02ca09216118c86552704edc23bac179283f2e38f86220ce", size = 5723256, upload-time = "2025-06-16T20:46:27.921Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/00/1e03a4989fa5795da308cd774f05b704ace555a70f9bf9d3be057b680bcf/python_docx-1.2.0-py3-none-any.whl", hash = "sha256:3fd478f3250fbbbfd3b94fe1e985955737c145627498896a8a6bf81f4baf66c7", size = 252987, upload-time = "2025-06-16T20:46:22.506Z" }, +] + [[package]] name = "python-dotenv" version = "1.2.2" @@ -3159,6 +3670,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, ] +[[package]] +name = "qwen-vl-utils" +version = "0.0.14" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "av" }, + { name = "packaging" }, + { name = "pillow" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b6/b1/ad4fc2260a3badd278b38d642f3b987412f1f6682f0ef2b31b0572d5caa8/qwen_vl_utils-0.0.14.tar.gz", hash = "sha256:9c7cad5ae803b3a10f8bb7194deb12aeacdd032f92f4224e880c73587a7346ad", size = 8453, upload-time = "2025-09-23T09:38:57.532Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c4/43/80f67e0336cb2fc725f8e06f7fe35c1d0fe946f4d2b8b2175e797e07349e/qwen_vl_utils-0.0.14-py3-none-any.whl", hash = "sha256:5e28657bfd031e56bd447c5901b58ddfc3835285ed100f4c56580e0ade054e96", size = 8120, upload-time = "2025-09-23T09:38:56.297Z" }, +] + [[package]] name = "referencing" version = "0.37.0" @@ -3261,6 +3787,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/13/c0/ad225f4a405827486f1955283407cf758b6d2fb966712644c5f5aef33d1b/regex-2026.2.28-cp314-cp314t-win_arm64.whl", hash = "sha256:dee50f1be42222f89767b64b283283ef963189da0dda4a515aa54a5563c62dec", size = 275010, upload-time = "2026-02-28T02:19:40.65Z" }, ] +[[package]] +name = "reportlab" +version = "4.4.10" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "charset-normalizer" }, + { name = "pillow" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/48/57/28bfbf0a775b618b6e4d854ef8dd3f5c8988e5d614d8898703502a35f61c/reportlab-4.4.10.tar.gz", hash = "sha256:5cbbb34ac3546039d0086deb2938cdec06b12da3cdb836e813258eb33cd28487", size = 3714962, upload-time = "2026-02-12T10:45:21.325Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/2e/e1798b8b248e1517e74c6cdf10dd6edd485044e7edf46b5f11ffcc5a0add/reportlab-4.4.10-py3-none-any.whl", hash = "sha256:5abc815746ae2bc44e7ff25db96814f921349ca814c992c7eac3c26029bf7c24", size = 1955400, upload-time = "2026-02-12T10:45:18.828Z" }, +] + [[package]] name = "requests" version = "2.32.5" @@ -3314,6 +3853,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" }, ] +[[package]] +name = "robust-downloader" +version = "0.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorlog" }, + { name = "requests" }, + { name = "tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/20/8d28efa080f58fa06f6378875ac482ee511c076369e5293a2e65128cf9a0/robust-downloader-0.0.2.tar.gz", hash = "sha256:08c938b96e317abe6b037e34230a91bda9b5d613f009bca4a47664997c61de90", size = 15785, upload-time = "2023-11-13T03:00:20.637Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/56/a1/779e9d0ebbdc704411ce30915a1105eb01aeaa9e402d7e446613ff8fb121/robust_downloader-0.0.2-py3-none-any.whl", hash = "sha256:8fe08bfb64d714fd1a048a7df6eb7b413eb4e624309a49db2c16fbb80a62869d", size = 15534, upload-time = "2023-11-13T03:00:18.957Z" }, +] + [[package]] name = "rpds-py" version = "0.30.0" @@ -3432,6 +3985,137 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8f/e8/726643a3ea68c727da31570bde48c7a10f1aa60eddd628d94078fec586ff/ruff-0.15.7-py3-none-win_arm64.whl", hash = "sha256:18e8d73f1c3fdf27931497972250340f92e8c861722161a9caeb89a58ead6ed2", size = 11023304, upload-time = "2026-03-19T16:26:51.669Z" }, ] +[[package]] +name = "s3transfer" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/04/74127fc843314818edfa81b5540e26dd537353b123a4edc563109d8f17dd/s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920", size = 153827, upload-time = "2025-12-01T02:30:59.114Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830, upload-time = "2025-12-01T02:30:57.729Z" }, +] + +[[package]] +name = "scikit-image" +version = "0.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "imageio" }, + { name = "lazy-loader" }, + { name = "networkx" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pillow" }, + { name = "scipy" }, + { name = "tifffile" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/b4/2528bb43c67d48053a7a649a9666432dc307d66ba02e3a6d5c40f46655df/scikit_image-0.26.0.tar.gz", hash = "sha256:f5f970ab04efad85c24714321fcc91613fcb64ef2a892a13167df2f3e59199fa", size = 22729739, upload-time = "2025-12-20T17:12:21.824Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/99/e8/e13757982264b33a1621628f86b587e9a73a13f5256dad49b19ba7dc9083/scikit_image-0.26.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d454b93a6fa770ac5ae2d33570f8e7a321bb80d29511ce4b6b78058ebe176e8c", size = 12376452, upload-time = "2025-12-20T17:10:52.796Z" }, + { url = "https://files.pythonhosted.org/packages/e3/be/f8dd17d0510f9911f9f17ba301f7455328bf13dae416560126d428de9568/scikit_image-0.26.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3409e89d66eff5734cd2b672d1c48d2759360057e714e1d92a11df82c87cba37", size = 12061567, upload-time = "2025-12-20T17:10:55.207Z" }, + { url = "https://files.pythonhosted.org/packages/b3/2b/c70120a6880579fb42b91567ad79feb4772f7be72e8d52fec403a3dde0c6/scikit_image-0.26.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c717490cec9e276afb0438dd165b7c3072d6c416709cc0f9f5a4c1070d23a44", size = 13084214, upload-time = "2025-12-20T17:10:57.468Z" }, + { url = "https://files.pythonhosted.org/packages/f4/a2/70401a107d6d7466d64b466927e6b96fcefa99d57494b972608e2f8be50f/scikit_image-0.26.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7df650e79031634ac90b11e64a9eedaf5a5e06fcd09bcd03a34be01745744466", size = 13561683, upload-time = "2025-12-20T17:10:59.49Z" }, + { url = "https://files.pythonhosted.org/packages/13/a5/48bdfd92794c5002d664e0910a349d0a1504671ef5ad358150f21643c79a/scikit_image-0.26.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cefd85033e66d4ea35b525bb0937d7f42d4cdcfed2d1888e1570d5ce450d3932", size = 14112147, upload-time = "2025-12-20T17:11:02.083Z" }, + { url = "https://files.pythonhosted.org/packages/ee/b5/ac71694da92f5def5953ca99f18a10fe98eac2dd0a34079389b70b4d0394/scikit_image-0.26.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3f5bf622d7c0435884e1e141ebbe4b2804e16b2dd23ae4c6183e2ea99233be70", size = 14661625, upload-time = "2025-12-20T17:11:04.528Z" }, + { url = "https://files.pythonhosted.org/packages/23/4d/a3cc1e96f080e253dad2251bfae7587cf2b7912bcd76fd43fd366ff35a87/scikit_image-0.26.0-cp312-cp312-win_amd64.whl", hash = "sha256:abed017474593cd3056ae0fe948d07d0747b27a085e92df5474f4955dd65aec0", size = 11911059, upload-time = "2025-12-20T17:11:06.61Z" }, + { url = "https://files.pythonhosted.org/packages/35/8a/d1b8055f584acc937478abf4550d122936f420352422a1a625eef2c605d8/scikit_image-0.26.0-cp312-cp312-win_arm64.whl", hash = "sha256:4d57e39ef67a95d26860c8caf9b14b8fb130f83b34c6656a77f191fa6d1d04d8", size = 11348740, upload-time = "2025-12-20T17:11:09.118Z" }, + { url = "https://files.pythonhosted.org/packages/4f/48/02357ffb2cca35640f33f2cfe054a4d6d5d7a229b88880a64f1e45c11f4e/scikit_image-0.26.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a2e852eccf41d2d322b8e60144e124802873a92b8d43a6f96331aa42888491c7", size = 12346329, upload-time = "2025-12-20T17:11:11.599Z" }, + { url = "https://files.pythonhosted.org/packages/67/b9/b792c577cea2c1e94cda83b135a656924fc57c428e8a6d302cd69aac1b60/scikit_image-0.26.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:98329aab3bc87db352b9887f64ce8cdb8e75f7c2daa19927f2e121b797b678d5", size = 12031726, upload-time = "2025-12-20T17:11:13.871Z" }, + { url = "https://files.pythonhosted.org/packages/07/a9/9564250dfd65cb20404a611016db52afc6268b2b371cd19c7538ea47580f/scikit_image-0.26.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:915bb3ba66455cf8adac00dc8fdf18a4cd29656aec7ddd38cb4dda90289a6f21", size = 13094910, upload-time = "2025-12-20T17:11:16.2Z" }, + { url = "https://files.pythonhosted.org/packages/a3/b8/0d8eeb5a9fd7d34ba84f8a55753a0a3e2b5b51b2a5a0ade648a8db4a62f7/scikit_image-0.26.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b36ab5e778bf50af5ff386c3ac508027dc3aaeccf2161bdf96bde6848f44d21b", size = 13660939, upload-time = "2025-12-20T17:11:18.464Z" }, + { url = "https://files.pythonhosted.org/packages/2f/d6/91d8973584d4793d4c1a847d388e34ef1218d835eeddecfc9108d735b467/scikit_image-0.26.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:09bad6a5d5949c7896c8347424c4cca899f1d11668030e5548813ab9c2865dcb", size = 14138938, upload-time = "2025-12-20T17:11:20.919Z" }, + { url = "https://files.pythonhosted.org/packages/39/9a/7e15d8dc10d6bbf212195fb39bdeb7f226c46dd53f9c63c312e111e2e175/scikit_image-0.26.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:aeb14db1ed09ad4bee4ceb9e635547a8d5f3549be67fc6c768c7f923e027e6cd", size = 14752243, upload-time = "2025-12-20T17:11:23.347Z" }, + { url = "https://files.pythonhosted.org/packages/8f/58/2b11b933097bc427e42b4a8b15f7de8f24f2bac1fd2779d2aea1431b2c31/scikit_image-0.26.0-cp313-cp313-win_amd64.whl", hash = "sha256:ac529eb9dbd5954f9aaa2e3fe9a3fd9661bfe24e134c688587d811a0233127f1", size = 11906770, upload-time = "2025-12-20T17:11:25.297Z" }, + { url = "https://files.pythonhosted.org/packages/ad/ec/96941474a18a04b69b6f6562a5bd79bd68049fa3728d3b350976eccb8b93/scikit_image-0.26.0-cp313-cp313-win_arm64.whl", hash = "sha256:a2d211bc355f59725efdcae699b93b30348a19416cc9e017f7b2fb599faf7219", size = 11342506, upload-time = "2025-12-20T17:11:27.399Z" }, + { url = "https://files.pythonhosted.org/packages/03/e5/c1a9962b0cf1952f42d32b4a2e48eed520320dbc4d2ff0b981c6fa508b6b/scikit_image-0.26.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:9eefb4adad066da408a7601c4c24b07af3b472d90e08c3e7483d4e9e829d8c49", size = 12663278, upload-time = "2025-12-20T17:11:29.358Z" }, + { url = "https://files.pythonhosted.org/packages/ae/97/c1a276a59ce8e4e24482d65c1a3940d69c6b3873279193b7ebd04e5ee56b/scikit_image-0.26.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6caec76e16c970c528d15d1c757363334d5cb3069f9cea93d2bead31820511f3", size = 12405142, upload-time = "2025-12-20T17:11:31.282Z" }, + { url = "https://files.pythonhosted.org/packages/d4/4a/f1cbd1357caef6c7993f7efd514d6e53d8fd6f7fe01c4714d51614c53289/scikit_image-0.26.0-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a07200fe09b9d99fcdab959859fe0f7db8df6333d6204344425d476850ce3604", size = 12942086, upload-time = "2025-12-20T17:11:33.683Z" }, + { url = "https://files.pythonhosted.org/packages/5b/6f/74d9fb87c5655bd64cf00b0c44dc3d6206d9002e5f6ba1c9aeb13236f6bf/scikit_image-0.26.0-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:92242351bccf391fc5df2d1529d15470019496d2498d615beb68da85fe7fdf37", size = 13265667, upload-time = "2025-12-20T17:11:36.11Z" }, + { url = "https://files.pythonhosted.org/packages/a7/73/faddc2413ae98d863f6fa2e3e14da4467dd38e788e1c23346cf1a2b06b97/scikit_image-0.26.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:52c496f75a7e45844d951557f13c08c81487c6a1da2e3c9c8a39fcde958e02cc", size = 14001966, upload-time = "2025-12-20T17:11:38.55Z" }, + { url = "https://files.pythonhosted.org/packages/02/94/9f46966fa042b5d57c8cd641045372b4e0df0047dd400e77ea9952674110/scikit_image-0.26.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:20ef4a155e2e78b8ab973998e04d8a361d49d719e65412405f4dadd9155a61d9", size = 14359526, upload-time = "2025-12-20T17:11:41.087Z" }, + { url = "https://files.pythonhosted.org/packages/5d/b4/2840fe38f10057f40b1c9f8fb98a187a370936bf144a4ac23452c5ef1baf/scikit_image-0.26.0-cp313-cp313t-win_amd64.whl", hash = "sha256:c9087cf7d0e7f33ab5c46d2068d86d785e70b05400a891f73a13400f1e1faf6a", size = 12287629, upload-time = "2025-12-20T17:11:43.11Z" }, + { url = "https://files.pythonhosted.org/packages/22/ba/73b6ca70796e71f83ab222690e35a79612f0117e5aaf167151b7d46f5f2c/scikit_image-0.26.0-cp313-cp313t-win_arm64.whl", hash = "sha256:27d58bc8b2acd351f972c6508c1b557cfed80299826080a4d803dd29c51b707e", size = 11647755, upload-time = "2025-12-20T17:11:45.279Z" }, + { url = "https://files.pythonhosted.org/packages/51/44/6b744f92b37ae2833fd423cce8f806d2368859ec325a699dc30389e090b9/scikit_image-0.26.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:63af3d3a26125f796f01052052f86806da5b5e54c6abef152edb752683075a9c", size = 12365810, upload-time = "2025-12-20T17:11:47.357Z" }, + { url = "https://files.pythonhosted.org/packages/40/f5/83590d9355191f86ac663420fec741b82cc547a4afe7c4c1d986bf46e4db/scikit_image-0.26.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ce00600cd70d4562ed59f80523e18cdcc1fae0e10676498a01f73c255774aefd", size = 12075717, upload-time = "2025-12-20T17:11:49.483Z" }, + { url = "https://files.pythonhosted.org/packages/72/48/253e7cf5aee6190459fe136c614e2cbccc562deceb4af96e0863f1b8ee29/scikit_image-0.26.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6381edf972b32e4f54085449afde64365a57316637496c1325a736987083e2ab", size = 13161520, upload-time = "2025-12-20T17:11:51.58Z" }, + { url = "https://files.pythonhosted.org/packages/73/c3/cec6a3cbaadfdcc02bd6ff02f3abfe09eaa7f4d4e0a525a1e3a3f4bce49c/scikit_image-0.26.0-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c6624a76c6085218248154cc7e1500e6b488edcd9499004dd0d35040607d7505", size = 13684340, upload-time = "2025-12-20T17:11:53.708Z" }, + { url = "https://files.pythonhosted.org/packages/d4/0d/39a776f675d24164b3a267aa0db9f677a4cb20127660d8bf4fd7fef66817/scikit_image-0.26.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f775f0e420faac9c2aa6757135f4eb468fb7b70e0b67fa77a5e79be3c30ee331", size = 14203839, upload-time = "2025-12-20T17:11:55.89Z" }, + { url = "https://files.pythonhosted.org/packages/ee/25/2514df226bbcedfe9b2caafa1ba7bc87231a0c339066981b182b08340e06/scikit_image-0.26.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ede4d6d255cc5da9faeb2f9ba7fedbc990abbc652db429f40a16b22e770bb578", size = 14770021, upload-time = "2025-12-20T17:11:58.014Z" }, + { url = "https://files.pythonhosted.org/packages/8d/5b/0671dc91c0c79340c3fe202f0549c7d3681eb7640fe34ab68a5f090a7c7f/scikit_image-0.26.0-cp314-cp314-win_amd64.whl", hash = "sha256:0660b83968c15293fd9135e8d860053ee19500d52bf55ca4fb09de595a1af650", size = 12023490, upload-time = "2025-12-20T17:12:00.013Z" }, + { url = "https://files.pythonhosted.org/packages/65/08/7c4cb59f91721f3de07719085212a0b3962e3e3f2d1818cbac4eeb1ea53e/scikit_image-0.26.0-cp314-cp314-win_arm64.whl", hash = "sha256:b8d14d3181c21c11170477a42542c1addc7072a90b986675a71266ad17abc37f", size = 11473782, upload-time = "2025-12-20T17:12:01.983Z" }, + { url = "https://files.pythonhosted.org/packages/49/41/65c4258137acef3d73cb561ac55512eacd7b30bb4f4a11474cad526bc5db/scikit_image-0.26.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:cde0bbd57e6795eba83cb10f71a677f7239271121dc950bc060482834a668ad1", size = 12686060, upload-time = "2025-12-20T17:12:03.886Z" }, + { url = "https://files.pythonhosted.org/packages/e7/32/76971f8727b87f1420a962406388a50e26667c31756126444baf6668f559/scikit_image-0.26.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:163e9afb5b879562b9aeda0dd45208a35316f26cc7a3aed54fd601604e5cf46f", size = 12422628, upload-time = "2025-12-20T17:12:05.921Z" }, + { url = "https://files.pythonhosted.org/packages/37/0d/996febd39f757c40ee7b01cdb861867327e5c8e5f595a634e8201462d958/scikit_image-0.26.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:724f79fd9b6cb6f4a37864fe09f81f9f5d5b9646b6868109e1b100d1a7019e59", size = 12962369, upload-time = "2025-12-20T17:12:07.912Z" }, + { url = "https://files.pythonhosted.org/packages/48/b4/612d354f946c9600e7dea012723c11d47e8d455384e530f6daaaeb9bf62c/scikit_image-0.26.0-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3268f13310e6857508bd87202620df996199a016a1d281b309441d227c822394", size = 13272431, upload-time = "2025-12-20T17:12:10.255Z" }, + { url = "https://files.pythonhosted.org/packages/0a/6e/26c00b466e06055a086de2c6e2145fe189ccdc9a1d11ccc7de020f2591ad/scikit_image-0.26.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:fac96a1f9b06cd771cbbb3cd96c5332f36d4efd839b1d8b053f79e5887acde62", size = 14016362, upload-time = "2025-12-20T17:12:12.793Z" }, + { url = "https://files.pythonhosted.org/packages/47/88/00a90402e1775634043c2a0af8a3c76ad450866d9fa444efcc43b553ba2d/scikit_image-0.26.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:2c1e7bd342f43e7a97e571b3f03ba4c1293ea1a35c3f13f41efdc8a81c1dc8f2", size = 14364151, upload-time = "2025-12-20T17:12:14.909Z" }, + { url = "https://files.pythonhosted.org/packages/da/ca/918d8d306bd43beacff3b835c6d96fac0ae64c0857092f068b88db531a7c/scikit_image-0.26.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b702c3bb115e1dcf4abf5297429b5c90f2189655888cbed14921f3d26f81d3a4", size = 12413484, upload-time = "2025-12-20T17:12:17.046Z" }, + { url = "https://files.pythonhosted.org/packages/dc/cd/4da01329b5a8d47ff7ec3c99a2b02465a8017b186027590dc7425cee0b56/scikit_image-0.26.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0608aa4a9ec39e0843de10d60edb2785a30c1c47819b67866dd223ebd149acaf", size = 11769501, upload-time = "2025-12-20T17:12:19.339Z" }, +] + +[[package]] +name = "scipy" +version = "1.17.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7a/97/5a3609c4f8d58b039179648e62dd220f89864f56f7357f5d4f45c29eb2cc/scipy-1.17.1.tar.gz", hash = "sha256:95d8e012d8cb8816c226aef832200b1d45109ed4464303e997c5b13122b297c0", size = 30573822, upload-time = "2026-02-23T00:26:24.851Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/35/48/b992b488d6f299dbe3f11a20b24d3dda3d46f1a635ede1c46b5b17a7b163/scipy-1.17.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:35c3a56d2ef83efc372eaec584314bd0ef2e2f0d2adb21c55e6ad5b344c0dcb8", size = 31610954, upload-time = "2026-02-23T00:17:49.855Z" }, + { url = "https://files.pythonhosted.org/packages/b2/02/cf107b01494c19dc100f1d0b7ac3cc08666e96ba2d64db7626066cee895e/scipy-1.17.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:fcb310ddb270a06114bb64bbe53c94926b943f5b7f0842194d585c65eb4edd76", size = 28172662, upload-time = "2026-02-23T00:18:01.64Z" }, + { url = "https://files.pythonhosted.org/packages/cf/a9/599c28631bad314d219cf9ffd40e985b24d603fc8a2f4ccc5ae8419a535b/scipy-1.17.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:cc90d2e9c7e5c7f1a482c9875007c095c3194b1cfedca3c2f3291cdc2bc7c086", size = 20344366, upload-time = "2026-02-23T00:18:12.015Z" }, + { url = "https://files.pythonhosted.org/packages/35/f5/906eda513271c8deb5af284e5ef0206d17a96239af79f9fa0aebfe0e36b4/scipy-1.17.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:c80be5ede8f3f8eded4eff73cc99a25c388ce98e555b17d31da05287015ffa5b", size = 22704017, upload-time = "2026-02-23T00:18:21.502Z" }, + { url = "https://files.pythonhosted.org/packages/da/34/16f10e3042d2f1d6b66e0428308ab52224b6a23049cb2f5c1756f713815f/scipy-1.17.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e19ebea31758fac5893a2ac360fedd00116cbb7628e650842a6691ba7ca28a21", size = 32927842, upload-time = "2026-02-23T00:18:35.367Z" }, + { url = "https://files.pythonhosted.org/packages/01/8e/1e35281b8ab6d5d72ebe9911edcdffa3f36b04ed9d51dec6dd140396e220/scipy-1.17.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:02ae3b274fde71c5e92ac4d54bc06c42d80e399fec704383dcd99b301df37458", size = 35235890, upload-time = "2026-02-23T00:18:49.188Z" }, + { url = "https://files.pythonhosted.org/packages/c5/5c/9d7f4c88bea6e0d5a4f1bc0506a53a00e9fcb198de372bfe4d3652cef482/scipy-1.17.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8a604bae87c6195d8b1045eddece0514d041604b14f2727bbc2b3020172045eb", size = 35003557, upload-time = "2026-02-23T00:18:54.74Z" }, + { url = "https://files.pythonhosted.org/packages/65/94/7698add8f276dbab7a9de9fb6b0e02fc13ee61d51c7c3f85ac28b65e1239/scipy-1.17.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f590cd684941912d10becc07325a3eeb77886fe981415660d9265c4c418d0bea", size = 37625856, upload-time = "2026-02-23T00:19:00.307Z" }, + { url = "https://files.pythonhosted.org/packages/a2/84/dc08d77fbf3d87d3ee27f6a0c6dcce1de5829a64f2eae85a0ecc1f0daa73/scipy-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:41b71f4a3a4cab9d366cd9065b288efc4d4f3c0b37a91a8e0947fb5bd7f31d87", size = 36549682, upload-time = "2026-02-23T00:19:07.67Z" }, + { url = "https://files.pythonhosted.org/packages/bc/98/fe9ae9ffb3b54b62559f52dedaebe204b408db8109a8c66fdd04869e6424/scipy-1.17.1-cp312-cp312-win_arm64.whl", hash = "sha256:f4115102802df98b2b0db3cce5cb9b92572633a1197c77b7553e5203f284a5b3", size = 24547340, upload-time = "2026-02-23T00:19:12.024Z" }, + { url = "https://files.pythonhosted.org/packages/76/27/07ee1b57b65e92645f219b37148a7e7928b82e2b5dbeccecb4dff7c64f0b/scipy-1.17.1-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:5e3c5c011904115f88a39308379c17f91546f77c1667cea98739fe0fccea804c", size = 31590199, upload-time = "2026-02-23T00:19:17.192Z" }, + { url = "https://files.pythonhosted.org/packages/ec/ae/db19f8ab842e9b724bf5dbb7db29302a91f1e55bc4d04b1025d6d605a2c5/scipy-1.17.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:6fac755ca3d2c3edcb22f479fceaa241704111414831ddd3bc6056e18516892f", size = 28154001, upload-time = "2026-02-23T00:19:22.241Z" }, + { url = "https://files.pythonhosted.org/packages/5b/58/3ce96251560107b381cbd6e8413c483bbb1228a6b919fa8652b0d4090e7f/scipy-1.17.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:7ff200bf9d24f2e4d5dc6ee8c3ac64d739d3a89e2326ba68aaf6c4a2b838fd7d", size = 20325719, upload-time = "2026-02-23T00:19:26.329Z" }, + { url = "https://files.pythonhosted.org/packages/b2/83/15087d945e0e4d48ce2377498abf5ad171ae013232ae31d06f336e64c999/scipy-1.17.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:4b400bdc6f79fa02a4d86640310dde87a21fba0c979efff5248908c6f15fad1b", size = 22683595, upload-time = "2026-02-23T00:19:30.304Z" }, + { url = "https://files.pythonhosted.org/packages/b4/e0/e58fbde4a1a594c8be8114eb4aac1a55bcd6587047efc18a61eb1f5c0d30/scipy-1.17.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b64ca7d4aee0102a97f3ba22124052b4bd2152522355073580bf4845e2550b6", size = 32896429, upload-time = "2026-02-23T00:19:35.536Z" }, + { url = "https://files.pythonhosted.org/packages/f5/5f/f17563f28ff03c7b6799c50d01d5d856a1d55f2676f537ca8d28c7f627cd/scipy-1.17.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:581b2264fc0aa555f3f435a5944da7504ea3a065d7029ad60e7c3d1ae09c5464", size = 35203952, upload-time = "2026-02-23T00:19:42.259Z" }, + { url = "https://files.pythonhosted.org/packages/8d/a5/9afd17de24f657fdfe4df9a3f1ea049b39aef7c06000c13db1530d81ccca/scipy-1.17.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:beeda3d4ae615106d7094f7e7cef6218392e4465cc95d25f900bebabfded0950", size = 34979063, upload-time = "2026-02-23T00:19:47.547Z" }, + { url = "https://files.pythonhosted.org/packages/8b/13/88b1d2384b424bf7c924f2038c1c409f8d88bb2a8d49d097861dd64a57b2/scipy-1.17.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6609bc224e9568f65064cfa72edc0f24ee6655b47575954ec6339534b2798369", size = 37598449, upload-time = "2026-02-23T00:19:53.238Z" }, + { url = "https://files.pythonhosted.org/packages/35/e5/d6d0e51fc888f692a35134336866341c08655d92614f492c6860dc45bb2c/scipy-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:37425bc9175607b0268f493d79a292c39f9d001a357bebb6b88fdfaff13f6448", size = 36510943, upload-time = "2026-02-23T00:20:50.89Z" }, + { url = "https://files.pythonhosted.org/packages/2a/fd/3be73c564e2a01e690e19cc618811540ba5354c67c8680dce3281123fb79/scipy-1.17.1-cp313-cp313-win_arm64.whl", hash = "sha256:5cf36e801231b6a2059bf354720274b7558746f3b1a4efb43fcf557ccd484a87", size = 24545621, upload-time = "2026-02-23T00:20:55.871Z" }, + { url = "https://files.pythonhosted.org/packages/6f/6b/17787db8b8114933a66f9dcc479a8272e4b4da75fe03b0c282f7b0ade8cd/scipy-1.17.1-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:d59c30000a16d8edc7e64152e30220bfbd724c9bbb08368c054e24c651314f0a", size = 31936708, upload-time = "2026-02-23T00:19:58.694Z" }, + { url = "https://files.pythonhosted.org/packages/38/2e/524405c2b6392765ab1e2b722a41d5da33dc5c7b7278184a8ad29b6cb206/scipy-1.17.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:010f4333c96c9bb1a4516269e33cb5917b08ef2166d5556ca2fd9f082a9e6ea0", size = 28570135, upload-time = "2026-02-23T00:20:03.934Z" }, + { url = "https://files.pythonhosted.org/packages/fd/c3/5bd7199f4ea8556c0c8e39f04ccb014ac37d1468e6cfa6a95c6b3562b76e/scipy-1.17.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:2ceb2d3e01c5f1d83c4189737a42d9cb2fc38a6eeed225e7515eef71ad301dce", size = 20741977, upload-time = "2026-02-23T00:20:07.935Z" }, + { url = "https://files.pythonhosted.org/packages/d9/b8/8ccd9b766ad14c78386599708eb745f6b44f08400a5fd0ade7cf89b6fc93/scipy-1.17.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:844e165636711ef41f80b4103ed234181646b98a53c8f05da12ca5ca289134f6", size = 23029601, upload-time = "2026-02-23T00:20:12.161Z" }, + { url = "https://files.pythonhosted.org/packages/6d/a0/3cb6f4d2fb3e17428ad2880333cac878909ad1a89f678527b5328b93c1d4/scipy-1.17.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:158dd96d2207e21c966063e1635b1063cd7787b627b6f07305315dd73d9c679e", size = 33019667, upload-time = "2026-02-23T00:20:17.208Z" }, + { url = "https://files.pythonhosted.org/packages/f3/c3/2d834a5ac7bf3a0c806ad1508efc02dda3c8c61472a56132d7894c312dea/scipy-1.17.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74cbb80d93260fe2ffa334efa24cb8f2f0f622a9b9febf8b483c0b865bfb3475", size = 35264159, upload-time = "2026-02-23T00:20:23.087Z" }, + { url = "https://files.pythonhosted.org/packages/4d/77/d3ed4becfdbd217c52062fafe35a72388d1bd82c2d0ba5ca19d6fcc93e11/scipy-1.17.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:dbc12c9f3d185f5c737d801da555fb74b3dcfa1a50b66a1a93e09190f41fab50", size = 35102771, upload-time = "2026-02-23T00:20:28.636Z" }, + { url = "https://files.pythonhosted.org/packages/bd/12/d19da97efde68ca1ee5538bb261d5d2c062f0c055575128f11a2730e3ac1/scipy-1.17.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:94055a11dfebe37c656e70317e1996dc197e1a15bbcc351bcdd4610e128fe1ca", size = 37665910, upload-time = "2026-02-23T00:20:34.743Z" }, + { url = "https://files.pythonhosted.org/packages/06/1c/1172a88d507a4baaf72c5a09bb6c018fe2ae0ab622e5830b703a46cc9e44/scipy-1.17.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e30bdeaa5deed6bc27b4cc490823cd0347d7dae09119b8803ae576ea0ce52e4c", size = 36562980, upload-time = "2026-02-23T00:20:40.575Z" }, + { url = "https://files.pythonhosted.org/packages/70/b0/eb757336e5a76dfa7911f63252e3b7d1de00935d7705cf772db5b45ec238/scipy-1.17.1-cp313-cp313t-win_arm64.whl", hash = "sha256:a720477885a9d2411f94a93d16f9d89bad0f28ca23c3f8daa521e2dcc3f44d49", size = 24856543, upload-time = "2026-02-23T00:20:45.313Z" }, + { url = "https://files.pythonhosted.org/packages/cf/83/333afb452af6f0fd70414dc04f898647ee1423979ce02efa75c3b0f2c28e/scipy-1.17.1-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:a48a72c77a310327f6a3a920092fa2b8fd03d7deaa60f093038f22d98e096717", size = 31584510, upload-time = "2026-02-23T00:21:01.015Z" }, + { url = "https://files.pythonhosted.org/packages/ed/a6/d05a85fd51daeb2e4ea71d102f15b34fedca8e931af02594193ae4fd25f7/scipy-1.17.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:45abad819184f07240d8a696117a7aacd39787af9e0b719d00285549ed19a1e9", size = 28170131, upload-time = "2026-02-23T00:21:05.888Z" }, + { url = "https://files.pythonhosted.org/packages/db/7b/8624a203326675d7746a254083a187398090a179335b2e4a20e2ddc46e83/scipy-1.17.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:3fd1fcdab3ea951b610dc4cef356d416d5802991e7e32b5254828d342f7b7e0b", size = 20342032, upload-time = "2026-02-23T00:21:09.904Z" }, + { url = "https://files.pythonhosted.org/packages/c9/35/2c342897c00775d688d8ff3987aced3426858fd89d5a0e26e020b660b301/scipy-1.17.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:7bdf2da170b67fdf10bca777614b1c7d96ae3ca5794fd9587dce41eb2966e866", size = 22678766, upload-time = "2026-02-23T00:21:14.313Z" }, + { url = "https://files.pythonhosted.org/packages/ef/f2/7cdb8eb308a1a6ae1e19f945913c82c23c0c442a462a46480ce487fdc0ac/scipy-1.17.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:adb2642e060a6549c343603a3851ba76ef0b74cc8c079a9a58121c7ec9fe2350", size = 32957007, upload-time = "2026-02-23T00:21:19.663Z" }, + { url = "https://files.pythonhosted.org/packages/0b/2e/7eea398450457ecb54e18e9d10110993fa65561c4f3add5e8eccd2b9cd41/scipy-1.17.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eee2cfda04c00a857206a4330f0c5e3e56535494e30ca445eb19ec624ae75118", size = 35221333, upload-time = "2026-02-23T00:21:25.278Z" }, + { url = "https://files.pythonhosted.org/packages/d9/77/5b8509d03b77f093a0d52e606d3c4f79e8b06d1d38c441dacb1e26cacf46/scipy-1.17.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d2650c1fb97e184d12d8ba010493ee7b322864f7d3d00d3f9bb97d9c21de4068", size = 35042066, upload-time = "2026-02-23T00:21:31.358Z" }, + { url = "https://files.pythonhosted.org/packages/f9/df/18f80fb99df40b4070328d5ae5c596f2f00fffb50167e31439e932f29e7d/scipy-1.17.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:08b900519463543aa604a06bec02461558a6e1cef8fdbb8098f77a48a83c8118", size = 37612763, upload-time = "2026-02-23T00:21:37.247Z" }, + { url = "https://files.pythonhosted.org/packages/4b/39/f0e8ea762a764a9dc52aa7dabcfad51a354819de1f0d4652b6a1122424d6/scipy-1.17.1-cp314-cp314-win_amd64.whl", hash = "sha256:3877ac408e14da24a6196de0ddcace62092bfc12a83823e92e49e40747e52c19", size = 37290984, upload-time = "2026-02-23T00:22:35.023Z" }, + { url = "https://files.pythonhosted.org/packages/7c/56/fe201e3b0f93d1a8bcf75d3379affd228a63d7e2d80ab45467a74b494947/scipy-1.17.1-cp314-cp314-win_arm64.whl", hash = "sha256:f8885db0bc2bffa59d5c1b72fad7a6a92d3e80e7257f967dd81abb553a90d293", size = 25192877, upload-time = "2026-02-23T00:22:39.798Z" }, + { url = "https://files.pythonhosted.org/packages/96/ad/f8c414e121f82e02d76f310f16db9899c4fcde36710329502a6b2a3c0392/scipy-1.17.1-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:1cc682cea2ae55524432f3cdff9e9a3be743d52a7443d0cba9017c23c87ae2f6", size = 31949750, upload-time = "2026-02-23T00:21:42.289Z" }, + { url = "https://files.pythonhosted.org/packages/7c/b0/c741e8865d61b67c81e255f4f0a832846c064e426636cd7de84e74d209be/scipy-1.17.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:2040ad4d1795a0ae89bfc7e8429677f365d45aa9fd5e4587cf1ea737f927b4a1", size = 28585858, upload-time = "2026-02-23T00:21:47.706Z" }, + { url = "https://files.pythonhosted.org/packages/ed/1b/3985219c6177866628fa7c2595bfd23f193ceebbe472c98a08824b9466ff/scipy-1.17.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:131f5aaea57602008f9822e2115029b55d4b5f7c070287699fe45c661d051e39", size = 20757723, upload-time = "2026-02-23T00:21:52.039Z" }, + { url = "https://files.pythonhosted.org/packages/c0/19/2a04aa25050d656d6f7b9e7b685cc83d6957fb101665bfd9369ca6534563/scipy-1.17.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:9cdc1a2fcfd5c52cfb3045feb399f7b3ce822abdde3a193a6b9a60b3cb5854ca", size = 23043098, upload-time = "2026-02-23T00:21:56.185Z" }, + { url = "https://files.pythonhosted.org/packages/86/f1/3383beb9b5d0dbddd030335bf8a8b32d4317185efe495374f134d8be6cce/scipy-1.17.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e3dcd57ab780c741fde8dc68619de988b966db759a3c3152e8e9142c26295ad", size = 33030397, upload-time = "2026-02-23T00:22:01.404Z" }, + { url = "https://files.pythonhosted.org/packages/41/68/8f21e8a65a5a03f25a79165ec9d2b28c00e66dc80546cf5eb803aeeff35b/scipy-1.17.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a9956e4d4f4a301ebf6cde39850333a6b6110799d470dbbb1e25326ac447f52a", size = 35281163, upload-time = "2026-02-23T00:22:07.024Z" }, + { url = "https://files.pythonhosted.org/packages/84/8d/c8a5e19479554007a5632ed7529e665c315ae7492b4f946b0deb39870e39/scipy-1.17.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:a4328d245944d09fd639771de275701ccadf5f781ba0ff092ad141e017eccda4", size = 35116291, upload-time = "2026-02-23T00:22:12.585Z" }, + { url = "https://files.pythonhosted.org/packages/52/52/e57eceff0e342a1f50e274264ed47497b59e6a4e3118808ee58ddda7b74a/scipy-1.17.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a77cbd07b940d326d39a1d1b37817e2ee4d79cb30e7338f3d0cddffae70fcaa2", size = 37682317, upload-time = "2026-02-23T00:22:18.513Z" }, + { url = "https://files.pythonhosted.org/packages/11/2f/b29eafe4a3fbc3d6de9662b36e028d5f039e72d345e05c250e121a230dd4/scipy-1.17.1-cp314-cp314t-win_amd64.whl", hash = "sha256:eb092099205ef62cd1782b006658db09e2fed75bffcae7cc0d44052d8aa0f484", size = 37345327, upload-time = "2026-02-23T00:22:24.442Z" }, + { url = "https://files.pythonhosted.org/packages/07/39/338d9219c4e87f3e708f18857ecd24d22a0c3094752393319553096b98af/scipy-1.17.1-cp314-cp314t-win_arm64.whl", hash = "sha256:200e1050faffacc162be6a486a984a0497866ec54149a01270adc8a59b7c7d21", size = 25489165, upload-time = "2026-02-23T00:22:29.563Z" }, +] + [[package]] name = "setuptools" version = "82.0.1" @@ -3468,6 +4152,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, ] +[[package]] +name = "soupsieve" +version = "2.8.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/ae/2d9c981590ed9999a0d91755b47fc74f74de286b0f5cee14c9269041e6c4/soupsieve-2.8.3.tar.gz", hash = "sha256:3267f1eeea4251fb42728b6dfb746edc9acaffc4a45b27e19450b676586e8349", size = 118627, upload-time = "2026-01-20T04:27:02.457Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/2c/1462b1d0a634697ae9e55b3cecdcb64788e8b7d63f54d923fcd0bb140aed/soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95", size = 37016, upload-time = "2026-01-20T04:27:01.012Z" }, +] + [[package]] name = "sqlalchemy" version = "2.0.48" @@ -3553,6 +4246,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/c1/eb8f9debc45d3b7918a32ab756658a0904732f75e555402972246b0b8e71/tenacity-9.1.4-py3-none-any.whl", hash = "sha256:6095a360c919085f28c6527de529e76a06ad89b23659fa881ae0649b867a9d55", size = 28926, upload-time = "2026-02-07T10:45:32.24Z" }, ] +[[package]] +name = "tifffile" +version = "2026.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c5/cb/2f6d79c7576e22c116352a801f4c3c8ace5957e9aced862012430b62e14f/tifffile-2026.3.3.tar.gz", hash = "sha256:d9a1266bed6f2ee1dd0abde2018a38b4f8b2935cb843df381d70ac4eac5458b7", size = 388745, upload-time = "2026-03-03T19:14:38.134Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1a/e4/e804505f87627cd8cdae9c010c47c4485fd8c1ce31a7dd0ab7fcc4707377/tifffile-2026.3.3-py3-none-any.whl", hash = "sha256:e8be15c94273113d31ecb7aa3a39822189dd11c4967e3cc88c178f1ad2fd1170", size = 243960, upload-time = "2026-03-03T19:14:35.808Z" }, +] + [[package]] name = "tiktoken" version = "0.12.0" @@ -3943,6 +4648,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6f/28/258ebab549c2bf3e64d2b0217b973467394a9cea8c42f70418ca2c5d0d2e/websockets-16.0-py3-none-any.whl", hash = "sha256:1637db62fad1dc833276dded54215f2c7fa46912301a24bd94d45d46a011ceec", size = 171598, upload-time = "2026-01-10T09:23:45.395Z" }, ] +[[package]] +name = "win32-setctime" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b3/8f/705086c9d734d3b663af0e9bb3d4de6578d08f46b1b101c2442fd9aecaa2/win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0", size = 4867, upload-time = "2024-12-07T15:28:28.314Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/07/c6fe3ad3e685340704d314d765b7912993bcb8dc198f0e7a89382d37974b/win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390", size = 4083, upload-time = "2024-12-07T15:28:26.465Z" }, +] + [[package]] name = "wrapt" version = "2.1.2"