import json
from io import BytesIO

import pytest
from httpx import ASGITransport, AsyncClient
from sqlalchemy import select
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine

import app.models  # noqa: F401
from app.database import Base, get_db
from app.main import app
from app.models.document import Document, DocumentChunk
from app.models.user import User
from app.routers.auth import get_current_user
from app.services.auth_service import get_password_hash
from app.services.document_service import DocumentService
from starlette.datastructures import UploadFile


def _open_client():
    """Build an httpx client that sends requests to ``app`` through an ASGI transport."""
    return AsyncClient(transport=ASGITransport(app=app), base_url='http://testserver')


async def _post_upload(filename, raw_bytes, content_type):
    """POST a single in-memory file to the upload endpoint and return the response."""
    async with _open_client() as http:
        return await http.post(
            '/api/documents/upload',
            files={'file': (filename, BytesIO(raw_bytes), content_type)},
        )


@pytest.fixture
async def document_router_env(tmp_path):
    """Prepare a seeded SQLite database and dependency overrides for router tests.

    Creates the schema in a database file under ``tmp_path``, inserts one user,
    one document (``doc-1``) and two chunks (``chunk-1``, ``chunk-2``), then
    overrides ``get_db`` and ``get_current_user`` on ``app``. On teardown all
    dependency overrides are cleared and the engine is disposed.
    """
    db_path = tmp_path / 'test_documents_router.db'
    engine = create_async_engine(f"sqlite+aiosqlite:///{db_path}", future=True)
    session_factory = async_sessionmaker(engine, expire_on_commit=False)

    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.create_all)

    # (chunk id, chunk index, content, section title) for each seeded chunk.
    chunk_specs = (
        ('chunk-1', 0, 'original chunk content', 'Intro'),
        ('chunk-2', 1, 'second chunk content', 'Details'),
    )

    async with session_factory() as session:
        user = User(
            email='docs@example.com',
            hashed_password=get_password_hash('secret123'),
            full_name='Docs Tester',
        )
        session.add(user)
        # Flush before building the document, which reads user.id.
        await session.flush()

        document = Document(
            id='doc-1',
            user_id=user.id,
            title='Uploaded spec',
            filename='uploaded-spec.docx',
            file_type='docx',
            file_size=2048,
            file_path=str(tmp_path / 'uploaded-spec.docx'),
            summary='spec summary',
            chunk_count=3,
            is_indexed=True,
            ingestion_status='ready',
            normalized_content='# Uploaded spec\n\nnormalized body',
            normalized_format='structured_markdown',
        )
        session.add(document)
        await session.flush()

        session.add_all([
            DocumentChunk(
                id=chunk_id,
                document_id=document.id,
                chunk_index=position,
                content=text,
                metadata_=json.dumps({'content_type': 'paragraph', 'section_title': section}),
            )
            for chunk_id, position, text, section in chunk_specs
        ])
        await session.commit()
        await session.refresh(user)

    async def _provide_session():
        async with session_factory() as scoped_session:
            yield scoped_session

    async def _provide_user():
        return user

    app.dependency_overrides[get_db] = _provide_session
    app.dependency_overrides[get_current_user] = _provide_user
    try:
        yield
    finally:
        app.dependency_overrides.clear()
        await engine.dispose()


@pytest.mark.asyncio
async def test_list_documents_returns_serializable_document_payload(document_router_env):
    """GET /api/documents returns 200 with exactly the seeded document."""
    async with _open_client() as http:
        resp = await http.get('/api/documents')

    assert resp.status_code == 200
    body = resp.json()
    assert len(body) == 1
    first = body[0]
    assert first['title'] == 'Uploaded spec'
    assert first['ingestion_status'] == 'ready'


@pytest.mark.asyncio
async def test_get_document_chunks_returns_serializable_chunk_payload(document_router_env):
    """GET /api/documents/doc-1/chunks returns both seeded chunks in order."""
    async with _open_client() as http:
        resp = await http.get('/api/documents/doc-1/chunks')

    assert resp.status_code == 200
    body = resp.json()
    assert [item['id'] for item in body] == ['chunk-1', 'chunk-2']
    assert body[0]['content'] == 'original chunk content'


@pytest.mark.asyncio
async def test_update_document_chunk_persists_content_and_reindexes_existing_chunks(document_router_env):
    """PUT on a chunk returns the edited content and the edit is stored in the database."""
    async with _open_client() as http:
        resp = await http.put(
            '/api/documents/doc-1/chunks/chunk-1',
            json={'content': 'edited chunk content'},
        )

    assert resp.status_code == 200
    body = resp.json()
    assert body['id'] == 'chunk-1'
    assert body['content'] == 'edited chunk content'

    # Re-read through the overridden get_db dependency to check persisted state.
    async for session in app.dependency_overrides[get_db]():
        chunk_query = select(DocumentChunk).where(DocumentChunk.id == 'chunk-1')
        stored_chunk = (await session.execute(chunk_query)).scalar_one()
        document_query = select(Document).where(Document.id == 'doc-1')
        stored_document = (await session.execute(document_query)).scalar_one()

        assert stored_chunk.content == 'edited chunk content'
        assert stored_document.ingestion_status == 'ready'
        assert stored_document.indexed_at is not None


@pytest.mark.asyncio
async def test_get_document_content_prefers_normalized_content(document_router_env):
    """GET /api/documents/doc-1/content returns the document's normalized_content."""
    async with _open_client() as http:
        resp = await http.get('/api/documents/doc-1/content')

    assert resp.status_code == 200
    assert resp.json() == {'content': '# Uploaded spec\n\nnormalized body'}


@pytest.mark.asyncio
async def test_upload_document_returns_400_for_unsupported_file_type(document_router_env):
    """Uploading a .exe file is rejected with 400 and an unsupported-type detail."""
    resp = await _post_upload('payload.exe', b'bad', 'application/octet-stream')

    assert resp.status_code == 400
    assert resp.json()['detail'] == '不支持的文件类型: .exe'


@pytest.mark.asyncio
async def test_upload_document_returns_400_for_missing_parser_dependency(document_router_env, monkeypatch):
    """A ValueError raised by the patched _parse_document surfaces as a 400 with its message."""
    async def _fail_docx_parse(self, file_path: str, ext: str):
        raise ValueError('DOCX 解析依赖缺失: python-docx')

    monkeypatch.setattr(DocumentService, '_parse_document', _fail_docx_parse)

    resp = await _post_upload(
        'payload.docx',
        b'bad',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    )

    assert resp.status_code == 400
    assert resp.json()['detail'] == 'DOCX 解析依赖缺失: python-docx'


@pytest.mark.asyncio
async def test_upload_document_returns_400_for_missing_mineru_dependency(document_router_env, monkeypatch):
    """A ValueError raised by the patched _parse_document for a PDF surfaces as a 400."""
    async def _fail_pdf_parse(self, file_path: str, ext: str):
        raise ValueError('PDF 解析依赖缺失: mineru')

    monkeypatch.setattr(DocumentService, '_parse_document', _fail_pdf_parse)

    resp = await _post_upload('payload.pdf', b'%PDF-1.4 bad', 'application/pdf')

    assert resp.status_code == 400
    assert resp.json()['detail'] == 'PDF 解析依赖缺失: mineru'


@pytest.mark.asyncio
async def test_upload_document_returns_success_payload_for_pdf(document_router_env, monkeypatch):
    """With upload_document patched to return a Document, the endpoint replies 201 with its summary."""
    async def _stub_upload(self, user_id: str, file, folder_id=None):
        return Document(
            id='pdf-doc-1',
            user_id=user_id,
            title='PDF Spec',
            filename='payload.pdf',
            file_type='pdf',
            file_size=2048,
            file_path='fake/path/payload.pdf',
            chunk_count=4,
            ingestion_status='uploaded',
            normalized_content='# PDF Spec\n\nBody',
            normalized_format='structured_markdown',
        )

    monkeypatch.setattr(DocumentService, 'upload_document', _stub_upload)

    resp = await _post_upload('payload.pdf', b'%PDF-1.4 fake', 'application/pdf')

    assert resp.status_code == 201
    expected = {
        'id': 'pdf-doc-1',
        'title': 'PDF Spec',
        'chunk_count': 4,
        'status': '上传成功,正在索引...',
    }
    assert resp.json() == expected