feat(backend): 更新核心模块和文件处理

- 更新配置模块 (config.py)
- 更新数据库连接 (database.py)
- 更新主应用入口 (main.py)
- 更新数据模型 (models.py)
- 更新基础 Schema (base.py)
- 更新文件处理器 (docx, excel, pdf)
- 更新 Dockerfile

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Developer
2026-03-17 17:30:11 +08:00
parent db11429290
commit 47d1da7cea
10 changed files with 393 additions and 189 deletions

View File

@@ -1,8 +1,9 @@
"""
DOCX Text Extractor
"""
import asyncio
from typing import Dict
from docx import Document
from typing import Dict, List
class DOCXProcessor:
@@ -26,6 +27,12 @@ class DOCXProcessor:
return "\n\n".join(text_parts)
async def extract_text_async(self, file_path: str) -> str:
    """Extract all text from a DOCX file without blocking the event loop.

    Delegates to the synchronous ``self.extract_text`` and runs it in the
    running loop's default executor (``executor=None``).

    Args:
        file_path: Path of the DOCX file, passed unchanged to
            ``self.extract_text``.

    Returns:
        Whatever ``self.extract_text(file_path)`` returns.

    Raises:
        Any exception raised by ``self.extract_text`` is propagated to the
        awaiting caller.
    """
    # Inside a coroutine a loop is always running; get_running_loop() is the
    # documented way to obtain it and never creates a loop implicitly.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, self.extract_text, file_path)
def extract_with_metadata(self, file_path: str) -> Dict:
"""Extract text with DOCX metadata"""
doc = Document(file_path)
@@ -46,8 +53,14 @@ class DOCXProcessor:
return result
async def extract_with_metadata_async(self, file_path: str) -> Dict:
    """Extract text and metadata from a DOCX file without blocking the loop.

    Delegates to the synchronous ``self.extract_with_metadata`` and runs it
    in the running loop's default executor (``executor=None``).

    Args:
        file_path: Path of the DOCX file, passed unchanged to
            ``self.extract_with_metadata``.

    Returns:
        Whatever ``self.extract_with_metadata(file_path)`` returns.

    Raises:
        Any exception raised by ``self.extract_with_metadata`` is propagated
        to the awaiting caller.
    """
    # Inside a coroutine a loop is always running; get_running_loop() is the
    # documented way to obtain it and never creates a loop implicitly.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(
        None, self.extract_with_metadata, file_path
    )
def process_docx(file_path: str) -> str:
async def process_docx(file_path: str) -> str:
"""Process DOCX file and return text"""
processor = DOCXProcessor()
return processor.extract_text(file_path)
return await processor.extract_text_async(file_path)