feat(backend): 更新核心模块和文件处理

- 更新配置模块 (config.py)
- 更新数据库连接 (database.py)
- 更新主应用入口 (main.py)
- 更新数据模型 (models.py)
- 更新基础 Schema (base.py)
- 更新文件处理器 (docx, excel, pdf)
- 更新 Dockerfile

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Developer
2026-03-17 17:30:11 +08:00
parent db11429290
commit 47d1da7cea
10 changed files with 393 additions and 189 deletions

View File

@@ -1,8 +1,9 @@
"""
Excel/CSV Text Extractor
"""
import pandas as pd
import asyncio
from typing import Dict, List
import pandas as pd
class ExcelProcessor:
@@ -13,6 +14,12 @@ class ExcelProcessor:
df = pd.read_csv(file_path)
return self._dataframe_to_text(df)
async def extract_csv_async(self, file_path: str) -> str:
    """Extract CSV text without blocking the event loop.

    Runs the synchronous ``extract_csv`` in the running loop's default
    executor and awaits its result.

    Args:
        file_path: Path of the CSV file, forwarded to ``extract_csv``.

    Returns:
        Whatever ``extract_csv`` returns for ``file_path``.
    """
    # get_running_loop() is the preferred call inside a coroutine; it
    # never creates a loop implicitly the way get_event_loop() can.
    loop = asyncio.get_running_loop()
    # Passing None as the executor selects the loop's default executor.
    return await loop.run_in_executor(None, self.extract_csv, file_path)
def extract_excel(self, file_path: str, sheet_name: str = None) -> str:
"""Extract text from Excel file"""
if sheet_name:
@@ -27,6 +34,12 @@ class ExcelProcessor:
text_parts.append(self._dataframe_to_text(df))
return "\n\n".join(text_parts)
async def extract_excel_async(self, file_path: str, sheet_name: str = None) -> str:
    """Extract Excel text without blocking the event loop.

    Runs the synchronous ``extract_excel`` in the running loop's default
    executor and awaits its result.

    Args:
        file_path: Path of the Excel file, forwarded to ``extract_excel``.
        sheet_name: Optional sheet name, forwarded unchanged (``None`` by
            default).

    Returns:
        Whatever ``extract_excel`` returns for the given arguments.
    """
    # get_running_loop() is the preferred call inside a coroutine; it
    # never creates a loop implicitly the way get_event_loop() can.
    loop = asyncio.get_running_loop()
    # Passing None as the executor selects the loop's default executor.
    return await loop.run_in_executor(
        None, self.extract_excel, file_path, sheet_name
    )
def _dataframe_to_text(self, df: pd.DataFrame) -> str:
"""Convert DataFrame to readable text"""
text_parts = []
@@ -48,19 +61,25 @@ class ExcelProcessor:
sheets = pd.read_excel(file_path, sheet_name=None)
return {name: self._dataframe_to_text(df) for name, df in sheets.items()}
async def extract_all_sheets_async(self, file_path: str) -> Dict[str, str]:
    """Extract every sheet's text without blocking the event loop.

    Runs the synchronous ``extract_all_sheets`` in the running loop's
    default executor and awaits its result.

    Args:
        file_path: Path of the Excel file, forwarded to
            ``extract_all_sheets``.

    Returns:
        Whatever ``extract_all_sheets`` returns for ``file_path``.
    """
    # get_running_loop() is the preferred call inside a coroutine; it
    # never creates a loop implicitly the way get_event_loop() can.
    loop = asyncio.get_running_loop()
    # Passing None as the executor selects the loop's default executor.
    return await loop.run_in_executor(None, self.extract_all_sheets, file_path)
def get_sheet_names(self, file_path: str) -> List[str]:
    """Get all sheet names from Excel file.

    Args:
        file_path: Path of the Excel file to inspect.

    Returns:
        The workbook's sheet names as reported by ``pd.ExcelFile``.
    """
    # The context manager closes the workbook handle on exit; the
    # original left the ExcelFile open, leaking the underlying file.
    with pd.ExcelFile(file_path) as workbook:
        return workbook.sheet_names
async def process_csv(file_path: str) -> str:
    """Process CSV file and return text.

    Creates an ``ExcelProcessor`` and awaits its ``extract_csv_async``.

    Args:
        file_path: Path of the CSV file to process.

    Returns:
        The text returned by ``ExcelProcessor.extract_csv_async``.
    """
    # The pre-change synchronous header and return were interleaved with
    # their async replacements; only the async definition is kept.
    processor = ExcelProcessor()
    return await processor.extract_csv_async(file_path)
async def process_excel(file_path: str) -> str:
    """Process Excel file and return text.

    Creates an ``ExcelProcessor`` and awaits its ``extract_excel_async``
    with the default sheet selection.

    Args:
        file_path: Path of the Excel file to process.

    Returns:
        The text returned by ``ExcelProcessor.extract_excel_async``.
    """
    # The pre-change synchronous header and return were interleaved with
    # their async replacements; only the async definition is kept.
    processor = ExcelProcessor()
    return await processor.extract_excel_async(file_path)