chore: 删除旧的 algorithm 目录

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-09 10:27:25 +08:00
parent 797518ec76
commit 0d4fd6b425
5 changed files with 0 additions and 360 deletions

View File

@@ -1,112 +0,0 @@
# Algorithm Service
Python 算法服务提供文档解析、Embedding、LLM 调用等功能。
## 环境要求
- Python 3.10+(代码中使用了 `str | List[str]` 联合类型语法)
- FastAPI
- Uvicorn
## 安装依赖
```bash
pip install -r requirements.txt
```
## 运行服务
```bash
# 开发模式
uvicorn main:app --reload --port 8081
# 生产模式
uvicorn main:app --host 0.0.0.0 --port 8081
```
## 接口列表
### 1. 文档解析
**请求**
```
POST /parse
Content-Type: application/json
```
| 参数 | 类型 | 必填 | 说明 |
|------|------|------|------|
| file_url | String | 是 | 文件 URL |
| engine | String | 是 | 解析引擎:markitdown / docling |
| docling_url | String | 否 | Docling 服务 URL |
**响应**
```json
{
"success": true,
"content": "解析后的文本内容...",
"chunks": ["chunk1", "chunk2"],
"total_pages": 10,
"metadata": {
"filename": "document.pdf",
"file_size": 1234567
}
}
```
### 2. 生成 Embedding
**请求**
```
POST /embedding
Content-Type: application/json
```
| 参数 | 类型 | 必填 | 说明 |
|------|------|------|------|
| input | String/Array | 是 | 要 embedding 的文本 |
| model | String | 是 | 模型名称 |
**响应**
```json
{
"success": true,
"embeddings": [[0.1, 0.2, ...], [0.3, 0.4, ...]],
"model": "text-embedding-3-small"
}
```
### 3. LLM 对话
**请求**
```
POST /chat
Content-Type: application/json
```
| 参数 | 类型 | 必填 | 说明 |
|------|------|------|------|
| messages | Array | 是 | 消息列表 |
| model | String | 是 | 模型名称 |
| temperature | Float | 否 | 温度参数 |
**响应**
```json
{
"success": true,
"message": {
"role": "assistant",
"content": "回复内容..."
},
"usage": {
"prompt_tokens": 100,
"completion_tokens": 50
}
}
```

View File

@@ -1,175 +0,0 @@
"""
Algorithm Service - 文档解析、Embedding、LLM 调用服务
"""
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Optional, List, Dict, Any
import requests
import os
import json
app = FastAPI(title="Algorithm Service")
# ========== Models ==========
class ParseRequest(BaseModel):
    """Request body accepted by the ``/parse`` endpoint."""

    # Location of the document to parse.
    file_url: str
    # Parsing engine to use: "markitdown" or "docling".
    engine: str
    # Base URL of the docling service; the docling engine rejects requests without it.
    docling_url: str | None = None
class EmbeddingRequest(BaseModel):
    """Request body accepted by the ``/embedding`` endpoint."""

    # Either one text or a list of texts to embed.
    input: str | List[str]
    # Model name; echoed back in the response.
    model: str
class ChatMessage(BaseModel):
    """One entry of a chat conversation: who spoke and what was said."""

    # Speaker role (the service itself replies with "assistant").
    role: str
    # Text of the message.
    content: str
class ChatRequest(BaseModel):
    """Request body accepted by the ``/chat`` endpoint."""

    # Conversation so far; the last entry is the one being answered.
    messages: List[ChatMessage]
    # Model name.
    model: str
    # Sampling temperature; defaults to 0.7 when omitted.
    temperature: float | None = 0.7
    # Optional credentials / endpoint override for the upstream LLM provider.
    api_key: str | None = None
    base_url: str | None = None
# ========== 文档解析 ==========
@app.post("/parse")
async def parse_document(req: ParseRequest):
    """Parse a document with the requested engine ("markitdown" or "docling").

    Returns:
        The engine's result dict on success. Every failure, including an
        unsupported engine, is reported as
        ``{"success": False, "error": <message>}``.
    """
    try:
        if req.engine == "markitdown":
            return await parse_with_markitdown(req.file_url)
        elif req.engine == "docling":
            return await parse_with_docling(req.file_url, req.docling_url)
        else:
            raise HTTPException(status_code=400, detail=f"Unsupported engine: {req.engine}")
    except HTTPException as e:
        # Use the exception's detail for the error text: str() of an
        # HTTPException is not guaranteed to be the detail message (depending
        # on the Starlette version it may be empty or carry a status prefix).
        # NOTE(review): the status code is still folded into this envelope, as
        # before; confirm whether clients would prefer real HTTP status codes.
        return {"success": False, "error": str(e.detail)}
    except Exception as e:
        return {"success": False, "error": str(e)}
async def parse_with_markitdown(file_url: str) -> Dict[str, Any]:
    """Convert the document at ``file_url`` to text using MarkItDown.

    The converted text is split on blank lines into chunks; at most the first
    100 chunks are returned and ``total_pages`` is always reported as 1.

    Args:
        file_url: Location of the document, passed straight to
            ``MarkItDown.convert``.

    Returns:
        A dict with ``success``, ``content``, ``chunks``, ``total_pages`` and
        ``metadata`` (containing ``filename``).

    Raises:
        HTTPException: 500 if markitdown is not installed or conversion fails.
    """
    import asyncio

    # Guard only the import with ImportError, so that an import failure raised
    # during conversion is not misreported as "markitdown not installed".
    try:
        from markitdown import MarkItDown
    except ImportError as e:
        raise HTTPException(
            status_code=500,
            detail="markitdown not installed. Run: pip install markitdown",
        ) from e

    try:
        # convert() is a synchronous call; run it in a worker thread so it
        # does not stall the event loop while the document is processed.
        result = await asyncio.to_thread(MarkItDown().convert, file_url)
        content = result.text_content if hasattr(result, 'text_content') else str(result)
        # Simple chunking: one chunk per blank-line-separated paragraph.
        chunks = [c.strip() for c in content.split('\n\n') if c.strip()]
        return {
            "success": True,
            "content": content,
            "chunks": chunks[:100],  # cap the number of chunks returned
            "total_pages": 1,
            "metadata": {
                "filename": file_url.split('/')[-1]
            }
        }
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to parse with markitdown: {str(e)}",
        ) from e
async def parse_with_docling(file_url: str, docling_url: Optional[str] = None) -> Dict[str, Any]:
    """Parse the document at ``file_url`` through a remote docling service.

    Posts ``{"url": file_url}`` to ``<docling_url>/convert`` and reads ``text``
    and ``num_pages`` from the JSON reply. The text is split on blank lines
    into chunks, of which at most the first 100 are returned.

    Args:
        file_url: Location of the document, forwarded to the docling service.
        docling_url: Base URL of the docling service (required).

    Returns:
        A dict with ``success``, ``content``, ``chunks``, ``total_pages`` and
        ``metadata`` (containing ``filename``).

    Raises:
        HTTPException: 400 if ``docling_url`` is missing; 500 if the service
            cannot be reached or answers with a non-200 status.
    """
    if not docling_url:
        raise HTTPException(status_code=400, detail="docling_url is required for docling engine")

    import asyncio

    # NOTE(review): docling_url is supplied by the request body (see
    # parse_document); consider restricting it to known hosts to avoid SSRF.
    # Strip a trailing slash so "http://host/" does not become "http://host//convert".
    endpoint = f"{docling_url.rstrip('/')}/convert"
    try:
        # requests is blocking; run the call in a worker thread so a slow
        # docling service (up to the 60 s timeout) does not stall the event loop.
        response = await asyncio.to_thread(
            requests.post,
            endpoint,
            json={"url": file_url},
            timeout=60,
        )
        if response.status_code != 200:
            raise HTTPException(status_code=500, detail=f"Docling service error: {response.text}")
        result = response.json()
    except requests.exceptions.RequestException as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to connect docling service: {str(e)}",
        ) from e

    # Treat a missing or null "text" field as empty content instead of crashing.
    content = result.get("text") or ""
    chunks = [c.strip() for c in content.split('\n\n') if c.strip()]
    return {
        "success": True,
        "content": content,
        "chunks": chunks[:100],
        "total_pages": result.get("num_pages", 1),
        "metadata": {
            "filename": file_url.split('/')[-1]
        }
    }
# ========== Embedding ==========
@app.post("/embedding")
async def generate_embedding(req: EmbeddingRequest):
    """Return one embedding vector per input text.

    Currently a placeholder: every vector is 1536 values of 0.1.
    """
    try:
        # TODO: dispatch to a real embedding service depending on the provider.
        # Normalise the input so a single string is handled like a one-item list.
        if isinstance(req.input, str):
            batch = [req.input]
        else:
            batch = req.input

        # Mock embeddings: a constant 1536-dimensional vector for each text.
        vectors = []
        for _ in batch:
            vectors.append([0.1] * 1536)

        payload = {
            "success": True,
            "embeddings": vectors,
            "model": req.model,
        }
    except Exception as exc:
        payload = {"success": False, "error": str(exc)}
    return payload
# ========== Chat ==========
@app.post("/chat")
async def chat(req: ChatRequest):
    """Answer a chat request.

    Currently a placeholder: echoes the content of the last message and
    reports its character count as ``prompt_tokens``.
    """
    try:
        # TODO: call the real LLM service selected by model and base_url.
        # An empty conversation is answered as if the last message were "".
        latest = ""
        if req.messages:
            latest = req.messages[-1].content

        reply = {
            "role": "assistant",
            "content": f"Echo: {latest}",
        }
        usage = {
            "prompt_tokens": len(latest),
            "completion_tokens": 10,
        }
        return {"success": True, "message": reply, "usage": usage}
    except Exception as exc:
        return {"success": False, "error": str(exc)}
# ========== Health Check ==========
@app.get("/health")
async def health():
    """Health check: always answers with status "ok"."""
    return dict(status="ok")
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the file is run as a script.
    import uvicorn

    # Listen on all interfaces, port 8081.
    server_options = {"host": "0.0.0.0", "port": 8081}
    uvicorn.run(app, **server_options)

View File

@@ -1,17 +0,0 @@
# FastAPI
fastapi>=0.100.0
uvicorn[standard]>=0.23.0
# HTTP 请求
requests>=2.31.0
# 文档解析
markitdown>=0.0.1
# Pydantic
pydantic>=2.0.0
# 可选:其他解析库
# docling>=0.1.0
# pypdf>=3.0.0
# python-docx>=0.8.11

View File

@@ -1,30 +0,0 @@
@echo off
rem Windows launcher for the Algorithm Service: creates the virtual environment
rem on first run, installs requirements, then starts uvicorn with auto-reload.
rem Switch the console to UTF-8 so the messages below display correctly.
chcp 65001 >nul
title Algorithm Service
echo ========================================
echo 启动 Algorithm 服务
echo ========================================
rem Run from the script's own directory. The path is quoted so that
rem directories containing spaces or ampersands do not break the command.
cd /d "%~dp0"
echo.
echo 检查虚拟环境...
if not exist venv (
    echo [INFO] 创建虚拟环境...
    python -m venv venv
)
echo.
echo 安装/更新依赖...
call venv\Scripts\pip install -r requirements.txt -q
echo.
echo 启动服务...
echo 访问 http://localhost:8081/docs 查看 API 文档
echo 按 Ctrl+C 停止服务
echo.
call venv\Scripts\uvicorn main:app --reload --port 8081 --host 0.0.0.0
rem Keep the window open after the server exits so its output stays readable.
pause

View File

@@ -1,26 +0,0 @@
#!/bin/bash
# Launcher for the Algorithm Service: creates the virtual environment on first
# run, installs requirements, then starts uvicorn with auto-reload.

# Stop at the first failing step. Without this, a failed cd, venv creation or
# activation would let "pip install" run in the wrong directory or against the
# system Python.
set -e

echo "========================================"
echo " 启动 Algorithm 服务"
echo "========================================"

# Run from the script's own directory so venv/ and requirements.txt resolve.
cd "$(dirname "$0")"

# Create the virtual environment if it does not exist yet.
if [ ! -d "venv" ]; then
    echo "[INFO] 创建虚拟环境..."
    python3 -m venv venv
fi

echo ""
echo "安装/更新依赖..."
source venv/bin/activate
pip install -r requirements.txt -q

echo ""
echo "启动服务..."
echo "访问 http://localhost:8081/docs 查看 API 文档"
echo "按 Ctrl+C 停止服务"
echo ""
uvicorn main:app --reload --port 8081 --host 0.0.0.0