chore: 删除旧的 algorithm 目录

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 10:27:25 +08:00
parent 797518ec76
commit 0d4fd6b425
5 changed files with 0 additions and 360 deletions
--- a/algorithm/README.md
+++ b/algorithm/README.md
@@ -1,112 +0,0 @@
 # Algorithm Service
 Python 算法服务，提供文档解析、Embedding、LLM 调用等功能。
 ## 环境要求
 - Python 3.9+
 - FastAPI
 - Uvicorn
 ## 安装依赖
 ```bash
 pip install -r requirements.txt
 ```
 ## 运行服务
 ```bash
 # 开发模式
 uvicorn main:app --reload --port 8081
 # 生产模式
 uvicorn main:app --host 0.0.0.0 --port 8081
 ```
 ## 接口列表
 ### 1. 文档解析
 **请求**
 ```
 POST /parse
 Content-Type: application/json
 ```
 | 参数 | 类型 | 必填 | 说明 |
 |------|------|------|------|
 | file_url | String | 是 | 文件 URL |
 | engine | String | 是 | 解析引擎：markitdown / docling |
 | docling_url | String | 否 | Docling 服务 URL |
 **响应**
 ```json
 {
  "success": true,
  "content": "解析后的文本内容...",
  "chunks": ["chunk1", "chunk2"],
  "total_pages": 10,
  "metadata": {
    "filename": "document.pdf",
    "file_size": 1234567
  }
 }
 ```
 ### 2. 生成 Embedding
 **请求**
 ```
 POST /embedding
 Content-Type: application/json
 ```
 | 参数 | 类型 | 必填 | 说明 |
 |------|------|------|------|
 | input | String/Array | 是 | 要 embedding 的文本 |
 | model | String | 是 | 模型名称 |
 **响应**
 ```json
 {
  "success": true,
  "embeddings": [[0.1, 0.2, ...], [0.3, 0.4, ...]],
  "model": "text-embedding-3-small"
 }
 ```
 ### 3. LLM 对话
 **请求**
 ```
 POST /chat
 Content-Type: application/json
 ```
 | 参数 | 类型 | 必填 | 说明 |
 |------|------|------|------|
 | messages | Array | 是 | 消息列表 |
 | model | String | 是 | 模型名称 |
 | temperature | Float | 否 | 温度参数，默认 0.7 |
 **响应**
 ```json
 {
  "success": true,
  "message": {
    "role": "assistant",
    "content": "回复内容..."
  },
  "usage": {
    "prompt_tokens": 100,
    "completion_tokens": 50
  }
 }
 ```
--- a/algorithm/main.py
+++ b/algorithm/main.py
@@ -1,175 +0,0 @@
 """
 Algorithm Service - 文档解析、Embedding、LLM 调用服务
 """
 import asyncio
 import json
 import os
 from typing import Any, Dict, List, Optional, Union

 import requests
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 # ASGI application object; the route decorators in this module register their handlers on it.
 app = FastAPI(title="Algorithm Service")
 # ========== Models ==========
 class ParseRequest(BaseModel):
    """Request body for the POST /parse endpoint."""
    # URL of the file to parse.
    file_url: str
    # Parsing engine; the /parse handler accepts "markitdown" or "docling".
    engine: str  # markitdown / docling
    # Base URL of the Docling service; the docling parser rejects the request when it is missing.
    docling_url: Optional[str] = None
 class EmbeddingRequest(BaseModel):
     """Request body for the POST /embedding endpoint.

     ``input`` is declared with ``typing.Union`` instead of ``str | List[str]``:
     the ``|`` form is evaluated when the class body runs and raises
     ``TypeError`` on Python 3.9, which the service README lists as supported.
     """

     # A single text or a list of texts to embed.
     input: Union[str, List[str]]
     # Embedding model name; the handler echoes it back in the response.
     model: str
 class ChatMessage(BaseModel):
    """A single message of a chat conversation."""
    # Role of the message author (the mock /chat handler answers with "assistant").
    role: str
    # Text of the message.
    content: str
 class ChatRequest(BaseModel):
    """Request body for the POST /chat endpoint."""
    # Conversation messages; the current handler only reads the last one.
    messages: List[ChatMessage]
    # Model name.
    model: str
    # Temperature parameter; defaults to 0.7 when omitted.
    temperature: Optional[float] = 0.7
    # NOTE(review): presumably credentials and endpoint of the upstream LLM provider;
    # the current mock handler does not read either field — confirm intended use.
    api_key: Optional[str] = None
    base_url: Optional[str] = None
 # ========== 文档解析 ==========
@app.post("/parse")
async def parse_document(req: ParseRequest):
    """Parse a document with the engine named in the request.

    Supported engines are ``markitdown`` and ``docling``.

    Args:
        req: Parse request carrying the file URL, the engine name and, for
            docling, the Docling service URL.

    Returns:
        The parser's result dict on success, or
        ``{"success": False, "error": ...}`` for unexpected failures.

    Raises:
        HTTPException: 400 for an unsupported engine, plus whatever HTTP
            errors the selected parser raises.
    """
    try:
        if req.engine == "markitdown":
            return await parse_with_markitdown(req.file_url)
        if req.engine == "docling":
            return await parse_with_docling(req.file_url, req.docling_url)
        raise HTTPException(status_code=400, detail=f"Unsupported engine: {req.engine}")
    except HTTPException:
        # Deliberate HTTP errors must reach FastAPI untouched; the broad handler
        # below would otherwise turn them into a 200 response and discard the
        # status code chosen by the raiser.
        raise
    except Exception as e:
        return {"success": False, "error": str(e)}
 async def parse_with_markitdown(file_url: str) -> Dict[str, Any]:
     """Parse a document with markitdown and split it into paragraph chunks.

     Args:
         file_url: Location of the document; passed as-is to ``MarkItDown.convert``.

     Returns:
         A dict with ``success``, the full ``content``, at most 100 ``chunks``
         (paragraphs separated by blank lines), ``total_pages`` (always 1) and
         ``metadata`` holding the last path segment of ``file_url`` as filename.

     Raises:
         HTTPException: 500 when markitdown is not installed or conversion fails.
     """
     try:
         from markitdown import MarkItDown
     except ImportError:
         raise HTTPException(status_code=500, detail="markitdown not installed. Run: pip install markitdown")

     try:
         # convert() is synchronous; run it in a worker thread so this coroutine
         # does not stall the event loop for the duration of the conversion.
         result = await asyncio.to_thread(MarkItDown().convert, file_url)
         content = result.text_content if hasattr(result, 'text_content') else str(result)
         # Naive chunking: one chunk per blank-line separated paragraph.
         chunks = [c.strip() for c in content.split('\n\n') if c.strip()]
         return {
             "success": True,
             "content": content,
             "chunks": chunks[:100],  # cap the number of chunks returned
             "total_pages": 1,
             "metadata": {
                 "filename": file_url.split('/')[-1]
             }
         }
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Failed to parse with markitdown: {str(e)}") from e
 async def parse_with_docling(file_url: str, docling_url: Optional[str] = None) -> Dict[str, Any]:
     """Parse a document by delegating to a Docling HTTP service.

     Args:
         file_url: URL of the document; sent to the service as ``{"url": file_url}``.
         docling_url: Base URL of the Docling service; ``/convert`` is appended.

     Returns:
         A dict with ``success``, the full ``content``, at most 100 ``chunks``
         (paragraphs separated by blank lines), ``total_pages`` (the service's
         ``num_pages``, default 1) and ``metadata`` with the filename.

     Raises:
         HTTPException: 400 when ``docling_url`` is missing; 500 when the
             service cannot be reached or answers with a non-200 status.
     """
     if not docling_url:
         raise HTTPException(status_code=400, detail="docling_url is required for docling engine")
     # Tolerate a trailing slash on the base URL so the path never becomes "//convert".
     endpoint = docling_url.rstrip("/") + "/convert"
     try:
         # requests is blocking; run the call in a worker thread so a slow
         # Docling service (up to the 60 s timeout) does not freeze the event loop.
         response = await asyncio.to_thread(
             requests.post,
             endpoint,
             json={"url": file_url},
             timeout=60,
         )
         if response.status_code != 200:
             raise HTTPException(status_code=500, detail=f"Docling service error: {response.text}")
         result = response.json()
         # "text" may be absent or null; treat both as empty content.
         content = result.get("text", "") or ""
         chunks = [c.strip() for c in content.split('\n\n') if c.strip()]
         return {
             "success": True,
             "content": content,
             "chunks": chunks[:100],
             "total_pages": result.get("num_pages", 1),
             "metadata": {
                 "filename": file_url.split('/')[-1]
             }
         }
     except requests.exceptions.RequestException as e:
         raise HTTPException(status_code=500, detail=f"Failed to connect docling service: {str(e)}") from e
 # ========== Embedding ==========
@app.post("/embedding")
async def generate_embedding(req: EmbeddingRequest):
    """Return placeholder embeddings for the submitted text or texts.

    Each input text yields one constant 1536-dimensional vector; the requested
    model name is echoed back. Any exception is reported as
    ``{"success": False, "error": ...}``.
    """
    try:
        # TODO: call the real embedding service of the selected provider;
        # mock data is returned for now.
        payload = req.input
        if isinstance(payload, str):
            payload = [payload]
        # Mock result: one 1536-dimensional vector per text.
        vectors = []
        for _ in payload:
            vectors.append([0.1] * 1536)
        return {"success": True, "embeddings": vectors, "model": req.model}
    except Exception as err:
        return {"success": False, "error": str(err)}
 # ========== Chat ==========
@app.post("/chat")
async def chat(req: ChatRequest):
    """Answer a chat request with a mock reply that echoes the last message.

    ``prompt_tokens`` is the character length of the last message and
    ``completion_tokens`` is fixed at 10. Any exception is reported as
    ``{"success": False, "error": ...}``.
    """
    try:
        # TODO: call the actual LLM service selected by model and base_url;
        # mock data is returned for now.
        if req.messages:
            prompt = req.messages[-1].content
        else:
            prompt = ""
        reply = {"role": "assistant", "content": f"Echo: {prompt}"}
        token_usage = {"prompt_tokens": len(prompt), "completion_tokens": 10}
        return {"success": True, "message": reply, "usage": token_usage}
    except Exception as err:
        return {"success": False, "error": str(err)}
 # ========== Health Check ==========
@app.get("/health")
async def health():
    """Health check: always reports status "ok"."""
    status = {"status": "ok"}
    return status
 if __name__ == "__main__":
     # Direct execution: serve the app on every interface, port 8081.
     from uvicorn import run as serve

     serve(app, host="0.0.0.0", port=8081)
--- a/algorithm/requirements.txt
+++ b/algorithm/requirements.txt
@@ -1,17 +0,0 @@
 # FastAPI
 fastapi>=0.100.0
 uvicorn[standard]>=0.23.0
 # HTTP 请求
 requests>=2.31.0
 # 文档解析
 markitdown>=0.0.1
 # Pydantic
 pydantic>=2.0.0
 # 可选：其他解析库
 # docling>=0.1.0
 # pypdf>=3.0.0
 # python-docx>=0.8.11
--- a/algorithm/start.bat
+++ b/algorithm/start.bat
@@ -1,30 +0,0 @@
@echo off
 rem Launcher for the Algorithm service on Windows: ensures a local virtual
 rem environment exists, installs dependencies, then starts uvicorn with
 rem auto-reload on port 8081.
 chcp 65001 >nul
 title Algorithm Service
 echo ========================================
 echo   启动 Algorithm 服务
 echo ========================================
 rem Quote the script directory so paths containing spaces or "&" still work.
 cd /d "%~dp0"
 echo.
 echo 检查虚拟环境...
 if not exist venv (
    echo [INFO] 创建虚拟环境...
    python -m venv venv
    rem Stop here if the environment could not be created.
    if errorlevel 1 goto :fail
 )
 echo.
 echo 安装/更新依赖...
 call venv\Scripts\pip install -r requirements.txt -q
 rem Do not start the server with missing or half-installed dependencies.
 if errorlevel 1 goto :fail
 echo.
 echo 启动服务...
 echo 访问 http://localhost:8081/docs 查看 API 文档
 echo 按 Ctrl+C 停止服务
 echo.
 call venv\Scripts\uvicorn main:app --reload --port 8081 --host 0.0.0.0
 pause
 exit /b 0
 :fail
 echo [ERROR] 启动失败，请检查上面的输出
 pause
 exit /b 1
--- a/algorithm/start.sh
+++ b/algorithm/start.sh
@@ -1,26 +0,0 @@
 #!/bin/bash
 # Launcher for the Algorithm service: ensures a local virtual environment
 # exists, installs dependencies, then starts uvicorn with auto-reload on
 # port 8081.
 #
 # Abort on the first failing step. Without this, a failed cd, venv creation
 # or dependency install would still go on to run pip/uvicorn from whatever
 # happens to be on PATH.
 set -e
 echo "========================================"
 echo "  启动 Algorithm 服务"
 echo "========================================"
 # Run from the directory that contains this script.
 cd "$(dirname "$0")"
 # Create the virtual environment on first run.
 if [ ! -d "venv" ]; then
    echo "[INFO] 创建虚拟环境..."
    python3 -m venv venv
 fi
 echo ""
 echo "安装/更新依赖..."
 source venv/bin/activate
 pip install -r requirements.txt -q
 echo ""
 echo "启动服务..."
 echo "访问 http://localhost:8081/docs 查看 API 文档"
 echo "按 Ctrl+C 停止服务"
 echo ""
 uvicorn main:app --reload --port 8081 --host 0.0.0.0