Files
YG-Rules/app/utils/llm.py

91 lines
3.2 KiB
Python
Raw Normal View History

2026-06-10 19:15:24 +08:00
"""LLM 调用封装。"""
from __future__ import annotations
import os
import re
import time
from pathlib import Path
from typing import Any
import requests
from dotenv import load_dotenv
class LLMError(RuntimeError):
    """Raised when a request to the LLM backend fails."""
def strip_thinking(text: str) -> str:
    """Remove ``<think>`` / ``<thinking>`` reasoning blocks from a model reply.

    Tag matching is case-insensitive. Handling happens in this order:

    1. Every closed ``<think>...</think>`` block (also spanning newlines) is
       deleted.
    2. If a closing tag is left over with no opening tag before it (the
       opening tag was never emitted in the reply), everything up to and
       including the last such closing tag is treated as reasoning and
       dropped.
    3. If an opening tag is left unclosed, the text before it is kept and the
       text after it is searched for a JSON payload: first a Markdown code
       fence that opens a ``{``, otherwise the first ``{``. The payload, when
       found, is appended on a new line; otherwise only the text before the
       tag is returned.

    Args:
        text: Raw model output. ``None`` or an empty string yields ``""``.

    Returns:
        The reply with reasoning removed and surrounding whitespace stripped.
    """
    if not text:
        return ""

    cleaned = re.sub(
        r"<think(?:ing)?>.*?</think(?:ing)?>",
        "",
        text,
        flags=re.DOTALL | re.IGNORECASE,
    ).strip()

    # All well-formed pairs are gone, so any closing tag still present has no
    # opening tag in front of it: whatever precedes it is leaked reasoning.
    orphan_closers = list(
        re.finditer(r"</think(?:ing)?>", cleaned, flags=re.IGNORECASE)
    )
    if orphan_closers:
        cleaned = cleaned[orphan_closers[-1].end():].strip()

    unclosed = re.search(r"<think(?:ing)?>", cleaned, flags=re.IGNORECASE)
    if not unclosed:
        return cleaned

    before = cleaned[: unclosed.start()].strip()
    after = cleaned[unclosed.end():]

    # Prefer a fenced JSON block so the fence markers survive for callers that
    # parse Markdown-wrapped JSON.
    json_fence = re.search(
        r"```(?:json)?\s*\{.*", after, flags=re.DOTALL | re.IGNORECASE
    )
    if json_fence:
        return (before + "\n" + json_fence.group(0).strip()).strip()

    # Otherwise salvage everything from the first opening brace onwards.
    json_start = after.find("{")
    if json_start >= 0:
        return (before + "\n" + after[json_start:].strip()).strip()

    return before
class LLMClient:
    """Small client for an OpenAI-compatible ``/chat/completions`` endpoint.

    Explicit constructor arguments win; anything left unset is read from the
    ``OPENAI_API_KEY``, ``OPENAI_BASE_URL`` and ``OPENAI_MODEL`` environment
    variables. A ``.env`` file in the current working directory is loaded
    first, without overriding variables that are already set.
    """

    # HTTP status codes for which another attempt is made.
    _RETRYABLE_STATUS = frozenset({429, 500, 502, 503, 504, 529})

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str | None = None,
        model: str | None = None,
        retry_attempts: int = 3,
        retry_delay_seconds: float = 2,
        timeout: int = 120,
    ) -> None:
        """Resolve connection settings and retry policy.

        Args:
            api_key: Bearer token; falls back to ``OPENAI_API_KEY``. When
                empty, no ``Authorization`` header is sent.
            base_url: API root; falls back to ``OPENAI_BASE_URL`` and then to
                ``https://api.openai.com/v1``. Trailing slashes are removed.
            model: Model name; falls back to ``OPENAI_MODEL`` and then to
                ``gpt-4o-mini``.
            retry_attempts: Total number of request attempts made by
                :meth:`chat`. Values below 1 are treated as 1 there.
            retry_delay_seconds: Fixed pause between attempts.
            timeout: Per-request timeout passed to ``requests.post``.
        """
        self._load_project_env()
        self.api_key = api_key or os.environ.get("OPENAI_API_KEY", "")
        self.base_url = (
            base_url
            or os.environ.get("OPENAI_BASE_URL")
            or "https://api.openai.com/v1"
        ).rstrip("/")
        # ``or`` chain (like base_url) so an empty OPENAI_MODEL= entry does not
        # produce an empty model name.
        self.model = model or os.environ.get("OPENAI_MODEL") or "gpt-4o-mini"
        self.retry_attempts = retry_attempts
        self.retry_delay_seconds = retry_delay_seconds
        self.timeout = timeout

    def _load_project_env(self) -> None:
        """Load ``.env`` from the current working directory if it exists."""
        env_path = Path(os.getcwd()) / ".env"
        if env_path.exists():
            # override=False: variables already in the environment win.
            load_dotenv(dotenv_path=env_path, override=False)

    def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
        """Send a chat-completion request and return the cleaned reply text.

        Args:
            messages: Chat messages placed in the payload's ``messages`` field.
            **kwargs: Extra payload fields merged into the request body after
                ``model`` and ``messages``, so they can override either.

        Returns:
            The content of the first choice's message, passed through
            ``strip_thinking``.

        Raises:
            LLMError: When all attempts fail, or on an HTTP error whose status
                is not retryable. The underlying exception is chained as
                ``__cause__``.
        """
        url = f"{self.base_url}/chat/completions"
        payload = {"model": self.model, "messages": messages, **kwargs}
        headers = {"Content-Type": "application/json"}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"

        # Always make at least one request, even if retry_attempts is 0 or
        # negative; otherwise the call would fail without contacting the API.
        attempts = max(1, self.retry_attempts)
        last_error: Exception | None = None

        for attempt in range(attempts):
            try:
                response = requests.post(
                    url, json=payload, headers=headers, timeout=self.timeout
                )
                response.raise_for_status()
                content = response.json()["choices"][0]["message"]["content"]
                return strip_thinking(content)
            except requests.exceptions.HTTPError as exc:
                last_error = exc
                status = getattr(
                    getattr(exc, "response", None), "status_code", None
                )
                if status not in self._RETRYABLE_STATUS:
                    # Non-retryable HTTP statuses fail immediately.
                    break
            except Exception as exc:
                # Network failures and malformed response bodies are retried
                # on a best-effort basis.
                last_error = exc

            if attempt >= attempts - 1:
                break
            time.sleep(self.retry_delay_seconds)

        raise LLMError(f"LLM 请求失败: {last_error}") from last_error