feat: 报销预审会话状态管理与工作台交互增强

- 新增差旅报销会话状态管理与对话模型重构
- 增强风险观测服务与运行时聊天上下文作用域
- 优化工作台图标资源、助理意图识别与摘要工具
- 完善报销创建视图样式与差旅详情页标准调整交互
- 补充风险观测、运行时聊天与报销端点测试覆盖
This commit is contained in:
caoxiaozhu
2026-06-04 11:03:29 +08:00
parent 87da5df91b
commit 1cbf3fee44
60 changed files with 4156 additions and 393 deletions

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import json
from dataclasses import dataclass
from http import HTTPStatus
from time import monotonic, sleep
@@ -61,6 +62,23 @@ class RuntimeChatResult:
return [item.model_dump() for item in self.calls]
@dataclass(slots=True)
class RuntimeChatToolCall:
name: str
arguments: dict[str, Any]
call_id: str | None = None
raw_arguments: str = ""
@dataclass(slots=True)
class RuntimeToolCallResult:
tool_call: RuntimeChatToolCall | None
calls: list[RuntimeChatCallTrace]
def calls_as_dicts(self) -> list[dict[str, Any]]:
return [item.model_dump() for item in self.calls]
class RuntimeChatService:
def __init__(self, db: Session) -> None:
self.db = db
@@ -208,6 +226,131 @@ class RuntimeChatService:
return RuntimeChatResult(None, calls)
def complete_with_tool_call(
    self,
    messages: list[dict[str, Any]],
    *,
    tools: list[dict[str, Any]],
    tool_choice: dict[str, Any] | str | None = None,
    slot_priority: tuple[str, ...] = ("main", "backup"),
    max_tokens: int = 1200,
    temperature: float = 0.1,
    timeout_seconds: int | None = None,
    slot_timeouts: dict[str, int] | None = None,
    max_attempts: int | None = None,
) -> RuntimeToolCallResult:
    """Request a tool call from the configured chat slots, with retries.

    Slots listed in ``slot_priority`` are loaded in order; a slot without a
    configuration is recorded as skipped (``not_configured``). Each attempt
    then walks the configured slots in that order and returns as soon as one
    of them yields a tool call. A slot whose cooldown deadline has not
    passed is skipped, and a slot that raises is put into cooldown.

    Args:
        messages: Chat messages forwarded to the provider.
        tools: Tool definitions forwarded to the provider.
        tool_choice: Tool-choice directive forwarded to the provider.
        slot_priority: Slot names to try, in order.
        max_tokens: Completion token limit forwarded to the provider.
        temperature: Sampling temperature forwarded to the provider.
        timeout_seconds: Default per-request timeout; a falsy value selects
            ``DEFAULT_RUNTIME_CHAT_TIMEOUT_SECONDS``.
        slot_timeouts: Optional per-slot timeout overrides keyed by slot name.
        max_attempts: Number of passes over the slots; a falsy value selects
            ``DEFAULT_RUNTIME_CHAT_RETRY_ATTEMPTS``.

    Returns:
        A ``RuntimeToolCallResult`` holding the first tool call obtained (or
        ``None`` when every attempt was skipped, empty or failed) and the
        trace of every slot visit.
    """
    traces: list[RuntimeChatCallTrace] = []

    def record(
        slot_config: dict[str, str],
        attempt_no: int,
        status: str,
        **details: Any,
    ) -> None:
        # Append one trace entry for a visit to a configured slot.
        traces.append(
            RuntimeChatCallTrace(
                slot=slot_config["slot"],
                provider=slot_config["provider"],
                model=slot_config["model"],
                attempt=attempt_no,
                status=status,
                **details,
            )
        )

    active_configs: list[dict[str, str]] = []
    for slot in slot_priority:
        loaded = self._load_chat_slot(slot)
        if loaded is not None:
            active_configs.append(loaded)
            continue
        traces.append(
            RuntimeChatCallTrace(
                slot=slot,
                provider="",
                model="",
                attempt=0,
                status="skipped",
                skipped_reason="not_configured",
            )
        )
    if not active_configs:
        return RuntimeToolCallResult(None, traces)

    default_timeout = timeout_seconds or DEFAULT_RUNTIME_CHAT_TIMEOUT_SECONDS
    per_slot_timeouts = dict(slot_timeouts or {})
    attempt_limit = max_attempts or DEFAULT_RUNTIME_CHAT_RETRY_ATTEMPTS

    for attempt in range(1, attempt_limit + 1):
        for config in active_configs:
            cache_key = self._build_slot_cache_key(config)
            if _slot_failure_until.get(cache_key, 0.0) > monotonic():
                logger.info(
                    "Skip runtime chat tool slot=%s provider=%s because it is in cooldown",
                    config["slot"],
                    config["provider"],
                )
                record(config, attempt, "skipped", skipped_reason="cooldown")
                continue

            started = monotonic()
            try:
                tool_call = self._request_chat_tool_call(
                    config,
                    messages,
                    tools=tools,
                    tool_choice=tool_choice,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    timeout_seconds=per_slot_timeouts.get(
                        config["slot"],
                        default_timeout,
                    ),
                )
                duration_ms = int((monotonic() - started) * 1000)
                if tool_call is None:
                    # The provider answered but produced no tool call; move
                    # on to the next slot without touching the cooldown map.
                    record(
                        config,
                        attempt,
                        "empty",
                        duration_ms=duration_ms,
                        error_message="模型未返回工具调用。",
                    )
                    continue
                # Success clears any cooldown recorded for this slot.
                _slot_failure_until.pop(cache_key, None)
                record(config, attempt, "succeeded", duration_ms=duration_ms)
                return RuntimeToolCallResult(tool_call, traces)
            except Exception as exc:
                duration_ms = int((monotonic() - started) * 1000)
                # NOTE(review): every exception puts the slot into cooldown;
                # confirm that is intended if the cooldown map is shared with
                # other request paths.
                _slot_failure_until[cache_key] = (
                    monotonic() + DEFAULT_RUNTIME_CHAT_FAILURE_COOLDOWN_SECONDS
                )
                record(
                    config,
                    attempt,
                    "failed",
                    duration_ms=duration_ms,
                    error_message=str(exc),
                )
                logger.warning(
                    "Runtime chat tool request failed slot=%s provider=%s attempt=%s/%s: %s",
                    config["slot"],
                    config["provider"],
                    attempt,
                    attempt_limit,
                    exc,
                )
        # Pause between passes, but not after the final one.
        if attempt < attempt_limit:
            sleep(DEFAULT_RUNTIME_CHAT_RETRY_DELAY_SECONDS)

    return RuntimeToolCallResult(None, traces)
@staticmethod
def _build_slot_cache_key(config: dict[str, str]) -> str:
return "|".join(
@@ -295,6 +438,51 @@ class RuntimeChatService:
timeout_seconds=timeout_seconds,
)
def _request_chat_tool_call(
    self,
    config: dict[str, str],
    messages: list[dict[str, Any]],
    *,
    tools: list[dict[str, Any]],
    tool_choice: dict[str, Any] | str | None,
    max_tokens: int,
    temperature: float,
    timeout_seconds: int,
) -> RuntimeChatToolCall | None:
    """Route a tool-call request to the handler for the slot's provider.

    ``config`` must carry the ``provider``, ``endpoint``, ``model`` and
    ``apiKey`` keys. "Azure OpenAI" goes to the Azure handler, "Ollama" is
    rejected, and every other provider goes to the OpenAI-compatible handler.

    Returns:
        Whatever the selected handler returns: a tool call or ``None``.

    Raises:
        ConnectivityCheckError: When the provider is "Ollama".
    """
    provider = config["provider"]
    # Keyword arguments shared by both provider-specific handlers.
    shared_kwargs: dict[str, Any] = {
        "endpoint": config["endpoint"],
        "model": config["model"],
        "api_key": config["apiKey"],
        "messages": messages,
        "tools": tools,
        "tool_choice": tool_choice,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "timeout_seconds": timeout_seconds,
    }
    if provider == "Ollama":
        raise ConnectivityCheckError("Ollama 暂不支持小财管家 function calling。")
    if provider == "Azure OpenAI":
        return self._request_azure_openai_tool_call(**shared_kwargs)
    return self._request_openai_compatible_tool_call(
        provider=provider,
        **shared_kwargs,
    )
def _request_openai_compatible(
self,
*,
@@ -331,6 +519,46 @@ class RuntimeChatService:
)
return self._extract_openai_text(payload)
def _request_openai_compatible_tool_call(
    self,
    *,
    provider: str,
    endpoint: str,
    model: str,
    api_key: str,
    messages: list[dict[str, Any]],
    tools: list[dict[str, Any]],
    tool_choice: dict[str, Any] | str | None,
    max_tokens: int,
    temperature: float,
    timeout_seconds: int,
) -> RuntimeChatToolCall | None:
    """POST a tool-enabled request to a ``chat/completions`` endpoint.

    The request uses bearer authentication. A falsy ``tool_choice`` is sent
    as ``"auto"``. For the "GLM" provider the body additionally carries
    ``thinking = {"type": "disabled"}``.

    Returns:
        The tool call extracted from the response, or ``None`` if there is
        none.

    Raises:
        ConnectivityCheckError: When the response status is 400 or higher.
    """
    target_url = _ensure_path(_normalize_endpoint(endpoint), "chat/completions")
    body: dict[str, Any] = {
        "model": model,
        "messages": messages,
        "tools": tools,
        "tool_choice": tool_choice or "auto",
        "max_tokens": max_tokens,
        "temperature": temperature,
    }
    if provider == "GLM":
        body["thinking"] = {"type": "disabled"}

    auth_headers = _build_headers(api_key=api_key, use_bearer=True)
    status_code, payload = _send_json_request(
        "POST",
        target_url,
        headers=auth_headers,
        payload=body,
        timeout_seconds=timeout_seconds,
    )
    if status_code >= HTTPStatus.BAD_REQUEST:
        raise ConnectivityCheckError(
            f"模型接口返回异常状态 {status_code}",
            status_code=status_code,
        )
    return self._extract_openai_tool_call(payload)
def _request_ollama(
self,
*,
@@ -396,6 +624,41 @@ class RuntimeChatService:
)
return self._extract_openai_text(payload)
def _request_azure_openai_tool_call(
    self,
    *,
    endpoint: str,
    model: str,
    api_key: str,
    messages: list[dict[str, Any]],
    tools: list[dict[str, Any]],
    tool_choice: dict[str, Any] | str | None,
    max_tokens: int,
    temperature: float,
    timeout_seconds: int,
) -> RuntimeChatToolCall | None:
    """POST a tool-enabled chat request to an Azure OpenAI deployment.

    The URL is the deployment base built from ``endpoint`` and ``model``,
    followed by ``/chat/completions`` and the ``AZURE_API_VERSION`` query
    parameter. Headers are built with API-key rather than bearer
    authentication. A falsy ``tool_choice`` is sent as ``"auto"``.

    Returns:
        The tool call extracted from the response, or ``None`` if there is
        none.

    Raises:
        ConnectivityCheckError: When the response status is 400 or higher.
    """
    base_url = _build_azure_deployment_base(endpoint, model)
    target_url = f"{base_url}/chat/completions?api-version={AZURE_API_VERSION}"
    auth_headers = _build_headers(
        api_key=api_key,
        use_bearer=False,
        use_api_key=True,
    )
    # No "model" key in the body: the model is passed to the URL builder.
    body = {
        "messages": messages,
        "tools": tools,
        "tool_choice": tool_choice or "auto",
        "max_tokens": max_tokens,
        "temperature": temperature,
    }
    status_code, payload = _send_json_request(
        "POST",
        target_url,
        headers=auth_headers,
        payload=body,
        timeout_seconds=timeout_seconds,
    )
    if status_code >= HTTPStatus.BAD_REQUEST:
        raise ConnectivityCheckError(
            f"Azure OpenAI 返回异常状态 {status_code}",
            status_code=status_code,
        )
    return self._extract_openai_tool_call(payload)
@staticmethod
def _extract_openai_text(payload: Any) -> str:
if not isinstance(payload, dict):
@@ -426,3 +689,74 @@ class RuntimeChatService:
return text.strip()
return ""
@staticmethod
def _extract_openai_tool_call(payload: Any) -> RuntimeChatToolCall | None:
if not isinstance(payload, dict):
return None
choices = payload.get("choices")
if not isinstance(choices, list) or not choices:
return None
first_choice = choices[0]
if not isinstance(first_choice, dict):
return None
message = first_choice.get("message")
if not isinstance(message, dict):
return None
tool_calls = message.get("tool_calls")
if isinstance(tool_calls, list) and tool_calls:
first_tool = tool_calls[0]
if isinstance(first_tool, dict):
function_payload = first_tool.get("function")
if isinstance(function_payload, dict):
return RuntimeChatService._build_runtime_tool_call(
name=function_payload.get("name"),
arguments=function_payload.get("arguments"),
call_id=first_tool.get("id"),
)
function_call = message.get("function_call")
if isinstance(function_call, dict):
return RuntimeChatService._build_runtime_tool_call(
name=function_call.get("name"),
arguments=function_call.get("arguments"),
call_id=None,
)
return None
@staticmethod
def _build_runtime_tool_call(
*,
name: Any,
arguments: Any,
call_id: Any,
) -> RuntimeChatToolCall | None:
tool_name = str(name or "").strip()
if not tool_name:
return None
raw_arguments = ""
if isinstance(arguments, dict):
parsed_arguments = arguments
raw_arguments = json.dumps(arguments, ensure_ascii=False)
else:
raw_arguments = str(arguments or "").strip()
if not raw_arguments:
parsed_arguments = {}
else:
parsed = json.loads(raw_arguments)
if not isinstance(parsed, dict):
raise ValueError("工具调用参数必须是 JSON object。")
parsed_arguments = parsed
return RuntimeChatToolCall(
name=tool_name,
arguments=parsed_arguments,
call_id=str(call_id).strip() if call_id else None,
raw_arguments=raw_arguments,
)