refactor(server): steward 决策链路改用 LangGraph 编排

- 新增 StewardGraphPlannerService，用 LangGraph 状态图编排意图识别→流程判断→模型/规则分支→兜底，替代原 planner 内线性调用
- 新增 StewardGraphRuntimeService 编排运行时决策与槽位决策；StewardActionContracts/Executor 统一动作合约与执行
- steward_intent_agent/application_fact_resolver/runtime_chat 适配图执行器，config 暴露图相关开关
- pyproject/uv.lock 新增 langgraph 依赖
- 新增 graph_planner/graph_runtime/action_executor 测试，更新 intent_agent/planner/fact_resolver/runtime_chat/reimbursement 测试
2026-06-24 21:58:35 +08:00
parent 545b31d32f
commit 5311c99d69
25 changed files with 3580 additions and 104 deletions
--- a/server/tests/test_runtime_chat_service.py
+++ b/server/tests/test_runtime_chat_service.py
@@ -242,6 +242,50 @@ def test_runtime_chat_supports_single_pass_fast_failover(monkeypatch) -> None:
        assert calls == [("main", 8), ("backup", 20)]


+def test_runtime_chat_complete_with_tool_call_fails_over_to_backup_before_retrying_main(monkeypatch) -> None:  # main raises once, backup answers
+    _clear_runtime_chat_cooldown()  # NOTE(review): presumably resets cooldown state left by other tests; helper is not shown here
+    session_factory = build_session_factory()
+    with session_factory() as db:
+        service = RuntimeChatService(db)
+        calls: list[str] = []  # slot names, in the order the fake tool-call request receives them
+
+        def fake_load_chat_slot(slot: str):  # stub: canned config dict for whichever slot is requested
+            return {
+                "slot": slot,
+                "provider": "MiniMax" if slot == "main" else "GLM",
+                "endpoint": "https://example.com/v1",
+                "model": "main-model" if slot == "main" else "backup-model",
+                "apiKey": "secret",
+            }
+
+        def fake_request_chat_tool_call(config, messages, *, tools, tool_choice, max_tokens, temperature, timeout_seconds):  # stub: main raises, any other slot returns a tool call
+            del messages, tools, tool_choice, max_tokens, temperature, timeout_seconds  # discard the arguments this stub does not use
+            calls.append(config["slot"])  # record the attempt before deciding the outcome
+            if config["slot"] == "main":
+                raise RuntimeError("main tool call unavailable")  # simulate the main slot failing
+            return runtime_chat_module.RuntimeChatToolCall(
+                name="submit_steward_intent_plan",
+                arguments={"tasks": [{"task_type": "expense_application"}]},
+            )
+
+        monkeypatch.setattr(service, "_load_chat_slot", fake_load_chat_slot)  # swap the slot loader for the stub
+        monkeypatch.setattr(service, "_request_chat_tool_call", fake_request_chat_tool_call)  # swap the request method for the stub
+
+        result = service.complete_with_tool_call(
+            [{"role": "user", "content": "保存草稿"}],
+            tools=[{"type": "function", "function": {"name": "submit_steward_intent_plan"}}],
+            tool_choice={"type": "function", "function": {"name": "submit_steward_intent_plan"}},
+            max_attempts=3,  # three attempts allowed; the assertions below expect only two requests
+            use_failure_cooldown=False,
+        )
+
+        assert result.tool_call is not None  # the backup stub's tool call must be surfaced
+        assert result.tool_call.name == "submit_steward_intent_plan"
+        assert result.tool_call.arguments["tasks"][0]["task_type"] == "expense_application"
+        assert calls == ["main", "backup"]  # one failed main request, then backup; main is not requested a second time
+        assert [item.status for item in result.calls] == ["failed", "succeeded"]  # per-call statuses in the same order
+
+
 def test_runtime_chat_skips_slot_during_cooldown(monkeypatch) -> None:
    _clear_runtime_chat_cooldown()
    session_factory = build_session_factory()
@@ -271,3 +315,51 @@ def test_runtime_chat_skips_slot_during_cooldown(monkeypatch) -> None:
        assert service.complete([{"role": "user", "content": "hello"}], max_attempts=1) == "backup answer"
        assert service.complete([{"role": "user", "content": "hello again"}], max_attempts=1) == "backup answer"
        assert calls == ["main", "backup", "backup"]
+
+
+def test_runtime_chat_tool_call_can_retry_without_failure_cooldown(monkeypatch) -> None:  # every request raises; main is still requested three times
+    _clear_runtime_chat_cooldown()  # NOTE(review): presumably resets cooldown state left by other tests; helper is not shown here
+    session_factory = build_session_factory()
+    with session_factory() as db:
+        service = RuntimeChatService(db)
+        calls: list[str] = []  # slot names, in the order the fake tool-call request receives them
+
+        def fake_load_chat_slot(slot: str):  # stub: canned config dict derived from the slot name
+            return {
+                "slot": slot,
+                "provider": slot,
+                "endpoint": "https://example.com/v1",
+                "model": f"{slot}-model",
+                "apiKey": "secret",
+            }
+
+        def fake_request_chat_tool_call(  # stub: records the slot, then always raises
+            config,
+            messages,
+            *,
+            tools,
+            tool_choice,
+            max_tokens,
+            temperature,
+            timeout_seconds,
+        ):
+            del messages, tools, tool_choice, max_tokens, temperature, timeout_seconds  # discard the arguments this stub does not use
+            calls.append(config["slot"])  # record the attempt before failing
+            raise RuntimeError("tool call timeout")  # simulate a failure on every request
+
+        monkeypatch.setattr(service, "_load_chat_slot", fake_load_chat_slot)  # swap the slot loader for the stub
+        monkeypatch.setattr(service, "_request_chat_tool_call", fake_request_chat_tool_call)  # swap the request method for the stub
+        monkeypatch.setattr("app.services.runtime_chat.sleep", lambda *_args, **_kwargs: None)  # no-op sleep; presumably the retry backoff, confirm in runtime_chat
+
+        result = service.complete_with_tool_call(
+            [{"role": "user", "content": "hello"}],
+            tools=[{"type": "function", "function": {"name": "submit_steward_intent_plan"}}],
+            tool_choice={"type": "function", "function": {"name": "submit_steward_intent_plan"}},
+            slot_priority=("main",),  # only the main slot is offered
+            max_attempts=3,
+            use_failure_cooldown=False,  # with this off, the assertions below expect main to be requested on all three attempts
+        )
+
+        assert result.tool_call is None  # every request raised, so no tool call comes back
+        assert calls == ["main", "main", "main"]  # main requested once per allowed attempt
+        assert [item.status for item in result.calls] == ["failed", "failed", "failed"]  # each attempt is recorded as failed