refactor(server): steward 决策链路改用 LangGraph 编排

- 新增 StewardGraphPlannerService,用 LangGraph 状态图编排意图识别→流程判断→模型/规则分支→兜底,替代原 planner 内线性调用
- 新增 StewardGraphRuntimeService 编排运行时决策与槽位决策;StewardActionContracts/Executor 统一动作合约与执行
- steward_intent_agent/application_fact_resolver/runtime_chat 适配图执行器,config 暴露图相关开关
- pyproject/uv.lock 新增 langgraph 依赖
- 新增 graph_planner/graph_runtime/action_executor 测试,更新 intent_agent/planner/fact_resolver/runtime_chat/reimbursement 测试
This commit is contained in:
caoxiaozhu
2026-06-24 21:58:35 +08:00
parent 545b31d32f
commit 5311c99d69
25 changed files with 3580 additions and 104 deletions

View File

@@ -242,6 +242,50 @@ def test_runtime_chat_supports_single_pass_fast_failover(monkeypatch) -> None:
assert calls == [("main", 8), ("backup", 20)]
def test_runtime_chat_complete_with_tool_call_fails_over_to_backup_before_retrying_main(
    monkeypatch,
) -> None:
    """A failing main slot hands the tool call over to the backup slot.

    The stubbed request raises for the ``main`` slot and returns a tool call
    for any other slot. Although ``max_attempts=3`` is passed, the test
    expects exactly one request per slot, in the order main -> backup.
    """
    _clear_runtime_chat_cooldown()
    session_factory = build_session_factory()
    with session_factory() as db:
        service = RuntimeChatService(db)
        requested_slots: list[str] = []

        def fake_load_chat_slot(slot: str):
            # Two distinguishable slot configs so the recorded order is meaningful.
            is_main = slot == "main"
            return {
                "slot": slot,
                "provider": "MiniMax" if is_main else "GLM",
                "endpoint": "https://example.com/v1",
                "model": "main-model" if is_main else "backup-model",
                "apiKey": "secret",
            }

        def fake_request_chat_tool_call(
            config,
            messages,
            *,
            tools,
            tool_choice,
            max_tokens,
            temperature,
            timeout_seconds,
        ):
            # Only the slot identity matters to this stub.
            del messages, tools, tool_choice, max_tokens, temperature, timeout_seconds
            slot_name = config["slot"]
            requested_slots.append(slot_name)
            if slot_name != "main":
                return runtime_chat_module.RuntimeChatToolCall(
                    name="submit_steward_intent_plan",
                    arguments={"tasks": [{"task_type": "expense_application"}]},
                )
            raise RuntimeError("main tool call unavailable")

        monkeypatch.setattr(service, "_load_chat_slot", fake_load_chat_slot)
        monkeypatch.setattr(service, "_request_chat_tool_call", fake_request_chat_tool_call)

        def plan_tool() -> dict:
            # Fresh dict per call so tools and tool_choice never share an object.
            return {"type": "function", "function": {"name": "submit_steward_intent_plan"}}

        result = service.complete_with_tool_call(
            [{"role": "user", "content": "保存草稿"}],
            tools=[plan_tool()],
            tool_choice=plan_tool(),
            max_attempts=3,
            use_failure_cooldown=False,
        )

        tool_call = result.tool_call
        assert tool_call is not None
        assert tool_call.name == "submit_steward_intent_plan"
        assert tool_call.arguments["tasks"][0]["task_type"] == "expense_application"
        assert requested_slots == ["main", "backup"]
        statuses = [item.status for item in result.calls]
        assert statuses == ["failed", "succeeded"]
def test_runtime_chat_skips_slot_during_cooldown(monkeypatch) -> None:
_clear_runtime_chat_cooldown()
session_factory = build_session_factory()
@@ -271,3 +315,51 @@ def test_runtime_chat_skips_slot_during_cooldown(monkeypatch) -> None:
assert service.complete([{"role": "user", "content": "hello"}], max_attempts=1) == "backup answer"
assert service.complete([{"role": "user", "content": "hello again"}], max_attempts=1) == "backup answer"
assert calls == ["main", "backup", "backup"]
def test_runtime_chat_tool_call_can_retry_without_failure_cooldown(monkeypatch) -> None:
    """With the failure cooldown disabled, the same slot is retried every attempt.

    The stubbed request always raises, ``slot_priority`` contains only
    ``main`` and ``max_attempts`` is 3. The test expects three recorded
    requests to ``main``, three ``failed`` call records and no tool call.
    """
    _clear_runtime_chat_cooldown()
    session_factory = build_session_factory()
    with session_factory() as db:
        service = RuntimeChatService(db)
        requested_slots: list[str] = []

        def fake_load_chat_slot(slot: str):
            # Provider and model are derived from the slot name.
            slot_config = {"slot": slot, "provider": slot}
            slot_config["endpoint"] = "https://example.com/v1"
            slot_config["model"] = f"{slot}-model"
            slot_config["apiKey"] = "secret"
            return slot_config

        def fake_request_chat_tool_call(
            config,
            messages,
            *,
            tools,
            tool_choice,
            max_tokens,
            temperature,
            timeout_seconds,
        ):
            # Record the slot, then fail unconditionally.
            del messages, tools, tool_choice, max_tokens, temperature, timeout_seconds
            requested_slots.append(config["slot"])
            raise RuntimeError("tool call timeout")

        def skip_sleep(*_args, **_kwargs):
            # Stand-in for the module's sleep so the test does not wait.
            return None

        monkeypatch.setattr(service, "_load_chat_slot", fake_load_chat_slot)
        monkeypatch.setattr(service, "_request_chat_tool_call", fake_request_chat_tool_call)
        monkeypatch.setattr("app.services.runtime_chat.sleep", skip_sleep)

        def plan_tool() -> dict:
            # Fresh dict per call so tools and tool_choice never share an object.
            return {"type": "function", "function": {"name": "submit_steward_intent_plan"}}

        result = service.complete_with_tool_call(
            [{"role": "user", "content": "hello"}],
            tools=[plan_tool()],
            tool_choice=plan_tool(),
            slot_priority=("main",),
            max_attempts=3,
            use_failure_cooldown=False,
        )

        assert result.tool_call is None
        assert requested_slots == ["main"] * 3
        statuses = [item.status for item in result.calls]
        assert statuses == ["failed"] * 3