server/tests/test_runtime_chat_service.py

from __future__ import annotations

from sqlalchemy import create_engine
from sqlalchemy.orm import Session, sessionmaker
from sqlalchemy.pool import StaticPool

from app.db.base import Base
from app.services import runtime_chat as runtime_chat_module
from app.services.runtime_chat import RuntimeChatService


def build_session_factory() -> sessionmaker[Session]:
    """Return a session factory bound to a fresh in-memory SQLite database.

    The engine is created with ``StaticPool`` and ``check_same_thread=False``,
    every table registered on ``Base.metadata`` is created up front, and the
    returned factory produces sessions with autoflush and autocommit disabled.
    """
    engine_options = {
        "connect_args": {"check_same_thread": False},
        "poolclass": StaticPool,
    }
    memory_engine = create_engine("sqlite+pysqlite:///:memory:", **engine_options)
    # Materialise the schema before any session is handed out.
    Base.metadata.create_all(bind=memory_engine)
    factory = sessionmaker(bind=memory_engine, autoflush=False, autocommit=False)
    return factory


def _clear_runtime_chat_cooldown() -> None:
    """Empty the module-level ``_slot_failure_until`` container of ``runtime_chat``.

    Each test calls this first so entries left behind by an earlier test cannot
    influence it (presumably these entries mark slots that are cooling down after
    a failure -- confirm against ``app.services.runtime_chat``).
    """
    cooldown_state = runtime_chat_module._slot_failure_until
    cooldown_state.clear()


def test_runtime_chat_fails_over_to_backup_before_retrying_main(monkeypatch) -> None:
    """``complete`` returns the backup answer after the main slot raises once.

    Both the slot loader and the completion request are stubbed on the service
    instance; the test checks that the slots are attempted in the order
    ``main`` then ``backup`` and that ``main`` is not attempted a second time.
    """
    _clear_runtime_chat_cooldown()
    attempted_slots: list[str] = []

    def stub_slot_config(slot: str):
        # Give each slot its own provider/model so they are distinguishable.
        is_main = slot == "main"
        return {
            "slot": slot,
            "provider": "MiniMax" if is_main else "GLM",
            "endpoint": "https://example.com/v1",
            "model": "main-model" if is_main else "backup-model",
            "apiKey": "secret",
        }

    def stub_completion(config, messages, *, max_tokens, temperature, timeout_seconds):
        del messages, max_tokens, temperature, timeout_seconds
        slot_name = config["slot"]
        attempted_slots.append(slot_name)
        if slot_name != "main":
            return "backup answer"
        raise RuntimeError("main unavailable")

    make_session = build_session_factory()
    with make_session() as db:
        service = RuntimeChatService(db)
        monkeypatch.setattr(service, "_load_chat_slot", stub_slot_config)
        monkeypatch.setattr(service, "_request_chat_completion", stub_completion)

        user_messages = [{"role": "user", "content": "hello"}]
        answer = service.complete(user_messages)

        assert answer == "backup answer"
        assert attempted_slots == ["main", "backup"]


def test_runtime_chat_complete_with_trace_records_slot_failover(monkeypatch) -> None:
    """``complete_with_trace`` reports one failed and one succeeded call on failover.

    The main slot raises, the backup slot answers; the trace must carry the
    failing provider, its error message and the model of the succeeding call.
    """
    _clear_runtime_chat_cooldown()

    def stub_slot_config(slot: str):
        is_main = slot == "main"
        return {
            "slot": slot,
            "provider": "MiniMax" if is_main else "GLM",
            "endpoint": "https://example.com/v1",
            "model": "main-model" if is_main else "backup-model",
            "apiKey": "secret",
        }

    def stub_completion(config, messages, *, max_tokens, temperature, timeout_seconds):
        del messages, max_tokens, temperature, timeout_seconds
        if config["slot"] != "main":
            return "backup answer"
        raise RuntimeError("incorrect api key")

    make_session = build_session_factory()
    with make_session() as db:
        service = RuntimeChatService(db)
        monkeypatch.setattr(service, "_load_chat_slot", stub_slot_config)
        monkeypatch.setattr(service, "_request_chat_completion", stub_completion)

        user_messages = [{"role": "user", "content": "hello"}]
        result = service.complete_with_trace(user_messages)

        assert result.text == "backup answer"
        statuses = [entry.status for entry in result.calls]
        assert statuses == ["failed", "succeeded"]
        # The first recorded call is the failed attempt against the main slot.
        assert result.calls[0].provider == "MiniMax"
        assert result.calls[0].error_message == "incorrect api key"
        assert result.calls_as_dicts()[1]["model"] == "backup-model"


def test_runtime_chat_does_not_rehit_failed_slots_during_cooldown(monkeypatch) -> None:
    """When every slot raises, ``complete`` returns ``None`` after one try per slot.

    ``app.services.runtime_chat.sleep`` is replaced with a no-op (presumably it is
    used between attempts -- confirm against the service) so the test does not wait.
    """
    _clear_runtime_chat_cooldown()
    attempted_slots: list[str] = []

    def stub_slot_config(slot: str):
        return {
            "slot": slot,
            "provider": slot,
            "endpoint": "https://example.com/v1",
            "model": f"{slot}-model",
            "apiKey": "secret",
        }

    def always_failing_completion(config, messages, *, max_tokens, temperature, timeout_seconds):
        del messages, max_tokens, temperature, timeout_seconds
        attempted_slots.append(config["slot"])
        raise RuntimeError("unavailable")

    def skip_sleep(*_args, **_kwargs):
        return None

    make_session = build_session_factory()
    with make_session() as db:
        service = RuntimeChatService(db)
        monkeypatch.setattr(service, "_load_chat_slot", stub_slot_config)
        monkeypatch.setattr(service, "_request_chat_completion", always_failing_completion)
        monkeypatch.setattr("app.services.runtime_chat.sleep", skip_sleep)

        outcome = service.complete([{"role": "user", "content": "hello"}])

        assert outcome is None
        assert attempted_slots == ["main", "backup"]


def test_runtime_chat_disables_glm_thinking_for_direct_user_answers(monkeypatch) -> None:
    """A GLM request sends ``thinking: {"type": "disabled"}`` and the given timeout.

    The module-level ``_send_json_request`` is stubbed to record its arguments
    and to return a minimal successful chat-completion response.
    """
    _clear_runtime_chat_cooldown()
    recorded: dict[str, object] = {}

    def stub_send_json_request(method, url, *, headers, payload, timeout_seconds):
        recorded.update(
            method=method,
            url=url,
            headers=headers,
            payload=payload,
            timeout_seconds=timeout_seconds,
        )
        response_body = {"choices": [{"message": {"content": "ok"}}]}
        return 200, response_body

    make_session = build_session_factory()
    with make_session() as db:
        service = RuntimeChatService(db)
        monkeypatch.setattr("app.services.runtime_chat._send_json_request", stub_send_json_request)

        request_kwargs = {
            "provider": "GLM",
            "endpoint": "https://open.bigmodel.cn/api/paas/v4/",
            "model": "glm-5.1",
            "api_key": "secret",
            "messages": [{"role": "user", "content": "hello"}],
            "max_tokens": 32,
            "temperature": 0.2,
            "timeout_seconds": 17,
        }
        answer = service._request_openai_compatible(**request_kwargs)

        assert answer == "ok"
        assert recorded["payload"]["thinking"] == {"type": "disabled"}
        assert recorded["timeout_seconds"] == 17


def test_runtime_chat_openai_compatible_tool_call_payload(monkeypatch) -> None:
    """A tool-call request forwards tools/tool_choice and parses the returned call.

    The stubbed transport records the outgoing request and answers with a single
    function tool call; the test checks the parsed name and arguments, the target
    URL, the forwarded tool definitions and the bearer authorization header.
    """
    _clear_runtime_chat_cooldown()
    recorded: dict[str, object] = {}

    def stub_send_json_request(method, url, *, headers, payload, timeout_seconds):
        recorded.update(
            method=method,
            url=url,
            headers=headers,
            payload=payload,
            timeout_seconds=timeout_seconds,
        )
        # The arguments field is a JSON-encoded string, as in the original fixture.
        returned_call = {
            "id": "call_001",
            "type": "function",
            "function": {
                "name": "submit_steward_intent_plan",
                "arguments": "{\"tasks\": []}",
            },
        }
        assistant_message = {"tool_calls": [returned_call]}
        return 200, {"choices": [{"message": assistant_message}]}

    make_session = build_session_factory()
    with make_session() as db:
        service = RuntimeChatService(db)
        monkeypatch.setattr("app.services.runtime_chat._send_json_request", stub_send_json_request)

        request_kwargs = {
            "provider": "OpenAI Compatible",
            "endpoint": "https://api.example.com/v1",
            "model": "gpt-test",
            "api_key": "secret",
            "messages": [{"role": "user", "content": "hello"}],
            "tools": [{"type": "function", "function": {"name": "submit_steward_intent_plan"}}],
            "tool_choice": {"type": "function", "function": {"name": "submit_steward_intent_plan"}},
            "max_tokens": 128,
            "temperature": 0.1,
            "timeout_seconds": 19,
        }
        tool_call = service._request_openai_compatible_tool_call(**request_kwargs)

        assert tool_call is not None
        assert tool_call.name == "submit_steward_intent_plan"
        assert tool_call.arguments == {"tasks": []}
        assert recorded["url"] == "https://api.example.com/v1/chat/completions"
        assert recorded["payload"]["tools"][0]["function"]["name"] == "submit_steward_intent_plan"
        assert recorded["payload"]["tool_choice"]["function"]["name"] == "submit_steward_intent_plan"
        assert recorded["headers"]["Authorization"] == "Bearer secret"


def test_runtime_chat_supports_single_pass_fast_failover(monkeypatch) -> None:
    """With ``max_attempts=1`` each slot is tried once using its own slot timeout.

    Both slots raise, so ``complete`` returns ``None``; the recorded timeouts must
    be the per-slot values from ``slot_timeouts`` rather than ``timeout_seconds``.
    """
    _clear_runtime_chat_cooldown()
    attempts: list[tuple[str, int]] = []

    def stub_slot_config(slot: str):
        return {
            "slot": slot,
            "provider": slot,
            "endpoint": "https://example.com/v1",
            "model": f"{slot}-model",
            "apiKey": "secret",
        }

    def always_failing_completion(config, messages, *, max_tokens, temperature, timeout_seconds):
        del messages, max_tokens, temperature
        # Record the timeout actually handed to this slot before failing.
        attempt = (config["slot"], timeout_seconds)
        attempts.append(attempt)
        raise RuntimeError("unavailable")

    make_session = build_session_factory()
    with make_session() as db:
        service = RuntimeChatService(db)
        monkeypatch.setattr(service, "_load_chat_slot", stub_slot_config)
        monkeypatch.setattr(service, "_request_chat_completion", always_failing_completion)

        outcome = service.complete(
            [{"role": "user", "content": "hello"}],
            timeout_seconds=15,
            slot_timeouts={"main": 8, "backup": 20},
            max_attempts=1,
        )

        assert outcome is None
        assert attempts == [("main", 8), ("backup", 20)]


def test_runtime_chat_skips_slot_during_cooldown(monkeypatch) -> None:
    """A slot that just failed is not attempted again on the next ``complete`` call.

    The first call tries ``main`` (which raises) and then ``backup``; the second
    call must go to ``backup`` only, giving ``["main", "backup", "backup"]``.
    """
    _clear_runtime_chat_cooldown()
    attempted_slots: list[str] = []

    def stub_slot_config(slot: str):
        return {
            "slot": slot,
            "provider": slot,
            "endpoint": "https://example.com/v1",
            "model": f"{slot}-model",
            "apiKey": "secret",
        }

    def stub_completion(config, messages, *, max_tokens, temperature, timeout_seconds):
        del messages, max_tokens, temperature, timeout_seconds
        slot_name = config["slot"]
        attempted_slots.append(slot_name)
        if slot_name != "main":
            return "backup answer"
        raise RuntimeError("main unavailable")

    make_session = build_session_factory()
    with make_session() as db:
        service = RuntimeChatService(db)
        monkeypatch.setattr(service, "_load_chat_slot", stub_slot_config)
        monkeypatch.setattr(service, "_request_chat_completion", stub_completion)

        first_answer = service.complete([{"role": "user", "content": "hello"}], max_attempts=1)
        assert first_answer == "backup answer"
        second_answer = service.complete([{"role": "user", "content": "hello again"}], max_attempts=1)
        assert second_answer == "backup answer"
        assert attempted_slots == ["main", "backup", "backup"]
feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能。主要变更: - 移除Hermes智能体及相关回调服务 - 新增知识库RAG、同步、调度、规范化和索引任务服务 - 重构orchestrator服务，增强运行时聊天功能 - 更新前端聊天、政策制度、设置等页面样式和逻辑 - 更新expense_claims和document_intelligence服务 - 删除llm_wiki相关服务和测试文件 - 更新docker-compose配置和启动脚本 2026-05-17 08:38:41 +00:00			`from __future__ import annotations`

			`from sqlalchemy import create_engine`
			`from sqlalchemy.orm import Session, sessionmaker`
			`from sqlalchemy.pool import StaticPool`

			`from app.db.base import Base`
			`from app.services import runtime_chat as runtime_chat_module`
			`from app.services.runtime_chat import RuntimeChatService`


			`def build_session_factory() -> sessionmaker[Session]:`
			`engine = create_engine(`
			`"sqlite+pysqlite:///:memory:",`
			`connect_args={"check_same_thread": False},`
			`poolclass=StaticPool,`
			`)`
			`Base.metadata.create_all(bind=engine)`
			`return sessionmaker(bind=engine, autoflush=False, autocommit=False)`


			`def _clear_runtime_chat_cooldown() -> None:`
			`runtime_chat_module._slot_failure_until.clear()`


			`def test_runtime_chat_fails_over_to_backup_before_retrying_main(monkeypatch) -> None:`
			`_clear_runtime_chat_cooldown()`
			`session_factory = build_session_factory()`
			`with session_factory() as db:`
			`service = RuntimeChatService(db)`
			`calls: list[str] = []`

			`def fake_load_chat_slot(slot: str):`
			`return {`
			`"slot": slot,`
			`"provider": "MiniMax" if slot == "main" else "GLM",`
			`"endpoint": "https://example.com/v1",`
			`"model": "main-model" if slot == "main" else "backup-model",`
			`"apiKey": "secret",`
			`}`

			`def fake_request_chat_completion(config, messages, *, max_tokens, temperature, timeout_seconds):`
			`del messages, max_tokens, temperature, timeout_seconds`
			`calls.append(config["slot"])`
			`if config["slot"] == "main":`
			`raise RuntimeError("main unavailable")`
			`return "backup answer"`

			`monkeypatch.setattr(service, "_load_chat_slot", fake_load_chat_slot)`
			`monkeypatch.setattr(service, "_request_chat_completion", fake_request_chat_completion)`

			`answer = service.complete([{"role": "user", "content": "hello"}])`

			`assert answer == "backup answer"`
			`assert calls == ["main", "backup"]`


feat: 新增风险图谱算法与系统仪表盘及操作反馈体系。后端新增风险图谱算法模块、风险观察与反馈服务、规则 DSL 校验器和可解释性引擎，完善系统仪表盘和财务仪表盘统计，优化 agent 运行和编排执行链路，清理旧开发文档；前端新增系统趋势、负载热力图等多种仪表盘图表组件，完善操作反馈对话框和工作台日期选择器，优化报销创建和审批详情交互，补充单元测试覆盖。 2026-05-30 15:46:51 +08:00			`def test_runtime_chat_complete_with_trace_records_slot_failover(monkeypatch) -> None:`
			`_clear_runtime_chat_cooldown()`
			`session_factory = build_session_factory()`
			`with session_factory() as db:`
			`service = RuntimeChatService(db)`

			`def fake_load_chat_slot(slot: str):`
			`return {`
			`"slot": slot,`
			`"provider": "MiniMax" if slot == "main" else "GLM",`
			`"endpoint": "https://example.com/v1",`
			`"model": "main-model" if slot == "main" else "backup-model",`
			`"apiKey": "secret",`
			`}`

			`def fake_request_chat_completion(config, messages, *, max_tokens, temperature, timeout_seconds):`
			`del messages, max_tokens, temperature, timeout_seconds`
			`if config["slot"] == "main":`
			`raise RuntimeError("incorrect api key")`
			`return "backup answer"`

			`monkeypatch.setattr(service, "_load_chat_slot", fake_load_chat_slot)`
			`monkeypatch.setattr(service, "_request_chat_completion", fake_request_chat_completion)`

			`result = service.complete_with_trace([{"role": "user", "content": "hello"}])`

			`assert result.text == "backup answer"`
			`assert [item.status for item in result.calls] == ["failed", "succeeded"]`
			`assert result.calls[0].provider == "MiniMax"`
			`assert result.calls[0].error_message == "incorrect api key"`
			`assert result.calls_as_dicts()[1]["model"] == "backup-model"`


feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能。主要变更: - 移除Hermes智能体及相关回调服务 - 新增知识库RAG、同步、调度、规范化和索引任务服务 - 重构orchestrator服务，增强运行时聊天功能 - 更新前端聊天、政策制度、设置等页面样式和逻辑 - 更新expense_claims和document_intelligence服务 - 删除llm_wiki相关服务和测试文件 - 更新docker-compose配置和启动脚本 2026-05-17 08:38:41 +00:00			`def test_runtime_chat_does_not_rehit_failed_slots_during_cooldown(monkeypatch) -> None:`
			`_clear_runtime_chat_cooldown()`
			`session_factory = build_session_factory()`
			`with session_factory() as db:`
			`service = RuntimeChatService(db)`
			`calls: list[str] = []`

			`def fake_load_chat_slot(slot: str):`
			`return {`
			`"slot": slot,`
			`"provider": slot,`
			`"endpoint": "https://example.com/v1",`
			`"model": f"{slot}-model",`
			`"apiKey": "secret",`
			`}`

			`def fake_request_chat_completion(config, messages, *, max_tokens, temperature, timeout_seconds):`
			`del messages, max_tokens, temperature, timeout_seconds`
			`calls.append(config["slot"])`
			`raise RuntimeError("unavailable")`

			`monkeypatch.setattr(service, "_load_chat_slot", fake_load_chat_slot)`
			`monkeypatch.setattr(service, "_request_chat_completion", fake_request_chat_completion)`
			`monkeypatch.setattr("app.services.runtime_chat.sleep", lambda *_args, **_kwargs: None)`

			`assert service.complete([{"role": "user", "content": "hello"}]) is None`
			`assert calls == ["main", "backup"]`


			`def test_runtime_chat_disables_glm_thinking_for_direct_user_answers(monkeypatch) -> None:`
			`_clear_runtime_chat_cooldown()`
			`session_factory = build_session_factory()`
			`with session_factory() as db:`
			`service = RuntimeChatService(db)`
			`captured: dict[str, object] = {}`

			`def fake_send_json_request(method, url, *, headers, payload, timeout_seconds):`
			`captured["method"] = method`
			`captured["url"] = url`
			`captured["headers"] = headers`
			`captured["payload"] = payload`
			`captured["timeout_seconds"] = timeout_seconds`
			`return 200, {"choices": [{"message": {"content": "ok"}}]}`

			`monkeypatch.setattr("app.services.runtime_chat._send_json_request", fake_send_json_request)`

			`answer = service._request_openai_compatible(`
			`provider="GLM",`
			`endpoint="https://open.bigmodel.cn/api/paas/v4/",`
			`model="glm-5.1",`
			`api_key="secret",`
			`messages=[{"role": "user", "content": "hello"}],`
			`max_tokens=32,`
			`temperature=0.2,`
			`timeout_seconds=17,`
			`)`

			`assert answer == "ok"`
			`assert captured["payload"]["thinking"] == {"type": "disabled"}`
			`assert captured["timeout_seconds"] == 17`


feat: 报销预审会话状态管理与工作台交互增强 - 新增差旅报销会话状态管理与对话模型重构 - 增强风险观测服务与运行时聊天上下文作用域 - 优化工作台图标资源、助理意图识别与摘要工具 - 完善报销创建视图样式与差旅详情页标准调整交互 - 补充风险观测、运行时聊天与报销端点测试覆盖 2026-06-04 11:03:29 +08:00			`def test_runtime_chat_openai_compatible_tool_call_payload(monkeypatch) -> None:`
			`_clear_runtime_chat_cooldown()`
			`session_factory = build_session_factory()`
			`with session_factory() as db:`
			`service = RuntimeChatService(db)`
			`captured: dict[str, object] = {}`

			`def fake_send_json_request(method, url, *, headers, payload, timeout_seconds):`
			`captured["method"] = method`
			`captured["url"] = url`
			`captured["headers"] = headers`
			`captured["payload"] = payload`
			`captured["timeout_seconds"] = timeout_seconds`
			`return 200, {`
			`"choices": [`
			`{`
			`"message": {`
			`"tool_calls": [`
			`{`
			`"id": "call_001",`
			`"type": "function",`
			`"function": {`
			`"name": "submit_steward_intent_plan",`
			`"arguments": "{\"tasks\": []}",`
			`},`
			`}`
			`]`
			`}`
			`}`
			`]`
			`}`

			`monkeypatch.setattr("app.services.runtime_chat._send_json_request", fake_send_json_request)`

			`tool_call = service._request_openai_compatible_tool_call(`
			`provider="OpenAI Compatible",`
			`endpoint="https://api.example.com/v1",`
			`model="gpt-test",`
			`api_key="secret",`
			`messages=[{"role": "user", "content": "hello"}],`
			`tools=[{"type": "function", "function": {"name": "submit_steward_intent_plan"}}],`
			`tool_choice={"type": "function", "function": {"name": "submit_steward_intent_plan"}},`
			`max_tokens=128,`
			`temperature=0.1,`
			`timeout_seconds=19,`
			`)`

			`assert tool_call is not None`
			`assert tool_call.name == "submit_steward_intent_plan"`
			`assert tool_call.arguments == {"tasks": []}`
			`assert captured["url"] == "https://api.example.com/v1/chat/completions"`
			`assert captured["payload"]["tools"][0]["function"]["name"] == "submit_steward_intent_plan"`
			`assert captured["payload"]["tool_choice"]["function"]["name"] == "submit_steward_intent_plan"`
			`assert captured["headers"]["Authorization"] == "Bearer secret"`


feat: 重构知识库系统，移除Hermes集成，增强RAG和同步功能。主要变更: - 移除Hermes智能体及相关回调服务 - 新增知识库RAG、同步、调度、规范化和索引任务服务 - 重构orchestrator服务，增强运行时聊天功能 - 更新前端聊天、政策制度、设置等页面样式和逻辑 - 更新expense_claims和document_intelligence服务 - 删除llm_wiki相关服务和测试文件 - 更新docker-compose配置和启动脚本 2026-05-17 08:38:41 +00:00			`def test_runtime_chat_supports_single_pass_fast_failover(monkeypatch) -> None:`
			`_clear_runtime_chat_cooldown()`
			`session_factory = build_session_factory()`
			`with session_factory() as db:`
			`service = RuntimeChatService(db)`
			`calls: list[tuple[str, int]] = []`

			`def fake_load_chat_slot(slot: str):`
			`return {`
			`"slot": slot,`
			`"provider": slot,`
			`"endpoint": "https://example.com/v1",`
			`"model": f"{slot}-model",`
			`"apiKey": "secret",`
			`}`

			`def fake_request_chat_completion(config, messages, *, max_tokens, temperature, timeout_seconds):`
			`del messages, max_tokens, temperature`
			`calls.append((config["slot"], timeout_seconds))`
			`raise RuntimeError("unavailable")`

			`monkeypatch.setattr(service, "_load_chat_slot", fake_load_chat_slot)`
			`monkeypatch.setattr(service, "_request_chat_completion", fake_request_chat_completion)`

			`assert (`
			`service.complete(`
			`[{"role": "user", "content": "hello"}],`
			`timeout_seconds=15,`
			`slot_timeouts={"main": 8, "backup": 20},`
			`max_attempts=1,`
			`)`
			`is None`
			`)`
			`assert calls == [("main", 8), ("backup", 20)]`


			`def test_runtime_chat_skips_slot_during_cooldown(monkeypatch) -> None:`
			`_clear_runtime_chat_cooldown()`
			`session_factory = build_session_factory()`
			`with session_factory() as db:`
			`service = RuntimeChatService(db)`
			`calls: list[str] = []`

			`def fake_load_chat_slot(slot: str):`
			`return {`
			`"slot": slot,`
			`"provider": slot,`
			`"endpoint": "https://example.com/v1",`
			`"model": f"{slot}-model",`
			`"apiKey": "secret",`
			`}`

			`def fake_request_chat_completion(config, messages, *, max_tokens, temperature, timeout_seconds):`
			`del messages, max_tokens, temperature, timeout_seconds`
			`calls.append(config["slot"])`
			`if config["slot"] == "main":`
			`raise RuntimeError("main unavailable")`
			`return "backup answer"`

			`monkeypatch.setattr(service, "_load_chat_slot", fake_load_chat_slot)`
			`monkeypatch.setattr(service, "_request_chat_completion", fake_request_chat_completion)`

			`assert service.complete([{"role": "user", "content": "hello"}], max_attempts=1) == "backup answer"`
			`assert service.complete([{"role": "user", "content": "hello again"}], max_attempts=1) == "backup answer"`
			`assert calls == ["main", "backup", "backup"]`