Files
X-Financial/server/tests/test_knowledge_normalizer.py

124 lines
4.8 KiB
Python
Raw Permalink Normal View History

from __future__ import annotations
from sqlalchemy import create_engine
from sqlalchemy.orm import Session, sessionmaker
from sqlalchemy.pool import StaticPool
from app.db.base import Base
from app.services.knowledge_normalizer import KnowledgeNormalizationService
def build_session_factory() -> sessionmaker[Session]:
    """Return a session factory backed by a fresh in-memory SQLite database.

    The engine is created with ``StaticPool`` and ``check_same_thread=False``,
    then every table registered on ``Base.metadata`` is created on it before
    the factory is handed back.

    Returns:
        A ``sessionmaker`` bound to the new engine with ``autoflush`` and
        ``autocommit`` both disabled.
    """
    memory_engine = create_engine(
        "sqlite+pysqlite:///:memory:",
        poolclass=StaticPool,
        connect_args={"check_same_thread": False},
    )
    # Create the schema up front so each test starts from empty tables.
    Base.metadata.create_all(bind=memory_engine)
    factory = sessionmaker(bind=memory_engine, autoflush=False, autocommit=False)
    return factory
def test_knowledge_normalizer_appends_structured_table(monkeypatch) -> None:
    """Check that a stubbed Markdown table is appended after the raw text.

    ``runtime_chat_service.complete`` is replaced with a stub returning a fixed
    Markdown table, so no real chat call is made. The enriched text must start
    with the stripped raw text, contain the meal-allowance row, and end with
    the table's final (total) row.
    """
    raw_text = (
        "表3 出差补贴标准\n\n"
        "单位:人民币元/天\n"
        "补助类型 项目 港澳台 直辖市/特区/西藏 其他地区 国外\n"
        "餐补 自行解决餐食 75 65 55 140\n"
        "基本出差补贴 35 35 35 35\n"
        "合计 110 100 90 175\n"
    )
    canned_table = (
        "| 补助类型 | 港澳台 | 直辖市/特区/西藏 | 其他地区 | 国外 |\n"
        "|---|---:|---:|---:|---:|\n"
        "| 餐补 | 75 | 65 | 55 | 140 |\n"
        "| 基本出差补贴 | 35 | 35 | 35 | 35 |\n"
        "| 合计 | 110 | 100 | 90 | 175 |"
    )

    def fake_complete(*args, **kwargs):
        # Accept any call signature and always answer with the canned table.
        return canned_table

    open_session = build_session_factory()
    with open_session() as db:
        service = KnowledgeNormalizationService(db)
        monkeypatch.setattr(service.runtime_chat_service, "complete", fake_complete)
        enriched = service.build_enriched_text(raw_text)

    assert enriched.startswith(raw_text.strip())
    assert "| 餐补 | 75 | 65 | 55 | 140 |" in enriched
    assert enriched.endswith("| 合计 | 110 | 100 | 90 | 175 |")
def test_knowledge_normalizer_keeps_only_markdown_table_body() -> None:
    """Check that ``_sanitize_answer`` returns just the Markdown table rows.

    The input wraps a three-line table in a heading and a trailing note; the
    expected result is the table alone with no surrounding text or trailing
    newline.
    """
    model_answer = (
        "## 表3 出差补贴标准\n\n"
        "| 补助类型 | 港澳台 | 直辖市/特区/西藏 |\n"
        "|---|---:|---:|\n"
        "| 餐补 | 75 | 65 |\n\n"
        "注:主办方统一安排餐食时,不再报销餐补。"
    )
    expected_table = (
        "| 补助类型 | 港澳台 | 直辖市/特区/西藏 |\n"
        "|---|---:|---:|\n"
        "| 餐补 | 75 | 65 |"
    )

    cleaned = KnowledgeNormalizationService._sanitize_answer(model_answer)

    assert cleaned == expected_table
def test_knowledge_normalizer_builds_section_navigation_without_table() -> None:
    """Check the enrichment of chaptered text that contains no table.

    The enriched text must start with the stripped raw text and contain the
    chapter bullet, the chapter heading, the answer-clue heading, one clue
    line, and the navigation heading listed below.
    """
    raw_text = (
        "第一章 总则\n"
        "本制度适用于员工差旅报销和审批管理。\n\n"
        "第二章 住宿费标准\n"
        "住宿费按照出差城市档位和职级标准执行。\n\n"
        "第三章 交通费标准\n"
        "交通费应结合出差工具、舱位和审批要求报销。\n"
    )
    # Checked in this order so the first failure matches the original test.
    expected_fragments = (
        "- 第一章 总则",
        "## 第二章 住宿费标准",
        "# 问答线索补充",
        "- 第二章 住宿费标准:住宿费按照出差城市档位和职级标准执行",
        "# 章节导航",
    )

    open_session = build_session_factory()
    with open_session() as db:
        enriched = KnowledgeNormalizationService(db).build_enriched_text(raw_text)

    assert enriched.startswith(raw_text.strip())
    for fragment in expected_fragments:
        assert fragment in enriched
def test_knowledge_normalizer_builds_answer_clues_from_lists_and_kv_lines() -> None:
    """Check answer clues built from a key-value line, a bullet, and an article.

    All three body lines sit under one chapter heading; each is expected to
    appear in the enriched text as a clue prefixed with that chapter title,
    below the answer-clue heading.
    """
    raw_text = (
        "第一章 报销要求\n"
        "报销时限:费用发生后 30 日内提交申请。\n"
        "- 超过 30 日需补充审批说明。\n"
        "第十条 发票遗失的,应先提交遗失说明。\n"
    )
    expected_fragments = (
        "# 问答线索补充",
        "- 第一章 报销要求:报销时限:费用发生后 30 日内提交申请",
        "- 第一章 报销要求:超过 30 日需补充审批说明",
        "- 第一章 报销要求:第十条 发票遗失的,应先提交遗失说明",
    )

    open_session = build_session_factory()
    with open_session() as db:
        enriched = KnowledgeNormalizationService(db).build_enriched_text(raw_text)

    for fragment in expected_fragments:
        assert fragment in enriched
def test_knowledge_normalizer_builds_answer_clues_without_section_headings() -> None:
    """Check answer clues for text that has no chapter headings.

    With no heading to attach to, the first two body lines are expected to
    appear as clues under the ``正文`` label, below the answer-clue heading.
    """
    raw_text = (
        "报销时限:费用发生后 30 日内提交申请。\n"
        "超过 30 日需补充审批说明。\n"
        "审批材料包括发票、行程单和付款凭证。\n"
    )
    expected_fragments = (
        "# 问答线索补充",
        "- 正文:报销时限:费用发生后 30 日内提交申请",
        "- 正文:超过 30 日需补充审批说明",
    )

    open_session = build_session_factory()
    with open_session() as db:
        enriched = KnowledgeNormalizationService(db).build_enriched_text(raw_text)

    for fragment in expected_fragments:
        assert fragment in enriched