124 lines
4.8 KiB
Python
124 lines
4.8 KiB
Python
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
from sqlalchemy import create_engine
|
||
|
|
from sqlalchemy.orm import Session, sessionmaker
|
||
|
|
from sqlalchemy.pool import StaticPool
|
||
|
|
|
||
|
|
from app.db.base import Base
|
||
|
|
from app.services.knowledge_normalizer import KnowledgeNormalizationService
|
||
|
|
|
||
|
|
|
||
|
|
def build_session_factory() -> sessionmaker[Session]:
    """Return a session factory bound to a fresh in-memory SQLite database.

    Every table registered on ``Base.metadata`` is created on the new engine
    before the factory is handed back, so callers can open sessions right away.

    Returns:
        A ``sessionmaker`` with ``autoflush`` and ``autocommit`` both disabled.
    """
    # StaticPool together with check_same_thread=False is the setup SQLAlchemy
    # describes for letting multiple sessions share one in-memory SQLite
    # database instead of each connection getting its own empty one.
    engine_options = {
        "connect_args": {"check_same_thread": False},
        "poolclass": StaticPool,
    }
    memory_engine = create_engine("sqlite+pysqlite:///:memory:", **engine_options)

    Base.metadata.create_all(bind=memory_engine)

    factory = sessionmaker(bind=memory_engine, autoflush=False, autocommit=False)
    return factory
|
||
|
|
|
||
|
|
|
||
|
|
def test_knowledge_normalizer_appends_structured_table(monkeypatch) -> None:
    """Enriched text leads with a structured-table section and ends with the source.

    ``complete`` on the service's ``runtime_chat_service`` is replaced with a
    stub that always returns a fixed markdown table, so the test does not
    depend on whatever the real implementation would produce.
    """
    make_session = build_session_factory()

    source_text = (
        "表3 出差补贴标准\n\n"
        "单位:人民币元/天\n"
        "补助类型 项目 港澳台 直辖市/特区/西藏 其他地区 国外\n"
        "餐补 自行解决餐食 75 65 55 140\n"
        "基本出差补贴 35 35 35 35\n"
        "合计 110 100 90 175\n"
    )

    def fake_complete(*args, **kwargs):
        # Accepts and ignores any call signature; the reply is always the same.
        return (
            "| 补助类型 | 港澳台 | 直辖市/特区/西藏 | 其他地区 | 国外 |\n"
            "|---|---:|---:|---:|---:|\n"
            "| 餐补 | 75 | 65 | 55 | 140 |\n"
            "| 基本出差补贴 | 35 | 35 | 35 | 35 |\n"
            "| 合计 | 110 | 100 | 90 | 175 |"
        )

    with make_session() as db:
        normalizer = KnowledgeNormalizationService(db)
        monkeypatch.setattr(normalizer.runtime_chat_service, "complete", fake_complete)

        enriched = normalizer.build_enriched_text(source_text)

        # Section heading first, stubbed table row somewhere inside, and the
        # stripped original text preserved verbatim at the tail.
        assert enriched.startswith("# 结构化表格补充")
        assert "| 餐补 | 75 | 65 | 55 | 140 |" in enriched
        assert enriched.endswith(source_text.strip())
|
||
|
|
|
||
|
|
|
||
|
|
def test_knowledge_normalizer_keeps_only_markdown_table_body() -> None:
    """``_sanitize_answer`` returns just the table rows of a mixed answer.

    The input wraps a three-line markdown table with a heading above and a
    note paragraph below; the expected output is the table alone, without a
    trailing newline.
    """
    answer_with_extras = (
        "## 表3 出差补贴标准\n\n"
        "| 补助类型 | 港澳台 | 直辖市/特区/西藏 |\n"
        "|---|---:|---:|\n"
        "| 餐补 | 75 | 65 |\n\n"
        "注:主办方统一安排餐食时,不再报销餐补。"
    )
    table_only = (
        "| 补助类型 | 港澳台 | 直辖市/特区/西藏 |\n"
        "|---|---:|---:|\n"
        "| 餐补 | 75 | 65 |"
    )

    cleaned = KnowledgeNormalizationService._sanitize_answer(answer_with_extras)

    assert cleaned == table_only
|
||
|
|
|
||
|
|
|
||
|
|
def test_knowledge_normalizer_builds_section_navigation_without_table() -> None:
    """Chapter-structured text with no table gets navigation and clue sections.

    The input has three chapter headings, each followed by one sentence. The
    enriched output must start with the navigation heading, contain the listed
    fragments, and end with the stripped original text.
    """
    make_session = build_session_factory()

    source_text = (
        "第一章 总则\n"
        "本制度适用于员工差旅报销和审批管理。\n\n"
        "第二章 住宿费标准\n"
        "住宿费按照出差城市档位和职级标准执行。\n\n"
        "第三章 交通费标准\n"
        "交通费应结合出差工具、舱位和审批要求报销。\n"
    )
    # Fragments checked with plain substring containment, in this order.
    required_fragments = (
        "- 第一章 总则",
        "## 第二章 住宿费标准",
        "# 问答线索补充",
        "- 第二章 住宿费标准:住宿费按照出差城市档位和职级标准执行",
    )

    with make_session() as db:
        normalizer = KnowledgeNormalizationService(db)
        enriched = normalizer.build_enriched_text(source_text)

        assert enriched.startswith("# 章节导航")
        for fragment in required_fragments:
            assert fragment in enriched
        assert enriched.endswith(source_text.strip())
|
||
|
|
|
||
|
|
|
||
|
|
def test_knowledge_normalizer_builds_answer_clues_from_lists_and_kv_lines() -> None:
    """Key-value, bullet and article lines under a chapter each become a clue.

    The input is one chapter heading followed by a ``key:value`` line, a
    ``- `` bullet line and a line starting with an article number. Each is
    expected in the output prefixed by the chapter heading, without the
    source line's trailing full stop.
    """
    make_session = build_session_factory()

    source_text = (
        "第一章 报销要求\n"
        "报销时限:费用发生后 30 日内提交申请。\n"
        "- 超过 30 日需补充审批说明。\n"
        "第十条 发票遗失的,应先提交遗失说明。\n"
    )
    # The clue-section heading plus one expected clue per source line.
    required_fragments = (
        "# 问答线索补充",
        "- 第一章 报销要求:报销时限:费用发生后 30 日内提交申请",
        "- 第一章 报销要求:超过 30 日需补充审批说明",
        "- 第一章 报销要求:第十条 发票遗失的,应先提交遗失说明",
    )

    with make_session() as db:
        normalizer = KnowledgeNormalizationService(db)
        enriched = normalizer.build_enriched_text(source_text)

        for fragment in required_fragments:
            assert fragment in enriched
|
||
|
|
|
||
|
|
|
||
|
|
def test_knowledge_normalizer_builds_answer_clues_without_section_headings() -> None:
    """Text without any chapter heading still yields clues, labelled ``正文``.

    The input is three plain sentences. The assertions here cover the clue
    section heading and the clues derived from the first two sentences.
    """
    make_session = build_session_factory()

    source_text = (
        "报销时限:费用发生后 30 日内提交申请。\n"
        "超过 30 日需补充审批说明。\n"
        "审批材料包括发票、行程单和付款凭证。\n"
    )

    with make_session() as db:
        normalizer = KnowledgeNormalizationService(db)
        enriched = normalizer.build_enriched_text(source_text)

    # Expected clues drop the source sentence's trailing full stop.
    assert "# 问答线索补充" in enriched
    assert "- 正文:报销时限:费用发生后 30 日内提交申请" in enriched
    assert "- 正文:超过 30 日需补充审批说明" in enriched
|