"""Tests for the text enrichment done by ``KnowledgeNormalizationService``."""

from __future__ import annotations

from sqlalchemy import create_engine
from sqlalchemy.orm import Session, sessionmaker
from sqlalchemy.pool import StaticPool

from app.db.base import Base
from app.services.knowledge_normalizer import KnowledgeNormalizationService


def build_session_factory() -> sessionmaker[Session]:
    """Return a session factory bound to a fresh in-memory SQLite database.

    All tables registered on ``Base.metadata`` are created before the factory
    is returned. The factory is configured with ``autoflush`` and
    ``autocommit`` turned off.
    """
    # StaticPool together with check_same_thread=False is the combination
    # SQLAlchemy documents for sharing a single in-memory SQLite connection.
    memory_engine = create_engine(
        "sqlite+pysqlite:///:memory:",
        poolclass=StaticPool,
        connect_args={"check_same_thread": False},
    )
    Base.metadata.create_all(bind=memory_engine)
    factory = sessionmaker(bind=memory_engine, autoflush=False, autocommit=False)
    return factory


def test_knowledge_normalizer_appends_structured_table(monkeypatch) -> None:
    """The markdown table returned by the chat service is appended to the text.

    ``runtime_chat_service.complete`` is replaced with a stub, so the test
    never depends on a real completion call.
    """
    make_session = build_session_factory()
    source_text = (
        "表3 出差补贴标准\n\n"
        "单位:人民币元/天\n"
        "补助类型 项目 港澳台 直辖市/特区/西藏 其他地区 国外\n"
        "餐补 自行解决餐食 75 65 55 140\n"
        "基本出差补贴 35 35 35 35\n"
        "合计 110 100 90 175\n"
    )
    table_markdown = (
        "| 补助类型 | 港澳台 | 直辖市/特区/西藏 | 其他地区 | 国外 |\n"
        "|---|---:|---:|---:|---:|\n"
        "| 餐补 | 75 | 65 | 55 | 140 |\n"
        "| 基本出差补贴 | 35 | 35 | 35 | 35 |\n"
        "| 合计 | 110 | 100 | 90 | 175 |"
    )

    def fake_complete(*args, **kwargs) -> str:
        # Ignores whatever it is called with and always answers with the table.
        return table_markdown

    with make_session() as session:
        normalizer = KnowledgeNormalizationService(session)
        monkeypatch.setattr(
            normalizer.runtime_chat_service,
            "complete",
            fake_complete,
        )
        result = normalizer.build_enriched_text(source_text)

    # The original text must lead and the table's last row must close the output.
    assert result.startswith(source_text.strip())
    assert "| 餐补 | 75 | 65 | 55 | 140 |" in result
    assert result.endswith("| 合计 | 110 | 100 | 90 | 175 |")


def test_knowledge_normalizer_keeps_only_markdown_table_body() -> None:
    """``_sanitize_answer`` drops the heading and trailing note around a table."""
    model_answer = (
        "## 表3 出差补贴标准\n\n"
        "| 补助类型 | 港澳台 | 直辖市/特区/西藏 |\n"
        "|---|---:|---:|\n"
        "| 餐补 | 75 | 65 |\n\n"
        "注:主办方统一安排餐食时,不再报销餐补。"
    )
    expected_table = (
        "| 补助类型 | 港澳台 | 直辖市/特区/西藏 |\n"
        "|---|---:|---:|\n"
        "| 餐补 | 75 | 65 |"
    )

    table_only = KnowledgeNormalizationService._sanitize_answer(model_answer)

    assert table_only == expected_table


def test_knowledge_normalizer_builds_section_navigation_without_table() -> None:
    """Chapter-structured text gets navigation and answer-clue sections."""
    make_session = build_session_factory()
    source_text = (
        "第一章 总则\n"
        "本制度适用于员工差旅报销和审批管理。\n\n"
        "第二章 住宿费标准\n"
        "住宿费按照出差城市档位和职级标准执行。\n\n"
        "第三章 交通费标准\n"
        "交通费应结合出差工具、舱位和审批要求报销。\n"
    )

    with make_session() as session:
        normalizer = KnowledgeNormalizationService(session)
        result = normalizer.build_enriched_text(source_text)

    assert result.startswith(source_text.strip())
    assert "- 第一章 总则" in result
    assert "## 第二章 住宿费标准" in result
    assert "# 问答线索补充" in result
    assert "- 第二章 住宿费标准:住宿费按照出差城市档位和职级标准执行" in result
    assert "# 章节导航" in result


def test_knowledge_normalizer_builds_answer_clues_from_lists_and_kv_lines() -> None:
    """Key-value lines, list items and article lines become clues under their chapter."""
    make_session = build_session_factory()
    source_text = (
        "第一章 报销要求\n"
        "报销时限:费用发生后 30 日内提交申请。\n"
        "- 超过 30 日需补充审批说明。\n"
        "第十条 发票遗失的,应先提交遗失说明。\n"
    )

    with make_session() as session:
        normalizer = KnowledgeNormalizationService(session)
        result = normalizer.build_enriched_text(source_text)

    assert "# 问答线索补充" in result
    assert "- 第一章 报销要求:报销时限:费用发生后 30 日内提交申请" in result
    assert "- 第一章 报销要求:超过 30 日需补充审批说明" in result
    assert "- 第一章 报销要求:第十条 发票遗失的,应先提交遗失说明" in result


def test_knowledge_normalizer_builds_answer_clues_without_section_headings() -> None:
    """Text with no chapter headings still yields clues, labelled with ``正文``."""
    make_session = build_session_factory()
    source_text = (
        "报销时限:费用发生后 30 日内提交申请。\n"
        "超过 30 日需补充审批说明。\n"
        "审批材料包括发票、行程单和付款凭证。\n"
    )

    with make_session() as session:
        normalizer = KnowledgeNormalizationService(session)
        result = normalizer.build_enriched_text(source_text)

    assert "# 问答线索补充" in result
    assert "- 正文:报销时限:费用发生后 30 日内提交申请" in result
    assert "- 正文:超过 30 日需补充审批说明" in result