feat: 完善差旅票据行程提取与费用明细回填逻辑

增强文档智能识别的票据场景关键词和字段提取能力,
优化会话关联草稿报销单的解析路径,
修复费用明细合并和票据去重边界问题,
前端改进报销创建和审批详情交互,
补充单元测试覆盖。
This commit is contained in:
caoxiaozhu
2026-05-21 14:24:51 +08:00
parent b183b0bd5e
commit f28d7e6d16
24 changed files with 1565 additions and 433 deletions

View File

@@ -72,6 +72,24 @@ def test_document_intelligence_prefers_train_ticket_for_railway_e_ticket_invoice
assert any(field.label == "金额" and field.value == "354元" for field in insight.fields)
def test_document_intelligence_labels_train_ticket_date_as_train_departure_time() -> None:
    """Check that a railway e-ticket exposes its departure timestamp as the date field.

    The OCR text contains both an invoice date and a departure timestamp.
    The resulting insight must be classified as ``train_ticket``, carry a
    ``date`` field labelled as the train departure time, and must not carry
    any field labelled as the invoice date.
    """
    ticket_text = (
        "中国铁路电子客票 开票日期 2026-02-18 "
        "G456 上海虹桥-武汉 2026-02-20 08:30开 票价:¥354.00"
    )
    insight = build_document_insight(
        filename="铁路电子客票.pdf",
        summary="铁路电子客票",
        text=ticket_text,
    )

    assert insight.document_type == "train_ticket"

    has_departure_field = any(
        entry.key == "date"
        and entry.label == "列车出发时间"
        and entry.value == "2026-02-20 08:30"
        for entry in insight.fields
    )
    assert has_departure_field

    has_invoice_date_field = any(entry.label == "开票日期" for entry in insight.fields)
    assert not has_invoice_date_field
def test_document_intelligence_service_keeps_rule_fields_without_model_correction() -> None:
engine = create_engine(
"sqlite+pysqlite:///:memory:",

View File

@@ -207,6 +207,89 @@ def test_upsert_draft_from_ontology_defers_multi_document_association_choice() -
assert existing_claim.items[0].invoice_id == "old-trip.png"
def test_linked_document_supplement_keeps_existing_claim_expense_type() -> None:
    """Check that linking a hotel invoice to a transport draft keeps the draft's type.

    A draft claim with ``expense_type="transport"`` is persisted, then a hotel
    invoice is linked to it through the ``link_to_existing_draft`` review
    action. After the upsert the claim must still be ``transport`` while one
    of its items is typed ``hotel_ticket``.
    """
    user_id = "type-lock@example.com"
    supplement_message = "把酒店发票补充到现有草稿"

    with build_session() as db:
        employee = Employee(
            employee_no="E5010",
            name="类型锁定员工",
            email=user_id,
        )
        db.add(employee)
        # Flush so that employee.id is available for the claim below.
        db.flush()

        existing_claim = ExpenseClaim(
            claim_no="EXP-202605-020",
            employee_id=employee.id,
            employee_name="类型锁定员工",
            department_name="市场部",
            project_code=None,
            expense_type="transport",
            reason="原有交通报销",
            location="深圳",
            amount=Decimal("32.00"),
            currency="CNY",
            invoice_count=1,
            occurred_at=datetime(2026, 5, 13, tzinfo=UTC),
            status="draft",
            approval_stage="待提交",
            risk_flags_json=[],
        )
        original_item = ExpenseClaimItem(
            claim_id=existing_claim.id,
            item_date=date(2026, 5, 13),
            item_type="transport",
            item_reason="原有交通报销",
            item_location="深圳",
            item_amount=Decimal("32.00"),
            invoice_id="old-trip.png",
        )
        existing_claim.items = [original_item]
        db.add(existing_claim)
        db.commit()

        hotel_document = {
            "filename": "hotel-invoice.pdf",
            "document_type": "hotel_invoice",
            "scene_code": "hotel",
            "scene_label": "住宿票据",
            "summary": "酒店住宿 发票金额 300 元",
            "text": "酒店住宿 发票金额 ¥300.00",
            "document_fields": [
                {"key": "amount", "label": "金额", "value": "300"},
                {"key": "merchant", "label": "酒店名称", "value": "上海酒店"},
            ],
        }
        context_json = {
            "name": "类型锁定员工",
            "review_action": "link_to_existing_draft",
            "draft_claim_id": existing_claim.id,
            "attachment_names": ["hotel-invoice.pdf"],
            "attachment_count": 1,
            "ocr_documents": [hotel_document],
        }

        parse_request = OntologyParseRequest(
            query=supplement_message,
            user_id=user_id,
            context_json=context_json,
        )
        ontology = SemanticOntologyService(db).parse(parse_request)

        ExpenseClaimService(db).upsert_draft_from_ontology(
            run_id=ontology.run_id,
            user_id=user_id,
            message=supplement_message,
            ontology=ontology,
            context_json=context_json,
        )

        db.refresh(existing_claim)
        assert existing_claim.expense_type == "transport"
        assert any(entry.item_type == "hotel_ticket" for entry in existing_claim.items)
def test_upsert_draft_from_ontology_keeps_reason_missing_for_attachment_only_upload() -> None:
user_id = "wangwu@example.com"
@@ -471,7 +554,7 @@ def test_upsert_travel_draft_uses_ticket_item_types_and_auto_allowance() -> None
train_item = next(item for item in claim.items if item.item_type == "train_ticket")
allowance_item = next(item for item in claim.items if item.item_type == "travel_allowance")
assert train_item.item_amount == Decimal("354.00")
assert train_item.item_reason == "广州南北京南"
assert train_item.item_reason == "广州南-北京南"
assert allowance_item.item_amount == Decimal("300.00")
assert allowance_item.invoice_id is None
assert allowance_item.is_system_generated is True
@@ -864,8 +947,8 @@ def test_upload_train_ticket_attachment_backfills_item_amount(monkeypatch, tmp_p
OcrRecognizeDocumentRead(
filename="train-ticket.png",
media_type="image/png",
text="中国铁路电子客票 广州南-北京南 二等座 票价:¥354.00",
summary="铁路电子客票,票价 354 元。",
text="中国铁路电子客票 广州南-北京南 二等座 2026-02-20 08:30开 票价:¥354.00",
summary="铁路电子客票,2026-02-20 08:30 广州南至北京南,票价 354 元。",
avg_score=0.98,
line_count=1,
page_count=1,
@@ -874,6 +957,8 @@ def test_upload_train_ticket_attachment_backfills_item_amount(monkeypatch, tmp_p
scene_code="travel",
scene_label="差旅费",
document_fields=[
{"key": "invoice_date", "label": "开票日期", "value": "2026-02-18"},
{"key": "trip_date", "label": "行程日期", "value": "2026-02-20 08:30"},
{"key": "fare", "label": "票价", "value": "¥354.00"},
],
)
@@ -908,7 +993,8 @@ def test_upload_train_ticket_attachment_backfills_item_amount(monkeypatch, tmp_p
db.refresh(claim)
assert claim.items[0].item_amount == Decimal("354.00")
assert claim.items[0].item_type == "train_ticket"
assert claim.items[0].item_reason == "从广州南到北京南"
assert claim.items[0].item_date == date(2026, 2, 20)
assert claim.items[0].item_reason == "广州南-北京南"
assert claim.amount == Decimal("354.00")
uploaded_meta = service.get_claim_item_attachment_meta(
claim_id=claim.id,
@@ -917,10 +1003,97 @@ def test_upload_train_ticket_attachment_backfills_item_amount(monkeypatch, tmp_p
)
assert uploaded_meta is not None
assert uploaded_meta["document_info"]["document_type"] == "train_ticket"
assert any(
field["label"] == "列车出发时间" and field["value"] == "2026-02-20 08:30"
for field in uploaded_meta["document_info"]["fields"]
)
assert any(
field["label"] == "开票日期" and field["value"] == "2026-02-18"
for field in uploaded_meta["document_info"]["fields"]
)
assert any(
field["label"] == "票价" and field["value"] == "¥354.00"
for field in uploaded_meta["document_info"]["fields"]
)
assert not any("用途字段" in point for point in uploaded_meta["analysis"]["points"])
def test_attachment_analysis_does_not_compare_business_purpose_with_ticket_scene() -> None:
    """Check the analysis points produced for a train ticket attached to a travel item.

    The claim item's reason holds a free-text business purpose. The attachment
    analysis must come back with ``medium`` severity, contain no point
    mentioning the purpose field, and contain a point about the trip
    description that mentions the origin-destination format.
    """
    with build_session() as db:
        claim = build_claim(expense_type="travel", location="上海")
        first_item = claim.items[0]
        first_item.item_type = "train_ticket"
        first_item.item_reason = "2026-02-20 至 2026-02-23支撑上海电力项目部署"
        first_item.item_amount = Decimal("354.00")
        db.add(claim)
        db.commit()

        ticket_fields = [
            {"key": "amount", "label": "票价", "value": "¥354.00"},
            {"key": "date", "label": "日期", "value": "2026-02-20"},
            {"key": "route", "label": "行程", "value": "上海虹桥-武汉"},
        ]
        document = OcrRecognizeDocumentRead(
            filename="train-ticket.png",
            media_type="image/png",
            text="中国铁路电子客票 上海虹桥-武汉 二等座 2026-02-20 票价:¥354.00",
            summary="铁路电子客票,上海虹桥至武汉,票价 354 元。",
            avg_score=0.98,
            line_count=1,
            page_count=1,
            document_type="train_ticket",
            document_type_label="火车/高铁票",
            scene_code="travel",
            scene_label="差旅票据",
            document_fields=ticket_fields,
        )

        # Re-read the item from the claim after the commit, as the item is
        # looked up through the relationship at call time.
        analysis = ExpenseClaimService(db)._build_attachment_analysis(
            document=document,
            item=claim.items[0],
        )

        assert analysis["severity"] == "medium"
        mentions_purpose = any("用途字段" in point for point in analysis["points"])
        assert not mentions_purpose
        mentions_route_format = any(
            "行程说明" in point and "始发地-目的地" in point
            for point in analysis["points"]
        )
        assert mentions_route_format
def test_attachment_risk_flag_message_uses_specific_points(monkeypatch, tmp_path) -> None:
    """Check that a risk flag's message is built from specific points, not the summary.

    The attachment metadata reader is stubbed to return a medium-severity
    analysis with a generic summary and two concrete points. The single flag
    produced must mention the first point in its message, must not contain
    the generic summary wording there, and must expose the summary and the
    points under their own keys.
    """
    generic_summary = "当前附件可见部分内容,但金额、用途、日期或附件类型仍有缺失或不一致。"
    expected_points = [
        "日期字段:未识别到开票日期或业务发生日期。",
        "金额字段:附件识别金额 300.00 元与报销金额 88.00 元不一致。",
    ]

    with build_session() as db:
        claim = build_claim(expense_type="travel", location="上海")
        claim.items[0].invoice_id = "invoice.png"
        db.add(claim)
        db.commit()

        file_path = tmp_path / "invoice.png"
        file_path.write_bytes(b"fake")

        def fake_resolve_attachment_path(storage_key):
            return file_path

        def fake_read_attachment_meta(path):
            # Hand out a fresh list on every call so the expectation list
            # above is never shared with the code under test.
            return {
                "analysis": {
                    "severity": "medium",
                    "label": "中风险",
                    "summary": generic_summary,
                    "points": list(expected_points),
                }
            }

        service = ExpenseClaimService(db)
        monkeypatch.setattr(service, "_resolve_attachment_path", fake_resolve_attachment_path)
        monkeypatch.setattr(service, "_read_attachment_meta", fake_read_attachment_meta)

        flags = service._build_claim_attachment_risk_flags([claim.items[0]])

        assert len(flags) == 1
        only_flag = flags[0]
        assert "日期字段:未识别到开票日期或业务发生日期。" in only_flag["message"]
        assert "当前附件可见部分内容" not in only_flag["message"]
        assert only_flag["summary"] == generic_summary
        assert only_flag["points"] == expected_points
def test_upload_ride_receipt_backfills_item_reason_from_addresses(monkeypatch, tmp_path) -> None:
@@ -987,7 +1160,7 @@ def test_upload_ride_receipt_backfills_item_reason_from_addresses(monkeypatch, t
assert updated is not None
db.refresh(claim)
assert claim.items[0].item_type == "ride_ticket"
assert claim.items[0].item_reason == "深圳北站腾讯滨海大厦"
assert claim.items[0].item_reason == "深圳北站-腾讯滨海大厦"
assert claim.items[0].item_amount == Decimal("42.00")
assert claim.amount == Decimal("42.00")

View File

@@ -178,3 +178,42 @@ def test_review_next_step_blocked_returns_reasons_and_removes_next_step_action(
"所属部门未完善" in str(item.get("content") or "")
for item in review_payload["risk_briefs"]
)
def test_conversation_hydration_does_not_reuse_review_type_for_fresh_expense_prompt() -> None:
    """Check which stored review state is re-applied when a context is hydrated.

    A conversation is created with a draft claim id, attachment names and
    review form values. Hydrating with a message that describes a new trip
    must not put those keys at the top level of the context, although the
    form values stay reachable under ``conversation_state``. Hydrating with
    a follow-up message must bring the draft id and form values back.
    """
    stored_context = {
        "session_type": "expense",
        "draft_claim_id": "claim-old",
        "attachment_names": ["old-train-ticket.pdf"],
        "attachment_count": 1,
        "review_form_values": {
            "expense_type": "差旅费",
            "business_location": "北京",
        },
    }
    fresh_message = "业务发生时间:2026-02-20 至 2026-02-23去上海支持上海电力部署项目申请报销"
    followup_message = "继续补充酒店发票"

    session_factory = build_session_factory()
    with session_factory() as db:
        service = AgentConversationService(db)
        conversation = service.get_or_create_conversation(
            conversation_id="conv-review-type-lock",
            user_id="emp-review-type@example.com",
            source="user_message",
            context_json=stored_context,
        )

        fresh_context = service.hydrate_context_json(
            conversation=conversation,
            context_json={},
            message=fresh_message,
        )
        continued_context = service.hydrate_context_json(
            conversation=conversation,
            context_json={},
            message=followup_message,
        )

        # Fresh prompt: nothing from the old review is promoted to the top level.
        assert "draft_claim_id" not in fresh_context
        assert "attachment_names" not in fresh_context
        assert "review_form_values" not in fresh_context
        preserved_values = fresh_context["conversation_state"]["review_form_values"]
        assert preserved_values["expense_type"] == "差旅费"

        # Follow-up prompt: the old draft and form values are restored.
        assert continued_context["draft_claim_id"] == "claim-old"
        assert continued_context["review_form_values"]["expense_type"] == "差旅费"

View File

@@ -477,9 +477,9 @@ def test_user_agent_model_prompt_supports_contextual_personalization() -> None:
assert '"user_grade": "P5"' in user_prompt
def test_user_agent_guides_generic_expense_request() -> None:
session_factory = build_session_factory()
with session_factory() as db:
def test_user_agent_guides_generic_expense_request() -> None:
session_factory = build_session_factory()
with session_factory() as db:
ontology = SemanticOntologyService(db).parse(
OntologyParseRequest(
query="我要报销",
@@ -506,16 +506,61 @@ def test_user_agent_guides_generic_expense_request() -> None:
"事由说明",
"票据附件",
]
assert [item.action_type for item in response.review_payload.confirmation_actions] == [
"cancel_review",
"edit_review",
"save_draft",
]
def test_user_agent_guides_implicit_expense_draft_request() -> None:
session_factory = build_session_factory()
with session_factory() as db:
assert [item.action_type for item in response.review_payload.confirmation_actions] == [
"cancel_review",
"edit_review",
]
edit_action = next(
item for item in response.review_payload.confirmation_actions if item.action_type == "edit_review"
)
assert edit_action.label == "选择报销类型"
assert edit_action.emphasis == "primary"
def test_user_agent_asks_for_type_when_trip_context_is_ambiguous() -> None:
    """Check that a trip description without an explicit type leaves the type slot empty.

    The message supplies a date range and a destination. The review payload
    must report the expense type slot as missing, keep the parsed time range
    and location, refuse to proceed, and offer only the cancel and edit
    actions, with the edit action as the primary call to pick a type.
    """
    user_id = "pytest-ambiguous-type@example.com"
    message = "业务发生时间:2026-02-20 至 2026-02-23去上海支持上海电力部署项目申请报销"

    session_factory = build_session_factory()
    with session_factory() as db:
        ontology = SemanticOntologyService(db).parse(
            OntologyParseRequest(
                query=message,
                user_id=user_id,
            )
        )
        agent_request = UserAgentRequest(
            run_id=ontology.run_id,
            user_id=user_id,
            message=message,
            ontology=ontology,
            tool_payload={"draft_only": True},
        )
        response = UserAgentService(db).respond(agent_request)

        assert response.review_payload is not None
        slot_map = {card.key: card for card in response.review_payload.slot_cards}

        assert slot_map["expense_type"].value == ""
        assert slot_map["expense_type"].status == "missing"
        assert slot_map["time_range"].value == "2026-02-20 至 2026-02-23"
        assert slot_map["location"].value == "上海"

        assert response.review_payload.can_proceed is False
        assert "报销类型" in response.review_payload.missing_slots
        assert "选择报销类型" in response.review_payload.body_message
        assert "不会重新改判报销类型" in response.review_payload.body_message

        edit_action = next(
            action
            for action in response.review_payload.confirmation_actions
            if action.action_type == "edit_review"
        )
        assert edit_action.label == "选择报销类型"
        assert edit_action.emphasis == "primary"

        offered_action_types = [
            action.action_type
            for action in response.review_payload.confirmation_actions
        ]
        assert offered_action_types == [
            "cancel_review",
            "edit_review",
        ]
def test_user_agent_guides_implicit_expense_draft_request() -> None:
session_factory = build_session_factory()
with session_factory() as db:
today = datetime.now(UTC).date().isoformat()
ontology = SemanticOntologyService(db).parse(
OntologyParseRequest(
@@ -611,6 +656,126 @@ def test_user_agent_guides_riding_fare_as_transport_expense() -> None:
assert "“交通费”" in response.review_payload.intent_summary
def test_user_agent_keeps_travel_range_when_user_adds_receipts_after_text_context() -> None:
    """Check that uploading receipts after a text description keeps the reviewed trip slots.

    Step one sends a text-only trip description and verifies the parsed
    expense type, time range, location and reason. Step two sends a follow-up
    that links two train tickets and a hotel invoice to the existing draft,
    passing the earlier review form values in the context. The follow-up
    review payload must still show the same type, range, location and reason.
    """
    user_id = "pytest-travel-range@example.com"
    message = "业务发生时间:2026-02-20 至 2026-02-23去上海支撑上海电力 服务器部署出差3天"
    followup_message = "请把当前上传的票据合并到现有报销草稿中。"

    session_factory = build_session_factory()
    with session_factory() as db:
        # --- Step one: text-only description of the trip -------------------
        ontology = SemanticOntologyService(db).parse(
            OntologyParseRequest(
                query=message,
                user_id=user_id,
            )
        )
        initial_response = UserAgentService(db).respond(
            UserAgentRequest(
                run_id=ontology.run_id,
                user_id=user_id,
                message=message,
                ontology=ontology,
                tool_payload={"draft_only": True},
            )
        )

        assert initial_response.review_payload is not None
        initial_slots = {card.key: card for card in initial_response.review_payload.slot_cards}
        assert initial_slots["expense_type"].normalized_value == "travel"
        assert initial_slots["time_range"].value == "2026-02-20 至 2026-02-23"
        assert initial_slots["location"].value == "上海"
        # The "business time" prefix and the tail of the range must not leak
        # into the reason slot.
        assert "业务发生时间" not in initial_slots["reason"].raw_value
        assert not initial_slots["reason"].value.startswith("至 2026-02-23")

        # --- Step two: receipts uploaded against the existing draft --------
        outbound_ticket = {
            "filename": "2月20_武汉-上海.pdf",
            "document_type": "train_ticket",
            "scene_code": "travel",
            "scene_label": "差旅票据",
            "summary": "铁路电子客票 2026-02-20 武汉-上海 二等座 票价 354 元",
            "text": "铁路电子客票 2026-02-20 武汉-上海 二等座 票价 ¥354.00",
            "avg_score": 0.95,
            "document_fields": [
                {"key": "amount", "label": "票价", "value": "354"},
                {"key": "route", "label": "行程", "value": "武汉-上海"},
                {"key": "date", "label": "日期", "value": "2026-02-20"},
            ],
            "warnings": [],
        }
        return_ticket = {
            "filename": "2月23_上海-武汉.pdf",
            "document_type": "train_ticket",
            "scene_code": "travel",
            "scene_label": "差旅票据",
            "summary": "铁路电子客票 2026-02-23 上海-武汉 二等座 票价 354 元",
            "text": "铁路电子客票 2026-02-23 上海-武汉 二等座 票价 ¥354.00",
            "avg_score": 0.95,
            "document_fields": [
                {"key": "amount", "label": "票价", "value": "354"},
                {"key": "route", "label": "行程", "value": "上海-武汉"},
                {"key": "date", "label": "日期", "value": "2026-02-23"},
            ],
            "warnings": [],
        }
        hotel_invoice = {
            "filename": "上海酒店发票.pdf",
            "document_type": "hotel_invoice",
            "summary": "上海酒店 住宿 3 晚 金额 1200 元",
            "text": "上海酒店 住宿 3 晚 金额 1200 元",
            "avg_score": 0.96,
            "document_fields": [
                {"key": "amount", "label": "金额", "value": "1200"},
                {"key": "merchant", "label": "酒店名称", "value": "上海酒店"},
            ],
            "warnings": [],
        }
        followup_context = {
            "name": "张三",
            "grade": "P4",
            "review_action": "link_to_existing_draft",
            "review_form_values": {
                "expense_type": "差旅费",
                "occurred_date": "2026-02-20",
                "time_range": "2026-02-20 至 2026-02-23",
                "business_time": "2026-02-20 至 2026-02-23",
                "business_location": "上海",
                "reason": "去上海支撑上海电力服务器部署出差3天",
            },
            "business_time_context": {
                "mode": "range",
                "start_date": "2026-02-20",
                "end_date": "2026-02-23",
                "display_value": "2026-02-20 至 2026-02-23",
            },
            "attachment_names": ["2月20_武汉-上海.pdf", "2月23_上海-武汉.pdf", "上海酒店发票.pdf"],
            "attachment_count": 3,
            "ocr_documents": [outbound_ticket, return_ticket, hotel_invoice],
        }

        followup_ontology = SemanticOntologyService(db).parse(
            OntologyParseRequest(
                query=followup_message,
                user_id=user_id,
                context_json=followup_context,
            )
        )
        followup_response = UserAgentService(db).respond(
            UserAgentRequest(
                run_id=followup_ontology.run_id,
                user_id=user_id,
                message=followup_message,
                ontology=followup_ontology,
                context_json=followup_context,
                tool_payload={"draft_only": True},
            )
        )

        assert followup_response.review_payload is not None
        followup_slots = {card.key: card for card in followup_response.review_payload.slot_cards}
        assert followup_slots["expense_type"].value == "差旅费"
        assert followup_slots["expense_type"].normalized_value == "travel"
        assert followup_slots["time_range"].value == "2026-02-20 至 2026-02-23"
        assert followup_slots["location"].value == "上海"
        assert followup_slots["reason"].value == "去上海支撑上海电力服务器部署出差3天"
def test_user_agent_does_not_treat_draft_saved_message_as_precheck_risk_for_transport() -> None:
session_factory = build_session_factory()
with session_factory() as db:
@@ -1384,6 +1549,7 @@ def test_user_agent_review_payload_does_not_fill_hotel_name_from_train_ticket()
for field in card.fields
]
assert "商户/酒店" not in field_labels
assert "列车出发时间" in field_labels
def test_user_agent_review_payload_allows_next_step_when_only_optional_ride_receipt_is_missing() -> None: