feat: 增强规则资产管理与审计页面运行时调试
后端新增规则资产版本管理和规则文件 CRUD 接口,优化风险规则生成模板执行和员工数据模型字段,知识库 RAG 增强本地回退和文档提取能力,清理旧风险规则文件统一由生成引擎管理,前端审计页面增加运行时调试面板和规则资产编辑交互,补充单元测试覆盖。
This commit is contained in:
@@ -1,32 +0,0 @@
|
||||
{
|
||||
"schema_version": "1.0",
|
||||
"rule_code": "risk.expense.consecutive_transport_receipts",
|
||||
"name": "连号交通票据",
|
||||
"enabled": true,
|
||||
"risk_dimension": "consecutive_receipts",
|
||||
"ontology_signal": "consecutive_transport_receipts",
|
||||
"evaluator": "consecutive_transport_receipts",
|
||||
"applies_to": {
|
||||
"expense_types": ["transport", "travel"],
|
||||
"min_attachments": 2
|
||||
},
|
||||
"inputs": {
|
||||
"invoice_no": "attachment.invoice_no"
|
||||
},
|
||||
"params": {
|
||||
"min_consecutive_count": 3
|
||||
},
|
||||
"outcomes": {
|
||||
"pass": { "severity": "none", "action": "continue" },
|
||||
"fail": {
|
||||
"severity": "medium",
|
||||
"action": "manual_review"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"owner": "风控与审计部",
|
||||
"stability": "platform_builtin",
|
||||
"source_ref": "常用risk.txt / 三、车辆交通 / 连号票集中报销",
|
||||
"updated_at": "2026-05-19"
|
||||
}
|
||||
}
|
||||
@@ -1,29 +0,0 @@
|
||||
{
|
||||
"schema_version": "1.0",
|
||||
"rule_code": "risk.expense.entertainment_missing_detail",
|
||||
"name": "招待费事由不完整",
|
||||
"enabled": true,
|
||||
"risk_dimension": "entertainment_detail",
|
||||
"ontology_signal": "entertainment_missing_detail",
|
||||
"evaluator": "entertainment_reason_missing",
|
||||
"applies_to": {
|
||||
"domains": ["meal"]
|
||||
},
|
||||
"inputs": {
|
||||
"reason": "claim.reason_corpus"
|
||||
},
|
||||
"params": {},
|
||||
"outcomes": {
|
||||
"pass": { "severity": "none", "action": "continue" },
|
||||
"fail": {
|
||||
"severity": "medium",
|
||||
"action": "warn"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"owner": "风控与审计部",
|
||||
"stability": "platform_builtin",
|
||||
"source_ref": "常用risk.txt / 三、餐费招待 / 业务招待无事由对象",
|
||||
"updated_at": "2026-05-19"
|
||||
}
|
||||
}
|
||||
@@ -1,90 +0,0 @@
|
||||
{
|
||||
"schema_version": "2.0",
|
||||
"rule_code": "risk.expense.generated_20260523010818",
|
||||
"name": "住宿城市必须出现在本次差旅行程城市中风险规则",
|
||||
"description": "当报销业务满足“住宿城市必须出现在本次差旅行程城市中,如果酒店发票城市与申报目的地或交通票行程城市都不一致,则判定为高风险,并要求补充差旅说明。”时,系统会按高风险进行提示,并要求经办人或审核人补充核对依据。",
|
||||
"enabled": true,
|
||||
"risk_dimension": "natural_language_rule",
|
||||
"risk_category": "报销",
|
||||
"ontology_signal": "natural_language_risk",
|
||||
"evaluator": "template_rule",
|
||||
"template_key": "field_compare_v1",
|
||||
"applies_to": {
|
||||
"domains": [
|
||||
"expense"
|
||||
]
|
||||
},
|
||||
"inputs": {
|
||||
"fields": [
|
||||
{
|
||||
"key": "claim.reason",
|
||||
"label": "报销事由",
|
||||
"type": "text",
|
||||
"source": "claim"
|
||||
},
|
||||
{
|
||||
"key": "claim.location",
|
||||
"label": "申报地点",
|
||||
"type": "text",
|
||||
"source": "claim"
|
||||
},
|
||||
{
|
||||
"key": "attachment.hotel_city",
|
||||
"label": "住宿城市",
|
||||
"type": "text",
|
||||
"source": "attachment"
|
||||
},
|
||||
{
|
||||
"key": "attachment.route_cities",
|
||||
"label": "行程城市",
|
||||
"type": "list",
|
||||
"source": "attachment"
|
||||
}
|
||||
]
|
||||
},
|
||||
"params": {
|
||||
"template_key": "field_compare_v1",
|
||||
"field_keys": [
|
||||
"claim.reason",
|
||||
"claim.location",
|
||||
"attachment.hotel_city",
|
||||
"attachment.route_cities"
|
||||
],
|
||||
"condition_summary": "对比报销事由、申报地点、住宿城市之间是否一致或存在交集",
|
||||
"natural_language": "住宿城市必须出现在本次差旅行程城市中,如果酒店发票城市与申报目的地或交通票行程城市都不一致,则判定为高风险,并要求补充差旅说明。",
|
||||
"conditions": [
|
||||
{
|
||||
"left": "claim.reason",
|
||||
"operator": "overlap",
|
||||
"right": "claim.location"
|
||||
}
|
||||
]
|
||||
},
|
||||
"outcomes": {
|
||||
"pass": {
|
||||
"severity": "none",
|
||||
"action": "continue"
|
||||
},
|
||||
"fail": {
|
||||
"severity": "high",
|
||||
"action": "manual_review"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"owner": "WangMin",
|
||||
"stability": "generated_draft",
|
||||
"source_ref": "自然语言风险规则",
|
||||
"created_at": "2026-05-23T01:08:18.310751+00:00",
|
||||
"created_by": "WangMin",
|
||||
"natural_language": "住宿城市必须出现在本次差旅行程城市中,如果酒店发票城市与申报目的地或交通票行程城市都不一致,则判定为高风险,并要求补充差旅说明。",
|
||||
"business_explanation": "当报销业务满足“住宿城市必须出现在本次差旅行程城市中,如果酒店发票城市与申报目的地或交通票行程城市都不一致,则判定为高风险,并要求补充差旅说明。”时,系统会按高风险进行提示,并要求经办人或审核人补充核对依据。",
|
||||
"condition_summary": "对比报销事由、申报地点、住宿城市之间是否一致或存在交集",
|
||||
"flow": {
|
||||
"start": "报销单据提交",
|
||||
"evidence": "读取报销事由、申报地点、住宿城市",
|
||||
"decision": "对比报销事由、申报地点、住宿城市之间是否一致或存在交集",
|
||||
"pass": "未命中风险,继续业务流转",
|
||||
"fail": "命中高风险,提示复核"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,90 +0,0 @@
|
||||
{
|
||||
"schema_version": "2.0",
|
||||
"rule_code": "risk.expense.generated_20260523010846",
|
||||
"name": "酒店发票城市必须与申报目的地或交通票风险规则",
|
||||
"description": "当报销业务满足“酒店发票城市必须与申报目的地或交通票行程城市一致,如果都不一致,则判定为高风险,并要求报销人补充异常行程说明。”时,系统会按高风险进行提示,并要求经办人或审核人补充核对依据。",
|
||||
"enabled": true,
|
||||
"risk_dimension": "natural_language_rule",
|
||||
"risk_category": "报销",
|
||||
"ontology_signal": "natural_language_risk",
|
||||
"evaluator": "template_rule",
|
||||
"template_key": "field_compare_v1",
|
||||
"applies_to": {
|
||||
"domains": [
|
||||
"expense"
|
||||
]
|
||||
},
|
||||
"inputs": {
|
||||
"fields": [
|
||||
{
|
||||
"key": "claim.reason",
|
||||
"label": "报销事由",
|
||||
"type": "text",
|
||||
"source": "claim"
|
||||
},
|
||||
{
|
||||
"key": "claim.location",
|
||||
"label": "申报地点",
|
||||
"type": "text",
|
||||
"source": "claim"
|
||||
},
|
||||
{
|
||||
"key": "claim.employee_name",
|
||||
"label": "报销人",
|
||||
"type": "text",
|
||||
"source": "claim"
|
||||
},
|
||||
{
|
||||
"key": "attachment.route_cities",
|
||||
"label": "行程城市",
|
||||
"type": "list",
|
||||
"source": "attachment"
|
||||
}
|
||||
]
|
||||
},
|
||||
"params": {
|
||||
"template_key": "field_compare_v1",
|
||||
"field_keys": [
|
||||
"claim.reason",
|
||||
"claim.location",
|
||||
"claim.employee_name",
|
||||
"attachment.route_cities"
|
||||
],
|
||||
"condition_summary": "对比报销事由、申报地点、报销人之间是否一致或存在交集",
|
||||
"natural_language": "酒店发票城市必须与申报目的地或交通票行程城市一致,如果都不一致,则判定为高风险,并要求报销人补充异常行程说明。",
|
||||
"conditions": [
|
||||
{
|
||||
"left": "claim.reason",
|
||||
"operator": "overlap",
|
||||
"right": "claim.location"
|
||||
}
|
||||
]
|
||||
},
|
||||
"outcomes": {
|
||||
"pass": {
|
||||
"severity": "none",
|
||||
"action": "continue"
|
||||
},
|
||||
"fail": {
|
||||
"severity": "high",
|
||||
"action": "manual_review"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"owner": "min.wang@xfinance.com",
|
||||
"stability": "generated_draft",
|
||||
"source_ref": "自然语言风险规则",
|
||||
"created_at": "2026-05-23T01:08:46.286513+00:00",
|
||||
"created_by": "min.wang@xfinance.com",
|
||||
"natural_language": "酒店发票城市必须与申报目的地或交通票行程城市一致,如果都不一致,则判定为高风险,并要求报销人补充异常行程说明。",
|
||||
"business_explanation": "当报销业务满足“酒店发票城市必须与申报目的地或交通票行程城市一致,如果都不一致,则判定为高风险,并要求报销人补充异常行程说明。”时,系统会按高风险进行提示,并要求经办人或审核人补充核对依据。",
|
||||
"condition_summary": "对比报销事由、申报地点、报销人之间是否一致或存在交集",
|
||||
"flow": {
|
||||
"start": "报销单据提交",
|
||||
"evidence": "读取报销事由、申报地点、报销人",
|
||||
"decision": "对比报销事由、申报地点、报销人之间是否一致或存在交集",
|
||||
"pass": "未命中风险,继续业务流转",
|
||||
"fail": "命中高风险,提示复核"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,90 +0,0 @@
|
||||
{
|
||||
"schema_version": "2.0",
|
||||
"rule_code": "risk.expense.generated_20260523011139",
|
||||
"name": "酒店发票城市一致性校验",
|
||||
"description": "校验酒店发票城市是否与申报目的地或行程城市一致,不一致时标记为高风险并要求补充说明",
|
||||
"enabled": true,
|
||||
"risk_dimension": "natural_language_rule",
|
||||
"risk_category": "报销",
|
||||
"ontology_signal": "natural_language_risk",
|
||||
"evaluator": "template_rule",
|
||||
"template_key": "field_compare_v1",
|
||||
"applies_to": {
|
||||
"domains": [
|
||||
"expense"
|
||||
]
|
||||
},
|
||||
"inputs": {
|
||||
"fields": [
|
||||
{
|
||||
"key": "attachment.route_cities",
|
||||
"label": "行程城市",
|
||||
"type": "list",
|
||||
"source": "attachment"
|
||||
},
|
||||
{
|
||||
"key": "claim.location",
|
||||
"label": "申报地点",
|
||||
"type": "text",
|
||||
"source": "claim"
|
||||
},
|
||||
{
|
||||
"key": "attachment.hotel_city",
|
||||
"label": "住宿城市",
|
||||
"type": "text",
|
||||
"source": "attachment"
|
||||
},
|
||||
{
|
||||
"key": "claim.reason",
|
||||
"label": "报销事由",
|
||||
"type": "text",
|
||||
"source": "claim"
|
||||
}
|
||||
]
|
||||
},
|
||||
"params": {
|
||||
"template_key": "field_compare_v1",
|
||||
"field_keys": [
|
||||
"attachment.hotel_city",
|
||||
"claim.location",
|
||||
"attachment.route_cities",
|
||||
"claim.reason"
|
||||
],
|
||||
"condition_summary": "对比住宿城市、申报地点、行程城市之间是否一致或存在交集",
|
||||
"natural_language": "酒店发票城市必须与申报目的地或交通票行程城市一致,如果都不一致,则判定为高风险,并要求补充异常行程说明。",
|
||||
"conditions": [
|
||||
{
|
||||
"left": "attachment.hotel_city",
|
||||
"operator": "overlap",
|
||||
"right": "claim.location"
|
||||
}
|
||||
]
|
||||
},
|
||||
"outcomes": {
|
||||
"pass": {
|
||||
"severity": "none",
|
||||
"action": "continue"
|
||||
},
|
||||
"fail": {
|
||||
"severity": "high",
|
||||
"action": "manual_review"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"owner": "WangMin",
|
||||
"stability": "generated_draft",
|
||||
"source_ref": "自然语言风险规则",
|
||||
"created_at": "2026-05-23T01:11:39.165281+00:00",
|
||||
"created_by": "WangMin",
|
||||
"natural_language": "酒店发票城市必须与申报目的地或交通票行程城市一致,如果都不一致,则判定为高风险,并要求补充异常行程说明。",
|
||||
"business_explanation": "校验酒店发票城市是否与申报目的地或行程城市一致,不一致时标记为高风险并要求补充说明",
|
||||
"condition_summary": "对比住宿城市、申报地点、行程城市之间是否一致或存在交集",
|
||||
"flow": {
|
||||
"start": "提交酒店发票",
|
||||
"evidence": "读取住宿城市、申报地点、行程城市",
|
||||
"decision": "对比住宿城市、申报地点、行程城市之间是否一致或存在交集",
|
||||
"pass": "继续流转",
|
||||
"fail": "提示高风险:酒店发票城市与申报目的地及行程城市均不一致,需补充异常行程说明"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
{
|
||||
"schema_version": "1.0",
|
||||
"rule_code": "risk.expense.meal_localized_as_travel",
|
||||
"name": "同城餐饮混入差旅",
|
||||
"enabled": true,
|
||||
"risk_dimension": "meal_travel_mix",
|
||||
"ontology_signal": "meal_as_travel",
|
||||
"evaluator": "meal_as_travel_same_city",
|
||||
"applies_to": {
|
||||
"domains": ["travel"]
|
||||
},
|
||||
"inputs": {
|
||||
"declared": "claim.location",
|
||||
"meal_city": "attachment.cities"
|
||||
},
|
||||
"params": {},
|
||||
"outcomes": {
|
||||
"pass": { "severity": "none", "action": "continue" },
|
||||
"fail": {
|
||||
"severity": "medium",
|
||||
"action": "warn"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"owner": "风控与审计部",
|
||||
"stability": "platform_builtin",
|
||||
"source_ref": "常用risk.txt / 三、餐费招待 / 同城餐饮归集异地差旅",
|
||||
"updated_at": "2026-05-19"
|
||||
}
|
||||
}
|
||||
@@ -1,29 +0,0 @@
|
||||
{
|
||||
"schema_version": "1.0",
|
||||
"rule_code": "risk.expense.reason_too_brief",
|
||||
"name": "报销事由过短",
|
||||
"enabled": true,
|
||||
"risk_dimension": "reason_quality",
|
||||
"ontology_signal": "reason_too_brief",
|
||||
"evaluator": "reason_too_brief",
|
||||
"applies_to": {},
|
||||
"inputs": {
|
||||
"reason": "claim.reason_corpus"
|
||||
},
|
||||
"params": {
|
||||
"min_reason_length": 6
|
||||
},
|
||||
"outcomes": {
|
||||
"pass": { "severity": "none", "action": "continue" },
|
||||
"fail": {
|
||||
"severity": "medium",
|
||||
"action": "warn"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"owner": "风控与审计部",
|
||||
"stability": "platform_builtin",
|
||||
"source_ref": "常用risk.txt / 通用 / 事由不足以支撑真实性判断",
|
||||
"updated_at": "2026-05-19"
|
||||
}
|
||||
}
|
||||
@@ -1,32 +0,0 @@
|
||||
{
|
||||
"schema_version": "1.0",
|
||||
"rule_code": "risk.invoice.claimant_buyer_name_match",
|
||||
"name": "报销人与发票抬头一致",
|
||||
"enabled": true,
|
||||
"risk_dimension": "identity_consistency",
|
||||
"ontology_signal": "buyer_name_mismatch",
|
||||
"evaluator": "identity_consistency",
|
||||
"applies_to": {
|
||||
"min_attachments": 1
|
||||
},
|
||||
"inputs": {
|
||||
"claimant": "claim.employee_name",
|
||||
"buyer": "attachment.buyer_name"
|
||||
},
|
||||
"params": {
|
||||
"allow_keywords": ["代报", "集团", "公司", "有限公司"]
|
||||
},
|
||||
"outcomes": {
|
||||
"pass": { "severity": "none", "action": "continue" },
|
||||
"fail": {
|
||||
"severity": "high",
|
||||
"action": "manual_review"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"owner": "风控与审计部",
|
||||
"stability": "platform_builtin",
|
||||
"source_ref": "常用risk.txt / 二、发票类 / 抬头错误",
|
||||
"updated_at": "2026-05-19"
|
||||
}
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
{
|
||||
"schema_version": "1.0",
|
||||
"rule_code": "risk.invoice.cross_year_invoice",
|
||||
"name": "跨年发票入账",
|
||||
"enabled": true,
|
||||
"risk_dimension": "cross_year_invoice",
|
||||
"ontology_signal": "cross_year_invoice",
|
||||
"evaluator": "cross_year_invoice",
|
||||
"applies_to": {
|
||||
"min_attachments": 1
|
||||
},
|
||||
"inputs": {
|
||||
"invoice_date": "attachment.invoice_date",
|
||||
"claim_date": ["claim.occurred_at", "item.item_date"]
|
||||
},
|
||||
"params": {},
|
||||
"outcomes": {
|
||||
"pass": { "severity": "none", "action": "continue" },
|
||||
"fail": {
|
||||
"severity": "medium",
|
||||
"action": "warn"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"owner": "风控与审计部",
|
||||
"stability": "platform_builtin",
|
||||
"source_ref": "常用risk.txt / 二、发票类 / 跨年发票",
|
||||
"updated_at": "2026-05-19"
|
||||
}
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
{
|
||||
"schema_version": "1.0",
|
||||
"rule_code": "risk.invoice.document_expense_mismatch",
|
||||
"name": "开票内容与报销场景不符",
|
||||
"enabled": true,
|
||||
"risk_dimension": "document_expense_mismatch",
|
||||
"ontology_signal": "document_expense_mismatch",
|
||||
"evaluator": "document_expense_mismatch",
|
||||
"applies_to": {
|
||||
"min_attachments": 1
|
||||
},
|
||||
"inputs": {
|
||||
"document_type": "attachment.document_type",
|
||||
"expense_type": ["claim.expense_type", "item.item_type"]
|
||||
},
|
||||
"params": {},
|
||||
"outcomes": {
|
||||
"pass": { "severity": "none", "action": "continue" },
|
||||
"fail": {
|
||||
"severity": "medium",
|
||||
"action": "warn"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"owner": "风控与审计部",
|
||||
"stability": "platform_builtin",
|
||||
"source_ref": "常用risk.txt / 二、发票类 / 开票内容与业务不符",
|
||||
"updated_at": "2026-05-19"
|
||||
}
|
||||
}
|
||||
@@ -1,29 +0,0 @@
|
||||
{
|
||||
"schema_version": "1.0",
|
||||
"rule_code": "risk.invoice.duplicate_invoice",
|
||||
"name": "发票重复报销",
|
||||
"enabled": true,
|
||||
"risk_dimension": "duplicate_invoice",
|
||||
"ontology_signal": "duplicate_invoice",
|
||||
"evaluator": "duplicate_invoice",
|
||||
"applies_to": {
|
||||
"min_attachments": 1
|
||||
},
|
||||
"inputs": {
|
||||
"invoice_no": "attachment.invoice_no"
|
||||
},
|
||||
"params": {},
|
||||
"outcomes": {
|
||||
"pass": { "severity": "none", "action": "continue" },
|
||||
"fail": {
|
||||
"severity": "high",
|
||||
"action": "block"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"owner": "风控与审计部",
|
||||
"stability": "platform_builtin",
|
||||
"source_ref": "常用risk.txt / 二、发票类 / 重复报销",
|
||||
"updated_at": "2026-05-19"
|
||||
}
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
{
|
||||
"schema_version": "1.0",
|
||||
"rule_code": "risk.invoice.vague_goods_description",
|
||||
"name": "发票品名过于笼统",
|
||||
"enabled": true,
|
||||
"risk_dimension": "vague_goods_description",
|
||||
"ontology_signal": "vague_goods_description",
|
||||
"evaluator": "vague_goods_description",
|
||||
"applies_to": {
|
||||
"expense_types": ["office", "other"],
|
||||
"min_attachments": 1
|
||||
},
|
||||
"inputs": {
|
||||
"ocr": "attachment.ocr_text"
|
||||
},
|
||||
"params": {},
|
||||
"outcomes": {
|
||||
"pass": { "severity": "none", "action": "continue" },
|
||||
"fail": {
|
||||
"severity": "medium",
|
||||
"action": "warn"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"owner": "风控与审计部",
|
||||
"stability": "platform_builtin",
|
||||
"source_ref": "常用risk.txt / 二、发票类 / 品名笼统",
|
||||
"updated_at": "2026-05-19"
|
||||
}
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
{
|
||||
"schema_version": "1.0",
|
||||
"rule_code": "risk.invoice.void_or_red_invoice",
|
||||
"name": "作废或红冲发票",
|
||||
"enabled": true,
|
||||
"risk_dimension": "void_or_red_invoice",
|
||||
"ontology_signal": "void_or_red_invoice",
|
||||
"evaluator": "invoice_void_or_red",
|
||||
"applies_to": {
|
||||
"min_attachments": 1
|
||||
},
|
||||
"inputs": {
|
||||
"status": "attachment.invoice_status",
|
||||
"ocr": "attachment.ocr_text"
|
||||
},
|
||||
"params": {},
|
||||
"outcomes": {
|
||||
"pass": { "severity": "none", "action": "continue" },
|
||||
"fail": {
|
||||
"severity": "high",
|
||||
"action": "block"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"owner": "风控与审计部",
|
||||
"stability": "platform_builtin",
|
||||
"source_ref": "常用risk.txt / 二、发票类 / 作废红冲发票",
|
||||
"updated_at": "2026-05-19"
|
||||
}
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
{
|
||||
"schema_version": "1.0",
|
||||
"rule_code": "risk.travel.base_location_overlap",
|
||||
"name": "常驻地重合出差风险",
|
||||
"enabled": true,
|
||||
"risk_dimension": "base_location_overlap",
|
||||
"ontology_signal": "base_location_overlap",
|
||||
"evaluator": "base_location_overlap",
|
||||
"applies_to": {
|
||||
"domains": ["travel"]
|
||||
},
|
||||
"inputs": {
|
||||
"employee_base": "employee.location",
|
||||
"declared": "claim.location"
|
||||
},
|
||||
"params": {},
|
||||
"outcomes": {
|
||||
"pass": { "severity": "none", "action": "continue" },
|
||||
"fail": {
|
||||
"severity": "high",
|
||||
"action": "manual_review"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"owner": "风控与审计部",
|
||||
"stability": "platform_builtin",
|
||||
"source_ref": "常用risk.txt / 一、出差类 / 两头在外",
|
||||
"updated_at": "2026-05-19"
|
||||
}
|
||||
}
|
||||
@@ -1,29 +0,0 @@
|
||||
{
|
||||
"schema_version": "1.0",
|
||||
"rule_code": "risk.travel.destination_receipt_location",
|
||||
"name": "申报地点与票据地点一致",
|
||||
"risk_dimension": "location_consistency",
|
||||
"ontology_signal": "location_mismatch",
|
||||
"evaluator": "location_consistency",
|
||||
"inputs": {
|
||||
"declared": "claim.location",
|
||||
"evidence": ["attachment.cities", "item.item_location"]
|
||||
},
|
||||
"params": {
|
||||
"match_mode": "city_fuzzy",
|
||||
"missing_evidence": "warn"
|
||||
},
|
||||
"outcomes": {
|
||||
"pass": { "severity": "none", "action": "continue" },
|
||||
"fail": {
|
||||
"severity": "high",
|
||||
"action": "manual_review",
|
||||
"message_template": "申报地点 {declared} 与票据识别地点 {evidence} 不一致"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"owner": "风控与审计部",
|
||||
"stability": "platform_builtin",
|
||||
"updated_at": "2026-05-18"
|
||||
}
|
||||
}
|
||||
@@ -1,32 +0,0 @@
|
||||
{
|
||||
"schema_version": "1.0",
|
||||
"rule_code": "risk.travel.hotel_without_itinerary",
|
||||
"name": "住宿城市与行程不一致",
|
||||
"enabled": true,
|
||||
"risk_dimension": "hotel_itinerary",
|
||||
"ontology_signal": "hotel_itinerary_mismatch",
|
||||
"evaluator": "hotel_without_itinerary",
|
||||
"applies_to": {
|
||||
"domains": ["travel"],
|
||||
"expense_types": ["hotel", "travel"]
|
||||
},
|
||||
"inputs": {
|
||||
"declared": "claim.location",
|
||||
"hotel": "attachment.hotel_city",
|
||||
"itinerary": "attachment.route_cities"
|
||||
},
|
||||
"params": {},
|
||||
"outcomes": {
|
||||
"pass": { "severity": "none", "action": "continue" },
|
||||
"fail": {
|
||||
"severity": "high",
|
||||
"action": "manual_review"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"owner": "风控与审计部",
|
||||
"stability": "platform_builtin",
|
||||
"source_ref": "常用risk.txt / 三、住宿费 / 夜间异地住宿、酒店连续多天",
|
||||
"updated_at": "2026-05-19"
|
||||
}
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
{
|
||||
"schema_version": "1.0",
|
||||
"rule_code": "risk.travel.intracity_travel_claim",
|
||||
"name": "同城虚报差旅补贴",
|
||||
"enabled": true,
|
||||
"risk_dimension": "intracity_travel",
|
||||
"ontology_signal": "intracity_travel",
|
||||
"evaluator": "intracity_travel_claim",
|
||||
"applies_to": {
|
||||
"domains": ["travel"]
|
||||
},
|
||||
"inputs": {
|
||||
"declared": "claim.location",
|
||||
"evidence": ["attachment.route", "attachment.cities"]
|
||||
},
|
||||
"params": {},
|
||||
"outcomes": {
|
||||
"pass": { "severity": "none", "action": "continue" },
|
||||
"fail": {
|
||||
"severity": "high",
|
||||
"action": "manual_review"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"owner": "风控与审计部",
|
||||
"stability": "platform_builtin",
|
||||
"source_ref": "常用risk.txt / 一、出差类 / 同城虚报差旅",
|
||||
"updated_at": "2026-05-19"
|
||||
}
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
{
|
||||
"schema_version": "1.0",
|
||||
"rule_code": "risk.travel.multi_city_reason_required",
|
||||
"name": "多城市行程需说明",
|
||||
"enabled": true,
|
||||
"risk_dimension": "multi_city_itinerary",
|
||||
"ontology_signal": "multi_city_itinerary",
|
||||
"evaluator": "multi_city_reason_required",
|
||||
"applies_to": {
|
||||
"domains": ["travel"]
|
||||
},
|
||||
"inputs": {
|
||||
"reason": "claim.reason_corpus",
|
||||
"cities": ["attachment.cities", "item.item_location"]
|
||||
},
|
||||
"params": {},
|
||||
"outcomes": {
|
||||
"pass": { "severity": "none", "action": "continue" },
|
||||
"fail": {
|
||||
"severity": "medium",
|
||||
"action": "warn"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"owner": "风控与审计部",
|
||||
"stability": "platform_builtin",
|
||||
"source_ref": "常用risk.txt / 一、出差类 / 绕道出行、行程不符",
|
||||
"updated_at": "2026-05-19"
|
||||
}
|
||||
}
|
||||
68
server/scripts/start_hermes_daemon.py
Normal file
68
server/scripts/start_hermes_daemon.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import logging
|
||||
|
||||
# Ensure src is in the python path
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../src")))
|
||||
|
||||
from app.core.logging import setup_logging
|
||||
from app.db.session import get_session_factory
|
||||
from app.models.hermes_config import HermesTaskConfig
|
||||
from app.services.hermes_scheduler import hermes_scheduler
|
||||
|
||||
logger = logging.getLogger("hermes_daemon")
|
||||
|
||||
def init_default_config():
    """Seed the default Hermes task configs when they are missing.

    For each default task type (``global_risk_scan`` and
    ``weekly_expense_report``) a ``HermesTaskConfig`` row is added only when
    no row with that ``task_type`` exists yet. All additions are committed
    together in a single ``db.commit()``.

    This is best-effort: any exception is logged (with its traceback) and
    swallowed so the caller can continue. The session is always closed.
    """
    # (task_type, cron_expression) pairs, processed in this order.
    default_tasks = (
        ("global_risk_scan", "0 2 * * *"),
        # Every Monday at 09:00 (in the simplified version this temporarily
        # stands in for the weekly-report frequency).
        ("weekly_expense_report", "0 9 * * 1"),
    )

    session_factory = get_session_factory()
    db = session_factory()
    try:
        for task_type, cron_expression in default_tasks:
            existing = db.query(HermesTaskConfig).filter_by(task_type=task_type).first()
            if existing:
                continue
            logger.info("No %s config found. Initializing default config.", task_type)
            db.add(
                HermesTaskConfig(
                    task_type=task_type,
                    cron_expression=cron_expression,
                    is_enabled=True,
                )
            )

        db.commit()
    except Exception as e:
        # logger.exception keeps the traceback, which a plain error log of
        # str(e) would lose; the failure is still deliberately not re-raised.
        logger.exception("Failed to initialize default config: %s", e)
    finally:
        db.close()
|
||||
|
||||
|
||||
def main():
    """Run the Hermes background daemon until it is asked to stop.

    Sets up logging, seeds the default task configs, starts
    ``hermes_scheduler`` and then keeps the calling thread alive in a sleep
    loop. The scheduler is shut down on the way out regardless of how the
    loop ends (Ctrl+C, SIGTERM, or an unexpected exception).
    """
    import signal  # local import: only this entry point needs it

    def _on_sigterm(signum, frame):
        # Unwind the sleep loop so SIGTERM takes the same shutdown path as Ctrl+C.
        raise SystemExit(0)

    setup_logging()
    logger.info("Initializing Hermes Background Daemon...")

    # Seed the default task configs.
    init_default_config()

    # Start the scheduler.
    hermes_scheduler.start()

    try:
        try:
            signal.signal(signal.SIGTERM, _on_sigterm)
        except ValueError:
            # signal.signal() only works in the main thread of the main
            # interpreter; when main() is called from elsewhere, keep the
            # previous behaviour (Ctrl+C handling only).
            logger.debug("SIGTERM handler not installed: not running in the main thread.")

        logger.info("Hermes Daemon is running. Press Ctrl+C to stop.")
        while True:
            time.sleep(1)  # keep the main thread alive
    except KeyboardInterrupt:
        logger.info("Keyboard interrupt received. Shutting down...")
    except SystemExit:
        logger.info("Termination signal received. Shutting down...")
    finally:
        # Runs for every exit path so the scheduler is never left running.
        hermes_scheduler.shutdown()
        logger.info("Shutdown complete.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -23,7 +23,16 @@ from app.schemas.agent_asset import (
|
||||
AgentAssetRead,
|
||||
AgentAssetReviewCreate,
|
||||
AgentAssetReviewRead,
|
||||
AgentAssetRiskRuleEnabledUpdate,
|
||||
AgentAssetRiskRuleGenerateRequest,
|
||||
AgentAssetRiskRuleLatestTestSummary,
|
||||
AgentAssetRiskRuleReportRequest,
|
||||
AgentAssetRiskRuleReturnRequest,
|
||||
AgentAssetRiskRuleSampleTestRequest,
|
||||
AgentAssetRiskRuleScenarioTestRequest,
|
||||
AgentAssetRiskRuleSimulationRead,
|
||||
AgentAssetRiskRuleSimulationRequest,
|
||||
AgentAssetRiskRuleTestRunRead,
|
||||
AgentAssetRuleJsonRead,
|
||||
AgentAssetRuleJsonWrite,
|
||||
AgentAssetSpreadsheetChangeRecordRead,
|
||||
@@ -131,6 +140,116 @@ def get_agent_asset_rule_json(
|
||||
_handle_asset_error(exc)
|
||||
|
||||
|
||||
@router.get(
    "/{asset_id}/risk-rule-tests/latest",
    response_model=AgentAssetRiskRuleLatestTestSummary,
    summary="读取风险规则最近测试摘要",
    description="返回当前风险规则工作版本最近一次样例测试、场景试运行和测试报告。",
)
def get_agent_asset_risk_rule_latest_test(
    asset_id: str,
    _: CurrentUser,
    db: DbSession,
) -> AgentAssetRiskRuleLatestTestSummary:
    """Return the latest risk-rule test summary for ``asset_id``.

    Delegates to ``AgentAssetService.get_latest_risk_rule_test_summary``.
    Any exception is passed to ``_handle_asset_error`` (defined elsewhere in
    this module).
    """
    try:
        service = AgentAssetService(db)
        return service.get_latest_risk_rule_test_summary(asset_id)
    except Exception as error:
        _handle_asset_error(error)
|
||||
|
||||
|
||||
@router.post(
    "/{asset_id}/risk-rule-tests/simulate",
    response_model=AgentAssetRiskRuleSimulationRead,
    summary="执行风险规则对话仿真",
    description="基于临时对话输入和附件元信息执行风险识别,不创建业务单据,不写入测试记录。",
)
def simulate_agent_asset_risk_rule_test(
    asset_id: str,
    payload: AgentAssetRiskRuleSimulationRequest,
    _: RuleEditorUser,
    db: DbSession,
) -> AgentAssetRiskRuleSimulationRead:
    """Run a conversational risk-rule simulation for ``asset_id``.

    Delegates to ``AgentAssetService.simulate_risk_rule_message`` with the
    request payload. Any exception is passed to ``_handle_asset_error``
    (defined elsewhere in this module).
    """
    try:
        service = AgentAssetService(db)
        return service.simulate_risk_rule_message(asset_id, payload)
    except Exception as error:
        _handle_asset_error(error)
|
||||
|
||||
|
||||
@router.post(
    "/{asset_id}/risk-rule-tests/sample",
    response_model=AgentAssetRiskRuleTestRunRead,
    summary="执行风险规则快速样例测试",
    description="使用人工样例或系统默认样例执行当前 JSON 风险规则,不依赖大模型判断结果。",
)
def run_agent_asset_risk_rule_sample_test(
    asset_id: str,
    payload: AgentAssetRiskRuleSampleTestRequest,
    current_user: RuleEditorUser,
    db: DbSession,
    x_actor: ActorHeader = None,
    x_request_id: RequestIdHeader = None,
) -> AgentAssetRiskRuleTestRunRead:
    """Run a quick sample test of the risk rule identified by ``asset_id``.

    The actor is the ``x_actor`` header if set, otherwise the current user's
    name, otherwise ``"system"``; a value that is blank after stripping also
    falls back to ``"system"``. Delegates to
    ``AgentAssetService.run_risk_rule_sample_test``; any exception is passed
    to ``_handle_asset_error`` (defined elsewhere in this module).
    """
    try:
        service = AgentAssetService(db)
        actor_name = (x_actor or current_user.name or "system").strip() or "system"
        return service.run_risk_rule_sample_test(
            asset_id,
            payload,
            actor=actor_name,
            request_id=x_request_id,
        )
    except Exception as error:
        _handle_asset_error(error)
|
||||
|
||||
|
||||
@router.post(
    "/{asset_id}/risk-rule-tests/scenario",
    response_model=AgentAssetRiskRuleTestRunRead,
    summary="执行风险规则真实场景试运行",
    description="按测试意图读取真实业务样本并沙盒执行风险规则,不写回业务单据。",
)
def run_agent_asset_risk_rule_scenario_test(
    asset_id: str,
    payload: AgentAssetRiskRuleScenarioTestRequest,
    current_user: RuleEditorUser,
    db: DbSession,
    x_actor: ActorHeader = None,
    x_request_id: RequestIdHeader = None,
) -> AgentAssetRiskRuleTestRunRead:
    """Run a real-scenario trial of the risk rule identified by ``asset_id``.

    The actor is the ``x_actor`` header if set, otherwise the current user's
    name, otherwise ``"system"``; a value that is blank after stripping also
    falls back to ``"system"``. Delegates to
    ``AgentAssetService.run_risk_rule_scenario_test``; any exception is
    passed to ``_handle_asset_error`` (defined elsewhere in this module).
    """
    try:
        service = AgentAssetService(db)
        actor_name = (x_actor or current_user.name or "system").strip() or "system"
        return service.run_risk_rule_scenario_test(
            asset_id,
            payload,
            actor=actor_name,
            request_id=x_request_id,
        )
    except Exception as error:
        _handle_asset_error(error)
|
||||
|
||||
|
||||
@router.post(
    "/{asset_id}/risk-rule-tests/report",
    response_model=AgentAssetRiskRuleTestRunRead,
    summary="确认风险规则测试报告",
    description="在样例测试和真实场景试运行通过后,保存当前版本测试通过记录。",
)
def confirm_agent_asset_risk_rule_test_report(
    asset_id: str,
    payload: AgentAssetRiskRuleReportRequest,
    current_user: RuleEditorUser,
    db: DbSession,
    x_actor: ActorHeader = None,
    x_request_id: RequestIdHeader = None,
) -> AgentAssetRiskRuleTestRunRead:
    """Confirm the test report of the risk rule identified by ``asset_id``.

    The actor is the ``x_actor`` header if set, otherwise the current user's
    name, otherwise ``"system"``; a value that is blank after stripping also
    falls back to ``"system"``. Delegates to
    ``AgentAssetService.confirm_risk_rule_test_report``; any exception is
    passed to ``_handle_asset_error`` (defined elsewhere in this module).
    """
    try:
        service = AgentAssetService(db)
        actor_name = (x_actor or current_user.name or "system").strip() or "system"
        return service.confirm_risk_rule_test_report(
            asset_id,
            payload,
            actor=actor_name,
            request_id=x_request_id,
        )
    except Exception as error:
        _handle_asset_error(error)
|
||||
|
||||
|
||||
@router.put(
|
||||
"/{asset_id}/rule-json",
|
||||
response_model=AgentAssetRuleJsonRead,
|
||||
@@ -586,6 +705,112 @@ def activate_agent_asset(
|
||||
_handle_asset_error(exc)
|
||||
|
||||
|
||||
@router.post(
    "/{asset_id}/risk-rule-enabled",
    response_model=AgentAssetRead,
    summary="设置风险规则启用状态",
    description="高级管理人员可独立启用或停用 JSON 风险规则;停用后即使已上线也不会进入真实业务扫描。",
)
def set_agent_asset_risk_rule_enabled(
    asset_id: str,
    payload: AgentAssetRiskRuleEnabledUpdate,
    current_user: RuleReviewerUser,
    db: DbSession,
    x_actor: ActorHeader = None,
    x_request_id: RequestIdHeader = None,
) -> AgentAssetRead:
    """Set the enabled flag of a risk rule and return the refreshed asset.

    Calls ``AgentAssetService.set_risk_rule_enabled`` with ``payload.enabled``,
    then re-reads the asset through ``get_asset``. A missing asset on re-read
    raises ``LookupError`` inside the ``try``, so it is also routed through
    ``_handle_asset_error`` (defined elsewhere in this module), like every
    other exception.
    """
    try:
        actor_name = (x_actor or current_user.name or "system").strip() or "system"
        updated = AgentAssetService(db).set_risk_rule_enabled(
            asset_id,
            enabled=payload.enabled,
            actor=actor_name,
            request_id=x_request_id,
        )
        # Re-read with a fresh service instance, as the original code does.
        detail = AgentAssetService(db).get_asset(updated.id)
        if detail is not None:
            return detail
        raise LookupError("Asset not found")
    except Exception as error:
        _handle_asset_error(error)
|
||||
|
||||
|
||||
@router.post(
    "/{asset_id}/return",
    response_model=AgentAssetRiskRuleLatestTestSummary,
    summary="回退待审核风险规则",
    description="高级管理人员将待审核风险规则回退到草稿,并记录回退原因。",
)
def return_agent_asset_risk_rule(
    asset_id: str,
    payload: AgentAssetRiskRuleReturnRequest,
    current_user: RuleReviewerUser,
    db: DbSession,
    x_actor: ActorHeader = None,
    x_request_id: RequestIdHeader = None,
) -> AgentAssetRiskRuleLatestTestSummary:
    """Return a pending-review risk rule, recording ``payload.note``.

    The actor is the ``x_actor`` header if set, otherwise the current user's
    name, otherwise ``"system"``; a value that is blank after stripping also
    falls back to ``"system"``. Delegates to
    ``AgentAssetService.return_risk_rule``; any exception is passed to
    ``_handle_asset_error`` (defined elsewhere in this module).
    """
    try:
        service = AgentAssetService(db)
        actor_name = (x_actor or current_user.name or "system").strip() or "system"
        return service.return_risk_rule(
            asset_id,
            note=payload.note,
            actor=actor_name,
            request_id=x_request_id,
        )
    except Exception as error:
        _handle_asset_error(error)
|
||||
|
||||
|
||||
@router.post(
    "/{asset_id}/publish",
    response_model=AgentAssetRead,
    summary="审核并发布风险规则",
    description="高级管理人员确认测试通过后,将待审核风险规则一次性审核通过并发布上线。",
)
def publish_agent_asset_risk_rule(
    asset_id: str,
    current_user: RuleReviewerUser,
    db: DbSession,
    x_actor: ActorHeader = None,
    x_request_id: RequestIdHeader = None,
) -> AgentAssetRead:
    """Publish a risk rule and return the refreshed asset.

    Calls ``AgentAssetService.publish_risk_rule``, then re-reads the asset
    through ``get_asset``. A missing asset on re-read raises ``LookupError``
    inside the ``try``, so it is also routed through ``_handle_asset_error``
    (defined elsewhere in this module), like every other exception.
    """
    try:
        actor_name = (x_actor or current_user.name or "system").strip() or "system"
        published = AgentAssetService(db).publish_risk_rule(
            asset_id,
            actor=actor_name,
            request_id=x_request_id,
        )
        # Re-read with a fresh service instance, as the original code does.
        detail = AgentAssetService(db).get_asset(published.id)
        if detail is not None:
            return detail
        raise LookupError("Asset not found")
    except Exception as error:
        _handle_asset_error(error)
|
||||
|
||||
|
||||
@router.delete(
    "/{asset_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    summary="删除未发布风险规则",
    description="仅允许删除从未发布过的 JSON 风险规则,并同步删除规则 JSON 文件。",
)
def delete_agent_asset(
    asset_id: str,
    current_user: RuleEditorUser,
    db: DbSession,
    x_actor: ActorHeader = None,
    x_request_id: RequestIdHeader = None,
) -> None:
    """Delete the unpublished asset identified by ``asset_id``.

    The actor is the ``x_actor`` header if set, otherwise the current user's
    name, otherwise ``"system"``; a value that is blank after stripping also
    falls back to ``"system"``. Delegates to
    ``AgentAssetService.delete_unpublished_asset`` and returns nothing; any
    exception is passed to ``_handle_asset_error`` (defined elsewhere in this
    module).
    """
    try:
        service = AgentAssetService(db)
        actor_name = (x_actor or current_user.name or "system").strip() or "system"
        service.delete_unpublished_asset(
            asset_id,
            actor=actor_name,
            request_id=x_request_id,
        )
    except Exception as error:
        _handle_asset_error(error)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/{asset_id}/versions/{version}/restore",
|
||||
response_model=AgentAssetRead,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from app.db.base_class import Base
|
||||
from app.models.agent_conversation import AgentConversation, AgentConversationMessage
|
||||
from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetVersion
|
||||
from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetTestRun, AgentAssetVersion
|
||||
from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog
|
||||
from app.models.approval import ApprovalRecord
|
||||
from app.models.audit_log import AuditLog
|
||||
@@ -27,6 +27,7 @@ __all__ = [
|
||||
"AgentConversationMessage",
|
||||
"AgentAsset",
|
||||
"AgentAssetReview",
|
||||
"AgentAssetTestRun",
|
||||
"AgentAssetVersion",
|
||||
"AgentRun",
|
||||
"AgentToolCall",
|
||||
|
||||
@@ -11,6 +11,8 @@ from app.models.financial_record import (
|
||||
ExpenseClaim,
|
||||
ExpenseClaimItem,
|
||||
)
|
||||
from app.models.hermes_config import HermesTaskConfig, HermesTaskExecutionLog
|
||||
from app.models.hermes_report import HermesRiskReport
|
||||
from app.models.organization import OrganizationUnit
|
||||
from app.models.reimbursement import ReimbursementRequest
|
||||
from app.models.role import Role
|
||||
@@ -34,6 +36,9 @@ __all__ = [
|
||||
"EmployeeChangeLog",
|
||||
"ExpenseClaim",
|
||||
"ExpenseClaimItem",
|
||||
"HermesTaskConfig",
|
||||
"HermesTaskExecutionLog",
|
||||
"HermesRiskReport",
|
||||
"OrganizationUnit",
|
||||
"ReimbursementRequest",
|
||||
"Role",
|
||||
|
||||
@@ -4,7 +4,7 @@ import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import DateTime, ForeignKey, String, Text, UniqueConstraint, func
|
||||
from sqlalchemy import Boolean, DateTime, ForeignKey, String, Text, UniqueConstraint, func
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
from sqlalchemy.types import JSON
|
||||
|
||||
@@ -46,6 +46,12 @@ class AgentAsset(Base):
|
||||
order_by="desc(AgentAssetReview.created_at)",
|
||||
)
|
||||
scheduled_runs = relationship("AgentRun", back_populates="task_asset")
|
||||
test_runs = relationship(
|
||||
"AgentAssetTestRun",
|
||||
back_populates="asset",
|
||||
cascade="all, delete-orphan",
|
||||
order_by="desc(AgentAssetTestRun.created_at)",
|
||||
)
|
||||
|
||||
|
||||
class AgentAssetVersion(Base):
|
||||
@@ -79,3 +85,21 @@ class AgentAssetReview(Base):
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
|
||||
|
||||
asset = relationship("AgentAsset", back_populates="reviews")
|
||||
|
||||
|
||||
class AgentAssetTestRun(Base):
    """ORM mapping for one recorded test run of an agent asset version.

    Rows live in ``agent_asset_test_runs`` and are reachable from the parent
    asset through the ``AgentAsset.test_runs`` relationship.
    """

    __tablename__ = "agent_asset_test_runs"

    # UUID4 string generated on the Python side when no id is supplied.
    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    # Owning asset; references agent_assets.id.
    asset_id: Mapped[str] = mapped_column(ForeignKey("agent_assets.id"), index=True)
    # Asset version string the run was executed against.
    version: Mapped[str] = mapped_column(String(30), index=True)
    # Free-form type/status labels; the allowed values are not defined in this model.
    test_type: Mapped[str] = mapped_column(String(30), index=True)
    status: Mapped[str] = mapped_column(String(20), index=True)
    # Pass/fail flag, False unless set explicitly.
    passed: Mapped[bool] = mapped_column(Boolean, default=False, index=True)
    summary: Mapped[str] = mapped_column(Text(), default="")
    # JSON payloads for the run input and output; each row gets a fresh empty dict by default.
    input_json: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict)
    result_json: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict)
    created_by: Mapped[str] = mapped_column(String(100))
    # Timestamp filled in by the database server at insert time.
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())

    asset = relationship("AgentAsset", back_populates="test_runs")
|
||||
|
||||
@@ -3,7 +3,7 @@ from __future__ import annotations
|
||||
import uuid
|
||||
from datetime import date, datetime
|
||||
|
||||
from sqlalchemy import Boolean, Column, Date, DateTime, ForeignKey, String, Table, func
|
||||
from sqlalchemy import Boolean, Column, Date, DateTime, ForeignKey, Integer, String, Table, func
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.db.base_class import Base
|
||||
@@ -35,6 +35,7 @@ class Employee(Base):
|
||||
password_hash: Mapped[str | None] = mapped_column(String(255), nullable=True)
|
||||
employment_status: Mapped[str] = mapped_column(String(30), default="在职", index=True)
|
||||
sync_state: Mapped[str] = mapped_column(String(30), default="已同步")
|
||||
compliance_score: Mapped[int] = mapped_column(Integer, default=100)
|
||||
spotlight: Mapped[bool] = mapped_column(Boolean, default=False)
|
||||
last_sync_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
|
||||
organization_unit_id: Mapped[str | None] = mapped_column(
|
||||
|
||||
@@ -5,7 +5,7 @@ from datetime import date, datetime
|
||||
from decimal import Decimal
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import Date, DateTime, ForeignKey, Integer, Numeric, String, Text, func
|
||||
from sqlalchemy import Boolean, Date, DateTime, ForeignKey, Integer, Numeric, String, Text, func
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
from sqlalchemy.types import JSON
|
||||
|
||||
@@ -39,6 +39,8 @@ class ExpenseClaim(Base):
|
||||
status: Mapped[str] = mapped_column(String(30), index=True)
|
||||
approval_stage: Mapped[str | None] = mapped_column(String(50), nullable=True)
|
||||
risk_flags_json: Mapped[list[Any]] = mapped_column(JSON, default=list)
|
||||
hermes_scanned_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
|
||||
hermes_risk_flag: Mapped[bool] = mapped_column(Boolean, default=False, index=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
|
||||
updated_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
|
||||
|
||||
48
server/src/app/models/hermes_config.py
Normal file
48
server/src/app/models/hermes_config.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import Boolean, DateTime, ForeignKey, String, Text, func
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
from sqlalchemy.types import JSON
|
||||
|
||||
from app.db.base_class import Base
|
||||
|
||||
|
||||
class HermesTaskConfig(Base):
    """ORM mapping for a Hermes task configuration (``hermes_task_configs``).

    Each row carries a task type, a cron expression, an enabled flag and a
    JSON payload template, and owns its execution log rows.
    """

    __tablename__ = "hermes_task_configs"

    # UUID4 string generated on the Python side when no id is supplied.
    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    task_type: Mapped[str] = mapped_column(String(50), index=True)
    # Stored as plain text; no cron syntax validation happens at this layer.
    cron_expression: Mapped[str] = mapped_column(String(100))
    is_enabled: Mapped[bool] = mapped_column(Boolean, default=True)
    # Defaults to a fresh empty dict per row.
    payload_template: Mapped[dict[str, Any]] = mapped_column(JSON, default=dict)

    # Both timestamps are filled by the database; updated_at is refreshed on UPDATE.
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )

    # Logs are deleted together with the config and are ordered newest-started first.
    execution_logs = relationship(
        "HermesTaskExecutionLog",
        back_populates="config",
        cascade="all, delete-orphan",
        order_by="desc(HermesTaskExecutionLog.started_at)",
    )
|
||||
|
||||
|
||||
class HermesTaskExecutionLog(Base):
    """ORM mapping for one execution of a Hermes task config (``hermes_task_execution_logs``)."""

    __tablename__ = "hermes_task_execution_logs"

    # UUID4 string generated on the Python side when no id is supplied.
    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    # Owning configuration; references hermes_task_configs.id.
    config_id: Mapped[str] = mapped_column(String(36), ForeignKey("hermes_task_configs.id"), index=True)
    status: Mapped[str] = mapped_column(String(30), index=True)
    # Short result text (at most 255 characters) and an optional full error trace.
    result_summary: Mapped[str | None] = mapped_column(String(255), nullable=True)
    error_trace: Mapped[str | None] = mapped_column(Text(), nullable=True)

    # started_at is set by the database at insert; completed_at stays NULL until written.
    started_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
    completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)

    config = relationship("HermesTaskConfig", back_populates="execution_logs")
|
||||
34
server/src/app/models/hermes_report.py
Normal file
34
server/src/app/models/hermes_report.py
Normal file
@@ -0,0 +1,34 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import DateTime, ForeignKey, String, Text, func
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
from sqlalchemy.types import JSON
|
||||
|
||||
from app.db.base_class import Base
|
||||
|
||||
|
||||
class HermesRiskReport(Base):
    """ORM mapping for a risk report tied to an expense claim (``hermes_risk_reports``).

    Each report references the claim it concerns and the task execution log
    row it is associated with.
    """

    __tablename__ = "hermes_risk_reports"

    # UUID4 string generated on the Python side when no id is supplied.
    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    # References expense_claims.id and hermes_task_execution_logs.id respectively.
    claim_id: Mapped[str] = mapped_column(ForeignKey("expense_claims.id"), index=True)
    execution_log_id: Mapped[str] = mapped_column(ForeignKey("hermes_task_execution_logs.id"), index=True)

    risk_level: Mapped[str] = mapped_column(String(20), index=True)
    risk_type: Mapped[str] = mapped_column(String(50), index=True)
    risk_description: Mapped[str] = mapped_column(Text())

    # JSON list of other claim ids; stored as plain JSON, not as foreign keys.
    related_claim_ids: Mapped[list[str]] = mapped_column(JSON, default=list)
    # New reports start as "pending_review".
    status: Mapped[str] = mapped_column(String(30), default="pending_review", index=True)

    # Both timestamps are filled by the database; updated_at is refreshed on UPDATE.
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )

    claim = relationship("ExpenseClaim", foreign_keys=[claim_id])
    execution_log = relationship("HermesTaskExecutionLog", foreign_keys=[execution_log_id])
|
||||
@@ -3,7 +3,12 @@ from __future__ import annotations
|
||||
from sqlalchemy import or_, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetVersion
|
||||
from app.models.agent_asset import (
|
||||
AgentAsset,
|
||||
AgentAssetReview,
|
||||
AgentAssetTestRun,
|
||||
AgentAssetVersion,
|
||||
)
|
||||
|
||||
|
||||
class AgentAssetRepository:
|
||||
@@ -84,6 +89,17 @@ class AgentAssetRepository:
|
||||
stmt = stmt.limit(limit)
|
||||
return list(self.db.scalars(stmt).all())
|
||||
|
||||
def list_reviews_for_assets(self, asset_ids: list[str]) -> list[AgentAssetReview]:
    """Return the reviews of the given assets, grouped by asset, newest first.

    An empty ``asset_ids`` returns an empty list without querying the database.
    """
    if asset_ids:
        query = select(AgentAssetReview).where(AgentAssetReview.asset_id.in_(asset_ids))
        query = query.order_by(AgentAssetReview.asset_id, AgentAssetReview.created_at.desc())
        return list(self.db.scalars(query).all())
    return []
|
||||
|
||||
def get_review(
|
||||
self, asset_id: str, version: str, review_status: str | None = None
|
||||
) -> AgentAssetReview | None:
|
||||
@@ -119,3 +135,54 @@ class AgentAssetRepository:
|
||||
self.db.commit()
|
||||
self.db.refresh(review)
|
||||
return review
|
||||
|
||||
def list_test_runs(
    self,
    asset_id: str,
    *,
    version: str | None = None,
    test_type: str | None = None,
    status: str | None = None,
    limit: int | None = None,
) -> list[AgentAssetTestRun]:
    """List an asset's test runs, newest first, with optional filters.

    ``version``, ``test_type`` and ``status`` are applied only when truthy;
    ``limit`` is applied whenever it is not ``None``.
    """
    query = (
        select(AgentAssetTestRun)
        .where(AgentAssetTestRun.asset_id == asset_id)
        .order_by(AgentAssetTestRun.created_at.desc())
    )
    optional_filters = (
        (version, AgentAssetTestRun.version),
        (test_type, AgentAssetTestRun.test_type),
        (status, AgentAssetTestRun.status),
    )
    for wanted, column in optional_filters:
        if wanted:
            query = query.where(column == wanted)
    if limit is not None:
        query = query.limit(limit)
    return list(self.db.scalars(query).all())
|
||||
|
||||
def get_latest_test_run(
    self,
    asset_id: str,
    *,
    version: str | None = None,
    test_type: str | None = None,
    status: str | None = None,
) -> AgentAssetTestRun | None:
    """Return the most recent test run matching the filters, or ``None``.

    Delegates to ``list_test_runs`` with ``limit=1``.
    """
    newest = self.list_test_runs(
        asset_id,
        version=version,
        test_type=test_type,
        status=status,
        limit=1,
    )
    if not newest:
        return None
    return newest[0]
|
||||
|
||||
def create_test_run(self, test_run: AgentAssetTestRun) -> AgentAssetTestRun:
    """Add the test run to the session, commit, refresh it and return it."""
    session = self.db
    session.add(test_run)
    session.commit()
    session.refresh(test_run)
    return test_run
|
||||
|
||||
def delete_asset(self, asset: AgentAsset) -> None:
    """Delete the asset through the session and commit immediately."""
    session = self.db
    session.delete(asset)
    session.commit()
|
||||
|
||||
@@ -112,8 +112,111 @@ class AgentAssetRuleJsonRead(BaseModel):
|
||||
|
||||
class AgentAssetRiskRuleGenerateRequest(BaseModel):
|
||||
business_domain: AgentAssetDomain = AgentAssetDomain.EXPENSE
|
||||
expense_category: str | None = Field(default=None, max_length=40)
|
||||
risk_level: str = Field(default="medium", pattern="^(low|medium|high)$")
|
||||
natural_language: str = Field(min_length=8, max_length=2000)
|
||||
requires_attachment: bool = False
|
||||
|
||||
|
||||
# One sample case for a risk-rule sample test: a set of field values plus the
# expected outcome (hit or not, and optionally the expected severity).
class AgentAssetRiskRuleSampleCase(BaseModel):
    case_id: str | None = Field(default=None, max_length=60)
    name: str = Field(default="测试样例", min_length=1, max_length=80)
    # Field values for the case; the expected key schema is not defined here.
    values: dict[str, Any] = Field(default_factory=dict)
    # Cases expect a hit unless stated otherwise.
    expected_hit: bool = True
    expected_severity: str | None = Field(default=None, max_length=20)
    note: str | None = None
|
||||
|
||||
|
||||
# Request body for a sample test: an optional version plus the list of cases.
class AgentAssetRiskRuleSampleTestRequest(BaseModel):
    version: str | None = Field(default=None, max_length=30)
    cases: list[AgentAssetRiskRuleSampleCase] = Field(default_factory=list)
|
||||
|
||||
|
||||
# Request body for a scenario test: optional version, a free-text intent
# (up to 1000 characters) and a filter dict whose keys are not defined here.
class AgentAssetRiskRuleScenarioTestRequest(BaseModel):
    version: str | None = Field(default=None, max_length=30)
    intent: str = Field(default="", max_length=1000)
    filters: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
# Request body for a test report: optional version, a pass confirmation flag
# and an optional note.
class AgentAssetRiskRuleReportRequest(BaseModel):
    version: str | None = Field(default=None, max_length=30)
    # NOTE(review): defaults to True, so omitting the field confirms a pass; verify this is intended.
    confirm_passed: bool = True
    note: str | None = Field(default=None, max_length=1000)
|
||||
|
||||
|
||||
# Attachment description sent with a simulation request: file metadata plus
# any recognition output (OCR text, summary, typed document fields).
class AgentAssetRiskRuleSimulationAttachment(BaseModel):
    name: str = Field(default="", max_length=240)
    content_type: str | None = Field(default=None, max_length=120)
    # Non-negative size; the unit is not stated in this schema (presumably bytes; confirm).
    size: int | None = Field(default=None, ge=0)
    note: str | None = Field(default=None, max_length=500)
    ocr_text: str | None = Field(default=None, max_length=20000)
    summary: str | None = Field(default=None, max_length=2000)
    document_type: str | None = Field(default=None, max_length=80)
    document_type_label: str | None = Field(default=None, max_length=120)
    scene_code: str | None = Field(default=None, max_length=80)
    scene_label: str | None = Field(default=None, max_length=120)
    # Score constrained to the closed interval [0.0, 1.0].
    avg_score: float | None = Field(default=None, ge=0.0, le=1.0)
    recognition_status: str | None = Field(default=None, max_length=40)
    # Free-form dicts; the simulation service reads "key", "label" and "value" from each entry.
    document_fields: list[dict[str, Any]] = Field(default_factory=list)
|
||||
|
||||
|
||||
# Request body for a risk-rule simulation: optional version, the user's
# message, explicitly supplied field values and attachment descriptions.
class AgentAssetRiskRuleSimulationRequest(BaseModel):
    version: str | None = Field(default=None, max_length=30)
    message: str = Field(default="", max_length=4000)
    field_values: dict[str, Any] = Field(default_factory=dict)
    attachments: list[AgentAssetRiskRuleSimulationAttachment] = Field(default_factory=list)
|
||||
|
||||
|
||||
# Result of a risk-rule simulation. When ready is False the rule was not
# executed: stage names the blocking step and blocking_reason explains it.
class AgentAssetRiskRuleSimulationRead(BaseModel):
    version: str
    ready: bool = True
    stage: str = "executed"
    hit: bool
    severity: str = "none"
    severity_label: str = "未命中"
    summary: str
    blocking_reason: str = ""
    message: str = ""
    # Field values the rule was (or would be) evaluated with.
    field_values: dict[str, Any] = Field(default_factory=dict)
    evidence: dict[str, Any] = Field(default_factory=dict)
    attachments: list[dict[str, Any]] = Field(default_factory=list)
    recognized_fields: list[dict[str, Any]] = Field(default_factory=list)
    missing_fields: list[dict[str, Any]] = Field(default_factory=list)
    recognition_summary: list[dict[str, Any]] = Field(default_factory=list)
    execution_mode: str = "risk_rule_simulation"
    created_at: datetime
|
||||
|
||||
|
||||
# Request body carrying a mandatory note of 1 to 1000 characters.
class AgentAssetRiskRuleReturnRequest(BaseModel):
    note: str = Field(min_length=1, max_length=1000)
|
||||
|
||||
|
||||
# Request body holding the desired enabled flag; the field is required.
class AgentAssetRiskRuleEnabledUpdate(BaseModel):
    enabled: bool
|
||||
|
||||
|
||||
# Read model for a stored test run. from_attributes=True lets it be built
# directly from an object exposing these attributes (e.g. an ORM row).
class AgentAssetRiskRuleTestRunRead(BaseModel):
    model_config = ConfigDict(from_attributes=True)

    id: str
    asset_id: str
    version: str
    test_type: str
    status: str
    passed: bool
    summary: str
    input_json: dict[str, Any] = Field(default_factory=dict)
    result_json: dict[str, Any] = Field(default_factory=dict)
    created_by: str
    created_at: datetime
|
||||
|
||||
|
||||
# Summary with one optional test-run slot per category (sample, scenario,
# report) for a version, plus an overall pass flag that defaults to False.
class AgentAssetRiskRuleLatestTestSummary(BaseModel):
    version: str = ""
    sample: AgentAssetRiskRuleTestRunRead | None = None
    scenario: AgentAssetRiskRuleTestRunRead | None = None
    report: AgentAssetRiskRuleTestRunRead | None = None
    test_passed: bool = False
|
||||
|
||||
|
||||
class AgentAssetVersionTimelineItemRead(BaseModel):
|
||||
@@ -187,6 +290,8 @@ class AgentAssetListItem(BaseModel):
|
||||
config_json: dict[str, Any]
|
||||
change_count: int = 0
|
||||
modified_by: str | None = None
|
||||
published_by: str | None = None
|
||||
published_at: datetime | None = None
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
@@ -197,3 +302,4 @@ class AgentAssetRead(AgentAssetListItem):
|
||||
current_version_change_note: str | None = None
|
||||
recent_versions: list[AgentAssetVersionRead] = Field(default_factory=list)
|
||||
latest_review: AgentAssetReviewRead | None = None
|
||||
latest_test_summary: AgentAssetRiskRuleLatestTestSummary | None = None
|
||||
|
||||
@@ -164,6 +164,7 @@ class SettingsRead(BaseModel):
|
||||
companyForm: SettingsCompanyForm
|
||||
adminForm: SettingsAdminForm
|
||||
sessionForm: SettingsSessionForm
|
||||
hermesForm: dict
|
||||
llmForm: SettingsLlmForm
|
||||
renderForm: SettingsRenderForm
|
||||
logForm: SettingsLogForm
|
||||
@@ -174,6 +175,7 @@ class SettingsWrite(BaseModel):
|
||||
companyForm: SettingsCompanyForm
|
||||
adminForm: SettingsAdminForm
|
||||
sessionForm: SettingsSessionForm
|
||||
hermesForm: dict
|
||||
llmForm: SettingsLlmForm
|
||||
renderForm: SettingsRenderForm
|
||||
logForm: SettingsLogForm
|
||||
|
||||
552
server/src/app/services/agent_asset_risk_rule_simulation.py
Normal file
552
server/src/app/services/agent_asset_risk_rule_simulation.py
Normal file
@@ -0,0 +1,552 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from datetime import UTC, date, datetime
|
||||
from typing import Any
|
||||
|
||||
from app.schemas.agent_asset import (
|
||||
AgentAssetRiskRuleSimulationAttachment,
|
||||
AgentAssetRiskRuleSimulationRead,
|
||||
AgentAssetRiskRuleSimulationRequest,
|
||||
)
|
||||
from app.services.risk_rule_template_executor import RiskRuleTemplateExecutor
|
||||
|
||||
|
||||
class AgentAssetRiskRuleSimulationMixin:
|
||||
def simulate_risk_rule_message(
    self,
    asset_id: str,
    body: AgentAssetRiskRuleSimulationRequest,
) -> AgentAssetRiskRuleSimulationRead:
    """Run a risk rule against a simulated message and attachments.

    Field values are assembled from explicit input, attachment fields and
    text inference. If a precondition is unmet (as decided by
    ``_resolve_simulation_block``) a not-ready result describing the blocking
    stage is returned; otherwise the rule is evaluated against a synthetic
    claim and the hit / severity outcome is reported.
    """
    _, version, manifest = self._load_risk_rule_for_test(asset_id, body.version)
    attachments = self._normalize_simulation_attachments(body.attachments)
    field_values, source_map, recognized_fields = self._build_simulation_field_values(
        manifest,
        message=body.message,
        explicit_values=body.field_values,
        attachments=attachments,
    )
    recognition_summary = self._build_recognition_summary(attachments)
    required_keys = self._extract_execution_field_keys(manifest)
    missing_fields = self._build_missing_fields(
        manifest,
        field_values=field_values,
        source_map=source_map,
        required_keys=required_keys,
    )
    block = self._resolve_simulation_block(
        manifest,
        message=body.message,
        attachments=attachments,
        missing_fields=missing_fields,
    )

    # Payload shared by the blocked and the executed response.
    shared = {
        "version": version,
        "field_values": field_values,
        "attachments": attachments,
        "recognized_fields": recognized_fields,
        "recognition_summary": recognition_summary,
    }

    if block:
        return AgentAssetRiskRuleSimulationRead(
            ready=False,
            stage=block["stage"],
            hit=False,
            severity="none",
            severity_label="待补充",
            summary=block["summary"],
            blocking_reason=block["reason"],
            missing_fields=missing_fields,
            created_at=datetime.now(UTC),
            **shared,
        )

    claim, contexts = self._build_synthetic_claim(field_values, manifest)
    result = RiskRuleTemplateExecutor().evaluate(manifest, claim=claim, contexts=contexts)
    hit = result is not None
    if hit:
        # The rule's configured failure severity, defaulting to "medium".
        severity = str((manifest.get("outcomes") or {}).get("fail", {}).get("severity") or "medium")
    else:
        severity = "none"
    severity_label = self._risk_severity_label(severity)

    if isinstance(result, dict):
        message = str(result.get("message") or "")
        evidence = result.get("evidence")
    else:
        message = ""
        evidence = {}

    if hit:
        summary = f"本次仿真命中{severity_label},仅生成风险识别结果,不创建业务单据。"
    else:
        summary = "本次仿真未命中风险,仅完成规则识别,不创建业务单据。"

    return AgentAssetRiskRuleSimulationRead(
        ready=True,
        stage="executed",
        hit=hit,
        severity=severity,
        severity_label=severity_label,
        summary=summary,
        message=message,
        evidence=evidence if isinstance(evidence, dict) else {},
        missing_fields=[],
        created_at=datetime.now(UTC),
        **shared,
    )
|
||||
|
||||
def _build_simulation_field_values(
    self,
    manifest: dict[str, Any],
    *,
    message: str,
    explicit_values: dict[str, Any],
    attachments: list[dict[str, Any]],
) -> tuple[dict[str, Any], dict[str, str], list[dict[str, Any]]]:
    """Resolve a value and a source label for every field in the manifest.

    For each field the sources are tried in priority order: explicit input
    ("manual"), attachment document fields ("ocr"), then inference from the
    combined text ("inferred"). City hints from compare conditions are applied
    afterwards. Returns ``(values, source_map, recognized_fields)``.
    """
    fields = self._extract_manifest_fields(manifest)
    values: dict[str, Any] = {}
    source_map: dict[str, str] = {}
    safe_explicit_values = explicit_values if isinstance(explicit_values, dict) else {}
    corpus = self._build_simulation_corpus(message, attachments)
    city_mentions = self._extract_city_mentions(corpus)

    for spec in fields:
        key = spec["key"]
        # Probes are evaluated lazily, in order, so a later source is only
        # consulted when every earlier one produced nothing meaningful.
        probes = (
            ("manual", lambda: safe_explicit_values.get(key)),
            (
                "ocr",
                lambda: self._find_attachment_field_value(
                    key,
                    spec.get("label") or key,
                    attachments,
                ),
            ),
            (
                "inferred",
                lambda: self._infer_simulation_value(
                    key,
                    spec.get("label") or key,
                    corpus=corpus,
                    city_mentions=city_mentions,
                ),
            ),
        )
        for source, probe in probes:
            candidate = probe()
            if self._has_meaningful_value(candidate):
                values[key] = candidate
                source_map[key] = source
                break

    self._apply_compare_city_hints(manifest, values, source_map, city_mentions)
    recognized_fields = self._build_recognized_fields(fields, values, source_map)
    return values, source_map, recognized_fields
|
||||
|
||||
def _infer_simulation_value(
|
||||
self,
|
||||
field_key: str,
|
||||
label: str,
|
||||
*,
|
||||
corpus: str,
|
||||
city_mentions: list[str],
|
||||
) -> Any:
|
||||
key_text = f"{field_key} {label}".lower()
|
||||
if field_key.endswith("route_cities"):
|
||||
return city_mentions or []
|
||||
if "city" in field_key or "location" in field_key:
|
||||
if any(
|
||||
token in key_text
|
||||
for token in ("hotel", "invoice", "attachment", "发票", "酒店", "住宿")
|
||||
):
|
||||
return city_mentions[0] if city_mentions else ""
|
||||
if any(token in key_text for token in ("route", "trip", "目的", "行程", "申报")):
|
||||
return (
|
||||
city_mentions[1]
|
||||
if len(city_mentions) > 1
|
||||
else (city_mentions[0] if city_mentions else "")
|
||||
)
|
||||
return city_mentions[0] if city_mentions else ""
|
||||
if field_key.endswith("amount"):
|
||||
return self._extract_amount(corpus)
|
||||
if field_key.endswith("issue_date") or field_key.endswith("item_date"):
|
||||
return self._extract_iso_date(corpus)
|
||||
if field_key.endswith("invoice_no"):
|
||||
return self._extract_invoice_no(corpus)
|
||||
if field_key.endswith("ocr_text"):
|
||||
return corpus
|
||||
if field_key.endswith("goods_name"):
|
||||
return self._infer_goods_name(corpus)
|
||||
if field_key.endswith("item_type"):
|
||||
return self._infer_item_type(corpus)
|
||||
if field_key.endswith("reason") or field_key.endswith("item_reason"):
|
||||
return corpus or "仿真测试报销事由"
|
||||
return None
|
||||
|
||||
def _apply_compare_city_hints(
|
||||
self,
|
||||
manifest: dict[str, Any],
|
||||
values: dict[str, Any],
|
||||
source_map: dict[str, str],
|
||||
city_mentions: list[str],
|
||||
) -> None:
|
||||
if len(city_mentions) < 2:
|
||||
return
|
||||
params = manifest.get("params") if isinstance(manifest.get("params"), dict) else {}
|
||||
conditions = params.get("conditions") if isinstance(params.get("conditions"), list) else []
|
||||
for condition in conditions:
|
||||
if not isinstance(condition, dict):
|
||||
continue
|
||||
left = str(condition.get("left") or "").strip()
|
||||
right = str(condition.get("right") or "").strip()
|
||||
if not left or not right:
|
||||
continue
|
||||
if self._looks_like_city_field(left):
|
||||
values[left] = city_mentions[0]
|
||||
source_map[left] = source_map.get(left) or "inferred"
|
||||
if self._looks_like_city_field(right):
|
||||
values[right] = city_mentions[1]
|
||||
source_map[right] = source_map.get(right) or "inferred"
|
||||
|
||||
@staticmethod
|
||||
def _normalize_simulation_attachments(
|
||||
attachments: list[AgentAssetRiskRuleSimulationAttachment],
|
||||
) -> list[dict[str, Any]]:
|
||||
normalized: list[dict[str, Any]] = []
|
||||
for item in list(attachments or [])[:12]:
|
||||
normalized.append(
|
||||
{
|
||||
"name": str(item.name or "").strip(),
|
||||
"content_type": str(item.content_type or "").strip(),
|
||||
"size": item.size or 0,
|
||||
"note": str(item.note or "").strip(),
|
||||
"ocr_text": str(item.ocr_text or "").strip(),
|
||||
"summary": str(item.summary or "").strip(),
|
||||
"document_type": str(item.document_type or "").strip(),
|
||||
"document_type_label": str(item.document_type_label or "").strip(),
|
||||
"scene_code": str(item.scene_code or "").strip(),
|
||||
"scene_label": str(item.scene_label or "").strip(),
|
||||
"avg_score": float(item.avg_score or 0.0),
|
||||
"recognition_status": str(item.recognition_status or "").strip(),
|
||||
"document_fields": AgentAssetRiskRuleSimulationMixin._normalize_document_fields(
|
||||
item.document_fields
|
||||
),
|
||||
}
|
||||
)
|
||||
return normalized
|
||||
|
||||
@staticmethod
|
||||
def _build_simulation_corpus(message: str, attachments: list[dict[str, Any]]) -> str:
|
||||
parts = [str(message or "").strip()]
|
||||
for item in attachments:
|
||||
parts.append(str(item.get("name") or "").strip())
|
||||
parts.append(str(item.get("note") or "").strip())
|
||||
parts.append(str(item.get("summary") or "").strip())
|
||||
parts.append(str(item.get("ocr_text") or "").strip())
|
||||
for field in list(item.get("document_fields") or []):
|
||||
if isinstance(field, dict):
|
||||
parts.append(str(field.get("value") or "").strip())
|
||||
return "\n".join(part for part in parts if part)
|
||||
|
||||
@staticmethod
|
||||
def _normalize_document_fields(fields: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
normalized: list[dict[str, Any]] = []
|
||||
for field in list(fields or [])[:30]:
|
||||
if not isinstance(field, dict):
|
||||
continue
|
||||
key = str(field.get("key") or "").strip()
|
||||
label = str(field.get("label") or "").strip()
|
||||
value = field.get("value")
|
||||
if key and label and AgentAssetRiskRuleSimulationMixin._has_meaningful_value(value):
|
||||
normalized.append({"key": key, "label": label, "value": value})
|
||||
return normalized
|
||||
|
||||
def _find_attachment_field_value(
|
||||
self,
|
||||
field_key: str,
|
||||
label: str,
|
||||
attachments: list[dict[str, Any]],
|
||||
) -> Any:
|
||||
short_key = field_key.removeprefix("attachment.")
|
||||
for attachment in attachments:
|
||||
if short_key == "ocr_text":
|
||||
value = attachment.get("ocr_text") or attachment.get("summary")
|
||||
if self._has_meaningful_value(value):
|
||||
return value
|
||||
for field in list(attachment.get("document_fields") or []):
|
||||
if not isinstance(field, dict):
|
||||
continue
|
||||
candidate_key = str(field.get("key") or "").strip().lower()
|
||||
candidate_label = str(field.get("label") or "").strip()
|
||||
if self._field_matches_simulation_key(
|
||||
candidate_key, candidate_label, short_key, label
|
||||
):
|
||||
return field.get("value")
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _field_matches_simulation_key(
|
||||
candidate_key: str,
|
||||
candidate_label: str,
|
||||
short_key: str,
|
||||
target_label: str,
|
||||
) -> bool:
|
||||
compact_candidate = candidate_key.replace("_", "")
|
||||
compact_target = short_key.replace("_", "").lower()
|
||||
if compact_target and compact_target in compact_candidate:
|
||||
return True
|
||||
label_text = f"{candidate_label} {target_label}"
|
||||
label_map = {
|
||||
"invoice_no": ("发票号", "发票号码", "票号"),
|
||||
"hotel_city": ("住宿城市", "酒店城市", "酒店地点", "住宿", "酒店"),
|
||||
"route_cities": ("行程", "路线", "目的地", "出差城市"),
|
||||
"goods_name": ("品名", "商品", "服务名称"),
|
||||
"amount": ("金额", "价税合计", "合计"),
|
||||
"issue_date": ("日期", "开票日期", "发票日期"),
|
||||
}
|
||||
return any(token in label_text for token in label_map.get(short_key, ()))
|
||||
|
||||
def _extract_execution_field_keys(self, manifest: dict[str, Any]) -> list[str]:
|
||||
params = manifest.get("params") if isinstance(manifest.get("params"), dict) else {}
|
||||
template_key = str(manifest.get("template_key") or params.get("template_key") or "").strip()
|
||||
keys: list[str] = []
|
||||
if template_key == "field_compare_v1":
|
||||
conditions = (
|
||||
params.get("conditions") if isinstance(params.get("conditions"), list) else []
|
||||
)
|
||||
for condition in conditions:
|
||||
if not isinstance(condition, dict):
|
||||
continue
|
||||
for side in ("left", "right"):
|
||||
key = str(condition.get(side) or "").strip()
|
||||
if key and key not in keys:
|
||||
keys.append(key)
|
||||
elif template_key == "keyword_match_v1":
|
||||
for key in self._read_string_list(
|
||||
params.get("search_fields") or params.get("field_keys")
|
||||
):
|
||||
if key not in keys:
|
||||
keys.append(key)
|
||||
elif template_key == "field_required_v1":
|
||||
return []
|
||||
return keys
|
||||
|
||||
def _build_missing_fields(
|
||||
self,
|
||||
manifest: dict[str, Any],
|
||||
*,
|
||||
field_values: dict[str, Any],
|
||||
source_map: dict[str, str],
|
||||
required_keys: list[str],
|
||||
) -> list[dict[str, Any]]:
|
||||
labels = {field["key"]: field["label"] for field in self._extract_manifest_fields(manifest)}
|
||||
missing: list[dict[str, Any]] = []
|
||||
for key in required_keys:
|
||||
value = field_values.get(key)
|
||||
if key not in source_map or not self._has_meaningful_value(value):
|
||||
missing.append({"key": key, "label": labels.get(key, key)})
|
||||
return missing
|
||||
|
||||
def _resolve_simulation_block(
|
||||
self,
|
||||
manifest: dict[str, Any],
|
||||
*,
|
||||
message: str,
|
||||
attachments: list[dict[str, Any]],
|
||||
missing_fields: list[dict[str, Any]],
|
||||
) -> dict[str, str] | None:
|
||||
has_attachment = bool(attachments)
|
||||
requires_attachment = self._rule_requires_attachment(manifest)
|
||||
has_recognition = any(
|
||||
self._has_meaningful_value(item.get("ocr_text"))
|
||||
or self._has_meaningful_value(item.get("summary"))
|
||||
or self._has_meaningful_value(item.get("document_fields"))
|
||||
for item in attachments
|
||||
)
|
||||
has_user_evidence = self._has_meaningful_user_message(message)
|
||||
if requires_attachment and not has_attachment:
|
||||
return {
|
||||
"stage": "needs_attachment",
|
||||
"summary": "当前规则要求上传附件,暂不能仅凭文字执行风险判断。",
|
||||
"reason": "请上传测试单据,并填写本次测试意图后再执行仿真。",
|
||||
}
|
||||
if requires_attachment and not has_user_evidence:
|
||||
return {
|
||||
"stage": "needs_test_intent",
|
||||
"summary": "当前规则要求附件和测试说明一起进入仿真判断。",
|
||||
"reason": "请补充本次测试意图或关键业务事实,再执行风险识别。",
|
||||
}
|
||||
if has_attachment and not has_recognition and not has_user_evidence:
|
||||
return {
|
||||
"stage": "needs_recognition",
|
||||
"summary": "单据尚未完成识别,暂不能执行风险规则。",
|
||||
"reason": "请先完成 OCR 识别,或在对话中补充票据城市、金额、发票号等关键信息。",
|
||||
}
|
||||
template_key = str(
|
||||
manifest.get("template_key") or (manifest.get("params") or {}).get("template_key") or ""
|
||||
).strip()
|
||||
if template_key != "field_required_v1" and missing_fields:
|
||||
labels = "、".join(
|
||||
str(item.get("label") or item.get("key")) for item in missing_fields[:4]
|
||||
)
|
||||
return {
|
||||
"stage": "needs_field_confirmation",
|
||||
"summary": f"还缺少规则执行所需字段:{labels},暂不能判断是否命中。",
|
||||
"reason": "请补充缺失字段,或上传可识别出这些字段的票据后再执行。",
|
||||
}
|
||||
if not has_attachment and not has_user_evidence:
|
||||
return {
|
||||
"stage": "needs_input",
|
||||
"summary": "请先描述测试单据或上传票据,再执行风险识别。",
|
||||
"reason": "当前没有可用于规则判断的业务事实。",
|
||||
}
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _rule_requires_attachment(manifest: dict[str, Any]) -> bool:
|
||||
if bool(manifest.get("requires_attachment")):
|
||||
return True
|
||||
metadata = manifest.get("metadata") if isinstance(manifest.get("metadata"), dict) else {}
|
||||
return bool(metadata.get("requires_attachment"))
|
||||
|
||||
@staticmethod
|
||||
def _has_meaningful_user_message(message: str) -> bool:
|
||||
text = str(message or "").strip()
|
||||
if not text:
|
||||
return False
|
||||
generic_prompts = (
|
||||
"请识别我上传的临时单据是否命中这条风险规则",
|
||||
"请识别上传单据是否命中风险规则",
|
||||
)
|
||||
return not any(prompt in text for prompt in generic_prompts)
|
||||
|
||||
@staticmethod
|
||||
def _build_recognized_fields(
|
||||
fields: list[dict[str, str]],
|
||||
values: dict[str, Any],
|
||||
source_map: dict[str, str],
|
||||
) -> list[dict[str, Any]]:
|
||||
labels = {field["key"]: field["label"] for field in fields}
|
||||
return [
|
||||
{
|
||||
"key": key,
|
||||
"label": labels.get(key, key),
|
||||
"value": value,
|
||||
"source": source_map.get(key, ""),
|
||||
}
|
||||
for key, value in values.items()
|
||||
if source_map.get(key)
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _build_recognition_summary(attachments: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
return [
|
||||
{
|
||||
"name": item.get("name") or "",
|
||||
"status": item.get("recognition_status")
|
||||
or (
|
||||
"recognized"
|
||||
if item.get("ocr_text") or item.get("document_fields")
|
||||
else "pending"
|
||||
),
|
||||
"document_type_label": item.get("document_type_label") or "",
|
||||
"scene_label": item.get("scene_label") or "",
|
||||
"summary": item.get("summary") or "",
|
||||
"field_count": len(list(item.get("document_fields") or [])),
|
||||
"avg_score": item.get("avg_score") or 0.0,
|
||||
}
|
||||
for item in attachments
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _extract_city_mentions(text: str) -> list[str]:
|
||||
city_names = [
|
||||
"北京",
|
||||
"上海",
|
||||
"广州",
|
||||
"深圳",
|
||||
"杭州",
|
||||
"南京",
|
||||
"成都",
|
||||
"武汉",
|
||||
"重庆",
|
||||
"天津",
|
||||
"苏州",
|
||||
"西安",
|
||||
]
|
||||
pattern = "|".join(re.escape(city) for city in city_names)
|
||||
found: list[str] = []
|
||||
for match in re.finditer(pattern, text):
|
||||
city = match.group(0)
|
||||
if city not in found:
|
||||
found.append(city)
|
||||
return found
|
||||
|
||||
@staticmethod
|
||||
def _extract_amount(text: str) -> str:
|
||||
match = re.search(r"(\d{2,8}(?:\.\d{1,2})?)\s*(?:元|块|人民币|CNY)?", text, re.IGNORECASE)
|
||||
return match.group(1) if match else ""
|
||||
|
||||
@staticmethod
|
||||
def _extract_iso_date(text: str) -> str:
|
||||
match = re.search(r"(20\d{2})[-/.年](\d{1,2})[-/.月](\d{1,2})", text)
|
||||
if not match:
|
||||
return ""
|
||||
year, month, day = (int(part) for part in match.groups())
|
||||
try:
|
||||
return date(year, month, day).isoformat()
|
||||
except ValueError:
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def _extract_invoice_no(text: str) -> str:
|
||||
match = re.search(r"(?:发票号|发票号码|票号)[::\s]*([A-Z0-9-]{6,32})", text, re.IGNORECASE)
|
||||
return match.group(1) if match else ""
|
||||
|
||||
@staticmethod
|
||||
def _infer_item_type(text: str) -> str:
|
||||
if not text:
|
||||
return ""
|
||||
if any(keyword in text for keyword in ("酒店", "住宿", "宾馆")):
|
||||
return "住宿费"
|
||||
if any(keyword in text for keyword in ("机票", "航班", "火车", "高铁", "打车")):
|
||||
return "交通费"
|
||||
if any(keyword in text for keyword in ("餐饮", "餐费", "招待")):
|
||||
return "餐饮费"
|
||||
return "差旅费"
|
||||
|
||||
@staticmethod
|
||||
def _infer_goods_name(text: str) -> str:
|
||||
if not text:
|
||||
return ""
|
||||
if any(keyword in text for keyword in ("酒店", "住宿", "宾馆")):
|
||||
return "住宿服务"
|
||||
if any(keyword in text for keyword in ("机票", "航班", "火车", "高铁", "打车")):
|
||||
return "交通服务"
|
||||
if any(keyword in text for keyword in ("餐饮", "餐费", "招待")):
|
||||
return "餐饮服务"
|
||||
return "报销服务"
|
||||
|
||||
@staticmethod
|
||||
def _looks_like_city_field(field_key: str) -> bool:
|
||||
lowered = field_key.lower()
|
||||
return "city" in lowered or "location" in lowered or lowered.endswith("route_cities")
|
||||
|
||||
@staticmethod
|
||||
def _has_meaningful_value(value: Any) -> bool:
|
||||
if value is None:
|
||||
return False
|
||||
if isinstance(value, str):
|
||||
return bool(value.strip())
|
||||
if isinstance(value, (list, tuple, set, dict)):
|
||||
return bool(value)
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def _risk_severity_label(severity: str) -> str:
|
||||
return {
|
||||
"low": "低风险",
|
||||
"medium": "中风险",
|
||||
"high": "高风险",
|
||||
"none": "未命中",
|
||||
}.get(str(severity or "").strip().lower(), "风险")
|
||||
|
||||
@staticmethod
|
||||
def _read_string_list(value: Any) -> list[str]:
|
||||
if not isinstance(value, list):
|
||||
return []
|
||||
return [str(item or "").strip() for item in value if str(item or "").strip()]
|
||||
723
server/src/app/services/agent_asset_risk_rule_testing.py
Normal file
723
server/src/app/services/agent_asset_risk_rule_testing.py
Normal file
@@ -0,0 +1,723 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from datetime import UTC, date, datetime, timedelta
|
||||
from decimal import Decimal, InvalidOperation
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import or_, select
|
||||
|
||||
from app.core.agent_enums import (
|
||||
AgentAssetDomain,
|
||||
AgentAssetStatus,
|
||||
AgentAssetType,
|
||||
AgentReviewStatus,
|
||||
)
|
||||
from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetTestRun
|
||||
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
|
||||
from app.schemas.agent_asset import (
|
||||
AgentAssetRiskRuleLatestTestSummary,
|
||||
AgentAssetRiskRuleReportRequest,
|
||||
AgentAssetRiskRuleSampleCase,
|
||||
AgentAssetRiskRuleSampleTestRequest,
|
||||
AgentAssetRiskRuleScenarioTestRequest,
|
||||
AgentAssetRiskRuleTestRunRead,
|
||||
)
|
||||
from app.services.expense_claims import ExpenseClaimService
|
||||
from app.services.risk_rule_template_executor import RiskRuleTemplateExecutor
|
||||
|
||||
|
||||
class AgentAssetRiskRuleTestingMixin:
|
||||
def get_latest_risk_rule_test_summary(
    self,
    asset_or_id: AgentAsset | str,
    *,
    version: str | None = None,
) -> AgentAssetRiskRuleLatestTestSummary:
    """Collect the latest sample, scenario and report runs for one version.

    The version defaults to the asset's working version. Only a report run
    with status ``passed`` is considered, and ``test_passed`` is true when
    such a report exists and is flagged as passed.
    """
    asset = self._resolve_asset(asset_or_id)
    resolved_version = self._resolve_target_version(asset, version)

    def _latest(test_type: str, **criteria: Any) -> Any:
        return self.repository.get_latest_test_run(
            asset.id, version=resolved_version, test_type=test_type, **criteria
        )

    latest_sample = _latest("sample")
    latest_scenario = _latest("scenario")
    confirmed_report = _latest("report", status="passed")

    return AgentAssetRiskRuleLatestTestSummary(
        version=resolved_version,
        sample=self._serialize_test_run(latest_sample),
        scenario=self._serialize_test_run(latest_scenario),
        report=self._serialize_test_run(confirmed_report),
        test_passed=bool(confirmed_report and confirmed_report.passed),
    )
|
||||
|
||||
def run_risk_rule_sample_test(
    self,
    asset_id: str,
    body: AgentAssetRiskRuleSampleTestRequest,
    *,
    actor: str,
    request_id: str | None = None,
) -> AgentAssetRiskRuleTestRunRead:
    """Run the quick sample test for a risk rule and record the run.

    Uses the cases supplied in the request, or generated default cases when
    none are given. The run passes only when at least one case ran and every
    case passed.
    """
    asset, version, manifest = self._load_risk_rule_for_test(asset_id, body.version)
    cases = body.cases or self._build_default_sample_cases(manifest)

    outcomes = []
    for case in cases:
        outcomes.append(self._run_sample_case(manifest, case))

    all_ok = len(outcomes) > 0 and all(entry["passed"] for entry in outcomes)
    summary = f"快速样例测试 {'通过' if all_ok else '未通过'},共 {len(outcomes)} 条。"

    recorded_input = {"cases": [case.model_dump() for case in cases]}
    recorded_result = {"cases": outcomes, "case_count": len(outcomes)}
    return self._create_test_run(
        asset,
        version=version,
        test_type="sample",
        passed=all_ok,
        summary=summary,
        input_json=recorded_input,
        result_json=recorded_result,
        actor=actor,
        request_id=request_id,
    )
|
||||
|
||||
def run_risk_rule_scenario_test(
    self,
    asset_id: str,
    body: AgentAssetRiskRuleScenarioTestRequest,
    *,
    actor: str,
    request_id: str | None = None,
) -> AgentAssetRiskRuleTestRunRead:
    """Dry-run a risk rule against queried expense claims and record the run.

    Only assets in the expense domain are accepted. The run counts as passed
    whenever at least one claim was evaluated, regardless of how many hit.
    At most 50 evaluated items are stored in the result payload.

    Raises:
        ValueError: if the asset does not belong to the expense domain.
    """
    asset, version, manifest = self._load_risk_rule_for_test(asset_id, body.version)
    if asset.domain != AgentAssetDomain.EXPENSE.value:
        raise ValueError("一期真实场景试运行仅支持报销业务域。")

    scope = self._parse_scenario_scope(body.intent, body.filters)
    evaluated = [
        self._run_claim_scenario(manifest, claim)
        for claim in self._query_expense_claim_samples(scope)
    ]

    # Tally hits per severity in a single pass.
    hit_total = 0
    severity_counts: dict[str, int] = {}
    for entry in evaluated:
        if not entry["hit"]:
            continue
        hit_total += 1
        level = str(entry.get("severity") or "unknown")
        severity_counts[level] = severity_counts.get(level, 0) + 1

    has_samples = bool(evaluated)
    if has_samples:
        summary = f"真实场景试运行完成,样本 {len(evaluated)} 条,命中 {hit_total} 条。"
    else:
        summary = "真实场景试运行未找到可测样本。"

    return self._create_test_run(
        asset,
        version=version,
        test_type="scenario",
        passed=has_samples,
        summary=summary,
        input_json={
            "intent": body.intent,
            "filters": body.filters,
            "parsed_scope": scope,
        },
        result_json={
            "total_count": len(evaluated),
            "hit_count": hit_total,
            "severity_counts": severity_counts,
            "items": evaluated[:50],
        },
        actor=actor,
        request_id=request_id,
    )
|
||||
|
||||
def confirm_risk_rule_test_report(
    self,
    asset_id: str,
    body: AgentAssetRiskRuleReportRequest,
    *,
    actor: str,
    request_id: str | None = None,
) -> AgentAssetRiskRuleTestRunRead:
    """Record the tester's confirmation that a rule version passed testing.

    A passed sample run is mandatory; the scenario run is optional. The
    stored summary says whether the scenario run was skipped or found no
    samples.

    Fix: the scenario run may legitimately be absent, but its ``id`` and
    ``summary`` were read unconditionally, raising ``AttributeError``. They
    are now recorded as ``None`` / ``""`` when no scenario run exists.

    Raises:
        ValueError: if no passed sample run exists for the version, or if
            the request does not confirm that testing passed.
    """
    asset, version, _ = self._load_risk_rule_for_test(asset_id, body.version)
    sample = self.repository.get_latest_test_run(
        asset.id, version=version, test_type="sample", status="passed"
    )
    scenario = self.repository.get_latest_test_run(
        asset.id, version=version, test_type="scenario"
    )
    if sample is None:
        raise ValueError("提交审核前必须先完成快速样例测试。")
    if not body.confirm_passed:
        raise ValueError("请确认测试通过后再保存测试报告。")

    if scenario is None:
        summary = "快速样例测试已确认通过,真实场景试运行未执行。"
    elif not scenario.passed:
        summary = "快速样例测试已确认通过,真实场景试运行未找到可测样本。"
    else:
        summary = "测试报告已确认,当前版本可提交审核。"

    return self._create_test_run(
        asset,
        version=version,
        test_type="report",
        passed=True,
        summary=summary,
        input_json={"confirm_passed": True, "note": body.note or ""},
        result_json={
            "sample_test_run_id": sample.id,
            # The scenario run is optional, so guard every access to it.
            "scenario_test_run_id": scenario.id if scenario is not None else None,
            "sample_summary": sample.summary,
            "scenario_summary": scenario.summary if scenario is not None else "",
        },
        actor=actor,
        request_id=request_id,
    )
|
||||
|
||||
def delete_unpublished_asset(
    self,
    asset_id: str,
    *,
    actor: str,
    request_id: str | None = None,
) -> None:
    """Delete a JSON risk rule that has never been published.

    Removes the rule's JSON file first, then the asset record, and finally
    writes an audit entry containing the pre-deletion snapshot.

    Raises:
        PermissionError: if the asset has a published version.
    """
    asset = self._resolve_asset(asset_id)
    self._require_json_risk_asset(asset)

    published = str(asset.published_version or "").strip()
    if published:
        raise PermissionError("已发布过的风险规则不能删除。")

    snapshot = self._asset_snapshot(asset)
    self._delete_risk_rule_json_file(asset)
    self.repository.delete_asset(asset)

    audit_entry = {
        "actor": actor,
        "action": "delete_agent_asset",
        "resource_type": AgentAssetType.RULE.value,
        "resource_id": asset_id,
        "before_json": snapshot,
        "after_json": {"deleted": True},
        "request_id": request_id,
    }
    self.audit_service.log_action(**audit_entry)
|
||||
|
||||
def return_risk_rule(
    self,
    asset_id: str,
    *,
    note: str,
    actor: str,
    request_id: str | None = None,
) -> AgentAssetRiskRuleLatestTestSummary:
    """Send a rule that is under review back to draft.

    Stores a rejected review for the working version (with a default note
    when none is given), moves the asset to draft, commits, writes an audit
    entry and returns the refreshed test summary.

    Raises:
        ValueError: if the asset is not currently in review.
    """
    asset = self._resolve_asset(asset_id)
    self._require_json_risk_asset(asset)
    version = self._resolve_target_version(asset, None)
    if asset.status != AgentAssetStatus.REVIEW.value:
        raise ValueError("只有待审核风险规则可以回退。")

    snapshot = self._asset_snapshot(asset)

    # A blank note falls back to the default wording.
    review_note = str(note or "审核回退").strip() or "审核回退"
    rejection = AgentAssetReview(
        asset_id=asset.id,
        version=version,
        reviewer=actor,
        review_status=AgentReviewStatus.REJECTED.value,
        review_note=review_note,
        reviewed_at=datetime.now(UTC),
    )
    self.db.add(rejection)

    asset.reviewer = actor
    asset.status = AgentAssetStatus.DRAFT.value
    self.db.add(asset)
    self.db.commit()

    outcome = {"version": version, "status": asset.status, "note": note}
    self.audit_service.log_action(
        actor=actor,
        action="return_agent_asset",
        resource_type=AgentAssetType.RULE.value,
        resource_id=asset.id,
        before_json=snapshot,
        after_json=outcome,
        request_id=request_id,
    )
    return self.get_latest_risk_rule_test_summary(asset)
|
||||
|
||||
def publish_risk_rule(
    self,
    asset_id: str,
    *,
    actor: str,
    request_id: str | None = None,
) -> AgentAsset:
    """Publish the working version of a rule that is under review.

    Requires a confirmed passing test report for the version. Adds an
    approved review when none exists yet, marks the asset active with the
    version as published, commits, audits, and returns the reloaded asset.

    Raises:
        ValueError: if the asset is not currently in review.
        PermissionError: if the version has no confirmed passing report.
        LookupError: if the asset cannot be reloaded after the commit.
    """
    asset = self._resolve_asset(asset_id)
    self._require_json_risk_asset(asset)
    version = self._resolve_target_version(asset, None)
    if asset.status != AgentAssetStatus.REVIEW.value:
        raise ValueError("只有待审核风险规则可以发布上线。")

    test_summary = self.get_latest_risk_rule_test_summary(asset, version=version)
    if not test_summary.test_passed:
        raise PermissionError("当前规则版本尚未完成测试通过确认,不能发布。")

    snapshot = self._asset_snapshot(asset)

    existing_approval = self.repository.get_review(
        asset.id, version, AgentReviewStatus.APPROVED.value
    )
    if existing_approval is None:
        approval = AgentAssetReview(
            asset_id=asset.id,
            version=version,
            reviewer=actor,
            review_status=AgentReviewStatus.APPROVED.value,
            review_note="发布上线前审核通过。",
            reviewed_at=datetime.now(UTC),
        )
        self.db.add(approval)

    asset.reviewer = actor
    asset.published_version = version
    asset.status = AgentAssetStatus.ACTIVE.value
    self.db.add(asset)
    self.db.commit()

    self.audit_service.log_action(
        actor=actor,
        action="publish_agent_asset",
        resource_type=AgentAssetType.RULE.value,
        resource_id=asset.id,
        before_json=snapshot,
        after_json=self._asset_snapshot(asset),
        request_id=request_id,
    )

    reloaded = self.repository.get(asset.id)
    if reloaded is not None:
        return reloaded
    raise LookupError("Asset not found")
|
||||
|
||||
def set_risk_rule_enabled(
    self,
    asset_id: str,
    *,
    enabled: bool,
    actor: str,
    request_id: str | None = None,
) -> AgentAsset:
    """Switch a JSON risk rule on or off.

    The ``enabled`` flag is written to the rule's JSON manifest first and
    then mirrored into the asset's ``config_json``; the change is audited
    with before/after snapshots.
    """
    asset = self._resolve_asset(asset_id)
    self._require_json_risk_asset(asset)
    snapshot = self._asset_snapshot(asset)
    flag = bool(enabled)

    library, file_name = self._resolve_json_risk_rule_document(asset)
    location = {"library": library, "file_name": file_name}
    manifest = self.rule_library_manager.read_rule_library_json(**location)
    manifest["enabled"] = flag
    self.rule_library_manager.write_rule_library_json(payload=manifest, **location)

    # Assign a fresh dict instead of mutating the stored one in place.
    asset.config_json = {**dict(asset.config_json or {}), "enabled": flag}
    saved = self.repository.save_asset(asset)

    self.audit_service.log_action(
        actor=actor,
        action="set_risk_rule_enabled",
        resource_type=AgentAssetType.RULE.value,
        resource_id=asset.id,
        before_json=snapshot,
        after_json=self._asset_snapshot(saved),
        request_id=request_id,
    )
    return saved
|
||||
|
||||
def _load_risk_rule_for_test(
|
||||
self, asset_id: str, version: str | None
|
||||
) -> tuple[AgentAsset, str, dict[str, Any]]:
|
||||
asset = self._resolve_asset(asset_id)
|
||||
self._require_json_risk_asset(asset)
|
||||
target_version = self._resolve_target_version(asset, version)
|
||||
if self.repository.get_version(asset.id, target_version) is None:
|
||||
raise LookupError(f"版本 {target_version} 不存在")
|
||||
|
||||
rule_library, file_name = self._resolve_json_risk_rule_document(asset)
|
||||
manifest = self.rule_library_manager.read_rule_library_json(
|
||||
library=rule_library,
|
||||
file_name=file_name,
|
||||
)
|
||||
return asset, target_version, manifest
|
||||
|
||||
def _create_test_run(
    self,
    asset: AgentAsset,
    *,
    version: str,
    test_type: str,
    passed: bool,
    summary: str,
    input_json: dict[str, Any],
    result_json: dict[str, Any],
    actor: str,
    request_id: str | None,
) -> AgentAssetRiskRuleTestRunRead:
    """Store a test run, audit it, and return it as a read model.

    The stored status is ``passed`` or ``failed`` according to ``passed``,
    and the audit action name is derived from ``test_type``.
    """
    status = "passed" if passed else "failed"

    run = AgentAssetTestRun(
        asset_id=asset.id,
        version=version,
        test_type=test_type,
        status=status,
        passed=passed,
        summary=summary,
        input_json=input_json,
        result_json=result_json,
        created_by=actor,
    )
    stored = self.repository.create_test_run(run)

    audit_payload = {"version": version, "status": status, "summary": summary}
    self.audit_service.log_action(
        actor=actor,
        action=f"risk_rule_test_{test_type}",
        resource_type=AgentAssetType.RULE.value,
        resource_id=asset.id,
        before_json=None,
        after_json=audit_payload,
        request_id=request_id,
    )
    return AgentAssetRiskRuleTestRunRead.model_validate(stored)
|
||||
|
||||
def _run_sample_case(
    self,
    manifest: dict[str, Any],
    case: AgentAssetRiskRuleSampleCase,
) -> dict[str, Any]:
    """Evaluate one sample case against the rule and compare with expectations.

    The rule counts as hit when the executor returns anything other than
    ``None``. On a hit the actual severity is the manifest's fail severity;
    otherwise it is ``none``. Severity is only compared when the rule hit
    and the case declares an expected severity.
    """
    claim, contexts = self._build_synthetic_claim(case.values, manifest)
    verdict = RiskRuleTemplateExecutor().evaluate(manifest, claim=claim, contexts=contexts)
    was_hit = verdict is not None

    if was_hit:
        fail_outcome = (manifest.get("outcomes") or {}).get("fail", {})
        observed_severity = str(fail_outcome.get("severity") or "").strip()
    else:
        observed_severity = "none"

    wanted_severity = str(case.expected_severity or "").strip()
    severity_ok = not (was_hit and wanted_severity and wanted_severity != observed_severity)
    case_ok = was_hit == case.expected_hit and severity_ok

    details = verdict if isinstance(verdict, dict) else None
    return {
        "case_id": case.case_id or "",
        "name": case.name,
        "values": case.values,
        "expected_hit": case.expected_hit,
        "expected_severity": wanted_severity,
        "actual_hit": was_hit,
        "actual_severity": observed_severity,
        "passed": case_ok,
        "message": str(details.get("message") or "") if details is not None else "",
        "evidence": details.get("evidence") if details is not None else {},
    }
|
||||
|
||||
def _run_claim_scenario(self, manifest: dict[str, Any], claim: ExpenseClaim) -> dict[str, Any]:
    """Evaluate the rule against one stored claim and describe the outcome.

    The rule counts as hit when the executor returns anything other than
    ``None``; the reported severity is then the manifest's fail severity,
    otherwise ``none``.
    """
    contexts = ExpenseClaimService(self.db)._build_claim_attachment_contexts(claim)
    verdict = RiskRuleTemplateExecutor().evaluate(manifest, claim=claim, contexts=contexts)
    was_hit = verdict is not None

    if was_hit:
        severity = str((manifest.get("outcomes") or {}).get("fail", {}).get("severity") or "")
    else:
        severity = "none"

    details = verdict if isinstance(verdict, dict) else None
    occurred = claim.occurred_at.isoformat() if claim.occurred_at else ""
    return {
        "claim_id": claim.id,
        "claim_no": claim.claim_no,
        "employee_name": claim.employee_name,
        "department_name": claim.department_name,
        "expense_type": claim.expense_type,
        "amount": float(claim.amount or 0),
        "status": claim.status,
        "occurred_at": occurred,
        "hit": was_hit,
        "severity": severity,
        "message": str(details.get("message") or "") if details is not None else "",
        "evidence": details.get("evidence") if details is not None else {},
    }
|
||||
|
||||
def _build_synthetic_claim(
    self,
    values: dict[str, Any],
    manifest: dict[str, Any],
) -> tuple[ExpenseClaim, list[dict[str, Any]]]:
    """Build an in-memory claim plus one attachment context from sample values.

    A claim with a single item is created from defaults and the well-known
    ``claim.*`` / ``item.*`` keys. Then every manifest field present in
    ``values`` is applied: ``claim.`` and ``item.`` keys are set as
    attributes on the respective object, and ``attachment.`` keys are
    written into the attachment's document info and its field list.
    """

    def _text(key: str, fallback: Any) -> str:
        return str(values.get(key) or fallback)

    claim = ExpenseClaim(
        claim_no="TEST-RISK-RULE",
        employee_name=_text("claim.employee_name", "测试员工"),
        department_name=_text("claim.department_name", "测试部门"),
        expense_type=_text("item.item_type", "差旅费"),
        reason=_text("claim.reason", "测试报销事由"),
        location=_text("claim.location", "北京"),
        amount=self._to_decimal(values.get("claim.amount")),
        currency="CNY",
        invoice_count=1,
        occurred_at=datetime.now(UTC),
        status="draft",
    )
    item = ExpenseClaimItem(
        item_date=date.today(),
        item_type=_text("item.item_type", "住宿费"),
        item_reason=_text("item.item_reason", claim.reason),
        item_location=_text("item.item_location", claim.location),
        item_amount=self._to_decimal(values.get("item.item_amount") or claim.amount),
    )
    claim.items = [item]

    attachment_fields: list[dict[str, Any]] = []
    document_info: dict[str, Any] = {"fields": attachment_fields}
    attribute_targets = {"claim": claim, "item": item}

    for spec in self._extract_manifest_fields(manifest):
        name = spec["key"]
        if name not in values:
            continue
        coerced = self._coerce_sample_value(name, values.get(name))
        # Split "scope.rest" on the first dot; keys without a dot match nothing.
        scope, dot, remainder = name.partition(".")
        if not dot:
            continue
        if scope in attribute_targets:
            setattr(attribute_targets[scope], remainder, coerced)
        elif scope == "attachment":
            document_info[remainder] = coerced
            attachment_fields.append(
                {"key": remainder, "label": spec["label"], "value": coerced}
            )

    context = {
        "document_info": document_info,
        "ocr_text": document_info.get("ocr_text", ""),
    }
    return claim, [context]
|
||||
|
||||
def _build_default_sample_cases(
    self,
    manifest: dict[str, Any],
) -> list[AgentAssetRiskRuleSampleCase]:
    """Generate the default sample cases for a rule.

    Always produces one case expected to hit and one expected to pass. For
    ``field_required_v1`` rules a third case blanks every field of the hit
    case and is also expected to hit.
    """
    fields = self._extract_manifest_fields(manifest)
    fail_severity = str((manifest.get("outcomes") or {}).get("fail", {}).get("severity") or "")
    template = str(manifest.get("template_key") or "").strip()
    hit_values = self._find_case_values_for_expected(manifest, fields, expected_hit=True)
    pass_values = self._find_case_values_for_expected(manifest, fields, expected_hit=False)

    # (case_id, name, values, expected_hit, expected_severity, note)
    blueprints = [
        ("hit", "应该命中风险", hit_values, True, fail_severity, "验证规则能识别异常样本。"),
        ("pass", "应该不命中", pass_values, False, "none", "验证正常样本不会误触发。"),
    ]
    if template == "field_required_v1":
        blanked = {name: "" for name in hit_values}
        blueprints.append(
            ("missing", "关键字段缺失", blanked, True, fail_severity, "验证缺字段时会进入复核。")
        )

    return [
        AgentAssetRiskRuleSampleCase(
            case_id=case_id,
            name=title,
            values=case_values,
            expected_hit=should_hit,
            expected_severity=severity,
            note=remark,
        )
        for case_id, title, case_values, should_hit, severity, remark in blueprints
    ]
|
||||
|
||||
def _find_case_values_for_expected(
    self,
    manifest: dict[str, Any],
    fields: list[dict[str, str]],
    *,
    expected_hit: bool,
) -> dict[str, Any]:
    """Pick sample values that actually produce the expected hit outcome.

    Several candidate value sets are probed by running them through the
    rule; the first one whose outcome matches ``expected_hit`` is returned.
    If none matches, the template-aware candidate is returned anyway.
    """
    keys = [spec["key"] for spec in fields]
    candidates = [
        # Template-aware values first, then progressively blunter guesses.
        self._build_case_values(manifest, fields, hit=expected_hit),
        {name: self._default_value_for_field(name) for name in keys},
        {name: ("上海" if position == 0 else "北京") for position, name in enumerate(keys)},
        dict.fromkeys(keys, "北京"),
        dict.fromkeys(keys, ""),
    ]
    fail_severity = str((manifest.get("outcomes") or {}).get("fail", {}).get("severity") or "")
    probe_severity = fail_severity if expected_hit else "none"

    for candidate in candidates:
        probe = AgentAssetRiskRuleSampleCase(
            name="默认样例探测",
            values=candidate,
            expected_hit=expected_hit,
            expected_severity=probe_severity,
        )
        outcome = self._run_sample_case(manifest, probe)
        if bool(outcome["actual_hit"]) == expected_hit:
            return candidate

    return candidates[0]
|
||||
|
||||
def _build_case_values(
|
||||
self,
|
||||
manifest: dict[str, Any],
|
||||
fields: list[dict[str, str]],
|
||||
*,
|
||||
hit: bool,
|
||||
) -> dict[str, Any]:
|
||||
values = {field["key"]: self._default_value_for_field(field["key"]) for field in fields}
|
||||
template_key = str(manifest.get("template_key") or "").strip()
|
||||
params = manifest.get("params") if isinstance(manifest.get("params"), dict) else {}
|
||||
if template_key == "field_compare_v1":
|
||||
condition = next(
|
||||
(item for item in params.get("conditions", []) if isinstance(item, dict)),
|
||||
{},
|
||||
)
|
||||
left = str(condition.get("left") or "").strip()
|
||||
right = str(condition.get("right") or "").strip()
|
||||
operator = str(condition.get("operator") or "overlap").strip()
|
||||
if left and operator == "is_empty":
|
||||
values[left] = "测试值" if hit else ""
|
||||
elif left and right and operator in {"not_equals", "not_in", "not_overlap"}:
|
||||
values[left] = "北京" if hit else "上海"
|
||||
values[right] = "北京"
|
||||
elif left and right:
|
||||
values[left] = "上海" if hit else "北京"
|
||||
values[right] = "北京"
|
||||
elif template_key == "field_required_v1" and hit and fields:
|
||||
values[fields[0]["key"]] = ""
|
||||
elif template_key == "keyword_match_v1":
|
||||
keywords = params.get("keywords") if isinstance(params.get("keywords"), list) else []
|
||||
keyword = str(next(iter(keywords), "咨询费") or "咨询费")
|
||||
target_key = fields[0]["key"] if fields else "claim.reason"
|
||||
values[target_key] = f"本次报销包含{keyword}" if hit else "正常差旅报销"
|
||||
return values
|
||||
|
||||
@staticmethod
|
||||
def _default_value_for_field(field_key: str) -> Any:
|
||||
if field_key.endswith("amount"):
|
||||
return "100.00"
|
||||
if field_key.endswith("issue_date"):
|
||||
return date.today().isoformat()
|
||||
if field_key.endswith("route_cities"):
|
||||
return ["北京"]
|
||||
if field_key.endswith("ocr_text"):
|
||||
return "正常发票内容"
|
||||
if "city" in field_key or "location" in field_key:
|
||||
return "北京"
|
||||
if field_key.endswith("item_type"):
|
||||
return "住宿费"
|
||||
return "测试值"
|
||||
|
||||
def _query_expense_claim_samples(self, parsed_scope: dict[str, Any]) -> list[ExpenseClaim]:
    """Query recent expense claims matching the parsed scenario scope.

    Claims are limited to those created within the last ``days`` days
    (default 30), optionally filtered by an expense keyword (in type or
    reason) and by up to eight cities (in location or reason), ordered
    newest first and capped at ``limit`` (default 50, clamped to 1-200).
    """
    window_days = int(parsed_scope.get("days") or 30)
    row_cap = min(max(int(parsed_scope.get("limit") or 50), 1), 200)
    cutoff = datetime.now(UTC) - timedelta(days=window_days)
    query = select(ExpenseClaim).where(ExpenseClaim.created_at >= cutoff)

    keyword = str(parsed_scope.get("expense_keyword") or "").strip()
    if keyword:
        keyword_pattern = f"%{keyword}%"
        query = query.where(
            or_(
                ExpenseClaim.expense_type.ilike(keyword_pattern),
                ExpenseClaim.reason.ilike(keyword_pattern),
            )
        )

    city_names = [str(entry or "").strip() for entry in parsed_scope.get("cities", []) if entry]
    if city_names:
        # Each city may appear either in the claim location or in its reason.
        city_clauses = [
            column.ilike(f"%{name}%")
            for name in city_names[:8]
            for column in (ExpenseClaim.location, ExpenseClaim.reason)
        ]
        query = query.where(or_(*city_clauses))

    query = query.order_by(ExpenseClaim.created_at.desc()).limit(row_cap)
    return list(self.db.scalars(query).all())
|
||||
|
||||
@staticmethod
|
||||
def _parse_scenario_scope(intent: str, filters: dict[str, Any]) -> dict[str, Any]:
|
||||
text = str(intent or "")
|
||||
raw_days = filters.get("days") or filters.get("recent_days")
|
||||
days = int(raw_days) if str(raw_days or "").isdigit() else 30
|
||||
match = re.search(r"最近\s*(\d{1,3})\s*天", text)
|
||||
if match:
|
||||
days = int(match.group(1))
|
||||
limit = filters.get("limit") if str(filters.get("limit") or "").isdigit() else 50
|
||||
expense_keyword = str(filters.get("expense_keyword") or "").strip()
|
||||
if not expense_keyword and any(keyword in text for keyword in ("酒店", "住宿")):
|
||||
expense_keyword = "住宿"
|
||||
|
||||
city_candidates = ("北京", "上海", "广州", "深圳", "武汉", "杭州", "成都", "南京")
|
||||
cities = [
|
||||
city
|
||||
for city in city_candidates
|
||||
if city in text or city in [str(item) for item in filters.get("cities", []) or []]
|
||||
]
|
||||
return {
|
||||
"business_domain": "expense",
|
||||
"days": max(1, min(days, 365)),
|
||||
"limit": max(1, min(int(limit), 200)),
|
||||
"expense_keyword": expense_keyword,
|
||||
"cities": cities,
|
||||
"execution_mode": "dry_run",
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _extract_manifest_fields(manifest: dict[str, Any]) -> list[dict[str, str]]:
|
||||
inputs = manifest.get("inputs") if isinstance(manifest.get("inputs"), dict) else {}
|
||||
fields = inputs.get("fields") if isinstance(inputs.get("fields"), list) else []
|
||||
normalized = []
|
||||
for item in fields:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
key = str(item.get("key") or "").strip()
|
||||
if key:
|
||||
normalized.append({"key": key, "label": str(item.get("label") or key).strip()})
|
||||
return normalized
|
||||
|
||||
@staticmethod
|
||||
def _coerce_sample_value(field_key: str, value: Any) -> Any:
|
||||
if field_key.endswith("route_cities") and isinstance(value, str):
|
||||
return [item.strip() for item in re.split(r"[,,、/ ]+", value) if item.strip()]
|
||||
return value
|
||||
|
||||
@staticmethod
|
||||
def _to_decimal(value: Any) -> Decimal:
|
||||
try:
|
||||
return Decimal(str(value or "0"))
|
||||
except (InvalidOperation, ValueError):
|
||||
return Decimal("0")
|
||||
|
||||
def _resolve_asset(self, asset_or_id: AgentAsset | str) -> AgentAsset:
    """Return the asset itself, or load it from the repository by id.

    Raises:
        LookupError: The repository has no asset for the given id.
    """
    if not isinstance(asset_or_id, AgentAsset):
        loaded = self.repository.get(str(asset_or_id))
        if loaded is None:
            raise LookupError("Asset not found")
        return loaded
    return asset_or_id
|
||||
|
||||
@staticmethod
def _require_json_risk_asset(asset: AgentAsset) -> None:
    """Validate that ``asset`` is a rule asset whose ``detail_mode`` is ``json_risk``.

    Raises:
        ValueError: The asset is not a rule asset, or its config does not
            declare the ``json_risk`` detail mode (compared case-insensitively
            after stripping).
    """
    raw_config = asset.config_json
    config = raw_config if isinstance(raw_config, dict) else {}

    if asset.asset_type != AgentAssetType.RULE.value:
        raise ValueError("仅规则资产支持风险规则操作。")

    detail_mode = str(config.get("detail_mode") or "").strip().lower()
    if detail_mode != "json_risk":
        raise ValueError("仅 JSON 风险规则支持该操作。")
|
||||
|
||||
def _resolve_target_version(self, asset: AgentAsset, version: str | None) -> str:
|
||||
target = str(version or self._resolve_working_version(asset) or "").strip()
|
||||
if not target:
|
||||
raise ValueError("当前规则尚未配置工作版本。")
|
||||
return target
|
||||
|
||||
def _delete_risk_rule_json_file(self, asset: AgentAsset) -> None:
    """Remove the rule-library JSON file that backs ``asset``, if one can be located.

    Best effort: a ``FileNotFoundError`` or ``ValueError`` raised while
    resolving or deleting the file is swallowed, and a file that is already
    gone is not an error.
    """
    try:
        library_name, document_name = self._resolve_json_risk_rule_document(asset)
        self.rule_library_manager.resolve_rule_library_path(
            library=library_name,
            file_name=document_name,
        ).unlink(missing_ok=True)
    except (FileNotFoundError, ValueError):
        pass
|
||||
|
||||
@staticmethod
|
||||
def _serialize_test_run(
|
||||
run: AgentAssetTestRun | None,
|
||||
) -> AgentAssetRiskRuleTestRunRead | None:
|
||||
return AgentAssetRiskRuleTestRunRead.model_validate(run) if run is not None else None
|
||||
@@ -4,6 +4,7 @@ import json
|
||||
from collections import defaultdict
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.agent_enums import (
|
||||
@@ -27,13 +28,14 @@ from app.schemas.agent_asset import (
|
||||
)
|
||||
from app.services.agent_asset_json_rules import AgentAssetJsonRuleMixin
|
||||
from app.services.agent_asset_onlyoffice import AgentAssetOnlyOfficeMixin
|
||||
from app.services.agent_asset_risk_rule_simulation import AgentAssetRiskRuleSimulationMixin
|
||||
from app.services.agent_asset_risk_rule_testing import AgentAssetRiskRuleTestingMixin
|
||||
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
|
||||
from app.services.agent_asset_spreadsheet import AgentAssetSpreadsheetManager
|
||||
from app.services.agent_asset_spreadsheet_helpers import AgentAssetSpreadsheetHelperMixin
|
||||
from app.services.agent_asset_timeline import AgentAssetTimelineMixin
|
||||
from app.services.agent_asset_spreadsheet import AgentAssetSpreadsheetManager
|
||||
from app.services.agent_foundation import AgentFoundationService
|
||||
from app.services.audit import AuditLogService
|
||||
from app.services.settings import resolve_onlyoffice_settings
|
||||
|
||||
logger = get_logger("app.services.agent_assets")
|
||||
|
||||
@@ -41,6 +43,8 @@ logger = get_logger("app.services.agent_assets")
|
||||
class AgentAssetService(
|
||||
AgentAssetOnlyOfficeMixin,
|
||||
AgentAssetSpreadsheetHelperMixin,
|
||||
AgentAssetRiskRuleTestingMixin,
|
||||
AgentAssetRiskRuleSimulationMixin,
|
||||
AgentAssetTimelineMixin,
|
||||
AgentAssetJsonRuleMixin,
|
||||
):
|
||||
@@ -66,10 +70,7 @@ class AgentAssetService(
|
||||
asset_type=asset_type, status=status, domain=domain, keyword=keyword
|
||||
)
|
||||
version_stats = self._collect_version_stats(assets)
|
||||
return [
|
||||
self._serialize_list_item(asset, version_stats.get(asset.id))
|
||||
for asset in assets
|
||||
]
|
||||
return [self._serialize_list_item(asset, version_stats.get(asset.id)) for asset in assets]
|
||||
|
||||
def get_asset(self, asset_id: str) -> AgentAssetRead | None:
|
||||
self._ensure_ready()
|
||||
@@ -88,9 +89,7 @@ class AgentAssetService(
|
||||
else next(iter(self.repository.list_reviews(asset_id, limit=1)), None)
|
||||
)
|
||||
current_version = (
|
||||
self.repository.get_version(asset_id, working_version)
|
||||
if working_version
|
||||
else None
|
||||
self.repository.get_version(asset_id, working_version) if working_version else None
|
||||
)
|
||||
version_stats = self._collect_version_stats([asset]).get(asset.id)
|
||||
return AgentAssetRead(
|
||||
@@ -100,12 +99,14 @@ class AgentAssetService(
|
||||
else None,
|
||||
current_version_content_type=current_version.content_type if current_version else None,
|
||||
current_version_change_note=current_version.change_note if current_version else None,
|
||||
recent_versions=[
|
||||
self._serialize_version(item, asset) for item in recent_versions
|
||||
],
|
||||
recent_versions=[self._serialize_version(item, asset) for item in recent_versions],
|
||||
latest_review=AgentAssetReviewRead.model_validate(latest_review)
|
||||
if latest_review
|
||||
else None,
|
||||
latest_test_summary=self.get_latest_risk_rule_test_summary(asset)
|
||||
if str((asset.config_json or {}).get("detail_mode") or "").strip().lower()
|
||||
== "json_risk"
|
||||
else None,
|
||||
)
|
||||
|
||||
def create_asset(
|
||||
@@ -301,6 +302,13 @@ class AgentAssetService(
|
||||
if self.repository.get_version(asset_id, payload.version) is None:
|
||||
raise LookupError(f"版本 {payload.version} 不存在")
|
||||
if asset.asset_type == AgentAssetType.RULE.value:
|
||||
if (
|
||||
str((asset.config_json or {}).get("detail_mode") or "").strip().lower()
|
||||
== "json_risk"
|
||||
and payload.review_status == AgentReviewStatus.PENDING
|
||||
and not self.get_latest_risk_rule_test_summary(asset).test_passed
|
||||
):
|
||||
raise PermissionError("当前规则版本尚未完成测试通过确认,不能提交审核。")
|
||||
working_version = self._resolve_working_version(asset)
|
||||
if payload.version != working_version:
|
||||
raise ValueError("只能对当前工作版本发起审核。")
|
||||
@@ -594,11 +602,10 @@ class AgentAssetService(
|
||||
),
|
||||
)
|
||||
|
||||
def _collect_version_stats(
|
||||
self, assets: list[AgentAsset]
|
||||
) -> dict[str, dict[str, int | str | None]]:
|
||||
def _collect_version_stats(self, assets: list[AgentAsset]) -> dict[str, dict[str, Any]]:
|
||||
asset_ids = [item.id for item in assets]
|
||||
versions = self.repository.list_versions_for_assets(asset_ids)
|
||||
reviews = self.repository.list_reviews_for_assets(asset_ids)
|
||||
spreadsheet_logs = self.audit_service.repository.list_for_resources(
|
||||
resource_type=AgentAssetType.RULE.value,
|
||||
resource_ids=[
|
||||
@@ -610,23 +617,33 @@ class AgentAssetService(
|
||||
],
|
||||
action="edit_rule_spreadsheet",
|
||||
)
|
||||
working_versions = {
|
||||
item.id: self._resolve_working_version(item) for item in assets
|
||||
}
|
||||
working_versions = {item.id: self._resolve_working_version(item) for item in assets}
|
||||
version_counts: dict[str, int] = defaultdict(int)
|
||||
modified_by: dict[str, str | None] = {item.id: None for item in assets}
|
||||
published_versions = {item.id: self._resolve_published_version(item) for item in assets}
|
||||
published_by: dict[str, str | None] = {}
|
||||
published_at: dict[str, datetime | None] = {}
|
||||
spreadsheet_edit_counts: dict[str, int] = defaultdict(int)
|
||||
spreadsheet_last_actor: dict[str, str | None] = {}
|
||||
spreadsheet_last_changed_at: dict[str, datetime] = {}
|
||||
|
||||
for version in versions:
|
||||
version_counts[version.asset_id] += 1
|
||||
if (
|
||||
modified_by.get(version.asset_id) is None
|
||||
and version.version == working_versions.get(version.asset_id)
|
||||
):
|
||||
if modified_by.get(
|
||||
version.asset_id
|
||||
) is None and version.version == working_versions.get(version.asset_id):
|
||||
modified_by[version.asset_id] = version.created_by
|
||||
|
||||
for review in reviews:
|
||||
if review.asset_id in published_at:
|
||||
continue
|
||||
if review.version != published_versions.get(review.asset_id):
|
||||
continue
|
||||
if review.review_status != AgentReviewStatus.APPROVED.value:
|
||||
continue
|
||||
published_by[review.asset_id] = review.reviewer
|
||||
published_at[review.asset_id] = review.reviewed_at or review.created_at
|
||||
|
||||
for log in spreadsheet_logs:
|
||||
spreadsheet_edit_counts[log.resource_id] += 1
|
||||
last_changed_at = spreadsheet_last_changed_at.get(log.resource_id)
|
||||
@@ -652,6 +669,8 @@ class AgentAssetService(
|
||||
and spreadsheet_last_actor.get(item.id)
|
||||
else modified_by.get(item.id)
|
||||
),
|
||||
"published_by": published_by.get(item.id),
|
||||
"published_at": published_at.get(item.id),
|
||||
}
|
||||
for item in assets
|
||||
}
|
||||
@@ -663,9 +682,11 @@ class AgentAssetService(
|
||||
) -> AgentAssetListItem:
|
||||
payload = AgentAssetListItem.model_validate(asset).model_dump()
|
||||
payload["change_count"] = int((version_stats or {}).get("change_count") or 0)
|
||||
payload["modified_by"] = (
|
||||
str((version_stats or {}).get("modified_by") or "").strip() or None
|
||||
payload["modified_by"] = str((version_stats or {}).get("modified_by") or "").strip() or None
|
||||
payload["published_by"] = (
|
||||
str((version_stats or {}).get("published_by") or "").strip() or None
|
||||
)
|
||||
payload["published_at"] = (version_stats or {}).get("published_at")
|
||||
return AgentAssetListItem.model_validate(payload)
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -2,7 +2,7 @@ from __future__ import annotations
|
||||
|
||||
import threading
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy import inspect, select, text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.config import get_settings
|
||||
@@ -75,6 +75,7 @@ class AgentFoundationService(
|
||||
try:
|
||||
Base.metadata.create_all(bind=self.db.get_bind())
|
||||
self._ensure_agent_asset_schema()
|
||||
self._ensure_financial_record_schema()
|
||||
self._seed_agent_assets()
|
||||
self._sync_demo_financial_records()
|
||||
self._seed_runs_and_logs()
|
||||
@@ -88,6 +89,36 @@ class AgentFoundationService(
|
||||
bind = self.db.get_bind()
|
||||
return str(getattr(bind, "url", "") or id(bind))
|
||||
|
||||
def _ensure_financial_record_schema(self) -> None:
    """Add the Hermes columns and index to ``expense_claims`` when they are missing.

    Does nothing if the table does not exist. Column types and defaults are
    chosen per dialect: PostgreSQL gets a timezone-aware timestamp and a
    ``FALSE`` default, every other dialect gets ``DATETIME`` and ``0``. The
    index statement is always issued and relies on ``IF NOT EXISTS``.
    """
    bind = self.db.get_bind()
    inspector = inspect(bind)
    if "expense_claims" not in inspector.get_table_names():
        return

    existing_columns = {column["name"] for column in inspector.get_columns("expense_claims")}
    is_postgres = bind.dialect.name == "postgresql"
    timestamp_type = "TIMESTAMP WITH TIME ZONE" if is_postgres else "DATETIME"
    boolean_default = "FALSE" if is_postgres else "0"

    # Collect the DDL first, then run it in the same order as before.
    statements: list[str] = []
    if "hermes_scanned_at" not in existing_columns:
        statements.append(
            f"ALTER TABLE expense_claims ADD COLUMN hermes_scanned_at {timestamp_type}"
        )
    if "hermes_risk_flag" not in existing_columns:
        statements.append(
            "ALTER TABLE expense_claims "
            f"ADD COLUMN hermes_risk_flag BOOLEAN DEFAULT {boolean_default} NOT NULL"
        )
    statements.append(
        "CREATE INDEX IF NOT EXISTS ix_expense_claims_hermes_risk_flag "
        "ON expense_claims (hermes_risk_flag)"
    )

    for sql in statements:
        self.db.execute(text(sql))
    self.db.flush()
|
||||
|
||||
def _sync_demo_financial_records(self) -> None:
|
||||
if get_settings().seed_demo_financial_records:
|
||||
self._seed_financial_records()
|
||||
|
||||
@@ -651,7 +651,11 @@ class EmployeeService:
|
||||
column_names = {column["name"] for column in inspector.get_columns("employees")}
|
||||
if "password_hash" not in column_names:
|
||||
self.db.execute(text("ALTER TABLE employees ADD COLUMN password_hash VARCHAR(255)"))
|
||||
self.db.flush()
|
||||
if "compliance_score" not in column_names:
|
||||
self.db.execute(
|
||||
text("ALTER TABLE employees ADD COLUMN compliance_score INTEGER DEFAULT 100 NOT NULL")
|
||||
)
|
||||
self.db.flush()
|
||||
|
||||
def _seed_employee_history(self, employee: Employee, definition: dict[str, Any]) -> None:
|
||||
existing_keys = {
|
||||
|
||||
@@ -141,6 +141,10 @@ EXPENSE_TYPE_KEYWORD_GROUPS: tuple[tuple[str, str, tuple[str, ...]], ...] = (
|
||||
"办公用品",
|
||||
"办公耗材",
|
||||
"办公设备",
|
||||
"采购",
|
||||
"集中采购",
|
||||
"物资采购",
|
||||
"办公采购",
|
||||
"办公",
|
||||
"文具",
|
||||
"耗材",
|
||||
|
||||
104
server/src/app/services/hermes_expense_report.py
Normal file
104
server/src/app/services/hermes_expense_report.py
Normal file
@@ -0,0 +1,104 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.logging import get_logger
|
||||
from app.models.financial_record import ExpenseClaim
|
||||
from app.services.runtime_chat import RuntimeChatService
|
||||
|
||||
logger = get_logger("app.services.hermes_expense_report")
|
||||
|
||||
|
||||
class HermesExpenseReportService:
    """Generates the Hermes weekly expense insight report from recent expense claims.

    The flow is: aggregate claim amounts from the database, ask the chat model
    for a short Markdown analysis, then pass the text to the delivery step
    (currently a log line only).
    """

    def __init__(self, db: Session) -> None:
        self.db = db
        self.chat_service = RuntimeChatService(db)

    def generate_weekly_report(self, log_id: str | None = None) -> None:
        """Run the weekly report pipeline; returns early when there is nothing to report.

        Args:
            log_id: Identifier of the triggering execution log, echoed in the output.
        """
        logger.info("Starting Hermes weekly expense report generation...")

        # 1. Aggregate the data.
        aggregated_data = self._aggregate_recent_expenses(days=7)
        if not aggregated_data.get("total_amount"):
            logger.info("No expense data in the last 7 days. Skipping report.")
            return

        # 2. Pass the aggregate to the LLM for analysis.
        report_markdown = self._generate_insights_with_llm(aggregated_data)
        if not report_markdown:
            logger.warning("Failed to generate expense report from LLM.")
            return

        # 3. Deliver the report (simulated for now).
        self._deliver_report(report_markdown, log_id)
        logger.info("Hermes weekly expense report generation completed.")

    @staticmethod
    def _sum_by_label(rows: Any) -> dict[str, float]:
        """Fold ``(label, total)`` rows into a ``label -> float total`` mapping.

        Empty or NULL labels are reported as "Unknown". Totals are accumulated
        rather than assigned, so a NULL group and an empty-string group, which
        the database returns as separate rows, no longer overwrite each other.
        """
        totals: dict[str, float] = {}
        for label, total in rows:
            key = label or "Unknown"
            totals[key] = totals.get(key, 0.0) + float(total or 0)
        return totals

    def _totals_grouped_by(self, column: Any, base_filter: list[Any]) -> dict[str, float]:
        """Sum ``ExpenseClaim.amount`` per distinct value of ``column`` under ``base_filter``."""
        stmt = (
            select(column, func.sum(ExpenseClaim.amount).label("total"))
            .where(*base_filter)
            .group_by(column)
        )
        return self._sum_by_label(self.db.execute(stmt).all())

    def _aggregate_recent_expenses(self, days: int = 7) -> dict[str, Any]:
        """Aggregate claim amounts of the last ``days`` days by department and expense type.

        Returns:
            A dict with ``period``, ``total_amount``, ``by_department`` and
            ``by_expense_type``.
        """
        target_date = datetime.now(timezone.utc) - timedelta(days=days)

        # Base filter: claims from the last N days that are not in rejected status.
        base_filter = [
            ExpenseClaim.occurred_at >= target_date,
            ExpenseClaim.status != "rejected",
        ]

        # 1. Totals per department.
        by_department = self._totals_grouped_by(ExpenseClaim.department_name, base_filter)

        # 2. Totals per expense type.
        by_expense_type = self._totals_grouped_by(ExpenseClaim.expense_type, base_filter)

        # 3. Grand total, derived from the department breakdown.
        total_amount = sum(by_department.values())

        return {
            "period": f"Last {days} days",
            "total_amount": total_amount,
            "by_department": by_department,
            "by_expense_type": by_expense_type,
        }

    def _generate_insights_with_llm(self, data: dict[str, Any]) -> str | None:
        """Send the aggregated figures to the chat service and return its reply.

        The value returned by ``chat_service.complete`` is passed through
        unchanged; the caller treats a falsy reply as a failure.
        """
        system_prompt = (
            "你是公司的财务分析专家。请根据提供的最近期业务开销数据,撰写一份简洁有力的【高管费控洞察周报】。\n"
            "要求:\n"
            "1. 不要机械地罗列数字,要像人一样指出异常(例如:哪个部门花钱最多?打车费是不是异常高?)。\n"
            "2. 给出 1 条削减成本的实操建议。\n"
            "3. 纯 Markdown 格式输出,不超过 300 字。"
        )

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"开销统计数据:\n{json.dumps(data, ensure_ascii=False, indent=2)}"},
        ]

        response = self.chat_service.complete(
            messages,
            max_tokens=800,
            temperature=0.4,
        )
        return response

    def _deliver_report(self, report_markdown: str, log_id: str | None) -> None:
        """Deliver the finished report; for now it is only written to the log."""
        # TODO: hook up a WeCom / DingTalk bot or an e-mail sender here in the future.
        logger.info(f"\n================ Hermes Weekly Report [LogID: {log_id}] ================\n"
                    f"{report_markdown}\n"
                    f"==========================================================================")
|
||||
135
server/src/app/services/hermes_risk_scanner.py
Normal file
135
server/src/app/services/hermes_risk_scanner.py
Normal file
@@ -0,0 +1,135 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import or_, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.logging import get_logger
|
||||
from app.models.financial_record import ExpenseClaim
|
||||
from app.models.hermes_config import HermesTaskExecutionLog
|
||||
from app.models.hermes_report import HermesRiskReport
|
||||
from app.services.runtime_chat import RuntimeChatService
|
||||
|
||||
logger = get_logger("app.services.hermes_risk_scanner")
|
||||
|
||||
class HermesRiskScannerService:
    """Batch risk scanner: sends pending expense claims to the LLM and stores its findings.

    Each finding becomes one ``HermesRiskReport`` row per affected claim, and
    the affected claims get ``hermes_risk_flag`` set. Every fetched claim is
    stamped with ``hermes_scanned_at`` at the end of the run.
    """

    def __init__(self, db: Session) -> None:
        self.db = db
        self.chat_service = RuntimeChatService(db)

    def scan_global_risks(self, log_id: str | None = None) -> None:
        """Scan one batch of claims and commit the resulting reports and flags.

        Args:
            log_id: Identifier of the triggering execution log, stored on each report.
        """
        logger.info("Starting global risk scan for Hermes...")

        # 1. Fetch unscanned claims
        claims = self._fetch_unscanned_claims()
        if not claims:
            logger.info("No unscanned claims found. Aborting scan.")
            return

        logger.info(f"Fetched {len(claims)} claims to analyze.")

        # 2. Extract context for LLM
        claims_by_id = {claim.id: claim for claim in claims}
        claims_context = [self._build_claim_context(claim) for claim in claims]

        # 3. Analyze with LLM
        risk_results = self._analyze_claims_with_llm(claims_context)

        # 4. Process and persist results
        detected_risk_count = 0
        for risk in risk_results:
            raw_ids = risk.get("claim_ids") or []
            if not isinstance(raw_ids, list):
                continue
            # The model may return ids that were never in this batch. Keep only
            # ids that were actually fetched (de-duplicated, order preserved) so
            # no report row is written for an unknown claim.
            claim_ids = list(
                dict.fromkeys(
                    cid
                    for cid in raw_ids
                    if isinstance(cid, (str, int)) and cid in claims_by_id
                )
            )
            if not claim_ids:
                continue

            detected_risk_count += 1
            for cid in claim_ids:
                report = HermesRiskReport(
                    claim_id=cid,
                    execution_log_id=log_id,
                    risk_level=risk.get("risk_level") or "medium",
                    risk_type=risk.get("risk_type") or "unknown",
                    risk_description=risk.get("description") or "No description provided",
                    related_claim_ids=claim_ids,
                )
                self.db.add(report)

                # Update claim flags
                claims_by_id[cid].hermes_risk_flag = True

        # 5. Mark all as scanned
        now = datetime.now(timezone.utc)
        for claim in claims:
            claim.hermes_scanned_at = now

        self.db.commit()
        logger.info(f"Hermes risk scan completed. Found {detected_risk_count} risks.")

    @staticmethod
    def _build_claim_context(claim: Any) -> dict[str, Any]:
        """Project one claim onto the JSON-serialisable fields shown to the LLM."""
        return {
            "claim_id": claim.id,
            "claim_no": claim.claim_no,
            "employee_name": claim.employee_name,
            "department_name": claim.department_name,
            "expense_type": claim.expense_type,
            "location": claim.location,
            # A missing amount is sent as null instead of crashing float().
            "amount": float(claim.amount) if claim.amount is not None else None,
            "occurred_at": str(claim.occurred_at) if claim.occurred_at else None,
            "reason": claim.reason,
        }

    def _fetch_unscanned_claims(self) -> list[ExpenseClaim]:
        """Return up to 50 open claims that are either never scanned or not yet flagged."""
        # NOTE(review): unflagged claims are re-selected on every run and the query
        # has no ORDER BY, so the same 50 rows may be returned repeatedly - confirm
        # whether an ordering on hermes_scanned_at is wanted.
        stmt = select(ExpenseClaim).where(
            ExpenseClaim.status.in_(["draft", "submitted", "review"]),
            or_(
                ExpenseClaim.hermes_scanned_at.is_(None),
                ExpenseClaim.hermes_risk_flag.is_(False),  # only rescan if it has no flags yet
            ),
        ).limit(50)  # Batch size to prevent Token overflow

        return list(self.db.scalars(stmt).all())

    @staticmethod
    def _parse_risk_payload(response_text: str) -> list[dict[str, Any]]:
        """Parse the model reply into a list of risk dicts.

        Markdown code fences are stripped first. Invalid JSON, or a top-level
        value that is not a list, yields an empty list; list items that are not
        objects are dropped, so callers can rely on ``dict`` items.
        """
        # Clean markdown formatting if present
        cleaned_text = response_text.replace("```json", "").replace("```", "").strip()
        try:
            payload = json.loads(cleaned_text)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse LLM risk scan response as JSON: {e}\nResponse: {response_text}")
            return []

        if not isinstance(payload, list):
            logger.warning("LLM risk scan response is not a JSON array; ignoring it.")
            return []
        return [item for item in payload if isinstance(item, dict)]

    def _analyze_claims_with_llm(self, claims_context: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Ask the chat service to find risks in ``claims_context``.

        Returns:
            The validated list of risk dicts; empty when the model returns
            nothing usable.
        """
        system_prompt = (
            "你是 X-Financial 的 Hermes 内控审计智能体。请分析以下近期的报销单数据集合,寻找以下潜在风险:\n"
            "1. 拆单行为 (split_billing):同一人在相邻日期针对同一类目/商户提交多笔恰好贴近免审额度的小额单据。\n"
            "2. 群体合谋 (collusion):不同部门的员工在同一天去同一家非标准酒店类偏僻商户高额消费。\n"
            "3. 异常频次 (frequency_anomaly):某员工在短时间内的打车或招待频次极度不合理。\n"
            "请严格以 JSON 数组格式返回结果,如果没有风险返回空数组 `[]`。\n"
            "JSON 格式要求:\n"
            "[\n"
            " {\n"
            ' "risk_type": "split_billing",\n'
            ' "risk_level": "high",\n'
            ' "claim_ids": ["uuid-1", "uuid-2"],\n'
            ' "description": "详细推理过程,为什么判定为拆单。"\n'
            " }\n"
            "]\n"
        )

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": json.dumps(claims_context, ensure_ascii=False, indent=2)},
        ]

        response_text = self.chat_service.complete(
            messages,
            max_tokens=1500,
            temperature=0.1,
        )

        if not response_text:
            logger.warning("LLM returned empty response for risk scan.")
            return []

        return self._parse_risk_payload(response_text)
|
||||
131
server/src/app/services/hermes_scheduler.py
Normal file
131
server/src/app/services/hermes_scheduler.py
Normal file
@@ -0,0 +1,131 @@
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
import traceback
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.logging import get_logger
|
||||
from app.db.session import get_session_factory
|
||||
from app.models.hermes_config import HermesTaskConfig, HermesTaskExecutionLog
|
||||
from app.services.hermes_risk_scanner import HermesRiskScannerService
|
||||
from app.services.hermes_expense_report import HermesExpenseReportService
|
||||
|
||||
logger = get_logger("app.services.hermes_scheduler")
|
||||
|
||||
|
||||
class HermesScheduler:
    """Background thread that polls enabled Hermes task configs once a minute.

    A task is considered due when it has never run, or when its latest
    "success" / "running" execution log started at least 23.5 hours ago.
    """

    def __init__(self) -> None:
        self._stop_event = threading.Event()
        self._thread: threading.Thread | None = None
        self._lock = threading.Lock()
        self.session_factory = get_session_factory()

    def start(self) -> None:
        """Start the polling thread; does nothing if it is already alive."""
        with self._lock:
            if self._thread is not None and self._thread.is_alive():
                return
            self._stop_event.clear()
            self._thread = threading.Thread(
                target=self._run_loop,
                name="hermes-agent-scheduler",
                daemon=True,
            )
            self._thread.start()
            logger.info("Hermes Agent Scheduler started.")

    def shutdown(self) -> None:
        """Signal the loop to stop and wait up to 3 seconds for the thread to exit."""
        with self._lock:
            thread = self._thread
            self._thread = None
            self._stop_event.set()
        if thread is not None and thread.is_alive():
            thread.join(timeout=3)
        logger.info("Hermes Agent Scheduler stopped.")

    def _run_loop(self) -> None:
        """Poll until the stop event is set; a failing poll is logged, not fatal."""
        logger.info("Hermes background loop is now active. Polling interval: 60s.")
        while not self._stop_event.is_set():
            try:
                self._check_and_run_tasks()
            except Exception as e:
                logger.error(f"Error in Hermes run loop: {e}", exc_info=True)

            # Sleep for one minute between polls; wake early when asked to stop.
            if self._stop_event.wait(60.0):
                break

    def _check_and_run_tasks(self) -> None:
        """Open a session, run every enabled task that is due, then close the session."""
        db = self.session_factory()
        try:
            # Load every enabled task config.
            stmt = select(HermesTaskConfig).where(HermesTaskConfig.is_enabled.is_(True))
            configs = db.scalars(stmt).all()

            for config in configs:
                if self._should_run_now(db, config):
                    self._execute_task(db, config)
        finally:
            db.close()

    @staticmethod
    def _elapsed_hours(started_at: datetime, now: datetime) -> float:
        """Hours between ``started_at`` and ``now``.

        A naive ``started_at`` (which some database drivers return even for
        timezone-aware columns) is interpreted as UTC, so subtracting it from an
        aware ``now`` does not raise ``TypeError``.
        """
        if started_at.tzinfo is None:
            started_at = started_at.replace(tzinfo=timezone.utc)
        return (now - started_at).total_seconds() / 3600

    def _should_run_now(self, db: Session, config: HermesTaskConfig) -> bool:
        """Decide whether ``config`` is due, based on its latest success/running log."""
        # Simple strategy: check whether the task already ran within the last day.
        # Without the croniter library a simplified once-per-day rule is used for now.
        stmt = select(HermesTaskExecutionLog).where(
            HermesTaskExecutionLog.config_id == config.id,
            HermesTaskExecutionLog.status.in_(["success", "running"]),
        ).order_by(HermesTaskExecutionLog.started_at.desc()).limit(1)

        last_log = db.scalars(stmt).first()

        # Never executed (or no usable start time recorded): run immediately.
        if not last_log or last_log.started_at is None:
            return True

        # Simplified: due once 23.5 hours have passed since the last start,
        # which approximates a daily run.
        now = datetime.now(timezone.utc)
        return self._elapsed_hours(last_log.started_at, now) >= 23.5

    def _execute_task(self, db: Session, config: HermesTaskConfig) -> None:
        """Run one task and record the outcome on a new execution log row."""
        task_type = config.task_type
        logger.info(f"Triggering Hermes task: {task_type} (Config ID: {config.id})")

        # Create the execution log first, marked as running.
        log_record = HermesTaskExecutionLog(
            config_id=config.id,
            status="running",
        )
        db.add(log_record)
        db.commit()
        db.refresh(log_record)

        try:
            if task_type == "global_risk_scan":
                scanner = HermesRiskScannerService(db)
                scanner.scan_global_risks(log_id=log_record.id)
            elif task_type == "weekly_expense_report":
                reporter = HermesExpenseReportService(db)
                reporter.generate_weekly_report(log_id=log_record.id)
            else:
                logger.warning(f"Unknown Hermes task type, nothing executed: {task_type}")

            log_record.status = "success"
            log_record.completed_at = datetime.now(timezone.utc)
            log_record.result_summary = "Task executed successfully."

        except Exception as e:
            error_trace = traceback.format_exc()
            logger.error(f"Failed to execute Hermes task {task_type}: {e}")
            # Discard whatever the failed task left in the session. After a failed
            # flush/commit the session must be rolled back before it can be used
            # again; otherwise the commit below would raise and the "failed"
            # status would never be stored.
            db.rollback()
            log_record.status = "failed"
            log_record.completed_at = datetime.now(timezone.utc)
            log_record.error_trace = error_trace

        finally:
            db.commit()
||||
|
||||
|
||||
# 全局单例
|
||||
hermes_scheduler = HermesScheduler()
|
||||
@@ -34,10 +34,104 @@ def _extract_docx_text(file_path: Path) -> str:
|
||||
return "当前 Word 文件解析失败。"
|
||||
|
||||
root = ElementTree.fromstring(xml_content)
|
||||
body = next((node for node in root.iter() if node.tag.endswith("}body")), root)
|
||||
blocks: list[str] = []
|
||||
|
||||
for child in body:
|
||||
if child.tag.endswith("}p"):
|
||||
paragraph = _extract_docx_paragraph_text(child)
|
||||
if paragraph:
|
||||
blocks.append(paragraph)
|
||||
continue
|
||||
if child.tag.endswith("}tbl"):
|
||||
table = _extract_docx_table_rows(child)
|
||||
rendered = _build_docx_table_markdown(table)
|
||||
if rendered:
|
||||
blocks.append(rendered)
|
||||
|
||||
if blocks:
|
||||
return "\n\n".join(blocks)
|
||||
|
||||
texts = [node.text.strip() for node in root.iter() if node.tag.endswith("}t") and node.text]
|
||||
return "\n".join(texts)
|
||||
|
||||
|
||||
def _extract_docx_paragraph_text(node: ElementTree.Element) -> str:
    """Concatenate the text of a paragraph node and normalise its whitespace.

    Walks every descendant of ``node``: elements whose tag ends in ``}t``
    contribute their text, ``}tab`` contributes a tab and ``}br`` a newline.
    The joined string is then passed through ``_normalize_docx_cell_text``.
    """
    fragments: list[str] = []
    for element in node.iter():
        tag = element.tag
        if tag.endswith("}t"):
            # A text element with no content contributes nothing.
            if element.text:
                fragments.append(element.text)
        elif tag.endswith("}tab"):
            fragments.append("\t")
        elif tag.endswith("}br"):
            fragments.append("\n")
    return _normalize_docx_cell_text("".join(fragments))
|
||||
|
||||
|
||||
def _extract_docx_table_rows(table_node: ElementTree.Element) -> list[list[str]]:
    """Read a table node into a list of rows, each a list of cell strings.

    Only direct ``}tr`` children of the table and direct ``}tc`` children of
    each row are considered. A cell's text is the space-joined text of its
    direct ``}p`` children. Rows in which every cell is empty are dropped.
    """
    collected: list[list[str]] = []
    for tr_node in (child for child in table_node if child.tag.endswith("}tr")):
        cells: list[str] = []
        for tc_node in tr_node:
            if tc_node.tag.endswith("}tc"):
                paragraph_texts = (
                    _extract_docx_paragraph_text(p_node)
                    for p_node in tc_node
                    if p_node.tag.endswith("}p")
                )
                joined = " ".join(text for text in paragraph_texts if text)
                cells.append(_normalize_docx_cell_text(joined))
        if any(cells):
            collected.append(cells)
    return collected
|
||||
|
||||
|
||||
def _build_docx_table_markdown(rows: list[list[str]]) -> str:
    """Render table rows as a Markdown table followed by per-row retrieval clues.

    Rows without any non-blank cell are ignored. Fewer than two remaining rows
    yields an empty string. The first remaining row is the header; blank header
    cells are named "列N" by position. Each body row with values also produces
    a "header=value" clue line, numbered from 2.
    """
    escaped_rows = []
    for row in rows:
        if any(str(cell or "").strip() for cell in row):
            escaped_rows.append([_escape_markdown_cell(cell) for cell in row])
    if len(escaped_rows) < 2:
        return ""

    # Pad short rows so every row has the same number of columns.
    width = max(map(len, escaped_rows))
    padded_rows = [row + [""] * (width - len(row)) for row in escaped_rows]
    header_cells, *data_rows = padded_rows
    header = [cell or f"列{position + 1}" for position, cell in enumerate(header_cells)]

    sections = [_format_markdown_table(header, data_rows)]

    clue_lines: list[str] = []
    for line_number, data_row in enumerate(data_rows, start=2):
        pairs = [f"{title}={value}" for title, value in zip(header, data_row) if value]
        if pairs:
            clue_lines.append(f"- 表格第 {line_number} 行:" + ";".join(pairs))

    if clue_lines:
        sections.append("### 表格行级检索线索")
        sections.extend(clue_lines)

    return "\n\n".join(sections)
|
||||
|
||||
|
||||
def _normalize_docx_cell_text(value: str) -> str:
|
||||
normalized = str(value or "").replace("\r\n", "\n").replace("\r", "\n")
|
||||
normalized = re.sub(r"[ \t]*\n[ \t]*", " ", normalized)
|
||||
normalized = re.sub(r"\s+", " ", normalized)
|
||||
return normalized.strip()
|
||||
|
||||
|
||||
def _extract_document_text_from_path(
|
||||
*,
|
||||
file_path: Path,
|
||||
|
||||
@@ -12,7 +12,7 @@ logger = get_logger("app.services.knowledge_normalizer")
|
||||
|
||||
TABLE_MARKER_PATTERN = re.compile(r"表\s*(\d+)")
|
||||
SECTION_HEADING_PATTERN = re.compile(
|
||||
r"^(第[一二三四五六七八九十百零0-9]+[章节]\s*.*|[一二三四五六七八九十]+、.*|([一二三四五六七八九十]+).*|\([一二三四五六七八九十]+\).*)$"
|
||||
r"^(第[一二三四五六七八九十百零0-9]+[部分章节]\s*.*|[一二三四五六七八九十]+、.*|([一二三四五六七八九十]+).*|\([一二三四五六七八九十]+\).*)$"
|
||||
)
|
||||
LIST_ITEM_PATTERN = re.compile(r"^[-*•]\s+.+$")
|
||||
NUMBERED_ITEM_PATTERN = re.compile(r"^(?:\d+[.)、]|[①②③④⑤⑥⑦⑧⑨⑩])\s*.+$")
|
||||
|
||||
@@ -50,6 +50,12 @@ QUERY_TERM_STOPWORDS = {
|
||||
"哪些人",
|
||||
}
|
||||
TABLE_OR_STANDARD_QUERY_HINTS = (
|
||||
"表",
|
||||
"表格",
|
||||
"清单",
|
||||
"明细",
|
||||
"目录",
|
||||
"科目",
|
||||
"标准",
|
||||
"金额",
|
||||
"限额",
|
||||
@@ -61,6 +67,20 @@ TABLE_OR_STANDARD_QUERY_HINTS = (
|
||||
"档位",
|
||||
"额度",
|
||||
)
|
||||
QUERY_ANCHOR_TERMS = (
|
||||
"财务基础知识手册",
|
||||
"基础知识手册",
|
||||
"会计科目",
|
||||
"常用会计科目",
|
||||
"财务报表",
|
||||
"主要税种",
|
||||
"税种",
|
||||
"标准",
|
||||
"清单",
|
||||
"明细",
|
||||
"流程",
|
||||
)
|
||||
GENERIC_TITLE_TERMS = {"远光软件", "股份有限", "有限公司"}
|
||||
STRUCTURED_APPENDIX_LEADING_MARKERS = (
|
||||
"# 章节导航",
|
||||
"# 重点章节摘录",
|
||||
@@ -96,6 +116,10 @@ class KnowledgeRagService:
|
||||
"message": "请先输入要检索的知识库问题。",
|
||||
}
|
||||
|
||||
rewritten_query = normalized_query
|
||||
if conversation_history:
|
||||
rewritten_query = self._rewrite_query(normalized_query, conversation_history)
|
||||
|
||||
workspace = (
|
||||
os.environ.get("LIGHTRAG_WORKSPACE", DEFAULT_LIGHTRAG_WORKSPACE).strip()
|
||||
or DEFAULT_LIGHTRAG_WORKSPACE
|
||||
@@ -103,81 +127,102 @@ class KnowledgeRagService:
|
||||
local_result = query_local_text_chunks(
|
||||
lightrag_root=(self.storage_root / "knowledge" / ".lightrag").resolve(),
|
||||
workspace=workspace,
|
||||
query=normalized_query,
|
||||
query=rewritten_query,
|
||||
limit=limit,
|
||||
)
|
||||
if local_result.confident:
|
||||
return {
|
||||
"result_type": "knowledge_search",
|
||||
"query": normalized_query,
|
||||
"record_count": len(local_result.hits),
|
||||
"hits": local_result.hits,
|
||||
"references": [
|
||||
str(item.get("code") or "").strip()
|
||||
for item in local_result.hits
|
||||
if str(item.get("code") or "").strip()
|
||||
],
|
||||
"raw_references": [],
|
||||
"metadata": {
|
||||
"retrieval_strategy": "local_text_chunks",
|
||||
"elapsed_seconds": round(local_result.elapsed_seconds, 4),
|
||||
"total_chunks": local_result.total_chunks,
|
||||
"best_score": local_result.best_score,
|
||||
},
|
||||
"message": f"已从本地知识块中检索到 {len(local_result.hits)} 条相关内容。",
|
||||
}
|
||||
|
||||
runtime_hits: list[dict[str, Any]] = []
|
||||
runtime_references: list[str] = []
|
||||
try:
|
||||
runtime = self._get_runtime()
|
||||
raw = runtime.query_data(normalized_query, conversation_history=conversation_history)
|
||||
raw = runtime.query_data(rewritten_query, conversation_history=conversation_history)
|
||||
data = raw.get("data") if isinstance(raw, dict) else {}
|
||||
chunks = list(data.get("chunks") or []) if isinstance(data, dict) else []
|
||||
entities = list(data.get("entities") or []) if isinstance(data, dict) else []
|
||||
runtime_references = list(data.get("references") or []) if isinstance(data, dict) else []
|
||||
runtime_hits = self._build_hits_from_query_data(
|
||||
query=rewritten_query,
|
||||
chunks=chunks,
|
||||
entities=entities,
|
||||
limit=limit,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("Knowledge query failed: %s", exc)
|
||||
|
||||
all_hits: dict[str, dict[str, Any]] = {}
|
||||
for hit in local_result.hits:
|
||||
hit["score"] = int(hit.get("score") or 0)
|
||||
all_hits[hit["code"]] = hit
|
||||
|
||||
for hit in runtime_hits:
|
||||
code = hit["code"]
|
||||
if code in all_hits:
|
||||
all_hits[code]["score"] = max(all_hits[code]["score"], int(hit.get("score") or 0) + 20)
|
||||
if not all_hits[code].get("tags") and hit.get("tags"):
|
||||
all_hits[code]["tags"] = hit["tags"]
|
||||
else:
|
||||
hit["score"] = int(hit.get("score") or 0)
|
||||
all_hits[code] = hit
|
||||
|
||||
merged_hits = sorted(all_hits.values(), key=lambda x: int(x.get("score") or 0), reverse=True)[:max(1, limit)]
|
||||
|
||||
if not merged_hits:
|
||||
return {
|
||||
"result_type": "knowledge_search",
|
||||
"query": normalized_query,
|
||||
"query": rewritten_query,
|
||||
"record_count": 0,
|
||||
"hits": [],
|
||||
"references": [],
|
||||
"message": f"知识库检索暂不可用:{exc}",
|
||||
}
|
||||
|
||||
data = raw.get("data") if isinstance(raw, dict) else {}
|
||||
chunks = list(data.get("chunks") or []) if isinstance(data, dict) else []
|
||||
entities = list(data.get("entities") or []) if isinstance(data, dict) else []
|
||||
references = list(data.get("references") or []) if isinstance(data, dict) else []
|
||||
hits = self._build_hits_from_query_data(
|
||||
query=normalized_query,
|
||||
chunks=chunks,
|
||||
entities=entities,
|
||||
limit=limit,
|
||||
)
|
||||
|
||||
if not hits:
|
||||
return {
|
||||
"result_type": "knowledge_search",
|
||||
"query": normalized_query,
|
||||
"record_count": 0,
|
||||
"hits": [],
|
||||
"references": [],
|
||||
"raw_references": references,
|
||||
"raw_references": runtime_references,
|
||||
"message": "当前知识库中没有检索到与本次问题直接匹配的内容。",
|
||||
}
|
||||
|
||||
return {
|
||||
"result_type": "knowledge_search",
|
||||
"query": normalized_query,
|
||||
"record_count": len(hits),
|
||||
"hits": hits,
|
||||
"query": rewritten_query,
|
||||
"record_count": len(merged_hits),
|
||||
"hits": merged_hits,
|
||||
"references": [
|
||||
str(item.get("code") or "").strip()
|
||||
for item in hits
|
||||
for item in merged_hits
|
||||
if str(item.get("code") or "").strip()
|
||||
],
|
||||
"raw_references": references,
|
||||
"metadata": raw.get("metadata") if isinstance(raw, dict) else {},
|
||||
"message": f"已从知识库中检索到 {len(hits)} 条相关内容。",
|
||||
"raw_references": runtime_references,
|
||||
"metadata": {
|
||||
"retrieval_strategy": "fusion",
|
||||
"local_total_chunks": local_result.total_chunks,
|
||||
"local_best_score": local_result.best_score,
|
||||
},
|
||||
"message": f"已从知识库中联合检索到 {len(merged_hits)} 条相关内容。",
|
||||
}
|
||||
|
||||
def _rewrite_query(self, query: str, conversation_history: list[dict[str, str]]) -> str:
|
||||
if not self.db:
|
||||
return query
|
||||
|
||||
from app.services.runtime_chat import RuntimeChatService
|
||||
try:
|
||||
chat_service = RuntimeChatService(self.db)
|
||||
messages: list[dict[str, Any]] = [{"role": "system", "content": "你是一个查询重写助手。你的任务是根据用户的多轮对话历史,将用户的最后一次提问重写为一句独立、完整的查询语句,以便于在知识库中进行向量检索。只输出重写后的句子,不要任何解释。"}]
|
||||
for msg in conversation_history[-6:]:
|
||||
messages.append({"role": msg.get("role", "user"), "content": msg.get("content", "")})
|
||||
messages.append({"role": "user", "content": f"当前提问:{query}\n\n请重写当前提问。"})
|
||||
|
||||
rewritten = chat_service.complete(
|
||||
messages,
|
||||
max_tokens=60,
|
||||
temperature=0.1,
|
||||
timeout_seconds=10,
|
||||
)
|
||||
|
||||
if rewritten and len(rewritten) > 2 and len(rewritten) < 80:
|
||||
logger.info("Query rewritten: '%s' -> '%s'", query, rewritten)
|
||||
return rewritten
|
||||
except Exception as exc:
|
||||
logger.warning("Query rewrite failed: %s", exc)
|
||||
|
||||
return query
|
||||
|
||||
def index_documents(
|
||||
self,
|
||||
*,
|
||||
@@ -686,6 +731,24 @@ def _extract_query_terms(query: str) -> list[str]:
|
||||
remember(item)
|
||||
|
||||
for block in re.findall(r"[\u4e00-\u9fff]{2,20}", normalized_query):
|
||||
for marker in ("标准", "金额", "限额", "额度"):
|
||||
marker_index = block.find(marker)
|
||||
if marker_index <= 0:
|
||||
continue
|
||||
subject = block[:marker_index]
|
||||
for width in (6, 4, 3, 2):
|
||||
remember(subject[-width:])
|
||||
for anchor in QUERY_ANCHOR_TERMS:
|
||||
if anchor in block:
|
||||
remember(anchor)
|
||||
tail = block[-14:]
|
||||
for size in (8, 7, 6, 5, 4):
|
||||
for start in range(0, len(tail) - size + 1):
|
||||
piece = tail[start : start + size]
|
||||
if any(anchor in piece for anchor in QUERY_ANCHOR_TERMS):
|
||||
remember(piece)
|
||||
if len(terms) >= MAX_QUERY_TERMS:
|
||||
return terms
|
||||
if len(block) <= 4:
|
||||
remember(block)
|
||||
continue
|
||||
@@ -715,6 +778,11 @@ def _score_knowledge_hit(
|
||||
matched_terms = [term for term in query_terms if term in haystack]
|
||||
score += len(matched_terms) * 8
|
||||
score += sum(1 for term in matched_terms if term in title) * 6
|
||||
score += sum(
|
||||
(len(term) - 3) * 12
|
||||
for term in matched_terms
|
||||
if len(term) >= 4 and term in title and term not in GENERIC_TITLE_TERMS
|
||||
)
|
||||
|
||||
leading_appendix_marker = _leading_structured_appendix_marker(content)
|
||||
if leading_appendix_marker == "# 章节导航":
|
||||
|
||||
@@ -42,6 +42,12 @@ LOCAL_QUERY_STOPWORDS = {
|
||||
"问题",
|
||||
}
|
||||
LOCAL_TABLE_QUERY_HINTS = (
|
||||
"表",
|
||||
"表格",
|
||||
"清单",
|
||||
"明细",
|
||||
"目录",
|
||||
"科目",
|
||||
"标准",
|
||||
"金额",
|
||||
"限额",
|
||||
@@ -53,6 +59,20 @@ LOCAL_TABLE_QUERY_HINTS = (
|
||||
"档位",
|
||||
"额度",
|
||||
)
|
||||
# Anchor phrases remembered as local query terms when they appear inside a
# Chinese block of the query; order is kept as authored.
LOCAL_QUERY_ANCHOR_TERMS = (
    # handbook titles
    "财务基础知识手册", "基础知识手册",
    # accounting / reporting / tax topics
    "会计科目", "常用会计科目", "财务报表", "主要税种", "税种",
    # generic document-structure words
    "标准", "清单", "明细", "流程",
)
# Company-name fragments that do not earn the long-term title bonus.
LOCAL_GENERIC_TITLE_TERMS = {"远光软件", "股份有限", "有限公司"}
|
||||
LOCAL_DOMAIN_TERMS = (
|
||||
"报销",
|
||||
"费用",
|
||||
@@ -253,6 +273,8 @@ def _score_local_chunk(
|
||||
score += weight
|
||||
if term in lowered_title:
|
||||
score += max(4, weight)
|
||||
if len(term) >= 4 and term not in LOCAL_GENERIC_TITLE_TERMS:
|
||||
score += (len(term) - 3) * 12
|
||||
occurrences = lowered_content.count(term)
|
||||
if occurrences > 1:
|
||||
score += min(8, occurrences * 2)
|
||||
@@ -299,6 +321,24 @@ def _extract_local_query_terms(query: str) -> list[str]:
|
||||
remember(item)
|
||||
|
||||
for block in re.findall(r"[\u4e00-\u9fff]{2,24}", normalized_query):
|
||||
for marker in ("标准", "金额", "限额", "额度"):
|
||||
marker_index = block.find(marker)
|
||||
if marker_index <= 0:
|
||||
continue
|
||||
subject = block[:marker_index]
|
||||
for width in (6, 4, 3, 2):
|
||||
remember(subject[-width:])
|
||||
for anchor in LOCAL_QUERY_ANCHOR_TERMS:
|
||||
if anchor in block:
|
||||
remember(anchor)
|
||||
tail = block[-14:]
|
||||
for size in (8, 7, 6, 5, 4):
|
||||
for start in range(0, len(tail) - size + 1):
|
||||
piece = tail[start : start + size]
|
||||
if any(anchor in piece for anchor in LOCAL_QUERY_ANCHOR_TERMS):
|
||||
remember(piece)
|
||||
if len(terms) >= MAX_LOCAL_QUERY_TERMS:
|
||||
return terms
|
||||
if len(block) <= 4:
|
||||
remember(block)
|
||||
continue
|
||||
|
||||
@@ -102,7 +102,7 @@ class SemanticOntologyService(
|
||||
context_json = payload.context_json or {}
|
||||
reference = self._load_reference_catalog()
|
||||
compact_query = self._compact(query)
|
||||
entities = self._extract_entities(query, compact_query, reference)
|
||||
entities = self._extract_entities(query, compact_query, reference, context_json=context_json)
|
||||
rule_scenario, scenario_score = self._detect_scenario(compact_query)
|
||||
time_range, _time_score = self._extract_time_range(
|
||||
query,
|
||||
@@ -111,9 +111,14 @@ class SemanticOntologyService(
|
||||
)
|
||||
session_scenario = self._resolve_session_type_scenario(context_json)
|
||||
context_scenario = self._resolve_context_scenario(context_json)
|
||||
application_context = self._is_expense_application_context(context_json)
|
||||
application_query = self._looks_like_expense_application(compact_query)
|
||||
if session_scenario == "knowledge":
|
||||
rule_scenario = "knowledge"
|
||||
scenario_score = max(scenario_score, 0.34)
|
||||
if session_scenario != "knowledge" and (application_context or application_query):
|
||||
rule_scenario = "expense"
|
||||
scenario_score = max(scenario_score, 0.22)
|
||||
if rule_scenario == "unknown" and context_scenario is not None:
|
||||
rule_scenario = context_scenario
|
||||
scenario_score = max(scenario_score, 0.14)
|
||||
@@ -138,6 +143,9 @@ class SemanticOntologyService(
|
||||
entities=entities,
|
||||
time_range=time_range,
|
||||
)
|
||||
if session_scenario != "knowledge" and (application_context or application_query):
|
||||
rule_intent = "draft"
|
||||
intent_score = max(intent_score, 0.22)
|
||||
if session_scenario != "knowledge" and self._should_inherit_expense_draft(
|
||||
compact_query,
|
||||
scenario=rule_scenario,
|
||||
|
||||
@@ -20,6 +20,8 @@ from app.services.ontology_rules import (
|
||||
COMPARE_KEYWORDS,
|
||||
DRAFT_FOLLOW_UP_KEYWORDS,
|
||||
DRAFT_KEYWORDS,
|
||||
EXPENSE_APPLICATION_CONTEXT_TYPES,
|
||||
EXPENSE_APPLICATION_KEYWORDS,
|
||||
EXPENSE_NARRATIVE_KEYWORDS,
|
||||
EXPENSE_REVIEW_ACTIONS,
|
||||
EXPLAIN_KEYWORDS,
|
||||
@@ -71,6 +73,21 @@ EXPLICIT_ENTERTAINMENT_KEYWORDS = (
|
||||
|
||||
|
||||
class OntologyDetectionMixin:
|
||||
@staticmethod
def _is_expense_application_context(context_json: dict[str, Any]) -> bool:
    """Tell whether the request context marks an expense-application flow.

    True when the trimmed ``document_type`` or ``application_stage`` is one of
    ``EXPENSE_APPLICATION_CONTEXT_TYPES``, or when the trimmed ``entry_source``
    is ``documents_application`` / ``expense_application``.
    """
    # Read and clean all three values up front, as the original did.
    doc_type, stage, source = (
        str(context_json.get(key) or "").strip()
        for key in ("document_type", "application_stage", "entry_source")
    )
    if doc_type in EXPENSE_APPLICATION_CONTEXT_TYPES:
        return True
    if stage in EXPENSE_APPLICATION_CONTEXT_TYPES:
        return True
    return source in {"documents_application", "expense_application"}
|
||||
|
||||
@staticmethod
def _looks_like_expense_application(compact_query: str) -> bool:
    """Return True when the compacted query contains any application keyword."""
    for phrase in EXPENSE_APPLICATION_KEYWORDS:
        if phrase in compact_query:
            return True
    return False
|
||||
|
||||
def _detect_scenario(self, compact_query: str) -> tuple[str, float]:
|
||||
scores = {key: 0.0 for key in SCENARIO_KEYWORDS}
|
||||
for scenario, keywords in SCENARIO_KEYWORDS.items():
|
||||
@@ -341,6 +358,9 @@ class OntologyDetectionMixin:
|
||||
"conversation_id": payload.context_json.get("conversation_id"),
|
||||
"conversation_scenario": payload.context_json.get("conversation_scenario"),
|
||||
"conversation_intent": payload.context_json.get("conversation_intent"),
|
||||
"document_type": payload.context_json.get("document_type"),
|
||||
"application_stage": payload.context_json.get("application_stage"),
|
||||
"application_fields": payload.context_json.get("application_fields"),
|
||||
"draft_claim_id": payload.context_json.get("draft_claim_id"),
|
||||
"review_action": payload.context_json.get("review_action"),
|
||||
"review_form_values": payload.context_json.get("review_form_values"),
|
||||
|
||||
@@ -18,7 +18,12 @@ from app.services.ontology_rules import (
|
||||
DATE_RANGE_PATTERN,
|
||||
EXPLICIT_DATE_PATTERN,
|
||||
EXPLICIT_MONTH_PATTERN,
|
||||
EXPENSE_APPLICATION_ATTACHMENT_REQUIRED_TYPES,
|
||||
EXPENSE_APPLICATION_CONTEXT_TYPES,
|
||||
EXPENSE_APPLICATION_KEYWORDS,
|
||||
EXPENSE_APPLICATION_REQUIRED_SLOT_KEYS,
|
||||
EXPENSE_TYPE_KEYWORDS,
|
||||
GENERIC_EXPENSE_APPLICATION_PROMPTS,
|
||||
GENERIC_EXPENSE_PROMPTS,
|
||||
LOCATION_KEYWORDS,
|
||||
MONTH_DAY_PATTERN,
|
||||
@@ -30,6 +35,21 @@ from app.services.ontology_rules import (
|
||||
|
||||
|
||||
class OntologyExtractionMixin:
|
||||
@staticmethod
def _is_expense_application_context_value(context_json: dict[str, Any]) -> bool:
    """Report whether ``context_json`` describes an expense-application entry.

    The ``document_type`` and ``application_stage`` values are matched against
    ``EXPENSE_APPLICATION_CONTEXT_TYPES``; ``entry_source`` is matched against
    the two literal application entry points. Values are stringified and
    trimmed first; missing or falsy values count as empty strings.
    """
    cleaned = {
        key: str(context_json.get(key) or "").strip()
        for key in ("document_type", "application_stage", "entry_source")
    }
    if cleaned["document_type"] in EXPENSE_APPLICATION_CONTEXT_TYPES:
        return True
    if cleaned["application_stage"] in EXPENSE_APPLICATION_CONTEXT_TYPES:
        return True
    return cleaned["entry_source"] in {"documents_application", "expense_application"}
|
||||
|
||||
@staticmethod
def _has_expense_application_signal(compact_query: str) -> bool:
    """Return True if any expense-application keyword occurs in the query."""
    matches = (phrase in compact_query for phrase in EXPENSE_APPLICATION_KEYWORDS)
    return True in matches
|
||||
|
||||
def _infer_default_missing_slots(
|
||||
self,
|
||||
compact_query: str,
|
||||
@@ -46,6 +66,44 @@ class OntologyExtractionMixin:
|
||||
entity_types = {item.type for item in entities}
|
||||
attachment_count = int(context_json.get("attachment_count") or 0)
|
||||
missing_slots: list[str] = []
|
||||
application_mode = (
|
||||
self._is_expense_application_context_value(context_json)
|
||||
or self._has_expense_application_signal(compact_query)
|
||||
or any(
|
||||
item.type == "document_type" and item.normalized_value == "expense_application"
|
||||
for item in entities
|
||||
)
|
||||
)
|
||||
|
||||
if application_mode:
|
||||
form_values = context_json.get("review_form_values")
|
||||
if not isinstance(form_values, dict):
|
||||
form_values = {}
|
||||
expense_type_codes = {
|
||||
str(item.normalized_value or item.value or "").strip()
|
||||
for item in entities
|
||||
if item.type == "expense_type"
|
||||
}
|
||||
if "expense_type" not in entity_types and not str(form_values.get("expense_type") or "").strip():
|
||||
missing_slots.append("expense_type")
|
||||
if "amount" not in entity_types and not str(form_values.get("amount") or "").strip():
|
||||
missing_slots.append("amount")
|
||||
if not time_range.start_date and not (
|
||||
str(form_values.get("time_range") or form_values.get("business_time") or "").strip()
|
||||
):
|
||||
missing_slots.append("time_range")
|
||||
reason_value = str(
|
||||
form_values.get("reason")
|
||||
or form_values.get("business_reason")
|
||||
or form_values.get("reason_value")
|
||||
or ""
|
||||
).strip()
|
||||
if not reason_value and compact_query in GENERIC_EXPENSE_APPLICATION_PROMPTS:
|
||||
missing_slots.append("reason")
|
||||
if attachment_count <= 0 and expense_type_codes & EXPENSE_APPLICATION_ATTACHMENT_REQUIRED_TYPES:
|
||||
missing_slots.append("attachments")
|
||||
ordered_keys = [*EXPENSE_APPLICATION_REQUIRED_SLOT_KEYS, "attachments"]
|
||||
return [item for item in ordered_keys if item in missing_slots]
|
||||
|
||||
if self._is_generic_expense_prompt(compact_query):
|
||||
if "expense_type" not in entity_types:
|
||||
@@ -98,14 +156,40 @@ class OntologyExtractionMixin:
|
||||
query: str,
|
||||
compact_query: str,
|
||||
reference: ReferenceCatalog,
|
||||
*,
|
||||
context_json: dict[str, Any] | None = None,
|
||||
) -> list[OntologyEntity]:
|
||||
entities: dict[tuple[str, str], OntologyEntity] = {}
|
||||
context_json = context_json or {}
|
||||
|
||||
def upsert(entity: OntologyEntity) -> None:
|
||||
key = (entity.type, entity.normalized_value)
|
||||
if key not in entities:
|
||||
entities[key] = entity
|
||||
|
||||
if (
|
||||
self._is_expense_application_context_value(context_json)
|
||||
or self._has_expense_application_signal(compact_query)
|
||||
):
|
||||
upsert(
|
||||
self._make_entity(
|
||||
"document_type",
|
||||
"费用申请",
|
||||
"expense_application",
|
||||
role="target",
|
||||
confidence=0.94,
|
||||
)
|
||||
)
|
||||
upsert(
|
||||
self._make_entity(
|
||||
"workflow_stage",
|
||||
"前置申请",
|
||||
"pre_approval",
|
||||
role="target",
|
||||
confidence=0.9,
|
||||
)
|
||||
)
|
||||
|
||||
for match in re.finditer(r"客户\s*([A-Za-z0-9一二三四五六七八九十]+)", query):
|
||||
suffix = match.group(1).strip()
|
||||
normalized = f"客户{suffix}".replace(" ", "")
|
||||
@@ -510,6 +594,8 @@ class OntologyExtractionMixin:
|
||||
"project",
|
||||
"location",
|
||||
"expense_type",
|
||||
"document_type",
|
||||
"workflow_stage",
|
||||
}:
|
||||
upsert(
|
||||
OntologyConstraint(
|
||||
|
||||
@@ -173,6 +173,49 @@ GENERIC_EXPENSE_PROMPTS = {
|
||||
"发起报销",
|
||||
"提交报销",
|
||||
}
|
||||
# Context values (document type / application stage) that mark an
# expense-application flow.
EXPENSE_APPLICATION_CONTEXT_TYPES = {
    "expense_application", "application",
    "pre_approval", "preapproval",
}
# Phrases whose presence in the compacted query signals an application request.
EXPENSE_APPLICATION_KEYWORDS = (
    # generic application wording
    "费用申请", "申请单", "发起申请", "提交申请", "提出申请", "前置申请",
    # reimbursement-flavoured wording
    "报销申请", "申请报销",
    # scenario-specific applications
    "差旅申请", "出差申请", "会务申请", "会议申请", "采购申请", "培训申请", "预算申请",
)
# Queries that consist of nothing but a bare application prompt.
GENERIC_EXPENSE_APPLICATION_PROMPTS = {
    "申请", "费用申请", "发起申请", "提交申请", "提出申请", "申请报销", "报销申请",
}
# Slot keys of an application, in the order missing slots are reported.
EXPENSE_APPLICATION_REQUIRED_SLOT_KEYS = ("expense_type", "amount", "time_range", "reason")
# Expense types for which a missing attachment is reported as a missing slot.
EXPENSE_APPLICATION_ATTACHMENT_REQUIRED_TYPES = {"meeting", "office", "training"}
|
||||
MISSING_SLOT_LABELS = {
|
||||
"expense_type": "费用类型",
|
||||
"amount": "金额",
|
||||
|
||||
@@ -14,6 +14,7 @@ from app.schemas.agent_asset import AgentAssetRiskRuleGenerateRequest
|
||||
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
|
||||
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
|
||||
from app.services.audit import AuditLogService
|
||||
from app.services.expense_type_keywords import EXPENSE_TYPE_LABEL_BY_CODE
|
||||
from app.services.risk_rule_flow_diagram import (
|
||||
RiskRuleFlowDiagramField,
|
||||
RiskRuleFlowDiagramRenderer,
|
||||
@@ -43,6 +44,24 @@ RISK_LEVEL_LABELS: dict[str, str] = {
|
||||
"high": "高风险",
|
||||
}
|
||||
|
||||
# Expense categories a generated risk rule may be scoped to. The label mapping
# below is built in this order.
EXPENSE_RISK_CATEGORY_CODES: tuple[str, ...] = (
    "travel", "hotel", "transport",
    "meal", "meeting", "office",
    "training", "communication", "welfare",
)
# Code -> display label, looked up from the shared expense-type label table.
EXPENSE_RISK_CATEGORY_LABELS: dict[str, str] = dict(
    (code, EXPENSE_TYPE_LABEL_BY_CODE[code]) for code in EXPENSE_RISK_CATEGORY_CODES
)
# Alternative category codes mapped onto their canonical code.
EXPENSE_RISK_CATEGORY_ALIASES = {"entertainment": "meal"}
|
||||
|
||||
FIELD_ONTOLOGY: tuple[RiskRuleField, ...] = (
|
||||
RiskRuleField("claim.reason", "报销事由", "text", "claim", ("事由", "说明", "理由", "用途")),
|
||||
RiskRuleField(
|
||||
@@ -156,17 +175,23 @@ class RiskRuleGenerationService:
|
||||
risk_level = str(body.risk_level or "medium").strip().lower()
|
||||
if risk_level not in RISK_LEVEL_LABELS:
|
||||
raise ValueError("风险等级仅支持 low、medium、high。")
|
||||
requires_attachment = bool(body.requires_attachment)
|
||||
expense_category = self._normalize_expense_category(body.expense_category, domain)
|
||||
expense_category_label = EXPENSE_RISK_CATEGORY_LABELS.get(expense_category or "", "")
|
||||
|
||||
created_at = datetime.now(UTC)
|
||||
fields = self._resolve_fields(natural_language, domain=domain)
|
||||
draft = self._compile_with_model(
|
||||
natural_language=natural_language,
|
||||
domain=domain,
|
||||
expense_category=expense_category,
|
||||
expense_category_label=expense_category_label,
|
||||
risk_level=risk_level,
|
||||
fields=fields,
|
||||
) or self._build_fallback_draft(
|
||||
natural_language=natural_language,
|
||||
domain=domain,
|
||||
expense_category_label=expense_category_label,
|
||||
risk_level=risk_level,
|
||||
fields=fields,
|
||||
)
|
||||
@@ -179,10 +204,13 @@ class RiskRuleGenerationService:
|
||||
draft,
|
||||
natural_language=natural_language,
|
||||
domain=domain,
|
||||
expense_category=expense_category,
|
||||
expense_category_label=expense_category_label,
|
||||
risk_level=risk_level,
|
||||
fields=fields,
|
||||
created_at=created_at,
|
||||
actor=actor,
|
||||
requires_attachment=requires_attachment,
|
||||
)
|
||||
rule_code = str(payload["rule_code"])
|
||||
file_name = f"{rule_code}.json"
|
||||
@@ -209,8 +237,11 @@ class RiskRuleGenerationService:
|
||||
config_json={
|
||||
"severity": risk_level,
|
||||
"enabled": True,
|
||||
"requires_attachment": requires_attachment,
|
||||
"tag": "风险规则",
|
||||
"detail_mode": "json_risk",
|
||||
"expense_category": expense_category,
|
||||
"expense_category_label": expense_category_label,
|
||||
"risk_category": payload.get("risk_category"),
|
||||
"rule_library": RISK_RULES_LIBRARY,
|
||||
"rule_document": {
|
||||
@@ -241,7 +272,13 @@ class RiskRuleGenerationService:
|
||||
resource_type=AgentAssetType.RULE.value,
|
||||
resource_id=asset.id,
|
||||
before_json=None,
|
||||
after_json={"rule_code": rule_code, "risk_level": risk_level, "domain": domain},
|
||||
after_json={
|
||||
"rule_code": rule_code,
|
||||
"risk_level": risk_level,
|
||||
"domain": domain,
|
||||
"expense_category": expense_category,
|
||||
"requires_attachment": requires_attachment,
|
||||
},
|
||||
request_id=request_id,
|
||||
)
|
||||
self.db.refresh(asset)
|
||||
@@ -252,6 +289,8 @@ class RiskRuleGenerationService:
|
||||
*,
|
||||
natural_language: str,
|
||||
domain: str,
|
||||
expense_category: str | None,
|
||||
expense_category_label: str,
|
||||
risk_level: str,
|
||||
fields: list[RiskRuleField],
|
||||
) -> dict[str, Any] | None:
|
||||
@@ -279,6 +318,8 @@ class RiskRuleGenerationService:
|
||||
{
|
||||
"business_domain": domain,
|
||||
"business_domain_label": BUSINESS_DOMAIN_LABELS[domain],
|
||||
"expense_category": expense_category,
|
||||
"expense_category_label": expense_category_label,
|
||||
"risk_level": risk_level,
|
||||
"risk_level_label": RISK_LEVEL_LABELS[risk_level],
|
||||
"natural_language": natural_language,
|
||||
@@ -370,6 +411,7 @@ class RiskRuleGenerationService:
|
||||
*,
|
||||
natural_language: str,
|
||||
domain: str,
|
||||
expense_category_label: str,
|
||||
risk_level: str,
|
||||
fields: list[RiskRuleField],
|
||||
) -> dict[str, Any]:
|
||||
@@ -381,8 +423,9 @@ class RiskRuleGenerationService:
|
||||
fields=fields,
|
||||
)
|
||||
name = self._infer_rule_name(natural_language)
|
||||
business_label = expense_category_label or BUSINESS_DOMAIN_LABELS[domain]
|
||||
description = (
|
||||
f"当{BUSINESS_DOMAIN_LABELS[domain]}业务满足“{natural_language}”时,系统会按"
|
||||
f"当{business_label}业务满足“{natural_language}”时,系统会按"
|
||||
f"{RISK_LEVEL_LABELS[risk_level]}进行提示,并要求经办人或审核人补充核对依据。"
|
||||
)
|
||||
return {
|
||||
@@ -393,7 +436,7 @@ class RiskRuleGenerationService:
|
||||
"condition_summary": condition_summary,
|
||||
"keywords": self._infer_keywords(natural_language),
|
||||
"flow": {
|
||||
"start": f"{BUSINESS_DOMAIN_LABELS[domain]}单据提交",
|
||||
"start": f"{business_label}单据提交",
|
||||
"evidence": "读取" + "、".join(item.label for item in fields[:3]),
|
||||
"decision": condition_summary,
|
||||
"pass": "未命中风险,继续业务流转",
|
||||
@@ -407,14 +450,18 @@ class RiskRuleGenerationService:
|
||||
*,
|
||||
natural_language: str,
|
||||
domain: str,
|
||||
expense_category: str | None,
|
||||
expense_category_label: str,
|
||||
risk_level: str,
|
||||
fields: list[RiskRuleField],
|
||||
created_at: datetime,
|
||||
actor: str,
|
||||
requires_attachment: bool,
|
||||
) -> dict[str, Any]:
|
||||
created_stamp = created_at.strftime("%Y%m%d%H%M%S")
|
||||
created_stamp = created_at.strftime("%Y%m%d%H%M%S%f")
|
||||
domain_slug = {"expense": "expense", "ar": "ar", "ap": "ap"}[domain]
|
||||
rule_code = f"risk.{domain_slug}.generated_{created_stamp}"
|
||||
category_slug = f".{expense_category}" if expense_category else ""
|
||||
rule_code = f"risk.{domain_slug}{category_slug}.generated_{created_stamp}"
|
||||
template_key = str(draft.get("template_key") or "field_required_v1").strip()
|
||||
field_keys = [
|
||||
str(item or "").strip()
|
||||
@@ -424,7 +471,7 @@ class RiskRuleGenerationService:
|
||||
condition_summary = (
|
||||
self._clean_text(draft.get("condition_summary")) or "判断是否符合自然语言规则描述"
|
||||
)
|
||||
risk_category = BUSINESS_DOMAIN_LABELS[domain]
|
||||
risk_category = expense_category_label or BUSINESS_DOMAIN_LABELS[domain]
|
||||
keywords = list(draft.get("keywords") or [])
|
||||
field_by_key = {item.key: item for item in fields}
|
||||
params: dict[str, Any] = {
|
||||
@@ -440,6 +487,9 @@ class RiskRuleGenerationService:
|
||||
if template_key == "keyword_match_v1":
|
||||
params["keywords"] = keywords
|
||||
params["search_fields"] = field_keys
|
||||
applies_to: dict[str, Any] = {"domains": [domain]}
|
||||
if expense_category:
|
||||
applies_to["expense_categories"] = [expense_category]
|
||||
|
||||
payload = {
|
||||
"schema_version": "2.0",
|
||||
@@ -447,12 +497,13 @@ class RiskRuleGenerationService:
|
||||
"name": self._clean_text(draft.get("name")) or self._infer_rule_name(natural_language),
|
||||
"description": self._clean_text(draft.get("description")) or natural_language,
|
||||
"enabled": True,
|
||||
"requires_attachment": requires_attachment,
|
||||
"risk_dimension": "natural_language_rule",
|
||||
"risk_category": risk_category,
|
||||
"ontology_signal": "natural_language_risk",
|
||||
"evaluator": "template_rule",
|
||||
"template_key": template_key,
|
||||
"applies_to": {"domains": [domain]},
|
||||
"applies_to": applies_to,
|
||||
"inputs": {
|
||||
"fields": [
|
||||
{
|
||||
@@ -478,6 +529,9 @@ class RiskRuleGenerationService:
|
||||
"source_ref": "自然语言风险规则",
|
||||
"created_at": created_at.isoformat(),
|
||||
"created_by": actor,
|
||||
"requires_attachment": requires_attachment,
|
||||
"expense_category": expense_category,
|
||||
"expense_category_label": expense_category_label,
|
||||
"natural_language": natural_language,
|
||||
"business_explanation": self._clean_text(draft.get("description")),
|
||||
"condition_summary": condition_summary,
|
||||
@@ -488,6 +542,7 @@ class RiskRuleGenerationService:
|
||||
payload,
|
||||
fields=[field_by_key[key] for key in field_keys if key in field_by_key],
|
||||
domain=domain,
|
||||
domain_label=risk_category,
|
||||
risk_level=risk_level,
|
||||
)
|
||||
return payload
|
||||
@@ -498,6 +553,7 @@ class RiskRuleGenerationService:
|
||||
*,
|
||||
fields: list[RiskRuleField],
|
||||
domain: str,
|
||||
domain_label: str | None = None,
|
||||
risk_level: str,
|
||||
) -> str:
|
||||
metadata = payload.get("metadata") if isinstance(payload.get("metadata"), dict) else {}
|
||||
@@ -506,7 +562,7 @@ class RiskRuleGenerationService:
|
||||
return self.flow_diagram_renderer.render(
|
||||
RiskRuleFlowDiagramSpec(
|
||||
title=self._clean_text(payload.get("name")) or "风险规则判断流程",
|
||||
domain_label=BUSINESS_DOMAIN_LABELS.get(domain, "业务"),
|
||||
domain_label=domain_label or BUSINESS_DOMAIN_LABELS.get(domain, "业务"),
|
||||
severity=risk_level,
|
||||
severity_label=RISK_LEVEL_LABELS.get(risk_level, "中风险"),
|
||||
fields=tuple(
|
||||
@@ -528,6 +584,21 @@ class RiskRuleGenerationService:
|
||||
)
|
||||
)
|
||||
|
||||
@staticmethod
def _normalize_expense_category(value: str | None, domain: str) -> str | None:
    """Resolve the requested expense category to a canonical code.

    Args:
        value: Raw category from the request; trimmed and lower-cased.
        domain: Business domain of the rule being generated.

    Returns:
        The canonical category code, or ``None`` when the domain is not the
        expense domain or no category was supplied.

    Raises:
        ValueError: If a category is supplied that, after alias resolution,
            is not one of ``EXPENSE_RISK_CATEGORY_LABELS``.
    """
    if domain == AgentAssetDomain.EXPENSE.value:
        requested = str(value or "").strip().lower()
        if requested:
            # Map aliases onto their canonical code.
            code = EXPENSE_RISK_CATEGORY_ALIASES.get(requested, requested)
            if code in EXPENSE_RISK_CATEGORY_LABELS:
                return code
            allowed = "、".join(EXPENSE_RISK_CATEGORY_LABELS.values())
            raise ValueError(f"费用领域仅支持:{allowed}。")
    return None
|
||||
|
||||
def _resolve_fields(self, text: str, *, domain: str) -> list[RiskRuleField]:
|
||||
prefixes = DOMAIN_FIELD_PREFIXES.get(domain, ())
|
||||
candidates = [field for field in FIELD_ONTOLOGY if field.key.startswith(prefixes)]
|
||||
|
||||
@@ -172,8 +172,12 @@ class RiskRuleTemplateExecutor:
|
||||
if field_key == "ocr_text":
|
||||
values.extend([context.get("ocr_text"), context.get("ocr_summary")])
|
||||
if field_key in {"hotel_city", "route_cities"}:
|
||||
values.extend(self._scan_document_values(document_info, field_key))
|
||||
values.extend(self._scan_document_values(document_info, "city"))
|
||||
specific_values = self._scan_document_values(document_info, field_key)
|
||||
values.extend(
|
||||
specific_values
|
||||
if specific_values
|
||||
else self._scan_document_values(document_info, "city")
|
||||
)
|
||||
else:
|
||||
values.extend(self._scan_document_values(document_info, field_key))
|
||||
return self._normalize_values(values)
|
||||
@@ -203,8 +207,8 @@ class RiskRuleTemplateExecutor:
|
||||
"buyer_name": ("购买方", "抬头", "买方"),
|
||||
"goods_name": ("品名", "商品", "服务名称"),
|
||||
"issue_date": ("日期", "开票日期", "发票日期"),
|
||||
"hotel_city": ("住宿城市", "酒店城市", "酒店地点"),
|
||||
"route_cities": ("行程", "路线", "城市"),
|
||||
"hotel_city": ("住宿城市", "酒店城市", "酒店地点", "住宿", "酒店"),
|
||||
"route_cities": ("行程", "路线", "目的地", "出差城市"),
|
||||
"city": ("城市", "地点"),
|
||||
}
|
||||
return any(item in label for item in label_map.get(field_key, ()))
|
||||
|
||||
@@ -16,6 +16,7 @@ from app.db.session import get_session_factory
|
||||
from app.models.system_model_setting import SystemModelSetting
|
||||
from app.models.system_setting import SystemSetting
|
||||
from app.models.system_setting_secret import SystemSettingSecret
|
||||
from app.models.hermes_config import HermesTaskConfig
|
||||
from app.repositories.settings import SETTINGS_ROW_ID, SettingsRepository
|
||||
from app.schemas.settings import SettingsRead, SettingsWrite
|
||||
from app.services.hermes_sync import (
|
||||
@@ -183,28 +184,30 @@ class SettingsService:
|
||||
capability=config.capability,
|
||||
priority=config.priority,
|
||||
enabled=True,
|
||||
api_key_encrypted=str(getattr(secrets_row, config.legacy_secret_attr, "") or ""),
|
||||
)
|
||||
self.db.add(model_row)
|
||||
model_rows[slot] = model_row
|
||||
should_commit = True
|
||||
|
||||
if should_commit:
|
||||
self.db.commit()
|
||||
for model_row in model_rows.values():
|
||||
self.db.refresh(model_row)
|
||||
|
||||
return model_rows
|
||||
|
||||
def get_settings_snapshot(self) -> SettingsRead:
|
||||
settings_row, secrets_row = self.ensure_settings_ready()
|
||||
model_rows = self.ensure_model_settings_ready(settings_row, secrets_row)
|
||||
return self._serialize(settings_row, secrets_row, model_rows)
|
||||
|
||||
|
||||
|
||||
api_key_encrypted=str(getattr(secrets_row, config.legacy_secret_attr, "") or ""),
|
||||
)
|
||||
self.db.add(model_row)
|
||||
model_rows[slot] = model_row
|
||||
should_commit = True
|
||||
|
||||
if should_commit:
|
||||
self.db.commit()
|
||||
for model_row in model_rows.values():
|
||||
self.db.refresh(model_row)
|
||||
|
||||
return model_rows
|
||||
|
||||
def get_settings_snapshot(self) -> SettingsRead:
|
||||
settings_row, secrets_row = self.ensure_settings_ready()
|
||||
model_rows = self.ensure_model_settings_ready(settings_row, secrets_row)
|
||||
hermes_form = self._build_hermes_form_snapshot()
|
||||
return self._serialize(settings_row, secrets_row, model_rows, hermes_form)
|
||||
|
||||
def save_settings_snapshot(self, payload: SettingsWrite) -> SettingsRead:
|
||||
settings_row, secrets_row = self.ensure_settings_ready()
|
||||
model_rows = self.ensure_model_settings_ready(settings_row, secrets_row)
|
||||
|
||||
if payload.adminForm.newPassword:
|
||||
if len(payload.adminForm.newPassword) < 5:
|
||||
raise ValueError("管理员密码至少需要 5 位。")
|
||||
@@ -308,6 +311,8 @@ class SettingsService:
|
||||
self._replace_secret_if_present(secrets_row, "smtp_password_encrypted", payload.mailForm.password)
|
||||
|
||||
hermes_snapshot = capture_hermes_config_snapshot()
|
||||
|
||||
self._save_hermes_form_snapshot(payload.hermesForm)
|
||||
|
||||
try:
|
||||
sync_hermes_model_settings(
|
||||
@@ -642,46 +647,107 @@ class SettingsService:
|
||||
|
||||
return should_commit
|
||||
|
||||
def _build_hermes_form_snapshot(self) -> dict:
|
||||
configs = self.db.query(HermesTaskConfig).all()
|
||||
capabilities = {}
|
||||
schedules = {}
|
||||
master_enabled = True # 这里假设只要有一个开启,主开关就是开启的(为简单起见)
|
||||
|
||||
for config in configs:
|
||||
task_type = config.task_type
|
||||
capabilities[task_type] = config.is_enabled
|
||||
|
||||
# 简化解析 cron_expression 到 time (假设 cron 为 "0 9 * * 1" 这种形式)
|
||||
time_str = "00:00"
|
||||
if config.cron_expression:
|
||||
parts = config.cron_expression.split(" ")
|
||||
if len(parts) >= 2:
|
||||
minute, hour = parts[0], parts[1]
|
||||
try:
|
||||
time_str = f"{int(hour):02d}:{int(minute):02d}"
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
schedules[task_type] = {
|
||||
"enabled": config.is_enabled,
|
||||
"time": time_str
|
||||
}
|
||||
|
||||
return {
|
||||
"masterEnabled": master_enabled,
|
||||
"notifyOnFailure": True,
|
||||
"capabilities": capabilities,
|
||||
"schedules": schedules
|
||||
}
|
||||
|
||||
def _save_hermes_form_snapshot(self, hermes_form: dict) -> None:
|
||||
if not hermes_form:
|
||||
return
|
||||
|
||||
schedules = hermes_form.get("schedules", {})
|
||||
capabilities = hermes_form.get("capabilities", {})
|
||||
master_enabled = hermes_form.get("masterEnabled", True)
|
||||
|
||||
for task_type, schedule in schedules.items():
|
||||
config = self.db.query(HermesTaskConfig).filter_by(task_type=task_type).first()
|
||||
if not config:
|
||||
config = HermesTaskConfig(task_type=task_type)
|
||||
self.db.add(config)
|
||||
|
||||
task_enabled = schedule.get("enabled", False) and capabilities.get(task_type, False) and master_enabled
|
||||
config.is_enabled = task_enabled
|
||||
|
||||
# 从 time 构建简单的 cron expression
|
||||
time_str = schedule.get("time", "00:00")
|
||||
parts = time_str.split(":")
|
||||
if len(parts) == 2:
|
||||
# 简单映射:把时分放进去,后面保留为 * * * (或者保留旧的后半段)
|
||||
# 这里偷个懒,风险扫描每天跑,周报每周一跑
|
||||
if task_type == "global_risk_scan":
|
||||
config.cron_expression = f"{int(parts[1])} {int(parts[0])} * * *"
|
||||
elif task_type == "weekly_expense_report":
|
||||
config.cron_expression = f"{int(parts[1])} {int(parts[0])} * * 1"
|
||||
else:
|
||||
config.cron_expression = f"{int(parts[1])} {int(parts[0])} * * *"
|
||||
|
||||
@staticmethod
|
||||
def _serialize(
|
||||
settings_row: SystemSetting,
|
||||
secrets_row: SystemSettingSecret,
|
||||
model_rows: dict[str, SystemModelSetting],
|
||||
hermes_form: dict,
|
||||
) -> SettingsRead:
|
||||
main_model = model_rows["main"]
|
||||
backup_model = model_rows["backup"]
|
||||
embedding_model = model_rows["embedding"]
|
||||
reranker_model = model_rows["reranker"]
|
||||
|
||||
return SettingsRead(
|
||||
companyForm={
|
||||
"companyName": settings_row.company_name,
|
||||
"displayName": settings_row.display_name,
|
||||
"companyCode": settings_row.company_code,
|
||||
"recordNumber": settings_row.record_number,
|
||||
"copyright": settings_row.copyright_text,
|
||||
},
|
||||
|
||||
return SettingsRead(
|
||||
companyForm={
|
||||
"companyName": settings_row.company_name,
|
||||
"displayName": settings_row.display_name,
|
||||
"companyCode": settings_row.company_code,
|
||||
"recordNumber": settings_row.record_number,
|
||||
"copyright": settings_row.copyright_text,
|
||||
},
|
||||
adminForm={
|
||||
"adminAccount": settings_row.admin_account,
|
||||
"adminEmail": settings_row.admin_email,
|
||||
"newPassword": "",
|
||||
"confirmPassword": "",
|
||||
"sessionTimeout": settings_row.session_timeout,
|
||||
"noticeEmail": settings_row.notice_email,
|
||||
"mfaEnabled": settings_row.mfa_enabled,
|
||||
"strongPassword": settings_row.strong_password,
|
||||
"sessionTimeout": settings_row.session_timeout,
|
||||
"noticeEmail": settings_row.notice_email,
|
||||
"mfaEnabled": settings_row.mfa_enabled,
|
||||
"strongPassword": settings_row.strong_password,
|
||||
"loginAlertEnabled": settings_row.login_alert_enabled,
|
||||
"adminPasswordConfigured": bool(secrets_row.admin_password_hash),
|
||||
},
|
||||
sessionForm={
|
||||
"conversationRetentionDays": settings_row.conversation_retention_days,
|
||||
},
|
||||
hermesForm=hermes_form,
|
||||
llmForm={
|
||||
"mainProvider": main_model.provider,
|
||||
"mainModel": main_model.model_name,
|
||||
"mainEndpoint": main_model.endpoint,
|
||||
"mainApiKey": "",
|
||||
"mainApiKeyConfigured": bool(main_model.api_key_encrypted),
|
||||
"backupProvider": backup_model.provider,
|
||||
"backupModel": backup_model.model_name,
|
||||
"backupEndpoint": backup_model.endpoint,
|
||||
|
||||
@@ -71,8 +71,8 @@ EXPENSE_SCENE_SELECTION_OPTIONS = (
|
||||
("other", "其他费用", "暂不属于以上分类的报销场景。"),
|
||||
)
|
||||
|
||||
KNOWLEDGE_MODEL_MAIN_TIMEOUT_SECONDS = 3
|
||||
KNOWLEDGE_MODEL_BACKUP_TIMEOUT_SECONDS = 5
|
||||
KNOWLEDGE_MODEL_MAIN_TIMEOUT_SECONDS = 20
|
||||
KNOWLEDGE_MODEL_BACKUP_TIMEOUT_SECONDS = 30
|
||||
KNOWLEDGE_MODEL_TIMEOUT_SECONDS = KNOWLEDGE_MODEL_BACKUP_TIMEOUT_SECONDS
|
||||
|
||||
EXPENSE_STATUS_LABELS = {
|
||||
|
||||
@@ -86,6 +86,7 @@ class UserAgentKnowledgeMixin(UserAgentKnowledgeHelpersMixin):
|
||||
*,
|
||||
citations: list[UserAgentCitation],
|
||||
) -> str | None:
|
||||
return None
|
||||
if payload.ontology.scenario != "knowledge":
|
||||
return None
|
||||
if str(payload.tool_payload.get("result_type") or "").strip() != "knowledge_search":
|
||||
@@ -583,20 +584,23 @@ class UserAgentKnowledgeMixin(UserAgentKnowledgeHelpersMixin):
|
||||
evidence_lines: list[str] = []
|
||||
for item in evidence_items[:3]:
|
||||
heading = str(item.get("heading") or "").strip()
|
||||
heading_text = f" > {heading}" if heading else ""
|
||||
if "表格行级检索线索" in heading:
|
||||
heading = heading.replace("表格行级检索线索", "").strip(" >")
|
||||
heading_text = f"({heading})" if heading else ""
|
||||
item_title = item.get("title") or title
|
||||
if str(item.get("kind") or "") == "table":
|
||||
preview = self._extract_relevant_table_preview(
|
||||
str(item.get("content") or ""),
|
||||
self._extract_knowledge_query_terms(self._resolve_knowledge_question(payload)),
|
||||
)
|
||||
evidence_lines.append(f"- 《{item.get('title') or title}》{heading_text}:\n{preview}")
|
||||
evidence_lines.append(f"- **《{item_title}》** {heading_text}\n{preview}")
|
||||
continue
|
||||
rendered = self._render_knowledge_evidence_text(item)
|
||||
if rendered:
|
||||
if "\n" in rendered:
|
||||
evidence_lines.append(f"- 《{item.get('title') or title}》{heading_text}:\n{rendered}")
|
||||
evidence_lines.append(f"- **《{item_title}》** {heading_text}\n{rendered}")
|
||||
else:
|
||||
evidence_lines.append(f"- 《{item.get('title') or title}》{heading_text}:{rendered}")
|
||||
evidence_lines.append(f"- **《{item_title}》** {heading_text}\n {rendered}")
|
||||
|
||||
if not evidence_lines:
|
||||
for item in hits[:2]:
|
||||
@@ -607,21 +611,22 @@ class UserAgentKnowledgeMixin(UserAgentKnowledgeHelpersMixin):
|
||||
)
|
||||
if not excerpt:
|
||||
continue
|
||||
evidence_lines.append(f"- 《{item_title}》:{excerpt}")
|
||||
evidence_lines.append(f"- **《{item_title}》**:{excerpt}")
|
||||
|
||||
if not evidence_lines:
|
||||
return (
|
||||
f"{prefix}我已经从《{title}》中检索到与你这次问题相关的制度依据,"
|
||||
"但本次答案生成环节暂时没有成功返回。请稍后重试一次;如果仍然失败,"
|
||||
"建议先检查主对话模型的连通性。"
|
||||
f"{prefix}当前《{title}》里可用于回答的关键条款还不够明确。"
|
||||
"请补充费用类型、适用地区、职级或具体业务场景,我再继续帮你缩小范围。"
|
||||
)
|
||||
|
||||
return "\n".join(
|
||||
[
|
||||
f"{prefix}我已经命中与你这次问题最相关的制度依据,但答案整理阶段本轮没有及时返回。",
|
||||
"先给你当前最直接的依据:",
|
||||
f"{prefix}我先根据当前制度依据给出可以确认的部分。",
|
||||
"",
|
||||
"**依据**:",
|
||||
*evidence_lines,
|
||||
"如果你希望我继续把这些依据整理成更完整的结论、步骤或对比说明,可以继续缩小问题范围后再问一次。",
|
||||
"",
|
||||
"**说明**:以上只使用当前命中的知识库证据;没有在证据中出现的适用条件或金额,我不会替你默认补齐。",
|
||||
]
|
||||
).strip()
|
||||
|
||||
|
||||
@@ -4,6 +4,9 @@ import re
|
||||
|
||||
KNOWLEDGE_DIRECT_ANSWER_HINTS = (
|
||||
"是什么",
|
||||
"介绍",
|
||||
"说明",
|
||||
"概述",
|
||||
"标准",
|
||||
"限额",
|
||||
"流程",
|
||||
@@ -45,7 +48,7 @@ MAX_KNOWLEDGE_QUERY_TERMS = 12
|
||||
MAX_KNOWLEDGE_DIRECT_EVIDENCE = 4
|
||||
MAX_KNOWLEDGE_MODEL_HITS = 5
|
||||
KNOWLEDGE_SECTION_HEADING_PATTERN = re.compile(
|
||||
r"^(#\s*.+|##\s*.+|###\s*.+|第[一二三四五六七八九十百零0-9]+[章节条]\s*.*|[一二三四五六七八九十]+、.*|([一二三四五六七八九十]+).*|\([一二三四五六七八九十]+\).*)$"
|
||||
r"^(#\s*.+|##\s*.+|###\s*.+|第[一二三四五六七八九十百零0-9]+[部分章节条]\s*.*|[一二三四五六七八九十]+、.*|([一二三四五六七八九十]+).*|\([一二三四五六七八九十]+\).*)$"
|
||||
)
|
||||
KNOWLEDGE_LIST_ITEM_PATTERN = re.compile(r"^[-*•]\s+.+$")
|
||||
KNOWLEDGE_NUMBERED_ITEM_PATTERN = re.compile(
|
||||
|
||||
@@ -15,6 +15,20 @@ from app.services.user_agent_knowledge_constants import (
|
||||
|
||||
|
||||
class UserAgentKnowledgeHelpersMixin:
|
||||
GENERIC_KNOWLEDGE_TITLE_TERMS = {"远光软件", "股份有限", "有限公司"}
|
||||
KNOWLEDGE_QUERY_ANCHOR_TERMS = (
|
||||
"财务基础知识手册",
|
||||
"基础知识手册",
|
||||
"会计科目",
|
||||
"常用会计科目",
|
||||
"财务报表",
|
||||
"主要税种",
|
||||
"税种",
|
||||
"标准",
|
||||
"清单",
|
||||
"明细",
|
||||
"流程",
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _select_knowledge_model_hits(
|
||||
@@ -26,7 +40,7 @@ class UserAgentKnowledgeHelpersMixin:
|
||||
item
|
||||
for item in list(tool_payload.get("hits") or [])
|
||||
if isinstance(item, dict)
|
||||
][: max(MAX_KNOWLEDGE_MODEL_HITS + 1, 6)]
|
||||
][: max(MAX_KNOWLEDGE_MODEL_HITS + 3, 8)]
|
||||
if not raw_hits:
|
||||
return []
|
||||
|
||||
@@ -64,7 +78,16 @@ class UserAgentKnowledgeHelpersMixin:
|
||||
matched_terms = [term for term in query_terms if term in haystack]
|
||||
score = max(1, 48 - rank_index * 4)
|
||||
score += len(matched_terms) * 10
|
||||
score += sum(max(0, len(term) - 4) * 8 for term in matched_terms)
|
||||
score += sum(1 for term in matched_terms if term in title) * 8
|
||||
score += sum(max(0, len(term) - 4) * 6 for term in matched_terms if term in title)
|
||||
score += sum(
|
||||
(len(term) - 3) * 10
|
||||
for term in matched_terms
|
||||
if len(term) >= 4
|
||||
and term in title
|
||||
and term not in UserAgentKnowledgeHelpersMixin.GENERIC_KNOWLEDGE_TITLE_TERMS
|
||||
)
|
||||
|
||||
leading_marker = UserAgentKnowledgeHelpersMixin._leading_knowledge_appendix_marker(content)
|
||||
if leading_marker == "# 章节导航":
|
||||
@@ -149,6 +172,40 @@ class UserAgentKnowledgeHelpersMixin:
|
||||
return ""
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _knowledge_list_marker_sort_key(content: str) -> int:
|
||||
normalized = str(content or "").strip()
|
||||
match = re.match(r"^[((]([一二三四五六七八九十百零0-9]+)[))]", normalized)
|
||||
if not match:
|
||||
return 999
|
||||
marker = match.group(1)
|
||||
if marker.isdigit():
|
||||
return int(marker)
|
||||
values = {
|
||||
"零": 0,
|
||||
"一": 1,
|
||||
"二": 2,
|
||||
"三": 3,
|
||||
"四": 4,
|
||||
"五": 5,
|
||||
"六": 6,
|
||||
"七": 7,
|
||||
"八": 8,
|
||||
"九": 9,
|
||||
"十": 10,
|
||||
}
|
||||
if marker in values:
|
||||
return values[marker]
|
||||
if marker.startswith("十") and len(marker) == 2:
|
||||
return 10 + values.get(marker[1], 0)
|
||||
if marker.endswith("十") and len(marker) == 2:
|
||||
return values.get(marker[0], 0) * 10
|
||||
if "十" in marker:
|
||||
left, right = marker.split("十", 1)
|
||||
return values.get(left, 1) * 10 + values.get(right, 0)
|
||||
return 999
|
||||
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _format_knowledge_heading_label(heading: str) -> str:
|
||||
@@ -156,6 +213,169 @@ class UserAgentKnowledgeHelpersMixin:
|
||||
return " / ".join(parts)
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _has_inline_numbered_knowledge_items(content: str) -> bool:
|
||||
return len(
|
||||
re.findall(
|
||||
r"[((][一二三四五六七八九十百零0-9]+[))]",
|
||||
str(content or ""),
|
||||
)
|
||||
) >= 2
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _split_inline_numbered_knowledge_items(content: str) -> list[str]:
|
||||
normalized = str(content or "").strip()
|
||||
if not UserAgentKnowledgeHelpersMixin._has_inline_numbered_knowledge_items(normalized):
|
||||
return [normalized] if normalized else []
|
||||
|
||||
marker_pattern = r"[((][一二三四五六七八九十百零0-9]+[))]"
|
||||
first_marker = re.search(marker_pattern, normalized)
|
||||
if first_marker is None:
|
||||
return [normalized] if normalized else []
|
||||
|
||||
prefix = normalized[: first_marker.start()].strip(" ::")
|
||||
tail = normalized[first_marker.start() :].strip()
|
||||
item_pattern = (
|
||||
r"([((][一二三四五六七八九十百零0-9]+[))]\s*.*?"
|
||||
r"(?=\s*[((][一二三四五六七八九十百零0-9]+[))]|\s*$))"
|
||||
)
|
||||
items = [item.strip() for item in re.findall(item_pattern, tail) if item.strip()]
|
||||
if prefix:
|
||||
return [prefix, *items]
|
||||
return items or [normalized]
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _focus_knowledge_segment_content(content: str, query_terms: list[str]) -> str:
|
||||
normalized = re.sub(r"\s+", " ", str(content or "").strip())
|
||||
if not normalized:
|
||||
return ""
|
||||
|
||||
anchor_terms = sorted(
|
||||
{
|
||||
str(term or "").strip()
|
||||
for term in query_terms
|
||||
if len(str(term or "").strip()) >= 3
|
||||
},
|
||||
key=len,
|
||||
reverse=True,
|
||||
)
|
||||
anchor_index = -1
|
||||
for term in anchor_terms:
|
||||
anchor_index = normalized.lower().find(term.lower())
|
||||
if anchor_index >= 0:
|
||||
break
|
||||
if anchor_index < 0:
|
||||
return normalized
|
||||
|
||||
prefix_window = normalized[max(0, anchor_index - 40) : anchor_index]
|
||||
marker_match = None
|
||||
for match in re.finditer(
|
||||
r"(?:第[一二三四五六七八九十百零0-9]+[部分章节条]|[一二三四五六七八九十]+、|[((][一二三四五六七八九十百零0-9]+[))])",
|
||||
prefix_window,
|
||||
):
|
||||
marker_match = match
|
||||
start = anchor_index
|
||||
if marker_match is not None:
|
||||
start = max(0, anchor_index - len(prefix_window) + marker_match.start())
|
||||
|
||||
return normalized[start : start + 700].strip()
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _split_markdown_table_cells(line: str) -> list[str]:
|
||||
stripped = str(line or "").strip()
|
||||
if stripped.startswith("|"):
|
||||
stripped = stripped[1:]
|
||||
if stripped.endswith("|"):
|
||||
stripped = stripped[:-1]
|
||||
return [
|
||||
re.sub(r"\s+", " ", cell.replace("**", "").strip())
|
||||
for cell in stripped.split("|")
|
||||
]
|
||||
|
||||
|
||||
@classmethod
|
||||
def _summarize_knowledge_table_preview(cls, preview: str) -> str:
|
||||
rows: list[list[str]] = []
|
||||
for line in str(preview or "").splitlines():
|
||||
if line.count("|") < 2:
|
||||
continue
|
||||
cells = cls._split_markdown_table_cells(line)
|
||||
if not cells or all(re.fullmatch(r":?-{2,}:?", cell.replace(" ", "")) for cell in cells):
|
||||
continue
|
||||
rows.append(cells)
|
||||
|
||||
if len(rows) < 2:
|
||||
return "可直接参考的标准表如下。"
|
||||
|
||||
header = rows[0]
|
||||
data_rows = [row for row in rows[1:] if len(row) == len(header)]
|
||||
if len(data_rows) == 1 and len(header) >= 2:
|
||||
row = data_rows[0]
|
||||
subject = row[0] or "该项目"
|
||||
pairs = [
|
||||
f"{label}:{value}"
|
||||
for label, value in zip(header[1:], row[1:])
|
||||
if label and value and value not in {"-", "—"}
|
||||
]
|
||||
if pairs:
|
||||
return f"{subject}的标准为:{';'.join(pairs)}。"
|
||||
|
||||
return "相关标准项如下,请按表头和行内容对应使用。"
|
||||
|
||||
|
||||
def _summarize_knowledge_lines_conclusion(
|
||||
self,
|
||||
lines: list[str],
|
||||
*,
|
||||
heading: str = "",
|
||||
) -> str:
|
||||
clean_lines = [
|
||||
self._clean_knowledge_segment_text(line)
|
||||
for line in lines
|
||||
if self._clean_knowledge_segment_text(line)
|
||||
]
|
||||
if not clean_lines:
|
||||
return ""
|
||||
|
||||
clean_heading = str(heading or "").strip()
|
||||
if not clean_heading and clean_lines and ":" not in clean_lines[0] and ":" not in clean_lines[0]:
|
||||
clean_heading = clean_lines[0]
|
||||
clean_heading = re.sub(
|
||||
r"^[一二三四五六七八九十百零0-9]+、\s*",
|
||||
"",
|
||||
clean_heading,
|
||||
)
|
||||
item_labels: list[str] = []
|
||||
for line in clean_lines:
|
||||
if ":" not in line and ":" not in line:
|
||||
continue
|
||||
label = re.split(r"[::]", line, maxsplit=1)[0].strip()
|
||||
if 1 <= len(label) <= 24:
|
||||
item_labels.append(label)
|
||||
|
||||
if clean_heading and len(item_labels) >= 2:
|
||||
return f"{clean_heading}包括:{'、'.join(item_labels[:6])}。"
|
||||
if item_labels:
|
||||
return f"{item_labels[0]}:{clean_lines[0].split(':', 1)[-1].strip()}"
|
||||
return clean_lines[0]
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _knowledge_lines_have_multiple_labeled_items(lines: list[str]) -> bool:
|
||||
labeled_count = 0
|
||||
for line in lines:
|
||||
normalized = str(line or "").strip()
|
||||
if ":" not in normalized and ":" not in normalized:
|
||||
continue
|
||||
label = re.split(r"[::]", normalized, maxsplit=1)[0].strip()
|
||||
if 1 <= len(label) <= 24:
|
||||
labeled_count += 1
|
||||
return labeled_count >= 2
|
||||
|
||||
|
||||
|
||||
def _score_knowledge_evidence_candidate(
|
||||
self,
|
||||
@@ -169,10 +389,14 @@ class UserAgentKnowledgeHelpersMixin:
|
||||
|
||||
matched_terms = [term for term in query_terms if term in haystack]
|
||||
score = len(matched_terms) * 10
|
||||
score += sum(max(0, len(term) - 4) * 8 for term in matched_terms)
|
||||
score += sum(1 for term in matched_terms if term in heading) * 6
|
||||
score += sum(max(0, len(term) - 4) * 6 for term in matched_terms if term in heading)
|
||||
|
||||
if kind == "table":
|
||||
score += 10
|
||||
if content.count("\n") < 2:
|
||||
score -= 24
|
||||
elif kind in {"kv", "clause", "list"}:
|
||||
score += 8
|
||||
elif kind == "paragraph":
|
||||
@@ -220,6 +444,30 @@ class UserAgentKnowledgeHelpersMixin:
|
||||
remember(item)
|
||||
|
||||
for block in re.findall(r"[\u4e00-\u9fff]{2,20}", normalized_question):
|
||||
remember(block)
|
||||
if len(terms) >= MAX_KNOWLEDGE_QUERY_TERMS:
|
||||
return terms
|
||||
for marker in ("标准", "金额", "限额", "额度"):
|
||||
marker_index = block.find(marker)
|
||||
if marker_index <= 0:
|
||||
continue
|
||||
subject = block[:marker_index]
|
||||
for width in (6, 4, 3, 2):
|
||||
remember(subject[-width:])
|
||||
for anchor in UserAgentKnowledgeHelpersMixin.KNOWLEDGE_QUERY_ANCHOR_TERMS:
|
||||
if anchor in block:
|
||||
remember(anchor)
|
||||
tail = block[-14:]
|
||||
for size in (8, 7, 6, 5, 4):
|
||||
for start in range(0, len(tail) - size + 1):
|
||||
piece = tail[start : start + size]
|
||||
if any(
|
||||
anchor in piece
|
||||
for anchor in UserAgentKnowledgeHelpersMixin.KNOWLEDGE_QUERY_ANCHOR_TERMS
|
||||
):
|
||||
remember(piece)
|
||||
if len(terms) >= MAX_KNOWLEDGE_QUERY_TERMS:
|
||||
return terms
|
||||
if len(block) <= 4:
|
||||
remember(block)
|
||||
continue
|
||||
@@ -276,7 +524,14 @@ class UserAgentKnowledgeHelpersMixin:
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _extract_relevant_table_preview(content: str, query_terms: list[str]) -> str:
|
||||
def _extract_relevant_table_preview(
|
||||
content: str,
|
||||
query_terms: list[str],
|
||||
*,
|
||||
preferred_terms: list[str] | None = None,
|
||||
max_rows: int = 3,
|
||||
fallback_rows: int = 2,
|
||||
) -> str:
|
||||
lines = [line.strip() for line in str(content or "").splitlines() if line.strip()]
|
||||
if len(lines) <= 3:
|
||||
return "\n".join(lines)
|
||||
@@ -285,12 +540,39 @@ class UserAgentKnowledgeHelpersMixin:
|
||||
divider = lines[1] if len(lines) > 1 else ""
|
||||
body = lines[2:] if divider.count("|") >= 2 else lines[1:]
|
||||
|
||||
preferred = [
|
||||
str(term or "").strip().lower()
|
||||
for term in list(preferred_terms or [])
|
||||
if str(term or "").strip()
|
||||
]
|
||||
base_terms = preferred + [
|
||||
str(term or "").strip().lower()
|
||||
for term in query_terms
|
||||
if str(term or "").strip().lower() not in preferred
|
||||
]
|
||||
derived_terms: list[str] = []
|
||||
for term in base_terms:
|
||||
for marker in ("标准", "金额", "限额", "额度", "是多少"):
|
||||
marker_index = term.find(marker)
|
||||
if marker_index <= 0:
|
||||
continue
|
||||
subject = term[:marker_index].strip()
|
||||
if len(subject) < 2:
|
||||
continue
|
||||
for width in (6, 4, 3, 2):
|
||||
derived_terms.append(subject[-width:])
|
||||
|
||||
search_terms: list[str] = []
|
||||
for term in [*preferred, *derived_terms, *base_terms]:
|
||||
if term and term not in search_terms:
|
||||
search_terms.append(term)
|
||||
|
||||
matched_rows = [
|
||||
row
|
||||
for row in body
|
||||
if any(term in row.lower() for term in query_terms)
|
||||
if any(term in row.lower() for term in search_terms)
|
||||
]
|
||||
selected_rows = matched_rows[:3] or body[:2]
|
||||
selected_rows = matched_rows[:max_rows] or body[:fallback_rows]
|
||||
preview_lines = [header]
|
||||
if divider:
|
||||
preview_lines.append(divider)
|
||||
@@ -298,6 +580,18 @@ class UserAgentKnowledgeHelpersMixin:
|
||||
return "\n".join(preview_lines).strip()
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _question_requests_broad_knowledge_table(question: str) -> bool:
|
||||
normalized = str(question or "").strip()
|
||||
if not normalized:
|
||||
return False
|
||||
broad_hints = ("有哪些", "是什么", "介绍", "说明", "列表", "清单", "全部", "完整")
|
||||
table_subject_hints = ("科目", "目录", "清单", "列表", "表", "明细")
|
||||
return any(hint in normalized for hint in broad_hints) and any(
|
||||
hint in normalized for hint in table_subject_hints
|
||||
)
|
||||
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _question_requires_explicit_condition(question: str) -> bool:
|
||||
|
||||
@@ -261,7 +261,6 @@ class UserAgentResponseMixin:
|
||||
"draft_payload": draft_payload.model_dump(mode="json") if draft_payload is not None else None,
|
||||
"selected_capability_codes": payload.selected_capability_codes,
|
||||
"requires_confirmation": payload.requires_confirmation,
|
||||
"fallback_answer": fallback_answer,
|
||||
}
|
||||
if payload.ontology.scenario == "knowledge":
|
||||
facts["knowledge_evidence_blocks"] = self._build_knowledge_evidence_blocks(
|
||||
|
||||
@@ -57,8 +57,8 @@
|
||||
"uploaded_by": "系统导入",
|
||||
"version_number": 1,
|
||||
"ingest_status": 3,
|
||||
"ingest_status_updated_at": "2026-05-22T09:22:52.110824+00:00",
|
||||
"ingest_completed_at": "2026-05-22T09:22:52.110824+00:00",
|
||||
"ingest_status_updated_at": "2026-05-23T14:30:33.605531+00:00",
|
||||
"ingest_completed_at": "2026-05-23T14:30:33.605531+00:00",
|
||||
"ingest_document_name": "远光软件财务基础知识手册.docx",
|
||||
"ingest_document_updated_at": "2026-05-22T07:00:22.011016+00:00",
|
||||
"ingest_document_sha256": "",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -82,23 +82,6 @@
|
||||
"processing_end_time": 1779441745
|
||||
}
|
||||
},
|
||||
"b0277cd76034437997fbf5219662725a": {
|
||||
"status": "processed",
|
||||
"chunks_count": 1,
|
||||
"chunks_list": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"content_summary": "远光软件股份有限公司\n财务基础知识手册\n第一部分 会计基础知识\n一、会计要素\n会计要素包括:资产、负债、所有者权益、收入、费用和利润。\n会计恒等式:资产 = 负债 + 所有者权益\n二、常用会计科目\n科目类别\n科目名称\n说明\n资产类\n库存现金\n公司持有的现金\n资产类\n银行存款\n存放在银行的资金\n资产类\n应收账款\n因销售商品或提供劳务应收的款项\n资产类\n固定资产\n使用年限超过一年的有形资产\n负债类\n应付账款\n因购买商品或接受劳务应付的款项\n负债类\n应交税费\n应缴纳的各种税费\n负债类\n应付职工薪酬\n...",
|
||||
"content_length": 1082,
|
||||
"created_at": "2026-05-22T09:22:31.538281+00:00",
|
||||
"updated_at": "2026-05-22T09:22:52.110824+00:00",
|
||||
"file_path": "/app/server/storage/knowledge/财务知识库/远光软件财务基础知识手册.docx",
|
||||
"track_id": "insert_20260522_092231_e1b9d415",
|
||||
"metadata": {
|
||||
"processing_start_time": 1779441751,
|
||||
"processing_end_time": 1779441772
|
||||
}
|
||||
},
|
||||
"23f56f159a3e4bc3b2338056544120dd": {
|
||||
"status": "processed",
|
||||
"chunks_count": 1,
|
||||
@@ -688,5 +671,23 @@
|
||||
"processing_start_time": 1779467725,
|
||||
"processing_end_time": 1779467727
|
||||
}
|
||||
},
|
||||
"b0277cd76034437997fbf5219662725a": {
|
||||
"status": "processed",
|
||||
"chunks_count": 2,
|
||||
"chunks_list": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b",
|
||||
"chunk-f894acfbb6c681d00f75cf9c486d491b"
|
||||
],
|
||||
"content_summary": "远光软件股份有限公司 财务基础知识手册\n\n第一部分 会计基础知识\n\n一、会计要素\n\n会计要素包括:资产、负债、所有者权益、收入、费用和利润。\n\n会计恒等式:资产 = 负债 + 所有者权益\n\n二、常用会计科目\n\n| 科目类别 | 科目名称 | 说明 |\n| --- | --- | --- |\n| 资产类 | 库存现金 | 公司持有的现金 |\n| 资产类 | 银行存款 | 存放在银行的资金 |\n| 资产类 | 应收账款 | 因销售商品或提供劳务应收的款项 |\n| 资产类 | 固定资产 | 使用年限超...",
|
||||
"content_length": 1641,
|
||||
"created_at": "2026-05-23T14:29:37.233751+00:00",
|
||||
"updated_at": "2026-05-23T14:30:33.605531+00:00",
|
||||
"file_path": "/app/server/storage/knowledge/财务知识库/远光软件财务基础知识手册.docx",
|
||||
"track_id": "insert_20260523_142937_5cd25327",
|
||||
"metadata": {
|
||||
"processing_start_time": 1779546577,
|
||||
"processing_end_time": 1779546633
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -6,7 +6,6 @@
|
||||
"chunk-dd87aa5bc62cc9587ecb4c26d35a5263",
|
||||
"chunk-31ff57cf79d009c378478f065eda9d4d",
|
||||
"chunk-e726f44fb0287c5192cf61b350f18abb",
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43",
|
||||
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1",
|
||||
"chunk-2c8384b328272063de4dac306a52d21e",
|
||||
"chunk-6c549250b13b7728acb37eb6082bc178",
|
||||
@@ -44,8 +43,9 @@
|
||||
"chunk-6175768b05adf2e7229c16f13ee7cffd",
|
||||
"chunk-cfac1ddf5942f8fe2d5a296380818faf"
|
||||
],
|
||||
"count": 42,
|
||||
"update_time": 1779467739,
|
||||
"count": 41,
|
||||
"updated_at": 1779546575,
|
||||
"update_time": 1779546575,
|
||||
"_id": "远光软件股份有限公司"
|
||||
},
|
||||
"第一章总则": {
|
||||
@@ -3504,31 +3504,31 @@
|
||||
},
|
||||
"库存现金": {
|
||||
"chunk_ids": [
|
||||
"chunk-31ff57cf79d009c378478f065eda9d4d",
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
"chunk-31ff57cf79d009c378478f065eda9d4d"
|
||||
],
|
||||
"count": 2,
|
||||
"update_time": 1779441756,
|
||||
"count": 1,
|
||||
"updated_at": 1779546575,
|
||||
"update_time": 1779546575,
|
||||
"_id": "库存现金"
|
||||
},
|
||||
"银行存款": {
|
||||
"chunk_ids": [
|
||||
"chunk-31ff57cf79d009c378478f065eda9d4d",
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
"chunk-31ff57cf79d009c378478f065eda9d4d"
|
||||
],
|
||||
"count": 2,
|
||||
"update_time": 1779441756,
|
||||
"count": 1,
|
||||
"updated_at": 1779546575,
|
||||
"update_time": 1779546575,
|
||||
"_id": "银行存款"
|
||||
},
|
||||
"应收账款": {
|
||||
"chunk_ids": [
|
||||
"chunk-31ff57cf79d009c378478f065eda9d4d",
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43",
|
||||
"chunk-6f1d6991d45799bc8ff24afaed39244d",
|
||||
"chunk-af56151a803634f02e294f2d692fc1f0"
|
||||
"chunk-af56151a803634f02e294f2d692fc1f0",
|
||||
"chunk-f894acfbb6c681d00f75cf9c486d491b"
|
||||
],
|
||||
"count": 4,
|
||||
"update_time": 1779467128,
|
||||
"update_time": 1779546632,
|
||||
"_id": "应收账款"
|
||||
},
|
||||
"其他应收款": {
|
||||
@@ -3552,12 +3552,12 @@
|
||||
"固定资产": {
|
||||
"chunk_ids": [
|
||||
"chunk-31ff57cf79d009c378478f065eda9d4d",
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43",
|
||||
"chunk-4e95fc3e38b2bf65fcb3f6f0664fd9df",
|
||||
"chunk-4287121b009a169fe4155526bfe413ea"
|
||||
],
|
||||
"count": 4,
|
||||
"update_time": 1779465910,
|
||||
"count": 3,
|
||||
"updated_at": 1779546575,
|
||||
"update_time": 1779546575,
|
||||
"_id": "固定资产"
|
||||
},
|
||||
"累计折旧": {
|
||||
@@ -3572,39 +3572,39 @@
|
||||
"应付账款": {
|
||||
"chunk_ids": [
|
||||
"chunk-31ff57cf79d009c378478f065eda9d4d",
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43",
|
||||
"chunk-6f1d6991d45799bc8ff24afaed39244d",
|
||||
"chunk-af56151a803634f02e294f2d692fc1f0"
|
||||
],
|
||||
"count": 4,
|
||||
"update_time": 1779467128,
|
||||
"count": 3,
|
||||
"updated_at": 1779546575,
|
||||
"update_time": 1779546575,
|
||||
"_id": "应付账款"
|
||||
},
|
||||
"应交税费": {
|
||||
"chunk_ids": [
|
||||
"chunk-31ff57cf79d009c378478f065eda9d4d",
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
"chunk-31ff57cf79d009c378478f065eda9d4d"
|
||||
],
|
||||
"count": 2,
|
||||
"update_time": 1779441756,
|
||||
"count": 1,
|
||||
"updated_at": 1779546575,
|
||||
"update_time": 1779546575,
|
||||
"_id": "应交税费"
|
||||
},
|
||||
"应付职工薪酬": {
|
||||
"chunk_ids": [
|
||||
"chunk-31ff57cf79d009c378478f065eda9d4d",
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
"chunk-31ff57cf79d009c378478f065eda9d4d"
|
||||
],
|
||||
"count": 2,
|
||||
"update_time": 1779441756,
|
||||
"count": 1,
|
||||
"updated_at": 1779546575,
|
||||
"update_time": 1779546575,
|
||||
"_id": "应付职工薪酬"
|
||||
},
|
||||
"主营业务收入": {
|
||||
"chunk_ids": [
|
||||
"chunk-31ff57cf79d009c378478f065eda9d4d",
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
"chunk-31ff57cf79d009c378478f065eda9d4d"
|
||||
],
|
||||
"count": 2,
|
||||
"update_time": 1779441756,
|
||||
"count": 1,
|
||||
"updated_at": 1779546575,
|
||||
"update_time": 1779546575,
|
||||
"_id": "主营业务收入"
|
||||
},
|
||||
"主营业务成本": {
|
||||
@@ -3618,20 +3618,20 @@
|
||||
},
|
||||
"管理费用": {
|
||||
"chunk_ids": [
|
||||
"chunk-31ff57cf79d009c378478f065eda9d4d",
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
"chunk-31ff57cf79d009c378478f065eda9d4d"
|
||||
],
|
||||
"count": 2,
|
||||
"update_time": 1779441756,
|
||||
"count": 1,
|
||||
"updated_at": 1779546575,
|
||||
"update_time": 1779546575,
|
||||
"_id": "管理费用"
|
||||
},
|
||||
"销售费用": {
|
||||
"chunk_ids": [
|
||||
"chunk-31ff57cf79d009c378478f065eda9d4d",
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
"chunk-31ff57cf79d009c378478f065eda9d4d"
|
||||
],
|
||||
"count": 2,
|
||||
"update_time": 1779441756,
|
||||
"count": 1,
|
||||
"updated_at": 1779546575,
|
||||
"update_time": 1779546575,
|
||||
"_id": "销售费用"
|
||||
},
|
||||
"财务费用": {
|
||||
@@ -3667,11 +3667,11 @@
|
||||
"资产类": {
|
||||
"chunk_ids": [
|
||||
"chunk-31ff57cf79d009c378478f065eda9d4d",
|
||||
"chunk-e726f44fb0287c5192cf61b350f18abb"
|
||||
"chunk-e726f44fb0287c5192cf61b350f18abb",
|
||||
"chunk-f894acfbb6c681d00f75cf9c486d491b"
|
||||
],
|
||||
"count": 2,
|
||||
"create_time": 1779441742,
|
||||
"update_time": 1779441742,
|
||||
"count": 3,
|
||||
"update_time": 1779546632,
|
||||
"_id": "资产类"
|
||||
},
|
||||
"负债类": {
|
||||
@@ -3811,192 +3811,48 @@
|
||||
"update_time": 1779441742,
|
||||
"_id": "1221其他应收款"
|
||||
},
|
||||
"财务基础知识手册": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441755,
|
||||
"update_time": 1779441755,
|
||||
"_id": "财务基础知识手册"
|
||||
},
|
||||
"会计要素": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441755,
|
||||
"update_time": 1779441755,
|
||||
"_id": "会计要素"
|
||||
},
|
||||
"资产": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441755,
|
||||
"update_time": 1779441755,
|
||||
"_id": "资产"
|
||||
},
|
||||
"负债": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441755,
|
||||
"update_time": 1779441755,
|
||||
"_id": "负债"
|
||||
},
|
||||
"所有者权益": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441755,
|
||||
"update_time": 1779441755,
|
||||
"_id": "所有者权益"
|
||||
},
|
||||
"收入": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441755,
|
||||
"update_time": 1779441755,
|
||||
"_id": "收入"
|
||||
},
|
||||
"费用": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441755,
|
||||
"update_time": 1779441755,
|
||||
"_id": "费用"
|
||||
},
|
||||
"利润": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441756,
|
||||
"update_time": 1779441756,
|
||||
"_id": "利润"
|
||||
},
|
||||
"会计恒等式": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441756,
|
||||
"update_time": 1779441756,
|
||||
"_id": "会计恒等式"
|
||||
},
|
||||
"增值税": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43",
|
||||
"chunk-2c8384b328272063de4dac306a52d21e",
|
||||
"chunk-f61c91e28e8d0f773f83e3daf161ab1c",
|
||||
"chunk-570642e8a00db7819c2b4048ebf1b279",
|
||||
"chunk-89afdbbf904b60cf6494cba2638e08a8"
|
||||
],
|
||||
"count": 5,
|
||||
"update_time": 1779467303,
|
||||
"_id": "增值税"
|
||||
},
|
||||
"企业所得税": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43",
|
||||
"chunk-2c8384b328272063de4dac306a52d21e",
|
||||
"chunk-f61c91e28e8d0f773f83e3daf161ab1c"
|
||||
],
|
||||
"count": 3,
|
||||
"update_time": 1779466482,
|
||||
"_id": "企业所得税"
|
||||
},
|
||||
"个人所得税": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43",
|
||||
"chunk-f61c91e28e8d0f773f83e3daf161ab1c",
|
||||
"chunk-570642e8a00db7819c2b4048ebf1b279",
|
||||
"chunk-89afdbbf904b60cf6494cba2638e08a8"
|
||||
],
|
||||
"count": 4,
|
||||
"update_time": 1779467303,
|
||||
"updated_at": 1779546575,
|
||||
"update_time": 1779546575,
|
||||
"_id": "增值税"
|
||||
},
|
||||
"企业所得税": {
|
||||
"chunk_ids": [
|
||||
"chunk-2c8384b328272063de4dac306a52d21e",
|
||||
"chunk-f61c91e28e8d0f773f83e3daf161ab1c"
|
||||
],
|
||||
"count": 2,
|
||||
"updated_at": 1779546575,
|
||||
"update_time": 1779546575,
|
||||
"_id": "企业所得税"
|
||||
},
|
||||
"个人所得税": {
|
||||
"chunk_ids": [
|
||||
"chunk-f61c91e28e8d0f773f83e3daf161ab1c",
|
||||
"chunk-570642e8a00db7819c2b4048ebf1b279",
|
||||
"chunk-89afdbbf904b60cf6494cba2638e08a8"
|
||||
],
|
||||
"count": 3,
|
||||
"updated_at": 1779546575,
|
||||
"update_time": 1779546575,
|
||||
"_id": "个人所得税"
|
||||
},
|
||||
"印花税": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43",
|
||||
"chunk-f61c91e28e8d0f773f83e3daf161ab1c"
|
||||
],
|
||||
"count": 2,
|
||||
"update_time": 1779466482,
|
||||
"count": 1,
|
||||
"updated_at": 1779546575,
|
||||
"update_time": 1779546575,
|
||||
"_id": "印花税"
|
||||
},
|
||||
"三大财务报表": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441756,
|
||||
"update_time": 1779441756,
|
||||
"_id": "三大财务报表"
|
||||
},
|
||||
"资产负债表": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441756,
|
||||
"update_time": 1779441756,
|
||||
"_id": "资产负债表"
|
||||
},
|
||||
"利润表": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441756,
|
||||
"update_time": 1779441756,
|
||||
"_id": "利润表"
|
||||
},
|
||||
"现金流量表": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441756,
|
||||
"update_time": 1779441756,
|
||||
"_id": "现金流量表"
|
||||
},
|
||||
"会计基础知识": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441756,
|
||||
"update_time": 1779441756,
|
||||
"_id": "会计基础知识"
|
||||
},
|
||||
"税务基础知识": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441757,
|
||||
"update_time": 1779441757,
|
||||
"_id": "税务基础知识"
|
||||
},
|
||||
"财务报表解读": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441757,
|
||||
"update_time": 1779441757,
|
||||
"_id": "财务报表解读"
|
||||
},
|
||||
"财务术语解释手册": {
|
||||
"chunk_ids": [
|
||||
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
|
||||
@@ -4181,10 +4037,11 @@
|
||||
"Corporate Income Tax": {
|
||||
"chunk_ids": [
|
||||
"chunk-93d2389cdb74257e90201dccbc3f6539",
|
||||
"chunk-bdfd18ae478b23604f1318623e8e9508"
|
||||
"chunk-bdfd18ae478b23604f1318623e8e9508",
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 2,
|
||||
"update_time": 1779466433,
|
||||
"count": 3,
|
||||
"update_time": 1779546632,
|
||||
"_id": "Corporate Income Tax"
|
||||
},
|
||||
"Venture Capital Deduction": {
|
||||
@@ -5896,11 +5753,11 @@
|
||||
},
|
||||
"Yuan Guang Software Co., Ltd.": {
|
||||
"chunk_ids": [
|
||||
"chunk-96ab661ad24e0cb4c468128a58a76b6d"
|
||||
"chunk-96ab661ad24e0cb4c468128a58a76b6d",
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779465962,
|
||||
"update_time": 1779465962,
|
||||
"count": 2,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Yuan Guang Software Co., Ltd."
|
||||
},
|
||||
"Procurement Management Methods": {
|
||||
@@ -12387,5 +12244,302 @@
|
||||
"create_time": 1779467740,
|
||||
"update_time": 1779467740,
|
||||
"_id": "第三部分发票问题"
|
||||
},
|
||||
"Accounting Elements": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Accounting Elements"
|
||||
},
|
||||
"Assets": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Assets"
|
||||
},
|
||||
"Liabilities": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Liabilities"
|
||||
},
|
||||
"Owner's Equity": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Owner's Equity"
|
||||
},
|
||||
"Revenue": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Revenue"
|
||||
},
|
||||
"Expenses": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Expenses"
|
||||
},
|
||||
"Profit": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Profit"
|
||||
},
|
||||
"Accounting Equation": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Accounting Equation"
|
||||
},
|
||||
"Common Accounting Items": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Common Accounting Items"
|
||||
},
|
||||
"Assets Category": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Assets Category"
|
||||
},
|
||||
"Liabilities Category": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Liabilities Category"
|
||||
},
|
||||
"Income And Expense Category": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Income And Expense Category"
|
||||
},
|
||||
"Cash": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Cash"
|
||||
},
|
||||
"Bank Deposits": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Bank Deposits"
|
||||
},
|
||||
"Accounts Receivable": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Accounts Receivable"
|
||||
},
|
||||
"Fixed Assets": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Fixed Assets"
|
||||
},
|
||||
"Accounts Payable": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Accounts Payable"
|
||||
},
|
||||
"Taxes Payable": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Taxes Payable"
|
||||
},
|
||||
"Employee Compensation Payable": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Employee Compensation Payable"
|
||||
},
|
||||
"Main Business Revenue": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Main Business Revenue"
|
||||
},
|
||||
"Management Expenses": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Management Expenses"
|
||||
},
|
||||
"Sales Expenses": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Sales Expenses"
|
||||
},
|
||||
"Tax Fundamentals": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546631,
|
||||
"update_time": 1779546631,
|
||||
"_id": "Tax Fundamentals"
|
||||
},
|
||||
"Major Tax Types": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546632,
|
||||
"update_time": 1779546632,
|
||||
"_id": "Major Tax Types"
|
||||
},
|
||||
"Value Added Tax": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546632,
|
||||
"update_time": 1779546632,
|
||||
"_id": "Value Added Tax"
|
||||
},
|
||||
"Individual Income Tax": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546632,
|
||||
"update_time": 1779546632,
|
||||
"_id": "Individual Income Tax"
|
||||
},
|
||||
"Stamp Duty": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546632,
|
||||
"update_time": 1779546632,
|
||||
"_id": "Stamp Duty"
|
||||
},
|
||||
"Software Services Tax Rate 6%": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546632,
|
||||
"update_time": 1779546632,
|
||||
"_id": "Software Services Tax Rate 6%"
|
||||
},
|
||||
"Software Product Sales Tax Rate 13%": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546632,
|
||||
"update_time": 1779546632,
|
||||
"_id": "Software Product Sales Tax Rate 13%"
|
||||
},
|
||||
"Corporate Income Tax Rate 25%": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546632,
|
||||
"update_time": 1779546632,
|
||||
"_id": "Corporate Income Tax Rate 25%"
|
||||
},
|
||||
"High-Tech Enterprise Preferential Tax Rate 15%": {
|
||||
"chunk_ids": [
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546632,
|
||||
"update_time": 1779546632,
|
||||
"_id": "High-Tech Enterprise Preferential Tax Rate 15%"
|
||||
},
|
||||
"所有者权益": {
|
||||
"chunk_ids": [
|
||||
"chunk-f894acfbb6c681d00f75cf9c486d491b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546632,
|
||||
"update_time": 1779546632,
|
||||
"_id": "所有者权益"
|
||||
},
|
||||
"常用会计科目": {
|
||||
"chunk_ids": [
|
||||
"chunk-f894acfbb6c681d00f75cf9c486d491b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546632,
|
||||
"update_time": 1779546632,
|
||||
"_id": "常用会计科目"
|
||||
}
|
||||
}
|
||||
@@ -20,13 +20,6 @@
|
||||
"update_time": 1779441661,
|
||||
"_id": "c7601043d9944ef2bcf4d3f67ed253f7"
|
||||
},
|
||||
"b0277cd76034437997fbf5219662725a": {
|
||||
"content": "远光软件股份有限公司\n财务基础知识手册\n第一部分 会计基础知识\n一、会计要素\n会计要素包括:资产、负债、所有者权益、收入、费用和利润。\n会计恒等式:资产 = 负债 + 所有者权益\n二、常用会计科目\n科目类别\n科目名称\n说明\n资产类\n库存现金\n公司持有的现金\n资产类\n银行存款\n存放在银行的资金\n资产类\n应收账款\n因销售商品或提供劳务应收的款项\n资产类\n固定资产\n使用年限超过一年的有形资产\n负债类\n应付账款\n因购买商品或接受劳务应付的款项\n负债类\n应交税费\n应缴纳的各种税费\n负债类\n应付职工薪酬\n应付给职工的工资、福利等\n损益类\n主营业务收入\n主要经营业务产生的收入\n损益类\n管理费用\n为管理生产经营发生的费用\n损益类\n销售费用\n为销售产品发生的费用\n第二部分 税务基础知识\n三、主要税种介绍\n(一)增值税:公司为一般纳税人,软件服务适用6%税率,软件产品销售适用13%税率。\n(二)企业所得税:税率为25%,高新技术企业享受15%优惠税率。\n(三)个人所得税:按累进税率3%-45%,由公司代扣代缴。\n(四)印花税:对经济活动中的应税凭证征收。\n第三部分 财务报表解读\n四、三大财务报表\n(一)资产负债表:反映企业在某一特定日期的财务状况。\n(二)利润表:反映企业在一定期间的经营成果。\n(三)现金流量表:反映企业在一定期间现金和现金等价物的流入和流出。\n\n# 章节导航\n\n以下内容由入库阶段从制度原文中提取,供检索时优先理解制度层级、条目和标准所在章节。\n\n- 一、会计要素\n- 二、常用会计科目\n- (四)印花税:对经济活动中的应税凭证征收。\n\n# 重点章节摘录\n\n## 一、会计要素\n\n会计要素包括:资产、负债、所有者权益、收入、费用和利润。;会计恒等式:资产 = 负债 + 所有者权益\n\n## 二、常用会计科目\n\n科目类别;科目名称;说明\n\n## (四)印花税:对经济活动中的应税凭证征收。\n\n第三部分 财务报表解读\n\n# 问答线索补充\n\n以下内容由入库阶段根据章节标题、条款、列表、键值对与相邻正文提炼,供问答检索时优先命中更短、更直接的制度依据。\n\n- 一、会计要素:会计要素包括:资产、负债、所有者权益、收入、费用和利润\n- 一、会计要素:会计恒等式:资产 = 负债 + 所有者权益\n- 二、常用会计科目:因销售商品或提供劳务应收的款项\n- 二、常用会计科目:因购买商品或接受劳务应付的款项\n- 二、常用会计科目:应缴纳的各种税费\n- 二、常用会计科目:应付职工薪酬\n- (四)印花税:对经济活动中的应税凭证征收。:第三部分 财务报表解读",
|
||||
"file_path": "/app/server/storage/knowledge/财务知识库/远光软件财务基础知识手册.docx",
|
||||
"create_time": 1779441751,
|
||||
"update_time": 1779441751,
|
||||
"_id": "b0277cd76034437997fbf5219662725a"
|
||||
},
|
||||
"23f56f159a3e4bc3b2338056544120dd": {
|
||||
"content": "远光软件股份有限公司\n财务术语解释手册\n权责发生制\n以权利和责任的发生来决定收入和费用归属期的会计基础。即凡是当期已经实现的收入和已经发生或应当负担的费用,不论款项是否收付,都应当作为当期的收入和费用。\n收付实现制\n以现金收到或付出为标准来记录收入的实现和费用的发生。即凡是当期收到和支付的现金,都作为当期的收入和费用。\n固定资产折旧\n固定资产在使用过程中因磨损而逐渐转移的价值。公司采用年限平均法计提折旧。\n摊销\n将无形资产或长期待摊费用按照规定期限分期计入当期损益的过程。\n增值税进项税额\n企业购进货物、接受应税劳务或应税服务支付的增值税额,可以从销项税额中抵扣。\n增值税销项税额\n企业销售货物、提供应税劳务或应税服务收取的增值税额。\n预算\n企业对未来一定时期内经营活动的数量化计划,包括收入预算、支出预算、资本预算等。\n现金流\n企业在一定期间内现金和现金等价物流入和流出的数量。\n毛利率\n毛利润占营业收入的百分比,反映企业产品或服务的初始盈利能力。计算公式:毛利率 = (营业收入 - 营业成本)/ 营业收入 × 100%\n净资产收益率(ROE)\n净利润占股东权益的百分比,反映股东投入资金的获利能力。计算公式:ROE = 净利润 / 股东权益 × 100%\n成本中心\n企业内部只发生成本费用而不产生收入的组织单位,用于成本核算和控制。\n利润中心\n企业内部既发生成本费用又产生收入的组织单位,用于考核盈利能力。\n\n# 问答线索补充\n\n以下内容由入库阶段根据章节标题、条款、列表、键值对与相邻正文提炼,供问答检索时优先命中更短、更直接的制度依据。\n\n- 正文:以权利和责任的发生来决定收入和费用归属期的会计基础\n- 正文:即凡是当期已经实现的收入和已经发生或应当负担的费用,不论款项是否收付,都应当作为当期的收入和费用\n- 正文:以现金收到或付出为标准来记录收入的实现和费用的发生\n- 正文:即凡是当期收到和支付的现金,都作为当期的收入和费用\n- 正文:企业购进货物、接受应税劳务或应税服务支付的增值税额,可以从销项税额中抵扣\n- 正文:企业销售货物、提供应税劳务或应税服务收取的增值税额\n- 正文:毛利润占营业收入的百分比,反映企业产品或服务的初始盈利能力\n- 正文:计算公式:毛利率 = (营业收入 - 营业成本)/ 营业收入 × 100%\n- 正文:净利润占股东权益的百分比,反映股东投入资金的获利能力\n- 正文:计算公式:ROE = 净利润 / 股东权益 × 100%",
|
||||
"file_path": "/app/server/storage/knowledge/财务知识库/远光软件财务术语解释手册.docx",
|
||||
@@ -257,5 +250,12 @@
|
||||
"create_time": 1779467725,
|
||||
"update_time": 1779467725,
|
||||
"_id": "3acd9c2df63b4a438c7eab876269b25d"
|
||||
},
|
||||
"b0277cd76034437997fbf5219662725a": {
|
||||
"content": "远光软件股份有限公司 财务基础知识手册\n\n第一部分 会计基础知识\n\n一、会计要素\n\n会计要素包括:资产、负债、所有者权益、收入、费用和利润。\n\n会计恒等式:资产 = 负债 + 所有者权益\n\n二、常用会计科目\n\n| 科目类别 | 科目名称 | 说明 |\n| --- | --- | --- |\n| 资产类 | 库存现金 | 公司持有的现金 |\n| 资产类 | 银行存款 | 存放在银行的资金 |\n| 资产类 | 应收账款 | 因销售商品或提供劳务应收的款项 |\n| 资产类 | 固定资产 | 使用年限超过一年的有形资产 |\n| 负债类 | 应付账款 | 因购买商品或接受劳务应付的款项 |\n| 负债类 | 应交税费 | 应缴纳的各种税费 |\n| 负债类 | 应付职工薪酬 | 应付给职工的工资、福利等 |\n| 损益类 | 主营业务收入 | 主要经营业务产生的收入 |\n| 损益类 | 管理费用 | 为管理生产经营发生的费用 |\n| 损益类 | 销售费用 | 为销售产品发生的费用 |\n\n### 表格行级检索线索\n\n- 表格第 2 行:科目类别=资产类;科目名称=库存现金;说明=公司持有的现金\n\n- 表格第 3 行:科目类别=资产类;科目名称=银行存款;说明=存放在银行的资金\n\n- 表格第 4 行:科目类别=资产类;科目名称=应收账款;说明=因销售商品或提供劳务应收的款项\n\n- 表格第 5 行:科目类别=资产类;科目名称=固定资产;说明=使用年限超过一年的有形资产\n\n- 表格第 6 行:科目类别=负债类;科目名称=应付账款;说明=因购买商品或接受劳务应付的款项\n\n- 表格第 7 行:科目类别=负债类;科目名称=应交税费;说明=应缴纳的各种税费\n\n- 表格第 8 行:科目类别=负债类;科目名称=应付职工薪酬;说明=应付给职工的工资、福利等\n\n- 表格第 9 行:科目类别=损益类;科目名称=主营业务收入;说明=主要经营业务产生的收入\n\n- 表格第 10 行:科目类别=损益类;科目名称=管理费用;说明=为管理生产经营发生的费用\n\n- 表格第 11 行:科目类别=损益类;科目名称=销售费用;说明=为销售产品发生的费用\n\n第二部分 税务基础知识\n\n三、主要税种介绍\n\n(一)增值税:公司为一般纳税人,软件服务适用6%税率,软件产品销售适用13%税率。\n\n(二)企业所得税:税率为25%,高新技术企业享受15%优惠税率。\n\n(三)个人所得税:按累进税率3%-45%,由公司代扣代缴。\n\n(四)印花税:对经济活动中的应税凭证征收。\n\n第三部分 财务报表解读\n\n四、三大财务报表\n\n(一)资产负债表:反映企业在某一特定日期的财务状况。\n\n(二)利润表:反映企业在一定期间的经营成果。\n\n(三)现金流量表:反映企业在一定期间现金和现金等价物的流入和流出。\n\n# 章节导航\n\n以下内容由入库阶段从制度原文中提取,供检索时优先理解制度层级、条目和标准所在章节。\n\n- 一、会计要素\n- 二、常用会计科目\n\n# 重点章节摘录\n\n## 一、会计要素\n\n会计要素包括:资产、负债、所有者权益、收入、费用和利润。;会计恒等式:资产 = 负债 + 所有者权益\n\n## 二、常用会计科目\n\n| 科目类别 | 科目名称 | 说明 |;| --- | --- | --- |;| 资产类 | 库存现金 | 公司持有的现金 |\n\n# 问答线索补充\n\n以下内容由入库阶段根据章节标题、条款、列表、键值对与相邻正文提炼,供问答检索时优先命中更短、更直接的制度依据。\n\n- 一、会计要素:会计要素包括:资产、负债、所有者权益、收入、费用和利润\n- 一、会计要素:会计恒等式:资产 = 负债 + 所有者权益\n- 二、常用会计科目:表格第 2 行:科目类别=资产类\n- 二、常用会计科目:表格第 3 行:科目类别=资产类\n- 二、常用会计科目:表格第 4 行:科目类别=资产类\n- 二、常用会计科目:科目名称=应收账款",
|
||||
"file_path": "/app/server/storage/knowledge/财务知识库/远光软件财务基础知识手册.docx",
|
||||
"create_time": 1779546577,
|
||||
"update_time": 1779546577,
|
||||
"_id": "b0277cd76034437997fbf5219662725a"
|
||||
}
|
||||
}
|
||||
@@ -432,45 +432,6 @@
|
||||
"update_time": 1779441745,
|
||||
"_id": "c7601043d9944ef2bcf4d3f67ed253f7"
|
||||
},
|
||||
"b0277cd76034437997fbf5219662725a": {
|
||||
"entity_names": [
|
||||
"固定资产",
|
||||
"财务报表解读",
|
||||
"银行存款",
|
||||
"收入",
|
||||
"负债",
|
||||
"现金流量表",
|
||||
"企业所得税",
|
||||
"三大财务报表",
|
||||
"会计恒等式",
|
||||
"库存现金",
|
||||
"所有者权益",
|
||||
"费用",
|
||||
"财务基础知识手册",
|
||||
"应付账款",
|
||||
"利润表",
|
||||
"会计基础知识",
|
||||
"应收账款",
|
||||
"应交税费",
|
||||
"主营业务收入",
|
||||
"资产",
|
||||
"管理费用",
|
||||
"税务基础知识",
|
||||
"应付职工薪酬",
|
||||
"销售费用",
|
||||
"印花税",
|
||||
"资产负债表",
|
||||
"个人所得税",
|
||||
"会计要素",
|
||||
"远光软件股份有限公司",
|
||||
"利润",
|
||||
"增值税"
|
||||
],
|
||||
"count": 31,
|
||||
"create_time": 1779441772,
|
||||
"update_time": 1779441772,
|
||||
"_id": "b0277cd76034437997fbf5219662725a"
|
||||
},
|
||||
"23f56f159a3e4bc3b2338056544120dd": {
|
||||
"entity_names": [
|
||||
"净利润",
|
||||
@@ -1890,5 +1851,50 @@
|
||||
"create_time": 1779467727,
|
||||
"update_time": 1779467727,
|
||||
"_id": "3acd9c2df63b4a438c7eab876269b25d"
|
||||
},
|
||||
"b0277cd76034437997fbf5219662725a": {
|
||||
"entity_names": [
|
||||
"Fixed Assets",
|
||||
"Stamp Duty",
|
||||
"Liabilities",
|
||||
"Revenue",
|
||||
"Management Expenses",
|
||||
"Accounts Receivable",
|
||||
"Profit",
|
||||
"Income And Expense Category",
|
||||
"Software Product Sales Tax Rate 13%",
|
||||
"Expenses",
|
||||
"High-Tech Enterprise Preferential Tax Rate 15%",
|
||||
"Software Services Tax Rate 6%",
|
||||
"应收账款",
|
||||
"所有者权益",
|
||||
"Accounting Equation",
|
||||
"资产类",
|
||||
"Main Business Revenue",
|
||||
"Accounts Payable",
|
||||
"Employee Compensation Payable",
|
||||
"Individual Income Tax",
|
||||
"Liabilities Category",
|
||||
"Value Added Tax",
|
||||
"Bank Deposits",
|
||||
"Common Accounting Items",
|
||||
"常用会计科目",
|
||||
"Owner's Equity",
|
||||
"Accounting Elements",
|
||||
"Assets Category",
|
||||
"Corporate Income Tax Rate 25%",
|
||||
"Yuan Guang Software Co., Ltd.",
|
||||
"Assets",
|
||||
"Taxes Payable",
|
||||
"Corporate Income Tax",
|
||||
"Tax Fundamentals",
|
||||
"Cash",
|
||||
"Sales Expenses",
|
||||
"Major Tax Types"
|
||||
],
|
||||
"count": 37,
|
||||
"create_time": 1779546633,
|
||||
"update_time": 1779546633,
|
||||
"_id": "b0277cd76034437997fbf5219662725a"
|
||||
}
|
||||
}
|
||||
@@ -355,34 +355,6 @@
|
||||
"update_time": 1779441745,
|
||||
"_id": "c7601043d9944ef2bcf4d3f67ed253f7"
|
||||
},
|
||||
"b0277cd76034437997fbf5219662725a": {
|
||||
"relation_pairs": [
|
||||
[
|
||||
"会计要素",
|
||||
"资产"
|
||||
],
|
||||
[
|
||||
"财务基础知识手册",
|
||||
"远光软件股份有限公司"
|
||||
],
|
||||
[
|
||||
"财务基础知识手册",
|
||||
"财务报表解读"
|
||||
],
|
||||
[
|
||||
"税务基础知识",
|
||||
"财务基础知识手册"
|
||||
],
|
||||
[
|
||||
"会计基础知识",
|
||||
"财务基础知识手册"
|
||||
]
|
||||
],
|
||||
"count": 5,
|
||||
"create_time": 1779441772,
|
||||
"update_time": 1779441772,
|
||||
"_id": "b0277cd76034437997fbf5219662725a"
|
||||
},
|
||||
"23f56f159a3e4bc3b2338056544120dd": {
|
||||
"relation_pairs": [
|
||||
[
|
||||
@@ -2194,5 +2166,21 @@
|
||||
"create_time": 1779467727,
|
||||
"update_time": 1779467727,
|
||||
"_id": "3acd9c2df63b4a438c7eab876269b25d"
|
||||
},
|
||||
"b0277cd76034437997fbf5219662725a": {
|
||||
"relation_pairs": [
|
||||
[
|
||||
"应收账款",
|
||||
"资产类"
|
||||
],
|
||||
[
|
||||
"常用会计科目",
|
||||
"资产类"
|
||||
]
|
||||
],
|
||||
"count": 2,
|
||||
"create_time": 1779546633,
|
||||
"update_time": 1779546633,
|
||||
"_id": "b0277cd76034437997fbf5219662725a"
|
||||
}
|
||||
}
|
||||
@@ -746,51 +746,6 @@
|
||||
"update_time": 1779441745,
|
||||
"_id": "1221其他应收款<SEP>远光软件股份有限公司"
|
||||
},
|
||||
"财务基础知识手册<SEP>远光软件股份有限公司": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441770,
|
||||
"update_time": 1779441770,
|
||||
"_id": "财务基础知识手册<SEP>远光软件股份有限公司"
|
||||
},
|
||||
"会计要素<SEP>资产": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441770,
|
||||
"update_time": 1779441770,
|
||||
"_id": "会计要素<SEP>资产"
|
||||
},
|
||||
"会计基础知识<SEP>财务基础知识手册": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441771,
|
||||
"update_time": 1779441771,
|
||||
"_id": "会计基础知识<SEP>财务基础知识手册"
|
||||
},
|
||||
"税务基础知识<SEP>财务基础知识手册": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441771,
|
||||
"update_time": 1779441771,
|
||||
"_id": "税务基础知识<SEP>财务基础知识手册"
|
||||
},
|
||||
"财务基础知识手册<SEP>财务报表解读": {
|
||||
"chunk_ids": [
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779441771,
|
||||
"update_time": 1779441771,
|
||||
"_id": "财务基础知识手册<SEP>财务报表解读"
|
||||
},
|
||||
"财务术语解释手册<SEP>远光软件股份有限公司": {
|
||||
"chunk_ids": [
|
||||
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
|
||||
@@ -4230,5 +4185,23 @@
|
||||
"create_time": 1779467726,
|
||||
"update_time": 1779467726,
|
||||
"_id": "第三部分发票问题<SEP>财务报销常见问题解答"
|
||||
},
|
||||
"常用会计科目<SEP>资产类": {
|
||||
"chunk_ids": [
|
||||
"chunk-f894acfbb6c681d00f75cf9c486d491b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546632,
|
||||
"update_time": 1779546632,
|
||||
"_id": "常用会计科目<SEP>资产类"
|
||||
},
|
||||
"应收账款<SEP>资产类": {
|
||||
"chunk_ids": [
|
||||
"chunk-f894acfbb6c681d00f75cf9c486d491b"
|
||||
],
|
||||
"count": 1,
|
||||
"create_time": 1779546633,
|
||||
"update_time": 1779546633,
|
||||
"_id": "应收账款<SEP>资产类"
|
||||
}
|
||||
}
|
||||
@@ -197,17 +197,6 @@
|
||||
"update_time": 1779441661,
|
||||
"_id": "chunk-e726f44fb0287c5192cf61b350f18abb"
|
||||
},
|
||||
"chunk-78edb0c8ccc8238159196ecaeeb08d43": {
|
||||
"tokens": 839,
|
||||
"content": "远光软件股份有限公司\n财务基础知识手册\n第一部分 会计基础知识\n一、会计要素\n会计要素包括:资产、负债、所有者权益、收入、费用和利润。\n会计恒等式:资产 = 负债 + 所有者权益\n二、常用会计科目\n科目类别\n科目名称\n说明\n资产类\n库存现金\n公司持有的现金\n资产类\n银行存款\n存放在银行的资金\n资产类\n应收账款\n因销售商品或提供劳务应收的款项\n资产类\n固定资产\n使用年限超过一年的有形资产\n负债类\n应付账款\n因购买商品或接受劳务应付的款项\n负债类\n应交税费\n应缴纳的各种税费\n负债类\n应付职工薪酬\n应付给职工的工资、福利等\n损益类\n主营业务收入\n主要经营业务产生的收入\n损益类\n管理费用\n为管理生产经营发生的费用\n损益类\n销售费用\n为销售产品发生的费用\n第二部分 税务基础知识\n三、主要税种介绍\n(一)增值税:公司为一般纳税人,软件服务适用6%税率,软件产品销售适用13%税率。\n(二)企业所得税:税率为25%,高新技术企业享受15%优惠税率。\n(三)个人所得税:按累进税率3%-45%,由公司代扣代缴。\n(四)印花税:对经济活动中的应税凭证征收。\n第三部分 财务报表解读\n四、三大财务报表\n(一)资产负债表:反映企业在某一特定日期的财务状况。\n(二)利润表:反映企业在一定期间的经营成果。\n(三)现金流量表:反映企业在一定期间现金和现金等价物的流入和流出。\n\n# 章节导航\n\n以下内容由入库阶段从制度原文中提取,供检索时优先理解制度层级、条目和标准所在章节。\n\n- 一、会计要素\n- 二、常用会计科目\n- (四)印花税:对经济活动中的应税凭证征收。\n\n# 重点章节摘录\n\n## 一、会计要素\n\n会计要素包括:资产、负债、所有者权益、收入、费用和利润。;会计恒等式:资产 = 负债 + 所有者权益\n\n## 二、常用会计科目\n\n科目类别;科目名称;说明\n\n## (四)印花税:对经济活动中的应税凭证征收。\n\n第三部分 财务报表解读\n\n# 问答线索补充\n\n以下内容由入库阶段根据章节标题、条款、列表、键值对与相邻正文提炼,供问答检索时优先命中更短、更直接的制度依据。\n\n- 一、会计要素:会计要素包括:资产、负债、所有者权益、收入、费用和利润\n- 一、会计要素:会计恒等式:资产 = 负债 + 所有者权益\n- 二、常用会计科目:因销售商品或提供劳务应收的款项\n- 二、常用会计科目:因购买商品或接受劳务应付的款项\n- 二、常用会计科目:应缴纳的各种税费\n- 二、常用会计科目:应付职工薪酬\n- (四)印花税:对经济活动中的应税凭证征收。:第三部分 财务报表解读",
|
||||
"chunk_order_index": 0,
|
||||
"full_doc_id": "b0277cd76034437997fbf5219662725a",
|
||||
"file_path": "/app/server/storage/knowledge/财务知识库/远光软件财务基础知识手册.docx",
|
||||
"llm_cache_list": [],
|
||||
"create_time": 1779441751,
|
||||
"update_time": 1779441751,
|
||||
"_id": "chunk-78edb0c8ccc8238159196ecaeeb08d43"
|
||||
},
|
||||
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1": {
|
||||
"tokens": 760,
|
||||
"content": "远光软件股份有限公司\n财务术语解释手册\n权责发生制\n以权利和责任的发生来决定收入和费用归属期的会计基础。即凡是当期已经实现的收入和已经发生或应当负担的费用,不论款项是否收付,都应当作为当期的收入和费用。\n收付实现制\n以现金收到或付出为标准来记录收入的实现和费用的发生。即凡是当期收到和支付的现金,都作为当期的收入和费用。\n固定资产折旧\n固定资产在使用过程中因磨损而逐渐转移的价值。公司采用年限平均法计提折旧。\n摊销\n将无形资产或长期待摊费用按照规定期限分期计入当期损益的过程。\n增值税进项税额\n企业购进货物、接受应税劳务或应税服务支付的增值税额,可以从销项税额中抵扣。\n增值税销项税额\n企业销售货物、提供应税劳务或应税服务收取的增值税额。\n预算\n企业对未来一定时期内经营活动的数量化计划,包括收入预算、支出预算、资本预算等。\n现金流\n企业在一定期间内现金和现金等价物流入和流出的数量。\n毛利率\n毛利润占营业收入的百分比,反映企业产品或服务的初始盈利能力。计算公式:毛利率 = (营业收入 - 营业成本)/ 营业收入 × 100%\n净资产收益率(ROE)\n净利润占股东权益的百分比,反映股东投入资金的获利能力。计算公式:ROE = 净利润 / 股东权益 × 100%\n成本中心\n企业内部只发生成本费用而不产生收入的组织单位,用于成本核算和控制。\n利润中心\n企业内部既发生成本费用又产生收入的组织单位,用于考核盈利能力。\n\n# 问答线索补充\n\n以下内容由入库阶段根据章节标题、条款、列表、键值对与相邻正文提炼,供问答检索时优先命中更短、更直接的制度依据。\n\n- 正文:以权利和责任的发生来决定收入和费用归属期的会计基础\n- 正文:即凡是当期已经实现的收入和已经发生或应当负担的费用,不论款项是否收付,都应当作为当期的收入和费用\n- 正文:以现金收到或付出为标准来记录收入的实现和费用的发生\n- 正文:即凡是当期收到和支付的现金,都作为当期的收入和费用\n- 正文:企业购进货物、接受应税劳务或应税服务支付的增值税额,可以从销项税额中抵扣\n- 正文:企业销售货物、提供应税劳务或应税服务收取的增值税额\n- 正文:毛利润占营业收入的百分比,反映企业产品或服务的初始盈利能力\n- 正文:计算公式:毛利率 = (营业收入 - 营业成本)/ 营业收入 × 100%\n- 正文:净利润占股东权益的百分比,反映股东投入资金的获利能力\n- 正文:计算公式:ROE = 净利润 / 股东权益 × 100%",
|
||||
@@ -889,5 +878,27 @@
|
||||
"create_time": 1779467725,
|
||||
"update_time": 1779467725,
|
||||
"_id": "chunk-cfac1ddf5942f8fe2d5a296380818faf"
|
||||
},
|
||||
"chunk-6fdc554482754c7c662adc7804d3cf0b": {
|
||||
"tokens": 1195,
|
||||
"content": "远光软件股份有限公司 财务基础知识手册\n\n第一部分 会计基础知识\n\n一、会计要素\n\n会计要素包括:资产、负债、所有者权益、收入、费用和利润。\n\n会计恒等式:资产 = 负债 + 所有者权益\n\n二、常用会计科目\n\n| 科目类别 | 科目名称 | 说明 |\n| --- | --- | --- |\n| 资产类 | 库存现金 | 公司持有的现金 |\n| 资产类 | 银行存款 | 存放在银行的资金 |\n| 资产类 | 应收账款 | 因销售商品或提供劳务应收的款项 |\n| 资产类 | 固定资产 | 使用年限超过一年的有形资产 |\n| 负债类 | 应付账款 | 因购买商品或接受劳务应付的款项 |\n| 负债类 | 应交税费 | 应缴纳的各种税费 |\n| 负债类 | 应付职工薪酬 | 应付给职工的工资、福利等 |\n| 损益类 | 主营业务收入 | 主要经营业务产生的收入 |\n| 损益类 | 管理费用 | 为管理生产经营发生的费用 |\n| 损益类 | 销售费用 | 为销售产品发生的费用 |\n\n### 表格行级检索线索\n\n- 表格第 2 行:科目类别=资产类;科目名称=库存现金;说明=公司持有的现金\n\n- 表格第 3 行:科目类别=资产类;科目名称=银行存款;说明=存放在银行的资金\n\n- 表格第 4 行:科目类别=资产类;科目名称=应收账款;说明=因销售商品或提供劳务应收的款项\n\n- 表格第 5 行:科目类别=资产类;科目名称=固定资产;说明=使用年限超过一年的有形资产\n\n- 表格第 6 行:科目类别=负债类;科目名称=应付账款;说明=因购买商品或接受劳务应付的款项\n\n- 表格第 7 行:科目类别=负债类;科目名称=应交税费;说明=应缴纳的各种税费\n\n- 表格第 8 行:科目类别=负债类;科目名称=应付职工薪酬;说明=应付给职工的工资、福利等\n\n- 表格第 9 行:科目类别=损益类;科目名称=主营业务收入;说明=主要经营业务产生的收入\n\n- 表格第 10 行:科目类别=损益类;科目名称=管理费用;说明=为管理生产经营发生的费用\n\n- 表格第 11 行:科目类别=损益类;科目名称=销售费用;说明=为销售产品发生的费用\n\n第二部分 税务基础知识\n\n三、主要税种介绍\n\n(一)增值税:公司为一般纳税人,软件服务适用6%税率,软件产品销售适用13%税率。\n\n(二)企业所得税:税率为25%,高新技术企业享受15%优惠税率。\n\n(三)个人所得税:按累进税率3%-45%,由公司代扣代缴。\n\n(四)印花税:对经济活动中的应税凭证征收。\n\n第三部分 财务报表解读\n\n四、三大财务报表\n\n(一)资产负债表:反映企业在某一特定日期的财务状况。\n\n(二)利润表:反映企业在一定期间的经营成果。\n\n(三)现金流量表:反映企业在一定期间现金和现金等价物的流入和流出。\n\n# 章节导航\n\n以下内容由入库阶段从制度原文中提取,供检索时优先理解制度层级、条目和标准所在章节。\n\n- 一、会计要素\n- 二、常用会计科目\n\n# 重点章节摘录\n\n## 一、会计要素\n\n会计要素包括:资产、负债、所有者权益、收入、费用和利润。;会计恒等式:资产 = 负债 + 所有者权益\n\n## 二、常用会计科目\n\n| 科目类别 | 科目名称 | 说明 |;| --- | --- | --- |;| 资产类 | 库存现金 | 公司持有的现金 |\n\n# 问答线索补充\n\n以下内容由入库阶段根据章节标题、条款、列表、键值对与相邻正文提炼,供问答检索时优先命中更短、更直接的制度依据。\n\n- 一、会计要素:会计要素包括:资产、负债、所有者权益、收入、费用和利润\n- 一、会计要素:会计恒等式:资产 = 负债 + 所有者权益\n- 二、常用会计科目:表格第 2 行:科目类别=资产类\n- 二、常用会计科目:表格第 3 行:科目类别=资产类\n- 二、常用会计科目:表格第 4 行:科目类别=资产类\n- 二、常用会计科目:科目名称=应收账款",
|
||||
"chunk_order_index": 0,
|
||||
"full_doc_id": "b0277cd76034437997fbf5219662725a",
|
||||
"file_path": "/app/server/storage/knowledge/财务知识库/远光软件财务基础知识手册.docx",
|
||||
"llm_cache_list": [],
|
||||
"create_time": 1779546577,
|
||||
"update_time": 1779546577,
|
||||
"_id": "chunk-6fdc554482754c7c662adc7804d3cf0b"
|
||||
},
|
||||
"chunk-f894acfbb6c681d00f75cf9c486d491b": {
|
||||
"tokens": 95,
|
||||
"content": "所有者权益\n- 二、常用会计科目:表格第 2 行:科目类别=资产类\n- 二、常用会计科目:表格第 3 行:科目类别=资产类\n- 二、常用会计科目:表格第 4 行:科目类别=资产类\n- 二、常用会计科目:科目名称=应收账款",
|
||||
"chunk_order_index": 1,
|
||||
"full_doc_id": "b0277cd76034437997fbf5219662725a",
|
||||
"file_path": "/app/server/storage/knowledge/财务知识库/远光软件财务基础知识手册.docx",
|
||||
"llm_cache_list": [],
|
||||
"create_time": 1779546577,
|
||||
"update_time": 1779546577,
|
||||
"_id": "chunk-f894acfbb6c681d00f75cf9c486d491b"
|
||||
}
|
||||
}
|
||||
@@ -5,6 +5,38 @@ from zipfile import ZipFile
|
||||
from app.services.knowledge_document_extractors import _extract_document_text_from_path
|
||||
|
||||
|
||||
def test_extract_docx_document_text_preserves_tables_as_markdown(tmp_path) -> None:
    """A DOCX table must come out as a Markdown table with per-row clue lines.

    The fixture document holds three paragraphs followed by a four-row table
    (one header row plus three data rows). After extraction the heading
    paragraph, every table row in Markdown form, and the row-level clue line
    for the first data row must be present, and the header cells must not be
    flattened into one cell per line.
    """
    docx_path = tmp_path / "financial-basic.docx"
    heading_paragraphs = [
        "远光软件股份有限公司",
        "财务基础知识手册",
        "二、常用会计科目",
    ]
    account_table = [
        ["科目类别", "科目名称", "说明"],
        ["资产类", "库存现金", "公司持有的现金"],
        ["负债类", "应付账款", "因购买商品或接受劳务应付的款项"],
        ["损益类", "销售费用", "为销售产品发生的费用"],
    ]
    _write_minimal_docx_with_table(
        docx_path,
        paragraphs=heading_paragraphs,
        table=account_table,
    )

    text = _extract_document_text_from_path(
        file_path=docx_path,
        original_name="远光软件财务基础知识手册.docx",
        mime_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )

    # Checked in the same order as the individual assertions they replace:
    # heading, Markdown header row, three Markdown data rows, row-level clue.
    expected_snippets = (
        "二、常用会计科目",
        "| 科目类别 | 科目名称 | 说明 |",
        "| 资产类 | 库存现金 | 公司持有的现金 |",
        "| 负债类 | 应付账款 | 因购买商品或接受劳务应付的款项 |",
        "| 损益类 | 销售费用 | 为销售产品发生的费用 |",
        "表格第 2 行:科目类别=资产类;科目名称=库存现金;说明=公司持有的现金",
    )
    for snippet in expected_snippets:
        assert snippet in text

    # The header cells must not appear as newline-separated plain text.
    assert "科目类别\n科目名称\n说明" not in text
|
||||
|
||||
|
||||
def test_extract_xlsx_document_text_builds_markdown_with_row_clues(tmp_path) -> None:
|
||||
file_path = tmp_path / "company-expense-rules.xlsx"
|
||||
_write_minimal_xlsx(
|
||||
@@ -58,6 +90,39 @@ def test_extract_pptx_document_text_builds_markdown_slides(tmp_path) -> None:
|
||||
assert "- 发票、审批、预算三项要素必须齐全" in text
|
||||
|
||||
|
||||
def _write_minimal_docx_with_table(
|
||||
file_path,
|
||||
*,
|
||||
paragraphs: list[str],
|
||||
table: list[list[str]],
|
||||
) -> None:
|
||||
paragraph_xml = "\n".join(f"<w:p>{_docx_text_run(text)}</w:p>" for text in paragraphs)
|
||||
table_xml = (
|
||||
"<w:tbl>"
|
||||
+ "".join(
|
||||
"<w:tr>"
|
||||
+ "".join(f"<w:tc><w:p>{_docx_text_run(cell)}</w:p></w:tc>" for cell in row)
|
||||
+ "</w:tr>"
|
||||
for row in table
|
||||
)
|
||||
+ "</w:tbl>"
|
||||
)
|
||||
document_xml = f"""<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
|
||||
<w:body>
|
||||
{paragraph_xml}
|
||||
{table_xml}
|
||||
</w:body>
|
||||
</w:document>
|
||||
"""
|
||||
with ZipFile(file_path, "w") as archive:
|
||||
archive.writestr("word/document.xml", document_xml)
|
||||
|
||||
|
||||
def _docx_text_run(text: str) -> str:
|
||||
return f"<w:r><w:t>{text}</w:t></w:r>"
|
||||
|
||||
|
||||
def _write_minimal_xlsx(file_path, *, sheet_name: str, rows: list[list[str]]) -> None:
|
||||
workbook_xml = f"""<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
|
||||
|
||||
@@ -58,6 +58,34 @@ def test_build_hits_boosts_query_term_matches() -> None:
|
||||
assert [item["candidate_id"] for item in hits] == ["ent-1", "travel-1"]
|
||||
|
||||
|
||||
def test_build_hits_keeps_long_query_anchor_terms_for_accounting_table() -> None:
|
||||
hits = KnowledgeRagService._build_hits_from_query_data(
|
||||
query="远光软件财务基础知识手册里的常用会计科目是什么?",
|
||||
chunks=[
|
||||
{
|
||||
"chunk_id": "generic-1",
|
||||
"file_path": "/tmp/doc-1__远光软件财务制度培训手册.docx",
|
||||
"content": "远光软件股份有限公司财务培训内容,介绍费用报销和财务制度。",
|
||||
},
|
||||
{
|
||||
"chunk_id": "accounts-1",
|
||||
"file_path": "/tmp/doc-2__远光软件财务基础知识手册.docx",
|
||||
"content": (
|
||||
"二、常用会计科目\n\n"
|
||||
"| 科目类别 | 科目名称 | 说明 |\n"
|
||||
"| --- | --- | --- |\n"
|
||||
"| 资产类 | 库存现金 | 公司持有的现金 |\n"
|
||||
"| 损益类 | 销售费用 | 为销售产品发生的费用 |"
|
||||
),
|
||||
},
|
||||
],
|
||||
entities=[],
|
||||
limit=2,
|
||||
)
|
||||
|
||||
assert [item["candidate_id"] for item in hits] == ["accounts-1", "generic-1"]
|
||||
|
||||
|
||||
def test_build_hits_prioritizes_answer_clue_appendix_for_rule_queries() -> None:
|
||||
hits = KnowledgeRagService._build_hits_from_query_data(
|
||||
query="报销时限是多少?",
|
||||
|
||||
@@ -589,6 +589,66 @@ def test_semantic_ontology_service_covers_common_expense_scene_keywords(
|
||||
)
|
||||
|
||||
|
||||
def test_semantic_ontology_service_connects_expense_application_to_ontology() -> None:
|
||||
session_factory = build_session_factory()
|
||||
with session_factory() as db:
|
||||
result = SemanticOntologyService(db).parse(
|
||||
OntologyParseRequest(
|
||||
query="申请2026-06-01 ~ 2026-06-03去北京做客户现场验收,差旅预算18000元",
|
||||
user_id="pytest",
|
||||
context_json={
|
||||
"document_type": "expense_application",
|
||||
"application_stage": "pre_approval",
|
||||
"entry_source": "documents_application",
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
assert result.scenario == "expense"
|
||||
assert result.intent == "draft"
|
||||
assert any(
|
||||
item.type == "document_type" and item.normalized_value == "expense_application"
|
||||
for item in result.entities
|
||||
)
|
||||
assert any(
|
||||
item.type == "workflow_stage" and item.normalized_value == "pre_approval"
|
||||
for item in result.entities
|
||||
)
|
||||
assert any(
|
||||
item.field == "document_type" and item.value == "expense_application"
|
||||
for item in result.constraints
|
||||
)
|
||||
assert any(
|
||||
item.type == "expense_type" and item.normalized_value == "travel"
|
||||
for item in result.entities
|
||||
)
|
||||
|
||||
|
||||
def test_semantic_ontology_service_requires_attachment_for_meeting_application() -> None:
|
||||
session_factory = build_session_factory()
|
||||
with session_factory() as db:
|
||||
result = SemanticOntologyService(db).parse(
|
||||
OntologyParseRequest(
|
||||
query="发起会务申请,2026-06-01 ~ 2026-06-02上海产品发布会,预算32000元",
|
||||
user_id="pytest",
|
||||
context_json={
|
||||
"document_type": "expense_application",
|
||||
"application_stage": "pre_approval",
|
||||
"entry_source": "documents_application",
|
||||
"attachment_count": 0,
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
assert result.scenario == "expense"
|
||||
assert result.intent == "draft"
|
||||
assert any(
|
||||
item.type == "expense_type" and item.normalized_value == "meeting"
|
||||
for item in result.entities
|
||||
)
|
||||
assert "attachments" in result.missing_slots
|
||||
|
||||
|
||||
def test_semantic_ontology_service_uses_model_parse_when_available(monkeypatch) -> None:
|
||||
session_factory = build_session_factory()
|
||||
with session_factory() as db:
|
||||
|
||||
@@ -1,18 +1,32 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import UTC, datetime
|
||||
from decimal import Decimal
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
from sqlalchemy.pool import StaticPool
|
||||
|
||||
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus
|
||||
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentReviewStatus
|
||||
from app.db.base import Base
|
||||
from app.models.agent_asset import AgentAsset
|
||||
from app.schemas.agent_asset import AgentAssetRiskRuleGenerateRequest
|
||||
from app.models.financial_record import ExpenseClaim
|
||||
from app.schemas.agent_asset import (
|
||||
AgentAssetReviewCreate,
|
||||
AgentAssetRiskRuleGenerateRequest,
|
||||
AgentAssetRiskRuleReportRequest,
|
||||
AgentAssetRiskRuleSampleTestRequest,
|
||||
AgentAssetRiskRuleScenarioTestRequest,
|
||||
AgentAssetRiskRuleSimulationRequest,
|
||||
)
|
||||
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
|
||||
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
|
||||
from app.services.risk_rule_flow_diagram import RiskRuleFlowDiagramRenderer, RiskRuleFlowDiagramSpec
|
||||
from app.services.agent_assets import AgentAssetService
|
||||
from app.services.risk_rule_flow_diagram import (
|
||||
RiskRuleFlowDiagramRenderer,
|
||||
RiskRuleFlowDiagramSpec,
|
||||
)
|
||||
from app.services.risk_rule_generation import RiskRuleGenerationService
|
||||
|
||||
|
||||
@@ -43,6 +57,7 @@ def test_generate_risk_rule_asset_creates_draft_json_rule(tmp_path) -> None:
|
||||
asset_id = service.generate_rule_asset(
|
||||
AgentAssetRiskRuleGenerateRequest(
|
||||
business_domain=AgentAssetDomain.EXPENSE,
|
||||
expense_category="travel",
|
||||
risk_level="high",
|
||||
natural_language="住宿城市必须出现在本次差旅行程城市中,否则提示高风险。",
|
||||
),
|
||||
@@ -54,12 +69,18 @@ def test_generate_risk_rule_asset_creates_draft_json_rule(tmp_path) -> None:
|
||||
assert asset.status == AgentAssetStatus.DRAFT.value
|
||||
assert asset.config_json["detail_mode"] == "json_risk"
|
||||
assert asset.config_json["evaluator"] == "template_rule"
|
||||
assert asset.config_json["expense_category"] == "travel"
|
||||
assert asset.config_json["risk_category"] == "差旅费"
|
||||
assert asset.scenario_json == ["差旅费"]
|
||||
assert asset.current_version == "v0.1.0"
|
||||
|
||||
file_name = asset.config_json["rule_document"]["file_name"]
|
||||
rule_path = tmp_path / "rules" / RISK_RULES_LIBRARY / file_name
|
||||
payload = json.loads(rule_path.read_text(encoding="utf-8"))
|
||||
assert payload["rule_code"] == asset.code
|
||||
assert payload["applies_to"]["expense_categories"] == ["travel"]
|
||||
assert payload["risk_category"] == "差旅费"
|
||||
assert payload["metadata"]["expense_category"] == "travel"
|
||||
assert payload["outcomes"]["fail"]["severity"] == "high"
|
||||
assert payload["template_key"] == "field_compare_v1"
|
||||
assert payload["metadata"]["natural_language"].startswith("住宿城市")
|
||||
@@ -104,3 +125,206 @@ def test_risk_rule_flow_diagram_uses_risk_level_palette() -> None:
|
||||
assert "#dc2626" in high_svg
|
||||
assert high_svg.count("#dc2626") == 1
|
||||
assert "#10a37f" not in high_svg
|
||||
|
||||
|
||||
def test_risk_rule_requires_test_report_before_review_and_publish(tmp_path) -> None:
|
||||
with build_session() as db:
|
||||
manager = AgentAssetRuleLibraryManager(rule_root=tmp_path / "rules")
|
||||
generator = RiskRuleGenerationService(
|
||||
db,
|
||||
rule_library_manager=manager,
|
||||
runtime_chat_service=NullRuntimeChatService(),
|
||||
)
|
||||
asset_id = generator.generate_rule_asset(
|
||||
AgentAssetRiskRuleGenerateRequest(
|
||||
business_domain=AgentAssetDomain.EXPENSE,
|
||||
risk_level="high",
|
||||
natural_language="酒店发票城市必须与行程城市一致,不一致时标记高风险。",
|
||||
),
|
||||
actor="pytest",
|
||||
)
|
||||
service = AgentAssetService(db)
|
||||
service.rule_library_manager = manager
|
||||
|
||||
asset = db.get(AgentAsset, asset_id)
|
||||
assert asset is not None
|
||||
try:
|
||||
service.create_review(
|
||||
asset_id,
|
||||
AgentAssetReviewCreate(
|
||||
version=asset.working_version or "v0.1.0",
|
||||
reviewer="manager",
|
||||
review_status=AgentReviewStatus.PENDING,
|
||||
review_note="送审",
|
||||
),
|
||||
actor="pytest",
|
||||
)
|
||||
except PermissionError as exc:
|
||||
assert "测试通过" in str(exc)
|
||||
else:
|
||||
raise AssertionError("未测试通过的风险规则不应允许提交审核")
|
||||
|
||||
simulation = service.simulate_risk_rule_message(
|
||||
asset_id,
|
||||
AgentAssetRiskRuleSimulationRequest(
|
||||
message="我想仿真一张酒店报销单,酒店发票城市上海,申报目的地北京,金额580元。",
|
||||
),
|
||||
)
|
||||
assert simulation.execution_mode == "risk_rule_simulation"
|
||||
assert simulation.ready is True
|
||||
assert simulation.hit is True
|
||||
assert simulation.severity == "high"
|
||||
assert "不创建业务单据" in simulation.summary
|
||||
assert service.get_latest_risk_rule_test_summary(asset_id).sample is None
|
||||
|
||||
blocked_simulation = service.simulate_risk_rule_message(
|
||||
asset_id,
|
||||
AgentAssetRiskRuleSimulationRequest(
|
||||
message="请识别上传单据是否命中风险规则。",
|
||||
attachments=[{"name": "hotel-invoice.pdf", "content_type": "application/pdf"}],
|
||||
),
|
||||
)
|
||||
assert blocked_simulation.ready is False
|
||||
assert blocked_simulation.stage == "needs_recognition"
|
||||
assert blocked_simulation.hit is False
|
||||
assert "尚未完成识别" in blocked_simulation.summary
|
||||
|
||||
db.add(
|
||||
ExpenseClaim(
|
||||
claim_no="TEST-CLAIM-001",
|
||||
employee_name="张三",
|
||||
department_name="财务部",
|
||||
expense_type="住宿费",
|
||||
reason="北京出差住宿",
|
||||
location="北京",
|
||||
amount=Decimal("300.00"),
|
||||
currency="CNY",
|
||||
invoice_count=0,
|
||||
occurred_at=datetime.now(UTC),
|
||||
created_at=datetime.now(UTC),
|
||||
status="draft",
|
||||
)
|
||||
)
|
||||
db.commit()
|
||||
|
||||
sample = service.run_risk_rule_sample_test(
|
||||
asset_id,
|
||||
AgentAssetRiskRuleSampleTestRequest(),
|
||||
actor="pytest",
|
||||
)
|
||||
assert sample.passed is True
|
||||
|
||||
scenario = service.run_risk_rule_scenario_test(
|
||||
asset_id,
|
||||
AgentAssetRiskRuleScenarioTestRequest(intent="用最近30天的住宿报销单试运行"),
|
||||
actor="pytest",
|
||||
)
|
||||
assert scenario.passed is True
|
||||
assert scenario.result_json["total_count"] == 1
|
||||
|
||||
report = service.confirm_risk_rule_test_report(
|
||||
asset_id,
|
||||
AgentAssetRiskRuleReportRequest(confirm_passed=True),
|
||||
actor="pytest",
|
||||
)
|
||||
assert report.passed is True
|
||||
|
||||
review = service.create_review(
|
||||
asset_id,
|
||||
AgentAssetReviewCreate(
|
||||
version=asset.working_version or "v0.1.0",
|
||||
reviewer="manager",
|
||||
review_status=AgentReviewStatus.PENDING,
|
||||
review_note="送审",
|
||||
),
|
||||
actor="pytest",
|
||||
)
|
||||
assert review.review_status == AgentReviewStatus.PENDING.value
|
||||
published = service.publish_risk_rule(asset_id, actor="manager")
|
||||
assert published.status == AgentAssetStatus.ACTIVE.value
|
||||
assert published.published_version == asset.working_version
|
||||
|
||||
disabled = service.set_risk_rule_enabled(
|
||||
asset_id,
|
||||
enabled=False,
|
||||
actor="manager",
|
||||
)
|
||||
assert disabled.config_json["enabled"] is False
|
||||
rule_document = disabled.config_json["rule_document"]
|
||||
manifest = manager.read_rule_library_json(
|
||||
library=RISK_RULES_LIBRARY,
|
||||
file_name=rule_document["file_name"],
|
||||
)
|
||||
assert manifest["enabled"] is False
|
||||
|
||||
attachment_required_id = generator.generate_rule_asset(
|
||||
AgentAssetRiskRuleGenerateRequest(
|
||||
business_domain=AgentAssetDomain.EXPENSE,
|
||||
risk_level="medium",
|
||||
natural_language="发票号码不能为空,缺失时进入中风险复核。",
|
||||
requires_attachment=True,
|
||||
),
|
||||
actor="pytest",
|
||||
)
|
||||
attachment_required_asset = db.get(AgentAsset, attachment_required_id)
|
||||
assert attachment_required_asset is not None
|
||||
assert attachment_required_asset.config_json["requires_attachment"] is True
|
||||
attachment_rule_document = attachment_required_asset.config_json["rule_document"]
|
||||
attachment_manifest = manager.read_rule_library_json(
|
||||
library=RISK_RULES_LIBRARY,
|
||||
file_name=attachment_rule_document["file_name"],
|
||||
)
|
||||
assert attachment_manifest["requires_attachment"] is True
|
||||
no_attachment_simulation = service.simulate_risk_rule_message(
|
||||
attachment_required_id,
|
||||
AgentAssetRiskRuleSimulationRequest(message="请测试这条规则。"),
|
||||
)
|
||||
assert no_attachment_simulation.ready is False
|
||||
assert no_attachment_simulation.stage == "needs_attachment"
|
||||
|
||||
attachment_only_simulation = service.simulate_risk_rule_message(
|
||||
attachment_required_id,
|
||||
AgentAssetRiskRuleSimulationRequest(
|
||||
message="请识别上传单据是否命中风险规则。",
|
||||
attachments=[
|
||||
{
|
||||
"name": "invoice.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"document_fields": [
|
||||
{"key": "invoice_no", "label": "发票号码", "value": "INV-001"}
|
||||
],
|
||||
}
|
||||
],
|
||||
),
|
||||
)
|
||||
assert attachment_only_simulation.ready is False
|
||||
assert attachment_only_simulation.stage == "needs_test_intent"
|
||||
|
||||
|
||||
def test_delete_unpublished_risk_rule_removes_asset_and_json_file(tmp_path) -> None:
|
||||
with build_session() as db:
|
||||
manager = AgentAssetRuleLibraryManager(rule_root=tmp_path / "rules")
|
||||
asset_id = RiskRuleGenerationService(
|
||||
db,
|
||||
rule_library_manager=manager,
|
||||
runtime_chat_service=NullRuntimeChatService(),
|
||||
).generate_rule_asset(
|
||||
AgentAssetRiskRuleGenerateRequest(
|
||||
business_domain=AgentAssetDomain.EXPENSE,
|
||||
risk_level="medium",
|
||||
natural_language="报销事由不能为空,缺失时进入中风险复核。",
|
||||
),
|
||||
actor="pytest",
|
||||
)
|
||||
asset = db.get(AgentAsset, asset_id)
|
||||
assert asset is not None
|
||||
file_name = asset.config_json["rule_document"]["file_name"]
|
||||
rule_path = tmp_path / "rules" / RISK_RULES_LIBRARY / file_name
|
||||
assert rule_path.exists()
|
||||
|
||||
service = AgentAssetService(db)
|
||||
service.rule_library_manager = manager
|
||||
service.delete_unpublished_asset(asset_id, actor="pytest")
|
||||
|
||||
assert db.get(AgentAsset, asset_id) is None
|
||||
assert not rule_path.exists()
|
||||
|
||||
@@ -131,6 +131,8 @@ def test_user_agent_knowledge_prompt_enforces_knowledge_boundary() -> None:
|
||||
assert "不能用常识、外部知识或主观推断补齐缺失条件" in messages[0]["content"]
|
||||
assert "不能只依赖排在最前面的片段" in messages[0]["content"]
|
||||
assert "不能把第一列的数值直接套给后面的列名" in messages[0]["content"]
|
||||
assert "最终答复必须像助手在认真回答问题" in messages[0]["content"]
|
||||
assert "禁止使用“已命中”“答案整理阶段”“稍后重试”" in messages[0]["content"]
|
||||
assert "knowledge_evidence_blocks" in messages[0]["content"]
|
||||
assert '"knowledge_answer_evidence": []' in messages[1]["content"]
|
||||
|
||||
@@ -162,8 +164,9 @@ def test_user_agent_knowledge_fallback_is_honest_and_personalized() -> None:
|
||||
)
|
||||
|
||||
assert answer.startswith("张三,您好。")
|
||||
assert "答案整理阶段本轮没有及时返回" in answer
|
||||
assert "先给你当前最直接的依据" in answer
|
||||
assert "我先根据当前制度依据给出可以确认的部分" in answer
|
||||
assert "已命中" not in answer
|
||||
assert "答案整理阶段本轮没有及时返回" not in answer
|
||||
assert "《差旅费制度》" in answer
|
||||
|
||||
|
||||
@@ -241,6 +244,40 @@ def test_user_agent_prefers_relevant_raw_hit_over_generic_appendix() -> None:
|
||||
assert "组织人事部" in selected[0]["content"]
|
||||
|
||||
|
||||
def test_user_agent_model_hit_selection_keeps_later_relevant_hits() -> None:
|
||||
selected = UserAgentService._select_knowledge_model_hits(
|
||||
{
|
||||
"hits": [
|
||||
{"content": "一般说明一"},
|
||||
{"content": "一般说明二"},
|
||||
{"content": "一般说明三"},
|
||||
{"content": "一般说明四"},
|
||||
{"content": "一般说明五"},
|
||||
{"content": "一般说明六"},
|
||||
{"content": "一般说明七"},
|
||||
{
|
||||
"content": (
|
||||
"# 问答线索补充\n\n"
|
||||
"- 第二章 报销时限:差旅费应在行程结束三个月内提交;逾期不予报销出差补贴。"
|
||||
)
|
||||
},
|
||||
]
|
||||
},
|
||||
question="差旅费报销时限是多少?",
|
||||
)
|
||||
|
||||
assert "三个月内提交" in selected[0]["content"]
|
||||
|
||||
|
||||
def test_user_agent_knowledge_terms_keep_accounting_subject_in_long_query() -> None:
|
||||
terms = UserAgentService._extract_knowledge_query_terms(
|
||||
"远光软件财务基础知识手册里的常用会计科目是什么?"
|
||||
)
|
||||
|
||||
assert "常用会计科目" in terms
|
||||
assert "会计科目" in terms
|
||||
|
||||
|
||||
def test_user_agent_uses_fast_knowledge_answer_without_model(monkeypatch) -> None:
|
||||
session_factory = build_session_factory()
|
||||
with session_factory() as db:
|
||||
@@ -286,12 +323,170 @@ def test_user_agent_uses_fast_knowledge_answer_without_model(monkeypatch) -> Non
|
||||
)
|
||||
|
||||
assert response.answer.startswith("张三,您好。")
|
||||
assert "当前能直接确认的是" in response.answer
|
||||
assert "**结论**" in response.answer
|
||||
assert "30 日内提交报销申请" in response.answer
|
||||
assert "## 依据" not in response.answer
|
||||
assert "答案整理阶段本轮没有及时返回" not in response.answer
|
||||
|
||||
|
||||
def test_user_agent_fast_knowledge_answer_focuses_inline_section_items() -> None:
|
||||
session_factory = build_session_factory()
|
||||
with session_factory() as db:
|
||||
ontology = SemanticOntologyService(db).parse(
|
||||
OntologyParseRequest(
|
||||
query="主要税种介绍",
|
||||
user_id="pytest",
|
||||
context_json={"session_type": "knowledge"},
|
||||
)
|
||||
)
|
||||
service = UserAgentService(db)
|
||||
|
||||
answer = service._build_fast_knowledge_answer(
|
||||
UserAgentRequest(
|
||||
run_id=ontology.run_id,
|
||||
user_id="pytest",
|
||||
message="主要税种介绍",
|
||||
ontology=ontology,
|
||||
context_json={
|
||||
"session_type": "knowledge",
|
||||
"user_input_text": "主要税种介绍",
|
||||
},
|
||||
tool_payload={
|
||||
"result_type": "knowledge_search",
|
||||
"hits": [
|
||||
{
|
||||
"title": "财务基础知识",
|
||||
"content": (
|
||||
"资产类 银行存款 企业存放在银行的款项 负债类 应付账款 "
|
||||
"因购买商品或接受劳务应付的款项 负债类 应交税费 应缴纳的各种税费 "
|
||||
"第二部分 税务基础知识 三、主要税种介绍 "
|
||||
"(一)增值税:公司为一般纳税人,软件服务适用6%税率,软件产品销售适用13%税率。 "
|
||||
"(二)企业所得税:税率为25%,高新技术企业享受15%优惠税率。 "
|
||||
"(三)个人所得税:员工工资薪金由公司代扣代缴。 "
|
||||
"(四)印花税:购销合同、账簿等按规定缴纳。"
|
||||
),
|
||||
}
|
||||
],
|
||||
},
|
||||
),
|
||||
citations=[],
|
||||
)
|
||||
|
||||
assert answer is not None
|
||||
assert "主要税种介绍包括:增值税、企业所得税、个人所得税、印花税" in answer
|
||||
assert "软件服务适用6%税率" in answer
|
||||
assert "软件产品销售适用13%税率" in answer
|
||||
assert "高新技术企业享受15%优惠税率" in answer
|
||||
assert "员工工资薪金由公司代扣代缴" in answer
|
||||
assert "购销合同、账簿等按规定缴纳" in answer
|
||||
assert "应付账款" not in answer
|
||||
assert "银行存款" not in answer
|
||||
|
||||
|
||||
def test_user_agent_fast_knowledge_answer_summarizes_financial_statements() -> None:
|
||||
session_factory = build_session_factory()
|
||||
with session_factory() as db:
|
||||
ontology = SemanticOntologyService(db).parse(
|
||||
OntologyParseRequest(
|
||||
query="三大财务报表 是什么?",
|
||||
user_id="pytest",
|
||||
context_json={"session_type": "knowledge"},
|
||||
)
|
||||
)
|
||||
service = UserAgentService(db)
|
||||
|
||||
answer = service._build_fast_knowledge_answer(
|
||||
UserAgentRequest(
|
||||
run_id=ontology.run_id,
|
||||
user_id="pytest",
|
||||
message="三大财务报表 是什么?",
|
||||
ontology=ontology,
|
||||
context_json={
|
||||
"session_type": "knowledge",
|
||||
"user_input_text": "三大财务报表 是什么?",
|
||||
},
|
||||
tool_payload={
|
||||
"result_type": "knowledge_search",
|
||||
"hits": [
|
||||
{
|
||||
"title": "财务基础知识",
|
||||
"content": (
|
||||
"第三部分 财务报表解读 四、三大财务报表 "
|
||||
"(一)资产负债表:反映企业在某一特定日期的财务状况。 "
|
||||
"(二)利润表:反映企业在一定期间的经营成果。 "
|
||||
"(三)现金流量表:反映企业在一定期间现金和现金等价物的流入和流出。"
|
||||
),
|
||||
}
|
||||
],
|
||||
},
|
||||
),
|
||||
citations=[],
|
||||
)
|
||||
|
||||
assert answer is not None
|
||||
assert "三大财务报表包括:资产负债表、利润表、现金流量表" in answer
|
||||
assert "资产负债表:反映企业在某一特定日期的财务状况" in answer
|
||||
assert "利润表:反映企业在一定期间的经营成果" in answer
|
||||
assert "现金流量表:反映企业在一定期间现金和现金等价物的流入和流出" in answer
|
||||
assert "第三部分 财务报表解读" not in answer
|
||||
|
||||
|
||||
def test_user_agent_fast_knowledge_answer_expands_broad_accounting_table() -> None:
|
||||
session_factory = build_session_factory()
|
||||
with session_factory() as db:
|
||||
ontology = SemanticOntologyService(db).parse(
|
||||
OntologyParseRequest(
|
||||
query="常用会计科目是什么?",
|
||||
user_id="pytest",
|
||||
context_json={"session_type": "knowledge"},
|
||||
)
|
||||
)
|
||||
service = UserAgentService(db)
|
||||
|
||||
answer = service._build_fast_knowledge_answer(
|
||||
UserAgentRequest(
|
||||
run_id=ontology.run_id,
|
||||
user_id="pytest",
|
||||
message="常用会计科目是什么?",
|
||||
ontology=ontology,
|
||||
context_json={
|
||||
"session_type": "knowledge",
|
||||
"user_input_text": "常用会计科目是什么?",
|
||||
},
|
||||
tool_payload={
|
||||
"result_type": "knowledge_search",
|
||||
"hits": [
|
||||
{
|
||||
"title": "财务基础知识",
|
||||
"content": (
|
||||
"二、常用会计科目\n\n"
|
||||
"| 科目类别 | 科目名称 | 说明 |\n"
|
||||
"| --- | --- | --- |\n"
|
||||
"| 资产类 | 库存现金 | 公司持有的现金 |\n"
|
||||
"| 资产类 | 银行存款 | 存放在银行的资金 |\n"
|
||||
"| 资产类 | 应收账款 | 因销售商品或提供劳务应收的款项 |\n"
|
||||
"| 资产类 | 固定资产 | 使用年限超过一年的有形资产 |\n"
|
||||
"| 负债类 | 应付账款 | 因购买商品或接受劳务应付的款项 |\n"
|
||||
"| 负债类 | 应交税费 | 应缴纳的各种税费 |\n"
|
||||
"| 负债类 | 应付职工薪酬 | 应付给职工的工资、福利等 |\n"
|
||||
"| 损益类 | 主营业务收入 | 主要经营业务产生的收入 |\n"
|
||||
"| 损益类 | 管理费用 | 为管理生产经营发生的费用 |\n"
|
||||
"| 损益类 | 销售费用 | 为销售产品发生的费用 |\n"
|
||||
),
|
||||
}
|
||||
],
|
||||
},
|
||||
),
|
||||
citations=[],
|
||||
)
|
||||
|
||||
assert answer is not None
|
||||
assert "| 科目类别 | 科目名称 | 说明 |" in answer
|
||||
assert "| 资产类 | 库存现金 | 公司持有的现金 |" in answer
|
||||
assert "| 负债类 | 应付职工薪酬 | 应付给职工的工资、福利等 |" in answer
|
||||
assert "| 损益类 | 销售费用 | 为销售产品发生的费用 |" in answer
|
||||
|
||||
|
||||
def test_user_agent_fast_knowledge_answer_renders_relevant_table_preview() -> None:
|
||||
session_factory = build_session_factory()
|
||||
with session_factory() as db:
|
||||
@@ -337,9 +532,65 @@ def test_user_agent_fast_knowledge_answer_renders_relevant_table_preview() -> No
|
||||
assert answer is not None
|
||||
assert "| 项目 | 港澳台 | 其他地区 | 国外 |" in answer
|
||||
assert "| 餐补 | 75 | 55 | 140 |" in answer
|
||||
assert "餐补的标准为" in answer
|
||||
assert "## 依据" not in answer
|
||||
|
||||
|
||||
def test_user_agent_fast_knowledge_answer_uses_user_grade_for_table_row() -> None:
|
||||
session_factory = build_session_factory()
|
||||
with session_factory() as db:
|
||||
ontology = SemanticOntologyService(db).parse(
|
||||
OntologyParseRequest(
|
||||
query="我的住宿费标准是多少?",
|
||||
user_id="pytest",
|
||||
context_json={"session_type": "knowledge"},
|
||||
)
|
||||
)
|
||||
service = UserAgentService(db)
|
||||
|
||||
answer = service._build_fast_knowledge_answer(
|
||||
UserAgentRequest(
|
||||
run_id=ontology.run_id,
|
||||
user_id="pytest",
|
||||
message="我的住宿费标准是多少?",
|
||||
ontology=ontology,
|
||||
context_json={
|
||||
"name": "张三",
|
||||
"grade": "P5",
|
||||
"position": "实施经理",
|
||||
"session_type": "knowledge",
|
||||
"user_input_text": "我的住宿费标准是多少?",
|
||||
},
|
||||
tool_payload={
|
||||
"result_type": "knowledge_search",
|
||||
"hits": [
|
||||
{
|
||||
"title": "费用报销制度",
|
||||
"content": (
|
||||
"# 结构化表格补充\n\n"
|
||||
"## 国内住宿限额标准\n\n"
|
||||
"| 职级 | 直辖市/特区/港澳台 | 省会城市 | 其他地区 |\n"
|
||||
"| --- | --- | --- | --- |\n"
|
||||
"| 公司领导(P8及以上) | 800 | 500 | 400 |\n"
|
||||
"| 高层经理(P7) | 700 | 450 | 400 |\n"
|
||||
"| 中层经理、基层经理(P4~P6、外聘专家) | 600 | 400 | 350 |\n"
|
||||
"| 其他员工 | 500 | 350 | 300 |\n"
|
||||
),
|
||||
}
|
||||
],
|
||||
},
|
||||
),
|
||||
citations=[],
|
||||
)
|
||||
|
||||
assert answer is not None
|
||||
assert answer.startswith("张三,您好。")
|
||||
assert "中层经理、基层经理(P4~P6、外聘专家)的标准为" in answer
|
||||
assert "| 中层经理、基层经理(P4~P6、外聘专家) | 600 | 400 | 350 |" in answer
|
||||
assert "| 公司领导(P8及以上) | 800 | 500 | 400 |" not in answer
|
||||
assert "| 高层经理(P7) | 700 | 450 | 400 |" not in answer
|
||||
|
||||
|
||||
def test_user_agent_fast_knowledge_answer_notes_missing_location_grounding() -> None:
|
||||
session_factory = build_session_factory()
|
||||
with session_factory() as db:
|
||||
@@ -384,6 +635,7 @@ def test_user_agent_fast_knowledge_answer_notes_missing_location_grounding() ->
|
||||
|
||||
assert answer is not None
|
||||
assert "没有直接写出“北京”对应的地区档位或映射关系" in answer
|
||||
assert "**说明**" in answer
|
||||
assert "## 依据" not in answer
|
||||
|
||||
|
||||
@@ -429,7 +681,7 @@ def test_user_agent_fast_knowledge_answer_expands_lead_in_list_items() -> None:
|
||||
)
|
||||
|
||||
assert answer is not None
|
||||
assert "当前能直接确认的是" in answer
|
||||
assert "**结论**" in answer
|
||||
assert "登机牌、高速道路通行记录" in answer
|
||||
assert "支付记录" in answer
|
||||
assert "出差审批邮件、短信、微信等" in answer
|
||||
|
||||
Reference in New Issue
Block a user