feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重,优
化本体检测和规则匹配精度,前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式,新增日志详情组件和
知识入库日志模型,补充单元测试覆盖。
This commit is contained in:
caoxiaozhu
2026-05-22 23:47:28 +08:00
parent 88ff04bef8
commit 5b388d08c0
84 changed files with 10170 additions and 2599 deletions

View File

@@ -120,7 +120,7 @@ class UserAgentReviewDocumentCard(BaseModel):
filename: str = Field(description="原始文件名。")
document_type: str = Field(default="other", description="票据候选类型。")
suggested_expense_type: str = Field(default="other", description="建议归属费用类型。")
scene_label: str = Field(default="", description="面向用户展示的场景标签。")
scene_label: str = Field(default="", description="面向用户展示的票据类型标签。")
summary: str = Field(default="", description="逐票据摘要。")
avg_score: float = Field(default=0.0, ge=0.0, le=1.0, description="OCR 平均得分。")
preview_kind: str = Field(default="", description="票据预览类型,例如 image。")

View File

@@ -3,6 +3,8 @@ from __future__ import annotations
import re
from decimal import Decimal
from app.services.expense_type_keywords import iter_expense_keywords
EXPENSE_TYPE_LABELS = {
"travel": "差旅",
"train_ticket": "火车票",
@@ -12,10 +14,10 @@ EXPENSE_TYPE_LABELS = {
"travel_allowance": "出差补贴",
"hotel": "住宿",
"transport": "交通",
"meal": "餐费",
"meal": "业务招待",
"meeting": "会务",
"entertainment": "招待",
"office": "办公",
"office": "办公用品",
"training": "培训",
"communication": "通讯",
"welfare": "福利",
@@ -131,40 +133,19 @@ DOCUMENT_ROUTE_DESTINATION_LABELS = {
GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES = {"", "other", "travel", "transport", "hotel"}
LOCATION_REQUIRED_EXPENSE_TYPES = {"travel", "meeting", "entertainment"}
EXPENSE_SCENE_KEYWORDS = {
"travel": ("差旅", "出差", "行程"),
"hotel": ("酒店", "住宿", "房费", "客房", "入住", "离店"),
"transport": (
"交通",
"打车",
"出租车",
"网约车",
"滴滴",
"出行",
"乘车",
"用车",
"叫车",
"车费",
"车资",
"的士",
"高铁",
"动车",
"火车",
"机票",
"航班",
"行程单",
"登机",
"客票",
"公交",
"地铁",
"过路费",
"通行费",
"停车",
),
"meal": ("餐饮", "餐费", "用餐", "外卖", "快餐", "酒楼", "饭店", "饭馆", "食品", "咖啡"),
"entertainment": ("招待", "宴请", "接待", "客户餐", "商务餐", "业务招待"),
"office": ("办公", "办公用品", "文具", "耗材", "打印", "纸张", "硒鼓", "墨盒", "鼠标", "键盘", "电脑"),
"meeting": ("会议", "会务", "会展", "会议室", "会场", "场地费", "论坛"),
"training": ("培训", "课程", "讲师", "教材", "学费", "认证"),
code: tuple(iter_expense_keywords(code))
for code in (
"travel",
"hotel",
"transport",
"meal",
"entertainment",
"office",
"meeting",
"training",
"communication",
"welfare",
)
}
EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES = {
"travel": {"travel", "hotel", "transport", "meal"},
@@ -185,7 +166,7 @@ DOCUMENT_SCENE_LABELS = {
"travel": "差旅",
"hotel": "住宿",
"transport": "交通",
"meal": "餐饮",
"meal": "业务招待",
"entertainment": "业务招待",
"office": "办公用品",
"meeting": "会务",

View File

@@ -87,6 +87,7 @@ from app.services.expense_claim_constants import (
TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_type_keywords import resolve_expense_type_code_from_text
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
from app.services.expense_amounts import (
extract_amount_candidates,
@@ -209,26 +210,7 @@ class ExpenseClaimOntologyResolverMixin:
or ""
).replace(" ", "")
if compact:
if "招待" in compact or ("客户" in compact and any(word in compact for word in ("吃饭", "宴请", "请客", "用餐"))):
return "entertainment"
if any(word in compact for word in ("差旅", "出差", "机票", "行程")):
return "travel"
if any(word in compact for word in ("住宿", "酒店", "宾馆")):
return "hotel"
if any(word in compact for word in ("交通", "打车", "网约车", "出租车", "乘车", "用车", "叫车", "车费", "车资", "的士", "停车")):
return "transport"
if any(word in compact for word in ("餐费", "用餐", "午餐", "晚餐", "早餐", "伙食")):
return "meal"
if "会务" in compact:
return "meeting"
if any(word in compact for word in ("办公费", "办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板")):
return "office"
if any(word in compact for word in ("培训费", "培训", "讲师费", "课时费", "课程费")):
return "training"
if any(word in compact for word in ("通讯费", "话费", "流量费", "宽带费")):
return "communication"
if any(word in compact for word in ("福利费", "团建", "慰问", "节日福利", "体检费")):
return "welfare"
return resolve_expense_type_code_from_text(compact)
return None
@staticmethod

View File

@@ -538,8 +538,8 @@ class ExpenseRuleRuntimeService:
if any(keyword in normalized for keyword in ("市内交通", "打车", "网约车", "出租车")):
return "transport"
if "招待" in normalized and "" in normalized:
return "entertainment"
if "餐补" in normalized or normalized == "餐费":
return "meal"
if "餐补" in normalized or normalized in {"餐费", "业务招待费"}:
return "meal"
return ""
@@ -547,7 +547,7 @@ class ExpenseRuleRuntimeService:
def _spreadsheet_metric_label(expense_type: str) -> str:
return {
"transport": "单笔交通金额",
"meal": "差旅餐补金额",
"meal": "业务招待费金额",
"entertainment": "人均招待餐费",
}.get(expense_type, "金额")

View File

@@ -8,6 +8,7 @@ EXPENSE_RULE_CODE_BLOCK_PATTERN = re.compile(r"```expense-rule\s*(\{.*?\})\s*```
DOCUMENT_TYPE_LABELS = {
"flight_itinerary": "机票/航班行程单",
"train_ticket": "火车/高铁票",
"ship_ticket": "轮船票",
"hotel_invoice": "酒店住宿票据",
"taxi_receipt": "出租车/网约车票据",
"parking_toll_receipt": "停车/通行费票据",
@@ -24,9 +25,9 @@ SCENE_LABELS = {
"travel": "差旅",
"hotel": "住宿",
"transport": "交通",
"meal": "餐饮",
"meal": "业务招待",
"entertainment": "业务招待",
"office": "办公",
"office": "办公用品",
"meeting": "会务",
"training": "培训",
"communication": "通讯",
@@ -73,7 +74,7 @@ DEFAULT_SCENE_MATRIX_CONFIG: dict[str, Any] = {
},
},
"meal": {
"label": "",
"label": "业务招待",
"location_required": False,
"min_attachment_count": 1,
"allowed_scene_codes": ["meal"],
@@ -84,7 +85,7 @@ DEFAULT_SCENE_MATRIX_CONFIG: dict[str, Any] = {
"warn_amount": "300.00",
"block_amount": "800.00",
"exception_keywords": ["客户接待", "团队活动", "加班", "展会", "超标说明"],
"metric_label": "费合计",
"metric_label": "业务招待费合计",
},
},
"entertainment": {
@@ -103,7 +104,7 @@ DEFAULT_SCENE_MATRIX_CONFIG: dict[str, Any] = {
},
},
"office": {
"label": "办公费",
"label": "办公用品",
"location_required": False,
"min_attachment_count": 1,
"allowed_scene_codes": ["office"],
@@ -114,7 +115,7 @@ DEFAULT_SCENE_MATRIX_CONFIG: dict[str, Any] = {
"warn_amount": "1500.00",
"block_amount": "5000.00",
"exception_keywords": ["批量采购", "固定资产", "部门集中采购", "超标说明"],
"metric_label": "办公费合计",
"metric_label": "办公用品费合计",
},
},
"meeting": {

View File

@@ -0,0 +1,245 @@
from __future__ import annotations
from typing import Iterable
# Ordered registry of expense types. Each entry is (code, display label, keywords).
# Order matters to the helpers in this module: resolve_expense_type_code_from_text
# returns the code of the first group that has a keyword contained in the text, and
# build_expense_type_keyword_map keeps the first group's code when a keyword is
# listed more than once.
EXPENSE_TYPE_KEYWORD_GROUPS: tuple[tuple[str, str, tuple[str, ...]], ...] = (
(
"travel",
"差旅费",
(
"差旅费",
"差旅",
"出差",
"外地出差",
"跨城交通",
"往返车票",
"机票",
"飞机票",
"航班",
"登机牌",
"行程单",
"火车票",
"高铁票",
"动车票",
"铁路客票",
"客票",
),
),
(
"hotel",
"住宿费",
(
"住宿费",
"住宿",
"酒店发票",
"酒店",
"宾馆",
"民宿",
"房费",
"客房",
"住店",
"入住",
"离店",
"住宿清单",
),
),
(
"transport",
"交通费",
(
"交通费",
"交通",
"市内交通",
"打车",
"网约车",
"出租车票",
"出租车",
"的士票",
"的士",
"滴滴",
"曹操出行",
"T3出行",
"出行",
"乘车费",
"乘车",
"用车",
"叫车",
"车费",
"车资",
"公交",
"地铁",
"停车费",
"停车",
"过路费",
"通行费",
"高速费",
"燃油费",
"油费",
),
),
(
"meal",
"业务招待费",
(
"业务招待费",
"业务招待",
"招待费",
"招待",
"客户招待",
"客户接待",
"商务接待",
"商务宴请",
"宴请",
"请客",
"请客户",
"客户用餐",
"客户餐",
"客户吃饭",
"陪同用餐",
"接待餐",
"餐费",
"伙食费",
"伙食",
"工作餐",
"餐饮",
"用餐",
"早餐",
"午餐",
"晚餐",
"夜宵",
"盒饭",
"茶歇",
"餐票",
"饭票",
),
),
(
"meeting",
"会务费",
(
"会务费",
"会务",
"会议费",
"会议",
"参会",
"会场",
"场地费",
"论坛",
"展会",
"研讨会",
"峰会",
"布展",
),
),
(
"office",
"办公用品费",
(
"办公用品费",
"办公费",
"办公用品",
"办公耗材",
"办公设备",
"办公",
"文具",
"耗材",
"打印纸",
"打印",
"纸张",
"硒鼓",
"墨盒",
"键盘",
"鼠标",
"白板",
"电脑配件",
),
),
(
"training",
"培训费",
(
"培训费",
"培训",
"讲师费",
"讲师",
"课时费",
"课程费",
"课程",
"教材",
"学费",
"考试费",
"认证费",
"认证",
),
),
(
"communication",
"通讯费",
(
"通讯费",
"通讯",
"话费",
"电话费",
"手机费",
"流量费",
"流量",
"宽带费",
"宽带",
"网络费",
),
),
(
"welfare",
"福利费",
(
"福利费",
"福利",
"团建",
"慰问",
"节日福利",
"体检费",
"体检",
"员工关怀",
),
),
)
# Display label for every expense type code that has a keyword group.
EXPENSE_TYPE_LABEL_BY_CODE = {
code: label for code, label, _keywords in EXPENSE_TYPE_KEYWORD_GROUPS
}
# No keyword group uses the "entertainment" code, so its label is registered explicitly.
EXPENSE_TYPE_LABEL_BY_CODE.setdefault("entertainment", "业务招待费")
def build_expense_type_keyword_map() -> dict[str, str]:
    """Return a keyword -> expense type code lookup built from the keyword groups.

    When a keyword is listed under more than one group, the code of the
    earliest group in ``EXPENSE_TYPE_KEYWORD_GROUPS`` is kept.
    """
    keyword_to_code: dict[str, str] = {}
    for type_code, _label, group_keywords in EXPENSE_TYPE_KEYWORD_GROUPS:
        for word in group_keywords:
            # First registration wins; later duplicates are ignored.
            if word not in keyword_to_code:
                keyword_to_code[word] = type_code
    return keyword_to_code
def iter_expense_keywords(*codes: str) -> Iterable[str]:
    """Yield the keywords of the requested expense type codes, in group order.

    Blank or falsy codes are ignored. When no usable code remains (including
    a call without any argument), the keywords of every group are yielded.
    """
    wanted: set[str] = set()
    for raw_code in codes:
        normalized = str(raw_code or "").strip()
        if normalized:
            wanted.add(normalized)
    for group_code, _label, group_keywords in EXPENSE_TYPE_KEYWORD_GROUPS:
        if not wanted or group_code in wanted:
            yield from group_keywords
def resolve_expense_type_code_from_text(value: str) -> str | None:
    """Return the code of the first keyword group matching ``value``.

    ASCII spaces are removed from the text before matching. A group matches
    when any of its keywords is a substring of the text; groups are tried in
    the order they appear in ``EXPENSE_TYPE_KEYWORD_GROUPS``. Returns ``None``
    for empty text or when nothing matches.
    """
    text = str(value or "").replace(" ", "")
    if not text:
        return None
    return next(
        (
            type_code
            for type_code, _label, group_keywords in EXPENSE_TYPE_KEYWORD_GROUPS
            if any(word in text for word in group_keywords)
        ),
        None,
    )
def resolve_expense_type_label_from_text(value: str) -> tuple[str, str] | None:
    """Return ``(code, label)`` for the expense type detected in ``value``.

    Returns ``None`` when no expense type is detected. If the detected code
    has no registered label, the stripped input text is used as the label.
    """
    matched_code = resolve_expense_type_code_from_text(value)
    if not matched_code:
        return None
    if matched_code in EXPENSE_TYPE_LABEL_BY_CODE:
        label = EXPENSE_TYPE_LABEL_BY_CODE[matched_code]
    else:
        label = str(value or "").strip()
    return matched_code, label

View File

@@ -63,6 +63,7 @@ class KnowledgeIndexTaskManager:
heartbeat_stop = threading.Event()
heartbeat_thread: threading.Thread | None = None
tool_call_id = ""
knowledge_ingest: dict[str, Any] | None = None
tool_request_json = {
"agent": AgentName.HERMES.value,
"folder": folder,
@@ -74,6 +75,10 @@ class KnowledgeIndexTaskManager:
run_service = AgentRunService(db)
knowledge_service = KnowledgeService(db=db)
rag_service = KnowledgeRagService(db=db)
knowledge_ingest = _build_initial_knowledge_ingest_state(
knowledge_service,
document_ids=document_ids,
)
run_service.merge_route_json(
agent_run_id,
@@ -93,7 +98,18 @@ class KnowledgeIndexTaskManager:
"skipped_documents": 0,
"percent": 10 if document_ids else 100,
},
"knowledge_ingest": knowledge_ingest,
},
result_summary=_build_ingest_running_summary(
knowledge_ingest,
{
"total_documents": len(document_ids),
"completed_documents": 0,
"failed_documents": 0,
"skipped_documents": 0,
"percent": 10 if document_ids else 100,
},
),
)
tool_call = run_service.record_tool_call(
run_id=agent_run_id,
@@ -134,44 +150,159 @@ class KnowledgeIndexTaskManager:
)
heartbeat_thread.start()
response = rag_service.index_documents(document_ids=document_ids, force=force)
succeeded_document_ids = [
str(item).strip()
for item in list(response.get("succeeded_document_ids") or [])
if str(item).strip()
]
failed_documents = [
item
for item in list(response.get("failed_documents") or [])
if isinstance(item, dict)
]
responses: list[dict[str, Any]] = []
succeeded_document_ids: list[str] = []
failed_documents: list[dict[str, str]] = []
total_documents = len(document_ids)
for index, document_id in enumerate(document_ids, start=1):
_patch_ingest_document(
knowledge_ingest,
document_id,
{
"status": "running",
"phase": "indexing",
"started_at": datetime.now(UTC).isoformat(),
},
event=f"开始处理第 {index}/{total_documents} 个文件,正在写入 LightRAG。",
)
knowledge_ingest["current_document_id"] = document_id
_sync_ingest_route_json(
run_service,
agent_run_id,
knowledge_ingest,
progress=_build_ingest_progress(knowledge_ingest, total_documents),
)
try:
response = rag_service.index_documents(document_ids=[document_id], force=force)
except Exception as exc:
logger.exception(
"Knowledge document index failed run_id=%s doc_id=%s",
agent_run_id,
document_id,
)
failed_documents.append(
{
"document_id": document_id,
"status": "exception",
"error": str(exc),
}
)
_patch_ingest_document(
knowledge_ingest,
document_id,
{
"status": "failed",
"phase": "failed",
"finished_at": datetime.now(UTC).isoformat(),
"error": str(exc),
},
event=f"归集失败:{exc}",
level="error",
)
knowledge_service.set_document_ingest_statuses(
[document_id],
KNOWLEDGE_INGEST_STATUS_FAILED,
agent_run_id=agent_run_id,
)
_refresh_ingest_graph(knowledge_ingest)
_sync_ingest_route_json(
run_service,
agent_run_id,
knowledge_ingest,
progress=_build_ingest_progress(knowledge_ingest, total_documents),
)
continue
responses.append(response)
response_failed_documents = _extract_failed_documents(response, document_id)
document_summary = _extract_document_summary(response, document_id)
if response_failed_documents:
failed_documents.extend(response_failed_documents)
error_text = (
response_failed_documents[0].get("error") or "LightRAG 未返回可查询状态"
)
_patch_ingest_document(
knowledge_ingest,
document_id,
{
**document_summary,
"status": "failed",
"phase": "failed",
"finished_at": datetime.now(UTC).isoformat(),
"error": error_text,
"track_id": str(response.get("track_id") or "").strip(),
},
event=f"LightRAG 索引失败:{error_text}",
level="error",
)
knowledge_service.set_document_ingest_statuses(
[document_id],
KNOWLEDGE_INGEST_STATUS_FAILED,
agent_run_id=agent_run_id,
)
else:
succeeded_document_ids.append(document_id)
chunk_count = int(document_summary.get("chunk_count") or 0)
entity_count = int(document_summary.get("entity_count") or 0)
relation_count = int(document_summary.get("relation_count") or 0)
_patch_ingest_document(
knowledge_ingest,
document_id,
{
**document_summary,
"status": "succeeded",
"phase": "indexed",
"finished_at": datetime.now(UTC).isoformat(),
"track_id": str(response.get("track_id") or "").strip(),
},
event=(
"LightRAG 索引完成:"
f"{chunk_count} 个 chunk{entity_count} 个实体,"
f"{relation_count} 条关系。"
),
)
knowledge_service.set_document_ingest_statuses(
[document_id],
KNOWLEDGE_INGEST_STATUS_INGESTED,
agent_run_id=agent_run_id,
)
_refresh_ingest_graph(knowledge_ingest)
_sync_ingest_route_json(
run_service,
agent_run_id,
knowledge_ingest,
progress=_build_ingest_progress(knowledge_ingest, total_documents),
)
failed_document_ids = [
str(item.get("document_id") or "").strip()
for item in failed_documents
if str(item.get("document_id") or "").strip()
]
if succeeded_document_ids:
knowledge_service.set_document_ingest_statuses(
succeeded_document_ids,
KNOWLEDGE_INGEST_STATUS_INGESTED,
agent_run_id=agent_run_id,
)
if failed_document_ids:
knowledge_service.set_document_ingest_statuses(
failed_document_ids,
KNOWLEDGE_INGEST_STATUS_FAILED,
agent_run_id=agent_run_id,
)
duration_ms = int((perf_counter() - started) * 1000)
tool_status = "succeeded" if not failed_document_ids else "failed"
latest_track_id = _resolve_latest_track_id(responses)
knowledge_ingest["current_document_id"] = ""
knowledge_ingest["status"] = tool_status
knowledge_ingest["phase"] = "completed"
knowledge_ingest["finished_at"] = datetime.now(UTC).isoformat()
knowledge_ingest["graph"] = _build_ingest_graph(knowledge_ingest)
heartbeat_stop.set()
if heartbeat_thread is not None:
heartbeat_thread.join(timeout=1)
run_service.update_tool_call(
tool_call_id,
response_json=response,
response_json={
"track_id": latest_track_id,
"requested_document_ids": document_ids,
"succeeded_document_ids": succeeded_document_ids,
"failed_documents": failed_documents,
"documents": knowledge_ingest.get("documents", []),
"responses": responses,
},
status=tool_status,
duration_ms=duration_ms,
error_message=None if tool_status == "succeeded" else "部分文档索引失败。",
@@ -183,14 +314,17 @@ class KnowledgeIndexTaskManager:
summary = (
f"LightRAG 已完成 {completed_documents}/{total_documents} 个知识文档索引。"
if failed_count == 0
else f"LightRAG 已完成 {completed_documents}/{total_documents} 个知识文档索引,失败 {failed_count} 个。"
else (
f"LightRAG 已完成 {completed_documents}/{total_documents} 个知识文档索引,"
f"失败 {failed_count} 个。"
)
)
run_service.merge_route_json(
agent_run_id,
{
"job_type": "knowledge_index_sync",
"phase": "completed",
"track_id": str(response.get("track_id") or "").strip(),
"track_id": latest_track_id,
"heartbeat_at": datetime.now(UTC).isoformat(),
"progress": {
"total_documents": total_documents,
@@ -199,6 +333,7 @@ class KnowledgeIndexTaskManager:
"skipped_documents": 0,
"percent": 100,
},
"knowledge_ingest": knowledge_ingest,
},
status=(
AgentRunStatus.SUCCEEDED.value
@@ -234,24 +369,50 @@ class KnowledgeIndexTaskManager:
error_message=str(exc),
)
KnowledgeService(db=db).set_document_ingest_statuses(
document_ids,
_resolve_failed_ingest_document_ids(knowledge_ingest, document_ids),
KNOWLEDGE_INGEST_STATUS_FAILED,
agent_run_id=agent_run_id,
)
if knowledge_ingest is not None:
for document_id in document_ids:
document = _find_ingest_document(knowledge_ingest, document_id)
if document is None or document.get("status") in {"succeeded", "failed"}:
continue
_patch_ingest_document(
knowledge_ingest,
document_id,
{
"status": "failed",
"phase": "failed",
"finished_at": datetime.now(UTC).isoformat(),
"error": str(exc),
},
event=f"归集任务中断:{exc}",
level="error",
)
knowledge_ingest["status"] = "failed"
knowledge_ingest["phase"] = "failed"
knowledge_ingest["current_document_id"] = ""
knowledge_ingest["finished_at"] = datetime.now(UTC).isoformat()
knowledge_ingest["graph"] = _build_ingest_graph(knowledge_ingest)
route_payload: dict[str, Any] = {
"job_type": "knowledge_index_sync",
"phase": "failed",
"heartbeat_at": datetime.now(UTC).isoformat(),
"progress": {
"total_documents": len(document_ids),
"completed_documents": 0,
"failed_documents": len(document_ids),
"skipped_documents": 0,
"percent": 100,
},
}
if knowledge_ingest is not None:
route_payload["knowledge_ingest"] = knowledge_ingest
AgentRunService(db).merge_route_json(
agent_run_id,
{
"job_type": "knowledge_index_sync",
"phase": "failed",
"heartbeat_at": datetime.now(UTC).isoformat(),
"progress": {
"total_documents": len(document_ids),
"completed_documents": 0,
"failed_documents": len(document_ids),
"skipped_documents": 0,
"percent": 100,
},
},
route_payload,
status=AgentRunStatus.FAILED.value,
result_summary=str(exc),
error_message=str(exc),
@@ -267,4 +428,312 @@ class KnowledgeIndexTaskManager:
db.close()
def _build_initial_knowledge_ingest_state(
    knowledge_service: KnowledgeService,
    *,
    document_ids: list[str],
) -> dict[str, Any]:
    """Build the initial ingest-log state for a batch of documents.

    Every document gets a "queued" record sharing one timestamp. The state
    starts with status "running", phase "queued", the first document as the
    current one (empty string for an empty batch) and a graph summary built
    from the freshly created records.
    """
    queued_at = datetime.now(UTC).isoformat()
    documents: list[dict[str, Any]] = []
    for document_id in document_ids:
        documents.append(
            _build_initial_knowledge_ingest_document(
                knowledge_service,
                document_id,
                now=queued_at,
            )
        )
    first_document_id = documents[0]["document_id"] if documents else ""
    initial_graph = _build_ingest_graph({"documents": documents})
    return {
        "schema_version": 1,
        "status": "running",
        "phase": "queued",
        "started_at": queued_at,
        "finished_at": None,
        "current_document_id": first_document_id,
        "documents": documents,
        "graph": initial_graph,
    }
def _build_initial_knowledge_ingest_document(
    knowledge_service: KnowledgeService,
    document_id: str,
    *,
    now: str,
) -> dict[str, Any]:
    """Build the initial "queued" log record for one document.

    Metadata (name, folder, extension, MIME type) is read from
    ``knowledge_service.get_document_entry``. The lookup is best-effort: when
    it raises, or returns something without a ``get`` method (for example
    ``None``), the record is still produced, using the document id as the
    name and empty metadata, instead of raising ``AttributeError``.

    Args:
        knowledge_service: Object exposing ``get_document_entry(document_id)``.
        document_id: Identifier of the document being queued.
        now: Timestamp string stored on the initial queue event.

    Returns:
        A record with status/phase "queued", zeroed counters, empty
        collections and a single informational queue event.
    """
    try:
        entry = knowledge_service.get_document_entry(document_id)
    except Exception:
        # Deliberate best-effort: missing metadata must not prevent the
        # record from being created.
        entry = {}
    if not callable(getattr(entry, "get", None)):
        # A lookup that "succeeds" with None (or another non-mapping value)
        # is treated the same as a failed lookup.
        entry = {}
    return {
        "document_id": document_id,
        "name": str(entry.get("original_name") or document_id).strip(),
        "folder": str(entry.get("folder") or "").strip(),
        "extension": str(entry.get("extension") or "").strip(),
        "mime_type": str(entry.get("mime_type") or "").strip(),
        "status": "queued",
        "phase": "queued",
        "started_at": None,
        "finished_at": None,
        "text_chars": 0,
        "indexed_text_chars": 0,
        "section_count": 0,
        "sections": [],
        "chunk_count": 0,
        "chunk_ids": [],
        "chunks": [],
        "entity_count": 0,
        "relation_count": 0,
        "entities": [],
        "relations": [],
        "events": [
            {
                "at": now,
                "level": "info",
                "message": "已进入知识归集队列,等待 LightRAG 处理。",
            }
        ],
    }
def _patch_ingest_document(
    knowledge_ingest: dict[str, Any],
    document_id: str,
    updates: dict[str, Any],
    *,
    event: str = "",
    level: str = "info",
) -> None:
    """Apply ``updates`` to one document record and optionally log an event.

    Does nothing when the document is not present in ``knowledge_ingest``.
    An event is appended only when ``event`` is non-empty.
    """
    target = _find_ingest_document(knowledge_ingest, document_id)
    if target is not None:
        target.update(updates)
        if event:
            _append_ingest_event(target, event, level=level)
def _append_ingest_event(document: dict[str, Any], message: str, *, level: str) -> None:
events = document.get("events")
if not isinstance(events, list):
events = []
events.append(
{
"at": datetime.now(UTC).isoformat(),
"level": level,
"message": message,
}
)
document["events"] = events[-30:]
def _find_ingest_document(
    knowledge_ingest: dict[str, Any],
    document_id: str,
) -> dict[str, Any] | None:
    """Return the first document record whose stripped id equals ``document_id``.

    Non-dict entries are ignored. Returns ``None`` when nothing matches. The
    returned dict is the stored record itself, not a copy.
    """
    candidates = knowledge_ingest.get("documents") or []
    return next(
        (
            candidate
            for candidate in candidates
            if isinstance(candidate, dict)
            and str(candidate.get("document_id") or "").strip() == document_id
        ),
        None,
    )
def _sync_ingest_route_json(
    run_service: AgentRunService,
    agent_run_id: str,
    knowledge_ingest: dict[str, Any],
    *,
    progress: dict[str, int],
) -> None:
    """Publish the current ingest state to the agent run.

    Calls ``run_service.merge_route_json`` with phase "indexing", a fresh
    heartbeat timestamp, the given progress and ingest state, and a running
    summary as ``result_summary``.
    """
    route_payload = {
        "job_type": "knowledge_index_sync",
        "phase": "indexing",
        "heartbeat_at": datetime.now(UTC).isoformat(),
        "progress": progress,
        "knowledge_ingest": knowledge_ingest,
    }
    running_summary = _build_ingest_running_summary(knowledge_ingest, progress)
    run_service.merge_route_json(
        agent_run_id,
        route_payload,
        result_summary=running_summary,
    )
def _build_ingest_running_summary(
    knowledge_ingest: dict[str, Any],
    progress: dict[str, int],
) -> str:
    """Return the human-readable summary shown while an ingest run is active.

    When the state names a current document that can be found, the summary
    reports its position, its name and the completed/failed counters.
    Otherwise a generic "running" summary with the counters is returned.

    Args:
        knowledge_ingest: Ingest state; ``current_document_id`` and
            ``documents`` are read.
        progress: Counters; ``total_documents``, ``completed_documents`` and
            ``failed_documents`` are read, and missing values count as 0.
    """
    total_documents = int(progress.get("total_documents") or 0)
    completed_documents = int(progress.get("completed_documents") or 0)
    failed_documents = int(progress.get("failed_documents") or 0)
    current_document_id = str(knowledge_ingest.get("current_document_id") or "").strip()
    current_document = (
        _find_ingest_document(knowledge_ingest, current_document_id)
        if current_document_id
        else None
    )
    if current_document is not None:
        name = str(current_document.get("name") or current_document_id).strip()
        current_index = _resolve_ingest_document_index(knowledge_ingest, current_document_id)
        # Separators keep the position, the file name and the counters apart;
        # without them a name starting with a digit fuses with the total.
        return (
            f"知识归纳正在处理 {current_index}/{total_documents}:{name},"
            f"已完成 {completed_documents} 个,失败 {failed_documents} 个。"
        )
    return (
        f"知识归纳正在运行,已完成 {completed_documents}/{total_documents} 个文档,"
        f"失败 {failed_documents} 个。"
    )
def _resolve_ingest_document_index(
    knowledge_ingest: dict[str, Any],
    document_id: str,
) -> int:
    """Return the 1-based position of a document among the dict records.

    Non-dict entries are not counted. Returns 0 when the document is absent.
    """
    position = 0
    for candidate in knowledge_ingest.get("documents") or []:
        if not isinstance(candidate, dict):
            continue
        position += 1
        if str(candidate.get("document_id") or "").strip() == document_id:
            return position
    return 0
def _build_ingest_progress(
    knowledge_ingest: dict[str, Any],
    total_documents: int,
) -> dict[str, int]:
    """Compute progress counters from the per-document statuses.

    Documents with status "succeeded", "failed" or "skipped" count as done.
    The percentage is 100 when ``total_documents`` is not positive; otherwise
    it scales from 10 with the share of done documents and is capped at 95.
    """
    statuses = [
        record.get("status")
        for record in (knowledge_ingest.get("documents") or [])
        if isinstance(record, dict)
    ]
    succeeded = statuses.count("succeeded")
    failed = statuses.count("failed")
    skipped = statuses.count("skipped")
    finished = succeeded + failed + skipped
    if total_documents > 0:
        percent = min(95, max(10, 10 + int(finished * 85 / total_documents)))
    else:
        percent = 100
    return {
        "total_documents": total_documents,
        "completed_documents": succeeded,
        "failed_documents": failed,
        "skipped_documents": skipped,
        "percent": percent,
    }
def _extract_document_summary(response: dict[str, Any], document_id: str) -> dict[str, Any]:
    """Return a copy of the document's entry in ``response["document_summaries"]``.

    Non-dict entries are ignored. Returns an empty dict when no entry has a
    stripped ``document_id`` equal to the requested one.
    """
    summaries = response.get("document_summaries") or []
    match = next(
        (
            entry
            for entry in summaries
            if isinstance(entry, dict)
            and str(entry.get("document_id") or "").strip() == document_id
        ),
        None,
    )
    return dict(match) if match is not None else {}
def _extract_failed_documents(
    response: dict[str, Any],
    document_id: str,
) -> list[dict[str, str]]:
    """Return the normalized failure entries that belong to ``document_id``.

    Entries naming a different document and non-dict entries are dropped.
    Entries without a document id are attributed to ``document_id``. Missing
    status and error values fall back to "failed" and a default error text.
    """
    normalized: list[dict[str, str]] = []
    for entry in response.get("failed_documents") or []:
        reported_id = (
            str(entry.get("document_id") or "").strip() if isinstance(entry, dict) else None
        )
        # Keep only dict entries that are unlabelled or labelled with this id.
        if reported_id is None or reported_id not in ("", document_id):
            continue
        normalized.append(
            {
                "document_id": reported_id or document_id,
                "status": str(entry.get("status") or "failed").strip(),
                "error": str(entry.get("error") or "LightRAG 索引失败").strip(),
            }
        )
    return normalized
def _resolve_failed_ingest_document_ids(
    knowledge_ingest: dict[str, Any] | None,
    document_ids: list[str],
) -> list[str]:
    """Return the ids that should be marked failed after an aborted run.

    Without an ingest state, every requested id is returned (the same list
    object). Otherwise the result holds every tracked document whose status
    is not "succeeded", followed by requested ids the state does not track.
    """
    if knowledge_ingest is None:
        return document_ids
    tracked_ids: set[str] = set()
    not_succeeded: list[str] = []
    for record in knowledge_ingest.get("documents") or []:
        if not isinstance(record, dict):
            continue
        record_id = str(record.get("document_id") or "").strip()
        if not record_id:
            continue
        tracked_ids.add(record_id)
        if record.get("status") != "succeeded":
            not_succeeded.append(record_id)
    untracked = [requested for requested in document_ids if requested not in tracked_ids]
    return not_succeeded + untracked
def _refresh_ingest_graph(knowledge_ingest: dict[str, Any]) -> None:
    """Recompute the aggregated graph summary and store it under ``"graph"``."""
    rebuilt_graph = _build_ingest_graph(knowledge_ingest)
    knowledge_ingest["graph"] = rebuilt_graph
def _build_ingest_graph(knowledge_ingest: dict[str, Any]) -> dict[str, Any]:
    """Aggregate per-document graph data into one summary.

    Counts are the sums of the per-document counters. Entities and relations
    are de-duplicated across documents and limited to the first 60 each.
    """
    records = [
        record
        for record in (knowledge_ingest.get("documents") or [])
        if isinstance(record, dict)
    ]
    entity_pool = [name for record in records for name in (record.get("entities") or [])]
    unique_entities = _dedupe_text_items(entity_pool)
    relation_pool = [link for record in records for link in (record.get("relations") or [])]
    unique_relations = _dedupe_relations(relation_pool)

    def _total(field: str) -> int:
        return sum(_to_int(record.get(field)) for record in records)

    return {
        "chunk_count": _total("chunk_count"),
        "entity_count": _total("entity_count"),
        "relation_count": _total("relation_count"),
        "entities": unique_entities[:60],
        "relations": unique_relations[:60],
    }
def _dedupe_text_items(items: Any) -> list[str]:
    """Return the stripped, non-empty texts of ``items`` without duplicates.

    Falsy items become empty text and are dropped. The order of first
    appearance is preserved.
    """
    cleaned = (str(item or "").strip() for item in items)
    # dict keys keep insertion order, so this keeps the first occurrence only.
    return list(dict.fromkeys(text for text in cleaned if text))
def _dedupe_relations(items: Any) -> list[dict[str, str]]:
    """Return normalized relations, unique by (source, target, type).

    Non-dict items and items lacking a source or target are dropped. A
    missing type defaults to "关联". First-appearance order is preserved.
    """
    unique: dict[tuple[str, str, str], dict[str, str]] = {}
    for entry in items:
        if not isinstance(entry, dict):
            continue
        origin = str(entry.get("source") or "").strip()
        destination = str(entry.get("target") or "").strip()
        kind = str(entry.get("type") or "关联").strip()
        if origin and destination:
            unique.setdefault(
                (origin, destination, kind),
                {"source": origin, "target": destination, "type": kind},
            )
    return list(unique.values())
def _resolve_latest_track_id(responses: list[dict[str, Any]]) -> str:
    """Return the non-blank ``track_id`` of the most recent response, or ``""``."""
    newest_first = (str(response.get("track_id") or "").strip() for response in reversed(responses))
    return next((track_id for track_id in newest_first if track_id), "")
def _to_int(value: Any) -> int:
    """Convert ``value`` to ``int``, returning 0 when that is not possible.

    Falsy values count as 0. Values ``int()`` rejects (wrong type, malformed
    text, NaN) and infinite floats, which raise ``OverflowError``, yield 0.
    """
    try:
        return int(value or 0)
    except (TypeError, ValueError, OverflowError):
        return 0
# Module-level KnowledgeIndexTaskManager instance created at import time.
knowledge_index_task_manager = KnowledgeIndexTaskManager()

View File

@@ -0,0 +1,224 @@
from __future__ import annotations
import json
import re
from pathlib import Path
from typing import Any
# Caps on how many items of each kind are kept in an ingest log summary.
MAX_INGEST_LOG_CHUNKS = 24
MAX_INGEST_LOG_ENTITIES = 24
MAX_INGEST_LOG_RELATIONS = 24
MAX_INGEST_LOG_SECTIONS = 12
# Maximum length, in characters, of text previews (section excerpts, chunk summaries).
MAX_INGEST_LOG_TEXT_PREVIEW = 180
# A line is a section heading when it is a Markdown heading with one to four "#"
# marks, or starts with "第" + Chinese/Arabic numerals + one of "章", "节", "条".
INGEST_SECTION_HEADING_PATTERN = re.compile(
r"^(?:#{1,4}\s+.+|第[一二三四五六七八九十百零0-9]+[章节条]\s*.*)$"
)
def build_ingest_document_summary(
    *,
    document_id: str,
    entry: dict[str, Any],
    raw_text: str,
    indexed_text: str,
) -> dict[str, Any]:
    """Build the text-level part of a document's ingest log summary.

    Records the document metadata from ``entry``, the character counts of the
    raw and indexed text, and the sections detected in the indexed text.
    Chunk, entity and relation fields are initialised empty.
    """

    def _entry_text(key: str) -> str:
        return str(entry.get(key) or "").strip()

    raw = str(raw_text or "")
    indexed = str(indexed_text or "")
    detected_sections = _extract_ingest_sections(indexed)
    return {
        "document_id": document_id,
        "name": _entry_text("original_name"),
        "folder": _entry_text("folder"),
        "extension": _entry_text("extension"),
        "mime_type": _entry_text("mime_type"),
        "text_chars": len(raw),
        "indexed_text_chars": len(indexed),
        "section_count": len(detected_sections),
        "sections": detected_sections,
        "chunk_count": 0,
        "chunk_ids": [],
        "chunks": [],
        "entity_count": 0,
        "relation_count": 0,
        "entities": [],
        "relations": [],
    }
def build_ingest_status_summary(
    *,
    status_payload: dict[str, Any],
    graph_summary: dict[str, Any],
) -> dict[str, Any]:
    """Combine the LightRAG status payload with a document's graph summary.

    The result holds the status text, the ``query_ready`` flag, the chunk
    count and a capped list of chunk ids. The ``graph_summary`` entries are
    merged in last, so they win on key collisions.
    """
    chunk_ids = _normalize_chunk_ids(status_payload)
    summary: dict[str, Any] = {
        "lightrag_status": str(status_payload.get("status") or "").strip(),
        "query_ready": bool(status_payload.get("query_ready")),
        "chunk_count": _resolve_chunk_count(status_payload, chunk_ids),
        "chunk_ids": chunk_ids[:MAX_INGEST_LOG_CHUNKS],
    }
    summary.update(graph_summary)
    return summary
def build_document_graph_summary(
    storage_root: Path,
    *,
    workspace: str,
    document_id: str,
) -> dict[str, Any]:
    """Summarise one document's entities, relations and chunks.

    Reads the three ``kv_store_*.json`` files under
    ``<storage_root>/knowledge/.lightrag/<workspace>``. Counts cover every
    entity and relation found; the returned lists are capped by the
    ``MAX_INGEST_LOG_*`` limits.
    """
    workspace_dir = (
        Path(storage_root).joinpath("knowledge", ".lightrag", str(workspace).strip()).resolve()
    )
    entity_store = _load_json_file(workspace_dir / "kv_store_full_entities.json")
    relation_store = _load_json_file(workspace_dir / "kv_store_full_relations.json")
    chunk_store = _load_json_file(workspace_dir / "kv_store_text_chunks.json")
    document_entities = _normalize_document_entities(entity_store, document_id)
    document_relations = _normalize_document_relations(relation_store, document_id)
    document_chunks = _normalize_document_chunks(chunk_store, document_id)
    return {
        "entity_count": len(document_entities),
        "relation_count": len(document_relations),
        "entities": document_entities[:MAX_INGEST_LOG_ENTITIES],
        "relations": document_relations[:MAX_INGEST_LOG_RELATIONS],
        "chunks": document_chunks[:MAX_INGEST_LOG_CHUNKS],
    }
def _extract_ingest_sections(text: str) -> list[dict[str, str]]:
    """Return up to ``MAX_INGEST_LOG_SECTIONS`` headings with a short excerpt.

    A heading is a non-empty stripped line of at most 90 characters matching
    ``INGEST_SECTION_HEADING_PATTERN``. Leading "#" marks are removed from
    the title; the excerpt comes from the lines after the heading.
    """
    stripped_lines = [raw_line.strip() for raw_line in str(text or "").splitlines()]
    found: list[dict[str, str]] = []
    for position, heading in enumerate(stripped_lines):
        if len(found) >= MAX_INGEST_LOG_SECTIONS:
            break
        is_heading = (
            bool(heading)
            and len(heading) <= 90
            and INGEST_SECTION_HEADING_PATTERN.match(heading)
        )
        if is_heading:
            found.append(
                {
                    "title": heading.lstrip("#").strip(),
                    "excerpt": _find_following_excerpt(stripped_lines[position + 1 :]),
                }
            )
    return found
def _find_following_excerpt(lines: list[str]) -> str:
    """Return a truncated excerpt from the lines that follow a heading.

    Empty lines are skipped. Collection stops at the next heading, or once
    the space-joined text reaches ``MAX_INGEST_LOG_TEXT_PREVIEW`` characters.
    """
    body_lines: list[str] = []
    # Length of " ".join(body_lines); starts at -1 to discount the separator
    # that the first line does not have.
    joined_length = -1
    for candidate in lines:
        if not candidate:
            continue
        if INGEST_SECTION_HEADING_PATTERN.match(candidate):
            break
        body_lines.append(candidate)
        joined_length += len(candidate) + 1
        if joined_length >= MAX_INGEST_LOG_TEXT_PREVIEW:
            break
    return _truncate_text(" ".join(body_lines), max_length=MAX_INGEST_LOG_TEXT_PREVIEW)
def _normalize_chunk_ids(status_payload: dict[str, Any]) -> list[str]:
    """Return the stripped chunk ids from ``status_payload["chunks_list"]``.

    Returns an empty list when the value is not a list. Falsy items and items
    that are blank after stripping are dropped.
    """
    raw_ids = status_payload.get("chunks_list")
    if not isinstance(raw_ids, list):
        return []
    cleaned: list[str] = []
    for raw_id in raw_ids:
        if str(raw_id or "").strip():
            cleaned.append(str(raw_id).strip())
    return cleaned
def _resolve_chunk_count(status_payload: dict[str, Any], chunk_ids: list[str]) -> int:
    """Return the reported ``chunks_count``, falling back to ``len(chunk_ids)``.

    The fallback is used when the reported value is falsy or cannot be
    converted to ``int``.
    """
    reported = status_payload.get("chunks_count")
    fallback = len(chunk_ids)
    try:
        return int(reported or fallback)
    except (TypeError, ValueError):
        return fallback
def _load_json_file(path: Path) -> dict[str, Any]:
    """Best-effort load of a JSON object from ``path``.

    Returns an empty dict when the file is missing or unreadable, is not
    valid UTF-8, is not valid JSON, or its top-level value is not an object.
    """
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers missing/unreadable files. ValueError covers both
        # json.JSONDecodeError and the UnicodeDecodeError raised by read_text
        # for bytes that are not valid UTF-8.
        return {}
    return payload if isinstance(payload, dict) else {}
def _normalize_document_entities(payload: dict[str, Any], document_id: str) -> list[str]:
    """Return the de-duplicated entity names stored for ``document_id``.

    Returns an empty list when the payload, the document's entry or its
    ``entity_names`` value does not have the expected dict/dict/list shape.
    """
    if not isinstance(payload, dict):
        return []
    record = payload.get(document_id)
    if not isinstance(record, dict):
        return []
    names = record.get("entity_names")
    if not isinstance(names, list):
        return []
    return _dedupe_text_items(names)
def _normalize_document_relations(
    payload: dict[str, Any], document_id: str
) -> list[dict[str, str]]:
    """Return the unique (source, target) relations stored for ``document_id``.

    Each usable pair is a list or tuple whose first two items are non-blank
    after stripping. Every relation is given the type "关联". Returns an empty
    list when the payload does not have the expected shape.
    """
    if not isinstance(payload, dict):
        return []
    record = payload.get(document_id)
    if not isinstance(record, dict):
        return []
    pairs = record.get("relation_pairs")
    if not isinstance(pairs, list):
        return []
    unique: dict[tuple[str, str], dict[str, str]] = {}
    for pair in pairs:
        if not isinstance(pair, (list, tuple)) or len(pair) < 2:
            continue
        origin = str(pair[0] or "").strip()
        destination = str(pair[1] or "").strip()
        if origin and destination:
            unique.setdefault(
                (origin, destination),
                {"source": origin, "target": destination, "type": "关联"},
            )
    return list(unique.values())
def _normalize_document_chunks(payload: dict[str, Any], document_id: str) -> list[dict[str, Any]]:
    """Return summaries of the stored chunks that belong to ``document_id``.

    A chunk belongs to the document when its stripped ``full_doc_id`` equals
    ``document_id``. Each summary carries the chunk id (``_id`` or the store
    key), its order index, token count and a one-line text summary. The
    result is sorted by order, then id.
    """
    matched: list[dict[str, Any]] = []
    for chunk_key, chunk in payload.items():
        if not isinstance(chunk, dict):
            continue
        if str(chunk.get("full_doc_id") or "").strip() != document_id:
            continue
        chunk_text = str(chunk.get("content") or "").strip()
        matched.append(
            {
                "id": str(chunk.get("_id") or chunk_key).strip(),
                "order": _to_int(chunk.get("chunk_order_index")),
                "tokens": _to_int(chunk.get("tokens")),
                "summary": _build_chunk_summary(chunk_text),
            }
        )
    matched.sort(key=lambda summary: (summary["order"], summary["id"]))
    return matched
def _build_chunk_summary(content: str) -> str:
    """Return a truncated one-line summary of a chunk.

    Uses the first non-empty line with at least 12 characters; when no line
    is that long, the first non-empty line (or an empty string) is used.
    """
    non_empty = [line.strip() for line in str(content or "").splitlines() if line.strip()]
    preview = non_empty[0] if non_empty else ""
    for candidate in non_empty:
        if len(candidate) >= 12:
            preview = candidate
            break
    return _truncate_text(preview, max_length=MAX_INGEST_LOG_TEXT_PREVIEW)
def _dedupe_text_items(items: list[Any]) -> list[str]:
deduped: list[str] = []
seen: set[str] = set()
for item in items:
text = str(item or "").strip()
if not text or text in seen:
continue
seen.add(text)
deduped.append(text)
return deduped
def _to_int(value: Any) -> int:
try:
return int(value or 0)
except (TypeError, ValueError):
return 0
def _truncate_text(text: str, *, max_length: int) -> str:
normalized = " ".join(str(text or "").split()).strip()
if len(normalized) <= max_length:
return normalized
return f"{normalized[: max_length - 3].rstrip()}..."

View File

@@ -12,24 +12,15 @@ from sqlalchemy.orm import Session
from app.core.config import get_settings
from app.core.logging import get_logger
from app.db.session import get_session_factory
from app.services.knowledge_ingest_log import (
build_document_graph_summary,
build_ingest_document_summary,
build_ingest_status_summary,
)
from app.services.knowledge_rag_runtime import (
DEFAULT_EMBEDDING_TIMEOUT_SECONDS,
DEFAULT_LIGHTRAG_QUERY_MODE,
DEFAULT_LLM_TIMEOUT_SECONDS,
KnowledgeRagError,
RuntimeModelConfig,
_LightRagRuntime,
_build_ali_rerank_request,
_build_azure_deployment_base,
_build_headers,
_ensure_path,
_extract_chat_text,
_extract_embedding_vectors,
_extract_error_message,
_extract_rerank_results,
_normalize_endpoint,
_parse_json_body,
_send_json_request,
)
from app.services.settings import SettingsService
@@ -76,11 +67,9 @@ STRUCTURED_APPENDIX_LEADING_MARKERS = (
"# 结构化表格补充",
)
STRUCTURED_APPENDIX_LEADING_WINDOW = 220
_runtime_lock = threading.RLock()
_runtime_instance: _LightRagRuntime | None = None
_runtime_signature: tuple[Any, ...] | None = None
_runtime_instances: dict[int, _LightRagRuntime] = {}
_runtime_signatures: dict[int, tuple[Any, ...]] = {}
class KnowledgeRagService:
@@ -147,7 +136,11 @@ class KnowledgeRagService:
"query": normalized_query,
"record_count": len(hits),
"hits": hits,
"references": [str(item.get("code") or "").strip() for item in hits if str(item.get("code") or "").strip()],
"references": [
str(item.get("code") or "").strip()
for item in hits
if str(item.get("code") or "").strip()
],
"raw_references": references,
"metadata": raw.get("metadata") if isinstance(raw, dict) else {},
"message": f"已从知识库中检索到 {len(hits)} 条相关内容。",
@@ -172,6 +165,7 @@ class KnowledgeRagService:
)
texts: list[str] = []
file_paths: list[str] = []
document_summaries: list[dict[str, Any]] = []
runtime = self._get_runtime()
existing_statuses = runtime.get_document_statuses(normalized_ids)
@@ -182,12 +176,29 @@ class KnowledgeRagService:
try:
runtime.delete_document(document_id)
except Exception as exc:
logger.warning("Delete existing LightRAG document failed doc_id=%s: %s", document_id, exc)
logger.warning(
"Delete existing LightRAG document failed doc_id=%s: %s", document_id, exc
)
text = knowledge_service.extract_document_text(document_id)
raw_text = text
if normalization_service is not None:
text = normalization_service.build_enriched_text(text)
texts.append(text)
file_paths.append(str((knowledge_service.library_root / entry["folder"] / entry["stored_name"]).resolve()))
file_paths.append(
str(
(
knowledge_service.library_root / entry["folder"] / entry["stored_name"]
).resolve()
)
)
document_summaries.append(
build_ingest_document_summary(
document_id=document_id,
entry=entry,
raw_text=raw_text,
indexed_text=text,
)
)
track_id = runtime.insert_documents(
texts=texts,
@@ -198,10 +209,32 @@ class KnowledgeRagService:
statuses = runtime.get_document_statuses(normalized_ids)
succeeded_document_ids: list[str] = []
failed_documents: list[dict[str, str]] = []
summary_by_id = {
str(item.get("document_id") or "").strip(): item
for item in document_summaries
if str(item.get("document_id") or "").strip()
}
for document_id in normalized_ids:
status_obj = statuses.get(document_id)
status_text = self._status_value(status_obj)
status_payload = self._serialize_status(status_obj)
workspace = (
os.environ.get("LIGHTRAG_WORKSPACE", DEFAULT_LIGHTRAG_WORKSPACE).strip()
or DEFAULT_LIGHTRAG_WORKSPACE
)
graph_summary = build_document_graph_summary(
self.storage_root,
workspace=workspace,
document_id=document_id,
)
if document_id in summary_by_id:
summary_by_id[document_id].update(
build_ingest_status_summary(
status_payload=status_payload,
graph_summary=graph_summary,
)
)
if self.is_query_ready_status(status_obj):
succeeded_document_ids.append(document_id)
continue
@@ -218,13 +251,18 @@ class KnowledgeRagService:
"requested_document_ids": normalized_ids,
"succeeded_document_ids": succeeded_document_ids,
"failed_documents": failed_documents,
"document_summaries": [
summary_by_id.get(document_id, {}) for document_id in normalized_ids
],
"status_snapshot": {
document_id: self._serialize_status(status_obj)
for document_id, status_obj in statuses.items()
},
}
def get_document_status_map(self, document_ids: list[str] | None = None) -> dict[str, dict[str, Any]]:
def get_document_status_map(
self, document_ids: list[str] | None = None
) -> dict[str, dict[str, Any]]:
target_ids = [str(item).strip() for item in document_ids or [] if str(item).strip()]
if not target_ids:
return {}
@@ -248,28 +286,32 @@ class KnowledgeRagService:
logger.warning("Delete LightRAG document ignored doc_id=%s: %s", normalized_id, exc)
def _get_runtime(self) -> _LightRagRuntime:
global _runtime_instance, _runtime_signature
signature, runtime_kwargs = self._build_runtime_signature()
thread_id = threading.get_ident()
with _runtime_lock:
if _runtime_instance is not None and _runtime_signature == signature:
return _runtime_instance
runtime = _runtime_instances.get(thread_id)
if runtime is not None and _runtime_signatures.get(thread_id) == signature:
return runtime
if _runtime_instance is not None:
if runtime is not None:
try:
_runtime_instance.finalize()
runtime.finalize()
except Exception as exc: # pragma: no cover - best effort cleanup
logger.warning("Finalize previous LightRAG runtime failed: %s", exc)
_runtime_instance = _LightRagRuntime(**runtime_kwargs)
_runtime_signature = signature
return _runtime_instance
runtime = _LightRagRuntime(**runtime_kwargs)
_runtime_instances[thread_id] = runtime
_runtime_signatures[thread_id] = signature
return runtime
def _build_runtime_signature(self) -> tuple[tuple[Any, ...], dict[str, Any]]:
configs = self._load_runtime_configs()
settings = get_settings()
working_dir = (self.storage_root / "knowledge" / ".lightrag").resolve()
workspace = os.environ.get("LIGHTRAG_WORKSPACE", DEFAULT_LIGHTRAG_WORKSPACE).strip() or DEFAULT_LIGHTRAG_WORKSPACE
workspace = (
os.environ.get("LIGHTRAG_WORKSPACE", DEFAULT_LIGHTRAG_WORKSPACE).strip()
or DEFAULT_LIGHTRAG_WORKSPACE
)
qdrant_url = os.environ.get("QDRANT_URL", "").strip() or _resolve_default_qdrant_url()
qdrant_api_key = os.environ.get("QDRANT_API_KEY", "").strip()
@@ -318,7 +360,9 @@ class KnowledgeRagService:
try:
settings_service = SettingsService(session)
main = self._normalize_runtime_model(settings_service.get_runtime_model_config("main"))
embedding = self._normalize_runtime_model(settings_service.get_runtime_model_config("embedding"))
embedding = self._normalize_runtime_model(
settings_service.get_runtime_model_config("embedding")
)
try:
backup_raw = settings_service.get_runtime_model_config("backup")
backup = self._normalize_runtime_model(backup_raw)
@@ -405,7 +449,9 @@ class KnowledgeRagService:
document_id, document_name = _parse_document_identity(file_path)
normalized_chunk_id = chunk_id or f"path-{rank}"
normalized_content = _truncate_text(content, max_length=MAX_KNOWLEDGE_HIT_CONTENT_LENGTH)
normalized_content = _truncate_text(
content, max_length=MAX_KNOWLEDGE_HIT_CONTENT_LENGTH
)
excerpt = _build_query_focused_excerpt(
normalized_content,
query_terms=query_terms,
@@ -510,17 +556,14 @@ class KnowledgeRagService:
def shutdown_knowledge_rag_runtime() -> None:
global _runtime_instance, _runtime_signature
with _runtime_lock:
if _runtime_instance is None:
return
try:
_runtime_instance.finalize()
except Exception as exc: # pragma: no cover - best effort cleanup
logger.warning("Finalize LightRAG runtime failed during shutdown: %s", exc)
_runtime_instance = None
_runtime_signature = None
for runtime in list(_runtime_instances.values()):
try:
runtime.finalize()
except Exception as exc: # pragma: no cover - best effort cleanup
logger.warning("Finalize LightRAG runtime failed during shutdown: %s", exc)
_runtime_instances.clear()
_runtime_signatures.clear()
def _parse_document_identity(file_path: str) -> tuple[str, str]:
@@ -551,9 +594,7 @@ def _build_query_focused_excerpt(
lowered = normalized.lower()
match_positions = [
lowered.find(term)
for term in query_terms
if term and lowered.find(term) >= 0
lowered.find(term) for term in query_terms if term and lowered.find(term) >= 0
]
if not match_positions:
return _build_excerpt(normalized, max_length=max_length)
@@ -649,7 +690,9 @@ def _score_knowledge_hit(
elif leading_appendix_marker == "# 重点章节摘录":
score += 4 if matched_terms else -12
elif leading_appendix_marker == "# 问答线索补充":
score += 8 if matched_terms and not prefers_tabular_evidence else 2 if matched_terms else -20
score += (
8 if matched_terms and not prefers_tabular_evidence else 2 if matched_terms else -20
)
elif leading_appendix_marker == "# 结构化表格补充":
if prefers_tabular_evidence and matched_terms:
score += 16
@@ -666,7 +709,11 @@ def _score_knowledge_hit(
score += 4
if matched_terms and any(marker in content for marker in ("附表", "", "")):
score += 4
if not prefers_tabular_evidence and matched_terms and any(marker in content for marker in ("", "", "", "-", "")):
if (
not prefers_tabular_evidence
and matched_terms
and any(marker in content for marker in ("", "", "", "-", ""))
):
score += 4
if title and any(term in title for term in query_terms):
score += 6

View File

@@ -170,6 +170,7 @@ class SemanticOntologyService(
entities = self._merge_entities(
entities,
model_parse.entity_hints if model_parse is not None else [],
compact_query,
)
intent = self._resolve_intent(
compact_query,
@@ -193,6 +194,11 @@ class SemanticOntologyService(
context_json=context_json,
)
)
missing_slots = self._filter_expense_missing_slots(
compact_query=compact_query,
entities=entities,
missing_slots=missing_slots,
)
relax_knowledge_follow_up = self._should_relax_knowledge_follow_up_clarification(
compact_query=compact_query,
scenario=scenario,
@@ -306,6 +312,45 @@ class SemanticOntologyService(
follow_up_markers = ("", "那么", "这个", "这种", "", "的话", "p", "P")
return any(marker in compact_query for marker in follow_up_markers)
@staticmethod
def _filter_expense_missing_slots(
*,
compact_query: str,
entities: list[object],
missing_slots: list[str],
) -> list[str]:
expense_types = {
str(getattr(item, "normalized_value", "") or getattr(item, "value", "") or "").strip()
for item in entities
if getattr(item, "type", "") == "expense_type"
}
has_transport = "transport" in expense_types
has_entertainment = "entertainment" in expense_types
explicit_entertainment = any(
keyword in compact_query
for keyword in (
"业务招待",
"招待费",
"招待",
"宴请",
"请客",
"请客户吃饭",
"客户吃饭",
"客户用餐",
"客户餐",
"商务接待",
"商务宴请",
"接待餐",
)
)
if has_transport and not has_entertainment and not explicit_entertainment:
return [
item
for item in missing_slots
if item not in {"customer_name", "participants"}
]
return missing_slots
def _record_semantic_parse(
self,
*,

View File

@@ -37,6 +37,39 @@ from app.services.ontology_rules import (
logger = get_logger("app.services.ontology")
# Substrings that mark a query as a transport expense (taxi, ride-hailing,
# fares, ...). `_should_transport_override_entertainment` tests each entry
# with `in` against the compact query, so a short entry such as "出租车"
# also matches any text the longer "出租车票" would match.
TRANSPORT_EXPENSE_OVERRIDE_KEYWORDS = (
"打车",
"网约车",
"出租车票",
"出租车",
"的士票",
"的士",
"滴滴",
"市内交通",
"乘车",
"乘车费",
"用车",
"叫车",
"车费",
"车资",
"机场",
)
# Substrings that count as the user explicitly describing business
# entertainment. `_should_transport_override_entertainment` returns False
# when any of these occurs in the compact query, which keeps the
# entertainment expense type in the merged entity list.
EXPLICIT_ENTERTAINMENT_KEYWORDS = (
"业务招待",
"招待费",
"招待",
"宴请",
"请客",
"请客户吃饭",
"客户吃饭",
"客户用餐",
"客户餐",
"商务接待",
"商务宴请",
"接待餐",
)
class OntologyDetectionMixin:
def _detect_scenario(self, compact_query: str) -> tuple[str, float]:
scores = {key: 0.0 for key in SCENARIO_KEYWORDS}
@@ -337,6 +370,9 @@ class OntologyDetectionMixin:
"出现“客户”不等于应收,出现“供应商”不等于应付,必须结合动作词和业务目标判断。"
"只有明确查询、统计、列出、多少、明细、对比时才优先使用 query 或 compare。"
"附件名称和 OCR 摘要只作为辅助证据,不能编造未出现的事实。"
"如果用户明确提到打车、的士票、出租车票、网约车、乘车费、车费等交通票据,"
"即使句子里出现“客户”,也必须优先识别为 transport不要推断为 entertainment。"
"不要输出用户原文未出现、且与规则候选冲突的费用类型。"
"信息不足时 clarification_required=true并给出一句简短中文追问。"
"missing_slots 使用简短 snake_case例如 expense_type, amount, "
"customer_name, participants, attachments。"
@@ -351,12 +387,12 @@ class OntologyDetectionMixin:
' "intent": "draft",\n'
' "confidence": 0.88,\n'
' "clarification_required": true,\n'
' "clarification_question": "请补充客户单位、参与人员和票据附件。",\n'
' "missing_slots": ["customer_name", "participants", "attachments"],\n'
' "clarification_question": "请补充发生时间、金额和票据附件。",\n'
' "missing_slots": ["time_range", "amount", "attachments"],\n'
' "ambiguity": [],\n'
' "entity_hints": [\n'
' {"type": "expense_type", "value": "招待", '
'"normalized_value": "entertainment", "role": "filter", '
' {"type": "expense_type", "value": "交通费", '
'"normalized_value": "transport", "role": "filter", '
'"confidence": 0.86}\n'
" ]\n"
"}"
@@ -432,6 +468,7 @@ class OntologyDetectionMixin:
def _merge_entities(
base_entities: list[OntologyEntity],
entity_hints: list[LlmOntologyEntityHint],
compact_query: str = "",
) -> list[OntologyEntity]:
merged: dict[tuple[str, str], OntologyEntity] = {
(item.type, item.normalized_value): item for item in base_entities
@@ -454,7 +491,36 @@ class OntologyDetectionMixin:
if existing is None or existing.confidence < candidate.confidence:
merged[key] = candidate
return list(merged.values())
items = list(merged.values())
if OntologyDetectionMixin._should_transport_override_entertainment(
compact_query,
items,
):
items = [
item
for item in items
if not (
item.type == "expense_type"
and item.normalized_value == "entertainment"
)
]
return items
@staticmethod
def _should_transport_override_entertainment(
    compact_query: str,
    entities: list[OntologyEntity],
) -> bool:
    """Decide whether a transport expense should displace an entertainment one.

    Returns True only when all of the following hold:
      * both ``transport`` and ``entertainment`` expense types are present
        among ``entities`` (``normalized_value``, falling back to ``value``);
      * ``compact_query`` contains a transport override keyword;
      * ``compact_query`` contains no explicit entertainment keyword.
    """
    seen_types: set[str] = set()
    for entity in entities:
        if entity.type != "expense_type":
            continue
        seen_types.add(str(entity.normalized_value or entity.value or "").strip())

    if "transport" not in seen_types or "entertainment" not in seen_types:
        return False

    mentions_transport = False
    for keyword in TRANSPORT_EXPENSE_OVERRIDE_KEYWORDS:
        if keyword in compact_query:
            mentions_transport = True
            break
    if not mentions_transport:
        return False

    # Explicit entertainment wording wins over the transport override.
    for keyword in EXPLICIT_ENTERTAINMENT_KEYWORDS:
        if keyword in compact_query:
            return False
    return True
@staticmethod
def _normalize_short_text_list(values: list[str]) -> list[str]:

View File

@@ -59,11 +59,16 @@ class OntologyExtractionMixin:
missing_slots.append("attachments")
return missing_slots
if any(
has_entertainment_type = any(
item.normalized_value == "entertainment"
for item in entities
if item.type == "expense_type"
):
)
has_explicit_entertainment_text = "客户" in compact_query and any(
keyword in compact_query
for keyword in ("招待", "接待", "吃饭", "用餐", "宴请", "请客", "客户餐")
)
if has_entertainment_type or has_explicit_entertainment_text:
if "customer" not in entity_types:
missing_slots.append("customer_name")
missing_slots.append("participants")
@@ -171,14 +176,14 @@ class OntologyExtractionMixin:
upsert(self._make_entity("expense_type", label, normalized, role="filter"))
has_customer_entertainment_signal = "客户" in query and any(
keyword in query for keyword in ("吃饭", "用餐", "餐饮", "宴请", "请客", "招待")
keyword in query for keyword in ("吃饭", "用餐", "餐饮", "宴请", "请客", "招待", "接待")
)
if has_customer_entertainment_signal:
upsert(
self._make_entity(
"expense_type",
"客户招待",
"entertainment",
"业务招待",
"meal",
role="filter",
confidence=0.96,
)
@@ -189,46 +194,52 @@ class OntologyExtractionMixin:
for keyword in (
"打车",
"网约车",
"出租车",
"出租车票",
"出租车",
"车费",
"乘车",
"用车",
"叫车",
"车资",
"的士",
"的士票",
"的士",
"滴滴",
"市内交通",
"地铁",
"公交",
"停车费",
"过路费",
"通行费",
"高速费",
)
):
upsert(self._make_entity("expense_type", "交通", "transport", role="filter", confidence=0.9))
if any(keyword in query for keyword in ("出差", "机票", "火车", "高铁", "行程单")):
if any(keyword in query for keyword in ("出差", "机票", "飞机票", "航班", "火车票", "火车", "高铁票", "高铁", "动车", "行程单")):
upsert(self._make_entity("expense_type", "差旅", "travel", role="filter", confidence=0.88))
if any(keyword in query for keyword in ("酒店", "住宿", "宾馆")):
if any(keyword in query for keyword in ("酒店", "酒店发票", "住宿", "住宿费", "宾馆", "民宿", "房费", "客房")):
upsert(self._make_entity("expense_type", "住宿", "hotel", role="filter", confidence=0.86))
if (
not has_customer_entertainment_signal
and any(keyword in query for keyword in ("餐费", "用餐", "午餐", "晚餐", "早餐", "餐饮"))
):
upsert(self._make_entity("expense_type", "", "meal", role="filter", confidence=0.84))
upsert(self._make_entity("expense_type", "业务招待", "meal", role="filter", confidence=0.84))
if any(
keyword in query
for keyword in ("办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板")
for keyword in ("办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板", "硒鼓", "墨盒")
):
upsert(self._make_entity("expense_type", "办公费", "office", role="filter", confidence=0.87))
upsert(self._make_entity("expense_type", "办公用品", "office", role="filter", confidence=0.87))
if any(keyword in query for keyword in ("培训", "讲师费", "课时费", "课程费")):
if any(keyword in query for keyword in ("培训", "讲师费", "课时费", "课程费", "教材", "认证费", "考试费")):
upsert(self._make_entity("expense_type", "培训费", "training", role="filter", confidence=0.84))
if any(keyword in query for keyword in ("通讯费", "话费", "流量费", "宽带费")):
if any(keyword in query for keyword in ("通讯费", "话费", "电话费", "手机费", "流量费", "宽带费", "网络费")):
upsert(self._make_entity("expense_type", "通讯费", "communication", role="filter", confidence=0.84))
if any(keyword in query for keyword in ("福利费", "团建", "慰问", "节日福利", "体检费")):
if any(keyword in query for keyword in ("福利费", "团建", "慰问", "节日福利", "体检费", "员工关怀")):
upsert(self._make_entity("expense_type", "福利费", "welfare", role="filter", confidence=0.84))
for amount in self._extract_amount_entities(query):

View File

@@ -6,6 +6,7 @@ from dataclasses import dataclass
from pydantic import BaseModel, ConfigDict, Field
from app.schemas.ontology import OntologyIntent, OntologyScenario
from app.services.expense_type_keywords import build_expense_type_keyword_map
DATE_RANGE_PATTERN = re.compile(
r"(?P<start>\d{4}-\d{1,2}-\d{1,2})\s*(?:到|至|~|-)\s*(?P<end>\d{4}-\d{1,2}-\d{1,2})"
@@ -128,44 +129,7 @@ OPERATE_KEYWORDS = (
"删除",
)
EXPENSE_TYPE_KEYWORDS = {
"差旅": "travel",
"出差": "travel",
"住宿": "hotel",
"酒店": "hotel",
"交通": "transport",
"打车": "transport",
"网约车": "transport",
"出租车": "transport",
"出租车票": "transport",
"乘车": "transport",
"乘车费": "transport",
"用车": "transport",
"叫车": "transport",
"车资": "transport",
"的士": "transport",
"的士票": "transport",
"停车费": "transport",
"餐费": "meal",
"用餐": "meal",
"会务": "meeting",
"招待费": "entertainment",
"招待": "entertainment",
"宴请": "entertainment",
"办公费": "office",
"办公用品": "office",
"文具": "office",
"耗材": "office",
"办公耗材": "office",
"打印纸": "office",
"办公设备": "office",
"培训费": "training",
"培训": "training",
"通讯费": "communication",
"话费": "communication",
"福利费": "welfare",
"团建": "welfare",
}
EXPENSE_TYPE_KEYWORDS = build_expense_type_keyword_map()
EXPENSE_NARRATIVE_KEYWORDS = (
"报销",

View File

@@ -74,16 +74,16 @@ EXPENSE_RISK_LEVEL_LABELS = {
"medium": "中风险",
"warning": "中风险",
"low": "低风险",
"info": "低风险",
"info": "提示",
}
EXPENSE_TYPE_LABELS = {
"travel": "差旅费",
"hotel": "住宿费",
"transport": "交通费",
"meal": "",
"meal": "业务招待",
"meeting": "会务费",
"entertainment": "业务招待费",
"office": "办公费",
"office": "办公用品",
"training": "培训费",
"communication": "通讯费",
"welfare": "福利费",

View File

@@ -35,10 +35,10 @@ EXPENSE_TYPE_LABELS = {
"travel": "差旅费",
"hotel": "住宿费",
"transport": "交通费",
"meal": "",
"meal": "业务招待",
"meeting": "会务费",
"entertainment": "业务招待费",
"office": "办公费",
"office": "办公用品",
"training": "培训费",
"communication": "通讯费",
"welfare": "福利费",
@@ -48,10 +48,10 @@ EXPENSE_TYPE_LABELS = {
GROUP_SCENE_LABELS = {
"travel": "差旅费",
"entertainment": "业务招待费",
"meal": "伙食",
"meal": "业务招待",
"transport": "交通费",
"hotel": "住宿费",
"office": "办公费",
"office": "办公用品",
"training": "培训费",
"communication": "通讯费",
"welfare": "福利费",
@@ -62,8 +62,12 @@ EXPENSE_SCENE_SELECTION_OPTIONS = (
("travel", "差旅费", "出差、长途交通、住宿、差旅补贴等场景。"),
("transport", "交通费", "市内打车、停车、过路费等日常交通场景。"),
("hotel", "住宿费", "单独住宿、酒店发票等场景。"),
("entertainment", "业务招待费", "客户接待、宴请、招待等场景。"),
("office", "办公", "办公用品、耗材、办公设备等采购场景。"),
("meal", "业务招待费", "客户接待、工作餐、加班餐、餐饮票据等场景。"),
("meeting", "会务", "会议、论坛、会场、参会等场景。"),
("office", "办公用品费", "办公用品、耗材、办公设备等采购场景。"),
("training", "培训费", "培训课程、讲师费、教材、认证等场景。"),
("communication", "通讯费", "话费、流量、宽带、网络等场景。"),
("welfare", "福利费", "团建、体检、慰问、节日福利等场景。"),
("other", "其他费用", "暂不属于以上分类的报销场景。"),
)
@@ -130,10 +134,10 @@ INFERRED_REASON_LABELS = {
"travel": "出差行程",
"hotel": "住宿报销",
"transport": "交通出行",
"meal": "餐饮用餐",
"meal": "业务招待",
"meeting": "会务活动",
"entertainment": "客户接待",
"office": "办公采购",
"office": "办公用品采购",
"training": "培训学习",
"communication": "通讯使用",
"welfare": "员工福利",

View File

@@ -9,16 +9,32 @@ from app.schemas.user_agent import UserAgentRequest, UserAgentReviewDocumentCard
DEFAULT_GROUP_SCENE_LABELS = {
"travel": "差旅费",
"entertainment": "业务招待费",
"meal": "伙食",
"meal": "业务招待",
"transport": "交通费",
"hotel": "住宿费",
"office": "办公费",
"office": "办公用品",
"training": "培训费",
"communication": "通讯费",
"welfare": "福利费",
"other": "其他费用",
}
# User-facing labels keyed by document type code. The document classifier in
# this module reads from this mapping to fill the `scene_label` of each
# classification result.
DOCUMENT_SCENE_LABELS = {
"flight_itinerary": "机票/航班行程单",
"train_ticket": "火车/高铁票",
"ship_ticket": "轮船票",
"travel_ticket": "交通出行票据",
"hotel_invoice": "酒店住宿票据",
"taxi_receipt": "出租车/网约车票据",
"transport_receipt": "乘车票据",
"parking_toll_receipt": "停车/通行费票据",
"meal_receipt": "餐饮发票",
"office_invoice": "文具/办公用品发票",
"meeting_invoice": "会议/会务票据",
"training_invoice": "培训票据",
"other": "其他票据",
}
DOCUMENT_DATE_TEXT_PATTERN = re.compile(
r"(\d{4}[年/-]\d{1,2}[月/-]\d{1,2}日?(?:\s*[T ]?\s*(?:[01]?\d|2[0-3])[:][0-5]\d)?)"
)
@@ -48,55 +64,55 @@ class UserAgentDocumentService:
provided_type = str(item.get("document_type") or "").strip().lower()
normalized_expense_type = str(expense_type_code or "").strip().lower()
if provided_type:
if provided_type in {"flight_itinerary", "train_ticket"}:
if provided_type in {"flight_itinerary", "train_ticket", "ship_ticket"}:
return {
"document_type": provided_type,
"expense_type": "travel",
"group_code": "travel",
"scene_label": "差旅票据",
"scene_label": DOCUMENT_SCENE_LABELS.get(provided_type, "交通出行票据"),
}
if provided_type == "hotel_invoice":
return {
"document_type": provided_type,
"expense_type": "hotel",
"group_code": "travel",
"scene_label": "住宿票据",
"scene_label": DOCUMENT_SCENE_LABELS["hotel_invoice"],
}
if provided_type in {"taxi_receipt", "parking_toll_receipt"}:
if provided_type in {"taxi_receipt", "transport_receipt", "parking_toll_receipt"}:
return {
"document_type": provided_type,
"expense_type": "transport",
"group_code": "travel",
"scene_label": "交通票据",
"scene_label": DOCUMENT_SCENE_LABELS.get(provided_type, "乘车票据"),
}
if provided_type == "meal_receipt":
group_code = "entertainment" if normalized_expense_type == "entertainment" or has_customer else "meal"
group_code = "meal"
return {
"document_type": provided_type,
"expense_type": group_code,
"group_code": group_code,
"scene_label": "餐饮票据",
"scene_label": DOCUMENT_SCENE_LABELS["meal_receipt"],
}
if provided_type == "office_invoice":
return {
"document_type": provided_type,
"expense_type": "office",
"group_code": "office",
"scene_label": "办公用品票据",
"scene_label": DOCUMENT_SCENE_LABELS["office_invoice"],
}
if provided_type == "meeting_invoice":
return {
"document_type": provided_type,
"expense_type": "meeting",
"group_code": "meeting",
"scene_label": "会务票据",
"scene_label": DOCUMENT_SCENE_LABELS["meeting_invoice"],
}
if provided_type == "training_invoice":
return {
"document_type": provided_type,
"expense_type": "training",
"group_code": "training",
"scene_label": "培训票据",
"scene_label": DOCUMENT_SCENE_LABELS["training_invoice"],
}
text = " ".join(
@@ -108,41 +124,69 @@ class UserAgentDocumentService:
).lower()
compact = text.replace(" ", "")
if any(keyword in compact for keyword in ("机票", "航班", "", "", "行程单")):
if any(keyword in compact for keyword in ("火车", "高铁", "", "", "车次")):
return {
"document_type": "travel_ticket",
"document_type": "train_ticket",
"expense_type": "travel",
"group_code": "travel",
"scene_label": "差旅票据",
"scene_label": DOCUMENT_SCENE_LABELS["train_ticket"],
}
if any(keyword in compact for keyword in ("过路费", "停车", "通行费", "收费站")):
return {
"document_type": "parking_toll_receipt",
"expense_type": "transport",
"group_code": "travel",
"scene_label": DOCUMENT_SCENE_LABELS["parking_toll_receipt"],
}
if any(keyword in compact for keyword in ("打车", "出租车", "滴滴", "网约车", "叫车", "车费", "车资", "的士")):
return {
"document_type": "taxi_receipt",
"expense_type": "transport",
"group_code": "travel",
"scene_label": DOCUMENT_SCENE_LABELS["taxi_receipt"],
}
if any(keyword in compact for keyword in ("乘车", "用车")):
return {
"document_type": "transport_receipt",
"expense_type": "transport",
"group_code": "travel",
"scene_label": DOCUMENT_SCENE_LABELS["transport_receipt"],
}
if any(keyword in compact for keyword in ("机票", "航班", "登机", "航空", "客票")):
return {
"document_type": "flight_itinerary",
"expense_type": "travel",
"group_code": "travel",
"scene_label": DOCUMENT_SCENE_LABELS["flight_itinerary"],
}
if any(keyword in compact for keyword in ("轮船", "船票", "客轮", "渡轮", "航运")):
return {
"document_type": "ship_ticket",
"expense_type": "travel",
"group_code": "travel",
"scene_label": DOCUMENT_SCENE_LABELS["ship_ticket"],
}
if any(keyword in compact for keyword in ("酒店", "住宿", "宾馆")):
return {
"document_type": "hotel_invoice",
"expense_type": "hotel",
"group_code": "travel",
"scene_label": "住宿票据",
}
if any(keyword in compact for keyword in ("打车", "出租车", "滴滴", "网约车", "乘车", "用车", "叫车", "车费", "车资", "的士", "过路费", "停车")):
return {
"document_type": "transport_receipt",
"expense_type": "transport",
"group_code": "travel",
"scene_label": "交通票据",
"scene_label": DOCUMENT_SCENE_LABELS["hotel_invoice"],
}
if any(keyword in compact for keyword in ("", "饭店", "酒楼", "酒家", "餐饮", "meal")):
group_code = "entertainment" if normalized_expense_type == "entertainment" or has_customer else "meal"
group_code = "meal"
return {
"document_type": "meal_receipt",
"expense_type": group_code,
"group_code": group_code,
"scene_label": "餐饮票据",
"scene_label": DOCUMENT_SCENE_LABELS["meal_receipt"],
}
if any(keyword in compact for keyword in ("办公用品", "文具", "耗材", "办公耗材", "打印纸", "键盘", "鼠标", "白板", "墨盒", "硒鼓")):
return {
"document_type": "other",
"document_type": "office_invoice",
"expense_type": "office",
"group_code": "office",
"scene_label": "办公用品票据",
"scene_label": DOCUMENT_SCENE_LABELS["office_invoice"],
}
return {
"document_type": "other",

View File

@@ -314,10 +314,7 @@ class UserAgentReviewCoreMixin:
filename=str(item.get("filename") or f"document-{index}"),
document_type=classified["document_type"],
suggested_expense_type=classified["expense_type"],
scene_label=GROUP_SCENE_LABELS.get(
classified["group_code"],
classified["scene_label"],
),
scene_label=self._resolve_review_document_scene_label(item, classified),
summary=str(item.get("summary") or item.get("text") or "").strip(),
avg_score=float(item.get("avg_score") or 0.0),
preview_kind=str(item.get("preview_kind") or "").strip(),
@@ -338,6 +335,25 @@ class UserAgentReviewCoreMixin:
return cards
@staticmethod
def _resolve_review_document_scene_label(item: dict[str, object], classified: dict[str, str]) -> str:
provided_label = str(item.get("document_type_label") or "").strip()
if provided_label and provided_label != "其他单据":
return provided_label
classified_scene_label = str(classified.get("scene_label") or "").strip()
if classified_scene_label:
return classified_scene_label
document_type = str(classified.get("document_type") or item.get("document_type") or "").strip()
document_type_label = resolve_document_type_label(document_type)
if document_type_label and document_type_label not in {"其他单据", document_type}:
return document_type_label
scene_label = str(item.get("scene_label") or "").strip()
return scene_label or "其他票据"
def _build_review_claim_groups(
self,
payload: UserAgentRequest,

View File

@@ -59,6 +59,20 @@ class UserAgentReviewProfileMixin:
manager_name = self._resolve_manager_name(employee)
reason = slot_map.get("reason").value if slot_map.get("reason") else ""
attachments = "".join(self._resolve_attachment_names(payload))
expense_type_code = str(slot_map.get("expense_type").normalized_value if slot_map.get("expense_type") else "").strip()
customer_name = str(slot_map.get("customer_name").value if slot_map.get("customer_name") else "").strip()
merchant_name = str(slot_map.get("merchant_name").value if slot_map.get("merchant_name") else "").strip()
participants = str(slot_map.get("participants").value if slot_map.get("participants") else "").strip()
customer_slot = slot_map.get("customer_name")
participants_slot = slot_map.get("participants")
customer_required = bool(
customer_slot
and (customer_slot.required or customer_slot.status == "missing")
)
participants_required = bool(
participants_slot
and (participants_slot.required or participants_slot.status == "missing")
)
fields = [
UserAgentReviewEditField(
@@ -98,13 +112,20 @@ class UserAgentReviewProfileMixin:
required=False,
group="basic",
),
UserAgentReviewEditField(
key="customer_name",
label="客户名称",
value=slot_map.get("customer_name").value if slot_map.get("customer_name") else "",
placeholder="请输入客户名称",
group="business",
),
]
if expense_type_code == "entertainment" or customer_required or customer_name:
fields.append(
UserAgentReviewEditField(
key="customer_name",
label="客户名称",
value=customer_name,
placeholder="请输入客户名称",
group="business",
)
)
fields.append(
UserAgentReviewEditField(
key="business_location",
label="业务地点",
@@ -112,15 +133,22 @@ class UserAgentReviewProfileMixin:
placeholder="例如:北京 / 客户现场",
required=False,
group="business",
),
UserAgentReviewEditField(
key="merchant_name",
label="酒店/商户",
value=slot_map.get("merchant_name").value if slot_map.get("merchant_name") else "",
placeholder="请输入酒店或商户名称",
required=False,
group="business",
),
)
)
if expense_type_code == "hotel" or merchant_name:
fields.append(
UserAgentReviewEditField(
key="merchant_name",
label="酒店/商户",
value=merchant_name,
placeholder="请输入酒店或商户名称",
required=False,
group="business",
)
)
fields.extend([
UserAgentReviewEditField(
key="amount",
label="金额",
@@ -128,13 +156,20 @@ class UserAgentReviewProfileMixin:
placeholder="例如200.00元",
group="business",
),
UserAgentReviewEditField(
key="participants",
label="参与人员",
value=slot_map.get("participants").value if slot_map.get("participants") else "",
placeholder="例如:客户 2 人,我方 1 人",
group="business",
),
])
if expense_type_code == "entertainment" or participants_required or participants:
fields.append(
UserAgentReviewEditField(
key="participants",
label="参与人员",
value=participants,
placeholder="例如:客户 2 人,我方 1 人",
group="business",
)
)
fields.extend([
UserAgentReviewEditField(
key="reason",
label="事由",
@@ -152,7 +187,7 @@ class UserAgentReviewProfileMixin:
field_type="textarea",
group="attachments",
),
]
])
return fields

View File

@@ -37,6 +37,7 @@ from app.services.expense_claims import ExpenseClaimService
from app.services.expense_rule_runtime import ExpenseRuleRuntimeService, RuntimeTravelPolicy, resolve_document_type_label
from app.services.risk_ontology_bridge import resolve_rule_codes_for_risk_check
from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService
from app.services.expense_type_keywords import resolve_expense_type_label_from_text
from app.services.user_agent_constants import *
@@ -568,27 +569,9 @@ class UserAgentReviewSlotMixin:
@staticmethod
def _normalize_expense_type_input(value: str) -> tuple[str, str]:
compact = str(value or "").replace(" ", "")
if "招待" in compact or ("客户" in compact and any(keyword in compact for keyword in ("吃饭", "用餐", "宴请", "请客"))):
return "entertainment", "业务招待费"
if any(keyword in compact for keyword in ("差旅", "出差", "机票", "行程")):
return "travel", "差旅费"
if any(keyword in compact for keyword in ("住宿", "酒店", "宾馆")):
return "hotel", "住宿费"
if any(keyword in compact for keyword in ("交通", "打车", "网约车", "出租车", "乘车", "用车", "叫车", "车费", "车资", "的士", "停车")):
return "transport", "交通费"
if any(keyword in compact for keyword in ("餐费", "用餐", "午餐", "晚餐", "早餐", "伙食")):
return "meal", "餐费"
if "会务" in compact:
return "meeting", "会务费"
if any(keyword in compact for keyword in ("办公费", "办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板")):
return "office", "办公费"
if any(keyword in compact for keyword in ("培训费", "培训", "讲师费", "课时费", "课程费")):
return "training", "培训费"
if any(keyword in compact for keyword in ("通讯费", "话费", "流量费", "宽带费")):
return "communication", "通讯费"
if any(keyword in compact for keyword in ("福利费", "团建", "慰问", "节日福利", "体检费")):
return "welfare", "福利费"
resolved = resolve_expense_type_label_from_text(value)
if resolved is not None:
return resolved
return "other", str(value or "").strip() or "其他费用"

View File

@@ -137,14 +137,13 @@ class UserAgentReviewTravelPolicyMixin:
continue
night_count = self._extract_review_hotel_night_count(card)
nightly_amount = (amount / Decimal(max(night_count, 1))).quantize(Decimal("0.01"))
if nightly_amount <= cap:
continue
amount_measurement_lines.append(
f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元,"
f"{night_count} 晚折算 {nightly_amount:.2f} 元/晚;"
f"适用标准为 {band_label}{city_tier_label} {cap:.2f} 元/晚,"
f"{'超出标准' if nightly_amount > cap else '测算通过'}"
f"适用标准为 {band_label}{city_tier_label} {cap:.2f} 元/晚,超出标准。"
)
if nightly_amount <= cap:
continue
basis = (
f"依据《{standard_rule_name}》({standard_rule_version}{band_label}{city_tier_label}"
@@ -200,12 +199,11 @@ class UserAgentReviewTravelPolicyMixin:
)
continue
amount_measurement_lines.append(
f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元;"
f"适用《{standard_rule_name}{region_label}伙食补助标准 {standard_amount:.2f} 元/天,"
f"{'超出标准' if amount > standard_amount else '测算通过'}"
)
if amount > standard_amount:
amount_measurement_lines.append(
f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元;"
f"适用《{standard_rule_name}{region_label}伙食补助标准 {standard_amount:.2f} 元/天,超出标准。"
)
append_once(
f"travel-meal-allowance-over-limit-{card.index}",
UserAgentReviewRiskBrief(
@@ -251,13 +249,6 @@ class UserAgentReviewTravelPolicyMixin:
)
continue
if standard_amount is not None:
amount_measurement_lines.append(
f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元;"
f"适用《{scene_policy.rule_name}{metric_label}标准 {standard_amount:.2f} 元,"
f"{'超出标准' if amount > standard_amount else '测算通过'}"
)
amount_risk = self._evaluate_review_scene_amount(
amount=amount,
limit_config=scene_limit,
@@ -265,6 +256,11 @@ class UserAgentReviewTravelPolicyMixin:
)
if amount_risk is not None:
severity, threshold = amount_risk
if standard_amount is not None:
amount_measurement_lines.append(
f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元;"
f"适用《{scene_policy.rule_name}{metric_label}标准 {standard_amount:.2f} 元,超出标准。"
)
append_once(
f"{scene_code}-amount-over-limit-{card.index}",
UserAgentReviewRiskBrief(
@@ -348,11 +344,11 @@ class UserAgentReviewTravelPolicyMixin:
briefs.insert(
0,
UserAgentReviewRiskBrief(
title="附件金额测算结果",
level="info",
content="系统根据首轮上传附件识别金额,并匹配当前可执行的报销标准进行测算",
title="附件金额测算异常",
level="warning",
content="系统根据首轮上传附件识别金额后,发现有需要进一步核查或说明的测算结果",
detail="".join(dict.fromkeys(amount_measurement_lines)),
suggestion="如测算结果超标,请补充超标说明、调整金额或更正票据类型后再继续。",
suggestion="请补充超标说明、调整金额或更正票据类型后再继续。",
),
)

View File

@@ -15,7 +15,7 @@
"uploaded_by": "admin",
"version_number": 1,
"ingest_status": 1,
"ingest_status_updated_at": "2026-05-22T07:04:12.388160+00:00",
"ingest_status_updated_at": "2026-05-22T15:12:34.420412+00:00",
"ingest_completed_at": "2026-05-17T10:01:33.272539+00:00",
"ingest_document_name": "远光《公司支出管理办法2024》.pdf",
"ingest_document_updated_at": "2026-05-17T09:28:28.999515+00:00",
@@ -36,12 +36,12 @@
"uploaded_by": "系统导入",
"version_number": 1,
"ingest_status": 1,
"ingest_status_updated_at": "2026-05-22T07:03:57.851719+00:00",
"ingest_completed_at": "",
"ingest_document_name": "",
"ingest_document_updated_at": "",
"ingest_status_updated_at": "2026-05-22T15:12:34.423374+00:00",
"ingest_completed_at": "2026-05-22T09:22:26.072669+00:00",
"ingest_document_name": "远光软件会计科目使用说明.xlsx",
"ingest_document_updated_at": "2026-05-22T07:00:22.328877+00:00",
"ingest_document_sha256": "",
"ingest_agent_run_id": ""
"ingest_agent_run_id": "run_8c1ab050c9734d96"
},
{
"id": "b0277cd76034437997fbf5219662725a",
@@ -57,12 +57,12 @@
"uploaded_by": "系统导入",
"version_number": 1,
"ingest_status": 1,
"ingest_status_updated_at": "2026-05-22T07:03:57.861469+00:00",
"ingest_completed_at": "",
"ingest_document_name": "",
"ingest_document_updated_at": "",
"ingest_status_updated_at": "2026-05-22T15:12:34.426517+00:00",
"ingest_completed_at": "2026-05-22T09:22:52.729264+00:00",
"ingest_document_name": "远光软件财务基础知识手册.docx",
"ingest_document_updated_at": "2026-05-22T07:00:22.011016+00:00",
"ingest_document_sha256": "",
"ingest_agent_run_id": ""
"ingest_agent_run_id": "run_8c1ab050c9734d96"
},
{
"id": "23f56f159a3e4bc3b2338056544120dd",
@@ -78,12 +78,12 @@
"uploaded_by": "系统导入",
"version_number": 1,
"ingest_status": 1,
"ingest_status_updated_at": "2026-05-22T07:03:57.870777+00:00",
"ingest_completed_at": "",
"ingest_document_name": "",
"ingest_document_updated_at": "",
"ingest_status_updated_at": "2026-05-22T15:12:34.429968+00:00",
"ingest_completed_at": "2026-05-22T09:22:58.498888+00:00",
"ingest_document_name": "远光软件财务术语解释手册.docx",
"ingest_document_updated_at": "2026-05-22T07:00:22.352133+00:00",
"ingest_document_sha256": "",
"ingest_agent_run_id": ""
"ingest_agent_run_id": "run_8c1ab050c9734d96"
},
{
"id": "09fbcae74d3b41e498a47e05b45262cb",
@@ -99,12 +99,12 @@
"uploaded_by": "系统导入",
"version_number": 1,
"ingest_status": 1,
"ingest_status_updated_at": "2026-05-22T07:03:57.879239+00:00",
"ingest_completed_at": "",
"ingest_document_name": "",
"ingest_document_updated_at": "",
"ingest_status_updated_at": "2026-05-22T15:12:34.433141+00:00",
"ingest_completed_at": "2026-05-22T09:24:19.530985+00:00",
"ingest_document_name": "远光软件高新技术企业税收优惠政策汇总.pdf",
"ingest_document_updated_at": "2026-05-22T07:00:22.304623+00:00",
"ingest_document_sha256": "",
"ingest_agent_run_id": ""
"ingest_agent_run_id": "run_8c1ab050c9734d96"
},
{
"id": "5fb3c63fbfe244a280cf3316a20150cd",

View File

@@ -63,5 +63,75 @@
"original_doc_id": "a8f8465df08e455ebe133351721d49f8",
"original_track_id": "insert_20260519_155957_88c49850"
}
},
"c7601043d9944ef2bcf4d3f67ed253f7": {
"status": "processed",
"chunks_count": 2,
"chunks_list": [
"chunk-31ff57cf79d009c378478f065eda9d4d",
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"content_summary": "# Excel 工作簿:远光软件会计科目使用说明.xlsx\n\n## 工作表 1会计科目说明\n\n| 远光软件股份有限公司常用会计科目使用说明 | 列2 | 列3 | 列4 | 列5 |\n| --- | --- | --- | --- | --- |\n| 科目编码 | 科目名称 | 科目类别 | 使用说明 | 备注 |\n| 1001 | 库存现金 | 资产类 | 核算公司库存现金 | 日清月结 |\n| 1002 | 银行存款 | 资产类 | 核算存入银行的各项存款 | 按开户行明细 |\n| 112...",
"content_length": 2808,
"created_at": "2026-05-22T09:21:01.230400+00:00",
"updated_at": "2026-05-22T09:22:25.565409+00:00",
"file_path": "/app/server/storage/knowledge/财务知识库/远光软件会计科目使用说明.xlsx",
"track_id": "insert_20260522_092101_e754a15e",
"metadata": {
"processing_start_time": 1779441661,
"processing_end_time": 1779441745
}
},
"b0277cd76034437997fbf5219662725a": {
"status": "processed",
"chunks_count": 1,
"chunks_list": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"content_summary": "远光软件股份有限公司\n财务基础知识手册\n第一部分 会计基础知识\n一、会计要素\n会计要素包括资产、负债、所有者权益、收入、费用和利润。\n会计恒等式资产 = 负债 + 所有者权益\n二、常用会计科目\n科目类别\n科目名称\n说明\n资产类\n库存现金\n公司持有的现金\n资产类\n银行存款\n存放在银行的资金\n资产类\n应收账款\n因销售商品或提供劳务应收的款项\n资产类\n固定资产\n使用年限超过一年的有形资产\n负债类\n应付账款\n因购买商品或接受劳务应付的款项\n负债类\n应交税费\n应缴纳的各种税费\n负债类\n应付职工薪酬\n...",
"content_length": 1082,
"created_at": "2026-05-22T09:22:31.538281+00:00",
"updated_at": "2026-05-22T09:22:52.110824+00:00",
"file_path": "/app/server/storage/knowledge/财务知识库/远光软件财务基础知识手册.docx",
"track_id": "insert_20260522_092231_e1b9d415",
"metadata": {
"processing_start_time": 1779441751,
"processing_end_time": 1779441772
}
},
"23f56f159a3e4bc3b2338056544120dd": {
"status": "processed",
"chunks_count": 1,
"chunks_list": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"content_summary": "远光软件股份有限公司\n财务术语解释手册\n权责发生制\n以权利和责任的发生来决定收入和费用归属期的会计基础。即凡是当期已经实现的收入和已经发生或应当负担的费用不论款项是否收付都应当作为当期的收入和费用。\n收付实现制\n以现金收到或付出为标准来记录收入的实现和费用的发生。即凡是当期收到和支付的现金都作为当期的收入和费用。\n固定资产折旧\n固定资产在使用过程中因磨损而逐渐转移的价值。公司采用年限平均法计提折旧。\n摊销\n将无形资产或长期待摊费用按照规定期限分期计入当期损益的过程。\n增值税进项税额\n企业购...",
"content_length": 1040,
"created_at": "2026-05-22T09:22:44.268551+00:00",
"updated_at": "2026-05-22T09:23:11.334499+00:00",
"file_path": "/app/server/storage/knowledge/财务知识库/远光软件财务术语解释手册.docx",
"track_id": "insert_20260522_092244_2888d301",
"metadata": {
"processing_start_time": 1779441764,
"processing_end_time": 1779441791
}
},
"09fbcae74d3b41e498a47e05b45262cb": {
"status": "processed",
"chunks_count": 2,
"chunks_list": [
"chunk-2c8384b328272063de4dac306a52d21e",
"chunk-93d2389cdb74257e90201dccbc3f6539"
],
"content_summary": "远光软件股份有限公司高新技术企业税收优惠政策汇总\n\n 远光软件股份有限公司\n\n 2024年度\n\n一、企业所得税优惠\n\n1. 高新技术企业减按15%税率征收企业所得税\n\n- 条件:取得高新技术企业证书且在有效期内\n\n- 申请:向主管税务机关备案\n\n2. 技术转让所得优惠\n\n- 符合条件的技术转让所得500万元以下免征...",
"content_length": 1772,
"created_at": "2026-05-22T09:23:17.399741+00:00",
"updated_at": "2026-05-22T09:24:18.933073+00:00",
"file_path": "/app/server/storage/knowledge/财务知识库/远光软件高新技术企业税收优惠政策汇总.pdf",
"track_id": "insert_20260522_092317_ca603a9e",
"metadata": {
"processing_start_time": 1779441797,
"processing_end_time": 1779441858
}
}
}

View File

@@ -3,11 +3,15 @@
"chunk_ids": [
"chunk-aa5435156b829944c173fa1d2d7a93d4",
"chunk-18d968b78afe916b419c1b5973421ebe",
"chunk-dd87aa5bc62cc9587ecb4c26d35a5263"
"chunk-dd87aa5bc62cc9587ecb4c26d35a5263",
"chunk-31ff57cf79d009c378478f065eda9d4d",
"chunk-e726f44fb0287c5192cf61b350f18abb",
"chunk-78edb0c8ccc8238159196ecaeeb08d43",
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1",
"chunk-2c8384b328272063de4dac306a52d21e"
],
"count": 3,
"create_time": 1779011991,
"update_time": 1779011991,
"count": 8,
"update_time": 1779441830,
"_id": "远光软件股份有限公司"
},
"第一章总则": {
@@ -3359,5 +3363,913 @@
"create_time": 1779379005,
"update_time": 1779379005,
"_id": "Warning Icon"
},
"Excel工作簿远光软件会计科目使用说明.xlsx": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d"
],
"count": 1,
"create_time": 1779441741,
"update_time": 1779441741,
"_id": "Excel工作簿远光软件会计科目使用说明.xlsx"
},
"会计科目说明": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d"
],
"count": 1,
"create_time": 1779441741,
"update_time": 1779441741,
"_id": "会计科目说明"
},
"科目编码": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d",
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 2,
"create_time": 1779441741,
"update_time": 1779441741,
"_id": "科目编码"
},
"科目名称": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d",
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 2,
"create_time": 1779441741,
"update_time": 1779441741,
"_id": "科目名称"
},
"科目类别": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d",
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 2,
"create_time": 1779441741,
"update_time": 1779441741,
"_id": "科目类别"
},
"使用说明": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d",
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 2,
"create_time": 1779441741,
"update_time": 1779441741,
"_id": "使用说明"
},
"库存现金": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d",
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 2,
"update_time": 1779441756,
"_id": "库存现金"
},
"银行存款": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d",
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 2,
"update_time": 1779441756,
"_id": "银行存款"
},
"应收账款": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d",
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 2,
"update_time": 1779441756,
"_id": "应收账款"
},
"其他应收款": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d"
],
"count": 1,
"create_time": 1779441741,
"update_time": 1779441741,
"_id": "其他应收款"
},
"原材料": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d"
],
"count": 1,
"create_time": 1779441741,
"update_time": 1779441741,
"_id": "原材料"
},
"固定资产": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d",
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 2,
"update_time": 1779441756,
"_id": "固定资产"
},
"累计折旧": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d"
],
"count": 1,
"create_time": 1779441741,
"update_time": 1779441741,
"_id": "累计折旧"
},
"应付账款": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d",
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 2,
"update_time": 1779441756,
"_id": "应付账款"
},
"应交税费": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d",
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 2,
"update_time": 1779441756,
"_id": "应交税费"
},
"应付职工薪酬": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d",
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 2,
"update_time": 1779441756,
"_id": "应付职工薪酬"
},
"主营业务收入": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d",
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 2,
"update_time": 1779441756,
"_id": "主营业务收入"
},
"主营业务成本": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d"
],
"count": 1,
"create_time": 1779441741,
"update_time": 1779441741,
"_id": "主营业务成本"
},
"管理费用": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d",
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 2,
"update_time": 1779441756,
"_id": "管理费用"
},
"销售费用": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d",
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 2,
"update_time": 1779441756,
"_id": "销售费用"
},
"财务费用": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d"
],
"count": 1,
"create_time": 1779441741,
"update_time": 1779441741,
"_id": "财务费用"
},
"所得税费用": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d"
],
"count": 1,
"create_time": 1779441741,
"update_time": 1779441741,
"_id": "所得税费用"
},
"备注": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d",
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 2,
"create_time": 1779441741,
"update_time": 1779441741,
"_id": "备注"
},
"资产类": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d",
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 2,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "资产类"
},
"负债类": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d",
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 2,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "负债类"
},
"损益类": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d",
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 2,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "损益类"
},
"远光软件会计科目使用说明.xlsx": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "远光软件会计科目使用说明.xlsx"
},
"2221应交税费": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "2221应交税费"
},
"2211应付职工薪酬": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "2211应付职工薪酬"
},
"6001主营业务收入": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "6001主营业务收入"
},
"6401主营业务成本": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "6401主营业务成本"
},
"6601管理费用": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "6601管理费用"
},
"6602销售费用": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "6602销售费用"
},
"6603财务费用": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "6603财务费用"
},
"6801所得税费用": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "6801所得税费用"
},
"1001库存现金": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "1001库存现金"
},
"1002银行存款": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "1002银行存款"
},
"1122应收账款": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "1122应收账款"
},
"1221其他应收款": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "1221其他应收款"
},
"财务基础知识手册": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441755,
"update_time": 1779441755,
"_id": "财务基础知识手册"
},
"会计要素": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441755,
"update_time": 1779441755,
"_id": "会计要素"
},
"资产": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441755,
"update_time": 1779441755,
"_id": "资产"
},
"负债": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441755,
"update_time": 1779441755,
"_id": "负债"
},
"所有者权益": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441755,
"update_time": 1779441755,
"_id": "所有者权益"
},
"收入": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441755,
"update_time": 1779441755,
"_id": "收入"
},
"费用": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441755,
"update_time": 1779441755,
"_id": "费用"
},
"利润": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441756,
"update_time": 1779441756,
"_id": "利润"
},
"会计恒等式": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441756,
"update_time": 1779441756,
"_id": "会计恒等式"
},
"增值税": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43",
"chunk-2c8384b328272063de4dac306a52d21e"
],
"count": 2,
"update_time": 1779441830,
"_id": "增值税"
},
"企业所得税": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43",
"chunk-2c8384b328272063de4dac306a52d21e"
],
"count": 2,
"update_time": 1779441830,
"_id": "企业所得税"
},
"个人所得税": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441756,
"update_time": 1779441756,
"_id": "个人所得税"
},
"印花税": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441756,
"update_time": 1779441756,
"_id": "印花税"
},
"三大财务报表": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441756,
"update_time": 1779441756,
"_id": "三大财务报表"
},
"资产负债表": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441756,
"update_time": 1779441756,
"_id": "资产负债表"
},
"利润表": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441756,
"update_time": 1779441756,
"_id": "利润表"
},
"现金流量表": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441756,
"update_time": 1779441756,
"_id": "现金流量表"
},
"会计基础知识": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441756,
"update_time": 1779441756,
"_id": "会计基础知识"
},
"税务基础知识": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441757,
"update_time": 1779441757,
"_id": "税务基础知识"
},
"财务报表解读": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441757,
"update_time": 1779441757,
"_id": "财务报表解读"
},
"财务术语解释手册": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441774,
"update_time": 1779441774,
"_id": "财务术语解释手册"
},
"权责发生制": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441774,
"update_time": 1779441774,
"_id": "权责发生制"
},
"收付实现制": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441774,
"update_time": 1779441774,
"_id": "收付实现制"
},
"固定资产折旧": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441774,
"update_time": 1779441774,
"_id": "固定资产折旧"
},
"摊销": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441774,
"update_time": 1779441774,
"_id": "摊销"
},
"增值税进项税额": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441774,
"update_time": 1779441774,
"_id": "增值税进项税额"
},
"增值税销项税额": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441774,
"update_time": 1779441774,
"_id": "增值税销项税额"
},
"预算": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441774,
"update_time": 1779441774,
"_id": "预算"
},
"现金流": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441774,
"update_time": 1779441774,
"_id": "现金流"
},
"毛利率": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441774,
"update_time": 1779441774,
"_id": "毛利率"
},
"净资产收益率": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441774,
"update_time": 1779441774,
"_id": "净资产收益率"
},
"成本中心": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441774,
"update_time": 1779441774,
"_id": "成本中心"
},
"利润中心": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441774,
"update_time": 1779441774,
"_id": "利润中心"
},
"年限平均法": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441774,
"update_time": 1779441774,
"_id": "年限平均法"
},
"毛利润": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441774,
"update_time": 1779441774,
"_id": "毛利润"
},
"营业收入": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441775,
"update_time": 1779441775,
"_id": "营业收入"
},
"营业成本": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441775,
"update_time": 1779441775,
"_id": "营业成本"
},
"净利润": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441775,
"update_time": 1779441775,
"_id": "净利润"
},
"股东权益": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441775,
"update_time": 1779441775,
"_id": "股东权益"
},
"Training Expenses": {
"chunk_ids": [
"chunk-93d2389cdb74257e90201dccbc3f6539"
],
"count": 1,
"create_time": 1779441843,
"update_time": 1779441843,
"_id": "Training Expenses"
},
"Corporate Income Tax": {
"chunk_ids": [
"chunk-93d2389cdb74257e90201dccbc3f6539"
],
"count": 1,
"create_time": 1779441843,
"update_time": 1779441843,
"_id": "Corporate Income Tax"
},
"Venture Capital Deduction": {
"chunk_ids": [
"chunk-93d2389cdb74257e90201dccbc3f6539"
],
"count": 1,
"create_time": 1779441843,
"update_time": 1779441843,
"_id": "Venture Capital Deduction"
},
"Small And Medium High-Tech Enterprises": {
"chunk_ids": [
"chunk-93d2389cdb74257e90201dccbc3f6539"
],
"count": 1,
"create_time": 1779441843,
"update_time": 1779441843,
"_id": "Small And Medium High-Tech Enterprises"
},
"Taxable Income": {
"chunk_ids": [
"chunk-93d2389cdb74257e90201dccbc3f6539"
],
"count": 1,
"create_time": 1779441843,
"update_time": 1779441843,
"_id": "Taxable Income"
},
"Preferential Tax Policies": {
"chunk_ids": [
"chunk-93d2389cdb74257e90201dccbc3f6539"
],
"count": 1,
"create_time": 1779441843,
"update_time": 1779441843,
"_id": "Preferential Tax Policies"
},
"Investment Amount": {
"chunk_ids": [
"chunk-93d2389cdb74257e90201dccbc3f6539"
],
"count": 1,
"create_time": 1779441843,
"update_time": 1779441843,
"_id": "Investment Amount"
},
"70% Deduction Rate": {
"chunk_ids": [
"chunk-93d2389cdb74257e90201dccbc3f6539"
],
"count": 1,
"create_time": 1779441843,
"update_time": 1779441843,
"_id": "70% Deduction Rate"
},
"Other Preferential Policies": {
"chunk_ids": [
"chunk-93d2389cdb74257e90201dccbc3f6539"
],
"count": 1,
"create_time": 1779441830,
"update_time": 1779441830,
"_id": "Other Preferential Policies"
},
"主管税务机关": {
"chunk_ids": [
"chunk-2c8384b328272063de4dac306a52d21e"
],
"count": 1,
"create_time": 1779441830,
"update_time": 1779441830,
"_id": "主管税务机关"
},
"高新技术企业证书": {
"chunk_ids": [
"chunk-2c8384b328272063de4dac306a52d21e"
],
"count": 1,
"create_time": 1779441830,
"update_time": 1779441830,
"_id": "高新技术企业证书"
},
"高新技术企业减按15%税率征收企业所得税": {
"chunk_ids": [
"chunk-2c8384b328272063de4dac306a52d21e"
],
"count": 1,
"create_time": 1779441830,
"update_time": 1779441830,
"_id": "高新技术企业减按15%税率征收企业所得税"
},
"技术转让所得优惠": {
"chunk_ids": [
"chunk-2c8384b328272063de4dac306a52d21e"
],
"count": 1,
"create_time": 1779441830,
"update_time": 1779441830,
"_id": "技术转让所得优惠"
},
"软件产品增值税即征即退": {
"chunk_ids": [
"chunk-2c8384b328272063de4dac306a52d21e"
],
"count": 1,
"create_time": 1779441830,
"update_time": 1779441830,
"_id": "软件产品增值税即征即退"
},
"技术服务免征增值税": {
"chunk_ids": [
"chunk-2c8384b328272063de4dac306a52d21e"
],
"count": 1,
"create_time": 1779441830,
"update_time": 1779441830,
"_id": "技术服务免征增值税"
},
"研发费用加计扣除": {
"chunk_ids": [
"chunk-2c8384b328272063de4dac306a52d21e"
],
"count": 1,
"create_time": 1779441830,
"update_time": 1779441830,
"_id": "研发费用加计扣除"
},
"固定资产加速折旧": {
"chunk_ids": [
"chunk-2c8384b328272063de4dac306a52d21e"
],
"count": 1,
"create_time": 1779441830,
"update_time": 1779441830,
"_id": "固定资产加速折旧"
},
"软件企业职工培训费用": {
"chunk_ids": [
"chunk-2c8384b328272063de4dac306a52d21e"
],
"count": 1,
"create_time": 1779441830,
"update_time": 1779441830,
"_id": "软件企业职工培训费用"
},
"创业投资抵扣": {
"chunk_ids": [
"chunk-2c8384b328272063de4dac306a52d21e"
],
"count": 1,
"create_time": 1779441830,
"update_time": 1779441830,
"_id": "创业投资抵扣"
},
"中小高新技术企业": {
"chunk_ids": [
"chunk-2c8384b328272063de4dac306a52d21e"
],
"count": 1,
"create_time": 1779441830,
"update_time": 1779441830,
"_id": "中小高新技术企业"
},
"13%税率": {
"chunk_ids": [
"chunk-2c8384b328272063de4dac306a52d21e"
],
"count": 1,
"create_time": 1779441831,
"update_time": 1779441831,
"_id": "13%税率"
},
"3%实际税负": {
"chunk_ids": [
"chunk-2c8384b328272063de4dac306a52d21e"
],
"count": 1,
"create_time": 1779441831,
"update_time": 1779441831,
"_id": "3%实际税负"
}
}

File diff suppressed because one or more lines are too long

View File

@@ -383,5 +383,153 @@
"create_time": 1779379018,
"update_time": 1779379018,
"_id": "a8f8465df08e455ebe133351721d49f8"
},
"c7601043d9944ef2bcf4d3f67ed253f7": {
"entity_names": [
"损益类",
"6603财务费用",
"固定资产",
"银行存款",
"6601管理费用",
"资产类",
"1122应收账款",
"会计科目说明",
"使用说明",
"科目名称",
"财务费用",
"累计折旧",
"库存现金",
"6602销售费用",
"远光软件会计科目使用说明.xlsx",
"主营业务成本",
"1001库存现金",
"应付账款",
"1221其他应收款",
"6001主营业务收入",
"6801所得税费用",
"备注",
"科目类别",
"所得税费用",
"Excel工作簿远光软件会计科目使用说明.xlsx",
"负债类",
"2221应交税费",
"6401主营业务成本",
"应收账款",
"科目编码",
"应交税费",
"其他应收款",
"主营业务收入",
"原材料",
"管理费用",
"销售费用",
"应付职工薪酬",
"2211应付职工薪酬",
"1002银行存款",
"远光软件股份有限公司"
],
"count": 40,
"create_time": 1779441745,
"update_time": 1779441745,
"_id": "c7601043d9944ef2bcf4d3f67ed253f7"
},
"b0277cd76034437997fbf5219662725a": {
"entity_names": [
"固定资产",
"财务报表解读",
"银行存款",
"收入",
"负债",
"现金流量表",
"企业所得税",
"三大财务报表",
"会计恒等式",
"库存现金",
"所有者权益",
"费用",
"财务基础知识手册",
"应付账款",
"利润表",
"会计基础知识",
"应收账款",
"应交税费",
"主营业务收入",
"资产",
"管理费用",
"税务基础知识",
"应付职工薪酬",
"销售费用",
"印花税",
"资产负债表",
"个人所得税",
"会计要素",
"远光软件股份有限公司",
"利润",
"增值税"
],
"count": 31,
"create_time": 1779441772,
"update_time": 1779441772,
"_id": "b0277cd76034437997fbf5219662725a"
},
"23f56f159a3e4bc3b2338056544120dd": {
"entity_names": [
"净利润",
"财务术语解释手册",
"年限平均法",
"毛利润",
"预算",
"权责发生制",
"成本中心",
"摊销",
"营业收入",
"增值税进项税额",
"收付实现制",
"营业成本",
"增值税销项税额",
"净资产收益率",
"利润中心",
"固定资产折旧",
"股东权益",
"现金流",
"毛利率",
"远光软件股份有限公司"
],
"count": 20,
"create_time": 1779441791,
"update_time": 1779441791,
"_id": "23f56f159a3e4bc3b2338056544120dd"
},
"09fbcae74d3b41e498a47e05b45262cb": {
"entity_names": [
"Other Preferential Policies",
"3%实际税负",
"Preferential Tax Policies",
"研发费用加计扣除",
"Corporate Income Tax",
"中小高新技术企业",
"企业所得税",
"Taxable Income",
"主管税务机关",
"技术转让所得优惠",
"固定资产加速折旧",
"高新技术企业减按15%税率征收企业所得税",
"技术服务免征增值税",
"软件产品增值税即征即退",
"13%税率",
"软件企业职工培训费用",
"创业投资抵扣",
"Small And Medium High-Tech Enterprises",
"Training Expenses",
"高新技术企业证书",
"Venture Capital Deduction",
"70% Deduction Rate",
"Investment Amount",
"远光软件股份有限公司",
"增值税"
],
"count": 25,
"create_time": 1779441858,
"update_time": 1779441858,
"_id": "09fbcae74d3b41e498a47e05b45262cb"
}
}

View File

@@ -274,5 +274,205 @@
"create_time": 1779379018,
"update_time": 1779379018,
"_id": "a8f8465df08e455ebe133351721d49f8"
},
"c7601043d9944ef2bcf4d3f67ed253f7": {
"relation_pairs": [
[
"2221应交税费",
"远光软件股份有限公司"
],
[
"会计科目说明",
"备注"
],
[
"会计科目说明",
"科目类别"
],
[
"会计科目说明",
"科目名称"
],
[
"Excel工作簿远光软件会计科目使用说明.xlsx",
"会计科目说明"
],
[
"库存现金",
"资产类"
],
[
"6001主营业务收入",
"远光软件股份有限公司"
],
[
"1002银行存款",
"远光软件股份有限公司"
],
[
"1221其他应收款",
"远光软件股份有限公司"
],
[
"资产类",
"银行存款"
],
[
"6401主营业务成本",
"远光软件股份有限公司"
],
[
"Excel工作簿远光软件会计科目使用说明.xlsx",
"远光软件股份有限公司"
],
[
"会计科目说明",
"科目编码"
],
[
"1001库存现金",
"远光软件股份有限公司"
],
[
"会计科目说明",
"使用说明"
],
[
"远光软件会计科目使用说明.xlsx",
"远光软件股份有限公司"
],
[
"2211应付职工薪酬",
"远光软件股份有限公司"
],
[
"1122应收账款",
"远光软件股份有限公司"
]
],
"count": 18,
"create_time": 1779441745,
"update_time": 1779441745,
"_id": "c7601043d9944ef2bcf4d3f67ed253f7"
},
"b0277cd76034437997fbf5219662725a": {
"relation_pairs": [
[
"会计要素",
"资产"
],
[
"财务基础知识手册",
"远光软件股份有限公司"
],
[
"财务基础知识手册",
"财务报表解读"
],
[
"税务基础知识",
"财务基础知识手册"
],
[
"会计基础知识",
"财务基础知识手册"
]
],
"count": 5,
"create_time": 1779441772,
"update_time": 1779441772,
"_id": "b0277cd76034437997fbf5219662725a"
},
"23f56f159a3e4bc3b2338056544120dd": {
"relation_pairs": [
[
"毛利率",
"营业成本"
],
[
"摊销",
"财务术语解释手册"
],
[
"年限平均法",
"远光软件股份有限公司"
],
[
"增值税进项税额",
"财务术语解释手册"
],
[
"固定资产折旧",
"财务术语解释手册"
],
[
"净利润",
"净资产收益率"
],
[
"收付实现制",
"财务术语解释手册"
],
[
"权责发生制",
"财务术语解释手册"
],
[
"毛利润",
"毛利率"
],
[
"毛利率",
"营业收入"
],
[
"财务术语解释手册",
"远光软件股份有限公司"
],
[
"净资产收益率",
"股东权益"
],
[
"固定资产折旧",
"年限平均法"
]
],
"count": 13,
"create_time": 1779441791,
"update_time": 1779441791,
"_id": "23f56f159a3e4bc3b2338056544120dd"
},
"09fbcae74d3b41e498a47e05b45262cb": {
"relation_pairs": [
[
"Corporate Income Tax",
"Training Expenses"
],
[
"Small And Medium High-Tech Enterprises",
"Venture Capital Deduction"
],
[
"Taxable Income",
"Venture Capital Deduction"
],
[
"Preferential Tax Policies",
"Venture Capital Deduction"
],
[
"70% Deduction Rate",
"Venture Capital Deduction"
],
[
"3%实际税负",
"软件产品增值税即征即退"
]
],
"count": 6,
"create_time": 1779441858,
"update_time": 1779441858,
"_id": "09fbcae74d3b41e498a47e05b45262cb"
}
}

View File

@@ -583,5 +583,383 @@
"create_time": 1779379017,
"update_time": 1779379017,
"_id": "Receipt-Free Reimbursement<SEP>Submit Reimbursement"
},
"Excel工作簿远光软件会计科目使用说明.xlsx<SEP>远光软件股份有限公司": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d"
],
"count": 1,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "Excel工作簿远光软件会计科目使用说明.xlsx<SEP>远光软件股份有限公司"
},
"会计科目说明<SEP>科目编码": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d"
],
"count": 1,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "会计科目说明<SEP>科目编码"
},
"库存现金<SEP>资产类": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d"
],
"count": 1,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "库存现金<SEP>资产类"
},
"会计科目说明<SEP>科目名称": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d"
],
"count": 1,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "会计科目说明<SEP>科目名称"
},
"资产类<SEP>银行存款": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d"
],
"count": 1,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "资产类<SEP>银行存款"
},
"远光软件会计科目使用说明.xlsx<SEP>远光软件股份有限公司": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441742,
"update_time": 1779441742,
"_id": "远光软件会计科目使用说明.xlsx<SEP>远光软件股份有限公司"
},
"2221应交税费<SEP>远光软件股份有限公司": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441743,
"update_time": 1779441743,
"_id": "2221应交税费<SEP>远光软件股份有限公司"
},
"会计科目说明<SEP>科目类别": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d"
],
"count": 1,
"create_time": 1779441743,
"update_time": 1779441743,
"_id": "会计科目说明<SEP>科目类别"
},
"会计科目说明<SEP>使用说明": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d"
],
"count": 1,
"create_time": 1779441743,
"update_time": 1779441743,
"_id": "会计科目说明<SEP>使用说明"
},
"2211应付职工薪酬<SEP>远光软件股份有限公司": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441743,
"update_time": 1779441743,
"_id": "2211应付职工薪酬<SEP>远光软件股份有限公司"
},
"会计科目说明<SEP>备注": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d"
],
"count": 1,
"create_time": 1779441743,
"update_time": 1779441743,
"_id": "会计科目说明<SEP>备注"
},
"6001主营业务收入<SEP>远光软件股份有限公司": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441743,
"update_time": 1779441743,
"_id": "6001主营业务收入<SEP>远光软件股份有限公司"
},
"Excel工作簿远光软件会计科目使用说明.xlsx<SEP>会计科目说明": {
"chunk_ids": [
"chunk-31ff57cf79d009c378478f065eda9d4d"
],
"count": 1,
"create_time": 1779441744,
"update_time": 1779441744,
"_id": "Excel工作簿远光软件会计科目使用说明.xlsx<SEP>会计科目说明"
},
"6401主营业务成本<SEP>远光软件股份有限公司": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441744,
"update_time": 1779441744,
"_id": "6401主营业务成本<SEP>远光软件股份有限公司"
},
"1001库存现金<SEP>远光软件股份有限公司": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441744,
"update_time": 1779441744,
"_id": "1001库存现金<SEP>远光软件股份有限公司"
},
"1002银行存款<SEP>远光软件股份有限公司": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441744,
"update_time": 1779441744,
"_id": "1002银行存款<SEP>远光软件股份有限公司"
},
"1122应收账款<SEP>远光软件股份有限公司": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441745,
"update_time": 1779441745,
"_id": "1122应收账款<SEP>远光软件股份有限公司"
},
"1221其他应收款<SEP>远光软件股份有限公司": {
"chunk_ids": [
"chunk-e726f44fb0287c5192cf61b350f18abb"
],
"count": 1,
"create_time": 1779441745,
"update_time": 1779441745,
"_id": "1221其他应收款<SEP>远光软件股份有限公司"
},
"财务基础知识手册<SEP>远光软件股份有限公司": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441770,
"update_time": 1779441770,
"_id": "财务基础知识手册<SEP>远光软件股份有限公司"
},
"会计要素<SEP>资产": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441770,
"update_time": 1779441770,
"_id": "会计要素<SEP>资产"
},
"会计基础知识<SEP>财务基础知识手册": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441771,
"update_time": 1779441771,
"_id": "会计基础知识<SEP>财务基础知识手册"
},
"税务基础知识<SEP>财务基础知识手册": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441771,
"update_time": 1779441771,
"_id": "税务基础知识<SEP>财务基础知识手册"
},
"财务基础知识手册<SEP>财务报表解读": {
"chunk_ids": [
"chunk-78edb0c8ccc8238159196ecaeeb08d43"
],
"count": 1,
"create_time": 1779441771,
"update_time": 1779441771,
"_id": "财务基础知识手册<SEP>财务报表解读"
},
"财务术语解释手册<SEP>远光软件股份有限公司": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441775,
"update_time": 1779441775,
"_id": "财务术语解释手册<SEP>远光软件股份有限公司"
},
"年限平均法<SEP>远光软件股份有限公司": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441776,
"update_time": 1779441776,
"_id": "年限平均法<SEP>远光软件股份有限公司"
},
"权责发生制<SEP>财务术语解释手册": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441776,
"update_time": 1779441776,
"_id": "权责发生制<SEP>财务术语解释手册"
},
"毛利润<SEP>毛利率": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441776,
"update_time": 1779441776,
"_id": "毛利润<SEP>毛利率"
},
"收付实现制<SEP>财务术语解释手册": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441776,
"update_time": 1779441776,
"_id": "收付实现制<SEP>财务术语解释手册"
},
"毛利率<SEP>营业收入": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441776,
"update_time": 1779441776,
"_id": "毛利率<SEP>营业收入"
},
"净利润<SEP>净资产收益率": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441776,
"update_time": 1779441776,
"_id": "净利润<SEP>净资产收益率"
},
"固定资产折旧<SEP>财务术语解释手册": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441776,
"update_time": 1779441776,
"_id": "固定资产折旧<SEP>财务术语解释手册"
},
"毛利率<SEP>营业成本": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441776,
"update_time": 1779441776,
"_id": "毛利率<SEP>营业成本"
},
"净资产收益率<SEP>股东权益": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441776,
"update_time": 1779441776,
"_id": "净资产收益率<SEP>股东权益"
},
"摊销<SEP>财务术语解释手册": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441790,
"update_time": 1779441790,
"_id": "摊销<SEP>财务术语解释手册"
},
"固定资产折旧<SEP>年限平均法": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441790,
"update_time": 1779441790,
"_id": "固定资产折旧<SEP>年限平均法"
},
"增值税进项税额<SEP>财务术语解释手册": {
"chunk_ids": [
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
],
"count": 1,
"create_time": 1779441791,
"update_time": 1779441791,
"_id": "增值税进项税额<SEP>财务术语解释手册"
},
"Corporate Income Tax<SEP>Training Expenses": {
"chunk_ids": [
"chunk-93d2389cdb74257e90201dccbc3f6539"
],
"count": 1,
"create_time": 1779441857,
"update_time": 1779441857,
"_id": "Corporate Income Tax<SEP>Training Expenses"
},
"Taxable Income<SEP>Venture Capital Deduction": {
"chunk_ids": [
"chunk-93d2389cdb74257e90201dccbc3f6539"
],
"count": 1,
"create_time": 1779441857,
"update_time": 1779441857,
"_id": "Taxable Income<SEP>Venture Capital Deduction"
},
"3%实际税负<SEP>软件产品增值税即征即退": {
"chunk_ids": [
"chunk-2c8384b328272063de4dac306a52d21e"
],
"count": 1,
"create_time": 1779441857,
"update_time": 1779441857,
"_id": "3%实际税负<SEP>软件产品增值税即征即退"
},
"Small And Medium High-Tech Enterprises<SEP>Venture Capital Deduction": {
"chunk_ids": [
"chunk-93d2389cdb74257e90201dccbc3f6539"
],
"count": 1,
"create_time": 1779441858,
"update_time": 1779441858,
"_id": "Small And Medium High-Tech Enterprises<SEP>Venture Capital Deduction"
},
"Preferential Tax Policies<SEP>Venture Capital Deduction": {
"chunk_ids": [
"chunk-93d2389cdb74257e90201dccbc3f6539"
],
"count": 1,
"create_time": 1779441858,
"update_time": 1779441858,
"_id": "Preferential Tax Policies<SEP>Venture Capital Deduction"
},
"70% Deduction Rate<SEP>Venture Capital Deduction": {
"chunk_ids": [
"chunk-93d2389cdb74257e90201dccbc3f6539"
],
"count": 1,
"create_time": 1779441858,
"update_time": 1779441858,
"_id": "70% Deduction Rate<SEP>Venture Capital Deduction"
}
}

View File

@@ -174,5 +174,71 @@
"create_time": 1779378923,
"update_time": 1779378923,
"_id": "chunk-2224d777c0b72d0b2dab622c79096c2c"
},
"chunk-31ff57cf79d009c378478f065eda9d4d": {
"tokens": 1200,
"content": "# Excel 工作簿:远光软件会计科目使用说明.xlsx\n\n## 工作表 1会计科目说明\n\n| 远光软件股份有限公司常用会计科目使用说明 | 列2 | 列3 | 列4 | 列5 |\n| --- | --- | --- | --- | --- |\n| 科目编码 | 科目名称 | 科目类别 | 使用说明 | 备注 |\n| 1001 | 库存现金 | 资产类 | 核算公司库存现金 | 日清月结 |\n| 1002 | 银行存款 | 资产类 | 核算存入银行的各项存款 | 按开户行明细 |\n| 1122 | 应收账款 | 资产类 | 核算因销售商品/提供劳务应收款项 | 按客户明细 |\n| 1221 | 其他应收款 | 资产类 | 核算应收暂付款项 | 含备用金、押金 |\n| 1403 | 原材料 | 资产类 | 核算库存的各种材料 | |\n| 1601 | 固定资产 | 资产类 | 核算固定资产原值 | 按类别明细 |\n| 1602 | 累计折旧 | 资产类 | 核算固定资产累计折旧 | 贷方余额 |\n| 2202 | 应付账款 | 负债类 | 核算因购买商品/接受劳务应付款项 | 按供应商明细 |\n| 2221 | 应交税费 | 负债类 | 核算应缴纳的各种税费 | 按税种明细 |\n| 2211 | 应付职工薪酬 | 负债类 | 核算应付给职工的薪酬 | 含社保公积金 |\n| 6001 | 主营业务收入 | 损益类 | 核算主要经营业务产生的收入 | 按业务类型明细 |\n| 6401 | 主营业务成本 | 损益类 | 核算主要经营业务发生的成本 | |\n| 6601 | 管理费用 | 损益类 | 核算为管理生产经营发生的费用 | 按费用类型明细 |\n| 6602 | 销售费用 | 损益类 | 核算为销售产品发生的费用 | 按费用类型明细 |\n| 6603 | 财务费用 | 损益类 | 核算筹资等财务活动费用 | 含利息、手续费 |\n| 6801 | 所得税费用 | 损益类 | 核算企业所得税费用 | 含递延所得税 |\n\n### 行级检索线索\n\n- 会计科目说明 第 2 行:远光软件股份有限公司常用会计科目使用说明=科目编码列2=科目名称列3=科目类别列4=使用说明列5=备注\n\n- 会计科目说明 第 3 行:远光软件股份有限公司常用会计科目使用说明=1001列2=库存现金列3=资产类列4=核算公司库存现金列5=日清月结\n\n- 会计科目说明 第 4 行:远光软件股份有限公司常用会计科目使用说明=1002列2=银行存款列3=资产类列4=核算存入银行的各项存款列5=按开户行明细\n\n- 会计科目说明 第 5 行:远光软件股份有限公司常用会计科目使用说明=1122列2=应收账款列3=资产类列4=核算因销售商品/提供劳务应收款项列5=按客户明细\n\n- 会计科目说明 第 6 行:远光软件股份有限公司常用会计科目使用说明=1221列2=其他应收款列3=资产类列4=核算应收暂付款项列5=含备用金、押金\n\n- 会计科目说明 第 7 行:远光软件股份有限公司常用会计科目使用说明=1403列2=原材料列3=资产类列4=核算库存的各种材料\n\n- 会计科目说明 第 8 行:远光软件股份有限公司常用会计科目使用说明=1601列2=固定资产列3=资产类列4=核算固定资产原值列5=按类别明细\n\n- 会计科目说明 第 9 行:远光软件股份有限公司常用会计科目使用说明=1602列2=累计折旧列3=资产类列4=核算固定资产累计折旧列5=贷方余额\n\n- 会计科目说明 第 10 行:远光软件股份有限公司常用会计科目使用说明=2202列2=应付账款列3=负债类列4=核算因购买商品/接受劳务应付款项列5=按供应商明细\n\n- 会计科目说明 第 11 行:远光软件股份有限公司常用会计科目使用说明=2221列2=应交税费列3=负债类列4=核算应缴纳的各种税费列5=按税种明细",
"chunk_order_index": 0,
"full_doc_id": "c7601043d9944ef2bcf4d3f67ed253f7",
"file_path": "/app/server/storage/knowledge/财务知识库/远光软件会计科目使用说明.xlsx",
"llm_cache_list": [],
"create_time": 1779441661,
"update_time": 1779441661,
"_id": "chunk-31ff57cf79d009c378478f065eda9d4d"
},
"chunk-e726f44fb0287c5192cf61b350f18abb": {
"tokens": 952,
"content": "付账款列3=负债类列4=核算因购买商品/接受劳务应付款项列5=按供应商明细\n\n- 会计科目说明 第 11 行:远光软件股份有限公司常用会计科目使用说明=2221列2=应交税费列3=负债类列4=核算应缴纳的各种税费列5=按税种明细\n\n- 会计科目说明 第 12 行:远光软件股份有限公司常用会计科目使用说明=2211列2=应付职工薪酬列3=负债类列4=核算应付给职工的薪酬列5=含社保公积金\n\n- 会计科目说明 第 13 行:远光软件股份有限公司常用会计科目使用说明=6001列2=主营业务收入列3=损益类列4=核算主要经营业务产生的收入列5=按业务类型明细\n\n- 会计科目说明 第 14 行:远光软件股份有限公司常用会计科目使用说明=6401列2=主营业务成本列3=损益类列4=核算主要经营业务发生的成本\n\n- 会计科目说明 第 15 行:远光软件股份有限公司常用会计科目使用说明=6601列2=管理费用列3=损益类列4=核算为管理生产经营发生的费用列5=按费用类型明细\n\n- 会计科目说明 第 16 行:远光软件股份有限公司常用会计科目使用说明=6602列2=销售费用列3=损益类列4=核算为销售产品发生的费用列5=按费用类型明细\n\n- 会计科目说明 第 17 行:远光软件股份有限公司常用会计科目使用说明=6603列2=财务费用列3=损益类列4=核算筹资等财务活动费用列5=含利息、手续费\n\n- 会计科目说明 第 18 行:远光软件股份有限公司常用会计科目使用说明=6801列2=所得税费用列3=损益类列4=核算企业所得税费用列5=含递延所得税\n\n# 问答线索补充\n\n以下内容由入库阶段根据章节标题、条款、列表、键值对与相邻正文提炼供问答检索时优先命中更短、更直接的制度依据。\n\n- 正文:# Excel 工作簿:远光软件会计科目使用说明.xlsx\n- 正文:会计科目说明 第 2 行:远光软件股份有限公司常用会计科目使用说明=科目编码\n- 正文列2=科目名称\n- 正文列3=科目类别\n- 正文列4=使用说明\n- 正文列5=备注\n- 正文:会计科目说明 第 3 行:远光软件股份有限公司常用会计科目使用说明=1001\n- 正文列2=库存现金\n- 正文列3=资产类\n- 正文列4=核算公司库存现金\n- 正文列5=日清月结\n- 正文:会计科目说明 第 4 行:远光软件股份有限公司常用会计科目使用说明=1002\n- 正文列2=银行存款\n- 正文列4=核算存入银行的各项存款\n- 正文列5=按开户行明细\n- 正文:会计科目说明 第 5 行:远光软件股份有限公司常用会计科目使用说明=1122\n- 正文列2=应收账款\n- 正文列4=核算因销售商品/提供劳务应收款项\n- 正文列5=按客户明细\n- 正文:会计科目说明 第 6 行:远光软件股份有限公司常用会计科目使用说明=1221\n- 正文列2=其他应收款",
"chunk_order_index": 1,
"full_doc_id": "c7601043d9944ef2bcf4d3f67ed253f7",
"file_path": "/app/server/storage/knowledge/财务知识库/远光软件会计科目使用说明.xlsx",
"llm_cache_list": [],
"create_time": 1779441661,
"update_time": 1779441661,
"_id": "chunk-e726f44fb0287c5192cf61b350f18abb"
},
"chunk-78edb0c8ccc8238159196ecaeeb08d43": {
"tokens": 839,
"content": "远光软件股份有限公司\n财务基础知识手册\n第一部分 会计基础知识\n一、会计要素\n会计要素包括资产、负债、所有者权益、收入、费用和利润。\n会计恒等式资产 = 负债 + 所有者权益\n二、常用会计科目\n科目类别\n科目名称\n说明\n资产类\n库存现金\n公司持有的现金\n资产类\n银行存款\n存放在银行的资金\n资产类\n应收账款\n因销售商品或提供劳务应收的款项\n资产类\n固定资产\n使用年限超过一年的有形资产\n负债类\n应付账款\n因购买商品或接受劳务应付的款项\n负债类\n应交税费\n应缴纳的各种税费\n负债类\n应付职工薪酬\n应付给职工的工资、福利等\n损益类\n主营业务收入\n主要经营业务产生的收入\n损益类\n管理费用\n为管理生产经营发生的费用\n损益类\n销售费用\n为销售产品发生的费用\n第二部分 税务基础知识\n三、主要税种介绍\n增值税公司为一般纳税人软件服务适用6%税率软件产品销售适用13%税率。\n企业所得税税率为25%高新技术企业享受15%优惠税率。\n个人所得税按累进税率3%-45%,由公司代扣代缴。\n印花税对经济活动中的应税凭证征收。\n第三部分 财务报表解读\n四、三大财务报表\n资产负债表反映企业在某一特定日期的财务状况。\n利润表反映企业在一定期间的经营成果。\n现金流量表反映企业在一定期间现金和现金等价物的流入和流出。\n\n# 章节导航\n\n以下内容由入库阶段从制度原文中提取供检索时优先理解制度层级、条目和标准所在章节。\n\n- 一、会计要素\n- 二、常用会计科目\n- (四)印花税:对经济活动中的应税凭证征收。\n\n# 重点章节摘录\n\n## 一、会计要素\n\n会计要素包括资产、负债、所有者权益、收入、费用和利润。会计恒等式资产 = 负债 + 所有者权益\n\n## 二、常用会计科目\n\n科目类别科目名称说明\n\n## (四)印花税:对经济活动中的应税凭证征收。\n\n第三部分 财务报表解读\n\n# 问答线索补充\n\n以下内容由入库阶段根据章节标题、条款、列表、键值对与相邻正文提炼供问答检索时优先命中更短、更直接的制度依据。\n\n- 一、会计要素:会计要素包括:资产、负债、所有者权益、收入、费用和利润\n- 一、会计要素:会计恒等式:资产 = 负债 + 所有者权益\n- 二、常用会计科目:因销售商品或提供劳务应收的款项\n- 二、常用会计科目:因购买商品或接受劳务应付的款项\n- 二、常用会计科目:应缴纳的各种税费\n- 二、常用会计科目:应付职工薪酬\n- (四)印花税:对经济活动中的应税凭证征收。:第三部分 财务报表解读",
"chunk_order_index": 0,
"full_doc_id": "b0277cd76034437997fbf5219662725a",
"file_path": "/app/server/storage/knowledge/财务知识库/远光软件财务基础知识手册.docx",
"llm_cache_list": [],
"create_time": 1779441751,
"update_time": 1779441751,
"_id": "chunk-78edb0c8ccc8238159196ecaeeb08d43"
},
"chunk-2ee7e2a66cb544bdfe1b09e133863ad1": {
"tokens": 760,
"content": "远光软件股份有限公司\n财务术语解释手册\n权责发生制\n以权利和责任的发生来决定收入和费用归属期的会计基础。即凡是当期已经实现的收入和已经发生或应当负担的费用不论款项是否收付都应当作为当期的收入和费用。\n收付实现制\n以现金收到或付出为标准来记录收入的实现和费用的发生。即凡是当期收到和支付的现金都作为当期的收入和费用。\n固定资产折旧\n固定资产在使用过程中因磨损而逐渐转移的价值。公司采用年限平均法计提折旧。\n摊销\n将无形资产或长期待摊费用按照规定期限分期计入当期损益的过程。\n增值税进项税额\n企业购进货物、接受应税劳务或应税服务支付的增值税额可以从销项税额中抵扣。\n增值税销项税额\n企业销售货物、提供应税劳务或应税服务收取的增值税额。\n预算\n企业对未来一定时期内经营活动的数量化计划包括收入预算、支出预算、资本预算等。\n现金流\n企业在一定期间内现金和现金等价物流入和流出的数量。\n毛利率\n毛利润占营业收入的百分比反映企业产品或服务的初始盈利能力。计算公式毛利率 = (营业收入 - 营业成本)/ 营业收入 × 100%\n净资产收益率ROE\n净利润占股东权益的百分比反映股东投入资金的获利能力。计算公式ROE = 净利润 / 股东权益 × 100%\n成本中心\n企业内部只发生成本费用而不产生收入的组织单位用于成本核算和控制。\n利润中心\n企业内部既发生成本费用又产生收入的组织单位用于考核盈利能力。\n\n# 问答线索补充\n\n以下内容由入库阶段根据章节标题、条款、列表、键值对与相邻正文提炼供问答检索时优先命中更短、更直接的制度依据。\n\n- 正文:以权利和责任的发生来决定收入和费用归属期的会计基础\n- 正文:即凡是当期已经实现的收入和已经发生或应当负担的费用,不论款项是否收付,都应当作为当期的收入和费用\n- 正文:以现金收到或付出为标准来记录收入的实现和费用的发生\n- 正文:即凡是当期收到和支付的现金,都作为当期的收入和费用\n- 正文:企业购进货物、接受应税劳务或应税服务支付的增值税额,可以从销项税额中抵扣\n- 正文:企业销售货物、提供应税劳务或应税服务收取的增值税额\n- 正文:毛利润占营业收入的百分比,反映企业产品或服务的初始盈利能力\n- 正文:计算公式:毛利率 = (营业收入 - 营业成本)/ 营业收入 × 100%\n- 正文:净利润占股东权益的百分比,反映股东投入资金的获利能力\n- 正文计算公式ROE = 净利润 / 股东权益 × 100%",
"chunk_order_index": 0,
"full_doc_id": "23f56f159a3e4bc3b2338056544120dd",
"file_path": "/app/server/storage/knowledge/财务知识库/远光软件财务术语解释手册.docx",
"llm_cache_list": [],
"create_time": 1779441764,
"update_time": 1779441764,
"_id": "chunk-2ee7e2a66cb544bdfe1b09e133863ad1"
},
"chunk-2c8384b328272063de4dac306a52d21e": {
"tokens": 1150,
"content": "远光软件股份有限公司高新技术企业税收优惠政策汇总\n\n 远光软件股份有限公司\n\n 2024年度\n\n一、企业所得税优惠\n\n1. 高新技术企业减按15%税率征收企业所得税\n\n- 条件:取得高新技术企业证书且在有效期内\n\n- 申请:向主管税务机关备案\n\n2. 技术转让所得优惠\n\n- 符合条件的技术转让所得500万元以下免征企业所得税\n\n- 超过500万元的部分减半征收\n\n二、增值税优惠\n\n1. 软件产品增值税即征即退\n\n- 销售自行开发生产的软件产品按13%征收后\n\n- 实际税负超过3%的部分即征即退\n\n2. 技术服务免征增值税\n\n- 符合条件的技术转让、技术开发和相关的技术咨询、技术服务免征增值税\n\n三、研发费用加计扣除\n\n1. 一般企业研发费用按100%加计扣除\n\n- 形成无形资产的按200%摊销\n\n2. 适用范围:\n\n- 人员人工费用\n\n- 直接投入费用\n\n- 折旧费用\n\n- 无形资产摊销\n\n- 新产品设计费等\n\n 远光软件股份有限公司 - 第 <bound method FPDF.page_no of <__main__.ChinesePDF object at 0x000001F09798B750>>/2 页\n- 其他相关费用\n\n四、其他优惠政策\n\n1. 固定资产加速折旧:允许缩短折旧年限或加速折旧\n\n2. 软件企业职工培训费用:全额在企业所得税前扣除\n\n3. 创业投资抵扣投资未上市中小高新技术企业的按投资额70%抵扣应纳税所得额\n\n 远光软件股份有限公司 - 第 <bound method FPDF.page_no of <__main__.ChinesePDF object at 0x000001F09798B750>>/2 页\n\n# 章节导航\n\n以下内容由入库阶段从制度原文中提取供检索时优先理解制度层级、条目和标准所在章节。\n\n- 一、企业所得税优惠\n- 二、增值税优惠\n- 三、研发费用加计扣除\n- 四、其他优惠政策\n\n# 重点章节摘录\n\n## 一、企业所得税优惠\n\n1. 高新技术企业减按15%税率征收企业所得税;- 条件:取得高新技术企业证书且在有效期内;- 申请:向主管税务机关备案\n\n## 二、增值税优惠\n\n1. 软件产品增值税即征即退;- 销售自行开发生产的软件产品按13%征收后;- 实际税负超过3%的部分即征即退\n\n## 三、研发费用加计扣除\n\n1. 一般企业研发费用按100%加计扣除;- 形成无形资产的按200%摊销2. 适用范围:\n\n## 四、其他优惠政策\n\n1. 固定资产加速折旧允许缩短折旧年限或加速折旧2. 软件企业职工培训费用全额在企业所得税前扣除3. 创业投资抵扣投资未上市中小高新技术企业的按投资额70%抵扣应纳税所得额\n\n# 问答线索补充\n\n以下内容由入库阶段根据章节标题、条款、列表、键值对与相邻正文提炼供问答检索时优先命中更短、更直接的制度依据。\n\n- 一、企业所得税优惠1. 高新技术企业减按15%税率征收企业所得税\n- 一、企业所得税优惠:条件:取得高新技术企业证书且在有效期内\n- 一、企业所得税优惠:申请:向主管税务机关备案\n- 一、企业所得税优惠2. 技术转让所得优惠\n- 二、增值税优惠1. 软件产品增值税即征即退\n- 二、增值税优惠销售自行开发生产的软件产品按13%征收后\n- 二、增值税优惠实际税负超过3%的部分即征即退\n- 二、增值税优惠2. 技术服务免征增值税\n- 三、研发费用加计扣除1. 一般企业研发费用按100%加计扣除\n- 三、研发费用加计扣除形成无形资产的按200%摊销\n- 三、研发费用加计扣除2. 适用范围:\n- 三、研发费用加计扣除:人员人工费用\n- 四、其他优惠政策1. 固定资产加速折旧:允许缩短折旧年限或加速折旧\n- 四、其他优惠政策2. 软件企业职工培训费用:全额在企业所得税前扣除\n- 四、其他优惠政策3. 创业投资抵扣投资未上市中小高新技术企业的按投资额70%抵扣应纳税所得额",
"chunk_order_index": 0,
"full_doc_id": "09fbcae74d3b41e498a47e05b45262cb",
"file_path": "/app/server/storage/knowledge/财务知识库/远光软件高新技术企业税收优惠政策汇总.pdf",
"llm_cache_list": [],
"create_time": 1779441797,
"update_time": 1779441797,
"_id": "chunk-2c8384b328272063de4dac306a52d21e"
},
"chunk-93d2389cdb74257e90201dccbc3f6539": {
"tokens": 50,
"content": "培训费用:全额在企业所得税前扣除\n- 四、其他优惠政策3. 创业投资抵扣投资未上市中小高新技术企业的按投资额70%抵扣应纳税所得额",
"chunk_order_index": 1,
"full_doc_id": "09fbcae74d3b41e498a47e05b45262cb",
"file_path": "/app/server/storage/knowledge/财务知识库/远光软件高新技术企业税收优惠政策汇总.pdf",
"llm_cache_list": [],
"create_time": 1779441797,
"update_time": 1779441797,
"_id": "chunk-93d2389cdb74257e90201dccbc3f6539"
}
}

View File

@@ -181,6 +181,49 @@ def test_save_or_submit_persists_claim_only_after_save_draft_action() -> None:
assert _count_claims(db) == before_count + 1
def test_save_draft_persists_user_changed_expense_category() -> None:
    """Saving a draft keeps the expense category chosen in the review form.

    The free-text message describes a taxi ride, while the review form values
    submit an office-supplies category; the persisted claim and its first item
    are expected to carry the ``office`` code.
    """
    user_id = "save-draft-category@example.com"
    message = "业务发生时间:2026-03-04打车去客户现场交通费32元请帮我看看怎么报"

    # Values the user edited in the review panel before saving the draft.
    review_form_values = {
        "expense_type": "办公用品费",
        "amount": "32元",
        "occurred_date": "2026-03-04",
        "reason": "右侧核对后改为办公用品费",
    }
    context_json = {
        "name": "分类员工",
        "user_input_text": message,
        "review_action": "save_draft",
        "review_form_values": review_form_values,
    }

    with build_session() as db:
        db.add(Employee(employee_no="E5102", name="分类员工", email=user_id))
        db.commit()

        parse_request = OntologyParseRequest(query=message, user_id=user_id)
        ontology = SemanticOntologyService(db).parse(parse_request)

        outcome = ExpenseClaimService(db).save_or_submit_from_ontology(
            run_id=ontology.run_id,
            user_id=user_id,
            message=message,
            ontology=ontology,
            context_json=context_json,
        )

        saved_claim = db.get(ExpenseClaim, outcome["claim_id"])
        assert saved_claim is not None
        assert saved_claim.expense_type == "office"
        assert saved_claim.items[0].item_type == "office"
def test_unsaved_conversation_expires_after_retention_but_saved_conversation_stays() -> None:
with build_session() as db:
service = AgentConversationService(db)

View File

@@ -1,6 +1,14 @@
from __future__ import annotations
import json
import threading
from app.services import knowledge_rag as knowledge_rag_module
from app.services.knowledge_ingest_log import (
build_document_graph_summary,
build_ingest_document_summary,
build_ingest_status_summary,
)
from app.services.knowledge_rag import KnowledgeRagService
@@ -86,7 +94,10 @@ def test_build_hits_demotes_chapter_navigation_for_specific_rule_queries() -> No
{
"chunk_id": "body-1",
"file_path": "/tmp/doc-1__费用制度.md",
"content": "附表3支出归口管理部门与归口业务范围\n组织人事部:探亲差旅、条件艰苦及安全风险较高区域补助等支出。",
"content": (
"附表3支出归口管理部门与归口业务范围\n"
"组织人事部:探亲差旅、条件艰苦及安全风险较高区域补助等支出。"
),
},
],
entities=[],
@@ -100,9 +111,11 @@ def test_resolve_default_qdrant_url_prefers_container_host(monkeypatch) -> None:
monkeypatch.setattr(
knowledge_rag_module.socket,
"getaddrinfo",
lambda hostname, port: [("family", "type", "proto", "canonname", ("172.21.0.2", 0))]
if hostname == "qdrant"
else [],
lambda hostname, port: (
[("family", "type", "proto", "canonname", ("172.21.0.2", 0))]
if hostname == "qdrant"
else []
),
)
assert knowledge_rag_module._resolve_default_qdrant_url() == "http://qdrant:6333"
@@ -117,6 +130,45 @@ def test_resolve_default_qdrant_url_falls_back_to_loopback(monkeypatch) -> None:
assert knowledge_rag_module._resolve_default_qdrant_url() == "http://127.0.0.1:6333"
def test_runtime_cache_is_isolated_by_thread(monkeypatch) -> None:
    """Each thread gets its own runtime instance; shutdown finalizes them all.

    The runtime class is replaced by a fake that records every construction,
    and the runtime signature is pinned so that both threads use the same
    configuration. Repeated lookups on the main thread must return the same
    object, while a worker thread must receive a different one.
    """
    # Start from a clean cache so earlier tests cannot influence the counts.
    knowledge_rag_module.shutdown_knowledge_rag_runtime()
    created_runtimes = []

    class FakeRuntime:
        """Stand-in runtime that records construction and finalization."""

        def __init__(self, **_kwargs):
            self.finalized = False
            created_runtimes.append(self)

        def finalize(self):
            self.finalized = True

    monkeypatch.setattr(knowledge_rag_module, "_LightRagRuntime", FakeRuntime)
    monkeypatch.setattr(
        KnowledgeRagService,
        "_build_runtime_signature",
        lambda self: (("same-config",), {}),
    )

    try:
        service = KnowledgeRagService()
        main_runtime = service._get_runtime()
        assert service._get_runtime() is main_runtime

        worker_runtimes = []

        def load_worker_runtime() -> None:
            worker_runtimes.append(KnowledgeRagService()._get_runtime())

        thread = threading.Thread(target=load_worker_runtime)
        thread.start()
        thread.join(timeout=5)
        # join() returns None even on timeout, so check liveness explicitly
        # instead of failing later with an unrelated IndexError.
        assert not thread.is_alive(), "worker thread did not finish within 5 seconds"
        # An exception inside the worker is not propagated to this thread.
        assert len(worker_runtimes) == 1, "worker thread did not produce a runtime"

        assert len(created_runtimes) == 2
        assert worker_runtimes[0] is not main_runtime
    finally:
        # Always clear the cached fake runtimes, even when an assertion above
        # fails, so they cannot leak into subsequent tests.
        knowledge_rag_module.shutdown_knowledge_rag_runtime()

    assert all(runtime.finalized for runtime in created_runtimes)
def test_is_query_ready_status_rejects_failed_status_even_with_chunks() -> None:
assert (
KnowledgeRagService.is_query_ready_status(
@@ -141,3 +193,89 @@ def test_is_query_ready_status_rejects_processing_status_even_with_chunks() -> N
)
is False
)
def test_build_document_graph_summary_reads_lightrag_storage(tmp_path) -> None:
    """The graph summary is assembled from the LightRAG key-value store files.

    Three JSON files are written into a temporary workspace. The entity list
    contains a duplicate name and the chunks are stored with ``chunk-2`` first;
    the summary is expected to report two entities and to list the chunks as
    ``chunk-1`` followed by ``chunk-2``.
    """
    workspace = tmp_path / "knowledge" / ".lightrag" / "test_workspace"
    workspace.mkdir(parents=True)

    second_chunk = {
        "_id": "chunk-2",
        "full_doc_id": "doc-1",
        "chunk_order_index": 1,
        "tokens": 45,
        "content": "第二条 支出审批需要结合预算、归口部门和授权标准执行。",
    }
    first_chunk = {
        "_id": "chunk-1",
        "full_doc_id": "doc-1",
        "chunk_order_index": 0,
        "tokens": 31,
        "content": "第一条 本办法适用于公司支出管理。",
    }
    # File name -> payload; the chunk store deliberately lists chunk-2 first.
    storage_payloads = {
        "kv_store_full_entities.json": {
            "doc-1": {"entity_names": ["远光软件", "支出管理", "远光软件"]}
        },
        "kv_store_full_relations.json": {
            "doc-1": {"relation_pairs": [["远光软件", "支出管理"]]}
        },
        "kv_store_text_chunks.json": {
            "chunk-2": second_chunk,
            "chunk-1": first_chunk,
        },
    }
    for file_name, payload in storage_payloads.items():
        (workspace / file_name).write_text(json.dumps(payload), encoding="utf-8")

    summary = build_document_graph_summary(
        tmp_path,
        workspace="test_workspace",
        document_id="doc-1",
    )

    expected_relations = [{"source": "远光软件", "target": "支出管理", "type": "关联"}]
    assert summary["entity_count"] == 2
    assert summary["entities"] == ["远光软件", "支出管理"]
    assert summary["relation_count"] == 1
    assert summary["relations"] == expected_relations
    assert [item["id"] for item in summary["chunks"]] == ["chunk-1", "chunk-2"]
def test_build_ingest_document_summary_extracts_sections() -> None:
    """The ingest document summary exposes the file name and its sections.

    For the given indexed text the summary is expected to report two sections,
    the first of which is titled "第一章 总则".
    """
    entry = {
        "original_name": "公司支出管理办法.pdf",
        "folder": "制度文件",
        "extension": "pdf",
        "mime_type": "application/pdf",
    }
    raw_text = "第一章 总则\n本办法用于规范公司支出。"
    indexed_text = "# 第一章 总则\n本办法用于规范公司支出。\n第二条 审批\n审批需按授权执行。"

    summary = build_ingest_document_summary(
        document_id="doc-1",
        entry=entry,
        raw_text=raw_text,
        indexed_text=indexed_text,
    )

    assert summary["name"] == "公司支出管理办法.pdf"
    assert summary["section_count"] == 2
    first_section = summary["sections"][0]
    assert first_section["title"] == "第一章 总则"
def test_build_ingest_status_summary_keeps_chunk_status() -> None:
    """The status summary carries over status, readiness and chunk details.

    The status, query-ready flag, chunk count and chunk id list supplied in the
    status payload are expected to appear in the resulting summary.
    """
    status_payload = {
        "status": "processed",
        "query_ready": True,
        "chunks_count": 2,
        "chunks_list": ["chunk-1", "chunk-2"],
    }
    graph_summary = {
        "entity_count": 1,
        "relation_count": 0,
        "entities": ["预算"],
        "relations": [],
    }

    summary = build_ingest_status_summary(
        status_payload=status_payload,
        graph_summary=graph_summary,
    )

    assert summary["lightrag_status"] == "processed"
    assert summary["query_ready"] is True
    assert summary["chunk_count"] == 2
    assert summary["chunk_ids"] == ["chunk-1", "chunk-2"]

View File

@@ -389,10 +389,10 @@ def test_semantic_ontology_service_prefers_expense_for_customer_entertainment_na
assert result.clarification_required is True
assert "customer_name" in result.missing_slots
assert "participants" in result.missing_slots
assert any(
item.type == "expense_type" and item.normalized_value == "entertainment"
for item in result.entities
)
assert any(
item.type == "expense_type" and item.normalized_value == "meal"
for item in result.entities
)
def test_semantic_ontology_service_uses_client_local_date_for_relative_time() -> None:
@@ -556,6 +556,39 @@ def test_semantic_ontology_service_maps_taxi_ticket_reimbursement_to_transport_d
)
@pytest.mark.parametrize(
    "query,expected_type",
    [
        ("报销飞机票和行程单", "travel"),
        ("报销酒店发票和房费", "hotel"),
        ("报销滴滴打车票", "transport"),
        ("报销工作餐餐费", "meal"),
        ("报销会议场地费", "meeting"),
        ("报销客户接待餐", "meal"),
        ("报销打印纸和硒鼓", "office"),
        ("报销培训课程费", "training"),
        ("报销手机话费和流量费", "communication"),
        ("报销员工体检费", "welfare"),
    ],
)
def test_semantic_ontology_service_covers_common_expense_scene_keywords(
    query: str,
    expected_type: str,
) -> None:
    """Common reimbursement phrases resolve to the expected expense type.

    Every parametrized query must be parsed as an ``expense`` scenario with a
    ``draft`` intent, and must yield an ``expense_type`` entity whose
    normalized value equals ``expected_type``.
    """
    session_factory = build_session_factory()
    with session_factory() as db:
        parse_request = OntologyParseRequest(query=query, user_id="pytest")
        result = SemanticOntologyService(db).parse(parse_request)

        assert result.scenario == "expense"
        assert result.intent == "draft"

        # Collect the normalized values of all expense-type entities.
        detected_types = [
            entity.normalized_value
            for entity in result.entities
            if entity.type == "expense_type"
        ]
        assert expected_type in detected_types
def test_semantic_ontology_service_uses_model_parse_when_available(monkeypatch) -> None:
session_factory = build_session_factory()
with session_factory() as db:

View File

@@ -540,7 +540,11 @@ def test_user_agent_asks_for_type_when_trip_context_is_ambiguous() -> None:
"交通费",
"住宿费",
"业务招待费",
"办公",
"会务",
"办公用品费",
"培训费",
"通讯费",
"福利费",
"其他费用",
]
assert response.suggested_actions[0].payload["original_message"] == message
@@ -729,6 +733,9 @@ def test_user_agent_keeps_taxi_ticket_for_customer_dropoff_as_transport_expense(
assert "业务招待费" not in response.review_payload.intent_summary
assert "客户名称" not in response.review_payload.missing_slots
assert "参与人员" not in response.review_payload.missing_slots
edit_field_keys = {item.key for item in response.review_payload.edit_fields}
assert "merchant_name" not in edit_field_keys
assert "participants" not in edit_field_keys
def test_user_agent_keeps_travel_range_when_user_adds_receipts_after_text_context() -> None:
@@ -1000,6 +1007,9 @@ def test_user_agent_transport_flow_infers_reason_and_does_not_require_location_o
assert response.review_payload is not None
slot_map = {item.key: item for item in response.review_payload.slot_cards}
document_card = response.review_payload.document_cards[0]
assert document_card.scene_label == "出租车/网约车票据"
assert document_card.suggested_expense_type == "transport"
assert slot_map["reason"].value == "交通出行"
assert slot_map["reason"].status == "inferred"
assert "酒店/商户" not in response.review_payload.missing_slots
@@ -1189,8 +1199,15 @@ def test_user_agent_document_service_normalizes_ocr_fields_and_scene() -> None:
assert fields["列车出发时间"] == "2026-03-04"
assert "商户/酒店" not in fields
assert document_service.extract_amount_text_from_value("滴滴出行 支付金额 1 元,实付 13.4 元,订单号 12345678") == "13.40元"
taxi_classified = document_service.classify_document({"filename": "行程单_的士票.jpg", "summary": "的士 车费 48 元"})
assert taxi_classified["document_type"] == "taxi_receipt"
assert taxi_classified["expense_type"] == "transport"
assert taxi_classified["scene_label"] == "出租车/网约车票据"
ship_classified = document_service.classify_document({"filename": "轮船票.jpg", "summary": "轮船 船票 金额 180 元"})
assert ship_classified["document_type"] == "ship_ticket"
assert ship_classified["scene_label"] == "轮船票"
assert classified["document_type"] == "meal_receipt"
assert classified["expense_type"] == "entertainment"
assert classified["expense_type"] == "meal"
assert document_service.infer_expense_type_from_documents(
[{"filename": "客户餐饮发票.jpg", "summary": "餐饮发票 客户招待 金额 320 元"}],
expense_type_code="entertainment",
@@ -1262,11 +1279,13 @@ def test_user_agent_builds_review_payload_for_multi_document_expense_flow() -> N
assert response.review_payload is not None
assert len(response.review_payload.document_cards) == 2
assert len(response.review_payload.claim_groups) == 2
assert response.review_payload.missing_slots == ["参与人员"]
assert response.review_payload.missing_slots == ["参与人员", "酒店的报销票据待上传(必须)"]
assert [item.action_type for item in response.review_payload.confirmation_actions] == [
"save_draft",
]
assert any(item.scene_label == "业务招待费" for item in response.review_payload.document_cards)
assert any(item.scene_label == "餐饮发票" for item in response.review_payload.document_cards)
assert all(item.scene_label != "业务招待费" for item in response.review_payload.document_cards)
assert any(item.scene_label == "业务招待费" for item in response.review_payload.claim_groups)
assert f"时间:{yesterday}" in response.review_payload.intent_summary
slot_map = {item.key: item for item in response.review_payload.slot_cards}
assert slot_map["time_range"].value == yesterday
@@ -1899,7 +1918,58 @@ def test_user_agent_review_payload_prechecks_taxi_amount_against_rule_standard()
assert "单笔交通金额" in combined
assert "报销场景提交与附件标准" in combined
assert amount_brief.level == "high"
assert any(item.title == "附件金额测算结果" for item in response.review_payload.risk_briefs)
measurement = next(item for item in response.review_payload.risk_briefs if item.title == "附件金额测算异常")
assert measurement.level == "warning"
assert "超出标准" in measurement.detail
def test_user_agent_review_payload_does_not_mark_compliant_taxi_amount_as_low_risk() -> None:
    """A taxi receipt of 59.10 produces no amount-measurement risk brief.

    The review payload must contain neither the "附件金额测算结果" nor the
    "附件金额测算异常" brief, and no brief detail may mention "测算通过".
    """
    user_id = "pytest-taxi-pass@example.com"
    query = "我上传一张的士票59.10元,帮我生成交通费报销草稿"

    # Single OCR result describing the uploaded taxi receipt.
    taxi_document = {
        "filename": "的士1.jpg",
        "document_type": "taxi_receipt",
        "summary": "出租车/网约车票据 支付金额 59.10 元",
        "text": "的士 车费 59.10 元",
        "avg_score": 0.95,
        "document_fields": [
            {"key": "amount", "label": "支付金额", "value": "59.10"},
        ],
        "warnings": [],
    }
    context = {
        "name": "张三",
        "attachment_names": ["的士1.jpg"],
        "attachment_count": 1,
        "ocr_documents": [taxi_document],
    }

    session_factory = build_session_factory()
    with session_factory() as db:
        parse_request = OntologyParseRequest(
            query=query,
            user_id=user_id,
            context_json=context,
        )
        ontology = SemanticOntologyService(db).parse(parse_request)

        agent_request = UserAgentRequest(
            run_id=ontology.run_id,
            user_id=user_id,
            message=query,
            ontology=ontology,
            context_json=context,
            tool_payload={"draft_only": True},
        )
        response = UserAgentService(db).respond(agent_request)

        assert response.review_payload is not None
        risk_titles = [brief.title for brief in response.review_payload.risk_briefs]
        risk_details = "\n".join(
            brief.detail for brief in response.review_payload.risk_briefs
        )
        assert "附件金额测算结果" not in risk_titles
        assert "附件金额测算异常" not in risk_titles
        assert "测算通过" not in risk_details
def test_user_agent_review_payload_uses_finance_spreadsheet_hotel_amount_standard() -> None:
@@ -2067,8 +2137,9 @@ def test_user_agent_review_payload_uses_finance_spreadsheet_meal_allowance_stand
assert "直辖市/特区" in combined
assert "公司差旅费报销规则" in combined
assert meal_brief.level == "high"
measurement = next(item for item in response.review_payload.risk_briefs if item.title == "附件金额测算结果")
measurement = next(item for item in response.review_payload.risk_briefs if item.title == "附件金额测算异常")
assert "伙食补助标准 65.00" in measurement.detail
assert "超出标准" in measurement.detail
def test_user_agent_filters_deprecated_review_risk_briefs() -> None: