feat: 重构知识库系统,移除Hermes集成,增强RAG和同步功能
主要变更: - 移除Hermes智能体及相关回调服务 - 新增知识库RAG、同步、调度、规范化和索引任务服务 - 重构orchestrator服务,增强运行时聊天功能 - 更新前端聊天、政策制度、设置等页面样式和逻辑 - 更新expense_claims和document_intelligence服务 - 删除llm_wiki相关服务和测试文件 - 更新docker-compose配置和启动脚本
This commit is contained in:
113
.tmp/lightrag_inspect/lightrag_pkg/lightrag/constants.py
Normal file
113
.tmp/lightrag_inspect/lightrag_pkg/lightrag/constants.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""
|
||||
Centralized configuration constants for LightRAG.
|
||||
|
||||
This module defines default values for configuration constants used across
|
||||
different parts of the LightRAG system. Centralizing these values ensures
|
||||
consistency and makes maintenance easier.
|
||||
"""
|
||||
|
||||
# Default values for server settings
|
||||
DEFAULT_WOKERS = 2
|
||||
DEFAULT_MAX_GRAPH_NODES = 1000
|
||||
|
||||
# Default values for extraction settings
|
||||
DEFAULT_SUMMARY_LANGUAGE = "English" # Default language for document processing
|
||||
DEFAULT_MAX_GLEANING = 1
|
||||
DEFAULT_ENTITY_NAME_MAX_LENGTH = 256
|
||||
|
||||
# Number of description fragments to trigger LLM summary
|
||||
DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 8
|
||||
# Max description token size to trigger LLM summary
|
||||
DEFAULT_SUMMARY_MAX_TOKENS = 1200
|
||||
# Recommended LLM summary output length in tokens
|
||||
DEFAULT_SUMMARY_LENGTH_RECOMMENDED = 600
|
||||
# Maximum token size sent to LLM for summary
|
||||
DEFAULT_SUMMARY_CONTEXT_SIZE = 12000
|
||||
# Maximum token size allowed for entity extraction input context
|
||||
DEFAULT_MAX_EXTRACT_INPUT_TOKENS = 20480
|
||||
# Default entities to extract if ENTITY_TYPES is not specified in .env
|
||||
DEFAULT_ENTITY_TYPES = [
|
||||
"Person",
|
||||
"Creature",
|
||||
"Organization",
|
||||
"Location",
|
||||
"Event",
|
||||
"Concept",
|
||||
"Method",
|
||||
"Content",
|
||||
"Data",
|
||||
"Artifact",
|
||||
"NaturalObject",
|
||||
]
|
||||
|
||||
# Separator for: description, source_id and relation-key fields (cannot be changed after data has been inserted)
|
||||
GRAPH_FIELD_SEP = "<SEP>"
|
||||
|
||||
# Query and retrieval configuration defaults
|
||||
DEFAULT_TOP_K = 40
|
||||
DEFAULT_CHUNK_TOP_K = 20
|
||||
DEFAULT_MAX_ENTITY_TOKENS = 6000
|
||||
DEFAULT_MAX_RELATION_TOKENS = 8000
|
||||
DEFAULT_MAX_TOTAL_TOKENS = 30000
|
||||
DEFAULT_COSINE_THRESHOLD = 0.2
|
||||
DEFAULT_RELATED_CHUNK_NUMBER = 5
|
||||
DEFAULT_KG_CHUNK_PICK_METHOD = "VECTOR"
|
||||
|
||||
# TODO: Deprecated. All conversation_history messages are sent to the LLM.
|
||||
DEFAULT_HISTORY_TURNS = 0
|
||||
|
||||
# Rerank configuration defaults
|
||||
DEFAULT_MIN_RERANK_SCORE = 0.0
|
||||
DEFAULT_RERANK_BINDING = "null"
|
||||
|
||||
# Default source ids limit in meta data for entity and relation
|
||||
DEFAULT_MAX_SOURCE_IDS_PER_ENTITY = 300
|
||||
DEFAULT_MAX_SOURCE_IDS_PER_RELATION = 300
|
||||
### control chunk_ids limitation method: KEEP, FIFO
|
||||
### FIFO: First in first out
|
||||
### KEEP: Keep oldest (fewer merge actions, and faster)
|
||||
SOURCE_IDS_LIMIT_METHOD_KEEP = "KEEP"
|
||||
SOURCE_IDS_LIMIT_METHOD_FIFO = "FIFO"
|
||||
DEFAULT_SOURCE_IDS_LIMIT_METHOD = SOURCE_IDS_LIMIT_METHOD_FIFO
|
||||
VALID_SOURCE_IDS_LIMIT_METHODS = {
|
||||
SOURCE_IDS_LIMIT_METHOD_KEEP,
|
||||
SOURCE_IDS_LIMIT_METHOD_FIFO,
|
||||
}
|
||||
# Maximum number of file paths stored in entity/relation file_path field (for display only; does not affect query performance)
|
||||
DEFAULT_MAX_FILE_PATHS = 100
|
||||
|
||||
# Field length of file_path in Milvus Schema for entity and relation (Should not be changed)
|
||||
# file_path must store all file paths up to the DEFAULT_MAX_FILE_PATHS limit within the metadata.
|
||||
DEFAULT_MAX_FILE_PATH_LENGTH = 32768
|
||||
# Placeholder for more file paths in meta data for entity and relation (Should not be changed)
|
||||
DEFAULT_FILE_PATH_MORE_PLACEHOLDER = "truncated"
|
||||
|
||||
# Default temperature for LLM
|
||||
DEFAULT_TEMPERATURE = 1.0
|
||||
|
||||
# Async configuration defaults
|
||||
DEFAULT_MAX_ASYNC = 4 # Default maximum async operations
|
||||
DEFAULT_MAX_PARALLEL_INSERT = 2 # Default maximum parallel insert operations
|
||||
|
||||
# Embedding configuration defaults
|
||||
DEFAULT_EMBEDDING_FUNC_MAX_ASYNC = 8 # Default max async for embedding functions
|
||||
DEFAULT_EMBEDDING_BATCH_NUM = 10 # Default batch size for embedding computations
|
||||
|
||||
# Gunicorn worker timeout
|
||||
DEFAULT_TIMEOUT = 300
|
||||
|
||||
# Default llm and embedding timeout
|
||||
DEFAULT_LLM_TIMEOUT = 180
|
||||
DEFAULT_EMBEDDING_TIMEOUT = 30
|
||||
|
||||
# Logging configuration defaults
|
||||
DEFAULT_LOG_MAX_BYTES = 10485760 # Default 10MB
|
||||
DEFAULT_LOG_BACKUP_COUNT = 5 # Default 5 backups
|
||||
DEFAULT_LOG_FILENAME = "lightrag.log" # Default log filename
|
||||
|
||||
# Ollama server configuration defaults
|
||||
DEFAULT_OLLAMA_MODEL_NAME = "lightrag"
|
||||
DEFAULT_OLLAMA_MODEL_TAG = "latest"
|
||||
DEFAULT_OLLAMA_MODEL_SIZE = 7365960935
|
||||
DEFAULT_OLLAMA_CREATED_AT = "2024-01-15T00:00:00Z"
|
||||
DEFAULT_OLLAMA_DIGEST = "sha256:lightrag"
|
||||
Reference in New Issue
Block a user