feat: 重构知识库系统,移除Hermes集成,增强RAG和同步功能

主要变更:
- 移除Hermes智能体及相关回调服务
- 新增知识库RAG、同步、调度、规范化和索引任务服务
- 重构orchestrator服务,增强运行时聊天功能
- 更新前端聊天、政策制度、设置等页面样式和逻辑
- 更新expense_claims和document_intelligence服务
- 删除llm_wiki相关服务和测试文件
- 更新docker-compose配置和启动脚本
This commit is contained in:
caoxiaozhu
2026-05-17 08:38:41 +00:00
parent 212c935308
commit 68f663f2f4
308 changed files with 83729 additions and 13588 deletions

View File

@@ -0,0 +1,22 @@
from typing import TYPE_CHECKING, Any
from ._version import __version__ as __version__
__all__ = ["LightRAG", "QueryParam", "__version__"]
if TYPE_CHECKING:
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
def __getattr__(name: str) -> Any:
if name in {"LightRAG", "QueryParam"}:
from .lightrag import LightRAG, QueryParam
value = {"LightRAG": LightRAG, "QueryParam": QueryParam}[name]
globals()[name] = value
return value
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
__author__ = "Zirui Guo"
__url__ = "https://github.com/HKUDS/LightRAG"

View File

@@ -0,0 +1,4 @@
"""Lightweight version definitions shared by packaging and runtime code."""
__version__ = "1.4.16"
__api_version__ = "0291"

View File

@@ -0,0 +1 @@
from .._version import __api_version__ as __api_version__

View File

@@ -0,0 +1,163 @@
from datetime import datetime, timedelta, timezone
import jwt
from dotenv import load_dotenv
from fastapi import HTTPException, status
from pydantic import BaseModel
from ..utils import logger
from .config import DEFAULT_TOKEN_SECRET, global_args
from .passwords import verify_password
# use the .env that is inside the current folder
# allows to use different .env file for each lightrag instance
# the OS environment variables take precedence over the .env file
load_dotenv(dotenv_path=".env", override=False)
class TokenPayload(BaseModel):
sub: str # Username
exp: datetime # Expiration time
role: str = "user" # User role, default is regular user
metadata: dict = {} # Additional metadata
class AuthHandler:
def __init__(self):
auth_accounts = global_args.auth_accounts
self.secret = global_args.token_secret
if not self.secret:
if auth_accounts:
raise ValueError(
"TOKEN_SECRET must be explicitly set to a non-default value when AUTH_ACCOUNTS is configured."
)
self.secret = DEFAULT_TOKEN_SECRET
logger.warning(
"TOKEN_SECRET not set and AUTH_ACCOUNTS is not configured. "
"Falling back to the default guest-mode JWT secret. "
)
algorithm = global_args.jwt_algorithm
if not algorithm or algorithm.lower() == "none":
raise ValueError(
"JWT_ALGORITHM must be set to a secure algorithm (e.g. HS256). "
"The 'none' algorithm is not permitted."
)
self.algorithm = algorithm
self.expire_hours = global_args.token_expire_hours
self.guest_expire_hours = global_args.guest_token_expire_hours
self.accounts = {}
invalid_accounts = []
if auth_accounts:
for account in auth_accounts.split(","):
try:
username, password = account.split(":", 1)
if not username or not password:
raise ValueError
self.accounts[username] = password
except ValueError:
invalid_accounts.append(account)
if invalid_accounts:
invalid_entries = ", ".join(invalid_accounts)
logger.error(f"Invalid account format in AUTH_ACCOUNTS: {invalid_entries}")
raise ValueError(
"AUTH_ACCOUNTS must use comma-separated user:password pairs."
)
def verify_password(self, username: str, plain_password: str) -> bool:
"""
Verify password for a user. Supports explicit bcrypt values and plaintext.
Args:
username: Username to verify
plain_password: Plaintext password to check
Returns:
bool: True if password is correct, False otherwise
"""
if username not in self.accounts:
return False
stored_password = self.accounts[username]
return verify_password(plain_password, stored_password)
def create_token(
self,
username: str,
role: str = "user",
custom_expire_hours: int = None,
metadata: dict = None,
) -> str:
"""
Create JWT token
Args:
username: Username
role: User role, default is "user", guest is "guest"
custom_expire_hours: Custom expiration time (hours), if None use default value
metadata: Additional metadata
Returns:
str: Encoded JWT token
"""
# Choose default expiration time based on role
if custom_expire_hours is None:
if role == "guest":
expire_hours = self.guest_expire_hours
else:
expire_hours = self.expire_hours
else:
expire_hours = custom_expire_hours
expire = datetime.now(timezone.utc) + timedelta(hours=expire_hours)
# Create payload
payload = TokenPayload(
sub=username, exp=expire, role=role, metadata=metadata or {}
)
return jwt.encode(payload.model_dump(), self.secret, algorithm=self.algorithm)
def validate_token(self, token: str) -> dict:
"""
Validate JWT token
Args:
token: JWT token
Returns:
dict: Dictionary containing user information
Raises:
HTTPException: If token is invalid or expired
"""
try:
# Explicitly exclude 'none' to prevent algorithm confusion attacks
allowed_algorithms = [self.algorithm]
if "none" in (a.lower() for a in allowed_algorithms):
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Insecure JWT algorithm configuration",
)
payload = jwt.decode(token, self.secret, algorithms=allowed_algorithms)
expire_timestamp = payload["exp"]
expire_time = datetime.fromtimestamp(expire_timestamp, timezone.utc)
if datetime.now(timezone.utc) > expire_time:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, detail="Token expired"
)
# Return complete payload instead of just username
return {
"username": payload["sub"],
"role": payload.get("role", "user"),
"metadata": payload.get("metadata", {}),
"exp": expire_time,
}
except jwt.PyJWTError:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token"
)
auth_handler = AuthHandler()

View File

@@ -0,0 +1,697 @@
"""
Configs for the LightRAG API.
"""
import os
import re
import argparse
import logging
from dotenv import load_dotenv
from lightrag.utils import get_env_value, logger
from lightrag.llm.binding_options import (
GeminiEmbeddingOptions,
GeminiLLMOptions,
OllamaEmbeddingOptions,
OllamaLLMOptions,
OpenAILLMOptions,
)
from lightrag.base import OllamaServerInfos
import sys
from lightrag.constants import (
DEFAULT_WOKERS,
DEFAULT_TIMEOUT,
DEFAULT_TOP_K,
DEFAULT_CHUNK_TOP_K,
DEFAULT_HISTORY_TURNS,
DEFAULT_MAX_ENTITY_TOKENS,
DEFAULT_MAX_RELATION_TOKENS,
DEFAULT_MAX_TOTAL_TOKENS,
DEFAULT_COSINE_THRESHOLD,
DEFAULT_RELATED_CHUNK_NUMBER,
DEFAULT_MIN_RERANK_SCORE,
DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE,
DEFAULT_MAX_ASYNC,
DEFAULT_SUMMARY_MAX_TOKENS,
DEFAULT_SUMMARY_LENGTH_RECOMMENDED,
DEFAULT_SUMMARY_CONTEXT_SIZE,
DEFAULT_SUMMARY_LANGUAGE,
DEFAULT_EMBEDDING_FUNC_MAX_ASYNC,
DEFAULT_EMBEDDING_BATCH_NUM,
DEFAULT_OLLAMA_MODEL_NAME,
DEFAULT_OLLAMA_MODEL_TAG,
DEFAULT_RERANK_BINDING,
DEFAULT_ENTITY_TYPES,
)
# use the .env that is inside the current folder
# allows to use different .env file for each lightrag instance
# the OS environment variables take precedence over the .env file
load_dotenv(dotenv_path=".env", override=False)
ollama_server_infos = OllamaServerInfos()
DEFAULT_TOKEN_SECRET = "lightrag-jwt-default-secret-key!"
NO_PREFIX_SENTINEL = "NO_PREFIX"
PROVIDER_ASYMMETRIC_EMBEDDING_BINDINGS = {"gemini", "jina", "voyageai"}
PREFIX_ASYMMETRIC_EMBEDDING_BINDINGS = {"azure_openai", "ollama", "openai"}
class DefaultRAGStorageConfig:
KV_STORAGE = "JsonKVStorage"
VECTOR_STORAGE = "NanoVectorDBStorage"
GRAPH_STORAGE = "NetworkXStorage"
DOC_STATUS_STORAGE = "JsonDocStatusStorage"
def get_default_host(binding_type: str) -> str:
default_hosts = {
"ollama": os.getenv("LLM_BINDING_HOST", "http://localhost:11434"),
"lollms": os.getenv("LLM_BINDING_HOST", "http://localhost:9600"),
"azure_openai": os.getenv("AZURE_OPENAI_ENDPOINT", "https://api.openai.com/v1"),
"openai": os.getenv("LLM_BINDING_HOST", "https://api.openai.com/v1"),
"gemini": os.getenv(
"LLM_BINDING_HOST", "https://generativelanguage.googleapis.com"
),
}
return default_hosts.get(
binding_type, os.getenv("LLM_BINDING_HOST", "http://localhost:11434")
) # fallback to ollama if unknown
def resolve_asymmetric_embedding_opt_in(
*,
binding: str,
embedding_asymmetric: bool,
embedding_asymmetric_configured: bool,
query_prefix: str | None,
document_prefix: str | None,
query_prefix_configured: bool = False,
document_prefix_configured: bool = False,
) -> bool:
"""Resolve whether query/document-aware embedding behavior should be enabled."""
has_non_empty_prefix = bool(query_prefix or document_prefix)
has_prefix_config = query_prefix_configured or document_prefix_configured
if not embedding_asymmetric:
if has_prefix_config:
state = "false" if embedding_asymmetric_configured else "unset"
logger.warning(
f"EMBEDDING_ASYMMETRIC is {state}; "
"EMBEDDING_QUERY_PREFIX and EMBEDDING_DOCUMENT_PREFIX will be ignored."
)
return False
if binding in PROVIDER_ASYMMETRIC_EMBEDDING_BINDINGS:
if has_prefix_config:
logger.warning(
f"{binding} embeddings use provider task parameters for asymmetric "
"mode; EMBEDDING_QUERY_PREFIX and EMBEDDING_DOCUMENT_PREFIX will be ignored."
)
return True
if binding in PREFIX_ASYMMETRIC_EMBEDDING_BINDINGS:
if not query_prefix_configured or not document_prefix_configured:
raise ValueError(
f"EMBEDDING_ASYMMETRIC=true for {binding} embeddings requires both "
"EMBEDDING_QUERY_PREFIX and EMBEDDING_DOCUMENT_PREFIX. Use "
f"{NO_PREFIX_SENTINEL} for a side that should intentionally have no prefix."
)
if not has_non_empty_prefix:
raise ValueError(
"At least one of EMBEDDING_QUERY_PREFIX or EMBEDDING_DOCUMENT_PREFIX "
f"must be non-empty. Use {NO_PREFIX_SENTINEL} only for the side that "
"should intentionally have no prefix."
)
return True
raise ValueError(
f"EMBEDDING_ASYMMETRIC=true is not supported for {binding} embeddings."
)
def get_embedding_prefix_config(env_key: str) -> tuple[str | None, bool]:
"""Read an embedding prefix and whether it was explicitly configured."""
if env_key not in os.environ:
return None, False
value = os.environ[env_key]
if value == "None":
return None, False
if value == NO_PREFIX_SENTINEL:
return "", True
if value == "":
raise ValueError(
f"{env_key} is empty. Use {NO_PREFIX_SENTINEL} to explicitly request "
"no prefix, or remove the variable to leave it unconfigured."
)
return value, True
def validate_auth_configuration(args: argparse.Namespace) -> None:
"""Reject insecure JWT auth settings before the API starts."""
auth_accounts = (getattr(args, "auth_accounts", "") or "").strip()
token_secret = (getattr(args, "token_secret", "") or "").strip()
if auth_accounts and (not token_secret or token_secret == DEFAULT_TOKEN_SECRET):
raise ValueError(
"TOKEN_SECRET must be explicitly set to a non-default value when AUTH_ACCOUNTS is configured."
)
def parse_args() -> argparse.Namespace:
"""
Parse command line arguments with environment variable fallback
Args:
is_uvicorn_mode: Whether running under uvicorn mode
Returns:
argparse.Namespace: Parsed arguments
"""
parser = argparse.ArgumentParser(description="LightRAG API Server")
# Server configuration
parser.add_argument(
"--host",
default=get_env_value("HOST", "0.0.0.0"),
help="Server host (default: from env or 0.0.0.0)",
)
parser.add_argument(
"--port",
type=int,
default=get_env_value("PORT", 9621, int),
help="Server port (default: from env or 9621)",
)
# Directory configuration
parser.add_argument(
"--working-dir",
default=get_env_value("WORKING_DIR", "./rag_storage"),
help="Working directory for RAG storage (default: from env or ./rag_storage)",
)
parser.add_argument(
"--input-dir",
default=get_env_value("INPUT_DIR", "./inputs"),
help="Directory containing input documents (default: from env or ./inputs)",
)
parser.add_argument(
"--timeout",
default=get_env_value("TIMEOUT", DEFAULT_TIMEOUT, int, special_none=True),
type=int,
help="Timeout in seconds (useful when using slow AI). Use None for infinite timeout",
)
# RAG configuration
parser.add_argument(
"--max-async",
type=int,
default=get_env_value("MAX_ASYNC", DEFAULT_MAX_ASYNC, int),
help=f"Maximum async operations (default: from env or {DEFAULT_MAX_ASYNC})",
)
parser.add_argument(
"--summary-max-tokens",
type=int,
default=get_env_value("SUMMARY_MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS, int),
help=f"Maximum token size for entity/relation summary(default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})",
)
parser.add_argument(
"--summary-context-size",
type=int,
default=get_env_value(
"SUMMARY_CONTEXT_SIZE", DEFAULT_SUMMARY_CONTEXT_SIZE, int
),
help=f"LLM Summary Context size (default: from env or {DEFAULT_SUMMARY_CONTEXT_SIZE})",
)
parser.add_argument(
"--summary-length-recommended",
type=int,
default=get_env_value(
"SUMMARY_LENGTH_RECOMMENDED", DEFAULT_SUMMARY_LENGTH_RECOMMENDED, int
),
help=f"LLM Summary Context size (default: from env or {DEFAULT_SUMMARY_LENGTH_RECOMMENDED})",
)
# Logging configuration
parser.add_argument(
"--log-level",
default=get_env_value("LOG_LEVEL", "INFO"),
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
help="Logging level (default: from env or INFO)",
)
parser.add_argument(
"--verbose",
action="store_true",
default=get_env_value("VERBOSE", False, bool),
help="Enable verbose debug output(only valid for DEBUG log-level)",
)
parser.add_argument(
"--key",
type=str,
default=get_env_value("LIGHTRAG_API_KEY", None),
help="API key for authentication. This protects lightrag server against unauthorized access",
)
# Optional https parameters
parser.add_argument(
"--ssl",
action="store_true",
default=get_env_value("SSL", False, bool),
help="Enable HTTPS (default: from env or False)",
)
parser.add_argument(
"--ssl-certfile",
default=get_env_value("SSL_CERTFILE", None),
help="Path to SSL certificate file (required if --ssl is enabled)",
)
parser.add_argument(
"--ssl-keyfile",
default=get_env_value("SSL_KEYFILE", None),
help="Path to SSL private key file (required if --ssl is enabled)",
)
# Ollama model configuration
parser.add_argument(
"--simulated-model-name",
type=str,
default=get_env_value("OLLAMA_EMULATING_MODEL_NAME", DEFAULT_OLLAMA_MODEL_NAME),
help="Name for the simulated Ollama model (default: from env or lightrag)",
)
parser.add_argument(
"--simulated-model-tag",
type=str,
default=get_env_value("OLLAMA_EMULATING_MODEL_TAG", DEFAULT_OLLAMA_MODEL_TAG),
help="Tag for the simulated Ollama model (default: from env or latest)",
)
# Namespace
parser.add_argument(
"--workspace",
type=str,
default=get_env_value("WORKSPACE", ""),
help="Default workspace for all storage",
)
# Server workers configuration
parser.add_argument(
"--workers",
type=int,
default=get_env_value("WORKERS", DEFAULT_WOKERS, int),
help="Number of worker processes (default: from env or 1)",
)
# LLM and embedding bindings
parser.add_argument(
"--llm-binding",
type=str,
default=get_env_value("LLM_BINDING", "ollama"),
choices=[
"lollms",
"ollama",
"openai",
"openai-ollama",
"azure_openai",
"aws_bedrock",
"gemini",
],
help="LLM binding type (default: from env or ollama)",
)
parser.add_argument(
"--embedding-binding",
type=str,
default=get_env_value("EMBEDDING_BINDING", "ollama"),
choices=[
"lollms",
"ollama",
"openai",
"azure_openai",
"aws_bedrock",
"jina",
"gemini",
"voyageai",
],
help="Embedding binding type (default: from env or ollama)",
)
parser.add_argument(
"--rerank-binding",
type=str,
default=get_env_value("RERANK_BINDING", DEFAULT_RERANK_BINDING),
choices=["null", "cohere", "jina", "aliyun"],
help=f"Rerank binding type (default: from env or {DEFAULT_RERANK_BINDING})",
)
# Document loading engine configuration
parser.add_argument(
"--docling",
action="store_true",
default=False,
help="Enable DOCLING document loading engine (default: from env or DEFAULT)",
)
# Conditionally add binding-specific options (Ollama, OpenAI, Azure OpenAI, Gemini)
# This registers command line arguments (e.g., --openai-llm-temperature)
# and reads corresponding environment variables (e.g., OPENAI_LLM_TEMPERATURE)
# Determine LLM binding value consistently from command line or environment
llm_binding_value = None
if "--llm-binding" in sys.argv:
try:
idx = sys.argv.index("--llm-binding")
if idx + 1 < len(sys.argv) and not sys.argv[idx + 1].startswith("-"):
llm_binding_value = sys.argv[idx + 1]
except IndexError:
pass
# Fall back to environment variable using same function as argparse default
if llm_binding_value is None:
llm_binding_value = get_env_value("LLM_BINDING", "ollama")
# Add LLM binding options based on determined value
if llm_binding_value == "ollama":
OllamaLLMOptions.add_args(parser)
elif llm_binding_value in ["openai", "azure_openai"]:
OpenAILLMOptions.add_args(parser)
elif llm_binding_value == "gemini":
GeminiLLMOptions.add_args(parser)
# Determine embedding binding value consistently from command line or environment
embedding_binding_value = None
if "--embedding-binding" in sys.argv:
try:
idx = sys.argv.index("--embedding-binding")
if idx + 1 < len(sys.argv) and not sys.argv[idx + 1].startswith("-"):
embedding_binding_value = sys.argv[idx + 1]
except IndexError:
pass
# Fall back to environment variable using same function as argparse default
if embedding_binding_value is None:
embedding_binding_value = get_env_value("EMBEDDING_BINDING", "ollama")
# Add embedding binding options based on determined value
if embedding_binding_value == "ollama":
OllamaEmbeddingOptions.add_args(parser)
elif embedding_binding_value == "gemini":
GeminiEmbeddingOptions.add_args(parser)
args = parser.parse_args()
# convert relative path to absolute path
args.working_dir = os.path.abspath(args.working_dir)
args.input_dir = os.path.abspath(args.input_dir)
# Inject storage configuration from environment variables
args.kv_storage = get_env_value(
"LIGHTRAG_KV_STORAGE", DefaultRAGStorageConfig.KV_STORAGE
)
args.doc_status_storage = get_env_value(
"LIGHTRAG_DOC_STATUS_STORAGE", DefaultRAGStorageConfig.DOC_STATUS_STORAGE
)
args.graph_storage = get_env_value(
"LIGHTRAG_GRAPH_STORAGE", DefaultRAGStorageConfig.GRAPH_STORAGE
)
args.vector_storage = get_env_value(
"LIGHTRAG_VECTOR_STORAGE", DefaultRAGStorageConfig.VECTOR_STORAGE
)
# Get MAX_PARALLEL_INSERT from environment
args.max_parallel_insert = get_env_value("MAX_PARALLEL_INSERT", 2, int)
# Get MAX_GRAPH_NODES from environment
args.max_graph_nodes = get_env_value("MAX_GRAPH_NODES", 1000, int)
# Handle openai-ollama special case
if args.llm_binding == "openai-ollama":
args.llm_binding = "openai"
args.embedding_binding = "ollama"
args.llm_binding_host = get_env_value(
"LLM_BINDING_HOST", get_default_host(args.llm_binding)
)
args.embedding_binding_host = get_env_value(
"EMBEDDING_BINDING_HOST", get_default_host(args.embedding_binding)
)
args.llm_binding_api_key = get_env_value("LLM_BINDING_API_KEY", None)
args.embedding_binding_api_key = get_env_value("EMBEDDING_BINDING_API_KEY", "")
# Inject model configuration
args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest")
# EMBEDDING_MODEL defaults to None - each binding will use its own default model
# e.g., OpenAI uses "text-embedding-3-small", Jina uses "jina-embeddings-v4"
args.embedding_model = get_env_value("EMBEDDING_MODEL", None, special_none=True)
# EMBEDDING_DIM defaults to None - each binding will use its own default dimension
# Value is inherited from provider defaults via wrap_embedding_func_with_attrs decorator
args.embedding_dim = get_env_value("EMBEDDING_DIM", None, int, special_none=True)
args.embedding_send_dim = get_env_value("EMBEDDING_SEND_DIM", False, bool)
# Inject chunk configuration
args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
args.chunk_overlap_size = get_env_value("CHUNK_OVERLAP_SIZE", 100, int)
# Inject LLM cache configuration
args.enable_llm_cache_for_extract = get_env_value(
"ENABLE_LLM_CACHE_FOR_EXTRACT", True, bool
)
args.enable_llm_cache = get_env_value("ENABLE_LLM_CACHE", True, bool)
# Set document_loading_engine from --docling flag
if args.docling:
args.document_loading_engine = "DOCLING"
else:
args.document_loading_engine = get_env_value(
"DOCUMENT_LOADING_ENGINE", "DEFAULT"
)
# PDF decryption password
args.pdf_decrypt_password = get_env_value("PDF_DECRYPT_PASSWORD", None)
# Add environment variables that were previously read directly
args.cors_origins = get_env_value("CORS_ORIGINS", "*")
args.summary_language = get_env_value("SUMMARY_LANGUAGE", DEFAULT_SUMMARY_LANGUAGE)
args.entity_types = get_env_value("ENTITY_TYPES", DEFAULT_ENTITY_TYPES, list)
args.whitelist_paths = get_env_value("WHITELIST_PATHS", "/health,/api/*")
# For JWT Auth
args.auth_accounts = get_env_value("AUTH_ACCOUNTS", "")
args.token_secret = get_env_value("TOKEN_SECRET", None)
args.token_expire_hours = get_env_value("TOKEN_EXPIRE_HOURS", 48, float)
args.guest_token_expire_hours = get_env_value("GUEST_TOKEN_EXPIRE_HOURS", 24, float)
args.jwt_algorithm = get_env_value("JWT_ALGORITHM", "HS256")
# Token auto-renewal configuration (sliding window expiration)
args.token_auto_renew = get_env_value("TOKEN_AUTO_RENEW", True, bool)
args.token_renew_threshold = get_env_value("TOKEN_RENEW_THRESHOLD", 0.5, float)
# Rerank model configuration
args.rerank_model = get_env_value("RERANK_MODEL", None)
args.rerank_binding_host = get_env_value("RERANK_BINDING_HOST", None)
args.rerank_binding_api_key = get_env_value("RERANK_BINDING_API_KEY", None)
# Note: rerank_binding is already set by argparse, no need to override from env
# Min rerank score configuration
args.min_rerank_score = get_env_value(
"MIN_RERANK_SCORE", DEFAULT_MIN_RERANK_SCORE, float
)
# Query configuration
args.history_turns = get_env_value("HISTORY_TURNS", DEFAULT_HISTORY_TURNS, int)
args.top_k = get_env_value("TOP_K", DEFAULT_TOP_K, int)
args.chunk_top_k = get_env_value("CHUNK_TOP_K", DEFAULT_CHUNK_TOP_K, int)
args.max_entity_tokens = get_env_value(
"MAX_ENTITY_TOKENS", DEFAULT_MAX_ENTITY_TOKENS, int
)
args.max_relation_tokens = get_env_value(
"MAX_RELATION_TOKENS", DEFAULT_MAX_RELATION_TOKENS, int
)
args.max_total_tokens = get_env_value(
"MAX_TOTAL_TOKENS", DEFAULT_MAX_TOTAL_TOKENS, int
)
args.cosine_threshold = get_env_value(
"COSINE_THRESHOLD", DEFAULT_COSINE_THRESHOLD, float
)
args.related_chunk_number = get_env_value(
"RELATED_CHUNK_NUMBER", DEFAULT_RELATED_CHUNK_NUMBER, int
)
# Add missing environment variables for health endpoint
args.force_llm_summary_on_merge = get_env_value(
"FORCE_LLM_SUMMARY_ON_MERGE", DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE, int
)
args.embedding_func_max_async = get_env_value(
"EMBEDDING_FUNC_MAX_ASYNC", DEFAULT_EMBEDDING_FUNC_MAX_ASYNC, int
)
args.embedding_batch_num = get_env_value(
"EMBEDDING_BATCH_NUM", DEFAULT_EMBEDDING_BATCH_NUM, int
)
# Embedding token limit configuration
args.embedding_token_limit = get_env_value(
"EMBEDDING_TOKEN_LIMIT", None, int, special_none=True
)
# File upload size limit (in bytes, None for unlimited)
# Default: 100MB (104857600 bytes)
args.max_upload_size = get_env_value(
"MAX_UPLOAD_SIZE", 104857600, int, special_none=True
)
# Embedding prefix configuration for context-aware embeddings. Empty prefixes
# must be explicit via NO_PREFIX so missing config is distinguishable.
(
args.embedding_document_prefix,
args.embedding_document_prefix_configured,
) = get_embedding_prefix_config("EMBEDDING_DOCUMENT_PREFIX")
(
args.embedding_query_prefix,
args.embedding_query_prefix_configured,
) = get_embedding_prefix_config("EMBEDDING_QUERY_PREFIX")
args.embedding_prefix_no_prefix_sentinel = NO_PREFIX_SENTINEL
args.embedding_prefixes_configured = (
args.embedding_document_prefix_configured
or args.embedding_query_prefix_configured
)
# Asymmetric embedding behavior toggle
args.embedding_asymmetric_configured = "EMBEDDING_ASYMMETRIC" in os.environ
args.embedding_asymmetric = get_env_value("EMBEDDING_ASYMMETRIC", False, bool)
ollama_server_infos.LIGHTRAG_NAME = args.simulated_model_name
ollama_server_infos.LIGHTRAG_TAG = args.simulated_model_tag
# Sanitize workspace: only alphanumeric characters and underscores are allowed
if args.workspace:
sanitized = re.sub(r"[^a-zA-Z0-9_]", "_", args.workspace)
if sanitized != args.workspace:
logging.warning(
f"Workspace name '{args.workspace}' contains invalid characters. "
f"It has been sanitized to '{sanitized}'. "
"Only alphanumeric characters and underscores are allowed."
)
args.workspace = sanitized
validate_auth_configuration(args)
return args
def update_uvicorn_mode_config():
# If in uvicorn mode and workers > 1, force it to 1 and log warning
if global_args.workers > 1:
original_workers = global_args.workers
global_args.workers = 1
# Log warning directly here
logging.debug(
f">> Forcing workers=1 in uvicorn mode(Ignoring workers={original_workers})"
)
# Global configuration with lazy initialization
_global_args = None
_initialized = False
def initialize_config(args=None, force=False):
"""Initialize global configuration
This function allows explicit initialization of the configuration,
which is useful for programmatic usage, testing, or embedding LightRAG
in other applications.
Args:
args: Pre-parsed argparse.Namespace or None to parse from sys.argv
force: Force re-initialization even if already initialized
Returns:
argparse.Namespace: The configured arguments
Example:
# Use parsed command line arguments (default)
initialize_config()
# Use custom configuration programmatically
custom_args = argparse.Namespace(
host='localhost',
port=8080,
working_dir='./custom_rag',
# ... other config
)
initialize_config(custom_args)
"""
global _global_args, _initialized
if _initialized and not force:
return _global_args
resolved_args = args if args is not None else parse_args()
validate_auth_configuration(resolved_args)
_global_args = resolved_args
_initialized = True
return _global_args
def get_config():
"""Get global configuration, auto-initializing if needed
Returns:
argparse.Namespace: The configured arguments
"""
if not _initialized:
initialize_config()
return _global_args
class _GlobalArgsProxy:
"""Proxy object that auto-initializes configuration on first access
This maintains backward compatibility with existing code while
allowing programmatic control over initialization timing.
The proxy fully delegates to the underlying argparse.Namespace,
including support for vars() calls which is used by binding_options
to extract provider-specific configuration options.
"""
def __getattribute__(self, name):
"""Override attribute access to support vars() and regular attribute access.
This method intercepts __dict__ access (used by vars()) and delegates
to the underlying _global_args namespace, ensuring binding options
can be properly extracted.
"""
global _initialized, _global_args
# Handle __dict__ access for vars() support
if name == "__dict__":
if not _initialized:
initialize_config()
return vars(_global_args)
# Handle class-level attributes that should come from the proxy itself
if name in ("__class__", "__repr__", "__getattribute__", "__setattr__"):
return object.__getattribute__(self, name)
# Delegate all other attribute access to the underlying namespace
if not _initialized:
initialize_config()
return getattr(_global_args, name)
def __setattr__(self, name, value):
global _initialized, _global_args
if not _initialized:
initialize_config()
setattr(_global_args, name, value)
def __repr__(self):
global _initialized, _global_args
if not _initialized:
return "<GlobalArgsProxy: Not initialized>"
return repr(_global_args)
# Create proxy instance for backward compatibility
# Existing code like `from config import global_args` continues to work
# The proxy will auto-initialize on first attribute access
global_args = _GlobalArgsProxy()

View File

@@ -0,0 +1,162 @@
# gunicorn_config.py
import os
import logging
from lightrag.kg.shared_storage import finalize_share_data
from lightrag.utils import setup_logger, get_env_value
from lightrag.constants import (
DEFAULT_LOG_MAX_BYTES,
DEFAULT_LOG_BACKUP_COUNT,
DEFAULT_LOG_FILENAME,
)
# Get log directory path from environment variable
log_dir = os.getenv("LOG_DIR", os.getcwd())
log_file_path = os.path.abspath(os.path.join(log_dir, DEFAULT_LOG_FILENAME))
# Ensure log directory exists
os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
# Get log file max size and backup count from environment variables
log_max_bytes = get_env_value("LOG_MAX_BYTES", DEFAULT_LOG_MAX_BYTES, int)
log_backup_count = get_env_value("LOG_BACKUP_COUNT", DEFAULT_LOG_BACKUP_COUNT, int)
# These variables will be set by run_with_gunicorn.py
workers = None
bind = None
loglevel = None
certfile = None
keyfile = None
# Enable preload_app option
preload_app = True
# Use Uvicorn worker
worker_class = "uvicorn.workers.UvicornWorker"
# Other Gunicorn configurations
# Logging configuration
errorlog = os.getenv("ERROR_LOG", log_file_path) # Default write to lightrag.log
accesslog = os.getenv("ACCESS_LOG", log_file_path) # Default write to lightrag.log
logconfig_dict = {
"version": 1,
"disable_existing_loggers": False,
"formatters": {
"standard": {"format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s"},
},
"handlers": {
"console": {
"class": "logging.StreamHandler",
"formatter": "standard",
"stream": "ext://sys.stdout",
},
"file": {
"class": "logging.handlers.RotatingFileHandler",
"formatter": "standard",
"filename": log_file_path,
"maxBytes": log_max_bytes,
"backupCount": log_backup_count,
"encoding": "utf8",
},
},
"filters": {
"path_filter": {
"()": "lightrag.utils.LightragPathFilter",
},
},
"loggers": {
"lightrag": {
"handlers": ["console", "file"],
"level": loglevel.upper() if loglevel else "INFO",
"propagate": False,
},
"gunicorn": {
"handlers": ["console", "file"],
"level": loglevel.upper() if loglevel else "INFO",
"propagate": False,
},
"gunicorn.error": {
"handlers": ["console", "file"],
"level": loglevel.upper() if loglevel else "INFO",
"propagate": False,
},
"gunicorn.access": {
"handlers": ["console", "file"],
"level": loglevel.upper() if loglevel else "INFO",
"propagate": False,
"filters": ["path_filter"],
},
},
}
def on_starting(server):
"""
Executed when Gunicorn starts, before forking the first worker processes
You can use this function to do more initialization tasks for all processes
"""
print("=" * 80)
print(f"GUNICORN MASTER PROCESS: on_starting jobs for {workers} worker(s)")
print(f"Process ID: {os.getpid()}")
print("=" * 80)
# Memory usage monitoring
try:
import psutil
process = psutil.Process(os.getpid())
memory_info = process.memory_info()
msg = (
f"Memory usage after initialization: {memory_info.rss / 1024 / 1024:.2f} MB"
)
print(msg)
except ImportError:
print("psutil not installed, skipping memory usage reporting")
# Log the location of the LightRAG log file
print(f"LightRAG log file: {log_file_path}\n")
print("Gunicorn initialization complete, forking workers...\n")
def on_exit(server):
"""
Executed when Gunicorn is shutting down.
This is a good place to release shared resources.
"""
print("=" * 80)
print("GUNICORN MASTER PROCESS: Shutting down")
print(f"Process ID: {os.getpid()}")
print("Finalizing shared storage...")
finalize_share_data()
print("Gunicorn shutdown complete")
print("=" * 80)
def post_fork(server, worker):
"""
Executed after a worker has been forked.
This is a good place to set up worker-specific configurations.
"""
# Set up main loggers
log_level = loglevel.upper() if loglevel else "INFO"
setup_logger("uvicorn", log_level, add_filter=False, log_file_path=log_file_path)
setup_logger(
"uvicorn.access", log_level, add_filter=True, log_file_path=log_file_path
)
setup_logger("lightrag", log_level, add_filter=True, log_file_path=log_file_path)
# Set up lightrag submodule loggers
for name in logging.root.manager.loggerDict:
if name.startswith("lightrag."):
setup_logger(name, log_level, add_filter=True, log_file_path=log_file_path)
# Disable uvicorn.error logger
uvicorn_error_logger = logging.getLogger("uvicorn.error")
uvicorn_error_logger.handlers = []
uvicorn_error_logger.setLevel(logging.CRITICAL)
uvicorn_error_logger.propagate = False

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,26 @@
import bcrypt
BCRYPT_PASSWORD_PREFIX = "{bcrypt}"
def hash_password(password: str) -> str:
"""Return an AUTH_ACCOUNTS-ready bcrypt password value."""
salt = bcrypt.gensalt()
hashed = bcrypt.hashpw(password.encode("utf-8"), salt).decode("utf-8")
return f"{BCRYPT_PASSWORD_PREFIX}{hashed}"
def verify_password(plain_password: str, stored_password: str) -> bool:
"""Verify a plaintext password against a stored password spec."""
if stored_password.startswith(BCRYPT_PASSWORD_PREFIX):
hashed_password = stored_password[len(BCRYPT_PASSWORD_PREFIX) :]
if not hashed_password:
return False
try:
return bcrypt.checkpw(
plain_password.encode("utf-8"), hashed_password.encode("utf-8")
)
except ValueError:
return False
return stored_password == plain_password

View File

@@ -0,0 +1,10 @@
"""
This module contains all the routers for the LightRAG API.
"""
from .document_routes import router as document_router
from .query_routes import router as query_router
from .graph_routes import router as graph_router
from .ollama_api import OllamaAPI
__all__ = ["document_router", "query_router", "graph_router", "OllamaAPI"]

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,688 @@
"""
This module contains all graph-related routes for the LightRAG API.
"""
from typing import Optional, Dict, Any
import traceback
from fastapi import APIRouter, Depends, Query, HTTPException
from pydantic import BaseModel, Field
from lightrag.utils import logger
from ..utils_api import get_combined_auth_dependency
router = APIRouter(tags=["graph"])
class EntityUpdateRequest(BaseModel):
entity_name: str
updated_data: Dict[str, Any]
allow_rename: bool = False
allow_merge: bool = False
class RelationUpdateRequest(BaseModel):
source_id: str
target_id: str
updated_data: Dict[str, Any]
class EntityMergeRequest(BaseModel):
entities_to_change: list[str] = Field(
...,
description="List of entity names to be merged and deleted. These are typically duplicate or misspelled entities.",
min_length=1,
examples=[["Elon Msk", "Ellon Musk"]],
)
entity_to_change_into: str = Field(
...,
description="Target entity name that will receive all relationships from the source entities. This entity will be preserved.",
min_length=1,
examples=["Elon Musk"],
)
class EntityCreateRequest(BaseModel):
entity_name: str = Field(
...,
description="Unique name for the new entity",
min_length=1,
examples=["Tesla"],
)
entity_data: Dict[str, Any] = Field(
...,
description="Dictionary containing entity properties. Common fields include 'description' and 'entity_type'.",
examples=[
{
"description": "Electric vehicle manufacturer",
"entity_type": "ORGANIZATION",
}
],
)
class RelationCreateRequest(BaseModel):
source_entity: str = Field(
...,
description="Name of the source entity. This entity must already exist in the knowledge graph.",
min_length=1,
examples=["Elon Musk"],
)
target_entity: str = Field(
...,
description="Name of the target entity. This entity must already exist in the knowledge graph.",
min_length=1,
examples=["Tesla"],
)
relation_data: Dict[str, Any] = Field(
...,
description="Dictionary containing relationship properties. Common fields include 'description', 'keywords', and 'weight'.",
examples=[
{
"description": "Elon Musk is the CEO of Tesla",
"keywords": "CEO, founder",
"weight": 1.0,
}
],
)
def create_graph_routes(rag, api_key: Optional[str] = None):
combined_auth = get_combined_auth_dependency(api_key)
@router.get("/graph/label/list", dependencies=[Depends(combined_auth)])
async def get_graph_labels():
"""
Get all graph labels
Returns:
List[str]: List of graph labels
"""
try:
return await rag.get_graph_labels()
except Exception as e:
logger.error(f"Error getting graph labels: {str(e)}")
logger.error(traceback.format_exc())
raise HTTPException(
status_code=500, detail=f"Error getting graph labels: {str(e)}"
)
@router.get("/graph/label/popular", dependencies=[Depends(combined_auth)])
async def get_popular_labels(
limit: int = Query(
300, description="Maximum number of popular labels to return", ge=1, le=1000
),
):
"""
Get popular labels by node degree (most connected entities)
Args:
limit (int): Maximum number of labels to return (default: 300, max: 1000)
Returns:
List[str]: List of popular labels sorted by degree (highest first)
"""
try:
return await rag.chunk_entity_relation_graph.get_popular_labels(limit)
except Exception as e:
logger.error(f"Error getting popular labels: {str(e)}")
logger.error(traceback.format_exc())
raise HTTPException(
status_code=500, detail=f"Error getting popular labels: {str(e)}"
)
@router.get("/graph/label/search", dependencies=[Depends(combined_auth)])
async def search_labels(
q: str = Query(..., description="Search query string"),
limit: int = Query(
50, description="Maximum number of search results to return", ge=1, le=100
),
):
"""
Search labels with fuzzy matching
Args:
q (str): Search query string
limit (int): Maximum number of results to return (default: 50, max: 100)
Returns:
List[str]: List of matching labels sorted by relevance
"""
try:
return await rag.chunk_entity_relation_graph.search_labels(q, limit)
except Exception as e:
logger.error(f"Error searching labels with query '{q}': {str(e)}")
logger.error(traceback.format_exc())
raise HTTPException(
status_code=500, detail=f"Error searching labels: {str(e)}"
)
@router.get("/graphs", dependencies=[Depends(combined_auth)])
async def get_knowledge_graph(
label: str = Query(..., description="Label to get knowledge graph for"),
max_depth: int = Query(3, description="Maximum depth of graph", ge=1),
max_nodes: int = Query(1000, description="Maximum nodes to return", ge=1),
):
"""
Retrieve a connected subgraph of nodes where the label includes the specified label.
When reducing the number of nodes, the prioritization criteria are as follows:
1. Hops(path) to the staring node take precedence
2. Followed by the degree of the nodes
Args:
label (str): Label of the starting node
max_depth (int, optional): Maximum depth of the subgraph,Defaults to 3
max_nodes: Maxiumu nodes to return
Returns:
Dict[str, List[str]]: Knowledge graph for label
"""
try:
# Log the label parameter to check for leading spaces
logger.debug(
f"get_knowledge_graph called with label: '{label}' (length: {len(label)}, repr: {repr(label)})"
)
return await rag.get_knowledge_graph(
node_label=label,
max_depth=max_depth,
max_nodes=max_nodes,
)
except Exception as e:
logger.error(f"Error getting knowledge graph for label '{label}': {str(e)}")
logger.error(traceback.format_exc())
raise HTTPException(
status_code=500, detail=f"Error getting knowledge graph: {str(e)}"
)
@router.get("/graph/entity/exists", dependencies=[Depends(combined_auth)])
async def check_entity_exists(
name: str = Query(..., description="Entity name to check"),
):
"""
Check if an entity with the given name exists in the knowledge graph
Args:
name (str): Name of the entity to check
Returns:
Dict[str, bool]: Dictionary with 'exists' key indicating if entity exists
"""
try:
exists = await rag.chunk_entity_relation_graph.has_node(name)
return {"exists": exists}
except Exception as e:
logger.error(f"Error checking entity existence for '{name}': {str(e)}")
logger.error(traceback.format_exc())
raise HTTPException(
status_code=500, detail=f"Error checking entity existence: {str(e)}"
)
@router.post("/graph/entity/edit", dependencies=[Depends(combined_auth)])
async def update_entity(request: EntityUpdateRequest):
"""
Update an entity's properties in the knowledge graph
This endpoint allows updating entity properties, including renaming entities.
When renaming to an existing entity name, the behavior depends on allow_merge:
Args:
request (EntityUpdateRequest): Request containing:
- entity_name (str): Name of the entity to update
- updated_data (Dict[str, Any]): Dictionary of properties to update
- allow_rename (bool): Whether to allow entity renaming (default: False)
- allow_merge (bool): Whether to merge into existing entity when renaming
causes name conflict (default: False)
Returns:
Dict with the following structure:
{
"status": "success",
"message": "Entity updated successfully" | "Entity merged successfully into 'target_name'",
"data": {
"entity_name": str, # Final entity name
"description": str, # Entity description
"entity_type": str, # Entity type
"source_id": str, # Source chunk IDs
... # Other entity properties
},
"operation_summary": {
"merged": bool, # Whether entity was merged into another
"merge_status": str, # "success" | "failed" | "not_attempted"
"merge_error": str | None, # Error message if merge failed
"operation_status": str, # "success" | "partial_success" | "failure"
"target_entity": str | None, # Target entity name if renaming/merging
"final_entity": str, # Final entity name after operation
"renamed": bool # Whether entity was renamed
}
}
operation_status values explained:
- "success": All operations completed successfully
* For simple updates: entity properties updated
* For renames: entity renamed successfully
* For merges: non-name updates applied AND merge completed
- "partial_success": Update succeeded but merge failed
* Non-name property updates were applied successfully
* Merge operation failed (entity not merged)
* Original entity still exists with updated properties
* Use merge_error for failure details
- "failure": Operation failed completely
* If merge_status == "failed": Merge attempted but both update and merge failed
* If merge_status == "not_attempted": Regular update failed
* No changes were applied to the entity
merge_status values explained:
- "success": Entity successfully merged into target entity
- "failed": Merge operation was attempted but failed
- "not_attempted": No merge was attempted (normal update/rename)
Behavior when renaming to an existing entity:
- If allow_merge=False: Raises ValueError with 400 status (default behavior)
- If allow_merge=True: Automatically merges the source entity into the existing target entity,
preserving all relationships and applying non-name updates first
Example Request (simple update):
POST /graph/entity/edit
{
"entity_name": "Tesla",
"updated_data": {"description": "Updated description"},
"allow_rename": false,
"allow_merge": false
}
Example Response (simple update success):
{
"status": "success",
"message": "Entity updated successfully",
"data": { ... },
"operation_summary": {
"merged": false,
"merge_status": "not_attempted",
"merge_error": null,
"operation_status": "success",
"target_entity": null,
"final_entity": "Tesla",
"renamed": false
}
}
Example Request (rename with auto-merge):
POST /graph/entity/edit
{
"entity_name": "Elon Msk",
"updated_data": {
"entity_name": "Elon Musk",
"description": "Corrected description"
},
"allow_rename": true,
"allow_merge": true
}
Example Response (merge success):
{
"status": "success",
"message": "Entity merged successfully into 'Elon Musk'",
"data": { ... },
"operation_summary": {
"merged": true,
"merge_status": "success",
"merge_error": null,
"operation_status": "success",
"target_entity": "Elon Musk",
"final_entity": "Elon Musk",
"renamed": true
}
}
Example Response (partial success - update succeeded but merge failed):
{
"status": "success",
"message": "Entity updated successfully",
"data": { ... }, # Data reflects updated "Elon Msk" entity
"operation_summary": {
"merged": false,
"merge_status": "failed",
"merge_error": "Target entity locked by another operation",
"operation_status": "partial_success",
"target_entity": "Elon Musk",
"final_entity": "Elon Msk", # Original entity still exists
"renamed": true
}
}
"""
try:
result = await rag.aedit_entity(
entity_name=request.entity_name,
updated_data=request.updated_data,
allow_rename=request.allow_rename,
allow_merge=request.allow_merge,
)
# Extract operation_summary from result, with fallback for backward compatibility
operation_summary = result.get(
"operation_summary",
{
"merged": False,
"merge_status": "not_attempted",
"merge_error": None,
"operation_status": "success",
"target_entity": None,
"final_entity": request.updated_data.get(
"entity_name", request.entity_name
),
"renamed": request.updated_data.get(
"entity_name", request.entity_name
)
!= request.entity_name,
},
)
# Separate entity data from operation_summary for clean response
entity_data = dict(result)
entity_data.pop("operation_summary", None)
# Generate appropriate response message based on merge status
response_message = (
f"Entity merged successfully into '{operation_summary['final_entity']}'"
if operation_summary.get("merged")
else "Entity updated successfully"
)
return {
"status": "success",
"message": response_message,
"data": entity_data,
"operation_summary": operation_summary,
}
except ValueError as ve:
logger.error(
f"Validation error updating entity '{request.entity_name}': {str(ve)}"
)
raise HTTPException(status_code=400, detail=str(ve))
except Exception as e:
logger.error(f"Error updating entity '{request.entity_name}': {str(e)}")
logger.error(traceback.format_exc())
raise HTTPException(
status_code=500, detail=f"Error updating entity: {str(e)}"
)
@router.post("/graph/relation/edit", dependencies=[Depends(combined_auth)])
async def update_relation(request: RelationUpdateRequest):
"""Update a relation's properties in the knowledge graph
Args:
request (RelationUpdateRequest): Request containing source ID, target ID and updated data
Returns:
Dict: Updated relation information
"""
try:
result = await rag.aedit_relation(
source_entity=request.source_id,
target_entity=request.target_id,
updated_data=request.updated_data,
)
return {
"status": "success",
"message": "Relation updated successfully",
"data": result,
}
except ValueError as ve:
logger.error(
f"Validation error updating relation between '{request.source_id}' and '{request.target_id}': {str(ve)}"
)
raise HTTPException(status_code=400, detail=str(ve))
except Exception as e:
logger.error(
f"Error updating relation between '{request.source_id}' and '{request.target_id}': {str(e)}"
)
logger.error(traceback.format_exc())
raise HTTPException(
status_code=500, detail=f"Error updating relation: {str(e)}"
)
@router.post("/graph/entity/create", dependencies=[Depends(combined_auth)])
async def create_entity(request: EntityCreateRequest):
"""
Create a new entity in the knowledge graph
This endpoint creates a new entity node in the knowledge graph with the specified
properties. The system automatically generates vector embeddings for the entity
to enable semantic search and retrieval.
Request Body:
entity_name (str): Unique name identifier for the entity
entity_data (dict): Entity properties including:
- description (str): Textual description of the entity
- entity_type (str): Category/type of the entity (e.g., PERSON, ORGANIZATION, LOCATION)
- source_id (str): Related chunk_id from which the description originates
- Additional custom properties as needed
Response Schema:
{
"status": "success",
"message": "Entity 'Tesla' created successfully",
"data": {
"entity_name": "Tesla",
"description": "Electric vehicle manufacturer",
"entity_type": "ORGANIZATION",
"source_id": "chunk-123<SEP>chunk-456"
... (other entity properties)
}
}
HTTP Status Codes:
200: Entity created successfully
400: Invalid request (e.g., missing required fields, duplicate entity)
500: Internal server error
Example Request:
POST /graph/entity/create
{
"entity_name": "Tesla",
"entity_data": {
"description": "Electric vehicle manufacturer",
"entity_type": "ORGANIZATION"
}
}
"""
try:
# Use the proper acreate_entity method which handles:
# - Graph lock for concurrency
# - Vector embedding creation in entities_vdb
# - Metadata population and defaults
# - Index consistency via _edit_entity_done
result = await rag.acreate_entity(
entity_name=request.entity_name,
entity_data=request.entity_data,
)
return {
"status": "success",
"message": f"Entity '{request.entity_name}' created successfully",
"data": result,
}
except ValueError as ve:
logger.error(
f"Validation error creating entity '{request.entity_name}': {str(ve)}"
)
raise HTTPException(status_code=400, detail=str(ve))
except Exception as e:
logger.error(f"Error creating entity '{request.entity_name}': {str(e)}")
logger.error(traceback.format_exc())
raise HTTPException(
status_code=500, detail=f"Error creating entity: {str(e)}"
)
@router.post("/graph/relation/create", dependencies=[Depends(combined_auth)])
async def create_relation(request: RelationCreateRequest):
"""
Create a new relationship between two entities in the knowledge graph
This endpoint establishes an undirected relationship between two existing entities.
The provided source/target order is accepted for convenience, but the backend
stored edge is undirected and may be returned with the entities swapped.
Both entities must already exist in the knowledge graph. The system automatically
generates vector embeddings for the relationship to enable semantic search and graph traversal.
Prerequisites:
- Both source_entity and target_entity must exist in the knowledge graph
- Use /graph/entity/create to create entities first if they don't exist
Request Body:
source_entity (str): Name of the source entity (relationship origin)
target_entity (str): Name of the target entity (relationship destination)
relation_data (dict): Relationship properties including:
- description (str): Textual description of the relationship
- keywords (str): Comma-separated keywords describing the relationship type
- source_id (str): Related chunk_id from which the description originates
- weight (float): Relationship strength/importance (default: 1.0)
- Additional custom properties as needed
Response Schema:
{
"status": "success",
"message": "Relation created successfully between 'Elon Musk' and 'Tesla'",
"data": {
"src_id": "Elon Musk",
"tgt_id": "Tesla",
"description": "Elon Musk is the CEO of Tesla",
"keywords": "CEO, founder",
"source_id": "chunk-123<SEP>chunk-456"
"weight": 1.0,
... (other relationship properties)
}
}
HTTP Status Codes:
200: Relationship created successfully
400: Invalid request (e.g., missing entities, invalid data, duplicate relationship)
500: Internal server error
Example Request:
POST /graph/relation/create
{
"source_entity": "Elon Musk",
"target_entity": "Tesla",
"relation_data": {
"description": "Elon Musk is the CEO of Tesla",
"keywords": "CEO, founder",
"weight": 1.0
}
}
"""
try:
# Use the proper acreate_relation method which handles:
# - Graph lock for concurrency
# - Entity existence validation
# - Duplicate relation checks
# - Vector embedding creation in relationships_vdb
# - Index consistency via _edit_relation_done
result = await rag.acreate_relation(
source_entity=request.source_entity,
target_entity=request.target_entity,
relation_data=request.relation_data,
)
return {
"status": "success",
"message": f"Relation created successfully between '{request.source_entity}' and '{request.target_entity}'",
"data": result,
}
except ValueError as ve:
logger.error(
f"Validation error creating relation between '{request.source_entity}' and '{request.target_entity}': {str(ve)}"
)
raise HTTPException(status_code=400, detail=str(ve))
except Exception as e:
logger.error(
f"Error creating relation between '{request.source_entity}' and '{request.target_entity}': {str(e)}"
)
logger.error(traceback.format_exc())
raise HTTPException(
status_code=500, detail=f"Error creating relation: {str(e)}"
)
@router.post("/graph/entities/merge", dependencies=[Depends(combined_auth)])
async def merge_entities(request: EntityMergeRequest):
"""
Merge multiple entities into a single entity, preserving all relationships
This endpoint consolidates duplicate or misspelled entities while preserving the entire
graph structure. It's particularly useful for cleaning up knowledge graphs after document
processing or correcting entity name variations.
What the Merge Operation Does:
1. Deletes the specified source entities from the knowledge graph
2. Transfers all relationships from source entities to the target entity
3. Intelligently merges duplicate relationships (if multiple sources have the same relationship)
4. Updates vector embeddings for accurate retrieval and search
5. Preserves the complete graph structure and connectivity
6. Maintains relationship properties and metadata
Use Cases:
- Fixing spelling errors in entity names (e.g., "Elon Msk" -> "Elon Musk")
- Consolidating duplicate entities discovered after document processing
- Merging name variations (e.g., "NY", "New York", "New York City")
- Cleaning up the knowledge graph for better query performance
- Standardizing entity names across the knowledge base
Request Body:
entities_to_change (list[str]): List of entity names to be merged and deleted
entity_to_change_into (str): Target entity that will receive all relationships
Response Schema:
{
"status": "success",
"message": "Successfully merged 2 entities into 'Elon Musk'",
"data": {
"merged_entity": "Elon Musk",
"deleted_entities": ["Elon Msk", "Ellon Musk"],
"relationships_transferred": 15,
... (merge operation details)
}
}
HTTP Status Codes:
200: Entities merged successfully
400: Invalid request (e.g., empty entity list, target entity doesn't exist)
500: Internal server error
Example Request:
POST /graph/entities/merge
{
"entities_to_change": ["Elon Msk", "Ellon Musk"],
"entity_to_change_into": "Elon Musk"
}
Note:
- The target entity (entity_to_change_into) must exist in the knowledge graph
- Source entities will be permanently deleted after the merge
- This operation cannot be undone, so verify entity names before merging
"""
try:
result = await rag.amerge_entities(
source_entities=request.entities_to_change,
target_entity=request.entity_to_change_into,
)
return {
"status": "success",
"message": f"Successfully merged {len(request.entities_to_change)} entities into '{request.entity_to_change_into}'",
"data": result,
}
except ValueError as ve:
logger.error(
f"Validation error merging entities {request.entities_to_change} into '{request.entity_to_change_into}': {str(ve)}"
)
raise HTTPException(status_code=400, detail=str(ve))
except Exception as e:
logger.error(
f"Error merging entities {request.entities_to_change} into '{request.entity_to_change_into}': {str(e)}"
)
logger.error(traceback.format_exc())
raise HTTPException(
status_code=500, detail=f"Error merging entities: {str(e)}"
)
return router

View File

@@ -0,0 +1,723 @@
from fastapi import APIRouter, HTTPException, Request
from pydantic import BaseModel
from typing import List, Dict, Any, Optional, Type
from lightrag.utils import logger
import time
import json
import re
from enum import Enum
from fastapi.responses import StreamingResponse
import asyncio
from lightrag import LightRAG, QueryParam
from lightrag.utils import TiktokenTokenizer
from lightrag.api.utils_api import get_combined_auth_dependency
from fastapi import Depends
# query mode according to query prefix (bypass is not LightRAG quer mode)
class SearchMode(str, Enum):
naive = "naive"
local = "local"
global_ = "global"
hybrid = "hybrid"
mix = "mix"
bypass = "bypass"
context = "context"
class OllamaMessage(BaseModel):
role: str
content: str
images: Optional[List[str]] = None
class OllamaChatRequest(BaseModel):
model: str
messages: List[OllamaMessage]
stream: bool = True
options: Optional[Dict[str, Any]] = None
system: Optional[str] = None
class OllamaChatResponse(BaseModel):
model: str
created_at: str
message: OllamaMessage
done: bool
class OllamaGenerateRequest(BaseModel):
model: str
prompt: str
system: Optional[str] = None
stream: bool = False
options: Optional[Dict[str, Any]] = None
class OllamaGenerateResponse(BaseModel):
model: str
created_at: str
response: str
done: bool
context: Optional[List[int]]
total_duration: Optional[int]
load_duration: Optional[int]
prompt_eval_count: Optional[int]
prompt_eval_duration: Optional[int]
eval_count: Optional[int]
eval_duration: Optional[int]
class OllamaVersionResponse(BaseModel):
version: str
class OllamaModelDetails(BaseModel):
parent_model: str
format: str
family: str
families: List[str]
parameter_size: str
quantization_level: str
class OllamaModel(BaseModel):
name: str
model: str
size: int
digest: str
modified_at: str
details: OllamaModelDetails
class OllamaTagResponse(BaseModel):
models: List[OllamaModel]
class OllamaRunningModelDetails(BaseModel):
parent_model: str
format: str
family: str
families: List[str]
parameter_size: str
quantization_level: str
class OllamaRunningModel(BaseModel):
name: str
model: str
size: int
digest: str
details: OllamaRunningModelDetails
expires_at: str
size_vram: int
class OllamaPsResponse(BaseModel):
models: List[OllamaRunningModel]
async def parse_request_body(
request: Request, model_class: Type[BaseModel]
) -> BaseModel:
"""
Parse request body based on Content-Type header.
Supports both application/json and application/octet-stream.
Args:
request: The FastAPI Request object
model_class: The Pydantic model class to parse the request into
Returns:
An instance of the provided model_class
"""
content_type = request.headers.get("content-type", "").lower()
try:
if content_type.startswith("application/json"):
# FastAPI already handles JSON parsing for us
body = await request.json()
elif content_type.startswith("application/octet-stream"):
# Manually parse octet-stream as JSON
body_bytes = await request.body()
body = json.loads(body_bytes.decode("utf-8"))
else:
# Try to parse as JSON for any other content type
body_bytes = await request.body()
body = json.loads(body_bytes.decode("utf-8"))
# Create an instance of the model
return model_class(**body)
except json.JSONDecodeError:
raise HTTPException(status_code=400, detail="Invalid JSON in request body")
except Exception as e:
raise HTTPException(
status_code=400, detail=f"Error parsing request body: {str(e)}"
)
def estimate_tokens(text: str) -> int:
"""Estimate the number of tokens in text using tiktoken"""
tokens = TiktokenTokenizer().encode(text)
return len(tokens)
def parse_query_mode(query: str) -> tuple[str, SearchMode, bool, Optional[str]]:
"""Parse query prefix to determine search mode
Returns tuple of (cleaned_query, search_mode, only_need_context, user_prompt)
Examples:
- "/local[use mermaid format for diagrams] query string" -> (cleaned_query, SearchMode.local, False, "use mermaid format for diagrams")
- "/[use mermaid format for diagrams] query string" -> (cleaned_query, SearchMode.hybrid, False, "use mermaid format for diagrams")
- "/local query string" -> (cleaned_query, SearchMode.local, False, None)
"""
# Initialize user_prompt as None
user_prompt = None
# First check if there's a bracket format for user prompt
bracket_pattern = r"^/([a-z]*)\[(.*?)\](.*)"
bracket_match = re.match(bracket_pattern, query)
if bracket_match:
mode_prefix = bracket_match.group(1)
user_prompt = bracket_match.group(2)
remaining_query = bracket_match.group(3).lstrip()
# Reconstruct query, removing the bracket part
query = f"/{mode_prefix} {remaining_query}".strip()
# Unified handling of mode and only_need_context determination
mode_map = {
"/local ": (SearchMode.local, False),
"/global ": (
SearchMode.global_,
False,
), # global_ is used because 'global' is a Python keyword
"/naive ": (SearchMode.naive, False),
"/hybrid ": (SearchMode.hybrid, False),
"/mix ": (SearchMode.mix, False),
"/bypass ": (SearchMode.bypass, False),
"/context": (
SearchMode.mix,
True,
),
"/localcontext": (SearchMode.local, True),
"/globalcontext": (SearchMode.global_, True),
"/hybridcontext": (SearchMode.hybrid, True),
"/naivecontext": (SearchMode.naive, True),
"/mixcontext": (SearchMode.mix, True),
}
for prefix, (mode, only_need_context) in mode_map.items():
if query.startswith(prefix):
# After removing prefix and leading spaces
cleaned_query = query[len(prefix) :].lstrip()
return cleaned_query, mode, only_need_context, user_prompt
return query, SearchMode.mix, False, user_prompt
class OllamaAPI:
def __init__(self, rag: LightRAG, top_k: int = 60, api_key: Optional[str] = None):
self.rag = rag
self.ollama_server_infos = rag.ollama_server_infos
self.top_k = top_k
self.api_key = api_key
self.router = APIRouter(tags=["ollama"])
self.setup_routes()
def setup_routes(self):
# Create combined auth dependency for Ollama API routes
combined_auth = get_combined_auth_dependency(self.api_key)
@self.router.get("/version", dependencies=[Depends(combined_auth)])
async def get_version():
"""Get Ollama version information"""
return OllamaVersionResponse(version="0.9.3")
@self.router.get("/tags", dependencies=[Depends(combined_auth)])
async def get_tags():
"""Return available models acting as an Ollama server"""
return OllamaTagResponse(
models=[
{
"name": self.ollama_server_infos.LIGHTRAG_MODEL,
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"modified_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"size": self.ollama_server_infos.LIGHTRAG_SIZE,
"digest": self.ollama_server_infos.LIGHTRAG_DIGEST,
"details": {
"parent_model": "",
"format": "gguf",
"family": self.ollama_server_infos.LIGHTRAG_NAME,
"families": [self.ollama_server_infos.LIGHTRAG_NAME],
"parameter_size": "13B",
"quantization_level": "Q4_0",
},
}
]
)
@self.router.get("/ps", dependencies=[Depends(combined_auth)])
async def get_running_models():
"""List Running Models - returns currently running models"""
return OllamaPsResponse(
models=[
{
"name": self.ollama_server_infos.LIGHTRAG_MODEL,
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"size": self.ollama_server_infos.LIGHTRAG_SIZE,
"digest": self.ollama_server_infos.LIGHTRAG_DIGEST,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": ["llama"],
"parameter_size": "7.2B",
"quantization_level": "Q4_0",
},
"expires_at": "2050-12-31T14:38:31.83753-07:00",
"size_vram": self.ollama_server_infos.LIGHTRAG_SIZE,
}
]
)
@self.router.post(
"/generate", dependencies=[Depends(combined_auth)], include_in_schema=True
)
async def generate(raw_request: Request):
"""Handle generate completion requests acting as an Ollama model
For compatibility purpose, the request is not processed by LightRAG,
and will be handled by underlying LLM model.
Supports both application/json and application/octet-stream Content-Types.
"""
try:
# Parse the request body manually
request = await parse_request_body(raw_request, OllamaGenerateRequest)
query = request.prompt
start_time = time.time_ns()
prompt_tokens = estimate_tokens(query)
if request.system:
self.rag.llm_model_kwargs["system_prompt"] = request.system
if request.stream:
response = await self.rag.llm_model_func(
query, stream=True, **self.rag.llm_model_kwargs
)
async def stream_generator():
first_chunk_time = None
last_chunk_time = time.time_ns()
total_response = ""
# Ensure response is an async generator
if isinstance(response, str):
# If it's a string, send in two parts
first_chunk_time = start_time
last_chunk_time = time.time_ns()
total_response = response
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"response": response,
"done": False,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
completion_tokens = estimate_tokens(total_response)
total_time = last_chunk_time - start_time
prompt_eval_time = first_chunk_time - start_time
eval_time = last_chunk_time - first_chunk_time
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"response": "",
"done": True,
"done_reason": "stop",
"context": [],
"total_duration": total_time,
"load_duration": 0,
"prompt_eval_count": prompt_tokens,
"prompt_eval_duration": prompt_eval_time,
"eval_count": completion_tokens,
"eval_duration": eval_time,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
else:
try:
async for chunk in response:
if chunk:
if first_chunk_time is None:
first_chunk_time = time.time_ns()
last_chunk_time = time.time_ns()
total_response += chunk
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"response": chunk,
"done": False,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
except (asyncio.CancelledError, Exception) as e:
error_msg = str(e)
if isinstance(e, asyncio.CancelledError):
error_msg = "Stream was cancelled by server"
else:
error_msg = f"Provider error: {error_msg}"
logger.error(f"Stream error: {error_msg}")
# Send error message to client
error_data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"response": f"\n\nError: {error_msg}",
"error": f"\n\nError: {error_msg}",
"done": False,
}
yield f"{json.dumps(error_data, ensure_ascii=False)}\n"
# Send final message to close the stream
final_data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"response": "",
"done": True,
}
yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
return
if first_chunk_time is None:
first_chunk_time = start_time
completion_tokens = estimate_tokens(total_response)
total_time = last_chunk_time - start_time
prompt_eval_time = first_chunk_time - start_time
eval_time = last_chunk_time - first_chunk_time
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"response": "",
"done": True,
"done_reason": "stop",
"context": [],
"total_duration": total_time,
"load_duration": 0,
"prompt_eval_count": prompt_tokens,
"prompt_eval_duration": prompt_eval_time,
"eval_count": completion_tokens,
"eval_duration": eval_time,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
return
return StreamingResponse(
stream_generator(),
media_type="application/x-ndjson",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Content-Type": "application/x-ndjson",
"X-Accel-Buffering": "no", # Ensure proper handling of streaming responses in Nginx proxy
},
)
else:
first_chunk_time = time.time_ns()
response_text = await self.rag.llm_model_func(
query, stream=False, **self.rag.llm_model_kwargs
)
last_chunk_time = time.time_ns()
if not response_text:
response_text = "No response generated"
completion_tokens = estimate_tokens(str(response_text))
total_time = last_chunk_time - start_time
prompt_eval_time = first_chunk_time - start_time
eval_time = last_chunk_time - first_chunk_time
return {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"response": str(response_text),
"done": True,
"done_reason": "stop",
"context": [],
"total_duration": total_time,
"load_duration": 0,
"prompt_eval_count": prompt_tokens,
"prompt_eval_duration": prompt_eval_time,
"eval_count": completion_tokens,
"eval_duration": eval_time,
}
except Exception as e:
logger.error(f"Ollama generate error: {str(e)}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
@self.router.post(
"/chat", dependencies=[Depends(combined_auth)], include_in_schema=True
)
async def chat(raw_request: Request):
"""Process chat completion requests by acting as an Ollama model.
Routes user queries through LightRAG by selecting query mode based on query prefix.
Detects and forwards OpenWebUI session-related requests (for meta data generation task) directly to LLM.
Supports both application/json and application/octet-stream Content-Types.
"""
try:
# Parse the request body manually
request = await parse_request_body(raw_request, OllamaChatRequest)
# Get all messages
messages = request.messages
if not messages:
raise HTTPException(status_code=400, detail="No messages provided")
# Validate that the last message is from a user
if messages[-1].role != "user":
raise HTTPException(
status_code=400, detail="Last message must be from user role"
)
# Get the last message as query and previous messages as history
query = messages[-1].content
# Convert OllamaMessage objects to dictionaries
conversation_history = [
{"role": msg.role, "content": msg.content} for msg in messages[:-1]
]
# Check for query prefix
cleaned_query, mode, only_need_context, user_prompt = parse_query_mode(
query
)
start_time = time.time_ns()
prompt_tokens = estimate_tokens(cleaned_query)
param_dict = {
"mode": mode.value,
"stream": request.stream,
"only_need_context": only_need_context,
"conversation_history": conversation_history,
"top_k": self.top_k,
}
# Add user_prompt to param_dict
if user_prompt is not None:
param_dict["user_prompt"] = user_prompt
query_param = QueryParam(**param_dict)
if request.stream:
# Determine if the request is prefix with "/bypass"
if mode == SearchMode.bypass:
if request.system:
self.rag.llm_model_kwargs["system_prompt"] = request.system
response = await self.rag.llm_model_func(
cleaned_query,
stream=True,
history_messages=conversation_history,
**self.rag.llm_model_kwargs,
)
else:
response = await self.rag.aquery(
cleaned_query, param=query_param
)
async def stream_generator():
first_chunk_time = None
last_chunk_time = time.time_ns()
total_response = ""
# Ensure response is an async generator
if isinstance(response, str):
# If it's a string, send in two parts
first_chunk_time = start_time
last_chunk_time = time.time_ns()
total_response = response
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"message": {
"role": "assistant",
"content": response,
"images": None,
},
"done": False,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
completion_tokens = estimate_tokens(total_response)
total_time = last_chunk_time - start_time
prompt_eval_time = first_chunk_time - start_time
eval_time = last_chunk_time - first_chunk_time
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"message": {
"role": "assistant",
"content": "",
"images": None,
},
"done_reason": "stop",
"done": True,
"total_duration": total_time,
"load_duration": 0,
"prompt_eval_count": prompt_tokens,
"prompt_eval_duration": prompt_eval_time,
"eval_count": completion_tokens,
"eval_duration": eval_time,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
else:
try:
async for chunk in response:
if chunk:
if first_chunk_time is None:
first_chunk_time = time.time_ns()
last_chunk_time = time.time_ns()
total_response += chunk
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"message": {
"role": "assistant",
"content": chunk,
"images": None,
},
"done": False,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
except (asyncio.CancelledError, Exception) as e:
error_msg = str(e)
if isinstance(e, asyncio.CancelledError):
error_msg = "Stream was cancelled by server"
else:
error_msg = f"Provider error: {error_msg}"
logger.error(f"Stream error: {error_msg}")
# Send error message to client
error_data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"message": {
"role": "assistant",
"content": f"\n\nError: {error_msg}",
"images": None,
},
"error": f"\n\nError: {error_msg}",
"done": False,
}
yield f"{json.dumps(error_data, ensure_ascii=False)}\n"
# Send final message to close the stream
final_data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"message": {
"role": "assistant",
"content": "",
"images": None,
},
"done": True,
}
yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
return
if first_chunk_time is None:
first_chunk_time = start_time
completion_tokens = estimate_tokens(total_response)
total_time = last_chunk_time - start_time
prompt_eval_time = first_chunk_time - start_time
eval_time = last_chunk_time - first_chunk_time
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"message": {
"role": "assistant",
"content": "",
"images": None,
},
"done_reason": "stop",
"done": True,
"total_duration": total_time,
"load_duration": 0,
"prompt_eval_count": prompt_tokens,
"prompt_eval_duration": prompt_eval_time,
"eval_count": completion_tokens,
"eval_duration": eval_time,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
return StreamingResponse(
stream_generator(),
media_type="application/x-ndjson",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Content-Type": "application/x-ndjson",
"X-Accel-Buffering": "no", # Ensure proper handling of streaming responses in Nginx proxy
},
)
else:
first_chunk_time = time.time_ns()
# Determine if the request is prefix with "/bypass" or from Open WebUI's session title and session keyword generation task
match_result = re.search(
r"\n<chat_history>\nUSER:", cleaned_query, re.MULTILINE
)
if match_result or mode == SearchMode.bypass:
if request.system:
self.rag.llm_model_kwargs["system_prompt"] = request.system
response_text = await self.rag.llm_model_func(
cleaned_query,
stream=False,
history_messages=conversation_history,
**self.rag.llm_model_kwargs,
)
else:
response_text = await self.rag.aquery(
cleaned_query, param=query_param
)
last_chunk_time = time.time_ns()
if not response_text:
response_text = "No response generated"
completion_tokens = estimate_tokens(str(response_text))
total_time = last_chunk_time - start_time
prompt_eval_time = first_chunk_time - start_time
eval_time = last_chunk_time - first_chunk_time
return {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"message": {
"role": "assistant",
"content": str(response_text),
"images": None,
},
"done_reason": "stop",
"done": True,
"total_duration": total_time,
"load_duration": 0,
"prompt_eval_count": prompt_tokens,
"prompt_eval_duration": prompt_eval_time,
"eval_count": completion_tokens,
"eval_duration": eval_time,
}
except Exception as e:
logger.error(f"Ollama chat error: {str(e)}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,282 @@
#!/usr/bin/env python
"""
Start LightRAG server with Gunicorn
"""
import os
import sys
import platform
import pipmaster as pm
from lightrag.api.utils_api import display_splash_screen, check_env_file
from lightrag.api.config import global_args
from lightrag.utils import get_env_value
from lightrag.kg.shared_storage import initialize_share_data
from lightrag.constants import (
DEFAULT_WOKERS,
DEFAULT_TIMEOUT,
)
def check_and_install_dependencies():
"""Check and install required dependencies"""
required_packages = [
"gunicorn",
"tiktoken",
"psutil",
# Add other required packages here
]
for package in required_packages:
if not pm.is_installed(package):
print(f"Installing {package}...")
pm.install(package)
print(f"{package} installed successfully")
def main():
# Explicitly initialize configuration for Gunicorn mode
from lightrag.api.config import initialize_config
initialize_config()
# Set Gunicorn mode flag for lifespan cleanup detection
os.environ["LIGHTRAG_GUNICORN_MODE"] = "1"
# Check .env file
if not check_env_file():
sys.exit(1)
# Check DOCLING compatibility with Gunicorn multi-worker mode on macOS
if (
platform.system() == "Darwin"
and global_args.document_loading_engine == "DOCLING"
and global_args.workers > 1
):
print("\n" + "=" * 80)
print("❌ ERROR: Incompatible configuration detected!")
print("=" * 80)
print(
"\nDOCLING engine with Gunicorn multi-worker mode is not supported on macOS"
)
print("\nReason:")
print(" PyTorch (required by DOCLING) has known compatibility issues with")
print(" fork-based multiprocessing on macOS, which can cause crashes or")
print(" unexpected behavior when using Gunicorn with multiple workers.")
print("\nCurrent configuration:")
print(" - Operating System: macOS (Darwin)")
print(f" - Document Engine: {global_args.document_loading_engine}")
print(f" - Workers: {global_args.workers}")
print("\nPossible solutions:")
print(" 1. Use single worker mode:")
print(" --workers 1")
print("\n 2. Change document loading engine in .env:")
print(" DOCUMENT_LOADING_ENGINE=DEFAULT")
print("\n 3. Deploy on Linux where multi-worker mode is fully supported")
print("=" * 80 + "\n")
sys.exit(1)
# Check macOS fork safety environment variable for multi-worker mode
if (
platform.system() == "Darwin"
and global_args.workers > 1
and os.environ.get("OBJC_DISABLE_INITIALIZE_FORK_SAFETY") != "YES"
):
print("\n" + "=" * 80)
print("❌ ERROR: Missing required environment variable on macOS!")
print("=" * 80)
print("\nmacOS with Gunicorn multi-worker mode requires:")
print(" OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES")
print("\nReason:")
print(" NumPy uses macOS's Accelerate framework (Objective-C based) for")
print(" vector computations. The Objective-C runtime has fork safety checks")
print(" that will crash worker processes when embedding functions are called.")
print("\nCurrent configuration:")
print(" - Operating System: macOS (Darwin)")
print(f" - Workers: {global_args.workers}")
print(
f" - Environment Variable: {os.environ.get('OBJC_DISABLE_INITIALIZE_FORK_SAFETY', 'NOT SET')}"
)
print("\nHow to fix:")
print(" Option 1 - Set environment variable before starting (recommended):")
print(" export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES")
print(" lightrag-gunicorn --workers 2")
print("\n Option 2 - Add to your shell profile (~/.zshrc or ~/.bash_profile):")
print(" echo 'export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES' >> ~/.zshrc")
print(" source ~/.zshrc")
print("\n Option 3 - Use single worker mode (no multiprocessing):")
print(" lightrag-server --workers 1")
print("=" * 80 + "\n")
sys.exit(1)
# Check and install dependencies
check_and_install_dependencies()
# Note: Signal handlers are NOT registered here because:
# - Master cleanup already handled by gunicorn_config.on_exit()
# Display startup information
display_splash_screen(global_args)
print("🚀 Starting LightRAG with Gunicorn")
print(f"🔄 Worker management: Gunicorn (workers={global_args.workers})")
print("🔍 Preloading app: Enabled")
print("📝 Note: Using Gunicorn's preload feature for shared data initialization")
print("\n\n" + "=" * 80)
print("MAIN PROCESS INITIALIZATION")
print(f"Process ID: {os.getpid()}")
print(f"Workers setting: {global_args.workers}")
print("=" * 80 + "\n")
# Import Gunicorn's StandaloneApplication
from gunicorn.app.base import BaseApplication
# Define a custom application class that loads our config
class GunicornApp(BaseApplication):
def __init__(self, app, options=None):
self.options = options or {}
self.application = app
super().__init__()
def load_config(self):
# Define valid Gunicorn configuration options
valid_options = {
"bind",
"workers",
"worker_class",
"timeout",
"keepalive",
"preload_app",
"errorlog",
"accesslog",
"loglevel",
"certfile",
"keyfile",
"limit_request_line",
"limit_request_fields",
"limit_request_field_size",
"graceful_timeout",
"max_requests",
"max_requests_jitter",
}
# Special hooks that need to be set separately
special_hooks = {
"on_starting",
"on_reload",
"on_exit",
"pre_fork",
"post_fork",
"pre_exec",
"pre_request",
"post_request",
"worker_init",
"worker_exit",
"nworkers_changed",
"child_exit",
}
# Import and configure the gunicorn_config module
from lightrag.api import gunicorn_config
# Set configuration variables in gunicorn_config, prioritizing command line arguments
gunicorn_config.workers = (
global_args.workers
if global_args.workers
else get_env_value("WORKERS", DEFAULT_WOKERS, int)
)
# Bind configuration prioritizes command line arguments
host = (
global_args.host
if global_args.host != "0.0.0.0"
else os.getenv("HOST", "0.0.0.0")
)
port = (
global_args.port
if global_args.port != 9621
else get_env_value("PORT", 9621, int)
)
gunicorn_config.bind = f"{host}:{port}"
# Log level configuration prioritizes command line arguments
gunicorn_config.loglevel = (
global_args.log_level.lower()
if global_args.log_level
else os.getenv("LOG_LEVEL", "info")
)
# Timeout configuration prioritizes command line arguments
gunicorn_config.timeout = (
global_args.timeout + 30
if global_args.timeout is not None
else get_env_value(
"TIMEOUT", DEFAULT_TIMEOUT + 30, int, special_none=True
)
)
# Keepalive configuration
gunicorn_config.keepalive = get_env_value("KEEPALIVE", 5, int)
# SSL configuration prioritizes command line arguments
if global_args.ssl or os.getenv("SSL", "").lower() in (
"true",
"1",
"yes",
"t",
"on",
):
gunicorn_config.certfile = (
global_args.ssl_certfile
if global_args.ssl_certfile
else os.getenv("SSL_CERTFILE")
)
gunicorn_config.keyfile = (
global_args.ssl_keyfile
if global_args.ssl_keyfile
else os.getenv("SSL_KEYFILE")
)
# Set configuration options from the module
for key in dir(gunicorn_config):
if key in valid_options:
value = getattr(gunicorn_config, key)
# Skip functions like on_starting and None values
if not callable(value) and value is not None:
self.cfg.set(key, value)
# Set special hooks
elif key in special_hooks:
value = getattr(gunicorn_config, key)
if callable(value):
self.cfg.set(key, value)
if hasattr(gunicorn_config, "logconfig_dict"):
self.cfg.set(
"logconfig_dict", getattr(gunicorn_config, "logconfig_dict")
)
def load(self):
# Import the application
from lightrag.api.lightrag_server import get_application
return get_application(global_args)
# Create the application
app = GunicornApp("")
# Force workers to be an integer and greater than 1 for multi-process mode
workers_count = global_args.workers
if workers_count > 1:
# Set a flag to indicate we're in the main process
os.environ["LIGHTRAG_MAIN_PROCESS"] = "1"
initialize_share_data(workers_count)
else:
initialize_share_data(1)
# Run the application
print("\nStarting Gunicorn with direct Python API...")
app.run()
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,128 @@
"""Helpers for validating startup runtime expectations from `.env`."""
from __future__ import annotations
import os
from dataclasses import dataclass
from pathlib import Path
from dotenv import dotenv_values
_CONTAINER_RUNTIME_TARGETS = {"compose", "docker"}
@dataclass(frozen=True)
class RuntimeEnvironment:
"""Describes whether the current process is running in a container runtime."""
in_container: bool
in_docker: bool
in_kubernetes: bool
@property
def label(self) -> str:
if self.in_kubernetes:
return "Kubernetes"
if self.in_docker:
return "Docker"
return "host"
def _read_cgroup_content() -> str:
"""Best-effort read of cgroup metadata for container detection."""
for candidate in ("/proc/1/cgroup", "/proc/self/cgroup"):
try:
return Path(candidate).read_text(encoding="utf-8")
except OSError:
continue
return ""
def detect_runtime_environment(
environ: dict[str, str] | None = None,
) -> RuntimeEnvironment:
"""Detect whether the current process is running on host, Docker, or Kubernetes."""
environ = environ or os.environ
cgroup_content = _read_cgroup_content().lower()
in_kubernetes = bool(
environ.get("KUBERNETES_SERVICE_HOST")
or Path("/var/run/secrets/kubernetes.io/serviceaccount").exists()
or "kubepods" in cgroup_content
or "kubernetes" in cgroup_content
)
in_docker = bool(
Path("/.dockerenv").exists()
or Path("/run/.containerenv").exists()
or any(
marker in cgroup_content
for marker in ("docker", "containerd", "libpod", "podman")
)
)
return RuntimeEnvironment(
in_container=in_kubernetes or in_docker,
in_docker=in_docker,
in_kubernetes=in_kubernetes,
)
def load_runtime_target_from_env_file(env_path: str | Path = ".env") -> str | None:
"""Return the raw LIGHTRAG_RUNTIME_TARGET value from the `.env` file, if present."""
env_values = dotenv_values(str(env_path))
runtime_target = env_values.get("LIGHTRAG_RUNTIME_TARGET")
if runtime_target is None:
return None
return runtime_target.strip()
def validate_runtime_target(
runtime_target: str | None,
runtime_environment: RuntimeEnvironment | None = None,
) -> tuple[bool, str | None]:
"""Validate `.env` runtime target against the current runtime environment."""
if runtime_target is None:
return True, None
normalized_target = runtime_target.strip().lower()
runtime_environment = runtime_environment or detect_runtime_environment()
if normalized_target == "host":
if runtime_environment.in_container:
return (
False,
"Configuration error in .env: LIGHTRAG_RUNTIME_TARGET=host.\n"
"This value from .env requires the server process to run on the host, "
f"but the current process is running inside {runtime_environment.label}.",
)
return True, None
if normalized_target in _CONTAINER_RUNTIME_TARGETS:
if runtime_environment.in_container:
return True, None
return (
False,
f"Configuration error in .env: LIGHTRAG_RUNTIME_TARGET={runtime_target}.\n"
"This value from .env requires the server process to run inside Docker or "
"Kubernetes, but the current process is running on the host.",
)
return (
False,
f"Configuration error in .env: LIGHTRAG_RUNTIME_TARGET={runtime_target!r}.\n"
"This value from .env must be 'host' or 'compose' (alias: 'docker').",
)
def validate_runtime_target_from_env_file(
env_path: str | Path = ".env",
runtime_environment: RuntimeEnvironment | None = None,
) -> tuple[bool, str | None]:
"""Load LIGHTRAG_RUNTIME_TARGET from `.env` and validate it if declared."""
runtime_target = load_runtime_target_from_env_file(env_path)
return validate_runtime_target(runtime_target, runtime_environment)

Binary file not shown.

After

Width:  |  Height:  |  Size: 628 B

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,451 @@
"""
Utility functions for the LightRAG API.
"""
import os
import argparse
from typing import Optional, List, Tuple
import sys
import time
import logging
from ascii_colors import ASCIIColors
from .._version import __api_version__ as api_version
from .._version import __version__ as core_version
from lightrag.constants import (
DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE,
)
from lightrag.api.runtime_validation import validate_runtime_target_from_env_file
from fastapi import HTTPException, Security, Request, Response, status
from fastapi.security import APIKeyHeader, OAuth2PasswordBearer
from starlette.status import HTTP_403_FORBIDDEN
from .auth import auth_handler
from .config import ollama_server_infos, global_args, get_env_value
logger = logging.getLogger("lightrag")
# ========== Token Renewal Rate Limiting ==========
# Cache to track last renewal time per user (username as key)
# Format: {username: last_renewal_timestamp}
_token_renewal_cache: dict[str, float] = {}
_RENEWAL_MIN_INTERVAL = 60 # Minimum 60 seconds between renewals for same user
# ========== Token Renewal Path Exclusions ==========
# Paths that should NOT trigger token auto-renewal
# - /health: Health check endpoint, no login required
# - /documents/paginated: Client polls this frequently (5-30s), renewal not needed
# - /documents/pipeline_status: Client polls this very frequently (2s), renewal not needed
_TOKEN_RENEWAL_SKIP_PATHS = [
"/health",
"/documents/paginated",
"/documents/pipeline_status",
]
def check_env_file():
"""
Check if .env file exists and handle user confirmation if needed.
Returns True if should continue, False if should exit.
"""
env_path = ".env"
if not os.path.exists(env_path):
warning_msg = "Warning: Startup directory must contain .env file for multi-instance support."
ASCIIColors.yellow(warning_msg)
# Check if running in interactive terminal
if sys.stdin.isatty():
response = input("Do you want to continue? (yes/NO): ")
if response.lower() != "yes":
ASCIIColors.red("Server startup cancelled")
return False
return True
is_valid, error_message = validate_runtime_target_from_env_file(env_path)
if not is_valid:
for line in error_message.splitlines():
ASCIIColors.red(line)
return False
return True
# Get whitelist paths from global_args, only once during initialization
whitelist_paths = global_args.whitelist_paths.split(",")
# Pre-compile path matching patterns
whitelist_patterns: List[Tuple[str, bool]] = []
for path in whitelist_paths:
path = path.strip()
if path:
# If path ends with /*, match all paths with that prefix
if path.endswith("/*"):
prefix = path[:-2]
whitelist_patterns.append((prefix, True)) # (prefix, is_prefix_match)
else:
whitelist_patterns.append((path, False)) # (exact_path, is_prefix_match)
# Global authentication configuration
auth_configured = bool(auth_handler.accounts)
def get_combined_auth_dependency(api_key: Optional[str] = None):
"""
Create a combined authentication dependency that implements authentication logic
based on API key, OAuth2 token, and whitelist paths.
Args:
api_key (Optional[str]): API key for validation
Returns:
Callable: A dependency function that implements the authentication logic
"""
# Use global whitelist_patterns and auth_configured variables
# whitelist_patterns and auth_configured are already initialized at module level
# Only calculate api_key_configured as it depends on the function parameter
api_key_configured = bool(api_key)
# Create security dependencies with proper descriptions for Swagger UI
oauth2_scheme = OAuth2PasswordBearer(
tokenUrl="login", auto_error=False, description="OAuth2 Password Authentication"
)
# If API key is configured, create an API key header security
api_key_header = None
if api_key_configured:
api_key_header = APIKeyHeader(
name="X-API-Key", auto_error=False, description="API Key Authentication"
)
async def combined_dependency(
request: Request,
response: Response, # Added: needed to return new token via response header
token: str = Security(oauth2_scheme),
api_key_header_value: Optional[str] = None
if api_key_header is None
else Security(api_key_header),
):
# 1. Check if path is in whitelist
path = request.url.path
for pattern, is_prefix in whitelist_patterns:
if (is_prefix and path.startswith(pattern)) or (
not is_prefix and path == pattern
):
return # Whitelist path, allow access
# 2. Validate token first if provided in the request (Ensure 401 error if token is invalid)
if token:
try:
token_info = auth_handler.validate_token(token)
# ========== Token Auto-Renewal Logic ==========
from lightrag.api.config import global_args
from datetime import datetime, timezone
if global_args.token_auto_renew:
# Check if current path should skip token renewal
skip_renewal = any(
path == skip_path or path.startswith(skip_path + "/")
for skip_path in _TOKEN_RENEWAL_SKIP_PATHS
)
if skip_renewal:
logger.debug(f"Token auto-renewal skipped for path: {path}")
else:
try:
expire_time = token_info.get("exp")
if expire_time:
# Calculate remaining time ratio
now = datetime.now(timezone.utc)
remaining_seconds = (expire_time - now).total_seconds()
# Get original token expiration duration
role = token_info.get("role", "user")
total_hours = (
auth_handler.guest_expire_hours
if role == "guest"
else auth_handler.expire_hours
)
total_seconds = total_hours * 3600
# Issue new token if remaining time < threshold
if (
remaining_seconds
< total_seconds * global_args.token_renew_threshold
):
# ========== Rate Limiting Check ==========
username = token_info["username"]
current_time = time.time()
last_renewal = _token_renewal_cache.get(username, 0)
time_since_last_renewal = (
current_time - last_renewal
)
# Only renew if enough time has passed since last renewal
if time_since_last_renewal >= _RENEWAL_MIN_INTERVAL:
new_token = auth_handler.create_token(
username=username,
role=role,
metadata=token_info.get("metadata", {}),
)
# Return new token via response header
response.headers["X-New-Token"] = new_token
# Update renewal cache
_token_renewal_cache[username] = current_time
# Optional: log renewal
logger.info(
f"Token auto-renewed for user {username} "
f"(role: {role}, remaining: {remaining_seconds:.0f}s)"
)
else:
# Log skip due to rate limit
logger.debug(
f"Token renewal skipped for {username} "
f"(rate limit: last renewal {time_since_last_renewal:.0f}s ago)"
)
# ========== End of Rate Limiting Check ==========
except Exception as e:
# Renewal failure should not affect normal request, just log
logger.warning(f"Token auto-renew failed: {e}")
# ========== End of Token Auto-Renewal Logic ==========
# Accept guest token if no auth is configured
if not auth_configured and token_info.get("role") == "guest":
return
# Accept non-guest token if auth is configured
if auth_configured and token_info.get("role") != "guest":
return
# Token validation failed, immediately return 401 error
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid token. Please login again.",
)
except HTTPException as e:
# If already a 401 error, re-raise it
if e.status_code == status.HTTP_401_UNAUTHORIZED:
raise
# For other exceptions, continue processing
# 3. Acept all request if no API protection needed
if not auth_configured and not api_key_configured:
return
# 4. Validate API key if provided and API-Key authentication is configured
if (
api_key_configured
and api_key_header_value
and api_key_header_value == api_key
):
return # API key validation successful
### Authentication failed ####
# if password authentication is configured but not provided, ensure 401 error if auth_configured
if auth_configured and not token:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="No credentials provided. Please login.",
)
# if api key is provided but validation failed
if api_key_header_value:
raise HTTPException(
status_code=HTTP_403_FORBIDDEN,
detail="Invalid API Key",
)
# if api_key_configured but not provided
if api_key_configured and not api_key_header_value:
raise HTTPException(
status_code=HTTP_403_FORBIDDEN,
detail="API Key required",
)
# Otherwise: refuse access and return 403 error
raise HTTPException(
status_code=HTTP_403_FORBIDDEN,
detail="API Key required or login authentication required.",
)
return combined_dependency
def display_splash_screen(args: argparse.Namespace) -> None:
"""
Display a colorful splash screen showing LightRAG server configuration
Args:
args: Parsed command line arguments
"""
# Banner
# Banner
top_border = "╔══════════════════════════════════════════════════════════════╗"
bottom_border = "╚══════════════════════════════════════════════════════════════╝"
width = len(top_border) - 4 # width inside the borders
line1_text = f"LightRAG Server v{core_version}/{api_version}"
line2_text = "Fast, Lightweight RAG Server Implementation"
line1 = f"{line1_text.center(width)}"
line2 = f"{line2_text.center(width)}"
banner = f"""
{top_border}
{line1}
{line2}
{bottom_border}
"""
ASCIIColors.cyan(banner)
# Server Configuration
ASCIIColors.magenta("\n📡 Server Configuration:")
ASCIIColors.white(" ├─ Host: ", end="")
ASCIIColors.yellow(f"{args.host}")
ASCIIColors.white(" ├─ Port: ", end="")
ASCIIColors.yellow(f"{args.port}")
ASCIIColors.white(" ├─ Workers: ", end="")
ASCIIColors.yellow(f"{args.workers}")
ASCIIColors.white(" ├─ Timeout: ", end="")
ASCIIColors.yellow(f"{args.timeout}")
ASCIIColors.white(" ├─ CORS Origins: ", end="")
ASCIIColors.yellow(f"{args.cors_origins}")
ASCIIColors.white(" ├─ SSL Enabled: ", end="")
ASCIIColors.yellow(f"{args.ssl}")
if args.ssl:
ASCIIColors.white(" ├─ SSL Cert: ", end="")
ASCIIColors.yellow(f"{args.ssl_certfile}")
ASCIIColors.white(" ├─ SSL Key: ", end="")
ASCIIColors.yellow(f"{args.ssl_keyfile}")
ASCIIColors.white(" ├─ Ollama Emulating Model: ", end="")
ASCIIColors.yellow(f"{ollama_server_infos.LIGHTRAG_MODEL}")
ASCIIColors.white(" ├─ Log Level: ", end="")
ASCIIColors.yellow(f"{args.log_level}")
ASCIIColors.white(" ├─ Verbose Debug: ", end="")
ASCIIColors.yellow(f"{args.verbose}")
ASCIIColors.white(" ├─ API Key: ", end="")
ASCIIColors.yellow("Set" if args.key else "Not Set")
ASCIIColors.white(" └─ JWT Auth: ", end="")
ASCIIColors.yellow("Enabled" if args.auth_accounts else "Disabled")
# Directory Configuration
ASCIIColors.magenta("\n📂 Directory Configuration:")
ASCIIColors.white(" ├─ Working Directory: ", end="")
ASCIIColors.yellow(f"{args.working_dir}")
ASCIIColors.white(" └─ Input Directory: ", end="")
ASCIIColors.yellow(f"{args.input_dir}")
# LLM Configuration
ASCIIColors.magenta("\n🤖 LLM Configuration:")
ASCIIColors.white(" ├─ Binding: ", end="")
ASCIIColors.yellow(f"{args.llm_binding}")
ASCIIColors.white(" ├─ Host: ", end="")
ASCIIColors.yellow(f"{args.llm_binding_host}")
ASCIIColors.white(" ├─ Model: ", end="")
ASCIIColors.yellow(f"{args.llm_model}")
ASCIIColors.white(" ├─ Max Async for LLM: ", end="")
ASCIIColors.yellow(f"{args.max_async}")
ASCIIColors.white(" ├─ Summary Context Size: ", end="")
ASCIIColors.yellow(f"{args.summary_context_size}")
ASCIIColors.white(" ├─ LLM Cache Enabled: ", end="")
ASCIIColors.yellow(f"{args.enable_llm_cache}")
ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="")
ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")
# Embedding Configuration
ASCIIColors.magenta("\n📊 Embedding Configuration:")
ASCIIColors.white(" ├─ Binding: ", end="")
ASCIIColors.yellow(f"{args.embedding_binding}")
ASCIIColors.white(" ├─ Host: ", end="")
ASCIIColors.yellow(f"{args.embedding_binding_host}")
ASCIIColors.white(" ├─ Model: ", end="")
ASCIIColors.yellow(f"{args.embedding_model}")
ASCIIColors.white(" ├─ Dimensions: ", end="")
ASCIIColors.yellow(f"{args.embedding_dim}")
ASCIIColors.white(" └─ Asymmetric: ", end="")
ASCIIColors.yellow(f"{args.embedding_asymmetric}")
# RAG Configuration
ASCIIColors.magenta("\n⚙️ RAG Configuration:")
ASCIIColors.white(" ├─ Summary Language: ", end="")
ASCIIColors.yellow(f"{args.summary_language}")
ASCIIColors.white(" ├─ Entity Types: ", end="")
ASCIIColors.yellow(f"{args.entity_types}")
ASCIIColors.white(" ├─ Max Parallel Insert: ", end="")
ASCIIColors.yellow(f"{args.max_parallel_insert}")
ASCIIColors.white(" ├─ Chunk Size: ", end="")
ASCIIColors.yellow(f"{args.chunk_size}")
ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="")
ASCIIColors.yellow(f"{args.chunk_overlap_size}")
ASCIIColors.white(" ├─ Cosine Threshold: ", end="")
ASCIIColors.yellow(f"{args.cosine_threshold}")
ASCIIColors.white(" ├─ Top-K: ", end="")
ASCIIColors.yellow(f"{args.top_k}")
ASCIIColors.white(" └─ Force LLM Summary on Merge: ", end="")
ASCIIColors.yellow(
f"{get_env_value('FORCE_LLM_SUMMARY_ON_MERGE', DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE, int)}"
)
# System Configuration
ASCIIColors.magenta("\n💾 Storage Configuration:")
ASCIIColors.white(" ├─ KV Storage: ", end="")
ASCIIColors.yellow(f"{args.kv_storage}")
ASCIIColors.white(" ├─ Vector Storage: ", end="")
ASCIIColors.yellow(f"{args.vector_storage}")
ASCIIColors.white(" ├─ Graph Storage: ", end="")
ASCIIColors.yellow(f"{args.graph_storage}")
ASCIIColors.white(" ├─ Document Status Storage: ", end="")
ASCIIColors.yellow(f"{args.doc_status_storage}")
ASCIIColors.white(" └─ Workspace: ", end="")
ASCIIColors.yellow(f"{args.workspace if args.workspace else '-'}")
# Server Status
ASCIIColors.green("\n✨ Server starting up...\n")
# Server Access Information
protocol = "https" if args.ssl else "http"
if args.host == "0.0.0.0":
ASCIIColors.magenta("\n🌐 Server Access Information:")
ASCIIColors.white(" ├─ WebUI (local): ", end="")
ASCIIColors.yellow(f"{protocol}://localhost:{args.port}")
ASCIIColors.white(" ├─ Remote Access: ", end="")
ASCIIColors.yellow(f"{protocol}://<your-ip-address>:{args.port}")
ASCIIColors.white(" ├─ API Documentation (local): ", end="")
ASCIIColors.yellow(f"{protocol}://localhost:{args.port}/docs")
ASCIIColors.white(" └─ Alternative Documentation (local): ", end="")
ASCIIColors.yellow(f"{protocol}://localhost:{args.port}/redoc")
ASCIIColors.magenta("\n📝 Note:")
ASCIIColors.cyan(""" Since the server is running on 0.0.0.0:
- Use 'localhost' or '127.0.0.1' for local access
- Use your machine's IP address for remote access
- To find your IP address:
• Windows: Run 'ipconfig' in terminal
• Linux/Mac: Run 'ifconfig' or 'ip addr' in terminal
""")
else:
base_url = f"{protocol}://{args.host}:{args.port}"
ASCIIColors.magenta("\n🌐 Server Access Information:")
ASCIIColors.white(" ├─ WebUI (local): ", end="")
ASCIIColors.yellow(f"{base_url}")
ASCIIColors.white(" ├─ API Documentation: ", end="")
ASCIIColors.yellow(f"{base_url}/docs")
ASCIIColors.white(" └─ Alternative Documentation: ", end="")
ASCIIColors.yellow(f"{base_url}/redoc")
# Security Notice
if args.key:
ASCIIColors.yellow("\n⚠️ Security Notice:")
ASCIIColors.white(""" API Key authentication is enabled.
Make sure to include the X-API-Key header in all your requests.
""")
if args.auth_accounts:
ASCIIColors.yellow("\n⚠️ Security Notice:")
ASCIIColors.white(""" JWT authentication is enabled.
Make sure to login before making the request, and include the 'Authorization' in the header.
""")
# Ensure splash output flush to system log
sys.stdout.flush()

View File

@@ -0,0 +1 @@
import{H as e,R as t,S as n,U as r,b as i,c as a,f as o,n as s,r as c,u as l,v as u}from"./_createAssigner-CdflnHPZ.js";import{A as d,C as f,D as p,M as m,N as h,S as g,T as _,f as v,h as y,u as b,w as x}from"./_baseUniq-CSNgIvS9.js";var S=/\s/;function C(e){for(var t=e.length;t--&&S.test(e.charAt(t)););return t}var w=/^\s+/;function T(e){return e&&e.slice(0,C(e)+1).replace(w,``)}var E=NaN,D=/^[-+]0x[0-9a-f]+$/i,O=/^0b[01]+$/i,k=/^0o[0-7]+$/i,A=parseInt;function j(t){if(typeof t==`number`)return t;if(h(t))return E;if(e(t)){var n=typeof t.valueOf==`function`?t.valueOf():t;t=e(n)?n+``:n}if(typeof t!=`string`)return t===0?t:+t;t=T(t);var r=O.test(t);return r||k.test(t)?A(t.slice(2),r?2:8):D.test(t)?E:+t}var M=1/0,N=17976931348623157e292;function P(e){return e?(e=j(e),e===M||e===-M?(e<0?-1:1)*N:e===e?e:0):e===0?e:0}function F(e){var t=P(e),n=t%1;return t===t?n?t-n:t:0}function I(e){return e!=null&&e.length?g(e,1):[]}var L=Object.prototype,R=L.hasOwnProperty,z=c(function(e,n){e=Object(e);var r=-1,i=n.length,o=i>2?n[2]:void 0;for(o&&s(n[0],n[1],o)&&(i=1);++r<i;)for(var c=n[r],l=a(c),u=-1,d=l.length;++u<d;){var f=l[u],p=e[f];(p===void 0||t(p,L[f])&&!R.call(e,f))&&(e[f]=c[f])}return e});function B(e){var t=e==null?0:e.length;return t?e[t-1]:void 0}function V(e){return function(t,n,r){var i=Object(t);if(!u(t)){var a=v(n,3);t=p(t),n=function(e){return a(i[e],e,i)}}var o=e(t,n,r);return o>-1?i[a?t[o]:o]:void 0}}var H=Math.max;function U(e,t,n){var r=e==null?0:e.length;if(!r)return-1;var i=n==null?0:F(n);return i<0&&(i=H(r+i,0)),d(e,v(t,3),i)}var W=V(U);function G(e,t){var n=-1,r=u(e)?Array(e.length):[];return b(e,function(e,i,a){r[++n]=t(e,i,a)}),r}function K(e,t){return(i(e)?m:G)(e,v(t,3))}var q=Object.prototype.hasOwnProperty;function J(e,t){return e!=null&&q.call(e,t)}function Y(e,t){return e!=null&&y(e,t,J)}var X=`[object String]`;function Z(e){return typeof e==`string`||!i(e)&&n(e)&&r(e)==X}function Q(t,n,r,i){if(!e(t))return t;n=_(n,t);for(var a=-1,s=n.length,c=s-1,u=t;u!=null&&++a<s;){var d=x(n[a]),f=r;if(d===`__proto__`||d===`constructor`||d===`prototype`)return t;if(a!=c){var p=u[d];f=i?i(p,d,u):void 0,f===void 0&&(f=e(p)?p:l(n[a+1])?[]:{})}o(u,d,f),u=u[d]}return t}function $(e,t,n){for(var r=-1,i=t.length,a={};++r<i;){var o=t[r],s=f(e,o);n(s,o)&&Q(a,_(o,e),s)}return a}export{G as a,z as c,P as d,K as i,I as l,Z as n,W as o,Y as r,B as s,$ as t,F as u};

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
import{n as e,t}from"./path-BmDdnQs8.js";import{$ as n,Q as r,X as i,Y as a,Z as o,at as s,it as c,nt as l,ot as u,rt as d,st as f,tt as p}from"./index-Cmfh6eB3.js";function m(e){return e.innerRadius}function h(e){return e.outerRadius}function g(e){return e.startAngle}function _(e){return e.endAngle}function v(e){return e&&e.padAngle}function y(e,t,n,r,i,a,o,s){var c=n-e,l=r-t,u=o-i,d=s-a,f=d*c-u*l;if(!(f*f<1e-12))return f=(u*(t-a)-d*(e-i))/f,[e+f*c,t+f*l]}function b(e,t,n,r,i,a,o){var s=e-n,c=t-r,d=(o?a:-a)/u(s*s+c*c),f=d*c,p=-d*s,m=e+f,h=t+p,g=n+f,_=r+p,v=(m+g)/2,y=(h+_)/2,b=g-m,x=_-h,S=b*b+x*x,C=i-a,w=m*_-g*h,T=(x<0?-1:1)*u(l(0,C*C*S-w*w)),E=(w*x-b*T)/S,D=(-w*b-x*T)/S,O=(w*x+b*T)/S,k=(-w*b+x*T)/S,A=E-v,j=D-y,M=O-v,N=k-y;return A*A+j*j>M*M+N*N&&(E=O,D=k),{cx:E,cy:D,x01:-f,y01:-p,x11:E*(i/C-1),y11:D*(i/C-1)}}function x(){var l=m,x=h,S=e(0),C=null,w=g,T=_,E=v,D=null,O=t(k);function k(){var e,t,m=+l.apply(this,arguments),h=+x.apply(this,arguments),g=w.apply(this,arguments)-p,_=T.apply(this,arguments)-p,v=a(_-g),k=_>g;if(D||=e=O(),h<m&&(t=h,h=m,m=t),!(h>1e-12))D.moveTo(0,0);else if(v>f-1e-12)D.moveTo(h*n(g),h*s(g)),D.arc(0,0,h,g,_,!k),m>1e-12&&(D.moveTo(m*n(_),m*s(_)),D.arc(0,0,m,_,g,k));else{var A=g,j=_,M=g,N=_,P=v,F=v,I=E.apply(this,arguments)/2,L=I>1e-12&&(C?+C.apply(this,arguments):u(m*m+h*h)),R=d(a(h-m)/2,+S.apply(this,arguments)),z=R,B=R,V,H;if(L>1e-12){var U=o(L/m*s(I)),W=o(L/h*s(I));(P-=U*2)>1e-12?(U*=k?1:-1,M+=U,N-=U):(P=0,M=N=(g+_)/2),(F-=W*2)>1e-12?(W*=k?1:-1,A+=W,j-=W):(F=0,A=j=(g+_)/2)}var G=h*n(A),K=h*s(A),q=m*n(N),J=m*s(N);if(R>1e-12){var Y=h*n(j),X=h*s(j),Z=m*n(M),Q=m*s(M),$;if(v<c)if($=y(G,K,Z,Q,Y,X,q,J)){var ee=G-$[0],te=K-$[1],ne=Y-$[0],re=X-$[1],ie=1/s(i((ee*ne+te*re)/(u(ee*ee+te*te)*u(ne*ne+re*re)))/2),ae=u($[0]*$[0]+$[1]*$[1]);z=d(R,(m-ae)/(ie-1)),B=d(R,(h-ae)/(ie+1))}else z=B=0}F>1e-12?B>1e-12?(V=b(Z,Q,G,K,h,B,k),H=b(Y,X,q,J,h,B,k),D.moveTo(V.cx+V.x01,V.cy+V.y01),B<R?D.arc(V.cx,V.cy,B,r(V.y01,V.x01),r(H.y01,H.x01),!k):(D.arc(V.cx,V.cy,B,r(V.y01,V.x01),r(V.y11,V.x11),!k),D.arc(0,0,h,r(V.cy+V.y11,V.cx+V.x11),r(H.cy+H.y11,H.cx+H.x11),!k),D.arc(H.cx,H.cy,B,r(H.y11,H.x11),r(H.y01,H.x01),!k))):(D.moveTo(G,K),D.arc(0,0,h,A,j,!k)):D.moveTo(G,K),!(m>1e-12)||!(P>1e-12)?D.lineTo(q,J):z>1e-12?(V=b(q,J,Y,X,m,-z,k),H=b(G,K,Z,Q,m,-z,k),D.lineTo(V.cx+V.x01,V.cy+V.y01),z<R?D.arc(V.cx,V.cy,z,r(V.y01,V.x01),r(H.y01,H.x01),!k):(D.arc(V.cx,V.cy,z,r(V.y01,V.x01),r(V.y11,V.x11),!k),D.arc(0,0,m,r(V.cy+V.y11,V.cx+V.x11),r(H.cy+H.y11,H.cx+H.x11),k),D.arc(H.cx,H.cy,z,r(H.y11,H.x11),r(H.y01,H.x01),!k))):D.arc(0,0,m,N,M,k)}if(D.closePath(),e)return D=null,e+``||null}return k.centroid=function(){var e=(+l.apply(this,arguments)+ +x.apply(this,arguments))/2,t=(+w.apply(this,arguments)+ +T.apply(this,arguments))/2-c/2;return[n(t)*e,s(t)*e]},k.innerRadius=function(t){return arguments.length?(l=typeof t==`function`?t:e(+t),k):l},k.outerRadius=function(t){return arguments.length?(x=typeof t==`function`?t:e(+t),k):x},k.cornerRadius=function(t){return arguments.length?(S=typeof t==`function`?t:e(+t),k):S},k.padRadius=function(t){return arguments.length?(C=t==null?null:typeof t==`function`?t:e(+t),k):C},k.startAngle=function(t){return arguments.length?(w=typeof t==`function`?t:e(+t),k):w},k.endAngle=function(t){return arguments.length?(T=typeof t==`function`?t:e(+t),k):T},k.padAngle=function(t){return arguments.length?(E=typeof t==`function`?t:e(+t),k):E},k.context=function(e){return arguments.length?(D=e??null,k):D},k}export{x as t};

View File

@@ -0,0 +1 @@
import"./chunk-K5T4RW27-Bdzw7m0z.js";import{n as e}from"./chunk-7N4EOEYR-Bu6dy4pK.js";export{e as createArchitectureServices};

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
Array.prototype.slice;function e(e){return typeof e==`object`&&`length`in e?e:Array.from(e)}export{e as t};

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
import{nn as e,tn as t}from"./index-Cmfh6eB3.js";var n=(n,r)=>e.lang.round(t.parse(n)[r]);export{n as t};

View File

@@ -0,0 +1 @@
import{_ as e,g as t,h as n,i as r,m as i,s as a,t as o,u as s,v as c}from"./chunk-K5T4RW27-Bdzw7m0z.js";var l=class extends o{static{i(this,`RadarTokenBuilder`)}constructor(){super([`radar-beta`])}},u={parser:{TokenBuilder:i(()=>new l,`TokenBuilder`),ValueConverter:i(()=>new r,`ValueConverter`)}};function d(r=n){let i=t(c(r),a),o=t(e({shared:i}),s,u);return i.ServiceRegistry.register(o),{shared:i,Radar:o}}i(d,`createRadarServices`);export{d as n,u as t};

View File

@@ -0,0 +1 @@
import{in as e}from"./index-Cmfh6eB3.js";function t(e,t){e.accDescr&&t.setAccDescription?.(e.accDescr),e.accTitle&&t.setAccTitle?.(e.accTitle),e.title&&t.setDiagramTitle?.(e.title)}e(t,`populateCommonDb`);export{t};

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
import{bt as e,in as t}from"./index-Cmfh6eB3.js";var n=t((t,n)=>{let r;return n===`sandbox`&&(r=e(`#i`+t)),e(n===`sandbox`?r.nodes()[0].contentDocument.body:`body`).select(`[id="${t}"]`)},`getDiagramElement`);export{n as t};

View File

@@ -0,0 +1 @@
import{_ as e,a as t,g as n,h as r,i,m as a,s as o,t as s,v as c}from"./chunk-K5T4RW27-Bdzw7m0z.js";var l=class extends s{static{a(this,`GitGraphTokenBuilder`)}constructor(){super([`gitGraph`])}},u={parser:{TokenBuilder:a(()=>new l,`TokenBuilder`),ValueConverter:a(()=>new i,`ValueConverter`)}};function d(i=r){let a=n(c(i),o),s=n(e({shared:a}),t,u);return a.ServiceRegistry.register(s),{shared:a,GitGraph:s}}a(d,`createGitGraphServices`);export{d as n,u as t};

View File

@@ -0,0 +1 @@
import{_ as e,g as t,h as n,m as r,n as i,r as a,s as o,t as s,v as c}from"./chunk-K5T4RW27-Bdzw7m0z.js";var l=class extends s{static{r(this,`ArchitectureTokenBuilder`)}constructor(){super([`architecture`])}},u=class extends i{static{r(this,`ArchitectureValueConverter`)}runCustomConverter(e,t,n){if(e.name===`ARCH_ICON`)return t.replace(/[()]/g,``).trim();if(e.name===`ARCH_TEXT_ICON`)return t.replace(/["()]/g,``);if(e.name===`ARCH_TITLE`){let e=t.replace(/^\[|]$/g,``).trim();return(e.startsWith(`"`)&&e.endsWith(`"`)||e.startsWith(`'`)&&e.endsWith(`'`))&&(e=e.slice(1,-1),e=e.replace(/\\"/g,`"`).replace(/\\'/g,`'`)),e.trim()}}},d={parser:{TokenBuilder:r(()=>new l,`TokenBuilder`),ValueConverter:r(()=>new u,`ValueConverter`)}};function f(r=n){let i=t(c(r),o),s=t(e({shared:i}),a,d);return i.ServiceRegistry.register(s),{shared:i,Architecture:s}}r(f,`createArchitectureServices`);export{f as n,d as t};

View File

@@ -0,0 +1 @@
import{_ as e,g as t,h as n,l as r,m as i,n as a,s as o,t as s,v as c}from"./chunk-K5T4RW27-Bdzw7m0z.js";var l=class extends s{static{i(this,`PieTokenBuilder`)}constructor(){super([`pie`,`showData`])}},u=class extends a{static{i(this,`PieValueConverter`)}runCustomConverter(e,t,n){if(e.name===`PIE_SECTION_LABEL`)return t.replace(/"/g,``).trim()}},d={parser:{TokenBuilder:i(()=>new l,`TokenBuilder`),ValueConverter:i(()=>new u,`ValueConverter`)}};function f(i=n){let a=t(c(i),o),s=t(e({shared:a}),r,d);return a.ServiceRegistry.register(s),{shared:a,Pie:s}}i(f,`createPieServices`);export{f as n,d as t};

View File

@@ -0,0 +1 @@
import{_ as e,g as t,h as n,m as r,n as i,p as a,s as o,v as s}from"./chunk-K5T4RW27-Bdzw7m0z.js";var c=class extends i{static{r(this,`WardleyValueConverter`)}runCustomConverter(e,t,n){switch(e.name.toUpperCase()){case`LINK_LABEL`:return t.substring(1).trim();default:return}}},l={parser:{ValueConverter:r(()=>new c,`ValueConverter`)}};function u(r=n){let i=t(s(r),o),c=t(e({shared:i}),a,l);return i.ServiceRegistry.register(c),{shared:i,Wardley:c}}r(u,`createWardleyServices`);export{u as n,l as t};

View File

@@ -0,0 +1 @@
var e=Object.create,t=Object.defineProperty,n=Object.getOwnPropertyDescriptor,r=Object.getOwnPropertyNames,i=Object.getPrototypeOf,a=Object.prototype.hasOwnProperty,o=(e,t)=>()=>(e&&(t=e(e=0)),t),s=(e,t)=>()=>(t||(e((t={exports:{}}).exports,t),e=null),t.exports),c=(e,n)=>{let r={};for(var i in e)t(r,i,{get:e[i],enumerable:!0});return n||t(r,Symbol.toStringTag,{value:`Module`}),r},l=(e,i,o,s)=>{if(i&&typeof i==`object`||typeof i==`function`)for(var c=r(i),l=0,u=c.length,d;l<u;l++)d=c[l],!a.call(e,d)&&d!==o&&t(e,d,{get:(e=>i[e]).bind(null,d),enumerable:!(s=n(i,d))||s.enumerable});return e},u=(e,t,n)=>(l(e,t,`default`),n&&l(n,t,`default`)),d=(n,r,a)=>(a=n==null?{}:e(i(n)),l(r||!n||!n.__esModule?t(a,`default`,{value:n,enumerable:!0}):a,n)),f=e=>a.call(e,`module.exports`)?e[`module.exports`]:l(t({},`__esModule`,{value:!0}),e);export{f as a,u as i,o as n,d as o,c as r,s as t};

View File

@@ -0,0 +1 @@
import{Et as e,an as t,in as n}from"./index-Cmfh6eB3.js";var r=n((n,r,o,s)=>{n.attr(`class`,o);let{width:c,height:l,x:u,y:d}=i(n,r);e(n,l,c,s);let f=a(u,d,c,l,r);n.attr(`viewBox`,f),t.debug(`viewBox configured: ${f} with padding: ${r}`)},`setupViewPortForSVG`),i=n((e,t)=>{let n=e.node()?.getBBox()||{width:0,height:0,x:0,y:0};return{width:n.width+t*2,height:n.height+t*2,x:n.x,y:n.y}},`calculateDimensionsWithPadding`),a=n((e,t,n,r,i)=>`${e-i} ${t-i} ${n} ${r}`,`createViewBox`);export{r as t};

View File

@@ -0,0 +1,15 @@
import{in as e}from"./index-Cmfh6eB3.js";var t=e(()=>`
/* Font Awesome icon styling - consolidated */
.label-icon {
display: inline-block;
height: 1em;
overflow: visible;
vertical-align: -0.125em;
}
.node .label-icon path {
fill: currentColor;
stroke: revert;
stroke-width: revert;
}
`,`getIconStyles`);export{t};

Some files were not shown because too many files have changed in this diff Show More