chore: 添加上传文件存储目录

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
chore: 清理过期markdown文件并添加截图
2026-03-10 15:42:46 +08:00 · 2026-03-10 15:42:33 +08:00 · 2026-03-10 15:42:21 +08:00 · 2026-03-10 15:02:55 +08:00 · 2026-03-10 15:01:52 +08:00
48 changed files with 23871 additions and 2673 deletions
--- a/ai-core/main.py
+++ b/ai-core/main.py
@@ -0,0 +1,66 @@
+"""
+AI-Core Document Parser gRPC Server
+
+启动命令: python main.py [--port PORT] [--max-workers MAX_WORKERS] [--log-level LEVEL]
+"""
+import argparse
+import logging
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(__file__))
+
+from service.grpc_server import serve
+
+DEFAULT_PORT = 50051
+DEFAULT_MAX_WORKERS = 10
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Document Parser gRPC Server",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--port",
+        type=int,
+        default=DEFAULT_PORT,
+        help="Port to listen on",
+    )
+    parser.add_argument(
+        "--max-workers",
+        type=int,
+        default=DEFAULT_MAX_WORKERS,
+        help="Maximum number of worker threads",
+    )
+    parser.add_argument(
+        "--log-level",
+        type=str,
+        default="INFO",
+        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
+        help="Log level",
+    )
+
+    args = parser.parse_args()
+
+    logging.basicConfig(
+        level=getattr(logging, args.log_level),
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    )
+
+    logger = logging.getLogger(__name__)
+    logger.info("Starting Document Parser gRPC Server")
+    logger.info("Port: %d", args.port)
+    logger.info("Max workers: %d", args.max_workers)
+
+    try:
+        serve(port=args.port, max_workers=args.max_workers)
+    except KeyboardInterrupt:
+        logger.info("Server shutdown requested")
+    except Exception as e:
+        logger.error("Server error: %s", str(e), exc_info=True)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
--- a/ai-core/parser/init.py
+++ b/ai-core/parser/init.py
@@ -1,38 +1,10 @@
 """
-Parser module for WeKnora document processing system.
-
-This module provides document parsers for various file formats including:
- Microsoft Word documents (.doc, .docx)
- PDF documents
- Markdown files
- Plain text files
- Images with text content
- Web pages
-
-The parsers extract content from documents and can split them into
-meaningful chunks for further processing and indexing.
+Parser module for AI-Core document processing.
 """

-from .doc_parser import DocParser
-from .docx2_parser import Docx2Parser
-from .excel_parser import ExcelParser
-from .image_parser import ImageParser
-from .markdown_parser import MarkdownParser
-from .parser import Parser
-from .pdf_parser import PDFParser
-from .registry import ParserEngineRegistry, registry
-from .web_parser import WebParser
+from .parser_simple import Parser, Document

-# Export public classes and modules
 __all__ = [
-    "Docx2Parser",
-    "DocParser",
-    "PDFParser",
-    "MarkdownParser",
-    "ImageParser",
-    "WebParser",
    "Parser",
-    "ExcelParser",
-    "ParserEngineRegistry",
-    "registry",
+    "Document",
 ]
--- a/ai-core/parser/base_parser.py
+++ b/ai-core/parser/base_parser.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+import logging
+import os
+from abc import ABC, abstractmethod
+from typing import Optional
+
+from docreader.models.document import Document
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+class BaseParser(ABC):
+    """Base parser interface.
+
+    After the lightweight refactoring, BaseParser only extracts markdown text
+    and raw image references from documents. Chunking, image storage, OCR,
+    and VLM caption are handled by the Go App module.
+    """
+
+    def __init__(
+        self,
+        file_name: str = "",
+        file_type: Optional[str] = None,
+        **kwargs,
+    ):
+        self.file_name = file_name
+        self.file_type = file_type or os.path.splitext(file_name)[1].lstrip(".")
+
+        logger.info(
+            "Initializing parser for file=%s, type=%s",
+            file_name,
+            self.file_type,
+        )
+
+    @abstractmethod
+    def parse_into_text(self, content: bytes) -> Document:
+        """Parse document content into markdown text.
+
+        Returns:
+            Document with ``content`` (markdown string) and optional
+            ``images`` dict mapping storage-relative paths to base64 data.
+        """
+
+    def parse(self, content: bytes) -> Document:
+        """Parse document and return markdown + image references.
+
+        No chunking, no OCR, no VLM caption — those are done in Go.
+        """
+        logger.info(
+            "Parsing document with %s, bytes: %d",
+            self.__class__.__name__,
+            len(content),
+        )
+        document = self.parse_into_text(content)
+        logger.info(
+            "Extracted %d characters from %s",
+            len(document.content),
+            self.file_name,
+        )
+        return document
--- a/ai-core/parser/chain_parser.py
+++ b/ai-core/parser/chain_parser.py
@@ -0,0 +1,176 @@
+"""
+Chain Parser Module
+
+This module provides two chain-of-responsibility pattern implementations for document parsing:
+1. FirstParser: Tries multiple parsers sequentially until one succeeds
+2. PipelineParser: Chains parsers where each parser processes the output of the previous one
+"""
+
+import logging
+from typing import Dict, List, Tuple, Type
+
+from docreader.models.document import Document
+from docreader.parser.base_parser import BaseParser
+from docreader.utils import endecode
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+class FirstParser(BaseParser):
+    """
+    First-success parser that tries multiple parsers in sequence.
+
+    This parser attempts to parse content using each registered parser in order.
+    It returns the result from the first parser that successfully produces a valid document.
+    If all parsers fail, it returns an empty Document.
+
+    Usage:
+        # Create a custom FirstParser with specific parser classes
+        CustomParser = FirstParser.create(MarkdownParser, HTMLParser)
+        parser = CustomParser()
+        document = parser.parse_into_text(content_bytes)
+    """
+
+    # Tuple of parser classes to be instantiated
+    _parser_cls: Tuple[Type["BaseParser"], ...] = ()
+
+    def __init__(self, *args, **kwargs):
+        """Initialize FirstParser with configured parser classes."""
+        super().__init__(*args, **kwargs)
+
+        # Instantiate all parser classes into parser instances
+        self._parsers: List[BaseParser] = []
+        for parser_cls in self._parser_cls:
+            parser = parser_cls(*args, **kwargs)
+            self._parsers.append(parser)
+
+    def parse_into_text(self, content: bytes) -> Document:
+        """Parse content using the first parser that succeeds.
+
+        Args:
+            content: Raw bytes content to be parsed
+
+        Returns:
+            Document: Parsed document from the first successful parser,
+                     or an empty Document if all parsers fail
+        """
+        for p in self._parsers:
+            logger.info(f"FirstParser: using parser {p.__class__.__name__}")
+            try:
+                document = p.parse_into_text(content)
+            except Exception:
+                logger.exception(
+                    "FirstParser: parser %s raised exception; trying next parser",
+                    p.__class__.__name__,
+                )
+                continue
+
+            if document.is_valid():
+                logger.info(f"FirstParser: parser {p.__class__.__name__} succeeded")
+                return document
+        return Document()
+
+    @classmethod
+    def create(cls, *parser_classes: Type["BaseParser"]) -> Type["FirstParser"]:
+        """Factory method to create a FirstParser subclass with specific parsers.
+
+        Args:
+            *parser_classes: Variable number of BaseParser subclasses to try in order
+
+        Returns:
+            Type[FirstParser]: A new FirstParser subclass configured with the given parsers
+
+        Example:
+            CustomParser = FirstParser.create(MarkdownParser, HTMLParser)
+            parser = CustomParser()
+        """
+        # Generate a descriptive class name based on parser names
+        names = "_".join([p.__name__ for p in parser_classes])
+        # Dynamically create a new class with the parser configuration
+        return type(f"FirstParser_{names}", (cls,), {"_parser_cls": parser_classes})
+
+
+class PipelineParser(BaseParser):
+    """
+    Pipeline parser that chains multiple parsers sequentially.
+
+    This parser processes content through a series of parsers where each parser
+    receives the output of the previous parser as input. Images from all parsers
+    are accumulated and merged into the final document.
+
+    Usage:
+        # Create a custom PipelineParser with specific parser classes
+        CustomParser = PipelineParser.create(PreParser, MarkdownParser, PostParser)
+        parser = CustomParser()
+        document = parser.parse_into_text(content_bytes)
+    """
+
+    # Tuple of parser classes to be instantiated and chained
+    _parser_cls: Tuple[Type["BaseParser"], ...] = ()
+
+    def __init__(self, *args, **kwargs):
+        """Initialize PipelineParser with configured parser classes."""
+        super().__init__(*args, **kwargs)
+
+        # Instantiate all parser classes into parser instances
+        self._parsers: List[BaseParser] = []
+        for parser_cls in self._parser_cls:
+            parser = parser_cls(*args, **kwargs)
+            self._parsers.append(parser)
+
+    def parse_into_text(self, content: bytes) -> Document:
+        """Parse content through a pipeline of parsers.
+
+        Each parser in the pipeline processes the output of the previous parser.
+        Images from all parsers are accumulated and merged into the final document.
+
+        Args:
+            content: Raw bytes content to be parsed
+
+        Returns:
+            Document: Final document after processing through all parsers,
+                     with accumulated images from all stages
+        """
+        # Accumulate images from all parsers
+        images: Dict[str, str] = {}
+        document = Document()
+        for p in self._parsers:
+            logger.info(f"PipelineParser: using parser {p.__class__.__name__}")
+            # Parse content with current parser
+            document = p.parse_into_text(content)
+            # Convert document content back to bytes for next parser
+            content = endecode.encode_bytes(document.content)
+            # Accumulate images from this parser
+            images.update(document.images)
+        # Merge all accumulated images into final document
+        document.images.update(images)
+        return document
+
+    @classmethod
+    def create(cls, *parser_classes: Type["BaseParser"]) -> Type["PipelineParser"]:
+        """Factory method to create a PipelineParser subclass with specific parsers.
+
+        Args:
+            *parser_classes: Variable number of BaseParser subclasses to chain in order
+
+        Returns:
+            Type[PipelineParser]: A new PipelineParser subclass configured with the given parsers
+
+        Example:
+            CustomParser = PipelineParser.create(PreprocessParser, MarkdownParser)
+            parser = CustomParser()
+        """
+        # Generate a descriptive class name based on parser names
+        names = "_".join([p.__name__ for p in parser_classes])
+        # Dynamically create a new class with the parser configuration
+        return type(f"PipelineParser_{names}", (cls,), {"_parser_cls": parser_classes})
+
+
+if __name__ == "__main__":
+    from docreader.parser.markdown_parser import MarkdownParser
+
+    # Example: Create and use a FirstParser with MarkdownParser
+    FpCls = FirstParser.create(MarkdownParser)
+    lparser = FpCls()
+    print(lparser.parse_into_text(b"aaa"))
--- a/ai-core/parser/doc_parser.py
+++ b/ai-core/parser/doc_parser.py
@@ -0,0 +1,331 @@
+import logging
+import os
+import subprocess
+from typing import List, Optional
+
+import textract
+
+from docreader.config import CONFIG
+from docreader.models.document import Document
+from docreader.parser.docx2_parser import Docx2Parser
+from docreader.utils.tempfile import TempDirContext, TempFileContext
+
+logger = logging.getLogger(__name__)
+
+
+class SandboxExecutor:
+    """Sandbox executor for running commands with proxy configuration"""
+
+    def __init__(self, proxy: Optional[str] = None, default_timeout: int = 60):
+        """Initialize sandbox executor with configuration
+
+        Args:
+            proxy: Proxy URL to use for network access. If None, will use WEB_PROXY environment variable
+            default_timeout: Default timeout in seconds for command execution
+        """
+        # Get proxy from parameter, environment variable, or use default blocking proxy
+        # Use 'or None' to convert empty string to None, then apply default value
+        self.proxy = proxy or CONFIG.external_https_proxy or "http://128.0.0.1:1"
+        self.default_timeout = default_timeout
+
+    def execute_in_sandbox(self, cmd: List[str]) -> tuple:
+        """Execute command in sandbox with proxy configuration
+
+        Args:
+            cmd: Command to execute
+
+        Returns:
+            Tuple of (stdout, stderr, returncode)
+        """
+        # Try different sandbox methods in order of preference
+        sandbox_methods = [
+            self._execute_with_proxy,
+        ]
+
+        for method in sandbox_methods:
+            try:
+                return method(cmd)
+            except Exception as e:
+                logger.warning(f"Sandbox method {method.__name__} failed: {e}")
+                continue
+
+        raise RuntimeError("All sandbox methods failed")
+
+    def _execute_with_proxy(self, cmd: List[str]) -> tuple:
+        """Execute command with proxy configuration
+
+        Args:
+            cmd: Command to execute
+
+        Returns:
+            Tuple of (stdout, stderr, returncode)
+        """
+        # Set up environment with proxy configuration
+        env = os.environ.copy()
+        if self.proxy:
+            env["http_proxy"] = self.proxy
+            env["https_proxy"] = self.proxy
+            env["HTTP_PROXY"] = self.proxy
+            env["HTTPS_PROXY"] = self.proxy
+
+        logger.info(f"Executing command with proxy: {' '.join(cmd)}")
+        if self.proxy:
+            logger.info(f"Using proxy: {self.proxy}")
+
+        process = subprocess.Popen(
+            cmd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            env=env,
+        )
+
+        try:
+            stdout, stderr = process.communicate(timeout=self.default_timeout)
+            return stdout, stderr, process.returncode
+        except subprocess.TimeoutExpired:
+            process.kill()
+            raise RuntimeError(
+                f"Command execution timeout after {self.default_timeout} seconds"
+            )
+
+
+logger = logging.getLogger(__name__)
+
+
+class DocParser(Docx2Parser):
+    """DOC document parser"""
+
+    def __init__(self, *args, **kwargs):
+        """Initialize the DOC parser.
+
+        All arguments are forwarded unchanged to the parent initializer. A
+        SandboxExecutor with default settings is created; the other methods
+        of this class use it to run external conversion/extraction tools.
+        """
+        super().__init__(*args, **kwargs)
+        self.sandbox_executor = SandboxExecutor()
+
+    def parse_into_text(self, content: bytes) -> Document:
+        logger.info(f"Parsing DOC document, content size: {len(content)} bytes")
+
+        handle_chain = [
+            # 1. Try to convert to docx format to extract images
+            self._parse_with_docx,
+            # 2. If image extraction is not needed or conversion failed,
+            # try using antiword to extract text
+            self._parse_with_antiword,
+            # 3. If antiword extraction fails, use textract
+            # NOTE: _parse_with_textract is disabled due to SSRF vulnerability
+            # self._parse_with_textract,
+        ]
+
+        # Save byte content as a temporary file
+        with TempFileContext(content, ".doc") as temp_file_path:
+            for handle in handle_chain:
+                try:
+                    document = handle(temp_file_path)
+                    if document:
+                        return document
+                except Exception as e:
+                    logger.warning(f"Failed to parse DOC with {handle.__name__} {e}")
+
+            return Document(content="")
+
+    def _parse_with_docx(self, temp_file_path: str) -> Document:
+        logger.info("Multimodal enabled, attempting to extract images from DOC")
+
+        docx_content = self._try_convert_doc_to_docx(temp_file_path)
+        if not docx_content:
+            raise RuntimeError("Failed to convert DOC to DOCX")
+
+        logger.info("Successfully converted DOC to DOCX, using DocxParser")
+        # Use existing DocxParser to parse the converted docx
+        document = super(Docx2Parser, self).parse_into_text(docx_content)
+        logger.info(f"Extracted {len(document.content)} characters using DocxParser")
+        return document
+
+    def _parse_with_antiword(self, temp_file_path: str) -> Document:
+        logger.info("Attempting to parse DOC file with antiword")
+
+        # Check if antiword is installed
+        antiword_path = self._try_find_antiword()
+        if not antiword_path:
+            raise RuntimeError("antiword not found in PATH")
+
+        # Use antiword to extract text directly in sandbox
+        cmd = [antiword_path, temp_file_path]
+        logger.info("Executing antiword in sandbox with proxy configuration")
+
+        stdout, stderr, returncode = self.sandbox_executor.execute_in_sandbox(cmd)
+
+        if returncode != 0:
+            raise RuntimeError(
+                f"antiword extraction failed: {stderr.decode('utf-8', errors='ignore')}"
+            )
+        text = stdout.decode("utf-8", errors="ignore")
+        logger.info(f"Successfully extracted {len(text)} characters using antiword")
+        return Document(content=text)
+
+    def _parse_with_textract(self, temp_file_path: str) -> Document:
+        logger.info(f"Parsing DOC file with textract: {temp_file_path}")
+        text = textract.process(temp_file_path, method="antiword").decode("utf-8")
+        logger.info(f"Successfully extracted {len(text)} bytes of DOC using textract")
+        return Document(content=str(text))
+
+    def _try_convert_doc_to_docx(self, doc_path: str) -> Optional[bytes]:
+        """Convert DOC file to DOCX format
+
+        Uses LibreOffice/OpenOffice for conversion
+
+        Args:
+            doc_path: DOC file path
+
+        Returns:
+            Byte stream of DOCX file content, or None if conversion fails
+        """
+        logger.info(f"Converting DOC to DOCX: {doc_path}")
+
+        # Check if LibreOffice or OpenOffice is installed
+        soffice_path = self._try_find_soffice()
+        if not soffice_path:
+            return None
+
+        # Execute conversion command
+        logger.info(f"Using {soffice_path} to convert DOC to DOCX")
+
+        # Create a temporary directory to store the converted file
+        with TempDirContext() as temp_dir:
+            cmd = [
+                soffice_path,
+                "--headless",
+                "--convert-to",
+                "docx",
+                "--outdir",
+                temp_dir,
+                doc_path,
+            ]
+            logger.info(f"Running command in sandbox: {' '.join(cmd)}")
+
+            # Execute in sandbox with proxy configuration
+            stdout, stderr, returncode = self.sandbox_executor.execute_in_sandbox(cmd)
+
+            if returncode != 0:
+                logger.warning(
+                    f"Error converting DOC to DOCX: {stderr.decode('utf-8')}"
+                )
+                return None
+
+            # Find the converted file
+            docx_file = [
+                file for file in os.listdir(temp_dir) if file.endswith(".docx")
+            ]
+            logger.info(f"Found {len(docx_file)} DOCX file(s) in temporary directory")
+            for file in docx_file:
+                converted_file = os.path.join(temp_dir, file)
+                logger.info(f"Found converted file: {converted_file}")
+
+                # Read the converted file content
+                with open(converted_file, "rb") as f:
+                    docx_content = f.read()
+                    logger.info(
+                        f"Successfully read DOCX file, size: {len(docx_content)}"
+                    )
+                    return docx_content
+        return None
+
+    def _try_find_executable_path(
+        self,
+        executable_name: str,
+        possible_path: List[str] = [],
+        environment_variable: List[str] = [],
+    ) -> Optional[str]:
+        """Find executable path
+        Args:
+            executable_name: Executable name
+            possible_path: List of possible paths
+            environment_variable: List of environment variables to check
+            Returns:
+                Executable path, or None if not found
+        """
+        # Common executable paths
+        paths: List[str] = []
+        paths.extend(possible_path)
+        paths.extend(os.environ.get(env_var, "") for env_var in environment_variable)
+        paths = list(set(paths))
+
+        # Check if path is set in environment variable
+        for path in paths:
+            if os.path.exists(path):
+                logger.info(f"Found {executable_name} at {path}")
+                return path
+
+        # Try to find in PATH
+        result = subprocess.run(
+            ["which", executable_name], capture_output=True, text=True
+        )
+        if result.returncode == 0 and result.stdout.strip():
+            path = result.stdout.strip()
+            logger.info(f"Found {executable_name} at {path}")
+            return path
+
+        logger.warning(f"Failed to find {executable_name}")
+        return None
+
+    def _try_find_soffice(self) -> Optional[str]:
+        """Find LibreOffice/OpenOffice executable path
+
+        Returns:
+            Executable path, or None if not found
+        """
+        # Common LibreOffice/OpenOffice executable paths
+        possible_paths = [
+            # Linux
+            "/usr/bin/soffice",
+            "/usr/lib/libreoffice/program/soffice",
+            "/opt/libreoffice25.2/program/soffice",
+            # macOS
+            "/Applications/LibreOffice.app/Contents/MacOS/soffice",
+            # Windows
+            "C:\\Program Files\\LibreOffice\\program\\soffice.exe",
+            "C:\\Program Files (x86)\\LibreOffice\\program\\soffice.exe",
+        ]
+        return self._try_find_executable_path(
+            executable_name="soffice",
+            possible_path=possible_paths,
+            environment_variable=["LIBREOFFICE_PATH"],
+        )
+
+    def _try_find_antiword(self) -> Optional[str]:
+        """Find antiword executable path
+
+        Returns:
+            Executable path, or None if not found
+        """
+        # Common antiword executable paths
+        possible_paths = [
+            # Linux/macOS
+            "/usr/bin/antiword",
+            "/usr/local/bin/antiword",
+            # Windows
+            "C:\\Program Files\\Antiword\\antiword.exe",
+            "C:\\Program Files (x86)\\Antiword\\antiword.exe",
+        ]
+        return self._try_find_executable_path(
+            executable_name="antiword",
+            possible_path=possible_paths,
+            environment_variable=["ANTIWORD_PATH"],
+        )
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.DEBUG)
+
+    file_name = "/path/to/your/test.doc"
+    logger.info(f"Processing file: {file_name}")
+    doc_parser = DocParser(
+        file_name=file_name,
+        enable_multimodal=True,
+        chunk_size=512,
+        chunk_overlap=60,
+    )
+    with open(file_name, "rb") as f:
+        content = f.read()
+
+    document = doc_parser.parse_into_text(content)
+    logger.info(f"Processing complete, extracted text length: {len(document.content)}")
+    logger.info(f"Sample text: {document.content[:200]}...")
--- a/ai-core/parser/docx2_parser.py
+++ b/ai-core/parser/docx2_parser.py
@@ -0,0 +1,28 @@
+import logging
+
+from docreader.parser.chain_parser import FirstParser
+from docreader.parser.docx_parser import DocxParser
+from docreader.parser.markitdown_parser import MarkitdownParser
+
+logger = logging.getLogger(__name__)
+
+
+class Docx2Parser(FirstParser):
+    """DOCX parser built on FirstParser with two candidate parsers.
+
+    Presumably MarkitdownParser is attempted first and DocxParser serves
+    as the fallback, following FirstParser's in-order contract.
+    """
+
+    # Candidate parser classes, listed in the order they are configured.
+    _parser_cls = (MarkitdownParser, DocxParser)
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.DEBUG)
+
+    your_file = "/path/to/your/file.docx"
+    parser = Docx2Parser(separators=[".", "?", "!", "。", "？", "！"])
+    with open(your_file, "rb") as f:
+        content = f.read()
+
+        document = parser.parse(content)
+        for cc in document.chunks:
+            logger.info(f"chunk: {cc}")
+
+        # document = parser.parse_into_text(content)
+        # logger.info(f"docx content: {document.content}")
+        # logger.info(f"find images {document.images.keys()}")
--- a/ai-core/parser/docx_parser.py
+++ b/ai-core/parser/docx_parser.py
--- a/ai-core/parser/excel_parser.py
+++ b/ai-core/parser/excel_parser.py
@@ -0,0 +1,119 @@
+"""
+Excel Parser Module
+
+This module provides functionality to parse Excel files (.xlsx, .xls) into
+structured Document objects with text content and chunks. It supports multiple
+sheets and handles various Excel formats using pandas.
+"""
+import logging
+from io import BytesIO
+from typing import List
+
+import pandas as pd
+
+from docreader.models.document import Chunk, Document
+from docreader.parser.base_parser import BaseParser
+
+logger = logging.getLogger(__name__)
+
+
+class ExcelParser(BaseParser):
+    """Parser for Excel files (.xlsx, .xls).
+    
+    This parser extracts text content from Excel files by processing all sheets
+    and converting each row into a structured text format. Each row becomes a
+    separate chunk with key-value pairs.
+    
+    Features:
+        - Supports multiple sheets in a single Excel file
+        - Automatically removes completely empty rows
+        - Converts each row to "column: value" format
+        - Creates individual chunks for each row for better granularity
+        
+    Example:
+        >>> parser = ExcelParser()
+        >>> with open("data.xlsx", "rb") as f:
+        ...     content = f.read()
+        ...     document = parser.parse_into_text(content)
+        >>> print(document.content)
+        Name: John,Age: 30,City: NYC
+        Name: Jane,Age: 25,City: LA
+    """
+    
+    def parse_into_text(self, content: bytes) -> Document:
+        """Parse Excel file bytes into a Document object.
+        
+        Args:
+            content: Raw bytes of the Excel file
+            
+        Returns:
+            Document: Parsed document containing:
+                - content: Full text with all rows from all sheets
+                - chunks: List of Chunk objects, one per row
+                
+        Note:
+            - Empty rows (all NaN values) are automatically skipped
+            - Each row is formatted as: "col1: val1,col2: val2,..."
+            - Chunks maintain sequential ordering across all sheets
+        """
+        chunks: List[Chunk] = []
+        text: List[str] = []
+        start, end = 0, 0
+
+        # Load Excel file from bytes into pandas ExcelFile object
+        excel_file = pd.ExcelFile(BytesIO(content))
+        
+        # Process each sheet in the Excel file
+        for excel_sheet_name in excel_file.sheet_names:
+            # Parse the sheet into a DataFrame
+            df = excel_file.parse(sheet_name=excel_sheet_name)
+            # Remove rows where all values are NaN (completely empty rows)
+            df.dropna(how="all", inplace=True)
+
+            # Process each row in the DataFrame
+            for _, row in df.iterrows():
+                page_content = []
+                # Build key-value pairs for non-null values
+                for k, v in row.items():
+                    if pd.notna(v):  # Skip NaN/null values
+                        page_content.append(f"{k}: {v}")
+                
+                # Skip rows with no valid content
+                if not page_content:
+                    continue
+                
+                # Format row as comma-separated key-value pairs
+                content_row = ",".join(page_content) + "\n"
+                end += len(content_row)
+                text.append(content_row)
+                
+                # Create a chunk for this row with position tracking
+                chunks.append(
+                    Chunk(content=content_row, seq=len(chunks), start=start, end=end)
+                )
+                start = end
+
+        # Combine all text and return as Document
+        return Document(content="".join(text), chunks=chunks)
+
+
+if __name__ == "__main__":
+    # Example usage: Parse an Excel file and display results
+    logging.basicConfig(level=logging.DEBUG)
+
+    # Specify the path to your Excel file
+    your_file = "/path/to/your/file.xlsx"
+    parser = ExcelParser()
+    
+    # Read and parse the Excel file
+    with open(your_file, "rb") as f:
+        content = f.read()
+        document = parser.parse_into_text(content)
+        
+        # Display the full document content
+        logger.error(document.content)
+
+        # Display the first chunk as an example
+        for chunk in document.chunks:
+            logger.error(chunk.content)
+            break  # Only show the first chunk
--- a/ai-core/parser/image_parser.py
+++ b/ai-core/parser/image_parser.py
@@ -0,0 +1,28 @@
+import base64
+import logging
+import os
+
+from docreader.models.document import Document
+from docreader.parser.base_parser import BaseParser
+
+logger = logging.getLogger(__name__)
+
+
+class ImageParser(BaseParser):
+    """Parser for standalone image files.
+
+    Returns the image as a markdown reference with the raw image data
+    in Document.images so that the Go-side ImageResolver (or main.py's
+    _resolve_images) can handle storage upload.
+    """
+
+    def parse_into_text(self, content: bytes) -> Document:
+        logger.info("Parsing image file=%s, size=%d bytes", self.file_name, len(content))
+
+        ext = os.path.splitext(self.file_name)[1].lower() or ".png"
+        ref_path = f"images/{self.file_name}"
+
+        text = f"![{self.file_name}]({ref_path})"
+        images = {ref_path: base64.b64encode(content).decode()}
+
+        return Document(content=text, images=images)
--- a/ai-core/parser/markdown_parser.py
+++ b/ai-core/parser/markdown_parser.py
@@ -0,0 +1,403 @@
+"""
+Markdown Parser Module
+
+This module provides comprehensive Markdown parsing functionality including:
+- Table formatting and standardization
+- Base64 image extraction and conversion
+- Image path replacement and URL generation
+- Pipeline-based parsing with multiple stages
+
+The parser uses a pipeline approach to process Markdown content through
+multiple stages: table formatting -> image processing.
+"""
+
+import base64
+import logging
+import os
+import re
+import uuid
+from typing import Dict, List, Match, Optional, Tuple
+
+from docreader.models.document import Document
+from docreader.parser.base_parser import BaseParser
+from docreader.parser.chain_parser import PipelineParser
+from docreader.utils import endecode
+
+# Get logger object
+logger = logging.getLogger(__name__)
+
+
+class MarkdownTableUtil:
+    """Utility class for formatting Markdown tables.
+
+    This class standardizes Markdown table formatting by:
+    - Normalizing column alignment markers (e.g., :---, :---:, ---:)
+    - Adding consistent spacing around pipes (|)
+    - Preserving indentation levels
+    - Handling both header rows and data rows
+    - Keeping empty cells in place so columns never shift
+
+    Only rows that both start and end with a pipe are touched.
+
+    Example:
+        Input:  |Name|Age|City|
+                |:---|---:|:---:|
+                |Zhang San|25|Beijing|
+
+        Output: | Name | Age | City |
+                | :--- | ---: | :---: |
+                | Zhang San | 25 | Beijing |
+    """
+
+    def __init__(self):
+        # Pattern to match alignment row (e.g., |:---|---:|:---:|)
+        self.align_pattern = re.compile(
+            r"^([\t ]*)\|[\t ]*[:-]+(?:[\t ]*\|[\t ]*[:-]+)*[\t ]*\|[\t ]*$",
+            re.MULTILINE,
+        )
+        # Pattern to match regular table rows (header or data)
+        self.line_pattern = re.compile(
+            r"^([\t ]*)\|[\t ]*[^|\r\n]*(?:[\t ]*\|[^|\r\n]*)*\|[\t ]*$",
+            re.MULTILINE,
+        )
+
+    def format_table(self, content: str) -> str:
+        """Format all Markdown tables in the content.
+
+        Args:
+            content: Raw Markdown text containing tables
+
+        Returns:
+            Formatted Markdown text with standardized table formatting
+        """
+
+        def process_align(match: Match[str]) -> str:
+            """Rewrite an alignment row so every column is ``---`` plus colons."""
+            # Alignment cells are never empty (the pattern requires at least
+            # one ':' or '-'), so filtering empties only drops the two
+            # fragments produced by the outer pipes.
+            columns = [col.strip() for col in match.group(0).split("|") if col.strip()]
+
+            processed = []
+            for col in columns:
+                # Preserve left alignment marker (:---)
+                left_colon = ":" if col.startswith(":") else ""
+                # Preserve right alignment marker (---:)
+                right_colon = ":" if col.endswith(":") else ""
+                processed.append(left_colon + "---" + right_colon)
+
+            # Preserve original indentation
+            prefix = match.group(1)
+            return prefix + "| " + " | ".join(processed) + " |"
+
+        def process_line(match: Match[str]) -> str:
+            """Rewrite a header/data row with single spaces around each cell."""
+            # Preserve original indentation
+            prefix = match.group(1)
+
+            # The pattern guarantees the row starts and ends with a pipe once
+            # the surrounding blanks are removed. Drop exactly those two pipes
+            # and split what is left; empty cells are kept on purpose, since
+            # discarding them would shift every following column to the left.
+            row = match.group(0).strip(" \t")
+            columns = [col.strip() for col in row[1:-1].split("|")]
+
+            return prefix + "| " + " | ".join(columns) + " |"
+
+        formatted_content = content
+        # First format regular rows (header and data)
+        formatted_content = self.line_pattern.sub(process_line, formatted_content)
+        # Then format alignment rows (must be done after to avoid conflicts)
+        formatted_content = self.align_pattern.sub(process_align, formatted_content)
+
+        return formatted_content
+
+    @staticmethod
+    def _self_test():
+        """Print the formatted version of a small sample document."""
+        test_content = """
+# 测试表格
+普通文本---不会被匹配
+
+## 表格1（无前置空格）
+
+| 姓名   | 年龄  | 城市          |
+|      :---------- | -------: | :------      |
+| 张三 | 25 | 北京 |
+
+## 表格3（前置4个空格+首尾|）
+    |   产品   |   价格   |   库存   |
+    | :-------------: | ----------- | :-----------: |
+    | 手机 | 5999       | 100 |
+"""
+        util = MarkdownTableUtil()
+        format_content = util.format_table(test_content)
+        print(format_content)
+
+
+class MarkdownTableFormatter(BaseParser):
+    """Parser stage that normalizes every Markdown table in a document.
+
+    The incoming bytes are decoded to text, handed to MarkdownTableUtil for
+    table formatting, and returned as the content of a new Document.
+
+    For example, a table written as ``|Name|Age|`` / ``|---|---|`` /
+    ``|John|30|`` comes back as ``| Name | Age |`` / ``| --- | --- |`` /
+    ``| John | 30 |``.
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        # Reused for every parse call
+        self.table_helper = MarkdownTableUtil()
+
+    def parse_into_text(self, content: bytes) -> Document:
+        """Decode the Markdown bytes and format the tables they contain.
+
+        Args:
+            content: Raw Markdown content as bytes
+
+        Returns:
+            Document whose content is the table-formatted text
+        """
+        decoded = endecode.decode_bytes(content)
+        return Document(content=self.table_helper.format_table(decoded))
+
+
+class MarkdownImageUtil:
+    """Utility class for handling images in Markdown.
+
+    This class provides functionality to:
+    - Extract base64-encoded images from Markdown
+    - Extract image paths from Markdown
+    - Replace image paths with new URLs
+    - Convert base64 images to binary format
+
+    Supported formats:
+    - Base64 embedded images: ![alt](data:image/png;base64,iVBORw0...)
+    - Regular image links: ![alt](path/to/image.png)
+    """
+
+    def __init__(self):
+        # Pattern to match base64 embedded images
+        # Captures: (1) alt text, (2) image format, (3) base64 data
+        self.b64_pattern = re.compile(
+            r"!\[([^\]]*)\]\(data:image/(\w+)\+?\w*;base64,([^\)]+)\)"
+        )
+        # Pattern to match regular image syntax
+        self.image_pattern = re.compile(r"!\[([^\]]*)\]\(([^)]+)\)")
+        # Pattern for replacing image paths
+        self.replace_pattern = re.compile(r"!\[([^\]]*)\]\(([^)]+)\)")
+
+    def extract_image(
+        self,
+        content: str,
+        path_prefix: Optional[str] = None,
+        replace: bool = True,
+    ) -> Tuple[str, List[str]]:
+        """Extract image paths from Markdown content.
+
+        Args:
+            content: Markdown text containing images
+            path_prefix: Optional prefix to add to image paths
+            replace: Whether to replace image syntax in content
+
+        Returns:
+            Tuple of (processed_text, list_of_image_paths). The returned
+            paths carry the prefix even when ``replace`` is False.
+
+        Example:
+            >>> util = MarkdownImageUtil()
+            >>> text, images = util.extract_image("![logo](img/logo.png)")
+            >>> print(images)
+            ['img/logo.png']
+        """
+        # List to store extracted image paths
+        images: List[str] = []
+
+        def repl(match: Match[str]) -> str:
+            """Replacement function for each image match."""
+            title = match.group(1)  # Alt text
+            image_path = match.group(2)  # Image path
+
+            # Add prefix if specified
+            if path_prefix:
+                image_path = f"{path_prefix}/{image_path}"
+
+            images.append(image_path)
+
+            # Keep original if replace is False
+            if not replace:
+                return match.group(0)
+
+            # Replace image path with potentially prefixed path
+            return f"![{title}]({image_path})"
+
+        text = self.image_pattern.sub(repl, content)
+        logger.debug("Extracted %d images from markdown", len(images))
+        return text, images
+
+    def extract_base64(
+        self,
+        content: str,
+        path_prefix: Optional[str] = None,
+        replace: bool = True,
+    ) -> Tuple[str, Dict[str, bytes]]:
+        """Extract and decode base64 embedded images from Markdown.
+
+        This method finds all base64-encoded images in the Markdown content,
+        decodes them to binary format, generates unique filenames, and
+        optionally replaces them with file path references. An image whose
+        payload cannot be decoded is replaced by its alt text alone.
+
+        Args:
+            content: Markdown text containing base64 images
+            path_prefix: Optional directory prefix for generated paths
+            replace: Whether to replace base64 syntax with file paths
+
+        Returns:
+            Tuple of (processed_text, dict_of_path_to_bytes)
+
+        Example:
+            >>> util = MarkdownImageUtil()
+            >>> text = "![logo](data:image/png;base64,iVBORw0KGg...)"
+            >>> new_text, images = util.extract_base64(text, "images")
+            >>> print(new_text)
+            ![logo](images/uuid.png)
+            >>> print(len(images))
+            1
+        """
+        # Dictionary mapping generated file paths to binary image data
+        images: Dict[str, bytes] = {}
+
+        def repl(match: Match[str]) -> str:
+            """Replacement function for each base64 image match."""
+            title = match.group(1)  # Alt text
+            img_ext = match.group(2)  # Image format (png, jpg, etc.)
+            img_b64 = match.group(3)  # Base64 encoded data
+
+            # NOTE(review): despite its name, encode_image is used here to
+            # turn the base64 text into bytes — confirm against endecode.
+            image_byte = endecode.encode_image(img_b64, errors="ignore")
+            if not image_byte:
+                # Log only the size: the payload itself can be megabytes long
+                # and would flood the log if written out in full.
+                logger.error(
+                    "Failed to decode base64 image, skipping it (alt=%r, %d base64 chars)",
+                    title,
+                    len(img_b64),
+                )
+                return title  # Return just the alt text if decode fails
+
+            # Generate unique filename with original extension
+            image_path = f"{uuid.uuid4()}.{img_ext}"
+            if path_prefix:
+                image_path = f"{path_prefix}/{image_path}"
+            images[image_path] = image_byte
+
+            # Keep original base64 if replace is False
+            if not replace:
+                return match.group(0)
+
+            # Replace base64 data with file path reference
+            return f"![{title}]({image_path})"
+
+        text = self.b64_pattern.sub(repl, content)
+        logger.debug("Extracted %d base64 images from markdown", len(images))
+        return text, images
+
+    def replace_path(self, content: str, images: Dict[str, str]) -> str:
+        """Replace image paths in Markdown with new URLs.
+
+        This method is typically used to replace local file paths with
+        uploaded URLs after images have been stored. A path that maps to an
+        empty value is replaced by the alt text alone; a path missing from
+        the mapping is left untouched.
+
+        Args:
+            content: Markdown text with image references
+            images: Mapping of old paths to new URLs
+
+        Returns:
+            Markdown text with updated image URLs
+
+        Example:
+            >>> util = MarkdownImageUtil()
+            >>> content = "![logo](temp/img.png)"
+            >>> mapping = {"temp/img.png": "https://cdn.com/img.png"}
+            >>> result = util.replace_path(content, mapping)
+            >>> print(result)
+            ![logo](https://cdn.com/img.png)
+        """
+        # Track which paths were actually replaced
+        content_replace: set = set()
+
+        def repl(match: Match[str]) -> str:
+            """Replacement function for each image match."""
+            title = match.group(1)  # Alt text
+            image_path = match.group(2)  # Current image path
+
+            # Only replace if path exists in mapping
+            if image_path not in images:
+                return match.group(0)  # Keep original
+
+            content_replace.add(image_path)
+            # Get new URL from mapping
+            image_path = images[image_path]
+            return f"![{title}]({image_path})" if image_path else title
+
+        text = self.replace_pattern.sub(repl, content)
+        logger.debug("Replaced %d images in markdown", len(content_replace))
+        return text
+
+    @staticmethod
+    def _self_test():
+        """Extract one sample base64 image and write it to the working directory."""
+        your_content = "test![](data:image/png;base64,iVBORw0KGgoAAAA)test"
+        image_handle = MarkdownImageUtil()
+        text, images = image_handle.extract_base64(your_content)
+        print(text)
+
+        for image_url, image_byte in images.items():
+            with open(image_url, "wb") as f:
+                f.write(image_byte)
+
+
+class MarkdownImageBase64(BaseParser):
+    """Parser stage that pulls base64-embedded images out of Markdown.
+
+    Each embedded image is swapped for a path reference under ``images/``,
+    and its data is handed back, re-encoded as base64 text, in
+    Document.images for the Go-side ImageResolver (or main.py
+    _resolve_images) to store.
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.image_helper = MarkdownImageUtil()
+
+    def parse_into_text(self, content: bytes) -> Document:
+        """Return the Markdown with embedded images replaced by references.
+
+        Args:
+            content: Raw Markdown content as bytes
+
+        Returns:
+            Document with the rewritten text and a path -> base64 string
+            mapping in ``images``
+        """
+        decoded = endecode.decode_bytes(content)
+        text, raw_images = self.image_helper.extract_base64(decoded, path_prefix="images")
+
+        # The helper yields bytes; Document.images carries base64 text.
+        images: Dict[str, str] = {
+            path: base64.b64encode(data).decode() for path, data in raw_images.items()
+        }
+
+        logger.debug("Extracted %d base64 images from markdown", len(images))
+        return Document(content=text, images=images)
+
+
+class MarkdownParser(PipelineParser):
+    """Complete Markdown parser composed of two stages.
+
+    Stages, in the order listed in ``_parser_cls``:
+    1. MarkdownTableFormatter: Standardizes table formatting
+    2. MarkdownImageBase64: Extracts base64 images, replaces them with path
+       references and returns the image data in Document.images. Nothing is
+       uploaded by this stage.
+
+    NOTE(review): presumably PipelineParser feeds each stage's output into
+    the next stage as its input — confirm in chain_parser.
+    """
+
+    _parser_cls = (MarkdownTableFormatter, MarkdownImageBase64)
+
+
+if __name__ == "__main__":
+    # Manual smoke test for this module
+    logging.basicConfig(level=logging.DEBUG)
+
+    # Run a tiny document with one embedded image through the full pipeline
+    sample = "test![](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAMgA)test"
+    document = MarkdownParser().parse_into_text(sample.encode())
+
+    # Show the rewritten text and which images were extracted
+    logger.info(document.content)
+    logger.info(f"Images: {len(document.images)}, name: {document.images.keys()}")
+
+    # Then exercise the two helpers on their own
+    MarkdownImageUtil._self_test()
+    MarkdownTableUtil._self_test()
--- a/ai-core/parser/markitdown_parser.py
+++ b/ai-core/parser/markitdown_parser.py
@@ -0,0 +1,107 @@
+import io
+import logging
+import re
+import base64
+
+from markitdown import MarkItDown
+
+from docreader.models.document import Document
+from docreader.parser.base_parser import BaseParser
+from docreader.parser.chain_parser import PipelineParser
+from docreader.parser.markdown_parser import MarkdownParser
+
+# Optional dependency: VLMClient falls back to None when it cannot be imported.
+try:
+    from parser.vlm_client import VLMClient
+except ImportError:
+    VLMClient = None
+
+logger = logging.getLogger(__name__)
+
+
+class StdMarkitdownParser(BaseParser):
+    """
+    Standard MarkItDown Parser Wrapper
+
+    This parser uses the markitdown library to convert various document formats
+    (docx, pptx, pdf, etc.) into text/markdown.
+    Optionally uses VLM to process images.
+    """
+
+    # Markdown image whose target is a base64 data URI.
+    # Groups: (1) alt text, (2) whole data URI, (3) image subtype such as
+    # "png", (4) base64 payload.
+    _DATA_URI_IMAGE_RE = re.compile(
+        r'!\[([^\]]*)\]\((data:image/([^;]+);base64,([A-Za-z0-9+/=]+))\)'
+    )
+
+    def __init__(self, *args, vlm_config=None, **kwargs):
+        """Create the MarkItDown converter and, if configured, a VLM client.
+
+        Args:
+            *args: Forwarded to BaseParser.
+            vlm_config: Optional dict; a client is created only when its
+                "enabled" entry is truthy and VLMClient could be imported.
+            **kwargs: Forwarded to BaseParser.
+        """
+        # BaseParser's initializer is expected to set self.file_type, which
+        # parse_into_text relies on.
+        super().__init__(*args, **kwargs)
+        self.markitdown = MarkItDown()
+        self.vlm_config = vlm_config
+        self.vlm_client = None
+
+        # Initialize the VLM client only when a configuration enables it
+        if vlm_config and vlm_config.get("enabled") and VLMClient:
+            try:
+                self.vlm_client = VLMClient(vlm_config)
+                logger.info(f"VLM client initialized: provider={vlm_config.get('provider')}, model={vlm_config.get('model')}")
+            except Exception as e:
+                # Best effort: parsing still works without a VLM client
+                logger.warning(f"Failed to initialize VLM client: {e}")
+
+    def parse_into_text(self, content: bytes) -> Document:
+        """
+        Parses content using MarkItDown.
+        Uses self.file_type (inherited from BaseParser) to hint the stream format.
+
+        Args:
+            content: Raw bytes of the file to convert.
+
+        Returns:
+            Document holding the converted markdown (an empty string when the
+            converter yields no text).
+        """
+        ext = self.file_type
+        if ext and not ext.startswith('.'):
+            ext = '.' + ext
+
+        # No try/except on purpose: conversion errors propagate so the
+        # enclosing PipelineParser can handle them in one place.
+        result = self.markitdown.convert(
+            io.BytesIO(content),
+            file_extension=ext,
+            keep_data_uris=True
+        )
+
+        # Never hand None on as document content
+        markdown_content = result.text_content or ""
+
+        # With a VLM client available, try to process embedded images
+        if self.vlm_client and markdown_content:
+            markdown_content = self._process_images_with_vlm(markdown_content)
+
+        return Document(content=markdown_content)
+
+    def _process_images_with_vlm(self, content: str) -> str:
+        """
+        Replace base64-embedded images in the markdown with VLM output.
+
+        Each image is decoded and sent to the VLM client. On success the image
+        is replaced by the returned text wrapped in HTML comments; on any
+        failure the original image reference is kept.
+
+        Args:
+            content: Markdown text that may contain data-URI images.
+
+        Returns:
+            Markdown text with successfully analyzed images replaced.
+        """
+
+        def replace_image(match):
+            alt_text = match.group(1)
+            # Group 3 is only the subtype ("png"); rebuild the full MIME type
+            # so the client does not receive a bare "png".
+            mime_type = f"image/{match.group(3)}"
+            base64_data = match.group(4)
+
+            try:
+                # Decode the base64 image
+                image_bytes = base64.b64decode(base64_data)
+
+                # Ask the VLM to analyze the image
+                logger.info(f"Processing image with VLM: {alt_text or 'unnamed'}")
+                vlm_result = self.vlm_client.analyze_image(image_bytes, mime_type)
+
+                if vlm_result.get("success"):
+                    vlm_content = vlm_result.get("content", "")
+                    logger.info(f"VLM processed image successfully, content length: {len(vlm_content)}")
+                    # Replace the image with the VLM's description
+                    return f"<!-- Image: {alt_text} -->\n{vlm_content}\n<!-- End Image -->"
+                else:
+                    logger.warning(f"VLM failed for image: {vlm_result.get('error')}")
+                    return match.group(0)  # Keep the original image reference
+            except Exception as e:
+                logger.error(f"Error processing image with VLM: {e}")
+                return match.group(0)  # Keep the original image reference
+
+        return self._DATA_URI_IMAGE_RE.sub(replace_image, content)
+
+
+class MarkitdownParser(PipelineParser):
+    """Parser that lists StdMarkitdownParser followed by MarkdownParser.
+
+    NOTE(review): presumably PipelineParser runs the classes in
+    ``_parser_cls`` in order, so the MarkItDown output is post-processed by
+    MarkdownParser — confirm in chain_parser.
+    """
+    _parser_cls = (StdMarkitdownParser, MarkdownParser)
--- a/ai-core/parser/parser.py
+++ b/ai-core/parser/parser.py
@@ -0,0 +1,88 @@
+import logging
+from typing import Any, Optional
+
+from docreader.models.document import Document
+from docreader.parser.registry import registry
+from docreader.parser.web_parser import WebParser
+
+logger = logging.getLogger(__name__)
+
+
+class Parser:
+    """Document parser facade (lightweight version).
+
+    Converts files/URLs to markdown + image references by delegating to a
+    parser class looked up in the registry (files) or to WebParser (URLs).
+    No chunking and no storage happen in this class. An enabled
+    ``vlm_config`` is forwarded to the selected parser as a constructor
+    keyword; whether it is used depends on that parser.
+    """
+
+    def __init__(self):
+        self.registry = registry
+        logger.info(
+            "Parser initialized with engines: %s",
+            ", ".join(self.registry.get_engine_names()),
+        )
+
+    def parse_file(
+        self,
+        file_name: str,
+        file_type: str,
+        content: bytes,
+        parser_engine: Optional[str] = None,
+        engine_overrides: Optional[dict[str, Any]] = None,
+        vlm_config: Optional[dict[str, Any]] = None,
+    ) -> Document:
+        """Parse file content to markdown.
+
+        Args:
+            file_name: Name of the file, passed to the parser instance.
+            file_type: File type used to select the parser class.
+            content: Raw bytes of the file.
+            parser_engine: Engine name for the registry lookup; an empty
+                string is used when omitted.
+            engine_overrides: Extra keyword arguments for the parser
+                constructor. The caller's dict is not modified.
+            vlm_config: Optional VLM settings; forwarded to the parser
+                constructor as ``vlm_config`` when "enabled" is truthy.
+
+        Returns:
+            The Document produced by the selected parser.
+        """
+        engine = parser_engine or ""
+        # Work on a copy: adding vlm_config below must not leak into a dict
+        # the caller may reuse for another request.
+        overrides = dict(engine_overrides or {})
+        logger.info(
+            "Parsing file: %s, type: %s, engine: %s, vlm_enabled: %s",
+            file_name,
+            file_type,
+            engine or "builtin",
+            vlm_config.get("enabled") if vlm_config else False,
+        )
+
+        # Forward an enabled VLM configuration through the overrides
+        if vlm_config and vlm_config.get("enabled"):
+            overrides["vlm_config"] = vlm_config
+
+        cls = self.registry.get_parser_class(engine, file_type)
+        logger.info(
+            "Creating %s parser instance for %s file",
+            cls.__name__,
+            file_type,
+        )
+        parser = cls(
+            file_name=file_name,
+            file_type=file_type,
+            **overrides,
+        )
+
+        logger.info("Starting to parse file content, size: %d bytes", len(content))
+        result = parser.parse(content)
+
+        if not result.content:
+            logger.warning("Parser returned empty content for file: %s", file_name)
+        logger.info(
+            "Parsed file %s, content length=%d", file_name, len(result.content)
+        )
+        return result
+
+    def parse_url(
+        self,
+        url: str,
+        title: str,
+        parser_engine: Optional[str] = None,
+        engine_overrides: Optional[dict[str, Any]] = None,
+    ) -> Document:
+        """Parse content from a URL to markdown.
+
+        Args:
+            url: Address to fetch; handed to WebParser as encoded bytes.
+            title: Title passed to the WebParser constructor.
+            parser_engine: Accepted for interface symmetry; not used here.
+            engine_overrides: Accepted for interface symmetry; not used here.
+
+        Returns:
+            The Document produced by WebParser.
+        """
+        logger.info("Parsing URL: %s, title: %s", url, title)
+
+        parser = WebParser(title=title)
+        logger.info("Starting to parse URL content")
+        result = parser.parse(url.encode())
+
+        if not result.content:
+            logger.warning("Parser returned empty content for url: %s", url)
+        logger.info("Parsed url %s, content length=%d", url, len(result.content))
+        return result
--- a/ai-core/parser/parser_simple.py
+++ b/ai-core/parser/parser_simple.py
@@ -0,0 +1,275 @@
+"""
+简化的 Parser - 使用 markitdown + VLM
+"""
+import logging
+import os
+import io
+import re
+import base64
+from typing import Optional, Any, Dict
+from markitdown import MarkItDown
+
+logger = logging.getLogger(__name__)
+
+
+class Document:
+    """Minimal document container: text content, chunks and metadata."""
+    def __init__(self, content: str = "", chunks: list = None, metadata: dict = None):
+        # A falsy argument (None or an empty container) is swapped for a
+        # fresh container owned by this instance.
+        self.content = content
+        self.chunks = chunks if chunks else []
+        self.metadata = metadata if metadata else {}
+
+
+class VLMClient:
+    """Client that sends an image to a vision-language model over HTTP.
+
+    Supported providers are "openai", "anthropic" and "qwen". Every call
+    returns a dict: ``{"success": True, "content": <text>}`` on success or
+    ``{"success": False, "error": <message>}`` on failure; request errors
+    are logged and never raised to the caller.
+    """
+
+    def __init__(self, config: Dict[str, Any]):
+        """Read provider, model, credentials and prompt from ``config``.
+
+        Missing keys fall back to provider "openai", model "gpt-4o", an empty
+        API key, the provider's default endpoint and the built-in prompt.
+        """
+        self.provider = config.get("provider", "openai")
+        self.model = config.get("model", "gpt-4o")
+        self.api_key = config.get("api_key", "")
+        self.base_url = config.get("base_url", "")
+        self.prompt = config.get("prompt", "") or self._default_prompt()
+        logger.info(f"VLMClient initialized: provider={self.provider}, model={self.model}")
+
+    def _default_prompt(self) -> str:
+        """Return the built-in prompt (asks for a Markdown transcription)."""
+        return """请分析这个文档图片的内容，并将其转换为 Markdown 格式。
+要求：
+1. 保持原文的格式和结构
+2. 表格用 Markdown 表格格式
+3. 标题用 # ## ### 标记
+4. 尽量保留原文的所有信息"""
+
+    def analyze_image(self, content: bytes, mime_type: str) -> Dict[str, Any]:
+        """Send the image to the configured provider.
+
+        Args:
+            content: Raw bytes of the image.
+            mime_type: MIME type placed in the request, e.g. "image/png".
+
+        Returns:
+            Result dict as described on the class; an unknown provider gives
+            a failure result without any network call.
+        """
+        if self.provider == "openai":
+            return self._call_openai(content, mime_type)
+        elif self.provider == "anthropic":
+            return self._call_anthropic(content, mime_type)
+        elif self.provider == "qwen":
+            return self._call_qwen(content, mime_type)
+        else:
+            return {"success": False, "error": f"Unknown provider: {self.provider}"}
+
+    def _endpoint(self, default_base: str, path: str) -> str:
+        """Join the configured (or default) base URL with an API path."""
+        # Drop trailing slashes so a base_url such as "https://host/v1/" does
+        # not turn into "https://host/v1//chat/completions".
+        return (self.base_url or default_base).rstrip("/") + path
+
+    def _post_json(self, url: str, headers: Dict[str, str], payload: Dict[str, Any]) -> Dict[str, Any]:
+        """POST a JSON payload and return the decoded JSON response."""
+        # Imported lazily so a missing "requests" package surfaces as a
+        # failure result in the callers instead of breaking module import.
+        import requests
+
+        resp = requests.post(url, headers=headers, json=payload, timeout=120)
+        resp.raise_for_status()
+        return resp.json()
+
+    def _call_chat_completions(
+        self,
+        default_base: str,
+        label: str,
+        content: bytes,
+        mime_type: str,
+        extra: Dict[str, Any] = None,
+    ) -> Dict[str, Any]:
+        """Call a chat-completions style endpoint with an inline image."""
+        try:
+            image_b64 = base64.b64encode(content).decode("utf-8")
+
+            headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
+            payload = {
+                "model": self.model,
+                "messages": [{
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": self.prompt},
+                        {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{image_b64}"}}
+                    ]
+                }]
+            }
+            if extra:
+                payload.update(extra)
+
+            url = self._endpoint(default_base, "/chat/completions")
+            result = self._post_json(url, headers, payload)
+            return {"success": True, "content": result["choices"][0]["message"]["content"]}
+        except Exception as e:
+            logger.error(f"{label} VLM error: {e}")
+            return {"success": False, "error": str(e)}
+
+    def _call_openai(self, content: bytes, mime_type: str) -> Dict[str, Any]:
+        """Analyze the image through the OpenAI chat-completions API."""
+        return self._call_chat_completions(
+            "https://api.openai.com/v1", "OpenAI", content, mime_type, {"max_tokens": 4096}
+        )
+
+    def _call_anthropic(self, content: bytes, mime_type: str) -> Dict[str, Any]:
+        """Analyze the image through the Anthropic messages API."""
+        try:
+            image_b64 = base64.b64encode(content).decode("utf-8")
+
+            headers = {
+                "x-api-key": self.api_key,
+                "anthropic-version": "2023-06-01",
+                "Content-Type": "application/json"
+            }
+            payload = {
+                "model": self.model,
+                "max_tokens": 4096,
+                "messages": [{
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": self.prompt},
+                        {"type": "image", "source": {"type": "base64", "media_type": mime_type, "data": image_b64}}
+                    ]
+                }]
+            }
+
+            url = self._endpoint("https://api.anthropic.com/v1", "/messages")
+            result = self._post_json(url, headers, payload)
+            return {"success": True, "content": result["content"][0]["text"]}
+        except Exception as e:
+            logger.error(f"Anthropic VLM error: {e}")
+            return {"success": False, "error": str(e)}
+
+    def _call_qwen(self, content: bytes, mime_type: str) -> Dict[str, Any]:
+        """Analyze the image through the DashScope compatible-mode API."""
+        return self._call_chat_completions(
+            "https://dashscope.aliyuncs.com/compatible-mode/v1", "Qwen", content, mime_type
+        )
+
+
+class Parser:
+    """Document parser built on MarkItDown with an optional VLM client."""
+
+    def __init__(self):
+        self.markitdown = MarkItDown()
+        self.vlm_client: Optional[VLMClient] = None
+        logger.info("Parser initialized with MarkItDown")
+
+    def set_vlm_config(self, config: Dict[str, Any]) -> None:
+        """Create or clear the VLM client from a configuration dict.
+
+        A client is created only when ``config`` is non-empty, has a truthy
+        "enabled" entry and carries an "api_key"; otherwise the client is
+        cleared.
+        """
+        if config and config.get("enabled") and config.get("api_key"):
+            self.vlm_client = VLMClient(config)
+            logger.info(f"VLM enabled: provider={config.get('provider')}, model={config.get('model')}")
+        else:
+            self.vlm_client = None
+
+    def _should_use_vlm(self, file_name: str) -> bool:
+        """Tell whether the whole file should be sent straight to the VLM.
+
+        True only when a VLM client is set and the file name ends in one of
+        the listed image extensions or ``.pdf``.
+        """
+        if not self.vlm_client:
+            return False
+        ext = os.path.splitext(file_name)[1].lower()
+        # Images and PDFs both go to the VLM
+        image_exts = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.tiff']
+        return ext in image_exts or ext == '.pdf'
+
+    def _process_images_with_vlm(self, content: str) -> str:
+        """Replace base64-embedded images in the markdown with VLM output.
+
+        On success an image is replaced by the returned text wrapped in HTML
+        comments; on any failure the original image reference is kept.
+        """
+        # Markdown image whose target is a base64 data URI.
+        # Groups: (1) alt text, (2) whole data URI, (3) image subtype such as
+        # "png", (4) base64 payload.
+        pattern = r'!\[([^\]]*)\]\((data:image/([^;]+);base64,([A-Za-z0-9+/=]+))\)'
+
+        def replace_image(match):
+            alt_text = match.group(1)
+            # Group 3 is only the subtype; rebuild the full MIME type, which
+            # the client places into its "data:<mime>;base64," request URL.
+            mime_type = f"image/{match.group(3)}"
+            base64_data = match.group(4)
+
+            try:
+                image_bytes = base64.b64decode(base64_data)
+                logger.info(f"Processing image with VLM: {alt_text or 'unnamed'}")
+                vlm_result = self.vlm_client.analyze_image(image_bytes, mime_type)
+
+                if vlm_result.get("success"):
+                    vlm_content = vlm_result.get("content", "")
+                    logger.info(f"VLM processed image, content length: {len(vlm_content)}")
+                    return f"<!-- Image: {alt_text} -->\n{vlm_content}\n<!-- End Image -->"
+                else:
+                    logger.warning(f"VLM failed: {vlm_result.get('error')}")
+                    return match.group(0)
+            except Exception as e:
+                logger.error(f"VLM error: {e}")
+                return match.group(0)
+
+        return re.sub(pattern, replace_image, content)
+
+    def _parse_with_vlm(self, content: bytes, file_name: str) -> Document:
+        """Send the whole file to the VLM and wrap the answer in a Document.
+
+        The MIME type is chosen from the file extension, defaulting to
+        ``image/png``. A failed call yields a Document with empty content.
+        """
+        ext = os.path.splitext(file_name)[1].lower()
+        mime_types = {
+            '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.png': 'image/png',
+            '.gif': 'image/gif', '.bmp': 'image/bmp', '.webp': 'image/webp',
+            '.tiff': 'image/tiff', '.pdf': 'application/pdf',
+        }
+        mime_type = mime_types.get(ext, 'image/png')
+
+        result = self.vlm_client.analyze_image(content, mime_type)
+        if result.get("success"):
+            return Document(content=result["content"], metadata={"vlm": True})
+        else:
+            logger.error(f"VLM failed: {result.get('error')}")
+            return Document(content="")
+
+    def parse_file(
+        self,
+        file_name: str,
+        file_type: str,
+        content: bytes,
+        parser_engine: Optional[str] = None,
+        engine_overrides: Optional[dict[str, Any]] = None,
+        vlm_config: Optional[dict[str, Any]] = None,
+    ) -> Document:
+        """Parse file content into a Document.
+
+        Args:
+            file_name: File name; its extension decides whether the VLM
+                handles the whole file.
+            file_type: Extension hint for MarkItDown, with or without the
+                leading dot.
+            content: Raw bytes of the file.
+            parser_engine: Accepted for interface compatibility; not used.
+            engine_overrides: Accepted for interface compatibility; not used.
+            vlm_config: When given, replaces the current VLM setup, so a
+                disabled config switches the VLM off. When omitted, the
+                setup from an earlier ``set_vlm_config`` call is kept.
+
+        Returns:
+            The parsed Document; its content is empty when parsing fails.
+        """
+        logger.info(f"Parsing file: {file_name}, type: {file_type}, vlm_config={'enabled' if vlm_config and vlm_config.get('enabled') else 'none'}")
+
+        # Apply every explicitly supplied configuration, enabled or not, so a
+        # client created for an earlier request is not reused by a request
+        # that disabled the VLM.
+        if vlm_config is not None:
+            self.set_vlm_config(vlm_config)
+
+        # Let the VLM handle the whole file when that applies
+        if self._should_use_vlm(file_name):
+            logger.info(f"Using VLM for {file_name}")
+            return self._parse_with_vlm(content, file_name)
+
+        # Otherwise convert with MarkItDown
+        try:
+            ext = file_type
+            if ext and not ext.startswith('.'):
+                ext = '.' + ext
+
+            result = self.markitdown.convert(
+                io.BytesIO(content),
+                file_extension=ext,
+                keep_data_uris=True
+            )
+
+            markdown_content = result.text_content or ""
+
+            # With a VLM client available, process embedded images
+            if self.vlm_client and markdown_content:
+                markdown_content = self._process_images_with_vlm(markdown_content)
+
+            return Document(
+                content=markdown_content,
+                metadata=result.metadata if hasattr(result, 'metadata') else {}
+            )
+        except Exception as e:
+            logger.error(f"Parse error: {e}")
+            return Document(content="")
+
+    def parse_url(
+        self,
+        url: str,
+        title: str,
+        parser_engine: Optional[str] = None,
+        engine_overrides: Optional[dict[str, Any]] = None,
+    ) -> Document:
+        """Convert the resource at ``url`` with MarkItDown.
+
+        ``title`` is only logged; ``parser_engine`` and ``engine_overrides``
+        are accepted for interface compatibility and not used. A failed
+        conversion yields a Document with empty content.
+        """
+        logger.info(f"Parsing URL: {url}, title: {title}")
+
+        try:
+            result = self.markitdown.convert(url)
+            return Document(content=result.text_content or "")
+        except Exception as e:
+            logger.error(f"URL parse error: {e}")
+            return Document(content="")
+
+
+# Public API of this module
+__all__ = ["Parser", "Document"]
--- a/ai-core/parser/pdf_parser.py
+++ b/ai-core/parser/pdf_parser.py
@@ -0,0 +1,15 @@
+from docreader.parser.chain_parser import FirstParser
+from docreader.parser.markitdown_parser import MarkitdownParser
+
+
+class PDFParser(FirstParser):
+    """PDF parser built on the FirstParser chain.
+
+    The chain configured here contains a single backend, MarkitdownParser;
+    no MinerU backend is listed in this class.
+
+    NOTE(review): FirstParser presumably tries ``_parser_cls`` in order and
+    returns the first successful result - confirm against chain_parser.
+    """
+    # Parser classes to try in order; only MarkitdownParser is configured.
+    _parser_cls = (MarkitdownParser,)
--- a/ai-core/parser/registry.py
+++ b/ai-core/parser/registry.py
@@ -0,0 +1,160 @@
+import logging
+from typing import Any, Callable, Dict, List, Optional, Tuple, Type
+
+from docreader.parser.base_parser import BaseParser
+from docreader.parser.doc_parser import DocParser
+from docreader.parser.docx2_parser import Docx2Parser
+from docreader.parser.excel_parser import ExcelParser
+from docreader.parser.image_parser import ImageParser
+from docreader.parser.markdown_parser import MarkdownParser
+from docreader.parser.markitdown_parser import MarkitdownParser
+from docreader.parser.pdf_parser import PDFParser
+
+logger = logging.getLogger(__name__)
+
+BUILTIN_ENGINE = "builtin"
+
+
+class ParserEngineRegistry:
+    """Registry for parser engines.
+
+    Each engine maps file extensions to parser classes. Extensions are
+    stored and looked up in a canonical form (lowercase, no leading dot).
+    When a requested engine doesn't support a file type, the registry
+    falls back to the builtin engine automatically.
+    """
+
+    def __init__(self):
+        # engine name -> {canonical extension -> parser class}
+        self._engines: Dict[str, Dict[str, Type[BaseParser]]] = {}
+        # engine name -> human readable description
+        self._descriptions: Dict[str, str] = {}
+        # engine name -> availability probe returning (available, reason)
+        self._check_available: Dict[str, Callable[..., Tuple[bool, str]]] = {}
+        # engine name -> fallback reason shown when the probe gives none
+        self._unavailable_hint: Dict[str, str] = {}
+
+    @staticmethod
+    def _normalize(file_type: str) -> str:
+        """Return the canonical extension key: trimmed, lowercase, no leading dot."""
+        return file_type.strip().lower().lstrip(".")
+
+    def register(
+        self,
+        name: str,
+        file_types: Dict[str, Type[BaseParser]],
+        description: str = "",
+        check_available: Optional[Callable[..., Tuple[bool, str]]] = None,
+        unavailable_hint: str = "",
+    ):
+        """Register (or replace) the engine ``name``.
+
+        Args:
+            name: Engine identifier.
+            file_types: Mapping of file extension to parser class. Keys are
+                normalized, so "PDF" and ".pdf" both register as "pdf".
+            description: Human readable description of the engine.
+            check_available: Optional probe called with the tenant overrides;
+                it returns ``(available, unavailable_reason)``.
+            unavailable_hint: Reason reported when the probe says the engine
+                is unavailable without giving a reason. Only stored when
+                ``check_available`` is given.
+        """
+        # Lookups lowercase the requested type, so keys must be canonical
+        # too or a mixed-case registration could never be resolved.
+        normalized = {self._normalize(ext): cls for ext, cls in file_types.items()}
+        self._engines[name] = normalized
+        self._descriptions[name] = description
+        # Drop any probe left behind by an earlier registration of this name.
+        self._check_available.pop(name, None)
+        self._unavailable_hint.pop(name, None)
+        if check_available is not None:
+            self._check_available[name] = check_available
+            self._unavailable_hint[name] = unavailable_hint
+        logger.info(
+            "Registered parser engine '%s' with file types: %s",
+            name,
+            ", ".join(normalized),
+        )
+
+    def get_parser_class(self, engine: str, file_type: str) -> Type[BaseParser]:
+        """Resolve parser class for the given engine and file type.
+
+        Falls back to builtin engine when the requested engine is unknown
+        or doesn't support the file type.
+
+        Raises:
+            ValueError: If neither the requested engine nor the builtin
+                engine supports ``file_type``.
+        """
+        ft = self._normalize(file_type)
+
+        if engine and engine in self._engines:
+            cls = self._engines[engine].get(ft)
+            if cls:
+                logger.info("Using engine '%s' for file type '%s'", engine, ft)
+                return cls
+            logger.info(
+                "Engine '%s' does not support '%s', falling back to builtin",
+                engine,
+                ft,
+            )
+
+        builtin = self._engines.get(BUILTIN_ENGINE, {})
+        cls = builtin.get(ft)
+        if cls:
+            return cls
+
+        raise ValueError(f"Unsupported file type: {file_type}")
+
+    def list_engines(self, overrides: Optional[Dict[str, str]] = None) -> List[Dict]:
+        """Return metadata for all registered engines, including availability.
+
+        Args:
+            overrides: tenant-level config overrides (e.g. mineru_endpoint, mineru_api_key)
+                       forwarded to each engine's check_available function.
+
+        Returns:
+            One dict per engine with the keys ``name``, ``description``,
+            ``file_types`` (sorted), ``available`` and ``unavailable_reason``.
+        """
+        result = []
+        for name, parsers in self._engines.items():
+            available = True
+            unavailable_reason = ""
+            check = self._check_available.get(name)
+            if check is not None:
+                try:
+                    available, unavailable_reason = check(overrides)
+                except Exception as e:
+                    # A failing probe marks the engine unavailable instead of
+                    # breaking the whole listing.
+                    available = False
+                    unavailable_reason = str(e) or self._unavailable_hint.get(name, "")
+            if not available and not unavailable_reason:
+                unavailable_reason = self._unavailable_hint.get(name, "不可用")
+            result.append(
+                {
+                    "name": name,
+                    "description": self._descriptions.get(name, ""),
+                    "file_types": sorted(parsers.keys()),
+                    "available": available,
+                    "unavailable_reason": unavailable_reason,
+                }
+            )
+        return result
+
+    def get_engine_names(self) -> List[str]:
+        """Return the names of all registered engines in registration order."""
+        return list(self._engines.keys())
+
+
+def _build_default_registry() -> ParserEngineRegistry:
+    """Build the registry holding the ``builtin`` and ``markitdown`` engines."""
+    default_registry = ParserEngineRegistry()
+
+    # Document formats that have a dedicated builtin parser.
+    builtin_parsers = {
+        "docx": Docx2Parser,
+        "doc": DocParser,
+        "pdf": PDFParser,
+        "md": MarkdownParser,
+        "markdown": MarkdownParser,
+        "xlsx": ExcelParser,
+        "xls": ExcelParser,
+    }
+    # Every image extension is handled by ImageParser.
+    builtin_parsers.update(
+        dict.fromkeys(
+            ("jpg", "jpeg", "png", "gif", "bmp", "tiff", "webp"), ImageParser
+        )
+    )
+    default_registry.register(
+        BUILTIN_ENGINE,
+        builtin_parsers,
+        description="内置解析引擎",
+    )
+
+    # The markitdown engine uses one parser class for all of its formats.
+    markitdown_parsers = dict.fromkeys(
+        (
+            "md",
+            "markdown",
+            "pdf",
+            "docx",
+            "doc",
+            "pptx",
+            "ppt",
+            "xlsx",
+            "xls",
+            "csv",
+        ),
+        MarkitdownParser,
+    )
+    default_registry.register(
+        "markitdown",
+        markitdown_parsers,
+        description="MarkItDown 解析引擎（微软 MarkItDown 库）",
+    )
+
+    # NOTE: Engine listing is managed by Go-side engine registry
+    # (docparser.ListAllEngines). The Python list_engines method is kept for
+    # backward compatibility with the gRPC ListEngines RPC but the Go app
+    # no longer calls it. MinerU engines are handled natively by Go.
+
+    return default_registry
+
+
+# Shared registry instance, populated once when this module is imported.
+registry = _build_default_registry()
--- a/ai-core/parser/storage.py
+++ b/ai-core/parser/storage.py
@@ -0,0 +1,322 @@
+# -*- coding: utf-8 -*-
+import io
+import logging
+import os
+import traceback
+import uuid
+from abc import ABC, abstractmethod
+from typing import Dict, Optional
+
+from minio import Minio
+from qcloud_cos import CosConfig, CosS3Client
+
+from docreader.utils import endecode
+
+logger = logging.getLogger(__name__)
+
+
+def _cfg(storage_config: Optional[Dict], key: str, *env_keys: str, default: str = "") -> str:
+    """Look up a setting: storage_config first, then env vars, then ``default``.
+
+    A falsy value in ``storage_config`` (missing, empty, 0, None) counts as
+    unset. Environment variables are tried in the order given and the first
+    non-empty one wins. A value taken from ``storage_config`` is passed
+    through ``str``.
+    """
+    configured = storage_config.get(key, "") if storage_config else ""
+    if configured:
+        return str(configured)
+
+    env_values = (os.environ.get(env_name, "") for env_name in env_keys)
+    first_set = next((value for value in env_values if value), "")
+    return first_set if first_set else default
+
+
+class Storage(ABC):
+    """Interface shared by every storage backend in this module."""
+
+    @abstractmethod
+    def upload_file(self, file_path: str) -> str:
+        """Store the file at ``file_path`` and return a string locating it."""
+
+    @abstractmethod
+    def upload_bytes(self, content: bytes, file_ext: str = ".png") -> str:
+        """Store ``content`` under extension ``file_ext`` and return a string locating it."""
+
+
+class CosStorage(Storage):
+    """Tencent Cloud COS storage implementation.
+
+    Settings are read from ``storage_config`` with ``COS_*`` environment
+    variables as fallback. When the configuration is incomplete or the
+    client cannot be created, ``self.client`` is None and every upload
+    returns an empty string.
+    """
+
+    def __init__(self, storage_config: Optional[Dict] = None):
+        self.storage_config = storage_config
+        self.client, self.bucket_name, self.region, self.prefix = (
+            self._init_cos_client()
+        )
+
+    def _init_cos_client(self):
+        """Return ``(client, bucket_name, region, prefix)``, or four Nones on failure."""
+        try:
+            sc = self.storage_config
+            secret_id = _cfg(sc, "access_key_id", "COS_SECRET_ID")
+            secret_key = _cfg(sc, "secret_access_key", "COS_SECRET_KEY")
+            region = _cfg(sc, "region", "COS_REGION")
+            bucket_name = _cfg(sc, "bucket_name", "COS_BUCKET_NAME")
+            appid = _cfg(sc, "app_id", "COS_APP_ID")
+            prefix = _cfg(sc, "path_prefix", "COS_PATH_PREFIX")
+            enable_old_domain = os.environ.get("COS_ENABLE_OLD_DOMAIN", "").lower() in ("1", "true", "yes")
+
+            if not all([secret_id, secret_key, region, bucket_name, appid]):
+                # Only report whether the secret id is set, never its value.
+                logger.error(
+                    "Incomplete COS configuration: "
+                    "secret_id=%s, region=%s, bucket=%s, appid=%s",
+                    bool(secret_id), region, bucket_name, appid,
+                )
+                return None, None, None, None
+
+            logger.info("Initializing COS client: region=%s, bucket=%s", region, bucket_name)
+            config = CosConfig(
+                Appid=appid,
+                Region=region,
+                SecretId=secret_id,
+                SecretKey=secret_key,
+                EnableOldDomain=enable_old_domain,
+            )
+            client = CosS3Client(config)
+            return client, bucket_name, region, prefix
+        except Exception as e:
+            logger.error("Failed to initialize COS client: %s", e)
+            return None, None, None, None
+
+    def _object_key(self, file_ext: str) -> str:
+        """Build a unique ``[<prefix>/]images/<uuid><ext>`` object key.
+
+        The prefix is stripped of surrounding whitespace and slashes so an
+        empty or slash-terminated prefix never yields a key that starts with
+        "/" or contains "//".
+        """
+        prefix = (self.prefix or "").strip().strip("/")
+        name = f"images/{uuid.uuid4().hex}{file_ext}"
+        return f"{prefix}/{name}" if prefix else name
+
+    def _get_download_url(self, bucket_name, region, object_key):
+        """Return the myqcloud.com URL for ``object_key``."""
+        return f"https://{bucket_name}.cos.{region}.myqcloud.com/{object_key}"
+
+    def upload_file(self, file_path: str) -> str:
+        """Upload a local file; return its URL, or "" on failure."""
+        try:
+            if not self.client:
+                return ""
+            file_ext = os.path.splitext(file_path)[1]
+            object_key = self._object_key(file_ext)
+            self.client.upload_file(
+                Bucket=self.bucket_name,
+                LocalFilePath=file_path,
+                Key=object_key,
+            )
+            file_url = self._get_download_url(self.bucket_name, self.region, object_key)
+            logger.info("COS upload_file ok: %s", file_url)
+            return file_url
+        except Exception as e:
+            logger.error("COS upload_file failed: %s", e, exc_info=True)
+            return ""
+
+    def upload_bytes(self, content: bytes, file_ext: str = ".png") -> str:
+        """Upload in-memory bytes; return the object's URL, or "" on failure."""
+        try:
+            if not self.client:
+                return ""
+            object_key = self._object_key(file_ext)
+            self.client.put_object(
+                Bucket=self.bucket_name, Body=content, Key=object_key
+            )
+            file_url = self._get_download_url(self.bucket_name, self.region, object_key)
+            logger.info("COS upload_bytes ok: %s", file_url)
+            return file_url
+        except Exception as e:
+            # Route the traceback through logging instead of raw stderr.
+            logger.error("COS upload_bytes failed: %s", e, exc_info=True)
+            return ""
+
+
+class MinioStorage(Storage):
+    """MinIO storage implementation.
+
+    Settings are read from ``storage_config`` with ``MINIO_*`` environment
+    variables as fallback. When the configuration is incomplete or the
+    client cannot be created, ``self.client`` is None and every upload
+    returns an empty string.
+    """
+
+    def __init__(self, storage_config: Optional[Dict] = None):
+        self.storage_config = storage_config
+        self.client, self.bucket_name, self.use_ssl, self.endpoint, self.path_prefix = (
+            self._init_minio_client()
+        )
+
+    def _init_minio_client(self):
+        """Return ``(client, bucket, use_ssl, endpoint, path_prefix)`` or five Nones."""
+        try:
+            sc = self.storage_config
+            access_key = _cfg(sc, "access_key_id", "MINIO_ACCESS_KEY_ID")
+            secret_key = _cfg(sc, "secret_access_key", "MINIO_SECRET_ACCESS_KEY")
+            bucket_name = _cfg(sc, "bucket_name", "MINIO_BUCKET_NAME")
+            path_prefix_raw = _cfg(sc, "path_prefix", "MINIO_PATH_PREFIX")
+            path_prefix = path_prefix_raw.strip().strip("/") if path_prefix_raw else ""
+            endpoint = _cfg(sc, "endpoint", "MINIO_ENDPOINT")
+            use_ssl = os.environ.get("MINIO_USE_SSL", "").lower() in ("1", "true", "yes")
+
+            if not all([endpoint, access_key, secret_key, bucket_name]):
+                logger.error("Incomplete MinIO configuration")
+                return None, None, None, None, None
+
+            client = Minio(
+                endpoint, access_key=access_key, secret_key=secret_key, secure=use_ssl
+            )
+
+            found = client.bucket_exists(bucket_name)
+            if not found:
+                client.make_bucket(bucket_name)
+                # NOTE(review): a freshly created bucket is given a policy
+                # that lets any principal list it and read its objects.
+                # Confirm that anonymous read access is intended.
+                policy = (
+                    "{"
+                    '"Version":"2012-10-17",'
+                    '"Statement":['
+                    '{"Effect":"Allow","Principal":{"AWS":["*"]},'
+                    '"Action":["s3:GetBucketLocation","s3:ListBucket"],'
+                    '"Resource":["arn:aws:s3:::%s"]},'
+                    '{"Effect":"Allow","Principal":{"AWS":["*"]},'
+                    '"Action":["s3:GetObject"],'
+                    '"Resource":["arn:aws:s3:::%s/*"]}'
+                    "]}" % (bucket_name, bucket_name)
+                )
+                client.set_bucket_policy(bucket_name, policy)
+
+            return client, bucket_name, use_ssl, endpoint, path_prefix
+        except Exception as e:
+            logger.error("Failed to initialize MinIO client: %s", e)
+            return None, None, None, None, None
+
+    def _object_key(self, file_ext: str) -> str:
+        """Build a unique ``[<path_prefix>/]images/<uuid><ext>`` object key."""
+        name = f"images/{uuid.uuid4().hex}{file_ext}"
+        return f"{self.path_prefix}/{name}" if self.path_prefix else name
+
+    def _get_download_url(self, object_key: str):
+        """Return the URL for ``object_key``.
+
+        ``MINIO_PUBLIC_ENDPOINT`` takes precedence when set; otherwise the
+        URL is built from the configured endpoint and SSL flag.
+        """
+        public_endpoint = os.environ.get("MINIO_PUBLIC_ENDPOINT", "")
+        if public_endpoint:
+            # Tolerate a trailing slash so the URL never contains "//".
+            return f"{public_endpoint.rstrip('/')}/{self.bucket_name}/{object_key}"
+        scheme = "https" if self.use_ssl else "http"
+        return f"{scheme}://{self.endpoint}/{self.bucket_name}/{object_key}"
+
+    def upload_file(self, file_path: str) -> str:
+        """Upload a local file; return its URL, or "" on failure."""
+        try:
+            if not self.client:
+                return ""
+            object_key = self._object_key(os.path.splitext(file_path)[1])
+            with open(file_path, "rb") as file_data:
+                file_size = os.path.getsize(file_path)
+                self.client.put_object(
+                    bucket_name=self.bucket_name or "",
+                    object_name=object_key,
+                    data=file_data,
+                    length=file_size,
+                    content_type="application/octet-stream",
+                )
+            file_url = self._get_download_url(object_key)
+            logger.info("MinIO upload_file ok: %s", file_url)
+            return file_url
+        except Exception as e:
+            logger.error("MinIO upload_file failed: %s", e, exc_info=True)
+            return ""
+
+    def upload_bytes(self, content: bytes, file_ext: str = ".png") -> str:
+        """Upload in-memory bytes; return the object's URL, or "" on failure."""
+        try:
+            if not self.client:
+                return ""
+            object_key = self._object_key(file_ext)
+            self.client.put_object(
+                self.bucket_name or "",
+                object_key,
+                data=io.BytesIO(content),
+                length=len(content),
+                content_type="application/octet-stream",
+            )
+            file_url = self._get_download_url(object_key)
+            logger.info("MinIO upload_bytes ok: %s", file_url)
+            return file_url
+        except Exception as e:
+            # Route the traceback through logging instead of raw stderr.
+            logger.error("MinIO upload_bytes failed: %s", e, exc_info=True)
+            return ""
+
+
+class LocalStorage(Storage):
+    """Local file system storage implementation.
+
+    Saves files under base_dir and returns web-accessible URL paths
+    (e.g. /files/images/uuid.jpg) so that the Go app can serve them.
+    """
+
+    def __init__(self, storage_config: Optional[Dict] = None):
+        """Resolve directories from ``storage_config`` / env and create the image dir."""
+        sc = storage_config or {}
+        self.base_dir = (
+            sc.get("base_dir")
+            or os.environ.get("LOCAL_STORAGE_BASE_DIR", "/data/files")
+        )
+        path_prefix = (sc.get("path_prefix") or "").strip().strip("/")
+        if path_prefix:
+            self.image_dir = os.path.join(self.base_dir, path_prefix, "images")
+        else:
+            self.image_dir = os.path.join(self.base_dir, "images")
+        self.url_prefix = (
+            sc.get("url_prefix")
+            or os.environ.get("LOCAL_STORAGE_URL_PREFIX", "/files")
+        )
+        os.makedirs(self.image_dir, exist_ok=True)
+
+    def _to_url(self, fpath: str) -> str:
+        """Map a stored file path to its URL under ``url_prefix``.
+
+        Returns ``fpath`` unchanged when no URL prefix is configured.
+        """
+        if not self.url_prefix:
+            return fpath
+        # relpath uses the OS path separator; URLs always use "/".
+        rel = os.path.relpath(fpath, self.base_dir).replace(os.sep, "/")
+        # Tolerate a trailing slash on the prefix so the URL never has "//".
+        return f"{self.url_prefix.rstrip('/')}/{rel}"
+
+    def upload_file(self, file_path: str) -> str:
+        """Return ``file_path`` unchanged; the file is neither copied nor mapped to a URL."""
+        return file_path
+
+    def upload_bytes(self, content: bytes, file_ext: str = ".png") -> str:
+        """Write ``content`` to a new uniquely named file and return its URL.
+
+        Errors from opening or writing the file propagate to the caller.
+        """
+        fpath = os.path.join(self.image_dir, f"{uuid.uuid4()}{file_ext}")
+        with open(fpath, "wb") as f:
+            f.write(content)
+        url = self._to_url(fpath)
+        logger.info("Local storage saved: %s -> %s", fpath, url)
+        return url
+
+
+class Base64Storage(Storage):
+    """Storage backend that returns uploaded bytes inline as ``data:`` URIs."""
+
+    def upload_file(self, file_path: str) -> str:
+        """Return ``file_path`` unchanged; nothing is uploaded."""
+        return file_path
+
+    def upload_bytes(self, content: bytes, file_ext: str = ".png") -> str:
+        """Return a ``data:image/<ext>;base64,<payload>`` URI for ``content``.
+
+        The payload is whatever ``endecode.decode_image`` returns for the bytes.
+        """
+        subtype = file_ext.lstrip(".")
+        payload = endecode.decode_image(content)
+        return f"data:image/{subtype};base64,{payload}"
+
+
+class DummyStorage(Storage):
+    """No-op storage backend: nothing is stored and "" is always returned."""
+
+    def upload_file(self, file_path: str) -> str:
+        """Ignore ``file_path`` and return an empty string."""
+        return ""
+
+    def upload_bytes(self, content: bytes, file_ext: str = ".png") -> str:
+        """Ignore ``content`` and return an empty string."""
+        return ""
+
+
+def create_storage(storage_config: Optional[Dict[str, str]] = None) -> Storage:
+    """Create a storage instance based on storage_config dict.
+
+    The ``provider`` key in storage_config determines the backend:
+      minio, cos, local, base64.
+    When the provider is missing, empty, None or "unspecified", the
+    STORAGE_TYPE env var is used, defaulting to ``local`` when it is unset.
+    Any other value yields a DummyStorage whose uploads all return "".
+    """
+    storage_type = ""
+    if storage_config:
+        # ``or ""`` keeps an explicit None from being stringified to "none".
+        provider = str(storage_config.get("provider") or "").lower().strip()
+        if provider and provider not in ("unspecified", "storage_provider_unspecified"):
+            storage_type = provider
+
+    if not storage_type:
+        storage_type = os.environ.get("STORAGE_TYPE", "local").lower().strip()
+
+    logger.info("Creating %s storage instance", storage_type)
+
+    if storage_type == "minio":
+        return MinioStorage(storage_config)
+    elif storage_type == "cos":
+        return CosStorage(storage_config)
+    elif storage_type == "local":
+        return LocalStorage(storage_config)
+    elif storage_type == "base64":
+        return Base64Storage()
+
+    # Make the silent fallback visible: with DummyStorage every upload is dropped.
+    logger.warning(
+        "Unknown storage type '%s'; using DummyStorage (uploads are discarded)",
+        storage_type,
+    )
+    return DummyStorage()
--- a/ai-core/parser/web_parser.py
+++ b/ai-core/parser/web_parser.py
@@ -0,0 +1,141 @@
+import asyncio
+import logging
+
+from playwright.async_api import async_playwright
+from trafilatura import extract
+
+from docreader.config import CONFIG
+from docreader.models.document import Document
+from docreader.parser.base_parser import BaseParser
+from docreader.parser.chain_parser import PipelineParser
+from docreader.parser.markdown_parser import MarkdownParser
+from docreader.utils import endecode
+
+logger = logging.getLogger(__name__)
+
+
+class StdWebParser(BaseParser):
+    """Standard web page parser using Playwright and Trafilatura.
+
+    This parser scrapes web pages using Playwright's WebKit browser and extracts
+    clean content using Trafilatura library. It supports proxy configuration and
+    converts HTML content to markdown format.
+    """
+
+    def __init__(self, title: str, **kwargs):
+        """Initialize the web parser.
+
+        Args:
+            title: Title of the web page to be used as file name
+            **kwargs: Additional arguments passed to BaseParser
+        """
+        self.title = title
+        # Get proxy configuration from config if available
+        self.proxy = CONFIG.external_https_proxy
+        super().__init__(file_name=title, **kwargs)
+        logger.info(f"Initialized WebParser with title: {title}")
+
+    async def scrape(self, url: str) -> str:
+        """Scrape web page content using Playwright.
+
+        Args:
+            url: The URL of the web page to scrape
+
+        Returns:
+            HTML content of the web page as string, empty string on error
+        """
+        logger.info(f"Starting web page scraping for URL: {url}")
+        try:
+            async with async_playwright() as p:
+                launch_kwargs = {}
+                # Configure proxy if available
+                if self.proxy:
+                    launch_kwargs["proxy"] = {"server": self.proxy}
+                logger.info("Launching WebKit browser")
+                browser = await p.webkit.launch(**launch_kwargs)
+                # try/finally guarantees the browser is closed even when
+                # opening the page or reading its content raises.
+                try:
+                    page = await browser.new_page()
+
+                    logger.info(f"Navigating to URL: {url}")
+                    try:
+                        # Navigate to URL with 30 second timeout
+                        await page.goto(url, timeout=30000)
+                        logger.info("Initial page load complete")
+                    except Exception as e:
+                        logger.error(f"Error navigating to URL: {str(e)}")
+                        return ""
+
+                    logger.info("Retrieving page HTML content")
+                    # Get the full HTML content of the page
+                    content = await page.content()
+                    logger.info(f"Retrieved {len(content)} bytes of HTML content")
+                finally:
+                    await browser.close()
+                    logger.info("Browser closed")
+
+            # Return raw HTML content for further processing
+            logger.info("Successfully retrieved HTML content")
+            return content
+
+        except Exception as e:
+            logger.error(f"Failed to scrape web page: {str(e)}")
+            # Return empty string on error
+            return ""
+
+    def parse_into_text(self, content: bytes) -> Document:
+        """Parse web page content into a Document object.
+
+        Args:
+            content: URL encoded as bytes
+
+        Returns:
+            Document object containing the parsed markdown content, or a
+            Document whose content is an error message when scraping or
+            extraction produced nothing
+        """
+        # Decode bytes to get the URL string
+        url = endecode.decode_bytes(content)
+
+        logger.info(f"Scraping web page: {url}")
+        # Run async scraping in sync context
+        chtml = asyncio.run(self.scrape(url))
+        # Extract clean content from HTML using Trafilatura
+        # Convert to markdown format with metadata, images, tables, and links.
+        # Skip extraction entirely when scraping returned no HTML.
+        md_text = (
+            extract(
+                chtml,
+                output_format="markdown",
+                with_metadata=True,
+                include_images=True,
+                include_tables=True,
+                include_links=True,
+            )
+            if chtml
+            else None
+        )
+        if not md_text:
+            logger.error("Failed to parse web page")
+            return Document(content=f"Error parsing web page: {url}")
+        return Document(content=md_text)
+
+
+class WebParser(PipelineParser):
+    """Web parser using pipeline pattern.
+
+    This parser chains StdWebParser (for web scraping and HTML to markdown conversion)
+    with MarkdownParser (for markdown processing). The pipeline processes content
+    sequentially through both parsers.
+
+    The bytes handed to the first stage are the page URL, not the page body:
+    StdWebParser.parse_into_text decodes them to a URL and fetches the page.
+    """
+
+    # Parser classes to be executed in sequence
+    _parser_cls = (StdWebParser, MarkdownParser)
+
+
+if __name__ == "__main__":
+    # Manual smoke test: scrape one page and save the resulting markdown.
+    # Configure logging for debugging
+    logging.basicConfig(level=logging.DEBUG)
+    logger.setLevel(logging.DEBUG)
+
+    # Example URL to scrape
+    url = "https://cloud.tencent.com/document/product/457/6759"
+
+    # Create parser instance and parse the web page
+    parser = WebParser(title="")
+    parsed = parser.parse_into_text(url.encode())
+    # Save the parsed markdown content to file. The encoding is explicit
+    # because the page is non-ASCII and the platform default may not be UTF-8.
+    with open("./tencent.md", "w", encoding="utf-8") as f:
+        f.write(parsed.content)
--- a/ai-core/requirements.txt
+++ b/ai-core/requirements.txt
@@ -0,0 +1,16 @@
+# AI-Core Document Parser
+
+# gRPC 框架
+grpcio>=1.60.0
+grpcio-tools>=1.60.0
+grpcio-reflection>=1.60.0
+protobuf>=4.25.0
+
+# HTTP 请求
+requests>=2.31.0
+
+# 配置文件解析
+pyyaml>=6.0
+
+# 文档解析
+markitdown[pdf,docx,pptx,xlsx,all]>=0.0.1
--- a/ai-core/service/grpc_server.py
+++ b/ai-core/service/grpc_server.py
@@ -0,0 +1,208 @@
+"""
+gRPC Server for Document Parser
+"""
+import logging
+import requests
+from concurrent import futures
+import grpc
+from grpc_reflection.v1alpha import reflection
+import sys
+import os
+import io
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "proto"))
+
+from parser import Parser
+
+logger = logging.getLogger(__name__)
+
+# 导入 proto 生成的文件
+try:
+    import document_parser_pb2
+    import document_parser_pb2_grpc
+    PROTO_AVAILABLE = True
+except ImportError:
+    logger.warning("Proto files not found, please run: python generate_grpc.py")
+    PROTO_AVAILABLE = False
+
+
+class DocumentParserServicer:
+    """gRPC service implementation for the DocumentParser service.
+
+    NOTE(review): a single Parser instance is shared by every RPC, and
+    parse_file() stores per-request VLM settings on it via set_vlm_config.
+    Confirm that Parser is safe to share between concurrent requests.
+    """
+
+    def __init__(self, max_workers: int = 10):
+        self.parser = Parser()
+        self.max_workers = max_workers
+        logger.info("DocumentParserServicer initialized")
+
+    @staticmethod
+    def _failure(message: str):
+        """Build a failed ParseResponse carrying ``message``."""
+        return document_parser_pb2.ParseResponse(
+            success=False,
+            content="",
+            message=message,
+            content_length=0,
+        )
+
+    @staticmethod
+    def _extract_vlm_config(request):
+        """Return the request's VLM settings as a dict, or None when VLM is off."""
+        vlm_cfg = getattr(request, "vlm_config", None)
+        if not vlm_cfg or not vlm_cfg.enabled:
+            return None
+        logger.info(f"VLM config: provider={vlm_cfg.provider}, model={vlm_cfg.model}")
+        return {
+            "enabled": vlm_cfg.enabled,
+            "provider": vlm_cfg.provider,
+            "model": vlm_cfg.model,
+            "api_key": vlm_cfg.api_key,
+            "base_url": vlm_cfg.base_url,
+            "prompt": vlm_cfg.prompt,
+        }
+
+    def ParseDocument(self, request, context):
+        """Download ``request.file_url`` and convert the file to markdown.
+
+        Validation, download and parse failures are reported through a
+        ParseResponse with ``success=False``. Only the missing-proto case
+        aborts the RPC.
+        """
+        if not PROTO_AVAILABLE:
+            # A plain dict cannot be serialized as a response message, so
+            # fail the RPC with a proper status instead. abort() raises.
+            context.abort(grpc.StatusCode.UNAVAILABLE, "Proto not available")
+
+        try:
+            logger.info(
+                "ParseDocument request: file_url=%s, file_name=%s",
+                request.file_url,
+                request.file_name,
+            )
+
+            file_url = request.file_url
+            file_name = request.file_name
+
+            if not file_url:
+                return self._failure("file_url is required")
+
+            if not file_name:
+                return self._failure("file_name is required")
+
+            vlm_config = self._extract_vlm_config(request)
+
+            # Download the file.
+            # NOTE(review): file_url comes from the caller and is fetched
+            # without a host allow-list or a size limit; confirm the service
+            # is only reachable by trusted clients.
+            logger.info("Downloading file from URL: %s", file_url)
+            try:
+                response = requests.get(
+                    file_url,
+                    timeout=60,
+                    headers={"User-Agent": "DocParser/1.0"},
+                )
+                response.raise_for_status()
+                content = response.content
+                logger.info("Downloaded %d bytes", len(content))
+            except requests.RequestException as e:
+                logger.error("Failed to download file: %s", str(e))
+                return self._failure(f"Failed to download file: {str(e)}")
+
+            # Parse
+            logger.info("Parsing file")
+            # Extension without the leading dot; fall back to the request's
+            # explicit file_type when the file name has no extension.
+            file_type = os.path.splitext(file_name)[1][1:]
+            if not file_type:
+                file_type = getattr(request, "file_type", "") or ""
+
+            result = self.parser.parse_file(
+                file_name=file_name,
+                file_type=file_type,
+                content=content,
+                vlm_config=vlm_config,
+            )
+
+            if not result.content:
+                return self._failure("Parse failed or empty content")
+
+            markdown_content = result.content
+            logger.info("Parse successful: content_length=%d", len(markdown_content))
+
+            return document_parser_pb2.ParseResponse(
+                success=True,
+                content=markdown_content,
+                message="Parse successful",
+                content_length=len(markdown_content),
+                file_type=file_type or "auto",
+                parser_engine="markitdown",
+            )
+
+        except Exception as e:
+            logger.error("ParseDocument error: %s", str(e), exc_info=True)
+            return self._failure(f"Parse error: {str(e)}")
+
+    def GetSupportedFormats(self, request, context):
+        """Return the list of supported file extensions."""
+        if not PROTO_AVAILABLE:
+            context.abort(grpc.StatusCode.UNAVAILABLE, "Proto not available")
+
+        try:
+            file_types = [
+                "pdf", "docx", "doc", "pptx", "ppt",
+                "xlsx", "xls", "csv",
+                "md", "markdown",
+                "jpg", "jpeg", "png", "gif", "bmp", "tiff", "webp",
+                "html", "htm", "txt",
+            ]
+            return document_parser_pb2.SupportedFormatsResponse(
+                file_types=file_types,
+            )
+        except Exception as e:
+            logger.error("GetSupportedFormats error: %s", str(e))
+            # Returning None cannot be serialized; report a status instead.
+            context.abort(grpc.StatusCode.INTERNAL, f"GetSupportedFormats error: {e}")
+
+    def GetEngines(self, request, context):
+        """Return the available parser engines (currently only markitdown)."""
+        if not PROTO_AVAILABLE:
+            context.abort(grpc.StatusCode.UNAVAILABLE, "Proto not available")
+
+        try:
+            engines = [
+                document_parser_pb2.EngineInfo(
+                    name="markitdown",
+                    description="MarkItDown parser - supports various document formats",
+                    supported_file_types=["pdf", "docx", "pptx", "xlsx", "md", "html", "txt"],
+                    available=True,
+                )
+            ]
+            return document_parser_pb2.EnginesResponse(engines=engines)
+        except Exception as e:
+            logger.error("GetEngines error: %s", str(e))
+            # Returning None cannot be serialized; report a status instead.
+            context.abort(grpc.StatusCode.INTERNAL, f"GetEngines error: {e}")
+
+
+def serve(port: int = 50051, max_workers: int = 10):
+    """Start the DocumentParser gRPC server and block until it terminates.
+
+    Args:
+        port: TCP port to listen on (all interfaces, no TLS).
+        max_workers: Size of the thread pool that handles RPCs.
+
+    Raises:
+        RuntimeError: If the server cannot bind to ``port``.
+
+    Returns immediately, after logging an error, when the generated proto
+    modules are not importable.
+    """
+    if not PROTO_AVAILABLE:
+        logger.error("Proto files not available, cannot start server")
+        return
+
+    server = grpc.server(futures.ThreadPoolExecutor(max_workers=max_workers))
+    servicer = DocumentParserServicer(max_workers=max_workers)
+
+    # Register the service implementation
+    document_parser_pb2_grpc.add_DocumentParserServicer_to_server(
+        servicer, server
+    )
+
+    # Enable server reflection
+    reflection.enable_server_reflection(
+        [document_parser_pb2.DESCRIPTOR.services_by_name['DocumentParser']],
+        server
+    )
+
+    # Older grpc releases report a failed bind by returning 0 rather than
+    # raising, so check the result explicitly.
+    bound_port = server.add_insecure_port(f"0.0.0.0:{port}")
+    if not bound_port:
+        raise RuntimeError(f"Failed to bind gRPC server to port {port}")
+
+    server.start()
+    logger.info(f"DocumentParser gRPC server started on port {port}")
+    logger.info("gRPC reflection enabled")
+    try:
+        server.wait_for_termination()
+    finally:
+        # Runs on Ctrl-C as well: stop accepting RPCs and give in-flight
+        # calls a short grace period before the process exits.
+        server.stop(grace=5).wait()
--- a/screenshots/agent管理.png
+++ b/screenshots/agent管理.png
--- a/screenshots/对话界面.png
+++ b/screenshots/对话界面.png
--- a/server/document_parser.pb.go
+++ b/server/document_parser.pb.go
@@ -28,8 +28,10 @@ type ParseRequest struct {
 	FileType        string                 `protobuf:"bytes,3,opt,name=file_type,json=fileType,proto3" json:"file_type,omitempty"`
 	ParserEngine    string                 `protobuf:"bytes,4,opt,name=parser_engine,json=parserEngine,proto3" json:"parser_engine,omitempty"`
 	EngineOverrides map[string]string      `protobuf:"bytes,5,rep,name=engine_overrides,json=engineOverrides,proto3" json:"engine_overrides,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"`
-	unknownFields   protoimpl.UnknownFields
-	sizeCache       protoimpl.SizeCache
+	// VLM 配置（可选）
+	VlmConfig     *VLMConfig `protobuf:"bytes,6,opt,name=vlm_config,json=vlmConfig,proto3" json:"vlm_config,omitempty"`
+	unknownFields protoimpl.UnknownFields
+	sizeCache     protoimpl.SizeCache
 }

 func (x *ParseRequest) Reset() {
@@ -97,6 +99,97 @@ func (x *ParseRequest) GetEngineOverrides() map[string]string {
 	return nil
 }

+// GetVlmConfig returns x.VlmConfig, or nil when x itself is nil.
+func (x *ParseRequest) GetVlmConfig() *VLMConfig {
+	if x != nil {
+		return x.VlmConfig
+	}
+	return nil
+}
+
+// VLMConfig is the Go type for the docparser.VLMConfig message, the optional
+// VLM settings carried in ParseRequest.VlmConfig.
+type VLMConfig struct {
+	state         protoimpl.MessageState `protogen:"open.v1"`
+	Enabled       bool                   `protobuf:"varint,1,opt,name=enabled,proto3" json:"enabled,omitempty"`               // whether VLM is enabled
+	Provider      string                 `protobuf:"bytes,2,opt,name=provider,proto3" json:"provider,omitempty"`              // VLM provider: openai, anthropic, local, etc.
+	Model         string                 `protobuf:"bytes,3,opt,name=model,proto3" json:"model,omitempty"`                    // model name
+	ApiKey        string                 `protobuf:"bytes,4,opt,name=api_key,json=apiKey,proto3" json:"api_key,omitempty"`    // API key
+	BaseUrl       string                 `protobuf:"bytes,5,opt,name=base_url,json=baseUrl,proto3" json:"base_url,omitempty"` // custom API address
+	Prompt        string                 `protobuf:"bytes,6,opt,name=prompt,proto3" json:"prompt,omitempty"`                  // custom prompt
+	unknownFields protoimpl.UnknownFields
+	sizeCache     protoimpl.SizeCache
+}
+
+// Reset overwrites x with a zero VLMConfig and stores the message info of
+// file_document_parser_proto_msgTypes[1] in its message state.
+func (x *VLMConfig) Reset() {
+	*x = VLMConfig{}
+	mi := &file_document_parser_proto_msgTypes[1]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+// String returns the result of protoimpl.X.MessageStringOf for x.
+func (x *VLMConfig) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+// ProtoMessage is an empty marker method.
+func (*VLMConfig) ProtoMessage() {}
+
+// ProtoReflect returns the reflective view of x. For a non-nil x the message
+// info is stored in the message state on first use; for a nil x the view is
+// produced by the message info itself.
+func (x *VLMConfig) ProtoReflect() protoreflect.Message {
+	mi := &file_document_parser_proto_msgTypes[1]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use VLMConfig.ProtoReflect.Descriptor instead.
+func (*VLMConfig) Descriptor() ([]byte, []int) {
+	return file_document_parser_proto_rawDescGZIP(), []int{1}
+}
+
+// GetEnabled returns x.Enabled, or false when x is nil.
+func (x *VLMConfig) GetEnabled() bool {
+	if x != nil {
+		return x.Enabled
+	}
+	return false
+}
+
+// GetProvider returns x.Provider, or "" when x is nil.
+func (x *VLMConfig) GetProvider() string {
+	if x != nil {
+		return x.Provider
+	}
+	return ""
+}
+
+// GetModel returns x.Model, or "" when x is nil.
+func (x *VLMConfig) GetModel() string {
+	if x != nil {
+		return x.Model
+	}
+	return ""
+}
+
+// GetApiKey returns x.ApiKey, or "" when x is nil.
+func (x *VLMConfig) GetApiKey() string {
+	if x != nil {
+		return x.ApiKey
+	}
+	return ""
+}
+
+// GetBaseUrl returns x.BaseUrl, or "" when x is nil.
+func (x *VLMConfig) GetBaseUrl() string {
+	if x != nil {
+		return x.BaseUrl
+	}
+	return ""
+}
+
+// GetPrompt returns x.Prompt, or "" when x is nil.
+func (x *VLMConfig) GetPrompt() string {
+	if x != nil {
+		return x.Prompt
+	}
+	return ""
+}
+
 type ParseResponse struct {
 	state         protoimpl.MessageState `protogen:"open.v1"`
 	Success       bool                   `protobuf:"varint,1,opt,name=success,proto3" json:"success,omitempty"`
@@ -111,7 +204,7 @@ type ParseResponse struct {

 func (x *ParseResponse) Reset() {
 	*x = ParseResponse{}
-	mi := &file_document_parser_proto_msgTypes[1]
+	mi := &file_document_parser_proto_msgTypes[2]
 	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
 	ms.StoreMessageInfo(mi)
 }
@@ -123,7 +216,7 @@ func (x *ParseResponse) String() string {
 func (*ParseResponse) ProtoMessage() {}

 func (x *ParseResponse) ProtoReflect() protoreflect.Message {
-	mi := &file_document_parser_proto_msgTypes[1]
+	mi := &file_document_parser_proto_msgTypes[2]
 	if x != nil {
 		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
 		if ms.LoadMessageInfo() == nil {
@@ -136,7 +229,7 @@ func (x *ParseResponse) ProtoReflect() protoreflect.Message {

 // Deprecated: Use ParseResponse.ProtoReflect.Descriptor instead.
 func (*ParseResponse) Descriptor() ([]byte, []int) {
-	return file_document_parser_proto_rawDescGZIP(), []int{1}
+	return file_document_parser_proto_rawDescGZIP(), []int{2}
 }

 func (x *ParseResponse) GetSuccess() bool {
@@ -189,7 +282,7 @@ type Empty struct {

 func (x *Empty) Reset() {
 	*x = Empty{}
-	mi := &file_document_parser_proto_msgTypes[2]
+	mi := &file_document_parser_proto_msgTypes[3]
 	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
 	ms.StoreMessageInfo(mi)
 }
@@ -201,7 +294,7 @@ func (x *Empty) String() string {
 func (*Empty) ProtoMessage() {}

 func (x *Empty) ProtoReflect() protoreflect.Message {
-	mi := &file_document_parser_proto_msgTypes[2]
+	mi := &file_document_parser_proto_msgTypes[3]
 	if x != nil {
 		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
 		if ms.LoadMessageInfo() == nil {
@@ -214,7 +307,7 @@ func (x *Empty) ProtoReflect() protoreflect.Message {

 // Deprecated: Use Empty.ProtoReflect.Descriptor instead.
 func (*Empty) Descriptor() ([]byte, []int) {
-	return file_document_parser_proto_rawDescGZIP(), []int{2}
+	return file_document_parser_proto_rawDescGZIP(), []int{3}
 }

 type SupportedFormatsResponse struct {
@@ -227,7 +320,7 @@ type SupportedFormatsResponse struct {

 func (x *SupportedFormatsResponse) Reset() {
 	*x = SupportedFormatsResponse{}
-	mi := &file_document_parser_proto_msgTypes[3]
+	mi := &file_document_parser_proto_msgTypes[4]
 	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
 	ms.StoreMessageInfo(mi)
 }
@@ -239,7 +332,7 @@ func (x *SupportedFormatsResponse) String() string {
 func (*SupportedFormatsResponse) ProtoMessage() {}

 func (x *SupportedFormatsResponse) ProtoReflect() protoreflect.Message {
-	mi := &file_document_parser_proto_msgTypes[3]
+	mi := &file_document_parser_proto_msgTypes[4]
 	if x != nil {
 		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
 		if ms.LoadMessageInfo() == nil {
@@ -252,7 +345,7 @@ func (x *SupportedFormatsResponse) ProtoReflect() protoreflect.Message {

 // Deprecated: Use SupportedFormatsResponse.ProtoReflect.Descriptor instead.
 func (*SupportedFormatsResponse) Descriptor() ([]byte, []int) {
-	return file_document_parser_proto_rawDescGZIP(), []int{3}
+	return file_document_parser_proto_rawDescGZIP(), []int{4}
 }

 func (x *SupportedFormatsResponse) GetFileTypes() []string {
@@ -278,7 +371,7 @@ type EnginesResponse struct {

 func (x *EnginesResponse) Reset() {
 	*x = EnginesResponse{}
-	mi := &file_document_parser_proto_msgTypes[4]
+	mi := &file_document_parser_proto_msgTypes[5]
 	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
 	ms.StoreMessageInfo(mi)
 }
@@ -290,7 +383,7 @@ func (x *EnginesResponse) String() string {
 func (*EnginesResponse) ProtoMessage() {}

 func (x *EnginesResponse) ProtoReflect() protoreflect.Message {
-	mi := &file_document_parser_proto_msgTypes[4]
+	mi := &file_document_parser_proto_msgTypes[5]
 	if x != nil {
 		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
 		if ms.LoadMessageInfo() == nil {
@@ -303,7 +396,7 @@ func (x *EnginesResponse) ProtoReflect() protoreflect.Message {

 // Deprecated: Use EnginesResponse.ProtoReflect.Descriptor instead.
 func (*EnginesResponse) Descriptor() ([]byte, []int) {
-	return file_document_parser_proto_rawDescGZIP(), []int{4}
+	return file_document_parser_proto_rawDescGZIP(), []int{5}
 }

 func (x *EnginesResponse) GetEngines() []*EngineInfo {
@@ -326,7 +419,7 @@ type EngineInfo struct {

 func (x *EngineInfo) Reset() {
 	*x = EngineInfo{}
-	mi := &file_document_parser_proto_msgTypes[5]
+	mi := &file_document_parser_proto_msgTypes[6]
 	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
 	ms.StoreMessageInfo(mi)
 }
@@ -338,7 +431,7 @@ func (x *EngineInfo) String() string {
 func (*EngineInfo) ProtoMessage() {}

 func (x *EngineInfo) ProtoReflect() protoreflect.Message {
-	mi := &file_document_parser_proto_msgTypes[5]
+	mi := &file_document_parser_proto_msgTypes[6]
 	if x != nil {
 		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
 		if ms.LoadMessageInfo() == nil {
@@ -351,7 +444,7 @@ func (x *EngineInfo) ProtoReflect() protoreflect.Message {

 // Deprecated: Use EngineInfo.ProtoReflect.Descriptor instead.
 func (*EngineInfo) Descriptor() ([]byte, []int) {
-	return file_document_parser_proto_rawDescGZIP(), []int{5}
+	return file_document_parser_proto_rawDescGZIP(), []int{6}
 }

 func (x *EngineInfo) GetName() string {
@@ -393,16 +486,25 @@ var File_document_parser_proto protoreflect.FileDescriptor

 const file_document_parser_proto_rawDesc = "" +
 	"\n" +
-	"\x15document_parser.proto\x12\tdocparser\"\xa5\x02\n" +
+	"\x15document_parser.proto\x12\tdocparser\"\xda\x02\n" +
 	"\fParseRequest\x12\x19\n" +
 	"\bfile_url\x18\x01 \x01(\tR\afileUrl\x12\x1b\n" +
 	"\tfile_name\x18\x02 \x01(\tR\bfileName\x12\x1b\n" +
 	"\tfile_type\x18\x03 \x01(\tR\bfileType\x12#\n" +
 	"\rparser_engine\x18\x04 \x01(\tR\fparserEngine\x12W\n" +
-	"\x10engine_overrides\x18\x05 \x03(\v2,.docparser.ParseRequest.EngineOverridesEntryR\x0fengineOverrides\x1aB\n" +
+	"\x10engine_overrides\x18\x05 \x03(\v2,.docparser.ParseRequest.EngineOverridesEntryR\x0fengineOverrides\x123\n" +
+	"\n" +
+	"vlm_config\x18\x06 \x01(\v2\x14.docparser.VLMConfigR\tvlmConfig\x1aB\n" +
 	"\x14EngineOverridesEntry\x12\x10\n" +
 	"\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" +
-	"\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\"\xc6\x01\n" +
+	"\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\"\xa3\x01\n" +
+	"\tVLMConfig\x12\x18\n" +
+	"\aenabled\x18\x01 \x01(\bR\aenabled\x12\x1a\n" +
+	"\bprovider\x18\x02 \x01(\tR\bprovider\x12\x14\n" +
+	"\x05model\x18\x03 \x01(\tR\x05model\x12\x17\n" +
+	"\aapi_key\x18\x04 \x01(\tR\x06apiKey\x12\x19\n" +
+	"\bbase_url\x18\x05 \x01(\tR\abaseUrl\x12\x16\n" +
+	"\x06prompt\x18\x06 \x01(\tR\x06prompt\"\xc6\x01\n" +
 	"\rParseResponse\x12\x18\n" +
 	"\asuccess\x18\x01 \x01(\bR\asuccess\x12\x18\n" +
 	"\acontent\x18\x02 \x01(\tR\acontent\x12\x18\n" +
@@ -445,32 +547,34 @@ func file_document_parser_proto_rawDescGZIP() []byte {
 	return file_document_parser_proto_rawDescData
 }

-var file_document_parser_proto_msgTypes = make([]protoimpl.MessageInfo, 8)
+var file_document_parser_proto_msgTypes = make([]protoimpl.MessageInfo, 9)
 var file_document_parser_proto_goTypes = []any{
 	(*ParseRequest)(nil),             // 0: docparser.ParseRequest
-	(*ParseResponse)(nil),            // 1: docparser.ParseResponse
-	(*Empty)(nil),                    // 2: docparser.Empty
-	(*SupportedFormatsResponse)(nil), // 3: docparser.SupportedFormatsResponse
-	(*EnginesResponse)(nil),          // 4: docparser.EnginesResponse
-	(*EngineInfo)(nil),               // 5: docparser.EngineInfo
-	nil,                              // 6: docparser.ParseRequest.EngineOverridesEntry
-	nil,                              // 7: docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry
+	(*VLMConfig)(nil),                // 1: docparser.VLMConfig
+	(*ParseResponse)(nil),            // 2: docparser.ParseResponse
+	(*Empty)(nil),                    // 3: docparser.Empty
+	(*SupportedFormatsResponse)(nil), // 4: docparser.SupportedFormatsResponse
+	(*EnginesResponse)(nil),          // 5: docparser.EnginesResponse
+	(*EngineInfo)(nil),               // 6: docparser.EngineInfo
+	nil,                              // 7: docparser.ParseRequest.EngineOverridesEntry
+	nil,                              // 8: docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry
 }
 var file_document_parser_proto_depIdxs = []int32{
-	6, // 0: docparser.ParseRequest.engine_overrides:type_name -> docparser.ParseRequest.EngineOverridesEntry
-	7, // 1: docparser.SupportedFormatsResponse.file_type_descriptions:type_name -> docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry
-	5, // 2: docparser.EnginesResponse.engines:type_name -> docparser.EngineInfo
-	0, // 3: docparser.DocumentParser.ParseDocument:input_type -> docparser.ParseRequest
-	2, // 4: docparser.DocumentParser.GetSupportedFormats:input_type -> docparser.Empty
-	2, // 5: docparser.DocumentParser.GetEngines:input_type -> docparser.Empty
-	1, // 6: docparser.DocumentParser.ParseDocument:output_type -> docparser.ParseResponse
-	3, // 7: docparser.DocumentParser.GetSupportedFormats:output_type -> docparser.SupportedFormatsResponse
-	4, // 8: docparser.DocumentParser.GetEngines:output_type -> docparser.EnginesResponse
-	6, // [6:9] is the sub-list for method output_type
-	3, // [3:6] is the sub-list for method input_type
-	3, // [3:3] is the sub-list for extension type_name
-	3, // [3:3] is the sub-list for extension extendee
-	0, // [0:3] is the sub-list for field type_name
+	7, // 0: docparser.ParseRequest.engine_overrides:type_name -> docparser.ParseRequest.EngineOverridesEntry
+	1, // 1: docparser.ParseRequest.vlm_config:type_name -> docparser.VLMConfig
+	8, // 2: docparser.SupportedFormatsResponse.file_type_descriptions:type_name -> docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry
+	6, // 3: docparser.EnginesResponse.engines:type_name -> docparser.EngineInfo
+	0, // 4: docparser.DocumentParser.ParseDocument:input_type -> docparser.ParseRequest
+	3, // 5: docparser.DocumentParser.GetSupportedFormats:input_type -> docparser.Empty
+	3, // 6: docparser.DocumentParser.GetEngines:input_type -> docparser.Empty
+	2, // 7: docparser.DocumentParser.ParseDocument:output_type -> docparser.ParseResponse
+	4, // 8: docparser.DocumentParser.GetSupportedFormats:output_type -> docparser.SupportedFormatsResponse
+	5, // 9: docparser.DocumentParser.GetEngines:output_type -> docparser.EnginesResponse
+	7, // [7:10] is the sub-list for method output_type
+	4, // [4:7] is the sub-list for method input_type
+	4, // [4:4] is the sub-list for extension type_name
+	4, // [4:4] is the sub-list for extension extendee
+	0, // [0:4] is the sub-list for field type_name
 }

 func init() { file_document_parser_proto_init() }
@@ -484,7 +588,7 @@ func file_document_parser_proto_init() {
 			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
 			RawDescriptor: unsafe.Slice(unsafe.StringData(file_document_parser_proto_rawDesc), len(file_document_parser_proto_rawDesc)),
 			NumEnums:      0,
-			NumMessages:   8,
+			NumMessages:   9,
 			NumExtensions: 0,
 			NumServices:   1,
 		},
--- a/server/internal/model/knowledge_info.go
+++ b/server/internal/model/knowledge_info.go
@@ -10,9 +10,17 @@ import (
 // ParsingConfig 解析配置
 type ParsingConfig struct {
 	Engine     string `json:"engine"`      // markitdown / docling
-	DoclingURL string `json:"docling_url"`  // Docling 服务 URL
-	EnablePDF  bool   `json:"enable_pdf"`   // 是否启用 PDF 解析
+	DoclingURL string `json:"docling_url"` // Docling service URL
+	EnablePDF  bool   `json:"enable_pdf"`  // whether PDF parsing is enabled
 	Pandoc     bool   `json:"pandoc"`      // 是否启用 Pandoc
+
+	// VLM settings (used for image OCR and similar tasks).
+	// NOTE(review): VLMAPIKey is marshalled under "vlm_api_key" like every
+	// other field — confirm this struct is never returned to clients or
+	// logged as a whole.
+	VLMEnabled  bool   `json:"vlm_enabled"`  // whether VLM is enabled
+	VLMProvider string `json:"vlm_provider"` // VLM provider: openai, anthropic, local, etc.
+	VLMModel    string `json:"vlm_model"`    // model name
+	VLMAPIKey   string `json:"vlm_api_key"`  // API key
+	VLMBaseURL  string `json:"vlm_base_url"` // custom API address
+	VLMPrompt   string `json:"vlm_prompt"`   // custom prompt
 }

 // Scan 实现 sql.Scanner 接口
--- a/server/internal/service/document_parser_client.go
+++ b/server/internal/service/document_parser_client.go
@@ -27,6 +27,16 @@ type ParseResult struct {
 	ParserEngine  string
 }

+// VLMConfig holds VLM model settings. When a non-nil value is passed to
+// ParseDocument, its fields are copied one-to-one into the request's
+// docparser.VLMConfig message.
+type VLMConfig struct {
+	Enabled  bool
+	Provider string // openai, anthropic, local, etc.
+	Model    string
+	APIKey   string
+	BaseURL  string
+	Prompt   string
+}
+
 // NewAICoreClient 创建 AI-Core 客户端
 func NewAICoreClient(address string) (*AICoreClient, error) {
 	return &AICoreClient{address: address}, nil
@@ -56,7 +66,8 @@ func (c *AICoreClient) Close() {
 }

 // ParseDocument 解析文档 - 使用生成的 protobuf 代码
-func (c *AICoreClient) ParseDocument(fileURL, fileName, fileType string) (*ParseResult, error) {
+// vlmConfig 可选，如果不使用 VLM 传 nil
+func (c *AICoreClient) ParseDocument(fileURL, fileName, fileType string, vlmConfig *VLMConfig) (*ParseResult, error) {
 	if c.conn == nil {
 		if err := c.Connect(); err != nil {
 			return nil, err
@@ -72,6 +83,18 @@ func (c *AICoreClient) ParseDocument(fileURL, fileName, fileType string) (*Parse
 		FileType: fileType,
 	}

+	// 如果提供了 VLM 配置，添加到请求中
+	if vlmConfig != nil {
+		req.VlmConfig = &docparser.VLMConfig{
+			Enabled:  vlmConfig.Enabled,
+			Provider: vlmConfig.Provider,
+			Model:    vlmConfig.Model,
+			ApiKey:   vlmConfig.APIKey,
+			BaseUrl:  vlmConfig.BaseURL,
+			Prompt:   vlmConfig.Prompt,
+		}
+	}
+
 	ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
 	defer cancel()

--- a/server/internal/service/knowledge_service.go
+++ b/server/internal/service/knowledge_service.go
@@ -3,6 +3,7 @@ package service
 import (
 	"bytes"
 	"encoding/json"
+	"io"
 	"log"
 	"mime/multipart"
 	"net/http"
@@ -19,8 +20,15 @@ import (
 var knowledgeDebugLog *log.Logger

+// init points knowledgeDebugLog at logs/debug.log (opened in append mode),
+// or at io.Discard when that file cannot be opened.
 func init() {
-	debugFile, _ := os.OpenFile("logs/debug.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
-	knowledgeDebugLog = log.New(debugFile, "", log.Ldate|log.Ltime)
+	// Make sure the logs directory exists; the error is ignored here
+	os.MkdirAll("logs", 0755)
+	debugFile, err := os.OpenFile("logs/debug.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
+	if err != nil {
+		// If the file cannot be opened, write to io.Discard so debug
+		// output does not end up on the console
+		knowledgeDebugLog = log.New(io.Discard, "", log.Ldate|log.Ltime)
+	} else {
+		knowledgeDebugLog = log.New(debugFile, "", log.Ldate|log.Ltime)
+	}
 }

 type KnowledgeService struct {
@@ -133,10 +141,36 @@ func (s *KnowledgeService) Update(id string, req model.UpdateKnowledgeRequest) e

 // Delete 删除知识库
 func (s *KnowledgeService) Delete(id string) error {
-	// 先删除关联的文档
+	// 获取知识库信息
+	kb, err := s.repo.FindByID(id)
+	if err != nil {
+		return err
+	}
+
+	// 获取知识库下所有文档
+	docs, err := s.repo.FindDocumentsByKBID(id, "")
+	if err != nil {
+		return err
+	}
+
+	// 删除每个文档的 MinIO 文件和本地 Markdown 文件
+	for _, doc := range docs {
+		// 删除 MinIO 文件
+		if doc.FileKey != "" && kb.StorageConfig.Type == "minio" {
+			s.uploadService.DeleteFileWithConfig(doc.FileKey, kb.StorageConfig)
+		}
+		// 删除本地 Markdown 文件
+		if s.markdownLocalPath != "" {
+			markdownPath := s.markdownLocalPath + "/" + doc.ID + ".md"
+			os.Remove(markdownPath)
+		}
+	}
+
+	// 删除关联的文档（数据库记录）
 	if err := s.repo.DeleteDocumentsByKBID(id); err != nil {
 		return err
 	}
+
 	return s.repo.Delete(id)
 }

@@ -233,7 +267,7 @@ func (s *KnowledgeService) UploadDocument(kbID string, file *multipart.FileHeade
 	go s.parseDocument(kbID, doc.ID, result.URL, kb.ParsingConfig)

 	// 异步调用 AI-Core gRPC 服务解析文档（获取 Markdown）
-	go s.parseDocumentWithAICore(doc.ID, result.URL, doc.Name)
+	go s.parseDocumentWithAICore(doc.ID, result.URL, doc.Name, kb.ParsingConfig)

 	return doc, result.URL, nil
 }
@@ -293,7 +327,7 @@ func (s *KnowledgeService) parseDocument(kbID, docID, fileURL string, config mod
 }

 // parseDocumentWithAICore 调用 AI-Core gRPC 服务解析文档
-func (s *KnowledgeService) parseDocumentWithAICore(docID, fileURL, fileName string) {
+func (s *KnowledgeService) parseDocumentWithAICore(docID, fileURL, fileName string, config model.ParsingConfig) {
 	if s.aiCoreClient == nil {
 		knowledgeDebugLog.Printf("[AICore] AI-Core 客户端未初始化")
 		return
@@ -301,7 +335,21 @@ func (s *KnowledgeService) parseDocumentWithAICore(docID, fileURL, fileName stri

 	knowledgeDebugLog.Printf("[AICore] 开始解析文档: docID=%s, fileURL=%s, fileName=%s", docID, fileURL, fileName)

-	result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "")
+	// 构建 VLM 配置
+	var vlmConfig *VLMConfig
+	if config.VLMEnabled {
+		vlmConfig = &VLMConfig{
+			Enabled:  config.VLMEnabled,
+			Provider: config.VLMProvider,
+			Model:    config.VLMModel,
+			APIKey:   config.VLMAPIKey,
+			BaseURL:  config.VLMBaseURL,
+			Prompt:   config.VLMPrompt,
+		}
+		knowledgeDebugLog.Printf("[AICore] VLM 配置: provider=%s, model=%s, enabled=%v", config.VLMProvider, config.VLMModel, config.VLMEnabled)
+	}
+
+	result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "", vlmConfig)
 	if err != nil {
 		knowledgeDebugLog.Printf("[AICore] 解析失败: docID=%s, err=%v", docID, err)
 		return
@@ -462,7 +510,7 @@ func (s *KnowledgeService) GetDocumentPreview(kbID, docID string, page int) (*mo
 	// Office文件调用解析服务转换为HTML
 	if isOffice && s.aiCoreClient != nil {
 		knowledgeDebugLog.Printf("[Preview] Parsing office file: %s, URL: %s", fileName, fileURL)
-		result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "")
+		result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "", nil) // Preview 不使用 VLM
 		if err != nil {
 			// 解析失败，返回文件URL
 			knowledgeDebugLog.Printf("[Preview] Parse document failed: %v", err)
--- a/server/resource/files/27899cc7-2382-4e8c-90a9-b4dd1877470f.pdf
+++ b/server/resource/files/27899cc7-2382-4e8c-90a9-b4dd1877470f.pdf
--- a/server/resource/markdown/0b12fa3a-e4ce-4eb8-b963-a11f78171bba.md
+++ b/server/resource/markdown/0b12fa3a-e4ce-4eb8-b963-a11f78171bba.md
@@ -1,5 +0,0 @@
-sk-5706307e3e3a4eb09452dbf0bb87fe31
-
-https://dashscope.aliyuncs.com/compatible-mode/v1
-
-qwen3.5-flash
--- a/server/resource/markdown/16dd7acd-ad0c-4e29-aa34-57ffd62c42a3.md
+++ b/server/resource/markdown/16dd7acd-ad0c-4e29-aa34-57ffd62c42a3.md
@@ -1,5 +0,0 @@
-sk-5706307e3e3a4eb09452dbf0bb87fe31
-
-https://dashscope.aliyuncs.com/compatible-mode/v1
-
-qwen3.5-flash
--- a/server/resource/markdown/1a54a087-4a07-4dbc-8631-e211275d20f7.md
+++ b/server/resource/markdown/1a54a087-4a07-4dbc-8631-e211275d20f7.md
@@ -1,4 +0,0 @@
-## students
-| 班级 | 姓名 | 年龄 | 性别 |
-| --- | --- | --- | --- |
-| 1 | 曹 | 123 | 男 |
--- a/server/resource/markdown/22d840d2-1752-450a-8788-a780903ca946.md
+++ b/server/resource/markdown/22d840d2-1752-450a-8788-a780903ca946.md
@@ -1,13 +0,0 @@
-# 1 . 你好么？
-
-### 表哥啊啊
-
-大叔大婶打扫的暗示打扫暗示
-
-> 太好了
-
-```python
-
-print("hello world")
-
-```
--- a/server/resource/markdown/2f2d2373-9f33-46c0-b129-5ababd3cedb8.md
+++ b/server/resource/markdown/2f2d2373-9f33-46c0-b129-5ababd3cedb8.md
@@ -1,13 +0,0 @@
-# 1 . 你好么？
-
-### 表哥啊啊
-
-大叔大婶打扫的暗示打扫暗示
-
-> 太好了
-
-```python
-
-print("hello world")
-
-```
--- a/server/resource/markdown/324ddd68-4c89-4f6b-9246-0447eddca73a.md
+++ b/server/resource/markdown/324ddd68-4c89-4f6b-9246-0447eddca73a.md
@@ -1,13 +0,0 @@
-# 1 . 你好么？
-
-### 表哥啊啊
-
-大叔大婶打扫的暗示打扫暗示
-
-> 太好了
-
-```python
-
-print("hello world")
-
-```
--- a/server/resource/markdown/617ddab2-dc12-4a58-91ff-45c614a012c1.md
+++ b/server/resource/markdown/617ddab2-dc12-4a58-91ff-45c614a012c1.md
@@ -1,13 +0,0 @@
-# 1 . 你好么？
-
-### 表哥啊啊
-
-大叔大婶打扫的暗示打扫暗示
-
-> 太好了
-
-```python
-
-print("hello world")
-
-```
--- a/server/resource/markdown/698e892b-cf0c-4fa8-998a-d46c5ec5dc5d.md
+++ b/server/resource/markdown/698e892b-cf0c-4fa8-998a-d46c5ec5dc5d.md
--- a/server/resource/markdown/782449c6-e4f1-47d5-b90e-341205540439.md
+++ b/server/resource/markdown/782449c6-e4f1-47d5-b90e-341205540439.md
@@ -1,13 +0,0 @@
-# 1 . 你好么？
-
-### 表哥啊啊
-
-大叔大婶打扫的暗示打扫暗示
-
-> 太好了
-
-```python
-
-print("hello world")
-
-```
--- a/server/resource/markdown/8acd5ffa-f5c1-4af9-9e4a-b7357324fd53.md
+++ b/server/resource/markdown/8acd5ffa-f5c1-4af9-9e4a-b7357324fd53.md
@@ -1,5 +0,0 @@
-sk-5706307e3e3a4eb09452dbf0bb87fe31
-
-https://dashscope.aliyuncs.com/compatible-mode/v1
-
-qwen3.5-flash
--- a/server/resource/markdown/979e94ae-6b4e-4952-8ebe-4de34d38d10d.md
+++ b/server/resource/markdown/979e94ae-6b4e-4952-8ebe-4de34d38d10d.md
@@ -1,3 +0,0 @@
-| 班级 | 姓名 | 年龄 | 性别 |
-| --- | --- | --- | --- |
-| 1 | 曹 | 123 | 男 |
--- a/server/resource/markdown/a64f5576-c3c1-483f-aadd-1cbbf0122651.md
+++ b/server/resource/markdown/a64f5576-c3c1-483f-aadd-1cbbf0122651.md
@@ -1,5 +0,0 @@
-sk-5706307e3e3a4eb09452dbf0bb87fe31
-
-https://dashscope.aliyuncs.com/compatible-mode/v1
-
-qwen3.5-flash
--- a/server/resource/markdown/b99f6ed4-0a54-4c4c-bb37-bb3e907bd5d1.md
+++ b/server/resource/markdown/b99f6ed4-0a54-4c4c-bb37-bb3e907bd5d1.md
@@ -1,13 +0,0 @@
-# 1 . 你好么？
-
-### 表哥啊啊
-
-大叔大婶打扫的暗示打扫暗示
-
-> 太好了
-
-```python
-
-print("hello world")
-
-```
--- a/server/resource/markdown/eabb7c06-a9ab-4c72-8361-9855d4d92731.md
+++ b/server/resource/markdown/eabb7c06-a9ab-4c72-8361-9855d4d92731.md
@@ -1,3 +0,0 @@
-| 겯섬 | 檎츰 | 쾨쥑 | 昑깎 |
-| --- | --- | --- | --- |
-| 1 | 꿀 | 123 | 켕 |
--- a/server/resource/markdown/ebb099b5-1e85-4653-9c3f-7adc42e0f158.md
+++ b/server/resource/markdown/ebb099b5-1e85-4653-9c3f-7adc42e0f158.md
--- a/server/resource/markdown/ebbb8a77-5f54-400a-b398-1ce7ce72e6ef.md
+++ b/server/resource/markdown/ebbb8a77-5f54-400a-b398-1ce7ce72e6ef.md
@@ -1,5 +0,0 @@
-sk-5706307e3e3a4eb09452dbf0bb87fe31
-
-https://dashscope.aliyuncs.com/compatible-mode/v1
-
-qwen3.5-flash
--- a/web/package-lock.json
+++ b/web/package-lock.json
@@ -20,6 +20,7 @@
        "vue-router": "^4.3.0"
      },
      "devDependencies": {
+        "@types/papaparse": "^5.5.2",
        "@vitejs/plugin-vue": "^5.0.4",
        "autoprefixer": "^10.4.19",
        "postcss": "^8.4.38",
@@ -982,6 +983,27 @@
        "@types/lodash": "*"
      }
    },
+    "node_modules/@types/node": {
+      "version": "25.3.5",
+      "resolved": "https://registry.npmmirror.com/@types/node/-/node-25.3.5.tgz",
+      "integrity": "sha512-oX8xrhvpiyRCQkG1MFchB09f+cXftgIXb3a7UUa4Y3wpmZPw5tyZGTLWhlESOLq1Rq6oDlc8npVU2/9xiCuXMA==",
+      "dev": true,
+      "license": "MIT",
+      "peer": true,
+      "dependencies": {
+        "undici-types": "~7.18.0"
+      }
+    },
+    "node_modules/@types/papaparse": {
+      "version": "5.5.2",
+      "resolved": "https://registry.npmmirror.com/@types/papaparse/-/papaparse-5.5.2.tgz",
+      "integrity": "sha512-gFnFp/JMzLHCwRf7tQHrNnfhN4eYBVYYI897CGX4MY1tzY9l2aLkVyx2IlKZ/SAqDbB3I1AOZW5gTMGGsqWliA==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "@types/node": "*"
+      }
+    },
    "node_modules/@types/web-bluetooth": {
      "version": "0.0.20",
      "resolved": "https://registry.npmmirror.com/@types/web-bluetooth/-/web-bluetooth-0.0.20.tgz",
@@ -2624,6 +2646,13 @@
        "node": ">=14.17"
      }
    },
+    "node_modules/undici-types": {
+      "version": "7.18.2",
+      "resolved": "https://registry.npmmirror.com/undici-types/-/undici-types-7.18.2.tgz",
+      "integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==",
+      "dev": true,
+      "license": "MIT"
+    },
    "node_modules/update-browserslist-db": {
      "version": "1.2.3",
      "resolved": "https://registry.npmmirror.com/update-browserslist-db/-/update-browserslist-db-1.2.3.tgz",
--- a/web/package.json
+++ b/web/package.json
@@ -21,6 +21,7 @@
    "vue-router": "^4.3.0"
  },
  "devDependencies": {
+    "@types/papaparse": "^5.5.2",
    "@vitejs/plugin-vue": "^5.0.4",
    "autoprefixer": "^10.4.19",
    "postcss": "^8.4.38",
--- a/web/src/components/Sidebar.vue
+++ b/web/src/components/Sidebar.vue
@@ -39,42 +39,45 @@ interface MenuItem {
  path?: string
 }

-const mainMenu = computed<MenuItem[]>(() => [
-  { name: 'Dashboard', icon: 'fa-gauge', path: '/dashboard' },
-  { name: 'Agents', icon: 'fa-robot', badge: 3, path: '/agents' },
-  { name: 'Script', icon: 'fa-code', path: '/script' },
+// Group 1: Chat, Agents (both entries point at '/agents')
+const group1 = computed(() => [
+  { name: 'Chat', icon: 'fa-robot', path: '/agents' },
+  { name: 'Agents', icon: 'fa-users', badge: 3, path: '/agents' },
+])
+
+// Group 2: Database, Knowledge (badges come from the reactive counters)
+const group2 = computed(() => [
   { name: 'Database', icon: 'fa-database', path: '/database', badge: databaseCount.value },
   { name: 'Knowledge', icon: 'fa-brain', path: '/knowledge', badge: knowledgeCount.value },
 ])

-const middleMenu: MenuItem[] = [
+// Group 3: Skills, Tools, Script
+const group3 = computed(() => [
   { name: 'Skills', icon: 'fa-wand-magic-sparkles', badge: 21, path: '/mcp' },
   { name: 'Tools', icon: 'fa-tools', badge: 13, path: '/model-apis' },
-]
+  { name: 'Script', icon: 'fa-code', path: '/script' },
+])

-const bottomMenu: MenuItem[] = [
-  { name: 'Settings', icon: 'fa-gear', path: '/settings' },
-]
-
-const bottomMenu2: MenuItem[] = [
+// Group 4: Dashboard, Account, Settings
+const group4 = computed(() => [
+  { name: 'Dashboard', icon: 'fa-gauge', path: '/dashboard' },
   { name: 'Account', icon: 'fa-user', path: '/account' },
-]
+  { name: 'Settings', icon: 'fa-gear', path: '/settings' },
+])

+// Name of the menu entry to highlight for the current route path.
+// NOTE(review): 'Chat' and 'Agents' both use '/agents', and that path always
+// resolves to 'Chat' below, so the 'Agents' entry is never highlighted —
+// confirm this is intended.
 const activeMenu = computed(() => {
   const currentPath = route.path
-  // Check main menu
-  const menuItem = mainMenu.value.find(item => item.path === currentPath)
-  if (menuItem) return menuItem.name
-  // Check middle menu (Skills, Tools)
-  const middleItem = middleMenu.find(item => item.path === currentPath)
-  if (middleItem) return middleItem.name
-  // Check bottom menu (Settings)
-  const bottomItem = bottomMenu.find(item => item.path === currentPath)
-  if (bottomItem) return bottomItem.name
-  // Check bottomMenu2 (Account)
-  const bottomItem2 = bottomMenu2.find(item => item.path === currentPath)
-  if (bottomItem2) return bottomItem2.name
-  return 'Dashboard'
+
+  // Special case for /agents - prioritize Chat over Agents
+  if (currentPath === '/agents') {
+    return 'Chat'
+  }
+
+  // Check all groups; the first entry whose path equals the route wins
+  const allGroups = [...group1.value, ...group2.value, ...group3.value, ...group4.value]
+  const item = allGroups.find(item => item.path === currentPath)
+  if (item) return item.name
+  // Paths that match no entry fall back to 'Chat'
+  return 'Chat'
 })

 const navigateTo = (item: MenuItem) => {
@@ -129,8 +132,8 @@ const handleUserCommand = (command: string) => {
    <!-- 导航菜单 -->
    <nav class="flex-1 px-3 py-2">
      <ul class="space-y-1">
-        <!-- Dashboard, Agents -->
-        <li v-for="item in mainMenu.slice(0, 2)" :key="item.name">
+        <!-- 第1组: Chat, Agents -->
+        <li v-for="item in group1" :key="item.name">
          <a
            href="#"
            class="flex items-center justify-between px-3 py-2.5 rounded-lg transition-colors text-sm"
@@ -148,8 +151,8 @@ const handleUserCommand = (command: string) => {
        <!-- 分隔线1 -->
        <li class="my-4 border-t border-dark-500"></li>

-        <!-- Database, Knowledge -->
-        <li v-for="item in mainMenu.slice(2)" :key="item.name">
+        <!-- 第2组: Database, Knowledge -->
+        <li v-for="item in group2" :key="item.name">
          <a
            href="#"
            class="flex items-center justify-between px-3 py-2.5 rounded-lg transition-colors text-sm"
@@ -167,8 +170,8 @@ const handleUserCommand = (command: string) => {
        <!-- 分隔线2 -->
        <li class="my-4 border-t border-dark-500"></li>

-        <!-- Skills & Tools -->
-        <li v-for="item in middleMenu" :key="item.name">
+        <!-- 第3组: Skills, Tools, Script -->
+        <li v-for="item in group3" :key="item.name">
          <a
            href="#"
            class="flex items-center justify-between px-3 py-2.5 rounded-lg transition-colors text-sm"
@@ -183,8 +186,11 @@ const handleUserCommand = (command: string) => {
          </a>
        </li>

-        <!-- Settings -->
-        <li v-for="item in bottomMenu" :key="item.name">
+        <!-- 分隔线3 -->
+        <li class="my-4 border-t border-dark-500"></li>
+
+        <!-- 第4组: Dashboard, Account, Settings -->
+        <li v-for="item in group4" :key="item.name">
          <a
            href="#"
            class="flex items-center justify-between px-3 py-2.5 rounded-lg transition-colors text-sm"
@@ -202,24 +208,6 @@ const handleUserCommand = (command: string) => {
            </div>
          </a>
        </li>
-
-        <!-- 分隔线 -->
-        <li class="my-4 border-t border-dark-500"></li>
-
-        <!-- Account -->
-        <li v-for="item in bottomMenu2" :key="item.name">
-          <a
-            href="#"
-            class="flex items-center justify-between px-3 py-2.5 rounded-lg transition-colors text-sm"
-            :class="activeMenu === item.name ? 'bg-dark-600 text-white' : 'text-gray-400 hover:bg-dark-600 hover:text-white'"
-            @click="navigateTo(item)"
-          >
-            <div class="flex items-center gap-3">
-              <i :class="['fa-solid', item.icon, 'w-5', 'text-center']"></i>
-              <span>{{ item.name }}</span>
-            </div>
-          </a>
-        </li>
      </ul>
    </nav>

--- a/web/src/views/Agents.vue
+++ b/web/src/views/Agents.vue
--- a/web/src/views/Knowledge.vue
+++ b/web/src/views/Knowledge.vue
@@ -1,6 +1,6 @@
 <script setup lang="ts">
 import { ref, computed, onMounted } from 'vue'
-import { ElMessage } from 'element-plus'
+import { ElMessage, ElMessageBox } from 'element-plus'
 import { useModelSettings } from './settings/useModelSettings'
 import { fetchKnowledgeBases, createKnowledgeBase as apiCreateKnowledgeBase, deleteKnowledgeBase as apiDeleteKnowledgeBase, fetchKnowledgeDocuments } from './knowledge/useKnowledge'
 import VueOfficeDocx from '@vue-office/docx'
@@ -42,6 +42,11 @@ const embeddingModels = computed(() => {
  return models.value.filter((m: any) => m.model_type === 'embedding')
 })

+// Subset of the loaded models whose model_type is 'vlm'
+const vlmModels = computed(() =>
+  models.value.filter((m: any) => m.model_type === 'vlm')
+)
+
 // 步骤验证
 const step1Valid = computed(() => !!newKbForm.value.name.trim())
 const step2Valid = computed(() => !!modelConfig.value.llmModelId && !!modelConfig.value.embeddingModelId)
@@ -188,6 +193,7 @@ const newKbForm = ref({
 const modelConfig = ref({
  llmModelId: '',
  embeddingModelId: '',
+  vlmModelId: '',
 })

 const parsingConfig = ref({
@@ -212,7 +218,11 @@ const storageConfig = ref({
 const openCreateDialog = () => {
  createStep.value = 1
  newKbForm.value = { name: '', description: '' }
-  modelConfig.value = { llmModelId: '', embeddingModelId: '' }
+  modelConfig.value = {
+    llmModelId: '',
+    embeddingModelId: '',
+    vlmModelId: '',
+  }
  parsingConfig.value = {
    enablePdf: true,
    engine: 'markitdown',
@@ -222,13 +232,23 @@ const openCreateDialog = () => {
    highRes: false,
    fileSizeLimit: '5242880',
  }
-  storageConfig.value = { type: 'local' }
+  storageConfig.value = {
+    type: 'local',
+    endpoint: '',
+    accessKeyId: '',
+    secretAccessKey: '',
+    bucket: '',
+  }
  showCreateDialog.value = true
 }

 const cancelCreate = () => {
  newKbForm.value = { name: '', description: '' }
-  modelConfig.value = { llmModelId: '', embeddingModelId: '' }
+  modelConfig.value = {
+    llmModelId: '',
+    embeddingModelId: '',
+    vlmModelId: '',
+  }
  parsingConfig.value = {
    enablePdf: true,
    engine: 'markitdown',
@@ -238,7 +258,13 @@ const cancelCreate = () => {
    highRes: false,
    fileSizeLimit: '5242880',
  }
-  storageConfig.value = { type: 'local' }
+  storageConfig.value = {
+    type: 'local',
+    endpoint: '',
+    accessKeyId: '',
+    secretAccessKey: '',
+    bucket: '',
+  }
  showCreateDialog.value = false
 }

@@ -254,6 +280,10 @@ const createKnowledgeBase = async () => {
      enable_pdf: parsingConfig.value.enablePdf,
      pandoc: parsingConfig.value.pandoc,
    },
+    vlm_config: modelConfig.value.vlmModelId ? {
+      enabled: true,
+      model_id: modelConfig.value.vlmModelId,
+    } : undefined,
    storage_config: {
      type: storageConfig.value.type,
      endpoint: storageConfig.value.type === 'minio' ? storageConfig.value.endpoint : undefined,
@@ -266,7 +296,11 @@ const createKnowledgeBase = async () => {
  if (result.success) {
    await fetchKbList()
    newKbForm.value = { name: '', description: '' }
-    modelConfig.value = { llmModelId: '', embeddingModelId: '' }
+    modelConfig.value = {
+      llmModelId: '',
+      embeddingModelId: '',
+      vlmModelId: '',
+    }
    parsingConfig.value = {
      enablePdf: true,
      engine: 'markitdown',
@@ -316,12 +350,26 @@ const cancelEdit = () => {

 // 删除知识库
 const deleteKb = async (id: string) => {
-  const result = await apiDeleteKnowledgeBase(id)
-  if (result.success) {
-    await fetchKbList()
-    ElMessage.success('Knowledge base deleted')
-  } else {
-    ElMessage.error(result.message || 'Failed to delete knowledge base')
+  try {
+    await ElMessageBox.confirm(
+      'Are you sure you want to delete this knowledge base? This action cannot be undone.',
+      'Delete Knowledge Base',
+      { confirmButtonText: 'Delete', cancelButtonText: 'Cancel', type: 'warning' }
+    )
+  } catch {
+    return // confirm() rejected: dialog was cancelled or closed, nothing to delete
+  }
+  // Separate try: a failure while deleting/refreshing must be reported, not mistaken for a cancel
+  try {
+    const result = await apiDeleteKnowledgeBase(id)
+    if (result.success) {
+      await fetchKbList()
+      ElMessage.success('Knowledge base deleted')
+    } else {
+      ElMessage.error(result.message || 'Failed to delete knowledge base')
+    }
+  } catch {
+    ElMessage.error('Failed to delete knowledge base')
   }
 }

@@ -890,6 +938,23 @@ const deleteDocument = async (docId: string) => {
                  </el-option>
                </el-select>
              </el-form-item>
+
+              <!-- VLM Configuration -->
+              <el-form-item label="VLM Model (Optional)">
+                <el-select v-model="modelConfig.vlmModelId" placeholder="Select a VLM model" class="w-full" popper-class="dark-select-dropdown" clearable>
+                  <el-option
+                    v-for="model in vlmModels"
+                    :key="model.id"
+                    :label="model.name"
+                    :value="model.id"
+                  >
+                    <div class="model-option">
+                      <span class="model-name">{{ model.name }}</span>
+                      <span class="model-info">{{ model.provider }} - {{ model.model }}</span>
+                    </div>
+                  </el-option>
+                </el-select>
+              </el-form-item>
            </el-form>
          </div>

--- a/web/src/views/knowledge/useKnowledge.ts
+++ b/web/src/views/knowledge/useKnowledge.ts
@@ -77,6 +77,10 @@ export const createKnowledgeBase = async (params: {
    enable_pdf?: boolean
    pandoc?: boolean
  }
+  vlm_config?: {
+    enabled: boolean
+    model_id: string
+  }
  storage_config?: {
    type: string
    endpoint?: string
@@ -195,7 +199,7 @@ export const reparseDocument = async (kbId: string, docId: string): Promise<{ su
 }

 // 获取文档预览内容
-export const getDocumentPreview = async (kbId: string, docId: string, page: number = 1): Promise<{ success: boolean; data?: { total_pages: number; current_page: number; content: string }; message?: string }> => {
+export const getDocumentPreview = async (kbId: string, docId: string, page: number = 1): Promise<{ success: boolean; data?: { total_pages: number; current_page: number; content: string; content_type?: string }; message?: string }> => {
  try {
    const response = await fetch(`${API_BASE}/api/knowledge/${kbId}/documents/${docId}/preview?page=${page}`)
    const data = await response.json()
Author	SHA1	Message	Date
DESKTOP-72TV0V4\caoxiaozhu	b8110b6bdd	chore: 添加上传文件存储目录 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-10 15:42:46 +08:00
DESKTOP-72TV0V4\caoxiaozhu	8208858f38	chore: 清理过期markdown文件并添加截图 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-10 15:42:33 +08:00
DESKTOP-72TV0V4\caoxiaozhu	cac05b4297	feat: 优化前端页面和组件 - 重构 Agents 页面 - 优化 Knowledge 页面 - 更新侧边栏导航 - 添加前端依赖 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-10 15:42:21 +08:00
DESKTOP-72TV0V4\caoxiaozhu	0a9f6e278e	feat: 优化后端知识库服务和文档解析 - 更新文档解析客户端 - 优化知识库服务逻辑 - 更新 protobuf 定义 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-10 15:02:55 +08:00
DESKTOP-72TV0V4\caoxiaozhu	d24b29afe4	feat: 完善 AI-Core 文档解析器 - 添加多种文档解析器 (PDF, Word, Excel, Markdown 等) - 添加基础解析器和链式解析器 - 添加存储和注册机制 - 添加 gRPC 服务实现 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-10 15:01:52 +08:00