Compare commits

...

5 Commits

Author SHA1 Message Date
b8110b6bdd chore: 添加上传文件存储目录
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 15:42:46 +08:00
8208858f38 chore: 清理过期markdown文件并添加截图
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 15:42:33 +08:00
cac05b4297 feat: 优化前端页面和组件
- 重构 Agents 页面
- 优化 Knowledge 页面
- 更新侧边栏导航
- 添加前端依赖

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 15:42:21 +08:00
0a9f6e278e feat: 优化后端知识库服务和文档解析
- 更新文档解析客户端
- 优化知识库服务逻辑
- 更新 protobuf 定义

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 15:02:55 +08:00
d24b29afe4 feat: 完善 AI-Core 文档解析器
- 添加多种文档解析器 (PDF, Word, Excel, Markdown 等)
- 添加基础解析器和链式解析器
- 添加存储和注册机制
- 添加 gRPC 服务实现

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 15:01:52 +08:00
48 changed files with 23871 additions and 2673 deletions

66
ai-core/main.py Normal file
View File

@@ -0,0 +1,66 @@
"""
AI-Core Document Parser gRPC Server
启动命令: python main.py [--port PORT] [--max-workers MAX_WORKERS] [--log-level LEVEL]
"""
import argparse
import logging
import os
import sys
sys.path.insert(0, os.path.dirname(__file__))
from service.grpc_server import serve
DEFAULT_PORT = 50051
DEFAULT_MAX_WORKERS = 10
def main():
    """Parse CLI options, configure logging and run the gRPC server.

    Recognised options are ``--port``, ``--max-workers`` and ``--log-level``.
    A ``KeyboardInterrupt`` is logged as a requested shutdown; any other
    exception raised by ``serve`` is logged with its traceback and the
    process exits with status 1.
    """
    cli = argparse.ArgumentParser(
        description="Document Parser gRPC Server",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # (flag, add_argument keyword arguments), registered in this order.
    option_specs = (
        (
            "--port",
            {"type": int, "default": DEFAULT_PORT, "help": "Port to listen on"},
        ),
        (
            "--max-workers",
            {
                "type": int,
                "default": DEFAULT_MAX_WORKERS,
                "help": "Maximum number of worker threads",
            },
        ),
        (
            "--log-level",
            {
                "type": str,
                "default": "INFO",
                "choices": ["DEBUG", "INFO", "WARNING", "ERROR"],
                "help": "Log level",
            },
        ),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    options = cli.parse_args()

    # The level name is restricted by ``choices`` above, so the attribute
    # lookup on the logging module always succeeds.
    logging.basicConfig(
        level=getattr(logging, options.log_level),
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
    log = logging.getLogger(__name__)
    log.info("Starting Document Parser gRPC Server")
    log.info("Port: %d", options.port)
    log.info("Max workers: %d", options.max_workers)

    try:
        serve(port=options.port, max_workers=options.max_workers)
    except KeyboardInterrupt:
        log.info("Server shutdown requested")
    except Exception as exc:
        log.error("Server error: %s", str(exc), exc_info=True)
        sys.exit(1)


if __name__ == "__main__":
    main()

View File

@@ -1,38 +1,10 @@
"""
Parser module for WeKnora document processing system.
This module provides document parsers for various file formats including:
- Microsoft Word documents (.doc, .docx)
- PDF documents
- Markdown files
- Plain text files
- Images with text content
- Web pages
The parsers extract content from documents and can split them into
meaningful chunks for further processing and indexing.
Parser module for AI-Core document processing.
"""
from .doc_parser import DocParser
from .docx2_parser import Docx2Parser
from .excel_parser import ExcelParser
from .image_parser import ImageParser
from .markdown_parser import MarkdownParser
from .parser import Parser
from .pdf_parser import PDFParser
from .registry import ParserEngineRegistry, registry
from .web_parser import WebParser
from .parser_simple import Parser, Document
# Names re-exported as the package's public API.
# NOTE(review): this listing appears to interleave removed and added lines of
# a diff hunk ("-1,38 +1,10"); confirm which of these names the current
# module actually imports before relying on the list — TODO confirm.
__all__ = [
"Docx2Parser",
"DocParser",
"PDFParser",
"MarkdownParser",
"ImageParser",
"WebParser",
"Parser",
"ExcelParser",
"ParserEngineRegistry",
"registry",
"Document",
]

View File

@@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
import logging
import os
from abc import ABC, abstractmethod
from typing import Optional
from docreader.models.document import Document
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
class BaseParser(ABC):
    """Abstract base class shared by all document parsers.

    Subclasses implement :meth:`parse_into_text`; :meth:`parse` wraps that
    call with logging. As noted by the original authors, after the
    lightweight refactoring a parser only extracts markdown text and raw
    image references — chunking, image storage, OCR and VLM captioning are
    handled by the Go App module.
    """

    def __init__(
        self,
        file_name: str = "",
        file_type: Optional[str] = None,
        **kwargs,
    ):
        """Remember the file name and resolve the file type.

        Args:
            file_name: Name of the file being parsed.
            file_type: Explicit type; when falsy, the extension of
                ``file_name`` (without the leading dot) is used instead.
            **kwargs: Accepted and ignored so subclasses and callers can
                pass extra options through.
        """
        self.file_name = file_name
        if file_type:
            self.file_type = file_type
        else:
            _, extension = os.path.splitext(file_name)
            self.file_type = extension.lstrip(".")
        logger.info(
            "Initializing parser for file=%s, type=%s",
            file_name,
            self.file_type,
        )

    @abstractmethod
    def parse_into_text(self, content: bytes) -> Document:
        """Convert raw document bytes into markdown text.

        Returns:
            Document with ``content`` (markdown string) and optional
            ``images`` dict mapping storage-relative paths to base64 data.
        """

    def parse(self, content: bytes) -> Document:
        """Run :meth:`parse_into_text` and log input/output sizes.

        No chunking, no OCR, no VLM caption — those are done in Go.
        """
        parser_name = self.__class__.__name__
        logger.info(
            "Parsing document with %s, bytes: %d",
            parser_name,
            len(content),
        )
        result = self.parse_into_text(content)
        logger.info(
            "Extracted %d characters from %s",
            len(result.content),
            self.file_name,
        )
        return result

View File

@@ -0,0 +1,176 @@
"""
Chain Parser Module
This module provides two chain-of-responsibility pattern implementations for document parsing:
1. FirstParser: Tries multiple parsers sequentially until one succeeds
2. PipelineParser: Chains parsers where each parser processes the output of the previous one
"""
import logging
from typing import Dict, List, Tuple, Type
from docreader.models.document import Document
from docreader.parser.base_parser import BaseParser
from docreader.utils import endecode
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
class FirstParser(BaseParser):
    """Try several parsers in order and keep the first valid result.

    Every class listed in ``_parser_cls`` is instantiated with the same
    constructor arguments as this parser. ``parse_into_text`` asks each
    instance in turn; a parser that raises is logged and skipped, and the
    first document whose ``is_valid()`` is truthy is returned. When no parser
    yields a valid document an empty ``Document`` is returned.

    Usage:
        CustomParser = FirstParser.create(MarkdownParser, HTMLParser)
        parser = CustomParser()
        document = parser.parse_into_text(content_bytes)
    """

    # Parser classes to instantiate, in the order they are attempted.
    _parser_cls: Tuple[Type["BaseParser"], ...] = ()

    def __init__(self, *args, **kwargs):
        """Build one instance of every configured parser class."""
        super().__init__(*args, **kwargs)
        self._parsers: List[BaseParser] = [
            candidate_cls(*args, **kwargs) for candidate_cls in self._parser_cls
        ]

    def parse_into_text(self, content: bytes) -> Document:
        """Return the document from the first parser that succeeds.

        Args:
            content: Raw bytes content to be parsed.

        Returns:
            The first valid document produced, or an empty ``Document`` when
            every parser raised or returned an invalid document.
        """
        for candidate in self._parsers:
            candidate_name = candidate.__class__.__name__
            logger.info(f"FirstParser: using parser {candidate_name}")
            try:
                result = candidate.parse_into_text(content)
            except Exception:
                # A failing parser must not abort the chain.
                logger.exception(
                    "FirstParser: parser %s raised exception; trying next parser",
                    candidate_name,
                )
                continue
            if not result.is_valid():
                continue
            logger.info(f"FirstParser: parser {candidate_name} succeeded")
            return result
        return Document()

    @classmethod
    def create(cls, *parser_classes: Type["BaseParser"]) -> Type["FirstParser"]:
        """Build a subclass preconfigured with ``parser_classes``.

        Args:
            *parser_classes: BaseParser subclasses to try, in order.

        Returns:
            A new subclass named ``FirstParser_<Name1>_<Name2>...`` whose
            ``_parser_cls`` is the given tuple.
        """
        suffix = "_".join(klass.__name__ for klass in parser_classes)
        return type(f"FirstParser_{suffix}", (cls,), {"_parser_cls": parser_classes})
class PipelineParser(BaseParser):
    """Feed content through a fixed sequence of parsers.

    Each stage receives the previous stage's ``content`` re-encoded to bytes.
    Images reported by every stage are merged into the document returned by
    the last stage.

    Usage:
        CustomParser = PipelineParser.create(PreParser, MarkdownParser, PostParser)
        parser = CustomParser()
        document = parser.parse_into_text(content_bytes)
    """

    # Parser classes to instantiate and chain, in execution order.
    _parser_cls: Tuple[Type["BaseParser"], ...] = ()

    def __init__(self, *args, **kwargs):
        """Build one instance of every configured stage class."""
        super().__init__(*args, **kwargs)
        self._parsers: List[BaseParser] = [
            stage_cls(*args, **kwargs) for stage_cls in self._parser_cls
        ]

    def parse_into_text(self, content: bytes) -> Document:
        """Run every stage in order and return the final document.

        Args:
            content: Raw bytes handed to the first stage.

        Returns:
            The last stage's document with the images of all stages merged
            in; an empty ``Document`` when no stages are configured.
        """
        collected_images: Dict[str, str] = {}
        current = Document()
        payload = content
        for stage in self._parsers:
            logger.info(f"PipelineParser: using parser {stage.__class__.__name__}")
            current = stage.parse_into_text(payload)
            # The next stage consumes this stage's text as bytes.
            payload = endecode.encode_bytes(current.content)
            collected_images.update(current.images)
        # Re-attach images produced by earlier stages to the final document.
        current.images.update(collected_images)
        return current

    @classmethod
    def create(cls, *parser_classes: Type["BaseParser"]) -> Type["PipelineParser"]:
        """Build a subclass preconfigured with ``parser_classes``.

        Args:
            *parser_classes: BaseParser subclasses to chain, in order.

        Returns:
            A new subclass named ``PipelineParser_<Name1>_<Name2>...`` whose
            ``_parser_cls`` is the given tuple.
        """
        suffix = "_".join(klass.__name__ for klass in parser_classes)
        return type(f"PipelineParser_{suffix}", (cls,), {"_parser_cls": parser_classes})
if __name__ == "__main__":
    # Manual smoke test: wrap MarkdownParser in a FirstParser and print the
    # document produced for a tiny payload.
    from docreader.parser.markdown_parser import MarkdownParser

    markdown_first_cls = FirstParser.create(MarkdownParser)
    demo_parser = markdown_first_cls()
    demo_document = demo_parser.parse_into_text(b"aaa")
    print(demo_document)

View File

@@ -0,0 +1,331 @@
import logging
import os
import subprocess
from typing import List, Optional
import textract
from docreader.config import CONFIG
from docreader.models.document import Document
from docreader.parser.docx2_parser import Docx2Parser
from docreader.utils.tempfile import TempDirContext, TempFileContext
logger = logging.getLogger(__name__)
class SandboxExecutor:
    """Run external commands with the proxy environment variables overridden.

    The child process inherits the current environment, except that
    ``http_proxy``/``https_proxy`` (both lower and upper case) are set to the
    configured proxy URL.
    """

    def __init__(self, proxy: Optional[str] = None, default_timeout: int = 60):
        """Store the proxy URL and the command timeout.

        Args:
            proxy: Proxy URL exported to child processes. When falsy, falls
                back to ``CONFIG.external_https_proxy`` and finally to
                ``http://128.0.0.1:1`` (described by the original authors as
                a default "blocking" proxy).
            default_timeout: Seconds to wait for a command before it is
                killed.
        """
        # ``or`` also turns an empty string into the next fallback.
        self.proxy = proxy or CONFIG.external_https_proxy or "http://128.0.0.1:1"
        self.default_timeout = default_timeout

    def execute_in_sandbox(self, cmd: List[str]) -> tuple:
        """Run ``cmd`` with the first sandbox method that works.

        Args:
            cmd: Command and arguments, as an argv list.

        Returns:
            Tuple of (stdout, stderr, returncode).

        Raises:
            RuntimeError: If every sandbox method raised; the last underlying
                error is attached as ``__cause__``.
        """
        # Ordered by preference; currently only one method exists.
        sandbox_methods = [
            self._execute_with_proxy,
        ]
        last_error: Optional[Exception] = None
        for method in sandbox_methods:
            try:
                return method(cmd)
            except Exception as e:
                logger.warning(f"Sandbox method {method.__name__} failed: {e}")
                last_error = e
        # Chain the cause so callers can see why the command failed.
        raise RuntimeError("All sandbox methods failed") from last_error

    def _execute_with_proxy(self, cmd: List[str]) -> tuple:
        """Run ``cmd`` with the proxy variables set in its environment.

        Args:
            cmd: Command and arguments, as an argv list.

        Returns:
            Tuple of (stdout, stderr, returncode); stdout/stderr are bytes.

        Raises:
            RuntimeError: If the command does not finish within
                ``default_timeout`` seconds.
        """
        env = os.environ.copy()
        if self.proxy:
            for key in ("http_proxy", "https_proxy", "HTTP_PROXY", "HTTPS_PROXY"):
                env[key] = self.proxy
        logger.info(f"Executing command with proxy: {' '.join(cmd)}")
        if self.proxy:
            logger.info(f"Using proxy: {self.proxy}")

        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=env,
        )
        try:
            stdout, stderr = process.communicate(timeout=self.default_timeout)
        except subprocess.TimeoutExpired:
            process.kill()
            # communicate() does not reap the child on timeout; finish the
            # communication after kill() so the process is waited for and
            # its pipes are closed instead of leaking.
            process.communicate()
            raise RuntimeError(
                f"Command execution timeout after {self.default_timeout} seconds"
            )
        return stdout, stderr, process.returncode
# NOTE(review): this repeats the module-level logger assignment made before
# the SandboxExecutor class; it rebinds the same named logger and looks
# redundant.
logger = logging.getLogger(__name__)
class DocParser(Docx2Parser):
    """Parser for legacy Word ``.doc`` files.

    The raw bytes are written to a temporary ``.doc`` file and handed to a
    chain of strategies; the first one returning a truthy document wins:

    1. Convert to DOCX with LibreOffice/OpenOffice (``soffice``) and parse
       the result with the inherited parser chain.
    2. Extract plain text with ``antiword``.

    External commands are run through :class:`SandboxExecutor`.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the parser and its sandbox executor."""
        super().__init__(*args, **kwargs)
        self.sandbox_executor = SandboxExecutor()

    def parse_into_text(self, content: bytes) -> Document:
        """Parse DOC bytes, trying each strategy in order.

        Args:
            content: Raw bytes of the ``.doc`` file.

        Returns:
            The first truthy document produced by a strategy, or a document
            with empty content when every strategy fails.
        """
        logger.info(f"Parsing DOC document, content size: {len(content)} bytes")
        handle_chain = [
            # 1. Convert to DOCX first so images can be extracted.
            self._parse_with_docx,
            # 2. Fall back to plain-text extraction with antiword.
            self._parse_with_antiword,
            # NOTE: _parse_with_textract is disabled due to SSRF vulnerability
        ]
        with TempFileContext(content, ".doc") as temp_file_path:
            for handle in handle_chain:
                try:
                    document = handle(temp_file_path)
                    if document:
                        return document
                except Exception as e:
                    # Best effort: a failing strategy only triggers the next.
                    logger.warning(f"Failed to parse DOC with {handle.__name__} {e}")
        return Document(content="")

    def _parse_with_docx(self, temp_file_path: str) -> Document:
        """Convert the DOC file to DOCX and parse the converted bytes.

        Raises:
            RuntimeError: If the conversion produced no DOCX content.
        """
        logger.info("Multimodal enabled, attempting to extract images from DOC")
        docx_content = self._try_convert_doc_to_docx(temp_file_path)
        if not docx_content:
            raise RuntimeError("Failed to convert DOC to DOCX")
        logger.info("Successfully converted DOC to DOCX, using DocxParser")
        # Start the lookup after Docx2Parser in the MRO so this class's own
        # parse_into_text is not re-entered.
        document = super(Docx2Parser, self).parse_into_text(docx_content)
        logger.info(f"Extracted {len(document.content)} characters using DocxParser")
        return document

    def _parse_with_antiword(self, temp_file_path: str) -> Document:
        """Extract plain text from the DOC file with ``antiword``.

        Raises:
            RuntimeError: If antiword cannot be found or exits non-zero.
        """
        logger.info("Attempting to parse DOC file with antiword")
        antiword_path = self._try_find_antiword()
        if not antiword_path:
            raise RuntimeError("antiword not found in PATH")
        cmd = [antiword_path, temp_file_path]
        logger.info("Executing antiword in sandbox with proxy configuration")
        stdout, stderr, returncode = self.sandbox_executor.execute_in_sandbox(cmd)
        if returncode != 0:
            raise RuntimeError(
                f"antiword extraction failed: {stderr.decode('utf-8', errors='ignore')}"
            )
        text = stdout.decode("utf-8", errors="ignore")
        logger.info(f"Successfully extracted {len(text)} characters using antiword")
        return Document(content=text)

    def _parse_with_textract(self, temp_file_path: str) -> Document:
        """Extract text with ``textract`` (not part of the active chain)."""
        logger.info(f"Parsing DOC file with textract: {temp_file_path}")
        text = textract.process(temp_file_path, method="antiword").decode("utf-8")
        logger.info(f"Successfully extracted {len(text)} bytes of DOC using textract")
        return Document(content=str(text))

    def _try_convert_doc_to_docx(self, doc_path: str) -> Optional[bytes]:
        """Convert a DOC file to DOCX using LibreOffice/OpenOffice.

        Args:
            doc_path: Path of the DOC file.

        Returns:
            Bytes of the first ``.docx`` file produced, or None when soffice
            is unavailable, the conversion fails, or no output file appears.
        """
        logger.info(f"Converting DOC to DOCX: {doc_path}")
        soffice_path = self._try_find_soffice()
        if not soffice_path:
            return None
        logger.info(f"Using {soffice_path} to convert DOC to DOCX")
        # The converted file lives in a temporary directory, so it must be
        # read before the context exits.
        with TempDirContext() as temp_dir:
            cmd = [
                soffice_path,
                "--headless",
                "--convert-to",
                "docx",
                "--outdir",
                temp_dir,
                doc_path,
            ]
            logger.info(f"Running command in sandbox: {' '.join(cmd)}")
            stdout, stderr, returncode = self.sandbox_executor.execute_in_sandbox(cmd)
            if returncode != 0:
                # errors="ignore" keeps non-UTF-8 tool output from raising
                # while the failure is being reported.
                logger.warning(
                    "Error converting DOC to DOCX: "
                    f"{stderr.decode('utf-8', errors='ignore')}"
                )
                return None
            docx_file = [
                file for file in os.listdir(temp_dir) if file.endswith(".docx")
            ]
            logger.info(f"Found {len(docx_file)} DOCX file(s) in temporary directory")
            for file in docx_file:
                converted_file = os.path.join(temp_dir, file)
                logger.info(f"Found converted file: {converted_file}")
                with open(converted_file, "rb") as f:
                    docx_content = f.read()
                logger.info(
                    f"Successfully read DOCX file, size: {len(docx_content)}"
                )
                return docx_content
        return None

    def _try_find_executable_path(
        self,
        executable_name: str,
        possible_path: Optional[List[str]] = None,
        environment_variable: Optional[List[str]] = None,
    ) -> Optional[str]:
        """Locate an executable.

        Candidates are checked in a fixed order: paths named by the given
        environment variables first (explicit overrides), then
        ``possible_path``, and finally a ``PATH`` lookup.

        Args:
            executable_name: Name looked up on ``PATH`` as a last resort.
            possible_path: Well-known absolute paths to probe.
            environment_variable: Names of environment variables that may
                hold the executable's path.

        Returns:
            The first existing path, or None if nothing was found.
        """
        # Local import: resolves PATH in-process instead of spawning an
        # external ``which`` binary that may not exist on the host.
        import shutil

        candidates: List[str] = []
        for env_var in environment_variable or ():
            value = os.environ.get(env_var, "")
            if value:
                candidates.append(value)
        candidates.extend(possible_path or ())

        # dict.fromkeys removes duplicates while keeping the priority order.
        for path in dict.fromkeys(candidates):
            if os.path.exists(path):
                logger.info(f"Found {executable_name} at {path}")
                return path

        path = shutil.which(executable_name)
        if path:
            logger.info(f"Found {executable_name} at {path}")
            return path
        logger.warning(f"Failed to find {executable_name}")
        return None

    def _try_find_soffice(self) -> Optional[str]:
        """Find the LibreOffice/OpenOffice ``soffice`` executable.

        Returns:
            Executable path, or None if not found.
        """
        possible_paths = [
            # Linux
            "/usr/bin/soffice",
            "/usr/lib/libreoffice/program/soffice",
            "/opt/libreoffice25.2/program/soffice",
            # macOS
            "/Applications/LibreOffice.app/Contents/MacOS/soffice",
            # Windows
            "C:\\Program Files\\LibreOffice\\program\\soffice.exe",
            "C:\\Program Files (x86)\\LibreOffice\\program\\soffice.exe",
        ]
        return self._try_find_executable_path(
            executable_name="soffice",
            possible_path=possible_paths,
            environment_variable=["LIBREOFFICE_PATH"],
        )

    def _try_find_antiword(self) -> Optional[str]:
        """Find the ``antiword`` executable.

        Returns:
            Executable path, or None if not found.
        """
        possible_paths = [
            # Linux/macOS
            "/usr/bin/antiword",
            "/usr/local/bin/antiword",
            # Windows
            "C:\\Program Files\\Antiword\\antiword.exe",
            "C:\\Program Files (x86)\\Antiword\\antiword.exe",
        ]
        return self._try_find_executable_path(
            executable_name="antiword",
            possible_path=possible_paths,
            environment_variable=["ANTIWORD_PATH"],
        )
if __name__ == "__main__":
    # Manual check: parse a local .doc file and log a preview of its text.
    logging.basicConfig(level=logging.DEBUG)
    file_name = "/path/to/your/test.doc"
    logger.info(f"Processing file: {file_name}")

    parser_options = {
        "file_name": file_name,
        "enable_multimodal": True,
        "chunk_size": 512,
        "chunk_overlap": 60,
    }
    doc_parser = DocParser(**parser_options)

    with open(file_name, "rb") as source:
        raw_bytes = source.read()
    parsed = doc_parser.parse_into_text(raw_bytes)

    logger.info(f"Processing complete, extracted text length: {len(parsed.content)}")
    logger.info(f"Sample text: {parsed.content[:200]}...")

View File

@@ -0,0 +1,28 @@
import logging
from docreader.parser.chain_parser import FirstParser
from docreader.parser.docx_parser import DocxParser
from docreader.parser.markitdown_parser import MarkitdownParser
logger = logging.getLogger(__name__)
class Docx2Parser(FirstParser):
    """DOCX parser built from two candidate parsers.

    Only the candidate list is configured here; how the candidates are
    instantiated and selected is defined by ``FirstParser``.
    """

    # Candidate parser classes, in the order they are listed to FirstParser.
    _parser_cls = (MarkitdownParser, DocxParser)
if __name__ == "__main__":
    # Manual check: parse a local .docx file and log every chunk of the
    # resulting document.
    logging.basicConfig(level=logging.DEBUG)
    your_file = "/path/to/your/file.docx"
    demo_parser = Docx2Parser(separators=[".", "?", "!", "", "", ""])

    with open(your_file, "rb") as source:
        raw_bytes = source.read()
    parsed = demo_parser.parse(raw_bytes)

    for piece in parsed.chunks:
        logger.info(f"chunk: {piece}")
    # Alternative check: call parse_into_text(raw_bytes) instead and inspect
    # the returned document's ``content`` and ``images``.

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,119 @@
"""
Excel Parser Module
This module provides functionality to parse Excel files (.xlsx, .xls) into
structured Document objects with text content and chunks. It supports multiple
sheets and handles various Excel formats using pandas.
"""
import logging
from io import BytesIO
from typing import List
import pandas as pd
from docreader.models.document import Chunk, Document
from docreader.parser.base_parser import BaseParser
logger = logging.getLogger(__name__)
class ExcelParser(BaseParser):
    """Parser for Excel workbooks (.xlsx, .xls).

    Every sheet is read with pandas. Each non-empty row becomes one line of
    the form ``"col1: val1,col2: val2,..."`` and one ``Chunk``.

    Features:
        - Processes all sheets of the workbook, in workbook order
        - Skips rows whose cells are all empty
        - Omits empty cells from a row's text
        - Records the character offsets of each row within the full text

    Example:
        >>> parser = ExcelParser()
        >>> with open("data.xlsx", "rb") as f:
        ...     document = parser.parse_into_text(f.read())
        >>> print(document.content)
        Name: John,Age: 30,City: NYC
        Name: Jane,Age: 25,City: LA
    """

    def parse_into_text(self, content: bytes) -> Document:
        """Parse Excel file bytes into a Document.

        Args:
            content: Raw bytes of the Excel file.

        Returns:
            Document whose ``content`` is the concatenation of all row lines
            and whose ``chunks`` holds one Chunk per row. ``seq`` numbers and
            ``start``/``end`` offsets run continuously across sheets.
        """
        chunks: List[Chunk] = []
        text: List[str] = []
        start, end = 0, 0

        # Context manager closes the workbook reader once all sheets are
        # read, instead of leaving it open until garbage collection.
        with pd.ExcelFile(BytesIO(content)) as excel_file:
            for excel_sheet_name in excel_file.sheet_names:
                df = excel_file.parse(sheet_name=excel_sheet_name)
                # Drop rows in which every cell is NaN.
                df = df.dropna(how="all")

                for _, row in df.iterrows():
                    # "column: value" for every non-null cell of the row.
                    page_content = [
                        f"{k}: {v}" for k, v in row.items() if pd.notna(v)
                    ]
                    if not page_content:
                        continue

                    content_row = ",".join(page_content) + "\n"
                    end += len(content_row)
                    text.append(content_row)
                    chunks.append(
                        Chunk(
                            content=content_row,
                            seq=len(chunks),
                            start=start,
                            end=end,
                        )
                    )
                    # The next row starts where this one ended.
                    start = end

        return Document(content="".join(text), chunks=chunks)
if __name__ == "__main__":
    # Manual check: parse a local workbook, then log the full text and the
    # first chunk only.
    logging.basicConfig(level=logging.DEBUG)
    your_file = "/path/to/your/file.xlsx"
    demo_parser = ExcelParser()

    with open(your_file, "rb") as source:
        raw_bytes = source.read()
    parsed = demo_parser.parse_into_text(raw_bytes)

    logger.error(parsed.content)
    first_chunk = next(iter(parsed.chunks), None)
    if first_chunk is not None:
        logger.error(first_chunk.content)

View File

@@ -0,0 +1,28 @@
import base64
import logging
import os
from docreader.models.document import Document
from docreader.parser.base_parser import BaseParser
logger = logging.getLogger(__name__)
class ImageParser(BaseParser):
    """Parser for standalone image files.

    The image is not interpreted. The returned document contains a single
    markdown image reference, and ``Document.images`` maps that reference
    path to the base64-encoded image bytes. According to the original note,
    the storage upload is then handled by the Go-side ImageResolver (or
    main.py's _resolve_images).
    """

    def parse_into_text(self, content: bytes) -> Document:
        """Wrap raw image bytes in a markdown reference.

        Args:
            content: Raw bytes of the image file.

        Returns:
            Document whose ``content`` is ``![<file_name>](images/<file_name>)``
            and whose ``images`` dict maps ``images/<file_name>`` to the
            base64 text of ``content``.
        """
        logger.info("Parsing image file=%s, size=%d bytes", self.file_name, len(content))
        # The reference path doubles as the key under which the data is stored.
        ref_path = f"images/{self.file_name}"
        text = f"![{self.file_name}]({ref_path})"
        images = {ref_path: base64.b64encode(content).decode()}
        return Document(content=text, images=images)

View File

@@ -0,0 +1,403 @@
"""
Markdown Parser Module
This module provides comprehensive Markdown parsing functionality including:
- Table formatting and standardization
- Base64 image extraction and conversion
- Image path replacement and URL generation
- Pipeline-based parsing with multiple stages
The parser uses a pipeline approach to process Markdown content through
multiple stages: table formatting -> image processing.
"""
import base64
import logging
import os
import re
import uuid
from typing import Dict, List, Match, Optional, Tuple
from docreader.models.document import Document
from docreader.parser.base_parser import BaseParser
from docreader.parser.chain_parser import PipelineParser
from docreader.utils import endecode
# Get logger object
logger = logging.getLogger(__name__)
class MarkdownTableUtil:
    """Utility class for normalising Markdown table formatting.

    Rows that start and end with a pipe are rewritten so that:
    - every cell is separated by `` | `` with single spaces around the text
    - alignment markers are reduced to ``---``, ``:---``, ``---:`` or ``:---:``
    - the row's original indentation is preserved
    - empty cells stay in place, so columns never shift

    Example:
        Input:  |Name|Age|City|
                |:---|---:|:---:|
                |Ann|25|Paris|
        Output: | Name | Age | City |
                | :--- | ---: | :---: |
                | Ann | 25 | Paris |
    """

    def __init__(self):
        """Compile the row-matching patterns."""
        # Alignment row, e.g. |:---|---:|:---:| (group 1 = indentation).
        self.align_pattern = re.compile(
            r"^([\t ]*)\|[\t ]*[:-]+(?:[\t ]*\|[\t ]*[:-]+)*[\t ]*\|[\t ]*$",
            re.MULTILINE,
        )
        # Any pipe-delimited row, header or data (group 1 = indentation).
        self.line_pattern = re.compile(
            r"^([\t ]*)\|[\t ]*[^|\r\n]*(?:[\t ]*\|[^|\r\n]*)*\|[\t ]*$",
            re.MULTILINE,
        )

    @staticmethod
    def _split_cells(row: str) -> List[str]:
        """Return the stripped cell texts of a pipe-delimited row.

        Both patterns guarantee that only whitespace precedes the first pipe
        and follows the last one, so those two split parts are discarded and
        everything in between is kept positionally, including empty cells.
        """
        return [cell.strip() for cell in row.split("|")[1:-1]]

    def format_table(self, content: str) -> str:
        """Format all Markdown tables in the content.

        Args:
            content: Raw Markdown text containing tables.

        Returns:
            The text with every table row rewritten in the normalised form;
            text that is not a table row is returned unchanged.
        """

        def process_align(match: Match[str]) -> str:
            """Rewrite an alignment row using canonical markers."""
            processed = []
            for col in self._split_cells(match.group(0)):
                left_colon = ":" if col.startswith(":") else ""
                right_colon = ":" if col.endswith(":") else ""
                processed.append(left_colon + "---" + right_colon)
            return match.group(1) + "| " + " | ".join(processed) + " |"

        def process_line(match: Match[str]) -> str:
            """Rewrite a header/data row with uniform spacing."""
            columns = self._split_cells(match.group(0))
            return match.group(1) + "| " + " | ".join(columns) + " |"

        # Regular rows first; this also touches alignment rows, which the
        # second pass then reduces to canonical markers.
        formatted_content = self.line_pattern.sub(process_line, content)
        formatted_content = self.align_pattern.sub(process_align, formatted_content)
        return formatted_content

    @staticmethod
    def _self_test():
        """Print the formatted version of a small sample document."""
        test_content = """
# 测试表格
普通文本---不会被匹配
## 表格1无前置空格
| 姓名 | 年龄 | 城市 |
| :---------- | -------: | :------ |
| 张三 | 25 | 北京 |
## 表格3前置4个空格+首尾|
| 产品 | 价格 | 库存 |
| :-------------: | ----------- | :-----------: |
| 手机 | 5999 | 100 |
"""
        util = MarkdownTableUtil()
        format_content = util.format_table(test_content)
        print(format_content)
class MarkdownTableFormatter(BaseParser):
    """Parser stage that normalises Markdown table formatting.

    The incoming bytes are decoded to text, passed through
    ``MarkdownTableUtil.format_table`` and returned as a new Document. No
    images are produced by this stage.

    Example:
        Input rows ``|Name|Age|``, ``|---|---|``, ``|John|30|`` are returned
        as ``| Name | Age |``, ``| --- | --- |``, ``| John | 30 |``.
    """

    def __init__(self, **kwargs):
        """Forward ``kwargs`` to BaseParser and create the table helper."""
        super().__init__(**kwargs)
        self.table_helper = MarkdownTableUtil()

    def parse_into_text(self, content: bytes) -> Document:
        """Decode the content and reformat its tables.

        Args:
            content: Raw Markdown content as bytes.

        Returns:
            Document with formatted table content.
        """
        # The original comment states that decode_bytes detects the encoding
        # automatically; its implementation is not visible here.
        decoded = endecode.decode_bytes(content)
        return Document(content=self.table_helper.format_table(decoded))
class MarkdownImageUtil:
"""Utility class for handling images in Markdown.
This class provides functionality to:
- Extract base64-encoded images from Markdown
- Extract image paths from Markdown
- Replace image paths with new URLs
- Convert base64 images to binary format
Supported formats:
- Base64 embedded images: ![alt](data:image/png;base64,iVBORw0...)
- Regular image links: ![alt](path/to/image.png)
"""
def __init__(self):
# Pattern to match base64 embedded images
# Captures: (1) alt text, (2) image format, (3) base64 data
self.b64_pattern = re.compile(
r"!\[([^\]]*)\]\(data:image/(\w+)\+?\w*;base64,([^\)]+)\)"
)
# Pattern to match regular image syntax
self.image_pattern = re.compile(r"!\[([^\]]*)\]\(([^)]+)\)")
# Pattern for replacing image paths
self.replace_pattern = re.compile(r"!\[([^\]]*)\]\(([^)]+)\)")
def extract_image(
    self,
    content: str,
    path_prefix: Optional[str] = None,
    replace: bool = True,
) -> Tuple[str, List[str]]:
    """Collect the target paths of all Markdown images in ``content``.

    Args:
        content: Markdown text containing images.
        path_prefix: When given, ``"<path_prefix>/"`` is prepended to every
            collected path.
        replace: When true, each image in the returned text points at the
            (possibly prefixed) path; when false the text is left unchanged.

    Returns:
        Tuple of (processed_text, list_of_image_paths), paths in order of
        appearance.

    Example:
        >>> util = MarkdownImageUtil()
        >>> text, images = util.extract_image("![logo](img/logo.png)")
        >>> print(images)
        ['img/logo.png']
    """
    found_paths: List[str] = []

    def rewrite(match: Match[str]) -> str:
        """Record one image path and return its replacement text."""
        alt_text, target = match.group(1), match.group(2)
        if path_prefix:
            target = f"{path_prefix}/{target}"
        found_paths.append(target)
        return f"![{alt_text}]({target})" if replace else match.group(0)

    rewritten = self.image_pattern.sub(rewrite, content)
    logger.debug(f"Extracted {len(found_paths)} images from markdown")
    return rewritten, found_paths
def extract_base64(
    self,
    content: str,
    path_prefix: Optional[str] = None,
    replace: bool = True,
) -> Tuple[str, Dict[str, bytes]]:
    """Extract and decode base64 embedded images from Markdown.

    Every ``![alt](data:image/<fmt>;base64,<data>)`` occurrence is decoded,
    stored under a freshly generated ``<uuid>.<fmt>`` name and, optionally,
    replaced in the text by a reference to that name.

    Args:
        content: Markdown text containing base64 images.
        path_prefix: Optional directory prefix for generated paths.
        replace: Whether to replace base64 syntax with file paths.

    Returns:
        Tuple of (processed_text, dict_of_path_to_bytes). An image whose
        payload cannot be decoded is replaced by its alt text and is not
        added to the dict.

    Example:
        >>> util = MarkdownImageUtil()
        >>> text = "![logo](data:image/png;base64,iVBORw0KGg...)"
        >>> new_text, images = util.extract_base64(text, "images")
        >>> print(new_text)
        ![logo](images/uuid.png)
        >>> print(len(images))
        1
    """
    # Maps generated file paths to decoded image data.
    images: Dict[str, bytes] = {}
    # Upper bound on how much of a broken payload is written to the log.
    max_logged_chars = 64

    def repl(match: Match[str]) -> str:
        """Replacement function for each base64 image match."""
        title = match.group(1)  # Alt text
        img_ext = match.group(2)  # Image format (png, jpg, etc.)
        img_b64 = match.group(3)  # Base64 encoded data

        # Per the original comment, encode_image turns the base64 text into
        # bytes; a falsy result is treated as a decode failure.
        image_byte = endecode.encode_image(img_b64, errors="ignore")
        if not image_byte:
            # Payloads can be megabytes long; log only a bounded preview so
            # a single bad image cannot flood the log.
            preview = img_b64
            if len(preview) > max_logged_chars:
                preview = (
                    f"{img_b64[:max_logged_chars]}... ({len(img_b64)} chars)"
                )
            logger.error("Failed to decode base64 image skip it: %s", preview)
            return title  # Keep just the alt text when decoding fails

        # Unique filename carrying the declared image format as extension.
        image_path = f"{uuid.uuid4()}.{img_ext}"
        if path_prefix:
            image_path = f"{path_prefix}/{image_path}"
        images[image_path] = image_byte

        # Keep the original base64 syntax when replacement is disabled.
        if not replace:
            return match.group(0)
        return f"![{title}]({image_path})"

    text = self.b64_pattern.sub(repl, content)
    logger.debug(f"Extracted {len(images)} base64 images from markdown")
    return text, images
def replace_path(self, content: str, images: Dict[str, str]) -> str:
    """Rewrite image targets in Markdown according to a path mapping.

    Typically used after images have been stored, to swap local file paths
    for their uploaded URLs.

    Args:
        content: Markdown text with image references.
        images: Mapping of current image paths to their new URLs.

    Returns:
        The Markdown text with mapped image paths replaced. Images whose
        path is not in the mapping are left untouched; an image mapped to
        an empty value is reduced to its alt text.

    Example:
        >>> util = MarkdownImageUtil()
        >>> content = "![logo](temp/img.png)"
        >>> mapping = {"temp/img.png": "https://cdn.com/img.png"}
        >>> result = util.replace_path(content, mapping)
        >>> print(result)
        ![logo](https://cdn.com/img.png)
    """
    # Distinct source paths that were found in the mapping (for logging).
    replaced_paths: set = set()

    def repl(match: Match[str]) -> str:
        """Replacement callback invoked for each image match."""
        alt_text, old_path = match.group(1), match.group(2)
        if old_path in images:
            replaced_paths.add(old_path)
            new_url = images[old_path]
            if new_url:
                return f"![{alt_text}]({new_url})"
            return alt_text
        # Unknown path: keep the original image syntax.
        return match.group(0)

    updated = self.replace_pattern.sub(repl, content)
    logger.debug(f"Replaced {len(replaced_paths)} images in markdown")
    return updated
@staticmethod
def _self_test():
    """Manual smoke test: extract one base64 image and write it to disk.

    Prints the processed Markdown and writes every extracted image to the
    generated path (relative to the current working directory).
    """
    sample = "test![](data:image/png;base64,iVBORw0KGgoAAAA)test"
    helper = MarkdownImageUtil()
    processed, extracted = helper.extract_base64(sample)
    print(processed)
    for target_path in extracted:
        with open(target_path, "wb") as out:
            out.write(extracted[target_path])
class MarkdownImageBase64(BaseParser):
    """Pipeline stage that pulls base64 images out of Markdown.

    Embedded base64 images are replaced by ``images/<uuid>.<ext>`` path
    references, and the image payloads are returned base64-encoded in
    ``Document.images`` so that the Go-side ImageResolver (or main.py
    ``_resolve_images``) can take care of storage.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.image_helper = MarkdownImageUtil()

    def parse_into_text(self, content: bytes) -> Document:
        """Decode *content* to text and extract its base64 images."""
        markdown = endecode.decode_bytes(content)
        markdown, decoded = self.image_helper.extract_base64(
            markdown, path_prefix="images"
        )
        # Re-encode the raw bytes as base64 text, keyed by generated path.
        images: Dict[str, str] = {
            path: base64.b64encode(raw).decode() for path, raw in decoded.items()
        }
        logger.debug("Extracted %d base64 images from markdown", len(images))
        return Document(content=markdown, images=images)
class MarkdownParser(PipelineParser):
    """Complete Markdown parser built as a pipeline of stages.

    Stages, in order:
        1. MarkdownTableFormatter: standardizes table formatting.
        2. MarkdownImageBase64: extracts base64 images and replaces them
           with path references (the image data is returned, not uploaded).

    Each stage's output becomes the next stage's input.
    """

    _parser_cls = (MarkdownTableFormatter, MarkdownImageBase64)
if __name__ == "__main__":
    # Manual smoke test of the module.
    logging.basicConfig(level=logging.DEBUG)

    # Run a sample document through the complete MarkdownParser pipeline.
    sample_markdown = "test![](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAMgA)test"
    document = MarkdownParser().parse_into_text(sample_markdown.encode())
    logger.info(document.content)
    logger.info(f"Images: {len(document.images)}, name: {document.images.keys()}")

    # Then exercise the individual utilities.
    MarkdownImageUtil._self_test()
    MarkdownTableUtil._self_test()

View File

@@ -0,0 +1,107 @@
import io
import logging
import re
import base64
from markitdown import MarkItDown
from docreader.models.document import Document
from docreader.parser.base_parser import BaseParser
from docreader.parser.chain_parser import PipelineParser
from docreader.parser.markdown_parser import MarkdownParser
# 尝试导入 VLMClient
try:
from parser.vlm_client import VLMClient
except ImportError:
VLMClient = None
logger = logging.getLogger(__name__)
class StdMarkitdownParser(BaseParser):
    """Standard MarkItDown parser wrapper.

    Uses the markitdown library to convert various document formats
    (docx, pptx, pdf, etc.) into text/markdown. When a VLM configuration is
    supplied and enabled, embedded base64 images are additionally sent to
    the VLM and replaced by its textual description.
    """

    # Markdown image whose target is an inline base64 data URI.
    # Groups: 1 = alt text, 2 = full data URI, 3 = image subtype (e.g. "png"),
    # 4 = base64 payload.
    _DATA_URI_IMAGE = re.compile(
        r'!\[([^\]]*)\]\((data:image/([^;]+);base64,([A-Za-z0-9+/=]+))\)'
    )

    def __init__(self, *args, vlm_config=None, **kwargs):
        """Create the parser.

        Args:
            *args: Positional arguments forwarded to BaseParser.
            vlm_config: Optional dict; a VLM client is created only when it
                has a truthy ``enabled`` entry and VLMClient is importable.
            **kwargs: Keyword arguments forwarded to BaseParser.
        """
        # BaseParser initialisation is what sets self.file_type.
        super().__init__(*args, **kwargs)
        self.markitdown = MarkItDown()
        self.vlm_config = vlm_config
        self.vlm_client = None

        # Initialise the VLM client when a configuration is provided.
        if vlm_config and vlm_config.get("enabled") and VLMClient:
            try:
                self.vlm_client = VLMClient(vlm_config)
                logger.info(f"VLM client initialized: provider={vlm_config.get('provider')}, model={vlm_config.get('model')}")
            except Exception as e:
                # Best effort: fall back to plain MarkItDown output.
                logger.warning(f"Failed to initialize VLM client: {e}")

    def parse_into_text(self, content: bytes) -> Document:
        """Parse *content* with MarkItDown.

        ``self.file_type`` (inherited from BaseParser) is passed as the
        file-extension hint for the stream. Conversion errors are not caught
        here so that the surrounding PipelineParser can handle them.
        """
        ext = self.file_type
        if ext and not ext.startswith('.'):
            ext = '.' + ext

        result = self.markitdown.convert(
            io.BytesIO(content),
            file_extension=ext,
            keep_data_uris=True
        )
        markdown_content = result.text_content

        # Let the VLM describe embedded images when a client is available.
        if self.vlm_client and markdown_content:
            markdown_content = self._process_images_with_vlm(markdown_content)
        return Document(content=markdown_content)

    def _process_images_with_vlm(self, content: str) -> str:
        """Replace inline base64 images in *content* with VLM output.

        Images the VLM cannot process (or that raise) keep their original
        Markdown reference.
        """

        def replace_image(match):
            alt_text = match.group(1)
            # The regex captures only the subtype ("png"); the VLM client is
            # handed a full MIME type ("image/png").
            # NOTE(review): assumes analyze_image expects a full MIME type,
            # as its parameter name suggests — confirm against VLMClient.
            mime_type = f"image/{match.group(3)}"
            base64_data = match.group(4)
            try:
                image_bytes = base64.b64decode(base64_data)

                logger.info(f"Processing image with VLM: {alt_text or 'unnamed'}")
                vlm_result = self.vlm_client.analyze_image(image_bytes, mime_type)
                if vlm_result.get("success"):
                    vlm_content = vlm_result.get("content", "")
                    logger.info(f"VLM processed image successfully, content length: {len(vlm_content)}")
                    # Substitute the image with the VLM-produced content.
                    return f"<!-- Image: {alt_text} -->\n{vlm_content}\n<!-- End Image -->"
                else:
                    logger.warning(f"VLM failed for image: {vlm_result.get('error')}")
                    return match.group(0)  # Keep the original image reference
            except Exception as e:
                logger.error(f"Error processing image with VLM: {e}")
                return match.group(0)  # Keep the original image reference

        return self._DATA_URI_IMAGE.sub(replace_image, content)
class MarkitdownParser(PipelineParser):
    """Pipeline: MarkItDown conversion followed by Markdown post-processing."""

    _parser_cls = (StdMarkitdownParser, MarkdownParser)

88
ai-core/parser/parser.py Normal file
View File

@@ -0,0 +1,88 @@
import logging
from typing import Any, Optional
from docreader.models.document import Document
from docreader.parser.registry import registry
from docreader.parser.web_parser import WebParser
logger = logging.getLogger(__name__)
class Parser:
    """Document parser facade (lightweight version).

    Resolves a parser class through the engine registry and converts
    files/URLs to markdown + image references. No chunking, storage or OCR
    is done here; an enabled ``vlm_config`` is merely forwarded to the
    selected parser class.
    """

    def __init__(self):
        self.registry = registry
        logger.info(
            "Parser initialized with engines: %s",
            ", ".join(self.registry.get_engine_names()),
        )

    def parse_file(
        self,
        file_name: str,
        file_type: str,
        content: bytes,
        parser_engine: Optional[str] = None,
        engine_overrides: Optional[dict[str, Any]] = None,
        vlm_config: Optional[dict[str, Any]] = None,
    ) -> Document:
        """Parse file content to markdown.

        Args:
            file_name: Name of the file, passed to the parser class.
            file_type: File type used to select the parser class.
            content: Raw file bytes.
            parser_engine: Engine name; empty/None selects the builtin one.
            engine_overrides: Extra keyword arguments for the parser class.
                The dict is copied, never modified.
            vlm_config: Optional VLM settings; forwarded to the parser class
                as ``vlm_config`` when its ``enabled`` entry is truthy.

        Returns:
            The Document produced by the selected parser.

        Raises:
            ValueError: Propagated from the registry for an unsupported
                file type.
        """
        engine = parser_engine or ""
        # Copy so that injecting vlm_config below does not leak into the
        # caller's dict (which may be reused for other requests).
        overrides: dict[str, Any] = dict(engine_overrides or {})
        vlm_enabled = vlm_config.get("enabled") if vlm_config else False
        logger.info(
            "Parsing file: %s, type: %s, engine: %s, vlm_enabled: %s",
            file_name,
            file_type,
            engine or "builtin",
            vlm_enabled,
        )

        # Hand an enabled VLM configuration to the parser via the overrides.
        if vlm_enabled:
            overrides["vlm_config"] = vlm_config

        cls = self.registry.get_parser_class(engine, file_type)
        logger.info(
            "Creating %s parser instance for %s file",
            cls.__name__,
            file_type,
        )
        parser = cls(
            file_name=file_name,
            file_type=file_type,
            **overrides,
        )

        logger.info("Starting to parse file content, size: %d bytes", len(content))
        result = parser.parse(content)
        if not result.content:
            logger.warning("Parser returned empty content for file: %s", file_name)
        # `or ""` keeps the log line from raising when content is None.
        logger.info(
            "Parsed file %s, content length=%d", file_name, len(result.content or "")
        )
        return result

    def parse_url(
        self,
        url: str,
        title: str,
        parser_engine: Optional[str] = None,
        engine_overrides: Optional[dict[str, Any]] = None,
    ) -> Document:
        """Parse content from a URL to markdown.

        Always uses WebParser; ``parser_engine`` and ``engine_overrides``
        are accepted but not used.
        """
        logger.info("Parsing URL: %s, title: %s", url, title)
        parser = WebParser(title=title)

        logger.info("Starting to parse URL content")
        result = parser.parse(url.encode())
        if not result.content:
            logger.warning("Parser returned empty content for url: %s", url)
        # `or ""` keeps the log line from raising when content is None.
        logger.info("Parsed url %s, content length=%d", url, len(result.content or ""))
        return result

View File

@@ -0,0 +1,275 @@
"""
简化的 Parser - 使用 markitdown + VLM
"""
import logging
import os
import io
import re
import base64
from typing import Optional, Any, Dict
from markitdown import MarkItDown
logger = logging.getLogger(__name__)
class Document:
    """Minimal document container: text content, chunks and metadata."""

    def __init__(self, content: str = "", chunks: list = None, metadata: dict = None):
        # Falsy chunks/metadata (None or empty) get a fresh container, so
        # instances never share a default.
        self.content = content
        self.chunks = chunks if chunks else []
        self.metadata = metadata if metadata else {}
class VLMClient:
    """HTTP client for vision-language model (VLM) providers.

    Supported providers: ``openai``, ``anthropic`` and ``qwen``. Every call
    returns a dict: ``{"success": True, "content": ...}`` on success or
    ``{"success": False, "error": ...}`` on failure; provider errors are
    logged and reported through that dict rather than raised.
    """

    def __init__(self, config: Dict[str, Any]):
        """Read provider, model, api_key, base_url and prompt from *config*."""
        self.provider = config.get("provider", "openai")
        self.model = config.get("model", "gpt-4o")
        self.api_key = config.get("api_key", "")
        self.base_url = config.get("base_url", "")
        # An empty/missing prompt falls back to the built-in one.
        self.prompt = config.get("prompt", "") or self._default_prompt()
        logger.info(f"VLMClient initialized: provider={self.provider}, model={self.model}")

    def _default_prompt(self) -> str:
        """Return the built-in prompt used when none is configured."""
        return """请分析这个文档图片的内容,并将其转换为 Markdown 格式。
要求:
1. 保持原文的格式和结构
2. 表格用 Markdown 表格格式
3. 标题用 # ## ### 标记
4. 尽量保留原文的所有信息"""

    def analyze_image(self, content: bytes, mime_type: str) -> Dict[str, Any]:
        """Send an image to the configured provider.

        Args:
            content: Raw image bytes.
            mime_type: MIME type placed in the request for the image.

        Returns:
            Result dict as described on the class; an unknown provider
            yields a failure dict without any network access.
        """
        handlers = {
            "openai": self._call_openai,
            "anthropic": self._call_anthropic,
            "qwen": self._call_qwen,
        }
        handler = handlers.get(self.provider)
        if handler is None:
            return {"success": False, "error": f"Unknown provider: {self.provider}"}
        return handler(content, mime_type)

    def _endpoint(self, default_base: str, path: str) -> str:
        """Join the configured (or default) base URL with *path*.

        A trailing slash on the base URL is dropped so that the result never
        contains a doubled slash before *path*.
        """
        return (self.base_url or default_base).rstrip("/") + path

    def _call_chat_completions(
        self,
        default_base: str,
        label: str,
        content: bytes,
        mime_type: str,
        max_tokens: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Call an OpenAI-style ``/chat/completions`` endpoint.

        Shared by the OpenAI and Qwen providers; *label* only names the
        provider in the error log and ``max_tokens`` is sent only when given.
        """
        try:
            import requests

            url = self._endpoint(default_base, "/chat/completions")
            image_b64 = base64.b64encode(content).decode("utf-8")
            headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
            payload = {
                "model": self.model,
                "messages": [{
                    "role": "user",
                    "content": [
                        {"type": "text", "text": self.prompt},
                        {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{image_b64}"}}
                    ]
                }]
            }
            if max_tokens is not None:
                payload["max_tokens"] = max_tokens
            resp = requests.post(url, headers=headers, json=payload, timeout=120)
            resp.raise_for_status()
            result = resp.json()
            return {"success": True, "content": result["choices"][0]["message"]["content"]}
        except Exception as e:
            # Deliberately broad: any failure is reported via the result dict.
            logger.error(f"{label} VLM error: {e}")
            return {"success": False, "error": str(e)}

    def _call_openai(self, content: bytes, mime_type: str) -> Dict[str, Any]:
        """Analyze the image through the OpenAI chat completions API."""
        return self._call_chat_completions(
            "https://api.openai.com/v1", "OpenAI", content, mime_type, max_tokens=4096
        )

    def _call_anthropic(self, content: bytes, mime_type: str) -> Dict[str, Any]:
        """Analyze the image through the Anthropic messages API."""
        try:
            import requests

            url = self._endpoint("https://api.anthropic.com/v1", "/messages")
            image_b64 = base64.b64encode(content).decode("utf-8")
            headers = {
                "x-api-key": self.api_key,
                "anthropic-version": "2023-06-01",
                "Content-Type": "application/json"
            }
            payload = {
                "model": self.model,
                "max_tokens": 4096,
                "messages": [{
                    "role": "user",
                    "content": [
                        {"type": "text", "text": self.prompt},
                        {"type": "image", "source": {"type": "base64", "media_type": mime_type, "data": image_b64}}
                    ]
                }]
            }
            resp = requests.post(url, headers=headers, json=payload, timeout=120)
            resp.raise_for_status()
            result = resp.json()
            return {"success": True, "content": result["content"][0]["text"]}
        except Exception as e:
            # Deliberately broad: any failure is reported via the result dict.
            logger.error(f"Anthropic VLM error: {e}")
            return {"success": False, "error": str(e)}

    def _call_qwen(self, content: bytes, mime_type: str) -> Dict[str, Any]:
        """Analyze the image through the DashScope OpenAI-compatible API."""
        return self._call_chat_completions(
            "https://dashscope.aliyuncs.com/compatible-mode/v1", "Qwen", content, mime_type
        )
class Parser:
    """Document parser based on MarkItDown with optional VLM support."""

    # Markdown image whose target is an inline base64 data URI.
    # Groups: 1 = alt text, 2 = full data URI, 3 = image subtype (e.g. "png"),
    # 4 = base64 payload.
    _DATA_URI_IMAGE = re.compile(
        r'!\[([^\]]*)\]\((data:image/([^;]+);base64,([A-Za-z0-9+/=]+))\)'
    )

    def __init__(self):
        self.markitdown = MarkItDown()
        self.vlm_client: Optional[VLMClient] = None
        logger.info("Parser initialized with MarkItDown")

    def set_vlm_config(self, config: Dict[str, Any]) -> None:
        """Install a VLM client, or clear it.

        A client is created only when *config* is enabled and carries an
        ``api_key``; any other config resets the client to None.
        """
        if config and config.get("enabled") and config.get("api_key"):
            self.vlm_client = VLMClient(config)
            logger.info(f"VLM enabled: provider={config.get('provider')}, model={config.get('model')}")
        else:
            self.vlm_client = None

    def _should_use_vlm(self, file_name: str) -> bool:
        """Return True when the whole file should be parsed by the VLM.

        That is the case when a VLM client is set and the file name has an
        image or PDF extension.
        """
        if not self.vlm_client:
            return False
        ext = os.path.splitext(file_name)[1].lower()
        # Both images and PDFs go straight to the VLM.
        image_exts = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.tiff']
        return ext in image_exts or ext == '.pdf'

    def _process_images_with_vlm(self, content: str) -> str:
        """Replace inline base64 images in *content* with VLM output.

        Images the VLM cannot process (or that raise) keep their original
        Markdown reference.
        """

        def replace_image(match):
            alt_text = match.group(1)
            # The regex captures only the subtype ("png"); analyze_image
            # embeds its argument as a full MIME type, so rebuild it.
            mime_type = f"image/{match.group(3)}"
            base64_data = match.group(4)
            try:
                image_bytes = base64.b64decode(base64_data)
                logger.info(f"Processing image with VLM: {alt_text or 'unnamed'}")
                vlm_result = self.vlm_client.analyze_image(image_bytes, mime_type)
                if vlm_result.get("success"):
                    vlm_content = vlm_result.get("content", "")
                    logger.info(f"VLM processed image, content length: {len(vlm_content)}")
                    return f"<!-- Image: {alt_text} -->\n{vlm_content}\n<!-- End Image -->"
                else:
                    logger.warning(f"VLM failed: {vlm_result.get('error')}")
                    return match.group(0)
            except Exception as e:
                logger.error(f"VLM error: {e}")
                return match.group(0)

        return self._DATA_URI_IMAGE.sub(replace_image, content)

    def _parse_with_vlm(self, content: bytes, file_name: str) -> Document:
        """Parse the whole file by sending its bytes to the VLM.

        The MIME type is derived from the file extension (``image/png`` for
        unknown ones). A failed VLM call yields an empty Document.
        """
        ext = os.path.splitext(file_name)[1].lower()
        mime_types = {
            '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.png': 'image/png',
            '.gif': 'image/gif', '.bmp': 'image/bmp', '.webp': 'image/webp',
            '.tiff': 'image/tiff', '.pdf': 'application/pdf',
        }
        mime_type = mime_types.get(ext, 'image/png')
        result = self.vlm_client.analyze_image(content, mime_type)
        if result.get("success"):
            return Document(content=result["content"], metadata={"vlm": True})
        else:
            logger.error(f"VLM failed: {result.get('error')}")
            return Document(content="")

    def parse_file(
        self,
        file_name: str,
        file_type: str,
        content: bytes,
        parser_engine: Optional[str] = None,
        engine_overrides: Optional[dict[str, Any]] = None,
        vlm_config: Optional[dict[str, Any]] = None,
    ) -> Document:
        """Parse file content to markdown.

        Args:
            file_name: File name; its extension decides whether the VLM
                parses the file directly.
            file_type: Extension hint (with or without leading dot) passed
                to MarkItDown.
            content: Raw file bytes.
            parser_engine: Accepted but not used.
            engine_overrides: Accepted but not used.
            vlm_config: Optional VLM settings applied when enabled.

        Returns:
            The parsed Document; an empty Document when parsing fails.
        """
        logger.info(f"Parsing file: {file_name}, type: {file_type}, vlm_config={'enabled' if vlm_config and vlm_config.get('enabled') else 'none'}")

        # Apply the VLM configuration when it is enabled.
        # NOTE(review): a client installed by an earlier call stays active
        # when this call passes no/disabled vlm_config — confirm intended.
        if vlm_config and vlm_config.get("enabled"):
            self.set_vlm_config(vlm_config)

        # Images and PDFs are handed to the VLM as a whole.
        if self._should_use_vlm(file_name):
            logger.info(f"Using VLM for {file_name}")
            return self._parse_with_vlm(content, file_name)

        # Everything else is converted by MarkItDown.
        try:
            ext = file_type
            if not ext.startswith('.'):
                ext = '.' + ext
            result = self.markitdown.convert(
                io.BytesIO(content),
                file_extension=ext,
                keep_data_uris=True
            )
            markdown_content = result.text_content or ""

            # With a VLM client available, describe the embedded images.
            if self.vlm_client and markdown_content:
                markdown_content = self._process_images_with_vlm(markdown_content)
            return Document(
                content=markdown_content,
                metadata=result.metadata if hasattr(result, 'metadata') else {}
            )
        except Exception as e:
            logger.error(f"Parse error: {e}")
            return Document(content="")

    def parse_url(
        self,
        url: str,
        title: str,
        parser_engine: Optional[str] = None,
        engine_overrides: Optional[dict[str, Any]] = None,
    ) -> Document:
        """Convert the resource at *url* with MarkItDown.

        Returns an empty Document when the conversion raises.
        """
        logger.info(f"Parsing URL: {url}, title: {title}")
        try:
            result = self.markitdown.convert(url)
            return Document(content=result.text_content or "")
        except Exception as e:
            logger.error(f"URL parse error: {e}")
            return Document(content="")


# Public API
__all__ = ["Parser", "Document"]

View File

@@ -0,0 +1,15 @@
from docreader.parser.chain_parser import FirstParser
from docreader.parser.markitdown_parser import MarkitdownParser
class PDFParser(FirstParser):
    """PDF parser built on the first-success parser chain.

    The chain currently holds a single backend, MarkitdownParser; further
    backends can be added to ``_parser_cls`` in the order they should be
    tried.
    """

    # Parser classes to try, in order.
    _parser_cls = (MarkitdownParser,)

160
ai-core/parser/registry.py Normal file
View File

@@ -0,0 +1,160 @@
import logging
from typing import Any, Callable, Dict, List, Optional, Tuple, Type
from docreader.parser.base_parser import BaseParser
from docreader.parser.doc_parser import DocParser
from docreader.parser.docx2_parser import Docx2Parser
from docreader.parser.excel_parser import ExcelParser
from docreader.parser.image_parser import ImageParser
from docreader.parser.markdown_parser import MarkdownParser
from docreader.parser.markitdown_parser import MarkitdownParser
from docreader.parser.pdf_parser import PDFParser
logger = logging.getLogger(__name__)
BUILTIN_ENGINE = "builtin"
class ParserEngineRegistry:
    """Registry for parser engines.

    Each engine maps file extensions to parser classes. When a requested
    engine doesn't support a file type, the registry falls back to the
    builtin engine automatically.
    """

    def __init__(self):
        # engine name -> {file extension -> parser class}
        self._engines: Dict[str, Dict[str, Type[BaseParser]]] = {}
        # engine name -> human-readable description
        self._descriptions: Dict[str, str] = {}
        # engine name -> availability probe returning (available, reason)
        self._check_available: Dict[str, Callable[..., Tuple[bool, str]]] = {}
        # engine name -> fallback text when the engine is unavailable
        self._unavailable_hint: Dict[str, str] = {}

    def register(
        self,
        name: str,
        file_types: Dict[str, Type[BaseParser]],
        description: str = "",
        check_available: Callable[..., Tuple[bool, str]] | None = None,
        unavailable_hint: str = "",
    ):
        """Register (or replace) an engine.

        Args:
            name: Engine name.
            file_types: Mapping of file extension to parser class.
            description: Human-readable description of the engine.
            check_available: Optional probe used by ``list_engines``; it is
                called with the overrides and returns ``(available, reason)``.
            unavailable_hint: Text reported when the engine is unavailable
                and the probe gives no reason.
        """
        self._engines[name] = file_types
        self._descriptions[name] = description
        if check_available is not None:
            self._check_available[name] = check_available
        self._unavailable_hint[name] = unavailable_hint
        logger.info(
            "Registered parser engine '%s' with file types: %s",
            name,
            ", ".join(file_types.keys()),
        )

    def get_parser_class(self, engine: str, file_type: str) -> Type[BaseParser]:
        """Resolve the parser class for the given engine and file type.

        The file type is matched case-insensitively, ignoring surrounding
        whitespace and a leading dot (``"pdf"``, ``".PDF"``). Falls back to
        the builtin engine when the requested engine doesn't support the
        file type.

        Raises:
            ValueError: If neither the engine nor the builtin engine
                supports the file type.
        """
        # Engines register bare extensions, so accept ".pdf" as well.
        ft = file_type.strip().lower().lstrip(".")

        if engine and engine in self._engines:
            cls = self._engines[engine].get(ft)
            if cls:
                logger.info("Using engine '%s' for file type '%s'", engine, ft)
                return cls
            logger.info(
                "Engine '%s' does not support '%s', falling back to builtin",
                engine,
                ft,
            )

        cls = self._engines.get(BUILTIN_ENGINE, {}).get(ft)
        if cls:
            return cls
        raise ValueError(f"Unsupported file type: {file_type}")

    def list_engines(self, overrides: Optional[Dict[str, str]] = None) -> List[Dict]:
        """Return metadata for all registered engines, including availability.

        Args:
            overrides: tenant-level config overrides (e.g. mineru_endpoint,
                mineru_api_key) forwarded to each engine's check_available
                function.

        Returns:
            One dict per engine with the keys ``name``, ``description``,
            ``file_types`` (sorted), ``available`` and ``unavailable_reason``.
        """
        result = []
        for name, parsers in self._engines.items():
            available = True
            unavailable_reason = ""

            check = self._check_available.get(name)
            if check is not None:
                try:
                    available, unavailable_reason = check(overrides)
                except Exception as e:
                    # A failing probe marks the engine as unavailable.
                    available = False
                    unavailable_reason = str(e) or self._unavailable_hint.get(name, "")

            if not available and not unavailable_reason:
                unavailable_reason = self._unavailable_hint.get(name, "不可用")

            result.append(
                {
                    "name": name,
                    "description": self._descriptions.get(name, ""),
                    "file_types": sorted(parsers.keys()),
                    "available": available,
                    "unavailable_reason": unavailable_reason,
                }
            )
        return result

    def get_engine_names(self) -> List[str]:
        """Return the names of all registered engines."""
        return list(self._engines.keys())
def _build_default_registry() -> ParserEngineRegistry:
    """Build the default registry holding the builtin and markitdown engines."""
    reg = ParserEngineRegistry()

    # Builtin engine: dedicated parsers per document type, then image types.
    builtin_parsers = {
        "docx": Docx2Parser,
        "doc": DocParser,
        "pdf": PDFParser,
        "md": MarkdownParser,
        "markdown": MarkdownParser,
        "xlsx": ExcelParser,
        "xls": ExcelParser,
    }
    builtin_parsers.update(
        dict.fromkeys(
            ("jpg", "jpeg", "png", "gif", "bmp", "tiff", "webp"), ImageParser
        )
    )
    reg.register(BUILTIN_ENGINE, builtin_parsers, description="内置解析引擎")

    # MarkItDown engine: one parser class for every supported extension.
    markitdown_exts = (
        "md",
        "markdown",
        "pdf",
        "docx",
        "doc",
        "pptx",
        "ppt",
        "xlsx",
        "xls",
        "csv",
    )
    reg.register(
        "markitdown",
        dict.fromkeys(markitdown_exts, MarkitdownParser),
        description="MarkItDown 解析引擎(微软 MarkItDown 库)",
    )

    # NOTE: Engine listing is managed by the Go-side engine registry
    # (docparser.ListAllEngines). The Python list_engines method is kept for
    # backward compatibility with the gRPC ListEngines RPC, but the Go app
    # no longer calls it. MinerU engines are handled natively by Go.
    return reg


registry = _build_default_registry()

322
ai-core/parser/storage.py Normal file
View File

@@ -0,0 +1,322 @@
# -*- coding: utf-8 -*-
import io
import logging
import os
import traceback
import uuid
from abc import ABC, abstractmethod
from typing import Dict, Optional
from minio import Minio
from qcloud_cos import CosConfig, CosS3Client
from docreader.utils import endecode
logger = logging.getLogger(__name__)
def _cfg(storage_config: Optional[Dict], key: str, *env_keys: str, default: str = "") -> str:
"""Read a value from storage_config dict, falling back to env vars."""
if storage_config:
v = storage_config.get(key, "")
if v:
return str(v)
for ek in env_keys:
v = os.environ.get(ek, "")
if v:
return v
return default
class Storage(ABC):
    """Interface every object-storage backend has to implement."""

    @abstractmethod
    def upload_file(self, file_path: str) -> str:
        """Store the file at *file_path*; return a string locating it."""
        ...

    @abstractmethod
    def upload_bytes(self, content: bytes, file_ext: str = ".png") -> str:
        """Store *content*; return a string locating it."""
        ...
class CosStorage(Storage):
    """Tencent Cloud COS storage implementation.

    Settings come from ``storage_config`` with environment-variable
    fallbacks. When the configuration is incomplete or the client cannot be
    created, the instance stays usable but every upload returns ``""``.
    """

    def __init__(self, storage_config: Optional[Dict] = None):
        self.storage_config = storage_config
        self.client, self.bucket_name, self.region, self.prefix = (
            self._init_cos_client()
        )

    def _init_cos_client(self):
        """Create the COS client.

        Returns:
            ``(client, bucket_name, region, prefix)``, or a tuple of four
            ``None`` values when configuration is incomplete or
            initialisation fails.
        """
        try:
            sc = self.storage_config
            secret_id = _cfg(sc, "access_key_id", "COS_SECRET_ID")
            secret_key = _cfg(sc, "secret_access_key", "COS_SECRET_KEY")
            region = _cfg(sc, "region", "COS_REGION")
            bucket_name = _cfg(sc, "bucket_name", "COS_BUCKET_NAME")
            appid = _cfg(sc, "app_id", "COS_APP_ID")
            prefix = _cfg(sc, "path_prefix", "COS_PATH_PREFIX")
            enable_old_domain = os.environ.get("COS_ENABLE_OLD_DOMAIN", "").lower() in ("1", "true", "yes")

            if not all([secret_id, secret_key, region, bucket_name, appid]):
                # Only report whether the secret id is set, never its value.
                logger.error(
                    "Incomplete COS configuration: "
                    "secret_id=%s, region=%s, bucket=%s, appid=%s",
                    bool(secret_id), region, bucket_name, appid,
                )
                return None, None, None, None

            logger.info("Initializing COS client: region=%s, bucket=%s", region, bucket_name)
            config = CosConfig(
                Appid=appid,
                Region=region,
                SecretId=secret_id,
                SecretKey=secret_key,
                EnableOldDomain=enable_old_domain,
            )
            client = CosS3Client(config)
            return client, bucket_name, region, prefix
        except Exception as e:
            logger.error("Failed to initialize COS client: %s", e)
            return None, None, None, None

    def _get_download_url(self, bucket_name, region, object_key):
        """Build the public COS URL of an object."""
        return f"https://{bucket_name}.cos.{region}.myqcloud.com/{object_key}"

    def _object_key(self, file_ext: str) -> str:
        """Return a fresh ``[<prefix>/]images/<uuid><ext>`` object key.

        The prefix segment is omitted when no prefix is configured, so the
        key never starts with a slash.
        """
        name = f"images/{uuid.uuid4().hex}{file_ext}"
        return f"{self.prefix}/{name}" if self.prefix else name

    def upload_file(self, file_path: str) -> str:
        """Upload a local file; return its URL, or ``""`` on failure."""
        try:
            if not self.client:
                return ""
            file_ext = os.path.splitext(file_path)[1]
            # Same key scheme as upload_bytes (no leading "/" without prefix).
            object_key = self._object_key(file_ext)
            self.client.upload_file(
                Bucket=self.bucket_name,
                LocalFilePath=file_path,
                Key=object_key,
            )
            file_url = self._get_download_url(self.bucket_name, self.region, object_key)
            logger.info("COS upload_file ok: %s", file_url)
            return file_url
        except Exception as e:
            logger.error("COS upload_file failed: %s", e)
            return ""

    def upload_bytes(self, content: bytes, file_ext: str = ".png") -> str:
        """Upload raw bytes; return the object URL, or ``""`` on failure."""
        try:
            if not self.client:
                return ""
            object_key = self._object_key(file_ext)
            self.client.put_object(
                Bucket=self.bucket_name, Body=content, Key=object_key
            )
            file_url = self._get_download_url(self.bucket_name, self.region, object_key)
            logger.info("COS upload_bytes ok: %s", file_url)
            return file_url
        except Exception as e:
            logger.error("COS upload_bytes failed: %s", e)
            traceback.print_exc()
            return ""
class MinioStorage(Storage):
    """Storage backend that uploads to a MinIO bucket.

    Settings come from ``storage_config`` with environment-variable
    fallbacks. When the configuration is incomplete or the client cannot be
    created, every upload returns ``""``.
    """

    def __init__(self, storage_config: Optional[Dict] = None):
        self.storage_config = storage_config
        (
            self.client,
            self.bucket_name,
            self.use_ssl,
            self.endpoint,
            self.path_prefix,
        ) = self._init_minio_client()

    def _init_minio_client(self):
        """Create the MinIO client and make sure the bucket exists.

        A missing bucket is created and given a policy that allows
        anonymous listing and object reads.

        Returns:
            ``(client, bucket_name, use_ssl, endpoint, path_prefix)``, or
            five ``None`` values on incomplete configuration or failure.
        """
        try:
            cfg = self.storage_config
            access_key = _cfg(cfg, "access_key_id", "MINIO_ACCESS_KEY_ID")
            secret_key = _cfg(cfg, "secret_access_key", "MINIO_SECRET_ACCESS_KEY")
            bucket_name = _cfg(cfg, "bucket_name", "MINIO_BUCKET_NAME")
            raw_prefix = _cfg(cfg, "path_prefix", "MINIO_PATH_PREFIX")
            # Drop surrounding whitespace and slashes from the prefix.
            path_prefix = raw_prefix.strip().strip("/") if raw_prefix else ""
            endpoint = _cfg(cfg, "endpoint", "MINIO_ENDPOINT")
            use_ssl = os.environ.get("MINIO_USE_SSL", "").lower() in ("1", "true", "yes")

            if not all([endpoint, access_key, secret_key, bucket_name]):
                logger.error("Incomplete MinIO configuration")
                return None, None, None, None, None

            client = Minio(
                endpoint, access_key=access_key, secret_key=secret_key, secure=use_ssl
            )
            if not client.bucket_exists(bucket_name):
                client.make_bucket(bucket_name)
                policy = (
                    "{"
                    '"Version":"2012-10-17",'
                    '"Statement":['
                    '{"Effect":"Allow","Principal":{"AWS":["*"]},'
                    '"Action":["s3:GetBucketLocation","s3:ListBucket"],'
                    '"Resource":["arn:aws:s3:::%s"]},'
                    '{"Effect":"Allow","Principal":{"AWS":["*"]},'
                    '"Action":["s3:GetObject"],'
                    '"Resource":["arn:aws:s3:::%s/*"]}'
                    "]}" % (bucket_name, bucket_name)
                )
                client.set_bucket_policy(bucket_name, policy)
            return client, bucket_name, use_ssl, endpoint, path_prefix
        except Exception as e:
            logger.error("Failed to initialize MinIO client: %s", e)
            return None, None, None, None, None

    def _get_download_url(self, object_key: str):
        """Return the URL of an object.

        ``MINIO_PUBLIC_ENDPOINT`` is used as the base when set; otherwise
        the URL is built from the SSL flag and the configured endpoint.
        """
        public_endpoint = os.environ.get("MINIO_PUBLIC_ENDPOINT", "")
        if not public_endpoint:
            scheme = "https" if self.use_ssl else "http"
            return f"{scheme}://{self.endpoint}/{self.bucket_name}/{object_key}"
        return f"{public_endpoint}/{self.bucket_name}/{object_key}"

    def upload_file(self, file_path: str) -> str:
        """Upload a local file; return its URL, or ``""`` on failure."""
        try:
            if not self.client:
                return ""
            suffix = os.path.splitext(os.path.basename(file_path))[1]
            object_key = f"images/{uuid.uuid4().hex}{suffix}"
            if self.path_prefix:
                object_key = f"{self.path_prefix}/{object_key}"

            with open(file_path, "rb") as stream:
                size = os.path.getsize(file_path)
                self.client.put_object(
                    bucket_name=self.bucket_name or "",
                    object_name=object_key,
                    data=stream,
                    length=size,
                    content_type="application/octet-stream",
                )
            url = self._get_download_url(object_key)
            logger.info("MinIO upload_file ok: %s", url)
            return url
        except Exception as e:
            logger.error("MinIO upload_file failed: %s", e)
            return ""

    def upload_bytes(self, content: bytes, file_ext: str = ".png") -> str:
        """Upload raw bytes; return the object URL, or ``""`` on failure."""
        try:
            if not self.client:
                return ""
            object_key = f"images/{uuid.uuid4().hex}{file_ext}"
            if self.path_prefix:
                object_key = f"{self.path_prefix}/{object_key}"

            self.client.put_object(
                self.bucket_name or "",
                object_key,
                data=io.BytesIO(content),
                length=len(content),
                content_type="application/octet-stream",
            )
            url = self._get_download_url(object_key)
            logger.info("MinIO upload_bytes ok: %s", url)
            return url
        except Exception as e:
            logger.error("MinIO upload_bytes failed: %s", e)
            traceback.print_exc()
            return ""
class LocalStorage(Storage):
    """Storage backend that writes to the local file system.

    Files are saved below ``base_dir`` and reported as web-accessible URL
    paths (e.g. /files/images/uuid.jpg) so that the Go app can serve them.
    """

    def __init__(self, storage_config: Optional[Dict] = None):
        """Resolve directories from *storage_config* / environment.

        ``base_dir`` falls back to ``LOCAL_STORAGE_BASE_DIR`` (default
        ``/data/files``) and ``url_prefix`` to ``LOCAL_STORAGE_URL_PREFIX``
        (default ``/files``). The image directory is created if missing.
        """
        cfg = storage_config or {}
        self.base_dir = cfg.get("base_dir") or os.environ.get(
            "LOCAL_STORAGE_BASE_DIR", "/data/files"
        )

        # Images live in <base_dir>/[<path_prefix>/]images.
        path_prefix = (cfg.get("path_prefix") or "").strip().strip("/")
        segments = [self.base_dir]
        if path_prefix:
            segments.append(path_prefix)
        segments.append("images")
        self.image_dir = os.path.join(*segments)

        self.url_prefix = cfg.get("url_prefix") or os.environ.get(
            "LOCAL_STORAGE_URL_PREFIX", "/files"
        )
        os.makedirs(self.image_dir, exist_ok=True)

    def _to_url(self, fpath: str) -> str:
        """Map a stored file path to its URL (path relative to base_dir)."""
        if not self.url_prefix:
            return fpath
        relative = os.path.relpath(fpath, self.base_dir)
        return f"{self.url_prefix}/{relative}"

    def upload_file(self, file_path: str) -> str:
        """Return *file_path* unchanged; nothing is copied."""
        return file_path

    def upload_bytes(self, content: bytes, file_ext: str = ".png") -> str:
        """Write *content* to a new uuid-named file; return its URL."""
        target = os.path.join(self.image_dir, f"{uuid.uuid4()}{file_ext}")
        with open(target, "wb") as out:
            out.write(content)
        url = self._to_url(target)
        logger.info("Local storage saved: %s -> %s", target, url)
        return url
class Base64Storage(Storage):
    """Storage backend that stores nothing and inlines data as a data URI."""

    def upload_file(self, file_path: str) -> str:
        """Return *file_path* unchanged."""
        return file_path

    def upload_bytes(self, content: bytes, file_ext: str = ".png") -> str:
        """Return *content* as a ``data:image/<ext>;base64,...`` URI."""
        subtype = file_ext.lstrip(".")
        payload = endecode.decode_image(content)
        return f"data:image/{subtype};base64,{payload}"
class DummyStorage(Storage):
    """No-op storage backend: every upload yields an empty string."""

    def upload_file(self, file_path: str) -> str:
        """Ignore *file_path* and return ``""``."""
        return ""

    def upload_bytes(self, content: bytes, file_ext: str = ".png") -> str:
        """Ignore *content* and return ``""``."""
        return ""
def create_storage(storage_config: Optional[Dict[str, str]] = None) -> Storage:
    """Instantiate the storage backend selected by *storage_config*.

    The ``provider`` entry picks the backend (minio, cos, local, base64).
    A missing or "unspecified" provider falls back to the ``STORAGE_TYPE``
    environment variable, then to ``local``. Any other value yields a
    DummyStorage.
    """
    provider = ""
    if storage_config:
        provider = str(storage_config.get("provider", "")).lower().strip()
    # Placeholder values count as "not configured".
    if provider in ("unspecified", "storage_provider_unspecified"):
        provider = ""
    storage_type = provider or os.environ.get("STORAGE_TYPE", "local").lower().strip()
    logger.info("Creating %s storage instance", storage_type)

    factories = {
        "minio": lambda: MinioStorage(storage_config),
        "cos": lambda: CosStorage(storage_config),
        "local": lambda: LocalStorage(storage_config),
        "base64": lambda: Base64Storage(),
    }
    factory = factories.get(storage_type)
    return factory() if factory is not None else DummyStorage()

View File

@@ -0,0 +1,141 @@
import asyncio
import logging
from playwright.async_api import async_playwright
from trafilatura import extract
from docreader.config import CONFIG
from docreader.models.document import Document
from docreader.parser.base_parser import BaseParser
from docreader.parser.chain_parser import PipelineParser
from docreader.parser.markdown_parser import MarkdownParser
from docreader.utils import endecode
logger = logging.getLogger(__name__)
class StdWebParser(BaseParser):
    """Standard web page parser using Playwright and Trafilatura.

    The page is loaded in Playwright's WebKit browser and the resulting HTML
    is passed to Trafilatura's ``extract`` to obtain markdown. An optional
    proxy is read from ``CONFIG.external_https_proxy``.
    """

    def __init__(self, title: str, **kwargs):
        """Initialize the web parser.

        Args:
            title: Title of the web page; also forwarded to ``BaseParser``
                as ``file_name``.
            **kwargs: Additional arguments passed to BaseParser.
        """
        self.title = title
        # Get proxy configuration from config if available
        self.proxy = CONFIG.external_https_proxy
        super().__init__(file_name=title, **kwargs)
        logger.info(f"Initialized WebParser with title: {title}")

    async def scrape(self, url: str) -> str:
        """Scrape web page content using Playwright.

        Args:
            url: The URL of the web page to scrape.

        Returns:
            HTML content of the web page as a string, or an empty string if
            launching the browser, navigating, or reading the page fails.
        """
        logger.info(f"Starting web page scraping for URL: {url}")
        try:
            async with async_playwright() as p:
                launch_kwargs = {}
                # Configure proxy if available
                if self.proxy:
                    launch_kwargs["proxy"] = {"server": self.proxy}
                logger.info("Launching WebKit browser")
                browser = await p.webkit.launch(**launch_kwargs)
                # From here on the browser must be closed on every path,
                # including failures in new_page() or page.content().
                try:
                    page = await browser.new_page()
                    logger.info(f"Navigating to URL: {url}")
                    try:
                        # Navigate to URL with 30 second timeout
                        await page.goto(url, timeout=30000)
                        logger.info("Initial page load complete")
                    except Exception as e:
                        logger.error(f"Error navigating to URL: {str(e)}")
                        return ""
                    logger.info("Retrieving page HTML content")
                    # Get the full HTML content of the page
                    content = await page.content()
                    logger.info(f"Retrieved {len(content)} bytes of HTML content")
                finally:
                    await browser.close()
                    logger.info("Browser closed")
                # Return raw HTML content for further processing
                logger.info("Successfully retrieved HTML content")
                return content
        except Exception as e:
            logger.error(f"Failed to scrape web page: {str(e)}")
            # Return empty string on error
            return ""

    def parse_into_text(self, content: bytes) -> Document:
        """Parse web page content into a Document object.

        Args:
            content: The page URL, encoded as bytes.

        Returns:
            Document holding the extracted markdown, or a Document whose
            content is an error message when scraping or extraction yields
            nothing.
        """
        # Decode bytes to get the URL string
        url = endecode.decode_bytes(content)
        logger.info(f"Scraping web page: {url}")
        # asyncio.run starts a fresh event loop, so this method has to be
        # called from synchronous code (not from inside a running loop).
        chtml = asyncio.run(self.scrape(url))
        md_text = None
        # A failed scrape returns "", so there is nothing to extract from.
        if chtml:
            # Extract clean content from HTML using Trafilatura and convert
            # it to markdown with metadata, images, tables, and links.
            md_text = extract(
                chtml,
                output_format="markdown",
                with_metadata=True,
                include_images=True,
                include_tables=True,
                include_links=True,
            )
        if not md_text:
            logger.error("Failed to parse web page")
            return Document(content=f"Error parsing web page: {url}")
        return Document(content=md_text)
class WebParser(PipelineParser):
"""Web parser built on the pipeline pattern.
Combines StdWebParser (web scraping and HTML-to-markdown conversion) with
MarkdownParser (markdown processing). The stages are listed in
``_parser_cls``; presumably PipelineParser applies them in that order
(it is defined in chain_parser, outside this block).
"""
# Parser classes to be executed in sequence: scrape first, then post-process
# the resulting markdown.
_parser_cls = (StdWebParser, MarkdownParser)
if __name__ == "__main__":
    # Manual smoke test: scrape one page and dump the markdown to disk.
    # Configure logging for debugging
    logging.basicConfig(level=logging.DEBUG)
    logger.setLevel(logging.DEBUG)
    # Example URL to scrape
    url = "https://cloud.tencent.com/document/product/457/6759"
    # Create parser instance and parse the web page
    parser = WebParser(title="")
    cc = parser.parse_into_text(url.encode())
    # Save the parsed markdown content to file. The encoding is pinned to
    # UTF-8 because the page contains non-ASCII text and the platform
    # default encoding may not be able to represent it.
    with open("./tencent.md", "w", encoding="utf-8") as f:
        f.write(cc.content)

16
ai-core/requirements.txt Normal file
View File

@@ -0,0 +1,16 @@
# AI-Core Document Parser
# gRPC 框架
grpcio>=1.60.0
grpcio-tools>=1.60.0
grpcio-reflection>=1.60.0
protobuf>=4.25.0
# HTTP 请求
requests>=2.31.0
# 配置文件解析
pyyaml>=6.0
# 文档解析
markitdown[pdf,docx,pptx,xlsx,all]>=0.0.1

View File

@@ -0,0 +1,208 @@
"""
gRPC Server for Document Parser
"""
import logging
import requests
from concurrent import futures
import grpc
from grpc_reflection.v1alpha import reflection
import sys
import os
import io
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "proto"))
from parser import Parser
logger = logging.getLogger(__name__)
# Import the modules generated from the proto definition. If they are missing,
# record that in PROTO_AVAILABLE and log a hint instead of failing at import
# time; serve() checks the flag before starting.
try:
import document_parser_pb2
import document_parser_pb2_grpc
PROTO_AVAILABLE = True
except ImportError:
logger.warning("Proto files not found, please run: python generate_grpc.py")
PROTO_AVAILABLE = False
class DocumentParserServicer:
    """gRPC service implementation for the DocumentParser service.

    ``ParseDocument`` downloads the requested file over HTTP, hands the
    bytes to ``Parser.parse_file`` and wraps the result in a
    ``ParseResponse``. The two ``Get*`` handlers return static capability
    lists.
    """

    def __init__(self, max_workers: int = 10):
        """Create the servicer and its parser.

        Args:
            max_workers: Stored on the instance; not otherwise used by this
                class.
        """
        self.parser = Parser()
        self.max_workers = max_workers
        logger.info("DocumentParserServicer initialized")

    @staticmethod
    def _failure(message: str):
        """Build a failed ``ParseResponse`` carrying ``message`` and no content."""
        return document_parser_pb2.ParseResponse(
            success=False,
            content="",
            message=message,
            content_length=0,
        )

    @staticmethod
    def _extract_vlm_config(request):
        """Return the request's VLM settings as a dict, or None when VLM is off."""
        if not (hasattr(request, 'vlm_config') and request.vlm_config):
            return None
        vlm_cfg = request.vlm_config
        if not vlm_cfg.enabled:
            return None
        vlm_config = {
            "enabled": vlm_cfg.enabled,
            "provider": vlm_cfg.provider,
            "model": vlm_cfg.model,
            "api_key": vlm_cfg.api_key,
            "base_url": vlm_cfg.base_url,
            "prompt": vlm_cfg.prompt,
        }
        # Only provider and model are logged; the API key is left out.
        logger.info(f"VLM config: provider={vlm_cfg.provider}, model={vlm_cfg.model}")
        return vlm_config

    @staticmethod
    def _resolve_file_type(request, file_name: str) -> str:
        """Pick the file type: explicit request value first, else the extension.

        Both sources are lower-cased and stripped of a leading dot so that
        ``Report.PDF`` and ``.pdf`` resolve to ``pdf``.
        """
        requested = (getattr(request, "file_type", "") or "").strip().lstrip(".").lower()
        if requested:
            return requested
        # Extension of the file name without the dot.
        return os.path.splitext(file_name)[1][1:].lower()

    def ParseDocument(self, request, context):
        """Download the file named in ``request`` and parse it to markdown.

        Args:
            request: ``ParseRequest`` with ``file_url`` and ``file_name``
                (both required), an optional ``file_type`` and an optional
                ``vlm_config``.
            context: gRPC servicer context (unused).

        Returns:
            A ``ParseResponse``. Validation, download and parse failures are
            reported with ``success=False`` and an explanatory ``message``
            rather than raised. When the generated proto modules are
            missing, a plain dict is returned instead.
        """
        if not PROTO_AVAILABLE:
            return {"success": False, "message": "Proto not available"}
        try:
            logger.info(
                "ParseDocument request: file_url=%s, file_name=%s",
                request.file_url,
                request.file_name,
            )
            file_url = request.file_url
            file_name = request.file_name
            if not file_url:
                return self._failure("file_url is required")
            if not file_name:
                return self._failure("file_name is required")

            # Extract the VLM configuration (None unless enabled).
            vlm_config = self._extract_vlm_config(request)

            # Download the file.
            logger.info("Downloading file from URL: %s", file_url)
            try:
                response = requests.get(
                    file_url,
                    timeout=60,
                    headers={"User-Agent": "DocParser/1.0"},
                )
                response.raise_for_status()
                content = response.content
                logger.info("Downloaded %d bytes", len(content))
            except requests.RequestException as e:
                logger.error("Failed to download file: %s", str(e))
                return self._failure(f"Failed to download file: {str(e)}")

            # Parse.
            logger.info("Parsing file")
            file_type = self._resolve_file_type(request, file_name)
            result = self.parser.parse_file(
                file_name=file_name,
                file_type=file_type,
                content=content,
                vlm_config=vlm_config,
            )
            if not result.content:
                return self._failure("Parse failed or empty content")

            markdown_content = result.content
            logger.info("Parse successful: content_length=%d", len(markdown_content))
            return document_parser_pb2.ParseResponse(
                success=True,
                content=markdown_content,
                message="Parse successful",
                content_length=len(markdown_content),
                file_type=file_type or "auto",
                parser_engine="markitdown",
            )
        except Exception as e:
            # Boundary handler: report any unexpected failure to the client
            # in the response body and keep the traceback in the log.
            logger.error("ParseDocument error: %s", str(e), exc_info=True)
            return self._failure(f"Parse error: {str(e)}")

    def GetSupportedFormats(self, request, context):
        """Return the list of supported file extensions.

        Returns:
            A ``SupportedFormatsResponse``, or None when the proto modules
            are unavailable or building the response fails.
        """
        if not PROTO_AVAILABLE:
            return None
        try:
            file_types = [
                "pdf", "docx", "doc", "pptx", "ppt",
                "xlsx", "xls", "csv",
                "md", "markdown",
                "jpg", "jpeg", "png", "gif", "bmp", "tiff", "webp",
                "html", "htm", "txt",
            ]
            return document_parser_pb2.SupportedFormatsResponse(
                file_types=file_types,
            )
        except Exception as e:
            logger.error("GetSupportedFormats error: %s", str(e))
            return None

    def GetEngines(self, request, context):
        """Return the available parser engines.

        Returns:
            An ``EnginesResponse`` describing the single ``markitdown``
            engine, or None when the proto modules are unavailable or
            building the response fails.
        """
        if not PROTO_AVAILABLE:
            return None
        try:
            engines = [
                document_parser_pb2.EngineInfo(
                    name="markitdown",
                    description="MarkItDown parser - supports various document formats",
                    supported_file_types=["pdf", "docx", "pptx", "xlsx", "md", "html", "txt"],
                    available=True,
                )
            ]
            return document_parser_pb2.EnginesResponse(engines=engines)
        except Exception as e:
            logger.error("GetEngines error: %s", str(e))
            return None
def serve(port: int = 50051, max_workers: int = 10):
    """Start the DocumentParser gRPC server and block until it terminates.

    Args:
        port: TCP port to listen on (bound on all interfaces, insecure).
        max_workers: Size of the thread pool that executes RPC handlers.

    Returns:
        None. Returns immediately, after logging an error, when the
        generated proto modules are not available.
    """
    if not PROTO_AVAILABLE:
        logger.error("Proto files not available, cannot start server")
        return
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=max_workers))
    servicer = DocumentParserServicer(max_workers=max_workers)
    # Register the service.
    document_parser_pb2_grpc.add_DocumentParserServicer_to_server(
        servicer, server
    )
    # Enable reflection. The helper expects fully-qualified service *names*
    # (strings), not ServiceDescriptor objects; the reflection service is
    # listed as well so clients can discover it.
    service_names = (
        document_parser_pb2.DESCRIPTOR.services_by_name['DocumentParser'].full_name,
        reflection.SERVICE_NAME,
    )
    reflection.enable_server_reflection(service_names, server)
    server.add_insecure_port(f"0.0.0.0:{port}")
    server.start()
    logger.info(f"DocumentParser gRPC server started on port {port}")
    logger.info("gRPC reflection enabled")
    server.wait_for_termination()

BIN
screenshots/agent管理.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 268 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 62 KiB

View File

@@ -28,6 +28,8 @@ type ParseRequest struct {
FileType string `protobuf:"bytes,3,opt,name=file_type,json=fileType,proto3" json:"file_type,omitempty"`
ParserEngine string `protobuf:"bytes,4,opt,name=parser_engine,json=parserEngine,proto3" json:"parser_engine,omitempty"`
EngineOverrides map[string]string `protobuf:"bytes,5,rep,name=engine_overrides,json=engineOverrides,proto3" json:"engine_overrides,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"`
// VLM 配置(可选)
VlmConfig *VLMConfig `protobuf:"bytes,6,opt,name=vlm_config,json=vlmConfig,proto3" json:"vlm_config,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
@@ -97,6 +99,97 @@ func (x *ParseRequest) GetEngineOverrides() map[string]string {
return nil
}
func (x *ParseRequest) GetVlmConfig() *VLMConfig {
if x != nil {
return x.VlmConfig
}
return nil
}
type VLMConfig struct {
state protoimpl.MessageState `protogen:"open.v1"`
Enabled bool `protobuf:"varint,1,opt,name=enabled,proto3" json:"enabled,omitempty"` // 是否启用 VLM
Provider string `protobuf:"bytes,2,opt,name=provider,proto3" json:"provider,omitempty"` // VLM 提供商: openai, anthropic, local 等
Model string `protobuf:"bytes,3,opt,name=model,proto3" json:"model,omitempty"` // 模型名称
ApiKey string `protobuf:"bytes,4,opt,name=api_key,json=apiKey,proto3" json:"api_key,omitempty"` // API Key
BaseUrl string `protobuf:"bytes,5,opt,name=base_url,json=baseUrl,proto3" json:"base_url,omitempty"` // 自定义 API 地址
Prompt string `protobuf:"bytes,6,opt,name=prompt,proto3" json:"prompt,omitempty"` // 自定义提示词
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *VLMConfig) Reset() {
*x = VLMConfig{}
mi := &file_document_parser_proto_msgTypes[1]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *VLMConfig) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*VLMConfig) ProtoMessage() {}
func (x *VLMConfig) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[1]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use VLMConfig.ProtoReflect.Descriptor instead.
func (*VLMConfig) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{1}
}
func (x *VLMConfig) GetEnabled() bool {
if x != nil {
return x.Enabled
}
return false
}
func (x *VLMConfig) GetProvider() string {
if x != nil {
return x.Provider
}
return ""
}
func (x *VLMConfig) GetModel() string {
if x != nil {
return x.Model
}
return ""
}
func (x *VLMConfig) GetApiKey() string {
if x != nil {
return x.ApiKey
}
return ""
}
func (x *VLMConfig) GetBaseUrl() string {
if x != nil {
return x.BaseUrl
}
return ""
}
func (x *VLMConfig) GetPrompt() string {
if x != nil {
return x.Prompt
}
return ""
}
type ParseResponse struct {
state protoimpl.MessageState `protogen:"open.v1"`
Success bool `protobuf:"varint,1,opt,name=success,proto3" json:"success,omitempty"`
@@ -111,7 +204,7 @@ type ParseResponse struct {
func (x *ParseResponse) Reset() {
*x = ParseResponse{}
mi := &file_document_parser_proto_msgTypes[1]
mi := &file_document_parser_proto_msgTypes[2]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -123,7 +216,7 @@ func (x *ParseResponse) String() string {
func (*ParseResponse) ProtoMessage() {}
func (x *ParseResponse) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[1]
mi := &file_document_parser_proto_msgTypes[2]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -136,7 +229,7 @@ func (x *ParseResponse) ProtoReflect() protoreflect.Message {
// Deprecated: Use ParseResponse.ProtoReflect.Descriptor instead.
func (*ParseResponse) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{1}
return file_document_parser_proto_rawDescGZIP(), []int{2}
}
func (x *ParseResponse) GetSuccess() bool {
@@ -189,7 +282,7 @@ type Empty struct {
func (x *Empty) Reset() {
*x = Empty{}
mi := &file_document_parser_proto_msgTypes[2]
mi := &file_document_parser_proto_msgTypes[3]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -201,7 +294,7 @@ func (x *Empty) String() string {
func (*Empty) ProtoMessage() {}
func (x *Empty) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[2]
mi := &file_document_parser_proto_msgTypes[3]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -214,7 +307,7 @@ func (x *Empty) ProtoReflect() protoreflect.Message {
// Deprecated: Use Empty.ProtoReflect.Descriptor instead.
func (*Empty) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{2}
return file_document_parser_proto_rawDescGZIP(), []int{3}
}
type SupportedFormatsResponse struct {
@@ -227,7 +320,7 @@ type SupportedFormatsResponse struct {
func (x *SupportedFormatsResponse) Reset() {
*x = SupportedFormatsResponse{}
mi := &file_document_parser_proto_msgTypes[3]
mi := &file_document_parser_proto_msgTypes[4]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -239,7 +332,7 @@ func (x *SupportedFormatsResponse) String() string {
func (*SupportedFormatsResponse) ProtoMessage() {}
func (x *SupportedFormatsResponse) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[3]
mi := &file_document_parser_proto_msgTypes[4]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -252,7 +345,7 @@ func (x *SupportedFormatsResponse) ProtoReflect() protoreflect.Message {
// Deprecated: Use SupportedFormatsResponse.ProtoReflect.Descriptor instead.
func (*SupportedFormatsResponse) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{3}
return file_document_parser_proto_rawDescGZIP(), []int{4}
}
func (x *SupportedFormatsResponse) GetFileTypes() []string {
@@ -278,7 +371,7 @@ type EnginesResponse struct {
func (x *EnginesResponse) Reset() {
*x = EnginesResponse{}
mi := &file_document_parser_proto_msgTypes[4]
mi := &file_document_parser_proto_msgTypes[5]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -290,7 +383,7 @@ func (x *EnginesResponse) String() string {
func (*EnginesResponse) ProtoMessage() {}
func (x *EnginesResponse) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[4]
mi := &file_document_parser_proto_msgTypes[5]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -303,7 +396,7 @@ func (x *EnginesResponse) ProtoReflect() protoreflect.Message {
// Deprecated: Use EnginesResponse.ProtoReflect.Descriptor instead.
func (*EnginesResponse) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{4}
return file_document_parser_proto_rawDescGZIP(), []int{5}
}
func (x *EnginesResponse) GetEngines() []*EngineInfo {
@@ -326,7 +419,7 @@ type EngineInfo struct {
func (x *EngineInfo) Reset() {
*x = EngineInfo{}
mi := &file_document_parser_proto_msgTypes[5]
mi := &file_document_parser_proto_msgTypes[6]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -338,7 +431,7 @@ func (x *EngineInfo) String() string {
func (*EngineInfo) ProtoMessage() {}
func (x *EngineInfo) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[5]
mi := &file_document_parser_proto_msgTypes[6]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -351,7 +444,7 @@ func (x *EngineInfo) ProtoReflect() protoreflect.Message {
// Deprecated: Use EngineInfo.ProtoReflect.Descriptor instead.
func (*EngineInfo) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{5}
return file_document_parser_proto_rawDescGZIP(), []int{6}
}
func (x *EngineInfo) GetName() string {
@@ -393,16 +486,25 @@ var File_document_parser_proto protoreflect.FileDescriptor
const file_document_parser_proto_rawDesc = "" +
"\n" +
"\x15document_parser.proto\x12\tdocparser\"\xa5\x02\n" +
"\x15document_parser.proto\x12\tdocparser\"\xda\x02\n" +
"\fParseRequest\x12\x19\n" +
"\bfile_url\x18\x01 \x01(\tR\afileUrl\x12\x1b\n" +
"\tfile_name\x18\x02 \x01(\tR\bfileName\x12\x1b\n" +
"\tfile_type\x18\x03 \x01(\tR\bfileType\x12#\n" +
"\rparser_engine\x18\x04 \x01(\tR\fparserEngine\x12W\n" +
"\x10engine_overrides\x18\x05 \x03(\v2,.docparser.ParseRequest.EngineOverridesEntryR\x0fengineOverrides\x1aB\n" +
"\x10engine_overrides\x18\x05 \x03(\v2,.docparser.ParseRequest.EngineOverridesEntryR\x0fengineOverrides\x123\n" +
"\n" +
"vlm_config\x18\x06 \x01(\v2\x14.docparser.VLMConfigR\tvlmConfig\x1aB\n" +
"\x14EngineOverridesEntry\x12\x10\n" +
"\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" +
"\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\"\xc6\x01\n" +
"\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\"\xa3\x01\n" +
"\tVLMConfig\x12\x18\n" +
"\aenabled\x18\x01 \x01(\bR\aenabled\x12\x1a\n" +
"\bprovider\x18\x02 \x01(\tR\bprovider\x12\x14\n" +
"\x05model\x18\x03 \x01(\tR\x05model\x12\x17\n" +
"\aapi_key\x18\x04 \x01(\tR\x06apiKey\x12\x19\n" +
"\bbase_url\x18\x05 \x01(\tR\abaseUrl\x12\x16\n" +
"\x06prompt\x18\x06 \x01(\tR\x06prompt\"\xc6\x01\n" +
"\rParseResponse\x12\x18\n" +
"\asuccess\x18\x01 \x01(\bR\asuccess\x12\x18\n" +
"\acontent\x18\x02 \x01(\tR\acontent\x12\x18\n" +
@@ -445,32 +547,34 @@ func file_document_parser_proto_rawDescGZIP() []byte {
return file_document_parser_proto_rawDescData
}
var file_document_parser_proto_msgTypes = make([]protoimpl.MessageInfo, 8)
var file_document_parser_proto_msgTypes = make([]protoimpl.MessageInfo, 9)
var file_document_parser_proto_goTypes = []any{
(*ParseRequest)(nil), // 0: docparser.ParseRequest
(*ParseResponse)(nil), // 1: docparser.ParseResponse
(*Empty)(nil), // 2: docparser.Empty
(*SupportedFormatsResponse)(nil), // 3: docparser.SupportedFormatsResponse
(*EnginesResponse)(nil), // 4: docparser.EnginesResponse
(*EngineInfo)(nil), // 5: docparser.EngineInfo
nil, // 6: docparser.ParseRequest.EngineOverridesEntry
nil, // 7: docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry
(*VLMConfig)(nil), // 1: docparser.VLMConfig
(*ParseResponse)(nil), // 2: docparser.ParseResponse
(*Empty)(nil), // 3: docparser.Empty
(*SupportedFormatsResponse)(nil), // 4: docparser.SupportedFormatsResponse
(*EnginesResponse)(nil), // 5: docparser.EnginesResponse
(*EngineInfo)(nil), // 6: docparser.EngineInfo
nil, // 7: docparser.ParseRequest.EngineOverridesEntry
nil, // 8: docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry
}
var file_document_parser_proto_depIdxs = []int32{
6, // 0: docparser.ParseRequest.engine_overrides:type_name -> docparser.ParseRequest.EngineOverridesEntry
7, // 1: docparser.SupportedFormatsResponse.file_type_descriptions:type_name -> docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry
5, // 2: docparser.EnginesResponse.engines:type_name -> docparser.EngineInfo
0, // 3: docparser.DocumentParser.ParseDocument:input_type -> docparser.ParseRequest
2, // 4: docparser.DocumentParser.GetSupportedFormats:input_type -> docparser.Empty
2, // 5: docparser.DocumentParser.GetEngines:input_type -> docparser.Empty
1, // 6: docparser.DocumentParser.ParseDocument:output_type -> docparser.ParseResponse
3, // 7: docparser.DocumentParser.GetSupportedFormats:output_type -> docparser.SupportedFormatsResponse
4, // 8: docparser.DocumentParser.GetEngines:output_type -> docparser.EnginesResponse
6, // [6:9] is the sub-list for method output_type
3, // [3:6] is the sub-list for method input_type
3, // [3:3] is the sub-list for extension type_name
3, // [3:3] is the sub-list for extension extendee
0, // [0:3] is the sub-list for field type_name
7, // 0: docparser.ParseRequest.engine_overrides:type_name -> docparser.ParseRequest.EngineOverridesEntry
1, // 1: docparser.ParseRequest.vlm_config:type_name -> docparser.VLMConfig
8, // 2: docparser.SupportedFormatsResponse.file_type_descriptions:type_name -> docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry
6, // 3: docparser.EnginesResponse.engines:type_name -> docparser.EngineInfo
0, // 4: docparser.DocumentParser.ParseDocument:input_type -> docparser.ParseRequest
3, // 5: docparser.DocumentParser.GetSupportedFormats:input_type -> docparser.Empty
3, // 6: docparser.DocumentParser.GetEngines:input_type -> docparser.Empty
2, // 7: docparser.DocumentParser.ParseDocument:output_type -> docparser.ParseResponse
4, // 8: docparser.DocumentParser.GetSupportedFormats:output_type -> docparser.SupportedFormatsResponse
5, // 9: docparser.DocumentParser.GetEngines:output_type -> docparser.EnginesResponse
7, // [7:10] is the sub-list for method output_type
4, // [4:7] is the sub-list for method input_type
4, // [4:4] is the sub-list for extension type_name
4, // [4:4] is the sub-list for extension extendee
0, // [0:4] is the sub-list for field type_name
}
func init() { file_document_parser_proto_init() }
@@ -484,7 +588,7 @@ func file_document_parser_proto_init() {
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: unsafe.Slice(unsafe.StringData(file_document_parser_proto_rawDesc), len(file_document_parser_proto_rawDesc)),
NumEnums: 0,
NumMessages: 8,
NumMessages: 9,
NumExtensions: 0,
NumServices: 1,
},

View File

@@ -13,6 +13,14 @@ type ParsingConfig struct {
DoclingURL string `json:"docling_url"` // Docling 服务 URL
EnablePDF bool `json:"enable_pdf"` // 是否启用 PDF 解析
Pandoc bool `json:"pandoc"` // 是否启用 Pandoc
// VLM 配置(用于图片 OCR 等)
VLMEnabled bool `json:"vlm_enabled"` // 是否启用 VLM
VLMProvider string `json:"vlm_provider"` // VLM 提供商: openai, anthropic, local 等
VLMModel string `json:"vlm_model"` // 模型名称
VLMAPIKey string `json:"vlm_api_key"` // API Key
VLMBaseURL string `json:"vlm_base_url"` // 自定义 API 地址
VLMPrompt string `json:"vlm_prompt"` // 自定义提示词
}
// Scan 实现 sql.Scanner 接口

View File

@@ -27,6 +27,16 @@ type ParseResult struct {
ParserEngine string
}
// VLMConfig holds the VLM model settings sent along with a parse request.
type VLMConfig struct {
// Whether VLM is enabled.
Enabled bool
Provider string // VLM provider: openai, anthropic, local, etc.
// Model name.
Model string
// API key.
APIKey string
// Custom API address.
BaseURL string
// Custom prompt.
Prompt string
}
// NewAICoreClient 创建 AI-Core 客户端
func NewAICoreClient(address string) (*AICoreClient, error) {
return &AICoreClient{address: address}, nil
@@ -56,7 +66,8 @@ func (c *AICoreClient) Close() {
}
// ParseDocument 解析文档 - 使用生成的 protobuf 代码
func (c *AICoreClient) ParseDocument(fileURL, fileName, fileType string) (*ParseResult, error) {
// vlmConfig 可选,如果不使用 VLM 传 nil
func (c *AICoreClient) ParseDocument(fileURL, fileName, fileType string, vlmConfig *VLMConfig) (*ParseResult, error) {
if c.conn == nil {
if err := c.Connect(); err != nil {
return nil, err
@@ -72,6 +83,18 @@ func (c *AICoreClient) ParseDocument(fileURL, fileName, fileType string) (*Parse
FileType: fileType,
}
// 如果提供了 VLM 配置,添加到请求中
if vlmConfig != nil {
req.VlmConfig = &docparser.VLMConfig{
Enabled: vlmConfig.Enabled,
Provider: vlmConfig.Provider,
Model: vlmConfig.Model,
ApiKey: vlmConfig.APIKey,
BaseUrl: vlmConfig.BaseURL,
Prompt: vlmConfig.Prompt,
}
}
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
defer cancel()

View File

@@ -3,6 +3,7 @@ package service
import (
"bytes"
"encoding/json"
"io"
"log"
"mime/multipart"
"net/http"
@@ -19,8 +20,15 @@ import (
var knowledgeDebugLog *log.Logger
func init() {
debugFile, _ := os.OpenFile("logs/debug.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
// 确保 logs 目录存在
os.MkdirAll("logs", 0755)
debugFile, err := os.OpenFile("logs/debug.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
if err != nil {
// 如果文件打开失败,使用 discard 避免输出到控制台
knowledgeDebugLog = log.New(io.Discard, "", log.Ldate|log.Ltime)
} else {
knowledgeDebugLog = log.New(debugFile, "", log.Ldate|log.Ltime)
}
}
type KnowledgeService struct {
@@ -133,10 +141,36 @@ func (s *KnowledgeService) Update(id string, req model.UpdateKnowledgeRequest) e
// Delete 删除知识库
func (s *KnowledgeService) Delete(id string) error {
	// Load the knowledge base first: its storage config decides whether
	// stored files have to be removed.
	kb, err := s.repo.FindByID(id)
	if err != nil {
		return err
	}

	// Fetch every document that belongs to this knowledge base.
	docs, err := s.repo.FindDocumentsByKBID(id, "")
	if err != nil {
		return err
	}

	// Best-effort cleanup of each document's MinIO object and local
	// Markdown file; a failure here does not abort the deletion.
	for _, doc := range docs {
		// Remove the MinIO object.
		if doc.FileKey != "" && kb.StorageConfig.Type == "minio" {
			s.uploadService.DeleteFileWithConfig(doc.FileKey, kb.StorageConfig)
		}
		// Remove the local Markdown file.
		if s.markdownLocalPath != "" {
			markdownPath := s.markdownLocalPath + "/" + doc.ID + ".md"
			// A missing file is fine (nothing to clean up); anything else
			// is logged so leftover files can be traced.
			if err := os.Remove(markdownPath); err != nil && !os.IsNotExist(err) {
				knowledgeDebugLog.Printf("[Delete] failed to remove markdown file %s: %v", markdownPath, err)
			}
		}
	}

	// Delete the associated document records from the database.
	if err := s.repo.DeleteDocumentsByKBID(id); err != nil {
		return err
	}

	return s.repo.Delete(id)
}
@@ -233,7 +267,7 @@ func (s *KnowledgeService) UploadDocument(kbID string, file *multipart.FileHeade
go s.parseDocument(kbID, doc.ID, result.URL, kb.ParsingConfig)
// 异步调用 AI-Core gRPC 服务解析文档(获取 Markdown
go s.parseDocumentWithAICore(doc.ID, result.URL, doc.Name)
go s.parseDocumentWithAICore(doc.ID, result.URL, doc.Name, kb.ParsingConfig)
return doc, result.URL, nil
}
@@ -293,7 +327,7 @@ func (s *KnowledgeService) parseDocument(kbID, docID, fileURL string, config mod
}
// parseDocumentWithAICore 调用 AI-Core gRPC 服务解析文档
func (s *KnowledgeService) parseDocumentWithAICore(docID, fileURL, fileName string) {
func (s *KnowledgeService) parseDocumentWithAICore(docID, fileURL, fileName string, config model.ParsingConfig) {
if s.aiCoreClient == nil {
knowledgeDebugLog.Printf("[AICore] AI-Core 客户端未初始化")
return
@@ -301,7 +335,21 @@ func (s *KnowledgeService) parseDocumentWithAICore(docID, fileURL, fileName stri
knowledgeDebugLog.Printf("[AICore] 开始解析文档: docID=%s, fileURL=%s, fileName=%s", docID, fileURL, fileName)
result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "")
// 构建 VLM 配置
var vlmConfig *VLMConfig
if config.VLMEnabled {
vlmConfig = &VLMConfig{
Enabled: config.VLMEnabled,
Provider: config.VLMProvider,
Model: config.VLMModel,
APIKey: config.VLMAPIKey,
BaseURL: config.VLMBaseURL,
Prompt: config.VLMPrompt,
}
knowledgeDebugLog.Printf("[AICore] VLM 配置: provider=%s, model=%s, enabled=%v", config.VLMProvider, config.VLMModel, config.VLMEnabled)
}
result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "", vlmConfig)
if err != nil {
knowledgeDebugLog.Printf("[AICore] 解析失败: docID=%s, err=%v", docID, err)
return
@@ -462,7 +510,7 @@ func (s *KnowledgeService) GetDocumentPreview(kbID, docID string, page int) (*mo
// Office文件调用解析服务转换为HTML
if isOffice && s.aiCoreClient != nil {
knowledgeDebugLog.Printf("[Preview] Parsing office file: %s, URL: %s", fileName, fileURL)
result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "")
result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "", nil) // Preview 不使用 VLM
if err != nil {
// 解析失败返回文件URL
knowledgeDebugLog.Printf("[Preview] Parse document failed: %v", err)

File diff suppressed because one or more lines are too long

View File

@@ -1,5 +0,0 @@
sk-5706307e3e3a4eb09452dbf0bb87fe31
https://dashscope.aliyuncs.com/compatible-mode/v1
qwen3.5-flash

View File

@@ -1,5 +0,0 @@
sk-5706307e3e3a4eb09452dbf0bb87fe31
https://dashscope.aliyuncs.com/compatible-mode/v1
qwen3.5-flash

View File

@@ -1,4 +0,0 @@
## students
| 班级 | 姓名 | 年龄 | 性别 |
| --- | --- | --- | --- |
| 1 | 曹 | 123 | 男 |

View File

@@ -1,13 +0,0 @@
# 1 . 你好么?
### 表哥啊啊
大叔大婶打扫的暗示打扫暗示
> 太好了
```python
print("hello world")
```

View File

@@ -1,13 +0,0 @@
# 1 . 你好么?
### 表哥啊啊
大叔大婶打扫的暗示打扫暗示
> 太好了
```python
print("hello world")
```

View File

@@ -1,13 +0,0 @@
# 1 . 你好么?
### 表哥啊啊
大叔大婶打扫的暗示打扫暗示
> 太好了
```python
print("hello world")
```

View File

@@ -1,13 +0,0 @@
# 1 . 你好么?
### 表哥啊啊
大叔大婶打扫的暗示打扫暗示
> 太好了
```python
print("hello world")
```

View File

@@ -1,13 +0,0 @@
# 1 . 你好么?
### 表哥啊啊
大叔大婶打扫的暗示打扫暗示
> 太好了
```python
print("hello world")
```

View File

@@ -1,5 +0,0 @@
sk-5706307e3e3a4eb09452dbf0bb87fe31
https://dashscope.aliyuncs.com/compatible-mode/v1
qwen3.5-flash

View File

@@ -1,3 +0,0 @@
| 班级 | 姓名 | 年龄 | 性别 |
| --- | --- | --- | --- |
| 1 | 曹 | 123 | 男 |

View File

@@ -1,5 +0,0 @@
sk-5706307e3e3a4eb09452dbf0bb87fe31
https://dashscope.aliyuncs.com/compatible-mode/v1
qwen3.5-flash

View File

@@ -1,13 +0,0 @@
# 1 . 你好么?
### 表哥啊啊
大叔大婶打扫的暗示打扫暗示
> 太好了
```python
print("hello world")
```

View File

@@ -1,3 +0,0 @@
| 班级 | 姓名 | 年龄 | 性别 |
| --- | --- | --- | --- |
| 1 | 曹 | 123 | 男 |

View File

@@ -1,5 +0,0 @@
sk-5706307e3e3a4eb09452dbf0bb87fe31
https://dashscope.aliyuncs.com/compatible-mode/v1
qwen3.5-flash

29
web/package-lock.json generated
View File

@@ -20,6 +20,7 @@
"vue-router": "^4.3.0"
},
"devDependencies": {
"@types/papaparse": "^5.5.2",
"@vitejs/plugin-vue": "^5.0.4",
"autoprefixer": "^10.4.19",
"postcss": "^8.4.38",
@@ -982,6 +983,27 @@
"@types/lodash": "*"
}
},
"node_modules/@types/node": {
"version": "25.3.5",
"resolved": "https://registry.npmmirror.com/@types/node/-/node-25.3.5.tgz",
"integrity": "sha512-oX8xrhvpiyRCQkG1MFchB09f+cXftgIXb3a7UUa4Y3wpmZPw5tyZGTLWhlESOLq1Rq6oDlc8npVU2/9xiCuXMA==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"undici-types": "~7.18.0"
}
},
"node_modules/@types/papaparse": {
"version": "5.5.2",
"resolved": "https://registry.npmmirror.com/@types/papaparse/-/papaparse-5.5.2.tgz",
"integrity": "sha512-gFnFp/JMzLHCwRf7tQHrNnfhN4eYBVYYI897CGX4MY1tzY9l2aLkVyx2IlKZ/SAqDbB3I1AOZW5gTMGGsqWliA==",
"dev": true,
"license": "MIT",
"dependencies": {
"@types/node": "*"
}
},
"node_modules/@types/web-bluetooth": {
"version": "0.0.20",
"resolved": "https://registry.npmmirror.com/@types/web-bluetooth/-/web-bluetooth-0.0.20.tgz",
@@ -2624,6 +2646,13 @@
"node": ">=14.17"
}
},
"node_modules/undici-types": {
"version": "7.18.2",
"resolved": "https://registry.npmmirror.com/undici-types/-/undici-types-7.18.2.tgz",
"integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==",
"dev": true,
"license": "MIT"
},
"node_modules/update-browserslist-db": {
"version": "1.2.3",
"resolved": "https://registry.npmmirror.com/update-browserslist-db/-/update-browserslist-db-1.2.3.tgz",

View File

@@ -21,6 +21,7 @@
"vue-router": "^4.3.0"
},
"devDependencies": {
"@types/papaparse": "^5.5.2",
"@vitejs/plugin-vue": "^5.0.4",
"autoprefixer": "^10.4.19",
"postcss": "^8.4.38",

View File

@@ -39,42 +39,45 @@ interface MenuItem {
path?: string
}
const mainMenu = computed<MenuItem[]>(() => [
{ name: 'Dashboard', icon: 'fa-gauge', path: '/dashboard' },
{ name: 'Agents', icon: 'fa-robot', badge: 3, path: '/agents' },
{ name: 'Script', icon: 'fa-code', path: '/script' },
// 第1组: Chat, Agents
const group1 = computed(() => [
{ name: 'Chat', icon: 'fa-robot', path: '/agents' },
{ name: 'Agents', icon: 'fa-users', badge: 3, path: '/agents' },
])
// 第2组: Database, Knowledge
const group2 = computed(() => [
{ name: 'Database', icon: 'fa-database', path: '/database', badge: databaseCount.value },
{ name: 'Knowledge', icon: 'fa-brain', path: '/knowledge', badge: knowledgeCount.value },
])
const middleMenu: MenuItem[] = [
// 第3组: Skills, Tools, Script
const group3 = computed(() => [
{ name: 'Skills', icon: 'fa-wand-magic-sparkles', badge: 21, path: '/mcp' },
{ name: 'Tools', icon: 'fa-tools', badge: 13, path: '/model-apis' },
]
{ name: 'Script', icon: 'fa-code', path: '/script' },
])
const bottomMenu: MenuItem[] = [
{ name: 'Settings', icon: 'fa-gear', path: '/settings' },
]
const bottomMenu2: MenuItem[] = [
// 第4组: Dashboard, Account, Settings
const group4 = computed(() => [
{ name: 'Dashboard', icon: 'fa-gauge', path: '/dashboard' },
{ name: 'Account', icon: 'fa-user', path: '/account' },
]
{ name: 'Settings', icon: 'fa-gear', path: '/settings' },
])
const activeMenu = computed(() => {
const currentPath = route.path
// Check main menu
const menuItem = mainMenu.value.find(item => item.path === currentPath)
if (menuItem) return menuItem.name
// Check middle menu (Skills, Tools)
const middleItem = middleMenu.find(item => item.path === currentPath)
if (middleItem) return middleItem.name
// Check bottom menu (Settings)
const bottomItem = bottomMenu.find(item => item.path === currentPath)
if (bottomItem) return bottomItem.name
// Check bottomMenu2 (Account)
const bottomItem2 = bottomMenu2.find(item => item.path === currentPath)
if (bottomItem2) return bottomItem2.name
return 'Dashboard'
// Special case for /agents - prioritize Chat over Agents
if (currentPath === '/agents') {
return 'Chat'
}
// Check all groups
const allGroups = [...group1.value, ...group2.value, ...group3.value, ...group4.value]
const item = allGroups.find(item => item.path === currentPath)
if (item) return item.name
return 'Chat'
})
const navigateTo = (item: MenuItem) => {
@@ -129,8 +132,8 @@ const handleUserCommand = (command: string) => {
<!-- 导航菜单 -->
<nav class="flex-1 px-3 py-2">
<ul class="space-y-1">
<!-- Dashboard, Agents -->
<li v-for="item in mainMenu.slice(0, 2)" :key="item.name">
<!-- 第1组: Chat, Agents -->
<li v-for="item in group1" :key="item.name">
<a
href="#"
class="flex items-center justify-between px-3 py-2.5 rounded-lg transition-colors text-sm"
@@ -148,8 +151,8 @@ const handleUserCommand = (command: string) => {
<!-- 分隔线1 -->
<li class="my-4 border-t border-dark-500"></li>
<!-- Database, Knowledge -->
<li v-for="item in mainMenu.slice(2)" :key="item.name">
<!-- 第2组: Database, Knowledge -->
<li v-for="item in group2" :key="item.name">
<a
href="#"
class="flex items-center justify-between px-3 py-2.5 rounded-lg transition-colors text-sm"
@@ -167,8 +170,8 @@ const handleUserCommand = (command: string) => {
<!-- 分隔线2 -->
<li class="my-4 border-t border-dark-500"></li>
<!-- Skills & Tools -->
<li v-for="item in middleMenu" :key="item.name">
<!-- 第3组: Skills, Tools, Script -->
<li v-for="item in group3" :key="item.name">
<a
href="#"
class="flex items-center justify-between px-3 py-2.5 rounded-lg transition-colors text-sm"
@@ -183,8 +186,11 @@ const handleUserCommand = (command: string) => {
</a>
</li>
<!-- Settings -->
<li v-for="item in bottomMenu" :key="item.name">
<!-- 分隔线3 -->
<li class="my-4 border-t border-dark-500"></li>
<!-- 第4组: Dashboard, Account, Settings -->
<li v-for="item in group4" :key="item.name">
<a
href="#"
class="flex items-center justify-between px-3 py-2.5 rounded-lg transition-colors text-sm"
@@ -202,24 +208,6 @@ const handleUserCommand = (command: string) => {
</div>
</a>
</li>
<!-- 分隔线 -->
<li class="my-4 border-t border-dark-500"></li>
<!-- Account -->
<li v-for="item in bottomMenu2" :key="item.name">
<a
href="#"
class="flex items-center justify-between px-3 py-2.5 rounded-lg transition-colors text-sm"
:class="activeMenu === item.name ? 'bg-dark-600 text-white' : 'text-gray-400 hover:bg-dark-600 hover:text-white'"
@click="navigateTo(item)"
>
<div class="flex items-center gap-3">
<i :class="['fa-solid', item.icon, 'w-5', 'text-center']"></i>
<span>{{ item.name }}</span>
</div>
</a>
</li>
</ul>
</nav>

View File

@@ -1,645 +1,429 @@
<script setup lang="ts">
import { ref } from 'vue'
import { ref, nextTick } from 'vue'
interface ChatMessage {
id: number
role: 'user' | 'assistant'
content: string
timestamp: Date
isStreaming?: boolean
}
interface Agent {
id: number
name: string
framework: string
status: 'running' | 'stopped' | 'error'
mcpServers: number
model: string
createdAt: string
avatar: string
description: string
accentColor: string
gradient: string
}
// Agents 数据
// AI 助手配置
const agents = ref<Agent[]>([
{ id: 1, name: 'template-google-adk-api', framework: 'Google ADK', status: 'running', mcpServers: 2, model: 'gemini-2.0-flash', createdAt: '2025-04-10', description: 'Google ADK template for agent deployment' },
{ id: 2, name: 'mcp-google-adk-api', framework: 'Google ADK', status: 'error', mcpServers: 1, model: 'gemini-2.0-flash', createdAt: '2025-04-08', description: 'MCP-enabled Google ADK agent' },
{ id: 3, name: 'template-openai-api', framework: 'OpenAI', status: 'stopped', mcpServers: 3, model: 'gpt-4o', createdAt: '2025-04-05', description: 'OpenAI API template agent' },
{ id: 4, name: 'pydantic-ai-agent', framework: 'PydanticAI', status: 'running', mcpServers: 2, model: 'gpt-4o-mini', createdAt: '2025-04-12', description: 'PydanticAI framework agent' },
{ id: 5, name: 'langchain-agent', framework: 'LangChain', status: 'running', mcpServers: 4, model: 'claude-3-5-sonnet', createdAt: '2025-04-11', description: 'LangChain based agent with tools' },
{ id: 1, name: 'Claude', avatar: '🧠', description: 'Anthropic AI', accentColor: '#f97316', gradient: 'from-orange-500/20 to-amber-500/20' },
{ id: 2, name: 'Gemini', avatar: '✨', description: 'Google DeepMind', accentColor: '#8b5cf6', gradient: 'from-violet-500/20 to-purple-500/20' },
{ id: 3, name: 'ChatGPT', avatar: '💬', description: 'OpenAI', accentColor: '#10b981', gradient: 'from-emerald-500/20 to-green-500/20' },
{ id: 4, name: 'DeepSeek', avatar: '🔮', description: 'DeepSeek AI', accentColor: '#3b82f6', gradient: 'from-blue-500/20 to-cyan-500/20' },
{ id: 5, name: 'Kimi', avatar: '🌙', description: 'Moonshot AI', accentColor: '#ec4899', gradient: 'from-pink-500/20 to-rose-500/20' },
{ id: 6, name: '文心一言', avatar: '🐉', description: 'Baidu', accentColor: '#ef4444', gradient: 'from-red-500/20 to-orange-500/20' },
{ id: 7, name: '通义千问', avatar: '☁️', description: 'Alibaba', accentColor: '#06b6d4', gradient: 'from-cyan-500/20 to-sky-500/20' },
])
// 编辑状态
const editingAgent = ref<Agent | null>(null)
const isEditing = ref(false)
const isCreating = ref(false)
const searchQuery = ref('')
const filterStatus = ref<string>('all')
// 当前选中的助手
const selectedAgent = ref<Agent | null>(agents.value[0])
const sidebarCollapsed = ref(false)
// 新建 Agent 表单
const newAgentForm = ref({
name: '',
framework: 'Google ADK',
model: 'gemini-2.0-flash',
description: '',
mcpServers: [] as string[],
})
// 聊天消息
const messages = ref<ChatMessage[]>([
{ id: 1, role: 'assistant', content: '你好!我是 Claude你的 AI 助手。有什么我可以帮助你的吗?', timestamp: new Date() },
])
const frameworks = [
{ name: 'Google ADK', icon: 'fa-google', color: 'from-blue-500 to-blue-600' },
{ name: 'OpenAI', icon: 'fa-openai', color: 'from-green-500 to-green-600' },
{ name: 'PydanticAI', icon: 'fa-robot', color: 'from-purple-500 to-purple-600' },
{ name: 'LangChain', icon: 'fa-link', color: 'from-orange-500 to-orange-600' },
]
// 输入内容
const inputMessage = ref('')
const isLoading = ref(false)
const messagesContainer = ref<HTMLElement | null>(null)
const models = [
{ name: 'Google ADK', models: ['gemini-2.0-flash', 'gemini-1.5-pro', 'gemini-pro'] },
{ name: 'OpenAI', models: ['gpt-4o', 'gpt-4o-mini', 'gpt-4-turbo'] },
{ name: 'PydanticAI', models: ['gpt-4o', 'gpt-4o-mini', 'claude-3-5-sonnet'] },
{ name: 'LangChain', models: ['claude-3-5-sonnet', 'gpt-4o', 'gpt-4o-mini'] },
]
// 发送消息
const sendMessage = async () => {
if (!inputMessage.value.trim() || isLoading.value) return
const availableMCPServers = [
{ name: 'linear-demo', icon: 'fa-check-circle', status: 'connected' },
{ name: 'google-maps', icon: 'fa-map-marker-alt', status: 'connected' },
{ name: 'explorer-mcp', icon: 'fa-folder', status: 'connected' },
{ name: 'postgres-mcp', icon: 'fa-database', status: 'disconnected' },
{ name: 'github-mcp', icon: 'fa-github', status: 'disconnected' },
]
const userContent = inputMessage.value.trim()
inputMessage.value = ''
// 打开新建弹窗
const openCreate = () => {
newAgentForm.value = {
name: '',
framework: 'Google ADK',
model: 'gemini-2.0-flash',
description: '',
mcpServers: [],
const userMessage: ChatMessage = {
id: Date.now(),
role: 'user',
content: userContent,
timestamp: new Date()
}
isCreating.value = true
}
messages.value.push(userMessage)
// 关闭新建弹窗
const closeCreate = () => {
isCreating.value = false
}
const aiMessage: ChatMessage = {
id: Date.now() + 1,
role: 'assistant',
content: '',
timestamp: new Date(),
isStreaming: true
}
messages.value.push(aiMessage)
// 保存新建
const saveNewAgent = () => {
const newId = Math.max(...agents.value.map(a => a.id)) + 1
agents.value.push({
id: newId,
name: newAgentForm.value.name || 'Untitled Agent',
framework: newAgentForm.value.framework,
status: 'stopped',
mcpServers: newAgentForm.value.mcpServers.length,
model: newAgentForm.value.model,
createdAt: new Date().toISOString().split('T')[0],
description: newAgentForm.value.description,
})
isCreating.value = false
}
nextTick(() => scrollToBottom())
// 切换 MCP 服务器
const toggleMCPServer = (serverName: string) => {
const index = newAgentForm.value.mcpServers.indexOf(serverName)
if (index === -1) {
newAgentForm.value.mcpServers.push(serverName)
isLoading.value = true
const fullResponse = `我理解你发送了消息: "${userContent}"
作为 AI 助手,我可以帮助你:
• 回答各种问题
• 编写代码和调试
• 分析和处理数据
• 翻译和写作
• 头脑风暴和创意建议
请告诉我你需要什么帮助?`
let currentIndex = 0
const words = fullResponse.split('')
const streamInterval = setInterval(() => {
if (currentIndex < words.length) {
aiMessage.content += words[currentIndex]
currentIndex++
nextTick(() => scrollToBottom())
} else {
newAgentForm.value.mcpServers.splice(index, 1)
clearInterval(streamInterval)
aiMessage.isStreaming = false
isLoading.value = false
}
}, 30)
}
/**
 * Scroll the message list to its bottom edge.
 * Does nothing while the container ref is still null.
 */
const scrollToBottom = () => {
  const container = messagesContainer.value
  if (!container) return
  container.scrollTop = container.scrollHeight
}
// 编辑表单数据
const editForm = ref({
name: '',
framework: '',
model: '',
description: '',
})
// 打开编辑弹窗
const openEdit = (agent: Agent) => {
editingAgent.value = agent
editForm.value = {
name: agent.name,
framework: agent.framework,
model: agent.model,
description: agent.description,
}
isEditing.value = true
/**
 * Copy a message's text to the system clipboard.
 *
 * The Clipboard API is only exposed in secure contexts, and `writeText`
 * rejects when the write is not permitted. Both failures are logged here
 * instead of escaping the click handler as an uncaught error or an
 * unhandled promise rejection.
 *
 * @param content - Text to place on the clipboard.
 */
const copyMessage = (content: string) => {
  const clipboard = navigator.clipboard
  if (!clipboard) {
    console.warn('Clipboard API is unavailable; message was not copied')
    return
  }
  clipboard.writeText(content).catch((err: unknown) => {
    console.error('Failed to copy message to clipboard:', err)
  })
}
// 保存编辑
const saveEdit = () => {
if (editingAgent.value) {
const index = agents.value.findIndex(a => a.id === editingAgent.value!.id)
if (index !== -1) {
agents.value[index] = {
...agents.value[index],
...editForm.value,
}
}
}
isEditing.value = false
/**
 * Make `agent` the active assistant and restart the conversation with a
 * single greeting message that carries the assistant's name.
 *
 * @param agent - Assistant picked by the user.
 */
const selectAgent = (agent: Agent) => {
  selectedAgent.value = agent
  const greeting: ChatMessage = {
    id: 1,
    role: 'assistant',
    content: `你好！我是 ${agent.name}。有什么我可以帮助你的吗？`,
    timestamp: new Date(),
  }
  messages.value = [greeting]
}
// 取消编辑
const cancelEdit = () => {
isEditing.value = false
editingAgent.value = null
/**
 * Start a fresh conversation with the currently selected assistant.
 * The greeting falls back to the name 'Claude' when no assistant is selected.
 */
const newChat = () => {
  const greeting: ChatMessage = {
    id: 1,
    role: 'assistant',
    content: `你好！我是 ${selectedAgent.value?.name || 'Claude'}。有什么我可以帮助你的吗？`,
    timestamp: new Date(),
  }
  messages.value = [greeting]
}
// 切换状态
const toggleStatus = (agent: Agent) => {
if (agent.status === 'running') {
agent.status = 'stopped'
} else if (agent.status === 'stopped') {
agent.status = 'running'
/**
 * Format a timestamp as a two-digit hour and minute using the zh-CN locale.
 *
 * @param date - Timestamp to format.
 * @returns The locale-formatted time string.
 */
const formatTime = (date: Date) => {
  const hourMinute: Intl.DateTimeFormatOptions = { hour: '2-digit', minute: '2-digit' }
  return date.toLocaleTimeString('zh-CN', hourMinute)
}
/**
 * Keydown handler for the message input: Enter sends, Shift+Enter does not.
 *
 * Enter presses that belong to an IME composition session are ignored.
 * With Chinese/Japanese/Korean input methods Enter confirms the candidate
 * text, and sending at that moment would submit a half-composed message.
 *
 * @param e - Keyboard event from the textarea.
 */
const handleKeydown = (e: KeyboardEvent) => {
  if (e.key !== 'Enter' || e.shiftKey) return
  // Enter is confirming an IME candidate, not submitting the message
  if (e.isComposing) return
  e.preventDefault()
  sendMessage()
}
// 删除 Agent
const deleteAgent = (id: number) => {
agents.value = agents.value.filter(a => a.id !== id)
/**
 * Resize the textarea that fired the event to fit its content,
 * capped at 160px.
 *
 * @param e - Input event whose target is the message textarea.
 */
const autoResize = (e: Event) => {
  const textarea = e.target as HTMLTextAreaElement
  // Reset before measuring; presumably so the box can shrink again after text is deleted
  textarea.style.height = 'auto'
  const nextHeight = Math.min(textarea.scrollHeight, 160)
  textarea.style.height = nextHeight + 'px'
}
// 过滤后的 Agents
const filteredAgents = () => {
return agents.value.filter(agent => {
const matchSearch = agent.name.toLowerCase().includes(searchQuery.value.toLowerCase()) ||
agent.framework.toLowerCase().includes(searchQuery.value.toLowerCase())
const matchStatus = filterStatus.value === 'all' || agent.status === filterStatus.value
return matchSearch && matchStatus
})
}
// 状态颜色
const statusClass = (status: string) => {
switch (status) {
case 'running': return 'bg-primary-success'
case 'stopped': return 'bg-gray-500'
case 'error': return 'bg-primary-danger'
default: return 'bg-gray-500'
}
/**
 * Toggle the assistant sidebar, then re-scroll the message list to the
 * bottom after 350 ms.
 */
const toggleSidebar = () => {
  const wasCollapsed = sidebarCollapsed.value
  sidebarCollapsed.value = !wasCollapsed
  // NOTE(review): 350 ms presumably outlasts the sidebar's 300 ms transition; confirm
  setTimeout(() => scrollToBottom(), 350)
}
</script>
<style scoped>
/* 模态框进入动画 */
@keyframes modal-in {
0% {
::-webkit-scrollbar {
width: 4px;
}
::-webkit-scrollbar-track {
background: transparent;
}
::-webkit-scrollbar-thumb {
background: rgba(255, 255, 255, 0.1);
border-radius: 2px;
}
::-webkit-scrollbar-thumb:hover {
background: rgba(255, 255, 255, 0.2);
}
@keyframes messageSlideIn {
from {
opacity: 0;
transform: scale(0.95) translateY(20px);
transform: translateY(12px);
}
100% {
to {
opacity: 1;
transform: scale(1) translateY(0);
transform: translateY(0);
}
}
@keyframes fade-in {
0% { opacity: 0; transform: translateY(-5px); }
100% { opacity: 1; transform: translateY(0); }
.message-enter {
animation: messageSlideIn 0.35s cubic-bezier(0.16, 1, 0.3, 1) forwards;
}
@keyframes float {
0%, 100% { transform: translateY(0); }
50% { transform: translateY(-8px); }
@keyframes blink {
0%, 100% { opacity: 1; }
50% { opacity: 0; }
}
@keyframes scale-in {
0% { opacity: 0; transform: scale(0.9); }
100% { opacity: 1; transform: scale(1); }
}
.animate-modal-in {
animation: modal-in 0.4s cubic-bezier(0.16, 1, 0.3, 1) forwards;
}
.animate-fade-in {
animation: fade-in 0.3s ease-out forwards;
}
.animate-float {
animation: float 3s ease-in-out infinite;
}
.animate-scale-in {
animation: scale-in 0.5s ease-out forwards;
.cursor-blink {
animation: blink 1s step-end infinite;
}
</style>
<template>
<!-- 主内容区域 -->
<div class="p-6 min-h-screen">
<!-- 顶部导航 -->
<div class="flex justify-between items-center mb-6">
<div class="h-screen flex bg-[#0a0a0f]">
<!-- 主聊天区域 -->
<div class="flex-1 flex flex-col bg-[#0a0a0f]">
<!-- 顶部栏 -->
<div class="h-14 px-6 flex items-center justify-between border-b border-white/5 bg-[#0d0d12]/50 backdrop-blur-sm">
<!-- 左侧当前AI信息 -->
<div class="flex items-center gap-3">
<div v-if="selectedAgent" class="flex items-center gap-3">
<div
class="w-8 h-8 rounded-lg flex items-center justify-center text-lg shadow-lg"
:style="{ backgroundColor: selectedAgent.accentColor + '20', color: selectedAgent.accentColor }"
>
{{ selectedAgent.avatar }}
</div>
<div>
<div class="text-sm font-medium text-white">{{ selectedAgent?.name || 'Chat' }}</div>
<div class="text-[11px] flex items-center gap-1.5">
<span class="w-1.5 h-1.5 rounded-full bg-emerald-500 animate-pulse"></span>
<span class="text-white/40">Online</span>
</div>
</div>
</div>
</div>
<!-- 右上角操作 -->
<div class="flex items-center gap-2">
<i class="fa-solid fa-robot text-gray-400"></i>
<span class="font-medium">Agents</span>
</div>
<button @click="openCreate" class="btn-primary">
<i class="fa-solid fa-plus"></i>
New Agent
<button class="p-2 rounded-lg hover:bg-white/5 text-white/40 hover:text-white transition-colors">
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5" d="M13.828 10.172a4 4 0 00-5.656 0l-4 4a4 4 0 105.656 5.656l1.102-1.101m-.758-4.899a4 4 0 005.656 0l4-4a4 4 0 00-5.656-5.656l-1.1 1.1"></path>
</svg>
</button>
</div>
<!-- 搜索和筛选 -->
<div class="flex gap-4 mb-6">
<div class="flex-1 relative">
<i class="fa-solid fa-search absolute left-3 top-1/2 -translate-y-1/2 text-gray-400"></i>
<input
v-model="searchQuery"
type="text"
placeholder="Search agents..."
class="search-input w-full"
>
</div>
<el-select v-model="filterStatus" placeholder="Select" class="w-40" size="large">
<el-option label="All Status" value="all" />
<el-option label="Running" value="running" />
<el-option label="Stopped" value="stopped" />
<el-option label="Error" value="error" />
</el-select>
</div>
<!-- Agents 列表 -->
<div class="bg-dark-700 rounded-xl overflow-hidden">
<table class="w-full">
<thead class="bg-dark-600">
<tr>
<th class="text-left px-5 py-3 text-sm font-medium text-gray-400">Agent Name</th>
<th class="text-left px-5 py-3 text-sm font-medium text-gray-400">Framework</th>
<th class="text-left px-5 py-3 text-sm font-medium text-gray-400">Model</th>
<th class="text-left px-5 py-3 text-sm font-medium text-gray-400">MCP Servers</th>
<th class="text-left px-5 py-3 text-sm font-medium text-gray-400">Status</th>
<th class="text-left px-5 py-3 text-sm font-medium text-gray-400">Created</th>
<th class="text-right px-5 py-3 text-sm font-medium text-gray-400">Actions</th>
</tr>
</thead>
<tbody>
<tr v-for="agent in filteredAgents()" :key="agent.id" class="table-row">
<td class="px-5 py-4">
<div class="font-medium">{{ agent.name }}</div>
<div class="text-sm text-gray-500">{{ agent.description }}</div>
</td>
<td class="px-5 py-4">
<span class="bg-dark-500 px-2 py-1 rounded text-sm">{{ agent.framework }}</span>
</td>
<td class="px-5 py-4 text-gray-300">{{ agent.model }}</td>
<td class="px-5 py-4">
<span class="text-primary-cyan">{{ agent.mcpServers }}</span>
</td>
<td class="px-5 py-4">
<div class="flex items-center gap-2">
<span class="w-2 h-2 rounded-full" :class="statusClass(agent.status)"></span>
<span class="capitalize text-sm">{{ agent.status }}</span>
</div>
</td>
<td class="px-5 py-4 text-gray-400 text-sm">{{ agent.createdAt }}</td>
<td class="px-5 py-4">
<div class="flex items-center justify-end gap-2">
<button class="p-2 rounded-lg hover:bg-white/5 text-white/40 hover:text-white transition-colors">
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5" d="M12 5v.01M12 12v.01M12 19v.01M12 6a1 1 0 110-2 1 1 0 010 2zm0 7a1 1 0 110-2 1 1 0 010 2zm0 7a1 1 0 110-2 1 1 0 010 2z"></path>
</svg>
</button>
<!-- 展开侧边栏按钮仅在侧边栏隐藏时显示 -->
<button
@click="toggleStatus(agent)"
class="btn-icon"
:title="agent.status === 'running' ? 'Stop' : 'Start'"
v-if="sidebarCollapsed"
@click="toggleSidebar"
class="p-2 rounded-lg hover:bg-white/5 text-white/40 hover:text-white transition-colors"
title="Show AI assistants"
>
<i :class="['fa-solid', agent.status === 'running' ? 'fa-stop' : 'fa-play', 'text-gray-400']"></i>
<svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5" d="M4 6h16M4 12h16M4 18h16"></path>
</svg>
</button>
</div>
</div>
<!-- 消息区域 -->
<div ref="messagesContainer" class="flex-1 overflow-y-auto px-6 py-6">
<div class="max-w-3xl mx-auto space-y-6">
<div
v-for="message in messages"
:key="message.id"
class="message-enter flex gap-4"
:class="message.role === 'user' ? 'flex-row-reverse' : ''"
>
<!-- 头像 -->
<div
class="w-8 h-8 rounded-lg flex items-center justify-center flex-shrink-0 shadow-lg"
:class="message.role === 'user' ? 'bg-gradient-to-br from-emerald-500 to-teal-600' : ''"
:style="message.role === 'assistant' && selectedAgent ? {
backgroundColor: selectedAgent.accentColor + '20',
color: selectedAgent.accentColor
} : {}"
>
<span v-if="message.role === 'user'" class="text-white text-sm">👤</span>
<span v-else class="text-lg">{{ selectedAgent?.avatar || '🧠' }}</span>
</div>
<!-- 消息内容 -->
<div
class="max-w-[75%] rounded-2xl px-4 py-3"
:class="message.role === 'user' ? 'bg-[#1e1e28] text-white' : 'bg-transparent'"
>
<div class="text-sm leading-relaxed whitespace-pre-wrap text-white/90">{{ message.content }}
<span v-if="message.isStreaming" class="inline-block w-0.5 h-4 ml-0.5 bg-violet-400 cursor-blink align-middle"></span>
</div>
<!-- 消息底部 -->
<div class="flex items-center justify-end mt-2 gap-3">
<span class="text-[10px] text-white/25">{{ formatTime(message.timestamp) }}</span>
<button
@click="openEdit(agent)"
class="btn-icon"
title="Edit"
v-if="message.role === 'assistant' && !message.isStreaming"
@click="copyMessage(message.content)"
class="text-white/25 hover:text-violet-400 transition-colors"
title="Copy"
>
<i class="fa-solid fa-pen text-gray-400"></i>
</button>
<button
@click="deleteAgent(agent.id)"
class="btn-icon"
title="Delete"
>
<i class="fa-solid fa-trash text-gray-400 hover:text-primary-danger"></i>
<svg class="w-3.5 h-3.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5" d="M8 16H6a2 2 0 01-2-2V6a2 2 0 012-2h8a2 2 0 012 2v2m-6 12h8a2 2 0 002-2v-8a2 2 0 00-2-2h-8a2 2 0 00-2 2v8a2 2 0 002 2z"></path>
</svg>
</button>
</div>
</td>
</tr>
</tbody>
</table>
<!-- 空状态 -->
<div v-if="filteredAgents().length === 0" class="py-12 text-center text-gray-500">
<i class="fa-solid fa-robot text-4xl mb-3"></i>
<p>No agents found</p>
</div>
</div>
</div>
</div>
<!-- 编辑弹窗 -->
<Teleport to="body">
<div v-if="isEditing" class="fixed inset-0 bg-black/60 flex items-center justify-center z-50">
<div class="bg-dark-700 rounded-2xl w-full max-w-lg border border-dark-500 shadow-2xl">
<!-- 弹窗头部 -->
<div class="flex items-center justify-between p-5 border-b border-dark-500">
<h3 class="text-lg font-semibold">Edit Agent</h3>
<button @click="cancelEdit" class="text-gray-400 hover:text-white transition-colors">
<i class="fa-solid fa-xmark text-xl"></i>
<!-- 输入区域 -->
<div class="p-4 border-t border-white/5 bg-[#0d0d12]/50">
<div class="max-w-3xl mx-auto">
<div class="relative bg-[#12121a] rounded-2xl border border-white/8 focus-within:border-violet-500/40 focus-within:shadow-lg focus-within:shadow-violet-500/10 transition-all duration-300">
<!-- 附件按钮 -->
<button class="absolute left-4 top-1/2 -translate-y-1/2 text-white/30 hover:text-white/60 transition-colors p-1">
<svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5" d="M15.172 7l-6.586 6.586a2 2 0 102.828 2.828l6.414-6.586a4 4 0 00-5.656-5.656l-6.415 6.585a6 6 0 108.486 8.486L20.5 13"></path>
</svg>
</button>
</div>
<!-- 弹窗内容 -->
<div class="p-5 space-y-4">
<div>
<label class="block text-sm font-medium text-gray-300 mb-2">Agent Name</label>
<input
v-model="editForm.name"
type="text"
class="input-field"
>
</div>
<div>
<label class="block text-sm font-medium text-gray-300 mb-2">Framework</label>
<el-select v-model="editForm.framework" placeholder="Select" class="w-full" size="large">
<el-option label="Google ADK" value="Google ADK" />
<el-option label="OpenAI" value="OpenAI" />
<el-option label="PydanticAI" value="PydanticAI" />
<el-option label="LangChain" value="LangChain" />
</el-select>
</div>
<div>
<label class="block text-sm font-medium text-gray-300 mb-2">Model</label>
<el-select v-model="editForm.model" placeholder="Select" class="w-full" size="large">
<el-option label="gemini-2.0-flash" value="gemini-2.0-flash" />
<el-option label="gpt-4o" value="gpt-4o" />
<el-option label="gpt-4o-mini" value="gpt-4o-mini" />
<el-option label="claude-3-5-sonnet" value="claude-3-5-sonnet" />
</el-select>
</div>
<div>
<label class="block text-sm font-medium text-gray-300 mb-2">Description</label>
<!-- 输入框 -->
<textarea
v-model="editForm.description"
rows="3"
class="input-field resize-none"
v-model="inputMessage"
@keydown="handleKeydown"
@input="autoResize"
placeholder="Send a message..."
rows="1"
class="w-full bg-transparent text-white placeholder-white/30 py-3.5 pl-12 pr-24 resize-none focus:outline-none text-sm"
></textarea>
</div>
</div>
<!-- 弹窗底部 -->
<div class="flex items-center justify-end gap-3 p-5 border-t border-dark-500">
<!-- 发送按钮 -->
<button
@click="cancelEdit"
class="btn-secondary"
@click="sendMessage"
:disabled="!inputMessage.trim() || isLoading"
class="absolute right-2 top-1/2 -translate-y-1/2 p-2 rounded-xl bg-violet-500 hover:bg-violet-400 disabled:bg-white/8 disabled:text-white/20 text-white transition-all duration-200 hover:shadow-lg hover:shadow-violet-500/25"
>
Cancel
</button>
<button
@click="saveEdit"
class="btn-primary"
>
Save Changes
</button>
</div>
</div>
</div>
</Teleport>
<!-- 新建 Agent 模态框 -->
<Teleport to="body">
<div v-if="isCreating" class="fixed inset-0 bg-black/80 flex items-center justify-center z-50 p-4">
<div class="bg-dark-800 rounded-2xl w-full max-w-6xl h-[85vh] border border-dark-600 shadow-2xl overflow-hidden flex flex-col animate-modal-in">
<!-- 模态框头部 -->
<div class="flex items-center justify-between p-5 border-b border-dark-600 bg-dark-700/50">
<div class="flex items-center gap-3">
<div class="w-10 h-10 rounded-xl bg-gradient-to-br from-primary-orange to-red-500 flex items-center justify-center animate-pulse">
<i class="fa-solid fa-robot text-white"></i>
</div>
<div>
<h3 class="text-xl font-semibold text-white">Create New Agent</h3>
<p class="text-sm text-gray-400">Configure your agent workflow</p>
</div>
</div>
<button @click="closeCreate" class="text-gray-400 hover:text-white transition-all p-2 hover:bg-dark-600 rounded-lg">
<i class="fa-solid fa-xmark text-xl"></i>
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 19l9 2-9-18-9 18 9-2zm0 0v-8"></path>
</svg>
</button>
</div>
<!-- 三栏布局主体 -->
<div class="flex-1 flex overflow-hidden">
<!-- 左侧框架选择 -->
<div class="w-72 bg-dark-700/50 border-r border-dark-600 p-5 overflow-y-auto">
<div class="flex items-center gap-2 mb-4">
<i class="fa-solid fa-layer-group text-primary-orange"></i>
<h4 class="font-medium text-white">Framework</h4>
<!-- 提示 -->
<div class="text-center mt-2.5">
<span class="text-[10px] text-white/20">AI can make mistakes. Please verify important information.</span>
</div>
<div class="space-y-3">
<div
v-for="fw in frameworks"
:key="fw.name"
@click="newAgentForm.framework = fw.name; newAgentForm.model = models.find(m => m.name === fw.name)?.models[0] || ''"
class="p-4 rounded-xl border-2 cursor-pointer transition-all duration-300 hover:scale-105"
:class="newAgentForm.framework === fw.name
? 'border-primary-orange bg-dark-600 shadow-lg shadow-primary-orange/20'
: 'border-dark-500 bg-dark-700 hover:border-gray-500'"
>
<div class="flex items-center gap-3">
<div :class="['w-10 h-10 rounded-lg bg-gradient-to-br flex items-center justify-center', fw.color]">
<i :class="['fa-solid', fw.icon, 'text-white text-lg']"></i>
</div>
<span class="font-medium text-white">{{ fw.name }}</span>
</div>
<div v-if="newAgentForm.framework === fw.name" class="mt-2 flex items-center gap-1 text-primary-orange text-sm animate-fade-in">
<i class="fa-solid fa-check-circle"></i>
<span>Selected</span>
</div>
</div>
</div>
<!-- 模型选择 -->
<div class="mt-6">
<div class="flex items-center gap-2 mb-3">
<i class="fa-solid fa-brain text-primary-cyan"></i>
<h4 class="font-medium text-white">Model</h4>
</div>
<el-select v-model="newAgentForm.model" placeholder="Select" class="w-full" size="large">
<el-option
v-for="model in models.find(m => m.name === newAgentForm.framework)?.models"
:key="model"
:label="model"
:value="model"
/>
</el-select>
</div>
<!-- Agent 名称 -->
<div class="mt-6">
<div class="flex items-center gap-2 mb-3">
<i class="fa-solid fa-tag text-primary-purple"></i>
<h4 class="font-medium text-white">Agent Name</h4>
</div>
<input
v-model="newAgentForm.name"
type="text"
placeholder="Enter agent name..."
class="w-full bg-dark-600 border border-dark-500 rounded-lg px-4 py-3 text-white placeholder-gray-500 focus:outline-none focus:border-primary-orange transition-colors"
>
</div>
</div>
<!-- 中间流程画布 -->
<div class="flex-1 bg-dark-900 relative overflow-hidden">
<!-- 背景网格 -->
<div class="absolute inset-0 opacity-10" style="background-image: radial-gradient(circle, #1E6BF9 1px, transparent 1px); background-size: 30px 30px;"></div>
<!-- 流程节点 -->
<div class="h-full flex flex-col items-center justify-center p-8 relative z-10">
<!-- 开始节点 -->
<div class="node bg-gradient-to-r from-blue-500 to-blue-600 rounded-xl w-64 p-4 shadow-lg shadow-blue-500/30 animate-float">
<div class="flex items-center gap-3">
<div class="w-8 h-8 rounded-lg bg-white/20 flex items-center justify-center">
<i class="fa-solid fa-play text-white text-sm"></i>
</div>
<div>
<div class="font-medium text-white">Start</div>
<div class="text-xs text-blue-200">Agent begins</div>
</div>
</div>
</div>
<!-- 连接线 -->
<div class="h-8 w-0.5 bg-gradient-to-b from-blue-500 to-primary-orange animate-pulse"></div>
<!-- 框架节点 -->
<div class="node bg-dark-700 border-2 border-primary-orange rounded-xl w-64 p-4 shadow-lg shadow-primary-orange/20 animate-scale-in">
<div class="flex items-center gap-3">
<div class="w-10 h-10 rounded-lg bg-gradient-to-br from-primary-orange to-red-500 flex items-center justify-center">
<i :class="['fa-solid', frameworks.find(f => f.name === newAgentForm.framework)?.icon || 'fa-robot', 'text-white']"></i>
</div>
<div>
<div class="font-medium text-white">{{ newAgentForm.framework }}</div>
<div class="text-xs text-gray-400">{{ newAgentForm.model }}</div>
</div>
</div>
</div>
<!-- 连接线 -->
<div class="h-8 w-0.5 bg-gradient-to-b from-primary-orange to-purple-500 animate-pulse"></div>
<!-- MCP 服务器节点 -->
<div class="node bg-dark-700 border-2 border-purple-500 rounded-xl w-64 p-4 shadow-lg shadow-purple-500/20 animate-scale-in" style="animation-delay: 0.2s">
<div class="flex items-center justify-between mb-2">
<div class="flex items-center gap-2">
<i class="fa-solid fa-server text-purple-400"></i>
<span class="font-medium text-white">MCP Servers</span>
</div>
<span class="bg-purple-500/30 text-purple-300 text-xs px-2 py-0.5 rounded">{{ newAgentForm.mcpServers.length }} connected</span>
</div>
<div class="flex flex-wrap gap-2">
<span v-for="mcp in newAgentForm.mcpServers" :key="mcp" class="bg-dark-600 text-gray-300 text-xs px-2 py-1 rounded flex items-center gap-1">
<i class="fa-solid fa-check-circle text-green-400"></i>
{{ mcp }}
</span>
<span v-if="newAgentForm.mcpServers.length === 0" class="text-gray-500 text-xs">No servers selected</span>
</div>
</div>
<!-- 连接线 -->
<div class="h-8 w-0.5 bg-gradient-to-b from-purple-500 to-green-500 animate-pulse"></div>
<!-- 结束节点 -->
<div class="node bg-gradient-to-r from-green-500 to-emerald-600 rounded-xl w-64 p-4 shadow-lg shadow-green-500/30 animate-float" style="animation-delay: 0.5s">
<div class="flex items-center gap-3">
<div class="w-8 h-8 rounded-lg bg-white/20 flex items-center justify-center">
<i class="fa-solid fa-check text-white text-sm"></i>
</div>
<div>
<div class="font-medium text-white">Ready</div>
<div class="text-xs text-green-200">Agent configured</div>
</div>
</div>
</div>
</div>
<!-- 装饰性光效 -->
<div class="absolute top-1/4 -left-20 w-40 h-40 bg-primary-orange/10 rounded-full blur-3xl animate-pulse"></div>
<div class="absolute bottom-1/4 -right-20 w-40 h-40 bg-purple-500/10 rounded-full blur-3xl animate-pulse" style="animation-delay: 0.5s"></div>
</div>
<!-- 右侧MCP 服务器选择 -->
<div class="w-80 bg-dark-700/50 border-l border-dark-600 p-5 overflow-y-auto">
<div class="flex items-center gap-2 mb-4">
<i class="fa-solid fa-plug text-primary-success"></i>
<h4 class="font-medium text-white">MCP Servers</h4>
<span class="text-xs text-gray-500">({{ newAgentForm.mcpServers.length }} selected)</span>
</div>
<div class="space-y-3">
<div
v-for="server in availableMCPServers"
:key="server.name"
@click="toggleMCPServer(server.name)"
class="p-4 rounded-xl border-2 cursor-pointer transition-all duration-300 hover:scale-105"
:class="newAgentForm.mcpServers.includes(server.name)
? 'border-green-500 bg-dark-600 shadow-lg shadow-green-500/20'
: 'border-dark-500 bg-dark-700 hover:border-gray-500'"
</div>
</div>
</div>
<!-- 右侧边栏 - 可折叠 -->
<transition
enter-active-class="transition-all duration-300 ease-out"
enter-from-class="opacity-0 translate-x-4"
enter-to-class="opacity-100 translate-x-0"
leave-active-class="transition-all duration-250 ease-in"
leave-from-class="opacity-100 translate-x-0"
leave-to-class="opacity-0 translate-x-4"
>
<div v-show="!sidebarCollapsed" class="w-72 bg-[#0d0d12] border-l border-white/5 flex flex-col">
<!-- Logo -->
<div class="p-4 border-b border-white/5">
<div class="flex items-center justify-between">
<div class="flex items-center gap-3">
<div class="w-10 h-10 rounded-lg bg-dark-500 flex items-center justify-center">
<i :class="['fa-solid', server.icon, server.status === 'connected' ? 'text-green-400' : 'text-gray-500']"></i>
<div class="w-9 h-9 rounded-xl bg-gradient-to-br from-violet-500 to-indigo-600 flex items-center justify-center shadow-lg shadow-violet-500/25">
<span class="text-white text-lg">🤖</span>
</div>
<div>
<div class="font-medium text-white">{{ server.name }}</div>
<div class="text-xs flex items-center gap-1" :class="server.status === 'connected' ? 'text-green-400' : 'text-gray-500'">
<span class="w-1.5 h-1.5 rounded-full" :class="server.status === 'connected' ? 'bg-green-400' : 'bg-gray-500'"></span>
{{ server.status }}
<span class="text-lg font-semibold text-white tracking-tight">AI Hub</span>
</div>
<button
@click="toggleSidebar"
class="p-1.5 rounded-lg hover:bg-white/5 text-white/30 hover:text-white/60 transition-colors"
title="Hide sidebar"
>
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5" d="M9 5l7 7-7 7"></path>
</svg>
</button>
</div>
</div>
<!-- 新建聊天按钮 -->
<div class="p-4">
<button
@click="newChat"
class="w-full py-2.5 px-4 bg-[#1a1a24] hover:bg-[#22222e] border border-white/8 hover:border-violet-500/30 rounded-xl text-white/90 text-sm flex items-center justify-center gap-2 transition-all duration-200 hover:shadow-lg hover:shadow-violet-500/10 group"
>
<svg class="w-4 h-4 text-violet-400 group-hover:rotate-90 transition-transform duration-300" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 4v16m8-8H4"></path>
</svg>
<span class="font-medium">New Chat</span>
</button>
</div>
<!-- AI 助手列表 -->
<div class="flex-1 overflow-y-auto px-3 py-2">
<div class="text-[11px] font-medium text-white/30 uppercase tracking-wider px-3 mb-3">AI Assistants</div>
<div class="space-y-1">
<div
class="w-6 h-6 rounded-md flex items-center justify-center transition-all"
:class="newAgentForm.mcpServers.includes(server.name) ? 'bg-green-500' : 'bg-dark-500'"
v-for="agent in agents"
:key="agent.id"
@click="selectAgent(agent)"
class="group px-3 py-2.5 rounded-xl cursor-pointer transition-all duration-200"
:class="selectedAgent?.id === agent.id
? 'bg-gradient-to-r ' + agent.gradient + ' border-l-2'
: 'hover:bg-white/[0.03] border-l-2 border-transparent'"
:style="selectedAgent?.id === agent.id ? `border-left-color: ${agent.accentColor}` : ''"
>
<i v-if="newAgentForm.mcpServers.includes(server.name)" class="fa-solid fa-check text-white text-xs"></i>
</div>
</div>
</div>
</div>
<!-- 描述 -->
<div class="mt-6">
<div class="flex items-center gap-2 mb-3">
<i class="fa-solid fa-align-left text-gray-400"></i>
<h4 class="font-medium text-white">Description</h4>
</div>
<textarea
v-model="newAgentForm.description"
rows="4"
placeholder="Describe your agent's purpose..."
class="w-full bg-dark-600 border border-dark-500 rounded-lg px-4 py-3 text-white placeholder-gray-500 focus:outline-none focus:border-primary-orange transition-colors resize-none"
></textarea>
</div>
</div>
</div>
<!-- 底部操作栏 -->
<div class="flex items-center justify-between p-5 border-t border-dark-600 bg-dark-700/50">
<div class="flex items-center gap-2 text-sm text-gray-400">
<i class="fa-solid fa-circle-info"></i>
<span>Configure your agent settings</span>
</div>
<div class="flex items-center gap-3">
<button
@click="closeCreate"
class="btn-secondary px-6 py-2.5"
<div
class="w-8 h-8 rounded-lg flex items-center justify-center text-lg transition-transform duration-200 group-hover:scale-110"
:class="selectedAgent?.id === agent.id ? 'shadow-lg' : ''"
:style="{ backgroundColor: agent.accentColor + '20', color: agent.accentColor }"
>
Cancel
</button>
<button
@click="saveNewAgent"
class="btn-primary px-6 py-2.5"
>
<i class="fa-solid fa-plus"></i>
Create Agent
{{ agent.avatar }}
</div>
<div class="flex-1 min-w-0">
<div class="text-sm font-medium text-white/90 truncate">{{ agent.name }}</div>
<div class="text-[11px] text-white/40 truncate">{{ agent.description }}</div>
</div>
</div>
</div>
</div>
</div>
<!-- 底部设置 -->
<div class="p-4 border-t border-white/5">
<button class="w-full py-2.5 rounded-xl bg-white/[0.02] hover:bg-white/[0.05] text-white/50 hover:text-white/80 text-sm flex items-center justify-center gap-2 transition-all duration-200">
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5" d="M10.325 4.317c.426-1.756 2.924-1.756 3.35 0a1.724 1.724 0 002.573 1.066c1.543-.94 3.31.826 2.37 2.37a1.724 1.724 0 001.065 2.572c1.756.426 1.756 2.924 0 3.35a1.724 1.724 0 00-1.066 2.573c.94 1.543-.826 3.31-2.37 2.37a1.724 1.724 0 00-2.572 1.065c-.426 1.756-2.924 1.756-3.35 0a1.724 1.724 0 00-2.573-1.066c-1.543.94-3.31-.826-2.37-2.37a1.724 1.724 0 00-1.065-2.572c-1.756-.426-1.756-2.924 0-3.35a1.724 1.724 0 001.066-2.573c-.94-1.543.826-3.31 2.37-2.37.996.608 2.296.07 2.572-1.065z"></path>
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5" d="M15 12a3 3 0 11-6 0 3 3 0 016 0z"></path>
</svg>
<span>Settings</span>
</button>
</div>
</div>
</div>
</div>
</Teleport>
</transition>
</div>
</template>

View File

@@ -1,6 +1,6 @@
<script setup lang="ts">
import { ref, computed, onMounted } from 'vue'
import { ElMessage } from 'element-plus'
import { ElMessage, ElMessageBox } from 'element-plus'
import { useModelSettings } from './settings/useModelSettings'
import { fetchKnowledgeBases, createKnowledgeBase as apiCreateKnowledgeBase, deleteKnowledgeBase as apiDeleteKnowledgeBase, fetchKnowledgeDocuments } from './knowledge/useKnowledge'
import VueOfficeDocx from '@vue-office/docx'
@@ -42,6 +42,11 @@ const embeddingModels = computed(() => {
return models.value.filter((m: any) => m.model_type === 'embedding')
})
// Filter VLM models: the subset of `models` whose `model_type` is 'vlm'.
// Used as the option list for the optional VLM model selector.
const vlmModels = computed(() => {
return models.value.filter((m: any) => m.model_type === 'vlm')
})
// Step validation for the create wizard.
// Step 1 is valid when the knowledge base name is non-empty after trimming whitespace.
const step1Valid = computed(() => !!newKbForm.value.name.trim())
// Step 2 is valid when both an LLM model and an embedding model are selected.
// vlmModelId is intentionally not checked here, so the VLM model stays optional.
const step2Valid = computed(() => !!modelConfig.value.llmModelId && !!modelConfig.value.embeddingModelId)
@@ -188,6 +193,7 @@ const newKbForm = ref({
// Model IDs selected while creating a knowledge base.
// Every field starts as an empty string, which means "nothing selected".
const modelConfig = ref({
// Required: checked by step2Valid.
llmModelId: '',
// Required: checked by step2Valid.
embeddingModelId: '',
// Optional: when left empty, createKnowledgeBase sends vlm_config as undefined.
vlmModelId: '',
})
const parsingConfig = ref({
@@ -212,7 +218,11 @@ const storageConfig = ref({
const openCreateDialog = () => {
createStep.value = 1
newKbForm.value = { name: '', description: '' }
modelConfig.value = { llmModelId: '', embeddingModelId: '' }
modelConfig.value = {
llmModelId: '',
embeddingModelId: '',
vlmModelId: '',
}
parsingConfig.value = {
enablePdf: true,
engine: 'markitdown',
@@ -222,13 +232,23 @@ const openCreateDialog = () => {
highRes: false,
fileSizeLimit: '5242880',
}
storageConfig.value = { type: 'local' }
storageConfig.value = {
type: 'local',
endpoint: '',
accessKeyId: '',
secretAccessKey: '',
bucket: '',
}
showCreateDialog.value = true
}
const cancelCreate = () => {
newKbForm.value = { name: '', description: '' }
modelConfig.value = { llmModelId: '', embeddingModelId: '' }
modelConfig.value = {
llmModelId: '',
embeddingModelId: '',
vlmModelId: '',
}
parsingConfig.value = {
enablePdf: true,
engine: 'markitdown',
@@ -238,7 +258,13 @@ const cancelCreate = () => {
highRes: false,
fileSizeLimit: '5242880',
}
storageConfig.value = { type: 'local' }
storageConfig.value = {
type: 'local',
endpoint: '',
accessKeyId: '',
secretAccessKey: '',
bucket: '',
}
showCreateDialog.value = false
}
@@ -254,6 +280,10 @@ const createKnowledgeBase = async () => {
enable_pdf: parsingConfig.value.enablePdf,
pandoc: parsingConfig.value.pandoc,
},
vlm_config: modelConfig.value.vlmModelId ? {
enabled: true,
model_id: modelConfig.value.vlmModelId,
} : undefined,
storage_config: {
type: storageConfig.value.type,
endpoint: storageConfig.value.type === 'minio' ? storageConfig.value.endpoint : undefined,
@@ -266,7 +296,11 @@ const createKnowledgeBase = async () => {
if (result.success) {
await fetchKbList()
newKbForm.value = { name: '', description: '' }
modelConfig.value = { llmModelId: '', embeddingModelId: '' }
modelConfig.value = {
llmModelId: '',
embeddingModelId: '',
vlmModelId: '',
}
parsingConfig.value = {
enablePdf: true,
engine: 'markitdown',
@@ -316,6 +350,17 @@ const cancelEdit = () => {
// Delete a knowledge base (asks the user for confirmation first)
const deleteKb = async (id: string) => {
try {
await ElMessageBox.confirm(
'Are you sure you want to delete this knowledge base? This action cannot be undone.',
'Delete Knowledge Base',
{
confirmButtonText: 'Delete',
cancelButtonText: 'Cancel',
type: 'warning',
}
)
const result = await apiDeleteKnowledgeBase(id)
if (result.success) {
await fetchKbList()
@@ -323,6 +368,9 @@ const deleteKb = async (id: string) => {
} else {
ElMessage.error(result.message || 'Failed to delete knowledge base')
}
} catch {
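// User cancelled the confirmation dialog.
// NOTE(review): this catch also swallows errors thrown by apiDeleteKnowledgeBase / fetchKbList,
// because they run inside the same try block — confirm that silently ignoring them is intended.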
}
}
// Helper: format a file size
@@ -890,6 +938,23 @@ const deleteDocument = async (docId: string) => {
</el-option>
</el-select>
</el-form-item>
<!-- VLM Configuration -->
<el-form-item label="VLM Model (Optional)">
<el-select v-model="modelConfig.vlmModelId" placeholder="Select a VLM model" class="w-full" popper-class="dark-select-dropdown" clearable>
<el-option
v-for="model in vlmModels"
:key="model.id"
:label="model.name"
:value="model.id"
>
<div class="model-option">
<span class="model-name">{{ model.name }}</span>
<span class="model-info">{{ model.provider }} - {{ model.model }}</span>
</div>
</el-option>
</el-select>
</el-form-item>
</el-form>
</div>

View File

@@ -77,6 +77,10 @@ export const createKnowledgeBase = async (params: {
enable_pdf?: boolean
pandoc?: boolean
}
vlm_config?: {
enabled: boolean
model_id: string
}
storage_config?: {
type: string
endpoint?: string
@@ -195,7 +199,7 @@ export const reparseDocument = async (kbId: string, docId: string): Promise<{ su
}
// Fetch the preview content of a document
export const getDocumentPreview = async (kbId: string, docId: string, page: number = 1): Promise<{ success: boolean; data?: { total_pages: number; current_page: number; content: string }; message?: string }> => {
export const getDocumentPreview = async (kbId: string, docId: string, page: number = 1): Promise<{ success: boolean; data?: { total_pages: number; current_page: number; content: string; content_type?: string }; message?: string }> => {
try {
const response = await fetch(`${API_BASE}/api/knowledge/${kbId}/documents/${docId}/preview?page=${page}`)
const data = await response.json()