diff --git a/team-require/ai/ai-core-api.md b/team-require/ai/ai-core-api.md index b399ce7..bec446c 100644 --- a/team-require/ai/ai-core-api.md +++ b/team-require/ai/ai-core-api.md @@ -6,6 +6,43 @@ localhost:50051 ``` +## VLM 配置(可选) + +VLM 用于提升图片文件的解析效果。如果不配置 VLM,则使用默认的 MarkItDown 解析。 + +### 方式一:环境变量 + +```bash +# 设置环境变量 +export VLM_API_KEY="your-api-key" +export VLM_PROVIDER="openai" # openai / anthropic / qwen +export VLM_MODEL="gpt-4o" +``` + +### 方式二:配置文件 + +在 `ai-core/config.yaml` 中配置: + +```yaml +vlm: + enabled: true + provider: "openai" # openai / anthropic / qwen + model: "gpt-4o" # 模型名称 + api_key: "sk-xxx" # API Key + base_url: "" # 自定义 API 地址(可选) + prompt: "" # 自定义提示词(可选) +``` + +### 支持的 VLM 提供商 + +| 提供商 | 示例模型 | +|--------|----------| +| openai | gpt-4o, gpt-4o-mini | +| anthropic | claude-3-opus, claude-3-sonnet | +| qwen | qwen-vl-max, qwen2-vl-72b | + +--- + ## gRPC API 定义 ### 1. ParseDocument - 解析文档 @@ -16,128 +53,85 @@ message ParseRequest { string file_url = 1; // 文件 URL(必填) string file_name = 2; // 文件名,带扩展名(必填) string file_type = 3; // 文件类型(可选,自动检测) - map engine_overrides = 4; // 引擎配置 + string parser_engine = 4; // 解析引擎(可选) + map engine_overrides = 5; // 引擎配置 + + // VLM 配置(可选,优先级高于全局配置) + VLMConfig vlm_config = 6; +} + +message VLMConfig { + bool enabled = 1; + string provider = 2; + string model = 3; + string api_key = 4; + string base_url = 5; + string prompt = 6; } ``` **响应 (ParseResponse)** ```protobuf message ParseResponse { - bool success = 1; // 是否成功 + bool success = 1; string content = 2; // Markdown 内容 - string message = 3; // 状态消息 - int32 content_length = 4; // 内容长度 - string file_type = 5; // 文件类型 - string parser_engine = 6; // 解析引擎 (markitdown) + string message = 3; + int32 content_length = 4; + string file_type = 5; + string parser_engine = 6; } ``` -### 2. GetSupportedFormats - 获取支持的格式 - -**请求**: 空消息 - -**响应** -- `file_types`: string[] - 支持的扩展名列表 -- `file_type_descriptions`: map - 格式描述 - --- ## Golang 对接示例 -### 1. 
安装依赖 - -```bash -go get google.golang.org/grpc -go get google.golang.org/grpc/credentials/insecure -``` - -### 2. 生成 Go Proto 代码 - -需要先将 `proto/document_parser.proto` 生成 Go 代码: - -```bash -# 方法一:使用 grpc_tools -go install google.golang.org/protobuf/cmd/protoc-gen-go@latest -go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest -protoc --go_out=. --go_opt=paths=source_relative \ - --go-grpc_out=. --go-grpc_opt=paths=source_relative \ - proto/document_parser.proto -``` - -### 3. 完整调用代码 +### 基础调用(无 VLM 配置时使用 MarkItDown) ```go -package main - -import ( - "context" - "fmt" - "log" - - "google.golang.org/grpc" - "google.golang.org/grpc/credentials/insecure" - - pb "your-project/proto" // 替换为你的 proto 包路径 -) - -func main() { - // 连接 gRPC 服务 - conn, err := grpc.Dial( - "localhost:50051", - grpc.WithTransportCredentials(insecure.NewCredentials()), - grpc.WithBlock(), - ) - if err != nil { - log.Fatalf("连接失败: %v", err) - } - defer conn.Close() - - client := pb.NewDocumentParserClient(conn) - ctx := context.Background() - - // 调用 ParseDocument - req := &pb.ParseRequest{ - FileUrl: "https://example.com/document.pdf", - FileName: "document.pdf", - } - - resp, err := client.ParseDocument(ctx, req) - if err != nil { - log.Fatalf("解析失败: %v", err) - } - - // 处理响应 - if resp.Success { - fmt.Printf("解析成功!\n") - fmt.Printf("内容长度: %d 字符\n", resp.ContentLength) - fmt.Printf("Markdown 内容:\n%s\n", resp.Content) - } else { - fmt.Printf("解析失败: %s\n", resp.Message) - } +req := &pb.ParseRequest{ + FileUrl: "https://example.com/document.pdf", + FileName: "document.pdf", } + +resp, err := client.ParseDocument(ctx, req) ``` -### 4. 
获取支持的格式 +### 带 VLM 配置调用 ```go -// 获取支持的文件格式 -formatsReq := &pb.Empty{} -formatsResp, err := client.GetSupportedFormats(ctx, formatsReq) -if err != nil { - log.Fatal(err) +req := &pb.ParseRequest{ + FileUrl: "https://example.com/image.png", + FileName: "image.png", + VlmConfig: &pb.VLMConfig{ + Enabled: true, + Provider: "openai", + Model: "gpt-4o", + ApiKey: "sk-xxx", + }, } -fmt.Println("支持的格式:") -for _, ft := range formatsResp.FileTypes { - desc := formatsResp.FileTypeDescriptions[ft] - fmt.Printf(" - %s: %s\n", ft, desc) -} +resp, err := client.ParseDocument(ctx, req) ``` --- +## 解析逻辑 + +1. **图片文件** (jpg, png, webp 等) + - 如果配置了 VLM → 使用 VLM 解析 + - 如果没有配置 VLM → 使用 MarkItDown 解析 + +2. **PDF/DOCX/PPTX 等文档** + - 使用 MarkItDown 解析 + +3. **VLM 优先级** + - gRPC 请求中的 vlm_config > 全局配置(config.yaml/环境变量) + +--- + ## 注意事项 -1. **文件 URL**: 必须是可直接访问的 URL,服务会下载文件到内存解析 -2. **文件名**: 必须带扩展名(如 `.pdf`, `.docx`),用于自动识别文件类型 -3. **返回内容**: 直接返回 Markdown 格式文本,可用于向量检索或 LLM 处理 +1. **文件 URL**: 必须是可直接访问的 URL +2. **文件名**: 必须带扩展名(如 `.pdf`, `.png`) +3. 
**返回内容**: Markdown 格式文本 diff --git a/team-require/ai/todo.md b/team-require/ai/todo.md index 4496387..08f7396 100644 --- a/team-require/ai/todo.md +++ b/team-require/ai/todo.md @@ -6,10 +6,15 @@ - [ ] **AI-Core 文档解析服务对接** - 服务:ai-core (gRPC, 端口 50051) - - 功能:将文档(PDF/DOCX/PPTX 等)转换为 Markdown + - 功能:将文档(PDF/DOCX/PPTX/图片等)转换为 Markdown - 对接方式:gRPC 调用 - 详细需求:[ai-core-api.md](./ai-core-api.md) +- [ ] **VLM 调用支持** + - 支持 OpenAI GPT-4o、Anthropic Claude、阿里 Qwen VL + - 通过 vlm_config 配置启用 + - 适用场景:图片文件(jpg, png 等)自动使用 VLM 解析 + --- > 需求完成后请完成者打 ✔ diff --git a/web/package-lock.json b/web/package-lock.json index bd7d538..2ca1723 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -8,10 +8,15 @@ "name": "x-agent-dashboard", "version": "0.0.1", "dependencies": { + "@vue-office/docx": "^1.6.3", + "@vue-office/excel": "^1.7.14", "echarts": "^6.0.0", "element-plus": "^2.13.3", + "marked": "^17.0.4", + "papaparse": "^5.5.3", "pinia": "^2.1.7", "vue": "^3.4.21", + "vue-demi": "^0.14.10", "vue-router": "^4.3.0" }, "devDependencies": { @@ -1026,6 +1031,40 @@ "vscode-uri": "^3.0.8" } }, + "node_modules/@vue-office/docx": { + "version": "1.6.3", + "resolved": "https://registry.npmmirror.com/@vue-office/docx/-/docx-1.6.3.tgz", + "integrity": "sha512-Cs+3CAaRBOWOiW4XAhTwwxJ0dy8cPIf6DqfNvYcD3YACiLwO4kuawLF2IAXxyijhbuOeoFsfvoVbOc16A/4bZA==", + "hasInstallScript": true, + "license": "MIT", + "peerDependencies": { + "@vue/composition-api": "^1.7.1", + "vue": "^2.0.0 || >=3.0.0", + "vue-demi": "^0.14.6" + }, + "peerDependenciesMeta": { + "@vue/composition-api": { + "optional": true + } + } + }, + "node_modules/@vue-office/excel": { + "version": "1.7.14", + "resolved": "https://registry.npmmirror.com/@vue-office/excel/-/excel-1.7.14.tgz", + "integrity": "sha512-pVUgt+emDQUnW7q22CfnQ+jl43mM/7IFwYzOg7lwOwPEbiVB4K4qEQf+y/bc4xGXz75w1/e3Kz3G6wAafmFBFg==", + "hasInstallScript": true, + "license": "MIT", + "peerDependencies": { + "@vue/composition-api": "^1.7.1", + "vue": "^2.0.0 || 
>=3.0.0", + "vue-demi": "^0.14.6" + }, + "peerDependenciesMeta": { + "@vue/composition-api": { + "optional": true + } + } + }, "node_modules/@vue/compiler-core": { "version": "3.5.29", "resolved": "https://registry.npmmirror.com/@vue/compiler-core/-/compiler-core-3.5.29.tgz", @@ -1862,6 +1901,18 @@ "@jridgewell/sourcemap-codec": "^1.5.5" } }, + "node_modules/marked": { + "version": "17.0.4", + "resolved": "https://registry.npmmirror.com/marked/-/marked-17.0.4.tgz", + "integrity": "sha512-NOmVMM+KAokHMvjWmC5N/ZOvgmSWuqJB8FoYI019j4ogb/PeRMKoKIjReZ2w3376kkA8dSJIP8uD993Kxc0iRQ==", + "license": "MIT", + "bin": { + "marked": "bin/marked.js" + }, + "engines": { + "node": ">= 20" + } + }, "node_modules/memoize-one": { "version": "6.0.0", "resolved": "https://registry.npmmirror.com/memoize-one/-/memoize-one-6.0.0.tgz", @@ -1988,6 +2039,12 @@ "node": ">= 6" } }, + "node_modules/papaparse": { + "version": "5.5.3", + "resolved": "https://registry.npmmirror.com/papaparse/-/papaparse-5.5.3.tgz", + "integrity": "sha512-5QvjGxYVjxO59MGU2lHVYpRWBBtKHnlIAcSe1uNFCkkptUh63NFRj0FJQm7nR67puEruUci/ZkjmEFrjCAyP4A==", + "license": "MIT" + }, "node_modules/path-browserify": { "version": "1.0.1", "resolved": "https://registry.npmmirror.com/path-browserify/-/path-browserify-1.0.1.tgz", @@ -2701,6 +2758,7 @@ "integrity": "sha512-nMZBOwuzabUO0nLgIcc6rycZEebF6eeUfaiQx9+WSk8e29IbLvPU9feI6tqW4kTo3hvoYAJkMh8n8D0fuISphg==", "hasInstallScript": true, "license": "MIT", + "peer": true, "bin": { "vue-demi-fix": "bin/vue-demi-fix.js", "vue-demi-switch": "bin/vue-demi-switch.js" diff --git a/web/package.json b/web/package.json index 2694426..5ead366 100644 --- a/web/package.json +++ b/web/package.json @@ -9,10 +9,15 @@ "preview": "vite preview" }, "dependencies": { + "@vue-office/docx": "^1.6.3", + "@vue-office/excel": "^1.7.14", "echarts": "^6.0.0", "element-plus": "^2.13.3", + "marked": "^17.0.4", + "papaparse": "^5.5.3", "pinia": "^2.1.7", "vue": "^3.4.21", + "vue-demi": "^0.14.10", "vue-router": 
"^4.3.0" }, "devDependencies": { diff --git a/web/src/components/FormDialog.vue b/web/src/components/FormDialog.vue index 4d3e715..5e8dafd 100644 --- a/web/src/components/FormDialog.vue +++ b/web/src/components/FormDialog.vue @@ -5,6 +5,7 @@ defineProps<{ description?: string icon?: string iconClass?: string + class?: string }>() const emit = defineEmits<{ @@ -18,7 +19,7 @@ const close = () => {