feat: 新增 account 和 plan 目录

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 16:26:22 +08:00
parent 243a190124
commit 0cab33b16b
694 changed files with 161549 additions and 0 deletions
--- a/account/admin/skills/image-understanding/scripts/image_understanding.py
+++ b/account/admin/skills/image-understanding/scripts/image_understanding.py
@@ -0,0 +1,381 @@
+#!/usr/bin/env python3
+"""
+Image Understanding using Dashscope (Qwen Vision Models)
+
+This script enables AI to understand and analyze images using Dashscope's
+vision API models (qwen-vl-plus, qwen-vl-max).
+
+Usage:
+    python image_understanding.py --image path/to/image.jpg
+    python image_understanding.py --image https://example.com/image.png --prompt "图片里有什么？"
+    python image_understanding.py --image ./screenshot.png --extract-text --describe
+"""
+
+import argparse
+import json
+import os
+import sys
+from typing import Optional, Dict, Any
+from pathlib import Path
+
+
+# Dashscope 配置
+DASHSCOPE_API_BASE = "https://dashscope.aliyuncs.com/compatible-mode/v1"
+DEFAULT_MODEL = "qwen-vl-plus"
+
+
+def validate_image_path(image_path: str) -> str:
+    """
+    Validate and normalize image path or URL.
+    
+    Args:
+        image_path: Path to local image or URL
+        
+    Returns:
+        Validated image path
+        
+    Raises:
+        ValueError: If image path is invalid or file doesn't exist
+    """
+    if image_path.startswith(('http://', 'https://')):
+        if not image_path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp')):
+            raise ValueError(f"Invalid image URL format: {image_path}")
+        return image_path
+    
+    path = Path(image_path)
+    if not path.exists():
+        raise ValueError(f"Image file not found: {image_path}")
+    if not path.is_file():
+        raise ValueError(f"Path is not a file: {image_path}")
+    
+    valid_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp'}
+    if path.suffix.lower() not in valid_extensions:
+        raise ValueError(f"Invalid image format: {path.suffix}. Supported: {', '.join(valid_extensions)}")
+    
+    return str(path.absolute())
+
+
+def encode_image(image_path: str) -> str:
+    """
+    Encode image to base64 string.
+    
+    Args:
+        image_path: Path to image file
+        
+    Returns:
+        Base64 encoded image string
+    """
+    import base64
+    
+    with open(image_path, "rb") as f:
+        return base64.b64encode(f.read()).decode('utf-8')
+
+
+def call_dashscope_api(api_key: str, image_path: str, prompt: str, model: str) -> Dict[str, Any]:
+    """
+    Call Dashscope Vision API to analyze image.
+    
+    Args:
+        api_key: Dashscope API key
+        image_path: Path to image file or URL
+        prompt: Custom prompt for analysis
+        model: Model name (qwen-vl-plus or qwen-vl-max)
+        
+    Returns:
+        API response as dictionary
+    """
+    import requests
+    
+    # Prepare image content
+    if image_path.startswith(('http://', 'https://')):
+        image_content = {
+            "type": "image_url",
+            "image_url": {"url": image_path}
+        }
+    else:
+        base64_image = encode_image(image_path)
+        # Detect mime type
+        ext = Path(image_path).suffix.lower()
+        mime_type = "image/jpeg"
+        if ext == '.png':
+            mime_type = "image/png"
+        elif ext == '.gif':
+            mime_type = "image/gif"
+        elif ext == '.webp':
+            mime_type = "image/webp"
+        
+        image_content = {
+            "type": "image_url",
+            "image_url": {
+                "url": f"data:{mime_type};base64,{base64_image}"
+            }
+        }
+    
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": prompt},
+                image_content
+            ]
+        }
+    ]
+    
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {api_key}"
+    }
+    
+    payload = {
+        "model": model,
+        "messages": messages,
+        "max_tokens": 1500,
+        "temperature": 0.1
+    }
+    
+    try:
+        response = requests.post(
+            f"{DASHSCOPE_API_BASE}/chat/completions",
+            headers=headers,
+            json=payload,
+            timeout=90
+        )
+        response.raise_for_status()
+        return response.json()
+    except requests.exceptions.HTTPError as e:
+        error_msg = f"API request failed: {e.response.text}"
+        try:
+            error_data = e.response.json()
+            if 'error' in error_data:
+                error_msg = f"API error: {error_data['error'].get('message', str(error_data))}"
+        except:
+            pass
+        raise Exception(error_msg)
+    except requests.exceptions.RequestException as e:
+        raise Exception(f"Network error: {str(e)}")
+
+
+def analyze_image(
+    api_key: str,
+    image_path: str,
+    custom_prompt: Optional[str] = None,
+    describe: bool = True,
+    extract_text: bool = False,
+    identify_objects: bool = False,
+    model: str = DEFAULT_MODEL
+) -> Dict[str, Any]:
+    """
+    Analyze image with specified analysis types.
+    
+    Args:
+        api_key: Dashscope API key
+        image_path: Path to image file or URL
+        custom_prompt: Optional custom prompt
+        describe: Whether to describe the image
+        extract_text: Whether to extract text from image
+        identify_objects: Whether to identify objects
+        model: Model to use
+        
+    Returns:
+        Analysis results as dictionary
+    """
+    # Build prompt based on analysis type
+    if custom_prompt:
+        prompt = custom_prompt
+    else:
+        tasks = []
+        if describe:
+            tasks.append("详细描述这张图片的内容，包括物体、人物、场景、颜色和整体构成")
+        if extract_text:
+            tasks.append("提取图片中所有可见的文字(OCR)")
+        if identify_objects:
+            tasks.append("识别并列出图片中所有可辨认的物体、人物和元素")
+        
+        if tasks:
+            prompt = f"""请分析这张图片，提供以下信息：
+{'；'.join(tasks)}
+
+请用清晰的分段格式回答。"""
+        else:
+            prompt = "请对这张图片进行全面详细的描述，包括所有可见的物体、人物、场景、文字和任何值得注意的细节。"
+    
+    # Call API
+    response = call_dashscope_api(api_key, image_path, prompt, model)
+    
+    # Parse response
+    content = response.get("choices", [{}])[0].get("message", {}).get("content", "")
+    
+    # Extract usage info
+    usage = response.get("usage", {})
+    
+    return {
+        "success": True,
+        "image_path": image_path,
+        "model": model,
+        "api_provider": "dashscope",
+        "analysis": {
+            "description": content if describe else None,
+            "extracted_text": content if extract_text else None,
+            "objects": content if identify_objects else None,
+            "full_response": content
+        },
+        "usage": {
+            "prompt_tokens": usage.get("prompt_tokens", 0),
+            "completion_tokens": usage.get("completion_tokens", 0),
+            "total_tokens": usage.get("total_tokens", 0)
+        }
+    }
+
+
+def execute(args: argparse.Namespace) -> Dict[str, Any]:
+    """Execute the image understanding analysis."""
+    # Get API key - 支持 DASHSCOPE_API_KEY 或 OPENAI_API_KEY
+    api_key = args.api_key or os.environ.get("DASHSCOPE_API_KEY") or os.environ.get("OPENAI_API_KEY")
+    if not api_key:
+        raise ValueError(
+            "API key not provided. Use --api-key or set DASHSCOPE_API_KEY environment variable"
+        )
+    
+    # Validate image
+    image_path = validate_image_path(args.image)
+    
+    # Determine analysis type
+    describe = not (args.extract_text or args.identify_objects or args.custom_prompt)
+    extract_text = args.extract_text
+    identify_objects = args.identify_objects
+    custom_prompt = args.custom_prompt
+    model = args.model or DEFAULT_MODEL
+    
+    # Analyze
+    result = analyze_image(
+        api_key=api_key,
+        image_path=image_path,
+        custom_prompt=custom_prompt,
+        describe=describe,
+        extract_text=extract_text,
+        identify_objects=identify_objects,
+        model=model
+    )
+    
+    return result
+
+
+def main():
+    """Main entry point."""
+    parser = argparse.ArgumentParser(
+        description="使用 Dashscope（通义千问）视觉模型分析图片",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+示例:
+    # 基本图片描述
+    python image_understanding.py --image photo.jpg
+    
+    # 提取图片中的文字
+    python image_understanding.py --image screenshot.png --extract-text
+    
+    # 识别图片中的物体
+    python image_understanding.py --image photo.jpg --identify-objects
+    
+    # 自定义分析提示词
+    python image_understanding.py --image photo.jpg --prompt "这个产品多少钱？"
+    
+    # 使用环境变量中的 API key
+    export DASHSCOPE_API_KEY=your_key
+    python image_understanding.py --image photo.jpg
+    
+    # 使用网络图片URL
+    python image_understanding.py --image "https://example.com/photo.jpg" --describe
+    
+    # 使用更强的模型
+    python image_understanding.py --image photo.jpg --model qwen-vl-max
+
+环境变量:
+    DASHSCOPE_API_KEY    你的 Dashscope API key
+    OPENAI_API_KEY       也可以使用（兼容性支持）
+        """
+    )
+    
+    # Required
+    parser.add_argument(
+        "--image", "-i",
+        required=True,
+        help="本地图片路径或图片URL"
+    )
+    
+    # Optional
+    parser.add_argument(
+        "--api-key",
+        help="Dashscope API key (也可通过 DASHSCOPE_API_KEY 环境变量设置)"
+    )
+    
+    parser.add_argument(
+        "--model", "-m",
+        default=DEFAULT_MODEL,
+        choices=["qwen-vl-plus", "qwen-vl-max"],
+        help=f"使用的模型 (默认: {DEFAULT_MODEL})"
+    )
+    
+    parser.add_argument(
+        "--custom-prompt", "-p",
+        help="自定义图片分析提示词"
+    )
+    
+    # Analysis type
+    analysis_group = parser.add_mutually_exclusive_group()
+    analysis_group.add_argument(
+        "--describe",
+        action="store_true",
+        default=True,
+        help="描述图片内容 (默认行为)"
+    )
+    analysis_group.add_argument(
+        "--extract-text", "-e",
+        action="store_true",
+        help="从图片提取文字 (OCR)"
+    )
+    analysis_group.add_argument(
+        "--identify-objects", "-o",
+        action="store_true",
+        help="识别图片中的物体"
+    )
+    
+    # Output
+    parser.add_argument(
+        "--compact",
+        action="store_true",
+        help="输出紧凑JSON (不缩进)"
+    )
+    
+    args = parser.parse_args()
+    
+    try:
+        result = execute(args)
+        indent = None if args.compact else 2
+        print(json.dumps(result, ensure_ascii=False, indent=indent))
+        sys.exit(0)
+        
+    except ValueError as e:
+        error_result = {
+            "success": False,
+            "error": {
+                "type": "validation_error",
+                "message": str(e)
+            }
+        }
+        print(json.dumps(error_result, ensure_ascii=False, indent=2))
+        sys.exit(2)
+        
+    except Exception as e:
+        error_result = {
+            "success": False,
+            "error": {
+                "type": "execution_error",
+                "message": str(e)
+            }
+        }
+        print(json.dumps(error_result, ensure_ascii=False, indent=2))
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()