feat: 新增 account 和 plan 目录

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 16:26:22 +08:00
parent 243a190124
commit 0cab33b16b
694 changed files with 161549 additions and 0 deletions
--- a/account/admin/skills/image-understanding/scripts/image_understander.py
+++ b/account/admin/skills/image-understanding/scripts/image_understander.py
@@ -0,0 +1,177 @@
+"""
+图片理解模块 - 使用 Dashscope Qwen-VL 模型
+"""
+import os
+import base64
+import json
+import requests
+
+# Configuration.
+# The API key is read from the DASHSCOPE_API_KEY environment variable when
+# this module is loaded. If it is unset or empty, an error is printed and the
+# process exits with status 1 (this also happens on a plain import).
+API_KEY = os.environ.get("DASHSCOPE_API_KEY", "")
+if not API_KEY:
+    print("错误: 请设置 DASHSCOPE_API_KEY 环境变量")
+    exit(1)
+
+# URL that call_dashscope() posts its requests to.
+BASE_URL = "https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation"
+
+
+def encode_image(image_path):
+    """Read the file at ``image_path`` and return its bytes as base64 text."""
+    with open(image_path, "rb") as image_file:
+        raw_bytes = image_file.read()
+    return base64.b64encode(raw_bytes).decode("utf-8")
+
+
+def call_dashscope(messages, model="qwen-vl-plus", max_tokens=1000, timeout=90):
+    """Send a chat request to the Dashscope multimodal-generation endpoint.
+
+    Args:
+        messages: Message list placed under ``input.messages`` in the request.
+        model: Name of the model to call.
+        max_tokens: Value sent as ``parameters.max_tokens``.
+        timeout: Seconds to wait for the HTTP request before giving up.
+
+    Returns:
+        The ``content`` of the first choice's message on success. On any
+        failure (network error, non-JSON body, or a response without
+        choices) a string starting with "错误: " that describes the problem.
+    """
+    headers = {
+        "Authorization": f"Bearer {API_KEY}",
+        "Content-Type": "application/json"
+    }
+
+    payload = {
+        "model": model,
+        "input": {
+            "messages": messages
+        },
+        "parameters": {
+            "max_tokens": max_tokens
+        }
+    }
+
+    # Transport and decoding failures are reported in the same "错误: ..."
+    # string form as API-level errors, so callers that simply print the
+    # result keep working instead of crashing with a traceback.
+    try:
+        response = requests.post(BASE_URL, headers=headers, json=payload, timeout=timeout)
+        result = response.json()
+    except (requests.RequestException, ValueError) as exc:
+        return f"错误: {exc}"
+
+    output = result.get("output") if isinstance(result, dict) else None
+    choices = output.get("choices") if isinstance(output, dict) else None
+    if choices:
+        return choices[0]["message"]["content"]
+    return f"错误: {json.dumps(result, ensure_ascii=False, indent=2)}"
+
+
+def _image_data_url(image_path):
+    """Return a base64 ``data:`` URL for the image, with the MIME type chosen from its extension."""
+    mime_by_extension = {
+        ".png": "image/png",
+        ".gif": "image/gif",
+        ".webp": "image/webp",
+        ".bmp": "image/bmp",
+    }
+    extension = os.path.splitext(image_path)[1].lower()
+    # Unrecognised extensions are still sent as JPEG, which is what every
+    # image was labelled as before the extension was taken into account.
+    mime_type = mime_by_extension.get(extension, "image/jpeg")
+    return f"data:{mime_type};base64,{encode_image(image_path)}"
+
+
+def _ask_about_image(image_path, prompt, model, max_tokens):
+    """Send one user message holding ``prompt`` and the image; return call_dashscope's result."""
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": prompt
+                },
+                {
+                    "type": "image",
+                    "image": _image_data_url(image_path)
+                }
+            ]
+        }
+    ]
+    return call_dashscope(messages, model, max_tokens=max_tokens)
+
+
+def describe_image(image_path, model="qwen-vl-plus"):
+    """Ask the model for a detailed description of the image (max_tokens=1000).
+
+    Returns whatever call_dashscope() returns for the request.
+    """
+    return _ask_about_image(
+        image_path,
+        "请详细描述这张图片的内容，包括场景、人物、物体、颜色等所有细节。",
+        model,
+        max_tokens=1000,
+    )
+
+
+def extract_text(image_path, model="qwen-vl-plus"):
+    """Ask the model to extract all text in the image, i.e. OCR (max_tokens=2000).
+
+    Returns whatever call_dashscope() returns for the request.
+    """
+    return _ask_about_image(
+        image_path,
+        "请提取图片中的所有文字，保持原格式，不要遗漏任何内容。",
+        model,
+        max_tokens=2000,
+    )
+
+
+def identify_objects(image_path, model="qwen-vl-plus"):
+    """Ask the model to list the objects, people and elements in the image (max_tokens=500).
+
+    Returns whatever call_dashscope() returns for the request.
+    """
+    return _ask_about_image(
+        image_path,
+        "请列出图片中的所有物体、人物、元素，用清晰的列表格式。",
+        model,
+        max_tokens=500,
+    )
+
+
+def answer_question(image_path, question, model="qwen-vl-plus"):
+    """Send ``question`` together with the image to the model (max_tokens=500).
+
+    Returns whatever call_dashscope() returns for the request.
+    """
+    return _ask_about_image(image_path, question, model, max_tokens=500)
+
+
+if __name__ == "__main__":
+    import sys
+
+    def _option_value(flag, default=None):
+        """Return the argument that follows ``flag`` in sys.argv, or ``default`` if the flag is absent."""
+        if flag not in sys.argv:
+            return default
+        value_index = sys.argv.index(flag) + 1
+        if value_index >= len(sys.argv):
+            # A flag given as the very last argument has no value to read.
+            print(f"错误: {flag} 缺少参数值")
+            sys.exit(1)
+        return sys.argv[value_index]
+
+    if len(sys.argv) < 3:
+        print("用法:")
+        print("  python image_understander.py -i <图片路径> -m describe   # 描述图片")
+        print("  python image_understander.py -i <图片路径> -m ocr        # 提取文字")
+        print("  python image_understander.py -i <图片路径> -m objects    # 识别物体")
+        print("  python image_understander.py -i <图片路径> -m qa -q '问题'  # 图片问答")
+        sys.exit(1)
+
+    # Take the path from the value after -i wherever it appears. When -i is
+    # not given, fall back to the second argument, which is the position the
+    # path was previously always read from.
+    image_path = _option_value("-i", sys.argv[2])
+    mode = _option_value("-m", "describe")
+    question = _option_value("-q")
+
+    if mode == "describe":
+        print(describe_image(image_path))
+    elif mode == "ocr":
+        print(extract_text(image_path))
+    elif mode == "objects":
+        print(identify_objects(image_path))
+    elif mode == "qa":
+        if not question:
+            print("错误: 请使用 -q 指定问题")
+            # Usage errors exit non-zero so calling scripts can detect them.
+            sys.exit(1)
+        print(answer_question(image_path, question))
+    else:
+        print(f"未知模式: {mode}")
+        sys.exit(1)
--- a/account/admin/skills/image-understanding/scripts/image_understanding.py
+++ b/account/admin/skills/image-understanding/scripts/image_understanding.py
@@ -0,0 +1,381 @@
+#!/usr/bin/env python3
+"""
+Image Understanding using Dashscope (Qwen Vision Models)
+
+This script enables AI to understand and analyze images using Dashscope's
+vision API models (qwen-vl-plus, qwen-vl-max).
+
+Usage:
+    python image_understanding.py --image path/to/image.jpg
+    python image_understanding.py --image https://example.com/image.png --custom-prompt "图片里有什么？"
+    python image_understanding.py --image ./screenshot.png --extract-text
+"""
+
+import argparse
+import json
+import os
+import sys
+from typing import Optional, Dict, Any
+from pathlib import Path
+
+
+# Dashscope configuration
+# Base URL of the Dashscope "compatible-mode" API; call_dashscope_api()
+# appends "/chat/completions" to it.
+DASHSCOPE_API_BASE = "https://dashscope.aliyuncs.com/compatible-mode/v1"
+# Model used when the caller does not choose one.
+DEFAULT_MODEL = "qwen-vl-plus"
+
+
+def validate_image_path(image_path: str) -> str:
+    """
+    Validate and normalize image path or URL.
+
+    Args:
+        image_path: Path to a local image file, or an http(s) URL
+
+    Returns:
+        The URL unchanged, or the absolute path of the local file
+
+    Raises:
+        ValueError: If the URL path or the file extension is not a supported
+            image format, or the local path does not exist or is not a file
+    """
+    from urllib.parse import urlparse
+
+    # A tuple keeps the order stable when the list is shown in error messages.
+    valid_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp')
+
+    if image_path.startswith(('http://', 'https://')):
+        # Look only at the path component: a query string or fragment (as on
+        # signed URLs) must neither hide a valid extension nor fake one.
+        url_path = urlparse(image_path).path.lower()
+        if not url_path.endswith(valid_extensions):
+            raise ValueError(f"Invalid image URL format: {image_path}")
+        return image_path
+
+    path = Path(image_path)
+    if not path.exists():
+        raise ValueError(f"Image file not found: {image_path}")
+    if not path.is_file():
+        raise ValueError(f"Path is not a file: {image_path}")
+
+    if path.suffix.lower() not in valid_extensions:
+        raise ValueError(f"Invalid image format: {path.suffix}. Supported: {', '.join(valid_extensions)}")
+
+    return str(path.absolute())
+
+
+def encode_image(image_path: str) -> str:
+    """
+    Read an image file and return its contents as base64 text.
+
+    Args:
+        image_path: Location of the file to read
+
+    Returns:
+        The file's bytes, base64-encoded and decoded as UTF-8
+    """
+    import base64
+
+    with open(image_path, "rb") as image_file:
+        raw_bytes = image_file.read()
+    encoded = base64.b64encode(raw_bytes)
+    return encoded.decode('utf-8')
+
+
+def call_dashscope_api(api_key: str, image_path: str, prompt: str, model: str) -> Dict[str, Any]:
+    """
+    Call Dashscope Vision API to analyze image.
+
+    Args:
+        api_key: Dashscope API key
+        image_path: Path to image file or URL
+        prompt: Custom prompt for analysis
+        model: Model name (qwen-vl-plus or qwen-vl-max)
+
+    Returns:
+        Parsed JSON body of the API response
+
+    Raises:
+        Exception: If the API answers with an HTTP error status or the
+            request fails at the network level; the underlying requests
+            exception is attached as ``__cause__``
+    """
+    import requests
+
+    # Prepare image content
+    if image_path.startswith(('http://', 'https://')):
+        image_url = image_path
+    else:
+        # Local files are inlined as a base64 data URL. The MIME type follows
+        # the file extension; anything not listed here is sent as JPEG.
+        mime_types = {
+            '.png': "image/png",
+            '.gif': "image/gif",
+            '.webp': "image/webp",
+            '.bmp': "image/bmp",
+        }
+        mime_type = mime_types.get(Path(image_path).suffix.lower(), "image/jpeg")
+        image_url = f"data:{mime_type};base64,{encode_image(image_path)}"
+
+    image_content = {
+        "type": "image_url",
+        "image_url": {"url": image_url}
+    }
+
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": prompt},
+                image_content
+            ]
+        }
+    ]
+
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {api_key}"
+    }
+
+    payload = {
+        "model": model,
+        "messages": messages,
+        "max_tokens": 1500,
+        "temperature": 0.1
+    }
+
+    try:
+        response = requests.post(
+            f"{DASHSCOPE_API_BASE}/chat/completions",
+            headers=headers,
+            json=payload,
+            timeout=90
+        )
+        response.raise_for_status()
+        return response.json()
+    except requests.exceptions.HTTPError as e:
+        # Prefer the structured error message from the body when there is
+        # one; otherwise fall back to the raw response text.
+        error_msg = f"API request failed: {e.response.text}"
+        try:
+            error_data = e.response.json()
+        except ValueError:
+            error_data = None
+        error_info = error_data.get('error') if isinstance(error_data, dict) else None
+        if isinstance(error_info, dict):
+            error_msg = f"API error: {error_info.get('message', str(error_data))}"
+        raise Exception(error_msg) from e
+    except requests.exceptions.RequestException as e:
+        raise Exception(f"Network error: {str(e)}") from e
+
+
+def analyze_image(
+    api_key: str,
+    image_path: str,
+    custom_prompt: Optional[str] = None,
+    describe: bool = True,
+    extract_text: bool = False,
+    identify_objects: bool = False,
+    model: str = DEFAULT_MODEL
+) -> Dict[str, Any]:
+    """
+    Analyze image with specified analysis types.
+
+    Args:
+        api_key: Dashscope API key
+        image_path: Path to image file or URL
+        custom_prompt: Optional custom prompt; when given it replaces the
+            prompt that would be built from the flags below
+        describe: Whether to describe the image
+        extract_text: Whether to extract text from image
+        identify_objects: Whether to identify objects
+        model: Model to use
+
+    Returns:
+        Dictionary with ``success``, ``image_path``, ``model``,
+        ``api_provider``, an ``analysis`` section and token ``usage``.
+        The model's single reply is stored under ``full_response`` and is
+        repeated under ``description`` / ``extracted_text`` / ``objects``
+        for each flag that was enabled (``None`` otherwise).
+    """
+    # Build prompt based on analysis type
+    if custom_prompt:
+        prompt = custom_prompt
+    else:
+        tasks = []
+        if describe:
+            tasks.append("详细描述这张图片的内容，包括物体、人物、场景、颜色和整体构成")
+        if extract_text:
+            tasks.append("提取图片中所有可见的文字(OCR)")
+        if identify_objects:
+            tasks.append("识别并列出图片中所有可辨认的物体、人物和元素")
+
+        if tasks:
+            prompt = (
+                "请分析这张图片，提供以下信息：\n"
+                + "；".join(tasks)
+                + "\n\n请用清晰的分段格式回答。"
+            )
+        else:
+            prompt = "请对这张图片进行全面详细的描述，包括所有可见的物体、人物、场景、文字和任何值得注意的细节。"
+
+    # Call API
+    response = call_dashscope_api(api_key, image_path, prompt, model)
+
+    # Parse response. Missing, null or empty fields yield an empty reply
+    # instead of raising (an empty "choices" list would otherwise fail on [0]).
+    choices = response.get("choices") or [{}]
+    message = choices[0].get("message") or {}
+    content = message.get("content") or ""
+
+    # Extract usage info
+    usage = response.get("usage") or {}
+
+    return {
+        "success": True,
+        "image_path": image_path,
+        "model": model,
+        "api_provider": "dashscope",
+        "analysis": {
+            "description": content if describe else None,
+            "extracted_text": content if extract_text else None,
+            "objects": content if identify_objects else None,
+            "full_response": content
+        },
+        "usage": {
+            "prompt_tokens": usage.get("prompt_tokens", 0),
+            "completion_tokens": usage.get("completion_tokens", 0),
+            "total_tokens": usage.get("total_tokens", 0)
+        }
+    }
+
+
+def execute(args: argparse.Namespace) -> Dict[str, Any]:
+    """Resolve the API key, validate the image and run the analysis for parsed CLI arguments."""
+    # Key lookup order: --api-key, then DASHSCOPE_API_KEY, then OPENAI_API_KEY
+    api_key = args.api_key
+    if not api_key:
+        api_key = os.environ.get("DASHSCOPE_API_KEY")
+    if not api_key:
+        api_key = os.environ.get("OPENAI_API_KEY")
+    if not api_key:
+        raise ValueError("API key not provided. Use --api-key or set DASHSCOPE_API_KEY environment variable")
+
+    validated_image = validate_image_path(args.image)
+
+    # A plain description is requested only when neither OCR, object
+    # identification nor a custom prompt was asked for
+    other_request = args.extract_text or args.identify_objects or args.custom_prompt
+
+    return analyze_image(
+        api_key=api_key,
+        image_path=validated_image,
+        custom_prompt=args.custom_prompt,
+        describe=not other_request,
+        extract_text=args.extract_text,
+        identify_objects=args.identify_objects,
+        model=args.model or DEFAULT_MODEL,
+    )
+
+
+def main():
+    """
+    Parse command-line arguments, run the analysis and print the result as JSON.
+
+    Exits with status 0 on success, 2 when a ValueError is raised (reported
+    as "validation_error") and 1 for any other exception ("execution_error").
+    """
+    parser = argparse.ArgumentParser(
+        description="使用 Dashscope（通义千问）视觉模型分析图片",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+示例:
+    # 基本图片描述
+    python image_understanding.py --image photo.jpg
+    
+    # 提取图片中的文字
+    python image_understanding.py --image screenshot.png --extract-text
+    
+    # 识别图片中的物体
+    python image_understanding.py --image photo.jpg --identify-objects
+    
+    # 自定义分析提示词
+    python image_understanding.py --image photo.jpg --prompt "这个产品多少钱？"
+    
+    # 使用环境变量中的 API key
+    export DASHSCOPE_API_KEY=your_key
+    python image_understanding.py --image photo.jpg
+    
+    # 使用网络图片URL
+    python image_understanding.py --image "https://example.com/photo.jpg" --describe
+    
+    # 使用更强的模型
+    python image_understanding.py --image photo.jpg --model qwen-vl-max
+
+环境变量:
+    DASHSCOPE_API_KEY    你的 Dashscope API key
+    OPENAI_API_KEY       也可以使用（兼容性支持）
+        """
+    )
+
+    # Required
+    parser.add_argument(
+        "--image", "-i",
+        required=True,
+        help="本地图片路径或图片URL"
+    )
+
+    # Optional
+    parser.add_argument(
+        "--api-key",
+        help="Dashscope API key (也可通过 DASHSCOPE_API_KEY 环境变量设置)"
+    )
+
+    parser.add_argument(
+        "--model", "-m",
+        default=DEFAULT_MODEL,
+        choices=["qwen-vl-plus", "qwen-vl-max"],
+        help=f"使用的模型 (默认: {DEFAULT_MODEL})"
+    )
+
+    # --prompt is accepted as an alias because the usage examples in the help
+    # text spell the option that way; the value is stored as args.custom_prompt.
+    parser.add_argument(
+        "--custom-prompt", "--prompt", "-p",
+        help="自定义图片分析提示词"
+    )
+
+    # Analysis type
+    analysis_group = parser.add_mutually_exclusive_group()
+    analysis_group.add_argument(
+        "--describe",
+        action="store_true",
+        default=True,
+        help="描述图片内容 (默认行为)"
+    )
+    analysis_group.add_argument(
+        "--extract-text", "-e",
+        action="store_true",
+        help="从图片提取文字 (OCR)"
+    )
+    analysis_group.add_argument(
+        "--identify-objects", "-o",
+        action="store_true",
+        help="识别图片中的物体"
+    )
+
+    # Output
+    parser.add_argument(
+        "--compact",
+        action="store_true",
+        help="输出紧凑JSON (不缩进)"
+    )
+
+    args = parser.parse_args()
+
+    try:
+        result = execute(args)
+        indent = None if args.compact else 2
+        print(json.dumps(result, ensure_ascii=False, indent=indent))
+        sys.exit(0)
+
+    except ValueError as e:
+        error_result = {
+            "success": False,
+            "error": {
+                "type": "validation_error",
+                "message": str(e)
+            }
+        }
+        print(json.dumps(error_result, ensure_ascii=False, indent=2))
+        sys.exit(2)
+
+    except Exception as e:
+        error_result = {
+            "success": False,
+            "error": {
+                "type": "execution_error",
+                "message": str(e)
+            }
+        }
+        print(json.dumps(error_result, ensure_ascii=False, indent=2))
+        sys.exit(1)
+
+
+# Run the command-line interface only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
--- a/account/admin/skills/image-understanding/scripts/main.py
+++ b/account/admin/skills/image-understanding/scripts/main.py
@@ -0,0 +1,390 @@
+#!/usr/bin/env python3
+"""
+image-understanding - Enable AI to understand and analyze images using vision API
+
+This script allows users to analyze images by calling vision API (like OpenAI GPT-4 Vision).
+It can describe image content, extract text, identify objects, and answer questions about images.
+
+Usage:
+    python image_understanding.py --image path/to/image.jpg
+    python image_understanding.py --image https://example.com/image.png --custom-prompt "What objects are in this image?"
+    python image_understanding.py --image ./screenshot.png --extract-text
+"""
+
+import argparse
+import json
+import os
+import sys
+from typing import Optional, Dict, Any, List
+from pathlib import Path
+
+
+def validate_image_path(image_path: str) -> str:
+    """
+    Validate and normalize image path or URL.
+
+    Args:
+        image_path: Path to a local image file, or an http(s) URL
+
+    Returns:
+        The URL unchanged, or the absolute path of the local file
+
+    Raises:
+        ValueError: If the URL path or the file extension is not a supported
+            image format, or the local path does not exist or is not a file
+    """
+    from urllib.parse import urlparse
+
+    # A tuple keeps the order stable when the list is shown in error messages.
+    valid_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
+
+    if image_path.startswith(('http://', 'https://')):
+        # URL validation: look only at the path component, so a query string
+        # or fragment can neither hide a valid extension nor fake one.
+        url_path = urlparse(image_path).path.lower()
+        if not url_path.endswith(valid_extensions):
+            raise ValueError(f"Invalid image URL format: {image_path}")
+        return image_path
+
+    # Local file path
+    path = Path(image_path)
+    if not path.exists():
+        raise ValueError(f"Image file not found: {image_path}")
+    if not path.is_file():
+        raise ValueError(f"Path is not a file: {image_path}")
+
+    # Check file extension
+    if path.suffix.lower() not in valid_extensions:
+        raise ValueError(f"Invalid image format: {path.suffix}. Supported formats: {', '.join(valid_extensions)}")
+
+    return str(path.absolute())
+
+
+def encode_image(image_path: str) -> str:
+    """
+    Encode image to base64 string for API upload.
+
+    Args:
+        image_path: Path to image file
+
+    Returns:
+        Base64 encoded image string
+
+    Raises:
+        Exception: If reading or encoding the file fails; the original
+            error is attached as ``__cause__``
+    """
+    import base64
+
+    try:
+        with open(image_path, "rb") as image_file:
+            return base64.b64encode(image_file.read()).decode('utf-8')
+    except Exception as e:
+        # Chain the original error so its type and traceback are not lost.
+        raise Exception(f"Failed to encode image: {str(e)}") from e
+
+
+def call_vision_api(api_key: str, image_path: str, prompt: str, model: str) -> Dict[str, Any]:
+    """
+    Call OpenAI Vision API to analyze image.
+
+    Args:
+        api_key: OpenAI API key
+        image_path: Path to image file or URL
+        prompt: Custom prompt for analysis
+        model: Model name to use
+
+    Returns:
+        Parsed JSON body of the API response
+
+    Raises:
+        Exception: If the API answers with an HTTP error status or the
+            request fails at the network level; the underlying requests
+            exception is attached as ``__cause__``
+    """
+    import requests
+
+    # Prepare image content
+    if image_path.startswith(('http://', 'https://')):
+        image_url = image_path
+    else:
+        # Local files are inlined as a base64 data URL. The MIME type follows
+        # the file extension; anything not listed here is sent as JPEG.
+        mime_types = {
+            '.png': "image/png",
+            '.gif': "image/gif",
+            '.webp': "image/webp",
+        }
+        mime_type = mime_types.get(Path(image_path).suffix.lower(), "image/jpeg")
+        image_url = f"data:{mime_type};base64,{encode_image(image_path)}"
+
+    image_content = {"type": "image_url", "image_url": {"url": image_url}}
+
+    # Build messages
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": prompt},
+                image_content
+            ]
+        }
+    ]
+
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {api_key}"
+    }
+
+    payload = {
+        "model": model,
+        "messages": messages,
+        "max_tokens": 1000
+    }
+
+    try:
+        response = requests.post(
+            "https://api.openai.com/v1/chat/completions",
+            headers=headers,
+            json=payload,
+            timeout=60
+        )
+        response.raise_for_status()
+        return response.json()
+    except requests.exceptions.HTTPError as e:
+        # Prefer the structured error message from the body when there is
+        # one; otherwise fall back to the raw response text.
+        error_msg = f"API request failed: {e.response.text}"
+        try:
+            error_data = e.response.json()
+        except ValueError:
+            error_data = None
+        error_info = error_data.get('error') if isinstance(error_data, dict) else None
+        if isinstance(error_info, dict):
+            error_msg = f"API error: {error_info.get('message', str(error_data))}"
+        raise Exception(error_msg) from e
+    except requests.exceptions.RequestException as e:
+        raise Exception(f"Network error: {str(e)}") from e
+
+
+def analyze_image(
+    api_key: str,
+    image_path: str,
+    custom_prompt: Optional[str] = None,
+    describe: bool = True,
+    extract_text: bool = False,
+    identify_objects: bool = False,
+    model: str = "gpt-4-vision-preview"
+) -> Dict[str, Any]:
+    """
+    Analyze image with specified analysis types.
+
+    Args:
+        api_key: OpenAI API key
+        image_path: Path to image file or URL
+        custom_prompt: Optional custom prompt; when given it replaces the
+            prompt that would be built from the flags below
+        describe: Whether to describe the image
+        extract_text: Whether to extract text from image
+        identify_objects: Whether to identify objects in image
+        model: Model to use for analysis
+
+    Returns:
+        Dictionary with ``success``, ``image_path``, ``model``, an
+        ``analysis`` section and token ``usage``. The model's single reply
+        is stored under ``full_response`` and is repeated under
+        ``description`` / ``extracted_text`` / ``objects`` for each flag
+        that was enabled (``None`` otherwise).
+    """
+    # Collect one instruction per enabled analysis type
+    analysis_tasks = []
+    if describe:
+        analysis_tasks.append("Describe the image in detail, including objects, people, scenery, colors, and overall composition")
+    if extract_text:
+        analysis_tasks.append("Extract all visible text from the image (OCR)")
+    if identify_objects:
+        analysis_tasks.append("Identify and list all recognizable objects, people, and elements in the image")
+
+    if custom_prompt:
+        prompt = custom_prompt
+    elif analysis_tasks:
+        # Number only the tasks that were requested, so the prompt never
+        # contains empty list items.
+        numbered_tasks = "\n".join(
+            f"{number}. {task}" for number, task in enumerate(analysis_tasks, start=1)
+        )
+        prompt = (
+            "Please analyze this image and provide the following information:\n"
+            + numbered_tasks
+            + "\n\nPlease format your response as a structured analysis with clear sections."
+        )
+    else:
+        # Default: full analysis
+        prompt = "Provide a comprehensive description of this image, including all visible objects, people, scenery, text, and any notable details."
+
+    # Call API
+    response = call_vision_api(api_key, image_path, prompt, model)
+
+    # Parse response. Missing, null or empty fields yield an empty reply
+    # instead of raising (an empty "choices" list would otherwise fail on [0]).
+    choices = response.get("choices") or [{}]
+    message = choices[0].get("message") or {}
+    content = message.get("content") or ""
+
+    # Extract usage information if available
+    usage = response.get("usage") or {}
+
+    return {
+        "success": True,
+        "image_path": image_path,
+        "model": model,
+        "analysis": {
+            "description": content if describe else None,
+            "extracted_text": content if extract_text else None,
+            "objects": content if identify_objects else None,
+            "full_response": content
+        },
+        "usage": {
+            "prompt_tokens": usage.get("prompt_tokens", 0),
+            "completion_tokens": usage.get("completion_tokens", 0),
+            "total_tokens": usage.get("total_tokens", 0)
+        }
+    }
+
+
+def execute(args: argparse.Namespace) -> Dict[str, Any]:
+    """
+    Resolve the API key, validate the image and run the analysis.
+
+    Args:
+        args: Parsed command-line arguments
+
+    Returns:
+        The dictionary produced by analyze_image
+
+    Raises:
+        ValueError: If no API key is available from --api-key or the
+            OPENAI_API_KEY environment variable
+    """
+    # The command-line key wins over the environment variable
+    api_key = args.api_key
+    if not api_key:
+        api_key = os.environ.get("OPENAI_API_KEY")
+    if not api_key:
+        raise ValueError("API key not provided. Use --api-key or set OPENAI_API_KEY environment variable")
+
+    validated_image = validate_image_path(args.image)
+
+    # A plain description is requested only when neither OCR, object
+    # identification nor a custom prompt was asked for
+    other_request = args.extract_text or args.identify_objects or args.custom_prompt
+
+    return analyze_image(
+        api_key=api_key,
+        image_path=validated_image,
+        custom_prompt=args.custom_prompt,
+        describe=not other_request,
+        extract_text=args.extract_text,
+        identify_objects=args.identify_objects,
+        model=args.model,
+    )
+
+
+def main():
+    """
+    Main entry point for the image understanding script.
+
+    Parses the command-line arguments, runs the analysis and prints the
+    result as JSON. Exits with status 0 on success, 2 when a ValueError is
+    raised (reported as "validation_error") and 1 for any other exception
+    ("execution_error").
+    """
+    parser = argparse.ArgumentParser(
+        description="Analyze images using AI vision capabilities (OpenAI GPT-4 Vision)",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+    # Basic image description
+    python image_understanding.py --image photo.jpg
+    
+    # Extract text from image
+    python image_understanding.py --image screenshot.png --extract-text
+    
+    # Identify objects in image
+    python image_understanding.py --image photo.jpg --identify-objects
+    
+    # Custom analysis with specific prompt
+    python image_understanding.py --image photo.jpg --prompt "What brand is this product?"
+    
+    # Using API key from environment variable
+    export OPENAI_API_KEY=your_key
+    python image_understanding.py --image photo.jpg
+    
+    # Using remote image URL
+    python image_understanding.py --image "https://example.com/photo.jpg" --describe
+
+Environment Variables:
+    OPENAI_API_KEY    Your OpenAI API key (can be used instead of --api-key)
+        """
+    )
+
+    # Required arguments
+    parser.add_argument(
+        "--image",
+        required=True,
+        help="Path to local image file or URL of image"
+    )
+
+    # Optional arguments
+    parser.add_argument(
+        "--api-key",
+        help="OpenAI API key (can also be set via OPENAI_API_KEY environment variable)"
+    )
+
+    parser.add_argument(
+        "--model",
+        default="gpt-4-vision-preview",
+        help="Model to use for vision analysis (default: gpt-4-vision-preview)"
+    )
+
+    # --prompt is accepted as an alias because the usage examples in the help
+    # text spell the option that way; the value is stored as args.custom_prompt.
+    parser.add_argument(
+        "--custom-prompt",
+        "--prompt",
+        "-p",
+        help="Custom prompt for image analysis"
+    )
+
+    # Analysis type flags (mutually exclusive with each other)
+    analysis_group = parser.add_mutually_exclusive_group()
+    analysis_group.add_argument(
+        "--describe",
+        action="store_true",
+        default=True,
+        help="Describe the image content (default behavior)"
+    )
+    analysis_group.add_argument(
+        "--extract-text",
+        action="store_true",
+        help="Extract text from the image (OCR)"
+    )
+    analysis_group.add_argument(
+        "--identify-objects",
+        action="store_true",
+        help="Identify and list objects in the image"
+    )
+
+    # Output options
+    parser.add_argument(
+        "--compact",
+        action="store_true",
+        help="Output compact JSON (without indentation)"
+    )
+
+    args = parser.parse_args()
+
+    try:
+        result = execute(args)
+
+        # Print result
+        indent = None if args.compact else 2
+        print(json.dumps(result, ensure_ascii=False, indent=indent))
+
+        sys.exit(0)
+
+    except ValueError as e:
+        error_result = {
+            "success": False,
+            "error": {
+                "type": "validation_error",
+                "message": str(e)
+            }
+        }
+        print(json.dumps(error_result, ensure_ascii=False, indent=2))
+        sys.exit(2)
+
+    except Exception as e:
+        error_result = {
+            "success": False,
+            "error": {
+                "type": "execution_error",
+                "message": str(e)
+            }
+        }
+        print(json.dumps(error_result, ensure_ascii=False, indent=2))
+        sys.exit(1)
+
+
+# Run the command-line interface only when this file is executed as a script.
+if __name__ == "__main__":
+    main()