test(agents): add Code Commander unit tests for Phases 1-3
Tests Phase 1: state, prompts, tools registry. Tests Phase 2: AI adapters, security classifier, direct executor. Tests Phase 3: schemas (CodeTask, CodeExecutionResult, enums).
This commit is contained in:
265
backend/tests/backend/app/agents/test_code_commander.py
Normal file
265
backend/tests/backend/app/agents/test_code_commander.py
Normal file
@@ -0,0 +1,265 @@
|
|||||||
|
"""
|
||||||
|
Tests for Code Commander module (Phases 1-3)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from types import SimpleNamespace
|
||||||
|
from unittest.mock import Mock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
# Register a Mock under the name "trafilatura" (only if no module is registered
# under that name yet) before the app.agents imports below are executed.
# Presumably this lets those imports succeed when the real package is not
# installed -- TODO confirm which app module imports trafilatura.
sys.modules.setdefault("trafilatura", Mock())
|
||||||
|
|
||||||
|
from app.agents.state import AgentRole
|
||||||
|
from app.agents.prompts import (
|
||||||
|
CODE_COMMANDER_SYSTEM_PROMPT,
|
||||||
|
DIRECT_EXECUTION_PROMPT,
|
||||||
|
SANDBOX_EXECUTION_PROMPT,
|
||||||
|
TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY,
|
||||||
|
)
|
||||||
|
from app.agents.tools import CODE_COMMANDER_TOOLSET_NAMES
|
||||||
|
from app.agents.registry.builtins import (
|
||||||
|
TOP_LEVEL_AGENT_DEFAULT_SUB_COMMANDERS,
|
||||||
|
TOP_LEVEL_AGENT_DISPLAY_NAMES,
|
||||||
|
TOP_LEVEL_AGENT_ROUTING_HINTS,
|
||||||
|
TOP_LEVEL_AGENT_ALLOWED_SPAWN_ROLES,
|
||||||
|
)
|
||||||
|
from app.agents.tools.ai_adapter import (
|
||||||
|
AICLIAdapter,
|
||||||
|
ADAPTER_REGISTRY,
|
||||||
|
get_adapter,
|
||||||
|
CodeExecutionResult,
|
||||||
|
ClaudeAdapter,
|
||||||
|
GeminiAdapter,
|
||||||
|
CodexAdapter,
|
||||||
|
OpenCodeAdapter,
|
||||||
|
)
|
||||||
|
from app.agents.tools.security_classifier import (
|
||||||
|
RiskLevel,
|
||||||
|
SecurityClassifier,
|
||||||
|
)
|
||||||
|
from app.agents.tools.direct_executor import DirectExecutor
|
||||||
|
from app.agents.schemas.task import (
|
||||||
|
CodeProviderType,
|
||||||
|
RiskLevelType,
|
||||||
|
CodeTaskType,
|
||||||
|
CodeTask,
|
||||||
|
CodeExecutionResultSchema,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Phase 1: Infrastructure Tests
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestPhase1State:
    """Phase 1: the CODE_COMMANDER role and its top-level registry entries."""

    def test_code_commander_role_exists(self):
        """AgentRole exposes CODE_COMMANDER with the value "code_commander"."""
        assert hasattr(AgentRole, "CODE_COMMANDER")
        assert AgentRole.CODE_COMMANDER.value == "code_commander"

    def test_code_commander_in_top_level_agents(self):
        """CODE_COMMANDER should be registered in TOP_LEVEL_AGENT entries"""
        assert "code_commander" in TOP_LEVEL_AGENT_DEFAULT_SUB_COMMANDERS
        assert "code_commander" in TOP_LEVEL_AGENT_DISPLAY_NAMES
        assert "code_commander" in TOP_LEVEL_AGENT_ROUTING_HINTS
        assert "code_commander" in TOP_LEVEL_AGENT_ALLOWED_SPAWN_ROLES

    def test_code_commander_has_no_sub_commanders(self):
        """Code Commander is a top-level agent with no sub-commanders"""
        # An explicit membership assert replaces the former unused
        # `idx = list(...keys()).index(...)` lookup, so a missing key is
        # reported as an assertion failure instead of an incidental ValueError.
        assert "code_commander" in TOP_LEVEL_AGENT_DEFAULT_SUB_COMMANDERS
        assert TOP_LEVEL_AGENT_DEFAULT_SUB_COMMANDERS["code_commander"] == ()
|
||||||
|
|
||||||
|
|
||||||
|
class TestPhase1Prompts:
    """Phase 1: prompt constants exported by app.agents.prompts."""

    def test_code_commander_system_prompt_exists(self):
        """The Code Commander system prompt is set and non-empty."""
        prompt = CODE_COMMANDER_SYSTEM_PROMPT
        assert prompt is not None
        assert len(prompt) > 0

    def test_code_commander_prompt_in_top_level_prompts(self):
        """The prompt lookup table maps "code_commander" to the system prompt."""
        prompts_by_key = TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY
        assert "code_commander" in prompts_by_key
        assert prompts_by_key["code_commander"] == CODE_COMMANDER_SYSTEM_PROMPT

    def test_sandbox_execution_prompt_exists(self):
        """The sandbox execution prompt is set and non-empty."""
        prompt = SANDBOX_EXECUTION_PROMPT
        assert prompt is not None
        assert len(prompt) > 0

    def test_direct_execution_prompt_exists(self):
        """The direct execution prompt is set and mentions "直接" or "direct"."""
        prompt = DIRECT_EXECUTION_PROMPT
        assert prompt is not None
        # Accept either the Chinese keyword or a case-insensitive "direct".
        if "直接" not in prompt:
            assert "direct" in prompt.lower()
|
||||||
|
|
||||||
|
|
||||||
|
class TestPhase1Tools:
    """Phase 1: contents of CODE_COMMANDER_TOOLSET_NAMES."""

    @staticmethod
    def _assert_tool_listed(tool_name):
        """Assert that *tool_name* appears in CODE_COMMANDER_TOOLSET_NAMES."""
        assert tool_name in CODE_COMMANDER_TOOLSET_NAMES

    def test_code_commander_toolset_names_defined(self):
        """The toolset is a list holding exactly five names."""
        toolset = CODE_COMMANDER_TOOLSET_NAMES
        assert toolset is not None
        assert isinstance(toolset, list)
        assert len(toolset) == 5

    def test_code_commander_tools_includes_execute_code_task(self):
        """The toolset lists "execute_code_task"."""
        self._assert_tool_listed("execute_code_task")

    def test_code_commander_tools_includes_get_execution_status(self):
        """The toolset lists "get_execution_status"."""
        self._assert_tool_listed("get_execution_status")

    def test_code_commander_tools_includes_send_interactive_input(self):
        """The toolset lists "send_interactive_input"."""
        self._assert_tool_listed("send_interactive_input")

    def test_code_commander_tools_includes_download_workspace(self):
        """The toolset lists "download_workspace"."""
        self._assert_tool_listed("download_workspace")

    def test_code_commander_tools_includes_cleanup_workspace(self):
        """The toolset lists "cleanup_workspace"."""
        self._assert_tool_listed("cleanup_workspace")
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Phase 2: Execution Engine Tests
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestPhase2AIAdapter:
    """Phase 2: adapter registry, get_adapter factory, and adapter properties."""

    @staticmethod
    def _check_properties(adapter, name, needs_workspace):
        """Assert cli_name, requires_workspace, and provider on *adapter*."""
        assert adapter.cli_name == name
        assert adapter.requires_workspace is needs_workspace
        assert adapter.provider == name

    def test_adapter_registry_has_all_providers(self):
        """All four provider keys are present in ADAPTER_REGISTRY."""
        for provider in ("claude", "gemini", "codex", "opencode"):
            assert provider in ADAPTER_REGISTRY

    def test_get_adapter_returns_correct_adapter(self):
        """get_adapter returns an instance of the class matching each key."""
        expected_classes = (
            ("claude", ClaudeAdapter),
            ("gemini", GeminiAdapter),
            ("codex", CodexAdapter),
            ("opencode", OpenCodeAdapter),
        )
        for provider, adapter_cls in expected_classes:
            assert isinstance(get_adapter(provider), adapter_cls)

    def test_get_adapter_raises_for_unknown_provider(self):
        """An unregistered provider name raises ValueError with a clear message."""
        with pytest.raises(ValueError, match="Unknown AI provider"):
            get_adapter("unknown")

    def test_claude_adapter_properties(self):
        """ClaudeAdapter reports "claude" and requires a workspace."""
        self._check_properties(ClaudeAdapter(), "claude", True)

    def test_gemini_adapter_properties(self):
        """GeminiAdapter reports "gemini" and does not require a workspace."""
        self._check_properties(GeminiAdapter(), "gemini", False)

    def test_codex_adapter_properties(self):
        """CodexAdapter reports "codex" and requires a workspace."""
        self._check_properties(CodexAdapter(), "codex", True)

    def test_opencode_adapter_properties(self):
        """OpenCodeAdapter reports "opencode" and requires a workspace."""
        self._check_properties(OpenCodeAdapter(), "opencode", True)

    def test_code_execution_result_dataclass(self):
        """CodeExecutionResult keeps given fields; error and exit_code default."""
        outcome = CodeExecutionResult(
            success=True,
            message="Test message",
            output="Test output",
            files_created=["file1.py"],
        )
        # Fields passed to the constructor.
        assert outcome.success is True
        assert outcome.message == "Test message"
        assert outcome.output == "Test output"
        assert outcome.files_created == ["file1.py"]
        # Fields left at their defaults.
        assert outcome.error is None
        assert outcome.exit_code == 0
|
||||||
|
|
||||||
|
|
||||||
|
class TestPhase2SecurityClassifier:
    """Phase 2: RiskLevel values and SecurityClassifier results."""

    @staticmethod
    def _risk_of(prompt):
        """Classify *prompt* with a fresh SecurityClassifier instance."""
        return SecurityClassifier().classify(prompt)

    def test_risk_level_enum_values(self):
        """RiskLevel.LOW and RiskLevel.HIGH carry the values "low" and "high"."""
        assert RiskLevel.LOW.value == "low"
        assert RiskLevel.HIGH.value == "high"

    def test_security_classifier_classify_low_risk_demo(self):
        """A snake-game demo request is classified as LOW."""
        assert self._risk_of("写一个贪食蛇 demo") == RiskLevel.LOW

    def test_security_classifier_classify_low_risk_simple(self):
        """A hello-world request is classified as LOW."""
        assert self._risk_of("帮我写一个 hello world") == RiskLevel.LOW

    def test_security_classifier_classify_high_risk_dangerous(self):
        """A request containing "rm -rf" is classified as HIGH."""
        assert self._risk_of("删除所有文件 rm -rf") == RiskLevel.HIGH

    def test_security_classifier_classify_high_risk_format(self):
        """A request containing "sudo mkfs" is classified as HIGH."""
        assert self._risk_of("格式化硬盘 sudo mkfs") == RiskLevel.HIGH

    def test_security_classifier_get_risk_factors(self):
        """get_risk_factors returns a non-empty result for a delete request."""
        risk_factors = SecurityClassifier().get_risk_factors("删除系统文件")
        assert len(risk_factors) > 0
|
||||||
|
|
||||||
|
|
||||||
|
class TestPhase2DirectExecutor:
    """Phase 2: DirectExecutor construction around an adapter."""

    def test_direct_executor_initialization(self):
        """DirectExecutor keeps a reference to the adapter it was given."""
        claude = ClaudeAdapter()
        runner = DirectExecutor(claude)
        assert runner is not None
        assert runner.adapter is claude

    def test_direct_executor_is_installed_returns_bool(self):
        """The executor's adapter answers is_installed() with a bool."""
        runner = DirectExecutor(ClaudeAdapter())
        # is_installed is on the adapter, not the executor
        installed = runner.adapter.is_installed()
        assert isinstance(installed, bool)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Phase 3: Schema Tests
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestPhase3Schemas:
    """Phase 3: enums and models from app.agents.schemas.task."""

    @staticmethod
    def _assert_enum_values(expected_pairs):
        """Assert that each (member, value) pair satisfies member.value == value."""
        for member, value in expected_pairs:
            assert member.value == value

    def test_code_provider_type_enum(self):
        """CodeProviderType members map to their lowercase provider names."""
        self._assert_enum_values(
            (
                (CodeProviderType.CLAUDE, "claude"),
                (CodeProviderType.GEMINI, "gemini"),
                (CodeProviderType.CODEX, "codex"),
                (CodeProviderType.OPENCODE, "opencode"),
            )
        )

    def test_risk_level_type_enum(self):
        """RiskLevelType has LOW == "low" and HIGH == "high"."""
        self._assert_enum_values(
            (
                (RiskLevelType.LOW, "low"),
                (RiskLevelType.HIGH, "high"),
            )
        )

    def test_code_task_type_enum(self):
        """CodeTaskType has DEMO, PROJECT, and MODIFICATION values."""
        self._assert_enum_values(
            (
                (CodeTaskType.DEMO, "demo"),
                (CodeTaskType.PROJECT, "project"),
                (CodeTaskType.MODIFICATION, "modification"),
            )
        )

    def test_code_task_model(self):
        """CodeTask stores the given fields and has a non-None task_id."""
        code_task = CodeTask(
            task_type=CodeTaskType.DEMO,
            ai_provider=CodeProviderType.CLAUDE,
            sandbox_mode=False,
            user_prompt="写一个贪食蛇",
        )
        assert code_task.task_type == CodeTaskType.DEMO
        assert code_task.ai_provider == CodeProviderType.CLAUDE
        assert code_task.sandbox_mode is False
        assert code_task.user_prompt == "写一个贪食蛇"
        # task_id was not passed to the constructor, yet must be populated.
        assert code_task.task_id is not None

    def test_code_execution_result_schema(self):
        """CodeExecutionResultSchema stores the fields passed to it."""
        outcome = CodeExecutionResultSchema(
            success=True,
            message="执行成功",
            output="Hello World",
            files_created=["main.py"],
        )
        assert outcome.success is True
        assert outcome.message == "执行成功"
        assert outcome.output == "Hello World"
        assert outcome.files_created == ["main.py"]
|
||||||
Reference in New Issue
Block a user