# JARVIS/backend/app/services/agent_service.py

"""
Jarvis Agent 服务层
负责 LangGraph Agent 的调用、流式输出、对话历史管理
"""

import json
import uuid
import logging
from datetime import UTC, datetime
from time import perf_counter
from typing import Any, AsyncGenerator
import asyncio
from openai import BadRequestError
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage

from app.database import async_session
from app.logging_utils import summarize_llm_config

from app.models.conversation import Conversation, Message
from app.models.user import User
from app.agents.graph import get_agent_graph
from app.agents.context import set_current_user, clear_current_user
from app.agents.learning.jobs import schedule_retrospective_job
from app.agents.learning.retrospector import build_session_retrospective
from app.agents.learning.session_search import SessionRetrospectiveSearch, summarize_retrospective
from app.agents.orchestration.task_graph import build_bounded_task_graph
from app.agents.learning.store import append_retrospective_attachment
from app.agents.schemas.orchestration import (
    RuntimeRequestContext,
    assess_parallel_worthiness,
    render_runtime_request_context_summary,
)
from app.agents.schemas.skills import SkillActivationRecord
from app.agents.skills.registry import get_skill_registry
from app.agents.skills.retriever import shortlist_skills_for_request
from app.services import memory_service
from app.services.brain_service import BrainService
from app.services.llm_service import create_llm_from_config, resolve_provider_capabilities
from app.services.rollback_controller import RollbackController
from app.services.runtime_observability import build_runtime_observability_report
from app.agents.tools.time_reasoning import extract_reference_datetime
from app.agents.state import initial_state

logger = logging.getLogger(__name__)


# Header lines that open a section inside a rendered memory-context string
# (user memory, previous conversation summaries, knowledge brain).
# _split_memory_context_sections compares each stripped line against these
# values exactly, and _extract_memory_highlights skips them.
MEMORY_SECTION_HEADERS = (
    "【用户记忆】",
    "【之前对话摘要】",
    "【知识大脑】",
)
# Additional header-like lines that _extract_memory_highlights skips; they do
# not start a section in _split_memory_context_sections.
MEMORY_INLINE_HEADERS = {"[关于你的记忆]"}


def _split_memory_context_sections(memory_context: str | None) -> dict[str, str]:
    text = (memory_context or "").strip()
    if not text:
        return {}

    sections: dict[str, str] = {}
    current_header: str | None = None
    current_lines: list[str] = []

    for line in text.splitlines():
        stripped = line.strip()
        if stripped in MEMORY_SECTION_HEADERS:
            if current_header and current_lines:
                sections[current_header] = "\n".join(current_lines).strip()
            current_header = stripped
            current_lines = [stripped]
            continue
        if current_header:
            current_lines.append(line)

    if current_header and current_lines:
        sections[current_header] = "\n".join(current_lines).strip()

    return sections


def _derive_role_memory_contexts(memory_context: str | None) -> dict[str, str | None]:
    """Derive the three role-specific context strings from a memory context.

    The memory context is split with ``_split_memory_context_sections`` and the
    resulting sections are recombined as follows:

    * ``schedule_context_summary``: user memory + previous summaries
    * ``knowledge_context``: the knowledge section as-is
    * ``analysis_report``: previous summaries + knowledge

    Combined values join the present sections with a blank line; a value is
    ``None`` when none of its source sections is present.
    """
    sections = _split_memory_context_sections(memory_context)

    def _combine(*headers: str) -> str | None:
        present = [sections.get(header) for header in headers]
        present = [text for text in present if text]
        if not present:
            return None
        return "\n\n".join(present)

    return {
        "schedule_context_summary": _combine("【用户记忆】", "【之前对话摘要】"),
        "knowledge_context": sections.get("【知识大脑】"),
        "analysis_report": _combine("【之前对话摘要】", "【知识大脑】"),
    }


def _extract_memory_highlights(memory_context: str | None, *, limit: int = 5) -> list[str]:
    text = (memory_context or "").strip()
    if not text:
        return []

    highlights: list[str] = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line or line in MEMORY_SECTION_HEADERS or line in MEMORY_INLINE_HEADERS:
            continue
        if line.startswith("-"):
            normalized = line[1:].strip()
        else:
            normalized = line
        if normalized:
            highlights.append(normalized)
        if len(highlights) >= limit:
            break
    return highlights


def _summarize_retrospective(retrospective: Any) -> str:
    summary = str(getattr(retrospective, "summary", "") or "").strip()
    task_type = str(getattr(retrospective, "task_type", "") or "").strip()
    execution_mode = str(getattr(retrospective, "execution_mode", "") or "").strip()
    outcome = str(getattr(retrospective, "outcome", "") or "").strip()

    parts = [summary[:80] or task_type or "历史复盘"]
    if execution_mode:
        parts.append(f"mode={execution_mode}")
    if outcome:
        parts.append(f"outcome={outcome}")
    return "；".join(parts)


def _is_streaming_rejection_error(error: Exception, user_llm_config: dict | None) -> bool:
    """Decide whether an error matches the streaming-rejection markers.

    The lower-cased error text is searched for a fixed list of marker
    substrings. For a ``BadRequestError`` the match only counts when the
    ``provider`` attribute of the resolved provider capabilities is neither
    ``"openai"`` nor ``"claude"``; for any other exception type a marker match
    alone is sufficient.

    Args:
        error: The exception raised by the model call.
        user_llm_config: The user's model configuration, passed to
            ``resolve_provider_capabilities``.

    Returns:
        ``True`` when the error matches under the rules above.
    """
    capabilities = resolve_provider_capabilities(user_llm_config)
    lowered = str(error).lower()
    marker_found = any(
        marker in lowered
        for marker in (
            "invalid chat setting",
            "invalid params",
            "stream",
            "streaming",
            "unsupported",
            "bad_request_error",
            "http 400",
            "error code: 400",
        )
    )

    if not isinstance(error, BadRequestError):
        return marker_found

    provider = getattr(capabilities, "provider", None)
    return provider not in {"openai", "claude"} and marker_found


def _coerce_event_text(content: Any) -> str:
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts: list[str] = []
        for item in content:
            if isinstance(item, str):
                parts.append(item)
            elif isinstance(item, dict):
                text = item.get("text")
                if isinstance(text, str):
                    parts.append(text)
        return "".join(parts)
    return str(content) if content else ""


# Version tag placed on snapshot payloads by _build_continuity_snapshot;
# _extract_continuity_snapshot ignores payloads carrying any other version.
_CONTINUITY_STATE_VERSION = 1
# Agent-state keys that _build_continuity_snapshot copies into a snapshot
# (only when the value is not None). The snapshot dict is built by iterating
# this tuple, so the order here is the key order of the resulting snapshot.
_CONTINUITY_SNAPSHOT_FIELDS = (
    "turn_context",
    "routing_decision",
    "continuity_state",
    "pending_action",
    "last_completed_action",
    "clarification_context",
    "tool_outcomes",
    "pending_tasks",
    "completed_tasks",
    "created_entities",
    "current_agent",
    "next_step",
    "agent_trace",
    "agent_id",
    "parent_agent_id",
    "root_agent_id",
    "collaboration_depth",
    "thread_id",
    "last_message_id",
    "message_sequence",
    "spawned_agent_ids",
    "current_sub_commander",
    "active_sub_commanders",
    "sub_commander_trace",
    "event_trace",
    "message_trace",
    "active_tasks",
    "task_results",
    "task_hierarchy",
    "verification_status",
    "verification_summary",
    "verification_evidence",
    "isolation_mode",
    "isolation_id",
    "isolation_workspace_path",
    "isolation_parent_conversation_id",
    "isolation_metadata",
    "input_tokens",
    "output_tokens",
    "estimated_cost",
    "budget_warning",
    "cost_by_agent",
    "cost_thresholds",
    "budget_state",
    "collaboration_budget_history",
    "current_phase",
    "phase_history",
    "current_checkpoint",
    "checkpoint_history",
)


def _normalize_legacy_turn_context(turn_context: Any, current_agent: Any) -> dict[str, Any] | None:
    if not isinstance(turn_context, dict):
        return None
    normalized = dict(turn_context)
    active_agent = normalized.pop("active_agent", None)
    active_sub_flow = normalized.pop("active_sub_flow", None)
    if isinstance(active_agent, str) and active_agent and "active_agent" not in normalized:
        normalized["active_agent"] = active_agent
    if (
        isinstance(active_sub_flow, str)
        and active_sub_flow
        and "active_sub_commander" not in normalized
    ):
        normalized["active_sub_commander"] = active_sub_flow
    if not normalized.get("active_agent") and isinstance(current_agent, str) and current_agent:
        normalized["active_agent"] = current_agent
    return normalized or None


def _normalize_legacy_pending_action(pending_action: Any) -> dict[str, Any] | None:
    if not isinstance(pending_action, dict):
        return None
    normalized = dict(pending_action)
    legacy_action_type = normalized.pop("action_type", None)
    if legacy_action_type and "type" not in normalized:
        normalized["type"] = legacy_action_type
    legacy_agent = normalized.pop("agent", None)
    legacy_sub_flow = normalized.pop("sub_flow", None)
    if legacy_agent and "owner_agent" not in normalized:
        normalized["owner_agent"] = legacy_agent
    if legacy_sub_flow and "owner_sub_commander" not in normalized:
        normalized["owner_sub_commander"] = legacy_sub_flow
    legacy_status = normalized.get("status")
    if legacy_status == "awaiting_confirmation":
        normalized["status"] = "pending"
    elif legacy_status == "awaiting_clarification":
        normalized["status"] = "blocked_on_clarification"
    return normalized or None


def _normalize_legacy_clarification_context(
    clarification_context: Any,
    pending_action: dict[str, Any] | None,
    current_agent: Any,
) -> dict[str, Any] | None:
    if not isinstance(clarification_context, dict):
        return None
    normalized = dict(clarification_context)
    active_agent = normalized.pop("active_agent", None)
    sub_flow = normalized.pop("sub_flow", None)
    awaiting_user_input = normalized.pop("awaiting_user_input", None)
    if isinstance(active_agent, str) and active_agent and "owning_agent" not in normalized:
        normalized["owning_agent"] = active_agent
    if isinstance(sub_flow, str) and sub_flow and "owning_sub_commander" not in normalized:
        normalized["owning_sub_commander"] = sub_flow
    if "target_action" not in normalized:
        target_action = None
        if pending_action:
            pending_type = pending_action.get("type")
            if isinstance(pending_type, str) and pending_type and pending_type != "clarification":
                target_action = pending_type
        if target_action is None and isinstance(sub_flow, str) and sub_flow.startswith("create_"):
            target_action = sub_flow
        if target_action:
            normalized["target_action"] = target_action
    if not normalized.get("owning_agent") and isinstance(current_agent, str) and current_agent:
        normalized["owning_agent"] = current_agent
    if awaiting_user_input is True and "status" not in normalized:
        normalized["status"] = "pending"
    return normalized or None


def _normalize_legacy_continuity_state(
    continuity_state: Any,
    clarification_context: dict[str, Any] | None,
) -> dict[str, Any] | None:
    if not isinstance(continuity_state, dict):
        return None
    normalized = dict(continuity_state)
    normalized.pop("active_agent", None)
    normalized.pop("active_sub_flow", None)
    legacy_status = normalized.get("status")
    if legacy_status == "awaiting_clarification":
        normalized["status"] = "fresh"
        if clarification_context and "mode" not in normalized:
            normalized["mode"] = "resume_after_clarification"
    return normalized or None


def _normalize_continuity_snapshot(state: dict[str, Any]) -> dict[str, Any]:
    """Return a shallow copy of ``state`` with its legacy sub-dicts normalized.

    ``pending_action``, ``clarification_context``, ``continuity_state`` and
    ``turn_context`` are each passed through their ``_normalize_legacy_*``
    helper. A helper result of ``None`` (non-dict input, or a dict that
    normalizes to empty) leaves the original value in place; every other key is
    copied untouched.
    """
    result = dict(state)
    agent = result.get("current_agent")

    # Order matters: the clarification context is derived from the normalized
    # pending action, and the continuity state from the normalized
    # clarification context.
    pending = _normalize_legacy_pending_action(result.get("pending_action"))
    clarification = _normalize_legacy_clarification_context(
        result.get("clarification_context"), pending, agent
    )
    continuity = _normalize_legacy_continuity_state(result.get("continuity_state"), clarification)
    turn = _normalize_legacy_turn_context(result.get("turn_context"), agent)

    for key, value in (
        ("pending_action", pending),
        ("clarification_context", clarification),
        ("continuity_state", continuity),
        ("turn_context", turn),
    ):
        if value is not None:
            result[key] = value
    return result


def _build_continuity_snapshot(state: dict[str, Any]) -> dict[str, Any] | None:
    """Build a versioned continuity snapshot from an agent state.

    The state is normalized with ``_normalize_continuity_snapshot`` and then
    reduced to the keys listed in ``_CONTINUITY_SNAPSHOT_FIELDS`` whose value is
    not ``None``.

    Returns:
        ``{"version": ..., "state": {...}}``, or ``None`` when no snapshot field
        carries a value.
    """
    source = _normalize_continuity_snapshot(state)

    captured: dict[str, Any] = {}
    for field in _CONTINUITY_SNAPSHOT_FIELDS:
        value = source.get(field)
        if value is not None:
            captured[field] = value

    if not captured:
        return None

    # NOTE(review): _extract_continuity_snapshot only accepts payloads whose
    # "kind" is "agent_continuity_state", and this payload carries no "kind";
    # presumably the caller adds it before persisting — confirm.
    return {"version": _CONTINUITY_STATE_VERSION, "state": captured}


def _extract_continuity_snapshot(payload: Any) -> dict[str, Any] | None:
    if isinstance(payload, list):
        for item in payload:
            snapshot = _extract_continuity_snapshot(item)
            if snapshot:
                return snapshot
        return None
    if not isinstance(payload, dict):
        return None
    if payload.get("kind") != "agent_continuity_state":
        return None
    if payload.get("version") != _CONTINUITY_STATE_VERSION:
        return None
    state = payload.get("state")
    if isinstance(state, dict):
        return _normalize_continuity_snapshot(state)
    return None


class AgentService:
    """对话 Agent 服务"""

    def __init__(self, db: AsyncSession) -> None:
        """Keep a reference to the async database session used by this service.

        Args:
            db: Session stored as ``self.db`` and used by the methods that
                query or persist conversation data.
        """
        self.db = db

    async def _try_auto_summarize_background(self, user_id: str, conversation_id: str) -> None:
        """Run ``memory_service.try_auto_summarize`` for a conversation.

        A new session is opened from ``async_session`` for the call instead of
        reusing ``self.db``.
        NOTE(review): presumably because this runs as a background task that
        may outlive the request-scoped session — confirm with the caller.

        Args:
            user_id: Owner of the conversation.
            conversation_id: Conversation to summarize.
        """
        async with async_session() as session:
            await memory_service.try_auto_summarize(session, user_id, conversation_id)

    # ———— M.4: 主动记忆提取 ————
    async def _extract_memories_background(self, user_id: str, conversation_id: str) -> None:
        """Extract and persist memories from the latest messages of a conversation.

        Opens its own session from ``async_session``, loads up to the 10 most
        recent messages of the conversation and hands them to
        ``MemoryExtractor``. Any extracted memories are saved and the count is
        logged. Nothing happens when fewer than two messages exist.

        Every exception is caught and logged with its traceback; nothing is
        re-raised.

        Args:
            user_id: Owner of the conversation.
            conversation_id: Conversation whose messages are inspected.
        """
        from app.services.memory.memory_extractor import MemoryExtractor

        # NOTE(review): these two imports repeat names already imported at
        # module level and look redundant — confirm before removing.
        from sqlalchemy import select
        from app.models.conversation import Message

        try:
            async with async_session() as session:
                recent_query = (
                    select(Message)
                    .where(Message.conversation_id == conversation_id)
                    .order_by(Message.created_at.desc())
                    .limit(10)
                )
                rows = await session.execute(recent_query)
                # NOTE(review): the list is newest-first because of the
                # descending sort; verify MemoryExtractor expects that order.
                messages = list(rows.scalars().all())

                if len(messages) < 2:
                    return

                extractor = MemoryExtractor()
                new_memories = await extractor.extract_from_conversation(
                    session, user_id, conversation_id, messages
                )
                if not new_memories:
                    return

                await extractor.save_memories(session, user_id, conversation_id, new_memories)
                logger.info(
                    f"[MemoryExtractor] Extracted {len(new_memories)} new memories from conversation {conversation_id}"
                )
        except Exception as e:
            logger.exception(f"[MemoryExtractor] Extraction failed: {e}")

    def _build_progress_event(
        self,
        stage: str,
        label: str,
        *,
        agent: str | None = None,
        tool_name: str | None = None,
        step: str | None = None,
        steps: list[str] | None = None,
    ) -> dict[str, Any]:
        return {
            "type": "progress",
            "stage": stage,
            "label": label,
            "agent": agent,
            "tool_name": tool_name,
            "step": step,
            "steps": steps or [],
        }

    def _build_current_datetime_context(self) -> tuple[str, dict[str, str]]:
        now_utc = datetime.now(UTC)
        reference = {
            "current_time_iso": now_utc.isoformat(),
            "current_date_iso": now_utc.date().isoformat(),
        }
        context = (
            "【当前时间】\n"
            f"- current_time_utc: {reference['current_time_iso']}\n"
            f"- current_date_utc: {reference['current_date_iso']}\n"
            "说明：解析'今天/明天/后天/本周/下周'等相对时间时，请以 current_time_utc 为准。"
        )
        return context, reference

    def build_skill_context(self, skill_names: list[str]) -> dict:
        """Build the skills context payload for a list of skill names.

        Args:
            skill_names: Names of the skills to include.

        Returns:
            A dict with ``skills_context`` (whatever the skill registry's
            ``get_skill_context`` returns for these names) and
            ``skills_metadata`` (the names themselves and their count).
        """
        # The registry is queried before the metadata is assembled.
        skills_context = get_skill_registry().get_skill_context(skill_names)
        skills_metadata = {"skills": skill_names, "count": len(skill_names)}
        return {"skills_context": skills_context, "skills_metadata": skills_metadata}

    async def _get_user_llm_config(
        self, user_id: str, model_name: str | None = None
    ) -> dict | None:
        """Look up the chat-model configuration to use for a user.

        Args:
            user_id: Primary key passed to ``self.db.get(User, ...)``.
            model_name: When given, the entry of the user's ``"chat"`` list
                whose ``"name"`` equals this value is returned (regardless of
                its ``"enabled"`` flag). When omitted, the first entry with a
                truthy ``"enabled"`` value is returned.

        Returns:
            The matching model entry, or ``None`` when the user does not exist,
            has no ``llm_config``, or no entry matches.
        """
        user = await self.db.get(User, user_id)
        if not user or not user.llm_config:
            return None

        # dict.get only uses its default when the key is absent, so a stored
        # {"chat": null} would otherwise reach the loops as None and raise
        # TypeError. Treat it like an empty list.
        chat_models = user.llm_config.get("chat") or []

        if model_name:
            return next((m for m in chat_models if m.get("name") == model_name), None)
        return next((m for m in chat_models if m.get("enabled")), None)

    async def _load_continuity_snapshot(self, conversation: Conversation) -> dict[str, Any] | None:
        """Load the most recent continuity snapshot stored for a conversation.

        The conversation's ``agent_state`` attribute (if any) is checked first.
        Failing that, the conversation's assistant messages are scanned from
        newest to oldest (by ``created_at``) and the first snapshot found in a
        message's ``attachments`` is returned.

        Returns:
            The normalized snapshot state, or ``None`` when none is stored.
        """
        stored = _extract_continuity_snapshot(getattr(conversation, "agent_state", None))
        if stored:
            return stored

        newest_assistant_first = (
            select(Message)
            .where(Message.conversation_id == conversation.id, Message.role == "assistant")
            .order_by(Message.created_at.desc())
        )
        rows = await self.db.execute(newest_assistant_first)
        for assistant_message in rows.scalars():
            candidate = _extract_continuity_snapshot(assistant_message.attachments)
            if candidate:
                return candidate
        return None

    async def _build_agent_state(
        self,
        *,
        request_id: str,
        user_id: str,
        conversation: Conversation,
        raw_user_query: str,
        full_message: str,
        memory_context: str | None,
        current_datetime_context: str,
        current_datetime_reference: dict[str, str],
        user_llm_config: dict | None,
        runtime_request_context: RuntimeRequestContext,
        recalled_retrospectives: list[dict[str, Any]],
        skill_shortlist: list[dict[str, Any]],
    ) -> dict[str, Any]:
        """Assemble the graph input state for one chat turn.

        Layers, applied in this order (later layers overwrite earlier keys):

        1. ``initial_state(user_id, conversation.id)``
        2. this turn's messages, memory context, datetime context and LLM config
        3. the previously stored continuity snapshot, if any, with
           ``messages`` reset to this turn's messages afterwards
        4. runtime request context, task graph, feature flags, retrospectives,
           skill shortlist, skill activation records and parallel worthiness

        ``request_id`` and ``raw_user_query`` are accepted but not read by this
        method.

        Returns:
            The populated state dict.
        """

        def _turn_messages() -> list[Any]:
            # A new list with new message objects is produced on every call.
            return [
                SystemMessage(content=runtime_summary),
                HumanMessage(content=full_message),
            ]

        state = initial_state(user_id, conversation.id)
        runtime_summary = render_runtime_request_context_summary(runtime_request_context)
        state.update(
            messages=_turn_messages(),
            memory_context=memory_context,
            current_datetime_context=current_datetime_context,
            current_datetime_reference=current_datetime_reference,
            user_llm_config=user_llm_config,
        )

        previous_snapshot = await self._load_continuity_snapshot(conversation)
        if previous_snapshot:
            # The snapshot may carry its own "messages"; this turn's always win.
            state.update({**previous_snapshot, "messages": _turn_messages()})

        request_context_payload = runtime_request_context.model_dump(mode="json")
        task_graph_payload = None
        if runtime_request_context.task_graph is not None:
            task_graph_payload = runtime_request_context.task_graph.model_dump(mode="json")
        feature_flags = RollbackController().snapshot_flags()

        activation_records = []
        for entry in skill_shortlist:
            if not entry.get("skill_name"):
                continue
            record = SkillActivationRecord(
                skill_name=entry.get("skill_name"),
                source=entry.get("source", "runtime"),
                source_id=entry.get("source_id"),
                status=entry.get("status", "active"),
                injection_mode=entry.get("injection_mode", "metadata_only"),
                matched_terms=entry.get("matched_terms", []),
                rationale=entry.get("rationale"),
            )
            activation_records.append(record.model_dump(mode="json"))

        state.update(
            {
                "runtime_request_context": request_context_payload,
                "task_graph": task_graph_payload,
                "feature_flags": feature_flags,
                "recalled_retrospectives": recalled_retrospectives,
                "retrospective_shortlist": recalled_retrospectives,
                "skill_shortlist": skill_shortlist,
                "skill_activation_records": activation_records,
                "parallel_worthiness": runtime_request_context.parallel_worthiness.model_dump(
                    mode="json"
                ),
            }
        )
        return state

    async def _build_runtime_request_context(
        self,
        *,
        request_id: str,
        user_id: str,
        conversation: Conversation,
        user_query: str,
        memory_context: str | None,
    ) -> tuple[RuntimeRequestContext, list[dict[str, Any]], list[dict[str, Any]]]:
        """Gather per-request runtime context before the agent graph runs.

        Steps, in order:

        1. shortlist up to 3 retrospectives for the query
        2. snapshot the feature flags
        3. shortlist up to 4 skills (learned skills only when
           ``ENABLE_LEARNED_SKILL_LOADING`` is set)
        4. assess parallel worthiness; the runtime mode is ``"direct"`` only
           when the preferred mode is ``"direct"``, otherwise ``"collaboration"``
        5. build a bounded task graph when ``ENABLE_PARALLEL_TASK_GRAPH`` is set

        Wall-clock durations of steps 1 and 3 and of the whole assembly are
        recorded in ``assembly_metrics`` (milliseconds, rounded to 3 places).

        Returns:
            ``(runtime_request_context, retrospectives, skills)`` where the last
            two are the shortlists dumped in JSON mode.
        """

        def _elapsed_ms(since: float) -> float:
            return (perf_counter() - since) * 1000

        assembly_start = perf_counter()

        retro_start = perf_counter()
        retrospectives = await SessionRetrospectiveSearch(self.db).shortlist(
            user_id=user_id,
            query_text=user_query,
            conversation_id=conversation.id,
            limit=3,
        )
        retrospective_ms = _elapsed_ms(retro_start)

        flags = RollbackController().snapshot_flags()

        skills_start = perf_counter()
        skills = await shortlist_skills_for_request(
            self.db,
            user_id=user_id,
            user_query=user_query,
            memory_context=memory_context,
            retrospectives=[entry.model_dump(mode="json") for entry in retrospectives],
            include_learned=flags["ENABLE_LEARNED_SKILL_LOADING"],
            limit=4,
        )
        skill_shortlist_ms = _elapsed_ms(skills_start)

        worthiness = assess_parallel_worthiness(
            user_query,
            retrospective_count=len(retrospectives),
            skill_count=len(skills),
        )
        if worthiness.preferred_mode != "direct":
            runtime_mode = "collaboration"
        else:
            runtime_mode = "direct"

        task_graph = None
        if flags["ENABLE_PARALLEL_TASK_GRAPH"]:
            task_graph = build_bounded_task_graph(
                query_text=user_query,
                parallel_worthiness=worthiness,
            )

        context = RuntimeRequestContext(
            request_id=request_id,
            session_id=conversation.id,
            conversation_id=conversation.id,
            user_id=user_id,
            query_text=user_query,
            raw_user_query=user_query,
            recalled_memories=_extract_memory_highlights(memory_context),
            recalled_retrospectives=[summarize_retrospective(entry) for entry in retrospectives],
            shortlisted_skills=[skill.skill_name for skill in skills],
            skill_shortlist=skills,
            current_agent_role="master",
            execution_mode=runtime_mode,
            conversation_state_ref=conversation.id,
            parallel_worthiness=worthiness,
            task_graph=task_graph,
            recommended_runtime_mode=runtime_mode,
            assembly_metrics={
                "retrospective_ms": round(retrospective_ms, 3),
                "skill_shortlist_ms": round(skill_shortlist_ms, 3),
                # Measured last so it covers everything above.
                "total_ms": round(_elapsed_ms(assembly_start), 3),
            },
        )

        retrospective_payloads = [entry.model_dump(mode="json") for entry in retrospectives]
        skill_payloads = [skill.model_dump(mode="json") for skill in skills]
        return context, retrospective_payloads, skill_payloads

    async def chat(
        self,
        user_id: str,
        message: str,
        conversation_id: str | None = None,
        file_ids: list[str] | None = None,
        model_name: str | None = None,
    ) -> tuple[str, str, AsyncGenerator[dict[str, Any], None]]:
        """
        处理对话请求（流式）
        """
        user_llm_config = await self._get_user_llm_config(user_id, model_name)
        model_name_used = model_name
        if model_name and not user_llm_config:
            raise ValueError("所选模型不可用于聊天，请切换到聊天模型")
        if user_llm_config:
            model_name_used = user_llm_config.get("name", model_name)

        logger.info(
            "agent_chat_started",
            extra={
                "details": {
                    "mode": "stream",
                    "requested_model_name": model_name,
                    "resolved_model_name": model_name_used,
                    "message_length": len(message or ""),
                }
            },
        )

        if conversation_id:
            result = await self.db.execute(
                select(Conversation).where(
                    Conversation.id == conversation_id,
                    Conversation.user_id == user_id,
                )
            )
            conv = result.scalar_one_or_none()
            if conv is None:
                raise ValueError("会话不存在或无权访问")
        else:
            conv = None

        if not conv:
            conv = Conversation(user_id=user_id, title=message[:50])
            self.db.add(conv)
            await self.db.commit()
            await self.db.refresh(conv)
            conversation_id = conv.id
        else:
            conversation_id = conv.id

        file_context = ""
        if file_ids:
            from app.services.document_service import DocumentService

            doc_svc = DocumentService(self.db)
            for file_id in file_ids:
                content = await doc_svc.get_document_content(user_id, file_id)
                if content:
                    file_context += f"\n\n[用户上传文件内容]\n{content}\n[/文件内容]"

        full_message = f"{message}\n{file_context}" if file_context else message

        user_msg = Message(
            conversation_id=conversation_id,
            role="user",
            content=message,
            attachments=[{"file_ids": file_ids}] if file_ids else None,
        )
        self.db.add(user_msg)
        await self.db.commit()
        await self.db.refresh(user_msg)

        brain_service = BrainService(self.db)
        await brain_service.create_event(
            user_id,
            source_type="conversation",
            source_id=conversation_id,
            event_type="message_created",
            title="User message",
            content_summary=message[:500],
            raw_excerpt=message[:2000],
            metadata_={"role": "user"},
            importance_signal=1.0,
        )
        await self.db.commit()

        memory_ctx = await memory_service.build_memory_context(
            self.db, user_id, conversation_id, message
        )

        # M.5: Inject recall memories into context (before LLM call)
        from app.services.memory.recall_injector import MemoryRecallInjector

        recall_ctx = await MemoryRecallInjector().build_context(self.db, user_id, message)
        if recall_ctx:
            memory_ctx = f"{memory_ctx}\n{recall_ctx}" if memory_ctx else recall_ctx

        assistant_msg = Message(
            conversation_id=conversation_id,
            role="assistant",
            content="",
            model=model_name_used or "jarvis",
            attachments=None,
        )
        self.db.add(assistant_msg)
        await self.db.commit()
        await self.db.refresh(assistant_msg)

        def _build_assistant_event_payload(content: str) -> dict[str, Any]:
            return {
                "source_type": "conversation",
                "source_id": conversation_id,
                "event_type": "message_created",
                "title": "Assistant message",
                "content_summary": content[:500],
                "raw_excerpt": content[:2000],
                "metadata_": {"role": "assistant"},
                "importance_signal": 0.8,
            }

        async def run_agent():
            collected = ""
            state: dict[str, Any] | None = None
            runtime_request_context: RuntimeRequestContext | None = None
            set_current_user(user_id)
            try:
                graph = get_agent_graph()
                current_datetime_context, current_datetime_reference = (
                    self._build_current_datetime_context()
                )
                (
                    runtime_request_context,
                    recalled_retrospectives,
                    skill_shortlist,
                ) = await self._build_runtime_request_context(
                    request_id=assistant_msg.id,
                    user_id=user_id,
                    conversation=conv,
                    user_query=message,
                    memory_context=memory_ctx,
                )

                state = await self._build_agent_state(
                    request_id=assistant_msg.id,
                    user_id=user_id,
                    conversation=conv,
                    raw_user_query=message,
                    full_message=full_message,
                    memory_context=memory_ctx,
                    current_datetime_context=current_datetime_context,
                    current_datetime_reference=current_datetime_reference,
                    user_llm_config=user_llm_config,
                    runtime_request_context=runtime_request_context,
                    recalled_retrospectives=recalled_retrospectives,
                    skill_shortlist=skill_shortlist,
                )
                state.update(_derive_role_memory_contexts(memory_ctx))

                yield self._build_progress_event(
                    "thinking", "Jarvis 正在分析请求", agent="master", step="理解你的问题"
                )

                try:
                    async for event in graph.astream_events(state, version="v2"):
                        kind = event.get("event")
                        event_name = event.get("name", "")
                        metadata = event.get("metadata", {})
                        data = event.get("data", {})

                        if kind == "on_chain_start" and event_name in {
                            "master",
                            "schedule_planner",
                            "executor",
                            "librarian",
                            "analyst",
                        }:
                            stage_map = {
                                "master": ("thinking", "Jarvis 正在理解请求"),
                                "schedule_planner": ("planning", "Jarvis 正在编排日程"),
                                "executor": ("tool", "Jarvis 正在执行操作"),
                                "librarian": ("tool", "Jarvis 正在检索知识"),
                                "analyst": ("thinking", "Jarvis 正在分析信息"),
                            }
                            stage, label = stage_map.get(
                                event_name, ("thinking", "Jarvis 正在思考")
                            )
                            yield self._build_progress_event(
                                stage, label, agent=event_name, step=label
                            )

                        elif kind == "on_tool_start":
                            yield self._build_progress_event(
                                "tool",
                                f"Jarvis 正在调用工具 {event_name}",
                                agent="executor",
                                tool_name=event_name,
                                step=f"正在执行 {event_name}",
                            )

                        elif kind == "on_tool_end":
                            tool_result = data.get("output")
                            step = f"已完成 {event_name}"
                            if isinstance(tool_result, str) and len(tool_result) > 0:
                                step = tool_result[:100]
                            yield self._build_progress_event(
                                "tool",
                                f"工具 {event_name} 已完成",
                                agent="executor",
                                tool_name=event_name,
                                step=step,
                            )

                        elif kind == "on_chat_model_stream":
                            chunk = data.get("chunk")
                            content = _coerce_event_text(
                                getattr(chunk, "content", "") if chunk else ""
                            )
                            if content:
                                collected += content
                                yield {"type": "chunk", "content": content}

                        elif kind == "on_chain_end":
                            output = data.get("output")
                            final_resp = None
                            if isinstance(output, dict):
                                state.update(output)
                                final_resp = output.get("final_response")
                            if final_resp:
                                final_text = str(final_resp)
                                if final_text != collected:
                                    collected = final_text
                                    yield {"type": "chunk", "content": final_text}

                        elif kind == "on_chat_model_end":
                            output = data.get("output")
                            final_content = _coerce_event_text(
                                getattr(output, "content", "") if output else ""
                            )
                            if final_content:
                                final_text = final_content
                                if final_text != collected:
                                    collected = final_text
                                    yield {"type": "chunk", "content": final_text}

                except Exception as e:
                    if _is_streaming_rejection_error(e, user_llm_config) and not collected:
                        yield self._build_progress_event(
                            "responding", "Jarvis 正在生成回复", agent="master", step="fallback"
                        )
                        try:
                            result_state = await graph.ainvoke(state)
                            if isinstance(result_state, dict):
                                state.update(result_state)
                            fallback_content = result_state.get("final_response") or str(
                                result_state.get("messages", [AIMessage(content="")])[-1].content
                            )
                            collected = str(fallback_content)
                            yield {"type": "chunk", "content": collected}
                        except Exception:
                            logger.exception("llm_sync_fallback_failed")
                            safe_error = "模型服务暂不可用，请稍后再试。"
                            yield {"type": "error", "error": safe_error}
                            collected = f"抱歉，发生错误: {safe_error}"
                            yield {"type": "chunk", "content": collected}
                    else:
                        logger.exception("agent_streaming_failed")
                        if not collected:
                            safe_error = "模型服务暂不可用，请稍后再试。"
                            yield {"type": "error", "error": safe_error}
                            collected = f"抱歉，发生错误: {safe_error}"
                            yield {"type": "chunk", "content": collected}
                        else:
                            yield {"type": "error", "error": str(e)}
            finally:
                clear_current_user()
                try:
                    if collected:
                        assistant_msg.content = collected
                        continuity_snapshot = _build_continuity_snapshot(state or {})
                        attachments = (
                            [
                                {
                                    "kind": "agent_continuity_state",
                                    **continuity_snapshot,
                                }
                            ]
                            if continuity_snapshot
                            else []
                        )
                        if state is not None and runtime_request_context is not None:
                            retrospective = build_session_retrospective(
                                request_id=assistant_msg.id,
                                session_id=conversation_id,
                                user_query=message,
                                state=state,
                                runtime_context=runtime_request_context,
                            )
                            attachments = append_retrospective_attachment(attachments, retrospective)
                            attachments.append(
                                {
                                    "kind": "runtime_observability",
                                    "payload": build_runtime_observability_report(
                                        state=state,
                                        feature_flags=state.get("feature_flags") or {},
                                    ),
                                }
                            )
                        conv.agent_state = (
                            {
                                "kind": "agent_continuity_state",
                                **continuity_snapshot,
                            }
                            if continuity_snapshot
                            else None
                        )
                        await BrainService(self.db).create_event(
                            user_id,
                            **_build_assistant_event_payload(collected),
                        )
                        assistant_msg.attachments = attachments or None
                        await self.db.commit()
                        await self.db.refresh(assistant_msg)
                        schedule_retrospective_job(
                            user_id=user_id,
                            conversation_id=conversation_id,
                            request_message_id=user_msg.id,
                            response_message_id=assistant_msg.id,
                            query_text=message,
                            final_response=collected,
                            state=state,
                        )
                except Exception:
                    logger.exception("save_assistant_message_failed")
                asyncio.create_task(self._try_auto_summarize_background(user_id, conversation_id))
                # M.4: Extract memories from conversation
                asyncio.create_task(self._extract_memories_background(user_id, conversation_id))

        return conversation_id, assistant_msg.id, run_agent()

    async def chat_simple(
        self,
        user_id: str,
        message: str,
        conversation_id: str | None = None,
        file_ids: list[str] | None = None,
        model_name: str | None = None,
    ) -> tuple[str, str, str, str | None]:
        """
        Run one non-streaming chat turn and persist both sides of it.

        The method resolves the user's LLM config, loads (or creates) the
        conversation, stores the user message and an assistant placeholder,
        builds the memory/recall context, invokes the agent graph once with
        ``ainvoke`` and finally writes the reply, its attachments and the
        conversation continuity state back through ``self.db``.

        Args:
            user_id: Owner of the conversation; also set as the current agent
                user for the duration of the graph call.
            message: Raw user text. Its first 50 characters become the title
                of a newly created conversation.
            conversation_id: Existing conversation to continue. When falsy a
                new conversation is created.
            file_ids: Accepted for interface compatibility; not read anywhere
                in this method.
            model_name: Optional model selector forwarded to
                ``_get_user_llm_config``.

        Returns:
            ``(conversation_id, assistant_message_id, response_content,
            model_name_used)``.

        Raises:
            ValueError: If ``model_name`` was given but no usable LLM config
                was resolved, or if ``conversation_id`` does not match a
                conversation owned by ``user_id``.

        Any exception raised while building agent state or invoking the graph
        is logged and replaced by a generic apology reply; exceptions from the
        persistence steps are not caught here.
        """
        user_llm_config = await self._get_user_llm_config(user_id, model_name)
        model_name_used = model_name
        if model_name and not user_llm_config:
            raise ValueError("所选模型不可用于聊天，请切换到聊天模型")
        if user_llm_config:
            model_name_used = user_llm_config.get("name", model_name)

        conv = None
        if conversation_id:
            # Filter on user_id as well so a user cannot attach to someone
            # else's conversation by guessing its id.
            result = await self.db.execute(
                select(Conversation).where(
                    Conversation.id == conversation_id,
                    Conversation.user_id == user_id,
                )
            )
            conv = result.scalar_one_or_none()
            if conv is None:
                raise ValueError("会话不存在或无权访问")

        if conv is None:
            conv = Conversation(user_id=user_id, title=message[:50])
            self.db.add(conv)
            await self.db.commit()
            await self.db.refresh(conv)
        conversation_id = conv.id

        user_msg = Message(conversation_id=conversation_id, role="user", content=message)
        self.db.add(user_msg)

        # Placeholder row; content and attachments are filled in after the
        # graph has produced a reply.
        assistant_msg = Message(
            conversation_id=conversation_id,
            role="assistant",
            content="",
            model=model_name_used or "jarvis",
            attachments=None,
        )
        self.db.add(assistant_msg)

        brain_service = BrainService(self.db)
        await brain_service.create_event(
            user_id,
            source_type="conversation",
            source_id=conversation_id,
            event_type="message_created",
            title="User message",
            content_summary=message[:500],
            raw_excerpt=message[:2000],
            metadata_={"role": "user"},
            importance_signal=1.0,
        )

        memory_ctx = await memory_service.build_memory_context(
            self.db, user_id, conversation_id, message
        )

        # M.5: Inject recall memories into context (before LLM call)
        from app.services.memory.recall_injector import MemoryRecallInjector

        recall_ctx = await MemoryRecallInjector().build_context(self.db, user_id, message)
        if recall_ctx:
            memory_ctx = f"{memory_ctx}\n{recall_ctx}" if memory_ctx else recall_ctx

        # Explicit sentinels: they stay None when the agent call fails, so the
        # persistence code below can tell success from failure without
        # inspecting locals().
        result_state = None
        runtime_request_context = None

        set_current_user(user_id)
        try:
            graph = get_agent_graph()
            current_datetime_context, current_datetime_reference = (
                self._build_current_datetime_context()
            )
            (
                runtime_request_context,
                recalled_retrospectives,
                skill_shortlist,
            ) = await self._build_runtime_request_context(
                request_id=assistant_msg.id,
                user_id=user_id,
                conversation=conv,
                user_query=message,
                memory_context=memory_ctx,
            )
            state = await self._build_agent_state(
                request_id=assistant_msg.id,
                user_id=user_id,
                conversation=conv,
                raw_user_query=message,
                full_message=message,
                memory_context=memory_ctx,
                current_datetime_context=current_datetime_context,
                current_datetime_reference=current_datetime_reference,
                user_llm_config=user_llm_config,
                runtime_request_context=runtime_request_context,
                recalled_retrospectives=recalled_retrospectives,
                skill_shortlist=skill_shortlist,
            )
            state.update(_derive_role_memory_contexts(memory_ctx))
            result_state = await graph.ainvoke(state)
            # Always coerce to str: the reply is sliced and stored as text
            # below, outside this try block.
            response_content = str(
                result_state.get("final_response")
                or result_state.get("messages", [AIMessage(content="")])[-1].content
            )
        except Exception:
            logger.exception("agent_chat_simple_failed")
            response_content = "抱歉，发生错误。"
        finally:
            clear_current_user()

        await brain_service.create_event(
            user_id,
            source_type="conversation",
            source_id=conversation_id,
            event_type="message_created",
            title="Assistant message",
            content_summary=response_content[:500],
            raw_excerpt=response_content[:2000],
            metadata_={"role": "assistant"},
            importance_signal=0.8,
        )

        assistant_msg.content = response_content
        continuity_snapshot = (
            _build_continuity_snapshot(result_state) if result_state is not None else None
        )
        continuity_state = (
            {"kind": "agent_continuity_state", **continuity_snapshot}
            if continuity_snapshot
            else None
        )
        # Copy so the message attachment and conv.agent_state never share one
        # mutable dict object.
        attachments = [dict(continuity_state)] if continuity_state else []

        # Same guard as the streaming path: only build the retrospective and
        # observability attachments when the agent actually ran.
        if result_state is not None and runtime_request_context is not None:
            retrospective = build_session_retrospective(
                request_id=assistant_msg.id,
                session_id=conversation_id,
                user_query=message,
                state=result_state,
                runtime_context=runtime_request_context,
            )
            attachments = append_retrospective_attachment(attachments, retrospective)
            attachments.append(
                {
                    "kind": "runtime_observability",
                    "payload": build_runtime_observability_report(
                        state=result_state,
                        feature_flags=result_state.get("feature_flags") or {},
                    ),
                }
            )

        conv.agent_state = continuity_state
        assistant_msg.attachments = attachments or None
        await self.db.commit()
        await self.db.refresh(assistant_msg)
        schedule_retrospective_job(
            user_id=user_id,
            conversation_id=conversation_id,
            request_message_id=user_msg.id,
            response_message_id=assistant_msg.id,
            query_text=message,
            final_response=response_content,
            state=result_state,
        )

        return conversation_id, assistant_msg.id, response_content, model_name_used