Files
JARVIS/backend/app/services/agent_service.py

1184 lines
46 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Jarvis Agent 服务层
负责 LangGraph Agent 的调用、流式输出、对话历史管理
"""
import json
import uuid
import logging
from datetime import UTC, datetime
from time import perf_counter
from typing import Any, AsyncGenerator
import asyncio
from openai import BadRequestError
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from app.database import async_session
from app.logging_utils import summarize_llm_config
from app.models.conversation import Conversation, Message
from app.models.user import User
from app.agents.graph import get_agent_graph
from app.agents.context import set_current_user, clear_current_user
from app.agents.learning.jobs import schedule_retrospective_job
from app.agents.learning.retrospector import build_session_retrospective
from app.agents.learning.session_search import SessionRetrospectiveSearch, summarize_retrospective
from app.agents.orchestration.task_graph import build_bounded_task_graph
from app.agents.learning.store import append_retrospective_attachment
from app.agents.schemas.orchestration import (
RuntimeRequestContext,
assess_parallel_worthiness,
render_runtime_request_context_summary,
)
from app.agents.schemas.skills import SkillActivationRecord
from app.agents.skills.registry import get_skill_registry
from app.agents.skills.retriever import shortlist_skills_for_request
from app.services import memory_service
from app.services.brain_service import BrainService
from app.services.llm_service import create_llm_from_config, resolve_provider_capabilities
from app.services.rollback_controller import RollbackController
from app.services.runtime_observability import build_runtime_observability_report
from app.agents.tools.time_reasoning import extract_reference_datetime
from app.agents.state import initial_state
logger = logging.getLogger(__name__)
MEMORY_SECTION_HEADERS = (
"【用户记忆】",
"【之前对话摘要】",
"【知识大脑】",
)
MEMORY_INLINE_HEADERS = {"[关于你的记忆]"}
def _split_memory_context_sections(memory_context: str | None) -> dict[str, str]:
text = (memory_context or "").strip()
if not text:
return {}
sections: dict[str, str] = {}
current_header: str | None = None
current_lines: list[str] = []
for line in text.splitlines():
stripped = line.strip()
if stripped in MEMORY_SECTION_HEADERS:
if current_header and current_lines:
sections[current_header] = "\n".join(current_lines).strip()
current_header = stripped
current_lines = [stripped]
continue
if current_header:
current_lines.append(line)
if current_header and current_lines:
sections[current_header] = "\n".join(current_lines).strip()
return sections
def _derive_role_memory_contexts(memory_context: str | None) -> dict[str, str | None]:
sections = _split_memory_context_sections(memory_context)
user_memory = sections.get("【用户记忆】")
summaries = sections.get("【之前对话摘要】")
knowledge = sections.get("【知识大脑】")
def _join_parts(*parts: str | None) -> str | None:
values = [part for part in parts if part]
return "\n\n".join(values) if values else None
return {
"schedule_context_summary": _join_parts(user_memory, summaries),
"knowledge_context": knowledge,
"analysis_report": _join_parts(summaries, knowledge),
}
def _extract_memory_highlights(memory_context: str | None, *, limit: int = 5) -> list[str]:
text = (memory_context or "").strip()
if not text:
return []
highlights: list[str] = []
for raw_line in text.splitlines():
line = raw_line.strip()
if not line or line in MEMORY_SECTION_HEADERS or line in MEMORY_INLINE_HEADERS:
continue
if line.startswith("-"):
normalized = line[1:].strip()
else:
normalized = line
if normalized:
highlights.append(normalized)
if len(highlights) >= limit:
break
return highlights
def _summarize_retrospective(retrospective: Any) -> str:
summary = str(getattr(retrospective, "summary", "") or "").strip()
task_type = str(getattr(retrospective, "task_type", "") or "").strip()
execution_mode = str(getattr(retrospective, "execution_mode", "") or "").strip()
outcome = str(getattr(retrospective, "outcome", "") or "").strip()
parts = [summary[:80] or task_type or "历史复盘"]
if execution_mode:
parts.append(f"mode={execution_mode}")
if outcome:
parts.append(f"outcome={outcome}")
return "".join(parts)
def _is_streaming_rejection_error(error: Exception, user_llm_config: dict | None) -> bool:
capabilities = resolve_provider_capabilities(user_llm_config)
error_text = str(error).lower()
markers = [
"invalid chat setting",
"invalid params",
"stream",
"streaming",
"unsupported",
"bad_request_error",
"http 400",
"error code: 400",
]
if isinstance(error, BadRequestError):
return getattr(capabilities, "provider", None) not in {"openai", "claude"} and any(
marker in error_text for marker in markers
)
return any(marker in error_text for marker in markers)
def _coerce_event_text(content: Any) -> str:
if isinstance(content, str):
return content
if isinstance(content, list):
parts: list[str] = []
for item in content:
if isinstance(item, str):
parts.append(item)
elif isinstance(item, dict):
text = item.get("text")
if isinstance(text, str):
parts.append(text)
return "".join(parts)
return str(content) if content else ""
_CONTINUITY_STATE_VERSION = 1
_CONTINUITY_SNAPSHOT_FIELDS = (
"turn_context",
"routing_decision",
"continuity_state",
"pending_action",
"last_completed_action",
"clarification_context",
"tool_outcomes",
"pending_tasks",
"completed_tasks",
"created_entities",
"current_agent",
"next_step",
"agent_trace",
"agent_id",
"parent_agent_id",
"root_agent_id",
"collaboration_depth",
"thread_id",
"last_message_id",
"message_sequence",
"spawned_agent_ids",
"current_sub_commander",
"active_sub_commanders",
"sub_commander_trace",
"event_trace",
"message_trace",
"active_tasks",
"task_results",
"task_hierarchy",
"verification_status",
"verification_summary",
"verification_evidence",
"isolation_mode",
"isolation_id",
"isolation_workspace_path",
"isolation_parent_conversation_id",
"isolation_metadata",
"input_tokens",
"output_tokens",
"estimated_cost",
"budget_warning",
"cost_by_agent",
"cost_thresholds",
"budget_state",
"collaboration_budget_history",
"current_phase",
"phase_history",
"current_checkpoint",
"checkpoint_history",
)
def _normalize_legacy_turn_context(turn_context: Any, current_agent: Any) -> dict[str, Any] | None:
if not isinstance(turn_context, dict):
return None
normalized = dict(turn_context)
active_agent = normalized.pop("active_agent", None)
active_sub_flow = normalized.pop("active_sub_flow", None)
if isinstance(active_agent, str) and active_agent and "active_agent" not in normalized:
normalized["active_agent"] = active_agent
if (
isinstance(active_sub_flow, str)
and active_sub_flow
and "active_sub_commander" not in normalized
):
normalized["active_sub_commander"] = active_sub_flow
if not normalized.get("active_agent") and isinstance(current_agent, str) and current_agent:
normalized["active_agent"] = current_agent
return normalized or None
def _normalize_legacy_pending_action(pending_action: Any) -> dict[str, Any] | None:
if not isinstance(pending_action, dict):
return None
normalized = dict(pending_action)
legacy_action_type = normalized.pop("action_type", None)
if legacy_action_type and "type" not in normalized:
normalized["type"] = legacy_action_type
legacy_agent = normalized.pop("agent", None)
legacy_sub_flow = normalized.pop("sub_flow", None)
if legacy_agent and "owner_agent" not in normalized:
normalized["owner_agent"] = legacy_agent
if legacy_sub_flow and "owner_sub_commander" not in normalized:
normalized["owner_sub_commander"] = legacy_sub_flow
legacy_status = normalized.get("status")
if legacy_status == "awaiting_confirmation":
normalized["status"] = "pending"
elif legacy_status == "awaiting_clarification":
normalized["status"] = "blocked_on_clarification"
return normalized or None
def _normalize_legacy_clarification_context(
clarification_context: Any,
pending_action: dict[str, Any] | None,
current_agent: Any,
) -> dict[str, Any] | None:
if not isinstance(clarification_context, dict):
return None
normalized = dict(clarification_context)
active_agent = normalized.pop("active_agent", None)
sub_flow = normalized.pop("sub_flow", None)
awaiting_user_input = normalized.pop("awaiting_user_input", None)
if isinstance(active_agent, str) and active_agent and "owning_agent" not in normalized:
normalized["owning_agent"] = active_agent
if isinstance(sub_flow, str) and sub_flow and "owning_sub_commander" not in normalized:
normalized["owning_sub_commander"] = sub_flow
if "target_action" not in normalized:
target_action = None
if pending_action:
pending_type = pending_action.get("type")
if isinstance(pending_type, str) and pending_type and pending_type != "clarification":
target_action = pending_type
if target_action is None and isinstance(sub_flow, str) and sub_flow.startswith("create_"):
target_action = sub_flow
if target_action:
normalized["target_action"] = target_action
if not normalized.get("owning_agent") and isinstance(current_agent, str) and current_agent:
normalized["owning_agent"] = current_agent
if awaiting_user_input is True and "status" not in normalized:
normalized["status"] = "pending"
return normalized or None
def _normalize_legacy_continuity_state(
continuity_state: Any,
clarification_context: dict[str, Any] | None,
) -> dict[str, Any] | None:
if not isinstance(continuity_state, dict):
return None
normalized = dict(continuity_state)
normalized.pop("active_agent", None)
normalized.pop("active_sub_flow", None)
legacy_status = normalized.get("status")
if legacy_status == "awaiting_clarification":
normalized["status"] = "fresh"
if clarification_context and "mode" not in normalized:
normalized["mode"] = "resume_after_clarification"
return normalized or None
def _normalize_continuity_snapshot(state: dict[str, Any]) -> dict[str, Any]:
normalized = dict(state)
current_agent = normalized.get("current_agent")
pending_action = _normalize_legacy_pending_action(normalized.get("pending_action"))
clarification_context = _normalize_legacy_clarification_context(
normalized.get("clarification_context"),
pending_action,
current_agent,
)
continuity_state = _normalize_legacy_continuity_state(
normalized.get("continuity_state"),
clarification_context,
)
turn_context = _normalize_legacy_turn_context(normalized.get("turn_context"), current_agent)
if pending_action is not None:
normalized["pending_action"] = pending_action
if clarification_context is not None:
normalized["clarification_context"] = clarification_context
if continuity_state is not None:
normalized["continuity_state"] = continuity_state
if turn_context is not None:
normalized["turn_context"] = turn_context
return normalized
def _build_continuity_snapshot(state: dict[str, Any]) -> dict[str, Any] | None:
normalized_state = _normalize_continuity_snapshot(state)
snapshot = {
field: normalized_state.get(field)
for field in _CONTINUITY_SNAPSHOT_FIELDS
if normalized_state.get(field) is not None
}
if not snapshot:
return None
return {
"version": _CONTINUITY_STATE_VERSION,
"state": snapshot,
}
def _extract_continuity_snapshot(payload: Any) -> dict[str, Any] | None:
if isinstance(payload, list):
for item in payload:
snapshot = _extract_continuity_snapshot(item)
if snapshot:
return snapshot
return None
if not isinstance(payload, dict):
return None
if payload.get("kind") != "agent_continuity_state":
return None
if payload.get("version") != _CONTINUITY_STATE_VERSION:
return None
state = payload.get("state")
if isinstance(state, dict):
return _normalize_continuity_snapshot(state)
return None
class AgentService:
"""对话 Agent 服务"""
def __init__(self, db: AsyncSession):
self.db = db
async def _try_auto_summarize_background(self, user_id: str, conversation_id: str) -> None:
async with async_session() as session:
await memory_service.try_auto_summarize(session, user_id, conversation_id)
# ———— M.4: 主动记忆提取 ————
async def _extract_memories_background(self, user_id: str, conversation_id: str) -> None:
"""Background task to extract memories from conversation after response."""
from app.services.memory.memory_extractor import MemoryExtractor
from sqlalchemy import select
from app.models.conversation import Message
try:
async with async_session() as db:
# Load last 10 messages from conversation
result = await db.execute(
select(Message)
.where(Message.conversation_id == conversation_id)
.order_by(Message.created_at.desc())
.limit(10)
)
messages = list(result.scalars().all())
if len(messages) < 2:
return
extractor = MemoryExtractor()
new_memories = await extractor.extract_from_conversation(
db, user_id, conversation_id, messages
)
if new_memories:
await extractor.save_memories(db, user_id, conversation_id, new_memories)
logger.info(
f"[MemoryExtractor] Extracted {len(new_memories)} new memories from conversation {conversation_id}"
)
except Exception as e:
logger.exception(f"[MemoryExtractor] Extraction failed: {e}")
def _build_progress_event(
self,
stage: str,
label: str,
*,
agent: str | None = None,
tool_name: str | None = None,
step: str | None = None,
steps: list[str] | None = None,
) -> dict[str, Any]:
return {
"type": "progress",
"stage": stage,
"label": label,
"agent": agent,
"tool_name": tool_name,
"step": step,
"steps": steps or [],
}
def _build_current_datetime_context(self) -> tuple[str, dict[str, str]]:
now_utc = datetime.now(UTC)
reference = {
"current_time_iso": now_utc.isoformat(),
"current_date_iso": now_utc.date().isoformat(),
}
context = (
"【当前时间】\n"
f"- current_time_utc: {reference['current_time_iso']}\n"
f"- current_date_utc: {reference['current_date_iso']}\n"
"说明:解析'今天/明天/后天/本周/下周'等相对时间时,请以 current_time_utc 为准。"
)
return context, reference
def build_skill_context(self, skill_names: list[str]) -> dict:
"""构建 Skills 上下文
Args:
skill_names: Skill 名称列表
Returns:
包含 skills 上下文的字典
"""
registry = get_skill_registry()
merged_context = registry.get_skill_context(skill_names)
return {
"skills_context": merged_context,
"skills_metadata": {
"skills": skill_names,
"count": len(skill_names),
},
}
async def _get_user_llm_config(
self, user_id: str, model_name: str | None = None
) -> dict | None:
"""获取用户的 LLM 模型配置"""
user = await self.db.get(User, user_id)
if not user or not user.llm_config:
return None
llm_config = user.llm_config
if model_name:
models = llm_config.get("chat", [])
for m in models:
if m.get("name") == model_name:
return m
return None
chat_models = llm_config.get("chat", [])
for m in chat_models:
if m.get("enabled"):
return m
return None
async def _load_continuity_snapshot(self, conversation: Conversation) -> dict[str, Any] | None:
snapshot = _extract_continuity_snapshot(getattr(conversation, "agent_state", None))
if snapshot:
return snapshot
result = await self.db.execute(
select(Message)
.where(Message.conversation_id == conversation.id, Message.role == "assistant")
.order_by(Message.created_at.desc())
)
for message in result.scalars():
snapshot = _extract_continuity_snapshot(message.attachments)
if snapshot:
return snapshot
return None
async def _build_agent_state(
self,
*,
request_id: str,
user_id: str,
conversation: Conversation,
raw_user_query: str,
full_message: str,
memory_context: str | None,
current_datetime_context: str,
current_datetime_reference: dict[str, str],
user_llm_config: dict | None,
runtime_request_context: RuntimeRequestContext,
recalled_retrospectives: list[dict[str, Any]],
skill_shortlist: list[dict[str, Any]],
) -> dict[str, Any]:
state = initial_state(user_id, conversation.id)
runtime_summary = render_runtime_request_context_summary(runtime_request_context)
state.update(
{
"messages": [
SystemMessage(content=runtime_summary),
HumanMessage(content=full_message),
],
"memory_context": memory_context,
"current_datetime_context": current_datetime_context,
"current_datetime_reference": current_datetime_reference,
"user_llm_config": user_llm_config,
}
)
previous_snapshot = await self._load_continuity_snapshot(conversation)
if previous_snapshot:
state.update(previous_snapshot)
state["messages"] = [
SystemMessage(content=runtime_summary),
HumanMessage(content=full_message),
]
state.update(
{
"runtime_request_context": runtime_request_context.model_dump(mode="json"),
"task_graph": (
runtime_request_context.task_graph.model_dump(mode="json")
if runtime_request_context.task_graph is not None
else None
),
"feature_flags": RollbackController().snapshot_flags(),
"recalled_retrospectives": recalled_retrospectives,
"retrospective_shortlist": recalled_retrospectives,
"skill_shortlist": skill_shortlist,
"skill_activation_records": [
SkillActivationRecord(
skill_name=item.get("skill_name"),
source=item.get("source", "runtime"),
source_id=item.get("source_id"),
status=item.get("status", "active"),
injection_mode=item.get("injection_mode", "metadata_only"),
matched_terms=item.get("matched_terms", []),
rationale=item.get("rationale"),
).model_dump(mode="json")
for item in skill_shortlist
if item.get("skill_name")
],
"parallel_worthiness": runtime_request_context.parallel_worthiness.model_dump(
mode="json"
),
}
)
return state
async def _build_runtime_request_context(
self,
*,
request_id: str,
user_id: str,
conversation: Conversation,
user_query: str,
memory_context: str | None,
) -> tuple[RuntimeRequestContext, list[dict[str, Any]], list[dict[str, Any]]]:
started_at = perf_counter()
retrospectives_started = perf_counter()
recent_retrospectives = await SessionRetrospectiveSearch(self.db).shortlist(
user_id=user_id,
query_text=user_query,
conversation_id=conversation.id,
limit=3,
)
retrospective_ms = (perf_counter() - retrospectives_started) * 1000
feature_flags = RollbackController().snapshot_flags()
shortlist_started = perf_counter()
skill_shortlist = await shortlist_skills_for_request(
self.db,
user_id=user_id,
user_query=user_query,
memory_context=memory_context,
retrospectives=[item.model_dump(mode="json") for item in recent_retrospectives],
include_learned=feature_flags["ENABLE_LEARNED_SKILL_LOADING"],
limit=4,
)
skill_shortlist_ms = (perf_counter() - shortlist_started) * 1000
parallel_worthiness = assess_parallel_worthiness(
user_query,
retrospective_count=len(recent_retrospectives),
skill_count=len(skill_shortlist),
)
recommended_runtime_mode = (
"collaboration" if parallel_worthiness.preferred_mode != "direct" else "direct"
)
task_graph = (
build_bounded_task_graph(
query_text=user_query,
parallel_worthiness=parallel_worthiness,
)
if feature_flags["ENABLE_PARALLEL_TASK_GRAPH"]
else None
)
runtime_request_context = RuntimeRequestContext(
request_id=request_id,
session_id=conversation.id,
conversation_id=conversation.id,
user_id=user_id,
query_text=user_query,
raw_user_query=user_query,
recalled_memories=_extract_memory_highlights(memory_context),
recalled_retrospectives=[
summarize_retrospective(retrospective) for retrospective in recent_retrospectives
],
shortlisted_skills=[entry.skill_name for entry in skill_shortlist],
skill_shortlist=skill_shortlist,
current_agent_role="master",
execution_mode=recommended_runtime_mode,
conversation_state_ref=conversation.id,
parallel_worthiness=parallel_worthiness,
task_graph=task_graph,
recommended_runtime_mode=recommended_runtime_mode,
assembly_metrics={
"retrospective_ms": round(retrospective_ms, 3),
"skill_shortlist_ms": round(skill_shortlist_ms, 3),
"total_ms": round((perf_counter() - started_at) * 1000, 3),
},
)
return (
runtime_request_context,
[item.model_dump(mode="json") for item in recent_retrospectives],
[item.model_dump(mode="json") for item in skill_shortlist],
)
async def chat(
self,
user_id: str,
message: str,
conversation_id: str | None = None,
file_ids: list[str] | None = None,
model_name: str | None = None,
) -> tuple[str, str, AsyncGenerator[dict[str, Any], None]]:
"""
处理对话请求(流式)
"""
user_llm_config = await self._get_user_llm_config(user_id, model_name)
model_name_used = model_name
if model_name and not user_llm_config:
raise ValueError("所选模型不可用于聊天,请切换到聊天模型")
if user_llm_config:
model_name_used = user_llm_config.get("name", model_name)
logger.info(
"agent_chat_started",
extra={
"details": {
"mode": "stream",
"requested_model_name": model_name,
"resolved_model_name": model_name_used,
"message_length": len(message or ""),
}
},
)
if conversation_id:
result = await self.db.execute(
select(Conversation).where(
Conversation.id == conversation_id,
Conversation.user_id == user_id,
)
)
conv = result.scalar_one_or_none()
if conv is None:
raise ValueError("会话不存在或无权访问")
else:
conv = None
if not conv:
conv = Conversation(user_id=user_id, title=message[:50])
self.db.add(conv)
await self.db.commit()
await self.db.refresh(conv)
conversation_id = conv.id
else:
conversation_id = conv.id
file_context = ""
if file_ids:
from app.services.document_service import DocumentService
doc_svc = DocumentService(self.db)
for file_id in file_ids:
content = await doc_svc.get_document_content(user_id, file_id)
if content:
file_context += f"\n\n[用户上传文件内容]\n{content}\n[/文件内容]"
full_message = f"{message}\n{file_context}" if file_context else message
user_msg = Message(
conversation_id=conversation_id,
role="user",
content=message,
attachments=[{"file_ids": file_ids}] if file_ids else None,
)
self.db.add(user_msg)
await self.db.commit()
await self.db.refresh(user_msg)
brain_service = BrainService(self.db)
await brain_service.create_event(
user_id,
source_type="conversation",
source_id=conversation_id,
event_type="message_created",
title="User message",
content_summary=message[:500],
raw_excerpt=message[:2000],
metadata_={"role": "user"},
importance_signal=1.0,
)
await self.db.commit()
memory_ctx = await memory_service.build_memory_context(
self.db, user_id, conversation_id, message
)
# M.5: Inject recall memories into context (before LLM call)
from app.services.memory.recall_injector import MemoryRecallInjector
recall_ctx = await MemoryRecallInjector().build_context(self.db, user_id, message)
if recall_ctx:
memory_ctx = f"{memory_ctx}\n{recall_ctx}" if memory_ctx else recall_ctx
assistant_msg = Message(
conversation_id=conversation_id,
role="assistant",
content="",
model=model_name_used or "jarvis",
attachments=None,
)
self.db.add(assistant_msg)
await self.db.commit()
await self.db.refresh(assistant_msg)
def _build_assistant_event_payload(content: str) -> dict[str, Any]:
return {
"source_type": "conversation",
"source_id": conversation_id,
"event_type": "message_created",
"title": "Assistant message",
"content_summary": content[:500],
"raw_excerpt": content[:2000],
"metadata_": {"role": "assistant"},
"importance_signal": 0.8,
}
async def run_agent():
collected = ""
state: dict[str, Any] | None = None
runtime_request_context: RuntimeRequestContext | None = None
set_current_user(user_id)
try:
graph = get_agent_graph()
current_datetime_context, current_datetime_reference = (
self._build_current_datetime_context()
)
(
runtime_request_context,
recalled_retrospectives,
skill_shortlist,
) = await self._build_runtime_request_context(
request_id=assistant_msg.id,
user_id=user_id,
conversation=conv,
user_query=message,
memory_context=memory_ctx,
)
state = await self._build_agent_state(
request_id=assistant_msg.id,
user_id=user_id,
conversation=conv,
raw_user_query=message,
full_message=full_message,
memory_context=memory_ctx,
current_datetime_context=current_datetime_context,
current_datetime_reference=current_datetime_reference,
user_llm_config=user_llm_config,
runtime_request_context=runtime_request_context,
recalled_retrospectives=recalled_retrospectives,
skill_shortlist=skill_shortlist,
)
state.update(_derive_role_memory_contexts(memory_ctx))
yield self._build_progress_event(
"thinking", "Jarvis 正在分析请求", agent="master", step="理解你的问题"
)
try:
async for event in graph.astream_events(state, version="v2"):
kind = event.get("event")
event_name = event.get("name", "")
metadata = event.get("metadata", {})
data = event.get("data", {})
if kind == "on_chain_start" and event_name in {
"master",
"schedule_planner",
"executor",
"librarian",
"analyst",
}:
stage_map = {
"master": ("thinking", "Jarvis 正在理解请求"),
"schedule_planner": ("planning", "Jarvis 正在编排日程"),
"executor": ("tool", "Jarvis 正在执行操作"),
"librarian": ("tool", "Jarvis 正在检索知识"),
"analyst": ("thinking", "Jarvis 正在分析信息"),
}
stage, label = stage_map.get(
event_name, ("thinking", "Jarvis 正在思考")
)
yield self._build_progress_event(
stage, label, agent=event_name, step=label
)
elif kind == "on_tool_start":
yield self._build_progress_event(
"tool",
f"Jarvis 正在调用工具 {event_name}",
agent="executor",
tool_name=event_name,
step=f"正在执行 {event_name}",
)
elif kind == "on_tool_end":
tool_result = data.get("output")
step = f"已完成 {event_name}"
if isinstance(tool_result, str) and len(tool_result) > 0:
step = tool_result[:100]
yield self._build_progress_event(
"tool",
f"工具 {event_name} 已完成",
agent="executor",
tool_name=event_name,
step=step,
)
elif kind == "on_chat_model_stream":
chunk = data.get("chunk")
content = _coerce_event_text(
getattr(chunk, "content", "") if chunk else ""
)
if content:
collected += content
yield {"type": "chunk", "content": content}
elif kind == "on_chain_end":
output = data.get("output")
final_resp = None
if isinstance(output, dict):
state.update(output)
final_resp = output.get("final_response")
if final_resp:
final_text = str(final_resp)
if final_text != collected:
collected = final_text
yield {"type": "chunk", "content": final_text}
elif kind == "on_chat_model_end":
output = data.get("output")
final_content = _coerce_event_text(
getattr(output, "content", "") if output else ""
)
if final_content:
final_text = final_content
if final_text != collected:
collected = final_text
yield {"type": "chunk", "content": final_text}
except Exception as e:
if _is_streaming_rejection_error(e, user_llm_config) and not collected:
yield self._build_progress_event(
"responding", "Jarvis 正在生成回复", agent="master", step="fallback"
)
try:
result_state = await graph.ainvoke(state)
if isinstance(result_state, dict):
state.update(result_state)
fallback_content = result_state.get("final_response") or str(
result_state.get("messages", [AIMessage(content="")])[-1].content
)
collected = str(fallback_content)
yield {"type": "chunk", "content": collected}
except Exception:
logger.exception("llm_sync_fallback_failed")
safe_error = "模型服务暂不可用,请稍后再试。"
yield {"type": "error", "error": safe_error}
collected = f"抱歉,发生错误: {safe_error}"
yield {"type": "chunk", "content": collected}
else:
logger.exception("agent_streaming_failed")
if not collected:
safe_error = "模型服务暂不可用,请稍后再试。"
yield {"type": "error", "error": safe_error}
collected = f"抱歉,发生错误: {safe_error}"
yield {"type": "chunk", "content": collected}
else:
yield {"type": "error", "error": str(e)}
finally:
clear_current_user()
try:
if collected:
assistant_msg.content = collected
continuity_snapshot = _build_continuity_snapshot(state or {})
attachments = (
[
{
"kind": "agent_continuity_state",
**continuity_snapshot,
}
]
if continuity_snapshot
else []
)
if state is not None and runtime_request_context is not None:
retrospective = build_session_retrospective(
request_id=assistant_msg.id,
session_id=conversation_id,
user_query=message,
state=state,
runtime_context=runtime_request_context,
)
attachments = append_retrospective_attachment(attachments, retrospective)
attachments.append(
{
"kind": "runtime_observability",
"payload": build_runtime_observability_report(
state=state,
feature_flags=state.get("feature_flags") or {},
),
}
)
conv.agent_state = (
{
"kind": "agent_continuity_state",
**continuity_snapshot,
}
if continuity_snapshot
else None
)
await BrainService(self.db).create_event(
user_id,
**_build_assistant_event_payload(collected),
)
assistant_msg.attachments = attachments or None
await self.db.commit()
await self.db.refresh(assistant_msg)
schedule_retrospective_job(
user_id=user_id,
conversation_id=conversation_id,
request_message_id=user_msg.id,
response_message_id=assistant_msg.id,
query_text=message,
final_response=collected,
state=state,
)
except Exception:
logger.exception("save_assistant_message_failed")
asyncio.create_task(self._try_auto_summarize_background(user_id, conversation_id))
# M.4: Extract memories from conversation
asyncio.create_task(self._extract_memories_background(user_id, conversation_id))
return conversation_id, assistant_msg.id, run_agent()
async def chat_simple(
self,
user_id: str,
message: str,
conversation_id: str | None = None,
file_ids: list[str] | None = None,
model_name: str | None = None,
) -> tuple[str, str, str, str | None]:
"""
简单同步版对话
"""
user_llm_config = await self._get_user_llm_config(user_id, model_name)
model_name_used = model_name
if model_name and not user_llm_config:
raise ValueError("所选模型不可用于聊天,请切换到聊天模型")
if user_llm_config:
model_name_used = user_llm_config.get("name", model_name)
if conversation_id:
result = await self.db.execute(
select(Conversation).where(
Conversation.id == conversation_id,
Conversation.user_id == user_id,
)
)
conv = result.scalar_one_or_none()
if conv is None:
raise ValueError("会话不存在或无权访问")
else:
conv = None
if not conv:
conv = Conversation(user_id=user_id, title=message[:50])
self.db.add(conv)
await self.db.commit()
await self.db.refresh(conv)
conversation_id = conv.id
else:
conversation_id = conv.id
user_msg = Message(conversation_id=conversation_id, role="user", content=message)
self.db.add(user_msg)
assistant_msg = Message(
conversation_id=conversation_id,
role="assistant",
content="",
model=model_name_used or "jarvis",
attachments=None,
)
self.db.add(assistant_msg)
brain_service = BrainService(self.db)
await brain_service.create_event(
user_id,
source_type="conversation",
source_id=conversation_id,
event_type="message_created",
title="User message",
content_summary=message[:500],
raw_excerpt=message[:2000],
metadata_={"role": "user"},
importance_signal=1.0,
)
memory_ctx = await memory_service.build_memory_context(
self.db, user_id, conversation_id, message
)
# M.5: Inject recall memories into context (before LLM call)
from app.services.memory.recall_injector import MemoryRecallInjector
recall_ctx = await MemoryRecallInjector().build_context(self.db, user_id, message)
if recall_ctx:
memory_ctx = f"{memory_ctx}\n{recall_ctx}" if memory_ctx else recall_ctx
set_current_user(user_id)
try:
graph = get_agent_graph()
current_datetime_context, current_datetime_reference = (
self._build_current_datetime_context()
)
(
runtime_request_context,
recalled_retrospectives,
skill_shortlist,
) = await self._build_runtime_request_context(
request_id=assistant_msg.id,
user_id=user_id,
conversation=conv,
user_query=message,
memory_context=memory_ctx,
)
state = await self._build_agent_state(
request_id=assistant_msg.id,
user_id=user_id,
conversation=conv,
raw_user_query=message,
full_message=message,
memory_context=memory_ctx,
current_datetime_context=current_datetime_context,
current_datetime_reference=current_datetime_reference,
user_llm_config=user_llm_config,
runtime_request_context=runtime_request_context,
recalled_retrospectives=recalled_retrospectives,
skill_shortlist=skill_shortlist,
)
state.update(_derive_role_memory_contexts(memory_ctx))
result_state = await graph.ainvoke(state)
response_content = result_state.get("final_response") or str(
result_state.get("messages", [AIMessage(content="")])[-1].content
)
except Exception as e:
logger.exception("agent_chat_simple_failed")
response_content = "抱歉,发生错误。"
finally:
clear_current_user()
brain_service = BrainService(self.db)
await brain_service.create_event(
user_id,
source_type="conversation",
source_id=conversation_id,
event_type="message_created",
title="Assistant message",
content_summary=response_content[:500],
raw_excerpt=response_content[:2000],
metadata_={"role": "assistant"},
importance_signal=0.8,
)
assistant_msg.content = response_content
continuity_snapshot = (
_build_continuity_snapshot(result_state) if "result_state" in locals() else None
)
attachments = (
[
{
"kind": "agent_continuity_state",
**continuity_snapshot,
}
]
if continuity_snapshot
else []
)
if "result_state" in locals() and "runtime_request_context" in locals():
retrospective = build_session_retrospective(
request_id=assistant_msg.id,
session_id=conversation_id,
user_query=message,
state=result_state,
runtime_context=runtime_request_context,
)
attachments = append_retrospective_attachment(attachments, retrospective)
attachments.append(
{
"kind": "runtime_observability",
"payload": build_runtime_observability_report(
state=result_state,
feature_flags=result_state.get("feature_flags") or {},
),
}
)
conv.agent_state = (
{
"kind": "agent_continuity_state",
**continuity_snapshot,
}
if continuity_snapshot
else None
)
assistant_msg.attachments = attachments or None
await self.db.commit()
await self.db.refresh(assistant_msg)
schedule_retrospective_job(
user_id=user_id,
conversation_id=conversation_id,
request_message_id=user_msg.id,
response_message_id=assistant_msg.id,
query_text=message,
final_response=response_content,
state=result_state if "result_state" in locals() else None,
)
return conversation_id, assistant_msg.id, response_content, model_name_used