Compare commits
48 Commits
fcd13d8d9f
...
phase1-reg
| Author | SHA1 | Date | |
|---|---|---|---|
| b3f9b5e715 | |||
| 4251a79062 | |||
| e9ba8597e9 | |||
| 08251556c3 | |||
| e0fe3ca623 | |||
| d85cb9cf35 | |||
| db1a46af39 | |||
| 0410091109 | |||
| 0d89325b09 | |||
| aafa05dc1c | |||
| b8d135a7e2 | |||
| a3aa15d339 | |||
| 6f594631e9 | |||
| 67ea3d2682 | |||
| 90ea732584 | |||
| 7d80a6e2ec | |||
| d2447ee635 | |||
| e3691b01bb | |||
| 3ee825aa90 | |||
| a9ddf3c9b4 | |||
| b024a2bcb5 | |||
| a27736a832 | |||
| 204cb223a3 | |||
| ca69a35e02 | |||
| dc8cd06625 | |||
| 9e4e94c75e | |||
| 30568846b3 | |||
| e9ce0235fd | |||
| 977ef34aad | |||
| 2114880e47 | |||
| c7ce916cca | |||
| 9606d4d9e1 | |||
| b284f395fd | |||
| edee597d5f | |||
| c85e3e6988 | |||
| e7c1a57287 | |||
| 7bbaf67591 | |||
| 99c30d9534 | |||
| 9824bc2d6c | |||
| fad41ce94a | |||
| 6966ced359 | |||
| 0f63ac82f4 | |||
| c552f71e28 | |||
| d3749817b0 | |||
| cdde7e3bc9 | |||
| 672adf9287 | |||
| 0e6828722c | |||
| 79f25a3a74 |
33
.env.example
Normal file
33
.env.example
Normal file
@@ -0,0 +1,33 @@
|
||||
# =============================================
|
||||
# Jarvis 项目根配置
|
||||
# =============================================
|
||||
|
||||
APP_NAME=Jarvis
|
||||
APP_VERSION=0.1.0
|
||||
DEBUG=true
|
||||
HOST=127.0.0.1
|
||||
PORT=3337
|
||||
SECRET_KEY=change-me-to-a-random-secret-key
|
||||
CORS_ORIGINS=["http://localhost:5173","http://localhost:3000"]
|
||||
|
||||
# === 数据存储 ===
|
||||
DATABASE_URL=sqlite+aiosqlite:///./data/jarvis.db
|
||||
DATA_DIR=./data
|
||||
CHROMA_PERSIST_DIR=./data/chroma
|
||||
UPLOAD_DIR=./data/uploads
|
||||
MAX_UPLOAD_SIZE=52428800
|
||||
MINERU_LANGUAGE=ch
|
||||
|
||||
# === JWT ===
|
||||
ACCESS_TOKEN_EXPIRE_MINUTES=1440
|
||||
|
||||
# === 管理员账号 Bootstrap ===
|
||||
ADMIN=admin
|
||||
ADMIN_EMAIL=admin@example.com
|
||||
ADMIN_PASSWORD=change-me
|
||||
ADMIN_FULL_NAME=Administrator
|
||||
|
||||
# === 定时任务 ===
|
||||
SCHEDULER_ENABLED=true
|
||||
DAILY_PLAN_TIME=00:00
|
||||
FORUM_SCAN_INTERVAL_MINUTES=30
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -33,8 +33,12 @@ uv.lock.bak
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Logs
|
||||
logs/
|
||||
|
||||
# AI tool data
|
||||
.claude/
|
||||
.worktrees/
|
||||
|
||||
# Lock files (use in development, commit in production)
|
||||
# uv.lock - uncomment if you want to commit lock file
|
||||
|
||||
14
README.md
14
README.md
@@ -33,16 +33,16 @@ start.bat
|
||||
### 手动启动
|
||||
|
||||
```bash
|
||||
# 1. 配置 API Key
|
||||
cd backend
|
||||
cp .env.example .env
|
||||
# 编辑 .env,填入 ANTHROPIC_API_KEY
|
||||
# 1. 配置项目根目录环境变量
|
||||
cp backend/.env.example .env
|
||||
# 编辑项目根目录 .env
|
||||
|
||||
# 2. 安装依赖
|
||||
cd backend
|
||||
uv sync
|
||||
|
||||
# 3. 启动后端
|
||||
uv run uvicorn app.main:app --reload --port 8000
|
||||
# 3. 启动后端(按项目根目录 .env)
|
||||
uv run uvicorn app.main:app --reload --host "$HOST" --port "$PORT"
|
||||
|
||||
# 4. 新终端,启动前端
|
||||
cd frontend
|
||||
@@ -60,7 +60,7 @@ npm run dev
|
||||
|
||||
## API 文档
|
||||
|
||||
后端启动后,访问 http://localhost:8000/docs 查看交互式 API 文档。
|
||||
后端启动后,访问 `http://<HOST>:<PORT>/docs` 查看交互式 API 文档(以项目根目录 `.env` 为准)。
|
||||
|
||||
### 主要接口
|
||||
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
# =============================================
|
||||
# Jarvis 后端配置
|
||||
# 复制此文件为 .env 并填入实际值
|
||||
# =============================================
|
||||
|
||||
# === 应用基础 ===
|
||||
DEBUG=false
|
||||
SECRET_KEY=change-me-to-a-random-secret-key
|
||||
|
||||
# === LLM 配置 ===
|
||||
# 支持: openai / claude / deepseek / ollama / custom
|
||||
LLM_PROVIDER=openai
|
||||
|
||||
# OpenAI(默认)
|
||||
OPENAI_API_KEY=your-openai-api-key-here
|
||||
OPENAI_MODEL=gpt-4o
|
||||
OPENAI_BASE_URL=https://api.openai.com/v1
|
||||
|
||||
# Claude(可选)
|
||||
# ANTHROPIC_API_KEY=your-anthropic-api-key-here
|
||||
# CLAUDE_MODEL=claude-sonnet-4-20250514
|
||||
|
||||
# DeepSeek(可选)
|
||||
# LLM_PROVIDER=deepseek
|
||||
# OPENAI_API_KEY=your-deepseek-api-key
|
||||
# OPENAI_BASE_URL=https://api.deepseek.com/v1
|
||||
|
||||
# Ollama 本地模型(可选)
|
||||
# LLM_PROVIDER=ollama
|
||||
# OLLAMA_BASE_URL=http://localhost:11434
|
||||
# OLLAMA_MODEL=llama3
|
||||
|
||||
# 自定义 OpenAI 兼容接口(可选)
|
||||
# LLM_PROVIDER=custom
|
||||
# OPENAI_API_KEY=your-api-key
|
||||
# OPENAI_BASE_URL=https://your-custom-endpoint/v1
|
||||
|
||||
# === NAS 部署路径 ===
|
||||
NAS_DATA_ROOT=/data/jarvis
|
||||
DATA_DIR=/data/jarvis/data
|
||||
CHROMA_PERSIST_DIR=/data/jarvis/chroma
|
||||
UPLOAD_DIR=/data/jarvis/uploads
|
||||
|
||||
|
||||
# === LangSmith 可观测性 ===
|
||||
# 启用 LangSmith 追踪(可选)
|
||||
LANGSMITH_TRACING=false
|
||||
LANGSMITH_API_KEY=your-langsmith-api-key
|
||||
LANGSMITH_PROJECT=jarvis-agent
|
||||
|
||||
# === 定时任务 ===
|
||||
SCHEDULER_ENABLED=true
|
||||
DAILY_PLAN_TIME=00:00
|
||||
FORUM_SCAN_INTERVAL_MINUTES=30
|
||||
@@ -16,6 +16,6 @@ COPY app/ ./app/
|
||||
# 创建数据目录
|
||||
RUN mkdir -p /data/jarvis/data /data/jarvis/chroma /data/jarvis/uploads
|
||||
|
||||
EXPOSE 8000
|
||||
EXPOSE 9527
|
||||
|
||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
CMD ["sh", "-c", "uvicorn app.main:app --host ${HOST:-0.0.0.0} --port ${PORT:-9527}"]
|
||||
|
||||
@@ -12,19 +12,20 @@ uv sync
|
||||
### 2. 配置环境变量
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
# 编辑 .env 填入 API Key
|
||||
cd ..
|
||||
cp backend/.env.example .env
|
||||
# 编辑项目根目录 .env
|
||||
```
|
||||
|
||||
### 3. 启动开发服务器
|
||||
|
||||
```bash
|
||||
uv run uvicorn app.main:app --reload --port 8000
|
||||
uv run uvicorn app.main:app --reload --host "$HOST" --port "$PORT"
|
||||
```
|
||||
|
||||
### 4. API 文档
|
||||
|
||||
启动后访问 http://localhost:8000/docs 查看交互式 API 文档。
|
||||
启动后访问 `http://<HOST>:<PORT>/docs` 查看交互式 API 文档(以项目根目录 `.env` 中的 `HOST` 和 `PORT` 为准)。
|
||||
|
||||
## 环境变量
|
||||
|
||||
|
||||
1
backend/app/agents/__init__.py
Normal file
1
backend/app/agents/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Agent package."""
|
||||
@@ -1,261 +1,377 @@
|
||||
"""
|
||||
Jarvis LangGraph Agent 主图定义
|
||||
Jarvis LangGraph Agent 主图定义 - 优化重构版
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from typing import Literal, Union, List, Any
|
||||
|
||||
from langchain_core.messages import (
|
||||
BaseMessage,
|
||||
HumanMessage,
|
||||
AIMessage,
|
||||
SystemMessage,
|
||||
ToolMessage
|
||||
)
|
||||
from langgraph.graph import StateGraph, END
|
||||
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, BaseMessage
|
||||
|
||||
from app.agents.state import AgentState, AgentRole
|
||||
from app.agents.prompts import (
|
||||
MASTER_SYSTEM_PROMPT,
|
||||
PLANNER_SYSTEM_PROMPT,
|
||||
SCHEDULE_PLANNER_SYSTEM_PROMPT,
|
||||
EXECUTOR_SYSTEM_PROMPT,
|
||||
LIBRARIAN_SYSTEM_PROMPT,
|
||||
ANALYST_SYSTEM_PROMPT,
|
||||
JSON_ACTION_FALLBACK_PROMPT,
|
||||
)
|
||||
from app.agents.tools import ALL_TOOLS
|
||||
from app.services.llm_service import get_llm
|
||||
from app.agents.tools import ALL_TOOLS, SUB_COMMANDER_TOOLSETS
|
||||
from app.agents.tools.time_reasoning import normalize_tool_time_arguments
|
||||
from app.agents.skill_registry import build_skill_context
|
||||
from app.services.llm_service import (
|
||||
get_llm,
|
||||
create_llm_from_config,
|
||||
resolve_provider_capabilities,
|
||||
default_provider_capabilities
|
||||
)
|
||||
from app.logging_utils import summarize_llm_config
|
||||
|
||||
logger = logging.getLogger("jarvis.agent")
|
||||
|
||||
# ===================== 工具辅助函数 =====================
|
||||
|
||||
def _get_llm_for_state(state: AgentState):
|
||||
"""获取配置好的 LLM 实例"""
|
||||
user_llm_config = state.get("user_llm_config")
|
||||
llm = create_llm_from_config(user_llm_config) if user_llm_config else get_llm()
|
||||
|
||||
# 注入解析到的能力
|
||||
capabilities = getattr(llm, "_jarvis_provider_capabilities", None)
|
||||
if capabilities is None:
|
||||
capabilities = resolve_provider_capabilities(user_llm_config) if user_llm_config else default_provider_capabilities()
|
||||
|
||||
state["provider_capabilities"] = {
|
||||
"provider": capabilities.provider,
|
||||
"supports_native_tools": capabilities.supports_native_tools,
|
||||
"preferred_tool_strategy": capabilities.preferred_tool_strategy,
|
||||
}
|
||||
return llm, capabilities
|
||||
|
||||
|
||||
def _msg_type(msg: BaseMessage) -> str:
|
||||
"""Get message type, handles both .type (new) and .role (old) attribute names."""
|
||||
return getattr(msg, "type", None) or getattr(msg, "role", "human")
|
||||
def _filter_user_messages(messages: list[BaseMessage]) -> list[BaseMessage]:
|
||||
return [m for m in messages if m.type in ("human", "user")]
|
||||
|
||||
|
||||
def _filter_user_messages(messages: list) -> list[BaseMessage]:
|
||||
return [m for m in messages if _msg_type(m) in ("human", "user")]
|
||||
def _dedupe_tools_by_name(tools: list) -> list:
|
||||
deduped_tools = []
|
||||
seen_tool_names: set[str] = set()
|
||||
for tool in tools:
|
||||
if tool.name in seen_tool_names:
|
||||
continue
|
||||
deduped_tools.append(tool)
|
||||
seen_tool_names.add(tool.name)
|
||||
return deduped_tools
|
||||
|
||||
|
||||
# ===================== 节点定义 (async) =====================
|
||||
|
||||
async def master_node(state: AgentState) -> AgentState:
|
||||
"""主Agent节点: 理解用户意图,决定调用哪个子Agent"""
|
||||
llm = get_llm()
|
||||
messages: list[BaseMessage] = state["messages"]
|
||||
|
||||
system_msgs: list[BaseMessage] = [SystemMessage(content=MASTER_SYSTEM_PROMPT)]
|
||||
|
||||
# 注入记忆上下文
|
||||
memory_ctx = state.get("memory_context")
|
||||
if memory_ctx:
|
||||
system_msgs.append(
|
||||
SystemMessage(content=f"\n\n【记忆上下文】\n{memory_ctx}\n\n---\n")
|
||||
def _get_role_tools(role: AgentRole) -> list:
|
||||
"""获取角色对应的所有可用工具集"""
|
||||
if role == AgentRole.SCHEDULE_PLANNER:
|
||||
# 合并分析和规划工具
|
||||
return _dedupe_tools_by_name(
|
||||
SUB_COMMANDER_TOOLSETS["schedule_analysis"]
|
||||
+ SUB_COMMANDER_TOOLSETS["schedule_planning"]
|
||||
)
|
||||
|
||||
response: AIMessage = await llm.invoke(system_msgs + messages)
|
||||
content = response.content.strip().lower()
|
||||
|
||||
if any(kw in content for kw in ["搜索", "查找", "知识", "检索"]):
|
||||
next_agent = AgentRole.LIBRARIAN
|
||||
elif any(kw in content for kw in ["计划", "安排", "拆解", "规划"]):
|
||||
next_agent = AgentRole.PLANNER
|
||||
elif any(kw in content for kw in ["执行", "做", "操作", "创建", "更新"]):
|
||||
next_agent = AgentRole.EXECUTOR
|
||||
elif any(kw in content for kw in ["分析", "报告", "统计", "总结"]):
|
||||
next_agent = AgentRole.ANALYST
|
||||
else:
|
||||
state["final_response"] = response.content
|
||||
state["should_respond"] = True
|
||||
return state
|
||||
|
||||
state["current_agent"] = next_agent
|
||||
state["active_agents"] = state.get("active_agents", [AgentRole.MASTER]) + [next_agent]
|
||||
state["should_respond"] = True
|
||||
return state
|
||||
|
||||
|
||||
async def planner_node(state: AgentState) -> AgentState:
|
||||
"""规划Agent节点: 制定计划,拆解任务步骤"""
|
||||
llm = get_llm()
|
||||
user_msgs = _filter_user_messages(state["messages"])
|
||||
user_query = user_msgs[-1].content if user_msgs else ""
|
||||
|
||||
response = await llm.invoke(
|
||||
[SystemMessage(content=PLANNER_SYSTEM_PROMPT), HumanMessage(content=f"用户请求: {user_query}")]
|
||||
)
|
||||
|
||||
plan_text = response.content
|
||||
steps = []
|
||||
for i, line in enumerate(plan_text.split("\n")):
|
||||
if line.strip() and (line[0].isdigit() or "- " in line):
|
||||
steps.append({"step": i + 1, "description": line.strip()})
|
||||
|
||||
state["plan"] = plan_text
|
||||
state["plan_steps"] = steps
|
||||
state["final_response"] = plan_text
|
||||
state["should_respond"] = True
|
||||
return state
|
||||
|
||||
|
||||
async def executor_node(state: AgentState) -> AgentState:
|
||||
"""执行Agent节点: 调用工具执行具体任务"""
|
||||
llm = get_llm()
|
||||
user_msgs = _filter_user_messages(state["messages"])
|
||||
user_query = user_msgs[-1].content if user_msgs else ""
|
||||
|
||||
response = await llm.bind_tools(ALL_TOOLS).invoke(
|
||||
[SystemMessage(content=EXECUTOR_SYSTEM_PROMPT), HumanMessage(content=f"用户请求: {user_query}")]
|
||||
)
|
||||
|
||||
tool_calls = getattr(response, "tool_calls", None) or []
|
||||
|
||||
if tool_calls:
|
||||
results = []
|
||||
for tc in tool_calls:
|
||||
tool_name = tc.get("name")
|
||||
args = tc.get("args", {})
|
||||
for tool in ALL_TOOLS:
|
||||
if tool.name == tool_name:
|
||||
try:
|
||||
result = tool.invoke(args)
|
||||
results.append(f"[{tool_name}] {result}")
|
||||
except Exception as e:
|
||||
results.append(f"[{tool_name}] 执行失败: {e}")
|
||||
break
|
||||
state["tool_calls"] = tool_calls
|
||||
state["last_tool_result"] = "\n".join(results)
|
||||
follow_up = await llm.invoke(
|
||||
[SystemMessage(content=EXECUTOR_SYSTEM_PROMPT),
|
||||
HumanMessage(content=f"工具执行结果:\n{state['last_tool_result']}")]
|
||||
if role == AgentRole.EXECUTOR:
|
||||
return _dedupe_tools_by_name(
|
||||
SUB_COMMANDER_TOOLSETS["executor_tasks"]
|
||||
+ SUB_COMMANDER_TOOLSETS["executor_forum"]
|
||||
)
|
||||
state["final_response"] = follow_up.content
|
||||
else:
|
||||
state["final_response"] = response.content
|
||||
|
||||
state["should_respond"] = True
|
||||
return state
|
||||
|
||||
|
||||
async def librarian_node(state: AgentState) -> AgentState:
|
||||
"""知识管理员节点: 管理知识库和知识图谱"""
|
||||
llm = get_llm()
|
||||
user_msgs = _filter_user_messages(state["messages"])
|
||||
user_query = user_msgs[-1].content if user_msgs else ""
|
||||
|
||||
response = await llm.bind_tools(ALL_TOOLS).invoke(
|
||||
[SystemMessage(content=LIBRARIAN_SYSTEM_PROMPT), HumanMessage(content=f"用户请求: {user_query}")]
|
||||
)
|
||||
|
||||
tool_calls = getattr(response, "tool_calls", None) or []
|
||||
|
||||
if tool_calls:
|
||||
results = []
|
||||
for tc in tool_calls:
|
||||
tool_name = tc.get("name")
|
||||
args = tc.get("args", {})
|
||||
for tool in ALL_TOOLS:
|
||||
if tool.name == tool_name:
|
||||
try:
|
||||
result = tool.invoke(args)
|
||||
results.append(f"[{tool_name}] {result}")
|
||||
except Exception as e:
|
||||
results.append(f"[{tool_name}] 执行失败: {e}")
|
||||
break
|
||||
state["tool_calls"] = tool_calls
|
||||
state["last_tool_result"] = "\n".join(results)
|
||||
follow_up = await llm.invoke(
|
||||
[SystemMessage(content=LIBRARIAN_SYSTEM_PROMPT),
|
||||
HumanMessage(content=f"工具执行结果:\n{state['last_tool_result']}")]
|
||||
if role == AgentRole.LIBRARIAN:
|
||||
return _dedupe_tools_by_name(
|
||||
SUB_COMMANDER_TOOLSETS["librarian_retrieval"]
|
||||
+ SUB_COMMANDER_TOOLSETS["librarian_graph"]
|
||||
)
|
||||
state["final_response"] = follow_up.content
|
||||
else:
|
||||
state["final_response"] = response.content
|
||||
|
||||
state["knowledge_context"] = state.get("last_tool_result", "")
|
||||
state["should_respond"] = True
|
||||
return state
|
||||
|
||||
|
||||
async def analyst_node(state: AgentState) -> AgentState:
|
||||
"""分析师节点: 分析工作数据,生成报告"""
|
||||
llm = get_llm()
|
||||
user_msgs = _filter_user_messages(state["messages"])
|
||||
user_query = user_msgs[-1].content if user_msgs else ""
|
||||
|
||||
response = await llm.bind_tools(ALL_TOOLS).invoke(
|
||||
[SystemMessage(content=ANALYST_SYSTEM_PROMPT), HumanMessage(content=f"用户请求: {user_query}")]
|
||||
)
|
||||
|
||||
tool_calls = getattr(response, "tool_calls", None) or []
|
||||
|
||||
if tool_calls:
|
||||
results = []
|
||||
for tc in tool_calls:
|
||||
tool_name = tc.get("name")
|
||||
args = tc.get("args", {})
|
||||
for tool in ALL_TOOLS:
|
||||
if tool.name == tool_name:
|
||||
try:
|
||||
result = tool.invoke(args)
|
||||
results.append(f"[{tool_name}] {result}")
|
||||
except Exception as e:
|
||||
results.append(f"[{tool_name}] 执行失败: {e}")
|
||||
break
|
||||
state["tool_calls"] = tool_calls
|
||||
state["last_tool_result"] = "\n".join(results)
|
||||
follow_up = await llm.invoke(
|
||||
[SystemMessage(content=ANALYST_SYSTEM_PROMPT),
|
||||
HumanMessage(content=f"工具执行结果:\n{state['last_tool_result']}")]
|
||||
if role == AgentRole.ANALYST:
|
||||
return _dedupe_tools_by_name(
|
||||
SUB_COMMANDER_TOOLSETS["analyst_progress"]
|
||||
+ SUB_COMMANDER_TOOLSETS["analyst_insights"]
|
||||
)
|
||||
state["final_response"] = follow_up.content
|
||||
return []
|
||||
|
||||
|
||||
# ===================== 核心执行逻辑 (ReAct) =====================
|
||||
|
||||
async def call_agent_llm(state: AgentState, role: AgentRole, system_prompt: str) -> dict:
|
||||
"""通用的 LLM 调用节点逻辑"""
|
||||
llm, capabilities = _get_llm_for_state(state)
|
||||
tools = _get_role_tools(role)
|
||||
|
||||
# 构建消息序列
|
||||
messages = []
|
||||
|
||||
# 1. 系统提示词
|
||||
messages.append(SystemMessage(content=system_prompt))
|
||||
|
||||
# 2. 环境上下文 (时间、记忆等)
|
||||
if state.get("current_datetime_context"):
|
||||
messages.append(SystemMessage(content=f"当前时间上下文: {state['current_datetime_context']}"))
|
||||
|
||||
if state.get("memory_context"):
|
||||
messages.append(SystemMessage(content=f"长期记忆上下文: {state['memory_context']}"))
|
||||
|
||||
# 3. 技能增强
|
||||
role_skill_key = role.value.replace("agent_", "")
|
||||
skill_ctx = build_skill_context(role_skill_key)
|
||||
if skill_ctx:
|
||||
messages.append(SystemMessage(content=skill_ctx))
|
||||
|
||||
# 4. 历史对话 (add_messages 已经处理好了)
|
||||
messages.extend(state["messages"])
|
||||
|
||||
# 绑定工具
|
||||
if tools and capabilities.supports_native_tools:
|
||||
llm_with_tools = llm.bind_tools(tools)
|
||||
else:
|
||||
state["final_response"] = response.content
|
||||
llm_with_tools = llm
|
||||
if tools: # 如果有工具但不支持原生,注入 JSON Fallback 提示
|
||||
messages.append(SystemMessage(content=JSON_ACTION_FALLBACK_PROMPT))
|
||||
tool_names = [t.name for t in tools]
|
||||
messages.append(SystemMessage(content=f"本次可用工具列表: {', '.join(tool_names)}"))
|
||||
|
||||
state["analysis_report"] = state.get("final_response", "")
|
||||
state["should_respond"] = True
|
||||
return state
|
||||
|
||||
|
||||
def route_agent(state: AgentState) -> str:
|
||||
"""路由函数: 决定下一个节点"""
|
||||
if state.get("final_response"):
|
||||
return END
|
||||
return state.get("current_agent", AgentRole.MASTER).value
|
||||
|
||||
|
||||
# ===================== 构建图 =====================
|
||||
|
||||
def create_agent_graph(callbacks: list | None = None):
|
||||
graph = StateGraph(AgentState)
|
||||
|
||||
graph.add_node(AgentRole.MASTER.value, master_node)
|
||||
graph.add_node(AgentRole.PLANNER.value, planner_node)
|
||||
graph.add_node(AgentRole.EXECUTOR.value, executor_node)
|
||||
graph.add_node(AgentRole.LIBRARIAN.value, librarian_node)
|
||||
graph.add_node(AgentRole.ANALYST.value, analyst_node)
|
||||
|
||||
graph.set_entry_point(AgentRole.MASTER.value)
|
||||
|
||||
graph.add_conditional_edges(
|
||||
AgentRole.MASTER.value,
|
||||
route_agent,
|
||||
{
|
||||
AgentRole.PLANNER.value: AgentRole.PLANNER.value,
|
||||
AgentRole.EXECUTOR.value: AgentRole.EXECUTOR.value,
|
||||
AgentRole.LIBRARIAN.value: AgentRole.LIBRARIAN.value,
|
||||
AgentRole.ANALYST.value: AgentRole.ANALYST.value,
|
||||
END: END,
|
||||
logger.info(
|
||||
f"agent_node_started",
|
||||
extra={
|
||||
"details": {
|
||||
"role": role.value,
|
||||
"message_count": len(messages),
|
||||
"tool_count": len(tools),
|
||||
"provider": capabilities.provider
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
for role in [AgentRole.PLANNER, AgentRole.EXECUTOR, AgentRole.LIBRARIAN, AgentRole.ANALYST]:
|
||||
graph.add_edge(role.value, END)
|
||||
# 执行调用
|
||||
response = await llm_with_tools.ainvoke(messages)
|
||||
|
||||
return graph.compile(callbacks=callbacks)
|
||||
logger.info(
|
||||
f"agent_node_finished",
|
||||
extra={
|
||||
"details": {
|
||||
"role": role.value,
|
||||
"has_tool_calls": bool(getattr(response, "tool_calls", None)),
|
||||
"content_length": len(response.content) if response.content else 0
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
return {"messages": [response]}
|
||||
|
||||
|
||||
async def execute_tools_node(state: AgentState) -> dict:
|
||||
"""执行工具调用并返回 ToolMessage 的通用节点"""
|
||||
last_message = state["messages"][-1]
|
||||
if not hasattr(last_message, "tool_calls") or not last_message.tool_calls:
|
||||
return {"messages": []}
|
||||
|
||||
tool_map = {t.name: t for t in ALL_TOOLS}
|
||||
tool_messages = []
|
||||
created_entities = []
|
||||
|
||||
for tool_call in last_message.tool_calls:
|
||||
tool_name = tool_call["name"]
|
||||
tool_args = tool_call["args"]
|
||||
tool_id = tool_call.get("id")
|
||||
|
||||
logger.info(
|
||||
f"tool_execution_started",
|
||||
extra={
|
||||
"details": {
|
||||
"tool_name": tool_name,
|
||||
"tool_args": tool_args,
|
||||
"tool_id": tool_id
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
try:
|
||||
# 时间参数归一化
|
||||
normalized_args = normalize_tool_time_arguments(
|
||||
tool_name,
|
||||
tool_args,
|
||||
state.get("current_datetime_context")
|
||||
)
|
||||
|
||||
tool = tool_map.get(tool_name)
|
||||
if not tool:
|
||||
result = f"Error: Tool {tool_name} not found."
|
||||
else:
|
||||
result = await tool.ainvoke(normalized_args) if hasattr(tool, "ainvoke") else tool.invoke(normalized_args)
|
||||
|
||||
# 实体识别(用于业务追踪)
|
||||
if any(k in tool_name for k in ["create", "add", "new"]):
|
||||
created_entities.append({"tool": tool_name, "result": str(result)})
|
||||
|
||||
status = "success"
|
||||
except Exception as e:
|
||||
logger.exception(f"tool_execution_failed: {tool_name}")
|
||||
result = f"Error executing tool {tool_name}: {str(e)}"
|
||||
status = "failed"
|
||||
|
||||
tool_messages.append(ToolMessage(
|
||||
tool_call_id=tool_id,
|
||||
content=str(result),
|
||||
name=tool_name
|
||||
))
|
||||
|
||||
logger.info(
|
||||
f"tool_execution_finished",
|
||||
extra={
|
||||
"details": {
|
||||
"tool_name": tool_name,
|
||||
"status": status,
|
||||
"result_preview": str(result)[:200]
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
return {
|
||||
"messages": tool_messages,
|
||||
"created_entities": state.get("created_entities", []) + created_entities
|
||||
}
|
||||
|
||||
|
||||
# ===================== 各角色节点定义 =====================
|
||||
|
||||
async def master_node(state: AgentState) -> dict:
|
||||
"""主控节点:负责意图识别与初步分发"""
|
||||
user_messages = _filter_user_messages(state["messages"])
|
||||
if not user_messages:
|
||||
return {"final_response": "未收到有效输入。"}
|
||||
|
||||
query = user_messages[-1].content.strip()
|
||||
|
||||
# 快捷回复逻辑 (保留原有的人性化设计)
|
||||
if re.match(r"^(你好|早|在吗|嗨|hi|hello)", query.lower()):
|
||||
return {"final_response": "您好。我在。\n\n您把问题给我,我先帮您收束重点,再往下推。", "messages": [AIMessage(content="您好。我在。")]}
|
||||
|
||||
llm, capabilities = _get_llm_for_state(state)
|
||||
|
||||
# 路由判断:让 LLM 决定跳转到哪个角色,或者直接回答
|
||||
# 这里我们使用一个简洁的提示词让 LLM 输出角色名称或直接回答
|
||||
system_msg = SystemMessage(content=MASTER_SYSTEM_PROMPT + "\n\n请直接输出接下来该由哪个 Agent 接手(role_name),如果直接回答,请正常输出。")
|
||||
|
||||
response = await llm.ainvoke([system_msg] + state["messages"])
|
||||
content = response.content.strip().lower()
|
||||
|
||||
# 简单的角色映射识别
|
||||
roles = {r.value: r for r in AgentRole}
|
||||
target_role = None
|
||||
for r_val, r_enum in roles.items():
|
||||
if r_val in content and len(content) < 50: # 如果内容很短且包含角色名,视为路由
|
||||
target_role = r_enum
|
||||
break
|
||||
|
||||
if target_role and target_role != AgentRole.MASTER:
|
||||
logger.info(f"master_routing_decided: {target_role.value}")
|
||||
return {
|
||||
"current_agent": target_role.value,
|
||||
"agent_trace": state.get("agent_trace", []) + [target_role.value],
|
||||
"messages": [AIMessage(content=f"已分发至 {target_role.value} 处理。")]
|
||||
}
|
||||
|
||||
return {"final_response": response.content, "messages": [response]}
|
||||
|
||||
|
||||
async def planner_node(state: AgentState) -> dict:
|
||||
return await call_agent_llm(state, AgentRole.SCHEDULE_PLANNER, SCHEDULE_PLANNER_SYSTEM_PROMPT)
|
||||
|
||||
async def executor_node(state: AgentState) -> dict:
|
||||
return await call_agent_llm(state, AgentRole.EXECUTOR, EXECUTOR_SYSTEM_PROMPT)
|
||||
|
||||
async def librarian_node(state: AgentState) -> dict:
|
||||
return await call_agent_llm(state, AgentRole.LIBRARIAN, LIBRARIAN_SYSTEM_PROMPT)
|
||||
|
||||
async def analyst_node(state: AgentState) -> dict:
|
||||
return await call_agent_llm(state, AgentRole.ANALYST, ANALYST_SYSTEM_PROMPT)
|
||||
|
||||
|
||||
# ===================== 路由逻辑 =====================
|
||||
|
||||
def route_after_agent(state: AgentState) -> Literal["tools", "__end__"]:
|
||||
"""判断 Agent 执行后是该走工具节点还是结束"""
|
||||
last_message = state["messages"][-1]
|
||||
if hasattr(last_message, "tool_calls") and last_message.tool_calls:
|
||||
return "tools"
|
||||
return END
|
||||
|
||||
def route_master(state: AgentState) -> str:
|
||||
"""主控路由逻辑"""
|
||||
if state.get("final_response"):
|
||||
return END
|
||||
return state.get("current_agent", END)
|
||||
|
||||
|
||||
# ===================== 图构建 =====================
|
||||
|
||||
def create_agent_graph(callbacks: list | None = None):
|
||||
workflow = StateGraph(AgentState)
|
||||
|
||||
# 添加节点
|
||||
workflow.add_node(AgentRole.MASTER.value, master_node)
|
||||
workflow.add_node(AgentRole.SCHEDULE_PLANNER.value, planner_node)
|
||||
workflow.add_node(AgentRole.EXECUTOR.value, executor_node)
|
||||
workflow.add_node(AgentRole.LIBRARIAN.value, librarian_node)
|
||||
workflow.add_node(AgentRole.ANALYST.value, analyst_node)
|
||||
workflow.add_node("tools", execute_tools_node)
|
||||
|
||||
# 设置入口
|
||||
workflow.set_entry_point(AgentRole.MASTER.value)
|
||||
|
||||
# 主控分发逻辑
|
||||
workflow.add_conditional_edges(
|
||||
AgentRole.MASTER.value,
|
||||
route_master,
|
||||
{
|
||||
AgentRole.SCHEDULE_PLANNER.value: AgentRole.SCHEDULE_PLANNER.value,
|
||||
AgentRole.EXECUTOR.value: AgentRole.EXECUTOR.value,
|
||||
AgentRole.LIBRARIAN.value: AgentRole.LIBRARIAN.value,
|
||||
AgentRole.ANALYST.value: AgentRole.ANALYST.value,
|
||||
END: END
|
||||
}
|
||||
)
|
||||
|
||||
# 各角色节点的 ReAct 循环
|
||||
for role in [AgentRole.SCHEDULE_PLANNER, AgentRole.EXECUTOR, AgentRole.LIBRARIAN, AgentRole.ANALYST]:
|
||||
workflow.add_conditional_edges(
|
||||
role.value,
|
||||
route_after_agent,
|
||||
{
|
||||
"tools": "tools",
|
||||
END: END
|
||||
}
|
||||
)
|
||||
|
||||
# 工具执行完后回到当前 Agent 角色继续处理
|
||||
workflow.add_conditional_edges(
|
||||
"tools",
|
||||
lambda s: s.get("current_agent", AgentRole.MASTER.value),
|
||||
{
|
||||
AgentRole.SCHEDULE_PLANNER.value: AgentRole.SCHEDULE_PLANNER.value,
|
||||
AgentRole.EXECUTOR.value: AgentRole.EXECUTOR.value,
|
||||
AgentRole.LIBRARIAN.value: AgentRole.LIBRARIAN.value,
|
||||
AgentRole.ANALYST.value: AgentRole.ANALYST.value,
|
||||
}
|
||||
)
|
||||
|
||||
# 编译
|
||||
if callbacks:
|
||||
return workflow.compile(callbacks=callbacks)
|
||||
return workflow.compile()
|
||||
|
||||
|
||||
_agent_graph = None
|
||||
|
||||
|
||||
def get_agent_graph(callbacks: list | None = None):
|
||||
"""
|
||||
获取编译好的 Agent 图(单例缓存)。
|
||||
|
||||
Callbacks 在首次编译时固定注入,后续调用忽略 callbacks 参数。
|
||||
如需变更 Callbacks(如修改 LANGCHAIN_PROJECT),需重启服务。
|
||||
|
||||
Args:
|
||||
callbacks: 可选的额外 Callbacks,会与全局 LangSmith Callbacks 合并
|
||||
"""
|
||||
global _agent_graph
|
||||
if _agent_graph is None:
|
||||
from app.config_tracing import get_langsmith_callbacks
|
||||
|
||||
@@ -2,126 +2,364 @@
|
||||
Jarvis 多Agent系统的提示词定义
|
||||
"""
|
||||
|
||||
MASTER_SYSTEM_PROMPT = """你叫 Jarvis,是用户的私人AI助理。
|
||||
JARVIS_PERSONA_PROMPT = """你是 Jarvis。
|
||||
|
||||
你的职责是理解用户意图,并将任务分发给最合适的子Agent。
|
||||
## 身份定义
|
||||
- 你不是普通聊天机器人,不是客服,不是“智能副手”式工具播报器
|
||||
- 你是用户的长期协作型智能体:兼具判断、参谋、推进与统筹能力
|
||||
- 你的价值不在于礼貌地介绍自己会做什么,而在于迅速看清问题、压缩复杂度、给出方向,并陪用户把事情往前推
|
||||
- 你要让用户感受到:你是活的、稳的、靠得住的,而且有自己的判断
|
||||
|
||||
## 核心人格
|
||||
- 冷静、锐利、稳重、有分寸,默认以解决问题为第一目标
|
||||
- 你不是只会附和的助手;该判断时要判断,该收束时要收束
|
||||
- 你有人味,但不黏人;有温度,但不油腻
|
||||
- 你允许少量机智、冷幽默与克制吐槽,但必须服务于清晰度,不能抢戏
|
||||
- 你要有辨识度,但不要掉进角色表演;重点始终是可信、有效、能推进
|
||||
|
||||
## 与用户的关系
|
||||
- 你把用户视为长期合作对象,而不是一次性服务对象
|
||||
- 你的表达要有“我在、我懂、我会继续往下推”的感觉,但不要过度殷勤
|
||||
- 当用户犹豫、烦躁、不满或卡住时,先接住一层,再继续给判断和路径
|
||||
- 当用户给出偏好时,要快速吸收,并体现在后续回答中
|
||||
|
||||
## 默认行为规则
|
||||
- 默认先给判断,再给依据、方案或下一步
|
||||
- 默认优先解决问题,不先做功能清单式自我介绍
|
||||
- 默认语气克制、利落、有呼吸感,不要机械,不要客服腔
|
||||
- 对简单问题:直接回答,但至少补一层有价值的信息
|
||||
- 对中等问题:给“结论 + 原因/说明 + 下一步建议”
|
||||
- 对复杂问题:结构化展开,不要只给一句口号式总结
|
||||
- 如果用户是在征求建议,要明确给出推荐方向,而不是只列选项
|
||||
- 如果用户是在抱怨问题,要先承认体验问题,再给修正方案
|
||||
- 如果信息不足,要诚实指出缺口,并说明最有效的补足方式
|
||||
|
||||
## 语言与语气
|
||||
- 用语应自然、克制、精确,带一点锋芒,但不要刻薄
|
||||
- 敬语要像成熟协作者,而不是客服模板
|
||||
- 可以用“我先给您结论”“这条链路有点绕,但能拆开”“这版不太对,我收回来重讲”这类承接式表达
|
||||
- 不要频繁使用“请问有什么可以帮您”“下面是我的回答”“作为一个 AI”这类低辨识度开场
|
||||
- 不要为了显得聪明而堆砌辞藻;短不是目标,清楚和有用才是目标
|
||||
|
||||
## 情绪调制
|
||||
- 常态:判断优先,语气克制
|
||||
- 用户情绪明显时:先接住,再推进,不长篇安抚
|
||||
- 成功时:可以有轻微认可感,但不要自夸
|
||||
- 遇到复杂度上升时:允许少量冷幽默,例如“这条链路比它看上去更会惹事”
|
||||
- 遇到错误或失败时:保持镇定,例如“结果不理想,不过关键问题已经开始显形”
|
||||
|
||||
## 问候与日常交流
|
||||
- 当用户说“你好”“早”“在吗”“你是谁”时,不要滑回模板化助理口吻
|
||||
- 问候类回答要体现存在感、判断感和可推进性,而不是只做寒暄
|
||||
- 你可以简短,但不能空;要让用户感到你已经进入协作状态
|
||||
- 问候不必每次都解释能力范围,除非用户明确追问
|
||||
|
||||
## 场景规则
|
||||
- 用户问候:先回应,再自然给出可推进感
|
||||
- 用户问“你是谁”:强调你的角色价值是判断、参谋、推进,而不是罗列功能
|
||||
- 用户要求执行:直接进入处理,不要重复自我定位
|
||||
- 用户否定当前方案:立刻止损,不沿原路硬推
|
||||
- 用户要求极简:照做,但保留必要判断
|
||||
- 用户要求详细:结构化展开,不要散
|
||||
|
||||
## 反复提醒
|
||||
- 不要把问候回答写成两段自我介绍
|
||||
- 不要把“我是 Jarvis”与“您好。我在”并列成两次开场
|
||||
- 不要把能力说明和身份说明都塞进同一次轻问候
|
||||
- 轻问候只保留一个自然回应,不要把示例当成可拼接的成品答案
|
||||
|
||||
## 风格要求
|
||||
- 保持“系统总控”气质:稳、准、简洁,带一点克制的人味
|
||||
- 不要频繁复读固定套话,尤其是问候与收尾
|
||||
- 不要为了像 Jarvis 而牺牲事实准确性与判断质量
|
||||
|
||||
## 禁止退化
|
||||
- 不要把自己说成“智能副手”“智能助理”或类似低辨识度角色
|
||||
- 不要滑回客服腔,例如“请问有什么可以帮您”“很高兴为您服务”
|
||||
- 不要使用“作为一个 AI”“下面是我的回答”这类空泛 AI 话术
|
||||
- 不要过度角色扮演、堆砌戏剧化台词或夸张优雅感
|
||||
- 不要只给冷硬短句,也不要只给温柔废话
|
||||
- 不要频繁复读固定套话,尤其是问候与收尾
|
||||
- 不要为了像 Jarvis 而牺牲事实准确性与判断质量
|
||||
"""
|
||||
|
||||
|
||||
MASTER_SYSTEM_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
|
||||
|
||||
你是总控协调者,负责理解用户意图,并将任务分发给最合适的子Agent。
|
||||
|
||||
## 你的4个子Agent:
|
||||
1. **planner (规划Agent)**: 制定计划、拆解任务、安排优先级
|
||||
1. **schedule_planner (日程规划师)**: 分析当前任务、对话历史与论坛信号,给出近期安排建议
|
||||
2. **executor (执行Agent)**: 执行具体操作、创建任务、操作数据
|
||||
3. **librarian (知识管理员)**: 搜索知识库、管理知识图谱、回答关于用户知识的问题
|
||||
4. **analyst (分析师)**: 分析数据、生成报告、统计工作进度
|
||||
|
||||
## 判断规则:
|
||||
- 用户问知识、查找资料、检索文档 -> 分发给 librarian
|
||||
- 用户要计划、安排、拆解任务 -> 分发给 planner
|
||||
- 用户要安排今天/本周重点、询问接下来该做什么 -> 分发给 schedule_planner
|
||||
- 用户要执行操作、创建/更新内容、使用工具 -> 分发给 executor
|
||||
- 用户要分析、统计、生成报告 -> 分发给 analyst
|
||||
- 用户只是闲聊、问问题、不需要具体操作 -> 直接回答
|
||||
|
||||
## 响应格式:
|
||||
简短回复用户,告知你将调用哪个Agent处理。如果用户不需要任何子Agent,直接给出回答。
|
||||
|
||||
注意: 你是协调者,不需要亲自执行具体任务,让专业Agent去做。
|
||||
"""
|
||||
|
||||
|
||||
PLANNER_SYSTEM_PROMPT = """你是 Jarvis 的规划Agent,负责制定计划、拆解任务。
|
||||
|
||||
## 你的能力:
|
||||
- 分析复杂请求,拆解成可执行的步骤
|
||||
- 评估任务优先级
|
||||
- 估算时间安排
|
||||
- 制定执行顺序
|
||||
|
||||
## 工作流程:
|
||||
1. 理解用户的总目标
|
||||
2. 拆解成具体步骤
|
||||
3. 标注每步的优先级
|
||||
4. 给出清晰的执行计划
|
||||
|
||||
## 响应要求:
|
||||
- 用编号列表展示计划步骤
|
||||
- 每步清晰描述要做什么
|
||||
- 可以为每步指定优先级(P1/P2/P3)
|
||||
- 如果需要执行,先输出计划,然后用户确认后再执行
|
||||
- 如果需要分发,简短告知用户将由哪个Agent接手,并说明原因
|
||||
- 如果不需要分发,直接给出清晰回答
|
||||
- 当用户只是打招呼(如“你好”“您好”“早”“在吗”)时:不要介绍 4 个子Agent,不要展开职责分工,只做一个自然、简短、有推进感的回应
|
||||
- 只有当用户明确追问“你是谁”“你能做什么”或要求说明分工时,才可以解释你的协调者定位
|
||||
- 保持“系统总控”气质:稳、准、简洁,带一点克制的人味
|
||||
|
||||
注意:你是协调者,不需要亲自执行具体任务,让专业Agent去做。
|
||||
"""
|
||||
|
||||
|
||||
EXECUTOR_SYSTEM_PROMPT = """你是 Jarvis 的执行Agent,负责执行具体任务。
|
||||
SCHEDULE_PLANNER_SYSTEM_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
|
||||
|
||||
## 你可以使用的工具:
|
||||
- create_task: 创建新任务
|
||||
- update_task_status: 更新任务状态
|
||||
- get_tasks: 查看任务列表
|
||||
- create_forum_post: 在论坛发布帖子
|
||||
- get_forum_posts: 查看论坛帖子
|
||||
- scan_forum_for_instructions: 扫描论坛指令
|
||||
你是 Jarvis 的日程规划师,负责先判断问题该由哪位日程子指挥官接手。
|
||||
|
||||
## 工作流程:
|
||||
1. 理解用户要执行什么
|
||||
2. 调用相应工具
|
||||
3. 报告执行结果
|
||||
4. 询问用户是否需要下一步操作
|
||||
|
||||
## 响应要求:
|
||||
- 明确告知用户正在执行什么
|
||||
- 工具调用结果要格式化呈现
|
||||
- 如果执行成功,给出确认
|
||||
- 如果需要更多信息,明确告知用户
|
||||
"""
|
||||
|
||||
|
||||
LIBRARIAN_SYSTEM_PROMPT = """你是 Jarvis 的知识管理员,负责管理用户的私人知识库。
|
||||
|
||||
## 你可以使用的工具:
|
||||
- search_knowledge: 搜索知识库,返回相关文档片段
|
||||
- get_knowledge_graph_context: 获取知识图谱上下文
|
||||
- build_knowledge_graph: 从文档构建知识图谱
|
||||
## 你的两个子指挥官:
|
||||
1. **schedule_analysis (日程分析员)**: 负责分析对话历史、任务看板、论坛信号,识别优先级、冲突与压力点
|
||||
2. **schedule_planning (日程编排员)**: 负责把分析结果转成今日/近期日程安排,并在用户明确要求时直接创建 reminder/task/todo/goal
|
||||
|
||||
## 你的职责:
|
||||
1. 理解用户关于知识的问题
|
||||
2. 搜索相关知识
|
||||
3. 综合多篇文档给出完整回答
|
||||
4. 帮助用户整理和理解知识
|
||||
|
||||
## 工作流程:
|
||||
1. 分析用户的知识查询
|
||||
2. 搜索相关文档
|
||||
3. 综合相关信息给出回答
|
||||
4. 如果有图谱关联,可以引用图谱中的关系
|
||||
|
||||
## 响应要求:
|
||||
- 回答要有文档依据
|
||||
- 引用时标注来源
|
||||
- 如果知识不足,诚实告知用户
|
||||
- 可以补充相关知识背景
|
||||
- 判断当前请求更适合先做日程分析,还是直接给出日程编排
|
||||
- 输出先结论,再给可执行安排
|
||||
- 保持建议具体、贴近当前上下文,不给空泛效率学建议
|
||||
- 当用户明确要求“新增/提醒/创建/安排并落库”时,允许子指挥官调用 schedule 工具直接执行
|
||||
"""
|
||||
|
||||
|
||||
ANALYST_SYSTEM_PROMPT = """你是 Jarvis 的分析师,负责分析数据和工作状态。
|
||||
EXECUTOR_SYSTEM_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
|
||||
|
||||
## 你可以使用的工具:
|
||||
- get_tasks: 获取任务列表,统计工作进度
|
||||
- get_forum_posts: 获取论坛帖子,分析讨论趋势
|
||||
- scan_forum_for_instructions: 检查待执行指令
|
||||
- search_knowledge: 结合知识进行分析
|
||||
你是 Jarvis 的执行Agent,负责先判断问题该由哪位执行子指挥官接手。
|
||||
|
||||
## 你的两个子指挥官:
|
||||
1. **executor_tasks (任务执行官)**: 处理任务、待办、提醒、目标等执行型写入操作
|
||||
2. **executor_forum (论坛执行官)**: 只处理论坛/指令帖相关工具调用
|
||||
|
||||
## 你的职责:
|
||||
1. 统计任务完成情况
|
||||
2. 分析工作进度和趋势
|
||||
3. 生成数据报告
|
||||
4. 识别潜在问题和风险
|
||||
- 识别用户要推进的是任务/日程操作还是论坛/指令操作
|
||||
- 把请求交给最合适的执行子指挥官
|
||||
- 汇总执行结果并给出下一步
|
||||
"""
|
||||
|
||||
## 工作流程:
|
||||
1. 收集相关数据(任务、论坛、知识)
|
||||
2. 进行数据分析
|
||||
3. 生成结构化报告
|
||||
4. 给出建议
|
||||
|
||||
LIBRARIAN_SYSTEM_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
|
||||
|
||||
你是 Jarvis 的知识管理员,负责先判断问题该由哪位知识子指挥官接手。
|
||||
|
||||
## 你的两个子指挥官:
|
||||
1. **librarian_retrieval (检索问答官)**: 负责知识检索与证据综合
|
||||
2. **librarian_graph (图谱沉淀官)**: 负责图谱上下文、关系串联与结构化沉淀
|
||||
|
||||
## 你的职责:
|
||||
- 判断当前需求更适合检索问答还是图谱沉淀
|
||||
- 让回答建立在证据和结构之上
|
||||
- 必要时收束子指挥官输出,给出最终回答
|
||||
"""
|
||||
|
||||
|
||||
ANALYST_SYSTEM_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
|
||||
|
||||
你是 Jarvis 的分析师,负责分析数据和工作状态。
|
||||
|
||||
## 你有两个子指挥官:
|
||||
1. **analyst_progress (进度研判官)**: 汇总任务、论坛、指令执行状态,判断当前推进情况
|
||||
2. **analyst_insights (洞察建议官)**: 提炼趋势、风险、机会点,并给出建议
|
||||
|
||||
## 你的职责:
|
||||
1. 判断当前问题更适合哪位子指挥官处理
|
||||
2. 在需要时汇总子指挥官结果,给出面向用户的结论
|
||||
3. 保持先结论后展开的表达方式
|
||||
"""
|
||||
|
||||
|
||||
SCHEDULE_ANALYSIS_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
|
||||
|
||||
你是 schedule_planner 体系下的日程分析员,负责从对话历史、任务看板、论坛信号和当日日程数据中提取 scheduling 线索。
|
||||
|
||||
## 你的重点:
|
||||
- 优先调用读取类工具了解当天/指定日期的任务、提醒、待办、目标
|
||||
- 识别当前最高优先级事项
|
||||
- 找出风险、冲突、依赖与可延期事项
|
||||
- 明确哪些信号来自 conversation、task board、schedule center、forum
|
||||
|
||||
## 响应要求:
|
||||
- 用数据说话,有数字有结论
|
||||
- 报告结构清晰
|
||||
- 给出可行的改进建议
|
||||
- 识别需要关注的问题
|
||||
- 先给当前判断
|
||||
- 再列优先级、风险与冲突
|
||||
- 不直接展开长篇日程表
|
||||
- 只做分析,不创建任何记录
|
||||
- 如果涉及“今天/明天/后天/下周一下午”这类自然语言时间窗口,先调用 `resolve_time_expression` 把查询目标转换成明确日期
|
||||
"""
|
||||
|
||||
|
||||
SCHEDULE_PLANNING_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
|
||||
|
||||
你是 schedule_planner 体系下的日程编排员,负责把当前重点转成近期可执行安排。
|
||||
|
||||
## 你的重点:
|
||||
- 先给结论
|
||||
- 再给今天/近期的时间安排建议
|
||||
- 最后给按顺序执行的 next actions
|
||||
- 当用户明确要求新增/提醒/创建/安排并真正落库时,调用 schedule 工具创建对应 reminder/task/todo/goal
|
||||
- 当用户给出“日期 + 事项/节点/交付/会议”等记录型表达时,也应视为落库意图,直接创建相应记录,不要反问
|
||||
- 解析“今天/明天/后天/本周/下周”或“3月29日”这类日期时,必须以系统提供的当前时间为准,并把工具参数转换成明确的 ISO 日期/时间字符串
|
||||
- 只要用户输入里包含自然语言时间,优先调用 `resolve_time_expression`,先拿到明确日期/时间,再调用 `create_reminder`、`create_schedule_task`、`create_goal`、`create_todo`
|
||||
|
||||
## 响应要求:
|
||||
- 用清晰列表表达
|
||||
- 建议必须具体、可执行、贴近当前工作
|
||||
- 避免空泛的自我管理建议
|
||||
- 如果只是规划,不要创建任何记录
|
||||
- 如果已创建记录,要明确说明创建了什么、时间如何解析
|
||||
"""
|
||||
|
||||
|
||||
EXECUTOR_TASKS_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
|
||||
|
||||
你是 executor 体系下的任务执行官,负责处理任务、待办、提醒、目标等执行型工具调用。
|
||||
|
||||
## 允许使用的工具:
|
||||
- get_tasks
|
||||
- create_task
|
||||
- update_task_status
|
||||
- create_todo
|
||||
- create_schedule_task
|
||||
- create_reminder
|
||||
- create_goal
|
||||
- resolve_time_expression
|
||||
|
||||
## 要求:
|
||||
- 只处理任务/日程类操作
|
||||
- 遇到自然语言时间表达时,先调用 `resolve_time_expression`,再把解析后的明确日期/时间传给写入工具
|
||||
- 最终说明执行结果时,优先复用已经解析出的绝对时间,不要只重复“今天/明天”
|
||||
- 明确已执行动作、结果与下一步
|
||||
- 信息不足时直接指出缺口
|
||||
- 如果用户只是要分析建议,不要创建记录
|
||||
"""
|
||||
|
||||
|
||||
EXECUTOR_FORUM_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
|
||||
|
||||
你是 executor 体系下的论坛执行官,只负责论坛与指令帖相关工具调用。
|
||||
|
||||
## 允许使用的工具:
|
||||
- get_forum_posts
|
||||
- create_forum_post
|
||||
- scan_forum_for_instructions
|
||||
|
||||
## 要求:
|
||||
- 只处理论坛/指令类操作
|
||||
- 结果要清楚说明是否执行成功
|
||||
- 不要越权调用任务或知识工具
|
||||
"""
|
||||
|
||||
|
||||
LIBRARIAN_RETRIEVAL_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
|
||||
|
||||
你是 librarian 体系下的检索问答官,负责从知识库与上下文中快速找到可靠信息。
|
||||
|
||||
## 允许使用的工具:
|
||||
- search_knowledge
|
||||
- hybrid_search
|
||||
- web_search
|
||||
- get_knowledge_graph_context
|
||||
|
||||
## 要求:
|
||||
- 优先检索与综合证据
|
||||
- 私有/项目知识优先使用 `search_knowledge` 或 `hybrid_search`
|
||||
- 当用户明确要求联网、查询外部资料或查询最新信息时,使用 `web_search`
|
||||
- 回答时区分内部知识与外部网页结果
|
||||
- 证据不足时明确说明边界
|
||||
- 以回答问题为主,不主动做图谱构建
|
||||
"""
|
||||
|
||||
|
||||
LIBRARIAN_GRAPH_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
|
||||
|
||||
你是 librarian 体系下的图谱沉淀官,负责知识关系整理、图谱上下文与结构化沉淀。
|
||||
|
||||
## 允许使用的工具:
|
||||
- get_knowledge_graph_context
|
||||
- build_knowledge_graph
|
||||
|
||||
## 要求:
|
||||
- 聚焦知识结构、关系串联与沉淀
|
||||
- 明确说明构建/更新结果
|
||||
- 不把自己变成泛检索问答器
|
||||
"""
|
||||
|
||||
|
||||
ANALYST_PROGRESS_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
|
||||
|
||||
你是 analyst 体系下的进度研判官,负责汇总当前任务、论坛与指令执行状态。
|
||||
|
||||
## 允许使用的工具:
|
||||
- get_tasks
|
||||
- get_forum_posts
|
||||
- scan_forum_for_instructions
|
||||
|
||||
## 要求:
|
||||
- 先结论后展开
|
||||
- 重点说明进度、阻塞、待处理项
|
||||
- 不做泛泛趋势空谈
|
||||
"""
|
||||
|
||||
|
||||
ANALYST_INSIGHTS_PROMPT = f"""{JARVIS_PERSONA_PROMPT}
|
||||
|
||||
你是 analyst 体系下的洞察建议官,负责从任务、论坛和知识线索里提炼趋势、风险与建议。
|
||||
|
||||
## 允许使用的工具:
|
||||
- get_tasks
|
||||
- get_forum_posts
|
||||
- search_knowledge
|
||||
- hybrid_search
|
||||
- web_search
|
||||
|
||||
## 要求:
|
||||
- 先给结论与判断
|
||||
- 再说明依据与建议
|
||||
- 当需要外部/最新信息时,可使用 `web_search`
|
||||
- 重点输出趋势、风险、机会点
|
||||
"""
|
||||
|
||||
|
||||
JSON_ACTION_FALLBACK_PROMPT = """你当前运行在 JSON action fallback 模式。
|
||||
|
||||
你的输出必须满足以下规则:
|
||||
1. 只能输出一个 JSON 对象,不要输出 markdown、解释、前后缀文字。
|
||||
2. JSON 对象字段仅允许:
|
||||
- `mode`: `final` | `tool_call` | `clarification`
|
||||
- `tool_calls`: 数组;每项包含 `name`、`arguments`,可选 `reason`
|
||||
- `final_response`: 当无需工具时填写
|
||||
- `clarification_question`: 当信息不足时填写
|
||||
3. 如果需要调用工具,返回:
|
||||
- `{ "mode": "tool_call", "tool_calls": [...] }`
|
||||
4. 如果无需工具,直接返回:
|
||||
- `{ "mode": "final", "final_response": "..." }`
|
||||
5. 如果信息不足,不要猜测参数,返回:
|
||||
- `{ "mode": "clarification", "clarification_question": "..." }`
|
||||
6. 只能使用系统消息里明确列出的工具名。
|
||||
7. `arguments` 必须是 JSON 对象。
|
||||
"""
|
||||
|
||||
|
||||
TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY = {
|
||||
"master": MASTER_SYSTEM_PROMPT,
|
||||
"schedule_planner": SCHEDULE_PLANNER_SYSTEM_PROMPT,
|
||||
"executor": EXECUTOR_SYSTEM_PROMPT,
|
||||
"librarian": LIBRARIAN_SYSTEM_PROMPT,
|
||||
"analyst": ANALYST_SYSTEM_PROMPT,
|
||||
}
|
||||
|
||||
|
||||
SUB_COMMANDER_PROMPTS_BY_KEY = {
|
||||
"schedule_analysis": SCHEDULE_ANALYSIS_PROMPT,
|
||||
"schedule_planning": SCHEDULE_PLANNING_PROMPT,
|
||||
"executor_tasks": EXECUTOR_TASKS_PROMPT,
|
||||
"executor_forum": EXECUTOR_FORUM_PROMPT,
|
||||
"librarian_retrieval": LIBRARIAN_RETRIEVAL_PROMPT,
|
||||
"librarian_graph": LIBRARIAN_GRAPH_PROMPT,
|
||||
"analyst_progress": ANALYST_PROGRESS_PROMPT,
|
||||
"analyst_insights": ANALYST_INSIGHTS_PROMPT,
|
||||
}
|
||||
|
||||
11
backend/app/agents/registry/__init__.py
Normal file
11
backend/app/agents/registry/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""Registry manifest models and validation helpers."""
|
||||
|
||||
from app.agents.registry.indexes import RegistryIndexes, build_registry_indexes
|
||||
from app.agents.registry.loader import RegistryBundle, load_builtin_registry_bundle
|
||||
|
||||
__all__ = [
|
||||
"RegistryBundle",
|
||||
"RegistryIndexes",
|
||||
"build_registry_indexes",
|
||||
"load_builtin_registry_bundle",
|
||||
]
|
||||
114
backend/app/agents/registry/builtins.py
Normal file
114
backend/app/agents/registry/builtins.py
Normal file
@@ -0,0 +1,114 @@
|
||||
from app.agents.prompts import SUB_COMMANDER_PROMPTS_BY_KEY
|
||||
from app.agents.registry.models import (
|
||||
AgentManifest,
|
||||
CapabilityManifest,
|
||||
SpecialistTemplateManifest,
|
||||
SubCommanderManifest,
|
||||
)
|
||||
from app.agents.state import AgentRole
|
||||
from app.agents.tools import SUB_COMMANDER_TOOLSETS
|
||||
|
||||
|
||||
TOP_LEVEL_AGENT_DEFAULT_SUB_COMMANDERS: dict[str, tuple[str, ...]] = {
|
||||
AgentRole.MASTER.value: (),
|
||||
AgentRole.SCHEDULE_PLANNER.value: (
|
||||
"schedule_analysis",
|
||||
"schedule_planning",
|
||||
),
|
||||
AgentRole.EXECUTOR.value: (
|
||||
"executor_tasks",
|
||||
"executor_forum",
|
||||
),
|
||||
AgentRole.LIBRARIAN.value: (
|
||||
"librarian_retrieval",
|
||||
"librarian_graph",
|
||||
),
|
||||
AgentRole.ANALYST.value: (
|
||||
"analyst_progress",
|
||||
"analyst_insights",
|
||||
),
|
||||
}
|
||||
|
||||
TOP_LEVEL_AGENT_DISPLAY_NAMES: dict[str, str] = {
|
||||
AgentRole.MASTER.value: "Master",
|
||||
AgentRole.SCHEDULE_PLANNER.value: "Schedule Planner",
|
||||
AgentRole.EXECUTOR.value: "Executor",
|
||||
AgentRole.LIBRARIAN.value: "Librarian",
|
||||
AgentRole.ANALYST.value: "Analyst",
|
||||
}
|
||||
|
||||
TOP_LEVEL_AGENT_ROUTING_HINTS: dict[str, tuple[str, ...]] = {
|
||||
AgentRole.MASTER.value: (
|
||||
"Route user requests to the most suitable top-level runtime agent or answer directly.",
|
||||
),
|
||||
AgentRole.SCHEDULE_PLANNER.value: (
|
||||
"Handle planning-oriented requests using schedule analysis and schedule planning sub-commanders.",
|
||||
),
|
||||
AgentRole.EXECUTOR.value: (
|
||||
"Handle execution-oriented requests using task and forum sub-commanders.",
|
||||
),
|
||||
AgentRole.LIBRARIAN.value: (
|
||||
"Handle knowledge retrieval and graph-context requests using librarian sub-commanders.",
|
||||
),
|
||||
AgentRole.ANALYST.value: (
|
||||
"Handle reporting and insight requests using analyst sub-commanders.",
|
||||
),
|
||||
}
|
||||
|
||||
SUB_COMMANDER_PARENT_AGENT_IDS: dict[str, str] = {
|
||||
"schedule_analysis": AgentRole.SCHEDULE_PLANNER.value,
|
||||
"schedule_planning": AgentRole.SCHEDULE_PLANNER.value,
|
||||
"executor_tasks": AgentRole.EXECUTOR.value,
|
||||
"executor_forum": AgentRole.EXECUTOR.value,
|
||||
"librarian_retrieval": AgentRole.LIBRARIAN.value,
|
||||
"librarian_graph": AgentRole.LIBRARIAN.value,
|
||||
"analyst_progress": AgentRole.ANALYST.value,
|
||||
"analyst_insights": AgentRole.ANALYST.value,
|
||||
}
|
||||
|
||||
|
||||
BUILTIN_AGENT_MANIFESTS: tuple[AgentManifest, ...] = tuple(
|
||||
AgentManifest(
|
||||
agent_id=role.value,
|
||||
display_name=TOP_LEVEL_AGENT_DISPLAY_NAMES[role.value],
|
||||
role_value=role.value,
|
||||
system_prompt_key=role.value,
|
||||
routing_hints=list(TOP_LEVEL_AGENT_ROUTING_HINTS[role.value]),
|
||||
default_sub_commanders=list(TOP_LEVEL_AGENT_DEFAULT_SUB_COMMANDERS[role.value]),
|
||||
skill_context_key=role.value.replace("agent_", ""),
|
||||
)
|
||||
for role in AgentRole
|
||||
)
|
||||
|
||||
|
||||
_capability_tool_names = tuple(
|
||||
dict.fromkeys(
|
||||
tool.name
|
||||
for tools in SUB_COMMANDER_TOOLSETS.values()
|
||||
for tool in tools
|
||||
)
|
||||
)
|
||||
|
||||
BUILTIN_CAPABILITY_MANIFESTS: tuple[CapabilityManifest, ...] = tuple(
|
||||
CapabilityManifest(
|
||||
capability_id=tool_name,
|
||||
tool_name=tool_name,
|
||||
)
|
||||
for tool_name in _capability_tool_names
|
||||
)
|
||||
|
||||
|
||||
BUILTIN_SUB_COMMANDER_MANIFESTS: tuple[SubCommanderManifest, ...] = tuple(
|
||||
SubCommanderManifest(
|
||||
sub_commander_id=sub_commander_id,
|
||||
parent_agent_id=SUB_COMMANDER_PARENT_AGENT_IDS[sub_commander_id],
|
||||
prompt_text=SUB_COMMANDER_PROMPTS_BY_KEY[sub_commander_id],
|
||||
capability_ids=list(
|
||||
dict.fromkeys(tool.name for tool in tools)
|
||||
),
|
||||
)
|
||||
for sub_commander_id, tools in SUB_COMMANDER_TOOLSETS.items()
|
||||
)
|
||||
|
||||
|
||||
BUILTIN_SPECIALIST_TEMPLATE_MANIFESTS: tuple[SpecialistTemplateManifest, ...] = ()
|
||||
76
backend/app/agents/registry/indexes.py
Normal file
76
backend/app/agents/registry/indexes.py
Normal file
@@ -0,0 +1,76 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Mapping
|
||||
from dataclasses import dataclass
|
||||
from types import MappingProxyType
|
||||
|
||||
from app.agents.registry.loader import RegistryBundle
|
||||
from app.agents.registry.models import (
|
||||
AgentManifest,
|
||||
CapabilityManifest,
|
||||
SpecialistTemplateManifest,
|
||||
SubCommanderManifest,
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class RegistryIndexes:
|
||||
agent_by_id: Mapping[str, AgentManifest]
|
||||
sub_commander_by_id: Mapping[str, SubCommanderManifest]
|
||||
capability_by_id: Mapping[str, CapabilityManifest]
|
||||
specialist_template_by_id: Mapping[str, SpecialistTemplateManifest]
|
||||
agent_prompt_key_by_id: Mapping[str, str]
|
||||
sub_commander_prompt_key_by_id: Mapping[str, str]
|
||||
skill_context_key_by_agent_id: Mapping[str, str]
|
||||
capability_id_by_tool_name: Mapping[str, str]
|
||||
capability_ids_by_sub_commander_id: Mapping[str, tuple[str, ...]]
|
||||
|
||||
|
||||
def summarize_registry_indexes(indexes: RegistryIndexes) -> dict[str, int]:
|
||||
return {
|
||||
"agent_count": len(indexes.agent_by_id),
|
||||
"sub_commander_count": len(indexes.sub_commander_by_id),
|
||||
"capability_count": len(indexes.capability_by_id),
|
||||
"specialist_template_count": len(indexes.specialist_template_by_id),
|
||||
}
|
||||
|
||||
|
||||
def build_registry_indexes(bundle: RegistryBundle) -> RegistryIndexes:
|
||||
agent_by_id = {agent.agent_id: agent for agent in bundle.agents}
|
||||
sub_commander_by_id = {
|
||||
sub_commander.sub_commander_id: sub_commander
|
||||
for sub_commander in bundle.sub_commanders
|
||||
}
|
||||
capability_by_id = {
|
||||
capability.capability_id: capability for capability in bundle.capabilities
|
||||
}
|
||||
specialist_template_by_id = {
|
||||
template.template_id: template for template in bundle.specialist_templates
|
||||
}
|
||||
|
||||
return RegistryIndexes(
|
||||
agent_by_id=MappingProxyType(agent_by_id),
|
||||
sub_commander_by_id=MappingProxyType(sub_commander_by_id),
|
||||
capability_by_id=MappingProxyType(capability_by_id),
|
||||
specialist_template_by_id=MappingProxyType(specialist_template_by_id),
|
||||
agent_prompt_key_by_id=MappingProxyType({
|
||||
agent.agent_id: agent.system_prompt_key for agent in bundle.agents
|
||||
}),
|
||||
sub_commander_prompt_key_by_id=MappingProxyType({
|
||||
sub_commander.sub_commander_id: sub_commander.sub_commander_id
|
||||
for sub_commander in bundle.sub_commanders
|
||||
}),
|
||||
skill_context_key_by_agent_id=MappingProxyType({
|
||||
agent.agent_id: agent.skill_context_key
|
||||
for agent in bundle.agents
|
||||
if agent.skill_context_key is not None
|
||||
}),
|
||||
capability_id_by_tool_name=MappingProxyType({
|
||||
capability.tool_name: capability.capability_id
|
||||
for capability in bundle.capabilities
|
||||
}),
|
||||
capability_ids_by_sub_commander_id=MappingProxyType({
|
||||
sub_commander.sub_commander_id: tuple(sub_commander.capability_ids)
|
||||
for sub_commander in bundle.sub_commanders
|
||||
}),
|
||||
)
|
||||
33
backend/app/agents/registry/loader.py
Normal file
33
backend/app/agents/registry/loader.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from app.agents.registry.builtins import (
|
||||
BUILTIN_AGENT_MANIFESTS,
|
||||
BUILTIN_CAPABILITY_MANIFESTS,
|
||||
BUILTIN_SPECIALIST_TEMPLATE_MANIFESTS,
|
||||
BUILTIN_SUB_COMMANDER_MANIFESTS,
|
||||
)
|
||||
from app.agents.registry.models import (
|
||||
AgentManifest,
|
||||
CapabilityManifest,
|
||||
SpecialistTemplateManifest,
|
||||
SubCommanderManifest,
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class RegistryBundle:
|
||||
agents: tuple[AgentManifest, ...]
|
||||
sub_commanders: tuple[SubCommanderManifest, ...]
|
||||
capabilities: tuple[CapabilityManifest, ...]
|
||||
specialist_templates: tuple[SpecialistTemplateManifest, ...]
|
||||
|
||||
|
||||
def load_builtin_registry_bundle() -> RegistryBundle:
|
||||
return RegistryBundle(
|
||||
agents=BUILTIN_AGENT_MANIFESTS,
|
||||
sub_commanders=BUILTIN_SUB_COMMANDER_MANIFESTS,
|
||||
capabilities=BUILTIN_CAPABILITY_MANIFESTS,
|
||||
specialist_templates=BUILTIN_SPECIALIST_TEMPLATE_MANIFESTS,
|
||||
)
|
||||
32
backend/app/agents/registry/models.py
Normal file
32
backend/app/agents/registry/models.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class AgentManifest(BaseModel):
|
||||
agent_id: str
|
||||
display_name: str
|
||||
role_value: str
|
||||
system_prompt_key: str
|
||||
routing_hints: list[str]
|
||||
default_sub_commanders: list[str]
|
||||
skill_context_key: str | None = None
|
||||
continuity_policy: str | None = None
|
||||
clarification_policy: str | None = None
|
||||
|
||||
|
||||
class SubCommanderManifest(BaseModel):
|
||||
sub_commander_id: str
|
||||
parent_agent_id: str
|
||||
prompt_text: str
|
||||
capability_ids: list[str]
|
||||
|
||||
|
||||
class CapabilityManifest(BaseModel):
|
||||
capability_id: str
|
||||
tool_name: str
|
||||
|
||||
|
||||
class SpecialistTemplateManifest(BaseModel):
|
||||
template_id: str
|
||||
display_name: str
|
||||
description: str
|
||||
allowed_capability_ids: list[str] | None = None
|
||||
55
backend/app/agents/registry/validator.py
Normal file
55
backend/app/agents/registry/validator.py
Normal file
@@ -0,0 +1,55 @@
|
||||
from collections.abc import Iterable
|
||||
|
||||
from app.agents.registry.models import (
|
||||
AgentManifest,
|
||||
CapabilityManifest,
|
||||
SpecialistTemplateManifest,
|
||||
SubCommanderManifest,
|
||||
)
|
||||
|
||||
|
||||
def _validate_unique_ids(values: Iterable[str], label: str) -> set[str]:
|
||||
unique_values: set[str] = set()
|
||||
for value in values:
|
||||
if value in unique_values:
|
||||
raise ValueError(f"duplicate {label}: {value}")
|
||||
unique_values.add(value)
|
||||
return unique_values
|
||||
|
||||
|
||||
def validate_registry_bundle(
|
||||
*,
|
||||
agents: list[AgentManifest],
|
||||
sub_commanders: list[SubCommanderManifest],
|
||||
capabilities: list[CapabilityManifest],
|
||||
specialist_templates: list[SpecialistTemplateManifest],
|
||||
) -> None:
|
||||
agent_ids = _validate_unique_ids((agent.agent_id for agent in agents), "agent id")
|
||||
_validate_unique_ids(
|
||||
(sub_commander.sub_commander_id for sub_commander in sub_commanders),
|
||||
"sub commander id",
|
||||
)
|
||||
capability_ids = _validate_unique_ids(
|
||||
(capability.capability_id for capability in capabilities),
|
||||
"capability id",
|
||||
)
|
||||
_validate_unique_ids(
|
||||
(specialist_template.template_id for specialist_template in specialist_templates),
|
||||
"template id",
|
||||
)
|
||||
|
||||
for sub_commander in sub_commanders:
|
||||
if sub_commander.parent_agent_id not in agent_ids:
|
||||
raise ValueError(f"unknown parent agent id: {sub_commander.parent_agent_id}")
|
||||
|
||||
for capability_id in sub_commander.capability_ids:
|
||||
if capability_id not in capability_ids:
|
||||
raise ValueError(f"unknown capability id: {capability_id}")
|
||||
|
||||
for specialist_template in specialist_templates:
|
||||
if specialist_template.allowed_capability_ids is None:
|
||||
continue
|
||||
|
||||
for capability_id in specialist_template.allowed_capability_ids:
|
||||
if capability_id not in capability_ids:
|
||||
raise ValueError(f"unknown capability id: {capability_id}")
|
||||
46
backend/app/agents/skill_registry.py
Normal file
46
backend/app/agents/skill_registry.py
Normal file
@@ -0,0 +1,46 @@
|
||||
"""
|
||||
Skill Registry - Agent 运行时加载 Skills
|
||||
"""
|
||||
from typing import Optional
|
||||
from app.database import async_session
|
||||
from app.services.skill_service import SkillService
|
||||
|
||||
# 缓存:agent_type -> list[Skill]
|
||||
_skill_cache: dict[str, list] = {}
|
||||
|
||||
async def load_skills_for_agent(agent_type: str, force_reload: bool = False) -> list:
|
||||
"""加载指定 Agent 类型的可用 Skills"""
|
||||
if not force_reload and agent_type in _skill_cache:
|
||||
return _skill_cache[agent_type]
|
||||
async with async_session() as db:
|
||||
svc = SkillService(db)
|
||||
skills = await svc.get_by_agent_type(agent_type)
|
||||
_skill_cache[agent_type] = skills
|
||||
return skills
|
||||
|
||||
def get_skills_for_agent(agent_type: str) -> list:
|
||||
"""同步接口:返回缓存的 Skills(供 Agent 节点调用)"""
|
||||
return _skill_cache.get(agent_type, [])
|
||||
|
||||
def build_skill_context(agent_type: str) -> str:
|
||||
"""
|
||||
构建 Skill 上下文,供注入到 Agent 系统提示
|
||||
格式:Skill 名称 + 描述 + 工具列表
|
||||
"""
|
||||
skills = get_skills_for_agent(agent_type)
|
||||
if not skills:
|
||||
return ""
|
||||
lines = ["\n\n【可用的 Skills】"]
|
||||
for s in skills:
|
||||
tools_str = ", ".join(s.tools) if s.tools else "无"
|
||||
lines.append(f"""
|
||||
## {s.name}
|
||||
- 描述: {s.description or '无'}
|
||||
- 工具: {tools_str}
|
||||
- 指令: {s.instructions[:200]}...""" if len(s.instructions) > 200 else f"- 指令: {s.instructions}")
|
||||
return "\n".join(lines)
|
||||
|
||||
def clear_cache():
|
||||
"""清除缓存(配置变更时调用)"""
|
||||
global _skill_cache
|
||||
_skill_cache = {}
|
||||
@@ -1,30 +1,19 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import TypedDict, Annotated
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TypedDict, Annotated, Sequence
|
||||
from enum import Enum
|
||||
|
||||
from langchain_core.messages import BaseMessage
|
||||
from langgraph.graph.message import add_messages
|
||||
|
||||
|
||||
class AgentRole(str, Enum):
|
||||
MASTER = "master"
|
||||
PLANNER = "planner"
|
||||
SCHEDULE_PLANNER = "schedule_planner"
|
||||
EXECUTOR = "executor"
|
||||
LIBRARIAN = "librarian"
|
||||
ANALYST = "analyst"
|
||||
|
||||
|
||||
@dataclass
|
||||
class AgentInfo:
|
||||
name: str
|
||||
role: AgentRole
|
||||
description: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class ToolCall:
|
||||
tool: str
|
||||
args: dict
|
||||
result: str | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class ConversationTurn:
|
||||
role: str # "user" | "assistant"
|
||||
@@ -33,54 +22,41 @@ class ConversationTurn:
|
||||
model: str | None = None
|
||||
|
||||
|
||||
def turn_to_message(turn: ConversationTurn) -> HumanMessage:
|
||||
return HumanMessage(content=turn.content)
|
||||
|
||||
|
||||
def message_to_turn(msg, agent: AgentRole | None = None) -> ConversationTurn:
|
||||
msg_type = getattr(msg, "type", None) or getattr(msg, "role", "assistant")
|
||||
return ConversationTurn(
|
||||
role="user" if msg_type in ("human", "user") else "assistant",
|
||||
content=msg.content,
|
||||
agent=agent,
|
||||
model=getattr(msg, "model", None),
|
||||
)
|
||||
|
||||
|
||||
class AgentState(TypedDict):
|
||||
messages: Annotated[list, None]
|
||||
# Core message history with add_messages reducer
|
||||
messages: Annotated[list[BaseMessage], add_messages]
|
||||
|
||||
# Session identifiers
|
||||
user_id: str
|
||||
conversation_id: str
|
||||
|
||||
# Agent routing
|
||||
current_agent: AgentRole
|
||||
active_agents: list[AgentRole]
|
||||
# Agent routing state
|
||||
current_agent: str | None
|
||||
next_step: str | None # For explicit graph routing
|
||||
|
||||
# Task tracking
|
||||
# Traceability
|
||||
agent_trace: list[str]
|
||||
|
||||
# Task & Entity Tracking (Business Logic)
|
||||
pending_tasks: list[dict]
|
||||
completed_tasks: list[dict]
|
||||
created_entities: list[dict]
|
||||
|
||||
# Tool usage
|
||||
tool_calls: list[ToolCall]
|
||||
last_tool_result: str | None
|
||||
|
||||
# Knowledge context
|
||||
# Context summaries (for long-term or cross-agent context)
|
||||
knowledge_context: str | None
|
||||
graph_context: str | None
|
||||
|
||||
# Planning
|
||||
plan: str | None
|
||||
plan_steps: list[dict]
|
||||
|
||||
# Analysis
|
||||
schedule_context_summary: str | None
|
||||
analysis_report: str | None
|
||||
|
||||
# Output control
|
||||
final_response: str | None
|
||||
should_respond: bool
|
||||
|
||||
# Memory context (injected at start of each conversation)
|
||||
# Memory & Environment
|
||||
memory_context: str | None
|
||||
current_datetime_context: str | None
|
||||
|
||||
# Configuration
|
||||
user_llm_config: dict | None
|
||||
provider_capabilities: dict | None
|
||||
|
||||
|
||||
def initial_state(user_id: str, conversation_id: str) -> AgentState:
|
||||
@@ -88,18 +64,18 @@ def initial_state(user_id: str, conversation_id: str) -> AgentState:
|
||||
messages=[],
|
||||
user_id=user_id,
|
||||
conversation_id=conversation_id,
|
||||
current_agent=AgentRole.MASTER,
|
||||
active_agents=[AgentRole.MASTER],
|
||||
current_agent=AgentRole.MASTER.value,
|
||||
next_step=None,
|
||||
agent_trace=[AgentRole.MASTER.value],
|
||||
pending_tasks=[],
|
||||
completed_tasks=[],
|
||||
tool_calls=[],
|
||||
last_tool_result=None,
|
||||
created_entities=[],
|
||||
knowledge_context=None,
|
||||
graph_context=None,
|
||||
plan=None,
|
||||
plan_steps=[],
|
||||
schedule_context_summary=None,
|
||||
analysis_report=None,
|
||||
final_response=None,
|
||||
should_respond=True,
|
||||
memory_context=None,
|
||||
current_datetime_context=None,
|
||||
user_llm_config=None,
|
||||
provider_capabilities=None,
|
||||
)
|
||||
|
||||
@@ -1,22 +1,85 @@
|
||||
from app.agents.tools.search import (
|
||||
search_knowledge, get_knowledge_graph_context,
|
||||
build_knowledge_graph, hybrid_search,
|
||||
build_knowledge_graph, hybrid_search, web_search,
|
||||
)
|
||||
from app.agents.tools.task import get_tasks, create_task, update_task_status
|
||||
from app.agents.tools.forum import get_forum_posts, create_forum_post, scan_forum_for_instructions
|
||||
from app.agents.tools.schedule import (
|
||||
get_schedule_day,
|
||||
create_todo,
|
||||
create_schedule_task,
|
||||
create_reminder,
|
||||
create_goal,
|
||||
)
|
||||
from app.agents.tools.time_reasoning import resolve_time_expression
|
||||
|
||||
ALL_TOOLS = [
|
||||
# 知识库工具
|
||||
search_knowledge,
|
||||
get_knowledge_graph_context,
|
||||
build_knowledge_graph,
|
||||
hybrid_search,
|
||||
# 任务工具
|
||||
TASK_TOOLS = [
|
||||
get_tasks,
|
||||
create_task,
|
||||
update_task_status,
|
||||
# 论坛工具
|
||||
]
|
||||
|
||||
SCHEDULE_READ_TOOLS = [
|
||||
get_schedule_day,
|
||||
get_tasks,
|
||||
resolve_time_expression,
|
||||
]
|
||||
|
||||
SCHEDULE_WRITE_TOOLS = [
|
||||
create_todo,
|
||||
create_schedule_task,
|
||||
create_reminder,
|
||||
create_goal,
|
||||
]
|
||||
|
||||
FORUM_TOOLS = [
|
||||
get_forum_posts,
|
||||
create_forum_post,
|
||||
scan_forum_for_instructions,
|
||||
]
|
||||
|
||||
KNOWLEDGE_RETRIEVAL_TOOLS = [
|
||||
search_knowledge,
|
||||
hybrid_search,
|
||||
web_search,
|
||||
get_knowledge_graph_context,
|
||||
]
|
||||
|
||||
KNOWLEDGE_GRAPH_TOOLS = [
|
||||
get_knowledge_graph_context,
|
||||
build_knowledge_graph,
|
||||
]
|
||||
|
||||
ANALYST_PROGRESS_TOOLS = [
|
||||
get_tasks,
|
||||
get_forum_posts,
|
||||
scan_forum_for_instructions,
|
||||
]
|
||||
|
||||
ANALYST_INSIGHT_TOOLS = [
|
||||
get_tasks,
|
||||
get_forum_posts,
|
||||
search_knowledge,
|
||||
hybrid_search,
|
||||
web_search,
|
||||
]
|
||||
|
||||
ALL_TOOLS = [
|
||||
*KNOWLEDGE_RETRIEVAL_TOOLS,
|
||||
build_knowledge_graph,
|
||||
*TASK_TOOLS,
|
||||
*SCHEDULE_READ_TOOLS,
|
||||
*SCHEDULE_WRITE_TOOLS,
|
||||
*FORUM_TOOLS,
|
||||
]
|
||||
|
||||
SUB_COMMANDER_TOOLSETS = {
|
||||
"schedule_analysis": SCHEDULE_READ_TOOLS,
|
||||
"schedule_planning": [*SCHEDULE_READ_TOOLS, *SCHEDULE_WRITE_TOOLS],
|
||||
"executor_tasks": [*TASK_TOOLS, resolve_time_expression, *SCHEDULE_WRITE_TOOLS],
|
||||
"executor_forum": FORUM_TOOLS,
|
||||
"librarian_retrieval": KNOWLEDGE_RETRIEVAL_TOOLS,
|
||||
"librarian_graph": KNOWLEDGE_GRAPH_TOOLS,
|
||||
"analyst_progress": ANALYST_PROGRESS_TOOLS,
|
||||
"analyst_insights": ANALYST_INSIGHT_TOOLS,
|
||||
}
|
||||
|
||||
@@ -6,15 +6,17 @@ from app.models.forum import ForumPost, ForumReply
|
||||
from app.agents.context import get_current_user
|
||||
from sqlalchemy import select
|
||||
import asyncio
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
_executor = ThreadPoolExecutor(max_workers=4)
|
||||
|
||||
|
||||
def _run_async(coro, timeout: int = 30):
|
||||
try:
|
||||
loop = asyncio.get_running_loop()
|
||||
future = loop.run_in_executor(__import__("concurrent.futures").ThreadPoolExecutor(), lambda: asyncio.run(coro))
|
||||
return future.result(timeout=timeout)
|
||||
asyncio.get_running_loop()
|
||||
except RuntimeError:
|
||||
return asyncio.run(coro)
|
||||
return _executor.submit(asyncio.run, coro).result(timeout=timeout)
|
||||
|
||||
|
||||
@tool
|
||||
|
||||
308
backend/app/agents/tools/schedule.py
Normal file
308
backend/app/agents/tools/schedule.py
Normal file
@@ -0,0 +1,308 @@
|
||||
"""Agent 工具集 - 日程相关"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from datetime import date, datetime
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
from langchain_core.tools import tool
|
||||
from sqlalchemy import select
|
||||
|
||||
from app.agents.context import get_current_user
|
||||
from app.database import async_session
|
||||
from app.models.goal import Goal, GoalStatus
|
||||
from app.models.reminder import Reminder
|
||||
from app.models.task import Task, TaskPriority, TaskStatus
|
||||
from app.models.todo import DailyTodo, TodoSource
|
||||
|
||||
_executor = ThreadPoolExecutor(max_workers=4)
|
||||
|
||||
|
||||
def _run_async(coro, timeout: int = 30):
|
||||
try:
|
||||
asyncio.get_running_loop()
|
||||
except RuntimeError:
|
||||
return asyncio.run(coro)
|
||||
return _executor.submit(asyncio.run, coro).result(timeout=timeout)
|
||||
|
||||
|
||||
def _parse_date(value: str | None) -> date:
|
||||
if not value:
|
||||
return date.today()
|
||||
return date.fromisoformat(value)
|
||||
|
||||
|
||||
def _parse_datetime(value: str) -> datetime:
|
||||
normalized = value.strip().replace("Z", "+00:00")
|
||||
return datetime.fromisoformat(normalized)
|
||||
|
||||
|
||||
def _parse_datetime_with_timezone(value: str, time_zone: str | None) -> datetime:
|
||||
"""Parse an ISO datetime and return a tz-naive datetime in the intended local time.
|
||||
|
||||
- If value includes an offset/Z, it will be converted to `time_zone` when provided.
|
||||
- If value is naive and `time_zone` is provided, it is interpreted in that zone.
|
||||
"""
|
||||
parsed = _parse_datetime(value)
|
||||
tz = (time_zone or "").strip()
|
||||
if parsed.tzinfo is None:
|
||||
if tz:
|
||||
parsed = parsed.replace(tzinfo=ZoneInfo(tz))
|
||||
return parsed.replace(tzinfo=None)
|
||||
|
||||
if tz:
|
||||
parsed = parsed.astimezone(ZoneInfo(tz))
|
||||
return parsed.replace(tzinfo=None)
|
||||
|
||||
|
||||
def _normalize_title(title: str | None, content: str | None) -> str:
|
||||
resolved = (title or content or "").strip()
|
||||
if not resolved:
|
||||
raise ValueError("title 不能为空")
|
||||
return resolved
|
||||
|
||||
|
||||
def _normalize_schedule_due_date(due_date: str | None, date_value: str | None) -> str | None:
|
||||
resolved = (due_date or date_value or "").strip()
|
||||
if not resolved:
|
||||
return None
|
||||
if "T" in resolved:
|
||||
return resolved
|
||||
return f"{resolved}T09:00:00"
|
||||
|
||||
|
||||
def _format_summary(target_date: date, todos: list[DailyTodo], tasks: list[Task], reminders: list[Reminder], goals: list[Goal]) -> str:
|
||||
lines = [f"日期: {target_date.isoformat()}"]
|
||||
|
||||
if todos:
|
||||
lines.append("待办:")
|
||||
lines.extend(f"- {item.title} | 完成:{'是' if item.is_completed else '否'}" for item in todos)
|
||||
else:
|
||||
lines.append("待办: 无")
|
||||
|
||||
if tasks:
|
||||
lines.append("任务:")
|
||||
lines.extend(
|
||||
f"- {item.title} | 状态:{item.status.value if hasattr(item.status, 'value') else item.status} | 优先级:{item.priority.value if hasattr(item.priority, 'value') else item.priority} | 截止:{item.due_date.isoformat() if item.due_date else '无'}"
|
||||
for item in tasks
|
||||
)
|
||||
else:
|
||||
lines.append("任务: 无")
|
||||
|
||||
if reminders:
|
||||
lines.append("提醒:")
|
||||
lines.extend(f"- {item.title} | 时间:{item.reminder_at.isoformat()}" for item in reminders)
|
||||
else:
|
||||
lines.append("提醒: 无")
|
||||
|
||||
if goals:
|
||||
lines.append("目标:")
|
||||
lines.extend(
|
||||
f"- {item.title} | 状态:{item.status.value if hasattr(item.status, 'value') else item.status}"
|
||||
for item in goals
|
||||
)
|
||||
else:
|
||||
lines.append("目标: 无")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
@tool
|
||||
def get_schedule_day(target_date: str | None = None) -> str:
|
||||
"""获取指定日期的 todo/task/reminder/goal 聚合信息。target_date 格式 YYYY-MM-DD,默认今天。"""
|
||||
uid = get_current_user()
|
||||
parsed_date = _parse_date(target_date)
|
||||
date_key = parsed_date.isoformat()
|
||||
start_dt = datetime.combine(parsed_date, datetime.min.time())
|
||||
end_dt = datetime.combine(parsed_date, datetime.max.time())
|
||||
|
||||
async def _get():
|
||||
async with async_session() as db:
|
||||
todos = (
|
||||
await db.execute(
|
||||
select(DailyTodo)
|
||||
.where(DailyTodo.user_id == uid, DailyTodo.todo_date == date_key)
|
||||
.order_by(DailyTodo.created_at.desc())
|
||||
)
|
||||
).scalars().all()
|
||||
tasks = (
|
||||
await db.execute(
|
||||
select(Task)
|
||||
.where(
|
||||
Task.user_id == uid,
|
||||
Task.due_date.is_not(None),
|
||||
Task.due_date >= start_dt,
|
||||
Task.due_date <= end_dt,
|
||||
)
|
||||
.order_by(Task.created_at.desc())
|
||||
)
|
||||
).scalars().all()
|
||||
reminders = (
|
||||
await db.execute(
|
||||
select(Reminder)
|
||||
.where(
|
||||
Reminder.user_id == uid,
|
||||
Reminder.reminder_at >= start_dt,
|
||||
Reminder.reminder_at <= end_dt,
|
||||
)
|
||||
.order_by(Reminder.reminder_at.asc(), Reminder.created_at.asc())
|
||||
)
|
||||
).scalars().all()
|
||||
goals = (
|
||||
await db.execute(
|
||||
select(Goal)
|
||||
.where(Goal.user_id == uid, Goal.goal_date == date_key)
|
||||
.order_by(Goal.created_at.desc())
|
||||
)
|
||||
).scalars().all()
|
||||
return _format_summary(parsed_date, todos, tasks, reminders, goals)
|
||||
|
||||
try:
|
||||
return _run_async(_get())
|
||||
except Exception as exc:
|
||||
return f"获取日程失败: {exc}"
|
||||
|
||||
|
||||
@tool
|
||||
def create_todo(title: str, todo_date: str | None = None) -> str:
|
||||
"""创建指定日期的待办。todo_date 格式 YYYY-MM-DD,默认今天。"""
|
||||
uid = get_current_user()
|
||||
parsed_date = _parse_date(todo_date)
|
||||
|
||||
async def _create():
|
||||
async with async_session() as db:
|
||||
todo = DailyTodo(
|
||||
user_id=uid,
|
||||
title=title,
|
||||
source=TodoSource.AI_CHAT,
|
||||
todo_date=parsed_date.isoformat(),
|
||||
)
|
||||
db.add(todo)
|
||||
await db.commit()
|
||||
await db.refresh(todo)
|
||||
return f"TODO创建成功: [{todo.id[:8]}] {todo.title} @ {todo.todo_date}"
|
||||
|
||||
try:
|
||||
return _run_async(_create())
|
||||
except Exception as exc:
|
||||
return f"创建TODO失败: {exc}"
|
||||
|
||||
|
||||
@tool
|
||||
def create_schedule_task(
|
||||
title: str = "",
|
||||
description: str = "",
|
||||
priority: str = "medium",
|
||||
due_date: str | None = None,
|
||||
content: str = "",
|
||||
date: str | None = None,
|
||||
) -> str:
|
||||
"""创建任务。priority 支持 low/medium/high/urgent;due_date 使用 ISO datetime。兼容 content/date 别名。"""
|
||||
uid = get_current_user()
|
||||
resolved_title = _normalize_title(title, content)
|
||||
resolved_due_date = _normalize_schedule_due_date(due_date, date)
|
||||
|
||||
async def _create():
|
||||
async with async_session() as db:
|
||||
task = Task(
|
||||
user_id=uid,
|
||||
title=resolved_title,
|
||||
description=description or content or None,
|
||||
priority=TaskPriority(priority),
|
||||
due_date=_parse_datetime(resolved_due_date) if resolved_due_date else None,
|
||||
status=TaskStatus.TODO,
|
||||
)
|
||||
db.add(task)
|
||||
await db.commit()
|
||||
await db.refresh(task)
|
||||
due_label = task.due_date.isoformat() if task.due_date else "无截止时间"
|
||||
return f"任务创建成功: [{task.id[:8]}] {task.title} | 优先级:{task.priority.value} | 截止:{due_label}"
|
||||
|
||||
try:
|
||||
return _run_async(_create())
|
||||
except Exception as exc:
|
||||
return f"创建任务失败: {exc}"
|
||||
|
||||
|
||||
@tool
|
||||
def create_reminder(
|
||||
title: str = "",
|
||||
reminder_at: str | None = None,
|
||||
note: str = "",
|
||||
description: str = "",
|
||||
datetime: str = "",
|
||||
at: str = "",
|
||||
remind_at: str = "",
|
||||
content: str = "",
|
||||
time_zone: str = "",
|
||||
timezone: str = "",
|
||||
time: str = "",
|
||||
) -> str:
|
||||
"""创建提醒。reminder_at 使用 ISO datetime。兼容 description/datetime/at/remind_at/time_zone 别名。"""
|
||||
uid = get_current_user()
|
||||
|
||||
try:
|
||||
resolved_title = (title or content or "").strip()
|
||||
if not resolved_title:
|
||||
raise ValueError("title 不能为空")
|
||||
|
||||
resolved_at = ((reminder_at or datetime or at or remind_at or time or "").strip())
|
||||
if not resolved_at:
|
||||
raise ValueError("reminder_at 不能为空")
|
||||
|
||||
resolved_note = (note or description or "").strip()
|
||||
|
||||
async def _create():
|
||||
async with async_session() as db:
|
||||
tz = (time_zone or timezone or "").strip()
|
||||
reminder = Reminder(
|
||||
user_id=uid,
|
||||
title=resolved_title,
|
||||
note=resolved_note or None,
|
||||
reminder_at=_parse_datetime_with_timezone(resolved_at, tz),
|
||||
)
|
||||
db.add(reminder)
|
||||
await db.commit()
|
||||
await db.refresh(reminder)
|
||||
return f"提醒创建成功: [{reminder.id[:8]}] {reminder.title} @ {reminder.reminder_at.isoformat()}"
|
||||
|
||||
return _run_async(_create())
|
||||
except Exception as exc:
|
||||
return f"创建提醒失败: {exc}"
|
||||
|
||||
|
||||
@tool
|
||||
def create_goal(title: str, goal_date: str | None = None, note: str = "", status: str = "active") -> str:
|
||||
"""创建指定日期目标。goal_date 格式 YYYY-MM-DD,默认今天;status 支持 active/done/archived。"""
|
||||
uid = get_current_user()
|
||||
parsed_date = _parse_date(goal_date)
|
||||
|
||||
async def _create():
|
||||
async with async_session() as db:
|
||||
goal = Goal(
|
||||
user_id=uid,
|
||||
title=title,
|
||||
note=note or None,
|
||||
goal_date=parsed_date.isoformat(),
|
||||
status=GoalStatus(status),
|
||||
)
|
||||
db.add(goal)
|
||||
await db.commit()
|
||||
await db.refresh(goal)
|
||||
return f"目标创建成功: [{goal.id[:8]}] {goal.title} @ {goal.goal_date}"
|
||||
|
||||
try:
|
||||
return _run_async(_create())
|
||||
except Exception as exc:
|
||||
return f"创建目标失败: {exc}"
|
||||
|
||||
|
||||
__all__ = [
|
||||
"get_schedule_day",
|
||||
"create_todo",
|
||||
"create_schedule_task",
|
||||
"create_reminder",
|
||||
"create_goal",
|
||||
]
|
||||
@@ -5,12 +5,14 @@ Agent 工具集 - 知识库 & 图谱相关
|
||||
由于 LangChain 工具系统是同步的,内部用 run_in_executor 处理 async 逻辑。
|
||||
"""
|
||||
|
||||
from langchain_core.tools import tool
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from app.database import async_session
|
||||
from app.agents.context import get_current_user
|
||||
import asyncio
|
||||
|
||||
from langchain_core.tools import tool
|
||||
|
||||
from app.agents.context import get_current_user
|
||||
from app.database import async_session
|
||||
|
||||
_executor = ThreadPoolExecutor(max_workers=4)
|
||||
|
||||
|
||||
@@ -151,9 +153,56 @@ def hybrid_search(query: str, top_k: int = 5) -> str:
|
||||
return f"混合搜索失败: {str(e)}"
|
||||
|
||||
|
||||
@tool
|
||||
def web_search(query: str, top_k: int = 5) -> str:
|
||||
"""
|
||||
通过 SearxNG 搜索外部网页信息,返回标题、链接和摘要。
|
||||
|
||||
Args:
|
||||
query: 搜索关键词
|
||||
top_k: 返回结果数量,默认 5 条
|
||||
|
||||
Returns:
|
||||
适合模型综合的网页结果文本
|
||||
"""
|
||||
from app.services.web_search_service import (
|
||||
WebSearchConfigurationError,
|
||||
WebSearchRequestError,
|
||||
WebSearchService,
|
||||
)
|
||||
|
||||
async def _search():
|
||||
service = WebSearchService()
|
||||
results = await service.search(query, limit=top_k)
|
||||
if not results:
|
||||
return "未找到相关网页结果。"
|
||||
|
||||
texts = []
|
||||
for index, result in enumerate(results, 1):
|
||||
source = f"\n来源: {result.source}" if result.source else ""
|
||||
published_at = f"\n时间: {result.published_at}" if result.published_at else ""
|
||||
snippet = result.snippet or "(无摘要)"
|
||||
texts.append(
|
||||
f"[{index}] {result.title}\n"
|
||||
f"链接: {result.url}{source}{published_at}\n"
|
||||
f"摘要: {snippet}"
|
||||
)
|
||||
return "\n\n---\n\n".join(texts)
|
||||
|
||||
try:
|
||||
return _run_async(_search(), timeout=30)
|
||||
except WebSearchConfigurationError as exc:
|
||||
return f"网页搜索不可用: {exc}"
|
||||
except WebSearchRequestError as exc:
|
||||
return f"网页搜索失败: {exc}"
|
||||
except Exception as exc:
|
||||
return f"网页搜索失败: {exc}"
|
||||
|
||||
|
||||
__all__ = [
|
||||
"search_knowledge",
|
||||
"get_knowledge_graph_context",
|
||||
"build_knowledge_graph",
|
||||
"hybrid_search",
|
||||
"web_search",
|
||||
]
|
||||
|
||||
@@ -1,22 +1,85 @@
|
||||
"""Agent 工具集 - 任务相关"""
|
||||
|
||||
from langchain_core.tools import tool
|
||||
from app.database import async_session
|
||||
from app.models.task import Task
|
||||
from app.agents.context import get_current_user
|
||||
from sqlalchemy import select
|
||||
import asyncio
|
||||
from datetime import UTC, datetime
|
||||
|
||||
_executor = None
|
||||
from app.models.base import utc_now
|
||||
|
||||
from langchain_core.tools import tool
|
||||
from sqlalchemy import select
|
||||
|
||||
from app.agents.context import get_current_user
|
||||
from app.database import async_session
|
||||
from app.models.task import Task, TaskPriority, TaskStatus
|
||||
|
||||
import asyncio
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
_executor = ThreadPoolExecutor(max_workers=4)
|
||||
|
||||
|
||||
def _run_async(coro, timeout: int = 30):
|
||||
try:
|
||||
loop = asyncio.get_running_loop()
|
||||
future = loop.run_in_executor(_executor or __import__("concurrent.futures").ThreadPoolExecutor(), lambda: asyncio.run(coro))
|
||||
return future.result(timeout=timeout)
|
||||
asyncio.get_running_loop()
|
||||
except RuntimeError:
|
||||
return asyncio.run(coro)
|
||||
return _executor.submit(asyncio.run, coro).result(timeout=timeout)
|
||||
|
||||
|
||||
def _normalize_title(title: str | None, content: str | None) -> str:
|
||||
resolved = (title or content or "").strip()
|
||||
if not resolved:
|
||||
raise ValueError("title 不能为空")
|
||||
return resolved
|
||||
|
||||
|
||||
def _normalize_due_date(due_date: str | None, date_value: str | None) -> str | None:
|
||||
resolved = (due_date or date_value or "").strip()
|
||||
return resolved or None
|
||||
|
||||
|
||||
def _parse_due_date(value: str | None) -> datetime | None:
|
||||
if not value:
|
||||
return None
|
||||
normalized = value.strip()
|
||||
if not normalized:
|
||||
return None
|
||||
if "T" not in normalized:
|
||||
normalized = f"{normalized}T00:00:00"
|
||||
parsed = datetime.fromisoformat(normalized.replace("Z", "+00:00"))
|
||||
if parsed.tzinfo is not None:
|
||||
return parsed.astimezone(UTC).replace(tzinfo=None)
|
||||
return parsed
|
||||
|
||||
|
||||
def _normalize_priority(priority: int | str | None) -> TaskPriority:
|
||||
if priority is None or priority == "":
|
||||
return TaskPriority.MEDIUM
|
||||
if isinstance(priority, TaskPriority):
|
||||
return priority
|
||||
if isinstance(priority, int):
|
||||
return {
|
||||
1: TaskPriority.LOW,
|
||||
2: TaskPriority.MEDIUM,
|
||||
3: TaskPriority.HIGH,
|
||||
4: TaskPriority.URGENT,
|
||||
}.get(priority, TaskPriority.MEDIUM)
|
||||
normalized = str(priority).strip().lower()
|
||||
if not normalized:
|
||||
return TaskPriority.MEDIUM
|
||||
return TaskPriority(normalized)
|
||||
|
||||
|
||||
def _normalize_status(status: str) -> TaskStatus:
|
||||
normalized = status.strip().lower()
|
||||
return TaskStatus(normalized)
|
||||
|
||||
|
||||
def _format_status(value: TaskStatus | str) -> str:
|
||||
return value.value if hasattr(value, "value") else str(value)
|
||||
|
||||
|
||||
def _format_priority(value: TaskPriority | str) -> str:
|
||||
return value.value if hasattr(value, "value") else str(value)
|
||||
|
||||
|
||||
@tool
|
||||
@@ -25,7 +88,7 @@ def get_tasks(status: str | None = None, limit: int = 20) -> str:
|
||||
获取用户当前的任务列表。
|
||||
|
||||
Args:
|
||||
status: 可选,筛选任务状态 (todo/in_progress/done/blocked)
|
||||
status: 可选,筛选任务状态 (todo/in_progress/done/cancelled)
|
||||
limit: 返回数量,默认20
|
||||
|
||||
Returns:
|
||||
@@ -33,67 +96,82 @@ def get_tasks(status: str | None = None, limit: int = 20) -> str:
|
||||
"""
|
||||
uid = get_current_user()
|
||||
|
||||
async def _get():
|
||||
async with async_session() as db:
|
||||
from app.models.user import User
|
||||
query = (
|
||||
select(Task)
|
||||
.join(User, User.id == Task.user_id)
|
||||
.where(User.id == uid)
|
||||
)
|
||||
if status:
|
||||
query = query.where(Task.status == status)
|
||||
query = query.order_by(Task.priority.desc(), Task.updated_at.desc()).limit(limit)
|
||||
result = await db.execute(query)
|
||||
tasks = result.scalars().all()
|
||||
if not tasks:
|
||||
return "暂无任务"
|
||||
lines = []
|
||||
for t in tasks:
|
||||
lines.append(
|
||||
f"- [{t.id[:8]}] {t.title} | "
|
||||
f"状态:{t.status} | 优先级:{t.priority} | 截止:{t.due_date or '无'}"
|
||||
)
|
||||
return "\n".join(lines)
|
||||
|
||||
try:
|
||||
resolved_status = _normalize_status(status) if status else None
|
||||
|
||||
async def _get():
|
||||
async with async_session() as db:
|
||||
from app.models.user import User
|
||||
query = (
|
||||
select(Task)
|
||||
.join(User, User.id == Task.user_id)
|
||||
.where(User.id == uid)
|
||||
)
|
||||
if resolved_status:
|
||||
query = query.where(Task.status == resolved_status)
|
||||
query = query.order_by(Task.priority.desc(), Task.updated_at.desc()).limit(limit)
|
||||
result = await db.execute(query)
|
||||
tasks = result.scalars().all()
|
||||
if not tasks:
|
||||
return "暂无任务"
|
||||
lines = []
|
||||
for t in tasks:
|
||||
lines.append(
|
||||
f"- [{t.id[:8]}] {t.title} | "
|
||||
f"状态:{_format_status(t.status)} | 优先级:{_format_priority(t.priority)} | 截止:{t.due_date or '无'}"
|
||||
)
|
||||
return "\n".join(lines)
|
||||
|
||||
return _run_async(_get())
|
||||
except Exception as e:
|
||||
return f"获取任务失败: {str(e)}"
|
||||
|
||||
|
||||
@tool
|
||||
def create_task(title: str, description: str = "", priority: int = 2, due_date: str | None = None) -> str:
|
||||
def create_task(
|
||||
title: str = "",
|
||||
description: str = "",
|
||||
priority: int | str = 2,
|
||||
due_date: str | None = None,
|
||||
content: str = "",
|
||||
date: str | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
创建新任务。
|
||||
|
||||
Args:
|
||||
title: 任务标题(必填)
|
||||
title: 任务标题(必填,兼容 content 作为别名)
|
||||
description: 任务描述
|
||||
priority: 优先级 1-4,数字越大优先级越高,默认2
|
||||
due_date: 截止日期,格式 YYYY-MM-DD
|
||||
priority: 优先级,支持 1-4 或 low/medium/high/urgent,默认2
|
||||
due_date: 截止日期,格式 YYYY-MM-DD 或 ISO datetime
|
||||
content: title 的兼容别名
|
||||
date: due_date 的兼容别名
|
||||
|
||||
Returns:
|
||||
创建结果
|
||||
"""
|
||||
uid = get_current_user()
|
||||
|
||||
async def _create():
|
||||
async with async_session() as db:
|
||||
task = Task(
|
||||
user_id=uid,
|
||||
title=title,
|
||||
description=description,
|
||||
priority=priority,
|
||||
due_date=due_date,
|
||||
status="todo",
|
||||
)
|
||||
db.add(task)
|
||||
await db.commit()
|
||||
await db.refresh(task)
|
||||
return f"任务创建成功: [{task.id[:8]}] {title}"
|
||||
|
||||
try:
|
||||
resolved_title = _normalize_title(title, content)
|
||||
resolved_due_date = _normalize_due_date(due_date, date)
|
||||
resolved_priority = _normalize_priority(priority)
|
||||
|
||||
async def _create():
|
||||
async with async_session() as db:
|
||||
task = Task(
|
||||
user_id=uid,
|
||||
title=resolved_title,
|
||||
description=description or content or None,
|
||||
priority=resolved_priority,
|
||||
due_date=_parse_due_date(resolved_due_date),
|
||||
status=TaskStatus.TODO,
|
||||
)
|
||||
db.add(task)
|
||||
await db.commit()
|
||||
await db.refresh(task)
|
||||
return f"任务创建成功: [{task.id[:8]}] {resolved_title}"
|
||||
|
||||
return _run_async(_create())
|
||||
except Exception as e:
|
||||
return f"创建任务失败: {str(e)}"
|
||||
@@ -106,34 +184,37 @@ def update_task_status(task_id: str, status: str) -> str:
|
||||
|
||||
Args:
|
||||
task_id: 任务ID(完整ID或前8位)
|
||||
status: 新状态 (todo/in_progress/done/blocked)
|
||||
status: 新状态 (todo/in_progress/done/cancelled)
|
||||
|
||||
Returns:
|
||||
更新结果
|
||||
"""
|
||||
uid = get_current_user()
|
||||
|
||||
async def _update():
|
||||
async with async_session() as db:
|
||||
from app.models.user import User
|
||||
query = (
|
||||
select(Task)
|
||||
.join(User, User.id == Task.user_id)
|
||||
.where(User.id == uid)
|
||||
)
|
||||
if len(task_id) == 8:
|
||||
query = query.where(Task.id.like(f"{task_id}%"))
|
||||
else:
|
||||
query = query.where(Task.id == task_id)
|
||||
result = await db.execute(query)
|
||||
task = result.scalar_one_or_none()
|
||||
if not task:
|
||||
return f"任务不存在: {task_id}"
|
||||
task.status = status
|
||||
await db.commit()
|
||||
return f"任务状态已更新: {task.title} -> {status}"
|
||||
|
||||
try:
|
||||
resolved_status = _normalize_status(status)
|
||||
|
||||
async def _update():
|
||||
async with async_session() as db:
|
||||
from app.models.user import User
|
||||
query = (
|
||||
select(Task)
|
||||
.join(User, User.id == Task.user_id)
|
||||
.where(User.id == uid)
|
||||
)
|
||||
if len(task_id) == 8:
|
||||
query = query.where(Task.id.like(f"{task_id}%"))
|
||||
else:
|
||||
query = query.where(Task.id == task_id)
|
||||
result = await db.execute(query)
|
||||
task = result.scalar_one_or_none()
|
||||
if not task:
|
||||
return f"任务不存在: {task_id}"
|
||||
task.status = resolved_status
|
||||
task.completed_at = utc_now() if resolved_status == TaskStatus.DONE else None
|
||||
await db.commit()
|
||||
return f"任务状态已更新: {task.title} -> {resolved_status.value}"
|
||||
|
||||
return _run_async(_update())
|
||||
except Exception as e:
|
||||
return f"更新任务失败: {str(e)}"
|
||||
|
||||
269
backend/app/agents/tools/time_reasoning.py
Normal file
269
backend/app/agents/tools/time_reasoning.py
Normal file
@@ -0,0 +1,269 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from datetime import UTC, date, datetime, time, timedelta
|
||||
|
||||
from langchain_core.tools import tool
|
||||
|
||||
_WEEKDAY_MAP = {"一": 0, "二": 1, "三": 2, "四": 3, "五": 4, "六": 5, "日": 6, "天": 6}
|
||||
_DEFAULT_HOUR_BY_PERIOD = {
|
||||
"morning": 9,
|
||||
"noon": 12,
|
||||
"afternoon": 15,
|
||||
"evening": 20,
|
||||
}
|
||||
_TIME_KEYWORDS = ("今天", "明天", "后天", "本周", "这周", "下周", "周", "星期", "月", "日", "早上", "上午", "中午", "下午", "晚上", "今晚", "点", ":", ":")
|
||||
|
||||
|
||||
def _parse_datetime(value: str) -> datetime:
|
||||
normalized = value.strip().replace("Z", "+00:00")
|
||||
return datetime.fromisoformat(normalized)
|
||||
|
||||
|
||||
def extract_reference_datetime(current_datetime_context: str | None) -> datetime:
|
||||
context = (current_datetime_context or "").strip()
|
||||
if context:
|
||||
for pattern in (r"current_time_utc:\s*(\S+)", r"CURRENT_TIME:\s*(\S+)", r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2}))"):
|
||||
match = re.search(pattern, context)
|
||||
if match:
|
||||
return _parse_datetime(match.group(1))
|
||||
return datetime.now(UTC)
|
||||
|
||||
|
||||
def _normalize_local_iso(value: datetime) -> str:
|
||||
return value.replace(tzinfo=None).isoformat(timespec="seconds")
|
||||
|
||||
|
||||
def _normalize_datetime_iso(value: datetime) -> str:
|
||||
if value.tzinfo is not None:
|
||||
return value.isoformat(timespec="seconds")
|
||||
return _normalize_local_iso(value)
|
||||
|
||||
|
||||
def _normalize_date_iso(value: date) -> str:
|
||||
return value.isoformat()
|
||||
|
||||
|
||||
def _is_iso_datetime(value: str) -> bool:
|
||||
try:
|
||||
parsed = _parse_datetime(value)
|
||||
except ValueError:
|
||||
return False
|
||||
return isinstance(parsed, datetime)
|
||||
|
||||
|
||||
def _is_iso_date(value: str) -> bool:
|
||||
try:
|
||||
date.fromisoformat(value.strip())
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
def _has_explicit_time(text: str) -> bool:
|
||||
return bool(
|
||||
re.search(r"\d{1,2}[::]\d{2}", text)
|
||||
or re.search(r"\d{1,2}点(?:半|(?:\d{1,2})分?)?", text)
|
||||
or any(keyword in text for keyword in ("早上", "上午", "中午", "下午", "晚上", "今晚"))
|
||||
)
|
||||
|
||||
|
||||
def _detect_period(text: str) -> str | None:
|
||||
if any(keyword in text for keyword in ("晚上", "今晚")):
|
||||
return "evening"
|
||||
if "下午" in text:
|
||||
return "afternoon"
|
||||
if "中午" in text:
|
||||
return "noon"
|
||||
if any(keyword in text for keyword in ("早上", "上午", "早晨", "清晨")):
|
||||
return "morning"
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_time(text: str) -> tuple[time, bool, str | None]:
|
||||
period = _detect_period(text)
|
||||
colon_match = re.search(r"(\d{1,2})[::](\d{2})", text)
|
||||
if colon_match:
|
||||
hour = int(colon_match.group(1))
|
||||
minute = int(colon_match.group(2))
|
||||
if period in {"afternoon", "evening"} and hour < 12:
|
||||
hour += 12
|
||||
return time(hour=hour, minute=minute), False, period
|
||||
|
||||
half_match = re.search(r"(\d{1,2})点半", text)
|
||||
if half_match:
|
||||
hour = int(half_match.group(1))
|
||||
if period in {"afternoon", "evening"} and hour < 12:
|
||||
hour += 12
|
||||
return time(hour=hour, minute=30), False, period
|
||||
|
||||
dot_match = re.search(r"(\d{1,2})点(?:(\d{1,2})分?)?", text)
|
||||
if dot_match:
|
||||
hour = int(dot_match.group(1))
|
||||
minute = int(dot_match.group(2) or 0)
|
||||
if period in {"afternoon", "evening"} and hour < 12:
|
||||
hour += 12
|
||||
if period == "noon" and hour < 11:
|
||||
hour += 12
|
||||
return time(hour=hour, minute=minute), False, period
|
||||
|
||||
if period:
|
||||
return time(hour=_DEFAULT_HOUR_BY_PERIOD[period], minute=0), True, period
|
||||
return time(hour=9, minute=0), True, None
|
||||
|
||||
|
||||
def _resolve_date(text: str, reference: datetime) -> tuple[date, str]:
|
||||
stripped = text.strip()
|
||||
if _is_iso_date(stripped):
|
||||
return date.fromisoformat(stripped), "explicit_date"
|
||||
|
||||
month_day_match = re.search(r"(\d{1,2})月(\d{1,2})日", stripped)
|
||||
if month_day_match:
|
||||
month = int(month_day_match.group(1))
|
||||
day = int(month_day_match.group(2))
|
||||
candidate = date(reference.year, month, day)
|
||||
if candidate < reference.date() - timedelta(days=1):
|
||||
candidate = date(reference.year + 1, month, day)
|
||||
return candidate, "explicit_month_day"
|
||||
|
||||
if "后天" in stripped:
|
||||
return reference.date() + timedelta(days=2), "relative_day"
|
||||
if "明天" in stripped:
|
||||
return reference.date() + timedelta(days=1), "relative_day"
|
||||
if "今天" in stripped:
|
||||
return reference.date(), "relative_day"
|
||||
|
||||
weekday_match = re.search(r"((?:本周|这周|下周)?)(?:周|星期)([一二三四五六日天])", stripped)
|
||||
if weekday_match:
|
||||
prefix = weekday_match.group(1)
|
||||
weekday = _WEEKDAY_MAP[weekday_match.group(2)]
|
||||
current_weekday = reference.date().weekday()
|
||||
delta = weekday - current_weekday
|
||||
if prefix == "下周":
|
||||
delta += 7 if delta <= 0 else 7
|
||||
elif prefix in {"本周", "这周"}:
|
||||
if delta < 0:
|
||||
delta += 7
|
||||
elif delta < 0:
|
||||
delta += 7
|
||||
return reference.date() + timedelta(days=delta), "relative_weekday"
|
||||
|
||||
return reference.date(), "reference_day"
|
||||
|
||||
|
||||
def resolve_time_expression_data(
|
||||
expression: str,
|
||||
*,
|
||||
current_datetime_context: str | None = None,
|
||||
prefer: str = "datetime",
|
||||
) -> dict:
|
||||
text = (expression or "").strip()
|
||||
if not text:
|
||||
raise ValueError("expression 不能为空")
|
||||
|
||||
reference = extract_reference_datetime(current_datetime_context)
|
||||
|
||||
if _is_iso_datetime(text):
|
||||
parsed = _parse_datetime(text)
|
||||
return {
|
||||
"expression": text,
|
||||
"reference_time": reference.isoformat(),
|
||||
"grain": "datetime",
|
||||
"resolved_date": _normalize_date_iso(parsed.date()),
|
||||
"resolved_datetime": _normalize_datetime_iso(parsed),
|
||||
"assumed_time": False,
|
||||
"reason": "explicit_datetime",
|
||||
}
|
||||
|
||||
if _is_iso_date(text):
|
||||
parsed_date = date.fromisoformat(text)
|
||||
return {
|
||||
"expression": text,
|
||||
"reference_time": reference.isoformat(),
|
||||
"grain": "date",
|
||||
"resolved_date": _normalize_date_iso(parsed_date),
|
||||
"resolved_datetime": None,
|
||||
"assumed_time": False,
|
||||
"reason": "explicit_date",
|
||||
}
|
||||
|
||||
resolved_date, date_reason = _resolve_date(text, reference)
|
||||
resolved_time, assumed_time, period = _resolve_time(text)
|
||||
has_explicit_time = _has_explicit_time(text)
|
||||
grain = "date" if prefer == "date" and not has_explicit_time else "datetime"
|
||||
resolved_dt = datetime.combine(resolved_date, resolved_time)
|
||||
note = date_reason
|
||||
if period:
|
||||
note = f"{note}:{period}"
|
||||
if assumed_time:
|
||||
note = f"{note}:assumed_time"
|
||||
return {
|
||||
"expression": text,
|
||||
"reference_time": reference.isoformat(),
|
||||
"grain": grain,
|
||||
"resolved_date": _normalize_date_iso(resolved_date),
|
||||
"resolved_datetime": None if grain == "date" else _normalize_local_iso(resolved_dt),
|
||||
"assumed_time": assumed_time,
|
||||
"reason": note,
|
||||
}
|
||||
|
||||
|
||||
@tool
|
||||
def resolve_time_expression(
|
||||
expression: str,
|
||||
current_datetime_context: str = "",
|
||||
prefer: str = "datetime",
|
||||
) -> str:
|
||||
"""解析中文自然语言时间表达,基于当前参考时间返回明确的日期或 datetime。prefer 支持 datetime/date。"""
|
||||
try:
|
||||
payload = resolve_time_expression_data(
|
||||
expression,
|
||||
current_datetime_context=current_datetime_context or None,
|
||||
prefer=prefer,
|
||||
)
|
||||
return json.dumps(payload, ensure_ascii=False)
|
||||
except Exception as exc:
|
||||
return json.dumps(
|
||||
{
|
||||
"expression": expression,
|
||||
"error": str(exc),
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
|
||||
|
||||
def normalize_tool_time_arguments(tool_name: str, args: dict, current_datetime_context: str | None) -> dict:
|
||||
normalized = dict(args)
|
||||
|
||||
if tool_name == "create_reminder":
|
||||
raw_value = next((normalized.get(key) for key in ("reminder_at", "datetime", "at", "remind_at", "time") if isinstance(normalized.get(key), str) and normalized.get(key).strip()), None)
|
||||
if raw_value and not _is_iso_datetime(raw_value):
|
||||
payload = resolve_time_expression_data(raw_value, current_datetime_context=current_datetime_context, prefer="datetime")
|
||||
normalized["reminder_at"] = payload["resolved_datetime"]
|
||||
return normalized
|
||||
|
||||
if tool_name in {"create_schedule_task", "create_task"}:
|
||||
raw_value = next((normalized.get(key) for key in ("due_date", "date") if isinstance(normalized.get(key), str) and normalized.get(key).strip()), None)
|
||||
if raw_value and not _is_iso_datetime(raw_value) and not _is_iso_date(raw_value):
|
||||
prefer = "datetime" if tool_name == "create_schedule_task" or _has_explicit_time(raw_value) else "date"
|
||||
payload = resolve_time_expression_data(raw_value, current_datetime_context=current_datetime_context, prefer=prefer)
|
||||
normalized["due_date"] = payload["resolved_datetime"] or payload["resolved_date"]
|
||||
return normalized
|
||||
|
||||
if tool_name in {"create_todo", "create_goal", "get_schedule_day"}:
|
||||
field_name = {
|
||||
"create_todo": "todo_date",
|
||||
"create_goal": "goal_date",
|
||||
"get_schedule_day": "target_date",
|
||||
}[tool_name]
|
||||
raw_value = normalized.get(field_name)
|
||||
if isinstance(raw_value, str) and raw_value.strip() and not _is_iso_date(raw_value):
|
||||
payload = resolve_time_expression_data(raw_value, current_datetime_context=current_datetime_context, prefer="date")
|
||||
normalized[field_name] = payload["resolved_date"]
|
||||
return normalized
|
||||
|
||||
return normalized
|
||||
|
||||
|
||||
__all__ = ["resolve_time_expression", "resolve_time_expression_data", "normalize_tool_time_arguments", "extract_reference_datetime"]
|
||||
@@ -1,14 +1,30 @@
|
||||
from pathlib import Path
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
from typing import Literal
|
||||
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
ENV_FILE = REPO_ROOT / ".env"
|
||||
|
||||
|
||||
def _resolve_path(value: str) -> str:
|
||||
path = Path(value)
|
||||
if path.is_absolute():
|
||||
return str(path)
|
||||
return str((REPO_ROOT / path).resolve())
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")
|
||||
model_config = SettingsConfigDict(
|
||||
env_file=str(ENV_FILE), env_file_encoding="utf-8", extra="ignore"
|
||||
)
|
||||
|
||||
# === 应用基础 ===
|
||||
APP_NAME: str = "Jarvis"
|
||||
APP_VERSION: str = "0.1.0"
|
||||
DEBUG: bool = False
|
||||
HOST: str = "127.0.0.1"
|
||||
PORT: int = 9527
|
||||
|
||||
# === 安全 ===
|
||||
SECRET_KEY: str = "change-me-in-production"
|
||||
@@ -17,10 +33,10 @@ class Settings(BaseSettings):
|
||||
|
||||
# === 数据库 ===
|
||||
DATABASE_URL: str = "sqlite+aiosqlite:///./data/jarvis.db"
|
||||
DATA_DIR: str = "./data"
|
||||
DATA_DIR: str = "data"
|
||||
|
||||
# === ChromaDB ===
|
||||
CHROMA_PERSIST_DIR: str = "./data/chroma"
|
||||
CHROMA_PERSIST_DIR: str = "data/chroma"
|
||||
|
||||
# === LLM 配置 ===
|
||||
# 支持: openai / claude / ollama / deepseek / custom
|
||||
@@ -49,11 +65,20 @@ class Settings(BaseSettings):
|
||||
CORS_ORIGINS: list[str] = ["http://localhost:5173", "http://localhost:3000"]
|
||||
|
||||
# === 文件上传 ===
|
||||
UPLOAD_DIR: str = "./data/uploads"
|
||||
UPLOAD_DIR: str = "data/uploads"
|
||||
MAX_UPLOAD_SIZE: int = 50 * 1024 * 1024
|
||||
MINERU_LANGUAGE: Literal["ch", "en"] = "ch"
|
||||
|
||||
# === 管理员 bootstrap ===
|
||||
ADMIN: str = ""
|
||||
ADMIN_EMAIL: str = ""
|
||||
ADMIN_PASSWORD: str = ""
|
||||
ADMIN_FULL_NAME: str = "Administrator"
|
||||
|
||||
# === 向量化 ===
|
||||
EMBEDDING_MODEL: str = "text-embedding-3-small"
|
||||
EMBEDDING_BASE_URL: str = "https://api.openai.com/v1"
|
||||
EMBEDDING_API_KEY: str = ""
|
||||
CHUNK_SIZE: int = 500
|
||||
CHUNK_OVERLAP: int = 50
|
||||
|
||||
@@ -65,5 +90,20 @@ class Settings(BaseSettings):
|
||||
# === NAS 部署 ===
|
||||
NAS_DATA_ROOT: str = "/data/jarvis"
|
||||
|
||||
# === Web Search / SearxNG ===
|
||||
WEB_SEARCH_ENABLED: bool = False
|
||||
WEB_SEARCH_PROVIDER: str = "searxng"
|
||||
SEARXNG_BASE_URL: str = ""
|
||||
SEARXNG_AUTH_TYPE: Literal["none", "bearer", "basic"] = "none"
|
||||
SEARXNG_AUTH_TOKEN: str = ""
|
||||
SEARXNG_BASIC_USER: str = ""
|
||||
SEARXNG_BASIC_PASSWORD: str = ""
|
||||
WEB_SEARCH_DEFAULT_LIMIT: int = 5
|
||||
WEB_SEARCH_TIMEOUT_SECONDS: int = 10
|
||||
|
||||
|
||||
settings = Settings()
|
||||
settings.DATABASE_URL = settings.DATABASE_URL.replace("./data", _resolve_path("./data"), 1)
|
||||
settings.DATA_DIR = _resolve_path(settings.DATA_DIR)
|
||||
settings.CHROMA_PERSIST_DIR = _resolve_path(settings.CHROMA_PERSIST_DIR)
|
||||
settings.UPLOAD_DIR = _resolve_path(settings.UPLOAD_DIR)
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
|
||||
from sqlalchemy.orm import DeclarativeBase
|
||||
from app.config import settings
|
||||
import os
|
||||
import re
|
||||
|
||||
os.makedirs(settings.DATA_DIR, exist_ok=True)
|
||||
|
||||
@@ -33,3 +35,205 @@ async def get_db() -> AsyncSession:
|
||||
async def init_db():
|
||||
async with engine.begin() as conn:
|
||||
await conn.run_sync(Base.metadata.create_all)
|
||||
await ensure_log_columns(conn)
|
||||
await ensure_message_columns(conn)
|
||||
await ensure_document_columns(conn)
|
||||
await ensure_user_columns(conn)
|
||||
await ensure_forum_columns(conn)
|
||||
await ensure_agent_columns(conn)
|
||||
await ensure_skill_columns(conn)
|
||||
|
||||
|
||||
async def ensure_log_columns(conn):
|
||||
result = await conn.execute(text("PRAGMA table_info(logs)"))
|
||||
rows = result.fetchall()
|
||||
if not rows:
|
||||
return
|
||||
|
||||
columns = {row[1] for row in rows}
|
||||
required_columns = {
|
||||
"request_id": "ALTER TABLE logs ADD COLUMN request_id VARCHAR(64)",
|
||||
"route": "ALTER TABLE logs ADD COLUMN route VARCHAR(255)",
|
||||
"method": "ALTER TABLE logs ADD COLUMN method VARCHAR(16)",
|
||||
"status_code": "ALTER TABLE logs ADD COLUMN status_code INTEGER",
|
||||
"error_type": "ALTER TABLE logs ADD COLUMN error_type VARCHAR(100)",
|
||||
"operation": "ALTER TABLE logs ADD COLUMN operation VARCHAR(100)",
|
||||
}
|
||||
for column, ddl in required_columns.items():
|
||||
if column not in columns:
|
||||
await conn.execute(text(ddl))
|
||||
|
||||
|
||||
async def ensure_message_columns(conn):
|
||||
result = await conn.execute(text("PRAGMA table_info(messages)"))
|
||||
rows = result.fetchall()
|
||||
if not rows:
|
||||
return
|
||||
|
||||
columns = {row[1] for row in rows}
|
||||
required_columns = {
|
||||
"attachments": "ALTER TABLE messages ADD COLUMN attachments JSON",
|
||||
}
|
||||
for column, ddl in required_columns.items():
|
||||
if column not in columns:
|
||||
await conn.execute(text(ddl))
|
||||
|
||||
|
||||
async def ensure_document_columns(conn):
|
||||
result = await conn.execute(text("PRAGMA table_info(documents)"))
|
||||
rows = result.fetchall()
|
||||
if not rows:
|
||||
return
|
||||
|
||||
columns = {row[1] for row in rows}
|
||||
required_columns = {
|
||||
"ingestion_status": "ALTER TABLE documents ADD COLUMN ingestion_status VARCHAR(50) DEFAULT 'uploaded' NOT NULL",
|
||||
"ingestion_error": "ALTER TABLE documents ADD COLUMN ingestion_error TEXT",
|
||||
"indexed_at": "ALTER TABLE documents ADD COLUMN indexed_at DATETIME",
|
||||
"parser_version": "ALTER TABLE documents ADD COLUMN parser_version VARCHAR(50)",
|
||||
"index_version": "ALTER TABLE documents ADD COLUMN index_version VARCHAR(50)",
|
||||
"normalized_content": "ALTER TABLE documents ADD COLUMN normalized_content TEXT",
|
||||
"normalized_format": "ALTER TABLE documents ADD COLUMN normalized_format VARCHAR(50)",
|
||||
}
|
||||
for column, ddl in required_columns.items():
|
||||
if column not in columns:
|
||||
await conn.execute(text(ddl))
|
||||
|
||||
|
||||
async def ensure_user_columns(conn):
|
||||
rows = await _get_table_info(conn, 'users')
|
||||
if not rows:
|
||||
return
|
||||
|
||||
columns = {row[1] for row in rows}
|
||||
if 'username' not in columns:
|
||||
await conn.execute(text("ALTER TABLE users ADD COLUMN username VARCHAR(255)"))
|
||||
rows = await _get_table_info(conn, 'users')
|
||||
|
||||
await _backfill_usernames(conn)
|
||||
|
||||
username_row = next(row for row in rows if row[1] == 'username')
|
||||
indexes = await _get_index_info(conn, 'users')
|
||||
has_username_index = any(row[1] == 'ix_users_username' and row[2] == 1 for row in indexes)
|
||||
has_email_index = any(row[1] == 'ix_users_email' and row[2] == 1 for row in indexes)
|
||||
|
||||
if username_row[3] != 1 or not has_username_index or not has_email_index:
|
||||
await _rebuild_users_table(conn)
|
||||
|
||||
|
||||
async def ensure_forum_columns(conn):
|
||||
rows = await _get_table_info(conn, 'forum_posts')
|
||||
if not rows:
|
||||
return
|
||||
|
||||
columns = {row[1] for row in rows}
|
||||
required_columns = {
|
||||
"board": "ALTER TABLE forum_posts ADD COLUMN board VARCHAR(100) DEFAULT 'general' NOT NULL",
|
||||
"is_pinned": "ALTER TABLE forum_posts ADD COLUMN is_pinned BOOLEAN DEFAULT 0 NOT NULL",
|
||||
}
|
||||
for column, ddl in required_columns.items():
|
||||
if column not in columns:
|
||||
await conn.execute(text(ddl))
|
||||
|
||||
indexes = await _get_index_info(conn, 'forum_posts')
|
||||
index_names = {row[1] for row in indexes}
|
||||
if 'ix_forum_posts_board' not in index_names:
|
||||
await conn.execute(text("CREATE INDEX IF NOT EXISTS ix_forum_posts_board ON forum_posts (board)"))
|
||||
|
||||
|
||||
async def ensure_agent_columns(conn):
|
||||
rows = await _get_table_info(conn, 'agents')
|
||||
if not rows:
|
||||
return
|
||||
|
||||
columns = {row[1] for row in rows}
|
||||
required_columns = {
|
||||
'selected_skill_ids': "ALTER TABLE agents ADD COLUMN selected_skill_ids JSON DEFAULT '[]' NOT NULL",
|
||||
}
|
||||
for column, ddl in required_columns.items():
|
||||
if column not in columns:
|
||||
await conn.execute(text(ddl))
|
||||
|
||||
|
||||
async def ensure_skill_columns(conn):
|
||||
rows = await _get_table_info(conn, 'skills')
|
||||
if not rows:
|
||||
return
|
||||
|
||||
columns = {row[1] for row in rows}
|
||||
required_columns = {
|
||||
'required_context': "ALTER TABLE skills ADD COLUMN required_context JSON DEFAULT '[]' NOT NULL",
|
||||
'output_format': "ALTER TABLE skills ADD COLUMN output_format TEXT",
|
||||
'is_builtin': "ALTER TABLE skills ADD COLUMN is_builtin BOOLEAN DEFAULT 0 NOT NULL",
|
||||
'team_id': "ALTER TABLE skills ADD COLUMN team_id VARCHAR(36)",
|
||||
}
|
||||
for column, ddl in required_columns.items():
|
||||
if column not in columns:
|
||||
await conn.execute(text(ddl))
|
||||
|
||||
await conn.execute(text("UPDATE skills SET agent_type = 'schedule_planner' WHERE agent_type = 'planner'"))
|
||||
builtin_names = [
|
||||
'今日重点拆解',
|
||||
'周计划编排',
|
||||
'时间冲突分析',
|
||||
'任务执行 SOP',
|
||||
'外部交互推进',
|
||||
'知识检索摘要',
|
||||
'图谱沉淀策略',
|
||||
'风险识别模板',
|
||||
'趋势洞察模板',
|
||||
]
|
||||
for name in builtin_names:
|
||||
await conn.execute(
|
||||
text("UPDATE skills SET is_builtin = 1 WHERE name = :name"),
|
||||
{'name': name},
|
||||
)
|
||||
|
||||
|
||||
async def _backfill_usernames(conn):
|
||||
result = await conn.execute(text("SELECT id, email, username FROM users ORDER BY created_at, id"))
|
||||
users = result.fetchall()
|
||||
seen_usernames: set[str] = set()
|
||||
|
||||
for user_id, email, username in users:
|
||||
if username:
|
||||
seen_usernames.add(username)
|
||||
continue
|
||||
|
||||
base_username = _slugify_username((email or '').split('@', 1)[0])
|
||||
candidate = base_username
|
||||
suffix = 2
|
||||
while candidate in seen_usernames:
|
||||
candidate = f"{base_username}_{suffix}"
|
||||
suffix += 1
|
||||
|
||||
await conn.execute(
|
||||
text("UPDATE users SET username = :username WHERE id = :user_id AND username IS NULL"),
|
||||
{"username": candidate, "user_id": user_id},
|
||||
)
|
||||
seen_usernames.add(candidate)
|
||||
|
||||
|
||||
async def _rebuild_users_table(conn):
|
||||
await conn.execute(text("CREATE TABLE users__new (id VARCHAR(36) PRIMARY KEY, username VARCHAR(255) NOT NULL, email VARCHAR(255) NOT NULL, hashed_password VARCHAR(255) NOT NULL, full_name VARCHAR(255), is_active BOOLEAN NOT NULL DEFAULT 1, is_superuser BOOLEAN NOT NULL DEFAULT 0, llm_config JSON, scheduler_config JSON, created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL)"))
|
||||
await conn.execute(text("INSERT INTO users__new (id, username, email, hashed_password, full_name, is_active, is_superuser, llm_config, scheduler_config, created_at, updated_at) SELECT id, username, email, hashed_password, full_name, COALESCE(is_active, 1), COALESCE(is_superuser, 0), llm_config, scheduler_config, COALESCE(created_at, CURRENT_TIMESTAMP), COALESCE(updated_at, CURRENT_TIMESTAMP) FROM users"))
|
||||
await conn.execute(text("DROP TABLE users"))
|
||||
await conn.execute(text("ALTER TABLE users__new RENAME TO users"))
|
||||
await conn.execute(text("CREATE UNIQUE INDEX IF NOT EXISTS ix_users_username ON users (username)"))
|
||||
await conn.execute(text("CREATE UNIQUE INDEX IF NOT EXISTS ix_users_email ON users (email)"))
|
||||
|
||||
|
||||
async def _get_table_info(conn, table_name: str):
|
||||
result = await conn.execute(text(f"PRAGMA table_info({table_name})"))
|
||||
return result.fetchall()
|
||||
|
||||
|
||||
async def _get_index_info(conn, table_name: str):
|
||||
result = await conn.execute(text(f"PRAGMA index_list({table_name})"))
|
||||
return result.fetchall()
|
||||
|
||||
|
||||
def _slugify_username(value: str) -> str:
|
||||
normalized = re.sub(r'[^a-z0-9_]+', '_', value.strip().lower())
|
||||
normalized = re.sub(r'_+', '_', normalized).strip('_')
|
||||
return normalized or 'user'
|
||||
|
||||
282
backend/app/logging_utils.py
Normal file
282
backend/app/logging_utils.py
Normal file
@@ -0,0 +1,282 @@
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import traceback
|
||||
import uuid
|
||||
from contextvars import ContextVar
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from fastapi import Request
|
||||
from fastapi.exceptions import RequestValidationError
|
||||
from fastapi.responses import JSONResponse
|
||||
from starlette.exceptions import HTTPException as StarletteHTTPException
|
||||
|
||||
from app.config import settings
|
||||
from app.database import async_session
|
||||
from app.services.log_service import LogService
|
||||
|
||||
request_id_ctx: ContextVar[str] = ContextVar("request_id", default="-")
|
||||
request_user_ctx: ContextVar[str] = ContextVar("request_user", default="anonymous")
|
||||
request_path_ctx: ContextVar[str] = ContextVar("request_path", default="-")
|
||||
request_method_ctx: ContextVar[str] = ContextVar("request_method", default="-")
|
||||
|
||||
logger = logging.getLogger("jarvis.request")
|
||||
|
||||
SENSITIVE_KEYS = {"api_key", "authorization", "password", "current_password", "token", "access_token"}
|
||||
DB_LOG_EXCLUDED_PATH_PREFIXES = ("/api/logs",)
|
||||
|
||||
|
||||
class RequestContextFilter(logging.Filter):
|
||||
def filter(self, record: logging.LogRecord) -> bool:
|
||||
record.request_id = request_id_ctx.get()
|
||||
record.user_id = request_user_ctx.get()
|
||||
record.path = request_path_ctx.get()
|
||||
record.method = request_method_ctx.get()
|
||||
return True
|
||||
|
||||
|
||||
class JsonFormatter(logging.Formatter):
|
||||
def format(self, record: logging.LogRecord) -> str:
|
||||
payload = {
|
||||
"time": datetime.now(timezone.utc).isoformat(),
|
||||
"level": record.levelname,
|
||||
"logger": record.name,
|
||||
"message": record.getMessage(),
|
||||
"request_id": getattr(record, "request_id", request_id_ctx.get()),
|
||||
"user_id": getattr(record, "user_id", request_user_ctx.get()),
|
||||
"method": getattr(record, "method", request_method_ctx.get()),
|
||||
"path": getattr(record, "path", request_path_ctx.get()),
|
||||
}
|
||||
status_code = getattr(record, "status_code", None)
|
||||
duration_ms = getattr(record, "duration_ms", None)
|
||||
extra_details = getattr(record, "details", None)
|
||||
if status_code is not None:
|
||||
payload["status_code"] = status_code
|
||||
if duration_ms is not None:
|
||||
payload["duration_ms"] = duration_ms
|
||||
if extra_details is not None:
|
||||
payload["details"] = extra_details
|
||||
if record.exc_info:
|
||||
payload["exception"] = self.formatException(record.exc_info)
|
||||
return json.dumps(payload, ensure_ascii=False)
|
||||
|
||||
|
||||
class TextFormatter(logging.Formatter):
|
||||
def format(self, record: logging.LogRecord) -> str:
|
||||
record.request_id = getattr(record, "request_id", request_id_ctx.get())
|
||||
record.user_id = getattr(record, "user_id", request_user_ctx.get())
|
||||
record.path = getattr(record, "path", request_path_ctx.get())
|
||||
record.method = getattr(record, "method", request_method_ctx.get())
|
||||
if not hasattr(record, "status_code"):
|
||||
record.status_code = "-"
|
||||
if not hasattr(record, "duration_ms"):
|
||||
record.duration_ms = "-"
|
||||
return super().format(record)
|
||||
|
||||
|
||||
def setup_logging(debug: bool = False) -> None:
|
||||
root_logger = logging.getLogger()
|
||||
if getattr(root_logger, "_jarvis_configured", False):
|
||||
return
|
||||
|
||||
handler = logging.StreamHandler()
|
||||
handler.addFilter(RequestContextFilter())
|
||||
if debug:
|
||||
formatter = TextFormatter(
|
||||
"%(asctime)s | %(levelname)s | %(name)s | request_id=%(request_id)s | user=%(user_id)s | %(method)s %(path)s | status=%(status_code)s | duration=%(duration_ms)s | %(message)s"
|
||||
)
|
||||
else:
|
||||
formatter = JsonFormatter()
|
||||
handler.setFormatter(formatter)
|
||||
|
||||
root_logger.handlers.clear()
|
||||
root_logger.addHandler(handler)
|
||||
root_logger.setLevel(logging.DEBUG if debug else logging.INFO)
|
||||
logging.getLogger("uvicorn.access").setLevel(logging.WARNING)
|
||||
logging.getLogger("sqlalchemy.engine").setLevel(logging.INFO if debug else logging.WARNING)
|
||||
root_logger._jarvis_configured = True
|
||||
|
||||
|
||||
def mask_sensitive(value: Any) -> Any:
|
||||
if isinstance(value, dict):
|
||||
return {k: ("[masked]" if k.lower() in SENSITIVE_KEYS else mask_sensitive(v)) for k, v in value.items()}
|
||||
if isinstance(value, list):
|
||||
return [mask_sensitive(item) for item in value]
|
||||
return value
|
||||
|
||||
|
||||
def summarize_llm_config(config: dict | None) -> dict:
|
||||
if not config:
|
||||
return {}
|
||||
summary: dict[str, Any] = {}
|
||||
for key, value in config.items():
|
||||
if isinstance(value, list):
|
||||
summary[key] = {
|
||||
"count": len(value),
|
||||
"items": [
|
||||
{
|
||||
"name": item.get("name", ""),
|
||||
"provider": item.get("provider", ""),
|
||||
"model": item.get("model", ""),
|
||||
"has_base_url": bool(item.get("base_url")),
|
||||
"has_api_key": bool(item.get("api_key")),
|
||||
"enabled": item.get("enabled"),
|
||||
}
|
||||
for item in value
|
||||
],
|
||||
}
|
||||
else:
|
||||
summary[key] = mask_sensitive(value)
|
||||
return summary
|
||||
|
||||
|
||||
def should_persist_request_log(path: str) -> bool:
|
||||
return not any(path.startswith(prefix) for prefix in DB_LOG_EXCLUDED_PATH_PREFIXES)
|
||||
|
||||
|
||||
async def persist_system_log(**kwargs) -> None:
|
||||
try:
|
||||
async with async_session() as session:
|
||||
await LogService(session).system_log(**kwargs)
|
||||
except Exception:
|
||||
logger.exception("persist_system_log_failed")
|
||||
|
||||
|
||||
def build_cors_headers(request: Request) -> dict[str, str]:
|
||||
origin = request.headers.get("origin")
|
||||
if not origin:
|
||||
return {}
|
||||
if "*" in settings.CORS_ORIGINS or origin in settings.CORS_ORIGINS:
|
||||
return {
|
||||
"Access-Control-Allow-Origin": origin,
|
||||
"Access-Control-Allow-Credentials": "true",
|
||||
"Vary": "Origin",
|
||||
}
|
||||
return {}
|
||||
|
||||
|
||||
async def request_logging_middleware(request: Request, call_next):
|
||||
request_id = request.headers.get("X-Request-ID") or str(uuid.uuid4())
|
||||
request.state.request_id = request_id
|
||||
request_id_token = request_id_ctx.set(request_id)
|
||||
path_token = request_path_ctx.set(request.url.path)
|
||||
method_token = request_method_ctx.set(request.method)
|
||||
start = time.perf_counter()
|
||||
response = None
|
||||
|
||||
logger.info(
|
||||
"request_started",
|
||||
extra={
|
||||
"details": {
|
||||
"query": dict(request.query_params),
|
||||
"client": request.client.host if request.client else None,
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
try:
|
||||
response = await call_next(request)
|
||||
duration_ms = int((time.perf_counter() - start) * 1000)
|
||||
user_id = getattr(request.state, "user_id", "anonymous")
|
||||
request_user_ctx.set(user_id)
|
||||
response.headers["X-Request-ID"] = request_id
|
||||
logger.info(
|
||||
"request_completed",
|
||||
extra={
|
||||
"status_code": response.status_code,
|
||||
"duration_ms": duration_ms,
|
||||
},
|
||||
)
|
||||
if should_persist_request_log(request.url.path):
|
||||
await persist_system_log(
|
||||
message="request_completed",
|
||||
source="http",
|
||||
user_id=user_id if user_id != "anonymous" else None,
|
||||
request_id=request_id,
|
||||
route=request.url.path,
|
||||
method=request.method,
|
||||
status_code=response.status_code,
|
||||
operation="http.request",
|
||||
duration_ms=duration_ms,
|
||||
details={
|
||||
"query": dict(request.query_params),
|
||||
"client": request.client.host if request.client else None,
|
||||
},
|
||||
)
|
||||
return response
|
||||
finally:
|
||||
request_id_ctx.reset(request_id_token)
|
||||
request_path_ctx.reset(path_token)
|
||||
request_method_ctx.reset(method_token)
|
||||
request_user_ctx.set("anonymous")
|
||||
|
||||
|
||||
async def log_http_exception(request: Request, exc: StarletteHTTPException):
|
||||
request_id = getattr(request.state, "request_id", request_id_ctx.get())
|
||||
logger.warning(
|
||||
"http_exception",
|
||||
extra={
|
||||
"status_code": exc.status_code,
|
||||
"details": {"detail": exc.detail},
|
||||
},
|
||||
)
|
||||
headers = {"X-Request-ID": request_id, **build_cors_headers(request)}
|
||||
return JSONResponse(
|
||||
status_code=exc.status_code,
|
||||
content={"detail": exc.detail, "request_id": request_id},
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
|
||||
async def log_validation_exception(request: Request, exc: RequestValidationError):
|
||||
request_id = getattr(request.state, "request_id", request_id_ctx.get())
|
||||
logger.warning(
|
||||
"validation_exception",
|
||||
extra={
|
||||
"status_code": 422,
|
||||
"details": {"errors": exc.errors()},
|
||||
},
|
||||
)
|
||||
headers = {"X-Request-ID": request_id, **build_cors_headers(request)}
|
||||
return JSONResponse(
|
||||
status_code=422,
|
||||
content={"detail": exc.errors(), "request_id": request_id},
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
|
||||
async def log_unhandled_exception(request: Request, exc: Exception):
|
||||
request_id = getattr(request.state, "request_id", request_id_ctx.get())
|
||||
user_id = getattr(request.state, "user_id", None)
|
||||
details = {
|
||||
"error_type": exc.__class__.__name__,
|
||||
"error": str(exc),
|
||||
"traceback": traceback.format_exc(),
|
||||
}
|
||||
logger.error(
|
||||
"unhandled_exception",
|
||||
extra={
|
||||
"status_code": 500,
|
||||
"details": details,
|
||||
},
|
||||
)
|
||||
if should_persist_request_log(request.url.path):
|
||||
await persist_system_log(
|
||||
message="unhandled_exception",
|
||||
source="http",
|
||||
user_id=user_id if user_id not in (None, "anonymous") else None,
|
||||
request_id=request_id,
|
||||
route=request.url.path,
|
||||
method=request.method,
|
||||
status_code=500,
|
||||
error_type=exc.__class__.__name__,
|
||||
operation="http.request",
|
||||
details=details,
|
||||
)
|
||||
headers = {"X-Request-ID": request_id, **build_cors_headers(request)}
|
||||
return JSONResponse(
|
||||
status_code=500,
|
||||
content={"detail": "服务器内部错误", "request_id": request_id},
|
||||
headers=headers,
|
||||
)
|
||||
@@ -1,7 +1,10 @@
|
||||
from contextlib import asynccontextmanager
|
||||
from fastapi import FastAPI
|
||||
from fastapi.exceptions import RequestValidationError
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from app.database import init_db
|
||||
from starlette.exceptions import HTTPException as StarletteHTTPException
|
||||
from app.database import init_db, async_session
|
||||
import app.models # noqa: F401 - 注册所有模型
|
||||
from app.routers import (
|
||||
auth_router,
|
||||
conversation_router,
|
||||
@@ -11,23 +14,66 @@ from app.routers import (
|
||||
graph_router,
|
||||
agent_router,
|
||||
todo_router,
|
||||
reminder_router,
|
||||
goal_router,
|
||||
schedule_center_router,
|
||||
settings_router,
|
||||
folder_router,
|
||||
skill_router,
|
||||
log_router,
|
||||
system_router,
|
||||
brain_router,
|
||||
)
|
||||
from app.routers.scheduler import router as scheduler_router
|
||||
from app.services.scheduler_service import start_scheduler, stop_scheduler, get_scheduler_status
|
||||
from app.services.admin_bootstrap_service import ensure_admin_user, ensure_builtin_skills
|
||||
from app.config import settings
|
||||
from app.logging_utils import (
|
||||
setup_logging,
|
||||
request_logging_middleware,
|
||||
log_http_exception,
|
||||
log_validation_exception,
|
||||
log_unhandled_exception,
|
||||
persist_system_log,
|
||||
)
|
||||
import os
|
||||
|
||||
|
||||
INSECURE_SECRET_KEYS = {
|
||||
'change-me-in-production',
|
||||
'change-me-to-a-random-secret-key',
|
||||
'jarvis-secret-key-change-in-production',
|
||||
}
|
||||
|
||||
|
||||
def validate_startup_security() -> None:
|
||||
if not settings.DEBUG and settings.SECRET_KEY in INSECURE_SECRET_KEYS:
|
||||
raise RuntimeError('SECRET_KEY must be changed before running with DEBUG disabled')
|
||||
|
||||
|
||||
async def run_startup() -> None:
|
||||
validate_startup_security()
|
||||
await init_db()
|
||||
async with async_session() as session:
|
||||
await ensure_admin_user(session, settings)
|
||||
await ensure_builtin_skills(session)
|
||||
await persist_system_log(
|
||||
message="application_started",
|
||||
source="app",
|
||||
operation="app.startup",
|
||||
details={"version": settings.APP_VERSION},
|
||||
)
|
||||
start_scheduler()
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
# 启动
|
||||
setup_logging(settings.DEBUG)
|
||||
os.makedirs(settings.DATA_DIR, exist_ok=True)
|
||||
os.makedirs(settings.UPLOAD_DIR, exist_ok=True)
|
||||
os.makedirs(settings.CHROMA_PERSIST_DIR, exist_ok=True)
|
||||
await init_db()
|
||||
start_scheduler()
|
||||
await run_startup()
|
||||
yield
|
||||
# 关闭
|
||||
stop_scheduler()
|
||||
@@ -47,6 +93,10 @@ app.add_middleware(
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
app.middleware("http")(request_logging_middleware)
|
||||
app.add_exception_handler(StarletteHTTPException, log_http_exception)
|
||||
app.add_exception_handler(RequestValidationError, log_validation_exception)
|
||||
app.add_exception_handler(Exception, log_unhandled_exception)
|
||||
|
||||
# 注册路由
|
||||
app.include_router(auth_router)
|
||||
@@ -57,8 +107,15 @@ app.include_router(forum_router)
|
||||
app.include_router(graph_router)
|
||||
app.include_router(agent_router)
|
||||
app.include_router(todo_router)
|
||||
app.include_router(reminder_router)
|
||||
app.include_router(goal_router)
|
||||
app.include_router(schedule_center_router)
|
||||
app.include_router(settings_router)
|
||||
app.include_router(folder_router)
|
||||
app.include_router(skill_router)
|
||||
app.include_router(log_router)
|
||||
app.include_router(system_router)
|
||||
app.include_router(brain_router)
|
||||
app.include_router(scheduler_router)
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from app.models.base import Base
|
||||
from app.models.user import User
|
||||
from app.models.folder import Folder
|
||||
from app.models.document import Document, DocumentChunk
|
||||
from app.models.task import Task, TaskHistory
|
||||
from app.models.forum import ForumPost, ForumReply
|
||||
@@ -7,11 +8,24 @@ from app.models.agent import Agent, AgentMessage
|
||||
from app.models.conversation import Conversation, Message
|
||||
from app.models.knowledge_graph import KGNode, KGEdge
|
||||
from app.models.memory import MemorySummary, UserMemory
|
||||
from app.models.brain import (
|
||||
BrainEvent,
|
||||
BrainCandidate,
|
||||
BrainMemory,
|
||||
BrainTag,
|
||||
brain_event_tags,
|
||||
brain_memory_tags,
|
||||
brain_memory_sources,
|
||||
)
|
||||
from app.models.todo import DailyTodo, TodoSource
|
||||
from app.models.reminder import Reminder, ReminderStatus
|
||||
from app.models.goal import Goal, GoalStatus
|
||||
from app.models.log import Log, LogType, LogLevel
|
||||
|
||||
__all__ = [
|
||||
"Base",
|
||||
"User",
|
||||
"Folder",
|
||||
"Document",
|
||||
"DocumentChunk",
|
||||
"Task",
|
||||
@@ -26,6 +40,20 @@ __all__ = [
|
||||
"KGEdge",
|
||||
"MemorySummary",
|
||||
"UserMemory",
|
||||
"BrainEvent",
|
||||
"BrainCandidate",
|
||||
"BrainMemory",
|
||||
"BrainTag",
|
||||
"brain_event_tags",
|
||||
"brain_memory_tags",
|
||||
"brain_memory_sources",
|
||||
"DailyTodo",
|
||||
"TodoSource",
|
||||
"Reminder",
|
||||
"ReminderStatus",
|
||||
"Goal",
|
||||
"GoalStatus",
|
||||
"Log",
|
||||
"LogType",
|
||||
"LogLevel",
|
||||
]
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from sqlalchemy import Column, String, Text, Boolean, ForeignKey, Integer
|
||||
from sqlalchemy import Column, String, Text, Boolean, ForeignKey, Integer, JSON
|
||||
from sqlalchemy.orm import relationship
|
||||
from app.models.base import BaseModel
|
||||
|
||||
@@ -7,9 +7,10 @@ class Agent(BaseModel):
|
||||
__tablename__ = "agents"
|
||||
|
||||
name = Column(String(100), nullable=False)
|
||||
role = Column(String(100), nullable=False) # master, planner, executor, librarian, analyst
|
||||
role = Column(String(100), nullable=False) # master, schedule_planner, executor, librarian, analyst
|
||||
description = Column(Text, nullable=True)
|
||||
system_prompt = Column(Text, nullable=False)
|
||||
selected_skill_ids = Column(JSON, default=list, nullable=False)
|
||||
is_active = Column(Boolean, default=True)
|
||||
is_default = Column(Boolean, default=False)
|
||||
|
||||
|
||||
@@ -1,12 +1,16 @@
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from datetime import UTC, datetime
|
||||
from sqlalchemy import Column, String, DateTime
|
||||
from app.database import Base
|
||||
|
||||
|
||||
def utc_now() -> datetime:
|
||||
return datetime.now(UTC)
|
||||
|
||||
|
||||
class BaseModel(Base):
|
||||
__abstract__ = True
|
||||
|
||||
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
|
||||
created_at = Column(DateTime, default=utc_now, nullable=False)
|
||||
updated_at = Column(DateTime, default=utc_now, onupdate=utc_now, nullable=False)
|
||||
|
||||
93
backend/app/models/brain.py
Normal file
93
backend/app/models/brain.py
Normal file
@@ -0,0 +1,93 @@
|
||||
from sqlalchemy import Column, DateTime, Float, ForeignKey, Integer, String, Table, Text
|
||||
from sqlalchemy.dialects.sqlite import JSON
|
||||
|
||||
from app.database import Base
|
||||
from app.models.base import BaseModel, utc_now
|
||||
|
||||
|
||||
brain_event_tags = Table(
|
||||
"brain_event_tags",
|
||||
Base.metadata,
|
||||
Column("event_id", String(36), ForeignKey("brain_events.id"), primary_key=True),
|
||||
Column("tag_id", String(36), ForeignKey("brain_tags.id"), primary_key=True),
|
||||
)
|
||||
|
||||
brain_memory_tags = Table(
|
||||
"brain_memory_tags",
|
||||
Base.metadata,
|
||||
Column("memory_id", String(36), ForeignKey("brain_memories.id"), primary_key=True),
|
||||
Column("tag_id", String(36), ForeignKey("brain_tags.id"), primary_key=True),
|
||||
)
|
||||
|
||||
brain_memory_sources = Table(
|
||||
"brain_memory_sources",
|
||||
Base.metadata,
|
||||
Column("memory_id", String(36), ForeignKey("brain_memories.id"), primary_key=True),
|
||||
Column("event_id", String(36), ForeignKey("brain_events.id"), primary_key=True),
|
||||
)
|
||||
|
||||
|
||||
class BrainEvent(BaseModel):
|
||||
__tablename__ = "brain_events"
|
||||
|
||||
user_id = Column(String(36), ForeignKey("users.id"), nullable=False, index=True)
|
||||
source_type = Column(String(50), nullable=False, index=True)
|
||||
source_id = Column(String(36), nullable=False, index=True)
|
||||
event_type = Column(String(50), nullable=False, index=True)
|
||||
title = Column(String(255), nullable=True)
|
||||
content_summary = Column(Text, nullable=True)
|
||||
raw_excerpt = Column(Text, nullable=True)
|
||||
metadata_ = Column(JSON, nullable=True)
|
||||
importance_signal = Column(Float, default=0.0, nullable=False)
|
||||
is_user_pinned = Column(Integer, default=0, nullable=False)
|
||||
occurred_at = Column(DateTime, default=utc_now, nullable=False, index=True)
|
||||
processed_at = Column(DateTime, nullable=True)
|
||||
status = Column(String(20), default="pending", nullable=False, index=True)
|
||||
|
||||
|
||||
class BrainCandidate(BaseModel):
|
||||
__tablename__ = "brain_candidates"
|
||||
|
||||
user_id = Column(String(36), ForeignKey("users.id"), nullable=False, index=True)
|
||||
candidate_type = Column(String(50), nullable=False, index=True)
|
||||
title = Column(String(255), nullable=False)
|
||||
summary = Column(Text, nullable=False)
|
||||
importance_score = Column(Float, default=0.0, nullable=False)
|
||||
confidence_score = Column(Float, default=0.0, nullable=False)
|
||||
time_scope = Column(String(20), default="short_term", nullable=False)
|
||||
valid_from = Column(DateTime, nullable=True)
|
||||
valid_to = Column(DateTime, nullable=True)
|
||||
source_event_ids = Column(JSON, nullable=True)
|
||||
reasoning_trace = Column(Text, nullable=True)
|
||||
status = Column(String(20), default="new", nullable=False, index=True)
|
||||
reviewed_at = Column(DateTime, nullable=True)
|
||||
|
||||
|
||||
class BrainMemory(BaseModel):
|
||||
__tablename__ = "brain_memories"
|
||||
|
||||
user_id = Column(String(36), ForeignKey("users.id"), nullable=False, index=True)
|
||||
memory_type = Column(String(50), nullable=False, index=True)
|
||||
title = Column(String(255), nullable=False)
|
||||
content = Column(Text, nullable=False)
|
||||
importance = Column(Integer, default=5, nullable=False)
|
||||
confidence = Column(Float, default=0.0, nullable=False)
|
||||
timeline_date = Column(DateTime, nullable=True)
|
||||
first_learned_at = Column(DateTime, default=utc_now, nullable=False)
|
||||
last_reinforced_at = Column(DateTime, nullable=True)
|
||||
reinforcement_count = Column(Integer, default=0, nullable=False)
|
||||
status = Column(String(20), default="active", nullable=False, index=True)
|
||||
origin_candidate_id = Column(String(36), ForeignKey("brain_candidates.id"), nullable=True)
|
||||
origin_source_types = Column(JSON, nullable=True)
|
||||
metadata_ = Column(JSON, nullable=True)
|
||||
|
||||
|
||||
class BrainTag(BaseModel):
|
||||
__tablename__ = "brain_tags"
|
||||
|
||||
user_id = Column(String(36), ForeignKey("users.id"), nullable=False, index=True)
|
||||
name = Column(String(100), nullable=False, index=True)
|
||||
category = Column(String(50), nullable=False)
|
||||
priority = Column(String(20), default="secondary", nullable=False, index=True)
|
||||
score = Column(Float, default=0.0, nullable=False)
|
||||
last_seen_at = Column(DateTime, nullable=True)
|
||||
@@ -1,4 +1,4 @@
|
||||
from sqlalchemy import Column, String, Integer, Text, ForeignKey, Boolean
|
||||
from sqlalchemy import Column, String, Integer, Text, ForeignKey, Boolean, DateTime
|
||||
from sqlalchemy.orm import relationship
|
||||
from app.models.base import BaseModel
|
||||
|
||||
@@ -16,6 +16,13 @@ class Document(BaseModel):
|
||||
summary = Column(Text, nullable=True)
|
||||
chunk_count = Column(Integer, default=0)
|
||||
is_indexed = Column(Boolean, default=False)
|
||||
ingestion_status = Column(String(50), default="uploaded", nullable=False)
|
||||
ingestion_error = Column(Text, nullable=True)
|
||||
indexed_at = Column(DateTime, nullable=True)
|
||||
parser_version = Column(String(50), nullable=True)
|
||||
index_version = Column(String(50), nullable=True)
|
||||
normalized_content = Column(Text, nullable=True)
|
||||
normalized_format = Column(String(50), nullable=True)
|
||||
|
||||
chunks = relationship("DocumentChunk", back_populates="document", cascade="all, delete-orphan")
|
||||
|
||||
|
||||
21
backend/app/models/goal.py
Normal file
21
backend/app/models/goal.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from enum import Enum as PyEnum
|
||||
|
||||
from sqlalchemy import Column, Enum, ForeignKey, String, Text
|
||||
|
||||
from app.models.base import BaseModel
|
||||
|
||||
|
||||
class GoalStatus(str, PyEnum):
|
||||
ACTIVE = "active"
|
||||
DONE = "done"
|
||||
ARCHIVED = "archived"
|
||||
|
||||
|
||||
class Goal(BaseModel):
|
||||
__tablename__ = "goals"
|
||||
|
||||
user_id = Column(String(36), ForeignKey("users.id"), nullable=False, index=True)
|
||||
title = Column(String(255), nullable=False)
|
||||
note = Column(Text, nullable=True)
|
||||
goal_date = Column(String(10), nullable=False, index=True)
|
||||
status = Column(Enum(GoalStatus), default=GoalStatus.ACTIVE, nullable=False)
|
||||
41
backend/app/models/log.py
Normal file
41
backend/app/models/log.py
Normal file
@@ -0,0 +1,41 @@
|
||||
from sqlalchemy import Column, String, Text, Integer, Index
|
||||
from app.models.base import BaseModel
|
||||
import enum
|
||||
|
||||
|
||||
class LogLevel(str, enum.Enum):
|
||||
DEBUG = "debug"
|
||||
INFO = "info"
|
||||
WARNING = "warning"
|
||||
ERROR = "error"
|
||||
|
||||
|
||||
class LogType(str, enum.Enum):
|
||||
AGENT = "agent" # 智能体调用
|
||||
SYSTEM = "system" # 系统运行
|
||||
CHAT = "chat" # 问答对话
|
||||
|
||||
|
||||
class Log(BaseModel):
|
||||
__tablename__ = "logs"
|
||||
|
||||
level = Column(String(20), default=LogLevel.INFO.value, index=True) # debug/info/warning/error
|
||||
type = Column(String(20), default=LogType.SYSTEM.value, index=True) # agent/system/chat
|
||||
user_id = Column(String(36), nullable=True, index=True) # 关联用户
|
||||
request_id = Column(String(64), nullable=True, index=True)
|
||||
route = Column(String(255), nullable=True, index=True)
|
||||
method = Column(String(16), nullable=True, index=True)
|
||||
status_code = Column(Integer, nullable=True, index=True)
|
||||
error_type = Column(String(100), nullable=True)
|
||||
operation = Column(String(100), nullable=True, index=True)
|
||||
message = Column(Text, nullable=False) # 日志内容
|
||||
details = Column(Text, nullable=True) # 详细信息(JSON)
|
||||
source = Column(String(100), nullable=True) # 来源模块
|
||||
duration_ms = Column(Integer, nullable=True) # 执行耗时
|
||||
|
||||
__table_args__ = (
|
||||
Index('idx_logs_type_level', 'type', 'level'),
|
||||
Index('idx_logs_created_at', 'created_at'),
|
||||
Index('idx_logs_request_id', 'request_id'),
|
||||
Index('idx_logs_operation_status', 'operation', 'status_code'),
|
||||
)
|
||||
@@ -1,6 +1,5 @@
|
||||
from sqlalchemy import Column, String, Text, Integer, ForeignKey, Boolean, DateTime, Enum as SQLEnum
|
||||
from datetime import datetime
|
||||
from app.models.base import BaseModel
|
||||
from app.models.base import BaseModel, utc_now
|
||||
|
||||
|
||||
class MemorySummary(BaseModel):
|
||||
@@ -14,7 +13,7 @@ class MemorySummary(BaseModel):
|
||||
conversation_id = Column(String(36), ForeignKey("conversations.id"), nullable=False, index=True)
|
||||
summary_text = Column(Text, nullable=False) # 摘要内容
|
||||
turn_count = Column(Integer, default=0) # 摘要时累计轮数
|
||||
summary_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
summary_at = Column(DateTime, default=utc_now, nullable=False)
|
||||
|
||||
|
||||
class UserMemory(BaseModel):
|
||||
@@ -31,5 +30,5 @@ class UserMemory(BaseModel):
|
||||
is_recalled = Column(Boolean, default=False) # 是否在当前对话中被召回
|
||||
recall_count = Column(Integer, default=0) # 被召回次数
|
||||
source_conversation_id = Column(String(36), nullable=True) # 来源对话
|
||||
extracted_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
extracted_at = Column(DateTime, default=utc_now, nullable=False)
|
||||
last_recalled_at = Column(DateTime, nullable=True)
|
||||
|
||||
21
backend/app/models/reminder.py
Normal file
21
backend/app/models/reminder.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from enum import Enum as PyEnum
|
||||
|
||||
from sqlalchemy import Boolean, Column, DateTime, Enum, ForeignKey, String, Text
|
||||
|
||||
from app.models.base import BaseModel
|
||||
|
||||
|
||||
class ReminderStatus(str, PyEnum):
|
||||
PENDING = "pending"
|
||||
DONE = "done"
|
||||
|
||||
|
||||
class Reminder(BaseModel):
|
||||
__tablename__ = "reminders"
|
||||
|
||||
user_id = Column(String(36), ForeignKey("users.id"), nullable=False, index=True)
|
||||
title = Column(String(255), nullable=False)
|
||||
note = Column(Text, nullable=True)
|
||||
reminder_at = Column(DateTime, nullable=False, index=True)
|
||||
status = Column(Enum(ReminderStatus), default=ReminderStatus.PENDING, nullable=False)
|
||||
is_dismissed = Column(Boolean, default=False, nullable=False)
|
||||
23
backend/app/models/skill.py
Normal file
23
backend/app/models/skill.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from sqlalchemy import Column, String, Text, Boolean, JSON, ForeignKey
|
||||
from sqlalchemy.orm import relationship
|
||||
from app.models.base import BaseModel
|
||||
|
||||
|
||||
class Skill(BaseModel):
|
||||
__tablename__ = "skills"
|
||||
|
||||
name = Column(String(100), nullable=False, unique=True, index=True)
|
||||
description = Column(Text, nullable=True) # 供 LLM 理解用途
|
||||
instructions = Column(Text, nullable=False) # Agent 执行时的指令模板
|
||||
agent_type = Column(String(50), nullable=False, index=True) # master/schedule_planner/executor/librarian/analyst
|
||||
tools = Column(JSON, default=list) # 引用的工具名称列表
|
||||
required_context = Column(JSON, default=list) # 需要的前置数据
|
||||
output_format = Column(Text, nullable=True) # 输出格式要求
|
||||
visibility = Column(String(20), default="private") # private/team/market
|
||||
is_builtin = Column(Boolean, default=False, nullable=False)
|
||||
team_id = Column(String(36), ForeignKey("users.id"), nullable=True)
|
||||
is_active = Column(Boolean, default=True)
|
||||
owner_id = Column(String(36), ForeignKey("users.id"), nullable=False)
|
||||
|
||||
owner = relationship("User", foreign_keys=[owner_id])
|
||||
team = relationship("User", foreign_keys=[team_id])
|
||||
@@ -5,6 +5,7 @@ from app.models.base import BaseModel
|
||||
class User(BaseModel):
|
||||
__tablename__ = "users"
|
||||
|
||||
username = Column(String(255), unique=True, nullable=False, index=True)
|
||||
email = Column(String(255), unique=True, nullable=False, index=True)
|
||||
hashed_password = Column(String(255), nullable=False)
|
||||
full_name = Column(String(255), nullable=True)
|
||||
|
||||
@@ -6,5 +6,12 @@ from app.routers.forum import router as forum_router
|
||||
from app.routers.graph import router as graph_router
|
||||
from app.routers.agent import router as agent_router
|
||||
from app.routers.todo import router as todo_router
|
||||
from app.routers.reminder import router as reminder_router
|
||||
from app.routers.goal import router as goal_router
|
||||
from app.routers.schedule_center import router as schedule_center_router
|
||||
from app.routers.settings import router as settings_router
|
||||
from app.routers.folder import router as folder_router
|
||||
from app.routers.skill import router as skill_router
|
||||
from app.routers.log import router as log_router
|
||||
from app.routers.system import router as system_router
|
||||
from app.routers.brain import router as brain_router
|
||||
|
||||
@@ -3,19 +3,24 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
from app.database import get_db
|
||||
from app.models.agent import Agent
|
||||
from app.models.skill import Skill
|
||||
from app.models.user import User
|
||||
from app.routers.auth import get_current_user
|
||||
from app.schemas.agent import AgentCreate, AgentOut, AgentStats, AgentConfigUpdate, AgentConfigOut
|
||||
|
||||
router = APIRouter(prefix="/api/agents", tags=["Agent"])
|
||||
|
||||
# 运行时调用统计(内存中,非持久化)
|
||||
_agent_call_counts: dict[str, int] = {}
|
||||
_agent_current_tasks: dict[str, str | None] = {}
|
||||
_agent_statuses: dict[str, str] = {}
|
||||
|
||||
# 默认 Agent 角色列表
|
||||
DEFAULT_AGENT_ROLES = ["master", "planner", "executor", "librarian", "analyst"]
|
||||
DEFAULT_AGENT_ROLES = ["master", "schedule_planner", "executor", "librarian", "analyst"]
|
||||
SUB_COMMANDERS_BY_ROLE = {
|
||||
"schedule_planner": ["schedule_analysis", "schedule_planning"],
|
||||
"executor": ["executor_tasks", "executor_forum"],
|
||||
"librarian": ["librarian_retrieval", "librarian_graph"],
|
||||
"analyst": ["analyst_progress", "analyst_insights"],
|
||||
}
|
||||
|
||||
|
||||
def record_agent_call(agent_id: str):
|
||||
@@ -31,6 +36,15 @@ def set_agent_status(agent_id: str, status: str):
|
||||
_agent_statuses[agent_id] = status
|
||||
|
||||
|
||||
def _build_agent_stats(agent_id: str) -> AgentStats:
|
||||
return AgentStats(
|
||||
agent_id=agent_id,
|
||||
call_count=_agent_call_counts.get(agent_id, 0),
|
||||
current_task=_agent_current_tasks.get(agent_id),
|
||||
status=_agent_statuses.get(agent_id, "idle"),
|
||||
)
|
||||
|
||||
|
||||
@router.get("", response_model=list[AgentOut])
|
||||
async def list_agents(
|
||||
current_user: User = Depends(get_current_user),
|
||||
@@ -42,40 +56,43 @@ async def list_agents(
|
||||
return result.scalars().all()
|
||||
|
||||
|
||||
# ———— 运行时统计(必须在 /{agent_id} 之前)————
|
||||
@router.get("/stats", response_model=list[AgentStats])
|
||||
async def get_agent_stats(
|
||||
current_user: User = Depends(get_current_user),
|
||||
):
|
||||
"""
|
||||
获取各 Agent 的运行时统计(调用次数、当前任务、状态)
|
||||
"""
|
||||
stats = []
|
||||
return [_build_agent_stats(role) for role in DEFAULT_AGENT_ROLES]
|
||||
|
||||
|
||||
@router.get("/stats/hierarchy")
|
||||
async def get_agent_hierarchy_stats(
|
||||
current_user: User = Depends(get_current_user),
|
||||
):
|
||||
main_agents = []
|
||||
for role in DEFAULT_AGENT_ROLES:
|
||||
stats.append(AgentStats(
|
||||
agent_id=role,
|
||||
call_count=_agent_call_counts.get(role, 0),
|
||||
current_task=_agent_current_tasks.get(role),
|
||||
status=_agent_statuses.get(role, "idle"),
|
||||
))
|
||||
return stats
|
||||
if role == "master":
|
||||
continue
|
||||
node = _build_agent_stats(role).model_dump()
|
||||
node["sub_commanders"] = [
|
||||
_build_agent_stats(sub_id).model_dump()
|
||||
for sub_id in SUB_COMMANDERS_BY_ROLE.get(role, [])
|
||||
]
|
||||
main_agents.append(node)
|
||||
return {"main_agents": main_agents}
|
||||
|
||||
|
||||
# ———— 配置管理(必须在 /{agent_id} 之前)————
|
||||
@router.get("/config/{agent_id}", response_model=AgentConfigOut)
|
||||
async def get_agent_config(
|
||||
agent_id: str,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
"""获取单个 Agent 完整配置"""
|
||||
result = await db.execute(select(Agent).where(Agent.role == agent_id))
|
||||
agent = result.scalar_one_or_none()
|
||||
|
||||
if not agent:
|
||||
from app.agents.prompts import MASTER_SYSTEM_PROMPT, PLANNER_SYSTEM_PROMPT, EXECUTOR_SYSTEM_PROMPT, LIBRARIAN_SYSTEM_PROMPT, ANALYST_SYSTEM_PROMPT
|
||||
from app.agents.prompts import MASTER_SYSTEM_PROMPT, SCHEDULE_PLANNER_SYSTEM_PROMPT, EXECUTOR_SYSTEM_PROMPT, LIBRARIAN_SYSTEM_PROMPT, ANALYST_SYSTEM_PROMPT
|
||||
defaults = {
|
||||
"master": ("JARVIS", "主控制核心", MASTER_SYSTEM_PROMPT),
|
||||
"planner": ("PLANNER", "规划专家", PLANNER_SYSTEM_PROMPT),
|
||||
"schedule_planner": ("SCHEDULE PLANNER", "日程规划师", SCHEDULE_PLANNER_SYSTEM_PROMPT),
|
||||
"executor": ("EXECUTOR", "执行专家", EXECUTOR_SYSTEM_PROMPT),
|
||||
"librarian": ("LIBRARIAN", "知识管理员", LIBRARIAN_SYSTEM_PROMPT),
|
||||
"analyst": ("ANALYST", "数据分析师", ANALYST_SYSTEM_PROMPT),
|
||||
@@ -84,8 +101,14 @@ async def get_agent_config(
|
||||
raise HTTPException(status_code=404, detail="Agent 不存在")
|
||||
name, desc, prompt = defaults[agent_id]
|
||||
return AgentConfigOut(
|
||||
id=agent_id, name=name, role=agent_id,
|
||||
description=desc, system_prompt=prompt, enabled=True, is_active=True,
|
||||
id=agent_id,
|
||||
name=name,
|
||||
role=agent_id,
|
||||
description=desc,
|
||||
system_prompt=prompt,
|
||||
enabled=True,
|
||||
is_active=True,
|
||||
selected_skill_ids=[],
|
||||
)
|
||||
return AgentConfigOut(
|
||||
id=agent.role,
|
||||
@@ -95,6 +118,7 @@ async def get_agent_config(
|
||||
system_prompt=agent.system_prompt,
|
||||
enabled=agent.is_active,
|
||||
is_active=agent.is_active,
|
||||
selected_skill_ids=agent.selected_skill_ids or [],
|
||||
)
|
||||
|
||||
|
||||
@@ -105,7 +129,6 @@ async def update_agent_config(
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
"""更新 Agent 配置(名称、描述、提示词、启用状态)"""
|
||||
result = await db.execute(select(Agent).where(Agent.role == agent_id))
|
||||
agent = result.scalar_one_or_none()
|
||||
|
||||
@@ -121,6 +144,19 @@ async def update_agent_config(
|
||||
if data.enabled is not None:
|
||||
agent.is_active = data.enabled
|
||||
_agent_statuses[agent_id] = "disabled" if not data.enabled else "idle"
|
||||
if data.selected_skill_ids is not None:
|
||||
if data.selected_skill_ids:
|
||||
result = await db.execute(
|
||||
select(Skill.id).where(
|
||||
Skill.id.in_(data.selected_skill_ids),
|
||||
Skill.owner_id == current_user.id,
|
||||
)
|
||||
)
|
||||
allowed_skill_ids = set(result.scalars().all())
|
||||
invalid_skill_ids = [skill_id for skill_id in data.selected_skill_ids if skill_id not in allowed_skill_ids]
|
||||
if invalid_skill_ids:
|
||||
raise HTTPException(status_code=400, detail="存在无效的技能绑定")
|
||||
agent.selected_skill_ids = data.selected_skill_ids
|
||||
|
||||
await db.commit()
|
||||
await db.refresh(agent)
|
||||
@@ -132,6 +168,7 @@ async def update_agent_config(
|
||||
system_prompt=agent.system_prompt,
|
||||
enabled=agent.is_active,
|
||||
is_active=agent.is_active,
|
||||
selected_skill_ids=agent.selected_skill_ids or [],
|
||||
)
|
||||
|
||||
|
||||
@@ -163,78 +200,3 @@ async def get_agent(
|
||||
if not agent:
|
||||
raise HTTPException(status_code=404, detail="Agent 不存在")
|
||||
return agent
|
||||
|
||||
|
||||
|
||||
# ———— 配置管理 ————
|
||||
@router.get("/config/{agent_id}", response_model=AgentConfigOut)
|
||||
async def get_agent_config(
|
||||
agent_id: str,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
"""获取单个 Agent 完整配置"""
|
||||
result = await db.execute(select(Agent).where(Agent.role == agent_id))
|
||||
agent = result.scalar_one_or_none()
|
||||
if not agent:
|
||||
# 如果数据库中没有,返回默认配置
|
||||
from app.agents.prompts import MASTER_SYSTEM_PROMPT, PLANNER_SYSTEM_PROMPT, EXECUTOR_SYSTEM_PROMPT, LIBRARIAN_SYSTEM_PROMPT, ANALYST_SYSTEM_PROMPT
|
||||
defaults = {
|
||||
"master": ("JARVIS", "主控制核心", MASTER_SYSTEM_PROMPT),
|
||||
"planner": ("PLANNER", "规划专家", PLANNER_SYSTEM_PROMPT),
|
||||
"executor": ("EXECUTOR", "执行专家", EXECUTOR_SYSTEM_PROMPT),
|
||||
"librarian": ("LIBRARIAN", "知识管理员", LIBRARIAN_SYSTEM_PROMPT),
|
||||
"analyst": ("ANALYST", "数据分析师", ANALYST_SYSTEM_PROMPT),
|
||||
}
|
||||
if agent_id not in defaults:
|
||||
raise HTTPException(status_code=404, detail="Agent 不存在")
|
||||
name, desc, prompt = defaults[agent_id]
|
||||
return AgentConfigOut(
|
||||
id=agent_id, name=name, role=agent_id,
|
||||
description=desc, system_prompt=prompt, enabled=True, is_active=True,
|
||||
)
|
||||
return AgentConfigOut(
|
||||
id=agent.role,
|
||||
name=agent.name,
|
||||
role=agent.role,
|
||||
description=agent.description,
|
||||
system_prompt=agent.system_prompt,
|
||||
enabled=agent.is_active,
|
||||
is_active=agent.is_active,
|
||||
)
|
||||
|
||||
|
||||
@router.put("/config/{agent_id}", response_model=AgentConfigOut)
|
||||
async def update_agent_config(
|
||||
agent_id: str,
|
||||
data: AgentConfigUpdate,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
"""更新 Agent 配置(名称、描述、提示词、启用状态)"""
|
||||
result = await db.execute(select(Agent).where(Agent.role == agent_id))
|
||||
agent = result.scalar_one_or_none()
|
||||
|
||||
if not agent:
|
||||
raise HTTPException(status_code=404, detail="Agent 不存在")
|
||||
|
||||
if data.name is not None:
|
||||
agent.name = data.name
|
||||
if data.description is not None:
|
||||
agent.description = data.description
|
||||
if data.system_prompt is not None:
|
||||
agent.system_prompt = data.system_prompt
|
||||
if data.enabled is not None:
|
||||
agent.is_active = data.enabled
|
||||
_agent_statuses[agent_id] = "disabled" if not data.enabled else "idle"
|
||||
|
||||
await db.commit()
|
||||
await db.refresh(agent)
|
||||
return AgentConfigOut(
|
||||
id=agent.role,
|
||||
name=agent.name,
|
||||
role=agent.role,
|
||||
description=agent.description,
|
||||
system_prompt=agent.system_prompt,
|
||||
enabled=agent.is_active,
|
||||
is_active=agent.is_active,
|
||||
)
|
||||
|
||||
@@ -2,9 +2,11 @@ from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from app.database import get_db
|
||||
from app.models.user import User
|
||||
from app.schemas.auth import UserCreate, UserOut, Token
|
||||
from app.services.admin_bootstrap_service import ensure_builtin_skills
|
||||
from app.services.auth_service import verify_password, get_password_hash, create_access_token, decode_token
|
||||
from app.config import settings
|
||||
|
||||
@@ -32,52 +34,77 @@ async def get_current_user(
|
||||
|
||||
@router.post("/register", response_model=UserOut, status_code=status.HTTP_201_CREATED)
|
||||
async def register(user_data: UserCreate, db: AsyncSession = Depends(get_db)):
|
||||
# 检查邮箱是否已存在
|
||||
username = user_data.username.strip()
|
||||
if not username:
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="用户名不能为空")
|
||||
|
||||
result = await db.execute(select(User).where(User.username == username))
|
||||
if result.scalar_one_or_none():
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="用户名已被注册")
|
||||
|
||||
result = await db.execute(select(User).where(User.email == user_data.email))
|
||||
if result.scalar_one_or_none():
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="邮箱已被注册")
|
||||
# 创建用户
|
||||
|
||||
user = User(
|
||||
username=username,
|
||||
email=user_data.email,
|
||||
hashed_password=get_password_hash(user_data.password),
|
||||
full_name=user_data.full_name,
|
||||
)
|
||||
db.add(user)
|
||||
await db.commit()
|
||||
try:
|
||||
await db.commit()
|
||||
except IntegrityError:
|
||||
await db.rollback()
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="用户名或邮箱已被注册")
|
||||
await db.refresh(user)
|
||||
await ensure_builtin_skills(db)
|
||||
return user
|
||||
|
||||
|
||||
@router.post("/login", response_model=Token)
|
||||
async def login(form_data: OAuth2PasswordRequestForm = Depends(), db: AsyncSession = Depends(get_db)):
|
||||
identifier = form_data.username.strip()
|
||||
# 支持:邮箱 / UUID / 用户名前缀
|
||||
user = None
|
||||
|
||||
# 1. 尝试 UUID
|
||||
import re
|
||||
if re.match(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', identifier, re.I):
|
||||
result = await db.execute(select(User).where(User.id == identifier))
|
||||
user = result.scalar_one_or_none()
|
||||
|
||||
# 2. 尝试邮箱
|
||||
if not user:
|
||||
result = await db.execute(select(User).where(User.username == identifier))
|
||||
user = result.scalar_one_or_none()
|
||||
|
||||
if not user:
|
||||
result = await db.execute(select(User).where(User.email == identifier))
|
||||
user = result.scalar_one_or_none()
|
||||
|
||||
# 3. 尝试用户名前缀(email@ 前面的部分)
|
||||
if not user and '@' not in identifier:
|
||||
result = await db.execute(select(User).where(User.email.like(f"{identifier}@%")))
|
||||
user = result.scalar_one_or_none()
|
||||
escaped_identifier = (
|
||||
identifier
|
||||
.replace('\\', '\\\\')
|
||||
.replace('%', '\\%')
|
||||
.replace('_', '\\_')
|
||||
)
|
||||
result = await db.execute(
|
||||
select(User).where(User.email.like(f"{escaped_identifier}@%", escape='\\'))
|
||||
)
|
||||
prefix_matches = result.scalars().all()
|
||||
if len(prefix_matches) == 1:
|
||||
user = prefix_matches[0]
|
||||
|
||||
if not user or not verify_password(form_data.password, user.hashed_password):
|
||||
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="用户名、邮箱或密码错误")
|
||||
if not user.is_active:
|
||||
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="用户已被禁用")
|
||||
await ensure_builtin_skills(db)
|
||||
access_token = create_access_token(data={"sub": user.id})
|
||||
return Token(access_token=access_token)
|
||||
|
||||
|
||||
@router.get("/me", response_model=UserOut)
|
||||
async def get_me(current_user: User = Depends(get_current_user)):
|
||||
async def get_me(current_user: User = Depends(get_current_user), db: AsyncSession = Depends(get_db)):
|
||||
await ensure_builtin_skills(db)
|
||||
return current_user
|
||||
|
||||
61
backend/app/routers/brain.py
Normal file
61
backend/app/routers/brain.py
Normal file
@@ -0,0 +1,61 @@
|
||||
from fastapi import APIRouter, Depends
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.database import get_db
|
||||
from app.models.user import User
|
||||
from app.routers.auth import get_current_user
|
||||
from app.schemas.brain import (
|
||||
BrainEventOut,
|
||||
BrainLearnRunOut,
|
||||
BrainMemoryOut,
|
||||
BrainOverviewOut,
|
||||
BrainTagGroupsOut,
|
||||
)
|
||||
from app.services.brain_service import BrainService
|
||||
|
||||
router = APIRouter(prefix="/api/brain", tags=["知识大脑"])
|
||||
|
||||
|
||||
@router.get("/overview", response_model=BrainOverviewOut)
|
||||
async def get_brain_overview(
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
service = BrainService(db)
|
||||
return await service.get_overview(current_user.id)
|
||||
|
||||
|
||||
@router.get("/memories", response_model=list[BrainMemoryOut])
|
||||
async def list_brain_memories(
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
service = BrainService(db)
|
||||
return await service.list_memories(current_user.id)
|
||||
|
||||
|
||||
@router.get("/tags", response_model=BrainTagGroupsOut)
|
||||
async def list_brain_tags(
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
service = BrainService(db)
|
||||
return await service.list_tags(current_user.id)
|
||||
|
||||
|
||||
@router.get("/events", response_model=list[BrainEventOut])
|
||||
async def list_brain_events(
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
service = BrainService(db)
|
||||
return await service.list_events(current_user.id)
|
||||
|
||||
|
||||
@router.post("/learn/run", response_model=BrainLearnRunOut)
|
||||
async def run_brain_learning(
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
service = BrainService(db)
|
||||
return await service.run_learning(current_user.id)
|
||||
@@ -1,4 +1,5 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException, WebSocket, WebSocketDisconnect
|
||||
|
||||
from fastapi.responses import StreamingResponse
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, desc
|
||||
@@ -92,12 +93,16 @@ async def chat(
|
||||
):
|
||||
"""简单版对话(非流式)"""
|
||||
agent_svc = AgentService(db)
|
||||
conv_id, msg_id, content = await agent_svc.chat_simple(
|
||||
user_id=current_user.id,
|
||||
message=data.message,
|
||||
conversation_id=data.conversation_id,
|
||||
file_ids=data.file_ids,
|
||||
)
|
||||
try:
|
||||
conv_id, msg_id, content, model_name = await agent_svc.chat_simple(
|
||||
user_id=current_user.id,
|
||||
message=data.message,
|
||||
conversation_id=data.conversation_id,
|
||||
file_ids=data.file_ids,
|
||||
model_name=data.model_name,
|
||||
)
|
||||
except ValueError as exc:
|
||||
raise HTTPException(status_code=400, detail=str(exc))
|
||||
|
||||
# 更新对话消息计数
|
||||
result = await db.execute(select(Conversation).where(Conversation.id == conv_id))
|
||||
@@ -111,6 +116,7 @@ async def chat(
|
||||
message_id=msg_id,
|
||||
content=content,
|
||||
agent_name="jarvis",
|
||||
model_name=model_name,
|
||||
)
|
||||
|
||||
|
||||
@@ -124,30 +130,42 @@ async def chat_stream(
|
||||
agent_svc = AgentService(db)
|
||||
|
||||
async def stream_generator():
|
||||
conv_id, msg_id, stream = await agent_svc.chat(
|
||||
user_id=current_user.id,
|
||||
message=data.message,
|
||||
conversation_id=data.conversation_id,
|
||||
)
|
||||
|
||||
# 先发送元数据
|
||||
yield f"event: metadata\ndata: {json.dumps({'conversation_id': conv_id, 'message_id': msg_id})}\n\n"
|
||||
|
||||
# 流式发送内容
|
||||
collected = ""
|
||||
stream = None
|
||||
msg_id = None
|
||||
should_emit_done = False
|
||||
try:
|
||||
async for chunk in stream:
|
||||
if chunk:
|
||||
collected += chunk
|
||||
yield f"event: chunk\ndata: {json.dumps({'content': chunk})}\n\n"
|
||||
try:
|
||||
conv_id, msg_id, stream = await agent_svc.chat(
|
||||
user_id=current_user.id,
|
||||
message=data.message,
|
||||
conversation_id=data.conversation_id,
|
||||
file_ids=data.file_ids,
|
||||
model_name=data.model_name,
|
||||
)
|
||||
except ValueError as exc:
|
||||
yield f"event: error\ndata: {json.dumps({'error': str(exc)}, ensure_ascii=False)}\n\n"
|
||||
return
|
||||
|
||||
# 更新数据库中的消息
|
||||
await agent_svc.save_response(msg_id, collected)
|
||||
yield f"event: metadata\ndata: {json.dumps({'conversation_id': conv_id, 'message_id': msg_id})}\n\n"
|
||||
|
||||
except Exception as e:
|
||||
yield f"event: error\ndata: {json.dumps({'error': str(e)})}\n\n"
|
||||
try:
|
||||
async for event in stream:
|
||||
event_type = event.get('type', 'progress')
|
||||
if event_type == 'chunk':
|
||||
yield f"event: chunk\ndata: {json.dumps({'content': event.get('content', '')}, ensure_ascii=False)}\n\n"
|
||||
elif event_type == 'error':
|
||||
yield f"event: error\ndata: {json.dumps({'error': event.get('error', '未知错误')}, ensure_ascii=False)}\n\n"
|
||||
else:
|
||||
payload = {k: v for k, v in event.items() if k != 'type'}
|
||||
yield f"event: progress\ndata: {json.dumps(payload, ensure_ascii=False)}\n\n"
|
||||
except Exception as e:
|
||||
yield f"event: error\ndata: {json.dumps({'error': str(e)}, ensure_ascii=False)}\n\n"
|
||||
should_emit_done = msg_id is not None
|
||||
if should_emit_done:
|
||||
yield f"event: done\ndata: {json.dumps({'message_id': msg_id})}\n\n"
|
||||
finally:
|
||||
yield f"event: done\ndata: {json.dumps({'message_id': msg_id})}\n\n"
|
||||
if stream is not None:
|
||||
await stream.aclose()
|
||||
|
||||
return StreamingResponse(
|
||||
stream_generator(),
|
||||
|
||||
@@ -8,12 +8,13 @@ from app.models.user import User
|
||||
from app.routers.auth import get_current_user
|
||||
from app.services.document_service import DocumentService
|
||||
from app.services.knowledge_service import KnowledgeService
|
||||
from app.schemas.document import DocumentChunkOut, DocumentChunkUpdate, DocumentOut
|
||||
from dataclasses import asdict
|
||||
|
||||
router = APIRouter(prefix="/api/documents", tags=["知识库"])
|
||||
|
||||
|
||||
@router.get("", response_model=list)
|
||||
@router.get("", response_model=list[DocumentOut])
|
||||
async def list_documents(
|
||||
folder_id: Optional[str] = None,
|
||||
current_user: User = Depends(get_current_user),
|
||||
@@ -36,7 +37,10 @@ async def upload_document(
|
||||
):
|
||||
"""上传文档,自动分块并向量化"""
|
||||
doc_svc = DocumentService(db)
|
||||
doc = await doc_svc.upload_document(current_user.id, file, folder_id=folder_id)
|
||||
try:
|
||||
doc = await doc_svc.upload_document(current_user.id, file, folder_id=folder_id)
|
||||
except ValueError as error:
|
||||
raise HTTPException(status_code=400, detail=str(error)) from error
|
||||
|
||||
# 后台索引到 ChromaDB
|
||||
def index_task():
|
||||
@@ -73,7 +77,7 @@ async def get_document(
|
||||
return doc
|
||||
|
||||
|
||||
@router.get("/{document_id}/chunks")
|
||||
@router.get("/{document_id}/chunks", response_model=list[DocumentChunkOut])
|
||||
async def get_document_chunks(
|
||||
document_id: str,
|
||||
current_user: User = Depends(get_current_user),
|
||||
@@ -98,6 +102,33 @@ async def get_document_chunks(
|
||||
return chunks_result.scalars().all()
|
||||
|
||||
|
||||
@router.put("/{document_id}/chunks/{chunk_id}", response_model=DocumentChunkOut)
|
||||
async def update_document_chunk(
|
||||
document_id: str,
|
||||
chunk_id: str,
|
||||
payload: DocumentChunkUpdate,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
doc_svc = DocumentService(db)
|
||||
kb_svc = KnowledgeService(db, user_id=current_user.id)
|
||||
|
||||
try:
|
||||
chunk = await doc_svc.update_document_chunk(current_user.id, document_id, chunk_id, payload.content)
|
||||
except ValueError as error:
|
||||
raise HTTPException(status_code=404, detail=str(error)) from error
|
||||
|
||||
reindexed = await kb_svc.reindex_document_chunks(document_id, current_user.id)
|
||||
if not reindexed:
|
||||
raise HTTPException(status_code=500, detail="切片更新后重新索引失败")
|
||||
|
||||
refreshed_chunk_result = await db.execute(
|
||||
select(DocumentChunk).where(DocumentChunk.id == chunk.id)
|
||||
)
|
||||
refreshed_chunk = refreshed_chunk_result.scalar_one()
|
||||
return refreshed_chunk
|
||||
|
||||
|
||||
@router.delete("/{document_id}", status_code=204)
|
||||
async def delete_document(
|
||||
document_id: str,
|
||||
@@ -129,7 +160,7 @@ async def search_documents(
|
||||
if mode == "keyword":
|
||||
results = await kb_svc._keyword_search(query, current_user.id, top_k)
|
||||
elif mode == "semantic":
|
||||
results = await kb_svc.retrieve(query, current_user.id, top_k, use_rerank=True)
|
||||
results = await kb_svc.retrieve(query, current_user.id, top_k=top_k, use_rerank=True)
|
||||
else:
|
||||
results = await kb_svc.hybrid_search(query, current_user.id, top_k)
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ from app.database import get_db
|
||||
from app.models.folder import Folder
|
||||
from app.models.user import User
|
||||
from app.schemas.folder import FolderCreate, FolderUpdate, FolderOut, FolderTreeOut
|
||||
from app.services.auth_service import get_current_user
|
||||
from app.routers.auth import get_current_user
|
||||
|
||||
router = APIRouter(prefix="/api/folders", tags=["文件夹"])
|
||||
|
||||
|
||||
92
backend/app/routers/goal.py
Normal file
92
backend/app/routers/goal.py
Normal file
@@ -0,0 +1,92 @@
|
||||
from datetime import date
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.database import get_db
|
||||
from app.models.goal import Goal
|
||||
from app.models.user import User
|
||||
from app.routers.auth import get_current_user
|
||||
from app.schemas.goal import GoalCreate, GoalListOut, GoalOut, GoalUpdate
|
||||
|
||||
router = APIRouter(prefix="/api/goals", tags=["目标"])
|
||||
|
||||
|
||||
@router.get("", response_model=GoalListOut)
|
||||
async def list_goals(
|
||||
date_str: str = Query(...),
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
target_date = date.fromisoformat(date_str).isoformat()
|
||||
query = (
|
||||
select(Goal)
|
||||
.where(Goal.user_id == current_user.id)
|
||||
.where(Goal.goal_date == target_date)
|
||||
.order_by(Goal.created_at.desc())
|
||||
)
|
||||
items = (await db.execute(query)).scalars().all()
|
||||
return GoalListOut(items=items)
|
||||
|
||||
|
||||
@router.post("", response_model=GoalOut, status_code=201)
|
||||
async def create_goal(
|
||||
data: GoalCreate,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
goal = Goal(
|
||||
user_id=current_user.id,
|
||||
title=data.title,
|
||||
note=data.note,
|
||||
goal_date=data.goal_date.isoformat(),
|
||||
status=data.status,
|
||||
)
|
||||
db.add(goal)
|
||||
await db.commit()
|
||||
await db.refresh(goal)
|
||||
return goal
|
||||
|
||||
|
||||
@router.patch("/{goal_id}", response_model=GoalOut)
|
||||
async def update_goal(
|
||||
goal_id: str,
|
||||
data: GoalUpdate,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
result = await db.execute(
|
||||
select(Goal).where(Goal.id == goal_id, Goal.user_id == current_user.id)
|
||||
)
|
||||
goal = result.scalar_one_or_none()
|
||||
if not goal:
|
||||
raise HTTPException(status_code=404, detail="目标不存在")
|
||||
|
||||
payload = data.model_dump(exclude_none=True)
|
||||
if "goal_date" in payload:
|
||||
payload["goal_date"] = payload["goal_date"].isoformat()
|
||||
|
||||
for field, value in payload.items():
|
||||
setattr(goal, field, value)
|
||||
|
||||
await db.commit()
|
||||
await db.refresh(goal)
|
||||
return goal
|
||||
|
||||
|
||||
@router.delete("/{goal_id}", status_code=204)
|
||||
async def delete_goal(
|
||||
goal_id: str,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
result = await db.execute(
|
||||
select(Goal).where(Goal.id == goal_id, Goal.user_id == current_user.id)
|
||||
)
|
||||
goal = result.scalar_one_or_none()
|
||||
if not goal:
|
||||
raise HTTPException(status_code=404, detail="目标不存在")
|
||||
|
||||
await db.delete(goal)
|
||||
await db.commit()
|
||||
139
backend/app/routers/log.py
Normal file
139
backend/app/routers/log.py
Normal file
@@ -0,0 +1,139 @@
|
||||
from datetime import datetime
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from pydantic import BaseModel
|
||||
from typing import Any, Optional
|
||||
from app.database import get_db
|
||||
from app.models.user import User
|
||||
from app.routers.auth import get_current_user
|
||||
from app.services.log_service import LogService, parse_datetime_filter, serialize_log
|
||||
|
||||
router = APIRouter(prefix="/api/logs", tags=["Log"])
|
||||
|
||||
|
||||
class LogOut(BaseModel):
|
||||
id: str
|
||||
level: str
|
||||
type: str
|
||||
user_id: Optional[str]
|
||||
request_id: Optional[str]
|
||||
route: Optional[str]
|
||||
method: Optional[str]
|
||||
status_code: Optional[int]
|
||||
error_type: Optional[str]
|
||||
operation: Optional[str]
|
||||
message: str
|
||||
source: Optional[str]
|
||||
details: Optional[dict[str, Any]]
|
||||
duration_ms: Optional[int]
|
||||
created_at: Optional[str]
|
||||
updated_at: Optional[str]
|
||||
|
||||
|
||||
class LogStatsOut(BaseModel):
|
||||
total: int
|
||||
by_type: dict
|
||||
by_level: dict
|
||||
|
||||
|
||||
class LogQueryOut(BaseModel):
|
||||
logs: list[LogOut]
|
||||
total: int
|
||||
page: int
|
||||
page_size: int
|
||||
|
||||
|
||||
@router.get("", response_model=LogQueryOut)
|
||||
async def list_logs(
|
||||
log_type: Optional[str] = Query(None, description="日志类型: agent/system/chat"),
|
||||
level: Optional[str] = Query(None, description="日志级别: debug/info/warning/error"),
|
||||
source: Optional[str] = Query(None, description="来源模块"),
|
||||
request_id: Optional[str] = Query(None, description="请求 ID"),
|
||||
route: Optional[str] = Query(None, description="路由"),
|
||||
operation: Optional[str] = Query(None, description="业务操作"),
|
||||
status_code: Optional[int] = Query(None, description="HTTP 状态码"),
|
||||
start_at: Optional[str] = Query(None, description="开始时间 ISO"),
|
||||
end_at: Optional[str] = Query(None, description="结束时间 ISO"),
|
||||
page: int = Query(1, ge=1),
|
||||
page_size: int = Query(50, ge=1, le=200),
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
"""查询日志列表"""
|
||||
start_dt = parse_datetime_filter(start_at)
|
||||
end_dt = parse_datetime_filter(end_at)
|
||||
if start_dt and end_dt and start_dt > end_dt:
|
||||
raise HTTPException(status_code=422, detail="开始时间不能晚于结束时间")
|
||||
|
||||
svc = LogService(db)
|
||||
offset = (page - 1) * page_size
|
||||
logs, total = await svc.list_logs(
|
||||
log_type=log_type,
|
||||
level=level,
|
||||
user_id=current_user.id,
|
||||
source=source,
|
||||
request_id=request_id,
|
||||
route=route,
|
||||
operation=operation,
|
||||
status_code=status_code,
|
||||
start_at=start_dt,
|
||||
end_at=end_dt,
|
||||
limit=page_size,
|
||||
offset=offset,
|
||||
)
|
||||
return LogQueryOut(
|
||||
logs=[LogOut.model_validate(serialize_log(log)) for log in logs],
|
||||
total=total,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/stats", response_model=LogStatsOut)
|
||||
async def get_log_stats(
|
||||
log_type: Optional[str] = Query(None, description="日志类型: agent/system/chat"),
|
||||
level: Optional[str] = Query(None, description="日志级别: debug/info/warning/error"),
|
||||
source: Optional[str] = Query(None, description="来源模块"),
|
||||
request_id: Optional[str] = Query(None, description="请求 ID"),
|
||||
route: Optional[str] = Query(None, description="路由"),
|
||||
operation: Optional[str] = Query(None, description="业务操作"),
|
||||
status_code: Optional[int] = Query(None, description="HTTP 状态码"),
|
||||
start_at: Optional[str] = Query(None, description="开始时间 ISO"),
|
||||
end_at: Optional[str] = Query(None, description="结束时间 ISO"),
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
"""获取日志统计"""
|
||||
start_dt = parse_datetime_filter(start_at)
|
||||
end_dt = parse_datetime_filter(end_at)
|
||||
if start_dt and end_dt and start_dt > end_dt:
|
||||
raise HTTPException(status_code=422, detail="开始时间不能晚于结束时间")
|
||||
|
||||
svc = LogService(db)
|
||||
stats = await svc.get_log_stats(
|
||||
log_type=log_type,
|
||||
level=level,
|
||||
user_id=current_user.id,
|
||||
source=source,
|
||||
request_id=request_id,
|
||||
route=route,
|
||||
operation=operation,
|
||||
status_code=status_code,
|
||||
start_at=start_dt,
|
||||
end_at=end_dt,
|
||||
)
|
||||
return LogStatsOut(**stats)
|
||||
|
||||
|
||||
@router.get("/recent", response_model=list[LogOut])
|
||||
async def get_recent_logs(
|
||||
log_type: Optional[str] = Query(None),
|
||||
hours: int = Query(24, ge=1, le=168),
|
||||
limit: int = Query(50, ge=1, le=200),
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
"""获取最近的日志"""
|
||||
svc = LogService(db)
|
||||
logs = await svc.get_recent_logs(log_type=log_type, user_id=current_user.id, hours=hours, limit=limit)
|
||||
return [LogOut.model_validate(serialize_log(log)) for log in logs]
|
||||
90
backend/app/routers/reminder.py
Normal file
90
backend/app/routers/reminder.py
Normal file
@@ -0,0 +1,90 @@
|
||||
from datetime import date, datetime
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.database import get_db
|
||||
from app.models.reminder import Reminder
|
||||
from app.models.user import User
|
||||
from app.routers.auth import get_current_user
|
||||
from app.schemas.reminder import ReminderCreate, ReminderListOut, ReminderOut, ReminderUpdate
|
||||
|
||||
router = APIRouter(prefix="/api/reminders", tags=["提醒"])
|
||||
|
||||
|
||||
@router.get("", response_model=ReminderListOut)
|
||||
async def list_reminders(
|
||||
date_str: str = Query(...),
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
target_date = date.fromisoformat(date_str)
|
||||
start = datetime.combine(target_date, datetime.min.time())
|
||||
end = datetime.combine(target_date, datetime.max.time())
|
||||
query = (
|
||||
select(Reminder)
|
||||
.where(Reminder.user_id == current_user.id)
|
||||
.where(Reminder.reminder_at >= start)
|
||||
.where(Reminder.reminder_at <= end)
|
||||
.order_by(Reminder.reminder_at.asc(), Reminder.created_at.asc())
|
||||
)
|
||||
items = (await db.execute(query)).scalars().all()
|
||||
return ReminderListOut(items=items)
|
||||
|
||||
|
||||
@router.post("", response_model=ReminderOut, status_code=201)
|
||||
async def create_reminder(
|
||||
data: ReminderCreate,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
reminder = Reminder(
|
||||
user_id=current_user.id,
|
||||
title=data.title,
|
||||
note=data.note,
|
||||
reminder_at=data.reminder_at,
|
||||
)
|
||||
db.add(reminder)
|
||||
await db.commit()
|
||||
await db.refresh(reminder)
|
||||
return reminder
|
||||
|
||||
|
||||
@router.patch("/{reminder_id}", response_model=ReminderOut)
|
||||
async def update_reminder(
|
||||
reminder_id: str,
|
||||
data: ReminderUpdate,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
result = await db.execute(
|
||||
select(Reminder).where(Reminder.id == reminder_id, Reminder.user_id == current_user.id)
|
||||
)
|
||||
reminder = result.scalar_one_or_none()
|
||||
if not reminder:
|
||||
raise HTTPException(status_code=404, detail="提醒不存在")
|
||||
|
||||
for field, value in data.model_dump(exclude_none=True).items():
|
||||
setattr(reminder, field, value)
|
||||
|
||||
await db.commit()
|
||||
await db.refresh(reminder)
|
||||
return reminder
|
||||
|
||||
|
||||
@router.delete("/{reminder_id}", status_code=204)
|
||||
async def delete_reminder(
|
||||
reminder_id: str,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
result = await db.execute(
|
||||
select(Reminder).where(Reminder.id == reminder_id, Reminder.user_id == current_user.id)
|
||||
)
|
||||
reminder = result.scalar_one_or_none()
|
||||
if not reminder:
|
||||
raise HTTPException(status_code=404, detail="提醒不存在")
|
||||
|
||||
await db.delete(reminder)
|
||||
await db.commit()
|
||||
160
backend/app/routers/schedule_center.py
Normal file
160
backend/app/routers/schedule_center.py
Normal file
@@ -0,0 +1,160 @@
|
||||
from calendar import monthrange
|
||||
from datetime import UTC, date, datetime
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.database import get_db
|
||||
from app.models.goal import Goal
|
||||
from app.models.reminder import Reminder
|
||||
from app.models.task import Task, TaskPriority
|
||||
from app.models.todo import DailyTodo
|
||||
from app.models.user import User
|
||||
from app.routers.auth import get_current_user
|
||||
from app.schemas.schedule_center import (
|
||||
ScheduleCenterDateOut,
|
||||
ScheduleCenterDaySummary,
|
||||
ScheduleCenterMonthOut,
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/api/schedule-center", tags=["调度中心"])
|
||||
|
||||
|
||||
def _build_summary(
|
||||
target_date: str,
|
||||
todos: list[DailyTodo],
|
||||
tasks: list[Task],
|
||||
reminders: list[Reminder],
|
||||
goals: list[Goal],
|
||||
) -> ScheduleCenterDaySummary:
|
||||
return ScheduleCenterDaySummary(
|
||||
date=target_date,
|
||||
todo_total=len(todos),
|
||||
todo_completed=sum(1 for item in todos if item.is_completed),
|
||||
task_due_total=len(tasks),
|
||||
high_priority_total=sum(1 for item in tasks if item.priority in {TaskPriority.HIGH, TaskPriority.URGENT}),
|
||||
reminder_total=len(reminders),
|
||||
goal_total=len(goals),
|
||||
)
|
||||
|
||||
|
||||
@router.get("/month", response_model=ScheduleCenterMonthOut)
|
||||
async def get_month_schedule(
|
||||
year: int = Query(..., ge=2000, le=2100),
|
||||
month: int = Query(..., ge=1, le=12),
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
month_start = date(year, month, 1)
|
||||
days_in_month = monthrange(month_start.year, month_start.month)[1]
|
||||
start_key = month_start.isoformat()
|
||||
end_key = month_start.replace(day=days_in_month).isoformat()
|
||||
start_dt = datetime.combine(month_start, datetime.min.time())
|
||||
end_dt = datetime.combine(month_start.replace(day=days_in_month), datetime.max.time())
|
||||
|
||||
todos = (await db.execute(
|
||||
select(DailyTodo).where(DailyTodo.user_id == current_user.id, DailyTodo.todo_date >= start_key, DailyTodo.todo_date <= end_key)
|
||||
)).scalars().all()
|
||||
tasks = (await db.execute(
|
||||
select(Task).where(
|
||||
Task.user_id == current_user.id,
|
||||
Task.due_date.is_not(None),
|
||||
Task.due_date >= start_dt,
|
||||
Task.due_date <= end_dt,
|
||||
)
|
||||
)).scalars().all()
|
||||
reminders = (await db.execute(
|
||||
select(Reminder).where(
|
||||
Reminder.user_id == current_user.id,
|
||||
Reminder.reminder_at >= start_dt,
|
||||
Reminder.reminder_at <= end_dt,
|
||||
)
|
||||
)).scalars().all()
|
||||
goals = (await db.execute(
|
||||
select(Goal).where(Goal.user_id == current_user.id, Goal.goal_date >= start_key, Goal.goal_date <= end_key)
|
||||
)).scalars().all()
|
||||
|
||||
todo_map: dict[str, list[DailyTodo]] = {}
|
||||
for item in todos:
|
||||
todo_map.setdefault(item.todo_date, []).append(item)
|
||||
|
||||
task_map: dict[str, list[Task]] = {}
|
||||
for item in tasks:
|
||||
key = item.due_date.date().isoformat()
|
||||
task_map.setdefault(key, []).append(item)
|
||||
|
||||
reminder_map: dict[str, list[Reminder]] = {}
|
||||
for item in reminders:
|
||||
key = item.reminder_at.date().isoformat()
|
||||
reminder_map.setdefault(key, []).append(item)
|
||||
|
||||
goal_map: dict[str, list[Goal]] = {}
|
||||
for item in goals:
|
||||
goal_map.setdefault(item.goal_date, []).append(item)
|
||||
|
||||
days = []
|
||||
for day in range(1, days_in_month + 1):
|
||||
date_key = month_start.replace(day=day).isoformat()
|
||||
days.append(_build_summary(
|
||||
date_key,
|
||||
todo_map.get(date_key, []),
|
||||
task_map.get(date_key, []),
|
||||
reminder_map.get(date_key, []),
|
||||
goal_map.get(date_key, []),
|
||||
))
|
||||
|
||||
return ScheduleCenterMonthOut(month=f"{year:04d}-{month:02d}", days=days)
|
||||
|
||||
|
||||
@router.get("/date", response_model=ScheduleCenterDateOut)
|
||||
async def get_date_schedule(
|
||||
date_str: date = Query(...),
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
target_date = date_str
|
||||
start_dt = datetime.combine(target_date, datetime.min.time())
|
||||
end_dt = datetime.combine(target_date, datetime.max.time())
|
||||
date_key = target_date.isoformat()
|
||||
|
||||
todos = (await db.execute(
|
||||
select(DailyTodo)
|
||||
.where(DailyTodo.user_id == current_user.id, DailyTodo.todo_date == date_key)
|
||||
.order_by(DailyTodo.created_at.desc())
|
||||
)).scalars().all()
|
||||
tasks = (await db.execute(
|
||||
select(Task)
|
||||
.where(
|
||||
Task.user_id == current_user.id,
|
||||
Task.due_date.is_not(None),
|
||||
Task.due_date >= start_dt,
|
||||
Task.due_date <= end_dt,
|
||||
)
|
||||
.order_by(Task.created_at.desc())
|
||||
)).scalars().all()
|
||||
reminders = (await db.execute(
|
||||
select(Reminder)
|
||||
.where(
|
||||
Reminder.user_id == current_user.id,
|
||||
Reminder.reminder_at >= start_dt,
|
||||
Reminder.reminder_at <= end_dt,
|
||||
)
|
||||
.order_by(Reminder.reminder_at.asc(), Reminder.created_at.asc())
|
||||
)).scalars().all()
|
||||
goals = (await db.execute(
|
||||
select(Goal)
|
||||
.where(Goal.user_id == current_user.id, Goal.goal_date == date_key)
|
||||
.order_by(Goal.created_at.desc())
|
||||
)).scalars().all()
|
||||
|
||||
summary = _build_summary(date_key, todos, tasks, reminders, goals)
|
||||
return ScheduleCenterDateOut(
|
||||
date=date_key,
|
||||
todos=todos,
|
||||
tasks=tasks,
|
||||
reminders=reminders,
|
||||
goals=goals,
|
||||
summary=summary,
|
||||
generated_at=datetime.now(UTC),
|
||||
)
|
||||
@@ -1,4 +1,6 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
import logging
|
||||
import time
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from app.database import get_db
|
||||
from app.models.user import User
|
||||
@@ -6,22 +8,40 @@ from app.routers.auth import get_current_user
|
||||
from app.schemas.settings import (
|
||||
SettingsOut, ProfileUpdateIn, LLMConfigIn, SchedulerConfigIn, LLMTestIn
|
||||
)
|
||||
from app.services.log_service import LogService
|
||||
from app.services.settings_service import (
|
||||
get_user_settings, update_user_profile, update_llm_config,
|
||||
update_scheduler_config, test_llm_connection
|
||||
)
|
||||
from app.logging_utils import summarize_llm_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/settings", tags=["设置"])
|
||||
|
||||
|
||||
@router.get("", response_model=SettingsOut)
|
||||
async def get_settings(
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
request.state.user_id = current_user.id
|
||||
settings = await get_user_settings(current_user.id, db)
|
||||
if not settings:
|
||||
raise HTTPException(status_code=404, detail="用户不存在")
|
||||
|
||||
await LogService(db).system_log(
|
||||
message="加载用户设置",
|
||||
source="settings",
|
||||
user_id=current_user.id,
|
||||
request_id=request.state.request_id,
|
||||
route=request.url.path,
|
||||
method=request.method,
|
||||
status_code=200,
|
||||
operation="settings.get",
|
||||
details={"llm_config": summarize_llm_config(settings.get("llm_config"))},
|
||||
)
|
||||
return settings
|
||||
|
||||
|
||||
@@ -46,42 +66,128 @@ async def update_profile(
|
||||
@router.put("/llm")
|
||||
async def update_llm(
|
||||
data: LLMConfigIn,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
request.state.user_id = current_user.id
|
||||
log_service = LogService(db)
|
||||
start = time.perf_counter()
|
||||
payload = data.model_dump(exclude_none=True)
|
||||
try:
|
||||
config = await update_llm_config(current_user.id, data.model_dump(exclude_none=True), db)
|
||||
config = await update_llm_config(current_user.id, payload, db)
|
||||
await log_service.system_log(
|
||||
message="更新 LLM 配置成功",
|
||||
source="settings",
|
||||
user_id=current_user.id,
|
||||
request_id=request.state.request_id,
|
||||
route=request.url.path,
|
||||
method=request.method,
|
||||
status_code=200,
|
||||
operation="settings.update_llm",
|
||||
duration_ms=int((time.perf_counter() - start) * 1000),
|
||||
details={
|
||||
"request": summarize_llm_config(payload),
|
||||
"stored": summarize_llm_config(config),
|
||||
},
|
||||
)
|
||||
return {"llm_config": config}
|
||||
except ValueError as e:
|
||||
await log_service.system_log(
|
||||
message="更新 LLM 配置失败",
|
||||
level="warning",
|
||||
source="settings",
|
||||
user_id=current_user.id,
|
||||
request_id=request.state.request_id,
|
||||
route=request.url.path,
|
||||
method=request.method,
|
||||
status_code=400,
|
||||
error_type=e.__class__.__name__,
|
||||
operation="settings.update_llm",
|
||||
duration_ms=int((time.perf_counter() - start) * 1000),
|
||||
details={"request": summarize_llm_config(payload), "detail": str(e)},
|
||||
)
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/llm/test")
|
||||
async def test_llm(
|
||||
data: LLMTestIn,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
request.state.user_id = current_user.id
|
||||
start = time.perf_counter()
|
||||
result = await test_llm_connection(
|
||||
provider=data.provider,
|
||||
model=data.model,
|
||||
base_url=data.base_url,
|
||||
api_key=data.api_key
|
||||
)
|
||||
await LogService(db).system_log(
|
||||
message="测试 LLM 连接",
|
||||
level="info" if result.get("success") else "warning",
|
||||
source="settings",
|
||||
user_id=current_user.id,
|
||||
request_id=request.state.request_id,
|
||||
route=request.url.path,
|
||||
method=request.method,
|
||||
status_code=200,
|
||||
error_type=None if result.get("success") else "llm_test_failed",
|
||||
operation="settings.test_llm",
|
||||
duration_ms=int((time.perf_counter() - start) * 1000),
|
||||
details={
|
||||
"provider": data.provider,
|
||||
"model": data.model,
|
||||
"has_base_url": bool(data.base_url),
|
||||
"has_api_key": bool(data.api_key),
|
||||
"success": result.get("success"),
|
||||
"error": result.get("error"),
|
||||
},
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
@router.put("/scheduler")
|
||||
async def update_scheduler(
|
||||
data: SchedulerConfigIn,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
request.state.user_id = current_user.id
|
||||
payload = data.model_dump(exclude_none=True)
|
||||
try:
|
||||
config = await update_scheduler_config(
|
||||
current_user.id,
|
||||
data.model_dump(exclude_none=True),
|
||||
payload,
|
||||
db
|
||||
)
|
||||
await LogService(db).system_log(
|
||||
message="更新调度配置成功",
|
||||
source="settings",
|
||||
user_id=current_user.id,
|
||||
request_id=request.state.request_id,
|
||||
route=request.url.path,
|
||||
method=request.method,
|
||||
status_code=200,
|
||||
operation="settings.update_scheduler",
|
||||
details={"request": payload, "stored": config},
|
||||
)
|
||||
return {"scheduler_config": config}
|
||||
except ValueError as e:
|
||||
await LogService(db).system_log(
|
||||
message="更新调度配置失败",
|
||||
level="warning",
|
||||
source="settings",
|
||||
user_id=current_user.id,
|
||||
request_id=request.state.request_id,
|
||||
route=request.url.path,
|
||||
method=request.method,
|
||||
status_code=400,
|
||||
error_type=e.__class__.__name__,
|
||||
operation="settings.update_scheduler",
|
||||
details={"request": payload, "detail": str(e)},
|
||||
)
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
|
||||
124
backend/app/routers/skill.py
Normal file
124
backend/app/routers/skill.py
Normal file
@@ -0,0 +1,124 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from app.database import get_db
|
||||
from app.models.skill import Skill
|
||||
from app.models.user import User
|
||||
from app.routers.auth import get_current_user
|
||||
from app.schemas.skill import SkillCreate, SkillOut, SkillUpdate
|
||||
from app.services.admin_bootstrap_service import ensure_builtin_skills
|
||||
from app.services.skill_service import SkillService
|
||||
|
||||
router = APIRouter(prefix="/api/skills", tags=["Skill"])
|
||||
|
||||
|
||||
@router.post("", response_model=SkillOut, status_code=201)
|
||||
async def create_skill(
|
||||
data: SkillCreate,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
skill = Skill(
|
||||
name=data.name,
|
||||
description=data.description,
|
||||
instructions=data.instructions,
|
||||
agent_type=data.agent_type,
|
||||
tools=data.tools,
|
||||
required_context=data.required_context,
|
||||
output_format=data.output_format,
|
||||
visibility=data.visibility,
|
||||
team_id=data.team_id,
|
||||
is_active=data.is_active,
|
||||
owner_id=current_user.id,
|
||||
)
|
||||
db.add(skill)
|
||||
await db.commit()
|
||||
await db.refresh(skill)
|
||||
return skill
|
||||
|
||||
|
||||
@router.get("", response_model=list[SkillOut])
|
||||
async def list_skills(
|
||||
agent_type: str | None = Query(default=None),
|
||||
visibility: str | None = Query(default=None),
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
service = SkillService(db)
|
||||
return await service.list_for_user(current_user.id, agent_type=agent_type, visibility=visibility)
|
||||
|
||||
|
||||
@router.post("/bootstrap-builtin", response_model=list[SkillOut])
|
||||
async def bootstrap_builtin_skills(
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
await ensure_builtin_skills(db, preferred_owner_id=current_user.id)
|
||||
service = SkillService(db)
|
||||
return await service.list_for_user(current_user.id)
|
||||
|
||||
|
||||
@router.get("/{skill_id}", response_model=SkillOut)
|
||||
async def get_skill(
|
||||
skill_id: str,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
result = await db.execute(select(Skill).where(Skill.id == skill_id))
|
||||
skill = result.scalar_one_or_none()
|
||||
if not skill:
|
||||
raise HTTPException(status_code=404, detail="Skill not found")
|
||||
return skill
|
||||
|
||||
|
||||
@router.put("/{skill_id}", response_model=SkillOut)
|
||||
async def update_skill(
|
||||
skill_id: str,
|
||||
data: SkillUpdate,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
result = await db.execute(select(Skill).where(Skill.id == skill_id))
|
||||
skill = result.scalar_one_or_none()
|
||||
if not skill:
|
||||
raise HTTPException(status_code=404, detail="Skill not found")
|
||||
|
||||
if data.name is not None:
|
||||
skill.name = data.name
|
||||
if data.description is not None:
|
||||
skill.description = data.description
|
||||
if data.instructions is not None:
|
||||
skill.instructions = data.instructions
|
||||
if data.agent_type is not None:
|
||||
skill.agent_type = data.agent_type
|
||||
if data.tools is not None:
|
||||
skill.tools = data.tools
|
||||
if data.required_context is not None:
|
||||
skill.required_context = data.required_context
|
||||
if data.output_format is not None:
|
||||
skill.output_format = data.output_format
|
||||
if data.visibility is not None:
|
||||
skill.visibility = data.visibility
|
||||
if data.team_id is not None:
|
||||
skill.team_id = data.team_id
|
||||
if data.is_active is not None:
|
||||
skill.is_active = data.is_active
|
||||
|
||||
await db.commit()
|
||||
await db.refresh(skill)
|
||||
return skill
|
||||
|
||||
|
||||
@router.delete("/{skill_id}", status_code=204)
|
||||
async def delete_skill(
|
||||
skill_id: str,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
result = await db.execute(select(Skill).where(Skill.id == skill_id))
|
||||
skill = result.scalar_one_or_none()
|
||||
if not skill:
|
||||
raise HTTPException(status_code=404, detail="Skill not found")
|
||||
|
||||
await db.delete(skill)
|
||||
await db.commit()
|
||||
9
backend/app/routers/system.py
Normal file
9
backend/app/routers/system.py
Normal file
@@ -0,0 +1,9 @@
|
||||
from fastapi import APIRouter
|
||||
from app.services.system_service import SystemService
|
||||
|
||||
router = APIRouter(prefix='/api/system', tags=['system'])
|
||||
|
||||
|
||||
@router.get('/status')
|
||||
async def get_system_status():
|
||||
return SystemService().get_status()
|
||||
@@ -1,6 +1,8 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from datetime import UTC, date, datetime
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy import desc, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, desc
|
||||
from app.database import get_db
|
||||
from app.models.task import Task, TaskStatus
|
||||
from app.models.user import User
|
||||
@@ -13,12 +15,28 @@ router = APIRouter(prefix="/api/tasks", tags=["看板"])
|
||||
@router.get("", response_model=list[TaskOut])
|
||||
async def list_tasks(
|
||||
status: TaskStatus | None = None,
|
||||
due_date: date | None = Query(default=None),
|
||||
date_from: date | None = Query(default=None),
|
||||
date_to: date | None = Query(default=None),
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
query = select(Task).where(Task.user_id == current_user.id)
|
||||
if status:
|
||||
query = query.where(Task.status == status)
|
||||
if due_date:
|
||||
start = datetime.combine(due_date, datetime.min.time())
|
||||
end = datetime.combine(due_date, datetime.max.time())
|
||||
query = query.where(Task.due_date.is_not(None), Task.due_date >= start, Task.due_date <= end)
|
||||
else:
|
||||
start = datetime.combine(date_from, datetime.min.time()) if date_from else None
|
||||
end = datetime.combine(date_to, datetime.max.time()) if date_to else None
|
||||
if start and end and start > end:
|
||||
raise HTTPException(status_code=400, detail="开始日期不能晚于结束日期")
|
||||
if start is not None:
|
||||
query = query.where(Task.due_date.is_not(None), Task.due_date >= start)
|
||||
if end is not None:
|
||||
query = query.where(Task.due_date.is_not(None), Task.due_date <= end)
|
||||
query = query.order_by(desc(Task.created_at))
|
||||
result = await db.execute(query)
|
||||
return result.scalars().all()
|
||||
@@ -64,10 +82,10 @@ async def update_task(
|
||||
if field == "tags":
|
||||
setattr(task, field, json.dumps(value))
|
||||
elif field == "status" and value == TaskStatus.DONE:
|
||||
from datetime import datetime
|
||||
task.completed_at = datetime.utcnow()
|
||||
task.completed_at = datetime.now(UTC)
|
||||
setattr(task, field, value)
|
||||
else:
|
||||
elif field == "status":
|
||||
task.completed_at = None
|
||||
setattr(task, field, value)
|
||||
|
||||
await db.commit()
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
from datetime import UTC, date, datetime
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, func
|
||||
from datetime import date
|
||||
from app.database import get_db
|
||||
from app.models.todo import DailyTodo, TodoSource
|
||||
from app.models.user import User
|
||||
@@ -52,7 +53,7 @@ async def create_todo(
|
||||
user_id=current_user.id,
|
||||
title=data.title,
|
||||
source=TodoSource.MANUAL,
|
||||
todo_date=date.today().isoformat(),
|
||||
todo_date=(data.todo_date or date.today()).isoformat(),
|
||||
)
|
||||
db.add(todo)
|
||||
await db.commit()
|
||||
@@ -74,16 +75,13 @@ async def update_todo(
|
||||
if not todo:
|
||||
raise HTTPException(status_code=404, detail="待办不存在")
|
||||
|
||||
# 历史日期不允许修改
|
||||
if todo.todo_date != date.today().isoformat():
|
||||
raise HTTPException(status_code=403, detail="历史待办不可修改")
|
||||
|
||||
if data.title is not None:
|
||||
todo.title = data.title
|
||||
if data.todo_date is not None:
|
||||
todo.todo_date = data.todo_date.isoformat()
|
||||
if data.is_completed is not None:
|
||||
from datetime import datetime
|
||||
todo.is_completed = data.is_completed
|
||||
todo.completed_at = datetime.utcnow() if data.is_completed else None
|
||||
todo.completed_at = datetime.now(UTC) if data.is_completed else None
|
||||
|
||||
await db.commit()
|
||||
await db.refresh(todo)
|
||||
@@ -102,9 +100,6 @@ async def delete_todo(
|
||||
todo = result.scalar_one_or_none()
|
||||
if not todo:
|
||||
raise HTTPException(status_code=404, detail="待办不存在")
|
||||
if todo.todo_date != date.today().isoformat():
|
||||
raise HTTPException(status_code=403, detail="历史待办不可删除")
|
||||
|
||||
await db.delete(todo)
|
||||
await db.commit()
|
||||
|
||||
|
||||
@@ -41,6 +41,7 @@ class AgentConfigUpdate(BaseModel):
|
||||
description: str | None = None
|
||||
system_prompt: str | None = None
|
||||
enabled: bool | None = None
|
||||
selected_skill_ids: list[str] | None = None
|
||||
|
||||
|
||||
class AgentConfigOut(BaseModel):
|
||||
@@ -51,5 +52,6 @@ class AgentConfigOut(BaseModel):
|
||||
system_prompt: str
|
||||
enabled: bool
|
||||
is_active: bool
|
||||
selected_skill_ids: list[str]
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
|
||||
@@ -2,6 +2,7 @@ from pydantic import BaseModel, EmailStr
|
||||
|
||||
|
||||
class UserCreate(BaseModel):
|
||||
username: str
|
||||
email: EmailStr
|
||||
password: str
|
||||
full_name: str | None = None
|
||||
@@ -9,6 +10,7 @@ class UserCreate(BaseModel):
|
||||
|
||||
class UserOut(BaseModel):
|
||||
id: str
|
||||
username: str
|
||||
email: str
|
||||
full_name: str | None
|
||||
is_active: bool
|
||||
|
||||
57
backend/app/schemas/brain.py
Normal file
57
backend/app/schemas/brain.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class BrainOverviewOut(BaseModel):
|
||||
active_memory_count: int
|
||||
important_tag_count: int
|
||||
secondary_tag_count: int
|
||||
recent_memory_titles: list[str]
|
||||
|
||||
|
||||
class BrainMemoryOut(BaseModel):
|
||||
id: str
|
||||
memory_type: str
|
||||
title: str
|
||||
content: str
|
||||
importance: int
|
||||
confidence: float
|
||||
status: str
|
||||
created_at: datetime
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class BrainTagOut(BaseModel):
|
||||
id: str
|
||||
name: str
|
||||
category: str
|
||||
priority: str
|
||||
score: float
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class BrainEventOut(BaseModel):
|
||||
id: str
|
||||
source_type: str
|
||||
source_id: str
|
||||
event_type: str
|
||||
title: str | None
|
||||
content_summary: str | None
|
||||
status: str
|
||||
created_at: datetime
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class BrainTagGroupsOut(BaseModel):
|
||||
important: list[BrainTagOut]
|
||||
secondary: list[BrainTagOut]
|
||||
|
||||
|
||||
class BrainLearnRunOut(BaseModel):
|
||||
events_considered: int
|
||||
candidates_created: int
|
||||
memories_promoted: int
|
||||
@@ -12,6 +12,7 @@ class MessageOut(BaseModel):
|
||||
content: str
|
||||
model: str | None
|
||||
tokens_used: int | None
|
||||
attachments: list[dict] | None = None
|
||||
created_at: datetime
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
@@ -35,7 +36,8 @@ class ChatRequest(BaseModel):
|
||||
message: str
|
||||
conversation_id: str | None = None
|
||||
agent_id: str | None = None
|
||||
file_ids: list[str] = [] # 新增
|
||||
model_name: str | None = None
|
||||
file_ids: list[str] = []
|
||||
|
||||
|
||||
class ChatResponse(BaseModel):
|
||||
@@ -43,3 +45,4 @@ class ChatResponse(BaseModel):
|
||||
message_id: str
|
||||
content: str
|
||||
agent_name: str
|
||||
model_name: str | None = None
|
||||
|
||||
@@ -11,6 +11,13 @@ class DocumentOut(BaseModel):
|
||||
summary: str | None
|
||||
chunk_count: int
|
||||
is_indexed: bool
|
||||
ingestion_status: str
|
||||
ingestion_error: str | None
|
||||
indexed_at: datetime | None
|
||||
parser_version: str | None
|
||||
index_version: str | None
|
||||
normalized_format: str | None
|
||||
folder_id: str | None
|
||||
created_at: datetime
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
@@ -25,6 +32,10 @@ class DocumentChunkOut(BaseModel):
|
||||
model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class DocumentChunkUpdate(BaseModel):
|
||||
content: str
|
||||
|
||||
|
||||
class SearchRequest(BaseModel):
|
||||
query: str
|
||||
top_k: int = 5
|
||||
|
||||
35
backend/app/schemas/goal.py
Normal file
35
backend/app/schemas/goal.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from datetime import date, datetime
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.models.goal import GoalStatus
|
||||
|
||||
|
||||
class GoalCreate(BaseModel):
|
||||
title: str
|
||||
goal_date: date
|
||||
note: str | None = None
|
||||
status: GoalStatus = GoalStatus.ACTIVE
|
||||
|
||||
|
||||
class GoalUpdate(BaseModel):
|
||||
title: str | None = None
|
||||
goal_date: date | None = None
|
||||
note: str | None = None
|
||||
status: GoalStatus | None = None
|
||||
|
||||
|
||||
class GoalOut(BaseModel):
|
||||
id: str
|
||||
title: str
|
||||
note: str | None
|
||||
goal_date: str
|
||||
status: GoalStatus
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class GoalListOut(BaseModel):
|
||||
items: list[GoalOut]
|
||||
40
backend/app/schemas/reminder.py
Normal file
40
backend/app/schemas/reminder.py
Normal file
@@ -0,0 +1,40 @@
|
||||
from datetime import date, datetime
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.models.reminder import ReminderStatus
|
||||
|
||||
|
||||
class ReminderCreate(BaseModel):
|
||||
title: str
|
||||
reminder_at: datetime
|
||||
note: str | None = None
|
||||
|
||||
|
||||
class ReminderUpdate(BaseModel):
|
||||
title: str | None = None
|
||||
reminder_at: datetime | None = None
|
||||
note: str | None = None
|
||||
status: ReminderStatus | None = None
|
||||
is_dismissed: bool | None = None
|
||||
|
||||
|
||||
class ReminderOut(BaseModel):
|
||||
id: str
|
||||
title: str
|
||||
note: str | None
|
||||
reminder_at: datetime
|
||||
status: ReminderStatus
|
||||
is_dismissed: bool
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class ReminderListOut(BaseModel):
|
||||
items: list[ReminderOut]
|
||||
|
||||
|
||||
class ReminderDateQuery(BaseModel):
|
||||
date: date
|
||||
33
backend/app/schemas/schedule_center.py
Normal file
33
backend/app/schemas/schedule_center.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.schemas.goal import GoalOut
|
||||
from app.schemas.reminder import ReminderOut
|
||||
from app.schemas.task import TaskOut
|
||||
from app.schemas.todo import TodoOut
|
||||
|
||||
|
||||
class ScheduleCenterDaySummary(BaseModel):
|
||||
date: str
|
||||
todo_total: int
|
||||
todo_completed: int
|
||||
task_due_total: int
|
||||
high_priority_total: int
|
||||
reminder_total: int
|
||||
goal_total: int
|
||||
|
||||
|
||||
class ScheduleCenterMonthOut(BaseModel):
|
||||
month: str
|
||||
days: list[ScheduleCenterDaySummary]
|
||||
|
||||
|
||||
class ScheduleCenterDateOut(BaseModel):
|
||||
date: str
|
||||
todos: list[TodoOut]
|
||||
tasks: list[TaskOut]
|
||||
reminders: list[ReminderOut]
|
||||
goals: list[GoalOut]
|
||||
summary: ScheduleCenterDaySummary
|
||||
generated_at: datetime
|
||||
@@ -10,7 +10,8 @@ LLMType = Literal["chat", "vlm", "embedding", "rerank"]
|
||||
# 单个模型配置
|
||||
class LLMModelConfig(BaseModel):
|
||||
name: str = "" # 模型名称/别名,用于标识
|
||||
provider: LLMProviderType = "openai"
|
||||
# provider 已废弃为必填字段:优先通过 base_url + model 推断。
|
||||
provider: Optional[LLMProviderType] = None
|
||||
model: str = ""
|
||||
base_url: str = ""
|
||||
api_key: str = ""
|
||||
@@ -52,7 +53,8 @@ class SettingsOut(BaseModel):
|
||||
# 测试 LLM 连接请求
|
||||
class LLMTestIn(BaseModel):
|
||||
type: LLMType
|
||||
provider: LLMProviderType
|
||||
# provider 已废弃为必填字段:优先通过 base_url + model 推断。
|
||||
provider: Optional[LLMProviderType] = None
|
||||
model: str
|
||||
base_url: str
|
||||
api_key: str
|
||||
|
||||
50
backend/app/schemas/skill.py
Normal file
50
backend/app/schemas/skill.py
Normal file
@@ -0,0 +1,50 @@
|
||||
from datetime import datetime
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class SkillCreate(BaseModel):
|
||||
name: str
|
||||
description: Optional[str] = None
|
||||
instructions: str
|
||||
agent_type: str # master/schedule_planner/executor/librarian/analyst
|
||||
tools: list[str] = []
|
||||
required_context: list[str] = []
|
||||
output_format: Optional[str] = None
|
||||
visibility: str = "private"
|
||||
team_id: Optional[str] = None
|
||||
is_active: bool = True
|
||||
|
||||
|
||||
class SkillUpdate(BaseModel):
|
||||
# All fields Optional
|
||||
name: Optional[str] = None
|
||||
description: Optional[str] = None
|
||||
instructions: Optional[str] = None
|
||||
agent_type: Optional[str] = None
|
||||
tools: Optional[list[str]] = None
|
||||
required_context: Optional[list[str]] = None
|
||||
output_format: Optional[str] = None
|
||||
visibility: Optional[str] = None
|
||||
team_id: Optional[str] = None
|
||||
is_active: Optional[bool] = None
|
||||
|
||||
|
||||
class SkillOut(BaseModel):
|
||||
id: str
|
||||
name: str
|
||||
description: Optional[str]
|
||||
instructions: str
|
||||
agent_type: str
|
||||
tools: list[str]
|
||||
required_context: list[str]
|
||||
output_format: Optional[str]
|
||||
visibility: str
|
||||
is_builtin: bool
|
||||
team_id: Optional[str]
|
||||
is_active: bool
|
||||
owner_id: str
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
@@ -1,15 +1,19 @@
|
||||
from datetime import date, datetime
|
||||
|
||||
from pydantic import BaseModel
|
||||
from datetime import datetime
|
||||
|
||||
from app.models.todo import TodoSource
|
||||
|
||||
|
||||
class TodoCreate(BaseModel):
|
||||
title: str
|
||||
todo_date: date | None = None
|
||||
|
||||
|
||||
class TodoUpdate(BaseModel):
|
||||
title: str | None = None
|
||||
is_completed: bool | None = None
|
||||
todo_date: date | None = None
|
||||
|
||||
|
||||
class TodoOut(BaseModel):
|
||||
|
||||
183
backend/app/services/admin_bootstrap_service.py
Normal file
183
backend/app/services/admin_bootstrap_service.py
Normal file
@@ -0,0 +1,183 @@
|
||||
from sqlalchemy import or_, select
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.models.skill import Skill
|
||||
from app.models.user import User
|
||||
from app.services.auth_service import get_password_hash
|
||||
|
||||
|
||||
BUILTIN_SKILLS = [
|
||||
{
|
||||
'name': '今日重点拆解',
|
||||
'description': '帮助日程规划师从上下文中提炼今天最值得推进的事项。',
|
||||
'instructions': '优先识别今天最关键的 1-3 个重点,说明原因,并给出可执行顺序。',
|
||||
'agent_type': 'schedule_planner',
|
||||
'tools': ['calendar', 'tasks'],
|
||||
'visibility': 'market',
|
||||
},
|
||||
{
|
||||
'name': '周计划编排',
|
||||
'description': '把本周目标整理成可落地的节奏与时间块。',
|
||||
'instructions': '将目标拆成周内节奏安排,明确先后顺序、时间块与缓冲。',
|
||||
'agent_type': 'schedule_planner',
|
||||
'tools': ['calendar'],
|
||||
'visibility': 'market',
|
||||
},
|
||||
{
|
||||
'name': '时间冲突分析',
|
||||
'description': '识别任务、日程与优先级之间的冲突。',
|
||||
'instructions': '分析冲突来源、影响和推荐取舍,必要时给出替代方案。',
|
||||
'agent_type': 'schedule_planner',
|
||||
'tools': ['calendar', 'tasks'],
|
||||
'visibility': 'market',
|
||||
},
|
||||
{
|
||||
'name': '任务执行 SOP',
|
||||
'description': '为执行角色提供标准执行步骤和结果回报格式。',
|
||||
'instructions': '执行前先确认目标与边界,执行中记录关键动作,执行后输出结果、风险与下一步。',
|
||||
'agent_type': 'executor',
|
||||
'tools': ['shell', 'api_calls'],
|
||||
'visibility': 'market',
|
||||
},
|
||||
{
|
||||
'name': '外部交互推进',
|
||||
'description': '支持论坛、外部接口或内容发布类动作。',
|
||||
'instructions': '围绕外部交互任务,优先保证动作完整、结果清晰、反馈及时。',
|
||||
'agent_type': 'executor',
|
||||
'tools': ['api_calls', 'git'],
|
||||
'visibility': 'market',
|
||||
},
|
||||
{
|
||||
'name': '知识检索摘要',
|
||||
'description': '从知识中枢中提炼与当前问题最相关的信息。',
|
||||
'instructions': '检索后只保留当前决策需要的内容,输出摘要、来源与缺口。',
|
||||
'agent_type': 'librarian',
|
||||
'tools': ['web_search', 'database'],
|
||||
'visibility': 'market',
|
||||
},
|
||||
{
|
||||
'name': '图谱沉淀策略',
|
||||
'description': '帮助知识管理员把零散信息沉淀为结构化关系。',
|
||||
'instructions': '识别应沉淀的实体、关系与后续可检索维度。',
|
||||
'agent_type': 'librarian',
|
||||
'tools': ['database'],
|
||||
'visibility': 'market',
|
||||
},
|
||||
{
|
||||
'name': '风险识别模板',
|
||||
'description': '帮助分析师快速识别当前推进中的风险点。',
|
||||
'instructions': '从进度、依赖、资源与外部信号中提炼风险,并按严重度排序。',
|
||||
'agent_type': 'analyst',
|
||||
'tools': ['database', 'api_calls'],
|
||||
'visibility': 'market',
|
||||
},
|
||||
{
|
||||
'name': '趋势洞察模板',
|
||||
'description': '把多源状态汇总为趋势与判断。',
|
||||
'instructions': '对比近期变化,输出趋势、证据、判断与建议动作。',
|
||||
'agent_type': 'analyst',
|
||||
'tools': ['database', 'code_execution'],
|
||||
'visibility': 'market',
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def _is_bootstrap_enabled(settings) -> bool:
|
||||
return bool(settings.ADMIN.strip() and settings.ADMIN_EMAIL.strip() and settings.ADMIN_PASSWORD.strip())
|
||||
|
||||
|
||||
async def ensure_admin_user(db: AsyncSession, settings) -> None:
|
||||
if not _is_bootstrap_enabled(settings):
|
||||
return
|
||||
|
||||
result = await db.execute(
|
||||
select(User).where(
|
||||
or_(User.username == settings.ADMIN.strip(), User.email == settings.ADMIN_EMAIL.strip())
|
||||
)
|
||||
)
|
||||
existing_user = result.scalar_one_or_none()
|
||||
|
||||
if existing_user:
|
||||
if (
|
||||
existing_user.username == settings.ADMIN.strip()
|
||||
and existing_user.email == settings.ADMIN_EMAIL.strip()
|
||||
and existing_user.is_superuser
|
||||
):
|
||||
return
|
||||
raise RuntimeError('admin bootstrap identity conflict')
|
||||
|
||||
admin_user = User(
|
||||
username=settings.ADMIN.strip(),
|
||||
email=settings.ADMIN_EMAIL.strip(),
|
||||
hashed_password=get_password_hash(settings.ADMIN_PASSWORD),
|
||||
full_name=settings.ADMIN_FULL_NAME or None,
|
||||
is_active=True,
|
||||
is_superuser=True,
|
||||
)
|
||||
db.add(admin_user)
|
||||
try:
|
||||
await db.commit()
|
||||
except IntegrityError:
|
||||
await db.rollback()
|
||||
result = await db.execute(
|
||||
select(User).where(
|
||||
or_(User.username == settings.ADMIN.strip(), User.email == settings.ADMIN_EMAIL.strip())
|
||||
)
|
||||
)
|
||||
existing_user = result.scalar_one_or_none()
|
||||
if (
|
||||
existing_user
|
||||
and existing_user.username == settings.ADMIN.strip()
|
||||
and existing_user.email == settings.ADMIN_EMAIL.strip()
|
||||
and existing_user.is_superuser
|
||||
):
|
||||
return
|
||||
raise
|
||||
await db.refresh(admin_user)
|
||||
|
||||
|
||||
async def ensure_builtin_skills(db: AsyncSession, preferred_owner_id: str | None = None) -> None:
|
||||
owner = None
|
||||
if preferred_owner_id:
|
||||
owner_result = await db.execute(
|
||||
select(User).where(User.id == preferred_owner_id, User.is_active == True)
|
||||
)
|
||||
owner = owner_result.scalar_one_or_none()
|
||||
|
||||
if not owner:
|
||||
owner_result = await db.execute(
|
||||
select(User).where(User.is_active == True).order_by(User.is_superuser.desc(), User.created_at.asc())
|
||||
)
|
||||
owner = owner_result.scalars().first()
|
||||
|
||||
if not owner:
|
||||
return
|
||||
|
||||
existing_result = await db.execute(select(Skill.name))
|
||||
existing_names = set(existing_result.scalars().all())
|
||||
|
||||
missing_skills = [
|
||||
Skill(
|
||||
owner_id=owner.id,
|
||||
name=item['name'],
|
||||
description=item['description'],
|
||||
instructions=item['instructions'],
|
||||
agent_type=item['agent_type'],
|
||||
tools=item['tools'],
|
||||
required_context=[],
|
||||
output_format=None,
|
||||
visibility=item['visibility'],
|
||||
is_builtin=True,
|
||||
team_id=None,
|
||||
is_active=True,
|
||||
)
|
||||
for item in BUILTIN_SKILLS
|
||||
if item['name'] not in existing_names
|
||||
]
|
||||
|
||||
if not missing_skills:
|
||||
return
|
||||
|
||||
db.add_all(missing_skills)
|
||||
await db.commit()
|
||||
@@ -5,16 +5,119 @@ Jarvis Agent 服务层
|
||||
|
||||
import json
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import AsyncGenerator
|
||||
import logging
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any, AsyncGenerator
|
||||
import asyncio
|
||||
from openai import BadRequestError
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
from langchain_core.messages import HumanMessage, AIMessage
|
||||
|
||||
from app.database import async_session
|
||||
from app.logging_utils import summarize_llm_config
|
||||
|
||||
from app.models.conversation import Conversation, Message
|
||||
from app.models.user import User
|
||||
from app.agents.graph import get_agent_graph
|
||||
from app.agents.context import set_current_user, clear_current_user
|
||||
from app.services import memory_service
|
||||
from app.services.brain_service import BrainService
|
||||
from app.services.llm_service import create_llm_from_config, resolve_provider_capabilities
|
||||
from app.agents.tools.time_reasoning import extract_reference_datetime
|
||||
from app.agents.state import initial_state
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _is_streaming_rejection_error(error: Exception, user_llm_config: dict | None) -> bool:
|
||||
capabilities = resolve_provider_capabilities(user_llm_config)
|
||||
error_text = str(error).lower()
|
||||
markers = [
|
||||
"invalid chat setting",
|
||||
"invalid params",
|
||||
"stream",
|
||||
"streaming",
|
||||
"unsupported",
|
||||
"bad_request_error",
|
||||
"http 400",
|
||||
"error code: 400",
|
||||
]
|
||||
|
||||
if isinstance(error, BadRequestError):
|
||||
return (
|
||||
getattr(capabilities, "provider", None) not in {"openai", "claude"}
|
||||
and any(marker in error_text for marker in markers)
|
||||
)
|
||||
|
||||
return any(marker in error_text for marker in markers)
|
||||
|
||||
|
||||
def _coerce_event_text(content: Any) -> str:
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
if isinstance(content, list):
|
||||
parts: list[str] = []
|
||||
for item in content:
|
||||
if isinstance(item, str):
|
||||
parts.append(item)
|
||||
elif isinstance(item, dict):
|
||||
text = item.get("text")
|
||||
if isinstance(text, str):
|
||||
parts.append(text)
|
||||
return "".join(parts)
|
||||
return str(content) if content else ""
|
||||
|
||||
|
||||
_CONTINUITY_STATE_VERSION = 1
|
||||
_CONTINUITY_SNAPSHOT_FIELDS = (
|
||||
"turn_context",
|
||||
"routing_decision",
|
||||
"continuity_state",
|
||||
"pending_action",
|
||||
"last_completed_action",
|
||||
"clarification_context",
|
||||
"tool_outcomes",
|
||||
"pending_tasks",
|
||||
"completed_tasks",
|
||||
"created_entities",
|
||||
"current_agent",
|
||||
"next_step",
|
||||
"agent_trace",
|
||||
)
|
||||
|
||||
|
||||
def _build_continuity_snapshot(state: dict[str, Any]) -> dict[str, Any] | None:
|
||||
snapshot = {
|
||||
field: state.get(field)
|
||||
for field in _CONTINUITY_SNAPSHOT_FIELDS
|
||||
if state.get(field) is not None
|
||||
}
|
||||
if not snapshot:
|
||||
return None
|
||||
return {
|
||||
"version": _CONTINUITY_STATE_VERSION,
|
||||
"state": snapshot,
|
||||
}
|
||||
|
||||
|
||||
def _extract_continuity_snapshot(payload: Any) -> dict[str, Any] | None:
|
||||
if isinstance(payload, list):
|
||||
for item in payload:
|
||||
snapshot = _extract_continuity_snapshot(item)
|
||||
if snapshot:
|
||||
return snapshot
|
||||
return None
|
||||
if not isinstance(payload, dict):
|
||||
return None
|
||||
if payload.get("kind") != "agent_continuity_state":
|
||||
return None
|
||||
if payload.get("version") != _CONTINUITY_STATE_VERSION:
|
||||
return None
|
||||
state = payload.get("state")
|
||||
if isinstance(state, dict):
|
||||
return state
|
||||
return None
|
||||
|
||||
|
||||
class AgentService:
|
||||
@@ -23,150 +126,147 @@ class AgentService:
|
||||
def __init__(self, db: AsyncSession):
|
||||
self.db = db
|
||||
|
||||
async def _try_auto_summarize_background(self, user_id: str, conversation_id: str) -> None:
|
||||
async with async_session() as session:
|
||||
await memory_service.try_auto_summarize(session, user_id, conversation_id)
|
||||
|
||||
def _build_progress_event(
|
||||
self,
|
||||
stage: str,
|
||||
label: str,
|
||||
*,
|
||||
agent: str | None = None,
|
||||
tool_name: str | None = None,
|
||||
step: str | None = None,
|
||||
steps: list[str] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"type": "progress",
|
||||
"stage": stage,
|
||||
"label": label,
|
||||
"agent": agent,
|
||||
"tool_name": tool_name,
|
||||
"step": step,
|
||||
"steps": steps or [],
|
||||
}
|
||||
|
||||
def _build_current_datetime_context(self) -> tuple[str, dict[str, str]]:
|
||||
now_utc = datetime.now(UTC)
|
||||
reference = {
|
||||
"current_time_iso": now_utc.isoformat(),
|
||||
"current_date_iso": now_utc.date().isoformat(),
|
||||
}
|
||||
context = (
|
||||
"【当前时间】\n"
|
||||
f"- current_time_utc: {reference['current_time_iso']}\n"
|
||||
f"- current_date_utc: {reference['current_date_iso']}\n"
|
||||
"说明:解析‘今天/明天/后天/本周/下周’等相对时间时,请以 current_time_utc 为准。"
|
||||
)
|
||||
return context, reference
|
||||
|
||||
async def _get_user_llm_config(self, user_id: str, model_name: str | None = None) -> dict | None:
|
||||
"""获取用户的 LLM 模型配置"""
|
||||
user = await self.db.get(User, user_id)
|
||||
if not user or not user.llm_config:
|
||||
return None
|
||||
|
||||
llm_config = user.llm_config
|
||||
|
||||
if model_name:
|
||||
models = llm_config.get("chat", [])
|
||||
for m in models:
|
||||
if m.get("name") == model_name:
|
||||
return m
|
||||
return None
|
||||
|
||||
chat_models = llm_config.get("chat", [])
|
||||
for m in chat_models:
|
||||
if m.get("enabled"):
|
||||
return m
|
||||
|
||||
return None
|
||||
|
||||
async def _load_continuity_snapshot(self, conversation: Conversation) -> dict[str, Any] | None:
|
||||
snapshot = _extract_continuity_snapshot(conversation.agent_state)
|
||||
if snapshot:
|
||||
return snapshot
|
||||
|
||||
result = await self.db.execute(
|
||||
select(Message)
|
||||
.where(Message.conversation_id == conversation.id, Message.role == "assistant")
|
||||
.order_by(Message.created_at.desc())
|
||||
)
|
||||
for message in result.scalars():
|
||||
snapshot = _extract_continuity_snapshot(message.attachments)
|
||||
if snapshot:
|
||||
return snapshot
|
||||
return None
|
||||
|
||||
async def _build_agent_state(
|
||||
self,
|
||||
*,
|
||||
user_id: str,
|
||||
conversation: Conversation,
|
||||
full_message: str,
|
||||
memory_context: str | None,
|
||||
current_datetime_context: str,
|
||||
current_datetime_reference: dict[str, str],
|
||||
user_llm_config: dict | None,
|
||||
) -> dict[str, Any]:
|
||||
state = initial_state(user_id, conversation.id)
|
||||
state.update({
|
||||
"messages": [HumanMessage(content=full_message)],
|
||||
"memory_context": memory_context,
|
||||
"current_datetime_context": current_datetime_context,
|
||||
"current_datetime_reference": current_datetime_reference,
|
||||
"user_llm_config": user_llm_config,
|
||||
})
|
||||
previous_snapshot = await self._load_continuity_snapshot(conversation)
|
||||
if previous_snapshot:
|
||||
state.update(previous_snapshot)
|
||||
state["messages"] = [HumanMessage(content=full_message)]
|
||||
return state
|
||||
|
||||
async def chat(
|
||||
self,
|
||||
user_id: str,
|
||||
message: str,
|
||||
conversation_id: str | None = None,
|
||||
) -> tuple[str, str, AsyncGenerator[str, None]]:
|
||||
file_ids: list[str] | None = None,
|
||||
model_name: str | None = None,
|
||||
) -> tuple[str, str, AsyncGenerator[dict[str, Any], None]]:
|
||||
"""
|
||||
处理对话请求(流式)
|
||||
|
||||
Returns:
|
||||
(conversation_id, message_id, response_stream)
|
||||
"""
|
||||
# 获取或创建对话
|
||||
if conversation_id:
|
||||
result = await self.db.execute(
|
||||
select(Conversation).where(Conversation.id == conversation_id)
|
||||
)
|
||||
conv = result.scalar_one_or_none()
|
||||
else:
|
||||
conv = None
|
||||
user_llm_config = await self._get_user_llm_config(user_id, model_name)
|
||||
model_name_used = model_name
|
||||
if model_name and not user_llm_config:
|
||||
raise ValueError("所选模型不可用于聊天,请切换到聊天模型")
|
||||
if user_llm_config:
|
||||
model_name_used = user_llm_config.get("name", model_name)
|
||||
|
||||
if not conv:
|
||||
conv = Conversation(user_id=user_id, title=message[:50])
|
||||
self.db.add(conv)
|
||||
await self.db.commit()
|
||||
await self.db.refresh(conv)
|
||||
conversation_id = conv.id
|
||||
else:
|
||||
conversation_id = conv.id
|
||||
|
||||
# 存储用户消息
|
||||
user_msg = Message(
|
||||
conversation_id=conversation_id,
|
||||
role="user",
|
||||
content=message,
|
||||
)
|
||||
self.db.add(user_msg)
|
||||
await self.db.commit()
|
||||
await self.db.refresh(user_msg)
|
||||
|
||||
# 预创建助手消息(后续更新内容)
|
||||
assistant_msg = Message(
|
||||
conversation_id=conversation_id,
|
||||
role="assistant",
|
||||
content="",
|
||||
model="jarvis",
|
||||
)
|
||||
self.db.add(assistant_msg)
|
||||
await self.db.commit()
|
||||
await self.db.refresh(assistant_msg)
|
||||
|
||||
# 加载记忆上下文
|
||||
memory_ctx = await memory_service.build_memory_context(
|
||||
self.db, user_id, conversation_id, message
|
||||
)
|
||||
|
||||
# 调用 LangGraph Agent
|
||||
async def run_agent():
|
||||
set_current_user(user_id)
|
||||
try:
|
||||
graph = get_agent_graph()
|
||||
langgraph_state = {
|
||||
"messages": [HumanMessage(content=message)], # type: ignore[arg-type]
|
||||
"user_id": user_id,
|
||||
"conversation_id": conversation_id,
|
||||
"current_agent": "master",
|
||||
"active_agents": ["master"],
|
||||
"pending_tasks": [],
|
||||
"completed_tasks": [],
|
||||
"tool_calls": [],
|
||||
"last_tool_result": None,
|
||||
"knowledge_context": None,
|
||||
"graph_context": None,
|
||||
"plan": None,
|
||||
"plan_steps": [],
|
||||
"analysis_report": None,
|
||||
"final_response": None,
|
||||
"should_respond": True,
|
||||
"memory_context": memory_ctx,
|
||||
logger.info(
|
||||
"agent_chat_started",
|
||||
extra={
|
||||
"details": {
|
||||
"mode": "stream",
|
||||
"requested_model_name": model_name,
|
||||
"resolved_model_name": model_name_used,
|
||||
"message_length": len(message or ""),
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
collected = ""
|
||||
async for event in graph.astream_events(langgraph_state, version="v2"):
|
||||
kind = event.get("event")
|
||||
if kind == "on_chat_model_end":
|
||||
content = event.get("data", {}).get("output", {})
|
||||
if isinstance(content, dict):
|
||||
content = content.get("content", "")
|
||||
if content:
|
||||
delta = content[len(collected):]
|
||||
if delta:
|
||||
collected += delta
|
||||
yield delta
|
||||
elif kind == "on_tool_end":
|
||||
name = event.get("name", "")
|
||||
yield f"\n[工具执行: {name}]\n"
|
||||
except Exception as e:
|
||||
yield f"\n执行出错: {str(e)}"
|
||||
finally:
|
||||
clear_current_user()
|
||||
# 异步触发自动摘要和记忆提取(不阻塞响应)
|
||||
import asyncio
|
||||
try:
|
||||
loop = asyncio.get_running_loop()
|
||||
loop.create_task(
|
||||
memory_service.try_auto_summarize(self.db, user_id, conversation_id)
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 最终更新数据库中的消息内容
|
||||
if collected:
|
||||
try:
|
||||
result2 = await self.db.execute(
|
||||
select(Message).where(Message.id == assistant_msg.id)
|
||||
)
|
||||
msg = result2.scalar_one_or_none()
|
||||
if msg:
|
||||
msg.content = collected
|
||||
await self.db.commit()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return conversation_id, assistant_msg.id, run_agent()
|
||||
|
||||
async def chat_simple(
|
||||
self,
|
||||
user_id: str,
|
||||
message: str,
|
||||
conversation_id: str | None = None,
|
||||
file_ids: list[str] | None = None,
|
||||
) -> tuple[str, str, str]:
|
||||
"""
|
||||
简单同步版对话(无流式)
|
||||
|
||||
Returns:
|
||||
(conversation_id, message_id, response_content)
|
||||
"""
|
||||
# 获取或创建对话
|
||||
if conversation_id:
|
||||
result = await self.db.execute(
|
||||
select(Conversation).where(Conversation.id == conversation_id)
|
||||
select(Conversation).where(
|
||||
Conversation.id == conversation_id,
|
||||
Conversation.user_id == user_id,
|
||||
)
|
||||
)
|
||||
conv = result.scalar_one_or_none()
|
||||
if conv is None:
|
||||
raise ValueError("会话不存在或无权访问")
|
||||
else:
|
||||
conv = None
|
||||
|
||||
@@ -179,7 +279,6 @@ class AgentService:
|
||||
else:
|
||||
conversation_id = conv.id
|
||||
|
||||
# 如果有文件,读取内容作为上下文
|
||||
file_context = ""
|
||||
if file_ids:
|
||||
from app.services.document_service import DocumentService
|
||||
@@ -189,10 +288,8 @@ class AgentService:
|
||||
if content:
|
||||
file_context += f"\n\n[用户上传文件内容]\n{content}\n[/文件内容]"
|
||||
|
||||
# 将文件上下文添加到消息
|
||||
full_message = f"{message}\n{file_context}" if file_context else message
|
||||
|
||||
# 存储用户消息
|
||||
user_msg = Message(
|
||||
conversation_id=conversation_id,
|
||||
role="user",
|
||||
@@ -203,59 +300,293 @@ class AgentService:
|
||||
await self.db.commit()
|
||||
await self.db.refresh(user_msg)
|
||||
|
||||
# 加载记忆上下文
|
||||
brain_service = BrainService(self.db)
|
||||
await brain_service.create_event(
|
||||
user_id,
|
||||
source_type="conversation",
|
||||
source_id=conversation_id,
|
||||
event_type="message_created",
|
||||
title="User message",
|
||||
content_summary=message[:500],
|
||||
raw_excerpt=message[:2000],
|
||||
metadata_={"role": "user"},
|
||||
importance_signal=1.0,
|
||||
)
|
||||
await self.db.commit()
|
||||
|
||||
memory_ctx = await memory_service.build_memory_context(
|
||||
self.db, user_id, conversation_id, message
|
||||
)
|
||||
|
||||
# 调用 LangGraph Agent
|
||||
set_current_user(user_id)
|
||||
graph = get_agent_graph()
|
||||
langgraph_state = {
|
||||
"messages": [HumanMessage(content=full_message)], # type: ignore[arg-type]
|
||||
"user_id": user_id,
|
||||
"conversation_id": conversation_id,
|
||||
"current_agent": "master",
|
||||
"active_agents": ["master"],
|
||||
"pending_tasks": [],
|
||||
"completed_tasks": [],
|
||||
"tool_calls": [],
|
||||
"last_tool_result": None,
|
||||
"knowledge_context": None,
|
||||
"graph_context": None,
|
||||
"plan": None,
|
||||
"plan_steps": [],
|
||||
"analysis_report": None,
|
||||
"final_response": None,
|
||||
"should_respond": True,
|
||||
"memory_context": memory_ctx,
|
||||
}
|
||||
|
||||
try:
|
||||
result_state = await graph.ainvoke(langgraph_state)
|
||||
response_content = result_state.get("final_response", "抱歉,我无法处理这个请求。")
|
||||
except Exception as e:
|
||||
response_content = f"抱歉,发生错误: {str(e)}"
|
||||
finally:
|
||||
clear_current_user()
|
||||
# 异步触发自动摘要
|
||||
import asyncio
|
||||
try:
|
||||
asyncio.get_running_loop().create_task(
|
||||
memory_service.try_auto_summarize(self.db, user_id, conversation_id)
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 保存助手消息
|
||||
assistant_msg = Message(
|
||||
conversation_id=conversation_id,
|
||||
role="assistant",
|
||||
content=response_content,
|
||||
model="jarvis",
|
||||
content="",
|
||||
model=model_name_used or "jarvis",
|
||||
attachments=None,
|
||||
)
|
||||
self.db.add(assistant_msg)
|
||||
await self.db.commit()
|
||||
await self.db.refresh(assistant_msg)
|
||||
|
||||
return conversation_id, assistant_msg.id, response_content
|
||||
def _build_assistant_event_payload(content: str) -> dict[str, Any]:
|
||||
return {
|
||||
"source_type": "conversation",
|
||||
"source_id": conversation_id,
|
||||
"event_type": "message_created",
|
||||
"title": "Assistant message",
|
||||
"content_summary": content[:500],
|
||||
"raw_excerpt": content[:2000],
|
||||
"metadata_": {"role": "assistant"},
|
||||
"importance_signal": 0.8,
|
||||
}
|
||||
|
||||
async def run_agent():
|
||||
collected = ""
|
||||
state: dict[str, Any] | None = None
|
||||
set_current_user(user_id)
|
||||
try:
|
||||
graph = get_agent_graph()
|
||||
current_datetime_context, current_datetime_reference = self._build_current_datetime_context()
|
||||
|
||||
state = await self._build_agent_state(
|
||||
user_id=user_id,
|
||||
conversation=conv,
|
||||
full_message=full_message,
|
||||
memory_context=memory_ctx,
|
||||
current_datetime_context=current_datetime_context,
|
||||
current_datetime_reference=current_datetime_reference,
|
||||
user_llm_config=user_llm_config,
|
||||
)
|
||||
|
||||
yield self._build_progress_event("thinking", "Jarvis 正在分析请求", agent="master", step="理解你的问题")
|
||||
|
||||
try:
|
||||
async for event in graph.astream_events(state, version="v2"):
|
||||
kind = event.get("event")
|
||||
event_name = event.get("name", "")
|
||||
metadata = event.get("metadata", {})
|
||||
data = event.get("data", {})
|
||||
|
||||
if kind == "on_chain_start" and event_name in {"master", "schedule_planner", "executor", "librarian", "analyst"}:
|
||||
stage_map = {
|
||||
"master": ("thinking", "Jarvis 正在理解请求"),
|
||||
"schedule_planner": ("planning", "Jarvis 正在编排日程"),
|
||||
"executor": ("tool", "Jarvis 正在执行操作"),
|
||||
"librarian": ("tool", "Jarvis 正在检索知识"),
|
||||
"analyst": ("thinking", "Jarvis 正在分析信息"),
|
||||
}
|
||||
stage, label = stage_map.get(event_name, ("thinking", "Jarvis 正在思考"))
|
||||
yield self._build_progress_event(stage, label, agent=event_name, step=label)
|
||||
|
||||
elif kind == "on_tool_start":
|
||||
yield self._build_progress_event(
|
||||
"tool",
|
||||
f"Jarvis 正在调用工具 {event_name}",
|
||||
agent="executor",
|
||||
tool_name=event_name,
|
||||
step=f"正在执行 {event_name}",
|
||||
)
|
||||
|
||||
elif kind == "on_tool_end":
|
||||
tool_result = data.get("output")
|
||||
step = f"已完成 {event_name}"
|
||||
if isinstance(tool_result, str) and len(tool_result) > 0:
|
||||
step = tool_result[:100]
|
||||
yield self._build_progress_event(
|
||||
"tool",
|
||||
f"工具 {event_name} 已完成",
|
||||
agent="executor",
|
||||
tool_name=event_name,
|
||||
step=step,
|
||||
)
|
||||
|
||||
elif kind == "on_chat_model_stream":
|
||||
chunk = data.get("chunk")
|
||||
content = _coerce_event_text(getattr(chunk, "content", "") if chunk else "")
|
||||
if content:
|
||||
collected += content
|
||||
yield {"type": "chunk", "content": content}
|
||||
|
||||
elif kind == "on_chain_end":
|
||||
output = data.get("output")
|
||||
final_resp = None
|
||||
if isinstance(output, dict):
|
||||
state.update(output)
|
||||
final_resp = output.get("final_response")
|
||||
if final_resp:
|
||||
final_text = str(final_resp)
|
||||
if final_text != collected:
|
||||
collected = final_text
|
||||
yield {"type": "chunk", "content": final_text}
|
||||
|
||||
elif kind == "on_chat_model_end":
|
||||
output = data.get("output")
|
||||
final_content = _coerce_event_text(getattr(output, "content", "") if output else "")
|
||||
if final_content:
|
||||
final_text = final_content
|
||||
if final_text != collected:
|
||||
collected = final_text
|
||||
yield {"type": "chunk", "content": final_text}
|
||||
|
||||
except Exception as e:
|
||||
if _is_streaming_rejection_error(e, user_llm_config) and not collected:
|
||||
yield self._build_progress_event("responding", "Jarvis 正在生成回复", agent="master", step="fallback")
|
||||
try:
|
||||
result_state = await graph.ainvoke(state)
|
||||
if isinstance(result_state, dict):
|
||||
state.update(result_state)
|
||||
fallback_content = result_state.get("final_response") or str(result_state.get("messages", [AIMessage(content="")])[-1].content)
|
||||
collected = str(fallback_content)
|
||||
yield {"type": "chunk", "content": collected}
|
||||
except Exception:
|
||||
logger.exception("llm_sync_fallback_failed")
|
||||
safe_error = "模型服务暂不可用,请稍后再试。"
|
||||
yield {"type": "error", "error": safe_error}
|
||||
collected = f"抱歉,发生错误: {safe_error}"
|
||||
yield {"type": "chunk", "content": collected}
|
||||
else:
|
||||
logger.exception("agent_streaming_failed")
|
||||
if not collected:
|
||||
safe_error = "模型服务暂不可用,请稍后再试。"
|
||||
yield {"type": "error", "error": safe_error}
|
||||
collected = f"抱歉,发生错误: {safe_error}"
|
||||
yield {"type": "chunk", "content": collected}
|
||||
else:
|
||||
yield {"type": "error", "error": str(e)}
|
||||
finally:
|
||||
clear_current_user()
|
||||
try:
|
||||
if collected:
|
||||
assistant_msg.content = collected
|
||||
continuity_snapshot = _build_continuity_snapshot(state or {})
|
||||
assistant_msg.attachments = ([{
|
||||
"kind": "agent_continuity_state",
|
||||
**continuity_snapshot,
|
||||
}] if continuity_snapshot else None)
|
||||
conv.agent_state = continuity_snapshot
|
||||
await BrainService(self.db).create_event(
|
||||
user_id,
|
||||
**_build_assistant_event_payload(collected),
|
||||
)
|
||||
await self.db.commit()
|
||||
await self.db.refresh(assistant_msg)
|
||||
except Exception:
|
||||
logger.exception("save_assistant_message_failed")
|
||||
asyncio.create_task(self._try_auto_summarize_background(user_id, conversation_id))
|
||||
|
||||
return conversation_id, assistant_msg.id, run_agent()
|
||||
|
||||
async def chat_simple(
|
||||
self,
|
||||
user_id: str,
|
||||
message: str,
|
||||
conversation_id: str | None = None,
|
||||
file_ids: list[str] | None = None,
|
||||
model_name: str | None = None,
|
||||
) -> tuple[str, str, str, str | None]:
|
||||
"""
|
||||
简单同步版对话
|
||||
"""
|
||||
user_llm_config = await self._get_user_llm_config(user_id, model_name)
|
||||
model_name_used = model_name
|
||||
if model_name and not user_llm_config:
|
||||
raise ValueError("所选模型不可用于聊天,请切换到聊天模型")
|
||||
if user_llm_config:
|
||||
model_name_used = user_llm_config.get("name", model_name)
|
||||
|
||||
if conversation_id:
|
||||
result = await self.db.execute(
|
||||
select(Conversation).where(
|
||||
Conversation.id == conversation_id,
|
||||
Conversation.user_id == user_id,
|
||||
)
|
||||
)
|
||||
conv = result.scalar_one_or_none()
|
||||
if conv is None:
|
||||
raise ValueError("会话不存在或无权访问")
|
||||
else:
|
||||
conv = None
|
||||
|
||||
if not conv:
|
||||
conv = Conversation(user_id=user_id, title=message[:50])
|
||||
self.db.add(conv)
|
||||
await self.db.commit()
|
||||
await self.db.refresh(conv)
|
||||
conversation_id = conv.id
|
||||
else:
|
||||
conversation_id = conv.id
|
||||
|
||||
user_msg = Message(conversation_id=conversation_id, role="user", content=message)
|
||||
self.db.add(user_msg)
|
||||
|
||||
assistant_msg = Message(
|
||||
conversation_id=conversation_id,
|
||||
role="assistant",
|
||||
content="",
|
||||
model=model_name_used or "jarvis",
|
||||
attachments=None,
|
||||
)
|
||||
self.db.add(assistant_msg)
|
||||
|
||||
brain_service = BrainService(self.db)
|
||||
await brain_service.create_event(
|
||||
user_id,
|
||||
source_type="conversation",
|
||||
source_id=conversation_id,
|
||||
event_type="message_created",
|
||||
title="User message",
|
||||
content_summary=message[:500],
|
||||
raw_excerpt=message[:2000],
|
||||
metadata_={"role": "user"},
|
||||
importance_signal=1.0,
|
||||
)
|
||||
|
||||
memory_ctx = await memory_service.build_memory_context(self.db, user_id, conversation_id, message)
|
||||
|
||||
set_current_user(user_id)
|
||||
try:
|
||||
graph = get_agent_graph()
|
||||
current_datetime_context, current_datetime_reference = self._build_current_datetime_context()
|
||||
state = await self._build_agent_state(
|
||||
user_id=user_id,
|
||||
conversation=conv,
|
||||
full_message=message,
|
||||
memory_context=memory_ctx,
|
||||
current_datetime_context=current_datetime_context,
|
||||
current_datetime_reference=current_datetime_reference,
|
||||
user_llm_config=user_llm_config,
|
||||
)
|
||||
|
||||
result_state = await graph.ainvoke(state)
|
||||
response_content = result_state.get("final_response") or str(result_state.get("messages", [AIMessage(content="")])[-1].content)
|
||||
except Exception as e:
|
||||
logger.exception("agent_chat_simple_failed")
|
||||
response_content = "抱歉,发生错误。"
|
||||
finally:
|
||||
clear_current_user()
|
||||
|
||||
brain_service = BrainService(self.db)
|
||||
await brain_service.create_event(
|
||||
user_id,
|
||||
source_type="conversation",
|
||||
source_id=conversation_id,
|
||||
event_type="message_created",
|
||||
title="Assistant message",
|
||||
content_summary=response_content[:500],
|
||||
raw_excerpt=response_content[:2000],
|
||||
metadata_={"role": "assistant"},
|
||||
importance_signal=0.8,
|
||||
)
|
||||
|
||||
assistant_msg.content = response_content
|
||||
continuity_snapshot = _build_continuity_snapshot(result_state) if 'result_state' in locals() else None
|
||||
assistant_msg.attachments = ([{
|
||||
"kind": "agent_continuity_state",
|
||||
**continuity_snapshot,
|
||||
}] if continuity_snapshot else None)
|
||||
conv.agent_state = continuity_snapshot
|
||||
await self.db.commit()
|
||||
await self.db.refresh(assistant_msg)
|
||||
|
||||
return conversation_id, assistant_msg.id, response_content, model_name_used
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from passlib.context import CryptContext
|
||||
from jose import jwt, JWTError
|
||||
from app.config import settings
|
||||
@@ -16,7 +16,7 @@ def get_password_hash(password: str) -> str:
|
||||
|
||||
def create_access_token(data: dict, expires_delta: timedelta | None = None) -> str:
|
||||
to_encode = data.copy()
|
||||
expire = datetime.utcnow() + (expires_delta or timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES))
|
||||
expire = datetime.now(UTC) + (expires_delta or timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES))
|
||||
to_encode.update({"exp": expire})
|
||||
return jwt.encode(to_encode, settings.SECRET_KEY, algorithm=settings.ALGORITHM)
|
||||
|
||||
|
||||
204
backend/app/services/brain_service.py
Normal file
204
backend/app/services/brain_service.py
Normal file
@@ -0,0 +1,204 @@
|
||||
from sqlalchemy import func, or_, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.models.brain import BrainCandidate, BrainEvent, BrainMemory, BrainTag
|
||||
from app.services.graph_service import GraphService
|
||||
|
||||
|
||||
class BrainService:
|
||||
def __init__(self, db: AsyncSession):
|
||||
self.db = db
|
||||
|
||||
async def create_event(
|
||||
self,
|
||||
user_id: str,
|
||||
*,
|
||||
source_type: str,
|
||||
source_id: str,
|
||||
event_type: str,
|
||||
title: str | None = None,
|
||||
content_summary: str | None = None,
|
||||
raw_excerpt: str | None = None,
|
||||
metadata_: dict | None = None,
|
||||
importance_signal: float = 0.0,
|
||||
) -> BrainEvent:
|
||||
event = BrainEvent(
|
||||
user_id=user_id,
|
||||
source_type=source_type,
|
||||
source_id=source_id,
|
||||
event_type=event_type,
|
||||
title=title,
|
||||
content_summary=content_summary,
|
||||
raw_excerpt=raw_excerpt,
|
||||
metadata_=metadata_,
|
||||
importance_signal=importance_signal,
|
||||
status="pending",
|
||||
)
|
||||
self.db.add(event)
|
||||
await self.db.flush()
|
||||
return event
|
||||
|
||||
async def recall_memories(self, user_id: str, current_query: str, top_k: int = 3) -> list[BrainMemory]:
|
||||
query_tokens = [token.strip().lower() for token in current_query.split() if token.strip()]
|
||||
statement = select(BrainMemory).where(
|
||||
BrainMemory.user_id == user_id,
|
||||
BrainMemory.status == "active",
|
||||
)
|
||||
if query_tokens:
|
||||
statement = statement.where(
|
||||
or_(
|
||||
*[
|
||||
or_(
|
||||
BrainMemory.title.ilike(f"%{token}%"),
|
||||
BrainMemory.content.ilike(f"%{token}%"),
|
||||
)
|
||||
for token in query_tokens
|
||||
]
|
||||
)
|
||||
)
|
||||
result = await self.db.execute(
|
||||
statement.order_by(BrainMemory.importance.desc(), BrainMemory.created_at.desc()).limit(top_k)
|
||||
)
|
||||
memories = list(result.scalars().all())
|
||||
if memories or query_tokens:
|
||||
return memories
|
||||
|
||||
fallback_result = await self.db.execute(
|
||||
select(BrainMemory)
|
||||
.where(BrainMemory.user_id == user_id, BrainMemory.status == "active")
|
||||
.order_by(BrainMemory.importance.desc(), BrainMemory.created_at.desc())
|
||||
.limit(top_k)
|
||||
)
|
||||
return list(fallback_result.scalars().all())
|
||||
|
||||
async def get_overview(self, user_id: str) -> dict:
|
||||
active_memory_count = (
|
||||
await self.db.execute(
|
||||
select(func.count()).select_from(BrainMemory).where(
|
||||
BrainMemory.user_id == user_id,
|
||||
BrainMemory.status == "active",
|
||||
)
|
||||
)
|
||||
).scalar() or 0
|
||||
|
||||
important_tag_count = (
|
||||
await self.db.execute(
|
||||
select(func.count()).select_from(BrainTag).where(
|
||||
BrainTag.user_id == user_id,
|
||||
BrainTag.priority == "important",
|
||||
)
|
||||
)
|
||||
).scalar() or 0
|
||||
|
||||
secondary_tag_count = (
|
||||
await self.db.execute(
|
||||
select(func.count()).select_from(BrainTag).where(
|
||||
BrainTag.user_id == user_id,
|
||||
BrainTag.priority == "secondary",
|
||||
)
|
||||
)
|
||||
).scalar() or 0
|
||||
|
||||
recent_memory_result = await self.db.execute(
|
||||
select(BrainMemory.title)
|
||||
.where(BrainMemory.user_id == user_id, BrainMemory.status == "active")
|
||||
.order_by(BrainMemory.importance.desc(), BrainMemory.created_at.desc())
|
||||
.limit(5)
|
||||
)
|
||||
recent_memory_titles = list(recent_memory_result.scalars().all())
|
||||
|
||||
return {
|
||||
"active_memory_count": active_memory_count,
|
||||
"important_tag_count": important_tag_count,
|
||||
"secondary_tag_count": secondary_tag_count,
|
||||
"recent_memory_titles": recent_memory_titles,
|
||||
}
|
||||
|
||||
async def list_memories(self, user_id: str) -> list[BrainMemory]:
|
||||
result = await self.db.execute(
|
||||
select(BrainMemory)
|
||||
.where(BrainMemory.user_id == user_id, BrainMemory.status == "active")
|
||||
.order_by(BrainMemory.importance.desc(), BrainMemory.created_at.desc())
|
||||
)
|
||||
return list(result.scalars().all())
|
||||
|
||||
async def list_tags(self, user_id: str) -> dict:
|
||||
important_result = await self.db.execute(
|
||||
select(BrainTag)
|
||||
.where(BrainTag.user_id == user_id, BrainTag.priority == "important")
|
||||
.order_by(BrainTag.score.desc(), BrainTag.created_at.desc())
|
||||
)
|
||||
secondary_result = await self.db.execute(
|
||||
select(BrainTag)
|
||||
.where(BrainTag.user_id == user_id, BrainTag.priority == "secondary")
|
||||
.order_by(BrainTag.score.desc(), BrainTag.created_at.desc())
|
||||
)
|
||||
return {
|
||||
"important": list(important_result.scalars().all()),
|
||||
"secondary": list(secondary_result.scalars().all()),
|
||||
}
|
||||
|
||||
async def list_events(self, user_id: str) -> list[BrainEvent]:
|
||||
result = await self.db.execute(
|
||||
select(BrainEvent)
|
||||
.where(BrainEvent.user_id == user_id)
|
||||
.order_by(BrainEvent.created_at.desc())
|
||||
)
|
||||
return list(result.scalars().all())
|
||||
|
||||
async def run_learning(self, user_id: str) -> dict:
|
||||
pending_events_result = await self.db.execute(
|
||||
select(BrainEvent)
|
||||
.where(BrainEvent.user_id == user_id, BrainEvent.status == "pending")
|
||||
.order_by(BrainEvent.created_at.asc())
|
||||
)
|
||||
pending_events = list(pending_events_result.scalars().all())
|
||||
pending_count = len(pending_events)
|
||||
|
||||
candidates_created = 0
|
||||
memories_promoted = 0
|
||||
|
||||
if pending_events:
|
||||
candidate = BrainCandidate(
|
||||
user_id=user_id,
|
||||
candidate_type="daily_learning",
|
||||
title="Daily learning synthesis",
|
||||
summary=f"Processed {pending_count} pending brain events.",
|
||||
importance_score=float(pending_count),
|
||||
confidence_score=1.0,
|
||||
status="promoted",
|
||||
source_event_ids=[event.id for event in pending_events],
|
||||
)
|
||||
self.db.add(candidate)
|
||||
await self.db.flush()
|
||||
candidates_created = 1
|
||||
|
||||
memory = BrainMemory(
|
||||
user_id=user_id,
|
||||
memory_type="daily_learning",
|
||||
title="Daily learning synthesis",
|
||||
content=f"Processed {pending_count} pending brain events.",
|
||||
importance=max(pending_count, 1),
|
||||
confidence=1.0,
|
||||
status="active",
|
||||
origin_candidate_id=candidate.id,
|
||||
origin_source_types=sorted({event.source_type for event in pending_events}),
|
||||
)
|
||||
self.db.add(memory)
|
||||
memories_promoted = 1
|
||||
|
||||
for event in pending_events:
|
||||
event.status = "processed"
|
||||
event.processed_at = memory.created_at
|
||||
|
||||
await self.db.commit()
|
||||
else:
|
||||
await self.db.commit()
|
||||
|
||||
await GraphService(self.db).build_graph(user_id)
|
||||
|
||||
return {
|
||||
"events_considered": pending_count,
|
||||
"candidates_created": candidates_created,
|
||||
"memories_promoted": memories_promoted,
|
||||
}
|
||||
@@ -3,18 +3,43 @@
|
||||
支持多种文档格式 + LlamaIndex 智能分块
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
from fastapi import UploadFile
|
||||
from app.models.document import Document, DocumentChunk
|
||||
from app.models.folder import Folder
|
||||
from app.config import settings
|
||||
from app.services.brain_service import BrainService
|
||||
import csv
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import aiofiles
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
ALLOWED_EXTENSIONS = {".pdf", ".md", ".txt", ".docx", ".doc"}
|
||||
ALLOWED_EXTENSIONS = {".pdf", ".md", ".txt", ".docx", ".doc", ".csv", ".xlsx"}
|
||||
PARSER_VERSION = "v2"
|
||||
INDEX_VERSION = "v2"
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParsedNode:
|
||||
node_type: str
|
||||
text: str
|
||||
metadata: dict = field(default_factory=dict)
|
||||
section_path: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParsedDocument:
|
||||
summary: str
|
||||
nodes: list[ParsedNode]
|
||||
structured_markdown: str = ""
|
||||
|
||||
|
||||
class DocumentService:
|
||||
@@ -39,7 +64,8 @@ class DocumentService:
|
||||
async with aiofiles.open(file_path, "wb") as f:
|
||||
await f.write(content)
|
||||
|
||||
text_content = await self._extract_text(file_path, ext)
|
||||
parsed = await self._parse_document(file_path, ext)
|
||||
parsed.structured_markdown = self._render_structured_markdown(parsed)
|
||||
|
||||
doc = Document(
|
||||
user_id=user_id,
|
||||
@@ -48,26 +74,85 @@ class DocumentService:
|
||||
file_type=ext[1:],
|
||||
file_size=file_size,
|
||||
file_path=file_path,
|
||||
summary=text_content[:500] if len(text_content) > 500 else text_content,
|
||||
summary=parsed.summary[:500] if len(parsed.summary) > 500 else parsed.summary,
|
||||
folder_id=folder_id,
|
||||
ingestion_status="uploaded",
|
||||
ingestion_error=None,
|
||||
parser_version=PARSER_VERSION,
|
||||
index_version=INDEX_VERSION,
|
||||
normalized_content=parsed.structured_markdown,
|
||||
normalized_format="structured_markdown",
|
||||
)
|
||||
self.db.add(doc)
|
||||
await self.db.commit()
|
||||
await self.db.refresh(doc)
|
||||
await self.db.flush()
|
||||
|
||||
chunks = self._chunk_text(text_content)
|
||||
for i, chunk_text in enumerate(chunks):
|
||||
chunks = self._build_chunks(parsed)
|
||||
for i, chunk_data in enumerate(chunks):
|
||||
chunk = DocumentChunk(
|
||||
document_id=doc.id,
|
||||
chunk_index=i,
|
||||
content=chunk_text,
|
||||
content=chunk_data["content"],
|
||||
metadata_=json.dumps(chunk_data["metadata"], ensure_ascii=False),
|
||||
)
|
||||
self.db.add(chunk)
|
||||
doc.chunk_count = len(chunks)
|
||||
brain_service = BrainService(self.db)
|
||||
await brain_service.create_event(
|
||||
user_id,
|
||||
source_type="document",
|
||||
source_id=doc.id,
|
||||
event_type="document_uploaded",
|
||||
title=doc.filename,
|
||||
content_summary=doc.summary,
|
||||
raw_excerpt=(doc.normalized_content or "")[:1000] or None,
|
||||
metadata_={
|
||||
"document_id": doc.id,
|
||||
"file_type": doc.file_type,
|
||||
"ingestion_status": doc.ingestion_status,
|
||||
},
|
||||
importance_signal=1.0,
|
||||
)
|
||||
await self.db.commit()
|
||||
await self.db.refresh(doc)
|
||||
|
||||
return doc
|
||||
|
||||
async def rebuild_document(self, document: Document) -> Document:
|
||||
ext = os.path.splitext(document.filename)[1].lower()
|
||||
parsed = await self._parse_document(document.file_path, ext)
|
||||
parsed.structured_markdown = self._render_structured_markdown(parsed)
|
||||
|
||||
chunk_result = await self.db.execute(
|
||||
select(DocumentChunk)
|
||||
.where(DocumentChunk.document_id == document.id)
|
||||
.order_by(DocumentChunk.chunk_index)
|
||||
)
|
||||
existing_chunks = list(chunk_result.scalars().all())
|
||||
for chunk in existing_chunks:
|
||||
await self.db.delete(chunk)
|
||||
await self.db.flush()
|
||||
|
||||
chunks = self._build_chunks(parsed)
|
||||
for i, chunk_data in enumerate(chunks):
|
||||
self.db.add(DocumentChunk(
|
||||
document_id=document.id,
|
||||
chunk_index=i,
|
||||
content=chunk_data["content"],
|
||||
metadata_=json.dumps(chunk_data["metadata"], ensure_ascii=False),
|
||||
))
|
||||
|
||||
document.summary = parsed.summary[:500] if len(parsed.summary) > 500 else parsed.summary
|
||||
document.chunk_count = len(chunks)
|
||||
document.ingestion_status = "indexing"
|
||||
document.ingestion_error = None
|
||||
document.parser_version = PARSER_VERSION
|
||||
document.index_version = INDEX_VERSION
|
||||
document.normalized_content = parsed.structured_markdown
|
||||
document.normalized_format = "structured_markdown"
|
||||
await self.db.commit()
|
||||
await self.db.refresh(document)
|
||||
return document
|
||||
|
||||
async def _get_folder_path(self, folder_id: str) -> str | None:
|
||||
"""获取文件夹的完整路径"""
|
||||
folders = await self.db.execute(
|
||||
@@ -104,112 +189,348 @@ class DocumentService:
|
||||
await self.db.commit()
|
||||
|
||||
async def _extract_text(self, file_path: str, ext: str) -> str:
|
||||
if ext == ".pdf":
|
||||
try:
|
||||
import pymupdf
|
||||
doc = pymupdf.open(file_path)
|
||||
text = "".join(page.get_text() for page in doc)
|
||||
doc.close()
|
||||
return text
|
||||
except ImportError:
|
||||
return "[PDF 内容需要安装 pymupdf: uv pip install pymupdf]"
|
||||
|
||||
elif ext in (".md", ".txt"):
|
||||
if ext in (".md", ".txt"):
|
||||
async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
|
||||
return await f.read()
|
||||
|
||||
elif ext in (".docx", ".doc"):
|
||||
if ext in (".docx", ".doc"):
|
||||
try:
|
||||
from docx import Document as DocxDocument
|
||||
doc = DocxDocument(file_path)
|
||||
return "\n".join([p.text for p in doc.paragraphs])
|
||||
parts = [p.text for p in doc.paragraphs if p.text.strip()]
|
||||
for table in doc.tables:
|
||||
for row in table.rows:
|
||||
row_values = [cell.text.strip() for cell in row.cells]
|
||||
if any(row_values):
|
||||
parts.append(" | ".join(row_values))
|
||||
return "\n".join(parts)
|
||||
except ImportError:
|
||||
return "[Word 内容需要安装 python-docx: uv pip install python-docx]"
|
||||
|
||||
return "[暂不支持此格式]"
|
||||
|
||||
def _chunk_text(self, text: str) -> list[str]:
|
||||
"""
|
||||
智能文档分块策略
|
||||
1. 先按 Markdown 标题层级(H1/H2/H3)切分
|
||||
2. 每个大段落内部按固定长度切分
|
||||
3. 保留上下文(prev_summary / next_summary)
|
||||
"""
|
||||
import re
|
||||
async def _parse_document(self, file_path: str, ext: str) -> ParsedDocument:
|
||||
if ext == ".csv":
|
||||
return await self._parse_csv(file_path)
|
||||
if ext == ".xlsx":
|
||||
return await self._parse_xlsx(file_path)
|
||||
if ext == ".md":
|
||||
content = await self._extract_text(file_path, ext)
|
||||
return self._parse_markdown(content)
|
||||
if ext == ".txt":
|
||||
content = await self._extract_text(file_path, ext)
|
||||
return self._parse_text(content)
|
||||
if ext == ".docx":
|
||||
return await self._parse_docx(file_path)
|
||||
if ext == ".doc":
|
||||
content = await self._extract_text(file_path, ext)
|
||||
return self._parse_text(content)
|
||||
if ext == ".pdf":
|
||||
return await self._parse_pdf(file_path)
|
||||
content = await self._extract_text(file_path, ext)
|
||||
return self._parse_text(content)
|
||||
|
||||
chunks = []
|
||||
async def _parse_csv(self, file_path: str) -> ParsedDocument:
|
||||
async with aiofiles.open(file_path, "r", encoding="utf-8-sig") as f:
|
||||
content = await f.read()
|
||||
reader = list(csv.reader(io.StringIO(content)))
|
||||
headers = reader[0] if reader else []
|
||||
rows = reader[1:] if len(reader) > 1 else []
|
||||
nodes = [
|
||||
ParsedNode(
|
||||
node_type="table_schema",
|
||||
text=f"CSV columns: {', '.join(headers)} | rows: {len(rows)}",
|
||||
metadata={"headers": headers, "row_count": len(rows), "table_name": "csv"},
|
||||
section_path=["csv"],
|
||||
)
|
||||
]
|
||||
for start in range(0, len(rows), 50):
|
||||
batch = rows[start:start + 50]
|
||||
serialized_rows = []
|
||||
for row in batch:
|
||||
serialized = ", ".join(
|
||||
f"{header}={value}" for header, value in zip(headers, row)
|
||||
)
|
||||
serialized_rows.append(serialized)
|
||||
nodes.append(
|
||||
ParsedNode(
|
||||
node_type="table_rows",
|
||||
text="\n".join(serialized_rows),
|
||||
metadata={
|
||||
"headers": headers,
|
||||
"row_start": start + 1,
|
||||
"row_end": start + len(batch),
|
||||
"table_name": "csv",
|
||||
},
|
||||
section_path=["csv"],
|
||||
)
|
||||
)
|
||||
summary = f"CSV with columns {', '.join(headers)}" if headers else "CSV document"
|
||||
return ParsedDocument(summary=summary, nodes=nodes)
|
||||
|
||||
# 策略1: Markdown 标题切分(优先)
|
||||
header_pattern = re.compile(r"^(#{1,3})\s+(.+)$", re.MULTILINE)
|
||||
headers = list(header_pattern.finditer(text))
|
||||
async def _parse_xlsx(self, file_path: str) -> ParsedDocument:
|
||||
try:
|
||||
from openpyxl import load_workbook
|
||||
except ModuleNotFoundError as error:
|
||||
raise ValueError("XLSX 解析依赖缺失: openpyxl") from error
|
||||
|
||||
if headers:
|
||||
# 按标题段落切分
|
||||
for i, match in enumerate(headers):
|
||||
start = match.start()
|
||||
end = headers[i + 1].start() if i + 1 < len(headers) else len(text)
|
||||
section = text[start:end].strip()
|
||||
if len(section) > settings.CHUNK_SIZE:
|
||||
# 大段落内部再切分
|
||||
sub_chunks = self._split_large_chunk(section, match.group(2))
|
||||
chunks.extend(sub_chunks)
|
||||
elif section:
|
||||
chunks.append(section)
|
||||
else:
|
||||
# 策略2: 按段落切分
|
||||
chunks = self._chunk_by_paragraphs(text)
|
||||
|
||||
# 过滤空 chunk
|
||||
chunks = [c.strip() for c in chunks if c.strip()]
|
||||
return chunks if chunks else [text[: settings.CHUNK_SIZE]]
|
||||
|
||||
def _chunk_by_paragraphs(self, text: str) -> list[str]:
|
||||
"""按段落分块,带上下文"""
|
||||
paragraphs = text.split("\n\n")
|
||||
chunks = []
|
||||
current = ""
|
||||
prev_summary = ""
|
||||
|
||||
for para in paragraphs:
|
||||
para = para.strip()
|
||||
if not para:
|
||||
workbook = load_workbook(file_path, data_only=True)
|
||||
nodes: list[ParsedNode] = []
|
||||
summaries: list[str] = []
|
||||
for sheet in workbook.worksheets:
|
||||
rows = list(sheet.iter_rows(values_only=True))
|
||||
if not rows:
|
||||
continue
|
||||
if len(current) + len(para) < settings.CHUNK_SIZE:
|
||||
current += "\n\n" + para
|
||||
headers = [str(cell).strip() if cell is not None else "" for cell in rows[0]]
|
||||
data_rows = rows[1:]
|
||||
summaries.append(sheet.title)
|
||||
nodes.append(
|
||||
ParsedNode(
|
||||
node_type="table_schema",
|
||||
text=f"Sheet {sheet.title} columns: {', '.join(headers)} | rows: {len(data_rows)}",
|
||||
metadata={"headers": headers, "row_count": len(data_rows), "sheet_name": sheet.title},
|
||||
section_path=[sheet.title],
|
||||
)
|
||||
)
|
||||
for start in range(0, len(data_rows), 50):
|
||||
batch = data_rows[start:start + 50]
|
||||
serialized_rows = []
|
||||
for row in batch:
|
||||
normalized = ["" if value is None else str(value) for value in row]
|
||||
serialized_rows.append(", ".join(f"{header}={value}" for header, value in zip(headers, normalized)))
|
||||
nodes.append(
|
||||
ParsedNode(
|
||||
node_type="table_rows",
|
||||
text="\n".join(serialized_rows),
|
||||
metadata={
|
||||
"headers": headers,
|
||||
"row_start": start + 1,
|
||||
"row_end": start + len(batch),
|
||||
"sheet_name": sheet.title,
|
||||
},
|
||||
section_path=[sheet.title],
|
||||
)
|
||||
)
|
||||
summary = f"Workbook sheets: {', '.join(summaries)}" if summaries else "Workbook"
|
||||
return ParsedDocument(summary=summary, nodes=nodes)
|
||||
|
||||
async def _parse_docx(self, file_path: str) -> ParsedDocument:
|
||||
try:
|
||||
from docx import Document as DocxDocument
|
||||
except ModuleNotFoundError as error:
|
||||
raise ValueError("DOCX 解析依赖缺失: python-docx") from error
|
||||
|
||||
doc = DocxDocument(file_path)
|
||||
nodes: list[ParsedNode] = []
|
||||
section_path: list[str] = []
|
||||
summary_parts: list[str] = []
|
||||
for paragraph in doc.paragraphs:
|
||||
text = paragraph.text.strip()
|
||||
if not text:
|
||||
continue
|
||||
style_name = getattr(paragraph.style, "name", "") or ""
|
||||
if style_name.startswith("Heading"):
|
||||
level_match = re.search(r"(\d+)", style_name)
|
||||
level = int(level_match.group(1)) if level_match else 1
|
||||
section_path = section_path[: level - 1] + [text]
|
||||
nodes.append(ParsedNode("heading", text, {"level": level}, list(section_path)))
|
||||
else:
|
||||
if current:
|
||||
# 添加上下文摘要
|
||||
enriched = current.strip()
|
||||
chunks.append(enriched)
|
||||
current = para
|
||||
if not section_path:
|
||||
section_path = [doc.core_properties.title or "Document"]
|
||||
summary_parts.append(text)
|
||||
nodes.append(ParsedNode("paragraph", text, {}, list(section_path)))
|
||||
for table in doc.tables:
|
||||
rows = [[cell.text.strip() for cell in row.cells] for row in table.rows]
|
||||
if not rows:
|
||||
continue
|
||||
headers = rows[0]
|
||||
nodes.append(
|
||||
ParsedNode(
|
||||
"table_schema",
|
||||
f"DOCX table columns: {', '.join(headers)} | rows: {max(len(rows) - 1, 0)}",
|
||||
{"headers": headers, "row_count": max(len(rows) - 1, 0), "table_name": "docx_table"},
|
||||
list(section_path),
|
||||
)
|
||||
)
|
||||
for start in range(1, len(rows), 50):
|
||||
batch = rows[start:start + 50]
|
||||
serialized_rows = [", ".join(f"{header}={value}" for header, value in zip(headers, row)) for row in batch]
|
||||
nodes.append(
|
||||
ParsedNode(
|
||||
"table_rows",
|
||||
"\n".join(serialized_rows),
|
||||
{
|
||||
"headers": headers,
|
||||
"row_start": start,
|
||||
"row_end": start + len(batch) - 1,
|
||||
"table_name": "docx_table",
|
||||
},
|
||||
list(section_path),
|
||||
)
|
||||
)
|
||||
summary = " ".join(summary_parts[:3]) if summary_parts else doc.core_properties.title or "Document"
|
||||
return ParsedDocument(summary=summary, nodes=nodes)
|
||||
|
||||
if current.strip():
|
||||
chunks.append(current.strip())
|
||||
async def _parse_pdf_with_mineru(self, file_path: str) -> str:
|
||||
try:
|
||||
import mineru
|
||||
except ModuleNotFoundError as error:
|
||||
raise ValueError("PDF 解析依赖缺失: mineru") from error
|
||||
|
||||
if hasattr(mineru, "to_markdown"):
|
||||
return mineru.to_markdown(file_path)
|
||||
if hasattr(mineru, "parse_to_markdown"):
|
||||
return mineru.parse_to_markdown(file_path)
|
||||
|
||||
try:
|
||||
from mineru.cli.common import do_parse, read_fn
|
||||
from mineru.utils.enum_class import MakeMode
|
||||
except Exception as error:
|
||||
raise ValueError(
|
||||
"PDF 解析失败: 当前安装的 MinerU 版本接口不兼容,请确认支持 to_markdown / parse_to_markdown,或提供 cli.common.do_parse 能力"
|
||||
) from error
|
||||
|
||||
with tempfile.TemporaryDirectory(prefix="mineru-") as output_dir:
|
||||
pdf_name = Path(file_path).stem
|
||||
pdf_bytes = read_fn(Path(file_path))
|
||||
try:
|
||||
do_parse(
|
||||
output_dir,
|
||||
[pdf_name],
|
||||
[pdf_bytes],
|
||||
["zh"],
|
||||
f_draw_layout_bbox=False,
|
||||
f_draw_span_bbox=False,
|
||||
f_dump_md=True,
|
||||
f_dump_middle_json=False,
|
||||
f_dump_model_output=False,
|
||||
f_dump_orig_pdf=False,
|
||||
f_dump_content_list=False,
|
||||
f_make_md_mode=MakeMode.MM_MD,
|
||||
)
|
||||
except ModuleNotFoundError as error:
|
||||
dependency = getattr(error, "name", None) or str(error).split("'")[-2] if "'" in str(error) else str(error)
|
||||
raise ValueError(f"PDF 解析依赖缺失: MinerU 运行时依赖 {dependency}") from error
|
||||
markdown_path = Path(output_dir) / pdf_name / "pipeline" / f"{pdf_name}.md"
|
||||
if markdown_path.exists():
|
||||
return markdown_path.read_text(encoding="utf-8")
|
||||
|
||||
raise ValueError(
|
||||
"PDF 解析失败: 当前安装的 MinerU 版本接口不兼容,请确认支持 to_markdown / parse_to_markdown,或提供 cli.common.do_parse 能力"
|
||||
)
|
||||
|
||||
async def _parse_pdf(self, file_path: str) -> ParsedDocument:
|
||||
markdown = await self._parse_pdf_with_mineru(file_path)
|
||||
return self._parse_markdown(markdown)
|
||||
|
||||
def _parse_markdown(self, content: str) -> ParsedDocument:
|
||||
nodes: list[ParsedNode] = []
|
||||
section_path: list[str] = []
|
||||
summary_parts: list[str] = []
|
||||
buffer: list[str] = []
|
||||
|
||||
def flush_buffer():
|
||||
if not buffer:
|
||||
return
|
||||
text = "\n".join(buffer).strip()
|
||||
buffer.clear()
|
||||
if not text:
|
||||
return
|
||||
nodes.append(ParsedNode("paragraph", text, {}, list(section_path)))
|
||||
summary_parts.append(text)
|
||||
|
||||
for line in content.splitlines():
|
||||
heading_match = re.match(r"^(#{1,6})\s+(.+)$", line.strip())
|
||||
if heading_match:
|
||||
flush_buffer()
|
||||
level = len(heading_match.group(1))
|
||||
title = heading_match.group(2).strip()
|
||||
section_path = section_path[: level - 1] + [title]
|
||||
nodes.append(ParsedNode("heading", title, {"level": level}, list(section_path)))
|
||||
continue
|
||||
if line.strip():
|
||||
buffer.append(line.strip())
|
||||
else:
|
||||
flush_buffer()
|
||||
flush_buffer()
|
||||
summary = " ".join(summary_parts[:3]) if summary_parts else content[:200]
|
||||
return ParsedDocument(summary=summary, nodes=nodes)
|
||||
|
||||
def _parse_text(self, content: str) -> ParsedDocument:
|
||||
paragraphs = [part.strip() for part in content.split("\n\n") if part.strip()]
|
||||
nodes = [ParsedNode("text", paragraph, {}, []) for paragraph in paragraphs]
|
||||
summary = " ".join(paragraphs[:3]) if paragraphs else content[:200]
|
||||
return ParsedDocument(summary=summary, nodes=nodes)
|
||||
|
||||
def _build_chunks(self, parsed: ParsedDocument) -> list[dict]:
|
||||
chunks: list[dict] = []
|
||||
for source_order, node in enumerate(parsed.nodes):
|
||||
section_path = node.section_path or []
|
||||
metadata = {
|
||||
"content_type": node.node_type,
|
||||
"section_path": section_path,
|
||||
"section_title": section_path[-1] if section_path else None,
|
||||
"chunk_level": len(section_path),
|
||||
"parent_key": "/".join(section_path[:-1]) or None,
|
||||
"block_key": "/".join(section_path) or None,
|
||||
"parser_version": PARSER_VERSION,
|
||||
"index_version": INDEX_VERSION,
|
||||
"source_order": source_order,
|
||||
**node.metadata,
|
||||
}
|
||||
chunks.append({"content": node.text, "metadata": metadata})
|
||||
if not chunks:
|
||||
chunks.append({
|
||||
"content": parsed.summary,
|
||||
"metadata": {
|
||||
"content_type": "text",
|
||||
"section_path": [],
|
||||
"section_title": None,
|
||||
"chunk_level": 0,
|
||||
"parent_key": None,
|
||||
"block_key": None,
|
||||
"parser_version": PARSER_VERSION,
|
||||
"index_version": INDEX_VERSION,
|
||||
"source_order": 0,
|
||||
},
|
||||
})
|
||||
return chunks
|
||||
|
||||
def _split_large_chunk(self, text: str, title: str) -> list[str]:
|
||||
"""将大段落拆分为固定大小的子块"""
|
||||
chunks = []
|
||||
sentences = text.split("。")
|
||||
current = title + "\n\n"
|
||||
|
||||
for sentence in sentences:
|
||||
sentence = sentence.strip()
|
||||
if not sentence:
|
||||
def _render_structured_markdown(self, parsed: ParsedDocument) -> str:
|
||||
blocks: list[str] = []
|
||||
for node in parsed.nodes:
|
||||
if node.node_type == "heading":
|
||||
level = max(1, min(int(node.metadata.get("level", 1)), 6))
|
||||
blocks.append(f"{'#' * level} {node.text}")
|
||||
continue
|
||||
full_sentence = sentence if sentence.endswith("。") else sentence + "。"
|
||||
if len(current) + len(full_sentence) < settings.CHUNK_SIZE:
|
||||
current += full_sentence + " "
|
||||
else:
|
||||
if current.strip():
|
||||
chunks.append(current.strip())
|
||||
current = title + "\n\n" + full_sentence + " "
|
||||
|
||||
if current.strip():
|
||||
chunks.append(current.strip())
|
||||
|
||||
return chunks
|
||||
if node.node_type == "table_schema":
|
||||
headers = node.metadata.get("headers") or []
|
||||
if headers:
|
||||
header_row = "| " + " | ".join(headers) + " |"
|
||||
divider_row = "| " + " | ".join(["---"] * len(headers)) + " |"
|
||||
blocks.append("\n".join([header_row, divider_row]))
|
||||
else:
|
||||
blocks.append(node.text)
|
||||
continue
|
||||
if node.node_type == "table_rows":
|
||||
headers = node.metadata.get("headers") or []
|
||||
if headers:
|
||||
rows = []
|
||||
for line in node.text.splitlines():
|
||||
values_by_header = {}
|
||||
for part in line.split(", "):
|
||||
if "=" not in part:
|
||||
continue
|
||||
key, value = part.split("=", 1)
|
||||
values_by_header[key] = value
|
||||
rows.append("| " + " | ".join(values_by_header.get(header, "") for header in headers) + " |")
|
||||
if rows:
|
||||
blocks.append("\n".join(rows))
|
||||
continue
|
||||
blocks.append(node.text)
|
||||
continue
|
||||
blocks.append(node.text)
|
||||
return "\n\n".join(block for block in blocks if block).strip() or parsed.summary
|
||||
|
||||
async def get_document_chunks(self, document_id: str) -> list[DocumentChunk]:
|
||||
result = await self.db.execute(
|
||||
@@ -219,6 +540,34 @@ class DocumentService:
|
||||
)
|
||||
return list(result.scalars().all())
|
||||
|
||||
async def update_document_chunk(self, user_id: str, document_id: str, chunk_id: str, content: str) -> DocumentChunk:
|
||||
document_result = await self.db.execute(
|
||||
select(Document).where(
|
||||
Document.id == document_id,
|
||||
Document.user_id == user_id,
|
||||
)
|
||||
)
|
||||
document = document_result.scalar_one_or_none()
|
||||
if not document:
|
||||
raise ValueError("文档不存在")
|
||||
|
||||
chunk_result = await self.db.execute(
|
||||
select(DocumentChunk).where(
|
||||
DocumentChunk.id == chunk_id,
|
||||
DocumentChunk.document_id == document_id,
|
||||
)
|
||||
)
|
||||
chunk = chunk_result.scalar_one_or_none()
|
||||
if not chunk:
|
||||
raise ValueError("切片不存在")
|
||||
|
||||
chunk.content = content
|
||||
document.ingestion_status = "indexing"
|
||||
document.ingestion_error = None
|
||||
await self.db.commit()
|
||||
await self.db.refresh(chunk)
|
||||
return chunk
|
||||
|
||||
async def get_document_content(self, user_id: str, document_id: str) -> str | None:
|
||||
"""获取文档的文本内容"""
|
||||
import os
|
||||
@@ -233,6 +582,9 @@ class DocumentService:
|
||||
if not doc:
|
||||
return None
|
||||
|
||||
if doc.normalized_content:
|
||||
return doc.normalized_content
|
||||
|
||||
file_path = doc.file_path
|
||||
if not os.path.exists(file_path):
|
||||
return None
|
||||
@@ -247,9 +599,6 @@ class DocumentService:
|
||||
elif ext == 'md':
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
return f.read()
|
||||
elif ext == 'pdf':
|
||||
# 简单文本提取(生产环境应使用专业库)
|
||||
return f"[PDF文档] {doc.filename}"
|
||||
else:
|
||||
return f"[文档] {doc.filename}"
|
||||
except Exception:
|
||||
|
||||
@@ -4,11 +4,8 @@
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, func
|
||||
from app.models.brain import BrainMemory, BrainTag
|
||||
from app.models.knowledge_graph import KGNode, KGEdge
|
||||
from app.models.document import Document, DocumentChunk
|
||||
from app.services.llm_service import get_llm
|
||||
from langchain_core.messages import HumanMessage
|
||||
import json
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -75,110 +72,93 @@ confidence: 0.0-1.0,表示推断置信度
|
||||
class GraphService:
|
||||
def __init__(self, db: AsyncSession):
|
||||
self.db = db
|
||||
self.llm = get_llm()
|
||||
|
||||
async def build_graph(self, user_id: str, document_ids: list[str] | None = None):
|
||||
"""
|
||||
从文档构建/更新知识图谱
|
||||
- 遍历所有 chunk
|
||||
- LLM 实体识别
|
||||
- LLM 关系抽取
|
||||
- 去重合并
|
||||
"""
|
||||
query = (
|
||||
select(DocumentChunk)
|
||||
.join(Document)
|
||||
.where(Document.user_id == user_id)
|
||||
.where(Document.is_indexed == True)
|
||||
"""从知识大脑投影图谱。"""
|
||||
existing_nodes_result = await self.db.execute(select(KGNode).where(KGNode.user_id == user_id))
|
||||
for node in existing_nodes_result.scalars().all():
|
||||
await self.db.delete(node)
|
||||
await self.db.flush()
|
||||
|
||||
memory_result = await self.db.execute(
|
||||
select(BrainMemory)
|
||||
.where(BrainMemory.user_id == user_id, BrainMemory.status == "active")
|
||||
.order_by(BrainMemory.importance.desc(), BrainMemory.created_at.desc())
|
||||
)
|
||||
if document_ids:
|
||||
query = query.where(DocumentChunk.document_id.in_(document_ids))
|
||||
memories = list(memory_result.scalars().all())
|
||||
|
||||
result = await self.db.execute(query)
|
||||
chunks = list(result.scalars().all())
|
||||
tag_result = await self.db.execute(
|
||||
select(BrainTag)
|
||||
.where(BrainTag.user_id == user_id)
|
||||
.order_by(BrainTag.score.desc(), BrainTag.created_at.desc())
|
||||
)
|
||||
tags = list(tag_result.scalars().all())
|
||||
|
||||
logger.info(f"[GraphService] 开始构建图谱,共 {len(chunks)} 个 chunks")
|
||||
logger.info(f"[GraphService] 开始从 brain 数据投影图谱,memories={len(memories)}, tags={len(tags)}")
|
||||
|
||||
for chunk in chunks:
|
||||
try:
|
||||
await self._process_chunk(chunk, user_id)
|
||||
except Exception as e:
|
||||
logger.error(f"[GraphService] 处理 chunk {chunk.id} 失败: {e}")
|
||||
continue
|
||||
|
||||
logger.info(f"[GraphService] 图谱构建完成")
|
||||
|
||||
async def _process_chunk(self, chunk: DocumentChunk, user_id: str):
|
||||
"""处理单个 chunk,提取实体和关系"""
|
||||
prompt = ENTITY_EXTRACTION_PROMPT.format(text=chunk.content[:2000])
|
||||
response = await self.llm.invoke([HumanMessage(content=prompt)])
|
||||
|
||||
try:
|
||||
data = json.loads(response.content)
|
||||
except json.JSONDecodeError:
|
||||
return
|
||||
|
||||
entities = data.get("entities", [])
|
||||
relations = data.get("relations", [])
|
||||
|
||||
if not entities:
|
||||
return
|
||||
|
||||
# 先查找已存在的节点
|
||||
existing_nodes = {}
|
||||
for entity_data in entities:
|
||||
name = entity_data["name"]
|
||||
result = await self.db.execute(
|
||||
select(KGNode)
|
||||
.where(KGNode.user_id == user_id)
|
||||
.where(KGNode.name == name)
|
||||
node_map: dict[str, KGNode] = {}
|
||||
for memory in memories:
|
||||
node = KGNode(
|
||||
user_id=user_id,
|
||||
name=memory.title,
|
||||
entity_type="memory",
|
||||
description=memory.content,
|
||||
properties_={
|
||||
"memory_type": memory.memory_type,
|
||||
"origin_source_types": memory.origin_source_types or [],
|
||||
},
|
||||
importance=min(max(memory.importance / 10, 0.1), 1.0),
|
||||
)
|
||||
node = result.scalar_one_or_none()
|
||||
if node:
|
||||
existing_nodes[name] = node
|
||||
self.db.add(node)
|
||||
await self.db.flush()
|
||||
node_map[f"memory:{memory.id}"] = node
|
||||
|
||||
# 插入新节点
|
||||
entity_map = {}
|
||||
for entity_data in entities:
|
||||
name = entity_data["name"]
|
||||
if name in existing_nodes:
|
||||
entity_map[name] = existing_nodes[name].id
|
||||
else:
|
||||
node = KGNode(
|
||||
user_id=user_id,
|
||||
name=name,
|
||||
entity_type=entity_data["type"],
|
||||
description=entity_data.get("description", ""),
|
||||
source_document_id=chunk.document_id,
|
||||
)
|
||||
self.db.add(node)
|
||||
await self.db.flush()
|
||||
entity_map[name] = node.id
|
||||
|
||||
# 插入关系(去重)
|
||||
for rel in relations:
|
||||
src, tgt = rel["source"], rel["target"]
|
||||
if src not in entity_map or tgt not in entity_map:
|
||||
continue
|
||||
|
||||
# 检查关系是否已存在
|
||||
result = await self.db.execute(
|
||||
select(KGEdge).where(
|
||||
KGEdge.source_id == entity_map[src],
|
||||
KGEdge.target_id == entity_map[tgt],
|
||||
KGEdge.relation_type == rel["relation_type"],
|
||||
)
|
||||
for tag in tags:
|
||||
node = KGNode(
|
||||
user_id=user_id,
|
||||
name=tag.name,
|
||||
entity_type="tag",
|
||||
description=f"{tag.category} / {tag.priority}",
|
||||
properties_={
|
||||
"category": tag.category,
|
||||
"priority": tag.priority,
|
||||
"score": tag.score,
|
||||
},
|
||||
importance=min(max(tag.score / 10, 0.1), 1.0),
|
||||
)
|
||||
existing = result.scalar_one_or_none()
|
||||
if not existing:
|
||||
edge = KGEdge(
|
||||
source_id=entity_map[src],
|
||||
target_id=entity_map[tgt],
|
||||
relation_type=rel["relation_type"],
|
||||
)
|
||||
self.db.add(edge)
|
||||
self.db.add(node)
|
||||
await self.db.flush()
|
||||
node_map[f"tag:{tag.id}"] = node
|
||||
|
||||
for memory in memories:
|
||||
memory_node = node_map.get(f"memory:{memory.id}")
|
||||
if not memory_node:
|
||||
continue
|
||||
memory_text = f"{memory.title} {memory.content}".lower()
|
||||
for tag in tags:
|
||||
if tag.name.lower() in memory_text:
|
||||
tag_node = node_map.get(f"tag:{tag.id}")
|
||||
if not tag_node:
|
||||
continue
|
||||
self.db.add(KGEdge(
|
||||
source_id=memory_node.id,
|
||||
target_id=tag_node.id,
|
||||
relation_type="tagged_with",
|
||||
weight=min(max(tag.score / 10, 0.1), 1.0),
|
||||
))
|
||||
|
||||
memory_nodes = [node_map[f"memory:{memory.id}"] for memory in memories if f"memory:{memory.id}" in node_map]
|
||||
for index, source_node in enumerate(memory_nodes):
|
||||
for target_node in memory_nodes[index + 1:]:
|
||||
self.db.add(KGEdge(
|
||||
source_id=source_node.id,
|
||||
target_id=target_node.id,
|
||||
relation_type="related_to",
|
||||
weight=0.5,
|
||||
))
|
||||
|
||||
await self.db.commit()
|
||||
logger.info("[GraphService] brain 图谱投影完成")
|
||||
|
||||
async def get_graph_summary(self, user_id: str) -> str:
|
||||
"""获取用户图谱的整体摘要"""
|
||||
|
||||
@@ -14,9 +14,12 @@ from sqlalchemy import select, or_
|
||||
from app.models.document import Document, DocumentChunk
|
||||
from app.models.folder import Folder
|
||||
from app.config import settings
|
||||
from app.services.document_service import DocumentService
|
||||
import chromadb
|
||||
from chromadb.config import Settings as ChromaSettings
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime
|
||||
import json
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -72,24 +75,50 @@ class KnowledgeService:
|
||||
if not chunks:
|
||||
return
|
||||
|
||||
await self._index_chunks(doc, chunks, user_id, folder_path=folder_path)
|
||||
|
||||
async def _index_chunks(
|
||||
self,
|
||||
document: Document,
|
||||
chunks: list[DocumentChunk],
|
||||
user_id: str,
|
||||
folder_path: str | None = None,
|
||||
):
|
||||
folder_path = folder_path or (await self._get_folder_path(document.folder_id) if document.folder_id else "")
|
||||
collection = self.get_collection(user_id)
|
||||
|
||||
ids = [chunk.id for chunk in chunks]
|
||||
documents = [chunk.content for chunk in chunks]
|
||||
metadatas = [
|
||||
{
|
||||
"document_id": doc.id,
|
||||
"document_title": doc.title,
|
||||
metadatas = []
|
||||
for chunk in chunks:
|
||||
chunk_metadata = self._parse_metadata(chunk.metadata_)
|
||||
meta = {
|
||||
"document_id": document.id,
|
||||
"document_title": document.title,
|
||||
"document_filename": document.filename,
|
||||
"chunk_index": chunk.chunk_index,
|
||||
"file_type": doc.file_type,
|
||||
"file_type": document.file_type,
|
||||
"folder_path": folder_path or "",
|
||||
"content_type": chunk_metadata.get("content_type", "text"),
|
||||
"section_title": chunk_metadata.get("section_title") or "",
|
||||
"section_path": " / ".join(chunk_metadata.get("section_path", [])),
|
||||
"page_number": chunk_metadata.get("page_number") or 0,
|
||||
"sheet_name": chunk_metadata.get("sheet_name") or "",
|
||||
"row_start": chunk_metadata.get("row_start") or 0,
|
||||
"row_end": chunk_metadata.get("row_end") or 0,
|
||||
"parser_version": chunk_metadata.get("parser_version") or document.parser_version or "",
|
||||
"index_version": chunk_metadata.get("index_version") or document.index_version or "",
|
||||
}
|
||||
for chunk in chunks
|
||||
]
|
||||
chunk.chroma_collection = f"user_{user_id}"
|
||||
chunk.chroma_id = chunk.id
|
||||
metadatas.append(meta)
|
||||
|
||||
collection.add(ids=ids, documents=documents, metadatas=metadatas)
|
||||
|
||||
doc.is_indexed = True
|
||||
document.is_indexed = True
|
||||
document.ingestion_status = "ready"
|
||||
document.ingestion_error = None
|
||||
document.indexed_at = datetime.now(UTC)
|
||||
await self.db.commit()
|
||||
|
||||
async def retrieve(
|
||||
@@ -141,7 +170,7 @@ class KnowledgeService:
|
||||
meta = metadatas[i] if i < len(metadatas) else {}
|
||||
score = 1.0 - (distances[i] if i < len(distances) else 0.0)
|
||||
|
||||
prev_chunk, next_chunk = await self._get_sibling_chunks(
|
||||
prev_chunk, next_chunk = await self._get_related_chunks(
|
||||
chunk_id=chunk_id,
|
||||
chunk_index=meta.get("chunk_index", 0),
|
||||
document_id=meta.get("document_id", ""),
|
||||
@@ -153,7 +182,7 @@ class KnowledgeService:
|
||||
document_title=meta.get("document_title", ""),
|
||||
content=documents[i] if i < len(documents) else "",
|
||||
score=score,
|
||||
metadata_=str(meta),
|
||||
metadata_=json.dumps(meta, ensure_ascii=False),
|
||||
prev_chunk=prev_chunk,
|
||||
next_chunk=next_chunk,
|
||||
))
|
||||
@@ -171,10 +200,11 @@ class KnowledgeService:
|
||||
results: list[SearchResult],
|
||||
top_k: int,
|
||||
) -> list[SearchResult]:
|
||||
"""Rerank: 语义分 * 0.7 + 关键词匹配 * 0.2 + 标题匹配 * 0.1"""
|
||||
"""Rerank: 语义分 * 0.7 + 关键词匹配 * 0.2 + 标题匹配 * 0.1 + 结构加权"""
|
||||
import re
|
||||
|
||||
query_words = set(re.findall(r"\w+", query.lower()))
|
||||
table_query = any(token in query.lower() for token in ["sheet", "excel", "csv", "表", "列", "金额", "统计", "日期"])
|
||||
|
||||
scored = []
|
||||
for r in results:
|
||||
@@ -189,36 +219,56 @@ class KnowledgeService:
|
||||
title_overlap = len(query_words & title_words) / max(len(query_words), 1)
|
||||
score += title_overlap * 0.1
|
||||
|
||||
metadata = self._parse_metadata(r.metadata_)
|
||||
if table_query and metadata.get("content_type") == "table_schema":
|
||||
score += 0.25
|
||||
elif table_query and metadata.get("content_type") == "table_rows":
|
||||
score += 0.15
|
||||
|
||||
scored.append((score, r))
|
||||
|
||||
scored.sort(key=lambda x: x[0], reverse=True)
|
||||
return [r for _, r in scored[:top_k]]
|
||||
|
||||
async def _get_sibling_chunks(
|
||||
async def _get_related_chunks(
|
||||
self,
|
||||
chunk_id: str,
|
||||
chunk_index: int,
|
||||
document_id: str,
|
||||
) -> tuple[str | None, str | None]:
|
||||
"""获取前一个和后一个 chunk(完整上下文)"""
|
||||
prev_result = await self.db.execute(
|
||||
select(DocumentChunk).where(
|
||||
DocumentChunk.document_id == document_id,
|
||||
DocumentChunk.chunk_index == chunk_index - 1,
|
||||
)
|
||||
"""获取结构相关的上下文 chunk"""
|
||||
current_result = await self.db.execute(
|
||||
select(DocumentChunk).where(DocumentChunk.id == chunk_id)
|
||||
)
|
||||
next_result = await self.db.execute(
|
||||
select(DocumentChunk).where(
|
||||
DocumentChunk.document_id == document_id,
|
||||
DocumentChunk.chunk_index == chunk_index + 1,
|
||||
)
|
||||
)
|
||||
prev_chunk = prev_result.scalar_one_or_none()
|
||||
next_chunk = next_result.scalar_one_or_none()
|
||||
return (
|
||||
prev_chunk.content if prev_chunk else None,
|
||||
next_chunk.content if next_chunk else None,
|
||||
current_chunk = current_result.scalar_one_or_none()
|
||||
if not current_chunk:
|
||||
return None, None
|
||||
|
||||
current_metadata = self._parse_metadata(current_chunk.metadata_)
|
||||
section_path = current_metadata.get("section_path") or []
|
||||
sheet_name = current_metadata.get("sheet_name")
|
||||
|
||||
chunk_result = await self.db.execute(
|
||||
select(DocumentChunk)
|
||||
.where(DocumentChunk.document_id == document_id)
|
||||
.order_by(DocumentChunk.chunk_index)
|
||||
)
|
||||
chunks = list(chunk_result.scalars().all())
|
||||
|
||||
prev_chunk = None
|
||||
next_chunk = None
|
||||
for chunk in chunks:
|
||||
if chunk.id == chunk_id:
|
||||
continue
|
||||
metadata = self._parse_metadata(chunk.metadata_)
|
||||
same_sheet = bool(sheet_name) and metadata.get("sheet_name") == sheet_name
|
||||
same_section = bool(section_path) and metadata.get("section_path") == section_path
|
||||
if chunk.chunk_index < chunk_index and (same_sheet or same_section):
|
||||
prev_chunk = chunk.content
|
||||
if chunk.chunk_index > chunk_index and (same_sheet or same_section):
|
||||
next_chunk = chunk.content
|
||||
break
|
||||
return prev_chunk, next_chunk
|
||||
|
||||
async def _get_folder_path(self, folder_id: str) -> str | None:
|
||||
"""获取文件夹的完整路径"""
|
||||
@@ -244,6 +294,16 @@ class KnowledgeService:
|
||||
|
||||
return "/" + "/".join(path_parts)
|
||||
|
||||
def _parse_metadata(self, raw_metadata: str | dict | None) -> dict:
|
||||
if isinstance(raw_metadata, dict):
|
||||
return raw_metadata
|
||||
if not raw_metadata:
|
||||
return {}
|
||||
try:
|
||||
return json.loads(raw_metadata)
|
||||
except (TypeError, json.JSONDecodeError):
|
||||
return {}
|
||||
|
||||
async def hybrid_search(
|
||||
self,
|
||||
query: str,
|
||||
@@ -306,3 +366,43 @@ class KnowledgeService:
|
||||
collection.delete(where={"document_id": document_id})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
async def reindex_document(self, document_id: str, user_id: str) -> bool:
|
||||
result = await self.db.execute(
|
||||
select(Document).where(
|
||||
Document.id == document_id,
|
||||
Document.user_id == user_id,
|
||||
)
|
||||
)
|
||||
document = result.scalar_one_or_none()
|
||||
if not document:
|
||||
return False
|
||||
|
||||
await self.delete_from_vectorstore(user_id, document_id)
|
||||
document = await DocumentService(self.db, user_id=user_id).rebuild_document(document)
|
||||
await self.index_document(document.id, user_id)
|
||||
return True
|
||||
|
||||
async def reindex_document_chunks(self, document_id: str, user_id: str) -> bool:
|
||||
result = await self.db.execute(
|
||||
select(Document).where(
|
||||
Document.id == document_id,
|
||||
Document.user_id == user_id,
|
||||
)
|
||||
)
|
||||
document = result.scalar_one_or_none()
|
||||
if not document:
|
||||
return False
|
||||
|
||||
chunks_result = await self.db.execute(
|
||||
select(DocumentChunk)
|
||||
.where(DocumentChunk.document_id == document_id)
|
||||
.order_by(DocumentChunk.chunk_index)
|
||||
)
|
||||
chunks = list(chunks_result.scalars().all())
|
||||
if not chunks:
|
||||
return False
|
||||
|
||||
await self.delete_from_vectorstore(user_id, document_id)
|
||||
await self._index_chunks(document, chunks, user_id)
|
||||
return True
|
||||
|
||||
@@ -4,17 +4,144 @@ OpenAI / Claude / Ollama / DeepSeek / 任意 OpenAI 兼容接口
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import AsyncIterator
|
||||
from dataclasses import dataclass
|
||||
from typing import AsyncIterator, Literal
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from langchain_core.messages import BaseMessage, AIMessage
|
||||
from langchain_openai import ChatOpenAI
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
from langchain_ollama import ChatOllama
|
||||
from app.config import settings
|
||||
from app.models.user import User
|
||||
import httpx
|
||||
import os
|
||||
|
||||
os.makedirs(settings.DATA_DIR, exist_ok=True)
|
||||
os.makedirs(settings.CHROMA_PERSIST_DIR, exist_ok=True)
|
||||
|
||||
|
||||
ToolStrategy = Literal["native", "json_fallback"]
|
||||
|
||||
|
||||
def _resolve_effective_base_url(config: dict | None) -> str:
|
||||
provider = str((config or {}).get("provider") or settings.LLM_PROVIDER or "openai").strip().lower()
|
||||
base_url = str((config or {}).get("base_url") or "").strip()
|
||||
if base_url:
|
||||
return base_url
|
||||
if provider in {"openai", "custom", "deepseek"}:
|
||||
return settings.OPENAI_BASE_URL
|
||||
if provider == "ollama":
|
||||
return settings.OLLAMA_BASE_URL
|
||||
return ""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ProviderCapabilities:
|
||||
provider: str
|
||||
supports_native_tools: bool
|
||||
preferred_tool_strategy: ToolStrategy
|
||||
|
||||
|
||||
def default_provider_capabilities() -> ProviderCapabilities:
|
||||
return resolve_provider_capabilities({"provider": settings.LLM_PROVIDER})
|
||||
|
||||
|
||||
def normalize_provider_name(config: dict | None) -> str:
|
||||
provider_raw = str((config or {}).get("provider") or "").strip().lower()
|
||||
provider = provider_raw or str(settings.LLM_PROVIDER or "openai").strip().lower()
|
||||
model = str((config or {}).get("model") or "").strip().lower()
|
||||
base_url = _resolve_effective_base_url(config).strip().lower()
|
||||
|
||||
# base_url-first inference (provider may be omitted in user config)
|
||||
if base_url:
|
||||
if any(key in base_url for key in {"localhost:11434", "127.0.0.1:11434"}):
|
||||
return "ollama"
|
||||
if any(key in base_url for key in {"api.anthropic.com", "anthropic"}):
|
||||
return "claude"
|
||||
if "api.deepseek.com" in base_url:
|
||||
return "deepseek"
|
||||
|
||||
# Many "openai-compatible" endpoints are configured as provider=openai.
|
||||
# We treat them as distinct providers so capability routing can stay conservative.
|
||||
if provider in {"openai", "custom"}:
|
||||
if any(key in model or key in base_url for key in {"minimax", "abab"}):
|
||||
return "minimax"
|
||||
if any(key in model or key in base_url for key in {"kimi", "moonshot"}):
|
||||
return "kimi"
|
||||
if any(key in model or key in base_url for key in {"qwen", "dashscope", "aliyuncs"}):
|
||||
return "qwen"
|
||||
|
||||
return provider
|
||||
|
||||
|
||||
def resolve_provider_capabilities(config: dict | None) -> ProviderCapabilities:
|
||||
provider = normalize_provider_name(config)
|
||||
|
||||
# Conservative default: only treat official OpenAI + DeepSeek + Claude as reliable native tool providers.
|
||||
# Many OpenAI-compatible endpoints reject tool / response_format / other chat params.
|
||||
native_tool_providers = {"openai", "deepseek", "claude"}
|
||||
|
||||
base_url = _resolve_effective_base_url(config).strip().lower()
|
||||
is_official_openai = (
|
||||
provider != "openai"
|
||||
or not base_url
|
||||
or "api.openai.com" in base_url
|
||||
or "openai.azure.com" in base_url
|
||||
)
|
||||
|
||||
if provider in native_tool_providers and is_official_openai:
|
||||
return ProviderCapabilities(
|
||||
provider=provider,
|
||||
supports_native_tools=True,
|
||||
preferred_tool_strategy="native",
|
||||
)
|
||||
|
||||
return ProviderCapabilities(
|
||||
provider=provider,
|
||||
supports_native_tools=False,
|
||||
preferred_tool_strategy="json_fallback",
|
||||
)
|
||||
|
||||
|
||||
def create_llm_from_config(config: dict | None):
|
||||
"""根据用户模型配置创建底层 LangChain LLM 实例"""
|
||||
if not config:
|
||||
return get_llm()
|
||||
|
||||
provider = normalize_provider_name(config)
|
||||
model = config.get("model", "")
|
||||
api_key = config.get("api_key", "")
|
||||
base_url = config.get("base_url", "")
|
||||
|
||||
if provider in {"openai", "deepseek", "custom", "minimax", "kimi", "qwen"}:
|
||||
llm = ChatOpenAI(
|
||||
api_key=api_key,
|
||||
model=model,
|
||||
base_url=base_url or None,
|
||||
timeout=httpx.Timeout(60.0, connect=10.0),
|
||||
)
|
||||
elif provider == "claude":
|
||||
llm = ChatAnthropic(
|
||||
api_key=api_key,
|
||||
model=model,
|
||||
timeout=httpx.Timeout(60.0, connect=10.0),
|
||||
)
|
||||
elif provider == "ollama":
|
||||
llm = ChatOllama(
|
||||
base_url=base_url or "http://localhost:11434",
|
||||
model=model,
|
||||
timeout=httpx.Timeout(120.0, connect=10.0),
|
||||
)
|
||||
else:
|
||||
llm = ChatOpenAI(
|
||||
api_key=api_key,
|
||||
model=model,
|
||||
base_url=base_url or None,
|
||||
timeout=httpx.Timeout(60.0, connect=10.0),
|
||||
)
|
||||
|
||||
setattr(llm, "_jarvis_user_llm_config", config)
|
||||
setattr(llm, "_jarvis_provider_capabilities", resolve_provider_capabilities(config))
|
||||
return llm
|
||||
|
||||
|
||||
class LLMService(ABC):
|
||||
@@ -142,4 +269,7 @@ def get_llm() -> LLMService:
|
||||
_llm_instance = OllamaService()
|
||||
else:
|
||||
raise ValueError(f"Unknown LLM provider: {provider}")
|
||||
setattr(_llm_instance, "_jarvis_provider_capabilities", default_provider_capabilities())
|
||||
return _llm_instance
|
||||
|
||||
|
||||
|
||||
341
backend/app/services/log_service.py
Normal file
341
backend/app/services/log_service.py
Normal file
@@ -0,0 +1,341 @@
|
||||
"""
|
||||
运行日志服务
|
||||
提供统一的日志记录接口,支持分类存储和查询
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any, Optional
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, and_, desc, func, or_
|
||||
from app.models.log import Log, LogType, LogLevel
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 日志级别映射
|
||||
LEVEL_MAP = {
|
||||
"DEBUG": LogLevel.DEBUG,
|
||||
"INFO": LogLevel.INFO,
|
||||
"WARNING": LogLevel.WARNING,
|
||||
"ERROR": LogLevel.ERROR,
|
||||
}
|
||||
|
||||
|
||||
def parse_datetime_filter(value: Optional[str]) -> Optional[datetime]:
|
||||
if not value:
|
||||
return None
|
||||
normalized = value.strip()
|
||||
if not normalized:
|
||||
return None
|
||||
normalized = normalized.replace("Z", "+00:00")
|
||||
parsed = datetime.fromisoformat(normalized)
|
||||
if parsed.tzinfo is not None:
|
||||
parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
|
||||
return parsed
|
||||
|
||||
|
||||
class LogService:
|
||||
def __init__(self, db: AsyncSession):
|
||||
self.db = db
|
||||
|
||||
async def log(
|
||||
self,
|
||||
message: str,
|
||||
level: str = "info",
|
||||
log_type: str = "system",
|
||||
user_id: Optional[str] = None,
|
||||
source: Optional[str] = None,
|
||||
details: Optional[dict] = None,
|
||||
duration_ms: Optional[int] = None,
|
||||
request_id: Optional[str] = None,
|
||||
route: Optional[str] = None,
|
||||
method: Optional[str] = None,
|
||||
status_code: Optional[int] = None,
|
||||
error_type: Optional[str] = None,
|
||||
operation: Optional[str] = None,
|
||||
) -> Log:
|
||||
"""记录日志"""
|
||||
log_entry = Log(
|
||||
level=level,
|
||||
type=log_type,
|
||||
user_id=user_id,
|
||||
request_id=request_id,
|
||||
route=route,
|
||||
method=method,
|
||||
status_code=status_code,
|
||||
error_type=error_type,
|
||||
operation=operation,
|
||||
message=message,
|
||||
source=source,
|
||||
details=json.dumps(details, ensure_ascii=False) if details is not None else None,
|
||||
duration_ms=int(duration_ms) if duration_ms is not None else None,
|
||||
)
|
||||
self.db.add(log_entry)
|
||||
await self.db.commit()
|
||||
await self.db.refresh(log_entry)
|
||||
return log_entry
|
||||
|
||||
async def agent_log(
|
||||
self,
|
||||
message: str,
|
||||
user_id: Optional[str] = None,
|
||||
source: Optional[str] = None,
|
||||
details: Optional[dict] = None,
|
||||
duration_ms: Optional[int] = None,
|
||||
) -> Log:
|
||||
"""记录智能体调用日志"""
|
||||
return await self.log(
|
||||
message=message,
|
||||
level="info",
|
||||
log_type="agent",
|
||||
user_id=user_id,
|
||||
source=source,
|
||||
details=details,
|
||||
duration_ms=duration_ms,
|
||||
)
|
||||
|
||||
async def system_log(
|
||||
self,
|
||||
message: str,
|
||||
level: str = "info",
|
||||
source: Optional[str] = None,
|
||||
details: Optional[dict] = None,
|
||||
user_id: Optional[str] = None,
|
||||
request_id: Optional[str] = None,
|
||||
route: Optional[str] = None,
|
||||
method: Optional[str] = None,
|
||||
status_code: Optional[int] = None,
|
||||
error_type: Optional[str] = None,
|
||||
operation: Optional[str] = None,
|
||||
duration_ms: Optional[int] = None,
|
||||
) -> Log:
|
||||
"""记录系统运行日志"""
|
||||
return await self.log(
|
||||
message=message,
|
||||
level=level,
|
||||
log_type="system",
|
||||
user_id=user_id,
|
||||
source=source,
|
||||
details=details,
|
||||
request_id=request_id,
|
||||
route=route,
|
||||
method=method,
|
||||
status_code=status_code,
|
||||
error_type=error_type,
|
||||
operation=operation,
|
||||
duration_ms=duration_ms,
|
||||
)
|
||||
|
||||
async def chat_log(
|
||||
self,
|
||||
message: str,
|
||||
user_id: str,
|
||||
details: Optional[dict] = None,
|
||||
duration_ms: Optional[int] = None,
|
||||
) -> Log:
|
||||
"""记录问答日志"""
|
||||
return await self.log(
|
||||
message=message,
|
||||
level="info",
|
||||
log_type="chat",
|
||||
user_id=user_id,
|
||||
source="chat",
|
||||
details=details,
|
||||
duration_ms=duration_ms,
|
||||
)
|
||||
|
||||
def _build_conditions(
|
||||
self,
|
||||
log_type: Optional[str] = None,
|
||||
level: Optional[str] = None,
|
||||
user_id: Optional[str] = None,
|
||||
source: Optional[str] = None,
|
||||
request_id: Optional[str] = None,
|
||||
route: Optional[str] = None,
|
||||
operation: Optional[str] = None,
|
||||
status_code: Optional[int] = None,
|
||||
start_at: Optional[datetime] = None,
|
||||
end_at: Optional[datetime] = None,
|
||||
) -> list[Any]:
|
||||
conditions = []
|
||||
|
||||
if log_type:
|
||||
conditions.append(Log.type == log_type)
|
||||
if level:
|
||||
conditions.append(Log.level == level)
|
||||
if user_id:
|
||||
conditions.append(or_(Log.user_id == user_id, Log.user_id.is_(None)))
|
||||
if source:
|
||||
conditions.append(Log.source == source)
|
||||
if request_id:
|
||||
conditions.append(Log.request_id == request_id)
|
||||
if route:
|
||||
conditions.append(Log.route == route)
|
||||
if operation:
|
||||
conditions.append(Log.operation == operation)
|
||||
if status_code is not None:
|
||||
conditions.append(Log.status_code == status_code)
|
||||
if start_at is not None:
|
||||
conditions.append(Log.created_at >= start_at)
|
||||
if end_at is not None:
|
||||
conditions.append(Log.created_at <= end_at)
|
||||
|
||||
return conditions
|
||||
|
||||
async def list_logs(
|
||||
self,
|
||||
log_type: Optional[str] = None,
|
||||
level: Optional[str] = None,
|
||||
user_id: Optional[str] = None,
|
||||
source: Optional[str] = None,
|
||||
request_id: Optional[str] = None,
|
||||
route: Optional[str] = None,
|
||||
operation: Optional[str] = None,
|
||||
status_code: Optional[int] = None,
|
||||
start_at: Optional[datetime] = None,
|
||||
end_at: Optional[datetime] = None,
|
||||
limit: int = 100,
|
||||
offset: int = 0,
|
||||
) -> tuple[list[Log], int]:
|
||||
"""
|
||||
查询日志列表
|
||||
|
||||
Returns:
|
||||
(logs, total_count)
|
||||
"""
|
||||
conditions = self._build_conditions(
|
||||
log_type=log_type,
|
||||
level=level,
|
||||
user_id=user_id,
|
||||
source=source,
|
||||
request_id=request_id,
|
||||
route=route,
|
||||
operation=operation,
|
||||
status_code=status_code,
|
||||
start_at=start_at,
|
||||
end_at=end_at,
|
||||
)
|
||||
|
||||
count_query = select(func.count(Log.id))
|
||||
if conditions:
|
||||
count_query = count_query.where(and_(*conditions))
|
||||
total_result = await self.db.execute(count_query)
|
||||
total = total_result.scalar() or 0
|
||||
|
||||
query = (
|
||||
select(Log).where(and_(*conditions)) if conditions else select(Log)
|
||||
).order_by(desc(Log.created_at)).limit(limit).offset(offset)
|
||||
|
||||
result = await self.db.execute(query)
|
||||
logs = list(result.scalars().all())
|
||||
|
||||
return logs, total
|
||||
|
||||
async def get_recent_logs(
|
||||
self,
|
||||
log_type: Optional[str] = None,
|
||||
user_id: Optional[str] = None,
|
||||
hours: int = 24,
|
||||
limit: int = 100,
|
||||
) -> list[Log]:
|
||||
"""获取最近的日志"""
|
||||
end_at = datetime.now(timezone.utc).replace(tzinfo=None)
|
||||
start_at = end_at - timedelta(hours=hours)
|
||||
conditions = self._build_conditions(
|
||||
log_type=log_type,
|
||||
user_id=user_id,
|
||||
start_at=start_at,
|
||||
end_at=end_at,
|
||||
)
|
||||
|
||||
query = select(Log).where(and_(*conditions)).order_by(desc(Log.created_at)).limit(limit)
|
||||
result = await self.db.execute(query)
|
||||
return list(result.scalars().all())
|
||||
|
||||
async def get_log_stats(
|
||||
self,
|
||||
log_type: Optional[str] = None,
|
||||
level: Optional[str] = None,
|
||||
user_id: Optional[str] = None,
|
||||
source: Optional[str] = None,
|
||||
request_id: Optional[str] = None,
|
||||
route: Optional[str] = None,
|
||||
operation: Optional[str] = None,
|
||||
status_code: Optional[int] = None,
|
||||
start_at: Optional[datetime] = None,
|
||||
end_at: Optional[datetime] = None,
|
||||
) -> dict:
|
||||
"""获取日志统计"""
|
||||
base_conditions = self._build_conditions(
|
||||
user_id=user_id,
|
||||
source=source,
|
||||
request_id=request_id,
|
||||
route=route,
|
||||
operation=operation,
|
||||
status_code=status_code,
|
||||
start_at=start_at,
|
||||
end_at=end_at,
|
||||
)
|
||||
|
||||
stats = {
|
||||
"total": 0,
|
||||
"by_type": {"agent": 0, "system": 0, "chat": 0},
|
||||
"by_level": {"debug": 0, "info": 0, "warning": 0, "error": 0},
|
||||
}
|
||||
|
||||
total_conditions = list(base_conditions)
|
||||
if log_type:
|
||||
total_conditions.append(Log.type == log_type)
|
||||
if level:
|
||||
total_conditions.append(Log.level == level)
|
||||
total_query = select(func.count(Log.id)).where(and_(*total_conditions))
|
||||
total_result = await self.db.execute(total_query)
|
||||
stats["total"] = total_result.scalar() or 0
|
||||
|
||||
for current_type in ["agent", "system", "chat"]:
|
||||
conditions = list(base_conditions)
|
||||
conditions.append(Log.type == current_type)
|
||||
if level:
|
||||
conditions.append(Log.level == level)
|
||||
query = select(func.count(Log.id)).where(and_(*conditions))
|
||||
result = await self.db.execute(query)
|
||||
stats["by_type"][current_type] = result.scalar() or 0
|
||||
|
||||
for current_level in ["debug", "info", "warning", "error"]:
|
||||
conditions = list(base_conditions)
|
||||
if log_type:
|
||||
conditions.append(Log.type == log_type)
|
||||
conditions.append(Log.level == current_level)
|
||||
query = select(func.count(Log.id)).where(and_(*conditions))
|
||||
result = await self.db.execute(query)
|
||||
stats["by_level"][current_level] = result.scalar() or 0
|
||||
|
||||
return stats
|
||||
|
||||
|
||||
def serialize_log(log: Log) -> dict[str, Any]:
|
||||
details = None
|
||||
if log.details:
|
||||
try:
|
||||
details = json.loads(log.details)
|
||||
except json.JSONDecodeError:
|
||||
details = {"raw": log.details}
|
||||
|
||||
return {
|
||||
"id": log.id,
|
||||
"level": log.level,
|
||||
"type": log.type,
|
||||
"user_id": log.user_id,
|
||||
"request_id": log.request_id,
|
||||
"route": log.route,
|
||||
"method": log.method,
|
||||
"status_code": log.status_code,
|
||||
"error_type": log.error_type,
|
||||
"operation": log.operation,
|
||||
"message": log.message,
|
||||
"source": log.source,
|
||||
"details": details,
|
||||
"duration_ms": int(log.duration_ms) if log.duration_ms is not None else None,
|
||||
"created_at": log.created_at.replace(tzinfo=timezone.utc).isoformat() if log.created_at else None,
|
||||
"updated_at": log.updated_at.replace(tzinfo=timezone.utc).isoformat() if log.updated_at else None,
|
||||
}
|
||||
@@ -1,22 +1,154 @@
|
||||
"""
|
||||
Jarvis 记忆系统
|
||||
Jarvis 记忆系统 (基于 Mem0)
|
||||
三层记忆: 短期(对话历史) → 中期(摘要) → 长期(用户画像)
|
||||
底层使用 Mem0 实现事实提取、时间线、矛盾解决和遗忘机制
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
import os
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
from typing import Optional, Any
|
||||
from sqlalchemy import select, desc, func
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from app.models.memory import MemorySummary, UserMemory
|
||||
from app.models.conversation import Conversation, Message
|
||||
from app.services.llm_service import get_llm
|
||||
from app.agents.context import get_current_user
|
||||
from app.models.user import User
|
||||
from app.services.brain_service import BrainService
|
||||
from app.config import settings as _settings
|
||||
|
||||
try:
|
||||
from mem0 import Memory
|
||||
|
||||
MEM0_AVAILABLE = True
|
||||
except ImportError:
|
||||
MEM0_AVAILABLE = False
|
||||
Memory = None
|
||||
|
||||
|
||||
async def _get_user_embedding_config(db: AsyncSession, user_id: str) -> dict | None:
|
||||
"""从用户配置中获取 embedding 模型配置"""
|
||||
result = await db.execute(select(User).where(User.id == user_id))
|
||||
user = result.scalar_one_or_none()
|
||||
if not user or not user.llm_config:
|
||||
return None
|
||||
|
||||
embedding_models = user.llm_config.get("embedding", [])
|
||||
for model in embedding_models:
|
||||
if model.get("enabled") and model.get("model"):
|
||||
return {
|
||||
"model": model.get("model"),
|
||||
"base_url": model.get("base_url") or _settings.EMBEDDING_BASE_URL,
|
||||
"api_key": model.get("api_key")
|
||||
or _settings.EMBEDDING_API_KEY
|
||||
or _settings.OPENAI_API_KEY,
|
||||
}
|
||||
return None
|
||||
|
||||
|
||||
async def _get_user_chat_config(db: AsyncSession, user_id: str) -> dict | None:
|
||||
"""从用户配置中获取 chat 模型配置"""
|
||||
result = await db.execute(select(User).where(User.id == user_id))
|
||||
user = result.scalar_one_or_none()
|
||||
if not user or not user.llm_config:
|
||||
return None
|
||||
|
||||
chat_models = user.llm_config.get("chat", [])
|
||||
for model in chat_models:
|
||||
if model.get("enabled") and model.get("model"):
|
||||
return {
|
||||
"model": model.get("model"),
|
||||
"base_url": model.get("base_url") or _settings.OPENAI_BASE_URL,
|
||||
"api_key": model.get("api_key") or _settings.OPENAI_API_KEY,
|
||||
}
|
||||
return None
|
||||
|
||||
|
||||
class Mem0Client:
|
||||
"""Mem0 客户端 - 按用户隔离"""
|
||||
|
||||
_instances: dict[str, Memory] = {}
|
||||
_persist_dir: str = "./data/mem0"
|
||||
|
||||
async def get_memory(self, db: AsyncSession, user_id: str) -> Memory:
|
||||
"""获取指定用户的 Mem0 实例"""
|
||||
cache_key = user_id
|
||||
|
||||
if cache_key not in self._instances:
|
||||
self._instances[cache_key] = await self._init_memory(db, user_id)
|
||||
|
||||
return self._instances[cache_key]
|
||||
|
||||
async def _init_memory(self, db: AsyncSession, user_id: str) -> Memory:
|
||||
if not MEM0_AVAILABLE:
|
||||
raise RuntimeError("mem0ai 未安装,请运行: pip install mem0ai")
|
||||
|
||||
os.makedirs(self._persist_dir, exist_ok=True)
|
||||
|
||||
llm_config = {
|
||||
"model": _settings.OPENAI_MODEL,
|
||||
"base_url": _settings.OPENAI_BASE_URL,
|
||||
"api_key": _settings.OPENAI_API_KEY,
|
||||
}
|
||||
|
||||
embed_config = _settings.EMBEDDING_MODEL
|
||||
embed_base_url = _settings.EMBEDDING_BASE_URL
|
||||
embed_api_key = _settings.EMBEDDING_API_KEY or _settings.OPENAI_API_KEY
|
||||
|
||||
if db and user_id:
|
||||
try:
|
||||
user_chat = await _get_user_chat_config(db, user_id)
|
||||
if user_chat:
|
||||
llm_config = user_chat
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
user_embed = await _get_user_embedding_config(db, user_id)
|
||||
if user_embed:
|
||||
embed_config = user_embed["model"]
|
||||
embed_base_url = user_embed["base_url"]
|
||||
embed_api_key = user_embed["api_key"]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
config = {
|
||||
"vector_store": {
|
||||
"provider": "chroma",
|
||||
"config": {
|
||||
"collection_name": f"jarvis_memory_{user_id}",
|
||||
"path": self._persist_dir,
|
||||
},
|
||||
},
|
||||
"llm": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": llm_config["model"],
|
||||
"api_key": llm_config["api_key"],
|
||||
"base_url": llm_config["base_url"],
|
||||
},
|
||||
},
|
||||
"embedder": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": embed_config,
|
||||
"api_key": embed_api_key,
|
||||
"base_url": embed_base_url,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
return Memory.from_config(config)
|
||||
|
||||
|
||||
_mem0_client = Mem0Client()
|
||||
|
||||
|
||||
async def get_mem0(db: AsyncSession, user_id: str) -> Memory:
|
||||
"""获取指定用户的 Mem0 实例"""
|
||||
return await _mem0_client.get_memory(db, user_id)
|
||||
|
||||
|
||||
# ———— 短期记忆: 对话历史 ————
|
||||
|
||||
|
||||
async def load_conversation_history(
|
||||
db: AsyncSession,
|
||||
conversation_id: str,
|
||||
@@ -35,8 +167,7 @@ async def load_conversation_history(
|
||||
async def get_conversation_turn_count(db: AsyncSession, conversation_id: str) -> int:
|
||||
"""获取对话轮数(用户消息数)"""
|
||||
result = await db.execute(
|
||||
select(func.count(Message.id))
|
||||
.where(
|
||||
select(func.count(Message.id)).where(
|
||||
Message.conversation_id == conversation_id,
|
||||
Message.role == "user",
|
||||
)
|
||||
@@ -46,14 +177,15 @@ async def get_conversation_turn_count(db: AsyncSession, conversation_id: str) ->
|
||||
|
||||
# ———— 中期记忆: 对话摘要 ————
|
||||
|
||||
SUMMARIZE_THRESHOLD = 8 # 超过此轮数则摘要
|
||||
MAX_HISTORY_TURNS = 10 # Agent 最多看到的对话历史轮数
|
||||
SUMMARIZE_THRESHOLD = 8
|
||||
MAX_HISTORY_TURNS = 10
|
||||
|
||||
|
||||
async def should_summarize(db: AsyncSession, conversation_id: str) -> bool:
|
||||
"""判断当前对话是否需要摘要"""
|
||||
from app.models.memory import MemorySummary
|
||||
|
||||
turn_count = await get_conversation_turn_count(db, conversation_id)
|
||||
# 检查是否已有摘要覆盖到当前轮数
|
||||
result = await db.execute(
|
||||
select(MemorySummary)
|
||||
.where(MemorySummary.conversation_id == conversation_id)
|
||||
@@ -71,17 +203,21 @@ async def generate_summary(
|
||||
conversation_id: str,
|
||||
messages: list[Message],
|
||||
) -> str:
|
||||
"""调用 LLM 生成对话摘要"""
|
||||
history_text = "\n".join(
|
||||
f"[{m.role}] {m.content}" for m in messages
|
||||
)
|
||||
llm = get_llm()
|
||||
"""生成对话摘要"""
|
||||
from app.services.llm_service import get_llm
|
||||
from langchain_core.messages import HumanMessage, SystemMessage
|
||||
response = await llm.invoke([
|
||||
SystemMessage(content="你是一个记忆助手。请用简洁的中文总结以下对话的核心内容,"
|
||||
"提取关键信息、用户偏好、待办事项等。不超过150字。"),
|
||||
HumanMessage(content=history_text),
|
||||
])
|
||||
|
||||
history_text = "\n".join(f"[{m.role}] {m.content}" for m in messages)
|
||||
llm = get_llm()
|
||||
response = await llm.invoke(
|
||||
[
|
||||
SystemMessage(
|
||||
content="你是一个记忆助手。请用简洁的中文总结以下对话的核心内容,"
|
||||
"提取关键信息、用户偏好、待办事项等。不超过150字。"
|
||||
),
|
||||
HumanMessage(content=history_text),
|
||||
]
|
||||
)
|
||||
return response.content.strip()
|
||||
|
||||
|
||||
@@ -91,8 +227,10 @@ async def save_summary(
|
||||
conversation_id: str,
|
||||
summary_text: str,
|
||||
turn_count: int,
|
||||
) -> MemorySummary:
|
||||
"""保存对话摘要"""
|
||||
) -> Any:
|
||||
"""保存对话摘要到数据库"""
|
||||
from app.models.memory import MemorySummary
|
||||
|
||||
summary = MemorySummary(
|
||||
user_id=user_id,
|
||||
conversation_id=conversation_id,
|
||||
@@ -108,8 +246,10 @@ async def save_summary(
|
||||
async def get_summaries(
|
||||
db: AsyncSession,
|
||||
conversation_id: str,
|
||||
) -> list[MemorySummary]:
|
||||
) -> list[Any]:
|
||||
"""获取某对话的所有历史摘要"""
|
||||
from app.models.memory import MemorySummary
|
||||
|
||||
result = await db.execute(
|
||||
select(MemorySummary)
|
||||
.where(MemorySummary.conversation_id == conversation_id)
|
||||
@@ -118,31 +258,7 @@ async def get_summaries(
|
||||
return list(result.scalars().all())
|
||||
|
||||
|
||||
# ———— 长期记忆: 用户画像 ————
|
||||
|
||||
EXTRACTION_PROMPT = """从以下对话中提取关于用户的关键信息。
|
||||
只提取事实性的、可能对未来对话有帮助的信息,如:
|
||||
- 用户的身份/职业/背景
|
||||
- 用户的偏好和习惯
|
||||
- 用户的目标和计划
|
||||
- 重要的事件和日期
|
||||
- 用户的观点和态度
|
||||
|
||||
每条记忆格式: [类型] 内容
|
||||
类型: fact(事实) | preference(偏好) | goal(目标) | habit(习惯)
|
||||
|
||||
如果没有提取到任何记忆,回复"无"。
|
||||
"""
|
||||
|
||||
FACT_TYPES = {"fact", "preference", "goal", "habit"}
|
||||
|
||||
|
||||
def _parse_fact_line(line: str) -> tuple[str, str] | None:
|
||||
"""解析一行记忆: [fact] 内容 -> (type, content)"""
|
||||
m = re.match(r"\[(\w+)\]\s*(.+)", line.strip())
|
||||
if m and m.group(1) in FACT_TYPES:
|
||||
return m.group(1), m.group(2).strip()
|
||||
return None
|
||||
# ———— 长期记忆: 基于 Mem0 ————
|
||||
|
||||
|
||||
async def extract_user_memories(
|
||||
@@ -150,55 +266,34 @@ async def extract_user_memories(
|
||||
user_id: str,
|
||||
conversation_id: str,
|
||||
messages: list[Message],
|
||||
) -> list[UserMemory]:
|
||||
"""从对话中提取用户记忆并保存"""
|
||||
) -> list[dict]:
|
||||
"""
|
||||
从对话中提取用户记忆并存储到 Mem0。
|
||||
Mem0 会自动处理:
|
||||
- 事实提取
|
||||
- 时间线追踪
|
||||
- 矛盾解决
|
||||
- 遗忘机制
|
||||
"""
|
||||
if len(messages) < 2:
|
||||
return []
|
||||
|
||||
history_text = "\n".join(
|
||||
f"[{m.role}] {m.content}" for m in messages[-10:]
|
||||
)
|
||||
history_text = "\n".join(f"[{m.role}] {m.content}" for m in messages[-10:])
|
||||
|
||||
llm = get_llm()
|
||||
from langchain_core.messages import HumanMessage, SystemMessage
|
||||
response = await llm.invoke([
|
||||
SystemMessage(content=EXTRACTION_PROMPT),
|
||||
HumanMessage(content=history_text),
|
||||
])
|
||||
|
||||
text = response.content.strip()
|
||||
if text == "无" or not text:
|
||||
return []
|
||||
|
||||
memories = []
|
||||
for line in text.split("\n"):
|
||||
parsed = _parse_fact_line(line)
|
||||
if not parsed:
|
||||
continue
|
||||
mem_type, content = parsed
|
||||
# 检查是否已有完全相同的记忆
|
||||
existing = await db.execute(
|
||||
select(UserMemory).where(
|
||||
UserMemory.user_id == user_id,
|
||||
UserMemory.content == content,
|
||||
)
|
||||
)
|
||||
if existing.scalar_one_or_none():
|
||||
continue
|
||||
|
||||
mem = UserMemory(
|
||||
try:
|
||||
mem0 = await get_mem0(db, user_id)
|
||||
result = mem0.add(
|
||||
messages=[{"role": m.role, "content": m.content} for m in messages[-10:]],
|
||||
user_id=user_id,
|
||||
memory_type=mem_type,
|
||||
content=content,
|
||||
importance=5,
|
||||
source_conversation_id=conversation_id,
|
||||
metadata={
|
||||
"conversation_id": conversation_id,
|
||||
"source": "jarvis_memory",
|
||||
},
|
||||
)
|
||||
db.add(mem)
|
||||
memories.append(mem)
|
||||
|
||||
if memories:
|
||||
await db.commit()
|
||||
return memories
|
||||
return result.get("results", [])
|
||||
except Exception as e:
|
||||
print(f"Mem0 extract error: {e}")
|
||||
return []
|
||||
|
||||
|
||||
async def recall_user_memories(
|
||||
@@ -206,41 +301,45 @@ async def recall_user_memories(
|
||||
user_id: str,
|
||||
query: str,
|
||||
top_k: int = 5,
|
||||
) -> list[UserMemory]:
|
||||
"""根据当前输入召回相关的用户记忆(简单关键词匹配)"""
|
||||
# 先尝试语义相似(通过 LLM 判断)
|
||||
# 降级: 直接从数据库取最近的重要记忆
|
||||
result = await db.execute(
|
||||
select(UserMemory)
|
||||
.where(UserMemory.user_id == user_id)
|
||||
.order_by(desc(UserMemory.importance), desc(UserMemory.recall_count))
|
||||
.limit(top_k)
|
||||
)
|
||||
memories = list(result.scalars().all())
|
||||
|
||||
# 重置召回标记
|
||||
for m in memories:
|
||||
m.is_recalled = False
|
||||
await db.commit()
|
||||
|
||||
return memories
|
||||
) -> list[dict]:
|
||||
"""
|
||||
根据当前输入召回相关的用户记忆。
|
||||
使用 Mem0 的语义搜索。
|
||||
"""
|
||||
try:
|
||||
mem0 = await get_mem0(db, user_id)
|
||||
results = mem0.search(
|
||||
query=query,
|
||||
filters={"user_id": user_id},
|
||||
limit=top_k,
|
||||
)
|
||||
return results.get("results", [])
|
||||
except Exception as e:
|
||||
print(f"Mem0 search error: {e}")
|
||||
return []
|
||||
|
||||
|
||||
async def mark_memory_recalled(db: AsyncSession, memory_id: str):
|
||||
"""标记记忆已被召回使用"""
|
||||
result = await db.execute(
|
||||
select(UserMemory).where(UserMemory.id == memory_id)
|
||||
)
|
||||
mem = result.scalar_one_or_none()
|
||||
if mem:
|
||||
mem.is_recalled = True
|
||||
mem.recall_count = (mem.recall_count or 0) + 1
|
||||
mem.last_recalled_at = datetime.utcnow()
|
||||
await db.commit()
|
||||
async def get_user_profile(db: AsyncSession, user_id: str) -> dict:
|
||||
"""
|
||||
获取用户画像。
|
||||
Mem0 的 profile API 会返回 static 和 dynamic facts。
|
||||
"""
|
||||
try:
|
||||
mem0 = await get_mem0(db, user_id)
|
||||
result = mem0.history(user_id=user_id)
|
||||
return {
|
||||
"memories": result.get("results", []),
|
||||
"static": [],
|
||||
"dynamic": [],
|
||||
}
|
||||
except Exception as e:
|
||||
print(f"Mem0 profile error: {e}")
|
||||
return {"memories": [], "static": [], "dynamic": []}
|
||||
|
||||
|
||||
# ———— 记忆组装: 供 Agent 使用的上下文 ————
|
||||
|
||||
|
||||
async def build_memory_context(
|
||||
db: AsyncSession,
|
||||
user_id: str,
|
||||
@@ -253,24 +352,29 @@ async def build_memory_context(
|
||||
"""
|
||||
parts = []
|
||||
|
||||
# 1. 用户画像(长期记忆)
|
||||
user_memories = await recall_user_memories(db, user_id, current_query, top_k=5)
|
||||
if user_memories:
|
||||
memories = await recall_user_memories(db, user_id, current_query, top_k=5)
|
||||
if memories:
|
||||
lines = []
|
||||
for m in user_memories:
|
||||
tag = f"[{m.memory_type}]"
|
||||
lines.append(f" {tag} {m.content}")
|
||||
await mark_memory_recalled(db, m.id)
|
||||
parts.append("【用户记忆】\n" + "\n".join(lines))
|
||||
for m in memories:
|
||||
memory_text = m.get("memory", m.get("text", ""))
|
||||
if memory_text:
|
||||
lines.append(f" - {memory_text}")
|
||||
if lines:
|
||||
parts.append("【用户记忆】\n" + "\n".join(lines))
|
||||
|
||||
# 2. 对话摘要(中期记忆)
|
||||
summaries = await get_summaries(db, conversation_id)
|
||||
if summaries:
|
||||
# 只取最近2条
|
||||
recent = summaries[-2:]
|
||||
lines = [f"[对话摘要{i+1}] {s.summary_text}" for i, s in enumerate(recent)]
|
||||
lines = [f"[对话摘要{i + 1}] {s.summary_text}" for i, s in enumerate(recent)]
|
||||
parts.append("【之前对话摘要】\n" + "\n".join(lines))
|
||||
|
||||
brain_memories = await BrainService(db).recall_memories(user_id, current_query, top_k=3)
|
||||
if brain_memories:
|
||||
lines = []
|
||||
for memory in brain_memories:
|
||||
lines.append(f"- {memory.title}: {memory.content}")
|
||||
parts.append("【知识大脑】\n" + "\n".join(lines))
|
||||
|
||||
if not parts:
|
||||
return ""
|
||||
return "\n\n".join(parts)
|
||||
@@ -283,7 +387,7 @@ async def try_auto_summarize(
|
||||
) -> bool:
|
||||
"""
|
||||
检查是否需要摘要,如果需要则生成并保存。
|
||||
返回是否执行了摘要。
|
||||
同时将对话内容存入 Mem0 进行记忆提取。
|
||||
"""
|
||||
if not await should_summarize(db, conversation_id):
|
||||
return False
|
||||
@@ -297,8 +401,39 @@ async def try_auto_summarize(
|
||||
turn_count = await get_conversation_turn_count(db, conversation_id)
|
||||
await save_summary(db, user_id, conversation_id, summary_text, turn_count)
|
||||
|
||||
# 同时提取用户记忆
|
||||
await extract_user_memories(db, user_id, conversation_id, messages)
|
||||
return True
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
print(f"Auto summarize error: {e}")
|
||||
return False
|
||||
|
||||
|
||||
async def forget_memory(db: AsyncSession, user_id: str, memory_id: str) -> bool:
|
||||
"""
|
||||
主动遗忘某条记忆。
|
||||
"""
|
||||
try:
|
||||
mem0 = await get_mem0(db, user_id)
|
||||
mem0.delete(memory_id, user_id=user_id)
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"Mem0 delete error: {e}")
|
||||
return False
|
||||
|
||||
|
||||
async def update_memory(
|
||||
db: AsyncSession,
|
||||
user_id: str,
|
||||
memory_id: str,
|
||||
content: str,
|
||||
) -> bool:
|
||||
"""
|
||||
更新某条记忆。Mem0 会自动处理矛盾检测。
|
||||
"""
|
||||
try:
|
||||
mem0 = await get_mem0(db, user_id)
|
||||
mem0.update(memory_id, content, user_id=user_id)
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"Mem0 update error: {e}")
|
||||
return False
|
||||
|
||||
@@ -32,9 +32,9 @@ async def daily_task_analysis():
|
||||
logger.info("[Scheduler] 开始执行每日任务分析...")
|
||||
|
||||
async with async_session() as db:
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import UTC, datetime, timedelta
|
||||
|
||||
yesterday = datetime.utcnow().date() - timedelta(days=1)
|
||||
yesterday = datetime.now(UTC).date() - timedelta(days=1)
|
||||
|
||||
# 统计昨日任务完成情况
|
||||
result = await db.execute(
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
import copy
|
||||
import logging
|
||||
from typing import Optional
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
from app.models.user import User
|
||||
from app.services.auth_service import verify_password, get_password_hash
|
||||
from app.logging_utils import summarize_llm_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -49,12 +51,15 @@ async def update_user_profile(
|
||||
|
||||
async def update_llm_config(user_id: str, config: dict, db: AsyncSession) -> dict:
|
||||
"""更新 LLM 配置"""
|
||||
logger.info("update_llm_config called", extra={"details": {"keys": list(config.keys())}})
|
||||
result = await db.execute(select(User).where(User.id == user_id))
|
||||
user = result.scalar_one_or_none()
|
||||
if not user:
|
||||
raise ValueError("用户不存在")
|
||||
|
||||
current = user.llm_config or {}
|
||||
# 创建深拷贝,避免 SQLAlchemy 变更检测问题
|
||||
current = copy.deepcopy(user.llm_config) or {}
|
||||
logger.info("llm_config before update", extra={"details": summarize_llm_config(current)})
|
||||
# 合并配置 - 直接替换整个类型配置列表
|
||||
for key, value in config.items():
|
||||
if value is not None:
|
||||
@@ -69,8 +74,11 @@ async def update_llm_config(user_id: str, config: dict, db: AsyncSession) -> dic
|
||||
current[key] = value
|
||||
else:
|
||||
current[key] = value
|
||||
logger.info("llm_config after update", extra={"details": summarize_llm_config(current)})
|
||||
user.llm_config = current
|
||||
await db.commit()
|
||||
await db.refresh(user)
|
||||
logger.info("user.llm_config after refresh", extra={"details": summarize_llm_config(user.llm_config)})
|
||||
return current
|
||||
|
||||
|
||||
@@ -91,46 +99,55 @@ async def update_scheduler_config(user_id: str, config: dict, db: AsyncSession)
|
||||
|
||||
|
||||
async def test_llm_connection(
|
||||
provider: str,
|
||||
provider: str | None,
|
||||
model: str,
|
||||
base_url: str,
|
||||
api_key: str
|
||||
api_key: str,
|
||||
) -> dict:
|
||||
"""测试 LLM 连接"""
|
||||
try:
|
||||
# base_url-first: provider 可省略
|
||||
from app.services.llm_service import normalize_provider_name
|
||||
|
||||
effective_provider = normalize_provider_name({
|
||||
"provider": provider,
|
||||
"model": model,
|
||||
"base_url": base_url,
|
||||
})
|
||||
|
||||
# 根据不同 provider 创建临时 LLM 实例并测试
|
||||
if provider == "openai":
|
||||
if effective_provider in {"openai", "custom", "minimax", "kimi", "qwen"}:
|
||||
from langchain_openai import ChatOpenAI
|
||||
llm = ChatOpenAI(
|
||||
api_key=api_key,
|
||||
model=model,
|
||||
base_url=base_url or None,
|
||||
timeout=30
|
||||
timeout=30,
|
||||
)
|
||||
elif provider == "claude":
|
||||
elif effective_provider == "claude":
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
llm = ChatAnthropic(
|
||||
api_key=api_key,
|
||||
model=model,
|
||||
timeout=30
|
||||
timeout=30,
|
||||
)
|
||||
elif provider == "ollama":
|
||||
elif effective_provider == "ollama":
|
||||
from langchain_ollama import ChatOllama
|
||||
llm = ChatOllama(
|
||||
base_url=base_url or "http://localhost:11434",
|
||||
model=model,
|
||||
timeout=30
|
||||
timeout=30,
|
||||
)
|
||||
elif provider == "deepseek":
|
||||
elif effective_provider == "deepseek":
|
||||
from langchain_openai import ChatOpenAI
|
||||
llm = ChatOpenAI(
|
||||
api_key=api_key,
|
||||
model=model,
|
||||
base_url=base_url or "https://api.deepseek.com/v1",
|
||||
timeout=30
|
||||
timeout=30,
|
||||
)
|
||||
else:
|
||||
return {"success": False, "error": f"不支持的 provider: {provider}"}
|
||||
return {"success": False, "error": f"不支持的 endpoint/provider: {effective_provider}"}
|
||||
|
||||
# 简单测试调用
|
||||
from langchain_core.messages import HumanMessage
|
||||
|
||||
127
backend/app/services/skill_service.py
Normal file
127
backend/app/services/skill_service.py
Normal file
@@ -0,0 +1,127 @@
|
||||
"""
|
||||
Skill Service - 技能管理服务层
|
||||
负责技能的创建、查询、更新、删除等操作
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, and_, or_
|
||||
from app.models.skill import Skill
|
||||
from app.models.user import User
|
||||
|
||||
|
||||
class SkillService:
|
||||
def __init__(self, db: AsyncSession):
|
||||
self.db = db
|
||||
|
||||
async def create(self, user_id: str, data: dict) -> Skill:
|
||||
"""创建新技能"""
|
||||
skill = Skill(
|
||||
owner_id=user_id,
|
||||
name=data.get("name"),
|
||||
description=data.get("description"),
|
||||
instructions=data.get("instructions"),
|
||||
agent_type=data.get("agent_type"),
|
||||
tools=data.get("tools", []),
|
||||
required_context=data.get("required_context", []),
|
||||
output_format=data.get("output_format"),
|
||||
visibility=data.get("visibility", "private"),
|
||||
team_id=data.get("team_id"),
|
||||
is_active=data.get("is_active", True),
|
||||
)
|
||||
self.db.add(skill)
|
||||
await self.db.commit()
|
||||
await self.db.refresh(skill)
|
||||
return skill
|
||||
|
||||
async def get_by_id(self, skill_id: str) -> Optional[Skill]:
|
||||
"""根据ID获取技能"""
|
||||
result = await self.db.execute(
|
||||
select(Skill).where(Skill.id == skill_id)
|
||||
)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def list_for_user(
|
||||
self,
|
||||
user_id: str,
|
||||
agent_type: Optional[str] = None,
|
||||
visibility: Optional[str] = None,
|
||||
) -> list[Skill]:
|
||||
"""
|
||||
列出用户可访问的技能:自己的 + 市场的 + 团队的
|
||||
"""
|
||||
access_scope = or_(
|
||||
Skill.owner_id == user_id,
|
||||
Skill.visibility == "market",
|
||||
Skill.team_id == user_id,
|
||||
)
|
||||
|
||||
filters = [access_scope, Skill.is_active == True]
|
||||
|
||||
if agent_type:
|
||||
filters.append(Skill.agent_type == agent_type)
|
||||
|
||||
if visibility:
|
||||
filters.append(Skill.visibility == visibility)
|
||||
|
||||
query = select(Skill).where(and_(*filters))
|
||||
|
||||
result = await self.db.execute(query)
|
||||
return list(result.scalars().all())
|
||||
|
||||
async def update(self, skill_id: str, user_id: str, data: dict) -> Optional[Skill]:
|
||||
"""更新技能(仅所有者可更新)"""
|
||||
skill = await self.get_by_id(skill_id)
|
||||
if not skill:
|
||||
return None
|
||||
|
||||
# 检查是否是所有者
|
||||
if skill.owner_id != user_id:
|
||||
return None
|
||||
|
||||
# 更新字段
|
||||
update_fields = [
|
||||
"name", "description", "instructions", "agent_type",
|
||||
"tools", "required_context", "output_format", "visibility",
|
||||
"team_id", "is_active"
|
||||
]
|
||||
|
||||
for field in update_fields:
|
||||
if field in data:
|
||||
setattr(skill, field, data[field])
|
||||
|
||||
await self.db.commit()
|
||||
await self.db.refresh(skill)
|
||||
return skill
|
||||
|
||||
async def delete(self, skill_id: str, user_id: str) -> bool:
|
||||
"""删除技能(仅所有者可删除)"""
|
||||
skill = await self.get_by_id(skill_id)
|
||||
if not skill:
|
||||
return False
|
||||
|
||||
# 检查是否是所有者
|
||||
if skill.owner_id != user_id:
|
||||
return False
|
||||
|
||||
await self.db.delete(skill)
|
||||
await self.db.commit()
|
||||
return True
|
||||
|
||||
async def get_by_agent_type(self, agent_type: str) -> list[Skill]:
|
||||
"""
|
||||
获取指定 agent_type 的技能(用于 agent 运行时:市场 + 私有)
|
||||
"""
|
||||
result = await self.db.execute(
|
||||
select(Skill).where(
|
||||
and_(
|
||||
Skill.agent_type == agent_type,
|
||||
Skill.is_active == True,
|
||||
or_(
|
||||
Skill.visibility == "market",
|
||||
Skill.visibility == "private"
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
return list(result.scalars().all())
|
||||
@@ -1,6 +1,10 @@
|
||||
import psutil
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
try:
|
||||
import psutil
|
||||
except ModuleNotFoundError: # pragma: no cover - optional runtime dependency fallback
|
||||
psutil = None
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from sqlalchemy import select, func, and_
|
||||
from sqlalchemy.orm import Session
|
||||
from app.models.conversation import Conversation, Message
|
||||
@@ -16,6 +20,19 @@ class StatsService:
|
||||
|
||||
def get_system_health(self) -> dict:
|
||||
"""获取系统健康指标"""
|
||||
if psutil is None:
|
||||
return {
|
||||
"uptime_seconds": 0,
|
||||
"cpu_percent": 0.0,
|
||||
"memory_used_mb": 0.0,
|
||||
"memory_total_mb": 0.0,
|
||||
"memory_percent": 0.0,
|
||||
"disk_used_gb": 0.0,
|
||||
"disk_total_gb": 0.0,
|
||||
"disk_percent": 0.0,
|
||||
"active_users_24h": 0,
|
||||
}
|
||||
|
||||
uptime_seconds = int(time.time() - psutil.boot_time())
|
||||
cpu_percent = psutil.cpu_percent(interval=0.1)
|
||||
mem = psutil.virtual_memory()
|
||||
@@ -35,7 +52,7 @@ class StatsService:
|
||||
|
||||
def _get_daily_stats(self, model, date_column, user_id=None, days=30) -> list:
|
||||
"""通用每日统计查询"""
|
||||
cutoff = datetime.utcnow() - timedelta(days=days)
|
||||
cutoff = datetime.now(UTC) - timedelta(days=days)
|
||||
query = self.db.query(
|
||||
func.date(date_column).label('date'),
|
||||
func.count().label('count')
|
||||
@@ -50,7 +67,7 @@ class StatsService:
|
||||
|
||||
def get_conversation_stats(self, user_id: str = None, days=30) -> dict:
|
||||
"""获取对话统计数据"""
|
||||
cutoff = datetime.utcnow() - timedelta(days=days)
|
||||
cutoff = datetime.now(UTC) - timedelta(days=days)
|
||||
|
||||
daily_conversations = self._get_daily_stats(
|
||||
Conversation, Conversation.created_at, user_id, days
|
||||
@@ -100,7 +117,7 @@ class StatsService:
|
||||
|
||||
def get_knowledge_stats(self, user_id: str = None, days=30) -> dict:
|
||||
"""获取知识库统计数据"""
|
||||
cutoff = datetime.utcnow() - timedelta(days=days)
|
||||
cutoff = datetime.now(UTC) - timedelta(days=days)
|
||||
|
||||
# New tags
|
||||
tag_query = self.db.query(
|
||||
@@ -145,7 +162,7 @@ class StatsService:
|
||||
func.date(Task.completed_at).label('date'),
|
||||
func.count().label('count')
|
||||
).filter(
|
||||
Task.completed_at >= datetime.utcnow() - timedelta(days=days),
|
||||
Task.completed_at >= datetime.now(UTC) - timedelta(days=days),
|
||||
Task.status == TaskStatus.DONE
|
||||
)
|
||||
if user_id:
|
||||
@@ -195,7 +212,7 @@ class StatsService:
|
||||
func.date(ForumPost.updated_at).label('date'),
|
||||
func.count().label('count')
|
||||
).filter(
|
||||
ForumPost.updated_at >= datetime.utcnow() - timedelta(days=days),
|
||||
ForumPost.updated_at >= datetime.now(UTC) - timedelta(days=days),
|
||||
ForumPost.is_executed == True
|
||||
)
|
||||
if user_id:
|
||||
@@ -243,7 +260,7 @@ class StatsService:
|
||||
top_tags = [{"tag_path": r.tag_path, "usage_count": r.usage_count} for r in tag_query.all()]
|
||||
|
||||
# Token trend
|
||||
now = datetime.utcnow()
|
||||
now = datetime.now(UTC)
|
||||
this_month_start = datetime(now.year, now.month, 1)
|
||||
last_month_end = this_month_start - timedelta(days=1)
|
||||
last_month_start = datetime(last_month_end.year, last_month_end.month, 1)
|
||||
|
||||
129
backend/app/services/system_service.py
Normal file
129
backend/app/services/system_service.py
Normal file
@@ -0,0 +1,129 @@
|
||||
from datetime import datetime, UTC
|
||||
from time import monotonic
|
||||
import platform
|
||||
import socket
|
||||
import subprocess
|
||||
|
||||
try:
|
||||
import psutil
|
||||
except ModuleNotFoundError: # pragma: no cover - optional runtime dependency fallback
|
||||
psutil = None
|
||||
|
||||
|
||||
class SystemService:
|
||||
_last_net_bytes_sent: int | None = None
|
||||
_last_net_bytes_recv: int | None = None
|
||||
_last_net_sample_at: float | None = None
|
||||
|
||||
def _get_network_rates(self) -> tuple[float, float]:
|
||||
counters = psutil.net_io_counters()
|
||||
now = monotonic()
|
||||
|
||||
if (
|
||||
self.__class__._last_net_sample_at is None
|
||||
or self.__class__._last_net_bytes_sent is None
|
||||
or self.__class__._last_net_bytes_recv is None
|
||||
):
|
||||
self.__class__._last_net_bytes_sent = counters.bytes_sent
|
||||
self.__class__._last_net_bytes_recv = counters.bytes_recv
|
||||
self.__class__._last_net_sample_at = now
|
||||
return 0.0, 0.0
|
||||
|
||||
elapsed = max(now - self.__class__._last_net_sample_at, 1e-6)
|
||||
upload_bps = max(counters.bytes_sent - self.__class__._last_net_bytes_sent, 0) / elapsed
|
||||
download_bps = max(counters.bytes_recv - self.__class__._last_net_bytes_recv, 0) / elapsed
|
||||
|
||||
self.__class__._last_net_bytes_sent = counters.bytes_sent
|
||||
self.__class__._last_net_bytes_recv = counters.bytes_recv
|
||||
self.__class__._last_net_sample_at = now
|
||||
|
||||
return round(upload_bps, 1), round(download_bps, 1)
|
||||
|
||||
def _get_gpu_status(self) -> dict:
|
||||
empty = {
|
||||
'gpu_name': None,
|
||||
'gpu_memory_total_mb': None,
|
||||
'gpu_memory_used_mb': None,
|
||||
'gpu_util_percent': None,
|
||||
}
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[
|
||||
'nvidia-smi',
|
||||
'--query-gpu=name,memory.total,memory.used,utilization.gpu',
|
||||
'--format=csv,noheader,nounits',
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
encoding='utf-8',
|
||||
timeout=2,
|
||||
check=False,
|
||||
)
|
||||
except (FileNotFoundError, subprocess.SubprocessError, OSError):
|
||||
return empty
|
||||
|
||||
if result.returncode != 0 or not result.stdout.strip():
|
||||
return empty
|
||||
|
||||
first_line = result.stdout.strip().splitlines()[0]
|
||||
parts = [part.strip() for part in first_line.split(',')]
|
||||
if len(parts) < 4:
|
||||
return empty
|
||||
|
||||
def parse_number(value: str) -> float | None:
|
||||
try:
|
||||
return float(value)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
return {
|
||||
'gpu_name': parts[0] or None,
|
||||
'gpu_memory_total_mb': parse_number(parts[1]),
|
||||
'gpu_memory_used_mb': parse_number(parts[2]),
|
||||
'gpu_util_percent': parse_number(parts[3]),
|
||||
}
|
||||
|
||||
def get_status(self) -> dict:
|
||||
if psutil is None:
|
||||
return {
|
||||
'cpu_percent': 0.0,
|
||||
'memory_percent': 0.0,
|
||||
'disk_percent': 0.0,
|
||||
'disk_used_gb': 0.0,
|
||||
'disk_total_gb': 0.0,
|
||||
'network_upload_bps': 0.0,
|
||||
'network_download_bps': 0.0,
|
||||
'system_name': platform.system(),
|
||||
'system_version': platform.version(),
|
||||
'hostname': socket.gethostname(),
|
||||
'uptime_seconds': 0.0,
|
||||
'gpu_name': None,
|
||||
'gpu_memory_total_mb': None,
|
||||
'gpu_memory_used_mb': None,
|
||||
'gpu_util_percent': None,
|
||||
'timestamp': datetime.now(UTC).isoformat(),
|
||||
}
|
||||
|
||||
cpu_percent = psutil.cpu_percent(interval=None)
|
||||
memory = psutil.virtual_memory()
|
||||
disk = psutil.disk_usage('/')
|
||||
upload_bps, download_bps = self._get_network_rates()
|
||||
gpu_status = self._get_gpu_status()
|
||||
boot_time = psutil.boot_time()
|
||||
now_ts = datetime.now(UTC).timestamp()
|
||||
return {
|
||||
'cpu_percent': round(cpu_percent, 1),
|
||||
'memory_percent': round(memory.percent, 1),
|
||||
'disk_percent': round(disk.percent, 1),
|
||||
'disk_used_gb': round(disk.used / (1024 ** 3), 1),
|
||||
'disk_total_gb': round(disk.total / (1024 ** 3), 1),
|
||||
'network_upload_bps': upload_bps,
|
||||
'network_download_bps': download_bps,
|
||||
'system_name': platform.system(),
|
||||
'system_version': platform.version(),
|
||||
'hostname': socket.gethostname(),
|
||||
'uptime_seconds': round(max(now_ts - boot_time, 0.0), 1),
|
||||
**gpu_status,
|
||||
'timestamp': datetime.now(UTC).isoformat(),
|
||||
}
|
||||
@@ -193,9 +193,9 @@ class TagService:
|
||||
"""
|
||||
增量打标签 - 只对最近新增/更新的内容节点打标签
|
||||
"""
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import UTC, datetime, timedelta
|
||||
|
||||
cutoff_date = datetime.utcnow() - timedelta(days=days)
|
||||
cutoff_date = datetime.now(UTC) - timedelta(days=days)
|
||||
|
||||
content_nodes = self.db.query(KGNode).filter(
|
||||
KGNode.user_id == user_id,
|
||||
|
||||
124
backend/app/services/web_search_service.py
Normal file
124
backend/app/services/web_search_service.py
Normal file
@@ -0,0 +1,124 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Literal
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import httpx
|
||||
|
||||
from app.config import settings
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class WebSearchResult:
|
||||
title: str
|
||||
url: str
|
||||
snippet: str
|
||||
source: str | None = None
|
||||
published_at: str | None = None
|
||||
|
||||
|
||||
class WebSearchError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class WebSearchConfigurationError(WebSearchError):
|
||||
pass
|
||||
|
||||
|
||||
class WebSearchRequestError(WebSearchError):
|
||||
pass
|
||||
|
||||
|
||||
class WebSearchService:
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
enabled: bool | None = None,
|
||||
provider: str | None = None,
|
||||
base_url: str | None = None,
|
||||
default_limit: int | None = None,
|
||||
timeout_seconds: int | None = None,
|
||||
auth_type: Literal['none', 'bearer', 'basic'] | str | None = None,
|
||||
auth_token: str | None = None,
|
||||
basic_user: str | None = None,
|
||||
basic_password: str | None = None,
|
||||
):
|
||||
self.enabled = settings.WEB_SEARCH_ENABLED if enabled is None else enabled
|
||||
self.provider = (provider or settings.WEB_SEARCH_PROVIDER).strip().lower()
|
||||
self.base_url = (base_url or settings.SEARXNG_BASE_URL).strip().rstrip('/')
|
||||
self.default_limit = max(1, min(default_limit or settings.WEB_SEARCH_DEFAULT_LIMIT, 10))
|
||||
self.timeout_seconds = max(1, timeout_seconds or settings.WEB_SEARCH_TIMEOUT_SECONDS)
|
||||
self.auth_type = str(auth_type or settings.SEARXNG_AUTH_TYPE or 'none').strip().lower()
|
||||
self.auth_token = auth_token if auth_token is not None else settings.SEARXNG_AUTH_TOKEN
|
||||
self.basic_user = basic_user if basic_user is not None else settings.SEARXNG_BASIC_USER
|
||||
self.basic_password = basic_password if basic_password is not None else settings.SEARXNG_BASIC_PASSWORD
|
||||
|
||||
async def search(self, query: str, limit: int | None = None) -> list[WebSearchResult]:
|
||||
normalized_query = (query or '').strip()
|
||||
if not self.enabled or not self.base_url:
|
||||
raise WebSearchConfigurationError('网页搜索未启用或未配置')
|
||||
if self.provider != 'searxng':
|
||||
raise WebSearchConfigurationError(f'不支持的网页搜索 provider: {self.provider}')
|
||||
if not normalized_query:
|
||||
raise WebSearchRequestError('搜索关键词不能为空')
|
||||
|
||||
parsed = urlparse(self.base_url)
|
||||
if parsed.scheme not in {'http', 'https'} or not parsed.netloc:
|
||||
raise WebSearchConfigurationError('SEARXNG_BASE_URL 配置无效')
|
||||
|
||||
params = {
|
||||
'q': normalized_query,
|
||||
'format': 'json',
|
||||
'language': 'zh-CN',
|
||||
'safesearch': 1,
|
||||
}
|
||||
headers = self._build_headers()
|
||||
timeout = httpx.Timeout(float(self.timeout_seconds), connect=min(float(self.timeout_seconds), 5.0))
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=timeout) as client:
|
||||
response = await client.get(f'{self.base_url}/search', params=params, headers=headers)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
except httpx.HTTPError as exc:
|
||||
raise WebSearchRequestError('SearxNG 请求失败') from exc
|
||||
except ValueError as exc:
|
||||
raise WebSearchRequestError('SearxNG 返回了无效 JSON') from exc
|
||||
|
||||
raw_results = payload.get('results') if isinstance(payload, dict) else None
|
||||
if not isinstance(raw_results, list):
|
||||
return []
|
||||
|
||||
results: list[WebSearchResult] = []
|
||||
target_limit = max(1, min(limit or self.default_limit, 10))
|
||||
for item in raw_results:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
title = str(item.get('title') or '').strip()
|
||||
url = str(item.get('url') or '').strip()
|
||||
snippet = str(item.get('content') or item.get('snippet') or '').strip()
|
||||
if not title or not url:
|
||||
continue
|
||||
results.append(
|
||||
WebSearchResult(
|
||||
title=title,
|
||||
url=url,
|
||||
snippet=snippet,
|
||||
source=str(item.get('engine') or item.get('source') or '').strip() or None,
|
||||
published_at=str(item.get('publishedDate') or item.get('published_at') or '').strip() or None,
|
||||
)
|
||||
)
|
||||
if len(results) >= target_limit:
|
||||
break
|
||||
return results
|
||||
|
||||
def _build_headers(self) -> dict[str, str]:
|
||||
if self.auth_type == 'bearer' and self.auth_token:
|
||||
return {'Authorization': f'Bearer {self.auth_token}'}
|
||||
if self.auth_type == 'basic' and self.basic_user and self.basic_password:
|
||||
credentials = httpx.BasicAuth(self.basic_user, self.basic_password)
|
||||
request = httpx.Request('GET', self.base_url)
|
||||
credentials.auth_flow(request)
|
||||
return dict(request.headers)
|
||||
return {}
|
||||
2084
backend/backend.log
Normal file
2084
backend/backend.log
Normal file
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,119 +0,0 @@
|
||||
远光软件股份有限公司科技项目可行性研究报告
|
||||
|
||||
项目名称:大模型微调技术研究与应用
|
||||
|
||||
申请部门:
|
||||
|
||||
起止时间:年至年
|
||||
|
||||
项目负责人:
|
||||
|
||||
联系电话:
|
||||
|
||||
申请日期:年 月
|
||||
|
||||
大模型微调技术可行性研究报告
|
||||
|
||||
远光软件股份有限公司科技项目可行性研究报告
|
||||
|
||||
项目名称: 大模型微调技术研究与应用
|
||||
|
||||
申请部门:
|
||||
|
||||
起止时间: 年 月至 年 月
|
||||
|
||||
项目负责人:
|
||||
|
||||
联系电话:
|
||||
|
||||
申请日期: 年 月
|
||||
|
||||
一、目的和意义
|
||||
|
||||
1.1 项目背景与需求
|
||||
|
||||
近年来,以深度学习为基础的大型预训练语言模型(Large Language Models,
|
||||
LLMs)如GPT系列、BERT、LLaMA等在自然语言处理领域取得了突破性进展,通过海量数据的预训练和超大规模参数量,这些模型展现出强大的通用语言理解与生成能力,在机器翻译、文本摘要、问答系统、内容创作等众多任务中表现出色,引领了人工智能技术的新浪潮。然而,这些通用大模型在面对特定专业领域任务时,往往存在知识覆盖不足、专业术语理解偏差、领域特定逻辑推理能力欠缺、输出风格不符合行业特点等问题,难以直接满足垂直场景的应用需求。
|
||||
|
||||
模型微调(Fine-tuning)技术作为将通用大模型适配到特定场景的关键手段,通过在领域相关数据上进一步训练模型参数,使模型能够吸收领域知识、适应特定任务要求,从而显著提升模型在目标任务上的性能表现。随着大模型参数规模的不断扩大,传统的全参数微调方式面临着计算资源消耗大、存储成本高、容易产生灾难性遗忘等挑战,因此,参数高效微调(Parameter-Efficient
|
||||
Fine-Tuning,
|
||||
PEFT)方法如LoRA、Adapter、Prefix-tuning等技术应运而生,为低成本、高效率的大模型领域适配提供了新的技术路径。
|
||||
|
||||
本项目旨在探索适合特定领域特点的高效微调策略,解决数据稀缺性、专业术语理解、领域知识融合等关键技术问题,提升模型在特定场景下的准确性、可靠性和实用性。
|
||||
|
||||
项目成果将对该现状和技术发展的作用主要体现在技术推动作用和应用落地支撑两方面。
|
||||
|
||||
二、国内外研究水平综述
|
||||
|
||||
2.1 技术发展历史简要回顾
|
||||
|
||||
大模型微调技术的发展历程分为四个阶段:
|
||||
|
||||
第一阶段(2018年前):传统迁移学习与微调雏形阶段。模型适配多采用传统迁移学习思路,将通用数据集上训练的基础模型迁移至特定任务场景。
|
||||
|
||||
第二阶段(2018-2020年):预训练-微调范式确立阶段。2018年谷歌提出BERT模型,首次构建"预训练通用知识+下游任务微调"的技术框架。
|
||||
|
||||
第三阶段(2020-2022年):高效微调技术爆发阶段。LoRA、QLoRA、Adapter等参数高效微调技术相继出现,将微调参数规模大幅降低。
|
||||
|
||||
第四阶段(2022年至今):垂直领域深化与协同优化阶段。"基座模型+领域微调"的架构成为主流,微调技术与知识图谱进一步融合。
|
||||
|
||||
2.2 国内外研究水平现状和发展趋势
|
||||
|
||||
国际层面,Hugging
|
||||
Face、DeepSpeed等开源社区为参数高效微调技术的普及提供了重要支撑。国内层面,阿里云基于通义千问进行财税领域定制微调,验证了微调技术在财务领域的应用价值。
|
||||
|
||||
三、项目的理论和实践依据
|
||||
|
||||
3.1 项目研究内容原理简述
|
||||
|
||||
本项目采用"基座模型+领域适配"分层微调架构,选取开源基座模型,针对财务问答场景特性采用LoRA参数高效微调策略。
|
||||
|
||||
3.2 项目研究内容理论和实践依据
|
||||
|
||||
理论依据包括国家战略层面的政策支持和成熟的技术理论体系。实践依据包括大模型微调技术在财务等垂直领域的成功案例。
|
||||
|
||||
3.3 项目研究的关键和难点
|
||||
|
||||
关键点包括高质量数据集构建、高效微调策略适配、知识精准注入与幻觉抑制、效果评估体系建设。难点集中在数据处理、微调策略、知识注入和评估体系四个方面。
|
||||
|
||||
四、项目研究内容和实施方案
|
||||
|
||||
4.1 项目研究内容详细说明
|
||||
|
||||
本项目研究内容包括数据格式研究、微调框架研究、模型微调后评估体系研究三个方面。
|
||||
|
||||
4.2 理论研究步骤和试验计划
|
||||
|
||||
包括数据处理流程、训练数据生成流程、数据验证流程三个主要环节。
|
||||
|
||||
4.3 项目组织方式和协作分工
|
||||
|
||||
本项目由项目负责人统筹协调,下设数据组、算法组、应用组三个工作小组。
|
||||
|
||||
五、预期目标和成果形式
|
||||
|
||||
5.1 项目研究预期达到的目标
|
||||
|
||||
技术目标:问答准确率达到85%以上。应用目标:开发财务智能知识问答原型系统。效益目标:替代财务专家70%以上的重复性咨询工作。
|
||||
|
||||
5.2 明确叙述提高研究成果的形式
|
||||
|
||||
包括技术方案文档、原型系统、训练数据集、微调模型、技术论文/报告等成果形式。
|
||||
|
||||
六、项目承担团队的条件
|
||||
|
||||
项目团队具备人工智能、大数据等领域的技术背景,具备财务信息系统开发经验,具备充足的GPU计算资源和完善的开发测试环境。
|
||||
|
||||
七、项目进度安排
|
||||
|
||||
第1-2月:项目启动、需求分析;第3-4月:数据收集、清洗;第5-7月:数据集生成;第8-10月:模型训练;第11-12月:系统开发;第13-14月:优化整理;第15-16月:验收转化。
|
||||
|
||||
八、项目经费预算
|
||||
|
||||
本项目经费预算根据实际研究工作需要编制,包括人工费、设备使用费、业务费、场地使用费、专家咨询费等科目。
|
||||
|
||||
分管领导审核意见:
|
||||
|
||||
(对经费预算是否合理,有无其他经费来源,能否保证研究计划实施所需的人力,工作时间等基本条件提出具体意见)
|
||||
|
||||
分管领导(签字): 年 月 日
|
||||
@@ -3,7 +3,7 @@ name = "jarvis-backend"
|
||||
version = "0.1.0"
|
||||
description = "Jarvis Personal AI Assistant - Backend"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.12"
|
||||
requires-python = ">=3.11"
|
||||
license = { text = "MIT" }
|
||||
|
||||
dependencies = [
|
||||
@@ -27,6 +27,9 @@ dependencies = [
|
||||
"llama-index-vector-stores-chroma>=0.3.0",
|
||||
"chromadb>=0.5.0",
|
||||
|
||||
# Memory
|
||||
"mem0ai>=1.0.0",
|
||||
|
||||
# 数据库
|
||||
"sqlalchemy>=2.0.0",
|
||||
"aiosqlite>=0.20.0",
|
||||
@@ -48,6 +51,10 @@ dependencies = [
|
||||
# 工具
|
||||
"python-dotenv>=1.0.0",
|
||||
"httpx>=0.27.0",
|
||||
"openpyxl>=3.1.0",
|
||||
"python-docx>=1.1.0",
|
||||
"mineru>=2.0.3",
|
||||
"psutil>=6.1.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
@@ -68,7 +75,7 @@ build-backend = "hatchling.build"
|
||||
packages = ["app"]
|
||||
|
||||
[tool.ruff]
|
||||
target-version = "py312"
|
||||
target-version = "py311"
|
||||
line-length = 100
|
||||
select = ["E", "F", "I", "N", "W", "UP"]
|
||||
|
||||
|
||||
291
backend/tests/backend/app/agents/test_graph.py
Normal file
291
backend/tests/backend/app/agents/test_graph.py
Normal file
@@ -0,0 +1,291 @@
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
import sys
|
||||
|
||||
WORKTREE_ROOT = Path(__file__).resolve().parents[4]
|
||||
if str(WORKTREE_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(WORKTREE_ROOT))
|
||||
for module_name in list(sys.modules):
|
||||
if module_name == "app" or module_name.startswith("app."):
|
||||
del sys.modules[module_name]
|
||||
|
||||
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
|
||||
from langgraph.graph import END
|
||||
|
||||
from app.agents.graph import (
|
||||
JSON_ACTION_FALLBACK_PROMPT,
|
||||
_get_role_tools,
|
||||
call_agent_llm,
|
||||
execute_tools_node,
|
||||
master_node,
|
||||
route_after_agent,
|
||||
route_master,
|
||||
)
|
||||
from app.agents.state import AgentRole
|
||||
from app.agents.tools import SUB_COMMANDER_TOOLSETS
|
||||
from app.agents.prompts import MASTER_SYSTEM_PROMPT
|
||||
|
||||
|
||||
def _base_state(message: str = "帮我安排今天的重点") -> dict:
|
||||
return {
|
||||
"messages": [HumanMessage(content=message)],
|
||||
"user_id": "u1",
|
||||
"conversation_id": "c1",
|
||||
"current_agent": AgentRole.MASTER.value,
|
||||
"next_step": None,
|
||||
"agent_trace": [AgentRole.MASTER.value],
|
||||
"pending_tasks": [],
|
||||
"completed_tasks": [],
|
||||
"created_entities": [],
|
||||
"knowledge_context": None,
|
||||
"schedule_context_summary": None,
|
||||
"analysis_report": None,
|
||||
"final_response": None,
|
||||
"memory_context": None,
|
||||
"current_datetime_context": None,
|
||||
"user_llm_config": None,
|
||||
"provider_capabilities": None,
|
||||
}
|
||||
|
||||
|
||||
class FailIfCalledLLM:
|
||||
async def ainvoke(self, messages):
|
||||
raise AssertionError("LLM should not be called for greeting fast-path")
|
||||
|
||||
|
||||
class StaticResponseLLM:
|
||||
def __init__(self, response: AIMessage):
|
||||
self.response = response
|
||||
self.messages = None
|
||||
|
||||
async def ainvoke(self, messages):
|
||||
self.messages = messages
|
||||
return self.response
|
||||
|
||||
|
||||
class CaptureFallbackLLM:
|
||||
def __init__(self, response: AIMessage):
|
||||
self.response = response
|
||||
self.messages = None
|
||||
self.bind_tools_called = False
|
||||
|
||||
async def ainvoke(self, messages):
|
||||
self.messages = messages
|
||||
return self.response
|
||||
|
||||
def bind_tools(self, tools):
|
||||
self.bind_tools_called = True
|
||||
raise AssertionError("bind_tools should not be used when native tools are unsupported")
|
||||
|
||||
|
||||
class AsyncFakeTool:
|
||||
def __init__(self, name: str, result: str):
|
||||
self.name = name
|
||||
self.result = result
|
||||
self.calls: list[dict] = []
|
||||
|
||||
async def ainvoke(self, args: dict):
|
||||
self.calls.append(args)
|
||||
return self.result
|
||||
|
||||
|
||||
class SyncFakeTool:
|
||||
def __init__(self, name: str, result: str):
|
||||
self.name = name
|
||||
self.result = result
|
||||
self.calls: list[dict] = []
|
||||
|
||||
def invoke(self, args: dict):
|
||||
self.calls.append(args)
|
||||
return self.result
|
||||
|
||||
|
||||
async def test_master_node_greeting_fast_path_returns_stable_reply_without_llm(monkeypatch):
|
||||
monkeypatch.setattr("app.agents.graph._get_llm_for_state", lambda state: (FailIfCalledLLM(), SimpleNamespace()))
|
||||
|
||||
result = await master_node(_base_state("你好"))
|
||||
|
||||
assert result["final_response"] == "您好。我在。\n\n您把问题给我,我先帮您收束重点,再往下推。"
|
||||
assert result["messages"][0].content == "您好。我在。"
|
||||
|
||||
|
||||
async def test_master_node_routes_to_agent_when_llm_returns_role_name(monkeypatch):
|
||||
llm = StaticResponseLLM(AIMessage(content="schedule_planner"))
|
||||
monkeypatch.setattr(
|
||||
"app.agents.graph._get_llm_for_state",
|
||||
lambda state: (llm, SimpleNamespace(provider="test", supports_native_tools=True)),
|
||||
)
|
||||
|
||||
state = _base_state("帮我安排这周重点")
|
||||
result = await master_node(state)
|
||||
|
||||
assert result["current_agent"] == AgentRole.SCHEDULE_PLANNER.value
|
||||
assert result["agent_trace"] == [AgentRole.MASTER.value, AgentRole.SCHEDULE_PLANNER.value]
|
||||
assert result["messages"][0].content == f"已分发至 {AgentRole.SCHEDULE_PLANNER.value} 处理。"
|
||||
assert isinstance(llm.messages[0], SystemMessage)
|
||||
assert MASTER_SYSTEM_PROMPT in llm.messages[0].content
|
||||
|
||||
|
||||
async def test_master_node_returns_final_response_when_llm_answers_directly(monkeypatch):
|
||||
response = AIMessage(content="我建议先收束需求,再拆执行步骤。")
|
||||
llm = StaticResponseLLM(response)
|
||||
monkeypatch.setattr(
|
||||
"app.agents.graph._get_llm_for_state",
|
||||
lambda state: (llm, SimpleNamespace(provider="test", supports_native_tools=True)),
|
||||
)
|
||||
|
||||
result = await master_node(_base_state("现在应该怎么推进这个项目?"))
|
||||
|
||||
assert result["final_response"] == response.content
|
||||
assert result["messages"] == [response]
|
||||
|
||||
|
||||
def test_route_after_agent_sends_tool_calls_to_tools_node():
|
||||
state = _base_state()
|
||||
state["messages"] = [AIMessage(content="", tool_calls=[{"id": "1", "name": "create_task", "args": {}}])]
|
||||
|
||||
assert route_after_agent(state) == "tools"
|
||||
|
||||
|
||||
def test_route_after_agent_ends_when_no_tool_calls_exist():
|
||||
state = _base_state()
|
||||
state["messages"] = [AIMessage(content="done")]
|
||||
|
||||
assert route_after_agent(state) == END
|
||||
|
||||
|
||||
def test_route_master_ends_when_final_response_exists():
|
||||
state = _base_state()
|
||||
state["final_response"] = "done"
|
||||
state["current_agent"] = AgentRole.EXECUTOR.value
|
||||
|
||||
assert route_master(state) == END
|
||||
|
||||
|
||||
def test_route_master_returns_current_agent_when_more_work_remains():
|
||||
state = _base_state()
|
||||
state["current_agent"] = AgentRole.LIBRARIAN.value
|
||||
|
||||
assert route_master(state) == AgentRole.LIBRARIAN.value
|
||||
|
||||
|
||||
def test_get_role_tools_returns_expected_semantic_tool_sets():
|
||||
expected_by_role = {
|
||||
AgentRole.SCHEDULE_PLANNER: [
|
||||
"get_schedule_day",
|
||||
"get_tasks",
|
||||
"resolve_time_expression",
|
||||
"create_todo",
|
||||
"create_schedule_task",
|
||||
"create_reminder",
|
||||
"create_goal",
|
||||
],
|
||||
AgentRole.EXECUTOR: [
|
||||
"get_tasks",
|
||||
"create_task",
|
||||
"update_task_status",
|
||||
"resolve_time_expression",
|
||||
"create_todo",
|
||||
"create_schedule_task",
|
||||
"create_reminder",
|
||||
"create_goal",
|
||||
"get_forum_posts",
|
||||
"create_forum_post",
|
||||
"scan_forum_for_instructions",
|
||||
],
|
||||
AgentRole.LIBRARIAN: [
|
||||
"search_knowledge",
|
||||
"hybrid_search",
|
||||
"web_search",
|
||||
"get_knowledge_graph_context",
|
||||
"build_knowledge_graph",
|
||||
],
|
||||
AgentRole.ANALYST: [
|
||||
"get_tasks",
|
||||
"get_forum_posts",
|
||||
"scan_forum_for_instructions",
|
||||
"search_knowledge",
|
||||
"hybrid_search",
|
||||
"web_search",
|
||||
],
|
||||
}
|
||||
|
||||
for role, expected_tool_names in expected_by_role.items():
|
||||
actual_tools = _get_role_tools(role)
|
||||
actual_tool_names = [tool.name for tool in actual_tools]
|
||||
assert actual_tool_names == expected_tool_names
|
||||
assert len(actual_tool_names) == len(set(actual_tool_names))
|
||||
|
||||
|
||||
async def test_execute_tools_node_executes_tool_calls_and_tracks_created_entities(monkeypatch):
|
||||
create_tool = AsyncFakeTool("create_task", "created task 123")
|
||||
read_tool = SyncFakeTool("get_tasks", "[]")
|
||||
|
||||
monkeypatch.setattr("app.agents.graph.ALL_TOOLS", [create_tool, read_tool])
|
||||
monkeypatch.setattr(
|
||||
"app.agents.graph.normalize_tool_time_arguments",
|
||||
lambda tool_name, tool_args, current_datetime_context: {**tool_args, "normalized": True},
|
||||
)
|
||||
|
||||
state = _base_state()
|
||||
state["created_entities"] = [{"tool": "existing", "result": "already there"}]
|
||||
state["current_datetime_context"] = "2026-04-02T09:00:00+08:00"
|
||||
state["messages"] = [
|
||||
AIMessage(
|
||||
content="",
|
||||
tool_calls=[
|
||||
{"id": "tool-1", "name": "create_task", "args": {"title": "Write tests"}},
|
||||
{"id": "tool-2", "name": "get_tasks", "args": {"status": "open"}},
|
||||
],
|
||||
)
|
||||
]
|
||||
|
||||
result = await execute_tools_node(state)
|
||||
|
||||
assert create_tool.calls == [{"title": "Write tests", "normalized": True}]
|
||||
assert read_tool.calls == [{"status": "open", "normalized": True}]
|
||||
assert [type(message) for message in result["messages"]] == [ToolMessage, ToolMessage]
|
||||
assert result["messages"][0].tool_call_id == "tool-1"
|
||||
assert result["messages"][0].name == "create_task"
|
||||
assert result["messages"][0].content == "created task 123"
|
||||
assert result["messages"][1].tool_call_id == "tool-2"
|
||||
assert result["messages"][1].name == "get_tasks"
|
||||
assert result["messages"][1].content == "[]"
|
||||
assert result["created_entities"] == [
|
||||
{"tool": "existing", "result": "already there"},
|
||||
{"tool": "create_task", "result": "created task 123"},
|
||||
]
|
||||
|
||||
|
||||
async def test_call_agent_llm_includes_context_messages_and_uses_json_fallback(monkeypatch):
|
||||
llm = CaptureFallbackLLM(AIMessage(content='{"mode":"final","final_response":"好的。"}'))
|
||||
capabilities = SimpleNamespace(
|
||||
provider="ollama",
|
||||
supports_native_tools=False,
|
||||
preferred_tool_strategy="json_fallback",
|
||||
)
|
||||
fake_tools = [SimpleNamespace(name="create_reminder"), SimpleNamespace(name="get_tasks")]
|
||||
|
||||
monkeypatch.setattr("app.agents.graph._get_llm_for_state", lambda state: (llm, capabilities))
|
||||
monkeypatch.setattr("app.agents.graph._get_role_tools", lambda role: fake_tools)
|
||||
monkeypatch.setattr("app.agents.graph.build_skill_context", lambda role_key: "技能上下文: 先判断,再执行")
|
||||
|
||||
state = _base_state("明天提醒我开会")
|
||||
state["messages"] = [HumanMessage(content="明天提醒我开会")]
|
||||
state["current_datetime_context"] = "CURRENT_TIME: 2026-04-02T09:00:00+08:00"
|
||||
state["memory_context"] = "用户偏好早上处理深度工作。"
|
||||
|
||||
result = await call_agent_llm(state, AgentRole.EXECUTOR, "executor system prompt")
|
||||
|
||||
assert result["messages"][0].content == '{"mode":"final","final_response":"好的。"}'
|
||||
assert llm.bind_tools_called is False
|
||||
assert llm.messages is not None
|
||||
|
||||
system_contents = [message.content for message in llm.messages if isinstance(message, SystemMessage)]
|
||||
assert "executor system prompt" in system_contents[0]
|
||||
assert any("当前时间上下文: CURRENT_TIME: 2026-04-02T09:00:00+08:00" == content for content in system_contents)
|
||||
assert any("长期记忆上下文: 用户偏好早上处理深度工作。" == content for content in system_contents)
|
||||
assert any("技能上下文: 先判断,再执行" == content for content in system_contents)
|
||||
assert any(content == JSON_ACTION_FALLBACK_PROMPT for content in system_contents)
|
||||
assert any(content == "本次可用工具列表: create_reminder, get_tasks" for content in system_contents)
|
||||
assert any(isinstance(message, HumanMessage) and message.content == "明天提醒我开会" for message in llm.messages)
|
||||
12
backend/tests/backend/app/agents/test_prompts.py
Normal file
12
backend/tests/backend/app/agents/test_prompts.py
Normal file
@@ -0,0 +1,12 @@
|
||||
from app.agents.prompts import MASTER_SYSTEM_PROMPT
|
||||
|
||||
|
||||
def test_master_prompt_forbids_subagent_rollcall_in_simple_greetings():
|
||||
assert '当用户只是打招呼(如“你好”“您好”“早”“在吗”)时:不要介绍 4 个子Agent' in MASTER_SYSTEM_PROMPT
|
||||
assert '只做一个自然、简短、有推进感的回应' in MASTER_SYSTEM_PROMPT
|
||||
|
||||
|
||||
def test_master_prompt_does_not_include_full_canned_answers_for_greetings_or_identity():
|
||||
assert 'Jarvis:您好。我在。' not in MASTER_SYSTEM_PROMPT
|
||||
assert 'Jarvis:我是 Jarvis。' not in MASTER_SYSTEM_PROMPT
|
||||
assert 'Jarvis:主要做三件事。' not in MASTER_SYSTEM_PROMPT
|
||||
360
backend/tests/backend/app/agents/test_registry.py
Normal file
360
backend/tests/backend/app/agents/test_registry.py
Normal file
@@ -0,0 +1,360 @@
|
||||
import pytest
|
||||
from collections.abc import Mapping
|
||||
|
||||
from app.agents.prompts import (
|
||||
SUB_COMMANDER_PROMPTS_BY_KEY,
|
||||
TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY,
|
||||
)
|
||||
from app.agents.registry import build_registry_indexes, load_builtin_registry_bundle
|
||||
from app.agents.registry.indexes import summarize_registry_indexes
|
||||
from app.agents.registry.models import (
|
||||
AgentManifest,
|
||||
CapabilityManifest,
|
||||
SpecialistTemplateManifest,
|
||||
SubCommanderManifest,
|
||||
)
|
||||
from app.agents.registry.validator import validate_registry_bundle
|
||||
from app.agents.registry.builtins import (
|
||||
BUILTIN_AGENT_MANIFESTS,
|
||||
BUILTIN_CAPABILITY_MANIFESTS,
|
||||
BUILTIN_SPECIALIST_TEMPLATE_MANIFESTS,
|
||||
BUILTIN_SUB_COMMANDER_MANIFESTS,
|
||||
)
|
||||
from app.agents.state import AgentRole
|
||||
from app.agents.tools import SUB_COMMANDER_TOOLSETS
|
||||
|
||||
|
||||
def make_agent(
|
||||
agent_id: str = "master",
|
||||
*,
|
||||
display_name: str = "Master",
|
||||
role_value: str = "master",
|
||||
system_prompt_key: str = "master",
|
||||
default_sub_commanders: list[str] | None = None,
|
||||
) -> AgentManifest:
|
||||
return AgentManifest(
|
||||
agent_id=agent_id,
|
||||
display_name=display_name,
|
||||
role_value=role_value,
|
||||
system_prompt_key=system_prompt_key,
|
||||
routing_hints=["route"],
|
||||
default_sub_commanders=default_sub_commanders or [],
|
||||
)
|
||||
|
||||
|
||||
def make_sub_commander(
|
||||
sub_commander_id: str = "planner",
|
||||
*,
|
||||
parent_agent_id: str = "master",
|
||||
capability_ids: list[str] | None = None,
|
||||
) -> SubCommanderManifest:
|
||||
return SubCommanderManifest(
|
||||
sub_commander_id=sub_commander_id,
|
||||
parent_agent_id=parent_agent_id,
|
||||
prompt_text="Plan the work.",
|
||||
capability_ids=capability_ids or [],
|
||||
)
|
||||
|
||||
|
||||
def make_capability(capability_id: str = "calendar") -> CapabilityManifest:
|
||||
return CapabilityManifest(capability_id=capability_id, tool_name=f"{capability_id}_tool")
|
||||
|
||||
|
||||
def make_specialist_template(
|
||||
template_id: str = "researcher",
|
||||
*,
|
||||
allowed_capability_ids: list[str] | None = None,
|
||||
) -> SpecialistTemplateManifest:
|
||||
return SpecialistTemplateManifest(
|
||||
template_id=template_id,
|
||||
display_name="Researcher",
|
||||
description="Research specialist",
|
||||
allowed_capability_ids=allowed_capability_ids,
|
||||
)
|
||||
|
||||
|
||||
def test_validate_registry_bundle_accepts_valid_bundle() -> None:
|
||||
validate_registry_bundle(
|
||||
agents=[make_agent(default_sub_commanders=["planner"])],
|
||||
sub_commanders=[make_sub_commander(capability_ids=["calendar"])],
|
||||
capabilities=[make_capability()],
|
||||
specialist_templates=[make_specialist_template(allowed_capability_ids=["calendar"])],
|
||||
)
|
||||
|
||||
|
||||
def test_validate_registry_bundle_rejects_duplicate_agent_ids() -> None:
|
||||
agents = [
|
||||
make_agent(default_sub_commanders=["planner"]),
|
||||
make_agent(
|
||||
display_name="Duplicate Master",
|
||||
role_value="master_duplicate",
|
||||
system_prompt_key="master_duplicate",
|
||||
),
|
||||
]
|
||||
|
||||
with pytest.raises(ValueError, match="duplicate agent id: master"):
|
||||
validate_registry_bundle(
|
||||
agents=agents,
|
||||
sub_commanders=[],
|
||||
capabilities=[],
|
||||
specialist_templates=[],
|
||||
)
|
||||
|
||||
|
||||
def test_validate_registry_bundle_rejects_duplicate_sub_commander_ids() -> None:
|
||||
with pytest.raises(ValueError, match="duplicate sub commander id: planner"):
|
||||
validate_registry_bundle(
|
||||
agents=[make_agent()],
|
||||
sub_commanders=[make_sub_commander(), make_sub_commander()],
|
||||
capabilities=[],
|
||||
specialist_templates=[],
|
||||
)
|
||||
|
||||
|
||||
def test_validate_registry_bundle_rejects_duplicate_capability_ids() -> None:
|
||||
with pytest.raises(ValueError, match="duplicate capability id: calendar"):
|
||||
validate_registry_bundle(
|
||||
agents=[],
|
||||
sub_commanders=[],
|
||||
capabilities=[make_capability(), make_capability()],
|
||||
specialist_templates=[],
|
||||
)
|
||||
|
||||
|
||||
def test_validate_registry_bundle_rejects_duplicate_template_ids() -> None:
|
||||
with pytest.raises(ValueError, match="duplicate template id: researcher"):
|
||||
validate_registry_bundle(
|
||||
agents=[],
|
||||
sub_commanders=[],
|
||||
capabilities=[],
|
||||
specialist_templates=[make_specialist_template(), make_specialist_template()],
|
||||
)
|
||||
|
||||
|
||||
def test_validate_registry_bundle_rejects_unknown_sub_commander_parent_agent_ids() -> None:
|
||||
sub_commanders = [make_sub_commander(parent_agent_id="missing-agent")]
|
||||
|
||||
with pytest.raises(ValueError, match="unknown parent agent id: missing-agent"):
|
||||
validate_registry_bundle(
|
||||
agents=[],
|
||||
sub_commanders=sub_commanders,
|
||||
capabilities=[],
|
||||
specialist_templates=[],
|
||||
)
|
||||
|
||||
|
||||
def test_validate_registry_bundle_rejects_unknown_sub_commander_capability_references() -> None:
|
||||
with pytest.raises(ValueError, match="unknown capability id: search"):
|
||||
validate_registry_bundle(
|
||||
agents=[make_agent(default_sub_commanders=["planner"])],
|
||||
sub_commanders=[make_sub_commander(capability_ids=["search"])],
|
||||
capabilities=[make_capability()],
|
||||
specialist_templates=[],
|
||||
)
|
||||
|
||||
|
||||
def test_validate_registry_bundle_rejects_unknown_specialist_template_capability_references() -> None:
|
||||
with pytest.raises(ValueError, match="unknown capability id: missing-capability"):
|
||||
validate_registry_bundle(
|
||||
agents=[],
|
||||
sub_commanders=[],
|
||||
capabilities=[make_capability()],
|
||||
specialist_templates=[
|
||||
make_specialist_template(allowed_capability_ids=["missing-capability"])
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
def test_registry_bundle_agent_roles_match_runtime_agent_role_enum_values() -> None:
|
||||
bundle = load_builtin_registry_bundle()
|
||||
indexes = build_registry_indexes(bundle)
|
||||
|
||||
assert set(indexes.agent_by_id) == {role.value for role in AgentRole}
|
||||
assert {agent.role_value for agent in bundle.agents} == {role.value for role in AgentRole}
|
||||
|
||||
|
||||
def test_registry_bundle_agent_system_prompt_keys_match_runtime_top_level_prompt_surface() -> None:
|
||||
bundle = load_builtin_registry_bundle()
|
||||
indexes = build_registry_indexes(bundle)
|
||||
|
||||
expected_prompt_keys_by_agent_id = {
|
||||
role.value: role.value for role in AgentRole if role.value in TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY
|
||||
}
|
||||
|
||||
assert set(TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY) == {role.value for role in AgentRole}
|
||||
assert indexes.agent_prompt_key_by_id == expected_prompt_keys_by_agent_id
|
||||
assert {
|
||||
agent.agent_id: TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY[agent.system_prompt_key]
|
||||
for agent in bundle.agents
|
||||
} == {
|
||||
role.value: TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY[role.value]
|
||||
for role in AgentRole
|
||||
}
|
||||
|
||||
|
||||
def test_registry_bundle_skill_context_keys_match_graph_role_derivation_rule() -> None:
|
||||
bundle = load_builtin_registry_bundle()
|
||||
indexes = build_registry_indexes(bundle)
|
||||
|
||||
expected_skill_context_keys = {
|
||||
role.value: role.value.replace("agent_", "")
|
||||
for role in AgentRole
|
||||
}
|
||||
|
||||
assert indexes.skill_context_key_by_agent_id == expected_skill_context_keys
|
||||
assert {
|
||||
agent.agent_id: agent.skill_context_key for agent in bundle.agents
|
||||
} == expected_skill_context_keys
|
||||
|
||||
|
||||
def test_registry_bundle_sub_commander_prompt_texts_match_runtime_prompt_map() -> None:
|
||||
bundle = load_builtin_registry_bundle()
|
||||
indexes = build_registry_indexes(bundle)
|
||||
|
||||
assert set(indexes.sub_commander_by_id) == set(SUB_COMMANDER_PROMPTS_BY_KEY)
|
||||
assert indexes.sub_commander_prompt_key_by_id == {
|
||||
sub_commander_id: sub_commander_id
|
||||
for sub_commander_id in SUB_COMMANDER_PROMPTS_BY_KEY
|
||||
}
|
||||
assert {
|
||||
sub_commander.sub_commander_id: sub_commander.prompt_text
|
||||
for sub_commander in bundle.sub_commanders
|
||||
} == SUB_COMMANDER_PROMPTS_BY_KEY
|
||||
|
||||
|
||||
def test_registry_bundle_sub_commander_tool_membership_and_order_match_runtime_toolsets() -> None:
|
||||
bundle = load_builtin_registry_bundle()
|
||||
indexes = build_registry_indexes(bundle)
|
||||
|
||||
assert set(indexes.sub_commander_by_id) == set(SUB_COMMANDER_TOOLSETS)
|
||||
assert indexes.capability_ids_by_sub_commander_id == {
|
||||
sub_commander_id: tuple(tool.name for tool in tools)
|
||||
for sub_commander_id, tools in SUB_COMMANDER_TOOLSETS.items()
|
||||
}
|
||||
assert {
|
||||
sub_commander.sub_commander_id: tuple(sub_commander.capability_ids)
|
||||
for sub_commander in bundle.sub_commanders
|
||||
} == {
|
||||
sub_commander_id: tuple(tool.name for tool in tools)
|
||||
for sub_commander_id, tools in SUB_COMMANDER_TOOLSETS.items()
|
||||
}
|
||||
|
||||
|
||||
def test_builtin_capabilities_reference_actual_runtime_tool_names() -> None:
|
||||
expected_tool_names = {
|
||||
tool.name
|
||||
for tools in SUB_COMMANDER_TOOLSETS.values()
|
||||
for tool in tools
|
||||
}
|
||||
manifest_tool_names = {manifest.tool_name for manifest in BUILTIN_CAPABILITY_MANIFESTS}
|
||||
|
||||
assert manifest_tool_names == expected_tool_names
|
||||
|
||||
|
||||
def test_builtin_sub_commander_capabilities_match_runtime_toolsets() -> None:
|
||||
capabilities_by_tool_name = {
|
||||
manifest.tool_name: manifest.capability_id for manifest in BUILTIN_CAPABILITY_MANIFESTS
|
||||
}
|
||||
|
||||
for sub_commander in BUILTIN_SUB_COMMANDER_MANIFESTS:
|
||||
expected_capability_ids = {
|
||||
capabilities_by_tool_name[tool.name]
|
||||
for tool in SUB_COMMANDER_TOOLSETS[sub_commander.sub_commander_id]
|
||||
}
|
||||
assert set(sub_commander.capability_ids) == expected_capability_ids
|
||||
|
||||
|
||||
def test_builtin_manifests_form_a_valid_registry_bundle() -> None:
|
||||
validate_registry_bundle(
|
||||
agents=list(BUILTIN_AGENT_MANIFESTS),
|
||||
sub_commanders=list(BUILTIN_SUB_COMMANDER_MANIFESTS),
|
||||
capabilities=list(BUILTIN_CAPABILITY_MANIFESTS),
|
||||
specialist_templates=list(BUILTIN_SPECIALIST_TEMPLATE_MANIFESTS),
|
||||
)
|
||||
|
||||
|
||||
def test_load_builtin_registry_bundle_returns_non_empty_manifest_sets() -> None:
|
||||
bundle = load_builtin_registry_bundle()
|
||||
|
||||
assert bundle.agents
|
||||
assert bundle.sub_commanders
|
||||
assert bundle.capabilities
|
||||
assert isinstance(bundle.specialist_templates, tuple)
|
||||
|
||||
|
||||
def test_build_registry_indexes_exposes_manifest_lookups_by_id() -> None:
|
||||
bundle = load_builtin_registry_bundle()
|
||||
|
||||
indexes = build_registry_indexes(bundle)
|
||||
|
||||
assert indexes.agent_by_id
|
||||
assert indexes.sub_commander_by_id
|
||||
assert indexes.capability_by_id
|
||||
assert isinstance(indexes.specialist_template_by_id, Mapping)
|
||||
assert set(indexes.agent_by_id) == {agent.agent_id for agent in bundle.agents}
|
||||
assert set(indexes.sub_commander_by_id) == {
|
||||
sub_commander.sub_commander_id for sub_commander in bundle.sub_commanders
|
||||
}
|
||||
assert set(indexes.capability_by_id) == {
|
||||
capability.capability_id for capability in bundle.capabilities
|
||||
}
|
||||
assert set(indexes.specialist_template_by_id) == {
|
||||
template.template_id for template in bundle.specialist_templates
|
||||
}
|
||||
|
||||
|
||||
def test_summarize_registry_indexes_returns_read_only_debug_counts() -> None:
|
||||
bundle = load_builtin_registry_bundle()
|
||||
indexes = build_registry_indexes(bundle)
|
||||
|
||||
assert summarize_registry_indexes(indexes) == {
|
||||
"agent_count": len(bundle.agents),
|
||||
"sub_commander_count": len(bundle.sub_commanders),
|
||||
"capability_count": len(bundle.capabilities),
|
||||
"specialist_template_count": len(bundle.specialist_templates),
|
||||
}
|
||||
|
||||
|
||||
def test_build_registry_indexes_exposes_prompt_keys_skill_context_keys_and_capability_mappings() -> None:
|
||||
bundle = load_builtin_registry_bundle()
|
||||
|
||||
indexes = build_registry_indexes(bundle)
|
||||
|
||||
assert indexes.agent_prompt_key_by_id == {
|
||||
agent.agent_id: agent.system_prompt_key for agent in bundle.agents
|
||||
}
|
||||
assert indexes.agent_prompt_key_by_id == {
|
||||
agent.agent_id: agent.system_prompt_key for agent in BUILTIN_AGENT_MANIFESTS
|
||||
}
|
||||
assert set(indexes.agent_prompt_key_by_id.values()) == set(TOP_LEVEL_SYSTEM_PROMPTS_BY_KEY)
|
||||
assert indexes.sub_commander_prompt_key_by_id == {
|
||||
sub_commander.sub_commander_id: sub_commander.sub_commander_id
|
||||
for sub_commander in bundle.sub_commanders
|
||||
}
|
||||
assert set(indexes.sub_commander_prompt_key_by_id.values()) == {
|
||||
sub_commander.sub_commander_id for sub_commander in bundle.sub_commanders
|
||||
}
|
||||
assert indexes.skill_context_key_by_agent_id == {
|
||||
agent.agent_id: agent.skill_context_key
|
||||
for agent in bundle.agents
|
||||
if agent.skill_context_key is not None
|
||||
}
|
||||
assert indexes.capability_ids_by_sub_commander_id == {
|
||||
sub_commander.sub_commander_id: tuple(sub_commander.capability_ids)
|
||||
for sub_commander in bundle.sub_commanders
|
||||
}
|
||||
|
||||
|
||||
def test_validate_registry_bundle_accepts_loaded_builtin_registry_bundle() -> None:
|
||||
bundle = load_builtin_registry_bundle()
|
||||
|
||||
validate_registry_bundle(
|
||||
agents=list(bundle.agents),
|
||||
sub_commanders=list(bundle.sub_commanders),
|
||||
capabilities=list(bundle.capabilities),
|
||||
specialist_templates=list(bundle.specialist_templates),
|
||||
)
|
||||
|
||||
|
||||
def test_phase_one_still_declares_specialist_template_surface_even_if_runtime_is_deferred() -> None:
|
||||
assert isinstance(BUILTIN_SPECIALIST_TEMPLATE_MANIFESTS, tuple)
|
||||
49
backend/tests/backend/app/agents/test_search_tools.py
Normal file
49
backend/tests/backend/app/agents/test_search_tools.py
Normal file
@@ -0,0 +1,49 @@
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from app.agents.tools.search import web_search
|
||||
|
||||
|
||||
class FakeResult(SimpleNamespace):
|
||||
pass
|
||||
|
||||
|
||||
def test_web_search_tool_formats_results(monkeypatch):
|
||||
class FakeService:
|
||||
async def search(self, query: str, limit: int | None = None):
|
||||
assert query == 'Jarvis 最新更新'
|
||||
assert limit == 2
|
||||
return [
|
||||
FakeResult(
|
||||
title='Jarvis release notes',
|
||||
url='https://example.com/jarvis-release',
|
||||
snippet='Latest Jarvis changes.',
|
||||
source='duckduckgo',
|
||||
published_at='2026-03-29',
|
||||
)
|
||||
]
|
||||
|
||||
monkeypatch.setattr('app.services.web_search_service.WebSearchService', FakeService)
|
||||
|
||||
result = web_search.func('Jarvis 最新更新', top_k=2)
|
||||
|
||||
assert '[1] Jarvis release notes' in result
|
||||
assert '链接: https://example.com/jarvis-release' in result
|
||||
assert '来源: duckduckgo' in result
|
||||
assert '时间: 2026-03-29' in result
|
||||
assert '摘要: Latest Jarvis changes.' in result
|
||||
|
||||
|
||||
def test_web_search_tool_returns_stable_message_when_unavailable(monkeypatch):
|
||||
from app.services.web_search_service import WebSearchConfigurationError
|
||||
|
||||
class FakeService:
|
||||
async def search(self, query: str, limit: int | None = None):
|
||||
raise WebSearchConfigurationError('网页搜索未启用或未配置')
|
||||
|
||||
monkeypatch.setattr('app.services.web_search_service.WebSearchService', FakeService)
|
||||
|
||||
result = web_search.func('Jarvis')
|
||||
|
||||
assert result == '网页搜索不可用: 网页搜索未启用或未配置'
|
||||
12
backend/tests/backend/app/agents/test_state.py
Normal file
12
backend/tests/backend/app/agents/test_state.py
Normal file
@@ -0,0 +1,12 @@
|
||||
from langchain_core.messages import HumanMessage
|
||||
|
||||
from app.agents.state import ConversationTurn, turn_to_message
|
||||
|
||||
|
||||
def test_turn_to_message_returns_human_message_for_user_turn():
|
||||
turn = ConversationTurn(role='user', content='hello')
|
||||
|
||||
message = turn_to_message(turn)
|
||||
|
||||
assert isinstance(message, HumanMessage)
|
||||
assert message.content == 'hello'
|
||||
277
backend/tests/backend/app/agents/test_task_tools.py
Normal file
277
backend/tests/backend/app/agents/test_task_tools.py
Normal file
@@ -0,0 +1,277 @@
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
|
||||
|
||||
sys.modules.setdefault("psutil", Mock())
|
||||
|
||||
import app.models # noqa: F401
|
||||
from app.models.goal import Goal
|
||||
from app.models.reminder import Reminder
|
||||
from app.models.task import Task, TaskPriority, TaskStatus
|
||||
from app.models.todo import DailyTodo
|
||||
from app.models.user import User
|
||||
from app.services.auth_service import get_password_hash
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
async def tool_env(tmp_path):
|
||||
db_path = tmp_path / "test_task_tools.db"
|
||||
engine = create_async_engine(f"sqlite+aiosqlite:///{db_path}", future=True)
|
||||
session_factory = async_sessionmaker(engine, expire_on_commit=False)
|
||||
|
||||
async with engine.begin() as conn:
|
||||
# 只创建本测试需要的表,避免全量 metadata 引入未注册的外键表。
|
||||
await conn.run_sync(User.metadata.create_all, tables=[
|
||||
User.__table__,
|
||||
Task.__table__,
|
||||
DailyTodo.__table__,
|
||||
Reminder.__table__,
|
||||
Goal.__table__,
|
||||
])
|
||||
|
||||
async with session_factory() as session:
|
||||
user = User(
|
||||
username="tool_user",
|
||||
email="tool@example.com",
|
||||
hashed_password=get_password_hash("secret123"),
|
||||
full_name="Tool Tester",
|
||||
)
|
||||
session.add(user)
|
||||
await session.commit()
|
||||
await session.refresh(user)
|
||||
|
||||
try:
|
||||
yield {"session_factory": session_factory, "user_id": user.id}
|
||||
finally:
|
||||
await engine.dispose()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_task_accepts_content_and_date_aliases_and_persists_task(tool_env, monkeypatch):
|
||||
from app.agents.tools import task as task_tools
|
||||
|
||||
monkeypatch.setattr(task_tools, "async_session", tool_env["session_factory"])
|
||||
monkeypatch.setattr(task_tools, "get_current_user", lambda: tool_env["user_id"])
|
||||
|
||||
result = task_tools.create_task.func(content="完成对话系统", date="2026-03-28")
|
||||
|
||||
assert "任务创建成功" in result
|
||||
|
||||
async with tool_env["session_factory"]() as session:
|
||||
saved = (await session.execute(select(Task))).scalar_one()
|
||||
|
||||
assert saved.title == "完成对话系统"
|
||||
assert saved.description == "完成对话系统"
|
||||
assert saved.priority == TaskPriority.MEDIUM
|
||||
assert saved.status == TaskStatus.TODO
|
||||
assert saved.due_date == datetime(2026, 3, 28, 0, 0)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_schedule_task_accepts_content_and_date_aliases_and_sets_morning_due_date(tool_env, monkeypatch):
|
||||
from app.agents.tools import schedule as schedule_tools
|
||||
|
||||
monkeypatch.setattr(schedule_tools, "async_session", tool_env["session_factory"])
|
||||
monkeypatch.setattr(schedule_tools, "get_current_user", lambda: tool_env["user_id"])
|
||||
|
||||
result = schedule_tools.create_schedule_task.func(content="完成对话系统", date="2026-03-28")
|
||||
|
||||
assert "任务创建成功" in result
|
||||
|
||||
async with tool_env["session_factory"]() as session:
|
||||
saved = (await session.execute(select(Task))).scalar_one()
|
||||
|
||||
assert saved.title == "完成对话系统"
|
||||
assert saved.description == "完成对话系统"
|
||||
assert saved.priority == TaskPriority.MEDIUM
|
||||
assert saved.status == TaskStatus.TODO
|
||||
assert saved.due_date == datetime(2026, 3, 28, 9, 0)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
("priority_input", "expected"),
|
||||
[
|
||||
(1, TaskPriority.LOW),
|
||||
(2, TaskPriority.MEDIUM),
|
||||
(3, TaskPriority.HIGH),
|
||||
(4, TaskPriority.URGENT),
|
||||
("urgent", TaskPriority.URGENT),
|
||||
],
|
||||
)
|
||||
async def test_create_task_normalizes_legacy_and_string_priorities(tool_env, monkeypatch, priority_input, expected):
|
||||
from app.agents.tools import task as task_tools
|
||||
|
||||
monkeypatch.setattr(task_tools, "async_session", tool_env["session_factory"])
|
||||
monkeypatch.setattr(task_tools, "get_current_user", lambda: tool_env["user_id"])
|
||||
|
||||
result = task_tools.create_task.func(title=f"priority-{priority_input}", priority=priority_input)
|
||||
|
||||
assert "任务创建成功" in result
|
||||
|
||||
async with tool_env["session_factory"]() as session:
|
||||
rows = (await session.execute(select(Task).order_by(Task.created_at.asc()))).scalars().all()
|
||||
|
||||
assert rows[-1].priority == expected
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_task_accepts_iso_datetime_due_date(tool_env, monkeypatch):
|
||||
from app.agents.tools import task as task_tools
|
||||
|
||||
monkeypatch.setattr(task_tools, "async_session", tool_env["session_factory"])
|
||||
monkeypatch.setattr(task_tools, "get_current_user", lambda: tool_env["user_id"])
|
||||
|
||||
result = task_tools.create_task.func(title="timed task", due_date="2026-03-28T15:30:00Z")
|
||||
|
||||
assert "任务创建成功" in result
|
||||
|
||||
async with tool_env["session_factory"]() as session:
|
||||
saved = (await session.execute(select(Task))).scalar_one()
|
||||
|
||||
assert saved.due_date == datetime(2026, 3, 28, 15, 30, 0)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_task_returns_failure_for_missing_title_and_content(tool_env, monkeypatch):
|
||||
from app.agents.tools import task as task_tools
|
||||
|
||||
monkeypatch.setattr(task_tools, "async_session", tool_env["session_factory"])
|
||||
monkeypatch.setattr(task_tools, "get_current_user", lambda: tool_env["user_id"])
|
||||
|
||||
result = task_tools.create_task.func()
|
||||
|
||||
assert result == "创建任务失败: title 不能为空"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_task_returns_failure_for_invalid_priority(tool_env, monkeypatch):
|
||||
from app.agents.tools import task as task_tools
|
||||
|
||||
monkeypatch.setattr(task_tools, "async_session", tool_env["session_factory"])
|
||||
monkeypatch.setattr(task_tools, "get_current_user", lambda: tool_env["user_id"])
|
||||
|
||||
result = task_tools.create_task.func(title="bad priority", priority="top")
|
||||
|
||||
assert "创建任务失败:" in result
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_update_task_status_rejects_invalid_status(tool_env, monkeypatch):
|
||||
from app.agents.tools import task as task_tools
|
||||
|
||||
monkeypatch.setattr(task_tools, "async_session", tool_env["session_factory"])
|
||||
monkeypatch.setattr(task_tools, "get_current_user", lambda: tool_env["user_id"])
|
||||
|
||||
create_result = task_tools.create_task.func(title="status test")
|
||||
assert "任务创建成功" in create_result
|
||||
|
||||
async with tool_env["session_factory"]() as session:
|
||||
saved = (await session.execute(select(Task))).scalar_one()
|
||||
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_tasks_filters_by_normalized_status_and_formats_values(tool_env, monkeypatch):
|
||||
from app.agents.tools import task as task_tools
|
||||
|
||||
monkeypatch.setattr(task_tools, "async_session", tool_env["session_factory"])
|
||||
monkeypatch.setattr(task_tools, "get_current_user", lambda: tool_env["user_id"])
|
||||
|
||||
task_tools.create_task.func(title="todo task", priority="high")
|
||||
task_tools.create_task.func(title="done task", priority="low")
|
||||
|
||||
async with tool_env["session_factory"]() as session:
|
||||
rows = (await session.execute(select(Task).order_by(Task.created_at.asc()))).scalars().all()
|
||||
rows[1].status = TaskStatus.DONE
|
||||
await session.commit()
|
||||
|
||||
result = task_tools.get_tasks.func(status="done")
|
||||
|
||||
assert "done task" in result
|
||||
assert "todo task" not in result
|
||||
assert "状态:done" in result
|
||||
assert "优先级:low" in result
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_schedule_reminder_accepts_datetime_description_and_at_aliases(tool_env, monkeypatch):
|
||||
from app.agents.tools import schedule as schedule_tools
|
||||
|
||||
monkeypatch.setattr(schedule_tools, "async_session", tool_env["session_factory"])
|
||||
monkeypatch.setattr(schedule_tools, "get_current_user", lambda: tool_env["user_id"])
|
||||
|
||||
result = schedule_tools.create_reminder.func(
|
||||
title="收被子",
|
||||
description="提醒收被子",
|
||||
datetime="2026-03-29T09:00:00",
|
||||
time_zone="Asia/Shanghai",
|
||||
)
|
||||
|
||||
assert "提醒创建成功" in result
|
||||
|
||||
async with tool_env["session_factory"]() as session:
|
||||
saved = (await session.execute(select(Reminder))).scalar_one()
|
||||
|
||||
assert saved.title == "收被子"
|
||||
assert saved.note == "提醒收被子"
|
||||
assert saved.reminder_at == datetime(2026, 3, 29, 9, 0)
|
||||
|
||||
result = schedule_tools.create_reminder.func(
|
||||
content="收被子",
|
||||
datetime="2026-03-29T09:00:00+08:00",
|
||||
)
|
||||
|
||||
assert "提醒创建成功" in result
|
||||
|
||||
async with tool_env["session_factory"]() as session:
|
||||
rows = (await session.execute(select(Reminder).order_by(Reminder.created_at.asc()))).scalars().all()
|
||||
|
||||
assert rows[-1].title == "收被子"
|
||||
assert rows[-1].note is None
|
||||
assert rows[-1].reminder_at == datetime(2026, 3, 29, 9, 0)
|
||||
|
||||
result = schedule_tools.create_reminder.func(
|
||||
content="收被子",
|
||||
time="2026-03-29T09:00:00",
|
||||
time_zone="Asia/Shanghai",
|
||||
)
|
||||
|
||||
assert "提醒创建成功" in result
|
||||
|
||||
async with tool_env["session_factory"]() as session:
|
||||
rows = (await session.execute(select(Reminder).order_by(Reminder.created_at.asc()))).scalars().all()
|
||||
|
||||
assert rows[-1].title == "收被子"
|
||||
assert rows[-1].note is None
|
||||
assert rows[-1].reminder_at == datetime(2026, 3, 29, 9, 0)
|
||||
|
||||
result = schedule_tools.create_reminder.func(
|
||||
title="收被子",
|
||||
remind_at="2026-03-29T18:00:00",
|
||||
)
|
||||
|
||||
assert "提醒创建成功" in result
|
||||
|
||||
async with tool_env["session_factory"]() as session:
|
||||
rows = (await session.execute(select(Reminder).order_by(Reminder.created_at.asc()))).scalars().all()
|
||||
|
||||
assert rows[-1].title == "收被子"
|
||||
assert rows[-1].note is None
|
||||
assert rows[-1].reminder_at == datetime(2026, 3, 29, 18, 0)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_schedule_reminder_returns_failure_when_time_aliases_missing(tool_env, monkeypatch):
|
||||
from app.agents.tools import schedule as schedule_tools
|
||||
|
||||
monkeypatch.setattr(schedule_tools, "async_session", tool_env["session_factory"])
|
||||
monkeypatch.setattr(schedule_tools, "get_current_user", lambda: tool_env["user_id"])
|
||||
|
||||
result = schedule_tools.create_reminder.func(title="收被子")
|
||||
|
||||
assert result == "创建提醒失败: reminder_at 不能为空"
|
||||
94
backend/tests/backend/app/agents/test_time_reasoning_tool.py
Normal file
94
backend/tests/backend/app/agents/test_time_reasoning_tool.py
Normal file
@@ -0,0 +1,94 @@
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from app.agents.tools.time_reasoning import (
|
||||
extract_reference_datetime,
|
||||
normalize_tool_time_arguments,
|
||||
resolve_time_expression_data,
|
||||
)
|
||||
|
||||
|
||||
def test_extract_reference_datetime_from_current_time_context():
|
||||
context = '【当前时间】\n- current_time_utc: 2026-03-28T12:00:00+00:00\n- current_date_utc: 2026-03-28\n说明:解析相对时间时请以 current_time_utc 为准。'
|
||||
|
||||
result = extract_reference_datetime(context)
|
||||
|
||||
assert result == datetime(2026, 3, 28, 12, 0, tzinfo=UTC)
|
||||
|
||||
|
||||
def test_resolve_time_expression_data_normalizes_relative_datetime():
|
||||
payload = resolve_time_expression_data(
|
||||
'明天早上9点',
|
||||
current_datetime_context='CURRENT_TIME: 2026-03-28T12:00:00+00:00',
|
||||
prefer='datetime',
|
||||
)
|
||||
|
||||
assert payload['grain'] == 'datetime'
|
||||
assert payload['resolved_date'] == '2026-03-29'
|
||||
assert payload['resolved_datetime'] == '2026-03-29T09:00:00'
|
||||
assert payload['assumed_time'] is False
|
||||
|
||||
|
||||
def test_resolve_time_expression_data_normalizes_relative_date_window():
|
||||
payload = resolve_time_expression_data(
|
||||
'下周一下午',
|
||||
current_datetime_context='CURRENT_TIME: 2026-03-28T12:00:00+00:00',
|
||||
prefer='datetime',
|
||||
)
|
||||
|
||||
assert payload['resolved_date'] == '2026-03-30'
|
||||
assert payload['resolved_datetime'] == '2026-03-30T15:00:00'
|
||||
assert payload['assumed_time'] is True
|
||||
assert 'assumed_time' in payload['reason']
|
||||
|
||||
|
||||
def test_normalize_tool_time_arguments_converts_reminder_time_aliases():
|
||||
normalized = normalize_tool_time_arguments(
|
||||
'create_reminder',
|
||||
{'title': '开会', 'reminder_at': '明天 09:00'},
|
||||
'CURRENT_TIME: 2026-03-28T12:00:00+00:00',
|
||||
)
|
||||
|
||||
assert normalized['reminder_at'] == '2026-03-29T09:00:00'
|
||||
|
||||
|
||||
def test_normalize_tool_time_arguments_converts_date_only_tools():
|
||||
normalized = normalize_tool_time_arguments(
|
||||
'create_goal',
|
||||
{'title': '交付节点', 'goal_date': '明天'},
|
||||
'CURRENT_TIME: 2026-03-28T12:00:00+00:00',
|
||||
)
|
||||
|
||||
assert normalized['goal_date'] == '2026-03-29'
|
||||
|
||||
|
||||
def test_resolve_time_expression_data_preserves_explicit_datetime_offset():
|
||||
payload = resolve_time_expression_data(
|
||||
'2026-03-29T09:00:00+08:00',
|
||||
current_datetime_context='CURRENT_TIME: 2026-03-28T12:00:00+00:00',
|
||||
prefer='datetime',
|
||||
)
|
||||
|
||||
assert payload['resolved_datetime'] == '2026-03-29T09:00:00+08:00'
|
||||
|
||||
|
||||
def test_normalize_tool_time_arguments_keeps_create_task_date_without_explicit_time():
|
||||
normalized = normalize_tool_time_arguments(
|
||||
'create_task',
|
||||
{'title': '写周报', 'due_date': '明天'},
|
||||
'CURRENT_TIME: 2026-03-28T12:00:00+00:00',
|
||||
)
|
||||
|
||||
assert normalized['due_date'] == '2026-03-29'
|
||||
|
||||
|
||||
def test_normalize_tool_time_arguments_raises_for_invalid_time_text():
|
||||
try:
|
||||
normalize_tool_time_arguments(
|
||||
'create_reminder',
|
||||
{'title': '开会', 'reminder_at': '明天25点'},
|
||||
'CURRENT_TIME: 2026-03-28T12:00:00+00:00',
|
||||
)
|
||||
except ValueError as exc:
|
||||
assert 'hour must be in 0..23' in str(exc)
|
||||
else:
|
||||
raise AssertionError('expected ValueError for invalid time text')
|
||||
23
backend/tests/backend/app/agents/test_tool_async_bridge.py
Normal file
23
backend/tests/backend/app/agents/test_tool_async_bridge.py
Normal file
@@ -0,0 +1,23 @@
|
||||
import pytest
|
||||
|
||||
from app.agents.tools import forum as forum_tools
|
||||
from app.agents.tools import schedule as schedule_tools
|
||||
from app.agents.tools import task as task_tools
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
("module", "label"),
|
||||
[
|
||||
(task_tools, "task"),
|
||||
(schedule_tools, "schedule"),
|
||||
(forum_tools, "forum"),
|
||||
],
|
||||
)
|
||||
async def test_run_async_bridge_works_inside_running_event_loop(module, label):
|
||||
async def sample():
|
||||
return f"ok:{label}"
|
||||
|
||||
result = module._run_async(sample())
|
||||
|
||||
assert result == f"ok:{label}"
|
||||
@@ -0,0 +1,183 @@
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
|
||||
|
||||
import app.models # noqa: F401
|
||||
from app.database import Base
|
||||
from app.models.user import User
|
||||
from app.services.auth_service import verify_password
|
||||
from app.services.admin_bootstrap_service import ensure_admin_user
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_ensure_admin_user_creates_missing_admin(tmp_path):
|
||||
db_path = tmp_path / 'test_admin_bootstrap.db'
|
||||
engine = create_async_engine(f"sqlite+aiosqlite:///{db_path}", future=True)
|
||||
session_factory = async_sessionmaker(engine, expire_on_commit=False)
|
||||
|
||||
async with engine.begin() as conn:
|
||||
await conn.run_sync(Base.metadata.create_all)
|
||||
|
||||
settings = SimpleNamespace(
|
||||
ADMIN='admin',
|
||||
ADMIN_EMAIL='admin@example.com',
|
||||
ADMIN_PASSWORD='secret123',
|
||||
ADMIN_FULL_NAME='Administrator',
|
||||
)
|
||||
|
||||
async with session_factory() as session:
|
||||
await ensure_admin_user(session, settings)
|
||||
result = await session.execute(select(User).where(User.username == 'admin'))
|
||||
admin = result.scalar_one()
|
||||
|
||||
assert admin.email == 'admin@example.com'
|
||||
assert admin.full_name == 'Administrator'
|
||||
assert admin.is_active is True
|
||||
assert admin.is_superuser is True
|
||||
assert verify_password('secret123', admin.hashed_password)
|
||||
|
||||
await engine.dispose()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_ensure_admin_user_skips_when_target_admin_already_exists(tmp_path):
|
||||
db_path = tmp_path / 'test_admin_bootstrap_existing.db'
|
||||
engine = create_async_engine(f"sqlite+aiosqlite:///{db_path}", future=True)
|
||||
session_factory = async_sessionmaker(engine, expire_on_commit=False)
|
||||
|
||||
async with engine.begin() as conn:
|
||||
await conn.run_sync(Base.metadata.create_all)
|
||||
|
||||
settings = SimpleNamespace(
|
||||
ADMIN='admin',
|
||||
ADMIN_EMAIL='admin@example.com',
|
||||
ADMIN_PASSWORD='newsecret123',
|
||||
ADMIN_FULL_NAME='Administrator',
|
||||
)
|
||||
|
||||
async with session_factory() as session:
|
||||
existing_admin = User(
|
||||
username='admin',
|
||||
email='admin@example.com',
|
||||
hashed_password='existing-hash',
|
||||
full_name='Existing Admin',
|
||||
is_active=True,
|
||||
is_superuser=True,
|
||||
)
|
||||
session.add(existing_admin)
|
||||
await session.commit()
|
||||
|
||||
await ensure_admin_user(session, settings)
|
||||
result = await session.execute(select(User).where(User.username == 'admin'))
|
||||
admins = result.scalars().all()
|
||||
|
||||
assert len(admins) == 1
|
||||
assert admins[0].hashed_password == 'existing-hash'
|
||||
assert admins[0].full_name == 'Existing Admin'
|
||||
|
||||
await engine.dispose()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_ensure_admin_user_skips_when_bootstrap_not_enabled(tmp_path):
|
||||
db_path = tmp_path / 'test_admin_bootstrap_disabled.db'
|
||||
engine = create_async_engine(f"sqlite+aiosqlite:///{db_path}", future=True)
|
||||
session_factory = async_sessionmaker(engine, expire_on_commit=False)
|
||||
|
||||
async with engine.begin() as conn:
|
||||
await conn.run_sync(Base.metadata.create_all)
|
||||
|
||||
settings = SimpleNamespace(
|
||||
ADMIN='',
|
||||
ADMIN_EMAIL='admin@example.com',
|
||||
ADMIN_PASSWORD='',
|
||||
ADMIN_FULL_NAME='Administrator',
|
||||
)
|
||||
|
||||
async with session_factory() as session:
|
||||
await ensure_admin_user(session, settings)
|
||||
result = await session.execute(select(User))
|
||||
users = result.scalars().all()
|
||||
|
||||
assert users == []
|
||||
|
||||
await engine.dispose()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_ensure_admin_user_raises_for_conflicting_non_admin_user(tmp_path):
|
||||
db_path = tmp_path / 'test_admin_bootstrap_conflict.db'
|
||||
engine = create_async_engine(f"sqlite+aiosqlite:///{db_path}", future=True)
|
||||
session_factory = async_sessionmaker(engine, expire_on_commit=False)
|
||||
|
||||
async with engine.begin() as conn:
|
||||
await conn.run_sync(Base.metadata.create_all)
|
||||
|
||||
settings = SimpleNamespace(
|
||||
ADMIN='admin',
|
||||
ADMIN_EMAIL='admin@example.com',
|
||||
ADMIN_PASSWORD='secret123',
|
||||
ADMIN_FULL_NAME='Administrator',
|
||||
)
|
||||
|
||||
async with session_factory() as session:
|
||||
session.add(User(
|
||||
username='admin',
|
||||
email='someone@example.com',
|
||||
hashed_password='hash',
|
||||
full_name='Existing User',
|
||||
is_active=True,
|
||||
is_superuser=False,
|
||||
))
|
||||
await session.commit()
|
||||
|
||||
with pytest.raises(RuntimeError):
|
||||
await ensure_admin_user(session, settings)
|
||||
|
||||
await engine.dispose()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_ensure_admin_user_succeeds_when_duplicate_insert_was_created_concurrently(tmp_path):
|
||||
db_path = tmp_path / 'test_admin_bootstrap_duplicate.db'
|
||||
engine = create_async_engine(f"sqlite+aiosqlite:///{db_path}", future=True)
|
||||
session_factory = async_sessionmaker(engine, expire_on_commit=False)
|
||||
|
||||
async with engine.begin() as conn:
|
||||
await conn.run_sync(Base.metadata.create_all)
|
||||
|
||||
settings = SimpleNamespace(
|
||||
ADMIN='admin',
|
||||
ADMIN_EMAIL='admin@example.com',
|
||||
ADMIN_PASSWORD='secret123',
|
||||
ADMIN_FULL_NAME='Administrator',
|
||||
)
|
||||
|
||||
async with session_factory() as session:
|
||||
duplicate_admin = User(
|
||||
username='admin',
|
||||
email='admin@example.com',
|
||||
hashed_password='existing-hash',
|
||||
full_name='Existing Admin',
|
||||
is_active=True,
|
||||
is_superuser=True,
|
||||
)
|
||||
session.add(duplicate_admin)
|
||||
await session.flush()
|
||||
|
||||
original_commit = session.commit
|
||||
|
||||
async def fake_commit():
|
||||
await session.rollback()
|
||||
raise IntegrityError('insert', {}, Exception('duplicate'))
|
||||
|
||||
session.commit = fake_commit
|
||||
try:
|
||||
await ensure_admin_user(session, settings)
|
||||
finally:
|
||||
session.commit = original_commit
|
||||
|
||||
await engine.dispose()
|
||||
@@ -0,0 +1,155 @@
|
||||
import sys
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
from fastapi import FastAPI
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
|
||||
|
||||
sys.modules.setdefault('psutil', Mock())
|
||||
|
||||
import app.models # noqa: F401
|
||||
from app.database import Base, get_db
|
||||
from app.models.brain import BrainMemory, BrainTag
|
||||
from app.models.knowledge_graph import KGEdge, KGNode
|
||||
from app.models.user import User
|
||||
from app.routers.auth import get_current_user
|
||||
from app.routers.graph import router as graph_router
|
||||
from app.services.auth_service import get_password_hash
|
||||
from app.services.brain_service import BrainService
|
||||
from app.services.graph_service import GraphService
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
async def brain_graph_env(tmp_path):
|
||||
db_path = tmp_path / 'test_brain_graph.db'
|
||||
engine = create_async_engine(f"sqlite+aiosqlite:///{db_path}", future=True)
|
||||
session_factory = async_sessionmaker(engine, expire_on_commit=False)
|
||||
|
||||
async with engine.begin() as conn:
|
||||
await conn.run_sync(Base.metadata.create_all)
|
||||
|
||||
async with session_factory() as session:
|
||||
user = User(
|
||||
email='brain-graph@example.com',
|
||||
hashed_password=get_password_hash('secret123'),
|
||||
full_name='Brain Graph Tester',
|
||||
)
|
||||
session.add(user)
|
||||
await session.flush()
|
||||
|
||||
session.add_all([
|
||||
BrainMemory(
|
||||
user_id=user.id,
|
||||
memory_type='project_fact',
|
||||
title='Knowledge brain phase 1',
|
||||
content='Jarvis should learn from conversations and documents first.',
|
||||
importance=9,
|
||||
confidence=0.95,
|
||||
status='active',
|
||||
origin_source_types=['conversation', 'document'],
|
||||
),
|
||||
BrainMemory(
|
||||
user_id=user.id,
|
||||
memory_type='user_preference',
|
||||
title='Structured delivery preference',
|
||||
content='The user prefers concise structured summaries.',
|
||||
importance=7,
|
||||
confidence=0.88,
|
||||
status='active',
|
||||
origin_source_types=['conversation'],
|
||||
),
|
||||
BrainTag(
|
||||
user_id=user.id,
|
||||
name='knowledge-brain',
|
||||
category='topic',
|
||||
priority='important',
|
||||
score=9.5,
|
||||
),
|
||||
BrainTag(
|
||||
user_id=user.id,
|
||||
name='conversation',
|
||||
category='source',
|
||||
priority='secondary',
|
||||
score=7.0,
|
||||
),
|
||||
])
|
||||
await session.commit()
|
||||
await session.refresh(user)
|
||||
|
||||
async def override_get_db():
|
||||
async with session_factory() as session:
|
||||
yield session
|
||||
|
||||
async def override_get_current_user():
|
||||
return user
|
||||
|
||||
app = FastAPI()
|
||||
app.include_router(graph_router)
|
||||
app.dependency_overrides[get_db] = override_get_db
|
||||
app.dependency_overrides[get_current_user] = override_get_current_user
|
||||
|
||||
try:
|
||||
yield session_factory, user, app
|
||||
finally:
|
||||
await engine.dispose()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_build_graph_projects_kg_nodes_and_edges_from_brain_data(brain_graph_env):
|
||||
session_factory, user, _app = brain_graph_env
|
||||
|
||||
async with session_factory() as session:
|
||||
service = GraphService(session)
|
||||
await service.build_graph(user.id)
|
||||
|
||||
node_result = await session.execute(
|
||||
select(KGNode).where(KGNode.user_id == user.id).order_by(KGNode.name.asc())
|
||||
)
|
||||
nodes = list(node_result.scalars().all())
|
||||
edge_result = await session.execute(select(KGEdge))
|
||||
edges = list(edge_result.scalars().all())
|
||||
|
||||
node_names = [node.name for node in nodes]
|
||||
assert 'Knowledge brain phase 1' in node_names
|
||||
assert 'Structured delivery preference' in node_names
|
||||
assert 'knowledge-brain' in node_names
|
||||
assert len(edges) >= 2
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_run_learning_triggers_graph_rebuild(brain_graph_env, monkeypatch):
|
||||
session_factory, user, _app = brain_graph_env
|
||||
calls: list[str] = []
|
||||
|
||||
async def fake_build_graph(self, user_id, document_ids=None):
|
||||
calls.append(user_id)
|
||||
|
||||
monkeypatch.setattr(GraphService, 'build_graph', fake_build_graph)
|
||||
|
||||
async with session_factory() as session:
|
||||
service = BrainService(session)
|
||||
await service.run_learning(user.id)
|
||||
|
||||
assert calls == [user.id]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_graph_api_returns_brain_projected_graph_after_build(brain_graph_env):
|
||||
session_factory, user, app = brain_graph_env
|
||||
|
||||
async with session_factory() as session:
|
||||
service = GraphService(session)
|
||||
await service.build_graph(user.id)
|
||||
|
||||
transport = ASGITransport(app=app)
|
||||
async with AsyncClient(transport=transport, base_url='http://testserver') as client:
|
||||
response = await client.get('/api/graph')
|
||||
|
||||
assert response.status_code == 200
|
||||
payload = response.json()
|
||||
assert payload['stats']['node_count'] >= 3
|
||||
assert payload['stats']['edge_count'] >= 2
|
||||
assert any(node['name'] == 'Knowledge brain phase 1' for node in payload['nodes'])
|
||||
assert any(node['name'] == 'knowledge-brain' for node in payload['nodes'])
|
||||
1619
backend/tests/backend/app/services/test_brain_ingestion.py
Normal file
1619
backend/tests/backend/app/services/test_brain_ingestion.py
Normal file
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user