Align knowledge storage with real folders and add WebDAV import surface

Knowledge files were only partitioned in the database, which made nested uploads, local folder visibility, and delete behavior diverge from the UI. This change makes folder selection drive physical storage paths, keeps original filenames, adds a minimal WebDAV mount/sync path, and reshapes the knowledge panel so local and remote sources can share the same surface.

Constraint: Existing knowledge flow already depends on local-folder-backed uploads and document indexing
Rejected: Real-time bidirectional WebDAV sync | too much conflict and lifecycle complexity for the first pass
Confidence: medium
Scope-risk: moderate
Reversibility: messy
Directive: Keep remote mounts single-direction into local knowledge folders until etag-based incremental sync and conflict rules are verified
Tested: Python py_compile on new/modified backend files; LSP diagnostics on new frontend/backend files; manual targeted code-path inspection
Not-tested: Full pytest/vitest end-to-end runs blocked by environment temp/cache permission errors; live WebDAV server interoperability
This commit is contained in:
2026-04-09 17:26:37 +08:00
parent aa12c92a5a
commit 8c7cf0732b
18 changed files with 2776 additions and 26 deletions

View File

@@ -29,6 +29,8 @@ from app.routers import (
agent_skills_router,
agent_sessions_router,
terminal_router,
tools_router,
remote_mount_router,
)
from app.routers.scheduler import router as scheduler_router
from app.services.scheduler_service import start_scheduler, stop_scheduler, get_scheduler_status
@@ -129,6 +131,8 @@ app.include_router(marketplace_router)
app.include_router(agent_skills_router)
app.include_router(agent_sessions_router)
app.include_router(terminal_router)
app.include_router(tools_router)
app.include_router(remote_mount_router)
@app.get("/api/health")

View File

@@ -2,7 +2,17 @@ from app.models.base import Base
from app.models.user import User
from app.models.folder import Folder
from app.models.document import Document, DocumentChunk
from app.models.task import Task, TaskHistory
from app.models.task import (
Task,
TaskAssigneeType,
TaskDispatchStatus,
TaskHistory,
TaskPriority,
TaskQuadrant,
TaskSource,
TaskStatus,
TaskSubTask,
)
from app.models.forum import ForumPost, ForumReply
from app.models.agent import Agent, AgentMessage
from app.models.conversation import Conversation, Message
@@ -23,6 +33,7 @@ from app.models.reminder import Reminder, ReminderStatus
from app.models.goal import Goal, GoalStatus
from app.models.skill import Skill
from app.models.log import Log, LogType, LogLevel
from app.models.remote_mount import RemoteMount, RemoteSyncItem
__all__ = [
"Base",
@@ -31,7 +42,14 @@ __all__ = [
"Document",
"DocumentChunk",
"Task",
"TaskSubTask",
"TaskHistory",
"TaskStatus",
"TaskPriority",
"TaskSource",
"TaskQuadrant",
"TaskAssigneeType",
"TaskDispatchStatus",
"ForumPost",
"ForumReply",
"Agent",
@@ -61,4 +79,6 @@ __all__ = [
"Log",
"LogType",
"LogLevel",
"RemoteMount",
"RemoteSyncItem",
]

View File

@@ -0,0 +1,34 @@
from sqlalchemy import Boolean, Column, ForeignKey, String, Text, UniqueConstraint
from app.models.base import BaseModel
class RemoteMount(BaseModel):
    """A remote storage endpoint registered by one user (``mount_type`` defaults to "webdav")."""

    __tablename__ = "remote_mounts"
    # The (user_id, name) pair is unique, so one user cannot reuse a mount name.
    __table_args__ = (
        UniqueConstraint("user_id", "name", name="uq_remote_mount_user_name"),
    )

    # Owning user.
    user_id = Column(String(36), ForeignKey("users.id"), nullable=False, index=True)
    # Display name chosen by the user.
    name = Column(String(255), nullable=False)
    mount_type = Column(String(32), nullable=False, default="webdav")

    # Connection settings.
    base_url = Column(String(1000), nullable=False)
    username = Column(String(255), nullable=True)
    # Holds the encrypted form of the password, never the plain text.
    password_encrypted = Column(Text, nullable=True)
    root_path = Column(String(1000), nullable=False, default="/")

    # State.
    is_active = Column(Boolean, nullable=False, default=True)
    # Timestamp kept as text rather than a DateTime column.
    last_sync_at = Column(String(64), nullable=True)
class RemoteSyncItem(BaseModel):
    """Bookkeeping row for one remote path that was imported (or failed to import) from a mount."""

    __tablename__ = "remote_sync_items"

    # Which mount the item came from and where it lives remotely.
    mount_id = Column(String(36), ForeignKey("remote_mounts.id"), nullable=False, index=True)
    remote_path = Column(String(2000), nullable=False)

    # Remote metadata captured at sync time, stored as text.
    remote_etag = Column(String(512), nullable=True)
    remote_modified_at = Column(String(128), nullable=True)

    # Local counterparts; both are nullable.
    local_folder_id = Column(String(36), ForeignKey("folders.id"), nullable=True)
    local_document_id = Column(String(36), ForeignKey("documents.id"), nullable=True)

    # Outcome of the most recent attempt.
    sync_status = Column(String(32), nullable=False, default="synced")
    last_error = Column(Text, nullable=True)
    last_synced_at = Column(String(64), nullable=True)

View File

@@ -21,3 +21,5 @@ from app.routers.plugins import _marketplace_router as marketplace_router
from app.routers.agent_skills import router as agent_skills_router
from app.routers.agent_sessions import router as agent_sessions_router
from app.routers.terminal import router as terminal_router
from app.routers.tools import router as tools_router
from app.routers.remote_mount import router as remote_mount_router

View File

@@ -1,17 +1,20 @@
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy import and_, select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, and_
from typing import List
import shutil
from app.database import get_db
from app.models.folder import Folder
from app.models.user import User
from app.schemas.folder import FolderCreate, FolderUpdate, FolderOut, FolderTreeOut
from app.routers.auth import get_current_user
from app.schemas.folder import FolderCreate, FolderOut, FolderTreeOut, FolderUpdate
from app.services.document_service import DocumentService
router = APIRouter(prefix="/api/folders", tags=["文件夹"])
def build_folder_tree(folders: list[Folder], parent_id: str = None) -> List[FolderTreeOut]:
"""递归构建文件夹树"""
tree = []
for folder in folders:
if folder.parent_id == parent_id:
@@ -20,30 +23,29 @@ def build_folder_tree(folders: list[Folder], parent_id: str = None) -> List[Fold
id=folder.id,
name=folder.name,
parent_id=folder.parent_id,
children=children
children=children,
))
return tree
@router.get("", response_model=List[FolderTreeOut])
async def get_folders(
db: AsyncSession = Depends(get_db),
current_user: User = Depends(get_current_user)
current_user: User = Depends(get_current_user),
):
"""获取用户的完整文件夹树"""
result = await db.execute(
select(Folder).where(Folder.user_id == current_user.id)
)
folders = result.scalars().all()
return build_folder_tree(list(folders))
@router.post("", response_model=FolderOut, status_code=status.HTTP_201_CREATED)
async def create_folder(
folder_data: FolderCreate,
db: AsyncSession = Depends(get_db),
current_user: User = Depends(get_current_user)
current_user: User = Depends(get_current_user),
):
"""创建文件夹"""
# 验证父文件夹存在且属于当前用户
if folder_data.parent_id:
result = await db.execute(
select(Folder).where(
@@ -53,13 +55,12 @@ async def create_folder(
if not result.scalar_one_or_none():
raise HTTPException(status_code=404, detail="父文件夹不存在")
# 检查同名文件夹
result = await db.execute(
select(Folder).where(
and_(
Folder.user_id == current_user.id,
Folder.parent_id == folder_data.parent_id,
Folder.name == folder_data.name
Folder.name == folder_data.name,
)
)
)
@@ -69,21 +70,24 @@ async def create_folder(
folder = Folder(
user_id=current_user.id,
name=folder_data.name,
parent_id=folder_data.parent_id
parent_id=folder_data.parent_id,
)
db.add(folder)
await db.commit()
await db.refresh(folder)
document_service = DocumentService(db, current_user.id)
await document_service.ensure_folder_directory(current_user.id, folder.id)
return folder
@router.put("/{folder_id}", response_model=FolderOut)
async def rename_folder(
folder_id: str,
folder_data: FolderUpdate,
db: AsyncSession = Depends(get_db),
current_user: User = Depends(get_current_user)
current_user: User = Depends(get_current_user),
):
"""重命名文件夹"""
result = await db.execute(
select(Folder).where(
and_(Folder.id == folder_id, Folder.user_id == current_user.id)
@@ -93,18 +97,22 @@ async def rename_folder(
if not folder:
raise HTTPException(status_code=404, detail="文件夹不存在")
old_name = folder.name
folder.name = folder_data.name
document_service = DocumentService(db, current_user.id)
await document_service.rename_folder_directory(current_user.id, folder.id, old_name, folder_data.name)
await db.commit()
await db.refresh(folder)
return folder
@router.delete("/{folder_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_folder(
folder_id: str,
db: AsyncSession = Depends(get_db),
current_user: User = Depends(get_current_user)
current_user: User = Depends(get_current_user),
):
"""删除文件夹(级联删除文档)"""
from app.models.document import Document
from app.services.knowledge_service import KnowledgeService
@@ -117,15 +125,16 @@ async def delete_folder(
if not folder:
raise HTTPException(status_code=404, detail="文件夹不存在")
document_service = DocumentService(db, current_user.id)
folder_path = await document_service._get_storage_directory(current_user.id, folder_id)
async def delete_recursive(fid: str):
# 删除子文件夹(先递归)
children = await db.execute(
select(Folder).where(Folder.parent_id == fid)
)
for child in children.scalars():
await delete_recursive(child.id)
# 删除文档
docs = await db.execute(
select(Document).where(Document.folder_id == fid)
)
@@ -134,10 +143,12 @@ async def delete_folder(
await knowledge_service.delete_from_vectorstore(current_user.id, doc.id)
await db.delete(doc)
# 删除文件夹本身
folder_to_delete = await db.get(Folder, fid)
if folder_to_delete:
await db.delete(folder_to_delete)
await delete_recursive(folder_id)
await db.commit()
if folder_path.exists():
shutil.rmtree(folder_path, ignore_errors=True)

View File

@@ -0,0 +1,130 @@
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy import and_, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.models.remote_mount import RemoteMount
from app.models.user import User
from app.routers.auth import get_current_user
from app.schemas.remote_mount import (
RemoteMountCreate,
RemoteMountOut,
RemoteMountTreeOut,
RemoteNodeOut,
RemoteSyncRequest,
RemoteSyncResultOut,
)
from app.services.remote_sync_service import RemoteSyncService
from app.services.secret_service import encrypt_secret
from app.services.webdav_service import WebDavNode, WebDavService
router = APIRouter(prefix="/api/remote-mounts", tags=["远程挂载"])
def _to_node_out(node: WebDavNode) -> RemoteNodeOut:
    """Convert a WebDavNode and, recursively, all of its children into the response schema."""
    converted_children = [_to_node_out(child) for child in node.children]
    return RemoteNodeOut(
        path=node.path,
        name=node.name,
        is_dir=node.is_dir,
        size=node.size,
        modified_at=node.modified_at,
        etag=node.etag,
        children=converted_children,
    )
@router.get("", response_model=list[RemoteMountOut])
async def list_remote_mounts(
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # Return the caller's mounts, newest first (ordered by created_at descending).
    query = (
        select(RemoteMount)
        .where(RemoteMount.user_id == current_user.id)
        .order_by(RemoteMount.created_at.desc())
    )
    rows = await db.execute(query)
    return list(rows.scalars().all())
@router.post("", response_model=RemoteMountOut, status_code=status.HTTP_201_CREATED)
async def create_remote_mount(
    payload: RemoteMountCreate,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # Register a WebDAV mount for the current user.
    #
    # Steps: reject a duplicate name, build the (not yet persisted) mount with the
    # password encrypted, probe the server by listing root_path, then persist.
    # Raises HTTP 400 for a duplicate name or when the probe request fails.
    from sqlalchemy.exc import IntegrityError  # local import: only needed here

    existing = await db.execute(
        select(RemoteMount).where(and_(RemoteMount.user_id == current_user.id, RemoteMount.name == payload.name))
    )
    if existing.scalar_one_or_none():
        raise HTTPException(status_code=400, detail="同名远程挂载已存在")

    mount = RemoteMount(
        user_id=current_user.id,
        name=payload.name,
        mount_type="webdav",
        base_url=str(payload.base_url),
        username=payload.username,
        password_encrypted=encrypt_secret(payload.password),
        root_path=payload.root_path,
        is_active=True,
    )

    # Connectivity check happens before the row is added, so a bad endpoint is never stored.
    try:
        await WebDavService(mount).list_directory(payload.root_path)
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(status_code=400, detail=f"WebDAV 连接失败: {exc}") from exc

    db.add(mount)
    try:
        await db.commit()
    except IntegrityError as exc:
        # Two concurrent requests can both pass the pre-check above; the
        # (user_id, name) unique constraint then rejects the second commit.
        # Report it as the same duplicate-name error instead of a 500, and
        # roll back so the session stays usable.
        await db.rollback()
        raise HTTPException(status_code=400, detail="同名远程挂载已存在") from exc
    await db.refresh(mount)
    return mount
async def _get_user_mount(db: AsyncSession, user_id: str, mount_id: str) -> RemoteMount:
    """Load the mount with ``mount_id`` that belongs to ``user_id``; raise HTTP 404 if there is none."""
    ownership_filter = and_(RemoteMount.id == mount_id, RemoteMount.user_id == user_id)
    rows = await db.execute(select(RemoteMount).where(ownership_filter))
    found = rows.scalar_one_or_none()
    if found is not None:
        return found
    raise HTTPException(status_code=404, detail="远程挂载不存在")
@router.get("/{mount_id}/tree", response_model=RemoteMountTreeOut)
async def get_remote_tree(
    mount_id: str,
    path: str | None = None,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # List the remote tree under `path`, falling back to the mount's root_path
    # when no path is given. Any listing failure is reported as HTTP 400.
    mount = await _get_user_mount(db, current_user.id, mount_id)
    listing_root = path or mount.root_path
    try:
        remote_nodes = await WebDavService(mount).list_tree(listing_root)
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(status_code=400, detail=f"远程目录读取失败: {exc}") from exc

    serialized = [_to_node_out(remote_node) for remote_node in remote_nodes]
    return RemoteMountTreeOut(mount_id=mount.id, root_path=listing_root, nodes=serialized)
@router.post("/{mount_id}/sync", response_model=RemoteSyncResultOut)
async def sync_remote_mount(
    mount_id: str,
    payload: RemoteSyncRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # Import a remote file or folder into a local folder.
    # ValueError from the sync service maps to HTTP 400; anything else to HTTP 500.
    mount = await _get_user_mount(db, current_user.id, mount_id)
    try:
        sync_service = RemoteSyncService(db, current_user.id)
        summary = await sync_service.sync_remote_path(
            mount,
            payload.remote_path,
            payload.local_folder_id,
            payload.mode,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(status_code=500, detail=f"远程同步失败: {exc}") from exc
    return RemoteSyncResultOut(**summary)

View File

@@ -0,0 +1,58 @@
from datetime import datetime
from pydantic import BaseModel, Field, HttpUrl
class RemoteMountCreate(BaseModel):
    # Request body for registering a new remote mount.

    # Display name, 1-255 characters.
    name: str = Field(..., min_length=1, max_length=255)
    # Server endpoint; validated as an HTTP(S) URL.
    base_url: HttpUrl
    # Optional credentials.
    username: str | None = Field(default=None, max_length=255)
    password: str | None = Field(default=None, max_length=2000)
    # Remote directory treated as the mount root; defaults to "/".
    root_path: str = Field(default="/", min_length=1, max_length=1000)
class RemoteMountOut(BaseModel):
    # Response shape for a stored mount. There is no password field, so the
    # secret is never serialized back to the client.

    id: str
    name: str
    mount_type: str
    base_url: str
    username: str | None
    root_path: str
    is_active: bool
    # Text timestamp of the last sync, or None.
    last_sync_at: str | None
    created_at: datetime
    updated_at: datetime

    # Allow construction directly from ORM objects (attribute access).
    model_config = {"from_attributes": True}
class RemoteNodeOut(BaseModel):
    # One entry (file or directory) of a remote listing; directories may nest children.

    path: str
    name: str
    is_dir: bool
    # Optional metadata; each defaults to None.
    size: int | None = None
    modified_at: str | None = None
    etag: str | None = None
    # Self-referencing list of child entries; empty by default.
    children: list["RemoteNodeOut"] = []
class RemoteMountTreeOut(BaseModel):
    # Response of the tree endpoint: the mount, the path that was listed, and its entries.

    mount_id: str
    root_path: str
    nodes: list[RemoteNodeOut]
class RemoteSyncRequest(BaseModel):
    # Request body for importing a remote path into a local folder.

    # Remote file or directory to import.
    remote_path: str = Field(..., min_length=1, max_length=2000)
    # Id of the destination local folder.
    local_folder_id: str = Field(..., min_length=1, max_length=36)
    # Either "file" (the default) or "folder".
    mode: str = Field(default="file", pattern="^(file|folder)$")
class RemoteSyncResultOut(BaseModel):
    # Summary of one sync run.

    # Per-outcome counters.
    synced: int
    skipped: int
    failed: int
    # Ids of the documents created by the run.
    document_ids: list[str]
    # One message per failed item.
    errors: list[str]
# Rebuild the model now that the class exists, so the self-referencing
# "RemoteNodeOut" annotation on the children field can be resolved.
RemoteNodeOut.model_rebuild()

View File

@@ -5,6 +5,7 @@
from pathlib import Path
import tempfile
import shutil
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from fastapi import UploadFile
@@ -18,7 +19,6 @@ import json
import os
import re
import aiofiles
import uuid
from dataclasses import dataclass, field
@@ -52,9 +52,9 @@ class DocumentService:
if ext not in ALLOWED_EXTENSIONS:
raise ValueError(f"不支持的文件类型: {ext}")
os.makedirs(settings.UPLOAD_DIR, exist_ok=True)
file_id = str(uuid.uuid4())
file_path = os.path.join(settings.UPLOAD_DIR, f"{file_id}{ext}")
folder_path = await self._get_storage_directory(user_id, folder_id)
folder_path.mkdir(parents=True, exist_ok=True)
file_path = self._resolve_unique_file_path(folder_path, file.filename)
content = await file.read()
file_size = len(content)
@@ -64,7 +64,7 @@ class DocumentService:
async with aiofiles.open(file_path, "wb") as f:
await f.write(content)
parsed = await self._parse_document(file_path, ext)
parsed = await self._parse_document(str(file_path), ext)
parsed.structured_markdown = self._render_structured_markdown(parsed)
doc = Document(
@@ -73,7 +73,7 @@ class DocumentService:
filename=file.filename,
file_type=ext[1:],
file_size=file_size,
file_path=file_path,
file_path=str(file_path),
summary=parsed.summary[:500] if len(parsed.summary) > 500 else parsed.summary,
folder_id=folder_id,
ingestion_status="uploaded",
@@ -171,6 +171,83 @@ class DocumentService:
return "/" + "/".join(path_parts) if path_parts else None
async def ensure_folder_directory(self, user_id: str, folder_id: str | None) -> Path:
    """Create the on-disk directory for the folder (and any missing parents) and return its path."""
    target_dir = await self._get_storage_directory(user_id, folder_id)
    # exist_ok makes repeated calls harmless.
    target_dir.mkdir(parents=True, exist_ok=True)
    return target_dir
async def delete_folder_directory(self, user_id: str, folder_id: str) -> None:
    """Remove the folder's on-disk directory tree if it exists; removal errors are ignored."""
    target_dir = await self._get_storage_directory(user_id, folder_id)
    if not target_dir.exists():
        return
    shutil.rmtree(target_dir, ignore_errors=True)
async def rename_folder_directory(self, user_id: str, folder_id: str, old_name: str, new_name: str) -> None:
    """Move a folder's directory to match its new name and re-point stored document paths.

    Does nothing when the folder row cannot be loaded. The new directory is
    guaranteed to exist afterwards, even if the old one was never created.
    Document rows are modified in the session only; committing is left to the caller.
    """
    folder = await self.db.get(Folder, folder_id)
    if folder is None:
        return

    parent_dir = await self._get_storage_directory(user_id, folder.parent_id)
    source_dir = parent_dir / self._sanitize_storage_name(old_name)
    target_dir = parent_dir / self._sanitize_storage_name(new_name)

    name_changed = source_dir != target_dir
    if name_changed:
        parent_dir.mkdir(parents=True, exist_ok=True)
    if name_changed and source_dir.exists():
        source_dir.rename(target_dir)
    else:
        # Either nothing to move or the sanitized names are identical.
        target_dir.mkdir(parents=True, exist_ok=True)

    # Rewrite file_path for every document of this user stored beneath the old directory.
    rows = await self.db.execute(
        select(Document).where(Document.user_id == user_id)
    )
    for doc in rows.scalars().all():
        try:
            tail = Path(doc.file_path).relative_to(source_dir)
        except ValueError:
            # Not located under the renamed directory.
            continue
        doc.file_path = str(target_dir / tail)
async def _get_storage_directory(self, user_id: str, folder_id: str | None) -> Path:
    """Resolve the on-disk directory that backs a folder.

    The result is ``UPLOAD_DIR/<user_id>`` followed by one sanitized path
    segment per folder, from the topmost ancestor down to ``folder_id``.
    With no ``folder_id`` the user's base directory is returned.

    Raises:
        ValueError: if a folder in the chain is missing for this user, or the
            ``parent_id`` links form a cycle.
    """
    base_path = Path(settings.UPLOAD_DIR) / user_id
    if not folder_id:
        return base_path

    folders = await self.db.execute(
        select(Folder).where(Folder.user_id == user_id)
    )
    folder_map = {folder.id: folder for folder in folders.scalars().all()}

    # Walk child -> root, collecting segments; reversed at the end.
    path_segments: list[str] = []
    visited: set[str] = set()
    current_id = folder_id
    while current_id:
        if current_id in visited:
            # Corrupt data (a folder that is its own ancestor) would otherwise loop forever.
            raise ValueError("文件夹层级存在循环引用")
        visited.add(current_id)

        folder = folder_map.get(current_id)
        if folder is None:
            # The original literal here was mojibake; restored to the message
            # used for the same condition in the folders router.
            raise ValueError("父文件夹不存在")
        path_segments.append(self._sanitize_storage_name(folder.name))
        current_id = folder.parent_id

    return base_path.joinpath(*reversed(path_segments))
def _resolve_unique_file_path(self, directory: Path, original_name: str) -> Path:
    """Return a path in ``directory`` for ``original_name`` that does not exist yet.

    Only the final component of ``original_name`` is used. On a collision the
    stem gets a numeric suffix: ``name-2.ext``, ``name-3.ext``, ...
    """
    safe_name = self._sanitize_storage_name(Path(original_name).name, is_file=True)
    safe_parts = Path(safe_name)
    stem, suffix = safe_parts.stem, safe_parts.suffix

    candidate = directory / safe_name
    attempt = 1
    while candidate.exists():
        attempt += 1
        candidate = directory / f"{stem}-{attempt}{suffix}"
    return candidate
def _sanitize_storage_name(self, name: str, is_file: bool = False) -> str:
invalid_chars = '<>:"/\\|?*'
sanitized = ''.join('_' if char in invalid_chars or ord(char) < 32 else char for char in name).strip().rstrip('.')
if not sanitized:
return 'untitled' if is_file else 'folder'
return sanitized
async def delete_document(self, user_id: str, document_id: str):
result = await self.db.execute(
select(Document).where(

View File

@@ -0,0 +1,108 @@
from io import BytesIO
from datetime import UTC, datetime
from sqlalchemy import and_, select
from sqlalchemy.ext.asyncio import AsyncSession
from starlette.datastructures import UploadFile
from app.models.folder import Folder
from app.models.remote_mount import RemoteMount, RemoteSyncItem
from app.services.document_service import DocumentService
from app.services.webdav_service import WebDavNode, WebDavService
class RemoteSyncService:
    """One-way import of remote WebDAV files into a local knowledge folder."""

    def __init__(self, db: AsyncSession, user_id: str):
        self.db = db
        self.user_id = user_id

    async def sync_remote_path(
        self,
        mount: RemoteMount,
        remote_path: str,
        local_folder_id: str,
        mode: str = "file",
    ) -> dict:
        """Download ``remote_path`` from ``mount`` and upload it into ``local_folder_id``.

        In ``"folder"`` mode every file found in the remote tree is imported;
        otherwise ``remote_path`` is treated as a single file. Failures are
        recorded per item and do not stop the run.

        Returns:
            dict with keys ``synced``, ``skipped``, ``failed``, ``document_ids``, ``errors``.

        Raises:
            ValueError: if the target folder does not exist for this user.
        """
        folder_lookup = await self.db.execute(
            select(Folder).where(and_(Folder.id == local_folder_id, Folder.user_id == self.user_id))
        )
        if folder_lookup.scalar_one_or_none() is None:
            raise ValueError("本地目标文件夹不存在")

        webdav = WebDavService(mount)
        # NOTE(review): DocumentService is built with (db, user_id) here, while the
        # document-service test in this change builds it with the session only —
        # confirm the constructor signature.
        document_service = DocumentService(self.db, self.user_id)

        if mode == "folder":
            targets = self._flatten_files(await webdav.list_tree(remote_path))
        else:
            leaf_name = remote_path.rstrip("/").split("/")[-1] or "remote-file"
            targets = [WebDavNode(path=remote_path, name=leaf_name, is_dir=False)]

        document_ids: list[str] = []
        errors: list[str] = []
        for target in targets:
            try:
                content, filename = await webdav.download_file(target.path)
                upload = UploadFile(filename=filename, file=BytesIO(content))
                document = await document_service.upload_document(self.user_id, upload, folder_id=local_folder_id)
                await self._upsert_sync_item(mount.id, target, local_folder_id, document.id)
                document_ids.append(document.id)
            except Exception as exc:  # noqa: BLE001
                errors.append(f"{target.path}: {exc}")
                await self._upsert_sync_item(mount.id, target, local_folder_id, None, status="failed", error=str(exc))

        mount.last_sync_at = datetime.now(UTC).isoformat()
        await self.db.commit()

        # Each success adds exactly one document id and each failure exactly one
        # error message, so the counters equal the list lengths. Nothing is skipped yet.
        return {
            "synced": len(document_ids),
            "skipped": 0,
            "failed": len(errors),
            "document_ids": document_ids,
            "errors": errors,
        }

    def _flatten_files(self, nodes: list[WebDavNode]) -> list[WebDavNode]:
        """Return all non-directory nodes of the tree, depth-first, in listing order."""
        flat: list[WebDavNode] = []
        for entry in nodes:
            flat += self._flatten_files(entry.children) if entry.is_dir else [entry]
        return flat

    async def _upsert_sync_item(
        self,
        mount_id: str,
        node: WebDavNode,
        local_folder_id: str,
        local_document_id: str | None,
        status: str = "synced",
        error: str | None = None,
    ) -> None:
        """Create or update the RemoteSyncItem for (mount_id, node.path) with the latest outcome.

        The row is only added to or changed in the session; the caller commits.
        """
        lookup = await self.db.execute(
            select(RemoteSyncItem).where(
                and_(RemoteSyncItem.mount_id == mount_id, RemoteSyncItem.remote_path == node.path)
            )
        )
        record = lookup.scalar_one_or_none()
        if record is None:
            record = RemoteSyncItem(mount_id=mount_id, remote_path=node.path)
            self.db.add(record)

        # Remote metadata.
        record.remote_etag = node.etag
        record.remote_modified_at = node.modified_at
        # Local target.
        record.local_folder_id = local_folder_id
        record.local_document_id = local_document_id
        # Outcome of this attempt.
        record.sync_status = status
        record.last_error = error
        record.last_synced_at = datetime.now(UTC).isoformat()

View File

@@ -0,0 +1,24 @@
import base64
import hashlib
from cryptography.fernet import Fernet
from app.config import settings
def _build_fernet() -> Fernet:
    """Build a Fernet cipher whose key is the url-safe base64 of SHA-256(settings.SECRET_KEY)."""
    secret_bytes = settings.SECRET_KEY.encode("utf-8")
    fernet_key = base64.urlsafe_b64encode(hashlib.sha256(secret_bytes).digest())
    return Fernet(fernet_key)
def encrypt_secret(value: str | None) -> str | None:
if not value:
return None
return _build_fernet().encrypt(value.encode("utf-8")).decode("utf-8")
def decrypt_secret(value: str | None) -> str | None:
if not value:
return None
return _build_fernet().decrypt(value.encode("utf-8")).decode("utf-8")

View File

@@ -0,0 +1,127 @@
from dataclasses import dataclass, field
from urllib.parse import quote, urljoin
import xml.etree.ElementTree as ET
import httpx
from app.models.remote_mount import RemoteMount
from app.services.secret_service import decrypt_secret
WEBDAV_NAMESPACE = {
"d": "DAV:",
}
@dataclass
class WebDavNode:
path: str
name: str
is_dir: bool
size: int | None = None
modified_at: str | None = None
etag: str | None = None
children: list["WebDavNode"] = field(default_factory=list)
class WebDavService:
def __init__(self, mount: RemoteMount):
self.mount = mount
self.username = mount.username or None
self.password = decrypt_secret(mount.password_encrypted)
def _normalize_remote_path(self, remote_path: str | None = None) -> str:
path = remote_path or self.mount.root_path or "/"
if not path.startswith("/"):
path = f"/{path}"
return path
def _build_url(self, remote_path: str | None = None) -> str:
path = self._normalize_remote_path(remote_path)
encoded = "/".join(quote(segment) for segment in path.split("/") if segment)
if not encoded:
return self.mount.base_url.rstrip("/") + "/"
return urljoin(self.mount.base_url.rstrip("/") + "/", encoded)
async def list_directory(self, remote_path: str | None = None) -> list[WebDavNode]:
path = self._normalize_remote_path(remote_path)
body = """<?xml version="1.0" encoding="utf-8" ?>
<d:propfind xmlns:d="DAV:">
<d:prop>
<d:displayname />
<d:resourcetype />
<d:getcontentlength />
<d:getlastmodified />
<d:getetag />
</d:prop>
</d:propfind>"""
async with httpx.AsyncClient(timeout=30.0, auth=self._auth()) as client:
response = await client.request(
"PROPFIND",
self._build_url(path),
headers={"Depth": "1", "Content-Type": "application/xml"},
content=body,
)
response.raise_for_status()
return self._parse_propfind(path, response.text)
async def list_tree(self, remote_path: str | None = None, max_depth: int = 4) -> list[WebDavNode]:
path = self._normalize_remote_path(remote_path)
nodes = await self.list_directory(path)
if max_depth <= 1:
return nodes
for node in nodes:
if node.is_dir:
node.children = await self.list_tree(node.path, max_depth=max_depth - 1)
return nodes
async def download_file(self, remote_path: str) -> tuple[bytes, str]:
normalized = self._normalize_remote_path(remote_path)
async with httpx.AsyncClient(timeout=120.0, auth=self._auth()) as client:
response = await client.get(self._build_url(normalized))
response.raise_for_status()
name = normalized.rstrip("/").split("/")[-1] or "remote-file"
return response.content, name
def _auth(self) -> httpx.BasicAuth | None:
if self.username and self.password:
return httpx.BasicAuth(self.username, self.password)
return None
def _parse_propfind(self, parent_path: str, payload: str) -> list[WebDavNode]:
root = ET.fromstring(payload)
nodes: list[WebDavNode] = []
for response in root.findall("d:response", WEBDAV_NAMESPACE):
href = response.findtext("d:href", default="", namespaces=WEBDAV_NAMESPACE)
if not href:
continue
normalized_href = "/" + href.split("://", 1)[-1].split("/", 1)[-1].strip("/")
normalized_href = "/" if normalized_href == "/" else normalized_href.rstrip("/")
normalized_parent = self._normalize_remote_path(parent_path).rstrip("/") or "/"
if normalized_href.rstrip("/") == normalized_parent.rstrip("/"):
continue
prop = response.find("d:propstat/d:prop", WEBDAV_NAMESPACE)
if prop is None:
continue
is_dir = prop.find("d:resourcetype/d:collection", WEBDAV_NAMESPACE) is not None
display_name = prop.findtext("d:displayname", default="", namespaces=WEBDAV_NAMESPACE) or normalized_href.split("/")[-1]
size_text = prop.findtext("d:getcontentlength", default="", namespaces=WEBDAV_NAMESPACE)
etag = prop.findtext("d:getetag", default=None, namespaces=WEBDAV_NAMESPACE)
modified_at = prop.findtext("d:getlastmodified", default=None, namespaces=WEBDAV_NAMESPACE)
nodes.append(WebDavNode(
path=normalized_href,
name=display_name,
is_dir=is_dir,
size=int(size_text) if size_text.isdigit() else None,
etag=etag,
modified_at=modified_at,
))
nodes.sort(key=lambda item: (not item.is_dir, item.name.lower()))
return nodes

View File

@@ -15,6 +15,7 @@ from starlette.datastructures import UploadFile
import app.models # noqa: F401
from app.database import Base
from app.models.document import Document, DocumentChunk
from app.models.folder import Folder
from app.models.user import User
from app.services.auth_service import get_password_hash
from app.services.document_service import DocumentService
@@ -199,6 +200,29 @@ async def test_upload_document_persists_structured_metadata_json(document_test_e
assert stored_document.normalized_content == 'title\n\nplain text body for metadata storage'
@pytest.mark.asyncio
async def test_upload_document_stores_file_in_nested_folder_with_original_name(document_test_env):
    """A file uploaded into Projects/Specs is stored at .../Projects/Specs/<original name>."""
    session, user = document_test_env
    service = DocumentService(session)

    # Build the two-level hierarchy; flush so the parent id exists before the child is created.
    projects_folder = Folder(user_id=user.id, name='Projects')
    session.add(projects_folder)
    await session.flush()
    specs_folder = Folder(user_id=user.id, name='Specs', parent_id=projects_folder.id)
    session.add(specs_folder)
    await session.commit()
    await session.refresh(specs_folder)

    upload = UploadFile(filename='system-design.md', file=BytesIO(b'# Design'))
    document = await service.upload_document(user.id, upload, folder_id=specs_folder.id)

    stored_path = Path(document.file_path)
    assert stored_path.name == 'system-design.md'
    assert stored_path.parent.name == 'Specs'
    assert stored_path.parent.parent.name == 'Projects'
    assert stored_path.exists()
@pytest.mark.asyncio
async def test_upload_document_extracts_docx_heading_and_table_structure(document_test_env):
session, user = document_test_env

View File

@@ -0,0 +1,39 @@
from app.models.remote_mount import RemoteMount
from app.services.secret_service import encrypt_secret
from app.services.webdav_service import WebDavService
def test_parse_propfind_returns_sorted_nodes():
    """The listed directory itself is dropped and directories sort before files."""
    payload = """<?xml version="1.0" encoding="utf-8" ?>
<d:multistatus xmlns:d="DAV:">
<d:response>
<d:href>/knowledge/</d:href>
<d:propstat><d:prop><d:displayname>knowledge</d:displayname><d:resourcetype><d:collection /></d:resourcetype></d:prop></d:propstat>
</d:response>
<d:response>
<d:href>/knowledge/specs/</d:href>
<d:propstat><d:prop><d:displayname>specs</d:displayname><d:resourcetype><d:collection /></d:resourcetype></d:prop></d:propstat>
</d:response>
<d:response>
<d:href>/knowledge/roadmap.md</d:href>
<d:propstat><d:prop><d:displayname>roadmap.md</d:displayname><d:getcontentlength>128</d:getcontentlength><d:getetag>"etag-1"</d:getetag><d:getlastmodified>Wed, 09 Apr 2026 10:00:00 GMT</d:getlastmodified><d:resourcetype /></d:prop></d:propstat>
</d:response>
</d:multistatus>"""
    mount = RemoteMount(
        user_id='user-1',
        name='Docs',
        mount_type='webdav',
        base_url='https://example.com/dav/',
        username='alice',
        password_encrypted=encrypt_secret('secret'),
        root_path='/knowledge',
        is_active=True,
    )
    service = WebDavService(mount)

    nodes = service._parse_propfind('/knowledge', payload)

    assert [node.name for node in nodes] == ['specs', 'roadmap.md']
    directory_node, file_node = nodes[0], nodes[1]
    assert directory_node.is_dir is True
    assert file_node.is_dir is False
    assert file_node.size == 128
    assert file_node.etag == '"etag-1"'

View File

@@ -0,0 +1,90 @@
from httpx import ASGITransport, AsyncClient
from pathlib import Path
import pytest
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
import app.models # noqa: F401
from app.database import Base, get_db
from app.main import app
from app.models.folder import Folder
from app.models.user import User
from app.routers.auth import get_current_user
from app.services.auth_service import get_password_hash
@pytest.fixture
async def folder_router_env(tmp_path, monkeypatch):
    """Provide (user, upload_dir, session_factory) backed by a throwaway SQLite database.

    The app's get_db and get_current_user dependencies are overridden for the
    duration of the test and cleared afterwards; UPLOAD_DIR points into tmp_path.
    """
    upload_dir = tmp_path / 'uploads'
    db_path = tmp_path / 'test_folders_router.db'
    engine = create_async_engine(f"sqlite+aiosqlite:///{db_path}", future=True)
    session_factory = async_sessionmaker(engine, expire_on_commit=False)

    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.create_all)

    async with session_factory() as session:
        user = User(
            email='folders@example.com',
            hashed_password=get_password_hash('secret123'),
            full_name='Folder Tester',
        )
        session.add(user)
        await session.commit()
        await session.refresh(user)

    monkeypatch.setattr('app.services.document_service.settings.UPLOAD_DIR', str(upload_dir))

    async def override_get_db():
        # Fresh session per request, from the test database.
        async with session_factory() as request_session:
            yield request_session

    async def override_get_current_user():
        return user

    app.dependency_overrides[get_db] = override_get_db
    app.dependency_overrides[get_current_user] = override_get_current_user
    try:
        yield user, upload_dir, session_factory
    finally:
        app.dependency_overrides.clear()
        await engine.dispose()
@pytest.mark.asyncio
async def test_create_folder_creates_matching_local_directory(folder_router_env):
    """POST /api/folders creates the folder and a directory named after it under the user's upload dir."""
    user, upload_dir, _session_factory = folder_router_env

    async with AsyncClient(transport=ASGITransport(app=app), base_url='http://testserver') as client:
        response = await client.post('/api/folders', json={'name': 'Projects', 'parent_id': None})

    assert response.status_code == 201
    folder_id = response.json()['id']
    created_dir = upload_dir / user.id / 'Projects'
    assert created_dir.exists()
    assert created_dir.is_dir()
    assert folder_id
@pytest.mark.asyncio
async def test_rename_folder_moves_local_directory(folder_router_env):
    """PUT /api/folders/{id} with a new name moves the existing directory from Old to New."""
    user, upload_dir, session_factory = folder_router_env
    user_root = upload_dir / user.id

    # Seed a folder row plus its matching directory on disk.
    async with session_factory() as session:
        folder = Folder(user_id=user.id, name='Old', parent_id=None)
        session.add(folder)
        await session.commit()
        await session.refresh(folder)
    (user_root / 'Old').mkdir(parents=True, exist_ok=True)

    async with AsyncClient(transport=ASGITransport(app=app), base_url='http://testserver') as client:
        response = await client.put(f'/api/folders/{folder.id}', json={'name': 'New'})

    assert response.status_code == 200
    assert not (user_root / 'Old').exists()
    assert (user_root / 'New').exists()