Align knowledge storage with real folders and add WebDAV import surface

Knowledge files were only partitioned in the database, which made nested uploads, local folder visibility, and delete behavior diverge from the UI. This change makes folder selection drive physical storage paths, keeps original filenames, adds a minimal WebDAV mount/sync path, and reshapes the knowledge panel so local and remote sources can share the same surface.

Constraint: Existing knowledge flow already depends on local-folder-backed uploads and document indexing
Rejected: Real-time bidirectional WebDAV sync | too much conflict and lifecycle complexity for the first pass
Confidence: medium
Scope-risk: moderate
Reversibility: messy
Directive: Keep remote mounts single-direction into local knowledge folders until etag-based incremental sync and conflict rules are verified
Tested: Python py_compile on new/modified backend files; LSP diagnostics on new frontend/backend files; manual targeted code-path inspection
Not-tested: Full pytest/vitest end-to-end runs blocked by environment temp/cache permission errors; live WebDAV server interoperability
This commit is contained in:
2026-04-09 17:26:37 +08:00
parent aa12c92a5a
commit 8c7cf0732b
18 changed files with 2776 additions and 26 deletions

View File

@@ -0,0 +1,108 @@
from io import BytesIO
from datetime import UTC, datetime
from sqlalchemy import and_, select
from sqlalchemy.ext.asyncio import AsyncSession
from starlette.datastructures import UploadFile
from app.models.folder import Folder
from app.models.remote_mount import RemoteMount, RemoteSyncItem
from app.services.document_service import DocumentService
from app.services.webdav_service import WebDavNode, WebDavService
class RemoteSyncService:
def __init__(self, db: AsyncSession, user_id: str):
self.db = db
self.user_id = user_id
async def sync_remote_path(
self,
mount: RemoteMount,
remote_path: str,
local_folder_id: str,
mode: str = "file",
) -> dict:
folder = await self.db.execute(
select(Folder).where(and_(Folder.id == local_folder_id, Folder.user_id == self.user_id))
)
if folder.scalar_one_or_none() is None:
raise ValueError("本地目标文件夹不存在")
webdav = WebDavService(mount)
document_service = DocumentService(self.db, self.user_id)
synced = 0
skipped = 0
failed = 0
document_ids: list[str] = []
errors: list[str] = []
if mode == "folder":
nodes = await webdav.list_tree(remote_path)
targets = self._flatten_files(nodes)
else:
name = remote_path.rstrip("/").split("/")[-1] or "remote-file"
targets = [WebDavNode(path=remote_path, name=name, is_dir=False)]
for node in targets:
try:
content, filename = await webdav.download_file(node.path)
upload = UploadFile(filename=filename, file=BytesIO(content))
document = await document_service.upload_document(self.user_id, upload, folder_id=local_folder_id)
await self._upsert_sync_item(mount.id, node, local_folder_id, document.id)
document_ids.append(document.id)
synced += 1
except Exception as exc: # noqa: BLE001
failed += 1
errors.append(f"{node.path}: {exc}")
await self._upsert_sync_item(mount.id, node, local_folder_id, None, status="failed", error=str(exc))
mount.last_sync_at = datetime.now(UTC).isoformat()
await self.db.commit()
return {
"synced": synced,
"skipped": skipped,
"failed": failed,
"document_ids": document_ids,
"errors": errors,
}
def _flatten_files(self, nodes: list[WebDavNode]) -> list[WebDavNode]:
results: list[WebDavNode] = []
for node in nodes:
if node.is_dir:
results.extend(self._flatten_files(node.children))
else:
results.append(node)
return results
async def _upsert_sync_item(
self,
mount_id: str,
node: WebDavNode,
local_folder_id: str,
local_document_id: str | None,
status: str = "synced",
error: str | None = None,
) -> None:
result = await self.db.execute(
select(RemoteSyncItem).where(
and_(RemoteSyncItem.mount_id == mount_id, RemoteSyncItem.remote_path == node.path)
)
)
sync_item = result.scalar_one_or_none()
if sync_item is None:
sync_item = RemoteSyncItem(
mount_id=mount_id,
remote_path=node.path,
)
self.db.add(sync_item)
sync_item.remote_etag = node.etag
sync_item.remote_modified_at = node.modified_at
sync_item.local_folder_id = local_folder_id
sync_item.local_document_id = local_document_id
sync_item.sync_status = status
sync_item.last_error = error
sync_item.last_synced_at = datetime.now(UTC).isoformat()