Align knowledge storage with real folders and add WebDAV import surface
Knowledge files were only partitioned in the database, which made nested uploads, local folder visibility, and delete behavior diverge from the UI. This change makes folder selection drive physical storage paths, keeps original filenames, adds a minimal WebDAV mount/sync path, and reshapes the knowledge panel so local and remote sources can share the same surface. Constraint: Existing knowledge flow already depends on local-folder-backed uploads and document indexing Rejected: Real-time bidirectional WebDAV sync | too much conflict and lifecycle complexity for the first pass Confidence: medium Scope-risk: moderate Reversibility: messy Directive: Keep remote mounts single-direction into local knowledge folders until etag-based incremental sync and conflict rules are verified Tested: Python py_compile on new/modified backend files; LSP diagnostics on new frontend/backend files; manual targeted code-path inspection Not-tested: Full pytest/vitest end-to-end runs blocked by environment temp/cache permission errors; live WebDAV server interoperability
This commit is contained in:
108
backend/app/services/remote_sync_service.py
Normal file
108
backend/app/services/remote_sync_service.py
Normal file
@@ -0,0 +1,108 @@
|
||||
from io import BytesIO
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from sqlalchemy import and_, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from starlette.datastructures import UploadFile
|
||||
|
||||
from app.models.folder import Folder
|
||||
from app.models.remote_mount import RemoteMount, RemoteSyncItem
|
||||
from app.services.document_service import DocumentService
|
||||
from app.services.webdav_service import WebDavNode, WebDavService
|
||||
|
||||
|
||||
class RemoteSyncService:
|
||||
def __init__(self, db: AsyncSession, user_id: str):
|
||||
self.db = db
|
||||
self.user_id = user_id
|
||||
|
||||
async def sync_remote_path(
|
||||
self,
|
||||
mount: RemoteMount,
|
||||
remote_path: str,
|
||||
local_folder_id: str,
|
||||
mode: str = "file",
|
||||
) -> dict:
|
||||
folder = await self.db.execute(
|
||||
select(Folder).where(and_(Folder.id == local_folder_id, Folder.user_id == self.user_id))
|
||||
)
|
||||
if folder.scalar_one_or_none() is None:
|
||||
raise ValueError("本地目标文件夹不存在")
|
||||
|
||||
webdav = WebDavService(mount)
|
||||
document_service = DocumentService(self.db, self.user_id)
|
||||
|
||||
synced = 0
|
||||
skipped = 0
|
||||
failed = 0
|
||||
document_ids: list[str] = []
|
||||
errors: list[str] = []
|
||||
|
||||
if mode == "folder":
|
||||
nodes = await webdav.list_tree(remote_path)
|
||||
targets = self._flatten_files(nodes)
|
||||
else:
|
||||
name = remote_path.rstrip("/").split("/")[-1] or "remote-file"
|
||||
targets = [WebDavNode(path=remote_path, name=name, is_dir=False)]
|
||||
|
||||
for node in targets:
|
||||
try:
|
||||
content, filename = await webdav.download_file(node.path)
|
||||
upload = UploadFile(filename=filename, file=BytesIO(content))
|
||||
document = await document_service.upload_document(self.user_id, upload, folder_id=local_folder_id)
|
||||
await self._upsert_sync_item(mount.id, node, local_folder_id, document.id)
|
||||
document_ids.append(document.id)
|
||||
synced += 1
|
||||
except Exception as exc: # noqa: BLE001
|
||||
failed += 1
|
||||
errors.append(f"{node.path}: {exc}")
|
||||
await self._upsert_sync_item(mount.id, node, local_folder_id, None, status="failed", error=str(exc))
|
||||
|
||||
mount.last_sync_at = datetime.now(UTC).isoformat()
|
||||
await self.db.commit()
|
||||
return {
|
||||
"synced": synced,
|
||||
"skipped": skipped,
|
||||
"failed": failed,
|
||||
"document_ids": document_ids,
|
||||
"errors": errors,
|
||||
}
|
||||
|
||||
def _flatten_files(self, nodes: list[WebDavNode]) -> list[WebDavNode]:
|
||||
results: list[WebDavNode] = []
|
||||
for node in nodes:
|
||||
if node.is_dir:
|
||||
results.extend(self._flatten_files(node.children))
|
||||
else:
|
||||
results.append(node)
|
||||
return results
|
||||
|
||||
async def _upsert_sync_item(
|
||||
self,
|
||||
mount_id: str,
|
||||
node: WebDavNode,
|
||||
local_folder_id: str,
|
||||
local_document_id: str | None,
|
||||
status: str = "synced",
|
||||
error: str | None = None,
|
||||
) -> None:
|
||||
result = await self.db.execute(
|
||||
select(RemoteSyncItem).where(
|
||||
and_(RemoteSyncItem.mount_id == mount_id, RemoteSyncItem.remote_path == node.path)
|
||||
)
|
||||
)
|
||||
sync_item = result.scalar_one_or_none()
|
||||
if sync_item is None:
|
||||
sync_item = RemoteSyncItem(
|
||||
mount_id=mount_id,
|
||||
remote_path=node.path,
|
||||
)
|
||||
self.db.add(sync_item)
|
||||
|
||||
sync_item.remote_etag = node.etag
|
||||
sync_item.remote_modified_at = node.modified_at
|
||||
sync_item.local_folder_id = local_folder_id
|
||||
sync_item.local_document_id = local_document_id
|
||||
sync_item.sync_status = status
|
||||
sync_item.last_error = error
|
||||
sync_item.last_synced_at = datetime.now(UTC).isoformat()
|
||||
Reference in New Issue
Block a user