//! chkpt_core/ops/restore.rs — restore a workspace to a snapshot state.
1use crate::config::{project_id_from_path, StoreLayout};
2use crate::error::{ChkpttError, Result};
3use crate::index::FileIndex;
4use crate::ops::io_order::sort_scanned_for_locality;
5use crate::ops::lock::ProjectLock;
6use crate::scanner::ScannedFile;
7use crate::store::blob::{bytes_to_hex, hash_path_bytes};
8use crate::store::catalog::{ManifestEntry, MetadataCatalog};
9use crate::store::pack::{PackLocation, PackSet};
10use crate::store::tree::{EntryType, TreeStore};
11use std::collections::{BTreeMap, HashMap, HashSet};
12use std::io::{BufWriter, Write};
13use std::path::Path;
14use std::sync::atomic::{AtomicU64, Ordering};
15
16use crate::ops::progress::{emit, ProgressCallback, ProgressEvent};
17
/// Options controlling a [`restore`] invocation.
#[derive(Default)]
pub struct RestoreOptions {
    /// When true, compute and report the diff without touching the workspace.
    pub dry_run: bool,
    /// Callback invoked with progress events during scan and restore.
    pub progress: ProgressCallback,
}
23
/// Summary of what a restore did (or, for a dry run, would do).
#[derive(Debug)]
pub struct RestoreResult {
    /// Fully resolved snapshot id that was restored.
    pub snapshot_id: String,
    /// Files present in the target snapshot but missing from the workspace.
    pub files_added: u64,
    /// Files present in both whose content hash or symlink-ness differed.
    pub files_changed: u64,
    /// Workspace files absent from the target snapshot.
    pub files_removed: u64,
    /// Files that already matched the target and were left untouched.
    pub files_unchanged: u64,
}
32
/// State of a file as it currently exists in the workspace.
struct CurrentFileState {
    /// 128-bit content hash of the file (or of the symlink target bytes).
    hash: [u8; 16],
    /// Whether the workspace entry is a symlink.
    is_symlink: bool,
}
37
/// State a file should have according to the target snapshot.
///
/// Structurally identical to `CurrentFileState`; kept as a separate type so
/// the two sides of the diff cannot be mixed up.
struct TargetFileState {
    /// 128-bit content hash recorded by the snapshot.
    hash: [u8; 16],
    /// Whether the snapshot records this path as a symlink.
    is_symlink: bool,
}
42
/// Result of comparing the target snapshot state against the workspace.
struct RestoreDiff {
    /// Paths to create: in the target, not in the workspace.
    files_to_add: Vec<String>,
    /// Paths to overwrite: present on both sides, content or type differs.
    files_to_change: Vec<String>,
    /// Paths to delete: in the workspace, not in the target.
    files_to_remove: Vec<String>,
    /// Count of paths already identical on both sides.
    files_unchanged: u64,
}
49
/// Where restored bytes are read from. Currently every blob lives in a pack.
#[derive(Debug, Clone, Copy)]
enum RestoreSource {
    /// Blob stored inside a pack file at the given location.
    Packed(PackLocation),
}
54
/// A single file (or symlink) to materialize during restore.
#[derive(Debug, Clone)]
struct RestoreTask {
    /// Workspace-relative path with `/` separators.
    path: String,
    /// Restore as a symlink; the blob bytes are then the link target.
    is_symlink: bool,
    /// Where to read the blob bytes from.
    source: RestoreSource,
}
61
/// Join `name` onto `prefix` with a `/` separator; an empty prefix yields
/// `name` unchanged (avoids a leading slash on root-level entries).
fn join_relative_path(prefix: &str, name: &str) -> String {
    if prefix.is_empty() {
        return name.to_owned();
    }

    // Reserve the exact final length up front to avoid reallocation.
    let mut joined = String::with_capacity(prefix.len() + name.len() + 1);
    for piece in [prefix, "/", name] {
        joined.push_str(piece);
    }
    joined
}
73
74/// Recursively walk a tree and collect all file entries as (relative_path, blob_hash_hex).
75fn collect_tree_files(
76    tree_store: &TreeStore,
77    tree_hash_hex: &str,
78    prefix: &str,
79    result: &mut BTreeMap<String, TargetFileState>,
80) -> Result<()> {
81    let entries = tree_store.read(tree_hash_hex)?;
82    for entry in &entries {
83        let path = join_relative_path(prefix, &entry.name);
84        match entry.entry_type {
85            EntryType::File => {
86                result.insert(
87                    path,
88                    TargetFileState {
89                        hash: entry.hash,
90                        is_symlink: false,
91                    },
92                );
93            }
94            EntryType::Dir => {
95                let subtree_hash_hex = bytes_to_hex(&entry.hash);
96                collect_tree_files(tree_store, &subtree_hash_hex, &path, result)?;
97            }
98            EntryType::Symlink => {
99                result.insert(
100                    path,
101                    TargetFileState {
102                        hash: entry.hash,
103                        is_symlink: true,
104                    },
105                );
106            }
107        }
108    }
109    Ok(())
110}
111
112fn target_state_from_manifest(manifest: &[ManifestEntry]) -> BTreeMap<String, TargetFileState> {
113    manifest
114        .iter()
115        .map(|entry| {
116            (
117                entry.path.clone(),
118                TargetFileState {
119                    hash: entry.blob_hash,
120                    is_symlink: mode_is_symlink(entry.mode),
121                },
122            )
123        })
124        .collect()
125}
126
127/// Scan the current workspace to get a mapping of (relative_path -> content_hash_hex).
128///
129/// This uses the scanner to discover files, then hashes each file to get the current
130/// content hash for comparison with the target snapshot state.
131fn scan_current_state(
132    workspace_root: &Path,
133    cached_entries: &HashMap<String, crate::index::FileEntry>,
134    include_deps: bool,
135) -> Result<BTreeMap<String, CurrentFileState>> {
136    let scanned = crate::scanner::scan_workspace_with_options(workspace_root, None, include_deps)?;
137    let mut state = BTreeMap::new();
138    let mut stale_files = Vec::with_capacity(scanned.len());
139
140    for file in scanned {
141        if let Some(hash) = cached_hash_bytes(&file, cached_entries) {
142            state.insert(
143                file.relative_path.clone(),
144                CurrentFileState {
145                    hash,
146                    is_symlink: file.is_symlink,
147                },
148            );
149        } else {
150            stale_files.push(file);
151        }
152    }
153
154    for (file, hash) in hash_scanned_files(stale_files)? {
155        state.insert(
156            file.relative_path.clone(),
157            CurrentFileState {
158                hash,
159                is_symlink: file.is_symlink,
160            },
161        );
162    }
163    Ok(state)
164}
165
166/// Pre-create all unique parent directories needed for restore tasks.
167fn precreate_restore_directories(
168    workspace_root: &Path,
169    restore_tasks: &[RestoreTask],
170) -> Result<()> {
171    let mut seen = std::collections::HashSet::with_capacity(restore_tasks.len() / 4);
172    for task in restore_tasks {
173        if let Some(parent) = std::path::Path::new(&task.path).parent() {
174            if parent.as_os_str().is_empty() {
175                continue;
176            }
177            if seen.insert(parent.to_path_buf()) {
178                std::fs::create_dir_all(workspace_root.join(parent))?;
179            }
180        }
181    }
182    Ok(())
183}
184
/// Write every restore task into the workspace, in parallel when worthwhile.
///
/// Tasks are expected to arrive sorted for pack read locality (see
/// `build_restore_tasks`); they are split into contiguous chunks, one per
/// worker thread. The shared `progress_counter` keeps `RestoreFile` progress
/// events monotonically increasing across threads. Returns the relative paths
/// that were restored.
fn restore_files(
    workspace_root: &Path,
    restore_tasks: &[RestoreTask],
    pack_set: &PackSet,
    progress: &ProgressCallback,
    progress_counter: &AtomicU64,
    restore_total: u64,
) -> Result<Vec<String>> {
    if restore_tasks.is_empty() {
        return Ok(Vec::new());
    }

    // Create all parent directories up front; restore_file itself does not
    // create directories.
    precreate_restore_directories(workspace_root, restore_tasks)?;

    let worker_count = std::thread::available_parallelism()
        .map(|count| count.get())
        .unwrap_or(1)
        .min(restore_tasks.len());
    // Sequential fast path: skips thread-scope overhead for tiny restores or
    // when parallelism is unavailable.
    if worker_count <= 1 {
        let mut restored = Vec::with_capacity(restore_tasks.len());
        for task in restore_tasks {
            restore_file(workspace_root, task, pack_set)?;
            let completed = progress_counter.fetch_add(1, Ordering::Relaxed) + 1;
            emit(
                progress,
                ProgressEvent::RestoreFile {
                    completed,
                    total: restore_total,
                },
            );
            restored.push(task.path.clone());
        }
        return Ok(restored);
    }

    // div_ceil so every task lands in exactly one chunk.
    let chunk_size = restore_tasks.len().div_ceil(worker_count);
    std::thread::scope(|scope| {
        let workers: Vec<_> = restore_tasks
            .chunks(chunk_size)
            .map(|chunk| {
                scope.spawn(move || -> Result<Vec<String>> {
                    let mut restored = Vec::with_capacity(chunk.len());
                    for task in chunk {
                        restore_file(workspace_root, task, pack_set)?;
                        let completed = progress_counter.fetch_add(1, Ordering::Relaxed) + 1;
                        emit(
                            progress,
                            ProgressEvent::RestoreFile {
                                completed,
                                total: restore_total,
                            },
                        );
                        restored.push(task.path.clone());
                    }
                    Ok(restored)
                })
            })
            .collect();

        // Join in spawn order so restored_paths preserves chunk order; a
        // worker panic is surfaced as an error rather than propagated.
        let mut restored_paths = Vec::with_capacity(restore_tasks.len());
        for worker in workers {
            let chunk = worker
                .join()
                .map_err(|_| ChkpttError::Other("restore worker thread panicked".into()))??;
            restored_paths.extend(chunk);
        }
        Ok(restored_paths)
    })
}
254
/// Materialize a single task under `workspace_root`, replacing whatever is
/// currently at that path.
fn restore_file(workspace_root: &Path, task: &RestoreTask, pack_set: &PackSet) -> Result<()> {
    let file_path = workspace_root.join(&task.path);

    // Remove an existing entry first when either side is a symlink:
    // File::create would write through an existing link, and creating a new
    // symlink fails if the path already exists.
    if let Ok(metadata) = std::fs::symlink_metadata(&file_path) {
        if metadata.file_type().is_symlink() || task.is_symlink {
            std::fs::remove_file(&file_path)?;
        }
    }

    match task.source {
        RestoreSource::Packed(location) => {
            if task.is_symlink {
                // The blob's bytes are the symlink target path.
                let mut content = Vec::new();
                pack_set.copy_to_writer(&location, &mut content)?;
                restore_symlink(&file_path, &content)?;
            } else {
                // Regular file: stream from the pack through a buffered
                // writer; File::create truncates any existing file.
                let file = std::fs::File::create(&file_path)?;
                let mut writer = BufWriter::with_capacity(256 * 1024, file);
                pack_set.copy_to_writer(&location, &mut writer)?;
                writer.flush()?;
            }
        }
    }

    Ok(())
}
281
/// Create a symlink at `path` whose target is `target_bytes` (unix only).
///
/// The target is interpreted as raw OS bytes, not UTF-8, so non-UTF-8 link
/// targets round-trip correctly.
#[cfg(unix)]
fn restore_symlink(path: &Path, target_bytes: &[u8]) -> Result<()> {
    use std::os::unix::ffi::OsStrExt;
    let target = std::ffi::OsStr::from_bytes(target_bytes);
    std::os::unix::fs::symlink(target, path)?;
    Ok(())
}
289
/// Non-unix stub: symlink restore is unsupported and always errors.
#[cfg(not(unix))]
fn restore_symlink(_path: &Path, _target_bytes: &[u8]) -> Result<()> {
    Err(ChkpttError::RestoreFailed(
        "symlink restore is only supported on unix platforms".into(),
    ))
}
296
/// Resolve a pack location for every blob needed by the add/change lists.
///
/// Deduplicates blob hashes, asks the catalog where each blob lives, opens
/// only the packs actually referenced, and returns the opened `PackSet`
/// together with a blob-hash -> `RestoreSource` map.
///
/// Errors with `ObjectNotFound` when the catalog (or an opened pack) lacks a
/// required blob, and `StoreCorrupted` when a blob has no recorded pack.
fn resolve_restore_sources(
    files_to_add: &[String],
    files_to_change: &[String],
    target_state: &BTreeMap<String, TargetFileState>,
    catalog: &MetadataCatalog,
    packs_dir: &Path,
) -> Result<(PackSet, HashMap<[u8; 16], RestoreSource>)> {
    let candidate_count = files_to_add.len() + files_to_change.len();
    let mut seen_hashes = HashSet::with_capacity(candidate_count);
    let mut packed_hashes = Vec::with_capacity(candidate_count);

    // Deduplicate: many paths can share the same blob.
    for path in files_to_add.iter().chain(files_to_change.iter()) {
        let target = target_state
            .get(path)
            .expect("target hash missing for restore source");
        if !seen_hashes.insert(target.hash) {
            continue;
        }
        packed_hashes.push(target.hash);
    }

    if packed_hashes.is_empty() {
        return Ok((PackSet::empty(), HashMap::new()));
    }

    let blob_locations = catalog.blob_locations_for_hashes(&packed_hashes)?;

    // Single pass: collect pack hashes and build per-blob location info
    let mut selected_pack_hashes = HashSet::with_capacity(packed_hashes.len());
    let mut hash_to_pack: Vec<([u8; 16], String)> = Vec::with_capacity(packed_hashes.len());
    for hash in &packed_hashes {
        let location = blob_locations
            .get(hash)
            .ok_or_else(|| ChkpttError::ObjectNotFound(bytes_to_hex(hash)))?;
        let pack_hash = location.pack_hash.as_ref().ok_or_else(|| {
            ChkpttError::StoreCorrupted(format!(
                "blob {} is not stored in a pack",
                bytes_to_hex(hash)
            ))
        })?;
        selected_pack_hashes.insert(pack_hash.clone());
        hash_to_pack.push((*hash, pack_hash.clone()));
    }

    // Sorted for a deterministic pack-open order.
    let mut pack_hashes_vec: Vec<_> = selected_pack_hashes.into_iter().collect();
    pack_hashes_vec.sort_unstable();
    let pack_set = PackSet::open_selected(packs_dir, &pack_hashes_vec)?;

    // Second pass: resolve each blob to an exact offset inside the opened packs.
    let mut sources = HashMap::with_capacity(hash_to_pack.len());
    for (hash, pack_hash) in hash_to_pack {
        let location = pack_set
            .locate_in_pack_bytes(&pack_hash, &hash)
            .ok_or_else(|| ChkpttError::ObjectNotFound(bytes_to_hex(&hash)))?;
        sources.insert(hash, RestoreSource::Packed(location));
    }

    Ok((pack_set, sources))
}
355
356fn build_restore_tasks(
357    files_to_add: &[String],
358    files_to_change: &[String],
359    target_state: &BTreeMap<String, TargetFileState>,
360    restore_sources: &HashMap<[u8; 16], RestoreSource>,
361) -> Result<Vec<RestoreTask>> {
362    let mut tasks = Vec::with_capacity(files_to_add.len() + files_to_change.len());
363
364    for path in files_to_add.iter().chain(files_to_change.iter()) {
365        let target = target_state
366            .get(path)
367            .expect("target hash missing for restore task");
368        let source = *restore_sources
369            .get(&target.hash)
370            .ok_or_else(|| ChkpttError::ObjectNotFound(bytes_to_hex(&target.hash)))?;
371
372        tasks.push(RestoreTask {
373            path: path.clone(),
374            is_symlink: target.is_symlink,
375            source,
376        });
377    }
378
379    tasks.sort_unstable_by(|left, right| match (&left.source, &right.source) {
380        (RestoreSource::Packed(left_location), RestoreSource::Packed(right_location)) => (
381            left_location.reader_index,
382            left_location.offset,
383            left.path.as_str(),
384        )
385            .cmp(&(
386                right_location.reader_index,
387                right_location.offset,
388                right.path.as_str(),
389            )),
390    });
391    Ok(tasks)
392}
393
/// Classify every path as add / change / remove / unchanged by merge-joining
/// the target and current states.
///
/// Both `BTreeMap`s iterate in ascending path order, so a single linear pass
/// with two peekable iterators suffices — no per-path lookups. A path counts
/// as changed when either its content hash or its symlink-ness differs.
fn diff_restore_states(
    target_state: &BTreeMap<String, TargetFileState>,
    current_state: &BTreeMap<String, CurrentFileState>,
) -> RestoreDiff {
    let mut files_to_add = Vec::with_capacity(target_state.len());
    let mut files_to_change = Vec::with_capacity(target_state.len().min(current_state.len()));
    let mut files_to_remove = Vec::with_capacity(current_state.len());
    let mut files_unchanged = 0;

    let mut target_iter = target_state.iter().peekable();
    let mut current_iter = current_state.iter().peekable();

    loop {
        match (target_iter.peek(), current_iter.peek()) {
            (Some((target_path, target_file)), Some((current_path, current_file))) => {
                match target_path.cmp(current_path) {
                    // Path exists only in the target: create it.
                    std::cmp::Ordering::Less => {
                        files_to_add.push((*target_path).clone());
                        target_iter.next();
                    }
                    // Path exists only in the workspace: delete it.
                    std::cmp::Ordering::Greater => {
                        files_to_remove.push((*current_path).clone());
                        current_iter.next();
                    }
                    // Path on both sides: compare content and entry type.
                    std::cmp::Ordering::Equal => {
                        if target_file.hash != current_file.hash
                            || target_file.is_symlink != current_file.is_symlink
                        {
                            files_to_change.push((*target_path).clone());
                        } else {
                            files_unchanged += 1;
                        }
                        target_iter.next();
                        current_iter.next();
                    }
                }
            }
            // Target has entries left but the workspace is exhausted.
            (Some((target_path, _)), None) => {
                files_to_add.push((*target_path).clone());
                target_iter.next();
            }
            // Workspace has entries left but the target is exhausted.
            (None, Some((current_path, _))) => {
                files_to_remove.push((*current_path).clone());
                current_iter.next();
            }
            (None, None) => break,
        }
    }

    RestoreDiff {
        files_to_add,
        files_to_change,
        files_to_remove,
        files_unchanged,
    }
}
450
/// Restore workspace to a snapshot state.
///
/// This is the main restore function that:
/// 1. Resolves the snapshot ID ("latest" or prefix match)
/// 2. Loads the snapshot and reconstructs the target file state from the tree
/// 3. Compares target state vs current workspace state
/// 4. Either reports what would change (dry_run) or performs the actual restore
///
/// Holds the project lock for the whole operation and, on a real (non-dry)
/// run, updates the file index to reflect the restored workspace before
/// returning.
pub fn restore(
    workspace_root: &Path,
    snapshot_id: &str,
    options: RestoreOptions,
) -> Result<RestoreResult> {
    // 1. Compute project_id, create StoreLayout
    let project_id = project_id_from_path(workspace_root);
    let layout = StoreLayout::new(&project_id);
    layout.ensure_dirs()?;

    // 2. Acquire project lock
    let _lock = ProjectLock::acquire(&layout.locks_dir())?;
    let catalog = MetadataCatalog::open(layout.catalog_path())?;

    // 3. Resolve snapshot ID
    let resolved_snapshot = catalog.resolve_snapshot_ref(snapshot_id)?;
    let resolved_id = resolved_snapshot.id.clone();

    // 4. Load snapshot's tree to get target state (path -> blob_hash_hex).
    // Prefer the flat manifest; when it is empty but the snapshot has files,
    // fall back to walking the root tree (NOTE(review): presumably snapshots
    // written before manifests existed — confirm).
    let manifest = catalog.snapshot_manifest(&resolved_id)?;
    let target_state = if resolved_snapshot.stats.total_files == 0 {
        BTreeMap::new()
    } else if manifest.is_empty() {
        let tree_store = TreeStore::new(layout.trees_dir());
        let root_tree_hash = resolved_snapshot.root_tree_hash.ok_or_else(|| {
            ChkpttError::StoreCorrupted(format!(
                "snapshot '{}' is missing both manifest entries and root_tree_hash",
                resolved_id
            ))
        })?;
        let root_tree_hash_hex = bytes_to_hex(&root_tree_hash);
        let mut state = BTreeMap::new();
        collect_tree_files(&tree_store, &root_tree_hash_hex, "", &mut state)?;
        state
    } else {
        target_state_from_manifest(&manifest)
    };
    // If the snapshot captured dependency dirs (node_modules, .venv, …), the
    // workspace scan must include them so they are diffed too.
    let target_includes_deps = target_state
        .keys()
        .any(|path| path_contains_dependency_dir(path));

    // 5. Scan current workspace to get current state (path -> content_hash_hex)
    let mut index = FileIndex::open(layout.index_path())?;
    let cached_entries = index.entries();
    let current_state = scan_current_state(workspace_root, &cached_entries, target_includes_deps)?;
    emit(
        &options.progress,
        ProgressEvent::ScanCurrentComplete {
            file_count: current_state.len() as u64,
        },
    );

    // 6. Compare target state vs current state
    let diff = diff_restore_states(&target_state, &current_state);
    let files_to_add = diff.files_to_add;
    let files_to_change = diff.files_to_change;
    let files_to_remove = diff.files_to_remove;
    let files_unchanged = diff.files_unchanged;

    let result = RestoreResult {
        snapshot_id: resolved_id.clone(),
        files_added: files_to_add.len() as u64,
        files_changed: files_to_change.len() as u64,
        files_removed: files_to_remove.len() as u64,
        files_unchanged,
    };

    // 7. If dry_run, return result without modifying workspace
    if options.dry_run {
        return Ok(result);
    }

    // 8. Perform actual restore
    let packs_dir = layout.packs_dir();
    let (pack_set, restore_sources) = resolve_restore_sources(
        &files_to_add,
        &files_to_change,
        &target_state,
        &catalog,
        &packs_dir,
    )?;

    // Progress total covers writes and removals alike.
    let restore_total = (files_to_add.len() + files_to_change.len() + files_to_remove.len()) as u64;
    emit(
        &options.progress,
        ProgressEvent::RestoreStart {
            add: files_to_add.len() as u64,
            change: files_to_change.len() as u64,
            remove: files_to_remove.len() as u64,
        },
    );

    // 8a. Restore files that need to be added or changed (parallel)
    let restore_tasks = build_restore_tasks(
        &files_to_add,
        &files_to_change,
        &target_state,
        &restore_sources,
    )?;
    let restore_progress = AtomicU64::new(0);
    let restored_paths = restore_files(
        workspace_root,
        &restore_tasks,
        &pack_set,
        &options.progress,
        &restore_progress,
        restore_total,
    )?;

    // 8b. Remove files that are not in the target snapshot (parallel).
    // Already-missing files (NotFound) are treated as successfully removed.
    {
        let remove_worker_count = std::thread::available_parallelism()
            .map(|n| n.get())
            .unwrap_or(1)
            .min(files_to_remove.len().max(1));
        if remove_worker_count <= 1 {
            for path in &files_to_remove {
                let file_path = workspace_root.join(path);
                match std::fs::remove_file(&file_path) {
                    Ok(()) => {}
                    Err(error) if error.kind() == std::io::ErrorKind::NotFound => {}
                    Err(error) => return Err(error.into()),
                }
                let completed = restore_progress.fetch_add(1, Ordering::Relaxed) + 1;
                emit(
                    &options.progress,
                    ProgressEvent::RestoreFile {
                        completed,
                        total: restore_total,
                    },
                );
            }
        } else {
            let chunk_size = files_to_remove.len().div_ceil(remove_worker_count);
            let progress_ref = &restore_progress;
            let progress_cb_ref = &options.progress;
            std::thread::scope(|scope| -> Result<()> {
                let workers: Vec<_> = files_to_remove
                    .chunks(chunk_size)
                    .map(|chunk| {
                        scope.spawn(move || -> Result<()> {
                            for path in chunk {
                                let file_path = workspace_root.join(path);
                                match std::fs::remove_file(&file_path) {
                                    Ok(()) => {}
                                    Err(error) if error.kind() == std::io::ErrorKind::NotFound => {}
                                    Err(error) => return Err(error.into()),
                                }
                                let completed = progress_ref.fetch_add(1, Ordering::Relaxed) + 1;
                                emit(
                                    progress_cb_ref,
                                    ProgressEvent::RestoreFile {
                                        completed,
                                        total: restore_total,
                                    },
                                );
                            }
                            Ok(())
                        })
                    })
                    .collect();

                for worker in workers {
                    worker.join().map_err(|_| {
                        ChkpttError::Other("file removal worker thread panicked".into())
                    })??;
                }
                Ok(())
            })?;
        }
    }

    // 8c. Clean up empty directories affected by removed files only.
    cleanup_removed_file_parents(workspace_root, &files_to_remove)?;

    // 9. Update the index: drop removed paths, record fresh stat + hash for
    // everything just written, so the next scan can use the cached fast path.
    let file_entries = restored_index_entries(workspace_root, &restored_paths, &target_state)?;
    index.apply_changes(&files_to_remove, &file_entries)?;

    Ok(result)
}
638
/// Return true when any `/`-separated component of `relative_path` is a
/// well-known dependency or virtualenv directory name.
fn path_contains_dependency_dir(relative_path: &str) -> bool {
    // Exact component names; substrings (e.g. "my_node_modules") do not match.
    const DEPENDENCY_DIRS: [&str; 8] = [
        "node_modules",
        ".venv",
        "venv",
        "__pypackages__",
        ".tox",
        ".nox",
        ".gradle",
        ".m2",
    ];
    relative_path
        .split('/')
        .any(|component| DEPENDENCY_DIRS.contains(&component))
}
654
/// True when the unix file-type bits of `mode` mark the entry as a symlink.
fn mode_is_symlink(mode: u32) -> bool {
    const S_IFMT: u32 = 0o170000; // file-type bit mask
    const S_IFLNK: u32 = 0o120000; // symlink type value
    mode & S_IFMT == S_IFLNK
}
658
/// Build fresh index entries for just-restored paths.
///
/// Each path is stat-ed (symlink_metadata, so links are not followed) and the
/// on-disk metadata is paired with the blob hash already known from the
/// target state — no re-hashing of file contents is needed.
///
/// Errors if a restored path cannot be stat-ed or is missing from
/// `target_state` (the latter indicates a caller bug / corrupted state).
fn restored_index_entries(
    workspace_root: &Path,
    restored_paths: &[String],
    target_state: &BTreeMap<String, TargetFileState>,
) -> Result<Vec<crate::index::FileEntry>> {
    let mut file_entries = Vec::with_capacity(restored_paths.len());
    for path in restored_paths {
        let absolute_path = workspace_root.join(path);
        let metadata = std::fs::symlink_metadata(&absolute_path)?;
        let target = target_state.get(path).ok_or_else(|| {
            ChkpttError::RestoreFailed(format!("Missing target hash for {}", path))
        })?;
        let scanned = scanned_file_from_metadata(path.clone(), absolute_path, &metadata);

        file_entries.push(crate::index::FileEntry {
            path: scanned.relative_path,
            blob_hash: target.hash,
            size: scanned.size,
            mtime_secs: scanned.mtime_secs,
            mtime_nanos: scanned.mtime_nanos,
            inode: scanned.inode,
            mode: scanned.mode,
        });
    }
    Ok(file_entries)
}
685
686fn cached_hash_bytes(
687    file: &ScannedFile,
688    cached_entries: &HashMap<String, crate::index::FileEntry>,
689) -> Option<[u8; 16]> {
690    let cached = cached_entries.get(&file.relative_path)?;
691    if cached.mtime_secs == file.mtime_secs
692        && cached.mtime_nanos == file.mtime_nanos
693        && cached.size == file.size
694        && cached.inode == file.inode
695        && cached.mode == file.mode
696    {
697        Some(cached.blob_hash)
698    } else {
699        None
700    }
701}
702
/// Content-hash the given files, in parallel when there is enough work.
///
/// Files are first reordered for I/O locality; the returned pairs follow that
/// order. Each file is paired with its 128-bit content hash (symlinks hash
/// their target bytes, per `hash_path_bytes`).
fn hash_scanned_files(scanned_files: Vec<ScannedFile>) -> Result<Vec<(ScannedFile, [u8; 16])>> {
    if scanned_files.is_empty() {
        return Ok(Vec::new());
    }
    // Rebind mutably so the locality sort can reorder in place.
    let mut scanned_files = scanned_files;
    sort_scanned_for_locality(&mut scanned_files);

    let worker_count = std::thread::available_parallelism()
        .map(|count| count.get())
        .unwrap_or(1)
        .min(scanned_files.len());
    // Sequential fast path: no thread-scope overhead for small batches.
    if worker_count <= 1 {
        return scanned_files
            .into_iter()
            .map(|file| {
                Ok((
                    file.clone(),
                    hash_path_bytes(&file.absolute_path, file.is_symlink)?,
                ))
            })
            .collect();
    }

    // div_ceil so every file lands in exactly one chunk.
    let chunk_size = scanned_files.len().div_ceil(worker_count);
    std::thread::scope(|scope| {
        let mut workers = Vec::with_capacity(scanned_files.len().div_ceil(chunk_size));
        for chunk in scanned_files.chunks(chunk_size) {
            workers.push(
                scope.spawn(move || -> Result<Vec<(ScannedFile, [u8; 16])>> {
                    chunk
                        .iter()
                        .map(|file| {
                            Ok((
                                file.clone(),
                                hash_path_bytes(&file.absolute_path, file.is_symlink)?,
                            ))
                        })
                        .collect()
                }),
            );
        }

        // Join in spawn order so the output preserves the locality ordering.
        let mut hashed = Vec::with_capacity(scanned_files.len());
        for worker in workers {
            let chunk = worker
                .join()
                .map_err(|_| ChkpttError::Other("restore worker thread panicked".into()))??;
            hashed.extend(chunk);
        }
        Ok(hashed)
    })
}
755
/// Build a `ScannedFile` from already-fetched metadata (unix variant).
///
/// Unix exposes real mode, inode and device numbers, so the resulting entry
/// carries everything the index quick-check compares.
#[cfg(unix)]
fn scanned_file_from_metadata(
    relative_path: String,
    absolute_path: std::path::PathBuf,
    metadata: &std::fs::Metadata,
) -> ScannedFile {
    use std::os::unix::fs::MetadataExt;

    ScannedFile {
        relative_path,
        absolute_path,
        size: metadata.len(),
        mtime_secs: metadata.mtime(),
        mtime_nanos: metadata.mtime_nsec(),
        device: Some(metadata.dev()),
        inode: Some(metadata.ino()),
        mode: metadata.mode(),
        is_symlink: metadata.file_type().is_symlink(),
    }
}
776
/// Build a `ScannedFile` from already-fetched metadata (non-unix variant).
///
/// Without unix metadata, device/inode are `None`, mtime falls back to 0 when
/// unavailable or pre-epoch, and the mode is synthesized (0o120000 for
/// symlinks, 0o644 otherwise) so `mode_is_symlink` still works.
#[cfg(not(unix))]
fn scanned_file_from_metadata(
    relative_path: String,
    absolute_path: std::path::PathBuf,
    metadata: &std::fs::Metadata,
) -> ScannedFile {
    use std::time::UNIX_EPOCH;

    let (mtime_secs, mtime_nanos) = metadata
        .modified()
        .ok()
        .and_then(|time| time.duration_since(UNIX_EPOCH).ok())
        .map(|duration| (duration.as_secs() as i64, duration.subsec_nanos() as i64))
        .unwrap_or((0, 0));

    let is_symlink = metadata.file_type().is_symlink();
    ScannedFile {
        relative_path,
        absolute_path,
        size: metadata.len(),
        mtime_secs,
        mtime_nanos,
        device: None,
        inode: None,
        mode: if is_symlink { 0o120000 } else { 0o644 },
        is_symlink,
    }
}
805
/// Remove directories left empty by the removed files, deepest-first.
///
/// Only ancestors of `removed_paths` are considered (paths use `/`
/// separators). `remove_dir` refuses non-empty directories, so directories
/// that still hold files are skipped via the `DirectoryNotEmpty` error kind;
/// `NotFound` is tolerated for directories already gone.
fn cleanup_removed_file_parents(root: &Path, removed_paths: &[String]) -> Result<()> {
    if removed_paths.is_empty() {
        return Ok(());
    }

    // Collect unique parent directory relative paths with pre-computed depth
    let mut dir_depths: HashMap<String, usize> = HashMap::new();
    for removed_path in removed_paths {
        // Walk every ancestor by trimming the last `/`-separated component.
        let mut path_str = removed_path.as_str();
        while let Some(pos) = path_str.rfind('/') {
            path_str = &path_str[..pos];
            let depth = path_str.matches('/').count() + 1;
            dir_depths.entry(path_str.to_string()).or_insert(depth);
        }
    }

    // Sort deepest-first so we remove leaf directories before parents
    let mut candidates: Vec<(String, usize)> = dir_depths.into_iter().collect();
    candidates.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));

    for (relative_dir, _depth) in candidates {
        let dir = root.join(&relative_dir);
        match std::fs::remove_dir(&dir) {
            Ok(()) => {}
            Err(error) if error.kind() == std::io::ErrorKind::NotFound => {}
            Err(error) if error.kind() == std::io::ErrorKind::DirectoryNotEmpty => {}
            Err(error) => return Err(error.into()),
        }
    }

    Ok(())
}
838
839#[cfg(test)]
840mod tests {
841    use super::*;
842    use tempfile::TempDir;
843
    #[test]
    fn test_cleanup_removed_file_parents_removes_only_empty_ancestor_chain() {
        // a/b/c becomes empty once its only file is gone, while a/ keeps a
        // non-empty sibling — cleanup must delete c and b but stop at a.
        let dir = TempDir::new().unwrap();
        let root = dir.path();

        let empty_leaf = root.join("a/b/c");
        std::fs::create_dir_all(&empty_leaf).unwrap();
        std::fs::write(empty_leaf.join("gone.txt"), b"gone").unwrap();
        std::fs::remove_file(empty_leaf.join("gone.txt")).unwrap();

        let non_empty_leaf = root.join("a/keep");
        std::fs::create_dir_all(&non_empty_leaf).unwrap();
        std::fs::write(non_empty_leaf.join("keep.txt"), b"keep").unwrap();

        cleanup_removed_file_parents(root, &[String::from("a/b/c/gone.txt")]).unwrap();

        assert!(!root.join("a/b/c").exists());
        assert!(!root.join("a/b").exists());
        assert!(root.join("a").exists());
        assert!(root.join("a/keep/keep.txt").exists());
    }
865
    #[test]
    fn test_cleanup_removed_file_parents_skips_non_empty_directories() {
        // The removed file's parent still holds another file, so cleanup must
        // leave the directory (and its remaining content) alone.
        let dir = TempDir::new().unwrap();
        let root = dir.path();

        let shared = root.join("shared");
        std::fs::create_dir_all(&shared).unwrap();
        std::fs::write(shared.join("still-here.txt"), b"keep").unwrap();

        cleanup_removed_file_parents(root, &[String::from("shared/gone.txt")]).unwrap();

        assert!(root.join("shared").exists());
        assert!(root.join("shared/still-here.txt").exists());
    }
880
    #[test]
    fn test_diff_restore_states_classifies_paths() {
        // Target: a (only in target), b (hash and type differ), c (identical).
        // Current additionally has d, which must be flagged for removal.
        let target_state = BTreeMap::from([
            (
                "a.txt".to_string(),
                TargetFileState {
                    hash: hash_bytes("hash-a"),
                    is_symlink: false,
                },
            ),
            (
                "b.txt".to_string(),
                TargetFileState {
                    hash: hash_bytes("hash-b-target"),
                    is_symlink: true,
                },
            ),
            (
                "c.txt".to_string(),
                TargetFileState {
                    hash: hash_bytes("hash-c"),
                    is_symlink: false,
                },
            ),
        ]);
        let current_state = BTreeMap::from([
            (
                "b.txt".to_string(),
                CurrentFileState {
                    hash: hash_bytes("hash-b-current"),
                    is_symlink: false,
                },
            ),
            (
                "c.txt".to_string(),
                CurrentFileState {
                    hash: hash_bytes("hash-c"),
                    is_symlink: false,
                },
            ),
            (
                "d.txt".to_string(),
                CurrentFileState {
                    hash: hash_bytes("hash-d"),
                    is_symlink: false,
                },
            ),
        ]);

        let diff = diff_restore_states(&target_state, &current_state);
        assert_eq!(diff.files_to_add, vec!["a.txt".to_string()]);
        assert_eq!(diff.files_to_change, vec!["b.txt".to_string()]);
        assert_eq!(diff.files_to_remove, vec!["d.txt".to_string()]);
        assert_eq!(diff.files_unchanged, 1);
    }
936
    #[test]
    fn test_diff_restore_states_handles_empty_inputs() {
        // Two empty maps must produce an entirely empty diff.
        let target_state: BTreeMap<String, TargetFileState> = BTreeMap::new();
        let current_state: BTreeMap<String, CurrentFileState> = BTreeMap::new();
        let diff = diff_restore_states(&target_state, &current_state);

        assert!(diff.files_to_add.is_empty());
        assert!(diff.files_to_change.is_empty());
        assert!(diff.files_to_remove.is_empty());
        assert_eq!(diff.files_unchanged, 0);
    }
948
    #[test]
    fn test_diff_restore_states_detects_type_changes() {
        // Identical hash but differing symlink-ness must still count as a
        // change (the entry type on disk needs to be swapped).
        let target_state = BTreeMap::from([(
            "link".to_string(),
            TargetFileState {
                hash: hash_bytes("same-hash"),
                is_symlink: true,
            },
        )]);
        let current_state = BTreeMap::from([(
            "link".to_string(),
            CurrentFileState {
                hash: hash_bytes("same-hash"),
                is_symlink: false,
            },
        )]);

        let diff = diff_restore_states(&target_state, &current_state);
        assert_eq!(diff.files_to_change, vec!["link".to_string()]);
    }
969
    /// Deterministically fabricate a 16-byte hash from a label, for tests.
    fn hash_bytes(label: &str) -> [u8; 16] {
        xxhash_rust::xxh3::xxh3_128(label.as_bytes()).to_le_bytes()
    }
973}