Skip to main content

void_core/workspace/
checkout.rs

//! Checkout module - restore files from a commit to the working tree
//!
//! Provides functions for checking out files from encrypted commits:
//! - `checkout_tree`: Restore entire tree from a commit
//! - `checkout_paths`: Restore specific paths from a commit
//!
//! Uses parallel file restoration via rayon for performance.

9use std::collections::{HashMap, HashSet};
10use std::fs::{self, File};
11use std::io::{BufWriter, Write};
12use std::path::{Path, PathBuf};
13use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
14use std::sync::Arc;
15use std::time::SystemTime;
16
17use camino::Utf8PathBuf;
18use rayon::prelude::*;
19
20use void_crypto::WrappedKey;
21
22use crate::crypto::{CommitReader, ContentKey, KeyVault, SecretKey};
23use crate::staged;
24use crate::index::{
25    entry_matches_file, read_index, write_workspace_index, IndexEntry, WorkspaceIndex,
26};
27use crate::metadata::{ManifestEntry, ShardReference};
28use crate::metadata::manifest_tree::TreeManifest;
29use crate::pathspec::Pathspec;
30
31use crate::store::ObjectStoreExt;
32use crate::support::events::{emit_workspace, VoidObserver, WorkspaceEvent};
33use crate::{cid, ContentHash, Result, VoidError};
34
35/// Options for checkout operations
36#[derive(Clone)]
37pub struct CheckoutOptions {
38    /// Specific paths to checkout (None = full tree)
39    pub paths: Option<Vec<String>>,
40    /// Overwrite modified files without prompting
41    pub force: bool,
42    /// Optional observer for progress events.
43    pub observer: Option<Arc<dyn VoidObserver>>,
44    /// Per-workspace state directory for index operations.
45    /// When `None`, defaults to `workspace.join(".void")` (main workspace).
46    pub workspace_dir: Option<PathBuf>,
47    /// Include large (chunked) files in full-tree checkout.
48    /// When false (default), full-tree checkout skips files with shard_count > 1.
49    /// Path-specific checkout always includes large files regardless.
50    pub include_large: bool,
51}
52
53impl Default for CheckoutOptions {
54    fn default() -> Self {
55        Self {
56            paths: None,
57            force: false,
58            observer: None,
59            workspace_dir: None,
60            include_large: false,
61        }
62    }
63}
64
/// Statistics from checkout operation.
///
/// Returned by `checkout_tree` / `checkout_paths`; all counters start at
/// zero via the `Default` derive and are accumulated during restoration.
#[derive(Clone, Debug, Default)]
pub struct CheckoutStats {
    /// Number of files restored to the workspace
    pub files_restored: usize,
    /// Total bytes written to disk
    pub bytes_written: u64,
    /// Number of files skipped (already up-to-date)
    pub files_skipped: usize,
    /// Number of shards read
    pub shards_read: usize,
    /// Number of large files deferred (not checked out)
    pub files_deferred: usize,
}
79
/// A file to restore during checkout.
///
/// Each entry maps a file path to its manifest entry and shard info.
/// For chunked files (`entry.shard_count > 1`) only the shard at
/// `entry.shard_index` is recorded here; `restore_chunked_file` walks the
/// remaining chunk shards from the manifest's shard table instead.
#[derive(Clone, Debug)]
pub struct FileToRestore {
    /// The manifest entry with path, content_hash, offset, length, shard_index
    pub entry: ManifestEntry,
    /// Shard CID (from manifest.shards)
    pub shard_cid: void_crypto::ShardCid,
    /// Wrapped shard key (if shard uses per-shard encryption)
    pub wrapped_key: Option<WrappedKey>,
}
92
/// Helper struct to hold loaded commit/manifest info.
///
/// Produced by `load_commit_info` and consumed by `plan_checkout` /
/// `checkout_tree`.
struct CommitInfo {
    /// Decrypted tree manifest for the commit.
    manifest: TreeManifest,
    /// All files in this commit with their content hashes (path -> content_hash)
    all_files: HashMap<String, ContentHash>,
    /// Commit reader for shard decryption
    reader: CommitReader,
    /// Ancestor content keys for shard decryption fallback
    ancestor_keys: Vec<ContentKey>,
}
103
/// Result of `plan_checkout`: what to restore, what to defer, and the
/// supporting state needed to finish the checkout.
struct CheckoutPlan {
    /// Files to materialize (both single-shard and chunked), in manifest order.
    files: Vec<FileToRestore>,
    /// Non-materialized index entries for deferred large files.
    deferred_entries: Vec<IndexEntry>,
    /// Workspace index loaded from disk, if one existed.
    index: Option<WorkspaceIndex>,
    /// Matcher built from `options.paths` (an empty spec matches everything).
    pathspec: Pathspec,
    /// Number of large files skipped during full-tree checkout.
    files_deferred: usize,
}
113
114/// Loads commit and manifest, returning deserialized info.
115fn load_commit_info<S: ObjectStoreExt>(
116    store: &S,
117    vault: &KeyVault,
118    commit_cid: &crate::VoidCid,
119) -> Result<CommitInfo> {
120    let commit_encrypted: void_crypto::EncryptedCommit = store.get_blob(commit_cid)?;
121    let (commit_bytes, reader) = CommitReader::open_with_vault(vault, &commit_encrypted)?;
122    let commit = commit_bytes.parse()?;
123
124    let manifest = TreeManifest::from_commit(store, &commit, &reader)?
125        .ok_or_else(|| VoidError::IntegrityError {
126            expected: "manifest_cid present on commit".into(),
127            actual: "None".into(),
128        })?;
129
130    // Collect all files map from manifest
131    let mut all_files = HashMap::new();
132    for entry_result in manifest.iter() {
133        let entry = entry_result?;
134        all_files.insert(entry.path.clone(), entry.content_hash);
135    }
136
137    let ancestor_keys = crate::crypto::collect_ancestor_content_keys_vault(vault, store, &commit);
138
139    Ok(CommitInfo {
140        manifest,
141        all_files,
142        reader,
143        ancestor_keys,
144    })
145}
146
147/// Determines which files need to be restored and groups them by shard.
148fn plan_checkout(
149    vault: &KeyVault,
150    commit_info: &CommitInfo,
151    workspace: &Path,
152    options: &CheckoutOptions,
153) -> Result<CheckoutPlan> {
154    // Build pathspec matcher if paths are specified
155    let pathspec = match &options.paths {
156        Some(paths) => {
157            let path_refs: Vec<&str> = paths.iter().map(|s| s.as_str()).collect();
158            Pathspec::new(&path_refs)?
159        }
160        None => Pathspec::new(&[])?, // matches all
161    };
162
163    // Try to load existing index for dirty detection
164    let void_dir = options.workspace_dir.clone().unwrap_or_else(|| workspace.join(".void"));
165    let existing_index = if void_dir.exists() {
166        read_index(&void_dir, vault.index_key()?).ok()
167    } else {
168        None
169    };
170    let base_path = Utf8PathBuf::try_from(workspace.to_path_buf())
171        .map_err(|e| VoidError::Io(std::io::Error::new(std::io::ErrorKind::InvalidData, e)))?;
172
173    let shards = commit_info.manifest.shards();
174    let mut files_to_restore = Vec::new();
175    let mut deferred_entries = Vec::new();
176    let mut files_deferred = 0usize;
177    let is_path_specific = options.paths.is_some();
178
179    for entry_result in commit_info.manifest.iter() {
180        let entry = entry_result?;
181
182        // Check if path matches filter
183        if !pathspec.matches(&entry.path) {
184            emit_workspace(
185                &options.observer,
186                WorkspaceEvent::FileSkipped {
187                    path: entry.path.clone(),
188                    reason: "does not match pathspec".to_string(),
189                },
190            );
191            continue;
192        }
193
194        // Check if file is dirty (modified in workspace)
195        if !options.force {
196            if let Some(ref index) = existing_index {
197                if let Some(idx_entry) = index.get(&entry.path) {
198                    let file_path_on_disk = crate::util::safe_join(workspace, &entry.path)?;
199                    if file_path_on_disk.exists() {
200                        let matches = entry_matches_file(idx_entry, &base_path).unwrap_or(false);
201                        if !matches {
202                            return Err(VoidError::Shard(format!(
203                                "file '{}' has local modifications; use --force to overwrite",
204                                entry.path
205                            )));
206                        }
207                    }
208                }
209            }
210        }
211
212        // Skip large (chunked) files during full-tree checkout unless explicitly requested
213        if entry.shard_count > 1 && !is_path_specific && !options.include_large {
214            files_deferred += 1;
215            deferred_entries.push(IndexEntry::new_remote(
216                entry.path.clone(),
217                entry.content_hash,
218                entry.size,
219            ));
220            emit_workspace(
221                &options.observer,
222                WorkspaceEvent::FileSkipped {
223                    path: entry.path.clone(),
224                    reason: "large file (use --include-large or checkout by path)".to_string(),
225                },
226            );
227            continue;
228        }
229
230        // Get shard info from manifest
231        let shard_ref = shards.get(entry.shard_index as usize)
232            .ok_or_else(|| VoidError::Shard(format!(
233                "shard_index {} out of range for file '{}'", entry.shard_index, entry.path
234            )))?;
235
236        files_to_restore.push(FileToRestore {
237            entry,
238            shard_cid: shard_ref.cid.clone(),
239            wrapped_key: shard_ref.wrapped_key.clone(),
240        });
241    }
242
243    Ok(CheckoutPlan {
244        files: files_to_restore,
245        deferred_entries,
246        index: existing_index,
247        pathspec,
248        files_deferred,
249    })
250}
251
252fn prune_extra_files(
253    workspace: &Path,
254    pathspec: &Pathspec,
255    target_set: &HashSet<String>,
256    existing_index: Option<&WorkspaceIndex>,
257    force: bool,
258) -> Result<Vec<String>> {
259    let Some(index) = existing_index else {
260        return Ok(Vec::new());
261    };
262
263    let base_path = Utf8PathBuf::try_from(workspace.to_path_buf())
264        .map_err(|e| VoidError::Io(std::io::Error::new(std::io::ErrorKind::InvalidData, e)))?;
265
266    let mut removed = Vec::new();
267
268    for entry in &index.entries {
269        if !pathspec.matches(&entry.path) {
270            continue;
271        }
272        if target_set.contains(&entry.path) {
273            continue;
274        }
275
276        let file_path = crate::util::safe_join(workspace, &entry.path)?;
277        if file_path.exists() {
278            if !force {
279                let matches = entry_matches_file(entry, &base_path).unwrap_or(false);
280                if !matches {
281                    return Err(VoidError::Shard(format!(
282                        "file '{}' has local modifications; use --force to overwrite",
283                        entry.path
284                    )));
285                }
286            }
287            fs::remove_file(&file_path)?;
288        }
289
290        removed.push(entry.path.clone());
291    }
292
293    Ok(removed)
294}
295
/// Checkout entire tree from a commit.
///
/// Pipeline:
/// 1. Load and decrypt the commit + manifest (`load_commit_info`).
/// 2. Plan which files to restore and which to defer (`plan_checkout`).
/// 3. Prune tracked files absent from the target tree (`prune_extra_files`).
/// 4. Restore single-shard files in parallel, then chunked files
///    sequentially (one shard in memory at a time).
/// 5. Rewrite the workspace index, when a `.void` state directory exists.
pub fn checkout_tree<S: ObjectStoreExt + Sync>(
    store: &S,
    vault: &KeyVault,
    commit_cid: &crate::VoidCid,
    workspace: &Path,
    options: &CheckoutOptions,
) -> Result<CheckoutStats> {
    let commit_info = load_commit_info(store, vault, commit_cid)?;
    let CheckoutPlan { files: plan_files, deferred_entries, index: plan_index, pathspec: plan_pathspec, files_deferred } =
        plan_checkout(vault, &commit_info, workspace, options)?;
    // Paths that should exist after checkout; everything else tracked in the
    // index (and matching the pathspec) gets pruned.
    let target_set: HashSet<String> = commit_info
        .all_files
        .keys()
        .filter(|path| plan_pathspec.matches(path))
        .cloned()
        .collect();

    let removed_paths = prune_extra_files(
        workspace,
        &plan_pathspec,
        &target_set,
        plan_index.as_ref(),
        options.force,
    )?;

    let void_dir = options.workspace_dir.clone().unwrap_or_else(|| workspace.join(".void"));

    // Separate small files from chunked files
    let (small_files, chunked_files): (Vec<FileToRestore>, Vec<FileToRestore>) = plan_files
        .into_iter()
        .partition(|f| f.entry.shard_count <= 1);

    // Small files: restored in parallel, grouped by shard; also writes
    // staged blobs under `void_dir` so later commits can find the content.
    let (mut stats, mut restored_entries) =
        restore_files(store, &commit_info.reader, vault.staged_key()?, &commit_info.ancestor_keys, workspace, &small_files, &options.observer, Some(&void_dir))?;

    // Restore chunked files sequentially (one shard in memory at a time)
    let manifest_shards = commit_info.manifest.shards();
    for file_to_restore in &chunked_files {
        let (bytes, index_entry) = restore_chunked_file(
            store,
            &commit_info.reader,
            &commit_info.ancestor_keys,
            workspace,
            &file_to_restore.entry,
            manifest_shards,
        )?;
        stats.files_restored += 1;
        stats.bytes_written += bytes;
        stats.shards_read += file_to_restore.entry.shard_count as usize;
        restored_entries.push(index_entry);

        emit_workspace(
            &options.observer,
            WorkspaceEvent::FileCheckedOut {
                path: file_to_restore.entry.path.clone(),
            },
        );
    }
    if void_dir.exists() {
        let index = if options.paths.is_some() {
            // Path-specific checkout: patch the existing index in place —
            // drop pruned entries, upsert restored ones.
            let mut index = plan_index.unwrap_or_else(WorkspaceIndex::empty);
            if !removed_paths.is_empty() {
                let removed: HashSet<String> = removed_paths.into_iter().collect();
                index.entries.retain(|entry| !removed.contains(&entry.path));
            }
            for entry in restored_entries {
                index.upsert_entry(entry);
            }
            index
        } else {
            // Full checkout: build complete index from ALL commit files
            let mut all_entries = restored_entries;
            // Clone paths into owned HashSet to avoid borrow conflict
            let restored_paths: HashSet<String> = all_entries.iter().map(|e| e.path.clone()).collect();

            // Add entries for files that weren't restored (already existed with matching content)
            for (path, content_hash) in &commit_info.all_files {
                if !restored_paths.contains(path.as_str()) {
                    // File was skipped, create index entry from disk metadata
                    let file_path = crate::util::safe_join(workspace, path)?;
                    if file_path.exists() {
                        if let Ok(metadata) = std::fs::metadata(&file_path) {
                            // Missing/unreadable mtime degrades to epoch (0, 0).
                            let (mtime_secs, mtime_nanos) = metadata
                                .modified()
                                .ok()
                                .and_then(|mtime| mtime.duration_since(SystemTime::UNIX_EPOCH).ok())
                                .map(|dur| (dur.as_secs(), dur.subsec_nanos()))
                                .unwrap_or((0, 0));

                            let entry = IndexEntry::new(
                                path.clone(),
                                *content_hash,
                                mtime_secs,
                                mtime_nanos,
                                metadata.len(),
                            );
                            all_entries.push(entry);

                            // Write staged blob for skipped files too
                            if !staged::has_staged_blob(&void_dir, content_hash) {
                                let content = fs::read(&file_path)?;
                                staged::write_staged_blob(&void_dir, vault.staged_key()?, content_hash, &content)?;
                            }
                        }
                    }
                }
            }

            // Add non-materialized entries for deferred large files
            // NOTE(review): a deferred large file that already exists on disk
            // also receives a disk-metadata entry from the loop above, so the
            // index may end up with two entries for the same path — confirm
            // whether WorkspaceIndex::new deduplicates by path.
            all_entries.extend(deferred_entries);

            WorkspaceIndex::new(Some(void_crypto::CommitCid::from_bytes(commit_cid.to_bytes())), all_entries)
        };

        write_workspace_index(&void_dir, vault.index_key()?, &index)?;
    }

    let mut stats = stats;
    stats.files_deferred = files_deferred;
    Ok(stats)
}
418
419/// Checkout specific paths from a commit.
420pub fn checkout_paths<S: ObjectStoreExt + Sync>(
421    store: &S,
422    vault: &KeyVault,
423    commit_cid: &crate::VoidCid,
424    workspace: &Path,
425    paths: &[String],
426) -> Result<CheckoutStats> {
427    let options = CheckoutOptions {
428        paths: Some(paths.to_vec()),
429        force: true, // Path-based checkout always overwrites
430        observer: None,
431        workspace_dir: None,
432        include_large: false,
433    };
434
435    checkout_tree(store, vault, commit_cid, workspace, &options)
436}
437
/// Restores files to the workspace using parallel processing.
///
/// Uses manifest entries for file offset/length within shards (ShardBody::read_file).
/// When `staged_target` is provided, also writes staged blobs for each file
/// so that subsequent commits can find them via `seal_index()`.
///
/// Files are grouped by `shard_index` so each shard is fetched, decrypted,
/// and decompressed exactly once; the per-shard groups run in parallel via
/// rayon. Returns the aggregate [`CheckoutStats`] plus one [`IndexEntry`]
/// per restored file (order is nondeterministic across shards).
pub fn restore_files<S: ObjectStoreExt + Sync>(
    store: &S,
    reader: &CommitReader,
    staged_key: &SecretKey,
    ancestor_keys: &[ContentKey],
    workspace: &Path,
    files: &[FileToRestore],
    observer: &Option<Arc<dyn VoidObserver>>,
    staged_target: Option<&Path>,
) -> Result<(CheckoutStats, Vec<IndexEntry>)> {
    if files.is_empty() {
        return Ok((CheckoutStats::default(), Vec::new()));
    }

    let total_files = files.len() as u64;

    // Emit initial progress
    emit_workspace(
        observer,
        WorkspaceEvent::Progress {
            stage: "checkout".to_string(),
            current: 0,
            total: total_files,
        },
    );

    // Group files by shard_index to minimize shard reads
    let unique_shards: HashSet<u32> = files.iter().map(|f| f.entry.shard_index).collect();

    // Atomic counters for parallel stats collection
    let files_restored = AtomicUsize::new(0);
    let bytes_written = AtomicU64::new(0);
    // Every unique shard is read exactly once, so the count is known upfront.
    let shards_read = AtomicUsize::new(unique_shards.len());

    // Collect index entries for updating
    let index_entries = std::sync::Mutex::new(Vec::new());

    // Process files in parallel, grouped by shard
    let results: Result<Vec<()>> = unique_shards
        .par_iter()
        .map(|shard_index| {
            // Find all files in this shard
            let shard_files: Vec<_> = files.iter().filter(|f| f.entry.shard_index == *shard_index).collect();

            if shard_files.is_empty() {
                return Ok(());
            }

            // Get shard CID from first file (all files in same shard have same CID)
            let shard_cid = cid::from_bytes(shard_files[0].shard_cid.as_bytes())?;

            // Fetch, decrypt, and decompress shard
            let shard_encrypted: void_crypto::EncryptedShard = store.get_blob(&shard_cid)?;
            let shard_bytes = reader.decrypt_shard(&shard_encrypted, shard_files[0].wrapped_key.as_ref(), ancestor_keys)?;
            let body = shard_bytes.decompress()?;

            // Restore each file from this shard using manifest offsets
            for file_info in shard_files {
                let content = body.read_file(&file_info.entry)?;
                let file_path = crate::util::safe_join(workspace, &file_info.entry.path)?;

                // Create parent directories
                if let Some(parent) = file_path.parent() {
                    fs::create_dir_all(parent)?;
                }

                // Write file
                let mut file = File::create(&file_path)?;
                file.write_all(&content)?;

                let content_len = content.len();

                // Create index entry
                let content_hash = ContentHash::digest(&content);

                // Write staged blob so seal_index() can find it during commit
                if let Some(target) = staged_target {
                    staged::write_staged_blob(target, staged_key, &content_hash, &content)?;
                }

                // Missing/unreadable mtime degrades to epoch (0, 0).
                let metadata = fs::metadata(&file_path)?;
                let (mtime_secs, mtime_nanos) = metadata
                    .modified()
                    .ok()
                    .and_then(|mtime| mtime.duration_since(SystemTime::UNIX_EPOCH).ok())
                    .map(|dur| (dur.as_secs(), dur.subsec_nanos()))
                    .unwrap_or((0, 0));

                let entry = IndexEntry::new(
                    file_info.entry.path.clone(),
                    content_hash,
                    mtime_secs,
                    mtime_nanos,
                    content_len as u64,
                );

                index_entries
                    .lock()
                    .map_err(|_| VoidError::Shard("index lock poisoned".into()))?
                    .push(entry);

                // fetch_add returns the PREVIOUS value; +1 gives this file's rank.
                let restored_count = files_restored.fetch_add(1, Ordering::Relaxed) + 1;
                bytes_written.fetch_add(content_len as u64, Ordering::Relaxed);

                // Emit file checked out event
                emit_workspace(
                    observer,
                    WorkspaceEvent::FileCheckedOut {
                        path: file_info.entry.path.clone(),
                    },
                );

                // Emit progress event
                emit_workspace(
                    observer,
                    WorkspaceEvent::Progress {
                        stage: "checkout".to_string(),
                        current: restored_count as u64,
                        total: total_files,
                    },
                );
            }

            Ok(())
        })
        .collect();

    results?;

    let entries = index_entries
        .into_inner()
        .map_err(|_| VoidError::Shard("failed to get index entries".into()))?;

    Ok((
        CheckoutStats {
            files_restored: files_restored.load(Ordering::Relaxed),
            bytes_written: bytes_written.load(Ordering::Relaxed),
            files_skipped: 0,
            shards_read: shards_read.load(Ordering::Relaxed),
            files_deferred: 0,
        },
        entries,
    ))
}
587
588/// Restore a single chunked file by fetching its shards sequentially.
589///
590/// Each chunk shard is fetched, decrypted, decompressed, and written to disk
591/// in order. Only one shard is held in memory at a time. The content hash is
592/// verified incrementally after all chunks are written.
593fn restore_chunked_file<S: ObjectStoreExt>(
594    store: &S,
595    reader: &CommitReader,
596    ancestor_keys: &[ContentKey],
597    workspace: &Path,
598    entry: &ManifestEntry,
599    shards: &[ShardReference],
600) -> Result<(u64, IndexEntry)> {
601    use sha2::{Digest, Sha256};
602
603    let file_path = crate::util::safe_join(workspace, &entry.path)?;
604    if let Some(parent) = file_path.parent() {
605        fs::create_dir_all(parent)?;
606    }
607
608    let output = File::create(&file_path)?;
609    let mut writer = BufWriter::new(output);
610    let mut hasher = Sha256::new();
611    let mut total_written = 0u64;
612
613    let start = entry.shard_index as usize;
614    let end = start + entry.shard_count as usize;
615
616    for shard_idx in start..end {
617        let shard_ref = shards.get(shard_idx).ok_or_else(|| {
618            VoidError::Shard(format!(
619                "chunk shard index {} out of range for '{}'",
620                shard_idx, entry.path
621            ))
622        })?;
623
624        let shard_cid = cid::from_bytes(shard_ref.cid.as_bytes())?;
625        let encrypted: void_crypto::EncryptedShard = store.get_blob(&shard_cid)?;
626        let decrypted = reader.decrypt_shard(&encrypted, shard_ref.wrapped_key.as_ref(), ancestor_keys)?;
627        let body = decrypted.decompress()?;
628
629        // Each chunk shard's decompressed body IS the chunk content.
630        // Use as_bytes() directly instead of read_file() because
631        // entry.length is the full file size, not the chunk size.
632        let chunk = body.as_bytes();
633        writer.write_all(&chunk)?;
634        hasher.update(&chunk);
635        total_written += chunk.len() as u64;
636    }
637
638    writer.flush()?;
639
640    // Verify content hash
641    let computed_hash = ContentHash::from_bytes(hasher.finalize().into());
642    if computed_hash != entry.content_hash {
643        return Err(VoidError::IntegrityError {
644            expected: entry.content_hash.to_hex(),
645            actual: computed_hash.to_hex(),
646        });
647    }
648
649    // Build index entry from disk metadata
650    let metadata = fs::metadata(&file_path)?;
651    let (mtime_secs, mtime_nanos) = metadata
652        .modified()
653        .ok()
654        .and_then(|mtime| mtime.duration_since(SystemTime::UNIX_EPOCH).ok())
655        .map(|dur| (dur.as_secs(), dur.subsec_nanos()))
656        .unwrap_or((0, 0));
657
658    let index_entry = IndexEntry::new(
659        entry.path.clone(),
660        entry.content_hash,
661        mtime_secs,
662        mtime_nanos,
663        total_written,
664    );
665
666    Ok((total_written, index_entry))
667}