// void_core/workspace/checkout.rs
1//! Checkout module - restore files from a commit to the working tree
2//!
3//! Provides functions for checking out files from encrypted commits:
4//! - `checkout_tree`: Restore entire tree from a commit
5//! - `checkout_paths`: Restore specific paths from a commit
6//!
7//! Uses parallel file restoration via rayon for performance.
8
9use std::collections::{HashMap, HashSet};
10use std::fs::{self, File};
11use std::io::Write;
12use std::path::{Path, PathBuf};
13use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
14use std::sync::Arc;
15use std::time::SystemTime;
16
17use camino::Utf8PathBuf;
18use rayon::prelude::*;
19
20use void_crypto::WrappedKey;
21
22use crate::crypto::{CommitReader, ContentKey, KeyVault, SecretKey};
23use crate::staged;
24use crate::index::{
25    entry_matches_file, read_index, write_workspace_index, IndexEntry, WorkspaceIndex,
26};
27use crate::metadata::ManifestEntry;
28use crate::metadata::manifest_tree::TreeManifest;
29use crate::pathspec::Pathspec;
30
31use crate::store::ObjectStoreExt;
32use crate::support::events::{emit_workspace, VoidObserver, WorkspaceEvent};
33use crate::{cid, ContentHash, Result, VoidError};
34
/// Options for checkout operations
///
/// Controls path filtering, overwrite behavior, progress reporting, and
/// which per-workspace state directory the index is read from / written to.
#[derive(Clone)]
pub struct CheckoutOptions {
    /// Specific paths to checkout (None = full tree)
    pub paths: Option<Vec<String>>,
    /// Overwrite modified files without prompting.
    /// When `false`, a locally-modified tracked file aborts the checkout
    /// with an error instead of being overwritten.
    pub force: bool,
    /// Optional observer for progress events.
    pub observer: Option<Arc<dyn VoidObserver>>,
    /// Per-workspace state directory for index operations.
    /// When `None`, defaults to `workspace.join(".void")` (main workspace).
    pub workspace_dir: Option<PathBuf>,
}
48
49impl Default for CheckoutOptions {
50    fn default() -> Self {
51        Self {
52            paths: None,
53            force: false,
54            observer: None,
55            workspace_dir: None,
56        }
57    }
58}
59
/// Statistics from checkout operation
///
/// Returned by [`checkout_tree`] / [`checkout_paths`] so callers can report
/// what the checkout actually did.
#[derive(Clone, Debug, Default)]
pub struct CheckoutStats {
    /// Number of files restored to the workspace
    pub files_restored: usize,
    /// Total bytes written to disk
    pub bytes_written: u64,
    /// Number of files skipped (already up-to-date)
    pub files_skipped: usize,
    /// Number of shards read (distinct shards the restore had to fetch)
    pub shards_read: usize,
}
72
/// A file to restore during checkout.
///
/// Each entry maps a file path to its manifest entry and shard info.
/// Built by `plan_checkout` from the commit's tree manifest; consumed by
/// `restore_files`, which groups entries by `entry.shard_index`.
#[derive(Clone, Debug)]
pub struct FileToRestore {
    /// The manifest entry with path, content_hash, offset, length, shard_index
    pub entry: ManifestEntry,
    /// Shard CID (from manifest.shards)
    pub shard_cid: void_crypto::ShardCid,
    /// Wrapped shard key (if shard uses per-shard encryption)
    pub wrapped_key: Option<WrappedKey>,
}
85
/// Helper struct to hold loaded commit/manifest info
///
/// Produced by `load_commit_info`; bundles everything the rest of the
/// checkout pipeline needs about the target commit.
struct CommitInfo {
    /// Decoded tree manifest of the commit.
    manifest: TreeManifest,
    /// All files in this commit with their content hashes (path -> content_hash)
    all_files: HashMap<String, ContentHash>,
    /// Commit reader for shard decryption
    reader: CommitReader,
    /// Ancestor content keys for shard decryption fallback
    ancestor_keys: Vec<ContentKey>,
}
96
/// The result of planning a checkout: which files to restore plus the
/// context later stages need.
struct CheckoutPlan {
    /// Files that matched the pathspec and passed the dirty-file check.
    files: Vec<FileToRestore>,
    /// The pre-checkout workspace index, if one could be read.
    index: Option<WorkspaceIndex>,
    /// Matcher built from `CheckoutOptions::paths` (empty spec matches all).
    pathspec: Pathspec,
}
102
103/// Loads commit and manifest, returning deserialized info.
104fn load_commit_info<S: ObjectStoreExt>(
105    store: &S,
106    vault: &KeyVault,
107    commit_cid: &crate::VoidCid,
108) -> Result<CommitInfo> {
109    let commit_encrypted: void_crypto::EncryptedCommit = store.get_blob(commit_cid)?;
110    let (commit_bytes, reader) = CommitReader::open_with_vault(vault, &commit_encrypted)?;
111    let commit = commit_bytes.parse()?;
112
113    let manifest = TreeManifest::from_commit(store, &commit, &reader)?
114        .ok_or_else(|| VoidError::IntegrityError {
115            expected: "manifest_cid present on commit".into(),
116            actual: "None".into(),
117        })?;
118
119    // Collect all files map from manifest
120    let mut all_files = HashMap::new();
121    for entry_result in manifest.iter() {
122        let entry = entry_result?;
123        all_files.insert(entry.path.clone(), entry.content_hash);
124    }
125
126    let ancestor_keys = crate::crypto::collect_ancestor_content_keys_vault(vault, store, &commit);
127
128    Ok(CommitInfo {
129        manifest,
130        all_files,
131        reader,
132        ancestor_keys,
133    })
134}
135
136/// Determines which files need to be restored and groups them by shard.
137fn plan_checkout(
138    vault: &KeyVault,
139    commit_info: &CommitInfo,
140    workspace: &Path,
141    options: &CheckoutOptions,
142) -> Result<CheckoutPlan> {
143    // Build pathspec matcher if paths are specified
144    let pathspec = match &options.paths {
145        Some(paths) => {
146            let path_refs: Vec<&str> = paths.iter().map(|s| s.as_str()).collect();
147            Pathspec::new(&path_refs)?
148        }
149        None => Pathspec::new(&[])?, // matches all
150    };
151
152    // Try to load existing index for dirty detection
153    let void_dir = options.workspace_dir.clone().unwrap_or_else(|| workspace.join(".void"));
154    let existing_index = if void_dir.exists() {
155        read_index(&void_dir, vault.index_key()?).ok()
156    } else {
157        None
158    };
159    let base_path = Utf8PathBuf::try_from(workspace.to_path_buf())
160        .map_err(|e| VoidError::Io(std::io::Error::new(std::io::ErrorKind::InvalidData, e)))?;
161
162    let shards = commit_info.manifest.shards();
163    let mut files_to_restore = Vec::new();
164
165    for entry_result in commit_info.manifest.iter() {
166        let entry = entry_result?;
167
168        // Check if path matches filter
169        if !pathspec.matches(&entry.path) {
170            emit_workspace(
171                &options.observer,
172                WorkspaceEvent::FileSkipped {
173                    path: entry.path.clone(),
174                    reason: "does not match pathspec".to_string(),
175                },
176            );
177            continue;
178        }
179
180        // Check if file is dirty (modified in workspace)
181        if !options.force {
182            if let Some(ref index) = existing_index {
183                if let Some(idx_entry) = index.get(&entry.path) {
184                    let file_path_on_disk = crate::util::safe_join(workspace, &entry.path)?;
185                    if file_path_on_disk.exists() {
186                        let matches = entry_matches_file(idx_entry, &base_path).unwrap_or(false);
187                        if !matches {
188                            return Err(VoidError::Shard(format!(
189                                "file '{}' has local modifications; use --force to overwrite",
190                                entry.path
191                            )));
192                        }
193                    }
194                }
195            }
196        }
197
198        // Get shard info from manifest
199        let shard_ref = shards.get(entry.shard_index as usize)
200            .ok_or_else(|| VoidError::Shard(format!(
201                "shard_index {} out of range for file '{}'", entry.shard_index, entry.path
202            )))?;
203
204        files_to_restore.push(FileToRestore {
205            entry,
206            shard_cid: shard_ref.cid.clone(),
207            wrapped_key: shard_ref.wrapped_key.clone(),
208        });
209    }
210
211    Ok(CheckoutPlan {
212        files: files_to_restore,
213        index: existing_index,
214        pathspec,
215    })
216}
217
218fn prune_extra_files(
219    workspace: &Path,
220    pathspec: &Pathspec,
221    target_set: &HashSet<String>,
222    existing_index: Option<&WorkspaceIndex>,
223    force: bool,
224) -> Result<Vec<String>> {
225    let Some(index) = existing_index else {
226        return Ok(Vec::new());
227    };
228
229    let base_path = Utf8PathBuf::try_from(workspace.to_path_buf())
230        .map_err(|e| VoidError::Io(std::io::Error::new(std::io::ErrorKind::InvalidData, e)))?;
231
232    let mut removed = Vec::new();
233
234    for entry in &index.entries {
235        if !pathspec.matches(&entry.path) {
236            continue;
237        }
238        if target_set.contains(&entry.path) {
239            continue;
240        }
241
242        let file_path = crate::util::safe_join(workspace, &entry.path)?;
243        if file_path.exists() {
244            if !force {
245                let matches = entry_matches_file(entry, &base_path).unwrap_or(false);
246                if !matches {
247                    return Err(VoidError::Shard(format!(
248                        "file '{}' has local modifications; use --force to overwrite",
249                        entry.path
250                    )));
251                }
252            }
253            fs::remove_file(&file_path)?;
254        }
255
256        removed.push(entry.path.clone());
257    }
258
259    Ok(removed)
260}
261
/// Checkout entire tree from a commit.
///
/// Pipeline: load the commit + manifest, plan which files to restore
/// (pathspec filter + dirty-file check), prune tracked files not present in
/// the target commit, restore the planned files in parallel, and finally
/// rewrite the workspace index to reflect the post-checkout state.
///
/// With `options.paths` set, only matching index entries are updated in the
/// existing index; otherwise a complete index is rebuilt from every file in
/// the commit and stamped with `commit_cid`.
pub fn checkout_tree<S: ObjectStoreExt + Sync>(
    store: &S,
    vault: &KeyVault,
    commit_cid: &crate::VoidCid,
    workspace: &Path,
    options: &CheckoutOptions,
) -> Result<CheckoutStats> {
    let commit_info = load_commit_info(store, vault, commit_cid)?;
    let plan = plan_checkout(vault, &commit_info, workspace, options)?;
    // Paths the target commit will contain (restricted to the pathspec);
    // anything tracked but outside this set is a prune candidate.
    let target_set: HashSet<String> = commit_info
        .all_files
        .keys()
        .filter(|path| plan.pathspec.matches(path))
        .cloned()
        .collect();

    let removed_paths = prune_extra_files(
        workspace,
        &plan.pathspec,
        &target_set,
        plan.index.as_ref(),
        options.force,
    )?;

    let void_dir = options.workspace_dir.clone().unwrap_or_else(|| workspace.join(".void"));

    let (stats, restored_entries) =
        restore_files(store, &commit_info.reader, vault.staged_key()?, &commit_info.ancestor_keys, workspace, &plan.files, &options.observer, Some(&void_dir))?;
    // Only persist an index if the state directory exists (i.e. this is an
    // initialized workspace).
    if void_dir.exists() {
        let index = if options.paths.is_some() {
            // Partial checkout: patch the existing index in place — drop
            // pruned entries, then upsert the entries we just restored.
            let mut index = plan.index.unwrap_or_else(WorkspaceIndex::empty);
            if !removed_paths.is_empty() {
                let removed: HashSet<String> = removed_paths.into_iter().collect();
                index.entries.retain(|entry| !removed.contains(&entry.path));
            }
            for entry in restored_entries {
                index.upsert_entry(entry);
            }
            index
        } else {
            // Full checkout: build complete index from ALL commit files
            let mut all_entries = restored_entries;
            // Clone paths into owned HashSet to avoid borrow conflict
            let restored_paths: HashSet<String> = all_entries.iter().map(|e| e.path.clone()).collect();

            // Add entries for files that weren't restored (already existed with matching content)
            for (path, content_hash) in &commit_info.all_files {
                if !restored_paths.contains(path.as_str()) {
                    // File was skipped, create index entry from disk metadata
                    let file_path = crate::util::safe_join(workspace, path)?;
                    if file_path.exists() {
                        if let Ok(metadata) = std::fs::metadata(&file_path) {
                            // Missing/pre-epoch mtime degrades to (0, 0)
                            // rather than failing the checkout.
                            let (mtime_secs, mtime_nanos) = metadata
                                .modified()
                                .ok()
                                .and_then(|mtime| mtime.duration_since(SystemTime::UNIX_EPOCH).ok())
                                .map(|dur| (dur.as_secs(), dur.subsec_nanos()))
                                .unwrap_or((0, 0));

                            let entry = IndexEntry::new(
                                path.clone(),
                                *content_hash,
                                mtime_secs,
                                mtime_nanos,
                                metadata.len(),
                            );
                            all_entries.push(entry);

                            // Write staged blob for skipped files too
                            if !staged::has_staged_blob(&void_dir, content_hash) {
                                let content = fs::read(&file_path)?;
                                staged::write_staged_blob(&void_dir, vault.staged_key()?, content_hash, &content)?;
                            }
                        }
                    }
                }
            }

            WorkspaceIndex::new(Some(void_crypto::CommitCid::from_bytes(commit_cid.to_bytes())), all_entries)
        };

        write_workspace_index(&void_dir, vault.index_key()?, &index)?;
    }

    Ok(stats)
}
349
350/// Checkout specific paths from a commit.
351pub fn checkout_paths<S: ObjectStoreExt + Sync>(
352    store: &S,
353    vault: &KeyVault,
354    commit_cid: &crate::VoidCid,
355    workspace: &Path,
356    paths: &[String],
357) -> Result<CheckoutStats> {
358    let options = CheckoutOptions {
359        paths: Some(paths.to_vec()),
360        force: true, // Path-based checkout always overwrites
361        observer: None,
362        workspace_dir: None,
363    };
364
365    checkout_tree(store, vault, commit_cid, workspace, &options)
366}
367
368/// Restores files to the workspace using parallel processing.
369///
370/// Uses manifest entries for file offset/length within shards (ShardBody::read_file).
371/// When `staged_target` is provided, also writes staged blobs for each file
372/// so that subsequent commits can find them via `seal_index()`.
373pub fn restore_files<S: ObjectStoreExt + Sync>(
374    store: &S,
375    reader: &CommitReader,
376    staged_key: &SecretKey,
377    ancestor_keys: &[ContentKey],
378    workspace: &Path,
379    files: &[FileToRestore],
380    observer: &Option<Arc<dyn VoidObserver>>,
381    staged_target: Option<&Path>,
382) -> Result<(CheckoutStats, Vec<IndexEntry>)> {
383    if files.is_empty() {
384        return Ok((CheckoutStats::default(), Vec::new()));
385    }
386
387    let total_files = files.len() as u64;
388
389    // Emit initial progress
390    emit_workspace(
391        observer,
392        WorkspaceEvent::Progress {
393            stage: "checkout".to_string(),
394            current: 0,
395            total: total_files,
396        },
397    );
398
399    // Group files by shard_index to minimize shard reads
400    let unique_shards: HashSet<u32> = files.iter().map(|f| f.entry.shard_index).collect();
401
402    // Atomic counters for parallel stats collection
403    let files_restored = AtomicUsize::new(0);
404    let bytes_written = AtomicU64::new(0);
405    let shards_read = AtomicUsize::new(unique_shards.len());
406
407    // Collect index entries for updating
408    let index_entries = std::sync::Mutex::new(Vec::new());
409
410    // Process files in parallel, grouped by shard
411    let results: Result<Vec<()>> = unique_shards
412        .par_iter()
413        .map(|shard_index| {
414            // Find all files in this shard
415            let shard_files: Vec<_> = files.iter().filter(|f| f.entry.shard_index == *shard_index).collect();
416
417            if shard_files.is_empty() {
418                return Ok(());
419            }
420
421            // Get shard CID from first file (all files in same shard have same CID)
422            let shard_cid = cid::from_bytes(shard_files[0].shard_cid.as_bytes())?;
423
424            // Fetch, decrypt, and decompress shard
425            let shard_encrypted: void_crypto::EncryptedShard = store.get_blob(&shard_cid)?;
426            let shard_bytes = reader.decrypt_shard(&shard_encrypted, shard_files[0].wrapped_key.as_ref(), ancestor_keys)?;
427            let body = shard_bytes.decompress()?;
428
429            // Restore each file from this shard using manifest offsets
430            for file_info in shard_files {
431                let content = body.read_file(&file_info.entry)?;
432                let file_path = crate::util::safe_join(workspace, &file_info.entry.path)?;
433
434                // Create parent directories
435                if let Some(parent) = file_path.parent() {
436                    fs::create_dir_all(parent)?;
437                }
438
439                // Write file
440                let mut file = File::create(&file_path)?;
441                file.write_all(&content)?;
442
443                let content_len = content.len();
444
445                // Create index entry
446                let content_hash = ContentHash::digest(&content);
447
448                // Write staged blob so seal_index() can find it during commit
449                if let Some(target) = staged_target {
450                    staged::write_staged_blob(target, staged_key, &content_hash, &content)?;
451                }
452
453                let metadata = fs::metadata(&file_path)?;
454                let (mtime_secs, mtime_nanos) = metadata
455                    .modified()
456                    .ok()
457                    .and_then(|mtime| mtime.duration_since(SystemTime::UNIX_EPOCH).ok())
458                    .map(|dur| (dur.as_secs(), dur.subsec_nanos()))
459                    .unwrap_or((0, 0));
460
461                let entry = IndexEntry::new(
462                    file_info.entry.path.clone(),
463                    content_hash,
464                    mtime_secs,
465                    mtime_nanos,
466                    content_len as u64,
467                );
468
469                index_entries
470                    .lock()
471                    .map_err(|_| VoidError::Shard("index lock poisoned".into()))?
472                    .push(entry);
473
474                let restored_count = files_restored.fetch_add(1, Ordering::Relaxed) + 1;
475                bytes_written.fetch_add(content_len as u64, Ordering::Relaxed);
476
477                // Emit file checked out event
478                emit_workspace(
479                    observer,
480                    WorkspaceEvent::FileCheckedOut {
481                        path: file_info.entry.path.clone(),
482                    },
483                );
484
485                // Emit progress event
486                emit_workspace(
487                    observer,
488                    WorkspaceEvent::Progress {
489                        stage: "checkout".to_string(),
490                        current: restored_count as u64,
491                        total: total_files,
492                    },
493                );
494            }
495
496            Ok(())
497        })
498        .collect();
499
500    results?;
501
502    let entries = index_entries
503        .into_inner()
504        .map_err(|_| VoidError::Shard("failed to get index entries".into()))?;
505
506    Ok((
507        CheckoutStats {
508            files_restored: files_restored.load(Ordering::Relaxed),
509            bytes_written: bytes_written.load(Ordering::Relaxed),
510            files_skipped: 0,
511            shards_read: shards_read.load(Ordering::Relaxed),
512        },
513        entries,
514    ))
515}