Skip to main content

repo/
repository_materialization.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Tree materialization helpers.
3
4use std::{
5    collections::BTreeSet,
6    fs,
7    num::NonZeroUsize,
8    path::{Path, PathBuf},
9    sync::atomic::{AtomicBool, Ordering},
10    thread,
11    time::Instant,
12};
13
14use objects::{
15    fs_atomic::enrich_fs_error,
16    object::{ChangeId, ContentHash, EntryType, Tree},
17};
18use tracing::{debug, instrument};
19
20use super::{HeddleError, Repository, Result, repository_worktree_apply::is_directory_not_empty};
21use crate::{
22    worktree_index::IndexEntry,
23    worktree_walk::{build_cached_entry, cache_key},
24};
25
/// Per-batch bookkeeping shared by every write performed during one
/// `materialize_write_ops_seeded` call.
///
/// It remembers whether reflinks (copy-on-write clones) are still
/// worth attempting on the destination filesystem, so that after the
/// first "not supported" verdict the rest of the batch skips the
/// doomed clone attempt. It also tallies how many blobs were
/// reflinked versus written as plain bytes; the tallies are logged
/// once the batch finishes.
///
/// CORRECTNESS NOTE on blob isolation:
///   Blobs reach the worktree either through a filesystem reflink
///   (the original notes cite `clonefile(2)` on macOS APFS and
///   `ioctl(FICLONE)` on Linux btrfs/XFS-with-reflinks/ZFS) or, when
///   that is unavailable, by writing the blob bytes into a fresh
///   file with `fs::write`. **Either way the destination has its own
///   inode.** A worktree file is never an alias of the canonical
///   loose blob or of another worktree's file, so
///   `chmod +w file && echo new > file` only changes that one
///   worktree. With a reflink the kernel forks the shared allocation
///   on first write; with a plain write the file was independent
///   from the start.
///
///   This supersedes an older hardlink-plus-`chmod 0o444` scheme.
///   There the worktree file *was* the canonical blob, and the
///   read-only bit was only a hint that any agent could undo with
///   `chmod 644`. Isolation now comes from the filesystem and cannot
///   be bypassed from userspace.
struct MaterializationContext {
    /// `true` while reflink attempts are still considered worthwhile.
    reflink_supported: AtomicBool,
    /// Number of blobs placed via a successful reflink.
    reflink_count: std::sync::atomic::AtomicUsize,
    /// Number of blobs placed via a byte write (including stubs).
    copy_count: std::sync::atomic::AtomicUsize,
}

impl MaterializationContext {
    /// Start a batch optimistically: reflinks are assumed to work
    /// until one attempt proves otherwise, and both tallies are zero.
    fn new() -> Self {
        let zeroed = || std::sync::atomic::AtomicUsize::new(0);
        Self {
            reflink_supported: AtomicBool::new(true),
            reflink_count: zeroed(),
            copy_count: zeroed(),
        }
    }

    /// Whether reflink attempts are still enabled for this batch.
    fn reflinks_enabled(&self) -> bool {
        self.reflink_supported.load(Ordering::Relaxed)
    }

    /// Turn off reflink attempts for the remainder of the batch once
    /// the filesystem has reported that it cannot clone.
    fn disable_reflinks(&self) {
        self.reflink_supported.store(false, Ordering::Relaxed);
    }

    /// Count one blob that was placed by reflink.
    fn record_reflink(&self) {
        self.reflink_count.fetch_add(1, Ordering::Relaxed);
    }

    /// Count one blob that was placed by writing bytes.
    fn record_copy(&self) {
        self.copy_count.fetch_add(1, Ordering::Relaxed);
    }
}
87
/// Minimum number of write operations before materialization fans
/// out to worker threads; `materialization_worker_count` returns 1
/// for anything smaller.
const MATERIALIZE_PARALLEL_THRESHOLD: usize = 32;
/// Name of the environment variable that
/// `requested_materialization_threads` reads to override the worker
/// thread count.
const MATERIALIZE_THREADS_ENV: &str = "HEDDLE_MATERIALIZE_THREADS";
90
/// Work list produced by `plan_materialization` and consumed by
/// `materialize_tree_seeded`.
struct MaterializationPlan {
    /// Destination path of every subtree encountered; each is created
    /// with `create_dir_all` before any leaf is written.
    directories: Vec<PathBuf>,
    /// One context per tree visited (the root included), handed back
    /// to the caller in `MaterializedTree`.
    directory_contexts: Vec<MaterializedDirectoryContext>,
    /// Blob and symlink writes, in tree-walk order.
    leaves: Vec<WorktreeWriteOp>,
    /// Number of `WorktreeWriteOp::Blob` leaves planned (logged only).
    file_count: usize,
    /// Number of `WorktreeWriteOp::Symlink` leaves planned (logged only).
    symlink_count: usize,
}
98
/// What `materialize_tree_seeded` hands back after writing a tree.
#[derive(Debug)]
pub(crate) struct MaterializedTree {
    /// One seeded index entry per leaf (file or symlink) written.
    pub(crate) file_entries: Vec<SeededWorktreeEntry>,
    /// One context per directory (tree object) that was planned.
    pub(crate) directory_contexts: Vec<MaterializedDirectoryContext>,
}
104
/// Index entry produced for a leaf right after it was written.
#[derive(Debug)]
pub(crate) struct SeededWorktreeEntry {
    /// `cache_key` of the leaf path, taken relative to the repository
    /// root when the path lies under it (otherwise of the path as given).
    pub(crate) key: String,
    /// Entry built by `build_cached_entry` from the leaf's on-disk
    /// metadata as read immediately after the write.
    pub(crate) entry: IndexEntry,
}
110
/// Description of one tree object as it was planned onto disk.
#[derive(Debug)]
pub(crate) struct MaterializedDirectoryContext {
    /// `cache_key` of the directory path relative to the
    /// materialization root (the root itself uses the empty path).
    pub(crate) key: String,
    /// Destination directory on disk.
    pub(crate) path: PathBuf,
    /// Names of the tree's entries, in the tree's own order.
    pub(crate) child_names: Vec<String>,
    /// Hash of the tree object this directory was planned from.
    pub(crate) tree_hash: ContentHash,
}
118
119#[derive(Clone, Debug)]
120pub(crate) enum WorktreeWriteOp {
121    Blob {
122        path: PathBuf,
123        hash: ContentHash,
124        executable: bool,
125    },
126    Symlink {
127        path: PathBuf,
128        hash: ContentHash,
129    },
130}
131
132impl WorktreeWriteOp {
133    pub(crate) fn path(&self) -> &Path {
134        match self {
135            Self::Blob { path, .. } | Self::Symlink { path, .. } => path,
136        }
137    }
138
139    pub(crate) fn hash(&self) -> ContentHash {
140        match self {
141            Self::Blob { hash, .. } | Self::Symlink { hash, .. } => *hash,
142        }
143    }
144
145    pub(crate) fn executable(&self) -> bool {
146        match self {
147            Self::Blob { executable, .. } => *executable,
148            Self::Symlink { .. } => false,
149        }
150    }
151
152    pub(crate) fn index_kind(&self) -> crate::worktree_index::IndexEntryKind {
153        match self {
154            Self::Blob { .. } => crate::worktree_index::IndexEntryKind::File,
155            Self::Symlink { .. } => crate::worktree_index::IndexEntryKind::Symlink,
156        }
157    }
158}
159
/// Outcome counters returned by
/// `Repository::warm_canonical_store_for_state(s)`.
///
/// Background: the reflink-first materializer can only clone from a
/// canonical loose, uncompressed blob file. Once a repository has
/// been through `pack_objects + prune_loose_objects` (the steady
/// state for any non-fresh repo) blobs live only in packs and
/// `loose_blob_path` yields `None`. A warm pass walks the tree(s) of
/// the given state(s) and promotes every reachable blob up front, so
/// the next N materializations of that state across N worktrees can
/// take the fast path.
///
/// Warming is the eager counterpart of the lazy promotion performed
/// inside `materialize_blob`. The lazy path is sufficient for
/// correctness; warming only trims latency when a state is about to
/// be materialized many times (e.g. `heddle delegate`).
#[derive(Debug, Default, Clone, Copy)]
pub struct WarmCanonicalStoreStats {
    /// Blobs that had to be written to the canonical
    /// loose-uncompressed location (they were pack-only or
    /// compressed-loose beforehand).
    pub promoted: usize,
    /// Blobs that were already loose and uncompressed, so nothing
    /// was written.
    pub already_loose: usize,
    /// Blobs for which `promote_to_loose_uncompressed` failed (for
    /// example the blob is missing, or the atomic write hit a
    /// transient I/O error). Deliberately non-fatal: materialization
    /// retries lazily and reports real corruption more loudly.
    pub errors: usize,
}

impl WarmCanonicalStoreStats {
    /// Number of blobs the warm pass looked at, regardless of outcome.
    pub fn total(&self) -> usize {
        [self.promoted, self.already_loose, self.errors]
            .iter()
            .sum()
    }
}
195
196impl Repository {
197    /// Promote every reachable blob from `state_id`'s tree(s) into
198    /// the canonical loose-uncompressed store, so a subsequent
199    /// `materialize_tree` (or N parallel materializations) can
200    /// reflink from the canonical store without paying the
201    /// decompress-on-first-clone tax.
202    ///
203    /// Returns counts of work done. Errors per blob are accumulated
204    /// rather than bubbled up so a single corrupt or missing object
205    /// doesn't poison the whole warm pass — the lazy path inside
206    /// `materialize_blob` will surface that loudly when it actually
207    /// matters.
208    #[instrument(skip(self), fields(state_id = %state_id))]
209    pub fn warm_canonical_store_for_state(
210        &self,
211        state_id: &ChangeId,
212    ) -> Result<WarmCanonicalStoreStats> {
213        self.warm_canonical_store_for_states(std::slice::from_ref(state_id))
214    }
215
216    /// Multi-state variant. Walks each state's tree once, dedupes
217    /// the union of reachable blob hashes across all of them, and
218    /// promotes them. Useful when materializing several sibling
219    /// states from the same parent in quick succession (the
220    /// `heddle delegate`-style flow).
221    #[instrument(skip(self, state_ids), fields(state_count = state_ids.len()))]
222    pub fn warm_canonical_store_for_states(
223        &self,
224        state_ids: &[ChangeId],
225    ) -> Result<WarmCanonicalStoreStats> {
226        let mut blob_hashes = BTreeSet::new();
227        for state_id in state_ids {
228            let state = self
229                .store
230                .get_state(state_id)?
231                .ok_or_else(|| HeddleError::NotFound(format!("state {} not in store", state_id)))?;
232            let tree = self.store.get_tree(&state.tree)?.ok_or_else(|| {
233                HeddleError::NotFound(format!("tree {} (for state {})", state.tree, state_id))
234            })?;
235            self.collect_blob_hashes(&tree, &mut blob_hashes)?;
236        }
237
238        let mut stats = WarmCanonicalStoreStats::default();
239        for hash in &blob_hashes {
240            match self.store.promote_to_loose_uncompressed(hash) {
241                Ok(true) => stats.promoted += 1,
242                Ok(false) => stats.already_loose += 1,
243                Err(err) => {
244                    debug!(
245                        ?err,
246                        hash = %hash,
247                        "promote_to_loose_uncompressed failed during warm pass"
248                    );
249                    stats.errors += 1;
250                }
251            }
252        }
253
254        debug!(
255            promoted = stats.promoted,
256            already_loose = stats.already_loose,
257            errors = stats.errors,
258            "Warm canonical store pass complete"
259        );
260
261        Ok(stats)
262    }
263
264    fn collect_blob_hashes(&self, tree: &Tree, out: &mut BTreeSet<ContentHash>) -> Result<()> {
265        for entry in tree.entries() {
266            // Symlink targets are stored as blobs too — they're
267            // small, so promotion cost is negligible, and a stored
268            // symlink is materialized via `get_blob` (not hardlink),
269            // so promoting them is technically wasted work. But
270            // skipping symlinks would mean walking the tree with
271            // the same defensive `is_symlink` guard we use in
272            // `plan_materialization`, and the cost of warming a few
273            // tiny symlink-target blobs is dwarfed by the
274            // decompress cost of even one real source file. Keep
275            // it simple: promote everything reachable.
276            match entry.entry_type {
277                EntryType::Blob | EntryType::Symlink => {
278                    out.insert(entry.hash);
279                }
280                EntryType::Tree => {
281                    let subtree = self
282                        .store
283                        .get_tree(&entry.hash)?
284                        .ok_or_else(|| HeddleError::NotFound(format!("tree {}", entry.hash)))?;
285                    self.collect_blob_hashes(&subtree, out)?;
286                }
287            }
288        }
289        Ok(())
290    }
291
292    /// Materialize a tree to the filesystem.
293    #[instrument(skip(self, tree), fields(dir = %dir.display(), entries = tree.len()))]
294    pub fn materialize_tree(&self, tree: &Tree, dir: &Path) -> Result<()> {
295        self.materialize_tree_seeded(tree, dir).map(|_| ())
296    }
297
298    pub(crate) fn materialize_tree_seeded(
299        &self,
300        tree: &Tree,
301        dir: &Path,
302    ) -> Result<MaterializedTree> {
303        let plan_start = Instant::now();
304        let mut plan = MaterializationPlan {
305            directories: Vec::new(),
306            directory_contexts: Vec::new(),
307            leaves: Vec::new(),
308            file_count: 0,
309            symlink_count: 0,
310        };
311        self.plan_materialization(tree, Path::new(""), dir, &mut plan)?;
312        let plan_duration_ms = plan_start.elapsed().as_millis();
313
314        let execution_start = Instant::now();
315        let requested_threads = requested_materialization_threads();
316        fs::create_dir_all(dir)
317            .map_err(|e| HeddleError::Io(enrich_fs_error(dir, "creating", e)))?;
318        for directory in &plan.directories {
319            fs::create_dir_all(directory)
320                .map_err(|e| HeddleError::Io(enrich_fs_error(directory, "creating", e)))?;
321        }
322
323        let (worker_count, file_entries) = self.materialize_write_ops_seeded(&plan.leaves)?;
324
325        debug!(
326            directories = plan.directories.len(),
327            files = plan.file_count,
328            symlinks = plan.symlink_count,
329            workers = worker_count,
330            requested_workers = requested_threads.map(NonZeroUsize::get),
331            plan_duration_ms,
332            execution_duration_ms = execution_start.elapsed().as_millis(),
333            parallel = worker_count > 1,
334            "Tree materialization complete"
335        );
336
337        Ok(MaterializedTree {
338            file_entries,
339            directory_contexts: plan.directory_contexts,
340        })
341    }
342
343    fn plan_materialization(
344        &self,
345        tree: &Tree,
346        rel_dir: &Path,
347        dir: &Path,
348        plan: &mut MaterializationPlan,
349    ) -> Result<()> {
350        plan.directory_contexts.push(MaterializedDirectoryContext {
351            key: cache_key(rel_dir),
352            path: dir.to_path_buf(),
353            child_names: tree
354                .entries()
355                .iter()
356                .map(|entry| entry.name.clone())
357                .collect(),
358            tree_hash: tree.hash(),
359        });
360
361        for entry in tree.entries() {
362            let path = dir.join(&entry.name);
363            let rel_path = rel_dir.join(&entry.name);
364            // Defensive routing: a tree entry whose `mode` is Symlink should
365            // be materialized as a real symlink even if its `entry_type`
366            // says Blob. Pre-Phase-E imports stored symlinks as
367            // `(EntryType::Blob, FileMode::Symlink)` and the resulting
368            // worktree wrote the symlink target as plain file content.
369            // This guard makes those legacy trees materialize correctly
370            // on `goto` without requiring a re-import.
371            let is_symlink = entry.entry_type == EntryType::Symlink
372                || entry.mode == objects::object::FileMode::Symlink;
373            if is_symlink {
374                plan.symlink_count += 1;
375                plan.leaves.push(WorktreeWriteOp::Symlink {
376                    path,
377                    hash: entry.hash,
378                });
379                continue;
380            }
381            match entry.entry_type {
382                EntryType::Blob => {
383                    plan.file_count += 1;
384                    plan.leaves.push(WorktreeWriteOp::Blob {
385                        path,
386                        hash: entry.hash,
387                        executable: entry.is_executable(),
388                    });
389                }
390                EntryType::Tree => {
391                    let subtree = self
392                        .store
393                        .get_tree(&entry.hash)?
394                        .ok_or_else(|| HeddleError::NotFound(format!("tree {}", entry.hash)))?;
395                    plan.directories.push(path.clone());
396                    self.plan_materialization(&subtree, &rel_path, &path, plan)?;
397                }
398                EntryType::Symlink => {
399                    // Already handled above; left here for exhaustiveness.
400                    unreachable!(
401                        "EntryType::Symlink should have been routed by the is_symlink guard"
402                    );
403                }
404            }
405        }
406
407        Ok(())
408    }
409
410    pub(crate) fn materialize_write_ops(&self, writes: &[WorktreeWriteOp]) -> Result<usize> {
411        self.materialize_write_ops_seeded(writes)
412            .map(|(worker_count, _)| worker_count)
413    }
414
415    pub(crate) fn materialize_write_ops_seeded(
416        &self,
417        writes: &[WorktreeWriteOp],
418    ) -> Result<(usize, Vec<SeededWorktreeEntry>)> {
419        prepare_parent_directories(writes)?;
420
421        let requested_threads = requested_materialization_threads();
422        let worker_count = materialization_worker_count(writes.len(), requested_threads);
423
424        // No probe — the per-blob path tries `clonefile`/FICLONE
425        // first and flips a batch-wide flag on the first
426        // `EXDEV`/`EOPNOTSUPP`/`ENOSYS` verdict, so the rest of the
427        // batch falls straight through to `fs::copy` without paying
428        // the syscall tax. The cost of one failed reflink call on a
429        // non-CoW filesystem is one syscall; it's not worth a
430        // dedicated probe.
431        let context = MaterializationContext::new();
432
433        let result = if worker_count <= 1 {
434            let mut seeded = Vec::with_capacity(writes.len());
435            for write in writes {
436                seeded.push(self.materialize_write_op(write, &context)?);
437            }
438            Ok((worker_count, seeded))
439        } else {
440            let chunk_size = writes.len().div_ceil(worker_count);
441            let seeded = thread::scope(|scope| -> Result<Vec<SeededWorktreeEntry>> {
442                let mut workers = Vec::new();
443                let context = &context;
444                for chunk in writes.chunks(chunk_size) {
445                    workers.push(scope.spawn(move || -> Result<Vec<SeededWorktreeEntry>> {
446                        let mut seeded = Vec::with_capacity(chunk.len());
447                        for write in chunk {
448                            seeded.push(self.materialize_write_op(write, context)?);
449                        }
450                        Ok(seeded)
451                    }));
452                }
453
454                let mut seeded = Vec::with_capacity(writes.len());
455                for worker in workers {
456                    seeded.extend(worker.join().map_err(|_| {
457                        HeddleError::Config("materialization worker panicked".to_string())
458                    })??);
459                }
460
461                Ok(seeded)
462            })?;
463
464            Ok((worker_count, seeded))
465        };
466
467        let reflinks = context.reflink_count.load(Ordering::Relaxed);
468        let copies = context.copy_count.load(Ordering::Relaxed);
469        if reflinks + copies > 0 {
470            debug!(
471                reflinks,
472                copies,
473                reflinks_enabled = context.reflinks_enabled(),
474                "Materialized blobs"
475            );
476        }
477
478        result
479    }
480
481    fn materialize_write_op(
482        &self,
483        write: &WorktreeWriteOp,
484        context: &MaterializationContext,
485    ) -> Result<SeededWorktreeEntry> {
486        match write {
487            WorktreeWriteOp::Blob {
488                path,
489                hash,
490                executable,
491            } => {
492                self.materialize_blob(path, hash, *executable, context)?;
493            }
494            WorktreeWriteOp::Symlink { path, hash } => {
495                let blob = self
496                    .store
497                    .get_blob(hash)?
498                    .ok_or_else(|| HeddleError::NotFound(format!("blob {}", hash)))?;
499                #[cfg(unix)]
500                {
501                    let target = std::str::from_utf8(blob.content()).map_err(|_| {
502                        HeddleError::InvalidObject("invalid symlink target".to_string())
503                    })?;
504                    remove_materialized_leaf(path)?;
505                    std::os::unix::fs::symlink(target, path)?;
506                }
507                #[cfg(not(unix))]
508                let _ = blob;
509            }
510        }
511
512        let metadata = fs::symlink_metadata(write.path())?;
513        let entry = build_cached_entry(
514            write.hash(),
515            &metadata,
516            write.executable(),
517            write.index_kind(),
518        )
519        .ok_or_else(|| {
520            HeddleError::Config(format!(
521                "seed materialized worktree entry for {}",
522                write.path().display()
523            ))
524        })?;
525
526        Ok(SeededWorktreeEntry {
527            key: cache_key(
528                write
529                    .path()
530                    .strip_prefix(self.root())
531                    .unwrap_or(write.path()),
532            ),
533            entry,
534        })
535    }
536
537    /// Materialize a single blob into the worktree.
538    ///
539    /// Strategy (in order):
540    ///   1. Filesystem reflink (`clonefile(2)` on macOS APFS,
541    ///      `ioctl(FICLONE)` on Linux btrfs/XFS/ZFS) from the
542    ///      canonical loose-uncompressed blob into `dest`. The dest
543    ///      gets its own inode; the kernel forks the underlying
544    ///      allocation on first write to either side. On reflink-
545    ///      capable filesystems this preserves the storage win
546    ///      (~1× disk for N worktrees of the same state) without
547    ///      any shared-inode hazard.
548    ///   2. Lazy promotion + retry. If the canonical loose blob
549    ///      isn't on disk (e.g. post-`pack_objects + prune_loose`),
550    ///      promote it once and retry the reflink.
551    ///   3. `fs::write` of the decompressed blob bytes. Used when the
552    ///      filesystem doesn't support reflinks at all
553    ///      (`EXDEV`/`EOPNOTSUPP`/`ENOSYS`), in which case we flip a
554    ///      batch-wide flag and stop trying for the rest of this
555    ///      materialization.
556    ///
557    /// Permission bits are normalized to `0o644` (or `0o755` for
558    /// executables) on every path. There is no read-only-mode
559    /// defense — agents can `chmod +w` and overwrite freely; the
560    /// filesystem-level isolation is what keeps sibling worktrees
561    /// safe.
562    fn materialize_blob(
563        &self,
564        dest: &Path,
565        hash: &ContentHash,
566        executable: bool,
567        context: &MaterializationContext,
568    ) -> Result<()> {
569        // Redaction short-circuit: if any redaction declares this
570        // blob's bytes off-limits, materialize the human-readable
571        // stub instead. The stub names who redacted it, when, why,
572        // and whether the bytes have already been purged. Safe to
573        // include in worktrees, semantic diffs, and bridge-git
574        // exports (which themselves call through `materialize_tree`).
575        // Errors loading the redactions store are propagated rather
576        // than swallowed — a partial redaction read shouldn't
577        // silently leak the original bytes.
578        if let Some(stub) = self
579            .redaction_stub_for_blob(hash)
580            .map_err(|err| HeddleError::Config(format!("redaction lookup failed: {err}")))?
581        {
582            let _ = fs::remove_file(dest);
583            fs::write(dest, stub.as_bytes())?;
584            // Stubs are never executable — overwriting a tracked
585            // executable with a stub correctly drops the +x bit so
586            // operators don't accidentally run the redaction notice.
587            set_file_mode(dest, false)?;
588            // The redaction stub path doesn't reflink/clone — count
589            // it as a copy so observability stays accurate.
590            context.record_copy();
591            let _ = executable;
592            return Ok(());
593        }
594
595        if context.reflinks_enabled() {
596            // First-pass: blob is already loose+uncompressed.
597            if let Some(source) = self.store.loose_blob_path(hash)
598                && self.try_clone(&source, dest, executable, context)?
599            {
600                return Ok(());
601            }
602            // Second-pass: lazy promotion. Pack-resident or
603            // compressed-loose blob — promote it to the canonical
604            // uncompressed-loose path, then retry the reflink.
605            // Without this step `pack_objects + prune_loose_objects`
606            // permanently degrades materialize to slow `fs::write`.
607            //
608            // The first materialize of any given hash pays
609            // decompress + atomic write, but every subsequent one
610            // (other worktrees, future `goto`s) is a single
611            // `clonefile`/FICLONE. Net win for any N > 1
612            // materializations on a CoW filesystem.
613            match self.store.promote_to_loose_uncompressed(hash) {
614                Ok(_) => {
615                    if let Some(source) = self.store.loose_blob_path(hash)
616                        && self.try_clone(&source, dest, executable, context)?
617                    {
618                        return Ok(());
619                    }
620                }
621                Err(err) => {
622                    debug!(
623                        ?err,
624                        hash = %hash,
625                        "promote_to_loose_uncompressed failed; falling back to fs::write"
626                    );
627                }
628            }
629        }
630
631        let blob = self
632            .store
633            .get_blob(hash)?
634            .ok_or_else(|| HeddleError::NotFound(format!("blob {}", hash)))?;
635        // Remove any stale dest before writing. We don't share inodes
636        // with the canonical store anymore (no hardlinks), but a
637        // previous `goto` could still have left an unrelated file
638        // here that we should overwrite cleanly.
639        let _ = fs::remove_file(dest);
640        fs::write(dest, blob.content())?;
641        set_file_mode(dest, executable)?;
642        context.record_copy();
643        Ok(())
644    }
645
646    /// One clone attempt: returns `Ok(true)` on a successful reflink
647    /// or fallback `fs::copy`, `Ok(false)` only when the
648    /// filesystem-level helper reports the operation isn't supported
649    /// (`EXDEV`/`EOPNOTSUPP`/`ENOSYS`/`EINVAL`). On the unsupported
650    /// verdict the context is flipped so the rest of the batch skips
651    /// straight to the in-memory `fs::write` path without paying the
652    /// failed-syscall tax. Genuine I/O errors bubble up.
653    fn try_clone(
654        &self,
655        source: &Path,
656        dest: &Path,
657        executable: bool,
658        context: &MaterializationContext,
659    ) -> Result<bool> {
660        // `clonefile`/`FICLONE` fail if `dest` already exists, so
661        // make sure we're starting from a clean slate. A previous
662        // `goto` could have left a regular file or a stale link here.
663        let _ = fs::remove_file(dest);
664        match objects::fs_clone::try_reflink(source, dest) {
665            Ok(true) => {
666                set_file_mode(dest, executable)?;
667                context.record_reflink();
668                Ok(true)
669            }
670            Ok(false) => {
671                // Filesystem doesn't support reflinks. Disable for
672                // the rest of the batch and let the caller fall
673                // through to `fs::write` (which decompresses from
674                // memory rather than reading the loose file twice).
675                debug!(
676                    source = %source.display(),
677                    dest = %dest.display(),
678                    "reflink not supported on this filesystem; switching batch to fs::write fallback"
679                );
680                context.disable_reflinks();
681                Ok(false)
682            }
683            Err(err) => {
684                debug!(
685                    ?err,
686                    source = %source.display(),
687                    dest = %dest.display(),
688                    "reflink failed with I/O error"
689                );
690                Err(err.into())
691            }
692        }
693    }
694}
695
696fn prepare_parent_directories(writes: &[WorktreeWriteOp]) -> Result<()> {
697    let mut parents = BTreeSet::new();
698    for write in writes {
699        if let Some(parent) = write.path().parent() {
700            parents.insert(parent.to_path_buf());
701        }
702    }
703
704    for parent in parents {
705        fs::create_dir_all(&parent)
706            .map_err(|e| HeddleError::Io(enrich_fs_error(&parent, "creating", e)))?;
707    }
708
709    Ok(())
710}
711
712/// Best-effort removal of a leaf path, used by the symlink-write
713/// branch when a tree entry has changed shape (e.g. a directory has
714/// become a symlink in the new tree).
715///
716/// Tolerates `ENOTEMPTY` from `remove_dir` for the same reason the
717/// incremental apply path does: heddle-ignored siblings (`.git/`,
718/// `target/`, `node_modules/`) may still occupy the directory after
719/// the planner has cleaned out the tracked children. Without this
720/// tolerance, a `goto` over a real-world worktree that mutates a
721/// tracked directory into a symlink aborts mid-apply with `os error
722/// 66`, leaving HEAD stuck and disk diverged from state.
723fn remove_materialized_leaf(path: &Path) -> Result<()> {
724    match fs::symlink_metadata(path) {
725        Ok(metadata) => {
726            let file_type = metadata.file_type();
727            if file_type.is_symlink() || file_type.is_file() {
728                fs::remove_file(path)
729                    .map_err(|e| HeddleError::Io(enrich_fs_error(path, "removing", e)))?;
730            } else if file_type.is_dir() {
731                match fs::remove_dir(path) {
732                    Ok(()) => {}
733                    Err(error) if is_directory_not_empty(&error) => {}
734                    Err(error) => {
735                        return Err(HeddleError::Io(enrich_fs_error(path, "removing", error)));
736                    }
737                }
738            }
739            Ok(())
740        }
741        Err(error) if error.kind() == std::io::ErrorKind::NotFound => Ok(()),
742        Err(error) => Err(HeddleError::Io(enrich_fs_error(path, "inspecting", error))),
743    }
744}
745
746fn set_file_mode(path: &Path, executable: bool) -> Result<()> {
747    #[cfg(unix)]
748    {
749        use std::os::unix::fs::PermissionsExt;
750
751        let mode = if executable { 0o755 } else { 0o644 };
752        fs::set_permissions(path, fs::Permissions::from_mode(mode))?;
753    }
754    #[cfg(not(unix))]
755    {
756        let _ = (path, executable);
757    }
758    Ok(())
759}
760
761fn materialization_worker_count(
762    operation_count: usize,
763    requested_threads: Option<NonZeroUsize>,
764) -> usize {
765    if operation_count < MATERIALIZE_PARALLEL_THRESHOLD {
766        return 1;
767    }
768
769    let available = requested_threads.unwrap_or_else(default_materialization_threads);
770    available.get().min(operation_count.max(1))
771}
772
/// Worker count used when none was requested: the parallelism the
/// standard library reports as available, or one thread if that
/// query fails.
fn default_materialization_threads() -> NonZeroUsize {
    match std::thread::available_parallelism() {
        Ok(parallelism) => parallelism,
        Err(_) => NonZeroUsize::MIN,
    }
}
776
777fn requested_materialization_threads() -> Option<NonZeroUsize> {
778    let raw = std::env::var(MATERIALIZE_THREADS_ENV).ok()?;
779    raw.trim().parse::<usize>().ok().and_then(NonZeroUsize::new)
780}
781
782#[cfg(test)]
783mod tests {
784    use std::{num::NonZeroUsize, path::PathBuf};
785
786    use objects::{fs_clone::filesystem_supports_reflink, object::Blob};
787    use tempfile::TempDir;
788
789    use super::{
790        Repository, WorktreeWriteOp, materialization_worker_count, remove_materialized_leaf,
791    };
792
793    /// Regression: `remove_materialized_leaf` must tolerate `ENOTEMPTY` on
794    /// the directory branch, mirroring `remove_existing_path` in the
795    /// incremental apply path. Both tolerances are needed because the
796    /// apply planner intentionally skips heddle-ignored entries — when
797    /// the planner asks the materializer to clear a directory whose
798    /// tracked children are gone but whose ignored children
799    /// (`.git/`, `target/`, `node_modules/`) remain, `remove_dir` errors
800    /// with `os error 66` (macOS/BSD) / `39` (Linux). Pre-fix the
801    /// materialization branch propagated that error and aborted apply
802    /// mid-walk, leaving HEAD stuck and disk diverged from state.
803    #[test]
804    fn remove_materialized_leaf_tolerates_directory_not_empty() {
805        let temp = TempDir::new().unwrap();
806        let dir = temp.path().join("web");
807        std::fs::create_dir_all(dir.join("node_modules/lodash")).unwrap();
808        std::fs::write(dir.join("node_modules/lodash/index.js"), "ignored").unwrap();
809
810        // Pre-fix this would propagate ENOTEMPTY; post-fix it returns Ok
811        // and leaves the directory (with its ignored content) on disk.
812        remove_materialized_leaf(&dir).expect("must tolerate ENOTEMPTY");
813        assert!(
814            dir.join("node_modules/lodash/index.js").exists(),
815            "ignored content must survive the tolerated removal"
816        );
817    }
818
819    /// Regression: empty directories still get cleaned up (the common
820    /// case). The `ENOTEMPTY` tolerance must not regress the happy path.
821    #[test]
822    fn remove_materialized_leaf_removes_empty_directory() {
823        let temp = TempDir::new().unwrap();
824        let dir = temp.path().join("emptydir");
825        std::fs::create_dir(&dir).unwrap();
826
827        remove_materialized_leaf(&dir).expect("must remove empty dir");
828        assert!(!dir.exists(), "empty directory must be removed");
829    }
830
831    /// Regression: missing paths are a no-op (NotFound), not an error.
832    #[test]
833    fn remove_materialized_leaf_is_noop_for_missing_path() {
834        let temp = TempDir::new().unwrap();
835        remove_materialized_leaf(&temp.path().join("does-not-exist"))
836            .expect("missing path must be a no-op");
837    }
838
839    /// Regression: regular files are still removed (the common symlink-
840    /// replacement case where the existing leaf was a tracked file).
841    #[test]
842    fn remove_materialized_leaf_removes_regular_file() {
843        let temp = TempDir::new().unwrap();
844        let file = temp.path().join("a.txt");
845        std::fs::write(&file, "content").unwrap();
846
847        remove_materialized_leaf(&file).expect("must remove regular file");
848        assert!(!file.exists(), "regular file must be removed");
849    }
850
851    #[test]
852    fn materialization_parallelism_stays_sequential_for_small_workloads() {
853        assert_eq!(materialization_worker_count(31, Some(NonZeroUsize::MIN)), 1);
854    }
855
856    #[test]
857    fn materialization_parallelism_respects_requested_thread_cap() {
858        assert_eq!(materialization_worker_count(128, NonZeroUsize::new(4)), 4);
859    }
860
861    #[test]
862    fn materialize_write_ops_prepares_missing_parent_directories() {
863        let temp_dir = TempDir::new().unwrap();
864        let repo = Repository::init_default(temp_dir.path()).unwrap();
865
866        let blob = Blob::from("cold pull payload");
867        let hash = repo.store().put_blob(&blob).unwrap();
868        let file_path = temp_dir.path().join("nested/deep/file.txt");
869
870        repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
871            path: file_path.clone(),
872            hash,
873            executable: false,
874        }])
875        .unwrap();
876
877        assert_eq!(
878            std::fs::read_to_string(&file_path).unwrap(),
879            "cold pull payload"
880        );
881    }
882
883    /// Materialized blobs must be writable by default. The
884    /// previous hardlink+chmod-0o444 approach was a footgun:
885    /// `chmod 644` then in-place write would mutate the canonical
886    /// store inode, corrupting every other worktree. The fix is
887    /// filesystem-level CoW (or full copy), so each worktree gets
888    /// its own inode and a normal `0o644`/`0o755` mode.
889    #[test]
890    #[cfg(unix)]
891    fn materialized_blob_uses_normal_writable_mode() {
892        use std::os::unix::fs::PermissionsExt;
893
894        let temp_dir = TempDir::new().unwrap();
895        let repo = Repository::init_default(temp_dir.path()).unwrap();
896
897        let blob = Blob::from("normal mode payload");
898        let hash = repo.store().put_blob(&blob).unwrap();
899        let regular = temp_dir.path().join("worktree/file.txt");
900        let exec = temp_dir.path().join("worktree/run.sh");
901
902        repo.materialize_write_ops(&[
903            WorktreeWriteOp::Blob {
904                path: regular.clone(),
905                hash,
906                executable: false,
907            },
908            WorktreeWriteOp::Blob {
909                path: exec.clone(),
910                hash,
911                executable: true,
912            },
913        ])
914        .unwrap();
915
916        let regular_mode = std::fs::metadata(&regular).unwrap().permissions().mode() & 0o777;
917        let exec_mode = std::fs::metadata(&exec).unwrap().permissions().mode() & 0o777;
918        assert_eq!(
919            regular_mode, 0o644,
920            "regular blob must be 0o644 (got 0o{:o})",
921            regular_mode
922        );
923        assert_eq!(
924            exec_mode, 0o755,
925            "executable blob must be 0o755 (got 0o{:o})",
926            exec_mode
927        );
928
929        // Sanity: a plain in-place write on the materialized file
930        // must succeed (no chmod gymnastics required).
931        std::fs::write(&regular, b"agent edits this").unwrap();
932        assert_eq!(std::fs::read(&regular).unwrap(), b"agent edits this");
933    }
934
935    /// THE core isolation property. An agent in worktree-A that
936    /// chmods +w (no-op since we already ship 0o644) and writes
937    /// in-place must not affect worktree-B's bytes. Under the old
938    /// hardlink+chmod model this exact sequence corrupted sibling
939    /// worktrees through the shared inode. Under the new
940    /// CoW/copy model the worktrees have distinct inodes and the
941    /// kernel guarantees isolation.
942    #[test]
943    #[cfg(unix)]
944    fn materialize_then_chmod_and_write_does_not_affect_sibling_worktree() {
945        use std::os::unix::fs::PermissionsExt;
946
947        let temp_dir = TempDir::new().unwrap();
948        let repo = Repository::init_default(temp_dir.path()).unwrap();
949
950        let blob = Blob::from("canonical bytes that must never change");
951        let hash = repo.store().put_blob(&blob).unwrap();
952
953        let worktree_a = temp_dir.path().join("wt-a/file.txt");
954        let worktree_b = temp_dir.path().join("wt-b/file.txt");
955
956        repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
957            path: worktree_a.clone(),
958            hash,
959            executable: false,
960        }])
961        .unwrap();
962        repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
963            path: worktree_b.clone(),
964            hash,
965            executable: false,
966        }])
967        .unwrap();
968
969        // Simulate a misbehaving agent: re-assert mode 0o644 (the
970        // old defense rendered this a no-op for blocking writes),
971        // then truncate-and-overwrite in place via the shell-style
972        // `> file` pathway.
973        std::fs::set_permissions(&worktree_a, std::fs::Permissions::from_mode(0o644)).unwrap();
974        std::fs::write(&worktree_a, b"AGENT_TAMPERED_WITH_WORKTREE_A").unwrap();
975
976        // Sibling worktree's bytes are unchanged.
977        assert_eq!(
978            std::fs::read(&worktree_b).unwrap(),
979            blob.content(),
980            "sibling worktree must keep canonical bytes despite in-place write to worktree-a"
981        );
982        // And the canonical loose blob in the store is untouched.
983        if let Some(loose) = repo.store().loose_blob_path(&hash) {
984            assert_eq!(
985                std::fs::read(&loose).unwrap(),
986                blob.content(),
987                "canonical loose blob must keep canonical bytes despite in-place write to worktree-a"
988            );
989        }
990    }
991
992    /// Atomic-rename writes (write-tempfile + `rename(2)` over
993    /// target) must also leave sibling worktrees untouched. This
994    /// path was always safe under the old model too — proving it
995    /// keeps working with the new isolation strategy.
996    #[test]
997    #[cfg(unix)]
998    fn materialize_atomic_rename_does_not_affect_sibling_worktree() {
999        let temp_dir = TempDir::new().unwrap();
1000        let repo = Repository::init_default(temp_dir.path()).unwrap();
1001
1002        let blob = Blob::from("atomic-rename canonical bytes");
1003        let hash = repo.store().put_blob(&blob).unwrap();
1004
1005        let worktree_a = temp_dir.path().join("wt-a/file.txt");
1006        let worktree_b = temp_dir.path().join("wt-b/file.txt");
1007
1008        repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1009            path: worktree_a.clone(),
1010            hash,
1011            executable: false,
1012        }])
1013        .unwrap();
1014        repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1015            path: worktree_b.clone(),
1016            hash,
1017            executable: false,
1018        }])
1019        .unwrap();
1020
1021        let tmp = temp_dir.path().join("wt-a/file.txt.tmp");
1022        std::fs::write(&tmp, b"NEW_CONTENT_VIA_ATOMIC_RENAME").unwrap();
1023        std::fs::rename(&tmp, &worktree_a).unwrap();
1024
1025        assert_eq!(
1026            std::fs::read(&worktree_a).unwrap(),
1027            b"NEW_CONTENT_VIA_ATOMIC_RENAME"
1028        );
1029        assert_eq!(
1030            std::fs::read(&worktree_b).unwrap(),
1031            blob.content(),
1032            "sibling worktree must keep canonical bytes despite atomic rename in worktree-a"
1033        );
1034    }
1035
1036    /// On a CoW filesystem (APFS, btrfs, XFS-with-reflinks, ZFS)
1037    /// the materialized worktree file must have a **distinct**
1038    /// inode from the canonical loose blob. This is the key
1039    /// correctness assertion that distinguishes reflinks from
1040    /// hardlinks: hardlinks share inodes (the bug we fixed),
1041    /// reflinks do not.
1042    ///
1043    /// On non-CoW filesystems the test soft-skips — `fs::copy`
1044    /// also gives distinct inodes, but the test is targeted at
1045    /// the reflink path specifically.
1046    #[test]
1047    #[cfg(unix)]
1048    fn materialize_uses_reflink_when_filesystem_supports_it() {
1049        use std::os::unix::fs::MetadataExt;
1050
1051        let temp_dir = TempDir::new().unwrap();
1052        if !filesystem_supports_reflink(temp_dir.path()) {
1053            eprintln!(
1054                "[skip] filesystem at {:?} does not advertise reflink support",
1055                temp_dir.path()
1056            );
1057            return;
1058        }
1059
1060        let repo = Repository::init_default(temp_dir.path()).unwrap();
1061        let blob = Blob::from("reflink correctness check, kept under compression threshold");
1062        let hash = repo.store().put_blob(&blob).unwrap();
1063        let worktree = temp_dir.path().join("wt/file.txt");
1064
1065        repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1066            path: worktree.clone(),
1067            hash,
1068            executable: false,
1069        }])
1070        .unwrap();
1071
1072        let loose = repo
1073            .store()
1074            .loose_blob_path(&hash)
1075            .expect("blob must be loose+uncompressed (under threshold)");
1076        let loose_inode = std::fs::metadata(&loose).unwrap().ino();
1077        let worktree_inode = std::fs::metadata(&worktree).unwrap().ino();
1078        assert_ne!(
1079            loose_inode, worktree_inode,
1080            "reflinked worktree file must have a distinct inode from canonical loose blob (got {} for both — that's a hardlink, the bug we fixed)",
1081            loose_inode
1082        );
1083        // And nlink on the canonical blob is 1: nothing aliases it.
1084        let nlink = std::fs::metadata(&loose).unwrap().nlink();
1085        assert_eq!(
1086            nlink, 1,
1087            "canonical loose blob must not be aliased (nlink={}); reflinks share blocks, not inodes",
1088            nlink
1089        );
1090    }
1091
1092    /// Functional readback after N materializations of the same
1093    /// blob across N worktrees on the same filesystem. Replaces
1094    /// the old "shared inode" assertion which is no longer the
1095    /// correctness model. Now we just assert every worktree reads
1096    /// back the canonical bytes (and they're independent — see
1097    /// the isolation tests above).
1098    #[test]
1099    #[cfg(unix)]
1100    fn materialize_blob_into_two_worktrees_reads_back_canonical_bytes() {
1101        let temp_dir = TempDir::new().unwrap();
1102        let repo = Repository::init_default(temp_dir.path()).unwrap();
1103
1104        let blob = Blob::from("two-worktree readback payload");
1105        let hash = repo.store().put_blob(&blob).unwrap();
1106
1107        let worktree_a = temp_dir.path().join("worktree-a/file.txt");
1108        let worktree_b = temp_dir.path().join("worktree-b/file.txt");
1109
1110        repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1111            path: worktree_a.clone(),
1112            hash,
1113            executable: false,
1114        }])
1115        .unwrap();
1116        repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1117            path: worktree_b.clone(),
1118            hash,
1119            executable: false,
1120        }])
1121        .unwrap();
1122
1123        assert_eq!(std::fs::read(&worktree_a).unwrap(), blob.content());
1124        assert_eq!(std::fs::read(&worktree_b).unwrap(), blob.content());
1125    }
1126
1127    /// Symlinks are routed through the existing path; introducing
1128    /// hardlinks must not regress the symlink test that lives in
1129    /// `repository_tests.rs`. Locally we just confirm a symlink op
1130    /// still produces a real symlink (not a hardlink to the target
1131    /// blob's loose path).
1132    #[test]
1133    #[cfg(unix)]
1134    fn materialize_symlink_op_produces_real_symlink_not_hardlink() {
1135        let temp_dir = TempDir::new().unwrap();
1136        let repo = Repository::init_default(temp_dir.path()).unwrap();
1137
1138        let symlink_blob = Blob::new(b"../canonical".to_vec());
1139        let symlink_hash = repo.store().put_blob(&symlink_blob).unwrap();
1140        let path = temp_dir.path().join("worktree/link.txt");
1141
1142        repo.materialize_write_ops(&[WorktreeWriteOp::Symlink {
1143            path: path.clone(),
1144            hash: symlink_hash,
1145        }])
1146        .unwrap();
1147
1148        let meta = std::fs::symlink_metadata(&path).unwrap();
1149        assert!(
1150            meta.file_type().is_symlink(),
1151            "Symlink op must produce a real symlink, not a hardlinked regular file"
1152        );
1153        assert_eq!(
1154            std::fs::read_link(&path).unwrap(),
1155            PathBuf::from("../canonical")
1156        );
1157    }
1158
1159    #[test]
1160    #[cfg(unix)]
1161    fn materialize_symlink_op_replaces_existing_symlink() {
1162        let temp_dir = TempDir::new().unwrap();
1163        let repo = Repository::init_default(temp_dir.path()).unwrap();
1164
1165        let first_hash = repo.store().put_blob(&Blob::from("first")).unwrap();
1166        let second_hash = repo.store().put_blob(&Blob::from("second")).unwrap();
1167        let path = temp_dir.path().join("worktree/link.txt");
1168
1169        repo.materialize_write_ops(&[WorktreeWriteOp::Symlink {
1170            path: path.clone(),
1171            hash: first_hash,
1172        }])
1173        .unwrap();
1174        repo.materialize_write_ops(&[WorktreeWriteOp::Symlink {
1175            path: path.clone(),
1176            hash: second_hash,
1177        }])
1178        .unwrap();
1179
1180        assert_eq!(std::fs::read_link(&path).unwrap(), PathBuf::from("second"));
1181    }
1182
1183    #[test]
1184    #[cfg(unix)]
1185    fn materialize_write_ops_reuses_prepared_parent_for_multiple_writes() {
1186        let temp_dir = TempDir::new().unwrap();
1187        let repo = Repository::init_default(temp_dir.path()).unwrap();
1188
1189        let symlink_target = Blob::new(b"../target.txt".to_vec());
1190        let target_hash = repo.store().put_blob(&Blob::from("target")).unwrap();
1191        let symlink_hash = repo.store().put_blob(&symlink_target).unwrap();
1192        let base_dir = temp_dir.path().join("nested/deep");
1193        let target_path = base_dir.join("target.txt");
1194        let link_path = base_dir.join("link.txt");
1195
1196        repo.materialize_write_ops(&[
1197            WorktreeWriteOp::Blob {
1198                path: target_path.clone(),
1199                hash: target_hash,
1200                executable: false,
1201            },
1202            WorktreeWriteOp::Symlink {
1203                path: link_path.clone(),
1204                hash: symlink_hash,
1205            },
1206        ])
1207        .unwrap();
1208
1209        assert_eq!(std::fs::read_to_string(&target_path).unwrap(), "target");
1210        assert_eq!(
1211            std::fs::read_link(&link_path).unwrap(),
1212            PathBuf::from("../target.txt")
1213        );
1214    }
1215
1216    /// After `pack_objects + prune_loose_objects`, every blob is
1217    /// pack-only. The lazy-promotion path inside `materialize_blob`
1218    /// must (a) succeed without errors, (b) read back the canonical
1219    /// bytes in both worktrees, and (c) leave a real loose
1220    /// uncompressed mirror on disk under
1221    /// `.heddle/objects/blobs/<2-char>/<rest>` so subsequent
1222    /// reflinks have something to clone from.
1223    #[test]
1224    #[cfg(unix)]
1225    fn lazy_promotion_after_pack_and_prune_restores_loose_mirror() {
1226        let temp_dir = TempDir::new().unwrap();
1227        let repo = Repository::init_default(temp_dir.path()).unwrap();
1228
1229        let blob = Blob::from(
1230            "lazy-promotion payload, packed-then-pruned, kept under compression threshold",
1231        );
1232        let hash = repo.store().put_blob(&blob).unwrap();
1233
1234        // Move the loose copy into a packfile, then drop the loose
1235        // copy. The store now has only the pack-resident blob.
1236        repo.store().pack_objects(false).unwrap();
1237        repo.store().prune_loose_objects().unwrap();
1238        assert!(
1239            repo.store().loose_blob_path(&hash).is_none(),
1240            "after pack+prune, the canonical loose path must be empty"
1241        );
1242
1243        let worktree_a = temp_dir.path().join("worktree-a/file.txt");
1244        let worktree_b = temp_dir.path().join("worktree-b/file.txt");
1245        repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1246            path: worktree_a.clone(),
1247            hash,
1248            executable: false,
1249        }])
1250        .unwrap();
1251        repo.materialize_write_ops(&[WorktreeWriteOp::Blob {
1252            path: worktree_b.clone(),
1253            hash,
1254            executable: false,
1255        }])
1256        .unwrap();
1257
1258        // (a)+(b) read back ok.
1259        assert_eq!(std::fs::read(&worktree_a).unwrap(), blob.content());
1260        assert_eq!(std::fs::read(&worktree_b).unwrap(), blob.content());
1261
1262        // (c) the loose-uncompressed mirror exists.
1263        let loose = repo
1264            .store()
1265            .loose_blob_path(&hash)
1266            .expect("after lazy promotion the canonical loose path must exist");
1267        assert_eq!(std::fs::read(&loose).unwrap(), blob.content());
1268    }
1269
1270    /// Proactive warm: walk a state's tree, promote every reachable
1271    /// blob, then materialize. Every blob must be loose-uncompressed
1272    /// after warm so the materialize step can reflink directly
1273    /// without paying the decompress tax. Cross-worktree readback
1274    /// must give the canonical bytes.
1275    #[test]
1276    #[cfg(unix)]
1277    fn proactive_warm_promotes_all_state_blobs() {
1278        let temp_dir = TempDir::new().unwrap();
1279        let repo = Repository::init_default(temp_dir.path()).unwrap();
1280
1281        // Materialize a few files and snapshot.
1282        for i in 0..4 {
1283            std::fs::write(
1284                temp_dir.path().join(format!("file-{i}.txt")),
1285                format!("warm-pass payload {i} {}", "x".repeat(140)),
1286            )
1287            .unwrap();
1288        }
1289        let state = repo
1290            .snapshot(Some("warm-pass test".to_string()), None)
1291            .unwrap();
1292
1293        // Pack + prune so every blob is pack-only.
1294        repo.store().pack_objects(false).unwrap();
1295        repo.store().prune_loose_objects().unwrap();
1296
1297        // Sanity: with a packed-then-pruned store, no canonical loose
1298        // file exists yet for the snapshot's blobs.
1299        let tree = repo.store().get_tree(&state.tree).unwrap().unwrap();
1300        let mut hashes = std::collections::BTreeSet::new();
1301        repo.collect_blob_hashes(&tree, &mut hashes).unwrap();
1302        for hash in &hashes {
1303            assert!(
1304                repo.store().loose_blob_path(hash).is_none(),
1305                "blob {} should be pack-only before warm",
1306                hash
1307            );
1308        }
1309
1310        // Warm: every blob should now be loose-uncompressed.
1311        let stats = repo
1312            .warm_canonical_store_for_state(&state.change_id)
1313            .unwrap();
1314        assert_eq!(stats.errors, 0, "warm pass produced errors: {:?}", stats);
1315        assert_eq!(stats.total(), hashes.len());
1316        assert!(
1317            stats.promoted >= hashes.len(),
1318            "expected to promote all {} blobs, got {} (already_loose={})",
1319            hashes.len(),
1320            stats.promoted,
1321            stats.already_loose
1322        );
1323        for hash in &hashes {
1324            assert!(
1325                repo.store().loose_blob_path(hash).is_some(),
1326                "blob {} should be loose+uncompressed after warm",
1327                hash
1328            );
1329        }
1330
1331        // Materialize across two worktrees on the same FS. Reading
1332        // back from each must yield the canonical bytes; isolation
1333        // is guaranteed by filesystem-level CoW (or full copy).
1334        let worktree_a = temp_dir.path().join("wt-a");
1335        let worktree_b = temp_dir.path().join("wt-b");
1336        repo.materialize_tree(&tree, &worktree_a).unwrap();
1337        repo.materialize_tree(&tree, &worktree_b).unwrap();
1338
1339        for entry in tree.entries() {
1340            let path_a = worktree_a.join(&entry.name);
1341            let path_b = worktree_b.join(&entry.name);
1342            assert_eq!(
1343                std::fs::read(&path_a).unwrap(),
1344                std::fs::read(&path_b).unwrap(),
1345                "{} must read back identically across worktrees",
1346                entry.name
1347            );
1348        }
1349    }
1350
1351    /// Idempotent warm: a second pass over the same state must not
1352    /// rewrite anything. Every blob is `already_loose`.
1353    #[test]
1354    #[cfg(unix)]
1355    fn warm_canonical_store_is_idempotent() {
1356        let temp_dir = TempDir::new().unwrap();
1357        let repo = Repository::init_default(temp_dir.path()).unwrap();
1358
1359        for i in 0..3 {
1360            std::fs::write(
1361                temp_dir.path().join(format!("idem-{i}.txt")),
1362                format!("idem payload {i} {}", "x".repeat(160)),
1363            )
1364            .unwrap();
1365        }
1366        let state = repo
1367            .snapshot(Some("idempotent warm".to_string()), None)
1368            .unwrap();
1369        repo.store().pack_objects(false).unwrap();
1370        repo.store().prune_loose_objects().unwrap();
1371
1372        let first = repo
1373            .warm_canonical_store_for_state(&state.change_id)
1374            .unwrap();
1375        let second = repo
1376            .warm_canonical_store_for_state(&state.change_id)
1377            .unwrap();
1378
1379        assert_eq!(first.total(), second.total(), "blob count must be stable");
1380        assert_eq!(
1381            second.promoted, 0,
1382            "second warm must not promote anything (got {})",
1383            second.promoted
1384        );
1385        assert_eq!(
1386            second.already_loose,
1387            second.total(),
1388            "every blob must be already_loose on second pass"
1389        );
1390        assert_eq!(second.errors, 0);
1391    }
1392
1393    /// Storage win after warm + materialize on a CoW filesystem.
1394    /// We can no longer dedupe via inode (reflinks have distinct
1395    /// inodes by design), so on CoW filesystems we instead assert
1396    /// that **every materialized file has its own inode**, distinct
1397    /// from the canonical loose blob — proving the materializer
1398    /// took the reflink path (which gives the storage win on CoW
1399    /// without aliasing) rather than the in-memory `fs::write` path
1400    /// (which costs full duplicates).
1401    ///
1402    /// On non-CoW filesystems the test soft-skips. The materializer
1403    /// will use `fs::copy` and the storage win is not recoverable
1404    /// without reflink support.
1405    #[test]
1406    #[cfg(unix)]
1407    fn packed_repo_storage_win_after_warm_and_materialize() {
1408        use std::{collections::HashSet, os::unix::fs::MetadataExt};
1409
1410        let temp_dir = TempDir::new().unwrap();
1411        if !filesystem_supports_reflink(temp_dir.path()) {
1412            eprintln!(
1413                "[skip] filesystem at {:?} does not support reflinks; storage-win test is reflink-specific",
1414                temp_dir.path()
1415            );
1416            return;
1417        }
1418
1419        let repo = Repository::init_default(temp_dir.path()).unwrap();
1420
1421        let blob_count = 5;
1422        for i in 0..blob_count {
1423            std::fs::write(
1424                temp_dir.path().join(format!("file-{i}.txt")),
1425                format!("packed-storage-win payload {i} {}", "x".repeat(140 + i * 8)),
1426            )
1427            .unwrap();
1428        }
1429        let state = repo
1430            .snapshot(Some("packed storage win".to_string()), None)
1431            .unwrap();
1432        // Realistic steady state.
1433        repo.store().pack_objects(false).unwrap();
1434        repo.store().prune_loose_objects().unwrap();
1435
1436        // Warm so the first materialize doesn't pay decompress cost.
1437        let stats = repo
1438            .warm_canonical_store_for_state(&state.change_id)
1439            .unwrap();
1440        assert_eq!(stats.errors, 0);
1441
1442        let n_worktrees = 6;
1443        let tree = repo.store().get_tree(&state.tree).unwrap().unwrap();
1444        let mut all_paths = Vec::new();
1445        for w in 0..n_worktrees {
1446            let worktree = temp_dir.path().join(format!("wt-{w}"));
1447            repo.materialize_tree(&tree, &worktree).unwrap();
1448            for i in 0..blob_count {
1449                all_paths.push(worktree.join(format!("file-{i}.txt")));
1450            }
1451        }
1452
1453        // Every materialized file has its own inode (reflinks, not
1454        // hardlinks). Total inodes = files materialized.
1455        let mut inodes = HashSet::new();
1456        for path in &all_paths {
1457            inodes.insert(std::fs::metadata(path).unwrap().ino());
1458        }
1459        assert_eq!(
1460            inodes.len(),
1461            all_paths.len(),
1462            "every reflinked worktree file must have its own inode (got {} for {} files)",
1463            inodes.len(),
1464            all_paths.len()
1465        );
1466
1467        // No materialized file shares an inode with the canonical
1468        // loose blob — that would be the hardlink bug.
1469        let mut canonical_inodes = HashSet::new();
1470        for hash in tree.entries().iter().map(|e| &e.hash) {
1471            if let Some(loose) = repo.store().loose_blob_path(hash) {
1472                canonical_inodes.insert(std::fs::metadata(&loose).unwrap().ino());
1473            }
1474        }
1475        for inode in &inodes {
1476            assert!(
1477                !canonical_inodes.contains(inode),
1478                "worktree file inode {} aliases the canonical loose blob — that's the hardlink bug",
1479                inode
1480            );
1481        }
1482
1483        eprintln!(
1484            "[packed-storage-win] n_worktrees={} blobs/tree={} reflink_path_confirmed=true",
1485            n_worktrees, blob_count
1486        );
1487    }
1488
1489    /// `promote_to_loose_uncompressed` is idempotent for an already
1490    /// loose+uncompressed blob — fast-path returns `Ok(false)` so a
1491    /// caller can distinguish "no work needed" from "promoted".
1492    #[test]
1493    fn promote_to_loose_uncompressed_idempotent_on_loose_blob() {
1494        let temp_dir = TempDir::new().unwrap();
1495        let repo = Repository::init_default(temp_dir.path()).unwrap();
1496
1497        let blob = Blob::from("idempotent promote payload");
1498        let hash = repo.store().put_blob(&blob).unwrap();
1499        // Already loose+uncompressed (under compression threshold).
1500        assert!(repo.store().loose_blob_path(&hash).is_some());
1501
1502        let did_work = repo.store().promote_to_loose_uncompressed(&hash).unwrap();
1503        assert!(
1504            !did_work,
1505            "promote on already-loose+uncompressed blob must be a no-op"
1506        );
1507    }
1508
1509    /// `promote_to_loose_uncompressed` on a missing blob bubbles a
1510    /// `NotFound`, not a silent success. Callers can degrade
1511    /// gracefully (e.g. lazy-path falls back to `fs::write`), but
1512    /// the failure must not be invisible.
1513    #[test]
1514    fn promote_to_loose_uncompressed_returns_error_for_missing_blob() {
1515        use objects::object::ContentHash;
1516
1517        let temp_dir = TempDir::new().unwrap();
1518        let repo = Repository::init_default(temp_dir.path()).unwrap();
1519
1520        let bogus = ContentHash::compute_typed("blob", b"never-stored");
1521        let result = repo.store().promote_to_loose_uncompressed(&bogus);
1522        assert!(
1523            result.is_err(),
1524            "promote on missing blob must error, got {:?}",
1525            result
1526        );
1527    }
1528}