repo/
repository_thread_materialize.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Thread-level materialization: resolve a thread → state → tree,
3//! materialize the tree to disk (clonefile-first via the existing
4//! `Repository::materialize_tree`), and write a [`ThreadManifest`]
5//! sidecar that captures the per-file stat-cache for fast subsequent
6//! `heddle capture` scans.
7//!
8//! This is the day-one default workspace shape for lightweight
9//! threads on reflink-capable filesystems (see
10//! `docs/design/clonefile-threads.md`). Reads off the materialized
11//! tree are vanilla `read(2)` against real APFS/btrfs files — no
12//! userspace FS callbacks in the hot path. Disk usage is the
13//! ~zero-cost clonefile share until the agent diverges blocks.
14
15use std::{
16    collections::{BTreeMap, BTreeSet},
17    fs,
18    path::{Path, PathBuf},
19};
20
21use chrono::{DateTime, Utc};
22use objects::{
23    lock::RepositoryLockExt,
24    object::{ChangeId, State, ThreadName, Tree, VisibilityTier},
25    store::ObjectStore,
26};
27use oplog::OpRecord;
28use refs::RefExpectation;
29use tracing::{debug, instrument};
30
31use super::{HeddleError, Repository, Result};
32use crate::{
33    ThreadWorktreeTargetDisposition, ThreadWorktreeTargetError,
34    thread_manifest::{ManifestFile, ThreadManifest, read_manifest, write_manifest},
35    validate_thread_worktree_target,
36    visibility::{AudienceTier, visible},
37};
38
/// Filename of the operator-local courtesy placeholder written when a
/// checked-out state's tier is not visible to the operator's audience.
///
/// The withheld branch of [`Repository::checkout_state_gated`] writes a file
/// with this name directly under the checkout root; the visible branch of the
/// same method removes a leftover copy from an earlier withheld checkout.
pub(crate) const COURTESY_STUB_FILENAME: &str = "HEDDLE-EMBARGO.txt";
42
/// Outcome of the visibility-gated checkout chokepoint
/// [`Repository::checkout_state_gated`].
///
/// Callers branch on the variant to decide which manifest shape to record:
/// [`Repository::materialize_thread`] populates per-file entries for
/// `Materialized` and writes an empty, `withheld`-flagged manifest for
/// `Withheld`.
#[derive(Clone, Debug)]
pub enum CheckoutMaterialization {
    /// The state was visible to the audience: its real tree was materialized
    /// to `dest`. Carries the resolved tree so callers can populate a manifest
    /// without a second store lookup.
    Materialized { tree: Tree },
    /// The state was under-tier for the audience: the operator-local courtesy
    /// stub was written to `dest` and the tracked bytes withheld. Carries the
    /// effective tier that failed the visibility check.
    Withheld { tier: VisibilityTier },
}
55
/// Outcome of [`Repository::capture_thread_from_disk`].
///
/// Cheap to pass by value: the type is `Copy`, as is the `ChangeId` payload
/// of [`ThreadCaptureOutcome::Captured`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ThreadCaptureOutcome {
    /// The materialized tree matches the existing thread head; no
    /// new state was written. The manifest was refreshed to reflect
    /// the latest stat fields (so subsequent captures stay fast even
    /// if mtimes drifted via `touch`).
    NoOp,
    /// A new state was written and the thread head advanced.
    /// `state_id` identifies that newly written state.
    Captured { state_id: ChangeId },
}
67
68fn thread_worktree_target_error(error: ThreadWorktreeTargetError) -> HeddleError {
69    match error {
70        ThreadWorktreeTargetError::Io { source, .. } => HeddleError::Io(source),
71        ThreadWorktreeTargetError::Symlink { path } => HeddleError::Conflict(format!(
72            "thread worktree target '{}' cannot be a symlink",
73            path.display()
74        )),
75        ThreadWorktreeTargetError::NotDirectory { path } => HeddleError::Conflict(format!(
76            "thread worktree target '{}' must be a directory",
77            path.display()
78        )),
79        ThreadWorktreeTargetError::NotEmpty { path } => HeddleError::Conflict(format!(
80            "thread worktree target '{}' is not empty",
81            path.display()
82        )),
83    }
84}
85
86fn prepare_thread_worktree_target(dest: &Path) -> Result<ThreadWorktreeTargetDisposition> {
87    let disposition =
88        validate_thread_worktree_target(dest).map_err(thread_worktree_target_error)?;
89    if disposition == ThreadWorktreeTargetDisposition::Absent {
90        fs::create_dir_all(dest).map_err(HeddleError::Io)?;
91        validate_thread_worktree_target(dest).map_err(thread_worktree_target_error)?;
92    }
93    Ok(disposition)
94}
95
/// Remove every entry directly inside `dir`, leaving `dir` itself in place.
///
/// A `dir` that is a symlink or not a directory is left untouched and
/// reported as success. Errors from inspecting `dir`, listing it, or removing
/// an entry are returned as-is (including `NotFound` when `dir` is missing).
fn clear_dir_contents(dir: &Path) -> std::io::Result<()> {
    // `symlink_metadata` does not follow links, so a symlinked root is seen
    // as a symlink and its target's contents are never touched.
    let root_kind = fs::symlink_metadata(dir)?.file_type();
    if root_kind.is_symlink() || !root_kind.is_dir() {
        return Ok(());
    }

    for child in fs::read_dir(dir)? {
        let child = child?;
        let child_path = child.path();
        // `DirEntry::file_type` does not follow links either: a symlink to a
        // directory is unlinked as a file rather than recursed into.
        let removal = if child.file_type()?.is_dir() {
            fs::remove_dir_all(&child_path)
        } else {
            fs::remove_file(&child_path)
        };
        removal?;
    }
    Ok(())
}
113
114fn cleanup_thread_worktree_target(
115    dest: &Path,
116    disposition: ThreadWorktreeTargetDisposition,
117) -> Result<()> {
118    match clear_dir_contents(dest) {
119        Ok(()) => {}
120        Err(err)
121            if err.kind() == std::io::ErrorKind::NotFound
122                || err.kind() == std::io::ErrorKind::NotADirectory => {}
123        Err(err) => return Err(HeddleError::Io(err)),
124    }
125
126    if disposition == ThreadWorktreeTargetDisposition::Absent {
127        match fs::remove_dir(dest) {
128            Ok(()) => {}
129            Err(err)
130                if err.kind() == std::io::ErrorKind::NotFound
131                    || err.kind() == std::io::ErrorKind::NotADirectory => {}
132            Err(err) => return Err(HeddleError::Io(err)),
133        }
134    }
135
136    Ok(())
137}
138
139impl Repository {
140    /// Materialize the captured tree of `thread` to `dest` and write
141    /// a [`ThreadManifest`] sidecar to
142    /// `<heddle_dir>/threads/<thread>/manifest.toml`.
143    ///
144    /// Order of operations:
145    ///   1. Resolve `thread` → `ChangeId` → `State` → `Tree`.
146    ///   2. Call `Repository::materialize_tree(&tree, dest)` — the
147    ///      existing clonefile-first materializer does the heavy
148    ///      lifting (loose-uncompressed promotion, parallel writes).
149    ///   3. Walk the materialized tree and capture per-file
150    ///      `(hash, inode, mtime_ns, ctime_ns, mode)` into the
151    ///      manifest.
152    ///   4. Atomically write the manifest.
153    ///
154    /// The walk step in (3) is a single `stat` per file — sub-ms for
155    /// the 643-file heddle workspace. Doing the walk after
156    /// materialize rather than capturing stats during materialize
157    /// keeps the existing materializer untouched.
158    #[instrument(skip(self), fields(thread = %thread, dest = %dest.display()))]
159    pub fn materialize_thread(
160        &self,
161        thread: &str,
162        dest: &Path,
163        audience: &AudienceTier,
164    ) -> Result<ThreadManifest> {
165        let change_id = self
166            .refs()
167            .resolve(thread)?
168            .ok_or_else(|| HeddleError::Config(format!("unknown thread {thread}")))?;
169        let state = self
170            .store()
171            .get_state(&change_id)?
172            .ok_or_else(|| HeddleError::Config(format!("state for {thread} missing")))?;
173        let target_disposition = prepare_thread_worktree_target(dest)?;
174
175        // Route through the single visibility-gated checkout chokepoint, which
176        // either materializes the real tree or writes the operator-local
177        // courtesy stub. The manifest is this method's own concern (it lives
178        // outside the checkout dir), so it is written here based on the gate
179        // outcome — not in the chokepoint, which `write_isolated_checkout` also
180        // calls without wanting a thread manifest.
181        let result = (|| -> Result<ThreadManifest> {
182            match self.checkout_state_gated(&change_id, &state, dest, audience)? {
183                CheckoutMaterialization::Withheld { tier } => {
184                    // Manifest reflects disk truth: no tracked files were
185                    // materialized (the placeholder is untracked). `tree_hash`
186                    // still names the real embargoed state's tree so the sidecar
187                    // identifies which state this checkout stands in for. The
188                    // `withheld` flag here is diagnostic only — it records that the
189                    // *last* materialize of this thread was withheld, but the
190                    // per-thread manifest is clobbered by a sibling worktree of the
191                    // same thread. The authoritative, per-worktree non-capturable
192                    // signal is the withheld marker written by
193                    // `checkout_state_gated`, keyed on the worktree root (heddle#316).
194                    let mut manifest =
195                        ThreadManifest::new(change_id, state.tree, canonical_worktree_path(dest));
196                    manifest.withheld = true;
197                    write_manifest(self.heddle_dir(), thread, &manifest)
198                        .map_err(HeddleError::Io)?;
199                    debug!(
200                        thread = %thread,
201                        state_id = %change_id,
202                        tier = tier.as_str(),
203                        "thread checkout rendered courtesy stub (under-tier for audience)"
204                    );
205                    Ok(manifest)
206                }
207                CheckoutMaterialization::Materialized { tree } => {
208                    let mut manifest =
209                        ThreadManifest::new(change_id, state.tree, canonical_worktree_path(dest));
210                    populate_manifest_from_tree(self, &tree, dest, "", &mut manifest.files)?;
211                    write_manifest(self.heddle_dir(), thread, &manifest)
212                        .map_err(HeddleError::Io)?;
213                    debug!(
214                        thread = %thread,
215                        state_id = %change_id,
216                        files = manifest.files.len(),
217                        "thread materialized"
218                    );
219                    Ok(manifest)
220                }
221            }
222        })();
223
224        if result.is_err() {
225            cleanup_thread_worktree_target(dest, target_disposition)?;
226        }
227
228        result
229    }
230
231    /// THE visibility-gated checkout chokepoint. Resolve `change_id`'s
232    /// effective tier against `audience` and either materialize its real tree
233    /// to `dest` (visible) or write the operator-local courtesy stub and
234    /// withhold the tracked bytes (under-tier).
235    ///
236    /// Every path that serves a *named committed state*'s content to a local
237    /// checkout MUST funnel through here — `materialize_thread` and the CLI's
238    /// `write_isolated_checkout` (`heddle start --path`) both do — so the
239    /// visibility gate cannot be bypassed by a caller reaching for the raw,
240    /// blob-keyed `materialize_tree`. The decision is made HERE, where the
241    /// `ChangeId` and the audience are both in scope; `materialize_tree`
242    /// carries neither and so cannot make it. `materialize_tree` stays the
243    /// primitive for *computed* trees (merge/cherry-pick results), which are
244    /// not a single named state and carry no audience.
245    ///
246    /// The courtesy stub is a working-tree convenience on bytes the operator
247    /// already holds — NOT a security boundary and NOT a public-mirror surface
248    /// (the public mirror emits absence, spike §5.3).
249    pub fn checkout_state_gated(
250        &self,
251        change_id: &ChangeId,
252        state: &State,
253        dest: &Path,
254        audience: &AudienceTier,
255    ) -> Result<CheckoutMaterialization> {
256        let tier = self.effective_visibility_tier(change_id).map_err(|e| {
257            HeddleError::Config(format!("resolve visibility for {change_id}: {e:#}"))
258        })?;
259        if !visible(&tier, audience) {
260            fs::create_dir_all(dest).map_err(HeddleError::Io)?;
261            // Canonicalize ONLY after the directory exists. `canonical_worktree_path`
262            // falls back to the raw input when `dest` does not yet resolve (a relative
263            // path, or a path through a not-yet-created symlink), so a pre-creation
264            // canonicalize would key the withheld marker and the `.leaves` record on a
265            // path `capture_thread_from_disk` never resolves to at read-time — the read
266            // canonicalizes the now-existing root, misses the marker, and captures a
267            // withheld checkout as a stub-only tree instead of no-oping. Resolving here,
268            // once `create_dir_all` has made `dest` exist, guarantees the write-time
269            // canonical root equals the read-time one (heddle#316).
270            let canonical = canonical_worktree_path(dest);
271            // Reconcile the root DOWN to the withheld tier: every tracked leaf a
272            // prior materialize of this root wrote must be removed, so the
273            // checkout holds ONLY the courtesy stub — never the very bytes the
274            // gate is withholding. `keep` is empty (the withheld tier permits no
275            // tracked content). `must_remove` additionally names the withheld
276            // state's own tree leaves, so the leak is closed even when no prior
277            // manifest survives for this root (a sibling worktree clobbered it).
278            // The stub itself is untracked and so never in either set (heddle#316
279            // CLASS 1).
280            let mut withheld_leaves = BTreeSet::new();
281            if let Some(tree) = self.store().get_tree(&state.tree)? {
282                collect_tree_leaf_paths(self, &tree, "", &mut withheld_leaves)?;
283            }
284            self.reconcile_materialized_root(dest, &canonical, &BTreeSet::new(), &withheld_leaves)?;
285            // Persist the clobber-proof per-root record: a withheld materialize
286            // leaves ONLY the untracked courtesy stub, so the tracked-leaf set is
287            // empty. Written here so the single chokepoint owns the record for
288            // every funnel path, and so a later reconcile of this root reads an
289            // authoritative empty set instead of falling to the backstop
290            // (heddle#316 CLASS 1).
291            crate::thread_manifest::write_materialized_leaves(
292                self.heddle_dir(),
293                &canonical,
294                &BTreeSet::new(),
295            )
296            .map_err(HeddleError::Io)?;
297            let embargo_until = self
298                .effective_state_visibility(change_id)
299                .map_err(|e| {
300                    HeddleError::Config(format!("resolve visibility for {change_id}: {e:#}"))
301                })?
302                .and_then(|record| record.embargo_until);
303            let stub = courtesy_stub_text(&tier, embargo_until);
304            fs::write(dest.join(COURTESY_STUB_FILENAME), stub.as_bytes())
305                .map_err(HeddleError::Io)?;
306            // Record the withheld status keyed by THIS worktree root, not by
307            // thread — a sibling worktree of the same thread materialized at a
308            // visible tier must keep its own capturable status (heddle#316).
309            crate::thread_manifest::mark_withheld_checkout(self.heddle_dir(), &canonical)
310                .map_err(HeddleError::Io)?;
311            return Ok(CheckoutMaterialization::Withheld { tier });
312        }
313
314        let tree = self
315            .store()
316            .get_tree(&state.tree)?
317            .ok_or_else(|| HeddleError::Config(format!("tree for {change_id} missing")))?;
318        self.materialize_tree(&tree, dest)?;
319        // Canonicalize only now that `materialize_tree` (via `create_dir_all`) has made
320        // `dest` exist — same read/write-root agreement as the withheld branch above
321        // (heddle#316).
322        let canonical = canonical_worktree_path(dest);
323        // Reconcile the root UP to the served tier: `materialize_tree` wrote the
324        // real tree's leaves but does NOT remove a stale leaf a prior
325        // materialize of a *different* tree left at this root. `keep` is the set
326        // of leaves the served tree just wrote — any prior tracked leaf NOT in
327        // it is removed, so the root holds exactly this tier's content
328        // (heddle#316 CLASS 1).
329        let mut served_leaves = BTreeSet::new();
330        collect_tree_leaf_paths(self, &tree, "", &mut served_leaves)?;
331        self.reconcile_materialized_root(dest, &canonical, &served_leaves, &BTreeSet::new())?;
332        // Persist the clobber-proof per-root record of exactly the tracked leaves
333        // this visible materialize left on disk, so a later withheld
334        // re-materialize of this root removes precisely them even if a sibling
335        // worktree of the same thread clobbered the per-thread manifest in the
336        // interim (heddle#316 CLASS 1).
337        crate::thread_manifest::write_materialized_leaves(
338            self.heddle_dir(),
339            &canonical,
340            &served_leaves,
341        )
342        .map_err(HeddleError::Io)?;
343        // This root now holds real served bytes: clear any stale withheld marker
344        // a prior under-tier materialize of the same root may have left, so it
345        // can't suppress this worktree's capture (heddle#316).
346        crate::thread_manifest::clear_withheld_checkout(self.heddle_dir(), &canonical)
347            .map_err(HeddleError::Io)?;
348        // Remove any leftover courtesy stub a prior under-tier materialize of the
349        // same root wrote: the stub is untracked, so the reconcile leaf-removal
350        // above leaves it in place. Cosmetic — capture ignores it — but an
351        // authorized re-materialize should leave a clean tree (heddle#316).
352        match fs::remove_file(dest.join(COURTESY_STUB_FILENAME)) {
353            Ok(()) => {}
354            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
355            Err(e) => return Err(HeddleError::Io(e)),
356        }
357        Ok(CheckoutMaterialization::Materialized { tree })
358    }
359
360    /// Reconcile the worktree root at `dest` so it holds EXACTLY the content the
361    /// target tier permits, regardless of what a prior materialization of the
362    /// same root left behind. THE single chokepoint both branches of
363    /// [`Repository::checkout_state_gated`] funnel through to enforce the
364    /// invariant by construction rather than via two opposite one-off cleanups
365    /// (heddle#316 CLASS 1).
366    ///
367    /// Removes every tracked leaf that (a) a prior materialization recorded for
368    /// this root in its clobber-proof per-root **materialized-leaves record**
369    /// (keyed by the canonical worktree root, so a sibling worktree of the same
370    /// thread can never erase it) UNION (b) the caller's `must_remove` set —
371    /// MINUS the `keep` set the target tier permits. Removal is guarded per file
372    /// (`NotFound` ignored) and empty ancestor directories it leaves behind are
373    /// pruned via `remove_dir` (which fails on non-empty dirs, so untracked
374    /// siblings keep their directory alive).
375    ///
376    /// Sourcing the prior leaves from the per-root record — NOT the single
377    /// per-thread `manifest.toml` — is what makes the withheld reduction
378    /// correct-by-construction: the manifest is clobbered the instant a sibling
379    /// worktree of the same thread materializes, which would drop a prior
380    /// *visible* leaf (e.g. an `old-secret.txt` removed before the withheld
381    /// target state) out of the removal set and leak it next to the stub. The
382    /// per-root record is immune to that race (heddle#316 CLASS 1).
383    ///
384    /// Never blanket-`rm -rf`s: only paths sourced from the per-root record /
385    /// `must_remove` are touched, so user-untracked files and `.git`/heddle
386    /// metadata are never removed.
387    fn reconcile_materialized_root(
388        &self,
389        dest: &Path,
390        canonical_root: &Path,
391        keep: &BTreeSet<String>,
392        must_remove: &BTreeSet<String>,
393    ) -> Result<()> {
394        let mut to_remove: BTreeSet<String> = must_remove.clone();
395        match crate::thread_manifest::read_materialized_leaves(self.heddle_dir(), canonical_root)
396            .map_err(HeddleError::Io)?
397        {
398            Some(prior_leaves) => {
399                // Clobber-proof per-root record of exactly the tracked leaves a
400                // prior materialize of THIS root left on disk. Authoritative —
401                // survives a sibling worktree's clobber of the per-thread
402                // manifest.
403                to_remove.extend(prior_leaves);
404            }
405            None => {
406                // Fail-closed backstop: no per-root record yet. Reached only on a
407                // first-ever materialize of this root (nothing prior to remove)
408                // or a root last materialized by a binary predating the per-root
409                // record. Fall back to the best-effort per-thread manifest so an
410                // upgrade-window reconcile still drops a recorded prior tree's
411                // leaves; `must_remove` (the target tier's own leaves) covers the
412                // rest. Strictly safer than trusting `must_remove` alone, and —
413                // like the primary path — touches only recorded leaves, never
414                // untracked/non-heddle files.
415                if let Some(prior) = crate::thread_manifest::manifest_for_worktree_root(
416                    self.heddle_dir(),
417                    canonical_root,
418                )
419                .map_err(HeddleError::Io)?
420                {
421                    to_remove.extend(prior.files.keys().cloned());
422                }
423            }
424        }
425
426        let mut prune_dirs: BTreeSet<PathBuf> = BTreeSet::new();
427        for rel in &to_remove {
428            if keep.contains(rel) {
429                continue;
430            }
431            let path = dest.join(rel);
432            match fs::remove_file(&path) {
433                Ok(()) => {}
434                Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
435                Err(e) => return Err(HeddleError::Io(e)),
436            }
437            // Collect ancestor directories (within `dest`) so the now-empty ones
438            // left by the removed leaf can be pruned after the pass.
439            let mut parent = path.parent();
440            while let Some(p) = parent {
441                if p == dest || !p.starts_with(dest) {
442                    break;
443                }
444                prune_dirs.insert(p.to_path_buf());
445                parent = p.parent();
446            }
447        }
448
449        // Prune deepest-first so a parent only sees its children already gone.
450        // `remove_dir` errors on a non-empty dir, which we ignore — that is
451        // exactly how an untracked sibling keeps its directory.
452        let mut dirs: Vec<PathBuf> = prune_dirs.into_iter().collect();
453        dirs.sort_by_key(|d| std::cmp::Reverse(d.components().count()));
454        for d in dirs {
455            let _ = fs::remove_dir(&d);
456        }
457        Ok(())
458    }
459
460    /// Remove the per-worktree-root sidecars [`checkout_state_gated`] writes —
461    /// the clobber-proof materialized-leaves record and (if present) the withheld
462    /// marker — for the checkout at `worktree_root`. Both live under the SHARED
463    /// heddle dir keyed by the canonical worktree root, so the atomic `start`
464    /// rollback's checkout-directory rewind never reaches them; a failed-then-
465    /// rolled-back start would otherwise orphan them. Canonicalizes `worktree_root`
466    /// the same way the chokepoint did, so the key matches; the dir must still
467    /// exist at call time (the rollback clears these BEFORE rewinding the dir).
468    /// Idempotent: missing sidecars are a no-op (heddle#316 r11 P2).
469    ///
470    /// [`checkout_state_gated`]: Repository::checkout_state_gated
471    pub fn clear_materialized_root_records(&self, worktree_root: &Path) -> Result<()> {
472        let canonical = canonical_worktree_path(worktree_root);
473        crate::thread_manifest::clear_materialized_leaves(self.heddle_dir(), &canonical)
474            .map_err(HeddleError::Io)?;
475        crate::thread_manifest::clear_withheld_checkout(self.heddle_dir(), &canonical)
476            .map_err(HeddleError::Io)?;
477        Ok(())
478    }
479
480    /// Write the [`ThreadManifest`] sidecar for a worktree that's
481    /// already been materialised to `dest` against `state_id`. Used
482    /// by the CLI's `start` path, which calls `materialize_tree`
483    /// directly via `write_isolated_checkout` and then needs the
484    /// matching manifest written so the rest of the clonefile-thread
485    /// machinery (`heddle status` advisory, `Repository::snapshot`
486    /// auto-detection, `capture_thread_from_disk` fast no-op) sees a
487    /// fully-formed sidecar.
488    ///
489    /// `state_id` is the captured state the worktree was materialised
490    /// against; its tree is resolved and walked to populate the
491    /// manifest's per-file stat-cache entries (one `lstat` per file).
492    /// Atomic write: a torn manifest can't half-land. Idempotent at
493    /// the manifest-key level: rewriting a manifest for the same
494    /// thread is supported (and is what `capture_thread_from_disk`
495    /// does post-capture).
496    #[instrument(skip(self), fields(thread = %thread, dest = %dest.display(), state = %state_id))]
497    pub fn record_thread_manifest(
498        &self,
499        thread: &str,
500        state_id: &ChangeId,
501        dest: &Path,
502    ) -> Result<ThreadManifest> {
503        let state = self
504            .store()
505            .get_state(state_id)?
506            .ok_or_else(|| HeddleError::Config(format!("state {state_id} missing")))?;
507        let tree = self
508            .store()
509            .get_tree(&state.tree)?
510            .ok_or_else(|| HeddleError::Config(format!("tree for state {state_id} missing")))?;
511        let mut manifest =
512            ThreadManifest::new(*state_id, state.tree, canonical_worktree_path(dest));
513        populate_manifest_from_tree(self, &tree, dest, "", &mut manifest.files)?;
514        crate::thread_manifest::write_manifest(self.heddle_dir(), thread, &manifest)
515            .map_err(HeddleError::Io)?;
516        debug!(
517            thread = %thread,
518            state_id = %state_id,
519            files = manifest.files.len(),
520            "thread manifest recorded post-materialize"
521        );
522        Ok(manifest)
523    }
524
525    /// Record a WITHHELD-consistent manifest sidecar for a worktree whose
526    /// checkout was withheld — the base state's visibility tier was not visible
527    /// to the materializing audience, so [`Repository::checkout_state_gated`]
528    /// wrote ONLY the operator-local courtesy stub and the tracked bytes were
529    /// never materialized.
530    ///
531    /// Mirrors the withheld arm of [`Repository::materialize_thread`]: `tree_hash`
532    /// still names the real (unserved) state's tree so the sidecar identifies
533    /// which state the stub stands in for, but `files` is empty (no tracked leaf
534    /// is on disk) and `withheld = true`. Crucially this does NOT walk/stat the
535    /// real tree against `dest` the way [`Repository::record_thread_manifest`]
536    /// does — those files were intentionally not materialized, so stat-ing them
537    /// would record phantom stat-cache entries (or fail) against a checkout that
538    /// holds only the stub. The CLI's atomic `start` path calls this instead of
539    /// `record_thread_manifest` when the checkout came back withheld, so a start
540    /// on a Private base produces a withheld checkout + a consistent manifest
541    /// rather than erroring (heddle#316 / PR #528 r9 Finding 3).
542    #[instrument(skip(self), fields(thread = %thread, dest = %dest.display(), state = %state_id))]
543    pub fn record_withheld_thread_manifest(
544        &self,
545        thread: &str,
546        state_id: &ChangeId,
547        dest: &Path,
548    ) -> Result<ThreadManifest> {
549        let state = self
550            .store()
551            .get_state(state_id)?
552            .ok_or_else(|| HeddleError::Config(format!("state {state_id} missing")))?;
553        let mut manifest =
554            ThreadManifest::new(*state_id, state.tree, canonical_worktree_path(dest));
555        manifest.withheld = true;
556        crate::thread_manifest::write_manifest(self.heddle_dir(), thread, &manifest)
557            .map_err(HeddleError::Io)?;
558        debug!(
559            thread = %thread,
560            state_id = %state_id,
561            "withheld thread manifest recorded post-materialize"
562        );
563        Ok(manifest)
564    }
565
566    /// The staged domain commit record for a brand-new materialized-thread
567    /// start. The repo owns the op-record shape so callers don't reconstruct
568    /// `OpRecord::ThreadCreate`'s fields. `manager_snapshot` is `None`: the
569    /// thread record is written by the start's converge step (so there is
570    /// nothing to snapshot at record-construction time — heddle#23 r2). The
571    /// caller stages this as the executor's single commit record (it is NOT
572    /// appended eagerly); the commit marker dedups on the stable
573    /// `transaction_id`.
574    pub fn thread_create_op_record(&self, name: &str, state: ChangeId) -> OpRecord {
575        OpRecord::ThreadCreate {
576            name: name.to_string(),
577            state,
578            manager_snapshot: None,
579        }
580    }
581
582    /// CAS-guarded rollback of a materialized-thread-start ref forward
583    /// (heddle#356 cid 3333881583).
584    ///
585    /// The forward set the thread ref to `set_value` (the start's base state).
586    /// Undo it ONLY if the ref STILL points there: restore `restore_to` when a
587    /// prior value existed (a re-start that reused the ref), or delete a ref
588    /// this start created (`restore_to == None`). If a concurrent process
589    /// advanced/changed the ref after our forward (a concurrent start or
590    /// crash-recovery), leave their write in place — an unconditional
591    /// reset/delete would clobber it.
592    pub fn cas_guarded_thread_ref_rollback(
593        &self,
594        name: &ThreadName,
595        set_value: ChangeId,
596        restore_to: Option<ChangeId>,
597    ) -> Result<()> {
598        // Compare-before-write: bail without touching the ref if it no longer
599        // holds the value our forward set.
600        if self.refs().get_thread(name)? != Some(set_value) {
601            return Ok(());
602        }
603        let result = match restore_to {
604            Some(prior) => {
605                self.refs()
606                    .set_thread_cas(name, RefExpectation::Value(set_value), &prior)
607            }
608            None => self
609                .refs()
610                .delete_thread_cas(name, RefExpectation::Value(set_value)),
611        };
612        match result {
613            Ok(()) => Ok(()),
614            // Lost the race between the read above and this CAS: a concurrent
615            // writer advanced the ref. The expectation guard means we wrote
616            // nothing — leave their advance intact (the whole point of the
617            // guard).
618            Err(HeddleError::Conflict(_)) => Ok(()),
619            Err(other) => Err(other),
620        }
621    }
622
623    /// Restore the thread manifest sidecar to its captured pre-start snapshot:
624    /// rewrite the prior `manifest.toml` bytes if one existed, or remove the
625    /// directory this start created. Restoring (not blind-deleting) preserves
626    /// an OLD manifest left by a prior materialization of a reused thread ref
627    /// (heddle#356 cid 3333881561).
628    pub fn restore_thread_manifest(&self, thread: &str, prior: Option<Vec<u8>>) -> Result<()> {
629        match prior {
630            Some(bytes) => {
631                let path = crate::thread_manifest::manifest_path(self.heddle_dir(), thread);
632                if let Some(parent) = path.parent() {
633                    fs::create_dir_all(parent).map_err(HeddleError::Io)?;
634                }
635                fs::write(&path, bytes).map_err(HeddleError::Io)
636            }
637            None => crate::thread_manifest::remove_thread_manifest_dir(self.heddle_dir(), thread)
638                .map(|_| ())
639                .map_err(HeddleError::Io),
640        }
641    }
642
643    /// Scan the materialized worktree at `root`, build a fresh tree
644    /// from the on-disk bytes, and (if anything changed) advance
645    /// `thread`'s head to a new state pointing at that tree. The
646    /// manifest is rewritten to reflect the new state and the
647    /// post-capture stat fields.
648    ///
649    /// Returns [`ThreadCaptureOutcome::NoOp`] when the new tree's
650    /// hash equals the manifest's recorded `tree_hash` — the agent
651    /// touched nothing material. Otherwise
652    /// [`ThreadCaptureOutcome::Captured`] with the new state id.
653    ///
654    /// The reason this method exists alongside `Repository::snapshot`
655    /// is two-fold:
656    ///   1. `snapshot` always advances `HEAD`'s currently-attached
657    ///      thread. Capture-from-disk targets *a specific thread by
658    ///      name*, which is what auto-capture-on-switch needs.
659    ///   2. `snapshot` walks `self.root`. Capture-from-disk walks
660    ///      whatever directory the materializer put the thread at —
661    ///      managed checkouts under `<repo>/.heddle/threads/<thread>/`,
662    ///      which are NOT `self.root`.
663    ///
664    /// Walks `Repository::build_tree` for the slow path so the
665    /// resulting trees are byte-identical to what `heddle capture`
666    /// produces against the same content. A stat-cache fast path
667    /// (see [`stat_cache_no_op`]) short-circuits the common case
668    /// of "switch threads, nothing changed" so the dominant
669    /// auto-capture-on-switch latency is a `stat` walk, not a
670    /// blob rehash.
671    #[instrument(skip(self), fields(thread = %thread, root = %root.display()))]
672    pub fn capture_thread_from_disk(
673        &self,
674        thread: &str,
675        root: &Path,
676    ) -> Result<ThreadCaptureOutcome> {
677        // Repository-wide write lock — same shape as
678        // `snapshot_with_attribution_profiled`. Without it, two
679        // concurrent `thread switch` invocations from sibling
680        // worktrees can race the same source thread: both read
681        // `get_thread(thread)` returning the same parent, both
682        // `put_state` with that parent, both `set_thread` —
683        // result is two leaf states with the same parent, one of
684        // which is orphaned because the ref ends up pointing at
685        // whichever `set_thread` won the race. The manifest write
686        // at step 4 has the same lost-update problem on a smaller
687        // scale. Holding the write lock across the whole
688        // read-modify-write sequence makes the capture atomic with
689        // respect to other state-changing operations.
690        let _lock = self
691            .locker()
692            .write()
693            .map_err(|e| HeddleError::Io(std::io::Error::other(e.to_string())))?;
694
695        let existing_manifest =
696            read_manifest(self.heddle_dir(), thread).map_err(HeddleError::Io)?;
697
698        // 0a. Withheld checkouts are non-capturable. A withheld checkout holds
699        //     only the operator-local courtesy stub (the tracked bytes were
700        //     withheld because the state's tier is not visible to the
701        //     materializing audience). Capturing it would either pull the stub
702        //     in as tracked content or — worse — build an empty tree (the stub
703        //     is ignored, see `ignore_patterns`) and commit it, wiping the
704        //     withheld state's real files. The operator cannot capture content
705        //     they were never served, so refuse with a no-op and leave the
706        //     thread head where it is (heddle#316).
707        //
708        //     The withheld status is keyed by THIS worktree root, not by the
709        //     per-thread `manifest.toml` — that single file is clobbered when
710        //     the same thread is materialized into a second worktree, so a
711        //     manifest-level flag would let an under-tier checkout of one
712        //     worktree wrongly suppress an authorized sibling worktree's
713        //     capture. The per-root marker (written by `checkout_state_gated`)
714        //     scopes the suppression to exactly the worktree that was withheld.
715        if crate::thread_manifest::is_withheld_checkout(
716            self.heddle_dir(),
717            &canonical_worktree_path(root),
718        ) {
719            debug!(thread = %thread, "thread capture skipped (withheld checkout)");
720            return Ok(ThreadCaptureOutcome::NoOp);
721        }
722
723        // 0. Fast no-op via the stat-cache. If every file in the
724        //    manifest still exists with the same `(inode, mtime,
725        //    ctime, mode)` AND the disk walk turns up no
726        //    untracked/new files, we know the tree is byte-identical
727        //    to what we materialised. Skip the entire blob-and-tree
728        //    rebuild. Typical cost: ~5ms for a 643-file worktree
729        //    vs hundreds of ms for the full `build_tree` rehash.
730        if let Some(m) = existing_manifest.as_ref()
731            && stat_cache_no_op(self, m, root)?
732        {
733            debug!(thread = %thread, "thread capture no-op (stat-cache hit)");
734            return Ok(ThreadCaptureOutcome::NoOp);
735        }
736
737        let baseline_tree = match existing_manifest.as_ref() {
738            Some(manifest) => {
739                Some(self.store().get_tree(&manifest.tree_hash)?.ok_or_else(|| {
740                    HeddleError::Config(format!(
741                        "manifest baseline tree {} missing while capturing thread {thread}",
742                        manifest.tree_hash
743                    ))
744                })?)
745            }
746            None => None,
747        };
748
749        // 1. Walk the on-disk worktree → fresh Tree (also stores
750        //    every blob it sees as a side effect). When we have a
751        //    manifest, pass it as a stat-cache so unchanged files
752        //    skip the read+hash cycle entirely. Files that DID
753        //    change still get the full treatment, so correctness
754        //    is preserved; we just avoid the redundant work for
755        //    the (usually large) majority.
756        let new_tree = match existing_manifest.as_ref() {
757            Some(m) => {
758                self.build_tree_profiled_with_stat_cache_against(root, baseline_tree.as_ref(), m)?
759                    .0
760            }
761            None => {
762                self.build_tree_profiled_against(root, baseline_tree.as_ref())?
763                    .0
764            }
765        };
766        let new_tree_hash = self.store().put_tree(&new_tree)?;
767
768        // 2. Content-hash no-op (slow path equivalent of the
769        //    stat-cache check above). Hits when stat fields drifted
770        //    via `touch` or atime updates even though the bytes
771        //    didn't change — refresh the manifest's stat fields so
772        //    the next call hits the fast path.
773        if existing_manifest
774            .as_ref()
775            .map(|m| m.tree_hash == new_tree_hash)
776            .unwrap_or(false)
777        {
778            let mut refreshed = existing_manifest.expect("checked Some above");
779            refreshed.files.clear();
780            populate_manifest_from_tree(self, &new_tree, root, "", &mut refreshed.files)?;
781            write_manifest(self.heddle_dir(), thread, &refreshed).map_err(HeddleError::Io)?;
782            debug!(thread = %thread, "thread capture no-op (content-hash refresh)");
783            return Ok(ThreadCaptureOutcome::NoOp);
784        }
785
786        // 3. Real capture. Build a new state parented at the
787        //    current thread head (if any), put it, advance the
788        //    thread ref.
789        let attribution = self.get_attribution()?;
790        let thread_name = ThreadName::from(thread);
791        let parents = match self.refs().get_thread(&thread_name)? {
792            Some(prev) => vec![prev],
793            None => vec![],
794        };
795        let mut state = State::new_snapshot(new_tree_hash, parents, attribution);
796        // Auto-sign this thread-materialization capture (heddle#482) via the
797        // authored-state chokepoint, the same as the primary capture path — it
798        // is a real author capture that bypasses `stage_snapshot_objects`. Last
799        // mutation before the write.
800        self.put_authored_state(&mut state)?;
801        self.refs().set_thread(&thread_name, &state.change_id)?;
802
803        // 4. Rewrite the manifest to reflect the new state. `root` is
804        //    the worktree being captured from — record its canonical
805        //    path so the next snapshot can tell whether it's running
806        //    inside this same worktree.
807        let mut manifest = ThreadManifest::new(
808            state.change_id,
809            new_tree_hash,
810            canonical_worktree_path(root),
811        );
812        populate_manifest_from_tree(self, &new_tree, root, "", &mut manifest.files)?;
813        write_manifest(self.heddle_dir(), thread, &manifest).map_err(HeddleError::Io)?;
814
815        debug!(
816            thread = %thread,
817            new_state = %state.change_id,
818            files = manifest.files.len(),
819            "thread captured"
820        );
821        Ok(ThreadCaptureOutcome::Captured {
822            state_id: state.change_id,
823        })
824    }
825}
826
827/// Recursive helper: for each tree entry under `rel_prefix` inside
828/// the materialized `dest`, walk the captured tree (NOT the disk —
829/// we trust what we just put there) and stat the corresponding file
830/// to fill in the manifest's identity fields.
831///
832/// Using the captured tree as the walk basis is what lets a
833/// manifest entry survive `rm -rf .` later: the file may have
834/// disappeared but we still record what *should* be there per the
835/// captured state. Capture-from-disk decides what to do about
836/// missing files at its own scan time.
837/// Plain-text placeholder a holder sees instead of an under-tier state's
838/// tracked content on their own checkout. ASCII-only, mirrors the redaction
839/// `stub_text` shape. Never travels off-host.
840fn courtesy_stub_text(tier: &VisibilityTier, embargo_until: Option<DateTime<Utc>>) -> String {
841    let mut out = String::with_capacity(256);
842    out.push_str("# Heddle withheld this state's content from your audience.\n");
843    out.push_str(&format!("# visibility-tier: {}\n", tier.as_str()));
844    if let VisibilityTier::TeamScoped { team_id } = tier {
845        out.push_str(&format!("# team:            {team_id}\n"));
846    }
847    if let VisibilityTier::Restricted { scope_label } | VisibilityTier::Private { scope_label } =
848        tier
849    {
850        out.push_str(&format!("# scope:           {scope_label}\n"));
851    }
852    match embargo_until {
853        Some(when) => out.push_str(&format!("# promotes-at:     {}\n", when.to_rfc3339())),
854        None => out.push_str("# promotes-at:     (no scheduled promotion)\n"),
855    }
856    out.push_str("# This placeholder is a local courtesy; the bytes are not in this checkout.\n");
857    out
858}
859
860/// Collect every blob/symlink leaf path (worktree-relative, forward-slash
861/// joined) reachable from `tree` into `out`. Used by the checkout reconcile
862/// step to enumerate the tracked content a tier serves (the `keep` set on the
863/// visible path) or withholds (the `must_remove` set on the withheld path),
864/// without touching disk — the path set is derived purely from the tree.
865fn collect_tree_leaf_paths(
866    repo: &Repository,
867    tree: &Tree,
868    rel_prefix: &str,
869    out: &mut BTreeSet<String>,
870) -> Result<()> {
871    use objects::object::EntryType;
872    for entry in tree.entries() {
873        let rel_path = if rel_prefix.is_empty() {
874            entry.name().to_string()
875        } else {
876            format!("{rel_prefix}/{}", entry.name())
877        };
878        match entry.entry_type() {
879            EntryType::Tree => {
880                let Some(tree_hash) = entry.tree_hash() else {
881                    continue;
882                };
883                let subtree = repo.store().get_tree(&tree_hash)?.ok_or_else(|| {
884                    HeddleError::Config(format!(
885                        "subtree {} missing while collecting leaf paths for {rel_path}",
886                        tree_hash
887                    ))
888                })?;
889                collect_tree_leaf_paths(repo, &subtree, &rel_path, out)?;
890            }
891            EntryType::Blob | EntryType::Symlink | EntryType::Gitlink => {
892                out.insert(rel_path);
893            }
894            // Native child-spool edge: not a worktree leaf, so it has no
895            // materialized path to collect.
896            EntryType::Spoollink => {}
897        }
898    }
899    Ok(())
900}
901
902pub(crate) fn populate_manifest_from_tree(
903    repo: &Repository,
904    tree: &Tree,
905    dest: &Path,
906    rel_prefix: &str,
907    out: &mut BTreeMap<String, ManifestFile>,
908) -> Result<()> {
909    use objects::object::EntryType;
910    for entry in tree.entries() {
911        let rel_path = if rel_prefix.is_empty() {
912            entry.name().to_string()
913        } else {
914            format!("{rel_prefix}/{}", entry.name())
915        };
916        match entry.entry_type() {
917            EntryType::Tree => {
918                let Some(tree_hash) = entry.tree_hash() else {
919                    continue;
920                };
921                let subtree = repo.store().get_tree(&tree_hash)?.ok_or_else(|| {
922                    HeddleError::Config(format!(
923                        "subtree {} missing while populating manifest for {rel_path}",
924                        tree_hash
925                    ))
926                })?;
927                populate_manifest_from_tree(repo, &subtree, dest, &rel_path, out)?;
928            }
929            EntryType::Blob | EntryType::Symlink => {
930                let on_disk = dest.join(&rel_path);
931                let meta = match fs::symlink_metadata(&on_disk) {
932                    Ok(m) => m,
933                    Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
934                        // The materializer didn't put it there. That
935                        // shouldn't happen on a clean materialize,
936                        // but if it does we skip the entry so the
937                        // manifest stays a reflection of disk truth.
938                        debug!(
939                            path = %rel_path,
940                            "manifest population skipped missing file"
941                        );
942                        continue;
943                    }
944                    Err(e) => return Err(HeddleError::Io(e)),
945                };
946                let (size, inode, mtime_ns, ctime_ns, mode) =
947                    crate::stat_signature::stat_signature(&on_disk, &meta);
948                out.insert(
949                    rel_path,
950                    ManifestFile {
951                        hash: entry.require_content_hash(),
952                        size,
953                        inode,
954                        mtime_ns,
955                        ctime_ns,
956                        mode,
957                    },
958                );
959            }
960            EntryType::Gitlink => {}
961            // Native child-spool edge: nothing materialized to disk.
962            EntryType::Spoollink => {}
963        }
964    }
965    Ok(())
966}
967
/// Produce the value stored in the manifest's worktree-path field: `path`
/// made absolute with symlinks resolved.
///
/// `Repository::snapshot` compares its own (also canonicalized) `self.root`
/// against this value to tell whether it is running inside the materialized
/// worktree. Without canonicalization, a materialize at `/tmp/foo` and a
/// snapshot from `/private/tmp/foo` would fail to match on macOS.
///
/// If canonicalization fails (for example because `path` does not exist),
/// the input is returned unchanged. The comparison may then miss in
/// pathological cases, which degrades the cache to "always rebuild" rather
/// than corrupting the manifest — strictly worse perf, never worse
/// correctness.
pub(crate) fn canonical_worktree_path(path: &Path) -> PathBuf {
    match fs::canonicalize(path) {
        Ok(resolved) => resolved,
        Err(_) => path.to_path_buf(),
    }
}
982
983/// Stat-cache fast no-op check. Returns `true` when the on-disk
984/// worktree is byte-identical to what `manifest` describes — every
985/// manifest file present at its recorded `(inode, mtime, ctime,
986/// mode)`, no untracked files, no deletions.
987///
988/// Pattern: same as git's index `assume-unchanged` fast path. The
989/// stat fields are populated by `populate_manifest_from_tree` at
990/// materialise time; clonefile/copy operations preserve the
991/// destination's inode for the lifetime of the file, so a single
992/// `stat` per file is sufficient to detect any modification.
993///
994/// Performance: ~5 ms for a 643-file worktree (single `stat` per
995/// file + B-tree lookup). The slow path (`build_tree`) reads and
996/// hashes every file, ~100s of ms for the same fixture.
997///
998/// Returns `Ok(false)` on ANY uncertainty — a stat call failed, a
999/// file in the manifest is missing, an untracked file showed up,
1000/// or any single field mismatched. Callers fall through to the
1001/// slow `build_tree` path, which is always correct.
1002/// Walk the captured tree named by `manifest.tree_hash` and collect
1003/// every subdirectory's relative path (forward-slash joined,
1004/// relative to the tree root, no leading or trailing slashes).
1005/// Source of truth for [`stat_cache_no_op`]'s directory leg —
1006/// includes tree-only empty directories that a `manifest.files`
1007/// ancestors-derived set would miss.
1008fn collect_expected_dirs(
1009    repo: &Repository,
1010    manifest: &ThreadManifest,
1011) -> Result<std::collections::HashSet<String>> {
1012    use std::collections::HashSet;
1013    let mut set: HashSet<String> = HashSet::new();
1014    let Some(tree) = repo.store().get_tree(&manifest.tree_hash)? else {
1015        // Tree missing from the store would be a serious anomaly —
1016        // surface it so the caller bails to the slow path which will
1017        // re-derive everything from the worktree.
1018        return Err(HeddleError::Config(format!(
1019            "tree {} referenced by manifest is missing",
1020            manifest.tree_hash
1021        )));
1022    };
1023    collect_subdirs_into(repo, &tree, "", &mut set)?;
1024    Ok(set)
1025}
1026
1027fn collect_subdirs_into(
1028    repo: &Repository,
1029    tree: &objects::object::Tree,
1030    rel_prefix: &str,
1031    out: &mut std::collections::HashSet<String>,
1032) -> Result<()> {
1033    use objects::object::EntryType;
1034    for entry in tree.entries() {
1035        if entry.entry_type() != EntryType::Tree {
1036            continue;
1037        }
1038        let rel = if rel_prefix.is_empty() {
1039            entry.name().to_string()
1040        } else {
1041            format!("{rel_prefix}/{}", entry.name())
1042        };
1043        let Some(tree_hash) = entry.tree_hash() else {
1044            continue;
1045        };
1046        let subtree = repo.store().get_tree(&tree_hash)?.ok_or_else(|| {
1047            HeddleError::Config(format!(
1048                "subtree {} missing while collecting expected dirs at {rel}",
1049                tree_hash
1050            ))
1051        })?;
1052        out.insert(rel.clone());
1053        collect_subdirs_into(repo, &subtree, &rel, out)?;
1054    }
1055    Ok(())
1056}
1057
/// Recursive `read_dir` worker for the stat-cache no-op predicate.
/// Returns `Ok(false)` to bail to the slow path (anything unexpected,
/// any stat mismatch); `Ok(true)` to continue the walk. Final
/// presence checks (`seen.len() == manifest.files.len()` etc.) live
/// in the caller; this fn only flags incremental mismatches.
///
/// Parameters:
/// * `root` — worktree root; every entry's manifest key is its path
///   relative to this.
/// * `cur` — directory being listed at this recursion level (`root`
///   on the initial call).
/// * `manifest` — supplies the recorded per-file stat entries.
/// * `expected_dirs` — relative paths of directories that are allowed
///   to exist; any other directory causes a bail.
/// * `ignore_matcher` — decides which entries are pruned.
/// * `seen` / `seen_dirs` — accumulate the manifest files and expected
///   directories actually encountered, for the caller's deletion check.
///
/// Every I/O failure in this function is reported as `Ok(false)`, not
/// as an error. As written, the only `Err` it can return is one
/// bubbled up from its own recursive call.
///
/// Why hand-roll rather than reuse `ignore::WalkBuilder`: the walker
/// crate buffers entries, sorts them for determinism, calls
/// `metadata()` to populate its own `DirEntry`, and runs the gitignore
/// pipeline per directory even with every `git_*` flag turned off.
/// All of that is wasted on this predicate, which already has its own
/// `WorktreeIgnoreMatcher` and only needs `symlink_metadata` on each
/// file. A bare `read_dir` recursion is ≈3× faster on the 10k-file
/// fixture and matches `build_tree`'s ignore semantics exactly
/// because we go through the same matcher.
fn walk_for_no_op(
    root: &Path,
    cur: &Path,
    manifest: &ThreadManifest,
    expected_dirs: &std::collections::HashSet<String>,
    ignore_matcher: &crate::worktree_ignore::WorktreeIgnoreMatcher,
    seen: &mut std::collections::HashSet<String>,
    seen_dirs: &mut std::collections::HashSet<String>,
) -> Result<bool> {
    let entries = match fs::read_dir(cur) {
        Ok(it) => it,
        // A directory we can't read means we've lost certainty about
        // its contents — fall through to the slow path.
        Err(_) => return Ok(false),
    };
    for entry in entries {
        let entry = match entry {
            Ok(e) => e,
            // Same reasoning as an unreadable directory: bail.
            Err(_) => return Ok(false),
        };
        let path = entry.path();
        let Ok(rel) = path.strip_prefix(root) else {
            return Ok(false);
        };
        // NOTE(review): `rel_str` uses the platform's path separator,
        // while the manifest keys and `expected_dirs` entries built
        // elsewhere in this file are joined with `/`. On a platform
        // whose separator is not `/`, nested entries would presumably
        // never match and the predicate would always bail to the slow
        // path (safe, but slow) — confirm whether that matters.
        let rel_str = rel.to_string_lossy().into_owned();
        // A name that is not valid UTF-8 cannot be compared against
        // the string-keyed manifest, so bail rather than guess.
        let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
            return Ok(false);
        };

        // Run the ignore matcher *first*, before consulting the
        // manifest. The previous "manifest-first" dispatch
        // accepted any manifest hit without re-checking the
        // matcher, which silently false-passed if the user had
        // tightened `.heddleignore` (or the in-config ignore set)
        // between materialise and this capture — `build_tree`
        // would now exclude the previously-tracked path and
        // produce a different tree, but the predicate said
        // "no-op". Always running the matcher first costs a
        // pattern check per entry but is what makes the
        // predicate's output match what `build_tree` would do.
        //
        // Three outcomes from the matcher:
        //   * Pruned + in manifest → ignore-config drift; bail
        //     to slow path so the new tree reflects the new
        //     exclusion.
        //   * Pruned + not in manifest → genuinely ignored;
        //     silently skip without recursing.
        //   * Not pruned → standard manifest / new-entry
        //     dispatch below.
        //
        // About the second probe: `should_prune_directory_child`
        // matches the production walker's per-entry probe
        // (`worktree_walk.rs`). It calls
        // `matched_relative(path, is_dir=true)` so gitignore rules
        // with trailing `/` still fire, and the same patterns
        // exclude both file and directory entries — same behaviour
        // `build_tree` would observe at materialise time.
        // (The matcher's internals are not visible from this file;
        // the description above is the original author's.)
        let pruned = ignore_matcher.should_prune_absolute_path(&path)
            || ignore_matcher.should_prune_directory_child(cur, name);
        if pruned {
            if manifest.files.contains_key(&rel_str) {
                // The matcher now wants this path excluded, but
                // it's in the manifest from materialise time.
                // Ignore-config drift — let the slow path
                // rebuild the tree without it.
                return Ok(false);
            }
            continue;
        }

        // Not pruned. Manifest lookup is the fast path for
        // tracked files; un-tracked entries fall through to
        // dir-recursion / new-file detection below.
        if let Some(manifest_entry) = manifest.files.get(&rel_str) {
            // `symlink_metadata` (not `metadata`) so a symlink
            // doesn't transparently follow into the target's
            // inode.
            let meta = match fs::symlink_metadata(&path) {
                Ok(m) => m,
                Err(_) => return Ok(false),
            };
            let (size, inode, mtime_ns, ctime_ns, mode) =
                crate::stat_signature::stat_signature(&path, &meta);
            // Build a probe entry from the live stat. `hash` is copied
            // from the manifest entry (nothing is re-hashed here), so
            // only the stat-derived fields can differ.
            let stat = ManifestFile {
                hash: manifest_entry.hash,
                size,
                inode,
                mtime_ns,
                ctime_ns,
                mode,
            };
            if !stat.matches(manifest_entry) {
                return Ok(false);
            }
            seen.insert(rel_str);
            continue;
        }

        // Not a manifest file. `DirEntry::file_type` does not follow
        // symlinks, so a symlink to a directory is not treated as a
        // directory here.
        let file_type = match entry.file_type() {
            Ok(ft) => ft,
            Err(_) => return Ok(false),
        };
        if file_type.is_dir() {
            // Directory leg: any directory not in `expected_dirs`
            // is an addition since materialise. Bail; the slow
            // path will incorporate it.
            if !expected_dirs.contains(&rel_str) {
                return Ok(false);
            }
            seen_dirs.insert(rel_str);
            if !walk_for_no_op(
                root,
                &path,
                manifest,
                expected_dirs,
                ignore_matcher,
                seen,
                seen_dirs,
            )? {
                return Ok(false);
            }
            continue;
        }

        // A non-ignored, non-directory entry that's not in the
        // manifest is a new file. Bail to the slow path which
        // will rebuild the tree with the new entry.
        return Ok(false);
    }
    Ok(true)
}
1202
1203fn stat_cache_no_op(repo: &Repository, manifest: &ThreadManifest, root: &Path) -> Result<bool> {
1204    use std::collections::HashSet;
1205
1206    let ignore_patterns = repo.ignore_patterns()?;
1207    let nested_exclusions = repo.nested_thread_worktree_exclusions(root)?;
1208    let ignore_matcher = crate::worktree_ignore::WorktreeIgnoreMatcher::new(&ignore_patterns)
1209        .with_nested_worktree_exclusions(nested_exclusions);
1210
1211    // Manifests only record files+symlinks, but Heddle's tree
1212    // builder materialises empty directories as their own tree
1213    // entries. So a no-op predicate that only checks `manifest.files`
1214    // would miss "user added or removed an empty directory" —
1215    // `seen.len() == manifest.files.len()` is still true on the file
1216    // side, but the on-disk tree no longer matches what `build_tree`
1217    // would produce.
1218    //
1219    // Source of truth for the expected directory set is the captured
1220    // tree itself (the one the manifest's `tree_hash` names), not
1221    // the manifest's file ancestors. Two reasons:
1222    //
1223    //   1. *Tree-only empty directories.* A `Tree` entry with no
1224    //      files beneath it is invisible from a `manifest.files`
1225    //      ancestors-walk — the file set is empty, so every
1226    //      ancestor it would contribute is missing. Removing a
1227    //      legit empty leaf dir would still false-pass.
1228    //   2. *Future schema drift.* Files in `manifest.files` may
1229    //      use slash-normalised relative paths that don't exactly
1230    //      match how `Tree::entries` names subdirs on every
1231    //      platform; walking the tree directly avoids the
1232    //      double-encoding hazard.
1233    //
1234    // Cost is ~one `get_tree` per subdir of the captured tree.
1235    // For the typical thread (a few hundred dirs) that's a small
1236    // number of memory-mapped object reads; on the predicate's
1237    // hot path it's bounded by the tree's directory fan-out, not
1238    // file count.
1239    let expected_dirs: HashSet<String> = match collect_expected_dirs(repo, manifest) {
1240        Ok(s) => s,
1241        // Any error walking the tree → conservatively bail to the
1242        // slow path. `Ok(false)` keeps correctness; the worst case
1243        // is a wasted full rebuild.
1244        Err(_) => return Ok(false),
1245    };
1246
1247    // Walk the worktree. For every file we see, check it against the
1248    // manifest. Track which manifest paths we've actually seen so we
1249    // can detect deletions afterwards.
1250    //
1251    // Custom `read_dir` recursion instead of `ignore::WalkBuilder`:
1252    // the walker crate is fast on its own but the per-entry overhead
1253    // adds up at 10k+ files (it buffers, sorts, double-stats, and
1254    // re-applies the ignore stack for every dir). For this hot
1255    // predicate we only need: a `readdir` per directory, one
1256    // `symlink_metadata` per file, and the same ignore-matcher
1257    // check `build_tree` runs. The std-only recursion below
1258    // measured ≈3× faster on the 10k-file fixture (no per-entry
1259    // double-stat, no buffer churn, fewer allocations).
1260    let mut seen: HashSet<String> = HashSet::with_capacity(manifest.files.len());
1261    let mut seen_dirs: HashSet<String> = HashSet::with_capacity(expected_dirs.len());
1262    if !walk_for_no_op(
1263        root,
1264        root,
1265        manifest,
1266        &expected_dirs,
1267        &ignore_matcher,
1268        &mut seen,
1269        &mut seen_dirs,
1270    )? {
1271        return Ok(false);
1272    }
1273
1274    // Final pass: every manifest entry must have been seen (file
1275    // deletion check) and every manifest-implied directory must
1276    // have been seen (directory deletion check). The dir-side
1277    // check catches `rmdir` of an empty directory that was part
1278    // of the materialised tree — its files are also gone (so the
1279    // file side already declines) but if it had no files to begin
1280    // with the file side alone would false-pass.
1281    if seen.len() != manifest.files.len() {
1282        return Ok(false);
1283    }
1284    if seen_dirs.len() != expected_dirs.len() {
1285        return Ok(false);
1286    }
1287    Ok(true)
1288}
1289
1290#[cfg(test)]
1291mod tests {
1292    use objects::{
1293        object::{Blob, TreeEntry},
1294        util::gitlink_placeholder_bytes,
1295    };
1296    use sley::{ObjectFormat as GitObjectFormat, ObjectId as GitObjectId};
1297    use tempfile::TempDir;
1298
1299    use super::*;
1300    use crate::thread_manifest::read_manifest;
1301
1302    fn gitlink_target_for_tests() -> GitObjectId {
1303        GitObjectId::from_hex(
1304            GitObjectFormat::Sha1,
1305            "1234567890abcdef1234567890abcdef12345678",
1306        )
1307        .unwrap()
1308    }
1309
1310    fn seeded_repo() -> (TempDir, Repository) {
1311        let repo_dir = TempDir::new().unwrap();
1312        let repo = Repository::init_default(repo_dir.path()).unwrap();
1313        fs::write(repo_dir.path().join("file.txt"), b"tracked\n").unwrap();
1314        repo.snapshot(Some("seed".into()), None).unwrap();
1315        (repo_dir, repo)
1316    }
1317
1318    #[test]
1319    fn capture_thread_from_disk_preserves_unchanged_gitlink_when_sibling_changes() {
1320        let repo_dir = TempDir::new().unwrap();
1321        let repo = Repository::init_default(repo_dir.path()).unwrap();
1322        let target = gitlink_target_for_tests();
1323        let note_hash = repo
1324            .store()
1325            .put_blob(&Blob::new(b"before\n".to_vec()))
1326            .unwrap();
1327        let tree = Tree::from_entries(vec![
1328            TreeEntry::file("note.txt", note_hash, false).unwrap(),
1329            TreeEntry::gitlink("vendor", target).unwrap(),
1330        ]);
1331        repo.snapshot_tree_with_attribution_profiled(
1332            tree,
1333            Some("gitlink thread baseline".to_string()),
1334            None,
1335            repo.get_attribution().unwrap(),
1336        )
1337        .unwrap();
1338
1339        let dest = repo_dir.path().join("thread-out");
1340        repo.materialize_thread("main", &dest, &AudienceTier::Internal)
1341            .unwrap();
1342        assert_eq!(
1343            fs::read(dest.join("vendor")).unwrap(),
1344            gitlink_placeholder_bytes(&target)
1345        );
1346
1347        fs::write(dest.join("note.txt"), b"after\n").unwrap();
1348        let outcome = repo.capture_thread_from_disk("main", &dest).unwrap();
1349        let state_id = match outcome {
1350            ThreadCaptureOutcome::Captured { state_id } => state_id,
1351            ThreadCaptureOutcome::NoOp => panic!("sibling edit must capture a new state"),
1352        };
1353        let state = repo
1354            .store()
1355            .get_state(&state_id)
1356            .unwrap()
1357            .expect("captured state");
1358        let captured_tree = repo
1359            .store()
1360            .get_tree(&state.tree)
1361            .unwrap()
1362            .expect("captured tree");
1363
1364        assert_eq!(
1365            captured_tree
1366                .get("vendor")
1367                .expect("vendor gitlink")
1368                .gitlink_target(),
1369            Some(target)
1370        );
1371        let note_hash = captured_tree
1372            .get("note.txt")
1373            .expect("note entry")
1374            .blob_hash()
1375            .expect("note blob");
1376        let note = repo
1377            .store()
1378            .get_blob(&note_hash)
1379            .unwrap()
1380            .expect("note blob");
1381        assert_eq!(note.content(), b"after\n");
1382    }
1383
1384    #[test]
1385    fn materialize_thread_writes_manifest_with_files() {
1386        let repo_dir = TempDir::new().unwrap();
1387        let repo = Repository::init_default(repo_dir.path()).unwrap();
1388        // Build a small worktree to capture.
1389        fs::write(repo_dir.path().join("Cargo.toml"), b"# a\n").unwrap();
1390        fs::create_dir_all(repo_dir.path().join("src")).unwrap();
1391        fs::write(repo_dir.path().join("src/lib.rs"), b"fn main() {}\n").unwrap();
1392        repo.snapshot(Some("seed".into()), None).unwrap();
1393
1394        let dest = TempDir::new().unwrap();
1395        let manifest = repo
1396            .materialize_thread("main", &dest.path().join("out"), &AudienceTier::Internal)
1397            .unwrap();
1398
1399        assert_eq!(
1400            manifest.schema_version,
1401            crate::thread_manifest::SCHEMA_VERSION
1402        );
1403        // Three files: Cargo.toml, src/lib.rs, plus whatever
1404        // init_default seeded — only assert the ones we wrote
1405        // exist and have plausible stat fields.
1406        let cargo = manifest
1407            .files
1408            .get("Cargo.toml")
1409            .expect("Cargo.toml in manifest");
1410        assert_ne!(cargo.inode, 0);
1411        assert_ne!(cargo.mtime_ns, 0);
1412        let src = manifest
1413            .files
1414            .get("src/lib.rs")
1415            .expect("src/lib.rs in manifest");
1416        assert_ne!(src.inode, 0);
1417
1418        // Manifest persisted to disk.
1419        let loaded = read_manifest(repo.heddle_dir(), "main")
1420            .unwrap()
1421            .expect("manifest on disk");
1422        assert_eq!(loaded.files.len(), manifest.files.len());
1423        assert_eq!(
1424            loaded.files["Cargo.toml"].inode,
1425            manifest.files["Cargo.toml"].inode
1426        );
1427    }
1428
1429    #[test]
1430    fn materialize_thread_creates_absent_target() {
1431        let (_repo_dir, repo) = seeded_repo();
1432        let dest_holder = TempDir::new().unwrap();
1433        let dest = dest_holder.path().join("out");
1434
1435        repo.materialize_thread("main", &dest, &AudienceTier::Internal)
1436            .unwrap();
1437
1438        assert!(dest.is_dir());
1439        assert_eq!(
1440            fs::read_to_string(dest.join("file.txt")).unwrap(),
1441            "tracked\n"
1442        );
1443    }
1444
1445    #[test]
1446    fn materialize_thread_adopts_empty_directory() {
1447        let (_repo_dir, repo) = seeded_repo();
1448        let dest_holder = TempDir::new().unwrap();
1449        let dest = dest_holder.path().join("out");
1450        fs::create_dir(&dest).unwrap();
1451
1452        repo.materialize_thread("main", &dest, &AudienceTier::Internal)
1453            .unwrap();
1454
1455        assert!(dest.is_dir());
1456        assert_eq!(
1457            fs::read_to_string(dest.join("file.txt")).unwrap(),
1458            "tracked\n"
1459        );
1460    }
1461
1462    #[test]
1463    fn materialize_thread_rejects_non_empty_directory() {
1464        let (_repo_dir, repo) = seeded_repo();
1465        let dest_holder = TempDir::new().unwrap();
1466        let dest = dest_holder.path().join("out");
1467        fs::create_dir(&dest).unwrap();
1468        fs::write(dest.join("existing.txt"), b"user data\n").unwrap();
1469
1470        let err = repo
1471            .materialize_thread("main", &dest, &AudienceTier::Internal)
1472            .unwrap_err();
1473
1474        assert!(err.to_string().contains("is not empty"), "{err}");
1475        assert_eq!(
1476            fs::read_to_string(dest.join("existing.txt")).unwrap(),
1477            "user data\n"
1478        );
1479        assert!(!dest.join("file.txt").exists());
1480    }
1481
1482    #[cfg(unix)]
1483    #[test]
1484    fn materialize_thread_rejects_symlink_target() {
1485        let (_repo_dir, repo) = seeded_repo();
1486        let dest_holder = TempDir::new().unwrap();
1487        let real = dest_holder.path().join("real");
1488        fs::create_dir(&real).unwrap();
1489        let dest = dest_holder.path().join("link");
1490        std::os::unix::fs::symlink(&real, &dest).unwrap();
1491
1492        let err = repo
1493            .materialize_thread("main", &dest, &AudienceTier::Internal)
1494            .unwrap_err();
1495
1496        assert!(err.to_string().contains("cannot be a symlink"), "{err}");
1497        assert!(!real.join("file.txt").exists());
1498    }
1499
1500    #[test]
1501    fn materialize_thread_rejects_file_target() {
1502        let (_repo_dir, repo) = seeded_repo();
1503        let dest_holder = TempDir::new().unwrap();
1504        let dest = dest_holder.path().join("file");
1505        fs::write(&dest, b"user data\n").unwrap();
1506
1507        let err = repo
1508            .materialize_thread("main", &dest, &AudienceTier::Internal)
1509            .unwrap_err();
1510
1511        assert!(err.to_string().contains("must be a directory"), "{err}");
1512        assert_eq!(fs::read_to_string(&dest).unwrap(), "user data\n");
1513    }
1514
1515    fn embargo_state_with_tier(repo: &Repository, tier: VisibilityTier) -> ChangeId {
1516        use chrono::Utc;
1517        use objects::object::{Principal, StateVisibility};
1518        let state_id = repo
1519            .refs()
1520            .get_thread(&ThreadName::new("main"))
1521            .unwrap()
1522            .expect("head present");
1523        repo.put_state_visibility(StateVisibility {
1524            state: state_id,
1525            tier,
1526            embargo_until: None,
1527            declarer: Principal {
1528                name: "Grace Hopper".into(),
1529                email: "grace@example.com".into(),
1530            },
1531            declared_at: Utc::now(),
1532            signature: None,
1533            supersedes: None,
1534        })
1535        .expect("put visibility");
1536        state_id
1537    }
1538
1539    fn checkout_main(
1540        repo: &Repository,
1541        dest: &Path,
1542        audience: &AudienceTier,
1543    ) -> CheckoutMaterialization {
1544        let change_id = repo
1545            .refs()
1546            .resolve("main")
1547            .unwrap()
1548            .expect("main thread exists");
1549        let state = repo
1550            .store()
1551            .get_state(&change_id)
1552            .unwrap()
1553            .expect("main state exists");
1554        repo.checkout_state_gated(&change_id, &state, dest, audience)
1555            .unwrap()
1556    }
1557
1558    #[test]
1559    fn checkout_renders_courtesy_stub_when_state_is_under_tier_for_audience() {
1560        let repo_dir = TempDir::new().unwrap();
1561        let repo = Repository::init_default(repo_dir.path()).unwrap();
1562        fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
1563        repo.snapshot(Some("embargoed fix".into()), None).unwrap();
1564        embargo_state_with_tier(
1565            &repo,
1566            VisibilityTier::Private {
1567                scope_label: "sec-embargo".into(),
1568            },
1569        );
1570
1571        let dest_holder = TempDir::new().unwrap();
1572        let dest = dest_holder.path().join("out");
1573        // A Private state is withheld even from the all-seeing Internal
1574        // operator — the placeholder appears, the tracked bytes do not.
1575        let manifest = repo
1576            .materialize_thread("main", &dest, &AudienceTier::Internal)
1577            .unwrap();
1578
1579        assert!(
1580            dest.join(COURTESY_STUB_FILENAME).exists(),
1581            "courtesy placeholder must be written for an under-tier checkout"
1582        );
1583        assert!(
1584            !dest.join("secret.rs").exists(),
1585            "the tracked content must NOT be materialized for an under-tier audience"
1586        );
1587        assert!(
1588            manifest.files.is_empty(),
1589            "manifest must record no tracked files for a stubbed checkout"
1590        );
1591        let stub = fs::read_to_string(dest.join(COURTESY_STUB_FILENAME)).unwrap();
1592        assert!(stub.contains("private"));
1593        assert!(stub.contains("sec-embargo"));
1594    }
1595
1596    #[test]
1597    fn checkout_materializes_real_content_for_the_authorized_audience() {
1598        let repo_dir = TempDir::new().unwrap();
1599        let repo = Repository::init_default(repo_dir.path()).unwrap();
1600        fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
1601        repo.snapshot(Some("embargoed fix".into()), None).unwrap();
1602        embargo_state_with_tier(
1603            &repo,
1604            VisibilityTier::Private {
1605                scope_label: "sec-embargo".into(),
1606            },
1607        );
1608
1609        let dest_holder = TempDir::new().unwrap();
1610        let dest = dest_holder.path().join("out");
1611        // The holder of the matching restricted scope sees the real bytes.
1612        let manifest = repo
1613            .materialize_thread(
1614                "main",
1615                &dest,
1616                &AudienceTier::Restricted("sec-embargo".into()),
1617            )
1618            .unwrap();
1619
1620        assert!(dest.join("secret.rs").exists());
1621        assert!(!dest.join(COURTESY_STUB_FILENAME).exists());
1622        assert!(manifest.files.contains_key("secret.rs"));
1623    }
1624
1625    /// #316 / PR #528 r6: a worktree root first materialized under-tier (stub
1626    /// written) and later re-materialized for an authorized audience must end up
1627    /// with a clean tree — the real bytes present AND the stale courtesy stub
1628    /// removed. `materialize_tree` only writes tracked leaves, so without an
1629    /// explicit removal the stub would linger on disk after the visible path.
1630    #[test]
1631    fn authorized_rematerialize_removes_stale_embargo_stub() {
1632        let repo_dir = TempDir::new().unwrap();
1633        let repo = Repository::init_default(repo_dir.path()).unwrap();
1634        fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
1635        repo.snapshot(Some("embargoed fix".into()), None).unwrap();
1636        embargo_state_with_tier(
1637            &repo,
1638            VisibilityTier::Private {
1639                scope_label: "sec-embargo".into(),
1640            },
1641        );
1642
1643        let dest_holder = TempDir::new().unwrap();
1644        let dest = dest_holder.path().join("out");
1645
1646        // First: under-tier materialize of the root → only the stub lands.
1647        checkout_main(&repo, &dest, &AudienceTier::Internal);
1648        assert!(
1649            dest.join(COURTESY_STUB_FILENAME).exists(),
1650            "under-tier materialize must write the stub"
1651        );
1652        assert!(!dest.join("secret.rs").exists());
1653
1654        // Then: re-materialize the SAME root for an authorized audience.
1655        checkout_main(
1656            &repo,
1657            &dest,
1658            &AudienceTier::Restricted("sec-embargo".into()),
1659        );
1660
1661        assert!(
1662            dest.join("secret.rs").exists(),
1663            "authorized re-materialize must write the real tree"
1664        );
1665        assert!(
1666            !dest.join(COURTESY_STUB_FILENAME).exists(),
1667            "the stale courtesy stub must be removed on the authorized re-materialize"
1668        );
1669    }
1670
1671    /// #316 / PR #528 r7 CLASS 1 (the leak): a root first materialized for an
1672    /// AUTHORIZED audience (real tree on disk) and then re-materialized
1673    /// UNDER-TIER must end up holding ONLY the courtesy stub — none of the prior
1674    /// visible tree's tracked bytes may remain next to the stub, or the checkout
1675    /// still contains exactly the content the gate is supposed to withhold. The
1676    /// reconcile step removes the prior tracked leaves (including nested ones)
1677    /// and prunes the directories they leave empty.
1678    #[test]
1679    fn visible_then_withheld_root_has_only_stub() {
1680        let repo_dir = TempDir::new().unwrap();
1681        let repo = Repository::init_default(repo_dir.path()).unwrap();
1682        fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
1683        fs::create_dir_all(repo_dir.path().join("nested")).unwrap();
1684        fs::write(repo_dir.path().join("nested/inner.rs"), b"fn inner() {}\n").unwrap();
1685        repo.snapshot(Some("embargoed fix".into()), None).unwrap();
1686        embargo_state_with_tier(
1687            &repo,
1688            VisibilityTier::Private {
1689                scope_label: "sec-embargo".into(),
1690            },
1691        );
1692
1693        let dest_holder = TempDir::new().unwrap();
1694        let dest = dest_holder.path().join("out");
1695
1696        // Visible materialize: the real tree lands — the very bytes a later
1697        // under-tier materialize must withhold.
1698        checkout_main(
1699            &repo,
1700            &dest,
1701            &AudienceTier::Restricted("sec-embargo".into()),
1702        );
1703        assert!(dest.join("secret.rs").exists());
1704        assert!(dest.join("nested/inner.rs").exists());
1705
1706        // Under-tier re-materialize of the SAME root — the leak case.
1707        checkout_main(&repo, &dest, &AudienceTier::Internal);
1708
1709        assert!(
1710            dest.join(COURTESY_STUB_FILENAME).exists(),
1711            "withheld checkout must hold the courtesy stub"
1712        );
1713        assert!(
1714            !dest.join("secret.rs").exists(),
1715            "the prior visible tree's bytes must NOT remain next to the stub"
1716        );
1717        assert!(
1718            !dest.join("nested/inner.rs").exists(),
1719            "nested tracked leaves must be removed too"
1720        );
1721        // ONLY the stub remains: every prior tracked leaf — and the now-empty
1722        // directories they lived in — are gone.
1723        let remaining: Vec<_> = fs::read_dir(&dest)
1724            .unwrap()
1725            .map(|e| e.unwrap().file_name())
1726            .collect();
1727        assert_eq!(
1728            remaining.len(),
1729            1,
1730            "withheld root must contain only the courtesy stub, got {remaining:?}"
1731        );
1732        assert_eq!(remaining[0].to_str().unwrap(), COURTESY_STUB_FILENAME);
1733    }
1734
1735    /// #316 / PR #528 r7 CLASS 1 (r6 transition, as a matrix member): a root
1736    /// first materialized UNDER-TIER (stub) and then re-materialized for an
1737    /// AUTHORIZED audience must hold the real tree and NO stale stub.
1738    #[test]
1739    fn withheld_then_visible_root_has_real_tree_no_stub() {
1740        let repo_dir = TempDir::new().unwrap();
1741        let repo = Repository::init_default(repo_dir.path()).unwrap();
1742        fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
1743        repo.snapshot(Some("embargoed fix".into()), None).unwrap();
1744        embargo_state_with_tier(
1745            &repo,
1746            VisibilityTier::Private {
1747                scope_label: "sec-embargo".into(),
1748            },
1749        );
1750
1751        let dest_holder = TempDir::new().unwrap();
1752        let dest = dest_holder.path().join("out");
1753
1754        checkout_main(&repo, &dest, &AudienceTier::Internal);
1755        assert!(dest.join(COURTESY_STUB_FILENAME).exists());
1756        assert!(!dest.join("secret.rs").exists());
1757
1758        checkout_main(
1759            &repo,
1760            &dest,
1761            &AudienceTier::Restricted("sec-embargo".into()),
1762        );
1763        assert!(
1764            dest.join("secret.rs").exists(),
1765            "authorized re-materialize must write the real tree"
1766        );
1767        assert!(
1768            !dest.join(COURTESY_STUB_FILENAME).exists(),
1769            "the stale courtesy stub must be removed on the authorized re-materialize"
1770        );
1771    }
1772
1773    /// #316 / PR #528 r7 CLASS 1 (visible→visible): re-materializing a root at a
1774    /// NEW visible tree must leave exactly that tree — a leaf dropped from the
1775    /// new tree must not linger from the prior materialize. `materialize_tree`
1776    /// writes the new leaves but does not remove a now-absent prior leaf; the
1777    /// reconcile step closes that gap.
1778    #[test]
1779    fn visible_then_visible_refreshes_tree() {
1780        let repo_dir = TempDir::new().unwrap();
1781        let repo = Repository::init_default(repo_dir.path()).unwrap();
1782        fs::write(repo_dir.path().join("keep.rs"), b"keep\n").unwrap();
1783        fs::write(repo_dir.path().join("stale.rs"), b"stale\n").unwrap();
1784        repo.snapshot(Some("seed".into()), None).unwrap();
1785
1786        let dest_holder = TempDir::new().unwrap();
1787        let dest = dest_holder.path().join("out");
1788        checkout_main(&repo, &dest, &AudienceTier::Internal);
1789        assert!(dest.join("keep.rs").exists());
1790        assert!(dest.join("stale.rs").exists());
1791
1792        // Advance the thread head in the MAIN repo (snapshot walks repo.root,
1793        // not `dest`, so the dest manifest's worktree_path stays = dest and is
1794        // NOT refreshed here): drop stale.rs, add fresh.rs.
1795        fs::remove_file(repo_dir.path().join("stale.rs")).unwrap();
1796        fs::write(repo_dir.path().join("fresh.rs"), b"fresh\n").unwrap();
1797        repo.snapshot(Some("advance".into()), None).unwrap();
1798
1799        // Re-materialize the SAME root at the new (still visible) head.
1800        checkout_main(&repo, &dest, &AudienceTier::Internal);
1801        assert!(dest.join("keep.rs").exists(), "an unchanged leaf stays");
1802        assert!(dest.join("fresh.rs").exists(), "the new leaf is written");
1803        assert!(
1804            !dest.join("stale.rs").exists(),
1805            "a leaf dropped from the new tree must not linger from the prior materialize"
1806        );
1807        assert!(
1808            !dest.join(COURTESY_STUB_FILENAME).exists(),
1809            "a visible re-materialize writes no stub"
1810        );
1811    }
1812
1813    /// #316 / PR #528 r7 CLASS 1 (withheld→withheld): two under-tier
1814    /// materializes of the same root leave only the stub each time, and capture
1815    /// stays a no-op.
1816    #[test]
1817    fn withheld_then_withheld_stays_withheld() {
1818        let repo_dir = TempDir::new().unwrap();
1819        let repo = Repository::init_default(repo_dir.path()).unwrap();
1820        fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
1821        repo.snapshot(Some("embargoed fix".into()), None).unwrap();
1822        embargo_state_with_tier(
1823            &repo,
1824            VisibilityTier::Private {
1825                scope_label: "sec-embargo".into(),
1826            },
1827        );
1828
1829        let dest_holder = TempDir::new().unwrap();
1830        let dest = dest_holder.path().join("out");
1831
1832        checkout_main(&repo, &dest, &AudienceTier::Internal);
1833        assert!(dest.join(COURTESY_STUB_FILENAME).exists());
1834        assert!(!dest.join("secret.rs").exists());
1835
1836        // Second under-tier checkout of the same root: still only the stub.
1837        checkout_main(&repo, &dest, &AudienceTier::Internal);
1838        let remaining: Vec<_> = fs::read_dir(&dest)
1839            .unwrap()
1840            .map(|e| e.unwrap().file_name())
1841            .collect();
1842        assert_eq!(
1843            remaining.len(),
1844            1,
1845            "withheld root must contain only the courtesy stub, got {remaining:?}"
1846        );
1847        assert_eq!(remaining[0].to_str().unwrap(), COURTESY_STUB_FILENAME);
1848        assert!(!dest.join("secret.rs").exists());
1849
1850        // Capture of the still-withheld root is a no-op.
1851        let outcome = repo.capture_thread_from_disk("main", &dest).unwrap();
1852        assert_eq!(outcome, ThreadCaptureOutcome::NoOp);
1853    }
1854
1855    /// #316 / PR #528 r9 FINDING A: the withheld marker (and `.leaves` record)
1856    /// must be keyed on the root `capture_thread_from_disk` resolves at
1857    /// READ-time, not on a pre-materialization path. `canonical_worktree_path`
1858    /// falls back to its raw input when the path does not yet resolve, so a dest
1859    /// reached THROUGH a symlink whose leaf does not exist yet canonicalizes to
1860    /// the un-resolved `link/out` before the dir is made but to the resolved
1861    /// `real/out` after. Pre-fix the marker was written under `link/out` while
1862    /// capture looked it up under `real/out` → marker missed → a withheld
1863    /// checkout captured as a stub-only tree instead of no-oping.
1864    #[cfg(unix)]
1865    #[test]
1866    fn withheld_marker_keyed_on_canonical_root_for_relative_dest() {
1867        let repo_dir = TempDir::new().unwrap();
1868        let repo = Repository::init_default(repo_dir.path()).unwrap();
1869        fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
1870        repo.snapshot(Some("embargoed fix".into()), None).unwrap();
1871        embargo_state_with_tier(
1872            &repo,
1873            VisibilityTier::Private {
1874                scope_label: "sec-embargo".into(),
1875            },
1876        );
1877
1878        // `dest` travels through a symlink to a not-yet-existing leaf, so a
1879        // canonicalize BEFORE the dir is created resolves differently (falls
1880        // back to `link/out`) than one AFTER (`real/out`).
1881        let dest_holder = TempDir::new().unwrap();
1882        let real = dest_holder.path().join("real");
1883        fs::create_dir_all(&real).unwrap();
1884        std::os::unix::fs::symlink(&real, dest_holder.path().join("link")).unwrap();
1885        let dest = dest_holder.path().join("link").join("out");
1886
1887        repo.materialize_thread("main", &dest, &AudienceTier::Internal)
1888            .unwrap();
1889        assert!(dest.join(COURTESY_STUB_FILENAME).exists());
1890        assert!(!dest.join("secret.rs").exists());
1891
1892        // Capture through the symlinked path must be a NO-OP: the marker was
1893        // keyed on the same canonical root (`real/out`) capture resolves.
1894        let outcome = repo.capture_thread_from_disk("main", &dest).unwrap();
1895        assert_eq!(
1896            outcome,
1897            ThreadCaptureOutcome::NoOp,
1898            "withheld checkout reached via a symlinked path must not be capturable"
1899        );
1900    }
1901
1902    /// #316 / PR #528 r8 HOLE 1: the withheld reduction must NOT depend on the
1903    /// clobberable per-thread `manifest.toml`. A root first materialized VISIBLE
1904    /// (holding `old-secret.txt`), THEN observed while a sibling worktree of the
1905    /// SAME thread is materialized (the event that clobbers the per-thread
1906    /// manifest, retargeting it at the sibling's root), THEN re-materialized
1907    /// WITHHELD against a LATER state whose tree no longer contains
1908    /// `old-secret.txt`, must still end up holding ONLY the courtesy stub. The
1909    /// secret is in NEITHER the withheld state's own tree NOR (post-clobber) the
1910    /// per-thread manifest — only the clobber-proof per-root record names it, so
1911    /// the reduction can only succeed by sourcing that record.
1912    #[test]
1913    fn withheld_reduction_survives_sibling_manifest_clobber() {
1914        let repo_dir = TempDir::new().unwrap();
1915        let repo = Repository::init_default(repo_dir.path()).unwrap();
1916
1917        // State S1 (visible): contains the secret that must not linger later.
1918        fs::write(repo_dir.path().join("old-secret.txt"), b"launch codes\n").unwrap();
1919        repo.snapshot(Some("seed with secret".into()), None)
1920            .unwrap();
1921
1922        // Root A materialized VISIBLE at S1 — the real bytes land on disk and the
1923        // clobber-proof per-root record for A captures `old-secret.txt`.
1924        let a_holder = TempDir::new().unwrap();
1925        let root_a = a_holder.path().join("root-a");
1926        checkout_main(&repo, &root_a, &AudienceTier::Internal);
1927        assert!(root_a.join("old-secret.txt").exists());
1928
1929        // Advance the thread to S2: the secret is REMOVED before this state, a
1930        // new tracked file replaces it. So `old-secret.txt` is absent from S2's
1931        // tree entirely.
1932        fs::remove_file(repo_dir.path().join("old-secret.txt")).unwrap();
1933        fs::write(repo_dir.path().join("kept.txt"), b"benign\n").unwrap();
1934        repo.snapshot(Some("drop secret, advance".into()), None)
1935            .unwrap();
1936        embargo_state_with_tier(
1937            &repo,
1938            VisibilityTier::Private {
1939                scope_label: "sec-embargo".into(),
1940            },
1941        );
1942
1943        // A sibling worktree B of the SAME thread is materialized (authorized, at
1944        // S2). `materialize_thread` rewrites `threads/main/manifest.toml` keyed by
1945        // thread name, so this CLOBBERS A's record there — `manifest_for_worktree_root(A)`
1946        // now resolves to B, the precise race that reopened the leak in r7.
1947        let b_holder = TempDir::new().unwrap();
1948        let root_b = b_holder.path().join("root-b");
1949        repo.materialize_thread(
1950            "main",
1951            &root_b,
1952            &AudienceTier::Restricted("sec-embargo".into()),
1953        )
1954        .unwrap();
1955        assert!(root_b.join("kept.txt").exists());
1956        // Confirm the clobber really happened: the per-thread manifest no longer
1957        // records root A.
1958        assert!(
1959            crate::thread_manifest::manifest_for_worktree_root(
1960                repo.heddle_dir(),
1961                &canonical_worktree_path(&root_a),
1962            )
1963            .unwrap()
1964            .is_none(),
1965            "sibling materialize must have clobbered A's per-thread manifest record"
1966        );
1967
1968        // Re-materialize root A WITHHELD (Internal can't see S2's Private tier).
1969        // S2's tree does not contain `old-secret.txt`, and the per-thread
1970        // manifest no longer names A — only the clobber-proof per-root record can
1971        // drive its removal.
1972        checkout_main(&repo, &root_a, &AudienceTier::Internal);
1973
1974        assert!(
1975            root_a.join(COURTESY_STUB_FILENAME).exists(),
1976            "withheld checkout must hold the courtesy stub"
1977        );
1978        assert!(
1979            !root_a.join("old-secret.txt").exists(),
1980            "the prior visible tree's secret must be GONE even though the per-thread manifest was clobbered"
1981        );
1982        let remaining: Vec<_> = fs::read_dir(&root_a)
1983            .unwrap()
1984            .map(|e| e.unwrap().file_name())
1985            .collect();
1986        assert_eq!(
1987            remaining.len(),
1988            1,
1989            "withheld root must contain only the courtesy stub, got {remaining:?}"
1990        );
1991        assert_eq!(remaining[0].to_str().unwrap(), COURTESY_STUB_FILENAME);
1992    }
1993
1994    /// #316 / PR #528 r9 FINDING 4: close the per-root `.leaves`-staleness CLASS.
1995    /// `capture_thread_from_disk` rewrites `manifest.toml` but used to leave the
1996    /// clobber-proof per-root `.leaves` record untouched, so a captured-but-
1997    /// later-withheld leaf leaked. Sequence: a visible checkout holding `{a}`;
1998    /// the user adds `b` and captures (head advances, `.leaves` MUST refresh to
1999    /// `{a, b}`); the thread then advances to a state whose tree drops `b` and is
2000    /// embargoed; re-materializing the SAME root WITHHELD against that state must
2001    /// leave ONLY the stub — `b` (on disk from the capture) must be GONE, not
2002    /// leaked next to the stub. The withheld state's own tree lacks `b`, so only
2003    /// a `.leaves` record the capture refreshed can drive `b`'s removal.
2004    #[test]
2005    fn capture_refreshes_materialized_leaves() {
2006        let repo_dir = TempDir::new().unwrap();
2007        let repo = Repository::init_default(repo_dir.path()).unwrap();
2008
2009        // S1 (visible): tracked `a.txt`.
2010        fs::write(repo_dir.path().join("a.txt"), b"alpha\n").unwrap();
2011        repo.snapshot(Some("seed a".into()), None).unwrap();
2012
2013        // Materialize root R visible (Internal) at S1 → disk {a.txt},
2014        // .leaves(R) = {a.txt}.
2015        let holder = TempDir::new().unwrap();
2016        let root = holder.path().join("root");
2017        checkout_main(&repo, &root, &AudienceTier::Internal);
2018        assert!(root.join("a.txt").exists());
2019
2020        // User adds `b.txt` in R and captures → head advances to S2 = {a, b}.
2021        // The capture MUST refresh the per-root `.leaves` record to include
2022        // `b.txt` (the class-fix: capture rewrites the manifest AND `.leaves`).
2023        fs::write(root.join("b.txt"), b"beta\n").unwrap();
2024        match repo.capture_thread_from_disk("main", &root).unwrap() {
2025            ThreadCaptureOutcome::Captured { .. } => {}
2026            ThreadCaptureOutcome::NoOp => panic!("adding b.txt must produce a real capture"),
2027        }
2028        let leaves = crate::thread_manifest::read_materialized_leaves(
2029            repo.heddle_dir(),
2030            &canonical_worktree_path(&root),
2031        )
2032        .unwrap()
2033        .expect("capture must have written a per-root leaves record");
2034        assert!(
2035            leaves.contains("a.txt") && leaves.contains("b.txt"),
2036            "capture must refresh the per-root .leaves record to the captured tree's leaves, got {leaves:?}"
2037        );
2038
2039        // Advance the thread to S3 whose tree LACKS b.txt: snapshot from the main
2040        // repo dir (which only holds a.txt and is NOT the materialized worktree,
2041        // so the manifest is not refreshed here), then embargo S3 Private.
2042        fs::write(repo_dir.path().join("a.txt"), b"alpha v2\n").unwrap();
2043        repo.snapshot(Some("drop b, advance".into()), None).unwrap();
2044        embargo_state_with_tier(
2045            &repo,
2046            VisibilityTier::Private {
2047                scope_label: "sec-embargo".into(),
2048            },
2049        );
2050
2051        // Re-materialize R WITHHELD (Internal under-tier for the Private S3). S3's
2052        // own tree has no b.txt, so the withheld reduction can only remove the
2053        // capture-added b.txt by sourcing the refreshed per-root record.
2054        checkout_main(&repo, &root, &AudienceTier::Internal);
2055
2056        assert!(
2057            root.join(COURTESY_STUB_FILENAME).exists(),
2058            "withheld checkout must hold the courtesy stub"
2059        );
2060        assert!(
2061            !root.join("b.txt").exists(),
2062            "the capture-added leaf must be removed by the withheld reduction, not leaked next to the stub"
2063        );
2064        let remaining: Vec<_> = fs::read_dir(&root)
2065            .unwrap()
2066            .map(|e| e.unwrap().file_name())
2067            .collect();
2068        assert_eq!(
2069            remaining.len(),
2070            1,
2071            "withheld root must contain only the courtesy stub, got {remaining:?}"
2072        );
2073        assert_eq!(remaining[0].to_str().unwrap(), COURTESY_STUB_FILENAME);
2074    }
2075
2076    /// #316 / PR #528 r3 Finding 1: materializing an under-tier checkout writes
2077    /// the courtesy stub and marks the manifest `withheld`. A subsequent
2078    /// capture of that checkout must be a NO-OP — it must NOT pull the stub in
2079    /// as tracked content, and (crucially) must NOT commit an empty tree that
2080    /// wipes the withheld state's real files. The thread head stays put.
2081    #[test]
2082    fn capture_skips_embargo_courtesy_stub() {
2083        let repo_dir = TempDir::new().unwrap();
2084        let repo = Repository::init_default(repo_dir.path()).unwrap();
2085        fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
2086        repo.snapshot(Some("embargoed fix".into()), None).unwrap();
2087        embargo_state_with_tier(
2088            &repo,
2089            VisibilityTier::Private {
2090                scope_label: "sec-embargo".into(),
2091            },
2092        );
2093
2094        let dest_holder = TempDir::new().unwrap();
2095        let dest = dest_holder.path().join("out");
2096        // Under-tier audience → only the stub lands; no real bytes, empty files.
2097        let manifest = repo
2098            .materialize_thread("main", &dest, &AudienceTier::Internal)
2099            .unwrap();
2100        assert!(
2101            dest.join(COURTESY_STUB_FILENAME).exists(),
2102            "stub must be written for the under-tier checkout"
2103        );
2104        assert!(
2105            manifest.files.is_empty(),
2106            "no tracked files in a stub checkout"
2107        );
2108        assert!(
2109            manifest.withheld,
2110            "manifest must mark the checkout withheld"
2111        );
2112
2113        let head_before = repo
2114            .refs()
2115            .get_thread(&ThreadName::new("main"))
2116            .unwrap()
2117            .expect("head");
2118
2119        // Capture the withheld checkout.
2120        let outcome = repo.capture_thread_from_disk("main", &dest).unwrap();
2121        assert_eq!(
2122            outcome,
2123            ThreadCaptureOutcome::NoOp,
2124            "a withheld checkout is non-capturable"
2125        );
2126
2127        // Thread head must not have moved.
2128        let head_after = repo
2129            .refs()
2130            .get_thread(&ThreadName::new("main"))
2131            .unwrap()
2132            .expect("head");
2133        assert_eq!(
2134            head_before, head_after,
2135            "withheld capture must not advance the thread head"
2136        );
2137
2138        // The thread's tree is still the real embargoed tree: it contains the
2139        // withheld content and NOT the courtesy stub.
2140        let head_state = repo.store().get_state(&head_after).unwrap().unwrap();
2141        let tree = repo.store().get_tree(&head_state.tree).unwrap().unwrap();
2142        assert!(
2143            !tree
2144                .entries()
2145                .iter()
2146                .any(|e| e.name() == COURTESY_STUB_FILENAME),
2147            "captured tree must never contain the courtesy stub"
2148        );
2149        assert!(
2150            tree.entries().iter().any(|e| e.name() == "secret.rs"),
2151            "the withheld real content must remain intact in the thread"
2152        );
2153    }
2154
2155    /// #316 / PR #528 r4: the withheld status must be scoped per *worktree
2156    /// root*, not per thread. When one thread is materialized into TWO
2157    /// worktrees — an authorized one A (real bytes) and an under-tier one B
2158    /// (withheld stub) — the under-tier materialize of B clobbers the single
2159    /// per-thread `manifest.toml`. A withheld flag stored there would then
2160    /// wrongly suppress a capture of A, silently dropping legitimate work.
2161    /// With the per-worktree marker, A captures its real edits and B no-ops.
2162    #[test]
2163    fn withheld_manifest_is_per_worktree_not_per_thread() {
2164        let repo_dir = TempDir::new().unwrap();
2165        let repo = Repository::init_default(repo_dir.path()).unwrap();
2166        fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
2167        repo.snapshot(Some("embargoed fix".into()), None).unwrap();
2168        embargo_state_with_tier(
2169            &repo,
2170            VisibilityTier::Private {
2171                scope_label: "sec-embargo".into(),
2172            },
2173        );
2174
2175        let holder_a = TempDir::new().unwrap();
2176        let worktree_a = holder_a.path().join("authorized");
2177        let holder_b = TempDir::new().unwrap();
2178        let worktree_b = holder_b.path().join("under-tier");
2179
2180        // Worktree A: the matching-scope holder gets the real bytes.
2181        let manifest_a = repo
2182            .materialize_thread(
2183                "main",
2184                &worktree_a,
2185                &AudienceTier::Restricted("sec-embargo".into()),
2186            )
2187            .unwrap();
2188        assert!(worktree_a.join("secret.rs").exists());
2189        assert!(manifest_a.files.contains_key("secret.rs"));
2190
2191        // Edit A so a correct capture produces a NEW state. Without the edit,
2192        // capturing unchanged real content is a *legitimate* no-op and wouldn't
2193        // distinguish the bug (wrong withheld-suppression) from correct
2194        // behaviour.
2195        fs::write(worktree_a.join("extra.rs"), b"fn added() {}\n").unwrap();
2196
2197        let head_before = repo
2198            .refs()
2199            .get_thread(&ThreadName::new("main"))
2200            .unwrap()
2201            .expect("head");
2202
2203        // Worktree B: under-tier audience → stub only, withheld. This clobbers
2204        // the single per-thread `manifest.toml` with B's withheld record.
2205        let manifest_b = repo
2206            .materialize_thread("main", &worktree_b, &AudienceTier::Internal)
2207            .unwrap();
2208        assert!(worktree_b.join(COURTESY_STUB_FILENAME).exists());
2209        assert!(manifest_b.files.is_empty());
2210
2211        // Capture A: must capture the real edit — its withheld status is its
2212        // own (none), NOT inherited from B's clobbering materialize.
2213        let outcome_a = repo.capture_thread_from_disk("main", &worktree_a).unwrap();
2214        let captured_state = match outcome_a {
2215            ThreadCaptureOutcome::Captured { state_id } => state_id,
2216            ThreadCaptureOutcome::NoOp => {
2217                panic!("authorized worktree A must capture its real edit, not be suppressed")
2218            }
2219        };
2220        let head_after_a = repo
2221            .refs()
2222            .get_thread(&ThreadName::new("main"))
2223            .unwrap()
2224            .expect("head");
2225        assert_ne!(head_before, head_after_a, "capture A must advance the head");
2226        assert_eq!(head_after_a, captured_state);
2227        // The captured tree carries the edit and the real content, never the stub.
2228        let captured_tree = repo
2229            .store()
2230            .get_tree(
2231                &repo
2232                    .store()
2233                    .get_state(&captured_state)
2234                    .unwrap()
2235                    .unwrap()
2236                    .tree,
2237            )
2238            .unwrap()
2239            .unwrap();
2240        assert!(
2241            captured_tree
2242                .entries()
2243                .iter()
2244                .any(|e| e.name() == "extra.rs")
2245        );
2246        assert!(
2247            captured_tree
2248                .entries()
2249                .iter()
2250                .any(|e| e.name() == "secret.rs")
2251        );
2252        assert!(
2253            !captured_tree
2254                .entries()
2255                .iter()
2256                .any(|e| e.name() == COURTESY_STUB_FILENAME)
2257        );
2258
2259        // Capture B: must be a no-op — its own worktree is withheld.
2260        let outcome_b = repo.capture_thread_from_disk("main", &worktree_b).unwrap();
2261        assert_eq!(
2262            outcome_b,
2263            ThreadCaptureOutcome::NoOp,
2264            "under-tier worktree B is non-capturable"
2265        );
2266        let head_after_b = repo
2267            .refs()
2268            .get_thread(&ThreadName::new("main"))
2269            .unwrap()
2270            .expect("head");
2271        assert_eq!(
2272            head_after_a, head_after_b,
2273            "withheld capture of B must not advance the head"
2274        );
2275    }
2276
2277    /// `record_thread_manifest` should write a manifest sidecar that
2278    /// matches what `materialize_thread` would have produced, for a
2279    /// worktree the caller materialized directly via `materialize_tree`.
2280    /// Used by the CLI's `start` path (which sets the worktree up
2281    /// itself rather than going through `materialize_thread`).
2282    #[test]
2283    fn record_thread_manifest_writes_sidecar_for_externally_materialized_worktree() {
2284        let repo_dir = TempDir::new().unwrap();
2285        let repo = Repository::init_default(repo_dir.path()).unwrap();
2286        fs::write(repo_dir.path().join("a.txt"), b"alpha\n").unwrap();
2287        fs::write(repo_dir.path().join("b.txt"), b"beta\n").unwrap();
2288        repo.snapshot(Some("seed".into()), None).unwrap();
2289        let state_id = repo
2290            .refs()
2291            .get_thread(&ThreadName::new("main"))
2292            .unwrap()
2293            .expect("head present");
2294
2295        // Materialize externally via the lower-level `materialize_tree`
2296        // path — the shape `start --workspace materialized` uses.
2297        let dest_holder = TempDir::new().unwrap();
2298        let dest = dest_holder.path().join("out");
2299        let state = repo.store().get_state(&state_id).unwrap().unwrap();
2300        let tree = repo.store().get_tree(&state.tree).unwrap().unwrap();
2301        repo.materialize_tree(&tree, &dest).unwrap();
2302
2303        // No manifest written yet — `materialize_tree` is the bytes-only
2304        // step; the sidecar is recorded explicitly.
2305        assert!(
2306            read_manifest(repo.heddle_dir(), "feature/x")
2307                .unwrap()
2308                .is_none()
2309        );
2310
2311        let recorded = repo
2312            .record_thread_manifest("feature/x", &state_id, &dest)
2313            .unwrap();
2314        assert_eq!(recorded.state_id, state_id);
2315        assert_eq!(recorded.tree_hash, state.tree);
2316        assert!(recorded.files.contains_key("a.txt"));
2317        assert!(recorded.files.contains_key("b.txt"));
2318        assert_eq!(recorded.files["a.txt"].size, b"alpha\n".len() as u64);
2319
2320        // Sidecar persists at the expected location and round-trips.
2321        let loaded = read_manifest(repo.heddle_dir(), "feature/x")
2322            .unwrap()
2323            .expect("manifest on disk");
2324        assert_eq!(loaded.state_id, recorded.state_id);
2325        assert_eq!(loaded.files.len(), recorded.files.len());
2326
2327        // Idempotent: a second recording for the same thread succeeds
2328        // (used by `capture_thread_from_disk` post-capture refresh).
2329        repo.record_thread_manifest("feature/x", &state_id, &dest)
2330            .unwrap();
2331    }
2332
2333    /// `record_thread_manifest` against an unknown `state_id` should
2334    /// surface a clear "state missing" error instead of silently
2335    /// writing a manifest with no files (which would later look like
2336    /// a deletion of every tracked path).
2337    #[test]
2338    fn record_thread_manifest_errors_when_state_is_missing() {
2339        let repo_dir = TempDir::new().unwrap();
2340        let repo = Repository::init_default(repo_dir.path()).unwrap();
2341        let dest = TempDir::new().unwrap();
2342        let missing = objects::object::ChangeId::generate();
2343        let err = repo
2344            .record_thread_manifest("feature/x", &missing, &dest.path().join("out"))
2345            .expect_err("should fail when state is unknown");
2346        let message = format!("{err}");
2347        assert!(
2348            message.contains("missing"),
2349            "error message names the missing artifact: {message}"
2350        );
2351    }
2352
2353    #[test]
2354    fn materialize_unknown_thread_errors() {
2355        let repo_dir = TempDir::new().unwrap();
2356        let repo = Repository::init_default(repo_dir.path()).unwrap();
2357        let dest = TempDir::new().unwrap();
2358        let err = repo
2359            .materialize_thread(
2360                "no-such-thread",
2361                &dest.path().join("out"),
2362                &AudienceTier::Internal,
2363            )
2364            .expect_err("should fail");
2365        assert!(format!("{err}").contains("unknown thread"));
2366    }
2367
2368    /// Round-trip: materialize → edit a file → capture → confirm a
2369    /// new state was written, thread head advanced, and the manifest
2370    /// reflects the new state.
2371    #[test]
2372    fn capture_after_edit_advances_thread() {
2373        let repo_dir = TempDir::new().unwrap();
2374        let repo = Repository::init_default(repo_dir.path()).unwrap();
2375        fs::write(repo_dir.path().join("hello.txt"), b"hello\n").unwrap();
2376        repo.snapshot(Some("seed".into()), None).unwrap();
2377        let before = repo
2378            .refs()
2379            .get_thread(&ThreadName::new("main"))
2380            .unwrap()
2381            .expect("head");
2382
2383        let dest_holder = TempDir::new().unwrap();
2384        let dest = dest_holder.path().join("out");
2385        let materialize_manifest = repo
2386            .materialize_thread("main", &dest, &AudienceTier::Internal)
2387            .unwrap();
2388
2389        // Mutate a file in the materialized worktree.
2390        fs::write(dest.join("hello.txt"), b"hello world\n").unwrap();
2391
2392        let outcome = repo
2393            .capture_thread_from_disk("main", &dest)
2394            .expect("capture");
2395        let new_state = match outcome {
2396            ThreadCaptureOutcome::Captured { state_id } => state_id,
2397            ThreadCaptureOutcome::NoOp => panic!("expected Captured, got NoOp"),
2398        };
2399
2400        // Thread head advanced.
2401        let after = repo
2402            .refs()
2403            .get_thread(&ThreadName::new("main"))
2404            .unwrap()
2405            .expect("head");
2406        assert_ne!(before, after);
2407        assert_eq!(after, new_state);
2408
2409        // Manifest reflects the new state.
2410        let loaded = read_manifest(repo.heddle_dir(), "main")
2411            .unwrap()
2412            .expect("manifest");
2413        assert_eq!(loaded.state_id, new_state);
2414        assert_ne!(loaded.tree_hash, materialize_manifest.tree_hash);
2415        assert!(loaded.files.contains_key("hello.txt"));
2416    }
2417
2418    /// Capture with no edits is a no-op: thread head unchanged,
2419    /// manifest refreshed in place.
2420    #[test]
2421    fn capture_with_no_changes_is_noop() {
2422        let repo_dir = TempDir::new().unwrap();
2423        let repo = Repository::init_default(repo_dir.path()).unwrap();
2424        fs::write(repo_dir.path().join("steady.txt"), b"unchanged\n").unwrap();
2425        repo.snapshot(Some("seed".into()), None).unwrap();
2426        let before = repo
2427            .refs()
2428            .get_thread(&ThreadName::new("main"))
2429            .unwrap()
2430            .expect("head");
2431
2432        let dest_holder = TempDir::new().unwrap();
2433        let dest = dest_holder.path().join("out");
2434        repo.materialize_thread("main", &dest, &AudienceTier::Internal)
2435            .unwrap();
2436
2437        let outcome = repo.capture_thread_from_disk("main", &dest).unwrap();
2438        assert_eq!(outcome, ThreadCaptureOutcome::NoOp);
2439
2440        // Thread head unchanged.
2441        let after = repo
2442            .refs()
2443            .get_thread(&ThreadName::new("main"))
2444            .unwrap()
2445            .expect("head");
2446        assert_eq!(before, after);
2447    }
2448
2449    /// Stat-cache fast no-op: a fresh-materialised tree captures
2450    /// without invoking `build_tree`. Detected via the manifest
2451    /// reflecting bytes byte-identical to what got materialised.
2452    #[test]
2453    fn stat_cache_short_circuits_unchanged_capture() {
2454        let repo_dir = TempDir::new().unwrap();
2455        let repo = Repository::init_default(repo_dir.path()).unwrap();
2456        for i in 0..20 {
2457            fs::write(
2458                repo_dir.path().join(format!("file_{i:02}.txt")),
2459                format!("content {i}\n").as_bytes(),
2460            )
2461            .unwrap();
2462        }
2463        repo.snapshot(Some("seed".into()), None).unwrap();
2464
2465        let dest_holder = TempDir::new().unwrap();
2466        let dest = dest_holder.path().join("out");
2467        let manifest = repo
2468            .materialize_thread("main", &dest, &AudienceTier::Internal)
2469            .unwrap();
2470        assert_eq!(manifest.files.len(), 20);
2471
2472        // The fast-path predicate alone — without touching the
2473        // store-side `build_tree`. Exposes the boundary the
2474        // optimisation guards.
2475        assert!(
2476            stat_cache_no_op(&repo, &manifest, &dest).unwrap(),
2477            "fresh materialise should stat-match the manifest"
2478        );
2479
2480        // Full call also returns NoOp.
2481        let outcome = repo.capture_thread_from_disk("main", &dest).unwrap();
2482        assert_eq!(outcome, ThreadCaptureOutcome::NoOp);
2483    }
2484
2485    /// Stat-cache invalidates correctly on edit: a single touched
2486    /// file flips `stat_cache_no_op` to `false`, which forces the
2487    /// slow path to run and produces a new state.
2488    #[test]
2489    fn stat_cache_detects_edit_and_falls_through() {
2490        let repo_dir = TempDir::new().unwrap();
2491        let repo = Repository::init_default(repo_dir.path()).unwrap();
2492        fs::write(repo_dir.path().join("only.txt"), b"v1\n").unwrap();
2493        repo.snapshot(Some("seed".into()), None).unwrap();
2494
2495        let dest_holder = TempDir::new().unwrap();
2496        let dest = dest_holder.path().join("out");
2497        let manifest = repo
2498            .materialize_thread("main", &dest, &AudienceTier::Internal)
2499            .unwrap();
2500
2501        // Sleep briefly so the mtime moves; APFS gives sub-ms
2502        // resolution on modern macOS but Linux ext4 is only
2503        // 1-second granularity for ctime — make the test robust
2504        // either way.
2505        std::thread::sleep(std::time::Duration::from_millis(20));
2506        fs::write(dest.join("only.txt"), b"v2\n").unwrap();
2507
2508        assert!(
2509            !stat_cache_no_op(&repo, &manifest, &dest).unwrap(),
2510            "edited file must invalidate the fast path"
2511        );
2512
2513        // Slow path runs and creates a new state.
2514        match repo.capture_thread_from_disk("main", &dest).unwrap() {
2515            ThreadCaptureOutcome::Captured { .. } => {}
2516            other => panic!("expected Captured, got {other:?}"),
2517        }
2518    }
2519
2520    /// New file added out of band → fast path declines.
2521    #[test]
2522    fn stat_cache_detects_added_file() {
2523        let repo_dir = TempDir::new().unwrap();
2524        let repo = Repository::init_default(repo_dir.path()).unwrap();
2525        fs::write(repo_dir.path().join("a.txt"), b"a\n").unwrap();
2526        repo.snapshot(Some("seed".into()), None).unwrap();
2527
2528        let dest_holder = TempDir::new().unwrap();
2529        let dest = dest_holder.path().join("out");
2530        let manifest = repo
2531            .materialize_thread("main", &dest, &AudienceTier::Internal)
2532            .unwrap();
2533
2534        fs::write(dest.join("b.txt"), b"b\n").unwrap();
2535
2536        assert!(
2537            !stat_cache_no_op(&repo, &manifest, &dest).unwrap(),
2538            "added file must invalidate the fast path"
2539        );
2540    }
2541
2542    /// Plain `heddle capture` (via `Repository::snapshot`) detects the
2543    /// materialized-thread context — HEAD attached to a thread that has
2544    /// a manifest — and refreshes the manifest to the new state after
2545    /// the capture lands. This is the path the user hits when they edit
2546    /// inside a materialized thread worktree and run `heddle capture`
2547    /// directly (as opposed to `thread switch`, which is the auto-capture
2548    /// path covered by `capture_after_edit_advances_thread`).
2549    #[test]
2550    fn snapshot_in_materialized_thread_refreshes_manifest() {
2551        let repo_dir = TempDir::new().unwrap();
2552        let repo = Repository::init_default(repo_dir.path()).unwrap();
2553        fs::write(repo_dir.path().join("alpha.txt"), b"v1\n").unwrap();
2554        fs::write(repo_dir.path().join("beta.txt"), b"steady\n").unwrap();
2555        let initial = repo.snapshot(Some("seed".into()), None).unwrap();
2556
2557        // Stand up a manifest for `main` whose stat fields match the
2558        // worktree as it is right now. Mimics the post-materialize
2559        // state when the user is `cd`'d into the materialized
2560        // worktree (`self.root` == materialized path).
2561        let initial_tree = repo
2562            .store()
2563            .get_tree(&initial.tree)
2564            .unwrap()
2565            .expect("seed tree");
2566        let mut manifest = crate::thread_manifest::ThreadManifest::new(
2567            initial.change_id,
2568            initial.tree,
2569            canonical_worktree_path(repo_dir.path()),
2570        );
2571        populate_manifest_from_tree(
2572            &repo,
2573            &initial_tree,
2574            repo_dir.path(),
2575            "",
2576            &mut manifest.files,
2577        )
2578        .unwrap();
2579        crate::thread_manifest::write_manifest(repo.heddle_dir(), "main", &manifest).unwrap();
2580
2581        // Sleep long enough that the new mtime is observably distinct
2582        // on ext4's 1-second-granularity ctime (APFS is sub-ms).
2583        std::thread::sleep(std::time::Duration::from_millis(20));
2584        fs::write(repo_dir.path().join("alpha.txt"), b"v2\n").unwrap();
2585
2586        let captured = repo.snapshot(Some("after edit".into()), None).unwrap();
2587        assert_ne!(captured.change_id, initial.change_id);
2588        assert_ne!(captured.tree, initial.tree);
2589
2590        // Manifest got refreshed to point at the new state and tree.
2591        let refreshed = crate::thread_manifest::read_manifest(repo.heddle_dir(), "main")
2592            .unwrap()
2593            .expect("manifest persists");
2594        assert_eq!(refreshed.state_id, captured.change_id);
2595        assert_eq!(refreshed.tree_hash, captured.tree);
2596        // beta.txt was untouched — its stat fields (and hash) should
2597        // still appear in the refreshed manifest.
2598        assert!(refreshed.files.contains_key("alpha.txt"));
2599        assert!(refreshed.files.contains_key("beta.txt"));
2600    }
2601
2602    /// Regression: snapshot from a directory that is NOT the
2603    /// manifest's recorded worktree path must NOT refresh the
2604    /// manifest. Pre-fix, the snapshot code detected the
2605    /// "materialized-thread context" purely by `HEAD attached + a
2606    /// manifest exists for the attached thread", so a snapshot from
2607    /// the main repo dir (or any sibling worktree) would corrupt the
2608    /// manifest by writing the wrong directory's stat fields into it
2609    /// — and `heddle status` would then falsely report the
2610    /// materialized worktree as fresh because the manifest's
2611    /// `state_id` had auto-rolled forward.
2612    #[test]
2613    fn snapshot_outside_materialized_worktree_does_not_refresh_manifest() {
2614        let repo_dir = TempDir::new().unwrap();
2615        let repo = Repository::init_default(repo_dir.path()).unwrap();
2616        fs::write(repo_dir.path().join("alpha.txt"), b"v1\n").unwrap();
2617        repo.snapshot(Some("seed".into()), None).unwrap();
2618
2619        // Materialize "main" at a totally separate path. Manifest
2620        // records `dest_holder/out` as the worktree.
2621        let dest_holder = TempDir::new().unwrap();
2622        let dest = dest_holder.path().join("out");
2623        let materialize_manifest = repo
2624            .materialize_thread("main", &dest, &AudienceTier::Internal)
2625            .unwrap();
2626        let materialize_state_id = materialize_manifest.state_id;
2627        let materialize_tree_hash = materialize_manifest.tree_hash;
2628        let materialized_path = materialize_manifest.worktree_path.clone();
2629        assert_eq!(
2630            materialized_path,
2631            canonical_worktree_path(&dest),
2632            "manifest must record the canonical materialize destination"
2633        );
2634
2635        // Now run snapshot from the MAIN repo dir (`repo.root()`) —
2636        // a path that is NOT the materialized worktree. The pre-fix
2637        // bug fired here.
2638        std::thread::sleep(std::time::Duration::from_millis(20));
2639        fs::write(repo_dir.path().join("alpha.txt"), b"v2-from-main-repo\n").unwrap();
2640        let snap = repo
2641            .snapshot(Some("from main repo, not the mat worktree".into()), None)
2642            .unwrap();
2643        assert_ne!(
2644            snap.change_id, materialize_state_id,
2645            "snapshot must advance main's head"
2646        );
2647
2648        // The manifest must NOT have been refreshed: state_id and
2649        // tree_hash still point at the materialize state, worktree
2650        // path still points at `dest`.
2651        let after = crate::thread_manifest::read_manifest(repo.heddle_dir(), "main")
2652            .unwrap()
2653            .expect("manifest still present");
2654        assert_eq!(
2655            after.state_id, materialize_state_id,
2656            "manifest state_id must NOT advance when snapshot is taken outside the materialized worktree"
2657        );
2658        assert_eq!(
2659            after.tree_hash, materialize_tree_hash,
2660            "manifest tree_hash must NOT advance"
2661        );
2662        assert_eq!(
2663            after.worktree_path, materialized_path,
2664            "manifest worktree_path must be unchanged"
2665        );
2666
2667        // And `heddle status`'s staleness check should now correctly
2668        // report the materialized worktree as stale (head moved,
2669        // manifest didn't).
2670        let head_now = repo
2671            .refs()
2672            .get_thread(&ThreadName::new("main"))
2673            .unwrap()
2674            .expect("head");
2675        assert_ne!(
2676            head_now, after.state_id,
2677            "post-fix invariant: main head advanced past manifest's recorded state → stale"
2678        );
2679    }
2680
2681    /// Capture from a *dedicated* thread worktree (one whose path
2682    /// differs from `repo.root()`) must validate symlinks against
2683    /// that worktree's path, not against the main repo root.
2684    /// Pre-fix the walker passed `repo.root()` as the symlink-
2685    /// escape base, so every symlink inside a dedicated thread
2686    /// path was rejected as "outside the repo" the moment the
2687    /// slow path ran — `thread switch` auto-capture broke for any
2688    /// thread that contained a symlink. Reproduces the codex P2
2689    /// from review pass 2.
2690    #[cfg(unix)]
2691    #[test]
2692    fn capture_thread_from_disk_accepts_symlinks_in_dedicated_worktree() {
2693        let repo_dir = TempDir::new().unwrap();
2694        let repo = Repository::init_default(repo_dir.path()).unwrap();
2695        // Seed with a file + a symlink pointing inside the repo.
2696        fs::write(repo_dir.path().join("target.txt"), b"target\n").unwrap();
2697        std::os::unix::fs::symlink("target.txt", repo_dir.path().join("link")).unwrap();
2698        repo.snapshot(Some("seed".into()), None).unwrap();
2699
2700        // Materialise into a dedicated worktree — path differs
2701        // from `repo.root()`, which is exactly the case that
2702        // exposes the bug.
2703        let dest_holder = TempDir::new().unwrap();
2704        let dest = dest_holder.path().join("thread-worktree");
2705        repo.materialize_thread("main", &dest, &AudienceTier::Internal)
2706            .unwrap();
2707
2708        // Edit a non-symlink file so the slow path fires (the fast
2709        // stat-cache no-op would mask the bug). Sleep so the mtime
2710        // observably moves on coarse-granularity filesystems.
2711        std::thread::sleep(std::time::Duration::from_millis(20));
2712        fs::write(dest.join("target.txt"), b"target v2\n").unwrap();
2713
2714        // Pre-fix this errored with "symlink target escapes repo"
2715        // because `validate_symlink_target` was using `repo.root()`
2716        // as the allowed base instead of the walk root.
2717        let outcome = repo
2718            .capture_thread_from_disk("main", &dest)
2719            .expect("capture must accept symlinks inside the dedicated worktree");
2720        match outcome {
2721            ThreadCaptureOutcome::Captured { .. } => {}
2722            ThreadCaptureOutcome::NoOp => panic!("expected Captured; got NoOp"),
2723        }
2724    }
2725
2726    /// Codex pass-5 P1: when the ignore set tightens between
2727    /// materialise and capture (e.g. user adds an entry to
2728    /// `.heddleignore` covering an already-tracked path), the
2729    /// no-op predicate must bail to the slow path so `build_tree`
2730    /// can produce the tree that *now* matches the matcher. Pre-
2731    /// fix the manifest-first dispatch accepted any manifest hit
2732    /// without re-running the matcher, so the predicate silently
2733    /// false-passed and `thread switch`'s auto-capture missed
2734    /// the real tree delta.
2735    #[test]
2736    fn stat_cache_detects_ignore_config_tightening() {
2737        let repo_dir = TempDir::new().unwrap();
2738        let repo = Repository::init_default(repo_dir.path()).unwrap();
2739        // Seed: two files, no .heddleignore yet.
2740        fs::write(repo_dir.path().join("keep.txt"), b"keep\n").unwrap();
2741        fs::write(repo_dir.path().join("secret.txt"), b"secret\n").unwrap();
2742        repo.snapshot(Some("seed".into()), None).unwrap();
2743
2744        let dest_holder = TempDir::new().unwrap();
2745        let dest = dest_holder.path().join("out");
2746        let manifest = repo
2747            .materialize_thread("main", &dest, &AudienceTier::Internal)
2748            .unwrap();
2749        assert!(manifest.files.contains_key("secret.txt"));
2750
2751        // Tighten the ignore set in the source repo to exclude
2752        // `secret.txt`. The materialised worktree still has it
2753        // on disk (we just put it there), but `build_tree` would
2754        // now skip it and produce a different tree hash.
2755        fs::write(repo_dir.path().join(".heddleignore"), b"secret.txt\n").unwrap();
2756
2757        assert!(
2758            !stat_cache_no_op(&repo, &manifest, &dest).unwrap(),
2759            "ignore-config tightening over a tracked path must \
2760             invalidate the fast path; pre-fix the predicate \
2761             false-passed and auto-capture silently dropped \
2762             the resulting tree delta"
2763        );
2764    }
2765
2766    /// Codex pass-3 P2: a *tree-only* empty directory — one that
2767    /// was a captured tree entry but never had any files beneath it
2768    /// — was invisible to the pass-2 fix because `expected_dirs`
2769    /// was derived from manifest file ancestors. Removing such a
2770    /// directory left every set the same size and the predicate
2771    /// false-passed, silently dropping the change. The pass-3 fix
2772    /// derives `expected_dirs` from the captured tree directly so
2773    /// empty leaf dirs are tracked.
2774    #[test]
2775    fn stat_cache_detects_removed_tree_only_empty_directory() {
2776        let repo_dir = TempDir::new().unwrap();
2777        let repo = Repository::init_default(repo_dir.path()).unwrap();
2778        // Seed with one file (so the thread isn't empty) plus an
2779        // empty directory that becomes a tree entry on its own.
2780        fs::write(repo_dir.path().join("anchor.txt"), b"anchor\n").unwrap();
2781        fs::create_dir_all(repo_dir.path().join("empty-on-purpose")).unwrap();
2782        repo.snapshot(Some("seed".into()), None).unwrap();
2783
2784        let dest_holder = TempDir::new().unwrap();
2785        let dest = dest_holder.path().join("out");
2786        let manifest = repo
2787            .materialize_thread("main", &dest, &AudienceTier::Internal)
2788            .unwrap();
2789
2790        // Sanity: the empty dir landed on disk after materialise.
2791        assert!(
2792            dest.join("empty-on-purpose").is_dir(),
2793            "materialise must emit the empty dir on disk"
2794        );
2795
2796        // Remove the empty dir. No files inside it changed
2797        // because there never were any — pure tree-only delta.
2798        fs::remove_dir(dest.join("empty-on-purpose")).unwrap();
2799
2800        assert!(
2801            !stat_cache_no_op(&repo, &manifest, &dest).unwrap(),
2802            "removing a tree-only empty directory must invalidate \
2803             the fast path; pre-fix the predicate false-passed and \
2804             auto-capture silently dropped the deletion"
2805        );
2806    }
2807
2808    /// Empty directory added by the user — manifests only record
2809    /// files, but Heddle's tree builder emits a tree entry for the
2810    /// new dir. The stat-cache no-op predicate must decline so the
2811    /// slow path picks the change up; pre-fix it false-passed and
2812    /// `thread switch`'s auto-capture silently dropped the addition.
2813    #[test]
2814    fn stat_cache_detects_added_empty_directory() {
2815        let repo_dir = TempDir::new().unwrap();
2816        let repo = Repository::init_default(repo_dir.path()).unwrap();
2817        fs::write(repo_dir.path().join("only.txt"), b"a\n").unwrap();
2818        repo.snapshot(Some("seed".into()), None).unwrap();
2819
2820        let dest_holder = TempDir::new().unwrap();
2821        let dest = dest_holder.path().join("out");
2822        let manifest = repo
2823            .materialize_thread("main", &dest, &AudienceTier::Internal)
2824            .unwrap();
2825
2826        // Add an empty directory that has no manifest entry.
2827        fs::create_dir_all(dest.join("brand-new-empty-dir")).unwrap();
2828
2829        assert!(
2830            !stat_cache_no_op(&repo, &manifest, &dest).unwrap(),
2831            "an added empty directory must invalidate the fast path"
2832        );
2833    }
2834
2835    /// Empty directory removed by the user — the manifest expects it
2836    /// (its parent path appears as an ancestor of files) but the
2837    /// walk never visits it. The dir-side check must decline. Pre-
2838    /// fix the fast path would false-pass on this case too.
2839    #[test]
2840    fn stat_cache_detects_removed_empty_directory() {
2841        let repo_dir = TempDir::new().unwrap();
2842        let repo = Repository::init_default(repo_dir.path()).unwrap();
2843        fs::create_dir_all(repo_dir.path().join("nested/deep")).unwrap();
2844        fs::write(repo_dir.path().join("nested/deep/leaf.txt"), b"leaf\n").unwrap();
2845        repo.snapshot(Some("seed".into()), None).unwrap();
2846
2847        let dest_holder = TempDir::new().unwrap();
2848        let dest = dest_holder.path().join("out");
2849        let manifest = repo
2850            .materialize_thread("main", &dest, &AudienceTier::Internal)
2851            .unwrap();
2852
2853        // Remove the leaf file AND its parent dir. The file-side
2854        // check already catches the file removal, but if we then
2855        // synthesise a fresh leaf elsewhere we'd want the dir-side
2856        // check to catch the missing parent on its own too. Use a
2857        // slightly different shape: create + remove a sibling dir
2858        // whose ancestor matches the manifest's expected set.
2859        fs::create_dir_all(dest.join("nested/sibling-empty")).unwrap();
2860
2861        assert!(
2862            !stat_cache_no_op(&repo, &manifest, &dest).unwrap(),
2863            "an added empty directory inside an existing parent must invalidate"
2864        );
2865    }
2866
2867    /// Deleted file → fast path declines.
2868    #[test]
2869    fn stat_cache_detects_deletion() {
2870        let repo_dir = TempDir::new().unwrap();
2871        let repo = Repository::init_default(repo_dir.path()).unwrap();
2872        fs::write(repo_dir.path().join("a.txt"), b"a\n").unwrap();
2873        fs::write(repo_dir.path().join("b.txt"), b"b\n").unwrap();
2874        repo.snapshot(Some("seed".into()), None).unwrap();
2875
2876        let dest_holder = TempDir::new().unwrap();
2877        let dest = dest_holder.path().join("out");
2878        let manifest = repo
2879            .materialize_thread("main", &dest, &AudienceTier::Internal)
2880            .unwrap();
2881
2882        fs::remove_file(dest.join("a.txt")).unwrap();
2883
2884        assert!(
2885            !stat_cache_no_op(&repo, &manifest, &dest).unwrap(),
2886            "deleted file must invalidate the fast path"
2887        );
2888    }
2889
2890    /// Two `capture_thread_from_disk` calls on the same thread from
2891    /// different threads must serialize through the repository write
2892    /// lock: the thread head's parent chain must include both
2893    /// captures (no lost update where one capture's parent is the
2894    /// pre-race head instead of the other capture's state).
2895    ///
2896    /// Reproduces the race Codex P1 #2 named: pre-fix, two sibling
2897    /// worktrees doing `heddle thread switch` against the same
2898    /// source thread both read the same parent in
2899    /// `refs().get_thread()`, both `put_state` with that parent,
2900    /// both `set_thread` — whichever `set_thread` won last orphaned
2901    /// the other state on disk. With the lock both captures land in
2902    /// series and the final head's parent chain links back through
2903    /// both new states.
2904    #[test]
2905    fn concurrent_captures_serialize_via_repository_lock() {
2906        use std::sync::Arc;
2907
2908        let repo_dir = TempDir::new().unwrap();
2909        let repo = Arc::new(Repository::init_default(repo_dir.path()).unwrap());
2910        fs::write(repo_dir.path().join("shared.txt"), b"seed\n").unwrap();
2911        repo.snapshot(Some("seed".into()), None).unwrap();
2912        let initial_head = repo
2913            .refs()
2914            .get_thread(&ThreadName::new("main"))
2915            .unwrap()
2916            .expect("seeded");
2917
2918        // Two sibling materialized worktrees of the same thread.
2919        let dest_a_holder = TempDir::new().unwrap();
2920        let dest_a = dest_a_holder.path().join("a");
2921        repo.materialize_thread("main", &dest_a, &AudienceTier::Internal)
2922            .unwrap();
2923        let dest_b_holder = TempDir::new().unwrap();
2924        let dest_b = dest_b_holder.path().join("b");
2925        repo.materialize_thread("main", &dest_b, &AudienceTier::Internal)
2926            .unwrap();
2927
2928        // Disjoint edits so each capture has real work to do (no
2929        // stat-cache no-op short-circuit).
2930        std::thread::sleep(std::time::Duration::from_millis(20));
2931        fs::write(dest_a.join("shared.txt"), b"edited-by-a\n").unwrap();
2932        fs::write(dest_b.join("shared.txt"), b"edited-by-b\n").unwrap();
2933
2934        // Race the two captures.
2935        let repo_a = Arc::clone(&repo);
2936        let repo_b = Arc::clone(&repo);
2937        let h_a = std::thread::spawn(move || {
2938            repo_a
2939                .capture_thread_from_disk("main", &dest_a)
2940                .expect("capture A")
2941        });
2942        let h_b = std::thread::spawn(move || {
2943            repo_b
2944                .capture_thread_from_disk("main", &dest_b)
2945                .expect("capture B")
2946        });
2947        let outcome_a = h_a.join().expect("thread A");
2948        let outcome_b = h_b.join().expect("thread B");
2949
2950        // Both captures landed (neither was a NoOp because both
2951        // edited the same file with different bytes).
2952        let id_a = match outcome_a {
2953            ThreadCaptureOutcome::Captured { state_id } => state_id,
2954            ThreadCaptureOutcome::NoOp => panic!("A expected Captured"),
2955        };
2956        let id_b = match outcome_b {
2957            ThreadCaptureOutcome::Captured { state_id } => state_id,
2958            ThreadCaptureOutcome::NoOp => panic!("B expected Captured"),
2959        };
2960        assert_ne!(id_a, id_b, "the two captures must produce distinct states");
2961
2962        // The thread head is one of the two captures. Lock-naked,
2963        // the loser's parent would be `initial_head`. With the
2964        // lock, the loser's parent is the winner's id and the
2965        // winner's parent is `initial_head`.
2966        let final_head = repo
2967            .refs()
2968            .get_thread(&ThreadName::new("main"))
2969            .unwrap()
2970            .expect("head");
2971        let winner_id = final_head;
2972        let loser_id = if final_head == id_a { id_b } else { id_a };
2973
2974        let winner_state = repo
2975            .store()
2976            .get_state(&winner_id)
2977            .unwrap()
2978            .expect("winner state on disk");
2979        let loser_state = repo
2980            .store()
2981            .get_state(&loser_id)
2982            .unwrap()
2983            .expect("loser state on disk");
2984
2985        // The two captures must have linked through the lock:
2986        // exactly one of (winner.parents, loser.parents) names the
2987        // other; the remaining parent is the seed head. Pre-fix
2988        // both states named the seed head and the loser was
2989        // orphaned — assert that this isn't the case.
2990        let chained =
2991            winner_state.parents.contains(&loser_id) || loser_state.parents.contains(&winner_id);
2992        assert!(
2993            chained,
2994            "concurrent captures must chain through the lock; got\n  \
2995             winner {winner_id} parents={:?}\n  loser  {loser_id} parents={:?}",
2996            winner_state.parents, loser_state.parents
2997        );
2998        assert!(
2999            winner_state.parents.contains(&initial_head)
3000                || loser_state.parents.contains(&initial_head),
3001            "the bottom of the chain must still reach the seed head"
3002        );
3003    }
3004}
repo/repository_thread_materialize.rs

repo/
repository_thread_materialize.rs