repo/repository_thread_materialize.rs
1// SPDX-License-Identifier: Apache-2.0
2//! Thread-level materialization: resolve a thread → state → tree,
3//! materialize the tree to disk (clonefile-first via the existing
4//! `Repository::materialize_tree`), and write a [`ThreadManifest`]
5//! sidecar that captures the per-file stat-cache for fast subsequent
6//! `heddle capture` scans.
7//!
8//! This is the day-one default workspace shape for lightweight
9//! threads on reflink-capable filesystems (see
10//! `docs/design/clonefile-threads.md`). Reads off the materialized
11//! tree are vanilla `read(2)` against real APFS/btrfs files — no
12//! userspace FS callbacks in the hot path. Disk usage is the
13//! ~zero-cost clonefile share until the agent diverges blocks.
14
15use std::{
16 collections::{BTreeMap, BTreeSet},
17 fs,
18 path::{Path, PathBuf},
19};
20
21use chrono::{DateTime, Utc};
22use objects::{
23 lock::RepositoryLockExt,
24 object::{ChangeId, State, ThreadName, Tree, VisibilityTier},
25 store::ObjectStore,
26};
27use oplog::OpRecord;
28use refs::RefExpectation;
29use tracing::{debug, instrument};
30
31use super::{HeddleError, Repository, Result};
32use crate::{
33 ThreadWorktreeTargetDisposition, ThreadWorktreeTargetError,
34 thread_manifest::{ManifestFile, ThreadManifest, read_manifest, write_manifest},
35 validate_thread_worktree_target,
36 visibility::{AudienceTier, visible},
37};
38
/// Filename of the operator-local courtesy placeholder written when a
/// checked-out state's tier is not visible to the operator's audience.
///
/// [`Repository::checkout_state_gated`] writes this file directly under the
/// checkout root when it withholds a state, and removes a leftover copy when
/// the same root is later materialized at a visible tier.
pub(crate) const COURTESY_STUB_FILENAME: &str = "HEDDLE-EMBARGO.txt";
42
/// Outcome of the visibility-gated checkout chokepoint
/// [`Repository::checkout_state_gated`].
///
/// Exactly one of the two variants is produced per call: either the real tree
/// landed in the destination, or only the courtesy stub did.
#[derive(Clone, Debug)]
pub enum CheckoutMaterialization {
    /// The state was visible to the audience: its real tree was materialized
    /// to `dest`. Carries the resolved tree so callers can populate a manifest
    /// without a second store lookup.
    Materialized { tree: Tree },
    /// The state was under-tier for the audience: the operator-local courtesy
    /// stub was written to `dest` and the tracked bytes withheld. Carries the
    /// effective tier that caused the state to be withheld.
    Withheld { tier: VisibilityTier },
}
55
/// Outcome of [`Repository::capture_thread_from_disk`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ThreadCaptureOutcome {
    /// The materialized tree matches the existing thread head; no
    /// new state was written. The manifest was refreshed to reflect
    /// the latest stat fields (so subsequent captures stay fast even
    /// if mtimes drifted via `touch`).
    ///
    /// Also returned, before any scan happens, when the worktree root
    /// carries the withheld-checkout marker: a withheld checkout is
    /// treated as non-capturable.
    NoOp,
    /// A new state was written and the thread head advanced.
    /// `state_id` is the id of that newly written state.
    Captured { state_id: ChangeId },
}
67
68fn thread_worktree_target_error(error: ThreadWorktreeTargetError) -> HeddleError {
69 match error {
70 ThreadWorktreeTargetError::Io { source, .. } => HeddleError::Io(source),
71 ThreadWorktreeTargetError::Symlink { path } => HeddleError::Conflict(format!(
72 "thread worktree target '{}' cannot be a symlink",
73 path.display()
74 )),
75 ThreadWorktreeTargetError::NotDirectory { path } => HeddleError::Conflict(format!(
76 "thread worktree target '{}' must be a directory",
77 path.display()
78 )),
79 ThreadWorktreeTargetError::NotEmpty { path } => HeddleError::Conflict(format!(
80 "thread worktree target '{}' is not empty",
81 path.display()
82 )),
83 }
84}
85
86fn prepare_thread_worktree_target(dest: &Path) -> Result<ThreadWorktreeTargetDisposition> {
87 let disposition =
88 validate_thread_worktree_target(dest).map_err(thread_worktree_target_error)?;
89 if disposition == ThreadWorktreeTargetDisposition::Absent {
90 fs::create_dir_all(dest).map_err(HeddleError::Io)?;
91 validate_thread_worktree_target(dest).map_err(thread_worktree_target_error)?;
92 }
93 Ok(disposition)
94}
95
/// Delete every entry inside `dir` while leaving `dir` itself in place.
///
/// Does nothing (and reports success) when `dir` is a symlink or is not a
/// directory. Child directories are removed recursively; every other kind of
/// entry is unlinked as a file.
///
/// # Errors
/// Propagates the first I/O error hit while inspecting `dir`, listing it, or
/// removing an entry — including `NotFound` when `dir` does not exist.
fn clear_dir_contents(dir: &Path) -> std::io::Result<()> {
    // `symlink_metadata` does not follow links, so a symlinked directory is
    // seen as a symlink here and left alone.
    let info = fs::symlink_metadata(dir)?;
    let is_real_dir = !info.file_type().is_symlink() && info.is_dir();
    if !is_real_dir {
        return Ok(());
    }

    fs::read_dir(dir)?.try_for_each(|child| {
        let child = child?;
        let target = child.path();
        if child.file_type()?.is_dir() {
            fs::remove_dir_all(&target)
        } else {
            fs::remove_file(&target)
        }
    })
}
113
114fn cleanup_thread_worktree_target(
115 dest: &Path,
116 disposition: ThreadWorktreeTargetDisposition,
117) -> Result<()> {
118 match clear_dir_contents(dest) {
119 Ok(()) => {}
120 Err(err)
121 if err.kind() == std::io::ErrorKind::NotFound
122 || err.kind() == std::io::ErrorKind::NotADirectory => {}
123 Err(err) => return Err(HeddleError::Io(err)),
124 }
125
126 if disposition == ThreadWorktreeTargetDisposition::Absent {
127 match fs::remove_dir(dest) {
128 Ok(()) => {}
129 Err(err)
130 if err.kind() == std::io::ErrorKind::NotFound
131 || err.kind() == std::io::ErrorKind::NotADirectory => {}
132 Err(err) => return Err(HeddleError::Io(err)),
133 }
134 }
135
136 Ok(())
137}
138
139impl Repository {
140 /// Materialize the captured tree of `thread` to `dest` and write
141 /// a [`ThreadManifest`] sidecar to
142 /// `<heddle_dir>/threads/<thread>/manifest.toml`.
143 ///
144 /// Order of operations:
145 /// 1. Resolve `thread` → `ChangeId` → `State` → `Tree`.
146 /// 2. Call `Repository::materialize_tree(&tree, dest)` — the
147 /// existing clonefile-first materializer does the heavy
148 /// lifting (loose-uncompressed promotion, parallel writes).
149 /// 3. Walk the materialized tree and capture per-file
150 /// `(hash, inode, mtime_ns, ctime_ns, mode)` into the
151 /// manifest.
152 /// 4. Atomically write the manifest.
153 ///
154 /// The walk step in (3) is a single `stat` per file — sub-ms for
155 /// the 643-file heddle workspace. Doing the walk after
156 /// materialize rather than capturing stats during materialize
157 /// keeps the existing materializer untouched.
158 #[instrument(skip(self), fields(thread = %thread, dest = %dest.display()))]
159 pub fn materialize_thread(
160 &self,
161 thread: &str,
162 dest: &Path,
163 audience: &AudienceTier,
164 ) -> Result<ThreadManifest> {
165 let change_id = self
166 .refs()
167 .resolve(thread)?
168 .ok_or_else(|| HeddleError::Config(format!("unknown thread {thread}")))?;
169 let state = self
170 .store()
171 .get_state(&change_id)?
172 .ok_or_else(|| HeddleError::Config(format!("state for {thread} missing")))?;
173 let target_disposition = prepare_thread_worktree_target(dest)?;
174
175 // Route through the single visibility-gated checkout chokepoint, which
176 // either materializes the real tree or writes the operator-local
177 // courtesy stub. The manifest is this method's own concern (it lives
178 // outside the checkout dir), so it is written here based on the gate
179 // outcome — not in the chokepoint, which `write_isolated_checkout` also
180 // calls without wanting a thread manifest.
181 let result = (|| -> Result<ThreadManifest> {
182 match self.checkout_state_gated(&change_id, &state, dest, audience)? {
183 CheckoutMaterialization::Withheld { tier } => {
184 // Manifest reflects disk truth: no tracked files were
185 // materialized (the placeholder is untracked). `tree_hash`
186 // still names the real embargoed state's tree so the sidecar
187 // identifies which state this checkout stands in for. The
188 // `withheld` flag here is diagnostic only — it records that the
189 // *last* materialize of this thread was withheld, but the
190 // per-thread manifest is clobbered by a sibling worktree of the
191 // same thread. The authoritative, per-worktree non-capturable
192 // signal is the withheld marker written by
193 // `checkout_state_gated`, keyed on the worktree root (heddle#316).
194 let mut manifest =
195 ThreadManifest::new(change_id, state.tree, canonical_worktree_path(dest));
196 manifest.withheld = true;
197 write_manifest(self.heddle_dir(), thread, &manifest)
198 .map_err(HeddleError::Io)?;
199 debug!(
200 thread = %thread,
201 state_id = %change_id,
202 tier = tier.as_str(),
203 "thread checkout rendered courtesy stub (under-tier for audience)"
204 );
205 Ok(manifest)
206 }
207 CheckoutMaterialization::Materialized { tree } => {
208 let mut manifest =
209 ThreadManifest::new(change_id, state.tree, canonical_worktree_path(dest));
210 populate_manifest_from_tree(self, &tree, dest, "", &mut manifest.files)?;
211 write_manifest(self.heddle_dir(), thread, &manifest)
212 .map_err(HeddleError::Io)?;
213 debug!(
214 thread = %thread,
215 state_id = %change_id,
216 files = manifest.files.len(),
217 "thread materialized"
218 );
219 Ok(manifest)
220 }
221 }
222 })();
223
224 if result.is_err() {
225 cleanup_thread_worktree_target(dest, target_disposition)?;
226 }
227
228 result
229 }
230
231 /// THE visibility-gated checkout chokepoint. Resolve `change_id`'s
232 /// effective tier against `audience` and either materialize its real tree
233 /// to `dest` (visible) or write the operator-local courtesy stub and
234 /// withhold the tracked bytes (under-tier).
235 ///
236 /// Every path that serves a *named committed state*'s content to a local
237 /// checkout MUST funnel through here — `materialize_thread` and the CLI's
238 /// `write_isolated_checkout` (`heddle start --path`) both do — so the
239 /// visibility gate cannot be bypassed by a caller reaching for the raw,
240 /// blob-keyed `materialize_tree`. The decision is made HERE, where the
241 /// `ChangeId` and the audience are both in scope; `materialize_tree`
242 /// carries neither and so cannot make it. `materialize_tree` stays the
243 /// primitive for *computed* trees (merge/cherry-pick results), which are
244 /// not a single named state and carry no audience.
245 ///
246 /// The courtesy stub is a working-tree convenience on bytes the operator
247 /// already holds — NOT a security boundary and NOT a public-mirror surface
248 /// (the public mirror emits absence, spike §5.3).
249 pub fn checkout_state_gated(
250 &self,
251 change_id: &ChangeId,
252 state: &State,
253 dest: &Path,
254 audience: &AudienceTier,
255 ) -> Result<CheckoutMaterialization> {
256 let tier = self.effective_visibility_tier(change_id).map_err(|e| {
257 HeddleError::Config(format!("resolve visibility for {change_id}: {e:#}"))
258 })?;
259 if !visible(&tier, audience) {
260 fs::create_dir_all(dest).map_err(HeddleError::Io)?;
261 // Canonicalize ONLY after the directory exists. `canonical_worktree_path`
262 // falls back to the raw input when `dest` does not yet resolve (a relative
263 // path, or a path through a not-yet-created symlink), so a pre-creation
264 // canonicalize would key the withheld marker and the `.leaves` record on a
265 // path `capture_thread_from_disk` never resolves to at read-time — the read
266 // canonicalizes the now-existing root, misses the marker, and captures a
267 // withheld checkout as a stub-only tree instead of no-oping. Resolving here,
268 // once `create_dir_all` has made `dest` exist, guarantees the write-time
269 // canonical root equals the read-time one (heddle#316).
270 let canonical = canonical_worktree_path(dest);
271 // Reconcile the root DOWN to the withheld tier: every tracked leaf a
272 // prior materialize of this root wrote must be removed, so the
273 // checkout holds ONLY the courtesy stub — never the very bytes the
274 // gate is withholding. `keep` is empty (the withheld tier permits no
275 // tracked content). `must_remove` additionally names the withheld
276 // state's own tree leaves, so the leak is closed even when no prior
277 // manifest survives for this root (a sibling worktree clobbered it).
278 // The stub itself is untracked and so never in either set (heddle#316
279 // CLASS 1).
280 let mut withheld_leaves = BTreeSet::new();
281 if let Some(tree) = self.store().get_tree(&state.tree)? {
282 collect_tree_leaf_paths(self, &tree, "", &mut withheld_leaves)?;
283 }
284 self.reconcile_materialized_root(dest, &canonical, &BTreeSet::new(), &withheld_leaves)?;
285 // Persist the clobber-proof per-root record: a withheld materialize
286 // leaves ONLY the untracked courtesy stub, so the tracked-leaf set is
287 // empty. Written here so the single chokepoint owns the record for
288 // every funnel path, and so a later reconcile of this root reads an
289 // authoritative empty set instead of falling to the backstop
290 // (heddle#316 CLASS 1).
291 crate::thread_manifest::write_materialized_leaves(
292 self.heddle_dir(),
293 &canonical,
294 &BTreeSet::new(),
295 )
296 .map_err(HeddleError::Io)?;
297 let embargo_until = self
298 .effective_state_visibility(change_id)
299 .map_err(|e| {
300 HeddleError::Config(format!("resolve visibility for {change_id}: {e:#}"))
301 })?
302 .and_then(|record| record.embargo_until);
303 let stub = courtesy_stub_text(&tier, embargo_until);
304 fs::write(dest.join(COURTESY_STUB_FILENAME), stub.as_bytes())
305 .map_err(HeddleError::Io)?;
306 // Record the withheld status keyed by THIS worktree root, not by
307 // thread — a sibling worktree of the same thread materialized at a
308 // visible tier must keep its own capturable status (heddle#316).
309 crate::thread_manifest::mark_withheld_checkout(self.heddle_dir(), &canonical)
310 .map_err(HeddleError::Io)?;
311 return Ok(CheckoutMaterialization::Withheld { tier });
312 }
313
314 let tree = self
315 .store()
316 .get_tree(&state.tree)?
317 .ok_or_else(|| HeddleError::Config(format!("tree for {change_id} missing")))?;
318 self.materialize_tree(&tree, dest)?;
319 // Canonicalize only now that `materialize_tree` (via `create_dir_all`) has made
320 // `dest` exist — same read/write-root agreement as the withheld branch above
321 // (heddle#316).
322 let canonical = canonical_worktree_path(dest);
323 // Reconcile the root UP to the served tier: `materialize_tree` wrote the
324 // real tree's leaves but does NOT remove a stale leaf a prior
325 // materialize of a *different* tree left at this root. `keep` is the set
326 // of leaves the served tree just wrote — any prior tracked leaf NOT in
327 // it is removed, so the root holds exactly this tier's content
328 // (heddle#316 CLASS 1).
329 let mut served_leaves = BTreeSet::new();
330 collect_tree_leaf_paths(self, &tree, "", &mut served_leaves)?;
331 self.reconcile_materialized_root(dest, &canonical, &served_leaves, &BTreeSet::new())?;
332 // Persist the clobber-proof per-root record of exactly the tracked leaves
333 // this visible materialize left on disk, so a later withheld
334 // re-materialize of this root removes precisely them even if a sibling
335 // worktree of the same thread clobbered the per-thread manifest in the
336 // interim (heddle#316 CLASS 1).
337 crate::thread_manifest::write_materialized_leaves(
338 self.heddle_dir(),
339 &canonical,
340 &served_leaves,
341 )
342 .map_err(HeddleError::Io)?;
343 // This root now holds real served bytes: clear any stale withheld marker
344 // a prior under-tier materialize of the same root may have left, so it
345 // can't suppress this worktree's capture (heddle#316).
346 crate::thread_manifest::clear_withheld_checkout(self.heddle_dir(), &canonical)
347 .map_err(HeddleError::Io)?;
348 // Remove any leftover courtesy stub a prior under-tier materialize of the
349 // same root wrote: the stub is untracked, so the reconcile leaf-removal
350 // above leaves it in place. Cosmetic — capture ignores it — but an
351 // authorized re-materialize should leave a clean tree (heddle#316).
352 match fs::remove_file(dest.join(COURTESY_STUB_FILENAME)) {
353 Ok(()) => {}
354 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
355 Err(e) => return Err(HeddleError::Io(e)),
356 }
357 Ok(CheckoutMaterialization::Materialized { tree })
358 }
359
360 /// Reconcile the worktree root at `dest` so it holds EXACTLY the content the
361 /// target tier permits, regardless of what a prior materialization of the
362 /// same root left behind. THE single chokepoint both branches of
363 /// [`Repository::checkout_state_gated`] funnel through to enforce the
364 /// invariant by construction rather than via two opposite one-off cleanups
365 /// (heddle#316 CLASS 1).
366 ///
367 /// Removes every tracked leaf that (a) a prior materialization recorded for
368 /// this root in its clobber-proof per-root **materialized-leaves record**
369 /// (keyed by the canonical worktree root, so a sibling worktree of the same
370 /// thread can never erase it) UNION (b) the caller's `must_remove` set —
371 /// MINUS the `keep` set the target tier permits. Removal is guarded per file
372 /// (`NotFound` ignored) and empty ancestor directories it leaves behind are
373 /// pruned via `remove_dir` (which fails on non-empty dirs, so untracked
374 /// siblings keep their directory alive).
375 ///
376 /// Sourcing the prior leaves from the per-root record — NOT the single
377 /// per-thread `manifest.toml` — is what makes the withheld reduction
378 /// correct-by-construction: the manifest is clobbered the instant a sibling
379 /// worktree of the same thread materializes, which would drop a prior
380 /// *visible* leaf (e.g. an `old-secret.txt` removed before the withheld
381 /// target state) out of the removal set and leak it next to the stub. The
382 /// per-root record is immune to that race (heddle#316 CLASS 1).
383 ///
384 /// Never blanket-`rm -rf`s: only paths sourced from the per-root record /
385 /// `must_remove` are touched, so user-untracked files and `.git`/heddle
386 /// metadata are never removed.
387 fn reconcile_materialized_root(
388 &self,
389 dest: &Path,
390 canonical_root: &Path,
391 keep: &BTreeSet<String>,
392 must_remove: &BTreeSet<String>,
393 ) -> Result<()> {
394 let mut to_remove: BTreeSet<String> = must_remove.clone();
395 match crate::thread_manifest::read_materialized_leaves(self.heddle_dir(), canonical_root)
396 .map_err(HeddleError::Io)?
397 {
398 Some(prior_leaves) => {
399 // Clobber-proof per-root record of exactly the tracked leaves a
400 // prior materialize of THIS root left on disk. Authoritative —
401 // survives a sibling worktree's clobber of the per-thread
402 // manifest.
403 to_remove.extend(prior_leaves);
404 }
405 None => {
406 // Fail-closed backstop: no per-root record yet. Reached only on a
407 // first-ever materialize of this root (nothing prior to remove)
408 // or a root last materialized by a binary predating the per-root
409 // record. Fall back to the best-effort per-thread manifest so an
410 // upgrade-window reconcile still drops a recorded prior tree's
411 // leaves; `must_remove` (the target tier's own leaves) covers the
412 // rest. Strictly safer than trusting `must_remove` alone, and —
413 // like the primary path — touches only recorded leaves, never
414 // untracked/non-heddle files.
415 if let Some(prior) = crate::thread_manifest::manifest_for_worktree_root(
416 self.heddle_dir(),
417 canonical_root,
418 )
419 .map_err(HeddleError::Io)?
420 {
421 to_remove.extend(prior.files.keys().cloned());
422 }
423 }
424 }
425
426 let mut prune_dirs: BTreeSet<PathBuf> = BTreeSet::new();
427 for rel in &to_remove {
428 if keep.contains(rel) {
429 continue;
430 }
431 let path = dest.join(rel);
432 match fs::remove_file(&path) {
433 Ok(()) => {}
434 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
435 Err(e) => return Err(HeddleError::Io(e)),
436 }
437 // Collect ancestor directories (within `dest`) so the now-empty ones
438 // left by the removed leaf can be pruned after the pass.
439 let mut parent = path.parent();
440 while let Some(p) = parent {
441 if p == dest || !p.starts_with(dest) {
442 break;
443 }
444 prune_dirs.insert(p.to_path_buf());
445 parent = p.parent();
446 }
447 }
448
449 // Prune deepest-first so a parent only sees its children already gone.
450 // `remove_dir` errors on a non-empty dir, which we ignore — that is
451 // exactly how an untracked sibling keeps its directory.
452 let mut dirs: Vec<PathBuf> = prune_dirs.into_iter().collect();
453 dirs.sort_by_key(|d| std::cmp::Reverse(d.components().count()));
454 for d in dirs {
455 let _ = fs::remove_dir(&d);
456 }
457 Ok(())
458 }
459
460 /// Remove the per-worktree-root sidecars [`checkout_state_gated`] writes —
461 /// the clobber-proof materialized-leaves record and (if present) the withheld
462 /// marker — for the checkout at `worktree_root`. Both live under the SHARED
463 /// heddle dir keyed by the canonical worktree root, so the atomic `start`
464 /// rollback's checkout-directory rewind never reaches them; a failed-then-
465 /// rolled-back start would otherwise orphan them. Canonicalizes `worktree_root`
466 /// the same way the chokepoint did, so the key matches; the dir must still
467 /// exist at call time (the rollback clears these BEFORE rewinding the dir).
468 /// Idempotent: missing sidecars are a no-op (heddle#316 r11 P2).
469 ///
470 /// [`checkout_state_gated`]: Repository::checkout_state_gated
471 pub fn clear_materialized_root_records(&self, worktree_root: &Path) -> Result<()> {
472 let canonical = canonical_worktree_path(worktree_root);
473 crate::thread_manifest::clear_materialized_leaves(self.heddle_dir(), &canonical)
474 .map_err(HeddleError::Io)?;
475 crate::thread_manifest::clear_withheld_checkout(self.heddle_dir(), &canonical)
476 .map_err(HeddleError::Io)?;
477 Ok(())
478 }
479
480 /// Write the [`ThreadManifest`] sidecar for a worktree that's
481 /// already been materialised to `dest` against `state_id`. Used
482 /// by the CLI's `start` path, which calls `materialize_tree`
483 /// directly via `write_isolated_checkout` and then needs the
484 /// matching manifest written so the rest of the clonefile-thread
485 /// machinery (`heddle status` advisory, `Repository::snapshot`
486 /// auto-detection, `capture_thread_from_disk` fast no-op) sees a
487 /// fully-formed sidecar.
488 ///
489 /// `state_id` is the captured state the worktree was materialised
490 /// against; its tree is resolved and walked to populate the
491 /// manifest's per-file stat-cache entries (one `lstat` per file).
492 /// Atomic write: a torn manifest can't half-land. Idempotent at
493 /// the manifest-key level: rewriting a manifest for the same
494 /// thread is supported (and is what `capture_thread_from_disk`
495 /// does post-capture).
496 #[instrument(skip(self), fields(thread = %thread, dest = %dest.display(), state = %state_id))]
497 pub fn record_thread_manifest(
498 &self,
499 thread: &str,
500 state_id: &ChangeId,
501 dest: &Path,
502 ) -> Result<ThreadManifest> {
503 let state = self
504 .store()
505 .get_state(state_id)?
506 .ok_or_else(|| HeddleError::Config(format!("state {state_id} missing")))?;
507 let tree = self
508 .store()
509 .get_tree(&state.tree)?
510 .ok_or_else(|| HeddleError::Config(format!("tree for state {state_id} missing")))?;
511 let mut manifest =
512 ThreadManifest::new(*state_id, state.tree, canonical_worktree_path(dest));
513 populate_manifest_from_tree(self, &tree, dest, "", &mut manifest.files)?;
514 crate::thread_manifest::write_manifest(self.heddle_dir(), thread, &manifest)
515 .map_err(HeddleError::Io)?;
516 debug!(
517 thread = %thread,
518 state_id = %state_id,
519 files = manifest.files.len(),
520 "thread manifest recorded post-materialize"
521 );
522 Ok(manifest)
523 }
524
525 /// Record a WITHHELD-consistent manifest sidecar for a worktree whose
526 /// checkout was withheld — the base state's visibility tier was not visible
527 /// to the materializing audience, so [`Repository::checkout_state_gated`]
528 /// wrote ONLY the operator-local courtesy stub and the tracked bytes were
529 /// never materialized.
530 ///
531 /// Mirrors the withheld arm of [`Repository::materialize_thread`]: `tree_hash`
532 /// still names the real (unserved) state's tree so the sidecar identifies
533 /// which state the stub stands in for, but `files` is empty (no tracked leaf
534 /// is on disk) and `withheld = true`. Crucially this does NOT walk/stat the
535 /// real tree against `dest` the way [`Repository::record_thread_manifest`]
536 /// does — those files were intentionally not materialized, so stat-ing them
537 /// would record phantom stat-cache entries (or fail) against a checkout that
538 /// holds only the stub. The CLI's atomic `start` path calls this instead of
539 /// `record_thread_manifest` when the checkout came back withheld, so a start
540 /// on a Private base produces a withheld checkout + a consistent manifest
541 /// rather than erroring (heddle#316 / PR #528 r9 Finding 3).
542 #[instrument(skip(self), fields(thread = %thread, dest = %dest.display(), state = %state_id))]
543 pub fn record_withheld_thread_manifest(
544 &self,
545 thread: &str,
546 state_id: &ChangeId,
547 dest: &Path,
548 ) -> Result<ThreadManifest> {
549 let state = self
550 .store()
551 .get_state(state_id)?
552 .ok_or_else(|| HeddleError::Config(format!("state {state_id} missing")))?;
553 let mut manifest =
554 ThreadManifest::new(*state_id, state.tree, canonical_worktree_path(dest));
555 manifest.withheld = true;
556 crate::thread_manifest::write_manifest(self.heddle_dir(), thread, &manifest)
557 .map_err(HeddleError::Io)?;
558 debug!(
559 thread = %thread,
560 state_id = %state_id,
561 "withheld thread manifest recorded post-materialize"
562 );
563 Ok(manifest)
564 }
565
566 /// The staged domain commit record for a brand-new materialized-thread
567 /// start. The repo owns the op-record shape so callers don't reconstruct
568 /// `OpRecord::ThreadCreate`'s fields. `manager_snapshot` is `None`: the
569 /// thread record is written by the start's converge step (so there is
570 /// nothing to snapshot at record-construction time — heddle#23 r2). The
571 /// caller stages this as the executor's single commit record (it is NOT
572 /// appended eagerly); the commit marker dedups on the stable
573 /// `transaction_id`.
574 pub fn thread_create_op_record(&self, name: &str, state: ChangeId) -> OpRecord {
575 OpRecord::ThreadCreate {
576 name: name.to_string(),
577 state,
578 manager_snapshot: None,
579 }
580 }
581
582 /// CAS-guarded rollback of a materialized-thread-start ref forward
583 /// (heddle#356 cid 3333881583).
584 ///
585 /// The forward set the thread ref to `set_value` (the start's base state).
586 /// Undo it ONLY if the ref STILL points there: restore `restore_to` when a
587 /// prior value existed (a re-start that reused the ref), or delete a ref
588 /// this start created (`restore_to == None`). If a concurrent process
589 /// advanced/changed the ref after our forward (a concurrent start or
590 /// crash-recovery), leave their write in place — an unconditional
591 /// reset/delete would clobber it.
592 pub fn cas_guarded_thread_ref_rollback(
593 &self,
594 name: &ThreadName,
595 set_value: ChangeId,
596 restore_to: Option<ChangeId>,
597 ) -> Result<()> {
598 // Compare-before-write: bail without touching the ref if it no longer
599 // holds the value our forward set.
600 if self.refs().get_thread(name)? != Some(set_value) {
601 return Ok(());
602 }
603 let result = match restore_to {
604 Some(prior) => {
605 self.refs()
606 .set_thread_cas(name, RefExpectation::Value(set_value), &prior)
607 }
608 None => self
609 .refs()
610 .delete_thread_cas(name, RefExpectation::Value(set_value)),
611 };
612 match result {
613 Ok(()) => Ok(()),
614 // Lost the race between the read above and this CAS: a concurrent
615 // writer advanced the ref. The expectation guard means we wrote
616 // nothing — leave their advance intact (the whole point of the
617 // guard).
618 Err(HeddleError::Conflict(_)) => Ok(()),
619 Err(other) => Err(other),
620 }
621 }
622
623 /// Restore the thread manifest sidecar to its captured pre-start snapshot:
624 /// rewrite the prior `manifest.toml` bytes if one existed, or remove the
625 /// directory this start created. Restoring (not blind-deleting) preserves
626 /// an OLD manifest left by a prior materialization of a reused thread ref
627 /// (heddle#356 cid 3333881561).
628 pub fn restore_thread_manifest(&self, thread: &str, prior: Option<Vec<u8>>) -> Result<()> {
629 match prior {
630 Some(bytes) => {
631 let path = crate::thread_manifest::manifest_path(self.heddle_dir(), thread);
632 if let Some(parent) = path.parent() {
633 fs::create_dir_all(parent).map_err(HeddleError::Io)?;
634 }
635 fs::write(&path, bytes).map_err(HeddleError::Io)
636 }
637 None => crate::thread_manifest::remove_thread_manifest_dir(self.heddle_dir(), thread)
638 .map(|_| ())
639 .map_err(HeddleError::Io),
640 }
641 }
642
643 /// Scan the materialized worktree at `root`, build a fresh tree
644 /// from the on-disk bytes, and (if anything changed) advance
645 /// `thread`'s head to a new state pointing at that tree. The
646 /// manifest is rewritten to reflect the new state and the
647 /// post-capture stat fields.
648 ///
649 /// Returns [`ThreadCaptureOutcome::NoOp`] when the new tree's
650 /// hash equals the manifest's recorded `tree_hash` — the agent
651 /// touched nothing material. Otherwise
652 /// [`ThreadCaptureOutcome::Captured`] with the new state id.
653 ///
654 /// The reason this method exists alongside `Repository::snapshot`
655 /// is two-fold:
656 /// 1. `snapshot` always advances `HEAD`'s currently-attached
657 /// thread. Capture-from-disk targets *a specific thread by
658 /// name*, which is what auto-capture-on-switch needs.
659 /// 2. `snapshot` walks `self.root`. Capture-from-disk walks
660 /// whatever directory the materializer put the thread at —
661 /// managed checkouts under `<repo>/.heddle/threads/<thread>/`,
662 /// which are NOT `self.root`.
663 ///
664 /// Walks `Repository::build_tree` for the slow path so the
665 /// resulting trees are byte-identical to what `heddle capture`
666 /// produces against the same content. A stat-cache fast path
667 /// (see [`stat_cache_no_op`]) short-circuits the common case
668 /// of "switch threads, nothing changed" so the dominant
669 /// auto-capture-on-switch latency is a `stat` walk, not a
670 /// blob rehash.
671 #[instrument(skip(self), fields(thread = %thread, root = %root.display()))]
672 pub fn capture_thread_from_disk(
673 &self,
674 thread: &str,
675 root: &Path,
676 ) -> Result<ThreadCaptureOutcome> {
677 // Repository-wide write lock — same shape as
678 // `snapshot_with_attribution_profiled`. Without it, two
679 // concurrent `thread switch` invocations from sibling
680 // worktrees can race the same source thread: both read
681 // `get_thread(thread)` returning the same parent, both
682 // `put_state` with that parent, both `set_thread` —
683 // result is two leaf states with the same parent, one of
684 // which is orphaned because the ref ends up pointing at
685 // whichever `set_thread` won the race. The manifest write
686 // at step 4 has the same lost-update problem on a smaller
687 // scale. Holding the write lock across the whole
688 // read-modify-write sequence makes the capture atomic with
689 // respect to other state-changing operations.
690 let _lock = self
691 .locker()
692 .write()
693 .map_err(|e| HeddleError::Io(std::io::Error::other(e.to_string())))?;
694
695 let existing_manifest =
696 read_manifest(self.heddle_dir(), thread).map_err(HeddleError::Io)?;
697
698 // 0a. Withheld checkouts are non-capturable. A withheld checkout holds
699 // only the operator-local courtesy stub (the tracked bytes were
700 // withheld because the state's tier is not visible to the
701 // materializing audience). Capturing it would either pull the stub
702 // in as tracked content or — worse — build an empty tree (the stub
703 // is ignored, see `ignore_patterns`) and commit it, wiping the
704 // withheld state's real files. The operator cannot capture content
705 // they were never served, so refuse with a no-op and leave the
706 // thread head where it is (heddle#316).
707 //
708 // The withheld status is keyed by THIS worktree root, not by the
709 // per-thread `manifest.toml` — that single file is clobbered when
710 // the same thread is materialized into a second worktree, so a
711 // manifest-level flag would let an under-tier checkout of one
712 // worktree wrongly suppress an authorized sibling worktree's
713 // capture. The per-root marker (written by `checkout_state_gated`)
714 // scopes the suppression to exactly the worktree that was withheld.
715 if crate::thread_manifest::is_withheld_checkout(
716 self.heddle_dir(),
717 &canonical_worktree_path(root),
718 ) {
719 debug!(thread = %thread, "thread capture skipped (withheld checkout)");
720 return Ok(ThreadCaptureOutcome::NoOp);
721 }
722
723 // 0. Fast no-op via the stat-cache. If every file in the
724 // manifest still exists with the same `(inode, mtime,
725 // ctime, mode)` AND the disk walk turns up no
726 // untracked/new files, we know the tree is byte-identical
727 // to what we materialised. Skip the entire blob-and-tree
728 // rebuild. Typical cost: ~5ms for a 643-file worktree
729 // vs hundreds of ms for the full `build_tree` rehash.
730 if let Some(m) = existing_manifest.as_ref()
731 && stat_cache_no_op(self, m, root)?
732 {
733 debug!(thread = %thread, "thread capture no-op (stat-cache hit)");
734 return Ok(ThreadCaptureOutcome::NoOp);
735 }
736
737 let baseline_tree = match existing_manifest.as_ref() {
738 Some(manifest) => {
739 Some(self.store().get_tree(&manifest.tree_hash)?.ok_or_else(|| {
740 HeddleError::Config(format!(
741 "manifest baseline tree {} missing while capturing thread {thread}",
742 manifest.tree_hash
743 ))
744 })?)
745 }
746 None => None,
747 };
748
749 // 1. Walk the on-disk worktree → fresh Tree (also stores
750 // every blob it sees as a side effect). When we have a
751 // manifest, pass it as a stat-cache so unchanged files
752 // skip the read+hash cycle entirely. Files that DID
753 // change still get the full treatment, so correctness
754 // is preserved; we just avoid the redundant work for
755 // the (usually large) majority.
756 let new_tree = match existing_manifest.as_ref() {
757 Some(m) => {
758 self.build_tree_profiled_with_stat_cache_against(root, baseline_tree.as_ref(), m)?
759 .0
760 }
761 None => {
762 self.build_tree_profiled_against(root, baseline_tree.as_ref())?
763 .0
764 }
765 };
766 let new_tree_hash = self.store().put_tree(&new_tree)?;
767
768 // 2. Content-hash no-op (slow path equivalent of the
769 // stat-cache check above). Hits when stat fields drifted
770 // via `touch` or atime updates even though the bytes
771 // didn't change — refresh the manifest's stat fields so
772 // the next call hits the fast path.
773 if existing_manifest
774 .as_ref()
775 .map(|m| m.tree_hash == new_tree_hash)
776 .unwrap_or(false)
777 {
778 let mut refreshed = existing_manifest.expect("checked Some above");
779 refreshed.files.clear();
780 populate_manifest_from_tree(self, &new_tree, root, "", &mut refreshed.files)?;
781 write_manifest(self.heddle_dir(), thread, &refreshed).map_err(HeddleError::Io)?;
782 debug!(thread = %thread, "thread capture no-op (content-hash refresh)");
783 return Ok(ThreadCaptureOutcome::NoOp);
784 }
785
786 // 3. Real capture. Build a new state parented at the
787 // current thread head (if any), put it, advance the
788 // thread ref.
789 let attribution = self.get_attribution()?;
790 let thread_name = ThreadName::from(thread);
791 let parents = match self.refs().get_thread(&thread_name)? {
792 Some(prev) => vec![prev],
793 None => vec![],
794 };
795 let mut state = State::new_snapshot(new_tree_hash, parents, attribution);
796 // Auto-sign this thread-materialization capture (heddle#482) via the
797 // authored-state chokepoint, the same as the primary capture path — it
798 // is a real author capture that bypasses `stage_snapshot_objects`. Last
799 // mutation before the write.
800 self.put_authored_state(&mut state)?;
801 self.refs().set_thread(&thread_name, &state.change_id)?;
802
803 // 4. Rewrite the manifest to reflect the new state. `root` is
804 // the worktree being captured from — record its canonical
805 // path so the next snapshot can tell whether it's running
806 // inside this same worktree.
807 let mut manifest = ThreadManifest::new(
808 state.change_id,
809 new_tree_hash,
810 canonical_worktree_path(root),
811 );
812 populate_manifest_from_tree(self, &new_tree, root, "", &mut manifest.files)?;
813 write_manifest(self.heddle_dir(), thread, &manifest).map_err(HeddleError::Io)?;
814
815 debug!(
816 thread = %thread,
817 new_state = %state.change_id,
818 files = manifest.files.len(),
819 "thread captured"
820 );
821 Ok(ThreadCaptureOutcome::Captured {
822 state_id: state.change_id,
823 })
824 }
825}
826
// NOTE: the paragraphs below describe `populate_manifest_from_tree`
// (defined further down in this file), not the function that follows.
// They are kept as a plain comment so rustdoc does not attach them to
// the wrong item.
//
// Recursive helper: for each tree entry under `rel_prefix` inside
// the materialized `dest`, walk the captured tree (NOT the disk —
// we trust what we just put there) and stat the corresponding file
// to fill in the manifest's identity fields.
//
// Using the captured tree as the walk basis is what lets a
// manifest entry survive `rm -rf .` later: the file may have
// disappeared but we still record what *should* be there per the
// captured state. Capture-from-disk decides what to do about
// missing files at its own scan time.
837/// Plain-text placeholder a holder sees instead of an under-tier state's
838/// tracked content on their own checkout. ASCII-only, mirrors the redaction
839/// `stub_text` shape. Never travels off-host.
840fn courtesy_stub_text(tier: &VisibilityTier, embargo_until: Option<DateTime<Utc>>) -> String {
841 let mut out = String::with_capacity(256);
842 out.push_str("# Heddle withheld this state's content from your audience.\n");
843 out.push_str(&format!("# visibility-tier: {}\n", tier.as_str()));
844 if let VisibilityTier::TeamScoped { team_id } = tier {
845 out.push_str(&format!("# team: {team_id}\n"));
846 }
847 if let VisibilityTier::Restricted { scope_label } | VisibilityTier::Private { scope_label } =
848 tier
849 {
850 out.push_str(&format!("# scope: {scope_label}\n"));
851 }
852 match embargo_until {
853 Some(when) => out.push_str(&format!("# promotes-at: {}\n", when.to_rfc3339())),
854 None => out.push_str("# promotes-at: (no scheduled promotion)\n"),
855 }
856 out.push_str("# This placeholder is a local courtesy; the bytes are not in this checkout.\n");
857 out
858}
859
860/// Collect every blob/symlink leaf path (worktree-relative, forward-slash
861/// joined) reachable from `tree` into `out`. Used by the checkout reconcile
862/// step to enumerate the tracked content a tier serves (the `keep` set on the
863/// visible path) or withholds (the `must_remove` set on the withheld path),
864/// without touching disk — the path set is derived purely from the tree.
865fn collect_tree_leaf_paths(
866 repo: &Repository,
867 tree: &Tree,
868 rel_prefix: &str,
869 out: &mut BTreeSet<String>,
870) -> Result<()> {
871 use objects::object::EntryType;
872 for entry in tree.entries() {
873 let rel_path = if rel_prefix.is_empty() {
874 entry.name().to_string()
875 } else {
876 format!("{rel_prefix}/{}", entry.name())
877 };
878 match entry.entry_type() {
879 EntryType::Tree => {
880 let Some(tree_hash) = entry.tree_hash() else {
881 continue;
882 };
883 let subtree = repo.store().get_tree(&tree_hash)?.ok_or_else(|| {
884 HeddleError::Config(format!(
885 "subtree {} missing while collecting leaf paths for {rel_path}",
886 tree_hash
887 ))
888 })?;
889 collect_tree_leaf_paths(repo, &subtree, &rel_path, out)?;
890 }
891 EntryType::Blob | EntryType::Symlink | EntryType::Gitlink => {
892 out.insert(rel_path);
893 }
894 // Native child-spool edge: not a worktree leaf, so it has no
895 // materialized path to collect.
896 EntryType::Spoollink => {}
897 }
898 }
899 Ok(())
900}
901
902pub(crate) fn populate_manifest_from_tree(
903 repo: &Repository,
904 tree: &Tree,
905 dest: &Path,
906 rel_prefix: &str,
907 out: &mut BTreeMap<String, ManifestFile>,
908) -> Result<()> {
909 use objects::object::EntryType;
910 for entry in tree.entries() {
911 let rel_path = if rel_prefix.is_empty() {
912 entry.name().to_string()
913 } else {
914 format!("{rel_prefix}/{}", entry.name())
915 };
916 match entry.entry_type() {
917 EntryType::Tree => {
918 let Some(tree_hash) = entry.tree_hash() else {
919 continue;
920 };
921 let subtree = repo.store().get_tree(&tree_hash)?.ok_or_else(|| {
922 HeddleError::Config(format!(
923 "subtree {} missing while populating manifest for {rel_path}",
924 tree_hash
925 ))
926 })?;
927 populate_manifest_from_tree(repo, &subtree, dest, &rel_path, out)?;
928 }
929 EntryType::Blob | EntryType::Symlink => {
930 let on_disk = dest.join(&rel_path);
931 let meta = match fs::symlink_metadata(&on_disk) {
932 Ok(m) => m,
933 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
934 // The materializer didn't put it there. That
935 // shouldn't happen on a clean materialize,
936 // but if it does we skip the entry so the
937 // manifest stays a reflection of disk truth.
938 debug!(
939 path = %rel_path,
940 "manifest population skipped missing file"
941 );
942 continue;
943 }
944 Err(e) => return Err(HeddleError::Io(e)),
945 };
946 let (size, inode, mtime_ns, ctime_ns, mode) =
947 crate::stat_signature::stat_signature(&on_disk, &meta);
948 out.insert(
949 rel_path,
950 ManifestFile {
951 hash: entry.require_content_hash(),
952 size,
953 inode,
954 mtime_ns,
955 ctime_ns,
956 mode,
957 },
958 );
959 }
960 EntryType::Gitlink => {}
961 // Native child-spool edge: nothing materialized to disk.
962 EntryType::Spoollink => {}
963 }
964 }
965 Ok(())
966}
967
/// Resolve `path` to the *absolute*, symlink-resolved form stored in the
/// manifest's worktree-path field.
///
/// `Repository::snapshot` compares its `self.root` (also canonicalized) to
/// this value to decide whether it's running inside the materialized
/// worktree; without canonicalization a `/tmp/foo` materialize +
/// `/private/tmp/foo` snapshot would miss the match on macOS.
///
/// When canonicalization fails (for example because the path does not
/// exist) the input is returned unchanged. The comparison may then produce a
/// false miss in pathological cases, which degrades the cache to "always
/// rebuild" instead of corrupting the manifest. Strictly worse perf, never
/// worse correctness.
pub(crate) fn canonical_worktree_path(path: &Path) -> PathBuf {
    match fs::canonicalize(path) {
        Ok(resolved) => resolved,
        Err(_) => path.to_path_buf(),
    }
}
982
// NOTE: the paragraphs below describe `stat_cache_no_op` (defined further
// down in this file), not the function that follows. They are kept as a
// plain comment so rustdoc does not merge them into the wrong item's docs.
//
// Stat-cache fast no-op check. Returns `true` when the on-disk
// worktree is byte-identical to what `manifest` describes — every
// manifest file present at its recorded `(inode, mtime, ctime,
// mode)`, no untracked files, no deletions.
//
// Pattern: same as git's index `assume-unchanged` fast path. The
// stat fields are populated by `populate_manifest_from_tree` at
// materialise time; clonefile/copy operations preserve the
// destination's inode for the lifetime of the file, so a single
// `stat` per file is sufficient to detect any modification.
//
// Performance: ~5 ms for a 643-file worktree (single `stat` per
// file + B-tree lookup). The slow path (`build_tree`) reads and
// hashes every file, ~100s of ms for the same fixture.
//
// Returns `Ok(false)` on ANY uncertainty — a stat call failed, a
// file in the manifest is missing, an untracked file showed up,
// or any single field mismatched. Callers fall through to the
// slow `build_tree` path, which is always correct.
1002/// Walk the captured tree named by `manifest.tree_hash` and collect
1003/// every subdirectory's relative path (forward-slash joined,
1004/// relative to the tree root, no leading or trailing slashes).
1005/// Source of truth for [`stat_cache_no_op`]'s directory leg —
1006/// includes tree-only empty directories that a `manifest.files`
1007/// ancestors-derived set would miss.
1008fn collect_expected_dirs(
1009 repo: &Repository,
1010 manifest: &ThreadManifest,
1011) -> Result<std::collections::HashSet<String>> {
1012 use std::collections::HashSet;
1013 let mut set: HashSet<String> = HashSet::new();
1014 let Some(tree) = repo.store().get_tree(&manifest.tree_hash)? else {
1015 // Tree missing from the store would be a serious anomaly —
1016 // surface it so the caller bails to the slow path which will
1017 // re-derive everything from the worktree.
1018 return Err(HeddleError::Config(format!(
1019 "tree {} referenced by manifest is missing",
1020 manifest.tree_hash
1021 )));
1022 };
1023 collect_subdirs_into(repo, &tree, "", &mut set)?;
1024 Ok(set)
1025}
1026
1027fn collect_subdirs_into(
1028 repo: &Repository,
1029 tree: &objects::object::Tree,
1030 rel_prefix: &str,
1031 out: &mut std::collections::HashSet<String>,
1032) -> Result<()> {
1033 use objects::object::EntryType;
1034 for entry in tree.entries() {
1035 if entry.entry_type() != EntryType::Tree {
1036 continue;
1037 }
1038 let rel = if rel_prefix.is_empty() {
1039 entry.name().to_string()
1040 } else {
1041 format!("{rel_prefix}/{}", entry.name())
1042 };
1043 let Some(tree_hash) = entry.tree_hash() else {
1044 continue;
1045 };
1046 let subtree = repo.store().get_tree(&tree_hash)?.ok_or_else(|| {
1047 HeddleError::Config(format!(
1048 "subtree {} missing while collecting expected dirs at {rel}",
1049 tree_hash
1050 ))
1051 })?;
1052 out.insert(rel.clone());
1053 collect_subdirs_into(repo, &subtree, &rel, out)?;
1054 }
1055 Ok(())
1056}
1057
1058/// Recursive `read_dir` worker for the stat-cache no-op predicate.
1059/// Returns `Ok(false)` to bail to the slow path (anything unexpected,
1060/// any stat mismatch); `Ok(true)` to continue the walk. Final
1061/// presence checks (`seen.len() == manifest.files.len()` etc.) live
1062/// in the caller; this fn only flags incremental mismatches.
1063///
1064/// Why hand-roll rather than reuse `ignore::WalkBuilder`: the walker
1065/// crate buffers entries, sorts them for determinism, calls
1066/// `metadata()` to populate its own `DirEntry`, and runs the gitignore
1067/// pipeline per directory even with every `git_*` flag turned off.
1068/// All of that is wasted on this predicate, which already has its own
1069/// `WorktreeIgnoreMatcher` and only needs `symlink_metadata` on each
1070/// file. A bare `read_dir` recursion is ≈3× faster on the 10k-file
1071/// fixture and matches `build_tree`'s ignore semantics exactly
1072/// because we go through the same matcher.
1073fn walk_for_no_op(
1074 root: &Path,
1075 cur: &Path,
1076 manifest: &ThreadManifest,
1077 expected_dirs: &std::collections::HashSet<String>,
1078 ignore_matcher: &crate::worktree_ignore::WorktreeIgnoreMatcher,
1079 seen: &mut std::collections::HashSet<String>,
1080 seen_dirs: &mut std::collections::HashSet<String>,
1081) -> Result<bool> {
1082 let entries = match fs::read_dir(cur) {
1083 Ok(it) => it,
1084 // A directory we can't read means we've lost certainty about
1085 // its contents — fall through to the slow path.
1086 Err(_) => return Ok(false),
1087 };
1088 for entry in entries {
1089 let entry = match entry {
1090 Ok(e) => e,
1091 Err(_) => return Ok(false),
1092 };
1093 let path = entry.path();
1094 let Ok(rel) = path.strip_prefix(root) else {
1095 return Ok(false);
1096 };
1097 let rel_str = rel.to_string_lossy().into_owned();
1098 let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
1099 return Ok(false);
1100 };
1101
1102 // Run the ignore matcher *first*, before consulting the
1103 // manifest. The previous "manifest-first" dispatch
1104 // accepted any manifest hit without re-checking the
1105 // matcher, which silently false-passed if the user had
1106 // tightened `.heddleignore` (or the in-config ignore set)
1107 // between materialise and this capture — `build_tree`
1108 // would now exclude the previously-tracked path and
1109 // produce a different tree, but the predicate said
1110 // "no-op". Always running the matcher first costs a
1111 // pattern check per entry but is what makes the
1112 // predicate's output match what `build_tree` would do.
1113 //
1114 // Three outcomes from the matcher:
1115 // * Pruned + in manifest → ignore-config drift; bail
1116 // to slow path so the new tree reflects the new
1117 // exclusion.
1118 // * Pruned + not in manifest → genuinely ignored;
1119 // silently skip without recursing.
1120 // * Not pruned → standard manifest / new-entry
1121 // dispatch below.
1122 // `should_prune_directory_child` matches the production
1123 // walker's per-entry probe (`worktree_walk.rs`). It calls
1124 // `matched_relative(path, is_dir=true)` so gitignore rules
1125 // with trailing `/` still fire, and the same patterns
1126 // exclude both file and directory entries — same behaviour
1127 // `build_tree` would observe at materialise time.
1128 let pruned = ignore_matcher.should_prune_absolute_path(&path)
1129 || ignore_matcher.should_prune_directory_child(cur, name);
1130 if pruned {
1131 if manifest.files.contains_key(&rel_str) {
1132 // The matcher now wants this path excluded, but
1133 // it's in the manifest from materialise time.
1134 // Ignore-config drift — let the slow path
1135 // rebuild the tree without it.
1136 return Ok(false);
1137 }
1138 continue;
1139 }
1140
1141 // Not pruned. Manifest lookup is the fast path for
1142 // tracked files; un-tracked entries fall through to
1143 // dir-recursion / new-file detection below.
1144 if let Some(manifest_entry) = manifest.files.get(&rel_str) {
1145 // `symlink_metadata` (not `metadata`) so a symlink
1146 // doesn't transparently follow into the target's
1147 // inode.
1148 let meta = match fs::symlink_metadata(&path) {
1149 Ok(m) => m,
1150 Err(_) => return Ok(false),
1151 };
1152 let (size, inode, mtime_ns, ctime_ns, mode) =
1153 crate::stat_signature::stat_signature(&path, &meta);
1154 let stat = ManifestFile {
1155 hash: manifest_entry.hash,
1156 size,
1157 inode,
1158 mtime_ns,
1159 ctime_ns,
1160 mode,
1161 };
1162 if !stat.matches(manifest_entry) {
1163 return Ok(false);
1164 }
1165 seen.insert(rel_str);
1166 continue;
1167 }
1168
1169 let file_type = match entry.file_type() {
1170 Ok(ft) => ft,
1171 Err(_) => return Ok(false),
1172 };
1173 if file_type.is_dir() {
1174 // Directory leg: any directory not in `expected_dirs`
1175 // is an addition since materialise. Bail; the slow
1176 // path will incorporate it.
1177 if !expected_dirs.contains(&rel_str) {
1178 return Ok(false);
1179 }
1180 seen_dirs.insert(rel_str);
1181 if !walk_for_no_op(
1182 root,
1183 &path,
1184 manifest,
1185 expected_dirs,
1186 ignore_matcher,
1187 seen,
1188 seen_dirs,
1189 )? {
1190 return Ok(false);
1191 }
1192 continue;
1193 }
1194
1195 // A non-ignored, non-directory entry that's not in the
1196 // manifest is a new file. Bail to the slow path which
1197 // will rebuild the tree with the new entry.
1198 return Ok(false);
1199 }
1200 Ok(true)
1201}
1202
1203fn stat_cache_no_op(repo: &Repository, manifest: &ThreadManifest, root: &Path) -> Result<bool> {
1204 use std::collections::HashSet;
1205
1206 let ignore_patterns = repo.ignore_patterns()?;
1207 let nested_exclusions = repo.nested_thread_worktree_exclusions(root)?;
1208 let ignore_matcher = crate::worktree_ignore::WorktreeIgnoreMatcher::new(&ignore_patterns)
1209 .with_nested_worktree_exclusions(nested_exclusions);
1210
1211 // Manifests only record files+symlinks, but Heddle's tree
1212 // builder materialises empty directories as their own tree
1213 // entries. So a no-op predicate that only checks `manifest.files`
1214 // would miss "user added or removed an empty directory" —
1215 // `seen.len() == manifest.files.len()` is still true on the file
1216 // side, but the on-disk tree no longer matches what `build_tree`
1217 // would produce.
1218 //
1219 // Source of truth for the expected directory set is the captured
1220 // tree itself (the one the manifest's `tree_hash` names), not
1221 // the manifest's file ancestors. Two reasons:
1222 //
1223 // 1. *Tree-only empty directories.* A `Tree` entry with no
1224 // files beneath it is invisible from a `manifest.files`
1225 // ancestors-walk — the file set is empty, so every
1226 // ancestor it would contribute is missing. Removing a
1227 // legit empty leaf dir would still false-pass.
1228 // 2. *Future schema drift.* Files in `manifest.files` may
1229 // use slash-normalised relative paths that don't exactly
1230 // match how `Tree::entries` names subdirs on every
1231 // platform; walking the tree directly avoids the
1232 // double-encoding hazard.
1233 //
1234 // Cost is ~one `get_tree` per subdir of the captured tree.
1235 // For the typical thread (a few hundred dirs) that's a small
1236 // number of memory-mapped object reads; on the predicate's
1237 // hot path it's bounded by the tree's directory fan-out, not
1238 // file count.
1239 let expected_dirs: HashSet<String> = match collect_expected_dirs(repo, manifest) {
1240 Ok(s) => s,
1241 // Any error walking the tree → conservatively bail to the
1242 // slow path. `Ok(false)` keeps correctness; the worst case
1243 // is a wasted full rebuild.
1244 Err(_) => return Ok(false),
1245 };
1246
1247 // Walk the worktree. For every file we see, check it against the
1248 // manifest. Track which manifest paths we've actually seen so we
1249 // can detect deletions afterwards.
1250 //
1251 // Custom `read_dir` recursion instead of `ignore::WalkBuilder`:
1252 // the walker crate is fast on its own but the per-entry overhead
1253 // adds up at 10k+ files (it buffers, sorts, double-stats, and
1254 // re-applies the ignore stack for every dir). For this hot
1255 // predicate we only need: a `readdir` per directory, one
1256 // `symlink_metadata` per file, and the same ignore-matcher
1257 // check `build_tree` runs. The std-only recursion below
1258 // measured ≈3× faster on the 10k-file fixture (no per-entry
1259 // double-stat, no buffer churn, fewer allocations).
1260 let mut seen: HashSet<String> = HashSet::with_capacity(manifest.files.len());
1261 let mut seen_dirs: HashSet<String> = HashSet::with_capacity(expected_dirs.len());
1262 if !walk_for_no_op(
1263 root,
1264 root,
1265 manifest,
1266 &expected_dirs,
1267 &ignore_matcher,
1268 &mut seen,
1269 &mut seen_dirs,
1270 )? {
1271 return Ok(false);
1272 }
1273
1274 // Final pass: every manifest entry must have been seen (file
1275 // deletion check) and every manifest-implied directory must
1276 // have been seen (directory deletion check). The dir-side
1277 // check catches `rmdir` of an empty directory that was part
1278 // of the materialised tree — its files are also gone (so the
1279 // file side already declines) but if it had no files to begin
1280 // with the file side alone would false-pass.
1281 if seen.len() != manifest.files.len() {
1282 return Ok(false);
1283 }
1284 if seen_dirs.len() != expected_dirs.len() {
1285 return Ok(false);
1286 }
1287 Ok(true)
1288}
1289
1290#[cfg(test)]
1291mod tests {
1292 use objects::{
1293 object::{Blob, TreeEntry},
1294 util::gitlink_placeholder_bytes,
1295 };
1296 use sley::{ObjectFormat as GitObjectFormat, ObjectId as GitObjectId};
1297 use tempfile::TempDir;
1298
1299 use super::*;
1300 use crate::thread_manifest::read_manifest;
1301
1302 fn gitlink_target_for_tests() -> GitObjectId {
1303 GitObjectId::from_hex(
1304 GitObjectFormat::Sha1,
1305 "1234567890abcdef1234567890abcdef12345678",
1306 )
1307 .unwrap()
1308 }
1309
1310 fn seeded_repo() -> (TempDir, Repository) {
1311 let repo_dir = TempDir::new().unwrap();
1312 let repo = Repository::init_default(repo_dir.path()).unwrap();
1313 fs::write(repo_dir.path().join("file.txt"), b"tracked\n").unwrap();
1314 repo.snapshot(Some("seed".into()), None).unwrap();
1315 (repo_dir, repo)
1316 }
1317
1318 #[test]
1319 fn capture_thread_from_disk_preserves_unchanged_gitlink_when_sibling_changes() {
1320 let repo_dir = TempDir::new().unwrap();
1321 let repo = Repository::init_default(repo_dir.path()).unwrap();
1322 let target = gitlink_target_for_tests();
1323 let note_hash = repo
1324 .store()
1325 .put_blob(&Blob::new(b"before\n".to_vec()))
1326 .unwrap();
1327 let tree = Tree::from_entries(vec![
1328 TreeEntry::file("note.txt", note_hash, false).unwrap(),
1329 TreeEntry::gitlink("vendor", target).unwrap(),
1330 ]);
1331 repo.snapshot_tree_with_attribution_profiled(
1332 tree,
1333 Some("gitlink thread baseline".to_string()),
1334 None,
1335 repo.get_attribution().unwrap(),
1336 )
1337 .unwrap();
1338
1339 let dest = repo_dir.path().join("thread-out");
1340 repo.materialize_thread("main", &dest, &AudienceTier::Internal)
1341 .unwrap();
1342 assert_eq!(
1343 fs::read(dest.join("vendor")).unwrap(),
1344 gitlink_placeholder_bytes(&target)
1345 );
1346
1347 fs::write(dest.join("note.txt"), b"after\n").unwrap();
1348 let outcome = repo.capture_thread_from_disk("main", &dest).unwrap();
1349 let state_id = match outcome {
1350 ThreadCaptureOutcome::Captured { state_id } => state_id,
1351 ThreadCaptureOutcome::NoOp => panic!("sibling edit must capture a new state"),
1352 };
1353 let state = repo
1354 .store()
1355 .get_state(&state_id)
1356 .unwrap()
1357 .expect("captured state");
1358 let captured_tree = repo
1359 .store()
1360 .get_tree(&state.tree)
1361 .unwrap()
1362 .expect("captured tree");
1363
1364 assert_eq!(
1365 captured_tree
1366 .get("vendor")
1367 .expect("vendor gitlink")
1368 .gitlink_target(),
1369 Some(target)
1370 );
1371 let note_hash = captured_tree
1372 .get("note.txt")
1373 .expect("note entry")
1374 .blob_hash()
1375 .expect("note blob");
1376 let note = repo
1377 .store()
1378 .get_blob(¬e_hash)
1379 .unwrap()
1380 .expect("note blob");
1381 assert_eq!(note.content(), b"after\n");
1382 }
1383
1384 #[test]
1385 fn materialize_thread_writes_manifest_with_files() {
1386 let repo_dir = TempDir::new().unwrap();
1387 let repo = Repository::init_default(repo_dir.path()).unwrap();
1388 // Build a small worktree to capture.
1389 fs::write(repo_dir.path().join("Cargo.toml"), b"# a\n").unwrap();
1390 fs::create_dir_all(repo_dir.path().join("src")).unwrap();
1391 fs::write(repo_dir.path().join("src/lib.rs"), b"fn main() {}\n").unwrap();
1392 repo.snapshot(Some("seed".into()), None).unwrap();
1393
1394 let dest = TempDir::new().unwrap();
1395 let manifest = repo
1396 .materialize_thread("main", &dest.path().join("out"), &AudienceTier::Internal)
1397 .unwrap();
1398
1399 assert_eq!(
1400 manifest.schema_version,
1401 crate::thread_manifest::SCHEMA_VERSION
1402 );
1403 // Three files: Cargo.toml, src/lib.rs, plus whatever
1404 // init_default seeded — only assert the ones we wrote
1405 // exist and have plausible stat fields.
1406 let cargo = manifest
1407 .files
1408 .get("Cargo.toml")
1409 .expect("Cargo.toml in manifest");
1410 assert_ne!(cargo.inode, 0);
1411 assert_ne!(cargo.mtime_ns, 0);
1412 let src = manifest
1413 .files
1414 .get("src/lib.rs")
1415 .expect("src/lib.rs in manifest");
1416 assert_ne!(src.inode, 0);
1417
1418 // Manifest persisted to disk.
1419 let loaded = read_manifest(repo.heddle_dir(), "main")
1420 .unwrap()
1421 .expect("manifest on disk");
1422 assert_eq!(loaded.files.len(), manifest.files.len());
1423 assert_eq!(
1424 loaded.files["Cargo.toml"].inode,
1425 manifest.files["Cargo.toml"].inode
1426 );
1427 }
1428
1429 #[test]
1430 fn materialize_thread_creates_absent_target() {
1431 let (_repo_dir, repo) = seeded_repo();
1432 let dest_holder = TempDir::new().unwrap();
1433 let dest = dest_holder.path().join("out");
1434
1435 repo.materialize_thread("main", &dest, &AudienceTier::Internal)
1436 .unwrap();
1437
1438 assert!(dest.is_dir());
1439 assert_eq!(
1440 fs::read_to_string(dest.join("file.txt")).unwrap(),
1441 "tracked\n"
1442 );
1443 }
1444
1445 #[test]
1446 fn materialize_thread_adopts_empty_directory() {
1447 let (_repo_dir, repo) = seeded_repo();
1448 let dest_holder = TempDir::new().unwrap();
1449 let dest = dest_holder.path().join("out");
1450 fs::create_dir(&dest).unwrap();
1451
1452 repo.materialize_thread("main", &dest, &AudienceTier::Internal)
1453 .unwrap();
1454
1455 assert!(dest.is_dir());
1456 assert_eq!(
1457 fs::read_to_string(dest.join("file.txt")).unwrap(),
1458 "tracked\n"
1459 );
1460 }
1461
1462 #[test]
1463 fn materialize_thread_rejects_non_empty_directory() {
1464 let (_repo_dir, repo) = seeded_repo();
1465 let dest_holder = TempDir::new().unwrap();
1466 let dest = dest_holder.path().join("out");
1467 fs::create_dir(&dest).unwrap();
1468 fs::write(dest.join("existing.txt"), b"user data\n").unwrap();
1469
1470 let err = repo
1471 .materialize_thread("main", &dest, &AudienceTier::Internal)
1472 .unwrap_err();
1473
1474 assert!(err.to_string().contains("is not empty"), "{err}");
1475 assert_eq!(
1476 fs::read_to_string(dest.join("existing.txt")).unwrap(),
1477 "user data\n"
1478 );
1479 assert!(!dest.join("file.txt").exists());
1480 }
1481
1482 #[cfg(unix)]
1483 #[test]
1484 fn materialize_thread_rejects_symlink_target() {
1485 let (_repo_dir, repo) = seeded_repo();
1486 let dest_holder = TempDir::new().unwrap();
1487 let real = dest_holder.path().join("real");
1488 fs::create_dir(&real).unwrap();
1489 let dest = dest_holder.path().join("link");
1490 std::os::unix::fs::symlink(&real, &dest).unwrap();
1491
1492 let err = repo
1493 .materialize_thread("main", &dest, &AudienceTier::Internal)
1494 .unwrap_err();
1495
1496 assert!(err.to_string().contains("cannot be a symlink"), "{err}");
1497 assert!(!real.join("file.txt").exists());
1498 }
1499
1500 #[test]
1501 fn materialize_thread_rejects_file_target() {
1502 let (_repo_dir, repo) = seeded_repo();
1503 let dest_holder = TempDir::new().unwrap();
1504 let dest = dest_holder.path().join("file");
1505 fs::write(&dest, b"user data\n").unwrap();
1506
1507 let err = repo
1508 .materialize_thread("main", &dest, &AudienceTier::Internal)
1509 .unwrap_err();
1510
1511 assert!(err.to_string().contains("must be a directory"), "{err}");
1512 assert_eq!(fs::read_to_string(&dest).unwrap(), "user data\n");
1513 }
1514
1515 fn embargo_state_with_tier(repo: &Repository, tier: VisibilityTier) -> ChangeId {
1516 use chrono::Utc;
1517 use objects::object::{Principal, StateVisibility};
1518 let state_id = repo
1519 .refs()
1520 .get_thread(&ThreadName::new("main"))
1521 .unwrap()
1522 .expect("head present");
1523 repo.put_state_visibility(StateVisibility {
1524 state: state_id,
1525 tier,
1526 embargo_until: None,
1527 declarer: Principal {
1528 name: "Grace Hopper".into(),
1529 email: "grace@example.com".into(),
1530 },
1531 declared_at: Utc::now(),
1532 signature: None,
1533 supersedes: None,
1534 })
1535 .expect("put visibility");
1536 state_id
1537 }
1538
1539 fn checkout_main(
1540 repo: &Repository,
1541 dest: &Path,
1542 audience: &AudienceTier,
1543 ) -> CheckoutMaterialization {
1544 let change_id = repo
1545 .refs()
1546 .resolve("main")
1547 .unwrap()
1548 .expect("main thread exists");
1549 let state = repo
1550 .store()
1551 .get_state(&change_id)
1552 .unwrap()
1553 .expect("main state exists");
1554 repo.checkout_state_gated(&change_id, &state, dest, audience)
1555 .unwrap()
1556 }
1557
1558 #[test]
1559 fn checkout_renders_courtesy_stub_when_state_is_under_tier_for_audience() {
1560 let repo_dir = TempDir::new().unwrap();
1561 let repo = Repository::init_default(repo_dir.path()).unwrap();
1562 fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
1563 repo.snapshot(Some("embargoed fix".into()), None).unwrap();
1564 embargo_state_with_tier(
1565 &repo,
1566 VisibilityTier::Private {
1567 scope_label: "sec-embargo".into(),
1568 },
1569 );
1570
1571 let dest_holder = TempDir::new().unwrap();
1572 let dest = dest_holder.path().join("out");
1573 // A Private state is withheld even from the all-seeing Internal
1574 // operator — the placeholder appears, the tracked bytes do not.
1575 let manifest = repo
1576 .materialize_thread("main", &dest, &AudienceTier::Internal)
1577 .unwrap();
1578
1579 assert!(
1580 dest.join(COURTESY_STUB_FILENAME).exists(),
1581 "courtesy placeholder must be written for an under-tier checkout"
1582 );
1583 assert!(
1584 !dest.join("secret.rs").exists(),
1585 "the tracked content must NOT be materialized for an under-tier audience"
1586 );
1587 assert!(
1588 manifest.files.is_empty(),
1589 "manifest must record no tracked files for a stubbed checkout"
1590 );
1591 let stub = fs::read_to_string(dest.join(COURTESY_STUB_FILENAME)).unwrap();
1592 assert!(stub.contains("private"));
1593 assert!(stub.contains("sec-embargo"));
1594 }
1595
1596 #[test]
1597 fn checkout_materializes_real_content_for_the_authorized_audience() {
1598 let repo_dir = TempDir::new().unwrap();
1599 let repo = Repository::init_default(repo_dir.path()).unwrap();
1600 fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
1601 repo.snapshot(Some("embargoed fix".into()), None).unwrap();
1602 embargo_state_with_tier(
1603 &repo,
1604 VisibilityTier::Private {
1605 scope_label: "sec-embargo".into(),
1606 },
1607 );
1608
1609 let dest_holder = TempDir::new().unwrap();
1610 let dest = dest_holder.path().join("out");
1611 // The holder of the matching restricted scope sees the real bytes.
1612 let manifest = repo
1613 .materialize_thread(
1614 "main",
1615 &dest,
1616 &AudienceTier::Restricted("sec-embargo".into()),
1617 )
1618 .unwrap();
1619
1620 assert!(dest.join("secret.rs").exists());
1621 assert!(!dest.join(COURTESY_STUB_FILENAME).exists());
1622 assert!(manifest.files.contains_key("secret.rs"));
1623 }
1624
1625 /// #316 / PR #528 r6: a worktree root first materialized under-tier (stub
1626 /// written) and later re-materialized for an authorized audience must end up
1627 /// with a clean tree — the real bytes present AND the stale courtesy stub
1628 /// removed. `materialize_tree` only writes tracked leaves, so without an
1629 /// explicit removal the stub would linger on disk after the visible path.
1630 #[test]
1631 fn authorized_rematerialize_removes_stale_embargo_stub() {
1632 let repo_dir = TempDir::new().unwrap();
1633 let repo = Repository::init_default(repo_dir.path()).unwrap();
1634 fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
1635 repo.snapshot(Some("embargoed fix".into()), None).unwrap();
1636 embargo_state_with_tier(
1637 &repo,
1638 VisibilityTier::Private {
1639 scope_label: "sec-embargo".into(),
1640 },
1641 );
1642
1643 let dest_holder = TempDir::new().unwrap();
1644 let dest = dest_holder.path().join("out");
1645
1646 // First: under-tier materialize of the root → only the stub lands.
1647 checkout_main(&repo, &dest, &AudienceTier::Internal);
1648 assert!(
1649 dest.join(COURTESY_STUB_FILENAME).exists(),
1650 "under-tier materialize must write the stub"
1651 );
1652 assert!(!dest.join("secret.rs").exists());
1653
1654 // Then: re-materialize the SAME root for an authorized audience.
1655 checkout_main(
1656 &repo,
1657 &dest,
1658 &AudienceTier::Restricted("sec-embargo".into()),
1659 );
1660
1661 assert!(
1662 dest.join("secret.rs").exists(),
1663 "authorized re-materialize must write the real tree"
1664 );
1665 assert!(
1666 !dest.join(COURTESY_STUB_FILENAME).exists(),
1667 "the stale courtesy stub must be removed on the authorized re-materialize"
1668 );
1669 }
1670
1671 /// #316 / PR #528 r7 CLASS 1 (the leak): a root first materialized for an
1672 /// AUTHORIZED audience (real tree on disk) and then re-materialized
1673 /// UNDER-TIER must end up holding ONLY the courtesy stub — none of the prior
1674 /// visible tree's tracked bytes may remain next to the stub, or the checkout
1675 /// still contains exactly the content the gate is supposed to withhold. The
1676 /// reconcile step removes the prior tracked leaves (including nested ones)
1677 /// and prunes the directories they leave empty.
1678 #[test]
1679 fn visible_then_withheld_root_has_only_stub() {
1680 let repo_dir = TempDir::new().unwrap();
1681 let repo = Repository::init_default(repo_dir.path()).unwrap();
1682 fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
1683 fs::create_dir_all(repo_dir.path().join("nested")).unwrap();
1684 fs::write(repo_dir.path().join("nested/inner.rs"), b"fn inner() {}\n").unwrap();
1685 repo.snapshot(Some("embargoed fix".into()), None).unwrap();
1686 embargo_state_with_tier(
1687 &repo,
1688 VisibilityTier::Private {
1689 scope_label: "sec-embargo".into(),
1690 },
1691 );
1692
1693 let dest_holder = TempDir::new().unwrap();
1694 let dest = dest_holder.path().join("out");
1695
1696 // Visible materialize: the real tree lands — the very bytes a later
1697 // under-tier materialize must withhold.
1698 checkout_main(
1699 &repo,
1700 &dest,
1701 &AudienceTier::Restricted("sec-embargo".into()),
1702 );
1703 assert!(dest.join("secret.rs").exists());
1704 assert!(dest.join("nested/inner.rs").exists());
1705
1706 // Under-tier re-materialize of the SAME root — the leak case.
1707 checkout_main(&repo, &dest, &AudienceTier::Internal);
1708
1709 assert!(
1710 dest.join(COURTESY_STUB_FILENAME).exists(),
1711 "withheld checkout must hold the courtesy stub"
1712 );
1713 assert!(
1714 !dest.join("secret.rs").exists(),
1715 "the prior visible tree's bytes must NOT remain next to the stub"
1716 );
1717 assert!(
1718 !dest.join("nested/inner.rs").exists(),
1719 "nested tracked leaves must be removed too"
1720 );
1721 // ONLY the stub remains: every prior tracked leaf — and the now-empty
1722 // directories they lived in — are gone.
1723 let remaining: Vec<_> = fs::read_dir(&dest)
1724 .unwrap()
1725 .map(|e| e.unwrap().file_name())
1726 .collect();
1727 assert_eq!(
1728 remaining.len(),
1729 1,
1730 "withheld root must contain only the courtesy stub, got {remaining:?}"
1731 );
1732 assert_eq!(remaining[0].to_str().unwrap(), COURTESY_STUB_FILENAME);
1733 }
1734
1735 /// #316 / PR #528 r7 CLASS 1 (r6 transition, as a matrix member): a root
1736 /// first materialized UNDER-TIER (stub) and then re-materialized for an
1737 /// AUTHORIZED audience must hold the real tree and NO stale stub.
1738 #[test]
1739 fn withheld_then_visible_root_has_real_tree_no_stub() {
1740 let repo_dir = TempDir::new().unwrap();
1741 let repo = Repository::init_default(repo_dir.path()).unwrap();
1742 fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
1743 repo.snapshot(Some("embargoed fix".into()), None).unwrap();
1744 embargo_state_with_tier(
1745 &repo,
1746 VisibilityTier::Private {
1747 scope_label: "sec-embargo".into(),
1748 },
1749 );
1750
1751 let dest_holder = TempDir::new().unwrap();
1752 let dest = dest_holder.path().join("out");
1753
1754 checkout_main(&repo, &dest, &AudienceTier::Internal);
1755 assert!(dest.join(COURTESY_STUB_FILENAME).exists());
1756 assert!(!dest.join("secret.rs").exists());
1757
1758 checkout_main(
1759 &repo,
1760 &dest,
1761 &AudienceTier::Restricted("sec-embargo".into()),
1762 );
1763 assert!(
1764 dest.join("secret.rs").exists(),
1765 "authorized re-materialize must write the real tree"
1766 );
1767 assert!(
1768 !dest.join(COURTESY_STUB_FILENAME).exists(),
1769 "the stale courtesy stub must be removed on the authorized re-materialize"
1770 );
1771 }
1772
1773 /// #316 / PR #528 r7 CLASS 1 (visible→visible): re-materializing a root at a
1774 /// NEW visible tree must leave exactly that tree — a leaf dropped from the
1775 /// new tree must not linger from the prior materialize. `materialize_tree`
1776 /// writes the new leaves but does not remove a now-absent prior leaf; the
1777 /// reconcile step closes that gap.
1778 #[test]
1779 fn visible_then_visible_refreshes_tree() {
1780 let repo_dir = TempDir::new().unwrap();
1781 let repo = Repository::init_default(repo_dir.path()).unwrap();
1782 fs::write(repo_dir.path().join("keep.rs"), b"keep\n").unwrap();
1783 fs::write(repo_dir.path().join("stale.rs"), b"stale\n").unwrap();
1784 repo.snapshot(Some("seed".into()), None).unwrap();
1785
1786 let dest_holder = TempDir::new().unwrap();
1787 let dest = dest_holder.path().join("out");
1788 checkout_main(&repo, &dest, &AudienceTier::Internal);
1789 assert!(dest.join("keep.rs").exists());
1790 assert!(dest.join("stale.rs").exists());
1791
1792 // Advance the thread head in the MAIN repo (snapshot walks repo.root,
1793 // not `dest`, so the dest manifest's worktree_path stays = dest and is
1794 // NOT refreshed here): drop stale.rs, add fresh.rs.
1795 fs::remove_file(repo_dir.path().join("stale.rs")).unwrap();
1796 fs::write(repo_dir.path().join("fresh.rs"), b"fresh\n").unwrap();
1797 repo.snapshot(Some("advance".into()), None).unwrap();
1798
1799 // Re-materialize the SAME root at the new (still visible) head.
1800 checkout_main(&repo, &dest, &AudienceTier::Internal);
1801 assert!(dest.join("keep.rs").exists(), "an unchanged leaf stays");
1802 assert!(dest.join("fresh.rs").exists(), "the new leaf is written");
1803 assert!(
1804 !dest.join("stale.rs").exists(),
1805 "a leaf dropped from the new tree must not linger from the prior materialize"
1806 );
1807 assert!(
1808 !dest.join(COURTESY_STUB_FILENAME).exists(),
1809 "a visible re-materialize writes no stub"
1810 );
1811 }
1812
1813 /// #316 / PR #528 r7 CLASS 1 (withheld→withheld): two under-tier
1814 /// materializes of the same root leave only the stub each time, and capture
1815 /// stays a no-op.
1816 #[test]
1817 fn withheld_then_withheld_stays_withheld() {
1818 let repo_dir = TempDir::new().unwrap();
1819 let repo = Repository::init_default(repo_dir.path()).unwrap();
1820 fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
1821 repo.snapshot(Some("embargoed fix".into()), None).unwrap();
1822 embargo_state_with_tier(
1823 &repo,
1824 VisibilityTier::Private {
1825 scope_label: "sec-embargo".into(),
1826 },
1827 );
1828
1829 let dest_holder = TempDir::new().unwrap();
1830 let dest = dest_holder.path().join("out");
1831
1832 checkout_main(&repo, &dest, &AudienceTier::Internal);
1833 assert!(dest.join(COURTESY_STUB_FILENAME).exists());
1834 assert!(!dest.join("secret.rs").exists());
1835
1836 // Second under-tier checkout of the same root: still only the stub.
1837 checkout_main(&repo, &dest, &AudienceTier::Internal);
1838 let remaining: Vec<_> = fs::read_dir(&dest)
1839 .unwrap()
1840 .map(|e| e.unwrap().file_name())
1841 .collect();
1842 assert_eq!(
1843 remaining.len(),
1844 1,
1845 "withheld root must contain only the courtesy stub, got {remaining:?}"
1846 );
1847 assert_eq!(remaining[0].to_str().unwrap(), COURTESY_STUB_FILENAME);
1848 assert!(!dest.join("secret.rs").exists());
1849
1850 // Capture of the still-withheld root is a no-op.
1851 let outcome = repo.capture_thread_from_disk("main", &dest).unwrap();
1852 assert_eq!(outcome, ThreadCaptureOutcome::NoOp);
1853 }
1854
1855 /// #316 / PR #528 r9 FINDING A: the withheld marker (and `.leaves` record)
1856 /// must be keyed on the root `capture_thread_from_disk` resolves at
1857 /// READ-time, not on a pre-materialization path. `canonical_worktree_path`
1858 /// falls back to its raw input when the path does not yet resolve, so a dest
1859 /// reached THROUGH a symlink whose leaf does not exist yet canonicalizes to
1860 /// the un-resolved `link/out` before the dir is made but to the resolved
1861 /// `real/out` after. Pre-fix the marker was written under `link/out` while
1862 /// capture looked it up under `real/out` → marker missed → a withheld
1863 /// checkout captured as a stub-only tree instead of no-oping.
1864 #[cfg(unix)]
1865 #[test]
1866 fn withheld_marker_keyed_on_canonical_root_for_relative_dest() {
1867 let repo_dir = TempDir::new().unwrap();
1868 let repo = Repository::init_default(repo_dir.path()).unwrap();
1869 fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
1870 repo.snapshot(Some("embargoed fix".into()), None).unwrap();
1871 embargo_state_with_tier(
1872 &repo,
1873 VisibilityTier::Private {
1874 scope_label: "sec-embargo".into(),
1875 },
1876 );
1877
1878 // `dest` travels through a symlink to a not-yet-existing leaf, so a
1879 // canonicalize BEFORE the dir is created resolves differently (falls
1880 // back to `link/out`) than one AFTER (`real/out`).
1881 let dest_holder = TempDir::new().unwrap();
1882 let real = dest_holder.path().join("real");
1883 fs::create_dir_all(&real).unwrap();
1884 std::os::unix::fs::symlink(&real, dest_holder.path().join("link")).unwrap();
1885 let dest = dest_holder.path().join("link").join("out");
1886
1887 repo.materialize_thread("main", &dest, &AudienceTier::Internal)
1888 .unwrap();
1889 assert!(dest.join(COURTESY_STUB_FILENAME).exists());
1890 assert!(!dest.join("secret.rs").exists());
1891
1892 // Capture through the symlinked path must be a NO-OP: the marker was
1893 // keyed on the same canonical root (`real/out`) capture resolves.
1894 let outcome = repo.capture_thread_from_disk("main", &dest).unwrap();
1895 assert_eq!(
1896 outcome,
1897 ThreadCaptureOutcome::NoOp,
1898 "withheld checkout reached via a symlinked path must not be capturable"
1899 );
1900 }
1901
1902 /// #316 / PR #528 r8 HOLE 1: the withheld reduction must NOT depend on the
1903 /// clobberable per-thread `manifest.toml`. A root first materialized VISIBLE
1904 /// (holding `old-secret.txt`), THEN observed while a sibling worktree of the
1905 /// SAME thread is materialized (the event that clobbers the per-thread
1906 /// manifest, retargeting it at the sibling's root), THEN re-materialized
1907 /// WITHHELD against a LATER state whose tree no longer contains
1908 /// `old-secret.txt`, must still end up holding ONLY the courtesy stub. The
1909 /// secret is in NEITHER the withheld state's own tree NOR (post-clobber) the
1910 /// per-thread manifest — only the clobber-proof per-root record names it, so
1911 /// the reduction can only succeed by sourcing that record.
1912 #[test]
1913 fn withheld_reduction_survives_sibling_manifest_clobber() {
1914 let repo_dir = TempDir::new().unwrap();
1915 let repo = Repository::init_default(repo_dir.path()).unwrap();
1916
1917 // State S1 (visible): contains the secret that must not linger later.
1918 fs::write(repo_dir.path().join("old-secret.txt"), b"launch codes\n").unwrap();
1919 repo.snapshot(Some("seed with secret".into()), None)
1920 .unwrap();
1921
1922 // Root A materialized VISIBLE at S1 — the real bytes land on disk and the
1923 // clobber-proof per-root record for A captures `old-secret.txt`.
1924 let a_holder = TempDir::new().unwrap();
1925 let root_a = a_holder.path().join("root-a");
1926 checkout_main(&repo, &root_a, &AudienceTier::Internal);
1927 assert!(root_a.join("old-secret.txt").exists());
1928
1929 // Advance the thread to S2: the secret is REMOVED before this state, a
1930 // new tracked file replaces it. So `old-secret.txt` is absent from S2's
1931 // tree entirely.
1932 fs::remove_file(repo_dir.path().join("old-secret.txt")).unwrap();
1933 fs::write(repo_dir.path().join("kept.txt"), b"benign\n").unwrap();
1934 repo.snapshot(Some("drop secret, advance".into()), None)
1935 .unwrap();
1936 embargo_state_with_tier(
1937 &repo,
1938 VisibilityTier::Private {
1939 scope_label: "sec-embargo".into(),
1940 },
1941 );
1942
1943 // A sibling worktree B of the SAME thread is materialized (authorized, at
1944 // S2). `materialize_thread` rewrites `threads/main/manifest.toml` keyed by
1945 // thread name, so this CLOBBERS A's record there — `manifest_for_worktree_root(A)`
1946 // now resolves to B, the precise race that reopened the leak in r7.
1947 let b_holder = TempDir::new().unwrap();
1948 let root_b = b_holder.path().join("root-b");
1949 repo.materialize_thread(
1950 "main",
1951 &root_b,
1952 &AudienceTier::Restricted("sec-embargo".into()),
1953 )
1954 .unwrap();
1955 assert!(root_b.join("kept.txt").exists());
1956 // Confirm the clobber really happened: the per-thread manifest no longer
1957 // records root A.
1958 assert!(
1959 crate::thread_manifest::manifest_for_worktree_root(
1960 repo.heddle_dir(),
1961 &canonical_worktree_path(&root_a),
1962 )
1963 .unwrap()
1964 .is_none(),
1965 "sibling materialize must have clobbered A's per-thread manifest record"
1966 );
1967
1968 // Re-materialize root A WITHHELD (Internal can't see S2's Private tier).
1969 // S2's tree does not contain `old-secret.txt`, and the per-thread
1970 // manifest no longer names A — only the clobber-proof per-root record can
1971 // drive its removal.
1972 checkout_main(&repo, &root_a, &AudienceTier::Internal);
1973
1974 assert!(
1975 root_a.join(COURTESY_STUB_FILENAME).exists(),
1976 "withheld checkout must hold the courtesy stub"
1977 );
1978 assert!(
1979 !root_a.join("old-secret.txt").exists(),
1980 "the prior visible tree's secret must be GONE even though the per-thread manifest was clobbered"
1981 );
1982 let remaining: Vec<_> = fs::read_dir(&root_a)
1983 .unwrap()
1984 .map(|e| e.unwrap().file_name())
1985 .collect();
1986 assert_eq!(
1987 remaining.len(),
1988 1,
1989 "withheld root must contain only the courtesy stub, got {remaining:?}"
1990 );
1991 assert_eq!(remaining[0].to_str().unwrap(), COURTESY_STUB_FILENAME);
1992 }
1993
1994 /// #316 / PR #528 r9 FINDING 4: close the per-root `.leaves`-staleness CLASS.
1995 /// `capture_thread_from_disk` rewrites `manifest.toml` but used to leave the
1996 /// clobber-proof per-root `.leaves` record untouched, so a captured-but-
1997 /// later-withheld leaf leaked. Sequence: a visible checkout holding `{a}`;
1998 /// the user adds `b` and captures (head advances, `.leaves` MUST refresh to
1999 /// `{a, b}`); the thread then advances to a state whose tree drops `b` and is
2000 /// embargoed; re-materializing the SAME root WITHHELD against that state must
2001 /// leave ONLY the stub — `b` (on disk from the capture) must be GONE, not
2002 /// leaked next to the stub. The withheld state's own tree lacks `b`, so only
2003 /// a `.leaves` record the capture refreshed can drive `b`'s removal.
2004 #[test]
2005 fn capture_refreshes_materialized_leaves() {
2006 let repo_dir = TempDir::new().unwrap();
2007 let repo = Repository::init_default(repo_dir.path()).unwrap();
2008
2009 // S1 (visible): tracked `a.txt`.
2010 fs::write(repo_dir.path().join("a.txt"), b"alpha\n").unwrap();
2011 repo.snapshot(Some("seed a".into()), None).unwrap();
2012
2013 // Materialize root R visible (Internal) at S1 → disk {a.txt},
2014 // .leaves(R) = {a.txt}.
2015 let holder = TempDir::new().unwrap();
2016 let root = holder.path().join("root");
2017 checkout_main(&repo, &root, &AudienceTier::Internal);
2018 assert!(root.join("a.txt").exists());
2019
2020 // User adds `b.txt` in R and captures → head advances to S2 = {a, b}.
2021 // The capture MUST refresh the per-root `.leaves` record to include
2022 // `b.txt` (the class-fix: capture rewrites the manifest AND `.leaves`).
2023 fs::write(root.join("b.txt"), b"beta\n").unwrap();
2024 match repo.capture_thread_from_disk("main", &root).unwrap() {
2025 ThreadCaptureOutcome::Captured { .. } => {}
2026 ThreadCaptureOutcome::NoOp => panic!("adding b.txt must produce a real capture"),
2027 }
2028 let leaves = crate::thread_manifest::read_materialized_leaves(
2029 repo.heddle_dir(),
2030 &canonical_worktree_path(&root),
2031 )
2032 .unwrap()
2033 .expect("capture must have written a per-root leaves record");
2034 assert!(
2035 leaves.contains("a.txt") && leaves.contains("b.txt"),
2036 "capture must refresh the per-root .leaves record to the captured tree's leaves, got {leaves:?}"
2037 );
2038
2039 // Advance the thread to S3 whose tree LACKS b.txt: snapshot from the main
2040 // repo dir (which only holds a.txt and is NOT the materialized worktree,
2041 // so the manifest is not refreshed here), then embargo S3 Private.
2042 fs::write(repo_dir.path().join("a.txt"), b"alpha v2\n").unwrap();
2043 repo.snapshot(Some("drop b, advance".into()), None).unwrap();
2044 embargo_state_with_tier(
2045 &repo,
2046 VisibilityTier::Private {
2047 scope_label: "sec-embargo".into(),
2048 },
2049 );
2050
2051 // Re-materialize R WITHHELD (Internal under-tier for the Private S3). S3's
2052 // own tree has no b.txt, so the withheld reduction can only remove the
2053 // capture-added b.txt by sourcing the refreshed per-root record.
2054 checkout_main(&repo, &root, &AudienceTier::Internal);
2055
2056 assert!(
2057 root.join(COURTESY_STUB_FILENAME).exists(),
2058 "withheld checkout must hold the courtesy stub"
2059 );
2060 assert!(
2061 !root.join("b.txt").exists(),
2062 "the capture-added leaf must be removed by the withheld reduction, not leaked next to the stub"
2063 );
2064 let remaining: Vec<_> = fs::read_dir(&root)
2065 .unwrap()
2066 .map(|e| e.unwrap().file_name())
2067 .collect();
2068 assert_eq!(
2069 remaining.len(),
2070 1,
2071 "withheld root must contain only the courtesy stub, got {remaining:?}"
2072 );
2073 assert_eq!(remaining[0].to_str().unwrap(), COURTESY_STUB_FILENAME);
2074 }
2075
2076 /// #316 / PR #528 r3 Finding 1: materializing an under-tier checkout writes
2077 /// the courtesy stub and marks the manifest `withheld`. A subsequent
2078 /// capture of that checkout must be a NO-OP — it must NOT pull the stub in
2079 /// as tracked content, and (crucially) must NOT commit an empty tree that
2080 /// wipes the withheld state's real files. The thread head stays put.
2081 #[test]
2082 fn capture_skips_embargo_courtesy_stub() {
2083 let repo_dir = TempDir::new().unwrap();
2084 let repo = Repository::init_default(repo_dir.path()).unwrap();
2085 fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
2086 repo.snapshot(Some("embargoed fix".into()), None).unwrap();
2087 embargo_state_with_tier(
2088 &repo,
2089 VisibilityTier::Private {
2090 scope_label: "sec-embargo".into(),
2091 },
2092 );
2093
2094 let dest_holder = TempDir::new().unwrap();
2095 let dest = dest_holder.path().join("out");
2096 // Under-tier audience → only the stub lands; no real bytes, empty files.
2097 let manifest = repo
2098 .materialize_thread("main", &dest, &AudienceTier::Internal)
2099 .unwrap();
2100 assert!(
2101 dest.join(COURTESY_STUB_FILENAME).exists(),
2102 "stub must be written for the under-tier checkout"
2103 );
2104 assert!(
2105 manifest.files.is_empty(),
2106 "no tracked files in a stub checkout"
2107 );
2108 assert!(
2109 manifest.withheld,
2110 "manifest must mark the checkout withheld"
2111 );
2112
2113 let head_before = repo
2114 .refs()
2115 .get_thread(&ThreadName::new("main"))
2116 .unwrap()
2117 .expect("head");
2118
2119 // Capture the withheld checkout.
2120 let outcome = repo.capture_thread_from_disk("main", &dest).unwrap();
2121 assert_eq!(
2122 outcome,
2123 ThreadCaptureOutcome::NoOp,
2124 "a withheld checkout is non-capturable"
2125 );
2126
2127 // Thread head must not have moved.
2128 let head_after = repo
2129 .refs()
2130 .get_thread(&ThreadName::new("main"))
2131 .unwrap()
2132 .expect("head");
2133 assert_eq!(
2134 head_before, head_after,
2135 "withheld capture must not advance the thread head"
2136 );
2137
2138 // The thread's tree is still the real embargoed tree: it contains the
2139 // withheld content and NOT the courtesy stub.
2140 let head_state = repo.store().get_state(&head_after).unwrap().unwrap();
2141 let tree = repo.store().get_tree(&head_state.tree).unwrap().unwrap();
2142 assert!(
2143 !tree
2144 .entries()
2145 .iter()
2146 .any(|e| e.name() == COURTESY_STUB_FILENAME),
2147 "captured tree must never contain the courtesy stub"
2148 );
2149 assert!(
2150 tree.entries().iter().any(|e| e.name() == "secret.rs"),
2151 "the withheld real content must remain intact in the thread"
2152 );
2153 }
2154
2155 /// #316 / PR #528 r4: the withheld status must be scoped per *worktree
2156 /// root*, not per thread. When one thread is materialized into TWO
2157 /// worktrees — an authorized one A (real bytes) and an under-tier one B
2158 /// (withheld stub) — the under-tier materialize of B clobbers the single
2159 /// per-thread `manifest.toml`. A withheld flag stored there would then
2160 /// wrongly suppress a capture of A, silently dropping legitimate work.
2161 /// With the per-worktree marker, A captures its real edits and B no-ops.
2162 #[test]
2163 fn withheld_manifest_is_per_worktree_not_per_thread() {
2164 let repo_dir = TempDir::new().unwrap();
2165 let repo = Repository::init_default(repo_dir.path()).unwrap();
2166 fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
2167 repo.snapshot(Some("embargoed fix".into()), None).unwrap();
2168 embargo_state_with_tier(
2169 &repo,
2170 VisibilityTier::Private {
2171 scope_label: "sec-embargo".into(),
2172 },
2173 );
2174
2175 let holder_a = TempDir::new().unwrap();
2176 let worktree_a = holder_a.path().join("authorized");
2177 let holder_b = TempDir::new().unwrap();
2178 let worktree_b = holder_b.path().join("under-tier");
2179
2180 // Worktree A: the matching-scope holder gets the real bytes.
2181 let manifest_a = repo
2182 .materialize_thread(
2183 "main",
2184 &worktree_a,
2185 &AudienceTier::Restricted("sec-embargo".into()),
2186 )
2187 .unwrap();
2188 assert!(worktree_a.join("secret.rs").exists());
2189 assert!(manifest_a.files.contains_key("secret.rs"));
2190
2191 // Edit A so a correct capture produces a NEW state. Without the edit,
2192 // capturing unchanged real content is a *legitimate* no-op and wouldn't
2193 // distinguish the bug (wrong withheld-suppression) from correct
2194 // behaviour.
2195 fs::write(worktree_a.join("extra.rs"), b"fn added() {}\n").unwrap();
2196
2197 let head_before = repo
2198 .refs()
2199 .get_thread(&ThreadName::new("main"))
2200 .unwrap()
2201 .expect("head");
2202
2203 // Worktree B: under-tier audience → stub only, withheld. This clobbers
2204 // the single per-thread `manifest.toml` with B's withheld record.
2205 let manifest_b = repo
2206 .materialize_thread("main", &worktree_b, &AudienceTier::Internal)
2207 .unwrap();
2208 assert!(worktree_b.join(COURTESY_STUB_FILENAME).exists());
2209 assert!(manifest_b.files.is_empty());
2210
2211 // Capture A: must capture the real edit — its withheld status is its
2212 // own (none), NOT inherited from B's clobbering materialize.
2213 let outcome_a = repo.capture_thread_from_disk("main", &worktree_a).unwrap();
2214 let captured_state = match outcome_a {
2215 ThreadCaptureOutcome::Captured { state_id } => state_id,
2216 ThreadCaptureOutcome::NoOp => {
2217 panic!("authorized worktree A must capture its real edit, not be suppressed")
2218 }
2219 };
2220 let head_after_a = repo
2221 .refs()
2222 .get_thread(&ThreadName::new("main"))
2223 .unwrap()
2224 .expect("head");
2225 assert_ne!(head_before, head_after_a, "capture A must advance the head");
2226 assert_eq!(head_after_a, captured_state);
2227 // The captured tree carries the edit and the real content, never the stub.
2228 let captured_tree = repo
2229 .store()
2230 .get_tree(
2231 &repo
2232 .store()
2233 .get_state(&captured_state)
2234 .unwrap()
2235 .unwrap()
2236 .tree,
2237 )
2238 .unwrap()
2239 .unwrap();
2240 assert!(
2241 captured_tree
2242 .entries()
2243 .iter()
2244 .any(|e| e.name() == "extra.rs")
2245 );
2246 assert!(
2247 captured_tree
2248 .entries()
2249 .iter()
2250 .any(|e| e.name() == "secret.rs")
2251 );
2252 assert!(
2253 !captured_tree
2254 .entries()
2255 .iter()
2256 .any(|e| e.name() == COURTESY_STUB_FILENAME)
2257 );
2258
2259 // Capture B: must be a no-op — its own worktree is withheld.
2260 let outcome_b = repo.capture_thread_from_disk("main", &worktree_b).unwrap();
2261 assert_eq!(
2262 outcome_b,
2263 ThreadCaptureOutcome::NoOp,
2264 "under-tier worktree B is non-capturable"
2265 );
2266 let head_after_b = repo
2267 .refs()
2268 .get_thread(&ThreadName::new("main"))
2269 .unwrap()
2270 .expect("head");
2271 assert_eq!(
2272 head_after_a, head_after_b,
2273 "withheld capture of B must not advance the head"
2274 );
2275 }
2276
2277 /// `record_thread_manifest` should write a manifest sidecar that
2278 /// matches what `materialize_thread` would have produced, for a
2279 /// worktree the caller materialized directly via `materialize_tree`.
2280 /// Used by the CLI's `start` path (which sets the worktree up
2281 /// itself rather than going through `materialize_thread`).
2282 #[test]
2283 fn record_thread_manifest_writes_sidecar_for_externally_materialized_worktree() {
2284 let repo_dir = TempDir::new().unwrap();
2285 let repo = Repository::init_default(repo_dir.path()).unwrap();
2286 fs::write(repo_dir.path().join("a.txt"), b"alpha\n").unwrap();
2287 fs::write(repo_dir.path().join("b.txt"), b"beta\n").unwrap();
2288 repo.snapshot(Some("seed".into()), None).unwrap();
2289 let state_id = repo
2290 .refs()
2291 .get_thread(&ThreadName::new("main"))
2292 .unwrap()
2293 .expect("head present");
2294
2295 // Materialize externally via the lower-level `materialize_tree`
2296 // path — the shape `start --workspace materialized` uses.
2297 let dest_holder = TempDir::new().unwrap();
2298 let dest = dest_holder.path().join("out");
2299 let state = repo.store().get_state(&state_id).unwrap().unwrap();
2300 let tree = repo.store().get_tree(&state.tree).unwrap().unwrap();
2301 repo.materialize_tree(&tree, &dest).unwrap();
2302
2303 // No manifest written yet — `materialize_tree` is the bytes-only
2304 // step; the sidecar is recorded explicitly.
2305 assert!(
2306 read_manifest(repo.heddle_dir(), "feature/x")
2307 .unwrap()
2308 .is_none()
2309 );
2310
2311 let recorded = repo
2312 .record_thread_manifest("feature/x", &state_id, &dest)
2313 .unwrap();
2314 assert_eq!(recorded.state_id, state_id);
2315 assert_eq!(recorded.tree_hash, state.tree);
2316 assert!(recorded.files.contains_key("a.txt"));
2317 assert!(recorded.files.contains_key("b.txt"));
2318 assert_eq!(recorded.files["a.txt"].size, b"alpha\n".len() as u64);
2319
2320 // Sidecar persists at the expected location and round-trips.
2321 let loaded = read_manifest(repo.heddle_dir(), "feature/x")
2322 .unwrap()
2323 .expect("manifest on disk");
2324 assert_eq!(loaded.state_id, recorded.state_id);
2325 assert_eq!(loaded.files.len(), recorded.files.len());
2326
2327 // Idempotent: a second recording for the same thread succeeds
2328 // (used by `capture_thread_from_disk` post-capture refresh).
2329 repo.record_thread_manifest("feature/x", &state_id, &dest)
2330 .unwrap();
2331 }
2332
2333 /// `record_thread_manifest` against an unknown `state_id` should
2334 /// surface a clear "state missing" error instead of silently
2335 /// writing a manifest with no files (which would later look like
2336 /// a deletion of every tracked path).
2337 #[test]
2338 fn record_thread_manifest_errors_when_state_is_missing() {
2339 let repo_dir = TempDir::new().unwrap();
2340 let repo = Repository::init_default(repo_dir.path()).unwrap();
2341 let dest = TempDir::new().unwrap();
2342 let missing = objects::object::ChangeId::generate();
2343 let err = repo
2344 .record_thread_manifest("feature/x", &missing, &dest.path().join("out"))
2345 .expect_err("should fail when state is unknown");
2346 let message = format!("{err}");
2347 assert!(
2348 message.contains("missing"),
2349 "error message names the missing artifact: {message}"
2350 );
2351 }
2352
2353 #[test]
2354 fn materialize_unknown_thread_errors() {
2355 let repo_dir = TempDir::new().unwrap();
2356 let repo = Repository::init_default(repo_dir.path()).unwrap();
2357 let dest = TempDir::new().unwrap();
2358 let err = repo
2359 .materialize_thread(
2360 "no-such-thread",
2361 &dest.path().join("out"),
2362 &AudienceTier::Internal,
2363 )
2364 .expect_err("should fail");
2365 assert!(format!("{err}").contains("unknown thread"));
2366 }
2367
/// Round-trip through a materialized worktree: materialize `main`, edit
/// a file inside the worktree, capture from disk, then verify that a new
/// state was produced, the thread head moved onto it, and the stored
/// manifest now describes that state.
#[test]
fn capture_after_edit_advances_thread() {
    let repo_root = TempDir::new().unwrap();
    let repo = Repository::init_default(repo_root.path()).unwrap();
    fs::write(repo_root.path().join("hello.txt"), b"hello\n").unwrap();
    repo.snapshot(Some("seed".into()), None).unwrap();

    let main = ThreadName::new("main");
    let head_before = repo.refs().get_thread(&main).unwrap().expect("head");

    let scratch = TempDir::new().unwrap();
    let worktree = scratch.path().join("out");
    let at_materialize = repo
        .materialize_thread("main", &worktree, &AudienceTier::Internal)
        .unwrap();

    // Change the file's contents inside the materialized worktree.
    fs::write(worktree.join("hello.txt"), b"hello world\n").unwrap();

    let captured = repo
        .capture_thread_from_disk("main", &worktree)
        .expect("capture");
    let ThreadCaptureOutcome::Captured {
        state_id: new_state,
    } = captured
    else {
        panic!("expected Captured, got NoOp");
    };

    // The thread head now points at the freshly captured state.
    let head_after = repo.refs().get_thread(&main).unwrap().expect("head");
    assert_ne!(head_before, head_after);
    assert_eq!(head_after, new_state);

    // The persisted manifest was rewritten for the new state and tree.
    let stored = read_manifest(repo.heddle_dir(), "main")
        .unwrap()
        .expect("manifest");
    assert_eq!(stored.state_id, new_state);
    assert_ne!(stored.tree_hash, at_materialize.tree_hash);
    assert!(stored.files.contains_key("hello.txt"));
}
2417
/// Capturing a worktree that was not edited after materialization
/// reports `NoOp` and leaves the thread head on the same state.
#[test]
fn capture_with_no_changes_is_noop() {
    let repo_root = TempDir::new().unwrap();
    let repo = Repository::init_default(repo_root.path()).unwrap();
    fs::write(repo_root.path().join("steady.txt"), b"unchanged\n").unwrap();
    repo.snapshot(Some("seed".into()), None).unwrap();

    let main = ThreadName::new("main");
    let head_before = repo.refs().get_thread(&main).unwrap().expect("head");

    let scratch = TempDir::new().unwrap();
    let worktree = scratch.path().join("out");
    repo.materialize_thread("main", &worktree, &AudienceTier::Internal)
        .unwrap();

    // Nothing on disk changed, so the capture has nothing to record.
    let outcome = repo.capture_thread_from_disk("main", &worktree).unwrap();
    assert_eq!(outcome, ThreadCaptureOutcome::NoOp);

    // The head is exactly where the seed snapshot left it.
    let head_after = repo.refs().get_thread(&main).unwrap().expect("head");
    assert_eq!(head_before, head_after);
}
2448
/// Fast-path check on an untouched worktree: immediately after
/// materializing, `stat_cache_no_op` reports a match against the manifest
/// that `materialize_thread` returned, and the full
/// `capture_thread_from_disk` call reports `NoOp` as well.
#[test]
fn stat_cache_short_circuits_unchanged_capture() {
    let repo_root = TempDir::new().unwrap();
    let repo = Repository::init_default(repo_root.path()).unwrap();
    (0..20).for_each(|i| {
        let name = format!("file_{i:02}.txt");
        let body = format!("content {i}\n");
        fs::write(repo_root.path().join(name), body.as_bytes()).unwrap();
    });
    repo.snapshot(Some("seed".into()), None).unwrap();

    let scratch = TempDir::new().unwrap();
    let worktree = scratch.path().join("out");
    let manifest = repo
        .materialize_thread("main", &worktree, &AudienceTier::Internal)
        .unwrap();
    assert_eq!(manifest.files.len(), 20);

    // Call the predicate directly so the boundary the optimisation
    // guards is exercised on its own, separate from the full capture.
    let unchanged = stat_cache_no_op(&repo, &manifest, &worktree).unwrap();
    assert!(
        unchanged,
        "fresh materialise should stat-match the manifest"
    );

    // The end-to-end capture agrees with the predicate.
    let outcome = repo.capture_thread_from_disk("main", &worktree).unwrap();
    assert_eq!(outcome, ThreadCaptureOutcome::NoOp);
}
2484
/// An edit to a single file makes `stat_cache_no_op` return `false`, and
/// the subsequent `capture_thread_from_disk` call produces a new state
/// instead of a `NoOp`.
#[test]
fn stat_cache_detects_edit_and_falls_through() {
    let repo_root = TempDir::new().unwrap();
    let repo = Repository::init_default(repo_root.path()).unwrap();
    fs::write(repo_root.path().join("only.txt"), b"v1\n").unwrap();
    repo.snapshot(Some("seed".into()), None).unwrap();

    let scratch = TempDir::new().unwrap();
    let worktree = scratch.path().join("out");
    let manifest = repo
        .materialize_thread("main", &worktree, &AudienceTier::Internal)
        .unwrap();

    // Pause before rewriting so the file's timestamps land observably
    // after materialization. The replacement has the same length as the
    // original ("v1\n" vs "v2\n"), so size alone cannot tell them apart.
    // NOTE(review): 20 ms only helps if the filesystem's timestamp
    // granularity is finer than that — confirm on the CI filesystems.
    std::thread::sleep(std::time::Duration::from_millis(20));
    fs::write(worktree.join("only.txt"), b"v2\n").unwrap();

    let unchanged = stat_cache_no_op(&repo, &manifest, &worktree).unwrap();
    assert!(!unchanged, "edited file must invalidate the fast path");

    // With the fast path declined, the capture records a new state.
    let outcome = repo.capture_thread_from_disk("main", &worktree).unwrap();
    assert!(
        matches!(outcome, ThreadCaptureOutcome::Captured { .. }),
        "expected Captured, got {outcome:?}"
    );
}
2519
/// A file created in the worktree after materialization has no entry in
/// the manifest, so the fast-path predicate must report `false`.
#[test]
fn stat_cache_detects_added_file() {
    let repo_root = TempDir::new().unwrap();
    let repo = Repository::init_default(repo_root.path()).unwrap();
    fs::write(repo_root.path().join("a.txt"), b"a\n").unwrap();
    repo.snapshot(Some("seed".into()), None).unwrap();

    let scratch = TempDir::new().unwrap();
    let worktree = scratch.path().join("out");
    let manifest = repo
        .materialize_thread("main", &worktree, &AudienceTier::Internal)
        .unwrap();

    // Drop a brand-new file next to the materialized one.
    fs::write(worktree.join("b.txt"), b"b\n").unwrap();

    let unchanged = stat_cache_no_op(&repo, &manifest, &worktree).unwrap();
    assert!(!unchanged, "added file must invalidate the fast path");
}
2541
/// `Repository::snapshot` (the plain `heddle capture` path) run from a
/// directory that a thread manifest records as its worktree must rewrite
/// that manifest for the newly captured state. This is the flow a user
/// hits when editing inside a materialized thread worktree and capturing
/// directly, as opposed to the `thread switch` auto-capture flow covered
/// by `capture_after_edit_advances_thread`.
#[test]
fn snapshot_in_materialized_thread_refreshes_manifest() {
    let repo_root = TempDir::new().unwrap();
    let root = repo_root.path();
    let repo = Repository::init_default(root).unwrap();
    fs::write(root.join("alpha.txt"), b"v1\n").unwrap();
    fs::write(root.join("beta.txt"), b"steady\n").unwrap();
    let seed = repo.snapshot(Some("seed".into()), None).unwrap();

    // Hand-build a manifest for `main` that names the repo root as its
    // worktree and is populated from the files as they are on disk right
    // now — the situation after a materialize when the user works inside
    // the materialized directory.
    let seed_tree = repo
        .store()
        .get_tree(&seed.tree)
        .unwrap()
        .expect("seed tree");
    let mut manifest = crate::thread_manifest::ThreadManifest::new(
        seed.change_id,
        seed.tree,
        canonical_worktree_path(root),
    );
    populate_manifest_from_tree(&repo, &seed_tree, root, "", &mut manifest.files).unwrap();
    crate::thread_manifest::write_manifest(repo.heddle_dir(), "main", &manifest).unwrap();

    // Pause so the rewrite's timestamps differ from the ones just
    // recorded; the new contents have the same length as the old.
    // NOTE(review): 20 ms only helps if the filesystem's timestamp
    // granularity is finer than that — confirm on the CI filesystems.
    std::thread::sleep(std::time::Duration::from_millis(20));
    fs::write(root.join("alpha.txt"), b"v2\n").unwrap();

    let captured = repo.snapshot(Some("after edit".into()), None).unwrap();
    assert_ne!(captured.change_id, seed.change_id);
    assert_ne!(captured.tree, seed.tree);

    // The stored manifest now points at the new state and tree.
    let refreshed = crate::thread_manifest::read_manifest(repo.heddle_dir(), "main")
        .unwrap()
        .expect("manifest persists");
    assert_eq!(refreshed.state_id, captured.change_id);
    assert_eq!(refreshed.tree_hash, captured.tree);
    // Both the edited file and the untouched one are still listed.
    assert!(refreshed.files.contains_key("alpha.txt"));
    assert!(refreshed.files.contains_key("beta.txt"));
}
2601
/// Regression guard: a snapshot taken from a directory other than the
/// worktree path recorded in the manifest must leave that manifest
/// alone. The earlier behaviour keyed the "materialized-thread context"
/// only on "HEAD attached + a manifest exists for the attached thread",
/// so snapshotting from the main repo dir (or a sibling worktree) wrote
/// the wrong directory's stat fields into the manifest and rolled its
/// `state_id` forward — which in turn made `heddle status` report the
/// materialized worktree as fresh when it was not.
#[test]
fn snapshot_outside_materialized_worktree_does_not_refresh_manifest() {
    let repo_root = TempDir::new().unwrap();
    let root = repo_root.path();
    let repo = Repository::init_default(root).unwrap();
    fs::write(root.join("alpha.txt"), b"v1\n").unwrap();
    repo.snapshot(Some("seed".into()), None).unwrap();

    // Materialize `main` somewhere unrelated to the repo root; the
    // manifest should record that location as the worktree.
    let scratch = TempDir::new().unwrap();
    let worktree = scratch.path().join("out");
    let at_materialize = repo
        .materialize_thread("main", &worktree, &AudienceTier::Internal)
        .unwrap();
    let materialize_state_id = at_materialize.state_id;
    let materialize_tree_hash = at_materialize.tree_hash;
    let materialized_path = at_materialize.worktree_path.clone();
    assert_eq!(
        materialized_path,
        canonical_worktree_path(&worktree),
        "manifest must record the canonical materialize destination"
    );

    // Edit and snapshot from the main repo dir, which is not the
    // materialized worktree. This is where the old bug triggered.
    std::thread::sleep(std::time::Duration::from_millis(20));
    fs::write(root.join("alpha.txt"), b"v2-from-main-repo\n").unwrap();
    let snap = repo
        .snapshot(Some("from main repo, not the mat worktree".into()), None)
        .unwrap();
    assert_ne!(
        snap.change_id, materialize_state_id,
        "snapshot must advance main's head"
    );

    // Every field of the stored manifest still describes the
    // materialize-time state and destination.
    let stored = crate::thread_manifest::read_manifest(repo.heddle_dir(), "main")
        .unwrap()
        .expect("manifest still present");
    assert_eq!(
        stored.state_id, materialize_state_id,
        "manifest state_id must NOT advance when snapshot is taken outside the materialized worktree"
    );
    assert_eq!(
        stored.tree_hash, materialize_tree_hash,
        "manifest tree_hash must NOT advance"
    );
    assert_eq!(
        stored.worktree_path, materialized_path,
        "manifest worktree_path must be unchanged"
    );

    // The head moved while the manifest stayed put, which is the
    // condition a staleness check needs in order to flag the worktree.
    let main = ThreadName::new("main");
    let head_now = repo.refs().get_thread(&main).unwrap().expect("head");
    assert_ne!(
        head_now, stored.state_id,
        "post-fix invariant: main head advanced past manifest's recorded state → stale"
    );
}
2680
/// Regression guard: capturing from a *dedicated* thread worktree (a
/// path different from `repo.root()`) must validate symlinks relative
/// to that worktree, not relative to the main repo root. The earlier
/// walker used `repo.root()` as the symlink-escape base, so once the
/// slow path ran every symlink inside a dedicated worktree was rejected
/// as escaping the repo, which broke `thread switch` auto-capture for
/// any thread containing a symlink.
#[cfg(unix)]
#[test]
fn capture_thread_from_disk_accepts_symlinks_in_dedicated_worktree() {
    let repo_root = TempDir::new().unwrap();
    let root = repo_root.path();
    let repo = Repository::init_default(root).unwrap();

    // Seed a regular file and a relative symlink that points at it.
    fs::write(root.join("target.txt"), b"target\n").unwrap();
    std::os::unix::fs::symlink("target.txt", root.join("link")).unwrap();
    repo.snapshot(Some("seed".into()), None).unwrap();

    // The worktree lives outside `repo.root()`, which is precisely the
    // layout that used to trip the escape check.
    let scratch = TempDir::new().unwrap();
    let worktree = scratch.path().join("thread-worktree");
    repo.materialize_thread("main", &worktree, &AudienceTier::Internal)
        .unwrap();

    // Touch a regular file so the capture cannot take the stat-cache
    // no-op shortcut (which would hide the bug). The pause gives the
    // rewrite distinct timestamps on coarse-granularity filesystems.
    std::thread::sleep(std::time::Duration::from_millis(20));
    fs::write(worktree.join("target.txt"), b"target v2\n").unwrap();

    // Previously this failed with a "symlink target escapes repo" style
    // error because the allowed base was `repo.root()` rather than the
    // directory being walked.
    let outcome = repo
        .capture_thread_from_disk("main", &worktree)
        .expect("capture must accept symlinks inside the dedicated worktree");
    if let ThreadCaptureOutcome::NoOp = outcome {
        panic!("expected Captured; got NoOp");
    }
}
2725
/// Regression guard (Codex pass-5 P1): if the ignore set becomes
/// stricter between materialize and capture — e.g. a `.heddleignore`
/// entry is added that covers a path already tracked — the no-op
/// predicate has to decline so the slow path can rebuild a tree that
/// matches the current matcher. The earlier manifest-first dispatch
/// accepted any manifest hit without consulting the matcher again, so
/// the predicate wrongly passed and `thread switch`'s auto-capture
/// missed the resulting tree delta.
#[test]
fn stat_cache_detects_ignore_config_tightening() {
    let repo_root = TempDir::new().unwrap();
    let root = repo_root.path();
    let repo = Repository::init_default(root).unwrap();

    // Two tracked files and, for now, no `.heddleignore`.
    fs::write(root.join("keep.txt"), b"keep\n").unwrap();
    fs::write(root.join("secret.txt"), b"secret\n").unwrap();
    repo.snapshot(Some("seed".into()), None).unwrap();

    let scratch = TempDir::new().unwrap();
    let worktree = scratch.path().join("out");
    let manifest = repo
        .materialize_thread("main", &worktree, &AudienceTier::Internal)
        .unwrap();
    assert!(manifest.files.contains_key("secret.txt"));

    // Add an ignore rule in the source repo that excludes `secret.txt`.
    // The file is still present in the materialized worktree, but a
    // tree rebuilt under the new rule would no longer include it.
    fs::write(root.join(".heddleignore"), b"secret.txt\n").unwrap();

    let unchanged = stat_cache_no_op(&repo, &manifest, &worktree).unwrap();
    assert!(
        !unchanged,
        "ignore-config tightening over a tracked path must \
         invalidate the fast path; pre-fix the predicate \
         false-passed and auto-capture silently dropped \
         the resulting tree delta"
    );
}
2765
/// Regression guard (Codex pass-3 P2): a *tree-only* empty directory —
/// one captured as a tree entry with no files beneath it — used to be
/// invisible to the predicate because `expected_dirs` was derived from
/// the ancestors of manifest files. Deleting such a directory changed
/// none of the compared sets, so the predicate wrongly passed and the
/// deletion was dropped. Deriving `expected_dirs` from the captured
/// tree itself makes empty leaf directories count.
#[test]
fn stat_cache_detects_removed_tree_only_empty_directory() {
    let repo_root = TempDir::new().unwrap();
    let root = repo_root.path();
    let repo = Repository::init_default(root).unwrap();

    // One real file keeps the thread non-empty; the directory next to
    // it contains nothing and is captured purely as a tree entry.
    fs::write(root.join("anchor.txt"), b"anchor\n").unwrap();
    fs::create_dir_all(root.join("empty-on-purpose")).unwrap();
    repo.snapshot(Some("seed".into()), None).unwrap();

    let scratch = TempDir::new().unwrap();
    let worktree = scratch.path().join("out");
    let manifest = repo
        .materialize_thread("main", &worktree, &AudienceTier::Internal)
        .unwrap();

    // Precondition: materialization recreated the empty directory.
    let empty_dir = worktree.join("empty-on-purpose");
    assert!(
        empty_dir.is_dir(),
        "materialise must emit the empty dir on disk"
    );

    // Delete it. No file is affected, so the only difference from the
    // captured tree is the missing directory entry.
    fs::remove_dir(&empty_dir).unwrap();

    let unchanged = stat_cache_no_op(&repo, &manifest, &worktree).unwrap();
    assert!(
        !unchanged,
        "removing a tree-only empty directory must invalidate \
         the fast path; pre-fix the predicate false-passed and \
         auto-capture silently dropped the deletion"
    );
}
2807
/// A user-created empty directory has no manifest entry (manifests list
/// files only), yet the tree builder would emit a tree entry for it. The
/// no-op predicate therefore has to decline so the slow path records the
/// addition; before the fix it passed and `thread switch`'s auto-capture
/// silently lost the new directory.
#[test]
fn stat_cache_detects_added_empty_directory() {
    let repo_root = TempDir::new().unwrap();
    let repo = Repository::init_default(repo_root.path()).unwrap();
    fs::write(repo_root.path().join("only.txt"), b"a\n").unwrap();
    repo.snapshot(Some("seed".into()), None).unwrap();

    let scratch = TempDir::new().unwrap();
    let worktree = scratch.path().join("out");
    let manifest = repo
        .materialize_thread("main", &worktree, &AudienceTier::Internal)
        .unwrap();

    // Create a directory the manifest knows nothing about.
    fs::create_dir_all(worktree.join("brand-new-empty-dir")).unwrap();

    let unchanged = stat_cache_no_op(&repo, &manifest, &worktree).unwrap();
    assert!(
        !unchanged,
        "an added empty directory must invalidate the fast path"
    );
}
2834
/// Empty directory added *inside an already-tracked parent* → fast path
/// declines.
///
/// NOTE: despite its name, this test does not remove anything. It seeds
/// `nested/deep/leaf.txt`, materializes, and then creates a new empty
/// directory `nested/sibling-empty` under the existing `nested/` parent.
/// The predicate must notice the extra directory even though every
/// manifest file is untouched and the new directory's parent is one the
/// materialized tree already contains. Removal of an empty directory is
/// exercised by `stat_cache_detects_removed_tree_only_empty_directory`.
/// The name is kept so existing test filters keep matching.
#[test]
fn stat_cache_detects_removed_empty_directory() {
    let repo_dir = TempDir::new().unwrap();
    let repo = Repository::init_default(repo_dir.path()).unwrap();
    fs::create_dir_all(repo_dir.path().join("nested/deep")).unwrap();
    fs::write(repo_dir.path().join("nested/deep/leaf.txt"), b"leaf\n").unwrap();
    repo.snapshot(Some("seed".into()), None).unwrap();

    let dest_holder = TempDir::new().unwrap();
    let dest = dest_holder.path().join("out");
    let manifest = repo
        .materialize_thread("main", &dest, &AudienceTier::Internal)
        .unwrap();

    // Removing `leaf.txt` together with its parent would already be
    // caught by the file-side comparison, so it could not show that the
    // directory-side comparison works on its own. Instead, leave every
    // file alone and add an empty sibling directory under `nested/`:
    // the only difference from the materialized tree is that directory.
    fs::create_dir_all(dest.join("nested/sibling-empty")).unwrap();

    assert!(
        !stat_cache_no_op(&repo, &manifest, &dest).unwrap(),
        "an added empty directory inside an existing parent must invalidate"
    );
}
2866
/// Removing a file that the manifest lists must make the fast-path
/// predicate report `false`.
#[test]
fn stat_cache_detects_deletion() {
    let repo_root = TempDir::new().unwrap();
    let root = repo_root.path();
    let repo = Repository::init_default(root).unwrap();
    fs::write(root.join("a.txt"), b"a\n").unwrap();
    fs::write(root.join("b.txt"), b"b\n").unwrap();
    repo.snapshot(Some("seed".into()), None).unwrap();

    let scratch = TempDir::new().unwrap();
    let worktree = scratch.path().join("out");
    let manifest = repo
        .materialize_thread("main", &worktree, &AudienceTier::Internal)
        .unwrap();

    // Delete one of the two materialized files; the other stays put.
    fs::remove_file(worktree.join("a.txt")).unwrap();

    let unchanged = stat_cache_no_op(&repo, &manifest, &worktree).unwrap();
    assert!(!unchanged, "deleted file must invalidate the fast path");
}
2889
/// Two `capture_thread_from_disk` calls on the same thread, issued from
/// different OS threads, must serialize through the repository write
/// lock: the resulting parent chain has to include both captures, with
/// no lost update where one capture's parent is the pre-race head
/// instead of the other capture's state.
///
/// Reproduces the race Codex P1 #2 named: pre-fix, two sibling
/// worktrees doing `heddle thread switch` against the same source
/// thread both read the same parent in `refs().get_thread()`, both
/// `put_state` with that parent, and both `set_thread` — whichever
/// `set_thread` landed last orphaned the other state on disk. With the
/// lock the captures run in series, so the later one names the earlier
/// one as its parent.
///
/// Terminology used below: the "winner" is the capture whose state ended
/// up as the final thread head; the "loser" is the other capture.
#[test]
fn concurrent_captures_serialize_via_repository_lock() {
    use std::sync::Arc;

    let repo_dir = TempDir::new().unwrap();
    let repo = Arc::new(Repository::init_default(repo_dir.path()).unwrap());
    fs::write(repo_dir.path().join("shared.txt"), b"seed\n").unwrap();
    repo.snapshot(Some("seed".into()), None).unwrap();
    let initial_head = repo
        .refs()
        .get_thread(&ThreadName::new("main"))
        .unwrap()
        .expect("seeded");

    // Two sibling materialized worktrees of the same thread.
    let dest_a_holder = TempDir::new().unwrap();
    let dest_a = dest_a_holder.path().join("a");
    repo.materialize_thread("main", &dest_a, &AudienceTier::Internal)
        .unwrap();
    let dest_b_holder = TempDir::new().unwrap();
    let dest_b = dest_b_holder.path().join("b");
    repo.materialize_thread("main", &dest_b, &AudienceTier::Internal)
        .unwrap();

    // Give each worktree its own edit so both captures have real work
    // to do (no stat-cache no-op short-circuit) and produce different
    // trees. The pause lets the rewrites pick up fresh timestamps.
    std::thread::sleep(std::time::Duration::from_millis(20));
    fs::write(dest_a.join("shared.txt"), b"edited-by-a\n").unwrap();
    fs::write(dest_b.join("shared.txt"), b"edited-by-b\n").unwrap();

    // Race the two captures.
    let repo_a = Arc::clone(&repo);
    let repo_b = Arc::clone(&repo);
    let h_a = std::thread::spawn(move || {
        repo_a
            .capture_thread_from_disk("main", &dest_a)
            .expect("capture A")
    });
    let h_b = std::thread::spawn(move || {
        repo_b
            .capture_thread_from_disk("main", &dest_b)
            .expect("capture B")
    });
    let outcome_a = h_a.join().expect("thread A");
    let outcome_b = h_b.join().expect("thread B");

    // Both captures must have recorded a state: each worktree's file
    // differs from the seed and from the other worktree's file.
    let ThreadCaptureOutcome::Captured { state_id: id_a } = outcome_a else {
        panic!("A expected Captured");
    };
    let ThreadCaptureOutcome::Captured { state_id: id_b } = outcome_b else {
        panic!("B expected Captured");
    };
    assert_ne!(id_a, id_b, "the two captures must produce distinct states");

    let final_head = repo
        .refs()
        .get_thread(&ThreadName::new("main"))
        .unwrap()
        .expect("head");

    // The winner/loser split below is only meaningful if the head really
    // is one of the two captured states. Without this check a head that
    // matched neither would silently label `id_a` the loser.
    assert!(
        final_head == id_a || final_head == id_b,
        "thread head {final_head} must be one of the two captured states ({id_a}, {id_b})"
    );
    let winner_id = final_head;
    let loser_id = if final_head == id_a { id_b } else { id_a };

    let winner_state = repo
        .store()
        .get_state(&winner_id)
        .unwrap()
        .expect("winner state on disk");
    let loser_state = repo
        .store()
        .get_state(&loser_id)
        .unwrap()
        .expect("loser state on disk");

    // Serialized captures form a chain: one of the two states names the
    // other as a parent. Pre-fix both states named the seed head and
    // the loser was orphaned — assert that this isn't the case.
    let chained =
        winner_state.parents.contains(&loser_id) || loser_state.parents.contains(&winner_id);
    assert!(
        chained,
        "concurrent captures must chain through the lock; got\n \
         winner {winner_id} parents={:?}\n loser {loser_id} parents={:?}",
        winner_state.parents, loser_state.parents
    );
    // Whichever capture ran first must still hang off the seed head.
    assert!(
        winner_state.parents.contains(&initial_head)
            || loser_state.parents.contains(&initial_head),
        "the bottom of the chain must still reach the seed head"
    );
}
3004}