repo/repository_thread_materialize.rs
1// SPDX-License-Identifier: Apache-2.0
2//! Thread-level materialization: resolve a thread → state → tree,
3//! materialize the tree to disk (clonefile-first via the existing
4//! `Repository::materialize_tree`), and write a [`ThreadManifest`]
5//! sidecar that captures the per-file stat-cache for fast subsequent
6//! `heddle capture` scans.
7//!
8//! This is the day-one default workspace shape for lightweight
9//! threads on reflink-capable filesystems (see
10//! `docs/design/clonefile-threads.md`). Reads off the materialized
11//! tree are vanilla `read(2)` against real APFS/btrfs files — no
12//! userspace FS callbacks in the hot path. Disk usage is the
13//! ~zero-cost clonefile share until the agent diverges blocks.
14
15use std::{
16 collections::{BTreeMap, BTreeSet},
17 fs,
18 path::{Path, PathBuf},
19};
20
21use chrono::{DateTime, Utc};
22use objects::{
23 lock::RepositoryLockExt,
24 object::{ChangeId, State, ThreadName, Tree, VisibilityTier},
25 store::ObjectStore,
26};
27use oplog::OpRecord;
28use refs::RefExpectation;
29use tracing::{debug, instrument};
30
31use super::{HeddleError, Repository, Result};
32use crate::{
33 ThreadWorktreeTargetDisposition, ThreadWorktreeTargetError,
34 thread_manifest::{ManifestFile, ThreadManifest, read_manifest, write_manifest},
35 validate_thread_worktree_target,
36 visibility::{AudienceTier, visible},
37};
38
/// Filename of the operator-local courtesy placeholder written when a
/// checked-out state's tier is not visible to the operator's audience.
///
/// The withheld branch of `Repository::checkout_state_gated` writes this file
/// directly under the checkout root; the visible branch removes any leftover
/// copy so an authorized re-materialize leaves a clean tree.
pub(crate) const COURTESY_STUB_FILENAME: &str = "HEDDLE-EMBARGO.txt";
42
/// Outcome of the visibility-gated checkout chokepoint
/// [`Repository::checkout_state_gated`].
///
/// Callers branch on this to decide which manifest shape to record: a
/// populated one for [`CheckoutMaterialization::Materialized`], or an empty
/// `withheld` one for [`CheckoutMaterialization::Withheld`].
#[derive(Clone, Debug)]
pub enum CheckoutMaterialization {
    /// The state was visible to the audience: its real tree was materialized
    /// to `dest`. Carries the resolved tree so callers can populate a manifest
    /// without a second store lookup.
    Materialized { tree: Tree },
    /// The state was under-tier for the audience: the operator-local courtesy
    /// stub was written to `dest` and the tracked bytes withheld. Carries the
    /// effective tier that caused the gate to withhold.
    Withheld { tier: VisibilityTier },
}
55
/// Outcome of [`Repository::capture_thread_from_disk`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ThreadCaptureOutcome {
    /// No new state was written and the thread head did not move.
    ///
    /// Returned when the materialized tree matches the existing thread head.
    /// On the content-hash path the manifest is refreshed to reflect the
    /// latest stat fields (so subsequent captures stay fast even if mtimes
    /// drifted via `touch`). It is also returned — without touching the
    /// manifest — when the stat-cache fast path hits, and when the worktree
    /// root is marked as a withheld checkout (withheld checkouts are
    /// non-capturable).
    NoOp,
    /// A new state was written and the thread head advanced.
    /// `state_id` is the id of that new state.
    Captured { state_id: ChangeId },
}
67
68fn thread_worktree_target_error(error: ThreadWorktreeTargetError) -> HeddleError {
69 match error {
70 ThreadWorktreeTargetError::Io { source, .. } => HeddleError::Io(source),
71 ThreadWorktreeTargetError::Symlink { path } => HeddleError::Conflict(format!(
72 "thread worktree target '{}' cannot be a symlink",
73 path.display()
74 )),
75 ThreadWorktreeTargetError::NotDirectory { path } => HeddleError::Conflict(format!(
76 "thread worktree target '{}' must be a directory",
77 path.display()
78 )),
79 ThreadWorktreeTargetError::NotEmpty { path } => HeddleError::Conflict(format!(
80 "thread worktree target '{}' is not empty",
81 path.display()
82 )),
83 }
84}
85
86fn prepare_thread_worktree_target(dest: &Path) -> Result<ThreadWorktreeTargetDisposition> {
87 let disposition =
88 validate_thread_worktree_target(dest).map_err(thread_worktree_target_error)?;
89 if disposition == ThreadWorktreeTargetDisposition::Absent {
90 fs::create_dir_all(dest).map_err(HeddleError::Io)?;
91 validate_thread_worktree_target(dest).map_err(thread_worktree_target_error)?;
92 }
93 Ok(disposition)
94}
95
/// Delete everything inside `dir` while leaving `dir` itself in place.
///
/// A `dir` that is a symlink, or that is not a directory at all, is left
/// untouched and reported as success. A `dir` that does not exist surfaces the
/// `symlink_metadata` error to the caller. Child directories are removed
/// recursively; every other child (files, symlinks) is unlinked. The first
/// failing removal aborts the walk and is returned.
fn clear_dir_contents(dir: &Path) -> std::io::Result<()> {
    // `symlink_metadata` does not follow links, so a symlinked directory is
    // seen as a symlink and never walked.
    let info = fs::symlink_metadata(dir)?;
    let is_real_dir = !info.file_type().is_symlink() && info.is_dir();
    if !is_real_dir {
        return Ok(());
    }

    fs::read_dir(dir)?.try_for_each(|child| {
        let child = child?;
        let target = child.path();
        if child.file_type()?.is_dir() {
            fs::remove_dir_all(&target)
        } else {
            fs::remove_file(&target)
        }
    })
}
113
114fn cleanup_thread_worktree_target(
115 dest: &Path,
116 disposition: ThreadWorktreeTargetDisposition,
117) -> Result<()> {
118 match clear_dir_contents(dest) {
119 Ok(()) => {}
120 Err(err)
121 if err.kind() == std::io::ErrorKind::NotFound
122 || err.kind() == std::io::ErrorKind::NotADirectory => {}
123 Err(err) => return Err(HeddleError::Io(err)),
124 }
125
126 if disposition == ThreadWorktreeTargetDisposition::Absent {
127 match fs::remove_dir(dest) {
128 Ok(()) => {}
129 Err(err)
130 if err.kind() == std::io::ErrorKind::NotFound
131 || err.kind() == std::io::ErrorKind::NotADirectory => {}
132 Err(err) => return Err(HeddleError::Io(err)),
133 }
134 }
135
136 Ok(())
137}
138
139impl Repository {
140 /// Materialize the captured tree of `thread` to `dest` and write
141 /// a [`ThreadManifest`] sidecar to
142 /// `<heddle_dir>/threads/<thread>/manifest.toml`.
143 ///
144 /// Order of operations:
145 /// 1. Resolve `thread` → `ChangeId` → `State` → `Tree`.
146 /// 2. Call `Repository::materialize_tree(&tree, dest)` — the
147 /// existing clonefile-first materializer does the heavy
148 /// lifting (loose-uncompressed promotion, parallel writes).
149 /// 3. Walk the materialized tree and capture per-file
150 /// `(hash, inode, mtime_ns, ctime_ns, mode)` into the
151 /// manifest.
152 /// 4. Atomically write the manifest.
153 ///
154 /// The walk step in (3) is a single `stat` per file — sub-ms for
155 /// the 643-file heddle workspace. Doing the walk after
156 /// materialize rather than capturing stats during materialize
157 /// keeps the existing materializer untouched.
158 #[instrument(skip(self), fields(thread = %thread, dest = %dest.display()))]
159 pub fn materialize_thread(
160 &self,
161 thread: &str,
162 dest: &Path,
163 audience: &AudienceTier,
164 ) -> Result<ThreadManifest> {
165 let change_id = self
166 .refs()
167 .resolve(thread)?
168 .ok_or_else(|| HeddleError::Config(format!("unknown thread {thread}")))?;
169 let state = self
170 .store()
171 .get_state(&change_id)?
172 .ok_or_else(|| HeddleError::Config(format!("state for {thread} missing")))?;
173 let target_disposition = prepare_thread_worktree_target(dest)?;
174
175 // Route through the single visibility-gated checkout chokepoint, which
176 // either materializes the real tree or writes the operator-local
177 // courtesy stub. The manifest is this method's own concern (it lives
178 // outside the checkout dir), so it is written here based on the gate
179 // outcome — not in the chokepoint, which `write_isolated_checkout` also
180 // calls without wanting a thread manifest.
181 let result = (|| -> Result<ThreadManifest> {
182 match self.checkout_state_gated(&change_id, &state, dest, audience)? {
183 CheckoutMaterialization::Withheld { tier } => {
184 // Manifest reflects disk truth: no tracked files were
185 // materialized (the placeholder is untracked). `tree_hash`
186 // still names the real embargoed state's tree so the sidecar
187 // identifies which state this checkout stands in for. The
188 // `withheld` flag here is diagnostic only — it records that the
189 // *last* materialize of this thread was withheld, but the
190 // per-thread manifest is clobbered by a sibling worktree of the
191 // same thread. The authoritative, per-worktree non-capturable
192 // signal is the withheld marker written by
193 // `checkout_state_gated`, keyed on the worktree root (heddle#316).
194 let mut manifest =
195 ThreadManifest::new(change_id, state.tree, canonical_worktree_path(dest));
196 manifest.withheld = true;
197 write_manifest(self.heddle_dir(), thread, &manifest)
198 .map_err(HeddleError::Io)?;
199 debug!(
200 thread = %thread,
201 state_id = %change_id,
202 tier = tier.as_str(),
203 "thread checkout rendered courtesy stub (under-tier for audience)"
204 );
205 Ok(manifest)
206 }
207 CheckoutMaterialization::Materialized { tree } => {
208 let mut manifest =
209 ThreadManifest::new(change_id, state.tree, canonical_worktree_path(dest));
210 populate_manifest_from_tree(self, &tree, dest, "", &mut manifest.files)?;
211 write_manifest(self.heddle_dir(), thread, &manifest)
212 .map_err(HeddleError::Io)?;
213 debug!(
214 thread = %thread,
215 state_id = %change_id,
216 files = manifest.files.len(),
217 "thread materialized"
218 );
219 Ok(manifest)
220 }
221 }
222 })();
223
224 if result.is_err() {
225 cleanup_thread_worktree_target(dest, target_disposition)?;
226 }
227
228 result
229 }
230
    /// THE visibility-gated checkout chokepoint. Resolve `change_id`'s
    /// effective tier against `audience` and either materialize its real tree
    /// to `dest` (visible) or write the operator-local courtesy stub and
    /// withhold the tracked bytes (under-tier).
    ///
    /// Every path that serves a *named committed state*'s content to a local
    /// checkout MUST funnel through here — `materialize_thread` and the CLI's
    /// `write_isolated_checkout` (`heddle start --path`) both do — so the
    /// visibility gate cannot be bypassed by a caller reaching for the raw,
    /// blob-keyed `materialize_tree`. The decision is made HERE, where the
    /// `ChangeId` and the audience are both in scope; `materialize_tree`
    /// carries neither and so cannot make it. `materialize_tree` stays the
    /// primitive for *computed* trees (merge/cherry-pick results), which are
    /// not a single named state and carry no audience.
    ///
    /// The courtesy stub is a working-tree convenience on bytes the operator
    /// already holds — NOT a security boundary and NOT a public-mirror surface
    /// (the public mirror emits absence, spike §5.3).
    ///
    /// # Parameters
    ///
    /// - `change_id`: the state whose effective visibility tier is resolved.
    /// - `state`: the already-loaded state for `change_id`; only its `tree`
    ///   is read here.
    /// - `dest`: checkout root. Created if missing on the withheld branch; on
    ///   the visible branch it is handed to `materialize_tree` as-is.
    /// - `audience`: the audience the tier is checked against.
    ///
    /// # Returns
    ///
    /// [`CheckoutMaterialization::Withheld`] with the effective tier when the
    /// state is not visible, otherwise [`CheckoutMaterialization::Materialized`]
    /// carrying the tree that was written.
    ///
    /// # Side effects
    ///
    /// Both branches reconcile previously recorded tracked leaves at `dest`
    /// and rewrite the per-root materialized-leaves record. The withheld
    /// branch additionally writes the stub file and the per-root withheld
    /// marker; the visible branch clears that marker and removes a leftover
    /// stub file.
    ///
    /// # Errors
    ///
    /// [`HeddleError::Config`] when visibility cannot be resolved or (visible
    /// branch) the state's tree is missing from the store;
    /// [`HeddleError::Io`] for directory creation, sidecar, and stub I/O
    /// failures; plus anything propagated from the store, `materialize_tree`,
    /// the leaf walk, or the reconcile step.
    pub fn checkout_state_gated(
        &self,
        change_id: &ChangeId,
        state: &State,
        dest: &Path,
        audience: &AudienceTier,
    ) -> Result<CheckoutMaterialization> {
        let tier = self.effective_visibility_tier(change_id).map_err(|e| {
            HeddleError::Config(format!("resolve visibility for {change_id}: {e:#}"))
        })?;
        if !visible(&tier, audience) {
            fs::create_dir_all(dest).map_err(HeddleError::Io)?;
            // Canonicalize ONLY after the directory exists. `canonical_worktree_path`
            // falls back to the raw input when `dest` does not yet resolve (a relative
            // path, or a path through a not-yet-created symlink), so a pre-creation
            // canonicalize would key the withheld marker and the `.leaves` record on a
            // path `capture_thread_from_disk` never resolves to at read-time — the read
            // canonicalizes the now-existing root, misses the marker, and captures a
            // withheld checkout as a stub-only tree instead of no-oping. Resolving here,
            // once `create_dir_all` has made `dest` exist, guarantees the write-time
            // canonical root equals the read-time one (heddle#316).
            let canonical = canonical_worktree_path(dest);
            // Reconcile the root DOWN to the withheld tier: every tracked leaf a
            // prior materialize of this root wrote must be removed, so the
            // checkout holds ONLY the courtesy stub — never the very bytes the
            // gate is withholding. `keep` is empty (the withheld tier permits no
            // tracked content). `must_remove` additionally names the withheld
            // state's own tree leaves, so the leak is closed even when no prior
            // manifest survives for this root (a sibling worktree clobbered it).
            // The stub itself is untracked and so never in either set (heddle#316
            // CLASS 1).
            let mut withheld_leaves = BTreeSet::new();
            // A tree missing from the store is tolerated on this branch: the
            // set simply stays empty and only recorded prior leaves are removed.
            if let Some(tree) = self.store().get_tree(&state.tree)? {
                collect_tree_leaf_paths(self, &tree, "", &mut withheld_leaves)?;
            }
            self.reconcile_materialized_root(dest, &canonical, &BTreeSet::new(), &withheld_leaves)?;
            // Persist the clobber-proof per-root record: a withheld materialize
            // leaves ONLY the untracked courtesy stub, so the tracked-leaf set is
            // empty. Written here so the single chokepoint owns the record for
            // every funnel path, and so a later reconcile of this root reads an
            // authoritative empty set instead of falling to the backstop
            // (heddle#316 CLASS 1).
            crate::thread_manifest::write_materialized_leaves(
                self.heddle_dir(),
                &canonical,
                &BTreeSet::new(),
            )
            .map_err(HeddleError::Io)?;
            // The embargo end time (if any) is read from the state's effective
            // visibility record and passed to the stub text builder.
            let embargo_until = self
                .effective_state_visibility(change_id)
                .map_err(|e| {
                    HeddleError::Config(format!("resolve visibility for {change_id}: {e:#}"))
                })?
                .and_then(|record| record.embargo_until);
            let stub = courtesy_stub_text(&tier, embargo_until);
            fs::write(dest.join(COURTESY_STUB_FILENAME), stub.as_bytes())
                .map_err(HeddleError::Io)?;
            // Record the withheld status keyed by THIS worktree root, not by
            // thread — a sibling worktree of the same thread materialized at a
            // visible tier must keep its own capturable status (heddle#316).
            crate::thread_manifest::mark_withheld_checkout(self.heddle_dir(), &canonical)
                .map_err(HeddleError::Io)?;
            return Ok(CheckoutMaterialization::Withheld { tier });
        }

        let tree = self
            .store()
            .get_tree(&state.tree)?
            .ok_or_else(|| HeddleError::Config(format!("tree for {change_id} missing")))?;
        self.materialize_tree(&tree, dest)?;
        // Canonicalize only now that `materialize_tree` (via `create_dir_all`) has made
        // `dest` exist — same read/write-root agreement as the withheld branch above
        // (heddle#316).
        let canonical = canonical_worktree_path(dest);
        // Reconcile the root UP to the served tier: `materialize_tree` wrote the
        // real tree's leaves but does NOT remove a stale leaf a prior
        // materialize of a *different* tree left at this root. `keep` is the set
        // of leaves the served tree just wrote — any prior tracked leaf NOT in
        // it is removed, so the root holds exactly this tier's content
        // (heddle#316 CLASS 1).
        let mut served_leaves = BTreeSet::new();
        collect_tree_leaf_paths(self, &tree, "", &mut served_leaves)?;
        self.reconcile_materialized_root(dest, &canonical, &served_leaves, &BTreeSet::new())?;
        // Persist the clobber-proof per-root record of exactly the tracked leaves
        // this visible materialize left on disk, so a later withheld
        // re-materialize of this root removes precisely them even if a sibling
        // worktree of the same thread clobbered the per-thread manifest in the
        // interim (heddle#316 CLASS 1).
        crate::thread_manifest::write_materialized_leaves(
            self.heddle_dir(),
            &canonical,
            &served_leaves,
        )
        .map_err(HeddleError::Io)?;
        // This root now holds real served bytes: clear any stale withheld marker
        // a prior under-tier materialize of the same root may have left, so it
        // can't suppress this worktree's capture (heddle#316).
        crate::thread_manifest::clear_withheld_checkout(self.heddle_dir(), &canonical)
            .map_err(HeddleError::Io)?;
        // Remove any leftover courtesy stub a prior under-tier materialize of the
        // same root wrote: the stub is untracked, so the reconcile leaf-removal
        // above leaves it in place. Cosmetic — capture ignores it — but an
        // authorized re-materialize should leave a clean tree (heddle#316).
        match fs::remove_file(dest.join(COURTESY_STUB_FILENAME)) {
            Ok(()) => {}
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
            Err(e) => return Err(HeddleError::Io(e)),
        }
        Ok(CheckoutMaterialization::Materialized { tree })
    }
359
360 /// Reconcile the worktree root at `dest` so it holds EXACTLY the content the
361 /// target tier permits, regardless of what a prior materialization of the
362 /// same root left behind. THE single chokepoint both branches of
363 /// [`Repository::checkout_state_gated`] funnel through to enforce the
364 /// invariant by construction rather than via two opposite one-off cleanups
365 /// (heddle#316 CLASS 1).
366 ///
367 /// Removes every tracked leaf that (a) a prior materialization recorded for
368 /// this root in its clobber-proof per-root **materialized-leaves record**
369 /// (keyed by the canonical worktree root, so a sibling worktree of the same
370 /// thread can never erase it) UNION (b) the caller's `must_remove` set —
371 /// MINUS the `keep` set the target tier permits. Removal is guarded per file
372 /// (`NotFound` ignored) and empty ancestor directories it leaves behind are
373 /// pruned via `remove_dir` (which fails on non-empty dirs, so untracked
374 /// siblings keep their directory alive).
375 ///
376 /// Sourcing the prior leaves from the per-root record — NOT the single
377 /// per-thread `manifest.toml` — is what makes the withheld reduction
378 /// correct-by-construction: the manifest is clobbered the instant a sibling
379 /// worktree of the same thread materializes, which would drop a prior
380 /// *visible* leaf (e.g. an `old-secret.txt` removed before the withheld
381 /// target state) out of the removal set and leak it next to the stub. The
382 /// per-root record is immune to that race (heddle#316 CLASS 1).
383 ///
384 /// Never blanket-`rm -rf`s: only paths sourced from the per-root record /
385 /// `must_remove` are touched, so user-untracked files and `.git`/heddle
386 /// metadata are never removed.
387 fn reconcile_materialized_root(
388 &self,
389 dest: &Path,
390 canonical_root: &Path,
391 keep: &BTreeSet<String>,
392 must_remove: &BTreeSet<String>,
393 ) -> Result<()> {
394 let mut to_remove: BTreeSet<String> = must_remove.clone();
395 match crate::thread_manifest::read_materialized_leaves(self.heddle_dir(), canonical_root)
396 .map_err(HeddleError::Io)?
397 {
398 Some(prior_leaves) => {
399 // Clobber-proof per-root record of exactly the tracked leaves a
400 // prior materialize of THIS root left on disk. Authoritative —
401 // survives a sibling worktree's clobber of the per-thread
402 // manifest.
403 to_remove.extend(prior_leaves);
404 }
405 None => {
406 // Fail-closed backstop: no per-root record yet. Reached only on a
407 // first-ever materialize of this root (nothing prior to remove)
408 // or a root last materialized by a binary predating the per-root
409 // record. Fall back to the best-effort per-thread manifest so an
410 // upgrade-window reconcile still drops a recorded prior tree's
411 // leaves; `must_remove` (the target tier's own leaves) covers the
412 // rest. Strictly safer than trusting `must_remove` alone, and —
413 // like the primary path — touches only recorded leaves, never
414 // untracked/non-heddle files.
415 if let Some(prior) = crate::thread_manifest::manifest_for_worktree_root(
416 self.heddle_dir(),
417 canonical_root,
418 )
419 .map_err(HeddleError::Io)?
420 {
421 to_remove.extend(prior.files.keys().cloned());
422 }
423 }
424 }
425
426 let mut prune_dirs: BTreeSet<PathBuf> = BTreeSet::new();
427 for rel in &to_remove {
428 if keep.contains(rel) {
429 continue;
430 }
431 let path = dest.join(rel);
432 match fs::remove_file(&path) {
433 Ok(()) => {}
434 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
435 Err(e) => return Err(HeddleError::Io(e)),
436 }
437 // Collect ancestor directories (within `dest`) so the now-empty ones
438 // left by the removed leaf can be pruned after the pass.
439 let mut parent = path.parent();
440 while let Some(p) = parent {
441 if p == dest || !p.starts_with(dest) {
442 break;
443 }
444 prune_dirs.insert(p.to_path_buf());
445 parent = p.parent();
446 }
447 }
448
449 // Prune deepest-first so a parent only sees its children already gone.
450 // `remove_dir` errors on a non-empty dir, which we ignore — that is
451 // exactly how an untracked sibling keeps its directory.
452 let mut dirs: Vec<PathBuf> = prune_dirs.into_iter().collect();
453 dirs.sort_by_key(|d| std::cmp::Reverse(d.components().count()));
454 for d in dirs {
455 let _ = fs::remove_dir(&d);
456 }
457 Ok(())
458 }
459
460 /// Remove the per-worktree-root sidecars [`checkout_state_gated`] writes —
461 /// the clobber-proof materialized-leaves record and (if present) the withheld
462 /// marker — for the checkout at `worktree_root`. Both live under the SHARED
463 /// heddle dir keyed by the canonical worktree root, so the atomic `start`
464 /// rollback's checkout-directory rewind never reaches them; a failed-then-
465 /// rolled-back start would otherwise orphan them. Canonicalizes `worktree_root`
466 /// the same way the chokepoint did, so the key matches; the dir must still
467 /// exist at call time (the rollback clears these BEFORE rewinding the dir).
468 /// Idempotent: missing sidecars are a no-op (heddle#316 r11 P2).
469 ///
470 /// [`checkout_state_gated`]: Repository::checkout_state_gated
471 pub fn clear_materialized_root_records(&self, worktree_root: &Path) -> Result<()> {
472 let canonical = canonical_worktree_path(worktree_root);
473 crate::thread_manifest::clear_materialized_leaves(self.heddle_dir(), &canonical)
474 .map_err(HeddleError::Io)?;
475 crate::thread_manifest::clear_withheld_checkout(self.heddle_dir(), &canonical)
476 .map_err(HeddleError::Io)?;
477 Ok(())
478 }
479
480 /// Write the [`ThreadManifest`] sidecar for a worktree that's
481 /// already been materialised to `dest` against `state_id`. Used
482 /// by the CLI's `start` path, which calls `materialize_tree`
483 /// directly via `write_isolated_checkout` and then needs the
484 /// matching manifest written so the rest of the clonefile-thread
485 /// machinery (`heddle status` advisory, `Repository::snapshot`
486 /// auto-detection, `capture_thread_from_disk` fast no-op) sees a
487 /// fully-formed sidecar.
488 ///
489 /// `state_id` is the captured state the worktree was materialised
490 /// against; its tree is resolved and walked to populate the
491 /// manifest's per-file stat-cache entries (one `lstat` per file).
492 /// Atomic write: a torn manifest can't half-land. Idempotent at
493 /// the manifest-key level: rewriting a manifest for the same
494 /// thread is supported (and is what `capture_thread_from_disk`
495 /// does post-capture).
496 #[instrument(skip(self), fields(thread = %thread, dest = %dest.display(), state = %state_id))]
497 pub fn record_thread_manifest(
498 &self,
499 thread: &str,
500 state_id: &ChangeId,
501 dest: &Path,
502 ) -> Result<ThreadManifest> {
503 let state = self
504 .store()
505 .get_state(state_id)?
506 .ok_or_else(|| HeddleError::Config(format!("state {state_id} missing")))?;
507 let tree = self
508 .store()
509 .get_tree(&state.tree)?
510 .ok_or_else(|| HeddleError::Config(format!("tree for state {state_id} missing")))?;
511 let mut manifest =
512 ThreadManifest::new(*state_id, state.tree, canonical_worktree_path(dest));
513 populate_manifest_from_tree(self, &tree, dest, "", &mut manifest.files)?;
514 crate::thread_manifest::write_manifest(self.heddle_dir(), thread, &manifest)
515 .map_err(HeddleError::Io)?;
516 debug!(
517 thread = %thread,
518 state_id = %state_id,
519 files = manifest.files.len(),
520 "thread manifest recorded post-materialize"
521 );
522 Ok(manifest)
523 }
524
525 /// Record a WITHHELD-consistent manifest sidecar for a worktree whose
526 /// checkout was withheld — the base state's visibility tier was not visible
527 /// to the materializing audience, so [`Repository::checkout_state_gated`]
528 /// wrote ONLY the operator-local courtesy stub and the tracked bytes were
529 /// never materialized.
530 ///
531 /// Mirrors the withheld arm of [`Repository::materialize_thread`]: `tree_hash`
532 /// still names the real (unserved) state's tree so the sidecar identifies
533 /// which state the stub stands in for, but `files` is empty (no tracked leaf
534 /// is on disk) and `withheld = true`. Crucially this does NOT walk/stat the
535 /// real tree against `dest` the way [`Repository::record_thread_manifest`]
536 /// does — those files were intentionally not materialized, so stat-ing them
537 /// would record phantom stat-cache entries (or fail) against a checkout that
538 /// holds only the stub. The CLI's atomic `start` path calls this instead of
539 /// `record_thread_manifest` when the checkout came back withheld, so a start
540 /// on a Private base produces a withheld checkout + a consistent manifest
541 /// rather than erroring (heddle#316 / PR #528 r9 Finding 3).
542 #[instrument(skip(self), fields(thread = %thread, dest = %dest.display(), state = %state_id))]
543 pub fn record_withheld_thread_manifest(
544 &self,
545 thread: &str,
546 state_id: &ChangeId,
547 dest: &Path,
548 ) -> Result<ThreadManifest> {
549 let state = self
550 .store()
551 .get_state(state_id)?
552 .ok_or_else(|| HeddleError::Config(format!("state {state_id} missing")))?;
553 let mut manifest =
554 ThreadManifest::new(*state_id, state.tree, canonical_worktree_path(dest));
555 manifest.withheld = true;
556 crate::thread_manifest::write_manifest(self.heddle_dir(), thread, &manifest)
557 .map_err(HeddleError::Io)?;
558 debug!(
559 thread = %thread,
560 state_id = %state_id,
561 "withheld thread manifest recorded post-materialize"
562 );
563 Ok(manifest)
564 }
565
566 /// The staged domain commit record for a brand-new materialized-thread
567 /// start. The repo owns the op-record shape so callers don't reconstruct
568 /// `OpRecord::ThreadCreate`'s fields. `manager_snapshot` is `None`: the
569 /// thread record is written by the start's converge step (so there is
570 /// nothing to snapshot at record-construction time — heddle#23 r2). The
571 /// caller stages this as the executor's single commit record (it is NOT
572 /// appended eagerly); the commit marker dedups on the stable
573 /// `transaction_id`.
574 pub fn thread_create_op_record(&self, name: &str, state: ChangeId) -> OpRecord {
575 OpRecord::ThreadCreate {
576 name: name.to_string(),
577 state,
578 manager_snapshot: None,
579 }
580 }
581
582 /// CAS-guarded rollback of a materialized-thread-start ref forward
583 /// (heddle#356 cid 3333881583).
584 ///
585 /// The forward set the thread ref to `set_value` (the start's base state).
586 /// Undo it ONLY if the ref STILL points there: restore `restore_to` when a
587 /// prior value existed (a re-start that reused the ref), or delete a ref
588 /// this start created (`restore_to == None`). If a concurrent process
589 /// advanced/changed the ref after our forward (a concurrent start or
590 /// crash-recovery), leave their write in place — an unconditional
591 /// reset/delete would clobber it.
592 pub fn cas_guarded_thread_ref_rollback(
593 &self,
594 name: &ThreadName,
595 set_value: ChangeId,
596 restore_to: Option<ChangeId>,
597 ) -> Result<()> {
598 // Compare-before-write: bail without touching the ref if it no longer
599 // holds the value our forward set.
600 if self.refs().get_thread(name)? != Some(set_value) {
601 return Ok(());
602 }
603 let result = match restore_to {
604 Some(prior) => {
605 self.refs()
606 .set_thread_cas(name, RefExpectation::Value(set_value), &prior)
607 }
608 None => self
609 .refs()
610 .delete_thread_cas(name, RefExpectation::Value(set_value)),
611 };
612 match result {
613 Ok(()) => Ok(()),
614 // Lost the race between the read above and this CAS: a concurrent
615 // writer advanced the ref. The expectation guard means we wrote
616 // nothing — leave their advance intact (the whole point of the
617 // guard).
618 Err(HeddleError::Conflict(_)) => Ok(()),
619 Err(other) => Err(other),
620 }
621 }
622
623 /// Restore the thread manifest sidecar to its captured pre-start snapshot:
624 /// rewrite the prior `manifest.toml` bytes if one existed, or remove the
625 /// directory this start created. Restoring (not blind-deleting) preserves
626 /// an OLD manifest left by a prior materialization of a reused thread ref
627 /// (heddle#356 cid 3333881561).
628 pub fn restore_thread_manifest(&self, thread: &str, prior: Option<Vec<u8>>) -> Result<()> {
629 match prior {
630 Some(bytes) => {
631 let path = crate::thread_manifest::manifest_path(self.heddle_dir(), thread);
632 if let Some(parent) = path.parent() {
633 fs::create_dir_all(parent).map_err(HeddleError::Io)?;
634 }
635 fs::write(&path, bytes).map_err(HeddleError::Io)
636 }
637 None => crate::thread_manifest::remove_thread_manifest_dir(self.heddle_dir(), thread)
638 .map(|_| ())
639 .map_err(HeddleError::Io),
640 }
641 }
642
643 /// Scan the materialized worktree at `root`, build a fresh tree
644 /// from the on-disk bytes, and (if anything changed) advance
645 /// `thread`'s head to a new state pointing at that tree. The
646 /// manifest is rewritten to reflect the new state and the
647 /// post-capture stat fields.
648 ///
649 /// Returns [`ThreadCaptureOutcome::NoOp`] when the new tree's
650 /// hash equals the manifest's recorded `tree_hash` — the agent
651 /// touched nothing material. Otherwise
652 /// [`ThreadCaptureOutcome::Captured`] with the new state id.
653 ///
654 /// The reason this method exists alongside `Repository::snapshot`
655 /// is two-fold:
656 /// 1. `snapshot` always advances `HEAD`'s currently-attached
657 /// thread. Capture-from-disk targets *a specific thread by
658 /// name*, which is what auto-capture-on-switch needs.
659 /// 2. `snapshot` walks `self.root`. Capture-from-disk walks
660 /// whatever directory the materializer put the thread at —
661 /// managed checkouts under `<repo>/.heddle/threads/<thread>/`,
662 /// which are NOT `self.root`.
663 ///
664 /// Walks `Repository::build_tree` for the slow path so the
665 /// resulting trees are byte-identical to what `heddle capture`
666 /// produces against the same content. A stat-cache fast path
667 /// (see [`stat_cache_no_op`]) short-circuits the common case
668 /// of "switch threads, nothing changed" so the dominant
669 /// auto-capture-on-switch latency is a `stat` walk, not a
670 /// blob rehash.
671 #[instrument(skip(self), fields(thread = %thread, root = %root.display()))]
672 pub fn capture_thread_from_disk(
673 &self,
674 thread: &str,
675 root: &Path,
676 ) -> Result<ThreadCaptureOutcome> {
677 // Repository-wide write lock — same shape as
678 // `snapshot_with_attribution_profiled`. Without it, two
679 // concurrent `thread switch` invocations from sibling
680 // worktrees can race the same source thread: both read
681 // `get_thread(thread)` returning the same parent, both
682 // `put_state` with that parent, both `set_thread` —
683 // result is two leaf states with the same parent, one of
684 // which is orphaned because the ref ends up pointing at
685 // whichever `set_thread` won the race. The manifest write
686 // at step 4 has the same lost-update problem on a smaller
687 // scale. Holding the write lock across the whole
688 // read-modify-write sequence makes the capture atomic with
689 // respect to other state-changing operations.
690 let _lock = self
691 .locker()
692 .write()
693 .map_err(|e| HeddleError::Io(std::io::Error::other(e.to_string())))?;
694
695 let existing_manifest =
696 read_manifest(self.heddle_dir(), thread).map_err(HeddleError::Io)?;
697
698 // 0a. Withheld checkouts are non-capturable. A withheld checkout holds
699 // only the operator-local courtesy stub (the tracked bytes were
700 // withheld because the state's tier is not visible to the
701 // materializing audience). Capturing it would either pull the stub
702 // in as tracked content or — worse — build an empty tree (the stub
703 // is ignored, see `ignore_patterns`) and commit it, wiping the
704 // withheld state's real files. The operator cannot capture content
705 // they were never served, so refuse with a no-op and leave the
706 // thread head where it is (heddle#316).
707 //
708 // The withheld status is keyed by THIS worktree root, not by the
709 // per-thread `manifest.toml` — that single file is clobbered when
710 // the same thread is materialized into a second worktree, so a
711 // manifest-level flag would let an under-tier checkout of one
712 // worktree wrongly suppress an authorized sibling worktree's
713 // capture. The per-root marker (written by `checkout_state_gated`)
714 // scopes the suppression to exactly the worktree that was withheld.
715 if crate::thread_manifest::is_withheld_checkout(
716 self.heddle_dir(),
717 &canonical_worktree_path(root),
718 ) {
719 debug!(thread = %thread, "thread capture skipped (withheld checkout)");
720 return Ok(ThreadCaptureOutcome::NoOp);
721 }
722
723 // 0. Fast no-op via the stat-cache. If every file in the
724 // manifest still exists with the same `(inode, mtime,
725 // ctime, mode)` AND the disk walk turns up no
726 // untracked/new files, we know the tree is byte-identical
727 // to what we materialised. Skip the entire blob-and-tree
728 // rebuild. Typical cost: ~5ms for a 643-file worktree
729 // vs hundreds of ms for the full `build_tree` rehash.
730 if let Some(m) = existing_manifest.as_ref()
731 && stat_cache_no_op(self, m, root)?
732 {
733 debug!(thread = %thread, "thread capture no-op (stat-cache hit)");
734 return Ok(ThreadCaptureOutcome::NoOp);
735 }
736
737 // 1. Walk the on-disk worktree → fresh Tree (also stores
738 // every blob it sees as a side effect). When we have a
739 // manifest, pass it as a stat-cache so unchanged files
740 // skip the read+hash cycle entirely. Files that DID
741 // change still get the full treatment, so correctness
742 // is preserved; we just avoid the redundant work for
743 // the (usually large) majority.
744 let new_tree = match existing_manifest.as_ref() {
745 Some(m) => self.build_tree_with_stat_cache(root, m)?,
746 None => self.build_tree(root)?,
747 };
748 let new_tree_hash = self.store().put_tree(&new_tree)?;
749
750 // 2. Content-hash no-op (slow path equivalent of the
751 // stat-cache check above). Hits when stat fields drifted
752 // via `touch` or atime updates even though the bytes
753 // didn't change — refresh the manifest's stat fields so
754 // the next call hits the fast path.
755 if existing_manifest
756 .as_ref()
757 .map(|m| m.tree_hash == new_tree_hash)
758 .unwrap_or(false)
759 {
760 let mut refreshed = existing_manifest.expect("checked Some above");
761 refreshed.files.clear();
762 populate_manifest_from_tree(self, &new_tree, root, "", &mut refreshed.files)?;
763 write_manifest(self.heddle_dir(), thread, &refreshed).map_err(HeddleError::Io)?;
764 debug!(thread = %thread, "thread capture no-op (content-hash refresh)");
765 return Ok(ThreadCaptureOutcome::NoOp);
766 }
767
768 // 3. Real capture. Build a new state parented at the
769 // current thread head (if any), put it, advance the
770 // thread ref.
771 let attribution = self.get_attribution()?;
772 let thread_name = ThreadName::from(thread);
773 let parents = match self.refs().get_thread(&thread_name)? {
774 Some(prev) => vec![prev],
775 None => vec![],
776 };
777 let mut state = State::new_snapshot(new_tree_hash, parents, attribution);
778 // Auto-sign this thread-materialization capture (heddle#482) via the
779 // authored-state chokepoint, the same as the primary capture path — it
780 // is a real author capture that bypasses `stage_snapshot_objects`. Last
781 // mutation before the write.
782 self.put_authored_state(&mut state)?;
783 self.refs().set_thread(&thread_name, &state.change_id)?;
784
785 // 4. Rewrite the manifest to reflect the new state. `root` is
786 // the worktree being captured from — record its canonical
787 // path so the next snapshot can tell whether it's running
788 // inside this same worktree.
789 let mut manifest = ThreadManifest::new(
790 state.change_id,
791 new_tree_hash,
792 canonical_worktree_path(root),
793 );
794 populate_manifest_from_tree(self, &new_tree, root, "", &mut manifest.files)?;
795 write_manifest(self.heddle_dir(), thread, &manifest).map_err(HeddleError::Io)?;
796
797 debug!(
798 thread = %thread,
799 new_state = %state.change_id,
800 files = manifest.files.len(),
801 "thread captured"
802 );
803 Ok(ThreadCaptureOutcome::Captured {
804 state_id: state.change_id,
805 })
806 }
807}
808
809/// Recursive helper: for each tree entry under `rel_prefix` inside
810/// the materialized `dest`, walk the captured tree (NOT the disk —
811/// we trust what we just put there) and stat the corresponding file
812/// to fill in the manifest's identity fields.
813///
814/// Using the captured tree as the walk basis is what lets a
815/// manifest entry survive `rm -rf .` later: the file may have
816/// disappeared but we still record what *should* be there per the
817/// captured state. Capture-from-disk decides what to do about
818/// missing files at its own scan time.
819/// Plain-text placeholder a holder sees instead of an under-tier state's
820/// tracked content on their own checkout. ASCII-only, mirrors the redaction
821/// `stub_text` shape. Never travels off-host.
822fn courtesy_stub_text(tier: &VisibilityTier, embargo_until: Option<DateTime<Utc>>) -> String {
823 let mut out = String::with_capacity(256);
824 out.push_str("# Heddle withheld this state's content from your audience.\n");
825 out.push_str(&format!("# visibility-tier: {}\n", tier.as_str()));
826 if let VisibilityTier::TeamScoped { team_id } = tier {
827 out.push_str(&format!("# team: {team_id}\n"));
828 }
829 if let VisibilityTier::Restricted { scope_label } | VisibilityTier::Private { scope_label } =
830 tier
831 {
832 out.push_str(&format!("# scope: {scope_label}\n"));
833 }
834 match embargo_until {
835 Some(when) => out.push_str(&format!("# promotes-at: {}\n", when.to_rfc3339())),
836 None => out.push_str("# promotes-at: (no scheduled promotion)\n"),
837 }
838 out.push_str("# This placeholder is a local courtesy; the bytes are not in this checkout.\n");
839 out
840}
841
842/// Collect every blob/symlink leaf path (worktree-relative, forward-slash
843/// joined) reachable from `tree` into `out`. Used by the checkout reconcile
844/// step to enumerate the tracked content a tier serves (the `keep` set on the
845/// visible path) or withholds (the `must_remove` set on the withheld path),
846/// without touching disk — the path set is derived purely from the tree.
847fn collect_tree_leaf_paths(
848 repo: &Repository,
849 tree: &Tree,
850 rel_prefix: &str,
851 out: &mut BTreeSet<String>,
852) -> Result<()> {
853 use objects::object::EntryType;
854 for entry in tree.entries() {
855 let rel_path = if rel_prefix.is_empty() {
856 entry.name.clone()
857 } else {
858 format!("{rel_prefix}/{}", entry.name)
859 };
860 match entry.entry_type {
861 EntryType::Tree => {
862 let subtree = repo.store().get_tree(&entry.hash)?.ok_or_else(|| {
863 HeddleError::Config(format!(
864 "subtree {} missing while collecting leaf paths for {rel_path}",
865 entry.hash
866 ))
867 })?;
868 collect_tree_leaf_paths(repo, &subtree, &rel_path, out)?;
869 }
870 EntryType::Blob | EntryType::Symlink => {
871 out.insert(rel_path);
872 }
873 }
874 }
875 Ok(())
876}
877
878pub(crate) fn populate_manifest_from_tree(
879 repo: &Repository,
880 tree: &Tree,
881 dest: &Path,
882 rel_prefix: &str,
883 out: &mut BTreeMap<String, ManifestFile>,
884) -> Result<()> {
885 use objects::object::EntryType;
886 for entry in tree.entries() {
887 let rel_path = if rel_prefix.is_empty() {
888 entry.name.clone()
889 } else {
890 format!("{rel_prefix}/{}", entry.name)
891 };
892 match entry.entry_type {
893 EntryType::Tree => {
894 let subtree = repo.store().get_tree(&entry.hash)?.ok_or_else(|| {
895 HeddleError::Config(format!(
896 "subtree {} missing while populating manifest for {rel_path}",
897 entry.hash
898 ))
899 })?;
900 populate_manifest_from_tree(repo, &subtree, dest, &rel_path, out)?;
901 }
902 EntryType::Blob | EntryType::Symlink => {
903 let on_disk = dest.join(&rel_path);
904 let meta = match fs::symlink_metadata(&on_disk) {
905 Ok(m) => m,
906 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
907 // The materializer didn't put it there. That
908 // shouldn't happen on a clean materialize,
909 // but if it does we skip the entry so the
910 // manifest stays a reflection of disk truth.
911 debug!(
912 path = %rel_path,
913 "manifest population skipped missing file"
914 );
915 continue;
916 }
917 Err(e) => return Err(HeddleError::Io(e)),
918 };
919 let (size, inode, mtime_ns, ctime_ns, mode) =
920 crate::stat_signature::stat_signature(&on_disk, &meta);
921 out.insert(
922 rel_path,
923 ManifestFile {
924 hash: entry.hash,
925 size,
926 inode,
927 mtime_ns,
928 ctime_ns,
929 mode,
930 },
931 );
932 }
933 }
934 }
935 Ok(())
936}
937
/// Resolve `path` to an absolute, symlink-free form, or hand it back unchanged
/// when resolution fails.
///
/// Within this file the result is stored as the manifest's worktree path and
/// used to look up the per-root withheld-checkout marker. Canonicalizing both
/// sides of such comparisons avoids spurious mismatches between aliases of the
/// same directory (e.g. `/tmp/foo` vs `/private/tmp/foo` on macOS).
///
/// The fallback never errors: if `fs::canonicalize` fails (for instance
/// because the path does not exist) the input is returned as an owned
/// `PathBuf`. A later comparison may then miss, which costs a cache rebuild
/// but cannot corrupt the manifest.
pub(crate) fn canonical_worktree_path(path: &Path) -> PathBuf {
    match fs::canonicalize(path) {
        Ok(resolved) => resolved,
        Err(_) => path.to_path_buf(),
    }
}
952
953/// Stat-cache fast no-op check. Returns `true` when the on-disk
954/// worktree is byte-identical to what `manifest` describes — every
955/// manifest file present at its recorded `(inode, mtime, ctime,
956/// mode)`, no untracked files, no deletions.
957///
958/// Pattern: same as git's index `assume-unchanged` fast path. The
959/// stat fields are populated by `populate_manifest_from_tree` at
960/// materialise time; clonefile/copy operations preserve the
961/// destination's inode for the lifetime of the file, so a single
962/// `stat` per file is sufficient to detect any modification.
963///
964/// Performance: ~5 ms for a 643-file worktree (single `stat` per
965/// file + B-tree lookup). The slow path (`build_tree`) reads and
966/// hashes every file, ~100s of ms for the same fixture.
967///
968/// Returns `Ok(false)` on ANY uncertainty — a stat call failed, a
969/// file in the manifest is missing, an untracked file showed up,
970/// or any single field mismatched. Callers fall through to the
971/// slow `build_tree` path, which is always correct.
972/// Walk the captured tree named by `manifest.tree_hash` and collect
973/// every subdirectory's relative path (forward-slash joined,
974/// relative to the tree root, no leading or trailing slashes).
975/// Source of truth for [`stat_cache_no_op`]'s directory leg —
976/// includes tree-only empty directories that a `manifest.files`
977/// ancestors-derived set would miss.
978fn collect_expected_dirs(
979 repo: &Repository,
980 manifest: &ThreadManifest,
981) -> Result<std::collections::HashSet<String>> {
982 use std::collections::HashSet;
983 let mut set: HashSet<String> = HashSet::new();
984 let Some(tree) = repo.store().get_tree(&manifest.tree_hash)? else {
985 // Tree missing from the store would be a serious anomaly —
986 // surface it so the caller bails to the slow path which will
987 // re-derive everything from the worktree.
988 return Err(HeddleError::Config(format!(
989 "tree {} referenced by manifest is missing",
990 manifest.tree_hash
991 )));
992 };
993 collect_subdirs_into(repo, &tree, "", &mut set)?;
994 Ok(set)
995}
996
997fn collect_subdirs_into(
998 repo: &Repository,
999 tree: &objects::object::Tree,
1000 rel_prefix: &str,
1001 out: &mut std::collections::HashSet<String>,
1002) -> Result<()> {
1003 use objects::object::EntryType;
1004 for entry in tree.entries() {
1005 if entry.entry_type != EntryType::Tree {
1006 continue;
1007 }
1008 let rel = if rel_prefix.is_empty() {
1009 entry.name.clone()
1010 } else {
1011 format!("{rel_prefix}/{}", entry.name)
1012 };
1013 let subtree = repo.store().get_tree(&entry.hash)?.ok_or_else(|| {
1014 HeddleError::Config(format!(
1015 "subtree {} missing while collecting expected dirs at {rel}",
1016 entry.hash
1017 ))
1018 })?;
1019 out.insert(rel.clone());
1020 collect_subdirs_into(repo, &subtree, &rel, out)?;
1021 }
1022 Ok(())
1023}
1024
1025/// Recursive `read_dir` worker for the stat-cache no-op predicate.
1026/// Returns `Ok(false)` to bail to the slow path (anything unexpected,
1027/// any stat mismatch); `Ok(true)` to continue the walk. Final
1028/// presence checks (`seen.len() == manifest.files.len()` etc.) live
1029/// in the caller; this fn only flags incremental mismatches.
1030///
1031/// Why hand-roll rather than reuse `ignore::WalkBuilder`: the walker
1032/// crate buffers entries, sorts them for determinism, calls
1033/// `metadata()` to populate its own `DirEntry`, and runs the gitignore
1034/// pipeline per directory even with every `git_*` flag turned off.
1035/// All of that is wasted on this predicate, which already has its own
1036/// `WorktreeIgnoreMatcher` and only needs `symlink_metadata` on each
1037/// file. A bare `read_dir` recursion is ≈3× faster on the 10k-file
1038/// fixture and matches `build_tree`'s ignore semantics exactly
1039/// because we go through the same matcher.
1040fn walk_for_no_op(
1041 root: &Path,
1042 cur: &Path,
1043 manifest: &ThreadManifest,
1044 expected_dirs: &std::collections::HashSet<String>,
1045 ignore_matcher: &crate::worktree_ignore::WorktreeIgnoreMatcher,
1046 seen: &mut std::collections::HashSet<String>,
1047 seen_dirs: &mut std::collections::HashSet<String>,
1048) -> Result<bool> {
1049 let entries = match fs::read_dir(cur) {
1050 Ok(it) => it,
1051 // A directory we can't read means we've lost certainty about
1052 // its contents — fall through to the slow path.
1053 Err(_) => return Ok(false),
1054 };
1055 for entry in entries {
1056 let entry = match entry {
1057 Ok(e) => e,
1058 Err(_) => return Ok(false),
1059 };
1060 let path = entry.path();
1061 let Ok(rel) = path.strip_prefix(root) else {
1062 return Ok(false);
1063 };
1064 let rel_str = rel.to_string_lossy().into_owned();
1065 let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
1066 return Ok(false);
1067 };
1068
1069 // Run the ignore matcher *first*, before consulting the
1070 // manifest. The previous "manifest-first" dispatch
1071 // accepted any manifest hit without re-checking the
1072 // matcher, which silently false-passed if the user had
1073 // tightened `.heddleignore` (or the in-config ignore set)
1074 // between materialise and this capture — `build_tree`
1075 // would now exclude the previously-tracked path and
1076 // produce a different tree, but the predicate said
1077 // "no-op". Always running the matcher first costs a
1078 // pattern check per entry but is what makes the
1079 // predicate's output match what `build_tree` would do.
1080 //
1081 // Three outcomes from the matcher:
1082 // * Pruned + in manifest → ignore-config drift; bail
1083 // to slow path so the new tree reflects the new
1084 // exclusion.
1085 // * Pruned + not in manifest → genuinely ignored;
1086 // silently skip without recursing.
1087 // * Not pruned → standard manifest / new-entry
1088 // dispatch below.
1089 // `should_prune_directory_child` matches the production
1090 // walker's per-entry probe (`worktree_walk.rs`). It calls
1091 // `matched_relative(path, is_dir=true)` so gitignore rules
1092 // with trailing `/` still fire, and the same patterns
1093 // exclude both file and directory entries — same behaviour
1094 // `build_tree` would observe at materialise time.
1095 let pruned = ignore_matcher.should_prune_absolute_path(&path)
1096 || ignore_matcher.should_prune_directory_child(cur, name);
1097 if pruned {
1098 if manifest.files.contains_key(&rel_str) {
1099 // The matcher now wants this path excluded, but
1100 // it's in the manifest from materialise time.
1101 // Ignore-config drift — let the slow path
1102 // rebuild the tree without it.
1103 return Ok(false);
1104 }
1105 continue;
1106 }
1107
1108 // Not pruned. Manifest lookup is the fast path for
1109 // tracked files; un-tracked entries fall through to
1110 // dir-recursion / new-file detection below.
1111 if let Some(manifest_entry) = manifest.files.get(&rel_str) {
1112 // `symlink_metadata` (not `metadata`) so a symlink
1113 // doesn't transparently follow into the target's
1114 // inode.
1115 let meta = match fs::symlink_metadata(&path) {
1116 Ok(m) => m,
1117 Err(_) => return Ok(false),
1118 };
1119 let (size, inode, mtime_ns, ctime_ns, mode) =
1120 crate::stat_signature::stat_signature(&path, &meta);
1121 let stat = ManifestFile {
1122 hash: manifest_entry.hash,
1123 size,
1124 inode,
1125 mtime_ns,
1126 ctime_ns,
1127 mode,
1128 };
1129 if !stat.matches(manifest_entry) {
1130 return Ok(false);
1131 }
1132 seen.insert(rel_str);
1133 continue;
1134 }
1135
1136 let file_type = match entry.file_type() {
1137 Ok(ft) => ft,
1138 Err(_) => return Ok(false),
1139 };
1140 if file_type.is_dir() {
1141 // Directory leg: any directory not in `expected_dirs`
1142 // is an addition since materialise. Bail; the slow
1143 // path will incorporate it.
1144 if !expected_dirs.contains(&rel_str) {
1145 return Ok(false);
1146 }
1147 seen_dirs.insert(rel_str);
1148 if !walk_for_no_op(
1149 root,
1150 &path,
1151 manifest,
1152 expected_dirs,
1153 ignore_matcher,
1154 seen,
1155 seen_dirs,
1156 )? {
1157 return Ok(false);
1158 }
1159 continue;
1160 }
1161
1162 // A non-ignored, non-directory entry that's not in the
1163 // manifest is a new file. Bail to the slow path which
1164 // will rebuild the tree with the new entry.
1165 return Ok(false);
1166 }
1167 Ok(true)
1168}
1169
1170fn stat_cache_no_op(repo: &Repository, manifest: &ThreadManifest, root: &Path) -> Result<bool> {
1171 use std::collections::HashSet;
1172
1173 let ignore_patterns = repo.ignore_patterns()?;
1174 let nested_exclusions = repo.nested_thread_worktree_exclusions(root)?;
1175 let ignore_matcher = crate::worktree_ignore::WorktreeIgnoreMatcher::new(&ignore_patterns)
1176 .with_nested_worktree_exclusions(nested_exclusions);
1177
1178 // Manifests only record files+symlinks, but Heddle's tree
1179 // builder materialises empty directories as their own tree
1180 // entries. So a no-op predicate that only checks `manifest.files`
1181 // would miss "user added or removed an empty directory" —
1182 // `seen.len() == manifest.files.len()` is still true on the file
1183 // side, but the on-disk tree no longer matches what `build_tree`
1184 // would produce.
1185 //
1186 // Source of truth for the expected directory set is the captured
1187 // tree itself (the one the manifest's `tree_hash` names), not
1188 // the manifest's file ancestors. Two reasons:
1189 //
1190 // 1. *Tree-only empty directories.* A `Tree` entry with no
1191 // files beneath it is invisible from a `manifest.files`
1192 // ancestors-walk — the file set is empty, so every
1193 // ancestor it would contribute is missing. Removing a
1194 // legit empty leaf dir would still false-pass.
1195 // 2. *Future schema drift.* Files in `manifest.files` may
1196 // use slash-normalised relative paths that don't exactly
1197 // match how `Tree::entries` names subdirs on every
1198 // platform; walking the tree directly avoids the
1199 // double-encoding hazard.
1200 //
1201 // Cost is ~one `get_tree` per subdir of the captured tree.
1202 // For the typical thread (a few hundred dirs) that's a small
1203 // number of memory-mapped object reads; on the predicate's
1204 // hot path it's bounded by the tree's directory fan-out, not
1205 // file count.
1206 let expected_dirs: HashSet<String> = match collect_expected_dirs(repo, manifest) {
1207 Ok(s) => s,
1208 // Any error walking the tree → conservatively bail to the
1209 // slow path. `Ok(false)` keeps correctness; the worst case
1210 // is a wasted full rebuild.
1211 Err(_) => return Ok(false),
1212 };
1213
1214 // Walk the worktree. For every file we see, check it against the
1215 // manifest. Track which manifest paths we've actually seen so we
1216 // can detect deletions afterwards.
1217 //
1218 // Custom `read_dir` recursion instead of `ignore::WalkBuilder`:
1219 // the walker crate is fast on its own but the per-entry overhead
1220 // adds up at 10k+ files (it buffers, sorts, double-stats, and
1221 // re-applies the ignore stack for every dir). For this hot
1222 // predicate we only need: a `readdir` per directory, one
1223 // `symlink_metadata` per file, and the same ignore-matcher
1224 // check `build_tree` runs. The std-only recursion below
1225 // measured ≈3× faster on the 10k-file fixture (no per-entry
1226 // double-stat, no buffer churn, fewer allocations).
1227 let mut seen: HashSet<String> = HashSet::with_capacity(manifest.files.len());
1228 let mut seen_dirs: HashSet<String> = HashSet::with_capacity(expected_dirs.len());
1229 if !walk_for_no_op(
1230 root,
1231 root,
1232 manifest,
1233 &expected_dirs,
1234 &ignore_matcher,
1235 &mut seen,
1236 &mut seen_dirs,
1237 )? {
1238 return Ok(false);
1239 }
1240
1241 // Final pass: every manifest entry must have been seen (file
1242 // deletion check) and every manifest-implied directory must
1243 // have been seen (directory deletion check). The dir-side
1244 // check catches `rmdir` of an empty directory that was part
1245 // of the materialised tree — its files are also gone (so the
1246 // file side already declines) but if it had no files to begin
1247 // with the file side alone would false-pass.
1248 if seen.len() != manifest.files.len() {
1249 return Ok(false);
1250 }
1251 if seen_dirs.len() != expected_dirs.len() {
1252 return Ok(false);
1253 }
1254 Ok(true)
1255}
1256
1257#[cfg(test)]
1258mod tests {
1259 use tempfile::TempDir;
1260
1261 use super::*;
1262 use crate::thread_manifest::read_manifest;
1263
1264 fn seeded_repo() -> (TempDir, Repository) {
1265 let repo_dir = TempDir::new().unwrap();
1266 let repo = Repository::init_default(repo_dir.path()).unwrap();
1267 fs::write(repo_dir.path().join("file.txt"), b"tracked\n").unwrap();
1268 repo.snapshot(Some("seed".into()), None).unwrap();
1269 (repo_dir, repo)
1270 }
1271
/// Materializing `main` returns a manifest that lists the snapshotted files
/// with populated stat fields, and the same manifest is persisted on disk.
#[test]
fn materialize_thread_writes_manifest_with_files() {
    let workdir = TempDir::new().unwrap();
    let root = workdir.path();
    let repo = Repository::init_default(root).unwrap();
    // Seed one top-level and one nested file, then snapshot them.
    fs::write(root.join("Cargo.toml"), b"# a\n").unwrap();
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), b"fn main() {}\n").unwrap();
    repo.snapshot(Some("seed".into()), None).unwrap();

    let scratch = TempDir::new().unwrap();
    let target = scratch.path().join("out");
    let manifest = repo
        .materialize_thread("main", &target, &AudienceTier::Internal)
        .unwrap();

    assert_eq!(
        manifest.schema_version,
        crate::thread_manifest::SCHEMA_VERSION
    );
    // `init_default` may seed files of its own, so only the two files
    // written above are checked — for presence and plausible stat fields.
    let cargo_entry = manifest
        .files
        .get("Cargo.toml")
        .expect("Cargo.toml in manifest");
    assert_ne!(cargo_entry.inode, 0);
    assert_ne!(cargo_entry.mtime_ns, 0);
    let lib_entry = manifest
        .files
        .get("src/lib.rs")
        .expect("src/lib.rs in manifest");
    assert_ne!(lib_entry.inode, 0);

    // The manifest must also have been written to disk.
    let reloaded = read_manifest(repo.heddle_dir(), "main")
        .unwrap()
        .expect("manifest on disk");
    assert_eq!(reloaded.files.len(), manifest.files.len());
    assert_eq!(
        reloaded.files["Cargo.toml"].inode,
        manifest.files["Cargo.toml"].inode
    );
}
1316
/// A target path that does not exist yet is created and populated.
#[test]
fn materialize_thread_creates_absent_target() {
    let (_workdir, repo) = seeded_repo();
    let scratch = TempDir::new().unwrap();
    let target = scratch.path().join("out");

    repo.materialize_thread("main", &target, &AudienceTier::Internal)
        .unwrap();

    assert!(target.is_dir());
    let contents = fs::read_to_string(target.join("file.txt")).unwrap();
    assert_eq!(contents, "tracked\n");
}
1332
/// A pre-existing but empty target directory is accepted and populated.
#[test]
fn materialize_thread_adopts_empty_directory() {
    let (_workdir, repo) = seeded_repo();
    let scratch = TempDir::new().unwrap();
    let target = scratch.path().join("out");
    fs::create_dir(&target).unwrap();

    repo.materialize_thread("main", &target, &AudienceTier::Internal)
        .unwrap();

    assert!(target.is_dir());
    let contents = fs::read_to_string(target.join("file.txt")).unwrap();
    assert_eq!(contents, "tracked\n");
}
1349
/// A target directory that already holds user data is refused, and neither
/// the user's file nor the target is touched.
#[test]
fn materialize_thread_rejects_non_empty_directory() {
    let (_workdir, repo) = seeded_repo();
    let scratch = TempDir::new().unwrap();
    let target = scratch.path().join("out");
    fs::create_dir(&target).unwrap();
    let preexisting = target.join("existing.txt");
    fs::write(&preexisting, b"user data\n").unwrap();

    let err = repo
        .materialize_thread("main", &target, &AudienceTier::Internal)
        .unwrap_err();

    assert!(err.to_string().contains("is not empty"), "{err}");
    let untouched = fs::read_to_string(&preexisting).unwrap();
    assert_eq!(untouched, "user data\n");
    assert!(!target.join("file.txt").exists());
}
1369
/// A target that is a symlink is refused, and nothing is written through the
/// link into the directory it points at.
#[cfg(unix)]
#[test]
fn materialize_thread_rejects_symlink_target() {
    let (_workdir, repo) = seeded_repo();
    let scratch = TempDir::new().unwrap();
    let real_dir = scratch.path().join("real");
    fs::create_dir(&real_dir).unwrap();
    let link = scratch.path().join("link");
    std::os::unix::fs::symlink(&real_dir, &link).unwrap();

    let err = repo
        .materialize_thread("main", &link, &AudienceTier::Internal)
        .unwrap_err();

    assert!(err.to_string().contains("cannot be a symlink"), "{err}");
    assert!(!real_dir.join("file.txt").exists());
}
1387
/// A target that is a regular file is refused and left byte-for-byte intact.
#[test]
fn materialize_thread_rejects_file_target() {
    let (_workdir, repo) = seeded_repo();
    let scratch = TempDir::new().unwrap();
    let target = scratch.path().join("file");
    fs::write(&target, b"user data\n").unwrap();

    let err = repo
        .materialize_thread("main", &target, &AudienceTier::Internal)
        .unwrap_err();

    assert!(err.to_string().contains("must be a directory"), "{err}");
    let untouched = fs::read_to_string(&target).unwrap();
    assert_eq!(untouched, "user data\n");
}
1402
1403 fn embargo_state_with_tier(repo: &Repository, tier: VisibilityTier) -> ChangeId {
1404 use chrono::Utc;
1405 use objects::object::{Principal, StateVisibility};
1406 let state_id = repo
1407 .refs()
1408 .get_thread(&ThreadName::new("main"))
1409 .unwrap()
1410 .expect("head present");
1411 repo.put_state_visibility(StateVisibility {
1412 state: state_id,
1413 tier,
1414 embargo_until: None,
1415 declarer: Principal {
1416 name: "Grace Hopper".into(),
1417 email: "grace@example.com".into(),
1418 },
1419 declared_at: Utc::now(),
1420 signature: None,
1421 supersedes: None,
1422 })
1423 .expect("put visibility");
1424 state_id
1425 }
1426
1427 fn checkout_main(
1428 repo: &Repository,
1429 dest: &Path,
1430 audience: &AudienceTier,
1431 ) -> CheckoutMaterialization {
1432 let change_id = repo
1433 .refs()
1434 .resolve("main")
1435 .unwrap()
1436 .expect("main thread exists");
1437 let state = repo
1438 .store()
1439 .get_state(&change_id)
1440 .unwrap()
1441 .expect("main state exists");
1442 repo.checkout_state_gated(&change_id, &state, dest, audience)
1443 .unwrap()
1444 }
1445
/// Materializing a `Private` state for the `Internal` audience writes only the
/// courtesy stub: no tracked bytes on disk, no tracked files in the manifest,
/// and the stub text names the tier and its scope label.
#[test]
fn checkout_renders_courtesy_stub_when_state_is_under_tier_for_audience() {
    let workdir = TempDir::new().unwrap();
    let root = workdir.path();
    let repo = Repository::init_default(root).unwrap();
    fs::write(root.join("secret.rs"), b"fn exploit() {}\n").unwrap();
    repo.snapshot(Some("embargoed fix".into()), None).unwrap();
    let private_tier = VisibilityTier::Private {
        scope_label: "sec-embargo".into(),
    };
    embargo_state_with_tier(&repo, private_tier);

    let scratch = TempDir::new().unwrap();
    let dest = scratch.path().join("out");
    // Even the `Internal` operator audience is not served a `Private` state:
    // the placeholder is written and the tracked bytes are not.
    let manifest = repo
        .materialize_thread("main", &dest, &AudienceTier::Internal)
        .unwrap();

    let stub_path = dest.join(COURTESY_STUB_FILENAME);
    assert!(
        stub_path.exists(),
        "courtesy placeholder must be written for an under-tier checkout"
    );
    assert!(
        !dest.join("secret.rs").exists(),
        "the tracked content must NOT be materialized for an under-tier audience"
    );
    assert!(
        manifest.files.is_empty(),
        "manifest must record no tracked files for a stubbed checkout"
    );
    let stub = fs::read_to_string(&stub_path).unwrap();
    assert!(stub.contains("private"));
    assert!(stub.contains("sec-embargo"));
}
1483
/// Materializing a `Private` state for the audience holding the matching
/// restricted scope writes the real tree, no stub, and a manifest that lists
/// the tracked file.
#[test]
fn checkout_materializes_real_content_for_the_authorized_audience() {
    let workdir = TempDir::new().unwrap();
    let root = workdir.path();
    let repo = Repository::init_default(root).unwrap();
    fs::write(root.join("secret.rs"), b"fn exploit() {}\n").unwrap();
    repo.snapshot(Some("embargoed fix".into()), None).unwrap();
    let private_tier = VisibilityTier::Private {
        scope_label: "sec-embargo".into(),
    };
    embargo_state_with_tier(&repo, private_tier);

    let scratch = TempDir::new().unwrap();
    let dest = scratch.path().join("out");
    // The audience carries the same scope label as the state's tier.
    let authorized = AudienceTier::Restricted("sec-embargo".into());
    let manifest = repo
        .materialize_thread("main", &dest, &authorized)
        .unwrap();

    assert!(dest.join("secret.rs").exists());
    assert!(!dest.join(COURTESY_STUB_FILENAME).exists());
    assert!(manifest.files.contains_key("secret.rs"));
}
1512
/// #316 / PR #528 r6: withheld -> visible transition on the same root.
///
/// A root that first received only the courtesy stub (under-tier checkout)
/// and is then checked out again for an authorized audience must end up
/// clean: the real bytes present and the stale stub gone. Per the original
/// note, `materialize_tree` writes tracked leaves only, so the stub has to be
/// removed explicitly on the visible path.
#[test]
fn authorized_rematerialize_removes_stale_embargo_stub() {
    let workdir = TempDir::new().unwrap();
    let root = workdir.path();
    let repo = Repository::init_default(root).unwrap();
    fs::write(root.join("secret.rs"), b"fn exploit() {}\n").unwrap();
    repo.snapshot(Some("embargoed fix".into()), None).unwrap();
    let private_tier = VisibilityTier::Private {
        scope_label: "sec-embargo".into(),
    };
    embargo_state_with_tier(&repo, private_tier);

    let scratch = TempDir::new().unwrap();
    let dest = scratch.path().join("out");
    let stub_path = dest.join(COURTESY_STUB_FILENAME);
    let secret_path = dest.join("secret.rs");

    // Step 1 — under-tier checkout: only the stub lands.
    checkout_main(&repo, &dest, &AudienceTier::Internal);
    assert!(
        stub_path.exists(),
        "under-tier materialize must write the stub"
    );
    assert!(!secret_path.exists());

    // Step 2 — authorized checkout into the SAME root.
    let authorized = AudienceTier::Restricted("sec-embargo".into());
    checkout_main(&repo, &dest, &authorized);

    assert!(
        secret_path.exists(),
        "authorized re-materialize must write the real tree"
    );
    assert!(
        !stub_path.exists(),
        "the stale courtesy stub must be removed on the authorized re-materialize"
    );
}
1558
/// #316 / PR #528 r7 CLASS 1 (the leak): visible -> withheld transition on
/// the same root.
///
/// A root that first received the real tree (authorized checkout) and is then
/// checked out again UNDER-TIER must end up holding ONLY the courtesy stub.
/// Any tracked byte left next to the stub would be exactly the content the
/// gate is meant to withhold, so prior tracked leaves — nested ones included —
/// and the directories they leave empty must all be gone.
#[test]
fn visible_then_withheld_root_has_only_stub() {
    let workdir = TempDir::new().unwrap();
    let root = workdir.path();
    let repo = Repository::init_default(root).unwrap();
    fs::write(root.join("secret.rs"), b"fn exploit() {}\n").unwrap();
    fs::create_dir_all(root.join("nested")).unwrap();
    fs::write(root.join("nested/inner.rs"), b"fn inner() {}\n").unwrap();
    repo.snapshot(Some("embargoed fix".into()), None).unwrap();
    let private_tier = VisibilityTier::Private {
        scope_label: "sec-embargo".into(),
    };
    embargo_state_with_tier(&repo, private_tier);

    let scratch = TempDir::new().unwrap();
    let dest = scratch.path().join("out");
    let top_leaf = dest.join("secret.rs");
    let nested_leaf = dest.join("nested/inner.rs");

    // Step 1 — authorized checkout: the real tree lands, i.e. the very bytes
    // the next step has to withhold.
    let authorized = AudienceTier::Restricted("sec-embargo".into());
    checkout_main(&repo, &dest, &authorized);
    assert!(top_leaf.exists());
    assert!(nested_leaf.exists());

    // Step 2 — under-tier checkout into the SAME root (the leak case).
    checkout_main(&repo, &dest, &AudienceTier::Internal);

    assert!(
        dest.join(COURTESY_STUB_FILENAME).exists(),
        "withheld checkout must hold the courtesy stub"
    );
    assert!(
        !top_leaf.exists(),
        "the prior visible tree's bytes must NOT remain next to the stub"
    );
    assert!(
        !nested_leaf.exists(),
        "nested tracked leaves must be removed too"
    );
    // The root listing must be exactly one entry: the stub. That also proves
    // the now-empty `nested` directory was pruned.
    let remaining: Vec<_> = fs::read_dir(&dest)
        .unwrap()
        .map(|e| e.unwrap().file_name())
        .collect();
    assert_eq!(
        remaining.len(),
        1,
        "withheld root must contain only the courtesy stub, got {remaining:?}"
    );
    assert_eq!(remaining[0].to_str().unwrap(), COURTESY_STUB_FILENAME);
}
1622
1623 /// #316 / PR #528 r7 CLASS 1 (r6 transition, as a matrix member): a root
1624 /// first materialized UNDER-TIER (stub) and then re-materialized for an
1625 /// AUTHORIZED audience must hold the real tree and NO stale stub.
1626 #[test]
1627 fn withheld_then_visible_root_has_real_tree_no_stub() {
1628 let repo_dir = TempDir::new().unwrap();
1629 let repo = Repository::init_default(repo_dir.path()).unwrap();
1630 fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
1631 repo.snapshot(Some("embargoed fix".into()), None).unwrap();
1632 embargo_state_with_tier(
1633 &repo,
1634 VisibilityTier::Private {
1635 scope_label: "sec-embargo".into(),
1636 },
1637 );
1638
1639 let dest_holder = TempDir::new().unwrap();
1640 let dest = dest_holder.path().join("out");
1641
1642 checkout_main(&repo, &dest, &AudienceTier::Internal);
1643 assert!(dest.join(COURTESY_STUB_FILENAME).exists());
1644 assert!(!dest.join("secret.rs").exists());
1645
1646 checkout_main(
1647 &repo,
1648 &dest,
1649 &AudienceTier::Restricted("sec-embargo".into()),
1650 );
1651 assert!(
1652 dest.join("secret.rs").exists(),
1653 "authorized re-materialize must write the real tree"
1654 );
1655 assert!(
1656 !dest.join(COURTESY_STUB_FILENAME).exists(),
1657 "the stale courtesy stub must be removed on the authorized re-materialize"
1658 );
1659 }
1660
1661 /// #316 / PR #528 r7 CLASS 1 (visible→visible): re-materializing a root at a
1662 /// NEW visible tree must leave exactly that tree — a leaf dropped from the
1663 /// new tree must not linger from the prior materialize. `materialize_tree`
1664 /// writes the new leaves but does not remove a now-absent prior leaf; the
1665 /// reconcile step closes that gap.
1666 #[test]
1667 fn visible_then_visible_refreshes_tree() {
1668 let repo_dir = TempDir::new().unwrap();
1669 let repo = Repository::init_default(repo_dir.path()).unwrap();
1670 fs::write(repo_dir.path().join("keep.rs"), b"keep\n").unwrap();
1671 fs::write(repo_dir.path().join("stale.rs"), b"stale\n").unwrap();
1672 repo.snapshot(Some("seed".into()), None).unwrap();
1673
1674 let dest_holder = TempDir::new().unwrap();
1675 let dest = dest_holder.path().join("out");
1676 checkout_main(&repo, &dest, &AudienceTier::Internal);
1677 assert!(dest.join("keep.rs").exists());
1678 assert!(dest.join("stale.rs").exists());
1679
1680 // Advance the thread head in the MAIN repo (snapshot walks repo.root,
1681 // not `dest`, so the dest manifest's worktree_path stays = dest and is
1682 // NOT refreshed here): drop stale.rs, add fresh.rs.
1683 fs::remove_file(repo_dir.path().join("stale.rs")).unwrap();
1684 fs::write(repo_dir.path().join("fresh.rs"), b"fresh\n").unwrap();
1685 repo.snapshot(Some("advance".into()), None).unwrap();
1686
1687 // Re-materialize the SAME root at the new (still visible) head.
1688 checkout_main(&repo, &dest, &AudienceTier::Internal);
1689 assert!(dest.join("keep.rs").exists(), "an unchanged leaf stays");
1690 assert!(dest.join("fresh.rs").exists(), "the new leaf is written");
1691 assert!(
1692 !dest.join("stale.rs").exists(),
1693 "a leaf dropped from the new tree must not linger from the prior materialize"
1694 );
1695 assert!(
1696 !dest.join(COURTESY_STUB_FILENAME).exists(),
1697 "a visible re-materialize writes no stub"
1698 );
1699 }
1700
1701 /// #316 / PR #528 r7 CLASS 1 (withheld→withheld): two under-tier
1702 /// materializes of the same root leave only the stub each time, and capture
1703 /// stays a no-op.
1704 #[test]
1705 fn withheld_then_withheld_stays_withheld() {
1706 let repo_dir = TempDir::new().unwrap();
1707 let repo = Repository::init_default(repo_dir.path()).unwrap();
1708 fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
1709 repo.snapshot(Some("embargoed fix".into()), None).unwrap();
1710 embargo_state_with_tier(
1711 &repo,
1712 VisibilityTier::Private {
1713 scope_label: "sec-embargo".into(),
1714 },
1715 );
1716
1717 let dest_holder = TempDir::new().unwrap();
1718 let dest = dest_holder.path().join("out");
1719
1720 checkout_main(&repo, &dest, &AudienceTier::Internal);
1721 assert!(dest.join(COURTESY_STUB_FILENAME).exists());
1722 assert!(!dest.join("secret.rs").exists());
1723
1724 // Second under-tier checkout of the same root: still only the stub.
1725 checkout_main(&repo, &dest, &AudienceTier::Internal);
1726 let remaining: Vec<_> = fs::read_dir(&dest)
1727 .unwrap()
1728 .map(|e| e.unwrap().file_name())
1729 .collect();
1730 assert_eq!(
1731 remaining.len(),
1732 1,
1733 "withheld root must contain only the courtesy stub, got {remaining:?}"
1734 );
1735 assert_eq!(remaining[0].to_str().unwrap(), COURTESY_STUB_FILENAME);
1736 assert!(!dest.join("secret.rs").exists());
1737
1738 // Capture of the still-withheld root is a no-op.
1739 let outcome = repo.capture_thread_from_disk("main", &dest).unwrap();
1740 assert_eq!(outcome, ThreadCaptureOutcome::NoOp);
1741 }
1742
1743 /// #316 / PR #528 r9 FINDING A: the withheld marker (and `.leaves` record)
1744 /// must be keyed on the root `capture_thread_from_disk` resolves at
1745 /// READ-time, not on a pre-materialization path. `canonical_worktree_path`
1746 /// falls back to its raw input when the path does not yet resolve, so a dest
1747 /// reached THROUGH a symlink whose leaf does not exist yet canonicalizes to
1748 /// the un-resolved `link/out` before the dir is made but to the resolved
1749 /// `real/out` after. Pre-fix the marker was written under `link/out` while
1750 /// capture looked it up under `real/out` → marker missed → a withheld
1751 /// checkout captured as a stub-only tree instead of no-oping.
1752 #[cfg(unix)]
1753 #[test]
1754 fn withheld_marker_keyed_on_canonical_root_for_relative_dest() {
1755 let repo_dir = TempDir::new().unwrap();
1756 let repo = Repository::init_default(repo_dir.path()).unwrap();
1757 fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
1758 repo.snapshot(Some("embargoed fix".into()), None).unwrap();
1759 embargo_state_with_tier(
1760 &repo,
1761 VisibilityTier::Private {
1762 scope_label: "sec-embargo".into(),
1763 },
1764 );
1765
1766 // `dest` travels through a symlink to a not-yet-existing leaf, so a
1767 // canonicalize BEFORE the dir is created resolves differently (falls
1768 // back to `link/out`) than one AFTER (`real/out`).
1769 let dest_holder = TempDir::new().unwrap();
1770 let real = dest_holder.path().join("real");
1771 fs::create_dir_all(&real).unwrap();
1772 std::os::unix::fs::symlink(&real, dest_holder.path().join("link")).unwrap();
1773 let dest = dest_holder.path().join("link").join("out");
1774
1775 repo.materialize_thread("main", &dest, &AudienceTier::Internal)
1776 .unwrap();
1777 assert!(dest.join(COURTESY_STUB_FILENAME).exists());
1778 assert!(!dest.join("secret.rs").exists());
1779
1780 // Capture through the symlinked path must be a NO-OP: the marker was
1781 // keyed on the same canonical root (`real/out`) capture resolves.
1782 let outcome = repo.capture_thread_from_disk("main", &dest).unwrap();
1783 assert_eq!(
1784 outcome,
1785 ThreadCaptureOutcome::NoOp,
1786 "withheld checkout reached via a symlinked path must not be capturable"
1787 );
1788 }
1789
1790 /// #316 / PR #528 r8 HOLE 1: the withheld reduction must NOT depend on the
1791 /// clobberable per-thread `manifest.toml`. A root first materialized VISIBLE
1792 /// (holding `old-secret.txt`), THEN observed while a sibling worktree of the
1793 /// SAME thread is materialized (the event that clobbers the per-thread
1794 /// manifest, retargeting it at the sibling's root), THEN re-materialized
1795 /// WITHHELD against a LATER state whose tree no longer contains
1796 /// `old-secret.txt`, must still end up holding ONLY the courtesy stub. The
1797 /// secret is in NEITHER the withheld state's own tree NOR (post-clobber) the
1798 /// per-thread manifest — only the clobber-proof per-root record names it, so
1799 /// the reduction can only succeed by sourcing that record.
1800 #[test]
1801 fn withheld_reduction_survives_sibling_manifest_clobber() {
1802 let repo_dir = TempDir::new().unwrap();
1803 let repo = Repository::init_default(repo_dir.path()).unwrap();
1804
1805 // State S1 (visible): contains the secret that must not linger later.
1806 fs::write(repo_dir.path().join("old-secret.txt"), b"launch codes\n").unwrap();
1807 repo.snapshot(Some("seed with secret".into()), None)
1808 .unwrap();
1809
1810 // Root A materialized VISIBLE at S1 — the real bytes land on disk and the
1811 // clobber-proof per-root record for A captures `old-secret.txt`.
1812 let a_holder = TempDir::new().unwrap();
1813 let root_a = a_holder.path().join("root-a");
1814 checkout_main(&repo, &root_a, &AudienceTier::Internal);
1815 assert!(root_a.join("old-secret.txt").exists());
1816
1817 // Advance the thread to S2: the secret is REMOVED before this state, a
1818 // new tracked file replaces it. So `old-secret.txt` is absent from S2's
1819 // tree entirely.
1820 fs::remove_file(repo_dir.path().join("old-secret.txt")).unwrap();
1821 fs::write(repo_dir.path().join("kept.txt"), b"benign\n").unwrap();
1822 repo.snapshot(Some("drop secret, advance".into()), None)
1823 .unwrap();
1824 embargo_state_with_tier(
1825 &repo,
1826 VisibilityTier::Private {
1827 scope_label: "sec-embargo".into(),
1828 },
1829 );
1830
1831 // A sibling worktree B of the SAME thread is materialized (authorized, at
1832 // S2). `materialize_thread` rewrites `threads/main/manifest.toml` keyed by
1833 // thread name, so this CLOBBERS A's record there — `manifest_for_worktree_root(A)`
1834 // now resolves to B, the precise race that reopened the leak in r7.
1835 let b_holder = TempDir::new().unwrap();
1836 let root_b = b_holder.path().join("root-b");
1837 repo.materialize_thread(
1838 "main",
1839 &root_b,
1840 &AudienceTier::Restricted("sec-embargo".into()),
1841 )
1842 .unwrap();
1843 assert!(root_b.join("kept.txt").exists());
1844 // Confirm the clobber really happened: the per-thread manifest no longer
1845 // records root A.
1846 assert!(
1847 crate::thread_manifest::manifest_for_worktree_root(
1848 repo.heddle_dir(),
1849 &canonical_worktree_path(&root_a),
1850 )
1851 .unwrap()
1852 .is_none(),
1853 "sibling materialize must have clobbered A's per-thread manifest record"
1854 );
1855
1856 // Re-materialize root A WITHHELD (Internal can't see S2's Private tier).
1857 // S2's tree does not contain `old-secret.txt`, and the per-thread
1858 // manifest no longer names A — only the clobber-proof per-root record can
1859 // drive its removal.
1860 checkout_main(&repo, &root_a, &AudienceTier::Internal);
1861
1862 assert!(
1863 root_a.join(COURTESY_STUB_FILENAME).exists(),
1864 "withheld checkout must hold the courtesy stub"
1865 );
1866 assert!(
1867 !root_a.join("old-secret.txt").exists(),
1868 "the prior visible tree's secret must be GONE even though the per-thread manifest was clobbered"
1869 );
1870 let remaining: Vec<_> = fs::read_dir(&root_a)
1871 .unwrap()
1872 .map(|e| e.unwrap().file_name())
1873 .collect();
1874 assert_eq!(
1875 remaining.len(),
1876 1,
1877 "withheld root must contain only the courtesy stub, got {remaining:?}"
1878 );
1879 assert_eq!(remaining[0].to_str().unwrap(), COURTESY_STUB_FILENAME);
1880 }
1881
1882 /// #316 / PR #528 r9 FINDING 4: close the per-root `.leaves`-staleness CLASS.
1883 /// `capture_thread_from_disk` rewrites `manifest.toml` but used to leave the
1884 /// clobber-proof per-root `.leaves` record untouched, so a captured-but-
1885 /// later-withheld leaf leaked. Sequence: a visible checkout holding `{a}`;
1886 /// the user adds `b` and captures (head advances, `.leaves` MUST refresh to
1887 /// `{a, b}`); the thread then advances to a state whose tree drops `b` and is
1888 /// embargoed; re-materializing the SAME root WITHHELD against that state must
1889 /// leave ONLY the stub — `b` (on disk from the capture) must be GONE, not
1890 /// leaked next to the stub. The withheld state's own tree lacks `b`, so only
1891 /// a `.leaves` record the capture refreshed can drive `b`'s removal.
1892 #[test]
1893 fn capture_refreshes_materialized_leaves() {
1894 let repo_dir = TempDir::new().unwrap();
1895 let repo = Repository::init_default(repo_dir.path()).unwrap();
1896
1897 // S1 (visible): tracked `a.txt`.
1898 fs::write(repo_dir.path().join("a.txt"), b"alpha\n").unwrap();
1899 repo.snapshot(Some("seed a".into()), None).unwrap();
1900
1901 // Materialize root R visible (Internal) at S1 → disk {a.txt},
1902 // .leaves(R) = {a.txt}.
1903 let holder = TempDir::new().unwrap();
1904 let root = holder.path().join("root");
1905 checkout_main(&repo, &root, &AudienceTier::Internal);
1906 assert!(root.join("a.txt").exists());
1907
1908 // User adds `b.txt` in R and captures → head advances to S2 = {a, b}.
1909 // The capture MUST refresh the per-root `.leaves` record to include
1910 // `b.txt` (the class-fix: capture rewrites the manifest AND `.leaves`).
1911 fs::write(root.join("b.txt"), b"beta\n").unwrap();
1912 match repo.capture_thread_from_disk("main", &root).unwrap() {
1913 ThreadCaptureOutcome::Captured { .. } => {}
1914 ThreadCaptureOutcome::NoOp => panic!("adding b.txt must produce a real capture"),
1915 }
1916 let leaves = crate::thread_manifest::read_materialized_leaves(
1917 repo.heddle_dir(),
1918 &canonical_worktree_path(&root),
1919 )
1920 .unwrap()
1921 .expect("capture must have written a per-root leaves record");
1922 assert!(
1923 leaves.contains("a.txt") && leaves.contains("b.txt"),
1924 "capture must refresh the per-root .leaves record to the captured tree's leaves, got {leaves:?}"
1925 );
1926
1927 // Advance the thread to S3 whose tree LACKS b.txt: snapshot from the main
1928 // repo dir (which only holds a.txt and is NOT the materialized worktree,
1929 // so the manifest is not refreshed here), then embargo S3 Private.
1930 fs::write(repo_dir.path().join("a.txt"), b"alpha v2\n").unwrap();
1931 repo.snapshot(Some("drop b, advance".into()), None).unwrap();
1932 embargo_state_with_tier(
1933 &repo,
1934 VisibilityTier::Private {
1935 scope_label: "sec-embargo".into(),
1936 },
1937 );
1938
1939 // Re-materialize R WITHHELD (Internal under-tier for the Private S3). S3's
1940 // own tree has no b.txt, so the withheld reduction can only remove the
1941 // capture-added b.txt by sourcing the refreshed per-root record.
1942 checkout_main(&repo, &root, &AudienceTier::Internal);
1943
1944 assert!(
1945 root.join(COURTESY_STUB_FILENAME).exists(),
1946 "withheld checkout must hold the courtesy stub"
1947 );
1948 assert!(
1949 !root.join("b.txt").exists(),
1950 "the capture-added leaf must be removed by the withheld reduction, not leaked next to the stub"
1951 );
1952 let remaining: Vec<_> = fs::read_dir(&root)
1953 .unwrap()
1954 .map(|e| e.unwrap().file_name())
1955 .collect();
1956 assert_eq!(
1957 remaining.len(),
1958 1,
1959 "withheld root must contain only the courtesy stub, got {remaining:?}"
1960 );
1961 assert_eq!(remaining[0].to_str().unwrap(), COURTESY_STUB_FILENAME);
1962 }
1963
1964 /// #316 / PR #528 r3 Finding 1: materializing an under-tier checkout writes
1965 /// the courtesy stub and marks the manifest `withheld`. A subsequent
1966 /// capture of that checkout must be a NO-OP — it must NOT pull the stub in
1967 /// as tracked content, and (crucially) must NOT commit an empty tree that
1968 /// wipes the withheld state's real files. The thread head stays put.
1969 #[test]
1970 fn capture_skips_embargo_courtesy_stub() {
1971 let repo_dir = TempDir::new().unwrap();
1972 let repo = Repository::init_default(repo_dir.path()).unwrap();
1973 fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
1974 repo.snapshot(Some("embargoed fix".into()), None).unwrap();
1975 embargo_state_with_tier(
1976 &repo,
1977 VisibilityTier::Private {
1978 scope_label: "sec-embargo".into(),
1979 },
1980 );
1981
1982 let dest_holder = TempDir::new().unwrap();
1983 let dest = dest_holder.path().join("out");
1984 // Under-tier audience → only the stub lands; no real bytes, empty files.
1985 let manifest = repo
1986 .materialize_thread("main", &dest, &AudienceTier::Internal)
1987 .unwrap();
1988 assert!(
1989 dest.join(COURTESY_STUB_FILENAME).exists(),
1990 "stub must be written for the under-tier checkout"
1991 );
1992 assert!(
1993 manifest.files.is_empty(),
1994 "no tracked files in a stub checkout"
1995 );
1996 assert!(
1997 manifest.withheld,
1998 "manifest must mark the checkout withheld"
1999 );
2000
2001 let head_before = repo
2002 .refs()
2003 .get_thread(&ThreadName::new("main"))
2004 .unwrap()
2005 .expect("head");
2006
2007 // Capture the withheld checkout.
2008 let outcome = repo.capture_thread_from_disk("main", &dest).unwrap();
2009 assert_eq!(
2010 outcome,
2011 ThreadCaptureOutcome::NoOp,
2012 "a withheld checkout is non-capturable"
2013 );
2014
2015 // Thread head must not have moved.
2016 let head_after = repo
2017 .refs()
2018 .get_thread(&ThreadName::new("main"))
2019 .unwrap()
2020 .expect("head");
2021 assert_eq!(
2022 head_before, head_after,
2023 "withheld capture must not advance the thread head"
2024 );
2025
2026 // The thread's tree is still the real embargoed tree: it contains the
2027 // withheld content and NOT the courtesy stub.
2028 let head_state = repo.store().get_state(&head_after).unwrap().unwrap();
2029 let tree = repo.store().get_tree(&head_state.tree).unwrap().unwrap();
2030 assert!(
2031 !tree
2032 .entries()
2033 .iter()
2034 .any(|e| e.name == COURTESY_STUB_FILENAME),
2035 "captured tree must never contain the courtesy stub"
2036 );
2037 assert!(
2038 tree.entries().iter().any(|e| e.name == "secret.rs"),
2039 "the withheld real content must remain intact in the thread"
2040 );
2041 }
2042
2043 /// #316 / PR #528 r4: the withheld status must be scoped per *worktree
2044 /// root*, not per thread. When one thread is materialized into TWO
2045 /// worktrees — an authorized one A (real bytes) and an under-tier one B
2046 /// (withheld stub) — the under-tier materialize of B clobbers the single
2047 /// per-thread `manifest.toml`. A withheld flag stored there would then
2048 /// wrongly suppress a capture of A, silently dropping legitimate work.
2049 /// With the per-worktree marker, A captures its real edits and B no-ops.
2050 #[test]
2051 fn withheld_manifest_is_per_worktree_not_per_thread() {
2052 let repo_dir = TempDir::new().unwrap();
2053 let repo = Repository::init_default(repo_dir.path()).unwrap();
2054 fs::write(repo_dir.path().join("secret.rs"), b"fn exploit() {}\n").unwrap();
2055 repo.snapshot(Some("embargoed fix".into()), None).unwrap();
2056 embargo_state_with_tier(
2057 &repo,
2058 VisibilityTier::Private {
2059 scope_label: "sec-embargo".into(),
2060 },
2061 );
2062
2063 let holder_a = TempDir::new().unwrap();
2064 let worktree_a = holder_a.path().join("authorized");
2065 let holder_b = TempDir::new().unwrap();
2066 let worktree_b = holder_b.path().join("under-tier");
2067
2068 // Worktree A: the matching-scope holder gets the real bytes.
2069 let manifest_a = repo
2070 .materialize_thread(
2071 "main",
2072 &worktree_a,
2073 &AudienceTier::Restricted("sec-embargo".into()),
2074 )
2075 .unwrap();
2076 assert!(worktree_a.join("secret.rs").exists());
2077 assert!(manifest_a.files.contains_key("secret.rs"));
2078
2079 // Edit A so a correct capture produces a NEW state. Without the edit,
2080 // capturing unchanged real content is a *legitimate* no-op and wouldn't
2081 // distinguish the bug (wrong withheld-suppression) from correct
2082 // behaviour.
2083 fs::write(worktree_a.join("extra.rs"), b"fn added() {}\n").unwrap();
2084
2085 let head_before = repo
2086 .refs()
2087 .get_thread(&ThreadName::new("main"))
2088 .unwrap()
2089 .expect("head");
2090
2091 // Worktree B: under-tier audience → stub only, withheld. This clobbers
2092 // the single per-thread `manifest.toml` with B's withheld record.
2093 let manifest_b = repo
2094 .materialize_thread("main", &worktree_b, &AudienceTier::Internal)
2095 .unwrap();
2096 assert!(worktree_b.join(COURTESY_STUB_FILENAME).exists());
2097 assert!(manifest_b.files.is_empty());
2098
2099 // Capture A: must capture the real edit — its withheld status is its
2100 // own (none), NOT inherited from B's clobbering materialize.
2101 let outcome_a = repo.capture_thread_from_disk("main", &worktree_a).unwrap();
2102 let captured_state = match outcome_a {
2103 ThreadCaptureOutcome::Captured { state_id } => state_id,
2104 ThreadCaptureOutcome::NoOp => {
2105 panic!("authorized worktree A must capture its real edit, not be suppressed")
2106 }
2107 };
2108 let head_after_a = repo
2109 .refs()
2110 .get_thread(&ThreadName::new("main"))
2111 .unwrap()
2112 .expect("head");
2113 assert_ne!(head_before, head_after_a, "capture A must advance the head");
2114 assert_eq!(head_after_a, captured_state);
2115 // The captured tree carries the edit and the real content, never the stub.
2116 let captured_tree = repo
2117 .store()
2118 .get_tree(
2119 &repo
2120 .store()
2121 .get_state(&captured_state)
2122 .unwrap()
2123 .unwrap()
2124 .tree,
2125 )
2126 .unwrap()
2127 .unwrap();
2128 assert!(captured_tree.entries().iter().any(|e| e.name == "extra.rs"));
2129 assert!(
2130 captured_tree
2131 .entries()
2132 .iter()
2133 .any(|e| e.name == "secret.rs")
2134 );
2135 assert!(
2136 !captured_tree
2137 .entries()
2138 .iter()
2139 .any(|e| e.name == COURTESY_STUB_FILENAME)
2140 );
2141
2142 // Capture B: must be a no-op — its own worktree is withheld.
2143 let outcome_b = repo.capture_thread_from_disk("main", &worktree_b).unwrap();
2144 assert_eq!(
2145 outcome_b,
2146 ThreadCaptureOutcome::NoOp,
2147 "under-tier worktree B is non-capturable"
2148 );
2149 let head_after_b = repo
2150 .refs()
2151 .get_thread(&ThreadName::new("main"))
2152 .unwrap()
2153 .expect("head");
2154 assert_eq!(
2155 head_after_a, head_after_b,
2156 "withheld capture of B must not advance the head"
2157 );
2158 }
2159
2160 /// `record_thread_manifest` should write a manifest sidecar that
2161 /// matches what `materialize_thread` would have produced, for a
2162 /// worktree the caller materialized directly via `materialize_tree`.
2163 /// Used by the CLI's `start` path (which sets the worktree up
2164 /// itself rather than going through `materialize_thread`).
2165 #[test]
2166 fn record_thread_manifest_writes_sidecar_for_externally_materialized_worktree() {
2167 let repo_dir = TempDir::new().unwrap();
2168 let repo = Repository::init_default(repo_dir.path()).unwrap();
2169 fs::write(repo_dir.path().join("a.txt"), b"alpha\n").unwrap();
2170 fs::write(repo_dir.path().join("b.txt"), b"beta\n").unwrap();
2171 repo.snapshot(Some("seed".into()), None).unwrap();
2172 let state_id = repo
2173 .refs()
2174 .get_thread(&ThreadName::new("main"))
2175 .unwrap()
2176 .expect("head present");
2177
2178 // Materialize externally via the lower-level `materialize_tree`
2179 // path — the shape `start --workspace materialized` uses.
2180 let dest_holder = TempDir::new().unwrap();
2181 let dest = dest_holder.path().join("out");
2182 let state = repo.store().get_state(&state_id).unwrap().unwrap();
2183 let tree = repo.store().get_tree(&state.tree).unwrap().unwrap();
2184 repo.materialize_tree(&tree, &dest).unwrap();
2185
2186 // No manifest written yet — `materialize_tree` is the bytes-only
2187 // step; the sidecar is recorded explicitly.
2188 assert!(
2189 read_manifest(repo.heddle_dir(), "feature/x")
2190 .unwrap()
2191 .is_none()
2192 );
2193
2194 let recorded = repo
2195 .record_thread_manifest("feature/x", &state_id, &dest)
2196 .unwrap();
2197 assert_eq!(recorded.state_id, state_id);
2198 assert_eq!(recorded.tree_hash, state.tree);
2199 assert!(recorded.files.contains_key("a.txt"));
2200 assert!(recorded.files.contains_key("b.txt"));
2201 assert_eq!(recorded.files["a.txt"].size, b"alpha\n".len() as u64);
2202
2203 // Sidecar persists at the expected location and round-trips.
2204 let loaded = read_manifest(repo.heddle_dir(), "feature/x")
2205 .unwrap()
2206 .expect("manifest on disk");
2207 assert_eq!(loaded.state_id, recorded.state_id);
2208 assert_eq!(loaded.files.len(), recorded.files.len());
2209
2210 // Idempotent: a second recording for the same thread succeeds
2211 // (used by `capture_thread_from_disk` post-capture refresh).
2212 repo.record_thread_manifest("feature/x", &state_id, &dest)
2213 .unwrap();
2214 }
2215
2216 /// `record_thread_manifest` against an unknown `state_id` should
2217 /// surface a clear "state missing" error instead of silently
2218 /// writing a manifest with no files (which would later look like
2219 /// a deletion of every tracked path).
2220 #[test]
2221 fn record_thread_manifest_errors_when_state_is_missing() {
2222 let repo_dir = TempDir::new().unwrap();
2223 let repo = Repository::init_default(repo_dir.path()).unwrap();
2224 let dest = TempDir::new().unwrap();
2225 let missing = objects::object::ChangeId::generate();
2226 let err = repo
2227 .record_thread_manifest("feature/x", &missing, &dest.path().join("out"))
2228 .expect_err("should fail when state is unknown");
2229 let message = format!("{err}");
2230 assert!(
2231 message.contains("missing"),
2232 "error message names the missing artifact: {message}"
2233 );
2234 }
2235
2236 #[test]
2237 fn materialize_unknown_thread_errors() {
2238 let repo_dir = TempDir::new().unwrap();
2239 let repo = Repository::init_default(repo_dir.path()).unwrap();
2240 let dest = TempDir::new().unwrap();
2241 let err = repo
2242 .materialize_thread(
2243 "no-such-thread",
2244 &dest.path().join("out"),
2245 &AudienceTier::Internal,
2246 )
2247 .expect_err("should fail");
2248 assert!(format!("{err}").contains("unknown thread"));
2249 }
2250
2251 /// Round-trip: materialize → edit a file → capture → confirm a
2252 /// new state was written, thread head advanced, and the manifest
2253 /// reflects the new state.
2254 #[test]
2255 fn capture_after_edit_advances_thread() {
2256 let repo_dir = TempDir::new().unwrap();
2257 let repo = Repository::init_default(repo_dir.path()).unwrap();
2258 fs::write(repo_dir.path().join("hello.txt"), b"hello\n").unwrap();
2259 repo.snapshot(Some("seed".into()), None).unwrap();
2260 let before = repo
2261 .refs()
2262 .get_thread(&ThreadName::new("main"))
2263 .unwrap()
2264 .expect("head");
2265
2266 let dest_holder = TempDir::new().unwrap();
2267 let dest = dest_holder.path().join("out");
2268 let materialize_manifest = repo
2269 .materialize_thread("main", &dest, &AudienceTier::Internal)
2270 .unwrap();
2271
2272 // Mutate a file in the materialized worktree.
2273 fs::write(dest.join("hello.txt"), b"hello world\n").unwrap();
2274
2275 let outcome = repo
2276 .capture_thread_from_disk("main", &dest)
2277 .expect("capture");
2278 let new_state = match outcome {
2279 ThreadCaptureOutcome::Captured { state_id } => state_id,
2280 ThreadCaptureOutcome::NoOp => panic!("expected Captured, got NoOp"),
2281 };
2282
2283 // Thread head advanced.
2284 let after = repo
2285 .refs()
2286 .get_thread(&ThreadName::new("main"))
2287 .unwrap()
2288 .expect("head");
2289 assert_ne!(before, after);
2290 assert_eq!(after, new_state);
2291
2292 // Manifest reflects the new state.
2293 let loaded = read_manifest(repo.heddle_dir(), "main")
2294 .unwrap()
2295 .expect("manifest");
2296 assert_eq!(loaded.state_id, new_state);
2297 assert_ne!(loaded.tree_hash, materialize_manifest.tree_hash);
2298 assert!(loaded.files.contains_key("hello.txt"));
2299 }
2300
2301 /// Capture with no edits is a no-op: thread head unchanged,
2302 /// manifest refreshed in place.
2303 #[test]
2304 fn capture_with_no_changes_is_noop() {
2305 let repo_dir = TempDir::new().unwrap();
2306 let repo = Repository::init_default(repo_dir.path()).unwrap();
2307 fs::write(repo_dir.path().join("steady.txt"), b"unchanged\n").unwrap();
2308 repo.snapshot(Some("seed".into()), None).unwrap();
2309 let before = repo
2310 .refs()
2311 .get_thread(&ThreadName::new("main"))
2312 .unwrap()
2313 .expect("head");
2314
2315 let dest_holder = TempDir::new().unwrap();
2316 let dest = dest_holder.path().join("out");
2317 repo.materialize_thread("main", &dest, &AudienceTier::Internal)
2318 .unwrap();
2319
2320 let outcome = repo.capture_thread_from_disk("main", &dest).unwrap();
2321 assert_eq!(outcome, ThreadCaptureOutcome::NoOp);
2322
2323 // Thread head unchanged.
2324 let after = repo
2325 .refs()
2326 .get_thread(&ThreadName::new("main"))
2327 .unwrap()
2328 .expect("head");
2329 assert_eq!(before, after);
2330 }
2331
2332 /// Stat-cache fast no-op: a fresh-materialised tree captures
2333 /// without invoking `build_tree`. Detected via the manifest
2334 /// reflecting bytes byte-identical to what got materialised.
2335 #[test]
2336 fn stat_cache_short_circuits_unchanged_capture() {
2337 let repo_dir = TempDir::new().unwrap();
2338 let repo = Repository::init_default(repo_dir.path()).unwrap();
2339 for i in 0..20 {
2340 fs::write(
2341 repo_dir.path().join(format!("file_{i:02}.txt")),
2342 format!("content {i}\n").as_bytes(),
2343 )
2344 .unwrap();
2345 }
2346 repo.snapshot(Some("seed".into()), None).unwrap();
2347
2348 let dest_holder = TempDir::new().unwrap();
2349 let dest = dest_holder.path().join("out");
2350 let manifest = repo
2351 .materialize_thread("main", &dest, &AudienceTier::Internal)
2352 .unwrap();
2353 assert_eq!(manifest.files.len(), 20);
2354
2355 // The fast-path predicate alone — without touching the
2356 // store-side `build_tree`. Exposes the boundary the
2357 // optimisation guards.
2358 assert!(
2359 stat_cache_no_op(&repo, &manifest, &dest).unwrap(),
2360 "fresh materialise should stat-match the manifest"
2361 );
2362
2363 // Full call also returns NoOp.
2364 let outcome = repo.capture_thread_from_disk("main", &dest).unwrap();
2365 assert_eq!(outcome, ThreadCaptureOutcome::NoOp);
2366 }
2367
2368 /// Stat-cache invalidates correctly on edit: a single touched
2369 /// file flips `stat_cache_no_op` to `false`, which forces the
2370 /// slow path to run and produces a new state.
2371 #[test]
2372 fn stat_cache_detects_edit_and_falls_through() {
2373 let repo_dir = TempDir::new().unwrap();
2374 let repo = Repository::init_default(repo_dir.path()).unwrap();
2375 fs::write(repo_dir.path().join("only.txt"), b"v1\n").unwrap();
2376 repo.snapshot(Some("seed".into()), None).unwrap();
2377
2378 let dest_holder = TempDir::new().unwrap();
2379 let dest = dest_holder.path().join("out");
2380 let manifest = repo
2381 .materialize_thread("main", &dest, &AudienceTier::Internal)
2382 .unwrap();
2383
2384 // Sleep briefly so the mtime moves; APFS gives sub-ms
2385 // resolution on modern macOS but Linux ext4 is only
2386 // 1-second granularity for ctime — make the test robust
2387 // either way.
2388 std::thread::sleep(std::time::Duration::from_millis(20));
2389 fs::write(dest.join("only.txt"), b"v2\n").unwrap();
2390
2391 assert!(
2392 !stat_cache_no_op(&repo, &manifest, &dest).unwrap(),
2393 "edited file must invalidate the fast path"
2394 );
2395
2396 // Slow path runs and creates a new state.
2397 match repo.capture_thread_from_disk("main", &dest).unwrap() {
2398 ThreadCaptureOutcome::Captured { .. } => {}
2399 other => panic!("expected Captured, got {other:?}"),
2400 }
2401 }
2402
2403 /// New file added out of band → fast path declines.
2404 #[test]
2405 fn stat_cache_detects_added_file() {
2406 let repo_dir = TempDir::new().unwrap();
2407 let repo = Repository::init_default(repo_dir.path()).unwrap();
2408 fs::write(repo_dir.path().join("a.txt"), b"a\n").unwrap();
2409 repo.snapshot(Some("seed".into()), None).unwrap();
2410
2411 let dest_holder = TempDir::new().unwrap();
2412 let dest = dest_holder.path().join("out");
2413 let manifest = repo
2414 .materialize_thread("main", &dest, &AudienceTier::Internal)
2415 .unwrap();
2416
2417 fs::write(dest.join("b.txt"), b"b\n").unwrap();
2418
2419 assert!(
2420 !stat_cache_no_op(&repo, &manifest, &dest).unwrap(),
2421 "added file must invalidate the fast path"
2422 );
2423 }
2424
2425 /// Plain `heddle capture` (via `Repository::snapshot`) detects the
2426 /// materialized-thread context — HEAD attached to a thread that has
2427 /// a manifest — and refreshes the manifest to the new state after
2428 /// the capture lands. This is the path the user hits when they edit
2429 /// inside a materialized thread worktree and run `heddle capture`
2430 /// directly (as opposed to `thread switch`, which is the auto-capture
2431 /// path covered by `capture_after_edit_advances_thread`).
2432 #[test]
2433 fn snapshot_in_materialized_thread_refreshes_manifest() {
2434 let repo_dir = TempDir::new().unwrap();
2435 let repo = Repository::init_default(repo_dir.path()).unwrap();
2436 fs::write(repo_dir.path().join("alpha.txt"), b"v1\n").unwrap();
2437 fs::write(repo_dir.path().join("beta.txt"), b"steady\n").unwrap();
2438 let initial = repo.snapshot(Some("seed".into()), None).unwrap();
2439
2440 // Stand up a manifest for `main` whose stat fields match the
2441 // worktree as it is right now. Mimics the post-materialize
2442 // state when the user is `cd`'d into the materialized
2443 // worktree (`self.root` == materialized path).
2444 let initial_tree = repo
2445 .store()
2446 .get_tree(&initial.tree)
2447 .unwrap()
2448 .expect("seed tree");
2449 let mut manifest = crate::thread_manifest::ThreadManifest::new(
2450 initial.change_id,
2451 initial.tree,
2452 canonical_worktree_path(repo_dir.path()),
2453 );
2454 populate_manifest_from_tree(
2455 &repo,
2456 &initial_tree,
2457 repo_dir.path(),
2458 "",
2459 &mut manifest.files,
2460 )
2461 .unwrap();
2462 crate::thread_manifest::write_manifest(repo.heddle_dir(), "main", &manifest).unwrap();
2463
2464 // Sleep long enough that the new mtime is observably distinct
2465 // on ext4's 1-second-granularity ctime (APFS is sub-ms).
2466 std::thread::sleep(std::time::Duration::from_millis(20));
2467 fs::write(repo_dir.path().join("alpha.txt"), b"v2\n").unwrap();
2468
2469 let captured = repo.snapshot(Some("after edit".into()), None).unwrap();
2470 assert_ne!(captured.change_id, initial.change_id);
2471 assert_ne!(captured.tree, initial.tree);
2472
2473 // Manifest got refreshed to point at the new state and tree.
2474 let refreshed = crate::thread_manifest::read_manifest(repo.heddle_dir(), "main")
2475 .unwrap()
2476 .expect("manifest persists");
2477 assert_eq!(refreshed.state_id, captured.change_id);
2478 assert_eq!(refreshed.tree_hash, captured.tree);
2479 // beta.txt was untouched — its stat fields (and hash) should
2480 // still appear in the refreshed manifest.
2481 assert!(refreshed.files.contains_key("alpha.txt"));
2482 assert!(refreshed.files.contains_key("beta.txt"));
2483 }
2484
2485 /// Regression: snapshot from a directory that is NOT the
2486 /// manifest's recorded worktree path must NOT refresh the
2487 /// manifest. Pre-fix, the snapshot code detected the
2488 /// "materialized-thread context" purely by `HEAD attached + a
2489 /// manifest exists for the attached thread", so a snapshot from
2490 /// the main repo dir (or any sibling worktree) would corrupt the
2491 /// manifest by writing the wrong directory's stat fields into it
2492 /// — and `heddle status` would then falsely report the
2493 /// materialized worktree as fresh because the manifest's
2494 /// `state_id` had auto-rolled forward.
2495 #[test]
2496 fn snapshot_outside_materialized_worktree_does_not_refresh_manifest() {
2497 let repo_dir = TempDir::new().unwrap();
2498 let repo = Repository::init_default(repo_dir.path()).unwrap();
2499 fs::write(repo_dir.path().join("alpha.txt"), b"v1\n").unwrap();
2500 repo.snapshot(Some("seed".into()), None).unwrap();
2501
2502 // Materialize "main" at a totally separate path. Manifest
2503 // records `dest_holder/out` as the worktree.
2504 let dest_holder = TempDir::new().unwrap();
2505 let dest = dest_holder.path().join("out");
2506 let materialize_manifest = repo
2507 .materialize_thread("main", &dest, &AudienceTier::Internal)
2508 .unwrap();
2509 let materialize_state_id = materialize_manifest.state_id;
2510 let materialize_tree_hash = materialize_manifest.tree_hash;
2511 let materialized_path = materialize_manifest.worktree_path.clone();
2512 assert_eq!(
2513 materialized_path,
2514 canonical_worktree_path(&dest),
2515 "manifest must record the canonical materialize destination"
2516 );
2517
2518 // Now run snapshot from the MAIN repo dir (`repo.root()`) —
2519 // a path that is NOT the materialized worktree. The pre-fix
2520 // bug fired here.
2521 std::thread::sleep(std::time::Duration::from_millis(20));
2522 fs::write(repo_dir.path().join("alpha.txt"), b"v2-from-main-repo\n").unwrap();
2523 let snap = repo
2524 .snapshot(Some("from main repo, not the mat worktree".into()), None)
2525 .unwrap();
2526 assert_ne!(
2527 snap.change_id, materialize_state_id,
2528 "snapshot must advance main's head"
2529 );
2530
2531 // The manifest must NOT have been refreshed: state_id and
2532 // tree_hash still point at the materialize state, worktree
2533 // path still points at `dest`.
2534 let after = crate::thread_manifest::read_manifest(repo.heddle_dir(), "main")
2535 .unwrap()
2536 .expect("manifest still present");
2537 assert_eq!(
2538 after.state_id, materialize_state_id,
2539 "manifest state_id must NOT advance when snapshot is taken outside the materialized worktree"
2540 );
2541 assert_eq!(
2542 after.tree_hash, materialize_tree_hash,
2543 "manifest tree_hash must NOT advance"
2544 );
2545 assert_eq!(
2546 after.worktree_path, materialized_path,
2547 "manifest worktree_path must be unchanged"
2548 );
2549
2550 // And `heddle status`'s staleness check should now correctly
2551 // report the materialized worktree as stale (head moved,
2552 // manifest didn't).
2553 let head_now = repo
2554 .refs()
2555 .get_thread(&ThreadName::new("main"))
2556 .unwrap()
2557 .expect("head");
2558 assert_ne!(
2559 head_now, after.state_id,
2560 "post-fix invariant: main head advanced past manifest's recorded state → stale"
2561 );
2562 }
2563
2564 /// Capture from a *dedicated* thread worktree (one whose path
2565 /// differs from `repo.root()`) must validate symlinks against
2566 /// that worktree's path, not against the main repo root.
2567 /// Pre-fix the walker passed `repo.root()` as the symlink-
2568 /// escape base, so every symlink inside a dedicated thread
2569 /// path was rejected as "outside the repo" the moment the
2570 /// slow path ran — `thread switch` auto-capture broke for any
2571 /// thread that contained a symlink. Reproduces the codex P2
2572 /// from review pass 2.
2573 #[cfg(unix)]
2574 #[test]
2575 fn capture_thread_from_disk_accepts_symlinks_in_dedicated_worktree() {
2576 let repo_dir = TempDir::new().unwrap();
2577 let repo = Repository::init_default(repo_dir.path()).unwrap();
2578 // Seed with a file + a symlink pointing inside the repo.
2579 fs::write(repo_dir.path().join("target.txt"), b"target\n").unwrap();
2580 std::os::unix::fs::symlink("target.txt", repo_dir.path().join("link")).unwrap();
2581 repo.snapshot(Some("seed".into()), None).unwrap();
2582
2583 // Materialise into a dedicated worktree — path differs
2584 // from `repo.root()`, which is exactly the case that
2585 // exposes the bug.
2586 let dest_holder = TempDir::new().unwrap();
2587 let dest = dest_holder.path().join("thread-worktree");
2588 repo.materialize_thread("main", &dest, &AudienceTier::Internal)
2589 .unwrap();
2590
2591 // Edit a non-symlink file so the slow path fires (the fast
2592 // stat-cache no-op would mask the bug). Sleep so the mtime
2593 // observably moves on coarse-granularity filesystems.
2594 std::thread::sleep(std::time::Duration::from_millis(20));
2595 fs::write(dest.join("target.txt"), b"target v2\n").unwrap();
2596
2597 // Pre-fix this errored with "symlink target escapes repo"
2598 // because `validate_symlink_target` was using `repo.root()`
2599 // as the allowed base instead of the walk root.
2600 let outcome = repo
2601 .capture_thread_from_disk("main", &dest)
2602 .expect("capture must accept symlinks inside the dedicated worktree");
2603 match outcome {
2604 ThreadCaptureOutcome::Captured { .. } => {}
2605 ThreadCaptureOutcome::NoOp => panic!("expected Captured; got NoOp"),
2606 }
2607 }
2608
2609 /// Codex pass-5 P1: when the ignore set tightens between
2610 /// materialise and capture (e.g. user adds an entry to
2611 /// `.heddleignore` covering an already-tracked path), the
2612 /// no-op predicate must bail to the slow path so `build_tree`
2613 /// can produce the tree that *now* matches the matcher. Pre-
2614 /// fix the manifest-first dispatch accepted any manifest hit
2615 /// without re-running the matcher, so the predicate silently
2616 /// false-passed and `thread switch`'s auto-capture missed
2617 /// the real tree delta.
2618 #[test]
2619 fn stat_cache_detects_ignore_config_tightening() {
2620 let repo_dir = TempDir::new().unwrap();
2621 let repo = Repository::init_default(repo_dir.path()).unwrap();
2622 // Seed: two files, no .heddleignore yet.
2623 fs::write(repo_dir.path().join("keep.txt"), b"keep\n").unwrap();
2624 fs::write(repo_dir.path().join("secret.txt"), b"secret\n").unwrap();
2625 repo.snapshot(Some("seed".into()), None).unwrap();
2626
2627 let dest_holder = TempDir::new().unwrap();
2628 let dest = dest_holder.path().join("out");
2629 let manifest = repo
2630 .materialize_thread("main", &dest, &AudienceTier::Internal)
2631 .unwrap();
2632 assert!(manifest.files.contains_key("secret.txt"));
2633
2634 // Tighten the ignore set in the source repo to exclude
2635 // `secret.txt`. The materialised worktree still has it
2636 // on disk (we just put it there), but `build_tree` would
2637 // now skip it and produce a different tree hash.
2638 fs::write(repo_dir.path().join(".heddleignore"), b"secret.txt\n").unwrap();
2639
2640 assert!(
2641 !stat_cache_no_op(&repo, &manifest, &dest).unwrap(),
2642 "ignore-config tightening over a tracked path must \
2643 invalidate the fast path; pre-fix the predicate \
2644 false-passed and auto-capture silently dropped \
2645 the resulting tree delta"
2646 );
2647 }
2648
2649 /// Codex pass-3 P2: a *tree-only* empty directory — one that
2650 /// was a captured tree entry but never had any files beneath it
2651 /// — was invisible to the pass-2 fix because `expected_dirs`
2652 /// was derived from manifest file ancestors. Removing such a
2653 /// directory left every set the same size and the predicate
2654 /// false-passed, silently dropping the change. The pass-3 fix
2655 /// derives `expected_dirs` from the captured tree directly so
2656 /// empty leaf dirs are tracked.
2657 #[test]
2658 fn stat_cache_detects_removed_tree_only_empty_directory() {
2659 let repo_dir = TempDir::new().unwrap();
2660 let repo = Repository::init_default(repo_dir.path()).unwrap();
2661 // Seed with one file (so the thread isn't empty) plus an
2662 // empty directory that becomes a tree entry on its own.
2663 fs::write(repo_dir.path().join("anchor.txt"), b"anchor\n").unwrap();
2664 fs::create_dir_all(repo_dir.path().join("empty-on-purpose")).unwrap();
2665 repo.snapshot(Some("seed".into()), None).unwrap();
2666
2667 let dest_holder = TempDir::new().unwrap();
2668 let dest = dest_holder.path().join("out");
2669 let manifest = repo
2670 .materialize_thread("main", &dest, &AudienceTier::Internal)
2671 .unwrap();
2672
2673 // Sanity: the empty dir landed on disk after materialise.
2674 assert!(
2675 dest.join("empty-on-purpose").is_dir(),
2676 "materialise must emit the empty dir on disk"
2677 );
2678
2679 // Remove the empty dir. No files inside it changed
2680 // because there never were any — pure tree-only delta.
2681 fs::remove_dir(dest.join("empty-on-purpose")).unwrap();
2682
2683 assert!(
2684 !stat_cache_no_op(&repo, &manifest, &dest).unwrap(),
2685 "removing a tree-only empty directory must invalidate \
2686 the fast path; pre-fix the predicate false-passed and \
2687 auto-capture silently dropped the deletion"
2688 );
2689 }
2690
2691 /// Empty directory added by the user — manifests only record
2692 /// files, but Heddle's tree builder emits a tree entry for the
2693 /// new dir. The stat-cache no-op predicate must decline so the
2694 /// slow path picks the change up; pre-fix it false-passed and
2695 /// `thread switch`'s auto-capture silently dropped the addition.
2696 #[test]
2697 fn stat_cache_detects_added_empty_directory() {
2698 let repo_dir = TempDir::new().unwrap();
2699 let repo = Repository::init_default(repo_dir.path()).unwrap();
2700 fs::write(repo_dir.path().join("only.txt"), b"a\n").unwrap();
2701 repo.snapshot(Some("seed".into()), None).unwrap();
2702
2703 let dest_holder = TempDir::new().unwrap();
2704 let dest = dest_holder.path().join("out");
2705 let manifest = repo
2706 .materialize_thread("main", &dest, &AudienceTier::Internal)
2707 .unwrap();
2708
2709 // Add an empty directory that has no manifest entry.
2710 fs::create_dir_all(dest.join("brand-new-empty-dir")).unwrap();
2711
2712 assert!(
2713 !stat_cache_no_op(&repo, &manifest, &dest).unwrap(),
2714 "an added empty directory must invalidate the fast path"
2715 );
2716 }
2717
2718 /// Empty directory removed by the user — the manifest expects it
2719 /// (its parent path appears as an ancestor of files) but the
2720 /// walk never visits it. The dir-side check must decline. Pre-
2721 /// fix the fast path would false-pass on this case too.
2722 #[test]
2723 fn stat_cache_detects_removed_empty_directory() {
2724 let repo_dir = TempDir::new().unwrap();
2725 let repo = Repository::init_default(repo_dir.path()).unwrap();
2726 fs::create_dir_all(repo_dir.path().join("nested/deep")).unwrap();
2727 fs::write(repo_dir.path().join("nested/deep/leaf.txt"), b"leaf\n").unwrap();
2728 repo.snapshot(Some("seed".into()), None).unwrap();
2729
2730 let dest_holder = TempDir::new().unwrap();
2731 let dest = dest_holder.path().join("out");
2732 let manifest = repo
2733 .materialize_thread("main", &dest, &AudienceTier::Internal)
2734 .unwrap();
2735
2736 // Remove the leaf file AND its parent dir. The file-side
2737 // check already catches the file removal, but if we then
2738 // synthesise a fresh leaf elsewhere we'd want the dir-side
2739 // check to catch the missing parent on its own too. Use a
2740 // slightly different shape: create + remove a sibling dir
2741 // whose ancestor matches the manifest's expected set.
2742 fs::create_dir_all(dest.join("nested/sibling-empty")).unwrap();
2743
2744 assert!(
2745 !stat_cache_no_op(&repo, &manifest, &dest).unwrap(),
2746 "an added empty directory inside an existing parent must invalidate"
2747 );
2748 }
2749
2750 /// Deleted file → fast path declines.
2751 #[test]
2752 fn stat_cache_detects_deletion() {
2753 let repo_dir = TempDir::new().unwrap();
2754 let repo = Repository::init_default(repo_dir.path()).unwrap();
2755 fs::write(repo_dir.path().join("a.txt"), b"a\n").unwrap();
2756 fs::write(repo_dir.path().join("b.txt"), b"b\n").unwrap();
2757 repo.snapshot(Some("seed".into()), None).unwrap();
2758
2759 let dest_holder = TempDir::new().unwrap();
2760 let dest = dest_holder.path().join("out");
2761 let manifest = repo
2762 .materialize_thread("main", &dest, &AudienceTier::Internal)
2763 .unwrap();
2764
2765 fs::remove_file(dest.join("a.txt")).unwrap();
2766
2767 assert!(
2768 !stat_cache_no_op(&repo, &manifest, &dest).unwrap(),
2769 "deleted file must invalidate the fast path"
2770 );
2771 }
2772
2773 /// Two `capture_thread_from_disk` calls on the same thread from
2774 /// different threads must serialize through the repository write
2775 /// lock: the thread head's parent chain must include both
2776 /// captures (no lost update where one capture's parent is the
2777 /// pre-race head instead of the other capture's state).
2778 ///
2779 /// Reproduces the race Codex P1 #2 named: pre-fix, two sibling
2780 /// worktrees doing `heddle thread switch` against the same
2781 /// source thread both read the same parent in
2782 /// `refs().get_thread()`, both `put_state` with that parent,
2783 /// both `set_thread` — whichever `set_thread` won last orphaned
2784 /// the other state on disk. With the lock both captures land in
2785 /// series and the final head's parent chain links back through
2786 /// both new states.
2787 #[test]
2788 fn concurrent_captures_serialize_via_repository_lock() {
2789 use std::sync::Arc;
2790
2791 let repo_dir = TempDir::new().unwrap();
2792 let repo = Arc::new(Repository::init_default(repo_dir.path()).unwrap());
2793 fs::write(repo_dir.path().join("shared.txt"), b"seed\n").unwrap();
2794 repo.snapshot(Some("seed".into()), None).unwrap();
2795 let initial_head = repo
2796 .refs()
2797 .get_thread(&ThreadName::new("main"))
2798 .unwrap()
2799 .expect("seeded");
2800
2801 // Two sibling materialized worktrees of the same thread.
2802 let dest_a_holder = TempDir::new().unwrap();
2803 let dest_a = dest_a_holder.path().join("a");
2804 repo.materialize_thread("main", &dest_a, &AudienceTier::Internal)
2805 .unwrap();
2806 let dest_b_holder = TempDir::new().unwrap();
2807 let dest_b = dest_b_holder.path().join("b");
2808 repo.materialize_thread("main", &dest_b, &AudienceTier::Internal)
2809 .unwrap();
2810
2811 // Disjoint edits so each capture has real work to do (no
2812 // stat-cache no-op short-circuit).
2813 std::thread::sleep(std::time::Duration::from_millis(20));
2814 fs::write(dest_a.join("shared.txt"), b"edited-by-a\n").unwrap();
2815 fs::write(dest_b.join("shared.txt"), b"edited-by-b\n").unwrap();
2816
2817 // Race the two captures.
2818 let repo_a = Arc::clone(&repo);
2819 let repo_b = Arc::clone(&repo);
2820 let h_a = std::thread::spawn(move || {
2821 repo_a
2822 .capture_thread_from_disk("main", &dest_a)
2823 .expect("capture A")
2824 });
2825 let h_b = std::thread::spawn(move || {
2826 repo_b
2827 .capture_thread_from_disk("main", &dest_b)
2828 .expect("capture B")
2829 });
2830 let outcome_a = h_a.join().expect("thread A");
2831 let outcome_b = h_b.join().expect("thread B");
2832
2833 // Both captures landed (neither was a NoOp because both
2834 // edited the same file with different bytes).
2835 let id_a = match outcome_a {
2836 ThreadCaptureOutcome::Captured { state_id } => state_id,
2837 ThreadCaptureOutcome::NoOp => panic!("A expected Captured"),
2838 };
2839 let id_b = match outcome_b {
2840 ThreadCaptureOutcome::Captured { state_id } => state_id,
2841 ThreadCaptureOutcome::NoOp => panic!("B expected Captured"),
2842 };
2843 assert_ne!(id_a, id_b, "the two captures must produce distinct states");
2844
2845 // The thread head is one of the two captures. Lock-naked,
2846 // the loser's parent would be `initial_head`. With the
2847 // lock, the loser's parent is the winner's id and the
2848 // winner's parent is `initial_head`.
2849 let final_head = repo
2850 .refs()
2851 .get_thread(&ThreadName::new("main"))
2852 .unwrap()
2853 .expect("head");
2854 let winner_id = final_head;
2855 let loser_id = if final_head == id_a { id_b } else { id_a };
2856
2857 let winner_state = repo
2858 .store()
2859 .get_state(&winner_id)
2860 .unwrap()
2861 .expect("winner state on disk");
2862 let loser_state = repo
2863 .store()
2864 .get_state(&loser_id)
2865 .unwrap()
2866 .expect("loser state on disk");
2867
2868 // The two captures must have linked through the lock:
2869 // exactly one of (winner.parents, loser.parents) names the
2870 // other; the remaining parent is the seed head. Pre-fix
2871 // both states named the seed head and the loser was
2872 // orphaned — assert that this isn't the case.
2873 let chained =
2874 winner_state.parents.contains(&loser_id) || loser_state.parents.contains(&winner_id);
2875 assert!(
2876 chained,
2877 "concurrent captures must chain through the lock; got\n \
2878 winner {winner_id} parents={:?}\n loser {loser_id} parents={:?}",
2879 winner_state.parents, loser_state.parents
2880 );
2881 assert!(
2882 winner_state.parents.contains(&initial_head)
2883 || loser_state.parents.contains(&initial_head),
2884 "the bottom of the chain must still reach the seed head"
2885 );
2886 }
2887}