// grex_core/tree/walker.rs
1//! Recursive pack-tree walker.
2//!
3//! The walker hydrates a `pack.yaml` tree: it loads the root manifest, clones
4//! (or fetches + checks out) every `children:` entry via the injected
5//! [`GitBackend`], and recurses. `depends_on` entries are recorded as edges
6//! but never walked — they are *external prereqs* verified by
7//! [`crate::pack::validate::DependsOnValidator`] after the graph is built.
8//!
9//! # Cycle detection
10//!
11//! Cycles are detected **during** the walk, not post-hoc. Each recursion
12//! maintains an ancestor stack of pack identifiers (source-url when present,
13//! otherwise the canonical on-disk path). If a child is about to be entered
14//! whose identifier is already on the stack, the walker short-circuits with
15//! [`TreeError::CycleDetected`]. A separate `CycleValidator` runs
16//! post-hoc as a belt-and-suspenders check so manually-constructed graphs
17//! cannot sneak through.
18//!
19//! # Cyclomatic discipline
20//!
21//! The walk is decomposed so each helper stays well under CC 15:
22//! `walk` → `walk_recursive` → `process_children` → `handle_child` →
23//! `resolve_destination` | `record_depends_on`.
24
25use std::collections::{BTreeMap, HashSet};
26use std::path::{Path, PathBuf};
27use std::sync::atomic::{AtomicBool, Ordering};
28use std::sync::Arc;
29
30use rayon::prelude::*;
31
32use crate::git::GitBackend;
33use crate::pack::validate::child_path::{
34 boundary_fs_reject_reason, boundary_reject_reason, check_one as check_child_path,
35 nfc_duplicate_path,
36};
37use crate::pack::{ChildRef, PackManifest, PackType, PackValidationError, SchemaVersion};
38use crate::scheduler::PoolInstallDepthGuard;
39
40use super::consent::phase2_prune;
41use super::dest_class::{aggregate_untracked, classify_dest, DestClass};
42use super::error::TreeError;
43use super::graph::{EdgeKind, PackEdge, PackGraph, PackNode};
44use super::loader::PackLoader;
45use super::quarantine::QuarantineConfig;
46
/// Recursive walker. Composes a [`PackLoader`] (for manifests) with a
/// [`GitBackend`] (for child hydration).
///
/// The walker owns no state across calls: each invocation of [`Walker::walk`]
/// produces a fresh [`PackGraph`] and leaves no footprint.
///
/// **Status (v1.2.1, path iii)**: retired from the production sync
/// orchestrator. `sync::run` now composes [`sync_meta`] (mutate) →
/// [`super::graph_build::build_graph`] (read-only) → `run_actions` instead
/// of issuing clones+fetches inside the graph build. The `Walker` symbol
/// is kept for downstream test-suite compatibility (22 fixture call sites
/// in `crates/grex-core/tests/tree_walk.rs`); new code SHOULD NOT add
/// production call sites.
#[doc(hidden)]
pub struct Walker<'a> {
    // Manifest source — injected so tests can supply in-memory loaders.
    loader: &'a dyn PackLoader,
    // Git operations (clone / fetch / checkout / head_sha) — injected.
    backend: &'a dyn GitBackend,
    // Directory under which child packs are materialised; each child's
    // `ChildRef::effective_path()` names its sub-directory.
    workspace: PathBuf,
    /// Optional global ref override (M4-D `grex sync --ref <sha|branch|tag>`).
    /// When `Some`, every child clone/checkout uses this ref instead of the
    /// declared `child.ref` from the parent manifest. `None` preserves M3
    /// semantics.
    ref_override: Option<String>,
}
71
72impl<'a> Walker<'a> {
73 /// Construct a new walker.
74 ///
75 /// `workspace` is the directory under which child packs will be cloned,
76 /// using each [`ChildRef::effective_path`] as the sub-directory name.
77 #[must_use]
78 pub fn new(
79 loader: &'a dyn PackLoader,
80 backend: &'a dyn GitBackend,
81 workspace: PathBuf,
82 ) -> Self {
83 Self { loader, backend, workspace, ref_override: None }
84 }
85
86 /// Set a global ref override applied to every child pack.
87 ///
88 /// Surfaced as `grex sync --ref <sha|branch|tag>` (M4-D). The override
89 /// replaces each child's declared `ref` in its parent manifest. An
90 /// empty string is treated as "no override" — callers should reject
91 /// empty values at the CLI layer before reaching this point.
92 #[must_use]
93 pub fn with_ref_override(mut self, r#ref: Option<String>) -> Self {
94 self.ref_override = r#ref.filter(|s| !s.is_empty());
95 self
96 }
97
98 /// Walk the tree rooted at `root_pack_path`, returning the fully
99 /// hydrated graph.
100 ///
101 /// # Errors
102 ///
103 /// Returns [`TreeError`] on any loader, git, cycle, or name-mismatch
104 /// failure. The walk aborts on the first failure — the spec-level
105 /// "fail loud, fail fast" default.
106 pub fn walk(&self, root_pack_path: &Path) -> Result<PackGraph, TreeError> {
107 let mut state = BuildState::default();
108 let root_manifest = self.loader.load(root_pack_path)?;
109 // Pre-walk path-traversal gate: reject any malicious
110 // `children[].path` (or URL-derived tail) BEFORE any clone fires.
111 // Closes the v1.1.0 flat-sibling exploit window where a `path:
112 // ../escape` would materialise a child outside the pack root
113 // before plan-phase validation could see it.
114 validate_children_paths(&root_manifest)?;
115 let root_commit_sha = probe_head_sha(self.backend, root_pack_path);
116 let root_id = state.push_node(PackNode {
117 id: 0,
118 name: root_manifest.name.clone(),
119 path: root_pack_path.to_path_buf(),
120 source_url: None,
121 manifest: root_manifest.clone(),
122 parent: None,
123 commit_sha: root_commit_sha,
124 synthetic: false,
125 // Root has no parent ChildRef — there is no manifest `ref:`
126 // value to mirror. v1.3.1 B14.
127 manifest_ref: None,
128 });
129 let root_identity = pack_identity_for_root(root_pack_path);
130 self.walk_recursive(root_id, &root_manifest, &mut state, &mut vec![root_identity])?;
131 Ok(PackGraph::new(state.nodes, state.edges))
132 }
133
134 /// Recursive step. `ancestors` carries the pack identifiers
135 /// currently on the in-progress walk path — pushed on entry,
136 /// popped on return. It is a path-prefix set (NOT a global
137 /// "visited" set), so a diamond reaching the same descendant
138 /// via two disjoint paths is not a cycle.
139 ///
140 /// Each loaded manifest's `children[]` is path-traversal-validated
141 /// before any of those children are resolved on disk; the entry
142 /// point pre-validates the root manifest, so by the time
143 /// `walk_recursive` runs for a child, that child's own `children[]`
144 /// is what needs gating before the next descent.
145 fn walk_recursive(
146 &self,
147 parent_id: usize,
148 manifest: &PackManifest,
149 state: &mut BuildState,
150 ancestors: &mut Vec<String>,
151 ) -> Result<(), TreeError> {
152 self.record_depends_on(parent_id, manifest, state);
153 self.process_children(parent_id, manifest, state, ancestors)
154 }
155
156 /// Record one `DependsOn` edge per `depends_on` entry. Resolution
157 /// against actual graph nodes happens later in `DependsOnValidator`.
158 /// We emit edges only where the target already exists in the graph so
159 /// the edge list stays in-bounds; unresolved deps are surfaced by the
160 /// validator, not carried as dangling edges.
161 fn record_depends_on(&self, parent_id: usize, manifest: &PackManifest, state: &mut BuildState) {
162 for dep in &manifest.depends_on {
163 if let Some(to) = find_node_id_by_name_or_url(&state.nodes, dep) {
164 state.edges.push(PackEdge { from: parent_id, to, kind: EdgeKind::DependsOn });
165 }
166 }
167 }
168
169 fn process_children(
170 &self,
171 parent_id: usize,
172 manifest: &PackManifest,
173 state: &mut BuildState,
174 ancestors: &mut Vec<String>,
175 ) -> Result<(), TreeError> {
176 for child in &manifest.children {
177 self.handle_child(parent_id, child, state, ancestors)?;
178 }
179 Ok(())
180 }
181
182 fn handle_child(
183 &self,
184 parent_id: usize,
185 child: &ChildRef,
186 state: &mut BuildState,
187 ancestors: &mut Vec<String>,
188 ) -> Result<(), TreeError> {
189 let identity = pack_identity_for_child(child);
190 if ancestors.iter().any(|s| s == &identity) {
191 let mut chain = ancestors.clone();
192 chain.push(identity);
193 return Err(TreeError::CycleDetected { chain });
194 }
195 // v1.2.0 Stage 1.c: FS-resident boundary check fires BEFORE
196 // any clone / fetch. Junctions, reparse points, and
197 // `.git`-as-file (gitfile redirect) all re-open the
198 // parent-boundary escape that the syntactic gate closes on
199 // the path string itself; running the check on the prospective
200 // dest path means a hostile pre-existing slot is rejected
201 // before the GitBackend writes anything into (or through) it.
202 // The prospective path is reconstructed here so the helper
203 // can interrogate the slot before `resolve_destination`
204 // materialises a clone — pre-clone runs return `Ok(())` because
205 // the slot doesn't exist yet, and the walk continues normally.
206 let prospective_dest = self.workspace.join(child.effective_path());
207 check_dest_boundary(&prospective_dest, &child.effective_path())?;
208 let dest = self.resolve_destination(child, state)?;
209 // v1.1.1 plain-git children: when the destination has no
210 // `.grex/pack.yaml` but does carry a `.git/`, synthesize a
211 // leaf scripted-no-hooks manifest in-memory rather than
212 // aborting. See
213 // `openspec/changes/feat-v1.1.1-plain-git-children/design.md`
214 // §"Synthesis algorithm".
215 let (child_manifest, is_synthetic) = match self.loader.load(&dest) {
216 Ok(m) => (m, false),
217 Err(TreeError::ManifestNotFound(_)) if dest_has_git_repo(&dest) => {
218 (synthesize_plain_git_manifest(child), true)
219 }
220 Err(e) => return Err(e),
221 };
222 verify_child_name(&child_manifest.name, child, &dest)?;
223 // Validate this child's own `children[]` before its descent
224 // resolves any of them on disk. Mirrors the root-manifest gate
225 // in `walk`; together they ensure no clone can fire for a
226 // grandchild whose parent declared a traversal-bearing path.
227 validate_children_paths(&child_manifest)?;
228
229 let commit_sha = probe_head_sha(self.backend, &dest);
230 let child_id = state.push_node(PackNode {
231 id: state.nodes.len(),
232 name: child_manifest.name.clone(),
233 path: dest.clone(),
234 source_url: Some(child.url.clone()),
235 manifest: child_manifest.clone(),
236 parent: Some(parent_id),
237 commit_sha,
238 synthetic: is_synthetic,
239 // v1.3.1 B14: carry the parent manifest's `ref:` verbatim
240 // so the sync orchestrator can mirror it into
241 // `LockEntry.branch`.
242 manifest_ref: child.r#ref.clone(),
243 });
244 state.edges.push(PackEdge { from: parent_id, to: child_id, kind: EdgeKind::Child });
245
246 ancestors.push(identity);
247 let result = self.walk_recursive(child_id, &child_manifest, state, ancestors);
248 ancestors.pop();
249 result
250 }
251
252 /// Decide where `child` lives on disk and ensure the working tree is
253 /// in the expected state: clone if absent, fetch + optional checkout
254 /// if present.
255 fn resolve_destination(
256 &self,
257 child: &ChildRef,
258 _state: &mut BuildState,
259 ) -> Result<PathBuf, TreeError> {
260 let dest = self.workspace.join(child.effective_path());
261 // M4-D: `ref_override` wins over the parent-declared `child.ref`.
262 // Falls back to the declared ref when no override is active.
263 let effective_ref = self.ref_override.as_deref().or(child.r#ref.as_deref());
264 if dest_has_git_repo(&dest) {
265 self.backend.fetch(&dest)?;
266 if let Some(r) = effective_ref {
267 self.backend.checkout(&dest, r)?;
268 }
269 } else {
270 self.backend.clone(&child.url, &dest, effective_ref)?;
271 }
272 Ok(dest)
273 }
274}
275
276/// Best-effort HEAD probe. Returns `None` when the target is not a git
277/// repository or the backend refuses — the root of a declarative pack is
278/// often a plain directory, so this must not fail the walk.
279///
280/// Non-`.git` directories short-circuit silently (truly not a git
281/// repo). Backend errors on an actual `.git` directory are surfaced as
282/// a `tracing::warn!` log line so transient gix failures / ACL-denied
283/// `.git` reads do not silently degrade into an empty `commit_sha`
284/// without any operator signal. The walker continues with `None` — a
285/// best-effort probe is, by construction, allowed to fail.
286fn probe_head_sha(backend: &dyn GitBackend, path: &Path) -> Option<String> {
287 let dir =
288 if path.extension().and_then(|e| e.to_str()).is_some_and(|e| matches!(e, "yaml" | "yml")) {
289 path.parent()
290 .and_then(Path::parent)
291 .map_or_else(|| path.to_path_buf(), Path::to_path_buf)
292 } else {
293 path.to_path_buf()
294 };
295 if !dir.join(".git").exists() {
296 return None;
297 }
298 match backend.head_sha(&dir) {
299 Ok(s) => Some(s),
300 Err(e) => {
301 tracing::warn!(
302 target: "grex::walker",
303 "HEAD probe failed for {}: {e}",
304 dir.display()
305 );
306 None
307 }
308 }
309}
310
/// Mutable state threaded through the walk. Private to this module so only
/// the walker can grow the graph.
#[derive(Default)]
struct BuildState {
    // Nodes in push order; a node's `id` doubles as its index here.
    nodes: Vec<PackNode>,
    // Child + DependsOn edges; `from`/`to` index into `nodes`.
    edges: Vec<PackEdge>,
}
318
319impl BuildState {
320 fn push_node(&mut self, node: PackNode) -> usize {
321 let id = node.id;
322 self.nodes.push(node);
323 id
324 }
325}
326
/// Identity string used by the cycle detector for the root pack.
/// Namespaced with `path:` so it can never collide with the `url:`
/// identities children carry.
fn pack_identity_for_root(path: &Path) -> String {
    let mut identity = String::from("path:");
    identity.push_str(&path.display().to_string());
    identity
}
331
332/// Identity string for a child — url+ref so the same repo at two different
333/// refs is considered distinct. This matches git semantics and avoids
334/// false-positive cycle detections for diamond dependencies on different
335/// tags.
336///
337/// v1.2.3 (B2): when the ref is missing or empty the trailing `@` is
338/// omitted so the on-the-wire identity is just `url:<url>` — matches
339/// `Grex.Walker.ChildRef.identity` in the Lean model. Without this
340/// elision two children that differ only in `ref: None` vs
341/// `ref: Some("")` would otherwise serialise the same way as
342/// `url:<url>@`, masking the distinction the Lean specification draws.
343fn pack_identity_for_child(child: &ChildRef) -> String {
344 match child.r#ref.as_deref() {
345 Some(r) if !r.is_empty() => format!("url:{}@{}", child.url, r),
346 _ => format!("url:{}", child.url),
347 }
348}
349
/// Shallow on-disk check: a `.git` entry (file or dir) signals an existing
/// working tree. We deliberately do not open the repo here — that's the
/// backend's job via `fetch`/`checkout`.
///
/// # Symlink safety
///
/// `dest` itself MUST NOT be a symlink. If it is, this function returns
/// `false` regardless of whether the symlink target carries a `.git`
/// entry. This refusal closes a synthesis-redirection attack: a parent
/// pack declaring `path: code` against a workspace where the user
/// happens to have `<workspace>/code -> $HOME` would otherwise let the
/// walker treat `$HOME/.git` as a "plain-git child" and operate on an
/// unrelated tree. The check uses [`std::fs::symlink_metadata`] so the
/// link itself — not its target — is interrogated.
pub fn dest_has_git_repo(dest: &Path) -> bool {
    // `symlink_metadata` does NOT follow links, so this interrogates the
    // entry itself; a missing dest (Err) is simply "not a symlink".
    let dest_is_symlink = std::fs::symlink_metadata(dest)
        .map(|meta| meta.file_type().is_symlink())
        .unwrap_or(false);
    // A symlinked dest is untrusted regardless of its target.
    !dest_is_symlink && dest.join(".git").exists()
}
375
376/// Build the in-memory manifest used for v1.1.1 plain-git children — a
377/// leaf scripted pack with no hooks, no children, no actions. Activated
378/// at the walker's load-fallback boundary when a child has a `.git/`
379/// but no `.grex/pack.yaml`. See
380/// `openspec/changes/feat-v1.1.1-plain-git-children/design.md`.
381pub fn synthesize_plain_git_manifest(child: &ChildRef) -> PackManifest {
382 PackManifest {
383 schema_version: SchemaVersion::current(),
384 name: child.effective_path(),
385 r#type: PackType::Scripted,
386 version: None,
387 depends_on: Vec::new(),
388 children: Vec::new(),
389 actions: Vec::new(),
390 teardown: None,
391 extensions: BTreeMap::new(),
392 }
393}
394
395/// Enforce that the cloned child's pack.yaml name matches what the parent
396/// declared. The parent-side expectation is the child entry's
397/// [`ChildRef::effective_path`] — the directory name in the workspace.
398fn verify_child_name(got: &str, child: &ChildRef, dest: &Path) -> Result<(), TreeError> {
399 let expected = child.effective_path();
400 if got == expected {
401 return Ok(());
402 }
403 Err(TreeError::PackNameMismatch { got: got.to_string(), expected, path: dest.to_path_buf() })
404}
405
406/// Resolve a `depends_on` entry (URL or bare name) against nodes already
407/// recorded. Returns the node id on a hit, `None` otherwise.
408fn find_node_id_by_name_or_url(nodes: &[PackNode], dep: &str) -> Option<usize> {
409 if looks_like_url(dep) {
410 nodes.iter().find(|n| n.source_url.as_deref() == Some(dep)).map(|n| n.id)
411 } else {
412 nodes.iter().find(|n| n.name == dep).map(|n| n.id)
413 }
414}
415
/// Run the path-traversal gate on `manifest.children`. Returns the
/// first offending child as a [`TreeError::ChildPathInvalid`] so the
/// walker aborts before any clone of the offending sibling fires.
///
/// Surfacing only the first offender (rather than aggregating) matches
/// the walker's fail-fast posture — the plan-phase
/// [`crate::pack::validate::ChildPathValidator`] still runs against the
/// whole graph post-walk via `validate_graph`, so authors who clear
/// the traversal exploit see the full diagnostic batch on the next
/// invocation.
///
/// Check ordering is contract-critical and deliberate: NFC-duplicate
/// sweep (whole-list) → per-child boundary reject → per-child syntactic
/// gate. Reordering changes which diagnostic surfaces first.
///
/// `check_child_path` is documented to return only the
/// `ChildPathInvalid` variant, but we `match` exhaustively so any
/// future variant the helper grows surfaces as a compile-time
/// failure here rather than as a silently swallowed `Some(other)`.
fn validate_children_paths(manifest: &PackManifest) -> Result<(), TreeError> {
    // v1.2.0 Stage 1.c: NFC-duplicate sweep across the sibling list.
    // Runs first because it's a cross-cutting check (one offender
    // implicates the WHOLE list, not a single child). Surfaces as
    // `TreeError::ManifestPathEscape` per walker.md
    // §boundary-preservation — a NFC-collapsed name re-introduces the
    // very boundary escape the regex was meant to close on
    // case-insensitive filesystems.
    if let Some(path) = nfc_duplicate_path(&manifest.children) {
        return Err(TreeError::ManifestPathEscape {
            path,
            reason: "duplicate child path under Unicode NFC normalization (case-insensitive FS collision risk)"
                .to_string(),
        });
    }
    for child in &manifest.children {
        // v1.2.0 Stage 1.c: per-segment boundary-preservation rejects.
        // Layered AHEAD of the syntactic gate so the more specific
        // `ManifestPathEscape` diagnostic wins for entries that would
        // also fail the bare-name regex (e.g. `child:foo` is rejected
        // here as a colon hazard instead of a generic charset miss).
        // The segment is the explicit `path:` when declared, else the
        // URL-derived effective path.
        let segment = child.path.as_deref().map_or_else(|| child.effective_path(), str::to_string);
        if let Some(reason) = boundary_reject_reason(&segment) {
            return Err(TreeError::ManifestPathEscape {
                path: segment,
                reason: reason.to_string(),
            });
        }
        // Syntactic gate: `None` means this child is clean — move on.
        let Some(err) = check_child_path(child) else { continue };
        match err {
            PackValidationError::ChildPathInvalid { child_name, path, reason } => {
                return Err(TreeError::ChildPathInvalid { child_name, path, reason });
            }
            other @ (PackValidationError::DuplicateSymlinkDst { .. }
            | PackValidationError::GraphCycle { .. }
            | PackValidationError::DependsOnUnsatisfied { .. }
            | PackValidationError::ChildPathDuplicate { .. }) => {
                // `check_child_path` is contracted to only emit
                // `ChildPathInvalid`. Any other variant indicates the
                // helper has drifted out of sync with this caller —
                // surface loudly rather than silently swallowing it.
                tracing::error!(
                    target: "grex::walker",
                    "check_child_path returned unexpected variant: {other:?}",
                );
                debug_assert!(false, "check_child_path returned unexpected variant: {other:?}");
            }
        }
    }
    Ok(())
}
482
483/// v1.2.0 Stage 1.c: filesystem-resident boundary check. Run AFTER
484/// the destination has been resolved against the parent workspace but
485/// BEFORE any clone / fetch fires. Catches the case where the slot
486/// the walker is about to materialise into is already a junction,
487/// reparse point, symlink, or `.git`-as-file — each of which would
488/// re-introduce a parent-boundary escape.
489///
490/// Pre-clone: a non-existent destination is the happy path; the
491/// helper returns `None` and the walk continues. Post-clone or on a
492/// re-walk where the destination is already populated, the helper
493/// inspects the on-disk entry and surfaces a `ManifestPathEscape`
494/// when the entry violates the boundary contract.
495///
496/// Visibility: `pub(super)` — used by the walker's `handle_child`
497/// path-resolution step (wired in 1.c follow-up; this commit lands
498/// the helper itself and the boundary-check call site for the
499/// path-segment rejects).
500pub(super) fn check_dest_boundary(dest: &Path, segment: &str) -> Result<(), TreeError> {
501 if let Some(reason) = boundary_fs_reject_reason(dest) {
502 return Err(TreeError::ManifestPathEscape {
503 path: segment.to_string(),
504 reason: reason.to_string(),
505 });
506 }
507 Ok(())
508}
509
510/// Decide whether a `depends_on` entry is a URL rather than a bare name.
511/// The rule is intentionally literal — matching the spec's enumeration of
512/// accepted forms.
513pub(super) fn looks_like_url(s: &str) -> bool {
514 s.starts_with("http://")
515 || s.starts_with("https://")
516 || s.starts_with("ssh://")
517 || s.starts_with("git@")
518 || s.ends_with(".git")
519}
520
521// ---------------------------------------------------------------------------
522// v1.2.0 Stage 1.g — `sync_meta` entry point: parent-relative,
523// distributed-lockfile walker. Three phases per meta:
524//
525// Phase 1 (siblings): `classify_dest` (1.e) per child, dispatch
526// fetch / clone / refuse based on the verdict; aggregate
527// `PresentUndeclared` into `TreeError::UntrackedGitRepos`.
528// Phase 2 (orphan prune): for each `prune_candidate` (caller-supplied
529// by 1.h once the distributed lockfile read lands), run the
530// consent-walk via `phase2_prune` (1.f).
531// Phase 3 (recursion): per child whose dest carries
532// `<dest>/.grex/pack.yaml`, recursively `sync_meta` if `recurse`
533// is true and depth < `max_depth`.
534//
535// Design discipline:
536//
537// * **No new locking primitives.** Per-pack git ops acquire the M6
538// `PackLock` (synchronous `acquire`) for the duration of the
539// clone/fetch. The Lean axiom `sync_disjoint_commutes` (Bridge.lean)
540// permits any disjoint scheduler — sequential is the smallest model
541// that satisfies the axiom. Sibling parallelism via rayon is a 1.j /
542// 1.l-territory follow-up; the scaffolding here keeps the
543// single-threaded baseline correct first.
544// * **No lockfile mechanics.** Phase 2's orphan list is a parameter,
545// not a read from `<meta>/.grex/grex.lock.jsonl`. 1.h owns the
546// distributed-lockfile read/write surface; this commit only wires
547// the consent-walk + prune dispatch.
548// * **Error aggregation.** Every Phase 1 child failure plus every
549// Phase 2 refusal lands in `SyncMetaReport::errors` before the call
550// returns. The walker is fail-LOUD (caller gets the full picture),
551// not fail-fast (the legacy `Walker::walk` aborts on the first hit).
552// This matches the v1.2.0 walker.md §"untracked git policy" rule
553// that `UntrackedGitRepos` must enumerate every offender at once.
554// ---------------------------------------------------------------------------
555
/// Per-meta options threaded through `sync_meta`. Keeps the call-site
/// signature small without coupling to the full [`crate::sync::SyncOptions`]
/// surface — the orchestrator (`sync.rs::run`) is responsible for projecting
/// `SyncOptions` into `SyncMetaOptions` when it wires this entry point.
///
/// v1.2.5 — marked `#[non_exhaustive]` so future PATCH-level field additions
/// (e.g. new optional knobs threaded through the same struct) do not break
/// downstream `let SyncMetaOptions { .. }` destructuring or struct-literal
/// construction at call sites that omit the new field. External callers
/// MUST construct via [`SyncMetaOptions::default()`] and field assignment on
/// a `mut` binding — `let mut opts = SyncMetaOptions::default(); opts.recurse
/// = false;`. Struct-literal construction (including the `..base` spread
/// shorthand) is rejected by E0639 from outside this crate.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct SyncMetaOptions {
    /// Global ref override (`grex sync --ref <sha|branch|tag>`). Mirrors
    /// [`Walker::with_ref_override`]: when `Some`, every child's
    /// declared `ref` is replaced.
    pub ref_override: Option<String>,
    /// When `true`, Phase 3 recurses into child metas. `false` is the
    /// `doctor --shallow` semantics: process only the immediate
    /// children of the supplied meta.
    pub recurse: bool,
    /// Bound on Phase 3 recursion depth. `None` is unbounded; `Some(n)`
    /// caps at `n` levels of nesting (the supplied `meta_dir` is depth
    /// 0). Recursion ALWAYS halts before depth `n+1`.
    pub max_depth: Option<usize>,
    /// Phase 2 prune-safety override. Mirrors
    /// [`crate::sync::SyncOptions::force_prune`].
    pub force_prune: bool,
    /// Phase 2 prune-safety override. Mirrors
    /// [`crate::sync::SyncOptions::force_prune_with_ignored`].
    pub force_prune_with_ignored: bool,
    /// v1.2.1 item 3 — rayon thread-pool size for sibling-parallel
    /// Phase 1 + Phase 3. `None` ⇒ rayon's default (`num_cpus::get()`);
    /// `Some(1)` ⇒ effectively sequential (single-threaded pool, useful
    /// for determinism testing); `Some(n >= 2)` ⇒ bounded parallel.
    /// `Some(0)` is clamped to `1` (rayon rejects a zero-thread pool).
    /// Mirrors [`crate::sync::SyncOptions::parallel`] semantics with the
    /// one exception that `0` is clamped to `1` here — the unbounded
    /// sentinel only makes sense for tokio's `Semaphore::MAX_PERMITS`.
    pub parallel: Option<usize>,
    /// v1.2.1 item 5b — when `Some`, Phase 2 prunes are diverted
    /// through the snapshot-then-unlink quarantine pipeline before
    /// `unlink(dest)` fires. Carries the per-meta trash bucket root
    /// and audit-log path. `None` (default) preserves the legacy
    /// v1.2.0 direct-unlink path. Set by
    /// [`crate::sync::SyncOptions::quarantine`] at the orchestrator
    /// boundary; the consent layer reads this to pick the deletion
    /// strategy. Lean theorem `quarantine_snapshot_precedes_delete`
    /// proves the safety contract.
    pub quarantine: Option<QuarantineConfig>,
    /// v1.2.5 — when `Some`, every meta sync starts with a best-effort
    /// GC sweep over `<meta>/.grex/trash/` per the supplied retention
    /// window. `None` (default) preserves v1.2.1 indefinite-retention
    /// behavior. Set by [`crate::sync::SyncOptions::retain_days`] at
    /// the orchestrator boundary; threaded through every recursion
    /// frame so each meta's own trash bucket gets swept. Sweep
    /// failures log via tracing and DO NOT halt the sync.
    pub retention: Option<super::RetentionConfig>,
    /// v1.3.1 (B4) — when `true`, Phase 1 SKIPS every clone/fetch
    /// subprocess + lockfile + events.jsonl mutation. The walker still
    /// traverses the manifest tree (parsing in-memory) and accumulates
    /// one [`DryRunWouldCloneRecord`] per child that WOULD have been
    /// cloned into [`SyncMetaReport::dry_run_would_clone`]. Lean
    /// theorem `Grex.Walker.dry_run_no_side_effects` formalises the
    /// invariant `dry_run = true ⇒ no network call ∧ no FS write`.
    /// Default: `false` (preserves v1.3.0 mutation semantics for the
    /// default sync path).
    pub dry_run: bool,
}
628
629impl Default for SyncMetaOptions {
630 fn default() -> Self {
631 Self {
632 ref_override: None,
633 recurse: true,
634 max_depth: None,
635 force_prune: false,
636 force_prune_with_ignored: false,
637 parallel: None,
638 quarantine: None,
639 retention: None,
640 dry_run: false,
641 }
642 }
643}
644
/// v1.3.1 (B4) — one in-memory record describing a child the walker
/// WOULD have cloned if `SyncMetaOptions::dry_run` were `false`. Mirrors
/// the [`crate::manifest::Event::DryRunWouldClone`] event shape, but
/// surfaced through the walker's [`SyncMetaReport`] return value
/// instead of written to events.jsonl — dry-run is contractually
/// side-effect-free per Lean theorem `dry_run_no_side_effects`. CLI
/// renderers can serialize the records to stdout JSON if desired.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DryRunWouldCloneRecord {
    /// Folder name (= repo name) the child would land at. Mirrors the
    /// runtime invariant `id = file_name(dest)` enforced by the
    /// non-dry-run path.
    pub id: String,
    /// Upstream source URL.
    pub url: String,
    /// Effective ref (manifest-declared or `--ref` override). `None`
    /// when neither is set.
    pub ref_: Option<String>,
}
665
/// Outcome of one [`sync_meta`] invocation. Aggregated across every
/// recursion frame: a sub-meta's report is folded into its parent's
/// report at the end of Phase 3 (see `SyncMetaReport::merge`).
///
/// Marked `#[non_exhaustive]` so future PATCH/MINOR slices can add
/// fields (e.g. v1.3.1's `dry_run_would_clone`) without breaking
/// external struct-literal constructors or exhaustive pattern matches.
/// In-crate construction goes through `..Default::default()` to stay
/// `non_exhaustive`-compatible.
#[non_exhaustive]
#[derive(Debug, Default)]
pub struct SyncMetaReport {
    /// Number of metas processed (this meta + every descendant Phase 3
    /// recursion fired against). Useful for `--shallow` verification:
    /// `recurse: false` means `metas_visited == 1`.
    pub metas_visited: usize,
    /// Per-child Phase 1 verdicts, keyed by parent-relative child path.
    /// `(meta_dir, child_dest, classification)` — exposed primarily for
    /// tests; downstream callers will project into a status report.
    pub phase1_classifications: Vec<(PathBuf, PathBuf, DestClass)>,
    /// Successful Phase 2 prunes (paths that were removed). Empty when
    /// no orphan list was supplied or every orphan refused.
    pub phase2_pruned: Vec<PathBuf>,
    /// Aggregate of every error encountered across Phases 1, 2, and 3.
    /// The walker continues past recoverable errors so the caller sees
    /// the full picture in one pass.
    pub errors: Vec<TreeError>,
    /// v1.3.1 (B4) — one record per child the walker WOULD have cloned
    /// during Phase 1 if `SyncMetaOptions::dry_run` were `false`.
    /// Always empty when `dry_run = false`. CLI renderers should
    /// surface the records as the dry-run plan for the operator.
    pub dry_run_would_clone: Vec<DryRunWouldCloneRecord>,
}
699
700impl SyncMetaReport {
701 fn merge(&mut self, mut child: SyncMetaReport) {
702 self.metas_visited += child.metas_visited;
703 self.phase1_classifications.append(&mut child.phase1_classifications);
704 self.phase2_pruned.append(&mut child.phase2_pruned);
705 self.errors.append(&mut child.errors);
706 self.dry_run_would_clone.append(&mut child.dry_run_would_clone);
707 }
708}
709
710/// Sync a meta pack and (optionally) its descendants.
711///
712/// `meta_dir` is the on-disk directory containing the meta's
713/// `.grex/pack.yaml`. `prune_candidates` is the list of orphan dests
714/// (parent-relative) the caller's distributed-lockfile reader determined
715/// no longer appear in `manifest.children`.
716///
717/// The walker is **fail-loud, not fail-fast**: recoverable errors land
718/// in [`SyncMetaReport::errors`] and the walk continues so the caller
719/// sees the full picture in one pass. The only short-circuit is a
720/// detected cycle, which surfaces as `Err(TreeError::CycleDetected)`
721/// to keep the cyclic-clone storm risk contained.
722///
723/// # Errors
724///
725/// Returns the *first* catastrophic error: manifest parse failure on
726/// the supplied `meta_dir`, a cycle in the manifest forest (URL+ref
727/// identity), or a pre-walk path-traversal violation. Per-child
728/// clone / fetch / prune failures aggregate into
729/// [`SyncMetaReport::errors`] without aborting the walk.
730pub fn sync_meta(
731 meta_dir: &Path,
732 backend: &dyn GitBackend,
733 loader: &dyn PackLoader,
734 opts: &SyncMetaOptions,
735 prune_candidates: &[PathBuf],
736) -> Result<SyncMetaReport, TreeError> {
737 // v1.2.3 (B4) — seed the ancestor chain with the root pack's
738 // path-namespaced identity (`path:<meta_dir>`) so the Lean
739 // `acyclic_path` precondition that drives
740 // `sync_meta_no_cycle_infinite_clone` is established right at
741 // the call site rather than implicitly relying on an empty
742 // initial ancestor list. Children identify with `url:<url>@<ref>` —
743 // disjoint namespace from the root's `path:` identity, so seeding
744 // does not introduce false-positive cycle hits against any
745 // legitimate child.
746 //
747 // `sync_meta_inner` extends this chain per recursion edge (Phase
748 // 3) using clone-per-child so disjoint sibling branches do not
749 // pollute each other's ancestor view.
750 let initial_ancestors = vec![pack_identity_for_root(meta_dir)];
751 sync_meta_inner(
752 meta_dir,
753 backend,
754 loader,
755 opts,
756 prune_candidates,
757 /* depth */ 0,
758 &initial_ancestors,
759 )
760}
761
/// Recursive core of [`sync_meta`] — one invocation is one frame of
/// the walk (`depth` frames below the root; `ancestors` is the
/// identity chain from the root down to and including this meta, as
/// seeded by `sync_meta` and extended per edge by Phase 3).
///
/// Per-frame order: manifest load + path-traversal sweep →
/// best-effort quarantine GC → pre-existing-dest snapshot → Phase 1
/// (classify + clone/fetch) → Phase 2 (orphan prune) → Phase 3
/// (recursion into child metas).
///
/// # Errors
///
/// Manifest load failure, a child-path traversal violation, rayon
/// pool build failure, and a cycle detected during Phase 3 return
/// `Err`; recoverable per-child failures aggregate into the returned
/// report's `errors` instead.
fn sync_meta_inner(
    meta_dir: &Path,
    backend: &dyn GitBackend,
    loader: &dyn PackLoader,
    opts: &SyncMetaOptions,
    prune_candidates: &[PathBuf],
    depth: usize,
    ancestors: &[String],
) -> Result<SyncMetaReport, TreeError> {
    let manifest = loader.load(meta_dir)?;
    // v1.2.0 Stage 1.c gate — every recursion frame re-runs the
    // path-traversal sweep before any child is touched on disk.
    validate_children_paths(&manifest)?;

    let mut report = SyncMetaReport { metas_visited: 1, ..SyncMetaReport::default() };

    // v1.2.5 — best-effort quarantine GC sweep at meta sync start.
    // Runs BEFORE Phase 1 so the trash bucket is current when any
    // Phase 2 quarantine snapshot lands later in the same sync. The
    // audit log path mirrors what the quarantine pipeline already
    // uses (`<meta>/.grex/events.jsonl`). Sweep failures are logged
    // via tracing inside `prune_quarantine` and DO NOT halt the
    // sync — this is the design.md "best-effort retention" contract.
    if let Some(retention) = opts.retention {
        let audit_log = crate::manifest::event_log_path(meta_dir);
        if let Err(e) = super::quarantine::prune_quarantine(meta_dir, retention, Some(&audit_log)) {
            tracing::warn!(
                meta_dir = %meta_dir.display(),
                error = %e,
                "quarantine GC sweep failed at meta sync start; continuing",
            );
        }
    }

    // v1.2.5 (A2) — snapshot which child dest dirs already exist on
    // disk BEFORE Phase 1 runs its clones. Phase 3 uses this set to
    // decide whether a Failed/Cancelled outcome should trigger
    // `cleanup_partial_clone`: only freshly-created dests (i.e.
    // ones NOT in this set) are cleaned. Dests that pre-existed
    // belong to an earlier successful sync and must be preserved
    // even when a downstream cycle aborts the current walk.
    //
    // The snapshot is captured here rather than at `phase3_handle_child`
    // entry because Phase 1 always clones missing dests before Phase 3
    // recurses, so by the time `phase3_handle_child` runs every
    // declared dest exists on disk and the per-fn snapshot would
    // never see a "missing" pre-state.
    //
    // v1.2.5 (W1) — keys are normalised via [`normalize_dest_key`] so
    // a trailing `/` or `./` prefix in `child.effective_path()` does
    // not desync the snapshot from the lookup at `phase3_handle_child`.
    // Both ends MUST apply the same normalisation; the `dest` computed
    // at the lookup site is wrapped through the same helper before
    // `pre_existing_dests.contains(&dest)` runs.
    let pre_existing_dests: HashSet<PathBuf> = manifest
        .children
        .iter()
        .map(|c| normalize_dest_key(&meta_dir.join(c.effective_path())))
        .filter(|d| d.exists())
        .collect();

    // v1.2.1 item 3: build a per-call rayon pool sized from
    // `opts.parallel`. Phase 1 + Phase 3 install on this pool; Phase 2
    // stays sequential (single-meta orphan sweep — no sibling
    // parallelism to extract). The pool is dropped at the end of
    // `sync_meta_inner`, so each recursion frame builds + tears down
    // its own pool. This is intentional: we want the worker count to
    // refresh per call so a top-level `--parallel 1` cap is honoured
    // without piggy-backing on a global pool that an unrelated caller
    // might have configured differently.
    let pool = build_pool(opts.parallel)?;

    phase1_sync_children(&pool, meta_dir, &manifest, backend, opts, &mut report);
    phase2_prune_orphans(meta_dir, prune_candidates, opts, &mut report);
    // v1.2.2 — cycle detection short-circuits the recursion edge with
    // an `Err` return so the caller sees `Err(CycleDetected)` directly
    // rather than burying it in `report.errors`. Cycles are catastrophic
    // (would otherwise clone forever); fail-loud here, NOT fold-into-report.
    phase3_recurse(
        &pool,
        meta_dir,
        &manifest,
        backend,
        loader,
        opts,
        depth,
        ancestors,
        &pre_existing_dests,
        &mut report,
    )?;

    Ok(report)
}
855
856/// v1.2.1 item 3 — build a rayon `ThreadPool` sized from
857/// `opts.parallel`. Encapsulates the `None` ⇒ default,
858/// `Some(0)` ⇒ clamp-to-1, `Some(n)` ⇒ exact-N policy in one place
859/// so Phase 1 and Phase 3 install on identically-configured pools.
860///
861/// `Some(1)` produces a single-worker pool — the determinism
862/// test-mode fast-path (sibling iteration order matches sequential
863/// for-loop order on a 1-thread pool).
864///
865/// Build failures surface as [`TreeError::ManifestRead`]: a rayon
866/// pool failure is invariably a host-resource issue (out of file
867/// descriptors, thread-creation refused) — bucketing it into the
868/// generic IO-error variant keeps the error surface tight without
869/// inventing a one-off `RayonPoolBuild` discriminant. The Lean
870/// model treats pool construction as a well-formedness precondition
871/// of `sync`, not an in-band failure mode.
872fn build_pool(parallel: Option<usize>) -> Result<rayon::ThreadPool, TreeError> {
873 let mut builder = rayon::ThreadPoolBuilder::new();
874 if let Some(n) = parallel {
875 builder = builder.num_threads(n.max(1));
876 }
877 builder.build().map_err(|e| {
878 TreeError::ManifestRead(format!("failed to build rayon pool for sync_meta: {e}"))
879 })
880}
881
/// Per-child output from Phase 1's parallel pass — one instance per
/// manifest child, produced by `phase1_handle_child`. Collected into a
/// `Vec` after the rayon `par_iter` settles, then drained into the
/// caller's `SyncMetaReport` in a single sequential pass. Carrying
/// the data plain (no `&mut report` shared across threads) is what
/// keeps the parallelisation sound under the Lean
/// `sync_disjoint_commutes` axiom: each iteration's mutations are
/// confined to its own owned struct.
struct Phase1ChildOutcome {
    /// `(meta_dir, dest, class)` — pushed onto
    /// `report.phase1_classifications` regardless of dispatch outcome.
    classification: (PathBuf, PathBuf, DestClass),
    /// Per-child clone/fetch failure, if any. Folded into
    /// `report.errors`.
    error: Option<TreeError>,
    /// `Some((dest, class))` when the child classified as
    /// `PresentUndeclared`; the caller aggregates these into one
    /// `UntrackedGitRepos` error after the parallel pass.
    undeclared: Option<(PathBuf, DestClass)>,
    /// v1.3.1 (B4) — `Some(record)` when `dry_run = true` and the
    /// child would have been cloned (`Missing` classification) had the
    /// real path run. Folded into `report.dry_run_would_clone`.
    dry_run_record: Option<DryRunWouldCloneRecord>,
}
905
906/// Phase 1: classify each declared child, then dispatch. Per the v1.2.0
907/// walker.md pseudocode the per-child branches are:
908///
909/// * `Missing` → clone via `backend.clone(url, dest, ref)`.
910/// * `PresentDeclared` → fetch (+ checkout if a ref override applies).
911/// * `PresentDirty` → no-op (preserve user changes; will surface at
912/// exec/plan stage if applicable).
913/// * `PresentInProgress` → refuse via `DirtyTreeRefusal{GitInProgress}`
914/// (collected into `report.errors`).
915/// * `PresentUndeclared` → impossible at Phase 1 dispatch time because
916/// declared paths are in `manifest.children`; the variant is reserved
917/// for the lockfile-orphan sweep (Phase 2 territory).
918///
919/// v1.2.1 item 3 — sibling-parallel via rayon `par_iter`. Disjointness
920/// across siblings (each child has its own `meta_dir.join(child.path)`
921/// dest, validated by `validate_children_paths` upstream) discharges
922/// the precondition of the `sync_disjoint_commutes` axiom in
923/// `proof/Grex/Bridge.lean`. The per-pack `.grex-lock` (M6, acquired
924/// inside the GitBackend implementation) continues to serialise any
925/// cross-task contention on the same pack path. Per-thread results
926/// are collected into a `Vec<Phase1ChildOutcome>` and folded into the
927/// caller's `SyncMetaReport` in a single sequential pass, preserving
928/// deterministic ordering of `report.phase1_classifications` (rayon
929/// `collect_into_vec` preserves source-order regardless of completion
930/// order).
931fn phase1_sync_children(
932 pool: &rayon::ThreadPool,
933 meta_dir: &Path,
934 manifest: &PackManifest,
935 backend: &dyn GitBackend,
936 opts: &SyncMetaOptions,
937 report: &mut SyncMetaReport,
938) {
939 // v1.2.5 (A3) — RAII guard tracks nested `pool.install` depth on
940 // the calling OS thread. In debug builds this powers an assertion
941 // in `phase3_recurse` that catches the v1.2.2 R#1 MED deadlock
942 // pattern (nested pool.install while holding a PackLock). Release
943 // builds compile to a zero-sized no-op.
944 let _depth_guard = PoolInstallDepthGuard::new();
945 // Install on the per-call pool so `--parallel N` is honoured even
946 // when this is invoked from inside another rayon context (Phase 3
947 // recursion). `install` is a synchronous fence: the closure
948 // returns once every parallel iteration has settled.
949 let outcomes: Vec<Phase1ChildOutcome> = pool.install(|| {
950 manifest
951 .children
952 .par_iter()
953 .map(|child| phase1_handle_child(meta_dir, child, backend, opts))
954 .collect()
955 });
956
957 // Sequential fold: the parallel pass cannot mutate `report` directly
958 // (it is `&mut`), so we drain the per-child outcomes here. Order is
959 // preserved by `par_iter().collect()` — see the `phase1_par_iter_preserves_order`
960 // test below.
961 let mut undeclared_seen: Vec<(PathBuf, DestClass)> = Vec::new();
962 for outcome in outcomes {
963 report.phase1_classifications.push(outcome.classification);
964 if let Some(e) = outcome.error {
965 report.errors.push(e);
966 }
967 if let Some(pair) = outcome.undeclared {
968 undeclared_seen.push(pair);
969 }
970 if let Some(rec) = outcome.dry_run_record {
971 report.dry_run_would_clone.push(rec);
972 }
973 }
974 if let Err(e) = aggregate_untracked(undeclared_seen) {
975 report.errors.push(e);
976 }
977}
978
979/// Per-child Phase 1 dispatch — runs inside the rayon pool. The
980/// extracted fn keeps the parallel closure body small and gives the
981/// Lean axiom a single discoverable Rust contract anchor (this fn is
982/// the per-sibling unit of work the `sync_disjoint_commutes` axiom
983/// quantifies over).
984fn phase1_handle_child(
985 meta_dir: &Path,
986 child: &ChildRef,
987 backend: &dyn GitBackend,
988 opts: &SyncMetaOptions,
989) -> Phase1ChildOutcome {
990 let dest = meta_dir.join(child.effective_path());
991 // Every declared child IS in the manifest by construction —
992 // `declared_in_manifest = true` is the only correct call here.
993 let class = classify_dest(&dest, true, None);
994 let mut out = Phase1ChildOutcome {
995 classification: (meta_dir.to_path_buf(), dest.clone(), class),
996 error: None,
997 undeclared: None,
998 dry_run_record: None,
999 };
1000 match class {
1001 DestClass::Missing => {
1002 // v1.3.1 (B4) — gate the clone subprocess + parent-mkdir
1003 // behind `dry_run`. When `dry_run = true` we record what
1004 // WOULD have been cloned (id = folder name = repo name)
1005 // and emit no FS / network / lockfile / events.jsonl
1006 // mutation. Lean theorem `dry_run_no_side_effects`
1007 // pins the invariant.
1008 if opts.dry_run {
1009 fill_dry_run_record(&mut out, &dest, child, opts);
1010 } else if let Err(e) = phase1_clone(backend, child, &dest, opts) {
1011 out.error = Some(e);
1012 }
1013 }
1014 DestClass::PresentDeclared => {
1015 // v1.3.1 (B4) — same gate for fetch/checkout. The dest
1016 // already exists on disk so a dry-run still emits a
1017 // would-clone record (callers want a uniform list of
1018 // children that would be touched), but no fetch fires.
1019 if opts.dry_run {
1020 fill_dry_run_record(&mut out, &dest, child, opts);
1021 } else if let Err(e) = phase1_fetch(backend, child, &dest, opts) {
1022 out.error = Some(e);
1023 }
1024 }
1025 DestClass::PresentDirty => {
1026 // Conservative: leave the dirty tree untouched. The
1027 // operator has uncommitted work; v1.2.0 walker policy
1028 // is to never overwrite their bytes during Phase 1.
1029 // Phase 2 will surface a refusal if the operator ALSO
1030 // requested a prune of this path, but that's a
1031 // separate decision made by the caller's lockfile-
1032 // orphan computation.
1033 }
1034 DestClass::PresentInProgress => {
1035 out.error = Some(TreeError::DirtyTreeRefusal {
1036 path: dest.clone(),
1037 kind: super::error::DirtyTreeRefusalKind::GitInProgress,
1038 });
1039 }
1040 DestClass::PresentUndeclared => {
1041 // Buffer for `aggregate_untracked` so we surface the
1042 // FULL list in one error.
1043 out.undeclared = Some((dest, class));
1044 }
1045 }
1046 out
1047}
1048
1049/// v1.3.1 (B4) — populate `out` for a dry-run child. The runtime
1050/// invariant `id = file_name(dest)` mirrors the non-dry-run path; if
1051/// `file_name` is absent (dest ends in `..` / is a filesystem root) or
1052/// the component is non-UTF-8, recording an empty `id` would silently
1053/// corrupt the dry-run plan, so we route the failure into
1054/// `out.error` (folded into `report.errors`) and continue per the
1055/// fail-loud-not-fail-fast contract. Shared between the `Missing` and
1056/// `PresentDeclared` arms of [`phase1_handle_child`].
1057fn fill_dry_run_record(
1058 out: &mut Phase1ChildOutcome,
1059 dest: &Path,
1060 child: &ChildRef,
1061 opts: &SyncMetaOptions,
1062) {
1063 match dest.file_name().and_then(|n| n.to_str()) {
1064 Some(name) => {
1065 out.dry_run_record = Some(DryRunWouldCloneRecord {
1066 id: name.to_string(),
1067 url: child.url.clone(),
1068 ref_: opts.ref_override.clone().or_else(|| child.r#ref.clone()),
1069 });
1070 }
1071 None => {
1072 out.error = Some(TreeError::InvalidDestination {
1073 path: dest.to_path_buf(),
1074 reason: "dest has no UTF-8 file_name component".to_string(),
1075 });
1076 }
1077 }
1078}
1079
1080/// Phase 1 clone helper. Acquires the M6 `PackLock` on the prospective
1081/// dest's parent (`meta_dir`) for the duration of the clone — distinct
1082/// children clone serially within a meta to keep the scheduler-tier
1083/// model honest. Sibling parallelism is a 1.j follow-up.
1084fn phase1_clone(
1085 backend: &dyn GitBackend,
1086 child: &ChildRef,
1087 dest: &Path,
1088 opts: &SyncMetaOptions,
1089) -> Result<(), TreeError> {
1090 let effective_ref = opts.ref_override.as_deref().or(child.r#ref.as_deref());
1091 // Make sure the dest's parent exists — the clone backend assumes
1092 // it. v1.2.0 invariant 1 (boundary) and 1.c's `validate_children_paths`
1093 // already ruled out a path that would escape `meta_dir`, so a
1094 // simple `create_dir_all` on the parent is safe here.
1095 if let Some(parent) = dest.parent() {
1096 std::fs::create_dir_all(parent).map_err(|e| {
1097 TreeError::ManifestRead(format!("failed to mkdir parent {}: {e}", parent.display()))
1098 })?;
1099 }
1100 backend.clone(&child.url, dest, effective_ref)?;
1101 Ok(())
1102}
1103
1104/// Phase 1 fetch helper. Same locking discipline as `phase1_clone`.
1105fn phase1_fetch(
1106 backend: &dyn GitBackend,
1107 child: &ChildRef,
1108 dest: &Path,
1109 opts: &SyncMetaOptions,
1110) -> Result<(), TreeError> {
1111 backend.fetch(dest)?;
1112 let effective_ref = opts.ref_override.as_deref().or(child.r#ref.as_deref());
1113 if let Some(r) = effective_ref {
1114 backend.checkout(dest, r)?;
1115 }
1116 Ok(())
1117}
1118
1119/// Phase 2: prune orphan lockfile entries. Each candidate is run
1120/// through the consent-walk via `phase2_prune` (1.f); a `Clean` verdict
1121/// removes the dest, anything else surfaces as an error. The orphan
1122/// list is supplied by the caller — 1.h owns the lockfile-read side
1123/// of the walker contract.
1124fn phase2_prune_orphans(
1125 meta_dir: &Path,
1126 prune_candidates: &[PathBuf],
1127 opts: &SyncMetaOptions,
1128 report: &mut SyncMetaReport,
1129) {
1130 // v1.2.0 Stage 1.l — postmortem audit log path. Resolved once per
1131 // meta from the canonical `<meta_dir>/.grex/events.jsonl` slot;
1132 // `phase2_prune` only writes to it when an override flag actually
1133 // consumed a non-Clean verdict (clean prunes never log).
1134 let audit_log = crate::manifest::event_log_path(meta_dir);
1135 for candidate in prune_candidates {
1136 // Candidates are parent-relative POSIX paths
1137 // (`LockEntry::validate_path` invariant from 1.b). Resolve
1138 // against `meta_dir` to get the absolute dest.
1139 let dest = meta_dir.join(candidate);
1140 match phase2_prune(
1141 &dest,
1142 opts.force_prune,
1143 opts.force_prune_with_ignored,
1144 Some(audit_log.as_path()),
1145 opts.quarantine.as_ref(),
1146 ) {
1147 Ok(()) => report.phase2_pruned.push(dest),
1148 Err(e) => report.errors.push(e),
1149 }
1150 }
1151}
1152
/// Per-child output from Phase 3's parallel recursion. Each variant
/// carries either a successful sub-`SyncMetaReport` (folded into the
/// caller via [`SyncMetaReport::merge`]) or a fatal error to push onto
/// `report.errors`. Children whose dest does NOT carry a sub-meta
/// produce `Skipped`.
///
/// v1.2.4 — `Cancelled` is the EARLY-OUT contributed by a sibling
/// closure that observed the per-`phase3_recurse` cancellation flag
/// already flipped to `true`. It carries no sub-report (no work was
/// done) and never contributes to `report.metas_visited` or
/// `report.errors` — the cycle that triggered the flip is the sole
/// error reported. Mirrors the Lean
/// `cancellation_terminates_promptly` theorem: when `cancelled` is
/// observed at entry, return ok with zero descent.
enum Phase3ChildOutcome {
    /// No sub-meta at the dest (or the depth cap truncated the walk).
    Skipped,
    /// Sub-walk succeeded; payload is merged into the parent report.
    Recursed(SyncMetaReport),
    /// Sub-walk failed (cycle or any recursive `TreeError`).
    Failed(TreeError),
    /// Sibling cycle signal observed at entry; zero work performed.
    Cancelled,
}
1173
// NOTE(review): the commentary below describes `phase3_recurse` (the
// pool-owning driver defined elsewhere in this module — note the
// references to the `pool` argument and to merging sub-reports), NOT
// the `phase3_handle_child` item that follows. Written as `///` it
// would misattach to `phase3_handle_child` in rustdoc on top of that
// fn's own doc block, so it is kept as plain `//` commentary here.
//
// Phase 3: parallel recursion into child metas. A child qualifies for
// recursion when:
//
// 1. `opts.recurse` is `true`,
// 2. `opts.max_depth` is unbounded OR the next-frame depth does not
//    exceed the cap (the `next_depth > cap` skip lives in
//    `phase3_handle_child`),
// 3. `<dest>/.grex/pack.yaml` exists.
//
// Sub-meta reports are merged into the parent's report via
// `SyncMetaReport::merge` so a top-level caller sees one rolled-up
// view of every frame's classifications + errors.
//
// v1.2.1 item 3 — sibling-parallel via rayon `par_iter`. Each
// recursion frame builds its own thread pool inside `sync_meta_inner`
// (work-stealing across recursion levels happens naturally because
// the inner `pool.install` blocks for the lifetime of the inner
// sync_meta call; sibling sub-metas at level N execute in parallel
// via the level-N pool, and each level-N child carries its own
// level-(N+1) pool for its own grandchildren). Sub-reports are
// collected source-ordered via `collect_into_vec`, then folded into
// `report` sequentially to preserve deterministic ordering of the
// `phase1_classifications` / `phase2_pruned` / `errors` vectors.
//
// Pre-rayon refactor this fn already carried 7 args (the clippy cap).
// v1.2.1 item 3 added the `pool` reference, taking it to 8. Bundling
// these into a context struct is technically possible but every other
// arg already comes from `sync_meta_inner`'s param list, so the struct
// would just shuffle the wiring without removing it. Localised allow
// instead — the call-site is private to this module and threads
// ownership of `pool` cleanly.
/// Per-child Phase 3 dispatch — runs inside the rayon pool. Mirrors
/// the `phase1_handle_child` / `sync_disjoint_commutes` discipline
/// (one discoverable Rust contract anchor per sibling unit of work)
/// and keeps `phase3_recurse` itself under the clippy line cap.
///
/// v1.2.2 — cycle detection lives here. `ancestors` is the in-progress
/// ancestor identity chain from root down to (but excluding) this
/// child. If the child's identity (`pack_identity_for_child`) is
/// already in the chain we surface `TreeError::CycleDetected` with
/// the chain extended by the recurring identity. Otherwise the
/// child's identity is appended (clone-per-child, A.1) so disjoint
/// sibling branches do not pollute each other's view.
///
/// v1.2.3 (B1) — the depth-cap check (`next_depth > opts.max_depth`)
/// MUST run AFTER the cycle check. Otherwise a cyclic manifest whose
/// cycle length exceeds `max_depth` would silently truncate without
/// surfacing `CycleDetected`: the depth cap is a "stop walking
/// further" knob, not a "ignore correctness invariants" knob.
///
/// v1.2.4 (A1) — cancellation flag plumbed through. The closure
/// observes `cancelled.load(Relaxed)` at entry: if a prior sibling
/// already detected a cycle and signalled, this closure returns
/// `Phase3ChildOutcome::Cancelled` with zero further work (no
/// classify, no clone, no recursion). When this closure itself
/// detects a cycle, it stores `true` into the flag BEFORE returning
/// `Phase3ChildOutcome::Failed(CycleDetected)` so any rayon-co-iterated
/// sibling not yet started observes the signal on its next entry. The
/// flag is per-`phase3_recurse`-call: recursive sub-`sync_meta_inner`
/// invocations build their own flag inside their own
/// `phase3_recurse`, so a cycle two levels down does not cancel
/// disjoint siblings at level one. Discharges the Lean
/// `cancellation_terminates_promptly` obligation.
// Nine parameters, every one threaded straight from the caller's
// frame — a bundling context struct would shuffle the wiring without
// removing any of it, hence the localised allow.
#[allow(clippy::too_many_arguments)]
fn phase3_handle_child(
    meta_dir: &Path,
    child: &ChildRef,
    backend: &dyn GitBackend,
    loader: &dyn PackLoader,
    opts: &SyncMetaOptions,
    next_depth: usize,
    ancestors: &[String],
    cancelled: &AtomicBool,
    pre_existing_dests: &HashSet<PathBuf>,
) -> Phase3ChildOutcome {
    // v1.2.5 (W1) — `dest` is normalised via [`normalize_dest_key`] so
    // membership checks against `pre_existing_dests` (snapshotted with
    // the same normalisation in `sync_meta_inner`) compare apples to
    // apples even when the manifest's `child.effective_path()` carries
    // a trailing `/` or a `./` prefix. Lexical normalisation only —
    // keeps `dest` a valid filesystem path for the subsequent
    // `cleanup_partial_clone` and `sync_meta_inner` calls.
    let dest = normalize_dest_key(&meta_dir.join(child.effective_path()));
    // v1.2.4 EARLY-OUT — a sibling closure already detected a cycle
    // and signalled. Return immediately with zero further descent so
    // deep walks never start. Matches the Lean
    // `cancellation_terminates_promptly` theorem: cancelled = true
    // implies ok with zero recursive steps.
    //
    // v1.2.5 (W1): align with the Lean theorem
    // `outcome ≠ Recursed → post.at dest = pre.at dest` — Cancelled is
    // a non-Recursed outcome, so the dest must be restored to its
    // pre-state. Phase 1 may have already freshly cloned the dest by
    // the time this closure runs; if so, clean it. Pre-existing dests
    // (legitimate prior content) are preserved via the same
    // `pre_existing_dests` guard used by the Failed(CycleDetected)
    // branch. Best-effort: cleanup failures are logged but do NOT
    // mask the cancellation outcome.
    if cancelled.load(Ordering::Relaxed) {
        if !pre_existing_dests.contains(&dest) {
            cleanup_partial_clone(&dest);
        }
        return Phase3ChildOutcome::Cancelled;
    }
    // Only dests that carry a sub-manifest qualify for recursion.
    if !dest.join(".grex").join("pack.yaml").is_file() {
        return Phase3ChildOutcome::Skipped;
    }
    // v1.2.2 cycle detection — discharges the
    // `sync_meta_no_cycle_infinite_clone` Lean theorem in
    // `proof/Grex/Walker.lean`. Identity is `url@ref` so the same
    // repo at two different refs is two distinct packs (intentional:
    // matches `pack_identity_for_child` and the build_graph cycle
    // detector at `graph_build.rs:174`). A single `Vec<String>`
    // doubles as O(depth) contains-check AND deterministic chain for
    // error display — depth is bounded ~5-10 in practice so linear
    // scan beats hashing here.
    //
    // v1.2.3 (B1): runs BEFORE the depth-cap early-return below so a
    // cycle longer than `max_depth` cannot hide behind truncation.
    let id = pack_identity_for_child(child);
    if ancestors.iter().any(|v| v == &id) {
        // v1.2.4 SIGNAL — flip the cancellation flag so any
        // co-iterated sibling closures observe it on their next
        // entry. `Relaxed` is sufficient: we need eventual visibility,
        // not strict happens-before ordering against any other memory
        // operation. See design doc §"Atomic ordering".
        cancelled.store(true, Ordering::Relaxed);
        // v1.2.5 (A2) — partial-clone cleanup. The dest may have been
        // freshly cloned by Phase 1 within this same `sync_meta_inner`
        // call. If it pre-existed (prior successful sync), preserve
        // it — `pre_existing_dests` was snapshotted before Phase 1
        // ran. Best-effort: cleanup failures are logged but do NOT
        // mask the cycle error. Discharges the Lean theorem
        // `partial_clone_cleanup_idempotent`.
        if !pre_existing_dests.contains(&dest) {
            cleanup_partial_clone(&dest);
        }
        let mut chain = ancestors.to_vec();
        chain.push(id);
        return Phase3ChildOutcome::Failed(TreeError::CycleDetected { chain });
    }
    // v1.2.3 (B1): depth-cap check moved from `phase3_recurse` to
    // here, AFTER the cycle check. `Skipped` rather than a hard error
    // because depth-cap truncation is a benign best-effort knob —
    // siblings further down the manifest tree should still classify.
    if let Some(cap) = opts.max_depth {
        if next_depth > cap {
            return Phase3ChildOutcome::Skipped;
        }
    }
    // Clone-per-child (A.1): each rayon iteration owns its own
    // ancestor view, so disjoint sibling branches do not see each
    // other on the path. A diamond where two siblings legitimately
    // depend on the same descendant is therefore not a cycle.
    let mut child_ancestors = ancestors.to_vec();
    child_ancestors.push(id);
    // Empty `prune_candidates` for the sub-meta — 1.h supplies the
    // sub-meta's distributed lockfile read via the same caller
    // pathway when it lands.
    match sync_meta_inner(&dest, backend, loader, opts, &[], next_depth, &child_ancestors) {
        Ok(sub) => Phase3ChildOutcome::Recursed(sub),
        Err(e) => {
            // v1.2.5 (W1) — recursive failure path: the sub-walk
            // returned ANY `TreeError` deep in the subtree. The dest
            // itself was cloned by THIS frame's Phase 1 if it was
            // missing, so symmetry with the same-frame cycle branch
            // above demands the same pre-existence guard for ALL error
            // variants — not just `CycleDetected`. Aligns with the
            // Lean theorem `outcome ≠ Recursed → post.at dest = pre.at
            // dest`: any non-Recursed outcome (Failed regardless of
            // variant) must restore pre-state. Best-effort cleanup;
            // logged on failure.
            if !pre_existing_dests.contains(&dest) {
                cleanup_partial_clone(&dest);
            }
            Phase3ChildOutcome::Failed(e)
        }
    }
}
1351
1352/// v1.2.5 (A2) — best-effort cleanup of a partially-cloned dest dir.
1353///
1354/// Invoked from the cycle-detected and cancelled paths in
1355/// `phase3_handle_child` when `dest` was freshly cloned by THIS frame's
1356/// Phase 1 (snapshotted via `pre_existing_dests`). Idempotent: a
1357/// non-existent `dest` is a no-op (the inner walker treats `NotFound`
1358/// as success). Failures are logged + swallowed so the original
1359/// cycle/error is preserved as the caller-visible outcome.
1360///
1361/// **Symlink safety (v1.2.5 W1):** This helper does NOT use
1362/// `std::fs::remove_dir_all` because that historically followed
1363/// directory symlinks during cleanup on some platforms / std versions.
1364/// Instead, it walks the tree manually using
1365/// [`std::fs::symlink_metadata`] at every level: a symlink is unlinked
1366/// AS a symlink (never followed into an unrelated tree), regular files
1367/// are deleted via `remove_file`, and directories are descended into
1368/// before being removed via `remove_dir`. Mirrors the
1369/// `safe_remove_dir_all` helper in `quarantine.rs` (kept inlined here
1370/// because the constraints of v1.2.5 W1 forbid touching that file).
1371///
1372/// Discharges the Lean theorem `partial_clone_cleanup_idempotent` in
1373/// `proof/Grex/Walker.lean`: the post-state at `dest` equals the
1374/// pre-state for any non-Recursed Phase 3 outcome.
1375fn cleanup_partial_clone(dest: &Path) {
1376 if let Err(e) = safe_remove_tree(dest) {
1377 // Don't propagate — the caller's primary error (cycle) is the
1378 // contract. Cleanup is best-effort hygiene.
1379 tracing::warn!(
1380 target: "grex::walker",
1381 dest = %dest.display(),
1382 error = %e,
1383 "v1.2.5 A2: partial-clone cleanup failed; original error preserved"
1384 );
1385 }
1386}
1387
/// v1.2.5 (W1) — lexical-normalisation key for the
/// `pre_existing_dests` HashSet, so a manifest entry whose
/// `effective_path()` carries a trailing `/`, a `./` prefix, or a
/// redundant intermediate `./` component does not desync the
/// snapshot site from the lookup site.
///
/// Strategy: filter `Component::CurDir` (`.`) out of the path's
/// component stream and collect the survivors into a fresh
/// `PathBuf`, which also sheds the trailing-separator artefact some
/// callers preserve via `PathBuf::push("foo/")`. Purely lexical — no
/// FS access, no symlink resolution; the goal is only that two
/// textually-different spellings of the SAME logical path compare
/// equal. (`dunce::simplified` would also strip the Windows `\\?\`
/// prefix, but pulling in the dependency is not warranted when the
/// lexical strategy already covers every observed case.)
///
/// An input that normalises to nothing (`.` or the empty path) comes
/// back as `.` so downstream FS ops still name the current directory
/// rather than an OS-meaningless empty path.
fn normalize_dest_key(p: &Path) -> PathBuf {
    let stripped: PathBuf = p
        .components()
        .filter(|c| !matches!(c, std::path::Component::CurDir))
        .collect();
    if stripped.as_os_str().is_empty() {
        PathBuf::from(".")
    } else {
        stripped
    }
}
1423
1424/// v1.2.5 (W1) — symlink-secure recursive removal helper.
1425///
1426/// **v1.2.6 (W2)**: re-routed through a `cap_std::fs::Dir` capability
1427/// rooted at `path.parent()`. The recursion stays inside the capability
1428/// the kernel resolved at open time — a `..` segment, an absolute child
1429/// path, or a symlink whose target escapes the capability is rejected by
1430/// cap-std with `PermissionDenied` (matches the Lean theorem
1431/// `walker_subpath_resolution_bounded_by_meta_dir`).
1432///
1433/// Walks `path` using [`cap_std::fs::Dir::symlink_metadata`] at every
1434/// level so a symlink encountered mid-traversal is unlinked AS a symlink
1435/// rather than followed into an unrelated tree.
1436///
1437/// Behaviour:
1438/// * `path` does not exist (`NotFound`) → `Ok(())` (idempotent).
1439/// * `path` is a symlink → unlink the link itself (never the target).
1440/// On Windows a directory symlink requires `remove_dir`; we try
1441/// `remove_file` first and fall back to `remove_dir`.
1442/// * `path` is a directory → recurse into each child via
1443/// `symlink_metadata`, then `remove_dir(path)`.
1444/// * `path` is a regular file → `remove_file`.
1445/// * `path` has no parent (filesystem root) → fall back to the
1446/// pre-v1.2.6 ambient `std::fs` walk; cap-std cannot model a rooted
1447/// capability there.
1448fn safe_remove_tree(path: &Path) -> std::io::Result<()> {
1449 // Pre-flight existence probe so a missing path is a no-op even when
1450 // the parent itself is missing (idempotent contract).
1451 match std::fs::symlink_metadata(path) {
1452 Ok(_) => {}
1453 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(()),
1454 Err(e) => return Err(e),
1455 }
1456 let (parent, name) = match (path.parent(), path.file_name()) {
1457 (Some(p), Some(n)) if !p.as_os_str().is_empty() => (p, std::path::PathBuf::from(n)),
1458 _ => return safe_remove_tree_ambient(path),
1459 };
1460 let parent_dir = match cap_std::fs::Dir::open_ambient_dir(parent, cap_std::ambient_authority())
1461 {
1462 Ok(d) => d,
1463 // Parent vanished or is unreadable — treat as idempotent success
1464 // (mirrors the NotFound short-circuit above).
1465 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(()),
1466 Err(e) => return Err(e),
1467 };
1468 cap_remove_tree(&parent_dir, &name)
1469}
1470
1471/// v1.2.6 (W2) — capability-rooted recursive remove. `parent_dir` is a
1472/// `cap_std::fs::Dir` and `name` is a relative entry under it. cap-std
1473/// rejects `..` traversal and symlink escape; this recursion can only
1474/// touch entries provably under `parent_dir`'s root capability.
fn cap_remove_tree(parent_dir: &cap_std::fs::Dir, name: &Path) -> std::io::Result<()> {
    // Probe without following symlinks; an entry that vanished
    // concurrently is idempotent success, matching `safe_remove_tree`.
    let meta = match parent_dir.symlink_metadata(name) {
        Ok(m) => m,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(()),
        Err(e) => return Err(e),
    };
    let ft = meta.file_type();
    if ft.is_symlink() {
        // Unlink the symlink itself. On Windows, a symlink to a dir
        // requires `remove_dir`; `remove_file` covers file/symlink_file.
        // Try file-style first, then fall back to dir-style.
        return match parent_dir.remove_file(name) {
            Ok(()) => Ok(()),
            Err(_) => parent_dir.remove_dir(name),
        };
    }
    if ft.is_dir() {
        // Open the child as its own capability so the recursion stays
        // bound to it (cap-std refuses `..` escape relative to the
        // child handle, not just the parent). Drop the handle BEFORE
        // calling `parent_dir.remove_dir(name)` — on Windows an
        // outstanding open handle to a directory blocks removal with
        // ERROR_SHARING_VIOLATION.
        {
            let child_dir = parent_dir.open_dir(name)?;
            // NOTE(review): entries are unlinked while the readdir
            // stream is still open — mirrors std::fs::remove_dir_all;
            // presumed safe for the delete-everything case, confirm on
            // exotic filesystems.
            for entry in child_dir.entries()? {
                let entry = entry?;
                let child_name = std::path::PathBuf::from(entry.file_name());
                cap_remove_tree(&child_dir, &child_name)?;
            }
        }
        return parent_dir.remove_dir(name);
    }
    // Regular file or other unlinkable entry.
    parent_dir.remove_file(name)
}
1511
1512/// v1.2.6 (W2) — fallback ambient walk used when `path` has no usable
1513/// parent (filesystem root). This path keeps the pre-v1.2.6 contract
1514/// for the degenerate case; production call sites always have a parent
1515/// (the per-meta dest is `<meta>/<basename>`).
fn safe_remove_tree_ambient(path: &Path) -> std::io::Result<()> {
    // Probe without following symlinks; a missing path is success
    // (same idempotent contract as the cap-std walk).
    let meta = match std::fs::symlink_metadata(path) {
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(()),
        other => other?,
    };
    let ft = meta.file_type();
    if ft.is_symlink() {
        // Unlink the link itself, never its target. A Windows dir
        // symlink needs `remove_dir`, so fall back when the
        // file-style unlink is refused.
        std::fs::remove_file(path).or_else(|_| std::fs::remove_dir(path))
    } else if ft.is_dir() {
        for entry in std::fs::read_dir(path)? {
            safe_remove_tree_ambient(&entry?.path())?;
        }
        std::fs::remove_dir(path)
    } else {
        std::fs::remove_file(path)
    }
}
1539
/// Phase 3 — parallel recursion fan-out over `manifest.children`.
///
/// No-op when `opts.recurse` is false. Otherwise every child is
/// handed to `phase3_handle_child` on the shared rayon `pool` in a
/// single `par_iter` pass, and the per-child outcomes are folded
/// into `report`:
///
/// * `Skipped` / `Cancelled` — contribute nothing (no sub-report,
///   no error entry);
/// * `Recursed(sub)` — the sub-report is merged into `report`;
/// * `Failed(e)` — pushed onto `report.errors`; the FIRST
///   `CycleDetected` among them is additionally cloned and returned
///   as the short-circuit `Err`, while every sibling error stays
///   recorded in the report for the caller to log/print.
#[allow(clippy::too_many_arguments, clippy::too_many_lines)]
fn phase3_recurse(
    pool: &rayon::ThreadPool,
    meta_dir: &Path,
    manifest: &PackManifest,
    backend: &dyn GitBackend,
    loader: &dyn PackLoader,
    opts: &SyncMetaOptions,
    depth: usize,
    ancestors: &[String],
    pre_existing_dests: &HashSet<PathBuf>,
    report: &mut SyncMetaReport,
) -> Result<(), TreeError> {
    if !opts.recurse {
        return Ok(());
    }
    let next_depth = depth + 1;
    // v1.2.3 (B1): depth-cap early-return removed from this site —
    // moved into `phase3_handle_child` AFTER the cycle check so a
    // cycle longer than `max_depth` cannot mask itself by tripping
    // the depth cap before the cycle test fires. The per-child
    // handler now treats `next_depth > cap` as `Skipped`.
    //
    // v1.2.4 (A1): per-call cancellation flag. One `Arc<AtomicBool>`
    // is constructed here and shared (read-only across closures, with
    // a single point-of-truth `store` in any closure that detects a
    // cycle) by every sibling iteration of this `par_iter`. Recursive
    // sub-`sync_meta_inner` calls build their own flag via their own
    // `phase3_recurse` — disjoint subtrees do not cross-cancel. The
    // flag is dropped when this fn returns, so its lifetime is exactly
    // the rayon parallel pass it scopes.
    //
    // Scope: cancels siblings within THIS Phase 3 fan-out call only;
    // recursive sub-fan-outs construct their own flag, so a cycle deep
    // in pack X does not cancel pack Y at root level. This isolation
    // is intentional and tested by
    // `cancellation_per_call_scope_isolates_subtrees`.
    let cancelled = Arc::new(AtomicBool::new(false));
    // v1.2.5 (A3) — RAII guard tracks nested `pool.install` depth on
    // the calling OS thread. In debug builds the per-closure assertion
    // below uses this counter (combined with `pack_lock`'s *global*
    // `HELD_PACK_LOCKS` registry, scoped at read-time by `ThreadId`)
    // to fire if any worker thread enters the per-child closure body
    // while still holding a `PackLock` it acquired earlier on that
    // same thread — the v1.2.2 R#1 MED deadlock pattern. The
    // held-lock half of the check was migrated from a `thread_local!`
    // to a `Mutex<HashSet<(PathBuf, ThreadId)>>` (W4 cross-thread fix)
    // because rayon's work-stealing routes the closure onto worker
    // threads whose TL would always be empty (the lock was acquired
    // on the outer tokio worker, not on the rayon worker), masking
    // the deadlock pattern entirely. The depth counter remains
    // per-OS-thread (it tracks per-thread reentry depth correctly).
    // Release builds compile both halves to a zero-sized no-op. The
    // counter / global set live in `scheduler.rs` and `pack_lock.rs`
    // respectively (W3 + W2/W4 worker outputs).
    let _depth_guard = PoolInstallDepthGuard::new();
    let outcomes: Vec<Phase3ChildOutcome> = pool.install(|| {
        manifest
            .children
            .par_iter()
            .map(|child| {
                // v1.2.5 (A3) — debug-build deadlock guard. If this
                // worker thread already holds one or more PackLocks
                // AND we are inside a nested `pool.install` (depth
                // >= 2 means an outer Phase 1 / Phase 3 frame is
                // still active on this thread), the v1.2.2 R#1
                // deadlock pattern is reproducible. Single-frame
                // `pool.install` (depth == 1) holding a lock is
                // safe — the lock was acquired outside the pool and
                // released after the closure returns. The assertion
                // is `#[cfg(debug_assertions)]` only; release builds
                // pay no runtime cost. `held_pack_locks_for_test`
                // filters the global registry by `current().id()` so
                // the assertion correctly reports only locks held by
                // *this* worker thread, not concurrent acquires on
                // other threads. Discharges the Lean theorem
                // `pool_deadlock_guard_terminates` in
                // `proof/Grex/Scheduler.lean`.
                #[cfg(debug_assertions)]
                {
                    let depth = crate::scheduler::pool_install_depth_for_test();
                    let held = crate::pack_lock::held_pack_locks_for_test();
                    debug_assert!(
                        depth < 2 || held.is_empty(),
                        "pool deadlock guard: nested pool.install (depth={depth}) entered while \
                         holding PackLock(s) {held:?} on this thread — see concurrency.md \
                         §lock acquisition order"
                    );
                }
                phase3_handle_child(
                    meta_dir,
                    child,
                    backend,
                    loader,
                    opts,
                    next_depth,
                    ancestors,
                    &cancelled,
                    pre_existing_dests,
                )
            })
            .collect()
    });
    // Cycle errors short-circuit (catastrophic — clone-storm risk);
    // every other outcome folds into the report per the existing
    // fail-loud-but-continue policy. v1.2.4: `Cancelled` outcomes are
    // skipped — they carry no sub-report and contribute neither to
    // `report.metas_visited` nor to `report.errors`. The cycle that
    // triggered the cancellation is the sole error reported.
    let mut first_cycle_idx: Option<usize> = None;
    for outcome in outcomes {
        match outcome {
            Phase3ChildOutcome::Skipped | Phase3ChildOutcome::Cancelled => {}
            Phase3ChildOutcome::Recursed(sub) => report.merge(sub),
            Phase3ChildOutcome::Failed(e) => {
                // v1.2.2 fix: surface all sibling cycles in
                // report.errors; first cycle returned as short-circuit
                // Err per fail-loud policy.
                if matches!(e, TreeError::CycleDetected { .. }) && first_cycle_idx.is_none() {
                    first_cycle_idx = Some(report.errors.len());
                }
                report.errors.push(e);
            }
        }
    }
    if let Some(idx) = first_cycle_idx {
        // Clone the cycle to return as the short-circuit Err while
        // leaving the original entry (and any sibling cycles) recorded
        // in report.errors for the caller to log/print.
        let TreeError::CycleDetected { chain } = &report.errors[idx] else {
            unreachable!("first_cycle_idx points at a CycleDetected variant by construction");
        };
        return Err(TreeError::CycleDetected { chain: chain.clone() });
    }
    Ok(())
}
1676
1677#[cfg(test)]
1678mod tests {
1679 use super::*;
1680
1681 /// Direct unit test of the synthesis helper — name must equal the
1682 /// child's `effective_path()`, type must be `Scripted`, and every
1683 /// list field must be empty.
1684 #[test]
1685 fn synthesize_plain_git_manifest_yields_leaf_scripted_pack() {
1686 let child = ChildRef {
1687 url: "https://example.com/algo-leet.git".to_string(),
1688 path: None,
1689 r#ref: None,
1690 };
1691 let manifest = synthesize_plain_git_manifest(&child);
1692 assert_eq!(manifest.name, child.effective_path());
1693 assert_eq!(manifest.name, "algo-leet");
1694 assert_eq!(manifest.r#type, PackType::Scripted);
1695 assert_eq!(manifest.schema_version.as_str(), "1");
1696 assert!(manifest.depends_on.is_empty());
1697 assert!(manifest.children.is_empty());
1698 assert!(manifest.actions.is_empty());
1699 assert!(manifest.teardown.is_none());
1700 assert!(manifest.extensions.is_empty());
1701 assert!(manifest.version.is_none());
1702 }
1703
1704 /// Explicit `path:` override wins over the URL-derived bare name —
1705 /// confirms the synthesised manifest's `name` mirrors what the
1706 /// parent declared, so `verify_child_name` passes by construction.
1707 #[test]
1708 fn synthesize_plain_git_manifest_honours_explicit_path() {
1709 let child = ChildRef {
1710 url: "https://example.com/some-repo.git".to_string(),
1711 path: Some("custom-name".to_string()),
1712 r#ref: None,
1713 };
1714 let manifest = synthesize_plain_git_manifest(&child);
1715 assert_eq!(manifest.name, "custom-name");
1716 }
1717
1718 /// `dest_has_git_repo` MUST refuse a symlinked destination — even
1719 /// when the symlink target carries a real `.git/` directory.
1720 /// Otherwise a malicious parent pack could redirect synthesis to
1721 /// fetch into `$HOME` (or any sibling repo) by relying on a
1722 /// pre-existing symlink in the workspace.
1723 #[test]
1724 fn dest_has_git_repo_rejects_symlinked_dest() {
1725 // Skip on platforms where unprivileged symlink creation fails
1726 // (notably Windows without Developer Mode). Failing the symlink
1727 // call is itself proof the attack vector is closed for that
1728 // host, so the rest of the test is moot.
1729 let outer = tempfile::tempdir().unwrap();
1730 let real = outer.path().join("real-repo");
1731 std::fs::create_dir_all(real.join(".git")).unwrap();
1732 let link = outer.path().join("via-link");
1733
1734 #[cfg(unix)]
1735 let symlink_result = std::os::unix::fs::symlink(&real, &link);
1736 #[cfg(windows)]
1737 let symlink_result = std::os::windows::fs::symlink_dir(&real, &link);
1738
1739 if symlink_result.is_err() {
1740 // Host won't let us create a symlink — nothing to test.
1741 return;
1742 }
1743
1744 // Sanity: following the symlink would reveal `.git`.
1745 assert!(link.join(".git").exists(), "symlink target should expose .git through traversal");
1746 // But `dest_has_git_repo` must refuse it.
1747 assert!(
1748 !dest_has_git_repo(&link),
1749 "dest_has_git_repo must refuse a symlinked destination even when target has .git"
1750 );
1751 // Real (non-symlinked) sibling still passes — we haven't
1752 // accidentally broken the happy path.
1753 assert!(dest_has_git_repo(&real));
1754 }
1755
1756 // -----------------------------------------------------------------
1757 // v1.2.0 Stage 1.g — `sync_meta` three-phase walker tests (TDD).
1758 //
1759 // These tests use a thin in-memory `MockLoader` plus
1760 // `MockGitBackend` so the walker's PHASE ORCHESTRATION (not the
1761 // backend mechanics) is what's being exercised. The git-touching
1762 // primitives `classify_dest` (1.e) and `phase2_prune` (1.f) have
1763 // their own per-host tests that already cover the real-FS-and-git
1764 // path. The `host_has_git_binary` gate guards the few tests that
1765 // need a working `git` to materialise a clean `PresentDeclared`
1766 // verdict — same precedent as the `dest_class::tests` host-skip
1767 // pattern.
1768 // -----------------------------------------------------------------
1769
1770 use std::collections::HashMap;
1771 use std::sync::Mutex;
1772
1773 /// Minimal stand-in `PackLoader` for the v1.2.0 tests. Maps
1774 /// `meta_dir` → `PackManifest` directly so we never touch disk
1775 /// for manifest reads.
1776 struct InMemLoader {
1777 manifests: HashMap<PathBuf, PackManifest>,
1778 }
1779
1780 impl InMemLoader {
1781 fn new() -> Self {
1782 Self { manifests: HashMap::new() }
1783 }
1784 fn with(mut self, dir: impl Into<PathBuf>, m: PackManifest) -> Self {
1785 self.manifests.insert(dir.into(), m);
1786 self
1787 }
1788 }
1789
1790 impl PackLoader for InMemLoader {
1791 fn load(&self, path: &Path) -> Result<PackManifest, TreeError> {
1792 self.manifests
1793 .get(path)
1794 .cloned()
1795 .ok_or_else(|| TreeError::ManifestNotFound(path.to_path_buf()))
1796 }
1797 }
1798
1799 /// Minimal stand-in `GitBackend`. Records every call so tests can
1800 /// assert phase orchestration. `clone` materialises a `.git/`
1801 /// under the supplied dest so subsequent classify probes treat the
1802 /// slot as Present.
1803 #[allow(dead_code)] // fields populated for future test introspection.
1804 #[derive(Debug, Clone)]
1805 enum BackendCall {
1806 Clone { url: String, dest: PathBuf, r#ref: Option<String> },
1807 Fetch { dest: PathBuf },
1808 Checkout { dest: PathBuf, r#ref: String },
1809 HeadSha { dest: PathBuf },
1810 }
1811
1812 struct InMemGit {
1813 calls: Mutex<Vec<BackendCall>>,
1814 materialise_on_clone: bool,
1815 }
1816
1817 impl InMemGit {
1818 fn new() -> Self {
1819 Self { calls: Mutex::new(Vec::new()), materialise_on_clone: true }
1820 }
1821 fn calls(&self) -> Vec<BackendCall> {
1822 self.calls.lock().unwrap().clone()
1823 }
1824 }
1825
1826 impl GitBackend for InMemGit {
1827 fn name(&self) -> &'static str {
1828 "v1_2_0-mock-git"
1829 }
1830 fn clone(
1831 &self,
1832 url: &str,
1833 dest: &Path,
1834 r#ref: Option<&str>,
1835 ) -> Result<crate::ClonedRepo, crate::GitError> {
1836 self.calls.lock().unwrap().push(BackendCall::Clone {
1837 url: url.to_string(),
1838 dest: dest.to_path_buf(),
1839 r#ref: r#ref.map(str::to_string),
1840 });
1841 if self.materialise_on_clone {
1842 std::fs::create_dir_all(dest.join(".git")).unwrap();
1843 }
1844 Ok(crate::ClonedRepo { path: dest.to_path_buf(), head_sha: "0".repeat(40) })
1845 }
1846 fn fetch(&self, dest: &Path) -> Result<(), crate::GitError> {
1847 self.calls.lock().unwrap().push(BackendCall::Fetch { dest: dest.to_path_buf() });
1848 Ok(())
1849 }
1850 fn checkout(&self, dest: &Path, r#ref: &str) -> Result<(), crate::GitError> {
1851 self.calls
1852 .lock()
1853 .unwrap()
1854 .push(BackendCall::Checkout { dest: dest.to_path_buf(), r#ref: r#ref.to_string() });
1855 Ok(())
1856 }
1857 fn head_sha(&self, dest: &Path) -> Result<String, crate::GitError> {
1858 self.calls.lock().unwrap().push(BackendCall::HeadSha { dest: dest.to_path_buf() });
1859 Ok("0".repeat(40))
1860 }
1861 }
1862
1863 /// Build a meta manifest with the supplied children.
1864 fn meta_manifest_with(name: &str, children: Vec<ChildRef>) -> PackManifest {
1865 PackManifest {
1866 schema_version: SchemaVersion::current(),
1867 name: name.to_string(),
1868 r#type: PackType::Meta,
1869 version: None,
1870 depends_on: Vec::new(),
1871 children,
1872 actions: Vec::new(),
1873 teardown: None,
1874 extensions: BTreeMap::new(),
1875 }
1876 }
1877
1878 fn child(url: &str, path: &str) -> ChildRef {
1879 ChildRef { url: url.to_string(), path: Some(path.to_string()), r#ref: None }
1880 }
1881
1882 fn host_has_git_binary() -> bool {
1883 std::process::Command::new("git")
1884 .arg("--version")
1885 .output()
1886 .is_ok_and(|o| o.status.success())
1887 }
1888
1889 /// Empty meta — no children → the walker returns Ok with no work.
1890 #[test]
1891 fn test_walker_v1_2_0_simple_meta_no_children() {
1892 let tmp = tempfile::tempdir().unwrap();
1893 let meta_dir = tmp.path().to_path_buf();
1894 let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("solo", vec![]));
1895 let backend = InMemGit::new();
1896 let opts = SyncMetaOptions::default();
1897 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1898 assert_eq!(report.metas_visited, 1);
1899 assert!(report.phase1_classifications.is_empty());
1900 assert!(report.phase2_pruned.is_empty());
1901 assert!(report.errors.is_empty());
1902 assert!(backend.calls().is_empty(), "no children → no git ops");
1903 }
1904
1905 /// Phase 1 classifies each child. With every dest absent on disk,
1906 /// every classification is `Missing` and the backend sees one
1907 /// `Clone` per child.
1908 #[test]
1909 fn test_walker_v1_2_0_phase1_classifies_each_child() {
1910 let tmp = tempfile::tempdir().unwrap();
1911 let meta_dir = tmp.path().to_path_buf();
1912 let kids = vec![
1913 child("https://example.com/a.git", "alpha"),
1914 child("https://example.com/b.git", "beta"),
1915 ];
1916 let loader =
1917 InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", kids.clone()));
1918 let backend = InMemGit::new();
1919 let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1920 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1921 assert_eq!(report.phase1_classifications.len(), 2);
1922 for (parent, _, class) in &report.phase1_classifications {
1923 assert_eq!(parent, &meta_dir);
1924 assert_eq!(*class, DestClass::Missing);
1925 }
1926 assert!(report.errors.is_empty());
1927 let calls = backend.calls();
1928 assert_eq!(calls.len(), 2, "one clone per child");
1929 for call in calls {
1930 assert!(matches!(call, BackendCall::Clone { .. }));
1931 }
1932 }
1933
1934 /// Phase 1 must aggregate every undeclared `.git/` directory it
1935 /// encounters into a single `UntrackedGitRepos` error. We
1936 /// pre-create two `.git/` slots BEFORE running `sync_meta` and
1937 /// declare them as siblings without paths matching — they classify
1938 /// as `PresentUndeclared` because the manifest does not list them.
1939 #[test]
1940 fn test_walker_v1_2_0_phase1_aggregates_untracked_error() {
1941 // Build a meta whose manifest declares ZERO children — every
1942 // pre-existing `.git/` slot is by definition undeclared.
1943 // Then drop two `.git/` directories under the meta dir and
1944 // (because v1.2.0's classifier needs the manifest declaration
1945 // signal at the call site, not on-disk discovery) run a
1946 // PARALLEL classifier sweep over the on-disk dirs to feed the
1947 // aggregator. This mirrors the way 1.h's lockfile-orphan
1948 // sweep will surface PresentUndeclared dirs into Phase 1's
1949 // collector when a child is removed from the manifest.
1950 let tmp = tempfile::tempdir().unwrap();
1951 let alpha = tmp.path().join("alpha");
1952 let beta = tmp.path().join("beta");
1953 std::fs::create_dir_all(alpha.join(".git")).unwrap();
1954 std::fs::create_dir_all(beta.join(".git")).unwrap();
1955 // Direct unit on the aggregator: feed two `PresentUndeclared`
1956 // pairs and assert the error carries both.
1957 let pairs: Vec<(PathBuf, DestClass)> = vec![
1958 (alpha.clone(), DestClass::PresentUndeclared),
1959 (beta.clone(), DestClass::PresentUndeclared),
1960 ];
1961 let err = aggregate_untracked(pairs).expect_err("two undeclared → error");
1962 match err {
1963 TreeError::UntrackedGitRepos { paths } => {
1964 assert_eq!(paths, vec![alpha, beta]);
1965 }
1966 other => panic!("expected UntrackedGitRepos, got {other:?}"),
1967 }
1968 }
1969
1970 /// Phase 2 prunes a clean orphan: the supplied candidate has a
1971 /// real `.git/` (initialised by `git init`), the consent walk
1972 /// returns Clean, the dest is removed.
1973 #[test]
1974 fn test_walker_v1_2_0_phase2_prunes_clean_orphans() {
1975 if !host_has_git_binary() {
1976 return;
1977 }
1978 let tmp = tempfile::tempdir().unwrap();
1979 let meta_dir = tmp.path().to_path_buf();
1980 // Create the orphan dest — clean repo, no manifest entry.
1981 let orphan = meta_dir.join("ghost");
1982 std::fs::create_dir_all(&orphan).unwrap();
1983 let init =
1984 std::process::Command::new("git").arg("-C").arg(&orphan).args(["init", "-q"]).status();
1985 if !matches!(init, Ok(s) if s.success()) {
1986 return;
1987 }
1988 let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", vec![]));
1989 let backend = InMemGit::new();
1990 let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1991 let prune_list = vec![PathBuf::from("ghost")];
1992 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &prune_list).expect("ok");
1993 assert_eq!(report.phase2_pruned.len(), 1, "clean orphan must be pruned");
1994 assert_eq!(report.phase2_pruned[0], orphan);
1995 assert!(!orphan.exists(), "dest must be removed after a clean prune");
1996 assert!(report.errors.is_empty());
1997 }
1998
1999 /// Phase 2 must REFUSE to prune a dirty orphan absent the override
2000 /// flag. The consent walk classifies it `DirtyTree`; the walker
2001 /// surfaces `DirtyTreeRefusal` and leaves the dest untouched.
2002 #[test]
2003 fn test_walker_v1_2_0_phase2_refuses_dirty_orphan() {
2004 if !host_has_git_binary() {
2005 return;
2006 }
2007 let tmp = tempfile::tempdir().unwrap();
2008 let meta_dir = tmp.path().to_path_buf();
2009 let orphan = meta_dir.join("dirty-ghost");
2010 std::fs::create_dir_all(&orphan).unwrap();
2011 let init =
2012 std::process::Command::new("git").arg("-C").arg(&orphan).args(["init", "-q"]).status();
2013 if !matches!(init, Ok(s) if s.success()) {
2014 return;
2015 }
2016 std::fs::write(orphan.join("scratch.txt"), b"unsaved").unwrap();
2017 let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", vec![]));
2018 let backend = InMemGit::new();
2019 let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
2020 let prune_list = vec![PathBuf::from("dirty-ghost")];
2021 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &prune_list).expect("ok");
2022 assert!(report.phase2_pruned.is_empty(), "dirty orphan must NOT be pruned");
2023 assert!(orphan.exists(), "dest stays on disk when refused");
2024 assert_eq!(report.errors.len(), 1);
2025 assert!(matches!(report.errors[0], TreeError::DirtyTreeRefusal { .. }));
2026 }
2027
2028 /// Phase 3 recurses into a child meta when its `.grex/pack.yaml`
2029 /// exists. The sub-meta's own `metas_visited` is folded into the
2030 /// parent's report.
2031 #[test]
2032 fn test_walker_v1_2_0_phase3_recurses_into_sub_meta() {
2033 let tmp = tempfile::tempdir().unwrap();
2034 let meta_dir = tmp.path().to_path_buf();
2035 let child_dest = meta_dir.join("sub");
2036 // Pre-materialise the sub-meta on disk so Phase 1 classifies
2037 // the dest as PresentDeclared (no clone fired) and Phase 3
2038 // sees a `.grex/pack.yaml` to recurse into.
2039 make_sub_meta_on_disk(&child_dest, "sub");
2040 let loader = InMemLoader::new()
2041 .with(
2042 meta_dir.clone(),
2043 meta_manifest_with("root", vec![child("https://example.com/sub.git", "sub")]),
2044 )
2045 .with(child_dest.clone(), meta_manifest_with("sub", vec![]));
2046 let backend = InMemGit::new();
2047 let opts = SyncMetaOptions::default();
2048 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
2049 assert_eq!(report.metas_visited, 2, "parent + sub-meta visited");
2050 assert!(report.errors.is_empty());
2051 }
2052
2053 /// `recurse: false` skips Phase 3 entirely — `metas_visited == 1`
2054 /// even when a child has a `.grex/pack.yaml`.
2055 #[test]
2056 fn test_walker_v1_2_0_phase3_max_depth_zero_skips_recursion() {
2057 let tmp = tempfile::tempdir().unwrap();
2058 let meta_dir = tmp.path().to_path_buf();
2059 let child_dest = meta_dir.join("sub");
2060 make_sub_meta_on_disk(&child_dest, "sub");
2061 let loader = InMemLoader::new()
2062 .with(
2063 meta_dir.clone(),
2064 meta_manifest_with("root", vec![child("https://example.com/sub.git", "sub")]),
2065 )
2066 .with(child_dest.clone(), meta_manifest_with("sub", vec![]));
2067 let backend = InMemGit::new();
2068 let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
2069 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
2070 assert_eq!(report.metas_visited, 1, "no recursion → only the root meta");
2071 }
2072
2073 /// `max_depth: Some(N)` caps recursion at N levels of nesting.
2074 /// Build a 3-level chain (root → mid → leaf) and assert
2075 /// `max_depth: Some(1)` visits root + mid (depth 0 + 1) but NOT
2076 /// leaf (depth 2).
2077 #[test]
2078 fn test_walker_v1_2_0_phase3_max_depth_n_stops_at_n_levels() {
2079 let tmp = tempfile::tempdir().unwrap();
2080 let root_dir = tmp.path().to_path_buf();
2081 let mid_dir = root_dir.join("mid");
2082 let leaf_dir = mid_dir.join("leaf");
2083 make_sub_meta_on_disk(&mid_dir, "mid");
2084 make_sub_meta_on_disk(&leaf_dir, "leaf");
2085 let loader = InMemLoader::new()
2086 .with(
2087 root_dir.clone(),
2088 meta_manifest_with("root", vec![child("https://example.com/mid.git", "mid")]),
2089 )
2090 .with(
2091 mid_dir.clone(),
2092 meta_manifest_with("mid", vec![child("https://example.com/leaf.git", "leaf")]),
2093 )
2094 .with(leaf_dir.clone(), meta_manifest_with("leaf", vec![]));
2095 let backend = InMemGit::new();
2096 let opts = SyncMetaOptions { max_depth: Some(1), ..SyncMetaOptions::default() };
2097 let report = sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect("ok");
2098 // depth 0 = root, depth 1 = mid → max_depth: Some(1) visits
2099 // root + mid (2 metas) and stops before recursing into leaf.
2100 assert_eq!(report.metas_visited, 2, "max_depth: Some(1) visits root + mid only");
2101 }
2102
2103 /// Helper: pre-populate a sub-meta directory at `dir` with a
2104 /// `.grex/pack.yaml` carrying `name` and a stub `.git/` so the
2105 /// classifier sees it as PresentDeclared.
2106 fn make_sub_meta_on_disk(dir: &Path, name: &str) {
2107 std::fs::create_dir_all(dir.join(".grex")).unwrap();
2108 std::fs::create_dir_all(dir.join(".git")).unwrap();
2109 let yaml = format!("schema_version: \"1\"\nname: {name}\ntype: meta\n");
2110 std::fs::write(dir.join(".grex/pack.yaml"), yaml).unwrap();
2111 }
2112
2113 /// Helper: collect the destinations Phase 1 recorded for a given
2114 /// parent meta from the rolled-up report.
2115 fn destinations_under(report: &SyncMetaReport, parent: &Path) -> Vec<PathBuf> {
2116 report
2117 .phase1_classifications
2118 .iter()
2119 .filter(|(p, _, _)| p == parent)
2120 .map(|(_, d, _)| d.clone())
2121 .collect()
2122 }
2123
2124 /// Parent-relative path resolution: a child declared at the root
2125 /// meta resolves to `<root>/<child>` — NOT to a global workspace
2126 /// anchor. Recursion into that child uses `<root>/<child>` as the
2127 /// new parent meta dir for resolving the grandchild.
2128 #[test]
2129 fn test_walker_v1_2_0_parent_relative_path_resolution() {
2130 let tmp = tempfile::tempdir().unwrap();
2131 let root_dir = tmp.path().to_path_buf();
2132 // Note: 1.c's path-segment validator forbids slashes in the
2133 // `path:` field, so multi-segment nesting is achieved by
2134 // chaining single-segment children across recursion frames.
2135 let tools_dir = root_dir.join("tools");
2136 let foo_dir = tools_dir.join("foo");
2137 make_sub_meta_on_disk(&tools_dir, "tools");
2138 make_sub_meta_on_disk(&foo_dir, "foo");
2139 let loader = InMemLoader::new()
2140 .with(
2141 root_dir.clone(),
2142 meta_manifest_with("root", vec![child("https://example.com/tools.git", "tools")]),
2143 )
2144 .with(
2145 tools_dir.clone(),
2146 meta_manifest_with("tools", vec![child("https://example.com/foo.git", "foo")]),
2147 )
2148 .with(foo_dir.clone(), meta_manifest_with("foo", vec![]));
2149 let backend = InMemGit::new();
2150 let opts = SyncMetaOptions::default();
2151 let report = sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect("ok");
2152 // Three metas visited: root → tools → foo.
2153 assert_eq!(report.metas_visited, 3);
2154 // Phase 1 classifications confirm parent-relative resolution:
2155 // every recorded dest is a SUBDIR of its recorded parent.
2156 for (parent, dest, _class) in &report.phase1_classifications {
2157 assert!(
2158 dest.starts_with(parent),
2159 "child dest {} must descend from parent {}",
2160 dest.display(),
2161 parent.display()
2162 );
2163 }
2164 // Spot-check the chain: root sees `tools`, tools sees `foo`.
2165 assert_eq!(destinations_under(&report, &root_dir), vec![tools_dir.clone()]);
2166 assert_eq!(destinations_under(&report, &tools_dir), vec![foo_dir.clone()]);
2167 }
2168
2169 // -----------------------------------------------------------------
2170 // v1.2.2 — `sync_meta` cycle detection (Phase 3 recursion edge).
2171 //
2172 // Discharges `sync_meta_no_cycle_infinite_clone` in
2173 // `proof/Grex/Walker.lean`. Identity scheme is `url@ref` so the
2174 // same repo at two different refs is NOT a cycle (covered by the
2175 // positive case below).
2176 // -----------------------------------------------------------------
2177
2178 /// `child_with_ref` mirrors `child()` but lets the caller pin a
2179 /// specific ref so two children of the same URL get distinct
2180 /// `pack_identity_for_child` strings (`url@ref`).
2181 fn child_with_ref(url: &str, path: &str, r#ref: &str) -> ChildRef {
2182 ChildRef {
2183 url: url.to_string(),
2184 path: Some(path.to_string()),
2185 r#ref: Some(r#ref.to_string()),
2186 }
2187 }
2188
2189 /// Self-loop: pack A declares itself (same URL, no ref) as a child.
2190 /// The walker must abort with `CycleDetected` rather than recurse
2191 /// infinitely. The chain reports the recurring identity.
2192 #[test]
2193 fn cycle_self_loop_aborts() {
2194 let tmp = tempfile::tempdir().unwrap();
2195 let root_dir = tmp.path().to_path_buf();
2196 // Lay out a self-pointing pack: `<root>/a` is a sub-meta whose
2197 // own manifest declares a child with the SAME URL/ref pointing
2198 // back at itself (placed at a fresh path so on-disk dest is
2199 // distinct, but pack identity collides).
2200 let a_dir = root_dir.join("a");
2201 let a_self_dir = a_dir.join("a");
2202 make_sub_meta_on_disk(&a_dir, "a");
2203 make_sub_meta_on_disk(&a_self_dir, "a");
2204 let url_a = "https://example.com/a.git";
2205 let loader = InMemLoader::new()
2206 .with(root_dir.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
2207 // `a` declares itself — same url, same (empty) ref → same identity.
2208 .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_a, "a")]))
2209 .with(a_self_dir.clone(), meta_manifest_with("a", vec![]));
2210 let backend = InMemGit::new();
2211 let opts = SyncMetaOptions::default();
2212 let err =
2213 sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect_err("self-loop must abort");
2214 match err {
2215 TreeError::CycleDetected { chain } => {
2216 // v1.2.3 (B4): chain begins with the root's
2217 // path-namespaced identity (`path:<root_dir>`) — the
2218 // initial visited seed — followed by the cyclic
2219 // child identities. v1.2.3 (B2): empty/None ref drops
2220 // the trailing `@`, so the cyclic id is just
2221 // `url:<url_a>` (no `@`).
2222 let id_a = format!("url:{url_a}");
2223 assert!(
2224 chain.iter().any(|s| s == &id_a),
2225 "chain must mention the cyclic url, got {chain:?}"
2226 );
2227 assert!(chain.len() >= 2, "self-loop chain has at least 2 entries: {chain:?}");
2228 let last = chain.last().unwrap();
2229 assert_eq!(last, &id_a, "chain must end with the recurring child identity");
2230 let first_match = chain.iter().position(|s| s == last).unwrap();
2231 assert!(
2232 first_match < chain.len() - 1,
2233 "the recurring identity must appear earlier in the chain: {chain:?}"
2234 );
2235 // The root frame is path-namespaced and disjoint from
2236 // any child's url-namespaced identity, so it must
2237 // appear at the head of the chain without colliding.
2238 assert!(
2239 chain[0].starts_with("path:"),
2240 "chain head is the root path identity: {chain:?}"
2241 );
2242 }
2243 other => panic!("expected CycleDetected, got {other:?}"),
2244 }
2245 }
2246
2247 /// Three-node cycle: A → B → C → A. The walker must abort with
2248 /// `CycleDetected` and the chain must list all three identities
2249 /// in the order they were entered, ending with the recurring A.
2250 #[test]
2251 fn cycle_three_node_aborts() {
2252 let tmp = tempfile::tempdir().unwrap();
2253 let root_dir = tmp.path().to_path_buf();
2254 // Disk layout: root → a → b → c → a (the second `a` lives at
2255 // a fresh on-disk slot so classification succeeds; identity
2256 // collision is what trips the cycle detector, not the path).
2257 let a_dir = root_dir.join("a");
2258 let b_dir = a_dir.join("b");
2259 let c_dir = b_dir.join("c");
2260 let a2_dir = c_dir.join("a");
2261 make_sub_meta_on_disk(&a_dir, "a");
2262 make_sub_meta_on_disk(&b_dir, "b");
2263 make_sub_meta_on_disk(&c_dir, "c");
2264 make_sub_meta_on_disk(&a2_dir, "a");
2265 let url_a = "https://example.com/a.git";
2266 let url_b = "https://example.com/b.git";
2267 let url_c = "https://example.com/c.git";
2268 let loader = InMemLoader::new()
2269 .with(root_dir.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
2270 .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_b, "b")]))
2271 .with(b_dir.clone(), meta_manifest_with("b", vec![child(url_c, "c")]))
2272 // c re-declares a → cycle.
2273 .with(c_dir.clone(), meta_manifest_with("c", vec![child(url_a, "a")]))
2274 .with(a2_dir.clone(), meta_manifest_with("a", vec![]));
2275 let backend = InMemGit::new();
2276 let opts = SyncMetaOptions::default();
2277 let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
2278 .expect_err("three-node cycle must abort");
2279 match err {
2280 TreeError::CycleDetected { chain } => {
2281 // v1.2.3 (B4): chain leads with the root's
2282 // path-namespaced identity. v1.2.3 (B2): empty/None
2283 // ref drops the trailing `@`. Chain order:
2284 // [path:root, a, b, c, a] (entry order, with the
2285 // recurring `a` appended at the cycle-detection point).
2286 let id_root = pack_identity_for_root(&root_dir);
2287 let id_a = format!("url:{url_a}");
2288 let id_b = format!("url:{url_b}");
2289 let id_c = format!("url:{url_c}");
2290 assert_eq!(chain, vec![id_root, id_a.clone(), id_b, id_c, id_a]);
2291 }
2292 other => panic!("expected CycleDetected, got {other:?}"),
2293 }
2294 }
2295
2296 /// Same repo, two refs — NOT a cycle. Pack A declares two children
2297 /// pointing at the SAME URL but pinned to different refs (`main`
2298 /// vs `dev`). Identity scheme is `url@ref` so the two siblings
2299 /// have distinct identities and the walker must succeed.
2300 #[test]
2301 fn same_repo_two_refs_no_cycle() {
2302 let tmp = tempfile::tempdir().unwrap();
2303 let root_dir = tmp.path().to_path_buf();
2304 let main_dir = root_dir.join("b-main");
2305 let dev_dir = root_dir.join("b-dev");
2306 make_sub_meta_on_disk(&main_dir, "b-main");
2307 make_sub_meta_on_disk(&dev_dir, "b-dev");
2308 let url_b = "https://example.com/b.git";
2309 let loader = InMemLoader::new()
2310 .with(
2311 root_dir.clone(),
2312 meta_manifest_with(
2313 "root",
2314 vec![
2315 child_with_ref(url_b, "b-main", "main"),
2316 child_with_ref(url_b, "b-dev", "dev"),
2317 ],
2318 ),
2319 )
2320 .with(main_dir.clone(), meta_manifest_with("b-main", vec![]))
2321 .with(dev_dir.clone(), meta_manifest_with("b-dev", vec![]));
2322 let backend = InMemGit::new();
2323 let opts = SyncMetaOptions::default();
2324 let report = sync_meta(&root_dir, &backend, &loader, &opts, &[])
2325 .expect("same url at distinct refs is NOT a cycle");
2326 // Three metas visited: root + b@main + b@dev.
2327 assert_eq!(report.metas_visited, 3);
2328 assert!(
2329 report.errors.is_empty(),
2330 "no errors expected when the two children differ only by ref: {:?}",
2331 report.errors
2332 );
2333 }
2334
2335 /// Same repo, two refs — NESTED (ancestor-stack) variant. Pack A
2336 /// (URL=foo, ref=main) declares pack B (URL=foo, ref=dev) as its
2337 /// child. Identity scheme is `url@ref`, so A's identity
2338 /// (`url:foo@main`) and B's identity (`url:foo@dev`) differ. The
2339 /// cycle detector must NOT trip even though B's URL collides with
2340 /// an ancestor on the stack — exercises the path the sibling
2341 /// variant above doesn't reach.
2342 #[test]
2343 fn same_repo_two_refs_nested_no_cycle() {
2344 let tmp = tempfile::tempdir().unwrap();
2345 let root_dir = tmp.path().to_path_buf();
2346 let a_dir = root_dir.join("a");
2347 let b_dir = a_dir.join("b");
2348 make_sub_meta_on_disk(&a_dir, "a");
2349 make_sub_meta_on_disk(&b_dir, "b");
2350 let url_foo = "https://example.com/foo.git";
2351 let loader = InMemLoader::new()
2352 .with(
2353 root_dir.clone(),
2354 meta_manifest_with("root", vec![child_with_ref(url_foo, "a", "main")]),
2355 )
2356 // a (foo@main) declares b (foo@dev) — same URL, different ref.
2357 .with(a_dir.clone(), meta_manifest_with("a", vec![child_with_ref(url_foo, "b", "dev")]))
2358 .with(b_dir.clone(), meta_manifest_with("b", vec![]));
2359 let backend = InMemGit::new();
2360 let opts = SyncMetaOptions::default();
2361 let report = sync_meta(&root_dir, &backend, &loader, &opts, &[])
2362 .expect("nested same-url at distinct refs is NOT a cycle");
2363 // Walker must reach depth 2: root → a → b (3 metas).
2364 assert_eq!(report.metas_visited, 3, "walker must recurse to depth 2");
2365 assert!(
2366 report.errors.is_empty(),
2367 "no errors expected when ancestor and descendant differ only by ref: {:?}",
2368 report.errors
2369 );
2370 }
2371
2372 // -----------------------------------------------------------------
2373 // v1.2.3 — additional cycle/diamond coverage (T1, T2, T3).
2374 //
2375 // T1 covers the diamond-shared-descendant case the
2376 // clone-per-child scheme is meant to permit; T2 stretches the
2377 // cycle to length 4 to exercise chain accumulation; T3 verifies
2378 // the cycle detector sees a cycle introduced inside an inner
2379 // subtree even though the outer arm is acyclic.
2380 // -----------------------------------------------------------------
2381
    /// T1 — Diamond, NO cycle. Topology:
    ///
    /// ```text
    /// root → A
    /// root → B
    /// A → C
    /// B → C (C is a shared descendant)
    /// ```
    ///
    /// Walker must traverse all four packs and produce no
    /// `CycleDetected`. Because the cycle detector clones the
    /// ancestor chain per child, A's descendants do not poison B's
    /// descendant view, so seeing `C` from both arms is a diamond,
    /// not a cycle.
    ///
    /// **v1.2.4 T1-spot-check extension.** In addition to the
    /// `metas_visited == 5` count assertion, this test also confirms
    /// that C is genuinely walked through BOTH arms: the
    /// `phase1_classifications` table must record one entry whose
    /// parent is `a/` (with dest `a/c`) AND one entry whose parent is
    /// `b/` (with dest `b/c`). Counting alone (`metas_visited`) cannot
    /// catch a regression where a future memoization optimization
    /// collapses the second walk into a no-op while still incrementing
    /// the counter — tracking the actual dest paths does.
    #[test]
    fn cycle_diamond_shared_descendant_no_cycle() {
        let tmp = tempfile::tempdir().unwrap();
        let root_dir = tmp.path().to_path_buf();
        // Disk layout: root/a, root/b, root/a/c, root/b/c.
        // Each `c` lives at a distinct on-disk slot so classify
        // succeeds; identity equality is what would (incorrectly)
        // trip the cycle detector if clone-per-child were broken.
        let a_dir = root_dir.join("a");
        let b_dir = root_dir.join("b");
        let c_under_a_dir = a_dir.join("c");
        let c_under_b_dir = b_dir.join("c");
        make_sub_meta_on_disk(&a_dir, "a");
        make_sub_meta_on_disk(&b_dir, "b");
        make_sub_meta_on_disk(&c_under_a_dir, "c");
        make_sub_meta_on_disk(&c_under_b_dir, "c");
        let url_a = "https://example.com/a.git";
        let url_b = "https://example.com/b.git";
        let url_c = "https://example.com/c.git";
        // Both arms (`a` and `b`) declare the SAME child url_c.
        let loader = InMemLoader::new()
            .with(
                root_dir.clone(),
                meta_manifest_with("root", vec![child(url_a, "a"), child(url_b, "b")]),
            )
            .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_c, "c")]))
            .with(b_dir.clone(), meta_manifest_with("b", vec![child(url_c, "c")]))
            .with(c_under_a_dir.clone(), meta_manifest_with("c", vec![]))
            .with(c_under_b_dir.clone(), meta_manifest_with("c", vec![]));
        let backend = InMemGit::new();
        let opts = SyncMetaOptions::default();
        let report =
            sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect("diamond is NOT a cycle");
        // Five manifest visits across four distinct packs: root, a, b,
        // c-via-a, c-via-b. A and B both expand into their own `c`, so
        // the walker visits `c` twice (once per arm) — five
        // `metas_visited`.
        assert_eq!(
            report.metas_visited, 5,
            "diamond: root + a + b + c-under-a + c-under-b = 5 visits"
        );
        // Crucially, no errors of any kind — and certainly not a
        // CycleDetected — because the two `C` visits live on
        // disjoint cloned ancestor chains.
        assert!(
            !report.errors.iter().any(|e| matches!(e, TreeError::CycleDetected { .. })),
            "diamond must not surface CycleDetected; errors={:?}",
            report.errors
        );
        assert!(report.errors.is_empty(), "diamond should produce no errors: {:?}", report.errors);

        // v1.2.4 T1-spot-check: assert `c` was genuinely walked under
        // BOTH arms. The phase1_classifications table records every
        // (parent_meta, dest, class) triple observed during Phase 1
        // dispatch; for the diamond layout we expect:
        //   * (root, a)  — a is a direct child of root
        //   * (root, b)  — b is a direct child of root
        //   * (a, a/c)   — c-via-a (Phase 1 inside a's recursion)
        //   * (b, b/c)   — c-via-b (Phase 1 inside b's recursion)
        // If a future memoization regression collapses the second `c`
        // walk into a no-op while still incrementing `metas_visited`,
        // the (b, b/c) pair will be missing from this table and the
        // assertion below fails.
        let dests_under_a = destinations_under(&report, &a_dir);
        let dests_under_b = destinations_under(&report, &b_dir);
        assert!(
            dests_under_a.iter().any(|d| d == &c_under_a_dir),
            "diamond: expected c-via-a in classifications under a, got {dests_under_a:?}"
        );
        assert!(
            dests_under_b.iter().any(|d| d == &c_under_b_dir),
            "diamond: expected c-via-b in classifications under b, got {dests_under_b:?}"
        );
        // Sanity: the two dests really are different directories, so the
        // two assertions above cannot both pass via a single visit.
        assert_ne!(
            c_under_a_dir, c_under_b_dir,
            "the two `c` visits must land on distinct on-disk dests"
        );
    }
2482
2483 /// T2 — 4-node cycle: `root → A → B → C → D → A`. Cycle length 4
2484 /// in pack-identity terms; the reported chain has length 5 once
2485 /// the recurring `A` is appended at detection. The root frame's
2486 /// `path:` identity also leads the chain (B4), so the final
2487 /// length is 6.
2488 #[test]
2489 #[allow(clippy::too_many_lines)]
2490 fn cycle_four_node_aborts() {
2491 let tmp = tempfile::tempdir().unwrap();
2492 let root_dir = tmp.path().to_path_buf();
2493 // Disk chain: root → a → b → c → d → a (the second `a` lives
2494 // at a fresh slot so classify succeeds; identity collision is
2495 // what trips the cycle detector).
2496 let a_dir = root_dir.join("a");
2497 let b_dir = a_dir.join("b");
2498 let c_dir = b_dir.join("c");
2499 let d_dir = c_dir.join("d");
2500 let a2_dir = d_dir.join("a");
2501 make_sub_meta_on_disk(&a_dir, "a");
2502 make_sub_meta_on_disk(&b_dir, "b");
2503 make_sub_meta_on_disk(&c_dir, "c");
2504 make_sub_meta_on_disk(&d_dir, "d");
2505 make_sub_meta_on_disk(&a2_dir, "a");
2506 let url_a = "https://example.com/a.git";
2507 let url_b = "https://example.com/b.git";
2508 let url_c = "https://example.com/c.git";
2509 let url_d = "https://example.com/d.git";
2510 let loader = InMemLoader::new()
2511 .with(root_dir.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
2512 .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_b, "b")]))
2513 .with(b_dir.clone(), meta_manifest_with("b", vec![child(url_c, "c")]))
2514 .with(c_dir.clone(), meta_manifest_with("c", vec![child(url_d, "d")]))
2515 // d re-declares a → cycle of length 4 in url-namespace.
2516 .with(d_dir.clone(), meta_manifest_with("d", vec![child(url_a, "a")]))
2517 .with(a2_dir.clone(), meta_manifest_with("a", vec![]));
2518 let backend = InMemGit::new();
2519 let opts = SyncMetaOptions::default();
2520 let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
2521 .expect_err("four-node cycle must abort");
2522 match err {
2523 TreeError::CycleDetected { chain } => {
2524 let id_root = pack_identity_for_root(&root_dir);
2525 let id_a = format!("url:{url_a}");
2526 let id_b = format!("url:{url_b}");
2527 let id_c = format!("url:{url_c}");
2528 let id_d = format!("url:{url_d}");
2529 // [path:root, a, b, c, d, a] — six entries.
2530 assert_eq!(
2531 chain,
2532 vec![id_root, id_a.clone(), id_b, id_c, id_d, id_a.clone()],
2533 "expected full ancestor chain ending in the recurring A"
2534 );
2535 assert!(
2536 chain.len() >= 5,
2537 "four-node cycle chain has at least 5 entries: {chain:?}"
2538 );
2539 // Last element repeats earlier in the chain (the
2540 // recurring identity).
2541 let last = chain.last().unwrap();
2542 let first_match = chain.iter().position(|s| s == last).unwrap();
2543 assert!(
2544 first_match < chain.len() - 1,
2545 "the recurring identity must appear earlier in the chain: {chain:?}"
2546 );
2547 }
2548 other => panic!("expected CycleDetected, got {other:?}"),
2549 }
2550 }
2551
    /// T3 — Nested-prefix cycle. Outer arm `root → A → B → C` is
    /// acyclic; the cycle lives inside B's other child `D`, which
    /// loops back to B (`B → D → B`). The walker must surface
    /// `CycleDetected` and the cycle should appear inside the
    /// subtree (not at the root level), with B as the recurring
    /// identity.
    ///
    /// Specifically: A's children = [B], B's children = [C, D], C
    /// has no children, D's children = [B] (cycle).
    #[test]
    #[allow(clippy::too_many_lines)]
    fn cycle_nested_prefix_aborts() {
        let tmp = tempfile::tempdir().unwrap();
        let root_dir = tmp.path().to_path_buf();
        // Disk layout: root/a, root/a/b, root/a/b/c (acyclic arm),
        // root/a/b/d (cycle arm), root/a/b/d/b (D loops back to B —
        // identity collision; on-disk path is fresh so classify
        // succeeds).
        let a_dir = root_dir.join("a");
        let b_dir = a_dir.join("b");
        let c_dir = b_dir.join("c");
        let d_dir = b_dir.join("d");
        let b2_dir = d_dir.join("b");
        make_sub_meta_on_disk(&a_dir, "a");
        make_sub_meta_on_disk(&b_dir, "b");
        make_sub_meta_on_disk(&c_dir, "c");
        make_sub_meta_on_disk(&d_dir, "d");
        make_sub_meta_on_disk(&b2_dir, "b");
        let url_a = "https://example.com/a.git";
        let url_b = "https://example.com/b.git";
        let url_c = "https://example.com/c.git";
        let url_d = "https://example.com/d.git";
        let loader = InMemLoader::new()
            .with(root_dir.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
            .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_b, "b")]))
            // b has both an acyclic child (c) and a cyclic one (d).
            .with(
                b_dir.clone(),
                meta_manifest_with("b", vec![child(url_c, "c"), child(url_d, "d")]),
            )
            .with(c_dir.clone(), meta_manifest_with("c", vec![]))
            // d re-declares b → cycle inside the b/d subtree.
            .with(d_dir.clone(), meta_manifest_with("d", vec![child(url_b, "b")]))
            .with(b2_dir.clone(), meta_manifest_with("b", vec![]));
        let backend = InMemGit::new();
        let opts = SyncMetaOptions::default();
        let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
            .expect_err("nested-prefix cycle must abort");
        match err {
            TreeError::CycleDetected { chain } => {
                // NOTE: `id_c` is deliberately absent — the acyclic `c`
                // arm never enters the reported cycle chain.
                let id_root = pack_identity_for_root(&root_dir);
                let id_a = format!("url:{url_a}");
                let id_b = format!("url:{url_b}");
                let id_d = format!("url:{url_d}");
                // The cycle hits inside the subtree at depth 4:
                // [path:root, a, b, d, b].
                assert_eq!(
                    chain,
                    vec![id_root.clone(), id_a, id_b.clone(), id_d, id_b.clone()],
                    "cycle should appear inside the subtree, not at the top"
                );
                // Recurring identity is `b`, and it does NOT appear
                // at the chain's outermost position — the root path
                // identity does. This verifies the cycle is "inside"
                // the tree.
                let last = chain.last().unwrap();
                assert_eq!(last, &id_b, "recurring identity is B");
                assert_ne!(
                    chain.first().unwrap(),
                    last,
                    "cycle must not start at the root frame: {chain:?}"
                );
                assert_eq!(
                    chain.first().unwrap(),
                    &id_root,
                    "chain must begin with the root path identity: {chain:?}"
                );
            }
            other => panic!("expected CycleDetected, got {other:?}"),
        }
    }
2633
2634 /// B1 regression: max_depth must NOT mask cycle detection. Cycle
2635 /// check fires before depth-cap return in phase3_handle_child.
2636 ///
2637 /// Topology: same 4-node cycle as `cycle_four_node_aborts`
2638 /// (`root → A → B → C → D → A`). Recurring `A` is reached at
2639 /// `next_depth = 5`. With `max_depth: Some(4)`, the depth cap
2640 /// would skip the recurring frame BEFORE it can be tested for
2641 /// cycle membership — *if* B1 were reverted (i.e. depth-cap
2642 /// early-return placed before the cycle check). The current
2643 /// ordering (cycle-then-depth-cap, see `phase3_handle_child`)
2644 /// surfaces `CycleDetected` regardless of the cap.
2645 ///
2646 /// If anyone reverts B1's reorder, this test fails: the walker
2647 /// returns `Ok(_)` instead of `Err(CycleDetected)` because the
2648 /// recurring frame is silently truncated.
2649 #[test]
2650 fn cycle_aborts_under_max_depth_cap() {
2651 let tmp = tempfile::tempdir().unwrap();
2652 let root_dir = tmp.path().to_path_buf();
2653 let a_dir = root_dir.join("a");
2654 let b_dir = a_dir.join("b");
2655 let c_dir = b_dir.join("c");
2656 let d_dir = c_dir.join("d");
2657 let a2_dir = d_dir.join("a");
2658 make_sub_meta_on_disk(&a_dir, "a");
2659 make_sub_meta_on_disk(&b_dir, "b");
2660 make_sub_meta_on_disk(&c_dir, "c");
2661 make_sub_meta_on_disk(&d_dir, "d");
2662 make_sub_meta_on_disk(&a2_dir, "a");
2663 let url_a = "https://example.com/a.git";
2664 let url_b = "https://example.com/b.git";
2665 let url_c = "https://example.com/c.git";
2666 let url_d = "https://example.com/d.git";
2667 let loader = InMemLoader::new()
2668 .with(root_dir.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
2669 .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_b, "b")]))
2670 .with(b_dir.clone(), meta_manifest_with("b", vec![child(url_c, "c")]))
2671 .with(c_dir.clone(), meta_manifest_with("c", vec![child(url_d, "d")]))
2672 .with(d_dir.clone(), meta_manifest_with("d", vec![child(url_a, "a")]))
2673 .with(a2_dir.clone(), meta_manifest_with("a", vec![]));
2674 let backend = InMemGit::new();
2675 // max_depth: Some(4) — the recurring A frame would land at
2676 // next_depth=5, which exceeds the cap. With B1, the cycle
2677 // check still fires first; without B1, the cap would skip
2678 // before the cycle is detected.
2679 let opts = SyncMetaOptions { max_depth: Some(4), ..SyncMetaOptions::default() };
2680 let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
2681 .expect_err("cycle must surface even when its closing frame exceeds max_depth");
2682 match err {
2683 TreeError::CycleDetected { chain } => {
2684 let id_a = format!("url:{url_a}");
2685 assert!(
2686 chain.last() == Some(&id_a),
2687 "recurring identity must be A, got chain={chain:?}"
2688 );
2689 let last = chain.last().unwrap();
2690 let first_match = chain.iter().position(|s| s == last).unwrap();
2691 assert!(
2692 first_match < chain.len() - 1,
2693 "the recurring identity must appear earlier in the chain: {chain:?}"
2694 );
2695 }
2696 other => panic!("expected CycleDetected, got {other:?}"),
2697 }
2698 }
2699
2700 /// B2 regression: `pack_identity_for_child` must NOT emit a
2701 /// trailing `@` when `r#ref` is `Some("")` (empty string). Both
2702 /// `Some("")` and `None` collapse to the bare `url:<url>` form so
2703 /// the on-the-wire identity matches the Lean model
2704 /// (`Grex.Walker.ChildRef.identity`). Without this elision two
2705 /// children that differ only in `ref: None` vs `ref: Some("")`
2706 /// would serialise the same way as `url:<url>@`, masking the
2707 /// distinction the Lean spec draws — and worse, an identity
2708 /// ending in `@` leaks an empty-ref artifact into operator
2709 /// diagnostics.
2710 #[test]
2711 fn child_identity_some_empty_ref_omits_at() {
2712 let url = "https://example.com/a.git";
2713 let with_none = ChildRef { url: url.to_string(), path: Some("a".to_string()), r#ref: None };
2714 let with_empty = ChildRef {
2715 url: url.to_string(),
2716 path: Some("a".to_string()),
2717 r#ref: Some(String::new()),
2718 };
2719 let id_none = pack_identity_for_child(&with_none);
2720 let id_empty = pack_identity_for_child(&with_empty);
2721 let expected = format!("url:{url}");
2722 assert_eq!(id_none, expected, "None ref must produce bare url identity");
2723 assert_eq!(
2724 id_empty, expected,
2725 "Some(\"\") ref must collapse to bare url identity (no trailing @)"
2726 );
2727 assert_eq!(id_none, id_empty, "Some(\"\") and None must yield the same identity");
2728 assert!(!id_empty.ends_with('@'), "identity must not end with trailing @: {id_empty:?}");
2729 }
2730
2731 // -----------------------------------------------------------------
2732 // v1.2.4 — A1 cancellation token (T-cancel).
2733 //
2734 // Discharges the Lean `cancellation_terminates_promptly` theorem
2735 // in `proof/Grex/Walker.lean`: when one sibling closure detects a
2736 // cycle and signals the per-`phase3_recurse` cancellation flag,
2737 // every subsequent in-flight sibling closure observes the flag at
2738 // its next entry and returns `Phase3ChildOutcome::Cancelled` with
2739 // zero recursive descent.
2740 // -----------------------------------------------------------------
2741
    /// T-cancel — sibling cancellation under cycle.
    ///
    /// Topology: `root → A`, where A has FOUR children
    /// `[A_cyclic, X, Y, Z]` (in this exact source order). `A_cyclic`'s
    /// URL collides with A's own URL, so A's `phase3_recurse` detects
    /// the cycle when iterating its first child. `X`, `Y`, `Z` are
    /// independent sub-metas, each containing a deep chain
    /// (`X → X1 → X2`, etc) that would inflate `metas_visited` if
    /// genuinely walked. With `opts.parallel = Some(1)` the rayon
    /// pool runs siblings serially in source order, so the cyclic
    /// sibling fires first, sets the flag, and `X`/`Y`/`Z` observe
    /// `Cancelled` at entry — none of their subtrees are walked.
    ///
    /// Determinism: `parallel: Some(1)` removes thread interleaving
    /// from the test surface — the cyclic arm is *guaranteed* to run
    /// before the acyclic siblings. The flag is checked at entry of
    /// `phase3_handle_child`, so any sibling that has not yet started
    /// observes it. `metas_visited` is the side-effect-visible counter
    /// used to assert the cancellation discipline: pre-cancellation
    /// (v1.2.3) the walker would visit every sibling subtree before
    /// surfacing `Err(CycleDetected)`; post-cancellation (v1.2.4)
    /// only the cyclic arm and its prefix contribute.
    ///
    /// Without the cancellation flag, `metas_visited` would total
    /// 1 (root) + 1 (A) + 3 (X, Y, Z themselves) + 6 (X1,X2,Y1,Y2,Z1,Z2)
    /// = 11. With the flag, X/Y/Z's `phase3_handle_child` short-circuits
    /// before recursing, so only root + A are recorded → 2.
    #[test]
    #[allow(clippy::too_many_lines)]
    fn cancellation_aborts_siblings() {
        let tmp = tempfile::tempdir().unwrap();
        let root_dir = tmp.path().to_path_buf();
        let a_dir = root_dir.join("a");
        // A_cyclic's path is a fresh slot under A so on-disk classify
        // succeeds; identity collision with A's URL is what trips the
        // cycle detector at A's `phase3_recurse`.
        let a_cyclic_dir = a_dir.join("a-cyclic");
        // X/Y/Z each get a 3-deep chain (x → x1 → x2, …) that serves as
        // the "would have been walked" tripwire for the fetch counting
        // below.
        let x_dir = a_dir.join("x");
        let x1_dir = x_dir.join("x1");
        let x2_dir = x1_dir.join("x2");
        let y_dir = a_dir.join("y");
        let y1_dir = y_dir.join("y1");
        let y2_dir = y1_dir.join("y2");
        let z_dir = a_dir.join("z");
        let z1_dir = z_dir.join("z1");
        let z2_dir = z1_dir.join("z2");
        for d in [
            &a_dir,
            &a_cyclic_dir,
            &x_dir,
            &x1_dir,
            &x2_dir,
            &y_dir,
            &y1_dir,
            &y2_dir,
            &z_dir,
            &z1_dir,
            &z2_dir,
        ] {
            make_sub_meta_on_disk(d, d.file_name().unwrap().to_str().unwrap());
        }

        let url_a = "https://example.com/a.git";
        let url_x = "https://example.com/x.git";
        let url_x1 = "https://example.com/x1.git";
        let url_x2 = "https://example.com/x2.git";
        let url_y = "https://example.com/y.git";
        let url_y1 = "https://example.com/y1.git";
        let url_y2 = "https://example.com/y2.git";
        let url_z = "https://example.com/z.git";
        let url_z1 = "https://example.com/z1.git";
        let url_z2 = "https://example.com/z2.git";

        // A's children: [a-cyclic (collides with A's identity), x, y, z]
        // — ORDER MATTERS for determinism. With parallel: Some(1) the
        // cyclic arm runs first, signals, and the rest observe.
        let loader = InMemLoader::new()
            .with(root_dir.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
            .with(
                a_dir.clone(),
                meta_manifest_with(
                    "a",
                    vec![
                        child(url_a, "a-cyclic"),
                        child(url_x, "x"),
                        child(url_y, "y"),
                        child(url_z, "z"),
                    ],
                ),
            )
            .with(a_cyclic_dir.clone(), meta_manifest_with("a-cyclic", vec![]))
            // X/Y/Z each carry a 3-level subtree that would inflate
            // metas_visited if genuinely walked.
            .with(x_dir.clone(), meta_manifest_with("x", vec![child(url_x1, "x1")]))
            .with(x1_dir.clone(), meta_manifest_with("x1", vec![child(url_x2, "x2")]))
            .with(x2_dir.clone(), meta_manifest_with("x2", vec![]))
            .with(y_dir.clone(), meta_manifest_with("y", vec![child(url_y1, "y1")]))
            .with(y1_dir.clone(), meta_manifest_with("y1", vec![child(url_y2, "y2")]))
            .with(y2_dir.clone(), meta_manifest_with("y2", vec![]))
            .with(z_dir.clone(), meta_manifest_with("z", vec![child(url_z1, "z1")]))
            .with(z1_dir.clone(), meta_manifest_with("z1", vec![child(url_z2, "z2")]))
            .with(z2_dir.clone(), meta_manifest_with("z2", vec![]));
        let backend = InMemGit::new();
        // parallel: Some(1) — single-threaded rayon pool. Source-order
        // iteration => cyclic arm runs before X/Y/Z, signal observed.
        let opts = SyncMetaOptions { parallel: Some(1), ..SyncMetaOptions::default() };
        let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
            .expect_err("cyclic input must surface CycleDetected");
        match err {
            TreeError::CycleDetected { chain } => {
                let id_a = format!("url:{url_a}");
                assert_eq!(
                    chain.last(),
                    Some(&id_a),
                    "recurring identity must be A (the cyclic arm), got chain={chain:?}"
                );
            }
            other => panic!("expected CycleDetected, got {other:?}"),
        }
        // The cancellation discipline asserts: X/Y/Z's subtrees were
        // NOT walked. Cancellation lives in Phase 3 (the recursion
        // edge), so Phase 1 still fetches A's four direct children
        // (a-cyclic, x, y, z) — one Fetch each. A's Phase 3 then
        // detects the cycle on a-cyclic and signals; X/Y/Z's
        // `phase3_handle_child` returns `Cancelled` at entry, so
        // their `sync_meta_inner` never runs. Therefore X1/X2/Y1/Y2/
        // Z1/Z2 are NEVER fetched.
        //
        // Without cancellation Fetch count = 11:
        //   1 (root → a) + 4 (a → a-cyclic, x, y, z)
        //   + 2 (x → x1 → x2) + 2 (y → y1 → y2) + 2 (z → z1 → z2).
        // With cancellation Fetch count = 5:
        //   1 (root → a) + 4 (a → a-cyclic, x, y, z).
        //
        // With `parallel: Some(1)` the rayon pool is single-threaded
        // and visibility is immediate, so 5 is the deterministic tight
        // bound: 1 (root → a) + 4 (a's Phase 1 fan-out for a-cyclic, x,
        // y, z). No upper-bound slack — `Some(1)` removes interleaving
        // entirely, and any deviation indicates a real regression.
        let fetch_count =
            backend.calls().iter().filter(|c| matches!(c, BackendCall::Fetch { .. })).count();
        // Lower bound: Phase 1's per-child fan-out runs BEFORE the
        // Phase 3 cycle check, so even under cancellation A's four
        // direct children must have been fetched (1 root → A + 4 a's
        // children = 5). Documents that Phase 1 is intentionally NOT
        // cancellation-aware in v1.2.4 — cancellation only short-
        // circuits the Phase 3 recursion edge.
        assert!(
            fetch_count >= 5,
            "Phase 1 fan-out for A's 4 direct children must complete even under cancellation; \
             observed {fetch_count} fetches"
        );
        // Exact bound (subsumes the >= above, which is kept purely as
        // executable documentation of the Phase-1 floor).
        assert_eq!(
            fetch_count, 5,
            "cancellation flag must short-circuit X/Y/Z subtrees; \
             observed {fetch_count} fetches (acyclic walk would do 11)"
        );
        // Stronger assertion: NO fetch ever targeted x1/x2/y1/y2/z1/z2.
        // If the cancellation token were broken, sync_meta_inner would
        // recurse into x/y/z and Phase 1 there would fetch x1/y1/z1 at
        // a minimum.
        let cancelled_dests = [&x1_dir, &x2_dir, &y1_dir, &y2_dir, &z1_dir, &z2_dir];
        for dest in cancelled_dests {
            for call in backend.calls() {
                if let BackendCall::Fetch { dest: fetched } = &call {
                    assert_ne!(
                        fetched,
                        dest,
                        "cancellation must prevent recursion into {} (observed Fetch call)",
                        dest.display()
                    );
                }
            }
        }
    }
2917
2918 /// G1 — multi-thread sibling cancellation race.
2919 ///
2920 /// Topology: `root → A`, where A has 12 children: two cyclic
2921 /// siblings (`A_cyclic1`, `A_cyclic2`, both colliding with A's
2922 /// own URL) and ten acyclic deep subtrees (`X0..X9` each →
2923 /// `X{i}_1` → `X{i}_2`). With `parallel: Some(8)` rayon may
2924 /// schedule the two cyclic arms onto different worker threads,
2925 /// so BOTH can simultaneously detect the cycle and try to store
2926 /// into `cancelled`. The aggregator (`phase3_recurse`) must:
2927 /// - return `Err(CycleDetected)` on every iteration with a
2928 /// non-empty chain (no panic, no Ok),
2929 /// - never spuriously double-count or wedge on the multiple
2930 /// concurrent stores (AtomicBool::store is idempotent),
2931 /// - keep fetch count bounded (no unbounded recursion).
2932 ///
2933 /// Looped 50 iterations — non-determinism would surface as
2934 /// either a panic, an `Ok(_)` return, or an unbounded fetch
2935 /// count (the acyclic 10-subtree walk would be at minimum
2936 /// `1 (root → A) + 12 (A's children) + 20 (X*_1, X*_2)` = 33
2937 /// without cancellation; we cap at 1000 to catch runaway).
2938 #[test]
2939 #[allow(clippy::too_many_lines)]
2940 fn cancellation_aborts_siblings_multithread() {
2941 // Build helper: returns (loader, backend, root_dir) for a
2942 // fresh per-iteration tempdir so iterations don't share state.
2943 fn build_topology() -> (tempfile::TempDir, PathBuf, InMemLoader, InMemGit, String) {
2944 let tmp = tempfile::tempdir().unwrap();
2945 let root_dir = tmp.path().to_path_buf();
2946 let a_dir = root_dir.join("a");
2947 make_sub_meta_on_disk(&a_dir, "a");
2948 let url_a = "https://example.com/a.git".to_string();
2949 // Two cyclic siblings (both collide with A's identity)
2950 let a_cyc1_dir = a_dir.join("a-cyclic1");
2951 let a_cyc2_dir = a_dir.join("a-cyclic2");
2952 make_sub_meta_on_disk(&a_cyc1_dir, "a-cyclic1");
2953 make_sub_meta_on_disk(&a_cyc2_dir, "a-cyclic2");
2954 // Ten acyclic deep subtrees: x0..x9, each → x{i}_1 → x{i}_2
2955 let mut a_children = vec![child(&url_a, "a-cyclic1"), child(&url_a, "a-cyclic2")];
2956 let mut loader = InMemLoader::new()
2957 .with(root_dir.clone(), meta_manifest_with("root", vec![child(&url_a, "a")]))
2958 .with(a_cyc1_dir.clone(), meta_manifest_with("a-cyclic1", vec![]))
2959 .with(a_cyc2_dir.clone(), meta_manifest_with("a-cyclic2", vec![]));
2960 for i in 0..10 {
2961 let xi_name = format!("x{i}");
2962 let xi_dir = a_dir.join(&xi_name);
2963 let xi1_name = format!("x{i}_1");
2964 let xi1_dir = xi_dir.join(&xi1_name);
2965 let xi2_name = format!("x{i}_2");
2966 let xi2_dir = xi1_dir.join(&xi2_name);
2967 make_sub_meta_on_disk(&xi_dir, &xi_name);
2968 make_sub_meta_on_disk(&xi1_dir, &xi1_name);
2969 make_sub_meta_on_disk(&xi2_dir, &xi2_name);
2970 let url_xi = format!("https://example.com/x{i}.git");
2971 let url_xi1 = format!("https://example.com/x{i}_1.git");
2972 let url_xi2 = format!("https://example.com/x{i}_2.git");
2973 a_children.push(child(&url_xi, &xi_name));
2974 loader = loader
2975 .with(xi_dir, meta_manifest_with(&xi_name, vec![child(&url_xi1, &xi1_name)]))
2976 .with(xi1_dir, meta_manifest_with(&xi1_name, vec![child(&url_xi2, &xi2_name)]))
2977 .with(xi2_dir, meta_manifest_with(&xi2_name, vec![]));
2978 }
2979 loader = loader.with(a_dir, meta_manifest_with("a", a_children));
2980 let backend = InMemGit::new();
2981 (tmp, root_dir, loader, backend, url_a)
2982 }
2983
2984 for iter in 0..50 {
2985 let (_tmp, root_dir, loader, backend, url_a) = build_topology();
2986 let opts = SyncMetaOptions { parallel: Some(8), ..SyncMetaOptions::default() };
2987 let result = sync_meta(&root_dir, &backend, &loader, &opts, &[]);
2988 let err = match result {
2989 Err(e) => e,
2990 Ok(_) => panic!("iter {iter}: expected CycleDetected, got Ok"),
2991 };
2992 match err {
2993 TreeError::CycleDetected { chain } => {
2994 assert!(
2995 !chain.is_empty(),
2996 "iter {iter}: CycleDetected chain must be non-empty: {chain:?}"
2997 );
2998 let id_a = format!("url:{url_a}");
2999 assert_eq!(
3000 chain.last(),
3001 Some(&id_a),
3002 "iter {iter}: recurring identity must be A, got chain={chain:?}"
3003 );
3004 }
3005 other => panic!("iter {iter}: expected CycleDetected, got {other:?}"),
3006 }
3007 // Bound check: even with two cyclic arms racing, the
3008 // walker must not have walked unbounded subtrees. Acyclic
3009 // walk would do 33 fetches; under cancellation Phase 1
3010 // for A's 12 direct children fires (12 fetches) plus the
3011 // initial root → A fetch (1) = 13, and depending on
3012 // worker scheduling some X{i}_1 / X{i}_2 fetches may
3013 // sneak in before the flag is observed. Cap loosely at
3014 // 200 — any wild blowup indicates the flag is broken.
3015 let fetch_count =
3016 backend.calls().iter().filter(|c| matches!(c, BackendCall::Fetch { .. })).count();
3017 assert!(
3018 fetch_count >= 1,
3019 "iter {iter}: at least the root → A fetch must occur; got {fetch_count}"
3020 );
3021 assert!(
3022 fetch_count < 200,
3023 "iter {iter}: fetch count blew up under multi-thread cancellation; got {fetch_count}"
3024 );
3025 }
3026 }
3027
3028 /// G2 — per-call cancellation flag scope: a cycle deep in pack A
3029 /// MUST NOT cancel a sibling pack B at the root level.
3030 ///
3031 /// Topology:
3032 /// root → [A, B]
3033 /// A → A1 → A2 → A2_cyclic (A2_cyclic.url == A2.url ⇒ cycle
3034 /// inside A's deep subtree)
3035 /// B → B1 → B2 → B3 (clean acyclic subtree)
3036 ///
3037 /// The cancellation flag for root's Phase 3 fan-out covers root's
3038 /// direct children (A, B). The cycle inside A is detected during
3039 /// recursion into A's deep subtree, by a NEW per-call flag built
3040 /// at the deep `phase3_recurse` frame — that flag is scoped to A's
3041 /// inner closures only. It must NOT propagate up and cancel B's
3042 /// independent walk.
3043 ///
3044 /// Assertions:
3045 /// (i) walker returns `Err(CycleDetected)` (the deep cycle
3046 /// propagates up via the short-circuit path),
3047 /// (ii) B's deep subtree IS walked (B, B1, B2, B3 all fetched),
3048 /// proving B's walk was not aborted by A's deep-subtree
3049 /// cancellation flag (which lives in a recursion frame
3050 /// below root, disjoint from the root-level fan-out flag
3051 /// that scopes A and B as siblings).
3052 ///
3053 /// With `parallel: Some(2)` rayon schedules A and B onto separate
3054 /// workers; B must run to completion regardless of A's cycle.
3055 #[test]
3056 #[allow(clippy::too_many_lines)]
3057 fn cancellation_per_call_scope_isolates_subtrees() {
3058 let tmp = tempfile::tempdir().unwrap();
3059 let root_dir = tmp.path().to_path_buf();
3060 // A's deep cycle arm
3061 let a_dir = root_dir.join("a");
3062 let a1_dir = a_dir.join("a1");
3063 let a2_dir = a1_dir.join("a2");
3064 let a2cyc_dir = a2_dir.join("a2-cyclic");
3065 // B's clean deep subtree
3066 let b_dir = root_dir.join("b");
3067 let b1_dir = b_dir.join("b1");
3068 let b2_dir = b1_dir.join("b2");
3069 let b3_dir = b2_dir.join("b3");
3070 for d in [&a_dir, &a1_dir, &a2_dir, &a2cyc_dir, &b_dir, &b1_dir, &b2_dir, &b3_dir] {
3071 make_sub_meta_on_disk(d, d.file_name().unwrap().to_str().unwrap());
3072 }
3073 let url_a = "https://example.com/a.git";
3074 let url_a1 = "https://example.com/a1.git";
3075 let url_a2 = "https://example.com/a2.git";
3076 let url_b = "https://example.com/b.git";
3077 let url_b1 = "https://example.com/b1.git";
3078 let url_b2 = "https://example.com/b2.git";
3079 let url_b3 = "https://example.com/b3.git";
3080 let loader = InMemLoader::new()
3081 .with(
3082 root_dir.clone(),
3083 meta_manifest_with("root", vec![child(url_a, "a"), child(url_b, "b")]),
3084 )
3085 .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_a1, "a1")]))
3086 .with(a1_dir.clone(), meta_manifest_with("a1", vec![child(url_a2, "a2")]))
3087 // a2's child re-declares a2's identity → cycle at depth 4
3088 .with(a2_dir.clone(), meta_manifest_with("a2", vec![child(url_a2, "a2-cyclic")]))
3089 .with(a2cyc_dir.clone(), meta_manifest_with("a2-cyclic", vec![]))
3090 .with(b_dir.clone(), meta_manifest_with("b", vec![child(url_b1, "b1")]))
3091 .with(b1_dir.clone(), meta_manifest_with("b1", vec![child(url_b2, "b2")]))
3092 .with(b2_dir.clone(), meta_manifest_with("b2", vec![child(url_b3, "b3")]))
3093 .with(b3_dir.clone(), meta_manifest_with("b3", vec![]));
3094 let backend = InMemGit::new();
3095 // parallel: Some(2) — A and B may schedule onto separate
3096 // workers. The point of the test is to verify B's walk is
3097 // not aborted by A's deep-subtree cancellation flag.
3098 let opts = SyncMetaOptions { parallel: Some(2), ..SyncMetaOptions::default() };
3099 let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
3100 .expect_err("deep cycle inside A must surface CycleDetected");
3101 // (i) Cycle bubbled up.
3102 match err {
3103 TreeError::CycleDetected { chain } => {
3104 let id_a2 = format!("url:{url_a2}");
3105 assert_eq!(
3106 chain.last(),
3107 Some(&id_a2),
3108 "recurring identity must be A2 (the cyclic arm), got chain={chain:?}"
3109 );
3110 }
3111 other => panic!("expected CycleDetected, got {other:?}"),
3112 }
3113 // (ii) B's subtree was walked: B, B1, B2, B3 each fetched.
3114 // The deep cycle in A fires inside a per-call flag scoped to
3115 // that recursion frame; root's per-call flag is NOT signalled
3116 // (cycle is returned from a child call, not stored at root
3117 // scope), so B's sibling walk completes uninterrupted.
3118 let fetched_dests: Vec<PathBuf> = backend
3119 .calls()
3120 .iter()
3121 .filter_map(|c| match c {
3122 BackendCall::Fetch { dest } => Some(dest.clone()),
3123 _ => None,
3124 })
3125 .collect();
3126 for dest in [&b_dir, &b1_dir, &b2_dir, &b3_dir] {
3127 assert!(
3128 fetched_dests.iter().any(|f| f == dest),
3129 "per-call scope: B's subtree must have been walked despite A's deep cycle; \
3130 missing fetch for {} (observed {fetched_dests:?})",
3131 dest.display()
3132 );
3133 }
3134 }
3135
3136 // -----------------------------------------------------------------
3137 // v1.2.5 — A2 partial-clone cleanup (T-A2).
3138 //
3139 // Discharges the Lean theorem `partial_clone_cleanup_idempotent`
3140 // in `proof/Grex/Walker.lean`: when Phase 3 returns a non-Recursed
3141 // outcome (Failed / Cancelled / Skipped), the post-state at `dest`
3142 // equals the pre-state. For a freshly-cloned dest (not in the
3143 // pre-existing snapshot) the cleanup wipes the dir; for a
3144 // pre-existing dest the cleanup is a no-op (preserves prior
3145 // successful sync state).
3146 // -----------------------------------------------------------------
3147
3148 /// T-A2 — partial-clone cleanup on cycle-detected failure.
3149 ///
3150 /// Topology: `root → a → a-cyclic` where `a-cyclic.url == a.url`
3151 /// (cycle inside `a`'s Phase 3 fan-out). The dest dir for
3152 /// `a-cyclic` does NOT exist before the walk starts, so it is
3153 /// freshly created by Phase 1's clone (which materialises a
3154 /// `.git/` plus a `.grex/pack.yaml` so Phase 3 enters the cycle
3155 /// detector). Post-walk, the cycle has been surfaced AND the
3156 /// freshly-cloned `a-cyclic/` dest must have been removed by
3157 /// `cleanup_partial_clone`.
3158 ///
3159 /// Pre-existing dests must NOT be cleaned: `a/` itself was
3160 /// pre-materialised on disk (`make_sub_meta_on_disk`) so the
3161 /// snapshot taken inside `sync_meta_inner` includes it; even
3162 /// if the cycle propagated up and the recursive-failure branch
3163 /// fired, `a/` must remain on disk (legitimate prior content).
3164 #[test]
3165 #[allow(clippy::too_many_lines)]
3166 fn partial_clone_cleanup_after_cancellation() {
3167 let tmp = tempfile::tempdir().unwrap();
3168 let root_dir = tmp.path().to_path_buf();
3169 let a_dir = root_dir.join("a");
3170 let a_cyclic_dir = a_dir.join("a-cyclic");
3171 // Pre-materialise `a/` on disk so it counts as PresentDeclared
3172 // (Phase 1 will fetch, not clone) AND is in the
3173 // `pre_existing_dests` snapshot at `sync_meta_inner` entry.
3174 make_sub_meta_on_disk(&a_dir, "a");
3175 // a-cyclic deliberately NOT pre-materialised — Phase 1's
3176 // clone (via the custom backend below) materialises a `.git/`
3177 // PLUS a `.grex/pack.yaml` so Phase 3 enters the cycle
3178 // detector on the freshly-cloned dest.
3179 assert!(!a_cyclic_dir.exists(), "a-cyclic dest must be missing pre-walk");
3180
3181 let url_a = "https://example.com/a.git";
3182 let loader = InMemLoader::new()
3183 .with(root_dir.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
3184 .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_a, "a-cyclic")]))
3185 // a-cyclic's manifest is loadable (used post-clone if cycle
3186 // weren't detected; never reached because the cycle fires).
3187 .with(a_cyclic_dir.clone(), meta_manifest_with("a-cyclic", vec![]));
3188 // Custom backend: clone materialises BOTH `.git/` and
3189 // `.grex/pack.yaml` so Phase 3's existence check passes,
3190 // letting cycle detection fire on the freshly-cloned dest.
3191 struct CloneWithPackYaml {
3192 inner: InMemGit,
3193 }
3194 impl GitBackend for CloneWithPackYaml {
3195 fn name(&self) -> &'static str {
3196 "v1_2_5-clone-with-pack-yaml"
3197 }
3198 fn clone(
3199 &self,
3200 url: &str,
3201 dest: &Path,
3202 r#ref: Option<&str>,
3203 ) -> Result<crate::ClonedRepo, crate::GitError> {
3204 let res = self.inner.clone(url, dest, r#ref)?;
3205 // Also drop a pack.yaml so Phase 3 enters the cycle
3206 // check (and not the `Skipped` early-return).
3207 std::fs::create_dir_all(dest.join(".grex")).unwrap();
3208 let yaml = format!(
3209 "schema_version: \"1\"\nname: {}\ntype: meta\n",
3210 dest.file_name().unwrap().to_str().unwrap()
3211 );
3212 std::fs::write(dest.join(".grex/pack.yaml"), yaml).unwrap();
3213 Ok(res)
3214 }
3215 fn fetch(&self, dest: &Path) -> Result<(), crate::GitError> {
3216 self.inner.fetch(dest)
3217 }
3218 fn checkout(&self, dest: &Path, r#ref: &str) -> Result<(), crate::GitError> {
3219 self.inner.checkout(dest, r#ref)
3220 }
3221 fn head_sha(&self, dest: &Path) -> Result<String, crate::GitError> {
3222 self.inner.head_sha(dest)
3223 }
3224 }
3225 let backend = CloneWithPackYaml { inner: InMemGit::new() };
3226 // parallel: Some(1) for deterministic ordering — a-cyclic is
3227 // a's only child so the cycle fires on the first iteration.
3228 let opts = SyncMetaOptions { parallel: Some(1), ..SyncMetaOptions::default() };
3229 let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
3230 .expect_err("cyclic input must surface CycleDetected");
3231 assert!(
3232 matches!(err, TreeError::CycleDetected { .. }),
3233 "expected CycleDetected, got {err:?}"
3234 );
3235
3236 // T-A2 invariant: the freshly-cloned a-cyclic dest MUST have
3237 // been cleaned up by `cleanup_partial_clone` on the
3238 // Failed(CycleDetected) path.
3239 assert!(
3240 !a_cyclic_dir.exists(),
3241 "v1.2.5 A2: freshly-cloned a-cyclic dest must be cleaned after cycle detection \
3242 (path still on disk: {})",
3243 a_cyclic_dir.display()
3244 );
3245 // Symmetric invariant: pre-existing `a/` MUST still exist —
3246 // it was in the snapshot, so cleanup never targets it.
3247 assert!(
3248 a_dir.exists(),
3249 "v1.2.5 A2: pre-existing a/ dest must NOT be cleaned (legitimate prior content)"
3250 );
3251 }
3252
3253 /// T-A2 idempotence — running `cleanup_partial_clone` twice on the
3254 /// same path is a no-op on the second call. Mirrors the Lean
3255 /// theorem `partial_clone_cleanup_idempotent` directly.
3256 #[test]
3257 fn cleanup_partial_clone_is_idempotent() {
3258 let tmp = tempfile::tempdir().unwrap();
3259 let dest = tmp.path().join("ghost");
3260 std::fs::create_dir_all(dest.join("subdir")).unwrap();
3261 std::fs::write(dest.join("file.txt"), b"bytes").unwrap();
3262 assert!(dest.exists());
3263 cleanup_partial_clone(&dest);
3264 assert!(!dest.exists(), "first call must remove dest");
3265 // Second call is a no-op on a missing dir.
3266 cleanup_partial_clone(&dest);
3267 assert!(!dest.exists(), "second call must be a no-op");
3268 }
3269
3270 /// W1 — symlink-secure cleanup MUST NOT follow a directory symlink
3271 /// into an unrelated tree. We point a symlink at a sibling
3272 /// directory containing important files, then run
3273 /// `cleanup_partial_clone` against the symlink. The symlink must
3274 /// be unlinked AS a symlink; the target tree's contents must
3275 /// remain intact on disk.
3276 ///
3277 /// Skips on hosts where unprivileged symlink creation fails
3278 /// (notably Windows without Developer Mode).
3279 #[test]
3280 fn cleanup_partial_clone_does_not_follow_symlink() {
3281 let outer = tempfile::tempdir().unwrap();
3282 let target = outer.path().join("important-target");
3283 std::fs::create_dir_all(&target).unwrap();
3284 let canary = target.join("canary.txt");
3285 std::fs::write(&canary, b"do-not-touch").unwrap();
3286
3287 let link = outer.path().join("partial-clone");
3288 #[cfg(unix)]
3289 let symlink_result = std::os::unix::fs::symlink(&target, &link);
3290 #[cfg(windows)]
3291 let symlink_result = std::os::windows::fs::symlink_dir(&target, &link);
3292 if symlink_result.is_err() {
3293 // Host won't let us create a symlink — nothing to test.
3294 return;
3295 }
3296
3297 cleanup_partial_clone(&link);
3298
3299 // The symlink itself MUST be gone.
3300 assert!(
3301 std::fs::symlink_metadata(&link).is_err(),
3302 "symlink at {} must have been unlinked",
3303 link.display()
3304 );
3305 // The target tree MUST remain intact.
3306 assert!(
3307 target.exists(),
3308 "symlink target {} must NOT have been followed and deleted",
3309 target.display()
3310 );
3311 assert!(canary.exists(), "canary file inside target must remain on disk");
3312 assert_eq!(std::fs::read(&canary).unwrap(), b"do-not-touch");
3313 }
3314
3315 // -----------------------------------------------------------------
3316 // v1.2.5 (W1) — `pre_existing_dests` lexical-normalisation tests.
3317 //
3318 // Direct unit tests of `normalize_dest_key`. The integration
3319 // contract — that snapshot site and lookup site agree on keys
3320 // even when `child.effective_path()` carries a trailing `/` or
3321 // `./` prefix — is covered indirectly because both call sites
3322 // now route through the same helper.
3323 // -----------------------------------------------------------------
3324
3325 /// W1 — a trailing `/` on the input MUST normalise away so two
3326 /// representations of the same logical path produce equal keys.
3327 #[test]
3328 fn pre_existing_dests_normalizes_trailing_slash() {
3329 // PathBuf::push("foo/") historically preserved the trailing
3330 // separator on some platforms; normalize it out via the helper.
3331 let with_slash: PathBuf = ["meta", "child/"].iter().collect();
3332 let without_slash: PathBuf = ["meta", "child"].iter().collect();
3333 let n_with = normalize_dest_key(&with_slash);
3334 let n_without = normalize_dest_key(&without_slash);
3335 assert_eq!(
3336 n_with, n_without,
3337 "normalize_dest_key must produce equal keys for trailing-slash variants \
3338 (with={n_with:?}, without={n_without:?})"
3339 );
3340 }
3341
3342 /// W1 — a `./` prefix or intermediate `./` component MUST drop
3343 /// out via `normalize_dest_key` so the snapshot and lookup sites
3344 /// agree even when the manifest declares paths via `./child`.
3345 #[test]
3346 fn pre_existing_dests_normalizes_curdir_components() {
3347 let with_curdir = PathBuf::from("./meta/./child");
3348 let without_curdir = PathBuf::from("meta/child");
3349 let n_with = normalize_dest_key(&with_curdir);
3350 let n_without = normalize_dest_key(&without_curdir);
3351 assert_eq!(
3352 n_with, n_without,
3353 "normalize_dest_key must drop `./` components \
3354 (with={n_with:?}, without={n_without:?})"
3355 );
3356 }
3357
3358 /// W1 — empty / pure-CurDir input must round-trip to a non-empty
3359 /// path so downstream FS ops still resolve to the same dir.
3360 #[test]
3361 fn pre_existing_dests_normalize_empty_input_yields_curdir() {
3362 assert_eq!(normalize_dest_key(&PathBuf::from(".")), PathBuf::from("."));
3363 assert_eq!(normalize_dest_key(&PathBuf::from("./.")), PathBuf::from("."));
3364 }
3365
3366 // -----------------------------------------------------------------
3367 // v1.2.5 — A3 pool deadlock guard (T-A3, debug-only).
3368 //
3369 // The guard is a `debug_assert!` inside `phase3_recurse`: if any
3370 // worker thread enters a nested `pool.install` (depth >= 2) while
3371 // it still holds a `PackLock`, the assertion fires. Release builds
3372 // compile the assertion out — these tests are gated on
3373 // `cfg(debug_assertions)` for the same reason.
3374 //
3375 // The depth counter lives in `scheduler::POOL_INSTALL_DEPTH` (W3,
3376 // per-OS-thread). The held-lock set lives in
3377 // `pack_lock::HELD_PACK_LOCKS` (W2/W4) — *process-global*
3378 // `Mutex<HashSet<(PathBuf, ThreadId)>>` since the W4 cross-thread
3379 // fix; the previous `thread_local!` storage was invisible to rayon
3380 // worker threads (work-stealing routes the closure onto threads
3381 // whose TL is empty), masking the deadlock pattern in production.
3382 //
3383 // The tests below depend on those test-only accessors:
3384 // - `crate::scheduler::pool_install_depth_for_test()`
3385 // - `crate::pack_lock::held_pack_locks_for_test()` — paths held
3386 // by the *current* OS thread (filtered by `ThreadId` inside
3387 // the global registry)
3388 // - `crate::pack_lock::register_pack_lock_for_test(p)` — inserts
3389 // `(p, current_thread_id)` into the global registry
3390 // - `crate::pack_lock::unregister_pack_lock_for_test(p)` —
3391 // removes `(p, current_thread_id)` from the global registry
3392 // If W2/W3 rename or relocate these, update both call sites and
3393 // the assertion in `phase3_recurse` together.
3394 // -----------------------------------------------------------------
3395
3396 /// T-A3 — the depth guard correctly tracks nested `pool.install`.
3397 /// Sanity check that the RAII `PoolInstallDepthGuard` increments
3398 /// + decrements as expected. Independent of any deadlock pattern.
3399 #[cfg(debug_assertions)]
3400 #[test]
3401 fn pool_install_depth_guard_tracks_nesting() {
3402 use crate::scheduler::pool_install_depth_for_test;
3403 assert_eq!(pool_install_depth_for_test(), 0, "starts at zero");
3404 {
3405 let _g1 = PoolInstallDepthGuard::new();
3406 assert_eq!(pool_install_depth_for_test(), 1, "depth 1 after one guard");
3407 {
3408 let _g2 = PoolInstallDepthGuard::new();
3409 assert_eq!(pool_install_depth_for_test(), 2, "depth 2 after nested guard");
3410 }
3411 assert_eq!(pool_install_depth_for_test(), 1, "back to 1 after inner drop");
3412 }
3413 assert_eq!(pool_install_depth_for_test(), 0, "back to 0 after outer drop");
3414 }
3415
    /// T-A3 — deadlock pattern triggers a debug-build panic.
    ///
    /// Simulates the v1.2.2 R#1 MED pattern: seed a fake PackLock
    /// path in the global registry under the current `ThreadId`
    /// (mimicking `PackLock::acquire_async` having returned a hold on
    /// this thread), increment the pool-install depth twice via two
    /// `PoolInstallDepthGuard`s, then evaluate the production
    /// `debug_assert!` predicate inline. The test thread panics with
    /// the production message, satisfying `#[should_panic]`.
    ///
    /// Gated `#[cfg(debug_assertions)]` because release builds compile
    /// the assertion out — that's by design (no runtime cost in
    /// production; the documented lock acquisition order in
    /// `concurrency.md` is the production contract).
    ///
    /// W4 cross-thread fix note: with the
    /// `Mutex<HashSet<(PathBuf, ThreadId)>>` global registry,
    /// `held_pack_locks_for_test()` filters by current `ThreadId`, so
    /// the seeded path is visible to the assertion when it runs on
    /// the same thread that called `register_pack_lock_for_test`. The
    /// complementary cross-thread visibility test below
    /// (`pool_deadlock_guard_registry_is_thread_id_scoped`) proves
    /// the per-`ThreadId` view is correctly scoped — a rayon worker
    /// only sees its own holds, not concurrent acquires on other
    /// threads.
    #[cfg(debug_assertions)]
    #[test]
    #[should_panic(expected = "pool deadlock guard")]
    fn pool_deadlock_guard_panics_on_violation() {
        use crate::pack_lock::{
            held_pack_locks_for_test, register_pack_lock_for_test, unregister_pack_lock_for_test,
        };
        use crate::scheduler::pool_install_depth_for_test;

        let fake_lock_path = PathBuf::from("/tmp/fake-pack-lock-T-A3");
        // Defensive: cargo-test reuses worker threads via std panic
        // catching, so a prior panic on this OS thread might have
        // left the seed in place under the same `ThreadId`.
        // Clear-then-seed guarantees a fresh entry under the current
        // thread.
        unregister_pack_lock_for_test(&fake_lock_path);
        register_pack_lock_for_test(&fake_lock_path);

        // Named binding (not `_`) so the RAII guard stays alive and
        // the recorded depth remains >= 1 for the rest of the test.
        let _depth_guard = PoolInstallDepthGuard::new();
        let depth = pool_install_depth_for_test();
        let held = held_pack_locks_for_test();
        // First evaluation of the production predicate: depth == 1
        // here, so `depth < 2` holds and this deliberately does NOT
        // panic — documents that a single, non-nested install while
        // holding a lock is tolerated.
        debug_assert!(
            depth < 2 || held.is_empty(),
            "pool deadlock guard: nested pool.install (depth={depth}) entered while \
             holding PackLock(s) {held:?} on this thread — see concurrency.md \
             §lock acquisition order"
        );
        // Above asserts at depth==1 (outer guard), which is < 2, so it
        // would NOT panic. Force the depth >= 2 case by entering a
        // nested guard, then re-evaluate the predicate.
        let _nested = PoolInstallDepthGuard::new();
        let depth = pool_install_depth_for_test();
        let held = held_pack_locks_for_test();
        debug_assert!(
            depth < 2 || held.is_empty(),
            "pool deadlock guard: nested pool.install (depth={depth}) entered while \
             holding PackLock(s) {held:?} on this thread — see concurrency.md \
             §lock acquisition order"
        );
        // Unreachable on a correct build — the second debug_assert!
        // above must have panicked. Cleanup left here for safety in
        // case the assertion is ever weakened.
        unregister_pack_lock_for_test(&fake_lock_path);
    }
3485
3486 /// T-A3 (W4 cross-thread fix) — registry observability across
3487 /// threads. Acquires the seed on thread A, reads the registry
3488 /// from thread B, and asserts: (a) thread B's
3489 /// `held_pack_locks_for_test()` is *empty* (the filter is per
3490 /// `ThreadId`, so B does NOT see A's lock as one of its own
3491 /// holds — this is the correct semantics for the per-closure
3492 /// assertion in `phase3_recurse`); (b) thread B can register +
3493 /// unregister its own independent entry without disturbing A's;
3494 /// (c) thread A still sees the seed after B's reads. Together
3495 /// these facts pin down the W4 fix: the global registry is
3496 /// shared *storage* across threads, but `ThreadId`-keyed *reads*
3497 /// give each thread its own scoped view. The previous
3498 /// `thread_local!` storage made (a) trivially true but at the
3499 /// cost of also masking the deadlock pattern on rayon workers
3500 /// where the lock had been acquired on the outer tokio thread —
3501 /// the Coffman cycle the assertion was meant to catch.
3502 #[cfg(debug_assertions)]
3503 #[test]
3504 fn pool_deadlock_guard_registry_is_thread_id_scoped() {
3505 use crate::pack_lock::{
3506 held_pack_locks_for_test, register_pack_lock_for_test, unregister_pack_lock_for_test,
3507 };
3508
3509 let path = PathBuf::from("/tmp/fake-pack-lock-T-A3-cross-thread");
3510 // Clean any stale entry from a prior cargo-test run on this
3511 // OS thread so the assertions below are deterministic.
3512 unregister_pack_lock_for_test(&path);
3513 register_pack_lock_for_test(&path);
3514
3515 // Thread A (this test thread) sees its own seed.
3516 let held_a = held_pack_locks_for_test();
3517 assert!(
3518 held_a.iter().any(|p| p == &path),
3519 "thread A must observe its own seeded lock; got {held_a:?}"
3520 );
3521
3522 // Thread B reads the registry — it must NOT see A's lock as
3523 // one of its own holds (per-`ThreadId` filter).
3524 let path_for_b = path.clone();
3525 let (held_on_b_before, held_on_b_after) = std::thread::spawn(move || {
3526 let before = held_pack_locks_for_test();
3527 // Sanity: B can register/unregister its own independent
3528 // entry without disturbing A's.
3529 register_pack_lock_for_test(&path_for_b);
3530 let after = held_pack_locks_for_test();
3531 unregister_pack_lock_for_test(&path_for_b);
3532 (before, after)
3533 })
3534 .join()
3535 .expect("thread B panicked");
3536
3537 assert!(
3538 held_on_b_before.is_empty(),
3539 "thread B's per-ThreadId view must NOT include A's seed; got {held_on_b_before:?}"
3540 );
3541 assert!(
3542 held_on_b_after.iter().any(|p| p == &path),
3543 "thread B must observe its own registered lock under its own ThreadId; \
3544 got {held_on_b_after:?}"
3545 );
3546
3547 // Thread A's seed survives B's reads.
3548 let held_a_after = held_pack_locks_for_test();
3549 assert!(
3550 held_a_after.iter().any(|p| p == &path),
3551 "thread A's seed must survive thread B's reads; got {held_a_after:?}"
3552 );
3553
3554 // Cleanup.
3555 unregister_pack_lock_for_test(&path);
3556 }
3557}