Skip to main content

grex_core/tree/
walker.rs

1//! Recursive pack-tree walker.
2//!
3//! The walker hydrates a `pack.yaml` tree: it loads the root manifest, clones
4//! (or fetches + checks out) every `children:` entry via the injected
5//! [`GitBackend`], and recurses. `depends_on` entries are recorded as edges
6//! but never walked — they are *external prereqs* verified by
7//! [`crate::pack::validate::DependsOnValidator`] after the graph is built.
8//!
9//! # Cycle detection
10//!
11//! Cycles are detected **during** the walk, not post-hoc. Each recursion
12//! maintains an ancestor stack of pack identifiers (source-url when present,
13//! otherwise the canonical on-disk path). If a child is about to be entered
14//! whose identifier is already on the stack, the walker short-circuits with
15//! [`TreeError::CycleDetected`]. A separate `CycleValidator` runs
16//! post-hoc as a belt-and-suspenders check so manually-constructed graphs
17//! cannot sneak through.
18//!
19//! # Cyclomatic discipline
20//!
21//! The walk is decomposed so each helper stays well under CC 15:
22//! `walk` → `walk_recursive` → `process_children` → `handle_child` →
23//! `resolve_destination` | `record_depends_on`.
24
25use std::collections::BTreeMap;
26use std::path::{Path, PathBuf};
27use std::sync::atomic::{AtomicBool, Ordering};
28use std::sync::Arc;
29
30use rayon::prelude::*;
31
32use crate::git::GitBackend;
33use crate::pack::validate::child_path::{
34    boundary_fs_reject_reason, boundary_reject_reason, check_one as check_child_path,
35    nfc_duplicate_path,
36};
37use crate::pack::{ChildRef, PackManifest, PackType, PackValidationError, SchemaVersion};
38
39use super::consent::phase2_prune;
40use super::dest_class::{aggregate_untracked, classify_dest, DestClass};
41use super::error::TreeError;
42use super::graph::{EdgeKind, PackEdge, PackGraph, PackNode};
43use super::loader::PackLoader;
44use super::quarantine::QuarantineConfig;
45
46/// Recursive walker. Composes a [`PackLoader`] (for manifests) with a
47/// [`GitBackend`] (for child hydration).
48///
49/// The walker owns no state across calls: each invocation of [`Walker::walk`]
50/// produces a fresh [`PackGraph`] and leaves no footprint.
51///
52/// **Status (v1.2.1, path iii)**: retired from the production sync
53/// orchestrator. `sync::run` now composes [`sync_meta`] (mutate) →
54/// [`super::graph_build::build_graph`] (read-only) → `run_actions` instead
55/// of issuing clones+fetches inside the graph build. The `Walker` symbol
56/// is kept for downstream test-suite compatibility (22 fixture call sites
57/// in `crates/grex-core/tests/tree_walk.rs`); new code SHOULD NOT add
58/// production call sites.
59#[doc(hidden)]
60pub struct Walker<'a> {
61    loader: &'a dyn PackLoader,
62    backend: &'a dyn GitBackend,
63    workspace: PathBuf,
64    /// Optional global ref override (M4-D `grex sync --ref <sha|branch|tag>`).
65    /// When `Some`, every child clone/checkout uses this ref instead of the
66    /// declared `child.ref` from the parent manifest. `None` preserves M3
67    /// semantics.
68    ref_override: Option<String>,
69}
70
71impl<'a> Walker<'a> {
72    /// Construct a new walker.
73    ///
74    /// `workspace` is the directory under which child packs will be cloned,
75    /// using each [`ChildRef::effective_path`] as the sub-directory name.
76    #[must_use]
77    pub fn new(
78        loader: &'a dyn PackLoader,
79        backend: &'a dyn GitBackend,
80        workspace: PathBuf,
81    ) -> Self {
82        Self { loader, backend, workspace, ref_override: None }
83    }
84
85    /// Set a global ref override applied to every child pack.
86    ///
87    /// Surfaced as `grex sync --ref <sha|branch|tag>` (M4-D). The override
88    /// replaces each child's declared `ref` in its parent manifest. An
89    /// empty string is treated as "no override" — callers should reject
90    /// empty values at the CLI layer before reaching this point.
91    #[must_use]
92    pub fn with_ref_override(mut self, r#ref: Option<String>) -> Self {
93        self.ref_override = r#ref.filter(|s| !s.is_empty());
94        self
95    }
96
97    /// Walk the tree rooted at `root_pack_path`, returning the fully
98    /// hydrated graph.
99    ///
100    /// # Errors
101    ///
102    /// Returns [`TreeError`] on any loader, git, cycle, or name-mismatch
103    /// failure. The walk aborts on the first failure — the spec-level
104    /// "fail loud, fail fast" default.
105    pub fn walk(&self, root_pack_path: &Path) -> Result<PackGraph, TreeError> {
106        let mut state = BuildState::default();
107        let root_manifest = self.loader.load(root_pack_path)?;
108        // Pre-walk path-traversal gate: reject any malicious
109        // `children[].path` (or URL-derived tail) BEFORE any clone fires.
110        // Closes the v1.1.0 flat-sibling exploit window where a `path:
111        // ../escape` would materialise a child outside the pack root
112        // before plan-phase validation could see it.
113        validate_children_paths(&root_manifest)?;
114        let root_commit_sha = probe_head_sha(self.backend, root_pack_path);
115        let root_id = state.push_node(PackNode {
116            id: 0,
117            name: root_manifest.name.clone(),
118            path: root_pack_path.to_path_buf(),
119            source_url: None,
120            manifest: root_manifest.clone(),
121            parent: None,
122            commit_sha: root_commit_sha,
123            synthetic: false,
124        });
125        let root_identity = pack_identity_for_root(root_pack_path);
126        self.walk_recursive(root_id, &root_manifest, &mut state, &mut vec![root_identity])?;
127        Ok(PackGraph::new(state.nodes, state.edges))
128    }
129
130    /// Recursive step. `ancestors` carries the pack identifiers
131    /// currently on the in-progress walk path — pushed on entry,
132    /// popped on return. It is a path-prefix set (NOT a global
133    /// "visited" set), so a diamond reaching the same descendant
134    /// via two disjoint paths is not a cycle.
135    ///
136    /// Each loaded manifest's `children[]` is path-traversal-validated
137    /// before any of those children are resolved on disk; the entry
138    /// point pre-validates the root manifest, so by the time
139    /// `walk_recursive` runs for a child, that child's own `children[]`
140    /// is what needs gating before the next descent.
141    fn walk_recursive(
142        &self,
143        parent_id: usize,
144        manifest: &PackManifest,
145        state: &mut BuildState,
146        ancestors: &mut Vec<String>,
147    ) -> Result<(), TreeError> {
148        self.record_depends_on(parent_id, manifest, state);
149        self.process_children(parent_id, manifest, state, ancestors)
150    }
151
152    /// Record one `DependsOn` edge per `depends_on` entry. Resolution
153    /// against actual graph nodes happens later in `DependsOnValidator`.
154    /// We emit edges only where the target already exists in the graph so
155    /// the edge list stays in-bounds; unresolved deps are surfaced by the
156    /// validator, not carried as dangling edges.
157    fn record_depends_on(&self, parent_id: usize, manifest: &PackManifest, state: &mut BuildState) {
158        for dep in &manifest.depends_on {
159            if let Some(to) = find_node_id_by_name_or_url(&state.nodes, dep) {
160                state.edges.push(PackEdge { from: parent_id, to, kind: EdgeKind::DependsOn });
161            }
162        }
163    }
164
165    fn process_children(
166        &self,
167        parent_id: usize,
168        manifest: &PackManifest,
169        state: &mut BuildState,
170        ancestors: &mut Vec<String>,
171    ) -> Result<(), TreeError> {
172        for child in &manifest.children {
173            self.handle_child(parent_id, child, state, ancestors)?;
174        }
175        Ok(())
176    }
177
178    fn handle_child(
179        &self,
180        parent_id: usize,
181        child: &ChildRef,
182        state: &mut BuildState,
183        ancestors: &mut Vec<String>,
184    ) -> Result<(), TreeError> {
185        let identity = pack_identity_for_child(child);
186        if ancestors.iter().any(|s| s == &identity) {
187            let mut chain = ancestors.clone();
188            chain.push(identity);
189            return Err(TreeError::CycleDetected { chain });
190        }
191        // v1.2.0 Stage 1.c: FS-resident boundary check fires BEFORE
192        // any clone / fetch. Junctions, reparse points, and
193        // `.git`-as-file (gitfile redirect) all re-open the
194        // parent-boundary escape that the syntactic gate closes on
195        // the path string itself; running the check on the prospective
196        // dest path means a hostile pre-existing slot is rejected
197        // before the GitBackend writes anything into (or through) it.
198        // The prospective path is reconstructed here so the helper
199        // can interrogate the slot before `resolve_destination`
200        // materialises a clone — pre-clone runs return `Ok(())` because
201        // the slot doesn't exist yet, and the walk continues normally.
202        let prospective_dest = self.workspace.join(child.effective_path());
203        check_dest_boundary(&prospective_dest, &child.effective_path())?;
204        let dest = self.resolve_destination(child, state)?;
205        // v1.1.1 plain-git children: when the destination has no
206        // `.grex/pack.yaml` but does carry a `.git/`, synthesize a
207        // leaf scripted-no-hooks manifest in-memory rather than
208        // aborting. See
209        // `openspec/changes/feat-v1.1.1-plain-git-children/design.md`
210        // §"Synthesis algorithm".
211        let (child_manifest, is_synthetic) = match self.loader.load(&dest) {
212            Ok(m) => (m, false),
213            Err(TreeError::ManifestNotFound(_)) if dest_has_git_repo(&dest) => {
214                (synthesize_plain_git_manifest(child), true)
215            }
216            Err(e) => return Err(e),
217        };
218        verify_child_name(&child_manifest.name, child, &dest)?;
219        // Validate this child's own `children[]` before its descent
220        // resolves any of them on disk. Mirrors the root-manifest gate
221        // in `walk`; together they ensure no clone can fire for a
222        // grandchild whose parent declared a traversal-bearing path.
223        validate_children_paths(&child_manifest)?;
224
225        let commit_sha = probe_head_sha(self.backend, &dest);
226        let child_id = state.push_node(PackNode {
227            id: state.nodes.len(),
228            name: child_manifest.name.clone(),
229            path: dest.clone(),
230            source_url: Some(child.url.clone()),
231            manifest: child_manifest.clone(),
232            parent: Some(parent_id),
233            commit_sha,
234            synthetic: is_synthetic,
235        });
236        state.edges.push(PackEdge { from: parent_id, to: child_id, kind: EdgeKind::Child });
237
238        ancestors.push(identity);
239        let result = self.walk_recursive(child_id, &child_manifest, state, ancestors);
240        ancestors.pop();
241        result
242    }
243
244    /// Decide where `child` lives on disk and ensure the working tree is
245    /// in the expected state: clone if absent, fetch + optional checkout
246    /// if present.
247    fn resolve_destination(
248        &self,
249        child: &ChildRef,
250        _state: &mut BuildState,
251    ) -> Result<PathBuf, TreeError> {
252        let dest = self.workspace.join(child.effective_path());
253        // M4-D: `ref_override` wins over the parent-declared `child.ref`.
254        // Falls back to the declared ref when no override is active.
255        let effective_ref = self.ref_override.as_deref().or(child.r#ref.as_deref());
256        if dest_has_git_repo(&dest) {
257            self.backend.fetch(&dest)?;
258            if let Some(r) = effective_ref {
259                self.backend.checkout(&dest, r)?;
260            }
261        } else {
262            self.backend.clone(&child.url, &dest, effective_ref)?;
263        }
264        Ok(dest)
265    }
266}
267
268/// Best-effort HEAD probe. Returns `None` when the target is not a git
269/// repository or the backend refuses — the root of a declarative pack is
270/// often a plain directory, so this must not fail the walk.
271///
272/// Non-`.git` directories short-circuit silently (truly not a git
273/// repo). Backend errors on an actual `.git` directory are surfaced as
274/// a `tracing::warn!` log line so transient gix failures / ACL-denied
275/// `.git` reads do not silently degrade into an empty `commit_sha`
276/// without any operator signal. The walker continues with `None` — a
277/// best-effort probe is, by construction, allowed to fail.
278fn probe_head_sha(backend: &dyn GitBackend, path: &Path) -> Option<String> {
279    let dir =
280        if path.extension().and_then(|e| e.to_str()).is_some_and(|e| matches!(e, "yaml" | "yml")) {
281            path.parent()
282                .and_then(Path::parent)
283                .map_or_else(|| path.to_path_buf(), Path::to_path_buf)
284        } else {
285            path.to_path_buf()
286        };
287    if !dir.join(".git").exists() {
288        return None;
289    }
290    match backend.head_sha(&dir) {
291        Ok(s) => Some(s),
292        Err(e) => {
293            tracing::warn!(
294                target: "grex::walker",
295                "HEAD probe failed for {}: {e}",
296                dir.display()
297            );
298            None
299        }
300    }
301}
302
303/// Mutable state threaded through the walk. Private to this module so only
304/// the walker can grow the graph.
305#[derive(Default)]
306struct BuildState {
307    nodes: Vec<PackNode>,
308    edges: Vec<PackEdge>,
309}
310
311impl BuildState {
312    fn push_node(&mut self, node: PackNode) -> usize {
313        let id = node.id;
314        self.nodes.push(node);
315        id
316    }
317}
318
319/// Identity string used by the cycle detector for the root pack.
320fn pack_identity_for_root(path: &Path) -> String {
321    format!("path:{}", path.display())
322}
323
324/// Identity string for a child — url+ref so the same repo at two different
325/// refs is considered distinct. This matches git semantics and avoids
326/// false-positive cycle detections for diamond dependencies on different
327/// tags.
328///
329/// v1.2.3 (B2): when the ref is missing or empty the trailing `@` is
330/// omitted so the on-the-wire identity is just `url:<url>` — matches
331/// `Grex.Walker.ChildRef.identity` in the Lean model. Without this
332/// elision two children that differ only in `ref: None` vs
333/// `ref: Some("")` would otherwise serialise the same way as
334/// `url:<url>@`, masking the distinction the Lean specification draws.
335fn pack_identity_for_child(child: &ChildRef) -> String {
336    match child.r#ref.as_deref() {
337        Some(r) if !r.is_empty() => format!("url:{}@{}", child.url, r),
338        _ => format!("url:{}", child.url),
339    }
340}
341
342/// Shallow on-disk check: a `.git` entry (file or dir) signals an existing
343/// working tree. We deliberately do not open the repo here — that's the
344/// backend's job via `fetch`/`checkout`.
345///
346/// # Symlink safety
347///
348/// `dest` itself MUST NOT be a symlink. If it is, this function returns
349/// `false` regardless of whether the symlink target carries a `.git`
350/// entry. This refusal closes a synthesis-redirection attack: a parent
351/// pack declaring `path: code` against a workspace where the user
352/// happens to have `<workspace>/code -> $HOME` would otherwise let the
353/// walker treat `$HOME/.git` as a "plain-git child" and operate on an
354/// unrelated tree. The check uses [`std::fs::symlink_metadata`] so the
355/// link itself — not its target — is interrogated.
356pub fn dest_has_git_repo(dest: &Path) -> bool {
357    // Reject symlinked destinations outright. `symlink_metadata` does
358    // NOT follow the link, so a broken or path-traversing symlink is
359    // treated as untrusted regardless of its target.
360    if let Ok(meta) = std::fs::symlink_metadata(dest) {
361        if meta.file_type().is_symlink() {
362            return false;
363        }
364    }
365    dest.join(".git").exists()
366}
367
368/// Build the in-memory manifest used for v1.1.1 plain-git children — a
369/// leaf scripted pack with no hooks, no children, no actions. Activated
370/// at the walker's load-fallback boundary when a child has a `.git/`
371/// but no `.grex/pack.yaml`. See
372/// `openspec/changes/feat-v1.1.1-plain-git-children/design.md`.
373pub fn synthesize_plain_git_manifest(child: &ChildRef) -> PackManifest {
374    PackManifest {
375        schema_version: SchemaVersion::current(),
376        name: child.effective_path(),
377        r#type: PackType::Scripted,
378        version: None,
379        depends_on: Vec::new(),
380        children: Vec::new(),
381        actions: Vec::new(),
382        teardown: None,
383        extensions: BTreeMap::new(),
384    }
385}
386
387/// Enforce that the cloned child's pack.yaml name matches what the parent
388/// declared. The parent-side expectation is the child entry's
389/// [`ChildRef::effective_path`] — the directory name in the workspace.
390fn verify_child_name(got: &str, child: &ChildRef, dest: &Path) -> Result<(), TreeError> {
391    let expected = child.effective_path();
392    if got == expected {
393        return Ok(());
394    }
395    Err(TreeError::PackNameMismatch { got: got.to_string(), expected, path: dest.to_path_buf() })
396}
397
398/// Resolve a `depends_on` entry (URL or bare name) against nodes already
399/// recorded. Returns the node id on a hit, `None` otherwise.
400fn find_node_id_by_name_or_url(nodes: &[PackNode], dep: &str) -> Option<usize> {
401    if looks_like_url(dep) {
402        nodes.iter().find(|n| n.source_url.as_deref() == Some(dep)).map(|n| n.id)
403    } else {
404        nodes.iter().find(|n| n.name == dep).map(|n| n.id)
405    }
406}
407
408/// Run the path-traversal gate on `manifest.children`. Returns the
409/// first offending child as a [`TreeError::ChildPathInvalid`] so the
410/// walker aborts before any clone of the offending sibling fires.
411///
412/// Surfacing only the first offender (rather than aggregating) matches
413/// the walker's fail-fast posture — the plan-phase
414/// [`crate::pack::validate::ChildPathValidator`] still runs against the
415/// whole graph post-walk via `validate_graph`, so authors who clear
416/// the traversal exploit see the full diagnostic batch on the next
417/// invocation.
418///
419/// `check_child_path` is documented to return only the
420/// `ChildPathInvalid` variant, but we `match` exhaustively so any
421/// future variant the helper grows surfaces as a compile-time
422/// failure here rather than as a silently swallowed `Some(other)`.
423fn validate_children_paths(manifest: &PackManifest) -> Result<(), TreeError> {
424    // v1.2.0 Stage 1.c: NFC-duplicate sweep across the sibling list.
425    // Runs first because it's a cross-cutting check (one offender
426    // implicates the WHOLE list, not a single child). Surfaces as
427    // `TreeError::ManifestPathEscape` per walker.md
428    // §boundary-preservation — a NFC-collapsed name re-introduces the
429    // very boundary escape the regex was meant to close on
430    // case-insensitive filesystems.
431    if let Some(path) = nfc_duplicate_path(&manifest.children) {
432        return Err(TreeError::ManifestPathEscape {
433            path,
434            reason: "duplicate child path under Unicode NFC normalization (case-insensitive FS collision risk)"
435                .to_string(),
436        });
437    }
438    for child in &manifest.children {
439        // v1.2.0 Stage 1.c: per-segment boundary-preservation rejects.
440        // Layered AHEAD of the syntactic gate so the more specific
441        // `ManifestPathEscape` diagnostic wins for entries that would
442        // also fail the bare-name regex (e.g. `child:foo` is rejected
443        // here as a colon hazard instead of a generic charset miss).
444        let segment = child.path.as_deref().map_or_else(|| child.effective_path(), str::to_string);
445        if let Some(reason) = boundary_reject_reason(&segment) {
446            return Err(TreeError::ManifestPathEscape {
447                path: segment,
448                reason: reason.to_string(),
449            });
450        }
451        let Some(err) = check_child_path(child) else { continue };
452        match err {
453            PackValidationError::ChildPathInvalid { child_name, path, reason } => {
454                return Err(TreeError::ChildPathInvalid { child_name, path, reason });
455            }
456            other @ (PackValidationError::DuplicateSymlinkDst { .. }
457            | PackValidationError::GraphCycle { .. }
458            | PackValidationError::DependsOnUnsatisfied { .. }
459            | PackValidationError::ChildPathDuplicate { .. }) => {
460                // `check_child_path` is contracted to only emit
461                // `ChildPathInvalid`. Any other variant indicates the
462                // helper has drifted out of sync with this caller —
463                // surface loudly rather than silently swallowing it.
464                tracing::error!(
465                    target: "grex::walker",
466                    "check_child_path returned unexpected variant: {other:?}",
467                );
468                debug_assert!(false, "check_child_path returned unexpected variant: {other:?}");
469            }
470        }
471    }
472    Ok(())
473}
474
475/// v1.2.0 Stage 1.c: filesystem-resident boundary check. Run AFTER
476/// the destination has been resolved against the parent workspace but
477/// BEFORE any clone / fetch fires. Catches the case where the slot
478/// the walker is about to materialise into is already a junction,
479/// reparse point, symlink, or `.git`-as-file — each of which would
480/// re-introduce a parent-boundary escape.
481///
482/// Pre-clone: a non-existent destination is the happy path; the
483/// helper returns `None` and the walk continues. Post-clone or on a
484/// re-walk where the destination is already populated, the helper
485/// inspects the on-disk entry and surfaces a `ManifestPathEscape`
486/// when the entry violates the boundary contract.
487///
488/// Visibility: `pub(super)` — used by the walker's `handle_child`
489/// path-resolution step (wired in 1.c follow-up; this commit lands
490/// the helper itself and the boundary-check call site for the
491/// path-segment rejects).
492pub(super) fn check_dest_boundary(dest: &Path, segment: &str) -> Result<(), TreeError> {
493    if let Some(reason) = boundary_fs_reject_reason(dest) {
494        return Err(TreeError::ManifestPathEscape {
495            path: segment.to_string(),
496            reason: reason.to_string(),
497        });
498    }
499    Ok(())
500}
501
502/// Decide whether a `depends_on` entry is a URL rather than a bare name.
503/// The rule is intentionally literal — matching the spec's enumeration of
504/// accepted forms.
505pub(super) fn looks_like_url(s: &str) -> bool {
506    s.starts_with("http://")
507        || s.starts_with("https://")
508        || s.starts_with("ssh://")
509        || s.starts_with("git@")
510        || s.ends_with(".git")
511}
512
513// ---------------------------------------------------------------------------
514// v1.2.0 Stage 1.g — `sync_meta` entry point: parent-relative,
515// distributed-lockfile walker. Three phases per meta:
516//
517//   Phase 1 (siblings): `classify_dest` (1.e) per child, dispatch
518//     fetch / clone / refuse based on the verdict; aggregate
519//     `PresentUndeclared` into `TreeError::UntrackedGitRepos`.
520//   Phase 2 (orphan prune): for each `prune_candidate` (caller-supplied
521//     by 1.h once the distributed lockfile read lands), run the
522//     consent-walk via `phase2_prune` (1.f).
523//   Phase 3 (recursion): per child whose dest carries
524//     `<dest>/.grex/pack.yaml`, recursively `sync_meta` if `recurse`
525//     is true and depth < `max_depth`.
526//
527// Design discipline:
528//
529// * **No new locking primitives.** Per-pack git ops acquire the M6
530//   `PackLock` (synchronous `acquire`) for the duration of the
531//   clone/fetch. The Lean axiom `sync_disjoint_commutes` (Bridge.lean)
532//   permits any disjoint scheduler — sequential is the smallest model
533//   that satisfies the axiom. Sibling parallelism via rayon is a 1.j /
534//   1.l-territory follow-up; the scaffolding here keeps the
535//   single-threaded baseline correct first.
536// * **No lockfile mechanics.** Phase 2's orphan list is a parameter,
537//   not a read from `<meta>/.grex/grex.lock.jsonl`. 1.h owns the
538//   distributed-lockfile read/write surface; this commit only wires
539//   the consent-walk + prune dispatch.
540// * **Error aggregation.** Every Phase 1 child failure plus every
541//   Phase 2 refusal lands in `SyncMetaReport::errors` before the call
542//   returns. The walker is fail-LOUD (caller gets the full picture),
543//   not fail-fast (the legacy `Walker::walk` aborts on the first hit).
544//   This matches the v1.2.0 walker.md §"untracked git policy" rule
545//   that `UntrackedGitRepos` must enumerate every offender at once.
546// ---------------------------------------------------------------------------
547
548/// Per-meta options threaded through `sync_meta`. Keeps the call-site
549/// signature small without coupling to the full [`crate::sync::SyncOptions`]
550/// surface — the orchestrator (`sync.rs::run`) is responsible for projecting
551/// `SyncOptions` into `SyncMetaOptions` when it wires this entry point.
552#[derive(Debug, Clone)]
553pub struct SyncMetaOptions {
554    /// Global ref override (`grex sync --ref <sha|branch|tag>`). Mirrors
555    /// [`Walker::with_ref_override`]: when `Some`, every child's
556    /// declared `ref` is replaced.
557    pub ref_override: Option<String>,
558    /// When `true`, Phase 3 recurses into child metas. `false` is the
559    /// `doctor --shallow` semantics: process only the immediate
560    /// children of the supplied meta.
561    pub recurse: bool,
562    /// Bound on Phase 3 recursion depth. `None` is unbounded; `Some(n)`
563    /// caps at `n` levels of nesting (the supplied `meta_dir` is depth
564    /// 0). Recursion ALWAYS halts before depth `n+1`.
565    pub max_depth: Option<usize>,
566    /// Phase 2 prune-safety override. Mirrors
567    /// [`crate::sync::SyncOptions::force_prune`].
568    pub force_prune: bool,
569    /// Phase 2 prune-safety override. Mirrors
570    /// [`crate::sync::SyncOptions::force_prune_with_ignored`].
571    pub force_prune_with_ignored: bool,
572    /// v1.2.1 item 3 — rayon thread-pool size for sibling-parallel
573    /// Phase 1 + Phase 3. `None` ⇒ rayon's default (`num_cpus::get()`);
574    /// `Some(1)` ⇒ effectively sequential (single-threaded pool, useful
575    /// for determinism testing); `Some(n >= 2)` ⇒ bounded parallel.
576    /// `Some(0)` is clamped to `1` (rayon rejects a zero-thread pool).
577    /// Mirrors [`crate::sync::SyncOptions::parallel`] semantics with the
578    /// one exception that `0` is clamped to `1` here — the unbounded
579    /// sentinel only makes sense for tokio's `Semaphore::MAX_PERMITS`.
580    pub parallel: Option<usize>,
581    /// v1.2.1 item 5b — when `Some`, Phase 2 prunes are diverted
582    /// through the snapshot-then-unlink quarantine pipeline before
583    /// `unlink(dest)` fires. Carries the per-meta trash bucket root
584    /// and audit-log path. `None` (default) preserves the legacy
585    /// v1.2.0 direct-unlink path. Set by
586    /// [`crate::sync::SyncOptions::quarantine`] at the orchestrator
587    /// boundary; the consent layer reads this to pick the deletion
588    /// strategy. Lean theorem `quarantine_snapshot_precedes_delete`
589    /// proves the safety contract.
590    pub quarantine: Option<QuarantineConfig>,
591}
592
593impl Default for SyncMetaOptions {
594    fn default() -> Self {
595        Self {
596            ref_override: None,
597            recurse: true,
598            max_depth: None,
599            force_prune: false,
600            force_prune_with_ignored: false,
601            parallel: None,
602            quarantine: None,
603        }
604    }
605}
606
607/// Outcome of one [`sync_meta`] invocation. Aggregated across every
608/// recursion frame: a sub-meta's report is folded into its parent's
609/// report at the end of Phase 3.
610#[derive(Debug, Default)]
611pub struct SyncMetaReport {
612    /// Number of metas processed (this meta + every descendant Phase 3
613    /// recursion fired against). Useful for `--shallow` verification:
614    /// `recurse: false` means `metas_visited == 1`.
615    pub metas_visited: usize,
616    /// Per-child Phase 1 verdicts, keyed by parent-relative child path.
617    /// `(meta_dir, child_dest, classification)` — exposed primarily for
618    /// tests; downstream callers will project into a status report.
619    pub phase1_classifications: Vec<(PathBuf, PathBuf, DestClass)>,
620    /// Successful Phase 2 prunes (paths that were removed). Empty when
621    /// no orphan list was supplied or every orphan refused.
622    pub phase2_pruned: Vec<PathBuf>,
623    /// Aggregate of every error encountered across Phases 1, 2, and 3.
624    /// The walker continues past recoverable errors so the caller sees
625    /// the full picture in one pass.
626    pub errors: Vec<TreeError>,
627}
628
629impl SyncMetaReport {
630    fn merge(&mut self, mut child: SyncMetaReport) {
631        self.metas_visited += child.metas_visited;
632        self.phase1_classifications.append(&mut child.phase1_classifications);
633        self.phase2_pruned.append(&mut child.phase2_pruned);
634        self.errors.append(&mut child.errors);
635    }
636}
637
638/// Sync a meta pack and (optionally) its descendants.
639///
640/// `meta_dir` is the on-disk directory containing the meta's
641/// `.grex/pack.yaml`. `prune_candidates` is the list of orphan dests
642/// (parent-relative) the caller's distributed-lockfile reader determined
643/// no longer appear in `manifest.children`.
644///
645/// The walker is **fail-loud, not fail-fast**: recoverable errors land
646/// in [`SyncMetaReport::errors`] and the walk continues so the caller
647/// sees the full picture in one pass. The only short-circuit is a
648/// detected cycle, which surfaces as `Err(TreeError::CycleDetected)`
649/// to keep the cyclic-clone storm risk contained.
650///
651/// # Errors
652///
653/// Returns the *first* catastrophic error: manifest parse failure on
654/// the supplied `meta_dir`, a cycle in the manifest forest (URL+ref
655/// identity), or a pre-walk path-traversal violation. Per-child
656/// clone / fetch / prune failures aggregate into
657/// [`SyncMetaReport::errors`] without aborting the walk.
658pub fn sync_meta(
659    meta_dir: &Path,
660    backend: &dyn GitBackend,
661    loader: &dyn PackLoader,
662    opts: &SyncMetaOptions,
663    prune_candidates: &[PathBuf],
664) -> Result<SyncMetaReport, TreeError> {
665    // v1.2.3 (B4) — seed the ancestor chain with the root pack's
666    // path-namespaced identity (`path:<meta_dir>`) so the Lean
667    // `acyclic_path` precondition that drives
668    // `sync_meta_no_cycle_infinite_clone` is established right at
669    // the call site rather than implicitly relying on an empty
670    // initial ancestor list. Children identify with `url:<url>@<ref>` —
671    // disjoint namespace from the root's `path:` identity, so seeding
672    // does not introduce false-positive cycle hits against any
673    // legitimate child.
674    //
675    // `sync_meta_inner` extends this chain per recursion edge (Phase
676    // 3) using clone-per-child so disjoint sibling branches do not
677    // pollute each other's ancestor view.
678    let initial_ancestors = vec![pack_identity_for_root(meta_dir)];
679    sync_meta_inner(
680        meta_dir,
681        backend,
682        loader,
683        opts,
684        prune_candidates,
685        /* depth */ 0,
686        &initial_ancestors,
687    )
688}
689
690fn sync_meta_inner(
691    meta_dir: &Path,
692    backend: &dyn GitBackend,
693    loader: &dyn PackLoader,
694    opts: &SyncMetaOptions,
695    prune_candidates: &[PathBuf],
696    depth: usize,
697    ancestors: &[String],
698) -> Result<SyncMetaReport, TreeError> {
699    let manifest = loader.load(meta_dir)?;
700    // v1.2.0 Stage 1.c gate — every recursion frame re-runs the
701    // path-traversal sweep before any child is touched on disk.
702    validate_children_paths(&manifest)?;
703
704    let mut report = SyncMetaReport { metas_visited: 1, ..SyncMetaReport::default() };
705
706    // v1.2.1 item 3: build a per-call rayon pool sized from
707    // `opts.parallel`. Phase 1 + Phase 3 install on this pool; Phase 2
708    // stays sequential (single-meta orphan sweep — no sibling
709    // parallelism to extract). The pool is dropped at the end of
710    // `sync_meta_inner`, so each recursion frame builds + tears down
711    // its own pool. This is intentional: we want the worker count to
712    // refresh per call so a top-level `--parallel 1` cap is honoured
713    // without piggy-backing on a global pool that an unrelated caller
714    // might have configured differently.
715    let pool = build_pool(opts.parallel)?;
716
717    phase1_sync_children(&pool, meta_dir, &manifest, backend, opts, &mut report);
718    phase2_prune_orphans(meta_dir, prune_candidates, opts, &mut report);
719    // v1.2.2 — cycle detection short-circuits the recursion edge with
720    // an `Err` return so the caller sees `Err(CycleDetected)` directly
721    // rather than burying it in `report.errors`. Cycles are catastrophic
722    // (would otherwise clone forever); fail-loud here, NOT fold-into-report.
723    phase3_recurse(
724        &pool,
725        meta_dir,
726        &manifest,
727        backend,
728        loader,
729        opts,
730        depth,
731        ancestors,
732        &mut report,
733    )?;
734
735    Ok(report)
736}
737
738/// v1.2.1 item 3 — build a rayon `ThreadPool` sized from
739/// `opts.parallel`. Encapsulates the `None` ⇒ default,
740/// `Some(0)` ⇒ clamp-to-1, `Some(n)` ⇒ exact-N policy in one place
741/// so Phase 1 and Phase 3 install on identically-configured pools.
742///
743/// `Some(1)` produces a single-worker pool — the determinism
744/// test-mode fast-path (sibling iteration order matches sequential
745/// for-loop order on a 1-thread pool).
746///
747/// Build failures surface as [`TreeError::ManifestRead`]: a rayon
748/// pool failure is invariably a host-resource issue (out of file
749/// descriptors, thread-creation refused) — bucketing it into the
750/// generic IO-error variant keeps the error surface tight without
751/// inventing a one-off `RayonPoolBuild` discriminant. The Lean
752/// model treats pool construction as a well-formedness precondition
753/// of `sync`, not an in-band failure mode.
754fn build_pool(parallel: Option<usize>) -> Result<rayon::ThreadPool, TreeError> {
755    let mut builder = rayon::ThreadPoolBuilder::new();
756    if let Some(n) = parallel {
757        builder = builder.num_threads(n.max(1));
758    }
759    builder.build().map_err(|e| {
760        TreeError::ManifestRead(format!("failed to build rayon pool for sync_meta: {e}"))
761    })
762}
763
764/// Per-child output from Phase 1's parallel pass. Collected into a
765/// `Vec` after the rayon `par_iter` settles, then drained into the
766/// caller's `SyncMetaReport` in a single sequential pass. Carrying
767/// the data plain (no `&mut report` shared across threads) is what
768/// keeps the parallelisation sound under the Lean
769/// `sync_disjoint_commutes` axiom: each iteration's mutations are
770/// confined to its own owned struct.
771struct Phase1ChildOutcome {
772    /// `(meta_dir, dest, class)` — pushed onto
773    /// `report.phase1_classifications` regardless of dispatch outcome.
774    classification: (PathBuf, PathBuf, DestClass),
775    /// Per-child clone/fetch failure, if any. Folded into
776    /// `report.errors`.
777    error: Option<TreeError>,
778    /// `Some((dest, class))` when the child classified as
779    /// `PresentUndeclared`; the caller aggregates these into one
780    /// `UntrackedGitRepos` error after the parallel pass.
781    undeclared: Option<(PathBuf, DestClass)>,
782}
783
784/// Phase 1: classify each declared child, then dispatch. Per the v1.2.0
785/// walker.md pseudocode the per-child branches are:
786///
787/// * `Missing` → clone via `backend.clone(url, dest, ref)`.
788/// * `PresentDeclared` → fetch (+ checkout if a ref override applies).
789/// * `PresentDirty` → no-op (preserve user changes; will surface at
790///   exec/plan stage if applicable).
791/// * `PresentInProgress` → refuse via `DirtyTreeRefusal{GitInProgress}`
792///   (collected into `report.errors`).
793/// * `PresentUndeclared` → impossible at Phase 1 dispatch time because
794///   declared paths are in `manifest.children`; the variant is reserved
795///   for the lockfile-orphan sweep (Phase 2 territory).
796///
797/// v1.2.1 item 3 — sibling-parallel via rayon `par_iter`. Disjointness
798/// across siblings (each child has its own `meta_dir.join(child.path)`
799/// dest, validated by `validate_children_paths` upstream) discharges
800/// the precondition of the `sync_disjoint_commutes` axiom in
801/// `proof/Grex/Bridge.lean`. The per-pack `.grex-lock` (M6, acquired
802/// inside the GitBackend implementation) continues to serialise any
803/// cross-task contention on the same pack path. Per-thread results
804/// are collected into a `Vec<Phase1ChildOutcome>` and folded into the
805/// caller's `SyncMetaReport` in a single sequential pass, preserving
806/// deterministic ordering of `report.phase1_classifications` (rayon
807/// `collect_into_vec` preserves source-order regardless of completion
808/// order).
809fn phase1_sync_children(
810    pool: &rayon::ThreadPool,
811    meta_dir: &Path,
812    manifest: &PackManifest,
813    backend: &dyn GitBackend,
814    opts: &SyncMetaOptions,
815    report: &mut SyncMetaReport,
816) {
817    // Install on the per-call pool so `--parallel N` is honoured even
818    // when this is invoked from inside another rayon context (Phase 3
819    // recursion). `install` is a synchronous fence: the closure
820    // returns once every parallel iteration has settled.
821    let outcomes: Vec<Phase1ChildOutcome> = pool.install(|| {
822        manifest
823            .children
824            .par_iter()
825            .map(|child| phase1_handle_child(meta_dir, child, backend, opts))
826            .collect()
827    });
828
829    // Sequential fold: the parallel pass cannot mutate `report` directly
830    // (it is `&mut`), so we drain the per-child outcomes here. Order is
831    // preserved by `par_iter().collect()` — see the `phase1_par_iter_preserves_order`
832    // test below.
833    let mut undeclared_seen: Vec<(PathBuf, DestClass)> = Vec::new();
834    for outcome in outcomes {
835        report.phase1_classifications.push(outcome.classification);
836        if let Some(e) = outcome.error {
837            report.errors.push(e);
838        }
839        if let Some(pair) = outcome.undeclared {
840            undeclared_seen.push(pair);
841        }
842    }
843    if let Err(e) = aggregate_untracked(undeclared_seen) {
844        report.errors.push(e);
845    }
846}
847
848/// Per-child Phase 1 dispatch — runs inside the rayon pool. The
849/// extracted fn keeps the parallel closure body small and gives the
850/// Lean axiom a single discoverable Rust contract anchor (this fn is
851/// the per-sibling unit of work the `sync_disjoint_commutes` axiom
852/// quantifies over).
853fn phase1_handle_child(
854    meta_dir: &Path,
855    child: &ChildRef,
856    backend: &dyn GitBackend,
857    opts: &SyncMetaOptions,
858) -> Phase1ChildOutcome {
859    let dest = meta_dir.join(child.effective_path());
860    // Every declared child IS in the manifest by construction —
861    // `declared_in_manifest = true` is the only correct call here.
862    let class = classify_dest(&dest, true, None);
863    let mut out = Phase1ChildOutcome {
864        classification: (meta_dir.to_path_buf(), dest.clone(), class),
865        error: None,
866        undeclared: None,
867    };
868    match class {
869        DestClass::Missing => {
870            if let Err(e) = phase1_clone(backend, child, &dest, opts) {
871                out.error = Some(e);
872            }
873        }
874        DestClass::PresentDeclared => {
875            if let Err(e) = phase1_fetch(backend, child, &dest, opts) {
876                out.error = Some(e);
877            }
878        }
879        DestClass::PresentDirty => {
880            // Conservative: leave the dirty tree untouched. The
881            // operator has uncommitted work; v1.2.0 walker policy
882            // is to never overwrite their bytes during Phase 1.
883            // Phase 2 will surface a refusal if the operator ALSO
884            // requested a prune of this path, but that's a
885            // separate decision made by the caller's lockfile-
886            // orphan computation.
887        }
888        DestClass::PresentInProgress => {
889            out.error = Some(TreeError::DirtyTreeRefusal {
890                path: dest.clone(),
891                kind: super::error::DirtyTreeRefusalKind::GitInProgress,
892            });
893        }
894        DestClass::PresentUndeclared => {
895            // Buffer for `aggregate_untracked` so we surface the
896            // FULL list in one error.
897            out.undeclared = Some((dest, class));
898        }
899    }
900    out
901}
902
903/// Phase 1 clone helper. Acquires the M6 `PackLock` on the prospective
904/// dest's parent (`meta_dir`) for the duration of the clone — distinct
905/// children clone serially within a meta to keep the scheduler-tier
906/// model honest. Sibling parallelism is a 1.j follow-up.
907fn phase1_clone(
908    backend: &dyn GitBackend,
909    child: &ChildRef,
910    dest: &Path,
911    opts: &SyncMetaOptions,
912) -> Result<(), TreeError> {
913    let effective_ref = opts.ref_override.as_deref().or(child.r#ref.as_deref());
914    // Make sure the dest's parent exists — the clone backend assumes
915    // it. v1.2.0 invariant 1 (boundary) and 1.c's `validate_children_paths`
916    // already ruled out a path that would escape `meta_dir`, so a
917    // simple `create_dir_all` on the parent is safe here.
918    if let Some(parent) = dest.parent() {
919        std::fs::create_dir_all(parent).map_err(|e| {
920            TreeError::ManifestRead(format!("failed to mkdir parent {}: {e}", parent.display()))
921        })?;
922    }
923    backend.clone(&child.url, dest, effective_ref)?;
924    Ok(())
925}
926
927/// Phase 1 fetch helper. Same locking discipline as `phase1_clone`.
928fn phase1_fetch(
929    backend: &dyn GitBackend,
930    child: &ChildRef,
931    dest: &Path,
932    opts: &SyncMetaOptions,
933) -> Result<(), TreeError> {
934    backend.fetch(dest)?;
935    let effective_ref = opts.ref_override.as_deref().or(child.r#ref.as_deref());
936    if let Some(r) = effective_ref {
937        backend.checkout(dest, r)?;
938    }
939    Ok(())
940}
941
942/// Phase 2: prune orphan lockfile entries. Each candidate is run
943/// through the consent-walk via `phase2_prune` (1.f); a `Clean` verdict
944/// removes the dest, anything else surfaces as an error. The orphan
945/// list is supplied by the caller — 1.h owns the lockfile-read side
946/// of the walker contract.
947fn phase2_prune_orphans(
948    meta_dir: &Path,
949    prune_candidates: &[PathBuf],
950    opts: &SyncMetaOptions,
951    report: &mut SyncMetaReport,
952) {
953    // v1.2.0 Stage 1.l — postmortem audit log path. Resolved once per
954    // meta from the canonical `<meta_dir>/.grex/events.jsonl` slot;
955    // `phase2_prune` only writes to it when an override flag actually
956    // consumed a non-Clean verdict (clean prunes never log).
957    let audit_log = crate::manifest::event_log_path(meta_dir);
958    for candidate in prune_candidates {
959        // Candidates are parent-relative POSIX paths
960        // (`LockEntry::validate_path` invariant from 1.b). Resolve
961        // against `meta_dir` to get the absolute dest.
962        let dest = meta_dir.join(candidate);
963        match phase2_prune(
964            &dest,
965            opts.force_prune,
966            opts.force_prune_with_ignored,
967            Some(audit_log.as_path()),
968            opts.quarantine.as_ref(),
969        ) {
970            Ok(()) => report.phase2_pruned.push(dest),
971            Err(e) => report.errors.push(e),
972        }
973    }
974}
975
976/// Per-child output from Phase 3's parallel recursion. Each variant
977/// carries either a successful sub-`SyncMetaReport` (folded into the
978/// caller via [`SyncMetaReport::merge`]) or a fatal error to push onto
979/// `report.errors`. Children whose dest does NOT carry a sub-meta
980/// produce `Skipped`.
981///
982/// v1.2.4 — `Cancelled` is the EARLY-OUT contributed by a sibling
983/// closure that observed the per-`phase3_recurse` cancellation flag
984/// already flipped to `true`. It carries no sub-report (no work was
985/// done) and never contributes to `report.metas_visited` or
986/// `report.errors` — the cycle that triggered the flip is the sole
987/// error reported. Mirrors the Lean
988/// `cancellation_terminates_promptly` theorem: when `cancelled` is
989/// observed at entry, return ok with zero descent.
990enum Phase3ChildOutcome {
991    Skipped,
992    Recursed(SyncMetaReport),
993    Failed(TreeError),
994    Cancelled,
995}
996
997/// Phase 3: parallel recursion into child metas. A child qualifies for
998/// recursion when:
999///
1000///   1. `opts.recurse` is `true`,
1001///   2. `opts.max_depth` is unbounded OR the next-frame depth is
1002///      strictly less than the cap,
1003///   3. `<dest>/.grex/pack.yaml` exists.
1004///
1005/// Sub-meta reports are merged into the parent's report via
1006/// [`SyncMetaReport::merge`] so a top-level caller sees one rolled-up
1007/// view of every frame's classifications + errors.
1008///
1009/// v1.2.1 item 3 — sibling-parallel via rayon `par_iter`. Each
1010/// recursion frame builds its own thread pool inside `sync_meta_inner`
1011/// (work-stealing across recursion levels happens naturally because
1012/// the inner `pool.install` blocks for the lifetime of the inner
1013/// sync_meta call; sibling sub-metas at level N execute in parallel
1014/// via the level-N pool, and each level-N child carries its own
1015/// level-(N+1) pool for its own grandchildren). Sub-reports are
1016/// collected source-ordered via `collect_into_vec`, then folded into
1017/// `report` sequentially to preserve deterministic ordering of the
1018/// `phase1_classifications` / `phase2_pruned` / `errors` vectors.
1019// Pre-rayon refactor this fn already carried 7 args (the clippy cap).
1020// v1.2.1 item 3 added the `pool` reference, taking it to 8. Bundling
1021// these into a context struct is technically possible but every other
1022// arg already comes from `sync_meta_inner`'s param list, so the struct
1023// would just shuffle the wiring without removing it. Localised allow
1024// instead — the call-site is private to this module and threads
1025// ownership of `pool` cleanly.
1026/// Per-child Phase 3 dispatch — runs inside the rayon pool. Mirrors
1027/// the `phase1_handle_child` / `sync_disjoint_commutes` discipline
1028/// (one discoverable Rust contract anchor per sibling unit of work)
1029/// and keeps `phase3_recurse` itself under the clippy line cap.
1030///
1031/// v1.2.2 — cycle detection lives here. `ancestors` is the in-progress
1032/// ancestor identity chain from root down to (but excluding) this
1033/// child. If the child's identity (`pack_identity_for_child`) is
1034/// already in the chain we surface `TreeError::CycleDetected` with
1035/// the chain extended by the recurring identity. Otherwise the
1036/// child's identity is appended (clone-per-child, A.1) so disjoint
1037/// sibling branches do not pollute each other's view.
1038///
1039/// v1.2.3 (B1) — the depth-cap check (`next_depth > opts.max_depth`)
1040/// MUST run AFTER the cycle check. Otherwise a cyclic manifest whose
1041/// cycle length exceeds `max_depth` would silently truncate without
1042/// surfacing `CycleDetected`: the depth cap is a "stop walking
1043/// further" knob, not a "ignore correctness invariants" knob.
1044///
1045/// v1.2.4 (A1) — cancellation flag plumbed through. The closure
1046/// observes `cancelled.load(Relaxed)` at entry: if a prior sibling
1047/// already detected a cycle and signalled, this closure returns
1048/// `Phase3ChildOutcome::Cancelled` with zero further work (no
1049/// classify, no clone, no recursion). When this closure itself
1050/// detects a cycle, it stores `true` into the flag BEFORE returning
1051/// `Phase3ChildOutcome::Failed(CycleDetected)` so any rayon-co-iterated
1052/// sibling not yet started observes the signal on its next entry. The
1053/// flag is per-`phase3_recurse`-call: recursive sub-`sync_meta_inner`
1054/// invocations build their own flag inside their own
1055/// `phase3_recurse`, so a cycle two levels down does not cancel
1056/// disjoint siblings at level one. Discharges the Lean
1057/// `cancellation_terminates_promptly` obligation.
1058#[allow(clippy::too_many_arguments)]
1059fn phase3_handle_child(
1060    meta_dir: &Path,
1061    child: &ChildRef,
1062    backend: &dyn GitBackend,
1063    loader: &dyn PackLoader,
1064    opts: &SyncMetaOptions,
1065    next_depth: usize,
1066    ancestors: &[String],
1067    cancelled: &AtomicBool,
1068) -> Phase3ChildOutcome {
1069    // v1.2.4 EARLY-OUT — a sibling closure already detected a cycle
1070    // and signalled. Return immediately with zero descent so partial
1071    // clones / deep walks never start. Matches the Lean
1072    // `cancellation_terminates_promptly` theorem: cancelled = true
1073    // implies ok with zero recursive steps.
1074    if cancelled.load(Ordering::Relaxed) {
1075        return Phase3ChildOutcome::Cancelled;
1076    }
1077    let dest = meta_dir.join(child.effective_path());
1078    if !dest.join(".grex").join("pack.yaml").is_file() {
1079        return Phase3ChildOutcome::Skipped;
1080    }
1081    // v1.2.2 cycle detection — discharges the
1082    // `sync_meta_no_cycle_infinite_clone` Lean theorem in
1083    // `proof/Grex/Walker.lean`. Identity is `url@ref` so the same
1084    // repo at two different refs is two distinct packs (intentional:
1085    // matches `pack_identity_for_child` and the build_graph cycle
1086    // detector at `graph_build.rs:174`). A single `Vec<String>`
1087    // doubles as O(depth) contains-check AND deterministic chain for
1088    // error display — depth is bounded ~5-10 in practice so linear
1089    // scan beats hashing here.
1090    //
1091    // v1.2.3 (B1): runs BEFORE the depth-cap early-return below so a
1092    // cycle longer than `max_depth` cannot hide behind truncation.
1093    let id = pack_identity_for_child(child);
1094    if ancestors.iter().any(|v| v == &id) {
1095        // v1.2.4 SIGNAL — flip the cancellation flag so any
1096        // co-iterated sibling closures observe it on their next
1097        // entry. `Relaxed` is sufficient: we need eventual visibility,
1098        // not strict happens-before ordering against any other memory
1099        // operation. See design doc §"Atomic ordering".
1100        cancelled.store(true, Ordering::Relaxed);
1101        let mut chain = ancestors.to_vec();
1102        chain.push(id);
1103        return Phase3ChildOutcome::Failed(TreeError::CycleDetected { chain });
1104    }
1105    // v1.2.3 (B1): depth-cap check moved from `phase3_recurse` to
1106    // here, AFTER the cycle check. `Skipped` rather than a hard error
1107    // because depth-cap truncation is a benign best-effort knob —
1108    // siblings further down the manifest tree should still classify.
1109    if let Some(cap) = opts.max_depth {
1110        if next_depth > cap {
1111            return Phase3ChildOutcome::Skipped;
1112        }
1113    }
1114    // Clone-per-child (A.1): each rayon iteration owns its own
1115    // ancestor view, so disjoint sibling branches do not see each
1116    // other on the path. A diamond where two siblings legitimately
1117    // depend on the same descendant is therefore not a cycle.
1118    let mut child_ancestors = ancestors.to_vec();
1119    child_ancestors.push(id);
1120    // Empty `prune_candidates` for the sub-meta — 1.h supplies the
1121    // sub-meta's distributed lockfile read via the same caller
1122    // pathway when it lands.
1123    match sync_meta_inner(&dest, backend, loader, opts, &[], next_depth, &child_ancestors) {
1124        Ok(sub) => Phase3ChildOutcome::Recursed(sub),
1125        Err(e) => Phase3ChildOutcome::Failed(e),
1126    }
1127}
1128
1129#[allow(clippy::too_many_arguments)]
1130fn phase3_recurse(
1131    pool: &rayon::ThreadPool,
1132    meta_dir: &Path,
1133    manifest: &PackManifest,
1134    backend: &dyn GitBackend,
1135    loader: &dyn PackLoader,
1136    opts: &SyncMetaOptions,
1137    depth: usize,
1138    ancestors: &[String],
1139    report: &mut SyncMetaReport,
1140) -> Result<(), TreeError> {
1141    if !opts.recurse {
1142        return Ok(());
1143    }
1144    let next_depth = depth + 1;
1145    // v1.2.3 (B1): depth-cap early-return removed from this site —
1146    // moved into `phase3_handle_child` AFTER the cycle check so a
1147    // cycle longer than `max_depth` cannot mask itself by tripping
1148    // the depth cap before the cycle test fires. The per-child
1149    // handler now treats `next_depth > cap` as `Skipped`.
1150    //
1151    // v1.2.4 (A1): per-call cancellation flag. One `Arc<AtomicBool>`
1152    // is constructed here and shared (read-only across closures, with
1153    // a single point-of-truth `store` in any closure that detects a
1154    // cycle) by every sibling iteration of this `par_iter`. Recursive
1155    // sub-`sync_meta_inner` calls build their own flag via their own
1156    // `phase3_recurse` — disjoint subtrees do not cross-cancel. The
1157    // flag is dropped when this fn returns, so its lifetime is exactly
1158    // the rayon parallel pass it scopes.
1159    //
1160    // Scope: cancels siblings within THIS Phase 3 fan-out call only;
1161    // recursive sub-fan-outs construct their own flag, so a cycle deep
1162    // in pack X does not cancel pack Y at root level. This isolation
1163    // is intentional and tested by
1164    // `cancellation_per_call_scope_isolates_subtrees`.
1165    let cancelled = Arc::new(AtomicBool::new(false));
1166    let outcomes: Vec<Phase3ChildOutcome> = pool.install(|| {
1167        manifest
1168            .children
1169            .par_iter()
1170            .map(|child| {
1171                phase3_handle_child(
1172                    meta_dir, child, backend, loader, opts, next_depth, ancestors, &cancelled,
1173                )
1174            })
1175            .collect()
1176    });
1177    // Cycle errors short-circuit (catastrophic — clone-storm risk);
1178    // every other outcome folds into the report per the existing
1179    // fail-loud-but-continue policy. v1.2.4: `Cancelled` outcomes are
1180    // skipped — they carry no sub-report and contribute neither to
1181    // `report.metas_visited` nor to `report.errors`. The cycle that
1182    // triggered the cancellation is the sole error reported.
1183    let mut first_cycle_idx: Option<usize> = None;
1184    for outcome in outcomes {
1185        match outcome {
1186            Phase3ChildOutcome::Skipped | Phase3ChildOutcome::Cancelled => {}
1187            Phase3ChildOutcome::Recursed(sub) => report.merge(sub),
1188            Phase3ChildOutcome::Failed(e) => {
1189                // v1.2.2 fix: surface all sibling cycles in
1190                // report.errors; first cycle returned as short-circuit
1191                // Err per fail-loud policy.
1192                if matches!(e, TreeError::CycleDetected { .. }) && first_cycle_idx.is_none() {
1193                    first_cycle_idx = Some(report.errors.len());
1194                }
1195                report.errors.push(e);
1196            }
1197        }
1198    }
1199    if let Some(idx) = first_cycle_idx {
1200        // Clone the cycle to return as the short-circuit Err while
1201        // leaving the original entry (and any sibling cycles) recorded
1202        // in report.errors for the caller to log/print.
1203        let TreeError::CycleDetected { chain } = &report.errors[idx] else {
1204            unreachable!("first_cycle_idx points at a CycleDetected variant by construction");
1205        };
1206        return Err(TreeError::CycleDetected { chain: chain.clone() });
1207    }
1208    Ok(())
1209}
1210
1211#[cfg(test)]
1212mod tests {
1213    use super::*;
1214
1215    /// Direct unit test of the synthesis helper — name must equal the
1216    /// child's `effective_path()`, type must be `Scripted`, and every
1217    /// list field must be empty.
1218    #[test]
1219    fn synthesize_plain_git_manifest_yields_leaf_scripted_pack() {
1220        let child = ChildRef {
1221            url: "https://example.com/algo-leet.git".to_string(),
1222            path: None,
1223            r#ref: None,
1224        };
1225        let manifest = synthesize_plain_git_manifest(&child);
1226        assert_eq!(manifest.name, child.effective_path());
1227        assert_eq!(manifest.name, "algo-leet");
1228        assert_eq!(manifest.r#type, PackType::Scripted);
1229        assert_eq!(manifest.schema_version.as_str(), "1");
1230        assert!(manifest.depends_on.is_empty());
1231        assert!(manifest.children.is_empty());
1232        assert!(manifest.actions.is_empty());
1233        assert!(manifest.teardown.is_none());
1234        assert!(manifest.extensions.is_empty());
1235        assert!(manifest.version.is_none());
1236    }
1237
1238    /// Explicit `path:` override wins over the URL-derived bare name —
1239    /// confirms the synthesised manifest's `name` mirrors what the
1240    /// parent declared, so `verify_child_name` passes by construction.
1241    #[test]
1242    fn synthesize_plain_git_manifest_honours_explicit_path() {
1243        let child = ChildRef {
1244            url: "https://example.com/some-repo.git".to_string(),
1245            path: Some("custom-name".to_string()),
1246            r#ref: None,
1247        };
1248        let manifest = synthesize_plain_git_manifest(&child);
1249        assert_eq!(manifest.name, "custom-name");
1250    }
1251
1252    /// `dest_has_git_repo` MUST refuse a symlinked destination — even
1253    /// when the symlink target carries a real `.git/` directory.
1254    /// Otherwise a malicious parent pack could redirect synthesis to
1255    /// fetch into `$HOME` (or any sibling repo) by relying on a
1256    /// pre-existing symlink in the workspace.
1257    #[test]
1258    fn dest_has_git_repo_rejects_symlinked_dest() {
1259        // Skip on platforms where unprivileged symlink creation fails
1260        // (notably Windows without Developer Mode). Failing the symlink
1261        // call is itself proof the attack vector is closed for that
1262        // host, so the rest of the test is moot.
1263        let outer = tempfile::tempdir().unwrap();
1264        let real = outer.path().join("real-repo");
1265        std::fs::create_dir_all(real.join(".git")).unwrap();
1266        let link = outer.path().join("via-link");
1267
1268        #[cfg(unix)]
1269        let symlink_result = std::os::unix::fs::symlink(&real, &link);
1270        #[cfg(windows)]
1271        let symlink_result = std::os::windows::fs::symlink_dir(&real, &link);
1272
1273        if symlink_result.is_err() {
1274            // Host won't let us create a symlink — nothing to test.
1275            return;
1276        }
1277
1278        // Sanity: following the symlink would reveal `.git`.
1279        assert!(link.join(".git").exists(), "symlink target should expose .git through traversal");
1280        // But `dest_has_git_repo` must refuse it.
1281        assert!(
1282            !dest_has_git_repo(&link),
1283            "dest_has_git_repo must refuse a symlinked destination even when target has .git"
1284        );
1285        // Real (non-symlinked) sibling still passes — we haven't
1286        // accidentally broken the happy path.
1287        assert!(dest_has_git_repo(&real));
1288    }
1289
1290    // -----------------------------------------------------------------
1291    // v1.2.0 Stage 1.g — `sync_meta` three-phase walker tests (TDD).
1292    //
1293    // These tests use a thin in-memory `MockLoader` plus
1294    // `MockGitBackend` so the walker's PHASE ORCHESTRATION (not the
1295    // backend mechanics) is what's being exercised. The git-touching
1296    // primitives `classify_dest` (1.e) and `phase2_prune` (1.f) have
1297    // their own per-host tests that already cover the real-FS-and-git
1298    // path. The `host_has_git_binary` gate guards the few tests that
1299    // need a working `git` to materialise a clean `PresentDeclared`
1300    // verdict — same precedent as the `dest_class::tests` host-skip
1301    // pattern.
1302    // -----------------------------------------------------------------
1303
1304    use std::collections::HashMap;
1305    use std::sync::Mutex;
1306
1307    /// Minimal stand-in `PackLoader` for the v1.2.0 tests. Maps
1308    /// `meta_dir` → `PackManifest` directly so we never touch disk
1309    /// for manifest reads.
1310    struct InMemLoader {
1311        manifests: HashMap<PathBuf, PackManifest>,
1312    }
1313
1314    impl InMemLoader {
1315        fn new() -> Self {
1316            Self { manifests: HashMap::new() }
1317        }
1318        fn with(mut self, dir: impl Into<PathBuf>, m: PackManifest) -> Self {
1319            self.manifests.insert(dir.into(), m);
1320            self
1321        }
1322    }
1323
1324    impl PackLoader for InMemLoader {
1325        fn load(&self, path: &Path) -> Result<PackManifest, TreeError> {
1326            self.manifests
1327                .get(path)
1328                .cloned()
1329                .ok_or_else(|| TreeError::ManifestNotFound(path.to_path_buf()))
1330        }
1331    }
1332
1333    /// Minimal stand-in `GitBackend`. Records every call so tests can
1334    /// assert phase orchestration. `clone` materialises a `.git/`
1335    /// under the supplied dest so subsequent classify probes treat the
1336    /// slot as Present.
1337    #[allow(dead_code)] // fields populated for future test introspection.
1338    #[derive(Debug, Clone)]
1339    enum BackendCall {
1340        Clone { url: String, dest: PathBuf, r#ref: Option<String> },
1341        Fetch { dest: PathBuf },
1342        Checkout { dest: PathBuf, r#ref: String },
1343        HeadSha { dest: PathBuf },
1344    }
1345
1346    struct InMemGit {
1347        calls: Mutex<Vec<BackendCall>>,
1348        materialise_on_clone: bool,
1349    }
1350
1351    impl InMemGit {
1352        fn new() -> Self {
1353            Self { calls: Mutex::new(Vec::new()), materialise_on_clone: true }
1354        }
1355        fn calls(&self) -> Vec<BackendCall> {
1356            self.calls.lock().unwrap().clone()
1357        }
1358    }
1359
1360    impl GitBackend for InMemGit {
1361        fn name(&self) -> &'static str {
1362            "v1_2_0-mock-git"
1363        }
1364        fn clone(
1365            &self,
1366            url: &str,
1367            dest: &Path,
1368            r#ref: Option<&str>,
1369        ) -> Result<crate::ClonedRepo, crate::GitError> {
1370            self.calls.lock().unwrap().push(BackendCall::Clone {
1371                url: url.to_string(),
1372                dest: dest.to_path_buf(),
1373                r#ref: r#ref.map(str::to_string),
1374            });
1375            if self.materialise_on_clone {
1376                std::fs::create_dir_all(dest.join(".git")).unwrap();
1377            }
1378            Ok(crate::ClonedRepo { path: dest.to_path_buf(), head_sha: "0".repeat(40) })
1379        }
1380        fn fetch(&self, dest: &Path) -> Result<(), crate::GitError> {
1381            self.calls.lock().unwrap().push(BackendCall::Fetch { dest: dest.to_path_buf() });
1382            Ok(())
1383        }
1384        fn checkout(&self, dest: &Path, r#ref: &str) -> Result<(), crate::GitError> {
1385            self.calls
1386                .lock()
1387                .unwrap()
1388                .push(BackendCall::Checkout { dest: dest.to_path_buf(), r#ref: r#ref.to_string() });
1389            Ok(())
1390        }
1391        fn head_sha(&self, dest: &Path) -> Result<String, crate::GitError> {
1392            self.calls.lock().unwrap().push(BackendCall::HeadSha { dest: dest.to_path_buf() });
1393            Ok("0".repeat(40))
1394        }
1395    }
1396
1397    /// Build a meta manifest with the supplied children.
1398    fn meta_manifest_with(name: &str, children: Vec<ChildRef>) -> PackManifest {
1399        PackManifest {
1400            schema_version: SchemaVersion::current(),
1401            name: name.to_string(),
1402            r#type: PackType::Meta,
1403            version: None,
1404            depends_on: Vec::new(),
1405            children,
1406            actions: Vec::new(),
1407            teardown: None,
1408            extensions: BTreeMap::new(),
1409        }
1410    }
1411
1412    fn child(url: &str, path: &str) -> ChildRef {
1413        ChildRef { url: url.to_string(), path: Some(path.to_string()), r#ref: None }
1414    }
1415
1416    fn host_has_git_binary() -> bool {
1417        std::process::Command::new("git")
1418            .arg("--version")
1419            .output()
1420            .is_ok_and(|o| o.status.success())
1421    }
1422
1423    /// Empty meta — no children → the walker returns Ok with no work.
1424    #[test]
1425    fn test_walker_v1_2_0_simple_meta_no_children() {
1426        let tmp = tempfile::tempdir().unwrap();
1427        let meta_dir = tmp.path().to_path_buf();
1428        let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("solo", vec![]));
1429        let backend = InMemGit::new();
1430        let opts = SyncMetaOptions::default();
1431        let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1432        assert_eq!(report.metas_visited, 1);
1433        assert!(report.phase1_classifications.is_empty());
1434        assert!(report.phase2_pruned.is_empty());
1435        assert!(report.errors.is_empty());
1436        assert!(backend.calls().is_empty(), "no children → no git ops");
1437    }
1438
1439    /// Phase 1 classifies each child. With every dest absent on disk,
1440    /// every classification is `Missing` and the backend sees one
1441    /// `Clone` per child.
1442    #[test]
1443    fn test_walker_v1_2_0_phase1_classifies_each_child() {
1444        let tmp = tempfile::tempdir().unwrap();
1445        let meta_dir = tmp.path().to_path_buf();
1446        let kids = vec![
1447            child("https://example.com/a.git", "alpha"),
1448            child("https://example.com/b.git", "beta"),
1449        ];
1450        let loader =
1451            InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", kids.clone()));
1452        let backend = InMemGit::new();
1453        let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1454        let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1455        assert_eq!(report.phase1_classifications.len(), 2);
1456        for (parent, _, class) in &report.phase1_classifications {
1457            assert_eq!(parent, &meta_dir);
1458            assert_eq!(*class, DestClass::Missing);
1459        }
1460        assert!(report.errors.is_empty());
1461        let calls = backend.calls();
1462        assert_eq!(calls.len(), 2, "one clone per child");
1463        for call in calls {
1464            assert!(matches!(call, BackendCall::Clone { .. }));
1465        }
1466    }
1467
1468    /// Phase 1 must aggregate every undeclared `.git/` directory it
1469    /// encounters into a single `UntrackedGitRepos` error. We
1470    /// pre-create two `.git/` slots BEFORE running `sync_meta` and
1471    /// declare them as siblings without paths matching — they classify
1472    /// as `PresentUndeclared` because the manifest does not list them.
1473    #[test]
1474    fn test_walker_v1_2_0_phase1_aggregates_untracked_error() {
1475        // Build a meta whose manifest declares ZERO children — every
1476        // pre-existing `.git/` slot is by definition undeclared.
1477        // Then drop two `.git/` directories under the meta dir and
1478        // (because v1.2.0's classifier needs the manifest declaration
1479        // signal at the call site, not on-disk discovery) run a
1480        // PARALLEL classifier sweep over the on-disk dirs to feed the
1481        // aggregator. This mirrors the way 1.h's lockfile-orphan
1482        // sweep will surface PresentUndeclared dirs into Phase 1's
1483        // collector when a child is removed from the manifest.
1484        let tmp = tempfile::tempdir().unwrap();
1485        let alpha = tmp.path().join("alpha");
1486        let beta = tmp.path().join("beta");
1487        std::fs::create_dir_all(alpha.join(".git")).unwrap();
1488        std::fs::create_dir_all(beta.join(".git")).unwrap();
1489        // Direct unit on the aggregator: feed two `PresentUndeclared`
1490        // pairs and assert the error carries both.
1491        let pairs: Vec<(PathBuf, DestClass)> = vec![
1492            (alpha.clone(), DestClass::PresentUndeclared),
1493            (beta.clone(), DestClass::PresentUndeclared),
1494        ];
1495        let err = aggregate_untracked(pairs).expect_err("two undeclared → error");
1496        match err {
1497            TreeError::UntrackedGitRepos { paths } => {
1498                assert_eq!(paths, vec![alpha, beta]);
1499            }
1500            other => panic!("expected UntrackedGitRepos, got {other:?}"),
1501        }
1502    }
1503
1504    /// Phase 2 prunes a clean orphan: the supplied candidate has a
1505    /// real `.git/` (initialised by `git init`), the consent walk
1506    /// returns Clean, the dest is removed.
1507    #[test]
1508    fn test_walker_v1_2_0_phase2_prunes_clean_orphans() {
1509        if !host_has_git_binary() {
1510            return;
1511        }
1512        let tmp = tempfile::tempdir().unwrap();
1513        let meta_dir = tmp.path().to_path_buf();
1514        // Create the orphan dest — clean repo, no manifest entry.
1515        let orphan = meta_dir.join("ghost");
1516        std::fs::create_dir_all(&orphan).unwrap();
1517        let init =
1518            std::process::Command::new("git").arg("-C").arg(&orphan).args(["init", "-q"]).status();
1519        if !matches!(init, Ok(s) if s.success()) {
1520            return;
1521        }
1522        let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", vec![]));
1523        let backend = InMemGit::new();
1524        let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1525        let prune_list = vec![PathBuf::from("ghost")];
1526        let report = sync_meta(&meta_dir, &backend, &loader, &opts, &prune_list).expect("ok");
1527        assert_eq!(report.phase2_pruned.len(), 1, "clean orphan must be pruned");
1528        assert_eq!(report.phase2_pruned[0], orphan);
1529        assert!(!orphan.exists(), "dest must be removed after a clean prune");
1530        assert!(report.errors.is_empty());
1531    }
1532
1533    /// Phase 2 must REFUSE to prune a dirty orphan absent the override
1534    /// flag. The consent walk classifies it `DirtyTree`; the walker
1535    /// surfaces `DirtyTreeRefusal` and leaves the dest untouched.
1536    #[test]
1537    fn test_walker_v1_2_0_phase2_refuses_dirty_orphan() {
1538        if !host_has_git_binary() {
1539            return;
1540        }
1541        let tmp = tempfile::tempdir().unwrap();
1542        let meta_dir = tmp.path().to_path_buf();
1543        let orphan = meta_dir.join("dirty-ghost");
1544        std::fs::create_dir_all(&orphan).unwrap();
1545        let init =
1546            std::process::Command::new("git").arg("-C").arg(&orphan).args(["init", "-q"]).status();
1547        if !matches!(init, Ok(s) if s.success()) {
1548            return;
1549        }
1550        std::fs::write(orphan.join("scratch.txt"), b"unsaved").unwrap();
1551        let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", vec![]));
1552        let backend = InMemGit::new();
1553        let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1554        let prune_list = vec![PathBuf::from("dirty-ghost")];
1555        let report = sync_meta(&meta_dir, &backend, &loader, &opts, &prune_list).expect("ok");
1556        assert!(report.phase2_pruned.is_empty(), "dirty orphan must NOT be pruned");
1557        assert!(orphan.exists(), "dest stays on disk when refused");
1558        assert_eq!(report.errors.len(), 1);
1559        assert!(matches!(report.errors[0], TreeError::DirtyTreeRefusal { .. }));
1560    }
1561
1562    /// Phase 3 recurses into a child meta when its `.grex/pack.yaml`
1563    /// exists. The sub-meta's own `metas_visited` is folded into the
1564    /// parent's report.
1565    #[test]
1566    fn test_walker_v1_2_0_phase3_recurses_into_sub_meta() {
1567        let tmp = tempfile::tempdir().unwrap();
1568        let meta_dir = tmp.path().to_path_buf();
1569        let child_dest = meta_dir.join("sub");
1570        // Pre-materialise the sub-meta on disk so Phase 1 classifies
1571        // the dest as PresentDeclared (no clone fired) and Phase 3
1572        // sees a `.grex/pack.yaml` to recurse into.
1573        make_sub_meta_on_disk(&child_dest, "sub");
1574        let loader = InMemLoader::new()
1575            .with(
1576                meta_dir.clone(),
1577                meta_manifest_with("root", vec![child("https://example.com/sub.git", "sub")]),
1578            )
1579            .with(child_dest.clone(), meta_manifest_with("sub", vec![]));
1580        let backend = InMemGit::new();
1581        let opts = SyncMetaOptions::default();
1582        let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1583        assert_eq!(report.metas_visited, 2, "parent + sub-meta visited");
1584        assert!(report.errors.is_empty());
1585    }
1586
1587    /// `recurse: false` skips Phase 3 entirely — `metas_visited == 1`
1588    /// even when a child has a `.grex/pack.yaml`.
1589    #[test]
1590    fn test_walker_v1_2_0_phase3_max_depth_zero_skips_recursion() {
1591        let tmp = tempfile::tempdir().unwrap();
1592        let meta_dir = tmp.path().to_path_buf();
1593        let child_dest = meta_dir.join("sub");
1594        make_sub_meta_on_disk(&child_dest, "sub");
1595        let loader = InMemLoader::new()
1596            .with(
1597                meta_dir.clone(),
1598                meta_manifest_with("root", vec![child("https://example.com/sub.git", "sub")]),
1599            )
1600            .with(child_dest.clone(), meta_manifest_with("sub", vec![]));
1601        let backend = InMemGit::new();
1602        let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1603        let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1604        assert_eq!(report.metas_visited, 1, "no recursion → only the root meta");
1605    }
1606
1607    /// `max_depth: Some(N)` caps recursion at N levels of nesting.
1608    /// Build a 3-level chain (root → mid → leaf) and assert
1609    /// `max_depth: Some(1)` visits root + mid (depth 0 + 1) but NOT
1610    /// leaf (depth 2).
1611    #[test]
1612    fn test_walker_v1_2_0_phase3_max_depth_n_stops_at_n_levels() {
1613        let tmp = tempfile::tempdir().unwrap();
1614        let root_dir = tmp.path().to_path_buf();
1615        let mid_dir = root_dir.join("mid");
1616        let leaf_dir = mid_dir.join("leaf");
1617        make_sub_meta_on_disk(&mid_dir, "mid");
1618        make_sub_meta_on_disk(&leaf_dir, "leaf");
1619        let loader = InMemLoader::new()
1620            .with(
1621                root_dir.clone(),
1622                meta_manifest_with("root", vec![child("https://example.com/mid.git", "mid")]),
1623            )
1624            .with(
1625                mid_dir.clone(),
1626                meta_manifest_with("mid", vec![child("https://example.com/leaf.git", "leaf")]),
1627            )
1628            .with(leaf_dir.clone(), meta_manifest_with("leaf", vec![]));
1629        let backend = InMemGit::new();
1630        let opts = SyncMetaOptions { max_depth: Some(1), ..SyncMetaOptions::default() };
1631        let report = sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect("ok");
1632        // depth 0 = root, depth 1 = mid → max_depth: Some(1) visits
1633        // root + mid (2 metas) and stops before recursing into leaf.
1634        assert_eq!(report.metas_visited, 2, "max_depth: Some(1) visits root + mid only");
1635    }
1636
1637    /// Helper: pre-populate a sub-meta directory at `dir` with a
1638    /// `.grex/pack.yaml` carrying `name` and a stub `.git/` so the
1639    /// classifier sees it as PresentDeclared.
1640    fn make_sub_meta_on_disk(dir: &Path, name: &str) {
1641        std::fs::create_dir_all(dir.join(".grex")).unwrap();
1642        std::fs::create_dir_all(dir.join(".git")).unwrap();
1643        let yaml = format!("schema_version: \"1\"\nname: {name}\ntype: meta\n");
1644        std::fs::write(dir.join(".grex/pack.yaml"), yaml).unwrap();
1645    }
1646
1647    /// Helper: collect the destinations Phase 1 recorded for a given
1648    /// parent meta from the rolled-up report.
1649    fn destinations_under(report: &SyncMetaReport, parent: &Path) -> Vec<PathBuf> {
1650        report
1651            .phase1_classifications
1652            .iter()
1653            .filter(|(p, _, _)| p == parent)
1654            .map(|(_, d, _)| d.clone())
1655            .collect()
1656    }
1657
1658    /// Parent-relative path resolution: a child declared at the root
1659    /// meta resolves to `<root>/<child>` — NOT to a global workspace
1660    /// anchor. Recursion into that child uses `<root>/<child>` as the
1661    /// new parent meta dir for resolving the grandchild.
1662    #[test]
1663    fn test_walker_v1_2_0_parent_relative_path_resolution() {
1664        let tmp = tempfile::tempdir().unwrap();
1665        let root_dir = tmp.path().to_path_buf();
1666        // Note: 1.c's path-segment validator forbids slashes in the
1667        // `path:` field, so multi-segment nesting is achieved by
1668        // chaining single-segment children across recursion frames.
1669        let tools_dir = root_dir.join("tools");
1670        let foo_dir = tools_dir.join("foo");
1671        make_sub_meta_on_disk(&tools_dir, "tools");
1672        make_sub_meta_on_disk(&foo_dir, "foo");
1673        let loader = InMemLoader::new()
1674            .with(
1675                root_dir.clone(),
1676                meta_manifest_with("root", vec![child("https://example.com/tools.git", "tools")]),
1677            )
1678            .with(
1679                tools_dir.clone(),
1680                meta_manifest_with("tools", vec![child("https://example.com/foo.git", "foo")]),
1681            )
1682            .with(foo_dir.clone(), meta_manifest_with("foo", vec![]));
1683        let backend = InMemGit::new();
1684        let opts = SyncMetaOptions::default();
1685        let report = sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect("ok");
1686        // Three metas visited: root → tools → foo.
1687        assert_eq!(report.metas_visited, 3);
1688        // Phase 1 classifications confirm parent-relative resolution:
1689        // every recorded dest is a SUBDIR of its recorded parent.
1690        for (parent, dest, _class) in &report.phase1_classifications {
1691            assert!(
1692                dest.starts_with(parent),
1693                "child dest {} must descend from parent {}",
1694                dest.display(),
1695                parent.display()
1696            );
1697        }
1698        // Spot-check the chain: root sees `tools`, tools sees `foo`.
1699        assert_eq!(destinations_under(&report, &root_dir), vec![tools_dir.clone()]);
1700        assert_eq!(destinations_under(&report, &tools_dir), vec![foo_dir.clone()]);
1701    }
1702
1703    // -----------------------------------------------------------------
1704    // v1.2.2 — `sync_meta` cycle detection (Phase 3 recursion edge).
1705    //
1706    // Discharges `sync_meta_no_cycle_infinite_clone` in
1707    // `proof/Grex/Walker.lean`. Identity scheme is `url@ref` so the
1708    // same repo at two different refs is NOT a cycle (covered by the
1709    // positive case below).
1710    // -----------------------------------------------------------------
1711
1712    /// `child_with_ref` mirrors `child()` but lets the caller pin a
1713    /// specific ref so two children of the same URL get distinct
1714    /// `pack_identity_for_child` strings (`url@ref`).
1715    fn child_with_ref(url: &str, path: &str, r#ref: &str) -> ChildRef {
1716        ChildRef {
1717            url: url.to_string(),
1718            path: Some(path.to_string()),
1719            r#ref: Some(r#ref.to_string()),
1720        }
1721    }
1722
1723    /// Self-loop: pack A declares itself (same URL, no ref) as a child.
1724    /// The walker must abort with `CycleDetected` rather than recurse
1725    /// infinitely. The chain reports the recurring identity.
1726    #[test]
1727    fn cycle_self_loop_aborts() {
1728        let tmp = tempfile::tempdir().unwrap();
1729        let root_dir = tmp.path().to_path_buf();
1730        // Lay out a self-pointing pack: `<root>/a` is a sub-meta whose
1731        // own manifest declares a child with the SAME URL/ref pointing
1732        // back at itself (placed at a fresh path so on-disk dest is
1733        // distinct, but pack identity collides).
1734        let a_dir = root_dir.join("a");
1735        let a_self_dir = a_dir.join("a");
1736        make_sub_meta_on_disk(&a_dir, "a");
1737        make_sub_meta_on_disk(&a_self_dir, "a");
1738        let url_a = "https://example.com/a.git";
1739        let loader = InMemLoader::new()
1740            .with(root_dir.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
1741            // `a` declares itself — same url, same (empty) ref → same identity.
1742            .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_a, "a")]))
1743            .with(a_self_dir.clone(), meta_manifest_with("a", vec![]));
1744        let backend = InMemGit::new();
1745        let opts = SyncMetaOptions::default();
1746        let err =
1747            sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect_err("self-loop must abort");
1748        match err {
1749            TreeError::CycleDetected { chain } => {
1750                // v1.2.3 (B4): chain begins with the root's
1751                // path-namespaced identity (`path:<root_dir>`) — the
1752                // initial visited seed — followed by the cyclic
1753                // child identities. v1.2.3 (B2): empty/None ref drops
1754                // the trailing `@`, so the cyclic id is just
1755                // `url:<url_a>` (no `@`).
1756                let id_a = format!("url:{url_a}");
1757                assert!(
1758                    chain.iter().any(|s| s == &id_a),
1759                    "chain must mention the cyclic url, got {chain:?}"
1760                );
1761                assert!(chain.len() >= 2, "self-loop chain has at least 2 entries: {chain:?}");
1762                let last = chain.last().unwrap();
1763                assert_eq!(last, &id_a, "chain must end with the recurring child identity");
1764                let first_match = chain.iter().position(|s| s == last).unwrap();
1765                assert!(
1766                    first_match < chain.len() - 1,
1767                    "the recurring identity must appear earlier in the chain: {chain:?}"
1768                );
1769                // The root frame is path-namespaced and disjoint from
1770                // any child's url-namespaced identity, so it must
1771                // appear at the head of the chain without colliding.
1772                assert!(
1773                    chain[0].starts_with("path:"),
1774                    "chain head is the root path identity: {chain:?}"
1775                );
1776            }
1777            other => panic!("expected CycleDetected, got {other:?}"),
1778        }
1779    }
1780
1781    /// Three-node cycle: A → B → C → A. The walker must abort with
1782    /// `CycleDetected` and the chain must list all three identities
1783    /// in the order they were entered, ending with the recurring A.
1784    #[test]
1785    fn cycle_three_node_aborts() {
1786        let tmp = tempfile::tempdir().unwrap();
1787        let root_dir = tmp.path().to_path_buf();
1788        // Disk layout: root → a → b → c → a (the second `a` lives at
1789        // a fresh on-disk slot so classification succeeds; identity
1790        // collision is what trips the cycle detector, not the path).
1791        let a_dir = root_dir.join("a");
1792        let b_dir = a_dir.join("b");
1793        let c_dir = b_dir.join("c");
1794        let a2_dir = c_dir.join("a");
1795        make_sub_meta_on_disk(&a_dir, "a");
1796        make_sub_meta_on_disk(&b_dir, "b");
1797        make_sub_meta_on_disk(&c_dir, "c");
1798        make_sub_meta_on_disk(&a2_dir, "a");
1799        let url_a = "https://example.com/a.git";
1800        let url_b = "https://example.com/b.git";
1801        let url_c = "https://example.com/c.git";
1802        let loader = InMemLoader::new()
1803            .with(root_dir.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
1804            .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_b, "b")]))
1805            .with(b_dir.clone(), meta_manifest_with("b", vec![child(url_c, "c")]))
1806            // c re-declares a → cycle.
1807            .with(c_dir.clone(), meta_manifest_with("c", vec![child(url_a, "a")]))
1808            .with(a2_dir.clone(), meta_manifest_with("a", vec![]));
1809        let backend = InMemGit::new();
1810        let opts = SyncMetaOptions::default();
1811        let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
1812            .expect_err("three-node cycle must abort");
1813        match err {
1814            TreeError::CycleDetected { chain } => {
1815                // v1.2.3 (B4): chain leads with the root's
1816                // path-namespaced identity. v1.2.3 (B2): empty/None
1817                // ref drops the trailing `@`. Chain order:
1818                // [path:root, a, b, c, a] (entry order, with the
1819                // recurring `a` appended at the cycle-detection point).
1820                let id_root = pack_identity_for_root(&root_dir);
1821                let id_a = format!("url:{url_a}");
1822                let id_b = format!("url:{url_b}");
1823                let id_c = format!("url:{url_c}");
1824                assert_eq!(chain, vec![id_root, id_a.clone(), id_b, id_c, id_a]);
1825            }
1826            other => panic!("expected CycleDetected, got {other:?}"),
1827        }
1828    }
1829
1830    /// Same repo, two refs — NOT a cycle. Pack A declares two children
1831    /// pointing at the SAME URL but pinned to different refs (`main`
1832    /// vs `dev`). Identity scheme is `url@ref` so the two siblings
1833    /// have distinct identities and the walker must succeed.
1834    #[test]
1835    fn same_repo_two_refs_no_cycle() {
1836        let tmp = tempfile::tempdir().unwrap();
1837        let root_dir = tmp.path().to_path_buf();
1838        let main_dir = root_dir.join("b-main");
1839        let dev_dir = root_dir.join("b-dev");
1840        make_sub_meta_on_disk(&main_dir, "b-main");
1841        make_sub_meta_on_disk(&dev_dir, "b-dev");
1842        let url_b = "https://example.com/b.git";
1843        let loader = InMemLoader::new()
1844            .with(
1845                root_dir.clone(),
1846                meta_manifest_with(
1847                    "root",
1848                    vec![
1849                        child_with_ref(url_b, "b-main", "main"),
1850                        child_with_ref(url_b, "b-dev", "dev"),
1851                    ],
1852                ),
1853            )
1854            .with(main_dir.clone(), meta_manifest_with("b-main", vec![]))
1855            .with(dev_dir.clone(), meta_manifest_with("b-dev", vec![]));
1856        let backend = InMemGit::new();
1857        let opts = SyncMetaOptions::default();
1858        let report = sync_meta(&root_dir, &backend, &loader, &opts, &[])
1859            .expect("same url at distinct refs is NOT a cycle");
1860        // Three metas visited: root + b@main + b@dev.
1861        assert_eq!(report.metas_visited, 3);
1862        assert!(
1863            report.errors.is_empty(),
1864            "no errors expected when the two children differ only by ref: {:?}",
1865            report.errors
1866        );
1867    }
1868
1869    /// Same repo, two refs — NESTED (ancestor-stack) variant. Pack A
1870    /// (URL=foo, ref=main) declares pack B (URL=foo, ref=dev) as its
1871    /// child. Identity scheme is `url@ref`, so A's identity
1872    /// (`url:foo@main`) and B's identity (`url:foo@dev`) differ. The
1873    /// cycle detector must NOT trip even though B's URL collides with
1874    /// an ancestor on the stack — exercises the path the sibling
1875    /// variant above doesn't reach.
1876    #[test]
1877    fn same_repo_two_refs_nested_no_cycle() {
1878        let tmp = tempfile::tempdir().unwrap();
1879        let root_dir = tmp.path().to_path_buf();
1880        let a_dir = root_dir.join("a");
1881        let b_dir = a_dir.join("b");
1882        make_sub_meta_on_disk(&a_dir, "a");
1883        make_sub_meta_on_disk(&b_dir, "b");
1884        let url_foo = "https://example.com/foo.git";
1885        let loader = InMemLoader::new()
1886            .with(
1887                root_dir.clone(),
1888                meta_manifest_with("root", vec![child_with_ref(url_foo, "a", "main")]),
1889            )
1890            // a (foo@main) declares b (foo@dev) — same URL, different ref.
1891            .with(a_dir.clone(), meta_manifest_with("a", vec![child_with_ref(url_foo, "b", "dev")]))
1892            .with(b_dir.clone(), meta_manifest_with("b", vec![]));
1893        let backend = InMemGit::new();
1894        let opts = SyncMetaOptions::default();
1895        let report = sync_meta(&root_dir, &backend, &loader, &opts, &[])
1896            .expect("nested same-url at distinct refs is NOT a cycle");
1897        // Walker must reach depth 2: root → a → b (3 metas).
1898        assert_eq!(report.metas_visited, 3, "walker must recurse to depth 2");
1899        assert!(
1900            report.errors.is_empty(),
1901            "no errors expected when ancestor and descendant differ only by ref: {:?}",
1902            report.errors
1903        );
1904    }
1905
1906    // -----------------------------------------------------------------
1907    // v1.2.3 — additional cycle/diamond coverage (T1, T2, T3).
1908    //
1909    // T1 covers the diamond-shared-descendant case the
1910    // clone-per-child scheme is meant to permit; T2 stretches the
1911    // cycle to length 4 to exercise chain accumulation; T3 verifies
1912    // the cycle detector sees a cycle introduced inside an inner
1913    // subtree even though the outer arm is acyclic.
1914    // -----------------------------------------------------------------
1915
1916    /// T1 — Diamond, NO cycle. Topology:
1917    ///
1918    /// ```text
1919    ///   root → A
1920    ///   root → B
1921    ///   A    → C
1922    ///   B    → C   (C is a shared descendant)
1923    /// ```
1924    ///
1925    /// Walker must traverse all four packs and produce no
1926    /// `CycleDetected`. Because the cycle detector clones the
1927    /// ancestor chain per child, A's descendants do not poison B's
1928    /// descendant view, so seeing `C` from both arms is a diamond,
1929    /// not a cycle.
1930    ///
1931    /// **v1.2.4 T1-spot-check extension.** In addition to the
1932    /// `metas_visited == 5` count assertion, this test also confirms
1933    /// that C is genuinely walked through BOTH arms: the
1934    /// `phase1_classifications` table must record one entry whose
1935    /// parent is `a/` (with dest `a/c`) AND one entry whose parent is
1936    /// `b/` (with dest `b/c`). Counting alone (`metas_visited`) cannot
1937    /// catch a regression where a future memoization optimization
1938    /// collapses the second walk into a no-op while still incrementing
1939    /// the counter — tracking the actual dest paths does.
1940    #[test]
1941    fn cycle_diamond_shared_descendant_no_cycle() {
1942        let tmp = tempfile::tempdir().unwrap();
1943        let root_dir = tmp.path().to_path_buf();
1944        // Disk layout: root/a, root/b, root/a/c, root/b/c.
1945        // Each `c` lives at a distinct on-disk slot so classify
1946        // succeeds; identity equality is what would (incorrectly)
1947        // trip the cycle detector if clone-per-child were broken.
1948        let a_dir = root_dir.join("a");
1949        let b_dir = root_dir.join("b");
1950        let c_under_a_dir = a_dir.join("c");
1951        let c_under_b_dir = b_dir.join("c");
1952        make_sub_meta_on_disk(&a_dir, "a");
1953        make_sub_meta_on_disk(&b_dir, "b");
1954        make_sub_meta_on_disk(&c_under_a_dir, "c");
1955        make_sub_meta_on_disk(&c_under_b_dir, "c");
1956        let url_a = "https://example.com/a.git";
1957        let url_b = "https://example.com/b.git";
1958        let url_c = "https://example.com/c.git";
1959        let loader = InMemLoader::new()
1960            .with(
1961                root_dir.clone(),
1962                meta_manifest_with("root", vec![child(url_a, "a"), child(url_b, "b")]),
1963            )
1964            .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_c, "c")]))
1965            .with(b_dir.clone(), meta_manifest_with("b", vec![child(url_c, "c")]))
1966            .with(c_under_a_dir.clone(), meta_manifest_with("c", vec![]))
1967            .with(c_under_b_dir.clone(), meta_manifest_with("c", vec![]));
1968        let backend = InMemGit::new();
1969        let opts = SyncMetaOptions::default();
1970        let report =
1971            sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect("diamond is NOT a cycle");
1972        // Four distinct manifest visits: root, a, b, c-via-a, c-via-b.
1973        // A and B both expand into their own `c`, so the walker
1974        // visits `c` twice (once per arm) — five `metas_visited`.
1975        assert_eq!(
1976            report.metas_visited, 5,
1977            "diamond: root + a + b + c-under-a + c-under-b = 5 visits"
1978        );
1979        // Crucially, no errors of any kind — and certainly not a
1980        // CycleDetected — because the two `C` visits live on
1981        // disjoint cloned ancestor chains.
1982        assert!(
1983            !report.errors.iter().any(|e| matches!(e, TreeError::CycleDetected { .. })),
1984            "diamond must not surface CycleDetected; errors={:?}",
1985            report.errors
1986        );
1987        assert!(report.errors.is_empty(), "diamond should produce no errors: {:?}", report.errors);
1988
1989        // v1.2.4 T1-spot-check: assert `c` was genuinely walked under
1990        // BOTH arms. The phase1_classifications table records every
1991        // (parent_meta, dest, class) triple observed during Phase 1
1992        // dispatch; for the diamond layout we expect:
1993        //   * (root, a)    — a is a direct child of root
1994        //   * (root, b)    — b is a direct child of root
1995        //   * (a, a/c)     — c-via-a (Phase 1 inside a's recursion)
1996        //   * (b, b/c)     — c-via-b (Phase 1 inside b's recursion)
1997        // If a future memoization regression collapses the second `c`
1998        // walk into a no-op while still incrementing `metas_visited`,
1999        // the (b, b/c) pair will be missing from this table and the
2000        // assertion below fails.
2001        let dests_under_a = destinations_under(&report, &a_dir);
2002        let dests_under_b = destinations_under(&report, &b_dir);
2003        assert!(
2004            dests_under_a.iter().any(|d| d == &c_under_a_dir),
2005            "diamond: expected c-via-a in classifications under a, got {dests_under_a:?}"
2006        );
2007        assert!(
2008            dests_under_b.iter().any(|d| d == &c_under_b_dir),
2009            "diamond: expected c-via-b in classifications under b, got {dests_under_b:?}"
2010        );
2011        assert_ne!(
2012            c_under_a_dir, c_under_b_dir,
2013            "the two `c` visits must land on distinct on-disk dests"
2014        );
2015    }
2016
2017    /// T2 — 4-node cycle: `root → A → B → C → D → A`. Cycle length 4
2018    /// in pack-identity terms; the reported chain has length 5 once
2019    /// the recurring `A` is appended at detection. The root frame's
2020    /// `path:` identity also leads the chain (B4), so the final
2021    /// length is 6.
2022    #[test]
2023    #[allow(clippy::too_many_lines)]
2024    fn cycle_four_node_aborts() {
2025        let tmp = tempfile::tempdir().unwrap();
2026        let root_dir = tmp.path().to_path_buf();
2027        // Disk chain: root → a → b → c → d → a (the second `a` lives
2028        // at a fresh slot so classify succeeds; identity collision is
2029        // what trips the cycle detector).
2030        let a_dir = root_dir.join("a");
2031        let b_dir = a_dir.join("b");
2032        let c_dir = b_dir.join("c");
2033        let d_dir = c_dir.join("d");
2034        let a2_dir = d_dir.join("a");
2035        make_sub_meta_on_disk(&a_dir, "a");
2036        make_sub_meta_on_disk(&b_dir, "b");
2037        make_sub_meta_on_disk(&c_dir, "c");
2038        make_sub_meta_on_disk(&d_dir, "d");
2039        make_sub_meta_on_disk(&a2_dir, "a");
2040        let url_a = "https://example.com/a.git";
2041        let url_b = "https://example.com/b.git";
2042        let url_c = "https://example.com/c.git";
2043        let url_d = "https://example.com/d.git";
2044        let loader = InMemLoader::new()
2045            .with(root_dir.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
2046            .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_b, "b")]))
2047            .with(b_dir.clone(), meta_manifest_with("b", vec![child(url_c, "c")]))
2048            .with(c_dir.clone(), meta_manifest_with("c", vec![child(url_d, "d")]))
2049            // d re-declares a → cycle of length 4 in url-namespace.
2050            .with(d_dir.clone(), meta_manifest_with("d", vec![child(url_a, "a")]))
2051            .with(a2_dir.clone(), meta_manifest_with("a", vec![]));
2052        let backend = InMemGit::new();
2053        let opts = SyncMetaOptions::default();
2054        let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
2055            .expect_err("four-node cycle must abort");
2056        match err {
2057            TreeError::CycleDetected { chain } => {
2058                let id_root = pack_identity_for_root(&root_dir);
2059                let id_a = format!("url:{url_a}");
2060                let id_b = format!("url:{url_b}");
2061                let id_c = format!("url:{url_c}");
2062                let id_d = format!("url:{url_d}");
2063                // [path:root, a, b, c, d, a] — six entries.
2064                assert_eq!(
2065                    chain,
2066                    vec![id_root, id_a.clone(), id_b, id_c, id_d, id_a.clone()],
2067                    "expected full ancestor chain ending in the recurring A"
2068                );
2069                assert!(
2070                    chain.len() >= 5,
2071                    "four-node cycle chain has at least 5 entries: {chain:?}"
2072                );
2073                // Last element repeats earlier in the chain (the
2074                // recurring identity).
2075                let last = chain.last().unwrap();
2076                let first_match = chain.iter().position(|s| s == last).unwrap();
2077                assert!(
2078                    first_match < chain.len() - 1,
2079                    "the recurring identity must appear earlier in the chain: {chain:?}"
2080                );
2081            }
2082            other => panic!("expected CycleDetected, got {other:?}"),
2083        }
2084    }
2085
2086    /// T3 — Nested-prefix cycle. Outer arm `root → A → B → C` is
2087    /// acyclic; the cycle lives inside B's other child `D`, which
2088    /// loops back to B (`B → D → B`). The walker must surface
2089    /// `CycleDetected` and the cycle should appear inside the
2090    /// subtree (not at the root level), with B as the recurring
2091    /// identity.
2092    ///
2093    /// Specifically: A's children = [B], B's children = [C, D], C
2094    /// has no children, D's children = [B] (cycle).
2095    #[test]
2096    #[allow(clippy::too_many_lines)]
2097    fn cycle_nested_prefix_aborts() {
2098        let tmp = tempfile::tempdir().unwrap();
2099        let root_dir = tmp.path().to_path_buf();
2100        // Disk layout: root/a, root/a/b, root/a/b/c (acyclic arm),
2101        // root/a/b/d (cycle arm), root/a/b/d/b (D loops back to B —
2102        // identity collision; on-disk path is fresh so classify
2103        // succeeds).
2104        let a_dir = root_dir.join("a");
2105        let b_dir = a_dir.join("b");
2106        let c_dir = b_dir.join("c");
2107        let d_dir = b_dir.join("d");
2108        let b2_dir = d_dir.join("b");
2109        make_sub_meta_on_disk(&a_dir, "a");
2110        make_sub_meta_on_disk(&b_dir, "b");
2111        make_sub_meta_on_disk(&c_dir, "c");
2112        make_sub_meta_on_disk(&d_dir, "d");
2113        make_sub_meta_on_disk(&b2_dir, "b");
2114        let url_a = "https://example.com/a.git";
2115        let url_b = "https://example.com/b.git";
2116        let url_c = "https://example.com/c.git";
2117        let url_d = "https://example.com/d.git";
2118        let loader = InMemLoader::new()
2119            .with(root_dir.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
2120            .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_b, "b")]))
2121            // b has both an acyclic child (c) and a cyclic one (d).
2122            .with(
2123                b_dir.clone(),
2124                meta_manifest_with("b", vec![child(url_c, "c"), child(url_d, "d")]),
2125            )
2126            .with(c_dir.clone(), meta_manifest_with("c", vec![]))
2127            // d re-declares b → cycle inside the b/d subtree.
2128            .with(d_dir.clone(), meta_manifest_with("d", vec![child(url_b, "b")]))
2129            .with(b2_dir.clone(), meta_manifest_with("b", vec![]));
2130        let backend = InMemGit::new();
2131        let opts = SyncMetaOptions::default();
2132        let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
2133            .expect_err("nested-prefix cycle must abort");
2134        match err {
2135            TreeError::CycleDetected { chain } => {
2136                let id_root = pack_identity_for_root(&root_dir);
2137                let id_a = format!("url:{url_a}");
2138                let id_b = format!("url:{url_b}");
2139                let id_d = format!("url:{url_d}");
2140                // The cycle hits inside the subtree at depth 4:
2141                // [path:root, a, b, d, b].
2142                assert_eq!(
2143                    chain,
2144                    vec![id_root.clone(), id_a, id_b.clone(), id_d, id_b.clone()],
2145                    "cycle should appear inside the subtree, not at the top"
2146                );
2147                // Recurring identity is `b`, and it does NOT appear
2148                // at the chain's outermost position — the root path
2149                // identity does. This verifies the cycle is "inside"
2150                // the tree.
2151                let last = chain.last().unwrap();
2152                assert_eq!(last, &id_b, "recurring identity is B");
2153                assert_ne!(
2154                    chain.first().unwrap(),
2155                    last,
2156                    "cycle must not start at the root frame: {chain:?}"
2157                );
2158                assert_eq!(
2159                    chain.first().unwrap(),
2160                    &id_root,
2161                    "chain must begin with the root path identity: {chain:?}"
2162                );
2163            }
2164            other => panic!("expected CycleDetected, got {other:?}"),
2165        }
2166    }
2167
2168    /// B1 regression: max_depth must NOT mask cycle detection. Cycle
2169    /// check fires before depth-cap return in phase3_handle_child.
2170    ///
2171    /// Topology: same 4-node cycle as `cycle_four_node_aborts`
2172    /// (`root → A → B → C → D → A`). Recurring `A` is reached at
2173    /// `next_depth = 5`. With `max_depth: Some(4)`, the depth cap
2174    /// would skip the recurring frame BEFORE it can be tested for
2175    /// cycle membership — *if* B1 were reverted (i.e. depth-cap
2176    /// early-return placed before the cycle check). The current
2177    /// ordering (cycle-then-depth-cap, see `phase3_handle_child`)
2178    /// surfaces `CycleDetected` regardless of the cap.
2179    ///
2180    /// If anyone reverts B1's reorder, this test fails: the walker
2181    /// returns `Ok(_)` instead of `Err(CycleDetected)` because the
2182    /// recurring frame is silently truncated.
2183    #[test]
2184    fn cycle_aborts_under_max_depth_cap() {
2185        let tmp = tempfile::tempdir().unwrap();
2186        let root_dir = tmp.path().to_path_buf();
2187        let a_dir = root_dir.join("a");
2188        let b_dir = a_dir.join("b");
2189        let c_dir = b_dir.join("c");
2190        let d_dir = c_dir.join("d");
2191        let a2_dir = d_dir.join("a");
2192        make_sub_meta_on_disk(&a_dir, "a");
2193        make_sub_meta_on_disk(&b_dir, "b");
2194        make_sub_meta_on_disk(&c_dir, "c");
2195        make_sub_meta_on_disk(&d_dir, "d");
2196        make_sub_meta_on_disk(&a2_dir, "a");
2197        let url_a = "https://example.com/a.git";
2198        let url_b = "https://example.com/b.git";
2199        let url_c = "https://example.com/c.git";
2200        let url_d = "https://example.com/d.git";
2201        let loader = InMemLoader::new()
2202            .with(root_dir.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
2203            .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_b, "b")]))
2204            .with(b_dir.clone(), meta_manifest_with("b", vec![child(url_c, "c")]))
2205            .with(c_dir.clone(), meta_manifest_with("c", vec![child(url_d, "d")]))
2206            .with(d_dir.clone(), meta_manifest_with("d", vec![child(url_a, "a")]))
2207            .with(a2_dir.clone(), meta_manifest_with("a", vec![]));
2208        let backend = InMemGit::new();
2209        // max_depth: Some(4) — the recurring A frame would land at
2210        // next_depth=5, which exceeds the cap. With B1, the cycle
2211        // check still fires first; without B1, the cap would skip
2212        // before the cycle is detected.
2213        let opts = SyncMetaOptions { max_depth: Some(4), ..SyncMetaOptions::default() };
2214        let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
2215            .expect_err("cycle must surface even when its closing frame exceeds max_depth");
2216        match err {
2217            TreeError::CycleDetected { chain } => {
2218                let id_a = format!("url:{url_a}");
2219                assert!(
2220                    chain.last() == Some(&id_a),
2221                    "recurring identity must be A, got chain={chain:?}"
2222                );
2223                let last = chain.last().unwrap();
2224                let first_match = chain.iter().position(|s| s == last).unwrap();
2225                assert!(
2226                    first_match < chain.len() - 1,
2227                    "the recurring identity must appear earlier in the chain: {chain:?}"
2228                );
2229            }
2230            other => panic!("expected CycleDetected, got {other:?}"),
2231        }
2232    }
2233
2234    /// B2 regression: `pack_identity_for_child` must NOT emit a
2235    /// trailing `@` when `r#ref` is `Some("")` (empty string). Both
2236    /// `Some("")` and `None` collapse to the bare `url:<url>` form so
2237    /// the on-the-wire identity matches the Lean model
2238    /// (`Grex.Walker.ChildRef.identity`). Without this elision two
2239    /// children that differ only in `ref: None` vs `ref: Some("")`
2240    /// would serialise the same way as `url:<url>@`, masking the
2241    /// distinction the Lean spec draws — and worse, an identity
2242    /// ending in `@` leaks an empty-ref artifact into operator
2243    /// diagnostics.
2244    #[test]
2245    fn child_identity_some_empty_ref_omits_at() {
2246        let url = "https://example.com/a.git";
2247        let with_none = ChildRef { url: url.to_string(), path: Some("a".to_string()), r#ref: None };
2248        let with_empty = ChildRef {
2249            url: url.to_string(),
2250            path: Some("a".to_string()),
2251            r#ref: Some(String::new()),
2252        };
2253        let id_none = pack_identity_for_child(&with_none);
2254        let id_empty = pack_identity_for_child(&with_empty);
2255        let expected = format!("url:{url}");
2256        assert_eq!(id_none, expected, "None ref must produce bare url identity");
2257        assert_eq!(
2258            id_empty, expected,
2259            "Some(\"\") ref must collapse to bare url identity (no trailing @)"
2260        );
2261        assert_eq!(id_none, id_empty, "Some(\"\") and None must yield the same identity");
2262        assert!(!id_empty.ends_with('@'), "identity must not end with trailing @: {id_empty:?}");
2263    }
2264
2265    // -----------------------------------------------------------------
2266    // v1.2.4 — A1 cancellation token (T-cancel).
2267    //
2268    // Discharges the Lean `cancellation_terminates_promptly` theorem
2269    // in `proof/Grex/Walker.lean`: when one sibling closure detects a
2270    // cycle and signals the per-`phase3_recurse` cancellation flag,
2271    // every subsequent in-flight sibling closure observes the flag at
2272    // its next entry and returns `Phase3ChildOutcome::Cancelled` with
2273    // zero recursive descent.
2274    // -----------------------------------------------------------------
2275
2276    /// T-cancel — sibling cancellation under cycle.
2277    ///
2278    /// Topology: `root → A`, where A has FOUR children
2279    /// `[A_cyclic, X, Y, Z]` (in this exact source order). `A_cyclic`'s
2280    /// URL collides with A's own URL, so A's `phase3_recurse` detects
2281    /// the cycle when iterating its first child. `X`, `Y`, `Z` are
2282    /// independent sub-metas, each containing a deep chain
2283    /// (`X → X1 → X2`, etc) that would inflate `metas_visited` if
2284    /// genuinely walked. With `opts.parallel = Some(1)` the rayon
2285    /// pool runs siblings serially in source order, so the cyclic
2286    /// sibling fires first, sets the flag, and `X`/`Y`/`Z` observe
2287    /// `Cancelled` at entry — none of their subtrees are walked.
2288    ///
2289    /// Determinism: `parallel: Some(1)` removes thread interleaving
2290    /// from the test surface — the cyclic arm is *guaranteed* to run
2291    /// before the acyclic siblings. The flag is checked at entry of
2292    /// `phase3_handle_child`, so any sibling that has not yet started
2293    /// observes it. `metas_visited` is the side-effect-visible counter
2294    /// used to assert the cancellation discipline: pre-cancellation
2295    /// (v1.2.3) the walker would visit every sibling subtree before
2296    /// surfacing `Err(CycleDetected)`; post-cancellation (v1.2.4)
2297    /// only the cyclic arm and its prefix contribute.
2298    ///
2299    /// Without the cancellation flag, `metas_visited` would total
2300    /// 1 (root) + 1 (A) + 3 (X, Y, Z themselves) + 6 (X1,X2,Y1,Y2,Z1,Z2)
2301    /// = 11. With the flag, X/Y/Z's `phase3_handle_child` short-circuits
2302    /// before recursing, so only root + A are recorded → 2.
2303    #[test]
2304    #[allow(clippy::too_many_lines)]
2305    fn cancellation_aborts_siblings() {
2306        let tmp = tempfile::tempdir().unwrap();
2307        let root_dir = tmp.path().to_path_buf();
2308        let a_dir = root_dir.join("a");
2309        // A_cyclic's path is a fresh slot under A so on-disk classify
2310        // succeeds; identity collision with A's URL is what trips the
2311        // cycle detector at A's `phase3_recurse`.
2312        let a_cyclic_dir = a_dir.join("a-cyclic");
2313        let x_dir = a_dir.join("x");
2314        let x1_dir = x_dir.join("x1");
2315        let x2_dir = x1_dir.join("x2");
2316        let y_dir = a_dir.join("y");
2317        let y1_dir = y_dir.join("y1");
2318        let y2_dir = y1_dir.join("y2");
2319        let z_dir = a_dir.join("z");
2320        let z1_dir = z_dir.join("z1");
2321        let z2_dir = z1_dir.join("z2");
2322        for d in [
2323            &a_dir,
2324            &a_cyclic_dir,
2325            &x_dir,
2326            &x1_dir,
2327            &x2_dir,
2328            &y_dir,
2329            &y1_dir,
2330            &y2_dir,
2331            &z_dir,
2332            &z1_dir,
2333            &z2_dir,
2334        ] {
2335            make_sub_meta_on_disk(d, d.file_name().unwrap().to_str().unwrap());
2336        }
2337
2338        let url_a = "https://example.com/a.git";
2339        let url_x = "https://example.com/x.git";
2340        let url_x1 = "https://example.com/x1.git";
2341        let url_x2 = "https://example.com/x2.git";
2342        let url_y = "https://example.com/y.git";
2343        let url_y1 = "https://example.com/y1.git";
2344        let url_y2 = "https://example.com/y2.git";
2345        let url_z = "https://example.com/z.git";
2346        let url_z1 = "https://example.com/z1.git";
2347        let url_z2 = "https://example.com/z2.git";
2348
2349        // A's children: [a-cyclic (collides with A's identity), x, y, z]
2350        // — ORDER MATTERS for determinism. With parallel: Some(1) the
2351        // cyclic arm runs first, signals, and the rest observe.
2352        let loader = InMemLoader::new()
2353            .with(root_dir.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
2354            .with(
2355                a_dir.clone(),
2356                meta_manifest_with(
2357                    "a",
2358                    vec![
2359                        child(url_a, "a-cyclic"),
2360                        child(url_x, "x"),
2361                        child(url_y, "y"),
2362                        child(url_z, "z"),
2363                    ],
2364                ),
2365            )
2366            .with(a_cyclic_dir.clone(), meta_manifest_with("a-cyclic", vec![]))
2367            // X/Y/Z each carry a 3-level subtree that would inflate
2368            // metas_visited if genuinely walked.
2369            .with(x_dir.clone(), meta_manifest_with("x", vec![child(url_x1, "x1")]))
2370            .with(x1_dir.clone(), meta_manifest_with("x1", vec![child(url_x2, "x2")]))
2371            .with(x2_dir.clone(), meta_manifest_with("x2", vec![]))
2372            .with(y_dir.clone(), meta_manifest_with("y", vec![child(url_y1, "y1")]))
2373            .with(y1_dir.clone(), meta_manifest_with("y1", vec![child(url_y2, "y2")]))
2374            .with(y2_dir.clone(), meta_manifest_with("y2", vec![]))
2375            .with(z_dir.clone(), meta_manifest_with("z", vec![child(url_z1, "z1")]))
2376            .with(z1_dir.clone(), meta_manifest_with("z1", vec![child(url_z2, "z2")]))
2377            .with(z2_dir.clone(), meta_manifest_with("z2", vec![]));
2378        let backend = InMemGit::new();
2379        // parallel: Some(1) — single-threaded rayon pool. Source-order
2380        // iteration => cyclic arm runs before X/Y/Z, signal observed.
2381        let opts = SyncMetaOptions { parallel: Some(1), ..SyncMetaOptions::default() };
2382        let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
2383            .expect_err("cyclic input must surface CycleDetected");
2384        match err {
2385            TreeError::CycleDetected { chain } => {
2386                let id_a = format!("url:{url_a}");
2387                assert_eq!(
2388                    chain.last(),
2389                    Some(&id_a),
2390                    "recurring identity must be A (the cyclic arm), got chain={chain:?}"
2391                );
2392            }
2393            other => panic!("expected CycleDetected, got {other:?}"),
2394        }
2395        // The cancellation discipline asserts: X/Y/Z's subtrees were
2396        // NOT walked. Cancellation lives in Phase 3 (the recursion
2397        // edge), so Phase 1 still fetches A's four direct children
2398        // (a-cyclic, x, y, z) — one Fetch each. A's Phase 3 then
2399        // detects the cycle on a-cyclic and signals; X/Y/Z's
2400        // `phase3_handle_child` returns `Cancelled` at entry, so
2401        // their `sync_meta_inner` never runs. Therefore X1/X2/Y1/Y2/
2402        // Z1/Z2 are NEVER fetched.
2403        //
2404        // Without cancellation Fetch count = 11:
2405        //   1 (root → a) + 4 (a → a-cyclic, x, y, z)
2406        //   + 2 (x → x1 → x2) + 2 (y → y1 → y2) + 2 (z → z1 → z2).
2407        // With cancellation Fetch count = 5:
2408        //   1 (root → a) + 4 (a → a-cyclic, x, y, z).
2409        //
2410        // With `parallel: Some(1)` the rayon pool is single-threaded
2411        // and visibility is immediate, so 5 is the deterministic tight
2412        // bound: 1 (root → a) + 4 (a's Phase 1 fan-out for a-cyclic, x,
2413        // y, z). No upper-bound slack — `Some(1)` removes interleaving
2414        // entirely, and any deviation indicates a real regression.
2415        let fetch_count =
2416            backend.calls().iter().filter(|c| matches!(c, BackendCall::Fetch { .. })).count();
2417        // Lower bound: Phase 1's per-child fan-out runs BEFORE the
2418        // Phase 3 cycle check, so even under cancellation A's four
2419        // direct children must have been fetched (1 root → A + 4 a's
2420        // children = 5). Documents that Phase 1 is intentionally NOT
2421        // cancellation-aware in v1.2.4 — cancellation only short-
2422        // circuits the Phase 3 recursion edge.
2423        assert!(
2424            fetch_count >= 5,
2425            "Phase 1 fan-out for A's 4 direct children must complete even under cancellation; \
2426             observed {fetch_count} fetches"
2427        );
2428        assert_eq!(
2429            fetch_count, 5,
2430            "cancellation flag must short-circuit X/Y/Z subtrees; \
2431             observed {fetch_count} fetches (acyclic walk would do 11)"
2432        );
2433        // Stronger assertion: NO fetch ever targeted x1/x2/y1/y2/z1/z2.
2434        // If the cancellation token were broken, sync_meta_inner would
2435        // recurse into x/y/z and Phase 1 there would fetch x1/y1/z1 at
2436        // a minimum.
2437        let cancelled_dests = [&x1_dir, &x2_dir, &y1_dir, &y2_dir, &z1_dir, &z2_dir];
2438        for dest in cancelled_dests {
2439            for call in backend.calls() {
2440                if let BackendCall::Fetch { dest: fetched } = &call {
2441                    assert_ne!(
2442                        fetched,
2443                        dest,
2444                        "cancellation must prevent recursion into {} (observed Fetch call)",
2445                        dest.display()
2446                    );
2447                }
2448            }
2449        }
2450    }
2451
2452    /// G1 — multi-thread sibling cancellation race.
2453    ///
2454    /// Topology: `root → A`, where A has 12 children: two cyclic
2455    /// siblings (`A_cyclic1`, `A_cyclic2`, both colliding with A's
2456    /// own URL) and ten acyclic deep subtrees (`X0..X9` each →
2457    /// `X{i}_1` → `X{i}_2`). With `parallel: Some(8)` rayon may
2458    /// schedule the two cyclic arms onto different worker threads,
2459    /// so BOTH can simultaneously detect the cycle and try to store
2460    /// into `cancelled`. The aggregator (`phase3_recurse`) must:
2461    ///   - return `Err(CycleDetected)` on every iteration with a
2462    ///     non-empty chain (no panic, no Ok),
2463    ///   - never spuriously double-count or wedge on the multiple
2464    ///     concurrent stores (AtomicBool::store is idempotent),
2465    ///   - keep fetch count bounded (no unbounded recursion).
2466    ///
2467    /// Looped 50 iterations — non-determinism would surface as
2468    /// either a panic, an `Ok(_)` return, or an unbounded fetch
2469    /// count (the acyclic 10-subtree walk would be at minimum
2470    /// `1 (root → A) + 12 (A's children) + 20 (X*_1, X*_2)` = 33
2471    /// without cancellation; we cap at 1000 to catch runaway).
2472    #[test]
2473    #[allow(clippy::too_many_lines)]
2474    fn cancellation_aborts_siblings_multithread() {
2475        // Build helper: returns (loader, backend, root_dir) for a
2476        // fresh per-iteration tempdir so iterations don't share state.
2477        fn build_topology() -> (tempfile::TempDir, PathBuf, InMemLoader, InMemGit, String) {
2478            let tmp = tempfile::tempdir().unwrap();
2479            let root_dir = tmp.path().to_path_buf();
2480            let a_dir = root_dir.join("a");
2481            make_sub_meta_on_disk(&a_dir, "a");
2482            let url_a = "https://example.com/a.git".to_string();
2483            // Two cyclic siblings (both collide with A's identity)
2484            let a_cyc1_dir = a_dir.join("a-cyclic1");
2485            let a_cyc2_dir = a_dir.join("a-cyclic2");
2486            make_sub_meta_on_disk(&a_cyc1_dir, "a-cyclic1");
2487            make_sub_meta_on_disk(&a_cyc2_dir, "a-cyclic2");
2488            // Ten acyclic deep subtrees: x0..x9, each → x{i}_1 → x{i}_2
2489            let mut a_children = vec![child(&url_a, "a-cyclic1"), child(&url_a, "a-cyclic2")];
2490            let mut loader = InMemLoader::new()
2491                .with(root_dir.clone(), meta_manifest_with("root", vec![child(&url_a, "a")]))
2492                .with(a_cyc1_dir.clone(), meta_manifest_with("a-cyclic1", vec![]))
2493                .with(a_cyc2_dir.clone(), meta_manifest_with("a-cyclic2", vec![]));
2494            for i in 0..10 {
2495                let xi_name = format!("x{i}");
2496                let xi_dir = a_dir.join(&xi_name);
2497                let xi1_name = format!("x{i}_1");
2498                let xi1_dir = xi_dir.join(&xi1_name);
2499                let xi2_name = format!("x{i}_2");
2500                let xi2_dir = xi1_dir.join(&xi2_name);
2501                make_sub_meta_on_disk(&xi_dir, &xi_name);
2502                make_sub_meta_on_disk(&xi1_dir, &xi1_name);
2503                make_sub_meta_on_disk(&xi2_dir, &xi2_name);
2504                let url_xi = format!("https://example.com/x{i}.git");
2505                let url_xi1 = format!("https://example.com/x{i}_1.git");
2506                let url_xi2 = format!("https://example.com/x{i}_2.git");
2507                a_children.push(child(&url_xi, &xi_name));
2508                loader = loader
2509                    .with(xi_dir, meta_manifest_with(&xi_name, vec![child(&url_xi1, &xi1_name)]))
2510                    .with(xi1_dir, meta_manifest_with(&xi1_name, vec![child(&url_xi2, &xi2_name)]))
2511                    .with(xi2_dir, meta_manifest_with(&xi2_name, vec![]));
2512            }
2513            loader = loader.with(a_dir, meta_manifest_with("a", a_children));
2514            let backend = InMemGit::new();
2515            (tmp, root_dir, loader, backend, url_a)
2516        }
2517
2518        for iter in 0..50 {
2519            let (_tmp, root_dir, loader, backend, url_a) = build_topology();
2520            let opts = SyncMetaOptions { parallel: Some(8), ..SyncMetaOptions::default() };
2521            let result = sync_meta(&root_dir, &backend, &loader, &opts, &[]);
2522            let err = match result {
2523                Err(e) => e,
2524                Ok(_) => panic!("iter {iter}: expected CycleDetected, got Ok"),
2525            };
2526            match err {
2527                TreeError::CycleDetected { chain } => {
2528                    assert!(
2529                        !chain.is_empty(),
2530                        "iter {iter}: CycleDetected chain must be non-empty: {chain:?}"
2531                    );
2532                    let id_a = format!("url:{url_a}");
2533                    assert_eq!(
2534                        chain.last(),
2535                        Some(&id_a),
2536                        "iter {iter}: recurring identity must be A, got chain={chain:?}"
2537                    );
2538                }
2539                other => panic!("iter {iter}: expected CycleDetected, got {other:?}"),
2540            }
2541            // Bound check: even with two cyclic arms racing, the
2542            // walker must not have walked unbounded subtrees. Acyclic
2543            // walk would do 33 fetches; under cancellation Phase 1
2544            // for A's 12 direct children fires (12 fetches) plus the
2545            // initial root → A fetch (1) = 13, and depending on
2546            // worker scheduling some X{i}_1 / X{i}_2 fetches may
2547            // sneak in before the flag is observed. Cap loosely at
2548            // 200 — any wild blowup indicates the flag is broken.
2549            let fetch_count =
2550                backend.calls().iter().filter(|c| matches!(c, BackendCall::Fetch { .. })).count();
2551            assert!(
2552                fetch_count >= 1,
2553                "iter {iter}: at least the root → A fetch must occur; got {fetch_count}"
2554            );
2555            assert!(
2556                fetch_count < 200,
2557                "iter {iter}: fetch count blew up under multi-thread cancellation; got {fetch_count}"
2558            );
2559        }
2560    }
2561
2562    /// G2 — per-call cancellation flag scope: a cycle deep in pack A
2563    /// MUST NOT cancel a sibling pack B at the root level.
2564    ///
2565    /// Topology:
2566    ///   root → [A, B]
2567    ///     A → A1 → A2 → A2_cyclic   (A2_cyclic.url == A2.url ⇒ cycle
2568    ///                                 inside A's deep subtree)
2569    ///     B → B1 → B2 → B3          (clean acyclic subtree)
2570    ///
2571    /// The cancellation flag for root's Phase 3 fan-out covers root's
2572    /// direct children (A, B). The cycle inside A is detected during
2573    /// recursion into A's deep subtree, by a NEW per-call flag built
2574    /// at the deep `phase3_recurse` frame — that flag is scoped to A's
2575    /// inner closures only. It must NOT propagate up and cancel B's
2576    /// independent walk.
2577    ///
2578    /// Assertions:
2579    ///   (i)  walker returns `Err(CycleDetected)` (the deep cycle
2580    ///        propagates up via the short-circuit path),
2581    ///   (ii) B's deep subtree IS walked (B, B1, B2, B3 all fetched),
2582    ///        proving B's walk was not aborted by A's deep-subtree
2583    ///        cancellation flag (which lives in a recursion frame
2584    ///        below root, disjoint from the root-level fan-out flag
2585    ///        that scopes A and B as siblings).
2586    ///
2587    /// With `parallel: Some(2)` rayon schedules A and B onto separate
2588    /// workers; B must run to completion regardless of A's cycle.
2589    #[test]
2590    #[allow(clippy::too_many_lines)]
2591    fn cancellation_per_call_scope_isolates_subtrees() {
2592        let tmp = tempfile::tempdir().unwrap();
2593        let root_dir = tmp.path().to_path_buf();
2594        // A's deep cycle arm
2595        let a_dir = root_dir.join("a");
2596        let a1_dir = a_dir.join("a1");
2597        let a2_dir = a1_dir.join("a2");
2598        let a2cyc_dir = a2_dir.join("a2-cyclic");
2599        // B's clean deep subtree
2600        let b_dir = root_dir.join("b");
2601        let b1_dir = b_dir.join("b1");
2602        let b2_dir = b1_dir.join("b2");
2603        let b3_dir = b2_dir.join("b3");
2604        for d in [&a_dir, &a1_dir, &a2_dir, &a2cyc_dir, &b_dir, &b1_dir, &b2_dir, &b3_dir] {
2605            make_sub_meta_on_disk(d, d.file_name().unwrap().to_str().unwrap());
2606        }
2607        let url_a = "https://example.com/a.git";
2608        let url_a1 = "https://example.com/a1.git";
2609        let url_a2 = "https://example.com/a2.git";
2610        let url_b = "https://example.com/b.git";
2611        let url_b1 = "https://example.com/b1.git";
2612        let url_b2 = "https://example.com/b2.git";
2613        let url_b3 = "https://example.com/b3.git";
2614        let loader = InMemLoader::new()
2615            .with(
2616                root_dir.clone(),
2617                meta_manifest_with("root", vec![child(url_a, "a"), child(url_b, "b")]),
2618            )
2619            .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_a1, "a1")]))
2620            .with(a1_dir.clone(), meta_manifest_with("a1", vec![child(url_a2, "a2")]))
2621            // a2's child re-declares a2's identity → cycle at depth 4
2622            .with(a2_dir.clone(), meta_manifest_with("a2", vec![child(url_a2, "a2-cyclic")]))
2623            .with(a2cyc_dir.clone(), meta_manifest_with("a2-cyclic", vec![]))
2624            .with(b_dir.clone(), meta_manifest_with("b", vec![child(url_b1, "b1")]))
2625            .with(b1_dir.clone(), meta_manifest_with("b1", vec![child(url_b2, "b2")]))
2626            .with(b2_dir.clone(), meta_manifest_with("b2", vec![child(url_b3, "b3")]))
2627            .with(b3_dir.clone(), meta_manifest_with("b3", vec![]));
2628        let backend = InMemGit::new();
2629        // parallel: Some(2) — A and B may schedule onto separate
2630        // workers. The point of the test is to verify B's walk is
2631        // not aborted by A's deep-subtree cancellation flag.
2632        let opts = SyncMetaOptions { parallel: Some(2), ..SyncMetaOptions::default() };
2633        let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
2634            .expect_err("deep cycle inside A must surface CycleDetected");
2635        // (i) Cycle bubbled up.
2636        match err {
2637            TreeError::CycleDetected { chain } => {
2638                let id_a2 = format!("url:{url_a2}");
2639                assert_eq!(
2640                    chain.last(),
2641                    Some(&id_a2),
2642                    "recurring identity must be A2 (the cyclic arm), got chain={chain:?}"
2643                );
2644            }
2645            other => panic!("expected CycleDetected, got {other:?}"),
2646        }
2647        // (ii) B's subtree was walked: B, B1, B2, B3 each fetched.
2648        // The deep cycle in A fires inside a per-call flag scoped to
2649        // that recursion frame; root's per-call flag is NOT signalled
2650        // (cycle is returned from a child call, not stored at root
2651        // scope), so B's sibling walk completes uninterrupted.
2652        let fetched_dests: Vec<PathBuf> = backend
2653            .calls()
2654            .iter()
2655            .filter_map(|c| match c {
2656                BackendCall::Fetch { dest } => Some(dest.clone()),
2657                _ => None,
2658            })
2659            .collect();
2660        for dest in [&b_dir, &b1_dir, &b2_dir, &b3_dir] {
2661            assert!(
2662                fetched_dests.iter().any(|f| f == dest),
2663                "per-call scope: B's subtree must have been walked despite A's deep cycle; \
2664                 missing fetch for {} (observed {fetched_dests:?})",
2665                dest.display()
2666            );
2667        }
2668    }
2669}