grex_core/tree/walker.rs
1//! Recursive pack-tree walker.
2//!
3//! The walker hydrates a `pack.yaml` tree: it loads the root manifest, clones
4//! (or fetches + checks out) every `children:` entry via the injected
5//! [`GitBackend`], and recurses. `depends_on` entries are recorded as edges
6//! but never walked — they are *external prereqs* verified by
7//! [`crate::pack::validate::DependsOnValidator`] after the graph is built.
8//!
9//! # Cycle detection
10//!
11//! Cycles are detected **during** the walk, not post-hoc. Each recursion
12//! maintains an ancestor stack of pack identifiers (source-url when present,
13//! otherwise the canonical on-disk path). If a child is about to be entered
14//! whose identifier is already on the stack, the walker short-circuits with
15//! [`TreeError::CycleDetected`]. A separate `CycleValidator` runs
16//! post-hoc as a belt-and-suspenders check so manually-constructed graphs
17//! cannot sneak through.
18//!
19//! # Cyclomatic discipline
20//!
21//! The walk is decomposed so each helper stays well under CC 15:
22//! `walk` → `walk_recursive` → `process_children` → `handle_child` →
23//! `resolve_destination` | `record_depends_on`.
24
25use std::collections::BTreeMap;
26use std::path::{Path, PathBuf};
27use std::sync::atomic::{AtomicBool, Ordering};
28use std::sync::Arc;
29
30use rayon::prelude::*;
31
32use crate::git::GitBackend;
33use crate::pack::validate::child_path::{
34 boundary_fs_reject_reason, boundary_reject_reason, check_one as check_child_path,
35 nfc_duplicate_path,
36};
37use crate::pack::{ChildRef, PackManifest, PackType, PackValidationError, SchemaVersion};
38
39use super::consent::phase2_prune;
40use super::dest_class::{aggregate_untracked, classify_dest, DestClass};
41use super::error::TreeError;
42use super::graph::{EdgeKind, PackEdge, PackGraph, PackNode};
43use super::loader::PackLoader;
44use super::quarantine::QuarantineConfig;
45
46/// Recursive walker. Composes a [`PackLoader`] (for manifests) with a
47/// [`GitBackend`] (for child hydration).
48///
49/// The walker owns no state across calls: each invocation of [`Walker::walk`]
50/// produces a fresh [`PackGraph`] and leaves no footprint.
51///
52/// **Status (v1.2.1, path iii)**: retired from the production sync
53/// orchestrator. `sync::run` now composes [`sync_meta`] (mutate) →
54/// [`super::graph_build::build_graph`] (read-only) → `run_actions` instead
55/// of issuing clones+fetches inside the graph build. The `Walker` symbol
56/// is kept for downstream test-suite compatibility (22 fixture call sites
57/// in `crates/grex-core/tests/tree_walk.rs`); new code SHOULD NOT add
58/// production call sites.
59#[doc(hidden)]
60pub struct Walker<'a> {
61 loader: &'a dyn PackLoader,
62 backend: &'a dyn GitBackend,
63 workspace: PathBuf,
64 /// Optional global ref override (M4-D `grex sync --ref <sha|branch|tag>`).
65 /// When `Some`, every child clone/checkout uses this ref instead of the
66 /// declared `child.ref` from the parent manifest. `None` preserves M3
67 /// semantics.
68 ref_override: Option<String>,
69}
70
71impl<'a> Walker<'a> {
72 /// Construct a new walker.
73 ///
74 /// `workspace` is the directory under which child packs will be cloned,
75 /// using each [`ChildRef::effective_path`] as the sub-directory name.
76 #[must_use]
77 pub fn new(
78 loader: &'a dyn PackLoader,
79 backend: &'a dyn GitBackend,
80 workspace: PathBuf,
81 ) -> Self {
82 Self { loader, backend, workspace, ref_override: None }
83 }
84
85 /// Set a global ref override applied to every child pack.
86 ///
87 /// Surfaced as `grex sync --ref <sha|branch|tag>` (M4-D). The override
88 /// replaces each child's declared `ref` in its parent manifest. An
89 /// empty string is treated as "no override" — callers should reject
90 /// empty values at the CLI layer before reaching this point.
91 #[must_use]
92 pub fn with_ref_override(mut self, r#ref: Option<String>) -> Self {
93 self.ref_override = r#ref.filter(|s| !s.is_empty());
94 self
95 }
96
97 /// Walk the tree rooted at `root_pack_path`, returning the fully
98 /// hydrated graph.
99 ///
100 /// # Errors
101 ///
102 /// Returns [`TreeError`] on any loader, git, cycle, or name-mismatch
103 /// failure. The walk aborts on the first failure — the spec-level
104 /// "fail loud, fail fast" default.
105 pub fn walk(&self, root_pack_path: &Path) -> Result<PackGraph, TreeError> {
106 let mut state = BuildState::default();
107 let root_manifest = self.loader.load(root_pack_path)?;
108 // Pre-walk path-traversal gate: reject any malicious
109 // `children[].path` (or URL-derived tail) BEFORE any clone fires.
110 // Closes the v1.1.0 flat-sibling exploit window where a `path:
111 // ../escape` would materialise a child outside the pack root
112 // before plan-phase validation could see it.
113 validate_children_paths(&root_manifest)?;
114 let root_commit_sha = probe_head_sha(self.backend, root_pack_path);
115 let root_id = state.push_node(PackNode {
116 id: 0,
117 name: root_manifest.name.clone(),
118 path: root_pack_path.to_path_buf(),
119 source_url: None,
120 manifest: root_manifest.clone(),
121 parent: None,
122 commit_sha: root_commit_sha,
123 synthetic: false,
124 });
125 let root_identity = pack_identity_for_root(root_pack_path);
126 self.walk_recursive(root_id, &root_manifest, &mut state, &mut vec![root_identity])?;
127 Ok(PackGraph::new(state.nodes, state.edges))
128 }
129
130 /// Recursive step. `ancestors` carries the pack identifiers
131 /// currently on the in-progress walk path — pushed on entry,
132 /// popped on return. It is a path-prefix set (NOT a global
133 /// "visited" set), so a diamond reaching the same descendant
134 /// via two disjoint paths is not a cycle.
135 ///
136 /// Each loaded manifest's `children[]` is path-traversal-validated
137 /// before any of those children are resolved on disk; the entry
138 /// point pre-validates the root manifest, so by the time
139 /// `walk_recursive` runs for a child, that child's own `children[]`
140 /// is what needs gating before the next descent.
141 fn walk_recursive(
142 &self,
143 parent_id: usize,
144 manifest: &PackManifest,
145 state: &mut BuildState,
146 ancestors: &mut Vec<String>,
147 ) -> Result<(), TreeError> {
148 self.record_depends_on(parent_id, manifest, state);
149 self.process_children(parent_id, manifest, state, ancestors)
150 }
151
152 /// Record one `DependsOn` edge per `depends_on` entry. Resolution
153 /// against actual graph nodes happens later in `DependsOnValidator`.
154 /// We emit edges only where the target already exists in the graph so
155 /// the edge list stays in-bounds; unresolved deps are surfaced by the
156 /// validator, not carried as dangling edges.
157 fn record_depends_on(&self, parent_id: usize, manifest: &PackManifest, state: &mut BuildState) {
158 for dep in &manifest.depends_on {
159 if let Some(to) = find_node_id_by_name_or_url(&state.nodes, dep) {
160 state.edges.push(PackEdge { from: parent_id, to, kind: EdgeKind::DependsOn });
161 }
162 }
163 }
164
165 fn process_children(
166 &self,
167 parent_id: usize,
168 manifest: &PackManifest,
169 state: &mut BuildState,
170 ancestors: &mut Vec<String>,
171 ) -> Result<(), TreeError> {
172 for child in &manifest.children {
173 self.handle_child(parent_id, child, state, ancestors)?;
174 }
175 Ok(())
176 }
177
178 fn handle_child(
179 &self,
180 parent_id: usize,
181 child: &ChildRef,
182 state: &mut BuildState,
183 ancestors: &mut Vec<String>,
184 ) -> Result<(), TreeError> {
185 let identity = pack_identity_for_child(child);
186 if ancestors.iter().any(|s| s == &identity) {
187 let mut chain = ancestors.clone();
188 chain.push(identity);
189 return Err(TreeError::CycleDetected { chain });
190 }
191 // v1.2.0 Stage 1.c: FS-resident boundary check fires BEFORE
192 // any clone / fetch. Junctions, reparse points, and
193 // `.git`-as-file (gitfile redirect) all re-open the
194 // parent-boundary escape that the syntactic gate closes on
195 // the path string itself; running the check on the prospective
196 // dest path means a hostile pre-existing slot is rejected
197 // before the GitBackend writes anything into (or through) it.
198 // The prospective path is reconstructed here so the helper
199 // can interrogate the slot before `resolve_destination`
200 // materialises a clone — pre-clone runs return `Ok(())` because
201 // the slot doesn't exist yet, and the walk continues normally.
202 let prospective_dest = self.workspace.join(child.effective_path());
203 check_dest_boundary(&prospective_dest, &child.effective_path())?;
204 let dest = self.resolve_destination(child, state)?;
205 // v1.1.1 plain-git children: when the destination has no
206 // `.grex/pack.yaml` but does carry a `.git/`, synthesize a
207 // leaf scripted-no-hooks manifest in-memory rather than
208 // aborting. See
209 // `openspec/changes/feat-v1.1.1-plain-git-children/design.md`
210 // §"Synthesis algorithm".
211 let (child_manifest, is_synthetic) = match self.loader.load(&dest) {
212 Ok(m) => (m, false),
213 Err(TreeError::ManifestNotFound(_)) if dest_has_git_repo(&dest) => {
214 (synthesize_plain_git_manifest(child), true)
215 }
216 Err(e) => return Err(e),
217 };
218 verify_child_name(&child_manifest.name, child, &dest)?;
219 // Validate this child's own `children[]` before its descent
220 // resolves any of them on disk. Mirrors the root-manifest gate
221 // in `walk`; together they ensure no clone can fire for a
222 // grandchild whose parent declared a traversal-bearing path.
223 validate_children_paths(&child_manifest)?;
224
225 let commit_sha = probe_head_sha(self.backend, &dest);
226 let child_id = state.push_node(PackNode {
227 id: state.nodes.len(),
228 name: child_manifest.name.clone(),
229 path: dest.clone(),
230 source_url: Some(child.url.clone()),
231 manifest: child_manifest.clone(),
232 parent: Some(parent_id),
233 commit_sha,
234 synthetic: is_synthetic,
235 });
236 state.edges.push(PackEdge { from: parent_id, to: child_id, kind: EdgeKind::Child });
237
238 ancestors.push(identity);
239 let result = self.walk_recursive(child_id, &child_manifest, state, ancestors);
240 ancestors.pop();
241 result
242 }
243
244 /// Decide where `child` lives on disk and ensure the working tree is
245 /// in the expected state: clone if absent, fetch + optional checkout
246 /// if present.
247 fn resolve_destination(
248 &self,
249 child: &ChildRef,
250 _state: &mut BuildState,
251 ) -> Result<PathBuf, TreeError> {
252 let dest = self.workspace.join(child.effective_path());
253 // M4-D: `ref_override` wins over the parent-declared `child.ref`.
254 // Falls back to the declared ref when no override is active.
255 let effective_ref = self.ref_override.as_deref().or(child.r#ref.as_deref());
256 if dest_has_git_repo(&dest) {
257 self.backend.fetch(&dest)?;
258 if let Some(r) = effective_ref {
259 self.backend.checkout(&dest, r)?;
260 }
261 } else {
262 self.backend.clone(&child.url, &dest, effective_ref)?;
263 }
264 Ok(dest)
265 }
266}
267
268/// Best-effort HEAD probe. Returns `None` when the target is not a git
269/// repository or the backend refuses — the root of a declarative pack is
270/// often a plain directory, so this must not fail the walk.
271///
272/// Non-`.git` directories short-circuit silently (truly not a git
273/// repo). Backend errors on an actual `.git` directory are surfaced as
274/// a `tracing::warn!` log line so transient gix failures / ACL-denied
275/// `.git` reads do not silently degrade into an empty `commit_sha`
276/// without any operator signal. The walker continues with `None` — a
277/// best-effort probe is, by construction, allowed to fail.
278fn probe_head_sha(backend: &dyn GitBackend, path: &Path) -> Option<String> {
279 let dir =
280 if path.extension().and_then(|e| e.to_str()).is_some_and(|e| matches!(e, "yaml" | "yml")) {
281 path.parent()
282 .and_then(Path::parent)
283 .map_or_else(|| path.to_path_buf(), Path::to_path_buf)
284 } else {
285 path.to_path_buf()
286 };
287 if !dir.join(".git").exists() {
288 return None;
289 }
290 match backend.head_sha(&dir) {
291 Ok(s) => Some(s),
292 Err(e) => {
293 tracing::warn!(
294 target: "grex::walker",
295 "HEAD probe failed for {}: {e}",
296 dir.display()
297 );
298 None
299 }
300 }
301}
302
303/// Mutable state threaded through the walk. Private to this module so only
304/// the walker can grow the graph.
305#[derive(Default)]
306struct BuildState {
307 nodes: Vec<PackNode>,
308 edges: Vec<PackEdge>,
309}
310
311impl BuildState {
312 fn push_node(&mut self, node: PackNode) -> usize {
313 let id = node.id;
314 self.nodes.push(node);
315 id
316 }
317}
318
319/// Identity string used by the cycle detector for the root pack.
320fn pack_identity_for_root(path: &Path) -> String {
321 format!("path:{}", path.display())
322}
323
324/// Identity string for a child — url+ref so the same repo at two different
325/// refs is considered distinct. This matches git semantics and avoids
326/// false-positive cycle detections for diamond dependencies on different
327/// tags.
328///
329/// v1.2.3 (B2): when the ref is missing or empty the trailing `@` is
330/// omitted so the on-the-wire identity is just `url:<url>` — matches
331/// `Grex.Walker.ChildRef.identity` in the Lean model. Without this
332/// elision two children that differ only in `ref: None` vs
333/// `ref: Some("")` would otherwise serialise the same way as
334/// `url:<url>@`, masking the distinction the Lean specification draws.
335fn pack_identity_for_child(child: &ChildRef) -> String {
336 match child.r#ref.as_deref() {
337 Some(r) if !r.is_empty() => format!("url:{}@{}", child.url, r),
338 _ => format!("url:{}", child.url),
339 }
340}
341
342/// Shallow on-disk check: a `.git` entry (file or dir) signals an existing
343/// working tree. We deliberately do not open the repo here — that's the
344/// backend's job via `fetch`/`checkout`.
345///
346/// # Symlink safety
347///
348/// `dest` itself MUST NOT be a symlink. If it is, this function returns
349/// `false` regardless of whether the symlink target carries a `.git`
350/// entry. This refusal closes a synthesis-redirection attack: a parent
351/// pack declaring `path: code` against a workspace where the user
352/// happens to have `<workspace>/code -> $HOME` would otherwise let the
353/// walker treat `$HOME/.git` as a "plain-git child" and operate on an
354/// unrelated tree. The check uses [`std::fs::symlink_metadata`] so the
355/// link itself — not its target — is interrogated.
356pub fn dest_has_git_repo(dest: &Path) -> bool {
357 // Reject symlinked destinations outright. `symlink_metadata` does
358 // NOT follow the link, so a broken or path-traversing symlink is
359 // treated as untrusted regardless of its target.
360 if let Ok(meta) = std::fs::symlink_metadata(dest) {
361 if meta.file_type().is_symlink() {
362 return false;
363 }
364 }
365 dest.join(".git").exists()
366}
367
368/// Build the in-memory manifest used for v1.1.1 plain-git children — a
369/// leaf scripted pack with no hooks, no children, no actions. Activated
370/// at the walker's load-fallback boundary when a child has a `.git/`
371/// but no `.grex/pack.yaml`. See
372/// `openspec/changes/feat-v1.1.1-plain-git-children/design.md`.
373pub fn synthesize_plain_git_manifest(child: &ChildRef) -> PackManifest {
374 PackManifest {
375 schema_version: SchemaVersion::current(),
376 name: child.effective_path(),
377 r#type: PackType::Scripted,
378 version: None,
379 depends_on: Vec::new(),
380 children: Vec::new(),
381 actions: Vec::new(),
382 teardown: None,
383 extensions: BTreeMap::new(),
384 }
385}
386
387/// Enforce that the cloned child's pack.yaml name matches what the parent
388/// declared. The parent-side expectation is the child entry's
389/// [`ChildRef::effective_path`] — the directory name in the workspace.
390fn verify_child_name(got: &str, child: &ChildRef, dest: &Path) -> Result<(), TreeError> {
391 let expected = child.effective_path();
392 if got == expected {
393 return Ok(());
394 }
395 Err(TreeError::PackNameMismatch { got: got.to_string(), expected, path: dest.to_path_buf() })
396}
397
398/// Resolve a `depends_on` entry (URL or bare name) against nodes already
399/// recorded. Returns the node id on a hit, `None` otherwise.
400fn find_node_id_by_name_or_url(nodes: &[PackNode], dep: &str) -> Option<usize> {
401 if looks_like_url(dep) {
402 nodes.iter().find(|n| n.source_url.as_deref() == Some(dep)).map(|n| n.id)
403 } else {
404 nodes.iter().find(|n| n.name == dep).map(|n| n.id)
405 }
406}
407
408/// Run the path-traversal gate on `manifest.children`. Returns the
409/// first offending child as a [`TreeError::ChildPathInvalid`] so the
410/// walker aborts before any clone of the offending sibling fires.
411///
412/// Surfacing only the first offender (rather than aggregating) matches
413/// the walker's fail-fast posture — the plan-phase
414/// [`crate::pack::validate::ChildPathValidator`] still runs against the
415/// whole graph post-walk via `validate_graph`, so authors who clear
416/// the traversal exploit see the full diagnostic batch on the next
417/// invocation.
418///
419/// `check_child_path` is documented to return only the
420/// `ChildPathInvalid` variant, but we `match` exhaustively so any
421/// future variant the helper grows surfaces as a compile-time
422/// failure here rather than as a silently swallowed `Some(other)`.
423fn validate_children_paths(manifest: &PackManifest) -> Result<(), TreeError> {
424 // v1.2.0 Stage 1.c: NFC-duplicate sweep across the sibling list.
425 // Runs first because it's a cross-cutting check (one offender
426 // implicates the WHOLE list, not a single child). Surfaces as
427 // `TreeError::ManifestPathEscape` per walker.md
428 // §boundary-preservation — a NFC-collapsed name re-introduces the
429 // very boundary escape the regex was meant to close on
430 // case-insensitive filesystems.
431 if let Some(path) = nfc_duplicate_path(&manifest.children) {
432 return Err(TreeError::ManifestPathEscape {
433 path,
434 reason: "duplicate child path under Unicode NFC normalization (case-insensitive FS collision risk)"
435 .to_string(),
436 });
437 }
438 for child in &manifest.children {
439 // v1.2.0 Stage 1.c: per-segment boundary-preservation rejects.
440 // Layered AHEAD of the syntactic gate so the more specific
441 // `ManifestPathEscape` diagnostic wins for entries that would
442 // also fail the bare-name regex (e.g. `child:foo` is rejected
443 // here as a colon hazard instead of a generic charset miss).
444 let segment = child.path.as_deref().map_or_else(|| child.effective_path(), str::to_string);
445 if let Some(reason) = boundary_reject_reason(&segment) {
446 return Err(TreeError::ManifestPathEscape {
447 path: segment,
448 reason: reason.to_string(),
449 });
450 }
451 let Some(err) = check_child_path(child) else { continue };
452 match err {
453 PackValidationError::ChildPathInvalid { child_name, path, reason } => {
454 return Err(TreeError::ChildPathInvalid { child_name, path, reason });
455 }
456 other @ (PackValidationError::DuplicateSymlinkDst { .. }
457 | PackValidationError::GraphCycle { .. }
458 | PackValidationError::DependsOnUnsatisfied { .. }
459 | PackValidationError::ChildPathDuplicate { .. }) => {
460 // `check_child_path` is contracted to only emit
461 // `ChildPathInvalid`. Any other variant indicates the
462 // helper has drifted out of sync with this caller —
463 // surface loudly rather than silently swallowing it.
464 tracing::error!(
465 target: "grex::walker",
466 "check_child_path returned unexpected variant: {other:?}",
467 );
468 debug_assert!(false, "check_child_path returned unexpected variant: {other:?}");
469 }
470 }
471 }
472 Ok(())
473}
474
475/// v1.2.0 Stage 1.c: filesystem-resident boundary check. Run AFTER
476/// the destination has been resolved against the parent workspace but
477/// BEFORE any clone / fetch fires. Catches the case where the slot
478/// the walker is about to materialise into is already a junction,
479/// reparse point, symlink, or `.git`-as-file — each of which would
480/// re-introduce a parent-boundary escape.
481///
482/// Pre-clone: a non-existent destination is the happy path; the
483/// helper returns `None` and the walk continues. Post-clone or on a
484/// re-walk where the destination is already populated, the helper
485/// inspects the on-disk entry and surfaces a `ManifestPathEscape`
486/// when the entry violates the boundary contract.
487///
488/// Visibility: `pub(super)` — used by the walker's `handle_child`
489/// path-resolution step (wired in 1.c follow-up; this commit lands
490/// the helper itself and the boundary-check call site for the
491/// path-segment rejects).
492pub(super) fn check_dest_boundary(dest: &Path, segment: &str) -> Result<(), TreeError> {
493 if let Some(reason) = boundary_fs_reject_reason(dest) {
494 return Err(TreeError::ManifestPathEscape {
495 path: segment.to_string(),
496 reason: reason.to_string(),
497 });
498 }
499 Ok(())
500}
501
502/// Decide whether a `depends_on` entry is a URL rather than a bare name.
503/// The rule is intentionally literal — matching the spec's enumeration of
504/// accepted forms.
505pub(super) fn looks_like_url(s: &str) -> bool {
506 s.starts_with("http://")
507 || s.starts_with("https://")
508 || s.starts_with("ssh://")
509 || s.starts_with("git@")
510 || s.ends_with(".git")
511}
512
513// ---------------------------------------------------------------------------
514// v1.2.0 Stage 1.g — `sync_meta` entry point: parent-relative,
515// distributed-lockfile walker. Three phases per meta:
516//
517// Phase 1 (siblings): `classify_dest` (1.e) per child, dispatch
518// fetch / clone / refuse based on the verdict; aggregate
519// `PresentUndeclared` into `TreeError::UntrackedGitRepos`.
520// Phase 2 (orphan prune): for each `prune_candidate` (caller-supplied
521// by 1.h once the distributed lockfile read lands), run the
522// consent-walk via `phase2_prune` (1.f).
523// Phase 3 (recursion): per child whose dest carries
524// `<dest>/.grex/pack.yaml`, recursively `sync_meta` if `recurse`
525// is true and depth < `max_depth`.
526//
527// Design discipline:
528//
529// * **No new locking primitives.** Per-pack git ops acquire the M6
530// `PackLock` (synchronous `acquire`) for the duration of the
531// clone/fetch. The Lean axiom `sync_disjoint_commutes` (Bridge.lean)
532// permits any disjoint scheduler — sequential is the smallest model
533// that satisfies the axiom. Sibling parallelism via rayon is a 1.j /
534// 1.l-territory follow-up; the scaffolding here keeps the
535// single-threaded baseline correct first.
536// * **No lockfile mechanics.** Phase 2's orphan list is a parameter,
537// not a read from `<meta>/.grex/grex.lock.jsonl`. 1.h owns the
538// distributed-lockfile read/write surface; this commit only wires
539// the consent-walk + prune dispatch.
540// * **Error aggregation.** Every Phase 1 child failure plus every
541// Phase 2 refusal lands in `SyncMetaReport::errors` before the call
542// returns. The walker is fail-LOUD (caller gets the full picture),
543// not fail-fast (the legacy `Walker::walk` aborts on the first hit).
544// This matches the v1.2.0 walker.md §"untracked git policy" rule
545// that `UntrackedGitRepos` must enumerate every offender at once.
546// ---------------------------------------------------------------------------
547
548/// Per-meta options threaded through `sync_meta`. Keeps the call-site
549/// signature small without coupling to the full [`crate::sync::SyncOptions`]
550/// surface — the orchestrator (`sync.rs::run`) is responsible for projecting
551/// `SyncOptions` into `SyncMetaOptions` when it wires this entry point.
552#[derive(Debug, Clone)]
553pub struct SyncMetaOptions {
554 /// Global ref override (`grex sync --ref <sha|branch|tag>`). Mirrors
555 /// [`Walker::with_ref_override`]: when `Some`, every child's
556 /// declared `ref` is replaced.
557 pub ref_override: Option<String>,
558 /// When `true`, Phase 3 recurses into child metas. `false` is the
559 /// `doctor --shallow` semantics: process only the immediate
560 /// children of the supplied meta.
561 pub recurse: bool,
562 /// Bound on Phase 3 recursion depth. `None` is unbounded; `Some(n)`
563 /// caps at `n` levels of nesting (the supplied `meta_dir` is depth
564 /// 0). Recursion ALWAYS halts before depth `n+1`.
565 pub max_depth: Option<usize>,
566 /// Phase 2 prune-safety override. Mirrors
567 /// [`crate::sync::SyncOptions::force_prune`].
568 pub force_prune: bool,
569 /// Phase 2 prune-safety override. Mirrors
570 /// [`crate::sync::SyncOptions::force_prune_with_ignored`].
571 pub force_prune_with_ignored: bool,
572 /// v1.2.1 item 3 — rayon thread-pool size for sibling-parallel
573 /// Phase 1 + Phase 3. `None` ⇒ rayon's default (`num_cpus::get()`);
574 /// `Some(1)` ⇒ effectively sequential (single-threaded pool, useful
575 /// for determinism testing); `Some(n >= 2)` ⇒ bounded parallel.
576 /// `Some(0)` is clamped to `1` (rayon rejects a zero-thread pool).
577 /// Mirrors [`crate::sync::SyncOptions::parallel`] semantics with the
578 /// one exception that `0` is clamped to `1` here — the unbounded
579 /// sentinel only makes sense for tokio's `Semaphore::MAX_PERMITS`.
580 pub parallel: Option<usize>,
581 /// v1.2.1 item 5b — when `Some`, Phase 2 prunes are diverted
582 /// through the snapshot-then-unlink quarantine pipeline before
583 /// `unlink(dest)` fires. Carries the per-meta trash bucket root
584 /// and audit-log path. `None` (default) preserves the legacy
585 /// v1.2.0 direct-unlink path. Set by
586 /// [`crate::sync::SyncOptions::quarantine`] at the orchestrator
587 /// boundary; the consent layer reads this to pick the deletion
588 /// strategy. Lean theorem `quarantine_snapshot_precedes_delete`
589 /// proves the safety contract.
590 pub quarantine: Option<QuarantineConfig>,
591}
592
593impl Default for SyncMetaOptions {
594 fn default() -> Self {
595 Self {
596 ref_override: None,
597 recurse: true,
598 max_depth: None,
599 force_prune: false,
600 force_prune_with_ignored: false,
601 parallel: None,
602 quarantine: None,
603 }
604 }
605}
606
607/// Outcome of one [`sync_meta`] invocation. Aggregated across every
608/// recursion frame: a sub-meta's report is folded into its parent's
609/// report at the end of Phase 3.
610#[derive(Debug, Default)]
611pub struct SyncMetaReport {
612 /// Number of metas processed (this meta + every descendant Phase 3
613 /// recursion fired against). Useful for `--shallow` verification:
614 /// `recurse: false` means `metas_visited == 1`.
615 pub metas_visited: usize,
616 /// Per-child Phase 1 verdicts, keyed by parent-relative child path.
617 /// `(meta_dir, child_dest, classification)` — exposed primarily for
618 /// tests; downstream callers will project into a status report.
619 pub phase1_classifications: Vec<(PathBuf, PathBuf, DestClass)>,
620 /// Successful Phase 2 prunes (paths that were removed). Empty when
621 /// no orphan list was supplied or every orphan refused.
622 pub phase2_pruned: Vec<PathBuf>,
623 /// Aggregate of every error encountered across Phases 1, 2, and 3.
624 /// The walker continues past recoverable errors so the caller sees
625 /// the full picture in one pass.
626 pub errors: Vec<TreeError>,
627}
628
629impl SyncMetaReport {
630 fn merge(&mut self, mut child: SyncMetaReport) {
631 self.metas_visited += child.metas_visited;
632 self.phase1_classifications.append(&mut child.phase1_classifications);
633 self.phase2_pruned.append(&mut child.phase2_pruned);
634 self.errors.append(&mut child.errors);
635 }
636}
637
638/// Sync a meta pack and (optionally) its descendants.
639///
640/// `meta_dir` is the on-disk directory containing the meta's
641/// `.grex/pack.yaml`. `prune_candidates` is the list of orphan dests
642/// (parent-relative) the caller's distributed-lockfile reader determined
643/// no longer appear in `manifest.children`.
644///
645/// The walker is **fail-loud, not fail-fast**: recoverable errors land
646/// in [`SyncMetaReport::errors`] and the walk continues so the caller
647/// sees the full picture in one pass. The only short-circuit is a
648/// detected cycle, which surfaces as `Err(TreeError::CycleDetected)`
649/// to keep the cyclic-clone storm risk contained.
650///
651/// # Errors
652///
653/// Returns the *first* catastrophic error: manifest parse failure on
654/// the supplied `meta_dir`, a cycle in the manifest forest (URL+ref
655/// identity), or a pre-walk path-traversal violation. Per-child
656/// clone / fetch / prune failures aggregate into
657/// [`SyncMetaReport::errors`] without aborting the walk.
658pub fn sync_meta(
659 meta_dir: &Path,
660 backend: &dyn GitBackend,
661 loader: &dyn PackLoader,
662 opts: &SyncMetaOptions,
663 prune_candidates: &[PathBuf],
664) -> Result<SyncMetaReport, TreeError> {
665 // v1.2.3 (B4) — seed the ancestor chain with the root pack's
666 // path-namespaced identity (`path:<meta_dir>`) so the Lean
667 // `acyclic_path` precondition that drives
668 // `sync_meta_no_cycle_infinite_clone` is established right at
669 // the call site rather than implicitly relying on an empty
670 // initial ancestor list. Children identify with `url:<url>@<ref>` —
671 // disjoint namespace from the root's `path:` identity, so seeding
672 // does not introduce false-positive cycle hits against any
673 // legitimate child.
674 //
675 // `sync_meta_inner` extends this chain per recursion edge (Phase
676 // 3) using clone-per-child so disjoint sibling branches do not
677 // pollute each other's ancestor view.
678 let initial_ancestors = vec![pack_identity_for_root(meta_dir)];
679 sync_meta_inner(
680 meta_dir,
681 backend,
682 loader,
683 opts,
684 prune_candidates,
685 /* depth */ 0,
686 &initial_ancestors,
687 )
688}
689
690fn sync_meta_inner(
691 meta_dir: &Path,
692 backend: &dyn GitBackend,
693 loader: &dyn PackLoader,
694 opts: &SyncMetaOptions,
695 prune_candidates: &[PathBuf],
696 depth: usize,
697 ancestors: &[String],
698) -> Result<SyncMetaReport, TreeError> {
699 let manifest = loader.load(meta_dir)?;
700 // v1.2.0 Stage 1.c gate — every recursion frame re-runs the
701 // path-traversal sweep before any child is touched on disk.
702 validate_children_paths(&manifest)?;
703
704 let mut report = SyncMetaReport { metas_visited: 1, ..SyncMetaReport::default() };
705
706 // v1.2.1 item 3: build a per-call rayon pool sized from
707 // `opts.parallel`. Phase 1 + Phase 3 install on this pool; Phase 2
708 // stays sequential (single-meta orphan sweep — no sibling
709 // parallelism to extract). The pool is dropped at the end of
710 // `sync_meta_inner`, so each recursion frame builds + tears down
711 // its own pool. This is intentional: we want the worker count to
712 // refresh per call so a top-level `--parallel 1` cap is honoured
713 // without piggy-backing on a global pool that an unrelated caller
714 // might have configured differently.
715 let pool = build_pool(opts.parallel)?;
716
717 phase1_sync_children(&pool, meta_dir, &manifest, backend, opts, &mut report);
718 phase2_prune_orphans(meta_dir, prune_candidates, opts, &mut report);
719 // v1.2.2 — cycle detection short-circuits the recursion edge with
720 // an `Err` return so the caller sees `Err(CycleDetected)` directly
721 // rather than burying it in `report.errors`. Cycles are catastrophic
722 // (would otherwise clone forever); fail-loud here, NOT fold-into-report.
723 phase3_recurse(
724 &pool,
725 meta_dir,
726 &manifest,
727 backend,
728 loader,
729 opts,
730 depth,
731 ancestors,
732 &mut report,
733 )?;
734
735 Ok(report)
736}
737
738/// v1.2.1 item 3 — build a rayon `ThreadPool` sized from
739/// `opts.parallel`. Encapsulates the `None` ⇒ default,
740/// `Some(0)` ⇒ clamp-to-1, `Some(n)` ⇒ exact-N policy in one place
741/// so Phase 1 and Phase 3 install on identically-configured pools.
742///
743/// `Some(1)` produces a single-worker pool — the determinism
744/// test-mode fast-path (sibling iteration order matches sequential
745/// for-loop order on a 1-thread pool).
746///
747/// Build failures surface as [`TreeError::ManifestRead`]: a rayon
748/// pool failure is invariably a host-resource issue (out of file
749/// descriptors, thread-creation refused) — bucketing it into the
750/// generic IO-error variant keeps the error surface tight without
751/// inventing a one-off `RayonPoolBuild` discriminant. The Lean
752/// model treats pool construction as a well-formedness precondition
753/// of `sync`, not an in-band failure mode.
754fn build_pool(parallel: Option<usize>) -> Result<rayon::ThreadPool, TreeError> {
755 let mut builder = rayon::ThreadPoolBuilder::new();
756 if let Some(n) = parallel {
757 builder = builder.num_threads(n.max(1));
758 }
759 builder.build().map_err(|e| {
760 TreeError::ManifestRead(format!("failed to build rayon pool for sync_meta: {e}"))
761 })
762}
763
764/// Per-child output from Phase 1's parallel pass. Collected into a
765/// `Vec` after the rayon `par_iter` settles, then drained into the
766/// caller's `SyncMetaReport` in a single sequential pass. Carrying
767/// the data plain (no `&mut report` shared across threads) is what
768/// keeps the parallelisation sound under the Lean
769/// `sync_disjoint_commutes` axiom: each iteration's mutations are
770/// confined to its own owned struct.
771struct Phase1ChildOutcome {
772 /// `(meta_dir, dest, class)` — pushed onto
773 /// `report.phase1_classifications` regardless of dispatch outcome.
774 classification: (PathBuf, PathBuf, DestClass),
775 /// Per-child clone/fetch failure, if any. Folded into
776 /// `report.errors`.
777 error: Option<TreeError>,
778 /// `Some((dest, class))` when the child classified as
779 /// `PresentUndeclared`; the caller aggregates these into one
780 /// `UntrackedGitRepos` error after the parallel pass.
781 undeclared: Option<(PathBuf, DestClass)>,
782}
783
784/// Phase 1: classify each declared child, then dispatch. Per the v1.2.0
785/// walker.md pseudocode the per-child branches are:
786///
787/// * `Missing` → clone via `backend.clone(url, dest, ref)`.
788/// * `PresentDeclared` → fetch (+ checkout if a ref override applies).
789/// * `PresentDirty` → no-op (preserve user changes; will surface at
790/// exec/plan stage if applicable).
791/// * `PresentInProgress` → refuse via `DirtyTreeRefusal{GitInProgress}`
792/// (collected into `report.errors`).
793/// * `PresentUndeclared` → impossible at Phase 1 dispatch time because
794/// declared paths are in `manifest.children`; the variant is reserved
795/// for the lockfile-orphan sweep (Phase 2 territory).
796///
797/// v1.2.1 item 3 — sibling-parallel via rayon `par_iter`. Disjointness
798/// across siblings (each child has its own `meta_dir.join(child.path)`
799/// dest, validated by `validate_children_paths` upstream) discharges
800/// the precondition of the `sync_disjoint_commutes` axiom in
801/// `proof/Grex/Bridge.lean`. The per-pack `.grex-lock` (M6, acquired
802/// inside the GitBackend implementation) continues to serialise any
803/// cross-task contention on the same pack path. Per-thread results
804/// are collected into a `Vec<Phase1ChildOutcome>` and folded into the
805/// caller's `SyncMetaReport` in a single sequential pass, preserving
806/// deterministic ordering of `report.phase1_classifications` (rayon
807/// `collect_into_vec` preserves source-order regardless of completion
808/// order).
809fn phase1_sync_children(
810 pool: &rayon::ThreadPool,
811 meta_dir: &Path,
812 manifest: &PackManifest,
813 backend: &dyn GitBackend,
814 opts: &SyncMetaOptions,
815 report: &mut SyncMetaReport,
816) {
817 // Install on the per-call pool so `--parallel N` is honoured even
818 // when this is invoked from inside another rayon context (Phase 3
819 // recursion). `install` is a synchronous fence: the closure
820 // returns once every parallel iteration has settled.
821 let outcomes: Vec<Phase1ChildOutcome> = pool.install(|| {
822 manifest
823 .children
824 .par_iter()
825 .map(|child| phase1_handle_child(meta_dir, child, backend, opts))
826 .collect()
827 });
828
829 // Sequential fold: the parallel pass cannot mutate `report` directly
830 // (it is `&mut`), so we drain the per-child outcomes here. Order is
831 // preserved by `par_iter().collect()` — see the `phase1_par_iter_preserves_order`
832 // test below.
833 let mut undeclared_seen: Vec<(PathBuf, DestClass)> = Vec::new();
834 for outcome in outcomes {
835 report.phase1_classifications.push(outcome.classification);
836 if let Some(e) = outcome.error {
837 report.errors.push(e);
838 }
839 if let Some(pair) = outcome.undeclared {
840 undeclared_seen.push(pair);
841 }
842 }
843 if let Err(e) = aggregate_untracked(undeclared_seen) {
844 report.errors.push(e);
845 }
846}
847
848/// Per-child Phase 1 dispatch — runs inside the rayon pool. The
849/// extracted fn keeps the parallel closure body small and gives the
850/// Lean axiom a single discoverable Rust contract anchor (this fn is
851/// the per-sibling unit of work the `sync_disjoint_commutes` axiom
852/// quantifies over).
853fn phase1_handle_child(
854 meta_dir: &Path,
855 child: &ChildRef,
856 backend: &dyn GitBackend,
857 opts: &SyncMetaOptions,
858) -> Phase1ChildOutcome {
859 let dest = meta_dir.join(child.effective_path());
860 // Every declared child IS in the manifest by construction —
861 // `declared_in_manifest = true` is the only correct call here.
862 let class = classify_dest(&dest, true, None);
863 let mut out = Phase1ChildOutcome {
864 classification: (meta_dir.to_path_buf(), dest.clone(), class),
865 error: None,
866 undeclared: None,
867 };
868 match class {
869 DestClass::Missing => {
870 if let Err(e) = phase1_clone(backend, child, &dest, opts) {
871 out.error = Some(e);
872 }
873 }
874 DestClass::PresentDeclared => {
875 if let Err(e) = phase1_fetch(backend, child, &dest, opts) {
876 out.error = Some(e);
877 }
878 }
879 DestClass::PresentDirty => {
880 // Conservative: leave the dirty tree untouched. The
881 // operator has uncommitted work; v1.2.0 walker policy
882 // is to never overwrite their bytes during Phase 1.
883 // Phase 2 will surface a refusal if the operator ALSO
884 // requested a prune of this path, but that's a
885 // separate decision made by the caller's lockfile-
886 // orphan computation.
887 }
888 DestClass::PresentInProgress => {
889 out.error = Some(TreeError::DirtyTreeRefusal {
890 path: dest.clone(),
891 kind: super::error::DirtyTreeRefusalKind::GitInProgress,
892 });
893 }
894 DestClass::PresentUndeclared => {
895 // Buffer for `aggregate_untracked` so we surface the
896 // FULL list in one error.
897 out.undeclared = Some((dest, class));
898 }
899 }
900 out
901}
902
903/// Phase 1 clone helper. Acquires the M6 `PackLock` on the prospective
904/// dest's parent (`meta_dir`) for the duration of the clone — distinct
905/// children clone serially within a meta to keep the scheduler-tier
906/// model honest. Sibling parallelism is a 1.j follow-up.
907fn phase1_clone(
908 backend: &dyn GitBackend,
909 child: &ChildRef,
910 dest: &Path,
911 opts: &SyncMetaOptions,
912) -> Result<(), TreeError> {
913 let effective_ref = opts.ref_override.as_deref().or(child.r#ref.as_deref());
914 // Make sure the dest's parent exists — the clone backend assumes
915 // it. v1.2.0 invariant 1 (boundary) and 1.c's `validate_children_paths`
916 // already ruled out a path that would escape `meta_dir`, so a
917 // simple `create_dir_all` on the parent is safe here.
918 if let Some(parent) = dest.parent() {
919 std::fs::create_dir_all(parent).map_err(|e| {
920 TreeError::ManifestRead(format!("failed to mkdir parent {}: {e}", parent.display()))
921 })?;
922 }
923 backend.clone(&child.url, dest, effective_ref)?;
924 Ok(())
925}
926
927/// Phase 1 fetch helper. Same locking discipline as `phase1_clone`.
928fn phase1_fetch(
929 backend: &dyn GitBackend,
930 child: &ChildRef,
931 dest: &Path,
932 opts: &SyncMetaOptions,
933) -> Result<(), TreeError> {
934 backend.fetch(dest)?;
935 let effective_ref = opts.ref_override.as_deref().or(child.r#ref.as_deref());
936 if let Some(r) = effective_ref {
937 backend.checkout(dest, r)?;
938 }
939 Ok(())
940}
941
942/// Phase 2: prune orphan lockfile entries. Each candidate is run
943/// through the consent-walk via `phase2_prune` (1.f); a `Clean` verdict
944/// removes the dest, anything else surfaces as an error. The orphan
945/// list is supplied by the caller — 1.h owns the lockfile-read side
946/// of the walker contract.
947fn phase2_prune_orphans(
948 meta_dir: &Path,
949 prune_candidates: &[PathBuf],
950 opts: &SyncMetaOptions,
951 report: &mut SyncMetaReport,
952) {
953 // v1.2.0 Stage 1.l — postmortem audit log path. Resolved once per
954 // meta from the canonical `<meta_dir>/.grex/events.jsonl` slot;
955 // `phase2_prune` only writes to it when an override flag actually
956 // consumed a non-Clean verdict (clean prunes never log).
957 let audit_log = crate::manifest::event_log_path(meta_dir);
958 for candidate in prune_candidates {
959 // Candidates are parent-relative POSIX paths
960 // (`LockEntry::validate_path` invariant from 1.b). Resolve
961 // against `meta_dir` to get the absolute dest.
962 let dest = meta_dir.join(candidate);
963 match phase2_prune(
964 &dest,
965 opts.force_prune,
966 opts.force_prune_with_ignored,
967 Some(audit_log.as_path()),
968 opts.quarantine.as_ref(),
969 ) {
970 Ok(()) => report.phase2_pruned.push(dest),
971 Err(e) => report.errors.push(e),
972 }
973 }
974}
975
976/// Per-child output from Phase 3's parallel recursion. Each variant
977/// carries either a successful sub-`SyncMetaReport` (folded into the
978/// caller via [`SyncMetaReport::merge`]) or a fatal error to push onto
979/// `report.errors`. Children whose dest does NOT carry a sub-meta
980/// produce `Skipped`.
981///
982/// v1.2.4 — `Cancelled` is the EARLY-OUT contributed by a sibling
983/// closure that observed the per-`phase3_recurse` cancellation flag
984/// already flipped to `true`. It carries no sub-report (no work was
985/// done) and never contributes to `report.metas_visited` or
986/// `report.errors` — the cycle that triggered the flip is the sole
987/// error reported. Mirrors the Lean
988/// `cancellation_terminates_promptly` theorem: when `cancelled` is
989/// observed at entry, return ok with zero descent.
990enum Phase3ChildOutcome {
991 Skipped,
992 Recursed(SyncMetaReport),
993 Failed(TreeError),
994 Cancelled,
995}
996
997/// Phase 3: parallel recursion into child metas. A child qualifies for
998/// recursion when:
999///
1000/// 1. `opts.recurse` is `true`,
1001/// 2. `opts.max_depth` is unbounded OR the next-frame depth is
1002/// strictly less than the cap,
1003/// 3. `<dest>/.grex/pack.yaml` exists.
1004///
1005/// Sub-meta reports are merged into the parent's report via
1006/// [`SyncMetaReport::merge`] so a top-level caller sees one rolled-up
1007/// view of every frame's classifications + errors.
1008///
1009/// v1.2.1 item 3 — sibling-parallel via rayon `par_iter`. Each
1010/// recursion frame builds its own thread pool inside `sync_meta_inner`
1011/// (work-stealing across recursion levels happens naturally because
1012/// the inner `pool.install` blocks for the lifetime of the inner
1013/// sync_meta call; sibling sub-metas at level N execute in parallel
1014/// via the level-N pool, and each level-N child carries its own
1015/// level-(N+1) pool for its own grandchildren). Sub-reports are
1016/// collected source-ordered via `collect_into_vec`, then folded into
1017/// `report` sequentially to preserve deterministic ordering of the
1018/// `phase1_classifications` / `phase2_pruned` / `errors` vectors.
1019// Pre-rayon refactor this fn already carried 7 args (the clippy cap).
1020// v1.2.1 item 3 added the `pool` reference, taking it to 8. Bundling
1021// these into a context struct is technically possible but every other
1022// arg already comes from `sync_meta_inner`'s param list, so the struct
1023// would just shuffle the wiring without removing it. Localised allow
1024// instead — the call-site is private to this module and threads
1025// ownership of `pool` cleanly.
1026/// Per-child Phase 3 dispatch — runs inside the rayon pool. Mirrors
1027/// the `phase1_handle_child` / `sync_disjoint_commutes` discipline
1028/// (one discoverable Rust contract anchor per sibling unit of work)
1029/// and keeps `phase3_recurse` itself under the clippy line cap.
1030///
1031/// v1.2.2 — cycle detection lives here. `ancestors` is the in-progress
1032/// ancestor identity chain from root down to (but excluding) this
1033/// child. If the child's identity (`pack_identity_for_child`) is
1034/// already in the chain we surface `TreeError::CycleDetected` with
1035/// the chain extended by the recurring identity. Otherwise the
1036/// child's identity is appended (clone-per-child, A.1) so disjoint
1037/// sibling branches do not pollute each other's view.
1038///
1039/// v1.2.3 (B1) — the depth-cap check (`next_depth > opts.max_depth`)
1040/// MUST run AFTER the cycle check. Otherwise a cyclic manifest whose
1041/// cycle length exceeds `max_depth` would silently truncate without
1042/// surfacing `CycleDetected`: the depth cap is a "stop walking
1043/// further" knob, not a "ignore correctness invariants" knob.
1044///
1045/// v1.2.4 (A1) — cancellation flag plumbed through. The closure
1046/// observes `cancelled.load(Relaxed)` at entry: if a prior sibling
1047/// already detected a cycle and signalled, this closure returns
1048/// `Phase3ChildOutcome::Cancelled` with zero further work (no
1049/// classify, no clone, no recursion). When this closure itself
1050/// detects a cycle, it stores `true` into the flag BEFORE returning
1051/// `Phase3ChildOutcome::Failed(CycleDetected)` so any rayon-co-iterated
1052/// sibling not yet started observes the signal on its next entry. The
1053/// flag is per-`phase3_recurse`-call: recursive sub-`sync_meta_inner`
1054/// invocations build their own flag inside their own
1055/// `phase3_recurse`, so a cycle two levels down does not cancel
1056/// disjoint siblings at level one. Discharges the Lean
1057/// `cancellation_terminates_promptly` obligation.
1058#[allow(clippy::too_many_arguments)]
1059fn phase3_handle_child(
1060 meta_dir: &Path,
1061 child: &ChildRef,
1062 backend: &dyn GitBackend,
1063 loader: &dyn PackLoader,
1064 opts: &SyncMetaOptions,
1065 next_depth: usize,
1066 ancestors: &[String],
1067 cancelled: &AtomicBool,
1068) -> Phase3ChildOutcome {
1069 // v1.2.4 EARLY-OUT — a sibling closure already detected a cycle
1070 // and signalled. Return immediately with zero descent so partial
1071 // clones / deep walks never start. Matches the Lean
1072 // `cancellation_terminates_promptly` theorem: cancelled = true
1073 // implies ok with zero recursive steps.
1074 if cancelled.load(Ordering::Relaxed) {
1075 return Phase3ChildOutcome::Cancelled;
1076 }
1077 let dest = meta_dir.join(child.effective_path());
1078 if !dest.join(".grex").join("pack.yaml").is_file() {
1079 return Phase3ChildOutcome::Skipped;
1080 }
1081 // v1.2.2 cycle detection — discharges the
1082 // `sync_meta_no_cycle_infinite_clone` Lean theorem in
1083 // `proof/Grex/Walker.lean`. Identity is `url@ref` so the same
1084 // repo at two different refs is two distinct packs (intentional:
1085 // matches `pack_identity_for_child` and the build_graph cycle
1086 // detector at `graph_build.rs:174`). A single `Vec<String>`
1087 // doubles as O(depth) contains-check AND deterministic chain for
1088 // error display — depth is bounded ~5-10 in practice so linear
1089 // scan beats hashing here.
1090 //
1091 // v1.2.3 (B1): runs BEFORE the depth-cap early-return below so a
1092 // cycle longer than `max_depth` cannot hide behind truncation.
1093 let id = pack_identity_for_child(child);
1094 if ancestors.iter().any(|v| v == &id) {
1095 // v1.2.4 SIGNAL — flip the cancellation flag so any
1096 // co-iterated sibling closures observe it on their next
1097 // entry. `Relaxed` is sufficient: we need eventual visibility,
1098 // not strict happens-before ordering against any other memory
1099 // operation. See design doc §"Atomic ordering".
1100 cancelled.store(true, Ordering::Relaxed);
1101 let mut chain = ancestors.to_vec();
1102 chain.push(id);
1103 return Phase3ChildOutcome::Failed(TreeError::CycleDetected { chain });
1104 }
1105 // v1.2.3 (B1): depth-cap check moved from `phase3_recurse` to
1106 // here, AFTER the cycle check. `Skipped` rather than a hard error
1107 // because depth-cap truncation is a benign best-effort knob —
1108 // siblings further down the manifest tree should still classify.
1109 if let Some(cap) = opts.max_depth {
1110 if next_depth > cap {
1111 return Phase3ChildOutcome::Skipped;
1112 }
1113 }
1114 // Clone-per-child (A.1): each rayon iteration owns its own
1115 // ancestor view, so disjoint sibling branches do not see each
1116 // other on the path. A diamond where two siblings legitimately
1117 // depend on the same descendant is therefore not a cycle.
1118 let mut child_ancestors = ancestors.to_vec();
1119 child_ancestors.push(id);
1120 // Empty `prune_candidates` for the sub-meta — 1.h supplies the
1121 // sub-meta's distributed lockfile read via the same caller
1122 // pathway when it lands.
1123 match sync_meta_inner(&dest, backend, loader, opts, &[], next_depth, &child_ancestors) {
1124 Ok(sub) => Phase3ChildOutcome::Recursed(sub),
1125 Err(e) => Phase3ChildOutcome::Failed(e),
1126 }
1127}
1128
1129#[allow(clippy::too_many_arguments)]
1130fn phase3_recurse(
1131 pool: &rayon::ThreadPool,
1132 meta_dir: &Path,
1133 manifest: &PackManifest,
1134 backend: &dyn GitBackend,
1135 loader: &dyn PackLoader,
1136 opts: &SyncMetaOptions,
1137 depth: usize,
1138 ancestors: &[String],
1139 report: &mut SyncMetaReport,
1140) -> Result<(), TreeError> {
1141 if !opts.recurse {
1142 return Ok(());
1143 }
1144 let next_depth = depth + 1;
1145 // v1.2.3 (B1): depth-cap early-return removed from this site —
1146 // moved into `phase3_handle_child` AFTER the cycle check so a
1147 // cycle longer than `max_depth` cannot mask itself by tripping
1148 // the depth cap before the cycle test fires. The per-child
1149 // handler now treats `next_depth > cap` as `Skipped`.
1150 //
1151 // v1.2.4 (A1): per-call cancellation flag. One `Arc<AtomicBool>`
1152 // is constructed here and shared (read-only across closures, with
1153 // a single point-of-truth `store` in any closure that detects a
1154 // cycle) by every sibling iteration of this `par_iter`. Recursive
1155 // sub-`sync_meta_inner` calls build their own flag via their own
1156 // `phase3_recurse` — disjoint subtrees do not cross-cancel. The
1157 // flag is dropped when this fn returns, so its lifetime is exactly
1158 // the rayon parallel pass it scopes.
1159 //
1160 // Scope: cancels siblings within THIS Phase 3 fan-out call only;
1161 // recursive sub-fan-outs construct their own flag, so a cycle deep
1162 // in pack X does not cancel pack Y at root level. This isolation
1163 // is intentional and tested by
1164 // `cancellation_per_call_scope_isolates_subtrees`.
1165 let cancelled = Arc::new(AtomicBool::new(false));
1166 let outcomes: Vec<Phase3ChildOutcome> = pool.install(|| {
1167 manifest
1168 .children
1169 .par_iter()
1170 .map(|child| {
1171 phase3_handle_child(
1172 meta_dir, child, backend, loader, opts, next_depth, ancestors, &cancelled,
1173 )
1174 })
1175 .collect()
1176 });
1177 // Cycle errors short-circuit (catastrophic — clone-storm risk);
1178 // every other outcome folds into the report per the existing
1179 // fail-loud-but-continue policy. v1.2.4: `Cancelled` outcomes are
1180 // skipped — they carry no sub-report and contribute neither to
1181 // `report.metas_visited` nor to `report.errors`. The cycle that
1182 // triggered the cancellation is the sole error reported.
1183 let mut first_cycle_idx: Option<usize> = None;
1184 for outcome in outcomes {
1185 match outcome {
1186 Phase3ChildOutcome::Skipped | Phase3ChildOutcome::Cancelled => {}
1187 Phase3ChildOutcome::Recursed(sub) => report.merge(sub),
1188 Phase3ChildOutcome::Failed(e) => {
1189 // v1.2.2 fix: surface all sibling cycles in
1190 // report.errors; first cycle returned as short-circuit
1191 // Err per fail-loud policy.
1192 if matches!(e, TreeError::CycleDetected { .. }) && first_cycle_idx.is_none() {
1193 first_cycle_idx = Some(report.errors.len());
1194 }
1195 report.errors.push(e);
1196 }
1197 }
1198 }
1199 if let Some(idx) = first_cycle_idx {
1200 // Clone the cycle to return as the short-circuit Err while
1201 // leaving the original entry (and any sibling cycles) recorded
1202 // in report.errors for the caller to log/print.
1203 let TreeError::CycleDetected { chain } = &report.errors[idx] else {
1204 unreachable!("first_cycle_idx points at a CycleDetected variant by construction");
1205 };
1206 return Err(TreeError::CycleDetected { chain: chain.clone() });
1207 }
1208 Ok(())
1209}
1210
1211#[cfg(test)]
1212mod tests {
1213 use super::*;
1214
1215 /// Direct unit test of the synthesis helper — name must equal the
1216 /// child's `effective_path()`, type must be `Scripted`, and every
1217 /// list field must be empty.
1218 #[test]
1219 fn synthesize_plain_git_manifest_yields_leaf_scripted_pack() {
1220 let child = ChildRef {
1221 url: "https://example.com/algo-leet.git".to_string(),
1222 path: None,
1223 r#ref: None,
1224 };
1225 let manifest = synthesize_plain_git_manifest(&child);
1226 assert_eq!(manifest.name, child.effective_path());
1227 assert_eq!(manifest.name, "algo-leet");
1228 assert_eq!(manifest.r#type, PackType::Scripted);
1229 assert_eq!(manifest.schema_version.as_str(), "1");
1230 assert!(manifest.depends_on.is_empty());
1231 assert!(manifest.children.is_empty());
1232 assert!(manifest.actions.is_empty());
1233 assert!(manifest.teardown.is_none());
1234 assert!(manifest.extensions.is_empty());
1235 assert!(manifest.version.is_none());
1236 }
1237
1238 /// Explicit `path:` override wins over the URL-derived bare name —
1239 /// confirms the synthesised manifest's `name` mirrors what the
1240 /// parent declared, so `verify_child_name` passes by construction.
1241 #[test]
1242 fn synthesize_plain_git_manifest_honours_explicit_path() {
1243 let child = ChildRef {
1244 url: "https://example.com/some-repo.git".to_string(),
1245 path: Some("custom-name".to_string()),
1246 r#ref: None,
1247 };
1248 let manifest = synthesize_plain_git_manifest(&child);
1249 assert_eq!(manifest.name, "custom-name");
1250 }
1251
1252 /// `dest_has_git_repo` MUST refuse a symlinked destination — even
1253 /// when the symlink target carries a real `.git/` directory.
1254 /// Otherwise a malicious parent pack could redirect synthesis to
1255 /// fetch into `$HOME` (or any sibling repo) by relying on a
1256 /// pre-existing symlink in the workspace.
1257 #[test]
1258 fn dest_has_git_repo_rejects_symlinked_dest() {
1259 // Skip on platforms where unprivileged symlink creation fails
1260 // (notably Windows without Developer Mode). Failing the symlink
1261 // call is itself proof the attack vector is closed for that
1262 // host, so the rest of the test is moot.
1263 let outer = tempfile::tempdir().unwrap();
1264 let real = outer.path().join("real-repo");
1265 std::fs::create_dir_all(real.join(".git")).unwrap();
1266 let link = outer.path().join("via-link");
1267
1268 #[cfg(unix)]
1269 let symlink_result = std::os::unix::fs::symlink(&real, &link);
1270 #[cfg(windows)]
1271 let symlink_result = std::os::windows::fs::symlink_dir(&real, &link);
1272
1273 if symlink_result.is_err() {
1274 // Host won't let us create a symlink — nothing to test.
1275 return;
1276 }
1277
1278 // Sanity: following the symlink would reveal `.git`.
1279 assert!(link.join(".git").exists(), "symlink target should expose .git through traversal");
1280 // But `dest_has_git_repo` must refuse it.
1281 assert!(
1282 !dest_has_git_repo(&link),
1283 "dest_has_git_repo must refuse a symlinked destination even when target has .git"
1284 );
1285 // Real (non-symlinked) sibling still passes — we haven't
1286 // accidentally broken the happy path.
1287 assert!(dest_has_git_repo(&real));
1288 }
1289
1290 // -----------------------------------------------------------------
1291 // v1.2.0 Stage 1.g — `sync_meta` three-phase walker tests (TDD).
1292 //
1293 // These tests use a thin in-memory `MockLoader` plus
1294 // `MockGitBackend` so the walker's PHASE ORCHESTRATION (not the
1295 // backend mechanics) is what's being exercised. The git-touching
1296 // primitives `classify_dest` (1.e) and `phase2_prune` (1.f) have
1297 // their own per-host tests that already cover the real-FS-and-git
1298 // path. The `host_has_git_binary` gate guards the few tests that
1299 // need a working `git` to materialise a clean `PresentDeclared`
1300 // verdict — same precedent as the `dest_class::tests` host-skip
1301 // pattern.
1302 // -----------------------------------------------------------------
1303
1304 use std::collections::HashMap;
1305 use std::sync::Mutex;
1306
1307 /// Minimal stand-in `PackLoader` for the v1.2.0 tests. Maps
1308 /// `meta_dir` → `PackManifest` directly so we never touch disk
1309 /// for manifest reads.
1310 struct InMemLoader {
1311 manifests: HashMap<PathBuf, PackManifest>,
1312 }
1313
1314 impl InMemLoader {
1315 fn new() -> Self {
1316 Self { manifests: HashMap::new() }
1317 }
1318 fn with(mut self, dir: impl Into<PathBuf>, m: PackManifest) -> Self {
1319 self.manifests.insert(dir.into(), m);
1320 self
1321 }
1322 }
1323
1324 impl PackLoader for InMemLoader {
1325 fn load(&self, path: &Path) -> Result<PackManifest, TreeError> {
1326 self.manifests
1327 .get(path)
1328 .cloned()
1329 .ok_or_else(|| TreeError::ManifestNotFound(path.to_path_buf()))
1330 }
1331 }
1332
1333 /// Minimal stand-in `GitBackend`. Records every call so tests can
1334 /// assert phase orchestration. `clone` materialises a `.git/`
1335 /// under the supplied dest so subsequent classify probes treat the
1336 /// slot as Present.
1337 #[allow(dead_code)] // fields populated for future test introspection.
1338 #[derive(Debug, Clone)]
1339 enum BackendCall {
1340 Clone { url: String, dest: PathBuf, r#ref: Option<String> },
1341 Fetch { dest: PathBuf },
1342 Checkout { dest: PathBuf, r#ref: String },
1343 HeadSha { dest: PathBuf },
1344 }
1345
1346 struct InMemGit {
1347 calls: Mutex<Vec<BackendCall>>,
1348 materialise_on_clone: bool,
1349 }
1350
1351 impl InMemGit {
1352 fn new() -> Self {
1353 Self { calls: Mutex::new(Vec::new()), materialise_on_clone: true }
1354 }
1355 fn calls(&self) -> Vec<BackendCall> {
1356 self.calls.lock().unwrap().clone()
1357 }
1358 }
1359
1360 impl GitBackend for InMemGit {
1361 fn name(&self) -> &'static str {
1362 "v1_2_0-mock-git"
1363 }
1364 fn clone(
1365 &self,
1366 url: &str,
1367 dest: &Path,
1368 r#ref: Option<&str>,
1369 ) -> Result<crate::ClonedRepo, crate::GitError> {
1370 self.calls.lock().unwrap().push(BackendCall::Clone {
1371 url: url.to_string(),
1372 dest: dest.to_path_buf(),
1373 r#ref: r#ref.map(str::to_string),
1374 });
1375 if self.materialise_on_clone {
1376 std::fs::create_dir_all(dest.join(".git")).unwrap();
1377 }
1378 Ok(crate::ClonedRepo { path: dest.to_path_buf(), head_sha: "0".repeat(40) })
1379 }
1380 fn fetch(&self, dest: &Path) -> Result<(), crate::GitError> {
1381 self.calls.lock().unwrap().push(BackendCall::Fetch { dest: dest.to_path_buf() });
1382 Ok(())
1383 }
1384 fn checkout(&self, dest: &Path, r#ref: &str) -> Result<(), crate::GitError> {
1385 self.calls
1386 .lock()
1387 .unwrap()
1388 .push(BackendCall::Checkout { dest: dest.to_path_buf(), r#ref: r#ref.to_string() });
1389 Ok(())
1390 }
1391 fn head_sha(&self, dest: &Path) -> Result<String, crate::GitError> {
1392 self.calls.lock().unwrap().push(BackendCall::HeadSha { dest: dest.to_path_buf() });
1393 Ok("0".repeat(40))
1394 }
1395 }
1396
1397 /// Build a meta manifest with the supplied children.
1398 fn meta_manifest_with(name: &str, children: Vec<ChildRef>) -> PackManifest {
1399 PackManifest {
1400 schema_version: SchemaVersion::current(),
1401 name: name.to_string(),
1402 r#type: PackType::Meta,
1403 version: None,
1404 depends_on: Vec::new(),
1405 children,
1406 actions: Vec::new(),
1407 teardown: None,
1408 extensions: BTreeMap::new(),
1409 }
1410 }
1411
1412 fn child(url: &str, path: &str) -> ChildRef {
1413 ChildRef { url: url.to_string(), path: Some(path.to_string()), r#ref: None }
1414 }
1415
1416 fn host_has_git_binary() -> bool {
1417 std::process::Command::new("git")
1418 .arg("--version")
1419 .output()
1420 .is_ok_and(|o| o.status.success())
1421 }
1422
1423 /// Empty meta — no children → the walker returns Ok with no work.
1424 #[test]
1425 fn test_walker_v1_2_0_simple_meta_no_children() {
1426 let tmp = tempfile::tempdir().unwrap();
1427 let meta_dir = tmp.path().to_path_buf();
1428 let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("solo", vec![]));
1429 let backend = InMemGit::new();
1430 let opts = SyncMetaOptions::default();
1431 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1432 assert_eq!(report.metas_visited, 1);
1433 assert!(report.phase1_classifications.is_empty());
1434 assert!(report.phase2_pruned.is_empty());
1435 assert!(report.errors.is_empty());
1436 assert!(backend.calls().is_empty(), "no children → no git ops");
1437 }
1438
1439 /// Phase 1 classifies each child. With every dest absent on disk,
1440 /// every classification is `Missing` and the backend sees one
1441 /// `Clone` per child.
1442 #[test]
1443 fn test_walker_v1_2_0_phase1_classifies_each_child() {
1444 let tmp = tempfile::tempdir().unwrap();
1445 let meta_dir = tmp.path().to_path_buf();
1446 let kids = vec![
1447 child("https://example.com/a.git", "alpha"),
1448 child("https://example.com/b.git", "beta"),
1449 ];
1450 let loader =
1451 InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", kids.clone()));
1452 let backend = InMemGit::new();
1453 let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1454 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1455 assert_eq!(report.phase1_classifications.len(), 2);
1456 for (parent, _, class) in &report.phase1_classifications {
1457 assert_eq!(parent, &meta_dir);
1458 assert_eq!(*class, DestClass::Missing);
1459 }
1460 assert!(report.errors.is_empty());
1461 let calls = backend.calls();
1462 assert_eq!(calls.len(), 2, "one clone per child");
1463 for call in calls {
1464 assert!(matches!(call, BackendCall::Clone { .. }));
1465 }
1466 }
1467
1468 /// Phase 1 must aggregate every undeclared `.git/` directory it
1469 /// encounters into a single `UntrackedGitRepos` error. We
1470 /// pre-create two `.git/` slots BEFORE running `sync_meta` and
1471 /// declare them as siblings without paths matching — they classify
1472 /// as `PresentUndeclared` because the manifest does not list them.
1473 #[test]
1474 fn test_walker_v1_2_0_phase1_aggregates_untracked_error() {
1475 // Build a meta whose manifest declares ZERO children — every
1476 // pre-existing `.git/` slot is by definition undeclared.
1477 // Then drop two `.git/` directories under the meta dir and
1478 // (because v1.2.0's classifier needs the manifest declaration
1479 // signal at the call site, not on-disk discovery) run a
1480 // PARALLEL classifier sweep over the on-disk dirs to feed the
1481 // aggregator. This mirrors the way 1.h's lockfile-orphan
1482 // sweep will surface PresentUndeclared dirs into Phase 1's
1483 // collector when a child is removed from the manifest.
1484 let tmp = tempfile::tempdir().unwrap();
1485 let alpha = tmp.path().join("alpha");
1486 let beta = tmp.path().join("beta");
1487 std::fs::create_dir_all(alpha.join(".git")).unwrap();
1488 std::fs::create_dir_all(beta.join(".git")).unwrap();
1489 // Direct unit on the aggregator: feed two `PresentUndeclared`
1490 // pairs and assert the error carries both.
1491 let pairs: Vec<(PathBuf, DestClass)> = vec![
1492 (alpha.clone(), DestClass::PresentUndeclared),
1493 (beta.clone(), DestClass::PresentUndeclared),
1494 ];
1495 let err = aggregate_untracked(pairs).expect_err("two undeclared → error");
1496 match err {
1497 TreeError::UntrackedGitRepos { paths } => {
1498 assert_eq!(paths, vec![alpha, beta]);
1499 }
1500 other => panic!("expected UntrackedGitRepos, got {other:?}"),
1501 }
1502 }
1503
1504 /// Phase 2 prunes a clean orphan: the supplied candidate has a
1505 /// real `.git/` (initialised by `git init`), the consent walk
1506 /// returns Clean, the dest is removed.
1507 #[test]
1508 fn test_walker_v1_2_0_phase2_prunes_clean_orphans() {
1509 if !host_has_git_binary() {
1510 return;
1511 }
1512 let tmp = tempfile::tempdir().unwrap();
1513 let meta_dir = tmp.path().to_path_buf();
1514 // Create the orphan dest — clean repo, no manifest entry.
1515 let orphan = meta_dir.join("ghost");
1516 std::fs::create_dir_all(&orphan).unwrap();
1517 let init =
1518 std::process::Command::new("git").arg("-C").arg(&orphan).args(["init", "-q"]).status();
1519 if !matches!(init, Ok(s) if s.success()) {
1520 return;
1521 }
1522 let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", vec![]));
1523 let backend = InMemGit::new();
1524 let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1525 let prune_list = vec![PathBuf::from("ghost")];
1526 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &prune_list).expect("ok");
1527 assert_eq!(report.phase2_pruned.len(), 1, "clean orphan must be pruned");
1528 assert_eq!(report.phase2_pruned[0], orphan);
1529 assert!(!orphan.exists(), "dest must be removed after a clean prune");
1530 assert!(report.errors.is_empty());
1531 }
1532
1533 /// Phase 2 must REFUSE to prune a dirty orphan absent the override
1534 /// flag. The consent walk classifies it `DirtyTree`; the walker
1535 /// surfaces `DirtyTreeRefusal` and leaves the dest untouched.
1536 #[test]
1537 fn test_walker_v1_2_0_phase2_refuses_dirty_orphan() {
1538 if !host_has_git_binary() {
1539 return;
1540 }
1541 let tmp = tempfile::tempdir().unwrap();
1542 let meta_dir = tmp.path().to_path_buf();
1543 let orphan = meta_dir.join("dirty-ghost");
1544 std::fs::create_dir_all(&orphan).unwrap();
1545 let init =
1546 std::process::Command::new("git").arg("-C").arg(&orphan).args(["init", "-q"]).status();
1547 if !matches!(init, Ok(s) if s.success()) {
1548 return;
1549 }
1550 std::fs::write(orphan.join("scratch.txt"), b"unsaved").unwrap();
1551 let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", vec![]));
1552 let backend = InMemGit::new();
1553 let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1554 let prune_list = vec![PathBuf::from("dirty-ghost")];
1555 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &prune_list).expect("ok");
1556 assert!(report.phase2_pruned.is_empty(), "dirty orphan must NOT be pruned");
1557 assert!(orphan.exists(), "dest stays on disk when refused");
1558 assert_eq!(report.errors.len(), 1);
1559 assert!(matches!(report.errors[0], TreeError::DirtyTreeRefusal { .. }));
1560 }
1561
1562 /// Phase 3 recurses into a child meta when its `.grex/pack.yaml`
1563 /// exists. The sub-meta's own `metas_visited` is folded into the
1564 /// parent's report.
1565 #[test]
1566 fn test_walker_v1_2_0_phase3_recurses_into_sub_meta() {
1567 let tmp = tempfile::tempdir().unwrap();
1568 let meta_dir = tmp.path().to_path_buf();
1569 let child_dest = meta_dir.join("sub");
1570 // Pre-materialise the sub-meta on disk so Phase 1 classifies
1571 // the dest as PresentDeclared (no clone fired) and Phase 3
1572 // sees a `.grex/pack.yaml` to recurse into.
1573 make_sub_meta_on_disk(&child_dest, "sub");
1574 let loader = InMemLoader::new()
1575 .with(
1576 meta_dir.clone(),
1577 meta_manifest_with("root", vec![child("https://example.com/sub.git", "sub")]),
1578 )
1579 .with(child_dest.clone(), meta_manifest_with("sub", vec![]));
1580 let backend = InMemGit::new();
1581 let opts = SyncMetaOptions::default();
1582 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1583 assert_eq!(report.metas_visited, 2, "parent + sub-meta visited");
1584 assert!(report.errors.is_empty());
1585 }
1586
1587 /// `recurse: false` skips Phase 3 entirely — `metas_visited == 1`
1588 /// even when a child has a `.grex/pack.yaml`.
1589 #[test]
1590 fn test_walker_v1_2_0_phase3_max_depth_zero_skips_recursion() {
1591 let tmp = tempfile::tempdir().unwrap();
1592 let meta_dir = tmp.path().to_path_buf();
1593 let child_dest = meta_dir.join("sub");
1594 make_sub_meta_on_disk(&child_dest, "sub");
1595 let loader = InMemLoader::new()
1596 .with(
1597 meta_dir.clone(),
1598 meta_manifest_with("root", vec![child("https://example.com/sub.git", "sub")]),
1599 )
1600 .with(child_dest.clone(), meta_manifest_with("sub", vec![]));
1601 let backend = InMemGit::new();
1602 let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1603 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1604 assert_eq!(report.metas_visited, 1, "no recursion → only the root meta");
1605 }
1606
1607 /// `max_depth: Some(N)` caps recursion at N levels of nesting.
1608 /// Build a 3-level chain (root → mid → leaf) and assert
1609 /// `max_depth: Some(1)` visits root + mid (depth 0 + 1) but NOT
1610 /// leaf (depth 2).
1611 #[test]
1612 fn test_walker_v1_2_0_phase3_max_depth_n_stops_at_n_levels() {
1613 let tmp = tempfile::tempdir().unwrap();
1614 let root_dir = tmp.path().to_path_buf();
1615 let mid_dir = root_dir.join("mid");
1616 let leaf_dir = mid_dir.join("leaf");
1617 make_sub_meta_on_disk(&mid_dir, "mid");
1618 make_sub_meta_on_disk(&leaf_dir, "leaf");
1619 let loader = InMemLoader::new()
1620 .with(
1621 root_dir.clone(),
1622 meta_manifest_with("root", vec![child("https://example.com/mid.git", "mid")]),
1623 )
1624 .with(
1625 mid_dir.clone(),
1626 meta_manifest_with("mid", vec![child("https://example.com/leaf.git", "leaf")]),
1627 )
1628 .with(leaf_dir.clone(), meta_manifest_with("leaf", vec![]));
1629 let backend = InMemGit::new();
1630 let opts = SyncMetaOptions { max_depth: Some(1), ..SyncMetaOptions::default() };
1631 let report = sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect("ok");
1632 // depth 0 = root, depth 1 = mid → max_depth: Some(1) visits
1633 // root + mid (2 metas) and stops before recursing into leaf.
1634 assert_eq!(report.metas_visited, 2, "max_depth: Some(1) visits root + mid only");
1635 }
1636
1637 /// Helper: pre-populate a sub-meta directory at `dir` with a
1638 /// `.grex/pack.yaml` carrying `name` and a stub `.git/` so the
1639 /// classifier sees it as PresentDeclared.
1640 fn make_sub_meta_on_disk(dir: &Path, name: &str) {
1641 std::fs::create_dir_all(dir.join(".grex")).unwrap();
1642 std::fs::create_dir_all(dir.join(".git")).unwrap();
1643 let yaml = format!("schema_version: \"1\"\nname: {name}\ntype: meta\n");
1644 std::fs::write(dir.join(".grex/pack.yaml"), yaml).unwrap();
1645 }
1646
1647 /// Helper: collect the destinations Phase 1 recorded for a given
1648 /// parent meta from the rolled-up report.
1649 fn destinations_under(report: &SyncMetaReport, parent: &Path) -> Vec<PathBuf> {
1650 report
1651 .phase1_classifications
1652 .iter()
1653 .filter(|(p, _, _)| p == parent)
1654 .map(|(_, d, _)| d.clone())
1655 .collect()
1656 }
1657
1658 /// Parent-relative path resolution: a child declared at the root
1659 /// meta resolves to `<root>/<child>` — NOT to a global workspace
1660 /// anchor. Recursion into that child uses `<root>/<child>` as the
1661 /// new parent meta dir for resolving the grandchild.
1662 #[test]
1663 fn test_walker_v1_2_0_parent_relative_path_resolution() {
1664 let tmp = tempfile::tempdir().unwrap();
1665 let root_dir = tmp.path().to_path_buf();
1666 // Note: 1.c's path-segment validator forbids slashes in the
1667 // `path:` field, so multi-segment nesting is achieved by
1668 // chaining single-segment children across recursion frames.
1669 let tools_dir = root_dir.join("tools");
1670 let foo_dir = tools_dir.join("foo");
1671 make_sub_meta_on_disk(&tools_dir, "tools");
1672 make_sub_meta_on_disk(&foo_dir, "foo");
1673 let loader = InMemLoader::new()
1674 .with(
1675 root_dir.clone(),
1676 meta_manifest_with("root", vec![child("https://example.com/tools.git", "tools")]),
1677 )
1678 .with(
1679 tools_dir.clone(),
1680 meta_manifest_with("tools", vec![child("https://example.com/foo.git", "foo")]),
1681 )
1682 .with(foo_dir.clone(), meta_manifest_with("foo", vec![]));
1683 let backend = InMemGit::new();
1684 let opts = SyncMetaOptions::default();
1685 let report = sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect("ok");
1686 // Three metas visited: root → tools → foo.
1687 assert_eq!(report.metas_visited, 3);
1688 // Phase 1 classifications confirm parent-relative resolution:
1689 // every recorded dest is a SUBDIR of its recorded parent.
1690 for (parent, dest, _class) in &report.phase1_classifications {
1691 assert!(
1692 dest.starts_with(parent),
1693 "child dest {} must descend from parent {}",
1694 dest.display(),
1695 parent.display()
1696 );
1697 }
1698 // Spot-check the chain: root sees `tools`, tools sees `foo`.
1699 assert_eq!(destinations_under(&report, &root_dir), vec![tools_dir.clone()]);
1700 assert_eq!(destinations_under(&report, &tools_dir), vec![foo_dir.clone()]);
1701 }
1702
1703 // -----------------------------------------------------------------
1704 // v1.2.2 — `sync_meta` cycle detection (Phase 3 recursion edge).
1705 //
1706 // Discharges `sync_meta_no_cycle_infinite_clone` in
1707 // `proof/Grex/Walker.lean`. Identity scheme is `url@ref` so the
1708 // same repo at two different refs is NOT a cycle (covered by the
1709 // positive case below).
1710 // -----------------------------------------------------------------
1711
1712 /// `child_with_ref` mirrors `child()` but lets the caller pin a
1713 /// specific ref so two children of the same URL get distinct
1714 /// `pack_identity_for_child` strings (`url@ref`).
1715 fn child_with_ref(url: &str, path: &str, r#ref: &str) -> ChildRef {
1716 ChildRef {
1717 url: url.to_string(),
1718 path: Some(path.to_string()),
1719 r#ref: Some(r#ref.to_string()),
1720 }
1721 }
1722
1723 /// Self-loop: pack A declares itself (same URL, no ref) as a child.
1724 /// The walker must abort with `CycleDetected` rather than recurse
1725 /// infinitely. The chain reports the recurring identity.
1726 #[test]
1727 fn cycle_self_loop_aborts() {
1728 let tmp = tempfile::tempdir().unwrap();
1729 let root_dir = tmp.path().to_path_buf();
1730 // Lay out a self-pointing pack: `<root>/a` is a sub-meta whose
1731 // own manifest declares a child with the SAME URL/ref pointing
1732 // back at itself (placed at a fresh path so on-disk dest is
1733 // distinct, but pack identity collides).
1734 let a_dir = root_dir.join("a");
1735 let a_self_dir = a_dir.join("a");
1736 make_sub_meta_on_disk(&a_dir, "a");
1737 make_sub_meta_on_disk(&a_self_dir, "a");
1738 let url_a = "https://example.com/a.git";
1739 let loader = InMemLoader::new()
1740 .with(root_dir.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
1741 // `a` declares itself — same url, same (empty) ref → same identity.
1742 .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_a, "a")]))
1743 .with(a_self_dir.clone(), meta_manifest_with("a", vec![]));
1744 let backend = InMemGit::new();
1745 let opts = SyncMetaOptions::default();
1746 let err =
1747 sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect_err("self-loop must abort");
1748 match err {
1749 TreeError::CycleDetected { chain } => {
1750 // v1.2.3 (B4): chain begins with the root's
1751 // path-namespaced identity (`path:<root_dir>`) — the
1752 // initial visited seed — followed by the cyclic
1753 // child identities. v1.2.3 (B2): empty/None ref drops
1754 // the trailing `@`, so the cyclic id is just
1755 // `url:<url_a>` (no `@`).
1756 let id_a = format!("url:{url_a}");
1757 assert!(
1758 chain.iter().any(|s| s == &id_a),
1759 "chain must mention the cyclic url, got {chain:?}"
1760 );
1761 assert!(chain.len() >= 2, "self-loop chain has at least 2 entries: {chain:?}");
1762 let last = chain.last().unwrap();
1763 assert_eq!(last, &id_a, "chain must end with the recurring child identity");
1764 let first_match = chain.iter().position(|s| s == last).unwrap();
1765 assert!(
1766 first_match < chain.len() - 1,
1767 "the recurring identity must appear earlier in the chain: {chain:?}"
1768 );
1769 // The root frame is path-namespaced and disjoint from
1770 // any child's url-namespaced identity, so it must
1771 // appear at the head of the chain without colliding.
1772 assert!(
1773 chain[0].starts_with("path:"),
1774 "chain head is the root path identity: {chain:?}"
1775 );
1776 }
1777 other => panic!("expected CycleDetected, got {other:?}"),
1778 }
1779 }
1780
1781 /// Three-node cycle: A → B → C → A. The walker must abort with
1782 /// `CycleDetected` and the chain must list all three identities
1783 /// in the order they were entered, ending with the recurring A.
1784 #[test]
1785 fn cycle_three_node_aborts() {
1786 let tmp = tempfile::tempdir().unwrap();
1787 let root_dir = tmp.path().to_path_buf();
1788 // Disk layout: root → a → b → c → a (the second `a` lives at
1789 // a fresh on-disk slot so classification succeeds; identity
1790 // collision is what trips the cycle detector, not the path).
1791 let a_dir = root_dir.join("a");
1792 let b_dir = a_dir.join("b");
1793 let c_dir = b_dir.join("c");
1794 let a2_dir = c_dir.join("a");
1795 make_sub_meta_on_disk(&a_dir, "a");
1796 make_sub_meta_on_disk(&b_dir, "b");
1797 make_sub_meta_on_disk(&c_dir, "c");
1798 make_sub_meta_on_disk(&a2_dir, "a");
1799 let url_a = "https://example.com/a.git";
1800 let url_b = "https://example.com/b.git";
1801 let url_c = "https://example.com/c.git";
1802 let loader = InMemLoader::new()
1803 .with(root_dir.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
1804 .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_b, "b")]))
1805 .with(b_dir.clone(), meta_manifest_with("b", vec![child(url_c, "c")]))
1806 // c re-declares a → cycle.
1807 .with(c_dir.clone(), meta_manifest_with("c", vec![child(url_a, "a")]))
1808 .with(a2_dir.clone(), meta_manifest_with("a", vec![]));
1809 let backend = InMemGit::new();
1810 let opts = SyncMetaOptions::default();
1811 let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
1812 .expect_err("three-node cycle must abort");
1813 match err {
1814 TreeError::CycleDetected { chain } => {
1815 // v1.2.3 (B4): chain leads with the root's
1816 // path-namespaced identity. v1.2.3 (B2): empty/None
1817 // ref drops the trailing `@`. Chain order:
1818 // [path:root, a, b, c, a] (entry order, with the
1819 // recurring `a` appended at the cycle-detection point).
1820 let id_root = pack_identity_for_root(&root_dir);
1821 let id_a = format!("url:{url_a}");
1822 let id_b = format!("url:{url_b}");
1823 let id_c = format!("url:{url_c}");
1824 assert_eq!(chain, vec![id_root, id_a.clone(), id_b, id_c, id_a]);
1825 }
1826 other => panic!("expected CycleDetected, got {other:?}"),
1827 }
1828 }
1829
1830 /// Same repo, two refs — NOT a cycle. Pack A declares two children
1831 /// pointing at the SAME URL but pinned to different refs (`main`
1832 /// vs `dev`). Identity scheme is `url@ref` so the two siblings
1833 /// have distinct identities and the walker must succeed.
1834 #[test]
1835 fn same_repo_two_refs_no_cycle() {
1836 let tmp = tempfile::tempdir().unwrap();
1837 let root_dir = tmp.path().to_path_buf();
1838 let main_dir = root_dir.join("b-main");
1839 let dev_dir = root_dir.join("b-dev");
1840 make_sub_meta_on_disk(&main_dir, "b-main");
1841 make_sub_meta_on_disk(&dev_dir, "b-dev");
1842 let url_b = "https://example.com/b.git";
1843 let loader = InMemLoader::new()
1844 .with(
1845 root_dir.clone(),
1846 meta_manifest_with(
1847 "root",
1848 vec![
1849 child_with_ref(url_b, "b-main", "main"),
1850 child_with_ref(url_b, "b-dev", "dev"),
1851 ],
1852 ),
1853 )
1854 .with(main_dir.clone(), meta_manifest_with("b-main", vec![]))
1855 .with(dev_dir.clone(), meta_manifest_with("b-dev", vec![]));
1856 let backend = InMemGit::new();
1857 let opts = SyncMetaOptions::default();
1858 let report = sync_meta(&root_dir, &backend, &loader, &opts, &[])
1859 .expect("same url at distinct refs is NOT a cycle");
1860 // Three metas visited: root + b@main + b@dev.
1861 assert_eq!(report.metas_visited, 3);
1862 assert!(
1863 report.errors.is_empty(),
1864 "no errors expected when the two children differ only by ref: {:?}",
1865 report.errors
1866 );
1867 }
1868
1869 /// Same repo, two refs — NESTED (ancestor-stack) variant. Pack A
1870 /// (URL=foo, ref=main) declares pack B (URL=foo, ref=dev) as its
1871 /// child. Identity scheme is `url@ref`, so A's identity
1872 /// (`url:foo@main`) and B's identity (`url:foo@dev`) differ. The
1873 /// cycle detector must NOT trip even though B's URL collides with
1874 /// an ancestor on the stack — exercises the path the sibling
1875 /// variant above doesn't reach.
1876 #[test]
1877 fn same_repo_two_refs_nested_no_cycle() {
1878 let tmp = tempfile::tempdir().unwrap();
1879 let root_dir = tmp.path().to_path_buf();
1880 let a_dir = root_dir.join("a");
1881 let b_dir = a_dir.join("b");
1882 make_sub_meta_on_disk(&a_dir, "a");
1883 make_sub_meta_on_disk(&b_dir, "b");
1884 let url_foo = "https://example.com/foo.git";
1885 let loader = InMemLoader::new()
1886 .with(
1887 root_dir.clone(),
1888 meta_manifest_with("root", vec![child_with_ref(url_foo, "a", "main")]),
1889 )
1890 // a (foo@main) declares b (foo@dev) — same URL, different ref.
1891 .with(a_dir.clone(), meta_manifest_with("a", vec![child_with_ref(url_foo, "b", "dev")]))
1892 .with(b_dir.clone(), meta_manifest_with("b", vec![]));
1893 let backend = InMemGit::new();
1894 let opts = SyncMetaOptions::default();
1895 let report = sync_meta(&root_dir, &backend, &loader, &opts, &[])
1896 .expect("nested same-url at distinct refs is NOT a cycle");
1897 // Walker must reach depth 2: root → a → b (3 metas).
1898 assert_eq!(report.metas_visited, 3, "walker must recurse to depth 2");
1899 assert!(
1900 report.errors.is_empty(),
1901 "no errors expected when ancestor and descendant differ only by ref: {:?}",
1902 report.errors
1903 );
1904 }
1905
1906 // -----------------------------------------------------------------
1907 // v1.2.3 — additional cycle/diamond coverage (T1, T2, T3).
1908 //
1909 // T1 covers the diamond-shared-descendant case the
1910 // clone-per-child scheme is meant to permit; T2 stretches the
1911 // cycle to length 4 to exercise chain accumulation; T3 verifies
1912 // the cycle detector sees a cycle introduced inside an inner
1913 // subtree even though the outer arm is acyclic.
1914 // -----------------------------------------------------------------
1915
1916 /// T1 — Diamond, NO cycle. Topology:
1917 ///
1918 /// ```text
1919 /// root → A
1920 /// root → B
1921 /// A → C
1922 /// B → C (C is a shared descendant)
1923 /// ```
1924 ///
1925 /// Walker must traverse all four packs and produce no
1926 /// `CycleDetected`. Because the cycle detector clones the
1927 /// ancestor chain per child, A's descendants do not poison B's
1928 /// descendant view, so seeing `C` from both arms is a diamond,
1929 /// not a cycle.
1930 ///
1931 /// **v1.2.4 T1-spot-check extension.** In addition to the
1932 /// `metas_visited == 5` count assertion, this test also confirms
1933 /// that C is genuinely walked through BOTH arms: the
1934 /// `phase1_classifications` table must record one entry whose
1935 /// parent is `a/` (with dest `a/c`) AND one entry whose parent is
1936 /// `b/` (with dest `b/c`). Counting alone (`metas_visited`) cannot
1937 /// catch a regression where a future memoization optimization
1938 /// collapses the second walk into a no-op while still incrementing
1939 /// the counter — tracking the actual dest paths does.
1940 #[test]
1941 fn cycle_diamond_shared_descendant_no_cycle() {
1942 let tmp = tempfile::tempdir().unwrap();
1943 let root_dir = tmp.path().to_path_buf();
1944 // Disk layout: root/a, root/b, root/a/c, root/b/c.
1945 // Each `c` lives at a distinct on-disk slot so classify
1946 // succeeds; identity equality is what would (incorrectly)
1947 // trip the cycle detector if clone-per-child were broken.
1948 let a_dir = root_dir.join("a");
1949 let b_dir = root_dir.join("b");
1950 let c_under_a_dir = a_dir.join("c");
1951 let c_under_b_dir = b_dir.join("c");
1952 make_sub_meta_on_disk(&a_dir, "a");
1953 make_sub_meta_on_disk(&b_dir, "b");
1954 make_sub_meta_on_disk(&c_under_a_dir, "c");
1955 make_sub_meta_on_disk(&c_under_b_dir, "c");
1956 let url_a = "https://example.com/a.git";
1957 let url_b = "https://example.com/b.git";
1958 let url_c = "https://example.com/c.git";
1959 let loader = InMemLoader::new()
1960 .with(
1961 root_dir.clone(),
1962 meta_manifest_with("root", vec![child(url_a, "a"), child(url_b, "b")]),
1963 )
1964 .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_c, "c")]))
1965 .with(b_dir.clone(), meta_manifest_with("b", vec![child(url_c, "c")]))
1966 .with(c_under_a_dir.clone(), meta_manifest_with("c", vec![]))
1967 .with(c_under_b_dir.clone(), meta_manifest_with("c", vec![]));
1968 let backend = InMemGit::new();
1969 let opts = SyncMetaOptions::default();
1970 let report =
1971 sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect("diamond is NOT a cycle");
1972 // Four distinct manifest visits: root, a, b, c-via-a, c-via-b.
1973 // A and B both expand into their own `c`, so the walker
1974 // visits `c` twice (once per arm) — five `metas_visited`.
1975 assert_eq!(
1976 report.metas_visited, 5,
1977 "diamond: root + a + b + c-under-a + c-under-b = 5 visits"
1978 );
1979 // Crucially, no errors of any kind — and certainly not a
1980 // CycleDetected — because the two `C` visits live on
1981 // disjoint cloned ancestor chains.
1982 assert!(
1983 !report.errors.iter().any(|e| matches!(e, TreeError::CycleDetected { .. })),
1984 "diamond must not surface CycleDetected; errors={:?}",
1985 report.errors
1986 );
1987 assert!(report.errors.is_empty(), "diamond should produce no errors: {:?}", report.errors);
1988
1989 // v1.2.4 T1-spot-check: assert `c` was genuinely walked under
1990 // BOTH arms. The phase1_classifications table records every
1991 // (parent_meta, dest, class) triple observed during Phase 1
1992 // dispatch; for the diamond layout we expect:
1993 // * (root, a) — a is a direct child of root
1994 // * (root, b) — b is a direct child of root
1995 // * (a, a/c) — c-via-a (Phase 1 inside a's recursion)
1996 // * (b, b/c) — c-via-b (Phase 1 inside b's recursion)
1997 // If a future memoization regression collapses the second `c`
1998 // walk into a no-op while still incrementing `metas_visited`,
1999 // the (b, b/c) pair will be missing from this table and the
2000 // assertion below fails.
2001 let dests_under_a = destinations_under(&report, &a_dir);
2002 let dests_under_b = destinations_under(&report, &b_dir);
2003 assert!(
2004 dests_under_a.iter().any(|d| d == &c_under_a_dir),
2005 "diamond: expected c-via-a in classifications under a, got {dests_under_a:?}"
2006 );
2007 assert!(
2008 dests_under_b.iter().any(|d| d == &c_under_b_dir),
2009 "diamond: expected c-via-b in classifications under b, got {dests_under_b:?}"
2010 );
2011 assert_ne!(
2012 c_under_a_dir, c_under_b_dir,
2013 "the two `c` visits must land on distinct on-disk dests"
2014 );
2015 }
2016
2017 /// T2 — 4-node cycle: `root → A → B → C → D → A`. Cycle length 4
2018 /// in pack-identity terms; the reported chain has length 5 once
2019 /// the recurring `A` is appended at detection. The root frame's
2020 /// `path:` identity also leads the chain (B4), so the final
2021 /// length is 6.
2022 #[test]
2023 #[allow(clippy::too_many_lines)]
2024 fn cycle_four_node_aborts() {
2025 let tmp = tempfile::tempdir().unwrap();
2026 let root_dir = tmp.path().to_path_buf();
2027 // Disk chain: root → a → b → c → d → a (the second `a` lives
2028 // at a fresh slot so classify succeeds; identity collision is
2029 // what trips the cycle detector).
2030 let a_dir = root_dir.join("a");
2031 let b_dir = a_dir.join("b");
2032 let c_dir = b_dir.join("c");
2033 let d_dir = c_dir.join("d");
2034 let a2_dir = d_dir.join("a");
2035 make_sub_meta_on_disk(&a_dir, "a");
2036 make_sub_meta_on_disk(&b_dir, "b");
2037 make_sub_meta_on_disk(&c_dir, "c");
2038 make_sub_meta_on_disk(&d_dir, "d");
2039 make_sub_meta_on_disk(&a2_dir, "a");
2040 let url_a = "https://example.com/a.git";
2041 let url_b = "https://example.com/b.git";
2042 let url_c = "https://example.com/c.git";
2043 let url_d = "https://example.com/d.git";
2044 let loader = InMemLoader::new()
2045 .with(root_dir.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
2046 .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_b, "b")]))
2047 .with(b_dir.clone(), meta_manifest_with("b", vec![child(url_c, "c")]))
2048 .with(c_dir.clone(), meta_manifest_with("c", vec![child(url_d, "d")]))
2049 // d re-declares a → cycle of length 4 in url-namespace.
2050 .with(d_dir.clone(), meta_manifest_with("d", vec![child(url_a, "a")]))
2051 .with(a2_dir.clone(), meta_manifest_with("a", vec![]));
2052 let backend = InMemGit::new();
2053 let opts = SyncMetaOptions::default();
2054 let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
2055 .expect_err("four-node cycle must abort");
2056 match err {
2057 TreeError::CycleDetected { chain } => {
2058 let id_root = pack_identity_for_root(&root_dir);
2059 let id_a = format!("url:{url_a}");
2060 let id_b = format!("url:{url_b}");
2061 let id_c = format!("url:{url_c}");
2062 let id_d = format!("url:{url_d}");
2063 // [path:root, a, b, c, d, a] — six entries.
2064 assert_eq!(
2065 chain,
2066 vec![id_root, id_a.clone(), id_b, id_c, id_d, id_a.clone()],
2067 "expected full ancestor chain ending in the recurring A"
2068 );
2069 assert!(
2070 chain.len() >= 5,
2071 "four-node cycle chain has at least 5 entries: {chain:?}"
2072 );
2073 // Last element repeats earlier in the chain (the
2074 // recurring identity).
2075 let last = chain.last().unwrap();
2076 let first_match = chain.iter().position(|s| s == last).unwrap();
2077 assert!(
2078 first_match < chain.len() - 1,
2079 "the recurring identity must appear earlier in the chain: {chain:?}"
2080 );
2081 }
2082 other => panic!("expected CycleDetected, got {other:?}"),
2083 }
2084 }
2085
2086 /// T3 — Nested-prefix cycle. Outer arm `root → A → B → C` is
2087 /// acyclic; the cycle lives inside B's other child `D`, which
2088 /// loops back to B (`B → D → B`). The walker must surface
2089 /// `CycleDetected` and the cycle should appear inside the
2090 /// subtree (not at the root level), with B as the recurring
2091 /// identity.
2092 ///
2093 /// Specifically: A's children = [B], B's children = [C, D], C
2094 /// has no children, D's children = [B] (cycle).
2095 #[test]
2096 #[allow(clippy::too_many_lines)]
2097 fn cycle_nested_prefix_aborts() {
2098 let tmp = tempfile::tempdir().unwrap();
2099 let root_dir = tmp.path().to_path_buf();
2100 // Disk layout: root/a, root/a/b, root/a/b/c (acyclic arm),
2101 // root/a/b/d (cycle arm), root/a/b/d/b (D loops back to B —
2102 // identity collision; on-disk path is fresh so classify
2103 // succeeds).
2104 let a_dir = root_dir.join("a");
2105 let b_dir = a_dir.join("b");
2106 let c_dir = b_dir.join("c");
2107 let d_dir = b_dir.join("d");
2108 let b2_dir = d_dir.join("b");
2109 make_sub_meta_on_disk(&a_dir, "a");
2110 make_sub_meta_on_disk(&b_dir, "b");
2111 make_sub_meta_on_disk(&c_dir, "c");
2112 make_sub_meta_on_disk(&d_dir, "d");
2113 make_sub_meta_on_disk(&b2_dir, "b");
2114 let url_a = "https://example.com/a.git";
2115 let url_b = "https://example.com/b.git";
2116 let url_c = "https://example.com/c.git";
2117 let url_d = "https://example.com/d.git";
2118 let loader = InMemLoader::new()
2119 .with(root_dir.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
2120 .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_b, "b")]))
2121 // b has both an acyclic child (c) and a cyclic one (d).
2122 .with(
2123 b_dir.clone(),
2124 meta_manifest_with("b", vec![child(url_c, "c"), child(url_d, "d")]),
2125 )
2126 .with(c_dir.clone(), meta_manifest_with("c", vec![]))
2127 // d re-declares b → cycle inside the b/d subtree.
2128 .with(d_dir.clone(), meta_manifest_with("d", vec![child(url_b, "b")]))
2129 .with(b2_dir.clone(), meta_manifest_with("b", vec![]));
2130 let backend = InMemGit::new();
2131 let opts = SyncMetaOptions::default();
2132 let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
2133 .expect_err("nested-prefix cycle must abort");
2134 match err {
2135 TreeError::CycleDetected { chain } => {
2136 let id_root = pack_identity_for_root(&root_dir);
2137 let id_a = format!("url:{url_a}");
2138 let id_b = format!("url:{url_b}");
2139 let id_d = format!("url:{url_d}");
2140 // The cycle hits inside the subtree at depth 4:
2141 // [path:root, a, b, d, b].
2142 assert_eq!(
2143 chain,
2144 vec![id_root.clone(), id_a, id_b.clone(), id_d, id_b.clone()],
2145 "cycle should appear inside the subtree, not at the top"
2146 );
2147 // Recurring identity is `b`, and it does NOT appear
2148 // at the chain's outermost position — the root path
2149 // identity does. This verifies the cycle is "inside"
2150 // the tree.
2151 let last = chain.last().unwrap();
2152 assert_eq!(last, &id_b, "recurring identity is B");
2153 assert_ne!(
2154 chain.first().unwrap(),
2155 last,
2156 "cycle must not start at the root frame: {chain:?}"
2157 );
2158 assert_eq!(
2159 chain.first().unwrap(),
2160 &id_root,
2161 "chain must begin with the root path identity: {chain:?}"
2162 );
2163 }
2164 other => panic!("expected CycleDetected, got {other:?}"),
2165 }
2166 }
2167
2168 /// B1 regression: max_depth must NOT mask cycle detection. Cycle
2169 /// check fires before depth-cap return in phase3_handle_child.
2170 ///
2171 /// Topology: same 4-node cycle as `cycle_four_node_aborts`
2172 /// (`root → A → B → C → D → A`). Recurring `A` is reached at
2173 /// `next_depth = 5`. With `max_depth: Some(4)`, the depth cap
2174 /// would skip the recurring frame BEFORE it can be tested for
2175 /// cycle membership — *if* B1 were reverted (i.e. depth-cap
2176 /// early-return placed before the cycle check). The current
2177 /// ordering (cycle-then-depth-cap, see `phase3_handle_child`)
2178 /// surfaces `CycleDetected` regardless of the cap.
2179 ///
2180 /// If anyone reverts B1's reorder, this test fails: the walker
2181 /// returns `Ok(_)` instead of `Err(CycleDetected)` because the
2182 /// recurring frame is silently truncated.
2183 #[test]
2184 fn cycle_aborts_under_max_depth_cap() {
2185 let tmp = tempfile::tempdir().unwrap();
2186 let root_dir = tmp.path().to_path_buf();
2187 let a_dir = root_dir.join("a");
2188 let b_dir = a_dir.join("b");
2189 let c_dir = b_dir.join("c");
2190 let d_dir = c_dir.join("d");
2191 let a2_dir = d_dir.join("a");
2192 make_sub_meta_on_disk(&a_dir, "a");
2193 make_sub_meta_on_disk(&b_dir, "b");
2194 make_sub_meta_on_disk(&c_dir, "c");
2195 make_sub_meta_on_disk(&d_dir, "d");
2196 make_sub_meta_on_disk(&a2_dir, "a");
2197 let url_a = "https://example.com/a.git";
2198 let url_b = "https://example.com/b.git";
2199 let url_c = "https://example.com/c.git";
2200 let url_d = "https://example.com/d.git";
2201 let loader = InMemLoader::new()
2202 .with(root_dir.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
2203 .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_b, "b")]))
2204 .with(b_dir.clone(), meta_manifest_with("b", vec![child(url_c, "c")]))
2205 .with(c_dir.clone(), meta_manifest_with("c", vec![child(url_d, "d")]))
2206 .with(d_dir.clone(), meta_manifest_with("d", vec![child(url_a, "a")]))
2207 .with(a2_dir.clone(), meta_manifest_with("a", vec![]));
2208 let backend = InMemGit::new();
2209 // max_depth: Some(4) — the recurring A frame would land at
2210 // next_depth=5, which exceeds the cap. With B1, the cycle
2211 // check still fires first; without B1, the cap would skip
2212 // before the cycle is detected.
2213 let opts = SyncMetaOptions { max_depth: Some(4), ..SyncMetaOptions::default() };
2214 let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
2215 .expect_err("cycle must surface even when its closing frame exceeds max_depth");
2216 match err {
2217 TreeError::CycleDetected { chain } => {
2218 let id_a = format!("url:{url_a}");
2219 assert!(
2220 chain.last() == Some(&id_a),
2221 "recurring identity must be A, got chain={chain:?}"
2222 );
2223 let last = chain.last().unwrap();
2224 let first_match = chain.iter().position(|s| s == last).unwrap();
2225 assert!(
2226 first_match < chain.len() - 1,
2227 "the recurring identity must appear earlier in the chain: {chain:?}"
2228 );
2229 }
2230 other => panic!("expected CycleDetected, got {other:?}"),
2231 }
2232 }
2233
2234 /// B2 regression: `pack_identity_for_child` must NOT emit a
2235 /// trailing `@` when `r#ref` is `Some("")` (empty string). Both
2236 /// `Some("")` and `None` collapse to the bare `url:<url>` form so
2237 /// the on-the-wire identity matches the Lean model
2238 /// (`Grex.Walker.ChildRef.identity`). Without this elision two
2239 /// children that differ only in `ref: None` vs `ref: Some("")`
2240 /// would serialise the same way as `url:<url>@`, masking the
2241 /// distinction the Lean spec draws — and worse, an identity
2242 /// ending in `@` leaks an empty-ref artifact into operator
2243 /// diagnostics.
2244 #[test]
2245 fn child_identity_some_empty_ref_omits_at() {
2246 let url = "https://example.com/a.git";
2247 let with_none = ChildRef { url: url.to_string(), path: Some("a".to_string()), r#ref: None };
2248 let with_empty = ChildRef {
2249 url: url.to_string(),
2250 path: Some("a".to_string()),
2251 r#ref: Some(String::new()),
2252 };
2253 let id_none = pack_identity_for_child(&with_none);
2254 let id_empty = pack_identity_for_child(&with_empty);
2255 let expected = format!("url:{url}");
2256 assert_eq!(id_none, expected, "None ref must produce bare url identity");
2257 assert_eq!(
2258 id_empty, expected,
2259 "Some(\"\") ref must collapse to bare url identity (no trailing @)"
2260 );
2261 assert_eq!(id_none, id_empty, "Some(\"\") and None must yield the same identity");
2262 assert!(!id_empty.ends_with('@'), "identity must not end with trailing @: {id_empty:?}");
2263 }
2264
2265 // -----------------------------------------------------------------
2266 // v1.2.4 — A1 cancellation token (T-cancel).
2267 //
2268 // Discharges the Lean `cancellation_terminates_promptly` theorem
2269 // in `proof/Grex/Walker.lean`: when one sibling closure detects a
2270 // cycle and signals the per-`phase3_recurse` cancellation flag,
2271 // every subsequent in-flight sibling closure observes the flag at
2272 // its next entry and returns `Phase3ChildOutcome::Cancelled` with
2273 // zero recursive descent.
2274 // -----------------------------------------------------------------
2275
2276 /// T-cancel — sibling cancellation under cycle.
2277 ///
2278 /// Topology: `root → A`, where A has FOUR children
2279 /// `[A_cyclic, X, Y, Z]` (in this exact source order). `A_cyclic`'s
2280 /// URL collides with A's own URL, so A's `phase3_recurse` detects
2281 /// the cycle when iterating its first child. `X`, `Y`, `Z` are
2282 /// independent sub-metas, each containing a deep chain
2283 /// (`X → X1 → X2`, etc) that would inflate `metas_visited` if
2284 /// genuinely walked. With `opts.parallel = Some(1)` the rayon
2285 /// pool runs siblings serially in source order, so the cyclic
2286 /// sibling fires first, sets the flag, and `X`/`Y`/`Z` observe
2287 /// `Cancelled` at entry — none of their subtrees are walked.
2288 ///
2289 /// Determinism: `parallel: Some(1)` removes thread interleaving
2290 /// from the test surface — the cyclic arm is *guaranteed* to run
2291 /// before the acyclic siblings. The flag is checked at entry of
2292 /// `phase3_handle_child`, so any sibling that has not yet started
2293 /// observes it. `metas_visited` is the side-effect-visible counter
2294 /// used to assert the cancellation discipline: pre-cancellation
2295 /// (v1.2.3) the walker would visit every sibling subtree before
2296 /// surfacing `Err(CycleDetected)`; post-cancellation (v1.2.4)
2297 /// only the cyclic arm and its prefix contribute.
2298 ///
2299 /// Without the cancellation flag, `metas_visited` would total
2300 /// 1 (root) + 1 (A) + 3 (X, Y, Z themselves) + 6 (X1,X2,Y1,Y2,Z1,Z2)
2301 /// = 11. With the flag, X/Y/Z's `phase3_handle_child` short-circuits
2302 /// before recursing, so only root + A are recorded → 2.
2303 #[test]
2304 #[allow(clippy::too_many_lines)]
2305 fn cancellation_aborts_siblings() {
2306 let tmp = tempfile::tempdir().unwrap();
2307 let root_dir = tmp.path().to_path_buf();
2308 let a_dir = root_dir.join("a");
2309 // A_cyclic's path is a fresh slot under A so on-disk classify
2310 // succeeds; identity collision with A's URL is what trips the
2311 // cycle detector at A's `phase3_recurse`.
2312 let a_cyclic_dir = a_dir.join("a-cyclic");
2313 let x_dir = a_dir.join("x");
2314 let x1_dir = x_dir.join("x1");
2315 let x2_dir = x1_dir.join("x2");
2316 let y_dir = a_dir.join("y");
2317 let y1_dir = y_dir.join("y1");
2318 let y2_dir = y1_dir.join("y2");
2319 let z_dir = a_dir.join("z");
2320 let z1_dir = z_dir.join("z1");
2321 let z2_dir = z1_dir.join("z2");
2322 for d in [
2323 &a_dir,
2324 &a_cyclic_dir,
2325 &x_dir,
2326 &x1_dir,
2327 &x2_dir,
2328 &y_dir,
2329 &y1_dir,
2330 &y2_dir,
2331 &z_dir,
2332 &z1_dir,
2333 &z2_dir,
2334 ] {
2335 make_sub_meta_on_disk(d, d.file_name().unwrap().to_str().unwrap());
2336 }
2337
2338 let url_a = "https://example.com/a.git";
2339 let url_x = "https://example.com/x.git";
2340 let url_x1 = "https://example.com/x1.git";
2341 let url_x2 = "https://example.com/x2.git";
2342 let url_y = "https://example.com/y.git";
2343 let url_y1 = "https://example.com/y1.git";
2344 let url_y2 = "https://example.com/y2.git";
2345 let url_z = "https://example.com/z.git";
2346 let url_z1 = "https://example.com/z1.git";
2347 let url_z2 = "https://example.com/z2.git";
2348
2349 // A's children: [a-cyclic (collides with A's identity), x, y, z]
2350 // — ORDER MATTERS for determinism. With parallel: Some(1) the
2351 // cyclic arm runs first, signals, and the rest observe.
2352 let loader = InMemLoader::new()
2353 .with(root_dir.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
2354 .with(
2355 a_dir.clone(),
2356 meta_manifest_with(
2357 "a",
2358 vec![
2359 child(url_a, "a-cyclic"),
2360 child(url_x, "x"),
2361 child(url_y, "y"),
2362 child(url_z, "z"),
2363 ],
2364 ),
2365 )
2366 .with(a_cyclic_dir.clone(), meta_manifest_with("a-cyclic", vec![]))
2367 // X/Y/Z each carry a 3-level subtree that would inflate
2368 // metas_visited if genuinely walked.
2369 .with(x_dir.clone(), meta_manifest_with("x", vec![child(url_x1, "x1")]))
2370 .with(x1_dir.clone(), meta_manifest_with("x1", vec![child(url_x2, "x2")]))
2371 .with(x2_dir.clone(), meta_manifest_with("x2", vec![]))
2372 .with(y_dir.clone(), meta_manifest_with("y", vec![child(url_y1, "y1")]))
2373 .with(y1_dir.clone(), meta_manifest_with("y1", vec![child(url_y2, "y2")]))
2374 .with(y2_dir.clone(), meta_manifest_with("y2", vec![]))
2375 .with(z_dir.clone(), meta_manifest_with("z", vec![child(url_z1, "z1")]))
2376 .with(z1_dir.clone(), meta_manifest_with("z1", vec![child(url_z2, "z2")]))
2377 .with(z2_dir.clone(), meta_manifest_with("z2", vec![]));
2378 let backend = InMemGit::new();
2379 // parallel: Some(1) — single-threaded rayon pool. Source-order
2380 // iteration => cyclic arm runs before X/Y/Z, signal observed.
2381 let opts = SyncMetaOptions { parallel: Some(1), ..SyncMetaOptions::default() };
2382 let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
2383 .expect_err("cyclic input must surface CycleDetected");
2384 match err {
2385 TreeError::CycleDetected { chain } => {
2386 let id_a = format!("url:{url_a}");
2387 assert_eq!(
2388 chain.last(),
2389 Some(&id_a),
2390 "recurring identity must be A (the cyclic arm), got chain={chain:?}"
2391 );
2392 }
2393 other => panic!("expected CycleDetected, got {other:?}"),
2394 }
2395 // The cancellation discipline asserts: X/Y/Z's subtrees were
2396 // NOT walked. Cancellation lives in Phase 3 (the recursion
2397 // edge), so Phase 1 still fetches A's four direct children
2398 // (a-cyclic, x, y, z) — one Fetch each. A's Phase 3 then
2399 // detects the cycle on a-cyclic and signals; X/Y/Z's
2400 // `phase3_handle_child` returns `Cancelled` at entry, so
2401 // their `sync_meta_inner` never runs. Therefore X1/X2/Y1/Y2/
2402 // Z1/Z2 are NEVER fetched.
2403 //
2404 // Without cancellation Fetch count = 11:
2405 // 1 (root → a) + 4 (a → a-cyclic, x, y, z)
2406 // + 2 (x → x1 → x2) + 2 (y → y1 → y2) + 2 (z → z1 → z2).
2407 // With cancellation Fetch count = 5:
2408 // 1 (root → a) + 4 (a → a-cyclic, x, y, z).
2409 //
2410 // With `parallel: Some(1)` the rayon pool is single-threaded
2411 // and visibility is immediate, so 5 is the deterministic tight
2412 // bound: 1 (root → a) + 4 (a's Phase 1 fan-out for a-cyclic, x,
2413 // y, z). No upper-bound slack — `Some(1)` removes interleaving
2414 // entirely, and any deviation indicates a real regression.
2415 let fetch_count =
2416 backend.calls().iter().filter(|c| matches!(c, BackendCall::Fetch { .. })).count();
2417 // Lower bound: Phase 1's per-child fan-out runs BEFORE the
2418 // Phase 3 cycle check, so even under cancellation A's four
2419 // direct children must have been fetched (1 root → A + 4 a's
2420 // children = 5). Documents that Phase 1 is intentionally NOT
2421 // cancellation-aware in v1.2.4 — cancellation only short-
2422 // circuits the Phase 3 recursion edge.
2423 assert!(
2424 fetch_count >= 5,
2425 "Phase 1 fan-out for A's 4 direct children must complete even under cancellation; \
2426 observed {fetch_count} fetches"
2427 );
2428 assert_eq!(
2429 fetch_count, 5,
2430 "cancellation flag must short-circuit X/Y/Z subtrees; \
2431 observed {fetch_count} fetches (acyclic walk would do 11)"
2432 );
2433 // Stronger assertion: NO fetch ever targeted x1/x2/y1/y2/z1/z2.
2434 // If the cancellation token were broken, sync_meta_inner would
2435 // recurse into x/y/z and Phase 1 there would fetch x1/y1/z1 at
2436 // a minimum.
2437 let cancelled_dests = [&x1_dir, &x2_dir, &y1_dir, &y2_dir, &z1_dir, &z2_dir];
2438 for dest in cancelled_dests {
2439 for call in backend.calls() {
2440 if let BackendCall::Fetch { dest: fetched } = &call {
2441 assert_ne!(
2442 fetched,
2443 dest,
2444 "cancellation must prevent recursion into {} (observed Fetch call)",
2445 dest.display()
2446 );
2447 }
2448 }
2449 }
2450 }
2451
2452 /// G1 — multi-thread sibling cancellation race.
2453 ///
2454 /// Topology: `root → A`, where A has 12 children: two cyclic
2455 /// siblings (`A_cyclic1`, `A_cyclic2`, both colliding with A's
2456 /// own URL) and ten acyclic deep subtrees (`X0..X9` each →
2457 /// `X{i}_1` → `X{i}_2`). With `parallel: Some(8)` rayon may
2458 /// schedule the two cyclic arms onto different worker threads,
2459 /// so BOTH can simultaneously detect the cycle and try to store
2460 /// into `cancelled`. The aggregator (`phase3_recurse`) must:
2461 /// - return `Err(CycleDetected)` on every iteration with a
2462 /// non-empty chain (no panic, no Ok),
2463 /// - never spuriously double-count or wedge on the multiple
2464 /// concurrent stores (AtomicBool::store is idempotent),
2465 /// - keep fetch count bounded (no unbounded recursion).
2466 ///
2467 /// Looped 50 iterations — non-determinism would surface as
2468 /// either a panic, an `Ok(_)` return, or an unbounded fetch
2469 /// count (the acyclic 10-subtree walk would be at minimum
2470 /// `1 (root → A) + 12 (A's children) + 20 (X*_1, X*_2)` = 33
2471 /// without cancellation; we cap at 1000 to catch runaway).
2472 #[test]
2473 #[allow(clippy::too_many_lines)]
2474 fn cancellation_aborts_siblings_multithread() {
2475 // Build helper: returns (loader, backend, root_dir) for a
2476 // fresh per-iteration tempdir so iterations don't share state.
2477 fn build_topology() -> (tempfile::TempDir, PathBuf, InMemLoader, InMemGit, String) {
2478 let tmp = tempfile::tempdir().unwrap();
2479 let root_dir = tmp.path().to_path_buf();
2480 let a_dir = root_dir.join("a");
2481 make_sub_meta_on_disk(&a_dir, "a");
2482 let url_a = "https://example.com/a.git".to_string();
2483 // Two cyclic siblings (both collide with A's identity)
2484 let a_cyc1_dir = a_dir.join("a-cyclic1");
2485 let a_cyc2_dir = a_dir.join("a-cyclic2");
2486 make_sub_meta_on_disk(&a_cyc1_dir, "a-cyclic1");
2487 make_sub_meta_on_disk(&a_cyc2_dir, "a-cyclic2");
2488 // Ten acyclic deep subtrees: x0..x9, each → x{i}_1 → x{i}_2
2489 let mut a_children = vec![child(&url_a, "a-cyclic1"), child(&url_a, "a-cyclic2")];
2490 let mut loader = InMemLoader::new()
2491 .with(root_dir.clone(), meta_manifest_with("root", vec![child(&url_a, "a")]))
2492 .with(a_cyc1_dir.clone(), meta_manifest_with("a-cyclic1", vec![]))
2493 .with(a_cyc2_dir.clone(), meta_manifest_with("a-cyclic2", vec![]));
2494 for i in 0..10 {
2495 let xi_name = format!("x{i}");
2496 let xi_dir = a_dir.join(&xi_name);
2497 let xi1_name = format!("x{i}_1");
2498 let xi1_dir = xi_dir.join(&xi1_name);
2499 let xi2_name = format!("x{i}_2");
2500 let xi2_dir = xi1_dir.join(&xi2_name);
2501 make_sub_meta_on_disk(&xi_dir, &xi_name);
2502 make_sub_meta_on_disk(&xi1_dir, &xi1_name);
2503 make_sub_meta_on_disk(&xi2_dir, &xi2_name);
2504 let url_xi = format!("https://example.com/x{i}.git");
2505 let url_xi1 = format!("https://example.com/x{i}_1.git");
2506 let url_xi2 = format!("https://example.com/x{i}_2.git");
2507 a_children.push(child(&url_xi, &xi_name));
2508 loader = loader
2509 .with(xi_dir, meta_manifest_with(&xi_name, vec![child(&url_xi1, &xi1_name)]))
2510 .with(xi1_dir, meta_manifest_with(&xi1_name, vec![child(&url_xi2, &xi2_name)]))
2511 .with(xi2_dir, meta_manifest_with(&xi2_name, vec![]));
2512 }
2513 loader = loader.with(a_dir, meta_manifest_with("a", a_children));
2514 let backend = InMemGit::new();
2515 (tmp, root_dir, loader, backend, url_a)
2516 }
2517
2518 for iter in 0..50 {
2519 let (_tmp, root_dir, loader, backend, url_a) = build_topology();
2520 let opts = SyncMetaOptions { parallel: Some(8), ..SyncMetaOptions::default() };
2521 let result = sync_meta(&root_dir, &backend, &loader, &opts, &[]);
2522 let err = match result {
2523 Err(e) => e,
2524 Ok(_) => panic!("iter {iter}: expected CycleDetected, got Ok"),
2525 };
2526 match err {
2527 TreeError::CycleDetected { chain } => {
2528 assert!(
2529 !chain.is_empty(),
2530 "iter {iter}: CycleDetected chain must be non-empty: {chain:?}"
2531 );
2532 let id_a = format!("url:{url_a}");
2533 assert_eq!(
2534 chain.last(),
2535 Some(&id_a),
2536 "iter {iter}: recurring identity must be A, got chain={chain:?}"
2537 );
2538 }
2539 other => panic!("iter {iter}: expected CycleDetected, got {other:?}"),
2540 }
2541 // Bound check: even with two cyclic arms racing, the
2542 // walker must not have walked unbounded subtrees. Acyclic
2543 // walk would do 33 fetches; under cancellation Phase 1
2544 // for A's 12 direct children fires (12 fetches) plus the
2545 // initial root → A fetch (1) = 13, and depending on
2546 // worker scheduling some X{i}_1 / X{i}_2 fetches may
2547 // sneak in before the flag is observed. Cap loosely at
2548 // 200 — any wild blowup indicates the flag is broken.
2549 let fetch_count =
2550 backend.calls().iter().filter(|c| matches!(c, BackendCall::Fetch { .. })).count();
2551 assert!(
2552 fetch_count >= 1,
2553 "iter {iter}: at least the root → A fetch must occur; got {fetch_count}"
2554 );
2555 assert!(
2556 fetch_count < 200,
2557 "iter {iter}: fetch count blew up under multi-thread cancellation; got {fetch_count}"
2558 );
2559 }
2560 }
2561
2562 /// G2 — per-call cancellation flag scope: a cycle deep in pack A
2563 /// MUST NOT cancel a sibling pack B at the root level.
2564 ///
2565 /// Topology:
2566 /// root → [A, B]
2567 /// A → A1 → A2 → A2_cyclic (A2_cyclic.url == A2.url ⇒ cycle
2568 /// inside A's deep subtree)
2569 /// B → B1 → B2 → B3 (clean acyclic subtree)
2570 ///
2571 /// The cancellation flag for root's Phase 3 fan-out covers root's
2572 /// direct children (A, B). The cycle inside A is detected during
2573 /// recursion into A's deep subtree, by a NEW per-call flag built
2574 /// at the deep `phase3_recurse` frame — that flag is scoped to A's
2575 /// inner closures only. It must NOT propagate up and cancel B's
2576 /// independent walk.
2577 ///
2578 /// Assertions:
2579 /// (i) walker returns `Err(CycleDetected)` (the deep cycle
2580 /// propagates up via the short-circuit path),
2581 /// (ii) B's deep subtree IS walked (B, B1, B2, B3 all fetched),
2582 /// proving B's walk was not aborted by A's deep-subtree
2583 /// cancellation flag (which lives in a recursion frame
2584 /// below root, disjoint from the root-level fan-out flag
2585 /// that scopes A and B as siblings).
2586 ///
2587 /// With `parallel: Some(2)` rayon schedules A and B onto separate
2588 /// workers; B must run to completion regardless of A's cycle.
2589 #[test]
2590 #[allow(clippy::too_many_lines)]
2591 fn cancellation_per_call_scope_isolates_subtrees() {
2592 let tmp = tempfile::tempdir().unwrap();
2593 let root_dir = tmp.path().to_path_buf();
2594 // A's deep cycle arm
2595 let a_dir = root_dir.join("a");
2596 let a1_dir = a_dir.join("a1");
2597 let a2_dir = a1_dir.join("a2");
2598 let a2cyc_dir = a2_dir.join("a2-cyclic");
2599 // B's clean deep subtree
2600 let b_dir = root_dir.join("b");
2601 let b1_dir = b_dir.join("b1");
2602 let b2_dir = b1_dir.join("b2");
2603 let b3_dir = b2_dir.join("b3");
2604 for d in [&a_dir, &a1_dir, &a2_dir, &a2cyc_dir, &b_dir, &b1_dir, &b2_dir, &b3_dir] {
2605 make_sub_meta_on_disk(d, d.file_name().unwrap().to_str().unwrap());
2606 }
2607 let url_a = "https://example.com/a.git";
2608 let url_a1 = "https://example.com/a1.git";
2609 let url_a2 = "https://example.com/a2.git";
2610 let url_b = "https://example.com/b.git";
2611 let url_b1 = "https://example.com/b1.git";
2612 let url_b2 = "https://example.com/b2.git";
2613 let url_b3 = "https://example.com/b3.git";
2614 let loader = InMemLoader::new()
2615 .with(
2616 root_dir.clone(),
2617 meta_manifest_with("root", vec![child(url_a, "a"), child(url_b, "b")]),
2618 )
2619 .with(a_dir.clone(), meta_manifest_with("a", vec![child(url_a1, "a1")]))
2620 .with(a1_dir.clone(), meta_manifest_with("a1", vec![child(url_a2, "a2")]))
2621 // a2's child re-declares a2's identity → cycle at depth 4
2622 .with(a2_dir.clone(), meta_manifest_with("a2", vec![child(url_a2, "a2-cyclic")]))
2623 .with(a2cyc_dir.clone(), meta_manifest_with("a2-cyclic", vec![]))
2624 .with(b_dir.clone(), meta_manifest_with("b", vec![child(url_b1, "b1")]))
2625 .with(b1_dir.clone(), meta_manifest_with("b1", vec![child(url_b2, "b2")]))
2626 .with(b2_dir.clone(), meta_manifest_with("b2", vec![child(url_b3, "b3")]))
2627 .with(b3_dir.clone(), meta_manifest_with("b3", vec![]));
2628 let backend = InMemGit::new();
2629 // parallel: Some(2) — A and B may schedule onto separate
2630 // workers. The point of the test is to verify B's walk is
2631 // not aborted by A's deep-subtree cancellation flag.
2632 let opts = SyncMetaOptions { parallel: Some(2), ..SyncMetaOptions::default() };
2633 let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
2634 .expect_err("deep cycle inside A must surface CycleDetected");
2635 // (i) Cycle bubbled up.
2636 match err {
2637 TreeError::CycleDetected { chain } => {
2638 let id_a2 = format!("url:{url_a2}");
2639 assert_eq!(
2640 chain.last(),
2641 Some(&id_a2),
2642 "recurring identity must be A2 (the cyclic arm), got chain={chain:?}"
2643 );
2644 }
2645 other => panic!("expected CycleDetected, got {other:?}"),
2646 }
2647 // (ii) B's subtree was walked: B, B1, B2, B3 each fetched.
2648 // The deep cycle in A fires inside a per-call flag scoped to
2649 // that recursion frame; root's per-call flag is NOT signalled
2650 // (cycle is returned from a child call, not stored at root
2651 // scope), so B's sibling walk completes uninterrupted.
2652 let fetched_dests: Vec<PathBuf> = backend
2653 .calls()
2654 .iter()
2655 .filter_map(|c| match c {
2656 BackendCall::Fetch { dest } => Some(dest.clone()),
2657 _ => None,
2658 })
2659 .collect();
2660 for dest in [&b_dir, &b1_dir, &b2_dir, &b3_dir] {
2661 assert!(
2662 fetched_dests.iter().any(|f| f == dest),
2663 "per-call scope: B's subtree must have been walked despite A's deep cycle; \
2664 missing fetch for {} (observed {fetched_dests:?})",
2665 dest.display()
2666 );
2667 }
2668 }
2669}