grex_core/tree/walker.rs
1//! Recursive pack-tree walker.
2//!
3//! The walker hydrates a `pack.yaml` tree: it loads the root manifest, clones
4//! (or fetches + checks out) every `children:` entry via the injected
5//! [`GitBackend`], and recurses. `depends_on` entries are recorded as edges
6//! but never walked — they are *external prereqs* verified by
7//! [`crate::pack::validate::DependsOnValidator`] after the graph is built.
8//!
9//! # Cycle detection
10//!
//! Cycles are detected **during** the walk, not post-hoc. The walk threads
//! a stack of pack identifiers through every recursion: a child is
//! identified as `url:<url>` (with `@<ref>` appended when it declares a
//! non-empty ref) and the root as `path:<root path>`. If a child is about
//! to be entered whose identifier is already on the stack, the walker
//! short-circuits with [`TreeError::CycleDetected`]. A separate
//! `CycleValidator` runs post-hoc as a belt-and-suspenders check so
//! manually-constructed graphs cannot sneak through.
18//!
19//! # Cyclomatic discipline
20//!
//! The walk is decomposed so each helper stays well under CC 15:
//! `walk` → `walk_recursive` → (`record_depends_on` | `process_children` →
//! `handle_child` → `resolve_destination`).
24
25use std::collections::BTreeMap;
26use std::path::{Path, PathBuf};
27
28use rayon::prelude::*;
29
30use crate::git::GitBackend;
31use crate::pack::validate::child_path::{
32 boundary_fs_reject_reason, boundary_reject_reason, check_one as check_child_path,
33 nfc_duplicate_path,
34};
35use crate::pack::{ChildRef, PackManifest, PackType, PackValidationError, SchemaVersion};
36
37use super::consent::phase2_prune;
38use super::dest_class::{aggregate_untracked, classify_dest, DestClass};
39use super::error::TreeError;
40use super::graph::{EdgeKind, PackEdge, PackGraph, PackNode};
41use super::loader::PackLoader;
42use super::quarantine::QuarantineConfig;
43
44/// Recursive walker. Composes a [`PackLoader`] (for manifests) with a
45/// [`GitBackend`] (for child hydration).
46///
47/// The walker owns no state across calls: each invocation of [`Walker::walk`]
48/// produces a fresh [`PackGraph`] and leaves no footprint.
49///
50/// **Status (v1.2.1, path iii)**: retired from the production sync
51/// orchestrator. `sync::run` now composes [`sync_meta`] (mutate) →
52/// [`super::graph_build::build_graph`] (read-only) → `run_actions` instead
53/// of issuing clones+fetches inside the graph build. The `Walker` symbol
54/// is kept for downstream test-suite compatibility (22 fixture call sites
55/// in `crates/grex-core/tests/tree_walk.rs`); new code SHOULD NOT add
56/// production call sites.
57#[doc(hidden)]
58pub struct Walker<'a> {
59 loader: &'a dyn PackLoader,
60 backend: &'a dyn GitBackend,
61 workspace: PathBuf,
62 /// Optional global ref override (M4-D `grex sync --ref <sha|branch|tag>`).
63 /// When `Some`, every child clone/checkout uses this ref instead of the
64 /// declared `child.ref` from the parent manifest. `None` preserves M3
65 /// semantics.
66 ref_override: Option<String>,
67}
68
69impl<'a> Walker<'a> {
70 /// Construct a new walker.
71 ///
72 /// `workspace` is the directory under which child packs will be cloned,
73 /// using each [`ChildRef::effective_path`] as the sub-directory name.
74 #[must_use]
75 pub fn new(
76 loader: &'a dyn PackLoader,
77 backend: &'a dyn GitBackend,
78 workspace: PathBuf,
79 ) -> Self {
80 Self { loader, backend, workspace, ref_override: None }
81 }
82
83 /// Set a global ref override applied to every child pack.
84 ///
85 /// Surfaced as `grex sync --ref <sha|branch|tag>` (M4-D). The override
86 /// replaces each child's declared `ref` in its parent manifest. An
87 /// empty string is treated as "no override" — callers should reject
88 /// empty values at the CLI layer before reaching this point.
89 #[must_use]
90 pub fn with_ref_override(mut self, r#ref: Option<String>) -> Self {
91 self.ref_override = r#ref.filter(|s| !s.is_empty());
92 self
93 }
94
95 /// Walk the tree rooted at `root_pack_path`, returning the fully
96 /// hydrated graph.
97 ///
98 /// # Errors
99 ///
100 /// Returns [`TreeError`] on any loader, git, cycle, or name-mismatch
101 /// failure. The walk aborts on the first failure — the spec-level
102 /// "fail loud, fail fast" default.
103 pub fn walk(&self, root_pack_path: &Path) -> Result<PackGraph, TreeError> {
104 let mut state = BuildState::default();
105 let root_manifest = self.loader.load(root_pack_path)?;
106 // Pre-walk path-traversal gate: reject any malicious
107 // `children[].path` (or URL-derived tail) BEFORE any clone fires.
108 // Closes the v1.1.0 flat-sibling exploit window where a `path:
109 // ../escape` would materialise a child outside the pack root
110 // before plan-phase validation could see it.
111 validate_children_paths(&root_manifest)?;
112 let root_commit_sha = probe_head_sha(self.backend, root_pack_path);
113 let root_id = state.push_node(PackNode {
114 id: 0,
115 name: root_manifest.name.clone(),
116 path: root_pack_path.to_path_buf(),
117 source_url: None,
118 manifest: root_manifest.clone(),
119 parent: None,
120 commit_sha: root_commit_sha,
121 synthetic: false,
122 });
123 let root_identity = pack_identity_for_root(root_pack_path);
124 self.walk_recursive(root_id, &root_manifest, &mut state, &mut vec![root_identity])?;
125 Ok(PackGraph::new(state.nodes, state.edges))
126 }
127
128 /// Recursive step. `stack` carries the pack identifiers currently on
129 /// the walk path — pushed on entry, popped on return.
130 ///
131 /// Each loaded manifest's `children[]` is path-traversal-validated
132 /// before any of those children are resolved on disk; the entry
133 /// point pre-validates the root manifest, so by the time
134 /// `walk_recursive` runs for a child, that child's own `children[]`
135 /// is what needs gating before the next descent.
136 fn walk_recursive(
137 &self,
138 parent_id: usize,
139 manifest: &PackManifest,
140 state: &mut BuildState,
141 stack: &mut Vec<String>,
142 ) -> Result<(), TreeError> {
143 self.record_depends_on(parent_id, manifest, state);
144 self.process_children(parent_id, manifest, state, stack)
145 }
146
147 /// Record one `DependsOn` edge per `depends_on` entry. Resolution
148 /// against actual graph nodes happens later in `DependsOnValidator`.
149 /// We emit edges only where the target already exists in the graph so
150 /// the edge list stays in-bounds; unresolved deps are surfaced by the
151 /// validator, not carried as dangling edges.
152 fn record_depends_on(&self, parent_id: usize, manifest: &PackManifest, state: &mut BuildState) {
153 for dep in &manifest.depends_on {
154 if let Some(to) = find_node_id_by_name_or_url(&state.nodes, dep) {
155 state.edges.push(PackEdge { from: parent_id, to, kind: EdgeKind::DependsOn });
156 }
157 }
158 }
159
160 fn process_children(
161 &self,
162 parent_id: usize,
163 manifest: &PackManifest,
164 state: &mut BuildState,
165 stack: &mut Vec<String>,
166 ) -> Result<(), TreeError> {
167 for child in &manifest.children {
168 self.handle_child(parent_id, child, state, stack)?;
169 }
170 Ok(())
171 }
172
173 fn handle_child(
174 &self,
175 parent_id: usize,
176 child: &ChildRef,
177 state: &mut BuildState,
178 stack: &mut Vec<String>,
179 ) -> Result<(), TreeError> {
180 let identity = pack_identity_for_child(child);
181 if stack.iter().any(|s| s == &identity) {
182 let mut chain = stack.clone();
183 chain.push(identity);
184 return Err(TreeError::CycleDetected { chain });
185 }
186 // v1.2.0 Stage 1.c: FS-resident boundary check fires BEFORE
187 // any clone / fetch. Junctions, reparse points, and
188 // `.git`-as-file (gitfile redirect) all re-open the
189 // parent-boundary escape that the syntactic gate closes on
190 // the path string itself; running the check on the prospective
191 // dest path means a hostile pre-existing slot is rejected
192 // before the GitBackend writes anything into (or through) it.
193 // The prospective path is reconstructed here so the helper
194 // can interrogate the slot before `resolve_destination`
195 // materialises a clone — pre-clone runs return `Ok(())` because
196 // the slot doesn't exist yet, and the walk continues normally.
197 let prospective_dest = self.workspace.join(child.effective_path());
198 check_dest_boundary(&prospective_dest, &child.effective_path())?;
199 let dest = self.resolve_destination(child, state)?;
200 // v1.1.1 plain-git children: when the destination has no
201 // `.grex/pack.yaml` but does carry a `.git/`, synthesize a
202 // leaf scripted-no-hooks manifest in-memory rather than
203 // aborting. See
204 // `openspec/changes/feat-v1.1.1-plain-git-children/design.md`
205 // §"Synthesis algorithm".
206 let (child_manifest, is_synthetic) = match self.loader.load(&dest) {
207 Ok(m) => (m, false),
208 Err(TreeError::ManifestNotFound(_)) if dest_has_git_repo(&dest) => {
209 (synthesize_plain_git_manifest(child), true)
210 }
211 Err(e) => return Err(e),
212 };
213 verify_child_name(&child_manifest.name, child, &dest)?;
214 // Validate this child's own `children[]` before its descent
215 // resolves any of them on disk. Mirrors the root-manifest gate
216 // in `walk`; together they ensure no clone can fire for a
217 // grandchild whose parent declared a traversal-bearing path.
218 validate_children_paths(&child_manifest)?;
219
220 let commit_sha = probe_head_sha(self.backend, &dest);
221 let child_id = state.push_node(PackNode {
222 id: state.nodes.len(),
223 name: child_manifest.name.clone(),
224 path: dest.clone(),
225 source_url: Some(child.url.clone()),
226 manifest: child_manifest.clone(),
227 parent: Some(parent_id),
228 commit_sha,
229 synthetic: is_synthetic,
230 });
231 state.edges.push(PackEdge { from: parent_id, to: child_id, kind: EdgeKind::Child });
232
233 stack.push(identity);
234 let result = self.walk_recursive(child_id, &child_manifest, state, stack);
235 stack.pop();
236 result
237 }
238
239 /// Decide where `child` lives on disk and ensure the working tree is
240 /// in the expected state: clone if absent, fetch + optional checkout
241 /// if present.
242 fn resolve_destination(
243 &self,
244 child: &ChildRef,
245 _state: &mut BuildState,
246 ) -> Result<PathBuf, TreeError> {
247 let dest = self.workspace.join(child.effective_path());
248 // M4-D: `ref_override` wins over the parent-declared `child.ref`.
249 // Falls back to the declared ref when no override is active.
250 let effective_ref = self.ref_override.as_deref().or(child.r#ref.as_deref());
251 if dest_has_git_repo(&dest) {
252 self.backend.fetch(&dest)?;
253 if let Some(r) = effective_ref {
254 self.backend.checkout(&dest, r)?;
255 }
256 } else {
257 self.backend.clone(&child.url, &dest, effective_ref)?;
258 }
259 Ok(dest)
260 }
261}
262
263/// Best-effort HEAD probe. Returns `None` when the target is not a git
264/// repository or the backend refuses — the root of a declarative pack is
265/// often a plain directory, so this must not fail the walk.
266///
267/// Non-`.git` directories short-circuit silently (truly not a git
268/// repo). Backend errors on an actual `.git` directory are surfaced as
269/// a `tracing::warn!` log line so transient gix failures / ACL-denied
270/// `.git` reads do not silently degrade into an empty `commit_sha`
271/// without any operator signal. The walker continues with `None` — a
272/// best-effort probe is, by construction, allowed to fail.
273fn probe_head_sha(backend: &dyn GitBackend, path: &Path) -> Option<String> {
274 let dir =
275 if path.extension().and_then(|e| e.to_str()).is_some_and(|e| matches!(e, "yaml" | "yml")) {
276 path.parent()
277 .and_then(Path::parent)
278 .map_or_else(|| path.to_path_buf(), Path::to_path_buf)
279 } else {
280 path.to_path_buf()
281 };
282 if !dir.join(".git").exists() {
283 return None;
284 }
285 match backend.head_sha(&dir) {
286 Ok(s) => Some(s),
287 Err(e) => {
288 tracing::warn!(
289 target: "grex::walker",
290 "HEAD probe failed for {}: {e}",
291 dir.display()
292 );
293 None
294 }
295 }
296}
297
298/// Mutable state threaded through the walk. Private to this module so only
299/// the walker can grow the graph.
300#[derive(Default)]
301struct BuildState {
302 nodes: Vec<PackNode>,
303 edges: Vec<PackEdge>,
304}
305
306impl BuildState {
307 fn push_node(&mut self, node: PackNode) -> usize {
308 let id = node.id;
309 self.nodes.push(node);
310 id
311 }
312}
313
/// Cycle-detector identity of the root pack: the literal prefix `path:`
/// followed by the root path as rendered by [`Path::display`].
fn pack_identity_for_root(path: &Path) -> String {
    let shown = path.display();
    format!("path:{shown}")
}
318
319/// Identity string for a child — url+ref so the same repo at two different
320/// refs is considered distinct. This matches git semantics and avoids
321/// false-positive cycle detections for diamond dependencies on different
322/// tags.
323///
324/// v1.2.3 (B2): when the ref is missing or empty the trailing `@` is
325/// omitted so the on-the-wire identity is just `url:<url>` — matches
326/// `Grex.Walker.ChildRef.identity` in the Lean model. Without this
327/// elision two children that differ only in `ref: None` vs
328/// `ref: Some("")` would otherwise serialise the same way as
329/// `url:<url>@`, masking the distinction the Lean specification draws.
330fn pack_identity_for_child(child: &ChildRef) -> String {
331 match child.r#ref.as_deref() {
332 Some(r) if !r.is_empty() => format!("url:{}@{}", child.url, r),
333 _ => format!("url:{}", child.url),
334 }
335}
336
/// Cheap on-disk test for an existing working tree: `true` when `dest`
/// contains a `.git` entry (file or directory). The repository is not
/// opened here; that is left to the backend's `fetch` / `checkout`.
///
/// # Symlink safety
///
/// A `dest` that is itself a symlink always yields `false`, whatever its
/// target contains. This blocks a synthesis-redirection attack: with a
/// parent pack declaring `path: code` and a workspace that happens to
/// contain `<workspace>/code -> $HOME`, the walker would otherwise accept
/// `$HOME/.git` as a "plain-git child" and operate on an unrelated tree.
/// [`std::fs::symlink_metadata`] is used so that the link itself, not
/// its target, is inspected.
pub fn dest_has_git_repo(dest: &Path) -> bool {
    // `symlink_metadata` does not follow the link, so broken or
    // path-traversing symlinks are refused too. If the metadata cannot be
    // read at all, `dest` is treated as "not a symlink" and the `.git`
    // probe below decides.
    let is_symlink = std::fs::symlink_metadata(dest)
        .map(|meta| meta.file_type().is_symlink())
        .unwrap_or(false);
    !is_symlink && dest.join(".git").exists()
}
362
363/// Build the in-memory manifest used for v1.1.1 plain-git children — a
364/// leaf scripted pack with no hooks, no children, no actions. Activated
365/// at the walker's load-fallback boundary when a child has a `.git/`
366/// but no `.grex/pack.yaml`. See
367/// `openspec/changes/feat-v1.1.1-plain-git-children/design.md`.
368pub fn synthesize_plain_git_manifest(child: &ChildRef) -> PackManifest {
369 PackManifest {
370 schema_version: SchemaVersion::current(),
371 name: child.effective_path(),
372 r#type: PackType::Scripted,
373 version: None,
374 depends_on: Vec::new(),
375 children: Vec::new(),
376 actions: Vec::new(),
377 teardown: None,
378 extensions: BTreeMap::new(),
379 }
380}
381
382/// Enforce that the cloned child's pack.yaml name matches what the parent
383/// declared. The parent-side expectation is the child entry's
384/// [`ChildRef::effective_path`] — the directory name in the workspace.
385fn verify_child_name(got: &str, child: &ChildRef, dest: &Path) -> Result<(), TreeError> {
386 let expected = child.effective_path();
387 if got == expected {
388 return Ok(());
389 }
390 Err(TreeError::PackNameMismatch { got: got.to_string(), expected, path: dest.to_path_buf() })
391}
392
393/// Resolve a `depends_on` entry (URL or bare name) against nodes already
394/// recorded. Returns the node id on a hit, `None` otherwise.
395fn find_node_id_by_name_or_url(nodes: &[PackNode], dep: &str) -> Option<usize> {
396 if looks_like_url(dep) {
397 nodes.iter().find(|n| n.source_url.as_deref() == Some(dep)).map(|n| n.id)
398 } else {
399 nodes.iter().find(|n| n.name == dep).map(|n| n.id)
400 }
401}
402
403/// Run the path-traversal gate on `manifest.children`. Returns the
404/// first offending child as a [`TreeError::ChildPathInvalid`] so the
405/// walker aborts before any clone of the offending sibling fires.
406///
407/// Surfacing only the first offender (rather than aggregating) matches
408/// the walker's fail-fast posture — the plan-phase
409/// [`crate::pack::validate::ChildPathValidator`] still runs against the
410/// whole graph post-walk via `validate_graph`, so authors who clear
411/// the traversal exploit see the full diagnostic batch on the next
412/// invocation.
413///
414/// `check_child_path` is documented to return only the
415/// `ChildPathInvalid` variant, but we `match` exhaustively so any
416/// future variant the helper grows surfaces as a compile-time
417/// failure here rather than as a silently swallowed `Some(other)`.
418fn validate_children_paths(manifest: &PackManifest) -> Result<(), TreeError> {
419 // v1.2.0 Stage 1.c: NFC-duplicate sweep across the sibling list.
420 // Runs first because it's a cross-cutting check (one offender
421 // implicates the WHOLE list, not a single child). Surfaces as
422 // `TreeError::ManifestPathEscape` per walker.md
423 // §boundary-preservation — a NFC-collapsed name re-introduces the
424 // very boundary escape the regex was meant to close on
425 // case-insensitive filesystems.
426 if let Some(path) = nfc_duplicate_path(&manifest.children) {
427 return Err(TreeError::ManifestPathEscape {
428 path,
429 reason: "duplicate child path under Unicode NFC normalization (case-insensitive FS collision risk)"
430 .to_string(),
431 });
432 }
433 for child in &manifest.children {
434 // v1.2.0 Stage 1.c: per-segment boundary-preservation rejects.
435 // Layered AHEAD of the syntactic gate so the more specific
436 // `ManifestPathEscape` diagnostic wins for entries that would
437 // also fail the bare-name regex (e.g. `child:foo` is rejected
438 // here as a colon hazard instead of a generic charset miss).
439 let segment = child.path.as_deref().map_or_else(|| child.effective_path(), str::to_string);
440 if let Some(reason) = boundary_reject_reason(&segment) {
441 return Err(TreeError::ManifestPathEscape {
442 path: segment,
443 reason: reason.to_string(),
444 });
445 }
446 let Some(err) = check_child_path(child) else { continue };
447 match err {
448 PackValidationError::ChildPathInvalid { child_name, path, reason } => {
449 return Err(TreeError::ChildPathInvalid { child_name, path, reason });
450 }
451 other @ (PackValidationError::DuplicateSymlinkDst { .. }
452 | PackValidationError::GraphCycle { .. }
453 | PackValidationError::DependsOnUnsatisfied { .. }
454 | PackValidationError::ChildPathDuplicate { .. }) => {
455 // `check_child_path` is contracted to only emit
456 // `ChildPathInvalid`. Any other variant indicates the
457 // helper has drifted out of sync with this caller —
458 // surface loudly rather than silently swallowing it.
459 tracing::error!(
460 target: "grex::walker",
461 "check_child_path returned unexpected variant: {other:?}",
462 );
463 debug_assert!(false, "check_child_path returned unexpected variant: {other:?}");
464 }
465 }
466 }
467 Ok(())
468}
469
470/// v1.2.0 Stage 1.c: filesystem-resident boundary check. Run AFTER
471/// the destination has been resolved against the parent workspace but
472/// BEFORE any clone / fetch fires. Catches the case where the slot
473/// the walker is about to materialise into is already a junction,
474/// reparse point, symlink, or `.git`-as-file — each of which would
475/// re-introduce a parent-boundary escape.
476///
477/// Pre-clone: a non-existent destination is the happy path; the
478/// helper returns `None` and the walk continues. Post-clone or on a
479/// re-walk where the destination is already populated, the helper
480/// inspects the on-disk entry and surfaces a `ManifestPathEscape`
481/// when the entry violates the boundary contract.
482///
483/// Visibility: `pub(super)` — used by the walker's `handle_child`
484/// path-resolution step (wired in 1.c follow-up; this commit lands
485/// the helper itself and the boundary-check call site for the
486/// path-segment rejects).
487pub(super) fn check_dest_boundary(dest: &Path, segment: &str) -> Result<(), TreeError> {
488 if let Some(reason) = boundary_fs_reject_reason(dest) {
489 return Err(TreeError::ManifestPathEscape {
490 path: segment.to_string(),
491 reason: reason.to_string(),
492 });
493 }
494 Ok(())
495}
496
497/// Decide whether a `depends_on` entry is a URL rather than a bare name.
498/// The rule is intentionally literal — matching the spec's enumeration of
499/// accepted forms.
500pub(super) fn looks_like_url(s: &str) -> bool {
501 s.starts_with("http://")
502 || s.starts_with("https://")
503 || s.starts_with("ssh://")
504 || s.starts_with("git@")
505 || s.ends_with(".git")
506}
507
508// ---------------------------------------------------------------------------
509// v1.2.0 Stage 1.g — `sync_meta` entry point: parent-relative,
510// distributed-lockfile walker. Three phases per meta:
511//
512// Phase 1 (siblings): `classify_dest` (1.e) per child, dispatch
513// fetch / clone / refuse based on the verdict; aggregate
514// `PresentUndeclared` into `TreeError::UntrackedGitRepos`.
515// Phase 2 (orphan prune): for each `prune_candidate` (caller-supplied
516// by 1.h once the distributed lockfile read lands), run the
517// consent-walk via `phase2_prune` (1.f).
518// Phase 3 (recursion): per child whose dest carries
519// `<dest>/.grex/pack.yaml`, recursively `sync_meta` if `recurse`
520// is true and depth < `max_depth`.
521//
522// Design discipline:
523//
// * **No new locking primitives.** Per-pack git ops acquire the M6
//   `PackLock` (synchronous `acquire`) for the duration of the
//   clone/fetch. The Lean axiom `sync_disjoint_commutes` (Bridge.lean)
//   permits any disjoint scheduler — sequential is the smallest model
//   that satisfies the axiom. Sibling parallelism via rayon was
//   originally deferred to a 1.j / 1.l follow-up; since v1.2.1 item 3,
//   `sync_meta_inner` builds a per-call rayon pool (sized by
//   `SyncMetaOptions::parallel`) that Phase 1 and Phase 3 run on.
531// * **No lockfile mechanics.** Phase 2's orphan list is a parameter,
532// not a read from `<meta>/.grex/grex.lock.jsonl`. 1.h owns the
533// distributed-lockfile read/write surface; this commit only wires
534// the consent-walk + prune dispatch.
535// * **Error aggregation.** Every Phase 1 child failure plus every
536// Phase 2 refusal lands in `SyncMetaReport::errors` before the call
537// returns. The walker is fail-LOUD (caller gets the full picture),
538// not fail-fast (the legacy `Walker::walk` aborts on the first hit).
539// This matches the v1.2.0 walker.md §"untracked git policy" rule
540// that `UntrackedGitRepos` must enumerate every offender at once.
541// ---------------------------------------------------------------------------
542
543/// Per-meta options threaded through `sync_meta`. Keeps the call-site
544/// signature small without coupling to the full [`crate::sync::SyncOptions`]
545/// surface — the orchestrator (`sync.rs::run`) is responsible for projecting
546/// `SyncOptions` into `SyncMetaOptions` when it wires this entry point.
547#[derive(Debug, Clone)]
548pub struct SyncMetaOptions {
549 /// Global ref override (`grex sync --ref <sha|branch|tag>`). Mirrors
550 /// [`Walker::with_ref_override`]: when `Some`, every child's
551 /// declared `ref` is replaced.
552 pub ref_override: Option<String>,
553 /// When `true`, Phase 3 recurses into child metas. `false` is the
554 /// `doctor --shallow` semantics: process only the immediate
555 /// children of the supplied meta.
556 pub recurse: bool,
557 /// Bound on Phase 3 recursion depth. `None` is unbounded; `Some(n)`
558 /// caps at `n` levels of nesting (the supplied `meta_dir` is depth
559 /// 0). Recursion ALWAYS halts before depth `n+1`.
560 pub max_depth: Option<usize>,
561 /// Phase 2 prune-safety override. Mirrors
562 /// [`crate::sync::SyncOptions::force_prune`].
563 pub force_prune: bool,
564 /// Phase 2 prune-safety override. Mirrors
565 /// [`crate::sync::SyncOptions::force_prune_with_ignored`].
566 pub force_prune_with_ignored: bool,
567 /// v1.2.1 item 3 — rayon thread-pool size for sibling-parallel
568 /// Phase 1 + Phase 3. `None` ⇒ rayon's default (`num_cpus::get()`);
569 /// `Some(1)` ⇒ effectively sequential (single-threaded pool, useful
570 /// for determinism testing); `Some(n >= 2)` ⇒ bounded parallel.
571 /// `Some(0)` is clamped to `1` (rayon rejects a zero-thread pool).
572 /// Mirrors [`crate::sync::SyncOptions::parallel`] semantics with the
573 /// one exception that `0` is clamped to `1` here — the unbounded
574 /// sentinel only makes sense for tokio's `Semaphore::MAX_PERMITS`.
575 pub parallel: Option<usize>,
576 /// v1.2.1 item 5b — when `Some`, Phase 2 prunes are diverted
577 /// through the snapshot-then-unlink quarantine pipeline before
578 /// `unlink(dest)` fires. Carries the per-meta trash bucket root
579 /// and audit-log path. `None` (default) preserves the legacy
580 /// v1.2.0 direct-unlink path. Set by
581 /// [`crate::sync::SyncOptions::quarantine`] at the orchestrator
582 /// boundary; the consent layer reads this to pick the deletion
583 /// strategy. Lean theorem `quarantine_snapshot_precedes_delete`
584 /// proves the safety contract.
585 pub quarantine: Option<QuarantineConfig>,
586}
587
588impl Default for SyncMetaOptions {
589 fn default() -> Self {
590 Self {
591 ref_override: None,
592 recurse: true,
593 max_depth: None,
594 force_prune: false,
595 force_prune_with_ignored: false,
596 parallel: None,
597 quarantine: None,
598 }
599 }
600}
601
602/// Outcome of one [`sync_meta`] invocation. Aggregated across every
603/// recursion frame: a sub-meta's report is folded into its parent's
604/// report at the end of Phase 3.
605#[derive(Debug, Default)]
606pub struct SyncMetaReport {
607 /// Number of metas processed (this meta + every descendant Phase 3
608 /// recursion fired against). Useful for `--shallow` verification:
609 /// `recurse: false` means `metas_visited == 1`.
610 pub metas_visited: usize,
611 /// Per-child Phase 1 verdicts, keyed by parent-relative child path.
612 /// `(meta_dir, child_dest, classification)` — exposed primarily for
613 /// tests; downstream callers will project into a status report.
614 pub phase1_classifications: Vec<(PathBuf, PathBuf, DestClass)>,
615 /// Successful Phase 2 prunes (paths that were removed). Empty when
616 /// no orphan list was supplied or every orphan refused.
617 pub phase2_pruned: Vec<PathBuf>,
618 /// Aggregate of every error encountered across Phases 1, 2, and 3.
619 /// The walker continues past recoverable errors so the caller sees
620 /// the full picture in one pass.
621 pub errors: Vec<TreeError>,
622}
623
624impl SyncMetaReport {
625 fn merge(&mut self, mut child: SyncMetaReport) {
626 self.metas_visited += child.metas_visited;
627 self.phase1_classifications.append(&mut child.phase1_classifications);
628 self.phase2_pruned.append(&mut child.phase2_pruned);
629 self.errors.append(&mut child.errors);
630 }
631}
632
633/// v1.2.0 Stage 1.g — three-phase per-meta walker entry point.
634///
635/// `meta_dir` is the on-disk directory containing the meta's
636/// `.grex/pack.yaml`. `prune_candidates` is the list of orphan dests
637/// (parent-relative) the caller's distributed-lockfile reader determined
638/// no longer appear in `manifest.children` — empty until Stage 1.h
639/// supplies the read side.
640///
641/// Discharges Lean theorems W1–W8, V1, C1, C2, F1 via the bridges in
642/// `Bridge.lean`. The sequential implementation is a special case of
643/// the `sync_disjoint_commutes` axiom (single permit, no interleaving)
644/// so no new bridge axiom is required.
645///
646/// # Errors
647///
648/// Returns the *first* catastrophic error (manifest parse failure on
649/// the supplied `meta_dir`). All recoverable errors land in
650/// [`SyncMetaReport::errors`] and the walker continues — fail-loud,
651/// not fail-fast.
652pub fn sync_meta(
653 meta_dir: &Path,
654 backend: &dyn GitBackend,
655 loader: &dyn PackLoader,
656 opts: &SyncMetaOptions,
657 prune_candidates: &[PathBuf],
658) -> Result<SyncMetaReport, TreeError> {
659 // v1.2.3 (B4) — seed the ancestor chain with the root pack's
660 // path-namespaced identity (`path:<meta_dir>`) so the Lean
661 // `acyclic_path` precondition that drives
662 // `sync_meta_no_cycle_infinite_clone` is established right at
663 // the call site rather than implicitly relying on an empty
664 // initial visited. Children identify with `url:<url>@<ref>` —
665 // disjoint namespace from the root's `path:` identity, so seeding
666 // does not introduce false-positive cycle hits against any
667 // legitimate child.
668 //
669 // `sync_meta_inner` extends this chain per recursion edge (Phase
670 // 3) using clone-per-child so disjoint sibling branches do not
671 // pollute each other's ancestor view.
672 let initial_visited = vec![pack_identity_for_root(meta_dir)];
673 sync_meta_inner(
674 meta_dir,
675 backend,
676 loader,
677 opts,
678 prune_candidates,
679 /* depth */ 0,
680 &initial_visited,
681 )
682}
683
684fn sync_meta_inner(
685 meta_dir: &Path,
686 backend: &dyn GitBackend,
687 loader: &dyn PackLoader,
688 opts: &SyncMetaOptions,
689 prune_candidates: &[PathBuf],
690 depth: usize,
691 visited: &[String],
692) -> Result<SyncMetaReport, TreeError> {
693 let manifest = loader.load(meta_dir)?;
694 // v1.2.0 Stage 1.c gate — every recursion frame re-runs the
695 // path-traversal sweep before any child is touched on disk.
696 validate_children_paths(&manifest)?;
697
698 let mut report = SyncMetaReport { metas_visited: 1, ..SyncMetaReport::default() };
699
700 // v1.2.1 item 3: build a per-call rayon pool sized from
701 // `opts.parallel`. Phase 1 + Phase 3 install on this pool; Phase 2
702 // stays sequential (single-meta orphan sweep — no sibling
703 // parallelism to extract). The pool is dropped at the end of
704 // `sync_meta_inner`, so each recursion frame builds + tears down
705 // its own pool. This is intentional: we want the worker count to
706 // refresh per call so a top-level `--parallel 1` cap is honoured
707 // without piggy-backing on a global pool that an unrelated caller
708 // might have configured differently.
709 let pool = build_pool(opts.parallel)?;
710
711 phase1_sync_children(&pool, meta_dir, &manifest, backend, opts, &mut report);
712 phase2_prune_orphans(meta_dir, prune_candidates, opts, &mut report);
713 // v1.2.2 — cycle detection short-circuits the recursion edge with
714 // an `Err` return so the caller sees `Err(CycleDetected)` directly
715 // rather than burying it in `report.errors`. Cycles are catastrophic
716 // (would otherwise clone forever); fail-loud here, NOT fold-into-report.
717 phase3_recurse(&pool, meta_dir, &manifest, backend, loader, opts, depth, visited, &mut report)?;
718
719 Ok(report)
720}
721
722/// v1.2.1 item 3 — build a rayon `ThreadPool` sized from
723/// `opts.parallel`. Encapsulates the `None` ⇒ default,
724/// `Some(0)` ⇒ clamp-to-1, `Some(n)` ⇒ exact-N policy in one place
725/// so Phase 1 and Phase 3 install on identically-configured pools.
726///
727/// `Some(1)` produces a single-worker pool — the determinism
728/// test-mode fast-path (sibling iteration order matches sequential
729/// for-loop order on a 1-thread pool).
730///
731/// Build failures surface as [`TreeError::ManifestRead`]: a rayon
732/// pool failure is invariably a host-resource issue (out of file
733/// descriptors, thread-creation refused) — bucketing it into the
734/// generic IO-error variant keeps the error surface tight without
735/// inventing a one-off `RayonPoolBuild` discriminant. The Lean
736/// model treats pool construction as a well-formedness precondition
737/// of `sync`, not an in-band failure mode.
738fn build_pool(parallel: Option<usize>) -> Result<rayon::ThreadPool, TreeError> {
739 let mut builder = rayon::ThreadPoolBuilder::new();
740 if let Some(n) = parallel {
741 builder = builder.num_threads(n.max(1));
742 }
743 builder.build().map_err(|e| {
744 TreeError::ManifestRead(format!("failed to build rayon pool for sync_meta: {e}"))
745 })
746}
747
748/// Per-child output from Phase 1's parallel pass. Collected into a
749/// `Vec` after the rayon `par_iter` settles, then drained into the
750/// caller's `SyncMetaReport` in a single sequential pass. Carrying
751/// the data plain (no `&mut report` shared across threads) is what
752/// keeps the parallelisation sound under the Lean
753/// `sync_disjoint_commutes` axiom: each iteration's mutations are
754/// confined to its own owned struct.
755struct Phase1ChildOutcome {
756 /// `(meta_dir, dest, class)` — pushed onto
757 /// `report.phase1_classifications` regardless of dispatch outcome.
758 classification: (PathBuf, PathBuf, DestClass),
759 /// Per-child clone/fetch failure, if any. Folded into
760 /// `report.errors`.
761 error: Option<TreeError>,
762 /// `Some((dest, class))` when the child classified as
763 /// `PresentUndeclared`; the caller aggregates these into one
764 /// `UntrackedGitRepos` error after the parallel pass.
765 undeclared: Option<(PathBuf, DestClass)>,
766}
767
768/// Phase 1: classify each declared child, then dispatch. Per the v1.2.0
769/// walker.md pseudocode the per-child branches are:
770///
771/// * `Missing` → clone via `backend.clone(url, dest, ref)`.
772/// * `PresentDeclared` → fetch (+ checkout if a ref override applies).
773/// * `PresentDirty` → no-op (preserve user changes; will surface at
774/// exec/plan stage if applicable).
775/// * `PresentInProgress` → refuse via `DirtyTreeRefusal{GitInProgress}`
776/// (collected into `report.errors`).
777/// * `PresentUndeclared` → impossible at Phase 1 dispatch time because
778/// declared paths are in `manifest.children`; the variant is reserved
779/// for the lockfile-orphan sweep (Phase 2 territory).
780///
781/// v1.2.1 item 3 — sibling-parallel via rayon `par_iter`. Disjointness
782/// across siblings (each child has its own `meta_dir.join(child.path)`
783/// dest, validated by `validate_children_paths` upstream) discharges
784/// the precondition of the `sync_disjoint_commutes` axiom in
785/// `proof/Grex/Bridge.lean`. The per-pack `.grex-lock` (M6, acquired
786/// inside the GitBackend implementation) continues to serialise any
787/// cross-task contention on the same pack path. Per-thread results
788/// are collected into a `Vec<Phase1ChildOutcome>` and folded into the
789/// caller's `SyncMetaReport` in a single sequential pass, preserving
790/// deterministic ordering of `report.phase1_classifications` (rayon
791/// `collect_into_vec` preserves source-order regardless of completion
792/// order).
793fn phase1_sync_children(
794 pool: &rayon::ThreadPool,
795 meta_dir: &Path,
796 manifest: &PackManifest,
797 backend: &dyn GitBackend,
798 opts: &SyncMetaOptions,
799 report: &mut SyncMetaReport,
800) {
801 // Install on the per-call pool so `--parallel N` is honoured even
802 // when this is invoked from inside another rayon context (Phase 3
803 // recursion). `install` is a synchronous fence: the closure
804 // returns once every parallel iteration has settled.
805 let outcomes: Vec<Phase1ChildOutcome> = pool.install(|| {
806 manifest
807 .children
808 .par_iter()
809 .map(|child| phase1_handle_child(meta_dir, child, backend, opts))
810 .collect()
811 });
812
813 // Sequential fold: the parallel pass cannot mutate `report` directly
814 // (it is `&mut`), so we drain the per-child outcomes here. Order is
815 // preserved by `par_iter().collect()` — see the `phase1_par_iter_preserves_order`
816 // test below.
817 let mut undeclared_seen: Vec<(PathBuf, DestClass)> = Vec::new();
818 for outcome in outcomes {
819 report.phase1_classifications.push(outcome.classification);
820 if let Some(e) = outcome.error {
821 report.errors.push(e);
822 }
823 if let Some(pair) = outcome.undeclared {
824 undeclared_seen.push(pair);
825 }
826 }
827 if let Err(e) = aggregate_untracked(undeclared_seen) {
828 report.errors.push(e);
829 }
830}
831
832/// Per-child Phase 1 dispatch — runs inside the rayon pool. The
833/// extracted fn keeps the parallel closure body small and gives the
834/// Lean axiom a single discoverable Rust contract anchor (this fn is
835/// the per-sibling unit of work the `sync_disjoint_commutes` axiom
836/// quantifies over).
837fn phase1_handle_child(
838 meta_dir: &Path,
839 child: &ChildRef,
840 backend: &dyn GitBackend,
841 opts: &SyncMetaOptions,
842) -> Phase1ChildOutcome {
843 let dest = meta_dir.join(child.effective_path());
844 // Every declared child IS in the manifest by construction —
845 // `declared_in_manifest = true` is the only correct call here.
846 let class = classify_dest(&dest, true, None);
847 let mut out = Phase1ChildOutcome {
848 classification: (meta_dir.to_path_buf(), dest.clone(), class),
849 error: None,
850 undeclared: None,
851 };
852 match class {
853 DestClass::Missing => {
854 if let Err(e) = phase1_clone(backend, child, &dest, opts) {
855 out.error = Some(e);
856 }
857 }
858 DestClass::PresentDeclared => {
859 if let Err(e) = phase1_fetch(backend, child, &dest, opts) {
860 out.error = Some(e);
861 }
862 }
863 DestClass::PresentDirty => {
864 // Conservative: leave the dirty tree untouched. The
865 // operator has uncommitted work; v1.2.0 walker policy
866 // is to never overwrite their bytes during Phase 1.
867 // Phase 2 will surface a refusal if the operator ALSO
868 // requested a prune of this path, but that's a
869 // separate decision made by the caller's lockfile-
870 // orphan computation.
871 }
872 DestClass::PresentInProgress => {
873 out.error = Some(TreeError::DirtyTreeRefusal {
874 path: dest.clone(),
875 kind: super::error::DirtyTreeRefusalKind::GitInProgress,
876 });
877 }
878 DestClass::PresentUndeclared => {
879 // Buffer for `aggregate_untracked` so we surface the
880 // FULL list in one error.
881 out.undeclared = Some((dest, class));
882 }
883 }
884 out
885}
886
887/// Phase 1 clone helper. Acquires the M6 `PackLock` on the prospective
888/// dest's parent (`meta_dir`) for the duration of the clone — distinct
889/// children clone serially within a meta to keep the scheduler-tier
890/// model honest. Sibling parallelism is a 1.j follow-up.
891fn phase1_clone(
892 backend: &dyn GitBackend,
893 child: &ChildRef,
894 dest: &Path,
895 opts: &SyncMetaOptions,
896) -> Result<(), TreeError> {
897 let effective_ref = opts.ref_override.as_deref().or(child.r#ref.as_deref());
898 // Make sure the dest's parent exists — the clone backend assumes
899 // it. v1.2.0 invariant 1 (boundary) and 1.c's `validate_children_paths`
900 // already ruled out a path that would escape `meta_dir`, so a
901 // simple `create_dir_all` on the parent is safe here.
902 if let Some(parent) = dest.parent() {
903 std::fs::create_dir_all(parent).map_err(|e| {
904 TreeError::ManifestRead(format!("failed to mkdir parent {}: {e}", parent.display()))
905 })?;
906 }
907 backend.clone(&child.url, dest, effective_ref)?;
908 Ok(())
909}
910
911/// Phase 1 fetch helper. Same locking discipline as `phase1_clone`.
912fn phase1_fetch(
913 backend: &dyn GitBackend,
914 child: &ChildRef,
915 dest: &Path,
916 opts: &SyncMetaOptions,
917) -> Result<(), TreeError> {
918 backend.fetch(dest)?;
919 let effective_ref = opts.ref_override.as_deref().or(child.r#ref.as_deref());
920 if let Some(r) = effective_ref {
921 backend.checkout(dest, r)?;
922 }
923 Ok(())
924}
925
926/// Phase 2: prune orphan lockfile entries. Each candidate is run
927/// through the consent-walk via `phase2_prune` (1.f); a `Clean` verdict
928/// removes the dest, anything else surfaces as an error. The orphan
929/// list is supplied by the caller — 1.h owns the lockfile-read side
930/// of the walker contract.
931fn phase2_prune_orphans(
932 meta_dir: &Path,
933 prune_candidates: &[PathBuf],
934 opts: &SyncMetaOptions,
935 report: &mut SyncMetaReport,
936) {
937 // v1.2.0 Stage 1.l — postmortem audit log path. Resolved once per
938 // meta from the canonical `<meta_dir>/.grex/events.jsonl` slot;
939 // `phase2_prune` only writes to it when an override flag actually
940 // consumed a non-Clean verdict (clean prunes never log).
941 let audit_log = crate::manifest::event_log_path(meta_dir);
942 for candidate in prune_candidates {
943 // Candidates are parent-relative POSIX paths
944 // (`LockEntry::validate_path` invariant from 1.b). Resolve
945 // against `meta_dir` to get the absolute dest.
946 let dest = meta_dir.join(candidate);
947 match phase2_prune(
948 &dest,
949 opts.force_prune,
950 opts.force_prune_with_ignored,
951 Some(audit_log.as_path()),
952 opts.quarantine.as_ref(),
953 ) {
954 Ok(()) => report.phase2_pruned.push(dest),
955 Err(e) => report.errors.push(e),
956 }
957 }
958}
959
960/// Per-child output from Phase 3's parallel recursion. Each variant
961/// carries either a successful sub-`SyncMetaReport` (folded into the
962/// caller via [`SyncMetaReport::merge`]) or a fatal error to push onto
963/// `report.errors`. Children whose dest does NOT carry a sub-meta
964/// produce `Skipped`.
965enum Phase3ChildOutcome {
966 Skipped,
967 Recursed(SyncMetaReport),
968 Failed(TreeError),
969}
970
971/// Phase 3: parallel recursion into child metas. A child qualifies for
972/// recursion when:
973///
974/// 1. `opts.recurse` is `true`,
975/// 2. `opts.max_depth` is unbounded OR the next-frame depth is
976/// strictly less than the cap,
977/// 3. `<dest>/.grex/pack.yaml` exists.
978///
979/// Sub-meta reports are merged into the parent's report via
980/// [`SyncMetaReport::merge`] so a top-level caller sees one rolled-up
981/// view of every frame's classifications + errors.
982///
983/// v1.2.1 item 3 — sibling-parallel via rayon `par_iter`. Each
984/// recursion frame builds its own thread pool inside `sync_meta_inner`
985/// (work-stealing across recursion levels happens naturally because
986/// the inner `pool.install` blocks for the lifetime of the inner
987/// sync_meta call; sibling sub-metas at level N execute in parallel
988/// via the level-N pool, and each level-N child carries its own
989/// level-(N+1) pool for its own grandchildren). Sub-reports are
990/// collected source-ordered via `collect_into_vec`, then folded into
991/// `report` sequentially to preserve deterministic ordering of the
992/// `phase1_classifications` / `phase2_pruned` / `errors` vectors.
993// Pre-rayon refactor this fn already carried 7 args (the clippy cap).
994// v1.2.1 item 3 added the `pool` reference, taking it to 8. Bundling
995// these into a context struct is technically possible but every other
996// arg already comes from `sync_meta_inner`'s param list, so the struct
997// would just shuffle the wiring without removing it. Localised allow
998// instead — the call-site is private to this module and threads
999// ownership of `pool` cleanly.
1000/// Per-child Phase 3 dispatch — runs inside the rayon pool. Mirrors
1001/// the `phase1_handle_child` / `sync_disjoint_commutes` discipline
1002/// (one discoverable Rust contract anchor per sibling unit of work)
1003/// and keeps `phase3_recurse` itself under the clippy line cap.
1004///
1005/// v1.2.2 — cycle detection lives here. `visited` is the ancestor
1006/// identity chain from root down to (but excluding) this child. If
1007/// the child's identity (`pack_identity_for_child`) is already in
1008/// the chain we surface `TreeError::CycleDetected` with the chain
1009/// extended by the recurring identity. Otherwise the child's
1010/// identity is appended (clone-per-child, A.1) so disjoint sibling
1011/// branches do not pollute each other's view.
1012///
1013/// v1.2.3 (B1) — the depth-cap check (`next_depth > opts.max_depth`)
1014/// MUST run AFTER the cycle check. Otherwise a cyclic manifest whose
1015/// cycle length exceeds `max_depth` would silently truncate without
1016/// surfacing `CycleDetected`: the depth cap is a "stop walking
1017/// further" knob, not a "ignore correctness invariants" knob.
1018fn phase3_handle_child(
1019 meta_dir: &Path,
1020 child: &ChildRef,
1021 backend: &dyn GitBackend,
1022 loader: &dyn PackLoader,
1023 opts: &SyncMetaOptions,
1024 next_depth: usize,
1025 visited: &[String],
1026) -> Phase3ChildOutcome {
1027 let dest = meta_dir.join(child.effective_path());
1028 if !dest.join(".grex").join("pack.yaml").is_file() {
1029 return Phase3ChildOutcome::Skipped;
1030 }
1031 // v1.2.2 cycle detection — discharges the
1032 // `sync_meta_no_cycle_infinite_clone` Lean theorem in
1033 // `proof/Grex/Walker.lean`. Identity is `url@ref` so the same
1034 // repo at two different refs is two distinct packs (intentional:
1035 // matches `pack_identity_for_child` and the build_graph cycle
1036 // detector at `graph_build.rs:174`). A single `Vec<String>`
1037 // doubles as O(depth) contains-check AND deterministic chain for
1038 // error display — depth is bounded ~5-10 in practice so linear
1039 // scan beats hashing here.
1040 //
1041 // v1.2.3 (B1): runs BEFORE the depth-cap early-return below so a
1042 // cycle longer than `max_depth` cannot hide behind truncation.
1043 let id = pack_identity_for_child(child);
1044 if visited.iter().any(|v| v == &id) {
1045 let mut chain = visited.to_vec();
1046 chain.push(id);
1047 return Phase3ChildOutcome::Failed(TreeError::CycleDetected { chain });
1048 }
1049 // v1.2.3 (B1): depth-cap check moved from `phase3_recurse` to
1050 // here, AFTER the cycle check. `Skipped` rather than a hard error
1051 // because depth-cap truncation is a benign best-effort knob —
1052 // siblings further down the manifest tree should still classify.
1053 if let Some(cap) = opts.max_depth {
1054 if next_depth > cap {
1055 return Phase3ChildOutcome::Skipped;
1056 }
1057 }
1058 // Clone-per-child (A.1): each rayon iteration owns its own
1059 // ancestor view, so disjoint sibling branches do not see each
1060 // other on the path. A diamond where two siblings legitimately
1061 // depend on the same descendant is therefore not a cycle.
1062 let mut child_visited = visited.to_vec();
1063 child_visited.push(id);
1064 // Empty `prune_candidates` for the sub-meta — 1.h supplies the
1065 // sub-meta's distributed lockfile read via the same caller
1066 // pathway when it lands.
1067 match sync_meta_inner(&dest, backend, loader, opts, &[], next_depth, &child_visited) {
1068 Ok(sub) => Phase3ChildOutcome::Recursed(sub),
1069 Err(e) => Phase3ChildOutcome::Failed(e),
1070 }
1071}
1072
1073#[allow(clippy::too_many_arguments)]
1074fn phase3_recurse(
1075 pool: &rayon::ThreadPool,
1076 meta_dir: &Path,
1077 manifest: &PackManifest,
1078 backend: &dyn GitBackend,
1079 loader: &dyn PackLoader,
1080 opts: &SyncMetaOptions,
1081 depth: usize,
1082 visited: &[String],
1083 report: &mut SyncMetaReport,
1084) -> Result<(), TreeError> {
1085 if !opts.recurse {
1086 return Ok(());
1087 }
1088 let next_depth = depth + 1;
1089 // v1.2.3 (B1): depth-cap early-return removed from this site —
1090 // moved into `phase3_handle_child` AFTER the cycle check so a
1091 // cycle longer than `max_depth` cannot mask itself by tripping
1092 // the depth cap before the cycle test fires. The per-child
1093 // handler now treats `next_depth > cap` as `Skipped`.
1094 let outcomes: Vec<Phase3ChildOutcome> = pool.install(|| {
1095 manifest
1096 .children
1097 .par_iter()
1098 .map(|child| {
1099 phase3_handle_child(meta_dir, child, backend, loader, opts, next_depth, visited)
1100 })
1101 .collect()
1102 });
1103 // Cycle errors short-circuit (catastrophic — clone-storm risk);
1104 // every other outcome folds into the report per the existing
1105 // fail-loud-but-continue policy.
1106 let mut first_cycle_idx: Option<usize> = None;
1107 for outcome in outcomes {
1108 match outcome {
1109 Phase3ChildOutcome::Skipped => {}
1110 Phase3ChildOutcome::Recursed(sub) => report.merge(sub),
1111 Phase3ChildOutcome::Failed(e) => {
1112 // v1.2.2 fix: surface all sibling cycles in
1113 // report.errors; first cycle returned as short-circuit
1114 // Err per fail-loud policy.
1115 if matches!(e, TreeError::CycleDetected { .. }) && first_cycle_idx.is_none() {
1116 first_cycle_idx = Some(report.errors.len());
1117 }
1118 report.errors.push(e);
1119 }
1120 }
1121 }
1122 if let Some(idx) = first_cycle_idx {
1123 // Clone the cycle to return as the short-circuit Err while
1124 // leaving the original entry (and any sibling cycles) recorded
1125 // in report.errors for the caller to log/print.
1126 let TreeError::CycleDetected { chain } = &report.errors[idx] else {
1127 unreachable!("first_cycle_idx points at a CycleDetected variant by construction");
1128 };
1129 return Err(TreeError::CycleDetected { chain: chain.clone() });
1130 }
1131 Ok(())
1132}
1133
1134#[cfg(test)]
1135mod tests {
1136 use super::*;
1137
1138 /// Direct unit test of the synthesis helper — name must equal the
1139 /// child's `effective_path()`, type must be `Scripted`, and every
1140 /// list field must be empty.
1141 #[test]
1142 fn synthesize_plain_git_manifest_yields_leaf_scripted_pack() {
1143 let child = ChildRef {
1144 url: "https://example.com/algo-leet.git".to_string(),
1145 path: None,
1146 r#ref: None,
1147 };
1148 let manifest = synthesize_plain_git_manifest(&child);
1149 assert_eq!(manifest.name, child.effective_path());
1150 assert_eq!(manifest.name, "algo-leet");
1151 assert_eq!(manifest.r#type, PackType::Scripted);
1152 assert_eq!(manifest.schema_version.as_str(), "1");
1153 assert!(manifest.depends_on.is_empty());
1154 assert!(manifest.children.is_empty());
1155 assert!(manifest.actions.is_empty());
1156 assert!(manifest.teardown.is_none());
1157 assert!(manifest.extensions.is_empty());
1158 assert!(manifest.version.is_none());
1159 }
1160
1161 /// Explicit `path:` override wins over the URL-derived bare name —
1162 /// confirms the synthesised manifest's `name` mirrors what the
1163 /// parent declared, so `verify_child_name` passes by construction.
1164 #[test]
1165 fn synthesize_plain_git_manifest_honours_explicit_path() {
1166 let child = ChildRef {
1167 url: "https://example.com/some-repo.git".to_string(),
1168 path: Some("custom-name".to_string()),
1169 r#ref: None,
1170 };
1171 let manifest = synthesize_plain_git_manifest(&child);
1172 assert_eq!(manifest.name, "custom-name");
1173 }
1174
1175 /// `dest_has_git_repo` MUST refuse a symlinked destination — even
1176 /// when the symlink target carries a real `.git/` directory.
1177 /// Otherwise a malicious parent pack could redirect synthesis to
1178 /// fetch into `$HOME` (or any sibling repo) by relying on a
1179 /// pre-existing symlink in the workspace.
1180 #[test]
1181 fn dest_has_git_repo_rejects_symlinked_dest() {
1182 // Skip on platforms where unprivileged symlink creation fails
1183 // (notably Windows without Developer Mode). Failing the symlink
1184 // call is itself proof the attack vector is closed for that
1185 // host, so the rest of the test is moot.
1186 let outer = tempfile::tempdir().unwrap();
1187 let real = outer.path().join("real-repo");
1188 std::fs::create_dir_all(real.join(".git")).unwrap();
1189 let link = outer.path().join("via-link");
1190
1191 #[cfg(unix)]
1192 let symlink_result = std::os::unix::fs::symlink(&real, &link);
1193 #[cfg(windows)]
1194 let symlink_result = std::os::windows::fs::symlink_dir(&real, &link);
1195
1196 if symlink_result.is_err() {
1197 // Host won't let us create a symlink — nothing to test.
1198 return;
1199 }
1200
1201 // Sanity: following the symlink would reveal `.git`.
1202 assert!(link.join(".git").exists(), "symlink target should expose .git through traversal");
1203 // But `dest_has_git_repo` must refuse it.
1204 assert!(
1205 !dest_has_git_repo(&link),
1206 "dest_has_git_repo must refuse a symlinked destination even when target has .git"
1207 );
1208 // Real (non-symlinked) sibling still passes — we haven't
1209 // accidentally broken the happy path.
1210 assert!(dest_has_git_repo(&real));
1211 }
1212
1213 // -----------------------------------------------------------------
1214 // v1.2.0 Stage 1.g — `sync_meta` three-phase walker tests (TDD).
1215 //
    // These tests use a thin in-memory `InMemLoader` plus an
    // `InMemGit` backend so the walker's PHASE ORCHESTRATION (not the
    // backend mechanics) is what's being exercised. The git-touching
1219 // primitives `classify_dest` (1.e) and `phase2_prune` (1.f) have
1220 // their own per-host tests that already cover the real-FS-and-git
1221 // path. The `host_has_git_binary` gate guards the few tests that
1222 // need a working `git` to materialise a clean `PresentDeclared`
1223 // verdict — same precedent as the `dest_class::tests` host-skip
1224 // pattern.
1225 // -----------------------------------------------------------------
1226
1227 use std::collections::HashMap;
1228 use std::sync::Mutex;
1229
1230 /// Minimal stand-in `PackLoader` for the v1.2.0 tests. Maps
1231 /// `meta_dir` → `PackManifest` directly so we never touch disk
1232 /// for manifest reads.
1233 struct InMemLoader {
1234 manifests: HashMap<PathBuf, PackManifest>,
1235 }
1236
1237 impl InMemLoader {
1238 fn new() -> Self {
1239 Self { manifests: HashMap::new() }
1240 }
1241 fn with(mut self, dir: impl Into<PathBuf>, m: PackManifest) -> Self {
1242 self.manifests.insert(dir.into(), m);
1243 self
1244 }
1245 }
1246
1247 impl PackLoader for InMemLoader {
1248 fn load(&self, path: &Path) -> Result<PackManifest, TreeError> {
1249 self.manifests
1250 .get(path)
1251 .cloned()
1252 .ok_or_else(|| TreeError::ManifestNotFound(path.to_path_buf()))
1253 }
1254 }
1255
1256 /// Minimal stand-in `GitBackend`. Records every call so tests can
1257 /// assert phase orchestration. `clone` materialises a `.git/`
1258 /// under the supplied dest so subsequent classify probes treat the
1259 /// slot as Present.
1260 #[allow(dead_code)] // fields populated for future test introspection.
1261 #[derive(Debug, Clone)]
1262 enum BackendCall {
1263 Clone { url: String, dest: PathBuf, r#ref: Option<String> },
1264 Fetch { dest: PathBuf },
1265 Checkout { dest: PathBuf, r#ref: String },
1266 HeadSha { dest: PathBuf },
1267 }
1268
1269 struct InMemGit {
1270 calls: Mutex<Vec<BackendCall>>,
1271 materialise_on_clone: bool,
1272 }
1273
1274 impl InMemGit {
1275 fn new() -> Self {
1276 Self { calls: Mutex::new(Vec::new()), materialise_on_clone: true }
1277 }
1278 fn calls(&self) -> Vec<BackendCall> {
1279 self.calls.lock().unwrap().clone()
1280 }
1281 }
1282
1283 impl GitBackend for InMemGit {
1284 fn name(&self) -> &'static str {
1285 "v1_2_0-mock-git"
1286 }
1287 fn clone(
1288 &self,
1289 url: &str,
1290 dest: &Path,
1291 r#ref: Option<&str>,
1292 ) -> Result<crate::ClonedRepo, crate::GitError> {
1293 self.calls.lock().unwrap().push(BackendCall::Clone {
1294 url: url.to_string(),
1295 dest: dest.to_path_buf(),
1296 r#ref: r#ref.map(str::to_string),
1297 });
1298 if self.materialise_on_clone {
1299 std::fs::create_dir_all(dest.join(".git")).unwrap();
1300 }
1301 Ok(crate::ClonedRepo { path: dest.to_path_buf(), head_sha: "0".repeat(40) })
1302 }
1303 fn fetch(&self, dest: &Path) -> Result<(), crate::GitError> {
1304 self.calls.lock().unwrap().push(BackendCall::Fetch { dest: dest.to_path_buf() });
1305 Ok(())
1306 }
1307 fn checkout(&self, dest: &Path, r#ref: &str) -> Result<(), crate::GitError> {
1308 self.calls
1309 .lock()
1310 .unwrap()
1311 .push(BackendCall::Checkout { dest: dest.to_path_buf(), r#ref: r#ref.to_string() });
1312 Ok(())
1313 }
1314 fn head_sha(&self, dest: &Path) -> Result<String, crate::GitError> {
1315 self.calls.lock().unwrap().push(BackendCall::HeadSha { dest: dest.to_path_buf() });
1316 Ok("0".repeat(40))
1317 }
1318 }
1319
1320 /// Build a meta manifest with the supplied children.
1321 fn meta_manifest_with(name: &str, children: Vec<ChildRef>) -> PackManifest {
1322 PackManifest {
1323 schema_version: SchemaVersion::current(),
1324 name: name.to_string(),
1325 r#type: PackType::Meta,
1326 version: None,
1327 depends_on: Vec::new(),
1328 children,
1329 actions: Vec::new(),
1330 teardown: None,
1331 extensions: BTreeMap::new(),
1332 }
1333 }
1334
1335 fn child(url: &str, path: &str) -> ChildRef {
1336 ChildRef { url: url.to_string(), path: Some(path.to_string()), r#ref: None }
1337 }
1338
1339 fn host_has_git_binary() -> bool {
1340 std::process::Command::new("git")
1341 .arg("--version")
1342 .output()
1343 .is_ok_and(|o| o.status.success())
1344 }
1345
1346 /// Empty meta — no children → the walker returns Ok with no work.
1347 #[test]
1348 fn test_walker_v1_2_0_simple_meta_no_children() {
1349 let tmp = tempfile::tempdir().unwrap();
1350 let meta_dir = tmp.path().to_path_buf();
1351 let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("solo", vec![]));
1352 let backend = InMemGit::new();
1353 let opts = SyncMetaOptions::default();
1354 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1355 assert_eq!(report.metas_visited, 1);
1356 assert!(report.phase1_classifications.is_empty());
1357 assert!(report.phase2_pruned.is_empty());
1358 assert!(report.errors.is_empty());
1359 assert!(backend.calls().is_empty(), "no children → no git ops");
1360 }
1361
1362 /// Phase 1 classifies each child. With every dest absent on disk,
1363 /// every classification is `Missing` and the backend sees one
1364 /// `Clone` per child.
1365 #[test]
1366 fn test_walker_v1_2_0_phase1_classifies_each_child() {
1367 let tmp = tempfile::tempdir().unwrap();
1368 let meta_dir = tmp.path().to_path_buf();
1369 let kids = vec![
1370 child("https://example.com/a.git", "alpha"),
1371 child("https://example.com/b.git", "beta"),
1372 ];
1373 let loader =
1374 InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", kids.clone()));
1375 let backend = InMemGit::new();
1376 let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1377 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1378 assert_eq!(report.phase1_classifications.len(), 2);
1379 for (parent, _, class) in &report.phase1_classifications {
1380 assert_eq!(parent, &meta_dir);
1381 assert_eq!(*class, DestClass::Missing);
1382 }
1383 assert!(report.errors.is_empty());
1384 let calls = backend.calls();
1385 assert_eq!(calls.len(), 2, "one clone per child");
1386 for call in calls {
1387 assert!(matches!(call, BackendCall::Clone { .. }));
1388 }
1389 }
1390
1391 /// Phase 1 must aggregate every undeclared `.git/` directory it
1392 /// encounters into a single `UntrackedGitRepos` error. We
1393 /// pre-create two `.git/` slots BEFORE running `sync_meta` and
1394 /// declare them as siblings without paths matching — they classify
1395 /// as `PresentUndeclared` because the manifest does not list them.
1396 #[test]
1397 fn test_walker_v1_2_0_phase1_aggregates_untracked_error() {
1398 // Build a meta whose manifest declares ZERO children — every
1399 // pre-existing `.git/` slot is by definition undeclared.
1400 // Then drop two `.git/` directories under the meta dir and
1401 // (because v1.2.0's classifier needs the manifest declaration
1402 // signal at the call site, not on-disk discovery) run a
1403 // PARALLEL classifier sweep over the on-disk dirs to feed the
1404 // aggregator. This mirrors the way 1.h's lockfile-orphan
1405 // sweep will surface PresentUndeclared dirs into Phase 1's
1406 // collector when a child is removed from the manifest.
1407 let tmp = tempfile::tempdir().unwrap();
1408 let alpha = tmp.path().join("alpha");
1409 let beta = tmp.path().join("beta");
1410 std::fs::create_dir_all(alpha.join(".git")).unwrap();
1411 std::fs::create_dir_all(beta.join(".git")).unwrap();
1412 // Direct unit on the aggregator: feed two `PresentUndeclared`
1413 // pairs and assert the error carries both.
1414 let pairs: Vec<(PathBuf, DestClass)> = vec![
1415 (alpha.clone(), DestClass::PresentUndeclared),
1416 (beta.clone(), DestClass::PresentUndeclared),
1417 ];
1418 let err = aggregate_untracked(pairs).expect_err("two undeclared → error");
1419 match err {
1420 TreeError::UntrackedGitRepos { paths } => {
1421 assert_eq!(paths, vec![alpha, beta]);
1422 }
1423 other => panic!("expected UntrackedGitRepos, got {other:?}"),
1424 }
1425 }
1426
1427 /// Phase 2 prunes a clean orphan: the supplied candidate has a
1428 /// real `.git/` (initialised by `git init`), the consent walk
1429 /// returns Clean, the dest is removed.
1430 #[test]
1431 fn test_walker_v1_2_0_phase2_prunes_clean_orphans() {
1432 if !host_has_git_binary() {
1433 return;
1434 }
1435 let tmp = tempfile::tempdir().unwrap();
1436 let meta_dir = tmp.path().to_path_buf();
1437 // Create the orphan dest — clean repo, no manifest entry.
1438 let orphan = meta_dir.join("ghost");
1439 std::fs::create_dir_all(&orphan).unwrap();
1440 let init =
1441 std::process::Command::new("git").arg("-C").arg(&orphan).args(["init", "-q"]).status();
1442 if !matches!(init, Ok(s) if s.success()) {
1443 return;
1444 }
1445 let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", vec![]));
1446 let backend = InMemGit::new();
1447 let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1448 let prune_list = vec![PathBuf::from("ghost")];
1449 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &prune_list).expect("ok");
1450 assert_eq!(report.phase2_pruned.len(), 1, "clean orphan must be pruned");
1451 assert_eq!(report.phase2_pruned[0], orphan);
1452 assert!(!orphan.exists(), "dest must be removed after a clean prune");
1453 assert!(report.errors.is_empty());
1454 }
1455
1456 /// Phase 2 must REFUSE to prune a dirty orphan absent the override
1457 /// flag. The consent walk classifies it `DirtyTree`; the walker
1458 /// surfaces `DirtyTreeRefusal` and leaves the dest untouched.
1459 #[test]
1460 fn test_walker_v1_2_0_phase2_refuses_dirty_orphan() {
1461 if !host_has_git_binary() {
1462 return;
1463 }
1464 let tmp = tempfile::tempdir().unwrap();
1465 let meta_dir = tmp.path().to_path_buf();
1466 let orphan = meta_dir.join("dirty-ghost");
1467 std::fs::create_dir_all(&orphan).unwrap();
1468 let init =
1469 std::process::Command::new("git").arg("-C").arg(&orphan).args(["init", "-q"]).status();
1470 if !matches!(init, Ok(s) if s.success()) {
1471 return;
1472 }
1473 std::fs::write(orphan.join("scratch.txt"), b"unsaved").unwrap();
1474 let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", vec![]));
1475 let backend = InMemGit::new();
1476 let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1477 let prune_list = vec![PathBuf::from("dirty-ghost")];
1478 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &prune_list).expect("ok");
1479 assert!(report.phase2_pruned.is_empty(), "dirty orphan must NOT be pruned");
1480 assert!(orphan.exists(), "dest stays on disk when refused");
1481 assert_eq!(report.errors.len(), 1);
1482 assert!(matches!(report.errors[0], TreeError::DirtyTreeRefusal { .. }));
1483 }
1484
/// Phase 3 descends into a child meta whose `.grex/pack.yaml` exists on
/// disk. The sub-meta's visit is rolled up into the parent's report, so
/// `metas_visited` counts both the parent and the child.
#[test]
fn test_walker_v1_2_0_phase3_recurses_into_sub_meta() {
    let sandbox = tempfile::tempdir().unwrap();
    let meta_dir = sandbox.path().to_owned();
    let sub_dir = meta_dir.join("sub");
    // Lay the sub-meta out on disk up front so Phase 1 classifies the
    // dest as PresentDeclared (no clone fired) and Phase 3 finds a
    // `.grex/pack.yaml` to recurse into.
    make_sub_meta_on_disk(&sub_dir, "sub");

    let root_manifest =
        meta_manifest_with("root", vec![child("https://example.com/sub.git", "sub")]);
    let sub_manifest = meta_manifest_with("sub", vec![]);
    let loader = InMemLoader::new()
        .with(meta_dir.clone(), root_manifest)
        .with(sub_dir.clone(), sub_manifest);
    let backend = InMemGit::new();
    let opts = SyncMetaOptions::default();

    let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
    assert_eq!(report.metas_visited, 2, "parent + sub-meta visited");
    assert!(report.errors.is_empty());
}
1509
/// With `recurse: false` Phase 3 is skipped altogether: the report shows
/// `metas_visited == 1` even though the child carries its own
/// `.grex/pack.yaml`.
#[test]
fn test_walker_v1_2_0_phase3_max_depth_zero_skips_recursion() {
    let sandbox = tempfile::tempdir().unwrap();
    let meta_dir = sandbox.path().to_owned();
    let sub_dir = meta_dir.join("sub");
    make_sub_meta_on_disk(&sub_dir, "sub");

    let root_manifest =
        meta_manifest_with("root", vec![child("https://example.com/sub.git", "sub")]);
    let sub_manifest = meta_manifest_with("sub", vec![]);
    let loader = InMemLoader::new()
        .with(meta_dir.clone(), root_manifest)
        .with(sub_dir.clone(), sub_manifest);
    let backend = InMemGit::new();
    // Recursion is switched off; every other option keeps its default.
    let opts = SyncMetaOptions { recurse: false, ..Default::default() };

    let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
    assert_eq!(report.metas_visited, 1, "no recursion → only the root meta");
}
1529
/// `max_depth: Some(N)` limits recursion to N levels of nesting.
/// A three-level chain (root → mid → leaf) walked with
/// `max_depth: Some(1)` must visit root (depth 0) and mid (depth 1)
/// but never leaf (depth 2).
#[test]
fn test_walker_v1_2_0_phase3_max_depth_n_stops_at_n_levels() {
    let sandbox = tempfile::tempdir().unwrap();
    let root_dir = sandbox.path().to_owned();
    let mid_dir = root_dir.join("mid");
    let leaf_dir = mid_dir.join("leaf");
    for (dir, name) in [(&mid_dir, "mid"), (&leaf_dir, "leaf")] {
        make_sub_meta_on_disk(dir, name);
    }

    let root_manifest =
        meta_manifest_with("root", vec![child("https://example.com/mid.git", "mid")]);
    let mid_manifest =
        meta_manifest_with("mid", vec![child("https://example.com/leaf.git", "leaf")]);
    let leaf_manifest = meta_manifest_with("leaf", vec![]);
    let loader = InMemLoader::new()
        .with(root_dir.clone(), root_manifest)
        .with(mid_dir.clone(), mid_manifest)
        .with(leaf_dir.clone(), leaf_manifest);
    let backend = InMemGit::new();
    let opts = SyncMetaOptions { max_depth: Some(1), ..Default::default() };

    let report = sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect("ok");
    // Root sits at depth 0 and mid at depth 1, so a cap of 1 yields two
    // visited metas and the walk stops before entering leaf.
    assert_eq!(report.metas_visited, 2, "max_depth: Some(1) visits root + mid only");
}
1559
/// Helper: lay out a sub-meta at `dir` — a `.grex/pack.yaml` whose
/// `name:` field is `name`, plus an empty stub `.git/` directory so the
/// classifier sees the dest as PresentDeclared.
///
/// Missing parent directories are created. Panics if any filesystem
/// operation fails.
fn make_sub_meta_on_disk(dir: &Path, name: &str) {
    // Both marker directories are created before the manifest is written.
    for marker in [".grex", ".git"] {
        std::fs::create_dir_all(dir.join(marker)).unwrap();
    }
    let manifest_path = dir.join(".grex/pack.yaml");
    let manifest = format!("schema_version: \"1\"\nname: {name}\ntype: meta\n");
    std::fs::write(manifest_path, manifest).unwrap();
}
1569
1570 /// Helper: collect the destinations Phase 1 recorded for a given
1571 /// parent meta from the rolled-up report.
1572 fn destinations_under(report: &SyncMetaReport, parent: &Path) -> Vec<PathBuf> {
1573 report
1574 .phase1_classifications
1575 .iter()
1576 .filter(|(p, _, _)| p == parent)
1577 .map(|(_, d, _)| d.clone())
1578 .collect()
1579 }
1580
/// Child paths resolve relative to the declaring meta: a child declared
/// by the root lands at `<root>/<child>`, not under some global
/// workspace anchor, and when the walker recurses into that child the
/// directory `<root>/<child>` becomes the parent used to resolve the
/// grandchild.
#[test]
fn test_walker_v1_2_0_parent_relative_path_resolution() {
    let sandbox = tempfile::tempdir().unwrap();
    let root_dir = sandbox.path().to_owned();
    // The `path:` field cannot contain slashes (1.c's path-segment
    // validator forbids them), so multi-segment nesting is produced by
    // chaining single-segment children across recursion frames.
    let tools_dir = root_dir.join("tools");
    let foo_dir = tools_dir.join("foo");
    for (dir, name) in [(&tools_dir, "tools"), (&foo_dir, "foo")] {
        make_sub_meta_on_disk(dir, name);
    }

    let root_manifest =
        meta_manifest_with("root", vec![child("https://example.com/tools.git", "tools")]);
    let tools_manifest =
        meta_manifest_with("tools", vec![child("https://example.com/foo.git", "foo")]);
    let foo_manifest = meta_manifest_with("foo", vec![]);
    let loader = InMemLoader::new()
        .with(root_dir.clone(), root_manifest)
        .with(tools_dir.clone(), tools_manifest)
        .with(foo_dir.clone(), foo_manifest);
    let backend = InMemGit::new();
    let opts = SyncMetaOptions::default();

    let report = sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect("ok");
    // root → tools → foo: three metas in total.
    assert_eq!(report.metas_visited, 3);

    // Every recorded dest must sit beneath the parent recorded with it.
    for entry in &report.phase1_classifications {
        let (parent, dest, _class) = entry;
        assert!(
            dest.starts_with(parent),
            "child dest {} must descend from parent {}",
            dest.display(),
            parent.display()
        );
    }

    // Spot-check each link: root records `tools`, tools records `foo`.
    let under_root = destinations_under(&report, &root_dir);
    assert_eq!(under_root, vec![tools_dir.clone()]);
    let under_tools = destinations_under(&report, &tools_dir);
    assert_eq!(under_tools, vec![foo_dir.clone()]);
}
1625
1626 // -----------------------------------------------------------------
1627 // v1.2.2 — `sync_meta` cycle detection (Phase 3 recursion edge).
1628 //
1629 // Discharges `sync_meta_no_cycle_infinite_clone` in
1630 // `proof/Grex/Walker.lean`. Identity scheme is `url@ref` so the
1631 // same repo at two different refs is NOT a cycle (covered by the
1632 // positive case below).
1633 // -----------------------------------------------------------------
1634
1635 /// `child_with_ref` mirrors `child()` but lets the caller pin a
1636 /// specific ref so two children of the same URL get distinct
1637 /// `pack_identity_for_child` strings (`url@ref`).
1638 fn child_with_ref(url: &str, path: &str, r#ref: &str) -> ChildRef {
1639 ChildRef {
1640 url: url.to_string(),
1641 path: Some(path.to_string()),
1642 r#ref: Some(r#ref.to_string()),
1643 }
1644 }
1645
/// Self-loop: pack `a` lists itself (same URL, no ref) as a child.
/// Instead of recursing forever the walk must fail with
/// `CycleDetected`, and the reported chain must name the identity that
/// recurred.
#[test]
fn cycle_self_loop_aborts() {
    let sandbox = tempfile::tempdir().unwrap();
    let root_dir = sandbox.path().to_owned();
    // `<root>/a` is a sub-meta whose manifest declares a child with the
    // very same URL/ref. The nested copy lives at a fresh path, so the
    // on-disk dests differ while the pack identities collide.
    let a_dir = root_dir.join("a");
    let a_again_dir = a_dir.join("a");
    for dir in [&a_dir, &a_again_dir] {
        make_sub_meta_on_disk(dir, "a");
    }
    let url_a = "https://example.com/a.git";

    let root_manifest = meta_manifest_with("root", vec![child(url_a, "a")]);
    // `a` points at itself: same url, same (empty) ref → same identity.
    let a_manifest = meta_manifest_with("a", vec![child(url_a, "a")]);
    let a_again_manifest = meta_manifest_with("a", vec![]);
    let loader = InMemLoader::new()
        .with(root_dir.clone(), root_manifest)
        .with(a_dir.clone(), a_manifest)
        .with(a_again_dir.clone(), a_again_manifest);
    let backend = InMemGit::new();
    let opts = SyncMetaOptions::default();

    let err =
        sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect_err("self-loop must abort");
    let chain = match err {
        TreeError::CycleDetected { chain } => chain,
        other => panic!("expected CycleDetected, got {other:?}"),
    };

    // v1.2.3 (B4): the chain opens with the root's path-namespaced
    // identity (`path:<root_dir>`, the initial visited seed) followed
    // by the cyclic child identities. v1.2.3 (B2): an empty/None ref
    // drops the trailing `@`, so the cyclic id is plain `url:<url_a>`.
    let id_a = format!("url:{url_a}");
    assert!(
        chain.iter().any(|entry| *entry == id_a),
        "chain must mention the cyclic url, got {chain:?}"
    );
    assert!(chain.len() >= 2, "self-loop chain has at least 2 entries: {chain:?}");

    let tail = chain.last().unwrap();
    assert_eq!(tail, &id_a, "chain must end with the recurring child identity");
    let first_seen = chain.iter().position(|entry| entry == tail).unwrap();
    assert!(
        first_seen + 1 < chain.len(),
        "the recurring identity must appear earlier in the chain: {chain:?}"
    );

    // The root frame lives in the `path:` namespace, disjoint from any
    // child's `url:` identity, so it heads the chain without colliding.
    assert!(chain[0].starts_with("path:"), "chain head is the root path identity: {chain:?}");
}
1703
/// Three-node cycle: A → B → C → A. The walk must fail with
/// `CycleDetected`, and the chain must list the identities in entry
/// order, closing with the recurring A.
#[test]
fn cycle_three_node_aborts() {
    let sandbox = tempfile::tempdir().unwrap();
    let root_dir = sandbox.path().to_owned();
    // On disk: root → a → b → c → a. The second `a` gets a fresh slot
    // so classification succeeds; the identity collision (not the
    // path) is what trips the cycle detector.
    let a_dir = root_dir.join("a");
    let b_dir = a_dir.join("b");
    let c_dir = b_dir.join("c");
    let a_again_dir = c_dir.join("a");
    for (dir, name) in [(&a_dir, "a"), (&b_dir, "b"), (&c_dir, "c"), (&a_again_dir, "a")] {
        make_sub_meta_on_disk(dir, name);
    }
    let url_a = "https://example.com/a.git";
    let url_b = "https://example.com/b.git";
    let url_c = "https://example.com/c.git";

    let root_manifest = meta_manifest_with("root", vec![child(url_a, "a")]);
    let a_manifest = meta_manifest_with("a", vec![child(url_b, "b")]);
    let b_manifest = meta_manifest_with("b", vec![child(url_c, "c")]);
    // `c` declares `a` again, closing the loop.
    let c_manifest = meta_manifest_with("c", vec![child(url_a, "a")]);
    let a_again_manifest = meta_manifest_with("a", vec![]);
    let loader = InMemLoader::new()
        .with(root_dir.clone(), root_manifest)
        .with(a_dir.clone(), a_manifest)
        .with(b_dir.clone(), b_manifest)
        .with(c_dir.clone(), c_manifest)
        .with(a_again_dir.clone(), a_again_manifest);
    let backend = InMemGit::new();
    let opts = SyncMetaOptions::default();

    let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
        .expect_err("three-node cycle must abort");
    let chain = match err {
        TreeError::CycleDetected { chain } => chain,
        other => panic!("expected CycleDetected, got {other:?}"),
    };

    // v1.2.3 (B4): the root's path-namespaced identity leads.
    // v1.2.3 (B2): an empty/None ref drops the trailing `@`.
    // Expected order: [path:root, a, b, c, a] — entry order, with the
    // recurring `a` appended where the cycle was detected.
    let id_root = pack_identity_for_root(&root_dir);
    let id_a = format!("url:{url_a}");
    let id_b = format!("url:{url_b}");
    let id_c = format!("url:{url_c}");
    let expected = vec![id_root, id_a.clone(), id_b, id_c, id_a];
    assert_eq!(chain, expected);
}
1752
/// Same repo, two refs — NOT a cycle. The root meta declares two
/// sibling children that share one URL but are pinned to different
/// refs (`main` and `dev`). With the `url@ref` identity scheme the
/// siblings have distinct identities, so the walk must succeed.
#[test]
fn same_repo_two_refs_no_cycle() {
    let sandbox = tempfile::tempdir().unwrap();
    let root_dir = sandbox.path().to_owned();
    let main_dir = root_dir.join("b-main");
    let dev_dir = root_dir.join("b-dev");
    for (dir, name) in [(&main_dir, "b-main"), (&dev_dir, "b-dev")] {
        make_sub_meta_on_disk(dir, name);
    }
    let url_b = "https://example.com/b.git";

    let siblings =
        vec![child_with_ref(url_b, "b-main", "main"), child_with_ref(url_b, "b-dev", "dev")];
    let root_manifest = meta_manifest_with("root", siblings);
    let main_manifest = meta_manifest_with("b-main", vec![]);
    let dev_manifest = meta_manifest_with("b-dev", vec![]);
    let loader = InMemLoader::new()
        .with(root_dir.clone(), root_manifest)
        .with(main_dir.clone(), main_manifest)
        .with(dev_dir.clone(), dev_manifest);
    let backend = InMemGit::new();
    let opts = SyncMetaOptions::default();

    let report = sync_meta(&root_dir, &backend, &loader, &opts, &[])
        .expect("same url at distinct refs is NOT a cycle");
    // root + b@main + b@dev = three metas.
    assert_eq!(report.metas_visited, 3);
    assert!(
        report.errors.is_empty(),
        "no errors expected when the two children differ only by ref: {:?}",
        report.errors
    );
}
1791
/// Same repo, two refs — NESTED (ancestor-stack) variant. Pack A
/// (URL=foo, ref=main) has pack B (URL=foo, ref=dev) as its child.
/// Under the `url@ref` identity scheme A is `url:foo@main` and B is
/// `url:foo@dev`, so the cycle detector must stay quiet even though
/// B's URL matches an ancestor on the stack. This covers the path the
/// sibling variant does not reach.
#[test]
fn same_repo_two_refs_nested_no_cycle() {
    let sandbox = tempfile::tempdir().unwrap();
    let root_dir = sandbox.path().to_owned();
    let a_dir = root_dir.join("a");
    let b_dir = a_dir.join("b");
    for (dir, name) in [(&a_dir, "a"), (&b_dir, "b")] {
        make_sub_meta_on_disk(dir, name);
    }
    let url_foo = "https://example.com/foo.git";

    let root_manifest = meta_manifest_with("root", vec![child_with_ref(url_foo, "a", "main")]);
    // a (foo@main) declares b (foo@dev): identical URL, different ref.
    let a_manifest = meta_manifest_with("a", vec![child_with_ref(url_foo, "b", "dev")]);
    let b_manifest = meta_manifest_with("b", vec![]);
    let loader = InMemLoader::new()
        .with(root_dir.clone(), root_manifest)
        .with(a_dir.clone(), a_manifest)
        .with(b_dir.clone(), b_manifest);
    let backend = InMemGit::new();
    let opts = SyncMetaOptions::default();

    let report = sync_meta(&root_dir, &backend, &loader, &opts, &[])
        .expect("nested same-url at distinct refs is NOT a cycle");
    // Depth 2 must be reached: root → a → b, i.e. three metas.
    assert_eq!(report.metas_visited, 3, "walker must recurse to depth 2");
    assert!(
        report.errors.is_empty(),
        "no errors expected when ancestor and descendant differ only by ref: {:?}",
        report.errors
    );
}
1828
1829 // -----------------------------------------------------------------
1830 // v1.2.3 — additional cycle/diamond coverage (T1, T2, T3).
1831 //
1832 // T1 covers the diamond-shared-descendant case the
1833 // clone-per-child scheme is meant to permit; T2 stretches the
1834 // cycle to length 4 to exercise chain accumulation; T3 verifies
1835 // the cycle detector sees a cycle introduced inside an inner
1836 // subtree even though the outer arm is acyclic.
1837 // -----------------------------------------------------------------
1838
/// T1 — Diamond, NO cycle. Topology:
///
/// ```text
///   root → A
///   root → B
///   A → C
///   B → C    (C is a shared descendant)
/// ```
///
/// All four packs must be walked without any `CycleDetected`. The
/// detector is described as cloning its `visited` chain per child, so
/// what A's arm has seen does not leak into B's arm: meeting `C` from
/// both sides is a diamond, not a cycle.
#[test]
fn cycle_diamond_shared_descendant_no_cycle() {
    let sandbox = tempfile::tempdir().unwrap();
    let root_dir = sandbox.path().to_owned();
    // On disk: root/a, root/b, root/a/c, root/b/c. Each `c` has its own
    // slot so classification succeeds; equal identities are what would
    // (wrongly) trip the detector if clone-per-child were broken.
    let a_dir = root_dir.join("a");
    let b_dir = root_dir.join("b");
    let c_under_a_dir = a_dir.join("c");
    let c_under_b_dir = b_dir.join("c");
    for (dir, name) in
        [(&a_dir, "a"), (&b_dir, "b"), (&c_under_a_dir, "c"), (&c_under_b_dir, "c")]
    {
        make_sub_meta_on_disk(dir, name);
    }
    let url_a = "https://example.com/a.git";
    let url_b = "https://example.com/b.git";
    let url_c = "https://example.com/c.git";

    let root_manifest = meta_manifest_with("root", vec![child(url_a, "a"), child(url_b, "b")]);
    let a_manifest = meta_manifest_with("a", vec![child(url_c, "c")]);
    let b_manifest = meta_manifest_with("b", vec![child(url_c, "c")]);
    let c_via_a_manifest = meta_manifest_with("c", vec![]);
    let c_via_b_manifest = meta_manifest_with("c", vec![]);
    let loader = InMemLoader::new()
        .with(root_dir.clone(), root_manifest)
        .with(a_dir.clone(), a_manifest)
        .with(b_dir.clone(), b_manifest)
        .with(c_under_a_dir.clone(), c_via_a_manifest)
        .with(c_under_b_dir.clone(), c_via_b_manifest);
    let backend = InMemGit::new();
    let opts = SyncMetaOptions::default();

    let report =
        sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect("diamond is NOT a cycle");

    // Four distinct packs (root, a, b, c) but five manifest visits:
    // each arm expands its own `c`, so `c` is counted once per arm.
    assert_eq!(
        report.metas_visited, 5,
        "diamond: root + a + b + c-under-a + c-under-b = 5 visits"
    );
    // First rule out CycleDetected specifically (clearer failure
    // message), then require that no error of any kind was recorded.
    let saw_cycle = report.errors.iter().any(|e| matches!(e, TreeError::CycleDetected { .. }));
    assert!(!saw_cycle, "diamond must not surface CycleDetected; errors={:?}", report.errors);
    assert!(report.errors.is_empty(), "diamond should produce no errors: {:?}", report.errors);
}
1902
/// T2 — 4-node cycle: `root → A → B → C → D → A`. In pack-identity
/// terms the cycle has length 4; appending the recurring `A` at
/// detection makes five entries, and the root frame's `path:` identity
/// in front (B4) brings the reported chain to six.
#[test]
#[allow(clippy::too_many_lines)]
fn cycle_four_node_aborts() {
    let sandbox = tempfile::tempdir().unwrap();
    let root_dir = sandbox.path().to_owned();
    // On disk: root → a → b → c → d → a. The second `a` sits in a fresh
    // slot so classification succeeds; the identity collision is what
    // trips the cycle detector.
    let a_dir = root_dir.join("a");
    let b_dir = a_dir.join("b");
    let c_dir = b_dir.join("c");
    let d_dir = c_dir.join("d");
    let a_again_dir = d_dir.join("a");
    for (dir, name) in
        [(&a_dir, "a"), (&b_dir, "b"), (&c_dir, "c"), (&d_dir, "d"), (&a_again_dir, "a")]
    {
        make_sub_meta_on_disk(dir, name);
    }
    let url_a = "https://example.com/a.git";
    let url_b = "https://example.com/b.git";
    let url_c = "https://example.com/c.git";
    let url_d = "https://example.com/d.git";

    let root_manifest = meta_manifest_with("root", vec![child(url_a, "a")]);
    let a_manifest = meta_manifest_with("a", vec![child(url_b, "b")]);
    let b_manifest = meta_manifest_with("b", vec![child(url_c, "c")]);
    let c_manifest = meta_manifest_with("c", vec![child(url_d, "d")]);
    // `d` declares `a` again → cycle of length 4 in the url namespace.
    let d_manifest = meta_manifest_with("d", vec![child(url_a, "a")]);
    let a_again_manifest = meta_manifest_with("a", vec![]);
    let loader = InMemLoader::new()
        .with(root_dir.clone(), root_manifest)
        .with(a_dir.clone(), a_manifest)
        .with(b_dir.clone(), b_manifest)
        .with(c_dir.clone(), c_manifest)
        .with(d_dir.clone(), d_manifest)
        .with(a_again_dir.clone(), a_again_manifest);
    let backend = InMemGit::new();
    let opts = SyncMetaOptions::default();

    let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
        .expect_err("four-node cycle must abort");
    let chain = match err {
        TreeError::CycleDetected { chain } => chain,
        other => panic!("expected CycleDetected, got {other:?}"),
    };

    let id_root = pack_identity_for_root(&root_dir);
    let id_a = format!("url:{url_a}");
    let id_b = format!("url:{url_b}");
    let id_c = format!("url:{url_c}");
    let id_d = format!("url:{url_d}");
    // [path:root, a, b, c, d, a] — six entries.
    let expected = vec![id_root, id_a.clone(), id_b, id_c, id_d, id_a];
    assert_eq!(chain, expected, "expected full ancestor chain ending in the recurring A");
    assert!(chain.len() >= 5, "four-node cycle chain has at least 5 entries: {chain:?}");

    // The final element (the recurring identity) also occurs earlier.
    let tail = chain.last().unwrap();
    let first_seen = chain.iter().position(|entry| entry == tail).unwrap();
    assert!(
        first_seen + 1 < chain.len(),
        "the recurring identity must appear earlier in the chain: {chain:?}"
    );
}
1971
/// T3 — Nested-prefix cycle. The outer arm `root → A → B → C` has no
/// cycle; the loop hides in B's other child `D`, which points back at
/// B (`B → D → B`). The walk must fail with `CycleDetected`, the cycle
/// must show up inside the subtree rather than at the root level, and
/// B must be the recurring identity.
///
/// Concretely: A's children = [B], B's children = [C, D], C has no
/// children, D's children = [B] (the cycle).
#[test]
#[allow(clippy::too_many_lines)]
fn cycle_nested_prefix_aborts() {
    let sandbox = tempfile::tempdir().unwrap();
    let root_dir = sandbox.path().to_owned();
    // On disk: root/a, root/a/b, root/a/b/c (acyclic arm), root/a/b/d
    // (cycle arm) and root/a/b/d/b (D looping back to B). The last one
    // is an identity collision at a fresh path, so classification
    // succeeds.
    let a_dir = root_dir.join("a");
    let b_dir = a_dir.join("b");
    let c_dir = b_dir.join("c");
    let d_dir = b_dir.join("d");
    let b_again_dir = d_dir.join("b");
    for (dir, name) in
        [(&a_dir, "a"), (&b_dir, "b"), (&c_dir, "c"), (&d_dir, "d"), (&b_again_dir, "b")]
    {
        make_sub_meta_on_disk(dir, name);
    }
    let url_a = "https://example.com/a.git";
    let url_b = "https://example.com/b.git";
    let url_c = "https://example.com/c.git";
    let url_d = "https://example.com/d.git";

    let root_manifest = meta_manifest_with("root", vec![child(url_a, "a")]);
    let a_manifest = meta_manifest_with("a", vec![child(url_b, "b")]);
    // `b` carries one acyclic child (c) and one cyclic child (d).
    let b_manifest = meta_manifest_with("b", vec![child(url_c, "c"), child(url_d, "d")]);
    let c_manifest = meta_manifest_with("c", vec![]);
    // `d` declares `b` again → the cycle lives in the b/d subtree.
    let d_manifest = meta_manifest_with("d", vec![child(url_b, "b")]);
    let b_again_manifest = meta_manifest_with("b", vec![]);
    let loader = InMemLoader::new()
        .with(root_dir.clone(), root_manifest)
        .with(a_dir.clone(), a_manifest)
        .with(b_dir.clone(), b_manifest)
        .with(c_dir.clone(), c_manifest)
        .with(d_dir.clone(), d_manifest)
        .with(b_again_dir.clone(), b_again_manifest);
    let backend = InMemGit::new();
    let opts = SyncMetaOptions::default();

    let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
        .expect_err("nested-prefix cycle must abort");
    let chain = match err {
        TreeError::CycleDetected { chain } => chain,
        other => panic!("expected CycleDetected, got {other:?}"),
    };

    let id_root = pack_identity_for_root(&root_dir);
    let id_a = format!("url:{url_a}");
    let id_b = format!("url:{url_b}");
    let id_d = format!("url:{url_d}");
    // The cycle is hit inside the subtree at depth 4:
    // [path:root, a, b, d, b].
    let expected = vec![id_root.clone(), id_a, id_b.clone(), id_d, id_b.clone()];
    assert_eq!(chain, expected, "cycle should appear inside the subtree, not at the top");

    // `b` is the recurring identity, and the chain's outermost slot is
    // held by the root path identity instead — i.e. the cycle is
    // "inside" the tree.
    let tail = chain.last().unwrap();
    assert_eq!(tail, &id_b, "recurring identity is B");
    let head = chain.first().unwrap();
    assert_ne!(head, tail, "cycle must not start at the root frame: {chain:?}");
    assert_eq!(head, &id_root, "chain must begin with the root path identity: {chain:?}");
}
2053
/// B1 regression: `max_depth` must NOT hide a cycle. The cycle check
/// is expected to run before the depth-cap early return in
/// `phase3_handle_child`.
///
/// Topology: the 4-node cycle from `cycle_four_node_aborts`
/// (`root → A → B → C → D → A`). The recurring `A` would be entered at
/// `next_depth = 5`; with `max_depth: Some(4)` a depth-cap check placed
/// ahead of the cycle check would drop that frame before it could be
/// tested for cycle membership. With the cycle-then-depth-cap ordering
/// `CycleDetected` surfaces regardless of the cap.
///
/// Should B1's reordering be reverted, this test fails: the walker
/// would return `Ok(_)` rather than `Err(CycleDetected)` because the
/// closing frame is silently truncated.
#[test]
fn cycle_aborts_under_max_depth_cap() {
    let sandbox = tempfile::tempdir().unwrap();
    let root_dir = sandbox.path().to_owned();
    let a_dir = root_dir.join("a");
    let b_dir = a_dir.join("b");
    let c_dir = b_dir.join("c");
    let d_dir = c_dir.join("d");
    let a_again_dir = d_dir.join("a");
    for (dir, name) in
        [(&a_dir, "a"), (&b_dir, "b"), (&c_dir, "c"), (&d_dir, "d"), (&a_again_dir, "a")]
    {
        make_sub_meta_on_disk(dir, name);
    }
    let url_a = "https://example.com/a.git";
    let url_b = "https://example.com/b.git";
    let url_c = "https://example.com/c.git";
    let url_d = "https://example.com/d.git";

    let root_manifest = meta_manifest_with("root", vec![child(url_a, "a")]);
    let a_manifest = meta_manifest_with("a", vec![child(url_b, "b")]);
    let b_manifest = meta_manifest_with("b", vec![child(url_c, "c")]);
    let c_manifest = meta_manifest_with("c", vec![child(url_d, "d")]);
    let d_manifest = meta_manifest_with("d", vec![child(url_a, "a")]);
    let a_again_manifest = meta_manifest_with("a", vec![]);
    let loader = InMemLoader::new()
        .with(root_dir.clone(), root_manifest)
        .with(a_dir.clone(), a_manifest)
        .with(b_dir.clone(), b_manifest)
        .with(c_dir.clone(), c_manifest)
        .with(d_dir.clone(), d_manifest)
        .with(a_again_dir.clone(), a_again_manifest);
    let backend = InMemGit::new();
    // Cap of 4: the recurring A frame would land at next_depth=5, past
    // the cap. With B1 the cycle check still fires first; without it
    // the cap would skip the frame before the cycle is noticed.
    let opts = SyncMetaOptions { max_depth: Some(4), ..Default::default() };

    let err = sync_meta(&root_dir, &backend, &loader, &opts, &[])
        .expect_err("cycle must surface even when its closing frame exceeds max_depth");
    let chain = match err {
        TreeError::CycleDetected { chain } => chain,
        other => panic!("expected CycleDetected, got {other:?}"),
    };

    let id_a = format!("url:{url_a}");
    assert!(chain.last() == Some(&id_a), "recurring identity must be A, got chain={chain:?}");
    let tail = chain.last().unwrap();
    let first_seen = chain.iter().position(|entry| entry == tail).unwrap();
    assert!(
        first_seen + 1 < chain.len(),
        "the recurring identity must appear earlier in the chain: {chain:?}"
    );
}
2119
/// B2 regression: `pack_identity_for_child` must NOT produce a trailing
/// `@` when `r#ref` is `Some("")` (the empty string). A child with
/// `ref: None` and one with `ref: Some("")` must both yield the bare
/// `url:<url>` identity, and therefore the same string. An identity
/// ending in `@` would leak an empty-ref artifact into operator
/// diagnostics.
///
/// NOTE(review): the original note ties this form to the Lean model
/// (`Grex.Walker.ChildRef.identity`); that model is not visible from
/// this file — confirm against the proof sources.
#[test]
fn child_identity_some_empty_ref_omits_at() {
    let url = "https://example.com/a.git";
    // Two children identical except for how the "no ref" state is spelled.
    let child_pinned_to = |pinned: Option<String>| ChildRef {
        url: url.to_string(),
        path: Some("a".to_string()),
        r#ref: pinned,
    };
    let ref_absent = child_pinned_to(None);
    let ref_blank = child_pinned_to(Some(String::new()));
    let bare = format!("url:{url}");

    let id_none = pack_identity_for_child(&ref_absent);
    let id_empty = pack_identity_for_child(&ref_blank);

    assert_eq!(id_none, bare, "None ref must produce bare url identity");
    assert_eq!(
        id_empty, bare,
        "Some(\"\") ref must collapse to bare url identity (no trailing @)"
    );
    assert_eq!(id_none, id_empty, "Some(\"\") and None must yield the same identity");
    assert!(!id_empty.ends_with('@'), "identity must not end with trailing @: {id_empty:?}");
}
2150}