grex_core/tree/walker.rs
1//! Recursive pack-tree walker.
2//!
3//! The walker hydrates a `pack.yaml` tree: it loads the root manifest, clones
4//! (or fetches + checks out) every `children:` entry via the injected
5//! [`GitBackend`], and recurses. `depends_on` entries are recorded as edges
6//! but never walked — they are *external prereqs* verified by
7//! [`crate::pack::validate::DependsOnValidator`] after the graph is built.
8//!
9//! # Cycle detection
10//!
//! Cycles are detected **during** the walk, not post-hoc. Each recursion
//! maintains a walk stack of pack identifiers: `url:<url>@<ref>` for a
//! child (the declared ref, empty when none is declared) and
//! `path:<root path>` for the root. If a child is about to be entered
//! whose identifier is already on the stack, the walker short-circuits with
//! [`TreeError::CycleDetected`]. A separate `CycleValidator` runs
//! post-hoc as a belt-and-suspenders check so manually-constructed graphs
//! cannot sneak through.
18//!
19//! # Cyclomatic discipline
20//!
//! The walk is decomposed so each helper stays well under CC 15:
//! `walk` → `walk_recursive` → `record_depends_on`, then
//! `process_children` → `handle_child` → `resolve_destination`.
24
25use std::collections::BTreeMap;
26use std::path::{Path, PathBuf};
27
28use rayon::prelude::*;
29
30use crate::git::GitBackend;
31use crate::pack::validate::child_path::{
32 boundary_fs_reject_reason, boundary_reject_reason, check_one as check_child_path,
33 nfc_duplicate_path,
34};
35use crate::pack::{ChildRef, PackManifest, PackType, PackValidationError, SchemaVersion};
36
37use super::consent::phase2_prune;
38use super::dest_class::{aggregate_untracked, classify_dest, DestClass};
39use super::error::TreeError;
40use super::graph::{EdgeKind, PackEdge, PackGraph, PackNode};
41use super::loader::PackLoader;
42use super::quarantine::QuarantineConfig;
43
44/// Recursive walker. Composes a [`PackLoader`] (for manifests) with a
45/// [`GitBackend`] (for child hydration).
46///
47/// The walker owns no state across calls: each invocation of [`Walker::walk`]
48/// produces a fresh [`PackGraph`] and leaves no footprint.
49///
50/// **Status (v1.2.1, path iii)**: retired from the production sync
51/// orchestrator. `sync::run` now composes [`sync_meta`] (mutate) →
52/// [`super::graph_build::build_graph`] (read-only) → `run_actions` instead
53/// of issuing clones+fetches inside the graph build. The `Walker` symbol
54/// is kept for downstream test-suite compatibility (22 fixture call sites
55/// in `crates/grex-core/tests/tree_walk.rs`); new code SHOULD NOT add
56/// production call sites.
57#[doc(hidden)]
58pub struct Walker<'a> {
59 loader: &'a dyn PackLoader,
60 backend: &'a dyn GitBackend,
61 workspace: PathBuf,
62 /// Optional global ref override (M4-D `grex sync --ref <sha|branch|tag>`).
63 /// When `Some`, every child clone/checkout uses this ref instead of the
64 /// declared `child.ref` from the parent manifest. `None` preserves M3
65 /// semantics.
66 ref_override: Option<String>,
67}
68
69impl<'a> Walker<'a> {
70 /// Construct a new walker.
71 ///
72 /// `workspace` is the directory under which child packs will be cloned,
73 /// using each [`ChildRef::effective_path`] as the sub-directory name.
74 #[must_use]
75 pub fn new(
76 loader: &'a dyn PackLoader,
77 backend: &'a dyn GitBackend,
78 workspace: PathBuf,
79 ) -> Self {
80 Self { loader, backend, workspace, ref_override: None }
81 }
82
83 /// Set a global ref override applied to every child pack.
84 ///
85 /// Surfaced as `grex sync --ref <sha|branch|tag>` (M4-D). The override
86 /// replaces each child's declared `ref` in its parent manifest. An
87 /// empty string is treated as "no override" — callers should reject
88 /// empty values at the CLI layer before reaching this point.
89 #[must_use]
90 pub fn with_ref_override(mut self, r#ref: Option<String>) -> Self {
91 self.ref_override = r#ref.filter(|s| !s.is_empty());
92 self
93 }
94
95 /// Walk the tree rooted at `root_pack_path`, returning the fully
96 /// hydrated graph.
97 ///
98 /// # Errors
99 ///
100 /// Returns [`TreeError`] on any loader, git, cycle, or name-mismatch
101 /// failure. The walk aborts on the first failure — the spec-level
102 /// "fail loud, fail fast" default.
103 pub fn walk(&self, root_pack_path: &Path) -> Result<PackGraph, TreeError> {
104 let mut state = BuildState::default();
105 let root_manifest = self.loader.load(root_pack_path)?;
106 // Pre-walk path-traversal gate: reject any malicious
107 // `children[].path` (or URL-derived tail) BEFORE any clone fires.
108 // Closes the v1.1.0 flat-sibling exploit window where a `path:
109 // ../escape` would materialise a child outside the pack root
110 // before plan-phase validation could see it.
111 validate_children_paths(&root_manifest)?;
112 let root_commit_sha = probe_head_sha(self.backend, root_pack_path);
113 let root_id = state.push_node(PackNode {
114 id: 0,
115 name: root_manifest.name.clone(),
116 path: root_pack_path.to_path_buf(),
117 source_url: None,
118 manifest: root_manifest.clone(),
119 parent: None,
120 commit_sha: root_commit_sha,
121 synthetic: false,
122 });
123 let root_identity = pack_identity_for_root(root_pack_path);
124 self.walk_recursive(root_id, &root_manifest, &mut state, &mut vec![root_identity])?;
125 Ok(PackGraph::new(state.nodes, state.edges))
126 }
127
128 /// Recursive step. `stack` carries the pack identifiers currently on
129 /// the walk path — pushed on entry, popped on return.
130 ///
131 /// Each loaded manifest's `children[]` is path-traversal-validated
132 /// before any of those children are resolved on disk; the entry
133 /// point pre-validates the root manifest, so by the time
134 /// `walk_recursive` runs for a child, that child's own `children[]`
135 /// is what needs gating before the next descent.
136 fn walk_recursive(
137 &self,
138 parent_id: usize,
139 manifest: &PackManifest,
140 state: &mut BuildState,
141 stack: &mut Vec<String>,
142 ) -> Result<(), TreeError> {
143 self.record_depends_on(parent_id, manifest, state);
144 self.process_children(parent_id, manifest, state, stack)
145 }
146
147 /// Record one `DependsOn` edge per `depends_on` entry. Resolution
148 /// against actual graph nodes happens later in `DependsOnValidator`.
149 /// We emit edges only where the target already exists in the graph so
150 /// the edge list stays in-bounds; unresolved deps are surfaced by the
151 /// validator, not carried as dangling edges.
152 fn record_depends_on(&self, parent_id: usize, manifest: &PackManifest, state: &mut BuildState) {
153 for dep in &manifest.depends_on {
154 if let Some(to) = find_node_id_by_name_or_url(&state.nodes, dep) {
155 state.edges.push(PackEdge { from: parent_id, to, kind: EdgeKind::DependsOn });
156 }
157 }
158 }
159
160 fn process_children(
161 &self,
162 parent_id: usize,
163 manifest: &PackManifest,
164 state: &mut BuildState,
165 stack: &mut Vec<String>,
166 ) -> Result<(), TreeError> {
167 for child in &manifest.children {
168 self.handle_child(parent_id, child, state, stack)?;
169 }
170 Ok(())
171 }
172
173 fn handle_child(
174 &self,
175 parent_id: usize,
176 child: &ChildRef,
177 state: &mut BuildState,
178 stack: &mut Vec<String>,
179 ) -> Result<(), TreeError> {
180 let identity = pack_identity_for_child(child);
181 if stack.iter().any(|s| s == &identity) {
182 let mut chain = stack.clone();
183 chain.push(identity);
184 return Err(TreeError::CycleDetected { chain });
185 }
186 // v1.2.0 Stage 1.c: FS-resident boundary check fires BEFORE
187 // any clone / fetch. Junctions, reparse points, and
188 // `.git`-as-file (gitfile redirect) all re-open the
189 // parent-boundary escape that the syntactic gate closes on
190 // the path string itself; running the check on the prospective
191 // dest path means a hostile pre-existing slot is rejected
192 // before the GitBackend writes anything into (or through) it.
193 // The prospective path is reconstructed here so the helper
194 // can interrogate the slot before `resolve_destination`
195 // materialises a clone — pre-clone runs return `Ok(())` because
196 // the slot doesn't exist yet, and the walk continues normally.
197 let prospective_dest = self.workspace.join(child.effective_path());
198 check_dest_boundary(&prospective_dest, &child.effective_path())?;
199 let dest = self.resolve_destination(child, state)?;
200 // v1.1.1 plain-git children: when the destination has no
201 // `.grex/pack.yaml` but does carry a `.git/`, synthesize a
202 // leaf scripted-no-hooks manifest in-memory rather than
203 // aborting. See
204 // `openspec/changes/feat-v1.1.1-plain-git-children/design.md`
205 // §"Synthesis algorithm".
206 let (child_manifest, is_synthetic) = match self.loader.load(&dest) {
207 Ok(m) => (m, false),
208 Err(TreeError::ManifestNotFound(_)) if dest_has_git_repo(&dest) => {
209 (synthesize_plain_git_manifest(child), true)
210 }
211 Err(e) => return Err(e),
212 };
213 verify_child_name(&child_manifest.name, child, &dest)?;
214 // Validate this child's own `children[]` before its descent
215 // resolves any of them on disk. Mirrors the root-manifest gate
216 // in `walk`; together they ensure no clone can fire for a
217 // grandchild whose parent declared a traversal-bearing path.
218 validate_children_paths(&child_manifest)?;
219
220 let commit_sha = probe_head_sha(self.backend, &dest);
221 let child_id = state.push_node(PackNode {
222 id: state.nodes.len(),
223 name: child_manifest.name.clone(),
224 path: dest.clone(),
225 source_url: Some(child.url.clone()),
226 manifest: child_manifest.clone(),
227 parent: Some(parent_id),
228 commit_sha,
229 synthetic: is_synthetic,
230 });
231 state.edges.push(PackEdge { from: parent_id, to: child_id, kind: EdgeKind::Child });
232
233 stack.push(identity);
234 let result = self.walk_recursive(child_id, &child_manifest, state, stack);
235 stack.pop();
236 result
237 }
238
239 /// Decide where `child` lives on disk and ensure the working tree is
240 /// in the expected state: clone if absent, fetch + optional checkout
241 /// if present.
242 fn resolve_destination(
243 &self,
244 child: &ChildRef,
245 _state: &mut BuildState,
246 ) -> Result<PathBuf, TreeError> {
247 let dest = self.workspace.join(child.effective_path());
248 // M4-D: `ref_override` wins over the parent-declared `child.ref`.
249 // Falls back to the declared ref when no override is active.
250 let effective_ref = self.ref_override.as_deref().or(child.r#ref.as_deref());
251 if dest_has_git_repo(&dest) {
252 self.backend.fetch(&dest)?;
253 if let Some(r) = effective_ref {
254 self.backend.checkout(&dest, r)?;
255 }
256 } else {
257 self.backend.clone(&child.url, &dest, effective_ref)?;
258 }
259 Ok(dest)
260 }
261}
262
263/// Best-effort HEAD probe. Returns `None` when the target is not a git
264/// repository or the backend refuses — the root of a declarative pack is
265/// often a plain directory, so this must not fail the walk.
266///
267/// Non-`.git` directories short-circuit silently (truly not a git
268/// repo). Backend errors on an actual `.git` directory are surfaced as
269/// a `tracing::warn!` log line so transient gix failures / ACL-denied
270/// `.git` reads do not silently degrade into an empty `commit_sha`
271/// without any operator signal. The walker continues with `None` — a
272/// best-effort probe is, by construction, allowed to fail.
273fn probe_head_sha(backend: &dyn GitBackend, path: &Path) -> Option<String> {
274 let dir =
275 if path.extension().and_then(|e| e.to_str()).is_some_and(|e| matches!(e, "yaml" | "yml")) {
276 path.parent()
277 .and_then(Path::parent)
278 .map_or_else(|| path.to_path_buf(), Path::to_path_buf)
279 } else {
280 path.to_path_buf()
281 };
282 if !dir.join(".git").exists() {
283 return None;
284 }
285 match backend.head_sha(&dir) {
286 Ok(s) => Some(s),
287 Err(e) => {
288 tracing::warn!(
289 target: "grex::walker",
290 "HEAD probe failed for {}: {e}",
291 dir.display()
292 );
293 None
294 }
295 }
296}
297
298/// Mutable state threaded through the walk. Private to this module so only
299/// the walker can grow the graph.
300#[derive(Default)]
301struct BuildState {
302 nodes: Vec<PackNode>,
303 edges: Vec<PackEdge>,
304}
305
306impl BuildState {
307 fn push_node(&mut self, node: PackNode) -> usize {
308 let id = node.id;
309 self.nodes.push(node);
310 id
311 }
312}
313
/// Cycle-detector identity of the root pack: the literal prefix `path:`
/// followed by the path as rendered by [`Path::display`].
fn pack_identity_for_root(path: &Path) -> String {
    let mut identity = String::from("path:");
    identity.push_str(&path.display().to_string());
    identity
}
318
319/// Identity string for a child — url+ref so the same repo at two different
320/// refs is considered distinct. This matches git semantics and avoids
321/// false-positive cycle detections for diamond dependencies on different
322/// tags.
323fn pack_identity_for_child(child: &ChildRef) -> String {
324 let rref = child.r#ref.as_deref().unwrap_or("");
325 format!("url:{}@{}", child.url, rref)
326}
327
/// Cheap on-disk test for an existing working tree: does `dest` contain a
/// `.git` entry (file or directory)? The repository is deliberately not
/// opened here; that is left to the backend's `fetch` / `checkout`.
///
/// # Symlink safety
///
/// A `dest` that is itself a symlink always yields `false`, whatever its
/// target contains. This blocks a synthesis-redirection attack: with a
/// parent declaring `path: code` and a workspace where
/// `<workspace>/code -> $HOME`, following the link would let the walker
/// mistake `$HOME/.git` for a "plain-git child" and operate on an
/// unrelated tree. [`std::fs::symlink_metadata`] is used so that the link
/// itself, not its target, is inspected.
pub fn dest_has_git_repo(dest: &Path) -> bool {
    // `symlink_metadata` does not follow the link, so broken or
    // path-traversing symlinks are caught too. A metadata error (e.g. the
    // path does not exist) simply means "not a symlink" here.
    let dest_is_symlink = std::fs::symlink_metadata(dest)
        .map(|meta| meta.file_type().is_symlink())
        .unwrap_or(false);

    !dest_is_symlink && dest.join(".git").exists()
}
353
354/// Build the in-memory manifest used for v1.1.1 plain-git children — a
355/// leaf scripted pack with no hooks, no children, no actions. Activated
356/// at the walker's load-fallback boundary when a child has a `.git/`
357/// but no `.grex/pack.yaml`. See
358/// `openspec/changes/feat-v1.1.1-plain-git-children/design.md`.
359pub fn synthesize_plain_git_manifest(child: &ChildRef) -> PackManifest {
360 PackManifest {
361 schema_version: SchemaVersion::current(),
362 name: child.effective_path(),
363 r#type: PackType::Scripted,
364 version: None,
365 depends_on: Vec::new(),
366 children: Vec::new(),
367 actions: Vec::new(),
368 teardown: None,
369 extensions: BTreeMap::new(),
370 }
371}
372
373/// Enforce that the cloned child's pack.yaml name matches what the parent
374/// declared. The parent-side expectation is the child entry's
375/// [`ChildRef::effective_path`] — the directory name in the workspace.
376fn verify_child_name(got: &str, child: &ChildRef, dest: &Path) -> Result<(), TreeError> {
377 let expected = child.effective_path();
378 if got == expected {
379 return Ok(());
380 }
381 Err(TreeError::PackNameMismatch { got: got.to_string(), expected, path: dest.to_path_buf() })
382}
383
384/// Resolve a `depends_on` entry (URL or bare name) against nodes already
385/// recorded. Returns the node id on a hit, `None` otherwise.
386fn find_node_id_by_name_or_url(nodes: &[PackNode], dep: &str) -> Option<usize> {
387 if looks_like_url(dep) {
388 nodes.iter().find(|n| n.source_url.as_deref() == Some(dep)).map(|n| n.id)
389 } else {
390 nodes.iter().find(|n| n.name == dep).map(|n| n.id)
391 }
392}
393
394/// Run the path-traversal gate on `manifest.children`. Returns the
395/// first offending child as a [`TreeError::ChildPathInvalid`] so the
396/// walker aborts before any clone of the offending sibling fires.
397///
398/// Surfacing only the first offender (rather than aggregating) matches
399/// the walker's fail-fast posture — the plan-phase
400/// [`crate::pack::validate::ChildPathValidator`] still runs against the
401/// whole graph post-walk via `validate_graph`, so authors who clear
402/// the traversal exploit see the full diagnostic batch on the next
403/// invocation.
404///
405/// `check_child_path` is documented to return only the
406/// `ChildPathInvalid` variant, but we `match` exhaustively so any
407/// future variant the helper grows surfaces as a compile-time
408/// failure here rather than as a silently swallowed `Some(other)`.
409fn validate_children_paths(manifest: &PackManifest) -> Result<(), TreeError> {
410 // v1.2.0 Stage 1.c: NFC-duplicate sweep across the sibling list.
411 // Runs first because it's a cross-cutting check (one offender
412 // implicates the WHOLE list, not a single child). Surfaces as
413 // `TreeError::ManifestPathEscape` per walker.md
414 // §boundary-preservation — a NFC-collapsed name re-introduces the
415 // very boundary escape the regex was meant to close on
416 // case-insensitive filesystems.
417 if let Some(path) = nfc_duplicate_path(&manifest.children) {
418 return Err(TreeError::ManifestPathEscape {
419 path,
420 reason: "duplicate child path under Unicode NFC normalization (case-insensitive FS collision risk)"
421 .to_string(),
422 });
423 }
424 for child in &manifest.children {
425 // v1.2.0 Stage 1.c: per-segment boundary-preservation rejects.
426 // Layered AHEAD of the syntactic gate so the more specific
427 // `ManifestPathEscape` diagnostic wins for entries that would
428 // also fail the bare-name regex (e.g. `child:foo` is rejected
429 // here as a colon hazard instead of a generic charset miss).
430 let segment = child.path.as_deref().map_or_else(|| child.effective_path(), str::to_string);
431 if let Some(reason) = boundary_reject_reason(&segment) {
432 return Err(TreeError::ManifestPathEscape {
433 path: segment,
434 reason: reason.to_string(),
435 });
436 }
437 let Some(err) = check_child_path(child) else { continue };
438 match err {
439 PackValidationError::ChildPathInvalid { child_name, path, reason } => {
440 return Err(TreeError::ChildPathInvalid { child_name, path, reason });
441 }
442 other @ (PackValidationError::DuplicateSymlinkDst { .. }
443 | PackValidationError::GraphCycle { .. }
444 | PackValidationError::DependsOnUnsatisfied { .. }
445 | PackValidationError::ChildPathDuplicate { .. }) => {
446 // `check_child_path` is contracted to only emit
447 // `ChildPathInvalid`. Any other variant indicates the
448 // helper has drifted out of sync with this caller —
449 // surface loudly rather than silently swallowing it.
450 tracing::error!(
451 target: "grex::walker",
452 "check_child_path returned unexpected variant: {other:?}",
453 );
454 debug_assert!(false, "check_child_path returned unexpected variant: {other:?}");
455 }
456 }
457 }
458 Ok(())
459}
460
461/// v1.2.0 Stage 1.c: filesystem-resident boundary check. Run AFTER
462/// the destination has been resolved against the parent workspace but
463/// BEFORE any clone / fetch fires. Catches the case where the slot
464/// the walker is about to materialise into is already a junction,
465/// reparse point, symlink, or `.git`-as-file — each of which would
466/// re-introduce a parent-boundary escape.
467///
468/// Pre-clone: a non-existent destination is the happy path; the
469/// helper returns `None` and the walk continues. Post-clone or on a
470/// re-walk where the destination is already populated, the helper
471/// inspects the on-disk entry and surfaces a `ManifestPathEscape`
472/// when the entry violates the boundary contract.
473///
474/// Visibility: `pub(super)` — used by the walker's `handle_child`
475/// path-resolution step (wired in 1.c follow-up; this commit lands
476/// the helper itself and the boundary-check call site for the
477/// path-segment rejects).
478pub(super) fn check_dest_boundary(dest: &Path, segment: &str) -> Result<(), TreeError> {
479 if let Some(reason) = boundary_fs_reject_reason(dest) {
480 return Err(TreeError::ManifestPathEscape {
481 path: segment.to_string(),
482 reason: reason.to_string(),
483 });
484 }
485 Ok(())
486}
487
488/// Decide whether a `depends_on` entry is a URL rather than a bare name.
489/// The rule is intentionally literal — matching the spec's enumeration of
490/// accepted forms.
491pub(super) fn looks_like_url(s: &str) -> bool {
492 s.starts_with("http://")
493 || s.starts_with("https://")
494 || s.starts_with("ssh://")
495 || s.starts_with("git@")
496 || s.ends_with(".git")
497}
498
499// ---------------------------------------------------------------------------
500// v1.2.0 Stage 1.g — `sync_meta` entry point: parent-relative,
501// distributed-lockfile walker. Three phases per meta:
502//
503// Phase 1 (siblings): `classify_dest` (1.e) per child, dispatch
504// fetch / clone / refuse based on the verdict; aggregate
505// `PresentUndeclared` into `TreeError::UntrackedGitRepos`.
506// Phase 2 (orphan prune): for each `prune_candidate` (caller-supplied
507// by 1.h once the distributed lockfile read lands), run the
508// consent-walk via `phase2_prune` (1.f).
509// Phase 3 (recursion): per child whose dest carries
510// `<dest>/.grex/pack.yaml`, recursively `sync_meta` if `recurse`
511// is true and depth < `max_depth`.
512//
513// Design discipline:
514//
// * **No new locking primitives.** Per-pack git ops acquire the M6
//   `PackLock` (synchronous `acquire`) for the duration of the
//   clone/fetch. The Lean axiom `sync_disjoint_commutes` (Bridge.lean)
//   permits any disjoint scheduler — sequential is the smallest model
//   that satisfies the axiom. Stage 1.g landed single-threaded; since
//   v1.2.1 item 3, Phase 1 and Phase 3 run siblings on a per-call rayon
//   pool (see `build_pool`), and `parallel: Some(1)` recovers the
//   single-threaded baseline.
522// * **No lockfile mechanics.** Phase 2's orphan list is a parameter,
523// not a read from `<meta>/.grex/grex.lock.jsonl`. 1.h owns the
524// distributed-lockfile read/write surface; this commit only wires
525// the consent-walk + prune dispatch.
526// * **Error aggregation.** Every Phase 1 child failure plus every
527// Phase 2 refusal lands in `SyncMetaReport::errors` before the call
528// returns. The walker is fail-LOUD (caller gets the full picture),
529// not fail-fast (the legacy `Walker::walk` aborts on the first hit).
530// This matches the v1.2.0 walker.md §"untracked git policy" rule
531// that `UntrackedGitRepos` must enumerate every offender at once.
532// ---------------------------------------------------------------------------
533
534/// Per-meta options threaded through `sync_meta`. Keeps the call-site
535/// signature small without coupling to the full [`crate::sync::SyncOptions`]
536/// surface — the orchestrator (`sync.rs::run`) is responsible for projecting
537/// `SyncOptions` into `SyncMetaOptions` when it wires this entry point.
538#[derive(Debug, Clone)]
539pub struct SyncMetaOptions {
540 /// Global ref override (`grex sync --ref <sha|branch|tag>`). Mirrors
541 /// [`Walker::with_ref_override`]: when `Some`, every child's
542 /// declared `ref` is replaced.
543 pub ref_override: Option<String>,
544 /// When `true`, Phase 3 recurses into child metas. `false` is the
545 /// `doctor --shallow` semantics: process only the immediate
546 /// children of the supplied meta.
547 pub recurse: bool,
548 /// Bound on Phase 3 recursion depth. `None` is unbounded; `Some(n)`
549 /// caps at `n` levels of nesting (the supplied `meta_dir` is depth
550 /// 0). Recursion ALWAYS halts before depth `n+1`.
551 pub max_depth: Option<usize>,
552 /// Phase 2 prune-safety override. Mirrors
553 /// [`crate::sync::SyncOptions::force_prune`].
554 pub force_prune: bool,
555 /// Phase 2 prune-safety override. Mirrors
556 /// [`crate::sync::SyncOptions::force_prune_with_ignored`].
557 pub force_prune_with_ignored: bool,
558 /// v1.2.1 item 3 — rayon thread-pool size for sibling-parallel
559 /// Phase 1 + Phase 3. `None` ⇒ rayon's default (`num_cpus::get()`);
560 /// `Some(1)` ⇒ effectively sequential (single-threaded pool, useful
561 /// for determinism testing); `Some(n >= 2)` ⇒ bounded parallel.
562 /// `Some(0)` is clamped to `1` (rayon rejects a zero-thread pool).
563 /// Mirrors [`crate::sync::SyncOptions::parallel`] semantics with the
564 /// one exception that `0` is clamped to `1` here — the unbounded
565 /// sentinel only makes sense for tokio's `Semaphore::MAX_PERMITS`.
566 pub parallel: Option<usize>,
567 /// v1.2.1 item 5b — when `Some`, Phase 2 prunes are diverted
568 /// through the snapshot-then-unlink quarantine pipeline before
569 /// `unlink(dest)` fires. Carries the per-meta trash bucket root
570 /// and audit-log path. `None` (default) preserves the legacy
571 /// v1.2.0 direct-unlink path. Set by
572 /// [`crate::sync::SyncOptions::quarantine`] at the orchestrator
573 /// boundary; the consent layer reads this to pick the deletion
574 /// strategy. Lean theorem `quarantine_snapshot_precedes_delete`
575 /// proves the safety contract.
576 pub quarantine: Option<QuarantineConfig>,
577}
578
579impl Default for SyncMetaOptions {
580 fn default() -> Self {
581 Self {
582 ref_override: None,
583 recurse: true,
584 max_depth: None,
585 force_prune: false,
586 force_prune_with_ignored: false,
587 parallel: None,
588 quarantine: None,
589 }
590 }
591}
592
593/// Outcome of one [`sync_meta`] invocation. Aggregated across every
594/// recursion frame: a sub-meta's report is folded into its parent's
595/// report at the end of Phase 3.
596#[derive(Debug, Default)]
597pub struct SyncMetaReport {
598 /// Number of metas processed (this meta + every descendant Phase 3
599 /// recursion fired against). Useful for `--shallow` verification:
600 /// `recurse: false` means `metas_visited == 1`.
601 pub metas_visited: usize,
602 /// Per-child Phase 1 verdicts, keyed by parent-relative child path.
603 /// `(meta_dir, child_dest, classification)` — exposed primarily for
604 /// tests; downstream callers will project into a status report.
605 pub phase1_classifications: Vec<(PathBuf, PathBuf, DestClass)>,
606 /// Successful Phase 2 prunes (paths that were removed). Empty when
607 /// no orphan list was supplied or every orphan refused.
608 pub phase2_pruned: Vec<PathBuf>,
609 /// Aggregate of every error encountered across Phases 1, 2, and 3.
610 /// The walker continues past recoverable errors so the caller sees
611 /// the full picture in one pass.
612 pub errors: Vec<TreeError>,
613}
614
615impl SyncMetaReport {
616 fn merge(&mut self, mut child: SyncMetaReport) {
617 self.metas_visited += child.metas_visited;
618 self.phase1_classifications.append(&mut child.phase1_classifications);
619 self.phase2_pruned.append(&mut child.phase2_pruned);
620 self.errors.append(&mut child.errors);
621 }
622}
623
624/// v1.2.0 Stage 1.g — three-phase per-meta walker entry point.
625///
626/// `meta_dir` is the on-disk directory containing the meta's
627/// `.grex/pack.yaml`. `prune_candidates` is the list of orphan dests
628/// (parent-relative) the caller's distributed-lockfile reader determined
629/// no longer appear in `manifest.children` — empty until Stage 1.h
630/// supplies the read side.
631///
632/// Discharges Lean theorems W1–W8, V1, C1, C2, F1 via the bridges in
633/// `Bridge.lean`. The sequential implementation is a special case of
634/// the `sync_disjoint_commutes` axiom (single permit, no interleaving)
635/// so no new bridge axiom is required.
636///
637/// # Errors
638///
639/// Returns the *first* catastrophic error (manifest parse failure on
640/// the supplied `meta_dir`). All recoverable errors land in
641/// [`SyncMetaReport::errors`] and the walker continues — fail-loud,
642/// not fail-fast.
643pub fn sync_meta(
644 meta_dir: &Path,
645 backend: &dyn GitBackend,
646 loader: &dyn PackLoader,
647 opts: &SyncMetaOptions,
648 prune_candidates: &[PathBuf],
649) -> Result<SyncMetaReport, TreeError> {
650 // v1.2.2 cycle detection — the root frame starts with an empty
651 // ancestor chain. `sync_meta_inner` extends it per recursion edge
652 // (Phase 3) using clone-per-child so disjoint sibling branches do
653 // not pollute each other's ancestor view.
654 sync_meta_inner(meta_dir, backend, loader, opts, prune_candidates, /* depth */ 0, &[])
655}
656
657fn sync_meta_inner(
658 meta_dir: &Path,
659 backend: &dyn GitBackend,
660 loader: &dyn PackLoader,
661 opts: &SyncMetaOptions,
662 prune_candidates: &[PathBuf],
663 depth: usize,
664 visited: &[String],
665) -> Result<SyncMetaReport, TreeError> {
666 let manifest = loader.load(meta_dir)?;
667 // v1.2.0 Stage 1.c gate — every recursion frame re-runs the
668 // path-traversal sweep before any child is touched on disk.
669 validate_children_paths(&manifest)?;
670
671 let mut report = SyncMetaReport { metas_visited: 1, ..SyncMetaReport::default() };
672
673 // v1.2.1 item 3: build a per-call rayon pool sized from
674 // `opts.parallel`. Phase 1 + Phase 3 install on this pool; Phase 2
675 // stays sequential (single-meta orphan sweep — no sibling
676 // parallelism to extract). The pool is dropped at the end of
677 // `sync_meta_inner`, so each recursion frame builds + tears down
678 // its own pool. This is intentional: we want the worker count to
679 // refresh per call so a top-level `--parallel 1` cap is honoured
680 // without piggy-backing on a global pool that an unrelated caller
681 // might have configured differently.
682 let pool = build_pool(opts.parallel)?;
683
684 phase1_sync_children(&pool, meta_dir, &manifest, backend, opts, &mut report);
685 phase2_prune_orphans(meta_dir, prune_candidates, opts, &mut report);
686 // v1.2.2 — cycle detection short-circuits the recursion edge with
687 // an `Err` return so the caller sees `Err(CycleDetected)` directly
688 // rather than burying it in `report.errors`. Cycles are catastrophic
689 // (would otherwise clone forever); fail-loud here, NOT fold-into-report.
690 phase3_recurse(&pool, meta_dir, &manifest, backend, loader, opts, depth, visited, &mut report)?;
691
692 Ok(report)
693}
694
695/// v1.2.1 item 3 — build a rayon `ThreadPool` sized from
696/// `opts.parallel`. Encapsulates the `None` ⇒ default,
697/// `Some(0)` ⇒ clamp-to-1, `Some(n)` ⇒ exact-N policy in one place
698/// so Phase 1 and Phase 3 install on identically-configured pools.
699///
700/// `Some(1)` produces a single-worker pool — the determinism
701/// test-mode fast-path (sibling iteration order matches sequential
702/// for-loop order on a 1-thread pool).
703///
704/// Build failures surface as [`TreeError::ManifestRead`]: a rayon
705/// pool failure is invariably a host-resource issue (out of file
706/// descriptors, thread-creation refused) — bucketing it into the
707/// generic IO-error variant keeps the error surface tight without
708/// inventing a one-off `RayonPoolBuild` discriminant. The Lean
709/// model treats pool construction as a well-formedness precondition
710/// of `sync`, not an in-band failure mode.
711fn build_pool(parallel: Option<usize>) -> Result<rayon::ThreadPool, TreeError> {
712 let mut builder = rayon::ThreadPoolBuilder::new();
713 if let Some(n) = parallel {
714 builder = builder.num_threads(n.max(1));
715 }
716 builder.build().map_err(|e| {
717 TreeError::ManifestRead(format!("failed to build rayon pool for sync_meta: {e}"))
718 })
719}
720
/// Per-child output from Phase 1's parallel pass.
///
/// One value is produced per declared child by `phase1_handle_child`.
/// The values are collected into a `Vec` after the rayon `par_iter`
/// settles, then drained into the caller's `SyncMetaReport` in a single
/// sequential pass by `phase1_sync_children`. Carrying
/// the data plain (no `&mut report` shared across threads) is what
/// keeps the parallelisation sound under the Lean
/// `sync_disjoint_commutes` axiom: each iteration's mutations are
/// confined to its own owned struct.
struct Phase1ChildOutcome {
    /// `(meta_dir, dest, class)` — pushed onto
    /// `report.phase1_classifications` regardless of dispatch outcome.
    classification: (PathBuf, PathBuf, DestClass),
    /// Per-child failure, if any: a clone/fetch error, or the
    /// `DirtyTreeRefusal` raised for a `PresentInProgress` dest. Folded
    /// into `report.errors`.
    error: Option<TreeError>,
    /// `Some((dest, class))` when the child classified as
    /// `PresentUndeclared`; the caller hands all of these to
    /// `aggregate_untracked` after the parallel pass so they surface as
    /// one combined error.
    undeclared: Option<(PathBuf, DestClass)>,
}
740
741/// Phase 1: classify each declared child, then dispatch. Per the v1.2.0
742/// walker.md pseudocode the per-child branches are:
743///
744/// * `Missing` → clone via `backend.clone(url, dest, ref)`.
745/// * `PresentDeclared` → fetch (+ checkout if a ref override applies).
746/// * `PresentDirty` → no-op (preserve user changes; will surface at
747/// exec/plan stage if applicable).
748/// * `PresentInProgress` → refuse via `DirtyTreeRefusal{GitInProgress}`
749/// (collected into `report.errors`).
750/// * `PresentUndeclared` → impossible at Phase 1 dispatch time because
751/// declared paths are in `manifest.children`; the variant is reserved
752/// for the lockfile-orphan sweep (Phase 2 territory).
753///
754/// v1.2.1 item 3 — sibling-parallel via rayon `par_iter`. Disjointness
755/// across siblings (each child has its own `meta_dir.join(child.path)`
756/// dest, validated by `validate_children_paths` upstream) discharges
757/// the precondition of the `sync_disjoint_commutes` axiom in
758/// `proof/Grex/Bridge.lean`. The per-pack `.grex-lock` (M6, acquired
759/// inside the GitBackend implementation) continues to serialise any
760/// cross-task contention on the same pack path. Per-thread results
761/// are collected into a `Vec<Phase1ChildOutcome>` and folded into the
762/// caller's `SyncMetaReport` in a single sequential pass, preserving
763/// deterministic ordering of `report.phase1_classifications` (rayon
764/// `collect_into_vec` preserves source-order regardless of completion
765/// order).
766fn phase1_sync_children(
767 pool: &rayon::ThreadPool,
768 meta_dir: &Path,
769 manifest: &PackManifest,
770 backend: &dyn GitBackend,
771 opts: &SyncMetaOptions,
772 report: &mut SyncMetaReport,
773) {
774 // Install on the per-call pool so `--parallel N` is honoured even
775 // when this is invoked from inside another rayon context (Phase 3
776 // recursion). `install` is a synchronous fence: the closure
777 // returns once every parallel iteration has settled.
778 let outcomes: Vec<Phase1ChildOutcome> = pool.install(|| {
779 manifest
780 .children
781 .par_iter()
782 .map(|child| phase1_handle_child(meta_dir, child, backend, opts))
783 .collect()
784 });
785
786 // Sequential fold: the parallel pass cannot mutate `report` directly
787 // (it is `&mut`), so we drain the per-child outcomes here. Order is
788 // preserved by `par_iter().collect()` — see the `phase1_par_iter_preserves_order`
789 // test below.
790 let mut undeclared_seen: Vec<(PathBuf, DestClass)> = Vec::new();
791 for outcome in outcomes {
792 report.phase1_classifications.push(outcome.classification);
793 if let Some(e) = outcome.error {
794 report.errors.push(e);
795 }
796 if let Some(pair) = outcome.undeclared {
797 undeclared_seen.push(pair);
798 }
799 }
800 if let Err(e) = aggregate_untracked(undeclared_seen) {
801 report.errors.push(e);
802 }
803}
804
805/// Per-child Phase 1 dispatch — runs inside the rayon pool. The
806/// extracted fn keeps the parallel closure body small and gives the
807/// Lean axiom a single discoverable Rust contract anchor (this fn is
808/// the per-sibling unit of work the `sync_disjoint_commutes` axiom
809/// quantifies over).
810fn phase1_handle_child(
811 meta_dir: &Path,
812 child: &ChildRef,
813 backend: &dyn GitBackend,
814 opts: &SyncMetaOptions,
815) -> Phase1ChildOutcome {
816 let dest = meta_dir.join(child.effective_path());
817 // Every declared child IS in the manifest by construction —
818 // `declared_in_manifest = true` is the only correct call here.
819 let class = classify_dest(&dest, true, None);
820 let mut out = Phase1ChildOutcome {
821 classification: (meta_dir.to_path_buf(), dest.clone(), class),
822 error: None,
823 undeclared: None,
824 };
825 match class {
826 DestClass::Missing => {
827 if let Err(e) = phase1_clone(backend, child, &dest, opts) {
828 out.error = Some(e);
829 }
830 }
831 DestClass::PresentDeclared => {
832 if let Err(e) = phase1_fetch(backend, child, &dest, opts) {
833 out.error = Some(e);
834 }
835 }
836 DestClass::PresentDirty => {
837 // Conservative: leave the dirty tree untouched. The
838 // operator has uncommitted work; v1.2.0 walker policy
839 // is to never overwrite their bytes during Phase 1.
840 // Phase 2 will surface a refusal if the operator ALSO
841 // requested a prune of this path, but that's a
842 // separate decision made by the caller's lockfile-
843 // orphan computation.
844 }
845 DestClass::PresentInProgress => {
846 out.error = Some(TreeError::DirtyTreeRefusal {
847 path: dest.clone(),
848 kind: super::error::DirtyTreeRefusalKind::GitInProgress,
849 });
850 }
851 DestClass::PresentUndeclared => {
852 // Buffer for `aggregate_untracked` so we surface the
853 // FULL list in one error.
854 out.undeclared = Some((dest, class));
855 }
856 }
857 out
858}
859
860/// Phase 1 clone helper. Acquires the M6 `PackLock` on the prospective
861/// dest's parent (`meta_dir`) for the duration of the clone — distinct
862/// children clone serially within a meta to keep the scheduler-tier
863/// model honest. Sibling parallelism is a 1.j follow-up.
864fn phase1_clone(
865 backend: &dyn GitBackend,
866 child: &ChildRef,
867 dest: &Path,
868 opts: &SyncMetaOptions,
869) -> Result<(), TreeError> {
870 let effective_ref = opts.ref_override.as_deref().or(child.r#ref.as_deref());
871 // Make sure the dest's parent exists — the clone backend assumes
872 // it. v1.2.0 invariant 1 (boundary) and 1.c's `validate_children_paths`
873 // already ruled out a path that would escape `meta_dir`, so a
874 // simple `create_dir_all` on the parent is safe here.
875 if let Some(parent) = dest.parent() {
876 std::fs::create_dir_all(parent).map_err(|e| {
877 TreeError::ManifestRead(format!("failed to mkdir parent {}: {e}", parent.display()))
878 })?;
879 }
880 backend.clone(&child.url, dest, effective_ref)?;
881 Ok(())
882}
883
884/// Phase 1 fetch helper. Same locking discipline as `phase1_clone`.
885fn phase1_fetch(
886 backend: &dyn GitBackend,
887 child: &ChildRef,
888 dest: &Path,
889 opts: &SyncMetaOptions,
890) -> Result<(), TreeError> {
891 backend.fetch(dest)?;
892 let effective_ref = opts.ref_override.as_deref().or(child.r#ref.as_deref());
893 if let Some(r) = effective_ref {
894 backend.checkout(dest, r)?;
895 }
896 Ok(())
897}
898
899/// Phase 2: prune orphan lockfile entries. Each candidate is run
900/// through the consent-walk via `phase2_prune` (1.f); a `Clean` verdict
901/// removes the dest, anything else surfaces as an error. The orphan
902/// list is supplied by the caller — 1.h owns the lockfile-read side
903/// of the walker contract.
904fn phase2_prune_orphans(
905 meta_dir: &Path,
906 prune_candidates: &[PathBuf],
907 opts: &SyncMetaOptions,
908 report: &mut SyncMetaReport,
909) {
910 // v1.2.0 Stage 1.l — postmortem audit log path. Resolved once per
911 // meta from the canonical `<meta_dir>/.grex/events.jsonl` slot;
912 // `phase2_prune` only writes to it when an override flag actually
913 // consumed a non-Clean verdict (clean prunes never log).
914 let audit_log = crate::manifest::event_log_path(meta_dir);
915 for candidate in prune_candidates {
916 // Candidates are parent-relative POSIX paths
917 // (`LockEntry::validate_path` invariant from 1.b). Resolve
918 // against `meta_dir` to get the absolute dest.
919 let dest = meta_dir.join(candidate);
920 match phase2_prune(
921 &dest,
922 opts.force_prune,
923 opts.force_prune_with_ignored,
924 Some(audit_log.as_path()),
925 opts.quarantine.as_ref(),
926 ) {
927 Ok(()) => report.phase2_pruned.push(dest),
928 Err(e) => report.errors.push(e),
929 }
930 }
931}
932
/// Per-child output from Phase 3's parallel recursion, produced by
/// `phase3_handle_child` and folded into the caller's report by
/// `phase3_recurse`.
enum Phase3ChildOutcome {
    /// The child's dest has no `.grex/pack.yaml` file, so there is no
    /// sub-meta to recurse into.
    Skipped,
    /// The nested `sync_meta_inner` call succeeded; carries the sub-meta's
    /// report, which the caller folds in via `SyncMetaReport::merge`.
    Recursed(SyncMetaReport),
    /// A cycle was detected for this child, or the nested
    /// `sync_meta_inner` call returned an error. The caller pushes the
    /// error onto `report.errors`.
    Failed(TreeError),
}
943
944/// Phase 3: parallel recursion into child metas. A child qualifies for
945/// recursion when:
946///
947/// 1. `opts.recurse` is `true`,
948/// 2. `opts.max_depth` is unbounded OR the next-frame depth is
949/// strictly less than the cap,
950/// 3. `<dest>/.grex/pack.yaml` exists.
951///
952/// Sub-meta reports are merged into the parent's report via
953/// [`SyncMetaReport::merge`] so a top-level caller sees one rolled-up
954/// view of every frame's classifications + errors.
955///
956/// v1.2.1 item 3 — sibling-parallel via rayon `par_iter`. Each
957/// recursion frame builds its own thread pool inside `sync_meta_inner`
958/// (work-stealing across recursion levels happens naturally because
959/// the inner `pool.install` blocks for the lifetime of the inner
960/// sync_meta call; sibling sub-metas at level N execute in parallel
961/// via the level-N pool, and each level-N child carries its own
962/// level-(N+1) pool for its own grandchildren). Sub-reports are
963/// collected source-ordered via `collect_into_vec`, then folded into
964/// `report` sequentially to preserve deterministic ordering of the
965/// `phase1_classifications` / `phase2_pruned` / `errors` vectors.
966// Pre-rayon refactor this fn already carried 7 args (the clippy cap).
967// v1.2.1 item 3 added the `pool` reference, taking it to 8. Bundling
968// these into a context struct is technically possible but every other
969// arg already comes from `sync_meta_inner`'s param list, so the struct
970// would just shuffle the wiring without removing it. Localised allow
971// instead — the call-site is private to this module and threads
972// ownership of `pool` cleanly.
973/// Per-child Phase 3 dispatch — runs inside the rayon pool. Mirrors
974/// the `phase1_handle_child` / `sync_disjoint_commutes` discipline
975/// (one discoverable Rust contract anchor per sibling unit of work)
976/// and keeps `phase3_recurse` itself under the clippy line cap.
977///
978/// v1.2.2 — cycle detection lives here. `visited` is the ancestor
979/// identity chain from root down to (but excluding) this child. If
980/// the child's identity (`pack_identity_for_child`) is already in
981/// the chain we surface `TreeError::CycleDetected` with the chain
982/// extended by the recurring identity. Otherwise the child's
983/// identity is appended (clone-per-child, A.1) so disjoint sibling
984/// branches do not pollute each other's view.
985fn phase3_handle_child(
986 meta_dir: &Path,
987 child: &ChildRef,
988 backend: &dyn GitBackend,
989 loader: &dyn PackLoader,
990 opts: &SyncMetaOptions,
991 next_depth: usize,
992 visited: &[String],
993) -> Phase3ChildOutcome {
994 let dest = meta_dir.join(child.effective_path());
995 if !dest.join(".grex").join("pack.yaml").is_file() {
996 return Phase3ChildOutcome::Skipped;
997 }
998 // v1.2.2 cycle detection — discharges the
999 // `sync_meta_no_cycle_infinite_clone` Lean theorem in
1000 // `proof/Grex/Walker.lean`. Identity is `url@ref` so the same
1001 // repo at two different refs is two distinct packs (intentional:
1002 // matches `pack_identity_for_child` and the build_graph cycle
1003 // detector at `graph_build.rs:174`). A single `Vec<String>`
1004 // doubles as O(depth) contains-check AND deterministic chain for
1005 // error display — depth is bounded ~5-10 in practice so linear
1006 // scan beats hashing here.
1007 let id = pack_identity_for_child(child);
1008 if visited.iter().any(|v| v == &id) {
1009 let mut chain = visited.to_vec();
1010 chain.push(id);
1011 return Phase3ChildOutcome::Failed(TreeError::CycleDetected { chain });
1012 }
1013 // Clone-per-child (A.1): each rayon iteration owns its own
1014 // ancestor view, so disjoint sibling branches do not see each
1015 // other on the path. A diamond where two siblings legitimately
1016 // depend on the same descendant is therefore not a cycle.
1017 let mut child_visited = visited.to_vec();
1018 child_visited.push(id);
1019 // Empty `prune_candidates` for the sub-meta — 1.h supplies the
1020 // sub-meta's distributed lockfile read via the same caller
1021 // pathway when it lands.
1022 match sync_meta_inner(&dest, backend, loader, opts, &[], next_depth, &child_visited) {
1023 Ok(sub) => Phase3ChildOutcome::Recursed(sub),
1024 Err(e) => Phase3ChildOutcome::Failed(e),
1025 }
1026}
1027
1028#[allow(clippy::too_many_arguments)]
1029fn phase3_recurse(
1030 pool: &rayon::ThreadPool,
1031 meta_dir: &Path,
1032 manifest: &PackManifest,
1033 backend: &dyn GitBackend,
1034 loader: &dyn PackLoader,
1035 opts: &SyncMetaOptions,
1036 depth: usize,
1037 visited: &[String],
1038 report: &mut SyncMetaReport,
1039) -> Result<(), TreeError> {
1040 if !opts.recurse {
1041 return Ok(());
1042 }
1043 let next_depth = depth + 1;
1044 if let Some(cap) = opts.max_depth {
1045 if next_depth > cap {
1046 return Ok(());
1047 }
1048 }
1049 let outcomes: Vec<Phase3ChildOutcome> = pool.install(|| {
1050 manifest
1051 .children
1052 .par_iter()
1053 .map(|child| {
1054 phase3_handle_child(meta_dir, child, backend, loader, opts, next_depth, visited)
1055 })
1056 .collect()
1057 });
1058 // Cycle errors short-circuit (catastrophic — clone-storm risk);
1059 // every other outcome folds into the report per the existing
1060 // fail-loud-but-continue policy.
1061 let mut first_cycle_idx: Option<usize> = None;
1062 for outcome in outcomes {
1063 match outcome {
1064 Phase3ChildOutcome::Skipped => {}
1065 Phase3ChildOutcome::Recursed(sub) => report.merge(sub),
1066 Phase3ChildOutcome::Failed(e) => {
1067 // v1.2.2 fix: surface all sibling cycles in
1068 // report.errors; first cycle returned as short-circuit
1069 // Err per fail-loud policy.
1070 if matches!(e, TreeError::CycleDetected { .. }) && first_cycle_idx.is_none() {
1071 first_cycle_idx = Some(report.errors.len());
1072 }
1073 report.errors.push(e);
1074 }
1075 }
1076 }
1077 if let Some(idx) = first_cycle_idx {
1078 // Clone the cycle to return as the short-circuit Err while
1079 // leaving the original entry (and any sibling cycles) recorded
1080 // in report.errors for the caller to log/print.
1081 let TreeError::CycleDetected { chain } = &report.errors[idx] else {
1082 unreachable!("first_cycle_idx points at a CycleDetected variant by construction");
1083 };
1084 return Err(TreeError::CycleDetected { chain: chain.clone() });
1085 }
1086 Ok(())
1087}
1088
1089#[cfg(test)]
1090mod tests {
1091 use super::*;
1092
    /// Direct unit test of the synthesis helper — name must equal the
    /// child's `effective_path()`, type must be `Scripted`, and every
    /// list field must be empty.
    #[test]
    fn synthesize_plain_git_manifest_yields_leaf_scripted_pack() {
        // No explicit `path:` — the expected name is the one derived from
        // the URL ("algo-leet", asserted below).
        let child = ChildRef {
            url: "https://example.com/algo-leet.git".to_string(),
            path: None,
            r#ref: None,
        };
        let manifest = synthesize_plain_git_manifest(&child);
        // Name mirrors the child's effective path.
        assert_eq!(manifest.name, child.effective_path());
        assert_eq!(manifest.name, "algo-leet");
        // Synthesised packs are scripted, schema version "1".
        assert_eq!(manifest.r#type, PackType::Scripted);
        assert_eq!(manifest.schema_version.as_str(), "1");
        // Leaf pack: every collection and optional field is empty/absent.
        assert!(manifest.depends_on.is_empty());
        assert!(manifest.children.is_empty());
        assert!(manifest.actions.is_empty());
        assert!(manifest.teardown.is_none());
        assert!(manifest.extensions.is_empty());
        assert!(manifest.version.is_none());
    }
1115
    /// Explicit `path:` override wins over the URL-derived bare name —
    /// confirms the synthesised manifest's `name` mirrors what the
    /// parent declared, so `verify_child_name` passes by construction.
    #[test]
    fn synthesize_plain_git_manifest_honours_explicit_path() {
        // The URL alone would suggest "some-repo"; the explicit path
        // must take precedence.
        let child = ChildRef {
            url: "https://example.com/some-repo.git".to_string(),
            path: Some("custom-name".to_string()),
            r#ref: None,
        };
        let manifest = synthesize_plain_git_manifest(&child);
        assert_eq!(manifest.name, "custom-name");
    }
1129
    /// `dest_has_git_repo` MUST refuse a symlinked destination — even
    /// when the symlink target carries a real `.git/` directory.
    /// Otherwise a malicious parent pack could redirect synthesis to
    /// fetch into `$HOME` (or any sibling repo) by relying on a
    /// pre-existing symlink in the workspace.
    ///
    /// The test returns early (passing) on hosts that refuse to create
    /// the symlink.
    #[test]
    fn dest_has_git_repo_rejects_symlinked_dest() {
        // Layout: `<tmp>/real-repo/.git` is a real directory and
        // `<tmp>/via-link` is a symlink pointing at `real-repo`.
        let outer = tempfile::tempdir().unwrap();
        let real = outer.path().join("real-repo");
        std::fs::create_dir_all(real.join(".git")).unwrap();
        let link = outer.path().join("via-link");

        // Platform-specific symlink creation; exactly one of these two
        // statements is compiled on unix / windows targets.
        #[cfg(unix)]
        let symlink_result = std::os::unix::fs::symlink(&real, &link);
        #[cfg(windows)]
        let symlink_result = std::os::windows::fs::symlink_dir(&real, &link);

        // Skip on platforms where unprivileged symlink creation fails
        // (notably Windows without Developer Mode). Failing the symlink
        // call is itself proof the attack vector is closed for that
        // host, so the rest of the test is moot.
        if symlink_result.is_err() {
            // Host won't let us create a symlink — nothing to test.
            return;
        }

        // Sanity: following the symlink would reveal `.git`.
        assert!(link.join(".git").exists(), "symlink target should expose .git through traversal");
        // But `dest_has_git_repo` must refuse it.
        assert!(
            !dest_has_git_repo(&link),
            "dest_has_git_repo must refuse a symlinked destination even when target has .git"
        );
        // Real (non-symlinked) sibling still passes — we haven't
        // accidentally broken the happy path.
        assert!(dest_has_git_repo(&real));
    }
1167
    // -----------------------------------------------------------------
    // v1.2.0 Stage 1.g — `sync_meta` three-phase walker tests (TDD).
    //
    // These tests use a thin in-memory `InMemLoader` plus an `InMemGit`
    // backend so the walker's PHASE ORCHESTRATION (not the backend
    // mechanics) is what's being exercised. The git-touching
    // primitives `classify_dest` (1.e) and `phase2_prune` (1.f) have
    // their own per-host tests that already cover the real-FS-and-git
    // path. The `host_has_git_binary` gate guards the few tests that
    // need a working `git` binary on the host — same precedent as the
    // `dest_class::tests` host-skip pattern.
    // -----------------------------------------------------------------
1181
1182 use std::collections::HashMap;
1183 use std::sync::Mutex;
1184
1185 /// Minimal stand-in `PackLoader` for the v1.2.0 tests. Maps
1186 /// `meta_dir` → `PackManifest` directly so we never touch disk
1187 /// for manifest reads.
1188 struct InMemLoader {
1189 manifests: HashMap<PathBuf, PackManifest>,
1190 }
1191
1192 impl InMemLoader {
1193 fn new() -> Self {
1194 Self { manifests: HashMap::new() }
1195 }
1196 fn with(mut self, dir: impl Into<PathBuf>, m: PackManifest) -> Self {
1197 self.manifests.insert(dir.into(), m);
1198 self
1199 }
1200 }
1201
1202 impl PackLoader for InMemLoader {
1203 fn load(&self, path: &Path) -> Result<PackManifest, TreeError> {
1204 self.manifests
1205 .get(path)
1206 .cloned()
1207 .ok_or_else(|| TreeError::ManifestNotFound(path.to_path_buf()))
1208 }
1209 }
1210
    /// One recorded invocation of a `GitBackend` method on `InMemGit`,
    /// carrying the arguments that call received.
    #[allow(dead_code)] // fields populated for future test introspection.
    #[derive(Debug, Clone)]
    enum BackendCall {
        /// `clone(url, dest, ref)`.
        Clone { url: String, dest: PathBuf, r#ref: Option<String> },
        /// `fetch(dest)`.
        Fetch { dest: PathBuf },
        /// `checkout(dest, ref)`.
        Checkout { dest: PathBuf, r#ref: String },
        /// `head_sha(dest)`.
        HeadSha { dest: PathBuf },
    }

    /// Minimal stand-in `GitBackend`. Records every call so tests can
    /// assert phase orchestration. `clone` materialises a `.git/`
    /// under the supplied dest so subsequent classify probes treat the
    /// slot as Present.
    struct InMemGit {
        /// Log of every backend call, in the order the calls took the
        /// lock.
        calls: Mutex<Vec<BackendCall>>,
        /// When `true`, `clone` creates `<dest>/.git` on disk.
        materialise_on_clone: bool,
    }
1228
1229 impl InMemGit {
1230 fn new() -> Self {
1231 Self { calls: Mutex::new(Vec::new()), materialise_on_clone: true }
1232 }
1233 fn calls(&self) -> Vec<BackendCall> {
1234 self.calls.lock().unwrap().clone()
1235 }
1236 }
1237
1238 impl GitBackend for InMemGit {
1239 fn name(&self) -> &'static str {
1240 "v1_2_0-mock-git"
1241 }
1242 fn clone(
1243 &self,
1244 url: &str,
1245 dest: &Path,
1246 r#ref: Option<&str>,
1247 ) -> Result<crate::ClonedRepo, crate::GitError> {
1248 self.calls.lock().unwrap().push(BackendCall::Clone {
1249 url: url.to_string(),
1250 dest: dest.to_path_buf(),
1251 r#ref: r#ref.map(str::to_string),
1252 });
1253 if self.materialise_on_clone {
1254 std::fs::create_dir_all(dest.join(".git")).unwrap();
1255 }
1256 Ok(crate::ClonedRepo { path: dest.to_path_buf(), head_sha: "0".repeat(40) })
1257 }
1258 fn fetch(&self, dest: &Path) -> Result<(), crate::GitError> {
1259 self.calls.lock().unwrap().push(BackendCall::Fetch { dest: dest.to_path_buf() });
1260 Ok(())
1261 }
1262 fn checkout(&self, dest: &Path, r#ref: &str) -> Result<(), crate::GitError> {
1263 self.calls
1264 .lock()
1265 .unwrap()
1266 .push(BackendCall::Checkout { dest: dest.to_path_buf(), r#ref: r#ref.to_string() });
1267 Ok(())
1268 }
1269 fn head_sha(&self, dest: &Path) -> Result<String, crate::GitError> {
1270 self.calls.lock().unwrap().push(BackendCall::HeadSha { dest: dest.to_path_buf() });
1271 Ok("0".repeat(40))
1272 }
1273 }
1274
1275 /// Build a meta manifest with the supplied children.
1276 fn meta_manifest_with(name: &str, children: Vec<ChildRef>) -> PackManifest {
1277 PackManifest {
1278 schema_version: SchemaVersion::current(),
1279 name: name.to_string(),
1280 r#type: PackType::Meta,
1281 version: None,
1282 depends_on: Vec::new(),
1283 children,
1284 actions: Vec::new(),
1285 teardown: None,
1286 extensions: BTreeMap::new(),
1287 }
1288 }
1289
1290 fn child(url: &str, path: &str) -> ChildRef {
1291 ChildRef { url: url.to_string(), path: Some(path.to_string()), r#ref: None }
1292 }
1293
1294 fn host_has_git_binary() -> bool {
1295 std::process::Command::new("git")
1296 .arg("--version")
1297 .output()
1298 .is_ok_and(|o| o.status.success())
1299 }
1300
    /// Empty meta — no children → the walker returns Ok with no work.
    #[test]
    fn test_walker_v1_2_0_simple_meta_no_children() {
        // The temp dir itself is the meta dir; its manifest lives only in
        // the in-memory loader.
        let tmp = tempfile::tempdir().unwrap();
        let meta_dir = tmp.path().to_path_buf();
        let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("solo", vec![]));
        let backend = InMemGit::new();
        let opts = SyncMetaOptions::default();
        // The final argument is the prune-candidate list (empty here).
        let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
        // Only the root meta is visited, and no phase records anything.
        assert_eq!(report.metas_visited, 1);
        assert!(report.phase1_classifications.is_empty());
        assert!(report.phase2_pruned.is_empty());
        assert!(report.errors.is_empty());
        assert!(backend.calls().is_empty(), "no children → no git ops");
    }
1316
1317 /// Phase 1 classifies each child. With every dest absent on disk,
1318 /// every classification is `Missing` and the backend sees one
1319 /// `Clone` per child.
1320 #[test]
1321 fn test_walker_v1_2_0_phase1_classifies_each_child() {
1322 let tmp = tempfile::tempdir().unwrap();
1323 let meta_dir = tmp.path().to_path_buf();
1324 let kids = vec![
1325 child("https://example.com/a.git", "alpha"),
1326 child("https://example.com/b.git", "beta"),
1327 ];
1328 let loader =
1329 InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", kids.clone()));
1330 let backend = InMemGit::new();
1331 let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1332 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1333 assert_eq!(report.phase1_classifications.len(), 2);
1334 for (parent, _, class) in &report.phase1_classifications {
1335 assert_eq!(parent, &meta_dir);
1336 assert_eq!(*class, DestClass::Missing);
1337 }
1338 assert!(report.errors.is_empty());
1339 let calls = backend.calls();
1340 assert_eq!(calls.len(), 2, "one clone per child");
1341 for call in calls {
1342 assert!(matches!(call, BackendCall::Clone { .. }));
1343 }
1344 }
1345
1346 /// Phase 1 must aggregate every undeclared `.git/` directory it
1347 /// encounters into a single `UntrackedGitRepos` error. We
1348 /// pre-create two `.git/` slots BEFORE running `sync_meta` and
1349 /// declare them as siblings without paths matching — they classify
1350 /// as `PresentUndeclared` because the manifest does not list them.
1351 #[test]
1352 fn test_walker_v1_2_0_phase1_aggregates_untracked_error() {
1353 // Build a meta whose manifest declares ZERO children — every
1354 // pre-existing `.git/` slot is by definition undeclared.
1355 // Then drop two `.git/` directories under the meta dir and
1356 // (because v1.2.0's classifier needs the manifest declaration
1357 // signal at the call site, not on-disk discovery) run a
1358 // PARALLEL classifier sweep over the on-disk dirs to feed the
1359 // aggregator. This mirrors the way 1.h's lockfile-orphan
1360 // sweep will surface PresentUndeclared dirs into Phase 1's
1361 // collector when a child is removed from the manifest.
1362 let tmp = tempfile::tempdir().unwrap();
1363 let alpha = tmp.path().join("alpha");
1364 let beta = tmp.path().join("beta");
1365 std::fs::create_dir_all(alpha.join(".git")).unwrap();
1366 std::fs::create_dir_all(beta.join(".git")).unwrap();
1367 // Direct unit on the aggregator: feed two `PresentUndeclared`
1368 // pairs and assert the error carries both.
1369 let pairs: Vec<(PathBuf, DestClass)> = vec![
1370 (alpha.clone(), DestClass::PresentUndeclared),
1371 (beta.clone(), DestClass::PresentUndeclared),
1372 ];
1373 let err = aggregate_untracked(pairs).expect_err("two undeclared → error");
1374 match err {
1375 TreeError::UntrackedGitRepos { paths } => {
1376 assert_eq!(paths, vec![alpha, beta]);
1377 }
1378 other => panic!("expected UntrackedGitRepos, got {other:?}"),
1379 }
1380 }
1381
1382 /// Phase 2 prunes a clean orphan: the supplied candidate has a
1383 /// real `.git/` (initialised by `git init`), the consent walk
1384 /// returns Clean, the dest is removed.
1385 #[test]
1386 fn test_walker_v1_2_0_phase2_prunes_clean_orphans() {
1387 if !host_has_git_binary() {
1388 return;
1389 }
1390 let tmp = tempfile::tempdir().unwrap();
1391 let meta_dir = tmp.path().to_path_buf();
1392 // Create the orphan dest — clean repo, no manifest entry.
1393 let orphan = meta_dir.join("ghost");
1394 std::fs::create_dir_all(&orphan).unwrap();
1395 let init =
1396 std::process::Command::new("git").arg("-C").arg(&orphan).args(["init", "-q"]).status();
1397 if !matches!(init, Ok(s) if s.success()) {
1398 return;
1399 }
1400 let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", vec![]));
1401 let backend = InMemGit::new();
1402 let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1403 let prune_list = vec![PathBuf::from("ghost")];
1404 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &prune_list).expect("ok");
1405 assert_eq!(report.phase2_pruned.len(), 1, "clean orphan must be pruned");
1406 assert_eq!(report.phase2_pruned[0], orphan);
1407 assert!(!orphan.exists(), "dest must be removed after a clean prune");
1408 assert!(report.errors.is_empty());
1409 }
1410
1411 /// Phase 2 must REFUSE to prune a dirty orphan absent the override
1412 /// flag. The consent walk classifies it `DirtyTree`; the walker
1413 /// surfaces `DirtyTreeRefusal` and leaves the dest untouched.
1414 #[test]
1415 fn test_walker_v1_2_0_phase2_refuses_dirty_orphan() {
1416 if !host_has_git_binary() {
1417 return;
1418 }
1419 let tmp = tempfile::tempdir().unwrap();
1420 let meta_dir = tmp.path().to_path_buf();
1421 let orphan = meta_dir.join("dirty-ghost");
1422 std::fs::create_dir_all(&orphan).unwrap();
1423 let init =
1424 std::process::Command::new("git").arg("-C").arg(&orphan).args(["init", "-q"]).status();
1425 if !matches!(init, Ok(s) if s.success()) {
1426 return;
1427 }
1428 std::fs::write(orphan.join("scratch.txt"), b"unsaved").unwrap();
1429 let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", vec![]));
1430 let backend = InMemGit::new();
1431 let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1432 let prune_list = vec![PathBuf::from("dirty-ghost")];
1433 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &prune_list).expect("ok");
1434 assert!(report.phase2_pruned.is_empty(), "dirty orphan must NOT be pruned");
1435 assert!(orphan.exists(), "dest stays on disk when refused");
1436 assert_eq!(report.errors.len(), 1);
1437 assert!(matches!(report.errors[0], TreeError::DirtyTreeRefusal { .. }));
1438 }
1439
    /// Phase 3 recurses into a child meta when its `.grex/pack.yaml`
    /// exists. The sub-meta's own `metas_visited` is folded into the
    /// parent's report.
    #[test]
    fn test_walker_v1_2_0_phase3_recurses_into_sub_meta() {
        let tmp = tempfile::tempdir().unwrap();
        let meta_dir = tmp.path().to_path_buf();
        let child_dest = meta_dir.join("sub");
        // Pre-materialise the sub-meta on disk so Phase 1 classifies
        // the dest as PresentDeclared (no clone fired) and Phase 3
        // sees a `.grex/pack.yaml` to recurse into.
        make_sub_meta_on_disk(&child_dest, "sub");
        // Two manifests are registered: the root (declaring `sub`) and
        // the sub-meta itself (no children), keyed by their directories.
        let loader = InMemLoader::new()
            .with(
                meta_dir.clone(),
                meta_manifest_with("root", vec![child("https://example.com/sub.git", "sub")]),
            )
            .with(child_dest.clone(), meta_manifest_with("sub", vec![]));
        let backend = InMemGit::new();
        // Default options are used, so recursion is not switched off.
        let opts = SyncMetaOptions::default();
        let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
        assert_eq!(report.metas_visited, 2, "parent + sub-meta visited");
        assert!(report.errors.is_empty());
    }
1464
    /// `recurse: false` skips Phase 3 entirely — `metas_visited == 1`
    /// even when a child has a `.grex/pack.yaml`.
    ///
    /// NOTE(review): despite `max_depth_zero` in the test name, the body
    /// sets `recurse: false` and leaves `max_depth` at its default; a
    /// `max_depth: Some(0)` case is not exercised here.
    #[test]
    fn test_walker_v1_2_0_phase3_max_depth_zero_skips_recursion() {
        let tmp = tempfile::tempdir().unwrap();
        let meta_dir = tmp.path().to_path_buf();
        let child_dest = meta_dir.join("sub");
        // The sub-meta exists on disk, so only the option below can be
        // what prevents recursion.
        make_sub_meta_on_disk(&child_dest, "sub");
        let loader = InMemLoader::new()
            .with(
                meta_dir.clone(),
                meta_manifest_with("root", vec![child("https://example.com/sub.git", "sub")]),
            )
            .with(child_dest.clone(), meta_manifest_with("sub", vec![]));
        let backend = InMemGit::new();
        let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
        let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
        assert_eq!(report.metas_visited, 1, "no recursion → only the root meta");
    }
1484
    /// `max_depth: Some(N)` caps recursion at N levels of nesting.
    /// Build a 3-level chain (root → mid → leaf) and assert
    /// `max_depth: Some(1)` visits root + mid (depth 0 + 1) but NOT
    /// leaf (depth 2).
    #[test]
    fn test_walker_v1_2_0_phase3_max_depth_n_stops_at_n_levels() {
        let tmp = tempfile::tempdir().unwrap();
        let root_dir = tmp.path().to_path_buf();
        // Nested layout on disk: `<root>/mid/leaf`, each level carrying
        // its own `.grex/pack.yaml`.
        let mid_dir = root_dir.join("mid");
        let leaf_dir = mid_dir.join("leaf");
        make_sub_meta_on_disk(&mid_dir, "mid");
        make_sub_meta_on_disk(&leaf_dir, "leaf");
        // Each level's manifest declares the next level as its only child.
        let loader = InMemLoader::new()
            .with(
                root_dir.clone(),
                meta_manifest_with("root", vec![child("https://example.com/mid.git", "mid")]),
            )
            .with(
                mid_dir.clone(),
                meta_manifest_with("mid", vec![child("https://example.com/leaf.git", "leaf")]),
            )
            .with(leaf_dir.clone(), meta_manifest_with("leaf", vec![]));
        let backend = InMemGit::new();
        let opts = SyncMetaOptions { max_depth: Some(1), ..SyncMetaOptions::default() };
        let report = sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect("ok");
        // depth 0 = root, depth 1 = mid → max_depth: Some(1) visits
        // root + mid (2 metas) and stops before recursing into leaf.
        assert_eq!(report.metas_visited, 2, "max_depth: Some(1) visits root + mid only");
    }
1514
/// Helper: pre-populate a sub-meta directory at `dir`.
///
/// Creates `dir/.grex/` and a stub `dir/.git/` directory (so the
/// classifier sees it as PresentDeclared), then writes
/// `dir/.grex/pack.yaml` declaring a `schema_version: "1"` pack of
/// `type: meta` carrying `name`.
///
/// # Panics
///
/// Panics if either directory or the manifest file cannot be created.
fn make_sub_meta_on_disk(dir: &Path, name: &str) {
    let grex_dir = dir.join(".grex");
    std::fs::create_dir_all(&grex_dir).unwrap();
    std::fs::create_dir_all(dir.join(".git")).unwrap();
    let yaml = format!("schema_version: \"1\"\nname: {name}\ntype: meta\n");
    // Join component-wise instead of embedding a `/` in the path literal.
    std::fs::write(grex_dir.join("pack.yaml"), yaml).unwrap();
}
1524
1525 /// Helper: collect the destinations Phase 1 recorded for a given
1526 /// parent meta from the rolled-up report.
1527 fn destinations_under(report: &SyncMetaReport, parent: &Path) -> Vec<PathBuf> {
1528 report
1529 .phase1_classifications
1530 .iter()
1531 .filter(|(p, _, _)| p == parent)
1532 .map(|(_, d, _)| d.clone())
1533 .collect()
1534 }
1535
/// Children resolve relative to the meta that declares them: a child of
/// the root lands at `<root>/<child>` rather than under some global
/// workspace anchor, and when the walker recurses into that child,
/// `<root>/<child>` becomes the parent dir for the grandchild.
#[test]
fn test_walker_v1_2_0_parent_relative_path_resolution() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path().to_path_buf();
    // The 1.c path-segment validator rejects slashes in the `path:`
    // field, so deeper nesting is built by chaining single-segment
    // children across recursion frames.
    let tools = root.join("tools");
    let foo = tools.join("foo");
    make_sub_meta_on_disk(&tools, "tools");
    make_sub_meta_on_disk(&foo, "foo");

    let root_manifest =
        meta_manifest_with("root", vec![child("https://example.com/tools.git", "tools")]);
    let tools_manifest =
        meta_manifest_with("tools", vec![child("https://example.com/foo.git", "foo")]);
    let foo_manifest = meta_manifest_with("foo", vec![]);
    let loader = InMemLoader::new()
        .with(root.clone(), root_manifest)
        .with(tools.clone(), tools_manifest)
        .with(foo.clone(), foo_manifest);

    let git = InMemGit::new();
    let defaults = SyncMetaOptions::default();
    let report = sync_meta(&root, &git, &loader, &defaults, &[]).expect("ok");

    // root → tools → foo: three metas in total.
    assert_eq!(report.metas_visited, 3);
    // Every Phase 1 record must place the dest beneath its own parent.
    report.phase1_classifications.iter().for_each(|(parent, dest, _class)| {
        assert!(
            dest.starts_with(parent),
            "child dest {} must descend from parent {}",
            dest.display(),
            parent.display()
        );
    });
    // Spot-check the chain: root records `tools`, tools records `foo`.
    let under_root = destinations_under(&report, &root);
    let under_tools = destinations_under(&report, &tools);
    assert_eq!(under_root, vec![tools]);
    assert_eq!(under_tools, vec![foo]);
}
1580
// -----------------------------------------------------------------
// v1.2.2 — `sync_meta` cycle detection (Phase 3 recursion edge).
//
// Discharges `sync_meta_no_cycle_infinite_clone` in
// `proof/Grex/Walker.lean`. The identity scheme is `url@ref`, so the
// same repo at two different refs is NOT a cycle (covered by the
// positive cases below, sibling and nested).
// -----------------------------------------------------------------
1589
1590 /// `child_with_ref` mirrors `child()` but lets the caller pin a
1591 /// specific ref so two children of the same URL get distinct
1592 /// `pack_identity_for_child` strings (`url@ref`).
1593 fn child_with_ref(url: &str, path: &str, r#ref: &str) -> ChildRef {
1594 ChildRef {
1595 url: url.to_string(),
1596 path: Some(path.to_string()),
1597 r#ref: Some(r#ref.to_string()),
1598 }
1599 }
1600
/// Self-loop: pack A declares itself (same URL, no ref) as a child.
/// The walker must abort with `CycleDetected` rather than recurse
/// infinitely, and the reported chain must contain the recurring
/// identity at least twice (once earlier, once as the final entry).
#[test]
fn cycle_self_loop_aborts() {
    let tmp = tempfile::tempdir().unwrap();
    let root_dir = tmp.path().to_path_buf();
    // Lay out a self-pointing pack: `<root>/a` is a sub-meta whose own
    // manifest declares a child with the SAME URL/ref pointing back at
    // itself (placed at a fresh path so the on-disk dest is distinct,
    // but the pack identity collides).
    let a_dir = root_dir.join("a");
    let a_self_dir = a_dir.join("a");
    make_sub_meta_on_disk(&a_dir, "a");
    make_sub_meta_on_disk(&a_self_dir, "a");
    let url_a = "https://example.com/a.git";
    let loader = InMemLoader::new()
        .with(root_dir.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
        // `a` declares itself — same url, same (empty) ref → same identity.
        .with(a_dir, meta_manifest_with("a", vec![child(url_a, "a")]))
        .with(a_self_dir, meta_manifest_with("a", vec![]));
    let backend = InMemGit::new();
    let opts = SyncMetaOptions::default();
    let err =
        sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect_err("self-loop must abort");
    let chain = match err {
        TreeError::CycleDetected { chain } => chain,
        other => panic!("expected CycleDetected, got {other:?}"),
    };
    // The root carries no `url` identity in this scheme, so the chain is
    // expected to be the identity of `a` twice: pushed once when entering
    // `a` from root, and again when `a` tries to enter itself. The
    // assertions below only pin the weaker properties.
    // Built once up front rather than re-allocated for every chain entry.
    let cyclic_id = format!("url:{url_a}@");
    assert!(
        chain.iter().any(|id| *id == cyclic_id),
        "chain must mention the cyclic url, got {chain:?}"
    );
    assert!(chain.len() >= 2, "self-loop chain has at least 2 entries: {chain:?}");
    let last = chain.last().unwrap();
    let first_match = chain.iter().position(|id| id == last).unwrap();
    assert!(
        first_match < chain.len() - 1,
        "the recurring identity must appear earlier in the chain: {chain:?}"
    );
}
1648
/// Three-node cycle A → B → C → A. The walk must fail with
/// `CycleDetected`, and the chain must list the three identities in
/// entry order followed by the recurring A.
#[test]
fn cycle_three_node_aborts() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path().to_path_buf();
    // On disk: root → a → b → c → a. The second `a` gets a fresh slot so
    // classification succeeds; it is the identity collision, not the
    // path, that trips the cycle detector.
    let dir_a = root.join("a");
    let dir_b = dir_a.join("b");
    let dir_c = dir_b.join("c");
    let dir_a_again = dir_c.join("a");
    for (dir, name) in [(&dir_a, "a"), (&dir_b, "b"), (&dir_c, "c"), (&dir_a_again, "a")] {
        make_sub_meta_on_disk(dir, name);
    }

    let url_a = "https://example.com/a.git";
    let url_b = "https://example.com/b.git";
    let url_c = "https://example.com/c.git";
    let loader = InMemLoader::new()
        .with(root.clone(), meta_manifest_with("root", vec![child(url_a, "a")]))
        .with(dir_a, meta_manifest_with("a", vec![child(url_b, "b")]))
        .with(dir_b, meta_manifest_with("b", vec![child(url_c, "c")]))
        // `c` declares `a` again, closing the cycle.
        .with(dir_c, meta_manifest_with("c", vec![child(url_a, "a")]))
        .with(dir_a_again, meta_manifest_with("a", vec![]));

    let git = InMemGit::new();
    let defaults = SyncMetaOptions::default();
    let failure = sync_meta(&root, &git, &loader, &defaults, &[])
        .expect_err("three-node cycle must abort");
    match failure {
        TreeError::CycleDetected { chain } => {
            // Entry order a, b, c, then the recurring `a` appended at the
            // point of detection.
            let expected: Vec<String> =
                [url_a, url_b, url_c, url_a].iter().map(|url| format!("url:{url}@")).collect();
            assert_eq!(chain, expected);
        }
        other => panic!("expected CycleDetected, got {other:?}"),
    }
}
1693
/// One repo at two refs is NOT a cycle (sibling variant). The root
/// declares two children with the SAME URL pinned to `main` and `dev`.
/// Because identities are `url@ref`, the siblings are distinct and the
/// walk must succeed.
#[test]
fn same_repo_two_refs_no_cycle() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path().to_path_buf();
    let shared_url = "https://example.com/b.git";
    let on_main = root.join("b-main");
    let on_dev = root.join("b-dev");
    make_sub_meta_on_disk(&on_main, "b-main");
    make_sub_meta_on_disk(&on_dev, "b-dev");

    let siblings = vec![
        child_with_ref(shared_url, "b-main", "main"),
        child_with_ref(shared_url, "b-dev", "dev"),
    ];
    let loader = InMemLoader::new()
        .with(root.clone(), meta_manifest_with("root", siblings))
        .with(on_main, meta_manifest_with("b-main", vec![]))
        .with(on_dev, meta_manifest_with("b-dev", vec![]));

    let git = InMemGit::new();
    let defaults = SyncMetaOptions::default();
    let report = sync_meta(&root, &git, &loader, &defaults, &[])
        .expect("same url at distinct refs is NOT a cycle");
    // root + b@main + b@dev.
    assert_eq!(report.metas_visited, 3);
    assert!(
        report.errors.is_empty(),
        "no errors expected when the two children differ only by ref: {:?}",
        report.errors
    );
}
1732
/// One repo at two refs is NOT a cycle (nested / ancestor-stack
/// variant). Pack A (`foo` at `main`) declares pack B (`foo` at `dev`)
/// as its child. With `url@ref` identities, `url:foo@main` and
/// `url:foo@dev` differ, so the detector must stay quiet even though
/// B's URL matches an ancestor already on the stack — a path the
/// sibling variant does not reach.
#[test]
fn same_repo_two_refs_nested_no_cycle() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path().to_path_buf();
    let shared_url = "https://example.com/foo.git";
    let outer = root.join("a");
    let inner = outer.join("b");
    make_sub_meta_on_disk(&outer, "a");
    make_sub_meta_on_disk(&inner, "b");

    let root_manifest = meta_manifest_with("root", vec![child_with_ref(shared_url, "a", "main")]);
    // `a` (foo@main) declares `b` (foo@dev): same URL, different ref.
    let outer_manifest = meta_manifest_with("a", vec![child_with_ref(shared_url, "b", "dev")]);
    let inner_manifest = meta_manifest_with("b", vec![]);
    let loader = InMemLoader::new()
        .with(root.clone(), root_manifest)
        .with(outer, outer_manifest)
        .with(inner, inner_manifest);

    let git = InMemGit::new();
    let defaults = SyncMetaOptions::default();
    let report = sync_meta(&root, &git, &loader, &defaults, &[])
        .expect("nested same-url at distinct refs is NOT a cycle");
    // root → a → b: the walk has to reach depth 2, i.e. three metas.
    assert_eq!(report.metas_visited, 3, "walker must recurse to depth 2");
    assert!(
        report.errors.is_empty(),
        "no errors expected when ancestor and descendant differ only by ref: {:?}",
        report.errors
    );
}
1769}