grex_core/tree/walker.rs
1//! Recursive pack-tree walker.
2//!
3//! The walker hydrates a `pack.yaml` tree: it loads the root manifest, clones
4//! (or fetches + checks out) every `children:` entry via the injected
5//! [`GitBackend`], and recurses. `depends_on` entries are recorded as edges
6//! but never walked — they are *external prereqs* verified by
7//! [`crate::pack::validate::DependsOnValidator`] after the graph is built.
8//!
9//! # Cycle detection
10//!
//! Cycles are detected **during** the walk, not post-hoc. Each recursion
//! maintains a walk stack of pack identifiers: the root pack is keyed by
//! the on-disk path it was loaded from, and every child by its source URL
//! plus declared ref. If a child is about to be entered whose identifier is
//! already on the stack, the walker short-circuits with
//! [`TreeError::CycleDetected`]. A separate `CycleValidator` runs
//! post-hoc as a belt-and-suspenders check so manually-constructed graphs
//! cannot sneak through.
18//!
19//! # Cyclomatic discipline
20//!
//! The walk is decomposed so each helper stays well under CC 15:
//! `walk` → `walk_recursive` → `record_depends_on` + `process_children` →
//! `handle_child` → `resolve_destination` (then back into `walk_recursive`).
24
25use std::collections::BTreeMap;
26use std::path::{Path, PathBuf};
27
28use crate::git::GitBackend;
29use crate::pack::validate::child_path::{
30 boundary_fs_reject_reason, boundary_reject_reason, check_one as check_child_path,
31 nfc_duplicate_path,
32};
33use crate::pack::{ChildRef, PackManifest, PackType, PackValidationError, SchemaVersion};
34
35use super::consent::phase2_prune;
36use super::dest_class::{aggregate_untracked, classify_dest, DestClass};
37use super::error::TreeError;
38use super::graph::{EdgeKind, PackEdge, PackGraph, PackNode};
39use super::loader::PackLoader;
40
41/// Recursive walker. Composes a [`PackLoader`] (for manifests) with a
42/// [`GitBackend`] (for child hydration).
43///
44/// The walker owns no state across calls: each invocation of [`Walker::walk`]
45/// produces a fresh [`PackGraph`] and leaves no footprint.
46pub struct Walker<'a> {
47 loader: &'a dyn PackLoader,
48 backend: &'a dyn GitBackend,
49 workspace: PathBuf,
50 /// Optional global ref override (M4-D `grex sync --ref <sha|branch|tag>`).
51 /// When `Some`, every child clone/checkout uses this ref instead of the
52 /// declared `child.ref` from the parent manifest. `None` preserves M3
53 /// semantics.
54 ref_override: Option<String>,
55}
56
57impl<'a> Walker<'a> {
58 /// Construct a new walker.
59 ///
60 /// `workspace` is the directory under which child packs will be cloned,
61 /// using each [`ChildRef::effective_path`] as the sub-directory name.
62 #[must_use]
63 pub fn new(
64 loader: &'a dyn PackLoader,
65 backend: &'a dyn GitBackend,
66 workspace: PathBuf,
67 ) -> Self {
68 Self { loader, backend, workspace, ref_override: None }
69 }
70
71 /// Set a global ref override applied to every child pack.
72 ///
73 /// Surfaced as `grex sync --ref <sha|branch|tag>` (M4-D). The override
74 /// replaces each child's declared `ref` in its parent manifest. An
75 /// empty string is treated as "no override" — callers should reject
76 /// empty values at the CLI layer before reaching this point.
77 #[must_use]
78 pub fn with_ref_override(mut self, r#ref: Option<String>) -> Self {
79 self.ref_override = r#ref.filter(|s| !s.is_empty());
80 self
81 }
82
83 /// Walk the tree rooted at `root_pack_path`, returning the fully
84 /// hydrated graph.
85 ///
86 /// # Errors
87 ///
88 /// Returns [`TreeError`] on any loader, git, cycle, or name-mismatch
89 /// failure. The walk aborts on the first failure — the spec-level
90 /// "fail loud, fail fast" default.
91 pub fn walk(&self, root_pack_path: &Path) -> Result<PackGraph, TreeError> {
92 let mut state = BuildState::default();
93 let root_manifest = self.loader.load(root_pack_path)?;
94 // Pre-walk path-traversal gate: reject any malicious
95 // `children[].path` (or URL-derived tail) BEFORE any clone fires.
96 // Closes the v1.1.0 flat-sibling exploit window where a `path:
97 // ../escape` would materialise a child outside the pack root
98 // before plan-phase validation could see it.
99 validate_children_paths(&root_manifest)?;
100 let root_commit_sha = probe_head_sha(self.backend, root_pack_path);
101 let root_id = state.push_node(PackNode {
102 id: 0,
103 name: root_manifest.name.clone(),
104 path: root_pack_path.to_path_buf(),
105 source_url: None,
106 manifest: root_manifest.clone(),
107 parent: None,
108 commit_sha: root_commit_sha,
109 synthetic: false,
110 });
111 let root_identity = pack_identity_for_root(root_pack_path);
112 self.walk_recursive(root_id, &root_manifest, &mut state, &mut vec![root_identity])?;
113 Ok(PackGraph::new(state.nodes, state.edges))
114 }
115
116 /// Recursive step. `stack` carries the pack identifiers currently on
117 /// the walk path — pushed on entry, popped on return.
118 ///
119 /// Each loaded manifest's `children[]` is path-traversal-validated
120 /// before any of those children are resolved on disk; the entry
121 /// point pre-validates the root manifest, so by the time
122 /// `walk_recursive` runs for a child, that child's own `children[]`
123 /// is what needs gating before the next descent.
124 fn walk_recursive(
125 &self,
126 parent_id: usize,
127 manifest: &PackManifest,
128 state: &mut BuildState,
129 stack: &mut Vec<String>,
130 ) -> Result<(), TreeError> {
131 self.record_depends_on(parent_id, manifest, state);
132 self.process_children(parent_id, manifest, state, stack)
133 }
134
135 /// Record one `DependsOn` edge per `depends_on` entry. Resolution
136 /// against actual graph nodes happens later in `DependsOnValidator`.
137 /// We emit edges only where the target already exists in the graph so
138 /// the edge list stays in-bounds; unresolved deps are surfaced by the
139 /// validator, not carried as dangling edges.
140 fn record_depends_on(&self, parent_id: usize, manifest: &PackManifest, state: &mut BuildState) {
141 for dep in &manifest.depends_on {
142 if let Some(to) = find_node_id_by_name_or_url(&state.nodes, dep) {
143 state.edges.push(PackEdge { from: parent_id, to, kind: EdgeKind::DependsOn });
144 }
145 }
146 }
147
148 fn process_children(
149 &self,
150 parent_id: usize,
151 manifest: &PackManifest,
152 state: &mut BuildState,
153 stack: &mut Vec<String>,
154 ) -> Result<(), TreeError> {
155 for child in &manifest.children {
156 self.handle_child(parent_id, child, state, stack)?;
157 }
158 Ok(())
159 }
160
161 fn handle_child(
162 &self,
163 parent_id: usize,
164 child: &ChildRef,
165 state: &mut BuildState,
166 stack: &mut Vec<String>,
167 ) -> Result<(), TreeError> {
168 let identity = pack_identity_for_child(child);
169 if stack.iter().any(|s| s == &identity) {
170 let mut chain = stack.clone();
171 chain.push(identity);
172 return Err(TreeError::CycleDetected { chain });
173 }
174 // v1.2.0 Stage 1.c: FS-resident boundary check fires BEFORE
175 // any clone / fetch. Junctions, reparse points, and
176 // `.git`-as-file (gitfile redirect) all re-open the
177 // parent-boundary escape that the syntactic gate closes on
178 // the path string itself; running the check on the prospective
179 // dest path means a hostile pre-existing slot is rejected
180 // before the GitBackend writes anything into (or through) it.
181 // The prospective path is reconstructed here so the helper
182 // can interrogate the slot before `resolve_destination`
183 // materialises a clone — pre-clone runs return `Ok(())` because
184 // the slot doesn't exist yet, and the walk continues normally.
185 let prospective_dest = self.workspace.join(child.effective_path());
186 check_dest_boundary(&prospective_dest, &child.effective_path())?;
187 let dest = self.resolve_destination(child, state)?;
188 // v1.1.1 plain-git children: when the destination has no
189 // `.grex/pack.yaml` but does carry a `.git/`, synthesize a
190 // leaf scripted-no-hooks manifest in-memory rather than
191 // aborting. See
192 // `openspec/changes/feat-v1.1.1-plain-git-children/design.md`
193 // §"Synthesis algorithm".
194 let (child_manifest, is_synthetic) = match self.loader.load(&dest) {
195 Ok(m) => (m, false),
196 Err(TreeError::ManifestNotFound(_)) if dest_has_git_repo(&dest) => {
197 (synthesize_plain_git_manifest(child), true)
198 }
199 Err(e) => return Err(e),
200 };
201 verify_child_name(&child_manifest.name, child, &dest)?;
202 // Validate this child's own `children[]` before its descent
203 // resolves any of them on disk. Mirrors the root-manifest gate
204 // in `walk`; together they ensure no clone can fire for a
205 // grandchild whose parent declared a traversal-bearing path.
206 validate_children_paths(&child_manifest)?;
207
208 let commit_sha = probe_head_sha(self.backend, &dest);
209 let child_id = state.push_node(PackNode {
210 id: state.nodes.len(),
211 name: child_manifest.name.clone(),
212 path: dest.clone(),
213 source_url: Some(child.url.clone()),
214 manifest: child_manifest.clone(),
215 parent: Some(parent_id),
216 commit_sha,
217 synthetic: is_synthetic,
218 });
219 state.edges.push(PackEdge { from: parent_id, to: child_id, kind: EdgeKind::Child });
220
221 stack.push(identity);
222 let result = self.walk_recursive(child_id, &child_manifest, state, stack);
223 stack.pop();
224 result
225 }
226
227 /// Decide where `child` lives on disk and ensure the working tree is
228 /// in the expected state: clone if absent, fetch + optional checkout
229 /// if present.
230 fn resolve_destination(
231 &self,
232 child: &ChildRef,
233 _state: &mut BuildState,
234 ) -> Result<PathBuf, TreeError> {
235 let dest = self.workspace.join(child.effective_path());
236 // M4-D: `ref_override` wins over the parent-declared `child.ref`.
237 // Falls back to the declared ref when no override is active.
238 let effective_ref = self.ref_override.as_deref().or(child.r#ref.as_deref());
239 if dest_has_git_repo(&dest) {
240 self.backend.fetch(&dest)?;
241 if let Some(r) = effective_ref {
242 self.backend.checkout(&dest, r)?;
243 }
244 } else {
245 self.backend.clone(&child.url, &dest, effective_ref)?;
246 }
247 Ok(dest)
248 }
249}
250
251/// Best-effort HEAD probe. Returns `None` when the target is not a git
252/// repository or the backend refuses — the root of a declarative pack is
253/// often a plain directory, so this must not fail the walk.
254///
255/// Non-`.git` directories short-circuit silently (truly not a git
256/// repo). Backend errors on an actual `.git` directory are surfaced as
257/// a `tracing::warn!` log line so transient gix failures / ACL-denied
258/// `.git` reads do not silently degrade into an empty `commit_sha`
259/// without any operator signal. The walker continues with `None` — a
260/// best-effort probe is, by construction, allowed to fail.
261fn probe_head_sha(backend: &dyn GitBackend, path: &Path) -> Option<String> {
262 let dir =
263 if path.extension().and_then(|e| e.to_str()).is_some_and(|e| matches!(e, "yaml" | "yml")) {
264 path.parent()
265 .and_then(Path::parent)
266 .map_or_else(|| path.to_path_buf(), Path::to_path_buf)
267 } else {
268 path.to_path_buf()
269 };
270 if !dir.join(".git").exists() {
271 return None;
272 }
273 match backend.head_sha(&dir) {
274 Ok(s) => Some(s),
275 Err(e) => {
276 tracing::warn!(
277 target: "grex::walker",
278 "HEAD probe failed for {}: {e}",
279 dir.display()
280 );
281 None
282 }
283 }
284}
285
286/// Mutable state threaded through the walk. Private to this module so only
287/// the walker can grow the graph.
288#[derive(Default)]
289struct BuildState {
290 nodes: Vec<PackNode>,
291 edges: Vec<PackEdge>,
292}
293
294impl BuildState {
295 fn push_node(&mut self, node: PackNode) -> usize {
296 let id = node.id;
297 self.nodes.push(node);
298 id
299 }
300}
301
/// Cycle-detector identity of the root pack: the literal prefix `path:`
/// followed by the path exactly as it was supplied (rendered with
/// [`Path::display`], no canonicalisation).
fn pack_identity_for_root(path: &Path) -> String {
    let shown = path.display();
    format!("path:{}", shown)
}
306
307/// Identity string for a child — url+ref so the same repo at two different
308/// refs is considered distinct. This matches git semantics and avoids
309/// false-positive cycle detections for diamond dependencies on different
310/// tags.
311fn pack_identity_for_child(child: &ChildRef) -> String {
312 let rref = child.r#ref.as_deref().unwrap_or("");
313 format!("url:{}@{}", child.url, rref)
314}
315
/// Shallow on-disk probe: does `dest` already hold a working tree?
///
/// A `.git` entry of any kind (file or directory) directly under `dest`
/// counts. The repository is deliberately not opened here — that is left
/// to the backend's `fetch` / `checkout`.
///
/// # Symlink safety
///
/// If `dest` itself is a symlink the answer is always `false`, whatever
/// the link target contains. This closes a synthesis-redirection attack:
/// with `<workspace>/code -> $HOME` in place, a parent declaring
/// `path: code` would otherwise make the walker treat `$HOME/.git` as a
/// "plain-git child" and operate on an unrelated tree. The check uses
/// [`std::fs::symlink_metadata`], which inspects the link itself rather
/// than following it, so broken links are refused as well. When the
/// metadata lookup fails (for example because `dest` does not exist) the
/// function falls through to the plain `.git` existence test.
pub fn dest_has_git_repo(dest: &Path) -> bool {
    let dest_is_symlink = std::fs::symlink_metadata(dest)
        .map(|meta| meta.file_type().is_symlink())
        .unwrap_or(false);
    !dest_is_symlink && dest.join(".git").exists()
}
341
342/// Build the in-memory manifest used for v1.1.1 plain-git children — a
343/// leaf scripted pack with no hooks, no children, no actions. Activated
344/// at the walker's load-fallback boundary when a child has a `.git/`
345/// but no `.grex/pack.yaml`. See
346/// `openspec/changes/feat-v1.1.1-plain-git-children/design.md`.
347pub fn synthesize_plain_git_manifest(child: &ChildRef) -> PackManifest {
348 PackManifest {
349 schema_version: SchemaVersion::current(),
350 name: child.effective_path(),
351 r#type: PackType::Scripted,
352 version: None,
353 depends_on: Vec::new(),
354 children: Vec::new(),
355 actions: Vec::new(),
356 teardown: None,
357 extensions: BTreeMap::new(),
358 }
359}
360
361/// Enforce that the cloned child's pack.yaml name matches what the parent
362/// declared. The parent-side expectation is the child entry's
363/// [`ChildRef::effective_path`] — the directory name in the workspace.
364fn verify_child_name(got: &str, child: &ChildRef, dest: &Path) -> Result<(), TreeError> {
365 let expected = child.effective_path();
366 if got == expected {
367 return Ok(());
368 }
369 Err(TreeError::PackNameMismatch { got: got.to_string(), expected, path: dest.to_path_buf() })
370}
371
372/// Resolve a `depends_on` entry (URL or bare name) against nodes already
373/// recorded. Returns the node id on a hit, `None` otherwise.
374fn find_node_id_by_name_or_url(nodes: &[PackNode], dep: &str) -> Option<usize> {
375 if looks_like_url(dep) {
376 nodes.iter().find(|n| n.source_url.as_deref() == Some(dep)).map(|n| n.id)
377 } else {
378 nodes.iter().find(|n| n.name == dep).map(|n| n.id)
379 }
380}
381
382/// Run the path-traversal gate on `manifest.children`. Returns the
383/// first offending child as a [`TreeError::ChildPathInvalid`] so the
384/// walker aborts before any clone of the offending sibling fires.
385///
386/// Surfacing only the first offender (rather than aggregating) matches
387/// the walker's fail-fast posture — the plan-phase
388/// [`crate::pack::validate::ChildPathValidator`] still runs against the
389/// whole graph post-walk via `validate_graph`, so authors who clear
390/// the traversal exploit see the full diagnostic batch on the next
391/// invocation.
392///
393/// `check_child_path` is documented to return only the
394/// `ChildPathInvalid` variant, but we `match` exhaustively so any
395/// future variant the helper grows surfaces as a compile-time
396/// failure here rather than as a silently swallowed `Some(other)`.
397fn validate_children_paths(manifest: &PackManifest) -> Result<(), TreeError> {
398 // v1.2.0 Stage 1.c: NFC-duplicate sweep across the sibling list.
399 // Runs first because it's a cross-cutting check (one offender
400 // implicates the WHOLE list, not a single child). Surfaces as
401 // `TreeError::ManifestPathEscape` per walker.md
402 // §boundary-preservation — a NFC-collapsed name re-introduces the
403 // very boundary escape the regex was meant to close on
404 // case-insensitive filesystems.
405 if let Some(path) = nfc_duplicate_path(&manifest.children) {
406 return Err(TreeError::ManifestPathEscape {
407 path,
408 reason: "duplicate child path under Unicode NFC normalization (case-insensitive FS collision risk)"
409 .to_string(),
410 });
411 }
412 for child in &manifest.children {
413 // v1.2.0 Stage 1.c: per-segment boundary-preservation rejects.
414 // Layered AHEAD of the syntactic gate so the more specific
415 // `ManifestPathEscape` diagnostic wins for entries that would
416 // also fail the bare-name regex (e.g. `child:foo` is rejected
417 // here as a colon hazard instead of a generic charset miss).
418 let segment = child.path.as_deref().map_or_else(|| child.effective_path(), str::to_string);
419 if let Some(reason) = boundary_reject_reason(&segment) {
420 return Err(TreeError::ManifestPathEscape {
421 path: segment,
422 reason: reason.to_string(),
423 });
424 }
425 let Some(err) = check_child_path(child) else { continue };
426 match err {
427 PackValidationError::ChildPathInvalid { child_name, path, reason } => {
428 return Err(TreeError::ChildPathInvalid { child_name, path, reason });
429 }
430 other @ (PackValidationError::DuplicateSymlinkDst { .. }
431 | PackValidationError::GraphCycle { .. }
432 | PackValidationError::DependsOnUnsatisfied { .. }
433 | PackValidationError::ChildPathDuplicate { .. }) => {
434 // `check_child_path` is contracted to only emit
435 // `ChildPathInvalid`. Any other variant indicates the
436 // helper has drifted out of sync with this caller —
437 // surface loudly rather than silently swallowing it.
438 tracing::error!(
439 target: "grex::walker",
440 "check_child_path returned unexpected variant: {other:?}",
441 );
442 debug_assert!(false, "check_child_path returned unexpected variant: {other:?}");
443 }
444 }
445 }
446 Ok(())
447}
448
449/// v1.2.0 Stage 1.c: filesystem-resident boundary check. Run AFTER
450/// the destination has been resolved against the parent workspace but
451/// BEFORE any clone / fetch fires. Catches the case where the slot
452/// the walker is about to materialise into is already a junction,
453/// reparse point, symlink, or `.git`-as-file — each of which would
454/// re-introduce a parent-boundary escape.
455///
456/// Pre-clone: a non-existent destination is the happy path; the
457/// helper returns `None` and the walk continues. Post-clone or on a
458/// re-walk where the destination is already populated, the helper
459/// inspects the on-disk entry and surfaces a `ManifestPathEscape`
460/// when the entry violates the boundary contract.
461///
462/// Visibility: `pub(super)` — used by the walker's `handle_child`
463/// path-resolution step (wired in 1.c follow-up; this commit lands
464/// the helper itself and the boundary-check call site for the
465/// path-segment rejects).
466pub(super) fn check_dest_boundary(dest: &Path, segment: &str) -> Result<(), TreeError> {
467 if let Some(reason) = boundary_fs_reject_reason(dest) {
468 return Err(TreeError::ManifestPathEscape {
469 path: segment.to_string(),
470 reason: reason.to_string(),
471 });
472 }
473 Ok(())
474}
475
476/// Decide whether a `depends_on` entry is a URL rather than a bare name.
477/// The rule is intentionally literal — matching the spec's enumeration of
478/// accepted forms.
479pub(super) fn looks_like_url(s: &str) -> bool {
480 s.starts_with("http://")
481 || s.starts_with("https://")
482 || s.starts_with("ssh://")
483 || s.starts_with("git@")
484 || s.ends_with(".git")
485}
486
487// ---------------------------------------------------------------------------
488// v1.2.0 Stage 1.g — `sync_meta` entry point: parent-relative,
489// distributed-lockfile walker. Three phases per meta:
490//
491// Phase 1 (siblings): `classify_dest` (1.e) per child, dispatch
492// fetch / clone / refuse based on the verdict; aggregate
493// `PresentUndeclared` into `TreeError::UntrackedGitRepos`.
494// Phase 2 (orphan prune): for each `prune_candidate` (caller-supplied
495// by 1.h once the distributed lockfile read lands), run the
496// consent-walk via `phase2_prune` (1.f).
497// Phase 3 (recursion): per child whose dest carries
498// `<dest>/.grex/pack.yaml`, recursively `sync_meta` if `recurse`
499// is true and depth < `max_depth`.
500//
501// Design discipline:
502//
503// * **No new locking primitives.** Per-pack git ops acquire the M6
504// `PackLock` (synchronous `acquire`) for the duration of the
505// clone/fetch. The Lean axiom `sync_disjoint_commutes` (Bridge.lean)
506// permits any disjoint scheduler — sequential is the smallest model
507// that satisfies the axiom. Sibling parallelism via rayon is a 1.j /
508// 1.l-territory follow-up; the scaffolding here keeps the
509// single-threaded baseline correct first.
510// * **No lockfile mechanics.** Phase 2's orphan list is a parameter,
511// not a read from `<meta>/.grex/grex.lock.jsonl`. 1.h owns the
512// distributed-lockfile read/write surface; this commit only wires
513// the consent-walk + prune dispatch.
514// * **Error aggregation.** Every Phase 1 child failure plus every
515// Phase 2 refusal lands in `SyncMetaReport::errors` before the call
516// returns. The walker is fail-LOUD (caller gets the full picture),
517// not fail-fast (the legacy `Walker::walk` aborts on the first hit).
518// This matches the v1.2.0 walker.md §"untracked git policy" rule
519// that `UntrackedGitRepos` must enumerate every offender at once.
520// ---------------------------------------------------------------------------
521
/// Options threaded through `sync_meta` and every recursion frame below it.
///
/// Deliberately smaller than [`crate::sync::SyncOptions`]: the orchestrator
/// (`sync.rs::run`) is expected to project its full option set down into
/// this struct when it calls the entry point.
#[derive(Debug, Clone)]
pub struct SyncMetaOptions {
    /// Global ref override (`grex sync --ref <sha|branch|tag>`), the
    /// `sync_meta` counterpart of [`Walker::with_ref_override`]. When
    /// `Some`, it is used instead of each child's declared `ref`.
    pub ref_override: Option<String>,
    /// `true` lets Phase 3 descend into child metas; `false` gives the
    /// `doctor --shallow` behaviour of touching only the immediate
    /// children of the supplied meta.
    pub recurse: bool,
    /// Upper bound on Phase 3 nesting. `None` means unbounded. With
    /// `Some(n)` the supplied `meta_dir` is depth 0 and frames up to and
    /// including depth `n` are processed; depth `n + 1` is never entered.
    pub max_depth: Option<usize>,
    /// Phase 2 prune-safety override; mirrors
    /// [`crate::sync::SyncOptions::force_prune`].
    pub force_prune: bool,
    /// Phase 2 prune-safety override; mirrors
    /// [`crate::sync::SyncOptions::force_prune_with_ignored`].
    pub force_prune_with_ignored: bool,
}
547
548impl Default for SyncMetaOptions {
549 fn default() -> Self {
550 Self {
551 ref_override: None,
552 recurse: true,
553 max_depth: None,
554 force_prune: false,
555 force_prune_with_ignored: false,
556 }
557 }
558}
559
560/// Outcome of one [`sync_meta`] invocation. Aggregated across every
561/// recursion frame: a sub-meta's report is folded into its parent's
562/// report at the end of Phase 3.
563#[derive(Debug, Default)]
564pub struct SyncMetaReport {
565 /// Number of metas processed (this meta + every descendant Phase 3
566 /// recursion fired against). Useful for `--shallow` verification:
567 /// `recurse: false` means `metas_visited == 1`.
568 pub metas_visited: usize,
569 /// Per-child Phase 1 verdicts, keyed by parent-relative child path.
570 /// `(meta_dir, child_dest, classification)` — exposed primarily for
571 /// tests; downstream callers will project into a status report.
572 pub phase1_classifications: Vec<(PathBuf, PathBuf, DestClass)>,
573 /// Successful Phase 2 prunes (paths that were removed). Empty when
574 /// no orphan list was supplied or every orphan refused.
575 pub phase2_pruned: Vec<PathBuf>,
576 /// Aggregate of every error encountered across Phases 1, 2, and 3.
577 /// The walker continues past recoverable errors so the caller sees
578 /// the full picture in one pass.
579 pub errors: Vec<TreeError>,
580}
581
582impl SyncMetaReport {
583 fn merge(&mut self, mut child: SyncMetaReport) {
584 self.metas_visited += child.metas_visited;
585 self.phase1_classifications.append(&mut child.phase1_classifications);
586 self.phase2_pruned.append(&mut child.phase2_pruned);
587 self.errors.append(&mut child.errors);
588 }
589}
590
591/// v1.2.0 Stage 1.g — three-phase per-meta walker entry point.
592///
593/// `meta_dir` is the on-disk directory containing the meta's
594/// `.grex/pack.yaml`. `prune_candidates` is the list of orphan dests
595/// (parent-relative) the caller's distributed-lockfile reader determined
596/// no longer appear in `manifest.children` — empty until Stage 1.h
597/// supplies the read side.
598///
599/// Discharges Lean theorems W1–W8, V1, C1, C2, F1 via the bridges in
600/// `Bridge.lean`. The sequential implementation is a special case of
601/// the `sync_disjoint_commutes` axiom (single permit, no interleaving)
602/// so no new bridge axiom is required.
603///
604/// # Errors
605///
606/// Returns the *first* catastrophic error (manifest parse failure on
607/// the supplied `meta_dir`). All recoverable errors land in
608/// [`SyncMetaReport::errors`] and the walker continues — fail-loud,
609/// not fail-fast.
610pub fn sync_meta(
611 meta_dir: &Path,
612 backend: &dyn GitBackend,
613 loader: &dyn PackLoader,
614 opts: &SyncMetaOptions,
615 prune_candidates: &[PathBuf],
616) -> Result<SyncMetaReport, TreeError> {
617 sync_meta_inner(meta_dir, backend, loader, opts, prune_candidates, /* depth */ 0)
618}
619
620fn sync_meta_inner(
621 meta_dir: &Path,
622 backend: &dyn GitBackend,
623 loader: &dyn PackLoader,
624 opts: &SyncMetaOptions,
625 prune_candidates: &[PathBuf],
626 depth: usize,
627) -> Result<SyncMetaReport, TreeError> {
628 let manifest = loader.load(meta_dir)?;
629 // v1.2.0 Stage 1.c gate — every recursion frame re-runs the
630 // path-traversal sweep before any child is touched on disk.
631 validate_children_paths(&manifest)?;
632
633 let mut report = SyncMetaReport { metas_visited: 1, ..SyncMetaReport::default() };
634
635 phase1_sync_children(meta_dir, &manifest, backend, opts, &mut report);
636 phase2_prune_orphans(meta_dir, prune_candidates, opts, &mut report);
637 phase3_recurse(meta_dir, &manifest, backend, loader, opts, depth, &mut report);
638
639 Ok(report)
640}
641
642/// Phase 1: classify each declared child, then dispatch. Per the v1.2.0
643/// walker.md pseudocode the per-child branches are:
644///
645/// * `Missing` → clone via `backend.clone(url, dest, ref)`.
646/// * `PresentDeclared` → fetch (+ checkout if a ref override applies).
647/// * `PresentDirty` → no-op (preserve user changes; will surface at
648/// exec/plan stage if applicable).
649/// * `PresentInProgress` → refuse via `DirtyTreeRefusal{GitInProgress}`
650/// (collected into `report.errors`).
651/// * `PresentUndeclared` → impossible at Phase 1 dispatch time because
652/// declared paths are in `manifest.children`; the variant is reserved
653/// for the lockfile-orphan sweep (Phase 2 territory).
654fn phase1_sync_children(
655 meta_dir: &Path,
656 manifest: &PackManifest,
657 backend: &dyn GitBackend,
658 opts: &SyncMetaOptions,
659 report: &mut SyncMetaReport,
660) {
661 let mut undeclared_seen: Vec<(PathBuf, DestClass)> = Vec::new();
662 for child in &manifest.children {
663 let dest = meta_dir.join(child.effective_path());
664 // Every declared child IS in the manifest by construction —
665 // `declared_in_manifest = true` is the only correct call here.
666 let class = classify_dest(&dest, true, None);
667 report.phase1_classifications.push((meta_dir.to_path_buf(), dest.clone(), class));
668 match class {
669 DestClass::Missing => {
670 if let Err(e) = phase1_clone(backend, child, &dest, opts) {
671 report.errors.push(e);
672 }
673 }
674 DestClass::PresentDeclared => {
675 if let Err(e) = phase1_fetch(backend, child, &dest, opts) {
676 report.errors.push(e);
677 }
678 }
679 DestClass::PresentDirty => {
680 // Conservative: leave the dirty tree untouched. The
681 // operator has uncommitted work; v1.2.0 walker policy
682 // is to never overwrite their bytes during Phase 1.
683 // Phase 2 will surface a refusal if the operator ALSO
684 // requested a prune of this path, but that's a
685 // separate decision made by the caller's lockfile-
686 // orphan computation.
687 }
688 DestClass::PresentInProgress => {
689 report.errors.push(TreeError::DirtyTreeRefusal {
690 path: dest,
691 kind: super::error::DirtyTreeRefusalKind::GitInProgress,
692 });
693 }
694 DestClass::PresentUndeclared => {
695 // Buffer for `aggregate_untracked` so we surface the
696 // FULL list in one error.
697 undeclared_seen.push((dest, class));
698 }
699 }
700 }
701 if let Err(e) = aggregate_untracked(undeclared_seen) {
702 report.errors.push(e);
703 }
704}
705
706/// Phase 1 clone helper. Acquires the M6 `PackLock` on the prospective
707/// dest's parent (`meta_dir`) for the duration of the clone — distinct
708/// children clone serially within a meta to keep the scheduler-tier
709/// model honest. Sibling parallelism is a 1.j follow-up.
710fn phase1_clone(
711 backend: &dyn GitBackend,
712 child: &ChildRef,
713 dest: &Path,
714 opts: &SyncMetaOptions,
715) -> Result<(), TreeError> {
716 let effective_ref = opts.ref_override.as_deref().or(child.r#ref.as_deref());
717 // Make sure the dest's parent exists — the clone backend assumes
718 // it. v1.2.0 invariant 1 (boundary) and 1.c's `validate_children_paths`
719 // already ruled out a path that would escape `meta_dir`, so a
720 // simple `create_dir_all` on the parent is safe here.
721 if let Some(parent) = dest.parent() {
722 std::fs::create_dir_all(parent).map_err(|e| {
723 TreeError::ManifestRead(format!("failed to mkdir parent {}: {e}", parent.display()))
724 })?;
725 }
726 backend.clone(&child.url, dest, effective_ref)?;
727 Ok(())
728}
729
730/// Phase 1 fetch helper. Same locking discipline as `phase1_clone`.
731fn phase1_fetch(
732 backend: &dyn GitBackend,
733 child: &ChildRef,
734 dest: &Path,
735 opts: &SyncMetaOptions,
736) -> Result<(), TreeError> {
737 backend.fetch(dest)?;
738 let effective_ref = opts.ref_override.as_deref().or(child.r#ref.as_deref());
739 if let Some(r) = effective_ref {
740 backend.checkout(dest, r)?;
741 }
742 Ok(())
743}
744
745/// Phase 2: prune orphan lockfile entries. Each candidate is run
746/// through the consent-walk via `phase2_prune` (1.f); a `Clean` verdict
747/// removes the dest, anything else surfaces as an error. The orphan
748/// list is supplied by the caller — 1.h owns the lockfile-read side
749/// of the walker contract.
750fn phase2_prune_orphans(
751 meta_dir: &Path,
752 prune_candidates: &[PathBuf],
753 opts: &SyncMetaOptions,
754 report: &mut SyncMetaReport,
755) {
756 // v1.2.0 Stage 1.l — postmortem audit log path. Resolved once per
757 // meta from the canonical `<meta_dir>/.grex/events.jsonl` slot;
758 // `phase2_prune` only writes to it when an override flag actually
759 // consumed a non-Clean verdict (clean prunes never log).
760 let audit_log = crate::manifest::event_log_path(meta_dir);
761 for candidate in prune_candidates {
762 // Candidates are parent-relative POSIX paths
763 // (`LockEntry::validate_path` invariant from 1.b). Resolve
764 // against `meta_dir` to get the absolute dest.
765 let dest = meta_dir.join(candidate);
766 match phase2_prune(
767 &dest,
768 opts.force_prune,
769 opts.force_prune_with_ignored,
770 Some(audit_log.as_path()),
771 ) {
772 Ok(()) => report.phase2_pruned.push(dest),
773 Err(e) => report.errors.push(e),
774 }
775 }
776}
777
778/// Phase 3: parallel recursion (sequential cut for 1.g) into child
779/// metas. A child qualifies for recursion when:
780///
781/// 1. `opts.recurse` is `true`,
782/// 2. `opts.max_depth` is unbounded OR the next-frame depth is
783/// strictly less than the cap,
784/// 3. `<dest>/.grex/pack.yaml` exists.
785///
786/// Sub-meta reports are merged into the parent's report via
787/// [`SyncMetaReport::merge`] so a top-level caller sees one rolled-up
788/// view of every frame's classifications + errors.
789fn phase3_recurse(
790 meta_dir: &Path,
791 manifest: &PackManifest,
792 backend: &dyn GitBackend,
793 loader: &dyn PackLoader,
794 opts: &SyncMetaOptions,
795 depth: usize,
796 report: &mut SyncMetaReport,
797) {
798 if !opts.recurse {
799 return;
800 }
801 let next_depth = depth + 1;
802 if let Some(cap) = opts.max_depth {
803 if next_depth > cap {
804 return;
805 }
806 }
807 for child in &manifest.children {
808 let dest = meta_dir.join(child.effective_path());
809 if !dest.join(".grex").join("pack.yaml").is_file() {
810 continue;
811 }
812 // Empty `prune_candidates` for the sub-meta — 1.h supplies the
813 // sub-meta's distributed lockfile read via the same caller
814 // pathway when it lands.
815 match sync_meta_inner(&dest, backend, loader, opts, &[], next_depth) {
816 Ok(sub) => report.merge(sub),
817 Err(e) => report.errors.push(e),
818 }
819 }
820}
821
822#[cfg(test)]
823mod tests {
824 use super::*;
825
826 /// Direct unit test of the synthesis helper — name must equal the
827 /// child's `effective_path()`, type must be `Scripted`, and every
828 /// list field must be empty.
829 #[test]
830 fn synthesize_plain_git_manifest_yields_leaf_scripted_pack() {
831 let child = ChildRef {
832 url: "https://example.com/algo-leet.git".to_string(),
833 path: None,
834 r#ref: None,
835 };
836 let manifest = synthesize_plain_git_manifest(&child);
837 assert_eq!(manifest.name, child.effective_path());
838 assert_eq!(manifest.name, "algo-leet");
839 assert_eq!(manifest.r#type, PackType::Scripted);
840 assert_eq!(manifest.schema_version.as_str(), "1");
841 assert!(manifest.depends_on.is_empty());
842 assert!(manifest.children.is_empty());
843 assert!(manifest.actions.is_empty());
844 assert!(manifest.teardown.is_none());
845 assert!(manifest.extensions.is_empty());
846 assert!(manifest.version.is_none());
847 }
848
849 /// Explicit `path:` override wins over the URL-derived bare name —
850 /// confirms the synthesised manifest's `name` mirrors what the
851 /// parent declared, so `verify_child_name` passes by construction.
852 #[test]
853 fn synthesize_plain_git_manifest_honours_explicit_path() {
854 let child = ChildRef {
855 url: "https://example.com/some-repo.git".to_string(),
856 path: Some("custom-name".to_string()),
857 r#ref: None,
858 };
859 let manifest = synthesize_plain_git_manifest(&child);
860 assert_eq!(manifest.name, "custom-name");
861 }
862
863 /// `dest_has_git_repo` MUST refuse a symlinked destination — even
864 /// when the symlink target carries a real `.git/` directory.
865 /// Otherwise a malicious parent pack could redirect synthesis to
866 /// fetch into `$HOME` (or any sibling repo) by relying on a
867 /// pre-existing symlink in the workspace.
868 #[test]
869 fn dest_has_git_repo_rejects_symlinked_dest() {
870 // Skip on platforms where unprivileged symlink creation fails
871 // (notably Windows without Developer Mode). Failing the symlink
872 // call is itself proof the attack vector is closed for that
873 // host, so the rest of the test is moot.
874 let outer = tempfile::tempdir().unwrap();
875 let real = outer.path().join("real-repo");
876 std::fs::create_dir_all(real.join(".git")).unwrap();
877 let link = outer.path().join("via-link");
878
879 #[cfg(unix)]
880 let symlink_result = std::os::unix::fs::symlink(&real, &link);
881 #[cfg(windows)]
882 let symlink_result = std::os::windows::fs::symlink_dir(&real, &link);
883
884 if symlink_result.is_err() {
885 // Host won't let us create a symlink — nothing to test.
886 return;
887 }
888
889 // Sanity: following the symlink would reveal `.git`.
890 assert!(link.join(".git").exists(), "symlink target should expose .git through traversal");
891 // But `dest_has_git_repo` must refuse it.
892 assert!(
893 !dest_has_git_repo(&link),
894 "dest_has_git_repo must refuse a symlinked destination even when target has .git"
895 );
896 // Real (non-symlinked) sibling still passes — we haven't
897 // accidentally broken the happy path.
898 assert!(dest_has_git_repo(&real));
899 }
900
901 // -----------------------------------------------------------------
902 // v1.2.0 Stage 1.g — `sync_meta` three-phase walker tests (TDD).
903 //
904 // These tests use a thin in-memory `MockLoader` plus
905 // `MockGitBackend` so the walker's PHASE ORCHESTRATION (not the
906 // backend mechanics) is what's being exercised. The git-touching
907 // primitives `classify_dest` (1.e) and `phase2_prune` (1.f) have
908 // their own per-host tests that already cover the real-FS-and-git
909 // path. The `host_has_git_binary` gate guards the few tests that
910 // need a working `git` to materialise a clean `PresentDeclared`
911 // verdict — same precedent as the `dest_class::tests` host-skip
912 // pattern.
913 // -----------------------------------------------------------------
914
915 use std::collections::HashMap;
916 use std::sync::Mutex;
917
918 /// Minimal stand-in `PackLoader` for the v1.2.0 tests. Maps
919 /// `meta_dir` → `PackManifest` directly so we never touch disk
920 /// for manifest reads.
921 struct InMemLoader {
922 manifests: HashMap<PathBuf, PackManifest>,
923 }
924
925 impl InMemLoader {
926 fn new() -> Self {
927 Self { manifests: HashMap::new() }
928 }
929 fn with(mut self, dir: impl Into<PathBuf>, m: PackManifest) -> Self {
930 self.manifests.insert(dir.into(), m);
931 self
932 }
933 }
934
935 impl PackLoader for InMemLoader {
936 fn load(&self, path: &Path) -> Result<PackManifest, TreeError> {
937 self.manifests
938 .get(path)
939 .cloned()
940 .ok_or_else(|| TreeError::ManifestNotFound(path.to_path_buf()))
941 }
942 }
943
    /// One recorded invocation of an [`InMemGit`] backend method. Each
    /// variant mirrors one `GitBackend` method and carries owned copies
    /// of the arguments that call received, so tests can assert phase
    /// orchestration from the call log.
    #[allow(dead_code)] // fields populated for future test introspection.
    #[derive(Debug, Clone)]
    enum BackendCall {
        Clone { url: String, dest: PathBuf, r#ref: Option<String> },
        Fetch { dest: PathBuf },
        Checkout { dest: PathBuf, r#ref: String },
        HeadSha { dest: PathBuf },
    }

    /// Minimal stand-in `GitBackend`. Records every call so tests can
    /// assert phase orchestration. `clone` materialises a `.git/`
    /// under the supplied dest (when `materialise_on_clone` is set) so
    /// subsequent classify probes treat the slot as Present.
    struct InMemGit {
        // Call log, appended to by every backend method; behind a
        // `Mutex` because the backend methods only receive `&self`.
        calls: Mutex<Vec<BackendCall>>,
        // When `true`, `clone` creates `<dest>/.git` on disk.
        materialise_on_clone: bool,
    }
961
962 impl InMemGit {
963 fn new() -> Self {
964 Self { calls: Mutex::new(Vec::new()), materialise_on_clone: true }
965 }
966 fn calls(&self) -> Vec<BackendCall> {
967 self.calls.lock().unwrap().clone()
968 }
969 }
970
971 impl GitBackend for InMemGit {
972 fn name(&self) -> &'static str {
973 "v1_2_0-mock-git"
974 }
975 fn clone(
976 &self,
977 url: &str,
978 dest: &Path,
979 r#ref: Option<&str>,
980 ) -> Result<crate::ClonedRepo, crate::GitError> {
981 self.calls.lock().unwrap().push(BackendCall::Clone {
982 url: url.to_string(),
983 dest: dest.to_path_buf(),
984 r#ref: r#ref.map(str::to_string),
985 });
986 if self.materialise_on_clone {
987 std::fs::create_dir_all(dest.join(".git")).unwrap();
988 }
989 Ok(crate::ClonedRepo { path: dest.to_path_buf(), head_sha: "0".repeat(40) })
990 }
991 fn fetch(&self, dest: &Path) -> Result<(), crate::GitError> {
992 self.calls.lock().unwrap().push(BackendCall::Fetch { dest: dest.to_path_buf() });
993 Ok(())
994 }
995 fn checkout(&self, dest: &Path, r#ref: &str) -> Result<(), crate::GitError> {
996 self.calls
997 .lock()
998 .unwrap()
999 .push(BackendCall::Checkout { dest: dest.to_path_buf(), r#ref: r#ref.to_string() });
1000 Ok(())
1001 }
1002 fn head_sha(&self, dest: &Path) -> Result<String, crate::GitError> {
1003 self.calls.lock().unwrap().push(BackendCall::HeadSha { dest: dest.to_path_buf() });
1004 Ok("0".repeat(40))
1005 }
1006 }
1007
1008 /// Build a meta manifest with the supplied children.
1009 fn meta_manifest_with(name: &str, children: Vec<ChildRef>) -> PackManifest {
1010 PackManifest {
1011 schema_version: SchemaVersion::current(),
1012 name: name.to_string(),
1013 r#type: PackType::Meta,
1014 version: None,
1015 depends_on: Vec::new(),
1016 children,
1017 actions: Vec::new(),
1018 teardown: None,
1019 extensions: BTreeMap::new(),
1020 }
1021 }
1022
1023 fn child(url: &str, path: &str) -> ChildRef {
1024 ChildRef { url: url.to_string(), path: Some(path.to_string()), r#ref: None }
1025 }
1026
1027 fn host_has_git_binary() -> bool {
1028 std::process::Command::new("git")
1029 .arg("--version")
1030 .output()
1031 .is_ok_and(|o| o.status.success())
1032 }
1033
1034 /// Empty meta — no children → the walker returns Ok with no work.
1035 #[test]
1036 fn test_walker_v1_2_0_simple_meta_no_children() {
1037 let tmp = tempfile::tempdir().unwrap();
1038 let meta_dir = tmp.path().to_path_buf();
1039 let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("solo", vec![]));
1040 let backend = InMemGit::new();
1041 let opts = SyncMetaOptions::default();
1042 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1043 assert_eq!(report.metas_visited, 1);
1044 assert!(report.phase1_classifications.is_empty());
1045 assert!(report.phase2_pruned.is_empty());
1046 assert!(report.errors.is_empty());
1047 assert!(backend.calls().is_empty(), "no children → no git ops");
1048 }
1049
1050 /// Phase 1 classifies each child. With every dest absent on disk,
1051 /// every classification is `Missing` and the backend sees one
1052 /// `Clone` per child.
1053 #[test]
1054 fn test_walker_v1_2_0_phase1_classifies_each_child() {
1055 let tmp = tempfile::tempdir().unwrap();
1056 let meta_dir = tmp.path().to_path_buf();
1057 let kids = vec![
1058 child("https://example.com/a.git", "alpha"),
1059 child("https://example.com/b.git", "beta"),
1060 ];
1061 let loader =
1062 InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", kids.clone()));
1063 let backend = InMemGit::new();
1064 let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1065 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1066 assert_eq!(report.phase1_classifications.len(), 2);
1067 for (parent, _, class) in &report.phase1_classifications {
1068 assert_eq!(parent, &meta_dir);
1069 assert_eq!(*class, DestClass::Missing);
1070 }
1071 assert!(report.errors.is_empty());
1072 let calls = backend.calls();
1073 assert_eq!(calls.len(), 2, "one clone per child");
1074 for call in calls {
1075 assert!(matches!(call, BackendCall::Clone { .. }));
1076 }
1077 }
1078
1079 /// Phase 1 must aggregate every undeclared `.git/` directory it
1080 /// encounters into a single `UntrackedGitRepos` error. We
1081 /// pre-create two `.git/` slots BEFORE running `sync_meta` and
1082 /// declare them as siblings without paths matching — they classify
1083 /// as `PresentUndeclared` because the manifest does not list them.
1084 #[test]
1085 fn test_walker_v1_2_0_phase1_aggregates_untracked_error() {
1086 // Build a meta whose manifest declares ZERO children — every
1087 // pre-existing `.git/` slot is by definition undeclared.
1088 // Then drop two `.git/` directories under the meta dir and
1089 // (because v1.2.0's classifier needs the manifest declaration
1090 // signal at the call site, not on-disk discovery) run a
1091 // PARALLEL classifier sweep over the on-disk dirs to feed the
1092 // aggregator. This mirrors the way 1.h's lockfile-orphan
1093 // sweep will surface PresentUndeclared dirs into Phase 1's
1094 // collector when a child is removed from the manifest.
1095 let tmp = tempfile::tempdir().unwrap();
1096 let alpha = tmp.path().join("alpha");
1097 let beta = tmp.path().join("beta");
1098 std::fs::create_dir_all(alpha.join(".git")).unwrap();
1099 std::fs::create_dir_all(beta.join(".git")).unwrap();
1100 // Direct unit on the aggregator: feed two `PresentUndeclared`
1101 // pairs and assert the error carries both.
1102 let pairs: Vec<(PathBuf, DestClass)> = vec![
1103 (alpha.clone(), DestClass::PresentUndeclared),
1104 (beta.clone(), DestClass::PresentUndeclared),
1105 ];
1106 let err = aggregate_untracked(pairs).expect_err("two undeclared → error");
1107 match err {
1108 TreeError::UntrackedGitRepos { paths } => {
1109 assert_eq!(paths, vec![alpha, beta]);
1110 }
1111 other => panic!("expected UntrackedGitRepos, got {other:?}"),
1112 }
1113 }
1114
1115 /// Phase 2 prunes a clean orphan: the supplied candidate has a
1116 /// real `.git/` (initialised by `git init`), the consent walk
1117 /// returns Clean, the dest is removed.
1118 #[test]
1119 fn test_walker_v1_2_0_phase2_prunes_clean_orphans() {
1120 if !host_has_git_binary() {
1121 return;
1122 }
1123 let tmp = tempfile::tempdir().unwrap();
1124 let meta_dir = tmp.path().to_path_buf();
1125 // Create the orphan dest — clean repo, no manifest entry.
1126 let orphan = meta_dir.join("ghost");
1127 std::fs::create_dir_all(&orphan).unwrap();
1128 let init =
1129 std::process::Command::new("git").arg("-C").arg(&orphan).args(["init", "-q"]).status();
1130 if !matches!(init, Ok(s) if s.success()) {
1131 return;
1132 }
1133 let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", vec![]));
1134 let backend = InMemGit::new();
1135 let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1136 let prune_list = vec![PathBuf::from("ghost")];
1137 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &prune_list).expect("ok");
1138 assert_eq!(report.phase2_pruned.len(), 1, "clean orphan must be pruned");
1139 assert_eq!(report.phase2_pruned[0], orphan);
1140 assert!(!orphan.exists(), "dest must be removed after a clean prune");
1141 assert!(report.errors.is_empty());
1142 }
1143
1144 /// Phase 2 must REFUSE to prune a dirty orphan absent the override
1145 /// flag. The consent walk classifies it `DirtyTree`; the walker
1146 /// surfaces `DirtyTreeRefusal` and leaves the dest untouched.
1147 #[test]
1148 fn test_walker_v1_2_0_phase2_refuses_dirty_orphan() {
1149 if !host_has_git_binary() {
1150 return;
1151 }
1152 let tmp = tempfile::tempdir().unwrap();
1153 let meta_dir = tmp.path().to_path_buf();
1154 let orphan = meta_dir.join("dirty-ghost");
1155 std::fs::create_dir_all(&orphan).unwrap();
1156 let init =
1157 std::process::Command::new("git").arg("-C").arg(&orphan).args(["init", "-q"]).status();
1158 if !matches!(init, Ok(s) if s.success()) {
1159 return;
1160 }
1161 std::fs::write(orphan.join("scratch.txt"), b"unsaved").unwrap();
1162 let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", vec![]));
1163 let backend = InMemGit::new();
1164 let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1165 let prune_list = vec![PathBuf::from("dirty-ghost")];
1166 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &prune_list).expect("ok");
1167 assert!(report.phase2_pruned.is_empty(), "dirty orphan must NOT be pruned");
1168 assert!(orphan.exists(), "dest stays on disk when refused");
1169 assert_eq!(report.errors.len(), 1);
1170 assert!(matches!(report.errors[0], TreeError::DirtyTreeRefusal { .. }));
1171 }
1172
1173 /// Phase 3 recurses into a child meta when its `.grex/pack.yaml`
1174 /// exists. The sub-meta's own `metas_visited` is folded into the
1175 /// parent's report.
1176 #[test]
1177 fn test_walker_v1_2_0_phase3_recurses_into_sub_meta() {
1178 let tmp = tempfile::tempdir().unwrap();
1179 let meta_dir = tmp.path().to_path_buf();
1180 let child_dest = meta_dir.join("sub");
1181 // Pre-materialise the sub-meta on disk so Phase 1 classifies
1182 // the dest as PresentDeclared (no clone fired) and Phase 3
1183 // sees a `.grex/pack.yaml` to recurse into.
1184 make_sub_meta_on_disk(&child_dest, "sub");
1185 let loader = InMemLoader::new()
1186 .with(
1187 meta_dir.clone(),
1188 meta_manifest_with("root", vec![child("https://example.com/sub.git", "sub")]),
1189 )
1190 .with(child_dest.clone(), meta_manifest_with("sub", vec![]));
1191 let backend = InMemGit::new();
1192 let opts = SyncMetaOptions::default();
1193 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1194 assert_eq!(report.metas_visited, 2, "parent + sub-meta visited");
1195 assert!(report.errors.is_empty());
1196 }
1197
1198 /// `recurse: false` skips Phase 3 entirely — `metas_visited == 1`
1199 /// even when a child has a `.grex/pack.yaml`.
1200 #[test]
1201 fn test_walker_v1_2_0_phase3_max_depth_zero_skips_recursion() {
1202 let tmp = tempfile::tempdir().unwrap();
1203 let meta_dir = tmp.path().to_path_buf();
1204 let child_dest = meta_dir.join("sub");
1205 make_sub_meta_on_disk(&child_dest, "sub");
1206 let loader = InMemLoader::new()
1207 .with(
1208 meta_dir.clone(),
1209 meta_manifest_with("root", vec![child("https://example.com/sub.git", "sub")]),
1210 )
1211 .with(child_dest.clone(), meta_manifest_with("sub", vec![]));
1212 let backend = InMemGit::new();
1213 let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1214 let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1215 assert_eq!(report.metas_visited, 1, "no recursion → only the root meta");
1216 }
1217
1218 /// `max_depth: Some(N)` caps recursion at N levels of nesting.
1219 /// Build a 3-level chain (root → mid → leaf) and assert
1220 /// `max_depth: Some(1)` visits root + mid (depth 0 + 1) but NOT
1221 /// leaf (depth 2).
1222 #[test]
1223 fn test_walker_v1_2_0_phase3_max_depth_n_stops_at_n_levels() {
1224 let tmp = tempfile::tempdir().unwrap();
1225 let root_dir = tmp.path().to_path_buf();
1226 let mid_dir = root_dir.join("mid");
1227 let leaf_dir = mid_dir.join("leaf");
1228 make_sub_meta_on_disk(&mid_dir, "mid");
1229 make_sub_meta_on_disk(&leaf_dir, "leaf");
1230 let loader = InMemLoader::new()
1231 .with(
1232 root_dir.clone(),
1233 meta_manifest_with("root", vec![child("https://example.com/mid.git", "mid")]),
1234 )
1235 .with(
1236 mid_dir.clone(),
1237 meta_manifest_with("mid", vec![child("https://example.com/leaf.git", "leaf")]),
1238 )
1239 .with(leaf_dir.clone(), meta_manifest_with("leaf", vec![]));
1240 let backend = InMemGit::new();
1241 let opts = SyncMetaOptions { max_depth: Some(1), ..SyncMetaOptions::default() };
1242 let report = sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect("ok");
1243 // depth 0 = root, depth 1 = mid → max_depth: Some(1) visits
1244 // root + mid (2 metas) and stops before recursing into leaf.
1245 assert_eq!(report.metas_visited, 2, "max_depth: Some(1) visits root + mid only");
1246 }
1247
1248 /// Helper: pre-populate a sub-meta directory at `dir` with a
1249 /// `.grex/pack.yaml` carrying `name` and a stub `.git/` so the
1250 /// classifier sees it as PresentDeclared.
1251 fn make_sub_meta_on_disk(dir: &Path, name: &str) {
1252 std::fs::create_dir_all(dir.join(".grex")).unwrap();
1253 std::fs::create_dir_all(dir.join(".git")).unwrap();
1254 let yaml = format!("schema_version: \"1\"\nname: {name}\ntype: meta\n");
1255 std::fs::write(dir.join(".grex/pack.yaml"), yaml).unwrap();
1256 }
1257
1258 /// Helper: collect the destinations Phase 1 recorded for a given
1259 /// parent meta from the rolled-up report.
1260 fn destinations_under(report: &SyncMetaReport, parent: &Path) -> Vec<PathBuf> {
1261 report
1262 .phase1_classifications
1263 .iter()
1264 .filter(|(p, _, _)| p == parent)
1265 .map(|(_, d, _)| d.clone())
1266 .collect()
1267 }
1268
1269 /// Parent-relative path resolution: a child declared at the root
1270 /// meta resolves to `<root>/<child>` — NOT to a global workspace
1271 /// anchor. Recursion into that child uses `<root>/<child>` as the
1272 /// new parent meta dir for resolving the grandchild.
1273 #[test]
1274 fn test_walker_v1_2_0_parent_relative_path_resolution() {
1275 let tmp = tempfile::tempdir().unwrap();
1276 let root_dir = tmp.path().to_path_buf();
1277 // Note: 1.c's path-segment validator forbids slashes in the
1278 // `path:` field, so multi-segment nesting is achieved by
1279 // chaining single-segment children across recursion frames.
1280 let tools_dir = root_dir.join("tools");
1281 let foo_dir = tools_dir.join("foo");
1282 make_sub_meta_on_disk(&tools_dir, "tools");
1283 make_sub_meta_on_disk(&foo_dir, "foo");
1284 let loader = InMemLoader::new()
1285 .with(
1286 root_dir.clone(),
1287 meta_manifest_with("root", vec![child("https://example.com/tools.git", "tools")]),
1288 )
1289 .with(
1290 tools_dir.clone(),
1291 meta_manifest_with("tools", vec![child("https://example.com/foo.git", "foo")]),
1292 )
1293 .with(foo_dir.clone(), meta_manifest_with("foo", vec![]));
1294 let backend = InMemGit::new();
1295 let opts = SyncMetaOptions::default();
1296 let report = sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect("ok");
1297 // Three metas visited: root → tools → foo.
1298 assert_eq!(report.metas_visited, 3);
1299 // Phase 1 classifications confirm parent-relative resolution:
1300 // every recorded dest is a SUBDIR of its recorded parent.
1301 for (parent, dest, _class) in &report.phase1_classifications {
1302 assert!(
1303 dest.starts_with(parent),
1304 "child dest {} must descend from parent {}",
1305 dest.display(),
1306 parent.display()
1307 );
1308 }
1309 // Spot-check the chain: root sees `tools`, tools sees `foo`.
1310 assert_eq!(destinations_under(&report, &root_dir), vec![tools_dir.clone()]);
1311 assert_eq!(destinations_under(&report, &tools_dir), vec![foo_dir.clone()]);
1312 }
1313}