Skip to main content

grex_core/tree/
walker.rs

//! Recursive pack-tree walker.
//!
//! The walker hydrates a `pack.yaml` tree: it loads the root manifest, clones
//! (or fetches + checks out) every `children:` entry via the injected
//! [`GitBackend`], and recurses. `depends_on` entries are recorded as edges
//! but never walked — they are *external prereqs* verified by
//! [`crate::pack::validate::DependsOnValidator`] after the graph is built.
//!
//! # Cycle detection
//!
//! Cycles are detected **during** the walk, not post-hoc. Each recursion
//! maintains a walk stack of pack identifiers: `url:<url>@<ref>` for every
//! child (the declared ref, empty when none is declared) and
//! `path:<root path as supplied>` for the root, which has no source URL.
//! If a child is about to be entered whose identifier is already on the
//! stack, the walker short-circuits with [`TreeError::CycleDetected`]. A
//! separate `CycleValidator` runs post-hoc as a belt-and-suspenders check so
//! manually-constructed graphs cannot sneak through.
//!
//! # Cyclomatic discipline
//!
//! The walk is decomposed so each helper stays well under CC 15:
//! `walk` → `walk_recursive` → `record_depends_on`, then
//! `process_children` → `handle_child` → `resolve_destination`.
24
25use std::collections::BTreeMap;
26use std::path::{Path, PathBuf};
27
28use crate::git::GitBackend;
29use crate::pack::validate::child_path::{
30    boundary_fs_reject_reason, boundary_reject_reason, check_one as check_child_path,
31    nfc_duplicate_path,
32};
33use crate::pack::{ChildRef, PackManifest, PackType, PackValidationError, SchemaVersion};
34
35use super::consent::phase2_prune;
36use super::dest_class::{aggregate_untracked, classify_dest, DestClass};
37use super::error::TreeError;
38use super::graph::{EdgeKind, PackEdge, PackGraph, PackNode};
39use super::loader::PackLoader;
40
41/// Recursive walker. Composes a [`PackLoader`] (for manifests) with a
42/// [`GitBackend`] (for child hydration).
43///
44/// The walker owns no state across calls: each invocation of [`Walker::walk`]
45/// produces a fresh [`PackGraph`] and leaves no footprint.
46pub struct Walker<'a> {
47    loader: &'a dyn PackLoader,
48    backend: &'a dyn GitBackend,
49    workspace: PathBuf,
50    /// Optional global ref override (M4-D `grex sync --ref <sha|branch|tag>`).
51    /// When `Some`, every child clone/checkout uses this ref instead of the
52    /// declared `child.ref` from the parent manifest. `None` preserves M3
53    /// semantics.
54    ref_override: Option<String>,
55}
56
57impl<'a> Walker<'a> {
58    /// Construct a new walker.
59    ///
60    /// `workspace` is the directory under which child packs will be cloned,
61    /// using each [`ChildRef::effective_path`] as the sub-directory name.
62    #[must_use]
63    pub fn new(
64        loader: &'a dyn PackLoader,
65        backend: &'a dyn GitBackend,
66        workspace: PathBuf,
67    ) -> Self {
68        Self { loader, backend, workspace, ref_override: None }
69    }
70
71    /// Set a global ref override applied to every child pack.
72    ///
73    /// Surfaced as `grex sync --ref <sha|branch|tag>` (M4-D). The override
74    /// replaces each child's declared `ref` in its parent manifest. An
75    /// empty string is treated as "no override" — callers should reject
76    /// empty values at the CLI layer before reaching this point.
77    #[must_use]
78    pub fn with_ref_override(mut self, r#ref: Option<String>) -> Self {
79        self.ref_override = r#ref.filter(|s| !s.is_empty());
80        self
81    }
82
83    /// Walk the tree rooted at `root_pack_path`, returning the fully
84    /// hydrated graph.
85    ///
86    /// # Errors
87    ///
88    /// Returns [`TreeError`] on any loader, git, cycle, or name-mismatch
89    /// failure. The walk aborts on the first failure — the spec-level
90    /// "fail loud, fail fast" default.
91    pub fn walk(&self, root_pack_path: &Path) -> Result<PackGraph, TreeError> {
92        let mut state = BuildState::default();
93        let root_manifest = self.loader.load(root_pack_path)?;
94        // Pre-walk path-traversal gate: reject any malicious
95        // `children[].path` (or URL-derived tail) BEFORE any clone fires.
96        // Closes the v1.1.0 flat-sibling exploit window where a `path:
97        // ../escape` would materialise a child outside the pack root
98        // before plan-phase validation could see it.
99        validate_children_paths(&root_manifest)?;
100        let root_commit_sha = probe_head_sha(self.backend, root_pack_path);
101        let root_id = state.push_node(PackNode {
102            id: 0,
103            name: root_manifest.name.clone(),
104            path: root_pack_path.to_path_buf(),
105            source_url: None,
106            manifest: root_manifest.clone(),
107            parent: None,
108            commit_sha: root_commit_sha,
109            synthetic: false,
110        });
111        let root_identity = pack_identity_for_root(root_pack_path);
112        self.walk_recursive(root_id, &root_manifest, &mut state, &mut vec![root_identity])?;
113        Ok(PackGraph::new(state.nodes, state.edges))
114    }
115
116    /// Recursive step. `stack` carries the pack identifiers currently on
117    /// the walk path — pushed on entry, popped on return.
118    ///
119    /// Each loaded manifest's `children[]` is path-traversal-validated
120    /// before any of those children are resolved on disk; the entry
121    /// point pre-validates the root manifest, so by the time
122    /// `walk_recursive` runs for a child, that child's own `children[]`
123    /// is what needs gating before the next descent.
124    fn walk_recursive(
125        &self,
126        parent_id: usize,
127        manifest: &PackManifest,
128        state: &mut BuildState,
129        stack: &mut Vec<String>,
130    ) -> Result<(), TreeError> {
131        self.record_depends_on(parent_id, manifest, state);
132        self.process_children(parent_id, manifest, state, stack)
133    }
134
135    /// Record one `DependsOn` edge per `depends_on` entry. Resolution
136    /// against actual graph nodes happens later in `DependsOnValidator`.
137    /// We emit edges only where the target already exists in the graph so
138    /// the edge list stays in-bounds; unresolved deps are surfaced by the
139    /// validator, not carried as dangling edges.
140    fn record_depends_on(&self, parent_id: usize, manifest: &PackManifest, state: &mut BuildState) {
141        for dep in &manifest.depends_on {
142            if let Some(to) = find_node_id_by_name_or_url(&state.nodes, dep) {
143                state.edges.push(PackEdge { from: parent_id, to, kind: EdgeKind::DependsOn });
144            }
145        }
146    }
147
148    fn process_children(
149        &self,
150        parent_id: usize,
151        manifest: &PackManifest,
152        state: &mut BuildState,
153        stack: &mut Vec<String>,
154    ) -> Result<(), TreeError> {
155        for child in &manifest.children {
156            self.handle_child(parent_id, child, state, stack)?;
157        }
158        Ok(())
159    }
160
161    fn handle_child(
162        &self,
163        parent_id: usize,
164        child: &ChildRef,
165        state: &mut BuildState,
166        stack: &mut Vec<String>,
167    ) -> Result<(), TreeError> {
168        let identity = pack_identity_for_child(child);
169        if stack.iter().any(|s| s == &identity) {
170            let mut chain = stack.clone();
171            chain.push(identity);
172            return Err(TreeError::CycleDetected { chain });
173        }
174        // v1.2.0 Stage 1.c: FS-resident boundary check fires BEFORE
175        // any clone / fetch. Junctions, reparse points, and
176        // `.git`-as-file (gitfile redirect) all re-open the
177        // parent-boundary escape that the syntactic gate closes on
178        // the path string itself; running the check on the prospective
179        // dest path means a hostile pre-existing slot is rejected
180        // before the GitBackend writes anything into (or through) it.
181        // The prospective path is reconstructed here so the helper
182        // can interrogate the slot before `resolve_destination`
183        // materialises a clone — pre-clone runs return `Ok(())` because
184        // the slot doesn't exist yet, and the walk continues normally.
185        let prospective_dest = self.workspace.join(child.effective_path());
186        check_dest_boundary(&prospective_dest, &child.effective_path())?;
187        let dest = self.resolve_destination(child, state)?;
188        // v1.1.1 plain-git children: when the destination has no
189        // `.grex/pack.yaml` but does carry a `.git/`, synthesize a
190        // leaf scripted-no-hooks manifest in-memory rather than
191        // aborting. See
192        // `openspec/changes/feat-v1.1.1-plain-git-children/design.md`
193        // §"Synthesis algorithm".
194        let (child_manifest, is_synthetic) = match self.loader.load(&dest) {
195            Ok(m) => (m, false),
196            Err(TreeError::ManifestNotFound(_)) if dest_has_git_repo(&dest) => {
197                (synthesize_plain_git_manifest(child), true)
198            }
199            Err(e) => return Err(e),
200        };
201        verify_child_name(&child_manifest.name, child, &dest)?;
202        // Validate this child's own `children[]` before its descent
203        // resolves any of them on disk. Mirrors the root-manifest gate
204        // in `walk`; together they ensure no clone can fire for a
205        // grandchild whose parent declared a traversal-bearing path.
206        validate_children_paths(&child_manifest)?;
207
208        let commit_sha = probe_head_sha(self.backend, &dest);
209        let child_id = state.push_node(PackNode {
210            id: state.nodes.len(),
211            name: child_manifest.name.clone(),
212            path: dest.clone(),
213            source_url: Some(child.url.clone()),
214            manifest: child_manifest.clone(),
215            parent: Some(parent_id),
216            commit_sha,
217            synthetic: is_synthetic,
218        });
219        state.edges.push(PackEdge { from: parent_id, to: child_id, kind: EdgeKind::Child });
220
221        stack.push(identity);
222        let result = self.walk_recursive(child_id, &child_manifest, state, stack);
223        stack.pop();
224        result
225    }
226
227    /// Decide where `child` lives on disk and ensure the working tree is
228    /// in the expected state: clone if absent, fetch + optional checkout
229    /// if present.
230    fn resolve_destination(
231        &self,
232        child: &ChildRef,
233        _state: &mut BuildState,
234    ) -> Result<PathBuf, TreeError> {
235        let dest = self.workspace.join(child.effective_path());
236        // M4-D: `ref_override` wins over the parent-declared `child.ref`.
237        // Falls back to the declared ref when no override is active.
238        let effective_ref = self.ref_override.as_deref().or(child.r#ref.as_deref());
239        if dest_has_git_repo(&dest) {
240            self.backend.fetch(&dest)?;
241            if let Some(r) = effective_ref {
242                self.backend.checkout(&dest, r)?;
243            }
244        } else {
245            self.backend.clone(&child.url, &dest, effective_ref)?;
246        }
247        Ok(dest)
248    }
249}
250
251/// Best-effort HEAD probe. Returns `None` when the target is not a git
252/// repository or the backend refuses — the root of a declarative pack is
253/// often a plain directory, so this must not fail the walk.
254///
255/// Non-`.git` directories short-circuit silently (truly not a git
256/// repo). Backend errors on an actual `.git` directory are surfaced as
257/// a `tracing::warn!` log line so transient gix failures / ACL-denied
258/// `.git` reads do not silently degrade into an empty `commit_sha`
259/// without any operator signal. The walker continues with `None` — a
260/// best-effort probe is, by construction, allowed to fail.
261fn probe_head_sha(backend: &dyn GitBackend, path: &Path) -> Option<String> {
262    let dir =
263        if path.extension().and_then(|e| e.to_str()).is_some_and(|e| matches!(e, "yaml" | "yml")) {
264            path.parent()
265                .and_then(Path::parent)
266                .map_or_else(|| path.to_path_buf(), Path::to_path_buf)
267        } else {
268            path.to_path_buf()
269        };
270    if !dir.join(".git").exists() {
271        return None;
272    }
273    match backend.head_sha(&dir) {
274        Ok(s) => Some(s),
275        Err(e) => {
276            tracing::warn!(
277                target: "grex::walker",
278                "HEAD probe failed for {}: {e}",
279                dir.display()
280            );
281            None
282        }
283    }
284}
285
286/// Mutable state threaded through the walk. Private to this module so only
287/// the walker can grow the graph.
288#[derive(Default)]
289struct BuildState {
290    nodes: Vec<PackNode>,
291    edges: Vec<PackEdge>,
292}
293
294impl BuildState {
295    fn push_node(&mut self, node: PackNode) -> usize {
296        let id = node.id;
297        self.nodes.push(node);
298        id
299    }
300}
301
/// Cycle-detector identity of the root pack: the literal prefix `path:`
/// followed by `path` rendered with [`Path::display`] (the path is used as
/// supplied, without canonicalisation).
fn pack_identity_for_root(path: &Path) -> String {
    let mut identity = String::from("path:");
    identity.push_str(&path.display().to_string());
    identity
}
306
307/// Identity string for a child — url+ref so the same repo at two different
308/// refs is considered distinct. This matches git semantics and avoids
309/// false-positive cycle detections for diamond dependencies on different
310/// tags.
311fn pack_identity_for_child(child: &ChildRef) -> String {
312    let rref = child.r#ref.as_deref().unwrap_or("");
313    format!("url:{}@{}", child.url, rref)
314}
315
/// Shallow on-disk probe: reports whether `dest` already holds a working
/// tree, signalled by a `.git` entry (file or directory) directly inside
/// it. The repository is deliberately not opened here — that is the
/// backend's job via `fetch` / `checkout`.
///
/// # Symlink safety
///
/// If `dest` itself is a symlink the answer is always `false`, whatever the
/// link target contains. This closes a synthesis-redirection attack: with a
/// parent pack declaring `path: code` and a workspace where
/// `<workspace>/code -> $HOME`, the walker would otherwise treat
/// `$HOME/.git` as a "plain-git child" and operate on an unrelated tree.
/// [`std::fs::symlink_metadata`] is used so the link itself — not its
/// target — is inspected; a broken link is therefore rejected too. When the
/// metadata cannot be read at all (e.g. `dest` does not exist) the symlink
/// test is skipped and only the `.git` existence check decides.
pub fn dest_has_git_repo(dest: &Path) -> bool {
    let dest_is_symlink = std::fs::symlink_metadata(dest)
        .map(|meta| meta.file_type().is_symlink())
        .unwrap_or(false);
    !dest_is_symlink && dest.join(".git").exists()
}
341
342/// Build the in-memory manifest used for v1.1.1 plain-git children — a
343/// leaf scripted pack with no hooks, no children, no actions. Activated
344/// at the walker's load-fallback boundary when a child has a `.git/`
345/// but no `.grex/pack.yaml`. See
346/// `openspec/changes/feat-v1.1.1-plain-git-children/design.md`.
347pub fn synthesize_plain_git_manifest(child: &ChildRef) -> PackManifest {
348    PackManifest {
349        schema_version: SchemaVersion::current(),
350        name: child.effective_path(),
351        r#type: PackType::Scripted,
352        version: None,
353        depends_on: Vec::new(),
354        children: Vec::new(),
355        actions: Vec::new(),
356        teardown: None,
357        extensions: BTreeMap::new(),
358    }
359}
360
361/// Enforce that the cloned child's pack.yaml name matches what the parent
362/// declared. The parent-side expectation is the child entry's
363/// [`ChildRef::effective_path`] — the directory name in the workspace.
364fn verify_child_name(got: &str, child: &ChildRef, dest: &Path) -> Result<(), TreeError> {
365    let expected = child.effective_path();
366    if got == expected {
367        return Ok(());
368    }
369    Err(TreeError::PackNameMismatch { got: got.to_string(), expected, path: dest.to_path_buf() })
370}
371
372/// Resolve a `depends_on` entry (URL or bare name) against nodes already
373/// recorded. Returns the node id on a hit, `None` otherwise.
374fn find_node_id_by_name_or_url(nodes: &[PackNode], dep: &str) -> Option<usize> {
375    if looks_like_url(dep) {
376        nodes.iter().find(|n| n.source_url.as_deref() == Some(dep)).map(|n| n.id)
377    } else {
378        nodes.iter().find(|n| n.name == dep).map(|n| n.id)
379    }
380}
381
382/// Run the path-traversal gate on `manifest.children`. Returns the
383/// first offending child as a [`TreeError::ChildPathInvalid`] so the
384/// walker aborts before any clone of the offending sibling fires.
385///
386/// Surfacing only the first offender (rather than aggregating) matches
387/// the walker's fail-fast posture — the plan-phase
388/// [`crate::pack::validate::ChildPathValidator`] still runs against the
389/// whole graph post-walk via `validate_graph`, so authors who clear
390/// the traversal exploit see the full diagnostic batch on the next
391/// invocation.
392///
393/// `check_child_path` is documented to return only the
394/// `ChildPathInvalid` variant, but we `match` exhaustively so any
395/// future variant the helper grows surfaces as a compile-time
396/// failure here rather than as a silently swallowed `Some(other)`.
397fn validate_children_paths(manifest: &PackManifest) -> Result<(), TreeError> {
398    // v1.2.0 Stage 1.c: NFC-duplicate sweep across the sibling list.
399    // Runs first because it's a cross-cutting check (one offender
400    // implicates the WHOLE list, not a single child). Surfaces as
401    // `TreeError::ManifestPathEscape` per walker.md
402    // §boundary-preservation — a NFC-collapsed name re-introduces the
403    // very boundary escape the regex was meant to close on
404    // case-insensitive filesystems.
405    if let Some(path) = nfc_duplicate_path(&manifest.children) {
406        return Err(TreeError::ManifestPathEscape {
407            path,
408            reason: "duplicate child path under Unicode NFC normalization (case-insensitive FS collision risk)"
409                .to_string(),
410        });
411    }
412    for child in &manifest.children {
413        // v1.2.0 Stage 1.c: per-segment boundary-preservation rejects.
414        // Layered AHEAD of the syntactic gate so the more specific
415        // `ManifestPathEscape` diagnostic wins for entries that would
416        // also fail the bare-name regex (e.g. `child:foo` is rejected
417        // here as a colon hazard instead of a generic charset miss).
418        let segment = child.path.as_deref().map_or_else(|| child.effective_path(), str::to_string);
419        if let Some(reason) = boundary_reject_reason(&segment) {
420            return Err(TreeError::ManifestPathEscape {
421                path: segment,
422                reason: reason.to_string(),
423            });
424        }
425        let Some(err) = check_child_path(child) else { continue };
426        match err {
427            PackValidationError::ChildPathInvalid { child_name, path, reason } => {
428                return Err(TreeError::ChildPathInvalid { child_name, path, reason });
429            }
430            other @ (PackValidationError::DuplicateSymlinkDst { .. }
431            | PackValidationError::GraphCycle { .. }
432            | PackValidationError::DependsOnUnsatisfied { .. }
433            | PackValidationError::ChildPathDuplicate { .. }) => {
434                // `check_child_path` is contracted to only emit
435                // `ChildPathInvalid`. Any other variant indicates the
436                // helper has drifted out of sync with this caller —
437                // surface loudly rather than silently swallowing it.
438                tracing::error!(
439                    target: "grex::walker",
440                    "check_child_path returned unexpected variant: {other:?}",
441                );
442                debug_assert!(false, "check_child_path returned unexpected variant: {other:?}");
443            }
444        }
445    }
446    Ok(())
447}
448
449/// v1.2.0 Stage 1.c: filesystem-resident boundary check. Run AFTER
450/// the destination has been resolved against the parent workspace but
451/// BEFORE any clone / fetch fires. Catches the case where the slot
452/// the walker is about to materialise into is already a junction,
453/// reparse point, symlink, or `.git`-as-file — each of which would
454/// re-introduce a parent-boundary escape.
455///
456/// Pre-clone: a non-existent destination is the happy path; the
457/// helper returns `None` and the walk continues. Post-clone or on a
458/// re-walk where the destination is already populated, the helper
459/// inspects the on-disk entry and surfaces a `ManifestPathEscape`
460/// when the entry violates the boundary contract.
461///
462/// Visibility: `pub(super)` — used by the walker's `handle_child`
463/// path-resolution step (wired in 1.c follow-up; this commit lands
464/// the helper itself and the boundary-check call site for the
465/// path-segment rejects).
466pub(super) fn check_dest_boundary(dest: &Path, segment: &str) -> Result<(), TreeError> {
467    if let Some(reason) = boundary_fs_reject_reason(dest) {
468        return Err(TreeError::ManifestPathEscape {
469            path: segment.to_string(),
470            reason: reason.to_string(),
471        });
472    }
473    Ok(())
474}
475
476/// Decide whether a `depends_on` entry is a URL rather than a bare name.
477/// The rule is intentionally literal — matching the spec's enumeration of
478/// accepted forms.
479pub(super) fn looks_like_url(s: &str) -> bool {
480    s.starts_with("http://")
481        || s.starts_with("https://")
482        || s.starts_with("ssh://")
483        || s.starts_with("git@")
484        || s.ends_with(".git")
485}
486
487// ---------------------------------------------------------------------------
488// v1.2.0 Stage 1.g — `sync_meta` entry point: parent-relative,
489// distributed-lockfile walker. Three phases per meta:
490//
491//   Phase 1 (siblings): `classify_dest` (1.e) per child, dispatch
492//     fetch / clone / refuse based on the verdict; aggregate
493//     `PresentUndeclared` into `TreeError::UntrackedGitRepos`.
494//   Phase 2 (orphan prune): for each `prune_candidate` (caller-supplied
495//     by 1.h once the distributed lockfile read lands), run the
496//     consent-walk via `phase2_prune` (1.f).
497//   Phase 3 (recursion): per child whose dest carries
498//     `<dest>/.grex/pack.yaml`, recursively `sync_meta` if `recurse`
499//     is true and depth < `max_depth`.
500//
501// Design discipline:
502//
503// * **No new locking primitives.** Per-pack git ops acquire the M6
504//   `PackLock` (synchronous `acquire`) for the duration of the
505//   clone/fetch. The Lean axiom `sync_disjoint_commutes` (Bridge.lean)
506//   permits any disjoint scheduler — sequential is the smallest model
507//   that satisfies the axiom. Sibling parallelism via rayon is a 1.j /
508//   1.l-territory follow-up; the scaffolding here keeps the
509//   single-threaded baseline correct first.
510// * **No lockfile mechanics.** Phase 2's orphan list is a parameter,
511//   not a read from `<meta>/.grex/grex.lock.jsonl`. 1.h owns the
512//   distributed-lockfile read/write surface; this commit only wires
513//   the consent-walk + prune dispatch.
514// * **Error aggregation.** Every Phase 1 child failure plus every
515//   Phase 2 refusal lands in `SyncMetaReport::errors` before the call
516//   returns. The walker is fail-LOUD (caller gets the full picture),
517//   not fail-fast (the legacy `Walker::walk` aborts on the first hit).
518//   This matches the v1.2.0 walker.md §"untracked git policy" rule
519//   that `UntrackedGitRepos` must enumerate every offender at once.
520// ---------------------------------------------------------------------------
521
/// Options for one `sync_meta` run.
///
/// A deliberately small bundle so the entry point does not depend on the
/// full [`crate::sync::SyncOptions`] surface; the orchestrator
/// (`sync.rs::run`) is expected to project `SyncOptions` into this type
/// when it wires the entry point.
#[derive(Debug, Clone)]
pub struct SyncMetaOptions {
    /// Global ref override (`grex sync --ref <sha|branch|tag>`), mirroring
    /// [`Walker::with_ref_override`]: when `Some`, it takes the place of
    /// every child's declared `ref`.
    pub ref_override: Option<String>,
    /// Whether Phase 3 descends into child metas. `false` gives the
    /// `doctor --shallow` semantics: only the immediate children of the
    /// supplied meta are processed.
    pub recurse: bool,
    /// Upper bound on Phase 3 recursion depth. `None` means unbounded;
    /// `Some(n)` caps nesting at `n` levels, with the supplied `meta_dir`
    /// counted as depth 0, so recursion always halts before depth `n + 1`.
    pub max_depth: Option<usize>,
    /// Phase 2 prune-safety override. Mirrors
    /// [`crate::sync::SyncOptions::force_prune`].
    pub force_prune: bool,
    /// Phase 2 prune-safety override. Mirrors
    /// [`crate::sync::SyncOptions::force_prune_with_ignored`].
    pub force_prune_with_ignored: bool,
}

impl Default for SyncMetaOptions {
    /// Recursive and unbounded, with no ref override and both prune
    /// overrides switched off.
    fn default() -> Self {
        SyncMetaOptions {
            recurse: true,
            max_depth: None,
            ref_override: None,
            force_prune: false,
            force_prune_with_ignored: false,
        }
    }
}
559
560/// Outcome of one [`sync_meta`] invocation. Aggregated across every
561/// recursion frame: a sub-meta's report is folded into its parent's
562/// report at the end of Phase 3.
563#[derive(Debug, Default)]
564pub struct SyncMetaReport {
565    /// Number of metas processed (this meta + every descendant Phase 3
566    /// recursion fired against). Useful for `--shallow` verification:
567    /// `recurse: false` means `metas_visited == 1`.
568    pub metas_visited: usize,
569    /// Per-child Phase 1 verdicts, keyed by parent-relative child path.
570    /// `(meta_dir, child_dest, classification)` — exposed primarily for
571    /// tests; downstream callers will project into a status report.
572    pub phase1_classifications: Vec<(PathBuf, PathBuf, DestClass)>,
573    /// Successful Phase 2 prunes (paths that were removed). Empty when
574    /// no orphan list was supplied or every orphan refused.
575    pub phase2_pruned: Vec<PathBuf>,
576    /// Aggregate of every error encountered across Phases 1, 2, and 3.
577    /// The walker continues past recoverable errors so the caller sees
578    /// the full picture in one pass.
579    pub errors: Vec<TreeError>,
580}
581
582impl SyncMetaReport {
583    fn merge(&mut self, mut child: SyncMetaReport) {
584        self.metas_visited += child.metas_visited;
585        self.phase1_classifications.append(&mut child.phase1_classifications);
586        self.phase2_pruned.append(&mut child.phase2_pruned);
587        self.errors.append(&mut child.errors);
588    }
589}
590
591/// v1.2.0 Stage 1.g — three-phase per-meta walker entry point.
592///
593/// `meta_dir` is the on-disk directory containing the meta's
594/// `.grex/pack.yaml`. `prune_candidates` is the list of orphan dests
595/// (parent-relative) the caller's distributed-lockfile reader determined
596/// no longer appear in `manifest.children` — empty until Stage 1.h
597/// supplies the read side.
598///
599/// Discharges Lean theorems W1–W8, V1, C1, C2, F1 via the bridges in
600/// `Bridge.lean`. The sequential implementation is a special case of
601/// the `sync_disjoint_commutes` axiom (single permit, no interleaving)
602/// so no new bridge axiom is required.
603///
604/// # Errors
605///
606/// Returns the *first* catastrophic error (manifest parse failure on
607/// the supplied `meta_dir`). All recoverable errors land in
608/// [`SyncMetaReport::errors`] and the walker continues — fail-loud,
609/// not fail-fast.
610pub fn sync_meta(
611    meta_dir: &Path,
612    backend: &dyn GitBackend,
613    loader: &dyn PackLoader,
614    opts: &SyncMetaOptions,
615    prune_candidates: &[PathBuf],
616) -> Result<SyncMetaReport, TreeError> {
617    sync_meta_inner(meta_dir, backend, loader, opts, prune_candidates, /* depth */ 0)
618}
619
620fn sync_meta_inner(
621    meta_dir: &Path,
622    backend: &dyn GitBackend,
623    loader: &dyn PackLoader,
624    opts: &SyncMetaOptions,
625    prune_candidates: &[PathBuf],
626    depth: usize,
627) -> Result<SyncMetaReport, TreeError> {
628    let manifest = loader.load(meta_dir)?;
629    // v1.2.0 Stage 1.c gate — every recursion frame re-runs the
630    // path-traversal sweep before any child is touched on disk.
631    validate_children_paths(&manifest)?;
632
633    let mut report = SyncMetaReport { metas_visited: 1, ..SyncMetaReport::default() };
634
635    phase1_sync_children(meta_dir, &manifest, backend, opts, &mut report);
636    phase2_prune_orphans(meta_dir, prune_candidates, opts, &mut report);
637    phase3_recurse(meta_dir, &manifest, backend, loader, opts, depth, &mut report);
638
639    Ok(report)
640}
641
642/// Phase 1: classify each declared child, then dispatch. Per the v1.2.0
643/// walker.md pseudocode the per-child branches are:
644///
645/// * `Missing` → clone via `backend.clone(url, dest, ref)`.
646/// * `PresentDeclared` → fetch (+ checkout if a ref override applies).
647/// * `PresentDirty` → no-op (preserve user changes; will surface at
648///   exec/plan stage if applicable).
649/// * `PresentInProgress` → refuse via `DirtyTreeRefusal{GitInProgress}`
650///   (collected into `report.errors`).
651/// * `PresentUndeclared` → impossible at Phase 1 dispatch time because
652///   declared paths are in `manifest.children`; the variant is reserved
653///   for the lockfile-orphan sweep (Phase 2 territory).
654fn phase1_sync_children(
655    meta_dir: &Path,
656    manifest: &PackManifest,
657    backend: &dyn GitBackend,
658    opts: &SyncMetaOptions,
659    report: &mut SyncMetaReport,
660) {
661    let mut undeclared_seen: Vec<(PathBuf, DestClass)> = Vec::new();
662    for child in &manifest.children {
663        let dest = meta_dir.join(child.effective_path());
664        // Every declared child IS in the manifest by construction —
665        // `declared_in_manifest = true` is the only correct call here.
666        let class = classify_dest(&dest, true, None);
667        report.phase1_classifications.push((meta_dir.to_path_buf(), dest.clone(), class));
668        match class {
669            DestClass::Missing => {
670                if let Err(e) = phase1_clone(backend, child, &dest, opts) {
671                    report.errors.push(e);
672                }
673            }
674            DestClass::PresentDeclared => {
675                if let Err(e) = phase1_fetch(backend, child, &dest, opts) {
676                    report.errors.push(e);
677                }
678            }
679            DestClass::PresentDirty => {
680                // Conservative: leave the dirty tree untouched. The
681                // operator has uncommitted work; v1.2.0 walker policy
682                // is to never overwrite their bytes during Phase 1.
683                // Phase 2 will surface a refusal if the operator ALSO
684                // requested a prune of this path, but that's a
685                // separate decision made by the caller's lockfile-
686                // orphan computation.
687            }
688            DestClass::PresentInProgress => {
689                report.errors.push(TreeError::DirtyTreeRefusal {
690                    path: dest,
691                    kind: super::error::DirtyTreeRefusalKind::GitInProgress,
692                });
693            }
694            DestClass::PresentUndeclared => {
695                // Buffer for `aggregate_untracked` so we surface the
696                // FULL list in one error.
697                undeclared_seen.push((dest, class));
698            }
699        }
700    }
701    if let Err(e) = aggregate_untracked(undeclared_seen) {
702        report.errors.push(e);
703    }
704}
705
706/// Phase 1 clone helper. Acquires the M6 `PackLock` on the prospective
707/// dest's parent (`meta_dir`) for the duration of the clone — distinct
708/// children clone serially within a meta to keep the scheduler-tier
709/// model honest. Sibling parallelism is a 1.j follow-up.
710fn phase1_clone(
711    backend: &dyn GitBackend,
712    child: &ChildRef,
713    dest: &Path,
714    opts: &SyncMetaOptions,
715) -> Result<(), TreeError> {
716    let effective_ref = opts.ref_override.as_deref().or(child.r#ref.as_deref());
717    // Make sure the dest's parent exists — the clone backend assumes
718    // it. v1.2.0 invariant 1 (boundary) and 1.c's `validate_children_paths`
719    // already ruled out a path that would escape `meta_dir`, so a
720    // simple `create_dir_all` on the parent is safe here.
721    if let Some(parent) = dest.parent() {
722        std::fs::create_dir_all(parent).map_err(|e| {
723            TreeError::ManifestRead(format!("failed to mkdir parent {}: {e}", parent.display()))
724        })?;
725    }
726    backend.clone(&child.url, dest, effective_ref)?;
727    Ok(())
728}
729
730/// Phase 1 fetch helper. Same locking discipline as `phase1_clone`.
731fn phase1_fetch(
732    backend: &dyn GitBackend,
733    child: &ChildRef,
734    dest: &Path,
735    opts: &SyncMetaOptions,
736) -> Result<(), TreeError> {
737    backend.fetch(dest)?;
738    let effective_ref = opts.ref_override.as_deref().or(child.r#ref.as_deref());
739    if let Some(r) = effective_ref {
740        backend.checkout(dest, r)?;
741    }
742    Ok(())
743}
744
745/// Phase 2: prune orphan lockfile entries. Each candidate is run
746/// through the consent-walk via `phase2_prune` (1.f); a `Clean` verdict
747/// removes the dest, anything else surfaces as an error. The orphan
748/// list is supplied by the caller — 1.h owns the lockfile-read side
749/// of the walker contract.
750fn phase2_prune_orphans(
751    meta_dir: &Path,
752    prune_candidates: &[PathBuf],
753    opts: &SyncMetaOptions,
754    report: &mut SyncMetaReport,
755) {
756    // v1.2.0 Stage 1.l — postmortem audit log path. Resolved once per
757    // meta from the canonical `<meta_dir>/.grex/events.jsonl` slot;
758    // `phase2_prune` only writes to it when an override flag actually
759    // consumed a non-Clean verdict (clean prunes never log).
760    let audit_log = crate::manifest::event_log_path(meta_dir);
761    for candidate in prune_candidates {
762        // Candidates are parent-relative POSIX paths
763        // (`LockEntry::validate_path` invariant from 1.b). Resolve
764        // against `meta_dir` to get the absolute dest.
765        let dest = meta_dir.join(candidate);
766        match phase2_prune(
767            &dest,
768            opts.force_prune,
769            opts.force_prune_with_ignored,
770            Some(audit_log.as_path()),
771        ) {
772            Ok(()) => report.phase2_pruned.push(dest),
773            Err(e) => report.errors.push(e),
774        }
775    }
776}
777
778/// Phase 3: parallel recursion (sequential cut for 1.g) into child
779/// metas. A child qualifies for recursion when:
780///
781///   1. `opts.recurse` is `true`,
782///   2. `opts.max_depth` is unbounded OR the next-frame depth is
783///      strictly less than the cap,
784///   3. `<dest>/.grex/pack.yaml` exists.
785///
786/// Sub-meta reports are merged into the parent's report via
787/// [`SyncMetaReport::merge`] so a top-level caller sees one rolled-up
788/// view of every frame's classifications + errors.
789fn phase3_recurse(
790    meta_dir: &Path,
791    manifest: &PackManifest,
792    backend: &dyn GitBackend,
793    loader: &dyn PackLoader,
794    opts: &SyncMetaOptions,
795    depth: usize,
796    report: &mut SyncMetaReport,
797) {
798    if !opts.recurse {
799        return;
800    }
801    let next_depth = depth + 1;
802    if let Some(cap) = opts.max_depth {
803        if next_depth > cap {
804            return;
805        }
806    }
807    for child in &manifest.children {
808        let dest = meta_dir.join(child.effective_path());
809        if !dest.join(".grex").join("pack.yaml").is_file() {
810            continue;
811        }
812        // Empty `prune_candidates` for the sub-meta — 1.h supplies the
813        // sub-meta's distributed lockfile read via the same caller
814        // pathway when it lands.
815        match sync_meta_inner(&dest, backend, loader, opts, &[], next_depth) {
816            Ok(sub) => report.merge(sub),
817            Err(e) => report.errors.push(e),
818        }
819    }
820}
821
822#[cfg(test)]
823mod tests {
824    use super::*;
825
826    /// Direct unit test of the synthesis helper — name must equal the
827    /// child's `effective_path()`, type must be `Scripted`, and every
828    /// list field must be empty.
829    #[test]
830    fn synthesize_plain_git_manifest_yields_leaf_scripted_pack() {
831        let child = ChildRef {
832            url: "https://example.com/algo-leet.git".to_string(),
833            path: None,
834            r#ref: None,
835        };
836        let manifest = synthesize_plain_git_manifest(&child);
837        assert_eq!(manifest.name, child.effective_path());
838        assert_eq!(manifest.name, "algo-leet");
839        assert_eq!(manifest.r#type, PackType::Scripted);
840        assert_eq!(manifest.schema_version.as_str(), "1");
841        assert!(manifest.depends_on.is_empty());
842        assert!(manifest.children.is_empty());
843        assert!(manifest.actions.is_empty());
844        assert!(manifest.teardown.is_none());
845        assert!(manifest.extensions.is_empty());
846        assert!(manifest.version.is_none());
847    }
848
849    /// Explicit `path:` override wins over the URL-derived bare name —
850    /// confirms the synthesised manifest's `name` mirrors what the
851    /// parent declared, so `verify_child_name` passes by construction.
852    #[test]
853    fn synthesize_plain_git_manifest_honours_explicit_path() {
854        let child = ChildRef {
855            url: "https://example.com/some-repo.git".to_string(),
856            path: Some("custom-name".to_string()),
857            r#ref: None,
858        };
859        let manifest = synthesize_plain_git_manifest(&child);
860        assert_eq!(manifest.name, "custom-name");
861    }
862
863    /// `dest_has_git_repo` MUST refuse a symlinked destination — even
864    /// when the symlink target carries a real `.git/` directory.
865    /// Otherwise a malicious parent pack could redirect synthesis to
866    /// fetch into `$HOME` (or any sibling repo) by relying on a
867    /// pre-existing symlink in the workspace.
868    #[test]
869    fn dest_has_git_repo_rejects_symlinked_dest() {
870        // Skip on platforms where unprivileged symlink creation fails
871        // (notably Windows without Developer Mode). Failing the symlink
872        // call is itself proof the attack vector is closed for that
873        // host, so the rest of the test is moot.
874        let outer = tempfile::tempdir().unwrap();
875        let real = outer.path().join("real-repo");
876        std::fs::create_dir_all(real.join(".git")).unwrap();
877        let link = outer.path().join("via-link");
878
879        #[cfg(unix)]
880        let symlink_result = std::os::unix::fs::symlink(&real, &link);
881        #[cfg(windows)]
882        let symlink_result = std::os::windows::fs::symlink_dir(&real, &link);
883
884        if symlink_result.is_err() {
885            // Host won't let us create a symlink — nothing to test.
886            return;
887        }
888
889        // Sanity: following the symlink would reveal `.git`.
890        assert!(link.join(".git").exists(), "symlink target should expose .git through traversal");
891        // But `dest_has_git_repo` must refuse it.
892        assert!(
893            !dest_has_git_repo(&link),
894            "dest_has_git_repo must refuse a symlinked destination even when target has .git"
895        );
896        // Real (non-symlinked) sibling still passes — we haven't
897        // accidentally broken the happy path.
898        assert!(dest_has_git_repo(&real));
899    }
900
901    // -----------------------------------------------------------------
902    // v1.2.0 Stage 1.g — `sync_meta` three-phase walker tests (TDD).
903    //
    // These tests use a thin in-memory `InMemLoader` plus
    // `InMemGit` so the walker's PHASE ORCHESTRATION (not the
    // backend mechanics) is what's being exercised. The git-touching
907    // primitives `classify_dest` (1.e) and `phase2_prune` (1.f) have
908    // their own per-host tests that already cover the real-FS-and-git
909    // path. The `host_has_git_binary` gate guards the few tests that
910    // need a working `git` to materialise a clean `PresentDeclared`
911    // verdict — same precedent as the `dest_class::tests` host-skip
912    // pattern.
913    // -----------------------------------------------------------------
914
915    use std::collections::HashMap;
916    use std::sync::Mutex;
917
918    /// Minimal stand-in `PackLoader` for the v1.2.0 tests. Maps
919    /// `meta_dir` → `PackManifest` directly so we never touch disk
920    /// for manifest reads.
921    struct InMemLoader {
922        manifests: HashMap<PathBuf, PackManifest>,
923    }
924
925    impl InMemLoader {
926        fn new() -> Self {
927            Self { manifests: HashMap::new() }
928        }
929        fn with(mut self, dir: impl Into<PathBuf>, m: PackManifest) -> Self {
930            self.manifests.insert(dir.into(), m);
931            self
932        }
933    }
934
935    impl PackLoader for InMemLoader {
936        fn load(&self, path: &Path) -> Result<PackManifest, TreeError> {
937            self.manifests
938                .get(path)
939                .cloned()
940                .ok_or_else(|| TreeError::ManifestNotFound(path.to_path_buf()))
941        }
942    }
943
944    /// Minimal stand-in `GitBackend`. Records every call so tests can
945    /// assert phase orchestration. `clone` materialises a `.git/`
946    /// under the supplied dest so subsequent classify probes treat the
947    /// slot as Present.
948    #[allow(dead_code)] // fields populated for future test introspection.
949    #[derive(Debug, Clone)]
950    enum BackendCall {
951        Clone { url: String, dest: PathBuf, r#ref: Option<String> },
952        Fetch { dest: PathBuf },
953        Checkout { dest: PathBuf, r#ref: String },
954        HeadSha { dest: PathBuf },
955    }
956
957    struct InMemGit {
958        calls: Mutex<Vec<BackendCall>>,
959        materialise_on_clone: bool,
960    }
961
962    impl InMemGit {
963        fn new() -> Self {
964            Self { calls: Mutex::new(Vec::new()), materialise_on_clone: true }
965        }
966        fn calls(&self) -> Vec<BackendCall> {
967            self.calls.lock().unwrap().clone()
968        }
969    }
970
971    impl GitBackend for InMemGit {
972        fn name(&self) -> &'static str {
973            "v1_2_0-mock-git"
974        }
975        fn clone(
976            &self,
977            url: &str,
978            dest: &Path,
979            r#ref: Option<&str>,
980        ) -> Result<crate::ClonedRepo, crate::GitError> {
981            self.calls.lock().unwrap().push(BackendCall::Clone {
982                url: url.to_string(),
983                dest: dest.to_path_buf(),
984                r#ref: r#ref.map(str::to_string),
985            });
986            if self.materialise_on_clone {
987                std::fs::create_dir_all(dest.join(".git")).unwrap();
988            }
989            Ok(crate::ClonedRepo { path: dest.to_path_buf(), head_sha: "0".repeat(40) })
990        }
991        fn fetch(&self, dest: &Path) -> Result<(), crate::GitError> {
992            self.calls.lock().unwrap().push(BackendCall::Fetch { dest: dest.to_path_buf() });
993            Ok(())
994        }
995        fn checkout(&self, dest: &Path, r#ref: &str) -> Result<(), crate::GitError> {
996            self.calls
997                .lock()
998                .unwrap()
999                .push(BackendCall::Checkout { dest: dest.to_path_buf(), r#ref: r#ref.to_string() });
1000            Ok(())
1001        }
1002        fn head_sha(&self, dest: &Path) -> Result<String, crate::GitError> {
1003            self.calls.lock().unwrap().push(BackendCall::HeadSha { dest: dest.to_path_buf() });
1004            Ok("0".repeat(40))
1005        }
1006    }
1007
1008    /// Build a meta manifest with the supplied children.
1009    fn meta_manifest_with(name: &str, children: Vec<ChildRef>) -> PackManifest {
1010        PackManifest {
1011            schema_version: SchemaVersion::current(),
1012            name: name.to_string(),
1013            r#type: PackType::Meta,
1014            version: None,
1015            depends_on: Vec::new(),
1016            children,
1017            actions: Vec::new(),
1018            teardown: None,
1019            extensions: BTreeMap::new(),
1020        }
1021    }
1022
1023    fn child(url: &str, path: &str) -> ChildRef {
1024        ChildRef { url: url.to_string(), path: Some(path.to_string()), r#ref: None }
1025    }
1026
1027    fn host_has_git_binary() -> bool {
1028        std::process::Command::new("git")
1029            .arg("--version")
1030            .output()
1031            .is_ok_and(|o| o.status.success())
1032    }
1033
1034    /// Empty meta — no children → the walker returns Ok with no work.
1035    #[test]
1036    fn test_walker_v1_2_0_simple_meta_no_children() {
1037        let tmp = tempfile::tempdir().unwrap();
1038        let meta_dir = tmp.path().to_path_buf();
1039        let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("solo", vec![]));
1040        let backend = InMemGit::new();
1041        let opts = SyncMetaOptions::default();
1042        let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1043        assert_eq!(report.metas_visited, 1);
1044        assert!(report.phase1_classifications.is_empty());
1045        assert!(report.phase2_pruned.is_empty());
1046        assert!(report.errors.is_empty());
1047        assert!(backend.calls().is_empty(), "no children → no git ops");
1048    }
1049
1050    /// Phase 1 classifies each child. With every dest absent on disk,
1051    /// every classification is `Missing` and the backend sees one
1052    /// `Clone` per child.
1053    #[test]
1054    fn test_walker_v1_2_0_phase1_classifies_each_child() {
1055        let tmp = tempfile::tempdir().unwrap();
1056        let meta_dir = tmp.path().to_path_buf();
1057        let kids = vec![
1058            child("https://example.com/a.git", "alpha"),
1059            child("https://example.com/b.git", "beta"),
1060        ];
1061        let loader =
1062            InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", kids.clone()));
1063        let backend = InMemGit::new();
1064        let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1065        let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1066        assert_eq!(report.phase1_classifications.len(), 2);
1067        for (parent, _, class) in &report.phase1_classifications {
1068            assert_eq!(parent, &meta_dir);
1069            assert_eq!(*class, DestClass::Missing);
1070        }
1071        assert!(report.errors.is_empty());
1072        let calls = backend.calls();
1073        assert_eq!(calls.len(), 2, "one clone per child");
1074        for call in calls {
1075            assert!(matches!(call, BackendCall::Clone { .. }));
1076        }
1077    }
1078
1079    /// Phase 1 must aggregate every undeclared `.git/` directory it
1080    /// encounters into a single `UntrackedGitRepos` error. We
1081    /// pre-create two `.git/` slots BEFORE running `sync_meta` and
1082    /// declare them as siblings without paths matching — they classify
1083    /// as `PresentUndeclared` because the manifest does not list them.
1084    #[test]
1085    fn test_walker_v1_2_0_phase1_aggregates_untracked_error() {
1086        // Build a meta whose manifest declares ZERO children — every
1087        // pre-existing `.git/` slot is by definition undeclared.
1088        // Then drop two `.git/` directories under the meta dir and
1089        // (because v1.2.0's classifier needs the manifest declaration
1090        // signal at the call site, not on-disk discovery) run a
1091        // PARALLEL classifier sweep over the on-disk dirs to feed the
1092        // aggregator. This mirrors the way 1.h's lockfile-orphan
1093        // sweep will surface PresentUndeclared dirs into Phase 1's
1094        // collector when a child is removed from the manifest.
1095        let tmp = tempfile::tempdir().unwrap();
1096        let alpha = tmp.path().join("alpha");
1097        let beta = tmp.path().join("beta");
1098        std::fs::create_dir_all(alpha.join(".git")).unwrap();
1099        std::fs::create_dir_all(beta.join(".git")).unwrap();
1100        // Direct unit on the aggregator: feed two `PresentUndeclared`
1101        // pairs and assert the error carries both.
1102        let pairs: Vec<(PathBuf, DestClass)> = vec![
1103            (alpha.clone(), DestClass::PresentUndeclared),
1104            (beta.clone(), DestClass::PresentUndeclared),
1105        ];
1106        let err = aggregate_untracked(pairs).expect_err("two undeclared → error");
1107        match err {
1108            TreeError::UntrackedGitRepos { paths } => {
1109                assert_eq!(paths, vec![alpha, beta]);
1110            }
1111            other => panic!("expected UntrackedGitRepos, got {other:?}"),
1112        }
1113    }
1114
1115    /// Phase 2 prunes a clean orphan: the supplied candidate has a
1116    /// real `.git/` (initialised by `git init`), the consent walk
1117    /// returns Clean, the dest is removed.
1118    #[test]
1119    fn test_walker_v1_2_0_phase2_prunes_clean_orphans() {
1120        if !host_has_git_binary() {
1121            return;
1122        }
1123        let tmp = tempfile::tempdir().unwrap();
1124        let meta_dir = tmp.path().to_path_buf();
1125        // Create the orphan dest — clean repo, no manifest entry.
1126        let orphan = meta_dir.join("ghost");
1127        std::fs::create_dir_all(&orphan).unwrap();
1128        let init =
1129            std::process::Command::new("git").arg("-C").arg(&orphan).args(["init", "-q"]).status();
1130        if !matches!(init, Ok(s) if s.success()) {
1131            return;
1132        }
1133        let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", vec![]));
1134        let backend = InMemGit::new();
1135        let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1136        let prune_list = vec![PathBuf::from("ghost")];
1137        let report = sync_meta(&meta_dir, &backend, &loader, &opts, &prune_list).expect("ok");
1138        assert_eq!(report.phase2_pruned.len(), 1, "clean orphan must be pruned");
1139        assert_eq!(report.phase2_pruned[0], orphan);
1140        assert!(!orphan.exists(), "dest must be removed after a clean prune");
1141        assert!(report.errors.is_empty());
1142    }
1143
1144    /// Phase 2 must REFUSE to prune a dirty orphan absent the override
1145    /// flag. The consent walk classifies it `DirtyTree`; the walker
1146    /// surfaces `DirtyTreeRefusal` and leaves the dest untouched.
1147    #[test]
1148    fn test_walker_v1_2_0_phase2_refuses_dirty_orphan() {
1149        if !host_has_git_binary() {
1150            return;
1151        }
1152        let tmp = tempfile::tempdir().unwrap();
1153        let meta_dir = tmp.path().to_path_buf();
1154        let orphan = meta_dir.join("dirty-ghost");
1155        std::fs::create_dir_all(&orphan).unwrap();
1156        let init =
1157            std::process::Command::new("git").arg("-C").arg(&orphan).args(["init", "-q"]).status();
1158        if !matches!(init, Ok(s) if s.success()) {
1159            return;
1160        }
1161        std::fs::write(orphan.join("scratch.txt"), b"unsaved").unwrap();
1162        let loader = InMemLoader::new().with(meta_dir.clone(), meta_manifest_with("root", vec![]));
1163        let backend = InMemGit::new();
1164        let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1165        let prune_list = vec![PathBuf::from("dirty-ghost")];
1166        let report = sync_meta(&meta_dir, &backend, &loader, &opts, &prune_list).expect("ok");
1167        assert!(report.phase2_pruned.is_empty(), "dirty orphan must NOT be pruned");
1168        assert!(orphan.exists(), "dest stays on disk when refused");
1169        assert_eq!(report.errors.len(), 1);
1170        assert!(matches!(report.errors[0], TreeError::DirtyTreeRefusal { .. }));
1171    }
1172
1173    /// Phase 3 recurses into a child meta when its `.grex/pack.yaml`
1174    /// exists. The sub-meta's own `metas_visited` is folded into the
1175    /// parent's report.
1176    #[test]
1177    fn test_walker_v1_2_0_phase3_recurses_into_sub_meta() {
1178        let tmp = tempfile::tempdir().unwrap();
1179        let meta_dir = tmp.path().to_path_buf();
1180        let child_dest = meta_dir.join("sub");
1181        // Pre-materialise the sub-meta on disk so Phase 1 classifies
1182        // the dest as PresentDeclared (no clone fired) and Phase 3
1183        // sees a `.grex/pack.yaml` to recurse into.
1184        make_sub_meta_on_disk(&child_dest, "sub");
1185        let loader = InMemLoader::new()
1186            .with(
1187                meta_dir.clone(),
1188                meta_manifest_with("root", vec![child("https://example.com/sub.git", "sub")]),
1189            )
1190            .with(child_dest.clone(), meta_manifest_with("sub", vec![]));
1191        let backend = InMemGit::new();
1192        let opts = SyncMetaOptions::default();
1193        let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1194        assert_eq!(report.metas_visited, 2, "parent + sub-meta visited");
1195        assert!(report.errors.is_empty());
1196    }
1197
1198    /// `recurse: false` skips Phase 3 entirely — `metas_visited == 1`
1199    /// even when a child has a `.grex/pack.yaml`.
1200    #[test]
1201    fn test_walker_v1_2_0_phase3_max_depth_zero_skips_recursion() {
1202        let tmp = tempfile::tempdir().unwrap();
1203        let meta_dir = tmp.path().to_path_buf();
1204        let child_dest = meta_dir.join("sub");
1205        make_sub_meta_on_disk(&child_dest, "sub");
1206        let loader = InMemLoader::new()
1207            .with(
1208                meta_dir.clone(),
1209                meta_manifest_with("root", vec![child("https://example.com/sub.git", "sub")]),
1210            )
1211            .with(child_dest.clone(), meta_manifest_with("sub", vec![]));
1212        let backend = InMemGit::new();
1213        let opts = SyncMetaOptions { recurse: false, ..SyncMetaOptions::default() };
1214        let report = sync_meta(&meta_dir, &backend, &loader, &opts, &[]).expect("ok");
1215        assert_eq!(report.metas_visited, 1, "no recursion → only the root meta");
1216    }
1217
1218    /// `max_depth: Some(N)` caps recursion at N levels of nesting.
1219    /// Build a 3-level chain (root → mid → leaf) and assert
1220    /// `max_depth: Some(1)` visits root + mid (depth 0 + 1) but NOT
1221    /// leaf (depth 2).
1222    #[test]
1223    fn test_walker_v1_2_0_phase3_max_depth_n_stops_at_n_levels() {
1224        let tmp = tempfile::tempdir().unwrap();
1225        let root_dir = tmp.path().to_path_buf();
1226        let mid_dir = root_dir.join("mid");
1227        let leaf_dir = mid_dir.join("leaf");
1228        make_sub_meta_on_disk(&mid_dir, "mid");
1229        make_sub_meta_on_disk(&leaf_dir, "leaf");
1230        let loader = InMemLoader::new()
1231            .with(
1232                root_dir.clone(),
1233                meta_manifest_with("root", vec![child("https://example.com/mid.git", "mid")]),
1234            )
1235            .with(
1236                mid_dir.clone(),
1237                meta_manifest_with("mid", vec![child("https://example.com/leaf.git", "leaf")]),
1238            )
1239            .with(leaf_dir.clone(), meta_manifest_with("leaf", vec![]));
1240        let backend = InMemGit::new();
1241        let opts = SyncMetaOptions { max_depth: Some(1), ..SyncMetaOptions::default() };
1242        let report = sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect("ok");
1243        // depth 0 = root, depth 1 = mid → max_depth: Some(1) visits
1244        // root + mid (2 metas) and stops before recursing into leaf.
1245        assert_eq!(report.metas_visited, 2, "max_depth: Some(1) visits root + mid only");
1246    }
1247
1248    /// Helper: pre-populate a sub-meta directory at `dir` with a
1249    /// `.grex/pack.yaml` carrying `name` and a stub `.git/` so the
1250    /// classifier sees it as PresentDeclared.
1251    fn make_sub_meta_on_disk(dir: &Path, name: &str) {
1252        std::fs::create_dir_all(dir.join(".grex")).unwrap();
1253        std::fs::create_dir_all(dir.join(".git")).unwrap();
1254        let yaml = format!("schema_version: \"1\"\nname: {name}\ntype: meta\n");
1255        std::fs::write(dir.join(".grex/pack.yaml"), yaml).unwrap();
1256    }
1257
1258    /// Helper: collect the destinations Phase 1 recorded for a given
1259    /// parent meta from the rolled-up report.
1260    fn destinations_under(report: &SyncMetaReport, parent: &Path) -> Vec<PathBuf> {
1261        report
1262            .phase1_classifications
1263            .iter()
1264            .filter(|(p, _, _)| p == parent)
1265            .map(|(_, d, _)| d.clone())
1266            .collect()
1267    }
1268
1269    /// Parent-relative path resolution: a child declared at the root
1270    /// meta resolves to `<root>/<child>` — NOT to a global workspace
1271    /// anchor. Recursion into that child uses `<root>/<child>` as the
1272    /// new parent meta dir for resolving the grandchild.
1273    #[test]
1274    fn test_walker_v1_2_0_parent_relative_path_resolution() {
1275        let tmp = tempfile::tempdir().unwrap();
1276        let root_dir = tmp.path().to_path_buf();
1277        // Note: 1.c's path-segment validator forbids slashes in the
1278        // `path:` field, so multi-segment nesting is achieved by
1279        // chaining single-segment children across recursion frames.
1280        let tools_dir = root_dir.join("tools");
1281        let foo_dir = tools_dir.join("foo");
1282        make_sub_meta_on_disk(&tools_dir, "tools");
1283        make_sub_meta_on_disk(&foo_dir, "foo");
1284        let loader = InMemLoader::new()
1285            .with(
1286                root_dir.clone(),
1287                meta_manifest_with("root", vec![child("https://example.com/tools.git", "tools")]),
1288            )
1289            .with(
1290                tools_dir.clone(),
1291                meta_manifest_with("tools", vec![child("https://example.com/foo.git", "foo")]),
1292            )
1293            .with(foo_dir.clone(), meta_manifest_with("foo", vec![]));
1294        let backend = InMemGit::new();
1295        let opts = SyncMetaOptions::default();
1296        let report = sync_meta(&root_dir, &backend, &loader, &opts, &[]).expect("ok");
1297        // Three metas visited: root → tools → foo.
1298        assert_eq!(report.metas_visited, 3);
1299        // Phase 1 classifications confirm parent-relative resolution:
1300        // every recorded dest is a SUBDIR of its recorded parent.
1301        for (parent, dest, _class) in &report.phase1_classifications {
1302            assert!(
1303                dest.starts_with(parent),
1304                "child dest {} must descend from parent {}",
1305                dest.display(),
1306                parent.display()
1307            );
1308        }
1309        // Spot-check the chain: root sees `tools`, tools sees `foo`.
1310        assert_eq!(destinations_under(&report, &root_dir), vec![tools_dir.clone()]);
1311        assert_eq!(destinations_under(&report, &tools_dir), vec![foo_dir.clone()]);
1312    }
1313}