Skip to main content

grex_core/tree/
dest_class.rs

1//! Phase 1 destination classifier (Stage 1.e).
2//!
3//! Implements the five-way classifier `classify_dest` whose totality is
4//! proven by the Lean theorem `classify_dest_total` in
5//! `proof/Grex/Phase1.lean`. Each declared child dest, on entry to the
6//! walker's Phase 1, is classified into exactly one of five
7//! [`DestClass`] variants; the walker then dispatches on the result
8//! (clone for `Missing`, fetch for `PresentDeclared`, refuse for
9//! `PresentInProgress`, etc.).
10//!
11//! The `PresentInProgress` probe is backed by Lean bridge axiom
12//! `git_in_progress_decidable` (`proof/Grex/Bridge.lean`), which
13//! enumerates the six marker files this module must check:
14//!
15//! 1. `.git/rebase-merge/`
16//! 2. `.git/rebase-apply/`
17//! 3. `.git/MERGE_HEAD`
18//! 4. `.git/CHERRY_PICK_HEAD`
19//! 5. `.git/REVERT_HEAD`
20//! 6. `.git/BISECT_LOG`
21//!
22//! The aggregation helper [`aggregate_untracked`] folds a sequence of
23//! classifications into the V1 walker contract: when ANY child carries a
24//! `.git/` but is not declared in the manifest, the walker MUST report
25//! ALL such offenders in one [`TreeError::UntrackedGitRepos`] error
26//! before failing — never first-seen. This discharges the Lean axiom
27//! `sync_no_untracked` enumeration property.
28
29use std::path::{Path, PathBuf};
30
31use crate::fs::boundary::BoundedDir;
32use crate::lockfile::LockEntry;
33
34use super::error::TreeError;
35
36/// Phase 1 per-child destination classifier output.
37///
38/// Lean reference: `proof/Grex/Types.lean` §"Five-way destination
39/// classifier" + theorem `classify_dest_total` in
40/// `proof/Grex/Phase1.lean`. Every Rust-side `classify_dest` invocation
41/// returns exactly one variant; this is the totality the proof asserts.
42#[derive(Debug, Clone, Copy, PartialEq, Eq)]
43pub enum DestClass {
44    /// Path doesn't exist on disk. Walker will clone here.
45    Missing,
46    /// Path exists, has both `.git/` AND a manifest entry, AND the
47    /// lockfile entry agrees. Walker will fetch (no clone needed).
48    PresentDeclared,
49    /// Path exists, has `.git/` and a manifest entry, lockfile says
50    /// clean, but the git working tree is dirty (uncommitted changes
51    /// or untracked files).
52    PresentDirty,
53    /// Path exists with `.git/` BUT mid-rebase, mid-merge,
54    /// mid-cherry-pick, mid-bisect, or mid-revert. Backed by the Lean
55    /// bridge axiom `git_in_progress_decidable`. Walker MUST refuse
56    /// here — both Phase 1 fetch and Phase 2 prune are unsafe.
57    PresentInProgress,
58    /// Path exists, has `.git/` BUT no manifest entry declares it.
59    /// Walker aggregates these into a single
60    /// [`TreeError::UntrackedGitRepos`] error after Phase 1 completes
61    /// (see [`aggregate_untracked`]).
62    PresentUndeclared,
63}
64
65/// Probe for an in-progress git operation under `dest`.
66///
67/// Mirrors the six-marker enumeration in Lean bridge axiom
68/// `git_in_progress_decidable` (`proof/Grex/Bridge.lean` lines 195–204).
69/// Each probe is a single `exists()` syscall — the kernel guarantees
70/// per-probe atomicity; cross-probe consistency is provided by the
71/// per-pack [`crate::pack_lock::PackLock`] held by the walker for the
72/// duration of classification (per the axiom's stated soundness
73/// assumption).
74///
75/// Returns `true` when ANY of the six markers is present, `false`
76/// otherwise. A non-existent `.git/` short-circuits to `false` — the
77/// caller is responsible for sequencing the `.git/` existence check
78/// before this probe (see [`classify_dest`]).
79#[must_use]
80pub fn git_in_progress_at(dest: &Path) -> bool {
81    let git = dest.join(".git");
82    if !git.exists() {
83        return false;
84    }
85    // Order mirrors the Lean axiom enumeration. Any single hit is
86    // enough — a mid-flight git operation is non-overlapping in
87    // practice, so short-circuiting on the first positive is correct.
88    git.join("rebase-merge").exists()
89        || git.join("rebase-apply").exists()
90        || git.join("MERGE_HEAD").exists()
91        || git.join("CHERRY_PICK_HEAD").exists()
92        || git.join("REVERT_HEAD").exists()
93        || git.join("BISECT_LOG").exists()
94}
95
96/// Probe whether the working tree at `dest` carries any tracked
97/// modifications or untracked-non-ignored content.
98///
99/// Implementation: shells out to `git status --porcelain --ignored=no`
100/// and returns `true` iff stdout is non-empty. The `--ignored=no` flag
101/// keeps gitignored content (build artefacts, IDE files) from
102/// triggering a false-positive dirty verdict — the Phase 2 prune
103/// safety check has its own `--force-prune-with-ignored` knob for the
104/// ignored-only case (Stage 1.f / 1.l territory).
105///
106/// On any error (no `git` on PATH, dest is not a git repo, permission
107/// denied), the probe returns `false`. This is conservative for Phase
108/// 1: a clean classification means the walker proceeds with fetch; a
109/// false negative on dirtiness will be caught at Phase 2 prune-time
110/// where the consent walk re-probes via the planned `recursive_consent`
111/// helper (Stage 1.f).
112#[must_use]
113fn git_status_dirty(dest: &Path) -> bool {
114    let output = std::process::Command::new("git")
115        .arg("-C")
116        .arg(dest)
117        .arg("status")
118        .arg("--porcelain")
119        .arg("--ignored=no")
120        .output();
121    match output {
122        Ok(out) if out.status.success() => !out.stdout.is_empty(),
123        _ => false,
124    }
125}
126
127/// Phase 1 five-way classifier.
128///
129/// `dest` is the prospective on-disk destination resolved against the
130/// parent meta. `declared_in_manifest` is `true` when the parent's
131/// `manifest.children[]` contains an entry whose `effective_path`
132/// resolves to this slot. `lockfile_entry` is the parent's lockfile
133/// entry for this dest (or `None` if absent).
134///
135/// Decision tree (mirrors the Lean axiom `classify_dest`):
136///
137/// 1. `dest` does not exist → [`DestClass::Missing`].
138/// 2. `dest/.git` does not exist → [`DestClass::Missing`] (the
139///    declarative-slot case: a directory with no `.git/` is treated
140///    as a fresh slot the walker will clone into; the v1.2.0 nested-
141///    children semantics already rule out the "real content with no
142///    .git" case at validator time).
143/// 3. [`git_in_progress_at`] returns `true` → [`DestClass::PresentInProgress`].
144/// 4. `!declared_in_manifest` → [`DestClass::PresentUndeclared`].
145/// 5. `git_status_dirty` returns `true` → [`DestClass::PresentDirty`].
146/// 6. Otherwise → [`DestClass::PresentDeclared`].
147///
148/// `lockfile_entry` is reserved for Stage 1.h's lockfile-vs-disk drift
149/// detection (e.g. lockfile records a sha that disagrees with the
150/// on-disk HEAD); for Stage 1.e it is accepted but unused — the
151/// classifier is sound without it. Future stages will tighten the
152/// `PresentDeclared` branch into `PresentDeclared` vs `PresentDrift`.
153///
154/// # BoundedDir integration (Stage 1.d wiring)
155///
156/// When `dest.parent()` is available, this function opens the parent
157/// as a `BoundedDir` and the relative dest as a child handle BEFORE
158/// any `.git/` probe. That binds the kernel resolution to an inode,
159/// closing the canonicalize→probe TOCTOU window. If the dirfd open
160/// fails (parent missing, traversal attempt, symlink escape), the
161/// classifier falls back to [`DestClass::Missing`] — the dest is not
162/// reachable through a confined-handle traversal, so Phase 1 cannot
163/// safely operate on it; treating it as Missing means the walker will
164/// either clone fresh (if the slot legitimately doesn't exist yet) or
165/// the subsequent clone attempt will surface the real underlying error.
166#[must_use]
167pub fn classify_dest(
168    dest: &Path,
169    declared_in_manifest: bool,
170    _lockfile_entry: Option<&LockEntry>,
171) -> DestClass {
172    // Step 0: confine via BoundedDir when a parent is available. This
173    // is the Stage 1.d integration the audit flagged for Phase 1: bind
174    // to an inode BEFORE FS probes so a swap of `dest` for a symlink
175    // mid-classify cannot redirect the probes.
176    if let (Some(parent), Some(name)) = (dest.parent(), dest.file_name()) {
177        if !parent.as_os_str().is_empty() {
178            // BoundedDir::open returns Err when the dest does not
179            // exist OR when the resolution traverses a symlink. Either
180            // case is "Missing" from the classifier's view: a non-
181            // existent slot is the happy path for clone, and a
182            // symlink-escape slot must not be operated on.
183            if BoundedDir::open(parent, Path::new(name)).is_err() {
184                return DestClass::Missing;
185            }
186        }
187    }
188
189    // Step 1: dest must exist and carry a `.git/` to be considered
190    // present. Either miss → Missing.
191    if !dest.exists() {
192        return DestClass::Missing;
193    }
194    if !dest.join(".git").exists() {
195        return DestClass::Missing;
196    }
197
198    // Step 2: in-progress operation gate (refusal class).
199    if git_in_progress_at(dest) {
200        return DestClass::PresentInProgress;
201    }
202
203    // Step 3: untracked / undeclared gate (aggregation class).
204    if !declared_in_manifest {
205        return DestClass::PresentUndeclared;
206    }
207
208    // Step 4: dirty-tree gate.
209    if git_status_dirty(dest) {
210        return DestClass::PresentDirty;
211    }
212
213    // Step 5: clean and declared.
214    DestClass::PresentDeclared
215}
216
217/// Aggregate per-child classifications into the Phase 1 untracked-repo
218/// error.
219///
220/// Walks the `(path, class)` pairs and collects every
221/// [`DestClass::PresentUndeclared`] path. If any are found, returns
222/// [`TreeError::UntrackedGitRepos`] with the COMPLETE list (preserving
223/// input order). Empty input — or no `PresentUndeclared` — returns
224/// `Ok(())`.
225///
226/// Discharges the Lean axiom `sync_no_untracked` enumeration property:
227/// the walker MUST not surface only the first offender — every
228/// undeclared `.git/` slot is reported in one go so the operator can
229/// fix the manifest with a single pass.
230pub fn aggregate_untracked<I, P>(classifications: I) -> Result<(), TreeError>
231where
232    I: IntoIterator<Item = (P, DestClass)>,
233    P: Into<PathBuf>,
234{
235    let untracked: Vec<PathBuf> = classifications
236        .into_iter()
237        .filter_map(|(p, c)| if c == DestClass::PresentUndeclared { Some(p.into()) } else { None })
238        .collect();
239    if untracked.is_empty() {
240        Ok(())
241    } else {
242        Err(TreeError::UntrackedGitRepos { paths: untracked })
243    }
244}
245
246#[cfg(test)]
247mod tests {
248    use super::*;
249    use std::fs;
250    use tempfile::tempdir;
251
252    /// Helper: turn a tempdir into a directory that looks like a
253    /// minimal git checkout (just a `.git/` folder; enough for the
254    /// classifier's `.git/` existence probes). Returns the dest path.
255    fn make_git_dir(parent: &Path, name: &str) -> PathBuf {
256        let dest = parent.join(name);
257        fs::create_dir_all(dest.join(".git")).unwrap();
258        dest
259    }
260
261    #[test]
262    fn test_classify_dest_missing() {
263        let parent = tempdir().unwrap();
264        let dest = parent.path().join("absent");
265        assert_eq!(classify_dest(&dest, true, None), DestClass::Missing);
266    }
267
268    #[test]
269    fn test_classify_dest_missing_when_no_dot_git() {
270        // Directory exists but has no `.git/` — classifier treats this
271        // as a fresh declarative slot (Missing-equivalent).
272        let parent = tempdir().unwrap();
273        let dest = parent.path().join("plain-dir");
274        fs::create_dir(&dest).unwrap();
275        assert_eq!(classify_dest(&dest, true, None), DestClass::Missing);
276    }
277
278    #[test]
279    fn test_classify_dest_present_declared() {
280        let parent = tempdir().unwrap();
281        let dest = make_git_dir(parent.path(), "child");
282        // Initialise as a real repo so `git status` returns success
283        // and reports a clean tree.
284        let init =
285            std::process::Command::new("git").arg("-C").arg(&dest).arg("init").arg("-q").status();
286        if init.is_err() || !init.unwrap().success() {
287            // Host has no `git` binary — skip. The classifier still
288            // treats `.git/` + declared as PresentDeclared via the
289            // `git_status_dirty` fallback (returns false on error), so
290            // assert that path too.
291        }
292        assert_eq!(classify_dest(&dest, true, None), DestClass::PresentDeclared);
293    }
294
295    #[test]
296    fn test_classify_dest_present_dirty() {
297        let parent = tempdir().unwrap();
298        let dest = make_git_dir(parent.path(), "child");
299        // Initialise a real repo so `git status` runs.
300        let init =
301            std::process::Command::new("git").arg("-C").arg(&dest).arg("init").arg("-q").status();
302        if init.is_err() || !init.map(|s| s.success()).unwrap_or(false) {
303            // No `git` binary — `git_status_dirty` returns false and
304            // we'd misclassify as Declared. Skip this test on that
305            // host; the per-host symlink-test pattern in
306            // `boundary::tests` is the precedent.
307            return;
308        }
309        // Write an untracked file → dirty.
310        fs::write(dest.join("dirty.txt"), b"hello").unwrap();
311        assert_eq!(classify_dest(&dest, true, None), DestClass::PresentDirty);
312    }
313
314    #[test]
315    fn test_classify_dest_present_in_progress_rebase() {
316        let parent = tempdir().unwrap();
317        let dest = make_git_dir(parent.path(), "child");
318        fs::create_dir_all(dest.join(".git/rebase-merge")).unwrap();
319        assert_eq!(classify_dest(&dest, true, None), DestClass::PresentInProgress);
320    }
321
322    #[test]
323    fn test_classify_dest_present_in_progress_rebase_apply() {
324        let parent = tempdir().unwrap();
325        let dest = make_git_dir(parent.path(), "child");
326        fs::create_dir_all(dest.join(".git/rebase-apply")).unwrap();
327        assert_eq!(classify_dest(&dest, true, None), DestClass::PresentInProgress);
328    }
329
330    #[test]
331    fn test_classify_dest_present_in_progress_merge() {
332        let parent = tempdir().unwrap();
333        let dest = make_git_dir(parent.path(), "child");
334        fs::write(dest.join(".git/MERGE_HEAD"), b"deadbeef").unwrap();
335        assert_eq!(classify_dest(&dest, true, None), DestClass::PresentInProgress);
336    }
337
338    #[test]
339    fn test_classify_dest_present_in_progress_cherry_pick() {
340        let parent = tempdir().unwrap();
341        let dest = make_git_dir(parent.path(), "child");
342        fs::write(dest.join(".git/CHERRY_PICK_HEAD"), b"deadbeef").unwrap();
343        assert_eq!(classify_dest(&dest, true, None), DestClass::PresentInProgress);
344    }
345
346    #[test]
347    fn test_classify_dest_present_in_progress_revert() {
348        let parent = tempdir().unwrap();
349        let dest = make_git_dir(parent.path(), "child");
350        fs::write(dest.join(".git/REVERT_HEAD"), b"deadbeef").unwrap();
351        assert_eq!(classify_dest(&dest, true, None), DestClass::PresentInProgress);
352    }
353
354    #[test]
355    fn test_classify_dest_present_in_progress_bisect() {
356        let parent = tempdir().unwrap();
357        let dest = make_git_dir(parent.path(), "child");
358        fs::write(dest.join(".git/BISECT_LOG"), b"start").unwrap();
359        assert_eq!(classify_dest(&dest, true, None), DestClass::PresentInProgress);
360    }
361
362    #[test]
363    fn test_classify_dest_present_undeclared() {
364        let parent = tempdir().unwrap();
365        let dest = make_git_dir(parent.path(), "child");
366        // Not declared in manifest → undeclared (regardless of dirty).
367        assert_eq!(classify_dest(&dest, false, None), DestClass::PresentUndeclared);
368    }
369
370    #[test]
371    fn test_git_in_progress_at_returns_false_on_clean_repo() {
372        let parent = tempdir().unwrap();
373        let dest = make_git_dir(parent.path(), "child");
374        assert!(!git_in_progress_at(&dest));
375    }
376
377    #[test]
378    fn test_git_in_progress_at_returns_false_on_non_repo() {
379        let parent = tempdir().unwrap();
380        let dest = parent.path().join("not-a-repo");
381        fs::create_dir(&dest).unwrap();
382        assert!(!git_in_progress_at(&dest));
383    }
384
385    #[test]
386    fn test_phase1_aggregates_untracked_into_error() {
387        // Three undeclared dirs interleaved with one declared and one
388        // missing. Aggregation must surface ALL undeclared paths in
389        // input order, not just the first.
390        let inputs: Vec<(PathBuf, DestClass)> = vec![
391            (PathBuf::from("alpha"), DestClass::PresentUndeclared),
392            (PathBuf::from("beta"), DestClass::PresentDeclared),
393            (PathBuf::from("gamma"), DestClass::PresentUndeclared),
394            (PathBuf::from("delta"), DestClass::Missing),
395            (PathBuf::from("epsilon"), DestClass::PresentUndeclared),
396        ];
397        let err =
398            aggregate_untracked(inputs).expect_err("aggregation must fail when any undeclared");
399        match err {
400            TreeError::UntrackedGitRepos { paths } => {
401                assert_eq!(
402                    paths,
403                    vec![PathBuf::from("alpha"), PathBuf::from("gamma"), PathBuf::from("epsilon"),],
404                    "aggregator must enumerate ALL undeclared paths in input order",
405                );
406            }
407            other => panic!("expected UntrackedGitRepos, got {other:?}"),
408        }
409    }
410
411    #[test]
412    fn test_phase1_aggregate_ok_when_no_undeclared() {
413        let inputs: Vec<(PathBuf, DestClass)> = vec![
414            (PathBuf::from("alpha"), DestClass::Missing),
415            (PathBuf::from("beta"), DestClass::PresentDeclared),
416        ];
417        assert!(aggregate_untracked(inputs).is_ok());
418    }
419
420    #[test]
421    fn test_phase1_aggregate_ok_on_empty_input() {
422        let inputs: Vec<(PathBuf, DestClass)> = Vec::new();
423        assert!(aggregate_untracked(inputs).is_ok());
424    }
425}