grex_core/tree/dest_class.rs
1//! Phase 1 destination classifier (Stage 1.e).
2//!
3//! Implements the five-way classifier `classify_dest` whose totality is
4//! proven by the Lean theorem `classify_dest_total` in
5//! `proof/Grex/Phase1.lean`. Each declared child dest, on entry to the
6//! walker's Phase 1, is classified into exactly one of five
7//! [`DestClass`] variants; the walker then dispatches on the result
8//! (clone for `Missing`, fetch for `PresentDeclared`, refuse for
9//! `PresentInProgress`, etc.).
10//!
11//! The `PresentInProgress` probe is backed by Lean bridge axiom
12//! `git_in_progress_decidable` (`proof/Grex/Bridge.lean`), which
13//! enumerates the six marker files this module must check:
14//!
15//! 1. `.git/rebase-merge/`
16//! 2. `.git/rebase-apply/`
17//! 3. `.git/MERGE_HEAD`
18//! 4. `.git/CHERRY_PICK_HEAD`
19//! 5. `.git/REVERT_HEAD`
20//! 6. `.git/BISECT_LOG`
21//!
22//! The aggregation helper [`aggregate_untracked`] folds a sequence of
23//! classifications into the V1 walker contract: when ANY child carries a
24//! `.git/` but is not declared in the manifest, the walker MUST report
25//! ALL such offenders in one [`TreeError::UntrackedGitRepos`] error
26//! before failing — never first-seen. This discharges the Lean axiom
27//! `sync_no_untracked` enumeration property.
28
29use std::path::{Path, PathBuf};
30
31use crate::fs::boundary::BoundedDir;
32use crate::lockfile::LockEntry;
33
34use super::error::TreeError;
35
36/// Phase 1 per-child destination classifier output.
37///
38/// Lean reference: `proof/Grex/Types.lean` §"Five-way destination
39/// classifier" + theorem `classify_dest_total` in
40/// `proof/Grex/Phase1.lean`. Every Rust-side `classify_dest` invocation
41/// returns exactly one variant; this is the totality the proof asserts.
42#[derive(Debug, Clone, Copy, PartialEq, Eq)]
43pub enum DestClass {
44 /// Path doesn't exist on disk. Walker will clone here.
45 Missing,
46 /// Path exists, has both `.git/` AND a manifest entry, AND the
47 /// lockfile entry agrees. Walker will fetch (no clone needed).
48 PresentDeclared,
49 /// Path exists, has `.git/` and a manifest entry, lockfile says
50 /// clean, but the git working tree is dirty (uncommitted changes
51 /// or untracked files).
52 PresentDirty,
53 /// Path exists with `.git/` BUT mid-rebase, mid-merge,
54 /// mid-cherry-pick, mid-bisect, or mid-revert. Backed by the Lean
55 /// bridge axiom `git_in_progress_decidable`. Walker MUST refuse
56 /// here — both Phase 1 fetch and Phase 2 prune are unsafe.
57 PresentInProgress,
58 /// Path exists, has `.git/` BUT no manifest entry declares it.
59 /// Walker aggregates these into a single
60 /// [`TreeError::UntrackedGitRepos`] error after Phase 1 completes
61 /// (see [`aggregate_untracked`]).
62 PresentUndeclared,
63}
64
65/// Probe for an in-progress git operation under `dest`.
66///
67/// Mirrors the six-marker enumeration in Lean bridge axiom
68/// `git_in_progress_decidable` (`proof/Grex/Bridge.lean` lines 195–204).
69/// Each probe is a single `exists()` syscall — the kernel guarantees
70/// per-probe atomicity; cross-probe consistency is provided by the
71/// per-pack [`crate::pack_lock::PackLock`] held by the walker for the
72/// duration of classification (per the axiom's stated soundness
73/// assumption).
74///
75/// Returns `true` when ANY of the six markers is present, `false`
76/// otherwise. A non-existent `.git/` short-circuits to `false` — the
77/// caller is responsible for sequencing the `.git/` existence check
78/// before this probe (see [`classify_dest`]).
79#[must_use]
80pub fn git_in_progress_at(dest: &Path) -> bool {
81 let git = dest.join(".git");
82 if !git.exists() {
83 return false;
84 }
85 // Order mirrors the Lean axiom enumeration. Any single hit is
86 // enough — a mid-flight git operation is non-overlapping in
87 // practice, so short-circuiting on the first positive is correct.
88 git.join("rebase-merge").exists()
89 || git.join("rebase-apply").exists()
90 || git.join("MERGE_HEAD").exists()
91 || git.join("CHERRY_PICK_HEAD").exists()
92 || git.join("REVERT_HEAD").exists()
93 || git.join("BISECT_LOG").exists()
94}
95
96/// Probe whether the working tree at `dest` carries any tracked
97/// modifications or untracked-non-ignored content.
98///
99/// Implementation: shells out to `git status --porcelain --ignored=no`
100/// and returns `true` iff stdout is non-empty. The `--ignored=no` flag
101/// keeps gitignored content (build artefacts, IDE files) from
102/// triggering a false-positive dirty verdict — the Phase 2 prune
103/// safety check has its own `--force-prune-with-ignored` knob for the
104/// ignored-only case (Stage 1.f / 1.l territory).
105///
106/// On any error (no `git` on PATH, dest is not a git repo, permission
107/// denied), the probe returns `false`. This is conservative for Phase
108/// 1: a clean classification means the walker proceeds with fetch; a
109/// false negative on dirtiness will be caught at Phase 2 prune-time
110/// where the consent walk re-probes via the planned `recursive_consent`
111/// helper (Stage 1.f).
112#[must_use]
113fn git_status_dirty(dest: &Path) -> bool {
114 let output = std::process::Command::new("git")
115 .arg("-C")
116 .arg(dest)
117 .arg("status")
118 .arg("--porcelain")
119 .arg("--ignored=no")
120 .output();
121 match output {
122 Ok(out) if out.status.success() => !out.stdout.is_empty(),
123 _ => false,
124 }
125}
126
127/// Phase 1 five-way classifier.
128///
129/// `dest` is the prospective on-disk destination resolved against the
130/// parent meta. `declared_in_manifest` is `true` when the parent's
131/// `manifest.children[]` contains an entry whose `effective_path`
132/// resolves to this slot. `lockfile_entry` is the parent's lockfile
133/// entry for this dest (or `None` if absent).
134///
135/// Decision tree (mirrors the Lean axiom `classify_dest`):
136///
137/// 1. `dest` does not exist → [`DestClass::Missing`].
138/// 2. `dest/.git` does not exist → [`DestClass::Missing`] (the
139/// declarative-slot case: a directory with no `.git/` is treated
140/// as a fresh slot the walker will clone into; the v1.2.0 nested-
141/// children semantics already rule out the "real content with no
142/// .git" case at validator time).
143/// 3. [`git_in_progress_at`] returns `true` → [`DestClass::PresentInProgress`].
144/// 4. `!declared_in_manifest` → [`DestClass::PresentUndeclared`].
145/// 5. `git_status_dirty` returns `true` → [`DestClass::PresentDirty`].
146/// 6. Otherwise → [`DestClass::PresentDeclared`].
147///
148/// `lockfile_entry` is reserved for Stage 1.h's lockfile-vs-disk drift
149/// detection (e.g. lockfile records a sha that disagrees with the
150/// on-disk HEAD); for Stage 1.e it is accepted but unused — the
151/// classifier is sound without it. Future stages will tighten the
152/// `PresentDeclared` branch into `PresentDeclared` vs `PresentDrift`.
153///
154/// # BoundedDir integration (Stage 1.d wiring)
155///
156/// When `dest.parent()` is available, this function opens the parent
157/// as a `BoundedDir` and the relative dest as a child handle BEFORE
158/// any `.git/` probe. That binds the kernel resolution to an inode,
159/// closing the canonicalize→probe TOCTOU window. If the dirfd open
160/// fails (parent missing, traversal attempt, symlink escape), the
161/// classifier falls back to [`DestClass::Missing`] — the dest is not
162/// reachable through a confined-handle traversal, so Phase 1 cannot
163/// safely operate on it; treating it as Missing means the walker will
164/// either clone fresh (if the slot legitimately doesn't exist yet) or
165/// the subsequent clone attempt will surface the real underlying error.
166#[must_use]
167pub fn classify_dest(
168 dest: &Path,
169 declared_in_manifest: bool,
170 _lockfile_entry: Option<&LockEntry>,
171) -> DestClass {
172 // Step 0: confine via BoundedDir when a parent is available. This
173 // is the Stage 1.d integration the audit flagged for Phase 1: bind
174 // to an inode BEFORE FS probes so a swap of `dest` for a symlink
175 // mid-classify cannot redirect the probes.
176 if let (Some(parent), Some(name)) = (dest.parent(), dest.file_name()) {
177 if !parent.as_os_str().is_empty() {
178 // BoundedDir::open returns Err when the dest does not
179 // exist OR when the resolution traverses a symlink. Either
180 // case is "Missing" from the classifier's view: a non-
181 // existent slot is the happy path for clone, and a
182 // symlink-escape slot must not be operated on.
183 if BoundedDir::open(parent, Path::new(name)).is_err() {
184 return DestClass::Missing;
185 }
186 }
187 }
188
189 // Step 1: dest must exist and carry a `.git/` to be considered
190 // present. Either miss → Missing.
191 if !dest.exists() {
192 return DestClass::Missing;
193 }
194 if !dest.join(".git").exists() {
195 return DestClass::Missing;
196 }
197
198 // Step 2: in-progress operation gate (refusal class).
199 if git_in_progress_at(dest) {
200 return DestClass::PresentInProgress;
201 }
202
203 // Step 3: untracked / undeclared gate (aggregation class).
204 if !declared_in_manifest {
205 return DestClass::PresentUndeclared;
206 }
207
208 // Step 4: dirty-tree gate.
209 if git_status_dirty(dest) {
210 return DestClass::PresentDirty;
211 }
212
213 // Step 5: clean and declared.
214 DestClass::PresentDeclared
215}
216
217/// Aggregate per-child classifications into the Phase 1 untracked-repo
218/// error.
219///
220/// Walks the `(path, class)` pairs and collects every
221/// [`DestClass::PresentUndeclared`] path. If any are found, returns
222/// [`TreeError::UntrackedGitRepos`] with the COMPLETE list (preserving
223/// input order). Empty input — or no `PresentUndeclared` — returns
224/// `Ok(())`.
225///
226/// Discharges the Lean axiom `sync_no_untracked` enumeration property:
227/// the walker MUST not surface only the first offender — every
228/// undeclared `.git/` slot is reported in one go so the operator can
229/// fix the manifest with a single pass.
230pub fn aggregate_untracked<I, P>(classifications: I) -> Result<(), TreeError>
231where
232 I: IntoIterator<Item = (P, DestClass)>,
233 P: Into<PathBuf>,
234{
235 let untracked: Vec<PathBuf> = classifications
236 .into_iter()
237 .filter_map(|(p, c)| if c == DestClass::PresentUndeclared { Some(p.into()) } else { None })
238 .collect();
239 if untracked.is_empty() {
240 Ok(())
241 } else {
242 Err(TreeError::UntrackedGitRepos { paths: untracked })
243 }
244}
245
246#[cfg(test)]
247mod tests {
248 use super::*;
249 use std::fs;
250 use tempfile::tempdir;
251
252 /// Helper: turn a tempdir into a directory that looks like a
253 /// minimal git checkout (just a `.git/` folder; enough for the
254 /// classifier's `.git/` existence probes). Returns the dest path.
255 fn make_git_dir(parent: &Path, name: &str) -> PathBuf {
256 let dest = parent.join(name);
257 fs::create_dir_all(dest.join(".git")).unwrap();
258 dest
259 }
260
261 #[test]
262 fn test_classify_dest_missing() {
263 let parent = tempdir().unwrap();
264 let dest = parent.path().join("absent");
265 assert_eq!(classify_dest(&dest, true, None), DestClass::Missing);
266 }
267
268 #[test]
269 fn test_classify_dest_missing_when_no_dot_git() {
270 // Directory exists but has no `.git/` — classifier treats this
271 // as a fresh declarative slot (Missing-equivalent).
272 let parent = tempdir().unwrap();
273 let dest = parent.path().join("plain-dir");
274 fs::create_dir(&dest).unwrap();
275 assert_eq!(classify_dest(&dest, true, None), DestClass::Missing);
276 }
277
278 #[test]
279 fn test_classify_dest_present_declared() {
280 let parent = tempdir().unwrap();
281 let dest = make_git_dir(parent.path(), "child");
282 // Initialise as a real repo so `git status` returns success
283 // and reports a clean tree.
284 let init =
285 std::process::Command::new("git").arg("-C").arg(&dest).arg("init").arg("-q").status();
286 if init.is_err() || !init.unwrap().success() {
287 // Host has no `git` binary — skip. The classifier still
288 // treats `.git/` + declared as PresentDeclared via the
289 // `git_status_dirty` fallback (returns false on error), so
290 // assert that path too.
291 }
292 assert_eq!(classify_dest(&dest, true, None), DestClass::PresentDeclared);
293 }
294
295 #[test]
296 fn test_classify_dest_present_dirty() {
297 let parent = tempdir().unwrap();
298 let dest = make_git_dir(parent.path(), "child");
299 // Initialise a real repo so `git status` runs.
300 let init =
301 std::process::Command::new("git").arg("-C").arg(&dest).arg("init").arg("-q").status();
302 if init.is_err() || !init.map(|s| s.success()).unwrap_or(false) {
303 // No `git` binary — `git_status_dirty` returns false and
304 // we'd misclassify as Declared. Skip this test on that
305 // host; the per-host symlink-test pattern in
306 // `boundary::tests` is the precedent.
307 return;
308 }
309 // Write an untracked file → dirty.
310 fs::write(dest.join("dirty.txt"), b"hello").unwrap();
311 assert_eq!(classify_dest(&dest, true, None), DestClass::PresentDirty);
312 }
313
314 #[test]
315 fn test_classify_dest_present_in_progress_rebase() {
316 let parent = tempdir().unwrap();
317 let dest = make_git_dir(parent.path(), "child");
318 fs::create_dir_all(dest.join(".git/rebase-merge")).unwrap();
319 assert_eq!(classify_dest(&dest, true, None), DestClass::PresentInProgress);
320 }
321
322 #[test]
323 fn test_classify_dest_present_in_progress_rebase_apply() {
324 let parent = tempdir().unwrap();
325 let dest = make_git_dir(parent.path(), "child");
326 fs::create_dir_all(dest.join(".git/rebase-apply")).unwrap();
327 assert_eq!(classify_dest(&dest, true, None), DestClass::PresentInProgress);
328 }
329
330 #[test]
331 fn test_classify_dest_present_in_progress_merge() {
332 let parent = tempdir().unwrap();
333 let dest = make_git_dir(parent.path(), "child");
334 fs::write(dest.join(".git/MERGE_HEAD"), b"deadbeef").unwrap();
335 assert_eq!(classify_dest(&dest, true, None), DestClass::PresentInProgress);
336 }
337
338 #[test]
339 fn test_classify_dest_present_in_progress_cherry_pick() {
340 let parent = tempdir().unwrap();
341 let dest = make_git_dir(parent.path(), "child");
342 fs::write(dest.join(".git/CHERRY_PICK_HEAD"), b"deadbeef").unwrap();
343 assert_eq!(classify_dest(&dest, true, None), DestClass::PresentInProgress);
344 }
345
346 #[test]
347 fn test_classify_dest_present_in_progress_revert() {
348 let parent = tempdir().unwrap();
349 let dest = make_git_dir(parent.path(), "child");
350 fs::write(dest.join(".git/REVERT_HEAD"), b"deadbeef").unwrap();
351 assert_eq!(classify_dest(&dest, true, None), DestClass::PresentInProgress);
352 }
353
354 #[test]
355 fn test_classify_dest_present_in_progress_bisect() {
356 let parent = tempdir().unwrap();
357 let dest = make_git_dir(parent.path(), "child");
358 fs::write(dest.join(".git/BISECT_LOG"), b"start").unwrap();
359 assert_eq!(classify_dest(&dest, true, None), DestClass::PresentInProgress);
360 }
361
362 #[test]
363 fn test_classify_dest_present_undeclared() {
364 let parent = tempdir().unwrap();
365 let dest = make_git_dir(parent.path(), "child");
366 // Not declared in manifest → undeclared (regardless of dirty).
367 assert_eq!(classify_dest(&dest, false, None), DestClass::PresentUndeclared);
368 }
369
370 #[test]
371 fn test_git_in_progress_at_returns_false_on_clean_repo() {
372 let parent = tempdir().unwrap();
373 let dest = make_git_dir(parent.path(), "child");
374 assert!(!git_in_progress_at(&dest));
375 }
376
377 #[test]
378 fn test_git_in_progress_at_returns_false_on_non_repo() {
379 let parent = tempdir().unwrap();
380 let dest = parent.path().join("not-a-repo");
381 fs::create_dir(&dest).unwrap();
382 assert!(!git_in_progress_at(&dest));
383 }
384
385 #[test]
386 fn test_phase1_aggregates_untracked_into_error() {
387 // Three undeclared dirs interleaved with one declared and one
388 // missing. Aggregation must surface ALL undeclared paths in
389 // input order, not just the first.
390 let inputs: Vec<(PathBuf, DestClass)> = vec![
391 (PathBuf::from("alpha"), DestClass::PresentUndeclared),
392 (PathBuf::from("beta"), DestClass::PresentDeclared),
393 (PathBuf::from("gamma"), DestClass::PresentUndeclared),
394 (PathBuf::from("delta"), DestClass::Missing),
395 (PathBuf::from("epsilon"), DestClass::PresentUndeclared),
396 ];
397 let err =
398 aggregate_untracked(inputs).expect_err("aggregation must fail when any undeclared");
399 match err {
400 TreeError::UntrackedGitRepos { paths } => {
401 assert_eq!(
402 paths,
403 vec![PathBuf::from("alpha"), PathBuf::from("gamma"), PathBuf::from("epsilon"),],
404 "aggregator must enumerate ALL undeclared paths in input order",
405 );
406 }
407 other => panic!("expected UntrackedGitRepos, got {other:?}"),
408 }
409 }
410
411 #[test]
412 fn test_phase1_aggregate_ok_when_no_undeclared() {
413 let inputs: Vec<(PathBuf, DestClass)> = vec![
414 (PathBuf::from("alpha"), DestClass::Missing),
415 (PathBuf::from("beta"), DestClass::PresentDeclared),
416 ];
417 assert!(aggregate_untracked(inputs).is_ok());
418 }
419
420 #[test]
421 fn test_phase1_aggregate_ok_on_empty_input() {
422 let inputs: Vec<(PathBuf, DestClass)> = Vec::new();
423 assert!(aggregate_untracked(inputs).is_ok());
424 }
425}