Skip to main content

ralph/git/
status.rs

//! Git status and porcelain parsing operations.
//!
//! This module provides functions for parsing git status output, tracking path
//! snapshots, and ensuring files remain unchanged during operations.
//!
//! # Invariants
//! - Porcelain parsing must handle NUL-terminated format correctly
//! - Path snapshots are deterministic and comparable
//!
//! # What this does NOT handle
//! - Commit operations (see git/commit.rs)
//! - LFS validation (see git/lfs.rs)
//! - Repository cleanliness enforcement (see git/clean.rs)
14
15use crate::git::error::{GitError, git_base_command};
16use anyhow::{Context, Result, anyhow, bail};
17use std::collections::HashSet;
18use std::collections::hash_map::DefaultHasher;
19use std::fs;
20use std::hash::Hasher;
21use std::io::Read;
22use std::path::Path;
23
/// A repo-relative path paired with a content fingerprint.
///
/// Two snapshots compare equal only when both the path and the fingerprint
/// match, which is how later checks detect that a path was modified.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PathSnapshot {
    /// The path as recorded when the snapshot was taken.
    pub path: String,
    /// Fingerprint of whatever was found at `path`; `None` when nothing was found there.
    fingerprint: Option<u64>,
}
30
/// One parsed record of `git status --porcelain -z` output.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) struct PorcelainZEntry {
    /// The two-character `XY` status code, kept verbatim (e.g. `"??"`, `"C "`).
    pub xy: String,
    /// The additional path field that follows rename/copy records; `None` for other records.
    pub old_path: Option<String>,
    /// The path reported in the record itself.
    pub path: String,
}
38
39/// Returns raw `git status --porcelain -z` output (may be empty).
40///
41/// NOTE: With `-z`, records are NUL-terminated (0x00) instead of newline-terminated.
42/// This makes the output safe to parse even when filenames contain spaces/newlines.
43pub fn status_porcelain(repo_root: &Path) -> Result<String, GitError> {
44    let output = git_base_command(repo_root)
45        .arg("status")
46        .arg("--porcelain")
47        .arg("-z")
48        .output()
49        .with_context(|| format!("run git status --porcelain -z in {}", repo_root.display()))?;
50
51    if !output.status.success() {
52        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
53        return Err(GitError::CommandFailed {
54            args: "status --porcelain -z".to_string(),
55            code: output.status.code(),
56            stderr: stderr.trim().to_string(),
57        });
58    }
59
60    Ok(String::from_utf8_lossy(&output.stdout).to_string())
61}
62
63/// Returns a list of paths from git status.
64pub fn status_paths(repo_root: &Path) -> Result<Vec<String>, GitError> {
65    let status = status_porcelain(repo_root)?;
66    if status.is_empty() {
67        return Ok(Vec::new());
68    }
69
70    let mut paths = Vec::new();
71    let entries = parse_porcelain_z_entries(&status)?;
72    for entry in entries {
73        if !entry.path.is_empty() {
74            paths.push(entry.path);
75        }
76    }
77    Ok(paths)
78}
79
80/// Returns true if `rel_path` is ignored by git (respecting .gitignore, .git/info/exclude, and global excludes).
81///
82/// Uses `git check-ignore -q <path>`:
83/// - exit code 0 => ignored
84/// - exit code 1 => not ignored
85/// - otherwise => error
86pub fn is_path_ignored(repo_root: &Path, rel_path: &str) -> Result<bool, GitError> {
87    let rel = rel_path.trim();
88    if rel.is_empty() {
89        return Ok(false);
90    }
91
92    let output = git_base_command(repo_root)
93        .arg("check-ignore")
94        .arg("-q")
95        .arg(rel)
96        .output()
97        .with_context(|| format!("run git check-ignore -q {} in {}", rel, repo_root.display()))?;
98
99    match output.status.code() {
100        Some(0) => Ok(true),
101        Some(1) => Ok(false),
102        _ => {
103            let stderr = String::from_utf8_lossy(&output.stderr).to_string();
104            Err(GitError::CommandFailed {
105                args: format!("check-ignore -q {}", rel),
106                code: output.status.code(),
107                stderr: stderr.trim().to_string(),
108            })
109        }
110    }
111}
112
113/// Returns a list of gitignored paths (tracked ignore + local excludes).
114///
115/// Uses `git ls-files -i -o --exclude-standard -z --directory` to get
116/// NUL-delimited paths relative to the repo root.
117pub fn ignored_paths(repo_root: &Path) -> Result<Vec<String>, GitError> {
118    let output = git_base_command(repo_root)
119        .arg("ls-files")
120        .arg("-i")
121        .arg("-o")
122        .arg("--exclude-standard")
123        .arg("-z")
124        .arg("--directory")
125        .output()
126        .with_context(|| format!("run git ls-files -i -o in {}", repo_root.display()))?;
127
128    if !output.status.success() {
129        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
130        return Err(GitError::CommandFailed {
131            args: "ls-files -i -o --exclude-standard -z --directory".to_string(),
132            code: output.status.code(),
133            stderr: stderr.trim().to_string(),
134        });
135    }
136
137    let raw = String::from_utf8_lossy(&output.stdout);
138    if raw.is_empty() {
139        return Ok(Vec::new());
140    }
141
142    let mut paths = Vec::new();
143    for entry in raw.split('\0') {
144        if entry.is_empty() {
145            continue;
146        }
147        paths.push(entry.to_string());
148    }
149    Ok(paths)
150}
151
152/// Create deterministic fingerprints for a list of baseline dirty paths.
153///
154/// This is used to ensure Phase 1 plan-only runs do not mutate pre-existing
155/// dirty files when `allow_dirty_repo` is true.
156pub fn snapshot_paths(repo_root: &Path, paths: &[String]) -> Result<Vec<PathSnapshot>> {
157    if paths.is_empty() {
158        return Ok(Vec::new());
159    }
160
161    let mut unique = HashSet::new();
162    let mut snapshots = Vec::new();
163    for path in paths {
164        let trimmed = path.trim();
165        if trimmed.is_empty() {
166            continue;
167        }
168        let normalized = trimmed.strip_prefix("./").unwrap_or(trimmed);
169        if !unique.insert(normalized.to_string()) {
170            continue;
171        }
172        let fingerprint = snapshot_path(&repo_root.join(normalized))?;
173        snapshots.push(PathSnapshot {
174            path: normalized.to_string(),
175            fingerprint,
176        });
177    }
178
179    snapshots.sort_by(|a, b| a.path.cmp(&b.path));
180    Ok(snapshots)
181}
182
183/// Validate that each baseline dirty path is unchanged from its fingerprint.
184pub fn ensure_paths_unchanged(repo_root: &Path, snapshots: &[PathSnapshot]) -> Result<()> {
185    for snapshot in snapshots {
186        let current = snapshot_path(&repo_root.join(&snapshot.path))?;
187        if current != snapshot.fingerprint {
188            bail!(
189                "Baseline dirty path changed during Phase 1: {}",
190                snapshot.path
191            );
192        }
193    }
194    Ok(())
195}
196
197fn snapshot_path(path: &Path) -> Result<Option<u64>> {
198    if !path.exists() {
199        return Ok(None);
200    }
201    let metadata = fs::symlink_metadata(path)?;
202    if metadata.is_dir() {
203        Ok(Some(hash_dir(path)?))
204    } else if metadata.is_file() {
205        Ok(Some(hash_file(path)?))
206    } else if metadata.file_type().is_symlink() {
207        let target = fs::read_link(path)?;
208        Ok(Some(hash_bytes(&target.to_string_lossy())))
209    } else {
210        Ok(Some(metadata.len()))
211    }
212}
213
214fn hash_dir(path: &Path) -> Result<u64> {
215    let mut entries: Vec<_> = fs::read_dir(path)?.collect::<Result<_, _>>()?;
216    entries.sort_by_key(|entry| entry.file_name());
217
218    let mut hasher = DefaultHasher::new();
219    for entry in entries {
220        let name = entry.file_name();
221        hasher.write(name.to_string_lossy().as_bytes());
222        let file_type = entry.file_type()?;
223        if file_type.is_dir() {
224            hasher.write_u8(1);
225            hasher.write_u64(hash_dir(&entry.path())?);
226        } else if file_type.is_file() {
227            hasher.write_u8(2);
228            hasher.write_u64(hash_file(&entry.path())?);
229        } else if file_type.is_symlink() {
230            hasher.write_u8(3);
231            let target = fs::read_link(entry.path())?;
232            hasher.write(target.to_string_lossy().as_bytes());
233        } else {
234            hasher.write_u8(4);
235            hasher.write_u64(entry.metadata()?.len());
236        }
237    }
238    Ok(hasher.finish())
239}
240
241fn hash_file(path: &Path) -> Result<u64> {
242    let mut file = fs::File::open(path)?;
243    let mut hasher = DefaultHasher::new();
244    let mut buf = [0u8; 8192];
245    loop {
246        let read = file.read(&mut buf)?;
247        if read == 0 {
248            break;
249        }
250        hasher.write(&buf[..read]);
251    }
252    Ok(hasher.finish())
253}
254
/// Hash the raw UTF-8 bytes of `value` with a fresh `DefaultHasher`.
///
/// The bytes are fed through `Hasher::write` directly (not `str`'s `Hash`
/// impl), so the result equals hashing the same bytes written in one call.
fn hash_bytes(value: &str) -> u64 {
    let mut state = DefaultHasher::default();
    Hasher::write(&mut state, value.as_bytes());
    Hasher::finish(&state)
}
260
261/// Parse porcelain -z format entries from git status output.
262pub(crate) fn parse_porcelain_z_entries(status: &str) -> Result<Vec<PorcelainZEntry>, GitError> {
263    if status.is_empty() {
264        return Ok(Vec::new());
265    }
266
267    // Keep simple split-based approach, but parse defensively:
268    // - `git status --porcelain -z` is record-delimited by NUL
269    // - trailing NULs (and even accidental consecutive NULs) should not truncate parsing
270    let fields: Vec<&str> = status.split('\0').collect();
271    let mut idx = 0usize;
272
273    let mut entries = Vec::new();
274    while let Some(entry) = parse_status_path(&fields, &mut idx)? {
275        entries.push(entry);
276    }
277    Ok(entries)
278}
279
/// Report whether a two-byte `XY` status has `R` or `C` in either position.
///
/// Anything that is not exactly two bytes long is never a rename/copy status.
fn is_rename_or_copy_xy(xy: &str) -> bool {
    matches!(xy.as_bytes(), [b'R' | b'C', _] | [_, b'R' | b'C'])
}
287
288fn take_required_field<'a>(
289    fields: &'a [&'a str],
290    idx: &mut usize,
291    label: &str,
292    head: &str,
293    xy: &str,
294) -> Result<&'a str, GitError> {
295    let value = fields.get(*idx).copied().ok_or_else(|| {
296        GitError::Other(anyhow!(
297            "malformed porcelain -z output: missing {} after field {:?} (XY={:?}, next_index={})",
298            label,
299            head,
300            xy,
301            *idx
302        ))
303    })?;
304    *idx = idx.saturating_add(1);
305
306    if value.is_empty() {
307        return Err(GitError::Other(anyhow!(
308            "malformed porcelain -z output: empty {} after field {:?} (XY={:?})",
309            label,
310            head,
311            xy
312        )));
313    }
314
315    Ok(value)
316}
317
318fn parse_status_path(
319    fields: &[&str],
320    idx: &mut usize,
321) -> Result<Option<PorcelainZEntry>, GitError> {
322    // Skip empty fields so we don't prematurely stop on trailing NULs or accidental
323    // consecutive NULs. This is defensive; valid git output should not include empty
324    // records.
325    while *idx < fields.len() && fields[*idx].is_empty() {
326        *idx += 1;
327    }
328
329    if *idx >= fields.len() {
330        return Ok(None);
331    }
332
333    let head = fields[*idx];
334    *idx += 1;
335
336    let (xy, inline_path) = parse_xy_and_inline_path(head)?;
337    let is_rename_or_copy = is_rename_or_copy_xy(xy);
338
339    let path = match inline_path {
340        Some(path) => path,
341        None => take_required_field(fields, idx, "path", head, xy)?,
342    };
343
344    if path.is_empty() {
345        return Err(GitError::Other(anyhow!(
346            "malformed porcelain -z output: empty path in field {:?} (XY={:?})",
347            head,
348            xy
349        )));
350    }
351
352    let old_path = if is_rename_or_copy {
353        Some(
354            take_required_field(fields, idx, "old path field for rename/copy", head, xy)?
355                .to_string(),
356        )
357    } else {
358        None
359    };
360
361    Ok(Some(PorcelainZEntry {
362        xy: xy.to_string(),
363        old_path,
364        path: path.to_string(),
365    }))
366}
367
368fn parse_xy_and_inline_path(field: &str) -> Result<(&str, Option<&str>), GitError> {
369    if field.len() < 2 {
370        return Err(GitError::Other(anyhow!(
371            "malformed porcelain -z output: field too short for XY status: {:?}",
372            field
373        )));
374    }
375
376    let xy = &field[..2];
377
378    if field.len() == 2 {
379        return Ok((xy, None));
380    }
381
382    let bytes = field.as_bytes();
383    if bytes.len() >= 3 && bytes[2] == b' ' {
384        return Ok((xy, Some(&field[3..])));
385    }
386
387    Err(GitError::Other(anyhow!(
388        "malformed porcelain -z output: expected `XY<space>path` or `XY` field, got: {:?}",
389        field
390    )))
391}
392
/// Unit tests for the porcelain `-z` parser, driven by literal input strings.
#[cfg(test)]
mod porcelain_parser_tests {
    use super::*;

    #[test]
    fn parse_porcelain_z_entries_skips_empty_fields_including_trailing_nuls() -> Result<()> {
        // Neither the empty segment between two NULs nor the trailing NULs may
        // cut parsing short.
        let parsed = parse_porcelain_z_entries("?? file1\0\0?? file2\0\0")?;
        let observed: Vec<(&str, &str)> = parsed
            .iter()
            .map(|entry| (entry.xy.as_str(), entry.path.as_str()))
            .collect();
        assert_eq!(observed, vec![("??", "file1"), ("??", "file2")]);
        Ok(())
    }

    #[test]
    fn parse_porcelain_z_entries_parses_copy_entries() -> Result<()> {
        // C (copy) parsing is exercised on a literal record instead of relying
        // on git's copy-detection heuristics in a temp repo.
        let expected = PorcelainZEntry {
            xy: "C ".to_string(),
            old_path: Some("old name.txt".to_string()),
            path: "new name.txt".to_string(),
        };
        let parsed = parse_porcelain_z_entries("C  new name.txt\0old name.txt\0")?;
        assert_eq!(parsed, [expected]);
        Ok(())
    }
}
427
/// Tests for `ignored_paths` that run git against a temporary directory.
#[cfg(test)]
mod ignored_paths_tests {
    use super::*;
    use crate::testsupport::git as git_test;
    use tempfile::TempDir;

    #[test]
    fn ignored_paths_lists_gitignored_entries() -> Result<()> {
        let sandbox = TempDir::new()?;
        let repo_root = sandbox.path().join("repo");
        std::fs::create_dir_all(&repo_root)?;
        git_test::init_repo(&repo_root)?;

        // One ignored file and one ignored directory that contains a file.
        std::fs::write(repo_root.join(".gitignore"), ".env\nignored_dir/\n")?;
        std::fs::write(repo_root.join(".env"), "secret")?;
        std::fs::create_dir_all(repo_root.join("ignored_dir"))?;
        std::fs::write(repo_root.join("ignored_dir/file.txt"), "ignored content")?;

        let ignored = ignored_paths(&repo_root)?;

        for expected in [".env", "ignored_dir/"] {
            assert!(ignored.iter().any(|path| path == expected));
        }
        Ok(())
    }

    #[test]
    fn ignored_paths_errors_outside_repo() {
        let sandbox = TempDir::new().expect("temp dir");
        let plain_dir = sandbox.path().join("repo");
        std::fs::create_dir_all(&plain_dir).expect("create dir");

        // No `init_repo` here: the directory is deliberately not a repository.
        let failure = ignored_paths(&plain_dir).expect_err("should fail outside repo");
        assert!(matches!(failure, GitError::CommandFailed { .. }));
    }
}
461}