Skip to main content

ralph/git/
status.rs

1//! Git status and porcelain parsing operations.
2//!
3//! This module provides functions for parsing git status output, tracking path
4//! snapshots, and ensuring files remain unchanged during operations.
5//!
6//! # Invariants
7//! - Porcelain parsing must handle NUL-terminated format correctly
8//! - Path snapshots are deterministic and comparable
9//!
10//! # What this does NOT handle
11//! - Commit operations (see git/commit.rs)
12//! - LFS validation (see git/lfs.rs)
13//! - Repository cleanliness enforcement (see git/clean.rs)
14
15use crate::git::error::{GitError, git_output};
16use anyhow::{Context, Result, anyhow, bail};
17use std::collections::HashSet;
18use std::collections::hash_map::DefaultHasher;
19use std::fs;
20use std::hash::Hasher;
21use std::io::Read;
22use std::path::Path;
23
/// Point-in-time record of a path together with a fingerprint of what was there.
///
/// Two snapshots are equal only when both the path and the fingerprint match;
/// comparing a stored fingerprint against a fresh one is how changes are detected.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PathSnapshot {
    /// Path as recorded when the snapshot was taken.
    pub path: String,
    /// Hash describing the entry at `path`; `None` when nothing was found there.
    fingerprint: Option<u64>,
}
30
/// One parsed record of `git status --porcelain -z` output.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) struct PorcelainZEntry {
    /// The two-character status code that starts the record.
    pub xy: String,
    /// Previous path; populated only for rename/copy records.
    pub old_path: Option<String>,
    /// Path the record refers to (the new path for rename/copy records).
    pub path: String,
}
38
39/// Returns raw `git status --porcelain -z` output (may be empty).
40///
41/// NOTE: With `-z`, records are NUL-terminated (0x00) instead of newline-terminated.
42/// This makes the output safe to parse even when filenames contain spaces/newlines.
43pub fn status_porcelain(repo_root: &Path) -> Result<String, GitError> {
44    let output = git_output(repo_root, &["status", "--porcelain", "-z"])
45        .with_context(|| format!("run git status --porcelain -z in {}", repo_root.display()))?;
46
47    if !output.status.success() {
48        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
49        return Err(GitError::CommandFailed {
50            args: "status --porcelain -z".to_string(),
51            code: output.status.code(),
52            stderr: stderr.trim().to_string(),
53        });
54    }
55
56    Ok(String::from_utf8_lossy(&output.stdout).to_string())
57}
58
59/// Returns a list of paths from git status.
60pub fn status_paths(repo_root: &Path) -> Result<Vec<String>, GitError> {
61    let status = status_porcelain(repo_root)?;
62    if status.is_empty() {
63        return Ok(Vec::new());
64    }
65
66    let mut paths = Vec::new();
67    let entries = parse_porcelain_z_entries(&status)?;
68    for entry in entries {
69        if !entry.path.is_empty() {
70            paths.push(entry.path);
71        }
72    }
73    Ok(paths)
74}
75
76/// Returns true if `rel_path` is ignored by git (respecting .gitignore, .git/info/exclude, and global excludes).
77///
78/// Uses `git check-ignore -q <path>`:
79/// - exit code 0 => ignored
80/// - exit code 1 => not ignored
81/// - otherwise => error
82pub fn is_path_ignored(repo_root: &Path, rel_path: &str) -> Result<bool, GitError> {
83    let rel = rel_path.trim();
84    if rel.is_empty() {
85        return Ok(false);
86    }
87
88    let output = git_output(repo_root, &["check-ignore", "-q", rel])
89        .with_context(|| format!("run git check-ignore -q {} in {}", rel, repo_root.display()))?;
90
91    match output.status.code() {
92        Some(0) => Ok(true),
93        Some(1) => Ok(false),
94        _ => {
95            let stderr = String::from_utf8_lossy(&output.stderr).to_string();
96            Err(GitError::CommandFailed {
97                args: format!("check-ignore -q {}", rel),
98                code: output.status.code(),
99                stderr: stderr.trim().to_string(),
100            })
101        }
102    }
103}
104
105/// Returns a list of gitignored paths (tracked ignore + local excludes).
106///
107/// Uses `git ls-files -i -o --exclude-standard -z --directory` to get
108/// NUL-delimited paths relative to the repo root.
109pub fn ignored_paths(repo_root: &Path) -> Result<Vec<String>, GitError> {
110    let output = git_output(
111        repo_root,
112        &[
113            "ls-files",
114            "-i",
115            "-o",
116            "--exclude-standard",
117            "-z",
118            "--directory",
119        ],
120    )
121    .with_context(|| format!("run git ls-files -i -o in {}", repo_root.display()))?;
122
123    if !output.status.success() {
124        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
125        return Err(GitError::CommandFailed {
126            args: "ls-files -i -o --exclude-standard -z --directory".to_string(),
127            code: output.status.code(),
128            stderr: stderr.trim().to_string(),
129        });
130    }
131
132    let raw = String::from_utf8_lossy(&output.stdout);
133    if raw.is_empty() {
134        return Ok(Vec::new());
135    }
136
137    let mut paths = Vec::new();
138    for entry in raw.split('\0') {
139        if entry.is_empty() {
140            continue;
141        }
142        paths.push(entry.to_string());
143    }
144    Ok(paths)
145}
146
147/// Create deterministic fingerprints for a list of baseline dirty paths.
148///
149/// This is used to ensure Phase 1 plan-only runs do not mutate pre-existing
150/// dirty files when `allow_dirty_repo` is true.
151pub fn snapshot_paths(repo_root: &Path, paths: &[String]) -> Result<Vec<PathSnapshot>> {
152    if paths.is_empty() {
153        return Ok(Vec::new());
154    }
155
156    let mut unique = HashSet::new();
157    let mut snapshots = Vec::new();
158    for path in paths {
159        let trimmed = path.trim();
160        if trimmed.is_empty() {
161            continue;
162        }
163        let normalized = trimmed.strip_prefix("./").unwrap_or(trimmed);
164        if !unique.insert(normalized.to_string()) {
165            continue;
166        }
167        let fingerprint = snapshot_path(&repo_root.join(normalized))?;
168        snapshots.push(PathSnapshot {
169            path: normalized.to_string(),
170            fingerprint,
171        });
172    }
173
174    snapshots.sort_by(|a, b| a.path.cmp(&b.path));
175    Ok(snapshots)
176}
177
178/// Validate that each baseline dirty path is unchanged from its fingerprint.
179pub fn ensure_paths_unchanged(repo_root: &Path, snapshots: &[PathSnapshot]) -> Result<()> {
180    for snapshot in snapshots {
181        let current = snapshot_path(&repo_root.join(&snapshot.path))?;
182        if current != snapshot.fingerprint {
183            bail!(
184                "Baseline dirty path changed during Phase 1: {}",
185                snapshot.path
186            );
187        }
188    }
189    Ok(())
190}
191
192fn snapshot_path(path: &Path) -> Result<Option<u64>> {
193    if !path.exists() {
194        return Ok(None);
195    }
196    let metadata = fs::symlink_metadata(path)?;
197    if metadata.is_dir() {
198        Ok(Some(hash_dir(path)?))
199    } else if metadata.is_file() {
200        Ok(Some(hash_file(path)?))
201    } else if metadata.file_type().is_symlink() {
202        let target = fs::read_link(path)?;
203        Ok(Some(hash_bytes(&target.to_string_lossy())))
204    } else {
205        Ok(Some(metadata.len()))
206    }
207}
208
209fn hash_dir(path: &Path) -> Result<u64> {
210    let mut entries: Vec<_> = fs::read_dir(path)?.collect::<Result<_, _>>()?;
211    entries.sort_by_key(|entry| entry.file_name());
212
213    let mut hasher = DefaultHasher::new();
214    for entry in entries {
215        let name = entry.file_name();
216        hasher.write(name.to_string_lossy().as_bytes());
217        let file_type = entry.file_type()?;
218        if file_type.is_dir() {
219            hasher.write_u8(1);
220            hasher.write_u64(hash_dir(&entry.path())?);
221        } else if file_type.is_file() {
222            hasher.write_u8(2);
223            hasher.write_u64(hash_file(&entry.path())?);
224        } else if file_type.is_symlink() {
225            hasher.write_u8(3);
226            let target = fs::read_link(entry.path())?;
227            hasher.write(target.to_string_lossy().as_bytes());
228        } else {
229            hasher.write_u8(4);
230            hasher.write_u64(entry.metadata()?.len());
231        }
232    }
233    Ok(hasher.finish())
234}
235
236fn hash_file(path: &Path) -> Result<u64> {
237    let mut file = fs::File::open(path)?;
238    let mut hasher = DefaultHasher::new();
239    let mut buf = [0u8; 8192];
240    loop {
241        let read = file.read(&mut buf)?;
242        if read == 0 {
243            break;
244        }
245        hasher.write(&buf[..read]);
246    }
247    Ok(hasher.finish())
248}
249
/// Hashes the raw bytes of `value` with a fresh `DefaultHasher`.
fn hash_bytes(value: &str) -> u64 {
    let mut state = DefaultHasher::new();
    // Feed the bytes directly; `str`'s `Hash` impl would produce a different digest.
    Hasher::write(&mut state, value.as_bytes());
    Hasher::finish(&state)
}
255
256/// Parse porcelain -z format entries from git status output.
257pub(crate) fn parse_porcelain_z_entries(status: &str) -> Result<Vec<PorcelainZEntry>, GitError> {
258    if status.is_empty() {
259        return Ok(Vec::new());
260    }
261
262    // Keep simple split-based approach, but parse defensively:
263    // - `git status --porcelain -z` is record-delimited by NUL
264    // - trailing NULs (and even accidental consecutive NULs) should not truncate parsing
265    let fields: Vec<&str> = status.split('\0').collect();
266    let mut idx = 0usize;
267
268    let mut entries = Vec::new();
269    while let Some(entry) = parse_status_path(&fields, &mut idx)? {
270        entries.push(entry);
271    }
272    Ok(entries)
273}
274
/// Reports whether a two-byte XY status code marks a rename (`R`) or copy (`C`) in
/// either position. Anything that is not exactly two bytes long is never a match.
fn is_rename_or_copy_xy(xy: &str) -> bool {
    match xy.as_bytes() {
        [x, y] => [*x, *y].iter().any(|code| matches!(code, b'R' | b'C')),
        _ => false,
    }
}
282
283fn take_required_field<'a>(
284    fields: &'a [&'a str],
285    idx: &mut usize,
286    label: &str,
287    head: &str,
288    xy: &str,
289) -> Result<&'a str, GitError> {
290    let value = fields.get(*idx).copied().ok_or_else(|| {
291        GitError::Other(anyhow!(
292            "malformed porcelain -z output: missing {} after field {:?} (XY={:?}, next_index={})",
293            label,
294            head,
295            xy,
296            *idx
297        ))
298    })?;
299    *idx = idx.saturating_add(1);
300
301    if value.is_empty() {
302        return Err(GitError::Other(anyhow!(
303            "malformed porcelain -z output: empty {} after field {:?} (XY={:?})",
304            label,
305            head,
306            xy
307        )));
308    }
309
310    Ok(value)
311}
312
313fn parse_status_path(
314    fields: &[&str],
315    idx: &mut usize,
316) -> Result<Option<PorcelainZEntry>, GitError> {
317    // Skip empty fields so we don't prematurely stop on trailing NULs or accidental
318    // consecutive NULs. This is defensive; valid git output should not include empty
319    // records.
320    while *idx < fields.len() && fields[*idx].is_empty() {
321        *idx += 1;
322    }
323
324    if *idx >= fields.len() {
325        return Ok(None);
326    }
327
328    let head = fields[*idx];
329    *idx += 1;
330
331    let (xy, inline_path) = parse_xy_and_inline_path(head)?;
332    let is_rename_or_copy = is_rename_or_copy_xy(xy);
333
334    let path = match inline_path {
335        Some(path) => path,
336        None => take_required_field(fields, idx, "path", head, xy)?,
337    };
338
339    if path.is_empty() {
340        return Err(GitError::Other(anyhow!(
341            "malformed porcelain -z output: empty path in field {:?} (XY={:?})",
342            head,
343            xy
344        )));
345    }
346
347    let old_path = if is_rename_or_copy {
348        Some(
349            take_required_field(fields, idx, "old path field for rename/copy", head, xy)?
350                .to_string(),
351        )
352    } else {
353        None
354    };
355
356    Ok(Some(PorcelainZEntry {
357        xy: xy.to_string(),
358        old_path,
359        path: path.to_string(),
360    }))
361}
362
363fn parse_xy_and_inline_path(field: &str) -> Result<(&str, Option<&str>), GitError> {
364    if field.len() < 2 {
365        return Err(GitError::Other(anyhow!(
366            "malformed porcelain -z output: field too short for XY status: {:?}",
367            field
368        )));
369    }
370
371    let xy = &field[..2];
372
373    if field.len() == 2 {
374        return Ok((xy, None));
375    }
376
377    let bytes = field.as_bytes();
378    if bytes.len() >= 3 && bytes[2] == b' ' {
379        return Ok((xy, Some(&field[3..])));
380    }
381
382    Err(GitError::Other(anyhow!(
383        "malformed porcelain -z output: expected `XY<space>path` or `XY` field, got: {:?}",
384        field
385    )))
386}
387
#[cfg(test)]
mod porcelain_parser_tests {
    use super::*;

    /// Empty segments between or after NULs must be skipped, not treated as the end.
    #[test]
    fn parse_porcelain_z_entries_skips_empty_fields_including_trailing_nuls() -> Result<()> {
        let parsed = parse_porcelain_z_entries("?? file1\0\0?? file2\0\0")?;

        let summary: Vec<(&str, &str)> = parsed
            .iter()
            .map(|entry| (entry.xy.as_str(), entry.path.as_str()))
            .collect();
        assert_eq!(summary, vec![("??", "file1"), ("??", "file2")]);
        Ok(())
    }

    /// Copy (`C`) records are exercised directly instead of relying on git's copy
    /// detection heuristics in a temp repo.
    #[test]
    fn parse_porcelain_z_entries_parses_copy_entries() -> Result<()> {
        let expected = PorcelainZEntry {
            xy: "C ".to_string(),
            old_path: Some("old name.txt".to_string()),
            path: "new name.txt".to_string(),
        };

        let parsed = parse_porcelain_z_entries("C  new name.txt\0old name.txt\0")?;
        assert_eq!(parsed, vec![expected]);
        Ok(())
    }
}
422
#[cfg(test)]
mod ignored_paths_tests {
    use super::*;
    use crate::testsupport::git as git_test;
    use tempfile::TempDir;

    /// An ignored file and an ignored directory both show up; the directory is
    /// reported once, with a trailing slash.
    #[test]
    fn ignored_paths_lists_gitignored_entries() -> Result<()> {
        let scratch = TempDir::new()?;
        let repo = scratch.path().join("repo");
        std::fs::create_dir_all(&repo)?;
        git_test::init_repo(&repo)?;

        std::fs::write(repo.join(".gitignore"), ".env\nignored_dir/\n")?;
        std::fs::write(repo.join(".env"), "secret")?;
        std::fs::create_dir_all(repo.join("ignored_dir"))?;
        std::fs::write(repo.join("ignored_dir/file.txt"), "ignored content")?;

        let listed = ignored_paths(&repo)?;

        for wanted in [".env", "ignored_dir/"] {
            assert!(listed.contains(&wanted.to_string()));
        }
        Ok(())
    }

    /// Outside a repository git fails, which must surface as `CommandFailed`.
    #[test]
    fn ignored_paths_errors_outside_repo() {
        let scratch = TempDir::new().expect("temp dir");
        let not_a_repo = scratch.path().join("repo");
        std::fs::create_dir_all(&not_a_repo).expect("create dir");

        let err = ignored_paths(&not_a_repo).expect_err("should fail outside repo");
        assert!(matches!(err, GitError::CommandFailed { .. }));
    }
}