Skip to main content

ralph_workflow/git_helpers/repo/
snapshot.rs

1use std::io;
2
3use crate::git_helpers::git2_to_io_error;
4use std::path::Path;
5
6/// Get a snapshot of the current git status.
7///
8/// Returns status in porcelain format (similar to `git status --porcelain=v1`).
9///
10/// # Errors
11///
12/// Returns error if the operation fails.
13pub fn git_snapshot() -> io::Result<String> {
14    git_snapshot_in_repo(Path::new("."))
15}
16
17/// Get a snapshot of git status for a specific repository root.
18///
19/// Prefer this in pipeline code where `ctx.repo_root` is known, to avoid
20/// accidentally discovering/inspecting the wrong repository.
21///
22/// # Errors
23///
24/// Returns error if the operation fails.
25pub fn git_snapshot_in_repo(repo_root: &Path) -> io::Result<String> {
26    let repo = git2::Repository::discover(repo_root).map_err(|e| git2_to_io_error(&e))?;
27    git_snapshot_impl(&repo)
28}
29
/// Extract repo-relative paths from a porcelain v1-style status snapshot.
///
/// Every snapshot line is expected to look like `XY <path>`: two status bytes, a single
/// space, then the path. Lines that are too short, lack the separating space, or carry an
/// empty path are skipped. The returned paths are de-duplicated and sorted.
///
/// The returned paths are suitable for carry-forward/prompt context and are intentionally
/// resilient to common porcelain edge cases:
/// - rename/copy lines in the form `old -> new` (returns `new`), including a quoted `new`
///   path that itself contains the text ` -> `
/// - quoted paths (returns the unquoted path; octal escapes are decoded as raw bytes)
/// - control-character escapes (kept in their escaped form, never decoded into real
///   control bytes)
///
/// This parser is used for residual-file detection and must be robust: incorrect path
/// extraction can pollute carry-forward state.
#[must_use]
pub fn parse_git_status_paths(snapshot: &str) -> Vec<String> {
    // Decode a C-style quoted path (`"..."`). Returns `None` when `s` is not wrapped in
    // double quotes or when the decoded bytes are not valid UTF-8.
    fn unquote_c_style(s: &str) -> Option<String> {
        let bytes = s.as_bytes();
        if bytes.len() < 2 || bytes[0] != b'"' || bytes[bytes.len() - 1] != b'"' {
            return None;
        }

        // Work on the text between the delimiters so the closing quote can never be
        // mistaken for path content.
        let inner = &bytes[1..bytes.len() - 1];

        // Git porcelain uses C-style quoting. Octal escapes represent BYTES, not Unicode
        // codepoints, so decoding happens on a byte buffer.
        let mut out: Vec<u8> = Vec::with_capacity(inner.len());
        let mut i = 0usize;
        while i < inner.len() {
            let b = inner[i];
            if b != b'\\' {
                out.push(b);
                i += 1;
                continue;
            }

            // A backslash with nothing after it (directly before the closing quote) is not
            // an escape: keep it literally instead of consuming the delimiter.
            let Some(&esc) = inner.get(i + 1) else {
                out.push(b'\\');
                break;
            };
            i += 1; // `i` now points at the escape character.

            match esc {
                b'\\' | b'"' => out.push(esc),
                b'0'..=b'7' => {
                    // Up to three octal digits form one byte value.
                    let digit_count = inner[i..]
                        .iter()
                        .take(3)
                        .take_while(|&&d| matches!(d, b'0'..=b'7'))
                        .count();
                    let digits = &inner[i..i + digit_count];
                    let value = digits
                        .iter()
                        .fold(0u32, |acc, &d| acc * 8 + u32::from(d - b'0'));
                    match u8::try_from(value) {
                        Ok(byte) if byte >= 0x20 && byte != 0x7F => out.push(byte),
                        // Control bytes and values above 0xFF stay in escaped form.
                        _ => {
                            out.push(b'\\');
                            out.extend_from_slice(digits);
                        }
                    }
                    i += digit_count - 1;
                }
                // Do NOT decode control-character escapes (`\n`, `\t`, ...) into real
                // control bytes; they and any unknown escape are preserved verbatim to
                // avoid control-character injection.
                other => {
                    out.push(b'\\');
                    out.push(other);
                }
            }
            i += 1;
        }

        String::from_utf8(out).ok()
    }

    // True when `s` is exactly one well-formed quoted token: wrapped in double quotes,
    // with no unescaped quote and no dangling backslash in between.
    fn is_quoted_token(s: &str) -> bool {
        let bytes = s.as_bytes();
        if bytes.len() < 2 || bytes[0] != b'"' || bytes[bytes.len() - 1] != b'"' {
            return false;
        }
        let inner = &bytes[1..bytes.len() - 1];
        let mut i = 0usize;
        while i < inner.len() {
            match inner[i] {
                b'\\' => {
                    if i + 1 >= inner.len() {
                        return false;
                    }
                    i += 2;
                }
                b'"' => return false,
                _ => i += 1,
            }
        }
        true
    }

    // Pick the destination path out of a rename/copy spec (`old -> new`).
    //
    // A quoted destination may legitimately contain ` -> `, so the split must not happen
    // inside quotes. Unquoted specs keep the "last arrow wins" behavior.
    fn rename_target(spec: &str) -> &str {
        const ARROW: &str = " -> ";

        if spec.ends_with('"') {
            // A single quoted token has no arrow outside quotes: nothing to split.
            if is_quoted_token(spec) {
                return spec;
            }
            // Otherwise look for an opening quote that directly follows an arrow and
            // starts a well-formed quoted token running to the end of the spec.
            let bytes = spec.as_bytes();
            let quoted_target = bytes.iter().enumerate().find_map(|(idx, &b)| {
                if b != b'"' || !bytes[..idx].ends_with(ARROW.as_bytes()) {
                    return None;
                }
                // `"` is ASCII, so `idx` is always a valid char boundary.
                let tail = &spec[idx..];
                is_quoted_token(tail).then_some(tail)
            });
            if let Some(tail) = quoted_target {
                return tail;
            }
        }

        spec.rsplit_once(ARROW)
            .map_or(spec, |(_, new_part)| new_part)
            .trim_end()
    }

    // A BTreeSet gives both de-duplication and sorted output.
    let mut unique: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();

    for line in snapshot.lines() {
        let bytes = line.as_bytes();
        // Porcelain v1: 2 status chars + space + path (so at least 4 bytes).
        if bytes.len() < 4 || bytes[2] != b' ' {
            continue;
        }
        // Byte 2 is an ASCII space, so byte 3 is guaranteed to be a char boundary.
        let path_spec = line[3..].trim_end();
        if path_spec.is_empty() {
            continue;
        }

        // Rename/copy lines: `old -> new` (porcelain v1). Prefer the new path.
        let is_rename_or_copy = bytes[..2].iter().any(|&c| matches!(c, b'R' | b'C'));
        let path_spec = if is_rename_or_copy {
            rename_target(path_spec)
        } else {
            path_spec
        };

        let parsed = unquote_c_style(path_spec).unwrap_or_else(|| path_spec.to_string());
        if !parsed.is_empty() {
            unique.insert(parsed);
        }
    }

    unique.into_iter().collect()
}
160
161/// Implementation of git snapshot.
162fn git_snapshot_impl(repo: &git2::Repository) -> io::Result<String> {
163    let mut opts = git2::StatusOptions::new();
164    opts.include_untracked(true)
165        .recurse_untracked_dirs(true)
166        .include_ignored(false);
167    let statuses = repo
168        .statuses(Some(&mut opts))
169        .map_err(|e| git2_to_io_error(&e))?;
170
171    let mut result = String::new();
172    for entry in statuses.iter() {
173        let status = entry.status();
174        let Some(path) = entry.path() else {
175            return Err(io::Error::new(
176                io::ErrorKind::InvalidData,
177                "non-UTF8 path encountered in git status; cannot safely track residual files",
178            ));
179        };
180        let path = path.to_string();
181        if path.bytes().any(|b| b < 0x20 || b == 0x7F) {
182            return Err(io::Error::new(
183                io::ErrorKind::InvalidData,
184                "control characters in path encountered in git status; cannot safely snapshot",
185            ));
186        }
187
188        // Convert git2 status to porcelain format.
189        // Untracked files are represented as "??" in porcelain v1.
190        if status.contains(git2::Status::WT_NEW) {
191            result.push('?');
192            result.push('?');
193            result.push(' ');
194            result.push_str(&path);
195            result.push('\n');
196            continue;
197        }
198
199        // Index status
200        let index_status = if status.contains(git2::Status::INDEX_NEW) {
201            'A'
202        } else if status.contains(git2::Status::INDEX_MODIFIED) {
203            'M'
204        } else if status.contains(git2::Status::INDEX_DELETED) {
205            'D'
206        } else if status.contains(git2::Status::INDEX_RENAMED) {
207            'R'
208        } else if status.contains(git2::Status::INDEX_TYPECHANGE) {
209            'T'
210        } else {
211            ' '
212        };
213
214        // Worktree status
215        let wt_status = if status.contains(git2::Status::WT_MODIFIED) {
216            'M'
217        } else if status.contains(git2::Status::WT_DELETED) {
218            'D'
219        } else if status.contains(git2::Status::WT_RENAMED) {
220            'R'
221        } else if status.contains(git2::Status::WT_TYPECHANGE) {
222            'T'
223        } else {
224            ' '
225        };
226
227        result.push(index_status);
228        result.push(wt_status);
229        result.push(' ');
230        result.push_str(&path);
231        result.push('\n');
232    }
233
234    Ok(result)
235}
236
// Unit tests for `parse_git_status_paths`; they only need string inputs.
#[cfg(test)]
mod parse_tests {
    use super::parse_git_status_paths;

    /// Plain `XY path` lines yield their paths, sorted.
    #[test]
    fn test_parses_basic_xy_lines() {
        let parsed = parse_git_status_paths(" M src/lib.rs\n?? new file.txt\n");
        assert_eq!(parsed, ["new file.txt", "src/lib.rs"]);
    }

    /// A rename line reports the destination path only.
    #[test]
    fn test_parses_rename_arrow_takes_new_path() {
        let parsed = parse_git_status_paths("R  old/name.rs -> new/name.rs\n");
        assert_eq!(parsed, ["new/name.rs"]);
    }

    /// Quoted paths are unquoted, both standalone and as rename destinations.
    #[test]
    fn test_parses_quoted_paths_and_rename() {
        let parsed = parse_git_status_paths(
            "?? \"dir with spaces/file.rs\"\nR  \"old name.rs\" -> \"new name.rs\"\n",
        );
        assert_eq!(parsed, ["dir with spaces/file.rs", "new name.rs"]);
    }

    /// Octal escapes are decoded as bytes: `\303\251` is the UTF-8 encoding of `é`,
    /// so `"caf\303\251.txt"` stands for `café.txt`.
    #[test]
    fn test_unquote_c_style_decodes_utf8_octal_bytes() {
        let parsed = parse_git_status_paths("?? \"caf\\303\\251.txt\"\n");
        assert_eq!(parsed, ["café.txt"]);
    }

    /// Control-character escapes stay escaped so no real control character can reach
    /// prompts, state, or logs.
    #[test]
    fn test_unquote_c_style_preserves_control_escapes() {
        let parsed = parse_git_status_paths("?? \"x\\nsrc/file.rs\"\n");
        assert_eq!(parsed, ["x\\nsrc/file.rs"]);
        assert!(!parsed[0].contains('\n'));
    }

    /// Output order is sorted, independent of input order.
    #[test]
    fn test_parse_git_status_paths_returns_sorted_paths() {
        let parsed = parse_git_status_paths("?? b.txt\n?? a.txt\n");
        assert_eq!(parsed, ["a.txt", "b.txt"]);
    }
}
297
// This module builds a file name from raw bytes via `std::os::unix::ffi::OsStrExt`, which
// only exists on Unix targets, so it is gated on `unix` to keep non-Unix test builds
// compiling. macOS stays excluded as before (presumably because its filesystems refuse
// non-UTF-8 file names — confirm if this exclusion is revisited).
#[cfg(all(test, unix, not(target_os = "macos")))]
mod snapshot_tests {
    use super::git_snapshot_in_repo;

    /// A worktree containing a non-UTF-8 file name must make the snapshot fail with
    /// `InvalidData` instead of producing a lossy path.
    #[test]
    fn test_git_snapshot_in_repo_errors_on_non_utf8_paths() {
        use std::io;
        use std::os::unix::ffi::OsStrExt;

        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        let _repo = git2::Repository::init(root).expect("init repo");

        // Create a filename with bytes that are not valid UTF-8.
        let name = std::ffi::OsStr::from_bytes(&[0xFF, 0xFE, b'.', b't', b'x', b't']);
        std::fs::write(root.join(name), "x\n").expect("write non-utf8 file");

        let err = git_snapshot_in_repo(root).expect_err("expected error");
        assert_eq!(err.kind(), io::ErrorKind::InvalidData);
    }
}
319
// Integration-style test: creates a real repository in a temporary directory.
#[cfg(test)]
mod snapshot_control_char_tests {
    use super::git_snapshot_in_repo;

    /// A file name containing a newline must be rejected with `InvalidData`.
    ///
    /// Newlines are legal in Unix file names but cannot be represented safely in a
    /// newline-delimited snapshot, so the snapshot refuses them to avoid injection.
    #[test]
    fn test_git_snapshot_in_repo_errors_on_control_characters_in_paths() {
        let workdir = tempfile::tempdir().expect("tempdir");
        let repo_root = workdir.path();
        let _repo = git2::Repository::init(repo_root).expect("init repo");

        let hostile_file = repo_root.join("x\nfile.rs");
        std::fs::write(hostile_file, "x\n").expect("write file with newline");

        let failure = git_snapshot_in_repo(repo_root).expect_err("expected error");
        assert_eq!(failure.kind(), std::io::ErrorKind::InvalidData);
    }
}