ralph_workflow/git_helpers/repo/
snapshot.rs1use std::io;
2
3use crate::git_helpers::git2_to_io_error;
4use std::path::Path;
5
6pub fn git_snapshot() -> io::Result<String> {
14 git_snapshot_in_repo(Path::new("."))
15}
16
17pub fn git_snapshot_in_repo(repo_root: &Path) -> io::Result<String> {
26 let repo = git2::Repository::discover(repo_root).map_err(|e| git2_to_io_error(&e))?;
27 git_snapshot_impl(&repo)
28}
29
/// Extracts the unique paths named in a porcelain-style status snapshot, sorted.
///
/// Every line is expected to have the shape `XY <path>`: two status columns, a
/// single space, then the path. Lines shorter than four bytes, lines whose third
/// byte is not a space, and lines whose path is blank are skipped.
///
/// * For rename/copy entries (`R` or `C` in either column) written as
///   `old -> new`, only the destination path is returned. The split is
///   quote-aware, so a quoted path that itself contains ` -> ` is kept intact.
/// * C-style quoted paths (`"..."`) are unquoted: `\\`, `\"` and octal escapes
///   for printable bytes are decoded. Escapes that stand for control characters
///   (for example `\n` or `\001`) are deliberately left in escaped form, so
///   decoding never introduces a raw control byte into a returned path.
/// * A quoted path that does not decode to valid UTF-8 is returned verbatim,
///   quotes included.
#[must_use]
pub fn parse_git_status_paths(snapshot: &str) -> Vec<String> {
    /// Decodes a C-style quoted string; returns `None` when `s` is not wrapped in
    /// double quotes or the decoded bytes are not valid UTF-8.
    fn unquote_c_style(s: &str) -> Option<String> {
        // Work only on the text between the quotes so that no escape can ever
        // consume the closing quote as its payload.
        let inner = s.strip_prefix('"')?.strip_suffix('"')?.as_bytes();

        let mut out: Vec<u8> = Vec::with_capacity(inner.len());
        let mut i = 0usize;
        while i < inner.len() {
            let b = inner[i];
            i += 1;
            if b != b'\\' {
                out.push(b);
                continue;
            }

            let Some(esc) = inner.get(i).copied() else {
                // Dangling backslash right before the closing quote: keep it
                // literally, like every other escape that cannot be decoded.
                out.push(b'\\');
                break;
            };
            i += 1;

            match esc {
                b'\\' | b'"' => out.push(esc),
                b'0'..=b'7' => {
                    // Up to three octal digits form one byte value.
                    let digits_start = i - 1;
                    let mut val = u32::from(esc - b'0');
                    while i - digits_start < 3 {
                        match inner.get(i).copied() {
                            Some(d @ b'0'..=b'7') => {
                                val = val * 8 + u32::from(d - b'0');
                                i += 1;
                            }
                            _ => break,
                        }
                    }
                    match u8::try_from(val) {
                        Ok(byte) if byte >= 0x20 && byte != 0x7F => out.push(byte),
                        // Control bytes and out-of-range values stay escaped.
                        _ => {
                            out.push(b'\\');
                            out.extend_from_slice(&inner[digits_start..i]);
                        }
                    }
                }
                // Named control escapes (`\n`, `\t`, ...) and unknown escapes are
                // preserved as written rather than turned into raw bytes.
                other => {
                    out.push(b'\\');
                    out.push(other);
                }
            }
        }

        String::from_utf8(out).ok()
    }

    /// Returns the byte index of the first unescaped `"` in `rest`, if any.
    fn closing_quote(rest: &str) -> Option<usize> {
        let bytes = rest.as_bytes();
        let mut i = 0usize;
        while i < bytes.len() {
            match bytes[i] {
                b'"' => return Some(i),
                // Skip the escaped byte together with its backslash.
                b'\\' => i += 2,
                _ => i += 1,
            }
        }
        None
    }

    /// Picks the destination side of an `old -> new` rename/copy spec.
    ///
    /// Returns `spec` unchanged when it contains no separator outside quotes.
    fn rename_destination(spec: &str) -> &str {
        const ARROW: &str = " -> ";

        if let Some(rest) = spec.strip_prefix('"') {
            // Quoted source: the separator must follow its closing quote.
            if let Some(close) = closing_quote(rest) {
                let after = &rest[close + 1..];
                if after.is_empty() {
                    // A single quoted path; any arrow inside it is part of the name.
                    return spec;
                }
                if let Some(dest) = after.strip_prefix(ARROW) {
                    return dest;
                }
            }
        } else if spec.ends_with('"') {
            // Unquoted source followed by a quoted destination: the destination
            // starts at the first arrow that is immediately followed by a quote.
            if let Some(at) = spec.find(" -> \"") {
                return &spec[at + ARROW.len()..];
            }
        }

        // Fully unquoted (or malformed) spec: split on the last arrow.
        spec.rsplit_once(ARROW).map_or(spec, |(_, dest)| dest)
    }

    /// Trims trailing whitespace and unquotes the component when it is quoted.
    fn parse_path_component(raw: &str) -> String {
        let raw = raw.trim_end();
        unquote_c_style(raw).unwrap_or_else(|| raw.to_owned())
    }

    let mut paths: Vec<String> = snapshot
        .lines()
        .filter_map(|line| {
            let bytes = line.as_bytes();
            if bytes.len() < 4 || bytes[2] != b' ' {
                return None;
            }

            // Byte 2 is an ASCII space, so offset 3 is always a char boundary.
            let spec = line.get(3..)?.trim_end();
            if spec.is_empty() {
                return None;
            }

            let is_rename_or_copy =
                matches!(bytes[0], b'R' | b'C') || matches!(bytes[1], b'R' | b'C');
            let spec = if is_rename_or_copy {
                rename_destination(spec)
            } else {
                spec
            };

            let parsed = parse_path_component(spec);
            (!parsed.is_empty()).then_some(parsed)
        })
        .collect();

    // Sorting first lets `dedup` drop every duplicate without a separate set.
    paths.sort_unstable();
    paths.dedup();
    paths
}
160
161fn git_snapshot_impl(repo: &git2::Repository) -> io::Result<String> {
163 let mut opts = git2::StatusOptions::new();
164 opts.include_untracked(true)
165 .recurse_untracked_dirs(true)
166 .include_ignored(false);
167 let statuses = repo
168 .statuses(Some(&mut opts))
169 .map_err(|e| git2_to_io_error(&e))?;
170
171 let mut result = String::new();
172 for entry in statuses.iter() {
173 let status = entry.status();
174 let Some(path) = entry.path() else {
175 return Err(io::Error::new(
176 io::ErrorKind::InvalidData,
177 "non-UTF8 path encountered in git status; cannot safely track residual files",
178 ));
179 };
180 let path = path.to_string();
181 if path.bytes().any(|b| b < 0x20 || b == 0x7F) {
182 return Err(io::Error::new(
183 io::ErrorKind::InvalidData,
184 "control characters in path encountered in git status; cannot safely snapshot",
185 ));
186 }
187
188 if status.contains(git2::Status::WT_NEW) {
191 result.push('?');
192 result.push('?');
193 result.push(' ');
194 result.push_str(&path);
195 result.push('\n');
196 continue;
197 }
198
199 let index_status = if status.contains(git2::Status::INDEX_NEW) {
201 'A'
202 } else if status.contains(git2::Status::INDEX_MODIFIED) {
203 'M'
204 } else if status.contains(git2::Status::INDEX_DELETED) {
205 'D'
206 } else if status.contains(git2::Status::INDEX_RENAMED) {
207 'R'
208 } else if status.contains(git2::Status::INDEX_TYPECHANGE) {
209 'T'
210 } else {
211 ' '
212 };
213
214 let wt_status = if status.contains(git2::Status::WT_MODIFIED) {
216 'M'
217 } else if status.contains(git2::Status::WT_DELETED) {
218 'D'
219 } else if status.contains(git2::Status::WT_RENAMED) {
220 'R'
221 } else if status.contains(git2::Status::WT_TYPECHANGE) {
222 'T'
223 } else {
224 ' '
225 };
226
227 result.push(index_status);
228 result.push(wt_status);
229 result.push(' ');
230 result.push_str(&path);
231 result.push('\n');
232 }
233
234 Ok(result)
235}
236
/// Unit tests for `parse_git_status_paths`.
///
/// Every fixture line uses the full `XY <path>` layout: two status columns
/// followed by one separator space. The parser skips any line whose third byte
/// is not a space, so a staged rename must be written as `R` + space + space.
#[cfg(test)]
mod parse_tests {
    use super::parse_git_status_paths;

    #[test]
    fn test_parses_basic_xy_lines() {
        let snapshot = " M src/lib.rs\n?? new file.txt\n";
        let paths = parse_git_status_paths(snapshot);
        assert_eq!(
            paths,
            vec!["new file.txt".to_string(), "src/lib.rs".to_string()]
        );
    }

    #[test]
    fn test_parses_rename_arrow_takes_new_path() {
        // `R ` fills both status columns; the third byte is the separator.
        let snapshot = "R  old/name.rs -> new/name.rs\n";
        let paths = parse_git_status_paths(snapshot);
        assert_eq!(paths, vec!["new/name.rs".to_string()]);
    }

    #[test]
    fn test_parses_quoted_paths_and_rename() {
        let snapshot =
            "?? \"dir with spaces/file.rs\"\nR  \"old name.rs\" -> \"new name.rs\"\n";
        let paths = parse_git_status_paths(snapshot);
        assert_eq!(
            paths,
            vec![
                "dir with spaces/file.rs".to_string(),
                "new name.rs".to_string()
            ]
        );
    }

    #[test]
    fn test_unquote_c_style_decodes_utf8_octal_bytes() {
        // `\303\251` are the two UTF-8 bytes of `é`.
        let snapshot = "?? \"caf\\303\\251.txt\"\n";
        let paths = parse_git_status_paths(snapshot);
        assert_eq!(paths, vec!["café.txt".to_string()]);
    }

    #[test]
    fn test_unquote_c_style_preserves_control_escapes() {
        // The escape must stay as backslash + `n`, never a raw newline.
        let snapshot = "?? \"x\\nsrc/file.rs\"\n";
        let paths = parse_git_status_paths(snapshot);
        assert_eq!(paths, vec!["x\\nsrc/file.rs".to_string()]);
        assert!(!paths[0].contains('\n'));
    }

    #[test]
    fn test_parse_git_status_paths_returns_sorted_paths() {
        let snapshot = "?? b.txt\n?? a.txt\n";
        let paths = parse_git_status_paths(snapshot);
        assert_eq!(paths, vec!["a.txt".to_string(), "b.txt".to_string()]);
    }
}
297
/// Snapshot tests that need a file whose name is not valid UTF-8.
///
/// Gated on `unix` because the fixture is built with
/// `std::os::unix::ffi::OsStrExt`, which does not exist on other targets.
/// macOS stays excluded as before.
// NOTE(review): macOS is presumably excluded because its filesystems reject
// non-UTF-8 file names — confirm.
#[cfg(all(test, unix, not(target_os = "macos")))]
mod snapshot_tests {
    use super::git_snapshot_in_repo;

    #[test]
    fn test_git_snapshot_in_repo_errors_on_non_utf8_paths() {
        use std::io;
        use std::os::unix::ffi::OsStrExt;

        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        let _repo = git2::Repository::init(root).expect("init repo");

        // 0xFF and 0xFE can never appear in well-formed UTF-8.
        let name = std::ffi::OsStr::from_bytes(&[0xFF, 0xFE, b'.', b't', b'x', b't']);
        std::fs::write(root.join(name), "x\n").expect("write non-utf8 file");

        let err = git_snapshot_in_repo(root).expect_err("expected error");
        assert_eq!(err.kind(), io::ErrorKind::InvalidData);
    }
}
319
/// Snapshot tests that need a file whose name contains a control character.
///
/// Gated on `unix`: Windows does not allow characters in the range 1-31 in file
/// names, so the fixture could not be created there and the test would panic
/// before reaching the behaviour under test.
#[cfg(all(test, unix))]
mod snapshot_control_char_tests {
    use super::git_snapshot_in_repo;

    #[test]
    fn test_git_snapshot_in_repo_errors_on_control_characters_in_paths() {
        use std::io;

        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        let _repo = git2::Repository::init(root).expect("init repo");

        // An untracked file with an embedded newline must be rejected, because
        // the snapshot format is one entry per line.
        std::fs::write(root.join("x\nfile.rs"), "x\n").expect("write file with newline");

        let err = git_snapshot_in_repo(root).expect_err("expected error");
        assert_eq!(err.kind(), io::ErrorKind::InvalidData);
    }
}