Skip to main content

ralph_workflow/git_helpers/repo/snapshot/
io.rs

// git_helpers/repo/snapshot/io.rs — boundary module for git snapshot and status operations.
// The file stem is `io`, so the `forbid_io_effects` lint recognizes this as a boundary module.
3
4use crate::git_helpers::domain::parse as domain_parse;
5use crate::git_helpers::git2_to_io_error;
6use std::path::Path;
7
8/// Get a snapshot of the current git status.
9///
10/// Returns status in porcelain format (similar to `git status --porcelain=v1`).
11///
12/// # Errors
13///
14/// Returns error if the operation fails.
15pub fn git_snapshot() -> std::io::Result<String> {
16    git_snapshot_in_repo(Path::new("."))
17}
18
19/// Get a snapshot of git status for a specific repository root.
20///
21/// Prefer this in pipeline code where `ctx.repo_root` is known, to avoid
22/// accidentally discovering/inspecting the wrong repository.
23///
24/// # Errors
25///
26/// Returns error if the operation fails.
27pub fn git_snapshot_in_repo(repo_root: &Path) -> std::io::Result<String> {
28    let repo = git2::Repository::discover(repo_root).map_err(|e| git2_to_io_error(&e))?;
29    git_snapshot_impl(&repo)
30}
31
32/// Extract repo-relative paths from a porcelain v1-style status snapshot.
33///
34/// The returned paths are suitable for carry-forward/prompt context and are intentionally
35/// resilient to common porcelain edge cases:
36/// - rename/copy lines in the form `old -> new` (returns `new`)
37/// - quoted paths (returns the unquoted path)
38///
39/// This parser is used for residual-file detection and must be robust: incorrect path
40/// extraction can pollute carry-forward state.
41#[must_use]
42pub fn parse_git_status_paths(snapshot: &str) -> Vec<String> {
43    domain_parse::parse_git_status_paths(snapshot)
44}
45
46/// Build `StatusOptions` for git snapshot queries (boundary module — mutation allowed here).
47fn configured_status_options() -> git2::StatusOptions {
48    let mut opts = git2::StatusOptions::new();
49    opts.include_untracked(true)
50        .recurse_untracked_dirs(true)
51        .include_ignored(false);
52    opts
53}
54
55/// Implementation of git snapshot.
56fn git_snapshot_impl(repo: &git2::Repository) -> std::io::Result<String> {
57    let statuses = {
58        let mut opts = configured_status_options();
59        repo.statuses(Some(&mut opts))
60            .map_err(|e| git2_to_io_error(&e))?
61    };
62
63    let lines = collect_status_lines(statuses)?;
64    Ok(lines.into_iter().collect())
65}
66
67fn collect_status_lines(statuses: git2::Statuses) -> std::io::Result<Vec<String>> {
68    statuses
69        .iter()
70        .map(|entry| status_entry_to_porcelain(&entry))
71        .collect::<std::io::Result<Vec<_>>>()
72}
73
74fn status_entry_to_porcelain(entry: &git2::StatusEntry) -> std::io::Result<String> {
75    let status = entry.status();
76    let path = entry.path().ok_or_else(|| {
77        std::io::Error::new(
78            std::io::ErrorKind::InvalidData,
79            "non-UTF8 path encountered in git status; cannot safely track residual files",
80        )
81    })?;
82    let path = path.to_string();
83    domain_parse::validate_path_for_snapshot(&path).map_err(std::io::Error::from)?;
84    Ok(domain_parse::format_status_porcelain(status, &path))
85}
86
#[cfg(test)]
mod parse_tests {
    use super::parse_git_status_paths;

    /// Plain `XY path` lines yield their paths (returned in sorted order).
    #[test]
    fn test_parses_basic_xy_lines() {
        let parsed = parse_git_status_paths(" M src/lib.rs\n?? new file.txt\n");
        assert_eq!(parsed, ["new file.txt", "src/lib.rs"]);
    }

    /// A rename line `old -> new` contributes only the new path.
    #[test]
    fn test_parses_rename_arrow_takes_new_path() {
        let parsed = parse_git_status_paths("R  old/name.rs -> new/name.rs\n");
        assert_eq!(parsed, ["new/name.rs"]);
    }

    /// Quoted paths are unquoted, including both sides of a quoted rename.
    #[test]
    fn test_parses_quoted_paths_and_rename() {
        let parsed = parse_git_status_paths(
            "?? \"dir with spaces/file.rs\"\nR  \"old name.rs\" -> \"new name.rs\"\n",
        );
        assert_eq!(parsed, ["dir with spaces/file.rs", "new name.rs"]);
    }

    /// Git porcelain uses C-style quoting with octal escapes for non-ASCII bytes;
    /// "caf\303\251.txt" spells the UTF-8 bytes of "café.txt".
    #[test]
    fn test_unquote_c_style_decodes_utf8_octal_bytes() {
        let parsed = parse_git_status_paths("?? \"caf\\303\\251.txt\"\n");
        assert_eq!(parsed, ["café.txt"]);
    }

    /// Control-character escapes must stay escaped so that real control characters
    /// cannot be injected into prompts/state/logs.
    #[test]
    fn test_unquote_c_style_preserves_control_escapes() {
        let parsed = parse_git_status_paths("?? \"x\\nsrc/file.rs\"\n");
        assert_eq!(parsed, ["x\\nsrc/file.rs"]);
        assert!(!parsed[0].contains('\n'));
    }

    /// Output order is sorted, independent of input order.
    #[test]
    fn test_parse_git_status_paths_returns_sorted_paths() {
        let parsed = parse_git_status_paths("?? b.txt\n?? a.txt\n");
        assert_eq!(parsed, ["a.txt", "b.txt"]);
    }
}
147
// Unix-only: the fixture builds a file name from raw bytes through
// `std::os::unix::ffi::OsStrExt`, which is unavailable on non-Unix targets, so without the
// `unix` predicate this module would fail to compile there.
// NOTE(review): macOS stays excluded as in the original gate — presumably because its
// filesystems refuse file names that are not valid UTF-8; confirm.
#[cfg(all(test, unix, not(target_os = "macos")))]
mod snapshot_tests {
    use super::git_snapshot_in_repo;

    /// A worktree containing a non-UTF-8 file name must make the snapshot fail with
    /// `InvalidData` instead of emitting a lossy path.
    #[test]
    fn test_git_snapshot_in_repo_errors_on_non_utf8_paths() {
        use std::os::unix::ffi::OsStrExt;

        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        let _repo = git2::Repository::init(root).expect("init repo");

        // Create a filename with bytes that are not valid UTF-8.
        let name = std::ffi::OsStr::from_bytes(&[0xFF, 0xFE, b'.', b't', b'x', b't']);
        std::fs::write(root.join(name), "x\n").expect("write non-utf8 file");

        let err = git_snapshot_in_repo(root).expect_err("expected error");
        assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);
    }
}
168
// Unix-only: the fixture needs a file whose name contains a newline. That is legal on
// Unix, but on platforms that forbid such names (e.g. Windows) the `write` below would
// fail and the test would panic in `expect` before reaching the assertion under test.
#[cfg(all(test, unix))]
mod snapshot_control_char_tests {
    use super::git_snapshot_in_repo;

    /// A path containing a control character must make the snapshot fail with
    /// `InvalidData`.
    #[test]
    fn test_git_snapshot_in_repo_errors_on_control_characters_in_paths() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        let _repo = git2::Repository::init(root).expect("init repo");

        // Newlines are legal on Unix but cannot be safely represented in a newline-delimited
        // snapshot format. Reject to avoid snapshot injection.
        std::fs::write(root.join("x\nfile.rs"), "x\n").expect("write file with newline");

        let err = git_snapshot_in_repo(root).expect_err("expected error");
        assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);
    }
}
186}