Skip to main content

sdivi_pipeline/
commit_extract.rs

1//! Git shell-outs for resolving refs and extracting historical trees.
2//!
3//! Used by [`crate::Pipeline::snapshot_with_mode`] when `--commit REF` is supplied.
4
5use std::path::Path;
6use std::process::{Command, Stdio};
7
8use tempfile::TempDir;
9use thiserror::Error;
10
11/// Errors from commit extraction helpers.
12#[derive(Debug, Error)]
13pub enum CommitExtractError {
14    /// `git rev-parse --verify` failed — ref is unknown or git is unavailable.
15    #[error("ref resolution failed for '{reference}': {stderr}")]
16    RefResolutionFailed { reference: String, stderr: String },
17
18    /// The resolved SHA was not found by `git show`.
19    #[error("commit not found: {sha}")]
20    CommitNotFound { sha: String },
21
22    /// The date string returned by `git show --format=%cI` could not be parsed.
23    #[error("could not parse commit date for {sha}: {raw:?}")]
24    CommitDateParseFailed { sha: String, raw: String },
25
26    /// `git archive` exited non-zero.
27    #[error("git archive failed: {stderr}")]
28    ArchiveFailed { stderr: String },
29
30    /// `tar` exited non-zero during extraction.
31    #[error("tar extraction failed: {stderr}")]
32    TarFailed { stderr: String },
33
34    /// Underlying I/O error (spawning process, creating tempdir, etc.).
35    #[error("I/O error: {0}")]
36    IoError(#[from] std::io::Error),
37}
38
39/// Resolves `reference` to a full 40-char SHA via `git rev-parse --verify`.
40pub fn resolve_ref_to_sha(repo_root: &Path, reference: &str) -> Result<String, CommitExtractError> {
41    let out = Command::new("git")
42        .current_dir(repo_root)
43        .args(["rev-parse", "--verify", reference])
44        .output()
45        .map_err(|e| CommitExtractError::RefResolutionFailed {
46            reference: reference.to_string(),
47            stderr: e.to_string(),
48        })?;
49
50    if !out.status.success() {
51        let stderr = String::from_utf8_lossy(&out.stderr).trim().to_string();
52        return Err(CommitExtractError::RefResolutionFailed {
53            reference: reference.to_string(),
54            stderr: truncate_stderr(&stderr, 200),
55        });
56    }
57
58    let sha = String::from_utf8_lossy(&out.stdout).trim().to_string();
59    if sha.len() != 40 || !sha.chars().all(|c| c.is_ascii_hexdigit()) {
60        return Err(CommitExtractError::RefResolutionFailed {
61            reference: reference.to_string(),
62            stderr: format!("unexpected rev-parse output: {sha:?}"),
63        });
64    }
65    Ok(sha)
66}
67
68/// Returns the commit-date of `sha` as a UTC ISO 8601 string (`YYYY-MM-DDTHH:MM:SSZ`).
69///
70/// Uses `git show -s --format=%cI` and normalises the committer timezone to UTC.
71pub fn commit_date_iso(repo_root: &Path, sha: &str) -> Result<String, CommitExtractError> {
72    let out = Command::new("git")
73        .current_dir(repo_root)
74        .args(["show", "-s", "--format=%cI", sha])
75        .output()
76        .map_err(CommitExtractError::IoError)?;
77
78    if !out.status.success() {
79        return Err(CommitExtractError::CommitNotFound {
80            sha: sha.to_string(),
81        });
82    }
83
84    let raw = String::from_utf8_lossy(&out.stdout).trim().to_string();
85    normalize_to_utc(&raw).ok_or_else(|| CommitExtractError::CommitDateParseFailed {
86        sha: sha.to_string(),
87        raw,
88    })
89}
90
91/// Extracts the tree at `sha` into a fresh [`TempDir`] via `git archive | tar`.
92///
93/// The returned `TempDir` contains the repo tree at `sha`. It is removed when
94/// the `TempDir` is dropped.
95pub fn extract_commit_tree(repo_root: &Path, sha: &str) -> Result<TempDir, CommitExtractError> {
96    let tmpdir = TempDir::new()?;
97
98    // Verify tar is available before spawning git archive so we don't spawn a
99    // process whose stdout pipe will be immediately abandoned.
100    let tar_check = Command::new("tar")
101        .arg("--version")
102        .stdout(Stdio::null())
103        .stderr(Stdio::null())
104        .status();
105    if tar_check.is_err() || tar_check.is_ok_and(|s| !s.success()) {
106        return Err(CommitExtractError::TarFailed {
107            stderr: "tar not found on PATH; install tar to use --commit".to_string(),
108        });
109    }
110
111    let mut git = Command::new("git")
112        .current_dir(repo_root)
113        .args(["archive", "--format=tar", sha])
114        .stdout(Stdio::piped())
115        .stderr(Stdio::piped())
116        .spawn()?;
117
118    let git_stdout = git.stdout.take().expect("stdout is piped");
119
120    let mut tar = Command::new("tar")
121        .arg("-xC")
122        .arg(tmpdir.path())
123        .stdin(Stdio::from(git_stdout))
124        .stderr(Stdio::piped())
125        .spawn()?;
126
127    // Drain stderr on separate threads to prevent pipe-buffer deadlocks.
128    let git_stderr_handle = git.stderr.take().expect("stderr is piped");
129    let tar_stderr_handle = tar.stderr.take().expect("stderr is piped");
130
131    let git_err_thread = std::thread::spawn(move || read_to_string(git_stderr_handle));
132    let tar_err_thread = std::thread::spawn(move || read_to_string(tar_stderr_handle));
133
134    let tar_status = tar.wait()?;
135    let git_status = git.wait()?;
136
137    let git_err = git_err_thread.join().unwrap_or_default();
138    let tar_err = tar_err_thread.join().unwrap_or_default();
139
140    if !git_status.success() {
141        return Err(CommitExtractError::ArchiveFailed {
142            stderr: truncate_stderr(&git_err, 200),
143        });
144    }
145    if !tar_status.success() {
146        return Err(CommitExtractError::TarFailed {
147            stderr: truncate_stderr(&tar_err, 200),
148        });
149    }
150
151    Ok(tmpdir)
152}
153
154// ── private helpers ──────────────────────────────────────────────────────────
155
/// Caps `stderr` at `max_len` bytes for inclusion in an error message.
///
/// Text that already fits is returned unchanged. Longer text is cut at the
/// last UTF-8 character boundary at or before `max_len` and suffixed with
/// `...`, so the kept prefix may be a few bytes shorter than `max_len`.
fn truncate_stderr(stderr: &str, max_len: usize) -> String {
    if stderr.len() <= max_len {
        return stderr.to_string();
    }
    // Slicing at a byte offset inside a multi-byte character panics, and
    // process output can contain non-ASCII text. Back off to the previous
    // boundary; offset 0 is always a boundary, so the loop terminates.
    let mut cut = max_len;
    while !stderr.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}...", &stderr[..cut])
}
163
/// Reads `r` to the end and returns everything received as text.
///
/// Read errors are deliberately ignored: whatever bytes arrived before the
/// error are still returned. Invalid UTF-8 sequences are replaced with U+FFFD.
fn read_to_string(r: impl std::io::Read) -> String {
    use std::io::Read;

    let mut collected: Vec<u8> = Vec::new();
    let mut buffered = std::io::BufReader::new(r);
    // Best effort: a failed read still leaves the partial output in `collected`.
    let _ = buffered.read_to_end(&mut collected);
    String::from_utf8_lossy(&collected).into_owned()
}
170
171/// Parses a `git %cI` date (ISO 8601 with tz offset) and normalises to UTC.
172///
173/// Input examples: `2026-04-30T14:23:01-07:00`, `2026-04-29T00:00:00Z`.
174/// Returns `None` when the string is malformed.
175fn normalize_to_utc(s: &str) -> Option<String> {
176    chrono::DateTime::parse_from_rfc3339(s.trim())
177        .ok()
178        .map(|dt| dt.to_utc().format("%Y-%m-%dT%H:%M:%SZ").to_string())
179}
180
#[cfg(test)]
mod tests {
    use super::*;

    /// A timestamp already in UTC comes back unchanged.
    #[test]
    fn utc_passthrough() {
        let got = normalize_to_utc("2026-04-29T00:00:00Z");
        assert_eq!(got.as_deref(), Some("2026-04-29T00:00:00Z"));
    }

    /// A -07:00 offset means UTC is seven hours ahead of local time.
    #[test]
    fn negative_offset_shifts_forward() {
        let got = normalize_to_utc("2026-04-30T14:00:00-07:00");
        assert_eq!(got.as_deref(), Some("2026-04-30T21:00:00Z"));
    }

    /// A +05:30 offset (IST) means UTC is 5h30m behind local time.
    #[test]
    fn positive_offset_shifts_back() {
        let got = normalize_to_utc("2026-04-30T05:30:00+05:30");
        assert_eq!(got.as_deref(), Some("2026-04-30T00:00:00Z"));
    }

    /// Garbage and truncated dates are rejected rather than guessed at.
    #[test]
    fn malformed_returns_none() {
        assert_eq!(normalize_to_utc("not-a-date"), None);
        assert_eq!(normalize_to_utc("2026-04"), None);
    }

    /// With +01:00, 00:30 local on 2026-05-01 is 23:30 UTC on the previous day.
    #[test]
    fn positive_offset_crosses_day_boundary_backward() {
        let got = normalize_to_utc("2026-05-01T00:30:00+01:00");
        assert_eq!(got.as_deref(), Some("2026-04-30T23:30:00Z"));
    }

    /// With -01:00, 23:30 local on 2026-04-30 is 00:30 UTC on the next day.
    #[test]
    fn negative_offset_crosses_day_boundary_forward() {
        let got = normalize_to_utc("2026-04-30T23:30:00-01:00");
        assert_eq!(got.as_deref(), Some("2026-05-01T00:30:00Z"));
    }

    /// `commit_date_iso` reports `CommitDateParseFailed` when the parser
    /// yields `None`; this pins the `None` half of that contract without
    /// needing a git repository.
    #[test]
    fn commit_date_parse_failed_when_date_unparseable() {
        let unparseable = "not-a-valid-date";
        assert_eq!(
            normalize_to_utc(unparseable),
            None,
            "normalize_to_utc should return None for unparseable input"
        );
    }
}