sdivi_pipeline/
commit_extract.rs1use std::path::Path;
6use std::process::{Command, Stdio};
7
8use tempfile::TempDir;
9use thiserror::Error;
10
/// Errors produced while resolving a git reference and materializing the
/// tree of a commit through the `git` and `tar` command-line tools.
#[derive(Debug, Error)]
pub enum CommitExtractError {
    /// `git rev-parse --verify` could not be run, exited unsuccessfully, or
    /// printed something that is not a 40-character hexadecimal SHA.
    /// `stderr` carries the explanatory text (git's stderr, the spawn error,
    /// or a description of the unexpected output).
    #[error("ref resolution failed for '{reference}': {stderr}")]
    RefResolutionFailed { reference: String, stderr: String },

    /// `git show` exited unsuccessfully for the given `sha`.
    #[error("commit not found: {sha}")]
    CommitNotFound { sha: String },

    /// The date printed by `git show` for `sha` could not be parsed as an
    /// RFC 3339 timestamp; `raw` is the trimmed text that failed to parse.
    #[error("could not parse commit date for {sha}: {raw:?}")]
    CommitDateParseFailed { sha: String, raw: String },

    /// `git archive` exited unsuccessfully; `stderr` is its (truncated)
    /// standard error output.
    #[error("git archive failed: {stderr}")]
    ArchiveFailed { stderr: String },

    /// `tar` is unavailable (`tar --version` failed) or the extraction itself
    /// exited unsuccessfully; `stderr` describes which.
    #[error("tar extraction failed: {stderr}")]
    TarFailed { stderr: String },

    /// An underlying I/O failure (creating the temp dir, spawning or waiting
    /// on a child process). Convertible from `std::io::Error` via `?`.
    #[error("I/O error: {0}")]
    IoError(#[from] std::io::Error),
}
38
39pub fn resolve_ref_to_sha(repo_root: &Path, reference: &str) -> Result<String, CommitExtractError> {
41 let out = Command::new("git")
42 .current_dir(repo_root)
43 .args(["rev-parse", "--verify", reference])
44 .output()
45 .map_err(|e| CommitExtractError::RefResolutionFailed {
46 reference: reference.to_string(),
47 stderr: e.to_string(),
48 })?;
49
50 if !out.status.success() {
51 let stderr = String::from_utf8_lossy(&out.stderr).trim().to_string();
52 return Err(CommitExtractError::RefResolutionFailed {
53 reference: reference.to_string(),
54 stderr: truncate_stderr(&stderr, 200),
55 });
56 }
57
58 let sha = String::from_utf8_lossy(&out.stdout).trim().to_string();
59 if sha.len() != 40 || !sha.chars().all(|c| c.is_ascii_hexdigit()) {
60 return Err(CommitExtractError::RefResolutionFailed {
61 reference: reference.to_string(),
62 stderr: format!("unexpected rev-parse output: {sha:?}"),
63 });
64 }
65 Ok(sha)
66}
67
68pub fn commit_date_iso(repo_root: &Path, sha: &str) -> Result<String, CommitExtractError> {
72 let out = Command::new("git")
73 .current_dir(repo_root)
74 .args(["show", "-s", "--format=%cI", sha])
75 .output()
76 .map_err(CommitExtractError::IoError)?;
77
78 if !out.status.success() {
79 return Err(CommitExtractError::CommitNotFound {
80 sha: sha.to_string(),
81 });
82 }
83
84 let raw = String::from_utf8_lossy(&out.stdout).trim().to_string();
85 normalize_to_utc(&raw).ok_or_else(|| CommitExtractError::CommitDateParseFailed {
86 sha: sha.to_string(),
87 raw,
88 })
89}
90
91pub fn extract_commit_tree(repo_root: &Path, sha: &str) -> Result<TempDir, CommitExtractError> {
96 let tmpdir = TempDir::new()?;
97
98 let tar_check = Command::new("tar")
101 .arg("--version")
102 .stdout(Stdio::null())
103 .stderr(Stdio::null())
104 .status();
105 if tar_check.is_err() || tar_check.is_ok_and(|s| !s.success()) {
106 return Err(CommitExtractError::TarFailed {
107 stderr: "tar not found on PATH; install tar to use --commit".to_string(),
108 });
109 }
110
111 let mut git = Command::new("git")
112 .current_dir(repo_root)
113 .args(["archive", "--format=tar", sha])
114 .stdout(Stdio::piped())
115 .stderr(Stdio::piped())
116 .spawn()?;
117
118 let git_stdout = git.stdout.take().expect("stdout is piped");
119
120 let mut tar = Command::new("tar")
121 .arg("-xC")
122 .arg(tmpdir.path())
123 .stdin(Stdio::from(git_stdout))
124 .stderr(Stdio::piped())
125 .spawn()?;
126
127 let git_stderr_handle = git.stderr.take().expect("stderr is piped");
129 let tar_stderr_handle = tar.stderr.take().expect("stderr is piped");
130
131 let git_err_thread = std::thread::spawn(move || read_to_string(git_stderr_handle));
132 let tar_err_thread = std::thread::spawn(move || read_to_string(tar_stderr_handle));
133
134 let tar_status = tar.wait()?;
135 let git_status = git.wait()?;
136
137 let git_err = git_err_thread.join().unwrap_or_default();
138 let tar_err = tar_err_thread.join().unwrap_or_default();
139
140 if !git_status.success() {
141 return Err(CommitExtractError::ArchiveFailed {
142 stderr: truncate_stderr(&git_err, 200),
143 });
144 }
145 if !tar_status.success() {
146 return Err(CommitExtractError::TarFailed {
147 stderr: truncate_stderr(&tar_err, 200),
148 });
149 }
150
151 Ok(tmpdir)
152}
153
/// Shortens `stderr` to at most `max_len` bytes for inclusion in an error
/// message, appending `...` when anything was cut off.
///
/// Text that already fits within `max_len` bytes is returned unchanged. When
/// truncation is needed the cut is moved back to the nearest UTF-8 character
/// boundary at or below `max_len`, so multi-byte characters (for example in
/// localized tool output, or U+FFFD produced by lossy decoding) never cause
/// a slicing panic.
fn truncate_stderr(stderr: &str, max_len: usize) -> String {
    if stderr.len() <= max_len {
        return stderr.to_string();
    }

    // Here `max_len < stderr.len()`, so every probed index is in range, and
    // index 0 is always a boundary, so the loop terminates.
    let mut cut = max_len;
    while !stderr.is_char_boundary(cut) {
        cut -= 1;
    }

    format!("{}...", &stderr[..cut])
}
163
/// Drains `r` to its end and returns the bytes as text.
///
/// Reading is best effort: a read error is ignored and whatever bytes were
/// collected up to that point are still returned. Invalid UTF-8 sequences are
/// replaced with U+FFFD rather than causing a failure.
fn read_to_string(r: impl std::io::Read) -> String {
    let mut source = r;
    let mut bytes = Vec::new();
    // Deliberately ignore the result: partial output is still useful in an
    // error message.
    let _ = std::io::Read::read_to_end(&mut source, &mut bytes);
    String::from_utf8_lossy(&bytes).into_owned()
}
170
171fn normalize_to_utc(s: &str) -> Option<String> {
176 chrono::DateTime::parse_from_rfc3339(s.trim())
177 .ok()
178 .map(|dt| dt.to_utc().format("%Y-%m-%dT%H:%M:%SZ").to_string())
179}
180
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `input` parses and normalizes to exactly `expected`.
    #[track_caller]
    fn assert_normalizes(input: &str, expected: &str) {
        assert_eq!(normalize_to_utc(input).unwrap(), expected);
    }

    /// A timestamp that is already UTC comes back unchanged.
    #[test]
    fn utc_passthrough() {
        assert_normalizes("2026-04-29T00:00:00Z", "2026-04-29T00:00:00Z");
    }

    /// A local time west of UTC (-07:00) maps to a later UTC time.
    #[test]
    fn negative_offset_shifts_forward() {
        assert_normalizes("2026-04-30T14:00:00-07:00", "2026-04-30T21:00:00Z");
    }

    /// A local time east of UTC (+05:30) maps to an earlier UTC time.
    #[test]
    fn positive_offset_shifts_back() {
        assert_normalizes("2026-04-30T05:30:00+05:30", "2026-04-30T00:00:00Z");
    }

    /// Inputs that are not RFC 3339 timestamps yield `None`.
    #[test]
    fn malformed_returns_none() {
        for bad in ["not-a-date", "2026-04"] {
            assert!(normalize_to_utc(bad).is_none());
        }
    }

    /// Shortly after local midnight at +01:00 is still the previous day in UTC.
    #[test]
    fn positive_offset_crosses_day_boundary_backward() {
        assert_normalizes("2026-05-01T00:30:00+01:00", "2026-04-30T23:30:00Z");
    }

    /// Shortly before local midnight at -01:00 is already the next day in UTC.
    #[test]
    fn negative_offset_crosses_day_boundary_forward() {
        assert_normalizes("2026-04-30T23:30:00-01:00", "2026-05-01T00:30:00Z");
    }

    /// Covers the `None` result that `commit_date_iso` turns into
    /// `CommitDateParseFailed`, without needing a real repository.
    #[test]
    fn commit_date_parse_failed_when_date_unparseable() {
        let unparseable = "not-a-valid-date";
        let normalized = normalize_to_utc(unparseable);
        assert!(
            normalized.is_none(),
            "normalize_to_utc should return None for unparseable input"
        );
    }
}