1use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::fs;
9use std::io::{self, Read};
10use std::path::{Path, PathBuf};
11use uuid::Uuid;
12
/// Textual content hash of a file.
///
/// `compute_hash` produces these as 16 lowercase hexadecimal digits.
pub type FileHash = String;
15
/// Recorded state of a single filesystem entry at capture time.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FileSnapshot {
    /// Path of the entry; `capture_recursive` stores it relative to the
    /// captured root.
    pub path: PathBuf,
    /// Content hash; `None` for directories and for entries whose hash could
    /// not be computed.
    pub hash: Option<FileHash>,
    /// Length reported by the entry's metadata.
    pub size: u64,
    /// Whether the entry's metadata reported a directory.
    pub is_dir: bool,
}
28
/// Snapshot of a directory tree plus best-effort git metadata, used as the
/// reference point when computing a [`Diff`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Baseline {
    /// Random (v4) identifier assigned when the baseline is captured.
    pub id: Uuid,
    /// Directory the baseline was captured from.
    pub root: PathBuf,
    /// Trimmed output of `git rev-parse HEAD`, or `None` when that command
    /// could not be run or failed.
    pub git_head: Option<String>,
    /// Local branch names (`refs/heads`), sorted and de-duplicated; empty
    /// when the listing failed. `#[serde(default)]` lets serialized baselines
    /// that lack this field deserialize with an empty list.
    #[serde(default)]
    pub git_branches: Vec<String>,
    /// Every captured entry, keyed by the same path stored in the snapshot.
    pub files: HashMap<PathBuf, FileSnapshot>,
    /// UTC time at which the capture finished.
    pub captured_at: chrono::DateTime<chrono::Utc>,
}
45
46impl Baseline {
47 pub fn capture(root: &Path) -> io::Result<Self> {
49 let mut files = HashMap::new();
50 capture_recursive(root, root, &mut files)?;
51
52 let git_head = get_git_head(root).ok();
53 let git_branches = get_git_branches(root).ok().unwrap_or_default();
54
55 Ok(Self {
56 id: Uuid::new_v4(),
57 root: root.to_path_buf(),
58 git_head,
59 git_branches,
60 files,
61 captured_at: chrono::Utc::now(),
62 })
63 }
64
65 pub fn get(&self, path: &Path) -> Option<&FileSnapshot> {
67 self.files.get(path)
68 }
69
70 pub fn file_count(&self) -> usize {
72 self.files.len()
73 }
74}
75
/// Lists the local branches (`refs/heads`) of the repository containing `path`.
///
/// The names come back sorted with duplicates and blank lines removed.
///
/// # Errors
///
/// Fails when `git` cannot be spawned in `path`, or when it exits with a
/// non-zero status (for example because `path` is not inside a repository).
fn get_git_branches(path: &Path) -> io::Result<Vec<String>> {
    use std::process::Command;

    let mut git = Command::new("git");
    git.current_dir(path)
        .args(["for-each-ref", "refs/heads", "--format=%(refname:short)"]);
    let result = git.output()?;

    if !result.status.success() {
        return Err(io::Error::other("Failed to list git branches"));
    }

    // One short ref name per line; tolerate stray whitespace and empty lines.
    let listing = String::from_utf8_lossy(&result.stdout);
    let mut names = Vec::new();
    for line in listing.lines() {
        let name = line.trim();
        if !name.is_empty() {
            names.push(name.to_string());
        }
    }

    names.sort();
    names.dedup();
    Ok(names)
}
97
98fn capture_recursive(
99 root: &Path,
100 current: &Path,
101 files: &mut HashMap<PathBuf, FileSnapshot>,
102) -> io::Result<()> {
103 for entry in fs::read_dir(current)? {
104 let entry = entry?;
105 let path = entry.path();
106 let relative = path.strip_prefix(root).unwrap_or(&path).to_path_buf();
107
108 if relative.starts_with(".git") {
110 continue;
111 }
112
113 let metadata = entry.metadata()?;
114 let is_dir = metadata.is_dir();
115
116 let hash = if is_dir {
117 None
118 } else {
119 compute_hash(&path).ok()
120 };
121
122 files.insert(
123 relative.clone(),
124 FileSnapshot {
125 path: relative,
126 hash,
127 size: metadata.len(),
128 is_dir,
129 },
130 );
131
132 if is_dir {
133 capture_recursive(root, &path, files)?;
134 }
135 }
136
137 Ok(())
138}
139
140fn compute_hash(path: &Path) -> io::Result<FileHash> {
141 use std::collections::hash_map::DefaultHasher;
142 use std::hash::{Hash, Hasher};
143
144 let mut file = fs::File::open(path)?;
145 let mut contents = Vec::new();
146 file.read_to_end(&mut contents)?;
147
148 let mut hasher = DefaultHasher::new();
149 contents.hash(&mut hasher);
150 Ok(format!("{:016x}", hasher.finish()))
151}
152
/// Resolves `HEAD` of the repository containing `path` via `git rev-parse HEAD`.
///
/// Returns git's standard output with surrounding whitespace trimmed.
///
/// # Errors
///
/// Fails when `git` cannot be spawned in `path`, or when it exits with a
/// non-zero status.
fn get_git_head(path: &Path) -> io::Result<String> {
    use std::process::Command;

    let result = Command::new("git")
        .args(["rev-parse", "HEAD"])
        .current_dir(path)
        .output()?;

    if !result.status.success() {
        return Err(io::Error::other("Failed to get git HEAD"));
    }

    let stdout = String::from_utf8_lossy(&result.stdout);
    Ok(stdout.trim().to_string())
}
167
/// Kind of change recorded for a path in a [`Diff`].
///
/// Serialized in lowercase (`"created"`, `"modified"`, `"deleted"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ChangeType {
    /// The path exists in the current tree but not in the baseline.
    Created,
    /// The path exists in both trees and was detected as changed.
    Modified,
    /// The path exists in the baseline but not in the current tree.
    Deleted,
}
179
/// A single path-level change between a baseline and the current tree.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FileChange {
    /// Path of the changed entry, in the same form as the baseline's keys.
    pub path: PathBuf,
    /// Whether the entry was created, modified or deleted.
    pub change_type: ChangeType,
    /// Hash recorded in the baseline; `None` for created entries and for
    /// entries that had no hash (for example directories).
    pub old_hash: Option<FileHash>,
    /// Hash found in the current tree; `None` for deleted entries and for
    /// entries that have no hash (for example directories).
    pub new_hash: Option<FileHash>,
}
192
/// Set of changes between a [`Baseline`] and a later state of the tree.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Diff {
    /// Random (v4) identifier assigned when the diff is computed.
    pub id: Uuid,
    /// Task this diff is attributed to; `None` until set via `for_task`.
    pub task_id: Option<Uuid>,
    /// Attempt this diff is attributed to; `None` until set via `for_attempt`.
    pub attempt_id: Option<Uuid>,
    /// Identifier of the baseline the diff was computed against.
    pub baseline_id: Uuid,
    /// Recorded changes; `Diff::compute` leaves them sorted by path.
    pub changes: Vec<FileChange>,
    /// UTC time at which the diff was computed.
    pub computed_at: chrono::DateTime<chrono::Utc>,
}
209
210impl Diff {
211 pub fn compute(baseline: &Baseline, current_root: &Path) -> io::Result<Self> {
213 let mut current_files = HashMap::new();
214 capture_recursive(current_root, current_root, &mut current_files)?;
215
216 let mut changes = Vec::new();
217
218 for (path, old_snapshot) in &baseline.files {
220 match current_files.get(path) {
221 Some(new_snapshot) => {
222 if old_snapshot.hash != new_snapshot.hash {
223 changes.push(FileChange {
224 path: path.clone(),
225 change_type: ChangeType::Modified,
226 old_hash: old_snapshot.hash.clone(),
227 new_hash: new_snapshot.hash.clone(),
228 });
229 }
230 }
231 None => {
232 changes.push(FileChange {
233 path: path.clone(),
234 change_type: ChangeType::Deleted,
235 old_hash: old_snapshot.hash.clone(),
236 new_hash: None,
237 });
238 }
239 }
240 }
241
242 for (path, new_snapshot) in ¤t_files {
244 if !baseline.files.contains_key(path) {
245 changes.push(FileChange {
246 path: path.clone(),
247 change_type: ChangeType::Created,
248 old_hash: None,
249 new_hash: new_snapshot.hash.clone(),
250 });
251 }
252 }
253
254 changes.sort_by(|a, b| a.path.cmp(&b.path));
256
257 Ok(Self {
258 id: Uuid::new_v4(),
259 task_id: None,
260 attempt_id: None,
261 baseline_id: baseline.id,
262 changes,
263 computed_at: chrono::Utc::now(),
264 })
265 }
266
267 #[must_use]
269 pub fn for_task(mut self, task_id: Uuid) -> Self {
270 self.task_id = Some(task_id);
271 self
272 }
273
274 #[must_use]
276 pub fn for_attempt(mut self, attempt_id: Uuid) -> Self {
277 self.attempt_id = Some(attempt_id);
278 self
279 }
280
281 pub fn is_empty(&self) -> bool {
283 self.changes.is_empty()
284 }
285
286 pub fn change_count(&self) -> usize {
288 self.changes.len()
289 }
290
291 pub fn changes_of_type(&self, change_type: ChangeType) -> Vec<&FileChange> {
293 self.changes
294 .iter()
295 .filter(|c| c.change_type == change_type)
296 .collect()
297 }
298
299 pub fn modified_paths(&self) -> Vec<&Path> {
301 self.changes.iter().map(|c| c.path.as_path()).collect()
302 }
303}
304
/// Textual diff associated with a single path.
///
/// NOTE(review): nothing in this part of the file constructs this type;
/// `content` presumably holds text such as `unified_diff` returns. Confirm
/// against the callers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UnifiedDiff {
    /// Path the diff refers to.
    pub path: PathBuf,
    /// Diff text for `path`.
    pub content: String,
}
313
/// Produces a unified diff between two files by running the external `diff -u`.
///
/// A missing side (`None`) is replaced by `/dev/null`, so a created or deleted
/// file shows up as all-added or all-removed. When both sides are `None`, an
/// empty string is returned without running anything.
///
/// Paths are handed to `diff` as OS strings, so non-UTF-8 paths work, and they
/// follow a `--` separator so a path beginning with `-` is not taken for an
/// option. Output is decoded lossily as UTF-8.
///
/// # Errors
///
/// Returns an error when `diff` cannot be spawned, or when it exits with
/// anything other than 0 (no differences) or 1 (differences found). That
/// covers an unreadable or missing input, which would otherwise look like
/// "no difference".
pub fn unified_diff(old_path: Option<&Path>, new_path: Option<&Path>) -> io::Result<String> {
    use std::process::Command;

    let dev_null = Path::new("/dev/null");
    let (old, new) = match (old_path, new_path) {
        (None, None) => return Ok(String::new()),
        (old, new) => (old.unwrap_or(dev_null), new.unwrap_or(dev_null)),
    };

    let output = Command::new("diff")
        .arg("-u")
        .arg("--")
        .arg(old)
        .arg(new)
        .output()?;

    // Exit status 1 only means "the inputs differ"; anything above is trouble.
    match output.status.code() {
        Some(0 | 1) => Ok(String::from_utf8_lossy(&output.stdout).into_owned()),
        _ => Err(io::Error::other(format!(
            "diff failed ({}): {}",
            output.status,
            String::from_utf8_lossy(&output.stderr).trim()
        ))),
    }
}
330
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a scratch tree: `file1.txt`, `file2.txt` and `subdir/file3.txt`.
    fn create_test_dir() -> TempDir {
        let dir = TempDir::new().unwrap();
        let root = dir.path();

        for (name, body) in [("file1.txt", "content1"), ("file2.txt", "content2")] {
            fs::write(root.join(name), body).unwrap();
        }
        fs::create_dir(root.join("subdir")).unwrap();
        fs::write(root.join("subdir/file3.txt"), "content3").unwrap();

        dir
    }

    /// Paths of all changes of `kind` in `diff`, in stored order.
    fn paths_of(diff: &Diff, kind: ChangeType) -> Vec<&Path> {
        diff.changes_of_type(kind)
            .into_iter()
            .map(|change| change.path.as_path())
            .collect()
    }

    #[test]
    fn capture_baseline() {
        let tree = create_test_dir();
        let baseline = Baseline::capture(tree.path()).unwrap();

        assert!(baseline.file_count() >= 3);
        assert!(baseline.get(Path::new("file1.txt")).is_some());
    }

    #[test]
    fn detect_created_file() {
        let tree = create_test_dir();
        let baseline = Baseline::capture(tree.path()).unwrap();

        // Add a file the baseline has never seen.
        fs::write(tree.path().join("new_file.txt"), "new content").unwrap();

        let diff = Diff::compute(&baseline, tree.path()).unwrap();

        assert_eq!(
            paths_of(&diff, ChangeType::Created),
            [Path::new("new_file.txt")]
        );
    }

    #[test]
    fn detect_modified_file() {
        let tree = create_test_dir();
        let baseline = Baseline::capture(tree.path()).unwrap();

        // Overwrite an existing file with different content.
        fs::write(tree.path().join("file1.txt"), "modified content").unwrap();

        let diff = Diff::compute(&baseline, tree.path()).unwrap();

        assert_eq!(
            paths_of(&diff, ChangeType::Modified),
            [Path::new("file1.txt")]
        );
    }

    #[test]
    fn detect_deleted_file() {
        let tree = create_test_dir();
        let baseline = Baseline::capture(tree.path()).unwrap();

        // Remove a file that the baseline recorded.
        fs::remove_file(tree.path().join("file1.txt")).unwrap();

        let diff = Diff::compute(&baseline, tree.path()).unwrap();

        assert_eq!(
            paths_of(&diff, ChangeType::Deleted),
            [Path::new("file1.txt")]
        );
    }

    #[test]
    fn no_changes_empty_diff() {
        let tree = create_test_dir();
        let baseline = Baseline::capture(tree.path()).unwrap();

        let diff = Diff::compute(&baseline, tree.path()).unwrap();

        assert!(diff.is_empty());
    }

    #[test]
    fn diff_attribution() {
        let tree = create_test_dir();
        let baseline = Baseline::capture(tree.path()).unwrap();

        let (task_id, attempt_id) = (Uuid::new_v4(), Uuid::new_v4());

        let unattributed = Diff::compute(&baseline, tree.path()).unwrap();
        let diff = unattributed.for_task(task_id).for_attempt(attempt_id);

        assert_eq!(diff.task_id, Some(task_id));
        assert_eq!(diff.attempt_id, Some(attempt_id));
    }

    #[test]
    fn file_change_serialization() {
        let change = FileChange {
            path: PathBuf::from("test.txt"),
            change_type: ChangeType::Modified,
            old_hash: Some("abc".to_string()),
            new_hash: Some("def".to_string()),
        };

        // Round-trip through JSON and expect an identical value back.
        let json = serde_json::to_string(&change).unwrap();
        let restored: FileChange = serde_json::from_str(&json).unwrap();

        assert_eq!(change, restored);
    }
}