cuenv_ci/
diff.rs

1//! Digest Diff Tool
2//!
3//! Compares two CI runs to identify what caused cache invalidation.
4//! Shows changed files, environment variables, and upstream outputs
5//! without exposing secret values.
6
7use crate::report::{PipelineReport, TaskReport};
8use serde::{Deserialize, Serialize};
9use std::collections::{HashMap, HashSet};
10use std::fs;
11use std::path::{Path, PathBuf};
12use thiserror::Error;
13
14/// Errors for diff operations
15#[derive(Debug, Error)]
16pub enum DiffError {
17    /// Report file not found
18    #[error("Report not found: {0}")]
19    ReportNotFound(PathBuf),
20
21    /// Failed to read report
22    #[error("Failed to read report '{path}': {source}")]
23    ReadError {
24        path: PathBuf,
25        #[source]
26        source: std::io::Error,
27    },
28
29    /// Failed to parse report
30    #[error("Failed to parse report '{path}': {source}")]
31    ParseError {
32        path: PathBuf,
33        #[source]
34        source: serde_json::Error,
35    },
36
37    /// Invalid run identifier
38    #[error("Invalid run identifier: {0}")]
39    InvalidRunId(String),
40}
41
42/// Result of comparing two CI runs
43#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct DigestDiff {
45    /// Run A identifier (typically commit SHA)
46    pub run_a: String,
47    /// Run B identifier
48    pub run_b: String,
49    /// Tasks that changed between runs
50    pub task_diffs: Vec<TaskDiff>,
51    /// Summary of changes
52    pub summary: DiffSummary,
53}
54
55/// Changes for a single task
56#[derive(Debug, Clone, Serialize, Deserialize)]
57pub struct TaskDiff {
58    /// Task name
59    pub name: String,
60    /// Change type
61    pub change_type: ChangeType,
62    /// Changed input files
63    pub changed_files: Vec<String>,
64    /// Changed environment variables (names only)
65    pub changed_env_vars: Vec<String>,
66    /// Changed upstream task outputs
67    pub changed_upstream: Vec<String>,
68    /// Whether secret fingerprint changed (no values exposed)
69    pub secrets_changed: bool,
70    /// Cache key in run A (if available)
71    pub cache_key_a: Option<String>,
72    /// Cache key in run B (if available)
73    pub cache_key_b: Option<String>,
74}
75
76/// Type of change for a task
77#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
78#[serde(rename_all = "snake_case")]
79pub enum ChangeType {
80    /// Task exists in both runs with same inputs
81    Unchanged,
82    /// Task inputs changed
83    Modified,
84    /// Task only exists in run A
85    Removed,
86    /// Task only exists in run B
87    Added,
88    /// Cache key changed but reason unknown
89    CacheInvalidated,
90}
91
92/// Summary statistics for the diff
93#[derive(Debug, Clone, Default, Serialize, Deserialize)]
94pub struct DiffSummary {
95    /// Total tasks compared
96    pub total_tasks: usize,
97    /// Tasks with changes
98    pub changed_tasks: usize,
99    /// Tasks added in run B
100    pub added_tasks: usize,
101    /// Tasks removed in run B
102    pub removed_tasks: usize,
103    /// Tasks with secret changes
104    pub secret_changes: usize,
105    /// Tasks with file changes
106    pub file_changes: usize,
107    /// Tasks with env var changes
108    pub env_changes: usize,
109}
110
111/// Compare two CI runs by their report files
112///
113/// # Errors
114///
115/// Returns `DiffError` if report files cannot be loaded.
116pub fn compare_runs(run_a: &Path, run_b: &Path) -> Result<DigestDiff, DiffError> {
117    let report_a = load_report(run_a)?;
118    let report_b = load_report(run_b)?;
119    compare_reports(&report_a, &report_b)
120}
121
122/// Compare two CI runs by commit SHA
123///
124/// # Errors
125///
126/// Returns `DiffError` if reports cannot be found or compared.
127pub fn compare_by_sha(
128    sha_a: &str,
129    sha_b: &str,
130    reports_dir: &Path,
131) -> Result<DigestDiff, DiffError> {
132    let dir_a = reports_dir.join(sha_a);
133    let dir_b = reports_dir.join(sha_b);
134    let report_a = find_first_report(&dir_a)?;
135    let report_b = find_first_report(&dir_b)?;
136    compare_runs(&report_a, &report_b)
137}
138
139/// Compare two pipeline reports
140///
141/// # Errors
142///
143/// Returns `DiffError` if report comparison fails.
144pub fn compare_reports(
145    report_a: &PipelineReport,
146    report_b: &PipelineReport,
147) -> Result<DigestDiff, DiffError> {
148    let mut task_diffs = Vec::new();
149    let mut summary = DiffSummary::default();
150
151    let old_tasks: HashMap<&str, &TaskReport> = report_a
152        .tasks
153        .iter()
154        .map(|t| (t.name.as_str(), t))
155        .collect();
156    let new_tasks: HashMap<&str, &TaskReport> = report_b
157        .tasks
158        .iter()
159        .map(|t| (t.name.as_str(), t))
160        .collect();
161
162    let all_tasks: HashSet<&str> = old_tasks.keys().chain(new_tasks.keys()).copied().collect();
163    summary.total_tasks = all_tasks.len();
164
165    for name in all_tasks {
166        let old_task = old_tasks.get(name);
167        let new_task = new_tasks.get(name);
168
169        let diff = match (old_task, new_task) {
170            (Some(a), Some(b)) => compare_tasks(name, a, b),
171            (Some(_), None) => TaskDiff {
172                name: name.to_string(),
173                change_type: ChangeType::Removed,
174                changed_files: vec![],
175                changed_env_vars: vec![],
176                changed_upstream: vec![],
177                secrets_changed: false,
178                cache_key_a: old_task.and_then(|t| t.cache_key.clone()),
179                cache_key_b: None,
180            },
181            (None, Some(_)) => TaskDiff {
182                name: name.to_string(),
183                change_type: ChangeType::Added,
184                changed_files: vec![],
185                changed_env_vars: vec![],
186                changed_upstream: vec![],
187                secrets_changed: false,
188                cache_key_a: None,
189                cache_key_b: new_task.and_then(|t| t.cache_key.clone()),
190            },
191            (None, None) => unreachable!(),
192        };
193
194        match diff.change_type {
195            ChangeType::Unchanged => {}
196            ChangeType::Modified | ChangeType::CacheInvalidated => summary.changed_tasks += 1,
197            ChangeType::Added => summary.added_tasks += 1,
198            ChangeType::Removed => summary.removed_tasks += 1,
199        }
200        if diff.secrets_changed {
201            summary.secret_changes += 1;
202        }
203        if !diff.changed_files.is_empty() {
204            summary.file_changes += 1;
205        }
206        if !diff.changed_env_vars.is_empty() {
207            summary.env_changes += 1;
208        }
209
210        task_diffs.push(diff);
211    }
212
213    task_diffs.sort_by(|a, b| {
214        let order = |ct: ChangeType| match ct {
215            ChangeType::Modified => 0,
216            ChangeType::CacheInvalidated => 1,
217            ChangeType::Added => 2,
218            ChangeType::Removed => 3,
219            ChangeType::Unchanged => 4,
220        };
221        order(a.change_type).cmp(&order(b.change_type))
222    });
223
224    Ok(DigestDiff {
225        run_a: report_a.context.sha.clone(),
226        run_b: report_b.context.sha.clone(),
227        task_diffs,
228        summary,
229    })
230}
231
232fn compare_tasks(name: &str, task_a: &TaskReport, task_b: &TaskReport) -> TaskDiff {
233    let mut changed_files = Vec::new();
234
235    let inputs_a: HashSet<&str> = task_a.inputs_matched.iter().map(String::as_str).collect();
236    let inputs_b: HashSet<&str> = task_b.inputs_matched.iter().map(String::as_str).collect();
237
238    for input in inputs_a.symmetric_difference(&inputs_b) {
239        changed_files.push((*input).to_string());
240    }
241
242    let secrets_changed = task_a.cache_key != task_b.cache_key
243        && changed_files.is_empty()
244        && task_a.cache_key.is_some()
245        && task_b.cache_key.is_some();
246
247    let change_type = if task_a.cache_key == task_b.cache_key {
248        ChangeType::Unchanged
249    } else if !changed_files.is_empty() {
250        ChangeType::Modified
251    } else {
252        ChangeType::CacheInvalidated
253    };
254
255    TaskDiff {
256        name: name.to_string(),
257        change_type,
258        changed_files,
259        changed_env_vars: vec![],
260        changed_upstream: vec![],
261        secrets_changed,
262        cache_key_a: task_a.cache_key.clone(),
263        cache_key_b: task_b.cache_key.clone(),
264    }
265}
266
267fn load_report(path: &Path) -> Result<PipelineReport, DiffError> {
268    if !path.exists() {
269        return Err(DiffError::ReportNotFound(path.to_path_buf()));
270    }
271    let contents = fs::read_to_string(path).map_err(|e| DiffError::ReadError {
272        path: path.to_path_buf(),
273        source: e,
274    })?;
275    serde_json::from_str(&contents).map_err(|e| DiffError::ParseError {
276        path: path.to_path_buf(),
277        source: e,
278    })
279}
280
281fn find_first_report(dir: &Path) -> Result<PathBuf, DiffError> {
282    if !dir.exists() {
283        return Err(DiffError::ReportNotFound(dir.to_path_buf()));
284    }
285    let entries = fs::read_dir(dir).map_err(|e| DiffError::ReadError {
286        path: dir.to_path_buf(),
287        source: e,
288    })?;
289    for entry in entries.flatten() {
290        let path = entry.path();
291        if path.extension().is_some_and(|ext| ext == "json") {
292            return Ok(path);
293        }
294    }
295    Err(DiffError::ReportNotFound(dir.to_path_buf()))
296}
297
298/// Format a diff for human-readable output
299#[must_use]
300pub fn format_diff(diff: &DigestDiff) -> String {
301    use std::fmt::Write;
302
303    let mut output = String::new();
304    let _ = writeln!(
305        output,
306        "Comparing runs: {} -> {}\n",
307        &diff.run_a[..7.min(diff.run_a.len())],
308        &diff.run_b[..7.min(diff.run_b.len())]
309    );
310    output.push_str("Summary:\n");
311    let _ = writeln!(output, "  Total tasks: {}", diff.summary.total_tasks);
312    let _ = writeln!(output, "  Changed: {}", diff.summary.changed_tasks);
313    let _ = writeln!(output, "  Added: {}", diff.summary.added_tasks);
314    let _ = writeln!(output, "  Removed: {}", diff.summary.removed_tasks);
315    if diff.summary.secret_changes > 0 {
316        let _ = writeln!(output, "  Secret changes: {}", diff.summary.secret_changes);
317    }
318    output.push('\n');
319
320    for task in &diff.task_diffs {
321        if task.change_type == ChangeType::Unchanged {
322            continue;
323        }
324        let symbol = match task.change_type {
325            ChangeType::Modified => "~",
326            ChangeType::CacheInvalidated => "!",
327            ChangeType::Added => "+",
328            ChangeType::Removed => "-",
329            ChangeType::Unchanged => " ",
330        };
331        let _ = writeln!(output, "{} {}", symbol, task.name);
332        if !task.changed_files.is_empty() {
333            output.push_str("  Changed files:\n");
334            for file in &task.changed_files {
335                let _ = writeln!(output, "    - {file}");
336            }
337        }
338        if task.secrets_changed {
339            output.push_str("  Secrets: changed (values hidden)\n");
340        }
341        output.push('\n');
342    }
343    output
344}
345
#[cfg(test)]
mod tests {
    use super::*;
    use crate::report::{ContextReport, PipelineStatus, TaskStatus};
    use chrono::Utc;

    /// Build a successful pipeline report for commit `sha` holding `tasks`.
    fn make_report(sha: &str, tasks: Vec<TaskReport>) -> PipelineReport {
        let context = ContextReport {
            provider: "test".to_string(),
            event: "push".to_string(),
            ref_name: "refs/heads/main".to_string(),
            base_ref: None,
            sha: sha.to_string(),
            changed_files: vec![],
        };
        PipelineReport {
            version: "1.0".to_string(),
            project: "test".to_string(),
            pipeline: "test-pipeline".to_string(),
            context,
            started_at: Utc::now(),
            completed_at: Some(Utc::now()),
            duration_ms: Some(1000),
            status: PipelineStatus::Success,
            tasks,
        }
    }

    /// Build a successful task report with the given inputs and cache key.
    fn make_task(name: &str, inputs: &[&str], cache_key: Option<&str>) -> TaskReport {
        TaskReport {
            name: name.to_string(),
            status: TaskStatus::Success,
            duration_ms: 100,
            exit_code: Some(0),
            inputs_matched: inputs.iter().map(|input| (*input).to_string()).collect(),
            cache_key: cache_key.map(String::from),
            outputs: vec![],
        }
    }

    /// Diff two single-task reports and return the first task diff.
    fn first_task_diff(before: TaskReport, after: TaskReport) -> TaskDiff {
        let report_a = make_report("abc123", vec![before]);
        let report_b = make_report("def456", vec![after]);
        let mut diff = compare_reports(&report_a, &report_b).unwrap();
        diff.task_diffs.remove(0)
    }

    #[test]
    fn test_unchanged_tasks() {
        let task_diff = first_task_diff(
            make_task("build", &["src/main.rs"], Some("key1")),
            make_task("build", &["src/main.rs"], Some("key1")),
        );
        assert_eq!(task_diff.change_type, ChangeType::Unchanged);
    }

    #[test]
    fn test_modified_task() {
        let task_diff = first_task_diff(
            make_task("build", &["src/main.rs"], Some("key1")),
            make_task("build", &["src/main.rs", "src/lib.rs"], Some("key2")),
        );
        assert_eq!(task_diff.change_type, ChangeType::Modified);
        assert!(task_diff.changed_files.contains(&"src/lib.rs".to_string()));
    }

    #[test]
    fn test_secret_change_detection() {
        // Same inputs but a different cache key is reported as a secret change.
        let task_diff = first_task_diff(
            make_task("deploy", &["config.yml"], Some("key1")),
            make_task("deploy", &["config.yml"], Some("key2")),
        );
        assert!(task_diff.secrets_changed);
    }
}
435}