// nornir 0.4.13 - Docs.rs

//! `regression_trace` — deterministic "time-bisect" over recorded release history.
//!
//! The question: *"this is red now — when was it last green, and what changed?"*
//! Because every release is recorded in `release_lineage` (per repo: `git_sha`,
//! `gate_status`, …), the answer is a **scan + boundary-find**, not inference —
//! no AI in the core (see `plan.md`). This is the inverse of the forward-looking
//! `gate::no_regression`: that one *blocks*, this one *explains*.
//!
//! MVP target: a repo's release **gate** (`failed_test` / `failed_bench` /
//! `failed_regression` / …). Bench-metric and per-test targets, plus
//! Mímir-based suspect ranking (`dep_graph::affected_by_change` ∩ `git_heat`),
//! layer on top of the same boundary-find.

use anyhow::Result;

use super::pipeline::{ReleaseReport, gate_succeeded, query_release_history};
use crate::warehouse::dep_graph::WorkspaceGraph;
use crate::warehouse::iceberg::IcebergWarehouse;

/// One time-frame in the historized "stacktrace": the target repo's state in a
/// single recorded release, oldest-first.
///
/// Built by [`trace_gate`], one per release report that contains the repo.
#[derive(Debug, Clone, serde::Serialize)]
pub struct Frame {
    /// Identifier of the release this frame was taken from, rendered as a string.
    pub release_id: String,
    /// Name of the repo this frame describes (the repo being traced).
    pub repo: String,
    /// Git SHA recorded for the repo in this release.
    pub git_sha: String,
    /// Gate status exactly as recorded for the repo (e.g. `failed_test`).
    pub gate_status: String,
    /// Whether `gate_status` counts as green, as decided by `gate_succeeded`.
    pub good: bool,
}

/// A repo blamed for the regression: it changed SHA across the boundary and is
/// either the target itself (`dep_distance = 0`) or one of its dependencies.
///
/// Produced by [`rank_suspects`].
#[derive(Debug, Clone, serde::Serialize)]
pub struct Suspect {
    /// Name of the suspected repo.
    pub repo: String,
    /// 0 = the target repo itself; N = a dependency N hops away (nearest first).
    pub dep_distance: usize,
    /// The repo's SHA in the last-good release; `None` when the repo is not
    /// recorded in that release (i.e. it first appears in the bad release).
    pub last_good_sha: Option<String>,
    /// The repo's SHA in the first-bad release. [`rank_suspects`] only emits
    /// repos that are recorded in the bad release, so it sets this to `Some`.
    pub first_bad_sha: Option<String>,
}

/// Result of a gate time-bisect for one repo.
#[derive(Debug, Clone, serde::Serialize)]
pub struct Trace {
    /// Name of the repo that was traced.
    pub repo: String,
    /// Newest release where the gate was green (`None` ⇒ never green on record).
    pub last_good: Option<Frame>,
    /// First red release *after* `last_good` (`None` ⇒ still green — no regression).
    /// When `last_good` is `None` this is the oldest red release on record; it
    /// is also `None` when the repo has no recorded releases at all.
    pub first_bad: Option<Frame>,
    /// The full ordered series for the repo (oldest-first).
    pub frames: Vec<Frame>,
    /// Distinct git SHAs in the suspect window `(last_good, first_bad]` — where
    /// the break must have been introduced (target repo, release granularity).
    /// Empty while the gate is still green; when the gate was never green the
    /// window is unbounded and this lists every recorded SHA.
    pub suspect_shas: Vec<String>,
    /// Ranked culprits across the dependency graph: the target + the deps that
    /// changed SHA over the boundary, nearest first. Empty unless a graph was
    /// supplied to [`trace_gate_async`].
    pub suspects: Vec<Suspect>,
}

impl Trace {
    /// True when the gate is currently green (nothing to explain).
    ///
    /// This is purely "no red release was found": a trace with no frames at
    /// all (the repo never appears in the history) also reports `true`, so
    /// check `frames`/`last_good` if "green" must mean "recorded and green".
    /// Whenever this returns `false`, `first_bad` is `Some`.
    pub fn is_green(&self) -> bool {
        self.first_bad.is_none()
    }
}

/// Pure boundary-find: given release reports **oldest-first** (as
/// [`query_release_history`] returns), locate where `repo`'s gate went
/// green → red and bound the suspect SHAs. Deterministic, warehouse-free,
/// so it unit-tests without I/O.
///
/// Reports that do not record `repo` are skipped. The outcome is one of:
///
/// * **green → red** — `last_good` is the newest green frame, `first_bad` the
///   frame right after it, and `suspect_shas` holds that frame's SHA.
/// * **still green** — the newest frame is green: `first_bad` is `None` and
///   `suspect_shas` is empty.
/// * **never green** — `last_good` is `None`, `first_bad` is the oldest frame,
///   and every distinct recorded SHA (first-seen order) is suspect.
/// * **no frames** — both are `None` and `suspect_shas` is empty.
///
/// `suspects` is always left empty here; see [`rank_suspects`].
pub fn trace_gate(reports: &[ReleaseReport], repo: &str) -> Trace {
    // Distinct SHAs of `frames`, keeping first-seen order. The set makes the
    // membership test O(1) instead of rescanning the output vector per frame.
    fn distinct_shas(frames: &[Frame]) -> Vec<String> {
        let mut seen = std::collections::HashSet::with_capacity(frames.len());
        frames
            .iter()
            .map(|f| f.git_sha.as_str())
            .filter(|sha| seen.insert(*sha))
            .map(str::to_owned)
            .collect()
    }

    let frames: Vec<Frame> = reports
        .iter()
        .filter_map(|r| {
            let rec = r.repos.iter().find(|x| x.repo == repo)?;
            Some(Frame {
                release_id: r.release_id.to_string(),
                repo: repo.to_string(),
                git_sha: rec.git.sha.clone(),
                gate_status: rec.gate_status.clone(),
                good: gate_succeeded(&rec.gate_status),
            })
        })
        .collect();

    // The newest green frame anchors the bisect; the break is somewhere after it.
    let (last_good, first_bad, suspect_shas) = match frames.iter().rposition(|f| f.good) {
        Some(i) => {
            // `i` is the *last* green index, so every later frame is red: the
            // frame right after it is the first bad release, and its SHA is
            // the entire `(last_good, first_bad]` window.
            let first_bad = frames.get(i + 1).cloned();
            let shas: Vec<String> = first_bad.iter().map(|f| f.git_sha.clone()).collect();
            (Some(frames[i].clone()), first_bad, shas)
        }
        // Never green on record — can't bound; every recorded SHA is suspect
        // and the oldest frame (if any) is the first bad one.
        None => (None, frames.first().cloned(), distinct_shas(&frames)),
    };

    Trace { repo: repo.to_string(), last_good, first_bad, frames, suspect_shas, suspects: Vec::new() }
}

/// Builds the `(repo, dep_distance)` candidate list for `target`: the target
/// itself at distance 0, followed by every repo yielded by
/// `graph.deps_transitive(target)` in the order the graph yields them.
///
/// A dependency's distance is the length of `graph.dep_path(target, dep)`
/// minus one (presumably the path lists both endpoints, making this the hop
/// count — confirm against `WorkspaceGraph`). When the graph reports no path
/// the distance is `usize::MAX`, which sorts such a repo last.
fn candidates_from_graph(graph: &WorkspaceGraph, target: &str) -> Vec<(String, usize)> {
    let dependencies = graph.deps_transitive(target).into_iter().map(|dep| {
        let hops = graph
            .dep_path(target, &dep)
            .map_or(usize::MAX, |path| path.len().saturating_sub(1));
        (dep, hops)
    });

    std::iter::once((target.to_string(), 0usize))
        .chain(dependencies)
        .collect()
}

/// Ranks the candidate repos that could have introduced the regression.
///
/// `candidates` is a list of `(repo, dep_distance)` pairs with the target at
/// distance 0 (see [`candidates_from_graph`]). A candidate becomes a
/// [`Suspect`] only if it *moved* across the boundary: it is recorded in the
/// first-bad release and either was absent from the last-good release or had
/// a different SHA there. Anything else cannot be the culprit and is dropped.
///
/// The result is ordered by `dep_distance`, then by repo name. It is empty
/// when `trace` lacks either end of the boundary, or when either boundary
/// release cannot be found in `reports`. Pure — no I/O.
pub fn rank_suspects(
    reports: &[ReleaseReport],
    trace: &Trace,
    candidates: &[(String, usize)],
) -> Vec<Suspect> {
    // SHA recorded for `repo` in `report`, if the repo is part of that release.
    fn sha_in(report: &ReleaseReport, repo: &str) -> Option<String> {
        report
            .repos
            .iter()
            .find(|rec| rec.repo == repo)
            .map(|rec| rec.git.sha.clone())
    }

    // Without both ends of the boundary there is nothing to compare.
    let (good_frame, bad_frame) = match (trace.last_good.as_ref(), trace.first_bad.as_ref()) {
        (Some(good), Some(bad)) => (good, bad),
        _ => return Vec::new(),
    };

    // Frames carry the release id as text, so match on the rendered id.
    let report_for = |release_id: &str| {
        reports
            .iter()
            .find(|rep| rep.release_id.to_string() == release_id)
    };
    let Some(good_report) = report_for(&good_frame.release_id) else {
        return Vec::new();
    };
    let Some(bad_report) = report_for(&bad_frame.release_id) else {
        return Vec::new();
    };

    let mut ranked: Vec<Suspect> = Vec::new();
    for (repo, distance) in candidates {
        let before = sha_in(good_report, repo);
        let after = sha_in(bad_report, repo);
        // Moved = present in the bad release with a SHA that differs from the
        // good one; a repo missing from the good release counts as newly
        // introduced, one missing from the bad release never counts.
        let moved = match after.as_deref() {
            Some(bad_sha) => before.as_deref() != Some(bad_sha),
            None => false,
        };
        if moved {
            ranked.push(Suspect {
                repo: repo.clone(),
                dep_distance: *distance,
                last_good_sha: before,
                first_bad_sha: after,
            });
        }
    }

    // Nearest dependency first; the repo name breaks ties deterministically.
    ranked.sort_by(|l, r| (l.dep_distance, &l.repo).cmp(&(r.dep_distance, &r.repo)));
    ranked
}

/// Load `workspace`'s release history from the warehouse and bisect `repo`'s
/// gate. When a dependency `graph` is supplied, also rank cross-repo suspects
/// (the target + its changed dependencies) into [`Trace::suspects`].
pub async fn trace_gate_async(
    wh: &IcebergWarehouse,
    workspace: &str,
    repo: &str,
    graph: Option<&WorkspaceGraph>,
) -> Result<Trace> {
    let reports = query_release_history(wh, workspace, None).await?;
    let mut trace = trace_gate(&reports, repo);
    if let Some(g) = graph {
        let candidates = candidates_from_graph(g, repo);
        trace.suspects = rank_suspects(&reports, &trace, &candidates);
    }
    Ok(trace)
}

#[cfg(test)]
mod tests {
    use super::*;
    use super::super::pipeline::{RepoGitState, RepoReleaseRecord};
    use uuid::Uuid;

    /// One release (fresh random id) recording each `(repo, sha, gate_status)`
    /// given. The single place where the fixture's filler fields are spelled out.
    fn multi(repos: &[(&str, &str, &str)]) -> ReleaseReport {
        ReleaseReport {
            release_id: Uuid::new_v4(),
            workspace_name: "ws".into(),
            dep_graph_snapshot_id: Uuid::nil(),
            repos: repos
                .iter()
                .map(|&(repo, sha, status)| RepoReleaseRecord {
                    repo: repo.into(),
                    build_order_idx: 0,
                    git: RepoGitState { sha: sha.into(), branch: "main".into(), dirty: false },
                    gate_status: status.into(),
                    tests_passed: 0,
                    tests_failed: 0,
                    published_versions: vec![],
                    tantivy_snapshot_id: None,
                    dwarf_snapshot_id: None,
                })
                .collect(),
        }
    }

    /// One release recording a single repo.
    fn report(repo: &str, sha: &str, status: &str) -> ReleaseReport {
        multi(&[(repo, sha, status)])
    }

    #[test]
    fn finds_green_to_red_boundary() {
        // a1 green → b2 green → c3 red
        let reports = vec![
            report("znippy", "a1", "succeeded"),
            report("znippy", "b2", "succeeded_dry_run"),
            report("znippy", "c3", "failed_test"),
        ];
        let t = trace_gate(&reports, "znippy");
        assert!(!t.is_green());
        assert_eq!(t.last_good.as_ref().unwrap().git_sha, "b2");
        assert_eq!(t.first_bad.as_ref().unwrap().git_sha, "c3");
        assert_eq!(t.first_bad.as_ref().unwrap().gate_status, "failed_test");
        assert_eq!(t.suspect_shas, vec!["c3".to_string()]);
        assert_eq!(t.frames.len(), 3);
    }

    #[test]
    fn boundary_endpoints_define_the_bisect_range() {
        // green a1 → green b2 → red c3 : the break is in the range b2..c3.
        let reports = vec![
            report("znippy", "a1", "succeeded"),
            report("znippy", "b2", "succeeded"),
            report("znippy", "c3", "failed_regression"),
        ];
        let t = trace_gate(&reports, "znippy");
        // last_good + first_bad hand you the git range to rev-walk: b2..c3
        assert_eq!(t.last_good.as_ref().unwrap().git_sha, "b2");
        assert_eq!(t.first_bad.as_ref().unwrap().git_sha, "c3");
        assert_eq!(t.suspect_shas, vec!["c3".to_string()]);
    }

    #[test]
    fn flapping_history_anchors_on_newest_green() {
        // green a1 → red b2 → green c3 → red d4 : only the latest break matters.
        let reports = vec![
            report("znippy", "a1", "succeeded"),
            report("znippy", "b2", "failed_test"),
            report("znippy", "c3", "succeeded"),
            report("znippy", "d4", "failed_test"),
        ];
        let t = trace_gate(&reports, "znippy");
        assert!(!t.is_green());
        assert_eq!(t.last_good.as_ref().unwrap().git_sha, "c3");
        assert_eq!(t.first_bad.as_ref().unwrap().git_sha, "d4");
        assert_eq!(t.suspect_shas, vec!["d4".to_string()]);
    }

    #[test]
    fn still_green_has_no_regression() {
        let reports = vec![
            report("znippy", "a1", "succeeded"),
            report("znippy", "b2", "succeeded"),
        ];
        let t = trace_gate(&reports, "znippy");
        assert!(t.is_green());
        assert!(t.first_bad.is_none());
        assert_eq!(t.last_good.as_ref().unwrap().git_sha, "b2");
        assert!(t.suspect_shas.is_empty());
    }

    #[test]
    fn never_green_marks_all_suspect() {
        let reports = vec![
            report("znippy", "a1", "failed_test"),
            report("znippy", "b2", "failed_bench"),
        ];
        let t = trace_gate(&reports, "znippy");
        assert!(t.last_good.is_none());
        assert_eq!(t.first_bad.as_ref().unwrap().git_sha, "a1");
        assert_eq!(t.suspect_shas, vec!["a1".to_string(), "b2".to_string()]);
    }

    #[test]
    fn never_green_lists_each_sha_once() {
        // The same SHA released repeatedly must appear once, in first-seen order.
        let reports = vec![
            report("znippy", "a1", "failed_test"),
            report("znippy", "b2", "failed_test"),
            report("znippy", "a1", "failed_bench"),
        ];
        let t = trace_gate(&reports, "znippy");
        assert_eq!(t.suspect_shas, vec!["a1".to_string(), "b2".to_string()]);
    }

    #[test]
    fn unrecorded_repo_yields_empty_trace() {
        let reports = vec![report("znippy", "a1", "failed_test")];
        let t = trace_gate(&reports, "other");
        assert!(t.frames.is_empty());
        assert!(t.last_good.is_none());
        assert!(t.first_bad.is_none());
        assert!(t.suspect_shas.is_empty());
    }

    #[test]
    fn ranks_changed_repos_nearest_first() {
        // app (target) green→red; util (dep, dist 2) also changed; liba (dist 1) did not.
        let reports = vec![
            multi(&[("app", "a1", "succeeded"), ("liba", "L1", "succeeded"), ("util", "U1", "succeeded")]),
            multi(&[("app", "a2", "failed_test"), ("liba", "L1", "succeeded"), ("util", "U2", "succeeded")]),
        ];
        let t = trace_gate(&reports, "app");
        let candidates = vec![("app".into(), 0usize), ("liba".into(), 1usize), ("util".into(), 2usize)];
        let suspects = rank_suspects(&reports, &t, &candidates);

        // liba unchanged → dropped; app (dist 0) ranks before util (dist 2).
        let names: Vec<&str> = suspects.iter().map(|s| s.repo.as_str()).collect();
        assert_eq!(names, vec!["app", "util"]);
        assert_eq!(suspects[0].last_good_sha.as_deref(), Some("a1"));
        assert_eq!(suspects[0].first_bad_sha.as_deref(), Some("a2"));
        assert_eq!(suspects[1].repo, "util");
    }

    #[test]
    fn no_boundary_means_no_suspects() {
        // Still green: SHAs moved, but there is no red release to blame anyone for.
        let reports = vec![
            multi(&[("app", "a1", "succeeded"), ("liba", "L1", "succeeded")]),
            multi(&[("app", "a2", "succeeded"), ("liba", "L2", "succeeded")]),
        ];
        let t = trace_gate(&reports, "app");
        let candidates = vec![("app".to_string(), 0usize), ("liba".to_string(), 1usize)];
        assert!(rank_suspects(&reports, &t, &candidates).is_empty());
    }
}