repoverse 0.1.4

Multi-repo workspace tool: keep many git repos in sync and roll changes up across dependency boundaries
//! Minimal GitHub Actions workflow parser: extract a named job's shell
//! `run:` steps so they can be executed locally ("basically the same test").
//! Caveats (services/matrix/uses) are detected and surfaced, not hidden.

use anyhow::{Context, Result};
use serde::Deserialize;
use std::collections::BTreeMap;
use std::path::Path;

/// Top-level shape of a workflow file; only the `jobs` key is modeled here.
#[derive(Debug, Deserialize)]
struct Workflow {
    /// Jobs keyed by job id. Defaults to an empty map when the key is absent.
    #[serde(default)]
    jobs: BTreeMap<String, Job>,
}

/// A single workflow job: its steps plus the sections that affect local reproducibility.
#[derive(Debug, Deserialize)]
struct Job {
    /// Steps in declaration order. Defaults to an empty list when the key is absent.
    #[serde(default)]
    steps: Vec<Step>,
    /// Raw `services` section, kept as an untyped YAML value; `extract_job` only
    /// checks whether it is present.
    #[serde(default)]
    services: Option<serde_yaml::Value>,
    /// Raw `strategy` section, kept as an untyped YAML value; `extract_job` only
    /// checks whether it is present.
    #[serde(default)]
    strategy: Option<serde_yaml::Value>,
}

/// One job step; only the keys this module inspects are modeled.
#[derive(Debug, Deserialize)]
struct Step {
    /// Shell text of a `run:` step, if this step has one.
    #[serde(default)]
    run: Option<String>,
    /// Action reference of a `uses:` step (e.g. `actions/checkout@v4`), if this step has one.
    #[serde(default)]
    uses: Option<String>,
    /// Value of the step's `working-directory` key, if set.
    #[serde(default, rename = "working-directory")]
    working_directory: Option<String>,
}

/// Result of extracting one job from a workflow: the script to run locally plus
/// any caveats about how the local run differs from CI.
#[derive(Debug, Clone, Default)]
pub struct ExtractedJob {
    /// Shell script assembled from the job's `run:` steps, in step order,
    /// newline-separated. Empty when the job has no `run:` steps.
    pub script: String,
    /// Non-fatal reproducibility caveats to surface to the user.
    pub caveats: Vec<String>,
}

/// Extract `job` from the workflow file at `path` into a runnable script.
///
/// The job's `run:` steps are emitted in declaration order, each trimmed of
/// surrounding whitespace and separated by newlines.
///
/// A step's `working-directory` applies to that step only, as it does in
/// GitHub Actions: such a step is emitted inside a `( ... )` subshell that
/// changes directory first, so the `cd` cannot leak into later steps, and the
/// step's commands are skipped (the subshell exits non-zero) if the directory
/// cannot be entered.
///
/// Caveats are recorded, not treated as errors, when the job declares
/// `services` or `strategy`, and for every `uses:` step whose action does not
/// start with `actions/checkout` or `actions/setup` (those two are skipped
/// silently).
///
/// Returns `Ok(None)` when the workflow has no job named `job`.
///
/// # Errors
///
/// Returns an error if the file cannot be read or cannot be parsed as a
/// workflow.
pub fn extract_job(path: &Path, job: &str) -> Result<Option<ExtractedJob>> {
    let text = std::fs::read_to_string(path)
        .with_context(|| format!("reading workflow {}", path.display()))?;
    let wf: Workflow =
        serde_yaml::from_str(&text).with_context(|| format!("parsing {}", path.display()))?;
    let Some(j) = wf.jobs.get(job) else {
        return Ok(None);
    };
    let mut ex = ExtractedJob::default();
    if j.services.is_some() {
        ex.caveats.push(format!(
            "job `{job}` declares services: not reproduced locally"
        ));
    }
    if j.strategy.is_some() {
        ex.caveats
            .push(format!("job `{job}` uses a matrix: running host leg only"));
    }
    let mut lines = Vec::new();
    for s in &j.steps {
        if let Some(run) = &s.run {
            let run = run.trim();
            match &s.working_directory {
                // Scope the directory change to this one step. A bare `cd` line
                // would silently move every later step into `wd` as well, and a
                // failed `cd` would run the commands in the wrong directory.
                Some(wd) => lines.push(format!("(\ncd {wd} || exit 1\n{run}\n)")),
                None => lines.push(run.to_string()),
            }
        } else if let Some(uses) = &s.uses {
            if !uses.starts_with("actions/checkout") && !uses.starts_with("actions/setup") {
                ex.caveats.push(format!(
                    "step `uses: {uses}` skipped (assumed local toolchain)"
                ));
            }
        }
    }
    ex.script = lines.join("\n");
    Ok(Some(ex))
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Write `contents` to a `ci.yml` file inside `dir` and return its path.
    fn write_workflow(dir: &std::path::Path, contents: &str) -> std::path::PathBuf {
        let file = dir.join("ci.yml");
        std::fs::write(&file, contents).unwrap();
        file
    }

    /// `run:` steps end up in the script; the matrix and the non-allowlisted
    /// `uses:` step are reported as caveats.
    #[test]
    fn extracts_run_steps_and_caveats() {
        let tmp = tempdir().unwrap();
        let workflow = write_workflow(
            tmp.path(),
            r#"
name: ci
jobs:
  test:
    strategy: { matrix: { os: [a, b] } }
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - run: cargo build
      - run: cargo test
        working-directory: cli
"#,
        );
        let extracted = extract_job(&workflow, "test").unwrap().unwrap();
        for needle in ["cargo build", "cd cli", "cargo test"] {
            assert!(
                extracted.script.contains(needle),
                "script is missing `{needle}`"
            );
        }
        for needle in ["matrix", "rust-toolchain"] {
            assert!(
                extracted.caveats.iter().any(|c| c.contains(needle)),
                "no caveat mentions `{needle}`"
            );
        }
    }

    /// Asking for a job the workflow does not define yields `None`, not an error.
    #[test]
    fn missing_job_is_none() {
        let tmp = tempdir().unwrap();
        let workflow = write_workflow(tmp.path(), "jobs: {}\n");
        let outcome = extract_job(&workflow, "nope").unwrap();
        assert!(outcome.is_none());
    }
}