deflake-rs 0.1.0

cargo-deflake is a Cargo subcommand that detects flaky tests by comparing which tests fail against which code has changed.
use std::{collections::HashMap, env::current_dir, process::ExitStatus};

use duct::cmd;
use serde::{Deserialize, Serialize};
use timed::timed;

use crate::test_interface::{TestCase, Track};

pub struct Cov {}

impl Cov {
    //#[timed]
    pub fn generate_profdata(test: &TestCase) {
        let profraw = test.profraw_filename();
        let profdata = test.profdata_filename();
        let res = cmd!("cargo", "profdata", "--", "merge", "--sparse", profraw, "-o", profdata)
            .dir(
                current_dir()
                    .unwrap()
                    .join(test.binary.workspace.clone().unwrap_or(".".to_string())),
            )
            //.unchecked()
            .stdout_capture()
            .stderr_capture()
            .unchecked()
            .run()
            .unwrap();
        if !res.status.success() {
            dbg!(String::from_utf8(res.stderr));

            dbg!(String::from_utf8(res.stdout));
        }
    }

    // Convert profdata to json format
    //#[timed]
    pub fn parse_cov(test: &TestCase) -> JsonCovRoot {
        let result = cmd!(
            "cargo",
            "cov",
            "--",
            "export",
            "-Xdemangler=rustfilt",
            test.binary.clone().path.clone(),
            format!(
                "-instr-profile={}",
                //test.binary.workspace.clone().unwrap_or(".".to_string()),
                test.profdata_filename()
            ),
            "-format=text",
            "-ignore-filename-regex='/.cargo/registry'"
        )
        .dir(
            current_dir()
                .unwrap()
                .join(test.binary.workspace.clone().unwrap_or(".".to_string())),
        )
        .read()
        .unwrap();

        let cov: JsonCovRoot = serde_json::from_str(&result).unwrap();

        //let code_dir = std::env::current_dir().unwrap();
        //let code_dir = code_dir.to_str().unwrap();

        //for f in &cov.data.first().unwrap().files {
        //if f.filename.starts_with(code_dir) {
        ////println!("{}", f.filename);
        //}
        //}

        cov
    }

    // Parse coverage data for files in a specific folder
    //#[timed]
    pub fn get_regions(cov: JsonCovRoot, file_prefix: &str) -> HashMap<String, Vec<Region>> {
        let code_dir = std::env::current_dir().unwrap();
        let code_dir = code_dir.to_str().unwrap();
        //let code_dir = "/Users/ben/Documents/code/dissertation/flaky_example";

        let mut files = HashMap::new();

        for file in &cov.data.first().unwrap().files {
            if file.filename.starts_with(code_dir) {
                // TODO: improve. assumes end segment always after start segment
                let mut regions = vec![];
                let mut current_segment = None;
                for s in &file.segments {
                    if s.is_region_entry() {
                        if current_segment.is_some() {
                            regions.push(Region::from_tuple(current_segment.unwrap()));
                        }
                        current_segment = Some((s.line(), s.col(), 0, 0, s.hits()));
                    } else {
                        if let Some(segment) = current_segment.as_mut() {
                            segment.2 = s.line();
                            segment.3 = s.col();
                        }
                    }
                }
                regions.push(Region::from_tuple(current_segment.unwrap()));

                //dbg!(&file.segments);
                //dbg!(&regions);

                files.insert(
                    file.filename
                        .strip_prefix(&file_prefix)
                        .unwrap()
                        .to_string(),
                    regions,
                );
            }
        }

        files
    }

    //#[timed]
    pub fn has_hits(regions: &Vec<Region>, track: &Vec<Track>) -> bool {
        // really bad but done for logging
        //let mut hits = 0;
        //track.into_iter().for_each(|line| {
        //let has_hit = regions
        //.into_iter()
        //.any(|region| region.includes(line) && region.hits > 0);

        //if has_hit {
        ////println!("Test hit: {:?}", line);
        //hits += 1;
        //}
        //});

        //return hits > 0;

        track.into_iter().any(|line| {
            let has_hit = regions
                .into_iter()
                .any(|region| region.includes(line) && region.hits > 0);
            has_hit
        })
    }
}

/// A source region with its start position, end position and execution count.
#[derive(Debug)]
pub struct Region {
    line_start: u64,
    line_end: u64,
    col_start: u64,
    col_end: u64,
    hits: u64,
}
impl Region {
    /// Builds a region from `(line_start, col_start, line_end, col_end, hits)`.
    ///
    /// Note that the tuple order differs from the field declaration order.
    fn from_tuple(t: (u64, u64, u64, u64, u64)) -> Region {
        let (line_start, col_start, line_end, col_end, hits) = t;
        Region {
            line_start,
            line_end,
            col_start,
            col_end,
            hits,
        }
    }

    /// Returns `true` when `track` lies inside this region.
    ///
    /// * `Track::Line` matches when the line is within `line_start..=line_end`.
    /// * `Track::Span(line_start, line_end, col_start, col_end)` matches when the region
    ///   starts at or before the span start and ends at or after the span end. Positions are
    ///   compared as `(line, column)` pairs, so a span sharing only its first or only its
    ///   last line with the region is handled as well.
    fn includes(&self, track: &Track) -> bool {
        match track {
            Track::Line(line, _) => {
                let line = *line as u64;
                self.line_start <= line && line <= self.line_end
            }
            Track::Span(ls, le, cs, ce) => {
                let span_start = (*ls as u64, *cs as u64);
                let span_end = (*le as u64, *ce as u64);
                // Tuples compare lexicographically: the line decides, the column breaks ties.
                (self.line_start, self.col_start) <= span_start
                    && span_end <= (self.line_end, self.col_end)
            }
        }
    }
}

// TODO: parse json coverage into structured data
/// Top-level object of the coverage JSON, as deserialized by `Cov::parse_cov`.
#[derive(Serialize, Deserialize, Debug)]
pub struct JsonCovRoot {
    /// Coverage payloads; the code in this file only ever reads the first entry.
    data: Vec<JsonCovData>,
    /// Read from the JSON key `type` (or `type_name`).
    // NOTE(review): `alias` only affects deserialization, so serializing writes `type_name`
    // rather than `type` — confirm whether a round trip is expected.
    #[serde(alias = "type")]
    type_name: String,
    /// Value of the `version` key, kept as a string.
    version: String,
}

/// One entry of the `data` array of [`JsonCovRoot`].
#[derive(Serialize, Deserialize, Debug)]
struct JsonCovData {
    /// Per-file coverage (file name plus segments).
    files: Vec<JsonCovFile>,
    /// Per-function coverage (name, file names, regions).
    functions: Vec<JsonCovFunction>,
    /// Contents of the `totals` key; no field of it is modeled yet.
    totals: JsonCovTotal,
}

/// One coverage segment of a file, stored positionally as
/// `(line, col, hits, has_hits, is_region_entry, is_region_gap)`; use the accessor methods
/// of the same names instead of the tuple indices.
#[derive(Serialize, Deserialize)]
struct JsonCovFileSegment(u64, u64, u64, bool, bool, bool);
// See https://llvm.org/doxygen/structllvm_1_1coverage_1_1CoverageSegment.html
impl JsonCovFileSegment {
    fn line(&self) -> u64 {
        self.0
    }
    fn col(&self) -> u64 {
        self.1
    }
    fn hits(&self) -> u64 {
        self.2
    }

    // If it was instrumented or not
    fn has_hits(&self) -> bool {
        self.3
    }
    fn is_region_entry(&self) -> bool {
        self.4
    }
    fn is_region_gap(&self) -> bool {
        self.5
    }
}

impl std::fmt::Debug for JsonCovFileSegment {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "l: {}, col: {}, hits: {}, is tracked: {}, entry: {}, gap: {}",
            self.line(),
            self.col(),
            self.hits(),
            self.has_hits(),
            self.is_region_entry(),
            self.is_region_gap()
        )
    }
}

/// Coverage of a single source file. Only the file name and the segments are modeled; the
/// commented-out fields below are keys that are not deserialized yet.
#[derive(Serialize, Deserialize, Debug)]
struct JsonCovFile {
    //branches: Vec<JsonCovBranch>,
    //expansions: Vec<JsonCovExpansion>,
    /// File name as written in the export; `Cov::get_regions` compares it against the
    /// current working directory with a plain string prefix check.
    filename: String,
    //mcdc_records: Vec,
    /// Segments of the file, in the order they appear in the export.
    segments: Vec<JsonCovFileSegment>,
    //summary: JsonCovFileSummary
}

/// One region of a function, stored positionally as
/// `(line_start, col_start, line_end, col_end, hits, file_id, expanded_file_id, kind)`;
/// use the accessor methods of the same names instead of the tuple indices.
#[derive(Serialize, Deserialize)]
struct JsonCovFunctionRegion(u64, u64, u64, u64, u64, u64, u64, u64);

/// Kind of a function region, decoded from the last element of the region tuple.
#[derive(Debug)]
enum JsonCovFunctionRegionKind {
    Code,
    Expansion,
    Skipped,
    Gap,
}

// TODO: confirm correct
impl JsonCovFunctionRegion {
    /// First line of the region (element 0).
    fn line_start(&self) -> u64 {
        self.0
    }
    /// First column of the region (element 1).
    fn col_start(&self) -> u64 {
        self.1
    }
    /// Last line of the region (element 2).
    fn line_end(&self) -> u64 {
        self.2
    }
    /// Last column of the region (element 3).
    fn col_end(&self) -> u64 {
        self.3
    }
    /// Execution count of the region (element 4).
    fn hits(&self) -> u64 {
        self.4
    }
    /// Element 5 of the region tuple.
    fn file_id(&self) -> u64 {
        self.5
    }
    /// Element 6 of the region tuple.
    fn expanded_file_id(&self) -> u64 {
        self.6
    }

    /// Decodes the numeric kind (element 7), or returns `None` for a value this file does
    /// not know about.
    fn known_kind(&self) -> Option<JsonCovFunctionRegionKind> {
        match self.7 {
            0 => Some(JsonCovFunctionRegionKind::Code),
            1 => Some(JsonCovFunctionRegionKind::Expansion),
            2 => Some(JsonCovFunctionRegionKind::Skipped),
            3 => Some(JsonCovFunctionRegionKind::Gap),
            _ => None,
        }
    }

    /// Decodes the numeric kind (element 7).
    ///
    /// # Panics
    /// Panics with `Unexpected kind: N` when the value is not one of the four known kinds.
    // The `Debug` impl goes through `known_kind` so that printing never panics; this
    // panicking accessor is kept for callers that rely on it.
    #[allow(dead_code)]
    fn kind(&self) -> JsonCovFunctionRegionKind {
        self.known_kind()
            .unwrap_or_else(|| panic!("Unexpected kind: {}", self.7))
    }
}

/// Compact single-line rendering. A known kind is printed by name; an unknown one is
/// printed as `Unknown(N)` instead of panicking in the middle of formatting.
impl std::fmt::Debug for JsonCovFunctionRegion {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "span: {}:{}-{}:{}, hits: {}, kind: ",
            self.line_start(),
            self.col_start(),
            self.line_end(),
            self.col_end(),
            self.hits(),
        )?;
        match self.known_kind() {
            Some(kind) => write!(f, "{:?}", kind),
            None => write!(f, "Unknown({})", self.7),
        }
    }
}

/// Coverage of a single function. The commented-out entries below are keys that are not
/// deserialized yet.
#[derive(Serialize, Deserialize, Debug)]
struct JsonCovFunction {
    //branches
    /// Value of the `count` key — presumably the execution count of the function; confirm
    /// against the export format.
    count: u64,
    /// Files the function belongs to; the test in this file only looks at the first one.
    filenames: Vec<String>,
    //mcdc_records: Vec<>
    /// Function name as written in the export.
    name: String,
    /// Regions of the function, in the order they appear in the export.
    regions: Vec<JsonCovFunctionRegion>,
}

/// Placeholder for the `totals` object: it declares no fields, so nothing from that object
/// is kept after deserialization.
#[derive(Serialize, Deserialize, Debug)]
struct JsonCovTotal {}

#[cfg(test)]
mod test {
    use std::fs;

    use super::*;

    /// Smoke test over a stored coverage export: parses the JSON, rebuilds the regions of
    /// every file under `code_dir` by hand, prints them together with the function names,
    /// and finally runs `Cov::get_regions` on the same data.
    ///
    /// Nothing is asserted; the test only fails when reading, parsing or region building
    /// panics.
    // NOTE(review): the fixture path and `code_dir` are specific to one machine.
    #[test]
    fn parse_json() {
        //cargo test -- --exact cov::test::parse_json --show-output
        let fixture = "../flaky_example/flaky_coverage/mylib_test::test_flaky.json";
        let json = fs::read_to_string(fixture).unwrap();

        let cov: JsonCovRoot = serde_json::from_str(&json).unwrap();

        let code_dir = "/Users/ben/Documents/code/dissertation/flaky_example";

        dbg!(code_dir);

        let in_code_dir = |name: &str| name.starts_with(code_dir);

        for f in cov.data.first().unwrap().files.iter() {
            if !in_code_dir(&f.filename) {
                continue;
            }

            // TODO: improve. assumes end segment always after start segment
            let mut regions = Vec::new();
            let mut open: Option<(u64, u64, u64, u64, u64)> = None;
            for seg in f.segments.iter() {
                if !seg.is_region_entry() {
                    // Not an entry: move the end of the open region (if any) to this segment.
                    if let Some(region) = open.as_mut() {
                        region.2 = seg.line();
                        region.3 = seg.col();
                    }
                    continue;
                }
                // An entry closes the previous region and opens a new one.
                if let Some(finished) = open.replace((seg.line(), seg.col(), 0, 0, seg.hits())) {
                    regions.push(finished);
                }
            }
            regions.push(open.unwrap());

            dbg!(&f.segments);
            dbg!(regions);
        }

        for f in cov.data.first().unwrap().functions.iter() {
            let first_file = f.filenames.first().unwrap();
            if in_code_dir(first_file) {
                println!("function: {}", rustc_demangle::demangle(&f.name));
            }
        }

        dbg!(Cov::get_regions(cov, &String::new()));
    }
}