//! deflake-rs 0.1.0
//!
//! cargo-deflake is a command that detects flaky tests based on which tests fail and which code
//! has changed.
use core::panic;
use std::{collections::HashMap, env::current_dir, fs, io::read_to_string, str::FromStr};

use git2::{Blob, Delta, DiffLineType, DiffOptions, Oid, Repository};

use crate::cli::Cli;

/// A changed file, used as the key of [`GitChanges`].
///
/// The `String` is always the path of the file on the new side of the diff; the `Oid`s are the
/// ids of the file contents.
#[derive(Eq, PartialEq, Hash, Clone, Debug)]
pub enum FileChange {
    /// A newly added file: (new file path, id of the new contents).
    Addition(String, Oid),
    /// A modified (or renamed/copied) file: (new file path, id of the old contents, id of the
    /// new contents).
    Modification(String, Oid, Oid),
}

/// The kind of a single-line [`Edit`].
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum EditType {
    /// The line was added in the new version of the file.
    Add,
    /// The line was removed from the old version of the file.
    Delete,
}

/// A single added or deleted line within a changed file.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Edit {
    /// Whether the line was added or deleted.
    pub edit_type: EditType,
    /// The content of the added or deleted line, as reported by the diff.
    pub modification: String,
    /// Line number of the edit in the new file. For deletions, this will be the location in the
    /// new file the line would have been.
    pub line: u32,
}

/// The edits made to each changed file, keyed by the file's [`FileChange`].
pub type GitChanges = HashMap<FileChange, Vec<Edit>>;
/// Wrapper around an opened git repository, used to diff it and read file contents from it.
pub struct Git {
    /// The underlying `git2` repository handle.
    repository: Repository,
}

impl Git {
    pub fn new(dir: &String) -> Option<Git> {
        let repo = match Repository::open(dir) {
            Ok(repo) => repo,
            Err(e) => {
                panic!("error loading git repository: {}", e);
                return None;
            }
        };
        Some(Git { repository: repo })
    }

    pub fn changes(&self, b: &String, args: &Cli) -> GitChanges {
        let repo = &self.repository;

        // Load the old (a) and new (b) commits
        //let a_id = repo.revparse_single(a.as_str()).unwrap().id();
        //let a_tree = repo.find_commit(a_id).unwrap().tree().unwrap();

        //let a_id = repo.index().unwrap().write_tree().unwrap();
        //let a_tree = repo.find_tree(a_id).unwrap();

        let b_id = repo.revparse_single(b.as_str()).unwrap().id();
        let b_tree = repo.find_commit(b_id).unwrap().tree().unwrap();

        // Set the options for the diff
        let mut diff_options = DiffOptions::new();
        // NOTE: pathspec does not support just putting "." to get the current dir, have to work out the
        // relative path
        let repo_dir = repo.workdir().unwrap();
        let work_dir = current_dir().unwrap();
        let relative_dir = work_dir
            .strip_prefix(repo_dir)
            .unwrap_or(repo_dir)
            .to_str()
            .unwrap();
        diff_options.pathspec(relative_dir);
        diff_options.include_untracked(true);
        diff_options.recurse_untracked_dirs(true);
        //diff_options.update_index(true);

        // get the diff
        let diff = repo
            .diff_tree_to_workdir_with_index(Some(&b_tree), Some(&mut diff_options))
            .unwrap();

        // Collect all the edits made to each Rust file
        let mut map: HashMap<FileChange, Vec<Edit>> = HashMap::new();
        let mut deltas: HashMap<FileChange, i128> = HashMap::new();

        // NOTE: to correctly handle the delta, this expects that the edits in each file come in
        // order top to bottom
        let _ = diff.foreach(
            &mut |_, _| true,
            None,
            None,
            Some(&mut |a, b, c| {
                let path = a.new_file().path().unwrap().to_str().unwrap();
                if !path.ends_with(".rs")
                {
                    let warn = match &args.ignore {
                        Some(ignore) => {
                            !ignore.into_iter().any(|p| path.starts_with(format!("{}/{}", relative_dir, p).as_str()))
                        },
                        None => true

                    };
                    if warn {
                        println!("NOTE: skipping file \"{}\" as not Rust code. If this file affects the program this will be undetected {}", path, a.new_file().path().unwrap().file_name().unwrap().to_string_lossy());
                    }
                    return true;
                }

                // Hack to deal with objects in working dir. Instead it should be refactored to the DiffDelta is stored in FileChange and get_file reads from disk instead of using the ODB 
                let mut dir = std::env::current_dir().unwrap();
                dir.push(&args.git_dir);
                if self.repository.find_blob(a.new_file().id()).is_err() {
                        dir.push(a.new_file().path().unwrap());
                        repo.blob(fs::read(dir).unwrap().as_slice()).unwrap();
                }

                // Get the type of change the file had
                // NOTE: only tracks files that are added or modified
                let change = match a.status() {
                    Delta::Added => Some(FileChange::Addition(
                        String::from_str(a.new_file().path().unwrap().to_str().unwrap()).unwrap(),
                        a.new_file().id(),
                    )),
                    // TODO: treat renamed and copied as a Modification or Addition???
                    Delta::Modified | Delta::Renamed | Delta::Copied  => Some(FileChange::Modification(
                        String::from_str(a.new_file().path().unwrap().to_str().unwrap()).unwrap(),
                        a.old_file().id(),
                        a.new_file().id(),
                    )),
                    status => {
                        eprintln!("unhandled change of type: {:#?}", status);
                        None
                    }
                };

                // Ignore unsupported changes
                let change = match change {
                    Some(change) => change,
                    None => return true,
                };

                // Set default for the key if doesn't exist
                map.entry(change.clone()).or_insert(Vec::new());
                let delta = deltas.entry(change.clone()).or_insert(0);

                // TODO: combine lines of edits into blocks like jgit ?
                match &c.origin_value() {
                    DiffLineType::Context => {
                        // TODO: start a new edit?
                    }

                    DiffLineType::Addition => {
                        let new = &c.new_lineno().unwrap();
                        let edit = Edit {
                            edit_type: EditType::Add,
                            line: *new,
                            modification: String::from_utf8(c.content().to_vec()).unwrap(),
                        };
                        map.get_mut(&change).unwrap().push(edit);

                        *delta += 1;
                    }
                    DiffLineType::Deletion => {
                        let old = &c.old_lineno().unwrap();

                        //println!("old: {}, new: {}", old, *old as i128+delta.clone());

                        let edit = Edit {
                            edit_type: EditType::Delete,
                            // Calculate where the deleted line _would be_ in the new file, based
                            // on the lines added and deleted before it
                            line: (*old as i128+delta.clone()) as u32,
                            modification: String::from_utf8(c.content().to_vec()).unwrap(),
                        };
                        map.get_mut(&change).unwrap().push(edit);

                        *delta -= 1;
                    }

                    _ => {
                        eprintln!("UNEXPECTED ORIGIN: '{}'", &c.origin())
                    }
                }

                true
            }),
        );

        return map;
    }

    pub fn get_file(&self, oid: Oid) -> String {
        //let file =
        //String::from_utf8(self.repository.find_blob(oid).unwrap().content().to_vec()).unwrap();

        //dbg!(oid);
        let file = String::from_utf8(
            self.repository
                .find_object(oid, Some(git2::ObjectType::Blob))
                .unwrap()
                .into_blob()
                .unwrap()
                .content()
                .to_vec(),
        )
        .unwrap();

        return file;
    }
}

#[cfg(test)]
mod test {

    // Placeholder test: its body is entirely commented out, so it runs no code and asserts
    // nothing. The commented-out call names `Git::diff`, which the `impl Git` in this file does
    // not define.
    // TODO: replace with a real test of `Git::changes`.
    #[test]
    fn test1() {
        //Git::diff(
        //&"35f43c".to_string(),
        //&"eaf59e".to_string(),
        //&"..".to_string(),
        //);
    }
}