duplicate_code 0.8.1

A tool for parsing directories and scanning all the files within to find duplicate segments of code across files.
use std::collections::{BTreeMap, HashMap};

use regex::Regex;

use crate::model::raw_file::*;

use super::*;

/// Lookup from a line number to the line stored for that number.
pub type LineNumberToLine = HashMap<LineNumber, Line>;
/// Indexed files keyed by filename; the `BTreeMap` keeps entries ordered by `Filename`.
pub type IndexedFiles = BTreeMap<Filename, IndexedFile>;

/// Index of a single raw file: its total line count plus the lines that were kept.
///
/// Serialization is derived for test builds only.
#[cfg_attr(test, derive(serde::Serialize))]
pub struct IndexedFile {
    /// Total number of lines in the raw file, counting lines that were not kept.
    pub number_of_lines: LineNumber,
    /// Kept lines, whitespace-trimmed, keyed by line number (numbering starts at 1).
    /// Lines rejected by the ignore regexes have no entry here.
    // In test builds this field is serialized through a custom helper from the
    // `tests::ordered_serialization` module instead of the derived default.
    #[cfg_attr(
        test,
        serde(serialize_with = "tests::ordered_serialization::ordered_line_number_to_line")
    )]
    pub line_number_to_line: LineNumberToLine,
}

impl IndexedFile {
    pub fn new(ignore_line_regexes: &[Regex], raw_file: RawFile) -> Self {
        let mut line_number_to_line = HashMap::new();
        let number_of_lines = raw_file.lines.len();
        let mut line_number = 1;

        for line in raw_file.lines {
            let trimmed_line = line.trim().to_string();

            if crate::regex_utilities::does_not_match_any(&trimmed_line, ignore_line_regexes) {
                line_number_to_line.insert(line_number, trimmed_line);
            }

            line_number += 1;
        }

        IndexedFile {
            number_of_lines,
            line_number_to_line,
        }
    }
}

pub fn to_indexed_files(ignore_line_regexes: Vec<String>, raw_files: RawFiles) -> IndexedFiles {
    let mut indexed_files = BTreeMap::new();
    let compiled_ignore_line_regexes = crate::regex_utilities::get_regexes(ignore_line_regexes);

    for raw_file in raw_files {
        let filename = raw_file.filename.clone();
        let indexed_file = IndexedFile::new(&compiled_ignore_line_regexes, raw_file);

        if !indexed_file.line_number_to_line.is_empty() {
            indexed_files.insert(filename, indexed_file);
        }
    }

    indexed_files
}

// Test-only module. The serde attribute on `IndexedFile::line_number_to_line`
// refers to `tests::ordered_serialization`, so that path must stay reachable from here.
#[cfg(test)]
mod tests;