duplicate_code 0.8.1

A tool that parses directories, scanning all the files within them to find duplicate segments of code across files.
use std::collections::{HashMap, HashSet};

use crate::model::indexed_file::*;

use super::*;

/// A collection of [`HashedFile`]s, as produced by [`to_hashed_files`].
pub type HashedFiles = Vec<HashedFile>;
/// Maps a blake3 hash to the set of line numbers associated with that hash.
///
/// As filled in by [`HashedFile::new`], the key is the hash of a line and the
/// value holds every line number in the file whose line produced that hash.
pub type HashToLineNumbers = HashMap<blake3::Hash, HashSet<LineNumber>>;
/// Maps a line number to the blake3 hash computed for that line.
pub type LineNumberToHash = HashMap<LineNumber, blake3::Hash>;

/// Per-file index of line hashes, addressable both by hash and by line number.
///
/// In test builds the struct additionally derives `serde::Serialize`; the two
/// map fields are then serialized through helper functions in
/// `tests::ordered_serialization` (presumably to get a stable ordering out of
/// the hash maps — confirm against that module).
#[cfg_attr(test, derive(serde::Serialize))]
pub struct HashedFile {
    /// Name of the file, as passed to [`HashedFile::new`].
    pub filename: Filename,
    /// Line count, copied from the source `IndexedFile`'s `number_of_lines`.
    pub number_of_lines: LineNumber,
    /// For each line hash, every line number whose line produced that hash.
    #[cfg_attr(
        test,
        serde(serialize_with = "tests::ordered_serialization::ordered_hash_to_line_numbers")
    )]
    pub hash_to_line_numbers: HashToLineNumbers,
    /// For each indexed line number, the hash of that line.
    #[cfg_attr(
        test,
        serde(serialize_with = "tests::ordered_serialization::ordered_line_number_to_hash")
    )]
    pub line_number_to_hash: LineNumberToHash,
}

impl HashedFile {
    /// Builds the hash indexes for a single file.
    ///
    /// Every entry of `indexed_file.line_number_to_line` is hashed with
    /// `crate::hashing_utilities::get_blake3_hash` and recorded in both
    /// directions: hash -> set of line numbers, and line number -> hash.
    ///
    /// * `filename` - name stored (as an owned copy) in the resulting value.
    /// * `indexed_file` - source of the lines and of `number_of_lines`; it is
    ///   only read, never modified.
    pub fn new(filename: &str, indexed_file: &IndexedFile) -> Self {
        let mut hash_to_line_numbers: HashMap<_, HashSet<_>> = HashMap::new();
        let mut line_number_to_hash = HashMap::new();

        // Iterate by reference: only the (`Copy`) line number and the computed
        // hash are kept, so cloning the whole line collection up front would
        // be a wasted allocation per line.
        for (line_number, line) in &indexed_file.line_number_to_line {
            let line_number = *line_number;
            let hash = crate::hashing_utilities::get_blake3_hash(line);
            // Identical lines share a hash, so they accumulate in one set.
            hash_to_line_numbers.entry(hash).or_default().insert(line_number);
            line_number_to_hash.insert(line_number, hash);
        }

        HashedFile {
            filename: filename.to_string(),
            number_of_lines: indexed_file.number_of_lines,
            hash_to_line_numbers,
            line_number_to_hash,
        }
    }
}

/// Converts every `(filename, indexed_file)` entry of `indexed_files` into a
/// [`HashedFile`], returning them in the iteration order of `indexed_files`.
pub fn to_hashed_files(indexed_files: &IndexedFiles) -> HashedFiles {
    indexed_files
        .iter()
        .map(|(name, file)| HashedFile::new(name, file))
        .collect()
}

// Test-only submodule. The `serialize_with` paths on `HashedFile`'s map fields
// (`tests::ordered_serialization::...`) resolve into this module.
#[cfg(test)]
mod tests;