duplicate_code 0.2.0

A tool that parses directories and scans every file within them to find duplicate segments of code across files.
use std::collections::HashMap;

/// A line number within a file. `FileContent::new` numbers lines starting at 1.
pub type LineNumber = usize;
/// A file's name as an owned string.
// NOTE(review): whether this is a bare name or a full path is decided by the caller — confirm.
pub type Filename = String;
/// The contents of a file, one `String` per line.
pub type Lines = Vec<String>;
/// A list of [`RawFile`] values.
pub type RawFiles = Vec<RawFile>;
/// A list of [`FileContent`] values.
pub type Files = Vec<FileContent>;
/// A list of [`Duplicate`] values.
pub type Duplicates = Vec<Duplicate>;

/// Hash index of a single file's lines, built from a [`RawFile`] by `FileContent::new`.
///
/// The line text itself is not retained; only per-line hashes are stored.
pub struct FileContent {
    /// Name of the file this index was built from.
    pub filename: Filename,
    /// Total number of lines the source file contained.
    pub number_of_lines: usize,
    /// Maps a line's hash to a line number carrying that content.
    ///
    /// When several lines of the file hash identically, only the highest
    /// (last inserted) line number is kept for that hash.
    pub hash_to_line_number: HashMap<blake3::Hash, LineNumber>,
    /// Maps every 1-based line number to the hash of that line.
    pub line_number_to_hash: HashMap<LineNumber, blake3::Hash>,
}

impl FileContent {
    /// Builds the hash index for `raw_file`, consuming it.
    ///
    /// Each line is hashed with `crate::hashing::get_blake3_hash` and recorded
    /// under its 1-based line number in both lookup directions.
    ///
    /// If the same hash occurs on more than one line, `hash_to_line_number`
    /// keeps only the last line number seen for it, while `line_number_to_hash`
    /// always holds one entry per line.
    pub fn new(raw_file: RawFile) -> FileContent {
        let number_of_lines = raw_file.lines.len();

        // The line count is known up front, so size both maps once instead of
        // letting them grow and rehash repeatedly while the file is indexed.
        let mut hash_to_line_number = HashMap::with_capacity(number_of_lines);
        let mut line_number_to_hash = HashMap::with_capacity(number_of_lines);

        for (index, line) in raw_file.lines.into_iter().enumerate() {
            // `enumerate` is zero-based; line numbers are one-based.
            let line_number = index + 1;
            let hash = crate::hashing::get_blake3_hash(&line);

            hash_to_line_number.insert(hash, line_number);
            line_number_to_hash.insert(line_number, hash);
        }

        FileContent {
            filename: raw_file.filename,
            number_of_lines,
            hash_to_line_number,
            line_number_to_hash,
        }
    }
}

/// Converts each [`RawFile`] into its [`FileContent`] index via `FileContent::new`.
///
/// The input is consumed, and the output holds one entry per input file in
/// the same order.
pub fn to_file_content(raw_files: RawFiles) -> Files {
    raw_files.into_iter().map(FileContent::new).collect()
}

/// A file's name together with its contents split into lines.
///
/// This is the input consumed by `FileContent::new`.
pub struct RawFile {
    /// Name of the file.
    pub filename: Filename,
    /// The file's contents, one entry per line.
    pub lines: Lines,
}

/// A span of lines within a file, given by its first and last line numbers.
// NOTE(review): whether `end` is inclusive is set by the code that builds ranges — confirm there.
#[derive(Debug)]
pub struct LineRange {
    /// Line number where the span starts.
    pub start: LineNumber,
    /// Line number where the span ends.
    pub end: LineNumber,
}

/// A pair of line ranges, each tied to a file name, describing one reported duplicate.
// NOTE(review): presumably the two ranges hold matching content — verify against the detector.
#[derive(Debug)]
pub struct Duplicate {
    /// Name of the first file.
    pub file_1: Filename,
    /// Lines of the first file covered by this duplicate.
    pub file_1_line_range: LineRange,
    /// Name of the second file.
    pub file_2: Filename,
    /// Lines of the second file covered by this duplicate.
    pub file_2_line_range: LineRange,
}