use std::collections::HashMap;
/// Position of a line within a file (`FileContent::new` numbers lines starting at 1).
pub type LineNumber = usize;
/// Name identifying a file.
/// NOTE(review): presumably a path as given by the caller — confirm where `RawFile` is built.
pub type Filename = String;
/// The lines of a single file, one `String` per line.
pub type Lines = Vec<String>;
/// A collection of files that have not been hashed yet.
pub type RawFiles = Vec<RawFile>;
/// A collection of files with their per-line hash indexes built.
pub type Files = Vec<FileContent>;
/// A collection of `Duplicate` records.
pub type Duplicates = Vec<Duplicate>;
/// Hashed view of one file: every line is represented by its BLAKE3 hash and
/// can be looked up by hash or by line number.
pub struct FileContent {
/// Name of the file, taken over from the `RawFile` it was built from.
pub filename: Filename,
/// Total number of lines in the file.
pub number_of_lines: usize,
/// Maps a line's hash to a line number carrying that hash. When several
/// lines hash identically, `FileContent::new` keeps the last one inserted.
pub hash_to_line_number: HashMap<blake3::Hash, LineNumber>,
/// Maps each line number to the hash of that line.
pub line_number_to_hash: HashMap<LineNumber, blake3::Hash>,
}
impl FileContent {
    /// Builds the per-line hash indexes for `raw_file`, consuming it.
    ///
    /// Every line is hashed with `crate::hashing::get_blake3_hash` and recorded
    /// under its 1-based line number in both lookup directions.
    ///
    /// If several lines produce the same hash, `hash_to_line_number` keeps only
    /// the highest of their line numbers, because a later `insert` replaces the
    /// earlier value. `line_number_to_hash` always has one entry per line.
    pub fn new(raw_file: RawFile) -> FileContent {
        let number_of_lines = raw_file.lines.len();
        // Each map receives at most one entry per line, so reserve the space
        // once instead of letting the maps grow and rehash while filling.
        let mut hash_to_line_number = HashMap::with_capacity(number_of_lines);
        let mut line_number_to_hash = HashMap::with_capacity(number_of_lines);
        for (index, line) in raw_file.lines.into_iter().enumerate() {
            // `enumerate` is zero-based; line numbers start at 1.
            let line_number = index + 1;
            let hash = crate::hashing::get_blake3_hash(&line);
            hash_to_line_number.insert(hash, line_number);
            line_number_to_hash.insert(line_number, hash);
        }
        FileContent {
            filename: raw_file.filename,
            number_of_lines,
            hash_to_line_number,
            line_number_to_hash,
        }
    }
}
/// Turns each raw file into a `FileContent` via `FileContent::new`,
/// returning the results in the same order as the input.
pub fn to_file_content(raw_files: RawFiles) -> Files {
    raw_files.into_iter().map(FileContent::new).collect()
}
/// A file as a name plus its lines, before any hashing.
/// Consumed by `FileContent::new` to build the hash indexes.
pub struct RawFile {
/// Name of the file.
pub filename: Filename,
/// The file's lines in order; the first element becomes line 1 in `FileContent::new`.
pub lines: Lines,
}
/// A span of lines described by its first and last line number.
/// NOTE(review): whether `end` is inclusive is not visible in this file — confirm against the code that builds ranges.
#[derive(Debug)]
pub struct LineRange {
/// Line number where the range begins.
pub start: LineNumber,
/// Line number where the range ends.
pub end: LineNumber,
}
/// Pairs a line range in one file with a line range in a second file.
/// NOTE(review): presumably records a run of matching lines found by the detection code elsewhere in the crate — confirm.
#[derive(Debug)]
pub struct Duplicate {
/// Name of the first file.
pub file_1: Filename,
/// Line range within the first file.
pub file_1_line_range: LineRange,
/// Name of the second file.
pub file_2: Filename,
/// Line range within the second file.
pub file_2_line_range: LineRange,
}