use crate::model::duplicate::{Duplicate, Duplicates};
use crate::model::hashed_file::{HashedFile, HashedFiles};
use crate::model::line_range::LineRange;
use crate::model::LineNumber;
/// Collects the duplicates found between every unordered pair of `files`.
///
/// Each file is compared only with the files positioned after it, so no pair
/// is examined twice and no file is compared with itself. The combined result
/// is sorted before it is returned.
///
/// `minimum_successive_lines` is forwarded unchanged to the pairwise search.
pub fn get_all_duplicates(minimum_successive_lines: usize, files: HashedFiles) -> Duplicates {
    let file_count = files.len();
    debug!("Processing {:?} files for duplicates.", files.len());

    // Enumerate index pairs (first, second) with first < second, in the same
    // order a nested loop would visit them, and flatten the per-pair results.
    let mut found: Duplicates = (0..file_count)
        .flat_map(|first| ((first + 1)..file_count).map(move |second| (first, second)))
        .flat_map(|(first, second)| {
            get_duplicates(
                minimum_successive_lines,
                files.get(first).unwrap(),
                files.get(second).unwrap(),
            )
        })
        .collect();

    debug!("Found {:?} duplicates.", found.len());
    found.sort();
    found
}
/// Finds the duplicated line runs shared by `file_1` and `file_2`.
///
/// Lines of `file_1` are visited from line 1 through `file_1.number_of_lines`.
/// For a line whose hash also appears in `file_2`, every line of `file_2`
/// carrying that hash is tried as a starting point with `check_for_duplicate`,
/// and each run it reports is added to the result.
///
/// When at least one run was reported for the current line, the scan resumes
/// on the line just past the longest of those runs, so lines inside that run
/// are not used as new starting points.
///
/// Lines of `file_1` with no entry in `line_number_to_hash` are skipped.
fn get_duplicates(
    minimum_successive_lines: usize,
    file_1: &HashedFile,
    file_2: &HashedFile,
) -> Duplicates {
    let mut duplicates = vec![];
    let mut file_1_line_number = 1;
    while file_1_line_number <= file_1.number_of_lines {
        // Single lookup per map: resolve the line's hash, then the lines of
        // `file_2` with the same hash. `None` at either step means no candidates.
        let candidate_starts = file_1
            .line_number_to_hash
            .get(&file_1_line_number)
            .and_then(|hash| file_2.hash_to_line_numbers.get(hash));

        if let Some(file_2_starts) = candidate_starts {
            let mut largest_duplicate_size = 0;
            for file_2_start in file_2_starts {
                if let Some((duplicate_size, duplicate)) = check_for_duplicate(
                    minimum_successive_lines,
                    file_1,
                    file_1_line_number,
                    file_2,
                    *file_2_start,
                ) {
                    if duplicate_size > largest_duplicate_size {
                        largest_duplicate_size = duplicate_size;
                    }
                    duplicates.push(duplicate);
                }
            }
            // Jump to the last line of the longest run; the unconditional
            // increment below then moves to the first line after it.
            if largest_duplicate_size > 0 {
                file_1_line_number += largest_duplicate_size - 1;
            }
        }
        file_1_line_number += 1;
    }
    duplicates
}
/// Measures the run of matching lines that begins at `file_1_start` in
/// `file_1` and `file_2_start` in `file_2`.
///
/// The start lines themselves are not compared here; they count as the first
/// line of the run. Subsequent lines are compared pairwise by hash until the
/// hashes differ, either line has no recorded hash, or either file runs out
/// of lines.
///
/// Returns `Some((length, duplicate))` when the run is at least
/// `minimum_successive_lines` long, where `length` is the number of lines in
/// the run and `duplicate` records both files' names and inclusive line
/// ranges. Returns `None` for shorter runs.
fn check_for_duplicate(
    minimum_successive_lines: usize,
    file_1: &HashedFile,
    file_1_start: LineNumber,
    file_2: &HashedFile,
    file_2_start: LineNumber,
) -> Option<(LineNumber, Duplicate)> {
    // Exclusive upper bounds: one past the last line of each file.
    let limit_1 = file_1.number_of_lines + 1;
    let limit_2 = file_2.number_of_lines + 1;

    // Cursors pointing at the next pair of lines to compare.
    let mut next_1 = file_1_start + 1;
    let mut next_2 = file_2_start + 1;

    loop {
        if next_1 >= limit_1 || next_2 >= limit_2 {
            break;
        }
        let hash_1 = file_1.line_number_to_hash.get(&next_1);
        let hash_2 = file_2.line_number_to_hash.get(&next_2);
        match (hash_1, hash_2) {
            (Some(left), Some(right)) if left == right => {
                next_1 += 1;
                next_2 += 1;
            }
            // A missing hash on either side or a mismatch ends the run.
            _ => break,
        }
    }

    let run_length = next_1 - file_1_start;
    if run_length < minimum_successive_lines {
        return None;
    }

    // The cursors stopped one past the last matching line, hence the `- 1`.
    let duplicate = Duplicate {
        file_1: file_1.filename.clone(),
        file_1_line_range: LineRange {
            start: file_1_start,
            end: next_1 - 1,
        },
        file_2: file_2.filename.clone(),
        file_2_line_range: LineRange {
            start: file_2_start,
            end: next_2 - 1,
        },
    };
    Some((run_length, duplicate))
}
#[cfg(test)]
mod tests;