duplicate_code 0.8.1

A tool for parsing directories and scanning all the files within them to find duplicate segments of code across files.
use crate::model::duplicate::{Duplicate, Duplicates};
use crate::model::hashed_file::{HashedFile, HashedFiles};
use crate::model::line_range::LineRange;
use crate::model::LineNumber;

/// Finds the duplicates between every pair of files in `files`.
///
/// Each unordered pair of distinct files is compared exactly once (the file
/// at the lower index is always passed as the first file of the pair), and
/// the combined results are sorted before being returned.
///
/// `minimum_successive_lines` is forwarded unchanged to the per-pair
/// comparison.
pub fn get_all_duplicates(minimum_successive_lines: usize, files: HashedFiles) -> Duplicates {
    let file_count = files.len();
    debug!("Processing {:?} files for duplicates.", file_count);

    let mut found: Duplicates = (0..file_count)
        // Enumerate every (lower index, higher index) pair once.
        .flat_map(|first| ((first + 1)..file_count).map(move |second| (first, second)))
        .flat_map(|(first, second)| {
            get_duplicates(
                minimum_successive_lines,
                files.get(first).unwrap(),
                files.get(second).unwrap(),
            )
        })
        .collect();

    debug!("Found {:?} duplicates.", found.len());
    found.sort();
    found
}

fn get_duplicates(
    minimum_successive_lines: usize,
    file_1: &HashedFile,
    file_2: &HashedFile,
) -> Duplicates {
    let mut duplicates = vec![];
    let mut file_1_line_number = 1;
    let file_1_line_number_upper_range = file_1.number_of_lines + 1;

    while file_1_line_number < file_1_line_number_upper_range {
        if let Some(file_1_line_number_hash) = file_1.line_number_to_hash.get(&file_1_line_number) {
            if file_2
                .hash_to_line_numbers
                .contains_key(file_1_line_number_hash)
            {
                let mut largest_duplicate_size = 0;

                for file_2_start in file_2
                    .hash_to_line_numbers
                    .get(file_1_line_number_hash)
                    .unwrap()
                {
                    if let Some((duplicate_size, duplicate)) = check_for_duplicate(
                        minimum_successive_lines,
                        file_1,
                        file_1_line_number,
                        file_2,
                        *file_2_start,
                    ) {
                        if duplicate_size > largest_duplicate_size {
                            largest_duplicate_size = duplicate_size;
                        }

                        duplicates.push(duplicate);
                    }
                }

                if largest_duplicate_size > 0 {
                    file_1_line_number += largest_duplicate_size - 1;
                }
            }
        }

        file_1_line_number += 1;
    }

    duplicates
}

/// Measures the run of matching lines that begins at `file_1_start` in
/// `file_1` and `file_2_start` in `file_2`.
///
/// The two starting lines themselves are not compared here; comparison begins
/// on the line following each start. The run is extended one line at a time
/// while both files still have lines left, both lines have a recorded hash,
/// and those hashes are equal.
///
/// Returns `Some((run_length, duplicate))` when the run, counting the starting
/// line, is at least `minimum_successive_lines` long, where `duplicate`
/// carries the inclusive line ranges in both files. Returns `None` otherwise.
fn check_for_duplicate(
    minimum_successive_lines: usize,
    file_1: &HashedFile,
    file_1_start: LineNumber,
    file_2: &HashedFile,
    file_2_start: LineNumber,
) -> Option<(LineNumber, Duplicate)> {
    let file_1_limit = file_1.number_of_lines + 1;
    let file_2_limit = file_2.number_of_lines + 1;

    // Cursors point at the next line to compare in each file.
    let mut cursor_1 = file_1_start + 1;
    let mut cursor_2 = file_2_start + 1;

    while cursor_1 < file_1_limit && cursor_2 < file_2_limit {
        let hash_1 = file_1.line_number_to_hash.get(&cursor_1);
        let hash_2 = file_2.line_number_to_hash.get(&cursor_2);

        match (hash_1, hash_2) {
            (Some(left), Some(right)) if left == right => {
                cursor_1 += 1;
                cursor_2 += 1;
            }
            // A missing hash on either side, or differing hashes, ends the run.
            _ => break,
        }
    }

    let successive_lines = cursor_1 - file_1_start;
    if successive_lines < minimum_successive_lines {
        return None;
    }

    // The cursors sit one past the last matching line, so the ranges end at cursor - 1.
    let duplicate = Duplicate {
        file_1: file_1.filename.clone(),
        file_1_line_range: LineRange {
            start: file_1_start,
            end: cursor_1 - 1,
        },
        file_2: file_2.filename.clone(),
        file_2_line_range: LineRange {
            start: file_2_start,
            end: cursor_2 - 1,
        },
    };

    Some((successive_lines, duplicate))
}

// Unit tests for this module are kept in a separate `tests` submodule and
// are compiled only for test builds.
#[cfg(test)]
mod tests;