1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/*
Copyright 2022 Marek Suchánek <msuchane@redhat.com>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Enable additional clippy lints by default.
#![warn(
    clippy::pedantic,
    clippy::unwrap_used,
    clippy::clone_on_ref_ptr,
    clippy::todo
)]
// Forbid unsafe code in this program.
#![forbid(unsafe_code)]

use std::convert::From;
use std::path::PathBuf;

use color_eyre::{eyre::bail, Result};
use permutator::Combination;

pub mod cli;
mod comparison;
mod load_files;
mod logging;
mod serialize;

use cli::Cli;
use comparison::{comparisons, Comparison};
use load_files::files;
pub use logging::init_log_and_errors;
use serialize::serialize;

/// Represents a loaded text file, with its path and content.
///
/// Both fields are public so that other parts of the program can read
/// them directly.
#[derive(Debug)]
pub struct File {
    /// The path of the file.
    pub path: PathBuf,
    /// The text content of the file, held as an owned string.
    pub content: String,
}

/// A value expressed in percent.
///
/// The wrapped float is on the percent scale: converting a fraction
/// with `Percentage::from` multiplies it by 100, so `0.5` is stored as `50.0`.
#[derive(Debug, PartialEq)]
pub struct Percentage(f64);

/// Run the program with the given command-line options.
///
/// Validates the similarity threshold, loads the matching files, pairs every
/// file with every other file, computes the comparisons, and serializes
/// them if a CSV or JSON output was requested.
///
/// # Errors
///
/// Returns an error if:
///
/// * the similarity threshold is not a number between 0 and 1 (inclusive),
/// * loading the files fails,
/// * fewer than two files were loaded,
/// * serializing the comparisons fails.
pub fn run(options: &Cli) -> Result<()> {
    // Check that the similarity threshold is a valid percentage between 0% and 100%.
    // The value is stored as a decimal between 0 and 1, but it's exposed to the user
    // as a value between 0 and 100.
    //
    // The range check is written as a negated `contains` on purpose: every ordered
    // comparison with NaN is false, so `x < 0.0 || x > 1.0` would let NaN through.
    if !(0.0..=1.0).contains(&options.threshold) {
        bail!("The similarity threshold must be between 0.0 and 100.0.");
    }

    // Load all matching files from the directory.
    let files = files(options)?;

    // The comparison needs at least two files.
    if files.len() < 2 {
        bail!("Too few files that match the settings to compare in this directory.");
    }

    // Combinations by 2 pair each file with each file, so that no comparison
    // occurs more than once.
    let combinations = files.combination(2).map(|v| (v[0], v[1]));

    let comparisons = comparisons(combinations, options);

    // Only serialize if at least one serialization option is active.
    if options.csv.is_some() || options.json.is_some() {
        serialize(comparisons, options)?;
    }

    Ok(())
}

impl From<f64> for Percentage {
    /// Convert a fraction into a percentage by scaling it by 100,
    /// so that `0.5` becomes 50%.
    fn from(item: f64) -> Self {
        Self(item * 100.0)
    }
}

impl Percentage {
    /// Return the percentage rounded to one decimal place for display,
    /// without ever rounding a near-match up to 100%.
    ///
    /// Plain rounding would turn a value such as 99.96 into 100.0, and the
    /// user could then take two almost identical files for exact duplicates.
    ///
    /// To keep 100.0% reserved for identical files, every value strictly
    /// between 99.9 and 100.0 is reported as 99.9. All other values are
    /// rounded to the nearest tenth.
    fn rounded(&self) -> f64 {
        // Work in tenths of a percent, so that rounding to an integer
        // corresponds to rounding to one decimal place.
        let tenths = self.0 * 10.0;

        let just_below_full = tenths > 999.0 && tenths < 1000.0;
        if just_below_full {
            return 99.9;
        }

        tenths.round() / 10.0
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Convert a fraction to a percentage and return the rounded value.
    fn displayed(fraction: f64) -> f64 {
        Percentage::from(fraction).rounded()
    }

    #[test]
    fn check_percentage() {
        // Pairs of (input fraction, expected rounded percentage).
        let cases = [
            (0.9, 90.0),
            (0.999, 99.9),
            (1.0, 100.0),
            // This is the interesting case: almost identical, but it must not
            // be rounded up to 100.
            (0.99999999, 99.9),
        ];

        for &(fraction, expected) in &cases {
            assert_eq!(expected, displayed(fraction));
        }
    }
}