1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
#![crate_name = "havocompare"]
//! # Comparing folders and files by rules
//! Havocompare compares folders (or, to be more exact, the files inside the folders) following user-definable rules.
//! A self-contained HTML report is generated. To use it without the CLI, the main function is [`compare_folders`].
//!
#![warn(missing_docs)]
#![warn(unused_qualifications)]
#![deny(deprecated)]
#![deny(clippy::unwrap_used)]
#![deny(clippy::expect_used)]

/// comparison module for csv comparison
pub mod csv;

pub use csv::CSVCompareConfig;
use std::borrow::Cow;
mod hash;
pub use hash::HashConfig;
mod html;
mod image;
pub use crate::image::ImageCompareConfig;
mod pdf;
mod report;

pub use crate::html::HTMLCompareConfig;
use crate::report::FileCompareResult;
use schemars::schema_for;
use schemars_derive::JsonSchema;
use serde::{Deserialize, Serialize};
use std::fs::File;
use std::io::{BufReader, Read};
use std::path::{Path, PathBuf};
use thiserror::Error;
use tracing::{debug, error, info, span};
use vg_errortools::{fat_io_wrap_std, FatIOError};

#[derive(Error, Debug)]
/// Top-Level Error class for all errors that can happen during havocompare-running
pub enum Error {
    /// Pattern used for globbing was invalid
    #[error("Failed to evaluate globbing pattern! {0}")]
    IllegalGlobbingPattern(#[from] glob::PatternError),
    /// Regex pattern requested could not be compiled
    #[error("Failed to compile regex! {0}")]
    RegexCompilationError(#[from] regex::Error),
    /// An error occurred in the csv rule checker
    #[error("CSV module error")]
    CSVModuleError(#[from] csv::Error),
    /// An error occurred in the image rule checker
    #[error("CSV module error")]
    ImageModuleError(#[from] image::Error),

    /// An error occurred in the reporting module
    #[error("Error occurred during report creation {0}")]
    ReportingError(#[from] report::Error),
    /// An error occurred during reading yaml
    #[error("Serde error, loading a yaml: {0}")]
    SerdeYamlFail(#[from] serde_yaml::Error),
    /// An error occurred during writing json
    #[error("Serde error, writing json: {0}")]
    SerdeJsonFail(#[from] serde_json::Error),
    /// A problem happened while accessing a file
    #[error("File access failed {0}")]
    FileAccessError(#[from] FatIOError),

    /// could not extract filename from path
    #[error("File path parsing failed")]
    FilePathParsingFails(String),

    /// Different number of files matched pattern in actual and nominal
    #[error("Different number of files matched pattern in actual {0} and nominal {1}")]
    DifferentNumberOfFiles(usize, usize),
}

// Selects which comparison backend a rule uses; every variant carries that backend's configuration.
// `process_file` matches on this enum to dispatch to the csv, image, html, hash or pdf module.
//
// NOTE(review): the `///` doc comments below are left untouched on purpose. This type derives
// `JsonSchema`, and schemars is documented to copy doc comments into the generated schema as
// descriptions, so rewording them would presumably change the output of `get_schema` - confirm
// before editing them.
#[derive(Debug, Deserialize, Serialize, JsonSchema)]
// The variant names `CSV` and `PDFText` contain upper-case acronyms, hence the lint exemption.
#[allow(clippy::upper_case_acronyms)]
/// Representing the comparison mode
pub enum ComparisonMode {
    /// smart CSV compare
    // Handled by `csv::compare_paths`.
    CSV(CSVCompareConfig),
    /// thresholds comparison
    // Handled by `image::compare_paths`.
    Image(ImageCompareConfig),
    /// plain text compare
    // Handled by `html::compare_files`.
    PlainText(HTMLCompareConfig),
    /// Compare using file hashes
    // Handled by `hash::compare_files`.
    Hash(HashConfig),
    /// PDF text compare
    // Handled by `pdf::compare_files`; uses the same configuration type as `PlainText`.
    PDFText(HTMLCompareConfig),
}

/// Returns the final component of `path` as (lossily converted) text.
///
/// Yields `None` when the path has no file name component, e.g. for `/`, an empty path or a
/// path ending in `..`. Non-UTF-8 names are converted with `to_string_lossy`.
fn get_file_name(path: &Path) -> Option<Cow<str>> {
    let last_component = path.file_name()?;
    Some(last_component.to_string_lossy())
}

// Root of the configuration: deserialized from YAML by `ConfigurationFile::from_reader` and used
// as the root type of the JSON schema produced by `get_schema`.
//
// NOTE(review): the `///` doc comments are kept verbatim because this type derives `JsonSchema`;
// schemars is documented to export doc comments as schema descriptions, so changing them would
// presumably change the generated schema - confirm before editing them.
#[derive(Debug, Deserialize, Serialize, JsonSchema)]
/// Represents a whole configuration file consisting of several comparison rules
pub struct ConfigurationFile {
    /// A list of all rules to be checked on run
    // `compare_folders_cfg` consumes this vector and processes the rules in the order given here.
    pub rules: Vec<Rule>,
}

impl ConfigurationFile {
    /// creates a [`ConfigurationFile`] file struct from anything implementing `Read`
    pub fn from_reader(reader: impl Read) -> Result<ConfigurationFile, Error> {
        let config: ConfigurationFile = serde_yaml::from_reader(reader)?;
        Ok(config)
    }

    /// creates a [`ConfigurationFile`] from anything path-convertible
    pub fn from_file(file: impl AsRef<Path>) -> Result<ConfigurationFile, Error> {
        let config_reader = fat_io_wrap_std(file, &File::open)?;
        Self::from_reader(BufReader::new(config_reader))
    }
}

// One entry of `ConfigurationFile::rules`: which files to pick up and how to compare them.
//
// NOTE(review): the `///` doc comments are kept verbatim because this type derives `JsonSchema`;
// schemars is documented to export doc comments as schema descriptions, so changing them would
// presumably change the generated schema - confirm before editing them.
#[derive(Debug, Deserialize, Serialize, JsonSchema)]
/// Representing a single comparison rule
pub struct Rule {
    /// The name of the rule - will be displayed in logs
    // Also handed to every comparison backend by `process_file`.
    pub name: String,
    /// A list of glob-patterns to include
    // Each pattern is joined onto the nominal and onto the actual folder before globbing
    // (see `glob_files`), so patterns are relative to those folders.
    pub pattern_include: Vec<String>,
    /// A list of glob-patterns to exclude - optional
    // `None` is treated like an empty exclusion list by `process_rule`.
    pub pattern_exclude: Option<Vec<String>>,
    /// How these files shall be compared
    // `flatten` inlines the `ComparisonMode` into the rule's own map instead of nesting it under
    // a `file_type` key.
    #[serde(flatten)]
    pub file_type: ComparisonMode,
}

/// Collects every path below `path` that matches at least one of `patterns`.
///
/// Each pattern is joined onto `path` and evaluated with `glob::glob`. Results are appended in
/// pattern order; a file matched by several patterns appears several times. Entries the glob
/// iterator reports as errors are skipped silently.
///
/// # Errors
/// Returns the `glob::PatternError` of the first pattern that fails to parse.
fn glob_files(
    path: impl AsRef<Path>,
    patterns: &[impl AsRef<str>],
) -> Result<Vec<PathBuf>, glob::PatternError> {
    let base_dir = path.as_ref();
    let mut matches = Vec::new();
    for pattern in patterns {
        let joined = base_dir.join(pattern.as_ref());
        let glob_expr = joined.to_string_lossy();
        debug!("Globbing: {}", glob_expr);
        let entries = glob::glob(&glob_expr)?;
        // `flatten` keeps the `Ok` paths and drops entries that could not be read.
        matches.extend(entries.flatten());
    }
    Ok(matches)
}

/// Removes every path from `paths` that also occurs in `excludes`.
///
/// Paths are compared for exact equality and the relative order of the remaining entries is
/// preserved.
fn filter_exclude(paths: Vec<PathBuf>, excludes: Vec<PathBuf>) -> Vec<PathBuf> {
    debug!(
        "Filtering paths {:#?} with exclusion list {:#?}",
        &paths, &excludes
    );
    let mut kept = paths;
    kept.retain(|candidate| !excludes.contains(candidate));
    kept
}

/// Compares one nominal file against one actual file with the backend selected by `rule`.
///
/// The work runs inside an INFO-level tracing span named "Processing"; the outcome is logged
/// before it is returned.
///
/// # Errors
/// Returns the boxed error of the comparison backend if the comparison itself could not be
/// carried out. A completed comparison that found differences is *not* an `Err`; it is an `Ok`
/// result whose `is_error` flag is set.
fn process_file(
    nominal: impl AsRef<Path>,
    actual: impl AsRef<Path>,
    rule: &Rule,
) -> Result<FileCompareResult, Box<dyn std::error::Error>> {
    let (nominal_path, actual_path) = (nominal.as_ref(), actual.as_ref());
    let file_name_nominal = nominal_path.to_string_lossy();
    let file_name_actual = actual_path.to_string_lossy();

    // The span must outlive its guard, so both are bound to locals.
    let processing_span = span!(tracing::Level::INFO, "Processing");
    let _entered = processing_span.enter();

    info!("File: {file_name_nominal} | {file_name_actual}");

    let rule_name = &rule.name;
    let compare_result: Result<FileCompareResult, Box<dyn std::error::Error>> =
        match &rule.file_type {
            ComparisonMode::CSV(conf) => {
                csv::compare_paths(nominal_path, actual_path, conf, rule_name).map_err(Into::into)
            }
            ComparisonMode::Image(conf) => {
                image::compare_paths(nominal_path, actual_path, conf, rule_name)
                    .map_err(Into::into)
            }
            ComparisonMode::PlainText(conf) => {
                html::compare_files(nominal_path, actual_path, conf, rule_name).map_err(Into::into)
            }
            ComparisonMode::Hash(conf) => {
                hash::compare_files(nominal_path, actual_path, conf, rule_name).map_err(Into::into)
            }
            ComparisonMode::PDFText(conf) => {
                pdf::compare_files(nominal_path, actual_path, conf, rule_name).map_err(Into::into)
            }
        };

    match &compare_result {
        Ok(outcome) if outcome.is_error => {
            error!("Files didn't match");
        }
        Ok(_) => {
            debug!("Files matched");
        }
        Err(_) => {
            error!("Problem comparing the files");
        }
    }

    compare_result
}

/// Lists the files below `path` that match `patterns_include` but not `patterns_exclude`.
///
/// The exclusion patterns are globbed first, then the inclusion patterns; every included path
/// that also appears among the excluded ones is dropped.
///
/// # Errors
/// Returns a `glob::PatternError` if any pattern is not a valid glob expression.
fn get_files(
    path: impl AsRef<Path>,
    patterns_include: &[impl AsRef<str>],
    patterns_exclude: &[impl AsRef<str>],
) -> Result<Vec<PathBuf>, glob::PatternError> {
    let base_dir = path.as_ref();
    let excluded = glob_files(base_dir, patterns_exclude)?;
    glob_files(base_dir, patterns_include).map(|included| filter_exclude(included, excluded))
}

/// Applies a single `rule` to the `nominal` and `actual` folders.
///
/// Files are collected in both folders with the rule's include/exclude patterns and then
/// compared pairwise in the order the glob returned them. The outcome of every file comparison
/// is appended to `compare_results`.
///
/// Returns `Ok(true)` only if every pair compared without error and without differences.
/// `Ok(false)` is also returned (after logging) when `nominal` or `actual` is not a directory.
///
/// # Errors
/// * [`Error::IllegalGlobbingPattern`] if a pattern of the rule is not a valid glob.
/// * [`Error::DifferentNumberOfFiles`] if the two folders yield a different number of files.
fn process_rule(
    nominal: impl AsRef<Path>,
    actual: impl AsRef<Path>,
    rule: &Rule,
    compare_results: &mut Vec<Result<FileCompareResult, Box<dyn std::error::Error>>>,
) -> Result<bool, Error> {
    let (nominal_dir, actual_dir) = (nominal.as_ref(), actual.as_ref());

    // The span must outlive its guard, so both are bound to locals.
    let rule_span = span!(tracing::Level::INFO, "Rule");
    let _entered = rule_span.enter();
    info!("Name: {}", rule.name.as_str());

    if !nominal_dir.is_dir() {
        error!(
            "Nominal folder {} is not a folder",
            nominal_dir.to_string_lossy()
        );
        return Ok(false);
    }
    if !actual_dir.is_dir() {
        error!(
            "Actual folder {} is not a folder",
            actual_dir.to_string_lossy()
        );
        return Ok(false);
    }

    // A rule without exclusion patterns behaves like one with an empty list.
    let excludes: &[String] = rule.pattern_exclude.as_deref().unwrap_or(&[]);

    let nominal_paths = get_files(nominal_dir, &rule.pattern_include, excludes)?;
    let actual_paths = get_files(actual_dir, &rule.pattern_include, excludes)?;

    let (actual_files, nominal_files) = (actual_paths.len(), nominal_paths.len());
    info!(
        "Found {} files matching includes in actual, {} files in nominal",
        actual_files, nominal_files
    );

    if actual_files != nominal_files {
        return Err(Error::DifferentNumberOfFiles(actual_files, nominal_files));
    }

    let mut all_okay = true;
    for (nominal_file, actual_file) in nominal_paths.into_iter().zip(actual_paths) {
        let outcome = process_file(nominal_file, actual_file, rule);
        // A pair only counts as passed if the comparison ran and reported no difference.
        let passed = matches!(&outcome, Ok(result) if !result.is_error);
        all_okay &= passed;
        compare_results.push(outcome);
    }

    Ok(all_okay)
}

/// Use this function if you don't want this crate to load and parse a config file but provide a custom rules struct yourself
///
/// Every rule in `config_struct` is applied to the `nominal` and `actual` folders, and a report
/// covering all rules is written to `report_path`.
///
/// Returns `Ok(true)` if all rules passed and `Ok(false)` if at least one rule failed. A rule
/// that could not be processed at all (e.g. invalid glob pattern, differing file counts) is
/// printed to stdout and counted as failed. File comparisons that ended in an error are left out
/// of the report.
///
/// # Errors
/// Returns an [`Error`] if the report could not be created.
pub fn compare_folders_cfg(
    nominal: impl AsRef<Path>,
    actual: impl AsRef<Path>,
    config_struct: ConfigurationFile,
    report_path: impl AsRef<Path>,
) -> Result<bool, Error> {
    let mut rule_results: Vec<report::RuleResult> = Vec::new();
    let mut all_okay = true;

    // Deliberately a plain loop and not a lazy `map(..).all(..)` chain: `Iterator::all`
    // short-circuits on the first `false`, which would skip every rule after the first failing
    // one and leave those rules out of the report.
    for rule in config_struct.rules {
        let mut compare_results: Vec<Result<FileCompareResult, Box<dyn std::error::Error>>> =
            Vec::new();
        let outcome = process_rule(
            nominal.as_ref(),
            actual.as_ref(),
            &rule,
            &mut compare_results,
        );

        let rule_okay = match outcome {
            Ok(result) => result,
            Err(e) => {
                println!(
                    "Error occured during rule-processing for rule {}: {}",
                    rule.name.as_str(),
                    e
                );
                false
            }
        };
        all_okay &= rule_okay;

        rule_results.push(report::RuleResult {
            rule,
            compare_results: compare_results.into_iter().filter_map(|r| r.ok()).collect(),
        });
    }

    report::create(&rule_results, report_path)?;
    Ok(all_okay)
}

/// Main entry point: compares the folders `nominal` and `actual` according to a YAML config file.
///
/// The configuration is loaded from `config_file`, then the work is delegated to
/// [`compare_folders_cfg`], which also writes the report to `report_path`.
///
/// # Errors
/// Fails if the configuration file cannot be read or parsed, or with any error reported by
/// [`compare_folders_cfg`].
pub fn compare_folders(
    nominal: impl AsRef<Path>,
    actual: impl AsRef<Path>,
    config_file: impl AsRef<Path>,
    report_path: impl AsRef<Path>,
) -> Result<bool, Error> {
    ConfigurationFile::from_file(config_file)
        .and_then(|config| compare_folders_cfg(nominal, actual, config, report_path))
}

/// Create the jsonschema for the current configuration file format
pub fn get_schema() -> Result<String, Error> {
    let schema = schema_for!(ConfigurationFile);
    Ok(serde_json::to_string_pretty(&schema)?)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::image::ImageCompareConfig;

    /// A rule run against a missing nominal or actual folder reports failure (`Ok(false)`)
    /// instead of returning an error.
    #[test]
    fn folder_not_found_is_false() {
        let image_rule = Rule {
            name: String::from("test rule"),
            pattern_include: vec![String::from("*.")],
            pattern_exclude: None,
            file_type: ComparisonMode::Image(ImageCompareConfig { threshold: 1.0 }),
        };
        let mut collected = Vec::new();

        let missing_nominal =
            process_rule("NOT_EXISTING", ".", &image_rule, &mut collected).unwrap();
        assert!(!missing_nominal);

        let missing_actual =
            process_rule(".", "NOT_EXISTING", &image_rule, &mut collected).unwrap();
        assert!(!missing_actual);
    }

    /// Several include patterns are combined, and each exclude pattern removes its matches
    /// from the result.
    #[test]
    fn multiple_include_exclude_works() {
        const DATA_DIR: &str = "tests/csv/data/";
        let includes = vec![
            String::from("**/Components.csv"),
            String::from("**/CumulatedHistogram.csv"),
        ];
        let remaining = |excludes: &[&str]| {
            get_files(DATA_DIR, &includes, excludes)
                .expect("could not glob")
                .len()
        };

        // An empty exclude pattern removes none of the included files.
        assert_eq!(remaining(&[""]), 2);
        assert_eq!(remaining(&["**/Components.csv"]), 1);
        assert_eq!(
            remaining(&["**/Components.csv", "**/CumulatedHistogram.csv"]),
            0
        );
    }
}