hashdeep_compare/
partition.rs

1use crate::common;
2use crate::log_entry::LogEntry;
3use crate::partitioner;
4
5
/// Data handed back to the caller when `partition_log` completes successfully.
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PartitionLogSuccess {
    /// Warning lines (ready for printing) concerning the first hashdeep log
    /// file; present only if any were emitted.
    pub file1_warning_lines: Option<Vec<String>>,

    /// Warning lines (ready for printing) concerning the second hashdeep log
    /// file; present only if any were emitted.
    pub file2_warning_lines: Option<Vec<String>>,

    /// Human-readable statistics describing the partitioning results.
    pub stats_string: String,
}
16
17/// Partitions entries from two hashdeep logs by content and name matches.
18///
19/// hashdeep logs are loaded from filename1 and filename2, and output groups
20/// are based on the output_filename_base path prefix.
21///
22/// Entries in the loaded logs will be grouped in this order:
23///
24/// 1. full match
25///     1. 1 in each file: no change between logs
26///     2. anomalies (invalid file)
27/// 2. only name match
28///     1. 1 in each file: file content changed between logs
29///     2. anomalies (invalid file)
30/// 3. only content match
31///     1. 1 in each file: file moved/renamed between logs
32///     2. match groups (unknown cause)
33/// 4. no match (listed by origin)
34///
35/// Each log entry is guaranteed to be represented in exactly one group.
36///
37/// On success, returns a statistics string about the successful operation,
38/// plus warning strings if any were emitted while loading the hashdeep logs.
39///
40/// # Errors
41///
42/// Any error emitted while reading or writing the files will be returned.
43///
44/// An integrity check is run on the partitioning results after calculation:
45///  an error will be emitted if this fails (this is extremely unlikely).
46///
47pub fn partition_log(filename1: &str, filename2: &str, output_filename_base: &str) -> Result<PartitionLogSuccess, Box<dyn std::error::Error>> {
48
49    let log_file1 = common::read_log_entries_from_file::<Vec<LogEntry>>(filename1)?;
50    let log_file2 = common::read_log_entries_from_file::<Vec<LogEntry>>(filename2)?;
51
52    let from_file1: Vec<&LogEntry> = log_file1.entries.iter().collect::<Vec<&LogEntry>>();
53    let from_file2: Vec<&LogEntry> = log_file2.entries.iter().collect::<Vec<&LogEntry>>();
54
55    let mp = partitioner::match_partition(&from_file1, &from_file2)?;
56
57    common::write_match_pairs_to_file(&mp.full_match_pairs, format!("{output_filename_base}_full_match_pairs").as_str())?;
58    common::write_single_file_match_groups_to_file(&mp.full_match_groups_file1, format!("{output_filename_base}_full_match_groups_file1_only").as_str())?;
59    common::write_single_file_match_groups_to_file(&mp.full_match_groups_file2, format!("{output_filename_base}_full_match_groups_file2_only").as_str())?;
60    common::write_match_groups_to_file(&mp.full_match_groups, format!("{output_filename_base}_full_match_groups_file1_and_file2").as_str())?;
61    common::write_match_pairs_to_file(&mp.name_match_pairs, format!("{output_filename_base}_name_match_pairs").as_str())?;
62    common::write_single_file_match_groups_to_file(&mp.name_match_groups_file1, format!("{output_filename_base}_name_match_groups_file1_only").as_str())?;
63    common::write_single_file_match_groups_to_file(&mp.name_match_groups_file2, format!("{output_filename_base}_name_match_groups_file2_only").as_str())?;
64    common::write_match_groups_to_file(&mp.name_match_groups, format!("{output_filename_base}_name_match_groups_file1_and_file2").as_str())?;
65    common::write_match_pairs_to_file(&mp.hashes_match_pairs, format!("{output_filename_base}_hashes_match_pairs").as_str())?;
66    common::write_single_file_match_groups_to_file(&mp.hashes_match_groups_file1, format!("{output_filename_base}_hashes_match_groups_file1_only").as_str())?;
67    common::write_single_file_match_groups_to_file(&mp.hashes_match_groups_file2, format!("{output_filename_base}_hashes_match_groups_file2_only").as_str())?;
68    common::write_match_groups_to_file(&mp.hashes_match_groups, format!("{output_filename_base}_hashes_match_groups_file1_and_file2").as_str())?;
69    common::write_log_entries_to_file(&mp.no_match_file1, format!("{output_filename_base}_no_match_entries_file1").as_str())?;
70    common::write_log_entries_to_file(&mp.no_match_file2, format!("{output_filename_base}_no_match_entries_file2").as_str())?;
71
72
73    let mut stats_string = String::new();
74    stats_string.push_str("log partition statistics:\n");
75    stats_string.push_str("   (note: \"pairs\" have 1 entry in each file)\n");
76    stats_string.push_str(format!(" {} full match pairs\n", mp.full_match_pairs.len()).as_str());
77    stats_string.push_str(format!(" {} full match groups in file 1 only (should be 0)\n", mp.full_match_groups_file1.len()).as_str());
78    stats_string.push_str(format!(" {} full match groups in file 2 only (should be 0)\n", mp.full_match_groups_file2.len()).as_str());
79    stats_string.push_str(format!(" {} full match groups in both files (should be 0)\n", mp.full_match_groups.len()).as_str());
80    stats_string.push_str(format!(" {} name match pairs\n", mp.name_match_pairs.len()).as_str());
81    stats_string.push_str(format!(" {} name match groups in file 1 only (should be 0)\n", mp.name_match_groups_file1.len()).as_str());
82    stats_string.push_str(format!(" {} name match groups in file 2 only (should be 0)\n", mp.name_match_groups_file2.len()).as_str());
83    stats_string.push_str(format!(" {} name match groups in both files (should be 0)\n", mp.name_match_groups.len()).as_str());
84    stats_string.push_str(format!(" {} hashes match pairs\n", mp.hashes_match_pairs.len()).as_str());
85    stats_string.push_str(format!(" {} hashes match groups in file 1 only\n", mp.hashes_match_groups_file1.len()).as_str());
86    stats_string.push_str(format!(" {} hashes match groups in file 2 only\n", mp.hashes_match_groups_file2.len()).as_str());
87    stats_string.push_str(format!(" {} hashes match groups in both files\n", mp.hashes_match_groups.len()).as_str());
88    stats_string.push_str(format!(" {} entries in file 1 with no match\n", mp.no_match_file1.len()).as_str());
89    stats_string.push_str(format!(" {} entries in file 2 with no match\n", mp.no_match_file2.len()).as_str());
90
91    Ok(PartitionLogSuccess
92    {
93        file1_warning_lines: log_file1.warning_report(),
94        file2_warning_lines: log_file2.warning_report(),
95        stats_string
96    })
97}