use crate::file_hasher::FileHasher;
use std::cmp::Ordering;
use std::fs;
use std::io::{self, Read};
use std::path::{Path, PathBuf};
/// Where an entry was found when comparing two directories.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Classification {
/// The entry exists only in the first directory.
OnlyInDir1,
/// The entry exists only in the second directory.
OnlyInDir2,
/// The entry exists in both directories.
InBoth,
}
/// Compares a pair of files by metadata and by content.
pub struct FileComparer<'a> {
/// Path of the first file.
path1: &'a Path,
/// Path of the second file.
path2: &'a Path,
/// Size in bytes of each read buffer used for the content comparison.
/// A value of `0` makes the content comparison use memory-mapped files
/// instead of buffered reads.
pub buffer_size: usize,
/// Optional hashers for the first and second file, respectively. When set,
/// the content comparison compares the hashes they return instead of
/// reading the files directly.
pub hashers: Option<(&'a FileHasher, &'a FileHasher)>,
}
impl<'a> FileComparer<'a> {
    /// Default read-buffer size, in KiB.
    pub const DEFAULT_BUFFER_SIZE_KB: usize = 64;
    /// Default read-buffer size, in bytes.
    pub const DEFAULT_BUFFER_SIZE: usize = Self::DEFAULT_BUFFER_SIZE_KB * 1024;

    /// Creates a comparer for `path1` and `path2` with the default buffer
    /// size and no hashers.
    pub fn new(path1: &'a Path, path2: &'a Path) -> Self {
        Self {
            path1,
            path2,
            buffer_size: Self::DEFAULT_BUFFER_SIZE,
            hashers: None,
        }
    }

    /// Returns the metadata of both files, in `(path1, path2)` order.
    ///
    /// # Errors
    ///
    /// Returns the first I/O error encountered while querying either path.
    pub fn metadata(&self) -> io::Result<(fs::Metadata, fs::Metadata)> {
        let m1 = fs::metadata(self.path1)?;
        let m2 = fs::metadata(self.path2)?;
        Ok((m1, m2))
    }

    /// Returns `true` when both files have the same content.
    ///
    /// The strategy depends on the configuration:
    /// - when `hashers` is set, the hashes of the two files are compared;
    /// - when `buffer_size` is `0`, both files are memory-mapped and compared;
    /// - otherwise both files are read in `buffer_size` chunks and compared
    ///   chunk by chunk.
    ///
    /// # Errors
    ///
    /// Returns any error raised while hashing, opening, mapping, or reading
    /// either file.
    pub(crate) fn compare_contents(&self) -> io::Result<bool> {
        if let Some((hasher1, hasher2)) = self.hashers {
            let (hash1, hash2) = rayon::join(
                || hasher1.get_hash(self.path1),
                || hasher2.get_hash(self.path2),
            );
            return Ok(hash1? == hash2?);
        }

        let start_time = std::time::Instant::now();
        let mut f1 = fs::File::open(self.path1)?;
        let mut f2 = fs::File::open(self.path2)?;

        if self.buffer_size == 0 {
            let len1 = f1.metadata()?.len();
            let len2 = f2.metadata()?.len();
            if len1 != len2 {
                return Ok(false);
            }
            // Empty files are handled before mapping; there is nothing to map.
            if len1 == 0 {
                return Ok(true);
            }
            // SAFETY: both mappings are read-only and dropped before returning.
            // NOTE(review): this relies on neither file being modified or
            // truncated by another process while mapped.
            let mmap1 = unsafe { memmap2::MmapOptions::new().map(&f1)? };
            let mmap2 = unsafe { memmap2::MmapOptions::new().map(&f2)? };
            let result = mmap1[..] == mmap2[..];
            log::trace!("Compared in {:?}: {:?}", start_time.elapsed(), self.path1);
            return Ok(result);
        }

        let mut buf1 = vec![0u8; self.buffer_size];
        let mut buf2 = vec![0u8; self.buffer_size];
        let is_same = loop {
            // A single `read` may legally return fewer bytes than requested
            // before end of file, so each side is filled completely; otherwise
            // identical files could be compared at misaligned offsets.
            let (n1, n2) = rayon::join(
                || Self::read_full(&mut f1, &mut buf1),
                || Self::read_full(&mut f2, &mut buf2),
            );
            let n1 = n1?;
            let n2 = n2?;
            if n1 != n2 || buf1[..n1] != buf2[..n2] {
                break false;
            }
            // `read_full` only returns a short count at end of file, and both
            // sides returned the same count, so both files are exhausted.
            if n1 < buf1.len() {
                break true;
            }
        };
        log::trace!("Compared in {:?}: {:?}", start_time.elapsed(), self.path1);
        Ok(is_same)
    }

    /// Reads from `reader` until `buf` is full or end of file is reached,
    /// retrying reads that fail with `ErrorKind::Interrupted`.
    ///
    /// Returns the number of bytes stored in `buf`; a value smaller than
    /// `buf.len()` means end of file was reached.
    fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
        let mut filled = 0;
        while filled < buf.len() {
            match reader.read(&mut buf[filled..]) {
                Ok(0) => break,
                Ok(n) => filled += n,
                Err(e) if e.kind() == io::ErrorKind::Interrupted => {}
                Err(e) => return Err(e),
            }
        }
        Ok(filled)
    }
}
/// Outcome of comparing one entry between two directories.
#[derive(Debug, Clone)]
pub struct FileComparisonResult {
/// Path of the entry.
/// NOTE(review): presumably relative to the compared directory roots — confirm with callers.
pub relative_path: PathBuf,
/// Which of the two directories contain the entry.
pub classification: Classification,
/// Ordering of the first file's modification time relative to the second's;
/// `None` until the files have been compared.
pub modified_time_comparison: Option<Ordering>,
/// Ordering of the first file's size relative to the second's;
/// `None` until the files have been compared.
pub size_comparison: Option<Ordering>,
/// Whether the contents are the same; `None` when the contents were not compared.
pub is_content_same: Option<bool>,
}
impl FileComparisonResult {
    /// Builds a result for `relative_path` with the given classification and
    /// every comparison field still unset.
    pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
        Self {
            classification,
            relative_path,
            is_content_same: None,
            size_comparison: None,
            modified_time_comparison: None,
        }
    }

    /// Fills in the modification-time and size comparisons from `comparer`.
    ///
    /// The contents are compared only when `should_compare_content` is `true`
    /// and both files have the same size.
    ///
    /// # Errors
    ///
    /// Propagates failures from reading metadata, reading modification times,
    /// or comparing contents.
    pub fn update(
        &mut self,
        comparer: &FileComparer,
        should_compare_content: bool,
    ) -> anyhow::Result<()> {
        let (meta1, meta2) = comparer.metadata()?;

        let mtime1 = meta1.modified()?;
        let mtime2 = meta2.modified()?;
        self.modified_time_comparison = Some(mtime1.cmp(&mtime2));

        let size_order = meta1.len().cmp(&meta2.len());
        self.size_comparison = Some(size_order);

        if should_compare_content && size_order == Ordering::Equal {
            let same = comparer.compare_contents()?;
            self.is_content_same = Some(same);
        }
        Ok(())
    }

    /// Returns `true` when the entry is in both directories, has equal
    /// modification time and size, and its contents were not found to differ.
    pub fn is_identical(&self) -> bool {
        let equal = Some(Ordering::Equal);
        matches!(self.classification, Classification::InBoth)
            && self.modified_time_comparison == equal
            && self.size_comparison == equal
            && !matches!(self.is_content_same, Some(false))
    }

    /// Renders the result as three symbols: classification, modification
    /// time, and size. A blank stands for a comparison that was not made, and
    /// `!` replaces `=` in the size column when the contents differ.
    pub fn to_symbol_string(&self) -> String {
        // Symbol shared by the time and size columns.
        fn order_symbol(order: Ordering) -> char {
            match order {
                Ordering::Less => '<',
                Ordering::Equal => '=',
                Ordering::Greater => '>',
            }
        }

        let class_symbol = match self.classification {
            Classification::InBoth => '=',
            Classification::OnlyInDir1 => '>',
            Classification::OnlyInDir2 => '<',
        };
        let time_symbol = self.modified_time_comparison.map_or(' ', order_symbol);
        let size_symbol = match self.size_comparison {
            None => ' ',
            Some(Ordering::Equal) if self.is_content_same == Some(false) => '!',
            Some(order) => order_symbol(order),
        };

        [class_symbol, time_symbol, size_symbol].iter().collect()
    }

    /// Describes the result in words, using `dir1_name` and `dir2_name` to
    /// refer to the two sides.
    ///
    /// Returns `"Identical"` when nothing differs and at least one comparison
    /// came out equal, and `"Unknown"` when there is nothing to report at all.
    pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
        let mut notes: Vec<String> = Vec::new();
        let mut any_equal = false;

        let only_in = match self.classification {
            Classification::OnlyInDir1 => Some(dir1_name),
            Classification::OnlyInDir2 => Some(dir2_name),
            Classification::InBoth => None,
        };
        if let Some(name) = only_in {
            notes.push(format!("Only in {}", name));
        }

        match self.modified_time_comparison {
            None => {}
            Some(Ordering::Equal) => any_equal = true,
            Some(Ordering::Greater) => notes.push(format!("{} is newer", dir1_name)),
            Some(Ordering::Less) => notes.push(format!("{} is newer", dir2_name)),
        }

        match self.size_comparison {
            None => {}
            Some(Ordering::Equal) => any_equal = true,
            Some(Ordering::Greater) => notes.push(format!("Size of {} is larger", dir1_name)),
            Some(Ordering::Less) => notes.push(format!("Size of {} is larger", dir2_name)),
        }

        if let Some(same) = self.is_content_same {
            if same {
                any_equal = true;
            } else {
                notes.push("Contents differ".to_string());
            }
        }

        if !notes.is_empty() {
            return notes.join(", ");
        }
        let verdict = if any_equal { "Identical" } else { "Unknown" };
        verdict.to_string()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Writes `content1` and `content2` to two temporary files and asserts
    /// that `compare_contents` returns `expected` with buffered reads, with
    /// memory mapping (`buffer_size == 0`), and with hashers.
    fn check_compare(content1: &[u8], content2: &[u8], expected: bool) -> io::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;
        let f1_path = dir1.path().join("file");
        let f2_path = dir2.path().join("file");
        fs::write(&f1_path, content1)?;
        fs::write(&f2_path, content2)?;

        let mut comparer = FileComparer::new(&f1_path, &f2_path);
        comparer.buffer_size = 8192;
        assert_eq!(comparer.compare_contents()?, expected);

        comparer.buffer_size = 0;
        assert_eq!(comparer.compare_contents()?, expected);

        let hasher1 = FileHasher::new(dir1.path().to_path_buf());
        let hasher2 = FileHasher::new(dir2.path().to_path_buf());
        comparer.hashers = Some((&hasher1, &hasher2));
        assert_eq!(comparer.compare_contents()?, expected);
        Ok(())
    }

    #[test]
    fn test_compare_contents_identical() -> io::Result<()> {
        check_compare(b"hello world", b"hello world", true)
    }

    #[test]
    fn test_compare_contents_different() -> io::Result<()> {
        check_compare(b"hello world", b"hello rust", false)
    }

    #[test]
    fn test_compare_contents_different_size() -> io::Result<()> {
        check_compare(b"hello world", b"hello", false)
    }

    #[test]
    fn test_compare_contents_empty_files() -> io::Result<()> {
        check_compare(b"", b"", true)
    }

    #[test]
    fn test_comparison_result_empty() {
        let result = FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        assert!(!result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Unknown");
        // Always three symbols: '=' for the classification, then one blank
        // each for the unset time and size comparisons.
        assert_eq!(result.to_symbol_string(), "=  ");
        assert_eq!(result.to_symbol_string().chars().count(), 3);
    }

    #[test]
    fn test_comparison_result_contents_skipped() {
        let mut result =
            FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        result.modified_time_comparison = Some(Ordering::Equal);
        result.size_comparison = Some(Ordering::Equal);
        assert!(result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Identical");
        assert_eq!(result.to_symbol_string(), "===");
    }

    #[test]
    fn test_comparison_result_contents_differ() {
        let mut result =
            FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        result.modified_time_comparison = Some(Ordering::Equal);
        result.size_comparison = Some(Ordering::Equal);
        result.is_content_same = Some(false);
        assert!(!result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Contents differ");
        assert_eq!(result.to_symbol_string(), "==!");
    }
}