use anyhow::{Context, Result};
use rayon::prelude::*;
use std::collections::HashMap;
use std::fmt;
use std::path::{Path, PathBuf};
use std::time::SystemTime;
/// Strategy used to decide whether two files found at the same relative path match.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompareBy {
/// Compare 128-bit XXH3 hashes of the file contents.
Content,
/// Compare BLAKE3 hashes of the file contents (only the first 16 digest bytes are kept).
Paranoid,
/// Compare sizes, then modification times with a tolerance of two whole seconds;
/// when either modification time is unavailable, equal sizes are enough.
SizeTime,
/// Compare by relative path only: a file present on both sides always matches.
Name,
}
/// One classified file in a folder comparison.
///
/// Paths produced by `diff_folders` are relative to the compared roots.
#[derive(Debug)]
pub enum FolderDiffEntry {
/// Present at the same relative path on both sides and judged equal.
Identical {
path: PathBuf,
size: u64,
},
/// Present at the same relative path on both sides but judged different.
Modified {
path: PathBuf,
left_size: u64,
right_size: u64,
},
/// Present only on the right side.
Added {
path: PathBuf,
size: u64,
},
/// Present only on the left side.
Removed {
path: PathBuf,
size: u64,
},
/// Right-only file (`to`) whose content hash equals that of a left file (`from`)
/// whose own path no longer exists on the right. Only detected in the
/// content-hash comparison modes.
Moved {
from: PathBuf,
to: PathBuf,
size: u64,
},
/// A problem with the file at `path` on the given `side`; `error` is a
/// human-readable description.
Error {
path: PathBuf,
side: Side,
error: String,
},
}
/// Identifies which of the two compared folders something refers to.
#[derive(Debug, Clone, Copy)]
pub enum Side {
    /// The first ("left") folder.
    Left,
    /// The second ("right") folder.
    Right,
}

impl fmt::Display for Side {
    /// Writes the lowercase side name (`left` / `right`) with no padding applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match *self {
            Side::Left => "left",
            Side::Right => "right",
        };
        f.write_str(label)
    }
}
/// Aggregate counters over a list of `FolderDiffEntry` values.
#[derive(Debug, Default)]
pub struct FolderDiffSummary {
/// Number of `Identical` entries.
pub identical: usize,
/// Number of `Modified` entries.
pub modified: usize,
/// Number of `Added` entries.
pub added: usize,
/// Number of `Removed` entries.
pub removed: usize,
/// Number of `Moved` entries.
pub moved: usize,
/// Number of `Error` entries (these contribute no bytes to either total).
pub errors: usize,
/// Sum of left-side sizes over identical, modified, removed and moved entries.
pub left_bytes: u64,
/// Sum of right-side sizes over identical, modified, added and moved entries.
pub right_bytes: u64,
}
/// Outcome of a folder comparison: the flat list of classified entries.
pub struct FolderDiffResult {
    /// Every classified file, one entry each.
    pub entries: Vec<FolderDiffEntry>,
}

impl FolderDiffResult {
    /// Returns `true` when at least one entry is something other than `Identical`
    /// (error entries count as a difference). An empty result has no diff.
    pub fn has_diff(&self) -> bool {
        !self
            .entries
            .iter()
            .all(|entry| matches!(entry, FolderDiffEntry::Identical { .. }))
    }

    /// Tallies the entries per category and sums the bytes seen on each side.
    pub fn summary(&self) -> FolderDiffSummary {
        self.entries
            .iter()
            .fold(FolderDiffSummary::default(), |mut totals, entry| {
                // Bump the category counter and work out how many bytes this
                // entry contributes to the (left, right) totals.
                let (left, right) = match entry {
                    FolderDiffEntry::Identical { size, .. } => {
                        totals.identical += 1;
                        (*size, *size)
                    }
                    FolderDiffEntry::Modified {
                        left_size,
                        right_size,
                        ..
                    } => {
                        totals.modified += 1;
                        (*left_size, *right_size)
                    }
                    FolderDiffEntry::Added { size, .. } => {
                        totals.added += 1;
                        (0, *size)
                    }
                    FolderDiffEntry::Removed { size, .. } => {
                        totals.removed += 1;
                        (*size, 0)
                    }
                    FolderDiffEntry::Moved { size, .. } => {
                        totals.moved += 1;
                        (*size, *size)
                    }
                    FolderDiffEntry::Error { .. } => {
                        totals.errors += 1;
                        (0, 0)
                    }
                };
                totals.left_bytes += left;
                totals.right_bytes += right;
                totals
            })
    }
}
/// Metadata gathered for one regular file during a directory scan.
struct FileRecord {
// Path relative to the scanned root.
rel: PathBuf,
// File length in bytes as reported by the file's metadata.
size: u64,
// Last-modification time, if the metadata call could provide one.
mtime: Option<SystemTime>,
// Content hash; only populated in the content-hash comparison modes, and
// only when hashing the file succeeded.
hash: Option<u128>,
}
/// Scans `root` and returns one [`FileRecord`] per regular file found.
///
/// * `recursive` — when `false`, only the direct children of `root` are visited.
/// * `compare_by` — decides whether (and with which algorithm) file contents
///   are hashed: XXH3 for `Content`, BLAKE3 for `Paranoid`, none otherwise.
///
/// Anything that cannot be processed is skipped without notice: directory
/// entries the walker fails to read, paths that are not under `root`, and
/// files whose metadata cannot be fetched. A hashing failure keeps the record
/// but leaves its `hash` as `None`.
fn collect(root: &Path, recursive: bool, compare_by: CompareBy) -> Vec<FileRecord> {
    let mut walker = walkdir::WalkDir::new(root);
    if !recursive {
        walker = walker.max_depth(1);
    }

    // The walk itself is sequential; only regular files are kept.
    let mut files: Vec<walkdir::DirEntry> = Vec::new();
    for item in walker {
        match item {
            Ok(entry) if entry.file_type().is_file() => files.push(entry),
            _ => {}
        }
    }

    // Metadata lookups and hashing run in parallel across the collected files.
    files
        .into_par_iter()
        .filter_map(|entry| {
            let abs = entry.into_path();
            let rel = abs.strip_prefix(root).ok()?.to_path_buf();
            let meta = std::fs::metadata(&abs).ok()?;
            let size = meta.len();
            let mtime = meta.modified().ok();
            let hash = match compare_by {
                CompareBy::Content => hash_xxh3(&abs).ok(),
                CompareBy::Paranoid => hash_blake3(&abs).ok(),
                CompareBy::SizeTime | CompareBy::Name => None,
            };
            Some(FileRecord {
                rel,
                size,
                mtime,
                hash,
            })
        })
        .collect()
}
/// Computes the 128-bit XXH3 hash of the file at `path`.
///
/// Strategy:
/// * a file whose metadata reports length 0 yields the hash of an empty slice
///   without reading anything;
/// * files of up to 512 MiB are hashed through a read-only memory map;
/// * larger files — or files that cannot be mapped — are streamed in 256 KiB
///   chunks.
///
/// # Errors
///
/// Fails if the file cannot be opened, its metadata cannot be read, or a read
/// fails while streaming. Every error carries the offending path as context.
fn hash_xxh3(path: &Path) -> Result<u128> {
    use std::io::Read;
    use xxhash_rust::xxh3::{xxh3_128, Xxh3Default};

    /// Largest file size (bytes) hashed via a memory map.
    const MMAP_LIMIT: u64 = 512 * 1024 * 1024;
    /// Buffer size (bytes) for the streaming fallback.
    const CHUNK_SIZE: usize = 256 * 1024;

    let file =
        std::fs::File::open(path).with_context(|| format!("cannot open {}", path.display()))?;
    // Keep the length as `u64`. Casting to `usize` truncates on 32-bit
    // targets, where a file whose size is a multiple of 4 GiB would look
    // empty and be given the empty-input hash.
    let len = file
        .metadata()
        .with_context(|| format!("cannot read metadata of {}", path.display()))?
        .len();
    if len == 0 {
        return Ok(xxh3_128(&[]));
    }
    if len <= MMAP_LIMIT {
        // SAFETY: the mapping is read-only and is dropped before this function
        // returns. Soundness additionally relies on no other process
        // truncating or rewriting the file while it is being hashed, which
        // cannot be enforced here.
        if let Ok(map) = unsafe { memmap2::Mmap::map(&file) } {
            return Ok(xxh3_128(&map));
        }
        // Mapping failed: fall through to the streaming path.
    }

    let mut hasher = Xxh3Default::new();
    let mut reader = std::io::BufReader::with_capacity(CHUNK_SIZE, file);
    let mut buf = vec![0u8; CHUNK_SIZE];
    loop {
        let n = reader
            .read(&mut buf)
            .with_context(|| format!("cannot read {}", path.display()))?;
        if n == 0 {
            break;
        }
        hasher.update(&buf[..n]);
    }
    Ok(hasher.digest128())
}
/// Computes a BLAKE3 hash of the file at `path`, reduced to 128 bits.
///
/// Only the first 16 bytes of the 32-byte digest are kept (interpreted as a
/// little-endian `u128`).
///
/// Strategy:
/// * a file whose metadata reports length 0 yields the hash of an empty slice
///   without reading anything;
/// * files of up to 512 MiB are hashed through a read-only memory map;
/// * larger files — or files that cannot be mapped — are streamed in 256 KiB
///   chunks.
///
/// # Errors
///
/// Fails if the file cannot be opened, its metadata cannot be read, or a read
/// fails while streaming. Every error carries the offending path as context.
fn hash_blake3(path: &Path) -> Result<u128> {
    use std::io::Read;

    /// Largest file size (bytes) hashed via a memory map.
    const MMAP_LIMIT: u64 = 512 * 1024 * 1024;
    /// Buffer size (bytes) for the streaming fallback.
    const CHUNK_SIZE: usize = 256 * 1024;

    /// Keeps the first 16 digest bytes, read as a little-endian `u128`.
    fn low_128(digest: &blake3::Hash) -> u128 {
        let mut low = [0u8; 16];
        low.copy_from_slice(&digest.as_bytes()[..16]);
        u128::from_le_bytes(low)
    }

    let file =
        std::fs::File::open(path).with_context(|| format!("cannot open {}", path.display()))?;
    // Keep the length as `u64`. Casting to `usize` truncates on 32-bit
    // targets, where a file whose size is a multiple of 4 GiB would look
    // empty and be given the empty-input hash.
    let len = file
        .metadata()
        .with_context(|| format!("cannot read metadata of {}", path.display()))?
        .len();
    if len == 0 {
        return Ok(low_128(&blake3::hash(&[])));
    }
    if len <= MMAP_LIMIT {
        // SAFETY: the mapping is read-only and is dropped before this function
        // returns. Soundness additionally relies on no other process
        // truncating or rewriting the file while it is being hashed, which
        // cannot be enforced here.
        if let Ok(map) = unsafe { memmap2::Mmap::map(&file) } {
            return Ok(low_128(&blake3::hash(&map)));
        }
        // Mapping failed: fall through to the streaming path.
    }

    let mut hasher = blake3::Hasher::new();
    let mut reader = std::io::BufReader::with_capacity(CHUNK_SIZE, file);
    let mut buf = vec![0u8; CHUNK_SIZE];
    loop {
        let n = reader
            .read(&mut buf)
            .with_context(|| format!("cannot read {}", path.display()))?;
        if n == 0 {
            break;
        }
        hasher.update(&buf[..n]);
    }
    Ok(low_128(&hasher.finalize()))
}
pub fn diff_folders(
left: &Path,
right: &Path,
recursive: bool,
compare_by: CompareBy,
) -> Result<FolderDiffResult> {
let (left_records, right_records) = rayon::join(
|| collect(left, recursive, compare_by),
|| collect(right, recursive, compare_by),
);
let left_map: HashMap<PathBuf, FileRecord> = left_records
.into_iter()
.map(|r| (r.rel.clone(), r))
.collect();
let right_map: HashMap<PathBuf, FileRecord> = right_records
.into_iter()
.map(|r| (r.rel.clone(), r))
.collect();
let left_by_hash: HashMap<u128, &PathBuf> =
if matches!(compare_by, CompareBy::Content | CompareBy::Paranoid) {
left_map
.iter()
.filter_map(|(p, r)| r.hash.map(|h| (h, p)))
.collect()
} else {
HashMap::new()
};
let mut entries: Vec<FolderDiffEntry> = Vec::new();
let mut moved_from: std::collections::HashSet<PathBuf> = Default::default();
for (rel, rr) in &right_map {
match left_map.get(rel) {
None => {
if matches!(compare_by, CompareBy::Content | CompareBy::Paranoid) {
if let Some(hash) = rr.hash {
if let Some(&from_path) = left_by_hash.get(&hash) {
if !right_map.contains_key(from_path) {
moved_from.insert(from_path.clone());
entries.push(FolderDiffEntry::Moved {
from: from_path.clone(),
to: rel.clone(),
size: rr.size,
});
continue;
}
}
}
}
entries.push(FolderDiffEntry::Added {
path: rel.clone(),
size: rr.size,
});
}
Some(lr) => {
if files_match(lr, rr, compare_by) {
entries.push(FolderDiffEntry::Identical {
path: rel.clone(),
size: rr.size,
});
} else {
entries.push(FolderDiffEntry::Modified {
path: rel.clone(),
left_size: lr.size,
right_size: rr.size,
});
}
}
}
}
for (rel, lr) in &left_map {
if !right_map.contains_key(rel) && !moved_from.contains(rel) {
entries.push(FolderDiffEntry::Removed {
path: rel.clone(),
size: lr.size,
});
}
}
entries.sort_by(|a, b| {
let (ka, pa) = entry_sort_key(a);
let (kb, pb) = entry_sort_key(b);
(ka, pa).cmp(&(kb, pb))
});
Ok(FolderDiffResult { entries })
}
/// Decides whether two records for the same relative path count as equal
/// under the given comparison strategy.
///
/// * `Name` — always equal.
/// * `SizeTime` — sizes must be equal; if both modification times are known
///   they must additionally lie within two whole seconds of each other
///   (sub-second remainders are ignored).
/// * `Content` / `Paranoid` — both hashes must be present and equal.
fn files_match(left: &FileRecord, right: &FileRecord, compare_by: CompareBy) -> bool {
    match compare_by {
        CompareBy::Name => true,
        CompareBy::Content | CompareBy::Paranoid => {
            left.hash.is_some() && left.hash == right.hash
        }
        CompareBy::SizeTime => {
            if left.size != right.size {
                return false;
            }
            // Without both timestamps the (already equal) sizes decide.
            let (Some(lt), Some(rt)) = (left.mtime, right.mtime) else {
                return true;
            };
            // Absolute distance between the two timestamps, whichever is later.
            let gap = match lt.duration_since(rt) {
                Ok(ahead) => ahead,
                Err(behind) => behind.duration(),
            };
            gap.as_secs() <= 2
        }
    }
}
fn entry_sort_key(e: &FolderDiffEntry) -> (&'static str, &PathBuf) {
match e {
FolderDiffEntry::Removed { path, .. } => ("1-removed", path),
FolderDiffEntry::Modified { path, .. } => ("2-modified", path),
FolderDiffEntry::Moved { to, .. } => ("3-moved", to),
FolderDiffEntry::Added { path, .. } => ("4-added", path),
FolderDiffEntry::Identical { path, .. } => ("5-identical", path),
FolderDiffEntry::Error { path, .. } => ("6-error", path),
}
}
/// Prints a single entry as one human-readable line.
///
/// Regular entries go to stdout and error entries go to stderr. Identical
/// entries are printed only when `show_identical` is `true`.
pub fn print_entry(e: &FolderDiffEntry, show_identical: bool) {
    match e {
        // Identical files are suppressed unless explicitly requested.
        FolderDiffEntry::Identical { .. } if !show_identical => {}
        FolderDiffEntry::Identical { path, size } => {
            let shown = format_size(*size);
            println!("[=] IDENTICAL {} ({})", path.display(), shown);
        }
        FolderDiffEntry::Added { path, size } => {
            let shown = format_size(*size);
            println!("[+] ADDED {} ({})", path.display(), shown);
        }
        FolderDiffEntry::Removed { path, size } => {
            let shown = format_size(*size);
            println!("[-] REMOVED {} ({})", path.display(), shown);
        }
        FolderDiffEntry::Moved { from, to, size } => {
            let shown = format_size(*size);
            println!("[→] MOVED {} ← {} ({})", to.display(), from.display(), shown);
        }
        FolderDiffEntry::Modified {
            path,
            left_size,
            right_size,
        } => {
            let before = format_size(*left_size);
            let after = format_size(*right_size);
            println!("[≠] MODIFIED {} ({} → {})", path.display(), before, after);
        }
        FolderDiffEntry::Error { path, side, error } => {
            eprintln!("[!] ERROR ({side}) {} — {error}", path.display());
        }
    }
}
/// Prints the comparison totals to stdout.
///
/// Output, in order: an "identical" banner when there is no difference at
/// all, the per-category counts (with an error count appended only when
/// non-zero), the total size of each side, and the signed size delta (right
/// minus left) when it is non-zero.
pub fn print_summary(left: &Path, right: &Path, result: &FolderDiffResult) {
    let totals = result.summary();
    if !result.has_diff() {
        println!("[=] Folders are identical");
    }

    let error_suffix = match totals.errors {
        0 => String::new(),
        count => format!(" | {} errors", count),
    };
    println!(
        "[*] {:>4} identical | {:>4} modified | {:>4} added | {:>4} removed | {:>4} moved{}",
        totals.identical,
        totals.modified,
        totals.added,
        totals.removed,
        totals.moved,
        error_suffix
    );

    let left_total = format_size(totals.left_bytes);
    println!(" Left {} — {}", left.display(), left_total);
    let right_total = format_size(totals.right_bytes);
    println!(" Right {} — {}", right.display(), right_total);

    let delta = totals.right_bytes.abs_diff(totals.left_bytes);
    if delta > 0 {
        let sign = if totals.right_bytes >= totals.left_bytes {
            "+"
        } else {
            "-"
        };
        println!(" Delta {sign}{}", format_size(delta));
    }
}
/// Renders a byte count using binary units.
///
/// The largest of GiB / MiB / KiB that does not exceed `bytes` is used, with
/// one decimal place; values below 1 KiB are printed as an exact byte count.
fn format_size(bytes: u64) -> String {
    // Largest unit first, so the first threshold reached wins.
    const UNITS: [(u64, &str); 3] = [
        (1024 * 1024 * 1024, "GiB"),
        (1024 * 1024, "MiB"),
        (1024, "KiB"),
    ];
    for &(threshold, suffix) in UNITS.iter() {
        if bytes >= threshold {
            return format!("{:.1} {}", bytes as f64 / threshold as f64, suffix);
        }
    }
    format!("{} B", bytes)
}