use clap::Parser;
use crc32fast::Hasher;
use crossterm::{cursor::MoveUp, execute, terminal};
use num_format::{Locale, ToFormattedString};
use std::collections::HashMap;
use std::fs;
use std::fs::File;
use std::io::{stdout, Read};
use std::os::unix::fs::MetadataExt;
#[derive(Parser, Debug)]
#[clap(name = "rfdups")]
#[clap(author = "Tang Jun <ken@gos7.net>")]
#[clap(version)]
#[clap(about = "find duplicate files in directory quickly", long_about = None)]
pub struct Args {
#[clap(short('m'), long, help = "summarize dupe information")]
pub summarize: bool,
#[clap(short, long, help = "show size of duplicate files")]
pub size: bool,
#[clap(min_values = 1, multiple_values = true, required = true)]
pub dirs: Vec<String>,
}
pub fn crc32(filename: &str) -> u32 {
let mut hasher = Hasher::new();
const BUFFER_SIZE: usize = 4096;
let mut buffer = [0; BUFFER_SIZE];
let mut file = File::open(&filename).unwrap();
let _ = file.read(&mut buffer);
hasher.update(&buffer);
hasher.finalize()
}
fn clean_up_line() {
let screen_width = terminal::size().unwrap().0 as usize;
println!(
"{}",
std::iter::repeat(" ")
.take(screen_width)
.collect::<String>()
);
execute!(stdout(), MoveUp(1)).unwrap();
}
pub fn check_dup(file_info: &HashMap<String, Vec<String>>, options: &Args) {
let mut total_size: u64 = 0;
let mut file_num: u32 = 0;
clean_up_line();
for (f_info, fn_vec) in &*file_info {
if fn_vec.len() > 2 {
let v: Vec<&str> = f_info.split("_").collect();
let f_size = v[0].parse::<u64>().unwrap();
let dup_num = fn_vec.len() - 2;
total_size += f_size * dup_num as u64;
file_num += dup_num as u32;
if options.size {
println!("{} bytes each:", v[0]);
}
for (index, value) in fn_vec.iter().enumerate() {
if index > 0 {
println!("{}", value);
}
}
println!("");
}
}
if options.summarize {
println!(
"{} duplicate files, occupying {} bytes",
file_num, total_size
);
}
}
pub fn filehash_proc(
file_info: &HashMap<u64, Vec<String>>,
dup_files: &mut HashMap<String, Vec<String>>,
mut count: u32,
) {
let indicator = ['/', '|', '\\', '-'];
let mut progress: usize = 0;
for (len, info_vec) in &*file_info {
if info_vec.len() > 1 {
for (_index, path) in info_vec.iter().enumerate() {
let metadata = fs::metadata(&path).unwrap();
let inode = metadata.ino();
let key = format!("{}_{}", len, crc32(path));
if dup_files.contains_key(&key) {
let inode_str = &dup_files.get_mut(&key).unwrap()[0];
if !inode.to_string().eq(inode_str) {
dup_files.get_mut(&key).unwrap().push(path.to_string());
}
} else {
dup_files.insert(key, vec![inode.to_string(), path.to_string()]);
}
}
}
count -= info_vec.len() as u32;
clean_up_line();
eprintln!(
"{}\t{} \tfiles left.",
indicator[progress],
count.to_formatted_string(&Locale::en)
);
execute!(stdout(), MoveUp(1)).unwrap();
progress = (progress + 1) % 4;
}
}
pub fn read_dir(dir: &str, file_info: &mut HashMap<u64, Vec<String>>, mut count: u32) -> u32 {
let paths = fs::read_dir(dir).unwrap();
let indicator = ['-', '\\', '|', '/'];
let mut progress: usize = 0;
'outer: for entry in paths {
let path = entry.unwrap().path();
let attr = fs::symlink_metadata(&path).unwrap();
if attr.is_file() {
let metadata = fs::metadata(&path).unwrap();
if metadata.len() < 1 {
continue 'outer;
}
let filename = path.to_str().unwrap().to_string();
let length = metadata.len();
if !file_info.contains_key(&length) {
file_info.insert(length, vec![filename]);
} else {
file_info.get_mut(&length).unwrap().push(filename);
}
count += 1;
eprintln!(
"{}\t{} \tfiles found.",
indicator[progress],
count.to_formatted_string(&Locale::en)
);
execute!(stdout(), MoveUp(1)).unwrap();
progress = (progress + 1) % 4;
clean_up_line();
} else if attr.is_dir() {
count = read_dir(path.to_str().unwrap(), file_info, count);
}
}
count
}