use crate::{
my_print,
split_and_insert,
MyResult,
args::{
Algorithm,
Arguments,
ResultFormat::*,
},
};
use serde::{
Serialize,
Serializer,
};
use std::{
thread,
io::Write,
path::PathBuf,
};
use rayon::prelude::*;
/// Identifies a file by its size and, optionally, a hash string.
///
/// Derives `Eq` and `Hash`, so it can be used as a map or set key.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
pub struct Key {
/// File size; printed with a "bytes" suffix and serialized through
/// `add_thousands_separator`.
#[serde(serialize_with = "add_thousands_separator")]
pub size: usize,
/// Hash string, if any. `GroupInfo::print_formatted` prints `None` as an
/// empty string in the `Personal` format.
pub hash: Option<String>,
}
/// A single file: its `Key` together with its path.
#[derive(Debug, Clone)]
pub struct FileInfo {
/// Size/hash key of the file.
pub key: Key,
/// Path of the file.
pub path: PathBuf,
}
/// A set of paths reported together under one `Key`, with aggregate counts.
///
/// The serde renames below define the field labels used in serialized output.
#[derive(Debug, Clone, Serialize)]
pub struct GroupInfo {
/// Size/hash key of the group; serialized as "File information".
#[serde(rename = "File information")]
pub key: Key,
/// Paths belonging to the group; serialized as "Paths".
#[serde(rename = "Paths")]
pub paths: Vec<PathBuf>,
/// Serialized as "Number of duplicate files".
#[serde(rename = "Number of duplicate files")]
pub num_file: usize,
/// Serialized as "Sum of file sizes", formatted by `add_thousands_separator`.
#[serde(rename = "Sum of file sizes", serialize_with = "add_thousands_separator")]
pub sum_size: usize,
}
/// Summary totals over a collection of `GroupInfo` values.
///
/// The serde renames below define the field labels used in serialized output.
#[derive(Debug, Default, Clone, Serialize)]
pub struct TotalInfo {
/// Serialized as "Hashing algorithm".
#[serde(rename = "Hashing algorithm")]
pub algorithm: Algorithm,
/// Serialized as "Total number of files"; supplied by the caller of
/// `Extensions::get_total_info`.
#[serde(rename = "Total number of files")]
pub total_num_files: usize,
/// Serialized as "Total number of duplicate files"; the sum of every
/// group's `num_file`.
#[serde(rename = "Total number of duplicate files")]
pub total_num_duplicate: usize,
/// Serialized as "Total number of different hashes"; the number of groups.
#[serde(rename = "Total number of different hashes")]
pub total_num_hashes: usize,
/// Serialized as "Total size of duplicate files" (formatted by
/// `add_thousands_separator`); the sum of every group's `sum_size`.
#[serde(rename = "Total size of duplicate files", serialize_with = "add_thousands_separator")]
pub total_size: usize,
}
fn add_thousands_separator<S>(size: &usize, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.collect_str(&format_args!("{} bytes", &split_and_insert(*size, '.')))
}
impl GroupInfo {
    /// Writes this group to `write` in the format selected by
    /// `arguments.result_format`.
    ///
    /// * `Json` — pretty-printed JSON followed by a blank line.
    /// * `Yaml` — the string produced by `serde_yaml`, followed by a newline.
    /// * `Personal` — a plain-text listing (size, hash, paths, number of
    ///   files, sum of sizes) followed by a blank line. A missing hash is
    ///   printed as an empty string.
    ///
    /// # Panics
    ///
    /// Panics if serialization fails or if `write` reports an I/O error.
    pub fn print_formatted(&self, arguments: &Arguments, write: &mut Box<&mut dyn Write>) {
        match &arguments.result_format {
            Json => {
                let serialized = serde_json::to_string_pretty(self)
                    .expect("failed to serialize GroupInfo as JSON");
                writeln!(write, "{serialized}\n").expect("failed to write JSON group output");
            }
            Yaml => {
                let serialized =
                    serde_yaml::to_string(self).expect("failed to serialize GroupInfo as YAML");
                writeln!(write, "{serialized}").expect("failed to write YAML group output");
            }
            Personal => {
                // Borrow the hash instead of cloning the String just to print it.
                let hash = self.key.hash.as_deref().unwrap_or_default();
                // One formatted write; the emitted bytes match five separate
                // `writeln!` calls (the last line ends with an extra newline).
                writeln!(
                    write,
                    "size: {} bytes\n\
                     hash: {}\n\
                     Paths: {:#?}\n\
                     Number of duplicate files: {}\n\
                     Sum of file sizes: {} bytes\n",
                    split_and_insert(self.key.size, '.'),
                    hash,
                    self.paths,
                    self.num_file,
                    split_and_insert(self.sum_size, '.'),
                )
                .expect("failed to write plain-text group output");
            }
        }
    }
}
/// Operations on a collection of duplicate-file groups.
///
/// Implemented in this file for `[GroupInfo]`.
pub trait Extensions {
/// Sorts the groups in place; the sort order is selected by `arguments`.
fn sort_duplicate_files(&mut self, arguments: &Arguments);
/// Prints every group in the format selected by `arguments`, returning an
/// error if the output step fails.
fn print_duplicated_files(&self, arguments: &Arguments) -> MyResult<()>;
/// Aggregates the groups into a `TotalInfo`; `total_num_files` is supplied
/// by the caller and stored as-is.
fn get_total_info(&self, arguments: &Arguments, total_num_files: usize) -> TotalInfo;
}
impl Extensions for [GroupInfo] {
    /// Sorts the groups in place (unstable, parallel sort).
    ///
    /// * `arguments.sort == true`: by size, then hash, then number of files.
    /// * `arguments.sort == false`: by number of files, then size, then hash.
    ///
    /// Keys are compared through borrowed fields, so the hash `String` is
    /// not cloned for every comparison.
    fn sort_duplicate_files(&mut self, arguments: &Arguments) {
        if arguments.sort {
            self.par_sort_unstable_by(|a, b| {
                (a.key.size, &a.key.hash, a.num_file).cmp(&(b.key.size, &b.key.hash, b.num_file))
            });
        } else {
            self.par_sort_unstable_by(|a, b| {
                (a.num_file, a.key.size, &a.key.hash).cmp(&(b.num_file, b.key.size, &b.key.hash))
            });
        }
    }

    /// Formats every group in parallel into in-memory buffers, concatenates
    /// them, and hands the bytes to `my_print`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `my_print`.
    ///
    /// # Panics
    ///
    /// Panics if `GroupInfo::print_formatted` panics (serialization failure).
    fn print_duplicated_files(&self, arguments: &Arguments) -> MyResult<()> {
        // NOTE(review): the chunk *length* is the thread count, not
        // `len / threads`; this yields many small chunks — confirm intent.
        let chunk_size = rayon::current_num_threads();

        let all_buffer: Vec<u8> = self
            .par_chunks(chunk_size)
            .flat_map(|groups_info| {
                let mut buffer: Vec<u8> = Vec::new();
                {
                    // `print_formatted` expects a boxed `&mut dyn Write`; the
                    // inner scope ends the borrow before `buffer` is returned.
                    let mut write: Box<&mut dyn Write> = Box::new(&mut buffer);
                    for group_info in groups_info {
                        group_info.print_formatted(arguments, &mut write);
                    }
                }
                buffer
            })
            .collect();

        my_print(&all_buffer)?;
        Ok(())
    }

    /// Builds a `TotalInfo` for these groups.
    ///
    /// The number of duplicate files and the total size are summed in
    /// parallel; `total_num_hashes` is the number of groups, and the
    /// algorithm is copied from `arguments`.
    fn get_total_info(&self, arguments: &Arguments, total_num_files: usize) -> TotalInfo {
        let (total_num_duplicate, total_size) = rayon::join(
            || self.par_iter().map(|group_info| group_info.num_file).sum(),
            || self.par_iter().map(|group_info| group_info.sum_size).sum(),
        );

        TotalInfo {
            algorithm: arguments.algorithm,
            total_num_files,
            total_num_duplicate,
            total_num_hashes: self.len(),
            total_size,
        }
    }
}
impl TotalInfo {
    /// Prints all duplicate groups and computes the totals concurrently on
    /// two scoped threads, returning the computed `TotalInfo`.
    ///
    /// # Panics
    ///
    /// Panics with `"thread::scope failed!"` if either thread panics, which
    /// includes `print_duplicated_files` returning an error.
    pub fn get_summary(duplicate_hash: &[GroupInfo], arguments: &Arguments, total_num_files: usize) -> Self {
        let (result_display, result_total_info) = thread::scope(|s| {
            let thread_a = s.spawn(|| duplicate_hash.print_duplicated_files(arguments).unwrap());
            let thread_b = s.spawn(|| duplicate_hash.get_total_info(arguments, total_num_files));
            (thread_a.join(), thread_b.join())
        });

        match (result_display, result_total_info) {
            (Ok(()), Ok(total_info)) => total_info,
            _ => panic!("thread::scope failed!"),
        }
    }

    /// Prints this summary to stdout in the format selected by
    /// `arguments.result_format`.
    ///
    /// All three formats report the values stored in `self`, including the
    /// hashing algorithm. The misspelled method name is kept so existing
    /// callers continue to compile.
    ///
    /// # Errors
    ///
    /// Returns an error if JSON or YAML serialization fails.
    pub fn print_sumary(&self, arguments: &Arguments) -> MyResult<()> {
        match &arguments.result_format {
            Json => {
                let serialized = serde_json::to_string_pretty(self)?;
                println!("{serialized}\n");
            }
            Yaml => {
                let serialized = serde_yaml::to_string(self)?;
                println!("{serialized}");
            }
            Personal => {
                // Use the stored algorithm so this branch agrees with the
                // serialized JSON/YAML output.
                println!("Hashing algorithm: {}", self.algorithm);
                println!("Total number of files: {}", self.total_num_files);
                println!("Total number of duplicate files: {}", self.total_num_duplicate);
                println!("Total number of different hashes: {}", self.total_num_hashes);
                println!("Total size of duplicate files: {} bytes\n", split_and_insert(self.total_size, '.'));
            }
        }
        Ok(())
    }
}