#![deny(missing_docs)]
extern crate sha2;
#[macro_use]
mod macros;
mod utilities;
use std::error::Error;
use std::io;
use std::path::PathBuf;
use utilities::PathUtilities;
/// A hash string together with the paths of the files recorded for it.
///
/// Within this file, values are produced by `duplicate_files`, which groups
/// files whose `sha256()` results are equal.
#[derive(Debug)]
pub struct FileHash {
    /// Hash shared by every path in `files`.
    hash: String,
    /// Paths recorded for `hash`.
    files: Vec<PathBuf>
}

impl FileHash {
    /// Returns an owned copy of the hash string.
    pub fn get_hash(&self) -> String {
        self.hash.to_owned()
    }

    /// Borrows the list of paths recorded for this hash.
    pub fn get_files(&self) -> &Vec<PathBuf> {
        let FileHash { ref files, .. } = *self;
        files
    }

    /// Returns an owned copy of the hash together with a borrow of the path
    /// list, in that order.
    pub fn get_hash_and_files(&self) -> (String, &Vec<PathBuf>) {
        let hash = self.get_hash();
        let files = self.get_files();
        (hash, files)
    }

    /// Appends `file` to the end of the path list.
    pub fn add_file(&mut self, file: PathBuf) {
        let files = &mut self.files;
        files.push(file);
    }

    /// Returns how many paths are currently recorded.
    pub fn total_files(&self) -> usize {
        self.get_files().len()
    }
}
/// Finds duplicates of the given `files`.
///
/// When `dirs_opt` is `Some`, every listed directory is searched with
/// `files_within`, passing the sizes of the given files (presumably so that
/// only files of a matching size are returned), and the given files
/// themselves are added to the candidates. When it is `None`, the parent
/// directory of each given file is searched instead, using that file's size.
///
/// The collected candidates are de-duplicated (keeping first-seen order) and
/// handed to `duplicate_files`, so a path that is reached more than once --
/// for example two inputs of equal size in the same directory, or
/// overlapping search directories -- is never reported as a duplicate of
/// itself.
///
/// # Errors
///
/// Returns an `io::ErrorKind::InvalidInput` error when an entry of `files`
/// is not a file, when an entry of `dirs_opt` is not a directory, or when a
/// file has no parent directory to search. Failures while reading metadata
/// or listing a directory are returned through `try_with_path!`, and any
/// error from `duplicate_files` is returned unchanged.
pub fn duplicates_of(files: &[PathBuf], dirs_opt: Option<&[PathBuf]>)
    -> io::Result<Vec<FileHash>>
{
    if let Some(path) = files.iter().find(|p| !p.is_file()) {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            format!("{} is not a file", path.display())
        ));
    }

    let mut check_files: Vec<PathBuf> = vec![];

    if let Some(dirs) = dirs_opt {
        if let Some(path) = dirs.iter().find(|p| !p.is_dir()) {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                format!("{} is not a directory", path.display())
            ));
        }

        // Sizes of the files we are looking for, handed to every directory scan.
        let mut sizes = vec![];
        for file in files {
            let metadata = try_with_path!(file.metadata(), file);
            sizes.push(metadata.len());
        }

        for dir in dirs {
            let mut dir_files = try_with_path!(
                dir.files_within(Some(&sizes)),
                dir
            );
            check_files.append(&mut dir_files);
        }

        // The given files always take part in the comparison; any that the
        // scans already returned are dropped by the de-duplication below.
        check_files.extend(files.iter().cloned());
    } else {
        for file in files {
            // NOTE(review): a bare relative name such as `foo.txt` has the
            // empty path as its parent -- confirm `files_within` copes with it.
            let parent = match file.parent() {
                Some(dir) => dir.to_path_buf(),
                None => return Err(io::Error::new(
                    io::ErrorKind::InvalidInput,
                    format!("{} has no parent directory", file.display())
                )),
            };
            let metadata = try_with_path!(file.metadata(), file);
            let sizes = vec![metadata.len()];
            let mut dir_files = try_with_path!(
                parent.files_within(Some(&sizes)),
                parent
            );
            check_files.append(&mut dir_files);
        }
    }

    // Keep only the first occurrence of every path: repeated candidates would
    // otherwise be grouped with themselves and reported as duplicates.
    let mut seen = ::std::collections::HashSet::new();
    check_files.retain(|path| seen.insert(path.clone()));

    duplicate_files(&check_files)
}
/// Finds duplicate files among the contents of the given directories.
///
/// Every directory is listed with `files_within(None)`; the combined listing
/// is then passed to `duplicate_files`, whose result is returned as-is.
///
/// # Errors
///
/// Returns an `io::ErrorKind::InvalidInput` error naming the first entry of
/// `dirs` that is not a directory. Listing failures are returned through
/// `try_with_path!`, and any error from `duplicate_files` is returned
/// unchanged.
pub fn duplicates_within(dirs: &[PathBuf]) -> io::Result<Vec<FileHash>> {
    // Reject the whole request as soon as one entry is not a directory.
    if let Some(not_dir) = dirs.iter().find(|candidate| !candidate.is_dir()) {
        let message = format!("{} is not a directory", not_dir.display());
        return Err(io::Error::new(io::ErrorKind::InvalidInput, message));
    }

    let mut candidates: Vec<PathBuf> = Vec::new();
    for dir in dirs {
        let listing = try_with_path!(dir.files_within(None), dir);
        candidates.extend(listing);
    }

    duplicate_files(&candidates)
}
/// Groups the given files by content hash and returns the groups that hold
/// more than one file.
///
/// Files are first bucketed by the length reported by their metadata; only
/// files that share a length with at least one other file are hashed with
/// `sha256()`. Files with equal hashes end up in the same `FileHash`. Groups
/// appear in the order in which their first member was encountered.
///
/// A path that occurs more than once in `files` is considered only once, so
/// a file is never reported as a duplicate of itself.
///
/// # Errors
///
/// Returns an `io::ErrorKind::InvalidInput` error naming the first entry of
/// `files` that is not a file. Failures while reading metadata or hashing
/// are returned through `try_with_path!`.
pub fn duplicate_files(files: &[PathBuf]) -> io::Result<Vec<FileHash>> {
    if let Some(path) = files.iter().find(|p| !p.is_file()) {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            format!("{} is not a file", path.display())
        ));
    }

    // Bucket the distinct paths by file size. Borrowing the paths avoids
    // cloning files that are never hashed.
    let mut seen = ::std::collections::HashSet::new();
    let mut sizes: Vec<(u64, Vec<&PathBuf>)> = vec![];
    for file in files {
        // `insert` returns false for a path we have already bucketed.
        if !seen.insert(file) {
            continue;
        }
        let metadata = try_with_path!(file.metadata(), file);
        let size = metadata.len();
        if let Some(i) = sizes.iter().position(|s| s.0 == size) {
            sizes[i].1.push(file)
        } else {
            sizes.push((size, vec![file]));
        }
    }

    // Only files that share their size with another file can be duplicates,
    // so only those are hashed.
    let mut hash_list: Vec<FileHash> = vec![];
    for size in sizes.iter().filter(|s| s.1.len() > 1) {
        for &file in &size.1 {
            let hash = try_with_path!(file.sha256(), file);
            if let Some(i) = hash_list.iter().position(|h| h.hash == hash) {
                hash_list[i].add_file(file.clone());
            } else {
                hash_list.push(FileHash {
                    hash: hash,
                    files: vec![file.clone()]
                });
            }
        }
    }

    // A hash seen for a single file means that file has no duplicate.
    hash_list.retain(|group| group.total_files() > 1);

    Ok(hash_list)
}