extern crate clap;
#[macro_use]
extern crate log;
use std::ffi::OsString;
use std::io::{BufRead, Write};
use std::path::{Path, PathBuf};
use std::{fs, io, process};
use anyhow::{anyhow, Context, Result};
use atty::Stream;
use indicatif::HumanBytes;
use crate::candidate_selection::candidate_selector::CandidateSelector;
use crate::duplicate_detection::DuplicateDetector;
use crate::file_descriptor::DeviceDescriptor;
use crate::options::{AnalysisMode, Format, LinkSelectionPreference, Options};
use crate::report::csv::CSVWriter;
use crate::report::json::JSONWriter;
use crate::report::report_writer::ReportWriter;
use crate::types::{Duplicate, FileDescriptorWithPaths};
use crate::ui::exit_codes::ExitCode;
use crate::ui::format::HumanInteger;
use rustc_hash::FxHashMap;
mod app;
mod candidate_selection;
mod concurrency;
mod duplicate_detection;
mod file_descriptor;
mod filesystem;
mod options;
mod report;
mod types;
mod ui;
/// Program entry point: sets up logging, runs the application and exits the process with the
/// resulting exit code.
///
/// Any error returned by [`run`] is logged together with its cause chain and mapped to
/// `ExitCode::GeneralError`.
fn main() {
    pretty_env_logger::init();

    // Resolve the exit code first so that there is a single place where the process terminates.
    let exit_code = match run() {
        Ok(code) => code,
        Err(err) => {
            error!("[dsc error]: {:#}", err);
            ExitCode::GeneralError
        }
    };
    process::exit(exit_code.into());
}
/// The action selected on the command line.
///
/// The type is a plain tag without payload, so it derives the usual value-type traits; this
/// allows it to be logged with `{:?}`, copied freely and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Subcommand {
    /// Selected by the `link` subcommand.
    Link,
    /// Selected by the `cmp` subcommand.
    Compare,
    /// Selected by the `report` subcommand.
    Report,
}
fn run() -> Result<ExitCode> {
let mut matches = app::build_app().get_matches();
let subcommand = match matches.subcommand_name() {
Some("report") => Subcommand::Report,
Some("link") => Subcommand::Link,
Some("cmp") => Subcommand::Compare,
_ => {
app::build_app().print_help()?;
return Ok(ExitCode::Success);
}
};
if let (_, Some(subcommand)) = matches.subcommand() {
matches = subcommand.to_owned()
}
let optional_paths = stdin_paths().or_else(|| {
matches
.values_of_os("path")
.map(|os_values| os_values.map(OsString::from).collect())
});
let search_paths = if let Some(paths) = optional_paths {
let mut directories = vec![];
for path in paths {
let path_buffer = PathBuf::from(path);
if filesystem::is_valid_path(&path_buffer) {
directories.push(path_buffer.canonicalize()?);
} else {
eprintln!(
"[dsc error]: {}",
format!(
"Search path '{}' is not a directory.",
path_buffer.to_string_lossy()
)
);
}
}
directories
} else {
let current_directory = Path::new(".");
if !filesystem::is_valid_path(current_directory) {
return Err(anyhow!(
"Could not retrieve current directory (has it been deleted?)."
));
}
vec![current_directory.to_path_buf()]
};
if search_paths.is_empty() {
return Err(anyhow!("No valid search paths given."));
}
let options = Options::from_matches(matches)?;
debug!("options: {:?}", options);
debug!("paths: {:?}", &search_paths);
scan(subcommand, &search_paths, options)
}
fn stdin_paths() -> Option<Vec<OsString>> {
if atty::is(Stream::Stdin) {
None
} else {
let mut lines = vec![];
let stdin = io::stdin();
for line in stdin.lock().lines() {
lines.push(OsString::from(line.unwrap()));
}
Some(lines)
}
}
/// Collects duplicates below `path_vec` and runs the requested subcommand on them.
///
/// Candidates are always selected first; only in `AnalysisMode::Exact` are they additionally
/// passed through the `DuplicateDetector` before being handed to the subcommand.
fn scan(subcommand: Subcommand, path_vec: &[PathBuf], options: Options) -> Result<ExitCode> {
    let selector = CandidateSelector::new(options);
    let candidates = selector.select_candidates(path_vec)?;

    let duplicates = if options.analysis_mode == AnalysisMode::Exact {
        let detector = DuplicateDetector::new(options);
        detector.check_duplicates(candidates)?
    } else {
        candidates
    };

    match subcommand {
        Subcommand::Report => report(duplicates, options),
        Subcommand::Link => link(duplicates, options),
        Subcommand::Compare => cmp(duplicates, options),
    }
}
/// Prints summary statistics about the duplicates that were found.
///
/// When `options.error_on_duplicate` is set and at least one duplicate exists, nothing is
/// printed and `ExitCode::DuplicateFound` is returned instead.
///
/// For every duplicate group all locations but one count as redundant, both for the number of
/// duplicate files and for the amount of duplicate data.
fn cmp(duplicates: Vec<Duplicate>, options: Options) -> Result<ExitCode> {
    let found_any = !duplicates.is_empty();
    if found_any && options.error_on_duplicate {
        return Ok(ExitCode::DuplicateFound);
    }

    let mut total_bytes: u64 = 0;
    let mut duplicate_files: u64 = 0;
    for group in &duplicates {
        total_bytes += group.file_size * ((group.locations.len() - 1) as u64);
        duplicate_files += group.locations.len() as u64 - 1;
    }
    let group_count = duplicates.len() as u64;

    println!("Duplicate data : {}", HumanBytes(total_bytes));
    println!("Total duplicates : {}", HumanInteger(group_count));
    println!("Total duplicate files : {}", HumanInteger(duplicate_files));
    Ok(ExitCode::Success)
}
/// Replaces duplicate files with hard links to a single retained copy.
///
/// Every duplicate group is first split by `DeviceDescriptor` (presumably because hard links
/// cannot span devices); per-device groups with fewer than two locations are dropped. Unless
/// `options.force` is set the user has to confirm the operation, which requires stdin to be a
/// terminal. Within each group the locations are ordered by the creation time (falling back to
/// the modification time) of their first path, and `options.link_priority` decides whether the
/// first or the last location of that order is kept as the link target.
///
/// With `options.dry_run` the planned links are only printed.
///
/// # Errors
///
/// Fails when confirmation is required but stdin is not a terminal, when the prompt cannot be
/// written or answered, when a timestamp cannot be read, when the link target has no usable
/// path, or when a file cannot be replaced.
fn link(duplicates: Vec<Duplicate>, options: Options) -> Result<ExitCode> {
    if duplicates.is_empty() {
        println!("No duplicates, nothing to be done");
        return Ok(ExitCode::Success);
    }

    let mut assignments: Vec<LinkAssignment> = Vec::new();
    for duplicate in duplicates {
        let file_size = duplicate.file_size;
        let mut handles_by_device: FxHashMap<DeviceDescriptor, Vec<FileDescriptorWithPaths>> =
            FxHashMap::default();
        for location in duplicate.locations {
            let dev = location.file_descriptor.device_descriptor;
            handles_by_device.entry(dev).or_default().push(location);
        }
        assignments.extend(
            handles_by_device
                .into_iter()
                .map(|(_, locations)| locations)
                // A single location on a device has nothing to be linked with.
                .filter(|locations| locations.len() > 1)
                .map(|locations| LinkAssignment {
                    file_size,
                    locations,
                }),
        );
    }

    // One location per assignment is kept, all others are replaced.
    let count: u64 = assignments
        .iter()
        .map(|v| v.locations.len() as u64 - 1)
        .sum();
    let data_size: u64 = assignments
        .iter()
        .map(|v| (v.locations.len() as u64 - 1) * v.file_size)
        .sum();

    if !options.force {
        if atty::isnt(Stream::Stdin) {
            return Err(anyhow!(
                "--force parameter needs to be provided to allow linking when reading from stdin"
            ));
        }
        let dry_run_prefix = if options.dry_run { "(dryrun) " } else { "" };
        print!(
            "{}Are you sure you want to link {} files? [y/N]: ",
            dry_run_prefix,
            HumanInteger(count)
        );
        io::stdout().flush()?;
        let mut line = String::new();
        io::stdin()
            .read_line(&mut line)
            .context("Could not read from stdin")?;
        if !line.trim().eq_ignore_ascii_case("y") {
            println!("Canceling link command");
            return Ok(ExitCode::Success);
        }
    }

    // The same random id is shared by all temporary link names of this run.
    let uuid = uuid::Uuid::new_v4();
    for (i, assignment) in assignments.into_iter().enumerate() {
        // Read each timestamp exactly once: a sort key closure would hit the filesystem on every
        // comparison and could not report failures other than by panicking.
        let mut dated_locations = Vec::with_capacity(assignment.locations.len());
        for location in assignment.locations {
            let timestamp = location_timestamp(&location)?;
            dated_locations.push((timestamp, location));
        }
        dated_locations.sort_by_key(|(timestamp, _)| *timestamp);
        let sorted_locations: Vec<FileDescriptorWithPaths> = dated_locations
            .into_iter()
            .map(|(_, location)| location)
            .collect();

        let (target, locations) = match options.link_priority {
            LinkSelectionPreference::Oldest => sorted_locations.split_first(),
            _ => sorted_locations.split_last(),
        }
        .expect("link assignments always contain at least two locations");

        let to_path = target
            .paths
            .iter()
            .next()
            .ok_or_else(|| anyhow!("Link target has no known path"))?;
        let temp_path = to_path
            .parent()
            .ok_or_else(|| anyhow!("Link target {:?} has no parent directory", to_path))?
            .join(format!("dsc.{}.{}", i, uuid));

        for location in locations {
            for from_path in &location.paths {
                if options.dry_run {
                    println!("linking {:?} => {:?}", from_path, to_path);
                } else {
                    replace_with_hard_link(to_path, &temp_path, from_path)?;
                }
            }
        }
    }

    println!("Done. Reclaimed {} of disk space.", HumanBytes(data_size));
    Ok(ExitCode::Success)
}

/// Returns the creation time of the location's first path, falling back to the modification
/// time when the creation time is unavailable. Yields `None` for a location without paths.
fn location_timestamp(
    location: &FileDescriptorWithPaths,
) -> Result<Option<std::time::SystemTime>> {
    let path = match location.paths.iter().next() {
        Some(path) => path,
        None => return Ok(None),
    };
    let metadata = path
        .metadata()
        .with_context(|| format!("Could not read metadata of {:?}", path))?;
    let timestamp = metadata
        .created()
        .or_else(|_| metadata.modified())
        .with_context(|| format!("Could not determine the age of {:?}", path))?;
    Ok(Some(timestamp))
}

/// Replaces `from_path` with a hard link to `to_path`.
///
/// The link is created under `temp_path` and then renamed over `from_path`, so the duplicate is
/// swapped by a rename rather than being deleted first.
fn replace_with_hard_link(to_path: &Path, temp_path: &Path, from_path: &Path) -> Result<()> {
    let outcome =
        fs::hard_link(to_path, temp_path).and_then(|_| fs::rename(temp_path, from_path));
    if outcome.is_err() {
        // Best-effort cleanup only: the temporary link may never have been created, and a
        // failure to remove it must not hide the error that actually went wrong.
        let _ = fs::remove_file(temp_path);
    }
    outcome.with_context(|| {
        format!(
            "Unable to replace {:?} with a link to {:?}",
            from_path, to_path
        )
    })
}
/// A set of identical files on one device that are to be linked together.
///
/// Built by `link`, which only creates assignments holding more than one location.
struct LinkAssignment {
    /// Size in bytes of each file in this assignment.
    file_size: u64,
    /// The files sharing the same content; one of them becomes the link target.
    locations: Vec<FileDescriptorWithPaths>,
}
/// Writes the duplicates to stdout using the writer that matches `options.format`.
///
/// # Errors
///
/// Propagates any failure reported by the selected `ReportWriter`.
fn report(mut duplicates: Vec<Duplicate>, options: Options) -> Result<ExitCode> {
    let mut writer: Box<dyn ReportWriter> = match options.format {
        Format::JSON => Box::new(JSONWriter {}),
        Format::CSV => Box::new(CSVWriter {}),
    };

    let mut stdout = io::stdout();
    writer.write(&mut duplicates, &mut stdout)?;

    Ok(ExitCode::Success)
}