use clap::{Args, Parser, Subcommand, ValueEnum};
use std::path::PathBuf;
// Top-level command-line interface for `rustdupe`, parsed with clap's derive API.
//
// Every option declared directly on this struct is `global = true`, so clap
// also accepts it after a subcommand name.
//
// NOTE: plain `//` comments are used on purpose. clap's derive macros turn
// `///` doc comments into help text, so `///` would change `--help` output.
#[derive(Debug, Parser)]
#[command(name = "rustdupe")]
#[command(author, version, about, long_about = None)]
#[command(propagate_version = true)]
pub struct Cli {
// `-v` / `--verbose`: counted flag (`ArgAction::Count`), so `-vv` yields 2.
#[arg(short, long, action = clap::ArgAction::Count, global = true)]
pub verbose: u8,
// `-q` / `--quiet`: clap rejects it when combined with `--verbose`.
#[arg(short, long, global = true, conflicts_with = "verbose")]
pub quiet: bool,
// `--no-color`: can also be supplied through the `NO_COLOR` environment variable.
#[arg(long, global = true, env = "NO_COLOR")]
pub no_color: bool,
// `--theme <auto|light|dark>`: defaults to `auto`.
#[arg(long, value_enum, default_value = "auto", global = true)]
pub theme: ThemeArg,
// The subcommand selected on the command line.
#[command(subcommand)]
pub command: Commands,
}
// Subcommands understood by the CLI.
//
// NOTE: plain `//` comments are used on purpose; clap would turn `///` on a
// variant into that subcommand's help text.
#[derive(Debug, Subcommand)]
pub enum Commands {
// `scan`: arguments are boxed, presumably to keep this enum small given how
// many fields `ScanArgs` carries (TODO confirm intent).
Scan(Box<ScanArgs>),
// `load`: takes a session file plus output options (see `LoadArgs`).
Load(LoadArgs),
}
// Arguments of the `scan` subcommand.
//
// Only the command-line surface is described here; what each option does at
// run time is implemented outside this file.
//
// NOTE: plain `//` comments are used on purpose. clap's derive macros turn
// `///` doc comments into help text, so `///` would change `--help` output.
#[derive(Debug, Args)]
pub struct ScanArgs {
// Positional `PATH`. Required unless `--load-session` is given.
#[arg(value_name = "PATH", required_unless_present = "load_session")]
pub path: Option<PathBuf>,
// `--load-session <SESSION_FILE>`: mutually exclusive with the positional `PATH`.
#[arg(
long,
value_name = "SESSION_FILE",
conflicts_with = "path",
help_heading = "Scanning Options"
)]
pub load_session: Option<PathBuf>,
// `--save-session <PATH>`.
#[arg(long, value_name = "PATH", help_heading = "Output Options")]
pub save_session: Option<PathBuf>,
// `-o` / `--output <FORMAT>`: one of the `OutputFormat` values, default `tui`.
#[arg(
short,
long,
value_enum,
default_value = "tui",
help_heading = "Output Options"
)]
pub output: OutputFormat,
// `--output-file <PATH>`.
#[arg(long, value_name = "PATH", help_heading = "Output Options")]
pub output_file: Option<PathBuf>,
// `--script-type <TYPE>`: one of the `ScriptTypeArg` values. Presumably only
// meaningful together with `--output script` (not enforced here).
#[arg(long, value_enum, value_name = "TYPE", help_heading = "Output Options")]
pub script_type: Option<ScriptTypeArg>,
// `--min-size <SIZE>`: converted to a byte count by `parse_size`.
#[arg(long, value_name = "SIZE", value_parser = parse_size, help_heading = "Filtering Options")]
pub min_size: Option<u64>,
// `--max-size <SIZE>`: converted to a byte count by `parse_size`.
#[arg(long, value_name = "SIZE", value_parser = parse_size, help_heading = "Filtering Options")]
pub max_size: Option<u64>,
// `--newer-than <DATE>`: `YYYY-MM-DD`, converted by `parse_date`.
#[arg(long, value_name = "DATE", value_parser = parse_date, help_heading = "Filtering Options")]
pub newer_than: Option<std::time::SystemTime>,
// `--older-than <DATE>`: `YYYY-MM-DD`, converted by `parse_date`.
#[arg(long, value_name = "DATE", value_parser = parse_date, help_heading = "Filtering Options")]
pub older_than: Option<std::time::SystemTime>,
// `--regex <PATTERN>` (alias `--regex-include`): may be repeated. Patterns are
// stored as raw strings; they are not compiled or validated in this file.
#[arg(
long = "regex",
alias = "regex-include",
value_name = "PATTERN",
help_heading = "Filtering Options"
)]
pub regex_include: Vec<String>,
// `--regex-exclude <PATTERN>`: may be repeated; stored as raw strings.
#[arg(
long = "regex-exclude",
value_name = "PATTERN",
help_heading = "Filtering Options"
)]
pub regex_exclude: Vec<String>,
// `--file-type <TYPE>`: may be repeated; each value is a `FileType`.
#[arg(
long = "file-type",
value_enum,
value_name = "TYPE",
help_heading = "Filtering Options"
)]
pub file_types: Vec<FileType>,
// `-i` / `--ignore <PATTERN>`: may be repeated; stored as raw strings.
#[arg(
short,
long = "ignore",
value_name = "PATTERN",
help_heading = "Filtering Options"
)]
pub ignore_patterns: Vec<String>,
// `--follow-symlinks` flag.
#[arg(long, help_heading = "Scanning Options")]
pub follow_symlinks: bool,
// `--skip-hidden` flag.
#[arg(long, help_heading = "Scanning Options")]
pub skip_hidden: bool,
// `--io-threads <N>`: defaults to 4. No lower bound is enforced here, so 0 parses.
#[arg(
long,
value_name = "N",
default_value = "4",
help_heading = "Scanning Options"
)]
pub io_threads: usize,
// `--paranoid` flag.
#[arg(long, help_heading = "Scanning Options")]
pub paranoid: bool,
// `--permanent` flag.
#[arg(long, help_heading = "Safety & Deletion Options")]
pub permanent: bool,
// `-y` / `--yes` flag.
#[arg(short = 'y', long, help_heading = "Safety & Deletion Options")]
pub yes: bool,
// `--cache <PATH>`.
#[arg(long, value_name = "PATH", help_heading = "Cache Options")]
pub cache: Option<PathBuf>,
// `--no-cache` flag: mutually exclusive with `--cache`.
#[arg(long, conflicts_with = "cache", help_heading = "Cache Options")]
pub no_cache: bool,
// `--clear-cache` flag: may be combined with `--cache`.
#[arg(long, help_heading = "Cache Options")]
pub clear_cache: bool,
// `--dry-run` flag (alias `--analyze-only`).
#[arg(
long,
alias = "analyze-only",
help_heading = "Safety & Deletion Options"
)]
pub dry_run: bool,
// `--reference <PATH>`: may be repeated.
#[arg(
long = "reference",
value_name = "PATH",
help_heading = "Safety & Deletion Options"
)]
pub reference_paths: Vec<PathBuf>,
}
// Arguments of the `load` subcommand.
//
// NOTE: plain `//` comments are used on purpose. clap's derive macros turn
// `///` doc comments into help text, so `///` would change `--help` output.
#[derive(Debug, Args)]
pub struct LoadArgs {
// Required positional `SESSION_FILE`.
#[arg(value_name = "SESSION_FILE")]
pub path: PathBuf,
// `-o` / `--output <FORMAT>`: one of the `OutputFormat` values, default `tui`.
#[arg(
short,
long,
value_enum,
default_value = "tui",
help_heading = "Output Options"
)]
pub output: OutputFormat,
// `--output-file <PATH>`.
#[arg(long, value_name = "PATH", help_heading = "Output Options")]
pub output_file: Option<PathBuf>,
// `--script-type <TYPE>`: one of the `ScriptTypeArg` values.
#[arg(long, value_enum, value_name = "TYPE", help_heading = "Output Options")]
pub script_type: Option<ScriptTypeArg>,
// `--dry-run` flag (alias `--analyze-only`). Listed under "Safety Options",
// whereas `ScanArgs` uses the heading "Safety & Deletion Options".
#[arg(long, alias = "analyze-only", help_heading = "Safety Options")]
pub dry_run: bool,
}
// Values accepted by `--output` on both the `scan` and `load` subcommands.
//
// NOTE: plain `//` comments are used on purpose; clap's `ValueEnum` derive
// turns `///` on a variant into help text for that value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
pub enum OutputFormat {
// Default for `--output` (`default_value = "tui"`).
Tui,
Json,
Csv,
Html,
Session,
// Used together with `--script-type` in this file's tests.
Script,
}
// Values accepted by `--script-type`.
//
// NOTE: plain `//` comments are used on purpose; clap's `ValueEnum` derive
// turns `///` on a variant into help text for that value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
pub enum ScriptTypeArg {
// Spelled `posix` on the command line.
Posix,
// Spelled `powershell` on the command line.
Powershell,
}
// Values accepted by `--file-type`. Each variant converts one-to-one into the
// `crate::scanner::FileCategory` variant of the same name (see the `From` impl).
//
// NOTE: plain `//` comments are used on purpose; clap's `ValueEnum` derive
// turns `///` on a variant into help text for that value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
pub enum FileType {
Images,
Videos,
Audio,
Documents,
Archives,
}
// Values accepted by the global `--theme` option. The type is also
// serde-serializable and deserializable.
//
// NOTE: plain `//` comments are used on purpose; clap's `ValueEnum` derive
// turns `///` on a variant into help text for that value.
#[derive(
Debug, Clone, Copy, PartialEq, Eq, ValueEnum, Default, serde::Serialize, serde::Deserialize,
)]
pub enum ThemeArg {
// `Default::default()` value, and the CLI default (`default_value = "auto"` on `Cli::theme`).
#[default]
Auto,
Light,
Dark,
}
/// Converts the CLI-facing [`FileType`] into the scanner's `FileCategory`.
///
/// The mapping is one-to-one by variant name. The match is exhaustive, so
/// adding a `FileType` variant forces this conversion to be updated.
impl From<FileType> for crate::scanner::FileCategory {
    fn from(file_type: FileType) -> Self {
        match file_type {
            FileType::Images => Self::Images,
            FileType::Videos => Self::Videos,
            FileType::Audio => Self::Audio,
            FileType::Documents => Self::Documents,
            FileType::Archives => Self::Archives,
        }
    }
}
impl std::fmt::Display for OutputFormat {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
OutputFormat::Tui => write!(f, "tui"),
OutputFormat::Json => write!(f, "json"),
OutputFormat::Csv => write!(f, "csv"),
OutputFormat::Html => write!(f, "html"),
OutputFormat::Session => write!(f, "session"),
OutputFormat::Script => write!(f, "script"),
}
}
}
/// Parses a human-readable size (e.g. `4096`, `10MB`, `1.5 GiB`) into a byte count.
///
/// Surrounding whitespace and whitespace between the number and the unit are
/// ignored, and units are case-insensitive. `K`/`KB`, `M`/`MB`, `G`/`GB` and
/// `T`/`TB` are decimal (powers of 1000); `KiB`, `MiB`, `GiB` and `TiB` are
/// binary (powers of 1024). A missing unit or `B` means bytes.
///
/// Whole numbers use exact integer arithmetic. Fractional numbers go through
/// `f64`, and any fractional byte in the result is truncated.
///
/// # Errors
///
/// Returns a message when the input is empty, negative, not a number, uses an
/// unknown unit, or does not fit in a `u64`.
pub fn parse_size(s: &str) -> Result<u64, String> {
    let s = s.trim();
    if s.is_empty() {
        return Err("Size cannot be empty".to_string());
    }
    // The numeric prefix below only admits digits and '.', so without this
    // check a signed value would be reported as "Invalid number: ''".
    if s.starts_with('-') {
        return Err("Size cannot be negative".to_string());
    }

    let (num_str, suffix) = match s.find(|c: char| !c.is_ascii_digit() && c != '.') {
        Some(idx) => (&s[..idx], s[idx..].trim().to_uppercase()),
        None => (s, String::new()),
    };

    // Validate the number before the unit so that input such as "abc" is
    // reported as a bad number rather than a bad unit.
    let approx: f64 = num_str
        .parse()
        .map_err(|_| format!("Invalid number: '{num_str}'"))?;

    let multiplier: u64 = match suffix.as_str() {
        "" | "B" => 1,
        "KB" | "K" => 1_000,
        "KIB" => 1_024,
        "MB" | "M" => 1_000_000,
        "MIB" => 1_048_576,
        "GB" | "G" => 1_000_000_000,
        "GIB" => 1_073_741_824,
        "TB" | "T" => 1_000_000_000_000,
        "TIB" => 1_099_511_627_776,
        _ => return Err(format!("Unknown size suffix: '{suffix}'")),
    };

    let too_large = || format!("Size too large: '{s}'");

    // Whole numbers stay in integer arithmetic: f64 cannot represent every
    // integer above 2^53, and overflow must be an error, not a silent clamp.
    if let Ok(whole) = num_str.parse::<u64>() {
        return whole.checked_mul(multiplier).ok_or_else(too_large);
    }

    // Fractional input, or a digit string too long for u64. Every multiplier
    // is below 2^53, so converting it to f64 is exact.
    let bytes = approx * multiplier as f64;
    // `u64::MAX as f64` rounds up to 2^64, hence `>=`. A float-to-int `as`
    // cast saturates, so out-of-range values have to be rejected explicitly.
    if !bytes.is_finite() || bytes >= u64::MAX as f64 {
        return Err(too_large());
    }
    Ok(bytes as u64)
}
/// Parses a `YYYY-MM-DD` calendar date into a [`std::time::SystemTime`].
///
/// The date is interpreted as midnight (00:00:00) UTC of that day. The input
/// is not trimmed, so surrounding whitespace is handed to the parser as-is.
///
/// # Errors
///
/// Returns a message containing the underlying chrono error when `s` is not a
/// valid date in `%Y-%m-%d` form (for example `2026-02-31`).
pub fn parse_date(s: &str) -> Result<std::time::SystemTime, String> {
    use chrono::{NaiveDate, TimeZone, Utc};

    let date = NaiveDate::parse_from_str(s, "%Y-%m-%d")
        .map_err(|e| format!("Invalid date format (expected YYYY-MM-DD): {e}"))?;
    // 00:00:00 is a valid time of day, so this cannot be `None`.
    let midnight = date.and_hms_opt(0, 0, 0).unwrap();
    let utc_instant = Utc.from_utc_datetime(&midnight);
    Ok(std::time::SystemTime::from(utc_instant))
}
// Unit tests for the size and date parsers and for the clap-derived parser.
// CLI tests call `Cli::try_parse_from`, so nothing reads the real process arguments.
#[cfg(test)]
mod tests {
use super::*;
// Bare numbers and an explicit `B` suffix are taken as bytes.
#[test]
fn test_parse_size_bytes() {
assert_eq!(parse_size("1024").unwrap(), 1024);
assert_eq!(parse_size("1024B").unwrap(), 1024);
assert_eq!(parse_size("0").unwrap(), 0);
}
// `K`/`KB` are decimal (1000), `KiB` is binary (1024); suffix case is ignored.
#[test]
fn test_parse_size_kilobytes() {
assert_eq!(parse_size("1KB").unwrap(), 1_000);
assert_eq!(parse_size("1K").unwrap(), 1_000);
assert_eq!(parse_size("1KiB").unwrap(), 1_024);
assert_eq!(parse_size("1kib").unwrap(), 1_024); }
// Decimal and binary megabyte suffixes.
#[test]
fn test_parse_size_megabytes() {
assert_eq!(parse_size("1MB").unwrap(), 1_000_000);
assert_eq!(parse_size("1MiB").unwrap(), 1_048_576);
assert_eq!(parse_size("10MB").unwrap(), 10_000_000);
}
// Decimal and binary gigabyte suffixes.
#[test]
fn test_parse_size_gigabytes() {
assert_eq!(parse_size("1GB").unwrap(), 1_000_000_000);
assert_eq!(parse_size("1GiB").unwrap(), 1_073_741_824);
}
// Decimal and binary terabyte suffixes.
#[test]
fn test_parse_size_terabytes() {
assert_eq!(parse_size("1TB").unwrap(), 1_000_000_000_000);
assert_eq!(parse_size("1TiB").unwrap(), 1_099_511_627_776);
}
// Fractional quantities are accepted.
#[test]
fn test_parse_size_fractional() {
assert_eq!(parse_size("1.5MB").unwrap(), 1_500_000);
assert_eq!(parse_size("0.5GB").unwrap(), 500_000_000);
}
// Surrounding whitespace and whitespace between number and unit are tolerated.
#[test]
fn test_parse_size_with_whitespace() {
assert_eq!(parse_size(" 1024 ").unwrap(), 1024);
assert_eq!(parse_size("1 MB").unwrap(), 1_000_000);
}
// Empty, non-numeric, unknown-suffix and negative inputs are rejected.
#[test]
fn test_parse_size_errors() {
assert!(parse_size("").is_err());
assert!(parse_size("abc").is_err());
assert!(parse_size("1XB").is_err());
assert!(parse_size("-1MB").is_err());
}
// `--help` surfaces as an `Err` from `try_parse_from`.
#[test]
fn test_cli_parse_help() {
let result = Cli::try_parse_from(["rustdupe", "--help"]);
assert!(result.is_err());
}
// Minimal `scan` invocation: positional path only, defaults elsewhere.
#[test]
fn test_cli_parse_scan_basic() {
let cli = Cli::try_parse_from(["rustdupe", "scan", "/some/path"]).unwrap();
assert_eq!(cli.verbose, 0);
match cli.command {
Commands::Scan(args) => {
assert_eq!(args.path, Some(PathBuf::from("/some/path")));
assert_eq!(args.output, OutputFormat::Tui);
}
_ => panic!("Expected Scan command"),
}
}
// Filtering options, including repeatable `--ignore`, plus a global `-v`
// given before the subcommand.
#[test]
fn test_cli_parse_scan_with_options() {
let cli = Cli::try_parse_from([
"rustdupe",
"-v",
"scan",
"/path",
"--output",
"json",
"--min-size",
"1MB",
"--max-size",
"1GB",
"--newer-than",
"2026-01-01",
"--older-than",
"2026-12-31",
"--ignore",
"*.tmp",
"--ignore",
"node_modules",
"--regex",
"foo.*",
"--regex-exclude",
"bar.*",
])
.unwrap();
assert_eq!(cli.verbose, 1);
match cli.command {
Commands::Scan(args) => {
assert_eq!(args.output, OutputFormat::Json);
assert_eq!(args.min_size, Some(1_000_000));
assert_eq!(args.max_size, Some(1_000_000_000));
assert!(args.newer_than.is_some());
assert!(args.older_than.is_some());
assert_eq!(args.ignore_patterns, vec!["*.tmp", "node_modules"]);
assert_eq!(args.regex_include, vec!["foo.*"]);
assert_eq!(args.regex_exclude, vec!["bar.*"]);
}
_ => panic!("Expected Scan command"),
}
}
// `--file-type` may be repeated; values are collected in order.
#[test]
fn test_cli_parse_scan_file_types() {
let cli = Cli::try_parse_from([
"rustdupe",
"scan",
"/path",
"--file-type",
"images",
"--file-type",
"documents",
])
.unwrap();
match cli.command {
Commands::Scan(args) => {
assert_eq!(args.file_types, vec![FileType::Images, FileType::Documents]);
}
_ => panic!("Expected Scan command"),
}
}
// A valid date parses; an impossible calendar date and free text do not.
#[test]
fn test_parse_date() {
assert!(parse_date("2026-02-01").is_ok());
assert!(parse_date("2026-02-31").is_err()); assert!(parse_date("not-a-date").is_err());
}
// `scan` with `--output script --script-type posix`.
#[test]
fn test_cli_parse_scan_script() {
let cli = Cli::try_parse_from([
"rustdupe",
"scan",
"/path",
"--output",
"script",
"--script-type",
"posix",
])
.unwrap();
match cli.command {
Commands::Scan(args) => {
assert_eq!(args.output, OutputFormat::Script);
assert_eq!(args.script_type, Some(ScriptTypeArg::Posix));
}
_ => panic!("Expected Scan command"),
}
}
// `load` with `--output script --script-type powershell`.
#[test]
fn test_cli_parse_load_script() {
let cli = Cli::try_parse_from([
"rustdupe",
"load",
"session.json",
"--output",
"script",
"--script-type",
"powershell",
])
.unwrap();
match cli.command {
Commands::Load(args) => {
assert_eq!(args.output, OutputFormat::Script);
assert_eq!(args.script_type, Some(ScriptTypeArg::Powershell));
}
_ => panic!("Expected Load command"),
}
}
// `-v` and `-q` together are rejected (`conflicts_with`).
#[test]
fn test_cli_quiet_conflicts_with_verbose() {
let result = Cli::try_parse_from(["rustdupe", "-v", "-q", "scan", "/path"]);
assert!(result.is_err());
}
// `--output csv` selects `OutputFormat::Csv`.
#[test]
fn test_cli_parse_scan_csv() {
let cli = Cli::try_parse_from(["rustdupe", "scan", "/path", "--output", "csv"]).unwrap();
match cli.command {
Commands::Scan(args) => {
assert_eq!(args.output, OutputFormat::Csv);
}
_ => panic!("Expected Scan command"),
}
}
// `-q` alone sets `quiet` and leaves the verbosity counter at zero.
#[test]
fn test_cli_parse_quiet() {
let cli = Cli::try_parse_from(["rustdupe", "-q", "scan", "/path"]).unwrap();
assert!(cli.quiet);
assert_eq!(cli.verbose, 0);
}
// Boolean scan flags and `--io-threads` parse together.
#[test]
fn test_cli_parse_scan_all_flags() {
let cli = Cli::try_parse_from([
"rustdupe",
"scan",
"/path",
"--follow-symlinks",
"--skip-hidden",
"--io-threads",
"8",
"--paranoid",
"--permanent",
"--yes",
])
.unwrap();
match cli.command {
Commands::Scan(args) => {
assert!(args.follow_symlinks);
assert!(args.skip_hidden);
assert_eq!(args.io_threads, 8);
assert!(args.paranoid);
assert!(args.permanent);
assert!(args.yes);
}
_ => panic!("Expected Scan command"),
}
}
// `NO_COLOR` in the environment turns on `no_color` without the flag.
//
// NOTE(review): this mutates process-wide environment state. The test harness
// runs tests on multiple threads by default and every other CLI test reads
// `NO_COLOR` through clap, so this can race with them. The variable is also
// not removed if the assertion panics. Consider serialising or isolating it.
#[test]
fn test_cli_no_color_env() {
std::env::set_var("NO_COLOR", "true");
let cli = Cli::try_parse_from(["rustdupe", "scan", "/path"]).unwrap();
assert!(cli.no_color);
std::env::remove_var("NO_COLOR");
}
// Unknown subcommands are rejected.
#[test]
fn test_cli_invalid_subcommand() {
let result = Cli::try_parse_from(["rustdupe", "invalid", "/path"]);
assert!(result.is_err());
}
// `scan` with neither a path nor `--load-session` is rejected.
#[test]
fn test_cli_missing_path() {
let result = Cli::try_parse_from(["rustdupe", "scan"]);
assert!(result.is_err());
}
// `--version` surfaces as an `Err` from `try_parse_from`.
#[test]
fn test_cli_version_flag() {
let result = Cli::try_parse_from(["rustdupe", "--version"]);
assert!(result.is_err()); }
// Session options: `--save-session`, repeatable `--reference`, `--dry-run`
// and its `--analyze-only` alias, and `--load-session` without a path.
#[test]
fn test_cli_parse_scan_session_flags() {
let cli = Cli::try_parse_from([
"rustdupe",
"scan",
"/path",
"--save-session",
"session.json",
"--reference",
"/ref1",
"--reference",
"/ref2",
"--dry-run",
])
.unwrap();
match cli.command {
Commands::Scan(args) => {
assert_eq!(args.path, Some(PathBuf::from("/path")));
assert_eq!(args.save_session, Some(PathBuf::from("session.json")));
assert_eq!(
args.reference_paths,
vec![PathBuf::from("/ref1"), PathBuf::from("/ref2")]
);
assert!(args.dry_run);
}
_ => panic!("Expected Scan command"),
}
// The alias must set the same field as `--dry-run`.
let cli = Cli::try_parse_from(["rustdupe", "scan", "/path", "--analyze-only"]).unwrap();
match cli.command {
Commands::Scan(args) => {
assert!(args.dry_run);
}
_ => panic!("Expected Scan command"),
}
// `--load-session` satisfies the otherwise required positional path.
let cli =
Cli::try_parse_from(["rustdupe", "scan", "--load-session", "session.json"]).unwrap();
match cli.command {
Commands::Scan(args) => {
assert_eq!(args.path, None);
assert_eq!(args.load_session, Some(PathBuf::from("session.json")));
}
_ => panic!("Expected Scan command"),
}
}
// `load` takes the session file as its positional argument.
#[test]
fn test_cli_parse_load_subcommand() {
let cli =
Cli::try_parse_from(["rustdupe", "load", "session.json", "--output", "json"]).unwrap();
match cli.command {
Commands::Load(args) => {
assert_eq!(args.path, PathBuf::from("session.json"));
assert_eq!(args.output, OutputFormat::Json);
}
_ => panic!("Expected Load command"),
}
}
// `--cache` combines with `--clear-cache`; `--no-cache` parses on its own.
#[test]
fn test_cli_parse_cache_flags() {
let cli = Cli::try_parse_from([
"rustdupe",
"scan",
"/path",
"--cache",
"mycache.db",
"--clear-cache",
])
.unwrap();
match cli.command {
Commands::Scan(args) => {
assert_eq!(args.cache, Some(PathBuf::from("mycache.db")));
assert!(args.clear_cache);
assert!(!args.no_cache);
}
_ => panic!("Expected Scan command"),
}
let cli = Cli::try_parse_from(["rustdupe", "scan", "/path", "--no-cache"]).unwrap();
match cli.command {
Commands::Scan(args) => {
assert!(args.no_cache);
assert!(args.cache.is_none());
}
_ => panic!("Expected Scan command"),
}
}
}