// fif 0.6.0
//
// A command-line tool for detecting and optionally correcting files with incorrect extensions.
// Documentation
// SPDX-FileCopyrightText: 2021-2022 Lynnesbian
// SPDX-License-Identifier: GPL-3.0-or-later

//! Command line argument parsing logic and associated functions.

use std::collections::BTreeSet;
use std::path::PathBuf;

use cfg_if::cfg_if;
use clap::{ArgEnum, Parser};

use crate::utils::{CLAP_LONG_VERSION, CLAP_VERSION};
use crate::String as StringType;

cfg_if! {
	if #[cfg(not(windows))] {
		/// Name of the [`OutputFormat`] used when `--output-format` isn't supplied: a Bourne shell script on every
		/// platform other than Windows.
		const DEFAULT_FORMAT: &str = "sh";
	} else {
		/// Name of the [`OutputFormat`] used when `--output-format` isn't supplied: a PowerShell script on Windows.
		const DEFAULT_FORMAT: &str = "powershell";
	}
}

#[derive(ArgEnum, Eq, PartialEq, Debug, Copy, Clone)]
/// The format to use for fif's output when running without the `--fix` flag. Specified at runtime with the
/// `-o`/`--output-format` flag; when that flag is omitted, the platform-specific `DEFAULT_FORMAT` is used.
pub enum OutputFormat {
	/// A Bourne shell compatible script.
	// also accepted on the command line as `shell` or `bash`
	#[clap(alias = "shell", alias = "bash")]
	Sh,
	/// A PowerShell script.
	// `powershell` is the spelling used by the Windows `DEFAULT_FORMAT`
	#[clap(alias = "powershell")]
	PowerShell,
	/// Plain text.
	Text,
	/// JSON.
	// only exists when fif is compiled with the `json` feature
	#[cfg(feature = "json")]
	Json,
}

#[derive(ArgEnum, Eq, PartialEq, Debug, Copy, Clone)]
/// Specifies under what conditions the user should be prompted when running fif in `--fix` mode. Defaults to `Error`.
/// Specified at runtime with the `-p`/`--prompt` flag.
// NOTE(review): `Parameters::prompt` is an `Option<Prompt>` with no `default_value`, so the `Error` default is
// presumably applied by whatever consumes the parsed arguments - confirm against the caller.
pub enum Prompt {
	/// Never prompt.
	Never,
	/// Prompt only on errors, and on overwrites, if `--overwrite` is set.
	Error,
	/// Prompt for every rename.
	Always,
}

#[derive(Parser, Debug)]
// every bool below is an independent command line flag
#[allow(clippy::struct_excessive_bools)]
#[clap(
	version = CLAP_VERSION.as_str(),
	long_version = CLAP_LONG_VERSION.as_str(),
	author = option_env!("CARGO_PKG_AUTHORS").unwrap_or("Lynnesbian"),
	about = option_env!("CARGO_PKG_DESCRIPTION").unwrap_or("File Info Fixer"),
	before_help = "Copyright © 2021-2022 Lynnesbian under the GPL3 (or later) License.",
	after_long_help = "Copyright © 2021-2022 Lynnesbian\n\
	This program is free software: you can redistribute it and/or modify \
	it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 \
	of the License, or (at your option) any later version.",
	max_term_width = 120,
	setting(clap::AppSettings::DeriveDisplayOrder)
)]
/// [`Parser`]-derived struct used to parse command line arguments.
pub struct Parameters {
	// NOTE(review): this help text says `--fix` prompts for every rename, whereas `Prompt`'s documentation says the
	// default prompt level is `Error` - confirm which one is accurate before rewording either.
	/// Automatically rename files to use the correct extension, prompting the user for every rename.
	#[clap(long, help_heading = "RENAMING")]
	pub fix: bool,

	/// Requires --fix. Should fif prompt you `Never`, only on `Error`s and overwrites, or `Always`?
	#[clap(short = 'p', long, arg_enum, requires = "fix", help_heading = "RENAMING")]
	pub prompt: Option<Prompt>,

	/// Requires --fix. Allow overwriting files. Warning: When used in combination with `--prompt never`, fif will
	/// overwrite files without prompting!
	#[clap(long, requires = "fix", help_heading = "RENAMING")]
	pub overwrite: bool,

	// NOTE: it is impossible (as far as i can tell) to accept extensions with commas in their name. i don't know why
	// you would ever want a file named something like "example.xy,z", though... fif's -e and -x flags don't support this.
	/// Only examine files with these extensions.
	/// Multiple extensions can be specified by either using the flag multiple times (`-e jpg -e png -e gif`), or by
	/// separating them with commas (`-e jpg,png,gif`).
	#[clap(short, long, use_value_delimiter = true, require_value_delimiter = true, value_name = "ext", takes_value = true,
	validator = validate_exts, help_heading = "FILTERING")]
	pub exts: Option<Vec<StringType>>,

	/// Use these preset lists of extensions as the search filter (comma-separated list).
	/// `media` includes all extensions from the `audio`, `video`, and `images` sets, making `-E media` equivalent to
	/// `-E audio,video,images`.
	#[clap(
		short = 'E',
		long,
		arg_enum,
		use_value_delimiter = true,
		require_value_delimiter = true,
		value_name = "set",
		help_heading = "FILTERING"
	)]
	pub ext_set: Vec<ExtensionSet>,

	/// Don't scan files with these extensions.
	/// This option takes precedence over extensions specified with `-e` or `-E`.
	#[clap(short = 'x', long, use_value_delimiter = true, require_value_delimiter = true, value_name = "ext", validator =
	validate_exts, help_heading = "FILTERING")]
	pub exclude: Option<Vec<StringType>>,

	/// Exclude files using a preset list of extensions.
	/// This option takes precedence over extensions specified with `-e` or `-E`.
	#[clap(
		short = 'X',
		long,
		arg_enum,
		use_value_delimiter = true,
		require_value_delimiter = true,
		value_name = "set",
		help_heading = "FILTERING"
	)]
	pub exclude_set: Vec<ExtensionSet>,

	/// Don't skip hidden files and directories.
	/// Even if this flag is not present, fif will still recurse into a hidden root directory - for example, `fif
	/// ~/.hidden` will recurse into `~/.hidden` regardless of whether or not -s was passed as an argument.
	#[clap(short, long, help_heading = "FILTERING")]
	pub scan_hidden: bool,

	/// Scan files without extensions.
	/// By default, fif will ignore files without extensions - for example, a jpeg file named `photo` won't be considered
	/// misnamed. Supplying the -S flag will cause fif to recommend renaming this file to `photo.jpg`.
	#[clap(short = 'S', long, help_heading = "FILTERING")]
	pub scan_extensionless: bool,

	/// Follow symlinks.
	#[clap(short, long, help_heading = "FILTERING")]
	pub follow_symlinks: bool,

	/// Don't rename files with extensions unknown to fif.
	/// For example, with this option, fif will not rename "image.unknown" to "image.jpg"
	#[clap(short = 'I', long, help_heading = "FILTERING")]
	pub ignore_unknown_exts: bool,

	/// Output format to use.
	/// By default, fif will output a PowerShell script on Windows, and a Bourne Shell script on other platforms.
	#[clap(short, long, default_value = DEFAULT_FORMAT, arg_enum, value_name = "format", help_heading = "OUTPUT")]
	pub output_format: OutputFormat,

	// `verbose` and `quiet` share the "verbosity" argument group; both are counted per occurrence and combined into a
	// single log level by `get_verbosity`.
	/// Output verbosity. Each additional `-v` increases verbosity.
	/// Can be overridden by FIF_LOG or RUST_LOG.
	#[clap(short, long, parse(from_occurrences), group = "verbosity", help_heading = "OUTPUT")]
	pub verbose: u8,

	/// Output quietness. Each additional `-q` decreases verbosity.
	/// Can be overridden by FIF_LOG or RUST_LOG.
	#[clap(short, long, parse(from_occurrences), group = "verbosity", help_heading = "OUTPUT")]
	pub quiet: u8,

	/// Use canonical (absolute) paths in output.
	/// A canonical path is the "one true path" to a given file, and is always an absolute path. While a file may have
	/// many absolute paths (for example, on Windows, '\\?\C:\file.txt' and 'C:\file.txt' are both absolute paths to the
	/// same file), it has only one canonical path. This does not affect logged output.
	#[clap(long, help_heading = "OUTPUT")]
	pub canonical_paths: bool,

	/// The directory to process.
	#[clap(name = "DIR", default_value = ".", parse(from_os_str))]
	pub dir: PathBuf,

	// this field (and therefore the `-j` flag) only exists when the `multi-threaded` feature is enabled
	#[cfg(feature = "multi-threaded")]
	/// Number of jobs (threads) to use when scanning results.
	/// The default behaviour is to use one thread per CPU thread. This behaviour can be manually requested by setting
	/// `-j 0`. Using `-j 1` will disable multi-threading behaviour, as if you had compiled fif with the multi-threading
	/// feature disabled. Setting more jobs than you have CPU threads is not recommended.
	#[clap(short = 'j', long, default_value = "0", help_heading = "MISC")]
	pub jobs: usize,
}

/// Validator for the extension arguments: rejects an empty value, and rejects any value that is not identical to
/// its own lowercased form. Returns `Ok(())` for anything else, or an error message describing the problem.
fn validate_exts(exts: &str) -> Result<(), String> {
	// TODO: it would be much nicer to accept uppercase exts and convert them to lowercase, rather than rejecting
	// them outright...
	match exts {
		"" => Err("Cannot specify empty extensions".to_owned()),
		mixed if mixed.to_lowercase() != mixed => Err("Supplied extensions must be lowercase".to_owned()),
		_ => Ok(()),
	}
}

/// Further options relating to scanning. A small, `Copy`able bundle of the scanning-related flags held by
/// [`Parameters`], as built by [`Parameters::get_scan_opts`].
#[derive(Eq, PartialEq, Debug, Copy, Clone)]
// each bool mirrors an independent command line flag
#[allow(clippy::struct_excessive_bools)]
pub struct ScanOpts {
	/// Whether hidden files and directories should be scanned.
	pub hidden: bool,
	/// Whether files without extensions should be scanned.
	pub extensionless: bool,
	/// Whether symlinks should be followed.
	pub follow_symlinks: bool,
	/// Whether files with extensions unknown to fif should be left alone, rather than renamed.
	pub ignore_unknown_exts: bool,
}

impl Parameters {
	/// Returns the set of extensions to be scanned: everything requested via the `-e` or `-E` flags, minus anything
	/// excluded via the `-x` or `-X` flags. Returns `None` if no extensions were requested at all; if every requested
	/// extension was also excluded, the result is `Some` of an empty set.
	pub fn extensions(&self) -> Option<BTreeSet<&str>> {
		// no extensions included - nothing to filter, so bail out with none
		let included = self.included_extensions()?;

		let remaining: BTreeSet<&str> = match self.excluded_extensions() {
			// keep only the included extensions (or "suffixes", really) that weren't also excluded
			Some(excluded) => included.into_iter().filter(|ext| !excluded.contains(ext)).collect(),
			// nothing was excluded, so everything that was included survives
			None => included,
		};

		Some(remaining)
	}

	/// Returns the set of extensions that were specified by `-e` or `-E`, or `None` if that set is empty. Note that
	/// this doesn't take the exclusion flags into account.
	pub fn included_extensions(&self) -> Option<BTreeSet<&str>> {
		// -e
		let explicit = self.exts.iter().flatten().map(|ext| ext.as_str());
		// -E
		let from_sets = self.ext_set.iter().flat_map(|set| set.extensions());

		let included: BTreeSet<&str> = explicit.chain(from_sets).collect();
		if included.is_empty() {
			None
		} else {
			Some(included)
		}
	}

	/// Returns the set of extensions that were specified by `-x` or `-X`, or `None` if that set is empty.
	pub fn excluded_extensions(&self) -> Option<BTreeSet<&str>> {
		// -x
		let explicit = self.exclude.iter().flatten().map(|ext| ext.as_str());
		// -X
		let from_sets = self.exclude_set.iter().flat_map(|set| set.extensions());

		let excluded: BTreeSet<&str> = explicit.chain(from_sets).collect();
		if excluded.is_empty() {
			None
		} else {
			Some(excluded)
		}
	}

	/// Builds a [`ScanOpts`] by copying the scanning-related flags out of [`self`].
	pub const fn get_scan_opts(&self) -> ScanOpts {
		let hidden = self.scan_hidden;
		let extensionless = self.scan_extensionless;
		let follow_symlinks = self.follow_symlinks;
		let ignore_unknown_exts = self.ignore_unknown_exts;

		ScanOpts { hidden, extensionless, follow_symlinks, ignore_unknown_exts }
	}

	/// Converts the number of `-v` and `-q` flags supplied by the user into a log level. Any `-q` takes priority over
	/// `-v`.
	pub const fn get_verbosity(&self) -> log::LevelFilter {
		use log::LevelFilter;

		match (self.quiet, self.verbose) {
			(0, 0) => LevelFilter::Info,  // no verbosity flags specified
			(0, 1) => LevelFilter::Debug, // -v
			(0, _) => LevelFilter::Trace, // -vv...
			(1, _) => LevelFilter::Warn,  // -q
			(2, _) => LevelFilter::Error, // -qq
			_ => LevelFilter::Off,        // -qqq...
		}
	}
}

/// Preset sets of extensions for use with the `-E` and `-X` flags of
/// [`Parameters`](crate::parameters::Parameters). The extensions belonging to each set are listed by
/// [`ExtensionSet::extensions`].
#[derive(ArgEnum, Eq, PartialEq, Debug, Copy, Clone)]
pub enum ExtensionSet {
	/// Extensions used for image file formats, such as `png`, `jpeg`, `webp`, etc.
	Images,
	/// Extensions used for audio file formats, such as `mp3`, `ogg`, `flac`, etc.
	Audio,
	/// Extensions used for video file formats, such as `mkv`, `mp4`, `mov`, etc.
	// also accepted on the command line as `videos`
	#[clap(alias = "videos")]
	Video,
	/// Extensions used for media file formats. This acts as a combination of the [Images](ExtensionSet::Images),
	/// [Audio](ExtensionSet::Audio) and [Video](ExtensionSet::Video) variants.
	Media,
	/// Extensions used for document file formats, such as `pdf`, `odt`, `docx`, etc.
	Documents,
	/// Extensions used for text file formats, such as `txt`, `toml`, `html`, etc.
	Text,
	/// Extensions used for archive file formats, such as `zip`, `zst`, `gz`, etc.
	Archives,
	/// Extensions used for system file formats, such as `mbr`, `crash`, `dll`, etc.
	System,
}

impl ExtensionSet {
	/// Lists every extension that belongs to this `ExtensionSet`.
	///
	/// # Panics
	/// Panics if `mime_guess` has no extensions on record for the `image/*`, `audio/*`, `video/*` or `text/*` types.
	pub fn extensions(&self) -> Vec<&str> {
		// every extension mime_guess associates with the given (wildcard) mime type
		let by_mime = |mime: &str| mime_guess::get_mime_extensions_str(mime).unwrap().to_vec();

		match self {
			Self::Images => by_mime("image/*"),
			Self::Audio => by_mime("audio/*"),
			Self::Video => by_mime("video/*"),
			Self::Media => {
				// images, then audio, then video
				let mut media = Self::Images.extensions();
				media.extend(Self::Audio.extensions());
				media.extend(Self::Video.extensions());
				media
			}
			Self::Documents => vec![
				"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "csv", "tsv", "odt", "ods", "odp", "oda", "rtf", "ps",
				"pages", "key", "numbers",
			],
			Self::Text => {
				// everything mime_guess knows as text, followed by some hand-picked script extensions
				let mut text = by_mime("text/*");
				text.extend_from_slice(&["js", "pl", "csh", "sh", "bash", "zsh", "fish", "bat", "php"]);
				text
			}
			// many compressed file types follow the name scheme "application/x.+compressed.*" - perhaps mime_guess
			// could be used to extract the extensions for compressed files as well?
			Self::Archives => vec![
				"zip", "tar", "gz", "zst", "xz", "rar", "7z", "bz", "bz2", "tgz", "rpa", "txz", "tz2", "sea", "sitx", "z",
				"cpio",
			],
			Self::System => vec![
				"com", "dll", "exe", "sys", "reg", "nt", "cpl", "msi", "efi", "bio", "rcv", "mbr", "sbf", "grub", "ko",
				"dylib", "pdb", "hdmp", "crash", "cab",
			],
		}
	}
}