use crate::BoxResult;
use crate::config::{Extractor as ExtractorConfig, Tool as ToolConfig};
use crate::ignore_link;
use crate::result;
use async_std::io::BufReadExt;
use clap::builder::ValueParser;
use clap::command;
use clap::value_parser;
use clap::{Arg, ArgAction, ArgMatches, Command, ValueHint};
use cli_utils::StreamIdent;
use cli_utils::path_buf::PathBuf;
use const_format::formatcp;
use futures::StreamExt;
use futures::pin_mut;
use std::collections::HashSet;
use std::sync::LazyLock;
use std::{env, io};
use wildmatch::WildMatch;
// Naming scheme used by the constants below:
// - `A_N_*`: argument id of an argument that has no short or long name
// - `A_L_*`: long option name; also used as the argument id
// - `A_S_*`: short (single character) option name
// - `HH_*`:  help heading under which arguments are grouped

/// Argument id of the markup files given directly on the command line.
pub const A_N_MARKUP_FILES: &str = "markup_files";
/// Long name of the option taking a file that lists the markup files.
pub const A_L_MARKUP_FILES_LIST: &str = "markup-files-list";
/// Short name of the option taking a file that lists the markup files.
pub const A_S_MARKUP_FILES_LIST: char = 'I';
/// Long name of the version flag.
pub const A_L_VERSION: &str = "version";
/// Short name of the version flag.
pub const A_S_VERSION: char = 'V';
/// Short name of the quiet flag.
pub const A_S_QUIET: char = 'q';
/// Long name of the quiet flag.
pub const A_L_QUIET: &str = "quiet";
/// Long name of the flag that disables link extraction.
pub const A_L_NO_LINKS: &str = "no-links";
/// Short name of the flag that disables link extraction.
pub const A_S_NO_LINKS: char = 'n';
/// Long name of the option that enables anchor extraction.
pub const A_L_ANCHORS: &str = "anchors";
/// Short name of the option that enables anchor extraction.
pub const A_S_ANCHORS: char = 'a';
/// Long name of the option listing link patterns to be ignored.
pub const A_L_IGNORE_LINKS: &str = "ignore-links";
/// Short name of the option listing link patterns to be ignored.
pub const A_S_IGNORE_LINKS: char = 'i';
/// Long name of the option naming the file to store extracted links to.
pub const A_L_LINKS_FILE: &str = "links-file";
/// Short name of the option naming the file to store extracted links to.
pub const A_S_LINKS_FILE: char = 'P';
/// Long name of the option selecting the output data format.
pub const A_L_RESULT_FORMAT: &str = "result-format";
/// Short name of the option selecting the output data format.
pub const A_S_RESULT_FORMAT: char = 'F';
/// Long name of the flag requesting additional properties in the output.
pub const A_L_RESULT_EXTENDED: &str = "result-extended";
/// Short name of the flag requesting additional properties in the output.
pub const A_S_RESULT_EXTENDED: char = 'E';
/// Long name of the flag requesting a flush after each output item.
pub const A_L_RESULT_FLUSH: &str = "result-flush";
/// Short name of the flag requesting a flush after each output item.
pub const A_S_RESULT_FLUSH: char = 'f';
/// Help heading used for the version and quiet flags.
pub const HH_VERBOSITY: &str = "Verbosity";
/// Help heading used for the less commonly needed options.
pub const HH_ADVANCED: &str = "Advanced";
/// Builds the `-V`/`--version` flag, grouped under the "Verbosity" heading.
///
/// The flag takes no value; it is stored as `true` when given.
#[must_use]
pub fn arg_version() -> Arg {
    let flag = Arg::new(A_L_VERSION)
        .short(A_S_VERSION)
        .long(A_L_VERSION)
        .action(ArgAction::SetTrue);
    flag.help_heading(HH_VERBOSITY)
        .help("Print version information and exit")
        .long_help(formatcp!(
            "Print version information and exit. \
            May be combined with -{A_S_QUIET},--{A_L_QUIET}, \
            to really only output the version string."
        ))
}
/// Builds the `-q`/`--quiet` flag, grouped under the "Verbosity" heading.
///
/// The flag takes no value; it is stored as `true` when given.
#[must_use]
pub fn arg_quiet() -> Arg {
    let flag = Arg::new(A_L_QUIET)
        .short(A_S_QUIET)
        .long(A_L_QUIET)
        .action(ArgAction::SetTrue);
    flag.help_heading(HH_VERBOSITY)
        .help("Minimize or suppress output to stdout")
        .long_help(
            "Minimize or suppress output to stdout, \
            and only shows log output on stderr.",
        )
}
/// Builds the argument collecting the markup files passed directly on the CLI.
///
/// It has neither a short nor a long name, accepts one or more paths,
/// and is mutually exclusive with the markup-files-list option.
/// It is required unless the version flag or the list option is present.
#[must_use]
pub fn arg_markup_files() -> Arg {
    // NOTE(review): the hint is `DirPath` although the value is named
    // `MARKUP_FILE` - confirm whether directories are really intended here.
    let values = Arg::new(A_N_MARKUP_FILES)
        .value_name("MARKUP_FILE")
        .value_hint(ValueHint::DirPath)
        .value_parser(value_parser!(PathBuf))
        .num_args(1..)
        .action(ArgAction::Append);
    values
        .help("The markup files to extract links and/or anchors from")
        .conflicts_with(A_L_MARKUP_FILES_LIST)
        .required_unless_present_any([A_L_VERSION, A_L_MARKUP_FILES_LIST])
}
/// Builds the `-I`/`--markup-files-list` option.
///
/// It takes exactly one path to a file that lists the markup files,
/// and is mutually exclusive with markup files given directly.
/// It is required unless the version flag or direct markup files are present.
#[must_use]
pub fn arg_markup_files_list() -> Arg {
    let option = Arg::new(A_L_MARKUP_FILES_LIST)
        .short(A_S_MARKUP_FILES_LIST)
        .long(A_L_MARKUP_FILES_LIST)
        .value_name("LIST_FILE")
        .value_parser(value_parser!(PathBuf))
        .num_args(1)
        .action(ArgAction::Set);
    option
        .help(
            "A file containing a list of markup files \
            to extract links and/or anchors from; one per line.",
        )
        .conflicts_with(A_N_MARKUP_FILES)
        .required_unless_present_any([A_L_VERSION, A_N_MARKUP_FILES])
}
#[must_use]
pub fn arg_no_links() -> Arg {
Arg::new(A_L_NO_LINKS)
.help_heading(HH_ADVANCED)
.help("Do not extract links")
.long_help(
"Do not extract links. \
See -{A_S_ANCHORS},--{A_L_ANCHORS}.",
)
.short(A_S_NO_LINKS)
.long(A_L_NO_LINKS)
.requires(A_L_ANCHORS)
.action(ArgAction::SetTrue)
}
/// Builds the `-a`/`--anchors` option, grouped under the "Advanced" heading.
///
/// Giving the option enables the extraction of anchors.
/// It optionally takes one path: the file to store the anchors to.
/// When the option is given without a value,
/// `cli_utils::STREAM_PATH_STR` is used as the value.
#[must_use]
pub fn arg_anchors() -> Arg {
    Arg::new(A_L_ANCHORS)
        .help_heading(HH_ADVANCED)
        .help(
            "Enable extract of anchors, \
            and optionally the file to store them to",
        )
        .num_args(0..=1)
        .value_name("FILE")
        .short(A_S_ANCHORS)
        .long(A_L_ANCHORS)
        .value_parser(value_parser!(PathBuf))
        .action(ArgAction::Set)
        // This has to be the default for a *missing value*,
        // not a default for a *missing option*:
        // a plain default value would make the option
        // count as present in every invocation,
        // so its absence could never be detected
        // and anchor extraction could not be left disabled.
        .default_missing_value(cli_utils::STREAM_PATH_STR)
}
/// Builds the `-i`/`--ignore-links` option, grouped under the "Advanced" heading.
///
/// It takes one or more values, each parsed with `ignore_link::parse`;
/// values of repeated occurrences are accumulated.
#[must_use]
pub fn arg_ignore_links() -> Arg {
    let option = Arg::new(A_L_IGNORE_LINKS)
        .short(A_S_IGNORE_LINKS)
        .long(A_L_IGNORE_LINKS)
        .value_name("GLOB")
        .value_parser(ValueParser::new(ignore_link::parse))
        .num_args(1..)
        .action(ArgAction::Append);
    option
        .help_heading(HH_ADVANCED)
        .help("List of links which will not be extracted; space separated")
        .long_help(
            "One or more wildcard-patterns/globs, matching links \
            which will not be extracted; separated by white-space.",
        )
}
/// Builds the `-P`/`--links-file` option, grouped under the "Advanced" heading.
///
/// It takes exactly one path: the file to store the extracted links to.
#[must_use]
pub fn arg_links_file() -> Arg {
    let option = Arg::new(A_L_LINKS_FILE)
        .short(A_S_LINKS_FILE)
        .long(A_L_LINKS_FILE)
        .value_name("FILE")
        .value_hint(ValueHint::FilePath)
        .value_parser(value_parser!(PathBuf))
        .num_args(1)
        .action(ArgAction::Set);
    option
        .help_heading(HH_ADVANCED)
        .help("Which file to store the extracted links to")
}
/// Builds the `-F`/`--result-format` option.
///
/// It takes exactly one value, parsed as a `result::Type`.
#[must_use]
pub fn arg_result_format() -> Arg {
    let option = Arg::new(A_L_RESULT_FORMAT)
        .short(A_S_RESULT_FORMAT)
        .long(A_L_RESULT_FORMAT)
        .value_name("FORMAT")
        .value_parser(value_parser!(result::Type))
        .num_args(1)
        .action(ArgAction::Set);
    option.help("Data format of the output")
}
/// Builds the `-E`/`--result-extended` flag, grouped under the "Advanced" heading.
///
/// The flag takes no value; it is stored as `true` when given.
#[must_use]
pub fn arg_result_extended() -> Arg {
    let flag = Arg::new(A_L_RESULT_EXTENDED)
        .short(A_S_RESULT_EXTENDED)
        .long(A_L_RESULT_EXTENDED)
        .action(ArgAction::SetTrue);
    flag.help_heading(HH_ADVANCED)
        .help("Output additional properties per link/anchor")
}
/// Builds the `-f`/`--result-flush` flag, grouped under the "Advanced" heading.
///
/// The flag takes no value; it is stored as `true` when given.
#[must_use]
pub fn arg_result_flush() -> Arg {
    let flag = Arg::new(A_L_RESULT_FLUSH)
        .short(A_S_RESULT_FLUSH)
        .long(A_L_RESULT_FLUSH)
        .action(ArgAction::SetTrue);
    flag.help_heading(HH_ADVANCED)
        .help("Flush output after each link/anchor.")
        .long_help(
            "Flush output after each link/anchor. \
            Not all output formats support this.",
        )
}
static ARGS: LazyLock<Vec<Arg>> = LazyLock::new(|| {
vec![
arg_version(),
arg_quiet(),
arg_markup_files(),
arg_markup_files_list(),
arg_no_links(),
arg_anchors(),
arg_ignore_links(),
arg_links_file(),
arg_result_format(),
arg_result_extended(),
arg_result_flush(),
]
});
pub fn find_duplicate_short_options(args: &[Arg]) -> Vec<char> {
let mut short_options: Vec<char> = args.iter().filter_map(clap::Arg::get_short).collect();
short_options.push('h');
short_options.sort_unstable();
let mut duplicate_short_options = HashSet::new();
let mut last_chr = '&';
for chr in &short_options {
if *chr == last_chr {
duplicate_short_options.insert(*chr);
}
last_chr = *chr;
}
duplicate_short_options.iter().copied().collect()
}
/// Creates the clap command for this tool from the supplied arguments.
///
/// The command uses `bin_name` as its binary name,
/// has clap's own version flag disabled,
/// and expects every argument to have a help text.
///
/// # Panics
///
/// If [`find_duplicate_short_options`] reports any clash for `args`.
#[must_use]
pub fn arg_matcher(bin_name: &str, args: &[Arg]) -> Command {
    let clashes = find_duplicate_short_options(args);
    assert!(
        clashes.is_empty(),
        "Duplicate argument short options: {clashes:?}",
    );
    let base = command!().bin_name(bin_name);
    base.disable_version_flag(true)
        .help_expected(true)
        .args(args)
}
/// Opens `filename` and returns a buffered, asynchronous stream over its lines.
///
/// # Errors
///
/// If the file can not be opened.
async fn read_lines<P>(
    filename: P,
) -> io::Result<async_std::io::Lines<async_std::io::BufReader<async_std::fs::File>>>
where
    P: AsRef<async_std::path::Path>,
{
    async_std::fs::File::open(filename)
        .await
        .map(|handle| async_std::io::BufReader::new(handle).lines())
}
/// Collects the markup files to process from the parsed arguments.
///
/// Files given directly on the command line come first,
/// followed by the files read from the markup-files-list file (one per line).
/// Both arguments are removed from `args` in the process.
/// Lines of the list file that are empty or consist only of white-space
/// are skipped, as they can not denote a file.
///
/// # Errors
///
/// If the list file can not be opened or read,
/// or if no markup file was found at all.
pub async fn markup_files(args: &mut ArgMatches) -> io::Result<Vec<PathBuf>> {
    let mut files: Vec<PathBuf> = Vec::new();
    if let Some(arg_files) = args.remove_many::<PathBuf>(A_N_MARKUP_FILES) {
        files.extend(arg_files);
    }
    if let Some(list_file) = args.remove_one::<PathBuf>(A_L_MARKUP_FILES_LIST) {
        let lines = read_lines(list_file).await?;
        pin_mut!(lines);
        while let Some(line) = lines.next().await {
            let line = line?;
            // A blank line would otherwise end up as an empty path.
            // Other lines are taken over unmodified.
            if line.trim().is_empty() {
                continue;
            }
            files.push(line.as_str().into());
        }
    }
    if files.is_empty() {
        return Err(io::Error::other("No markup files provided on the CLI"));
    }
    Ok(files)
}
/// Removes the ignore-links patterns from `args` and returns them.
///
/// The result is empty if the option was not given.
pub fn ignore_links(args: &mut ArgMatches) -> Vec<WildMatch> {
    match args.remove_many::<WildMatch>(A_L_IGNORE_LINKS) {
        Some(patterns) => patterns.collect(),
        None => Vec::new(),
    }
}
/// Prints `version` to stdout on a line of its own,
/// then terminates the process with exit code 0.
///
/// Unless `quiet` is set, the version is preceded by the crate name and a space.
pub fn print_version_and_exit(version: &str, quiet: bool) {
    #![allow(clippy::print_stdout)]
    if quiet {
        println!("{version}");
    } else {
        println!("{} {version}", clap::crate_name!());
    }
    std::process::exit(0);
}
pub async fn parse_args() -> BoxResult<ToolConfig> {
let mut args = arg_matcher(clap::crate_name!(), &ARGS).get_matches();
let quiet = args.get_flag(A_L_QUIET);
let version = args.get_flag(A_L_VERSION);
if version {
print_version_and_exit(crate::VERSION, quiet);
}
let markup_files = markup_files(&mut args).await?;
let links = if args.get_flag(A_L_NO_LINKS) {
None
} else {
Some(StreamIdent::from((
args.remove_one::<PathBuf>(A_L_LINKS_FILE)
.map(Into::<async_std::path::PathBuf>::into)
.map(Into::<cli_utils::path_buf::PathBuf>::into),
false,
)))
};
let anchors = if args.get_raw(A_L_ANCHORS).is_none() {
None
} else {
Some(StreamIdent::from((
args.remove_one::<PathBuf>(A_L_ANCHORS)
.map(Into::<async_std::path::PathBuf>::into)
.map(Into::<cli_utils::path_buf::PathBuf>::into),
false,
)))
};
let ignore_links: Vec<WildMatch> = ignore_links(&mut args);
let result_format = args
.remove_one::<result::Type>(A_L_RESULT_FORMAT)
.unwrap_or_default();
let result_extended = args.get_flag(A_L_RESULT_EXTENDED);
let result_flush = args.get_flag(A_L_RESULT_FLUSH);
Ok(ToolConfig {
extractor: ExtractorConfig {
markup_files,
links: links.is_some(),
anchors: anchors.is_some(),
ignore_links,
},
links,
anchors,
result_format,
result_extended,
result_flush,
})
}