use clap::{Args, Parser, Subcommand as ClapSubcommand, ValueEnum};
use std::path::PathBuf;
pub use clap_complete::Shell as CompletionShell;
/// Default value of the `--per-host-limit` flag.
pub const DEFAULT_PER_HOST_LIMIT: u32 = 2;
/// Largest `--per-host-limit` accepted by `CliArgs::validate_per_host_limit`.
pub const MAX_PER_HOST_LIMIT: u32 = 10;
/// Largest `--parallel` accepted by `CliArgs::validate_parallelism`.
pub const MAX_PARALLELISM: u32 = 20;
/// Default value of the `--parallel` flag.
pub const DEFAULT_PARALLELISM: u32 = 5;
/// Largest `--pages` accepted by `CliArgs::validate_pages`.
pub const MAX_PAGES: u32 = 5;
/// Largest `--retries` accepted by `CliArgs::validate_retries`.
pub const MAX_RETRIES: u32 = 10;
/// Default value of the `--max-content-length` flag.
pub const DEFAULT_MAX_CONTENT_LENGTH: usize = 10_000;
/// Largest `--max-content-length` accepted by `CliArgs::validate_max_content_length`.
pub const MAX_CONTENT_LENGTH_LIMIT: usize = 100_000;
/// Default value of the `--global-timeout` flag, in seconds.
pub const DEFAULT_GLOBAL_TIMEOUT: u64 = 60;
/// Largest `--global-timeout` (seconds) accepted by `CliArgs::validate_global_timeout`.
pub const MAX_GLOBAL_TIMEOUT: u64 = 3600;
// Value accepted by the `--endpoint` flag (parsed through clap's `ValueEnum`);
// `CliArgs` defaults it to `Html`.
// Presumably selects between DuckDuckGo's HTML and "lite" front ends — confirm
// against the request-building code, which is not part of this file.
// NOTE: plain `//` comments are deliberate on clap-derived items: `///` doc
// comments would be picked up by the derive as help text.
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
pub enum CliEndpoint {
Html,
Lite,
}
// Value accepted by the optional `--time-filter` flag (clap `ValueEnum`, so the
// command-line spellings are the lowercase variant names, e.g. `w`).
// The single letters presumably stand for day / week / month / year — TODO
// confirm against the code that builds the search request.
// NOTE: `//` (not `///`) on purpose; clap would turn doc comments into help text.
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
pub enum CliTimeFilter {
D,
W,
M,
Y,
}
// Value accepted by the `--safe-search` flag (clap `ValueEnum`); `CliArgs`
// defaults it to `Moderate`.
// NOTE: `//` (not `///`) on purpose; clap would turn doc comments into help text.
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
pub enum CliSafeSearch {
Off,
Moderate,
On,
}
// Value accepted by the `--identity-profile` flag (clap `ValueEnum`); `CliArgs`
// defaults it to `Auto`.
// Every variant except `Auto` names one browser-family / platform combination,
// which `family_and_platform` translates into `crate::identity` types.
// NOTE: `//` (not `///`) on purpose; clap would turn doc comments into help text.
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
pub enum CliIdentityProfile {
// No fixed pair: `family_and_platform` returns `None` for this variant.
Auto,
ChromeWin,
ChromeMac,
ChromeLinux,
EdgeWin,
FirefoxLinux,
SafariMac,
}
impl CliIdentityProfile {
    /// Resolves this profile into a concrete browser-family / platform pair.
    ///
    /// Returns `None` for [`CliIdentityProfile::Auto`], which names no fixed
    /// pair; every other variant maps to exactly one combination.
    pub fn family_and_platform(
        self,
    ) -> Option<(crate::identity::BrowserFamily, crate::identity::Platform)> {
        use crate::identity::{BrowserFamily as Family, Platform as Os};

        let pair = match self {
            // `Auto` is the only variant without a fixed pair.
            Self::Auto => return None,
            Self::ChromeWin => (Family::Chrome, Os::Windows),
            Self::ChromeMac => (Family::Chrome, Os::MacOS),
            Self::ChromeLinux => (Family::Chrome, Os::Linux),
            Self::EdgeWin => (Family::Edge, Os::Windows),
            Self::FirefoxLinux => (Family::Firefox, Os::Linux),
            Self::SafariMac => (Family::Safari, Os::MacOS),
        };
        Some(pair)
    }
}
// Top-level clap parser for the `duckduckgo-search-cli` binary.
// The `#[command(...)]` attribute carries the user-facing help: short and long
// descriptions plus an "EXIT CODES" / "PIPE USAGE" epilogue shown after the long
// help. Those strings are runtime output; edit them with care.
// NOTE: `//` (not `///`) on purpose; clap would turn doc comments into help text.
#[derive(Debug, Clone, Parser)]
#[command(
name = "duckduckgo-search-cli",
version,
about = "DuckDuckGo search via pure HTTP, JSON output for LLMs.",
long_about = "Rust CLI that queries the static DuckDuckGo HTML endpoint \
(https://html.duckduckgo.com/html/) using pure HTTP requests, \
no Chrome, no paid APIs, and no cache. Returns structured organic \
results as JSON ready for LLM consumption.",
after_long_help = "\
EXIT CODES:\n\
0 Success — at least one query returned results\n\
1 Runtime error (network, parse, I/O)\n\
2 Invalid configuration (flag out of range, bad proxy)\n\
3 DuckDuckGo 202 block anomaly (soft-rate-limit)\n\
4 Global timeout exceeded\n\
5 Zero results across all queries\n\
\n\
PIPE USAGE:\n\
duckduckgo-search-cli -q -f json \"query\" | jaq '.resultados[].url'\n\
Logs go to stderr (-q suppresses them). JSON goes to stdout."
)]
pub struct RootArgs {
// Explicit subcommand, if one was given on the command line.
#[command(subcommand)]
pub subcomando: Option<Subcommand>,
// Search arguments flattened into the root command, so a search can be run
// without naming a subcommand (the tests in this file read this field when
// `subcomando` is `None`).
#[command(flatten)]
pub buscar: CliArgs,
}
// Subcommands of the CLI. With clap's default naming these are spelled
// `buscar`, `init-config` and `completions` on the command line (the first two
// are exercised by the tests in this file).
// NOTE: `//` (not `///`) on purpose; clap would turn doc comments into help text.
#[derive(Debug, Clone, ClapSubcommand)]
pub enum Subcommand {
// Explicit search; carries the same arguments as the flattened root form.
// Boxed so the large `CliArgs` payload does not inflate the enum — a unit
// test in this file guards the enum's size.
Buscar(Box<CliArgs>),
InitConfig(InitConfigArgs),
Completions(CompletionsArgs),
}
// Arguments of the `completions` subcommand.
// NOTE: `//` (not `///`) on purpose; clap would turn doc comments into help text.
#[derive(Debug, Clone, Args)]
pub struct CompletionsArgs {
// Required positional: the target shell, one of `clap_complete::Shell`'s values
// (re-exported from this module as `CompletionShell`).
#[arg(value_enum)]
pub shell: CompletionShell,
}
// Arguments of the `init-config` subcommand. Both switches default to `false`.
// NOTE: `//` (not `///`) on purpose; clap would turn doc comments into help text.
#[derive(Debug, Clone, Args)]
pub struct InitConfigArgs {
// `--force`: presumably allows overwriting an existing config — the handler is
// not in this file, TODO confirm.
#[arg(long = "force")]
pub force: bool,
// `--dry-run`: presumably reports what would be written without writing it —
// TODO confirm against the handler.
#[arg(long = "dry-run")]
pub dry_run: bool,
}
// Arguments of a search invocation. Used twice: flattened into `RootArgs` and as
// the payload of `Subcommand::Buscar`.
// clap only enforces types and the declared `conflicts_with` pairs; numeric
// ranges are checked separately by the `validate_*` methods on this type.
// NOTE: `//` (not `///`) on purpose; clap would turn doc comments into help text.
#[derive(Debug, Clone, Args)]
pub struct CliArgs {
// Positional search queries; zero or more (may be empty, e.g. when
// `--queries-file` is used instead).
#[arg(value_name = "QUERY")]
pub queries: Vec<String>,
// `-n` / `--num`: optional result count; no default is set at this layer.
#[arg(short = 'n', long = "num", value_name = "N")]
pub num_results: Option<u32>,
// `-f` / `--format`: output format as a free-form string, default "auto".
// Accepted values are not validated in this file.
#[arg(
short = 'f',
long = "format",
value_name = "FMT",
default_value = "auto"
)]
pub format: String,
// `-o` / `--output`: optional output file path.
#[arg(short = 'o', long = "output", value_name = "PATH")]
pub output_file: Option<PathBuf>,
// `-t` / `--timeout`: timeout in seconds, default 15; zero is rejected by
// `validate_timeout_seconds`. Presumably per request, as opposed to
// `--global-timeout` — TODO confirm.
#[arg(
short = 't',
long = "timeout",
value_name = "SECS",
default_value_t = 15
)]
pub timeout_seconds: u64,
// `-l` / `--lang`: language code, default "pt".
#[arg(short = 'l', long = "lang", value_name = "LANG", default_value = "pt")]
pub language: String,
// `-c` / `--country`: country code, default "br".
#[arg(short = 'c', long = "country", value_name = "CC", default_value = "br")]
pub country: String,
// `-p` / `--parallel`: degree of parallelism; range-checked by
// `validate_parallelism`.
#[arg(
short = 'p',
long = "parallel",
value_name = "N",
default_value_t = DEFAULT_PARALLELISM
)]
pub parallelism: u32,
// `--queries-file`: optional path; presumably a file listing queries — the
// reader is not in this file.
#[arg(long = "queries-file", value_name = "PATH")]
pub queries_file: Option<PathBuf>,
// `--pages`: default 1; range-checked by `validate_pages`.
#[arg(long = "pages", value_name = "N", default_value_t = 1)]
pub pages: u32,
// `--retries`: default 2; upper bound checked by `validate_retries` (zero is
// allowed).
#[arg(long = "retries", value_name = "N", default_value_t = 2)]
pub retries: u32,
// `--endpoint`: see `CliEndpoint`.
#[arg(long = "endpoint", value_enum, default_value_t = CliEndpoint::Html)]
pub endpoint: CliEndpoint,
// `--time-filter`: optional, see `CliTimeFilter`.
#[arg(long = "time-filter", value_enum)]
pub time_filter: Option<CliTimeFilter>,
// `--safe-search`: see `CliSafeSearch`.
#[arg(long = "safe-search", value_enum, default_value_t = CliSafeSearch::Moderate)]
pub safe_search: CliSafeSearch,
// `--probe`: boolean switch; its effect is implemented outside this file.
#[arg(long = "probe")]
pub probe: bool,
// `--identity-profile`: see `CliIdentityProfile`.
#[arg(long = "identity-profile", value_enum, default_value_t = CliIdentityProfile::Auto)]
pub identity_profile: CliIdentityProfile,
// `--stream`: boolean switch for streaming mode.
#[arg(long = "stream")]
pub stream_mode: bool,
// `-v` / `--verbose` and `-q` / `--quiet` are mutually exclusive (declared on
// both sides).
#[arg(short = 'v', long = "verbose", conflicts_with = "quiet")]
pub verbose: bool,
#[arg(short = 'q', long = "quiet", conflicts_with = "verbose")]
pub quiet: bool,
// `--fetch-content`: boolean switch; presumably enables fetching the content of
// result pages — TODO confirm.
#[arg(long = "fetch-content")]
pub fetch_content: bool,
// `--max-content-length`: range-checked by `validate_max_content_length`. The
// unit (bytes vs. characters) is not visible here.
#[arg(
long = "max-content-length",
value_name = "N",
default_value_t = DEFAULT_MAX_CONTENT_LENGTH
)]
pub max_content_length: usize,
// `--proxy` and `--no-proxy` are mutually exclusive; the proxy URL is checked
// by `validate_proxy`.
#[arg(long = "proxy", value_name = "URL", conflicts_with = "no_proxy")]
pub proxy: Option<String>,
#[arg(long = "no-proxy", conflicts_with = "proxy")]
pub no_proxy: bool,
// `--global-timeout`: seconds; range-checked by `validate_global_timeout`.
#[arg(
long = "global-timeout",
value_name = "SECS",
default_value_t = DEFAULT_GLOBAL_TIMEOUT
)]
pub global_timeout_seconds: u64,
// `--match-platform-ua`: boolean switch; its effect is implemented outside this
// file.
#[arg(long = "match-platform-ua")]
pub match_platform_ua: bool,
// `--per-host-limit`: range-checked by `validate_per_host_limit`.
#[arg(
long = "per-host-limit",
value_name = "N",
default_value_t = DEFAULT_PER_HOST_LIMIT
)]
pub per_host_limit: u32,
// `--chrome-path`: optional path; how it is used is not visible in this file.
#[arg(long = "chrome-path", value_name = "PATH")]
pub chrome_path: Option<PathBuf>,
// `--no-color`: boolean switch.
#[arg(long = "no-color")]
pub no_color: bool,
// `--seed`: optional 64-bit seed; presumably for reproducible randomness — TODO
// confirm.
#[arg(long = "seed", value_name = "N")]
pub seed: Option<u64>,
// `--config`: optional path to a configuration file.
#[arg(long = "config", value_name = "PATH")]
pub config_path: Option<PathBuf>,
}
/// Post-parse validation of search arguments.
///
/// clap only enforces types; the range and format rules live here. Every
/// validator returns `Ok(())` when the value is acceptable and otherwise an
/// `Err` holding a human-readable message that names the offending flag.
impl CliArgs {
    /// Checks that `--parallel` lies in `1..=MAX_PARALLELISM`.
    ///
    /// # Errors
    /// Returns a message naming the flag and the rejected value.
    pub fn validate_parallelism(&self) -> Result<(), String> {
        if self.parallelism == 0 {
            return Err(format!(
                "--parallel must be at least 1 (got {})",
                self.parallelism
            ));
        }
        if self.parallelism > MAX_PARALLELISM {
            return Err(format!(
                "--parallel cannot exceed {} (got {})",
                MAX_PARALLELISM, self.parallelism
            ));
        }
        Ok(())
    }

    /// Checks that `--pages` lies in `1..=MAX_PAGES`.
    ///
    /// # Errors
    /// Returns a message naming the flag and the rejected value.
    pub fn validate_pages(&self) -> Result<(), String> {
        if self.pages == 0 {
            return Err(format!("--pages must be at least 1 (got {})", self.pages));
        }
        if self.pages > MAX_PAGES {
            return Err(format!(
                "--pages cannot exceed {} (got {})",
                MAX_PAGES, self.pages
            ));
        }
        Ok(())
    }

    /// Checks that `--max-content-length` lies in `1..=MAX_CONTENT_LENGTH_LIMIT`.
    ///
    /// # Errors
    /// Returns a message naming the flag and the rejected value.
    pub fn validate_max_content_length(&self) -> Result<(), String> {
        if self.max_content_length == 0 {
            return Err(format!(
                "--max-content-length must be at least 1 (got {})",
                self.max_content_length
            ));
        }
        if self.max_content_length > MAX_CONTENT_LENGTH_LIMIT {
            return Err(format!(
                "--max-content-length cannot exceed {} (got {})",
                MAX_CONTENT_LENGTH_LIMIT, self.max_content_length
            ));
        }
        Ok(())
    }

    /// Checks that `--global-timeout` lies in `1..=MAX_GLOBAL_TIMEOUT` seconds.
    ///
    /// # Errors
    /// Returns a message naming the flag and the rejected value.
    pub fn validate_global_timeout(&self) -> Result<(), String> {
        if self.global_timeout_seconds == 0 {
            return Err(format!(
                "--global-timeout must be at least 1 (got {})",
                self.global_timeout_seconds
            ));
        }
        if self.global_timeout_seconds > MAX_GLOBAL_TIMEOUT {
            return Err(format!(
                "--global-timeout cannot exceed {} seconds (got {})",
                MAX_GLOBAL_TIMEOUT, self.global_timeout_seconds
            ));
        }
        Ok(())
    }

    /// Checks the `--proxy` URL, if one was given.
    ///
    /// An absent proxy is always valid. A present one must parse as a URL,
    /// use one of the schemes `http`, `https`, `socks5` or `socks5h`, and
    /// name a host.
    ///
    /// # Errors
    /// Returns a message when the URL does not parse, uses an unsupported
    /// scheme, or has no host component.
    pub fn validate_proxy(&self) -> Result<(), String> {
        let Some(url) = self.proxy.as_deref() else {
            return Ok(());
        };
        let parsed =
            reqwest::Url::parse(url).map_err(|e| format!("invalid --proxy URL ({url:?}): {e}"))?;
        match parsed.scheme() {
            "http" | "https" | "socks5" | "socks5h" => {}
            other => {
                return Err(format!(
                    "scheme {other:?} not supported in --proxy (use http/https/socks5/socks5h)"
                ));
            }
        }
        // The URL parser only insists on a host for the schemes it treats as
        // special (http/https among them). Input such as `socks5:9050` parses
        // successfully with no host at all, so reject it here instead of
        // letting it fail later when the proxy is actually used.
        match parsed.host_str() {
            Some(host) if !host.is_empty() => Ok(()),
            _ => Err(format!("invalid --proxy URL ({url:?}): missing host")),
        }
    }

    /// Checks that `--retries` does not exceed `MAX_RETRIES`; zero is allowed.
    ///
    /// # Errors
    /// Returns a message naming the flag and the rejected value.
    pub fn validate_retries(&self) -> Result<(), String> {
        if self.retries > MAX_RETRIES {
            return Err(format!(
                "--retries cannot exceed {} (got {})",
                MAX_RETRIES, self.retries
            ));
        }
        Ok(())
    }

    /// Checks that `--per-host-limit` lies in `1..=MAX_PER_HOST_LIMIT`.
    ///
    /// # Errors
    /// Returns a message naming the flag and the rejected value.
    pub fn validate_per_host_limit(&self) -> Result<(), String> {
        if self.per_host_limit == 0 {
            return Err(format!(
                "--per-host-limit must be at least 1 (got {})",
                self.per_host_limit
            ));
        }
        if self.per_host_limit > MAX_PER_HOST_LIMIT {
            return Err(format!(
                "--per-host-limit cannot exceed {} (got {})",
                MAX_PER_HOST_LIMIT, self.per_host_limit
            ));
        }
        Ok(())
    }

    /// Checks that `--timeout` is at least 1 second; no upper bound is enforced.
    ///
    /// # Errors
    /// Returns a message naming the flag when the value is zero.
    pub fn validate_timeout_seconds(&self) -> Result<(), String> {
        if self.timeout_seconds == 0 {
            return Err(format!(
                "--timeout must be at least 1 (got {})",
                self.timeout_seconds
            ));
        }
        Ok(())
    }
}
// Unit tests for CLI parsing (via clap) and for the `validate_*` range checks.
#[cfg(test)]
mod tests {
use super::*;
use clap::CommandFactory;
// Parses `argv` through `RootArgs` and returns the search arguments, whether
// they came from the explicit `buscar` subcommand or from the flattened root
// form. Any other subcommand is reported as an `InvalidSubcommand` error.
fn parse_buscar(argv: &[&str]) -> Result<CliArgs, clap::Error> {
let root = RootArgs::try_parse_from(argv)?;
match root.subcomando {
Some(Subcommand::Buscar(a)) => Ok(*a),
Some(Subcommand::InitConfig(_)) | Some(Subcommand::Completions(_)) => {
Err(clap::Error::raw(
clap::error::ErrorKind::InvalidSubcommand,
"subcomando nao-busca retornado em contexto que esperava busca",
))
}
None => Ok(root.buscar),
}
}
// Runs clap's own consistency assertions over the whole command definition.
#[test]
fn cli_passes_schema_validation() {
RootArgs::command().debug_assert();
}
// A single positional query parses and the checked flags hold their defaults.
#[test]
fn parseia_query_simples() {
let args = parse_buscar(&["bin", "rust async"]).expect("should parse");
assert_eq!(args.queries, vec!["rust async".to_string()]);
assert_eq!(args.format, "auto");
assert!(args.output_file.is_none());
assert_eq!(args.timeout_seconds, 15);
assert_eq!(args.language, "pt");
assert_eq!(args.country, "br");
assert_eq!(args.parallelism, DEFAULT_PARALLELISM);
assert_eq!(args.pages, 1);
assert_eq!(args.retries, 2);
assert_eq!(args.endpoint, CliEndpoint::Html);
assert!(args.time_filter.is_none());
assert_eq!(args.safe_search, CliSafeSearch::Moderate);
assert!(!args.stream_mode);
assert!(args.queries_file.is_none());
assert!(!args.verbose);
assert!(!args.quiet);
assert!(!args.fetch_content);
assert_eq!(args.max_content_length, DEFAULT_MAX_CONTENT_LENGTH);
assert!(args.proxy.is_none());
assert!(!args.no_proxy);
assert_eq!(args.global_timeout_seconds, DEFAULT_GLOBAL_TIMEOUT);
assert!(!args.match_platform_ua);
}
// `--fetch-content` and `--max-content-length` are parsed together.
#[test]
fn parseia_fetch_content_e_max_content_length() {
let args = parse_buscar(&[
"bin",
"--fetch-content",
"--max-content-length",
"500",
"rust",
])
.expect("should parse --fetch-content");
assert!(args.fetch_content);
assert_eq!(args.max_content_length, 500);
}
// `--proxy` and `--no-proxy` each parse alone but conflict when combined.
#[test]
fn parseia_proxy_e_no_proxy_mutuamente_exclusivos() {
let ok = parse_buscar(&[
"bin",
"--proxy",
"http://user:pass@proxy.local:8080",
"rust",
])
.expect("should parse --proxy");
assert_eq!(
ok.proxy.as_deref(),
Some("http://user:pass@proxy.local:8080")
);
assert!(!ok.no_proxy);
let no = parse_buscar(&["bin", "--no-proxy", "rust"]).expect("should parse --no-proxy");
assert!(no.no_proxy);
assert!(no.proxy.is_none());
let err = parse_buscar(&["bin", "--proxy", "http://x", "--no-proxy", "rust"]);
assert!(err.is_err(), "--proxy + --no-proxy deve conflitar");
}
// `--global-timeout` overrides its default.
#[test]
fn parseia_global_timeout() {
let args = parse_buscar(&["bin", "--global-timeout", "30", "rust"]).unwrap();
assert_eq!(args.global_timeout_seconds, 30);
}
// `validate_max_content_length` rejects 0 and values above the limit.
#[test]
fn validate_max_content_length_range() {
let mut args = parse_buscar(&["bin", "q"]).unwrap();
args.max_content_length = 0;
assert!(args.validate_max_content_length().is_err());
args.max_content_length = MAX_CONTENT_LENGTH_LIMIT + 1;
assert!(args.validate_max_content_length().is_err());
args.max_content_length = 5000;
assert!(args.validate_max_content_length().is_ok());
}
// `validate_global_timeout` rejects 0 and values above the limit.
#[test]
fn validate_global_timeout_range() {
let mut args = parse_buscar(&["bin", "q"]).unwrap();
args.global_timeout_seconds = 0;
assert!(args.validate_global_timeout().is_err());
args.global_timeout_seconds = MAX_GLOBAL_TIMEOUT + 1;
assert!(args.validate_global_timeout().is_err());
args.global_timeout_seconds = 120;
assert!(args.validate_global_timeout().is_ok());
}
// `validate_proxy` accepts http/https/socks5/socks5h URLs and an absent proxy;
// it rejects an unsupported scheme and a string that is not a URL.
#[test]
fn validate_proxy_accepts_supported_schemes() {
let mut args = parse_buscar(&["bin", "q"]).unwrap();
for ok in [
"http://proxy:8080",
"https://user:pass@proxy:8443",
"socks5://127.0.0.1:9050",
"socks5h://host:1080",
] {
args.proxy = Some(ok.to_string());
assert!(
args.validate_proxy().is_ok(),
"proxy {ok:?} deveria ser aceito"
);
}
args.proxy = Some("ftp://proxy".to_string());
assert!(args.validate_proxy().is_err());
args.proxy = Some("nao-eh-uma-url".to_string());
assert!(args.validate_proxy().is_err());
args.proxy = None;
assert!(args.validate_proxy().is_ok());
}
// Paging, retry, endpoint, time-filter and safe-search flags parse together.
#[test]
fn parses_resilience_and_filter_flags() {
let args = parse_buscar(&[
"bin",
"--pages",
"3",
"--retries",
"5",
"--endpoint",
"lite",
"--time-filter",
"w",
"--safe-search",
"on",
"rust",
])
.expect("should parse resilience flags");
assert_eq!(args.pages, 3);
assert_eq!(args.retries, 5);
assert_eq!(args.endpoint, CliEndpoint::Lite);
assert_eq!(args.time_filter, Some(CliTimeFilter::W));
assert_eq!(args.safe_search, CliSafeSearch::On);
}
// `validate_pages` accepts 1, 2 and 5 and rejects 0 and 6 (the literals mirror
// the current `MAX_PAGES` of 5).
#[test]
fn validate_pages_accepts_range_and_rejects_invalid() {
let mut args = parse_buscar(&["bin", "qualquer"]).unwrap();
for v in [1u32, 2, 5] {
args.pages = v;
assert!(args.validate_pages().is_ok(), "pages {v}");
}
args.pages = 0;
assert!(args.validate_pages().is_err());
args.pages = 6;
assert!(args.validate_pages().is_err());
}
// `validate_retries` allows 0 and 10 and rejects 11 (the literals mirror the
// current `MAX_RETRIES` of 10).
#[test]
fn validate_retries_rejects_above_max() {
let mut args = parse_buscar(&["bin", "qualquer"]).unwrap();
args.retries = 0;
assert!(args.validate_retries().is_ok());
args.retries = 10;
assert!(args.validate_retries().is_ok());
args.retries = 11;
assert!(args.validate_retries().is_err());
}
// Several positional queries are collected in order.
#[test]
fn parseia_multiplas_queries_posicionais() {
let args = parse_buscar(&["bin", "rust async", "tokio runtime", "async channels"])
.expect("should parse multiple queries");
assert_eq!(
args.queries,
vec![
"rust async".to_string(),
"tokio runtime".to_string(),
"async channels".to_string(),
]
);
}
// Long-form flags override their defaults.
#[test]
fn parseia_flags_customizadas() {
let args = parse_buscar(&[
"bin",
"--num",
"10",
"--format",
"json",
"--timeout",
"30",
"--lang",
"en",
"--country",
"us",
"--parallel",
"8",
"--verbose",
"teste de busca",
])
.expect("should parse with flags");
assert_eq!(args.queries, vec!["teste de busca".to_string()]);
assert_eq!(args.num_results, Some(10));
assert_eq!(args.timeout_seconds, 30);
assert_eq!(args.language, "en");
assert_eq!(args.country, "us");
assert_eq!(args.parallelism, 8);
assert!(args.verbose);
}
// The output path is accepted through both `-o` and `--output`.
#[test]
fn parseia_flag_output_curta_e_longa() {
let args = parse_buscar(&["bin", "-o", "/tmp/saida.json", "q"]).expect("should parse -o");
assert_eq!(
args.output_file.as_deref(),
Some(std::path::Path::new("/tmp/saida.json"))
);
let args2 = parse_buscar(&["bin", "--output", "/tmp/x.md", "--format", "markdown", "q"])
.expect("should parse --output");
assert_eq!(
args2.output_file.as_deref(),
Some(std::path::Path::new("/tmp/x.md"))
);
assert_eq!(args2.format, "markdown");
}
// `--queries-file` plus `--stream` parse with no positional query.
#[test]
fn parseia_arquivo_queries_e_stream() {
let args = parse_buscar(&["bin", "--queries-file", "queries.txt", "--stream"])
.expect("should parse --queries-file and --stream");
assert!(args.stream_mode);
assert_eq!(
args.queries_file.as_deref(),
Some(std::path::Path::new("queries.txt"))
);
assert!(args.queries.is_empty());
}
// `--verbose` and `--quiet` cannot be combined.
#[test]
fn verbose_e_quiet_sao_mutuamente_exclusivos() {
let result = parse_buscar(&["bin", "--verbose", "--quiet", "query qualquer"]);
assert!(result.is_err(), "verbose + quiet deve falhar a validação");
}
// `validate_parallelism` accepts values from 1 up to `MAX_PARALLELISM`.
#[test]
fn validate_parallelism_accepts_allowed_range() {
let mut args = parse_buscar(&["bin", "qualquer"]).unwrap();
for value in [1u32, 5, 10, MAX_PARALLELISM] {
args.parallelism = value;
assert!(
args.validate_parallelism().is_ok(),
"--parallel {value} deveria ser aceito"
);
}
}
// `validate_parallelism` rejects 0 and anything above `MAX_PARALLELISM`.
#[test]
fn validate_parallelism_rejects_invalid_values() {
let mut args = parse_buscar(&["bin", "qualquer"]).unwrap();
args.parallelism = 0;
assert!(args.validate_parallelism().is_err());
args.parallelism = MAX_PARALLELISM + 1;
assert!(args.validate_parallelism().is_err());
args.parallelism = 100;
assert!(args.validate_parallelism().is_err());
}
// `init-config --force --dry-run` sets both switches.
#[test]
fn parses_init_config_subcommand_with_flags() {
let root = RootArgs::try_parse_from(["bin", "init-config", "--force", "--dry-run"])
.expect("should parse init-config");
let Some(Subcommand::InitConfig(args)) = root.subcomando else {
panic!("esperava subcomando InitConfig");
};
assert!(args.force);
assert!(args.dry_run);
}
// `init-config` alone leaves both switches off.
#[test]
fn parses_init_config_subcommand_without_flags() {
let root = RootArgs::try_parse_from(["bin", "init-config"])
.expect("should parse init-config without flags");
let Some(Subcommand::InitConfig(args)) = root.subcomando else {
panic!("esperava subcomando InitConfig");
};
assert!(!args.force);
assert!(!args.dry_run);
}
// The explicit `buscar` subcommand carries the positional query.
#[test]
fn parseia_subcomando_buscar_explicito() {
let root = RootArgs::try_parse_from(["bin", "buscar", "rust"])
.expect("should parse buscar subcommand");
let Some(Subcommand::Buscar(args)) = root.subcomando else {
panic!("esperava subcomando Buscar");
};
assert_eq!(args.queries, vec!["rust".to_string()]);
}
// Size guard: `Subcommand` must stay within four times the larger of
// `InitConfigArgs` and a `usize`, which only holds while `CliArgs` is boxed.
#[test]
fn search_subcommand_stays_small_when_boxed() {
let enum_size = std::mem::size_of::<Subcommand>();
let init_size = std::mem::size_of::<InitConfigArgs>();
assert!(
enum_size <= init_size.max(std::mem::size_of::<usize>()) * 4,
"Subcommand grew unexpectedly: {enum_size} bytes"
);
}
// Without a subcommand, the query lands in the flattened `buscar` field.
#[test]
fn parse_without_subcommand_uses_search_flatten() {
let root = RootArgs::try_parse_from(["bin", "rust async"])
.expect("should parse without subcommand");
assert!(root.subcomando.is_none());
assert_eq!(root.buscar.queries, vec!["rust async".to_string()]);
}
// `--per-host-limit` parses and otherwise falls back to its default.
#[test]
fn parseia_per_host_limit() {
let args = parse_buscar(&["bin", "--per-host-limit", "5", "q"]).unwrap();
assert_eq!(args.per_host_limit, 5);
let default = parse_buscar(&["bin", "q"]).unwrap();
assert_eq!(default.per_host_limit, DEFAULT_PER_HOST_LIMIT);
}
// `validate_per_host_limit` rejects 0 and values above the limit.
#[test]
fn validate_per_host_limit_range() {
let mut args = parse_buscar(&["bin", "q"]).unwrap();
args.per_host_limit = 0;
assert!(args.validate_per_host_limit().is_err());
args.per_host_limit = MAX_PER_HOST_LIMIT + 1;
assert!(args.validate_per_host_limit().is_err());
args.per_host_limit = 2;
assert!(args.validate_per_host_limit().is_ok());
}
// `validate_timeout_seconds` rejects 0 and accepts positive values.
#[test]
fn validate_timeout_seconds_rejects_zero() {
let mut args = parse_buscar(&["bin", "q"]).unwrap();
args.timeout_seconds = 0;
assert!(args.validate_timeout_seconds().is_err());
args.timeout_seconds = 1;
assert!(args.validate_timeout_seconds().is_ok());
args.timeout_seconds = 15;
assert!(args.validate_timeout_seconds().is_ok());
}
}