// Base URL rustdoc uses when other crates link into this crate's documentation.
#![doc(html_root_url = "https://docs.rs/duckduckgo-search-cli/0.6.4")]
// Playground used by rustdoc for the "Run" button on documentation examples.
#![doc(html_playground_url = "https://play.rust-lang.org")]
// Warn on public items lacking docs or a `Debug` implementation.
#![warn(missing_docs)]
#![warn(missing_debug_implementations)]
// rustdoc hygiene lints: links, crate-level docs, code-block attributes,
// HTML tags and bare URLs.
#![warn(rustdoc::broken_intra_doc_links)]
#![warn(rustdoc::private_intra_doc_links)]
#![warn(rustdoc::missing_crate_level_docs)]
#![warn(rustdoc::invalid_codeblock_attributes)]
#![warn(rustdoc::invalid_html_tags)]
#![warn(rustdoc::bare_urls)]
#![warn(rustdoc::redundant_explicit_links)]
// Lints that keep any `unsafe` code explicit and individually justified.
#![warn(clippy::undocumented_unsafe_blocks)]
#![warn(clippy::multiple_unsafe_ops_per_block)]
#![warn(unsafe_op_in_unsafe_fn)]
// Public modules exported by the crate.
pub mod cli;
pub mod config_init;
pub mod content;
pub mod content_fetch;
pub mod error;
pub mod extraction;
pub mod http;
pub mod identity;
pub mod output;
pub mod parallel;
pub mod paths;
pub mod pipeline;
pub mod platform;
pub mod search;
pub mod selectors;
pub mod signals;
pub mod types;
// Compiled only with the `chrome` feature; when the `docsrs` cfg is set the
// item is additionally annotated with that feature requirement.
#[cfg_attr(docsrs, doc(cfg(feature = "chrome")))]
#[cfg(feature = "chrome")]
pub mod browser;
use crate::cli::{
CliArgs, CliEndpoint, CliSafeSearch, CliTimeFilter, CompletionsArgs, InitConfigArgs, RootArgs,
Subcommand,
};
use crate::error::exit_codes;
use crate::error::CliError;
use crate::types::{Config, Endpoint, OutputFormat, SafeSearch, TimeFilter};
use clap::Parser;
use tokio_util::sync::CancellationToken;
use tracing_subscriber::{fmt, EnvFilter};
pub async fn run(cancellation: CancellationToken) -> i32 {
let root = RootArgs::parse();
let args = match root.subcomando {
Some(Subcommand::InitConfig(args)) => {
return execute_init_config(args);
}
Some(Subcommand::Completions(args)) => {
return execute_completions(args);
}
Some(Subcommand::Buscar(args)) => *args,
None => root.buscar,
};
let disable_colors = platform::should_disable_color(args.no_color);
initialize_logging(args.verbose, args.quiet, disable_colors);
platform::init();
if args.probe {
return execute_probe(&args).await;
}
let config = match build_config(&args) {
Ok(c) => c,
Err(err) => {
tracing::error!(?err, "Invalid configuration");
output::emit_stderr(&format!("Configuration error: {err:#}"));
return exit_codes::INVALID_CONFIG;
}
};
let format = config.format;
let output_file = config.output_file.clone();
let global_timeout = std::time::Duration::from_secs(config.global_timeout_seconds);
let internal_cancellation = cancellation.clone();
let pipeline_future = pipeline::execute_pipeline(config, internal_cancellation);
let pipeline_result = match tokio::time::timeout(global_timeout, pipeline_future).await {
Ok(result) => result,
Err(_elapsed) => {
cancellation.cancel();
tracing::error!(
seconds = global_timeout.as_secs(),
"global timeout exceeded — execution aborted"
);
output::emit_stderr(&format!(
"Error: global timeout of {}s exceeded",
global_timeout.as_secs()
));
return exit_codes::GLOBAL_TIMEOUT;
}
};
match pipeline_result {
Ok(output) => {
let total = output.total_results();
let exit_code = if total == 0 {
tracing::warn!("Zero results returned across all queries");
exit_codes::ZERO_RESULTS
} else {
exit_codes::SUCCESS
};
if let Err(err) = output::emit_result(&output, format, output_file.as_deref()) {
if output::is_broken_pipe(&err) {
return exit_codes::SUCCESS;
}
tracing::error!(?err, "Failed to emit result");
output::emit_stderr(&format!("Error writing output: {err:#}"));
return exit_codes::GENERIC_ERROR;
}
exit_code
}
Err(err) => {
tracing::error!(?err, "Pipeline execution failed");
output::emit_stderr(&format!("Error: {err:#}"));
exit_codes::GENERIC_ERROR
}
}
}
/// Handles the `InitConfig` subcommand and returns the process exit code.
///
/// Initializes the configuration files, prints the resulting report to stdout
/// as pretty-printed JSON, and returns `GENERIC_ERROR` if initialization,
/// serialization or output fails, or if any file in the report carries an
/// `Error` action. A broken pipe while printing counts as success.
fn execute_init_config(args: InitConfigArgs) -> i32 {
    initialize_logging(false, false, false);
    platform::init();

    let report = match config_init::initialize_config(args.force, args.dry_run) {
        Ok(produced) => produced,
        Err(err) => {
            tracing::error!(?err, "failed to initialize config");
            output::emit_stderr(&format!("Error: {err:#}"));
            return exit_codes::GENERIC_ERROR;
        }
    };

    let json = match serde_json::to_string_pretty(&report) {
        Ok(rendered) => rendered,
        Err(err) => {
            tracing::error!(?err, "failed to serialize JSON report");
            return exit_codes::GENERIC_ERROR;
        }
    };

    if let Err(err) = output::print_line_stdout(&json) {
        return if output::is_broken_pipe(&err) {
            exit_codes::SUCCESS
        } else {
            tracing::error!(?err, "failed to emit report");
            exit_codes::GENERIC_ERROR
        };
    }

    // The report was printed; the exit code still reflects per-file failures.
    let any_file_failed = report.files.iter().any(|entry| {
        matches!(
            entry.action_taken,
            crate::config_init::ConfigFileAction::Error { .. }
        )
    });

    if any_file_failed {
        exit_codes::GENERIC_ERROR
    } else {
        exit_codes::SUCCESS
    }
}
async fn execute_probe(args: &crate::cli::CliArgs) -> i32 {
use crate::error::exit_codes;
use std::time::Instant;
let endpoint = match args.endpoint {
crate::cli::CliEndpoint::Html => "html",
crate::cli::CliEndpoint::Lite => "lite",
};
let probe_url = if endpoint == "lite" {
crate::search::lite_base_url()
} else {
crate::search::html_base_url()
};
let ua = match args.seed {
Some(seed) => {
crate::http::select_profile_from_list_seeded(
&crate::http::load_user_agents(args.match_platform_ua),
Some(seed),
)
.user_agent
}
None => crate::http::select_user_agent(),
};
let client =
match crate::http::build_client(&ua, args.timeout_seconds, &args.language, &args.country) {
Ok(c) => c,
Err(err) => {
let payload = serde_json::json!({
"type": "probe",
"endpoint": endpoint,
"status": 0u16,
"latency_ms": 0u64,
"has_set_cookie": false,
"error": format!("client build failed: {err}"),
});
let _ = crate::output::print_line_stdout(&payload.to_string());
return exit_codes::GENERIC_ERROR;
}
};
let started = Instant::now();
let result = client.get(&probe_url).send().await;
let latency_ms = started.elapsed().as_millis().min(u128::from(u64::MAX)) as u64;
match result {
Ok(response) => {
let status = response.status().as_u16();
let has_set_cookie = response.headers().contains_key("set-cookie");
let payload = serde_json::json!({
"type": "probe",
"endpoint": endpoint,
"status": status,
"latency_ms": latency_ms,
"has_set_cookie": has_set_cookie,
"url": probe_url,
});
if let Err(err) = crate::output::print_line_stdout(&payload.to_string()) {
if !crate::output::is_broken_pipe(&err) {
tracing::error!(?err, "failed to emit probe report");
return exit_codes::GENERIC_ERROR;
}
}
exit_codes::SUCCESS
}
Err(err) => {
let payload = serde_json::json!({
"type": "probe",
"endpoint": endpoint,
"status": 0u16,
"latency_ms": latency_ms,
"has_set_cookie": false,
"url": probe_url,
"error": format!("network error: {err}"),
});
let _ = crate::output::print_line_stdout(&payload.to_string());
exit_codes::GENERIC_ERROR
}
}
}
fn execute_completions(args: CompletionsArgs) -> i32 {
use clap::CommandFactory;
let mut cmd = RootArgs::command();
clap_complete::generate(
args.shell,
&mut cmd,
"duckduckgo-search-cli",
&mut std::io::stdout(),
);
exit_codes::SUCCESS
}
/// Installs the global `tracing` subscriber, writing compact output to stderr.
///
/// * `quiet` forces the `error` filter and takes precedence over everything.
/// * Otherwise the filter comes from the default environment variable when it
///   parses, falling back to `debug` if `verbose` is set and `info` if not.
/// * `disable_colors` turns ANSI escape sequences off.
///
/// A failure to install the subscriber is ignored.
fn initialize_logging(verbose: bool, quiet: bool, disable_colors: bool) {
    let filter = if quiet {
        EnvFilter::new("error")
    } else {
        let fallback_level = if verbose { "debug" } else { "info" };
        EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(fallback_level))
    };

    let use_ansi = !disable_colors;
    let subscriber = fmt()
        .with_env_filter(filter)
        .with_writer(std::io::stderr)
        .with_target(false)
        .with_ansi(use_ansi)
        .compact()
        .finish();

    let _ = tracing::subscriber::set_global_default(subscriber);
}
/// Validates the parsed CLI arguments and assembles the runtime `Config`.
///
/// Steps, in order: resolve the output format, run the `validate_*` checks,
/// validate the output path, gather queries (positional arguments, the
/// queries file, and stdin only when neither of the former was given),
/// select a browser profile, load selectors and derive the page count.
///
/// When `--num-results` is omitted it defaults to 15. When `pages` is not
/// above 1 and more than 10 results are requested, the page count becomes
/// `ceil(num / 10)` capped at 5.
///
/// # Errors
///
/// Returns `CliError::InvalidConfig` for an unknown format, a failed
/// validation, or when no query remains after combining all sources, and
/// propagates errors from path validation and query reading.
fn build_config(args: &CliArgs) -> Result<Config, CliError> {
    // Wraps a validation message in the configuration error variant.
    fn invalid(message: String) -> CliError {
        CliError::InvalidConfig { message }
    }

    let Some(format) = OutputFormat::from_str_value(&args.format) else {
        return Err(invalid(format!("unknown format: {:?}", args.format)));
    };

    args.validate_parallelism().map_err(invalid)?;
    args.validate_pages().map_err(invalid)?;
    args.validate_retries().map_err(invalid)?;
    args.validate_max_content_length().map_err(invalid)?;
    args.validate_global_timeout().map_err(invalid)?;
    args.validate_proxy().map_err(invalid)?;
    args.validate_per_host_limit().map_err(invalid)?;
    args.validate_timeout_seconds().map_err(invalid)?;

    if let Some(path) = &args.output_file {
        crate::paths::validate_output_path(path)?;
    }

    let file_queries = match &args.queries_file {
        Some(path) => pipeline::read_queries_from_file(path)?,
        None => Vec::new(),
    };

    // Stdin is consulted only when no other query source was supplied.
    let no_explicit_source = args.queries.is_empty() && args.queries_file.is_none();
    let queries_stdin = if no_explicit_source {
        pipeline::read_queries_from_stdin_if_pipe()?
    } else {
        Vec::new()
    };

    let queries =
        pipeline::combine_and_dedup_queries(args.queries.clone(), file_queries, queries_stdin);
    let Some(first_query) = queries.first().cloned() else {
        return Err(CliError::InvalidConfig {
            message:
                "no query provided (positional arguments, --queries-file, and stdin are all empty)"
                    .into(),
        });
    };

    let ua_list = http::load_user_agents(args.match_platform_ua);
    let browser_profile = http::select_profile_from_list_seeded(&ua_list, args.seed);
    let user_agent = browser_profile.user_agent.clone();

    let selectors = match &args.config_path {
        Some(dir) => selectors::load_selectors_from_dir(dir),
        None => selectors::load_selectors(),
    };

    let effective_num = args.num_results.unwrap_or(15);
    let effective_pages = match (args.pages, effective_num) {
        // A page count above 1 is used as given.
        (pages, _) if pages > 1 => pages,
        // Otherwise fetch enough pages for the requested results, capped at 5.
        (_, num) if num > 10 => num.div_ceil(10).min(5),
        _ => 1,
    };

    Ok(Config {
        query: first_query,
        queries,
        num_results: Some(effective_num),
        format,
        timeout_seconds: args.timeout_seconds,
        language: args.language.clone(),
        country: args.country.clone(),
        verbose: args.verbose,
        quiet: args.quiet,
        user_agent,
        browser_profile,
        parallelism: args.parallelism,
        pages: effective_pages,
        retries: args.retries,
        endpoint: convert_endpoint(args.endpoint),
        time_filter: args.time_filter.map(convert_time_filter),
        safe_search: convert_safe_search(args.safe_search),
        stream_mode: args.stream_mode,
        output_file: args.output_file.clone(),
        fetch_content: args.fetch_content,
        max_content_length: args.max_content_length,
        proxy: args.proxy.clone(),
        no_proxy: args.no_proxy,
        global_timeout_seconds: args.global_timeout_seconds,
        match_platform_ua: args.match_platform_ua,
        per_host_limit: args.per_host_limit as usize,
        chrome_path: args.chrome_path.clone(),
        selectors,
    })
}
fn convert_endpoint(source: CliEndpoint) -> Endpoint {
match source {
CliEndpoint::Html => Endpoint::Html,
CliEndpoint::Lite => Endpoint::Lite,
}
}
fn convert_time_filter(source: CliTimeFilter) -> TimeFilter {
match source {
CliTimeFilter::D => TimeFilter::Day,
CliTimeFilter::W => TimeFilter::Week,
CliTimeFilter::M => TimeFilter::Month,
CliTimeFilter::Y => TimeFilter::Year,
}
}
fn convert_safe_search(source: CliSafeSearch) -> SafeSearch {
match source {
CliSafeSearch::Off => SafeSearch::Off,
CliSafeSearch::Moderate => SafeSearch::Moderate,
CliSafeSearch::On => SafeSearch::Strict,
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline arguments with a single positional query; individual tests
    /// override only the fields they exercise.
    fn base_args() -> CliArgs {
        CliArgs {
            queries: vec!["rust async".to_string()],
            num_results: Some(5),
            format: "json".to_string(),
            output_file: None,
            timeout_seconds: 15,
            language: "pt".to_string(),
            country: "br".to_string(),
            parallelism: 5,
            queries_file: None,
            pages: 1,
            retries: 2,
            endpoint: CliEndpoint::Html,
            time_filter: None,
            safe_search: CliSafeSearch::Moderate,
            probe: false,
            identity_profile: crate::cli::CliIdentityProfile::Auto,
            stream_mode: false,
            verbose: false,
            quiet: false,
            fetch_content: false,
            max_content_length: crate::cli::DEFAULT_MAX_CONTENT_LENGTH,
            proxy: None,
            no_proxy: false,
            global_timeout_seconds: crate::cli::DEFAULT_GLOBAL_TIMEOUT,
            match_platform_ua: false,
            per_host_limit: crate::cli::DEFAULT_PER_HOST_LIMIT,
            chrome_path: None,
            no_color: false,
            seed: None,
            config_path: None,
        }
    }

    /// The baseline arguments produce a config mirroring the inputs.
    #[test]
    fn build_config_with_valid_args() {
        let config = build_config(&base_args()).expect("should build config");
        assert_eq!(config.query, "rust async");
        assert_eq!(config.queries, vec!["rust async".to_string()]);
        assert_eq!(config.format, OutputFormat::Json);
        assert_eq!(config.num_results, Some(5));
        assert_eq!(config.parallelism, 5);
        assert_eq!(config.pages, 1);
        assert!(!config.stream_mode);
    }

    /// Queries that are blank or empty leave nothing to search for.
    #[test]
    fn build_config_rejects_all_empty_queries() {
        let args = CliArgs {
            queries: vec![" ".to_string(), "".to_string()],
            ..base_args()
        };
        assert!(build_config(&args).is_err());
    }

    /// An unsupported output format is a configuration error.
    #[test]
    fn build_config_rejects_unknown_format() {
        let args = CliArgs {
            format: "xml".to_string(),
            ..base_args()
        };
        assert!(build_config(&args).is_err());
    }

    /// Zero parallelism is rejected.
    #[test]
    fn build_config_rejects_zero_parallelism() {
        let args = CliArgs {
            parallelism: 0,
            ..base_args()
        };
        assert!(build_config(&args).is_err());
    }

    /// A parallelism of 50 is rejected.
    #[test]
    fn build_config_rejects_parallelism_above_max() {
        let args = CliArgs {
            parallelism: 50,
            ..base_args()
        };
        assert!(build_config(&args).is_err());
    }

    /// Omitting the result count defaults to 15, which auto-raises pages to 2.
    #[test]
    fn build_config_applies_default_num_15_when_omitted() {
        let args = CliArgs {
            num_results: None,
            pages: 1,
            ..base_args()
        };
        let config = build_config(&args).expect("should build");
        assert_eq!(config.num_results, Some(15), "default 15 quando None");
        assert_eq!(config.pages, 2, "auto-eleva para ceil(15/10) = 2");
    }

    /// An explicit page count above 1 is kept as given.
    #[test]
    fn build_config_respects_explicit_pages_above_1() {
        let args = CliArgs {
            num_results: Some(20),
            pages: 3,
            ..base_args()
        };
        let config = build_config(&args).expect("should build");
        assert_eq!(config.num_results, Some(20));
        assert_eq!(config.pages, 3, "respeita --pages explícito do usuário");
    }

    /// Above 10 results the page count is ceil(num / 10), capped at 5.
    #[test]
    fn build_config_auto_paginates_when_num_above_10() {
        let cases = [
            (11u32, 2u32),
            (15, 2),
            (20, 2),
            (21, 3),
            (45, 5),
            (60, 5),
        ];
        for (num, expected_pages) in cases {
            let args = CliArgs {
                num_results: Some(num),
                pages: 1,
                ..base_args()
            };
            let config =
                build_config(&args).unwrap_or_else(|e| panic!("should build for num={num}: {e}"));
            assert_eq!(
                config.pages, expected_pages,
                "para num={num}, paginas deveria ser {expected_pages}"
            );
        }
    }

    /// Up to 10 results a single page is used.
    #[test]
    fn build_config_no_auto_paginate_when_num_10_or_less() {
        for num in [1u32, 5, 10] {
            let args = CliArgs {
                num_results: Some(num),
                pages: 1,
                ..base_args()
            };
            let config = build_config(&args).expect("should build");
            assert_eq!(config.pages, 1, "num={num} não deveria auto-paginar");
        }
    }

    /// Positional queries are deduplicated in order; the first becomes `query`.
    #[test]
    fn build_config_combines_multiple_positional_queries() {
        let positional = ["alfa", "beta", "alfa", "gama"];
        let args = CliArgs {
            queries: positional.iter().map(|q| q.to_string()).collect(),
            ..base_args()
        };
        let config = build_config(&args).expect("should build config");
        assert_eq!(config.queries, vec!["alfa", "beta", "gama"]);
        assert_eq!(config.query, "alfa");
    }
}