use crate::http::BrowserProfile;
use serde::{Deserialize, Serialize};
/// One entry in a list of search results.
///
/// Rust field names are English; most are renamed to Portuguese keys when
/// (de)serialized. Every `Option` field is left out of the serialized output
/// when it is `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Serialized under the key `posicao`.
    #[serde(rename = "posicao")]
    pub position: u32,

    /// Serialized under the key `titulo`.
    #[serde(rename = "titulo")]
    pub title: String,

    /// Serialized under its Rust name, `url`.
    pub url: String,

    /// Serialized under the key `url_exibicao`; omitted when `None`.
    #[serde(rename = "url_exibicao", skip_serializing_if = "Option::is_none")]
    pub display_url: Option<String>,

    /// Serialized under its Rust name, `snippet`; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub snippet: Option<String>,

    /// Serialized under the key `titulo_original`; omitted when `None`.
    #[serde(rename = "titulo_original", skip_serializing_if = "Option::is_none")]
    pub original_title: Option<String>,

    /// Serialized under the key `conteudo`; omitted when `None`.
    #[serde(rename = "conteudo", skip_serializing_if = "Option::is_none")]
    pub content: Option<String>,

    /// Serialized under the key `tamanho_conteudo`; omitted when `None`.
    #[serde(rename = "tamanho_conteudo", skip_serializing_if = "Option::is_none")]
    pub content_size: Option<u32>,

    /// Serialized under the key `metodo_extracao_conteudo`; omitted when `None`.
    #[serde(
        rename = "metodo_extracao_conteudo",
        skip_serializing_if = "Option::is_none"
    )]
    pub content_extraction_method: Option<String>,
}
/// Bookkeeping data attached to a [`SearchOutput`].
///
/// Most fields are renamed to Portuguese keys when (de)serialized. The two
/// `Option` fields are left out of the serialized output when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchMetadata {
    /// Serialized under the key `tempo_execucao_ms`.
    #[serde(rename = "tempo_execucao_ms")]
    pub execution_time_ms: u64,

    /// Serialized under the key `hash_seletores`.
    #[serde(rename = "hash_seletores")]
    pub selectors_hash: String,

    /// Serialized under the key `retentativas`.
    #[serde(rename = "retentativas")]
    pub retries: u32,

    /// Serialized under the key `usou_endpoint_fallback`.
    #[serde(rename = "usou_endpoint_fallback")]
    pub used_fallback_endpoint: bool,

    /// Serialized under the key `fetches_simultaneos`.
    #[serde(rename = "fetches_simultaneos")]
    pub concurrent_fetches: u32,

    /// Serialized under the key `sucessos_fetch`.
    #[serde(rename = "sucessos_fetch")]
    pub fetch_successes: u32,

    /// Serialized under the key `falhas_fetch`.
    #[serde(rename = "falhas_fetch")]
    pub fetch_failures: u32,

    /// Serialized under the key `usou_chrome`.
    #[serde(rename = "usou_chrome")]
    pub used_chrome: bool,

    /// Serialized under its Rust name, `user_agent`.
    pub user_agent: String,

    /// Serialized under the key `identidade_usada`; omitted when `None`.
    #[serde(rename = "identidade_usada", skip_serializing_if = "Option::is_none")]
    pub identity_used: Option<String>,

    /// Serialized under the key `nivel_cascata`; omitted when `None`.
    #[serde(rename = "nivel_cascata", skip_serializing_if = "Option::is_none")]
    pub cascade_level: Option<u32>,

    /// Serialized under the key `usou_proxy`.
    #[serde(rename = "usou_proxy")]
    pub used_proxy: bool,
}
/// Serializable record for one query: its results plus [`SearchMetadata`].
///
/// `query`, `endpoint` and `timestamp` keep their Rust names when serialized;
/// the remaining fields are renamed to Portuguese keys. `error` and `message`
/// are left out of the serialized output when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchOutput {
    /// Serialized under its Rust name, `query`.
    pub query: String,

    /// Serialized under the key `motor`.
    #[serde(rename = "motor")]
    pub engine: String,

    /// Serialized under its Rust name, `endpoint`.
    pub endpoint: String,

    /// Serialized under its Rust name, `timestamp`.
    pub timestamp: String,

    /// Serialized under the key `regiao`.
    #[serde(rename = "regiao")]
    pub region: String,

    /// Serialized under the key `quantidade_resultados`.
    #[serde(rename = "quantidade_resultados")]
    pub result_count: u32,

    /// Serialized under the key `resultados`.
    #[serde(rename = "resultados")]
    pub results: Vec<SearchResult>,

    /// Serialized under the key `paginas_buscadas`.
    #[serde(rename = "paginas_buscadas")]
    pub pages_fetched: u32,

    /// Serialized under the key `erro`; omitted when `None`.
    #[serde(rename = "erro", skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,

    /// Serialized under the key `mensagem`; omitted when `None`.
    #[serde(rename = "mensagem", skip_serializing_if = "Option::is_none")]
    pub message: Option<String>,

    /// Serialized under the key `metadados`.
    #[serde(rename = "metadados")]
    pub metadata: SearchMetadata,
}
/// Serializable container holding a list of [`SearchOutput`] values together
/// with two counters and a timestamp string.
///
/// All fields except `timestamp` are renamed to Portuguese keys when
/// (de)serialized.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MultiSearchOutput {
/// Serialized under the key `quantidade_queries`.
#[serde(rename = "quantidade_queries")]
pub query_count: u32,
/// Serialized under its Rust name, `timestamp`.
pub timestamp: String,
/// Serialized under the key `paralelismo`.
#[serde(rename = "paralelismo")]
pub parallelism: u32,
/// Serialized under the key `buscas`.
#[serde(rename = "buscas")]
pub searches: Vec<SearchOutput>,
}
/// Top-level grouping of every selector set.
///
/// The container-level `#[serde(default)]` already fills any section missing
/// from the input with the corresponding field of `SelectorConfig::default()`.
/// Because `Default` is derived here, that equals each section type's own
/// `Default`, so no per-field `#[serde(default)]` is needed and all four
/// sections are treated the same way.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct SelectorConfig {
    /// Selector set of type [`HtmlSelectors`].
    pub html_endpoint: HtmlSelectors,
    /// Selector set of type [`LiteSelectors`].
    pub lite_endpoint: LiteSelectors,
    /// Selector set of type [`PaginationSelectors`].
    pub pagination: PaginationSelectors,
    /// Selector set of type [`RelatedSelectors`].
    pub related_searches: RelatedSelectors,
}
/// Group of selector strings plus an [`AdFilter`].
///
/// `#[serde(default)]` on the struct means any field missing from the
/// deserialized input is taken from this type's `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct HtmlSelectors {
/// Selector string for the results container.
pub results_container: String,
/// Selector string for an individual result item.
pub result_item: String,
/// Selector string for the element carrying title and URL.
pub title_and_url: String,
/// Selector string for the snippet element.
pub snippet: String,
/// Selector string for the display-URL element.
pub display_url: String,
/// Ad-filtering rules; see [`AdFilter`].
pub ads_filter: AdFilter,
}
/// Three lists of strings describing ads: classes, attributes and URL
/// patterns.
///
/// `#[serde(default)]` on the struct means any field missing from the
/// deserialized input is taken from this type's `Default` implementation.
/// NOTE(review): how each list is matched is not visible in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct AdFilter {
/// List of ad class entries.
pub ad_classes: Vec<String>,
/// List of ad attribute entries.
pub ad_attributes: Vec<String>,
/// List of ad URL pattern entries.
pub ad_url_patterns: Vec<String>,
}
/// Group of three selector strings: results table, result link and result
/// snippet.
///
/// `#[serde(default)]` on the struct means any field missing from the
/// deserialized input is taken from this type's `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct LiteSelectors {
/// Selector string for the results table.
pub results_table: String,
/// Selector string for a result link.
pub result_link: String,
/// Selector string for a result snippet.
pub result_snippet: String,
}
/// Group of selector strings for three inputs (`vqd`, `s`, `dc`) and a
/// "next" form.
///
/// `#[serde(default)]` on the struct means any field missing from the
/// deserialized input is taken from this type's `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct PaginationSelectors {
/// Selector string for the `vqd` input.
pub vqd_input: String,
/// Selector string for the `s` input.
pub s_input: String,
/// Selector string for the `dc` input.
pub dc_input: String,
/// Selector string for the "next" form.
pub next_form: String,
}
/// Pair of selector strings: a container and the links inside it.
///
/// `#[serde(default)]` on the struct means any field missing from the
/// deserialized input is taken from this type's `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct RelatedSelectors {
/// Selector string for the container.
pub container: String,
/// Selector string for the links.
pub links: String,
}
impl Default for HtmlSelectors {
fn default() -> Self {
Self {
results_container: "#links".to_string(),
result_item:
"#links .result:not(.result--ad), #links .results_links, div.result:not(.result--ad)"
.to_string(),
title_and_url: ".result__a, a.result__a, .result__title a".to_string(),
snippet: ".result__snippet, a.result__snippet".to_string(),
display_url: ".result__url, span.result__url".to_string(),
ads_filter: AdFilter::default(),
}
}
}
impl Default for AdFilter {
    /// Returns the built-in ad classes, ad attributes and ad URL patterns.
    fn default() -> Self {
        // Turns a list of string literals into owned strings.
        let owned = |items: &[&str]| -> Vec<String> {
            items.iter().map(|item| (*item).to_owned()).collect()
        };

        Self {
            ad_classes: owned(&[".result--ad", ".badge--ad"]),
            ad_attributes: owned(&["data-nrn=ad"]),
            ad_url_patterns: owned(&["duckduckgo.com/y.js"]),
        }
    }
}
impl Default for LiteSelectors {
    /// Returns the built-in selector strings for the table, link and snippet.
    fn default() -> Self {
        let results_table = "table, body table";
        let result_link = "a.result-link, td a[href]";
        let result_snippet = "td.result-snippet, tr.result-snippet td";

        Self {
            results_table: results_table.to_owned(),
            result_link: result_link.to_owned(),
            result_snippet: result_snippet.to_owned(),
        }
    }
}
impl Default for PaginationSelectors {
    /// Returns the built-in selector strings for the `vqd`, `s` and `dc`
    /// inputs and for the "next" form.
    fn default() -> Self {
        Self {
            vqd_input: String::from("input[name='vqd'], input[type='hidden'][name='vqd']"),
            s_input: String::from("input[name='s']"),
            dc_input: String::from("input[name='dc']"),
            next_form: String::from("form.result--more__btn, form[action='/html/']"),
        }
    }
}
impl Default for RelatedSelectors {
    /// Returns the built-in container selector and the plain `a` link
    /// selector.
    fn default() -> Self {
        let container = ".result--more__btn, .result--sep".to_owned();
        let links = "a".to_owned();
        Self { container, links }
    }
}
/// The two endpoint flavours this module distinguishes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Endpoint {
    /// Named `"html"` by [`Endpoint::as_str`].
    Html,
    /// Named `"lite"` by [`Endpoint::as_str`].
    Lite,
}

impl Endpoint {
    /// Returns the lowercase name of the endpoint: `"html"` or `"lite"`.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Html => "html",
            Self::Lite => "lite",
        }
    }
}
/// Time window options, each mapped to a one-letter parameter value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TimeFilter {
    /// Parameter value `"d"`.
    Day,
    /// Parameter value `"w"`.
    Week,
    /// Parameter value `"m"`.
    Month,
    /// Parameter value `"y"`.
    Year,
}

impl TimeFilter {
    /// Returns the one-letter parameter value for this filter:
    /// `"d"`, `"w"`, `"m"` or `"y"`.
    pub fn as_param(&self) -> &'static str {
        match *self {
            Self::Day => "d",
            Self::Week => "w",
            Self::Month => "m",
            Self::Year => "y",
        }
    }
}
/// Safe-search levels.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SafeSearch {
    /// Parameter value `"-1"`.
    Off,
    /// No parameter value at all.
    Moderate,
    /// Parameter value `"1"`.
    Strict,
}

impl SafeSearch {
    /// Returns the parameter value for this level, if it has one.
    ///
    /// `Off` yields `Some("-1")`, `Strict` yields `Some("1")` and `Moderate`
    /// yields `None`.
    // NOTE(review): presumably `Moderate` is the remote default and therefore
    // needs no parameter; confirm against the request builder.
    pub fn as_param(&self) -> Option<&'static str> {
        match *self {
            Self::Moderate => None,
            Self::Off => Some("-1"),
            Self::Strict => Some("1"),
        }
    }
}
/// Flat bundle of settings.
///
/// NOTE(review): nothing in this file shows how the fields are populated or
/// consumed; the notes below state only what the types establish, and anything
/// further should be confirmed against the callers.
#[derive(Debug, Clone)]
pub struct Config {
// Both a single query and a list of queries are stored.
// NOTE(review): which one takes precedence is decided elsewhere.
pub query: String,
pub queries: Vec<String>,
// Optional; `None` means no explicit value was set.
pub num_results: Option<u32>,
// One of the `OutputFormat` variants (Json, Text, Markdown, Auto).
pub format: OutputFormat,
pub timeout_seconds: u64,
pub language: String,
pub country: String,
pub verbose: bool,
pub quiet: bool,
pub user_agent: String,
// Type comes from `crate::http`.
pub browser_profile: BrowserProfile,
pub parallelism: u32,
pub pages: u32,
pub retries: u32,
// One of the `Endpoint` variants (Html, Lite).
pub endpoint: Endpoint,
// Optional; `None` means no time filter.
pub time_filter: Option<TimeFilter>,
pub safe_search: SafeSearch,
pub stream_mode: bool,
// Optional filesystem path.
pub output_file: Option<std::path::PathBuf>,
pub fetch_content: bool,
pub max_content_length: usize,
// `proxy` is an optional string and `no_proxy` a separate flag.
// NOTE(review): how the two interact is not visible here.
pub proxy: Option<String>,
pub no_proxy: bool,
pub global_timeout_seconds: u64,
pub match_platform_ua: bool,
pub per_host_limit: usize,
// Optional filesystem path.
pub chrome_path: Option<std::path::PathBuf>,
// Held behind an `Arc`, so cloning `Config` shares one `SelectorConfig`
// instead of deep-copying it.
pub selectors: std::sync::Arc<SelectorConfig>,
}
/// Output format options.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    /// Parsed from `"json"`.
    Json,
    /// Parsed from `"text"`.
    Text,
    /// Parsed from `"markdown"` or `"md"`.
    Markdown,
    /// Parsed from `"auto"`.
    Auto,
}

impl OutputFormat {
    /// Parses a format name, ignoring ASCII case.
    ///
    /// Accepts `json`, `text`, `markdown` (alias `md`) and `auto`. Any other
    /// input yields `None`. The input is not trimmed, so surrounding
    /// whitespace makes the lookup fail.
    pub fn from_str_value(value: &str) -> Option<Self> {
        // Every alias is lowercase ASCII, so an ASCII-case-insensitive
        // comparison matches exactly the inputs that lowercasing would.
        let aliases = [
            ("json", Self::Json),
            ("text", Self::Text),
            ("markdown", Self::Markdown),
            ("md", Self::Markdown),
            ("auto", Self::Auto),
        ];

        aliases
            .iter()
            .find(|(alias, _)| value.eq_ignore_ascii_case(alias))
            .map(|&(_, format)| format)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `json` contains every fragment in `present` and none of
    /// the fragments in `absent`.
    fn assert_fragments(json: &str, present: &[&str], absent: &[&str]) {
        for fragment in present {
            assert!(json.contains(fragment), "missing fragment: {}", fragment);
        }
        for fragment in absent {
            assert!(!json.contains(fragment), "unexpected fragment: {}", fragment);
        }
    }

    /// The default selector configuration carries the built-in container
    /// selector and the built-in ad URL pattern.
    #[test]
    fn selector_config_default_has_result_container() {
        let defaults = SelectorConfig::default();
        let html = &defaults.html_endpoint;

        assert_eq!(html.results_container, "#links");
        assert!(html
            .ads_filter
            .ad_url_patterns
            .iter()
            .any(|pattern| pattern == "duckduckgo.com/y.js"));
    }

    /// Known names parse regardless of ASCII case; unknown names yield `None`.
    #[test]
    fn output_format_parses_valid_variants() {
        let cases = [
            ("json", Some(OutputFormat::Json)),
            ("TEXT", Some(OutputFormat::Text)),
            ("markdown", Some(OutputFormat::Markdown)),
            ("md", Some(OutputFormat::Markdown)),
            ("Auto", Some(OutputFormat::Auto)),
            ("xml", None),
        ];

        for (input, expected) in cases.iter() {
            assert_eq!(OutputFormat::from_str_value(input), *expected);
        }
    }

    /// A serialized `SearchOutput` uses the Portuguese keys and none of the
    /// English field names that were renamed.
    #[test]
    fn search_output_serializes_pt_json_keys() {
        let metadata = SearchMetadata {
            execution_time_ms: 0,
            selectors_hash: "abc123".to_string(),
            retries: 0,
            used_fallback_endpoint: false,
            concurrent_fetches: 0,
            fetch_successes: 0,
            fetch_failures: 0,
            used_chrome: false,
            user_agent: "Mozilla/5.0".to_string(),
            used_proxy: false,
            identity_used: None,
            cascade_level: None,
        };
        let output = SearchOutput {
            query: "teste".to_string(),
            engine: "duckduckgo".to_string(),
            endpoint: "html".to_string(),
            timestamp: "2026-04-14T00:00:00Z".to_string(),
            region: "br-pt".to_string(),
            result_count: 0,
            results: vec![],
            pages_fetched: 1,
            error: None,
            message: None,
            metadata,
        };

        let json = serde_json::to_string(&output).expect("serialization should work");

        assert_fragments(
            &json,
            &[
                "\"query\"",
                "\"quantidade_resultados\"",
                "\"tempo_execucao_ms\"",
                "\"resultados\"",
                "\"metadados\"",
            ],
            &[
                "\"buscas_relacionadas\"",
                "\"results_count\"",
                "\"results\":",
                "\"metadata\"",
                "\"related_searches\"",
            ],
        );
    }

    /// A serialized `MultiSearchOutput` uses the Portuguese keys with the
    /// expected values and none of the English names.
    #[test]
    fn multi_search_output_serializes_pt_json_keys() {
        let output = MultiSearchOutput {
            query_count: 2,
            timestamp: "2026-04-14T00:00:00Z".to_string(),
            parallelism: 5,
            searches: vec![],
        };

        let json = serde_json::to_string(&output).expect("serialization should work");

        assert_fragments(
            &json,
            &[
                "\"quantidade_queries\":2",
                "\"paralelismo\":5",
                "\"buscas\":[]",
            ],
            &["\"queries_count\"", "\"parallel\"", "\"searches\""],
        );
    }
}