1pub use webfetch_core::{compress, refs, tls};
11
12pub mod extract;
13pub mod types;
14
15use std::time::Duration;
16
17use reqwest::Client;
18
19use crate::compress::estimate_tokens;
20use types::{Reference, SearchOptions, SearchOutput, SearchResult};
21
/// Browser-like value sent in the `User-Agent` header of every request issued
/// by `attempt`.
const USER_AGENT: &str =
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36";
/// Upper bound on how many times `fetch_ddg_lite` calls `attempt` before
/// giving up and returning the last error.
const MAX_ATTEMPTS: u32 = 3;
25
26pub async fn fetch_ddg_lite(query: &str, options: &SearchOptions) -> anyhow::Result<String> {
29 let builder = Client::builder()
30 .timeout(Duration::from_secs(options.timeout_secs))
31 .gzip(true);
32 let client = options.tls.apply(builder)?.build()?;
35
36 let mut url = format!(
37 "https://lite.duckduckgo.com/lite/?q={}",
38 urlencoding::encode(query)
39 );
40 if let Some(safe) = options.safe_search {
42 url.push_str(if safe { "&kp=1" } else { "&kp=-1" });
43 }
44
45 let mut delay = Duration::from_millis(200);
46 for attempt_no in 1..=MAX_ATTEMPTS {
47 match attempt(&client, &url).await {
48 Ok(body) => return Ok(body),
49 Err((err, transient)) => {
50 if attempt_no == MAX_ATTEMPTS || !transient {
51 return Err(err);
52 }
53 tokio::time::sleep(delay).await;
54 delay *= 2;
55 }
56 }
57 }
58 unreachable!("loop returns on the final attempt")
59}
60
61async fn attempt(client: &Client, url: &str) -> Result<String, (anyhow::Error, bool)> {
63 let resp = match client
64 .get(url)
65 .header("User-Agent", USER_AGENT)
66 .send()
67 .await
68 {
69 Ok(r) => r,
70 Err(e) => {
71 let transient = e.is_timeout() || e.is_connect() || e.is_request();
72 return Err((e.into(), transient));
73 }
74 };
75 let status = resp.status();
76 let resp = match resp.error_for_status() {
77 Ok(r) => r,
78 Err(e) => {
79 let transient = status.is_server_error() || status.as_u16() == 429;
80 return Err((e.into(), transient));
81 }
82 };
83 match resp.text().await {
84 Ok(body) => Ok(body),
85 Err(e) => {
86 let transient = e.is_timeout();
87 Err((e.into(), transient))
88 }
89 }
90}
91
92pub fn build_refs(results: &[SearchResult]) -> Vec<Reference> {
94 results
95 .iter()
96 .map(|r| Reference {
97 index: r.ref_index,
98 url: r.url.clone(),
99 })
100 .collect()
101}
102
103pub fn format_results(results: &[SearchResult]) -> String {
106 results
107 .iter()
108 .map(|r| {
109 if r.snippet.is_empty() {
110 format!("{} [{}]", r.title, r.ref_index)
111 } else {
112 format!("{} [{}]\n{}", r.title, r.ref_index, r.snippet)
113 }
114 })
115 .collect::<Vec<_>>()
116 .join("\n\n")
117}
118
119pub fn render_references(refs: &[Reference]) -> String {
122 crate::refs::render_block(refs)
123}
124
125pub fn build_output(query: &str, html: &str, max_results: usize) -> SearchOutput {
127 let results = extract::parse_ddg_lite(html, max_results);
128 let references = build_refs(&results);
129
130 let body = format_results(&results);
131 let refs_block = render_references(&references);
132 let full = if refs_block.is_empty() {
133 body
134 } else {
135 format!("{body}\n\n{refs_block}")
136 };
137
138 SearchOutput {
139 query: query.to_string(),
140 token_estimate: estimate_tokens(&full),
141 result_count: results.len(),
142 references,
143 results,
144 }
145}
146
147pub async fn run_search(options: SearchOptions) -> anyhow::Result<SearchOutput> {
149 let html = fetch_ddg_lite(&options.query, &options).await?;
150 let max = options.max_results.unwrap_or(5);
151 Ok(build_output(&options.query, &html, max))
152}