1pub use webfetch_core::{compress, refs};
11
12pub mod extract;
13pub mod types;
14
15use std::time::Duration;
16
17use reqwest::Client;
18
19use crate::compress::estimate_tokens;
20use types::{Reference, SearchOptions, SearchOutput, SearchResult};
21
/// Browser-like `User-Agent` header value sent with every request issued by `attempt`.
const USER_AGENT: &str =
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36";
/// Upper bound on how many times `fetch_ddg_lite` tries the request before giving up.
const MAX_ATTEMPTS: u32 = 3;
25
26pub async fn fetch_ddg_lite(query: &str, options: &SearchOptions) -> anyhow::Result<String> {
29 let client = Client::builder()
30 .timeout(Duration::from_secs(options.timeout_secs))
31 .gzip(true)
32 .build()?;
33
34 let mut url = format!(
35 "https://lite.duckduckgo.com/lite/?q={}",
36 urlencoding::encode(query)
37 );
38 if let Some(safe) = options.safe_search {
40 url.push_str(if safe { "&kp=1" } else { "&kp=-1" });
41 }
42
43 let mut delay = Duration::from_millis(200);
44 for attempt_no in 1..=MAX_ATTEMPTS {
45 match attempt(&client, &url).await {
46 Ok(body) => return Ok(body),
47 Err((err, transient)) => {
48 if attempt_no == MAX_ATTEMPTS || !transient {
49 return Err(err);
50 }
51 tokio::time::sleep(delay).await;
52 delay *= 2;
53 }
54 }
55 }
56 unreachable!("loop returns on the final attempt")
57}
58
59async fn attempt(client: &Client, url: &str) -> Result<String, (anyhow::Error, bool)> {
61 let resp = match client
62 .get(url)
63 .header("User-Agent", USER_AGENT)
64 .send()
65 .await
66 {
67 Ok(r) => r,
68 Err(e) => {
69 let transient = e.is_timeout() || e.is_connect() || e.is_request();
70 return Err((e.into(), transient));
71 }
72 };
73 let status = resp.status();
74 let resp = match resp.error_for_status() {
75 Ok(r) => r,
76 Err(e) => {
77 let transient = status.is_server_error() || status.as_u16() == 429;
78 return Err((e.into(), transient));
79 }
80 };
81 match resp.text().await {
82 Ok(body) => Ok(body),
83 Err(e) => {
84 let transient = e.is_timeout();
85 Err((e.into(), transient))
86 }
87 }
88}
89
90pub fn build_refs(results: &[SearchResult]) -> Vec<Reference> {
92 results
93 .iter()
94 .map(|r| Reference {
95 index: r.ref_index,
96 url: r.url.clone(),
97 })
98 .collect()
99}
100
101pub fn format_results(results: &[SearchResult]) -> String {
104 results
105 .iter()
106 .map(|r| {
107 if r.snippet.is_empty() {
108 format!("{} [{}]", r.title, r.ref_index)
109 } else {
110 format!("{} [{}]\n{}", r.title, r.ref_index, r.snippet)
111 }
112 })
113 .collect::<Vec<_>>()
114 .join("\n\n")
115}
116
117pub fn render_references(refs: &[Reference]) -> String {
120 crate::refs::render_block(refs)
121}
122
123pub fn build_output(query: &str, html: &str, max_results: usize) -> SearchOutput {
125 let results = extract::parse_ddg_lite(html, max_results);
126 let references = build_refs(&results);
127
128 let body = format_results(&results);
129 let refs_block = render_references(&references);
130 let full = if refs_block.is_empty() {
131 body
132 } else {
133 format!("{body}\n\n{refs_block}")
134 };
135
136 SearchOutput {
137 query: query.to_string(),
138 token_estimate: estimate_tokens(&full),
139 result_count: results.len(),
140 references,
141 results,
142 }
143}
144
145pub async fn run_search(options: SearchOptions) -> anyhow::Result<SearchOutput> {
147 let html = fetch_ddg_lite(&options.query, &options).await?;
148 let max = options.max_results.unwrap_or(5);
149 Ok(build_output(&options.query, &html, max))
150}