1use std::collections::HashMap;
18use std::time::Duration;
19
20use serde_json::Value;
21use tracing::{debug, error, warn};
22use wreq_util::Emulation;
23
24use crate::config::{
25 FetcherConfig, FetcherConfigBuilder, FollowRedirects, Impersonate, ParserConfig,
26};
27use crate::error::{FetchError, Result};
28use crate::fingerprint::{default_user_agent, generate_headers};
29use crate::proxy::{Proxy, ProxyRotator};
30use crate::response::{Response, build_response_async};
31
32fn merge_headers(
33 base: &HashMap<String, String>,
34 req: &RequestConfig,
35 stealth: bool,
36 impersonate_enabled: bool,
37) -> HashMap<String, String> {
38 let mut headers = base.clone();
39
40 if let Some(req_headers) = &req.headers {
41 headers.extend(req_headers.iter().map(|(k, v)| (k.clone(), v.clone())));
42 }
43
44 let keys_lower: std::collections::HashSet<String> =
45 headers.keys().map(|k| k.to_lowercase()).collect();
46
47 match (stealth, impersonate_enabled) {
48 (true, _) => {
49 if !keys_lower.contains("referer") {
50 headers.insert("referer".into(), "https://www.google.com/".into());
51 }
52 if !impersonate_enabled {
53 generate_headers(false)
54 .into_iter()
55 .filter(|(k, _)| !keys_lower.contains(&k.to_lowercase()))
56 .for_each(|(k, v)| {
57 headers.insert(k, v);
58 });
59 }
60 }
61 (false, false) if !keys_lower.contains("user-agent") => {
62 headers.insert("User-Agent".into(), default_user_agent());
63 }
64 _ => {}
65 }
66
67 headers
68}
69
70fn resolve_emulation(name: &str) -> Option<Emulation> {
75 match name.to_lowercase().as_str() {
76 "chrome" | "chrome145" => Some(Emulation::Chrome145),
77 "chrome100" => Some(Emulation::Chrome100),
78 "chrome120" => Some(Emulation::Chrome120),
79 "chrome124" => Some(Emulation::Chrome124),
80 "chrome131" => Some(Emulation::Chrome131),
81 "chrome136" => Some(Emulation::Chrome136),
82 "chrome140" => Some(Emulation::Chrome140),
83 "chrome142" => Some(Emulation::Chrome142),
84 "chrome143" => Some(Emulation::Chrome143),
85 "chrome144" => Some(Emulation::Chrome144),
86 "edge" | "edge145" => Some(Emulation::Edge145),
87 "edge140" => Some(Emulation::Edge140),
88 "edge134" => Some(Emulation::Edge134),
89 "safari" | "safari26" => Some(Emulation::Safari26),
90 "safari18" => Some(Emulation::Safari18_5),
91 "firefox" | "firefox135" => Some(Emulation::Firefox135),
92 "firefox133" => Some(Emulation::Firefox133),
93 "firefox128" => Some(Emulation::Firefox128),
94 _ => None,
95 }
96}
97
/// Per-request options.
///
/// Every field is optional; `RequestConfig::default()` leaves all of them as `None`. Where a
/// field has a counterpart in the fetcher configuration, `None` means "use the configured value".
///
/// Note that `FetcherSession::request` in this file only reads `headers`, `cookies`, `params`,
/// `impersonate`, `stealthy_headers`, `data`, `json` and `auth`; the remaining fields are read
/// only by `Fetcher`.
#[derive(Debug, Default)]
pub struct RequestConfig {
    /// Extra headers layered on top of the configured headers for this request.
    pub headers: Option<HashMap<String, String>>,
    /// Cookies, sent as a single `cookie` header of `name=value` pairs joined by `"; "`.
    pub cookies: Option<HashMap<String, String>>,
    /// Key/value pairs appended to the URL's query string.
    pub params: Option<HashMap<String, String>>,
    /// Request timeout in seconds (overrides the configured timeout).
    pub timeout_secs: Option<u64>,
    /// Redirect handling mode (overrides the configured mode).
    pub follow_redirects: Option<FollowRedirects>,
    /// Redirect limit used when redirects are followed (overrides the configured limit).
    pub max_redirects: Option<usize>,
    /// Number of attempts `Fetcher` makes before giving up (overrides the configured value).
    pub retries: Option<u32>,
    /// Seconds to sleep between failed attempts (overrides the configured value).
    pub retry_delay_secs: Option<u64>,
    /// Proxy for this request; when set it takes precedence over the proxy rotator and the
    /// configured proxy.
    pub proxy: Option<Proxy>,
    /// When `Some(false)`, certificate verification is disabled for this request.
    pub verify: Option<bool>,
    /// Browser impersonation selection (overrides the configured one).
    pub impersonate: Option<Impersonate>,
    /// Whether stealth headers are added while merging headers (overrides the configured flag).
    pub stealthy_headers: Option<bool>,
    /// Raw request body. Only used when `json` is `None`.
    pub data: Option<Vec<u8>>,
    /// JSON request body; serialized and sent with `content-type: application/json`. Takes
    /// precedence over `data`.
    pub json: Option<Value>,
    /// `(user, password)` pair passed to the request builder's basic-auth support.
    pub auth: Option<(String, String)>,
}
148
/// One-shot HTTP fetcher: each request builds its own `wreq::Client` from the stored
/// configuration combined with the per-request `RequestConfig`.
pub struct Fetcher {
    /// Defaults used for every request unless the `RequestConfig` overrides them.
    config: FetcherConfig,
    /// Optional proxy source, consulted per attempt when the request carries no proxy of its own.
    proxy_rotator: Option<ProxyRotator>,
    /// Parser settings; stored and replaceable via `set_parser_config`, but not read by any code
    /// visible in this part of the file.
    parser_config: ParserConfig,
}
166
167impl Fetcher {
168 pub fn new() -> Self {
171 Self {
172 config: FetcherConfig::default(),
173 proxy_rotator: None,
174 parser_config: ParserConfig::default(),
175 }
176 }
177
178 pub fn with_config(config: FetcherConfig) -> Self {
181 Self {
182 config,
183 proxy_rotator: None,
184 parser_config: ParserConfig::default(),
185 }
186 }
187
188 pub fn builder() -> FetcherConfigBuilder {
191 FetcherConfigBuilder::new()
192 }
193
194 pub fn from_builder(builder: FetcherConfigBuilder) -> Result<Self> {
198 let (config, rotator) = builder.build()?;
199 Ok(Self {
200 config,
201 proxy_rotator: rotator,
202 parser_config: ParserConfig::default(),
203 })
204 }
205
206 pub fn set_proxy_rotator(&mut self, rotator: ProxyRotator) {
209 self.proxy_rotator = Some(rotator);
210 }
211
212 pub fn set_parser_config(&mut self, parser_config: ParserConfig) {
215 self.parser_config = parser_config;
216 }
217
218 pub fn config(&self) -> &FetcherConfig {
221 &self.config
222 }
223
224 pub async fn get(&self, url: &str, req: Option<RequestConfig>) -> Result<Response> {
228 self.request("GET", url, req.unwrap_or_default()).await
229 }
230
231 pub async fn post(&self, url: &str, req: Option<RequestConfig>) -> Result<Response> {
234 self.request("POST", url, req.unwrap_or_default()).await
235 }
236
237 pub async fn put(&self, url: &str, req: Option<RequestConfig>) -> Result<Response> {
240 self.request("PUT", url, req.unwrap_or_default()).await
241 }
242
243 pub async fn delete(&self, url: &str, req: Option<RequestConfig>) -> Result<Response> {
246 self.request("DELETE", url, req.unwrap_or_default()).await
247 }
248
249 async fn request(&self, method: &str, url: &str, req: RequestConfig) -> Result<Response> {
250 let max_retries = req.retries.unwrap_or(self.config.retries);
251 let retry_delay = req.retry_delay_secs.unwrap_or(self.config.retry_delay_secs);
252 let static_proxy = req.proxy.clone();
253
254 let mut last_error: Option<FetchError> = None;
255
256 for attempt in 0..max_retries {
257 let proxy = match (&self.proxy_rotator, &static_proxy) {
258 (Some(rotator), None) => Some(rotator.get_proxy()),
259 _ => static_proxy.clone().or_else(|| self.config.proxy.clone()),
260 };
261
262 match self
263 .execute_request(method, url, &req, proxy.as_ref())
264 .await
265 {
266 Ok(response) => return Ok(response),
267 Err(e) => {
268 match attempt < max_retries - 1 {
269 true => {
270 warn!(attempt = attempt + 1, error = %e, "request failed, retrying in {retry_delay}s");
271 tokio::time::sleep(Duration::from_secs(retry_delay)).await;
272 }
273 false => {
274 error!(attempts = max_retries, error = %e, "all retries exhausted");
275 }
276 }
277 last_error = Some(e);
278 }
279 }
280 }
281
282 Err(FetchError::MaxRetriesExceeded {
283 attempts: max_retries,
284 last_error: Box::new(last_error.unwrap_or(FetchError::Other("unknown error".into()))),
285 })
286 }
287
288 async fn execute_request(
289 &self,
290 method: &str,
291 url: &str,
292 req: &RequestConfig,
293 proxy: Option<&Proxy>,
294 ) -> Result<Response> {
295 let stealth = req.stealthy_headers.unwrap_or(self.config.stealthy_headers);
296 let impersonate = req.impersonate.as_ref().unwrap_or(&self.config.impersonate);
297 let impersonate_selected = impersonate.select();
298 let timeout = req.timeout_secs.unwrap_or(self.config.timeout_secs);
299 let follow = req.follow_redirects.unwrap_or(self.config.follow_redirects);
300 let max_redirects = req.max_redirects.unwrap_or(self.config.max_redirects);
301 let verify = req.verify.unwrap_or(self.config.verify);
302
303 let final_headers = merge_headers(
304 &self.config.headers,
305 req,
306 stealth,
307 impersonate_selected.is_some(),
308 );
309
310 let mut client_builder = wreq::Client::builder().timeout(Duration::from_secs(timeout));
312
313 if !verify {
314 client_builder = client_builder.cert_verification(false);
315 }
316
317 match follow {
318 FollowRedirects::None => {
319 client_builder = client_builder.redirect(wreq::redirect::Policy::none());
320 }
321 FollowRedirects::All | FollowRedirects::Safe => {
322 client_builder =
323 client_builder.redirect(wreq::redirect::Policy::limited(max_redirects));
324 }
325 }
326
327 if let Some(p) = proxy {
328 let rp = wreq::Proxy::all(p.server())
329 .map_err(|e| FetchError::InvalidProxy(e.to_string()))?;
330 client_builder = client_builder.proxy(rp);
331 }
332
333 let client = client_builder.build()?;
334
335 let http_method: wreq::Method = method
337 .parse()
338 .map_err(|_| FetchError::Other(format!("invalid HTTP method: {method}")))?;
339
340 let mut full_url = url::Url::parse(url)?;
341 if let Some(params) = &req.params {
342 let mut pairs = full_url.query_pairs_mut();
343 params.iter().for_each(|(k, v)| {
344 pairs.append_pair(k, v);
345 });
346 }
347
348 let mut request_builder = client.request(http_method, full_url.as_str());
349
350 if let Some(browser_name) = impersonate_selected {
352 if let Some(emulation) = resolve_emulation(browser_name) {
353 request_builder = request_builder.emulation(emulation);
354 }
355 }
356
357 for (k, v) in &final_headers {
359 request_builder = request_builder.header(k.as_str(), v.as_str());
360 }
361
362 if let Some(cookies) = &req.cookies {
364 let cookie_str = cookies
365 .iter()
366 .map(|(k, v)| format!("{k}={v}"))
367 .collect::<Vec<_>>()
368 .join("; ");
369 request_builder = request_builder.header("cookie", cookie_str);
370 }
371
372 if let Some((user, pass)) = &req.auth {
374 request_builder = request_builder.basic_auth(user, Some(pass));
375 }
376
377 if let Some(json_body) = &req.json {
379 request_builder = request_builder
380 .header("content-type", "application/json")
381 .body(serde_json::to_vec(json_body)?);
382 } else if let Some(data) = &req.data {
383 request_builder = request_builder.body(data.clone());
384 }
385
386 let request_headers_map = final_headers;
387
388 debug!(method, url, "sending request via wreq");
389
390 let resp = request_builder.send().await?;
391
392 let mut meta = HashMap::new();
393 if let Some(p) = proxy {
394 meta.insert("proxy".to_owned(), Value::String(p.server().to_owned()));
395 }
396
397 build_response_async(resp, request_headers_map, method, meta).await
398 }
399}
400
401impl Default for Fetcher {
402 fn default() -> Self {
403 Self::new()
404 }
405}
406
/// Session-style fetcher that reuses one `wreq::Client` (built by `open`, with its cookie store
/// enabled) across requests.
pub struct FetcherSession {
    /// Settings used to build the client in `open` and as defaults when merging headers.
    config: FetcherConfig,
    /// Optional proxy rotator; stored via `with_rotator` but not read by any code visible in this
    /// part of the file.
    proxy_rotator: Option<ProxyRotator>,
    /// Parser settings; stored via `with_parser_config` but not read by any code visible in this
    /// part of the file.
    parser_config: ParserConfig,
    /// The live client; `Some` between `open` and `close`, `None` otherwise.
    client: Option<wreq::Client>,
}
426
427impl FetcherSession {
428 pub fn new(config: FetcherConfig) -> Self {
431 Self {
432 config,
433 proxy_rotator: None,
434 parser_config: ParserConfig::default(),
435 client: None,
436 }
437 }
438
439 pub fn with_rotator(mut self, rotator: ProxyRotator) -> Self {
442 self.proxy_rotator = Some(rotator);
443 self
444 }
445
446 pub fn with_parser_config(mut self, parser_config: ParserConfig) -> Self {
449 self.parser_config = parser_config;
450 self
451 }
452
453 pub fn open(&mut self) -> Result<()> {
457 if self.client.is_some() {
458 return Err(FetchError::SessionAlreadyActive);
459 }
460
461 let mut builder = wreq::Client::builder()
462 .timeout(Duration::from_secs(self.config.timeout_secs))
463 .cookie_store(true);
464
465 if !self.config.verify {
466 builder = builder.cert_verification(false);
467 }
468
469 match self.config.follow_redirects {
470 FollowRedirects::None => {
471 builder = builder.redirect(wreq::redirect::Policy::none());
472 }
473 FollowRedirects::All | FollowRedirects::Safe => {
474 builder =
475 builder.redirect(wreq::redirect::Policy::limited(self.config.max_redirects));
476 }
477 }
478
479 if let Some(ref p) = self.config.proxy {
480 let rp = wreq::Proxy::all(p.server())
481 .map_err(|e| FetchError::InvalidProxy(e.to_string()))?;
482 builder = builder.proxy(rp);
483 }
484
485 self.client = Some(builder.build()?);
486 Ok(())
487 }
488
489 pub fn close(&mut self) {
492 self.client = None;
493 }
494
495 pub fn is_active(&self) -> bool {
498 self.client.is_some()
499 }
500
501 pub async fn get(&self, url: &str, req: Option<RequestConfig>) -> Result<Response> {
504 self.request("GET", url, req.unwrap_or_default()).await
505 }
506
507 pub async fn post(&self, url: &str, req: Option<RequestConfig>) -> Result<Response> {
510 self.request("POST", url, req.unwrap_or_default()).await
511 }
512
513 pub async fn put(&self, url: &str, req: Option<RequestConfig>) -> Result<Response> {
516 self.request("PUT", url, req.unwrap_or_default()).await
517 }
518
519 pub async fn delete(&self, url: &str, req: Option<RequestConfig>) -> Result<Response> {
521 self.request("DELETE", url, req.unwrap_or_default()).await
522 }
523
524 async fn request(&self, method: &str, url: &str, req: RequestConfig) -> Result<Response> {
525 let client = self.client.as_ref().ok_or(FetchError::SessionNotActive)?;
526
527 let stealth = req.stealthy_headers.unwrap_or(self.config.stealthy_headers);
528 let impersonate = req.impersonate.as_ref().unwrap_or(&self.config.impersonate);
529 let impersonate_selected = impersonate.select();
530
531 let final_headers = merge_headers(
532 &self.config.headers,
533 &req,
534 stealth,
535 impersonate_selected.is_some(),
536 );
537
538 let http_method: wreq::Method = method
539 .parse()
540 .map_err(|_| FetchError::Other(format!("invalid HTTP method: {method}")))?;
541
542 let mut full_url = url::Url::parse(url)?;
543 if let Some(params) = &req.params {
544 let mut pairs = full_url.query_pairs_mut();
545 params.iter().for_each(|(k, v)| {
546 pairs.append_pair(k, v);
547 });
548 }
549
550 let mut request_builder = client.request(http_method, full_url.as_str());
551
552 if let Some(browser_name) = impersonate_selected {
553 if let Some(emulation) = resolve_emulation(browser_name) {
554 request_builder = request_builder.emulation(emulation);
555 }
556 }
557
558 for (k, v) in &final_headers {
559 request_builder = request_builder.header(k.as_str(), v.as_str());
560 }
561
562 if let Some(cookies) = &req.cookies {
563 let cookie_str = cookies
564 .iter()
565 .map(|(k, v)| format!("{k}={v}"))
566 .collect::<Vec<_>>()
567 .join("; ");
568 request_builder = request_builder.header("cookie", cookie_str);
569 }
570
571 if let Some((user, pass)) = &req.auth {
572 request_builder = request_builder.basic_auth(user, Some(pass));
573 }
574
575 if let Some(json_body) = &req.json {
576 request_builder = request_builder
577 .header("content-type", "application/json")
578 .body(serde_json::to_vec(json_body)?);
579 } else if let Some(data) = &req.data {
580 request_builder = request_builder.body(data.clone());
581 }
582
583 debug!(method, url, "sending request via wreq session");
584
585 let resp = request_builder.send().await?;
586
587 build_response_async(resp, final_headers, method, HashMap::new()).await
588 }
589}
590
591impl Drop for FetcherSession {
592 fn drop(&mut self) {
593 self.close();
594 }
595}