Skip to main content

api_scanner/
http_client.rs

1// src/http_client.rs
2//
3// Thin wrapper around `reqwest::Client` with WAF evasion, size capping,
4// and convenience methods for the scanner modules.
5
6use crate::{
7    config::Config,
8    error::{CapturedError, ScannerError, ScannerResult},
9    waf::WafEvasion,
10};
11use dashmap::DashMap;
12use reqwest::{
13    header::{HeaderMap, HeaderName, HeaderValue, CONTENT_TYPE},
14    Client, Method, Response,
15};
16use serde::{Deserialize, Serialize};
17use std::{
18    collections::HashMap,
19    path::PathBuf,
20    sync::{
21        atomic::{AtomicBool, AtomicU64, Ordering},
22        Arc,
23    },
24    time::Duration,
25};
26use tokio::sync::{Mutex, OwnedSemaphorePermit, Semaphore};
27use tracing::{debug, info};
28use url::Url;
29
30// Optional auth credential support
31pub use crate::auth::LiveCredential;
32
/// An HTTP response that has been fully read into owned, plain-data fields.
///
/// Every field is an owned value, so the struct can be cloned and passed
/// between scanner modules without borrowing from the transport layer.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct HttpResponse {
    /// Numeric HTTP status code (for example `200` or `404`).
    pub status: u16,
    /// Response headers as a flat name-to-value map. [`HttpResponse::header`]
    /// lowercases the requested name before looking it up here.
    pub headers: HashMap<String, String>,
    /// Response body as text.
    pub body: String,
    /// URL this response is attributed to.
    pub url: String,
}
42
43/// Runtime transport counters captured during scan execution.
44#[derive(Debug, Clone, Copy, Default, Serialize)]
45pub struct HttpRuntimeMetrics {
46    pub requests_sent: u64,
47    pub retries_performed: u64,
48}
49
50#[allow(dead_code)]
51impl HttpResponse {
52    pub fn header(&self, key: &str) -> Option<&str> {
53        self.headers.get(&key.to_lowercase()).map(|s| s.as_str())
54    }
55
56    pub fn is_success(&self) -> bool {
57        self.status < 400
58    }
59
60    pub fn is_redirect(&self) -> bool {
61        (300..400).contains(&self.status)
62    }
63}
64
/// Upper bound, in bytes, on how much of a response body is kept:
/// 512 KiB (524,288 bytes). Bodies longer than this are truncated.
const MAX_RESPONSE_BYTES: usize = 512 * 1024;
67
/// Built-in list of credential-bearing header names that are removed from
/// unauthenticated probes. All names are written in lowercase.
const DEFAULT_UNAUTH_STRIP_HEADERS: &[&str] = &[
    // Standard HTTP credentials.
    "authorization",
    "cookie",
    // Common vendor / framework token headers.
    "x-api-key",
    "x-auth-token",
    "x-access-token",
    "x-authorization",
    "api-key",
    "x-session-token",
];
79
80/// Thin wrapper around `reqwest::Client` with WAF evasion & size capping.
81#[derive(Clone)]
82pub struct HttpClient {
83    inner: Client,
84    no_redirect_inner: Client,
85    unauth_inner: Client,
86    client_config: ClientConfig,
87    unauth_client_config: ClientConfig,
88    per_host_clients: bool,
89    clients: Arc<DashMap<String, Client>>,
90    unauth_clients: Arc<DashMap<String, Client>>,
91    spec_cache: Arc<DashMap<String, String>>,
92    waf_enabled: bool,
93    delay_ms: u64,
94    retries: u32,
95    host_last_request: Arc<DashMap<String, tokio::time::Instant>>,
96    session_store: Option<Arc<Mutex<SessionStore>>>,
97    session_path: Option<PathBuf>,
98    adaptive: Option<Arc<AdaptiveLimiter>>,
99    request_count: Arc<AtomicU64>,
100    retry_count: Arc<AtomicU64>,
101    /// Optional live credential for auth flow injection.
102    live_credential: Option<Arc<LiveCredential>>,
103    /// Header names to strip for unauthenticated probes.
104    unauth_strip_headers: Vec<HeaderName>,
105}
106
107#[derive(Debug)]
108struct AdaptiveLimiter {
109    semaphore: Arc<Semaphore>,
110    max: usize,
111    min: usize,
112    held: Arc<Mutex<Vec<OwnedSemaphorePermit>>>,
113    decrease_scheduled: Arc<AtomicBool>,
114    success_streak: std::sync::atomic::AtomicUsize,
115}
116
117impl AdaptiveLimiter {
118    fn new(max: usize) -> Self {
119        Self {
120            semaphore: Arc::new(Semaphore::new(max)),
121            max,
122            min: 1,
123            held: Arc::new(Mutex::new(Vec::new())),
124            decrease_scheduled: Arc::new(AtomicBool::new(false)),
125            success_streak: std::sync::atomic::AtomicUsize::new(0),
126        }
127    }
128
129    async fn acquire(&self) -> Result<OwnedSemaphorePermit, &'static str> {
130        self.semaphore
131            .clone()
132            .acquire_owned()
133            .await
134            .map_err(|_| "semaphore closed")
135    }
136
137    async fn on_success(&self) {
138        let streak = self
139            .success_streak
140            .fetch_add(1, std::sync::atomic::Ordering::Relaxed)
141            + 1;
142        if streak >= 10 {
143            self.success_streak
144                .store(0, std::sync::atomic::Ordering::Relaxed);
145            self.increase().await;
146        }
147    }
148
149    async fn on_backoff(&self) {
150        self.success_streak
151            .store(0, std::sync::atomic::Ordering::Relaxed);
152        self.decrease().await;
153    }
154
155    async fn decrease(&self) {
156        let mut held = self.held.lock().await;
157        let current = self.max.saturating_sub(held.len());
158        if current <= self.min {
159            return;
160        }
161        if let Ok(permit) = self.semaphore.clone().try_acquire_owned() {
162            held.push(permit);
163            let new_limit = self.max.saturating_sub(held.len());
164            info!(
165                old_limit = current,
166                new_limit, "Adaptive concurrency decreased"
167            );
168            return;
169        }
170        drop(held);
171
172        // When the limiter is saturated, try_acquire fails even though we still
173        // need to reduce future concurrency. Queue a single background reduction
174        // that acquires and parks the next available permit.
175        if self.decrease_scheduled.swap(true, Ordering::AcqRel) {
176            return;
177        }
178
179        let semaphore = Arc::clone(&self.semaphore);
180        let held = Arc::clone(&self.held);
181        let decrease_scheduled = Arc::clone(&self.decrease_scheduled);
182        let max = self.max;
183        let min = self.min;
184        tokio::spawn(async move {
185            let permit = match semaphore.acquire_owned().await {
186                Ok(p) => p,
187                Err(_) => {
188                    decrease_scheduled.store(false, Ordering::Release);
189                    return;
190                }
191            };
192
193            let mut held = held.lock().await;
194            let current = max.saturating_sub(held.len());
195            if current > min {
196                held.push(permit);
197                let new_limit = max.saturating_sub(held.len());
198                info!(
199                    old_limit = current,
200                    new_limit, "Adaptive concurrency decreased"
201                );
202            } else {
203                drop(permit);
204            }
205            decrease_scheduled.store(false, Ordering::Release);
206        });
207    }
208
209    async fn increase(&self) {
210        let mut held = self.held.lock().await;
211        let current = self.max.saturating_sub(held.len());
212        if let Some(permit) = held.pop() {
213            drop(permit);
214            let new_limit = self.max.saturating_sub(held.len());
215            info!(
216                old_limit = current,
217                new_limit, "Adaptive concurrency increased"
218            );
219        }
220    }
221}
222
223#[derive(Debug, Clone)]
224struct ClientConfig {
225    timeout_secs: u64,
226    danger_accept_invalid_certs: bool,
227    default_headers: HeaderMap,
228    proxy: Option<String>,
229}
230
231#[derive(Debug, Default, Clone, Serialize, Deserialize)]
232struct SessionFile {
233    hosts: HashMap<String, HashMap<String, String>>,
234}
235
/// In-memory session data; has the same shape as [`SessionFile::hosts`].
type SessionStore = HashMap<String, HashMap<String, String>>;
237
238impl HttpClient {
239    pub fn new(config: &Config) -> ScannerResult<Self> {
240        let mut default_headers = HeaderMap::new();
241        for (k, v) in &config.default_headers {
242            if let (Ok(name), Ok(value)) = (
243                HeaderName::from_bytes(k.as_bytes()),
244                HeaderValue::from_str(v),
245            ) {
246                default_headers.insert(name, value);
247            }
248        }
249
250        if !config.cookies.is_empty() {
251            let cookie_value = config
252                .cookies
253                .iter()
254                .map(|(k, v)| format!("{k}={v}"))
255                .collect::<Vec<_>>()
256                .join("; ");
257
258            let key = HeaderName::from_static("cookie");
259            if let Some(existing) = default_headers.get(&key).cloned() {
260                let mut combined = existing.to_str().unwrap_or("").to_string();
261                if !combined.is_empty() {
262                    combined.push_str("; ");
263                }
264                combined.push_str(&cookie_value);
265                if let Ok(value) = HeaderValue::from_str(&combined) {
266                    default_headers.insert(key, value);
267                }
268            } else if let Ok(value) = HeaderValue::from_str(&cookie_value) {
269                default_headers.insert(key, value);
270            }
271        }
272
273        let client_config = ClientConfig {
274            timeout_secs: config.politeness.timeout_secs,
275            danger_accept_invalid_certs: config.danger_accept_invalid_certs,
276            default_headers,
277            proxy: config.proxy.clone(),
278        };
279
280        let inner = build_client(&client_config)?;
281        let no_redirect_inner = build_client_no_redirect(&client_config)?;
282        let unauth_client_config = ClientConfig {
283            default_headers: HeaderMap::new(),
284            ..client_config.clone()
285        };
286        let unauth_inner = build_client(&unauth_client_config)?;
287        let unauth_strip_headers = build_unauth_strip_headers(&config.unauth_strip_headers)?;
288
289        let session_store = if let Some(path) = &config.session_file {
290            if path.exists() {
291                match load_session_file(path) {
292                    Ok(store) => Some(Arc::new(Mutex::new(store))),
293                    Err(e) => {
294                        return Err(ScannerError::Config(format!(
295                            "Failed to load session file: {e}"
296                        )));
297                    }
298                }
299            } else {
300                Some(Arc::new(Mutex::new(HashMap::new())))
301            }
302        } else {
303            None
304        };
305
306        Ok(Self {
307            inner,
308            no_redirect_inner,
309            unauth_inner,
310            client_config,
311            unauth_client_config,
312            per_host_clients: config.per_host_clients,
313            clients: Arc::new(DashMap::new()),
314            unauth_clients: Arc::new(DashMap::new()),
315            spec_cache: Arc::new(DashMap::new()),
316            waf_enabled: config.waf_evasion.enabled,
317            delay_ms: config.politeness.delay_ms,
318            retries: config.politeness.retries,
319            host_last_request: Arc::new(DashMap::new()),
320            session_store,
321            session_path: config.session_file.clone(),
322            adaptive: if config.adaptive_concurrency {
323                Some(Arc::new(AdaptiveLimiter::new(config.concurrency.max(1))))
324            } else {
325                None
326            },
327            request_count: Arc::new(AtomicU64::new(0)),
328            retry_count: Arc::new(AtomicU64::new(0)),
329            live_credential: None,
330            unauth_strip_headers,
331        })
332    }
333
334    /// Attach a live credential to this client for auth flow injection.
335    pub fn with_credential(mut self, cred: Arc<LiveCredential>) -> Self {
336        self.live_credential = Some(cred);
337        self
338    }
339
340    pub fn cache_spec(&self, url: &str, body: &str) {
341        self.spec_cache.insert(url.to_string(), body.to_string());
342    }
343
344    pub fn get_cached_spec(&self, url: &str) -> Option<String> {
345        self.spec_cache.get(url).map(|v| v.value().clone())
346    }
347
348    pub fn runtime_metrics(&self) -> HttpRuntimeMetrics {
349        HttpRuntimeMetrics {
350            requests_sent: self.request_count.load(Ordering::Relaxed),
351            retries_performed: self.retry_count.load(Ordering::Relaxed),
352        }
353    }
354
355    // ------------------------------------------------------------------ //
356    //  Core request entry point
357    // ------------------------------------------------------------------ //
358
359    /// Send a request, applying WAF evasion delays + rotating headers.
360    pub async fn request(
361        &self,
362        method: Method,
363        url: &str,
364        extra_headers: Option<HeaderMap>,
365        body: Option<serde_json::Value>,
366    ) -> Result<HttpResponse, CapturedError> {
367        let _adaptive_permit = if let Some(adaptive) = &self.adaptive {
368            match adaptive.acquire().await {
369                Ok(permit) => Some(permit),
370                Err(e) => {
371                    debug!("[{method} {url}] adaptive limiter acquire failed: {e}");
372                    None
373                }
374            }
375        } else {
376            None
377        };
378
379        self.enforce_host_delay(url).await;
380
381        // Random inter-request delay based on configured delay_ms.
382        if self.waf_enabled && self.delay_ms > 0 {
383            let min_secs = self.delay_ms as f64 / 1000.0;
384            let max_secs = min_secs * 3.0; // jitter up to 3x
385            WafEvasion::random_delay(min_secs, max_secs).await;
386        }
387
388        let attempts = self.retries + 1;
389        let mut last_err: Option<CapturedError> = None;
390
391        for attempt in 0..attempts {
392            if attempt > 0 {
393                self.retry_count.fetch_add(1, Ordering::Relaxed);
394                let backoff = retry_backoff(attempt);
395                tokio::time::sleep(backoff).await;
396            }
397
398            self.request_count.fetch_add(1, Ordering::Relaxed);
399            match self
400                .send_once(
401                    method.clone(),
402                    url,
403                    extra_headers.as_ref().cloned(),
404                    body.as_ref().cloned(),
405                )
406                .await
407            {
408                Ok(resp) => {
409                    if let Some(adaptive) = &self.adaptive {
410                        if should_retry_status(resp.status) {
411                            adaptive.on_backoff().await;
412                        } else {
413                            adaptive.on_success().await;
414                        }
415                    }
416                    if should_retry_status(resp.status) && attempt + 1 < attempts {
417                        debug!(
418                            "[{method} {url}] retrying due to status {} (attempt {}/{})",
419                            resp.status,
420                            attempt + 1,
421                            attempts
422                        );
423                        continue;
424                    }
425                    return Ok(resp);
426                }
427                Err(e) => {
428                    debug!(
429                        "[{method} {url}] attempt {}/{} failed: {}",
430                        attempt + 1,
431                        attempts,
432                        e
433                    );
434                    if let Some(adaptive) = &self.adaptive {
435                        adaptive.on_backoff().await;
436                    }
437                    last_err = Some(e);
438                    if attempt + 1 == attempts {
439                        break;
440                    }
441                }
442            }
443        }
444
445        Err(last_err.unwrap_or_else(|| {
446            CapturedError::from_str(
447                "http::send",
448                Some(url.to_string()),
449                "request failed after retries",
450            )
451        }))
452    }
453
454    async fn send_once(
455        &self,
456        method: Method,
457        url: &str,
458        extra_headers: Option<HeaderMap>,
459        body: Option<serde_json::Value>,
460    ) -> Result<HttpResponse, CapturedError> {
461        let client = self
462            .client_for_url(url)
463            .map_err(|e| CapturedError::from_str("http::client", Some(url.to_string()), e))?;
464        let mut req = client.request(method.clone(), url);
465
466        // Rotate UA + evasion headers on every request.
467        if self.waf_enabled {
468            req = req.headers(WafEvasion::evasion_headers());
469        }
470
471        let mut combined_headers = HeaderMap::new();
472
473        if let Some(hdrs) = extra_headers {
474            combined_headers.extend(hdrs);
475        }
476
477        if let Some(cookie) = self.cookie_header_for(url).await {
478            let key = HeaderName::from_static("cookie");
479            let merged = if let Some(existing) = combined_headers.get(&key) {
480                let mut combined = existing.to_str().unwrap_or("").to_string();
481                if !combined.is_empty() {
482                    combined.push_str("; ");
483                }
484                combined.push_str(&cookie);
485                combined
486            } else {
487                cookie
488            };
489            if let Ok(value) = HeaderValue::from_str(&merged) {
490                combined_headers.insert(key, value);
491            }
492        }
493
494        // Apply live credential from auth flow
495        if let Some(ref cred) = self.live_credential {
496            cred.apply_to(&mut combined_headers);
497        }
498
499        if !combined_headers.is_empty() {
500            req = req.headers(combined_headers);
501        }
502
503        if let Some(json_body) = body {
504            req = req
505                .header(CONTENT_TYPE, HeaderValue::from_static("application/json"))
506                .json(&json_body);
507        }
508
509        let response = req.send().await.map_err(|e| {
510            debug!("[{method} {url}] send error: {e}");
511            CapturedError::new("http::send", Some(url.to_string()), &e)
512        })?;
513
514        self.read_response(response, url).await
515    }
516
517    async fn send_once_no_redirect(
518        &self,
519        method: Method,
520        url: &str,
521        extra_headers: Option<HeaderMap>,
522        body: Option<serde_json::Value>,
523    ) -> Result<HttpResponse, CapturedError> {
524        let client = self.no_redirect_inner.clone();
525        let mut req = client.request(method.clone(), url);
526
527        if self.waf_enabled {
528            req = req.headers(WafEvasion::evasion_headers());
529        }
530
531        let mut combined_headers = HeaderMap::new();
532        if let Some(hdrs) = extra_headers {
533            combined_headers.extend(hdrs);
534        }
535
536        if let Some(cookie) = self.cookie_header_for(url).await {
537            let key = HeaderName::from_static("cookie");
538            let merged = if let Some(existing) = combined_headers.get(&key) {
539                let mut combined = existing.to_str().unwrap_or("").to_string();
540                if !combined.is_empty() {
541                    combined.push_str("; ");
542                }
543                combined.push_str(&cookie);
544                combined
545            } else {
546                cookie
547            };
548            if let Ok(value) = HeaderValue::from_str(&merged) {
549                combined_headers.insert(key, value);
550            }
551        }
552
553        if let Some(ref cred) = self.live_credential {
554            cred.apply_to(&mut combined_headers);
555        }
556
557        if !combined_headers.is_empty() {
558            req = req.headers(combined_headers);
559        }
560
561        if let Some(json_body) = body {
562            req = req
563                .header(CONTENT_TYPE, HeaderValue::from_static("application/json"))
564                .json(&json_body);
565        }
566
567        let response = req.send().await.map_err(|e| {
568            debug!("[{method} {url}] send error (no-redirect): {e}");
569            CapturedError::new("http::send_no_redirect", Some(url.to_string()), &e)
570        })?;
571        self.read_response(response, url).await
572    }
573
574    async fn enforce_host_delay(&self, url: &str) {
575        if self.delay_ms == 0 {
576            return;
577        }
578
579        let parsed = match Url::parse(url) {
580            Ok(u) => u,
581            Err(_) => return,
582        };
583
584        let host = match parsed.host_str() {
585            Some(h) => h,
586            None => return,
587        };
588
589        let mut key = host.to_string();
590        if let Some(port) = parsed.port() {
591            key.push_str(&format!(":{port}"));
592        }
593
594        let min_gap = Duration::from_millis(self.delay_ms);
595        let now = tokio::time::Instant::now();
596
597        // Reserve the next allowed time per host to prevent concurrent TOCTOU races.
598        // Keep lock scope to a single map entry update to avoid global
599        // serialization across hosts under high concurrency.
600        let next_allowed = {
601            let mut entry = self.host_last_request.entry(key).or_insert(now);
602            let candidate = *entry + min_gap;
603            let next = if candidate > now { candidate } else { now };
604            *entry = next;
605            next
606        };
607
608        let sleep_for = if next_allowed > now {
609            next_allowed - now
610        } else {
611            Duration::from_millis(0)
612        };
613
614        if !sleep_for.is_zero() {
615            tokio::time::sleep(sleep_for).await;
616        }
617    }
618
619    // ------------------------------------------------------------------ //
620    //  Convenience wrappers
621    // ------------------------------------------------------------------ //
622
623    pub async fn get(&self, url: &str) -> Result<HttpResponse, CapturedError> {
624        self.request(Method::GET, url, None, None).await
625    }
626
627    /// GET request for burst probes: bypasses host-delay and retry orchestration
628    /// to exercise server-side rate limiting under near-simultaneous load.
629    pub async fn get_burst(&self, url: &str) -> Result<HttpResponse, CapturedError> {
630        self.request_count.fetch_add(1, Ordering::Relaxed);
631        self.send_once(Method::GET, url, None, None).await
632    }
633
634    /// GET with extra request headers specified as `[(name, value)]` pairs.
635    pub async fn get_with_headers(
636        &self,
637        url: &str,
638        extra: &[(String, String)],
639    ) -> Result<HttpResponse, CapturedError> {
640        let mut map = HeaderMap::new();
641        for (k, v) in extra {
642            if let (Ok(name), Ok(value)) = (
643                HeaderName::from_bytes(k.as_bytes()),
644                HeaderValue::from_str(v),
645            ) {
646                map.insert(name, value);
647            }
648        }
649        self.request(Method::GET, url, Some(map), None).await
650    }
651
652    /// Headered GET for burst probes: bypasses host-delay and retries.
653    pub async fn get_with_headers_burst(
654        &self,
655        url: &str,
656        extra: &[(String, String)],
657    ) -> Result<HttpResponse, CapturedError> {
658        let mut map = HeaderMap::new();
659        for (k, v) in extra {
660            if let (Ok(name), Ok(value)) = (
661                HeaderName::from_bytes(k.as_bytes()),
662                HeaderValue::from_str(v),
663            ) {
664                map.insert(name, value);
665            }
666        }
667        self.request_count.fetch_add(1, Ordering::Relaxed);
668        self.send_once(Method::GET, url, Some(map), None).await
669    }
670
671    /// GET with extra headers while forcing redirect policy to `none`.
672    /// This keeps transport accounting/evasion behavior consistent with normal
673    /// requests while allowing scanners to inspect 30x `Location` responses.
674    pub async fn get_with_headers_no_redirect(
675        &self,
676        url: &str,
677        extra: &[(String, String)],
678    ) -> Result<HttpResponse, CapturedError> {
679        let _adaptive_permit = if let Some(adaptive) = &self.adaptive {
680            match adaptive.acquire().await {
681                Ok(permit) => Some(permit),
682                Err(e) => {
683                    debug!("[GET {url}] adaptive limiter acquire failed: {e}");
684                    None
685                }
686            }
687        } else {
688            None
689        };
690
691        self.enforce_host_delay(url).await;
692        if self.waf_enabled && self.delay_ms > 0 {
693            let min_secs = self.delay_ms as f64 / 1000.0;
694            let max_secs = min_secs * 3.0;
695            WafEvasion::random_delay(min_secs, max_secs).await;
696        }
697
698        let mut map = HeaderMap::new();
699        for (k, v) in extra {
700            if let (Ok(name), Ok(value)) = (
701                HeaderName::from_bytes(k.as_bytes()),
702                HeaderValue::from_str(v),
703            ) {
704                map.insert(name, value);
705            }
706        }
707
708        let attempts = self.retries + 1;
709        let mut last_err: Option<CapturedError> = None;
710        for attempt in 0..attempts {
711            if attempt > 0 {
712                self.retry_count.fetch_add(1, Ordering::Relaxed);
713                tokio::time::sleep(retry_backoff(attempt)).await;
714            }
715
716            self.request_count.fetch_add(1, Ordering::Relaxed);
717            match self
718                .send_once_no_redirect(Method::GET, url, Some(map.clone()), None)
719                .await
720            {
721                Ok(resp) => {
722                    if let Some(adaptive) = &self.adaptive {
723                        if should_retry_status(resp.status) {
724                            adaptive.on_backoff().await;
725                        } else {
726                            adaptive.on_success().await;
727                        }
728                    }
729                    if should_retry_status(resp.status) && attempt + 1 < attempts {
730                        continue;
731                    }
732                    return Ok(resp);
733                }
734                Err(e) => {
735                    if let Some(adaptive) = &self.adaptive {
736                        adaptive.on_backoff().await;
737                    }
738                    last_err = Some(e);
739                    if attempt + 1 == attempts {
740                        break;
741                    }
742                }
743            }
744        }
745
746        Err(last_err.unwrap_or_else(|| {
747            CapturedError::from_str(
748                "http::send_no_redirect",
749                Some(url.to_string()),
750                "request failed after retries",
751            )
752        }))
753    }
754
755    #[allow(dead_code)]
756    pub async fn head(&self, url: &str) -> Result<HttpResponse, CapturedError> {
757        self.request(Method::HEAD, url, None, None).await
758    }
759
760    pub async fn options(
761        &self,
762        url: &str,
763        extra: Option<HeaderMap>,
764    ) -> Result<HttpResponse, CapturedError> {
765        self.request(Method::OPTIONS, url, extra, None).await
766    }
767
768    pub async fn post_json(
769        &self,
770        url: &str,
771        body: &serde_json::Value,
772    ) -> Result<HttpResponse, CapturedError> {
773        self.request(Method::POST, url, None, Some(body.clone()))
774            .await
775    }
776
777    /// GET request without the live credential (used for unauthenticated comparison in IDOR checks).
778    pub async fn get_without_auth(&self, url: &str) -> Result<HttpResponse, CapturedError> {
779        let _adaptive_permit = if let Some(adaptive) = &self.adaptive {
780            match adaptive.acquire().await {
781                Ok(permit) => Some(permit),
782                Err(e) => {
783                    debug!("[GET {url}] adaptive limiter acquire failed: {e}");
784                    None
785                }
786            }
787        } else {
788            None
789        };
790
791        self.enforce_host_delay(url).await;
792
793        if self.waf_enabled && self.delay_ms > 0 {
794            let min_secs = self.delay_ms as f64 / 1000.0;
795            let max_secs = min_secs * 3.0;
796            WafEvasion::random_delay(min_secs, max_secs).await;
797        }
798
799        let attempts = self.retries + 1;
800        let mut last_err: Option<CapturedError> = None;
801        for attempt in 0..attempts {
802            if attempt > 0 {
803                self.retry_count.fetch_add(1, Ordering::Relaxed);
804                tokio::time::sleep(retry_backoff(attempt)).await;
805            }
806
807            self.request_count.fetch_add(1, Ordering::Relaxed);
808            match self.send_once_without_auth(url).await {
809                Ok(resp) => {
810                    if let Some(adaptive) = &self.adaptive {
811                        if should_retry_status(resp.status) {
812                            adaptive.on_backoff().await;
813                        } else {
814                            adaptive.on_success().await;
815                        }
816                    }
817                    if should_retry_status(resp.status) && attempt + 1 < attempts {
818                        continue;
819                    }
820                    return Ok(resp);
821                }
822                Err(e) => {
823                    if let Some(adaptive) = &self.adaptive {
824                        adaptive.on_backoff().await;
825                    }
826                    last_err = Some(e);
827                    if attempt + 1 == attempts {
828                        break;
829                    }
830                }
831            }
832        }
833
834        Err(last_err.unwrap_or_else(|| {
835            CapturedError::from_str(
836                "http::get_without_auth",
837                Some(url.to_string()),
838                "request failed after retries",
839            )
840        }))
841    }
842
843    async fn send_once_without_auth(&self, url: &str) -> Result<HttpResponse, CapturedError> {
844        let client = self.client_for_url_unauth(url).map_err(|e| {
845            CapturedError::from_str("http::get_without_auth", Some(url.to_string()), e)
846        })?;
847
848        let mut req = client.request(reqwest::Method::GET, url);
849
850        if self.waf_enabled {
851            req = req.headers(WafEvasion::evasion_headers());
852        }
853
854        let mut req = req
855            .build()
856            .map_err(|e| CapturedError::new("http::get_without_auth", Some(url.to_string()), &e))?;
857
858        // Strip any default auth-like headers that could be set on the client.
859        let headers = req.headers_mut();
860        for name in &self.unauth_strip_headers {
861            headers.remove(name);
862        }
863
864        let response = client.execute(req).await.map_err(|e| {
865            debug!("[GET {url}] send error (no auth): {e}");
866            CapturedError::new("http::get_without_auth", Some(url.to_string()), &e)
867        })?;
868
869        self.read_response(response, url).await
870    }
871
872    pub async fn method_probe(
873        &self,
874        method: &str,
875        url: &str,
876    ) -> Result<HttpResponse, CapturedError> {
877        let m = Method::from_bytes(method.as_bytes()).map_err(|e| {
878            CapturedError::from_str("http::method_probe", Some(url.to_string()), e.to_string())
879        })?;
880        self.request(m, url, None, None).await
881    }
882
883    // ------------------------------------------------------------------ //
884    //  Response reading with size cap
885    // ------------------------------------------------------------------ //
886
887    async fn read_response(
888        &self,
889        response: Response,
890        url: &str,
891    ) -> Result<HttpResponse, CapturedError> {
892        let status = response.status().as_u16();
893        let final_url = response.url().to_string();
894
895        let set_cookies: Vec<String> = response
896            .headers()
897            .get_all("set-cookie")
898            .iter()
899            .filter_map(|v| v.to_str().ok().map(|s| s.to_string()))
900            .collect();
901
902        // Flatten headers into lowercase map (last value wins for duplicates).
903        let headers: HashMap<String, String> = response
904            .headers()
905            .iter()
906            .map(|(k, v)| {
907                (
908                    k.as_str().to_lowercase(),
909                    v.to_str().unwrap_or("").to_string(),
910                )
911            })
912            .collect();
913
914        if let Some(store) = &self.session_store {
915            if let Err(e) = self
916                .update_session_from_set_cookie(&set_cookies, &final_url, store)
917                .await
918            {
919                debug!("[session] update error for {final_url}: {e}");
920            }
921        }
922
923        // Read body with size cap.
924        let raw_bytes = response
925            .bytes()
926            .await
927            .map_err(|e| CapturedError::new("http::read_body", Some(url.to_string()), &e))?;
928
929        let capped: &[u8] = if raw_bytes.len() > MAX_RESPONSE_BYTES {
930            &raw_bytes[..MAX_RESPONSE_BYTES]
931        } else {
932            &raw_bytes
933        };
934
935        // Best-effort UTF-8 decode.
936        let body = String::from_utf8_lossy(capped).into_owned();
937
938        Ok(HttpResponse {
939            status,
940            headers,
941            body,
942            url: final_url,
943        })
944    }
945
946    fn client_for_url(&self, url: &str) -> Result<Client, String> {
947        if !self.per_host_clients {
948            return Ok(self.inner.clone());
949        }
950
951        let host = parse_host_or_unknown(url);
952
953        if let Some(client) = self.clients.get(&host) {
954            return Ok(client.value().clone());
955        }
956
957        let client = build_client(&self.client_config)
958            .map_err(|e| format!("per-host client build failed: {e}"))?;
959        self.clients.insert(host, client.clone());
960        Ok(client)
961    }
962
963    fn client_for_url_unauth(&self, url: &str) -> Result<Client, String> {
964        if !self.per_host_clients {
965            return Ok(self.unauth_inner.clone());
966        }
967
968        let host = parse_host_or_unknown(url);
969
970        if let Some(client) = self.unauth_clients.get(&host) {
971            return Ok(client.value().clone());
972        }
973
974        let client = build_client(&self.unauth_client_config)
975            .map_err(|e| format!("per-host unauth client build failed: {e}"))?;
976        self.unauth_clients.insert(host, client.clone());
977        Ok(client)
978    }
979
980    async fn cookie_header_for(&self, url: &str) -> Option<String> {
981        let store = self.session_store.as_ref()?;
982        let host = Url::parse(url)
983            .ok()
984            .and_then(|u| u.host_str().map(|h| h.to_string()))?;
985        let map = store.lock().await;
986        let cookies = map.get(&host)?;
987        if cookies.is_empty() {
988            return None;
989        }
990        let value = cookies
991            .iter()
992            .map(|(k, v)| format!("{k}={v}"))
993            .collect::<Vec<_>>()
994            .join("; ");
995        Some(value)
996    }
997
998    async fn update_session_from_set_cookie(
999        &self,
1000        set_cookies: &[String],
1001        url: &str,
1002        store: &Arc<Mutex<SessionStore>>,
1003    ) -> Result<(), String> {
1004        let host = Url::parse(url)
1005            .ok()
1006            .and_then(|u| u.host_str().map(|h| h.to_string()))
1007            .ok_or_else(|| "invalid response url".to_string())?;
1008
1009        if set_cookies.is_empty() {
1010            return Ok(());
1011        }
1012
1013        let mut map = store.lock().await;
1014        let entry = map.entry(host).or_insert_with(HashMap::new);
1015
1016        for raw in set_cookies {
1017            if let Some((name, value)) = parse_set_cookie_pair(raw) {
1018                entry.insert(name.to_string(), value.to_string());
1019            }
1020        }
1021        Ok(())
1022    }
1023
1024    pub async fn save_session(&self) -> ScannerResult<()> {
1025        let Some(path) = &self.session_path else {
1026            return Ok(());
1027        };
1028        let Some(store) = &self.session_store else {
1029            return Ok(());
1030        };
1031
1032        let map = store.lock().await;
1033        let doc = SessionFile { hosts: map.clone() };
1034        let json = serde_json::to_string_pretty(&doc)
1035            .map_err(|e| ScannerError::Config(format!("Session serialise failed: {e}")))?;
1036        std::fs::write(path, json)
1037            .map_err(|e| ScannerError::Config(format!("Session write failed: {e}")))?;
1038        Ok(())
1039    }
1040}
1041
1042fn parse_host_or_unknown(url: &str) -> String {
1043    match Url::parse(url) {
1044        Ok(parsed) => parsed.host_str().map(|h| h.to_string()).unwrap_or_else(|| {
1045            debug!("Failed to extract host from URL: {url}");
1046            "unknown".to_string()
1047        }),
1048        Err(e) => {
1049            debug!("Failed to parse URL {url}: {e}");
1050            "unknown".to_string()
1051        }
1052    }
1053}
1054
/// Extracts the leading `name=value` pair from a raw `Set-Cookie` value.
///
/// Everything from the first `;` onward (the cookie attributes) is ignored.
/// The pair is split at its first `=`, and surrounding whitespace is trimmed
/// from both halves. Returns `None` when there is no `=` before the first
/// `;` or when the name is empty after trimming; an empty value is allowed.
fn parse_set_cookie_pair(raw: &str) -> Option<(&str, &str)> {
    // Keep only the segment before the attribute list.
    let pair = match raw.find(';') {
        Some(end) => &raw[..end],
        None => raw,
    };

    let eq = pair.find('=')?;
    let name = pair[..eq].trim();
    if name.is_empty() {
        return None;
    }

    Some((name, pair[eq + 1..].trim()))
}
1065
1066fn build_unauth_strip_headers(raws: &[String]) -> ScannerResult<Vec<HeaderName>> {
1067    let mut out = Vec::new();
1068    let mut seen = std::collections::HashSet::new();
1069
1070    let names = DEFAULT_UNAUTH_STRIP_HEADERS
1071        .iter()
1072        .copied()
1073        .chain(raws.iter().map(|s| s.as_str()));
1074
1075    for name in names {
1076        let trimmed = name.trim();
1077        if trimmed.is_empty() {
1078            continue;
1079        }
1080        let key = trimmed.to_ascii_lowercase();
1081        if !seen.insert(key) {
1082            continue;
1083        }
1084        let header = HeaderName::from_bytes(trimmed.as_bytes()).map_err(|e| {
1085            ScannerError::Config(format!("Invalid unauth strip header '{trimmed}': {e}"))
1086        })?;
1087        out.push(header);
1088    }
1089    Ok(out)
1090}
1091
/// Reports whether an HTTP status code is one of the retryable ones:
/// 429, 500, 502, 503 or 504.
fn should_retry_status(status: u16) -> bool {
    const RETRYABLE: [u16; 5] = [429, 500, 502, 503, 504];
    RETRYABLE.contains(&status)
}
1095
/// Computes the delay before a retry: 200 ms doubled once per `attempt`,
/// with the number of doublings capped at 6 (so at most 12 800 ms).
fn retry_backoff(attempt: u32) -> Duration {
    const BASE_MS: u64 = 200;
    const MAX_DOUBLINGS: u32 = 6;

    let doublings = if attempt > MAX_DOUBLINGS {
        MAX_DOUBLINGS
    } else {
        attempt
    };
    Duration::from_millis(BASE_MS << doublings)
}
1101
1102fn build_client(cfg: &ClientConfig) -> ScannerResult<Client> {
1103    build_client_with_redirect(cfg, reqwest::redirect::Policy::limited(5))
1104}
1105
1106fn build_client_no_redirect(cfg: &ClientConfig) -> ScannerResult<Client> {
1107    build_client_with_redirect(cfg, reqwest::redirect::Policy::none())
1108}
1109
1110fn build_client_with_redirect(
1111    cfg: &ClientConfig,
1112    redirect: reqwest::redirect::Policy,
1113) -> ScannerResult<Client> {
1114    let mut builder = Client::builder()
1115        .timeout(Duration::from_secs(cfg.timeout_secs))
1116        .danger_accept_invalid_certs(cfg.danger_accept_invalid_certs)
1117        .gzip(true)
1118        .deflate(true)
1119        .redirect(redirect)
1120        .tcp_keepalive(Duration::from_secs(30));
1121
1122    if !cfg.default_headers.is_empty() {
1123        builder = builder.default_headers(cfg.default_headers.clone());
1124    }
1125
1126    if let Some(proxy_url) = &cfg.proxy {
1127        let proxy = reqwest::Proxy::all(proxy_url)
1128            .map_err(|e| ScannerError::Config(format!("Invalid proxy: {e}")))?;
1129        builder = builder.proxy(proxy);
1130    }
1131
1132    builder
1133        .build()
1134        .map_err(|e| ScannerError::Config(format!("Client build failed: {e}")))
1135}
1136
1137fn load_session_file(path: &PathBuf) -> Result<SessionStore, ScannerError> {
1138    let content = std::fs::read_to_string(path)
1139        .map_err(|e| ScannerError::Config(format!("Session read failed: {e}")))?;
1140    let doc: SessionFile = serde_json::from_str(&content)
1141        .map_err(|e| ScannerError::Config(format!("Session parse failed: {e}")))?;
1142    Ok(doc.hosts)
1143}