Skip to main content

hpx_browser/net/
mod.rs

1//! Stealth HTTP client with cookie management, Accept-CH tracking, and
2//! redirect following.
3//!
4//! Wraps `hpx::Client` as the underlying HTTP/1.1 + HTTP/2 transport with
5//! BoringSSL TLS and browser-profile emulation. Higher-level browser
6//! session concerns (cookies, Client Hints, H1-only host memory) live here.
7
8pub mod blocklist;
9pub mod cookies;
10pub mod csp;
11pub mod headers;
12pub mod robots;
13pub mod ssrf;
14
15use std::{collections::HashMap, sync::Arc};
16
17pub use cookies::CookieJar;
18use tokio::sync::Mutex;
19use url::Url;
20
21// ---------------------------------------------------------------------------
22// Error
23// ---------------------------------------------------------------------------
24
25#[derive(Debug, thiserror::Error)]
26pub enum NetError {
27    #[error("HTTP error: {0}")]
28    Http(String),
29
30    #[error("URL parse error: {0}")]
31    Url(#[from] url::ParseError),
32
33    #[error("Request failed: {0}")]
34    Request(String),
35
36    #[error("hpx client error: {0}")]
37    Client(#[from] hpx::Error),
38}
39
40// ---------------------------------------------------------------------------
41// TimingStats
42// ---------------------------------------------------------------------------
43
/// Per-request phase timestamps.
///
/// Every field defaults to `0.0`. The `_ms` suffix suggests milliseconds;
/// the reference clock is not established anywhere in this file
/// (NOTE(review): confirm against whoever fills these in).
#[derive(Debug, Clone, Default)]
pub struct TimingStats {
    /// DNS lookup started.
    pub dns_start_ms: f64,
    /// DNS lookup finished.
    pub dns_end_ms: f64,
    /// Connection establishment started.
    pub connect_start_ms: f64,
    /// Connection establishment finished.
    pub connect_end_ms: f64,
    /// TLS handshake started.
    pub tls_start_ms: f64,
    /// TLS handshake finished.
    pub tls_end_ms: f64,
    /// Request was sent.
    pub request_start_ms: f64,
    /// First part of the response arrived.
    pub response_start_ms: f64,
    /// Response was fully received.
    pub response_end_ms: f64,
}
56
57// ---------------------------------------------------------------------------
58// Response
59// ---------------------------------------------------------------------------
60
61#[derive(Debug, Clone, Default)]
62pub struct Response {
63    pub status: u16,
64    pub status_text: String,
65    pub headers: HashMap<String, String>,
66    /// All Set-Cookie header values, preserved separately because HTTP
67    /// responses can contain multiple Set-Cookie headers.
68    pub set_cookies: Vec<String>,
69    pub body: Vec<u8>,
70    pub url: String,
71    /// Whether this response taught the client Accept-CH for the first time.
72    pub accept_ch_upgrade: bool,
73    pub timings: TimingStats,
74}
75
76impl Response {
77    pub fn text(&self) -> String {
78        String::from_utf8_lossy(&self.body).to_string()
79    }
80
81    pub fn ok(&self) -> bool {
82        (200..300).contains(&self.status)
83    }
84}
85
86// ---------------------------------------------------------------------------
87// SharedSession — process-wide cookie jar + Accept-CH origins
88// ---------------------------------------------------------------------------
89
90#[derive(Clone)]
91pub struct SharedSession {
92    pub cookies: Arc<Mutex<CookieJar>>,
93    pub accept_ch: scc::HashSet<String>,
94    pub h1_only_hosts: scc::HashSet<String>,
95}
96
97static SHARED_SESSION: std::sync::OnceLock<SharedSession> = std::sync::OnceLock::new();
98
99/// Get the process-wide shared session.
100pub fn shared_session() -> SharedSession {
101    SHARED_SESSION
102        .get_or_init(|| SharedSession {
103            cookies: Arc::new(Mutex::new(CookieJar::new())),
104            accept_ch: scc::HashSet::new(),
105            h1_only_hosts: scc::HashSet::new(),
106        })
107        .clone()
108}
109
110// ---------------------------------------------------------------------------
111// HttpClient
112// ---------------------------------------------------------------------------
113
114#[derive(Clone)]
115pub struct HttpClient {
116    inner: hpx::Client,
117    cookies: Arc<Mutex<CookieJar>>,
118    accept_ch_origins: scc::HashSet<String>,
119    h1_only_hosts: scc::HashSet<String>,
120    browser_profile: hpx::BrowserProfile,
121}
122
123impl HttpClient {
124    /// Create a new client with the given browser profile.
125    pub fn new(browser_profile: hpx::BrowserProfile) -> Result<Self, NetError> {
126        let inner = hpx::Client::builder()
127            .build()
128            .map_err(|e| NetError::Http(format!("failed to build hpx client: {e}")))?;
129
130        Ok(Self {
131            inner,
132            cookies: Arc::new(Mutex::new(CookieJar::new())),
133            accept_ch_origins: scc::HashSet::new(),
134            h1_only_hosts: scc::HashSet::new(),
135            browser_profile,
136        })
137    }
138
139    /// Build a client that participates in the process-wide shared session.
140    pub fn shared(browser_profile: hpx::BrowserProfile) -> Result<Self, NetError> {
141        let s = shared_session();
142        let inner = hpx::Client::builder()
143            .build()
144            .map_err(|e| NetError::Http(format!("failed to build hpx client: {e}")))?;
145
146        Ok(Self {
147            inner,
148            cookies: s.cookies,
149            accept_ch_origins: s.accept_ch,
150            h1_only_hosts: s.h1_only_hosts,
151            browser_profile,
152        })
153    }
154
155    pub fn cookies(&self) -> Arc<Mutex<CookieJar>> {
156        self.cookies.clone()
157    }
158
159    pub fn browser_profile(&self) -> &hpx::BrowserProfile {
160        &self.browser_profile
161    }
162
163    /// Whether `host` has previously sent `Accept-CH`.
164    pub fn has_accept_ch(&self, host: &str) -> bool {
165        self.accept_ch_origins.contains_sync(host)
166    }
167
168    /// Learn Accept-CH from response headers. Returns `true` if this is a
169    /// new origin that just opted in.
170    fn learn_accept_ch(&self, host: &str, headers: &HashMap<String, String>) -> bool {
171        let has_ch = headers.keys().any(|k| {
172            let k = k.to_ascii_lowercase();
173            k == "accept-ch" || k == "critical-ch"
174        });
175        if has_ch {
176            return self.accept_ch_origins.insert_sync(host.to_string()).is_ok();
177        }
178        false
179    }
180
181    /// Snapshot all cookies for a URL.
182    pub async fn cookies_for_url(&self, url: &Url) -> Option<String> {
183        let jar = self.cookies.lock().await;
184        jar.cookies_for(url)
185    }
186
187    /// Inject cookies from external sources (e.g., JS `document.cookie`).
188    pub async fn inject_cookies(&self, url: &Url, cookies: &[String]) {
189        let mut jar = self.cookies.lock().await;
190        jar.set_cookies(url, cookies);
191    }
192
193    /// Set a single cookie from a raw Set-Cookie-style string.
194    pub async fn set_cookie_str(&self, url: &Url, raw: &str) {
195        let mut jar = self.cookies.lock().await;
196        jar.set_cookies(url, &[raw.to_string()]);
197    }
198
199    /// Drop all cookies matching `target_domain`.
200    pub async fn clear_cookies_for_domain(&self, target_domain: &str) {
201        let mut jar = self.cookies.lock().await;
202        jar.clear_for_domain(target_domain);
203    }
204
205    // ----- Request methods -----
206
207    /// Perform a GET request.
208    pub async fn get(&self, url: &str) -> Result<Response, NetError> {
209        self.get_with_headers(url, &[]).await
210    }
211
212    /// GET with extra headers.
213    pub async fn get_with_headers(
214        &self,
215        url: &str,
216        extra_headers: &[(String, String)],
217    ) -> Result<Response, NetError> {
218        let parsed = Url::parse(url)?;
219        let builder = self.inner.get(url).emulation(self.browser_profile);
220
221        let builder = self
222            .inject_request_headers(builder, &parsed, extra_headers)
223            .await;
224        let hpx_resp = builder.send().await?;
225        self.process_response(hpx_resp, url, &parsed).await
226    }
227
228    /// Fetch-API-style GET with `accept: */*` semantics.
229    pub async fn fetch_get(
230        &self,
231        url: &str,
232        extra_headers: &[(String, String)],
233        _origin: Option<&str>,
234    ) -> Result<Response, NetError> {
235        let parsed = Url::parse(url)?;
236        let mut builder = self.inner.get(url).emulation(self.browser_profile);
237
238        // Fetch-style: accept: */*, sec-fetch-dest: empty
239        builder = builder.header("accept", "*/*");
240        builder = builder.header("sec-fetch-mode", "cors");
241        builder = builder.header("sec-fetch-dest", "empty");
242        builder = builder.header("sec-fetch-site", "same-origin");
243
244        builder = self
245            .inject_request_headers(builder, &parsed, extra_headers)
246            .await;
247        let hpx_resp = builder.send().await?;
248        self.process_response(hpx_resp, url, &parsed).await
249    }
250
251    /// Fetch-API-style POST with raw bytes.
252    pub async fn fetch_post_bytes(
253        &self,
254        url: &str,
255        body: &[u8],
256        extra_headers: &[(String, String)],
257        _origin: Option<&str>,
258    ) -> Result<Response, NetError> {
259        let parsed = Url::parse(url)?;
260        let mut builder = self.inner.post(url).emulation(self.browser_profile);
261
262        builder = builder.header("accept", "*/*");
263        builder = builder.header("sec-fetch-mode", "cors");
264        builder = builder.header("sec-fetch-dest", "empty");
265        builder = builder.header("sec-fetch-site", "same-origin");
266
267        builder = self
268            .inject_request_headers(builder, &parsed, extra_headers)
269            .await;
270        let hpx_resp = builder.body(body.to_vec()).send().await?;
271        self.process_response(hpx_resp, url, &parsed).await
272    }
273
274    /// Perform a POST request with a string body.
275    pub async fn post(&self, url: &str, body: &str) -> Result<Response, NetError> {
276        self.post_with_headers(url, body, &[]).await
277    }
278
279    /// POST with extra headers.
280    pub async fn post_with_headers(
281        &self,
282        url: &str,
283        body: &str,
284        extra_headers: &[(String, String)],
285    ) -> Result<Response, NetError> {
286        self.post_bytes_with_headers(url, body.as_bytes(), extra_headers)
287            .await
288    }
289
290    /// POST with raw bytes and extra headers.
291    pub async fn post_bytes_with_headers(
292        &self,
293        url: &str,
294        body: &[u8],
295        extra_headers: &[(String, String)],
296    ) -> Result<Response, NetError> {
297        let parsed = Url::parse(url)?;
298        let builder = self.inner.post(url).emulation(self.browser_profile);
299
300        let builder = self
301            .inject_request_headers(builder, &parsed, extra_headers)
302            .await;
303        let hpx_resp = builder.body(body.to_vec()).send().await?;
304        self.process_response(hpx_resp, url, &parsed).await
305    }
306
307    /// GET with explicit redirect following.
308    pub async fn get_follow(&self, url: &str, max_redirects: u8) -> Result<Response, NetError> {
309        let mut current_url = url.to_string();
310        for _ in 0..max_redirects {
311            let resp = self.get(&current_url).await?;
312            if matches!(resp.status, 301 | 302 | 303 | 307 | 308) {
313                if let Some(loc) = resp.headers.get("location") {
314                    current_url = resolve_redirect(&current_url, loc)?;
315                    continue;
316                }
317            }
318            return Ok(resp);
319        }
320        self.get(&current_url).await
321    }
322
323    /// GET with extra headers and redirect following.
324    pub async fn get_follow_with_headers(
325        &self,
326        url: &str,
327        extra_headers: &[(String, String)],
328        max_redirects: u8,
329    ) -> Result<Response, NetError> {
330        let mut current_url = url.to_string();
331        for _ in 0..max_redirects {
332            let resp = self.get_with_headers(&current_url, extra_headers).await?;
333            if matches!(resp.status, 301 | 302 | 303 | 307 | 308) {
334                if let Some(loc) = resp.headers.get("location") {
335                    current_url = resolve_redirect(&current_url, loc)?;
336                    continue;
337                }
338            }
339            return Ok(resp);
340        }
341        self.get_with_headers(&current_url, extra_headers).await
342    }
343
344    /// POST with redirect following. 307/308 preserve the body.
345    pub async fn post_follow(
346        &self,
347        url: &str,
348        body: &str,
349        max_redirects: u8,
350    ) -> Result<Response, NetError> {
351        self.post_bytes_follow(url, body.as_bytes(), &[], max_redirects)
352            .await
353    }
354
355    /// POST with raw bytes and redirect following.
356    pub async fn post_bytes_follow(
357        &self,
358        url: &str,
359        body: &[u8],
360        extra_headers: &[(String, String)],
361        max_redirects: u8,
362    ) -> Result<Response, NetError> {
363        let mut current_url = url.to_string();
364        for _ in 0..max_redirects {
365            let resp = self
366                .post_bytes_with_headers(&current_url, body, extra_headers)
367                .await?;
368
369            if matches!(resp.status, 301 | 302 | 303 | 307 | 308) {
370                if let Some(loc) = resp.headers.get("location") {
371                    let next_url = resolve_redirect(&current_url, loc)?;
372                    if matches!(resp.status, 307 | 308) {
373                        current_url = next_url;
374                        continue;
375                    }
376                    // 301/302/303 on POST → switch to GET
377                    return self
378                        .get_follow(&next_url, max_redirects.saturating_sub(1))
379                        .await;
380                }
381            }
382            return Ok(resp);
383        }
384        self.post_bytes_with_headers(&current_url, body, extra_headers)
385            .await
386    }
387
388    /// Pre-establish a connection to a host. hpx handles connection pooling
389    /// internally, so this is a lightweight GET that warms the pool.
390    pub async fn preconnect(&self, url: &str) -> Result<(), NetError> {
391        // ponytail: hpx manages its own pool; a HEAD is the cheapest way to
392        // establish a connection. If hpx ever exposes a dedicated preconnect,
393        // switch to that.
394        let _ = self
395            .inner
396            .head(url)
397            .emulation(self.browser_profile)
398            .send()
399            .await;
400        Ok(())
401    }
402
403    // ----- Internal helpers -----
404
405    /// Inject cookies and extra headers into a request builder.
406    async fn inject_request_headers(
407        &self,
408        mut builder: hpx::RequestBuilder,
409        parsed: &Url,
410        extra_headers: &[(String, String)],
411    ) -> hpx::RequestBuilder {
412        let cookie_str = {
413            let jar = self.cookies.lock().await;
414            jar.cookies_for(parsed)
415        };
416
417        if let Some(cs) = cookie_str {
418            builder = builder.header("cookie", cs);
419        }
420
421        for (k, v) in extra_headers {
422            if k.eq_ignore_ascii_case("host") || k.eq_ignore_ascii_case("connection") {
423                continue;
424            }
425            builder = builder.header(k.as_str(), v.as_str());
426        }
427
428        builder
429    }
430
431    /// Convert an hpx Response into our Response type.
432    async fn process_response(
433        &self,
434        hpx_resp: hpx::Response,
435        url: &str,
436        parsed: &Url,
437    ) -> Result<Response, NetError> {
438        let status = hpx_resp.status().as_u16();
439        let status_text = hpx_resp
440            .status()
441            .canonical_reason()
442            .unwrap_or("")
443            .to_string();
444
445        let mut headers = HashMap::new();
446        let mut set_cookies = Vec::new();
447
448        for (key, value) in hpx_resp.headers() {
449            if let Ok(v) = value.to_str() {
450                if key.as_str().eq_ignore_ascii_case("set-cookie") {
451                    set_cookies.push(v.to_string());
452                } else {
453                    headers.insert(key.to_string(), v.to_string());
454                }
455            }
456        }
457
458        let body = hpx_resp
459            .bytes()
460            .await
461            .map_err(|e| NetError::Http(format!("failed to read body: {e}")))?;
462
463        // Learn Accept-CH
464        let host = parsed.host_str().unwrap_or("");
465        let upgrade = self.learn_accept_ch(host, &headers);
466
467        // Store Set-Cookie
468        if !set_cookies.is_empty() {
469            let mut jar = self.cookies.lock().await;
470            jar.set_cookies(parsed, &set_cookies);
471        }
472
473        Ok(Response {
474            status,
475            status_text,
476            headers,
477            set_cookies,
478            body: body.to_vec(),
479            url: url.to_string(),
480            accept_ch_upgrade: upgrade,
481            timings: TimingStats::default(),
482        })
483    }
484}
485
486// ---------------------------------------------------------------------------
487// Helpers
488// ---------------------------------------------------------------------------
489
490/// Resolve a redirect Location header to an absolute URL.
491fn resolve_redirect(current_url: &str, location: &str) -> Result<String, NetError> {
492    let base = Url::parse(current_url).map_err(|e| NetError::Request(e.to_string()))?;
493    let resolved = base.join(location).map_err(|e| {
494        NetError::Request(format!(
495            "redirect resolve: {e} (base={current_url}, loc={location})"
496        ))
497    })?;
498    Ok(resolved.to_string())
499}
500
501// ---------------------------------------------------------------------------
502// Tests
503// ---------------------------------------------------------------------------
504
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `Response` with the given basics and defaults elsewhere.
    fn response_with(status: u16, status_text: &str, body: &[u8], url: &str) -> Response {
        Response {
            status,
            status_text: status_text.into(),
            body: body.to_vec(),
            url: url.into(),
            ..Response::default()
        }
    }

    /// Build a one-entry header map.
    fn single_header(name: &str, value: &str) -> HashMap<String, String> {
        let mut headers = HashMap::new();
        headers.insert(name.to_string(), value.to_string());
        headers
    }

    #[test]
    fn client_creates_successfully() {
        let client = HttpClient::new(hpx::BrowserProfile::Chrome);
        assert!(client.is_ok());
    }

    #[test]
    fn shared_client_creates_successfully() {
        let client = HttpClient::shared(hpx::BrowserProfile::Chrome);
        assert!(client.is_ok());
    }

    #[test]
    fn redirect_resolve_handles_rfc3986_cases() {
        // (base, location, expected)
        let cases = [
            // Absolute
            ("https://a.com/x", "https://b.com/y", "https://b.com/y"),
            // Root-relative
            ("https://a.com/x/y", "/z", "https://a.com/z"),
            // Relative
            ("https://a.com/x/y", "z.html", "https://a.com/x/z.html"),
            // Dot segments
            ("https://a.com/x/y/", "../z.html", "https://a.com/x/z.html"),
            // Scheme-relative
            ("https://a.com/x", "//b.com/y", "https://b.com/y"),
            // Query-only
            ("https://a.com/x?old=1", "?new=2", "https://a.com/x?new=2"),
        ];
        for (base, location, expected) in cases.iter() {
            assert_eq!(
                resolve_redirect(base, location).unwrap(),
                *expected,
                "base={base}, loc={location}"
            );
        }
    }

    #[test]
    fn response_text_and_ok() {
        let resp = response_with(200, "OK", b"Hello world", "https://example.com");
        assert_eq!(resp.text(), "Hello world");
        assert!(resp.ok());
    }

    #[test]
    fn response_not_ok() {
        let resp = response_with(404, "Not Found", b"", "https://example.com/missing");
        assert!(!resp.ok());
    }

    #[test]
    fn cookie_jar_set_and_get() {
        let mut jar = CookieJar::new();
        let url = Url::parse("https://example.com/path").unwrap();
        jar.set_cookies(&url, &["session=abc123; Path=/; Secure".to_string()]);
        assert_eq!(jar.cookie_count(), 1);
        let cookies = jar.cookies_for(&url);
        assert_eq!(cookies, Some("session=abc123".to_string()));
    }

    #[test]
    fn cookie_jar_domain_scope() {
        let mut jar = CookieJar::new();
        let url = Url::parse("https://sub.example.com").unwrap();
        jar.set_cookies(&url, &["token=xyz; Domain=example.com".to_string()]);
        // Parent domain cookie visible on subdomain
        assert_eq!(jar.cookie_count(), 1);
        let cookies = jar.cookies_for(&url);
        assert!(cookies.is_some());
        assert!(cookies.unwrap().contains("token=xyz"));
    }

    #[test]
    fn cookie_jar_cross_domain_reject() {
        let mut jar = CookieJar::new();
        let url = Url::parse("https://example.com").unwrap();
        jar.set_cookies(&url, &["evil=hack; Domain=evil.com".to_string()]);
        assert_eq!(jar.cookie_count(), 0);
    }

    #[test]
    fn cookie_jar_clear_for_domain() {
        let mut jar = CookieJar::new();
        let url = Url::parse("https://example.com").unwrap();
        jar.set_cookies(&url, &["a=1".to_string(), "b=2".to_string()]);
        assert_eq!(jar.cookie_count(), 2);
        jar.clear_for_domain("example.com");
        assert_eq!(jar.cookie_count(), 0);
    }

    #[test]
    fn accept_ch_starts_false_then_true() {
        let client = HttpClient::new(hpx::BrowserProfile::Chrome).unwrap();
        assert!(!client.has_accept_ch("example.com"));

        let headers = single_header("accept-ch", "Sec-CH-UA-Full-Version-List");
        // First sighting reports an upgrade; a repeat does not.
        assert!(client.learn_accept_ch("example.com", &headers));
        assert!(!client.learn_accept_ch("example.com", &headers));

        assert!(client.has_accept_ch("example.com"));
        assert!(!client.has_accept_ch("other.com"));
    }

    #[test]
    fn accept_ch_case_insensitive() {
        let client = HttpClient::new(hpx::BrowserProfile::Chrome).unwrap();
        let headers = single_header("Accept-CH", "Sec-CH-UA-Arch");
        assert!(client.learn_accept_ch("site.example", &headers));
        assert!(client.has_accept_ch("site.example"));
    }

    #[test]
    fn critical_ch_also_counts() {
        let client = HttpClient::new(hpx::BrowserProfile::Chrome).unwrap();
        let headers = single_header("critical-ch", "Sec-CH-UA-Model");
        assert!(client.learn_accept_ch("critical.example", &headers));
        assert!(client.has_accept_ch("critical.example"));
    }

    #[test]
    fn response_without_accept_ch_does_not_upgrade() {
        let client = HttpClient::new(hpx::BrowserProfile::Chrome).unwrap();
        let headers = single_header("content-type", "text/html");
        assert!(!client.learn_accept_ch("boring.example", &headers));
        assert!(!client.has_accept_ch("boring.example"));
    }

    #[tokio::test]
    #[ignore] // requires network
    async fn get_request() {
        let client = HttpClient::new(hpx::BrowserProfile::Chrome).unwrap();
        let resp = client.get("https://httpbin.org/get").await.unwrap();
        assert_eq!(resp.status, 200);
        assert!(resp.text().contains("httpbin"));
    }

    #[tokio::test]
    #[ignore] // requires network
    async fn post_request() {
        let client = HttpClient::new(hpx::BrowserProfile::Chrome).unwrap();
        let resp = client
            .post("https://httpbin.org/post", "hello")
            .await
            .unwrap();
        assert_eq!(resp.status, 200);
        assert!(resp.text().contains("hello"));
    }

    #[tokio::test]
    #[ignore] // requires network
    async fn get_follow_redirects() {
        let client = HttpClient::new(hpx::BrowserProfile::Chrome).unwrap();
        let resp = client
            .get_follow("https://httpbin.org/redirect/2", 5)
            .await
            .unwrap();
        assert_eq!(resp.status, 200);
    }
}
686            .get_follow("https://httpbin.org/redirect/2", 5)
687            .await
688            .unwrap();
689        assert_eq!(resp.status, 200);
690    }
691}