1pub mod blocklist;
9pub mod cookies;
10pub mod csp;
11pub mod headers;
12pub mod robots;
13pub mod ssrf;
14
15use std::{collections::HashMap, sync::Arc};
16
17pub use cookies::CookieJar;
18use tokio::sync::Mutex;
19use url::Url;
20
/// How [`HttpClient::request`] treats a 3xx redirect response.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum RedirectPolicy {
    /// Follow redirects, allowing at most the given number of hops.
    Follow(u8),
    /// Never follow; the redirect response itself is returned to the caller.
    Manual,
}

impl RedirectPolicy {
    /// Returns the redirect budget for this policy.
    ///
    /// `Follow(n)` yields `n`; `Manual` yields `0`.
    #[inline]
    pub const fn max_redirects(self) -> u8 {
        if let Self::Follow(limit) = self {
            limit
        } else {
            0
        }
    }
}
44
45#[derive(Debug, thiserror::Error)]
50pub enum NetError {
51 #[error("HTTP error: {0}")]
52 Http(String),
53
54 #[error("URL parse error: {0}")]
55 Url(#[from] url::ParseError),
56
57 #[error("Request failed: {0}")]
58 Request(String),
59
60 #[error("hpx client error: {0}")]
61 Client(#[from] hpx::Error),
62}
63
/// Per-request timing marks.
///
/// All fields default to `0.0`. Judging by the `_ms` suffix the values are
/// presumably milliseconds; the reference point is not visible here
/// (TODO confirm against the code that fills them in).
#[derive(Clone, Debug, Default)]
pub struct TimingStats {
    // DNS resolution window.
    pub dns_start_ms: f64,
    pub dns_end_ms: f64,

    // Connection establishment window.
    pub connect_start_ms: f64,
    pub connect_end_ms: f64,

    // TLS handshake window.
    pub tls_start_ms: f64,
    pub tls_end_ms: f64,

    // Request sent / first response data / response complete.
    pub request_start_ms: f64,
    pub response_start_ms: f64,
    pub response_end_ms: f64,
}
80
81#[derive(Debug, Clone, Default)]
86pub struct Response {
87 pub status: u16,
88 pub status_text: String,
89 pub headers: HashMap<String, String>,
90 pub set_cookies: Vec<String>,
93 pub body: Vec<u8>,
94 pub url: String,
95 pub accept_ch_upgrade: bool,
97 pub timings: TimingStats,
98}
99
100impl Response {
101 pub fn text(&self) -> String {
102 String::from_utf8_lossy(&self.body).to_string()
103 }
104
105 pub fn ok(&self) -> bool {
106 (200..300).contains(&self.status)
107 }
108}
109
110#[derive(Clone)]
115pub struct SharedSession {
116 pub cookies: Arc<Mutex<CookieJar>>,
117 pub accept_ch: scc::HashSet<String>,
118 pub h1_only_hosts: scc::HashSet<String>,
119}
120
121impl SharedSession {
122 pub fn new() -> Self {
123 Self {
124 cookies: Arc::new(Mutex::new(CookieJar::new())),
125 accept_ch: scc::HashSet::new(),
126 h1_only_hosts: scc::HashSet::new(),
127 }
128 }
129}
130
131impl Default for SharedSession {
132 fn default() -> Self {
133 Self::new()
134 }
135}
136
137#[derive(Clone)]
142pub struct HttpClient {
143 inner: hpx::Client,
144 cookies: Arc<Mutex<CookieJar>>,
145 accept_ch_origins: scc::HashSet<String>,
146 h1_only_hosts: scc::HashSet<String>,
147 browser_profile: hpx::BrowserProfile,
148}
149
150impl HttpClient {
151 pub fn new(browser_profile: hpx::BrowserProfile) -> Result<Self, NetError> {
153 let session = SharedSession::new();
154 Self::with_session(Arc::new(session), browser_profile)
155 }
156
157 pub fn with_session(
159 session: Arc<SharedSession>,
160 browser_profile: hpx::BrowserProfile,
161 ) -> Result<Self, NetError> {
162 let inner = hpx::Client::builder()
163 .build()
164 .map_err(|e| NetError::Http(format!("failed to build hpx client: {e}")))?;
165
166 Ok(Self {
167 inner,
168 cookies: session.cookies.clone(),
169 accept_ch_origins: session.accept_ch.clone(),
170 h1_only_hosts: session.h1_only_hosts.clone(),
171 browser_profile,
172 })
173 }
174
175 pub fn cookies(&self) -> Arc<Mutex<CookieJar>> {
176 self.cookies.clone()
177 }
178
179 pub fn browser_profile(&self) -> &hpx::BrowserProfile {
180 &self.browser_profile
181 }
182
183 pub fn has_accept_ch(&self, host: &str) -> bool {
185 self.accept_ch_origins.contains_sync(host)
186 }
187
188 fn learn_accept_ch(&self, host: &str, headers: &HashMap<String, String>) -> bool {
191 let has_ch = headers.keys().any(|k| {
192 let k = k.to_ascii_lowercase();
193 k == "accept-ch" || k == "critical-ch"
194 });
195 if has_ch {
196 return self.accept_ch_origins.insert_sync(host.to_string()).is_ok();
197 }
198 false
199 }
200
201 pub async fn cookies_for_url(&self, url: &Url) -> Option<String> {
203 let jar = self.cookies.lock().await;
204 jar.cookies_for(url)
205 }
206
207 pub async fn inject_cookies(&self, url: &Url, cookies: &[String]) {
209 let mut jar = self.cookies.lock().await;
210 jar.set_cookies(url, cookies);
211 }
212
213 pub async fn set_cookie_str(&self, url: &Url, raw: &str) {
215 let mut jar = self.cookies.lock().await;
216 jar.set_cookies(url, &[raw.to_string()]);
217 }
218
219 pub async fn clear_cookies_for_domain(&self, target_domain: &str) {
221 let mut jar = self.cookies.lock().await;
222 jar.clear_for_domain(target_domain);
223 }
224
225 #[deprecated(note = "Use HttpClient::request() instead")]
229 pub async fn get(&self, url: &str) -> Result<Response, NetError> {
230 self.request("GET", url, None, &[], RedirectPolicy::Manual)
231 .await
232 }
233
234 #[deprecated(note = "Use HttpClient::request() instead")]
236 pub async fn get_with_headers(
237 &self,
238 url: &str,
239 extra_headers: &[(String, String)],
240 ) -> Result<Response, NetError> {
241 self.request("GET", url, None, extra_headers, RedirectPolicy::Manual)
242 .await
243 }
244
245 #[deprecated(note = "Use HttpClient::request() with explicit headers instead")]
247 pub async fn fetch_get(
248 &self,
249 url: &str,
250 extra_headers: &[(String, String)],
251 _origin: Option<&str>,
252 ) -> Result<Response, NetError> {
253 let mut headers = extra_headers.to_vec();
254 headers.push(("accept".to_string(), "*/*".to_string()));
255 headers.push(("sec-fetch-mode".to_string(), "cors".to_string()));
256 headers.push(("sec-fetch-dest".to_string(), "empty".to_string()));
257 headers.push(("sec-fetch-site".to_string(), "same-origin".to_string()));
258
259 self.request("GET", url, None, &headers, RedirectPolicy::Manual)
260 .await
261 }
262
263 #[deprecated(note = "Use HttpClient::request() with explicit headers instead")]
265 pub async fn fetch_post_bytes(
266 &self,
267 url: &str,
268 body: &[u8],
269 extra_headers: &[(String, String)],
270 _origin: Option<&str>,
271 ) -> Result<Response, NetError> {
272 let mut headers = extra_headers.to_vec();
273 headers.push(("accept".to_string(), "*/*".to_string()));
274 headers.push(("sec-fetch-mode".to_string(), "cors".to_string()));
275 headers.push(("sec-fetch-dest".to_string(), "empty".to_string()));
276 headers.push(("sec-fetch-site".to_string(), "same-origin".to_string()));
277
278 self.request("POST", url, Some(body), &headers, RedirectPolicy::Manual)
279 .await
280 }
281
282 #[deprecated(note = "Use HttpClient::request() instead")]
284 pub async fn post(&self, url: &str, body: &str) -> Result<Response, NetError> {
285 self.request(
286 "POST",
287 url,
288 Some(body.as_bytes()),
289 &[],
290 RedirectPolicy::Manual,
291 )
292 .await
293 }
294
295 #[deprecated(note = "Use HttpClient::request() instead")]
297 pub async fn post_with_headers(
298 &self,
299 url: &str,
300 body: &str,
301 extra_headers: &[(String, String)],
302 ) -> Result<Response, NetError> {
303 self.request(
304 "POST",
305 url,
306 Some(body.as_bytes()),
307 extra_headers,
308 RedirectPolicy::Manual,
309 )
310 .await
311 }
312
313 #[deprecated(note = "Use HttpClient::request() instead")]
315 pub async fn post_bytes_with_headers(
316 &self,
317 url: &str,
318 body: &[u8],
319 extra_headers: &[(String, String)],
320 ) -> Result<Response, NetError> {
321 self.request(
322 "POST",
323 url,
324 Some(body),
325 extra_headers,
326 RedirectPolicy::Manual,
327 )
328 .await
329 }
330
331 #[deprecated(note = "Use HttpClient::request() with RedirectPolicy::Follow(n) instead")]
333 pub async fn get_follow(&self, url: &str, max_redirects: u8) -> Result<Response, NetError> {
334 self.request("GET", url, None, &[], RedirectPolicy::Follow(max_redirects))
335 .await
336 }
337
338 #[deprecated(note = "Use HttpClient::request() with RedirectPolicy::Follow(n) instead")]
340 pub async fn get_follow_with_headers(
341 &self,
342 url: &str,
343 extra_headers: &[(String, String)],
344 max_redirects: u8,
345 ) -> Result<Response, NetError> {
346 self.request(
347 "GET",
348 url,
349 None,
350 extra_headers,
351 RedirectPolicy::Follow(max_redirects),
352 )
353 .await
354 }
355
356 #[deprecated(note = "Use HttpClient::request() with RedirectPolicy::Follow(n) instead")]
358 pub async fn post_follow(
359 &self,
360 url: &str,
361 body: &str,
362 max_redirects: u8,
363 ) -> Result<Response, NetError> {
364 self.request(
365 "POST",
366 url,
367 Some(body.as_bytes()),
368 &[],
369 RedirectPolicy::Follow(max_redirects),
370 )
371 .await
372 }
373
374 #[deprecated(note = "Use HttpClient::request() with RedirectPolicy::Follow(n) instead")]
376 pub async fn post_bytes_follow(
377 &self,
378 url: &str,
379 body: &[u8],
380 extra_headers: &[(String, String)],
381 max_redirects: u8,
382 ) -> Result<Response, NetError> {
383 self.request(
384 "POST",
385 url,
386 Some(body),
387 extra_headers,
388 RedirectPolicy::Follow(max_redirects),
389 )
390 .await
391 }
392
393 pub async fn preconnect(&self, url: &str) -> Result<(), NetError> {
396 let _ = self
400 .inner
401 .head(url)
402 .emulation(self.browser_profile)
403 .send()
404 .await;
405 Ok(())
406 }
407
408 pub async fn request(
414 &self,
415 method: &str,
416 url: &str,
417 body: Option<&[u8]>,
418 extra_headers: &[(String, String)],
419 policy: RedirectPolicy,
420 ) -> Result<Response, NetError> {
421 let mut current_url = url.to_string();
422 let mut current_method = method.to_string();
423 let mut current_body = body.map(<[u8]>::to_vec);
424 let max_redirects = policy.max_redirects();
425 let mut remaining = max_redirects;
426
427 loop {
428 let parsed_current = Url::parse(¤t_url)?;
429 let hpx_resp = self
430 .execute_single_request(
431 ¤t_method,
432 ¤t_url,
433 current_body.as_deref(),
434 extra_headers,
435 )
436 .await?;
437
438 let resp = self
439 .process_response(hpx_resp, ¤t_url, &parsed_current)
440 .await?;
441
442 if !matches!(resp.status, 301 | 302 | 303 | 307 | 308) {
443 return Ok(resp);
444 }
445
446 match policy {
447 RedirectPolicy::Manual => return Ok(resp),
448 RedirectPolicy::Follow(_) => {
449 let loc = resp.headers.get("location").ok_or_else(|| {
450 NetError::Request("redirect missing Location header".into())
451 })?;
452 let next_url = resolve_redirect(¤t_url, loc)?;
453
454 if current_method == "POST" && matches!(resp.status, 301..=303) {
456 current_method = "GET".to_string();
457 current_body = None;
458 }
459
460 if remaining == 0 {
461 return Ok(resp);
462 }
463 remaining -= 1;
464 current_url = next_url;
465 }
466 }
467 }
468 }
469
470 async fn execute_single_request(
472 &self,
473 method: &str,
474 url: &str,
475 body: Option<&[u8]>,
476 extra_headers: &[(String, String)],
477 ) -> Result<hpx::Response, NetError> {
478 let parsed = Url::parse(url)?;
479 let builder = match method {
480 "GET" | "HEAD" => self.inner.get(url),
481 "POST" => self.inner.post(url),
482 "PUT" => self.inner.put(url),
483 "PATCH" => self.inner.patch(url),
484 "DELETE" => self.inner.delete(url),
485 _ => {
486 return Err(NetError::Request(format!(
487 "unsupported HTTP method: {method}"
488 )));
489 }
490 }
491 .emulation(self.browser_profile);
492
493 let builder = self
494 .inject_request_headers(builder, &parsed, extra_headers)
495 .await;
496
497 let builder = if let Some(b) = body {
498 builder.body(b.to_vec())
499 } else {
500 builder
501 };
502
503 builder.send().await.map_err(|e| e.into())
504 }
505
506 async fn inject_request_headers(
510 &self,
511 mut builder: hpx::RequestBuilder,
512 parsed: &Url,
513 extra_headers: &[(String, String)],
514 ) -> hpx::RequestBuilder {
515 let cookie_str = {
516 let jar = self.cookies.lock().await;
517 jar.cookies_for(parsed)
518 };
519
520 if let Some(cs) = cookie_str {
521 builder = builder.header("cookie", cs);
522 }
523
524 for (k, v) in extra_headers {
525 if k.eq_ignore_ascii_case("host") || k.eq_ignore_ascii_case("connection") {
526 continue;
527 }
528 builder = builder.header(k.as_str(), v.as_str());
529 }
530
531 builder
532 }
533
534 async fn process_response(
536 &self,
537 hpx_resp: hpx::Response,
538 url: &str,
539 parsed: &Url,
540 ) -> Result<Response, NetError> {
541 let status = hpx_resp.status().as_u16();
542 let status_text = hpx_resp
543 .status()
544 .canonical_reason()
545 .unwrap_or("")
546 .to_string();
547
548 let mut headers = HashMap::new();
549 let mut set_cookies = Vec::new();
550
551 for (key, value) in hpx_resp.headers() {
552 if let Ok(v) = value.to_str() {
553 if key.as_str().eq_ignore_ascii_case("set-cookie") {
554 set_cookies.push(v.to_string());
555 } else {
556 headers.insert(key.to_string(), v.to_string());
557 }
558 }
559 }
560
561 let body = hpx_resp
562 .bytes()
563 .await
564 .map_err(|e| NetError::Http(format!("failed to read body: {e}")))?;
565
566 let host = parsed.host_str().unwrap_or("");
568 let upgrade = self.learn_accept_ch(host, &headers);
569
570 if !set_cookies.is_empty() {
572 let mut jar = self.cookies.lock().await;
573 jar.set_cookies(parsed, &set_cookies);
574 }
575
576 Ok(Response {
577 status,
578 status_text,
579 headers,
580 set_cookies,
581 body: body.to_vec(),
582 url: url.to_string(),
583 accept_ch_upgrade: upgrade,
584 timings: TimingStats::default(),
585 })
586 }
587}
588
589fn resolve_redirect(current_url: &str, location: &str) -> Result<String, NetError> {
595 let base = Url::parse(current_url).map_err(|e| NetError::Request(e.to_string()))?;
596 let resolved = base.join(location).map_err(|e| {
597 NetError::Request(format!(
598 "redirect resolve: {e} (base={current_url}, loc={location})"
599 ))
600 })?;
601 Ok(resolved.to_string())
602}
603
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a Chrome-profile client, panicking if construction fails.
    fn chrome_client() -> HttpClient {
        HttpClient::new(hpx::BrowserProfile::Chrome).expect("client should build")
    }

    /// Builds a response with the given status and body and no headers.
    fn bare_response(status: u16, status_text: &str, body: &[u8], url: &str) -> Response {
        Response {
            status,
            status_text: status_text.into(),
            headers: HashMap::new(),
            set_cookies: Vec::new(),
            body: body.to_vec(),
            url: url.into(),
            accept_ch_upgrade: false,
            timings: TimingStats::default(),
        }
    }

    // ---- construction -----------------------------------------------------

    #[test]
    fn client_creates_successfully() {
        let client = HttpClient::new(hpx::BrowserProfile::Chrome);
        assert!(client.is_ok());
    }

    #[test]
    fn with_session_creates_successfully() {
        let session = Arc::new(SharedSession::new());
        let client = HttpClient::with_session(session, hpx::BrowserProfile::Chrome);
        assert!(client.is_ok());
    }

    #[test]
    fn shared_session_new_isolation() {
        let s1 = SharedSession::new();
        let s2 = SharedSession::new();
        assert!(!Arc::ptr_eq(&s1.cookies, &s2.cookies));
    }

    #[test]
    fn shared_session_default() {
        let s: SharedSession = Default::default();
        // A runtime is only needed to await the async mutex.
        let rt = tokio::runtime::Runtime::new().unwrap();
        let cookies = rt.block_on(async { s.cookies.lock().await.cookie_count() });
        assert_eq!(cookies, 0);
    }

    // ---- redirect resolution ----------------------------------------------

    #[test]
    fn redirect_resolve_handles_rfc3986_cases() {
        // Absolute URL replaces the base entirely.
        assert_eq!(
            resolve_redirect("https://a.com/x", "https://b.com/y").unwrap(),
            "https://b.com/y"
        );
        // Absolute path keeps scheme and host.
        assert_eq!(
            resolve_redirect("https://a.com/x/y", "/z").unwrap(),
            "https://a.com/z"
        );
        // Relative path replaces the last segment.
        assert_eq!(
            resolve_redirect("https://a.com/x/y", "z.html").unwrap(),
            "https://a.com/x/z.html"
        );
        // Dot segments are normalised.
        assert_eq!(
            resolve_redirect("https://a.com/x/y/", "../z.html").unwrap(),
            "https://a.com/x/z.html"
        );
        // Scheme-relative reference inherits the scheme.
        assert_eq!(
            resolve_redirect("https://a.com/x", "//b.com/y").unwrap(),
            "https://b.com/y"
        );
        // Query-only reference replaces the query.
        assert_eq!(
            resolve_redirect("https://a.com/x?old=1", "?new=2").unwrap(),
            "https://a.com/x?new=2"
        );
    }

    // ---- Response helpers -------------------------------------------------

    #[test]
    fn response_text_and_ok() {
        let resp = bare_response(200, "OK", b"Hello world", "https://example.com");
        assert_eq!(resp.text(), "Hello world");
        assert!(resp.ok());
    }

    #[test]
    fn response_not_ok() {
        let resp = bare_response(404, "Not Found", b"", "https://example.com/missing");
        assert!(!resp.ok());
    }

    // ---- cookie jar -------------------------------------------------------

    #[test]
    fn cookie_jar_set_and_get() {
        let mut jar = CookieJar::new();
        let url = Url::parse("https://example.com/path").unwrap();
        jar.set_cookies(&url, &["session=abc123; Path=/; Secure".to_string()]);
        assert_eq!(jar.cookie_count(), 1);
        let cookies = jar.cookies_for(&url);
        assert_eq!(cookies, Some("session=abc123".to_string()));
    }

    #[test]
    fn cookie_jar_domain_scope() {
        let mut jar = CookieJar::new();
        let url = Url::parse("https://sub.example.com").unwrap();
        jar.set_cookies(&url, &["token=xyz; Domain=example.com".to_string()]);
        assert_eq!(jar.cookie_count(), 1);
        let cookies = jar.cookies_for(&url);
        assert!(cookies.is_some());
        assert!(cookies.unwrap().contains("token=xyz"));
    }

    #[test]
    fn cookie_jar_cross_domain_reject() {
        let mut jar = CookieJar::new();
        let url = Url::parse("https://example.com").unwrap();
        jar.set_cookies(&url, &["evil=hack; Domain=evil.com".to_string()]);
        assert_eq!(jar.cookie_count(), 0);
    }

    #[test]
    fn cookie_jar_clear_for_domain() {
        let mut jar = CookieJar::new();
        let url = Url::parse("https://example.com").unwrap();
        jar.set_cookies(&url, &["a=1".to_string(), "b=2".to_string()]);
        assert_eq!(jar.cookie_count(), 2);
        jar.clear_for_domain("example.com");
        assert_eq!(jar.cookie_count(), 0);
    }

    // ---- Accept-CH bookkeeping --------------------------------------------

    #[test]
    fn accept_ch_starts_false_then_true() {
        let client = chrome_client();
        assert!(!client.has_accept_ch("example.com"));

        let mut headers = HashMap::new();
        headers.insert(
            "accept-ch".to_string(),
            "Sec-CH-UA-Full-Version-List".to_string(),
        );
        client.learn_accept_ch("example.com", &headers);

        assert!(client.has_accept_ch("example.com"));
        assert!(!client.has_accept_ch("other.com"));
    }

    #[test]
    fn accept_ch_case_insensitive() {
        let client = chrome_client();
        let mut headers = HashMap::new();
        headers.insert("Accept-CH".to_string(), "Sec-CH-UA-Arch".to_string());
        client.learn_accept_ch("site.example", &headers);
        assert!(client.has_accept_ch("site.example"));
    }

    #[test]
    fn response_without_accept_ch_does_not_upgrade() {
        let client = chrome_client();
        let mut headers = HashMap::new();
        headers.insert("content-type".to_string(), "text/html".to_string());
        client.learn_accept_ch("boring.example", &headers);
        assert!(!client.has_accept_ch("boring.example"));
    }

    // ---- live network tests (opt-in) --------------------------------------
    // These call `request` directly rather than the deprecated wrappers, so
    // the test build stays free of deprecation warnings.

    #[tokio::test]
    #[ignore = "requires network access to httpbin.org"]
    async fn get_request() {
        let client = chrome_client();
        let resp = client
            .request(
                "GET",
                "https://httpbin.org/get",
                None,
                &[],
                RedirectPolicy::Manual,
            )
            .await
            .unwrap();
        assert_eq!(resp.status, 200);
        assert!(resp.text().contains("httpbin"));
    }

    #[tokio::test]
    #[ignore = "requires network access to httpbin.org"]
    async fn post_request() {
        let client = chrome_client();
        let resp = client
            .request(
                "POST",
                "https://httpbin.org/post",
                Some("hello".as_bytes()),
                &[],
                RedirectPolicy::Manual,
            )
            .await
            .unwrap();
        assert_eq!(resp.status, 200);
        assert!(resp.text().contains("hello"));
    }

    #[tokio::test]
    #[ignore = "requires network access to httpbin.org"]
    async fn get_follow_redirects() {
        let client = chrome_client();
        let resp = client
            .request(
                "GET",
                "https://httpbin.org/redirect/2",
                None,
                &[],
                RedirectPolicy::Follow(5),
            )
            .await
            .unwrap();
        assert_eq!(resp.status, 200);
    }

    // ---- RedirectPolicy ---------------------------------------------------

    #[test]
    fn redirect_policy_max_redirects() {
        assert_eq!(RedirectPolicy::Follow(5).max_redirects(), 5);
        assert_eq!(RedirectPolicy::Follow(0).max_redirects(), 0);
        assert_eq!(RedirectPolicy::Manual.max_redirects(), 0);
    }

    #[test]
    fn redirect_policy_clone_copy() {
        let p = RedirectPolicy::Follow(3);
        // `p` stays usable after the assignment because the type is `Copy`.
        let q = p;
        assert_eq!(p, q);
    }
}