1pub mod blocklist;
9pub mod cookies;
10pub mod csp;
11pub mod headers;
12pub mod robots;
13pub mod ssrf;
14
15use std::{collections::HashMap, sync::Arc};
16
17pub use cookies::CookieJar;
18use tokio::sync::Mutex;
19use url::Url;
20
21#[derive(Debug, thiserror::Error)]
26pub enum NetError {
27 #[error("HTTP error: {0}")]
28 Http(String),
29
30 #[error("URL parse error: {0}")]
31 Url(#[from] url::ParseError),
32
33 #[error("Request failed: {0}")]
34 Request(String),
35
36 #[error("hpx client error: {0}")]
37 Client(#[from] hpx::Error),
38}
39
/// Per-phase timing marks attached to a [`Response`].
///
/// Every field is an `f64`; the `_ms` suffix suggests milliseconds, but the
/// reference point is not established anywhere in this file — TODO confirm.
/// The only value this file ever stores is `TimingStats::default()`, i.e.
/// all fields zero.
#[derive(Clone, Debug, Default)]
pub struct TimingStats {
    /// Start of DNS resolution.
    pub dns_start_ms: f64,
    /// End of DNS resolution.
    pub dns_end_ms: f64,
    /// Start of connection establishment.
    pub connect_start_ms: f64,
    /// End of connection establishment.
    pub connect_end_ms: f64,
    /// Start of the TLS handshake.
    pub tls_start_ms: f64,
    /// End of the TLS handshake.
    pub tls_end_ms: f64,
    /// Moment the request started being sent.
    pub request_start_ms: f64,
    /// Moment the response started arriving.
    pub response_start_ms: f64,
    /// Moment the response finished arriving.
    pub response_end_ms: f64,
}
56
57#[derive(Debug, Clone, Default)]
62pub struct Response {
63 pub status: u16,
64 pub status_text: String,
65 pub headers: HashMap<String, String>,
66 pub set_cookies: Vec<String>,
69 pub body: Vec<u8>,
70 pub url: String,
71 pub accept_ch_upgrade: bool,
73 pub timings: TimingStats,
74}
75
76impl Response {
77 pub fn text(&self) -> String {
78 String::from_utf8_lossy(&self.body).to_string()
79 }
80
81 pub fn ok(&self) -> bool {
82 (200..300).contains(&self.status)
83 }
84}
85
86#[derive(Clone)]
91pub struct SharedSession {
92 pub cookies: Arc<Mutex<CookieJar>>,
93 pub accept_ch: scc::HashSet<String>,
94 pub h1_only_hosts: scc::HashSet<String>,
95}
96
97static SHARED_SESSION: std::sync::OnceLock<SharedSession> = std::sync::OnceLock::new();
98
99pub fn shared_session() -> SharedSession {
101 SHARED_SESSION
102 .get_or_init(|| SharedSession {
103 cookies: Arc::new(Mutex::new(CookieJar::new())),
104 accept_ch: scc::HashSet::new(),
105 h1_only_hosts: scc::HashSet::new(),
106 })
107 .clone()
108}
109
110#[derive(Clone)]
115pub struct HttpClient {
116 inner: hpx::Client,
117 cookies: Arc<Mutex<CookieJar>>,
118 accept_ch_origins: scc::HashSet<String>,
119 h1_only_hosts: scc::HashSet<String>,
120 browser_profile: hpx::BrowserProfile,
121}
122
123impl HttpClient {
124 pub fn new(browser_profile: hpx::BrowserProfile) -> Result<Self, NetError> {
126 let inner = hpx::Client::builder()
127 .build()
128 .map_err(|e| NetError::Http(format!("failed to build hpx client: {e}")))?;
129
130 Ok(Self {
131 inner,
132 cookies: Arc::new(Mutex::new(CookieJar::new())),
133 accept_ch_origins: scc::HashSet::new(),
134 h1_only_hosts: scc::HashSet::new(),
135 browser_profile,
136 })
137 }
138
139 pub fn shared(browser_profile: hpx::BrowserProfile) -> Result<Self, NetError> {
141 let s = shared_session();
142 let inner = hpx::Client::builder()
143 .build()
144 .map_err(|e| NetError::Http(format!("failed to build hpx client: {e}")))?;
145
146 Ok(Self {
147 inner,
148 cookies: s.cookies,
149 accept_ch_origins: s.accept_ch,
150 h1_only_hosts: s.h1_only_hosts,
151 browser_profile,
152 })
153 }
154
155 pub fn cookies(&self) -> Arc<Mutex<CookieJar>> {
156 self.cookies.clone()
157 }
158
159 pub fn browser_profile(&self) -> &hpx::BrowserProfile {
160 &self.browser_profile
161 }
162
163 pub fn has_accept_ch(&self, host: &str) -> bool {
165 self.accept_ch_origins.contains_sync(host)
166 }
167
168 fn learn_accept_ch(&self, host: &str, headers: &HashMap<String, String>) -> bool {
171 let has_ch = headers.keys().any(|k| {
172 let k = k.to_ascii_lowercase();
173 k == "accept-ch" || k == "critical-ch"
174 });
175 if has_ch {
176 return self.accept_ch_origins.insert_sync(host.to_string()).is_ok();
177 }
178 false
179 }
180
181 pub async fn cookies_for_url(&self, url: &Url) -> Option<String> {
183 let jar = self.cookies.lock().await;
184 jar.cookies_for(url)
185 }
186
187 pub async fn inject_cookies(&self, url: &Url, cookies: &[String]) {
189 let mut jar = self.cookies.lock().await;
190 jar.set_cookies(url, cookies);
191 }
192
193 pub async fn set_cookie_str(&self, url: &Url, raw: &str) {
195 let mut jar = self.cookies.lock().await;
196 jar.set_cookies(url, &[raw.to_string()]);
197 }
198
199 pub async fn clear_cookies_for_domain(&self, target_domain: &str) {
201 let mut jar = self.cookies.lock().await;
202 jar.clear_for_domain(target_domain);
203 }
204
205 pub async fn get(&self, url: &str) -> Result<Response, NetError> {
209 self.get_with_headers(url, &[]).await
210 }
211
212 pub async fn get_with_headers(
214 &self,
215 url: &str,
216 extra_headers: &[(String, String)],
217 ) -> Result<Response, NetError> {
218 let parsed = Url::parse(url)?;
219 let builder = self.inner.get(url).emulation(self.browser_profile);
220
221 let builder = self
222 .inject_request_headers(builder, &parsed, extra_headers)
223 .await;
224 let hpx_resp = builder.send().await?;
225 self.process_response(hpx_resp, url, &parsed).await
226 }
227
228 pub async fn fetch_get(
230 &self,
231 url: &str,
232 extra_headers: &[(String, String)],
233 _origin: Option<&str>,
234 ) -> Result<Response, NetError> {
235 let parsed = Url::parse(url)?;
236 let mut builder = self.inner.get(url).emulation(self.browser_profile);
237
238 builder = builder.header("accept", "*/*");
240 builder = builder.header("sec-fetch-mode", "cors");
241 builder = builder.header("sec-fetch-dest", "empty");
242 builder = builder.header("sec-fetch-site", "same-origin");
243
244 builder = self
245 .inject_request_headers(builder, &parsed, extra_headers)
246 .await;
247 let hpx_resp = builder.send().await?;
248 self.process_response(hpx_resp, url, &parsed).await
249 }
250
251 pub async fn fetch_post_bytes(
253 &self,
254 url: &str,
255 body: &[u8],
256 extra_headers: &[(String, String)],
257 _origin: Option<&str>,
258 ) -> Result<Response, NetError> {
259 let parsed = Url::parse(url)?;
260 let mut builder = self.inner.post(url).emulation(self.browser_profile);
261
262 builder = builder.header("accept", "*/*");
263 builder = builder.header("sec-fetch-mode", "cors");
264 builder = builder.header("sec-fetch-dest", "empty");
265 builder = builder.header("sec-fetch-site", "same-origin");
266
267 builder = self
268 .inject_request_headers(builder, &parsed, extra_headers)
269 .await;
270 let hpx_resp = builder.body(body.to_vec()).send().await?;
271 self.process_response(hpx_resp, url, &parsed).await
272 }
273
274 pub async fn post(&self, url: &str, body: &str) -> Result<Response, NetError> {
276 self.post_with_headers(url, body, &[]).await
277 }
278
279 pub async fn post_with_headers(
281 &self,
282 url: &str,
283 body: &str,
284 extra_headers: &[(String, String)],
285 ) -> Result<Response, NetError> {
286 self.post_bytes_with_headers(url, body.as_bytes(), extra_headers)
287 .await
288 }
289
290 pub async fn post_bytes_with_headers(
292 &self,
293 url: &str,
294 body: &[u8],
295 extra_headers: &[(String, String)],
296 ) -> Result<Response, NetError> {
297 let parsed = Url::parse(url)?;
298 let builder = self.inner.post(url).emulation(self.browser_profile);
299
300 let builder = self
301 .inject_request_headers(builder, &parsed, extra_headers)
302 .await;
303 let hpx_resp = builder.body(body.to_vec()).send().await?;
304 self.process_response(hpx_resp, url, &parsed).await
305 }
306
307 pub async fn get_follow(&self, url: &str, max_redirects: u8) -> Result<Response, NetError> {
309 let mut current_url = url.to_string();
310 for _ in 0..max_redirects {
311 let resp = self.get(¤t_url).await?;
312 if matches!(resp.status, 301 | 302 | 303 | 307 | 308) {
313 if let Some(loc) = resp.headers.get("location") {
314 current_url = resolve_redirect(¤t_url, loc)?;
315 continue;
316 }
317 }
318 return Ok(resp);
319 }
320 self.get(¤t_url).await
321 }
322
323 pub async fn get_follow_with_headers(
325 &self,
326 url: &str,
327 extra_headers: &[(String, String)],
328 max_redirects: u8,
329 ) -> Result<Response, NetError> {
330 let mut current_url = url.to_string();
331 for _ in 0..max_redirects {
332 let resp = self.get_with_headers(¤t_url, extra_headers).await?;
333 if matches!(resp.status, 301 | 302 | 303 | 307 | 308) {
334 if let Some(loc) = resp.headers.get("location") {
335 current_url = resolve_redirect(¤t_url, loc)?;
336 continue;
337 }
338 }
339 return Ok(resp);
340 }
341 self.get_with_headers(¤t_url, extra_headers).await
342 }
343
344 pub async fn post_follow(
346 &self,
347 url: &str,
348 body: &str,
349 max_redirects: u8,
350 ) -> Result<Response, NetError> {
351 self.post_bytes_follow(url, body.as_bytes(), &[], max_redirects)
352 .await
353 }
354
355 pub async fn post_bytes_follow(
357 &self,
358 url: &str,
359 body: &[u8],
360 extra_headers: &[(String, String)],
361 max_redirects: u8,
362 ) -> Result<Response, NetError> {
363 let mut current_url = url.to_string();
364 for _ in 0..max_redirects {
365 let resp = self
366 .post_bytes_with_headers(¤t_url, body, extra_headers)
367 .await?;
368
369 if matches!(resp.status, 301 | 302 | 303 | 307 | 308) {
370 if let Some(loc) = resp.headers.get("location") {
371 let next_url = resolve_redirect(¤t_url, loc)?;
372 if matches!(resp.status, 307 | 308) {
373 current_url = next_url;
374 continue;
375 }
376 return self
378 .get_follow(&next_url, max_redirects.saturating_sub(1))
379 .await;
380 }
381 }
382 return Ok(resp);
383 }
384 self.post_bytes_with_headers(¤t_url, body, extra_headers)
385 .await
386 }
387
388 pub async fn preconnect(&self, url: &str) -> Result<(), NetError> {
391 let _ = self
395 .inner
396 .head(url)
397 .emulation(self.browser_profile)
398 .send()
399 .await;
400 Ok(())
401 }
402
403 async fn inject_request_headers(
407 &self,
408 mut builder: hpx::RequestBuilder,
409 parsed: &Url,
410 extra_headers: &[(String, String)],
411 ) -> hpx::RequestBuilder {
412 let cookie_str = {
413 let jar = self.cookies.lock().await;
414 jar.cookies_for(parsed)
415 };
416
417 if let Some(cs) = cookie_str {
418 builder = builder.header("cookie", cs);
419 }
420
421 for (k, v) in extra_headers {
422 if k.eq_ignore_ascii_case("host") || k.eq_ignore_ascii_case("connection") {
423 continue;
424 }
425 builder = builder.header(k.as_str(), v.as_str());
426 }
427
428 builder
429 }
430
431 async fn process_response(
433 &self,
434 hpx_resp: hpx::Response,
435 url: &str,
436 parsed: &Url,
437 ) -> Result<Response, NetError> {
438 let status = hpx_resp.status().as_u16();
439 let status_text = hpx_resp
440 .status()
441 .canonical_reason()
442 .unwrap_or("")
443 .to_string();
444
445 let mut headers = HashMap::new();
446 let mut set_cookies = Vec::new();
447
448 for (key, value) in hpx_resp.headers() {
449 if let Ok(v) = value.to_str() {
450 if key.as_str().eq_ignore_ascii_case("set-cookie") {
451 set_cookies.push(v.to_string());
452 } else {
453 headers.insert(key.to_string(), v.to_string());
454 }
455 }
456 }
457
458 let body = hpx_resp
459 .bytes()
460 .await
461 .map_err(|e| NetError::Http(format!("failed to read body: {e}")))?;
462
463 let host = parsed.host_str().unwrap_or("");
465 let upgrade = self.learn_accept_ch(host, &headers);
466
467 if !set_cookies.is_empty() {
469 let mut jar = self.cookies.lock().await;
470 jar.set_cookies(parsed, &set_cookies);
471 }
472
473 Ok(Response {
474 status,
475 status_text,
476 headers,
477 set_cookies,
478 body: body.to_vec(),
479 url: url.to_string(),
480 accept_ch_upgrade: upgrade,
481 timings: TimingStats::default(),
482 })
483 }
484}
485
486fn resolve_redirect(current_url: &str, location: &str) -> Result<String, NetError> {
492 let base = Url::parse(current_url).map_err(|e| NetError::Request(e.to_string()))?;
493 let resolved = base.join(location).map_err(|e| {
494 NetError::Request(format!(
495 "redirect resolve: {e} (base={current_url}, loc={location})"
496 ))
497 })?;
498 Ok(resolved.to_string())
499}
500
/// Unit tests for the client, response helpers, redirect resolution and the
/// cookie jar. Tests marked `#[ignore]` perform real network requests.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a client with private session state and the Chrome profile.
    fn chrome_client() -> HttpClient {
        HttpClient::new(hpx::BrowserProfile::Chrome).unwrap()
    }

    /// Builds a header map holding exactly one entry.
    fn single_header(name: &str, value: &str) -> HashMap<String, String> {
        HashMap::from([(name.to_string(), value.to_string())])
    }

    #[test]
    fn client_creates_successfully() {
        assert!(HttpClient::new(hpx::BrowserProfile::Chrome).is_ok());
    }

    #[test]
    fn shared_client_creates_successfully() {
        assert!(HttpClient::shared(hpx::BrowserProfile::Chrome).is_ok());
    }

    #[test]
    fn redirect_resolve_handles_rfc3986_cases() {
        // (base URL, Location value, expected absolute URL)
        let cases = [
            ("https://a.com/x", "https://b.com/y", "https://b.com/y"),
            ("https://a.com/x/y", "/z", "https://a.com/z"),
            ("https://a.com/x/y", "z.html", "https://a.com/x/z.html"),
            ("https://a.com/x/y/", "../z.html", "https://a.com/x/z.html"),
            ("https://a.com/x", "//b.com/y", "https://b.com/y"),
            ("https://a.com/x?old=1", "?new=2", "https://a.com/x?new=2"),
        ];
        for (base, location, expected) in cases {
            assert_eq!(resolve_redirect(base, location).unwrap(), expected);
        }
    }

    #[test]
    fn response_text_and_ok() {
        let resp = Response {
            status: 200,
            status_text: "OK".to_string(),
            body: b"Hello world".to_vec(),
            url: "https://example.com".to_string(),
            ..Response::default()
        };
        assert_eq!(resp.text(), "Hello world");
        assert!(resp.ok());
    }

    #[test]
    fn response_not_ok() {
        let resp = Response {
            status: 404,
            status_text: "Not Found".to_string(),
            url: "https://example.com/missing".to_string(),
            ..Response::default()
        };
        assert!(!resp.ok());
    }

    #[test]
    fn cookie_jar_set_and_get() {
        let url = Url::parse("https://example.com/path").unwrap();
        let mut jar = CookieJar::new();
        jar.set_cookies(&url, &["session=abc123; Path=/; Secure".to_string()]);
        assert_eq!(jar.cookie_count(), 1);
        assert_eq!(jar.cookies_for(&url).as_deref(), Some("session=abc123"));
    }

    #[test]
    fn cookie_jar_domain_scope() {
        let url = Url::parse("https://sub.example.com").unwrap();
        let mut jar = CookieJar::new();
        jar.set_cookies(&url, &["token=xyz; Domain=example.com".to_string()]);
        assert_eq!(jar.cookie_count(), 1);
        assert!(matches!(
            jar.cookies_for(&url),
            Some(header) if header.contains("token=xyz")
        ));
    }

    #[test]
    fn cookie_jar_cross_domain_reject() {
        let url = Url::parse("https://example.com").unwrap();
        let mut jar = CookieJar::new();
        jar.set_cookies(&url, &["evil=hack; Domain=evil.com".to_string()]);
        assert_eq!(jar.cookie_count(), 0);
    }

    #[test]
    fn cookie_jar_clear_for_domain() {
        let url = Url::parse("https://example.com").unwrap();
        let mut jar = CookieJar::new();
        let raw = ["a=1".to_string(), "b=2".to_string()];
        jar.set_cookies(&url, &raw);
        assert_eq!(jar.cookie_count(), 2);
        jar.clear_for_domain("example.com");
        assert_eq!(jar.cookie_count(), 0);
    }

    #[test]
    fn accept_ch_starts_false_then_true() {
        let client = chrome_client();
        assert!(!client.has_accept_ch("example.com"));

        let headers = single_header("accept-ch", "Sec-CH-UA-Full-Version-List");
        client.learn_accept_ch("example.com", &headers);

        assert!(client.has_accept_ch("example.com"));
        assert!(!client.has_accept_ch("other.com"));
    }

    #[test]
    fn accept_ch_case_insensitive() {
        let client = chrome_client();
        let headers = single_header("Accept-CH", "Sec-CH-UA-Arch");
        client.learn_accept_ch("site.example", &headers);
        assert!(client.has_accept_ch("site.example"));
    }

    #[test]
    fn response_without_accept_ch_does_not_upgrade() {
        let client = chrome_client();
        let headers = single_header("content-type", "text/html");
        client.learn_accept_ch("boring.example", &headers);
        assert!(!client.has_accept_ch("boring.example"));
    }

    // The tests below talk to httpbin.org and are skipped by default.

    #[tokio::test]
    #[ignore]
    async fn get_request() {
        let client = chrome_client();
        let resp = client.get("https://httpbin.org/get").await.unwrap();
        assert_eq!(resp.status, 200);
        assert!(resp.text().contains("httpbin"));
    }

    #[tokio::test]
    #[ignore]
    async fn post_request() {
        let client = chrome_client();
        let resp = client
            .post("https://httpbin.org/post", "hello")
            .await
            .unwrap();
        assert_eq!(resp.status, 200);
        assert!(resp.text().contains("hello"));
    }

    #[tokio::test]
    #[ignore]
    async fn get_follow_redirects() {
        let client = chrome_client();
        let resp = client
            .get_follow("https://httpbin.org/redirect/2", 5)
            .await
            .unwrap();
        assert_eq!(resp.status, 200);
    }
}