1use reqwest::header::CONTENT_TYPE;
2use reqwest::{redirect::Policy, Client};
3use std::time::Duration;
4
/// User-Agent value configured on the HTTP client: `webfetch/` followed by the
/// crate version that Cargo exposes at compile time.
const USER_AGENT: &str = concat!("webfetch/", env!("CARGO_PKG_VERSION"));
/// Upper bound on how many times `fetch_page` tries a single URL before giving up.
const MAX_ATTEMPTS: u32 = 3;
7
/// The result of successfully downloading a page.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so that values can be
/// logged, duplicated, and compared in tests (for example via `assert_eq!`
/// or `Result::unwrap_err`, which requires the `Ok` type to be `Debug`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FetchedPage {
    /// Response body decoded as text.
    pub body: String,
    /// URL reported by the response that produced `body`; this may differ
    /// from the requested URL when the client followed redirects.
    pub final_url: String,
    /// Value of the `Content-Type` response header, or `None` when the header
    /// is missing or could not be read as a string.
    pub content_type: Option<String>,
}
15
16fn build_client(timeout_secs: u64) -> anyhow::Result<Client> {
17 Ok(Client::builder()
18 .timeout(Duration::from_secs(timeout_secs))
19 .redirect(Policy::limited(5))
20 .user_agent(USER_AGENT)
21 .gzip(true)
22 .brotli(true)
23 .build()?)
24}
25
26async fn attempt(client: &Client, url: &str) -> Result<FetchedPage, (anyhow::Error, bool)> {
29 let resp = match client.get(url).send().await {
30 Ok(r) => r,
31 Err(e) => {
32 let transient = e.is_timeout() || e.is_connect() || e.is_request();
33 return Err((e.into(), transient));
34 }
35 };
36
37 let status = resp.status();
38 let resp = match resp.error_for_status() {
39 Ok(r) => r,
40 Err(e) => {
41 let transient = status.is_server_error() || status.as_u16() == 429;
42 return Err((e.into(), transient));
43 }
44 };
45
46 let final_url = resp.url().to_string();
47 let content_type = resp
48 .headers()
49 .get(CONTENT_TYPE)
50 .and_then(|v| v.to_str().ok())
51 .map(|s| s.to_string());
52
53 match resp.text().await {
54 Ok(body) => Ok(FetchedPage {
55 body,
56 final_url,
57 content_type,
58 }),
59 Err(e) => {
60 let transient = e.is_timeout();
61 Err((e.into(), transient))
62 }
63 }
64}
65
66pub async fn fetch_page(url: &str, timeout_secs: u64) -> anyhow::Result<FetchedPage> {
69 let client = build_client(timeout_secs)?;
70
71 let mut delay = Duration::from_millis(200);
72 for attempt_no in 1..=MAX_ATTEMPTS {
73 match attempt(&client, url).await {
74 Ok(page) => return Ok(page),
75 Err((err, transient)) => {
76 if attempt_no == MAX_ATTEMPTS || !transient {
77 return Err(err);
78 }
79 tokio::time::sleep(delay).await;
80 delay *= 2;
81 }
82 }
83 }
84 unreachable!("loop returns on the final attempt")
85}