// spider_lib/downloaders/reqwest_client.rs

1use crate::{
2    downloader::SimpleHttpClient, request::Body, Downloader, Request, Response, SpiderError,
3};
4use async_trait::async_trait;
5use bytes::Bytes;
6use http::StatusCode;
7use reqwest::Client;
8use std::time::Duration;
9use tracing::info;
10
11#[async_trait]
12impl SimpleHttpClient for Client {
13    async fn get_text(
14        &self,
15        url: &str,
16        timeout: Duration,
17    ) -> Result<(StatusCode, Bytes), SpiderError> {
18        let resp = self.get(url).timeout(timeout).send().await?;
19        let status = resp.status();
20        let body = resp.bytes().await?;
21        Ok((status, body))
22    }
23}
24
25/// Concrete implementation of Downloader using reqwest client
26pub struct ReqwestClientDownloader {
27    client: Client,
28}
29
30#[async_trait]
31impl Downloader for ReqwestClientDownloader {
32    type Client = Client;
33
34    /// Returns a reference to the underlying HTTP client.
35    fn client(&self) -> &Self::Client {
36        &self.client
37    }
38
39    async fn download(&self, request: Request) -> Result<Response, SpiderError> {
40        info!(
41            "Downloading {} (fingerprint: {})",
42            request.url,
43            request.fingerprint()
44        );
45
46        let Request {
47            url,
48            method,
49            headers,
50            body,
51            meta,
52            ..
53        } = request;
54
55        let mut req_builder = self.client.request(method, url.clone());
56
57        if let Some(body_content) = body {
58            req_builder = match body_content {
59                Body::Json(json_val) => req_builder.json(&json_val),
60                Body::Form(form_val) => req_builder.form(&form_val),
61                Body::Bytes(bytes_val) => req_builder.body(bytes_val),
62            };
63        }
64
65        let res = req_builder.headers(headers).send().await?;
66
67        let response_url = res.url().clone();
68        let status = res.status();
69        let response_headers = res.headers().clone();
70        let response_body = res.bytes().await?;
71
72        Ok(Response {
73            url: response_url,
74            status,
75            headers: response_headers,
76            body: response_body,
77            request_url: url,
78            meta,
79        })
80    }
81}
82
83impl ReqwestClientDownloader {
84    /// Creates a new `ReqwestClientDownloader` with a default timeout of 30 seconds.
85    pub fn new() -> Self {
86        Self::new_with_timeout(Duration::from_secs(30))
87    }
88
89    /// Creates a new `ReqwestClientDownloader` with a specified request timeout.
90    pub fn new_with_timeout(timeout: Duration) -> Self {
91        ReqwestClientDownloader {
92            client: Client::builder().timeout(timeout).build().unwrap(),
93        }
94    }
95}
96
97impl Default for ReqwestClientDownloader {
98    fn default() -> Self {
99        Self::new()
100    }
101}