Skip to main content

spider_lib/downloaders/
reqwest_client.rs

//! Reqwest-based Downloader implementation for the `spider-lib` framework.
//!
//! This module provides `ReqwestClientDownloader`, a concrete implementation
//! of the `Downloader` trait that leverages the `reqwest` HTTP client library.
//! It is responsible for executing HTTP requests defined by `Request` objects
//! and converting the received HTTP responses into `Response` objects suitable
//! for further processing by the crawler.
//!
//! This downloader handles various HTTP methods, request bodies (JSON, form data, bytes),
//! and integrates with the framework's error handling.
11
12use crate::{
13    Downloader, Request, Response, SpiderError, downloader::SimpleHttpClient, request::Body,
14};
15use async_trait::async_trait;
16use bytes::Bytes;
17use http::StatusCode;
18use reqwest::Client;
19use std::time::Duration;
20use tracing::info;
21
22#[async_trait]
23impl SimpleHttpClient for Client {
24    async fn get_text(
25        &self,
26        url: &str,
27        timeout: Duration,
28    ) -> Result<(StatusCode, Bytes), SpiderError> {
29        let resp = self.get(url).timeout(timeout).send().await?;
30        let status = resp.status();
31        let body = resp.bytes().await?;
32        Ok((status, body))
33    }
34}
35
36/// Concrete implementation of Downloader using reqwest client
37pub struct ReqwestClientDownloader {
38    client: Client,
39}
40
41#[async_trait]
42impl Downloader for ReqwestClientDownloader {
43    type Client = Client;
44
45    /// Returns a reference to the underlying HTTP client.
46    fn client(&self) -> &Self::Client {
47        &self.client
48    }
49
50    async fn download(&self, request: Request) -> Result<Response, SpiderError> {
51        info!(
52            "Downloading {} (fingerprint: {})",
53            request.url,
54            request.fingerprint()
55        );
56
57        let Request {
58            url,
59            method,
60            headers,
61            body,
62            meta,
63            ..
64        } = request;
65
66        let mut req_builder = self.client.request(method, url.clone());
67
68        if let Some(body_content) = body {
69            req_builder = match body_content {
70                Body::Json(json_val) => req_builder.json(&json_val),
71                Body::Form(form_val) => req_builder.form(&form_val),
72                Body::Bytes(bytes_val) => req_builder.body(bytes_val),
73            };
74        }
75
76        let res = req_builder.headers(headers).send().await?;
77
78        let response_url = res.url().clone();
79        let status = res.status();
80        let response_headers = res.headers().clone();
81        let response_body = res.bytes().await?;
82
83        Ok(Response {
84            url: response_url,
85            status,
86            headers: response_headers,
87            body: response_body,
88            request_url: url,
89            meta,
90            cached: false,
91        })
92    }
93}
94
95impl ReqwestClientDownloader {
96    /// Creates a new `ReqwestClientDownloader` with a default timeout of 30 seconds.
97    pub fn new() -> Self {
98        Self::new_with_timeout(Duration::from_secs(30))
99    }
100
101    /// Creates a new `ReqwestClientDownloader` with a specified request timeout.
102    pub fn new_with_timeout(timeout: Duration) -> Self {
103        ReqwestClientDownloader {
104            client: Client::builder().timeout(timeout).build().unwrap(),
105        }
106    }
107}
108
109impl Default for ReqwestClientDownloader {
110    fn default() -> Self {
111        Self::new()
112    }
113}