spider_lib/downloaders/
reqwest_client.rs1use crate::{
2 downloader::SimpleHttpClient, request::Body, Downloader, Request, Response, SpiderError,
3};
4use async_trait::async_trait;
5use bytes::Bytes;
6use http::StatusCode;
7use reqwest::Client;
8use std::time::Duration;
9use tracing::info;
10
11#[async_trait]
12impl SimpleHttpClient for Client {
13 async fn get_text(
14 &self,
15 url: &str,
16 timeout: Duration,
17 ) -> Result<(StatusCode, Bytes), SpiderError> {
18 let resp = self.get(url).timeout(timeout).send().await?;
19 let status = resp.status();
20 let body = resp.bytes().await?;
21 Ok((status, body))
22 }
23}
24
25pub struct ReqwestClientDownloader {
27 client: Client,
28}
29
30#[async_trait]
31impl Downloader for ReqwestClientDownloader {
32 type Client = Client;
33
34 fn client(&self) -> &Self::Client {
36 &self.client
37 }
38
39 async fn download(&self, request: Request) -> Result<Response, SpiderError> {
40 info!(
41 "Downloading {} (fingerprint: {})",
42 request.url,
43 request.fingerprint()
44 );
45
46 let Request {
47 url,
48 method,
49 headers,
50 body,
51 meta,
52 ..
53 } = request;
54
55 let mut req_builder = self.client.request(method, url.clone());
56
57 if let Some(body_content) = body {
58 req_builder = match body_content {
59 Body::Json(json_val) => req_builder.json(&json_val),
60 Body::Form(form_val) => req_builder.form(&form_val),
61 Body::Bytes(bytes_val) => req_builder.body(bytes_val),
62 };
63 }
64
65 let res = req_builder.headers(headers).send().await?;
66
67 let response_url = res.url().clone();
68 let status = res.status();
69 let response_headers = res.headers().clone();
70 let response_body = res.bytes().await?;
71
72 Ok(Response {
73 url: response_url,
74 status,
75 headers: response_headers,
76 body: response_body,
77 request_url: url,
78 meta,
79 })
80 }
81}
82
83impl ReqwestClientDownloader {
84 pub fn new() -> Self {
86 Self::new_with_timeout(Duration::from_secs(30))
87 }
88
89 pub fn new_with_timeout(timeout: Duration) -> Self {
91 ReqwestClientDownloader {
92 client: Client::builder().timeout(timeout).build().unwrap(),
93 }
94 }
95}
96
97impl Default for ReqwestClientDownloader {
98 fn default() -> Self {
99 Self::new()
100 }
101}