spider_lib/downloaders/
reqwest_client.rs1use crate::{
13 Downloader, Request, Response, SpiderError, downloader::SimpleHttpClient, request::Body,
14};
15use async_trait::async_trait;
16use bytes::Bytes;
17use http::StatusCode;
18use reqwest::{Client, Proxy};
19use std::time::Duration;
20use tracing::info;
21
22#[async_trait]
23impl SimpleHttpClient for Client {
24 async fn get_text(
25 &self,
26 url: &str,
27 timeout: Duration,
28 ) -> Result<(StatusCode, Bytes), SpiderError> {
29 let resp = self.get(url).timeout(timeout).send().await?;
30 let status = resp.status();
31 let body = resp.bytes().await?;
32 Ok((status, body))
33 }
34}
35
36pub struct ReqwestClientDownloader {
38 client: Client,
39 timeout: Duration,
40}
41
42#[async_trait]
43impl Downloader for ReqwestClientDownloader {
44 type Client = Client;
45
46 fn client(&self) -> &Self::Client {
48 &self.client
49 }
50
51 async fn download(&self, request: Request) -> Result<Response, SpiderError> {
52 info!(
53 "Downloading {} (fingerprint: {})",
54 request.url,
55 request.fingerprint()
56 );
57
58 let Request {
59 url,
60 method,
61 headers,
62 body,
63 meta,
64 ..
65 } = request;
66
67 let mut client_to_use = self.client.clone();
68
69 if let Some(proxy_val) = meta.get("proxy")
70 && let Some(proxy_str) = proxy_val.as_str()
71 {
72 match Proxy::all(proxy_str) {
73 Ok(proxy) => {
74 let new_client = Client::builder()
75 .timeout(self.timeout)
76 .proxy(proxy)
77 .build()
78 .map_err(|e| SpiderError::ReqwestError(e.into()))?;
79 client_to_use = new_client;
80 }
81 Err(e) => {
82 return Err(SpiderError::ReqwestError(e.into()));
83 }
84 }
85 }
86
87 let mut req_builder = client_to_use.request(method, url.clone());
88
89 if let Some(body_content) = body {
90 req_builder = match body_content {
91 Body::Json(json_val) => req_builder.json(&json_val),
92 Body::Form(form_val) => req_builder.form(&form_val),
93 Body::Bytes(bytes_val) => req_builder.body(bytes_val),
94 };
95 }
96
97 let res = req_builder.headers(headers).send().await?;
98
99 let response_url = res.url().clone();
100 let status = res.status();
101 let response_headers = res.headers().clone();
102 let response_body = res.bytes().await?;
103
104 Ok(Response {
105 url: response_url,
106 status,
107 headers: response_headers,
108 body: response_body,
109 request_url: url,
110 meta,
111 cached: false,
112 })
113 }
114}
115
116impl ReqwestClientDownloader {
117 pub fn new() -> Self {
119 Self::new_with_timeout(Duration::from_secs(30))
120 }
121
122 pub fn new_with_timeout(timeout: Duration) -> Self {
124 ReqwestClientDownloader {
125 client: Client::builder().timeout(timeout).build().unwrap(),
126 timeout,
127 }
128 }
129}
130
131impl Default for ReqwestClientDownloader {
132 fn default() -> Self {
133 Self::new()
134 }
135}