spider_downloader/
reqwest_client.rs1use crate::{
13 Downloader,
14 SimpleHttpClient,
15};
16use spider_util::request::{Request, Body};
17use spider_util::response::Response;
18use spider_util::error::SpiderError;
19use async_trait::async_trait;
20use bytes::Bytes;
21use http::StatusCode;
22use reqwest::{Client, Proxy};
23use std::time::Duration;
24use tracing::info;
25
26#[async_trait]
27impl SimpleHttpClient for Client {
28 async fn get_text(
29 &self,
30 url: &str,
31 timeout: Duration,
32 ) -> Result<(StatusCode, Bytes), SpiderError> {
33 let resp = self.get(url).timeout(timeout).send().await?;
34 let status = resp.status();
35 let body = resp.bytes().await?;
36 Ok((status, body))
37 }
38}
39
40pub struct ReqwestClientDownloader {
42 client: Client,
43 timeout: Duration,
44}
45
46#[async_trait]
47impl Downloader for ReqwestClientDownloader {
48 type Client = Client;
49
50 fn client(&self) -> &Self::Client {
52 &self.client
53 }
54
55 async fn download(&self, request: Request) -> Result<Response, SpiderError> {
56 info!(
57 "Downloading {} (fingerprint: {})",
58 request.url,
59 request.fingerprint()
60 );
61
62 let Request {
63 url,
64 method,
65 headers,
66 body,
67 meta,
68 ..
69 } = request;
70
71 let mut client_to_use = self.client.clone();
72
73 if let Some(proxy_val) = meta.get("proxy") && let Some(proxy_str) = proxy_val.as_str() {
74 match Proxy::all(proxy_str) {
75 Ok(proxy) => {
76 let new_client = Client::builder()
77 .timeout(self.timeout)
78 .proxy(proxy)
79 .build()
80 .map_err(|e| SpiderError::ReqwestError(e.into()))?;
81 client_to_use = new_client;
82 }
83 Err(e) => {
84 return Err(SpiderError::ReqwestError(e.into()));
85 }
86 }
87 }
88
89 let mut req_builder = client_to_use.request(method, url.clone());
90
91 if let Some(body_content) = body {
92 req_builder = match body_content {
93 Body::Json(json_val) => req_builder.json(&json_val),
94 Body::Form(form_val) => {
95 let mut form_map = std::collections::HashMap::new();
96 for entry in form_val.iter() {
97 form_map.insert(entry.key().clone(), entry.value().clone());
98 }
99 req_builder.form(&form_map)
100 },
101 Body::Bytes(bytes_val) => req_builder.body(bytes_val),
102 };
103 }
104
105 let res = req_builder.headers(headers).send().await?;
106
107 let response_url = res.url().clone();
108 let status = res.status();
109 let response_headers = res.headers().clone();
110 let response_body = res.bytes().await?;
111
112 Ok(Response {
113 url: response_url,
114 status,
115 headers: response_headers,
116 body: response_body,
117 request_url: url,
118 meta,
119 cached: false,
120 })
121 }
122}
123
124impl ReqwestClientDownloader {
125 pub fn new() -> Self {
127 Self::new_with_timeout(Duration::from_secs(30))
128 }
129
130 pub fn new_with_timeout(timeout: Duration) -> Self {
132 ReqwestClientDownloader {
133 client: Client::builder().timeout(timeout).build().unwrap(),
134 timeout,
135 }
136 }
137}
138
139impl Default for ReqwestClientDownloader {
140 fn default() -> Self {
141 Self::new()
142 }
143}