Skip to main content

spider_client/
lib.rs

1//! The `spider-client` module provides the primary interface and
2//! functionalities for the Spider web crawler library, which is
3//! designed for rapid and efficient crawling of web pages to gather
4//! links using isolated contexts.
5//!
6//! ### Features
7//!
8//! - **Multi-threaded Crawling:** Spider can utilize multiple
9//!   threads to parallelize the crawling process, drastically
10//!   improving performance and allowing the ability to gather
11//!   millions of pages in a short time.
12//!
13//! - **Configurable:** The library provides various options to
14//!   configure the crawling behavior, such as setting the depth
15//!   of crawling, user-agent strings, delays between requests,
16//!   and more.
17//!
18//! - **Link Gathering:** One of the primary objectives of Spider is to
19//!   gather and manage links from the web pages it crawls,
20//!   compiling them into a structured format for further use.
21//!
22//! ### Examples
23//!
24//! Basic usage of the Spider client might look like this:
25//!
26//! ```rust
27//! use spider_client::{Spider, RequestType, RequestParams};
28//! use tokio;
29//!
30//!  # #[ignore]
31//! #[tokio::main]
32//! async fn main() {
33//!     let spider = Spider::new(Some("myspiderapikey".into())).expect("API key must be provided");
34//!
35//!     let url = "https://spider.cloud";
36//!
37//!     // Scrape a single URL
38//!     let scraped_data = spider.scrape_url(url, None, "application/json").await.expect("Failed to scrape the URL");
39//!
40//!     println!("Scraped Data: {:?}", scraped_data);
41//!
42//!     // Crawl a website
43//!     let crawler_params = RequestParams {
44//!         limit: Some(1),
45//!         proxy_enabled: Some(true),
46//!         metadata: Some(false),
47//!         request: Some(RequestType::Http),
48//!         ..Default::default()
49//!     };
50//!
51//!     let crawl_result = spider.crawl_url(url, Some(crawler_params), false, "application/json", None::<fn(serde_json::Value)>).await.expect("Failed to crawl the URL");
52//!
53//!     println!("Crawl Result: {:?}", crawl_result);
54//! }
55//! ```
56//!
57//! ### Modules
58//!
59//! - `config`: Contains the configuration options for the Spider client.
60//! - `utils`: Utility functions used by the Spider client.
61//!
62
63pub mod shapes;
64
65use backon::ExponentialBuilder;
66use backon::Retryable;
67use reqwest::Client;
68use reqwest::{Error, Response};
69use serde::Serialize;
70pub use shapes::{request::*, response::*};
71use std::collections::HashMap;
72use std::sync::atomic::{AtomicU32, Ordering};
73use std::sync::OnceLock;
74use tokio_stream::StreamExt;
75
/// Most recent rate limit values reported by the API in its response headers.
///
/// Every field is an atomic, which lets the owner refresh the values through
/// a shared `&self` reference.
#[derive(Debug, Default)]
pub struct RateLimitInfo {
    /// Maximum requests allowed per minute.
    pub limit: AtomicU32,
    /// Requests still available in the current window.
    pub remaining: AtomicU32,
    /// Seconds left until the rate limit window resets.
    pub reset_seconds: AtomicU32,
}

impl RateLimitInfo {
    /// Returns the current values as `(limit, remaining, reset_seconds)`.
    ///
    /// Each counter is loaded separately with `Ordering::Relaxed`, so the
    /// three values are not read as one atomic unit.
    pub fn snapshot(&self) -> (u32, u32, u32) {
        let read = |counter: &AtomicU32| counter.load(Ordering::Relaxed);

        let limit = read(&self.limit);
        let remaining = read(&self.remaining);
        let reset_seconds = read(&self.reset_seconds);

        (limit, remaining, reset_seconds)
    }
}
98
/// Process-wide cache for the resolved API base URL.
static API_URL: OnceLock<String> = OnceLock::new();

/// The API endpoint.
///
/// The value is resolved on the first call and cached afterwards: the
/// `SPIDER_API_URL` environment variable is used when it can be read,
/// otherwise the default `https://api.spider.cloud` is returned.
pub fn get_api_url() -> &'static str {
    API_URL
        .get_or_init(|| match std::env::var("SPIDER_API_URL") {
            Ok(custom) => custom,
            Err(_) => String::from("https://api.spider.cloud"),
        })
        .as_str()
}
107
/// Represents a Spider with API key and HTTP client.
///
/// Construct one with [`Spider::new`] or [`Spider::new_with_client`]; both
/// fail when no API key is available.
#[derive(Debug, Default)]
pub struct Spider {
    /// The Spider API key, sent as a `Bearer` token in the `Authorization` header.
    pub api_key: String,
    /// The Spider Client to re-use for every request.
    pub client: Client,
    /// The latest rate limit info, refreshed from API response headers.
    pub rate_limit: RateLimitInfo,
}
118
119/// Handle the json response.
120pub async fn handle_json(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
121    res.json().await
122}
123
124/// Handle the jsonl response.
125pub async fn handle_jsonl(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
126    let text = res.text().await?;
127    let lines = text
128        .lines()
129        .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
130        .collect::<Vec<_>>();
131    Ok(serde_json::Value::Array(lines))
132}
133
134/// Handle the CSV response.
135#[cfg(feature = "csv")]
136pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
137    use std::collections::HashMap;
138    let text = res.text().await?;
139    let mut rdr = csv::Reader::from_reader(text.as_bytes());
140    let records: Vec<HashMap<String, String>> = rdr.deserialize().filter_map(Result::ok).collect();
141
142    if let Ok(record) = serde_json::to_value(records) {
143        Ok(record)
144    } else {
145        Ok(serde_json::Value::String(text))
146    }
147}
148
149#[cfg(not(feature = "csv"))]
150pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
151    handle_text(res).await
152}
153
154/// Basic handle response to text
155pub async fn handle_text(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
156    Ok(serde_json::Value::String(
157        res.text().await.unwrap_or_default(),
158    ))
159}
160
161/// Handle the XML response.
162#[cfg(feature = "csv")]
163pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
164    let text = res.text().await?;
165    match quick_xml::de::from_str::<serde_json::Value>(&text) {
166        Ok(val) => Ok(val),
167        Err(_) => Ok(serde_json::Value::String(text)),
168    }
169}
170
171#[cfg(not(feature = "csv"))]
172/// Handle the XML response.
173pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
174    handle_text(res).await
175}
176
177pub async fn parse_response(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
178    let content_type = res
179        .headers()
180        .get(reqwest::header::CONTENT_TYPE)
181        .and_then(|v| v.to_str().ok())
182        .unwrap_or_default()
183        .to_ascii_lowercase();
184
185    if content_type.contains("json") && !content_type.contains("jsonl") {
186        handle_json(res).await
187    } else if content_type.contains("jsonl") || content_type.contains("ndjson") {
188        handle_jsonl(res).await
189    } else if content_type.contains("csv") {
190        handle_csv(res).await
191    } else if content_type.contains("xml") {
192        handle_xml(res).await
193    } else {
194        handle_text(res).await
195    }
196}
197
198impl Spider {
199    /// Creates a new instance of Spider.
200    ///
201    /// # Arguments
202    ///
203    /// * `api_key` - An optional API key. Defaults to using the 'SPIDER_API_KEY' env variable.
204    ///
205    /// # Returns
206    ///
207    /// A new instance of Spider or an error string if no API key is provided.
208    pub fn new(api_key: Option<String>) -> Result<Self, &'static str> {
209        let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
210
211        match api_key {
212            Some(key) => Ok(Self {
213                api_key: key,
214                client: Client::new(),
215                rate_limit: RateLimitInfo::default(),
216            }),
217            None => Err("No API key provided"),
218        }
219    }
220
221    /// Creates a new instance of Spider.
222    ///
223    /// # Arguments
224    ///
225    /// * `api_key` - An optional API key. Defaults to using the 'SPIDER_API_KEY' env variable.
226    /// * `client` - A custom client to pass in.
227    ///
228    /// # Returns
229    ///
230    /// A new instance of Spider or an error string if no API key is provided.
231    pub fn new_with_client(api_key: Option<String>, client: Client) -> Result<Self, &'static str> {
232        let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
233
234        match api_key {
235            Some(key) => Ok(Self {
236                api_key: key,
237                client,
238                rate_limit: RateLimitInfo::default(),
239            }),
240            None => Err("No API key provided"),
241        }
242    }
243
244    /// Update rate limit state from response headers.
245    fn update_rate_limit(&self, headers: &reqwest::header::HeaderMap) {
246        if let Some(v) = headers.get("RateLimit-Limit").and_then(|v| v.to_str().ok()) {
247            if let Ok(n) = v.parse::<u32>() {
248                self.rate_limit.limit.store(n, Ordering::Relaxed);
249            }
250        }
251        if let Some(v) = headers
252            .get("RateLimit-Remaining")
253            .and_then(|v| v.to_str().ok())
254        {
255            if let Ok(n) = v.parse::<u32>() {
256                self.rate_limit.remaining.store(n, Ordering::Relaxed);
257            }
258        }
259        if let Some(v) = headers
260            .get("RateLimit-Reset")
261            .and_then(|v| v.to_str().ok())
262        {
263            if let Ok(n) = v.parse::<u32>() {
264                self.rate_limit.reset_seconds.store(n, Ordering::Relaxed);
265            }
266        }
267    }
268
269    /// Sends a POST request to the API.
270    ///
271    /// # Arguments
272    ///
273    /// * `endpoint` - The API endpoint.
274    /// * `data` - The request data as a HashMap.
275    /// * `stream` - Whether streaming is enabled.
276    /// * `content_type` - The content type of the request.
277    ///
278    /// # Returns
279    ///
280    /// The response from the API.
281    async fn api_post_base(
282        &self,
283        endpoint: &str,
284        data: impl Serialize + Sized + std::fmt::Debug,
285        content_type: &str,
286    ) -> Result<Response, Error> {
287        let url: String = format!("{}/{}", get_api_url(), endpoint);
288
289        let resp = self
290            .client
291            .post(&url)
292            .header(
293                "User-Agent",
294                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
295            )
296            .header("Content-Type", content_type)
297            .header("Authorization", format!("Bearer {}", self.api_key))
298            .json(&data)
299            .send()
300            .await?;
301
302        self.update_rate_limit(resp.headers());
303
304        if resp.status() == reqwest::StatusCode::TOO_MANY_REQUESTS {
305            let retry_after = resp
306                .headers()
307                .get("Retry-After")
308                .and_then(|v| v.to_str().ok())
309                .and_then(|v| v.parse::<u64>().ok())
310                .unwrap_or(1);
311            tokio::time::sleep(std::time::Duration::from_secs(retry_after)).await;
312            return resp.error_for_status();
313        }
314
315        Ok(resp)
316    }
317
318    /// Sends a POST request to the API.
319    ///
320    /// # Arguments
321    ///
322    /// * `endpoint` - The API endpoint.
323    /// * `data` - The request data as a HashMap.
324    /// * `stream` - Whether streaming is enabled.
325    /// * `content_type` - The content type of the request.
326    ///
327    /// # Returns
328    ///
329    /// The response from the API.
330    pub async fn api_post(
331        &self,
332        endpoint: &str,
333        data: impl Serialize + std::fmt::Debug + Clone + Send + Sync,
334        content_type: &str,
335    ) -> Result<Response, Error> {
336        let fetch = || async {
337            self.api_post_base(endpoint, data.to_owned(), content_type)
338                .await
339        };
340
341        fetch
342            .retry(ExponentialBuilder::default().with_max_times(5))
343            .when(|err: &reqwest::Error| {
344                if let Some(status) = err.status() {
345                    status.is_server_error()
346                        || status == reqwest::StatusCode::TOO_MANY_REQUESTS
347                } else {
348                    err.is_timeout()
349                }
350            })
351            .await
352    }
353
354    /// Sends a GET request to the API.
355    ///
356    /// # Arguments
357    ///
358    /// * `endpoint` - The API endpoint.
359    ///
360    /// # Returns
361    ///
362    /// The response from the API as a JSON value.
363    async fn api_get_base<T: Serialize>(
364        &self,
365        endpoint: &str,
366        query_params: Option<&T>,
367    ) -> Result<serde_json::Value, reqwest::Error> {
368        let url = format!("{}/{}", get_api_url(), endpoint);
369        let res = self
370            .client
371            .get(&url)
372            .query(&query_params)
373            .header(
374                "User-Agent",
375                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
376            )
377            .header("Content-Type", "application/json")
378            .header("Authorization", format!("Bearer {}", self.api_key))
379            .send()
380            .await?;
381
382        self.update_rate_limit(res.headers());
383
384        if res.status() == reqwest::StatusCode::TOO_MANY_REQUESTS {
385            let retry_after = res
386                .headers()
387                .get("Retry-After")
388                .and_then(|v| v.to_str().ok())
389                .and_then(|v| v.parse::<u64>().ok())
390                .unwrap_or(1);
391            tokio::time::sleep(std::time::Duration::from_secs(retry_after)).await;
392            return Err(res.error_for_status().unwrap_err());
393        }
394
395        parse_response(res).await
396    }
397
398    /// Sends a GET request to the API.
399    ///
400    /// # Arguments
401    ///
402    /// * `endpoint` - The API endpoint.
403    ///
404    /// # Returns
405    ///
406    /// The response from the API as a JSON value.
407    pub async fn api_get<T: Serialize>(
408        &self,
409        endpoint: &str,
410        query_params: Option<&T>,
411    ) -> Result<serde_json::Value, reqwest::Error> {
412        let fetch = || async { self.api_get_base(endpoint, query_params.to_owned()).await };
413
414        fetch
415            .retry(ExponentialBuilder::default().with_max_times(5))
416            .when(|err: &reqwest::Error| {
417                if let Some(status) = err.status() {
418                    status.is_server_error()
419                        || status == reqwest::StatusCode::TOO_MANY_REQUESTS
420                } else {
421                    err.is_timeout()
422                }
423            })
424            .await
425    }
426
427    /// Sends a DELETE request to the API.
428    ///
429    /// # Arguments
430    ///
431    /// * `endpoint` - The API endpoint.
432    /// * `params` - Optional request parameters.
433    /// * `stream` - Whether streaming is enabled.
434    /// * `content_type` - The content type of the request.
435    ///
436    /// # Returns
437    ///
438    /// The response from the API.
439    async fn api_delete_base(
440        &self,
441        endpoint: &str,
442        params: Option<HashMap<String, serde_json::Value>>,
443    ) -> Result<Response, Error> {
444        let url = format!("{}/v1/{}", get_api_url(), endpoint);
445        let request_builder = self
446            .client
447            .delete(&url)
448            .header(
449                "User-Agent",
450                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
451            )
452            .header("Content-Type", "application/json")
453            .header("Authorization", format!("Bearer {}", self.api_key));
454
455        let request_builder = if let Some(params) = params {
456            request_builder.json(&params)
457        } else {
458            request_builder
459        };
460
461        request_builder.send().await
462    }
463
464    /// Sends a DELETE request to the API.
465    ///
466    /// # Arguments
467    ///
468    /// * `endpoint` - The API endpoint.
469    /// * `params` - Optional request parameters.
470    /// * `stream` - Whether streaming is enabled.
471    /// * `content_type` - The content type of the request.
472    ///
473    /// # Returns
474    ///
475    /// The response from the API.
476    pub async fn api_delete(
477        &self,
478        endpoint: &str,
479        params: Option<HashMap<String, serde_json::Value>>,
480    ) -> Result<Response, Error> {
481        let fetch = || async { self.api_delete_base(endpoint, params.to_owned()).await };
482
483        fetch
484            .retry(ExponentialBuilder::default().with_max_times(5))
485            .when(|err: &reqwest::Error| {
486                if let Some(status) = err.status() {
487                    status.is_server_error()
488                } else {
489                    err.is_timeout()
490                }
491            })
492            .await
493    }
494
495    /// Scrapes a URL.
496    ///
497    /// # Arguments
498    ///
499    /// * `url` - The URL to scrape.
500    /// * `params` - Optional request parameters.
501    /// * `stream` - Whether streaming is enabled.
502    /// * `content_type` - The content type of the request.
503    ///
504    /// # Returns
505    ///
506    /// The response from the API as a JSON value.
507    pub async fn scrape_url(
508        &self,
509        url: &str,
510        params: Option<RequestParams>,
511        content_type: &str,
512    ) -> Result<serde_json::Value, reqwest::Error> {
513        let mut data = HashMap::new();
514
515        if let Ok(params) = serde_json::to_value(params) {
516            if let Some(ref p) = params.as_object() {
517                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
518            }
519        }
520
521        if !url.is_empty() {
522            data.insert(
523                "url".to_string(),
524                serde_json::Value::String(url.to_string()),
525            );
526        }
527
528        let res = self.api_post("scrape", data, content_type).await?;
529        parse_response(res).await
530    }
531
532    /// Scrapes multi URLs.
533    ///
534    /// # Arguments
535    ///
536    /// * `url` - The URL to scrape.
537    /// * `params` - Optional request parameters.
538    /// * `stream` - Whether streaming is enabled.
539    /// * `content_type` - The content type of the request.
540    ///
541    /// # Returns
542    ///
543    /// The response from the API as a JSON value.
544    pub async fn multi_scrape_url(
545        &self,
546        params: Option<Vec<RequestParams>>,
547        content_type: &str,
548    ) -> Result<serde_json::Value, reqwest::Error> {
549        let mut data = HashMap::new();
550
551        if let Ok(mut params) = serde_json::to_value(params) {
552            if let Some(obj) = params.as_object_mut() {
553                obj.insert("limit".to_string(), serde_json::Value::Number(1.into()));
554                data.extend(obj.iter().map(|(k, v)| (k.clone(), v.clone())));
555            }
556        }
557        let res = self.api_post("scrape", data, content_type).await?;
558        parse_response(res).await
559    }
560
561    /// Crawls a URL.
562    ///
563    /// # Arguments
564    ///
565    /// * `url` - The URL to crawl.
566    /// * `params` - Optional request parameters.
567    /// * `stream` - Whether streaming is enabled.
568    /// * `content_type` - The content type of the request.
569    /// * `callback` - Optional callback function to handle each streamed chunk.
570    ///
571    /// # Returns
572    ///
573    /// The response from the API as a JSON value.
574    pub async fn crawl_url(
575        &self,
576        url: &str,
577        params: Option<RequestParams>,
578        stream: bool,
579        content_type: &str,
580        callback: Option<impl Fn(serde_json::Value) + Send>,
581    ) -> Result<serde_json::Value, reqwest::Error> {
582        use tokio_util::codec::{FramedRead, LinesCodec};
583
584        let mut data = HashMap::new();
585
586        if let Ok(params) = serde_json::to_value(params) {
587            if let Some(ref p) = params.as_object() {
588                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
589            }
590        }
591
592        data.insert("url".into(), serde_json::Value::String(url.to_string()));
593
594        let res = self.api_post("crawl", data, content_type).await?;
595
596        if stream {
597            if let Some(callback) = callback {
598                let stream = res.bytes_stream();
599
600                let stream_reader = tokio_util::io::StreamReader::new(
601                    stream
602                        .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
603                );
604
605                let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
606
607                while let Some(line_result) = lines.next().await {
608                    match line_result {
609                        Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
610                            Ok(value) => {
611                                callback(value);
612                            }
613                            Err(_e) => {
614                                continue;
615                            }
616                        },
617                        Err(_e) => return Ok(serde_json::Value::Null),
618                    }
619                }
620
621                Ok(serde_json::Value::Null)
622            } else {
623                Ok(serde_json::Value::Null)
624            }
625        } else {
626            parse_response(res).await
627        }
628    }
629
630    /// Crawls multiple URLs.
631    ///
632    /// # Arguments
633    ///
634    /// * `url` - The URL to crawl.
635    /// * `params` - Optional request parameters.
636    /// * `stream` - Whether streaming is enabled.
637    /// * `content_type` - The content type of the request.
638    /// * `callback` - Optional callback function to handle each streamed chunk.
639    ///
640    /// # Returns
641    ///
642    /// The response from the API as a JSON value.
643    pub async fn multi_crawl_url(
644        &self,
645        params: Option<Vec<RequestParams>>,
646        stream: bool,
647        content_type: &str,
648        callback: Option<impl Fn(serde_json::Value) + Send>,
649    ) -> Result<serde_json::Value, reqwest::Error> {
650        use tokio_util::codec::{FramedRead, LinesCodec};
651
652        let res = self.api_post("crawl", params, content_type).await?;
653
654        if stream {
655            if let Some(callback) = callback {
656                let stream = res.bytes_stream();
657
658                let stream_reader = tokio_util::io::StreamReader::new(
659                    stream
660                        .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
661                );
662
663                let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
664
665                while let Some(line_result) = lines.next().await {
666                    match line_result {
667                        Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
668                            Ok(value) => {
669                                callback(value);
670                            }
671                            Err(_e) => {
672                                continue;
673                            }
674                        },
675                        Err(_e) => return Ok(serde_json::Value::Null),
676                    }
677                }
678
679                Ok(serde_json::Value::Null)
680            } else {
681                Ok(serde_json::Value::Null)
682            }
683        } else {
684            parse_response(res).await
685        }
686    }
687
688    /// Fetches links from a URL.
689    ///
690    /// # Arguments
691    ///
692    /// * `url` - The URL to fetch links from.
693    /// * `params` - Optional request parameters.
694    /// * `stream` - Whether streaming is enabled.
695    /// * `content_type` - The content type of the request.
696    ///
697    /// # Returns
698    ///
699    /// The response from the API as a JSON value.
700    pub async fn links(
701        &self,
702        url: &str,
703        params: Option<RequestParams>,
704        _stream: bool,
705        content_type: &str,
706    ) -> Result<serde_json::Value, reqwest::Error> {
707        let mut data = HashMap::new();
708
709        if let Ok(params) = serde_json::to_value(params) {
710            if let Some(ref p) = params.as_object() {
711                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
712            }
713        }
714
715        data.insert("url".into(), serde_json::Value::String(url.to_string()));
716
717        let res = self.api_post("links", data, content_type).await?;
718        parse_response(res).await
719    }
720
721    /// Fetches links from a URLs.
722    ///
723    /// # Arguments
724    ///
725    /// * `url` - The URL to fetch links from.
726    /// * `params` - Optional request parameters.
727    /// * `stream` - Whether streaming is enabled.
728    /// * `content_type` - The content type of the request.
729    ///
730    /// # Returns
731    ///
732    /// The response from the API as a JSON value.
733    pub async fn multi_links(
734        &self,
735        params: Option<Vec<RequestParams>>,
736        _stream: bool,
737        content_type: &str,
738    ) -> Result<serde_json::Value, reqwest::Error> {
739        let res = self.api_post("links", params, content_type).await?;
740        parse_response(res).await
741    }
742
743    /// Takes a screenshot of a URL.
744    ///
745    /// # Arguments
746    ///
747    /// * `url` - The URL to take a screenshot of.
748    /// * `params` - Optional request parameters.
749    /// * `stream` - Whether streaming is enabled.
750    /// * `content_type` - The content type of the request.
751    ///
752    /// # Returns
753    ///
754    /// The response from the API as a JSON value.
755    pub async fn screenshot(
756        &self,
757        url: &str,
758        params: Option<RequestParams>,
759        _stream: bool,
760        content_type: &str,
761    ) -> Result<serde_json::Value, reqwest::Error> {
762        let mut data = HashMap::new();
763
764        if let Ok(params) = serde_json::to_value(params) {
765            if let Some(ref p) = params.as_object() {
766                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
767            }
768        }
769
770        data.insert("url".into(), serde_json::Value::String(url.to_string()));
771
772        let res = self.api_post("screenshot", data, content_type).await?;
773        parse_response(res).await
774    }
775
776    /// Takes a screenshot of multiple URLs.
777    ///
778    /// # Arguments
779    ///
780    /// * `url` - The URL to take a screenshot of.
781    /// * `params` - Optional request parameters.
782    /// * `stream` - Whether streaming is enabled.
783    /// * `content_type` - The content type of the request.
784    ///
785    /// # Returns
786    ///
787    /// The response from the API as a JSON value.
788    pub async fn multi_screenshot(
789        &self,
790        params: Option<Vec<RequestParams>>,
791        _stream: bool,
792        content_type: &str,
793    ) -> Result<serde_json::Value, reqwest::Error> {
794        let res = self.api_post("screenshot", params, content_type).await?;
795        parse_response(res).await
796    }
797
798    /// Searches for a query.
799    ///
800    /// # Arguments
801    ///
802    /// * `q` - The query to search for.
803    /// * `params` - Optional request parameters.
804    /// * `stream` - Whether streaming is enabled.
805    /// * `content_type` - The content type of the request.
806    ///
807    /// # Returns
808    ///
809    /// The response from the API as a JSON value.
810    pub async fn search(
811        &self,
812        q: &str,
813        params: Option<SearchRequestParams>,
814        _stream: bool,
815        content_type: &str,
816    ) -> Result<serde_json::Value, reqwest::Error> {
817        let body = match params {
818            Some(mut params) => {
819                params.search = q.to_string();
820                params
821            }
822            _ => {
823                let mut params = SearchRequestParams::default();
824                params.search = q.to_string();
825                params
826            }
827        };
828
829        let res = self.api_post("search", body, content_type).await?;
830
831        parse_response(res).await
832    }
833
834    /// Searches for multiple querys.
835    ///
836    /// # Arguments
837    ///
838    /// * `q` - The query to search for.
839    /// * `params` - Optional request parameters.
840    /// * `stream` - Whether streaming is enabled.
841    /// * `content_type` - The content type of the request.
842    ///
843    /// # Returns
844    ///
845    /// The response from the API as a JSON value.
846    pub async fn multi_search(
847        &self,
848        params: Option<Vec<SearchRequestParams>>,
849        content_type: &str,
850    ) -> Result<serde_json::Value, reqwest::Error> {
851        let res = self.api_post("search", params, content_type).await?;
852        parse_response(res).await
853    }
854
855    /// Unblock a URL.
856    ///
857    /// # Arguments
858    ///
859    /// * `url` - The URL to scrape.
860    /// * `params` - Optional request parameters.
861    /// * `stream` - Whether streaming is enabled.
862    /// * `content_type` - The content type of the request.
863    ///
864    /// # Returns
865    ///
866    /// The response from the API as a JSON value.
867    pub async fn unblock_url(
868        &self,
869        url: &str,
870        params: Option<RequestParams>,
871        content_type: &str,
872    ) -> Result<serde_json::Value, reqwest::Error> {
873        let mut data = HashMap::new();
874
875        if let Ok(params) = serde_json::to_value(params) {
876            if let Some(ref p) = params.as_object() {
877                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
878            }
879        }
880
881        if !url.is_empty() {
882            data.insert(
883                "url".to_string(),
884                serde_json::Value::String(url.to_string()),
885            );
886        }
887
888        let res = self.api_post("unblocker", data, content_type).await?;
889        parse_response(res).await
890    }
891
892    /// Unblock multi URLs.
893    ///
894    /// # Arguments
895    ///
896    /// * `url` - The URL to scrape.
897    /// * `params` - Optional request parameters.
898    /// * `stream` - Whether streaming is enabled.
899    /// * `content_type` - The content type of the request.
900    ///
901    /// # Returns
902    ///
903    /// The response from the API as a JSON value.
904    pub async fn multi_unblock_url(
905        &self,
906        params: Option<Vec<RequestParams>>,
907        content_type: &str,
908    ) -> Result<serde_json::Value, reqwest::Error> {
909        let mut data = HashMap::new();
910
911        if let Ok(mut params) = serde_json::to_value(params) {
912            if let Some(obj) = params.as_object_mut() {
913                obj.insert("limit".to_string(), serde_json::Value::Number(1.into()));
914                data.extend(obj.iter().map(|(k, v)| (k.clone(), v.clone())));
915            }
916        }
917        let res = self.api_post("unblocker", data, content_type).await?;
918        parse_response(res).await
919    }
920
921    /// Transforms data.
922    ///
923    /// # Arguments
924    ///
925    /// * `data` - The data to transform.
926    /// * `params` - Optional request parameters.
927    /// * `stream` - Whether streaming is enabled.
928    /// * `content_type` - The content type of the request.
929    ///
930    /// # Returns
931    ///
932    /// The response from the API as a JSON value.
933    pub async fn transform(
934        &self,
935        data: Vec<HashMap<&str, &str>>,
936        params: Option<TransformParams>,
937        _stream: bool,
938        content_type: &str,
939    ) -> Result<serde_json::Value, reqwest::Error> {
940        let mut payload = HashMap::new();
941
942        if let Ok(params) = serde_json::to_value(params) {
943            if let Some(ref p) = params.as_object() {
944                payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
945            }
946        }
947
948        if let Ok(d) = serde_json::to_value(data) {
949            payload.insert("data".into(), d);
950        }
951
952        let res = self.api_post("transform", payload, content_type).await?;
953
954        parse_response(res).await
955    }
956
957    /// Get the account credits left.
958    pub async fn get_credits(&self) -> Result<serde_json::Value, reqwest::Error> {
959        self.api_get::<serde_json::Value>("data/credits", None)
960            .await
961    }
962
963    /// Send a request for a data record.
964    pub async fn data_post(
965        &self,
966        table: &str,
967        data: Option<RequestParams>,
968    ) -> Result<serde_json::Value, reqwest::Error> {
969        let res = self
970            .api_post(&format!("data/{}", table), data, "application/json")
971            .await?;
972        parse_response(res).await
973    }
974
975    /// Get a table record.
976    pub async fn data_get(
977        &self,
978        table: &str,
979        params: Option<RequestParams>,
980    ) -> Result<serde_json::Value, reqwest::Error> {
981        let mut payload = HashMap::new();
982
983        if let Some(params) = params {
984            if let Ok(p) = serde_json::to_value(params) {
985                if let Some(o) = p.as_object() {
986                    payload.extend(o.iter().map(|(k, v)| (k.as_str(), v.clone())));
987                }
988            }
989        }
990
991        let res = self
992            .api_get::<serde_json::Value>(&format!("data/{}", table), None)
993            .await?;
994        Ok(res)
995    }
996}
997
#[cfg(test)]
mod tests {
    use super::*;
    use dotenv::dotenv;
    use lazy_static::lazy_static;
    use reqwest::ClientBuilder;

    lazy_static! {
        // One client shared by every test. `.env` is loaded first, and no API
        // key is passed explicitly to the constructor.
        static ref SPIDER_CLIENT: Spider = {
            dotenv().ok();
            let http = ClientBuilder::new()
                .user_agent("SpiderBot")
                .build()
                .unwrap();

            Spider::new_with_client(None, http).expect("client to build")
        };
    }

    // Scraping a single page should succeed.
    #[tokio::test]
    #[ignore]
    async fn test_scrape_url() {
        let outcome = SPIDER_CLIENT
            .scrape_url("https://example.com", None, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    // Crawling without params, streaming or a callback should succeed.
    #[tokio::test]
    async fn test_crawl_url() {
        let no_callback = None::<fn(serde_json::Value)>;
        let outcome = SPIDER_CLIENT
            .crawl_url(
                "https://example.com",
                None,
                false,
                "application/json",
                no_callback,
            )
            .await;
        assert!(outcome.is_ok());
    }

    // Link extraction should succeed.
    #[tokio::test]
    #[ignore]
    async fn test_links() {
        let outcome: Result<serde_json::Value, Error> = SPIDER_CLIENT
            .links("https://example.com", None, false, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    // A screenshot limited to one page should succeed.
    #[tokio::test]
    #[ignore]
    async fn test_screenshot() {
        let single_page = RequestParams {
            limit: Some(1),
            ..Default::default()
        };

        let outcome = SPIDER_CLIENT
            .screenshot(
                "https://example.com",
                Some(single_page),
                false,
                "application/json",
            )
            .await;
        assert!(outcome.is_ok());
    }

    // #[tokio::test(flavor = "multi_thread")]
    // async fn test_search() {
    //     let mut params = SearchRequestParams::default();

    //     params.search_limit = Some(1);
    //     params.num = Some(1);
    //     params.fetch_page_content = Some(false);

    //     let response = SPIDER_CLIENT
    //         .search("a sports website", Some(params), false, "application/json")
    //         .await;

    //     assert!(response.is_ok());
    // }

    // Transforming one inline HTML document should succeed.
    #[tokio::test]
    #[ignore]
    async fn test_transform() {
        let documents = vec![HashMap::from([(
            "<html><body><h1>Transformation</h1></body></html>",
            "",
        )])];
        let outcome = SPIDER_CLIENT
            .transform(documents, None, false, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    // Fetching the credit balance should succeed.
    #[tokio::test]
    async fn test_get_credits() {
        let outcome = SPIDER_CLIENT.get_credits().await;
        assert!(outcome.is_ok());
    }
}