spider_client/
lib.rs

1//! The `spider-client` module provides the primary interface and
2//! functionalities for the Spider web crawler library, which is
3//! designed for rapid and efficient crawling of web pages to gather
4//! links using isolated contexts.
5//!
6//! ### Features
7//!
8//! - **Multi-threaded Crawling:** Spider can utilize multiple
9//!   threads to parallelize the crawling process, drastically
10//!   improving performance and allowing the ability to gather
11//!   millions of pages in a short time.
12//!
13//! - **Configurable:** The library provides various options to
14//!   configure the crawling behavior, such as setting the depth
15//!   of crawling, user-agent strings, delays between requests,
16//!   and more.
17//!
18//! - **Link Gathering:** One of the primary objectives of Spider is to
19//!   gather and manage links from the web pages it crawls,
20//!   compiling them into a structured format for further use.
21//!
22//! ### Examples
23//!
24//! Basic usage of the Spider client might look like this:
25//!
26//! ```rust
27//! use spider_client::{Spider, RequestType, RequestParams};
28//! use tokio;
29//!
30//!  # #[ignore]
31//! #[tokio::main]
32//! async fn main() {
33//!     let spider = Spider::new(Some("myspiderapikey".into())).expect("API key must be provided");
34//!
35//!     let url = "https://spider.cloud";
36//!
37//!     // Scrape a single URL
38//!     let scraped_data = spider.scrape_url(url, None, "application/json").await.expect("Failed to scrape the URL");
39//!
40//!     println!("Scraped Data: {:?}", scraped_data);
41//!
42//!     // Crawl a website
43//!     let crawler_params = RequestParams {
44//!         limit: Some(1),
45//!         proxy_enabled: Some(true),
46//!         metadata: Some(false),
47//!         request: Some(RequestType::Http),
48//!         ..Default::default()
49//!     };
50//!
51//!     let crawl_result = spider.crawl_url(url, Some(crawler_params), false, "application/json", None::<fn(serde_json::Value)>).await.expect("Failed to crawl the URL");
52//!
53//!     println!("Crawl Result: {:?}", crawl_result);
54//! }
55//! ```
56//!
57//! ### Modules
58//!
//! - `shapes`: Request and response types used by the Spider client
//!   (`shapes::request` and `shapes::response` are re-exported from the crate root).
61//!
62
63pub mod shapes;
64
65use backon::ExponentialBuilder;
66use backon::Retryable;
67use reqwest::Client;
68use reqwest::{Error, Response};
69use serde::Serialize;
70pub use shapes::{request::*, response::*};
71use std::collections::HashMap;
72use std::sync::OnceLock;
73use tokio_stream::StreamExt;
74
static API_URL: OnceLock<String> = OnceLock::new();

/// Returns the base URL of the Spider API.
///
/// The value is resolved once, on first call: the `SPIDER_API_URL` environment
/// variable is used when it can be read, otherwise `https://api.spider.cloud`.
/// Later changes to the environment variable are not observed.
pub fn get_api_url() -> &'static str {
    API_URL
        .get_or_init(|| match std::env::var("SPIDER_API_URL") {
            Ok(custom) => custom,
            Err(_) => String::from("https://api.spider.cloud"),
        })
        .as_str()
}
83
84/// Represents a Spider with API key and HTTP client.
85#[derive(Debug, Default)]
86pub struct Spider {
87    /// The Spider API key.
88    pub api_key: String,
89    /// The Spider Client to re-use.
90    pub client: Client,
91}
92
93/// Handle the json response.
94pub async fn handle_json(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
95    res.json().await
96}
97
98/// Handle the jsonl response.
99pub async fn handle_jsonl(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
100    let text = res.text().await?;
101    let lines = text
102        .lines()
103        .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
104        .collect::<Vec<_>>();
105    Ok(serde_json::Value::Array(lines))
106}
107
/// Decodes a CSV body into a JSON array of objects keyed by the header row.
///
/// Rows that fail to deserialize are skipped. If the collected rows cannot be
/// converted to JSON, the raw body text is returned as a JSON string instead.
///
/// # Errors
///
/// Returns the `reqwest` error if the body cannot be read as text.
#[cfg(feature = "csv")]
pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    use std::collections::HashMap;

    let text = res.text().await?;
    let mut reader = csv::Reader::from_reader(text.as_bytes());
    let rows: Vec<HashMap<String, String>> =
        reader.deserialize().filter_map(Result::ok).collect();

    match serde_json::to_value(rows) {
        Ok(json) => Ok(json),
        Err(_) => Ok(serde_json::Value::String(text)),
    }
}
122
123#[cfg(not(feature = "csv"))]
124pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
125    handle_text(res).await
126}
127
128/// Basic handle response to text
129pub async fn handle_text(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
130    Ok(serde_json::Value::String(
131        res.text().await.unwrap_or_default(),
132    ))
133}
134
/// Decodes an XML body into a JSON value using `quick_xml`.
///
/// If the body cannot be deserialized, the raw text is returned as a JSON string.
///
/// # Errors
///
/// Returns the `reqwest` error if the body cannot be read as text.
// NOTE(review): this XML handler is gated on the `csv` feature, which looks like
// a copy-paste from `handle_csv` — confirm the intended feature name.
#[cfg(feature = "csv")]
pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    let body = res.text().await?;
    let parsed = quick_xml::de::from_str::<serde_json::Value>(&body);

    Ok(parsed.unwrap_or_else(|_| serde_json::Value::String(body)))
}
144
145#[cfg(not(feature = "csv"))]
146/// Handle the XML response.
147pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
148    handle_text(res).await
149}
150
151pub async fn parse_response(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
152    let content_type = res
153        .headers()
154        .get(reqwest::header::CONTENT_TYPE)
155        .and_then(|v| v.to_str().ok())
156        .unwrap_or_default()
157        .to_ascii_lowercase();
158
159    if content_type.contains("json") && !content_type.contains("jsonl") {
160        handle_json(res).await
161    } else if content_type.contains("jsonl") || content_type.contains("ndjson") {
162        handle_jsonl(res).await
163    } else if content_type.contains("csv") {
164        handle_csv(res).await
165    } else if content_type.contains("xml") {
166        handle_xml(res).await
167    } else {
168        handle_text(res).await
169    }
170}
171
172impl Spider {
173    /// Creates a new instance of Spider.
174    ///
175    /// # Arguments
176    ///
177    /// * `api_key` - An optional API key. Defaults to using the 'SPIDER_API_KEY' env variable.
178    ///
179    /// # Returns
180    ///
181    /// A new instance of Spider or an error string if no API key is provided.
182    pub fn new(api_key: Option<String>) -> Result<Self, &'static str> {
183        let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
184
185        match api_key {
186            Some(key) => Ok(Self {
187                api_key: key,
188                client: Client::new(),
189            }),
190            None => Err("No API key provided"),
191        }
192    }
193
194    /// Creates a new instance of Spider.
195    ///
196    /// # Arguments
197    ///
198    /// * `api_key` - An optional API key. Defaults to using the 'SPIDER_API_KEY' env variable.
199    /// * `client` - A custom client to pass in.
200    ///
201    /// # Returns
202    ///
203    /// A new instance of Spider or an error string if no API key is provided.
204    pub fn new_with_client(api_key: Option<String>, client: Client) -> Result<Self, &'static str> {
205        let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
206
207        match api_key {
208            Some(key) => Ok(Self {
209                api_key: key,
210                client,
211            }),
212            None => Err("No API key provided"),
213        }
214    }
215
216    /// Sends a POST request to the API.
217    ///
218    /// # Arguments
219    ///
220    /// * `endpoint` - The API endpoint.
221    /// * `data` - The request data as a HashMap.
222    /// * `stream` - Whether streaming is enabled.
223    /// * `content_type` - The content type of the request.
224    ///
225    /// # Returns
226    ///
227    /// The response from the API.
228    async fn api_post_base(
229        &self,
230        endpoint: &str,
231        data: impl Serialize + Sized + std::fmt::Debug,
232        content_type: &str,
233    ) -> Result<Response, Error> {
234        let url: String = format!("{}/{}", get_api_url(), endpoint);
235
236        self.client
237            .post(&url)
238            .header(
239                "User-Agent",
240                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
241            )
242            .header("Content-Type", content_type)
243            .header("Authorization", format!("Bearer {}", self.api_key))
244            .json(&data)
245            .send()
246            .await
247    }
248
249    /// Sends a POST request to the API.
250    ///
251    /// # Arguments
252    ///
253    /// * `endpoint` - The API endpoint.
254    /// * `data` - The request data as a HashMap.
255    /// * `stream` - Whether streaming is enabled.
256    /// * `content_type` - The content type of the request.
257    ///
258    /// # Returns
259    ///
260    /// The response from the API.
261    pub async fn api_post(
262        &self,
263        endpoint: &str,
264        data: impl Serialize + std::fmt::Debug + Clone + Send + Sync,
265        content_type: &str,
266    ) -> Result<Response, Error> {
267        let fetch = || async {
268            self.api_post_base(endpoint, data.to_owned(), content_type)
269                .await
270        };
271
272        fetch
273            .retry(ExponentialBuilder::default().with_max_times(5))
274            .when(|err: &reqwest::Error| {
275                if let Some(status) = err.status() {
276                    status.is_server_error()
277                } else {
278                    err.is_timeout()
279                }
280            })
281            .await
282    }
283
284    /// Sends a GET request to the API.
285    ///
286    /// # Arguments
287    ///
288    /// * `endpoint` - The API endpoint.
289    ///
290    /// # Returns
291    ///
292    /// The response from the API as a JSON value.
293    async fn api_get_base<T: Serialize>(
294        &self,
295        endpoint: &str,
296        query_params: Option<&T>,
297    ) -> Result<serde_json::Value, reqwest::Error> {
298        let url = format!("{}/{}", get_api_url(), endpoint);
299        let res = self
300            .client
301            .get(&url)
302            .query(&query_params)
303            .header(
304                "User-Agent",
305                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
306            )
307            .header("Content-Type", "application/json")
308            .header("Authorization", format!("Bearer {}", self.api_key))
309            .send()
310            .await?;
311        parse_response(res).await
312    }
313
314    /// Sends a GET request to the API.
315    ///
316    /// # Arguments
317    ///
318    /// * `endpoint` - The API endpoint.
319    ///
320    /// # Returns
321    ///
322    /// The response from the API as a JSON value.
323    pub async fn api_get<T: Serialize>(
324        &self,
325        endpoint: &str,
326        query_params: Option<&T>,
327    ) -> Result<serde_json::Value, reqwest::Error> {
328        let fetch = || async { self.api_get_base(endpoint, query_params.to_owned()).await };
329
330        fetch
331            .retry(ExponentialBuilder::default().with_max_times(5))
332            .when(|err: &reqwest::Error| {
333                if let Some(status) = err.status() {
334                    status.is_server_error()
335                } else {
336                    err.is_timeout()
337                }
338            })
339            .await
340    }
341
342    /// Sends a DELETE request to the API.
343    ///
344    /// # Arguments
345    ///
346    /// * `endpoint` - The API endpoint.
347    /// * `params` - Optional request parameters.
348    /// * `stream` - Whether streaming is enabled.
349    /// * `content_type` - The content type of the request.
350    ///
351    /// # Returns
352    ///
353    /// The response from the API.
354    async fn api_delete_base(
355        &self,
356        endpoint: &str,
357        params: Option<HashMap<String, serde_json::Value>>,
358    ) -> Result<Response, Error> {
359        let url = format!("{}/v1/{}", get_api_url(), endpoint);
360        let request_builder = self
361            .client
362            .delete(&url)
363            .header(
364                "User-Agent",
365                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
366            )
367            .header("Content-Type", "application/json")
368            .header("Authorization", format!("Bearer {}", self.api_key));
369
370        let request_builder = if let Some(params) = params {
371            request_builder.json(&params)
372        } else {
373            request_builder
374        };
375
376        request_builder.send().await
377    }
378
379    /// Sends a DELETE request to the API.
380    ///
381    /// # Arguments
382    ///
383    /// * `endpoint` - The API endpoint.
384    /// * `params` - Optional request parameters.
385    /// * `stream` - Whether streaming is enabled.
386    /// * `content_type` - The content type of the request.
387    ///
388    /// # Returns
389    ///
390    /// The response from the API.
391    pub async fn api_delete(
392        &self,
393        endpoint: &str,
394        params: Option<HashMap<String, serde_json::Value>>,
395    ) -> Result<Response, Error> {
396        let fetch = || async { self.api_delete_base(endpoint, params.to_owned()).await };
397
398        fetch
399            .retry(ExponentialBuilder::default().with_max_times(5))
400            .when(|err: &reqwest::Error| {
401                if let Some(status) = err.status() {
402                    status.is_server_error()
403                } else {
404                    err.is_timeout()
405                }
406            })
407            .await
408    }
409
410    /// Scrapes a URL.
411    ///
412    /// # Arguments
413    ///
414    /// * `url` - The URL to scrape.
415    /// * `params` - Optional request parameters.
416    /// * `stream` - Whether streaming is enabled.
417    /// * `content_type` - The content type of the request.
418    ///
419    /// # Returns
420    ///
421    /// The response from the API as a JSON value.
422    pub async fn scrape_url(
423        &self,
424        url: &str,
425        params: Option<RequestParams>,
426        content_type: &str,
427    ) -> Result<serde_json::Value, reqwest::Error> {
428        let mut data = HashMap::new();
429
430        if let Ok(params) = serde_json::to_value(params) {
431            if let Some(ref p) = params.as_object() {
432                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
433            }
434        }
435
436        if !url.is_empty() {
437            data.insert(
438                "url".to_string(),
439                serde_json::Value::String(url.to_string()),
440            );
441        }
442
443        let res = self.api_post("scrape", data, content_type).await?;
444        parse_response(res).await
445    }
446
447    /// Scrapes multi URLs.
448    ///
449    /// # Arguments
450    ///
451    /// * `url` - The URL to scrape.
452    /// * `params` - Optional request parameters.
453    /// * `stream` - Whether streaming is enabled.
454    /// * `content_type` - The content type of the request.
455    ///
456    /// # Returns
457    ///
458    /// The response from the API as a JSON value.
459    pub async fn multi_scrape_url(
460        &self,
461        params: Option<Vec<RequestParams>>,
462        content_type: &str,
463    ) -> Result<serde_json::Value, reqwest::Error> {
464        let mut data = HashMap::new();
465
466        if let Ok(mut params) = serde_json::to_value(params) {
467            if let Some(obj) = params.as_object_mut() {
468                obj.insert("limit".to_string(), serde_json::Value::Number(1.into()));
469                data.extend(obj.iter().map(|(k, v)| (k.clone(), v.clone())));
470            }
471        }
472        let res = self.api_post("scrape", data, content_type).await?;
473        parse_response(res).await
474    }
475
476    /// Crawls a URL.
477    ///
478    /// # Arguments
479    ///
480    /// * `url` - The URL to crawl.
481    /// * `params` - Optional request parameters.
482    /// * `stream` - Whether streaming is enabled.
483    /// * `content_type` - The content type of the request.
484    /// * `callback` - Optional callback function to handle each streamed chunk.
485    ///
486    /// # Returns
487    ///
488    /// The response from the API as a JSON value.
489    pub async fn crawl_url(
490        &self,
491        url: &str,
492        params: Option<RequestParams>,
493        stream: bool,
494        content_type: &str,
495        callback: Option<impl Fn(serde_json::Value) + Send>,
496    ) -> Result<serde_json::Value, reqwest::Error> {
497        use tokio_util::codec::{FramedRead, LinesCodec};
498
499        let mut data = HashMap::new();
500
501        if let Ok(params) = serde_json::to_value(params) {
502            if let Some(ref p) = params.as_object() {
503                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
504            }
505        }
506
507        data.insert("url".into(), serde_json::Value::String(url.to_string()));
508
509        let res = self.api_post("crawl", data, content_type).await?;
510
511        if stream {
512            if let Some(callback) = callback {
513                let stream = res.bytes_stream();
514
515                let stream_reader = tokio_util::io::StreamReader::new(
516                    stream
517                        .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
518                );
519
520                let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
521
522                while let Some(line_result) = lines.next().await {
523                    match line_result {
524                        Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
525                            Ok(value) => {
526                                callback(value);
527                            }
528                            Err(_e) => {
529                                continue;
530                            }
531                        },
532                        Err(_e) => return Ok(serde_json::Value::Null),
533                    }
534                }
535
536                Ok(serde_json::Value::Null)
537            } else {
538                Ok(serde_json::Value::Null)
539            }
540        } else {
541            parse_response(res).await
542        }
543    }
544
545    /// Crawls multiple URLs.
546    ///
547    /// # Arguments
548    ///
549    /// * `url` - The URL to crawl.
550    /// * `params` - Optional request parameters.
551    /// * `stream` - Whether streaming is enabled.
552    /// * `content_type` - The content type of the request.
553    /// * `callback` - Optional callback function to handle each streamed chunk.
554    ///
555    /// # Returns
556    ///
557    /// The response from the API as a JSON value.
558    pub async fn multi_crawl_url(
559        &self,
560        params: Option<Vec<RequestParams>>,
561        stream: bool,
562        content_type: &str,
563        callback: Option<impl Fn(serde_json::Value) + Send>,
564    ) -> Result<serde_json::Value, reqwest::Error> {
565        use tokio_util::codec::{FramedRead, LinesCodec};
566
567        let res = self.api_post("crawl", params, content_type).await?;
568
569        if stream {
570            if let Some(callback) = callback {
571                let stream = res.bytes_stream();
572
573                let stream_reader = tokio_util::io::StreamReader::new(
574                    stream
575                        .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
576                );
577
578                let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
579
580                while let Some(line_result) = lines.next().await {
581                    match line_result {
582                        Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
583                            Ok(value) => {
584                                callback(value);
585                            }
586                            Err(_e) => {
587                                continue;
588                            }
589                        },
590                        Err(_e) => return Ok(serde_json::Value::Null),
591                    }
592                }
593
594                Ok(serde_json::Value::Null)
595            } else {
596                Ok(serde_json::Value::Null)
597            }
598        } else {
599            parse_response(res).await
600        }
601    }
602
603    /// Fetches links from a URL.
604    ///
605    /// # Arguments
606    ///
607    /// * `url` - The URL to fetch links from.
608    /// * `params` - Optional request parameters.
609    /// * `stream` - Whether streaming is enabled.
610    /// * `content_type` - The content type of the request.
611    ///
612    /// # Returns
613    ///
614    /// The response from the API as a JSON value.
615    pub async fn links(
616        &self,
617        url: &str,
618        params: Option<RequestParams>,
619        _stream: bool,
620        content_type: &str,
621    ) -> Result<serde_json::Value, reqwest::Error> {
622        let mut data = HashMap::new();
623
624        if let Ok(params) = serde_json::to_value(params) {
625            if let Some(ref p) = params.as_object() {
626                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
627            }
628        }
629
630        data.insert("url".into(), serde_json::Value::String(url.to_string()));
631
632        let res = self.api_post("links", data, content_type).await?;
633        parse_response(res).await
634    }
635
636    /// Fetches links from a URLs.
637    ///
638    /// # Arguments
639    ///
640    /// * `url` - The URL to fetch links from.
641    /// * `params` - Optional request parameters.
642    /// * `stream` - Whether streaming is enabled.
643    /// * `content_type` - The content type of the request.
644    ///
645    /// # Returns
646    ///
647    /// The response from the API as a JSON value.
648    pub async fn multi_links(
649        &self,
650        params: Option<Vec<RequestParams>>,
651        _stream: bool,
652        content_type: &str,
653    ) -> Result<serde_json::Value, reqwest::Error> {
654        let res = self.api_post("links", params, content_type).await?;
655        parse_response(res).await
656    }
657
658    /// Takes a screenshot of a URL.
659    ///
660    /// # Arguments
661    ///
662    /// * `url` - The URL to take a screenshot of.
663    /// * `params` - Optional request parameters.
664    /// * `stream` - Whether streaming is enabled.
665    /// * `content_type` - The content type of the request.
666    ///
667    /// # Returns
668    ///
669    /// The response from the API as a JSON value.
670    pub async fn screenshot(
671        &self,
672        url: &str,
673        params: Option<RequestParams>,
674        _stream: bool,
675        content_type: &str,
676    ) -> Result<serde_json::Value, reqwest::Error> {
677        let mut data = HashMap::new();
678
679        if let Ok(params) = serde_json::to_value(params) {
680            if let Some(ref p) = params.as_object() {
681                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
682            }
683        }
684
685        data.insert("url".into(), serde_json::Value::String(url.to_string()));
686
687        let res = self.api_post("screenshot", data, content_type).await?;
688        parse_response(res).await
689    }
690
691    /// Takes a screenshot of multiple URLs.
692    ///
693    /// # Arguments
694    ///
695    /// * `url` - The URL to take a screenshot of.
696    /// * `params` - Optional request parameters.
697    /// * `stream` - Whether streaming is enabled.
698    /// * `content_type` - The content type of the request.
699    ///
700    /// # Returns
701    ///
702    /// The response from the API as a JSON value.
703    pub async fn multi_screenshot(
704        &self,
705        params: Option<Vec<RequestParams>>,
706        _stream: bool,
707        content_type: &str,
708    ) -> Result<serde_json::Value, reqwest::Error> {
709        let res = self.api_post("screenshot", params, content_type).await?;
710        parse_response(res).await
711    }
712
713    /// Searches for a query.
714    ///
715    /// # Arguments
716    ///
717    /// * `q` - The query to search for.
718    /// * `params` - Optional request parameters.
719    /// * `stream` - Whether streaming is enabled.
720    /// * `content_type` - The content type of the request.
721    ///
722    /// # Returns
723    ///
724    /// The response from the API as a JSON value.
725    pub async fn search(
726        &self,
727        q: &str,
728        params: Option<SearchRequestParams>,
729        _stream: bool,
730        content_type: &str,
731    ) -> Result<serde_json::Value, reqwest::Error> {
732        let body = match params {
733            Some(mut params) => {
734                params.search = q.to_string();
735                params
736            }
737            _ => {
738                let mut params = SearchRequestParams::default();
739                params.search = q.to_string();
740                params
741            }
742        };
743
744        let res = self.api_post("search", body, content_type).await?;
745
746        parse_response(res).await
747    }
748
749    /// Searches for multiple querys.
750    ///
751    /// # Arguments
752    ///
753    /// * `q` - The query to search for.
754    /// * `params` - Optional request parameters.
755    /// * `stream` - Whether streaming is enabled.
756    /// * `content_type` - The content type of the request.
757    ///
758    /// # Returns
759    ///
760    /// The response from the API as a JSON value.
761    pub async fn multi_search(
762        &self,
763        params: Option<Vec<SearchRequestParams>>,
764        content_type: &str,
765    ) -> Result<serde_json::Value, reqwest::Error> {
766        let res = self.api_post("search", params, content_type).await?;
767        parse_response(res).await
768    }
769
770    /// Unblock a URL.
771    ///
772    /// # Arguments
773    ///
774    /// * `url` - The URL to scrape.
775    /// * `params` - Optional request parameters.
776    /// * `stream` - Whether streaming is enabled.
777    /// * `content_type` - The content type of the request.
778    ///
779    /// # Returns
780    ///
781    /// The response from the API as a JSON value.
782    pub async fn unblock_url(
783        &self,
784        url: &str,
785        params: Option<RequestParams>,
786        content_type: &str,
787    ) -> Result<serde_json::Value, reqwest::Error> {
788        let mut data = HashMap::new();
789
790        if let Ok(params) = serde_json::to_value(params) {
791            if let Some(ref p) = params.as_object() {
792                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
793            }
794        }
795
796        if !url.is_empty() {
797            data.insert(
798                "url".to_string(),
799                serde_json::Value::String(url.to_string()),
800            );
801        }
802
803        let res = self.api_post("unblocker", data, content_type).await?;
804        parse_response(res).await
805    }
806
807    /// Unblock multi URLs.
808    ///
809    /// # Arguments
810    ///
811    /// * `url` - The URL to scrape.
812    /// * `params` - Optional request parameters.
813    /// * `stream` - Whether streaming is enabled.
814    /// * `content_type` - The content type of the request.
815    ///
816    /// # Returns
817    ///
818    /// The response from the API as a JSON value.
819    pub async fn multi_unblock_url(
820        &self,
821        params: Option<Vec<RequestParams>>,
822        content_type: &str,
823    ) -> Result<serde_json::Value, reqwest::Error> {
824        let mut data = HashMap::new();
825
826        if let Ok(mut params) = serde_json::to_value(params) {
827            if let Some(obj) = params.as_object_mut() {
828                obj.insert("limit".to_string(), serde_json::Value::Number(1.into()));
829                data.extend(obj.iter().map(|(k, v)| (k.clone(), v.clone())));
830            }
831        }
832        let res = self.api_post("unblocker", data, content_type).await?;
833        parse_response(res).await
834    }
835
836    /// Transforms data.
837    ///
838    /// # Arguments
839    ///
840    /// * `data` - The data to transform.
841    /// * `params` - Optional request parameters.
842    /// * `stream` - Whether streaming is enabled.
843    /// * `content_type` - The content type of the request.
844    ///
845    /// # Returns
846    ///
847    /// The response from the API as a JSON value.
848    pub async fn transform(
849        &self,
850        data: Vec<HashMap<&str, &str>>,
851        params: Option<TransformParams>,
852        _stream: bool,
853        content_type: &str,
854    ) -> Result<serde_json::Value, reqwest::Error> {
855        let mut payload = HashMap::new();
856
857        if let Ok(params) = serde_json::to_value(params) {
858            if let Some(ref p) = params.as_object() {
859                payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
860            }
861        }
862
863        if let Ok(d) = serde_json::to_value(data) {
864            payload.insert("data".into(), d);
865        }
866
867        let res = self.api_post("transform", payload, content_type).await?;
868
869        parse_response(res).await
870    }
871
872    /// Get the account credits left.
873    pub async fn get_credits(&self) -> Result<serde_json::Value, reqwest::Error> {
874        self.api_get::<serde_json::Value>("data/credits", None)
875            .await
876    }
877
878    /// Send a request for a data record.
879    pub async fn data_post(
880        &self,
881        table: &str,
882        data: Option<RequestParams>,
883    ) -> Result<serde_json::Value, reqwest::Error> {
884        let res = self
885            .api_post(&format!("data/{}", table), data, "application/json")
886            .await?;
887        parse_response(res).await
888    }
889
890    /// Get a table record.
891    pub async fn data_get(
892        &self,
893        table: &str,
894        params: Option<RequestParams>,
895    ) -> Result<serde_json::Value, reqwest::Error> {
896        let mut payload = HashMap::new();
897
898        if let Some(params) = params {
899            if let Ok(p) = serde_json::to_value(params) {
900                if let Some(o) = p.as_object() {
901                    payload.extend(o.iter().map(|(k, v)| (k.as_str(), v.clone())));
902                }
903            }
904        }
905
906        let res = self
907            .api_get::<serde_json::Value>(&format!("data/{}", table), None)
908            .await?;
909        Ok(res)
910    }
911}
912
#[cfg(test)]
mod tests {
    use super::*;
    use dotenv::dotenv;
    use lazy_static::lazy_static;
    use reqwest::ClientBuilder;

    lazy_static! {
        // Client shared by all tests. `dotenv` is loaded first so the API key
        // can come from a `.env` file; building panics when no key is found.
        static ref SPIDER_CLIENT: Spider = {
            dotenv().ok();
            let http = ClientBuilder::new()
                .user_agent("SpiderBot")
                .build()
                .unwrap();

            Spider::new_with_client(None, http).expect("client to build")
        };
    }

    /// Scraping a page returns `Ok`.
    #[tokio::test]
    #[ignore]
    async fn test_scrape_url() {
        let outcome = SPIDER_CLIENT
            .scrape_url("https://example.com", None, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    /// A non-streaming crawl without a callback returns `Ok`.
    #[tokio::test]
    async fn test_crawl_url() {
        let no_callback = None::<fn(serde_json::Value)>;
        let outcome = SPIDER_CLIENT
            .crawl_url(
                "https://example.com",
                None,
                false,
                "application/json",
                no_callback,
            )
            .await;
        assert!(outcome.is_ok());
    }

    /// Gathering links returns `Ok`.
    #[tokio::test]
    #[ignore]
    async fn test_links() {
        let outcome = SPIDER_CLIENT
            .links("https://example.com", None, false, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    /// Taking a screenshot limited to one page returns `Ok`.
    #[tokio::test]
    #[ignore]
    async fn test_screenshot() {
        let params = RequestParams {
            limit: Some(1),
            ..Default::default()
        };

        let outcome = SPIDER_CLIENT
            .screenshot(
                "https://example.com",
                Some(params),
                false,
                "application/json",
            )
            .await;
        assert!(outcome.is_ok());
    }

    // #[tokio::test(flavor = "multi_thread")]
    // async fn test_search() {
    //     let mut params = SearchRequestParams::default();

    //     params.search_limit = Some(1);
    //     params.num = Some(1);
    //     params.fetch_page_content = Some(false);

    //     let response = SPIDER_CLIENT
    //         .search("a sports website", Some(params), false, "application/json")
    //         .await;

    //     assert!(response.is_ok());
    // }

    /// Transforming a small HTML snippet returns `Ok`.
    #[tokio::test]
    #[ignore]
    async fn test_transform() {
        let document = HashMap::from([(
            "<html><body><h1>Transformation</h1></body></html>".into(),
            "".into(),
        )]);
        let outcome = SPIDER_CLIENT
            .transform(vec![document], None, false, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    /// Reading the remaining credits returns `Ok`.
    #[tokio::test]
    async fn test_get_credits() {
        let outcome = SPIDER_CLIENT.get_credits().await;
        assert!(outcome.is_ok());
    }
}