spider_client/
lib.rs

1//! The `spider-client` module provides the primary interface and
2//! functionalities for the Spider web crawler library, which is
3//! designed for rapid and efficient crawling of web pages to gather
4//! links using isolated contexts.
5//!
6//! ### Features
7//!
8//! - **Multi-threaded Crawling:** Spider can utilize multiple
9//!   threads to parallelize the crawling process, drastically
10//!   improving performance and allowing the ability to gather
11//!   millions of pages in a short time.
12//!
13//! - **Configurable:** The library provides various options to
14//!   configure the crawling behavior, such as setting the depth
15//!   of crawling, user-agent strings, delays between requests,
16//!   and more.
17//!
18//! - **Link Gathering:** One of the primary objectives of Spider is to
19//!   gather and manage links from the web pages it crawls,
20//!   compiling them into a structured format for further use.
21//!
22//! ### Examples
23//!
24//! Basic usage of the Spider client might look like this:
25//!
26//! ```rust
27//! use spider_client::{Spider, RequestType, RequestParams};
28//! use tokio;
29//!
30//!  # #[ignore]
31//! #[tokio::main]
32//! async fn main() {
33//!     let spider = Spider::new(Some("myspiderapikey".into())).expect("API key must be provided");
34//!
35//!     let url = "https://spider.cloud";
36//!
37//!     // Scrape a single URL
38//!     let scraped_data = spider.scrape_url(url, None, "application/json").await.expect("Failed to scrape the URL");
39//!
40//!     println!("Scraped Data: {:?}", scraped_data);
41//!
42//!     // Crawl a website
43//!     let crawler_params = RequestParams {
44//!         limit: Some(1),
45//!         proxy_enabled: Some(true),
46//!         metadata: Some(false),
47//!         request: Some(RequestType::Http),
48//!         ..Default::default()
49//!     };
50//!
51//!     let crawl_result = spider.crawl_url(url, Some(crawler_params), false, "application/json", None::<fn(serde_json::Value)>).await.expect("Failed to crawl the URL");
52//!
53//!     println!("Crawl Result: {:?}", crawl_result);
54//! }
55//! ```
56//!
57//! ### Modules
58//!
//! - `shapes`: Request and response types (`shapes::request`, `shapes::response`),
//!   re-exported at the crate root.
61//!
62
63pub mod shapes;
64
65use backon::ExponentialBuilder;
66use backon::Retryable;
67use reqwest::Client;
68use reqwest::{Error, Response};
69use serde::Serialize;
70use std::collections::HashMap;
71use tokio_stream::StreamExt;
72pub use shapes::{request::*, response::*};
73use std::sync::OnceLock;
74
static API_URL: OnceLock<String> = OnceLock::new();

/// Returns the base URL of the Spider API.
///
/// The value is resolved once, on the first call: the `SPIDER_API_URL`
/// environment variable is used when it can be read, otherwise
/// `https://api.spider.cloud`. The result is cached for the rest of the
/// process, so later changes to the variable are not observed.
pub fn get_api_url() -> &'static str {
    API_URL
        .get_or_init(|| match std::env::var("SPIDER_API_URL") {
            Ok(custom) => custom,
            Err(_) => String::from("https://api.spider.cloud"),
        })
        .as_str()
}
83
84/// Represents a Spider with API key and HTTP client.
85#[derive(Debug, Default)]
86pub struct Spider {
87    /// The Spider API key.
88    pub api_key: String,
89    /// The Spider Client to re-use.
90    pub client: Client,
91}
92
93/// Handle the json response.
94pub async fn handle_json(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
95    res.json().await
96}
97
98/// Handle the jsonl response.
99pub async fn handle_jsonl(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
100    let text = res.text().await?;
101    let lines = text
102        .lines()
103        .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
104        .collect::<Vec<_>>();
105    Ok(serde_json::Value::Array(lines))
106}
107
/// Decodes a CSV body into a JSON array of string-to-string maps.
///
/// Records that fail to deserialize are dropped. If the collected rows cannot
/// be converted to JSON, the raw body text is returned as a JSON string.
///
/// # Errors
///
/// Returns the `reqwest` error raised while reading the body as text.
#[cfg(feature = "csv")]
pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    let body = res.text().await?;

    let rows: Vec<std::collections::HashMap<String, String>> =
        csv::Reader::from_reader(body.as_bytes())
            .deserialize()
            .filter_map(Result::ok)
            .collect();

    Ok(match serde_json::to_value(rows) {
        Ok(table) => table,
        Err(_) => serde_json::Value::String(body),
    })
}
122
123#[cfg(not(feature = "csv"))]
124pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
125    handle_text(res).await
126}
127
128/// Basic handle response to text
129pub async fn handle_text(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
130    Ok(serde_json::Value::String(
131        res.text().await.unwrap_or_default(),
132    ))
133}
134
/// Decodes an XML body into a JSON value via `quick_xml`.
///
/// When the body cannot be deserialized, the raw text is returned as a JSON
/// string instead of an error.
///
/// NOTE(review): this is gated on the `csv` feature although it relies on
/// `quick_xml` — confirm that the feature pulls that crate in.
///
/// # Errors
///
/// Returns the `reqwest` error raised while reading the body as text.
#[cfg(feature = "csv")]
pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    let body = res.text().await?;
    let parsed = quick_xml::de::from_str::<serde_json::Value>(&body);
    Ok(parsed.unwrap_or_else(|_| serde_json::Value::String(body)))
}
144
145#[cfg(not(feature = "csv"))]
146/// Handle the XML response.
147pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
148    handle_text(res).await
149}
150
151pub async fn parse_response(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
152    let content_type = res
153        .headers()
154        .get(reqwest::header::CONTENT_TYPE)
155        .and_then(|v| v.to_str().ok())
156        .unwrap_or_default()
157        .to_ascii_lowercase();
158
159    if content_type.contains("json") && !content_type.contains("jsonl") {
160        handle_json(res).await
161    } else if content_type.contains("jsonl") || content_type.contains("ndjson") {
162        handle_jsonl(res).await
163    } else if content_type.contains("csv") {
164        handle_csv(res).await
165    } else if content_type.contains("xml") {
166        handle_xml(res).await
167    } else {
168        handle_text(res).await
169    }
170}
171
172impl Spider {
173    /// Creates a new instance of Spider.
174    ///
175    /// # Arguments
176    ///
177    /// * `api_key` - An optional API key. Defaults to using the 'SPIDER_API_KEY' env variable.
178    ///
179    /// # Returns
180    ///
181    /// A new instance of Spider or an error string if no API key is provided.
182    pub fn new(api_key: Option<String>) -> Result<Self, &'static str> {
183        let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
184
185        match api_key {
186            Some(key) => Ok(Self {
187                api_key: key,
188                client: Client::new(),
189            }),
190            None => Err("No API key provided"),
191        }
192    }
193
194    /// Creates a new instance of Spider.
195    ///
196    /// # Arguments
197    ///
198    /// * `api_key` - An optional API key. Defaults to using the 'SPIDER_API_KEY' env variable.
199    /// * `client` - A custom client to pass in.
200    ///
201    /// # Returns
202    ///
203    /// A new instance of Spider or an error string if no API key is provided.
204    pub fn new_with_client(api_key: Option<String>, client: Client) -> Result<Self, &'static str> {
205        let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
206
207        match api_key {
208            Some(key) => Ok(Self {
209                api_key: key,
210                client,
211            }),
212            None => Err("No API key provided"),
213        }
214    }
215
216    /// Sends a POST request to the API.
217    ///
218    /// # Arguments
219    ///
220    /// * `endpoint` - The API endpoint.
221    /// * `data` - The request data as a HashMap.
222    /// * `stream` - Whether streaming is enabled.
223    /// * `content_type` - The content type of the request.
224    ///
225    /// # Returns
226    ///
227    /// The response from the API.
228    async fn api_post_base(
229        &self,
230        endpoint: &str,
231        data: impl Serialize + Sized + std::fmt::Debug,
232        content_type: &str,
233    ) -> Result<Response, Error> {
234        let url: String = format!("{}/{}", get_api_url(), endpoint);
235
236        self.client
237            .post(&url)
238            .header(
239                "User-Agent",
240                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
241            )
242            .header("Content-Type", content_type)
243            .header("Authorization", format!("Bearer {}", self.api_key))
244            .json(&data)
245            .send()
246            .await
247    }
248
249    /// Sends a POST request to the API.
250    ///
251    /// # Arguments
252    ///
253    /// * `endpoint` - The API endpoint.
254    /// * `data` - The request data as a HashMap.
255    /// * `stream` - Whether streaming is enabled.
256    /// * `content_type` - The content type of the request.
257    ///
258    /// # Returns
259    ///
260    /// The response from the API.
261    pub async fn api_post(
262        &self,
263        endpoint: &str,
264        data: impl Serialize + std::fmt::Debug + Clone + Send + Sync,
265        content_type: &str,
266    ) -> Result<Response, Error> {
267        let fetch = || async {
268            self.api_post_base(endpoint, data.to_owned(), content_type)
269                .await
270        };
271
272        fetch
273            .retry(ExponentialBuilder::default().with_max_times(5))
274            .when(|err: &reqwest::Error| {
275                if let Some(status) = err.status() {
276                    status.is_server_error()
277                } else {
278                    err.is_timeout()
279                }
280            })
281            .await
282    }
283
284    /// Sends a GET request to the API.
285    ///
286    /// # Arguments
287    ///
288    /// * `endpoint` - The API endpoint.
289    ///
290    /// # Returns
291    ///
292    /// The response from the API as a JSON value.
293    async fn api_get_base<T: Serialize>(
294        &self,
295        endpoint: &str,
296        query_params: Option<&T>,
297    ) -> Result<serde_json::Value, reqwest::Error> {
298        let url = format!("{}/{}", get_api_url(), endpoint);
299        let res = self
300            .client
301            .get(&url)
302            .query(&query_params)
303            .header(
304                "User-Agent",
305                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
306            )
307            .header("Content-Type", "application/json")
308            .header("Authorization", format!("Bearer {}", self.api_key))
309            .send()
310            .await?;
311        parse_response(res).await
312    }
313
314    /// Sends a GET request to the API.
315    ///
316    /// # Arguments
317    ///
318    /// * `endpoint` - The API endpoint.
319    ///
320    /// # Returns
321    ///
322    /// The response from the API as a JSON value.
323    pub async fn api_get<T: Serialize>(
324        &self,
325        endpoint: &str,
326        query_params: Option<&T>,
327    ) -> Result<serde_json::Value, reqwest::Error> {
328        let fetch = || async { self.api_get_base(endpoint, query_params.to_owned()).await };
329
330        fetch
331            .retry(ExponentialBuilder::default().with_max_times(5))
332            .when(|err: &reqwest::Error| {
333                if let Some(status) = err.status() {
334                    status.is_server_error()
335                } else {
336                    err.is_timeout()
337                }
338            })
339            .await
340    }
341
342    /// Sends a DELETE request to the API.
343    ///
344    /// # Arguments
345    ///
346    /// * `endpoint` - The API endpoint.
347    /// * `params` - Optional request parameters.
348    /// * `stream` - Whether streaming is enabled.
349    /// * `content_type` - The content type of the request.
350    ///
351    /// # Returns
352    ///
353    /// The response from the API.
354    async fn api_delete_base(
355        &self,
356        endpoint: &str,
357        params: Option<HashMap<String, serde_json::Value>>,
358    ) -> Result<Response, Error> {
359        let url = format!("{}/v1/{}", get_api_url(), endpoint);
360        let request_builder = self
361            .client
362            .delete(&url)
363            .header(
364                "User-Agent",
365                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
366            )
367            .header("Content-Type", "application/json")
368            .header("Authorization", format!("Bearer {}", self.api_key));
369
370        let request_builder = if let Some(params) = params {
371            request_builder.json(&params)
372        } else {
373            request_builder
374        };
375
376        request_builder.send().await
377    }
378
379    /// Sends a DELETE request to the API.
380    ///
381    /// # Arguments
382    ///
383    /// * `endpoint` - The API endpoint.
384    /// * `params` - Optional request parameters.
385    /// * `stream` - Whether streaming is enabled.
386    /// * `content_type` - The content type of the request.
387    ///
388    /// # Returns
389    ///
390    /// The response from the API.
391    pub async fn api_delete(
392        &self,
393        endpoint: &str,
394        params: Option<HashMap<String, serde_json::Value>>,
395    ) -> Result<Response, Error> {
396        let fetch = || async { self.api_delete_base(endpoint, params.to_owned()).await };
397
398        fetch
399            .retry(ExponentialBuilder::default().with_max_times(5))
400            .when(|err: &reqwest::Error| {
401                if let Some(status) = err.status() {
402                    status.is_server_error()
403                } else {
404                    err.is_timeout()
405                }
406            })
407            .await
408    }
409
410    /// Scrapes a URL.
411    ///
412    /// # Arguments
413    ///
414    /// * `url` - The URL to scrape.
415    /// * `params` - Optional request parameters.
416    /// * `stream` - Whether streaming is enabled.
417    /// * `content_type` - The content type of the request.
418    ///
419    /// # Returns
420    ///
421    /// The response from the API as a JSON value.
422    pub async fn scrape_url(
423        &self,
424        url: &str,
425        params: Option<RequestParams>,
426        content_type: &str,
427    ) -> Result<serde_json::Value, reqwest::Error> {
428        let mut data = HashMap::new();
429
430        if let Ok(params) = serde_json::to_value(params) {
431            if let Some(ref p) = params.as_object() {
432                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
433            }
434        }
435
436        if !url.is_empty() {
437            data.insert(
438                "url".to_string(),
439                serde_json::Value::String(url.to_string()),
440            );
441        }
442
443        data.insert("limit".to_string(), serde_json::Value::Number(1.into()));
444
445        let res = self.api_post("crawl", data, content_type).await?;
446        parse_response(res).await
447    }
448
449    /// Scrapes multi URLs.
450    ///
451    /// # Arguments
452    ///
453    /// * `url` - The URL to scrape.
454    /// * `params` - Optional request parameters.
455    /// * `stream` - Whether streaming is enabled.
456    /// * `content_type` - The content type of the request.
457    ///
458    /// # Returns
459    ///
460    /// The response from the API as a JSON value.
461    pub async fn multi_scrape_url(
462        &self,
463        params: Option<Vec<RequestParams>>,
464        content_type: &str,
465    ) -> Result<serde_json::Value, reqwest::Error> {
466        let mut data = HashMap::new();
467
468if let Ok(mut params) = serde_json::to_value(params) {
469    if let Some(obj) = params.as_object_mut() {
470        obj.insert("limit".to_string(), serde_json::Value::Number(1.into()));
471        data.extend(obj.iter().map(|(k, v)| (k.clone(), v.clone())));
472    }
473}
474        let res = self.api_post("crawl", data, content_type).await?;
475        parse_response(res).await
476    }
477
478
479    /// Crawls a URL.
480    ///
481    /// # Arguments
482    ///
483    /// * `url` - The URL to crawl.
484    /// * `params` - Optional request parameters.
485    /// * `stream` - Whether streaming is enabled.
486    /// * `content_type` - The content type of the request.
487    /// * `callback` - Optional callback function to handle each streamed chunk.
488    ///
489    /// # Returns
490    ///
491    /// The response from the API as a JSON value.
492    pub async fn crawl_url(
493        &self,
494        url: &str,
495        params: Option<RequestParams>,
496        stream: bool,
497        content_type: &str,
498        callback: Option<impl Fn(serde_json::Value) + Send>,
499    ) -> Result<serde_json::Value, reqwest::Error> {
500        use tokio_util::codec::{FramedRead, LinesCodec};
501
502        let mut data = HashMap::new();
503
504        if let Ok(params) = serde_json::to_value(params) {
505            if let Some(ref p) = params.as_object() {
506                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
507            }
508        }
509
510        data.insert("url".into(), serde_json::Value::String(url.to_string()));
511
512        let res = self.api_post("crawl", data, content_type).await?;
513
514        if stream {
515            if let Some(callback) = callback {
516                let stream = res.bytes_stream();
517
518                let stream_reader = tokio_util::io::StreamReader::new(
519                    stream
520                        .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
521                );
522
523                let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
524
525                while let Some(line_result) = lines.next().await {
526                    match line_result {
527                        Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
528                            Ok(value) => {
529                                callback(value);
530                            }
531                            Err(_e) => {
532                                continue;
533                            }
534                        },
535                        Err(_e) => return Ok(serde_json::Value::Null),
536                    }
537                }
538
539                Ok(serde_json::Value::Null)
540            } else {
541                Ok(serde_json::Value::Null)
542            }
543        } else {
544            parse_response(res).await
545        }
546    }
547
548    /// Crawls multiple URLs.
549    ///
550    /// # Arguments
551    ///
552    /// * `url` - The URL to crawl.
553    /// * `params` - Optional request parameters.
554    /// * `stream` - Whether streaming is enabled.
555    /// * `content_type` - The content type of the request.
556    /// * `callback` - Optional callback function to handle each streamed chunk.
557    ///
558    /// # Returns
559    ///
560    /// The response from the API as a JSON value.
561    pub async fn multi_crawl_url(
562        &self,
563        params: Option<Vec<RequestParams>>,
564        stream: bool,
565        content_type: &str,
566        callback: Option<impl Fn(serde_json::Value) + Send>,
567    ) -> Result<serde_json::Value, reqwest::Error> {
568        use tokio_util::codec::{FramedRead, LinesCodec};
569
570
571        let res = self.api_post("crawl", params, content_type).await?;
572
573        if stream {
574            if let Some(callback) = callback {
575                let stream = res.bytes_stream();
576
577                let stream_reader = tokio_util::io::StreamReader::new(
578                    stream
579                        .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
580                );
581
582                let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
583
584                while let Some(line_result) = lines.next().await {
585                    match line_result {
586                        Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
587                            Ok(value) => {
588                                callback(value);
589                            }
590                            Err(_e) => {
591                                continue;
592                            }
593                        },
594                        Err(_e) => return Ok(serde_json::Value::Null),
595                    }
596                }
597
598                Ok(serde_json::Value::Null)
599            } else {
600                Ok(serde_json::Value::Null)
601            }
602        } else {
603            parse_response(res).await
604        }
605    }
606
607
608    /// Fetches links from a URL.
609    ///
610    /// # Arguments
611    ///
612    /// * `url` - The URL to fetch links from.
613    /// * `params` - Optional request parameters.
614    /// * `stream` - Whether streaming is enabled.
615    /// * `content_type` - The content type of the request.
616    ///
617    /// # Returns
618    ///
619    /// The response from the API as a JSON value.
620    pub async fn links(
621        &self,
622        url: &str,
623        params: Option<RequestParams>,
624        _stream: bool,
625        content_type: &str,
626    ) -> Result<serde_json::Value, reqwest::Error> {
627        let mut data = HashMap::new();
628
629        if let Ok(params) = serde_json::to_value(params) {
630            if let Some(ref p) = params.as_object() {
631                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
632            }
633        }
634
635        data.insert("url".into(), serde_json::Value::String(url.to_string()));
636
637        let res = self.api_post("links", data, content_type).await?;
638        parse_response(res).await
639    }
640
641
642    /// Fetches links from a URLs.
643    ///
644    /// # Arguments
645    ///
646    /// * `url` - The URL to fetch links from.
647    /// * `params` - Optional request parameters.
648    /// * `stream` - Whether streaming is enabled.
649    /// * `content_type` - The content type of the request.
650    ///
651    /// # Returns
652    ///
653    /// The response from the API as a JSON value.
654    pub async fn multi_links(
655        &self,
656        params: Option<Vec<RequestParams>>,
657        _stream: bool,
658        content_type: &str,
659    ) -> Result<serde_json::Value, reqwest::Error> {
660        let res = self.api_post("links", params, content_type).await?;
661        parse_response(res).await
662    }
663
664    
665    /// Takes a screenshot of a URL.
666    ///
667    /// # Arguments
668    ///
669    /// * `url` - The URL to take a screenshot of.
670    /// * `params` - Optional request parameters.
671    /// * `stream` - Whether streaming is enabled.
672    /// * `content_type` - The content type of the request.
673    ///
674    /// # Returns
675    ///
676    /// The response from the API as a JSON value.
677    pub async fn screenshot(
678        &self,
679        url: &str,
680        params: Option<RequestParams>,
681        _stream: bool,
682        content_type: &str,
683    ) -> Result<serde_json::Value, reqwest::Error> {
684        let mut data = HashMap::new();
685
686        if let Ok(params) = serde_json::to_value(params) {
687            if let Some(ref p) = params.as_object() {
688                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
689            }
690        }
691
692        data.insert("url".into(), serde_json::Value::String(url.to_string()));
693
694        let res = self.api_post("screenshot", data, content_type).await?;
695        parse_response(res).await
696    }
697
698    /// Takes a screenshot of multiple URLs.
699    ///
700    /// # Arguments
701    ///
702    /// * `url` - The URL to take a screenshot of.
703    /// * `params` - Optional request parameters.
704    /// * `stream` - Whether streaming is enabled.
705    /// * `content_type` - The content type of the request.
706    ///
707    /// # Returns
708    ///
709    /// The response from the API as a JSON value.
710    pub async fn multi_screenshot(
711        &self,
712        params: Option<Vec<RequestParams>>,
713        _stream: bool,
714        content_type: &str,
715    ) -> Result<serde_json::Value, reqwest::Error> {
716        let res = self.api_post("screenshot", params, content_type).await?;
717        parse_response(res).await
718    }
719
720    /// Searches for a query.
721    ///
722    /// # Arguments
723    ///
724    /// * `q` - The query to search for.
725    /// * `params` - Optional request parameters.
726    /// * `stream` - Whether streaming is enabled.
727    /// * `content_type` - The content type of the request.
728    ///
729    /// # Returns
730    ///
731    /// The response from the API as a JSON value.
732    pub async fn search(
733        &self,
734        q: &str,
735        params: Option<SearchRequestParams>,
736        _stream: bool,
737        content_type: &str,
738    ) -> Result<serde_json::Value, reqwest::Error> {
739        let body = match params {
740            Some(mut params) => {
741                params.search = q.to_string();
742                params
743            }
744            _ => {
745                let mut params = SearchRequestParams::default();
746                params.search = q.to_string();
747                params
748            }
749        };
750
751        let res = self.api_post("search", body, content_type).await?;
752
753        parse_response(res).await
754    }
755
756    /// Searches for multiple querys.
757    ///
758    /// # Arguments
759    ///
760    /// * `q` - The query to search for.
761    /// * `params` - Optional request parameters.
762    /// * `stream` - Whether streaming is enabled.
763    /// * `content_type` - The content type of the request.
764    ///
765    /// # Returns
766    ///
767    /// The response from the API as a JSON value.
768    pub async fn multi_search(
769        &self,
770        params: Option<Vec<SearchRequestParams>>,
771        content_type: &str,
772    ) -> Result<serde_json::Value, reqwest::Error> {
773        let res = self.api_post("search", params, content_type).await?;
774        parse_response(res).await
775    }
776
777    /// Transforms data.
778    ///
779    /// # Arguments
780    ///
781    /// * `data` - The data to transform.
782    /// * `params` - Optional request parameters.
783    /// * `stream` - Whether streaming is enabled.
784    /// * `content_type` - The content type of the request.
785    ///
786    /// # Returns
787    ///
788    /// The response from the API as a JSON value.
789    pub async fn transform(
790        &self,
791        data: Vec<HashMap<&str, &str>>,
792        params: Option<TransformParams>,
793        _stream: bool,
794        content_type: &str,
795    ) -> Result<serde_json::Value, reqwest::Error> {
796        let mut payload = HashMap::new();
797
798        if let Ok(params) = serde_json::to_value(params) {
799            if let Some(ref p) = params.as_object() {
800                payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
801            }
802        }
803
804        if let Ok(d) = serde_json::to_value(data) {
805            payload.insert("data".into(), d);
806        }
807
808        let res = self.api_post("transform", payload, content_type).await?;
809
810        parse_response(res).await
811    }
812
813    /// Get the account credits left.
814    pub async fn get_credits(&self) -> Result<serde_json::Value, reqwest::Error> {
815        self.api_get::<serde_json::Value>("data/credits", None)
816            .await
817    }
818
819    /// Send a request for a data record.
820    pub async fn data_post(
821        &self,
822        table: &str,
823        data: Option<RequestParams>,
824    ) -> Result<serde_json::Value, reqwest::Error> {
825        let res = self
826            .api_post(&format!("data/{}", table), data, "application/json")
827            .await?;
828        parse_response(res).await
829    }
830
831    /// Get a table record.
832    pub async fn data_get(
833        &self,
834        table: &str,
835        params: Option<RequestParams>,
836    ) -> Result<serde_json::Value, reqwest::Error> {
837        let mut payload = HashMap::new();
838
839        if let Some(params) = params {
840            if let Ok(p) = serde_json::to_value(params) {
841                if let Some(o) = p.as_object() {
842                    payload.extend(o.iter().map(|(k, v)| (k.as_str(), v.clone())));
843                }
844            }
845        }
846
847        let res = self
848            .api_get::<serde_json::Value>(&format!("data/{}", table), None)
849            .await?;
850        Ok(res)
851    }
852}
853
#[cfg(test)]
mod tests {
    use super::*;
    use dotenv::dotenv;
    use lazy_static::lazy_static;
    use reqwest::ClientBuilder;

    // Every test below sends a real request through the shared client.
    const TARGET_URL: &str = "https://example.com";
    const JSON: &str = "application/json";

    lazy_static! {
        static ref SPIDER_CLIENT: Spider = {
            // Load `.env` before the client is built, since the API key is
            // taken from the environment (`None` is passed as the key).
            dotenv().ok();
            let http = ClientBuilder::new()
                .user_agent("SpiderBot")
                .build()
                .unwrap();

            Spider::new_with_client(None, http).expect("client to build")
        };
    }

    #[tokio::test]
    #[ignore]
    async fn test_scrape_url() {
        let outcome = SPIDER_CLIENT.scrape_url(TARGET_URL, None, JSON).await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_crawl_url() {
        let outcome = SPIDER_CLIENT
            .crawl_url(TARGET_URL, None, false, JSON, None::<fn(serde_json::Value)>)
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_links() {
        let outcome = SPIDER_CLIENT.links(TARGET_URL, None, false, JSON).await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_screenshot() {
        let params = RequestParams {
            limit: Some(1),
            ..Default::default()
        };

        let outcome = SPIDER_CLIENT
            .screenshot(TARGET_URL, Some(params), false, JSON)
            .await;
        assert!(outcome.is_ok());
    }

    // #[tokio::test(flavor = "multi_thread")]
    // async fn test_search() {
    //     let mut params = SearchRequestParams::default();

    //     params.search_limit = Some(1);
    //     params.num = Some(1);
    //     params.fetch_page_content = Some(false);

    //     let response = SPIDER_CLIENT
    //         .search("a sports website", Some(params), false, "application/json")
    //         .await;

    //     assert!(response.is_ok());
    // }

    #[tokio::test]
    #[ignore]
    async fn test_transform() {
        let documents = vec![HashMap::from([(
            "<html><body><h1>Transformation</h1></body></html>",
            "",
        )])];

        let outcome = SPIDER_CLIENT
            .transform(documents, None, false, JSON)
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_get_credits() {
        let outcome = SPIDER_CLIENT.get_credits().await;
        assert!(outcome.is_ok());
    }
}
953}