spider_client/
lib.rs

1//! The `spider-client` module provides the primary interface and
2//! functionalities for the Spider web crawler library, which is
3//! designed for rapid and efficient crawling of web pages to gather
4//! links using isolated contexts.
5//!
6//! ### Features
7//!
8//! - **Multi-threaded Crawling:** Spider can utilize multiple
9//!   threads to parallelize the crawling process, drastically
10//!   improving performance and allowing the ability to gather
11//!   millions of pages in a short time.
12//!
13//! - **Configurable:** The library provides various options to
14//!   configure the crawling behavior, such as setting the depth
15//!   of crawling, user-agent strings, delays between requests,
16//!   and more.
17//!
18//! - **Link Gathering:** One of the primary objectives of Spider is to
19//!   gather and manage links from the web pages it crawls,
20//!   compiling them into a structured format for further use.
21//!
22//! ### Examples
23//!
24//! Basic usage of the Spider client might look like this:
25//!
26//! ```rust
27//! use spider_client::{Spider, RequestType, RequestParams};
28//! use tokio;
29//!
30//!  # #[ignore]
31//! #[tokio::main]
32//! async fn main() {
33//!     let spider = Spider::new(Some("myspiderapikey".into())).expect("API key must be provided");
34//!
35//!     let url = "https://spider.cloud";
36//!
37//!     // Scrape a single URL
38//!     let scraped_data = spider.scrape_url(url, None, "application/json").await.expect("Failed to scrape the URL");
39//!
40//!     println!("Scraped Data: {:?}", scraped_data);
41//!
42//!     // Crawl a website
43//!     let crawler_params = RequestParams {
44//!         limit: Some(1),
45//!         proxy_enabled: Some(true),
46//!         store_data: Some(false),
47//!         metadata: Some(false),
48//!         request: Some(RequestType::Http),
49//!         ..Default::default()
50//!     };
51//!
52//!     let crawl_result = spider.crawl_url(url, Some(crawler_params), false, "application/json", None::<fn(serde_json::Value)>).await.expect("Failed to crawl the URL");
53//!
54//!     println!("Crawl Result: {:?}", crawl_result);
55//! }
56//! ```
57//!
58//! ### Modules
59//!
//! - `shapes`: Request and response types used by the Spider client,
//!   re-exported from the crate root.
62//!
63
64pub mod shapes;
65
66use backon::ExponentialBuilder;
67use backon::Retryable;
68use reqwest::Client;
69use reqwest::{Error, Response};
70use serde::Serialize;
71use std::collections::HashMap;
72use tokio_stream::StreamExt;
73
74pub use shapes::{request::*, response::*};
75
/// Base URL of the Spider API; endpoint paths are appended to it.
const API_URL: &str = "https://api.spider.cloud";
78
/// Client handle for the Spider API, pairing an API key with a reusable HTTP client.
///
/// Prefer [`Spider::new`] or [`Spider::new_with_client`] for construction: the
/// derived `Default` produces an instance with an empty `api_key`.
#[derive(Debug, Default)]
pub struct Spider {
    /// The Spider API key, sent as a bearer token in the `Authorization` header.
    pub api_key: String,
    /// The HTTP client re-used for every request made through this instance.
    pub client: Client,
}
87
88/// Handle the json response.
89pub async fn handle_json(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
90    res.json().await
91}
92
93/// Handle the jsonl response.
94pub async fn handle_jsonl(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
95    let text = res.text().await?;
96    let lines = text
97        .lines()
98        .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
99        .collect::<Vec<_>>();
100    Ok(serde_json::Value::Array(lines))
101}
102
/// Decode a CSV body into a JSON array with one object per record.
///
/// Every record is deserialized into a map of string values; records that
/// fail to deserialize are skipped. If the collected records cannot be
/// converted to JSON, the raw body text is returned as a JSON string instead.
///
/// # Errors
///
/// Returns the `reqwest::Error` produced while reading the body as text.
#[cfg(feature = "csv")]
pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    use std::collections::HashMap;

    let body = res.text().await?;
    let mut reader = csv::Reader::from_reader(body.as_bytes());
    let mut rows: Vec<HashMap<String, String>> = Vec::new();

    for row in reader.deserialize::<HashMap<String, String>>() {
        if let Ok(row) = row {
            rows.push(row);
        }
    }

    match serde_json::to_value(rows) {
        Ok(json) => Ok(json),
        Err(_) => Ok(serde_json::Value::String(body)),
    }
}
117
118#[cfg(not(feature = "csv"))]
119pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
120    handle_text(res).await
121}
122
123/// Basic handle response to text
124pub async fn handle_text(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
125    Ok(serde_json::Value::String(
126        res.text().await.unwrap_or_default(),
127    ))
128}
129
/// Decode an XML body into a JSON value.
///
/// If the body cannot be deserialized as XML, the raw text is returned as a
/// JSON string instead.
///
/// NOTE(review): this implementation is gated on the `csv` feature although it
/// relies on `quick_xml` — confirm that the gate is intentional.
///
/// # Errors
///
/// Returns the `reqwest::Error` produced while reading the body as text.
#[cfg(feature = "csv")]
pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    let body = res.text().await?;

    if let Ok(parsed) = quick_xml::de::from_str::<serde_json::Value>(&body) {
        return Ok(parsed);
    }

    Ok(serde_json::Value::String(body))
}
139
140#[cfg(not(feature = "csv"))]
141/// Handle the XML response.
142pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
143    handle_text(res).await
144}
145
146pub async fn parse_response(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
147    let content_type = res
148        .headers()
149        .get(reqwest::header::CONTENT_TYPE)
150        .and_then(|v| v.to_str().ok())
151        .unwrap_or_default()
152        .to_ascii_lowercase();
153
154    if content_type.contains("json") && !content_type.contains("jsonl") {
155        handle_json(res).await
156    } else if content_type.contains("jsonl") || content_type.contains("ndjson") {
157        handle_jsonl(res).await
158    } else if content_type.contains("csv") {
159        handle_csv(res).await
160    } else if content_type.contains("xml") {
161        handle_xml(res).await
162    } else {
163        handle_text(res).await
164    }
165}
166
167impl Spider {
168    /// Creates a new instance of Spider.
169    ///
170    /// # Arguments
171    ///
172    /// * `api_key` - An optional API key. Defaults to using the 'SPIDER_API_KEY' env variable.
173    ///
174    /// # Returns
175    ///
176    /// A new instance of Spider or an error string if no API key is provided.
177    pub fn new(api_key: Option<String>) -> Result<Self, &'static str> {
178        let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
179
180        match api_key {
181            Some(key) => Ok(Self {
182                api_key: key,
183                client: Client::new(),
184            }),
185            None => Err("No API key provided"),
186        }
187    }
188
189    /// Creates a new instance of Spider.
190    ///
191    /// # Arguments
192    ///
193    /// * `api_key` - An optional API key. Defaults to using the 'SPIDER_API_KEY' env variable.
194    /// * `client` - A custom client to pass in.
195    ///
196    /// # Returns
197    ///
198    /// A new instance of Spider or an error string if no API key is provided.
199    pub fn new_with_client(api_key: Option<String>, client: Client) -> Result<Self, &'static str> {
200        let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
201
202        match api_key {
203            Some(key) => Ok(Self {
204                api_key: key,
205                client,
206            }),
207            None => Err("No API key provided"),
208        }
209    }
210
211    /// Sends a POST request to the API.
212    ///
213    /// # Arguments
214    ///
215    /// * `endpoint` - The API endpoint.
216    /// * `data` - The request data as a HashMap.
217    /// * `stream` - Whether streaming is enabled.
218    /// * `content_type` - The content type of the request.
219    ///
220    /// # Returns
221    ///
222    /// The response from the API.
223    async fn api_post_base(
224        &self,
225        endpoint: &str,
226        data: impl Serialize + Sized + std::fmt::Debug,
227        content_type: &str,
228    ) -> Result<Response, Error> {
229        let url: String = format!("{API_URL}/{}", endpoint);
230
231        self.client
232            .post(&url)
233            .header(
234                "User-Agent",
235                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
236            )
237            .header("Content-Type", content_type)
238            .header("Authorization", format!("Bearer {}", self.api_key))
239            .json(&data)
240            .send()
241            .await
242    }
243
244    /// Sends a POST request to the API.
245    ///
246    /// # Arguments
247    ///
248    /// * `endpoint` - The API endpoint.
249    /// * `data` - The request data as a HashMap.
250    /// * `stream` - Whether streaming is enabled.
251    /// * `content_type` - The content type of the request.
252    ///
253    /// # Returns
254    ///
255    /// The response from the API.
256    async fn api_post(
257        &self,
258        endpoint: &str,
259        data: impl Serialize + std::fmt::Debug + Clone + Send + Sync,
260        content_type: &str,
261    ) -> Result<Response, Error> {
262        let fetch = || async {
263            self.api_post_base(endpoint, data.to_owned(), content_type)
264                .await
265        };
266
267        fetch
268            .retry(ExponentialBuilder::default().with_max_times(5))
269            .when(|err: &reqwest::Error| {
270                if let Some(status) = err.status() {
271                    status.is_server_error()
272                } else {
273                    err.is_timeout()
274                }
275            })
276            .await
277    }
278
279    /// Sends a GET request to the API.
280    ///
281    /// # Arguments
282    ///
283    /// * `endpoint` - The API endpoint.
284    ///
285    /// # Returns
286    ///
287    /// The response from the API as a JSON value.
288    async fn api_get_base<T: Serialize>(
289        &self,
290        endpoint: &str,
291        query_params: Option<&T>,
292    ) -> Result<serde_json::Value, reqwest::Error> {
293        let url = format!("{API_URL}/{}", endpoint);
294        let res = self
295            .client
296            .get(&url)
297            .query(&query_params)
298            .header(
299                "User-Agent",
300                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
301            )
302            .header("Content-Type", "application/json")
303            .header("Authorization", format!("Bearer {}", self.api_key))
304            .send()
305            .await?;
306        parse_response(res).await
307    }
308
309    /// Sends a GET request to the API.
310    ///
311    /// # Arguments
312    ///
313    /// * `endpoint` - The API endpoint.
314    ///
315    /// # Returns
316    ///
317    /// The response from the API as a JSON value.
318    async fn api_get<T: Serialize>(
319        &self,
320        endpoint: &str,
321        query_params: Option<&T>,
322    ) -> Result<serde_json::Value, reqwest::Error> {
323        let fetch = || async { self.api_get_base(endpoint, query_params.to_owned()).await };
324
325        fetch
326            .retry(ExponentialBuilder::default().with_max_times(5))
327            .when(|err: &reqwest::Error| {
328                if let Some(status) = err.status() {
329                    status.is_server_error()
330                } else {
331                    err.is_timeout()
332                }
333            })
334            .await
335    }
336
337    /// Sends a DELETE request to the API.
338    ///
339    /// # Arguments
340    ///
341    /// * `endpoint` - The API endpoint.
342    /// * `params` - Optional request parameters.
343    /// * `stream` - Whether streaming is enabled.
344    /// * `content_type` - The content type of the request.
345    ///
346    /// # Returns
347    ///
348    /// The response from the API.
349    async fn api_delete_base(
350        &self,
351        endpoint: &str,
352        params: Option<HashMap<String, serde_json::Value>>,
353    ) -> Result<Response, Error> {
354        let url = format!("{API_URL}/v1/{}", endpoint);
355        let request_builder = self
356            .client
357            .delete(&url)
358            .header(
359                "User-Agent",
360                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
361            )
362            .header("Content-Type", "application/json")
363            .header("Authorization", format!("Bearer {}", self.api_key));
364
365        let request_builder = if let Some(params) = params {
366            request_builder.json(&params)
367        } else {
368            request_builder
369        };
370
371        request_builder.send().await
372    }
373
374    /// Sends a DELETE request to the API.
375    ///
376    /// # Arguments
377    ///
378    /// * `endpoint` - The API endpoint.
379    /// * `params` - Optional request parameters.
380    /// * `stream` - Whether streaming is enabled.
381    /// * `content_type` - The content type of the request.
382    ///
383    /// # Returns
384    ///
385    /// The response from the API.
386    async fn api_delete(
387        &self,
388        endpoint: &str,
389        params: Option<HashMap<String, serde_json::Value>>,
390    ) -> Result<Response, Error> {
391        let fetch = || async { self.api_delete_base(endpoint, params.to_owned()).await };
392
393        fetch
394            .retry(ExponentialBuilder::default().with_max_times(5))
395            .when(|err: &reqwest::Error| {
396                if let Some(status) = err.status() {
397                    status.is_server_error()
398                } else {
399                    err.is_timeout()
400                }
401            })
402            .await
403    }
404
405    /// Scrapes a URL.
406    ///
407    /// # Arguments
408    ///
409    /// * `url` - The URL to scrape.
410    /// * `params` - Optional request parameters.
411    /// * `stream` - Whether streaming is enabled.
412    /// * `content_type` - The content type of the request.
413    ///
414    /// # Returns
415    ///
416    /// The response from the API as a JSON value.
417    pub async fn scrape_url(
418        &self,
419        url: &str,
420        params: Option<RequestParams>,
421        content_type: &str,
422    ) -> Result<serde_json::Value, reqwest::Error> {
423        let mut data = HashMap::new();
424
425        data.insert(
426            "url".to_string(),
427            serde_json::Value::String(url.to_string()),
428        );
429        data.insert("limit".to_string(), serde_json::Value::Number(1.into()));
430
431        if let Ok(params) = serde_json::to_value(params) {
432            if let Some(ref p) = params.as_object() {
433                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
434            }
435        }
436
437        let res = self.api_post("crawl", data, content_type).await?;
438        parse_response(res).await
439    }
440
441    /// Crawls a URL.
442    ///
443    /// # Arguments
444    ///
445    /// * `url` - The URL to crawl.
446    /// * `params` - Optional request parameters.
447    /// * `stream` - Whether streaming is enabled.
448    /// * `content_type` - The content type of the request.
449    /// * `callback` - Optional callback function to handle each streamed chunk.
450    ///
451    /// # Returns
452    ///
453    /// The response from the API as a JSON value.
454    pub async fn crawl_url(
455        &self,
456        url: &str,
457        params: Option<RequestParams>,
458        stream: bool,
459        content_type: &str,
460        callback: Option<impl Fn(serde_json::Value) + Send>,
461    ) -> Result<serde_json::Value, reqwest::Error> {
462        use tokio_util::codec::{FramedRead, LinesCodec};
463
464        let mut data = HashMap::new();
465
466        if let Ok(params) = serde_json::to_value(params) {
467            if let Some(ref p) = params.as_object() {
468                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
469            }
470        }
471
472        data.insert("url".into(), serde_json::Value::String(url.to_string()));
473
474        let res = self.api_post("crawl", data, content_type).await?;
475
476        if stream {
477            if let Some(callback) = callback {
478                let stream = res.bytes_stream();
479
480                let stream_reader = tokio_util::io::StreamReader::new(
481                    stream
482                        .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
483                );
484
485                let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
486
487                while let Some(line_result) = lines.next().await {
488                    match line_result {
489                        Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
490                            Ok(value) => {
491                                callback(value);
492                            }
493                            Err(_e) => {
494                                continue;
495                            }
496                        },
497                        Err(_e) => return Ok(serde_json::Value::Null),
498                    }
499                }
500
501                Ok(serde_json::Value::Null)
502            } else {
503                Ok(serde_json::Value::Null)
504            }
505        } else {
506            parse_response(res).await
507        }
508    }
509
510    /// Fetches links from a URL.
511    ///
512    /// # Arguments
513    ///
514    /// * `url` - The URL to fetch links from.
515    /// * `params` - Optional request parameters.
516    /// * `stream` - Whether streaming is enabled.
517    /// * `content_type` - The content type of the request.
518    ///
519    /// # Returns
520    ///
521    /// The response from the API as a JSON value.
522    pub async fn links(
523        &self,
524        url: &str,
525        params: Option<RequestParams>,
526        _stream: bool,
527        content_type: &str,
528    ) -> Result<serde_json::Value, reqwest::Error> {
529        let mut data = HashMap::new();
530
531        if let Ok(params) = serde_json::to_value(params) {
532            if let Some(ref p) = params.as_object() {
533                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
534            }
535        }
536
537        data.insert("url".into(), serde_json::Value::String(url.to_string()));
538
539        let res = self.api_post("links", data, content_type).await?;
540        parse_response(res).await
541    }
542
543    /// Takes a screenshot of a URL.
544    ///
545    /// # Arguments
546    ///
547    /// * `url` - The URL to take a screenshot of.
548    /// * `params` - Optional request parameters.
549    /// * `stream` - Whether streaming is enabled.
550    /// * `content_type` - The content type of the request.
551    ///
552    /// # Returns
553    ///
554    /// The response from the API as a JSON value.
555    pub async fn screenshot(
556        &self,
557        url: &str,
558        params: Option<RequestParams>,
559        _stream: bool,
560        content_type: &str,
561    ) -> Result<serde_json::Value, reqwest::Error> {
562        let mut data = HashMap::new();
563
564        if let Ok(params) = serde_json::to_value(params) {
565            if let Some(ref p) = params.as_object() {
566                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
567            }
568        }
569
570        data.insert("url".into(), serde_json::Value::String(url.to_string()));
571
572        let res = self.api_post("screenshot", data, content_type).await?;
573        parse_response(res).await
574    }
575
576    /// Searches for a query.
577    ///
578    /// # Arguments
579    ///
580    /// * `q` - The query to search for.
581    /// * `params` - Optional request parameters.
582    /// * `stream` - Whether streaming is enabled.
583    /// * `content_type` - The content type of the request.
584    ///
585    /// # Returns
586    ///
587    /// The response from the API as a JSON value.
588    pub async fn search(
589        &self,
590        q: &str,
591        params: Option<SearchRequestParams>,
592        _stream: bool,
593        content_type: &str,
594    ) -> Result<serde_json::Value, reqwest::Error> {
595        let body = match params {
596            Some(mut params) => {
597                params.search = q.to_string();
598                params
599            }
600            _ => {
601                let mut params = SearchRequestParams::default();
602                params.search = q.to_string();
603                params
604            }
605        };
606
607        let res = self.api_post("search", body, content_type).await?;
608
609        parse_response(res).await
610    }
611
612    /// Transforms data.
613    ///
614    /// # Arguments
615    ///
616    /// * `data` - The data to transform.
617    /// * `params` - Optional request parameters.
618    /// * `stream` - Whether streaming is enabled.
619    /// * `content_type` - The content type of the request.
620    ///
621    /// # Returns
622    ///
623    /// The response from the API as a JSON value.
624    pub async fn transform(
625        &self,
626        data: Vec<HashMap<&str, &str>>,
627        params: Option<TransformParams>,
628        _stream: bool,
629        content_type: &str,
630    ) -> Result<serde_json::Value, reqwest::Error> {
631        let mut payload = HashMap::new();
632
633        if let Ok(params) = serde_json::to_value(params) {
634            if let Some(ref p) = params.as_object() {
635                payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
636            }
637        }
638
639        if let Ok(d) = serde_json::to_value(data) {
640            payload.insert("data".into(), d);
641        }
642
643        let res = self.api_post("transform", payload, content_type).await?;
644
645        parse_response(res).await
646    }
647
648    /// Extracts contacts from a URL.
649    ///
650    /// # Arguments
651    ///
652    /// * `url` - The URL to extract contacts from.
653    /// * `params` - Optional request parameters.
654    /// * `stream` - Whether streaming is enabled.
655    /// * `content_type` - The content type of the request.
656    ///
657    /// # Returns
658    ///
659    /// The response from the API as a JSON value.
660    pub async fn extract_contacts(
661        &self,
662        url: &str,
663        params: Option<RequestParams>,
664        _stream: bool,
665        content_type: &str,
666    ) -> Result<serde_json::Value, reqwest::Error> {
667        let mut data = HashMap::new();
668
669        if let Ok(params) = serde_json::to_value(params) {
670            if let Ok(params) = serde_json::to_value(params) {
671                if let Some(ref p) = params.as_object() {
672                    data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
673                }
674            }
675        }
676
677        match serde_json::to_value(url) {
678            Ok(u) => {
679                data.insert("url".into(), u);
680            }
681            _ => (),
682        }
683
684        let res = self
685            .api_post("pipeline/extract-contacts", data, content_type)
686            .await?;
687
688        parse_response(res).await
689    }
690
691    /// Labels data from a URL.
692    ///
693    /// # Arguments
694    ///
695    /// * `url` - The URL to label data from.
696    /// * `params` - Optional request parameters.
697    /// * `stream` - Whether streaming is enabled.
698    /// * `content_type` - The content type of the request.
699    ///
700    /// # Returns
701    ///
702    /// The response from the API as a JSON value.
703    pub async fn label(
704        &self,
705        url: &str,
706        params: Option<RequestParams>,
707        _stream: bool,
708        content_type: &str,
709    ) -> Result<serde_json::Value, reqwest::Error> {
710        let mut data = HashMap::new();
711
712        if let Ok(params) = serde_json::to_value(params) {
713            if let Ok(params) = serde_json::to_value(params) {
714                if let Some(ref p) = params.as_object() {
715                    data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
716                }
717            }
718        }
719
720        data.insert("url".into(), serde_json::Value::String(url.to_string()));
721
722        let res = self.api_post("pipeline/label", data, content_type).await?;
723        parse_response(res).await
724    }
725
726    /// Download a record from storage.
727    ///
728    /// # Arguments
729    ///
730    /// * `url` - Optional exact url of the file in storage.
731    /// * `options` - Optional options.
732    /// * `stream` - Whether streaming is enabled.
733    ///
734    /// # Returns
735    ///
736    /// The response from the API.
737    pub async fn download(
738        &self,
739        url: Option<&str>,
740        options: Option<HashMap<&str, i32>>,
741    ) -> Result<reqwest::Response, reqwest::Error> {
742        let mut params = HashMap::new();
743
744        if let Some(url) = url {
745            params.insert("url".to_string(), url.to_string());
746        }
747
748        if let Some(options) = options {
749            for (key, value) in options {
750                params.insert(key.to_string(), value.to_string());
751            }
752        }
753
754        let url = format!("{API_URL}/v1/data/download");
755        let request = self
756            .client
757            .get(&url)
758            .header(
759                "User-Agent",
760                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
761            )
762            .header("Content-Type", "application/octet-stream")
763            .header("Authorization", format!("Bearer {}", self.api_key))
764            .query(&params);
765
766        let res = request.send().await?;
767
768        Ok(res)
769    }
770
771    /// Creates a signed URL of a file from storage.
772    ///
773    /// # Arguments
774    ///
775    /// * `url` - Optional exact url of the file in storage.
776    /// * `options` - Optional options.
777    /// * `stream` - Whether streaming is enabled.
778    ///
779    /// # Returns
780    ///
781    /// The response from the API.
782    pub async fn create_signed_url(
783        &self,
784        url: Option<&str>,
785        options: Option<HashMap<&str, i32>>,
786    ) -> Result<serde_json::Value, reqwest::Error> {
787        let mut params = HashMap::new();
788
789        if let Some(options) = options {
790            for (key, value) in options {
791                params.insert(key.to_string(), value.to_string());
792            }
793        }
794
795        if let Some(url) = url {
796            params.insert("url".to_string(), url.to_string());
797        }
798
799        let url = format!("{API_URL}/v1/data/sign-url");
800        let request = self
801            .client
802            .get(&url)
803            .header(
804                "User-Agent",
805                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
806            )
807            .header("Authorization", format!("Bearer {}", self.api_key))
808            .query(&params);
809
810        let res = request.send().await?;
811
812        parse_response(res).await
813    }
814
815    /// Gets the crawl state of a URL.
816    ///
817    /// # Arguments
818    ///
819    /// * `url` - The URL to get the crawl state of.
820    /// * `params` - Optional request parameters.
821    /// * `stream` - Whether streaming is enabled.
822    /// * `content_type` - The content type of the request.
823    ///
824    /// # Returns
825    ///
826    pub async fn get_crawl_state(
827        &self,
828        url: &str,
829        params: Option<RequestParams>,
830        content_type: &str,
831    ) -> Result<serde_json::Value, reqwest::Error> {
832        let mut payload = HashMap::new();
833        payload.insert("url".into(), serde_json::Value::String(url.to_string()));
834        payload.insert(
835            "contentType".into(),
836            serde_json::Value::String(content_type.to_string()),
837        );
838
839        if let Ok(params) = serde_json::to_value(params) {
840            if let Ok(params) = serde_json::to_value(params) {
841                if let Some(ref p) = params.as_object() {
842                    payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
843                }
844            }
845        }
846
847        let res = self
848            .api_post("data/crawl_state", payload, content_type)
849            .await?;
850        parse_response(res).await
851    }
852
853    /// Get the account credits left.
854    pub async fn get_credits(&self) -> Result<serde_json::Value, reqwest::Error> {
855        self.api_get::<serde_json::Value>("data/credits", None)
856            .await
857    }
858
859    /// Send a request for a data record.
860    pub async fn data_post(
861        &self,
862        table: &str,
863        data: Option<RequestParams>,
864    ) -> Result<serde_json::Value, reqwest::Error> {
865        let res = self
866            .api_post(&format!("data/{}", table), data, "application/json")
867            .await?;
868        parse_response(res).await
869    }
870
871    /// Query a record from the global DB.
872    pub async fn query(&self, params: &QueryRequest) -> Result<serde_json::Value, reqwest::Error> {
873        let res = self
874            .api_get::<QueryRequest>(&"data/query", Some(params))
875            .await?;
876
877        Ok(res)
878    }
879
880    /// Get a table record.
881    pub async fn data_get(
882        &self,
883        table: &str,
884        params: Option<RequestParams>,
885    ) -> Result<serde_json::Value, reqwest::Error> {
886        let mut payload = HashMap::new();
887
888        if let Some(params) = params {
889            if let Ok(p) = serde_json::to_value(params) {
890                if let Some(o) = p.as_object() {
891                    payload.extend(o.iter().map(|(k, v)| (k.as_str(), v.clone())));
892                }
893            }
894        }
895
896        let res = self
897            .api_get::<serde_json::Value>(&format!("data/{}", table), None)
898            .await?;
899        Ok(res)
900    }
901
902    /// Delete a record.
903    pub async fn data_delete(
904        &self,
905        table: &str,
906        params: Option<RequestParams>,
907    ) -> Result<serde_json::Value, reqwest::Error> {
908        let mut payload = HashMap::new();
909
910        if let Ok(params) = serde_json::to_value(params) {
911            if let Ok(params) = serde_json::to_value(params) {
912                if let Some(ref p) = params.as_object() {
913                    payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
914                }
915            }
916        }
917
918        let res = self
919            .api_delete(&format!("data/{}", table), Some(payload))
920            .await?;
921        parse_response(res).await
922    }
923}
924
/// Integration tests for the client.
///
/// Every test performs a real request, so a valid `SPIDER_API_KEY` must be
/// available either in the environment or in a `.env` file.
#[cfg(test)]
mod tests {
    use super::*;
    use dotenv::dotenv;
    use lazy_static::lazy_static;
    use reqwest::ClientBuilder;

    lazy_static! {
        // Shared client for all tests: loads `.env` first so the API key can be
        // picked up from the environment.
        static ref SPIDER_CLIENT: Spider = {
            dotenv().ok();
            let client = ClientBuilder::new();
            let client = client.user_agent("SpiderBot").build().unwrap();

            Spider::new_with_client(None, client).expect("client to build")
        };
    }

    #[tokio::test]
    #[ignore]
    async fn test_scrape_url() {
        let response = SPIDER_CLIENT
            .scrape_url("https://example.com", None, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    async fn test_crawl_url() {
        let response = SPIDER_CLIENT
            .crawl_url(
                "https://example.com",
                None,
                false,
                "application/json",
                None::<fn(serde_json::Value)>,
            )
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_links() {
        let response: Result<serde_json::Value, Error> = SPIDER_CLIENT
            .links("https://example.com", None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_screenshot() {
        let mut params = RequestParams::default();
        params.limit = Some(1);

        let response = SPIDER_CLIENT
            .screenshot(
                "https://example.com",
                Some(params),
                false,
                "application/json",
            )
            .await;
        assert!(response.is_ok());
    }

    // #[tokio::test(flavor = "multi_thread")]
    // async fn test_search() {
    //     let mut params = SearchRequestParams::default();

    //     params.search_limit = Some(1);
    //     params.num = Some(1);
    //     params.fetch_page_content = Some(false);

    //     let response = SPIDER_CLIENT
    //         .search("a sports website", Some(params), false, "application/json")
    //         .await;

    //     assert!(response.is_ok());
    // }

    #[tokio::test]
    #[ignore]
    async fn test_transform() {
        // Each entry maps a field name to its content; the document goes under `html`.
        let data = vec![HashMap::from([(
            "html",
            "<html><body><h1>Transformation</h1></body></html>",
        )])];
        let response = SPIDER_CLIENT
            .transform(data, None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_extract_contacts() {
        let response = SPIDER_CLIENT
            .extract_contacts("https://example.com", None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_label() {
        let response = SPIDER_CLIENT
            .label("https://example.com", None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    async fn test_create_signed_url() {
        let response = SPIDER_CLIENT
            .create_signed_url(Some("example.com"), None)
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    async fn test_get_crawl_state() {
        let response = SPIDER_CLIENT
            .get_crawl_state("https://example.com", None, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    async fn test_query() {
        let mut query = QueryRequest::default();

        query.domain = Some("spider.cloud".into());

        let response = SPIDER_CLIENT.query(&query).await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    async fn test_get_credits() {
        let response = SPIDER_CLIENT.get_credits().await;
        assert!(response.is_ok());
    }
}