spider_client/
lib.rs

1//! The `spider-client` module provides the primary interface and
2//! functionalities for the Spider web crawler library, which is
3//! designed for rapid and efficient crawling of web pages to gather
4//! links using isolated contexts.
5//!
6//! ### Features
7//!
8//! - **Multi-threaded Crawling:** Spider can utilize multiple
9//!   threads to parallelize the crawling process, drastically
10//!   improving performance and allowing the ability to gather
11//!   millions of pages in a short time.
12//!
13//! - **Configurable:** The library provides various options to
14//!   configure the crawling behavior, such as setting the depth
15//!   of crawling, user-agent strings, delays between requests,
16//!   and more.
17//!
18//! - **Link Gathering:** One of the primary objectives of Spider is to
19//!   gather and manage links from the web pages it crawls,
20//!   compiling them into a structured format for further use.
21//!
22//! ### Examples
23//!
24//! Basic usage of the Spider client might look like this:
25//!
//! ```rust,no_run
//! use spider_client::{Spider, RequestType, RequestParams};
//! use tokio;
//!
//! #[tokio::main]
//! async fn main() {
32//! async fn main() {
33//!     let spider = Spider::new(Some("myspiderapikey".into())).expect("API key must be provided");
34//!
35//!     let url = "https://spider.cloud";
36//!
37//!     // Scrape a single URL
38//!     let scraped_data = spider.scrape_url(url, None, "application/json").await.expect("Failed to scrape the URL");
39//!
40//!     println!("Scraped Data: {:?}", scraped_data);
41//!
42//!     // Crawl a website
43//!     let crawler_params = RequestParams {
44//!         limit: Some(1),
45//!         proxy_enabled: Some(true),
46//!         store_data: Some(false),
47//!         metadata: Some(false),
48//!         request: Some(RequestType::Http),
49//!         ..Default::default()
50//!     };
51//!
52//!     let crawl_result = spider.crawl_url(url, Some(crawler_params), false, "application/json", None::<fn(serde_json::Value)>).await.expect("Failed to crawl the URL");
53//!
54//!     println!("Crawl Result: {:?}", crawl_result);
55//! }
56//! ```
57//!
58//! ### Modules
59//!
//! - `shapes`: Request and response types used by the Spider client
//!   (`shapes::request` and `shapes::response` are re-exported at the crate root).
62//!
63
64pub mod shapes;
65
66use backon::ExponentialBuilder;
67use backon::Retryable;
68use reqwest::Client;
69use reqwest::{Error, Response};
70use serde::Serialize;
71use std::collections::HashMap;
72use tokio_stream::StreamExt;
73pub use shapes::{request::*, response::*};
74use std::sync::OnceLock;
75
/// Process-wide cache for the resolved API base URL.
static API_URL: OnceLock<String> = OnceLock::new();

/// Returns the base URL of the Spider API.
///
/// The value is resolved on the first call: the `SPIDER_API_URL` environment
/// variable is used when it can be read, otherwise `https://api.spider.cloud`.
/// Every later call returns the same cached string.
pub fn get_api_url() -> &'static str {
    let resolved = API_URL.get_or_init(|| match std::env::var("SPIDER_API_URL") {
        Ok(from_env) => from_env,
        Err(_) => String::from("https://api.spider.cloud"),
    });

    resolved.as_str()
}
84
85/// Represents a Spider with API key and HTTP client.
86#[derive(Debug, Default)]
87pub struct Spider {
88    /// The Spider API key.
89    pub api_key: String,
90    /// The Spider Client to re-use.
91    pub client: Client,
92}
93
94/// Handle the json response.
95pub async fn handle_json(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
96    res.json().await
97}
98
99/// Handle the jsonl response.
100pub async fn handle_jsonl(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
101    let text = res.text().await?;
102    let lines = text
103        .lines()
104        .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
105        .collect::<Vec<_>>();
106    Ok(serde_json::Value::Array(lines))
107}
108
109/// Handle the CSV response.
110#[cfg(feature = "csv")]
111pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
112    use std::collections::HashMap;
113    let text = res.text().await?;
114    let mut rdr = csv::Reader::from_reader(text.as_bytes());
115    let records: Vec<HashMap<String, String>> = rdr.deserialize().filter_map(Result::ok).collect();
116
117    if let Ok(record) = serde_json::to_value(records) {
118        Ok(record)
119    } else {
120        Ok(serde_json::Value::String(text))
121    }
122}
123
124#[cfg(not(feature = "csv"))]
125pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
126    handle_text(res).await
127}
128
129/// Basic handle response to text
130pub async fn handle_text(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
131    Ok(serde_json::Value::String(
132        res.text().await.unwrap_or_default(),
133    ))
134}
135
136/// Handle the XML response.
137#[cfg(feature = "csv")]
138pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
139    let text = res.text().await?;
140    match quick_xml::de::from_str::<serde_json::Value>(&text) {
141        Ok(val) => Ok(val),
142        Err(_) => Ok(serde_json::Value::String(text)),
143    }
144}
145
146#[cfg(not(feature = "csv"))]
147/// Handle the XML response.
148pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
149    handle_text(res).await
150}
151
152pub async fn parse_response(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
153    let content_type = res
154        .headers()
155        .get(reqwest::header::CONTENT_TYPE)
156        .and_then(|v| v.to_str().ok())
157        .unwrap_or_default()
158        .to_ascii_lowercase();
159
160    if content_type.contains("json") && !content_type.contains("jsonl") {
161        handle_json(res).await
162    } else if content_type.contains("jsonl") || content_type.contains("ndjson") {
163        handle_jsonl(res).await
164    } else if content_type.contains("csv") {
165        handle_csv(res).await
166    } else if content_type.contains("xml") {
167        handle_xml(res).await
168    } else {
169        handle_text(res).await
170    }
171}
172
173impl Spider {
174    /// Creates a new instance of Spider.
175    ///
176    /// # Arguments
177    ///
178    /// * `api_key` - An optional API key. Defaults to using the 'SPIDER_API_KEY' env variable.
179    ///
180    /// # Returns
181    ///
182    /// A new instance of Spider or an error string if no API key is provided.
183    pub fn new(api_key: Option<String>) -> Result<Self, &'static str> {
184        let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
185
186        match api_key {
187            Some(key) => Ok(Self {
188                api_key: key,
189                client: Client::new(),
190            }),
191            None => Err("No API key provided"),
192        }
193    }
194
195    /// Creates a new instance of Spider.
196    ///
197    /// # Arguments
198    ///
199    /// * `api_key` - An optional API key. Defaults to using the 'SPIDER_API_KEY' env variable.
200    /// * `client` - A custom client to pass in.
201    ///
202    /// # Returns
203    ///
204    /// A new instance of Spider or an error string if no API key is provided.
205    pub fn new_with_client(api_key: Option<String>, client: Client) -> Result<Self, &'static str> {
206        let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
207
208        match api_key {
209            Some(key) => Ok(Self {
210                api_key: key,
211                client,
212            }),
213            None => Err("No API key provided"),
214        }
215    }
216
217    /// Sends a POST request to the API.
218    ///
219    /// # Arguments
220    ///
221    /// * `endpoint` - The API endpoint.
222    /// * `data` - The request data as a HashMap.
223    /// * `stream` - Whether streaming is enabled.
224    /// * `content_type` - The content type of the request.
225    ///
226    /// # Returns
227    ///
228    /// The response from the API.
229    async fn api_post_base(
230        &self,
231        endpoint: &str,
232        data: impl Serialize + Sized + std::fmt::Debug,
233        content_type: &str,
234    ) -> Result<Response, Error> {
235        let url: String = format!("{}/{}", get_api_url(), endpoint);
236
237        self.client
238            .post(&url)
239            .header(
240                "User-Agent",
241                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
242            )
243            .header("Content-Type", content_type)
244            .header("Authorization", format!("Bearer {}", self.api_key))
245            .json(&data)
246            .send()
247            .await
248    }
249
250    /// Sends a POST request to the API.
251    ///
252    /// # Arguments
253    ///
254    /// * `endpoint` - The API endpoint.
255    /// * `data` - The request data as a HashMap.
256    /// * `stream` - Whether streaming is enabled.
257    /// * `content_type` - The content type of the request.
258    ///
259    /// # Returns
260    ///
261    /// The response from the API.
262    async fn api_post(
263        &self,
264        endpoint: &str,
265        data: impl Serialize + std::fmt::Debug + Clone + Send + Sync,
266        content_type: &str,
267    ) -> Result<Response, Error> {
268        let fetch = || async {
269            self.api_post_base(endpoint, data.to_owned(), content_type)
270                .await
271        };
272
273        fetch
274            .retry(ExponentialBuilder::default().with_max_times(5))
275            .when(|err: &reqwest::Error| {
276                if let Some(status) = err.status() {
277                    status.is_server_error()
278                } else {
279                    err.is_timeout()
280                }
281            })
282            .await
283    }
284
285    /// Sends a GET request to the API.
286    ///
287    /// # Arguments
288    ///
289    /// * `endpoint` - The API endpoint.
290    ///
291    /// # Returns
292    ///
293    /// The response from the API as a JSON value.
294    async fn api_get_base<T: Serialize>(
295        &self,
296        endpoint: &str,
297        query_params: Option<&T>,
298    ) -> Result<serde_json::Value, reqwest::Error> {
299        let url = format!("{}/{}", get_api_url(), endpoint);
300        let res = self
301            .client
302            .get(&url)
303            .query(&query_params)
304            .header(
305                "User-Agent",
306                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
307            )
308            .header("Content-Type", "application/json")
309            .header("Authorization", format!("Bearer {}", self.api_key))
310            .send()
311            .await?;
312        parse_response(res).await
313    }
314
315    /// Sends a GET request to the API.
316    ///
317    /// # Arguments
318    ///
319    /// * `endpoint` - The API endpoint.
320    ///
321    /// # Returns
322    ///
323    /// The response from the API as a JSON value.
324    async fn api_get<T: Serialize>(
325        &self,
326        endpoint: &str,
327        query_params: Option<&T>,
328    ) -> Result<serde_json::Value, reqwest::Error> {
329        let fetch = || async { self.api_get_base(endpoint, query_params.to_owned()).await };
330
331        fetch
332            .retry(ExponentialBuilder::default().with_max_times(5))
333            .when(|err: &reqwest::Error| {
334                if let Some(status) = err.status() {
335                    status.is_server_error()
336                } else {
337                    err.is_timeout()
338                }
339            })
340            .await
341    }
342
343    /// Sends a DELETE request to the API.
344    ///
345    /// # Arguments
346    ///
347    /// * `endpoint` - The API endpoint.
348    /// * `params` - Optional request parameters.
349    /// * `stream` - Whether streaming is enabled.
350    /// * `content_type` - The content type of the request.
351    ///
352    /// # Returns
353    ///
354    /// The response from the API.
355    async fn api_delete_base(
356        &self,
357        endpoint: &str,
358        params: Option<HashMap<String, serde_json::Value>>,
359    ) -> Result<Response, Error> {
360        let url = format!("{}/v1/{}", get_api_url(), endpoint);
361        let request_builder = self
362            .client
363            .delete(&url)
364            .header(
365                "User-Agent",
366                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
367            )
368            .header("Content-Type", "application/json")
369            .header("Authorization", format!("Bearer {}", self.api_key));
370
371        let request_builder = if let Some(params) = params {
372            request_builder.json(&params)
373        } else {
374            request_builder
375        };
376
377        request_builder.send().await
378    }
379
380    /// Sends a DELETE request to the API.
381    ///
382    /// # Arguments
383    ///
384    /// * `endpoint` - The API endpoint.
385    /// * `params` - Optional request parameters.
386    /// * `stream` - Whether streaming is enabled.
387    /// * `content_type` - The content type of the request.
388    ///
389    /// # Returns
390    ///
391    /// The response from the API.
392    async fn api_delete(
393        &self,
394        endpoint: &str,
395        params: Option<HashMap<String, serde_json::Value>>,
396    ) -> Result<Response, Error> {
397        let fetch = || async { self.api_delete_base(endpoint, params.to_owned()).await };
398
399        fetch
400            .retry(ExponentialBuilder::default().with_max_times(5))
401            .when(|err: &reqwest::Error| {
402                if let Some(status) = err.status() {
403                    status.is_server_error()
404                } else {
405                    err.is_timeout()
406                }
407            })
408            .await
409    }
410
411    /// Scrapes a URL.
412    ///
413    /// # Arguments
414    ///
415    /// * `url` - The URL to scrape.
416    /// * `params` - Optional request parameters.
417    /// * `stream` - Whether streaming is enabled.
418    /// * `content_type` - The content type of the request.
419    ///
420    /// # Returns
421    ///
422    /// The response from the API as a JSON value.
423    pub async fn scrape_url(
424        &self,
425        url: &str,
426        params: Option<RequestParams>,
427        content_type: &str,
428    ) -> Result<serde_json::Value, reqwest::Error> {
429        let mut data = HashMap::new();
430
431        if let Ok(params) = serde_json::to_value(params) {
432            if let Some(ref p) = params.as_object() {
433                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
434            }
435        }
436
437        if !url.is_empty() {
438            data.insert(
439                "url".to_string(),
440                serde_json::Value::String(url.to_string()),
441            );
442        }
443
444        data.insert("limit".to_string(), serde_json::Value::Number(1.into()));
445
446        let res = self.api_post("crawl", data, content_type).await?;
447        parse_response(res).await
448    }
449
450    /// Crawls a URL.
451    ///
452    /// # Arguments
453    ///
454    /// * `url` - The URL to crawl.
455    /// * `params` - Optional request parameters.
456    /// * `stream` - Whether streaming is enabled.
457    /// * `content_type` - The content type of the request.
458    /// * `callback` - Optional callback function to handle each streamed chunk.
459    ///
460    /// # Returns
461    ///
462    /// The response from the API as a JSON value.
463    pub async fn crawl_url(
464        &self,
465        url: &str,
466        params: Option<RequestParams>,
467        stream: bool,
468        content_type: &str,
469        callback: Option<impl Fn(serde_json::Value) + Send>,
470    ) -> Result<serde_json::Value, reqwest::Error> {
471        use tokio_util::codec::{FramedRead, LinesCodec};
472
473        let mut data = HashMap::new();
474
475        if let Ok(params) = serde_json::to_value(params) {
476            if let Some(ref p) = params.as_object() {
477                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
478            }
479        }
480
481        data.insert("url".into(), serde_json::Value::String(url.to_string()));
482
483        let res = self.api_post("crawl", data, content_type).await?;
484
485        if stream {
486            if let Some(callback) = callback {
487                let stream = res.bytes_stream();
488
489                let stream_reader = tokio_util::io::StreamReader::new(
490                    stream
491                        .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
492                );
493
494                let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
495
496                while let Some(line_result) = lines.next().await {
497                    match line_result {
498                        Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
499                            Ok(value) => {
500                                callback(value);
501                            }
502                            Err(_e) => {
503                                continue;
504                            }
505                        },
506                        Err(_e) => return Ok(serde_json::Value::Null),
507                    }
508                }
509
510                Ok(serde_json::Value::Null)
511            } else {
512                Ok(serde_json::Value::Null)
513            }
514        } else {
515            parse_response(res).await
516        }
517    }
518
519    /// Fetches links from a URL.
520    ///
521    /// # Arguments
522    ///
523    /// * `url` - The URL to fetch links from.
524    /// * `params` - Optional request parameters.
525    /// * `stream` - Whether streaming is enabled.
526    /// * `content_type` - The content type of the request.
527    ///
528    /// # Returns
529    ///
530    /// The response from the API as a JSON value.
531    pub async fn links(
532        &self,
533        url: &str,
534        params: Option<RequestParams>,
535        _stream: bool,
536        content_type: &str,
537    ) -> Result<serde_json::Value, reqwest::Error> {
538        let mut data = HashMap::new();
539
540        if let Ok(params) = serde_json::to_value(params) {
541            if let Some(ref p) = params.as_object() {
542                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
543            }
544        }
545
546        data.insert("url".into(), serde_json::Value::String(url.to_string()));
547
548        let res = self.api_post("links", data, content_type).await?;
549        parse_response(res).await
550    }
551
552    /// Takes a screenshot of a URL.
553    ///
554    /// # Arguments
555    ///
556    /// * `url` - The URL to take a screenshot of.
557    /// * `params` - Optional request parameters.
558    /// * `stream` - Whether streaming is enabled.
559    /// * `content_type` - The content type of the request.
560    ///
561    /// # Returns
562    ///
563    /// The response from the API as a JSON value.
564    pub async fn screenshot(
565        &self,
566        url: &str,
567        params: Option<RequestParams>,
568        _stream: bool,
569        content_type: &str,
570    ) -> Result<serde_json::Value, reqwest::Error> {
571        let mut data = HashMap::new();
572
573        if let Ok(params) = serde_json::to_value(params) {
574            if let Some(ref p) = params.as_object() {
575                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
576            }
577        }
578
579        data.insert("url".into(), serde_json::Value::String(url.to_string()));
580
581        let res = self.api_post("screenshot", data, content_type).await?;
582        parse_response(res).await
583    }
584
585    /// Searches for a query.
586    ///
587    /// # Arguments
588    ///
589    /// * `q` - The query to search for.
590    /// * `params` - Optional request parameters.
591    /// * `stream` - Whether streaming is enabled.
592    /// * `content_type` - The content type of the request.
593    ///
594    /// # Returns
595    ///
596    /// The response from the API as a JSON value.
597    pub async fn search(
598        &self,
599        q: &str,
600        params: Option<SearchRequestParams>,
601        _stream: bool,
602        content_type: &str,
603    ) -> Result<serde_json::Value, reqwest::Error> {
604        let body = match params {
605            Some(mut params) => {
606                params.search = q.to_string();
607                params
608            }
609            _ => {
610                let mut params = SearchRequestParams::default();
611                params.search = q.to_string();
612                params
613            }
614        };
615
616        let res = self.api_post("search", body, content_type).await?;
617
618        parse_response(res).await
619    }
620
621    /// Transforms data.
622    ///
623    /// # Arguments
624    ///
625    /// * `data` - The data to transform.
626    /// * `params` - Optional request parameters.
627    /// * `stream` - Whether streaming is enabled.
628    /// * `content_type` - The content type of the request.
629    ///
630    /// # Returns
631    ///
632    /// The response from the API as a JSON value.
633    pub async fn transform(
634        &self,
635        data: Vec<HashMap<&str, &str>>,
636        params: Option<TransformParams>,
637        _stream: bool,
638        content_type: &str,
639    ) -> Result<serde_json::Value, reqwest::Error> {
640        let mut payload = HashMap::new();
641
642        if let Ok(params) = serde_json::to_value(params) {
643            if let Some(ref p) = params.as_object() {
644                payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
645            }
646        }
647
648        if let Ok(d) = serde_json::to_value(data) {
649            payload.insert("data".into(), d);
650        }
651
652        let res = self.api_post("transform", payload, content_type).await?;
653
654        parse_response(res).await
655    }
656
657    /// Extracts contacts from a URL.
658    ///
659    /// # Arguments
660    ///
661    /// * `url` - The URL to extract contacts from.
662    /// * `params` - Optional request parameters.
663    /// * `stream` - Whether streaming is enabled.
664    /// * `content_type` - The content type of the request.
665    ///
666    /// # Returns
667    ///
668    /// The response from the API as a JSON value.
669    pub async fn extract_contacts(
670        &self,
671        url: &str,
672        params: Option<RequestParams>,
673        _stream: bool,
674        content_type: &str,
675    ) -> Result<serde_json::Value, reqwest::Error> {
676        let mut data = HashMap::new();
677
678        if let Ok(params) = serde_json::to_value(params) {
679            if let Ok(params) = serde_json::to_value(params) {
680                if let Some(ref p) = params.as_object() {
681                    data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
682                }
683            }
684        }
685
686        match serde_json::to_value(url) {
687            Ok(u) => {
688                data.insert("url".into(), u);
689            }
690            _ => (),
691        }
692
693        let res = self
694            .api_post("pipeline/extract-contacts", data, content_type)
695            .await?;
696
697        parse_response(res).await
698    }
699
700    /// Labels data from a URL.
701    ///
702    /// # Arguments
703    ///
704    /// * `url` - The URL to label data from.
705    /// * `params` - Optional request parameters.
706    /// * `stream` - Whether streaming is enabled.
707    /// * `content_type` - The content type of the request.
708    ///
709    /// # Returns
710    ///
711    /// The response from the API as a JSON value.
712    pub async fn label(
713        &self,
714        url: &str,
715        params: Option<RequestParams>,
716        _stream: bool,
717        content_type: &str,
718    ) -> Result<serde_json::Value, reqwest::Error> {
719        let mut data = HashMap::new();
720
721        if let Ok(params) = serde_json::to_value(params) {
722            if let Ok(params) = serde_json::to_value(params) {
723                if let Some(ref p) = params.as_object() {
724                    data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
725                }
726            }
727        }
728
729        data.insert("url".into(), serde_json::Value::String(url.to_string()));
730
731        let res = self.api_post("pipeline/label", data, content_type).await?;
732        parse_response(res).await
733    }
734
735    /// Download a record from storage.
736    ///
737    /// # Arguments
738    ///
739    /// * `url` - Optional exact url of the file in storage.
740    /// * `options` - Optional options.
741    /// * `stream` - Whether streaming is enabled.
742    ///
743    /// # Returns
744    ///
745    /// The response from the API.
746    pub async fn download(
747        &self,
748        url: Option<&str>,
749        options: Option<HashMap<&str, i32>>,
750    ) -> Result<reqwest::Response, reqwest::Error> {
751        let mut params = HashMap::new();
752
753        if let Some(url) = url {
754            params.insert("url".to_string(), url.to_string());
755        }
756
757        if let Some(options) = options {
758            for (key, value) in options {
759                params.insert(key.to_string(), value.to_string());
760            }
761        }
762
763        let url = format!("{}/v1/data/download", get_api_url());
764        let request = self
765            .client
766            .get(&url)
767            .header(
768                "User-Agent",
769                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
770            )
771            .header("Content-Type", "application/octet-stream")
772            .header("Authorization", format!("Bearer {}", self.api_key))
773            .query(&params);
774
775        let res = request.send().await?;
776
777        Ok(res)
778    }
779
780    /// Creates a signed URL of a file from storage.
781    ///
782    /// # Arguments
783    ///
784    /// * `url` - Optional exact url of the file in storage.
785    /// * `options` - Optional options.
786    /// * `stream` - Whether streaming is enabled.
787    ///
788    /// # Returns
789    ///
790    /// The response from the API.
791    pub async fn create_signed_url(
792        &self,
793        url: Option<&str>,
794        options: Option<HashMap<&str, i32>>,
795    ) -> Result<serde_json::Value, reqwest::Error> {
796        let mut params = HashMap::new();
797
798        if let Some(options) = options {
799            for (key, value) in options {
800                params.insert(key.to_string(), value.to_string());
801            }
802        }
803
804        if let Some(url) = url {
805            params.insert("url".to_string(), url.to_string());
806        }
807
808        let url = format!("{}/v1/data/sign-url", get_api_url());
809        let request = self
810            .client
811            .get(&url)
812            .header(
813                "User-Agent",
814                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
815            )
816            .header("Authorization", format!("Bearer {}", self.api_key))
817            .query(&params);
818
819        let res = request.send().await?;
820
821        parse_response(res).await
822    }
823
824    /// Gets the crawl state of a URL.
825    ///
826    /// # Arguments
827    ///
828    /// * `url` - The URL to get the crawl state of.
829    /// * `params` - Optional request parameters.
830    /// * `stream` - Whether streaming is enabled.
831    /// * `content_type` - The content type of the request.
832    ///
833    /// # Returns
834    ///
835    pub async fn get_crawl_state(
836        &self,
837        url: &str,
838        params: Option<RequestParams>,
839        content_type: &str,
840    ) -> Result<serde_json::Value, reqwest::Error> {
841        let mut payload = HashMap::new();
842        payload.insert("url".into(), serde_json::Value::String(url.to_string()));
843        payload.insert(
844            "contentType".into(),
845            serde_json::Value::String(content_type.to_string()),
846        );
847
848        if let Ok(params) = serde_json::to_value(params) {
849            if let Ok(params) = serde_json::to_value(params) {
850                if let Some(ref p) = params.as_object() {
851                    payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
852                }
853            }
854        }
855
856        let res = self
857            .api_post("data/crawl_state", payload, content_type)
858            .await?;
859        parse_response(res).await
860    }
861
862    /// Get the account credits left.
863    pub async fn get_credits(&self) -> Result<serde_json::Value, reqwest::Error> {
864        self.api_get::<serde_json::Value>("data/credits", None)
865            .await
866    }
867
868    /// Send a request for a data record.
869    pub async fn data_post(
870        &self,
871        table: &str,
872        data: Option<RequestParams>,
873    ) -> Result<serde_json::Value, reqwest::Error> {
874        let res = self
875            .api_post(&format!("data/{}", table), data, "application/json")
876            .await?;
877        parse_response(res).await
878    }
879
880    /// Query a record from the global DB.
881    pub async fn query(&self, params: &QueryRequest) -> Result<serde_json::Value, reqwest::Error> {
882        let res = self
883            .api_get::<QueryRequest>(&"data/query", Some(params))
884            .await?;
885
886        Ok(res)
887    }
888
889    /// Get a table record.
890    pub async fn data_get(
891        &self,
892        table: &str,
893        params: Option<RequestParams>,
894    ) -> Result<serde_json::Value, reqwest::Error> {
895        let mut payload = HashMap::new();
896
897        if let Some(params) = params {
898            if let Ok(p) = serde_json::to_value(params) {
899                if let Some(o) = p.as_object() {
900                    payload.extend(o.iter().map(|(k, v)| (k.as_str(), v.clone())));
901                }
902            }
903        }
904
905        let res = self
906            .api_get::<serde_json::Value>(&format!("data/{}", table), None)
907            .await?;
908        Ok(res)
909    }
910
911    /// Delete a record.
912    pub async fn data_delete(
913        &self,
914        table: &str,
915        params: Option<RequestParams>,
916    ) -> Result<serde_json::Value, reqwest::Error> {
917        let mut payload = HashMap::new();
918
919        if let Ok(params) = serde_json::to_value(params) {
920            if let Ok(params) = serde_json::to_value(params) {
921                if let Some(ref p) = params.as_object() {
922                    payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
923                }
924            }
925        }
926
927        let res = self
928            .api_delete(&format!("data/{}", table), Some(payload))
929            .await?;
930        parse_response(res).await
931    }
932}
933
#[cfg(test)]
mod tests {
    use super::*;
    use dotenv::dotenv;
    use lazy_static::lazy_static;
    use reqwest::ClientBuilder;

    /// Page targeted by the URL-based tests.
    const TARGET: &str = "https://example.com";
    /// Content type sent with every request in these tests.
    const JSON: &str = "application/json";

    lazy_static! {
        // Shared client; the API key comes from the environment (optionally
        // loaded from a `.env` file).
        static ref SPIDER_CLIENT: Spider = {
            dotenv().ok();
            let http = ClientBuilder::new()
                .user_agent("SpiderBot")
                .build()
                .unwrap();

            Spider::new_with_client(None, http).expect("client to build")
        };
    }

    #[tokio::test]
    #[ignore]
    async fn test_scrape_url() {
        let outcome = SPIDER_CLIENT.scrape_url(TARGET, None, JSON).await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_crawl_url() {
        let outcome = SPIDER_CLIENT
            .crawl_url(TARGET, None, false, JSON, None::<fn(serde_json::Value)>)
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_links() {
        let outcome = SPIDER_CLIENT.links(TARGET, None, false, JSON).await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_screenshot() {
        let params = RequestParams {
            limit: Some(1),
            ..Default::default()
        };

        let outcome = SPIDER_CLIENT
            .screenshot(TARGET, Some(params), false, JSON)
            .await;
        assert!(outcome.is_ok());
    }

    // #[tokio::test(flavor = "multi_thread")]
    // async fn test_search() {
    //     let mut params = SearchRequestParams::default();

    //     params.search_limit = Some(1);
    //     params.num = Some(1);
    //     params.fetch_page_content = Some(false);

    //     let response = SPIDER_CLIENT
    //         .search("a sports website", Some(params), false, "application/json")
    //         .await;

    //     assert!(response.is_ok());
    // }

    #[tokio::test]
    #[ignore]
    async fn test_transform() {
        let records = vec![HashMap::from([(
            "<html><body><h1>Transformation</h1></body></html>",
            "",
        )])];

        let outcome = SPIDER_CLIENT.transform(records, None, false, JSON).await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_extract_contacts() {
        let outcome = SPIDER_CLIENT
            .extract_contacts(TARGET, None, false, JSON)
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_label() {
        let outcome = SPIDER_CLIENT.label(TARGET, None, false, JSON).await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_create_signed_url() {
        let outcome = SPIDER_CLIENT
            .create_signed_url(Some("example.com"), None)
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_get_crawl_state() {
        let outcome = SPIDER_CLIENT.get_crawl_state(TARGET, None, JSON).await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_query() {
        let request = QueryRequest {
            domain: Some("spider.cloud".into()),
            ..Default::default()
        };

        let outcome = SPIDER_CLIENT.query(&request).await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_get_credits() {
        let outcome = SPIDER_CLIENT.get_credits().await;
        assert!(outcome.is_ok());
    }
}