spider_client/
lib.rs

1//! The `spider-client` module provides the primary interface and
2//! functionalities for the Spider web crawler library, which is
3//! designed for rapid and efficient crawling of web pages to gather
4//! links using isolated contexts.
5//!
6//! ### Features
7//!
8//! - **Multi-threaded Crawling:** Spider can utilize multiple
9//!   threads to parallelize the crawling process, drastically
10//!   improving performance and allowing the ability to gather
11//!   millions of pages in a short time.
12//!
13//! - **Configurable:** The library provides various options to
14//!   configure the crawling behavior, such as setting the depth
15//!   of crawling, user-agent strings, delays between requests,
16//!   and more.
17//!
18//! - **Link Gathering:** One of the primary objectives of Spider is to
19//!   gather and manage links from the web pages it crawls,
20//!   compiling them into a structured format for further use.
21//!
22//! ### Examples
23//!
24//! Basic usage of the Spider client might look like this:
25//!
26//! ```rust
27//! use spider_client::{Spider, RequestType, RequestParams};
28//! use tokio;
29//!
30//!  # #[ignore]
31//! #[tokio::main]
32//! async fn main() {
33//!     let spider = Spider::new(Some("myspiderapikey".into())).expect("API key must be provided");
34//!
35//!     let url = "https://spider.cloud";
36//!
37//!     // Scrape a single URL
38//!     let scraped_data = spider.scrape_url(url, None, "application/json").await.expect("Failed to scrape the URL");
39//!
40//!     println!("Scraped Data: {:?}", scraped_data);
41//!
42//!     // Crawl a website
43//!     let crawler_params = RequestParams {
44//!         limit: Some(1),
45//!         proxy_enabled: Some(true),
46//!         metadata: Some(false),
47//!         request: Some(RequestType::Http),
48//!         ..Default::default()
49//!     };
50//!
51//!     let crawl_result = spider.crawl_url(url, Some(crawler_params), false, "application/json", None::<fn(serde_json::Value)>).await.expect("Failed to crawl the URL");
52//!
53//!     println!("Crawl Result: {:?}", crawl_result);
54//! }
55//! ```
56//!
57//! ### Modules
58//!
//! - `shapes`: Request and response types used by the Spider client
//!   (re-exported at the crate root).
61//!
62
63pub mod shapes;
64
65use backon::ExponentialBuilder;
66use backon::Retryable;
67use reqwest::Client;
68use reqwest::{Error, Response};
69use serde::Serialize;
70use std::collections::HashMap;
71use tokio_stream::StreamExt;
72pub use shapes::{request::*, response::*};
73use std::sync::OnceLock;
74
/// Process-wide cache for the resolved API base URL.
static API_URL: OnceLock<String> = OnceLock::new();

/// Returns the base URL of the Spider API.
///
/// The `SPIDER_API_URL` environment variable is consulted on the first call
/// only; when it cannot be read, `https://api.spider.cloud` is used. The
/// resolved value is cached, so later changes to the variable have no effect.
pub fn get_api_url() -> &'static str {
    let resolved = API_URL.get_or_init(|| match std::env::var("SPIDER_API_URL") {
        Ok(custom) => custom,
        Err(_) => String::from("https://api.spider.cloud"),
    });

    resolved.as_str()
}
83
84/// Represents a Spider with API key and HTTP client.
85#[derive(Debug, Default)]
86pub struct Spider {
87    /// The Spider API key.
88    pub api_key: String,
89    /// The Spider Client to re-use.
90    pub client: Client,
91}
92
93/// Handle the json response.
94pub async fn handle_json(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
95    res.json().await
96}
97
98/// Handle the jsonl response.
99pub async fn handle_jsonl(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
100    let text = res.text().await?;
101    let lines = text
102        .lines()
103        .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
104        .collect::<Vec<_>>();
105    Ok(serde_json::Value::Array(lines))
106}
107
108/// Handle the CSV response.
109#[cfg(feature = "csv")]
110pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
111    use std::collections::HashMap;
112    let text = res.text().await?;
113    let mut rdr = csv::Reader::from_reader(text.as_bytes());
114    let records: Vec<HashMap<String, String>> = rdr.deserialize().filter_map(Result::ok).collect();
115
116    if let Ok(record) = serde_json::to_value(records) {
117        Ok(record)
118    } else {
119        Ok(serde_json::Value::String(text))
120    }
121}
122
123#[cfg(not(feature = "csv"))]
124pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
125    handle_text(res).await
126}
127
128/// Basic handle response to text
129pub async fn handle_text(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
130    Ok(serde_json::Value::String(
131        res.text().await.unwrap_or_default(),
132    ))
133}
134
135/// Handle the XML response.
136#[cfg(feature = "csv")]
137pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
138    let text = res.text().await?;
139    match quick_xml::de::from_str::<serde_json::Value>(&text) {
140        Ok(val) => Ok(val),
141        Err(_) => Ok(serde_json::Value::String(text)),
142    }
143}
144
145#[cfg(not(feature = "csv"))]
146/// Handle the XML response.
147pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
148    handle_text(res).await
149}
150
151pub async fn parse_response(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
152    let content_type = res
153        .headers()
154        .get(reqwest::header::CONTENT_TYPE)
155        .and_then(|v| v.to_str().ok())
156        .unwrap_or_default()
157        .to_ascii_lowercase();
158
159    if content_type.contains("json") && !content_type.contains("jsonl") {
160        handle_json(res).await
161    } else if content_type.contains("jsonl") || content_type.contains("ndjson") {
162        handle_jsonl(res).await
163    } else if content_type.contains("csv") {
164        handle_csv(res).await
165    } else if content_type.contains("xml") {
166        handle_xml(res).await
167    } else {
168        handle_text(res).await
169    }
170}
171
172impl Spider {
173    /// Creates a new instance of Spider.
174    ///
175    /// # Arguments
176    ///
177    /// * `api_key` - An optional API key. Defaults to using the 'SPIDER_API_KEY' env variable.
178    ///
179    /// # Returns
180    ///
181    /// A new instance of Spider or an error string if no API key is provided.
182    pub fn new(api_key: Option<String>) -> Result<Self, &'static str> {
183        let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
184
185        match api_key {
186            Some(key) => Ok(Self {
187                api_key: key,
188                client: Client::new(),
189            }),
190            None => Err("No API key provided"),
191        }
192    }
193
194    /// Creates a new instance of Spider.
195    ///
196    /// # Arguments
197    ///
198    /// * `api_key` - An optional API key. Defaults to using the 'SPIDER_API_KEY' env variable.
199    /// * `client` - A custom client to pass in.
200    ///
201    /// # Returns
202    ///
203    /// A new instance of Spider or an error string if no API key is provided.
204    pub fn new_with_client(api_key: Option<String>, client: Client) -> Result<Self, &'static str> {
205        let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
206
207        match api_key {
208            Some(key) => Ok(Self {
209                api_key: key,
210                client,
211            }),
212            None => Err("No API key provided"),
213        }
214    }
215
216    /// Sends a POST request to the API.
217    ///
218    /// # Arguments
219    ///
220    /// * `endpoint` - The API endpoint.
221    /// * `data` - The request data as a HashMap.
222    /// * `stream` - Whether streaming is enabled.
223    /// * `content_type` - The content type of the request.
224    ///
225    /// # Returns
226    ///
227    /// The response from the API.
228    async fn api_post_base(
229        &self,
230        endpoint: &str,
231        data: impl Serialize + Sized + std::fmt::Debug,
232        content_type: &str,
233    ) -> Result<Response, Error> {
234        let url: String = format!("{}/{}", get_api_url(), endpoint);
235
236        self.client
237            .post(&url)
238            .header(
239                "User-Agent",
240                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
241            )
242            .header("Content-Type", content_type)
243            .header("Authorization", format!("Bearer {}", self.api_key))
244            .json(&data)
245            .send()
246            .await
247    }
248
249    /// Sends a POST request to the API.
250    ///
251    /// # Arguments
252    ///
253    /// * `endpoint` - The API endpoint.
254    /// * `data` - The request data as a HashMap.
255    /// * `stream` - Whether streaming is enabled.
256    /// * `content_type` - The content type of the request.
257    ///
258    /// # Returns
259    ///
260    /// The response from the API.
261    pub async fn api_post(
262        &self,
263        endpoint: &str,
264        data: impl Serialize + std::fmt::Debug + Clone + Send + Sync,
265        content_type: &str,
266    ) -> Result<Response, Error> {
267        let fetch = || async {
268            self.api_post_base(endpoint, data.to_owned(), content_type)
269                .await
270        };
271
272        fetch
273            .retry(ExponentialBuilder::default().with_max_times(5))
274            .when(|err: &reqwest::Error| {
275                if let Some(status) = err.status() {
276                    status.is_server_error()
277                } else {
278                    err.is_timeout()
279                }
280            })
281            .await
282    }
283
284    /// Sends a GET request to the API.
285    ///
286    /// # Arguments
287    ///
288    /// * `endpoint` - The API endpoint.
289    ///
290    /// # Returns
291    ///
292    /// The response from the API as a JSON value.
293    async fn api_get_base<T: Serialize>(
294        &self,
295        endpoint: &str,
296        query_params: Option<&T>,
297    ) -> Result<serde_json::Value, reqwest::Error> {
298        let url = format!("{}/{}", get_api_url(), endpoint);
299        let res = self
300            .client
301            .get(&url)
302            .query(&query_params)
303            .header(
304                "User-Agent",
305                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
306            )
307            .header("Content-Type", "application/json")
308            .header("Authorization", format!("Bearer {}", self.api_key))
309            .send()
310            .await?;
311        parse_response(res).await
312    }
313
314    /// Sends a GET request to the API.
315    ///
316    /// # Arguments
317    ///
318    /// * `endpoint` - The API endpoint.
319    ///
320    /// # Returns
321    ///
322    /// The response from the API as a JSON value.
323    pub async fn api_get<T: Serialize>(
324        &self,
325        endpoint: &str,
326        query_params: Option<&T>,
327    ) -> Result<serde_json::Value, reqwest::Error> {
328        let fetch = || async { self.api_get_base(endpoint, query_params.to_owned()).await };
329
330        fetch
331            .retry(ExponentialBuilder::default().with_max_times(5))
332            .when(|err: &reqwest::Error| {
333                if let Some(status) = err.status() {
334                    status.is_server_error()
335                } else {
336                    err.is_timeout()
337                }
338            })
339            .await
340    }
341
342    /// Sends a DELETE request to the API.
343    ///
344    /// # Arguments
345    ///
346    /// * `endpoint` - The API endpoint.
347    /// * `params` - Optional request parameters.
348    /// * `stream` - Whether streaming is enabled.
349    /// * `content_type` - The content type of the request.
350    ///
351    /// # Returns
352    ///
353    /// The response from the API.
354    async fn api_delete_base(
355        &self,
356        endpoint: &str,
357        params: Option<HashMap<String, serde_json::Value>>,
358    ) -> Result<Response, Error> {
359        let url = format!("{}/v1/{}", get_api_url(), endpoint);
360        let request_builder = self
361            .client
362            .delete(&url)
363            .header(
364                "User-Agent",
365                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
366            )
367            .header("Content-Type", "application/json")
368            .header("Authorization", format!("Bearer {}", self.api_key));
369
370        let request_builder = if let Some(params) = params {
371            request_builder.json(&params)
372        } else {
373            request_builder
374        };
375
376        request_builder.send().await
377    }
378
379    /// Sends a DELETE request to the API.
380    ///
381    /// # Arguments
382    ///
383    /// * `endpoint` - The API endpoint.
384    /// * `params` - Optional request parameters.
385    /// * `stream` - Whether streaming is enabled.
386    /// * `content_type` - The content type of the request.
387    ///
388    /// # Returns
389    ///
390    /// The response from the API.
391    pub async fn api_delete(
392        &self,
393        endpoint: &str,
394        params: Option<HashMap<String, serde_json::Value>>,
395    ) -> Result<Response, Error> {
396        let fetch = || async { self.api_delete_base(endpoint, params.to_owned()).await };
397
398        fetch
399            .retry(ExponentialBuilder::default().with_max_times(5))
400            .when(|err: &reqwest::Error| {
401                if let Some(status) = err.status() {
402                    status.is_server_error()
403                } else {
404                    err.is_timeout()
405                }
406            })
407            .await
408    }
409
410    /// Scrapes a URL.
411    ///
412    /// # Arguments
413    ///
414    /// * `url` - The URL to scrape.
415    /// * `params` - Optional request parameters.
416    /// * `stream` - Whether streaming is enabled.
417    /// * `content_type` - The content type of the request.
418    ///
419    /// # Returns
420    ///
421    /// The response from the API as a JSON value.
422    pub async fn scrape_url(
423        &self,
424        url: &str,
425        params: Option<RequestParams>,
426        content_type: &str,
427    ) -> Result<serde_json::Value, reqwest::Error> {
428        let mut data = HashMap::new();
429
430        if let Ok(params) = serde_json::to_value(params) {
431            if let Some(ref p) = params.as_object() {
432                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
433            }
434        }
435
436        if !url.is_empty() {
437            data.insert(
438                "url".to_string(),
439                serde_json::Value::String(url.to_string()),
440            );
441        }
442
443        data.insert("limit".to_string(), serde_json::Value::Number(1.into()));
444
445        let res = self.api_post("crawl", data, content_type).await?;
446        parse_response(res).await
447    }
448
449    /// Scrapes multi URLs.
450    ///
451    /// # Arguments
452    ///
453    /// * `url` - The URL to scrape.
454    /// * `params` - Optional request parameters.
455    /// * `stream` - Whether streaming is enabled.
456    /// * `content_type` - The content type of the request.
457    ///
458    /// # Returns
459    ///
460    /// The response from the API as a JSON value.
461    pub async fn multi_scrape_url(
462        &self,
463        params: Option<Vec<RequestParams>>,
464        content_type: &str,
465    ) -> Result<serde_json::Value, reqwest::Error> {
466        let mut data = HashMap::new();
467
468if let Ok(mut params) = serde_json::to_value(params) {
469    if let Some(obj) = params.as_object_mut() {
470        obj.insert("limit".to_string(), serde_json::Value::Number(1.into()));
471        data.extend(obj.iter().map(|(k, v)| (k.clone(), v.clone())));
472    }
473}
474        let res = self.api_post("crawl", data, content_type).await?;
475        parse_response(res).await
476    }
477
478
479    /// Crawls a URL.
480    ///
481    /// # Arguments
482    ///
483    /// * `url` - The URL to crawl.
484    /// * `params` - Optional request parameters.
485    /// * `stream` - Whether streaming is enabled.
486    /// * `content_type` - The content type of the request.
487    /// * `callback` - Optional callback function to handle each streamed chunk.
488    ///
489    /// # Returns
490    ///
491    /// The response from the API as a JSON value.
492    pub async fn crawl_url(
493        &self,
494        url: &str,
495        params: Option<RequestParams>,
496        stream: bool,
497        content_type: &str,
498        callback: Option<impl Fn(serde_json::Value) + Send>,
499    ) -> Result<serde_json::Value, reqwest::Error> {
500        use tokio_util::codec::{FramedRead, LinesCodec};
501
502        let mut data = HashMap::new();
503
504        if let Ok(params) = serde_json::to_value(params) {
505            if let Some(ref p) = params.as_object() {
506                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
507            }
508        }
509
510        data.insert("url".into(), serde_json::Value::String(url.to_string()));
511
512        let res = self.api_post("crawl", data, content_type).await?;
513
514        if stream {
515            if let Some(callback) = callback {
516                let stream = res.bytes_stream();
517
518                let stream_reader = tokio_util::io::StreamReader::new(
519                    stream
520                        .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
521                );
522
523                let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
524
525                while let Some(line_result) = lines.next().await {
526                    match line_result {
527                        Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
528                            Ok(value) => {
529                                callback(value);
530                            }
531                            Err(_e) => {
532                                continue;
533                            }
534                        },
535                        Err(_e) => return Ok(serde_json::Value::Null),
536                    }
537                }
538
539                Ok(serde_json::Value::Null)
540            } else {
541                Ok(serde_json::Value::Null)
542            }
543        } else {
544            parse_response(res).await
545        }
546    }
547
548    /// Crawls multiple URLs.
549    ///
550    /// # Arguments
551    ///
552    /// * `url` - The URL to crawl.
553    /// * `params` - Optional request parameters.
554    /// * `stream` - Whether streaming is enabled.
555    /// * `content_type` - The content type of the request.
556    /// * `callback` - Optional callback function to handle each streamed chunk.
557    ///
558    /// # Returns
559    ///
560    /// The response from the API as a JSON value.
561    pub async fn multi_crawl_url(
562        &self,
563        params: Option<Vec<RequestParams>>,
564        stream: bool,
565        content_type: &str,
566        callback: Option<impl Fn(serde_json::Value) + Send>,
567    ) -> Result<serde_json::Value, reqwest::Error> {
568        use tokio_util::codec::{FramedRead, LinesCodec};
569
570
571        let res = self.api_post("crawl", params, content_type).await?;
572
573        if stream {
574            if let Some(callback) = callback {
575                let stream = res.bytes_stream();
576
577                let stream_reader = tokio_util::io::StreamReader::new(
578                    stream
579                        .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
580                );
581
582                let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
583
584                while let Some(line_result) = lines.next().await {
585                    match line_result {
586                        Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
587                            Ok(value) => {
588                                callback(value);
589                            }
590                            Err(_e) => {
591                                continue;
592                            }
593                        },
594                        Err(_e) => return Ok(serde_json::Value::Null),
595                    }
596                }
597
598                Ok(serde_json::Value::Null)
599            } else {
600                Ok(serde_json::Value::Null)
601            }
602        } else {
603            parse_response(res).await
604        }
605    }
606
607
608    /// Fetches links from a URL.
609    ///
610    /// # Arguments
611    ///
612    /// * `url` - The URL to fetch links from.
613    /// * `params` - Optional request parameters.
614    /// * `stream` - Whether streaming is enabled.
615    /// * `content_type` - The content type of the request.
616    ///
617    /// # Returns
618    ///
619    /// The response from the API as a JSON value.
620    pub async fn links(
621        &self,
622        url: &str,
623        params: Option<RequestParams>,
624        _stream: bool,
625        content_type: &str,
626    ) -> Result<serde_json::Value, reqwest::Error> {
627        let mut data = HashMap::new();
628
629        if let Ok(params) = serde_json::to_value(params) {
630            if let Some(ref p) = params.as_object() {
631                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
632            }
633        }
634
635        data.insert("url".into(), serde_json::Value::String(url.to_string()));
636
637        let res = self.api_post("links", data, content_type).await?;
638        parse_response(res).await
639    }
640
641
642    /// Fetches links from a URLs.
643    ///
644    /// # Arguments
645    ///
646    /// * `url` - The URL to fetch links from.
647    /// * `params` - Optional request parameters.
648    /// * `stream` - Whether streaming is enabled.
649    /// * `content_type` - The content type of the request.
650    ///
651    /// # Returns
652    ///
653    /// The response from the API as a JSON value.
654    pub async fn multi_links(
655        &self,
656        params: Option<Vec<RequestParams>>,
657        _stream: bool,
658        content_type: &str,
659    ) -> Result<serde_json::Value, reqwest::Error> {
660        let res = self.api_post("links", params, content_type).await?;
661        parse_response(res).await
662    }
663
664    
665    /// Takes a screenshot of a URL.
666    ///
667    /// # Arguments
668    ///
669    /// * `url` - The URL to take a screenshot of.
670    /// * `params` - Optional request parameters.
671    /// * `stream` - Whether streaming is enabled.
672    /// * `content_type` - The content type of the request.
673    ///
674    /// # Returns
675    ///
676    /// The response from the API as a JSON value.
677    pub async fn screenshot(
678        &self,
679        url: &str,
680        params: Option<RequestParams>,
681        _stream: bool,
682        content_type: &str,
683    ) -> Result<serde_json::Value, reqwest::Error> {
684        let mut data = HashMap::new();
685
686        if let Ok(params) = serde_json::to_value(params) {
687            if let Some(ref p) = params.as_object() {
688                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
689            }
690        }
691
692        data.insert("url".into(), serde_json::Value::String(url.to_string()));
693
694        let res = self.api_post("screenshot", data, content_type).await?;
695        parse_response(res).await
696    }
697
698    /// Takes a screenshot of multiple URLs.
699    ///
700    /// # Arguments
701    ///
702    /// * `url` - The URL to take a screenshot of.
703    /// * `params` - Optional request parameters.
704    /// * `stream` - Whether streaming is enabled.
705    /// * `content_type` - The content type of the request.
706    ///
707    /// # Returns
708    ///
709    /// The response from the API as a JSON value.
710    pub async fn multi_screenshot(
711        &self,
712        params: Option<Vec<RequestParams>>,
713        _stream: bool,
714        content_type: &str,
715    ) -> Result<serde_json::Value, reqwest::Error> {
716        let res = self.api_post("screenshot", params, content_type).await?;
717        parse_response(res).await
718    }
719
720    /// Searches for a query.
721    ///
722    /// # Arguments
723    ///
724    /// * `q` - The query to search for.
725    /// * `params` - Optional request parameters.
726    /// * `stream` - Whether streaming is enabled.
727    /// * `content_type` - The content type of the request.
728    ///
729    /// # Returns
730    ///
731    /// The response from the API as a JSON value.
732    pub async fn search(
733        &self,
734        q: &str,
735        params: Option<SearchRequestParams>,
736        _stream: bool,
737        content_type: &str,
738    ) -> Result<serde_json::Value, reqwest::Error> {
739        let body = match params {
740            Some(mut params) => {
741                params.search = q.to_string();
742                params
743            }
744            _ => {
745                let mut params = SearchRequestParams::default();
746                params.search = q.to_string();
747                params
748            }
749        };
750
751        let res = self.api_post("search", body, content_type).await?;
752
753        parse_response(res).await
754    }
755
756    /// Searches for multiple querys.
757    ///
758    /// # Arguments
759    ///
760    /// * `q` - The query to search for.
761    /// * `params` - Optional request parameters.
762    /// * `stream` - Whether streaming is enabled.
763    /// * `content_type` - The content type of the request.
764    ///
765    /// # Returns
766    ///
767    /// The response from the API as a JSON value.
768    pub async fn multi_search(
769        &self,
770        params: Option<Vec<SearchRequestParams>>,
771        content_type: &str,
772    ) -> Result<serde_json::Value, reqwest::Error> {
773        let res = self.api_post("search", params, content_type).await?;
774        parse_response(res).await
775    }
776
777    /// Transforms data.
778    ///
779    /// # Arguments
780    ///
781    /// * `data` - The data to transform.
782    /// * `params` - Optional request parameters.
783    /// * `stream` - Whether streaming is enabled.
784    /// * `content_type` - The content type of the request.
785    ///
786    /// # Returns
787    ///
788    /// The response from the API as a JSON value.
789    pub async fn transform(
790        &self,
791        data: Vec<HashMap<&str, &str>>,
792        params: Option<TransformParams>,
793        _stream: bool,
794        content_type: &str,
795    ) -> Result<serde_json::Value, reqwest::Error> {
796        let mut payload = HashMap::new();
797
798        if let Ok(params) = serde_json::to_value(params) {
799            if let Some(ref p) = params.as_object() {
800                payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
801            }
802        }
803
804        if let Ok(d) = serde_json::to_value(data) {
805            payload.insert("data".into(), d);
806        }
807
808        let res = self.api_post("transform", payload, content_type).await?;
809
810        parse_response(res).await
811    }
812
813    /// Download a record from storage.
814    ///
815    /// # Arguments
816    ///
817    /// * `url` - Optional exact url of the file in storage.
818    /// * `options` - Optional options.
819    /// * `stream` - Whether streaming is enabled.
820    ///
821    /// # Returns
822    ///
823    /// The response from the API.
824    pub async fn download(
825        &self,
826        url: Option<&str>,
827        options: Option<HashMap<&str, i32>>,
828    ) -> Result<reqwest::Response, reqwest::Error> {
829        let mut params = HashMap::new();
830
831        if let Some(url) = url {
832            params.insert("url".to_string(), url.to_string());
833        }
834
835        if let Some(options) = options {
836            for (key, value) in options {
837                params.insert(key.to_string(), value.to_string());
838            }
839        }
840
841        let url = format!("{}/v1/data/download", get_api_url());
842        let request = self
843            .client
844            .get(&url)
845            .header(
846                "User-Agent",
847                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
848            )
849            .header("Content-Type", "application/octet-stream")
850            .header("Authorization", format!("Bearer {}", self.api_key))
851            .query(&params);
852
853        let res = request.send().await?;
854
855        Ok(res)
856    }
857
858    /// Creates a signed URL of a file from storage.
859    ///
860    /// # Arguments
861    ///
862    /// * `url` - Optional exact url of the file in storage.
863    /// * `options` - Optional options.
864    /// * `stream` - Whether streaming is enabled.
865    ///
866    /// # Returns
867    ///
868    /// The response from the API.
869    pub async fn create_signed_url(
870        &self,
871        url: Option<&str>,
872        options: Option<HashMap<&str, i32>>,
873    ) -> Result<serde_json::Value, reqwest::Error> {
874        let mut params = HashMap::new();
875
876        if let Some(options) = options {
877            for (key, value) in options {
878                params.insert(key.to_string(), value.to_string());
879            }
880        }
881
882        if let Some(url) = url {
883            params.insert("url".to_string(), url.to_string());
884        }
885
886        let url = format!("{}/v1/data/sign-url", get_api_url());
887        let request = self
888            .client
889            .get(&url)
890            .header(
891                "User-Agent",
892                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
893            )
894            .header("Authorization", format!("Bearer {}", self.api_key))
895            .query(&params);
896
897        let res = request.send().await?;
898
899        parse_response(res).await
900    }
901
902    /// Gets the crawl state of a URL.
903    ///
904    /// # Arguments
905    ///
906    /// * `url` - The URL to get the crawl state of.
907    /// * `params` - Optional request parameters.
908    /// * `stream` - Whether streaming is enabled.
909    /// * `content_type` - The content type of the request.
910    ///
911    /// # Returns
912    ///
913    pub async fn get_crawl_state(
914        &self,
915        url: &str,
916        params: Option<RequestParams>,
917        content_type: &str,
918    ) -> Result<serde_json::Value, reqwest::Error> {
919        let mut payload = HashMap::new();
920        payload.insert("url".into(), serde_json::Value::String(url.to_string()));
921        payload.insert(
922            "contentType".into(),
923            serde_json::Value::String(content_type.to_string()),
924        );
925
926        if let Ok(params) = serde_json::to_value(params) {
927            if let Ok(params) = serde_json::to_value(params) {
928                if let Some(ref p) = params.as_object() {
929                    payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
930                }
931            }
932        }
933
934        let res = self
935            .api_post("data/crawl_state", payload, content_type)
936            .await?;
937        parse_response(res).await
938    }
939
940    /// Get the account credits left.
941    pub async fn get_credits(&self) -> Result<serde_json::Value, reqwest::Error> {
942        self.api_get::<serde_json::Value>("data/credits", None)
943            .await
944    }
945
946    /// Send a request for a data record.
947    pub async fn data_post(
948        &self,
949        table: &str,
950        data: Option<RequestParams>,
951    ) -> Result<serde_json::Value, reqwest::Error> {
952        let res = self
953            .api_post(&format!("data/{}", table), data, "application/json")
954            .await?;
955        parse_response(res).await
956    }
957
958    /// Query a record from the global DB.
959    pub async fn query(&self, params: &QueryRequest) -> Result<serde_json::Value, reqwest::Error> {
960        let res = self
961            .api_get::<QueryRequest>(&"data/query", Some(params))
962            .await?;
963
964        Ok(res)
965    }
966
967    /// Get a table record.
968    pub async fn data_get(
969        &self,
970        table: &str,
971        params: Option<RequestParams>,
972    ) -> Result<serde_json::Value, reqwest::Error> {
973        let mut payload = HashMap::new();
974
975        if let Some(params) = params {
976            if let Ok(p) = serde_json::to_value(params) {
977                if let Some(o) = p.as_object() {
978                    payload.extend(o.iter().map(|(k, v)| (k.as_str(), v.clone())));
979                }
980            }
981        }
982
983        let res = self
984            .api_get::<serde_json::Value>(&format!("data/{}", table), None)
985            .await?;
986        Ok(res)
987    }
988
989    /// Delete a record.
990    pub async fn data_delete(
991        &self,
992        table: &str,
993        params: Option<RequestParams>,
994    ) -> Result<serde_json::Value, reqwest::Error> {
995        let mut payload = HashMap::new();
996
997        if let Ok(params) = serde_json::to_value(params) {
998            if let Ok(params) = serde_json::to_value(params) {
999                if let Some(ref p) = params.as_object() {
1000                    payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1001                }
1002            }
1003        }
1004
1005        let res = self
1006            .api_delete(&format!("data/{}", table), Some(payload))
1007            .await?;
1008        parse_response(res).await
1009    }
1010}
1011
#[cfg(test)]
mod tests {
    use super::*;
    use dotenv::dotenv;
    use lazy_static::lazy_static;
    use reqwest::ClientBuilder;

    lazy_static! {
        // One client instance shared by every test in this module.
        static ref SPIDER_CLIENT: Spider = {
            // Best-effort load of a `.env` file; the result is deliberately discarded.
            dotenv().ok();
            let http = ClientBuilder::new()
                .user_agent("SpiderBot")
                .build()
                .unwrap();

            // NOTE(review): no API key is passed here; presumably it is picked
            // up from the environment — confirm in `Spider::new_with_client`.
            Spider::new_with_client(None, http).expect("client to build")
        };
    }

    /// Scraping a single URL returns `Ok`.
    #[tokio::test]
    #[ignore]
    async fn test_scrape_url() {
        let outcome = SPIDER_CLIENT
            .scrape_url("https://example.com", None, "application/json")
            .await;

        assert!(outcome.is_ok());
    }

    /// Crawling without parameters, streaming, or a callback returns `Ok`.
    #[tokio::test]
    async fn test_crawl_url() {
        let no_callback = None::<fn(serde_json::Value)>;
        let outcome = SPIDER_CLIENT
            .crawl_url(
                "https://example.com",
                None,
                false,
                "application/json",
                no_callback,
            )
            .await;

        assert!(outcome.is_ok());
    }

    /// Link gathering returns `Ok`.
    #[tokio::test]
    #[ignore]
    async fn test_links() {
        let outcome: Result<serde_json::Value, Error> = SPIDER_CLIENT
            .links("https://example.com", None, false, "application/json")
            .await;

        assert!(outcome.is_ok());
    }

    /// A screenshot request limited to one page returns `Ok`.
    #[tokio::test]
    #[ignore]
    async fn test_screenshot() {
        let params = RequestParams {
            limit: Some(1),
            ..Default::default()
        };

        let outcome = SPIDER_CLIENT
            .screenshot(
                "https://example.com",
                Some(params),
                false,
                "application/json",
            )
            .await;

        assert!(outcome.is_ok());
    }

    // #[tokio::test(flavor = "multi_thread")]
    // async fn test_search() {
    //     let mut params = SearchRequestParams::default();

    //     params.search_limit = Some(1);
    //     params.num = Some(1);
    //     params.fetch_page_content = Some(false);

    //     let response = SPIDER_CLIENT
    //         .search("a sports website", Some(params), false, "application/json")
    //         .await;

    //     assert!(response.is_ok());
    // }

    /// Transforming a single HTML document returns `Ok`.
    #[tokio::test]
    #[ignore]
    async fn test_transform() {
        // NOTE(review): the markup is used as the map *key* with an empty
        // value — verify this matches the shape `transform` expects.
        let html = "<html><body><h1>Transformation</h1></body></html>";
        let data = vec![HashMap::from([(html.into(), "".into())])];

        let outcome = SPIDER_CLIENT
            .transform(data, None, false, "application/json")
            .await;

        assert!(outcome.is_ok());
    }

    /// Creating a signed URL for a domain returns `Ok`.
    #[tokio::test]
    async fn test_create_signed_url() {
        let outcome = SPIDER_CLIENT
            .create_signed_url(Some("example.com"), None)
            .await;

        assert!(outcome.is_ok());
    }

    /// Fetching the crawl state without extra parameters returns `Ok`.
    #[tokio::test]
    async fn test_get_crawl_state() {
        let outcome = SPIDER_CLIENT
            .get_crawl_state("https://example.com", None, "application/json")
            .await;

        assert!(outcome.is_ok());
    }

    /// Querying by domain returns `Ok`.
    #[tokio::test]
    async fn test_query() {
        let mut request = QueryRequest::default();
        request.domain = Some("spider.cloud".into());

        let outcome = SPIDER_CLIENT.query(&request).await;

        assert!(outcome.is_ok());
    }

    /// Fetching the remaining credits returns `Ok`.
    #[tokio::test]
    async fn test_get_credits() {
        let outcome = SPIDER_CLIENT.get_credits().await;

        assert!(outcome.is_ok());
    }
}