spider_client/
lib.rs

1//! The `spider-client` module provides the primary interface and
2//! functionalities for the Spider web crawler library, which is
3//! designed for rapid and efficient crawling of web pages to gather
4//! links using isolated contexts.
5//!
6//! ### Features
7//!
//! - **Multi-threaded Crawling:** Spider can utilize multiple
//!   threads to parallelize the crawling process, drastically
//!   improving performance and making it possible to gather
//!   millions of pages in a short time.
12//!
13//! - **Configurable:** The library provides various options to
14//!   configure the crawling behavior, such as setting the depth
15//!   of crawling, user-agent strings, delays between requests,
16//!   and more.
17//!
18//! - **Link Gathering:** One of the primary objectives of Spider is to
19//!   gather and manage links from the web pages it crawls,
20//!   compiling them into a structured format for further use.
21//!
22//! ### Examples
23//!
24//! Basic usage of the Spider client might look like this:
25//!
26//! ```rust
27//! use spider_client::{Spider, RequestType, RequestParams};
28//! use tokio;
29//!
30//!  # #[ignore]
31//! #[tokio::main]
32//! async fn main() {
33//!     let spider = Spider::new(Some("myspiderapikey".into())).expect("API key must be provided");
34//!
35//!     let url = "https://spider.cloud";
36//!
37//!     // Scrape a single URL
38//!     let scraped_data = spider.scrape_url(url, None, "application/json").await.expect("Failed to scrape the URL");
39//!
40//!     println!("Scraped Data: {:?}", scraped_data);
41//!
42//!     // Crawl a website
43//!     let crawler_params = RequestParams {
44//!         limit: Some(1),
45//!         proxy_enabled: Some(true),
46//!         store_data: Some(false),
47//!         metadata: Some(false),
48//!         request: Some(RequestType::Http),
49//!         ..Default::default()
50//!     };
51//!
52//!     let crawl_result = spider.crawl_url(url, Some(crawler_params), false, "application/json", None::<fn(serde_json::Value)>).await.expect("Failed to crawl the URL");
53//!
54//!     println!("Crawl Result: {:?}", crawl_result);
55//! }
56//! ```
57//!
58//! ### Modules
59//!
60//! - `config`: Contains the configuration options for the Spider client.
61//! - `utils`: Utility functions used by the Spider client.
62//!
63
64use backon::ExponentialBuilder;
65use backon::Retryable;
66use reqwest::Client;
67use reqwest::{Error, Response};
68use serde::{Deserialize, Serialize};
69use serde_json::Value;
70use std::collections::HashMap;
71use tokio_stream::StreamExt;
72
73/// Structure representing the Chunking algorithm dictionary.
74#[derive(Debug, Deserialize, Serialize, Clone)]
75pub struct ChunkingAlgDict {
76    /// The chunking algorithm to use, defined as a specific type.
77    r#type: ChunkingType,
78    /// The amount to chunk by.
79    value: i32,
80}
81
82// The nested structures
83#[derive(Serialize, Deserialize, Debug, Clone)]
84pub struct Timeout {
85    /// The seconds up to 60.
86    pub secs: u64,
87    /// The nanoseconds.
88    pub nanos: u32,
89}
90
91#[derive(Serialize, Deserialize, Debug, Clone)]
92pub struct IdleNetwork {
93    /// The timeout to wait until.
94    pub timeout: Timeout,
95}
96
97#[derive(Serialize, Deserialize, Debug, Clone)]
98#[serde(tag = "type", rename_all = "PascalCase")]
99pub enum WebAutomation {
100    Evaluate { code: String },
101    Click { selector: String },
102    Wait { duration: u64 },
103    WaitForNavigation,
104    WaitFor { selector: String },
105    WaitForAndClick { selector: String },
106    ScrollX { pixels: i32 },
107    ScrollY { pixels: i32 },
108    Fill { selector: String, value: String },
109    InfiniteScroll { times: u32 },
110}
111
112#[derive(Default, Serialize, Deserialize, Debug, Clone)]
113#[serde(tag = "type", rename_all = "PascalCase")]
114pub enum RedirectPolicy {
115    Loose,
116    #[default]
117    Strict,
118}
119
120pub type WebAutomationMap = std::collections::HashMap<String, Vec<WebAutomation>>;
121pub type ExecutionScriptsMap = std::collections::HashMap<String, String>;
122
123#[derive(Serialize, Deserialize, Debug, Clone)]
124pub struct Selector {
125    /// The timeout to wait until.
126    pub timeout: Timeout,
127    /// The selector to wait for.
128    pub selector: String,
129}
130
131#[derive(Serialize, Deserialize, Debug, Clone)]
132pub struct Delay {
133    /// The timeout to wait until.
134    pub timeout: Timeout,
135}
136
137#[derive(Serialize, Deserialize, Debug, Clone)]
138pub struct WaitFor {
139    /// Wait until idle networks with a timeout of idleness.
140    pub idle_network: Option<IdleNetwork>,
141    /// Wait until a selector exist. Can determine if a selector exist after executing all js and network events.
142    pub selector: Option<Selector>,
143    /// Wait until a hard delay.
144    pub delay: Option<Delay>,
145    /// Wait until page navigation happen. Default is true.
146    pub page_navigations: Option<bool>,
147}
148
149/// Query request to get a document.
150#[derive(Serialize, Deserialize, Debug, Clone, Default)]
151pub struct QueryRequest {
152    /// The exact website url.
153    pub url: Option<String>,
154    /// The website domain.
155    pub domain: Option<String>,
156    /// The path of the resource.
157    pub pathname: Option<String>,
158}
159
160/// Enum representing different types of Chunking.
161#[derive(Default, Debug, Deserialize, Serialize, Clone)]
162#[serde(rename_all = "lowercase")]
163pub enum ChunkingType {
164    #[default]
165    /// By the word count.
166    ByWords,
167    /// By the line count.
168    ByLines,
169    /// By the char length.
170    ByCharacterLength,
171    /// By sentence.
172    BySentence,
173}
174
175#[derive(Default, Debug, Deserialize, Serialize, Clone)]
176/// View port handling for chrome.
177pub struct Viewport {
178    /// Device screen Width
179    pub width: u32,
180    /// Device screen size
181    pub height: u32,
182    /// Device scale factor
183    pub device_scale_factor: Option<f64>,
184    /// Emulating Mobile?
185    pub emulating_mobile: bool,
186    /// Use landscape mode instead of portrait.
187    pub is_landscape: bool,
188    /// Touch screen device?
189    pub has_touch: bool,
190}
191
/// The base URL of the Spider API; endpoint paths are appended to it.
const API_URL: &str = "https://api.spider.cloud";
194
195// Define the CSSSelector struct
196#[derive(Debug, Clone, Default, Deserialize, Serialize)]
197pub struct CSSSelector {
198    /// The name of the selector group
199    pub name: String,
200    /// A vector of CSS selectors
201    pub selectors: Vec<String>,
202}
203
204// Define the CSSExtractionMap type
205pub type CSSExtractionMap = HashMap<String, Vec<CSSSelector>>;
206
207/// Represents the settings for a webhook configuration
208#[derive(Debug, Default, Deserialize, Serialize, Clone)]
209pub struct WebhookSettings {
210    /// The destination where the webhook information will be sent
211    destination: String,
212    /// Trigger an action when all credits are depleted
213    on_credits_depleted: bool,
214    /// Trigger an action when half of the credits are depleted
215    on_credits_half_depleted: bool,
216    /// Trigger an action on a website status update event
217    on_website_status: bool,
218    /// Send information about a new page find (such as links and bytes)
219    on_find: bool,
220    /// Handle the metadata of a found page
221    on_find_metadata: bool,
222}
223
224/// Proxy pool selection for outbound request routing.
225/// Choose a pool based on your use case (e.g., stealth, speed, or stability).
226///
227/// - 'residential'         → cost-effective entry-level residential pool
228/// - 'residential_fast'    → faster residential pool for higher throughput
229/// - 'residential_static'  → static residential IPs, rotated daily
230/// - 'residential_premium' → low-latency premium IPs
231/// - 'residential_core'    → balanced plan (quality vs. cost)
232/// - 'residential_plus'    → largest and highest quality core pool
233/// - 'mobile'              → 4G/5G mobile proxies for maximum evasion
234/// - 'isp'                 → ISP-grade datacenters
235#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
236pub enum ProxyType {
237    /// Cost-effective entry-level residential pool.
238    #[serde(rename = "residential")]
239    Residential,
240    /// Higher-throughput residential pool for better performance.
241    #[serde(rename = "residential_fast")]
242    ResidentialFast,
243    /// Static residential IPs, rotated daily for session persistence.
244    #[serde(rename = "residential_static")]
245    ResidentialStatic,
246    /// 4G / 5G mobile proxies for maximum stealth and evasion.
247    #[serde(rename = "mobile")]
248    Mobile,
249    /// ISP-grade residential routing (alias: `datacenter`).
250    #[serde(rename = "isp", alias = "datacenter")]
251    #[default]
252    Isp,
253    /// Premium low-latency residential proxy pool.
254    #[serde(rename = "residential_premium")]
255    ResidentialPremium,
256    /// Core residential plan optimized for balance between cost and quality.
257    #[serde(rename = "residential_core")]
258    ResidentialCore,
259    /// Extended core residential pool with the largest, highest-quality IPs.
260    #[serde(rename = "residential_plus")]
261    ResidentialPlus,
262}
263
264/// Send multiple return formats.
265#[derive(Debug, Deserialize, Serialize, Clone)]
266#[serde(untagged)]
267pub enum ReturnFormatHandling {
268    /// A single return item.
269    Single(ReturnFormat),
270    /// Multiple return formats.
271    Multi(std::collections::HashSet<ReturnFormat>),
272}
273
274impl Default for ReturnFormatHandling {
275    fn default() -> ReturnFormatHandling {
276        ReturnFormatHandling::Single(ReturnFormat::Raw)
277    }
278}
279
280#[derive(Debug, Default, Deserialize, Serialize, Clone)]
281pub struct EventTracker {
282    /// The responses received.
283    responses: Option<bool>,
284    ///The request sent.
285    requests: Option<bool>,
286}
287
288/// Structure representing request parameters.
289#[derive(Debug, Default, Deserialize, Serialize, Clone)]
290pub struct RequestParams {
291    #[serde(default)]
292    /// The URL to be crawled.
293    pub url: Option<String>,
294    #[serde(default)]
295    /// The type of request to be made.
296    pub request: Option<RequestType>,
297    #[serde(default)]
298    /// The maximum number of pages the crawler should visit.
299    pub limit: Option<u32>,
300    #[serde(default)]
301    /// The format in which the result should be returned.
302    pub return_format: Option<ReturnFormatHandling>,
303    #[serde(default)]
304    /// Specifies whether to only visit the top-level domain.
305    pub tld: Option<bool>,
306    #[serde(default)]
307    /// The depth of the crawl.
308    pub depth: Option<u32>,
309    #[serde(default)]
310    /// Specifies whether the request should be cached.
311    pub cache: Option<bool>,
312    #[serde(default)]
313    /// Perform an infinite scroll on the page as new content arises. The request param also needs to be set to 'chrome' or 'smart'.
314    pub scroll: Option<u32>,
315    #[serde(default)]
316    /// The budget for various resources.
317    pub budget: Option<HashMap<String, u32>>,
318    #[serde(default)]
319    /// The blacklist routes to ignore. This can be a Regex string pattern.
320    pub blacklist: Option<Vec<String>>,
321    #[serde(default)]
322    /// The whitelist routes to only crawl. This can be a Regex string pattern and used with black_listing.
323    pub whitelist: Option<Vec<String>>,
324    #[serde(default)]
325    /// The locale to be used during the crawl.
326    pub locale: Option<String>,
327    #[serde(default)]
328    /// The cookies to be set for the request, formatted as a single string.
329    pub cookies: Option<String>,
330    #[serde(default)]
331    /// Specifies whether to use stealth techniques to avoid detection.
332    pub stealth: Option<bool>,
333    #[serde(default)]
334    /// The headers to be used for the request.
335    pub headers: Option<HashMap<String, String>>,
336    #[serde(default)]
337    /// Specifies whether anti-bot measures should be used.
338    pub anti_bot: Option<bool>,
339    #[serde(default)]
340    /// Specifies whether to send data via webhooks.
341    pub webhooks: Option<WebhookSettings>,
342    #[serde(default)]
343    /// Specifies whether to include metadata in the response.
344    pub metadata: Option<bool>,
345    #[serde(default)]
346    /// The dimensions of the viewport.
347    pub viewport: Option<Viewport>,
348    #[serde(default)]
349    /// The encoding to be used for the request.
350    pub encoding: Option<String>,
351    #[serde(default)]
352    /// Specifies whether to include subdomains in the crawl.
353    pub subdomains: Option<bool>,
354    #[serde(default)]
355    /// The user agent string to be used for the request.
356    pub user_agent: Option<String>,
357    #[serde(default)]
358    /// Specifies whether the response data should be stored.
359    pub store_data: Option<bool>,
360    #[serde(default)]
361    /// Configuration settings for GPT (general purpose texture mappings).
362    pub gpt_config: Option<HashMap<String, String>>,
363    #[serde(default)]
364    /// Specifies whether to use fingerprinting protection.
365    pub fingerprint: Option<bool>,
366    #[serde(default)]
367    /// Specifies whether to perform the request without using storage.
368    pub storageless: Option<bool>,
369    #[serde(default)]
370    /// Specifies whether readability optimizations should be applied.
371    pub readability: Option<bool>,
372    #[serde(default)]
373    /// Specifies whether to use a proxy for the request. [Deprecated]: use the 'proxy' param instead.
374    pub proxy_enabled: Option<bool>,
375    #[serde(default)]
376    /// Specifies whether to respect the site's robots.txt file.
377    pub respect_robots: Option<bool>,
378    #[serde(default)]
379    /// CSS selector to be used to filter the content.
380    pub root_selector: Option<String>,
381    #[serde(default)]
382    /// Specifies whether to load all resources of the crawl target.
383    pub full_resources: Option<bool>,
384    #[serde(default)]
385    /// The text string to extract data from.
386    pub text: Option<String>,
387    #[serde(default)]
388    /// Specifies whether to use the sitemap links.
389    pub sitemap: Option<bool>,
390    #[serde(default)]
391    /// External domains to include the crawl.
392    pub external_domains: Option<Vec<String>>,
393    #[serde(default)]
394    /// Returns the OpenAI embeddings for the title and description. Other values, such as keywords, may also be included. Requires the `metadata` parameter to be set to `true`.
395    pub return_embeddings: Option<bool>,
396    #[serde(default)]
397    /// Returns the HTTP response headers.
398    pub return_headers: Option<bool>,
399    #[serde(default)]
400    /// Returns the link(s) found on the page that match the crawler query.
401    pub return_page_links: Option<bool>,
402    #[serde(default)]
403    /// Returns the HTTP response cookies.
404    pub return_cookies: Option<bool>,
405    #[serde(default)]
406    /// The timeout for the request, in milliseconds.
407    pub request_timeout: Option<u8>,
408    #[serde(default)]
409    /// Specifies whether to run the request in the background.
410    pub run_in_background: Option<bool>,
411    #[serde(default)]
412    /// Specifies whether to skip configuration checks.
413    pub skip_config_checks: Option<bool>,
414    #[serde(default)]
415    /// Use CSS query selectors to scrape contents from the web page. Set the paths and the CSS extraction object map to perform extractions per path or page.
416    pub css_extraction_map: Option<CSSExtractionMap>,
417    #[serde(default)]
418    /// The chunking algorithm to use.
419    pub chunking_alg: Option<ChunkingAlgDict>,
420    #[serde(default)]
421    /// Disable request interception when running 'request' as 'chrome' or 'smart'. This can help when the page uses 3rd party or external scripts to load content.
422    pub disable_intercept: Option<bool>,
423    #[serde(default)]
424    /// The wait for events on the page. You need to make your `request` `chrome` or `smart`.
425    pub wait_for: Option<WaitFor>,
426    #[serde(default)]
427    /// Perform custom Javascript tasks on a url or url path. You need to make your `request` `chrome` or `smart`
428    pub execution_scripts: Option<ExecutionScriptsMap>,
429    #[serde(default)]
430    /// Perform web automated tasks on a url or url path. You need to make your `request` `chrome` or `smart`
431    pub automation_scripts: Option<WebAutomationMap>,
432    #[serde(default)]
433    /// The redirect policy for HTTP request. Set the value to Loose to allow all.
434    pub redirect_policy: Option<RedirectPolicy>,
435    #[serde(default)]
436    /// Track the request sent and responses received for `chrome` or `smart`. The responses will track the bytes used and the requests will have the monotime sent.
437    pub event_tracker: Option<EventTracker>,
438    #[serde(default)]
439    /// The timeout to stop the crawl.
440    pub crawl_timeout: Option<Timeout>,
441    #[serde(default)]
442    /// Evaluates given script in every frame upon creation (before loading frame's scripts).
443    pub evaluate_on_new_document: Option<Box<String>>,
444    #[serde(default)]
445    /// Runs the request using lite_mode:Lite mode reduces data transfer costs by 70%, with trade-offs in speed, accuracy,
446    /// geo-targeting, and reliability. It’s best suited for non-urgent data collection or when
447    /// targeting websites with minimal anti-bot protections.
448    pub lite_mode: Option<bool>,
449    #[serde(default)]
450    /// The proxy to use for request.
451    pub proxy: Option<ProxyType>,
452    #[serde(default)]
453    /// Use a remote proxy at ~70% reduced cost for file downloads.
454    /// This requires a user-supplied static IP proxy endpoint.
455    pub remote_proxy: Option<String>,
456    #[serde(default)]
457    /// Set the maximum number of credits to use per page.
458    /// Credits are measured in decimal units, where 10,000 credits equal one dollar (100 credits per penny).
459    /// Credit limiting only applies to request that are Javascript rendered using smart_mode or chrome for the 'request' type.
460    pub max_credits_per_page: Option<f64>,
461}
462
463/// The structure representing request parameters for a search request.
464#[derive(Debug, Default, Deserialize, Serialize, Clone)]
465pub struct SearchRequestParams {
466    /// The base request parameters.
467    #[serde(default, flatten)]
468    pub base: RequestParams,
469    // The search request.
470    pub search: String,
471    /// The search limit.
472    pub search_limit: Option<u32>,
473    // Fetch the page content. Defaults to true.
474    pub fetch_page_content: Option<bool>,
475    /// The search location of the request
476    pub location: Option<String>,
477    /// The country code of the request
478    pub country: Option<String>,
479    /// The language code of the request.
480    pub language: Option<String>,
481    /// The number of search results
482    pub num: Option<u32>,
483    /// The page of the search results.
484    pub page: Option<u32>,
485    #[serde(default)]
486    /// The websites limit if a list is sent from text or urls comma split. This helps automatic configuration of the system.
487    pub website_limit: Option<u32>,
488}
489
490/// Structure representing request parameters for transforming files.
491#[derive(Debug, Default, Deserialize, Serialize, Clone)]
492pub struct TransformParams {
493    #[serde(default)]
494    /// The format in which the result should be returned.
495    pub return_format: Option<ReturnFormat>,
496    #[serde(default)]
497    /// Specifies whether readability optimizations should be applied.
498    pub readability: Option<bool>,
499    #[serde(default)]
500    /// Clean the markdown or text for AI.
501    pub clean: Option<bool>,
502    #[serde(default)]
503    /// Clean the markdown or text for AI removing footers, navigation, and more.
504    pub clean_full: Option<bool>,
505    /// The data being transformed.
506    pub data: Vec<DataParam>,
507}
508
509#[derive(Serialize, Deserialize, Debug, Clone)]
510pub struct DataParam {
511    /// The HTML resource.
512    pub html: String,
513    /// The website url.
514    pub url: Option<String>,
515}
516
517/// the request type to perform
518#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq, Eq)]
519#[serde(rename_all = "lowercase")]
520pub enum RequestType {
521    /// Default HTTP request
522    Http,
523    /// Chrome browser rendering
524    Chrome,
525    #[default]
526    /// Smart mode defaulting to HTTP and using Chrome when needed.
527    SmartMode,
528}
529
530/// Enum representing different return formats.
531#[derive(Default, Debug, Deserialize, Serialize, Clone, PartialEq, Eq, Hash)]
532#[serde(rename_all = "lowercase")]
533pub enum ReturnFormat {
534    #[default]
535    /// The default return format of the resource.
536    Raw,
537    /// Return the response as Markdown.
538    Markdown,
539    /// Return the response as Commonmark.
540    Commonmark,
541    /// Return the response as Html2text.
542    Html2text,
543    /// Return the response as Text.
544    Text,
545    /// Return the response as XML.
546    Xml,
547    /// Return the response as Bytes.
548    Bytes,
549}
550
551/// Represents a Spider with API key and HTTP client.
552#[derive(Debug, Default)]
553pub struct Spider {
554    /// The Spider API key.
555    pub api_key: String,
556    /// The Spider Client to re-use.
557    pub client: Client,
558}
559
560/// Handle the json response.
561pub async fn handle_json(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
562    res.json().await
563}
564
565/// Handle the jsonl response.
566pub async fn handle_jsonl(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
567    let text = res.text().await?;
568    let lines = text
569        .lines()
570        .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
571        .collect::<Vec<_>>();
572    Ok(serde_json::Value::Array(lines))
573}
574
575/// Handle the CSV response.
576pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
577    use std::collections::HashMap;
578    let text = res.text().await?;
579    let mut rdr = csv::Reader::from_reader(text.as_bytes());
580    let records: Vec<HashMap<String, String>> = rdr.deserialize().filter_map(Result::ok).collect();
581
582    if let Ok(record) = serde_json::to_value(records) {
583        Ok(record)
584    } else {
585        Ok(serde_json::Value::String(text))
586    }
587}
588
589/// Handle the XML response.
590pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
591    let text = res.text().await?;
592    match quick_xml::de::from_str::<serde_json::Value>(&text) {
593        Ok(val) => Ok(val),
594        Err(_) => Ok(serde_json::Value::String(text)),
595    }
596}
597
598pub async fn parse_response(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
599    let content_type = res
600        .headers()
601        .get(reqwest::header::CONTENT_TYPE)
602        .and_then(|v| v.to_str().ok())
603        .unwrap_or_default()
604        .to_ascii_lowercase();
605
606    if content_type.contains("json") && !content_type.contains("jsonl") {
607        handle_json(res).await
608    } else if content_type.contains("jsonl") || content_type.contains("ndjson") {
609        handle_jsonl(res).await
610    } else if content_type.contains("csv") {
611        handle_csv(res).await
612    } else if content_type.contains("xml") {
613        handle_xml(res).await
614    } else {
615        Ok(serde_json::Value::String(
616            res.text().await.unwrap_or_default(),
617        ))
618    }
619}
620
621impl Spider {
622    /// Creates a new instance of Spider.
623    ///
624    /// # Arguments
625    ///
626    /// * `api_key` - An optional API key. Defaults to using the 'SPIDER_API_KEY' env variable.
627    ///
628    /// # Returns
629    ///
630    /// A new instance of Spider or an error string if no API key is provided.
631    pub fn new(api_key: Option<String>) -> Result<Self, &'static str> {
632        let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
633
634        match api_key {
635            Some(key) => Ok(Self {
636                api_key: key,
637                client: Client::new(),
638            }),
639            None => Err("No API key provided"),
640        }
641    }
642
643    /// Creates a new instance of Spider.
644    ///
645    /// # Arguments
646    ///
647    /// * `api_key` - An optional API key. Defaults to using the 'SPIDER_API_KEY' env variable.
648    /// * `client` - A custom client to pass in.
649    ///
650    /// # Returns
651    ///
652    /// A new instance of Spider or an error string if no API key is provided.
653    pub fn new_with_client(api_key: Option<String>, client: Client) -> Result<Self, &'static str> {
654        let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
655
656        match api_key {
657            Some(key) => Ok(Self {
658                api_key: key,
659                client,
660            }),
661            None => Err("No API key provided"),
662        }
663    }
664
665    /// Sends a POST request to the API.
666    ///
667    /// # Arguments
668    ///
669    /// * `endpoint` - The API endpoint.
670    /// * `data` - The request data as a HashMap.
671    /// * `stream` - Whether streaming is enabled.
672    /// * `content_type` - The content type of the request.
673    ///
674    /// # Returns
675    ///
676    /// The response from the API.
677    async fn api_post_base(
678        &self,
679        endpoint: &str,
680        data: impl Serialize + Sized + std::fmt::Debug,
681        content_type: &str,
682    ) -> Result<Response, Error> {
683        let url: String = format!("{API_URL}/{}", endpoint);
684
685        self.client
686            .post(&url)
687            .header(
688                "User-Agent",
689                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
690            )
691            .header("Content-Type", content_type)
692            .header("Authorization", format!("Bearer {}", self.api_key))
693            .json(&data)
694            .send()
695            .await
696    }
697
698    /// Sends a POST request to the API.
699    ///
700    /// # Arguments
701    ///
702    /// * `endpoint` - The API endpoint.
703    /// * `data` - The request data as a HashMap.
704    /// * `stream` - Whether streaming is enabled.
705    /// * `content_type` - The content type of the request.
706    ///
707    /// # Returns
708    ///
709    /// The response from the API.
710    async fn api_post(
711        &self,
712        endpoint: &str,
713        data: impl Serialize + std::fmt::Debug + Clone + Send + Sync,
714        content_type: &str,
715    ) -> Result<Response, Error> {
716        let fetch = || async {
717            self.api_post_base(endpoint, data.to_owned(), content_type)
718                .await
719        };
720
721        fetch
722            .retry(ExponentialBuilder::default().with_max_times(5))
723            .when(|err: &reqwest::Error| {
724                if let Some(status) = err.status() {
725                    status.is_server_error()
726                } else {
727                    err.is_timeout()
728                }
729            })
730            .await
731    }
732
733    /// Sends a GET request to the API.
734    ///
735    /// # Arguments
736    ///
737    /// * `endpoint` - The API endpoint.
738    ///
739    /// # Returns
740    ///
741    /// The response from the API as a JSON value.
742    async fn api_get_base<T: Serialize>(
743        &self,
744        endpoint: &str,
745        query_params: Option<&T>,
746    ) -> Result<serde_json::Value, reqwest::Error> {
747        let url = format!("{API_URL}/{}", endpoint);
748        let res = self
749            .client
750            .get(&url)
751            .query(&query_params)
752            .header(
753                "User-Agent",
754                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
755            )
756            .header("Content-Type", "application/json")
757            .header("Authorization", format!("Bearer {}", self.api_key))
758            .send()
759            .await?;
760        parse_response(res).await
761    }
762
763    /// Sends a GET request to the API.
764    ///
765    /// # Arguments
766    ///
767    /// * `endpoint` - The API endpoint.
768    ///
769    /// # Returns
770    ///
771    /// The response from the API as a JSON value.
772    async fn api_get<T: Serialize>(
773        &self,
774        endpoint: &str,
775        query_params: Option<&T>,
776    ) -> Result<serde_json::Value, reqwest::Error> {
777        let fetch = || async { self.api_get_base(endpoint, query_params.to_owned()).await };
778
779        fetch
780            .retry(ExponentialBuilder::default().with_max_times(5))
781            .when(|err: &reqwest::Error| {
782                if let Some(status) = err.status() {
783                    status.is_server_error()
784                } else {
785                    err.is_timeout()
786                }
787            })
788            .await
789    }
790
791    /// Sends a DELETE request to the API.
792    ///
793    /// # Arguments
794    ///
795    /// * `endpoint` - The API endpoint.
796    /// * `params` - Optional request parameters.
797    /// * `stream` - Whether streaming is enabled.
798    /// * `content_type` - The content type of the request.
799    ///
800    /// # Returns
801    ///
802    /// The response from the API.
803    async fn api_delete_base(
804        &self,
805        endpoint: &str,
806        params: Option<HashMap<String, serde_json::Value>>,
807    ) -> Result<Response, Error> {
808        let url = format!("{API_URL}/v1/{}", endpoint);
809        let request_builder = self
810            .client
811            .delete(&url)
812            .header(
813                "User-Agent",
814                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
815            )
816            .header("Content-Type", "application/json")
817            .header("Authorization", format!("Bearer {}", self.api_key));
818
819        let request_builder = if let Some(params) = params {
820            request_builder.json(&params)
821        } else {
822            request_builder
823        };
824
825        request_builder.send().await
826    }
827
828    /// Sends a DELETE request to the API.
829    ///
830    /// # Arguments
831    ///
832    /// * `endpoint` - The API endpoint.
833    /// * `params` - Optional request parameters.
834    /// * `stream` - Whether streaming is enabled.
835    /// * `content_type` - The content type of the request.
836    ///
837    /// # Returns
838    ///
839    /// The response from the API.
840    async fn api_delete(
841        &self,
842        endpoint: &str,
843        params: Option<HashMap<String, serde_json::Value>>,
844    ) -> Result<Response, Error> {
845        let fetch = || async { self.api_delete_base(endpoint, params.to_owned()).await };
846
847        fetch
848            .retry(ExponentialBuilder::default().with_max_times(5))
849            .when(|err: &reqwest::Error| {
850                if let Some(status) = err.status() {
851                    status.is_server_error()
852                } else {
853                    err.is_timeout()
854                }
855            })
856            .await
857    }
858
859    /// Scrapes a URL.
860    ///
861    /// # Arguments
862    ///
863    /// * `url` - The URL to scrape.
864    /// * `params` - Optional request parameters.
865    /// * `stream` - Whether streaming is enabled.
866    /// * `content_type` - The content type of the request.
867    ///
868    /// # Returns
869    ///
870    /// The response from the API as a JSON value.
871    pub async fn scrape_url(
872        &self,
873        url: &str,
874        params: Option<RequestParams>,
875        content_type: &str,
876    ) -> Result<serde_json::Value, reqwest::Error> {
877        let mut data = HashMap::new();
878
879        data.insert(
880            "url".to_string(),
881            serde_json::Value::String(url.to_string()),
882        );
883        data.insert("limit".to_string(), serde_json::Value::Number(1.into()));
884
885        if let Ok(params) = serde_json::to_value(params) {
886            if let Some(ref p) = params.as_object() {
887                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
888            }
889        }
890
891        let res = self.api_post("crawl", data, content_type).await?;
892        parse_response(res).await
893    }
894
895    /// Crawls a URL.
896    ///
897    /// # Arguments
898    ///
899    /// * `url` - The URL to crawl.
900    /// * `params` - Optional request parameters.
901    /// * `stream` - Whether streaming is enabled.
902    /// * `content_type` - The content type of the request.
903    /// * `callback` - Optional callback function to handle each streamed chunk.
904    ///
905    /// # Returns
906    ///
907    /// The response from the API as a JSON value.
908    pub async fn crawl_url(
909        &self,
910        url: &str,
911        params: Option<RequestParams>,
912        stream: bool,
913        content_type: &str,
914        callback: Option<impl Fn(serde_json::Value) + Send>,
915    ) -> Result<serde_json::Value, reqwest::Error> {
916        use tokio_util::codec::{FramedRead, LinesCodec};
917
918        let mut data = HashMap::new();
919
920        if let Ok(params) = serde_json::to_value(params) {
921            if let Some(ref p) = params.as_object() {
922                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
923            }
924        }
925
926        data.insert("url".into(), serde_json::Value::String(url.to_string()));
927
928        let res = self.api_post("crawl", data, content_type).await?;
929
930        if stream {
931            if let Some(callback) = callback {
932                let stream = res.bytes_stream();
933
934                let stream_reader = tokio_util::io::StreamReader::new(
935                    stream
936                        .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
937                );
938
939                let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
940
941                while let Some(line_result) = lines.next().await {
942                    match line_result {
943                        Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
944                            Ok(value) => {
945                                callback(value);
946                            }
947                            Err(_e) => {
948                                continue;
949                            }
950                        },
951                        Err(_e) => return Ok(serde_json::Value::Null),
952                    }
953                }
954
955                Ok(serde_json::Value::Null)
956            } else {
957                Ok(serde_json::Value::Null)
958            }
959        } else {
960            parse_response(res).await
961        }
962    }
963
964    /// Fetches links from a URL.
965    ///
966    /// # Arguments
967    ///
968    /// * `url` - The URL to fetch links from.
969    /// * `params` - Optional request parameters.
970    /// * `stream` - Whether streaming is enabled.
971    /// * `content_type` - The content type of the request.
972    ///
973    /// # Returns
974    ///
975    /// The response from the API as a JSON value.
976    pub async fn links(
977        &self,
978        url: &str,
979        params: Option<RequestParams>,
980        _stream: bool,
981        content_type: &str,
982    ) -> Result<serde_json::Value, reqwest::Error> {
983        let mut data = HashMap::new();
984
985        if let Ok(params) = serde_json::to_value(params) {
986            if let Some(ref p) = params.as_object() {
987                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
988            }
989        }
990
991        data.insert("url".into(), serde_json::Value::String(url.to_string()));
992
993        let res = self.api_post("links", data, content_type).await?;
994        parse_response(res).await
995    }
996
997    /// Takes a screenshot of a URL.
998    ///
999    /// # Arguments
1000    ///
1001    /// * `url` - The URL to take a screenshot of.
1002    /// * `params` - Optional request parameters.
1003    /// * `stream` - Whether streaming is enabled.
1004    /// * `content_type` - The content type of the request.
1005    ///
1006    /// # Returns
1007    ///
1008    /// The response from the API as a JSON value.
1009    pub async fn screenshot(
1010        &self,
1011        url: &str,
1012        params: Option<RequestParams>,
1013        _stream: bool,
1014        content_type: &str,
1015    ) -> Result<serde_json::Value, reqwest::Error> {
1016        let mut data = HashMap::new();
1017
1018        if let Ok(params) = serde_json::to_value(params) {
1019            if let Some(ref p) = params.as_object() {
1020                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1021            }
1022        }
1023
1024        data.insert("url".into(), serde_json::Value::String(url.to_string()));
1025
1026        let res = self.api_post("screenshot", data, content_type).await?;
1027        parse_response(res).await
1028    }
1029
1030    /// Searches for a query.
1031    ///
1032    /// # Arguments
1033    ///
1034    /// * `q` - The query to search for.
1035    /// * `params` - Optional request parameters.
1036    /// * `stream` - Whether streaming is enabled.
1037    /// * `content_type` - The content type of the request.
1038    ///
1039    /// # Returns
1040    ///
1041    /// The response from the API as a JSON value.
1042    pub async fn search(
1043        &self,
1044        q: &str,
1045        params: Option<SearchRequestParams>,
1046        _stream: bool,
1047        content_type: &str,
1048    ) -> Result<serde_json::Value, reqwest::Error> {
1049        let body = match params {
1050            Some(mut params) => {
1051                params.search = q.to_string();
1052                params
1053            }
1054            _ => {
1055                let mut params = SearchRequestParams::default();
1056                params.search = q.to_string();
1057                params
1058            }
1059        };
1060
1061        let res = self.api_post("search", body, content_type).await?;
1062
1063        parse_response(res).await
1064    }
1065
1066    /// Transforms data.
1067    ///
1068    /// # Arguments
1069    ///
1070    /// * `data` - The data to transform.
1071    /// * `params` - Optional request parameters.
1072    /// * `stream` - Whether streaming is enabled.
1073    /// * `content_type` - The content type of the request.
1074    ///
1075    /// # Returns
1076    ///
1077    /// The response from the API as a JSON value.
1078    pub async fn transform(
1079        &self,
1080        data: Vec<HashMap<&str, &str>>,
1081        params: Option<TransformParams>,
1082        _stream: bool,
1083        content_type: &str,
1084    ) -> Result<serde_json::Value, reqwest::Error> {
1085        let mut payload = HashMap::new();
1086
1087        if let Ok(params) = serde_json::to_value(params) {
1088            if let Some(ref p) = params.as_object() {
1089                payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1090            }
1091        }
1092
1093        if let Ok(d) = serde_json::to_value(data) {
1094            payload.insert("data".into(), d);
1095        }
1096
1097        let res = self.api_post("transform", payload, content_type).await?;
1098
1099        parse_response(res).await
1100    }
1101
1102    /// Extracts contacts from a URL.
1103    ///
1104    /// # Arguments
1105    ///
1106    /// * `url` - The URL to extract contacts from.
1107    /// * `params` - Optional request parameters.
1108    /// * `stream` - Whether streaming is enabled.
1109    /// * `content_type` - The content type of the request.
1110    ///
1111    /// # Returns
1112    ///
1113    /// The response from the API as a JSON value.
1114    pub async fn extract_contacts(
1115        &self,
1116        url: &str,
1117        params: Option<RequestParams>,
1118        _stream: bool,
1119        content_type: &str,
1120    ) -> Result<serde_json::Value, reqwest::Error> {
1121        let mut data = HashMap::new();
1122
1123        if let Ok(params) = serde_json::to_value(params) {
1124            if let Ok(params) = serde_json::to_value(params) {
1125                if let Some(ref p) = params.as_object() {
1126                    data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1127                }
1128            }
1129        }
1130
1131        match serde_json::to_value(url) {
1132            Ok(u) => {
1133                data.insert("url".into(), u);
1134            }
1135            _ => (),
1136        }
1137
1138        let res = self
1139            .api_post("pipeline/extract-contacts", data, content_type)
1140            .await?;
1141
1142        parse_response(res).await
1143    }
1144
1145    /// Labels data from a URL.
1146    ///
1147    /// # Arguments
1148    ///
1149    /// * `url` - The URL to label data from.
1150    /// * `params` - Optional request parameters.
1151    /// * `stream` - Whether streaming is enabled.
1152    /// * `content_type` - The content type of the request.
1153    ///
1154    /// # Returns
1155    ///
1156    /// The response from the API as a JSON value.
1157    pub async fn label(
1158        &self,
1159        url: &str,
1160        params: Option<RequestParams>,
1161        _stream: bool,
1162        content_type: &str,
1163    ) -> Result<serde_json::Value, reqwest::Error> {
1164        let mut data = HashMap::new();
1165
1166        if let Ok(params) = serde_json::to_value(params) {
1167            if let Ok(params) = serde_json::to_value(params) {
1168                if let Some(ref p) = params.as_object() {
1169                    data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1170                }
1171            }
1172        }
1173
1174        data.insert("url".into(), serde_json::Value::String(url.to_string()));
1175
1176        let res = self.api_post("pipeline/label", data, content_type).await?;
1177        parse_response(res).await
1178    }
1179
1180    /// Download a record from storage.
1181    ///
1182    /// # Arguments
1183    ///
1184    /// * `url` - Optional exact url of the file in storage.
1185    /// * `options` - Optional options.
1186    /// * `stream` - Whether streaming is enabled.
1187    ///
1188    /// # Returns
1189    ///
1190    /// The response from the API.
1191    pub async fn download(
1192        &self,
1193        url: Option<&str>,
1194        options: Option<HashMap<&str, i32>>,
1195    ) -> Result<reqwest::Response, reqwest::Error> {
1196        let mut params = HashMap::new();
1197
1198        if let Some(url) = url {
1199            params.insert("url".to_string(), url.to_string());
1200        }
1201
1202        if let Some(options) = options {
1203            for (key, value) in options {
1204                params.insert(key.to_string(), value.to_string());
1205            }
1206        }
1207
1208        let url = format!("{API_URL}/v1/data/download");
1209        let request = self
1210            .client
1211            .get(&url)
1212            .header(
1213                "User-Agent",
1214                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
1215            )
1216            .header("Content-Type", "application/octet-stream")
1217            .header("Authorization", format!("Bearer {}", self.api_key))
1218            .query(&params);
1219
1220        let res = request.send().await?;
1221
1222        Ok(res)
1223    }
1224
1225    /// Creates a signed URL of a file from storage.
1226    ///
1227    /// # Arguments
1228    ///
1229    /// * `url` - Optional exact url of the file in storage.
1230    /// * `options` - Optional options.
1231    /// * `stream` - Whether streaming is enabled.
1232    ///
1233    /// # Returns
1234    ///
1235    /// The response from the API.
1236    pub async fn create_signed_url(
1237        &self,
1238        url: Option<&str>,
1239        options: Option<HashMap<&str, i32>>,
1240    ) -> Result<serde_json::Value, reqwest::Error> {
1241        let mut params = HashMap::new();
1242
1243        if let Some(options) = options {
1244            for (key, value) in options {
1245                params.insert(key.to_string(), value.to_string());
1246            }
1247        }
1248
1249        if let Some(url) = url {
1250            params.insert("url".to_string(), url.to_string());
1251        }
1252
1253        let url = format!("{API_URL}/v1/data/sign-url");
1254        let request = self
1255            .client
1256            .get(&url)
1257            .header(
1258                "User-Agent",
1259                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
1260            )
1261            .header("Authorization", format!("Bearer {}", self.api_key))
1262            .query(&params);
1263
1264        let res = request.send().await?;
1265
1266        parse_response(res).await
1267    }
1268
1269    /// Gets the crawl state of a URL.
1270    ///
1271    /// # Arguments
1272    ///
1273    /// * `url` - The URL to get the crawl state of.
1274    /// * `params` - Optional request parameters.
1275    /// * `stream` - Whether streaming is enabled.
1276    /// * `content_type` - The content type of the request.
1277    ///
1278    /// # Returns
1279    ///
1280    pub async fn get_crawl_state(
1281        &self,
1282        url: &str,
1283        params: Option<RequestParams>,
1284        content_type: &str,
1285    ) -> Result<serde_json::Value, reqwest::Error> {
1286        let mut payload = HashMap::new();
1287        payload.insert("url".into(), serde_json::Value::String(url.to_string()));
1288        payload.insert(
1289            "contentType".into(),
1290            serde_json::Value::String(content_type.to_string()),
1291        );
1292
1293        if let Ok(params) = serde_json::to_value(params) {
1294            if let Ok(params) = serde_json::to_value(params) {
1295                if let Some(ref p) = params.as_object() {
1296                    payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1297                }
1298            }
1299        }
1300
1301        let res = self
1302            .api_post("data/crawl_state", payload, content_type)
1303            .await?;
1304        parse_response(res).await
1305    }
1306
1307    /// Get the account credits left.
1308    pub async fn get_credits(&self) -> Result<serde_json::Value, reqwest::Error> {
1309        self.api_get::<serde_json::Value>("data/credits", None)
1310            .await
1311    }
1312
1313    /// Send a request for a data record.
1314    pub async fn data_post(
1315        &self,
1316        table: &str,
1317        data: Option<RequestParams>,
1318    ) -> Result<serde_json::Value, reqwest::Error> {
1319        let res = self
1320            .api_post(&format!("data/{}", table), data, "application/json")
1321            .await?;
1322        parse_response(res).await
1323    }
1324
1325    /// Query a record from the global DB.
1326    pub async fn query(&self, params: &QueryRequest) -> Result<serde_json::Value, reqwest::Error> {
1327        let res = self
1328            .api_get::<QueryRequest>(&"data/query", Some(params))
1329            .await?;
1330
1331        Ok(res)
1332    }
1333
1334    /// Get a table record.
1335    pub async fn data_get(
1336        &self,
1337        table: &str,
1338        params: Option<RequestParams>,
1339    ) -> Result<serde_json::Value, reqwest::Error> {
1340        let mut payload = HashMap::new();
1341
1342        if let Some(params) = params {
1343            if let Ok(p) = serde_json::to_value(params) {
1344                if let Some(o) = p.as_object() {
1345                    payload.extend(o.iter().map(|(k, v)| (k.as_str(), v.clone())));
1346                }
1347            }
1348        }
1349
1350        let res = self
1351            .api_get::<serde_json::Value>(&format!("data/{}", table), None)
1352            .await?;
1353        Ok(res)
1354    }
1355
1356    /// Delete a record.
1357    pub async fn data_delete(
1358        &self,
1359        table: &str,
1360        params: Option<RequestParams>,
1361    ) -> Result<serde_json::Value, reqwest::Error> {
1362        let mut payload = HashMap::new();
1363
1364        if let Ok(params) = serde_json::to_value(params) {
1365            if let Ok(params) = serde_json::to_value(params) {
1366                if let Some(ref p) = params.as_object() {
1367                    payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1368                }
1369            }
1370        }
1371
1372        let res = self
1373            .api_delete(&format!("data/{}", table), Some(payload))
1374            .await?;
1375        parse_response(res).await
1376    }
1377}
1378
#[cfg(test)]
mod tests {
    use super::*;
    use dotenv::dotenv;
    use lazy_static::lazy_static;
    use reqwest::ClientBuilder;

    lazy_static! {
        // One client shared by every test. `dotenv()` loads a local `.env`
        // file if present, and no API key is passed explicitly.
        // NOTE(review): presumably `new_with_client` falls back to an
        // environment variable for the key; confirm against its definition.
        static ref SPIDER_CLIENT: Spider = {
            dotenv().ok();
            let client = ClientBuilder::new();
            let client = client.user_agent("SpiderBot").build().unwrap();

            Spider::new_with_client(None, client).expect("client to build")
        };
    }

    /// Scraping a single page succeeds.
    #[tokio::test]
    #[ignore]
    async fn test_scrape_url() {
        let response = SPIDER_CLIENT
            .scrape_url("https://example.com", None, "application/json")
            .await;
        assert!(response.is_ok());
    }

    /// A non-streaming crawl without a callback succeeds.
    #[tokio::test]
    async fn test_crawl_url() {
        let response = SPIDER_CLIENT
            .crawl_url(
                "https://example.com",
                None,
                false,
                "application/json",
                None::<fn(serde_json::Value)>,
            )
            .await;
        assert!(response.is_ok());
    }

    /// Fetching links succeeds.
    #[tokio::test]
    #[ignore]
    async fn test_links() {
        let response: Result<serde_json::Value, Error> = SPIDER_CLIENT
            .links("https://example.com", None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    /// Taking a screenshot with a limit of one page succeeds.
    #[tokio::test]
    #[ignore]
    async fn test_screenshot() {
        let params = RequestParams {
            limit: Some(1),
            ..Default::default()
        };

        let response = SPIDER_CLIENT
            .screenshot(
                "https://example.com",
                Some(params),
                false,
                "application/json",
            )
            .await;
        assert!(response.is_ok());
    }

    // #[tokio::test(flavor = "multi_thread")]
    // async fn test_search() {
    //     let mut params = SearchRequestParams::default();

    //     params.search_limit = Some(1);
    //     params.num = Some(1);
    //     params.fetch_page_content = Some(false);

    //     let response = SPIDER_CLIENT
    //         .search("a sports website", Some(params), false, "application/json")
    //         .await;

    //     assert!(response.is_ok());
    // }

    /// Transforming one HTML document succeeds.
    #[tokio::test]
    #[ignore]
    async fn test_transform() {
        // The document is the value of the `html` entry, not the map key.
        let data = vec![HashMap::from([(
            "html",
            "<html><body><h1>Transformation</h1></body></html>",
        )])];
        let response = SPIDER_CLIENT
            .transform(data, None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    /// Extracting contacts succeeds.
    #[tokio::test]
    #[ignore]
    async fn test_extract_contacts() {
        let response = SPIDER_CLIENT
            .extract_contacts("https://example.com", None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    /// Labelling a page succeeds.
    #[tokio::test]
    #[ignore]
    async fn test_label() {
        let response = SPIDER_CLIENT
            .label("https://example.com", None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    /// Creating a signed URL without extra options succeeds.
    #[tokio::test]
    async fn test_create_signed_url() {
        let response = SPIDER_CLIENT
            .create_signed_url(Some("example.com"), None)
            .await;
        assert!(response.is_ok());
    }

    /// Reading the crawl state of a URL succeeds.
    #[tokio::test]
    async fn test_get_crawl_state() {
        let response = SPIDER_CLIENT
            .get_crawl_state("https://example.com", None, "application/json")
            .await;
        assert!(response.is_ok());
    }

    /// Querying a record by domain succeeds.
    #[tokio::test]
    async fn test_query() {
        let query = QueryRequest {
            domain: Some("spider.cloud".into()),
            ..Default::default()
        };

        let response = SPIDER_CLIENT.query(&query).await;
        assert!(response.is_ok());
    }

    /// Reading the remaining account credits succeeds.
    #[tokio::test]
    async fn test_get_credits() {
        let response = SPIDER_CLIENT.get_credits().await;
        assert!(response.is_ok());
    }
}