spider_client/
lib.rs

1//! The `spider-client` module provides the primary interface and
2//! functionalities for the Spider web crawler library, which is
3//! designed for rapid and efficient crawling of web pages to gather
4//! links using isolated contexts.
5//!
6//! ### Features
7//!
8//! - **Multi-threaded Crawling:** Spider can utilize multiple
9//!   threads to parallelize the crawling process, drastically
//!   improving performance and making it possible to gather
//!   millions of pages in a short time.
12//!
13//! - **Configurable:** The library provides various options to
14//!   configure the crawling behavior, such as setting the depth
15//!   of crawling, user-agent strings, delays between requests,
16//!   and more.
17//!
18//! - **Link Gathering:** One of the primary objectives of Spider is to
19//!   gather and manage links from the web pages it crawls,
20//!   compiling them into a structured format for further use.
21//!
22//! ### Examples
23//!
24//! Basic usage of the Spider client might look like this:
25//!
26//! ```rust
27//! use spider_client::{Spider, RequestType, RequestParams};
28//! use tokio;
29//!
30//!  # #[ignore]
31//! #[tokio::main]
32//! async fn main() {
33//!     let spider = Spider::new(Some("myspiderapikey".into())).expect("API key must be provided");
34//!
35//!     let url = "https://spider.cloud";
36//!
37//!     // Scrape a single URL
38//!     let scraped_data = spider.scrape_url(url, None, "application/json").await.expect("Failed to scrape the URL");
39//!
40//!     println!("Scraped Data: {:?}", scraped_data);
41//!
42//!     // Crawl a website
43//!     let crawler_params = RequestParams {
44//!         limit: Some(1),
45//!         proxy_enabled: Some(true),
46//!         store_data: Some(false),
47//!         metadata: Some(false),
48//!         request: Some(RequestType::Http),
49//!         ..Default::default()
50//!     };
51//!
52//!     let crawl_result = spider.crawl_url(url, Some(crawler_params), false, "application/json", None::<fn(serde_json::Value)>).await.expect("Failed to crawl the URL");
53//!
54//!     println!("Crawl Result: {:?}", crawl_result);
55//! }
56//! ```
57//!
58//! ### Modules
59//!
60//! - `config`: Contains the configuration options for the Spider client.
61//! - `utils`: Utility functions used by the Spider client.
62//!
63
64use backon::ExponentialBuilder;
65use backon::Retryable;
66use reqwest::Client;
67use reqwest::{Error, Response};
68use serde::{Deserialize, Serialize};
69use std::collections::HashMap;
70use tokio_stream::StreamExt;
71
72/// Structure representing the Chunking algorithm dictionary.
73#[derive(Debug, Deserialize, Serialize, Clone)]
74pub struct ChunkingAlgDict {
75    /// The chunking algorithm to use, defined as a specific type.
76    r#type: ChunkingType,
77    /// The amount to chunk by.
78    value: i32,
79}
80
// The nested structures
/// A timeout expressed as whole seconds plus additional nanoseconds.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Timeout {
    /// The seconds up to 60.
    pub secs: u64,
    /// The nanoseconds.
    pub nanos: u32,
}
89
/// Wait configuration for an idle network, used by [`WaitFor::idle_network`].
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct IdleNetwork {
    /// The timeout to wait until.
    pub timeout: Timeout,
}
95
/// A single web automation step.
///
/// Serialized as an internally tagged object: the variant name is written to
/// a `type` key alongside the variant's own fields.
///
/// NOTE(review): the per-variant descriptions below are inferred from the
/// variant and field names; the remote API defines the exact semantics.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(tag = "type", rename_all = "PascalCase")]
pub enum WebAutomation {
    /// Evaluate the given `code` (presumably JavaScript) on the page.
    Evaluate { code: String },
    /// Click the element matching `selector`.
    Click { selector: String },
    /// Wait for `duration` (unit not visible here — TODO confirm).
    Wait { duration: u64 },
    /// Wait for a page navigation.
    WaitForNavigation,
    /// Wait for an element matching `selector`.
    WaitFor { selector: String },
    /// Wait for an element matching `selector`, then click it.
    WaitForAndClick { selector: String },
    /// Scroll horizontally by `pixels`.
    ScrollX { pixels: i32 },
    /// Scroll vertically by `pixels`.
    ScrollY { pixels: i32 },
    /// Fill the element matching `selector` with `value`.
    Fill { selector: String, value: String },
    /// Perform an infinite scroll `times` times.
    InfiniteScroll { times: u32 },
}
110
/// The redirect policy for HTTP requests. Defaults to [`RedirectPolicy::Strict`].
///
/// Because of the `tag = "type"` attribute the value is written as an object
/// carrying the variant name under a `type` key rather than as a bare string.
#[derive(Default, Serialize, Deserialize, Debug, Clone)]
#[serde(tag = "type", rename_all = "PascalCase")]
pub enum RedirectPolicy {
    /// Loose policy; `RequestParams::redirect_policy` documents it as allowing all redirects.
    Loose,
    #[default]
    /// Strict policy (the default).
    Strict,
}
118
119pub type WebAutomationMap = std::collections::HashMap<String, Vec<WebAutomation>>;
120pub type ExecutionScriptsMap = std::collections::HashMap<String, String>;
121
/// Wait configuration for a selector, used by [`WaitFor::selector`].
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Selector {
    /// The timeout to wait until.
    pub timeout: Timeout,
    /// The selector to wait for.
    pub selector: String,
}
129
/// Wait configuration for a hard delay, used by [`WaitFor::delay`].
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Delay {
    /// The timeout to wait until.
    pub timeout: Timeout,
}
135
/// The events to wait for on a page. Every field is optional.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct WaitFor {
    /// Wait until the network is idle, with a timeout of idleness.
    pub idle_network: Option<IdleNetwork>,
    /// Wait until a selector exists. Can determine if a selector exists after executing all js and network events.
    pub selector: Option<Selector>,
    /// Wait until a hard delay has elapsed.
    pub delay: Option<Delay>,
    /// Wait until page navigation happens. Default is true.
    pub page_navigations: Option<bool>,
}
147
/// Query request to get a document. All fields are optional and default to `None`.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct QueryRequest {
    /// The exact website url.
    pub url: Option<String>,
    /// The website domain.
    pub domain: Option<String>,
    /// The path of the resource.
    pub pathname: Option<String>,
}
158
/// Enum representing different types of Chunking.
///
/// Variant names are serialized in lowercase (e.g. `ByWords` becomes
/// `bywords`). The default is [`ChunkingType::ByWords`].
#[derive(Default, Debug, Deserialize, Serialize, Clone)]
#[serde(rename_all = "lowercase")]
pub enum ChunkingType {
    #[default]
    /// By the word count.
    ByWords,
    /// By the line count.
    ByLines,
    /// By the character length.
    ByCharacterLength,
    /// By sentence.
    BySentence,
}
173
#[derive(Default, Debug, Deserialize, Serialize, Clone)]
/// Viewport handling for chrome.
pub struct Viewport {
    /// Device screen width.
    pub width: u32,
    /// Device screen height.
    pub height: u32,
    /// Device scale factor.
    pub device_scale_factor: Option<f64>,
    /// Emulating mobile?
    pub emulating_mobile: bool,
    /// Use landscape mode instead of portrait.
    pub is_landscape: bool,
    /// Touch screen device?
    pub has_touch: bool,
}
190
/// The base URL of the Spider API; endpoint paths are appended to it.
const API_URL: &str = "https://api.spider.cloud";
193
/// A named group of CSS selectors.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct CSSSelector {
    /// The name of the selector group
    pub name: String,
    /// A vector of CSS selectors
    pub selectors: Vec<String>,
}
202
/// CSS selector groups keyed by a string; `RequestParams::css_extraction_map`
/// describes the keys as paths, so extraction can be configured per path or page.
pub type CSSExtractionMap = HashMap<String, Vec<CSSSelector>>;
205
206/// Represents the settings for a webhook configuration
207#[derive(Debug, Default, Deserialize, Serialize, Clone)]
208pub struct WebhookSettings {
209    /// The destination where the webhook information will be sent
210    destination: String,
211    /// Trigger an action when all credits are depleted
212    on_credits_depleted: bool,
213    /// Trigger an action when half of the credits are depleted
214    on_credits_half_depleted: bool,
215    /// Trigger an action on a website status update event
216    on_website_status: bool,
217    /// Send information about a new page find (such as links and bytes)
218    on_find: bool,
219    /// Handle the metadata of a found page
220    on_find_metadata: bool,
221}
222
223/// Proxy pool selection for outbound request routing.
224/// Choose a pool based on your use case (e.g., stealth, speed, or stability).
225///
226/// - 'residential'         → cost-effective entry-level residential pool
227/// - 'residential_fast'    → faster residential pool for higher throughput
228/// - 'residential_static'  → static residential IPs, rotated daily
229/// - 'residential_premium' → low-latency premium IPs
230/// - 'residential_core'    → balanced plan (quality vs. cost)
231/// - 'residential_plus'    → largest and highest quality core pool
232/// - 'mobile'              → 4G/5G mobile proxies for maximum evasion
233/// - 'isp'                 → ISP-grade datacenters
234#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
235pub enum ProxyType {
236    /// Cost-effective entry-level residential pool.
237    #[serde(rename = "residential")]
238    Residential,
239    /// Higher-throughput residential pool for better performance.
240    #[serde(rename = "residential_fast")]
241    ResidentialFast,
242    /// Static residential IPs, rotated daily for session persistence.
243    #[serde(rename = "residential_static")]
244    ResidentialStatic,
245    /// 4G / 5G mobile proxies for maximum stealth and evasion.
246    #[serde(rename = "mobile")]
247    Mobile,
248    /// ISP-grade residential routing (alias: `datacenter`).
249    #[serde(rename = "isp", alias = "datacenter")]
250    #[default]
251    Isp,
252    /// Premium low-latency residential proxy pool.
253    #[serde(rename = "residential_premium")]
254    ResidentialPremium,
255    /// Core residential plan optimized for balance between cost and quality.
256    #[serde(rename = "residential_core")]
257    ResidentialCore,
258    /// Extended core residential pool with the largest, highest-quality IPs.
259    #[serde(rename = "residential_plus")]
260    ResidentialPlus,
261}
262
/// Send one or multiple return formats.
///
/// The enum is untagged, so the value is written either as a single
/// [`ReturnFormat`] or as a collection of them, without a wrapping variant name.
#[derive(Debug, Deserialize, Serialize, Clone)]
#[serde(untagged)]
pub enum ReturnFormatHandling {
    /// A single return item.
    Single(ReturnFormat),
    /// Multiple return formats.
    Multi(std::collections::HashSet<ReturnFormat>),
}
272
273impl Default for ReturnFormatHandling {
274    fn default() -> ReturnFormatHandling {
275        ReturnFormatHandling::Single(ReturnFormat::Raw)
276    }
277}
278
279#[derive(Debug, Default, Deserialize, Serialize, Clone)]
280pub struct EventTracker {
281    /// The responses received.
282    responses: Option<bool>,
283    ///The request sent.
284    requests: Option<bool>,
285}
286
/// Structure representing request parameters.
///
/// Every field is optional and defaults to `None`, so a value is normally
/// built with struct-update syntax: `RequestParams { limit: Some(1), ..Default::default() }`.
/// No field is skipped during serialization, so unset fields are written as `null`.
#[derive(Debug, Default, Deserialize, Serialize, Clone)]
pub struct RequestParams {
    #[serde(default)]
    /// The URL to be crawled.
    pub url: Option<String>,
    #[serde(default)]
    /// The type of request to be made.
    pub request: Option<RequestType>,
    #[serde(default)]
    /// The maximum number of pages the crawler should visit.
    pub limit: Option<u32>,
    #[serde(default)]
    /// The format (or set of formats) in which the result should be returned.
    pub return_format: Option<ReturnFormatHandling>,
    #[serde(default)]
    /// Specifies whether to only visit the top-level domain.
    pub tld: Option<bool>,
    #[serde(default)]
    /// The depth of the crawl.
    pub depth: Option<u32>,
    #[serde(default)]
    /// Specifies whether the request should be cached.
    pub cache: Option<bool>,
    #[serde(default)]
    /// Perform an infinite scroll on the page as new content arises. The request param also needs to be set to 'chrome' or 'smart'.
    pub scroll: Option<u32>,
    #[serde(default)]
    /// The budget for various resources.
    pub budget: Option<HashMap<String, u32>>,
    #[serde(default)]
    /// The blacklist routes to ignore. This can be a Regex string pattern.
    pub blacklist: Option<Vec<String>>,
    #[serde(default)]
    /// The whitelist routes to only crawl. This can be a Regex string pattern and can be combined with `blacklist`.
    pub whitelist: Option<Vec<String>>,
    #[serde(default)]
    /// The locale to be used during the crawl.
    pub locale: Option<String>,
    #[serde(default)]
    /// The cookies to be set for the request, formatted as a single string.
    pub cookies: Option<String>,
    #[serde(default)]
    /// Specifies whether to use stealth techniques to avoid detection.
    pub stealth: Option<bool>,
    #[serde(default)]
    /// The headers to be used for the request.
    pub headers: Option<HashMap<String, String>>,
    #[serde(default)]
    /// Specifies whether anti-bot measures should be used.
    pub anti_bot: Option<bool>,
    #[serde(default)]
    /// The webhook settings used to send data via webhooks.
    pub webhooks: Option<WebhookSettings>,
    #[serde(default)]
    /// Specifies whether to include metadata in the response.
    pub metadata: Option<bool>,
    #[serde(default)]
    /// The dimensions of the viewport.
    pub viewport: Option<Viewport>,
    #[serde(default)]
    /// The encoding to be used for the request.
    pub encoding: Option<String>,
    #[serde(default)]
    /// Specifies whether to include subdomains in the crawl.
    pub subdomains: Option<bool>,
    #[serde(default)]
    /// The user agent string to be used for the request.
    pub user_agent: Option<String>,
    #[serde(default)]
    /// Specifies whether the response data should be stored.
    pub store_data: Option<bool>,
    #[serde(default)]
    /// Configuration settings for GPT, as string key/value pairs.
    /// NOTE(review): the accepted keys are not visible in this file — confirm against the API docs.
    pub gpt_config: Option<HashMap<String, String>>,
    #[serde(default)]
    /// Specifies whether to use fingerprinting protection.
    pub fingerprint: Option<bool>,
    #[serde(default)]
    /// Specifies whether to perform the request without using storage.
    pub storageless: Option<bool>,
    #[serde(default)]
    /// Specifies whether readability optimizations should be applied.
    pub readability: Option<bool>,
    #[serde(default)]
    /// Specifies whether to use a proxy for the request. [Deprecated]: use the 'proxy' param instead.
    pub proxy_enabled: Option<bool>,
    #[serde(default)]
    /// Specifies whether to respect the site's robots.txt file.
    pub respect_robots: Option<bool>,
    #[serde(default)]
    /// CSS selector to be used to filter the content.
    pub root_selector: Option<String>,
    #[serde(default)]
    /// Specifies whether to load all resources of the crawl target.
    pub full_resources: Option<bool>,
    #[serde(default)]
    /// The text string to extract data from.
    pub text: Option<String>,
    #[serde(default)]
    /// Specifies whether to use the sitemap links.
    pub sitemap: Option<bool>,
    #[serde(default)]
    /// External domains to include in the crawl.
    pub external_domains: Option<Vec<String>>,
    #[serde(default)]
    /// Returns the OpenAI embeddings for the title and description. Other values, such as keywords, may also be included. Requires the `metadata` parameter to be set to `true`.
    pub return_embeddings: Option<bool>,
    #[serde(default)]
    /// Returns the HTTP response headers.
    pub return_headers: Option<bool>,
    #[serde(default)]
    /// Returns the link(s) found on the page that match the crawler query.
    pub return_page_links: Option<bool>,
    #[serde(default)]
    /// Returns the HTTP response cookies.
    pub return_cookies: Option<bool>,
    #[serde(default)]
    /// The timeout for the request.
    /// NOTE(review): previously documented as milliseconds, but a `u8` caps the value at 255,
    /// which suggests the unit is seconds — confirm against the API docs.
    pub request_timeout: Option<u8>,
    #[serde(default)]
    /// Specifies whether to run the request in the background.
    pub run_in_background: Option<bool>,
    #[serde(default)]
    /// Specifies whether to skip configuration checks.
    pub skip_config_checks: Option<bool>,
    #[serde(default)]
    /// Use CSS query selectors to scrape contents from the web page. Set the paths and the CSS extraction object map to perform extractions per path or page.
    pub css_extraction_map: Option<CSSExtractionMap>,
    #[serde(default)]
    /// The chunking algorithm to use.
    pub chunking_alg: Option<ChunkingAlgDict>,
    #[serde(default)]
    /// Disable request interception when running 'request' as 'chrome' or 'smart'. This can help when the page uses 3rd party or external scripts to load content.
    pub disable_intercept: Option<bool>,
    #[serde(default)]
    /// The events to wait for on the page. You need to make your `request` `chrome` or `smart`.
    pub wait_for: Option<WaitFor>,
    #[serde(default)]
    /// Perform custom Javascript tasks on a url or url path. You need to make your `request` `chrome` or `smart`.
    pub execution_scripts: Option<ExecutionScriptsMap>,
    #[serde(default)]
    /// Perform web automated tasks on a url or url path. You need to make your `request` `chrome` or `smart`.
    pub automation_scripts: Option<WebAutomationMap>,
    #[serde(default)]
    /// The redirect policy for HTTP request. Set the value to Loose to allow all.
    pub redirect_policy: Option<RedirectPolicy>,
    #[serde(default)]
    /// Track the request sent and responses received for `chrome` or `smart`. The responses will track the bytes used and the requests will have the monotime sent.
    pub event_tracker: Option<EventTracker>,
    #[serde(default)]
    /// The timeout to stop the crawl.
    pub crawl_timeout: Option<Timeout>,
    #[serde(default)]
    /// Evaluates given script in every frame upon creation (before loading frame's scripts).
    pub evaluate_on_new_document: Option<Box<String>>,
    #[serde(default)]
    /// Runs the request using lite mode. Lite mode reduces data transfer costs by 70%, with trade-offs in speed, accuracy,
    /// geo-targeting, and reliability. It's best suited for non-urgent data collection or when
    /// targeting websites with minimal anti-bot protections.
    pub lite_mode: Option<bool>,
    #[serde(default)]
    /// The proxy pool to use for the request.
    pub proxy: Option<ProxyType>,
    #[serde(default)]
    /// Use a remote proxy at ~70% reduced cost for file downloads.
    /// This requires a user-supplied static IP proxy endpoint.
    pub remote_proxy: Option<String>,
    #[serde(default)]
    /// Set the maximum number of credits to use per page.
    /// Credits are measured in decimal units, where 10,000 credits equal one dollar (100 credits per penny).
    /// Credit limiting only applies to requests that are Javascript rendered using smart_mode or chrome for the 'request' type.
    pub max_credits_per_page: Option<f64>,
}
461
/// The structure representing request parameters for a search request.
///
/// The base [`RequestParams`] are flattened, so their keys sit at the same
/// level as the search-specific fields.
#[derive(Debug, Default, Deserialize, Serialize, Clone)]
pub struct SearchRequestParams {
    /// The base request parameters.
    #[serde(default, flatten)]
    pub base: RequestParams,
    /// The search request.
    pub search: String,
    /// The search limit.
    pub search_limit: Option<u32>,
    /// Fetch the page content. Defaults to true.
    pub fetch_page_content: Option<bool>,
    /// The search location of the request
    pub location: Option<String>,
    /// The country code of the request
    pub country: Option<String>,
    /// The language code of the request.
    pub language: Option<String>,
    /// The number of search results
    pub num: Option<u32>,
    /// The page of the search results.
    pub page: Option<u32>,
    #[serde(default)]
    /// The websites limit if a list is sent from text or urls comma split. This helps automatic configuration of the system.
    pub website_limit: Option<u32>,
}
488
/// Structure representing request parameters for transforming files.
///
/// The options are optional; `data` holds the documents to transform.
#[derive(Debug, Default, Deserialize, Serialize, Clone)]
pub struct TransformParams {
    #[serde(default)]
    /// The format in which the result should be returned.
    pub return_format: Option<ReturnFormat>,
    #[serde(default)]
    /// Specifies whether readability optimizations should be applied.
    pub readability: Option<bool>,
    #[serde(default)]
    /// Clean the markdown or text for AI.
    pub clean: Option<bool>,
    #[serde(default)]
    /// Clean the markdown or text for AI removing footers, navigation, and more.
    pub clean_full: Option<bool>,
    /// The data being transformed.
    pub data: Vec<DataParam>,
}
507
/// A single document to transform, used as an entry of [`TransformParams::data`].
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct DataParam {
    /// The HTML resource.
    pub html: String,
    /// The website url.
    pub url: Option<String>,
}
515
516/// the request type to perform
517#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq, Eq)]
518#[serde(rename_all = "lowercase")]
519pub enum RequestType {
520    /// Default HTTP request
521    Http,
522    /// Chrome browser rendering
523    Chrome,
524    #[default]
525    /// Smart mode defaulting to HTTP and using Chrome when needed.
526    SmartMode,
527}
528
/// Enum representing different return formats.
///
/// Variant names are serialized in lowercase (e.g. `Html2text` becomes
/// `html2text`). The default is [`ReturnFormat::Raw`].
#[derive(Default, Debug, Deserialize, Serialize, Clone, PartialEq, Eq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum ReturnFormat {
    #[default]
    /// The default return format of the resource.
    Raw,
    /// Return the response as Markdown.
    Markdown,
    /// Return the response as Commonmark.
    Commonmark,
    /// Return the response as Html2text.
    Html2text,
    /// Return the response as Text.
    Text,
    /// Return the response as XML.
    Xml,
    /// Return the response as Bytes.
    Bytes,
}
549
/// Represents a Spider with API key and HTTP client.
///
/// Prefer `Spider::new` or `Spider::new_with_client` for construction: the
/// derived `Default` value carries an empty API key.
#[derive(Debug, Default)]
pub struct Spider {
    /// The Spider API key, sent as a bearer token with every request.
    pub api_key: String,
    /// The Spider Client to re-use.
    pub client: Client,
}
558
559/// Handle the json response.
560pub async fn handle_json(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
561    res.json().await
562}
563
564/// Handle the jsonl response.
565pub async fn handle_jsonl(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
566    let text = res.text().await?;
567    let lines = text
568        .lines()
569        .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
570        .collect::<Vec<_>>();
571    Ok(serde_json::Value::Array(lines))
572}
573
/// Decode a CSV body into a JSON array with one object per record.
///
/// Records are read as string-to-string maps; records that fail to
/// deserialize are skipped. If the collected records cannot be converted to
/// JSON, the raw body is returned as a JSON string instead.
///
/// # Errors
///
/// Returns the `reqwest` error when the body cannot be read as text.
#[cfg(feature = "csv")]
pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    let body = res.text().await?;
    let mut reader = csv::Reader::from_reader(body.as_bytes());

    let mut rows: Vec<HashMap<String, String>> = Vec::new();
    for row in reader.deserialize::<HashMap<String, String>>() {
        if let Ok(row) = row {
            rows.push(row);
        }
    }

    match serde_json::to_value(rows) {
        Ok(value) => Ok(value),
        Err(_) => Ok(serde_json::Value::String(body)),
    }
}
588
/// Handle the CSV response when the `csv` feature is disabled: the body is
/// passed to [`handle_text`] and returned as a JSON string without parsing.
#[cfg(not(feature = "csv"))]
pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    handle_text(res).await
}
593
594/// Basic handle response to text
595pub async fn handle_text(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
596    Ok(serde_json::Value::String(
597        res.text().await.unwrap_or_default(),
598    ))
599}
600
/// Decode an XML body into a JSON value.
///
/// If the body cannot be deserialized as XML, it is returned unchanged as a
/// JSON string.
///
/// NOTE(review): this is gated on the `csv` feature although it relies on
/// `quick_xml` — confirm that the feature also enables that dependency.
///
/// # Errors
///
/// Returns the `reqwest` error when the body cannot be read as text.
#[cfg(feature = "csv")]
pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    let body = res.text().await?;
    let parsed = quick_xml::de::from_str::<serde_json::Value>(&body);

    Ok(parsed.unwrap_or_else(|_| serde_json::Value::String(body)))
}
610
#[cfg(not(feature = "csv"))]
/// Handle the XML response when the `csv` feature is disabled: the body is
/// passed to [`handle_text`] and returned as a JSON string without parsing.
pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    handle_text(res).await
}
616
617pub async fn parse_response(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
618    let content_type = res
619        .headers()
620        .get(reqwest::header::CONTENT_TYPE)
621        .and_then(|v| v.to_str().ok())
622        .unwrap_or_default()
623        .to_ascii_lowercase();
624
625    if content_type.contains("json") && !content_type.contains("jsonl") {
626        handle_json(res).await
627    } else if content_type.contains("jsonl") || content_type.contains("ndjson") {
628        handle_jsonl(res).await
629    } else if content_type.contains("csv") {
630        handle_csv(res).await
631    } else if content_type.contains("xml") {
632        handle_xml(res).await
633    } else {
634        handle_text(res).await
635    }
636}
637
638impl Spider {
639    /// Creates a new instance of Spider.
640    ///
641    /// # Arguments
642    ///
643    /// * `api_key` - An optional API key. Defaults to using the 'SPIDER_API_KEY' env variable.
644    ///
645    /// # Returns
646    ///
647    /// A new instance of Spider or an error string if no API key is provided.
648    pub fn new(api_key: Option<String>) -> Result<Self, &'static str> {
649        let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
650
651        match api_key {
652            Some(key) => Ok(Self {
653                api_key: key,
654                client: Client::new(),
655            }),
656            None => Err("No API key provided"),
657        }
658    }
659
660    /// Creates a new instance of Spider.
661    ///
662    /// # Arguments
663    ///
664    /// * `api_key` - An optional API key. Defaults to using the 'SPIDER_API_KEY' env variable.
665    /// * `client` - A custom client to pass in.
666    ///
667    /// # Returns
668    ///
669    /// A new instance of Spider or an error string if no API key is provided.
670    pub fn new_with_client(api_key: Option<String>, client: Client) -> Result<Self, &'static str> {
671        let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
672
673        match api_key {
674            Some(key) => Ok(Self {
675                api_key: key,
676                client,
677            }),
678            None => Err("No API key provided"),
679        }
680    }
681
682    /// Sends a POST request to the API.
683    ///
684    /// # Arguments
685    ///
686    /// * `endpoint` - The API endpoint.
687    /// * `data` - The request data as a HashMap.
688    /// * `stream` - Whether streaming is enabled.
689    /// * `content_type` - The content type of the request.
690    ///
691    /// # Returns
692    ///
693    /// The response from the API.
694    async fn api_post_base(
695        &self,
696        endpoint: &str,
697        data: impl Serialize + Sized + std::fmt::Debug,
698        content_type: &str,
699    ) -> Result<Response, Error> {
700        let url: String = format!("{API_URL}/{}", endpoint);
701
702        self.client
703            .post(&url)
704            .header(
705                "User-Agent",
706                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
707            )
708            .header("Content-Type", content_type)
709            .header("Authorization", format!("Bearer {}", self.api_key))
710            .json(&data)
711            .send()
712            .await
713    }
714
715    /// Sends a POST request to the API.
716    ///
717    /// # Arguments
718    ///
719    /// * `endpoint` - The API endpoint.
720    /// * `data` - The request data as a HashMap.
721    /// * `stream` - Whether streaming is enabled.
722    /// * `content_type` - The content type of the request.
723    ///
724    /// # Returns
725    ///
726    /// The response from the API.
727    async fn api_post(
728        &self,
729        endpoint: &str,
730        data: impl Serialize + std::fmt::Debug + Clone + Send + Sync,
731        content_type: &str,
732    ) -> Result<Response, Error> {
733        let fetch = || async {
734            self.api_post_base(endpoint, data.to_owned(), content_type)
735                .await
736        };
737
738        fetch
739            .retry(ExponentialBuilder::default().with_max_times(5))
740            .when(|err: &reqwest::Error| {
741                if let Some(status) = err.status() {
742                    status.is_server_error()
743                } else {
744                    err.is_timeout()
745                }
746            })
747            .await
748    }
749
750    /// Sends a GET request to the API.
751    ///
752    /// # Arguments
753    ///
754    /// * `endpoint` - The API endpoint.
755    ///
756    /// # Returns
757    ///
758    /// The response from the API as a JSON value.
759    async fn api_get_base<T: Serialize>(
760        &self,
761        endpoint: &str,
762        query_params: Option<&T>,
763    ) -> Result<serde_json::Value, reqwest::Error> {
764        let url = format!("{API_URL}/{}", endpoint);
765        let res = self
766            .client
767            .get(&url)
768            .query(&query_params)
769            .header(
770                "User-Agent",
771                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
772            )
773            .header("Content-Type", "application/json")
774            .header("Authorization", format!("Bearer {}", self.api_key))
775            .send()
776            .await?;
777        parse_response(res).await
778    }
779
780    /// Sends a GET request to the API.
781    ///
782    /// # Arguments
783    ///
784    /// * `endpoint` - The API endpoint.
785    ///
786    /// # Returns
787    ///
788    /// The response from the API as a JSON value.
789    async fn api_get<T: Serialize>(
790        &self,
791        endpoint: &str,
792        query_params: Option<&T>,
793    ) -> Result<serde_json::Value, reqwest::Error> {
794        let fetch = || async { self.api_get_base(endpoint, query_params.to_owned()).await };
795
796        fetch
797            .retry(ExponentialBuilder::default().with_max_times(5))
798            .when(|err: &reqwest::Error| {
799                if let Some(status) = err.status() {
800                    status.is_server_error()
801                } else {
802                    err.is_timeout()
803                }
804            })
805            .await
806    }
807
808    /// Sends a DELETE request to the API.
809    ///
810    /// # Arguments
811    ///
812    /// * `endpoint` - The API endpoint.
813    /// * `params` - Optional request parameters.
814    /// * `stream` - Whether streaming is enabled.
815    /// * `content_type` - The content type of the request.
816    ///
817    /// # Returns
818    ///
819    /// The response from the API.
820    async fn api_delete_base(
821        &self,
822        endpoint: &str,
823        params: Option<HashMap<String, serde_json::Value>>,
824    ) -> Result<Response, Error> {
825        let url = format!("{API_URL}/v1/{}", endpoint);
826        let request_builder = self
827            .client
828            .delete(&url)
829            .header(
830                "User-Agent",
831                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
832            )
833            .header("Content-Type", "application/json")
834            .header("Authorization", format!("Bearer {}", self.api_key));
835
836        let request_builder = if let Some(params) = params {
837            request_builder.json(&params)
838        } else {
839            request_builder
840        };
841
842        request_builder.send().await
843    }
844
845    /// Sends a DELETE request to the API.
846    ///
847    /// # Arguments
848    ///
849    /// * `endpoint` - The API endpoint.
850    /// * `params` - Optional request parameters.
851    /// * `stream` - Whether streaming is enabled.
852    /// * `content_type` - The content type of the request.
853    ///
854    /// # Returns
855    ///
856    /// The response from the API.
857    async fn api_delete(
858        &self,
859        endpoint: &str,
860        params: Option<HashMap<String, serde_json::Value>>,
861    ) -> Result<Response, Error> {
862        let fetch = || async { self.api_delete_base(endpoint, params.to_owned()).await };
863
864        fetch
865            .retry(ExponentialBuilder::default().with_max_times(5))
866            .when(|err: &reqwest::Error| {
867                if let Some(status) = err.status() {
868                    status.is_server_error()
869                } else {
870                    err.is_timeout()
871                }
872            })
873            .await
874    }
875
876    /// Scrapes a URL.
877    ///
878    /// # Arguments
879    ///
880    /// * `url` - The URL to scrape.
881    /// * `params` - Optional request parameters.
882    /// * `stream` - Whether streaming is enabled.
883    /// * `content_type` - The content type of the request.
884    ///
885    /// # Returns
886    ///
887    /// The response from the API as a JSON value.
888    pub async fn scrape_url(
889        &self,
890        url: &str,
891        params: Option<RequestParams>,
892        content_type: &str,
893    ) -> Result<serde_json::Value, reqwest::Error> {
894        let mut data = HashMap::new();
895
896        data.insert(
897            "url".to_string(),
898            serde_json::Value::String(url.to_string()),
899        );
900        data.insert("limit".to_string(), serde_json::Value::Number(1.into()));
901
902        if let Ok(params) = serde_json::to_value(params) {
903            if let Some(ref p) = params.as_object() {
904                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
905            }
906        }
907
908        let res = self.api_post("crawl", data, content_type).await?;
909        parse_response(res).await
910    }
911
912    /// Crawls a URL.
913    ///
914    /// # Arguments
915    ///
916    /// * `url` - The URL to crawl.
917    /// * `params` - Optional request parameters.
918    /// * `stream` - Whether streaming is enabled.
919    /// * `content_type` - The content type of the request.
920    /// * `callback` - Optional callback function to handle each streamed chunk.
921    ///
922    /// # Returns
923    ///
924    /// The response from the API as a JSON value.
925    pub async fn crawl_url(
926        &self,
927        url: &str,
928        params: Option<RequestParams>,
929        stream: bool,
930        content_type: &str,
931        callback: Option<impl Fn(serde_json::Value) + Send>,
932    ) -> Result<serde_json::Value, reqwest::Error> {
933        use tokio_util::codec::{FramedRead, LinesCodec};
934
935        let mut data = HashMap::new();
936
937        if let Ok(params) = serde_json::to_value(params) {
938            if let Some(ref p) = params.as_object() {
939                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
940            }
941        }
942
943        data.insert("url".into(), serde_json::Value::String(url.to_string()));
944
945        let res = self.api_post("crawl", data, content_type).await?;
946
947        if stream {
948            if let Some(callback) = callback {
949                let stream = res.bytes_stream();
950
951                let stream_reader = tokio_util::io::StreamReader::new(
952                    stream
953                        .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
954                );
955
956                let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
957
958                while let Some(line_result) = lines.next().await {
959                    match line_result {
960                        Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
961                            Ok(value) => {
962                                callback(value);
963                            }
964                            Err(_e) => {
965                                continue;
966                            }
967                        },
968                        Err(_e) => return Ok(serde_json::Value::Null),
969                    }
970                }
971
972                Ok(serde_json::Value::Null)
973            } else {
974                Ok(serde_json::Value::Null)
975            }
976        } else {
977            parse_response(res).await
978        }
979    }
980
981    /// Fetches links from a URL.
982    ///
983    /// # Arguments
984    ///
985    /// * `url` - The URL to fetch links from.
986    /// * `params` - Optional request parameters.
987    /// * `stream` - Whether streaming is enabled.
988    /// * `content_type` - The content type of the request.
989    ///
990    /// # Returns
991    ///
992    /// The response from the API as a JSON value.
993    pub async fn links(
994        &self,
995        url: &str,
996        params: Option<RequestParams>,
997        _stream: bool,
998        content_type: &str,
999    ) -> Result<serde_json::Value, reqwest::Error> {
1000        let mut data = HashMap::new();
1001
1002        if let Ok(params) = serde_json::to_value(params) {
1003            if let Some(ref p) = params.as_object() {
1004                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1005            }
1006        }
1007
1008        data.insert("url".into(), serde_json::Value::String(url.to_string()));
1009
1010        let res = self.api_post("links", data, content_type).await?;
1011        parse_response(res).await
1012    }
1013
1014    /// Takes a screenshot of a URL.
1015    ///
1016    /// # Arguments
1017    ///
1018    /// * `url` - The URL to take a screenshot of.
1019    /// * `params` - Optional request parameters.
1020    /// * `stream` - Whether streaming is enabled.
1021    /// * `content_type` - The content type of the request.
1022    ///
1023    /// # Returns
1024    ///
1025    /// The response from the API as a JSON value.
1026    pub async fn screenshot(
1027        &self,
1028        url: &str,
1029        params: Option<RequestParams>,
1030        _stream: bool,
1031        content_type: &str,
1032    ) -> Result<serde_json::Value, reqwest::Error> {
1033        let mut data = HashMap::new();
1034
1035        if let Ok(params) = serde_json::to_value(params) {
1036            if let Some(ref p) = params.as_object() {
1037                data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1038            }
1039        }
1040
1041        data.insert("url".into(), serde_json::Value::String(url.to_string()));
1042
1043        let res = self.api_post("screenshot", data, content_type).await?;
1044        parse_response(res).await
1045    }
1046
1047    /// Searches for a query.
1048    ///
1049    /// # Arguments
1050    ///
1051    /// * `q` - The query to search for.
1052    /// * `params` - Optional request parameters.
1053    /// * `stream` - Whether streaming is enabled.
1054    /// * `content_type` - The content type of the request.
1055    ///
1056    /// # Returns
1057    ///
1058    /// The response from the API as a JSON value.
1059    pub async fn search(
1060        &self,
1061        q: &str,
1062        params: Option<SearchRequestParams>,
1063        _stream: bool,
1064        content_type: &str,
1065    ) -> Result<serde_json::Value, reqwest::Error> {
1066        let body = match params {
1067            Some(mut params) => {
1068                params.search = q.to_string();
1069                params
1070            }
1071            _ => {
1072                let mut params = SearchRequestParams::default();
1073                params.search = q.to_string();
1074                params
1075            }
1076        };
1077
1078        let res = self.api_post("search", body, content_type).await?;
1079
1080        parse_response(res).await
1081    }
1082
1083    /// Transforms data.
1084    ///
1085    /// # Arguments
1086    ///
1087    /// * `data` - The data to transform.
1088    /// * `params` - Optional request parameters.
1089    /// * `stream` - Whether streaming is enabled.
1090    /// * `content_type` - The content type of the request.
1091    ///
1092    /// # Returns
1093    ///
1094    /// The response from the API as a JSON value.
1095    pub async fn transform(
1096        &self,
1097        data: Vec<HashMap<&str, &str>>,
1098        params: Option<TransformParams>,
1099        _stream: bool,
1100        content_type: &str,
1101    ) -> Result<serde_json::Value, reqwest::Error> {
1102        let mut payload = HashMap::new();
1103
1104        if let Ok(params) = serde_json::to_value(params) {
1105            if let Some(ref p) = params.as_object() {
1106                payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1107            }
1108        }
1109
1110        if let Ok(d) = serde_json::to_value(data) {
1111            payload.insert("data".into(), d);
1112        }
1113
1114        let res = self.api_post("transform", payload, content_type).await?;
1115
1116        parse_response(res).await
1117    }
1118
1119    /// Extracts contacts from a URL.
1120    ///
1121    /// # Arguments
1122    ///
1123    /// * `url` - The URL to extract contacts from.
1124    /// * `params` - Optional request parameters.
1125    /// * `stream` - Whether streaming is enabled.
1126    /// * `content_type` - The content type of the request.
1127    ///
1128    /// # Returns
1129    ///
1130    /// The response from the API as a JSON value.
1131    pub async fn extract_contacts(
1132        &self,
1133        url: &str,
1134        params: Option<RequestParams>,
1135        _stream: bool,
1136        content_type: &str,
1137    ) -> Result<serde_json::Value, reqwest::Error> {
1138        let mut data = HashMap::new();
1139
1140        if let Ok(params) = serde_json::to_value(params) {
1141            if let Ok(params) = serde_json::to_value(params) {
1142                if let Some(ref p) = params.as_object() {
1143                    data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1144                }
1145            }
1146        }
1147
1148        match serde_json::to_value(url) {
1149            Ok(u) => {
1150                data.insert("url".into(), u);
1151            }
1152            _ => (),
1153        }
1154
1155        let res = self
1156            .api_post("pipeline/extract-contacts", data, content_type)
1157            .await?;
1158
1159        parse_response(res).await
1160    }
1161
1162    /// Labels data from a URL.
1163    ///
1164    /// # Arguments
1165    ///
1166    /// * `url` - The URL to label data from.
1167    /// * `params` - Optional request parameters.
1168    /// * `stream` - Whether streaming is enabled.
1169    /// * `content_type` - The content type of the request.
1170    ///
1171    /// # Returns
1172    ///
1173    /// The response from the API as a JSON value.
1174    pub async fn label(
1175        &self,
1176        url: &str,
1177        params: Option<RequestParams>,
1178        _stream: bool,
1179        content_type: &str,
1180    ) -> Result<serde_json::Value, reqwest::Error> {
1181        let mut data = HashMap::new();
1182
1183        if let Ok(params) = serde_json::to_value(params) {
1184            if let Ok(params) = serde_json::to_value(params) {
1185                if let Some(ref p) = params.as_object() {
1186                    data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1187                }
1188            }
1189        }
1190
1191        data.insert("url".into(), serde_json::Value::String(url.to_string()));
1192
1193        let res = self.api_post("pipeline/label", data, content_type).await?;
1194        parse_response(res).await
1195    }
1196
1197    /// Download a record from storage.
1198    ///
1199    /// # Arguments
1200    ///
1201    /// * `url` - Optional exact url of the file in storage.
1202    /// * `options` - Optional options.
1203    /// * `stream` - Whether streaming is enabled.
1204    ///
1205    /// # Returns
1206    ///
1207    /// The response from the API.
1208    pub async fn download(
1209        &self,
1210        url: Option<&str>,
1211        options: Option<HashMap<&str, i32>>,
1212    ) -> Result<reqwest::Response, reqwest::Error> {
1213        let mut params = HashMap::new();
1214
1215        if let Some(url) = url {
1216            params.insert("url".to_string(), url.to_string());
1217        }
1218
1219        if let Some(options) = options {
1220            for (key, value) in options {
1221                params.insert(key.to_string(), value.to_string());
1222            }
1223        }
1224
1225        let url = format!("{API_URL}/v1/data/download");
1226        let request = self
1227            .client
1228            .get(&url)
1229            .header(
1230                "User-Agent",
1231                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
1232            )
1233            .header("Content-Type", "application/octet-stream")
1234            .header("Authorization", format!("Bearer {}", self.api_key))
1235            .query(&params);
1236
1237        let res = request.send().await?;
1238
1239        Ok(res)
1240    }
1241
1242    /// Creates a signed URL of a file from storage.
1243    ///
1244    /// # Arguments
1245    ///
1246    /// * `url` - Optional exact url of the file in storage.
1247    /// * `options` - Optional options.
1248    /// * `stream` - Whether streaming is enabled.
1249    ///
1250    /// # Returns
1251    ///
1252    /// The response from the API.
1253    pub async fn create_signed_url(
1254        &self,
1255        url: Option<&str>,
1256        options: Option<HashMap<&str, i32>>,
1257    ) -> Result<serde_json::Value, reqwest::Error> {
1258        let mut params = HashMap::new();
1259
1260        if let Some(options) = options {
1261            for (key, value) in options {
1262                params.insert(key.to_string(), value.to_string());
1263            }
1264        }
1265
1266        if let Some(url) = url {
1267            params.insert("url".to_string(), url.to_string());
1268        }
1269
1270        let url = format!("{API_URL}/v1/data/sign-url");
1271        let request = self
1272            .client
1273            .get(&url)
1274            .header(
1275                "User-Agent",
1276                format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
1277            )
1278            .header("Authorization", format!("Bearer {}", self.api_key))
1279            .query(&params);
1280
1281        let res = request.send().await?;
1282
1283        parse_response(res).await
1284    }
1285
1286    /// Gets the crawl state of a URL.
1287    ///
1288    /// # Arguments
1289    ///
1290    /// * `url` - The URL to get the crawl state of.
1291    /// * `params` - Optional request parameters.
1292    /// * `stream` - Whether streaming is enabled.
1293    /// * `content_type` - The content type of the request.
1294    ///
1295    /// # Returns
1296    ///
1297    pub async fn get_crawl_state(
1298        &self,
1299        url: &str,
1300        params: Option<RequestParams>,
1301        content_type: &str,
1302    ) -> Result<serde_json::Value, reqwest::Error> {
1303        let mut payload = HashMap::new();
1304        payload.insert("url".into(), serde_json::Value::String(url.to_string()));
1305        payload.insert(
1306            "contentType".into(),
1307            serde_json::Value::String(content_type.to_string()),
1308        );
1309
1310        if let Ok(params) = serde_json::to_value(params) {
1311            if let Ok(params) = serde_json::to_value(params) {
1312                if let Some(ref p) = params.as_object() {
1313                    payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1314                }
1315            }
1316        }
1317
1318        let res = self
1319            .api_post("data/crawl_state", payload, content_type)
1320            .await?;
1321        parse_response(res).await
1322    }
1323
1324    /// Get the account credits left.
1325    pub async fn get_credits(&self) -> Result<serde_json::Value, reqwest::Error> {
1326        self.api_get::<serde_json::Value>("data/credits", None)
1327            .await
1328    }
1329
1330    /// Send a request for a data record.
1331    pub async fn data_post(
1332        &self,
1333        table: &str,
1334        data: Option<RequestParams>,
1335    ) -> Result<serde_json::Value, reqwest::Error> {
1336        let res = self
1337            .api_post(&format!("data/{}", table), data, "application/json")
1338            .await?;
1339        parse_response(res).await
1340    }
1341
1342    /// Query a record from the global DB.
1343    pub async fn query(&self, params: &QueryRequest) -> Result<serde_json::Value, reqwest::Error> {
1344        let res = self
1345            .api_get::<QueryRequest>(&"data/query", Some(params))
1346            .await?;
1347
1348        Ok(res)
1349    }
1350
1351    /// Get a table record.
1352    pub async fn data_get(
1353        &self,
1354        table: &str,
1355        params: Option<RequestParams>,
1356    ) -> Result<serde_json::Value, reqwest::Error> {
1357        let mut payload = HashMap::new();
1358
1359        if let Some(params) = params {
1360            if let Ok(p) = serde_json::to_value(params) {
1361                if let Some(o) = p.as_object() {
1362                    payload.extend(o.iter().map(|(k, v)| (k.as_str(), v.clone())));
1363                }
1364            }
1365        }
1366
1367        let res = self
1368            .api_get::<serde_json::Value>(&format!("data/{}", table), None)
1369            .await?;
1370        Ok(res)
1371    }
1372
1373    /// Delete a record.
1374    pub async fn data_delete(
1375        &self,
1376        table: &str,
1377        params: Option<RequestParams>,
1378    ) -> Result<serde_json::Value, reqwest::Error> {
1379        let mut payload = HashMap::new();
1380
1381        if let Ok(params) = serde_json::to_value(params) {
1382            if let Ok(params) = serde_json::to_value(params) {
1383                if let Some(ref p) = params.as_object() {
1384                    payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1385                }
1386            }
1387        }
1388
1389        let res = self
1390            .api_delete(&format!("data/{}", table), Some(payload))
1391            .await?;
1392        parse_response(res).await
1393    }
1394}
1395
#[cfg(test)]
mod tests {
    //! Integration-style tests: every test calls the client against the API,
    //! so they need network access. The shared client is built with no
    //! explicit API key after loading `.env` — presumably the key is read
    //! from the environment (confirm against `Spider::new_with_client`).

    use super::*;
    use dotenv::dotenv;
    use lazy_static::lazy_static;
    use reqwest::ClientBuilder;

    lazy_static! {
        /// Client shared by all tests, built once on first use.
        static ref SPIDER_CLIENT: Spider = {
            dotenv().ok();
            let http = ClientBuilder::new()
                .user_agent("SpiderBot")
                .build()
                .unwrap();

            Spider::new_with_client(None, http).expect("client to build")
        };
    }

    /// Scraping a single page succeeds.
    #[tokio::test]
    #[ignore]
    async fn test_scrape_url() {
        let response = SPIDER_CLIENT
            .scrape_url("https://example.com", None, "application/json")
            .await;
        assert!(response.is_ok());
    }

    /// A non-streaming crawl without a callback succeeds.
    #[tokio::test]
    async fn test_crawl_url() {
        let response = SPIDER_CLIENT
            .crawl_url(
                "https://example.com",
                None,
                false,
                "application/json",
                None::<fn(serde_json::Value)>,
            )
            .await;
        assert!(response.is_ok());
    }

    /// Link gathering succeeds.
    #[tokio::test]
    #[ignore]
    async fn test_links() {
        let response: Result<serde_json::Value, Error> = SPIDER_CLIENT
            .links("https://example.com", None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    /// A screenshot limited to one page succeeds.
    #[tokio::test]
    #[ignore]
    async fn test_screenshot() {
        let params = RequestParams {
            limit: Some(1),
            ..Default::default()
        };

        let response = SPIDER_CLIENT
            .screenshot(
                "https://example.com",
                Some(params),
                false,
                "application/json",
            )
            .await;
        assert!(response.is_ok());
    }

    // Currently disabled.
    // #[tokio::test(flavor = "multi_thread")]
    // async fn test_search() {
    //     let mut params = SearchRequestParams::default();

    //     params.search_limit = Some(1);
    //     params.num = Some(1);
    //     params.fetch_page_content = Some(false);

    //     let response = SPIDER_CLIENT
    //         .search("a sports website", Some(params), false, "application/json")
    //         .await;

    //     assert!(response.is_ok());
    // }

    /// Transforming one HTML document succeeds.
    #[tokio::test]
    #[ignore]
    async fn test_transform() {
        // Each item carries the markup under the `html` key; previously the
        // markup itself was used as the key with an empty value.
        let data = vec![HashMap::from([(
            "html",
            "<html><body><h1>Transformation</h1></body></html>",
        )])];
        let response = SPIDER_CLIENT
            .transform(data, None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    /// Contact extraction succeeds.
    #[tokio::test]
    #[ignore]
    async fn test_extract_contacts() {
        let response = SPIDER_CLIENT
            .extract_contacts("https://example.com", None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    /// Labeling succeeds.
    #[tokio::test]
    #[ignore]
    async fn test_label() {
        let response = SPIDER_CLIENT
            .label("https://example.com", None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    /// Requesting a signed URL succeeds.
    #[tokio::test]
    async fn test_create_signed_url() {
        let response = SPIDER_CLIENT
            .create_signed_url(Some("example.com"), None)
            .await;
        assert!(response.is_ok());
    }

    /// Fetching the crawl state succeeds.
    #[tokio::test]
    async fn test_get_crawl_state() {
        let response = SPIDER_CLIENT
            .get_crawl_state("https://example.com", None, "application/json")
            .await;
        assert!(response.is_ok());
    }

    /// Querying by domain succeeds.
    #[tokio::test]
    async fn test_query() {
        let query = QueryRequest {
            domain: Some("spider.cloud".into()),
            ..Default::default()
        };

        let response = SPIDER_CLIENT.query(&query).await;
        assert!(response.is_ok());
    }

    /// Fetching the remaining credits succeeds.
    #[tokio::test]
    async fn test_get_credits() {
        let response = SPIDER_CLIENT.get_credits().await;
        assert!(response.is_ok());
    }
}
spider_client/lib.rs

spider_client/
lib.rs