1use backon::ExponentialBuilder;
65use backon::Retryable;
66use reqwest::Client;
67use reqwest::{Error, Response};
68use serde::{Deserialize, Serialize};
69use serde_json::Value;
70use std::collections::HashMap;
71use tokio_stream::StreamExt;
72
73#[derive(Debug, Deserialize, Serialize, Clone)]
75pub struct ChunkingAlgDict {
76 r#type: ChunkingType,
78 value: i32,
80}
81
82#[derive(Serialize, Deserialize, Debug, Clone)]
84pub struct Timeout {
85 pub secs: u64,
87 pub nanos: u32,
89}
90
91#[derive(Serialize, Deserialize, Debug, Clone)]
92pub struct IdleNetwork {
93 pub timeout: Timeout,
95}
96
97#[derive(Serialize, Deserialize, Debug, Clone)]
98#[serde(tag = "type", rename_all = "PascalCase")]
99pub enum WebAutomation {
100 Evaluate { code: String },
101 Click { selector: String },
102 Wait { duration: u64 },
103 WaitForNavigation,
104 WaitFor { selector: String },
105 WaitForAndClick { selector: String },
106 ScrollX { pixels: i32 },
107 ScrollY { pixels: i32 },
108 Fill { selector: String, value: String },
109 InfiniteScroll { times: u32 },
110}
111
112#[derive(Default, Serialize, Deserialize, Debug, Clone)]
113#[serde(tag = "type", rename_all = "PascalCase")]
114pub enum RedirectPolicy {
115 Loose,
116 #[default]
117 Strict,
118}
119
120pub type WebAutomationMap = std::collections::HashMap<String, Vec<WebAutomation>>;
/// Map from a string key to a script source string.
/// NOTE(review): the key is presumably a URL or path — confirm against the API reference.
pub type ExecutionScriptsMap = HashMap<String, String>;
122
123#[derive(Serialize, Deserialize, Debug, Clone)]
124pub struct Selector {
125 pub timeout: Timeout,
127 pub selector: String,
129}
130
131#[derive(Serialize, Deserialize, Debug, Clone)]
132pub struct Delay {
133 pub timeout: Timeout,
135}
136
137#[derive(Serialize, Deserialize, Debug, Clone)]
138pub struct WaitFor {
139 pub idle_network: Option<IdleNetwork>,
141 pub selector: Option<Selector>,
143 pub delay: Option<Delay>,
145 pub page_navigations: Option<bool>,
147}
148
149#[derive(Serialize, Deserialize, Debug, Clone, Default)]
151pub struct QueryRequest {
152 pub url: Option<String>,
154 pub domain: Option<String>,
156 pub pathname: Option<String>,
158}
159
160#[derive(Default, Debug, Deserialize, Serialize, Clone)]
162#[serde(rename_all = "lowercase")]
163pub enum ChunkingType {
164 #[default]
165 ByWords,
167 ByLines,
169 ByCharacterLength,
171 BySentence,
173}
174
175#[derive(Default, Debug, Deserialize, Serialize, Clone)]
176pub struct Viewport {
178 pub width: u32,
180 pub height: u32,
182 pub device_scale_factor: Option<f64>,
184 pub emulating_mobile: bool,
186 pub is_landscape: bool,
188 pub has_touch: bool,
190}
191
/// Base URL that every request path in this file is appended to.
const API_URL: &str = "https://api.spider.cloud";
194
195#[derive(Debug, Clone, Default, Deserialize, Serialize)]
197pub struct CSSSelector {
198 pub name: String,
200 pub selectors: Vec<String>,
202}
203
204pub type CSSExtractionMap = HashMap<String, Vec<CSSSelector>>;
206
207#[derive(Debug, Default, Deserialize, Serialize, Clone)]
209pub struct WebhookSettings {
210 destination: String,
212 on_credits_depleted: bool,
214 on_credits_half_depleted: bool,
216 on_website_status: bool,
218 on_find: bool,
220 on_find_metadata: bool,
222}
223
224#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
236pub enum ProxyType {
237 #[serde(rename = "residential")]
239 Residential,
240 #[serde(rename = "residential_fast")]
242 ResidentialFast,
243 #[serde(rename = "residential_static")]
245 ResidentialStatic,
246 #[serde(rename = "mobile")]
248 Mobile,
249 #[serde(rename = "isp", alias = "datacenter")]
251 #[default]
252 Isp,
253 #[serde(rename = "residential_premium")]
255 ResidentialPremium,
256 #[serde(rename = "residential_core")]
258 ResidentialCore,
259 #[serde(rename = "residential_plus")]
261 ResidentialPlus,
262}
263
264#[derive(Debug, Deserialize, Serialize, Clone)]
266#[serde(untagged)]
267pub enum ReturnFormatHandling {
268 Single(ReturnFormat),
270 Multi(std::collections::HashSet<ReturnFormat>),
272}
273
274impl Default for ReturnFormatHandling {
275 fn default() -> ReturnFormatHandling {
276 ReturnFormatHandling::Single(ReturnFormat::Raw)
277 }
278}
279
280#[derive(Debug, Default, Deserialize, Serialize, Clone)]
281pub struct EventTracker {
282 responses: Option<bool>,
284 requests: Option<bool>,
286}
287
288#[derive(Debug, Default, Deserialize, Serialize, Clone)]
290pub struct RequestParams {
291 #[serde(default)]
292 pub url: Option<String>,
294 #[serde(default)]
295 pub request: Option<RequestType>,
297 #[serde(default)]
298 pub limit: Option<u32>,
300 #[serde(default)]
301 pub return_format: Option<ReturnFormatHandling>,
303 #[serde(default)]
304 pub tld: Option<bool>,
306 #[serde(default)]
307 pub depth: Option<u32>,
309 #[serde(default)]
310 pub cache: Option<bool>,
312 #[serde(default)]
313 pub scroll: Option<u32>,
315 #[serde(default)]
316 pub budget: Option<HashMap<String, u32>>,
318 #[serde(default)]
319 pub blacklist: Option<Vec<String>>,
321 #[serde(default)]
322 pub whitelist: Option<Vec<String>>,
324 #[serde(default)]
325 pub locale: Option<String>,
327 #[serde(default)]
328 pub cookies: Option<String>,
330 #[serde(default)]
331 pub stealth: Option<bool>,
333 #[serde(default)]
334 pub headers: Option<HashMap<String, String>>,
336 #[serde(default)]
337 pub anti_bot: Option<bool>,
339 #[serde(default)]
340 pub webhooks: Option<WebhookSettings>,
342 #[serde(default)]
343 pub metadata: Option<bool>,
345 #[serde(default)]
346 pub viewport: Option<Viewport>,
348 #[serde(default)]
349 pub encoding: Option<String>,
351 #[serde(default)]
352 pub subdomains: Option<bool>,
354 #[serde(default)]
355 pub user_agent: Option<String>,
357 #[serde(default)]
358 pub store_data: Option<bool>,
360 #[serde(default)]
361 pub gpt_config: Option<HashMap<String, String>>,
363 #[serde(default)]
364 pub fingerprint: Option<bool>,
366 #[serde(default)]
367 pub storageless: Option<bool>,
369 #[serde(default)]
370 pub readability: Option<bool>,
372 #[serde(default)]
373 pub proxy_enabled: Option<bool>,
375 #[serde(default)]
376 pub respect_robots: Option<bool>,
378 #[serde(default)]
379 pub root_selector: Option<String>,
381 #[serde(default)]
382 pub full_resources: Option<bool>,
384 #[serde(default)]
385 pub text: Option<String>,
387 #[serde(default)]
388 pub sitemap: Option<bool>,
390 #[serde(default)]
391 pub external_domains: Option<Vec<String>>,
393 #[serde(default)]
394 pub return_embeddings: Option<bool>,
396 #[serde(default)]
397 pub return_headers: Option<bool>,
399 #[serde(default)]
400 pub return_page_links: Option<bool>,
402 #[serde(default)]
403 pub return_cookies: Option<bool>,
405 #[serde(default)]
406 pub request_timeout: Option<u8>,
408 #[serde(default)]
409 pub run_in_background: Option<bool>,
411 #[serde(default)]
412 pub skip_config_checks: Option<bool>,
414 #[serde(default)]
415 pub css_extraction_map: Option<CSSExtractionMap>,
417 #[serde(default)]
418 pub chunking_alg: Option<ChunkingAlgDict>,
420 #[serde(default)]
421 pub disable_intercept: Option<bool>,
423 #[serde(default)]
424 pub wait_for: Option<WaitFor>,
426 #[serde(default)]
427 pub execution_scripts: Option<ExecutionScriptsMap>,
429 #[serde(default)]
430 pub automation_scripts: Option<WebAutomationMap>,
432 #[serde(default)]
433 pub redirect_policy: Option<RedirectPolicy>,
435 #[serde(default)]
436 pub event_tracker: Option<EventTracker>,
438 #[serde(default)]
439 pub crawl_timeout: Option<Timeout>,
441 #[serde(default)]
442 pub evaluate_on_new_document: Option<Box<String>>,
444 #[serde(default)]
445 pub lite_mode: Option<bool>,
449 #[serde(default)]
450 pub proxy: Option<ProxyType>,
452 #[serde(default)]
453 pub remote_proxy: Option<String>,
456 #[serde(default)]
457 pub max_credits_per_page: Option<f64>,
461}
462
463#[derive(Debug, Default, Deserialize, Serialize, Clone)]
465pub struct SearchRequestParams {
466 #[serde(default, flatten)]
468 pub base: RequestParams,
469 pub search: String,
471 pub search_limit: Option<u32>,
473 pub fetch_page_content: Option<bool>,
475 pub location: Option<String>,
477 pub country: Option<String>,
479 pub language: Option<String>,
481 pub num: Option<u32>,
483 pub page: Option<u32>,
485 #[serde(default)]
486 pub website_limit: Option<u32>,
488}
489
490#[derive(Debug, Default, Deserialize, Serialize, Clone)]
492pub struct TransformParams {
493 #[serde(default)]
494 pub return_format: Option<ReturnFormat>,
496 #[serde(default)]
497 pub readability: Option<bool>,
499 #[serde(default)]
500 pub clean: Option<bool>,
502 #[serde(default)]
503 pub clean_full: Option<bool>,
505 pub data: Vec<DataParam>,
507}
508
509#[derive(Serialize, Deserialize, Debug, Clone)]
510pub struct DataParam {
511 pub html: String,
513 pub url: Option<String>,
515}
516
517#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq, Eq)]
519#[serde(rename_all = "lowercase")]
520pub enum RequestType {
521 Http,
523 Chrome,
525 #[default]
526 SmartMode,
528}
529
530#[derive(Default, Debug, Deserialize, Serialize, Clone, PartialEq, Eq, Hash)]
532#[serde(rename_all = "lowercase")]
533pub enum ReturnFormat {
534 #[default]
535 Raw,
537 Markdown,
539 Commonmark,
541 Html2text,
543 Text,
545 Xml,
547 Bytes,
549}
550
551#[derive(Debug, Default)]
553pub struct Spider {
554 pub api_key: String,
556 pub client: Client,
558}
559
560pub async fn handle_json(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
562 res.json().await
563}
564
565pub async fn handle_jsonl(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
567 let text = res.text().await?;
568 let lines = text
569 .lines()
570 .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
571 .collect::<Vec<_>>();
572 Ok(serde_json::Value::Array(lines))
573}
574
575pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
577 use std::collections::HashMap;
578 let text = res.text().await?;
579 let mut rdr = csv::Reader::from_reader(text.as_bytes());
580 let records: Vec<HashMap<String, String>> = rdr.deserialize().filter_map(Result::ok).collect();
581
582 if let Ok(record) = serde_json::to_value(records) {
583 Ok(record)
584 } else {
585 Ok(serde_json::Value::String(text))
586 }
587}
588
589pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
591 let text = res.text().await?;
592 match quick_xml::de::from_str::<serde_json::Value>(&text) {
593 Ok(val) => Ok(val),
594 Err(_) => Ok(serde_json::Value::String(text)),
595 }
596}
597
598pub async fn parse_response(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
599 let content_type = res
600 .headers()
601 .get(reqwest::header::CONTENT_TYPE)
602 .and_then(|v| v.to_str().ok())
603 .unwrap_or_default()
604 .to_ascii_lowercase();
605
606 if content_type.contains("json") && !content_type.contains("jsonl") {
607 handle_json(res).await
608 } else if content_type.contains("jsonl") || content_type.contains("ndjson") {
609 handle_jsonl(res).await
610 } else if content_type.contains("csv") {
611 handle_csv(res).await
612 } else if content_type.contains("xml") {
613 handle_xml(res).await
614 } else {
615 Ok(serde_json::Value::String(
616 res.text().await.unwrap_or_default(),
617 ))
618 }
619}
620
621impl Spider {
622 pub fn new(api_key: Option<String>) -> Result<Self, &'static str> {
632 let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
633
634 match api_key {
635 Some(key) => Ok(Self {
636 api_key: key,
637 client: Client::new(),
638 }),
639 None => Err("No API key provided"),
640 }
641 }
642
643 pub fn new_with_client(api_key: Option<String>, client: Client) -> Result<Self, &'static str> {
654 let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
655
656 match api_key {
657 Some(key) => Ok(Self {
658 api_key: key,
659 client,
660 }),
661 None => Err("No API key provided"),
662 }
663 }
664
665 async fn api_post_base(
678 &self,
679 endpoint: &str,
680 data: impl Serialize + Sized + std::fmt::Debug,
681 content_type: &str,
682 ) -> Result<Response, Error> {
683 let url: String = format!("{API_URL}/{}", endpoint);
684
685 self.client
686 .post(&url)
687 .header(
688 "User-Agent",
689 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
690 )
691 .header("Content-Type", content_type)
692 .header("Authorization", format!("Bearer {}", self.api_key))
693 .json(&data)
694 .send()
695 .await
696 }
697
698 async fn api_post(
711 &self,
712 endpoint: &str,
713 data: impl Serialize + std::fmt::Debug + Clone + Send + Sync,
714 content_type: &str,
715 ) -> Result<Response, Error> {
716 let fetch = || async {
717 self.api_post_base(endpoint, data.to_owned(), content_type)
718 .await
719 };
720
721 fetch
722 .retry(ExponentialBuilder::default().with_max_times(5))
723 .when(|err: &reqwest::Error| {
724 if let Some(status) = err.status() {
725 status.is_server_error()
726 } else {
727 err.is_timeout()
728 }
729 })
730 .await
731 }
732
733 async fn api_get_base<T: Serialize>(
743 &self,
744 endpoint: &str,
745 query_params: Option<&T>,
746 ) -> Result<serde_json::Value, reqwest::Error> {
747 let url = format!("{API_URL}/{}", endpoint);
748 let res = self
749 .client
750 .get(&url)
751 .query(&query_params)
752 .header(
753 "User-Agent",
754 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
755 )
756 .header("Content-Type", "application/json")
757 .header("Authorization", format!("Bearer {}", self.api_key))
758 .send()
759 .await?;
760 parse_response(res).await
761 }
762
763 async fn api_get<T: Serialize>(
773 &self,
774 endpoint: &str,
775 query_params: Option<&T>,
776 ) -> Result<serde_json::Value, reqwest::Error> {
777 let fetch = || async { self.api_get_base(endpoint, query_params.to_owned()).await };
778
779 fetch
780 .retry(ExponentialBuilder::default().with_max_times(5))
781 .when(|err: &reqwest::Error| {
782 if let Some(status) = err.status() {
783 status.is_server_error()
784 } else {
785 err.is_timeout()
786 }
787 })
788 .await
789 }
790
791 async fn api_delete_base(
804 &self,
805 endpoint: &str,
806 params: Option<HashMap<String, serde_json::Value>>,
807 ) -> Result<Response, Error> {
808 let url = format!("{API_URL}/v1/{}", endpoint);
809 let request_builder = self
810 .client
811 .delete(&url)
812 .header(
813 "User-Agent",
814 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
815 )
816 .header("Content-Type", "application/json")
817 .header("Authorization", format!("Bearer {}", self.api_key));
818
819 let request_builder = if let Some(params) = params {
820 request_builder.json(¶ms)
821 } else {
822 request_builder
823 };
824
825 request_builder.send().await
826 }
827
828 async fn api_delete(
841 &self,
842 endpoint: &str,
843 params: Option<HashMap<String, serde_json::Value>>,
844 ) -> Result<Response, Error> {
845 let fetch = || async { self.api_delete_base(endpoint, params.to_owned()).await };
846
847 fetch
848 .retry(ExponentialBuilder::default().with_max_times(5))
849 .when(|err: &reqwest::Error| {
850 if let Some(status) = err.status() {
851 status.is_server_error()
852 } else {
853 err.is_timeout()
854 }
855 })
856 .await
857 }
858
859 pub async fn scrape_url(
872 &self,
873 url: &str,
874 params: Option<RequestParams>,
875 content_type: &str,
876 ) -> Result<serde_json::Value, reqwest::Error> {
877 let mut data = HashMap::new();
878
879 data.insert(
880 "url".to_string(),
881 serde_json::Value::String(url.to_string()),
882 );
883 data.insert("limit".to_string(), serde_json::Value::Number(1.into()));
884
885 if let Ok(params) = serde_json::to_value(params) {
886 if let Some(ref p) = params.as_object() {
887 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
888 }
889 }
890
891 let res = self.api_post("crawl", data, content_type).await?;
892 parse_response(res).await
893 }
894
895 pub async fn crawl_url(
909 &self,
910 url: &str,
911 params: Option<RequestParams>,
912 stream: bool,
913 content_type: &str,
914 callback: Option<impl Fn(serde_json::Value) + Send>,
915 ) -> Result<serde_json::Value, reqwest::Error> {
916 use tokio_util::codec::{FramedRead, LinesCodec};
917
918 let mut data = HashMap::new();
919
920 if let Ok(params) = serde_json::to_value(params) {
921 if let Some(ref p) = params.as_object() {
922 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
923 }
924 }
925
926 data.insert("url".into(), serde_json::Value::String(url.to_string()));
927
928 let res = self.api_post("crawl", data, content_type).await?;
929
930 if stream {
931 if let Some(callback) = callback {
932 let stream = res.bytes_stream();
933
934 let stream_reader = tokio_util::io::StreamReader::new(
935 stream
936 .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
937 );
938
939 let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
940
941 while let Some(line_result) = lines.next().await {
942 match line_result {
943 Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
944 Ok(value) => {
945 callback(value);
946 }
947 Err(_e) => {
948 continue;
949 }
950 },
951 Err(_e) => return Ok(serde_json::Value::Null),
952 }
953 }
954
955 Ok(serde_json::Value::Null)
956 } else {
957 Ok(serde_json::Value::Null)
958 }
959 } else {
960 parse_response(res).await
961 }
962 }
963
964 pub async fn links(
977 &self,
978 url: &str,
979 params: Option<RequestParams>,
980 _stream: bool,
981 content_type: &str,
982 ) -> Result<serde_json::Value, reqwest::Error> {
983 let mut data = HashMap::new();
984
985 if let Ok(params) = serde_json::to_value(params) {
986 if let Some(ref p) = params.as_object() {
987 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
988 }
989 }
990
991 data.insert("url".into(), serde_json::Value::String(url.to_string()));
992
993 let res = self.api_post("links", data, content_type).await?;
994 parse_response(res).await
995 }
996
997 pub async fn screenshot(
1010 &self,
1011 url: &str,
1012 params: Option<RequestParams>,
1013 _stream: bool,
1014 content_type: &str,
1015 ) -> Result<serde_json::Value, reqwest::Error> {
1016 let mut data = HashMap::new();
1017
1018 if let Ok(params) = serde_json::to_value(params) {
1019 if let Some(ref p) = params.as_object() {
1020 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1021 }
1022 }
1023
1024 data.insert("url".into(), serde_json::Value::String(url.to_string()));
1025
1026 let res = self.api_post("screenshot", data, content_type).await?;
1027 parse_response(res).await
1028 }
1029
1030 pub async fn search(
1043 &self,
1044 q: &str,
1045 params: Option<SearchRequestParams>,
1046 _stream: bool,
1047 content_type: &str,
1048 ) -> Result<serde_json::Value, reqwest::Error> {
1049 let body = match params {
1050 Some(mut params) => {
1051 params.search = q.to_string();
1052 params
1053 }
1054 _ => {
1055 let mut params = SearchRequestParams::default();
1056 params.search = q.to_string();
1057 params
1058 }
1059 };
1060
1061 let res = self.api_post("search", body, content_type).await?;
1062
1063 parse_response(res).await
1064 }
1065
1066 pub async fn transform(
1079 &self,
1080 data: Vec<HashMap<&str, &str>>,
1081 params: Option<TransformParams>,
1082 _stream: bool,
1083 content_type: &str,
1084 ) -> Result<serde_json::Value, reqwest::Error> {
1085 let mut payload = HashMap::new();
1086
1087 if let Ok(params) = serde_json::to_value(params) {
1088 if let Some(ref p) = params.as_object() {
1089 payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1090 }
1091 }
1092
1093 if let Ok(d) = serde_json::to_value(data) {
1094 payload.insert("data".into(), d);
1095 }
1096
1097 let res = self.api_post("transform", payload, content_type).await?;
1098
1099 parse_response(res).await
1100 }
1101
1102 pub async fn extract_contacts(
1115 &self,
1116 url: &str,
1117 params: Option<RequestParams>,
1118 _stream: bool,
1119 content_type: &str,
1120 ) -> Result<serde_json::Value, reqwest::Error> {
1121 let mut data = HashMap::new();
1122
1123 if let Ok(params) = serde_json::to_value(params) {
1124 if let Ok(params) = serde_json::to_value(params) {
1125 if let Some(ref p) = params.as_object() {
1126 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1127 }
1128 }
1129 }
1130
1131 match serde_json::to_value(url) {
1132 Ok(u) => {
1133 data.insert("url".into(), u);
1134 }
1135 _ => (),
1136 }
1137
1138 let res = self
1139 .api_post("pipeline/extract-contacts", data, content_type)
1140 .await?;
1141
1142 parse_response(res).await
1143 }
1144
1145 pub async fn label(
1158 &self,
1159 url: &str,
1160 params: Option<RequestParams>,
1161 _stream: bool,
1162 content_type: &str,
1163 ) -> Result<serde_json::Value, reqwest::Error> {
1164 let mut data = HashMap::new();
1165
1166 if let Ok(params) = serde_json::to_value(params) {
1167 if let Ok(params) = serde_json::to_value(params) {
1168 if let Some(ref p) = params.as_object() {
1169 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1170 }
1171 }
1172 }
1173
1174 data.insert("url".into(), serde_json::Value::String(url.to_string()));
1175
1176 let res = self.api_post("pipeline/label", data, content_type).await?;
1177 parse_response(res).await
1178 }
1179
1180 pub async fn download(
1192 &self,
1193 url: Option<&str>,
1194 options: Option<HashMap<&str, i32>>,
1195 ) -> Result<reqwest::Response, reqwest::Error> {
1196 let mut params = HashMap::new();
1197
1198 if let Some(url) = url {
1199 params.insert("url".to_string(), url.to_string());
1200 }
1201
1202 if let Some(options) = options {
1203 for (key, value) in options {
1204 params.insert(key.to_string(), value.to_string());
1205 }
1206 }
1207
1208 let url = format!("{API_URL}/v1/data/download");
1209 let request = self
1210 .client
1211 .get(&url)
1212 .header(
1213 "User-Agent",
1214 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
1215 )
1216 .header("Content-Type", "application/octet-stream")
1217 .header("Authorization", format!("Bearer {}", self.api_key))
1218 .query(¶ms);
1219
1220 let res = request.send().await?;
1221
1222 Ok(res)
1223 }
1224
1225 pub async fn create_signed_url(
1237 &self,
1238 url: Option<&str>,
1239 options: Option<HashMap<&str, i32>>,
1240 ) -> Result<serde_json::Value, reqwest::Error> {
1241 let mut params = HashMap::new();
1242
1243 if let Some(options) = options {
1244 for (key, value) in options {
1245 params.insert(key.to_string(), value.to_string());
1246 }
1247 }
1248
1249 if let Some(url) = url {
1250 params.insert("url".to_string(), url.to_string());
1251 }
1252
1253 let url = format!("{API_URL}/v1/data/sign-url");
1254 let request = self
1255 .client
1256 .get(&url)
1257 .header(
1258 "User-Agent",
1259 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
1260 )
1261 .header("Authorization", format!("Bearer {}", self.api_key))
1262 .query(¶ms);
1263
1264 let res = request.send().await?;
1265
1266 parse_response(res).await
1267 }
1268
1269 pub async fn get_crawl_state(
1281 &self,
1282 url: &str,
1283 params: Option<RequestParams>,
1284 content_type: &str,
1285 ) -> Result<serde_json::Value, reqwest::Error> {
1286 let mut payload = HashMap::new();
1287 payload.insert("url".into(), serde_json::Value::String(url.to_string()));
1288 payload.insert(
1289 "contentType".into(),
1290 serde_json::Value::String(content_type.to_string()),
1291 );
1292
1293 if let Ok(params) = serde_json::to_value(params) {
1294 if let Ok(params) = serde_json::to_value(params) {
1295 if let Some(ref p) = params.as_object() {
1296 payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1297 }
1298 }
1299 }
1300
1301 let res = self
1302 .api_post("data/crawl_state", payload, content_type)
1303 .await?;
1304 parse_response(res).await
1305 }
1306
1307 pub async fn get_credits(&self) -> Result<serde_json::Value, reqwest::Error> {
1309 self.api_get::<serde_json::Value>("data/credits", None)
1310 .await
1311 }
1312
1313 pub async fn data_post(
1315 &self,
1316 table: &str,
1317 data: Option<RequestParams>,
1318 ) -> Result<serde_json::Value, reqwest::Error> {
1319 let res = self
1320 .api_post(&format!("data/{}", table), data, "application/json")
1321 .await?;
1322 parse_response(res).await
1323 }
1324
1325 pub async fn query(&self, params: &QueryRequest) -> Result<serde_json::Value, reqwest::Error> {
1327 let res = self
1328 .api_get::<QueryRequest>(&"data/query", Some(params))
1329 .await?;
1330
1331 Ok(res)
1332 }
1333
1334 pub async fn data_get(
1336 &self,
1337 table: &str,
1338 params: Option<RequestParams>,
1339 ) -> Result<serde_json::Value, reqwest::Error> {
1340 let mut payload = HashMap::new();
1341
1342 if let Some(params) = params {
1343 if let Ok(p) = serde_json::to_value(params) {
1344 if let Some(o) = p.as_object() {
1345 payload.extend(o.iter().map(|(k, v)| (k.as_str(), v.clone())));
1346 }
1347 }
1348 }
1349
1350 let res = self
1351 .api_get::<serde_json::Value>(&format!("data/{}", table), None)
1352 .await?;
1353 Ok(res)
1354 }
1355
1356 pub async fn data_delete(
1358 &self,
1359 table: &str,
1360 params: Option<RequestParams>,
1361 ) -> Result<serde_json::Value, reqwest::Error> {
1362 let mut payload = HashMap::new();
1363
1364 if let Ok(params) = serde_json::to_value(params) {
1365 if let Ok(params) = serde_json::to_value(params) {
1366 if let Some(ref p) = params.as_object() {
1367 payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1368 }
1369 }
1370 }
1371
1372 let res = self
1373 .api_delete(&format!("data/{}", table), Some(payload))
1374 .await?;
1375 parse_response(res).await
1376 }
1377}
1378
/// Integration tests for [`Spider`].
///
/// Every test sends a real request, so a valid `SPIDER_API_KEY` (from the environment or a
/// `.env` file) and network access are required. Tests marked `#[ignore]` only run when
/// requested explicitly (`cargo test -- --ignored`).
#[cfg(test)]
mod tests {
    use super::*;
    use dotenv::dotenv;
    use lazy_static::lazy_static;
    use reqwest::ClientBuilder;

    lazy_static! {
        /// Shared client; the key is read from the environment after loading `.env`.
        static ref SPIDER_CLIENT: Spider = {
            dotenv().ok();
            let http = ClientBuilder::new()
                .user_agent("SpiderBot")
                .build()
                .unwrap();

            Spider::new_with_client(None, http).expect("client to build")
        };
    }

    #[tokio::test]
    #[ignore]
    async fn test_scrape_url() {
        let response = SPIDER_CLIENT
            .scrape_url("https://example.com", None, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    async fn test_crawl_url() {
        let response = SPIDER_CLIENT
            .crawl_url(
                "https://example.com",
                None,
                false,
                "application/json",
                None::<fn(serde_json::Value)>,
            )
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_links() {
        let response: Result<serde_json::Value, Error> = SPIDER_CLIENT
            .links("https://example.com", None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_screenshot() {
        let mut params = RequestParams::default();
        params.limit = Some(1);

        let response = SPIDER_CLIENT
            .screenshot(
                "https://example.com",
                Some(params),
                false,
                "application/json",
            )
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_transform() {
        // Each entry mirrors `DataParam`: the markup goes under the `html` key. The previous
        // payload used the markup itself as the key with an empty value.
        let data = vec![HashMap::from([(
            "html",
            "<html><body><h1>Transformation</h1></body></html>",
        )])];
        let response = SPIDER_CLIENT
            .transform(data, None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_extract_contacts() {
        let response = SPIDER_CLIENT
            .extract_contacts("https://example.com", None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_label() {
        let response = SPIDER_CLIENT
            .label("https://example.com", None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    async fn test_create_signed_url() {
        let response = SPIDER_CLIENT
            .create_signed_url(Some("example.com"), None)
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    async fn test_get_crawl_state() {
        let response = SPIDER_CLIENT
            .get_crawl_state("https://example.com", None, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    async fn test_query() {
        let mut query = QueryRequest::default();

        query.domain = Some("spider.cloud".into());

        let response = SPIDER_CLIENT.query(&query).await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    async fn test_get_credits() {
        let response = SPIDER_CLIENT.get_credits().await;
        assert!(response.is_ok());
    }
}