1use backon::ExponentialBuilder;
65use backon::Retryable;
66use reqwest::Client;
67use reqwest::{Error, Response};
68use serde::{Deserialize, Serialize};
69use std::collections::HashMap;
70use tokio_stream::StreamExt;
71
72#[derive(Debug, Deserialize, Serialize, Clone)]
74pub struct ChunkingAlgDict {
75 r#type: ChunkingType,
77 value: i32,
79}
80
/// A timeout split into a seconds part and a nanoseconds part.
///
/// NOTE(review): the `secs`/`nanos` pair looks like the serde layout of
/// `std::time::Duration`; confirm that is what the API expects.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Timeout {
    /// Seconds component.
    pub secs: u64,
    /// Nanoseconds component.
    pub nanos: u32,
}
89
/// Idle-network wait condition, carrying only a timeout.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct IdleNetwork {
    /// Timeout attached to this wait condition.
    pub timeout: Timeout,
}
95
/// A single browser automation step.
///
/// Serialized as an internally tagged object: the variant name is written to a
/// `type` key next to the variant's own fields, e.g.
/// `{"type": "Click", "selector": "..."}`.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(tag = "type", rename_all = "PascalCase")]
pub enum WebAutomation {
    /// Carries a `code` string (presumably script source to evaluate; confirm).
    Evaluate { code: String },
    /// Carries the `selector` to act on.
    Click { selector: String },
    /// Carries a `duration` (unit not visible here; TODO confirm).
    Wait { duration: u64 },
    /// No payload.
    WaitForNavigation,
    /// Carries the `selector` to wait for.
    WaitFor { selector: String },
    /// Carries the `selector` to wait for and then click.
    WaitForAndClick { selector: String },
    /// Carries a signed `pixels` amount.
    ScrollX { pixels: i32 },
    /// Carries a signed `pixels` amount.
    ScrollY { pixels: i32 },
    /// Carries a `selector` and the `value` to fill in.
    Fill { selector: String, value: String },
    /// Carries a repeat count, `times`.
    InfiniteScroll { times: u32 },
}
110
/// Redirect policy for a request; defaults to `Strict`.
///
/// NOTE(review): with `tag = "type"` these unit variants serialize as an object
/// (`{"type": "Strict"}`), not as a bare string; confirm the API expects that shape.
#[derive(Default, Serialize, Deserialize, Debug, Clone)]
#[serde(tag = "type", rename_all = "PascalCase")]
pub enum RedirectPolicy {
    Loose,
    #[default]
    Strict,
}
118
119pub type WebAutomationMap = std::collections::HashMap<String, Vec<WebAutomation>>;
/// Map from a string key to a script string
/// (keys are presumably URLs or paths; confirm against the API).
pub type ExecutionScriptsMap = HashMap<String, String>;
121
/// Selector wait condition: a selector string plus a timeout.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Selector {
    /// Timeout attached to this wait condition.
    pub timeout: Timeout,
    /// Selector string (presumably a CSS selector; confirm).
    pub selector: String,
}
129
/// Delay wait condition, carrying only a timeout.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Delay {
    /// Timeout attached to this wait condition.
    pub timeout: Timeout,
}
135
/// Optional wait conditions for a request; each one is independent and may be left as `None`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct WaitFor {
    /// Idle-network condition.
    pub idle_network: Option<IdleNetwork>,
    /// Selector condition.
    pub selector: Option<Selector>,
    /// Delay condition.
    pub delay: Option<Delay>,
    /// Page-navigation flag.
    pub page_navigations: Option<bool>,
}
147
/// Parameters sent by `Spider::query`; all optional, and the default value has every field unset.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct QueryRequest {
    /// URL to look up.
    pub url: Option<String>,
    /// Domain to look up.
    pub domain: Option<String>,
    /// Pathname to look up.
    pub pathname: Option<String>,
}
158
/// Chunking strategy; defaults to `ByWords`.
///
/// `rename_all = "lowercase"` writes each variant as its name lowercased with no
/// separators, e.g. `ByWords` becomes `"bywords"`.
#[derive(Default, Debug, Deserialize, Serialize, Clone)]
#[serde(rename_all = "lowercase")]
pub enum ChunkingType {
    #[default]
    ByWords,
    ByLines,
    ByCharacterLength,
    BySentence,
}
173
/// Viewport settings sent with a request; the default value is all zeros, `None` and `false`.
#[derive(Default, Debug, Deserialize, Serialize, Clone)]
pub struct Viewport {
    /// Viewport width (presumably in pixels; confirm).
    pub width: u32,
    /// Viewport height (presumably in pixels; confirm).
    pub height: u32,
    /// Optional device scale factor.
    pub device_scale_factor: Option<f64>,
    /// Mobile-emulation flag.
    pub emulating_mobile: bool,
    /// Landscape-orientation flag.
    pub is_landscape: bool,
    /// Touch-support flag.
    pub has_touch: bool,
}
190
/// Base URL that every endpoint path in this file is appended to.
const API_URL: &str = "https://api.spider.cloud";
193
/// A named group of selector strings.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct CSSSelector {
    /// Name of the group.
    pub name: String,
    /// Selector strings belonging to the group.
    pub selectors: Vec<String>,
}
202
/// Map from a string key to the selector groups registered under it
/// (keys are presumably URLs or paths; confirm against the API).
pub type CSSExtractionMap = HashMap<String, Vec<CSSSelector>>;
205
206#[derive(Debug, Default, Deserialize, Serialize, Clone)]
208pub struct WebhookSettings {
209 destination: String,
211 on_credits_depleted: bool,
213 on_credits_half_depleted: bool,
215 on_website_status: bool,
217 on_find: bool,
219 on_find_metadata: bool,
221}
222
223#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
235pub enum ProxyType {
236 #[serde(rename = "residential")]
238 Residential,
239 #[serde(rename = "residential_fast")]
241 ResidentialFast,
242 #[serde(rename = "residential_static")]
244 ResidentialStatic,
245 #[serde(rename = "mobile")]
247 Mobile,
248 #[serde(rename = "isp", alias = "datacenter")]
250 #[default]
251 Isp,
252 #[serde(rename = "residential_premium")]
254 ResidentialPremium,
255 #[serde(rename = "residential_core")]
257 ResidentialCore,
258 #[serde(rename = "residential_plus")]
260 ResidentialPlus,
261}
262
/// Either one return format or a set of them.
///
/// `untagged` means the value is written without a wrapper: `Single` as the bare
/// format string and `Multi` as an array of format strings.
#[derive(Debug, Deserialize, Serialize, Clone)]
#[serde(untagged)]
pub enum ReturnFormatHandling {
    /// Exactly one format.
    Single(ReturnFormat),
    /// Several formats, with duplicates collapsed by the set.
    Multi(std::collections::HashSet<ReturnFormat>),
}
272
273impl Default for ReturnFormatHandling {
274 fn default() -> ReturnFormatHandling {
275 ReturnFormatHandling::Single(ReturnFormat::Raw)
276 }
277}
278
279#[derive(Debug, Default, Deserialize, Serialize, Clone)]
280pub struct EventTracker {
281 responses: Option<bool>,
283 requests: Option<bool>,
285}
286
/// Request options shared by the crawl, scrape, links, screenshot and pipeline calls.
///
/// Every field is optional, and `RequestParams::default()` leaves all of them
/// unset. No field uses `skip_serializing_if`, so an unset field is serialized
/// as an explicit `null`, not omitted.
#[derive(Debug, Default, Deserialize, Serialize, Clone)]
pub struct RequestParams {
    #[serde(default)]
    pub url: Option<String>,
    #[serde(default)]
    pub request: Option<RequestType>,
    #[serde(default)]
    pub limit: Option<u32>,
    #[serde(default)]
    pub return_format: Option<ReturnFormatHandling>,
    #[serde(default)]
    pub tld: Option<bool>,
    #[serde(default)]
    pub depth: Option<u32>,
    #[serde(default)]
    pub cache: Option<bool>,
    #[serde(default)]
    pub scroll: Option<u32>,
    #[serde(default)]
    pub budget: Option<HashMap<String, u32>>,
    #[serde(default)]
    pub blacklist: Option<Vec<String>>,
    #[serde(default)]
    pub whitelist: Option<Vec<String>>,
    #[serde(default)]
    pub locale: Option<String>,
    #[serde(default)]
    pub cookies: Option<String>,
    #[serde(default)]
    pub stealth: Option<bool>,
    #[serde(default)]
    pub headers: Option<HashMap<String, String>>,
    #[serde(default)]
    pub anti_bot: Option<bool>,
    #[serde(default)]
    pub webhooks: Option<WebhookSettings>,
    #[serde(default)]
    pub metadata: Option<bool>,
    #[serde(default)]
    pub viewport: Option<Viewport>,
    #[serde(default)]
    pub encoding: Option<String>,
    #[serde(default)]
    pub subdomains: Option<bool>,
    #[serde(default)]
    pub user_agent: Option<String>,
    #[serde(default)]
    pub store_data: Option<bool>,
    #[serde(default)]
    pub gpt_config: Option<HashMap<String, String>>,
    #[serde(default)]
    pub fingerprint: Option<bool>,
    #[serde(default)]
    pub storageless: Option<bool>,
    #[serde(default)]
    pub readability: Option<bool>,
    #[serde(default)]
    pub proxy_enabled: Option<bool>,
    #[serde(default)]
    pub respect_robots: Option<bool>,
    #[serde(default)]
    pub root_selector: Option<String>,
    #[serde(default)]
    pub full_resources: Option<bool>,
    #[serde(default)]
    pub text: Option<String>,
    #[serde(default)]
    pub sitemap: Option<bool>,
    #[serde(default)]
    pub external_domains: Option<Vec<String>>,
    #[serde(default)]
    pub return_embeddings: Option<bool>,
    #[serde(default)]
    pub return_headers: Option<bool>,
    #[serde(default)]
    pub return_page_links: Option<bool>,
    #[serde(default)]
    pub return_cookies: Option<bool>,
    // NOTE(review): `u8` caps this at 255; the unit is not visible here.
    #[serde(default)]
    pub request_timeout: Option<u8>,
    #[serde(default)]
    pub run_in_background: Option<bool>,
    #[serde(default)]
    pub skip_config_checks: Option<bool>,
    #[serde(default)]
    pub css_extraction_map: Option<CSSExtractionMap>,
    #[serde(default)]
    pub chunking_alg: Option<ChunkingAlgDict>,
    #[serde(default)]
    pub disable_intercept: Option<bool>,
    #[serde(default)]
    pub wait_for: Option<WaitFor>,
    #[serde(default)]
    pub execution_scripts: Option<ExecutionScriptsMap>,
    #[serde(default)]
    pub automation_scripts: Option<WebAutomationMap>,
    #[serde(default)]
    pub redirect_policy: Option<RedirectPolicy>,
    #[serde(default)]
    pub event_tracker: Option<EventTracker>,
    #[serde(default)]
    pub crawl_timeout: Option<Timeout>,
    #[serde(default)]
    pub evaluate_on_new_document: Option<Box<String>>,
    #[serde(default)]
    pub lite_mode: Option<bool>,
    #[serde(default)]
    pub proxy: Option<ProxyType>,
    #[serde(default)]
    pub remote_proxy: Option<String>,
    #[serde(default)]
    pub max_credits_per_page: Option<f64>,
}
461
/// Body sent by `Spider::search`: the shared request options plus search-specific fields.
#[derive(Debug, Default, Deserialize, Serialize, Clone)]
pub struct SearchRequestParams {
    /// Shared options; `flatten` writes their keys at the top level of the body, not nested.
    #[serde(default, flatten)]
    pub base: RequestParams,
    /// Search query text; `Spider::search` overwrites this with its `q` argument.
    pub search: String,
    pub search_limit: Option<u32>,
    pub fetch_page_content: Option<bool>,
    pub location: Option<String>,
    pub country: Option<String>,
    pub language: Option<String>,
    pub num: Option<u32>,
    pub page: Option<u32>,
    #[serde(default)]
    pub website_limit: Option<u32>,
}
488
/// Options for the transform endpoint, together with the documents to transform.
#[derive(Debug, Default, Deserialize, Serialize, Clone)]
pub struct TransformParams {
    /// Output format for the transformed content.
    #[serde(default)]
    pub return_format: Option<ReturnFormat>,
    #[serde(default)]
    pub readability: Option<bool>,
    #[serde(default)]
    pub clean: Option<bool>,
    #[serde(default)]
    pub clean_full: Option<bool>,
    /// Documents to transform. There is no `#[serde(default)]` here, so the key is required when deserializing.
    pub data: Vec<DataParam>,
}
507
/// One document for the transform endpoint.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct DataParam {
    /// HTML content of the document.
    pub html: String,
    /// Optional URL associated with the document.
    pub url: Option<String>,
}
515
/// Request mode; defaults to `SmartMode`.
///
/// `rename_all = "lowercase"` writes the variants as `"http"`, `"chrome"` and `"smartmode"`.
#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum RequestType {
    Http,
    Chrome,
    #[default]
    SmartMode,
}
528
/// Output format for returned content; defaults to `Raw`.
///
/// `rename_all = "lowercase"` writes each variant as its name lowercased, e.g.
/// `Html2text` becomes `"html2text"`. `Hash` and `Eq` are derived so values can
/// be stored in the `HashSet` used by `ReturnFormatHandling::Multi`.
#[derive(Default, Debug, Deserialize, Serialize, Clone, PartialEq, Eq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum ReturnFormat {
    #[default]
    Raw,
    Markdown,
    Commonmark,
    Html2text,
    Text,
    Xml,
    Bytes,
}
549
550#[derive(Debug, Default)]
552pub struct Spider {
553 pub api_key: String,
555 pub client: Client,
557}
558
559pub async fn handle_json(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
561 res.json().await
562}
563
564pub async fn handle_jsonl(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
566 let text = res.text().await?;
567 let lines = text
568 .lines()
569 .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
570 .collect::<Vec<_>>();
571 Ok(serde_json::Value::Array(lines))
572}
573
/// Reads a CSV body and returns its rows as a JSON array of string-to-string objects.
///
/// Rows that fail to deserialize are skipped. If the rows cannot be converted
/// to JSON, the raw body text is returned as a JSON string instead.
///
/// # Errors
/// Returns the `reqwest::Error` raised while reading the body.
#[cfg(feature = "csv")]
pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    use std::collections::HashMap;

    let body = res.text().await?;
    let mut reader = csv::Reader::from_reader(body.as_bytes());
    let mut rows: Vec<HashMap<String, String>> = Vec::new();

    for row in reader.deserialize::<HashMap<String, String>>() {
        if let Ok(row) = row {
            rows.push(row);
        }
    }

    match serde_json::to_value(rows) {
        Ok(value) => Ok(value),
        Err(_) => Ok(serde_json::Value::String(body)),
    }
}
588
589#[cfg(not(feature = "csv"))]
590pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
591 handle_text(res).await
592}
593
594pub async fn handle_text(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
596 Ok(serde_json::Value::String(
597 res.text().await.unwrap_or_default(),
598 ))
599}
600
/// Reads an XML body and deserializes it into a JSON value.
///
/// If the XML cannot be deserialized, the raw body text is returned as a JSON
/// string instead.
///
/// # Errors
/// Returns the `reqwest::Error` raised while reading the body.
// NOTE(review): this XML handler is gated on the `csv` feature, which may be a
// copy-paste of the CSV handler's attribute; confirm the intended feature name.
#[cfg(feature = "csv")]
pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    let body = res.text().await?;
    let parsed = quick_xml::de::from_str::<serde_json::Value>(&body);

    Ok(parsed.unwrap_or_else(|_| serde_json::Value::String(body)))
}
610
611#[cfg(not(feature = "csv"))]
612pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
614 handle_text(res).await
615}
616
617pub async fn parse_response(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
618 let content_type = res
619 .headers()
620 .get(reqwest::header::CONTENT_TYPE)
621 .and_then(|v| v.to_str().ok())
622 .unwrap_or_default()
623 .to_ascii_lowercase();
624
625 if content_type.contains("json") && !content_type.contains("jsonl") {
626 handle_json(res).await
627 } else if content_type.contains("jsonl") || content_type.contains("ndjson") {
628 handle_jsonl(res).await
629 } else if content_type.contains("csv") {
630 handle_csv(res).await
631 } else if content_type.contains("xml") {
632 handle_xml(res).await
633 } else {
634 handle_text(res).await
635 }
636}
637
638impl Spider {
639 pub fn new(api_key: Option<String>) -> Result<Self, &'static str> {
649 let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
650
651 match api_key {
652 Some(key) => Ok(Self {
653 api_key: key,
654 client: Client::new(),
655 }),
656 None => Err("No API key provided"),
657 }
658 }
659
660 pub fn new_with_client(api_key: Option<String>, client: Client) -> Result<Self, &'static str> {
671 let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
672
673 match api_key {
674 Some(key) => Ok(Self {
675 api_key: key,
676 client,
677 }),
678 None => Err("No API key provided"),
679 }
680 }
681
682 async fn api_post_base(
695 &self,
696 endpoint: &str,
697 data: impl Serialize + Sized + std::fmt::Debug,
698 content_type: &str,
699 ) -> Result<Response, Error> {
700 let url: String = format!("{API_URL}/{}", endpoint);
701
702 self.client
703 .post(&url)
704 .header(
705 "User-Agent",
706 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
707 )
708 .header("Content-Type", content_type)
709 .header("Authorization", format!("Bearer {}", self.api_key))
710 .json(&data)
711 .send()
712 .await
713 }
714
715 async fn api_post(
728 &self,
729 endpoint: &str,
730 data: impl Serialize + std::fmt::Debug + Clone + Send + Sync,
731 content_type: &str,
732 ) -> Result<Response, Error> {
733 let fetch = || async {
734 self.api_post_base(endpoint, data.to_owned(), content_type)
735 .await
736 };
737
738 fetch
739 .retry(ExponentialBuilder::default().with_max_times(5))
740 .when(|err: &reqwest::Error| {
741 if let Some(status) = err.status() {
742 status.is_server_error()
743 } else {
744 err.is_timeout()
745 }
746 })
747 .await
748 }
749
750 async fn api_get_base<T: Serialize>(
760 &self,
761 endpoint: &str,
762 query_params: Option<&T>,
763 ) -> Result<serde_json::Value, reqwest::Error> {
764 let url = format!("{API_URL}/{}", endpoint);
765 let res = self
766 .client
767 .get(&url)
768 .query(&query_params)
769 .header(
770 "User-Agent",
771 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
772 )
773 .header("Content-Type", "application/json")
774 .header("Authorization", format!("Bearer {}", self.api_key))
775 .send()
776 .await?;
777 parse_response(res).await
778 }
779
780 async fn api_get<T: Serialize>(
790 &self,
791 endpoint: &str,
792 query_params: Option<&T>,
793 ) -> Result<serde_json::Value, reqwest::Error> {
794 let fetch = || async { self.api_get_base(endpoint, query_params.to_owned()).await };
795
796 fetch
797 .retry(ExponentialBuilder::default().with_max_times(5))
798 .when(|err: &reqwest::Error| {
799 if let Some(status) = err.status() {
800 status.is_server_error()
801 } else {
802 err.is_timeout()
803 }
804 })
805 .await
806 }
807
808 async fn api_delete_base(
821 &self,
822 endpoint: &str,
823 params: Option<HashMap<String, serde_json::Value>>,
824 ) -> Result<Response, Error> {
825 let url = format!("{API_URL}/v1/{}", endpoint);
826 let request_builder = self
827 .client
828 .delete(&url)
829 .header(
830 "User-Agent",
831 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
832 )
833 .header("Content-Type", "application/json")
834 .header("Authorization", format!("Bearer {}", self.api_key));
835
836 let request_builder = if let Some(params) = params {
837 request_builder.json(¶ms)
838 } else {
839 request_builder
840 };
841
842 request_builder.send().await
843 }
844
845 async fn api_delete(
858 &self,
859 endpoint: &str,
860 params: Option<HashMap<String, serde_json::Value>>,
861 ) -> Result<Response, Error> {
862 let fetch = || async { self.api_delete_base(endpoint, params.to_owned()).await };
863
864 fetch
865 .retry(ExponentialBuilder::default().with_max_times(5))
866 .when(|err: &reqwest::Error| {
867 if let Some(status) = err.status() {
868 status.is_server_error()
869 } else {
870 err.is_timeout()
871 }
872 })
873 .await
874 }
875
876 pub async fn scrape_url(
889 &self,
890 url: &str,
891 params: Option<RequestParams>,
892 content_type: &str,
893 ) -> Result<serde_json::Value, reqwest::Error> {
894 let mut data = HashMap::new();
895
896 data.insert(
897 "url".to_string(),
898 serde_json::Value::String(url.to_string()),
899 );
900 data.insert("limit".to_string(), serde_json::Value::Number(1.into()));
901
902 if let Ok(params) = serde_json::to_value(params) {
903 if let Some(ref p) = params.as_object() {
904 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
905 }
906 }
907
908 let res = self.api_post("crawl", data, content_type).await?;
909 parse_response(res).await
910 }
911
912 pub async fn crawl_url(
926 &self,
927 url: &str,
928 params: Option<RequestParams>,
929 stream: bool,
930 content_type: &str,
931 callback: Option<impl Fn(serde_json::Value) + Send>,
932 ) -> Result<serde_json::Value, reqwest::Error> {
933 use tokio_util::codec::{FramedRead, LinesCodec};
934
935 let mut data = HashMap::new();
936
937 if let Ok(params) = serde_json::to_value(params) {
938 if let Some(ref p) = params.as_object() {
939 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
940 }
941 }
942
943 data.insert("url".into(), serde_json::Value::String(url.to_string()));
944
945 let res = self.api_post("crawl", data, content_type).await?;
946
947 if stream {
948 if let Some(callback) = callback {
949 let stream = res.bytes_stream();
950
951 let stream_reader = tokio_util::io::StreamReader::new(
952 stream
953 .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
954 );
955
956 let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
957
958 while let Some(line_result) = lines.next().await {
959 match line_result {
960 Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
961 Ok(value) => {
962 callback(value);
963 }
964 Err(_e) => {
965 continue;
966 }
967 },
968 Err(_e) => return Ok(serde_json::Value::Null),
969 }
970 }
971
972 Ok(serde_json::Value::Null)
973 } else {
974 Ok(serde_json::Value::Null)
975 }
976 } else {
977 parse_response(res).await
978 }
979 }
980
981 pub async fn links(
994 &self,
995 url: &str,
996 params: Option<RequestParams>,
997 _stream: bool,
998 content_type: &str,
999 ) -> Result<serde_json::Value, reqwest::Error> {
1000 let mut data = HashMap::new();
1001
1002 if let Ok(params) = serde_json::to_value(params) {
1003 if let Some(ref p) = params.as_object() {
1004 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1005 }
1006 }
1007
1008 data.insert("url".into(), serde_json::Value::String(url.to_string()));
1009
1010 let res = self.api_post("links", data, content_type).await?;
1011 parse_response(res).await
1012 }
1013
1014 pub async fn screenshot(
1027 &self,
1028 url: &str,
1029 params: Option<RequestParams>,
1030 _stream: bool,
1031 content_type: &str,
1032 ) -> Result<serde_json::Value, reqwest::Error> {
1033 let mut data = HashMap::new();
1034
1035 if let Ok(params) = serde_json::to_value(params) {
1036 if let Some(ref p) = params.as_object() {
1037 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1038 }
1039 }
1040
1041 data.insert("url".into(), serde_json::Value::String(url.to_string()));
1042
1043 let res = self.api_post("screenshot", data, content_type).await?;
1044 parse_response(res).await
1045 }
1046
1047 pub async fn search(
1060 &self,
1061 q: &str,
1062 params: Option<SearchRequestParams>,
1063 _stream: bool,
1064 content_type: &str,
1065 ) -> Result<serde_json::Value, reqwest::Error> {
1066 let body = match params {
1067 Some(mut params) => {
1068 params.search = q.to_string();
1069 params
1070 }
1071 _ => {
1072 let mut params = SearchRequestParams::default();
1073 params.search = q.to_string();
1074 params
1075 }
1076 };
1077
1078 let res = self.api_post("search", body, content_type).await?;
1079
1080 parse_response(res).await
1081 }
1082
1083 pub async fn transform(
1096 &self,
1097 data: Vec<HashMap<&str, &str>>,
1098 params: Option<TransformParams>,
1099 _stream: bool,
1100 content_type: &str,
1101 ) -> Result<serde_json::Value, reqwest::Error> {
1102 let mut payload = HashMap::new();
1103
1104 if let Ok(params) = serde_json::to_value(params) {
1105 if let Some(ref p) = params.as_object() {
1106 payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1107 }
1108 }
1109
1110 if let Ok(d) = serde_json::to_value(data) {
1111 payload.insert("data".into(), d);
1112 }
1113
1114 let res = self.api_post("transform", payload, content_type).await?;
1115
1116 parse_response(res).await
1117 }
1118
1119 pub async fn extract_contacts(
1132 &self,
1133 url: &str,
1134 params: Option<RequestParams>,
1135 _stream: bool,
1136 content_type: &str,
1137 ) -> Result<serde_json::Value, reqwest::Error> {
1138 let mut data = HashMap::new();
1139
1140 if let Ok(params) = serde_json::to_value(params) {
1141 if let Ok(params) = serde_json::to_value(params) {
1142 if let Some(ref p) = params.as_object() {
1143 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1144 }
1145 }
1146 }
1147
1148 match serde_json::to_value(url) {
1149 Ok(u) => {
1150 data.insert("url".into(), u);
1151 }
1152 _ => (),
1153 }
1154
1155 let res = self
1156 .api_post("pipeline/extract-contacts", data, content_type)
1157 .await?;
1158
1159 parse_response(res).await
1160 }
1161
1162 pub async fn label(
1175 &self,
1176 url: &str,
1177 params: Option<RequestParams>,
1178 _stream: bool,
1179 content_type: &str,
1180 ) -> Result<serde_json::Value, reqwest::Error> {
1181 let mut data = HashMap::new();
1182
1183 if let Ok(params) = serde_json::to_value(params) {
1184 if let Ok(params) = serde_json::to_value(params) {
1185 if let Some(ref p) = params.as_object() {
1186 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1187 }
1188 }
1189 }
1190
1191 data.insert("url".into(), serde_json::Value::String(url.to_string()));
1192
1193 let res = self.api_post("pipeline/label", data, content_type).await?;
1194 parse_response(res).await
1195 }
1196
1197 pub async fn download(
1209 &self,
1210 url: Option<&str>,
1211 options: Option<HashMap<&str, i32>>,
1212 ) -> Result<reqwest::Response, reqwest::Error> {
1213 let mut params = HashMap::new();
1214
1215 if let Some(url) = url {
1216 params.insert("url".to_string(), url.to_string());
1217 }
1218
1219 if let Some(options) = options {
1220 for (key, value) in options {
1221 params.insert(key.to_string(), value.to_string());
1222 }
1223 }
1224
1225 let url = format!("{API_URL}/v1/data/download");
1226 let request = self
1227 .client
1228 .get(&url)
1229 .header(
1230 "User-Agent",
1231 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
1232 )
1233 .header("Content-Type", "application/octet-stream")
1234 .header("Authorization", format!("Bearer {}", self.api_key))
1235 .query(¶ms);
1236
1237 let res = request.send().await?;
1238
1239 Ok(res)
1240 }
1241
1242 pub async fn create_signed_url(
1254 &self,
1255 url: Option<&str>,
1256 options: Option<HashMap<&str, i32>>,
1257 ) -> Result<serde_json::Value, reqwest::Error> {
1258 let mut params = HashMap::new();
1259
1260 if let Some(options) = options {
1261 for (key, value) in options {
1262 params.insert(key.to_string(), value.to_string());
1263 }
1264 }
1265
1266 if let Some(url) = url {
1267 params.insert("url".to_string(), url.to_string());
1268 }
1269
1270 let url = format!("{API_URL}/v1/data/sign-url");
1271 let request = self
1272 .client
1273 .get(&url)
1274 .header(
1275 "User-Agent",
1276 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
1277 )
1278 .header("Authorization", format!("Bearer {}", self.api_key))
1279 .query(¶ms);
1280
1281 let res = request.send().await?;
1282
1283 parse_response(res).await
1284 }
1285
1286 pub async fn get_crawl_state(
1298 &self,
1299 url: &str,
1300 params: Option<RequestParams>,
1301 content_type: &str,
1302 ) -> Result<serde_json::Value, reqwest::Error> {
1303 let mut payload = HashMap::new();
1304 payload.insert("url".into(), serde_json::Value::String(url.to_string()));
1305 payload.insert(
1306 "contentType".into(),
1307 serde_json::Value::String(content_type.to_string()),
1308 );
1309
1310 if let Ok(params) = serde_json::to_value(params) {
1311 if let Ok(params) = serde_json::to_value(params) {
1312 if let Some(ref p) = params.as_object() {
1313 payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1314 }
1315 }
1316 }
1317
1318 let res = self
1319 .api_post("data/crawl_state", payload, content_type)
1320 .await?;
1321 parse_response(res).await
1322 }
1323
1324 pub async fn get_credits(&self) -> Result<serde_json::Value, reqwest::Error> {
1326 self.api_get::<serde_json::Value>("data/credits", None)
1327 .await
1328 }
1329
1330 pub async fn data_post(
1332 &self,
1333 table: &str,
1334 data: Option<RequestParams>,
1335 ) -> Result<serde_json::Value, reqwest::Error> {
1336 let res = self
1337 .api_post(&format!("data/{}", table), data, "application/json")
1338 .await?;
1339 parse_response(res).await
1340 }
1341
1342 pub async fn query(&self, params: &QueryRequest) -> Result<serde_json::Value, reqwest::Error> {
1344 let res = self
1345 .api_get::<QueryRequest>(&"data/query", Some(params))
1346 .await?;
1347
1348 Ok(res)
1349 }
1350
1351 pub async fn data_get(
1353 &self,
1354 table: &str,
1355 params: Option<RequestParams>,
1356 ) -> Result<serde_json::Value, reqwest::Error> {
1357 let mut payload = HashMap::new();
1358
1359 if let Some(params) = params {
1360 if let Ok(p) = serde_json::to_value(params) {
1361 if let Some(o) = p.as_object() {
1362 payload.extend(o.iter().map(|(k, v)| (k.as_str(), v.clone())));
1363 }
1364 }
1365 }
1366
1367 let res = self
1368 .api_get::<serde_json::Value>(&format!("data/{}", table), None)
1369 .await?;
1370 Ok(res)
1371 }
1372
1373 pub async fn data_delete(
1375 &self,
1376 table: &str,
1377 params: Option<RequestParams>,
1378 ) -> Result<serde_json::Value, reqwest::Error> {
1379 let mut payload = HashMap::new();
1380
1381 if let Ok(params) = serde_json::to_value(params) {
1382 if let Ok(params) = serde_json::to_value(params) {
1383 if let Some(ref p) = params.as_object() {
1384 payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1385 }
1386 }
1387 }
1388
1389 let res = self
1390 .api_delete(&format!("data/{}", table), Some(payload))
1391 .await?;
1392 parse_response(res).await
1393 }
1394}
1395
/// Integration tests that call the live API.
///
/// They need an API key in `SPIDER_API_KEY` (a `.env` file is loaded when
/// present); building the shared client panics without one.
#[cfg(test)]
mod tests {
    use super::*;
    use dotenv::dotenv;
    use lazy_static::lazy_static;
    use reqwest::ClientBuilder;

    lazy_static! {
        /// Client shared by all tests, built once with a custom user agent.
        static ref SPIDER_CLIENT: Spider = {
            dotenv().ok();
            let client = ClientBuilder::new();
            let client = client.user_agent("SpiderBot").build().unwrap();

            Spider::new_with_client(None, client).expect("client to build")
        };
    }

    #[tokio::test]
    #[ignore]
    async fn test_scrape_url() {
        let response = SPIDER_CLIENT
            .scrape_url("https://example.com", None, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    async fn test_crawl_url() {
        let response = SPIDER_CLIENT
            .crawl_url(
                "https://example.com",
                None,
                false,
                "application/json",
                None::<fn(serde_json::Value)>,
            )
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_links() {
        let response: Result<serde_json::Value, Error> = SPIDER_CLIENT
            .links("https://example.com", None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_screenshot() {
        let mut params = RequestParams::default();
        params.limit = Some(1);

        let response = SPIDER_CLIENT
            .screenshot(
                "https://example.com",
                Some(params),
                false,
                "application/json",
            )
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_transform() {
        // Each document is keyed like `DataParam`: the markup goes under `html`.
        let data = vec![HashMap::from([(
            "html",
            "<html><body><h1>Transformation</h1></body></html>",
        )])];
        let response = SPIDER_CLIENT
            .transform(data, None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_extract_contacts() {
        let response = SPIDER_CLIENT
            .extract_contacts("https://example.com", None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_label() {
        let response = SPIDER_CLIENT
            .label("https://example.com", None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    async fn test_create_signed_url() {
        let response = SPIDER_CLIENT
            .create_signed_url(Some("example.com"), None)
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    async fn test_get_crawl_state() {
        let response = SPIDER_CLIENT
            .get_crawl_state("https://example.com", None, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    async fn test_query() {
        let mut query = QueryRequest::default();

        query.domain = Some("spider.cloud".into());

        let response = SPIDER_CLIENT.query(&query).await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    async fn test_get_credits() {
        let response = SPIDER_CLIENT.get_credits().await;
        assert!(response.is_ok());
    }
}