1use backon::ExponentialBuilder;
65use backon::Retryable;
66use reqwest::Client;
67use reqwest::{Error, Response};
68use serde::{Deserialize, Serialize};
69use std::collections::HashMap;
70use tokio_stream::StreamExt;
71
72#[derive(Debug, Deserialize, Serialize, Clone)]
74pub struct ChunkingAlgDict {
75 r#type: ChunkingType,
77 value: i32,
79}
80
/// A duration split into whole seconds and a nanosecond remainder.
///
/// NOTE(review): the `secs`/`nanos` pair looks like serde's representation of
/// `std::time::Duration` — confirm that is what the API expects.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Timeout {
    /// Whole seconds.
    pub secs: u64,
    /// Additional nanoseconds.
    pub nanos: u32,
}
89
/// Idle-network wait setting; carries only a [`Timeout`].
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct IdleNetwork {
    /// Timeout attached to the idle-network wait.
    pub timeout: Timeout,
}
95
/// One browser automation step.
///
/// Serialized as an internally tagged object: the variant name is written to a
/// `type` key next to the variant's own fields, e.g.
/// `{"type":"Click","selector":"..."}`.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(tag = "type", rename_all = "PascalCase")]
pub enum WebAutomation {
    /// Carries a code string (presumably script source to run in the page).
    Evaluate { code: String },
    /// Carries a selector string for the element to click.
    Click { selector: String },
    /// Carries a duration; the unit is not visible here — TODO confirm.
    Wait { duration: u64 },
    /// No payload.
    WaitForNavigation,
    /// Carries a selector string to wait for.
    WaitFor { selector: String },
    /// Carries a selector string to wait for and then click.
    WaitForAndClick { selector: String },
    /// Carries a signed pixel amount for horizontal scrolling.
    ScrollX { pixels: i32 },
    /// Carries a signed pixel amount for vertical scrolling.
    ScrollY { pixels: i32 },
    /// Carries a selector string and the value to fill in.
    Fill { selector: String, value: String },
    /// Carries a repetition count.
    InfiniteScroll { times: u32 },
}
110
/// Redirect handling mode; defaults to [`RedirectPolicy::Strict`].
///
/// Internally tagged, so a value serializes as an object holding only the
/// `type` key, e.g. `{"type":"Strict"}`.
#[derive(Default, Serialize, Deserialize, Debug, Clone)]
#[serde(tag = "type", rename_all = "PascalCase")]
pub enum RedirectPolicy {
    /// Non-default mode.
    Loose,
    /// Default mode.
    #[default]
    Strict,
}
118
119pub type WebAutomationMap = std::collections::HashMap<String, Vec<WebAutomation>>;
120pub type ExecutionScriptsMap = std::collections::HashMap<String, String>;
121
/// Selector wait setting: a selector string paired with a [`Timeout`].
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Selector {
    /// Timeout attached to the selector wait.
    pub timeout: Timeout,
    /// The selector string (presumably CSS — confirm).
    pub selector: String,
}
129
/// Delay wait setting; carries only a [`Timeout`].
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Delay {
    /// Length of the delay.
    pub timeout: Timeout,
}
135
/// Bundle of optional wait conditions. Each field is independent, and an unset
/// field serializes as JSON `null`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct WaitFor {
    /// Optional idle-network wait.
    pub idle_network: Option<IdleNetwork>,
    /// Optional selector wait.
    pub selector: Option<Selector>,
    /// Optional fixed delay.
    pub delay: Option<Delay>,
    /// Optional flag concerning page navigations (exact semantics not visible
    /// here — TODO confirm).
    pub page_navigations: Option<bool>,
}
147
/// Query parameters sent by [`Spider::query`] to the `data/query` endpoint.
///
/// All fields are optional and default to `None`.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct QueryRequest {
    /// Optional URL filter.
    pub url: Option<String>,
    /// Optional domain filter.
    pub domain: Option<String>,
    /// Optional pathname filter.
    pub pathname: Option<String>,
}
158
/// Chunking algorithm selector; defaults to [`ChunkingType::ByWords`].
///
/// Variant names are serialized fully lowercased with no separators
/// (`bywords`, `bylines`, `bycharacterlength`, `bysentence`).
#[derive(Default, Debug, Deserialize, Serialize, Clone)]
#[serde(rename_all = "lowercase")]
pub enum ChunkingType {
    /// Default variant; serialized as `bywords`.
    #[default]
    ByWords,
    /// Serialized as `bylines`.
    ByLines,
    /// Serialized as `bycharacterlength`.
    ByCharacterLength,
    /// Serialized as `bysentence`.
    BySentence,
}
173
/// Viewport description. `Default` yields zero dimensions, no scale factor and
/// every flag `false`.
#[derive(Default, Debug, Deserialize, Serialize, Clone)]
pub struct Viewport {
    /// Viewport width (presumably in pixels — confirm).
    pub width: u32,
    /// Viewport height (presumably in pixels — confirm).
    pub height: u32,
    /// Optional device scale factor.
    pub device_scale_factor: Option<f64>,
    /// Mobile-emulation flag.
    pub emulating_mobile: bool,
    /// Landscape-orientation flag.
    pub is_landscape: bool,
    /// Touch-support flag.
    pub has_touch: bool,
}
190
/// Base URL that every request path in this file is appended to.
const API_URL: &str = "https://api.spider.cloud";
193
/// A named group of selector strings.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct CSSSelector {
    /// Name of the group.
    pub name: String,
    /// Selector strings in the group (presumably CSS selectors — confirm).
    pub selectors: Vec<String>,
}
202
/// Maps a string key (presumably a URL or path — confirm) to the
/// [`CSSSelector`] groups for it.
pub type CSSExtractionMap = HashMap<String, Vec<CSSSelector>>;
205
206#[derive(Debug, Default, Deserialize, Serialize, Clone)]
208pub struct WebhookSettings {
209 destination: String,
211 on_credits_depleted: bool,
213 on_credits_half_depleted: bool,
215 on_website_status: bool,
217 on_find: bool,
219 on_find_metadata: bool,
221}
222
/// Either one return format or a set of them.
///
/// Untagged: `Single` serializes as the bare format string and `Multi` as an
/// array of format strings.
#[derive(Debug, Deserialize, Serialize, Clone)]
#[serde(untagged)]
pub enum ReturnFormatHandling {
    /// Exactly one format.
    Single(ReturnFormat),
    /// A set of distinct formats.
    Multi(std::collections::HashSet<ReturnFormat>),
}
232
233impl Default for ReturnFormatHandling {
234 fn default() -> ReturnFormatHandling {
235 ReturnFormatHandling::Single(ReturnFormat::Raw)
236 }
237}
238
239#[derive(Debug, Default, Deserialize, Serialize, Clone)]
240pub struct EventTracker {
241 responses: Option<bool>,
243 requests: Option<bool>
245}
246
/// Request options shared by the URL-based endpoints of [`Spider`].
///
/// Every field is optional. A missing field deserializes to `None`. No field
/// uses `skip_serializing_if`, so an unset field is serialized as an explicit
/// JSON `null`; code that merges these params into a request body must take
/// care that such `null`s do not overwrite values it has already set.
///
/// Most field meanings are defined by the remote API and are not visible in
/// this file; only what the types establish is documented below.
#[derive(Debug, Default, Deserialize, Serialize, Clone)]
pub struct RequestParams {
    /// Target URL. The client methods that take a `url` argument also write a
    /// `url` key into the request body.
    #[serde(default)]
    pub url: Option<String>,
    /// Which [`RequestType`] to use.
    #[serde(default)]
    pub request: Option<RequestType>,
    /// Numeric limit (presumably the maximum number of pages — confirm).
    #[serde(default)]
    pub limit: Option<u32>,
    /// A single return format or a set of them; see [`ReturnFormatHandling`].
    #[serde(default)]
    pub return_format: Option<ReturnFormatHandling>,
    #[serde(default)]
    pub tld: Option<bool>,
    #[serde(default)]
    pub depth: Option<u32>,
    #[serde(default)]
    pub cache: Option<bool>,
    #[serde(default)]
    pub scroll: Option<u32>,
    /// String-keyed numeric budget map.
    #[serde(default)]
    pub budget: Option<HashMap<String, u32>>,
    #[serde(default)]
    pub blacklist: Option<Vec<String>>,
    #[serde(default)]
    pub whitelist: Option<Vec<String>>,
    #[serde(default)]
    pub locale: Option<String>,
    #[serde(default)]
    pub cookies: Option<String>,
    #[serde(default)]
    pub stealth: Option<bool>,
    /// Header name/value pairs.
    #[serde(default)]
    pub headers: Option<HashMap<String, String>>,
    #[serde(default)]
    pub anti_bot: Option<bool>,
    /// Webhook configuration; see [`WebhookSettings`].
    #[serde(default)]
    pub webhooks: Option<WebhookSettings>,
    #[serde(default)]
    pub metadata: Option<bool>,
    /// Viewport description; see [`Viewport`].
    #[serde(default)]
    pub viewport: Option<Viewport>,
    #[serde(default)]
    pub encoding: Option<String>,
    #[serde(default)]
    pub subdomains: Option<bool>,
    #[serde(default)]
    pub user_agent: Option<String>,
    #[serde(default)]
    pub store_data: Option<bool>,
    #[serde(default)]
    pub gpt_config: Option<HashMap<String, String>>,
    #[serde(default)]
    pub fingerprint: Option<bool>,
    #[serde(default)]
    pub storageless: Option<bool>,
    #[serde(default)]
    pub readability: Option<bool>,
    #[serde(default)]
    pub proxy_enabled: Option<bool>,
    #[serde(default)]
    pub respect_robots: Option<bool>,
    #[serde(default)]
    pub root_selector: Option<String>,
    #[serde(default)]
    pub full_resources: Option<bool>,
    #[serde(default)]
    pub text: Option<String>,
    #[serde(default)]
    pub sitemap: Option<bool>,
    #[serde(default)]
    pub external_domains: Option<Vec<String>>,
    #[serde(default)]
    pub return_embeddings: Option<bool>,
    #[serde(default)]
    pub return_headers: Option<bool>,
    #[serde(default)]
    pub return_page_links: Option<bool>,
    #[serde(default)]
    pub return_cookies: Option<bool>,
    /// Request timeout; the `u8` type caps it at 255 (unit not visible here —
    /// TODO confirm).
    #[serde(default)]
    pub request_timeout: Option<u8>,
    #[serde(default)]
    pub run_in_background: Option<bool>,
    #[serde(default)]
    pub skip_config_checks: Option<bool>,
    /// Selector groups per key; see [`CSSExtractionMap`].
    #[serde(default)]
    pub css_extraction_map: Option<CSSExtractionMap>,
    /// Chunking configuration; see [`ChunkingAlgDict`].
    #[serde(default)]
    pub chunking_alg: Option<ChunkingAlgDict>,
    #[serde(default)]
    pub disable_intercept: Option<bool>,
    /// Wait conditions; see [`WaitFor`].
    #[serde(default)]
    pub wait_for: Option<WaitFor>,
    /// Script strings per key; see [`ExecutionScriptsMap`].
    #[serde(default)]
    pub execution_scripts: Option<ExecutionScriptsMap>,
    /// Automation steps per key; see [`WebAutomationMap`].
    #[serde(default)]
    pub automation_scripts: Option<WebAutomationMap>,
    /// Redirect handling; see [`RedirectPolicy`].
    #[serde(default)]
    pub redirect_policy: Option<RedirectPolicy>,
    /// Tracking switches; see [`EventTracker`].
    #[serde(default)]
    pub event_tracker: Option<EventTracker>,
    /// Overall crawl timeout; see [`Timeout`].
    #[serde(default)]
    pub crawl_timeout: Option<Timeout>,
    /// Boxed string (presumably script source evaluated on each new document —
    /// confirm).
    #[serde(default)]
    pub evaluate_on_new_document: Option<Box<String>>
}
404
/// Body of a search request: the shared [`RequestParams`] plus search-specific
/// options.
#[derive(Debug, Default, Deserialize, Serialize, Clone)]
pub struct SearchRequestParams {
    /// Shared options; flattened, so its fields appear at the top level of the
    /// serialized object rather than under a `base` key.
    #[serde(default, flatten)]
    pub base: RequestParams,
    /// Search query. [`Spider::search`] overwrites this with its `q` argument.
    pub search: String,
    /// Optional limit on search results.
    pub search_limit: Option<u32>,
    /// Optional flag for fetching page content.
    pub fetch_page_content: Option<bool>,
    /// Optional location string.
    pub location: Option<String>,
    /// Optional country string.
    pub country: Option<String>,
    /// Optional language string.
    pub language: Option<String>,
    /// Optional result count.
    pub num: Option<u32>,
    /// Optional page number.
    pub page: Option<u32>,
    /// Optional per-website limit.
    #[serde(default)]
    pub website_limit: Option<u32>,
}
431
/// Options for the transform endpoint.
///
/// [`Spider::transform`] serializes this struct and then overwrites the `data`
/// key with its own `data` argument.
#[derive(Debug, Default, Deserialize, Serialize, Clone)]
pub struct TransformParams {
    /// Optional output format.
    #[serde(default)]
    pub return_format: Option<ReturnFormat>,
    /// Optional readability flag.
    #[serde(default)]
    pub readability: Option<bool>,
    /// Optional clean flag.
    #[serde(default)]
    pub clean: Option<bool>,
    /// Optional full-clean flag.
    #[serde(default)]
    pub clean_full: Option<bool>,
    /// Documents to transform; required when deserializing.
    pub data: Vec<DataParam>,
}
450
/// One document handed to the transform endpoint.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct DataParam {
    /// The HTML content.
    pub html: String,
    /// Optional URL associated with the HTML.
    pub url: Option<String>,
}
458
/// Request mode; defaults to [`RequestType::SmartMode`].
///
/// Variant names are serialized fully lowercased (`http`, `chrome`,
/// `smartmode`).
/// NOTE(review): confirm that the API accepts `smartmode` for `SmartMode`.
#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum RequestType {
    /// Serialized as `http`.
    Http,
    /// Serialized as `chrome`.
    Chrome,
    /// Default variant; serialized as `smartmode`.
    #[default]
    SmartMode,
}
471
/// Output format selector; defaults to [`ReturnFormat::Raw`].
///
/// Variant names are serialized fully lowercased. The type derives `Hash` and
/// `Eq` so it can be stored in the `HashSet` used by
/// [`ReturnFormatHandling::Multi`].
#[derive(Default, Debug, Deserialize, Serialize, Clone, PartialEq, Eq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum ReturnFormat {
    /// Default variant; serialized as `raw`.
    #[default]
    Raw,
    /// Serialized as `markdown`.
    Markdown,
    /// Serialized as `commonmark`.
    Commonmark,
    /// Serialized as `html2text`.
    Html2text,
    /// Serialized as `text`.
    Text,
    /// Serialized as `xml`.
    Xml,
    /// Serialized as `bytes`.
    Bytes,
}
492
/// Client for the Spider API: an API key plus the `reqwest` client used to
/// send requests.
///
/// NOTE(review): the derived `Debug` prints `api_key` verbatim, and the derived
/// `Default` produces an empty key; prefer [`Spider::new`] or
/// [`Spider::new_with_client`].
#[derive(Debug, Default)]
pub struct Spider {
    /// Key sent as `Authorization: Bearer <api_key>` on every request.
    pub api_key: String,
    /// HTTP client used for all requests.
    pub client: Client,
}
501
502impl Spider {
503 pub fn new(api_key: Option<String>) -> Result<Self, &'static str> {
513 let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
514
515 match api_key {
516 Some(key) => Ok(Self {
517 api_key: key,
518 client: Client::new(),
519 }),
520 None => Err("No API key provided"),
521 }
522 }
523
524 pub fn new_with_client(api_key: Option<String>, client: Client) -> Result<Self, &'static str> {
535 let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
536
537 match api_key {
538 Some(key) => Ok(Self {
539 api_key: key,
540 client,
541 }),
542 None => Err("No API key provided"),
543 }
544 }
545
546 async fn api_post_base(
559 &self,
560 endpoint: &str,
561 data: impl Serialize + Sized + std::fmt::Debug,
562 content_type: &str,
563 ) -> Result<Response, Error> {
564 let url: String = format!("{API_URL}/{}", endpoint);
565
566 self.client
567 .post(&url)
568 .header(
569 "User-Agent",
570 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
571 )
572 .header("Content-Type", content_type)
573 .header("Authorization", format!("Bearer {}", self.api_key))
574 .json(&data)
575 .send()
576 .await
577 }
578
579 async fn api_post(
592 &self,
593 endpoint: &str,
594 data: impl Serialize + std::fmt::Debug + Clone + Send + Sync,
595 content_type: &str,
596 ) -> Result<Response, Error> {
597 let fetch = || async {
598 self.api_post_base(endpoint, data.to_owned(), content_type)
599 .await
600 };
601
602 fetch
603 .retry(ExponentialBuilder::default().with_max_times(5))
604 .when(|err: &reqwest::Error| {
605 if let Some(status) = err.status() {
606 status.is_server_error()
607 } else {
608 err.is_timeout()
609 }
610 })
611 .await
612 }
613
614 async fn api_get_base<T: Serialize>(
624 &self,
625 endpoint: &str,
626 query_params: Option<&T>,
627 ) -> Result<serde_json::Value, reqwest::Error> {
628 let url = format!("{API_URL}/{}", endpoint);
629 let res = self
630 .client
631 .get(&url)
632 .query(&query_params)
633 .header(
634 "User-Agent",
635 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
636 )
637 .header("Content-Type", "application/json")
638 .header("Authorization", format!("Bearer {}", self.api_key))
639 .send()
640 .await?;
641 res.json().await
642 }
643
644 async fn api_get<T: Serialize>(
654 &self,
655 endpoint: &str,
656 query_params: Option<&T>,
657 ) -> Result<serde_json::Value, reqwest::Error> {
658 let fetch = || async { self.api_get_base(endpoint, query_params.to_owned()).await };
659
660 fetch
661 .retry(ExponentialBuilder::default().with_max_times(5))
662 .when(|err: &reqwest::Error| {
663 if let Some(status) = err.status() {
664 status.is_server_error()
665 } else {
666 err.is_timeout()
667 }
668 })
669 .await
670 }
671
672 async fn api_delete_base(
685 &self,
686 endpoint: &str,
687 params: Option<HashMap<String, serde_json::Value>>,
688 ) -> Result<Response, Error> {
689 let url = format!("{API_URL}/v1/{}", endpoint);
690 let request_builder = self
691 .client
692 .delete(&url)
693 .header(
694 "User-Agent",
695 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
696 )
697 .header("Content-Type", "application/json")
698 .header("Authorization", format!("Bearer {}", self.api_key));
699
700 let request_builder = if let Some(params) = params {
701 request_builder.json(¶ms)
702 } else {
703 request_builder
704 };
705
706 request_builder.send().await
707 }
708
709 async fn api_delete(
722 &self,
723 endpoint: &str,
724 params: Option<HashMap<String, serde_json::Value>>,
725 ) -> Result<Response, Error> {
726 let fetch = || async { self.api_delete_base(endpoint, params.to_owned()).await };
727
728 fetch
729 .retry(ExponentialBuilder::default().with_max_times(5))
730 .when(|err: &reqwest::Error| {
731 if let Some(status) = err.status() {
732 status.is_server_error()
733 } else {
734 err.is_timeout()
735 }
736 })
737 .await
738 }
739
740 pub async fn scrape_url(
753 &self,
754 url: &str,
755 params: Option<RequestParams>,
756 content_type: &str,
757 ) -> Result<serde_json::Value, reqwest::Error> {
758 let mut data = HashMap::new();
759
760 data.insert(
761 "url".to_string(),
762 serde_json::Value::String(url.to_string()),
763 );
764 data.insert("limit".to_string(), serde_json::Value::Number(1.into()));
765
766 if let Ok(params) = serde_json::to_value(params) {
767 if let Some(ref p) = params.as_object() {
768 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
769 }
770 }
771
772 let res = self.api_post("crawl", data, content_type).await?;
773 res.json().await
774 }
775
776 pub async fn crawl_url(
790 &self,
791 url: &str,
792 params: Option<RequestParams>,
793 stream: bool,
794 content_type: &str,
795 callback: Option<impl Fn(serde_json::Value) + Send>,
796 ) -> Result<serde_json::Value, reqwest::Error> {
797 use tokio_util::codec::{FramedRead, LinesCodec};
798
799 let mut data = HashMap::new();
800
801 if let Ok(params) = serde_json::to_value(params) {
802 if let Some(ref p) = params.as_object() {
803 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
804 }
805 }
806
807 data.insert("url".into(), serde_json::Value::String(url.to_string()));
808
809 let res = self.api_post("crawl", data, content_type).await?;
810
811 if stream {
812 if let Some(callback) = callback {
813 let stream = res.bytes_stream();
814
815 let stream_reader = tokio_util::io::StreamReader::new(
816 stream.map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
817 );
818
819 let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
820
821 while let Some(line_result) = lines.next().await {
822 match line_result {
823 Ok(line) => {
824 match serde_json::from_str::<serde_json::Value>(&line) {
825 Ok(value) => {
826 callback(value);
827 }
828 Err(_e) => {
829 continue;
830 }
831 }
832 }
833 Err(_e) => {
834 return Ok(serde_json::Value::Null)
835 }
836 }
837 }
838
839 Ok(serde_json::Value::Null)
840 } else {
841 Ok(serde_json::Value::Null)
842 }
843 } else {
844 res.json().await
845 }
846 }
847
848 pub async fn links(
861 &self,
862 url: &str,
863 params: Option<RequestParams>,
864 _stream: bool,
865 content_type: &str,
866 ) -> Result<serde_json::Value, reqwest::Error> {
867 let mut data = HashMap::new();
868
869 if let Ok(params) = serde_json::to_value(params) {
870 if let Some(ref p) = params.as_object() {
871 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
872 }
873 }
874
875 data.insert("url".into(), serde_json::Value::String(url.to_string()));
876
877 let res = self.api_post("links", data, content_type).await?;
878 res.json().await
879 }
880
881 pub async fn screenshot(
894 &self,
895 url: &str,
896 params: Option<RequestParams>,
897 _stream: bool,
898 content_type: &str,
899 ) -> Result<serde_json::Value, reqwest::Error> {
900 let mut data = HashMap::new();
901
902 if let Ok(params) = serde_json::to_value(params) {
903 if let Some(ref p) = params.as_object() {
904 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
905 }
906 }
907
908 data.insert("url".into(), serde_json::Value::String(url.to_string()));
909
910 let res = self.api_post("screenshot", data, content_type).await?;
911 res.json().await
912 }
913
914 pub async fn search(
927 &self,
928 q: &str,
929 params: Option<SearchRequestParams>,
930 _stream: bool,
931 content_type: &str,
932 ) -> Result<serde_json::Value, reqwest::Error> {
933 let body = match params {
934 Some(mut params) => {
935 params.search = q.to_string();
936 params
937 }
938 _ => {
939 let mut params = SearchRequestParams::default();
940 params.search = q.to_string();
941 params
942 }
943 };
944
945 let res = self.api_post("search", body, content_type).await?;
946
947 res.json().await
948 }
949
950 pub async fn transform(
963 &self,
964 data: Vec<HashMap<&str, &str>>,
965 params: Option<TransformParams>,
966 _stream: bool,
967 content_type: &str,
968 ) -> Result<serde_json::Value, reqwest::Error> {
969 let mut payload = HashMap::new();
970
971 if let Ok(params) = serde_json::to_value(params) {
972 if let Some(ref p) = params.as_object() {
973 payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
974 }
975 }
976
977 if let Ok(d) = serde_json::to_value(data) {
978 payload.insert("data".into(), d);
979 }
980
981 let res = self.api_post("transform", payload, content_type).await?;
982
983 res.json().await
984 }
985
986 pub async fn extract_contacts(
999 &self,
1000 url: &str,
1001 params: Option<RequestParams>,
1002 _stream: bool,
1003 content_type: &str,
1004 ) -> Result<serde_json::Value, reqwest::Error> {
1005 let mut data = HashMap::new();
1006
1007 if let Ok(params) = serde_json::to_value(params) {
1008 if let Ok(params) = serde_json::to_value(params) {
1009 if let Some(ref p) = params.as_object() {
1010 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1011 }
1012 }
1013 }
1014
1015 match serde_json::to_value(url) {
1016 Ok(u) => {
1017 data.insert("url".into(), u);
1018 }
1019 _ => (),
1020 }
1021
1022 let res = self
1023 .api_post("pipeline/extract-contacts", data, content_type)
1024 .await?;
1025 res.json().await
1026 }
1027
1028 pub async fn label(
1041 &self,
1042 url: &str,
1043 params: Option<RequestParams>,
1044 _stream: bool,
1045 content_type: &str,
1046 ) -> Result<serde_json::Value, reqwest::Error> {
1047 let mut data = HashMap::new();
1048
1049 if let Ok(params) = serde_json::to_value(params) {
1050 if let Ok(params) = serde_json::to_value(params) {
1051 if let Some(ref p) = params.as_object() {
1052 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1053 }
1054 }
1055 }
1056
1057 data.insert("url".into(), serde_json::Value::String(url.to_string()));
1058
1059 let res = self.api_post("pipeline/label", data, content_type).await?;
1060 res.json().await
1061 }
1062
1063 pub async fn download(
1075 &self,
1076 url: Option<&str>,
1077 options: Option<HashMap<&str, i32>>,
1078 ) -> Result<reqwest::Response, reqwest::Error> {
1079 let mut params = HashMap::new();
1080
1081 if let Some(url) = url {
1082 params.insert("url".to_string(), url.to_string());
1083 }
1084
1085 if let Some(options) = options {
1086 for (key, value) in options {
1087 params.insert(key.to_string(), value.to_string());
1088 }
1089 }
1090
1091 let url = format!("{API_URL}/v1/data/download");
1092 let request = self
1093 .client
1094 .get(&url)
1095 .header(
1096 "User-Agent",
1097 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
1098 )
1099 .header("Content-Type", "application/octet-stream")
1100 .header("Authorization", format!("Bearer {}", self.api_key))
1101 .query(¶ms);
1102
1103 let res = request.send().await?;
1104
1105 Ok(res)
1106 }
1107
1108 pub async fn create_signed_url(
1120 &self,
1121 url: Option<&str>,
1122 options: Option<HashMap<&str, i32>>,
1123 ) -> Result<serde_json::Value, reqwest::Error> {
1124 let mut params = HashMap::new();
1125
1126 if let Some(options) = options {
1127 for (key, value) in options {
1128 params.insert(key.to_string(), value.to_string());
1129 }
1130 }
1131
1132 if let Some(url) = url {
1133 params.insert("url".to_string(), url.to_string());
1134 }
1135
1136 let url = format!("{API_URL}/v1/data/sign-url");
1137 let request = self
1138 .client
1139 .get(&url)
1140 .header(
1141 "User-Agent",
1142 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
1143 )
1144 .header("Authorization", format!("Bearer {}", self.api_key))
1145 .query(¶ms);
1146
1147 let res = request.send().await?;
1148
1149 res.json().await
1150 }
1151
1152 pub async fn get_crawl_state(
1164 &self,
1165 url: &str,
1166 params: Option<RequestParams>,
1167 content_type: &str,
1168 ) -> Result<serde_json::Value, reqwest::Error> {
1169 let mut payload = HashMap::new();
1170 payload.insert("url".into(), serde_json::Value::String(url.to_string()));
1171 payload.insert(
1172 "contentType".into(),
1173 serde_json::Value::String(content_type.to_string()),
1174 );
1175
1176 if let Ok(params) = serde_json::to_value(params) {
1177 if let Ok(params) = serde_json::to_value(params) {
1178 if let Some(ref p) = params.as_object() {
1179 payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1180 }
1181 }
1182 }
1183
1184 let res = self
1185 .api_post("data/crawl_state", payload, content_type)
1186 .await?;
1187 res.json().await
1188 }
1189
1190 pub async fn get_credits(&self) -> Result<serde_json::Value, reqwest::Error> {
1192 self.api_get::<serde_json::Value>("data/credits", None)
1193 .await
1194 }
1195
1196 pub async fn data_post(
1198 &self,
1199 table: &str,
1200 data: Option<RequestParams>,
1201 ) -> Result<serde_json::Value, reqwest::Error> {
1202 let res = self
1203 .api_post(&format!("data/{}", table), data, "application/json")
1204 .await?;
1205 res.json().await
1206 }
1207
1208 pub async fn query(&self, params: &QueryRequest) -> Result<serde_json::Value, reqwest::Error> {
1210 let res = self
1211 .api_get::<QueryRequest>(&"data/query", Some(params))
1212 .await?;
1213
1214 Ok(res)
1215 }
1216
1217 pub async fn data_get(
1219 &self,
1220 table: &str,
1221 params: Option<RequestParams>,
1222 ) -> Result<serde_json::Value, reqwest::Error> {
1223 let mut payload = HashMap::new();
1224
1225 if let Some(params) = params {
1226 if let Ok(p) = serde_json::to_value(params) {
1227 if let Some(o) = p.as_object() {
1228 payload.extend(o.iter().map(|(k, v)| (k.as_str(), v.clone())));
1229 }
1230 }
1231 }
1232
1233 let res = self
1234 .api_get::<serde_json::Value>(&format!("data/{}", table), None)
1235 .await?;
1236 Ok(res)
1237 }
1238
1239 pub async fn data_delete(
1241 &self,
1242 table: &str,
1243 params: Option<RequestParams>,
1244 ) -> Result<serde_json::Value, reqwest::Error> {
1245 let mut payload = HashMap::new();
1246
1247 if let Ok(params) = serde_json::to_value(params) {
1248 if let Ok(params) = serde_json::to_value(params) {
1249 if let Some(ref p) = params.as_object() {
1250 payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1251 }
1252 }
1253 }
1254
1255 let res = self
1256 .api_delete(&format!("data/{}", table), Some(payload))
1257 .await?;
1258 res.json().await
1259 }
1260}
1261
/// Integration tests that call the live API.
///
/// They need a key in `SPIDER_API_KEY` (a `.env` file is loaded if present).
/// Tests marked `#[ignore]` run only when requested explicitly.
#[cfg(test)]
mod tests {
    use super::*;
    use dotenv::dotenv;
    use lazy_static::lazy_static;
    use reqwest::ClientBuilder;

    lazy_static! {
        /// Shared client, built once from the environment on first use.
        static ref SPIDER_CLIENT: Spider = {
            dotenv().ok();
            let client = ClientBuilder::new();
            let client = client.user_agent("SpiderBot").build().unwrap();

            Spider::new_with_client(None, client).expect("client to build")
        };
    }

    #[tokio::test]
    #[ignore]
    async fn test_scrape_url() {
        let response = SPIDER_CLIENT
            .scrape_url("https://example.com", None, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    async fn test_crawl_url() {
        let response = SPIDER_CLIENT
            .crawl_url(
                "https://example.com",
                None,
                false,
                "application/json",
                None::<fn(serde_json::Value)>,
            )
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_links() {
        let response: Result<serde_json::Value, Error> = SPIDER_CLIENT
            .links("https://example.com", None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_screenshot() {
        let mut params = RequestParams::default();
        params.limit = Some(1);

        let response = SPIDER_CLIENT
            .screenshot(
                "https://example.com",
                Some(params),
                false,
                "application/json",
            )
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_transform() {
        // Each entry mirrors `DataParam`: the markup belongs under the `html`
        // key, not in the key itself with an empty value.
        let data = vec![HashMap::from([(
            "html",
            "<html><body><h1>Transformation</h1></body></html>",
        )])];
        let response = SPIDER_CLIENT
            .transform(data, None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_extract_contacts() {
        let response = SPIDER_CLIENT
            .extract_contacts("https://example.com", None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_label() {
        let response = SPIDER_CLIENT
            .label("https://example.com", None, false, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    async fn test_create_signed_url() {
        let response = SPIDER_CLIENT
            .create_signed_url(Some("example.com"), None)
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    async fn test_get_crawl_state() {
        let response = SPIDER_CLIENT
            .get_crawl_state("https://example.com", None, "application/json")
            .await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    async fn test_query() {
        let mut query = QueryRequest::default();

        query.domain = Some("spider.cloud".into());

        let response = SPIDER_CLIENT.query(&query).await;
        assert!(response.is_ok());
    }

    #[tokio::test]
    async fn test_get_credits() {
        let response = SPIDER_CLIENT.get_credits().await;
        assert!(response.is_ok());
    }
}