1pub mod shapes;
65
66use backon::ExponentialBuilder;
67use backon::Retryable;
68use reqwest::Client;
69use reqwest::{Error, Response};
70use serde::Serialize;
71use std::collections::HashMap;
72use tokio_stream::StreamExt;
73
74pub use shapes::{request::*, response::*};
75
/// Base URL of the Spider Cloud API; endpoint paths are appended to it.
const API_URL: &str = "https://api.spider.cloud";
78
79#[derive(Debug, Default)]
81pub struct Spider {
82 pub api_key: String,
84 pub client: Client,
86}
87
88pub async fn handle_json(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
90 res.json().await
91}
92
93pub async fn handle_jsonl(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
95 let text = res.text().await?;
96 let lines = text
97 .lines()
98 .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
99 .collect::<Vec<_>>();
100 Ok(serde_json::Value::Array(lines))
101}
102
/// Parses a CSV body into a JSON array of objects keyed by the header row.
///
/// Rows that fail to deserialize are skipped. If the collected rows cannot be turned
/// into a JSON value, the raw body text is returned as a JSON string instead.
///
/// # Errors
/// Returns the `reqwest::Error` produced when the body cannot be read as text.
#[cfg(feature = "csv")]
pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    use std::collections::HashMap;

    let text = res.text().await?;
    let mut reader = csv::Reader::from_reader(text.as_bytes());

    let mut rows: Vec<HashMap<String, String>> = Vec::new();
    for row in reader.deserialize::<HashMap<String, String>>() {
        if let Ok(row) = row {
            rows.push(row);
        }
    }

    match serde_json::to_value(rows) {
        Ok(value) => Ok(value),
        Err(_) => Ok(serde_json::Value::String(text)),
    }
}
117
118#[cfg(not(feature = "csv"))]
119pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
120 handle_text(res).await
121}
122
123pub async fn handle_text(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
125 Ok(serde_json::Value::String(
126 res.text().await.unwrap_or_default(),
127 ))
128}
129
/// Deserializes an XML body into a JSON value, falling back to the raw text as a JSON
/// string when the XML cannot be deserialized.
///
/// # Errors
/// Returns the `reqwest::Error` produced when the body cannot be read as text.
// NOTE(review): this XML handler is gated on the `csv` feature although it relies on
// `quick_xml` — confirm that the gate is intentional before changing it.
#[cfg(feature = "csv")]
pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    let text = res.text().await?;

    if let Ok(parsed) = quick_xml::de::from_str::<serde_json::Value>(&text) {
        return Ok(parsed);
    }

    Ok(serde_json::Value::String(text))
}
139
140#[cfg(not(feature = "csv"))]
141pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
143 handle_text(res).await
144}
145
146pub async fn parse_response(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
147 let content_type = res
148 .headers()
149 .get(reqwest::header::CONTENT_TYPE)
150 .and_then(|v| v.to_str().ok())
151 .unwrap_or_default()
152 .to_ascii_lowercase();
153
154 if content_type.contains("json") && !content_type.contains("jsonl") {
155 handle_json(res).await
156 } else if content_type.contains("jsonl") || content_type.contains("ndjson") {
157 handle_jsonl(res).await
158 } else if content_type.contains("csv") {
159 handle_csv(res).await
160 } else if content_type.contains("xml") {
161 handle_xml(res).await
162 } else {
163 handle_text(res).await
164 }
165}
166
167impl Spider {
168 pub fn new(api_key: Option<String>) -> Result<Self, &'static str> {
178 let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
179
180 match api_key {
181 Some(key) => Ok(Self {
182 api_key: key,
183 client: Client::new(),
184 }),
185 None => Err("No API key provided"),
186 }
187 }
188
189 pub fn new_with_client(api_key: Option<String>, client: Client) -> Result<Self, &'static str> {
200 let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
201
202 match api_key {
203 Some(key) => Ok(Self {
204 api_key: key,
205 client,
206 }),
207 None => Err("No API key provided"),
208 }
209 }
210
211 async fn api_post_base(
224 &self,
225 endpoint: &str,
226 data: impl Serialize + Sized + std::fmt::Debug,
227 content_type: &str,
228 ) -> Result<Response, Error> {
229 let url: String = format!("{API_URL}/{}", endpoint);
230
231 self.client
232 .post(&url)
233 .header(
234 "User-Agent",
235 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
236 )
237 .header("Content-Type", content_type)
238 .header("Authorization", format!("Bearer {}", self.api_key))
239 .json(&data)
240 .send()
241 .await
242 }
243
244 async fn api_post(
257 &self,
258 endpoint: &str,
259 data: impl Serialize + std::fmt::Debug + Clone + Send + Sync,
260 content_type: &str,
261 ) -> Result<Response, Error> {
262 let fetch = || async {
263 self.api_post_base(endpoint, data.to_owned(), content_type)
264 .await
265 };
266
267 fetch
268 .retry(ExponentialBuilder::default().with_max_times(5))
269 .when(|err: &reqwest::Error| {
270 if let Some(status) = err.status() {
271 status.is_server_error()
272 } else {
273 err.is_timeout()
274 }
275 })
276 .await
277 }
278
279 async fn api_get_base<T: Serialize>(
289 &self,
290 endpoint: &str,
291 query_params: Option<&T>,
292 ) -> Result<serde_json::Value, reqwest::Error> {
293 let url = format!("{API_URL}/{}", endpoint);
294 let res = self
295 .client
296 .get(&url)
297 .query(&query_params)
298 .header(
299 "User-Agent",
300 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
301 )
302 .header("Content-Type", "application/json")
303 .header("Authorization", format!("Bearer {}", self.api_key))
304 .send()
305 .await?;
306 parse_response(res).await
307 }
308
309 async fn api_get<T: Serialize>(
319 &self,
320 endpoint: &str,
321 query_params: Option<&T>,
322 ) -> Result<serde_json::Value, reqwest::Error> {
323 let fetch = || async { self.api_get_base(endpoint, query_params.to_owned()).await };
324
325 fetch
326 .retry(ExponentialBuilder::default().with_max_times(5))
327 .when(|err: &reqwest::Error| {
328 if let Some(status) = err.status() {
329 status.is_server_error()
330 } else {
331 err.is_timeout()
332 }
333 })
334 .await
335 }
336
337 async fn api_delete_base(
350 &self,
351 endpoint: &str,
352 params: Option<HashMap<String, serde_json::Value>>,
353 ) -> Result<Response, Error> {
354 let url = format!("{API_URL}/v1/{}", endpoint);
355 let request_builder = self
356 .client
357 .delete(&url)
358 .header(
359 "User-Agent",
360 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
361 )
362 .header("Content-Type", "application/json")
363 .header("Authorization", format!("Bearer {}", self.api_key));
364
365 let request_builder = if let Some(params) = params {
366 request_builder.json(¶ms)
367 } else {
368 request_builder
369 };
370
371 request_builder.send().await
372 }
373
374 async fn api_delete(
387 &self,
388 endpoint: &str,
389 params: Option<HashMap<String, serde_json::Value>>,
390 ) -> Result<Response, Error> {
391 let fetch = || async { self.api_delete_base(endpoint, params.to_owned()).await };
392
393 fetch
394 .retry(ExponentialBuilder::default().with_max_times(5))
395 .when(|err: &reqwest::Error| {
396 if let Some(status) = err.status() {
397 status.is_server_error()
398 } else {
399 err.is_timeout()
400 }
401 })
402 .await
403 }
404
405 pub async fn scrape_url(
418 &self,
419 url: &str,
420 params: Option<RequestParams>,
421 content_type: &str,
422 ) -> Result<serde_json::Value, reqwest::Error> {
423 let mut data = HashMap::new();
424
425 if let Ok(params) = serde_json::to_value(params) {
426 if let Some(ref p) = params.as_object() {
427 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
428 }
429 }
430
431 if !url.is_empty() {
432 data.insert(
433 "url".to_string(),
434 serde_json::Value::String(url.to_string()),
435 );
436 }
437
438 data.insert("limit".to_string(), serde_json::Value::Number(1.into()));
439
440 let res = self.api_post("crawl", data, content_type).await?;
441 parse_response(res).await
442 }
443
444 pub async fn crawl_url(
458 &self,
459 url: &str,
460 params: Option<RequestParams>,
461 stream: bool,
462 content_type: &str,
463 callback: Option<impl Fn(serde_json::Value) + Send>,
464 ) -> Result<serde_json::Value, reqwest::Error> {
465 use tokio_util::codec::{FramedRead, LinesCodec};
466
467 let mut data = HashMap::new();
468
469 if let Ok(params) = serde_json::to_value(params) {
470 if let Some(ref p) = params.as_object() {
471 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
472 }
473 }
474
475 data.insert("url".into(), serde_json::Value::String(url.to_string()));
476
477 let res = self.api_post("crawl", data, content_type).await?;
478
479 if stream {
480 if let Some(callback) = callback {
481 let stream = res.bytes_stream();
482
483 let stream_reader = tokio_util::io::StreamReader::new(
484 stream
485 .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
486 );
487
488 let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
489
490 while let Some(line_result) = lines.next().await {
491 match line_result {
492 Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
493 Ok(value) => {
494 callback(value);
495 }
496 Err(_e) => {
497 continue;
498 }
499 },
500 Err(_e) => return Ok(serde_json::Value::Null),
501 }
502 }
503
504 Ok(serde_json::Value::Null)
505 } else {
506 Ok(serde_json::Value::Null)
507 }
508 } else {
509 parse_response(res).await
510 }
511 }
512
513 pub async fn links(
526 &self,
527 url: &str,
528 params: Option<RequestParams>,
529 _stream: bool,
530 content_type: &str,
531 ) -> Result<serde_json::Value, reqwest::Error> {
532 let mut data = HashMap::new();
533
534 if let Ok(params) = serde_json::to_value(params) {
535 if let Some(ref p) = params.as_object() {
536 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
537 }
538 }
539
540 data.insert("url".into(), serde_json::Value::String(url.to_string()));
541
542 let res = self.api_post("links", data, content_type).await?;
543 parse_response(res).await
544 }
545
546 pub async fn screenshot(
559 &self,
560 url: &str,
561 params: Option<RequestParams>,
562 _stream: bool,
563 content_type: &str,
564 ) -> Result<serde_json::Value, reqwest::Error> {
565 let mut data = HashMap::new();
566
567 if let Ok(params) = serde_json::to_value(params) {
568 if let Some(ref p) = params.as_object() {
569 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
570 }
571 }
572
573 data.insert("url".into(), serde_json::Value::String(url.to_string()));
574
575 let res = self.api_post("screenshot", data, content_type).await?;
576 parse_response(res).await
577 }
578
579 pub async fn search(
592 &self,
593 q: &str,
594 params: Option<SearchRequestParams>,
595 _stream: bool,
596 content_type: &str,
597 ) -> Result<serde_json::Value, reqwest::Error> {
598 let body = match params {
599 Some(mut params) => {
600 params.search = q.to_string();
601 params
602 }
603 _ => {
604 let mut params = SearchRequestParams::default();
605 params.search = q.to_string();
606 params
607 }
608 };
609
610 let res = self.api_post("search", body, content_type).await?;
611
612 parse_response(res).await
613 }
614
615 pub async fn transform(
628 &self,
629 data: Vec<HashMap<&str, &str>>,
630 params: Option<TransformParams>,
631 _stream: bool,
632 content_type: &str,
633 ) -> Result<serde_json::Value, reqwest::Error> {
634 let mut payload = HashMap::new();
635
636 if let Ok(params) = serde_json::to_value(params) {
637 if let Some(ref p) = params.as_object() {
638 payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
639 }
640 }
641
642 if let Ok(d) = serde_json::to_value(data) {
643 payload.insert("data".into(), d);
644 }
645
646 let res = self.api_post("transform", payload, content_type).await?;
647
648 parse_response(res).await
649 }
650
651 pub async fn extract_contacts(
664 &self,
665 url: &str,
666 params: Option<RequestParams>,
667 _stream: bool,
668 content_type: &str,
669 ) -> Result<serde_json::Value, reqwest::Error> {
670 let mut data = HashMap::new();
671
672 if let Ok(params) = serde_json::to_value(params) {
673 if let Ok(params) = serde_json::to_value(params) {
674 if let Some(ref p) = params.as_object() {
675 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
676 }
677 }
678 }
679
680 match serde_json::to_value(url) {
681 Ok(u) => {
682 data.insert("url".into(), u);
683 }
684 _ => (),
685 }
686
687 let res = self
688 .api_post("pipeline/extract-contacts", data, content_type)
689 .await?;
690
691 parse_response(res).await
692 }
693
694 pub async fn label(
707 &self,
708 url: &str,
709 params: Option<RequestParams>,
710 _stream: bool,
711 content_type: &str,
712 ) -> Result<serde_json::Value, reqwest::Error> {
713 let mut data = HashMap::new();
714
715 if let Ok(params) = serde_json::to_value(params) {
716 if let Ok(params) = serde_json::to_value(params) {
717 if let Some(ref p) = params.as_object() {
718 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
719 }
720 }
721 }
722
723 data.insert("url".into(), serde_json::Value::String(url.to_string()));
724
725 let res = self.api_post("pipeline/label", data, content_type).await?;
726 parse_response(res).await
727 }
728
729 pub async fn download(
741 &self,
742 url: Option<&str>,
743 options: Option<HashMap<&str, i32>>,
744 ) -> Result<reqwest::Response, reqwest::Error> {
745 let mut params = HashMap::new();
746
747 if let Some(url) = url {
748 params.insert("url".to_string(), url.to_string());
749 }
750
751 if let Some(options) = options {
752 for (key, value) in options {
753 params.insert(key.to_string(), value.to_string());
754 }
755 }
756
757 let url = format!("{API_URL}/v1/data/download");
758 let request = self
759 .client
760 .get(&url)
761 .header(
762 "User-Agent",
763 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
764 )
765 .header("Content-Type", "application/octet-stream")
766 .header("Authorization", format!("Bearer {}", self.api_key))
767 .query(¶ms);
768
769 let res = request.send().await?;
770
771 Ok(res)
772 }
773
774 pub async fn create_signed_url(
786 &self,
787 url: Option<&str>,
788 options: Option<HashMap<&str, i32>>,
789 ) -> Result<serde_json::Value, reqwest::Error> {
790 let mut params = HashMap::new();
791
792 if let Some(options) = options {
793 for (key, value) in options {
794 params.insert(key.to_string(), value.to_string());
795 }
796 }
797
798 if let Some(url) = url {
799 params.insert("url".to_string(), url.to_string());
800 }
801
802 let url = format!("{API_URL}/v1/data/sign-url");
803 let request = self
804 .client
805 .get(&url)
806 .header(
807 "User-Agent",
808 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
809 )
810 .header("Authorization", format!("Bearer {}", self.api_key))
811 .query(¶ms);
812
813 let res = request.send().await?;
814
815 parse_response(res).await
816 }
817
818 pub async fn get_crawl_state(
830 &self,
831 url: &str,
832 params: Option<RequestParams>,
833 content_type: &str,
834 ) -> Result<serde_json::Value, reqwest::Error> {
835 let mut payload = HashMap::new();
836 payload.insert("url".into(), serde_json::Value::String(url.to_string()));
837 payload.insert(
838 "contentType".into(),
839 serde_json::Value::String(content_type.to_string()),
840 );
841
842 if let Ok(params) = serde_json::to_value(params) {
843 if let Ok(params) = serde_json::to_value(params) {
844 if let Some(ref p) = params.as_object() {
845 payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
846 }
847 }
848 }
849
850 let res = self
851 .api_post("data/crawl_state", payload, content_type)
852 .await?;
853 parse_response(res).await
854 }
855
856 pub async fn get_credits(&self) -> Result<serde_json::Value, reqwest::Error> {
858 self.api_get::<serde_json::Value>("data/credits", None)
859 .await
860 }
861
862 pub async fn data_post(
864 &self,
865 table: &str,
866 data: Option<RequestParams>,
867 ) -> Result<serde_json::Value, reqwest::Error> {
868 let res = self
869 .api_post(&format!("data/{}", table), data, "application/json")
870 .await?;
871 parse_response(res).await
872 }
873
874 pub async fn query(&self, params: &QueryRequest) -> Result<serde_json::Value, reqwest::Error> {
876 let res = self
877 .api_get::<QueryRequest>(&"data/query", Some(params))
878 .await?;
879
880 Ok(res)
881 }
882
883 pub async fn data_get(
885 &self,
886 table: &str,
887 params: Option<RequestParams>,
888 ) -> Result<serde_json::Value, reqwest::Error> {
889 let mut payload = HashMap::new();
890
891 if let Some(params) = params {
892 if let Ok(p) = serde_json::to_value(params) {
893 if let Some(o) = p.as_object() {
894 payload.extend(o.iter().map(|(k, v)| (k.as_str(), v.clone())));
895 }
896 }
897 }
898
899 let res = self
900 .api_get::<serde_json::Value>(&format!("data/{}", table), None)
901 .await?;
902 Ok(res)
903 }
904
905 pub async fn data_delete(
907 &self,
908 table: &str,
909 params: Option<RequestParams>,
910 ) -> Result<serde_json::Value, reqwest::Error> {
911 let mut payload = HashMap::new();
912
913 if let Ok(params) = serde_json::to_value(params) {
914 if let Ok(params) = serde_json::to_value(params) {
915 if let Some(ref p) = params.as_object() {
916 payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
917 }
918 }
919 }
920
921 let res = self
922 .api_delete(&format!("data/{}", table), Some(payload))
923 .await?;
924 parse_response(res).await
925 }
926}
927
/// Tests that call the API through a shared client. The client is built from the
/// `SPIDER_API_KEY` environment variable (a `.env` file is loaded first when present).
#[cfg(test)]
mod tests {
    use super::*;
    use dotenv::dotenv;
    use lazy_static::lazy_static;
    use reqwest::ClientBuilder;

    lazy_static! {
        /// Client shared by every test in this module.
        static ref SPIDER_CLIENT: Spider = {
            dotenv().ok();
            let http = ClientBuilder::new()
                .user_agent("SpiderBot")
                .build()
                .unwrap();

            Spider::new_with_client(None, http).expect("client to build")
        };
    }

    #[tokio::test]
    #[ignore]
    async fn test_scrape_url() {
        let outcome = SPIDER_CLIENT
            .scrape_url("https://example.com", None, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_crawl_url() {
        // Non-streaming crawl, so no callback is supplied.
        let no_callback = None::<fn(serde_json::Value)>;
        let outcome = SPIDER_CLIENT
            .crawl_url(
                "https://example.com",
                None,
                false,
                "application/json",
                no_callback,
            )
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_links() {
        let outcome: Result<serde_json::Value, Error> = SPIDER_CLIENT
            .links("https://example.com", None, false, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_screenshot() {
        let mut params = RequestParams::default();
        params.limit = Some(1);

        let outcome = SPIDER_CLIENT
            .screenshot(
                "https://example.com",
                Some(params),
                false,
                "application/json",
            )
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_transform() {
        // NOTE(review): the markup is used as the map key with an empty value — confirm this
        // matches the payload shape the endpoint expects.
        let mut entry: HashMap<&str, &str> = HashMap::new();
        entry.insert("<html><body><h1>Transformation</h1></body></html>", "");

        let outcome = SPIDER_CLIENT
            .transform(vec![entry], None, false, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_extract_contacts() {
        let outcome = SPIDER_CLIENT
            .extract_contacts("https://example.com", None, false, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_label() {
        let outcome = SPIDER_CLIENT
            .label("https://example.com", None, false, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_create_signed_url() {
        let outcome = SPIDER_CLIENT
            .create_signed_url(Some("example.com"), None)
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_get_crawl_state() {
        let outcome = SPIDER_CLIENT
            .get_crawl_state("https://example.com", None, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_query() {
        let mut request = QueryRequest::default();
        request.domain = Some("spider.cloud".into());

        let outcome = SPIDER_CLIENT.query(&request).await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_get_credits() {
        let outcome = SPIDER_CLIENT.get_credits().await;
        assert!(outcome.is_ok());
    }
}