1pub mod shapes;
65
66use backon::ExponentialBuilder;
67use backon::Retryable;
68use reqwest::Client;
69use reqwest::{Error, Response};
70use serde::Serialize;
71use std::collections::HashMap;
72use tokio_stream::StreamExt;
73
74pub use shapes::{request::*, response::*};
75
/// Base URL that every Spider API endpoint path is appended to.
const API_URL: &str = "https://api.spider.cloud";
78
/// Client handle for the Spider API.
///
/// NOTE(review): `Debug` is derived, so formatting this struct also prints
/// `api_key` — confirm that is acceptable wherever instances are logged.
#[derive(Debug, Default)]
pub struct Spider {
    /// API key sent as the `Authorization: Bearer <key>` header on requests.
    pub api_key: String,
    /// `reqwest` client used to issue the HTTP requests.
    pub client: Client,
}
87
88pub async fn handle_json(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
90 res.json().await
91}
92
93pub async fn handle_jsonl(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
95 let text = res.text().await?;
96 let lines = text
97 .lines()
98 .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
99 .collect::<Vec<_>>();
100 Ok(serde_json::Value::Array(lines))
101}
102
/// Decodes a CSV body into a JSON array of `{column: value}` objects.
///
/// Records that fail to deserialize are skipped. If the collected records
/// cannot be converted to JSON, the raw body text is returned as a JSON string.
///
/// # Errors
/// Returns the `reqwest::Error` produced while reading the body text.
#[cfg(feature = "csv")]
pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    let body = res.text().await?;
    let mut reader = csv::Reader::from_reader(body.as_bytes());
    let rows = reader
        .deserialize::<std::collections::HashMap<String, String>>()
        .filter_map(Result::ok)
        .collect::<Vec<_>>();

    match serde_json::to_value(rows) {
        Ok(value) => Ok(value),
        Err(_) => Ok(serde_json::Value::String(body)),
    }
}
117
/// Fallback used when the `csv` feature is disabled: the body is not parsed
/// and is handed to `handle_text` instead.
#[cfg(not(feature = "csv"))]
pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    handle_text(res).await
}
122
123pub async fn handle_text(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
125 Ok(serde_json::Value::String(
126 res.text().await.unwrap_or_default(),
127 ))
128}
129
/// Decodes an XML body into a JSON value via `quick_xml`.
///
/// If the XML cannot be deserialized, the raw body text is returned as a JSON
/// string instead of an error.
///
/// NOTE(review): this XML handler is gated on the `csv` feature — this looks
/// like a copy-paste from `handle_csv`; confirm the intended feature name.
///
/// # Errors
/// Returns the `reqwest::Error` produced while reading the body text.
#[cfg(feature = "csv")]
pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    let body = res.text().await?;
    let parsed = quick_xml::de::from_str::<serde_json::Value>(&body);

    Ok(parsed.unwrap_or_else(|_| serde_json::Value::String(body)))
}
139
/// Fallback used when the `csv` feature is disabled: the body is not parsed
/// and is handed to `handle_text` instead.
#[cfg(not(feature = "csv"))]
pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    handle_text(res).await
}
145
146pub async fn parse_response(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
147 let content_type = res
148 .headers()
149 .get(reqwest::header::CONTENT_TYPE)
150 .and_then(|v| v.to_str().ok())
151 .unwrap_or_default()
152 .to_ascii_lowercase();
153
154 if content_type.contains("json") && !content_type.contains("jsonl") {
155 handle_json(res).await
156 } else if content_type.contains("jsonl") || content_type.contains("ndjson") {
157 handle_jsonl(res).await
158 } else if content_type.contains("csv") {
159 handle_csv(res).await
160 } else if content_type.contains("xml") {
161 handle_xml(res).await
162 } else {
163 handle_text(res).await
164 }
165}
166
167impl Spider {
168 pub fn new(api_key: Option<String>) -> Result<Self, &'static str> {
178 let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
179
180 match api_key {
181 Some(key) => Ok(Self {
182 api_key: key,
183 client: Client::new(),
184 }),
185 None => Err("No API key provided"),
186 }
187 }
188
189 pub fn new_with_client(api_key: Option<String>, client: Client) -> Result<Self, &'static str> {
200 let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
201
202 match api_key {
203 Some(key) => Ok(Self {
204 api_key: key,
205 client,
206 }),
207 None => Err("No API key provided"),
208 }
209 }
210
211 async fn api_post_base(
224 &self,
225 endpoint: &str,
226 data: impl Serialize + Sized + std::fmt::Debug,
227 content_type: &str,
228 ) -> Result<Response, Error> {
229 let url: String = format!("{API_URL}/{}", endpoint);
230
231 self.client
232 .post(&url)
233 .header(
234 "User-Agent",
235 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
236 )
237 .header("Content-Type", content_type)
238 .header("Authorization", format!("Bearer {}", self.api_key))
239 .json(&data)
240 .send()
241 .await
242 }
243
244 async fn api_post(
257 &self,
258 endpoint: &str,
259 data: impl Serialize + std::fmt::Debug + Clone + Send + Sync,
260 content_type: &str,
261 ) -> Result<Response, Error> {
262 let fetch = || async {
263 self.api_post_base(endpoint, data.to_owned(), content_type)
264 .await
265 };
266
267 fetch
268 .retry(ExponentialBuilder::default().with_max_times(5))
269 .when(|err: &reqwest::Error| {
270 if let Some(status) = err.status() {
271 status.is_server_error()
272 } else {
273 err.is_timeout()
274 }
275 })
276 .await
277 }
278
279 async fn api_get_base<T: Serialize>(
289 &self,
290 endpoint: &str,
291 query_params: Option<&T>,
292 ) -> Result<serde_json::Value, reqwest::Error> {
293 let url = format!("{API_URL}/{}", endpoint);
294 let res = self
295 .client
296 .get(&url)
297 .query(&query_params)
298 .header(
299 "User-Agent",
300 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
301 )
302 .header("Content-Type", "application/json")
303 .header("Authorization", format!("Bearer {}", self.api_key))
304 .send()
305 .await?;
306 parse_response(res).await
307 }
308
309 async fn api_get<T: Serialize>(
319 &self,
320 endpoint: &str,
321 query_params: Option<&T>,
322 ) -> Result<serde_json::Value, reqwest::Error> {
323 let fetch = || async { self.api_get_base(endpoint, query_params.to_owned()).await };
324
325 fetch
326 .retry(ExponentialBuilder::default().with_max_times(5))
327 .when(|err: &reqwest::Error| {
328 if let Some(status) = err.status() {
329 status.is_server_error()
330 } else {
331 err.is_timeout()
332 }
333 })
334 .await
335 }
336
337 async fn api_delete_base(
350 &self,
351 endpoint: &str,
352 params: Option<HashMap<String, serde_json::Value>>,
353 ) -> Result<Response, Error> {
354 let url = format!("{API_URL}/v1/{}", endpoint);
355 let request_builder = self
356 .client
357 .delete(&url)
358 .header(
359 "User-Agent",
360 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
361 )
362 .header("Content-Type", "application/json")
363 .header("Authorization", format!("Bearer {}", self.api_key));
364
365 let request_builder = if let Some(params) = params {
366 request_builder.json(¶ms)
367 } else {
368 request_builder
369 };
370
371 request_builder.send().await
372 }
373
374 async fn api_delete(
387 &self,
388 endpoint: &str,
389 params: Option<HashMap<String, serde_json::Value>>,
390 ) -> Result<Response, Error> {
391 let fetch = || async { self.api_delete_base(endpoint, params.to_owned()).await };
392
393 fetch
394 .retry(ExponentialBuilder::default().with_max_times(5))
395 .when(|err: &reqwest::Error| {
396 if let Some(status) = err.status() {
397 status.is_server_error()
398 } else {
399 err.is_timeout()
400 }
401 })
402 .await
403 }
404
405 pub async fn scrape_url(
418 &self,
419 url: &str,
420 params: Option<RequestParams>,
421 content_type: &str,
422 ) -> Result<serde_json::Value, reqwest::Error> {
423 let mut data = HashMap::new();
424
425 data.insert(
426 "url".to_string(),
427 serde_json::Value::String(url.to_string()),
428 );
429 data.insert("limit".to_string(), serde_json::Value::Number(1.into()));
430
431 if let Ok(params) = serde_json::to_value(params) {
432 if let Some(ref p) = params.as_object() {
433 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
434 }
435 }
436
437 let res = self.api_post("crawl", data, content_type).await?;
438 parse_response(res).await
439 }
440
441 pub async fn crawl_url(
455 &self,
456 url: &str,
457 params: Option<RequestParams>,
458 stream: bool,
459 content_type: &str,
460 callback: Option<impl Fn(serde_json::Value) + Send>,
461 ) -> Result<serde_json::Value, reqwest::Error> {
462 use tokio_util::codec::{FramedRead, LinesCodec};
463
464 let mut data = HashMap::new();
465
466 if let Ok(params) = serde_json::to_value(params) {
467 if let Some(ref p) = params.as_object() {
468 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
469 }
470 }
471
472 data.insert("url".into(), serde_json::Value::String(url.to_string()));
473
474 let res = self.api_post("crawl", data, content_type).await?;
475
476 if stream {
477 if let Some(callback) = callback {
478 let stream = res.bytes_stream();
479
480 let stream_reader = tokio_util::io::StreamReader::new(
481 stream
482 .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
483 );
484
485 let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
486
487 while let Some(line_result) = lines.next().await {
488 match line_result {
489 Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
490 Ok(value) => {
491 callback(value);
492 }
493 Err(_e) => {
494 continue;
495 }
496 },
497 Err(_e) => return Ok(serde_json::Value::Null),
498 }
499 }
500
501 Ok(serde_json::Value::Null)
502 } else {
503 Ok(serde_json::Value::Null)
504 }
505 } else {
506 parse_response(res).await
507 }
508 }
509
510 pub async fn links(
523 &self,
524 url: &str,
525 params: Option<RequestParams>,
526 _stream: bool,
527 content_type: &str,
528 ) -> Result<serde_json::Value, reqwest::Error> {
529 let mut data = HashMap::new();
530
531 if let Ok(params) = serde_json::to_value(params) {
532 if let Some(ref p) = params.as_object() {
533 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
534 }
535 }
536
537 data.insert("url".into(), serde_json::Value::String(url.to_string()));
538
539 let res = self.api_post("links", data, content_type).await?;
540 parse_response(res).await
541 }
542
543 pub async fn screenshot(
556 &self,
557 url: &str,
558 params: Option<RequestParams>,
559 _stream: bool,
560 content_type: &str,
561 ) -> Result<serde_json::Value, reqwest::Error> {
562 let mut data = HashMap::new();
563
564 if let Ok(params) = serde_json::to_value(params) {
565 if let Some(ref p) = params.as_object() {
566 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
567 }
568 }
569
570 data.insert("url".into(), serde_json::Value::String(url.to_string()));
571
572 let res = self.api_post("screenshot", data, content_type).await?;
573 parse_response(res).await
574 }
575
576 pub async fn search(
589 &self,
590 q: &str,
591 params: Option<SearchRequestParams>,
592 _stream: bool,
593 content_type: &str,
594 ) -> Result<serde_json::Value, reqwest::Error> {
595 let body = match params {
596 Some(mut params) => {
597 params.search = q.to_string();
598 params
599 }
600 _ => {
601 let mut params = SearchRequestParams::default();
602 params.search = q.to_string();
603 params
604 }
605 };
606
607 let res = self.api_post("search", body, content_type).await?;
608
609 parse_response(res).await
610 }
611
612 pub async fn transform(
625 &self,
626 data: Vec<HashMap<&str, &str>>,
627 params: Option<TransformParams>,
628 _stream: bool,
629 content_type: &str,
630 ) -> Result<serde_json::Value, reqwest::Error> {
631 let mut payload = HashMap::new();
632
633 if let Ok(params) = serde_json::to_value(params) {
634 if let Some(ref p) = params.as_object() {
635 payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
636 }
637 }
638
639 if let Ok(d) = serde_json::to_value(data) {
640 payload.insert("data".into(), d);
641 }
642
643 let res = self.api_post("transform", payload, content_type).await?;
644
645 parse_response(res).await
646 }
647
648 pub async fn extract_contacts(
661 &self,
662 url: &str,
663 params: Option<RequestParams>,
664 _stream: bool,
665 content_type: &str,
666 ) -> Result<serde_json::Value, reqwest::Error> {
667 let mut data = HashMap::new();
668
669 if let Ok(params) = serde_json::to_value(params) {
670 if let Ok(params) = serde_json::to_value(params) {
671 if let Some(ref p) = params.as_object() {
672 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
673 }
674 }
675 }
676
677 match serde_json::to_value(url) {
678 Ok(u) => {
679 data.insert("url".into(), u);
680 }
681 _ => (),
682 }
683
684 let res = self
685 .api_post("pipeline/extract-contacts", data, content_type)
686 .await?;
687
688 parse_response(res).await
689 }
690
691 pub async fn label(
704 &self,
705 url: &str,
706 params: Option<RequestParams>,
707 _stream: bool,
708 content_type: &str,
709 ) -> Result<serde_json::Value, reqwest::Error> {
710 let mut data = HashMap::new();
711
712 if let Ok(params) = serde_json::to_value(params) {
713 if let Ok(params) = serde_json::to_value(params) {
714 if let Some(ref p) = params.as_object() {
715 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
716 }
717 }
718 }
719
720 data.insert("url".into(), serde_json::Value::String(url.to_string()));
721
722 let res = self.api_post("pipeline/label", data, content_type).await?;
723 parse_response(res).await
724 }
725
726 pub async fn download(
738 &self,
739 url: Option<&str>,
740 options: Option<HashMap<&str, i32>>,
741 ) -> Result<reqwest::Response, reqwest::Error> {
742 let mut params = HashMap::new();
743
744 if let Some(url) = url {
745 params.insert("url".to_string(), url.to_string());
746 }
747
748 if let Some(options) = options {
749 for (key, value) in options {
750 params.insert(key.to_string(), value.to_string());
751 }
752 }
753
754 let url = format!("{API_URL}/v1/data/download");
755 let request = self
756 .client
757 .get(&url)
758 .header(
759 "User-Agent",
760 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
761 )
762 .header("Content-Type", "application/octet-stream")
763 .header("Authorization", format!("Bearer {}", self.api_key))
764 .query(¶ms);
765
766 let res = request.send().await?;
767
768 Ok(res)
769 }
770
771 pub async fn create_signed_url(
783 &self,
784 url: Option<&str>,
785 options: Option<HashMap<&str, i32>>,
786 ) -> Result<serde_json::Value, reqwest::Error> {
787 let mut params = HashMap::new();
788
789 if let Some(options) = options {
790 for (key, value) in options {
791 params.insert(key.to_string(), value.to_string());
792 }
793 }
794
795 if let Some(url) = url {
796 params.insert("url".to_string(), url.to_string());
797 }
798
799 let url = format!("{API_URL}/v1/data/sign-url");
800 let request = self
801 .client
802 .get(&url)
803 .header(
804 "User-Agent",
805 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
806 )
807 .header("Authorization", format!("Bearer {}", self.api_key))
808 .query(¶ms);
809
810 let res = request.send().await?;
811
812 parse_response(res).await
813 }
814
815 pub async fn get_crawl_state(
827 &self,
828 url: &str,
829 params: Option<RequestParams>,
830 content_type: &str,
831 ) -> Result<serde_json::Value, reqwest::Error> {
832 let mut payload = HashMap::new();
833 payload.insert("url".into(), serde_json::Value::String(url.to_string()));
834 payload.insert(
835 "contentType".into(),
836 serde_json::Value::String(content_type.to_string()),
837 );
838
839 if let Ok(params) = serde_json::to_value(params) {
840 if let Ok(params) = serde_json::to_value(params) {
841 if let Some(ref p) = params.as_object() {
842 payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
843 }
844 }
845 }
846
847 let res = self
848 .api_post("data/crawl_state", payload, content_type)
849 .await?;
850 parse_response(res).await
851 }
852
853 pub async fn get_credits(&self) -> Result<serde_json::Value, reqwest::Error> {
855 self.api_get::<serde_json::Value>("data/credits", None)
856 .await
857 }
858
859 pub async fn data_post(
861 &self,
862 table: &str,
863 data: Option<RequestParams>,
864 ) -> Result<serde_json::Value, reqwest::Error> {
865 let res = self
866 .api_post(&format!("data/{}", table), data, "application/json")
867 .await?;
868 parse_response(res).await
869 }
870
871 pub async fn query(&self, params: &QueryRequest) -> Result<serde_json::Value, reqwest::Error> {
873 let res = self
874 .api_get::<QueryRequest>(&"data/query", Some(params))
875 .await?;
876
877 Ok(res)
878 }
879
880 pub async fn data_get(
882 &self,
883 table: &str,
884 params: Option<RequestParams>,
885 ) -> Result<serde_json::Value, reqwest::Error> {
886 let mut payload = HashMap::new();
887
888 if let Some(params) = params {
889 if let Ok(p) = serde_json::to_value(params) {
890 if let Some(o) = p.as_object() {
891 payload.extend(o.iter().map(|(k, v)| (k.as_str(), v.clone())));
892 }
893 }
894 }
895
896 let res = self
897 .api_get::<serde_json::Value>(&format!("data/{}", table), None)
898 .await?;
899 Ok(res)
900 }
901
902 pub async fn data_delete(
904 &self,
905 table: &str,
906 params: Option<RequestParams>,
907 ) -> Result<serde_json::Value, reqwest::Error> {
908 let mut payload = HashMap::new();
909
910 if let Ok(params) = serde_json::to_value(params) {
911 if let Ok(params) = serde_json::to_value(params) {
912 if let Some(ref p) = params.as_object() {
913 payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
914 }
915 }
916 }
917
918 let res = self
919 .api_delete(&format!("data/{}", table), Some(payload))
920 .await?;
921 parse_response(res).await
922 }
923}
924
#[cfg(test)]
mod tests {
    use super::*;
    use dotenv::dotenv;
    use lazy_static::lazy_static;
    use reqwest::ClientBuilder;

    lazy_static! {
        // Shared client for all tests. `.env` is loaded first so that
        // `SPIDER_API_KEY` can come from it; construction panics when no key
        // is available.
        static ref SPIDER_CLIENT: Spider = {
            dotenv().ok();
            let http = ClientBuilder::new()
                .user_agent("SpiderBot")
                .build()
                .unwrap();

            Spider::new_with_client(None, http).expect("client to build")
        };
    }

    // Every test below issues a real request through `SPIDER_CLIENT` and only
    // checks that the call returned `Ok`.

    #[tokio::test]
    #[ignore]
    async fn test_scrape_url() {
        let outcome = SPIDER_CLIENT
            .scrape_url("https://example.com", None, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_crawl_url() {
        // Non-streaming crawl; the callback type still has to be named.
        let outcome = SPIDER_CLIENT
            .crawl_url(
                "https://example.com",
                None,
                false,
                "application/json",
                None::<fn(serde_json::Value)>,
            )
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_links() {
        let outcome = SPIDER_CLIENT
            .links("https://example.com", None, false, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_screenshot() {
        let mut request = RequestParams::default();
        request.limit = Some(1);

        let outcome = SPIDER_CLIENT
            .screenshot(
                "https://example.com",
                Some(request),
                false,
                "application/json",
            )
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_transform() {
        let entry = HashMap::from([("<html><body><h1>Transformation</h1></body></html>", "")]);
        let outcome = SPIDER_CLIENT
            .transform(vec![entry], None, false, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_extract_contacts() {
        let outcome = SPIDER_CLIENT
            .extract_contacts("https://example.com", None, false, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_label() {
        let outcome = SPIDER_CLIENT
            .label("https://example.com", None, false, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_create_signed_url() {
        let outcome = SPIDER_CLIENT
            .create_signed_url(Some("example.com"), None)
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_get_crawl_state() {
        let outcome = SPIDER_CLIENT
            .get_crawl_state("https://example.com", None, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_query() {
        let mut request = QueryRequest::default();
        request.domain = Some("spider.cloud".into());

        let outcome = SPIDER_CLIENT.query(&request).await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_get_credits() {
        let outcome = SPIDER_CLIENT.get_credits().await;
        assert!(outcome.is_ok());
    }
}