1pub mod shapes;
64
65use backon::ExponentialBuilder;
66use backon::Retryable;
67use reqwest::Client;
68use reqwest::{Error, Response};
69use serde::Serialize;
70use std::collections::HashMap;
71use tokio_stream::StreamExt;
72pub use shapes::{request::*, response::*};
73use std::sync::OnceLock;
74
/// Process-wide cache of the resolved API base URL.
static API_URL: OnceLock<String> = OnceLock::new();

/// Returns the base URL that request URLs are built from.
///
/// On the first call the `SPIDER_API_URL` environment variable is read; if it cannot be read,
/// `https://api.spider.cloud` is used. The result is cached for the rest of the process, so
/// later changes to the environment are not observed.
pub fn get_api_url() -> &'static str {
    API_URL
        .get_or_init(|| match std::env::var("SPIDER_API_URL") {
            Ok(url) => url,
            Err(_) => String::from("https://api.spider.cloud"),
        })
        .as_str()
}
83
/// Client for the Spider API.
///
/// NOTE(review): the derived `Debug` implementation prints `api_key` along with the other
/// fields — avoid logging this struct with `{:?}` where secrets must not appear.
#[derive(Debug, Default)]
pub struct Spider {
    /// API key sent as a bearer token in the `Authorization` header of each request.
    pub api_key: String,
    /// The `reqwest` client used to send every request.
    pub client: Client,
}
92
93pub async fn handle_json(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
95 res.json().await
96}
97
98pub async fn handle_jsonl(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
100 let text = res.text().await?;
101 let lines = text
102 .lines()
103 .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
104 .collect::<Vec<_>>();
105 Ok(serde_json::Value::Array(lines))
106}
107
/// Converts a CSV body into a JSON array of objects keyed by the CSV header names.
///
/// Rows that fail to deserialize are skipped. If the collected rows cannot be converted to a
/// JSON value, the raw body text is returned as a JSON string instead.
///
/// # Errors
///
/// Returns the `reqwest::Error` raised while reading the body as text.
#[cfg(feature = "csv")]
pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    use std::collections::HashMap;

    let body = res.text().await?;

    let mut reader = csv::Reader::from_reader(body.as_bytes());
    let rows: Vec<HashMap<String, String>> =
        reader.deserialize().filter_map(Result::ok).collect();

    match serde_json::to_value(rows) {
        Ok(json) => Ok(json),
        Err(_) => Ok(serde_json::Value::String(body)),
    }
}
122
123#[cfg(not(feature = "csv"))]
124pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
125 handle_text(res).await
126}
127
128pub async fn handle_text(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
130 Ok(serde_json::Value::String(
131 res.text().await.unwrap_or_default(),
132 ))
133}
134
/// Attempts to deserialize an XML body into a JSON value with `quick_xml`.
///
/// When deserialization fails the raw body text is returned as a JSON string.
///
/// NOTE(review): this function is gated on the `csv` feature although it relies on
/// `quick_xml` — confirm the feature gate is intentional.
///
/// # Errors
///
/// Returns the `reqwest::Error` raised while reading the body as text.
#[cfg(feature = "csv")]
pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    let body = res.text().await?;
    let parsed = quick_xml::de::from_str::<serde_json::Value>(&body);
    Ok(parsed.unwrap_or_else(|_| serde_json::Value::String(body)))
}
144
145#[cfg(not(feature = "csv"))]
146pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
148 handle_text(res).await
149}
150
151pub async fn parse_response(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
152 let content_type = res
153 .headers()
154 .get(reqwest::header::CONTENT_TYPE)
155 .and_then(|v| v.to_str().ok())
156 .unwrap_or_default()
157 .to_ascii_lowercase();
158
159 if content_type.contains("json") && !content_type.contains("jsonl") {
160 handle_json(res).await
161 } else if content_type.contains("jsonl") || content_type.contains("ndjson") {
162 handle_jsonl(res).await
163 } else if content_type.contains("csv") {
164 handle_csv(res).await
165 } else if content_type.contains("xml") {
166 handle_xml(res).await
167 } else {
168 handle_text(res).await
169 }
170}
171
172impl Spider {
173 pub fn new(api_key: Option<String>) -> Result<Self, &'static str> {
183 let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
184
185 match api_key {
186 Some(key) => Ok(Self {
187 api_key: key,
188 client: Client::new(),
189 }),
190 None => Err("No API key provided"),
191 }
192 }
193
194 pub fn new_with_client(api_key: Option<String>, client: Client) -> Result<Self, &'static str> {
205 let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
206
207 match api_key {
208 Some(key) => Ok(Self {
209 api_key: key,
210 client,
211 }),
212 None => Err("No API key provided"),
213 }
214 }
215
216 async fn api_post_base(
229 &self,
230 endpoint: &str,
231 data: impl Serialize + Sized + std::fmt::Debug,
232 content_type: &str,
233 ) -> Result<Response, Error> {
234 let url: String = format!("{}/{}", get_api_url(), endpoint);
235
236 self.client
237 .post(&url)
238 .header(
239 "User-Agent",
240 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
241 )
242 .header("Content-Type", content_type)
243 .header("Authorization", format!("Bearer {}", self.api_key))
244 .json(&data)
245 .send()
246 .await
247 }
248
249 pub async fn api_post(
262 &self,
263 endpoint: &str,
264 data: impl Serialize + std::fmt::Debug + Clone + Send + Sync,
265 content_type: &str,
266 ) -> Result<Response, Error> {
267 let fetch = || async {
268 self.api_post_base(endpoint, data.to_owned(), content_type)
269 .await
270 };
271
272 fetch
273 .retry(ExponentialBuilder::default().with_max_times(5))
274 .when(|err: &reqwest::Error| {
275 if let Some(status) = err.status() {
276 status.is_server_error()
277 } else {
278 err.is_timeout()
279 }
280 })
281 .await
282 }
283
284 async fn api_get_base<T: Serialize>(
294 &self,
295 endpoint: &str,
296 query_params: Option<&T>,
297 ) -> Result<serde_json::Value, reqwest::Error> {
298 let url = format!("{}/{}", get_api_url(), endpoint);
299 let res = self
300 .client
301 .get(&url)
302 .query(&query_params)
303 .header(
304 "User-Agent",
305 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
306 )
307 .header("Content-Type", "application/json")
308 .header("Authorization", format!("Bearer {}", self.api_key))
309 .send()
310 .await?;
311 parse_response(res).await
312 }
313
314 pub async fn api_get<T: Serialize>(
324 &self,
325 endpoint: &str,
326 query_params: Option<&T>,
327 ) -> Result<serde_json::Value, reqwest::Error> {
328 let fetch = || async { self.api_get_base(endpoint, query_params.to_owned()).await };
329
330 fetch
331 .retry(ExponentialBuilder::default().with_max_times(5))
332 .when(|err: &reqwest::Error| {
333 if let Some(status) = err.status() {
334 status.is_server_error()
335 } else {
336 err.is_timeout()
337 }
338 })
339 .await
340 }
341
342 async fn api_delete_base(
355 &self,
356 endpoint: &str,
357 params: Option<HashMap<String, serde_json::Value>>,
358 ) -> Result<Response, Error> {
359 let url = format!("{}/v1/{}", get_api_url(), endpoint);
360 let request_builder = self
361 .client
362 .delete(&url)
363 .header(
364 "User-Agent",
365 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
366 )
367 .header("Content-Type", "application/json")
368 .header("Authorization", format!("Bearer {}", self.api_key));
369
370 let request_builder = if let Some(params) = params {
371 request_builder.json(¶ms)
372 } else {
373 request_builder
374 };
375
376 request_builder.send().await
377 }
378
379 pub async fn api_delete(
392 &self,
393 endpoint: &str,
394 params: Option<HashMap<String, serde_json::Value>>,
395 ) -> Result<Response, Error> {
396 let fetch = || async { self.api_delete_base(endpoint, params.to_owned()).await };
397
398 fetch
399 .retry(ExponentialBuilder::default().with_max_times(5))
400 .when(|err: &reqwest::Error| {
401 if let Some(status) = err.status() {
402 status.is_server_error()
403 } else {
404 err.is_timeout()
405 }
406 })
407 .await
408 }
409
410 pub async fn scrape_url(
423 &self,
424 url: &str,
425 params: Option<RequestParams>,
426 content_type: &str,
427 ) -> Result<serde_json::Value, reqwest::Error> {
428 let mut data = HashMap::new();
429
430 if let Ok(params) = serde_json::to_value(params) {
431 if let Some(ref p) = params.as_object() {
432 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
433 }
434 }
435
436 if !url.is_empty() {
437 data.insert(
438 "url".to_string(),
439 serde_json::Value::String(url.to_string()),
440 );
441 }
442
443 data.insert("limit".to_string(), serde_json::Value::Number(1.into()));
444
445 let res = self.api_post("crawl", data, content_type).await?;
446 parse_response(res).await
447 }
448
449 pub async fn multi_scrape_url(
462 &self,
463 params: Option<Vec<RequestParams>>,
464 content_type: &str,
465 ) -> Result<serde_json::Value, reqwest::Error> {
466 let mut data = HashMap::new();
467
468if let Ok(mut params) = serde_json::to_value(params) {
469 if let Some(obj) = params.as_object_mut() {
470 obj.insert("limit".to_string(), serde_json::Value::Number(1.into()));
471 data.extend(obj.iter().map(|(k, v)| (k.clone(), v.clone())));
472 }
473}
474 let res = self.api_post("crawl", data, content_type).await?;
475 parse_response(res).await
476 }
477
478
479 pub async fn crawl_url(
493 &self,
494 url: &str,
495 params: Option<RequestParams>,
496 stream: bool,
497 content_type: &str,
498 callback: Option<impl Fn(serde_json::Value) + Send>,
499 ) -> Result<serde_json::Value, reqwest::Error> {
500 use tokio_util::codec::{FramedRead, LinesCodec};
501
502 let mut data = HashMap::new();
503
504 if let Ok(params) = serde_json::to_value(params) {
505 if let Some(ref p) = params.as_object() {
506 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
507 }
508 }
509
510 data.insert("url".into(), serde_json::Value::String(url.to_string()));
511
512 let res = self.api_post("crawl", data, content_type).await?;
513
514 if stream {
515 if let Some(callback) = callback {
516 let stream = res.bytes_stream();
517
518 let stream_reader = tokio_util::io::StreamReader::new(
519 stream
520 .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
521 );
522
523 let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
524
525 while let Some(line_result) = lines.next().await {
526 match line_result {
527 Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
528 Ok(value) => {
529 callback(value);
530 }
531 Err(_e) => {
532 continue;
533 }
534 },
535 Err(_e) => return Ok(serde_json::Value::Null),
536 }
537 }
538
539 Ok(serde_json::Value::Null)
540 } else {
541 Ok(serde_json::Value::Null)
542 }
543 } else {
544 parse_response(res).await
545 }
546 }
547
548 pub async fn multi_crawl_url(
562 &self,
563 params: Option<Vec<RequestParams>>,
564 stream: bool,
565 content_type: &str,
566 callback: Option<impl Fn(serde_json::Value) + Send>,
567 ) -> Result<serde_json::Value, reqwest::Error> {
568 use tokio_util::codec::{FramedRead, LinesCodec};
569
570
571 let res = self.api_post("crawl", params, content_type).await?;
572
573 if stream {
574 if let Some(callback) = callback {
575 let stream = res.bytes_stream();
576
577 let stream_reader = tokio_util::io::StreamReader::new(
578 stream
579 .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
580 );
581
582 let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
583
584 while let Some(line_result) = lines.next().await {
585 match line_result {
586 Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
587 Ok(value) => {
588 callback(value);
589 }
590 Err(_e) => {
591 continue;
592 }
593 },
594 Err(_e) => return Ok(serde_json::Value::Null),
595 }
596 }
597
598 Ok(serde_json::Value::Null)
599 } else {
600 Ok(serde_json::Value::Null)
601 }
602 } else {
603 parse_response(res).await
604 }
605 }
606
607
608 pub async fn links(
621 &self,
622 url: &str,
623 params: Option<RequestParams>,
624 _stream: bool,
625 content_type: &str,
626 ) -> Result<serde_json::Value, reqwest::Error> {
627 let mut data = HashMap::new();
628
629 if let Ok(params) = serde_json::to_value(params) {
630 if let Some(ref p) = params.as_object() {
631 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
632 }
633 }
634
635 data.insert("url".into(), serde_json::Value::String(url.to_string()));
636
637 let res = self.api_post("links", data, content_type).await?;
638 parse_response(res).await
639 }
640
641
642 pub async fn multi_links(
655 &self,
656 params: Option<Vec<RequestParams>>,
657 _stream: bool,
658 content_type: &str,
659 ) -> Result<serde_json::Value, reqwest::Error> {
660 let res = self.api_post("links", params, content_type).await?;
661 parse_response(res).await
662 }
663
664
665 pub async fn screenshot(
678 &self,
679 url: &str,
680 params: Option<RequestParams>,
681 _stream: bool,
682 content_type: &str,
683 ) -> Result<serde_json::Value, reqwest::Error> {
684 let mut data = HashMap::new();
685
686 if let Ok(params) = serde_json::to_value(params) {
687 if let Some(ref p) = params.as_object() {
688 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
689 }
690 }
691
692 data.insert("url".into(), serde_json::Value::String(url.to_string()));
693
694 let res = self.api_post("screenshot", data, content_type).await?;
695 parse_response(res).await
696 }
697
698 pub async fn multi_screenshot(
711 &self,
712 params: Option<Vec<RequestParams>>,
713 _stream: bool,
714 content_type: &str,
715 ) -> Result<serde_json::Value, reqwest::Error> {
716 let res = self.api_post("screenshot", params, content_type).await?;
717 parse_response(res).await
718 }
719
720 pub async fn search(
733 &self,
734 q: &str,
735 params: Option<SearchRequestParams>,
736 _stream: bool,
737 content_type: &str,
738 ) -> Result<serde_json::Value, reqwest::Error> {
739 let body = match params {
740 Some(mut params) => {
741 params.search = q.to_string();
742 params
743 }
744 _ => {
745 let mut params = SearchRequestParams::default();
746 params.search = q.to_string();
747 params
748 }
749 };
750
751 let res = self.api_post("search", body, content_type).await?;
752
753 parse_response(res).await
754 }
755
756 pub async fn multi_search(
769 &self,
770 params: Option<Vec<SearchRequestParams>>,
771 content_type: &str,
772 ) -> Result<serde_json::Value, reqwest::Error> {
773 let res = self.api_post("search", params, content_type).await?;
774 parse_response(res).await
775 }
776
777 pub async fn transform(
790 &self,
791 data: Vec<HashMap<&str, &str>>,
792 params: Option<TransformParams>,
793 _stream: bool,
794 content_type: &str,
795 ) -> Result<serde_json::Value, reqwest::Error> {
796 let mut payload = HashMap::new();
797
798 if let Ok(params) = serde_json::to_value(params) {
799 if let Some(ref p) = params.as_object() {
800 payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
801 }
802 }
803
804 if let Ok(d) = serde_json::to_value(data) {
805 payload.insert("data".into(), d);
806 }
807
808 let res = self.api_post("transform", payload, content_type).await?;
809
810 parse_response(res).await
811 }
812
813 pub async fn download(
825 &self,
826 url: Option<&str>,
827 options: Option<HashMap<&str, i32>>,
828 ) -> Result<reqwest::Response, reqwest::Error> {
829 let mut params = HashMap::new();
830
831 if let Some(url) = url {
832 params.insert("url".to_string(), url.to_string());
833 }
834
835 if let Some(options) = options {
836 for (key, value) in options {
837 params.insert(key.to_string(), value.to_string());
838 }
839 }
840
841 let url = format!("{}/v1/data/download", get_api_url());
842 let request = self
843 .client
844 .get(&url)
845 .header(
846 "User-Agent",
847 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
848 )
849 .header("Content-Type", "application/octet-stream")
850 .header("Authorization", format!("Bearer {}", self.api_key))
851 .query(¶ms);
852
853 let res = request.send().await?;
854
855 Ok(res)
856 }
857
858 pub async fn create_signed_url(
870 &self,
871 url: Option<&str>,
872 options: Option<HashMap<&str, i32>>,
873 ) -> Result<serde_json::Value, reqwest::Error> {
874 let mut params = HashMap::new();
875
876 if let Some(options) = options {
877 for (key, value) in options {
878 params.insert(key.to_string(), value.to_string());
879 }
880 }
881
882 if let Some(url) = url {
883 params.insert("url".to_string(), url.to_string());
884 }
885
886 let url = format!("{}/v1/data/sign-url", get_api_url());
887 let request = self
888 .client
889 .get(&url)
890 .header(
891 "User-Agent",
892 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
893 )
894 .header("Authorization", format!("Bearer {}", self.api_key))
895 .query(¶ms);
896
897 let res = request.send().await?;
898
899 parse_response(res).await
900 }
901
902 pub async fn get_crawl_state(
914 &self,
915 url: &str,
916 params: Option<RequestParams>,
917 content_type: &str,
918 ) -> Result<serde_json::Value, reqwest::Error> {
919 let mut payload = HashMap::new();
920 payload.insert("url".into(), serde_json::Value::String(url.to_string()));
921 payload.insert(
922 "contentType".into(),
923 serde_json::Value::String(content_type.to_string()),
924 );
925
926 if let Ok(params) = serde_json::to_value(params) {
927 if let Ok(params) = serde_json::to_value(params) {
928 if let Some(ref p) = params.as_object() {
929 payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
930 }
931 }
932 }
933
934 let res = self
935 .api_post("data/crawl_state", payload, content_type)
936 .await?;
937 parse_response(res).await
938 }
939
940 pub async fn get_credits(&self) -> Result<serde_json::Value, reqwest::Error> {
942 self.api_get::<serde_json::Value>("data/credits", None)
943 .await
944 }
945
946 pub async fn data_post(
948 &self,
949 table: &str,
950 data: Option<RequestParams>,
951 ) -> Result<serde_json::Value, reqwest::Error> {
952 let res = self
953 .api_post(&format!("data/{}", table), data, "application/json")
954 .await?;
955 parse_response(res).await
956 }
957
958 pub async fn query(&self, params: &QueryRequest) -> Result<serde_json::Value, reqwest::Error> {
960 let res = self
961 .api_get::<QueryRequest>(&"data/query", Some(params))
962 .await?;
963
964 Ok(res)
965 }
966
967 pub async fn data_get(
969 &self,
970 table: &str,
971 params: Option<RequestParams>,
972 ) -> Result<serde_json::Value, reqwest::Error> {
973 let mut payload = HashMap::new();
974
975 if let Some(params) = params {
976 if let Ok(p) = serde_json::to_value(params) {
977 if let Some(o) = p.as_object() {
978 payload.extend(o.iter().map(|(k, v)| (k.as_str(), v.clone())));
979 }
980 }
981 }
982
983 let res = self
984 .api_get::<serde_json::Value>(&format!("data/{}", table), None)
985 .await?;
986 Ok(res)
987 }
988
989 pub async fn data_delete(
991 &self,
992 table: &str,
993 params: Option<RequestParams>,
994 ) -> Result<serde_json::Value, reqwest::Error> {
995 let mut payload = HashMap::new();
996
997 if let Ok(params) = serde_json::to_value(params) {
998 if let Ok(params) = serde_json::to_value(params) {
999 if let Some(ref p) = params.as_object() {
1000 payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
1001 }
1002 }
1003 }
1004
1005 let res = self
1006 .api_delete(&format!("data/{}", table), Some(payload))
1007 .await?;
1008 parse_response(res).await
1009 }
1010}
1011
#[cfg(test)]
mod tests {
    //! Tests that call the API through a shared client. The client is built without an
    //! explicit key, so `SPIDER_API_KEY` must be available (optionally via a `.env` file).

    use super::*;
    use dotenv::dotenv;
    use lazy_static::lazy_static;
    use reqwest::ClientBuilder;

    lazy_static! {
        /// Client shared by every test; `.env` is loaded before the key is resolved.
        static ref SPIDER_CLIENT: Spider = {
            dotenv().ok();
            let http = ClientBuilder::new()
                .user_agent("SpiderBot")
                .build()
                .unwrap();

            Spider::new_with_client(None, http).expect("client to build")
        };
    }

    #[tokio::test]
    #[ignore]
    async fn test_scrape_url() {
        let outcome = SPIDER_CLIENT
            .scrape_url("https://example.com", None, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_crawl_url() {
        // No callback is needed because streaming is disabled.
        let no_callback = None::<fn(serde_json::Value)>;
        let outcome = SPIDER_CLIENT
            .crawl_url(
                "https://example.com",
                None,
                false,
                "application/json",
                no_callback,
            )
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_links() {
        let outcome: Result<serde_json::Value, Error> = SPIDER_CLIENT
            .links("https://example.com", None, false, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_screenshot() {
        let params = RequestParams {
            limit: Some(1),
            ..Default::default()
        };

        let outcome = SPIDER_CLIENT
            .screenshot(
                "https://example.com",
                Some(params),
                false,
                "application/json",
            )
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_transform() {
        let documents = vec![HashMap::from([(
            "<html><body><h1>Transformation</h1></body></html>",
            "",
        )])];
        let outcome = SPIDER_CLIENT
            .transform(documents, None, false, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_create_signed_url() {
        let outcome = SPIDER_CLIENT
            .create_signed_url(Some("example.com"), None)
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_get_crawl_state() {
        let outcome = SPIDER_CLIENT
            .get_crawl_state("https://example.com", None, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_query() {
        let request = QueryRequest {
            domain: Some("spider.cloud".into()),
            ..Default::default()
        };

        let outcome = SPIDER_CLIENT.query(&request).await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_get_credits() {
        let outcome = SPIDER_CLIENT.get_credits().await;
        assert!(outcome.is_ok());
    }
}