1pub mod shapes;
64
65use backon::ExponentialBuilder;
66use backon::Retryable;
67use reqwest::Client;
68use reqwest::{Error, Response};
69use serde::Serialize;
70pub use shapes::{request::*, response::*};
71use std::collections::HashMap;
72use std::sync::OnceLock;
73use tokio_stream::StreamExt;
74
/// Process-wide cache for the API base URL; filled on the first call to [`get_api_url`].
static API_URL: OnceLock<String> = OnceLock::new();

/// Returns the base URL that every request in this module is built on.
///
/// The value is resolved once, on first use, and cached for the rest of the
/// process; later changes to the environment are not observed.
///
/// Resolution order:
/// 1. `SPIDER_API_URL`, with surrounding whitespace and trailing `/` removed,
///    provided something is left after trimming;
/// 2. otherwise `https://api.spider.cloud`.
///
/// Trimming matters because callers build URLs as `"{base}/{endpoint}"`: an
/// empty override would produce a relative URL and a trailing slash would
/// produce `//endpoint`.
pub fn get_api_url() -> &'static str {
    const DEFAULT_API_URL: &str = "https://api.spider.cloud";

    API_URL.get_or_init(|| {
        std::env::var("SPIDER_API_URL")
            .ok()
            .map(|raw| raw.trim().trim_end_matches('/').to_string())
            .filter(|url| !url.is_empty())
            .unwrap_or_else(|| DEFAULT_API_URL.to_string())
    })
}
83
/// Handle used to talk to the Spider API.
///
/// Bundles the API key with the `reqwest` client that the request methods use.
// NOTE(review): the derived `Debug` prints `api_key` verbatim, so formatting this
// struct with `{:?}` in logs exposes the credential — confirm that is acceptable.
#[derive(Debug, Default)]
pub struct Spider {
    /// Key sent as `Authorization: Bearer <api_key>` by the request helpers.
    pub api_key: String,
    /// HTTP client used to issue every request.
    pub client: Client,
}
92
93pub async fn handle_json(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
95 res.json().await
96}
97
98pub async fn handle_jsonl(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
100 let text = res.text().await?;
101 let lines = text
102 .lines()
103 .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
104 .collect::<Vec<_>>();
105 Ok(serde_json::Value::Array(lines))
106}
107
/// Decodes a CSV body into a JSON array of string-to-string objects.
///
/// Rows that fail to deserialize are skipped. If the collected rows cannot be
/// converted to JSON, the raw body text is returned as a JSON string instead.
///
/// # Errors
/// Returns the `reqwest::Error` raised while reading the body as text.
#[cfg(feature = "csv")]
pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    let body = res.text().await?;

    let mut reader = csv::Reader::from_reader(body.as_bytes());
    let rows: Vec<std::collections::HashMap<String, String>> =
        reader.deserialize().filter_map(Result::ok).collect();

    match serde_json::to_value(rows) {
        Ok(json) => Ok(json),
        Err(_) => Ok(serde_json::Value::String(body)),
    }
}
122
123#[cfg(not(feature = "csv"))]
124pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
125 handle_text(res).await
126}
127
128pub async fn handle_text(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
130 Ok(serde_json::Value::String(
131 res.text().await.unwrap_or_default(),
132 ))
133}
134
/// Decodes an XML body into a JSON value via `quick_xml`.
///
/// When the body cannot be deserialized, the raw text is returned as a JSON
/// string instead of an error.
///
/// # Errors
/// Returns the `reqwest::Error` raised while reading the body as text.
// NOTE(review): this XML decoder is gated on the "csv" feature — confirm that
// feature is meant to pull in `quick_xml`, or whether a dedicated feature is intended.
#[cfg(feature = "csv")]
pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    let body = res.text().await?;
    let parsed = quick_xml::de::from_str::<serde_json::Value>(&body);
    Ok(parsed.unwrap_or_else(|_| serde_json::Value::String(body)))
}
144
145#[cfg(not(feature = "csv"))]
146pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
148 handle_text(res).await
149}
150
151pub async fn parse_response(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
152 let content_type = res
153 .headers()
154 .get(reqwest::header::CONTENT_TYPE)
155 .and_then(|v| v.to_str().ok())
156 .unwrap_or_default()
157 .to_ascii_lowercase();
158
159 if content_type.contains("json") && !content_type.contains("jsonl") {
160 handle_json(res).await
161 } else if content_type.contains("jsonl") || content_type.contains("ndjson") {
162 handle_jsonl(res).await
163 } else if content_type.contains("csv") {
164 handle_csv(res).await
165 } else if content_type.contains("xml") {
166 handle_xml(res).await
167 } else {
168 handle_text(res).await
169 }
170}
171
172impl Spider {
173 pub fn new(api_key: Option<String>) -> Result<Self, &'static str> {
183 let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
184
185 match api_key {
186 Some(key) => Ok(Self {
187 api_key: key,
188 client: Client::new(),
189 }),
190 None => Err("No API key provided"),
191 }
192 }
193
194 pub fn new_with_client(api_key: Option<String>, client: Client) -> Result<Self, &'static str> {
205 let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
206
207 match api_key {
208 Some(key) => Ok(Self {
209 api_key: key,
210 client,
211 }),
212 None => Err("No API key provided"),
213 }
214 }
215
216 async fn api_post_base(
229 &self,
230 endpoint: &str,
231 data: impl Serialize + Sized + std::fmt::Debug,
232 content_type: &str,
233 ) -> Result<Response, Error> {
234 let url: String = format!("{}/{}", get_api_url(), endpoint);
235
236 self.client
237 .post(&url)
238 .header(
239 "User-Agent",
240 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
241 )
242 .header("Content-Type", content_type)
243 .header("Authorization", format!("Bearer {}", self.api_key))
244 .json(&data)
245 .send()
246 .await
247 }
248
249 pub async fn api_post(
262 &self,
263 endpoint: &str,
264 data: impl Serialize + std::fmt::Debug + Clone + Send + Sync,
265 content_type: &str,
266 ) -> Result<Response, Error> {
267 let fetch = || async {
268 self.api_post_base(endpoint, data.to_owned(), content_type)
269 .await
270 };
271
272 fetch
273 .retry(ExponentialBuilder::default().with_max_times(5))
274 .when(|err: &reqwest::Error| {
275 if let Some(status) = err.status() {
276 status.is_server_error()
277 } else {
278 err.is_timeout()
279 }
280 })
281 .await
282 }
283
284 async fn api_get_base<T: Serialize>(
294 &self,
295 endpoint: &str,
296 query_params: Option<&T>,
297 ) -> Result<serde_json::Value, reqwest::Error> {
298 let url = format!("{}/{}", get_api_url(), endpoint);
299 let res = self
300 .client
301 .get(&url)
302 .query(&query_params)
303 .header(
304 "User-Agent",
305 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
306 )
307 .header("Content-Type", "application/json")
308 .header("Authorization", format!("Bearer {}", self.api_key))
309 .send()
310 .await?;
311 parse_response(res).await
312 }
313
314 pub async fn api_get<T: Serialize>(
324 &self,
325 endpoint: &str,
326 query_params: Option<&T>,
327 ) -> Result<serde_json::Value, reqwest::Error> {
328 let fetch = || async { self.api_get_base(endpoint, query_params.to_owned()).await };
329
330 fetch
331 .retry(ExponentialBuilder::default().with_max_times(5))
332 .when(|err: &reqwest::Error| {
333 if let Some(status) = err.status() {
334 status.is_server_error()
335 } else {
336 err.is_timeout()
337 }
338 })
339 .await
340 }
341
342 async fn api_delete_base(
355 &self,
356 endpoint: &str,
357 params: Option<HashMap<String, serde_json::Value>>,
358 ) -> Result<Response, Error> {
359 let url = format!("{}/v1/{}", get_api_url(), endpoint);
360 let request_builder = self
361 .client
362 .delete(&url)
363 .header(
364 "User-Agent",
365 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
366 )
367 .header("Content-Type", "application/json")
368 .header("Authorization", format!("Bearer {}", self.api_key));
369
370 let request_builder = if let Some(params) = params {
371 request_builder.json(¶ms)
372 } else {
373 request_builder
374 };
375
376 request_builder.send().await
377 }
378
379 pub async fn api_delete(
392 &self,
393 endpoint: &str,
394 params: Option<HashMap<String, serde_json::Value>>,
395 ) -> Result<Response, Error> {
396 let fetch = || async { self.api_delete_base(endpoint, params.to_owned()).await };
397
398 fetch
399 .retry(ExponentialBuilder::default().with_max_times(5))
400 .when(|err: &reqwest::Error| {
401 if let Some(status) = err.status() {
402 status.is_server_error()
403 } else {
404 err.is_timeout()
405 }
406 })
407 .await
408 }
409
410 pub async fn scrape_url(
423 &self,
424 url: &str,
425 params: Option<RequestParams>,
426 content_type: &str,
427 ) -> Result<serde_json::Value, reqwest::Error> {
428 let mut data = HashMap::new();
429
430 if let Ok(params) = serde_json::to_value(params) {
431 if let Some(ref p) = params.as_object() {
432 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
433 }
434 }
435
436 if !url.is_empty() {
437 data.insert(
438 "url".to_string(),
439 serde_json::Value::String(url.to_string()),
440 );
441 }
442
443 let res = self.api_post("scrape", data, content_type).await?;
444 parse_response(res).await
445 }
446
447 pub async fn multi_scrape_url(
460 &self,
461 params: Option<Vec<RequestParams>>,
462 content_type: &str,
463 ) -> Result<serde_json::Value, reqwest::Error> {
464 let mut data = HashMap::new();
465
466 if let Ok(mut params) = serde_json::to_value(params) {
467 if let Some(obj) = params.as_object_mut() {
468 obj.insert("limit".to_string(), serde_json::Value::Number(1.into()));
469 data.extend(obj.iter().map(|(k, v)| (k.clone(), v.clone())));
470 }
471 }
472 let res = self.api_post("scrape", data, content_type).await?;
473 parse_response(res).await
474 }
475
476 pub async fn crawl_url(
490 &self,
491 url: &str,
492 params: Option<RequestParams>,
493 stream: bool,
494 content_type: &str,
495 callback: Option<impl Fn(serde_json::Value) + Send>,
496 ) -> Result<serde_json::Value, reqwest::Error> {
497 use tokio_util::codec::{FramedRead, LinesCodec};
498
499 let mut data = HashMap::new();
500
501 if let Ok(params) = serde_json::to_value(params) {
502 if let Some(ref p) = params.as_object() {
503 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
504 }
505 }
506
507 data.insert("url".into(), serde_json::Value::String(url.to_string()));
508
509 let res = self.api_post("crawl", data, content_type).await?;
510
511 if stream {
512 if let Some(callback) = callback {
513 let stream = res.bytes_stream();
514
515 let stream_reader = tokio_util::io::StreamReader::new(
516 stream
517 .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
518 );
519
520 let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
521
522 while let Some(line_result) = lines.next().await {
523 match line_result {
524 Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
525 Ok(value) => {
526 callback(value);
527 }
528 Err(_e) => {
529 continue;
530 }
531 },
532 Err(_e) => return Ok(serde_json::Value::Null),
533 }
534 }
535
536 Ok(serde_json::Value::Null)
537 } else {
538 Ok(serde_json::Value::Null)
539 }
540 } else {
541 parse_response(res).await
542 }
543 }
544
545 pub async fn multi_crawl_url(
559 &self,
560 params: Option<Vec<RequestParams>>,
561 stream: bool,
562 content_type: &str,
563 callback: Option<impl Fn(serde_json::Value) + Send>,
564 ) -> Result<serde_json::Value, reqwest::Error> {
565 use tokio_util::codec::{FramedRead, LinesCodec};
566
567 let res = self.api_post("crawl", params, content_type).await?;
568
569 if stream {
570 if let Some(callback) = callback {
571 let stream = res.bytes_stream();
572
573 let stream_reader = tokio_util::io::StreamReader::new(
574 stream
575 .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
576 );
577
578 let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
579
580 while let Some(line_result) = lines.next().await {
581 match line_result {
582 Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
583 Ok(value) => {
584 callback(value);
585 }
586 Err(_e) => {
587 continue;
588 }
589 },
590 Err(_e) => return Ok(serde_json::Value::Null),
591 }
592 }
593
594 Ok(serde_json::Value::Null)
595 } else {
596 Ok(serde_json::Value::Null)
597 }
598 } else {
599 parse_response(res).await
600 }
601 }
602
603 pub async fn links(
616 &self,
617 url: &str,
618 params: Option<RequestParams>,
619 _stream: bool,
620 content_type: &str,
621 ) -> Result<serde_json::Value, reqwest::Error> {
622 let mut data = HashMap::new();
623
624 if let Ok(params) = serde_json::to_value(params) {
625 if let Some(ref p) = params.as_object() {
626 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
627 }
628 }
629
630 data.insert("url".into(), serde_json::Value::String(url.to_string()));
631
632 let res = self.api_post("links", data, content_type).await?;
633 parse_response(res).await
634 }
635
636 pub async fn multi_links(
649 &self,
650 params: Option<Vec<RequestParams>>,
651 _stream: bool,
652 content_type: &str,
653 ) -> Result<serde_json::Value, reqwest::Error> {
654 let res = self.api_post("links", params, content_type).await?;
655 parse_response(res).await
656 }
657
658 pub async fn screenshot(
671 &self,
672 url: &str,
673 params: Option<RequestParams>,
674 _stream: bool,
675 content_type: &str,
676 ) -> Result<serde_json::Value, reqwest::Error> {
677 let mut data = HashMap::new();
678
679 if let Ok(params) = serde_json::to_value(params) {
680 if let Some(ref p) = params.as_object() {
681 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
682 }
683 }
684
685 data.insert("url".into(), serde_json::Value::String(url.to_string()));
686
687 let res = self.api_post("screenshot", data, content_type).await?;
688 parse_response(res).await
689 }
690
691 pub async fn multi_screenshot(
704 &self,
705 params: Option<Vec<RequestParams>>,
706 _stream: bool,
707 content_type: &str,
708 ) -> Result<serde_json::Value, reqwest::Error> {
709 let res = self.api_post("screenshot", params, content_type).await?;
710 parse_response(res).await
711 }
712
713 pub async fn search(
726 &self,
727 q: &str,
728 params: Option<SearchRequestParams>,
729 _stream: bool,
730 content_type: &str,
731 ) -> Result<serde_json::Value, reqwest::Error> {
732 let body = match params {
733 Some(mut params) => {
734 params.search = q.to_string();
735 params
736 }
737 _ => {
738 let mut params = SearchRequestParams::default();
739 params.search = q.to_string();
740 params
741 }
742 };
743
744 let res = self.api_post("search", body, content_type).await?;
745
746 parse_response(res).await
747 }
748
749 pub async fn multi_search(
762 &self,
763 params: Option<Vec<SearchRequestParams>>,
764 content_type: &str,
765 ) -> Result<serde_json::Value, reqwest::Error> {
766 let res = self.api_post("search", params, content_type).await?;
767 parse_response(res).await
768 }
769
770 pub async fn unblock_url(
783 &self,
784 url: &str,
785 params: Option<RequestParams>,
786 content_type: &str,
787 ) -> Result<serde_json::Value, reqwest::Error> {
788 let mut data = HashMap::new();
789
790 if let Ok(params) = serde_json::to_value(params) {
791 if let Some(ref p) = params.as_object() {
792 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
793 }
794 }
795
796 if !url.is_empty() {
797 data.insert(
798 "url".to_string(),
799 serde_json::Value::String(url.to_string()),
800 );
801 }
802
803 let res = self.api_post("unblocker", data, content_type).await?;
804 parse_response(res).await
805 }
806
807 pub async fn multi_unblock_url(
820 &self,
821 params: Option<Vec<RequestParams>>,
822 content_type: &str,
823 ) -> Result<serde_json::Value, reqwest::Error> {
824 let mut data = HashMap::new();
825
826 if let Ok(mut params) = serde_json::to_value(params) {
827 if let Some(obj) = params.as_object_mut() {
828 obj.insert("limit".to_string(), serde_json::Value::Number(1.into()));
829 data.extend(obj.iter().map(|(k, v)| (k.clone(), v.clone())));
830 }
831 }
832 let res = self.api_post("unblocker", data, content_type).await?;
833 parse_response(res).await
834 }
835
836 pub async fn transform(
849 &self,
850 data: Vec<HashMap<&str, &str>>,
851 params: Option<TransformParams>,
852 _stream: bool,
853 content_type: &str,
854 ) -> Result<serde_json::Value, reqwest::Error> {
855 let mut payload = HashMap::new();
856
857 if let Ok(params) = serde_json::to_value(params) {
858 if let Some(ref p) = params.as_object() {
859 payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
860 }
861 }
862
863 if let Ok(d) = serde_json::to_value(data) {
864 payload.insert("data".into(), d);
865 }
866
867 let res = self.api_post("transform", payload, content_type).await?;
868
869 parse_response(res).await
870 }
871
872 pub async fn get_credits(&self) -> Result<serde_json::Value, reqwest::Error> {
874 self.api_get::<serde_json::Value>("data/credits", None)
875 .await
876 }
877
878 pub async fn data_post(
880 &self,
881 table: &str,
882 data: Option<RequestParams>,
883 ) -> Result<serde_json::Value, reqwest::Error> {
884 let res = self
885 .api_post(&format!("data/{}", table), data, "application/json")
886 .await?;
887 parse_response(res).await
888 }
889
890 pub async fn data_get(
892 &self,
893 table: &str,
894 params: Option<RequestParams>,
895 ) -> Result<serde_json::Value, reqwest::Error> {
896 let mut payload = HashMap::new();
897
898 if let Some(params) = params {
899 if let Ok(p) = serde_json::to_value(params) {
900 if let Some(o) = p.as_object() {
901 payload.extend(o.iter().map(|(k, v)| (k.as_str(), v.clone())));
902 }
903 }
904 }
905
906 let res = self
907 .api_get::<serde_json::Value>(&format!("data/{}", table), None)
908 .await?;
909 Ok(res)
910 }
911}
912
// Tests that call the API through a shared client. The client is built
// without an explicit key, so `SPIDER_API_KEY` must be available (a `.env`
// file is loaded first).
#[cfg(test)]
mod tests {
    use super::*;
    use dotenv::dotenv;
    use lazy_static::lazy_static;
    use reqwest::ClientBuilder;

    lazy_static! {
        // One client shared by every test; built lazily on first use.
        static ref SPIDER_CLIENT: Spider = {
            dotenv().ok();
            let http = ClientBuilder::new()
                .user_agent("SpiderBot")
                .build()
                .unwrap();

            Spider::new_with_client(None, http).expect("client to build")
        };
    }

    #[tokio::test]
    #[ignore]
    async fn test_scrape_url() {
        let outcome = SPIDER_CLIENT
            .scrape_url("https://example.com", None, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_crawl_url() {
        // Non-streaming crawl, so no callback is supplied.
        let no_callback = None::<fn(serde_json::Value)>;
        let outcome = SPIDER_CLIENT
            .crawl_url(
                "https://example.com",
                None,
                false,
                "application/json",
                no_callback,
            )
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_links() {
        let outcome: Result<serde_json::Value, Error> = SPIDER_CLIENT
            .links("https://example.com", None, false, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_screenshot() {
        let params = RequestParams {
            limit: Some(1),
            ..Default::default()
        };

        let outcome = SPIDER_CLIENT
            .screenshot(
                "https://example.com",
                Some(params),
                false,
                "application/json",
            )
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_transform() {
        let document = HashMap::from([(
            "<html><body><h1>Transformation</h1></body></html>",
            "",
        )]);
        let outcome = SPIDER_CLIENT
            .transform(vec![document], None, false, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_get_credits() {
        let outcome = SPIDER_CLIENT.get_credits().await;
        assert!(outcome.is_ok());
    }
}