1pub mod shapes;
64
65use backon::ExponentialBuilder;
66use backon::Retryable;
67use reqwest::Client;
68use reqwest::{Error, Response};
69use serde::Serialize;
70use std::collections::HashMap;
71use tokio_stream::StreamExt;
72pub use shapes::{request::*, response::*};
73use std::sync::OnceLock;
74
/// Lazily initialised cache for the API base URL; filled on the first call to
/// [`get_api_url`] and never changed afterwards.
static API_URL: OnceLock<String> = OnceLock::new();

/// Returns the base URL used for every API request.
///
/// The value is taken from the `SPIDER_API_URL` environment variable the first
/// time this function runs; if the variable cannot be read, the hosted
/// endpoint `https://api.spider.cloud` is used. Later calls return the cached
/// string without consulting the environment again.
pub fn get_api_url() -> &'static str {
    let base = API_URL.get_or_init(|| match std::env::var("SPIDER_API_URL") {
        Ok(custom) => custom,
        Err(_) => String::from("https://api.spider.cloud"),
    });
    base.as_str()
}
83
84#[derive(Debug, Default)]
86pub struct Spider {
87 pub api_key: String,
89 pub client: Client,
91}
92
93pub async fn handle_json(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
95 res.json().await
96}
97
98pub async fn handle_jsonl(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
100 let text = res.text().await?;
101 let lines = text
102 .lines()
103 .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
104 .collect::<Vec<_>>();
105 Ok(serde_json::Value::Array(lines))
106}
107
/// Decodes a CSV body into a JSON array of header-keyed records.
///
/// Rows that fail to deserialize are dropped. If the collected rows cannot be
/// converted into a JSON value, the raw body text is returned as a JSON string
/// instead.
///
/// # Errors
/// Returns the `reqwest` error raised while reading the body text.
#[cfg(feature = "csv")]
pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    use std::collections::HashMap;

    let body = res.text().await?;
    let mut reader = csv::Reader::from_reader(body.as_bytes());
    let rows: Vec<HashMap<String, String>> =
        reader.deserialize().filter_map(Result::ok).collect();

    match serde_json::to_value(rows) {
        Ok(json) => Ok(json),
        Err(_) => Ok(serde_json::Value::String(body)),
    }
}
122
123#[cfg(not(feature = "csv"))]
124pub async fn handle_csv(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
125 handle_text(res).await
126}
127
128pub async fn handle_text(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
130 Ok(serde_json::Value::String(
131 res.text().await.unwrap_or_default(),
132 ))
133}
134
/// Decodes an XML body into a JSON value via `quick_xml`.
///
/// If the XML cannot be deserialized, the raw body text is returned as a JSON
/// string instead of an error.
///
/// NOTE(review): this variant is gated on the `csv` feature, not a dedicated
/// XML feature; confirm that is intentional.
///
/// # Errors
/// Returns the `reqwest` error raised while reading the body text.
#[cfg(feature = "csv")]
pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
    let raw = res.text().await?;

    if let Ok(parsed) = quick_xml::de::from_str::<serde_json::Value>(&raw) {
        return Ok(parsed);
    }

    Ok(serde_json::Value::String(raw))
}
144
145#[cfg(not(feature = "csv"))]
146pub async fn handle_xml(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
148 handle_text(res).await
149}
150
151pub async fn parse_response(res: reqwest::Response) -> Result<serde_json::Value, reqwest::Error> {
152 let content_type = res
153 .headers()
154 .get(reqwest::header::CONTENT_TYPE)
155 .and_then(|v| v.to_str().ok())
156 .unwrap_or_default()
157 .to_ascii_lowercase();
158
159 if content_type.contains("json") && !content_type.contains("jsonl") {
160 handle_json(res).await
161 } else if content_type.contains("jsonl") || content_type.contains("ndjson") {
162 handle_jsonl(res).await
163 } else if content_type.contains("csv") {
164 handle_csv(res).await
165 } else if content_type.contains("xml") {
166 handle_xml(res).await
167 } else {
168 handle_text(res).await
169 }
170}
171
172impl Spider {
173 pub fn new(api_key: Option<String>) -> Result<Self, &'static str> {
183 let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
184
185 match api_key {
186 Some(key) => Ok(Self {
187 api_key: key,
188 client: Client::new(),
189 }),
190 None => Err("No API key provided"),
191 }
192 }
193
194 pub fn new_with_client(api_key: Option<String>, client: Client) -> Result<Self, &'static str> {
205 let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok());
206
207 match api_key {
208 Some(key) => Ok(Self {
209 api_key: key,
210 client,
211 }),
212 None => Err("No API key provided"),
213 }
214 }
215
216 async fn api_post_base(
229 &self,
230 endpoint: &str,
231 data: impl Serialize + Sized + std::fmt::Debug,
232 content_type: &str,
233 ) -> Result<Response, Error> {
234 let url: String = format!("{}/{}", get_api_url(), endpoint);
235
236 self.client
237 .post(&url)
238 .header(
239 "User-Agent",
240 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
241 )
242 .header("Content-Type", content_type)
243 .header("Authorization", format!("Bearer {}", self.api_key))
244 .json(&data)
245 .send()
246 .await
247 }
248
249 pub async fn api_post(
262 &self,
263 endpoint: &str,
264 data: impl Serialize + std::fmt::Debug + Clone + Send + Sync,
265 content_type: &str,
266 ) -> Result<Response, Error> {
267 let fetch = || async {
268 self.api_post_base(endpoint, data.to_owned(), content_type)
269 .await
270 };
271
272 fetch
273 .retry(ExponentialBuilder::default().with_max_times(5))
274 .when(|err: &reqwest::Error| {
275 if let Some(status) = err.status() {
276 status.is_server_error()
277 } else {
278 err.is_timeout()
279 }
280 })
281 .await
282 }
283
284 async fn api_get_base<T: Serialize>(
294 &self,
295 endpoint: &str,
296 query_params: Option<&T>,
297 ) -> Result<serde_json::Value, reqwest::Error> {
298 let url = format!("{}/{}", get_api_url(), endpoint);
299 let res = self
300 .client
301 .get(&url)
302 .query(&query_params)
303 .header(
304 "User-Agent",
305 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
306 )
307 .header("Content-Type", "application/json")
308 .header("Authorization", format!("Bearer {}", self.api_key))
309 .send()
310 .await?;
311 parse_response(res).await
312 }
313
314 pub async fn api_get<T: Serialize>(
324 &self,
325 endpoint: &str,
326 query_params: Option<&T>,
327 ) -> Result<serde_json::Value, reqwest::Error> {
328 let fetch = || async { self.api_get_base(endpoint, query_params.to_owned()).await };
329
330 fetch
331 .retry(ExponentialBuilder::default().with_max_times(5))
332 .when(|err: &reqwest::Error| {
333 if let Some(status) = err.status() {
334 status.is_server_error()
335 } else {
336 err.is_timeout()
337 }
338 })
339 .await
340 }
341
342 async fn api_delete_base(
355 &self,
356 endpoint: &str,
357 params: Option<HashMap<String, serde_json::Value>>,
358 ) -> Result<Response, Error> {
359 let url = format!("{}/v1/{}", get_api_url(), endpoint);
360 let request_builder = self
361 .client
362 .delete(&url)
363 .header(
364 "User-Agent",
365 format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")),
366 )
367 .header("Content-Type", "application/json")
368 .header("Authorization", format!("Bearer {}", self.api_key));
369
370 let request_builder = if let Some(params) = params {
371 request_builder.json(¶ms)
372 } else {
373 request_builder
374 };
375
376 request_builder.send().await
377 }
378
379 pub async fn api_delete(
392 &self,
393 endpoint: &str,
394 params: Option<HashMap<String, serde_json::Value>>,
395 ) -> Result<Response, Error> {
396 let fetch = || async { self.api_delete_base(endpoint, params.to_owned()).await };
397
398 fetch
399 .retry(ExponentialBuilder::default().with_max_times(5))
400 .when(|err: &reqwest::Error| {
401 if let Some(status) = err.status() {
402 status.is_server_error()
403 } else {
404 err.is_timeout()
405 }
406 })
407 .await
408 }
409
410 pub async fn scrape_url(
423 &self,
424 url: &str,
425 params: Option<RequestParams>,
426 content_type: &str,
427 ) -> Result<serde_json::Value, reqwest::Error> {
428 let mut data = HashMap::new();
429
430 if let Ok(params) = serde_json::to_value(params) {
431 if let Some(ref p) = params.as_object() {
432 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
433 }
434 }
435
436 if !url.is_empty() {
437 data.insert(
438 "url".to_string(),
439 serde_json::Value::String(url.to_string()),
440 );
441 }
442
443 data.insert("limit".to_string(), serde_json::Value::Number(1.into()));
444
445 let res = self.api_post("crawl", data, content_type).await?;
446 parse_response(res).await
447 }
448
449 pub async fn multi_scrape_url(
462 &self,
463 params: Option<Vec<RequestParams>>,
464 content_type: &str,
465 ) -> Result<serde_json::Value, reqwest::Error> {
466 let mut data = HashMap::new();
467
468if let Ok(mut params) = serde_json::to_value(params) {
469 if let Some(obj) = params.as_object_mut() {
470 obj.insert("limit".to_string(), serde_json::Value::Number(1.into()));
471 data.extend(obj.iter().map(|(k, v)| (k.clone(), v.clone())));
472 }
473}
474 let res = self.api_post("crawl", data, content_type).await?;
475 parse_response(res).await
476 }
477
478
479 pub async fn crawl_url(
493 &self,
494 url: &str,
495 params: Option<RequestParams>,
496 stream: bool,
497 content_type: &str,
498 callback: Option<impl Fn(serde_json::Value) + Send>,
499 ) -> Result<serde_json::Value, reqwest::Error> {
500 use tokio_util::codec::{FramedRead, LinesCodec};
501
502 let mut data = HashMap::new();
503
504 if let Ok(params) = serde_json::to_value(params) {
505 if let Some(ref p) = params.as_object() {
506 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
507 }
508 }
509
510 data.insert("url".into(), serde_json::Value::String(url.to_string()));
511
512 let res = self.api_post("crawl", data, content_type).await?;
513
514 if stream {
515 if let Some(callback) = callback {
516 let stream = res.bytes_stream();
517
518 let stream_reader = tokio_util::io::StreamReader::new(
519 stream
520 .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
521 );
522
523 let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
524
525 while let Some(line_result) = lines.next().await {
526 match line_result {
527 Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
528 Ok(value) => {
529 callback(value);
530 }
531 Err(_e) => {
532 continue;
533 }
534 },
535 Err(_e) => return Ok(serde_json::Value::Null),
536 }
537 }
538
539 Ok(serde_json::Value::Null)
540 } else {
541 Ok(serde_json::Value::Null)
542 }
543 } else {
544 parse_response(res).await
545 }
546 }
547
548 pub async fn multi_crawl_url(
562 &self,
563 params: Option<Vec<RequestParams>>,
564 stream: bool,
565 content_type: &str,
566 callback: Option<impl Fn(serde_json::Value) + Send>,
567 ) -> Result<serde_json::Value, reqwest::Error> {
568 use tokio_util::codec::{FramedRead, LinesCodec};
569
570
571 let res = self.api_post("crawl", params, content_type).await?;
572
573 if stream {
574 if let Some(callback) = callback {
575 let stream = res.bytes_stream();
576
577 let stream_reader = tokio_util::io::StreamReader::new(
578 stream
579 .map(|r| r.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))),
580 );
581
582 let mut lines = FramedRead::new(stream_reader, LinesCodec::new());
583
584 while let Some(line_result) = lines.next().await {
585 match line_result {
586 Ok(line) => match serde_json::from_str::<serde_json::Value>(&line) {
587 Ok(value) => {
588 callback(value);
589 }
590 Err(_e) => {
591 continue;
592 }
593 },
594 Err(_e) => return Ok(serde_json::Value::Null),
595 }
596 }
597
598 Ok(serde_json::Value::Null)
599 } else {
600 Ok(serde_json::Value::Null)
601 }
602 } else {
603 parse_response(res).await
604 }
605 }
606
607
608 pub async fn links(
621 &self,
622 url: &str,
623 params: Option<RequestParams>,
624 _stream: bool,
625 content_type: &str,
626 ) -> Result<serde_json::Value, reqwest::Error> {
627 let mut data = HashMap::new();
628
629 if let Ok(params) = serde_json::to_value(params) {
630 if let Some(ref p) = params.as_object() {
631 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
632 }
633 }
634
635 data.insert("url".into(), serde_json::Value::String(url.to_string()));
636
637 let res = self.api_post("links", data, content_type).await?;
638 parse_response(res).await
639 }
640
641
642 pub async fn multi_links(
655 &self,
656 params: Option<Vec<RequestParams>>,
657 _stream: bool,
658 content_type: &str,
659 ) -> Result<serde_json::Value, reqwest::Error> {
660 let res = self.api_post("links", params, content_type).await?;
661 parse_response(res).await
662 }
663
664
665 pub async fn screenshot(
678 &self,
679 url: &str,
680 params: Option<RequestParams>,
681 _stream: bool,
682 content_type: &str,
683 ) -> Result<serde_json::Value, reqwest::Error> {
684 let mut data = HashMap::new();
685
686 if let Ok(params) = serde_json::to_value(params) {
687 if let Some(ref p) = params.as_object() {
688 data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
689 }
690 }
691
692 data.insert("url".into(), serde_json::Value::String(url.to_string()));
693
694 let res = self.api_post("screenshot", data, content_type).await?;
695 parse_response(res).await
696 }
697
698 pub async fn multi_screenshot(
711 &self,
712 params: Option<Vec<RequestParams>>,
713 _stream: bool,
714 content_type: &str,
715 ) -> Result<serde_json::Value, reqwest::Error> {
716 let res = self.api_post("screenshot", params, content_type).await?;
717 parse_response(res).await
718 }
719
720 pub async fn search(
733 &self,
734 q: &str,
735 params: Option<SearchRequestParams>,
736 _stream: bool,
737 content_type: &str,
738 ) -> Result<serde_json::Value, reqwest::Error> {
739 let body = match params {
740 Some(mut params) => {
741 params.search = q.to_string();
742 params
743 }
744 _ => {
745 let mut params = SearchRequestParams::default();
746 params.search = q.to_string();
747 params
748 }
749 };
750
751 let res = self.api_post("search", body, content_type).await?;
752
753 parse_response(res).await
754 }
755
756 pub async fn multi_search(
769 &self,
770 params: Option<Vec<SearchRequestParams>>,
771 content_type: &str,
772 ) -> Result<serde_json::Value, reqwest::Error> {
773 let res = self.api_post("search", params, content_type).await?;
774 parse_response(res).await
775 }
776
777 pub async fn transform(
790 &self,
791 data: Vec<HashMap<&str, &str>>,
792 params: Option<TransformParams>,
793 _stream: bool,
794 content_type: &str,
795 ) -> Result<serde_json::Value, reqwest::Error> {
796 let mut payload = HashMap::new();
797
798 if let Ok(params) = serde_json::to_value(params) {
799 if let Some(ref p) = params.as_object() {
800 payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone())));
801 }
802 }
803
804 if let Ok(d) = serde_json::to_value(data) {
805 payload.insert("data".into(), d);
806 }
807
808 let res = self.api_post("transform", payload, content_type).await?;
809
810 parse_response(res).await
811 }
812
813 pub async fn get_credits(&self) -> Result<serde_json::Value, reqwest::Error> {
815 self.api_get::<serde_json::Value>("data/credits", None)
816 .await
817 }
818
819 pub async fn data_post(
821 &self,
822 table: &str,
823 data: Option<RequestParams>,
824 ) -> Result<serde_json::Value, reqwest::Error> {
825 let res = self
826 .api_post(&format!("data/{}", table), data, "application/json")
827 .await?;
828 parse_response(res).await
829 }
830
831 pub async fn data_get(
833 &self,
834 table: &str,
835 params: Option<RequestParams>,
836 ) -> Result<serde_json::Value, reqwest::Error> {
837 let mut payload = HashMap::new();
838
839 if let Some(params) = params {
840 if let Ok(p) = serde_json::to_value(params) {
841 if let Some(o) = p.as_object() {
842 payload.extend(o.iter().map(|(k, v)| (k.as_str(), v.clone())));
843 }
844 }
845 }
846
847 let res = self
848 .api_get::<serde_json::Value>(&format!("data/{}", table), None)
849 .await?;
850 Ok(res)
851 }
852}
853
/// Tests that call the live API through a shared client. The client is built
/// from the environment (a `.env` file is loaded first if present), so these
/// need a usable `SPIDER_API_KEY`.
#[cfg(test)]
mod tests {
    use super::*;
    use dotenv::dotenv;
    use lazy_static::lazy_static;
    use reqwest::ClientBuilder;

    lazy_static! {
        // One client shared by every test, created on first use.
        static ref SPIDER_CLIENT: Spider = {
            dotenv().ok();
            let http = ClientBuilder::new()
                .user_agent("SpiderBot")
                .build()
                .unwrap();

            Spider::new_with_client(None, http).expect("client to build")
        };
    }

    #[tokio::test]
    #[ignore]
    async fn test_scrape_url() {
        let outcome = SPIDER_CLIENT
            .scrape_url("https://example.com", None, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_crawl_url() {
        // No callback is supplied; the `None` only needs a concrete type.
        let no_callback = None::<fn(serde_json::Value)>;
        let outcome = SPIDER_CLIENT
            .crawl_url(
                "https://example.com",
                None,
                false,
                "application/json",
                no_callback,
            )
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_links() {
        let outcome: Result<serde_json::Value, Error> = SPIDER_CLIENT
            .links("https://example.com", None, false, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_screenshot() {
        let mut shot_params = RequestParams::default();
        shot_params.limit = Some(1);

        let outcome = SPIDER_CLIENT
            .screenshot(
                "https://example.com",
                Some(shot_params),
                false,
                "application/json",
            )
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_transform() {
        let document = HashMap::from([(
            "<html><body><h1>Transformation</h1></body></html>",
            "",
        )]);
        let outcome = SPIDER_CLIENT
            .transform(vec![document], None, false, "application/json")
            .await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn test_get_credits() {
        let outcome = SPIDER_CLIENT.get_credits().await;
        assert!(outcome.is_ok());
    }
}