json_crawler/
lib.rs

//! Library to crawl JSON using JSON Pointer syntax and return useful errors.
//! Documentation is a work in progress.
3use error::ParseTarget;
4use serde::de::DeserializeOwned;
5use std::{fmt::Display, ops::ControlFlow, str::FromStr, sync::Arc};
6
7pub use error::{CrawlerError, CrawlerResult};
8pub use iter::*;
9// Currently the only way to create a crawler is from a serde_json::Value, so we
10// might as well re-export it.
11// doc(no_inline) means that the re-export will be clear in the docs.
12#[doc(no_inline)]
13pub use serde_json::Value;
14
15mod error;
16mod iter;
17
18/// Trait to represent a JsonCrawler that may own or borrow from the original
19/// `serde_json::Value`.
20pub trait JsonCrawler
21where
22    Self: Sized,
23{
24    type BorrowTo<'a>: JsonCrawler
25    where
26        Self: 'a;
27    type IterMut<'a>: Iterator<Item = Self::BorrowTo<'a>>
28    where
29        Self: 'a;
30    type IntoIter: Iterator<Item = Self>;
31    fn navigate_pointer(self, new_path: impl AsRef<str>) -> CrawlerResult<Self>;
32    fn navigate_index(self, index: usize) -> CrawlerResult<Self>;
33    fn borrow_pointer(&mut self, path: impl AsRef<str>) -> CrawlerResult<Self::BorrowTo<'_>>;
34    fn borrow_index(&mut self, index: usize) -> CrawlerResult<Self::BorrowTo<'_>>;
35    fn borrow_mut(&mut self) -> Self::BorrowTo<'_>;
36    fn try_into_iter(self) -> CrawlerResult<Self::IntoIter>;
37    fn try_iter_mut(&mut self) -> CrawlerResult<Self::IterMut<'_>>;
38    fn path_exists(&self, path: &str) -> bool;
39    fn get_path(&self) -> String;
40    fn get_source(&self) -> Arc<String>;
41    fn take_value<T: DeserializeOwned>(&mut self) -> CrawlerResult<T>;
42    fn take_value_pointer<T: DeserializeOwned>(
43        &mut self,
44        path: impl AsRef<str>,
45    ) -> CrawlerResult<T>;
46    /// For use when you want to try and take value that could be at multiple
47    /// valid locations. Returns an error message that notes that all valid
48    /// locations were attempted.
49    ///
50    /// # Usage
51    /// ```no_run
52    /// # use json_crawler::*;
53    /// # let mut crawler = JsonCrawlerOwned::new(String::new(), serde_json::Value::Null);
54    /// // Output will be an error that path should contain "header" and "headerName", if crawler contains neither.
55    /// let output: CrawlerResult<String> = crawler.take_value_pointers(&["header", "headerName"]);
56    /// ```
57    fn take_value_pointers<T: DeserializeOwned, S: AsRef<str>>(
58        &mut self,
59        paths: &[S],
60    ) -> CrawlerResult<T>;
61    /// For use when you want to apply some operations that return Option, but
62    /// still return an error with context if they fail. For convenience,
63    /// closure return type is fallible, allowing you to see the cause of the
64    /// error at the failure point as well, if you have it.
65    ///
66    /// # Usage
67    /// ```no_run
68    /// # use json_crawler::*;
69    /// # let mut crawler = JsonCrawlerOwned::new(String::new(), serde_json::Value::Null);
70    /// // Returns Ok(42) if crawler parses into 42.
71    /// // Returns parsing from string error, plus the message that output should be 42, if output fails to parse from string.
72    /// // Returns message that output should be 42, if output parses from string, but is not 42.
73    /// let forty_two: CrawlerResult<usize> = crawler.try_expect("Output should be 42", |crawler| {
74    ///     let num = crawler.take_and_parse_str::<usize>()?;
75    ///     if num == 42 {
76    ///         return Ok(Some(num));
77    ///     }
78    ///     Ok(None)
79    /// });
80    /// ```
81    fn try_expect<F, O>(&mut self, msg: impl ToString, f: F) -> CrawlerResult<O>
82    where
83        F: FnOnce(&mut Self) -> CrawlerResult<Option<O>>,
84    {
85        match f(self) {
86            Ok(Some(r)) => Ok(r),
87            Ok(None) => Err(CrawlerError::parsing(
88                self.get_path(),
89                self.get_source(),
90                crate::error::ParseTarget::Other(std::any::type_name::<O>().to_string()),
91                Some(msg.to_string()),
92            )),
93            // In this case, we've got a nested error, and should display both sets of context.
94            Err(e) => {
95                let msg = format!("Expected {} but encountered '{e}'", msg.to_string());
96                Err(CrawlerError::parsing(
97                    self.get_path(),
98                    self.get_source(),
99                    crate::error::ParseTarget::Other(std::any::type_name::<O>().to_string()),
100                    Some(msg),
101                ))
102            }
103        }
104    }
105    /// Take the value as a String, and apply FromStr to return the desired
106    /// type.
107    fn take_and_parse_str<F: FromStr>(&mut self) -> CrawlerResult<F>
108    where
109        F::Err: Display,
110    {
111        let as_string = self.take_value::<String>()?;
112        str::parse::<F>(as_string.as_str()).map_err(|e| {
113            CrawlerError::parsing(
114                self.get_path(),
115                self.get_source(),
116                crate::error::ParseTarget::Other(std::any::type_name::<F>().to_string()),
117                Some(format!("{e}")),
118            )
119        })
120    }
121    /// Try to apply each function in a list of functions, returning the first
122    /// Ok result, or the last Err result if none returned Ok.
123    ///
124    /// # Warning
125    /// If one of the functions mutates before failing, the mutation will still
126    /// be applied. Also, the mutations are applied sequentially - mutation 1
127    /// could impact mutation 2 for example.
128    fn try_functions<O>(
129        &mut self,
130        functions: Vec<fn(&mut Self) -> CrawlerResult<O>>,
131    ) -> CrawlerResult<O> {
132        let original_path = self.get_path();
133        let source_ptr = self.get_source();
134        let output = functions.into_iter().try_fold(Vec::new(), |mut acc, f| {
135            let res = f(self);
136            let e = match res {
137                Ok(ret) => return ControlFlow::Break(ret),
138                Err(e) => e,
139            };
140            acc.push(e);
141            ControlFlow::Continue(acc)
142        });
143        match output {
144            ControlFlow::Continue(c) => Err(CrawlerError::multiple_parse_error(
145                original_path,
146                source_ptr,
147                c,
148            )),
149            ControlFlow::Break(b) => Ok(b),
150        }
151    }
152}
153
154#[derive(Clone, PartialEq, Debug)]
155pub struct JsonCrawlerOwned {
156    // Source is wrapped in an Arc as we are going to pass ownership when returning an error and we
157    // want it to be thread safe.
158    source: Arc<String>,
159    crawler: serde_json::Value,
160    path: PathList,
161}
162pub struct JsonCrawlerBorrowed<'a> {
163    // Source is wrapped in an Arc as we are going to pass ownership when returning an error and we
164    // want it to be thread safe.
165    source: Arc<String>,
166    crawler: &'a mut serde_json::Value,
167    path: PathList,
168}
169
170impl JsonCrawlerOwned {
171    /// Create a new JsonCrawler, where 'json' is the `serde_json::Value` that
172    /// you wish to crawl and 'source' represents a serialized copy of the same
173    /// `serde_json::Value`.
174    // TODO: Safer constructor that avoids 'source' being out of sync with 'json'
175    pub fn new(source: String, json: serde_json::Value) -> Self {
176        Self {
177            source: Arc::new(source),
178            crawler: json,
179            path: Default::default(),
180        }
181    }
182}
183
184impl<'a> JsonCrawler for JsonCrawlerBorrowed<'a> {
185    type BorrowTo<'b>
186        = JsonCrawlerBorrowed<'b>
187    where
188        Self: 'b;
189    type IterMut<'b>
190        = JsonCrawlerArrayIterMut<'b>
191    where
192        Self: 'b;
193    type IntoIter = JsonCrawlerArrayIterMut<'a>;
194    fn take_value_pointer<T: DeserializeOwned>(
195        &mut self,
196        path: impl AsRef<str>,
197    ) -> CrawlerResult<T> {
198        let mut path_clone = self.path.clone();
199        path_clone.push(JsonPath::pointer(path.as_ref()));
200        serde_json::from_value(
201            self.crawler
202                .pointer_mut(path.as_ref())
203                .map(|v| v.take())
204                .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?,
205        )
206        .map_err(|e| {
207            CrawlerError::parsing(
208                &path_clone,
209                self.source.clone(),
210                ParseTarget::Other(std::any::type_name::<T>().to_string()),
211                Some(format!("{e}")),
212            )
213        })
214    }
215    fn borrow_pointer(&mut self, path: impl AsRef<str>) -> CrawlerResult<Self::BorrowTo<'_>> {
216        let mut path_clone = self.path.clone();
217        path_clone.push(JsonPath::pointer(path.as_ref()));
218        let crawler = self
219            .crawler
220            .pointer_mut(path.as_ref())
221            .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?;
222        Ok(JsonCrawlerBorrowed {
223            source: self.source.clone(),
224            crawler,
225            path: path_clone,
226        })
227    }
228    fn navigate_pointer(self, path: impl AsRef<str>) -> CrawlerResult<Self> {
229        let mut path_clone = self.path.clone();
230        path_clone.push(JsonPath::pointer(path.as_ref()));
231        let crawler = self
232            .crawler
233            .pointer_mut(path.as_ref())
234            .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?;
235        Ok(Self {
236            source: self.source,
237            crawler,
238            path: path_clone,
239        })
240    }
241    fn try_into_iter(self) -> CrawlerResult<Self::IntoIter> {
242        let json_array = self.crawler.as_array_mut().ok_or_else(|| {
243            CrawlerError::parsing(&self.path, self.source.clone(), ParseTarget::Array, None)
244        })?;
245        let path_clone = self.path.clone();
246        let cur_back = json_array.len().saturating_sub(1);
247        Ok(JsonCrawlerArrayIterMut {
248            source: self.source,
249            array: json_array.iter_mut(),
250            path: path_clone,
251            cur_front: 0,
252            cur_back,
253        })
254    }
255    fn try_iter_mut(&mut self) -> CrawlerResult<Self::IterMut<'_>> {
256        let json_array = self.crawler.as_array_mut().ok_or_else(|| {
257            CrawlerError::parsing(&self.path, self.source.clone(), ParseTarget::Array, None)
258        })?;
259        let path_clone = self.path.clone();
260        let cur_back = json_array.len().saturating_sub(1);
261        Ok(JsonCrawlerArrayIterMut {
262            source: self.source.clone(),
263            array: json_array.iter_mut(),
264            path: path_clone,
265            cur_front: 0,
266            cur_back,
267        })
268    }
269    fn navigate_index(self, index: usize) -> CrawlerResult<Self> {
270        let mut path_clone = self.path.clone();
271        path_clone.push(JsonPath::IndexNum(index));
272        let crawler = self
273            .crawler
274            .get_mut(index)
275            .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?;
276        Ok(Self {
277            source: self.source,
278            crawler,
279            path: path_clone,
280        })
281    }
282    fn borrow_index(&mut self, index: usize) -> CrawlerResult<Self::BorrowTo<'_>> {
283        let mut path_clone = self.path.clone();
284        path_clone.push(JsonPath::IndexNum(index));
285        let crawler = self
286            .crawler
287            .get_mut(index)
288            .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?;
289        Ok(JsonCrawlerBorrowed {
290            source: self.source.clone(),
291            crawler,
292            path: path_clone,
293        })
294    }
295    fn borrow_mut(&mut self) -> Self::BorrowTo<'_> {
296        JsonCrawlerBorrowed {
297            source: self.source.clone(),
298            crawler: self.crawler,
299            path: self.path.to_owned(),
300        }
301    }
302    fn get_path(&self) -> String {
303        (&self.path).into()
304    }
305    fn take_value<T: DeserializeOwned>(&mut self) -> CrawlerResult<T> {
306        serde_json::from_value(self.crawler.take()).map_err(|e| {
307            CrawlerError::parsing(
308                &self.path,
309                self.source.clone(),
310                ParseTarget::Other(std::any::type_name::<T>().to_string()),
311                Some(format!("{e}")),
312            )
313        })
314    }
315    fn take_value_pointers<T: DeserializeOwned, S: AsRef<str>>(
316        &mut self,
317        paths: &[S],
318    ) -> CrawlerResult<T> {
319        let mut path_clone = self.path.clone();
320        let Some((found, path)) = paths
321            .iter()
322            .find_map(|p| self.crawler.pointer_mut(p.as_ref()).map(|v| (v.take(), p)))
323        else {
324            return Err(CrawlerError::paths_not_found(
325                path_clone,
326                self.source.clone(),
327                paths.iter().map(|s| s.as_ref().to_string()).collect(),
328            ));
329        };
330        path_clone.push(JsonPath::Pointer(path.as_ref().to_string()));
331        serde_json::from_value(found).map_err(|e| {
332            CrawlerError::parsing(
333                &path_clone,
334                self.source.clone(),
335                ParseTarget::Other(std::any::type_name::<T>().to_string()),
336                Some(format!("{e}")),
337            )
338        })
339    }
340    fn path_exists(&self, path: &str) -> bool {
341        self.crawler.pointer(path).is_some()
342    }
343    fn get_source(&self) -> Arc<String> {
344        self.source.clone()
345    }
346}
347
348impl JsonCrawler for JsonCrawlerOwned {
349    type BorrowTo<'a>
350        = JsonCrawlerBorrowed<'a>
351    where
352        Self: 'a;
353    type IterMut<'a>
354        = JsonCrawlerArrayIterMut<'a>
355    where
356        Self: 'a;
357    type IntoIter = JsonCrawlerArrayIntoIter;
358    fn try_into_iter(self) -> CrawlerResult<Self::IntoIter> {
359        if let JsonCrawlerOwned {
360            source,
361            crawler: serde_json::Value::Array(array),
362            path,
363        } = self
364        {
365            let cur_back = array.len().saturating_sub(1);
366            return Ok(JsonCrawlerArrayIntoIter {
367                source,
368                array: array.into_iter(),
369                path,
370                cur_front: 0,
371                cur_back,
372            });
373        }
374        Err(CrawlerError::parsing(
375            &self.path,
376            self.source.clone(),
377            ParseTarget::Array,
378            None,
379        ))
380    }
381    fn try_iter_mut(&mut self) -> CrawlerResult<Self::IterMut<'_>> {
382        let json_array = self.crawler.as_array_mut().ok_or_else(|| {
383            CrawlerError::parsing(&self.path, self.source.clone(), ParseTarget::Array, None)
384        })?;
385        let path_clone = self.path.clone();
386        let cur_back = json_array.len().saturating_sub(1);
387        Ok(JsonCrawlerArrayIterMut {
388            source: self.source.clone(),
389            array: json_array.iter_mut(),
390            path: path_clone,
391            cur_front: 0,
392            cur_back,
393        })
394    }
395    fn navigate_pointer(self, new_path: impl AsRef<str>) -> CrawlerResult<Self> {
396        let Self {
397            source,
398            crawler: mut old_crawler,
399            mut path,
400        } = self;
401        path.push(JsonPath::pointer(new_path.as_ref()));
402        let crawler = old_crawler
403            .pointer_mut(new_path.as_ref())
404            .map(|v| v.take())
405            .ok_or_else(|| CrawlerError::navigation(&path, source.clone()))?;
406        Ok(Self {
407            source,
408            crawler,
409            path,
410        })
411    }
412    fn navigate_index(self, index: usize) -> CrawlerResult<Self> {
413        let Self {
414            source,
415            crawler: mut old_crawler,
416            mut path,
417        } = self;
418        path.push(JsonPath::IndexNum(index));
419        let crawler = old_crawler
420            .get_mut(index)
421            .map(|v| v.take())
422            .ok_or_else(|| CrawlerError::navigation(&path, source.clone()))?;
423        Ok(Self {
424            source,
425            crawler,
426            path,
427        })
428    }
429    fn borrow_pointer(&mut self, path: impl AsRef<str>) -> CrawlerResult<Self::BorrowTo<'_>> {
430        let mut path_clone = self.path.clone();
431        path_clone.push(JsonPath::Pointer(path.as_ref().to_owned()));
432        let crawler = self
433            .crawler
434            .pointer_mut(path.as_ref())
435            .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?;
436        Ok(JsonCrawlerBorrowed {
437            source: self.source.clone(),
438            crawler,
439            path: path_clone,
440        })
441    }
442    fn borrow_index(&mut self, index: usize) -> CrawlerResult<Self::BorrowTo<'_>> {
443        let mut path_clone = self.path.clone();
444        path_clone.push(JsonPath::IndexNum(index));
445        let crawler = self
446            .crawler
447            .get_mut(index)
448            .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?;
449        Ok(JsonCrawlerBorrowed {
450            source: self.source.clone(),
451            crawler,
452            path: path_clone,
453        })
454    }
455    fn borrow_mut(&mut self) -> Self::BorrowTo<'_> {
456        JsonCrawlerBorrowed {
457            source: self.source.clone(),
458            crawler: &mut self.crawler,
459            path: self.path.to_owned(),
460        }
461    }
462    fn take_value<T: DeserializeOwned>(&mut self) -> CrawlerResult<T> {
463        serde_json::from_value(self.crawler.take()).map_err(|e| {
464            CrawlerError::parsing(
465                &self.path,
466                self.source.clone(),
467                ParseTarget::Other(std::any::type_name::<T>().to_string()),
468                Some(format!("{e}")),
469            )
470        })
471    }
472    fn take_value_pointer<T: DeserializeOwned>(
473        &mut self,
474        path: impl AsRef<str>,
475    ) -> CrawlerResult<T> {
476        let mut path_clone = self.path.clone();
477        path_clone.push(JsonPath::pointer(path.as_ref()));
478        serde_json::from_value(
479            self.crawler
480                .pointer_mut(path.as_ref())
481                .map(|v| v.take())
482                .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?,
483        )
484        .map_err(|e| {
485            CrawlerError::parsing(
486                &path_clone,
487                self.source.clone(),
488                ParseTarget::Other(std::any::type_name::<T>().to_string()),
489                Some(format!("{e}")),
490            )
491        })
492    }
493    fn take_value_pointers<T: DeserializeOwned, S: AsRef<str>>(
494        &mut self,
495        paths: &[S],
496    ) -> CrawlerResult<T> {
497        let mut path_clone = self.path.clone();
498        let Some((found, path)) = paths
499            .iter()
500            .find_map(|p| self.crawler.pointer_mut(p.as_ref()).map(|v| (v.take(), p)))
501        else {
502            return Err(CrawlerError::paths_not_found(
503                path_clone,
504                self.source.clone(),
505                paths.iter().map(|s| s.as_ref().to_string()).collect(),
506            ));
507        };
508        path_clone.push(JsonPath::Pointer(path.as_ref().to_string()));
509        serde_json::from_value(found).map_err(|e| {
510            CrawlerError::parsing(
511                &path_clone,
512                self.source.clone(),
513                ParseTarget::Other(std::any::type_name::<T>().to_string()),
514                Some(format!("{e}")),
515            )
516        })
517    }
518    fn path_exists(&self, path: &str) -> bool {
519        self.crawler.pointer(path).is_some()
520    }
521    fn get_source(&self) -> Arc<String> {
522        self.source.clone()
523    }
524    fn get_path(&self) -> String {
525        (&self.path).into()
526    }
527}
528
/// A single step in the path a crawler has navigated.
#[derive(Clone, PartialEq, Debug)]
pub enum JsonPath {
    /// A pointer string, stored exactly as it was supplied.
    Pointer(String),
    /// An index into an array.
    IndexNum(usize),
}
/// Ordered list of the steps navigated so far.
#[derive(Clone, Default, PartialEq, Debug)]
struct PathList {
    list: Vec<JsonPath>,
}

impl From<&JsonPath> for String {
    /// Render one step as text: pointers are copied verbatim, indices become
    /// `/{index}`.
    fn from(value: &JsonPath) -> Self {
        match value {
            JsonPath::Pointer(p) => p.clone(),
            JsonPath::IndexNum(i) => format!("/{i}"),
        }
    }
}
impl JsonPath {
    /// Create a `JsonPath::Pointer` from anything convertible into a `String`.
    pub fn pointer<S: Into<String>>(path: S) -> Self {
        JsonPath::Pointer(path.into())
    }
}
impl PathList {
    /// Builder-style variant of `push`: append `path` and return the list.
    fn with(mut self, path: JsonPath) -> Self {
        self.list.push(path);
        self
    }
    /// Append `path` to the end of the list.
    fn push(&mut self, path: JsonPath) {
        self.list.push(path)
    }
}

// Both conversions are needed: implementing `From<&PathList>` does not provide
// `From<PathList>`. The owned conversion delegates to the borrowed one so the
// rendering logic lives in a single place.
impl From<&PathList> for String {
    /// Concatenate the rendered form of every step, in order. An empty list
    /// renders as the empty string.
    fn from(value: &PathList) -> Self {
        value.list.iter().map(String::from).collect()
    }
}
impl From<PathList> for String {
    fn from(value: PathList) -> Self {
        String::from(&value)
    }
}