json_crawler/
lib.rs

//! Library to crawl JSON using JSON pointer syntax and return useful errors.
//! Documentation is a work in progress.
3use error::ParseTarget;
4pub use error::{CrawlerError, CrawlerResult};
5pub use iter::*;
6use serde::de::DeserializeOwned;
7use serde::Deserialize;
8// Currently the only way to create a crawler is from a serde_json::Value, so we
9// might as well re-export it.
10// doc(no_inline) means that the re-export will be clear in the docs.
11#[doc(no_inline)]
12pub use serde_json::Value;
13use std::fmt::Display;
14use std::ops::ControlFlow;
15use std::str::FromStr;
16use std::sync::Arc;
17
18mod error;
19mod iter;
20
21/// Trait to represent a JsonCrawler that may own or borrow from the original
22/// `serde_json::Value`.
23pub trait JsonCrawler
24where
25    Self: Sized,
26{
27    type BorrowTo<'a>: JsonCrawler
28    where
29        Self: 'a;
30    type IterMut<'a>: Iterator<Item = Self::BorrowTo<'a>>
31    where
32        Self: 'a;
33    type IntoIter: Iterator<Item = Self>;
34    fn navigate_pointer(self, new_path: impl AsRef<str>) -> CrawlerResult<Self>;
35    fn navigate_index(self, index: usize) -> CrawlerResult<Self>;
36    fn borrow_pointer(&mut self, path: impl AsRef<str>) -> CrawlerResult<Self::BorrowTo<'_>>;
37    fn borrow_index(&mut self, index: usize) -> CrawlerResult<Self::BorrowTo<'_>>;
38    fn borrow_mut(&mut self) -> Self::BorrowTo<'_>;
39    fn try_into_iter(self) -> CrawlerResult<Self::IntoIter>;
40    fn try_iter_mut(&mut self) -> CrawlerResult<Self::IterMut<'_>>;
41    fn path_exists(&self, path: &str) -> bool;
42    fn get_path(&self) -> String;
43    fn get_source(&self) -> Arc<String>;
44    fn take_value<T: DeserializeOwned>(&mut self) -> CrawlerResult<T>;
45    fn take_value_pointer<T: DeserializeOwned>(
46        &mut self,
47        path: impl AsRef<str>,
48    ) -> CrawlerResult<T>;
49    fn borrow_value<T: for<'de> Deserialize<'de>>(&self) -> CrawlerResult<T>;
50    fn borrow_value_pointer<T: for<'de> Deserialize<'de>>(
51        &self,
52        path: impl AsRef<str>,
53    ) -> CrawlerResult<T>;
54    /// For use when you want to try and take value that could be at multiple
55    /// valid locations. Returns an error message that notes that all valid
56    /// locations were attempted.
57    ///
58    /// # Usage
59    /// ```no_run
60    /// # use json_crawler::*;
61    /// # let mut crawler = JsonCrawlerOwned::new(String::new(), serde_json::Value::Null);
62    /// // Output will be an error that path should contain "header" and "headerName", if crawler contains neither.
63    /// let output: CrawlerResult<String> = crawler.take_value_pointers(&["header", "headerName"]);
64    /// ```
65    fn take_value_pointers<T: DeserializeOwned, S: AsRef<str>>(
66        &mut self,
67        paths: &[S],
68    ) -> CrawlerResult<T>;
69    /// For use when you want to apply some operations that return Option, but
70    /// still return an error with context if they fail. For convenience,
71    /// closure return type is fallible, allowing you to see the cause of the
72    /// error at the failure point as well, if you have it.
73    ///
74    /// # Usage
75    /// ```no_run
76    /// # use json_crawler::*;
77    /// # let mut crawler = JsonCrawlerOwned::new(String::new(), serde_json::Value::Null);
78    /// // Returns Ok(42) if crawler parses into 42.
79    /// // Returns parsing from string error, plus the message that output should be 42, if output fails to parse from string.
80    /// // Returns message that output should be 42, if output parses from string, but is not 42.
81    /// let forty_two: CrawlerResult<usize> = crawler.try_expect("Output should be 42", |crawler| {
82    ///     let num = crawler.take_and_parse_str::<usize>()?;
83    ///     if num == 42 {
84    ///         return Ok(Some(num));
85    ///     }
86    ///     Ok(None)
87    /// });
88    /// ```
89    fn try_expect<F, O>(&mut self, msg: impl ToString, f: F) -> CrawlerResult<O>
90    where
91        F: FnOnce(&mut Self) -> CrawlerResult<Option<O>>,
92    {
93        match f(self) {
94            Ok(Some(r)) => Ok(r),
95            Ok(None) => Err(CrawlerError::parsing(
96                self.get_path(),
97                self.get_source(),
98                crate::error::ParseTarget::Other(std::any::type_name::<O>().to_string()),
99                Some(msg.to_string()),
100            )),
101            // In this case, we've got a nested error, and should display both sets of context.
102            Err(e) => {
103                let msg = format!("Expected {} but encountered '{e}'", msg.to_string());
104                Err(CrawlerError::parsing(
105                    self.get_path(),
106                    self.get_source(),
107                    crate::error::ParseTarget::Other(std::any::type_name::<O>().to_string()),
108                    Some(msg),
109                ))
110            }
111        }
112    }
113    /// Take the value as a String, and apply FromStr to return the desired
114    /// type.
115    fn take_and_parse_str<F: FromStr>(&mut self) -> CrawlerResult<F>
116    where
117        F::Err: Display,
118    {
119        let as_string = self.take_value::<String>()?;
120        str::parse::<F>(as_string.as_str()).map_err(|e| {
121            CrawlerError::parsing(
122                self.get_path(),
123                self.get_source(),
124                crate::error::ParseTarget::Other(std::any::type_name::<F>().to_string()),
125                Some(format!("{e}")),
126            )
127        })
128    }
129    /// Try to apply each function in a list of functions, returning the first
130    /// Ok result, or the last Err result if none returned Ok.
131    ///
132    /// # Warning
133    /// If one of the functions mutates before failing, the mutation will still
134    /// be applied. Also, the mutations are applied sequentially - mutation 1
135    /// could impact mutation 2 for example.
136    fn try_functions<O>(
137        &mut self,
138        functions: Vec<fn(&mut Self) -> CrawlerResult<O>>,
139    ) -> CrawlerResult<O> {
140        let original_path = self.get_path();
141        let source_ptr = self.get_source();
142        let output = functions.into_iter().try_fold(Vec::new(), |mut acc, f| {
143            let res = f(self);
144            let e = match res {
145                Ok(ret) => return ControlFlow::Break(ret),
146                Err(e) => e,
147            };
148            acc.push(e);
149            ControlFlow::Continue(acc)
150        });
151        match output {
152            ControlFlow::Continue(c) => Err(CrawlerError::multiple_parse_error(
153                original_path,
154                source_ptr,
155                c,
156            )),
157            ControlFlow::Break(b) => Ok(b),
158        }
159    }
160}
161
162#[derive(Clone, PartialEq, Debug)]
163pub struct JsonCrawlerOwned {
164    // Source is wrapped in an Arc as we are going to pass ownership when returning an error and we
165    // want it to be thread safe.
166    source: Arc<String>,
167    crawler: serde_json::Value,
168    path: PathList,
169}
170pub struct JsonCrawlerBorrowed<'a> {
171    // Source is wrapped in an Arc as we are going to pass ownership when returning an error and we
172    // want it to be thread safe.
173    source: Arc<String>,
174    crawler: &'a mut serde_json::Value,
175    path: PathList,
176}
177
178impl JsonCrawlerOwned {
179    /// Create a new JsonCrawler, where 'json' is the `serde_json::Value` that
180    /// you wish to crawl and 'source' represents a serialized copy of the same
181    /// `serde_json::Value`.
182    // TODO: Safer constructor that avoids 'source' being out of sync with 'json'
183    pub fn new(source: String, json: serde_json::Value) -> Self {
184        Self {
185            source: Arc::new(source),
186            crawler: json,
187            path: Default::default(),
188        }
189    }
190}
191
192impl<'a> JsonCrawler for JsonCrawlerBorrowed<'a> {
193    type BorrowTo<'b>
194        = JsonCrawlerBorrowed<'b>
195    where
196        Self: 'b;
197    type IterMut<'b>
198        = JsonCrawlerArrayIterMut<'b>
199    where
200        Self: 'b;
201    type IntoIter = JsonCrawlerArrayIterMut<'a>;
202    fn take_value_pointer<T: DeserializeOwned>(
203        &mut self,
204        path: impl AsRef<str>,
205    ) -> CrawlerResult<T> {
206        let mut path_clone = self.path.clone();
207        path_clone.push(JsonPath::pointer(path.as_ref()));
208        serde_json::from_value(
209            self.crawler
210                .pointer_mut(path.as_ref())
211                .map(|v| v.take())
212                .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?,
213        )
214        .map_err(|e| {
215            CrawlerError::parsing(
216                &path_clone,
217                self.source.clone(),
218                ParseTarget::Other(std::any::type_name::<T>().to_string()),
219                Some(format!("{e}")),
220            )
221        })
222    }
223    fn borrow_pointer(&mut self, path: impl AsRef<str>) -> CrawlerResult<Self::BorrowTo<'_>> {
224        let mut path_clone = self.path.clone();
225        path_clone.push(JsonPath::pointer(path.as_ref()));
226        let crawler = self
227            .crawler
228            .pointer_mut(path.as_ref())
229            .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?;
230        Ok(JsonCrawlerBorrowed {
231            source: self.source.clone(),
232            crawler,
233            path: path_clone,
234        })
235    }
236    fn navigate_pointer(self, path: impl AsRef<str>) -> CrawlerResult<Self> {
237        let mut path_clone = self.path.clone();
238        path_clone.push(JsonPath::pointer(path.as_ref()));
239        let crawler = self
240            .crawler
241            .pointer_mut(path.as_ref())
242            .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?;
243        Ok(Self {
244            source: self.source,
245            crawler,
246            path: path_clone,
247        })
248    }
249    fn try_into_iter(self) -> CrawlerResult<Self::IntoIter> {
250        let json_array = self.crawler.as_array_mut().ok_or_else(|| {
251            CrawlerError::parsing(&self.path, self.source.clone(), ParseTarget::Array, None)
252        })?;
253        let path_clone = self.path.clone();
254        let cur_back = json_array.len().saturating_sub(1);
255        Ok(JsonCrawlerArrayIterMut {
256            source: self.source,
257            array: json_array.iter_mut(),
258            path: path_clone,
259            cur_front: 0,
260            cur_back,
261        })
262    }
263    fn try_iter_mut(&mut self) -> CrawlerResult<Self::IterMut<'_>> {
264        let json_array = self.crawler.as_array_mut().ok_or_else(|| {
265            CrawlerError::parsing(&self.path, self.source.clone(), ParseTarget::Array, None)
266        })?;
267        let path_clone = self.path.clone();
268        let cur_back = json_array.len().saturating_sub(1);
269        Ok(JsonCrawlerArrayIterMut {
270            source: self.source.clone(),
271            array: json_array.iter_mut(),
272            path: path_clone,
273            cur_front: 0,
274            cur_back,
275        })
276    }
277    fn navigate_index(self, index: usize) -> CrawlerResult<Self> {
278        let mut path_clone = self.path.clone();
279        path_clone.push(JsonPath::IndexNum(index));
280        let crawler = self
281            .crawler
282            .get_mut(index)
283            .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?;
284        Ok(Self {
285            source: self.source,
286            crawler,
287            path: path_clone,
288        })
289    }
290    fn borrow_index(&mut self, index: usize) -> CrawlerResult<Self::BorrowTo<'_>> {
291        let mut path_clone = self.path.clone();
292        path_clone.push(JsonPath::IndexNum(index));
293        let crawler = self
294            .crawler
295            .get_mut(index)
296            .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?;
297        Ok(JsonCrawlerBorrowed {
298            source: self.source.clone(),
299            crawler,
300            path: path_clone,
301        })
302    }
303    fn borrow_mut(&mut self) -> Self::BorrowTo<'_> {
304        JsonCrawlerBorrowed {
305            source: self.source.clone(),
306            crawler: self.crawler,
307            path: self.path.to_owned(),
308        }
309    }
310    fn get_path(&self) -> String {
311        (&self.path).into()
312    }
313    fn take_value<T: DeserializeOwned>(&mut self) -> CrawlerResult<T> {
314        serde_json::from_value(self.crawler.take()).map_err(|e| {
315            CrawlerError::parsing(
316                &self.path,
317                self.source.clone(),
318                ParseTarget::Other(std::any::type_name::<T>().to_string()),
319                Some(format!("{e}")),
320            )
321        })
322    }
323    fn take_value_pointers<T: DeserializeOwned, S: AsRef<str>>(
324        &mut self,
325        paths: &[S],
326    ) -> CrawlerResult<T> {
327        let mut path_clone = self.path.clone();
328        let Some((found, path)) = paths
329            .iter()
330            .find_map(|p| self.crawler.pointer_mut(p.as_ref()).map(|v| (v.take(), p)))
331        else {
332            return Err(CrawlerError::paths_not_found(
333                path_clone,
334                self.source.clone(),
335                paths.iter().map(|s| s.as_ref().to_string()).collect(),
336            ));
337        };
338        path_clone.push(JsonPath::Pointer(path.as_ref().to_string()));
339        serde_json::from_value(found).map_err(|e| {
340            CrawlerError::parsing(
341                &path_clone,
342                self.source.clone(),
343                ParseTarget::Other(std::any::type_name::<T>().to_string()),
344                Some(format!("{e}")),
345            )
346        })
347    }
348    fn borrow_value<T: for<'de> Deserialize<'de>>(&self) -> CrawlerResult<T> {
349        T::deserialize(&*self.crawler).map_err(|e| {
350            CrawlerError::parsing(
351                &self.path,
352                self.source.clone(),
353                ParseTarget::Other(std::any::type_name::<T>().to_string()),
354                Some(format!("{e}")),
355            )
356        })
357    }
358    fn borrow_value_pointer<T: for<'de> Deserialize<'de>>(
359        &self,
360        path: impl AsRef<str>,
361    ) -> CrawlerResult<T> {
362        let mut path_clone = self.path.clone();
363        path_clone.push(JsonPath::pointer(path.as_ref()));
364        // Deserialize without taking ownership or cloning.
365        T::deserialize(
366            self.crawler
367                .pointer(path.as_ref())
368                .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?,
369        )
370        .map_err(|e| {
371            CrawlerError::parsing(
372                &path_clone,
373                self.source.clone(),
374                ParseTarget::Other(std::any::type_name::<T>().to_string()),
375                Some(format!("{e}")),
376            )
377        })
378    }
379    fn path_exists(&self, path: &str) -> bool {
380        self.crawler.pointer(path).is_some()
381    }
382    fn get_source(&self) -> Arc<String> {
383        self.source.clone()
384    }
385}
386
387impl JsonCrawler for JsonCrawlerOwned {
388    type BorrowTo<'a>
389        = JsonCrawlerBorrowed<'a>
390    where
391        Self: 'a;
392    type IterMut<'a>
393        = JsonCrawlerArrayIterMut<'a>
394    where
395        Self: 'a;
396    type IntoIter = JsonCrawlerArrayIntoIter;
397    fn try_into_iter(self) -> CrawlerResult<Self::IntoIter> {
398        if let JsonCrawlerOwned {
399            source,
400            crawler: serde_json::Value::Array(array),
401            path,
402        } = self
403        {
404            let cur_back = array.len().saturating_sub(1);
405            return Ok(JsonCrawlerArrayIntoIter {
406                source,
407                array: array.into_iter(),
408                path,
409                cur_front: 0,
410                cur_back,
411            });
412        }
413        Err(CrawlerError::parsing(
414            &self.path,
415            self.source.clone(),
416            ParseTarget::Array,
417            None,
418        ))
419    }
420    fn try_iter_mut(&mut self) -> CrawlerResult<Self::IterMut<'_>> {
421        let json_array = self.crawler.as_array_mut().ok_or_else(|| {
422            CrawlerError::parsing(&self.path, self.source.clone(), ParseTarget::Array, None)
423        })?;
424        let path_clone = self.path.clone();
425        let cur_back = json_array.len().saturating_sub(1);
426        Ok(JsonCrawlerArrayIterMut {
427            source: self.source.clone(),
428            array: json_array.iter_mut(),
429            path: path_clone,
430            cur_front: 0,
431            cur_back,
432        })
433    }
434    fn navigate_pointer(self, new_path: impl AsRef<str>) -> CrawlerResult<Self> {
435        let Self {
436            source,
437            crawler: mut old_crawler,
438            mut path,
439        } = self;
440        path.push(JsonPath::pointer(new_path.as_ref()));
441        let crawler = old_crawler
442            .pointer_mut(new_path.as_ref())
443            .map(|v| v.take())
444            .ok_or_else(|| CrawlerError::navigation(&path, source.clone()))?;
445        Ok(Self {
446            source,
447            crawler,
448            path,
449        })
450    }
451    fn navigate_index(self, index: usize) -> CrawlerResult<Self> {
452        let Self {
453            source,
454            crawler: mut old_crawler,
455            mut path,
456        } = self;
457        path.push(JsonPath::IndexNum(index));
458        let crawler = old_crawler
459            .get_mut(index)
460            .map(|v| v.take())
461            .ok_or_else(|| CrawlerError::navigation(&path, source.clone()))?;
462        Ok(Self {
463            source,
464            crawler,
465            path,
466        })
467    }
468    fn borrow_pointer(&mut self, path: impl AsRef<str>) -> CrawlerResult<Self::BorrowTo<'_>> {
469        let mut path_clone = self.path.clone();
470        path_clone.push(JsonPath::Pointer(path.as_ref().to_owned()));
471        let crawler = self
472            .crawler
473            .pointer_mut(path.as_ref())
474            .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?;
475        Ok(JsonCrawlerBorrowed {
476            source: self.source.clone(),
477            crawler,
478            path: path_clone,
479        })
480    }
481    fn borrow_index(&mut self, index: usize) -> CrawlerResult<Self::BorrowTo<'_>> {
482        let mut path_clone = self.path.clone();
483        path_clone.push(JsonPath::IndexNum(index));
484        let crawler = self
485            .crawler
486            .get_mut(index)
487            .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?;
488        Ok(JsonCrawlerBorrowed {
489            source: self.source.clone(),
490            crawler,
491            path: path_clone,
492        })
493    }
494    fn borrow_mut(&mut self) -> Self::BorrowTo<'_> {
495        JsonCrawlerBorrowed {
496            source: self.source.clone(),
497            crawler: &mut self.crawler,
498            path: self.path.to_owned(),
499        }
500    }
501    fn take_value<T: DeserializeOwned>(&mut self) -> CrawlerResult<T> {
502        serde_json::from_value(self.crawler.take()).map_err(|e| {
503            CrawlerError::parsing(
504                &self.path,
505                self.source.clone(),
506                ParseTarget::Other(std::any::type_name::<T>().to_string()),
507                Some(format!("{e}")),
508            )
509        })
510    }
511    fn take_value_pointer<T: DeserializeOwned>(
512        &mut self,
513        path: impl AsRef<str>,
514    ) -> CrawlerResult<T> {
515        let mut path_clone = self.path.clone();
516        path_clone.push(JsonPath::pointer(path.as_ref()));
517        serde_json::from_value(
518            self.crawler
519                .pointer_mut(path.as_ref())
520                .map(|v| v.take())
521                .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?,
522        )
523        .map_err(|e| {
524            CrawlerError::parsing(
525                &path_clone,
526                self.source.clone(),
527                ParseTarget::Other(std::any::type_name::<T>().to_string()),
528                Some(format!("{e}")),
529            )
530        })
531    }
532    fn take_value_pointers<T: DeserializeOwned, S: AsRef<str>>(
533        &mut self,
534        paths: &[S],
535    ) -> CrawlerResult<T> {
536        let mut path_clone = self.path.clone();
537        let Some((found, path)) = paths
538            .iter()
539            .find_map(|p| self.crawler.pointer_mut(p.as_ref()).map(|v| (v.take(), p)))
540        else {
541            return Err(CrawlerError::paths_not_found(
542                path_clone,
543                self.source.clone(),
544                paths.iter().map(|s| s.as_ref().to_string()).collect(),
545            ));
546        };
547        path_clone.push(JsonPath::Pointer(path.as_ref().to_string()));
548        serde_json::from_value(found).map_err(|e| {
549            CrawlerError::parsing(
550                &path_clone,
551                self.source.clone(),
552                ParseTarget::Other(std::any::type_name::<T>().to_string()),
553                Some(format!("{e}")),
554            )
555        })
556    }
557    fn borrow_value<T: DeserializeOwned>(&self) -> CrawlerResult<T> {
558        T::deserialize(&self.crawler).map_err(|e| {
559            CrawlerError::parsing(
560                &self.path,
561                self.source.clone(),
562                ParseTarget::Other(std::any::type_name::<T>().to_string()),
563                Some(format!("{e}")),
564            )
565        })
566    }
567    fn borrow_value_pointer<T: DeserializeOwned>(&self, path: impl AsRef<str>) -> CrawlerResult<T> {
568        let mut path_clone = self.path.clone();
569        path_clone.push(JsonPath::pointer(path.as_ref()));
570        T::deserialize(
571            self.crawler
572                .pointer(path.as_ref())
573                .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?,
574        )
575        .map_err(|e| {
576            CrawlerError::parsing(
577                &path_clone,
578                self.source.clone(),
579                ParseTarget::Other(std::any::type_name::<T>().to_string()),
580                Some(format!("{e}")),
581            )
582        })
583    }
584    fn path_exists(&self, path: &str) -> bool {
585        self.crawler.pointer(path).is_some()
586    }
587    fn get_source(&self) -> Arc<String> {
588        self.source.clone()
589    }
590    fn get_path(&self) -> String {
591        (&self.path).into()
592    }
593}
594
/// A single segment of the path a crawler has navigated.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum JsonPath {
    /// A JSON pointer string, e.g. `"/header/title"`.
    Pointer(String),
    /// An index into a JSON array.
    IndexNum(usize),
}
600#[derive(Clone, Default, PartialEq, Debug)]
601struct PathList {
602    list: Vec<JsonPath>,
603}
604
605impl From<&JsonPath> for String {
606    fn from(value: &JsonPath) -> Self {
607        match value {
608            JsonPath::Pointer(p) => p.to_owned(),
609            JsonPath::IndexNum(i) => format! {"/{i}"},
610        }
611    }
612}
613impl JsonPath {
614    pub fn pointer<S: Into<String>>(path: S) -> Self {
615        JsonPath::Pointer(path.into())
616    }
617}
618impl PathList {
619    fn with(mut self, path: JsonPath) -> Self {
620        self.list.push(path);
621        self
622    }
623    fn push(&mut self, path: JsonPath) {
624        self.list.push(path)
625    }
626}
627
628// I believe both implementations are required, due to orphan rules.
629impl From<&PathList> for String {
630    fn from(value: &PathList) -> Self {
631        let mut path = String::new();
632        for p in &value.list {
633            path.push_str(String::from(p).as_str());
634        }
635        path
636    }
637}
638impl From<PathList> for String {
639    fn from(value: PathList) -> Self {
640        let mut path = String::new();
641        for p in &value.list {
642            path.push_str(String::from(p).as_str());
643        }
644        path
645    }
646}