json_crawler/
lib.rs

1//! Library to crawl Json using the pointer syntax and return useful errors.
2//! Documentation is a work in progress.
3use error::ParseTarget;
4pub use error::{CrawlerError, CrawlerResult};
5pub use iter::*;
6use serde::de::DeserializeOwned;
7// Currently the only way to create a crawler is from a serde_json::Value, so we
8// might as well re-export it.
9// doc(no_inline) means that the re-export will be clear in the docs.
10#[doc(no_inline)]
11pub use serde_json::Value;
12use std::fmt::Display;
13use std::ops::ControlFlow;
14use std::str::FromStr;
15use std::sync::Arc;
16
17mod error;
18mod iter;
19
20/// Trait to represent a JsonCrawler that may own or borrow from the original
21/// `serde_json::Value`.
22pub trait JsonCrawler
23where
24    Self: Sized,
25{
26    type BorrowTo<'a>: JsonCrawler
27    where
28        Self: 'a;
29    type IterMut<'a>: Iterator<Item = Self::BorrowTo<'a>>
30    where
31        Self: 'a;
32    type IntoIter: Iterator<Item = Self>;
33    fn navigate_pointer(self, new_path: impl AsRef<str>) -> CrawlerResult<Self>;
34    fn navigate_index(self, index: usize) -> CrawlerResult<Self>;
35    fn borrow_pointer(&mut self, path: impl AsRef<str>) -> CrawlerResult<Self::BorrowTo<'_>>;
36    fn borrow_index(&mut self, index: usize) -> CrawlerResult<Self::BorrowTo<'_>>;
37    fn borrow_mut(&mut self) -> Self::BorrowTo<'_>;
38    fn try_into_iter(self) -> CrawlerResult<Self::IntoIter>;
39    fn try_iter_mut(&mut self) -> CrawlerResult<Self::IterMut<'_>>;
40    fn path_exists(&self, path: &str) -> bool;
41    fn get_path(&self) -> String;
42    fn get_source(&self) -> Arc<String>;
43    fn take_value<T: DeserializeOwned>(&mut self) -> CrawlerResult<T>;
44    fn take_value_pointer<T: DeserializeOwned>(
45        &mut self,
46        path: impl AsRef<str>,
47    ) -> CrawlerResult<T>;
48    /// For use when you want to try and take value that could be at multiple
49    /// valid locations. Returns an error message that notes that all valid
50    /// locations were attempted.
51    ///
52    /// # Usage
53    /// ```no_run
54    /// # use json_crawler::*;
55    /// # let mut crawler = JsonCrawlerOwned::new(String::new(), serde_json::Value::Null);
56    /// // Output will be an error that path should contain "header" and "headerName", if crawler contains neither.
57    /// let output: CrawlerResult<String> = crawler.take_value_pointers(&["header", "headerName"]);
58    /// ```
59    fn take_value_pointers<T: DeserializeOwned, S: AsRef<str>>(
60        &mut self,
61        paths: &[S],
62    ) -> CrawlerResult<T>;
63    /// For use when you want to apply some operations that return Option, but
64    /// still return an error with context if they fail. For convenience,
65    /// closure return type is fallible, allowing you to see the cause of the
66    /// error at the failure point as well, if you have it.
67    ///
68    /// # Usage
69    /// ```no_run
70    /// # use json_crawler::*;
71    /// # let mut crawler = JsonCrawlerOwned::new(String::new(), serde_json::Value::Null);
72    /// // Returns Ok(42) if crawler parses into 42.
73    /// // Returns parsing from string error, plus the message that output should be 42, if output fails to parse from string.
74    /// // Returns message that output should be 42, if output parses from string, but is not 42.
75    /// let forty_two: CrawlerResult<usize> = crawler.try_expect("Output should be 42", |crawler| {
76    ///     let num = crawler.take_and_parse_str::<usize>()?;
77    ///     if num == 42 {
78    ///         return Ok(Some(num));
79    ///     }
80    ///     Ok(None)
81    /// });
82    /// ```
83    fn try_expect<F, O>(&mut self, msg: impl ToString, f: F) -> CrawlerResult<O>
84    where
85        F: FnOnce(&mut Self) -> CrawlerResult<Option<O>>,
86    {
87        match f(self) {
88            Ok(Some(r)) => Ok(r),
89            Ok(None) => Err(CrawlerError::parsing(
90                self.get_path(),
91                self.get_source(),
92                crate::error::ParseTarget::Other(std::any::type_name::<O>().to_string()),
93                Some(msg.to_string()),
94            )),
95            // In this case, we've got a nested error, and should display both sets of context.
96            Err(e) => {
97                let msg = format!("Expected {} but encountered '{e}'", msg.to_string());
98                Err(CrawlerError::parsing(
99                    self.get_path(),
100                    self.get_source(),
101                    crate::error::ParseTarget::Other(std::any::type_name::<O>().to_string()),
102                    Some(msg),
103                ))
104            }
105        }
106    }
107    /// Take the value as a String, and apply FromStr to return the desired
108    /// type.
109    fn take_and_parse_str<F: FromStr>(&mut self) -> CrawlerResult<F>
110    where
111        F::Err: Display,
112    {
113        let as_string = self.take_value::<String>()?;
114        str::parse::<F>(as_string.as_str()).map_err(|e| {
115            CrawlerError::parsing(
116                self.get_path(),
117                self.get_source(),
118                crate::error::ParseTarget::Other(std::any::type_name::<F>().to_string()),
119                Some(format!("{e}")),
120            )
121        })
122    }
123    /// Try to apply each function in a list of functions, returning the first
124    /// Ok result, or the last Err result if none returned Ok.
125    ///
126    /// # Warning
127    /// If one of the functions mutates before failing, the mutation will still
128    /// be applied. Also, the mutations are applied sequentially - mutation 1
129    /// could impact mutation 2 for example.
130    fn try_functions<O>(
131        &mut self,
132        functions: Vec<fn(&mut Self) -> CrawlerResult<O>>,
133    ) -> CrawlerResult<O> {
134        let original_path = self.get_path();
135        let source_ptr = self.get_source();
136        let output = functions.into_iter().try_fold(Vec::new(), |mut acc, f| {
137            let res = f(self);
138            let e = match res {
139                Ok(ret) => return ControlFlow::Break(ret),
140                Err(e) => e,
141            };
142            acc.push(e);
143            ControlFlow::Continue(acc)
144        });
145        match output {
146            ControlFlow::Continue(c) => Err(CrawlerError::multiple_parse_error(
147                original_path,
148                source_ptr,
149                c,
150            )),
151            ControlFlow::Break(b) => Ok(b),
152        }
153    }
154}
155
/// Crawler that owns the `serde_json::Value` it navigates.
#[derive(Clone, PartialEq, Debug)]
pub struct JsonCrawlerOwned {
    // Source is wrapped in an Arc as we are going to pass ownership when returning an error and we
    // want it to be thread safe.
    source: Arc<String>,
    // The JSON value at the crawler's current position.
    crawler: serde_json::Value,
    // Path segments travelled so far; used for `get_path` and for error context.
    path: PathList,
}
/// Crawler that mutably borrows the `serde_json::Value` it navigates for the
/// lifetime `'a`.
pub struct JsonCrawlerBorrowed<'a> {
    // Source is wrapped in an Arc as we are going to pass ownership when returning an error and we
    // want it to be thread safe.
    source: Arc<String>,
    // Mutable borrow of the JSON value at the crawler's current position.
    crawler: &'a mut serde_json::Value,
    // Path segments travelled so far; used for `get_path` and for error context.
    path: PathList,
}
171
172impl JsonCrawlerOwned {
173    /// Create a new JsonCrawler, where 'json' is the `serde_json::Value` that
174    /// you wish to crawl and 'source' represents a serialized copy of the same
175    /// `serde_json::Value`.
176    // TODO: Safer constructor that avoids 'source' being out of sync with 'json'
177    pub fn new(source: String, json: serde_json::Value) -> Self {
178        Self {
179            source: Arc::new(source),
180            crawler: json,
181            path: Default::default(),
182        }
183    }
184}
185
186impl<'a> JsonCrawler for JsonCrawlerBorrowed<'a> {
187    type BorrowTo<'b>
188        = JsonCrawlerBorrowed<'b>
189    where
190        Self: 'b;
191    type IterMut<'b>
192        = JsonCrawlerArrayIterMut<'b>
193    where
194        Self: 'b;
195    type IntoIter = JsonCrawlerArrayIterMut<'a>;
196    fn take_value_pointer<T: DeserializeOwned>(
197        &mut self,
198        path: impl AsRef<str>,
199    ) -> CrawlerResult<T> {
200        let mut path_clone = self.path.clone();
201        path_clone.push(JsonPath::pointer(path.as_ref()));
202        serde_json::from_value(
203            self.crawler
204                .pointer_mut(path.as_ref())
205                .map(|v| v.take())
206                .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?,
207        )
208        .map_err(|e| {
209            CrawlerError::parsing(
210                &path_clone,
211                self.source.clone(),
212                ParseTarget::Other(std::any::type_name::<T>().to_string()),
213                Some(format!("{e}")),
214            )
215        })
216    }
217    fn borrow_pointer(&mut self, path: impl AsRef<str>) -> CrawlerResult<Self::BorrowTo<'_>> {
218        let mut path_clone = self.path.clone();
219        path_clone.push(JsonPath::pointer(path.as_ref()));
220        let crawler = self
221            .crawler
222            .pointer_mut(path.as_ref())
223            .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?;
224        Ok(JsonCrawlerBorrowed {
225            source: self.source.clone(),
226            crawler,
227            path: path_clone,
228        })
229    }
230    fn navigate_pointer(self, path: impl AsRef<str>) -> CrawlerResult<Self> {
231        let mut path_clone = self.path.clone();
232        path_clone.push(JsonPath::pointer(path.as_ref()));
233        let crawler = self
234            .crawler
235            .pointer_mut(path.as_ref())
236            .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?;
237        Ok(Self {
238            source: self.source,
239            crawler,
240            path: path_clone,
241        })
242    }
243    fn try_into_iter(self) -> CrawlerResult<Self::IntoIter> {
244        let json_array = self.crawler.as_array_mut().ok_or_else(|| {
245            CrawlerError::parsing(&self.path, self.source.clone(), ParseTarget::Array, None)
246        })?;
247        let path_clone = self.path.clone();
248        let cur_back = json_array.len().saturating_sub(1);
249        Ok(JsonCrawlerArrayIterMut {
250            source: self.source,
251            array: json_array.iter_mut(),
252            path: path_clone,
253            cur_front: 0,
254            cur_back,
255        })
256    }
257    fn try_iter_mut(&mut self) -> CrawlerResult<Self::IterMut<'_>> {
258        let json_array = self.crawler.as_array_mut().ok_or_else(|| {
259            CrawlerError::parsing(&self.path, self.source.clone(), ParseTarget::Array, None)
260        })?;
261        let path_clone = self.path.clone();
262        let cur_back = json_array.len().saturating_sub(1);
263        Ok(JsonCrawlerArrayIterMut {
264            source: self.source.clone(),
265            array: json_array.iter_mut(),
266            path: path_clone,
267            cur_front: 0,
268            cur_back,
269        })
270    }
271    fn navigate_index(self, index: usize) -> CrawlerResult<Self> {
272        let mut path_clone = self.path.clone();
273        path_clone.push(JsonPath::IndexNum(index));
274        let crawler = self
275            .crawler
276            .get_mut(index)
277            .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?;
278        Ok(Self {
279            source: self.source,
280            crawler,
281            path: path_clone,
282        })
283    }
284    fn borrow_index(&mut self, index: usize) -> CrawlerResult<Self::BorrowTo<'_>> {
285        let mut path_clone = self.path.clone();
286        path_clone.push(JsonPath::IndexNum(index));
287        let crawler = self
288            .crawler
289            .get_mut(index)
290            .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?;
291        Ok(JsonCrawlerBorrowed {
292            source: self.source.clone(),
293            crawler,
294            path: path_clone,
295        })
296    }
297    fn borrow_mut(&mut self) -> Self::BorrowTo<'_> {
298        JsonCrawlerBorrowed {
299            source: self.source.clone(),
300            crawler: self.crawler,
301            path: self.path.to_owned(),
302        }
303    }
304    fn get_path(&self) -> String {
305        (&self.path).into()
306    }
307    fn take_value<T: DeserializeOwned>(&mut self) -> CrawlerResult<T> {
308        serde_json::from_value(self.crawler.take()).map_err(|e| {
309            CrawlerError::parsing(
310                &self.path,
311                self.source.clone(),
312                ParseTarget::Other(std::any::type_name::<T>().to_string()),
313                Some(format!("{e}")),
314            )
315        })
316    }
317    fn take_value_pointers<T: DeserializeOwned, S: AsRef<str>>(
318        &mut self,
319        paths: &[S],
320    ) -> CrawlerResult<T> {
321        let mut path_clone = self.path.clone();
322        let Some((found, path)) = paths
323            .iter()
324            .find_map(|p| self.crawler.pointer_mut(p.as_ref()).map(|v| (v.take(), p)))
325        else {
326            return Err(CrawlerError::paths_not_found(
327                path_clone,
328                self.source.clone(),
329                paths.iter().map(|s| s.as_ref().to_string()).collect(),
330            ));
331        };
332        path_clone.push(JsonPath::Pointer(path.as_ref().to_string()));
333        serde_json::from_value(found).map_err(|e| {
334            CrawlerError::parsing(
335                &path_clone,
336                self.source.clone(),
337                ParseTarget::Other(std::any::type_name::<T>().to_string()),
338                Some(format!("{e}")),
339            )
340        })
341    }
342    fn path_exists(&self, path: &str) -> bool {
343        self.crawler.pointer(path).is_some()
344    }
345    fn get_source(&self) -> Arc<String> {
346        self.source.clone()
347    }
348}
349
350impl JsonCrawler for JsonCrawlerOwned {
351    type BorrowTo<'a>
352        = JsonCrawlerBorrowed<'a>
353    where
354        Self: 'a;
355    type IterMut<'a>
356        = JsonCrawlerArrayIterMut<'a>
357    where
358        Self: 'a;
359    type IntoIter = JsonCrawlerArrayIntoIter;
360    fn try_into_iter(self) -> CrawlerResult<Self::IntoIter> {
361        if let JsonCrawlerOwned {
362            source,
363            crawler: serde_json::Value::Array(array),
364            path,
365        } = self
366        {
367            let cur_back = array.len().saturating_sub(1);
368            return Ok(JsonCrawlerArrayIntoIter {
369                source,
370                array: array.into_iter(),
371                path,
372                cur_front: 0,
373                cur_back,
374            });
375        }
376        Err(CrawlerError::parsing(
377            &self.path,
378            self.source.clone(),
379            ParseTarget::Array,
380            None,
381        ))
382    }
383    fn try_iter_mut(&mut self) -> CrawlerResult<Self::IterMut<'_>> {
384        let json_array = self.crawler.as_array_mut().ok_or_else(|| {
385            CrawlerError::parsing(&self.path, self.source.clone(), ParseTarget::Array, None)
386        })?;
387        let path_clone = self.path.clone();
388        let cur_back = json_array.len().saturating_sub(1);
389        Ok(JsonCrawlerArrayIterMut {
390            source: self.source.clone(),
391            array: json_array.iter_mut(),
392            path: path_clone,
393            cur_front: 0,
394            cur_back,
395        })
396    }
397    fn navigate_pointer(self, new_path: impl AsRef<str>) -> CrawlerResult<Self> {
398        let Self {
399            source,
400            crawler: mut old_crawler,
401            mut path,
402        } = self;
403        path.push(JsonPath::pointer(new_path.as_ref()));
404        let crawler = old_crawler
405            .pointer_mut(new_path.as_ref())
406            .map(|v| v.take())
407            .ok_or_else(|| CrawlerError::navigation(&path, source.clone()))?;
408        Ok(Self {
409            source,
410            crawler,
411            path,
412        })
413    }
414    fn navigate_index(self, index: usize) -> CrawlerResult<Self> {
415        let Self {
416            source,
417            crawler: mut old_crawler,
418            mut path,
419        } = self;
420        path.push(JsonPath::IndexNum(index));
421        let crawler = old_crawler
422            .get_mut(index)
423            .map(|v| v.take())
424            .ok_or_else(|| CrawlerError::navigation(&path, source.clone()))?;
425        Ok(Self {
426            source,
427            crawler,
428            path,
429        })
430    }
431    fn borrow_pointer(&mut self, path: impl AsRef<str>) -> CrawlerResult<Self::BorrowTo<'_>> {
432        let mut path_clone = self.path.clone();
433        path_clone.push(JsonPath::Pointer(path.as_ref().to_owned()));
434        let crawler = self
435            .crawler
436            .pointer_mut(path.as_ref())
437            .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?;
438        Ok(JsonCrawlerBorrowed {
439            source: self.source.clone(),
440            crawler,
441            path: path_clone,
442        })
443    }
444    fn borrow_index(&mut self, index: usize) -> CrawlerResult<Self::BorrowTo<'_>> {
445        let mut path_clone = self.path.clone();
446        path_clone.push(JsonPath::IndexNum(index));
447        let crawler = self
448            .crawler
449            .get_mut(index)
450            .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?;
451        Ok(JsonCrawlerBorrowed {
452            source: self.source.clone(),
453            crawler,
454            path: path_clone,
455        })
456    }
457    fn borrow_mut(&mut self) -> Self::BorrowTo<'_> {
458        JsonCrawlerBorrowed {
459            source: self.source.clone(),
460            crawler: &mut self.crawler,
461            path: self.path.to_owned(),
462        }
463    }
464    fn take_value<T: DeserializeOwned>(&mut self) -> CrawlerResult<T> {
465        serde_json::from_value(self.crawler.take()).map_err(|e| {
466            CrawlerError::parsing(
467                &self.path,
468                self.source.clone(),
469                ParseTarget::Other(std::any::type_name::<T>().to_string()),
470                Some(format!("{e}")),
471            )
472        })
473    }
474    fn take_value_pointer<T: DeserializeOwned>(
475        &mut self,
476        path: impl AsRef<str>,
477    ) -> CrawlerResult<T> {
478        let mut path_clone = self.path.clone();
479        path_clone.push(JsonPath::pointer(path.as_ref()));
480        serde_json::from_value(
481            self.crawler
482                .pointer_mut(path.as_ref())
483                .map(|v| v.take())
484                .ok_or_else(|| CrawlerError::navigation(&path_clone, self.source.clone()))?,
485        )
486        .map_err(|e| {
487            CrawlerError::parsing(
488                &path_clone,
489                self.source.clone(),
490                ParseTarget::Other(std::any::type_name::<T>().to_string()),
491                Some(format!("{e}")),
492            )
493        })
494    }
495    fn take_value_pointers<T: DeserializeOwned, S: AsRef<str>>(
496        &mut self,
497        paths: &[S],
498    ) -> CrawlerResult<T> {
499        let mut path_clone = self.path.clone();
500        let Some((found, path)) = paths
501            .iter()
502            .find_map(|p| self.crawler.pointer_mut(p.as_ref()).map(|v| (v.take(), p)))
503        else {
504            return Err(CrawlerError::paths_not_found(
505                path_clone,
506                self.source.clone(),
507                paths.iter().map(|s| s.as_ref().to_string()).collect(),
508            ));
509        };
510        path_clone.push(JsonPath::Pointer(path.as_ref().to_string()));
511        serde_json::from_value(found).map_err(|e| {
512            CrawlerError::parsing(
513                &path_clone,
514                self.source.clone(),
515                ParseTarget::Other(std::any::type_name::<T>().to_string()),
516                Some(format!("{e}")),
517            )
518        })
519    }
520    fn path_exists(&self, path: &str) -> bool {
521        self.crawler.pointer(path).is_some()
522    }
523    fn get_source(&self) -> Arc<String> {
524        self.source.clone()
525    }
526    fn get_path(&self) -> String {
527        (&self.path).into()
528    }
529}
530
/// A single step in the path travelled by a crawler.
#[derive(Clone, PartialEq, Debug)]
pub enum JsonPath {
    /// A pointer string, stored exactly as it was supplied.
    Pointer(String),
    /// An array index; rendered as `/{index}` when converted to a `String`.
    IndexNum(usize),
}
/// Ordered list of the path steps a crawler has travelled; the default value
/// is an empty list.
#[derive(Clone, Default, PartialEq, Debug)]
struct PathList {
    // Steps in the order they were pushed.
    list: Vec<JsonPath>,
}
540
541impl From<&JsonPath> for String {
542    fn from(value: &JsonPath) -> Self {
543        match value {
544            JsonPath::Pointer(p) => p.to_owned(),
545            JsonPath::IndexNum(i) => format! {"/{i}"},
546        }
547    }
548}
549impl JsonPath {
550    pub fn pointer<S: Into<String>>(path: S) -> Self {
551        JsonPath::Pointer(path.into())
552    }
553}
554impl PathList {
555    fn with(mut self, path: JsonPath) -> Self {
556        self.list.push(path);
557        self
558    }
559    fn push(&mut self, path: JsonPath) {
560        self.list.push(path)
561    }
562}
563
564// I believe both implementations are required, due to orphan rules.
565impl From<&PathList> for String {
566    fn from(value: &PathList) -> Self {
567        let mut path = String::new();
568        for p in &value.list {
569            path.push_str(String::from(p).as_str());
570        }
571        path
572    }
573}
574impl From<PathList> for String {
575    fn from(value: PathList) -> Self {
576        let mut path = String::new();
577        for p in &value.list {
578            path.push_str(String::from(p).as_str());
579        }
580        path
581    }
582}