rsonpath/
lib.rs

1//! Blazing fast execution of JSONPath queries.
2//!
3//! JSONPath parser, execution engines and byte stream utilities useful when parsing
4//! JSON structures.
5//!
6//! # Examples
7//! ```rust
8//! use rsonpath::engine::{Compiler, Engine, RsonpathEngine};
9//! use rsonpath::input::BorrowedBytes;
10//! use rsonpath::result::count::CountRecorder;
11//! # use std::error::Error;
12//!
13//! # fn main() -> Result<(), Box<dyn Error>> {
14//! // Parse a JSONPath query from string.
15//! let query = rsonpath_syntax::parse("$..phoneNumbers[*].number")?;
16//! // Convert the contents to the Input type required by the Engines.
//! let contents = r#"
18//! {
19//!   "person": {
20//!     "name": "John",
21//!     "surname": "Doe",
22//!     "phoneNumbers": [
23//!       {
24//!         "type": "Home",
25//!         "number": "111-222-333"
26//!       },
27//!       {
28//!         "type": "Work",
29//!         "number": "123-456-789"
30//!       }
31//!     ]
32//!   }
33//! }
34//! "#;
35//! let input = BorrowedBytes::new(contents.as_bytes());
36//! // Compile the query. The engine can be reused to run the same query on different contents.
37//! let engine = RsonpathEngine::compile_query(&query)?;
38//! // Count the number of occurrences of elements satisfying the query.
39//! let count = engine.count(&input)?;
40//!
41//! assert_eq!(2, count);
42//! # Ok(())
43//! # }
44//! ```
45//! # Input JSON assumptions
46//!
47//! The JSON must be a syntactically valid JSON encoded in UTF-8 as defined by
48//! [RFC4627](https://datatracker.ietf.org/doc/html/rfc4627).
49//!
50//! If the assumptions are violated the algorithm's behavior is undefined. It might return nonsensical results,
51//! not process the whole document, or stop with an error.
52//! It should not panic &ndash; if you encounter a panic, you may report it as a bug.
53//! Some simple mistakes are caught, for example missing closing brackets or braces, but robust validation is
54//! sacrificed for performance. Asserting the assumptions falls on the user of this library.
55//! If you need a high-throughput parser that validates the document, take a look at
56//! [simdjson](https://lib.rs/crates/simd-json).
57//!
58//! # JSONPath language
59//!
60//! The library implements the JSONPath syntax as established by Stefan Goessner in
61//! <https://goessner.net/articles/JsonPath/>.
62//! That implementation does not describe its semantics. There is no guarantee that this library has the same semantics
63//! as Goessner's implementation. The semantics used by rsonpath are described below.
64//!
65//! ## Grammar
66//!
67//! ```ebnf
68//! query = [root] , { selector }
69//! root = "$"
70//! selector = child | descendant | wildcard child | wildcard descendant
71//! wildcard child = ".*" | "[*]"
72//! wildcard descendant = "..*" | "..[*]"
73//! child = dot | index
74//! dot = "." , member
75//! descendant = ".." , ( member | index )
76//! index = "[" , quoted member , "]"
77//! member = member first , { member character }
78//! member first = ALPHA | "_" | NONASCII
79//! member character = ALPHANUMERIC | "_" | NONASCII
80//! quoted member = ("'" , single quoted member , "'") | ('"' , double quoted member , '"')
81//! single quoted member = { UNESCAPED | ESCAPED | '"' | "\'" }
82//! double quoted member = { UNESCAPED | ESCAPED | "'" | '\"' }
83//!
84//! ALPHA = ? [A-Za-z] ?
85//! ALPHANUMERIC = ? [A-Za-z0-9] ?
86//! NONASCII = ? [\u0080-\u10FFFF] ?
87//! UNESCAPED = ? [^'"\u0000-\u001F] ?
88//! ESCAPED = ? \\[btnfr/\\] ?
89//! ```
90//!
91//! ## Semantics
92//!
93//! The query is executed from left to right, selector by selector. When a value is found that matches
94//! the current selector, the execution advances to the next selector and evaluates it recursively within
95//! the context of that value.
96//!
97//! ### Root selector (`$`)
98//! The root selector may only appear at the beginning of the query and is implicit if not specified.
//! It matches the root object or array. Thus the query "$" gives either 0 or 1 results, if the JSON
//! is empty or non-empty, respectively.
101//!
102//! ### Child selector (`.<member>`, `[<member>]`)
103//! Matches any value under a specified key in the current object
104//! and then executes the rest of the query on that value.
105//!
106//! ### Child wildcard selector (`.*`, `[*]`)
107//! Matches any value regardless of key in the current object, or any value within the current array,
108//! and then executes the rest of the query on that value.
109//!
110//! ### Descendant selector (`..<member>`, `..[<member>]`)
111//! Switches the engine into a recursive descent mode.
112//! Looks for the specified key in every value nested in the current object or array,
//! recursively, and then executes the rest of the query on that value.
114//!
115//! ### Descendant wildcard selector (`..*`, `..[*]`)
116//! Switches the engine into a recursive descent mode.
117//! Matches any value regardless of key in any object, or any value within any array nested
118//! within the current object or array, recursively, and then executes the rest of the query on that value.
119//!
120//! ## Active development
121//!
122//! Only the aforementioned selectors are supported at this moment.
123//! This library is under active development.
124
125#![doc(html_logo_url = "https://raw.githubusercontent.com/V0ldek/rsonpath/main/img/rsonquery-logo.svg")]
126// Documentation lints, enabled only on --release.
127#![cfg_attr(
128    not(debug_assertions),
129    warn(missing_docs, clippy::missing_errors_doc, clippy::missing_panics_doc,)
130)]
131#![cfg_attr(not(debug_assertions), warn(rustdoc::missing_crate_level_docs))]
132// Panic-free lints (disabled for tests).
133#![cfg_attr(not(test), warn(clippy::panic, clippy::panic_in_result_fn, clippy::unwrap_used))]
134// IO hygiene, only on --release.
135#![cfg_attr(
136    not(debug_assertions),
137    warn(clippy::print_stderr, clippy::print_stdout, clippy::todo)
138)]
139// Docs.rs config.
140#![cfg_attr(docsrs, feature(doc_cfg))]
141
142pub mod automaton;
143pub mod classification;
144mod depth;
145pub mod engine;
146pub mod error;
147pub mod input;
148pub mod result;
149pub(crate) mod string_pattern;
150
151pub use string_pattern::StringPattern;
152
153cfg_if::cfg_if! {
154    if #[cfg(target_pointer_width = "32")] {
155        pub(crate) const BLOCK_SIZE: usize = 32;
156        pub(crate) type MaskType = u32;
157    }
158    else if #[cfg(target_pointer_width = "64")] {
159        pub(crate) const BLOCK_SIZE: usize = 64;
160        pub(crate) type MaskType = u64;
161    }
162}
163
/// Debug-build logging macro.
///
/// This definition is compiled only when debug assertions are enabled. It forwards all of its
/// arguments, including an optional leading `target: <expr>,`, to [`log::debug`].
///
/// Prefer it over calling [`log::debug`] directly: the definition compiled when debug
/// assertions are disabled expands to nothing, so logging incurs no performance penalty there.
#[cfg(debug_assertions)]
#[allow(unused_macros)]
macro_rules! debug {
    (target: $target:expr, $($arg:tt)+) => {
        log::debug!(target: $target, $($arg)+)
    };
    ($($arg:tt)+) => {
        log::debug!($($arg)+)
    };
}
175
/// Debug log a block of bytes as text.
///
/// `$b` must provide an `iter()` yielding `&u8`. Every ASCII whitespace byte is replaced with a
/// single space, which keeps tabs and line breaks out of the logged line. If the resulting bytes
/// are not valid UTF-8, the placeholder `[INVALID UTF8]` is logged instead.
///
/// The message is emitted through the crate's `debug!` macro, labelled `block` and right-aligned
/// in a 24-character column.
#[allow(unused_macros)]
macro_rules! block {
    ($b:expr) => {
        // `$crate::` keeps the path form consistent with the other logging helper macros.
        $crate::debug!(
            "{: >24}: {}",
            "block",
            std::str::from_utf8(
                &$b.iter()
                    .map(|x| if x.is_ascii_whitespace() { b' ' } else { *x })
                    .collect::<Vec<_>>()
            )
            .unwrap_or("[INVALID UTF8]")
        );
    };
}
191
/// No-op stand-in for the debug logging macro.
///
/// This definition is compiled only when debug assertions are disabled. It accepts the same
/// invocation forms as the definition that forwards to [`log::debug`], but expands to nothing:
/// the arguments are never evaluated and logging incurs no performance penalty.
#[cfg(not(debug_assertions))]
#[allow(unused_macros)]
macro_rules! debug {
    (target: $target:expr, $($arg:tt)+) => ();
    ($($arg:tt)+) => ();
}
203
/// Debug log the given `u64` expression as a 64-character binary string, followed by its
/// decimal value in parentheses.
///
/// The bits are printed in reversed order: the least significant bit comes first (leftmost)
/// and the most significant bit comes last.
///
/// `$name` is printed right-aligned in a 24-character column in front of the bits.
/// `$e` is expanded twice, once for each printed form.
#[allow(unused_macros)]
macro_rules! bin_u64 {
    ($name:expr, $e:expr) => {
        $crate::debug!(
            "{: >24}: {:064b} ({})",
            $name,
            {
                // Masking with `u64::MAX` changes no bits; it only pins the type to `u64` while
                // still accepting anything that can be `&`-ed with a `u64` (e.g. a bare literal).
                let value: u64 = ($e) & u64::MAX;
                value.reverse_bits()
            },
            $e
        );
    };
}
223
/// Debug log the given `u32` expression as a 32-character binary string, followed by its
/// decimal value in parentheses.
///
/// The bits are printed in reversed order: the least significant bit comes first (leftmost)
/// and the most significant bit comes last.
///
/// `$name` is printed right-aligned in a 24-character column in front of the bits.
/// `$e` is expanded twice, once for each printed form.
#[allow(unused_macros)]
macro_rules! bin_u32 {
    ($name:expr, $e:expr) => {
        $crate::debug!(
            "{: >24}: {:032b} ({})",
            $name,
            {
                // Masking with `u32::MAX` changes no bits; it only pins the type to `u32` while
                // still accepting anything that can be `&`-ed with a `u32` (e.g. a bare literal).
                let value: u32 = ($e) & u32::MAX;
                value.reverse_bits()
            },
            $e
        );
    };
}
243
244#[allow(unused_imports)]
245pub(crate) use bin_u32;
246#[allow(unused_imports)]
247pub(crate) use bin_u64;
248#[allow(unused_imports)]
249pub(crate) use block;
250#[allow(unused_imports)]
251pub(crate) use debug;
252
/// An [`Iterator`]-like trait in which every read may end in an error.
pub trait FallibleIterator {
    /// The type of the elements produced by this iterator.
    type Item;
    /// The type of the error a read from this iterator can produce.
    type Error: std::error::Error;

    /// Tries to advance the iterator and yields the next value, if there is one.
    ///
    /// # Errors
    /// Whether and when a read fails is up to the implementation.
    fn next(&mut self) -> Result<Option<Self::Item>, Self::Error>;

    /// Consumes the iterator and gathers its items into a collection.
    ///
    /// # Errors
    /// Items are read one by one. The first read that fails ends the
    /// collection and its error becomes the result.
    #[inline]
    fn collect<B>(self) -> Result<B, Self::Error>
    where
        B: FromIterator<Self::Item>,
        Self: Sized,
    {
        FallibleIntoIter { src: self }.collect()
    }
}

/// Adapter exposing a [`FallibleIterator`] as a standard [`Iterator`] over `Result`s.
struct FallibleIntoIter<F> {
    src: F,
}

impl<F: FallibleIterator> Iterator for FallibleIntoIter<F> {
    type Item = Result<F::Item, F::Error>;

    fn next(&mut self) -> Option<Self::Item> {
        // `Ok(Some(x))` becomes `Some(Ok(x))`, `Ok(None)` becomes `None`,
        // and `Err(e)` becomes `Some(Err(e))`.
        self.src.next().transpose()
    }
}
296
/// The space character as a byte.
pub(crate) const JSON_SPACE_BYTE: u8 = b' ';

/// The four whitespace bytes: space, horizontal tab, line feed and carriage return.
pub(crate) const JSON_WHITESPACE_BYTES: [u8; 4] = [JSON_SPACE_BYTE, b'\t', b'\n', b'\r'];

/// Returns `true` if and only if `x` is one of the [`JSON_WHITESPACE_BYTES`].
#[inline(always)]
#[must_use]
pub(crate) fn is_json_whitespace(x: u8) -> bool {
    let [space, tab, line_feed, carriage_return] = JSON_WHITESPACE_BYTES;
    x == space || x == tab || x == line_feed || x == carriage_return
}