rsonpath/lib.rs
1//! Blazing fast execution of JSONPath queries.
2//!
3//! JSONPath parser, execution engines and byte stream utilities useful when parsing
4//! JSON structures.
5//!
6//! # Examples
7//! ```rust
8//! use rsonpath::engine::{Compiler, Engine, RsonpathEngine};
9//! use rsonpath::input::BorrowedBytes;
10//! use rsonpath::result::count::CountRecorder;
11//! # use std::error::Error;
12//!
13//! # fn main() -> Result<(), Box<dyn Error>> {
14//! // Parse a JSONPath query from string.
15//! let query = rsonpath_syntax::parse("$..phoneNumbers[*].number")?;
16//! // Convert the contents to the Input type required by the Engines.
17//! let mut contents = r#"
18//! {
19//! "person": {
20//! "name": "John",
21//! "surname": "Doe",
22//! "phoneNumbers": [
23//! {
24//! "type": "Home",
25//! "number": "111-222-333"
26//! },
27//! {
28//! "type": "Work",
29//! "number": "123-456-789"
30//! }
31//! ]
32//! }
33//! }
34//! "#;
35//! let input = BorrowedBytes::new(contents.as_bytes());
36//! // Compile the query. The engine can be reused to run the same query on different contents.
37//! let engine = RsonpathEngine::compile_query(&query)?;
38//! // Count the number of occurrences of elements satisfying the query.
39//! let count = engine.count(&input)?;
40//!
41//! assert_eq!(2, count);
42//! # Ok(())
43//! # }
44//! ```
45//! # Input JSON assumptions
46//!
47//! The JSON must be a syntactically valid JSON encoded in UTF-8 as defined by
48//! [RFC4627](https://datatracker.ietf.org/doc/html/rfc4627).
49//!
50//! If the assumptions are violated the algorithm's behavior is undefined. It might return nonsensical results,
51//! not process the whole document, or stop with an error.
52//! It should not panic – if you encounter a panic, you may report it as a bug.
53//! Some simple mistakes are caught, for example missing closing brackets or braces, but robust validation is
54//! sacrificed for performance. Asserting the assumptions falls on the user of this library.
55//! If you need a high-throughput parser that validates the document, take a look at
56//! [simdjson](https://lib.rs/crates/simd-json).
57//!
58//! # JSONPath language
59//!
60//! The library implements the JSONPath syntax as established by Stefan Goessner in
61//! <https://goessner.net/articles/JsonPath/>.
62//! That implementation does not describe its semantics. There is no guarantee that this library has the same semantics
63//! as Goessner's implementation. The semantics used by rsonpath are described below.
64//!
65//! ## Grammar
66//!
67//! ```ebnf
68//! query = [root] , { selector }
69//! root = "$"
70//! selector = child | descendant | wildcard child | wildcard descendant
71//! wildcard child = ".*" | "[*]"
72//! wildcard descendant = "..*" | "..[*]"
73//! child = dot | index
74//! dot = "." , member
75//! descendant = ".." , ( member | index )
76//! index = "[" , quoted member , "]"
77//! member = member first , { member character }
78//! member first = ALPHA | "_" | NONASCII
79//! member character = ALPHANUMERIC | "_" | NONASCII
80//! quoted member = ("'" , single quoted member , "'") | ('"' , double quoted member , '"')
81//! single quoted member = { UNESCAPED | ESCAPED | '"' | "\'" }
82//! double quoted member = { UNESCAPED | ESCAPED | "'" | '\"' }
83//!
84//! ALPHA = ? [A-Za-z] ?
85//! ALPHANUMERIC = ? [A-Za-z0-9] ?
86//! NONASCII = ? [\u0080-\u10FFFF] ?
87//! UNESCAPED = ? [^'"\u0000-\u001F] ?
88//! ESCAPED = ? \\[btnfr/\\] ?
89//! ```
90//!
91//! ## Semantics
92//!
93//! The query is executed from left to right, selector by selector. When a value is found that matches
94//! the current selector, the execution advances to the next selector and evaluates it recursively within
95//! the context of that value.
96//!
97//! ### Root selector (`$`)
98//! The root selector may only appear at the beginning of the query and is implicit if not specified.
99//! It matches the root object or array. Thus the query "$" gives either 0 or 1 results, if the JSON
100//! is empty or non-empty, respectively.
101//!
102//! ### Child selector (`.<member>`, `[<member>]`)
103//! Matches any value under a specified key in the current object
104//! and then executes the rest of the query on that value.
105//!
106//! ### Child wildcard selector (`.*`, `[*]`)
107//! Matches any value regardless of key in the current object, or any value within the current array,
108//! and then executes the rest of the query on that value.
109//!
110//! ### Descendant selector (`..<member>`, `..[<member>]`)
111//! Switches the engine into a recursive descent mode.
112//! Looks for the specified key in every value nested in the current object or array,
113//! recursively, and then executes the rest of the query on that value.
114//!
115//! ### Descendant wildcard selector (`..*`, `..[*]`)
116//! Switches the engine into a recursive descent mode.
117//! Matches any value regardless of key in any object, or any value within any array nested
118//! within the current object or array, recursively, and then executes the rest of the query on that value.
119//!
120//! ## Active development
121//!
122//! Only the aforementioned selectors are supported at this moment.
123//! This library is under active development.
124
125#![doc(html_logo_url = "https://raw.githubusercontent.com/V0ldek/rsonpath/main/img/rsonquery-logo.svg")]
126// Documentation lints, enabled only on --release.
127#![cfg_attr(
128 not(debug_assertions),
129 warn(missing_docs, clippy::missing_errors_doc, clippy::missing_panics_doc,)
130)]
131#![cfg_attr(not(debug_assertions), warn(rustdoc::missing_crate_level_docs))]
132// Panic-free lints (disabled for tests).
133#![cfg_attr(not(test), warn(clippy::panic, clippy::panic_in_result_fn, clippy::unwrap_used))]
134// IO hygiene, only on --release.
135#![cfg_attr(
136 not(debug_assertions),
137 warn(clippy::print_stderr, clippy::print_stdout, clippy::todo)
138)]
139// Docs.rs config.
140#![cfg_attr(docsrs, feature(doc_cfg))]
141
142pub mod automaton;
143pub mod classification;
144mod depth;
145pub mod engine;
146pub mod error;
147pub mod input;
148pub mod result;
149pub(crate) mod string_pattern;
150
151pub use string_pattern::StringPattern;
152
// Block size and mask type are selected by the target's pointer width.
// Only 32-bit and 64-bit targets get a definition; on any other pointer
// width neither item exists.

/// Block size on targets with 32-bit pointers; equals the bit width of [`MaskType`].
#[cfg(target_pointer_width = "32")]
pub(crate) const BLOCK_SIZE: usize = 32;
/// Mask integer type on targets with 32-bit pointers.
#[cfg(target_pointer_width = "32")]
pub(crate) type MaskType = u32;

/// Block size on targets with 64-bit pointers; equals the bit width of [`MaskType`].
#[cfg(target_pointer_width = "64")]
pub(crate) const BLOCK_SIZE: usize = 64;
/// Mask integer type on targets with 64-bit pointers.
#[cfg(target_pointer_width = "64")]
pub(crate) type MaskType = u64;
163
/// Crate-internal debug logging macro; this is the variant compiled when
/// debug assertions are enabled.
///
/// All tokens, including an optional leading `target: <expr>,`, are forwarded
/// verbatim to [`log::debug`]. When debug assertions are disabled a different
/// definition of this macro is compiled that expands to nothing.
///
/// Prefer this over calling [`log::debug`] directly: the logging call is
/// then removed automatically from builds without debug assertions.
#[cfg(debug_assertions)]
#[allow(unused_macros)]
macro_rules! debug {
    (target: $target:expr, $($arg:tt)+) => {
        log::debug!(target: $target, $($arg)+)
    };
    ($($arg:tt)+) => {
        log::debug!($($arg)+)
    };
}
175
/// Debug log a block of bytes as text under the `block` label.
///
/// The argument must provide `.iter()` yielding `&u8` items. Every byte for which
/// [`u8::is_ascii_whitespace`] returns `true` is replaced with a single space (`b' '`);
/// all other bytes are kept as they are. If the resulting bytes are not valid UTF-8,
/// the placeholder text `[INVALID UTF8]` is logged instead.
///
/// The whole computation is passed as arguments to the crate's `debug!` macro,
/// so it is only performed when that macro expands to an actual logging call.
#[allow(unused_macros)]
macro_rules! block {
    ($b:expr) => {
        // `$crate::` keeps the path hygienic and matches `bin_u32!`/`bin_u64!`.
        $crate::debug!(
            "{: >24}: {}",
            "block",
            std::str::from_utf8(
                &$b.iter()
                    .map(|x| if x.is_ascii_whitespace() { b' ' } else { *x })
                    .collect::<Vec<_>>()
            )
            .unwrap_or("[INVALID UTF8]")
        );
    };
}
191
/// Crate-internal debug logging macro; this is the variant compiled when
/// debug assertions are disabled.
///
/// It accepts the same invocation shapes as the debug-assertions variant
/// (with or without a leading `target: <expr>,`) and expands to nothing, so
/// the arguments are never evaluated and no logging code is emitted.
///
/// Prefer this over calling [`log::debug`] directly for that reason.
#[cfg(not(debug_assertions))]
#[allow(unused_macros)]
macro_rules! debug {
    (target: $target:expr, $($arg:tt)+) => ();
    ($($arg:tt)+) => ();
}
203
/// Debug log the given `u64` expression as a full 64-character binary string,
/// followed by its regular `Display` form in parentheses.
///
/// The bits are printed in reversed order: the least significant bit of the value
/// is the leftmost character of the binary string. `$name` is printed as a
/// right-aligned label, padded to 24 characters.
///
/// The expression is only used inside the arguments of the crate's `debug!` macro,
/// so it is not evaluated when that macro expands to nothing.
#[allow(unused_macros)]
macro_rules! bin_u64 {
    ($name:expr, $e:expr) => {
        $crate::debug!(
            "{: >24}: {:064b} ({})",
            $name,
            // Bit `i` of the value moves to position `63 - i`.
            u64::reverse_bits($e),
            $e
        );
    };
}
223
/// Debug log the given `u32` expression as a full 32-character binary string,
/// followed by its regular `Display` form in parentheses.
///
/// The bits are printed in reversed order: the least significant bit of the value
/// is the leftmost character of the binary string. `$name` is printed as a
/// right-aligned label, padded to 24 characters.
///
/// The expression is only used inside the arguments of the crate's `debug!` macro,
/// so it is not evaluated when that macro expands to nothing.
#[allow(unused_macros)]
macro_rules! bin_u32 {
    ($name:expr, $e:expr) => {
        $crate::debug!(
            "{: >24}: {:032b} ({})",
            $name,
            // Bit `i` of the value moves to position `31 - i`.
            u32::reverse_bits($e),
            $e
        );
    };
}
243
244#[allow(unused_imports)]
245pub(crate) use bin_u32;
246#[allow(unused_imports)]
247pub(crate) use bin_u64;
248#[allow(unused_imports)]
249pub(crate) use block;
250#[allow(unused_imports)]
251pub(crate) use debug;
252
253/// Variation of the [`Iterator`] trait where each read can fail.
254pub trait FallibleIterator {
255 /// Type of items returned by this iterator.
256 type Item;
257 /// Type of errors that can occur when reading from this iterator.
258 type Error: std::error::Error;
259
260 /// Advances the iterator and returns the next value.
261 ///
262 /// # Errors
263 /// May fail depending on the implementation.
264 fn next(&mut self) -> Result<Option<Self::Item>, Self::Error>;
265
266 /// Transforms an iterator into a collection.
267 ///
268 /// # Errors
269 /// This consumes the iterator and reads from it. If any read fails,
270 /// the result is the first error encountered.
271 #[inline]
272 fn collect<B>(self) -> Result<B, Self::Error>
273 where
274 B: FromIterator<Self::Item>,
275 Self: Sized,
276 {
277 let iter = FallibleIntoIter { src: self };
278 iter.collect()
279 }
280}
281
282struct FallibleIntoIter<F> {
283 src: F,
284}
285
286impl<F: FallibleIterator> Iterator for FallibleIntoIter<F> {
287 type Item = Result<F::Item, F::Error>;
288
289 fn next(&mut self) -> Option<Self::Item> {
290 match self.src.next() {
291 Ok(item) => item.map(Ok),
292 Err(e) => Some(Err(e)),
293 }
294 }
295}
296
/// The ASCII space byte (`0x20`).
pub(crate) const JSON_SPACE_BYTE: u8 = b' ';

/// The bytes treated as JSON whitespace, in order: space, horizontal tab,
/// line feed and carriage return.
pub(crate) const JSON_WHITESPACE_BYTES: [u8; 4] = *b" \t\n\r";
300
301#[inline(always)]
302#[must_use]
303pub(crate) fn is_json_whitespace(x: u8) -> bool {
304 JSON_WHITESPACE_BYTES.contains(&x)
305}