rsonpath/lib.rs
1//! Blazing fast execution of JSONPath queries.
2//!
3//! JSONPath parser, execution engines and byte stream utilities useful when parsing
4//! JSON structures.
5//!
6//! # Examples
7//! ```rust
8//! use rsonpath::engine::{Compiler, Engine, RsonpathEngine};
9//! use rsonpath::input::BorrowedBytes;
10//! use rsonpath::result::count::CountRecorder;
11//! # use std::error::Error;
12//!
13//! # fn main() -> Result<(), Box<dyn Error>> {
14//! // Parse a JSONPath query from string.
15//! let query = rsonpath_syntax::parse("$..phoneNumbers[*].number")?;
16//! // Convert the contents to the Input type required by the Engines.
17//! let mut contents = r#"
18//! {
19//! "person": {
20//! "name": "John",
21//! "surname": "Doe",
22//! "phoneNumbers": [
23//! {
24//! "type": "Home",
25//! "number": "111-222-333"
26//! },
27//! {
28//! "type": "Work",
29//! "number": "123-456-789"
30//! }
31//! ]
32//! }
33//! }
34//! "#;
35//! let input = BorrowedBytes::new(contents.as_bytes());
36//! // Compile the query. The engine can be reused to run the same query on different contents.
37//! let engine = RsonpathEngine::compile_query(&query)?;
38//! // Count the number of occurrences of elements satisfying the query.
39//! let count = engine.count(&input)?;
40//!
41//! assert_eq!(2, count);
42//! # Ok(())
43//! # }
44//! ```
45//! # Input JSON assumptions
46//!
47//! The JSON must be a syntactically valid JSON encoded in UTF-8 as defined by
48//! [RFC4627](https://datatracker.ietf.org/doc/html/rfc4627).
49//!
50//! If the assumptions are violated the algorithm's behavior is undefined. It might return nonsensical results,
51//! not process the whole document, or stop with an error.
52//! It should not panic – if you encounter a panic, you may report it as a bug.
53//! Some simple mistakes are caught, for example missing closing brackets or braces, but robust validation is
54//! sacrificed for performance. Asserting the assumptions falls on the user of this library.
55//! If you need a high-throughput parser that validates the document, take a look at
56//! [simdjson](https://lib.rs/crates/simd-json).
57//!
58//! # JSONPath language
59//!
60//! The library implements the JSONPath syntax as established by Stefan Goessner in
61//! <https://goessner.net/articles/JsonPath/>.
62//! That implementation does not describe its semantics. There is no guarantee that this library has the same semantics
63//! as Goessner's implementation. The semantics used by rsonpath are described below.
64//!
65//! ## Grammar
66//!
67//! ```ebnf
68//! query = [root] , { selector }
69//! root = "$"
70//! selector = child | descendant | wildcard child | wildcard descendant
71//! wildcard child = ".*" | "[*]"
72//! wildcard descendant = "..*" | "..[*]"
73//! child = dot | index
74//! dot = "." , member
75//! descendant = ".." , ( member | index )
76//! index = "[" , quoted member , "]"
77//! member = member first , { member character }
78//! member first = ALPHA | "_" | NONASCII
79//! member character = ALPHANUMERIC | "_" | NONASCII
80//! quoted member = ("'" , single quoted member , "'") | ('"' , double quoted member , '"')
81//! single quoted member = { UNESCAPED | ESCAPED | '"' | "\'" }
82//! double quoted member = { UNESCAPED | ESCAPED | "'" | '\"' }
83//!
84//! ALPHA = ? [A-Za-z] ?
85//! ALPHANUMERIC = ? [A-Za-z0-9] ?
86//! NONASCII = ? [\u0080-\u10FFFF] ?
87//! UNESCAPED = ? [^'"\u0000-\u001F] ?
88//! ESCAPED = ? \\[btnfr/\\] ?
89//! ```
90//!
91//! ## Semantics
92//!
93//! The query is executed from left to right, selector by selector. When a value is found that matches
94//! the current selector, the execution advances to the next selector and evaluates it recursively within
95//! the context of that value.
96//!
97//! ### Root selector (`$`)
98//! The root selector may only appear at the beginning of the query and is implicit if not specified.
//! It matches the root object or array. Thus the query "$" gives either 0 or 1 results, if the JSON
//! is empty or non-empty, respectively.
101//!
102//! ### Child selector (`.<member>`, `[<member>]`)
103//! Matches any value under a specified key in the current object
104//! and then executes the rest of the query on that value.
105//!
106//! ### Child wildcard selector (`.*`, `[*]`)
107//! Matches any value regardless of key in the current object, or any value within the current array,
108//! and then executes the rest of the query on that value.
109//!
110//! ### Descendant selector (`..<member>`, `..[<member>]`)
111//! Switches the engine into a recursive descent mode.
112//! Looks for the specified key in every value nested in the current object or array,
//! recursively, and then executes the rest of the query on that value.
114//!
115//! ### Descendant wildcard selector (`..*`, `..[*]`)
116//! Switches the engine into a recursive descent mode.
117//! Matches any value regardless of key in any object, or any value within any array nested
118//! within the current object or array, recursively, and then executes the rest of the query on that value.
119//!
120//! ## Active development
121//!
122//! Only the aforementioned selectors are supported at this moment.
123//! This library is under active development.
124
125#![doc(html_logo_url = "https://raw.githubusercontent.com/V0ldek/rsonpath/main/img/rsonquery-logo.svg")]
126// Documentation lints, enabled only on --release.
127#![cfg_attr(
128 not(debug_assertions),
129 warn(
130 missing_docs,
131 clippy::cargo_common_metadata,
132 clippy::missing_errors_doc,
133 clippy::missing_panics_doc,
134 clippy::too_long_first_doc_paragraph
135 )
136)]
137#![cfg_attr(not(debug_assertions), warn(rustdoc::missing_crate_level_docs))]
138// Panic-free lints (disabled for tests).
139#![cfg_attr(not(test), warn(clippy::panic, clippy::panic_in_result_fn, clippy::unwrap_used))]
140// IO hygiene, only on --release.
141#![cfg_attr(
142 not(debug_assertions),
143 warn(clippy::print_stderr, clippy::print_stdout, clippy::todo, clippy::dbg_macro)
144)]
145// Docs.rs config.
146#![cfg_attr(docsrs, feature(doc_cfg))]
147
148pub mod automaton;
149pub mod classification;
150mod depth;
151pub mod engine;
152pub mod error;
153pub mod input;
154pub mod result;
155pub(crate) mod string_pattern;
156
157pub use string_pattern::StringPattern;
158
// `BLOCK_SIZE` and `MaskType` are selected by the target's pointer width.
// In every supported configuration `BLOCK_SIZE` equals the number of bits in `MaskType`.

/// Block size used on targets with 32-bit pointers.
#[cfg(target_pointer_width = "32")]
pub(crate) const BLOCK_SIZE: usize = 32;
/// Integer type with exactly [`BLOCK_SIZE`] bits on targets with 32-bit pointers.
#[cfg(target_pointer_width = "32")]
pub(crate) type MaskType = u32;

/// Block size used on targets with 64-bit pointers.
#[cfg(target_pointer_width = "64")]
pub(crate) const BLOCK_SIZE: usize = 64;
/// Integer type with exactly [`BLOCK_SIZE`] bits on targets with 64-bit pointers.
#[cfg(target_pointer_width = "64")]
pub(crate) type MaskType = u64;

// Without this guard an unsupported target would simply get no `BLOCK_SIZE`/`MaskType` at all
// and fail much later with unrelated "cannot find" errors. Fail early with a clear message.
#[cfg(not(any(target_pointer_width = "32", target_pointer_width = "64")))]
compile_error!("rsonpath supports only targets with 32-bit or 64-bit pointers");
169
/// Crate-wide debug logging macro; this is the variant compiled when debug assertions are on.
///
/// Forwards all of its arguments, including an optional leading `target: <expr>`, to
/// [`log::debug`]. The variant compiled without debug assertions expands to nothing, so
/// prefer this macro over calling [`log::debug`] directly: release builds pay no cost for it.
#[cfg(debug_assertions)]
#[allow(unused_macros, reason = "crate-wide debug macros for convenience, may be unused")]
macro_rules! debug {
    // Explicit log target followed by the usual format string and arguments.
    (target: $tgt:expr, $($rest:tt)+) => {
        log::debug!(target: $tgt, $($rest)+)
    };
    // Plain format string and arguments.
    ($($rest:tt)+) => {
        log::debug!($($rest)+)
    };
}
181
/// Debug log a block of bytes as text, labelled `block`.
///
/// `$b` must be an expression with an `iter()` method yielding `&u8`. Every ASCII whitespace
/// byte is replaced with a plain space before printing. If the resulting bytes are not valid
/// UTF-8, the placeholder `[INVALID UTF8]` is logged instead.
///
/// Logging goes through the crate's `debug!` macro, so when that macro expands to nothing
/// `$b` is not evaluated at all.
#[allow(unused_macros, reason = "crate-wide debug macros for convenience, may be unused")]
macro_rules! block {
    ($b:expr) => {
        // `$crate::` rather than `crate::`, matching the other debug helper macros in this file.
        $crate::debug!(
            "{: >24}: {}",
            "block",
            std::str::from_utf8(
                &$b.iter()
                    .map(|x| if x.is_ascii_whitespace() { b' ' } else { *x })
                    .collect::<Vec<_>>()
            )
            .unwrap_or("[INVALID UTF8]")
        );
    };
}
197
/// Crate-wide debug logging macro; this is the variant compiled when debug assertions are off.
///
/// It accepts the same argument shapes as the debug-assertions variant (which forwards to
/// [`log::debug`]) but expands to nothing. The arguments are therefore never evaluated and
/// the call incurs no performance penalty in release mode.
#[cfg(not(debug_assertions))]
#[allow(unused_macros, reason = "crate-wide debug macros for convenience, may be unused")]
macro_rules! debug {
    // Explicit log target followed by the usual format string and arguments: discarded.
    (target: $tgt:expr, $($rest:tt)+) => {};
    // Plain format string and arguments: discarded.
    ($($rest:tt)+) => {};
}
209
/// Debug log the given u64 expression by its full 64-bit binary string representation.
///
/// The output is `$name` right-aligned to 24 columns, then all 64 bits of `$e` printed
/// least-significant bit first, then the plain value of `$e` in parentheses.
///
/// `$e` must have a concrete `u64` (or `&u64`) type and is evaluated twice. Logging goes
/// through the crate's `debug!` macro, so when that macro expands to nothing `$e` is not
/// evaluated at all.
#[allow(unused_macros, reason = "crate-wide debug macros for convenience, may be unused")]
macro_rules! bin_u64 {
    ($name:expr, $e:expr) => {
        $crate::debug!(
            "{: >24}: {:064b} ({})",
            $name,
            {
                // `{:b}` prints the most significant bit first; reversing the bits puts
                // bit 0 leftmost. The annotation rejects non-u64 arguments at compile time.
                let reversed: u64 = ($e).reverse_bits();
                reversed
            },
            $e
        );
    };
}
229
/// Debug log the given u32 expression by its full 32-bit binary string representation.
///
/// The output is `$name` right-aligned to 24 columns, then all 32 bits of `$e` printed
/// least-significant bit first, then the plain value of `$e` in parentheses.
///
/// `$e` must have a concrete `u32` (or `&u32`) type and is evaluated twice. Logging goes
/// through the crate's `debug!` macro, so when that macro expands to nothing `$e` is not
/// evaluated at all.
#[allow(unused_macros, reason = "crate-wide debug macros for convenience, may be unused")]
macro_rules! bin_u32 {
    ($name:expr, $e:expr) => {
        $crate::debug!(
            "{: >24}: {:032b} ({})",
            $name,
            {
                // `{:b}` prints the most significant bit first; reversing the bits puts
                // bit 0 leftmost. The annotation rejects non-u32 arguments at compile time.
                let reversed: u32 = ($e).reverse_bits();
                reversed
            },
            $e
        );
    };
}
249
250#[allow(unused_imports, reason = "crate-wide debug macros for convenience, may be unused")]
251pub(crate) use bin_u32;
252#[allow(unused_imports, reason = "crate-wide debug macros for convenience, may be unused")]
253pub(crate) use bin_u64;
254#[allow(unused_imports, reason = "crate-wide debug macros for convenience, may be unused")]
255pub(crate) use block;
256#[allow(unused_imports, reason = "crate-wide debug macros for convenience, may be unused")]
257pub(crate) use debug;
258
/// An [`Iterator`]-like abstraction in which every read may fail.
pub trait FallibleIterator {
    /// The element type produced by a successful read.
    type Item;
    /// The error type reported by a failed read.
    type Error: std::error::Error;

    /// Advances the iterator and returns the next value.
    ///
    /// [`collect`](FallibleIterator::collect) treats `Ok(None)` as the end of the sequence.
    ///
    /// # Errors
    /// May fail depending on the implementation.
    fn next(&mut self) -> Result<Option<Self::Item>, Self::Error>;

    /// Consumes the iterator and gathers its items into a collection.
    ///
    /// # Errors
    /// Items are read one by one. As soon as a read fails, collecting stops
    /// and that first error is returned instead of a collection.
    #[inline]
    fn collect<B>(self) -> Result<B, Self::Error>
    where
        B: FromIterator<Self::Item>,
        Self: Sized,
    {
        // Collecting an iterator of `Result`s into a `Result` short-circuits on the first `Err`.
        FallibleIntoIter { src: self }.collect()
    }
}

/// Adapter presenting a [`FallibleIterator`] as a standard [`Iterator`] over `Result`s.
struct FallibleIntoIter<F> {
    src: F,
}

impl<F> Iterator for FallibleIntoIter<F>
where
    F: FallibleIterator,
{
    type Item = Result<F::Item, F::Error>;

    fn next(&mut self) -> Option<Self::Item> {
        // `Ok(Some(x))` becomes `Some(Ok(x))`, `Ok(None)` becomes `None`,
        // and `Err(e)` becomes `Some(Err(e))`.
        self.src.next().transpose()
    }
}
302
/// The ASCII space byte.
pub(crate) const JSON_SPACE_BYTE: u8 = b' ';

/// The bytes recognised by [`is_json_whitespace`]: space, horizontal tab, line feed and
/// carriage return.
pub(crate) const JSON_WHITESPACE_BYTES: [u8; 4] = *b" \t\n\r";

/// Returns `true` if and only if `x` is one of [`JSON_WHITESPACE_BYTES`].
#[inline(always)]
#[must_use]
pub(crate) fn is_json_whitespace(x: u8) -> bool {
    JSON_WHITESPACE_BYTES.iter().any(|&ws| ws == x)
}