1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
//! Blazing fast execution of JSONPath queries.
//!
//! JSONPath parser, execution engines and byte stream utilities useful when parsing
//! JSON structures.
//!
//! # Examples
//! ```rust
//! use rsonpath::engine::{Compiler, Engine, RsonpathEngine};
//! use rsonpath::input::BorrowedBytes;
//! use rsonpath::result::count::CountRecorder;
//! # use std::error::Error;
//!
//! # fn main() -> Result<(), Box<dyn Error>> {
//! // Parse a JSONPath query from string.
//! let query = rsonpath_syntax::parse("$..phoneNumbers[*].number")?;
//! // Convert the contents to the Input type required by the Engines.
//! let contents = r#"
//! {
//!   "person": {
//!     "name": "John",
//!     "surname": "Doe",
//!     "phoneNumbers": [
//!       {
//!         "type": "Home",
//!         "number": "111-222-333"
//!       },
//!       {
//!         "type": "Work",
//!         "number": "123-456-789"
//!       }
//!     ]
//!   }
//! }
//! "#;
//! let input = BorrowedBytes::new(contents.as_bytes());
//! // Compile the query. The engine can be reused to run the same query on different contents.
//! let engine = RsonpathEngine::compile_query(&query)?;
//! // Count the number of occurrences of elements satisfying the query.
//! let count = engine.count(&input)?;
//!
//! assert_eq!(2, count);
//! # Ok(())
//! # }
//! ```
//! # Input JSON assumptions
//!
//! The JSON must be a syntactically valid JSON encoded in UTF-8 as defined by
//! [RFC4627](https://datatracker.ietf.org/doc/html/rfc4627).
//!
//! If the assumptions are violated the algorithm's behavior is undefined. It might return nonsensical results,
//! not process the whole document, or stop with an error.
//! It should not panic &ndash; if you encounter a panic, you may report it as a bug.
//! Some simple mistakes are caught, for example missing closing brackets or braces, but robust validation is
//! sacrificed for performance. Asserting the assumptions falls on the user of this library.
//! If you need a high-throughput parser that validates the document, take a look at
//! [simdjson](https://lib.rs/crates/simd-json).
//!
//! # JSONPath language
//!
//! The library implements the JSONPath syntax as established by Stefan Goessner in
//! <https://goessner.net/articles/JsonPath/>.
//! That implementation does not describe its semantics. There is no guarantee that this library has the same semantics
//! as Goessner's implementation. The semantics used by rsonpath are described below.
//!
//! ## Grammar
//!
//! ```ebnf
//! query = [root] , { selector }
//! root = "$"
//! selector = child | descendant | wildcard child | wildcard descendant
//! wildcard child = ".*" | "[*]"
//! wildcard descendant = "..*" | "..[*]"
//! child = dot | index
//! dot = "." , member
//! descendant = ".." , ( member | index )
//! index = "[" , quoted member , "]"
//! member = member first , { member character }
//! member first = ALPHA | "_" | NONASCII
//! member character = ALPHANUMERIC | "_" | NONASCII
//! quoted member = ("'" , single quoted member , "'") | ('"' , double quoted member , '"')
//! single quoted member = { UNESCAPED | ESCAPED | '"' | "\'" }
//! double quoted member = { UNESCAPED | ESCAPED | "'" | '\"' }
//!
//! ALPHA = ? [A-Za-z] ?
//! ALPHANUMERIC = ? [A-Za-z0-9] ?
//! NONASCII = ? [\u0080-\u10FFFF] ?
//! UNESCAPED = ? [^'"\u0000-\u001F] ?
//! ESCAPED = ? \\[btnfr/\\] ?
//! ```
//!
//! ## Semantics
//!
//! The query is executed from left to right, selector by selector. When a value is found that matches
//! the current selector, the execution advances to the next selector and evaluates it recursively within
//! the context of that value.
//!
//! ### Root selector (`$`)
//! The root selector may only appear at the beginning of the query and is implicit if not specified.
//! It matches the root object or array. Thus the query "$" gives either 0 or 1 results, if the JSON
//! is empty or non-empty, respectively.
//!
//! ### Child selector (`.<member>`, `[<member>]`)
//! Matches any value under a specified key in the current object
//! and then executes the rest of the query on that value.
//!
//! ### Child wildcard selector (`.*`, `[*]`)
//! Matches any value regardless of key in the current object, or any value within the current array,
//! and then executes the rest of the query on that value.
//!
//! ### Descendant selector (`..<member>`, `..[<member>]`)
//! Switches the engine into a recursive descent mode.
//! Looks for the specified key in every value nested in the current object or array,
//! recursively, and then executes the rest of the query on that value.
//!
//! ### Descendant wildcard selector (`..*`, `..[*]`)
//! Switches the engine into a recursive descent mode.
//! Matches any value regardless of key in any object, or any value within any array nested
//! within the current object or array, recursively, and then executes the rest of the query on that value.
//!
//! ## Active development
//!
//! Only the aforementioned selectors are supported at this moment.
//! This library is under active development.

#![doc(html_logo_url = "https://raw.githubusercontent.com/V0ldek/rsonpath/main/img/rsonquery-logo.svg")]
// Documentation lints, enabled only on --release.
#![cfg_attr(
    not(debug_assertions),
    warn(missing_docs, clippy::missing_errors_doc, clippy::missing_panics_doc,)
)]
#![cfg_attr(not(debug_assertions), warn(rustdoc::missing_crate_level_docs))]
// Generic pedantic lints.
#![warn(
    explicit_outlives_requirements,
    semicolon_in_expressions_from_macros,
    unreachable_pub,
    unused_import_braces,
    unused_lifetimes
)]
// Clippy pedantic lints.
#![warn(
    clippy::allow_attributes_without_reason,
    clippy::cargo_common_metadata,
    clippy::cast_lossless,
    clippy::cloned_instead_of_copied,
    clippy::empty_drop,
    clippy::empty_line_after_outer_attr,
    clippy::equatable_if_let,
    clippy::expl_impl_clone_on_copy,
    clippy::explicit_deref_methods,
    clippy::explicit_into_iter_loop,
    clippy::explicit_iter_loop,
    clippy::fallible_impl_from,
    clippy::flat_map_option,
    clippy::if_then_some_else_none,
    clippy::inconsistent_struct_constructor,
    clippy::large_digit_groups,
    clippy::let_underscore_must_use,
    clippy::manual_ok_or,
    clippy::map_err_ignore,
    clippy::map_unwrap_or,
    clippy::match_same_arms,
    clippy::match_wildcard_for_single_variants,
    clippy::missing_inline_in_public_items,
    clippy::mod_module_files,
    clippy::must_use_candidate,
    clippy::needless_continue,
    clippy::needless_for_each,
    clippy::needless_pass_by_value,
    clippy::ptr_as_ptr,
    clippy::redundant_closure_for_method_calls,
    clippy::ref_binding_to_reference,
    clippy::ref_option_ref,
    clippy::rest_pat_in_fully_bound_structs,
    clippy::undocumented_unsafe_blocks,
    clippy::unneeded_field_pattern,
    clippy::unseparated_literal_suffix,
    clippy::unreadable_literal,
    clippy::unused_self,
    clippy::use_self
)]
// Panic-free lint.
#![warn(clippy::exit)]
// Panic-free lints (disabled for tests).
#![cfg_attr(not(test), warn(clippy::panic, clippy::panic_in_result_fn, clippy::unwrap_used))]
// IO hygiene, only on --release.
#![cfg_attr(
    not(debug_assertions),
    warn(clippy::print_stderr, clippy::print_stdout, clippy::todo)
)]
#![cfg_attr(docsrs, feature(doc_cfg))]

pub mod automaton;
pub mod classification;
mod depth;
pub mod engine;
pub mod error;
pub mod input;
pub mod result;

// Block parameters selected by the target's pointer width: `MaskType` is the unsigned
// integer type with as many bits as a pointer, and `BLOCK_SIZE` is that same bit count.
// NOTE(review): only 32- and 64-bit pointer widths have a branch here; on any other
// target neither `BLOCK_SIZE` nor `MaskType` gets defined.
cfg_if::cfg_if! {
    if #[cfg(target_pointer_width = "32")] {
        // 32-bit pointers: a `BLOCK_SIZE` of 32 paired with a `u32` mask.
        pub(crate) const BLOCK_SIZE: usize = 32;
        pub(crate) type MaskType = u32;
    }
    else if #[cfg(target_pointer_width = "64")] {
        // 64-bit pointers: a `BLOCK_SIZE` of 64 paired with a `u64` mask.
        pub(crate) const BLOCK_SIZE: usize = 64;
        pub(crate) type MaskType = u64;
    }
}

/// Debug-build flavour of the crate's `debug!` logging macro.
///
/// This definition is compiled only when debug assertions are enabled. It hands every
/// token it receives, including an optional leading `target: <expr>,`, straight to
/// [`log::debug`].
///
/// Prefer this over calling [`log::debug`] directly: the counterpart compiled without
/// debug assertions expands to nothing, so release builds pay no logging cost.
#[cfg(debug_assertions)]
#[allow(unused_macros)]
macro_rules! debug {
    (target: $log_target:expr, $($fmt_args:tt)+) => {
        log::debug!(target: $log_target, $($fmt_args)+)
    };
    ($($fmt_args:tt)+) => {
        log::debug!($($fmt_args)+)
    };
}

/// Debug log the given block of bytes as a single line of text.
///
/// `$b` is any expression with an `.iter()` that yields `&u8`. Every ASCII whitespace byte
/// is replaced with a plain space so that newlines and tabs in the block do not break the
/// log line. If the resulting bytes are not valid UTF-8, the placeholder `[INVALID UTF8]`
/// is logged instead.
///
/// The output is routed through the crate's `debug!` macro under the label `block`,
/// right-aligned in a 24-character column.
#[allow(unused_macros)]
macro_rules! block {
    ($b:expr) => {
        // `$crate::` keeps the path hygienic, matching `bin_u64!` and `bin_u32!`.
        $crate::debug!(
            "{: >24}: {}",
            "block",
            std::str::from_utf8(
                &$b.iter()
                    .map(|x| if x.is_ascii_whitespace() { b' ' } else { *x })
                    .collect::<Vec<_>>()
            )
            .unwrap_or("[INVALID UTF8]")
        );
    };
}

/// Release-build flavour of the crate's `debug!` logging macro.
///
/// This definition is compiled only when debug assertions are disabled. It accepts an
/// optional leading `target: <expr>,` followed by one or more tokens, and expands to
/// nothing, so the arguments are never evaluated.
///
/// Prefer this over calling [`log::debug`] directly: the logging call is removed from
/// release builds entirely and incurs no performance penalty.
#[cfg(not(debug_assertions))]
#[allow(unused_macros)]
macro_rules! debug {
    (target: $log_target:expr, $($fmt_args:tt)+) => {};
    ($($fmt_args:tt)+) => {};
}

/// Debug log the given u64 expression by its full 64-bit binary string representation.
///
/// The bits are printed least-significant first: the value is bit-reversed before being
/// formatted as a zero-padded 64-digit binary number. The unmodified value follows in
/// parentheses. `$name` is the label, right-aligned in a 24-character column.
///
/// `$e` must be a `u64` value. It appears twice in the expansion, so it is evaluated at
/// most twice per invocation; keep it free of side effects.
#[allow(unused_macros)]
macro_rules! bin_u64 {
    ($name:expr, $e:expr) => {
        $crate::debug!(
            "{: >24}: {:064b} ({})",
            $name,
            // Standard-library bit reversal: bit `i` of the input becomes bit `63 - i`.
            u64::reverse_bits($e),
            $e
        );
    };
}

/// Debug log the given u32 expression by its full 32-bit binary string representation.
///
/// The bits are printed least-significant first: the value is bit-reversed before being
/// formatted as a zero-padded 32-digit binary number. The unmodified value follows in
/// parentheses. `$name` is the label, right-aligned in a 24-character column.
///
/// `$e` must be a `u32` value. It appears twice in the expansion, so it is evaluated at
/// most twice per invocation; keep it free of side effects.
#[allow(unused_macros)]
macro_rules! bin_u32 {
    ($name:expr, $e:expr) => {
        $crate::debug!(
            "{: >24}: {:032b} ({})",
            $name,
            // Standard-library bit reversal: bit `i` of the input becomes bit `31 - i`.
            u32::reverse_bits($e),
            $e
        );
    };
}

#[allow(unused_imports)]
pub(crate) use bin_u32;
#[allow(unused_imports)]
pub(crate) use bin_u64;
#[allow(unused_imports)]
pub(crate) use block;
#[allow(unused_imports)]
pub(crate) use debug;

/// Variation of the [`Iterator`] trait where each read can fail.
pub trait FallibleIterator {
    /// Type of items returned by this iterator.
    type Item;
    /// Type of errors that can occur when reading from this iterator.
    type Error: std::error::Error;

    /// Advances the iterator and returns the next value.
    ///
    /// # Errors
    /// May fail depending on the implementation.
    fn next(&mut self) -> Result<Option<Self::Item>, Self::Error>;

    /// Transforms an iterator into a collection.
    ///
    /// # Errors
    /// This consumes the iterator and reads from it. If any read fails,
    /// the result is the first error encountered.
    #[inline]
    fn collect<B>(self) -> Result<B, Self::Error>
    where
        B: FromIterator<Self::Item>,
        Self: Sized,
    {
        let iter = FallibleIntoIter { src: self };
        iter.collect()
    }
}

/// Adapter that wraps a source `F` so it can be driven through the standard
/// [`Iterator`] interface, yielding each read as a `Result`.
struct FallibleIntoIter<F> {
    /// The wrapped source that is read from.
    src: F,
}

/// Exposes a wrapped `FallibleIterator` as a standard [`Iterator`] over `Result`s,
/// so that the standard `Result`-aware adapters (such as `collect`) can be reused.
impl<F: FallibleIterator> Iterator for FallibleIntoIter<F> {
    type Item = Result<F::Item, F::Error>;

    fn next(&mut self) -> Option<Self::Item> {
        // `Ok(Some(x))` -> `Some(Ok(x))`, `Ok(None)` -> `None`, `Err(e)` -> `Some(Err(e))`.
        self.src.next().transpose()
    }
}

/// The ASCII space byte (`0x20`).
pub(crate) const JSON_SPACE_BYTE: u8 = b' ';

/// The four bytes treated as JSON whitespace: space, horizontal tab, line feed
/// and carriage return.
pub(crate) const JSON_WHITESPACE_BYTES: [u8; 4] = [b' ', b'\t', b'\n', b'\r'];

/// Returns `true` if and only if `x` is one of the bytes listed in
/// `JSON_WHITESPACE_BYTES`.
#[inline(always)]
#[must_use]
pub(crate) fn is_json_whitespace(x: u8) -> bool {
    JSON_WHITESPACE_BYTES.iter().any(|&ws| ws == x)
}