Skip to main content

jpx_core/
lib.rs

1//! jpx-core: A complete JMESPath implementation using `serde_json::Value`.
2//!
3//! This crate provides a JMESPath parser, interpreter, and runtime that works
4//! directly with `serde_json::Value`, eliminating the need for a separate
5//! `Variable` type and the conversion overhead that comes with it.
6//!
7//! # Quick Start
8//!
9//! ```
10//! use jpx_core::compile;
11//! use serde_json::json;
12//!
13//! let expr = compile("foo.bar").unwrap();
14//! let data = json!({"foo": {"bar": true}});
15//! let result = expr.search(&data).unwrap();
16//! assert_eq!(result, json!(true));
17//! ```
18
19pub mod ast;
20#[cfg(feature = "extensions")]
21pub mod extensions;
22pub mod functions;
23pub mod query_library;
24pub mod registry;
25pub mod value_ext;
26
27pub use crate::error::{ErrorReason, JmespathError, RuntimeError};
28pub use crate::interpreter::SearchResult;
29pub use crate::parser::{ParseResult, parse};
30pub use crate::registry::{Category, Feature, FunctionInfo, FunctionRegistry};
31pub use crate::runtime::{Runtime, RuntimeBuilder};
32pub use crate::value_ext::{JmespathType, ValueExt};
33
34mod error;
35pub mod interpreter;
36mod lexer;
37mod parser;
38mod runtime;
39
40#[cfg(feature = "let-expr")]
41use std::collections::HashMap;
42use std::fmt;
43use std::sync::LazyLock;
44
45use serde_json::Value;
46
47use crate::ast::Ast;
48use crate::interpreter::interpret;
49
50/// The default runtime with all 26 built-in JMESPath functions registered.
51pub static DEFAULT_RUNTIME: LazyLock<Runtime> = LazyLock::new(|| {
52    let mut runtime = Runtime::new();
53    runtime.register_builtin_functions();
54    runtime
55});
56
57/// Compiles a JMESPath expression using the default Runtime.
58#[inline]
59pub fn compile(expression: &str) -> Result<Expression<'static>, JmespathError> {
60    DEFAULT_RUNTIME.compile(expression)
61}
62
63/// A compiled JMESPath expression.
64///
65/// The compiled expression can be used multiple times without incurring
66/// the cost of re-parsing the expression each time.
67#[derive(Clone)]
68pub struct Expression<'a> {
69    ast: Ast,
70    expression: String,
71    runtime: &'a Runtime,
72}
73
74impl<'a> Expression<'a> {
75    /// Creates a new JMESPath expression.
76    #[inline]
77    pub fn new<S>(expression: S, ast: Ast, runtime: &'a Runtime) -> Expression<'a>
78    where
79        S: Into<String>,
80    {
81        Expression {
82            expression: expression.into(),
83            ast,
84            runtime,
85        }
86    }
87
88    /// Searches data with the compiled expression.
89    ///
90    /// Takes a `&Value` and returns a `Value` directly -- no conversion needed.
91    pub fn search(&self, data: &Value) -> SearchResult {
92        let mut ctx = Context::new(&self.expression, self.runtime);
93        let result = interpret(data, &self.ast, &mut ctx)?;
94        // Strip expref sentinels from top-level results
95        Ok(strip_expref_sentinels(result))
96    }
97
98    /// Returns the JMESPath expression string.
99    pub fn as_str(&self) -> &str {
100        &self.expression
101    }
102
103    /// Returns the AST of the parsed JMESPath expression.
104    pub fn as_ast(&self) -> &Ast {
105        &self.ast
106    }
107}
108
109impl<'a> fmt::Display for Expression<'a> {
110    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
111        write!(f, "{}", self.as_str())
112    }
113}
114
115impl<'a> fmt::Debug for Expression<'a> {
116    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
117        fmt::Display::fmt(self, f)
118    }
119}
120
121impl<'a> PartialEq for Expression<'a> {
122    fn eq(&self, other: &Expression<'_>) -> bool {
123        self.as_str() == other.as_str()
124    }
125}
126
127/// Context object used during expression evaluation.
128///
129/// The Context struct carries state needed by the interpreter and functions,
130/// including the expression string (for error messages), runtime (for function
131/// lookup), current AST offset, and expref side-channel table.
132pub struct Context<'a> {
133    /// Expression string being interpreted.
134    pub expression: &'a str,
135    /// JMESPath runtime used for function lookup.
136    pub runtime: &'a Runtime,
137    /// Current AST offset (for error reporting).
138    pub offset: usize,
139    /// Side-channel table for expression references.
140    /// Exprefs are stored here and referenced by index in sentinel values.
141    pub(crate) expref_table: Vec<Ast>,
142    /// Current interpreter recursion depth, used to bound evaluation nesting
143    /// and prevent stack overflow on deeply nested ASTs.
144    pub(crate) eval_depth: usize,
145    /// Variable scopes for let expressions (JEP-18).
146    #[cfg(feature = "let-expr")]
147    scopes: Vec<HashMap<String, Value>>,
148}
149
150impl<'a> Context<'a> {
151    /// Creates a new context.
152    #[inline]
153    pub fn new(expression: &'a str, runtime: &'a Runtime) -> Context<'a> {
154        Context {
155            expression,
156            runtime,
157            offset: 0,
158            expref_table: Vec::new(),
159            eval_depth: 0,
160            #[cfg(feature = "let-expr")]
161            scopes: Vec::new(),
162        }
163    }
164
165    /// Stores an expref AST and returns its index in the table.
166    pub(crate) fn store_expref(&mut self, ast: Ast) -> usize {
167        let id = self.expref_table.len();
168        self.expref_table.push(ast);
169        id
170    }
171
172    /// Retrieves an expref AST by index.
173    pub fn get_expref(&self, id: usize) -> Option<&Ast> {
174        self.expref_table.get(id)
175    }
176
177    /// Push a new scope onto the scope stack.
178    #[cfg(feature = "let-expr")]
179    #[inline]
180    pub fn push_scope(&mut self, bindings: HashMap<String, Value>) {
181        self.scopes.push(bindings);
182    }
183
184    /// Pop the innermost scope from the scope stack.
185    #[cfg(feature = "let-expr")]
186    #[inline]
187    pub fn pop_scope(&mut self) {
188        self.scopes.pop();
189    }
190
191    /// Look up a variable in the scope stack.
192    #[cfg(feature = "let-expr")]
193    #[inline]
194    pub fn get_variable(&self, name: &str) -> Option<Value> {
195        for scope in self.scopes.iter().rev() {
196            if let Some(value) = scope.get(name) {
197                return Some(value.clone());
198            }
199        }
200        None
201    }
202}
203
/// Object key that marks a `Value` as an expref sentinel, randomised per process.
///
/// Exprefs (`&foo`) travel through the interpreter in-band, as a `Value`
/// object keyed by this string. With a fixed key such as `"__jpx_expref__"`,
/// user data containing that key would be mis-typed as an expref and silently
/// stripped from output. Mixing a per-process random token into the key avoids
/// that: sentinels are stripped before results are returned, so the token is
/// not meant to be observable and user data cannot accidentally (or, in
/// practice, deliberately) be mistaken for an expref.
pub(crate) static EXPREF_KEY: LazyLock<String> = LazyLock::new(|| {
    use std::collections::hash_map::RandomState;
    use std::hash::{BuildHasher, Hasher};

    // `RandomState` carries randomly generated hash keys, so the digest below
    // differs between runs and is not predictable to a client.
    let mut hasher = RandomState::new().build_hasher();
    hasher.write_u8(0);
    let token = hasher.finish();
    format!("__jpx_expref_{token:016x}__")
});
221
222/// Creates an expref sentinel value from a table index.
223pub(crate) fn make_expref_sentinel(id: usize) -> Value {
224    let mut map = serde_json::Map::new();
225    map.insert(
226        EXPREF_KEY.clone(),
227        Value::Number(serde_json::Number::from(id)),
228    );
229    Value::Object(map)
230}
231
232/// Extracts the expref ID from a sentinel value.
233pub fn get_expref_id(value: &Value) -> Option<usize> {
234    value
235        .as_object()
236        .and_then(|m| m.get(EXPREF_KEY.as_str()))
237        .and_then(|v| v.as_u64())
238        .map(|v| v as usize)
239}
240
241/// Strips expref sentinels from a value (recursive for arrays/objects).
242fn strip_expref_sentinels(value: Value) -> Value {
243    match value {
244        Value::Object(map) if map.contains_key(EXPREF_KEY.as_str()) => Value::Null,
245        Value::Array(arr) => Value::Array(arr.into_iter().map(strip_expref_sentinels).collect()),
246        Value::Object(map) => Value::Object(
247            map.into_iter()
248                .map(|(k, v)| (k, strip_expref_sentinels(v)))
249                .collect(),
250        ),
251        other => other,
252    }
253}
254
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Compiles `query` with the default runtime and evaluates it against
    /// `data`, panicking if either step fails.
    fn run(query: &str, data: &Value) -> Value {
        compile(query).unwrap().search(data).unwrap()
    }

    #[test]
    fn formats_expression_as_string_or_debug() {
        let expr = compile("foo | baz").unwrap();
        // Display and Debug both render the raw expression text.
        assert_eq!("foo | baz", format!("{expr}"));
        assert_eq!("foo | baz", format!("{expr:?}"));
    }

    #[test]
    fn implements_partial_eq() {
        let (left, right) = (compile("@").unwrap(), compile("@").unwrap());
        assert!(left == right);
    }

    #[test]
    fn can_evaluate_jmespath_expression() {
        let data = json!({"foo": {"bar": true}});
        assert_eq!(json!(true), run("foo.bar", &data));
    }

    #[test]
    fn user_data_with_expref_key_is_not_mistaken_for_an_expref() {
        // Because the sentinel key is randomised per process, data that uses
        // the old fixed literal key is ordinary data: it survives a round trip
        // (it used to be stripped to null) and is typed as an object (it used
        // to be typed as "expref").
        let data = json!({"__jpx_expref__": 0});
        assert_eq!(run("@", &data), data);
        assert_eq!(run("type(@)", &data), json!("object"));
    }

    #[test]
    fn exprefs_still_evaluate() {
        let data = json!([{"n": 3}, {"n": 1}, {"n": 2}]);
        let sorted = run("sort_by(@, &n)", &data);
        assert_eq!(sorted, json!([{"n": 1}, {"n": 2}, {"n": 3}]));
    }

    #[test]
    fn can_get_expression_ast() {
        let expected = Ast::Field {
            offset: 0,
            name: String::from("foo"),
        };
        let expr = compile("foo").unwrap();
        assert_eq!(&expected, expr.as_ast());
    }

    #[test]
    fn expression_clone() {
        let original = compile("foo").unwrap();
        let _copy = original.clone();
    }

    #[test]
    fn test_invalid_number() {
        // Only checks that compiling this input does not panic; the outcome
        // itself is deliberately ignored.
        let _outcome = compile("6455555524");
    }
}
326
#[cfg(all(test, feature = "let-expr"))]
mod let_tests {
    use super::*;
    use serde_json::json;

    /// Compiles `query` with the default runtime and evaluates it against
    /// `data`, panicking if either step fails.
    fn eval(query: &str, data: Value) -> Value {
        compile(query).unwrap().search(&data).unwrap()
    }

    #[test]
    fn test_simple_let_expression() {
        assert_eq!(eval("let $x = `1` in $x", json!({})), json!(1));
    }

    #[test]
    fn test_let_with_data_reference() {
        let out = eval("let $name = name in $name", json!({"name": "Alice"}));
        assert_eq!(out, json!("Alice"));
    }

    #[test]
    fn test_let_multiple_bindings() {
        let out = eval("let $a = `1`, $b = `2` in [$a, $b]", json!({}));
        assert_eq!(out, json!([1, 2]));
    }

    #[test]
    fn test_let_with_expression_body() {
        let out = eval(
            "let $items = items in $items[0].name",
            json!({"items": [{"name": "first"}, {"name": "second"}]}),
        );
        assert_eq!(out, json!("first"));
    }

    #[test]
    fn test_nested_let() {
        let out = eval("let $x = `1` in let $y = `2` in [$x, $y]", json!({}));
        assert_eq!(out, json!([1, 2]));
    }

    #[test]
    fn test_let_variable_shadowing() {
        let out = eval("let $x = `1` in let $x = `2` in $x", json!({}));
        assert_eq!(out, json!(2));
    }

    #[test]
    fn test_undefined_variable_error() {
        // The expression compiles; the failure surfaces only at evaluation.
        let expr = compile("$undefined").unwrap();
        assert!(expr.search(&json!({})).is_err());
    }

    #[test]
    fn test_let_in_projection() {
        let out = eval(
            "let $threshold = `50` in numbers[? @ > $threshold]",
            json!({"numbers": [10, 30, 50, 70, 90]}),
        );
        assert_eq!(out, json!([70, 90]));
    }

    #[test]
    fn test_let_variable_used_multiple_times() {
        let out = eval(
            "let $foo = foo.bar in [$foo, $foo]",
            json!({"foo": {"bar": "baz"}}),
        );
        assert_eq!(out, json!(["baz", "baz"]));
    }

    #[test]
    fn test_let_shadowing_in_projection() {
        let out = eval(
            "let $a = a in b[*].[a, $a, let $a = 'shadow' in $a]",
            json!({"a": "topval", "b": [{"a": "inner1"}, {"a": "inner2"}]}),
        );
        let expected = json!([
            ["inner1", "topval", "shadow"],
            ["inner2", "topval", "shadow"]
        ]);
        assert_eq!(out, expected);
    }

    #[test]
    fn test_let_bindings_evaluated_in_outer_scope() {
        let out = eval(
            "let $a = 'top-a' in let $a = 'in-a', $b = $a in $b",
            json!({}),
        );
        assert_eq!(out, json!("top-a"));
    }

    #[test]
    fn test_let_projection_stopping() {
        let out = eval(
            "let $foo = foo[*] in $foo[0]",
            json!({"foo": [[0, 1], [2, 3], [4, 5]]}),
        );
        assert_eq!(out, json!([0, 1]));
    }

    #[test]
    fn test_let_shadow_and_restore() {
        let out = eval(
            "let $x = 'outer' in [let $x = 'inner' in $x, $x]",
            json!({}),
        );
        assert_eq!(out, json!(["inner", "outer"]));
    }

    #[test]
    fn test_let_with_functions() {
        let out = eval(
            "let $arr = numbers in length($arr)",
            json!({"numbers": [1, 2, 3, 4, 5]}),
        );
        assert_eq!(out, json!(5));
    }

    #[test]
    fn test_let_deeply_nested_scopes() {
        let out = eval(
            "let $a = `1` in let $b = `2` in let $c = `3` in [$a, $b, $c]",
            json!({}),
        );
        assert_eq!(out, json!([1, 2, 3]));
    }

    #[test]
    fn test_let_with_flatten() {
        let out = eval(
            "let $data = nested in $data[].items[]",
            json!({"nested": [{"items": [1, 2]}, {"items": [3, 4]}]}),
        );
        assert_eq!(out, json!([1, 2, 3, 4]));
    }

    #[test]
    fn test_let_with_slice() {
        let out = eval(
            "let $arr = numbers in $arr[1:3]",
            json!({"numbers": [0, 1, 2, 3, 4]}),
        );
        assert_eq!(out, json!([1, 2]));
    }

    #[test]
    fn test_let_with_or_expression() {
        let out = eval("let $default = 'N/A' in name || $default", json!({}));
        assert_eq!(out, json!("N/A"));
    }

    #[test]
    fn test_let_with_not_expression() {
        let out = eval("let $val = `false` in !$val", json!({}));
        assert_eq!(out, json!(true));
    }

    #[test]
    fn test_let_binding_to_literal() {
        let out = eval(
            "let $str = 'hello', $num = `42`, $bool = `true`, $null = `null` in [$str, $num, $bool, $null]",
            json!({}),
        );
        assert_eq!(out, json!(["hello", 42, true, null]));
    }
}