Skip to main content

jsonata_core/
lib.rs

1// jsonatapy - High-performance Python implementation of JSONata
2// Copyright (c) 2025 jsonatapy contributors
3// Licensed under the MIT License
4
5//! # jsonatapy
6//!
7//! A high-performance Rust implementation of JSONata - the JSON query and
8//! transformation language - with optional Python bindings via PyO3.
9//!
10//! ## Rust API
11//!
12//! ```rust,ignore
13//! use jsonata_core::parser;
14//! use jsonata_core::evaluator::Evaluator;
15//! use jsonata_core::value::JValue;
16//!
17//! let ast = parser::parse("user.name").unwrap();
18//! let data = JValue::from_json_str(r#"{"user":{"name":"Alice"}}"#).unwrap();
19//! let result = Evaluator::new().evaluate(&ast, &data).unwrap();
20//! ```
21//!
22//! ## Architecture
23//!
24//! - `parser` - Expression parser (converts JSONata strings to AST)
25//! - `evaluator` - Expression evaluator (executes AST against data)
26//! - `functions` - Built-in function implementations
27//! - `datetime` - Date/time handling functions
28//! - `signature` - Function signature validation
29//! - `ast` - Abstract Syntax Tree definitions
30//! - `value` - JValue type (the runtime value representation)
31
32pub mod ast;
33mod compiler;
34mod datetime;
35pub mod evaluator;
36pub mod functions;
37pub mod parser;
38mod signature;
39pub mod value;
40mod vm;
41
42// ── Benchmarking facade (only when the "bench" feature is enabled) ────────────
43//
44// Exposes the compiler/VM pipeline for Criterion benchmarks without making
45// the internals part of the permanent public API.
46
/// Benchmark-only facade over the bytecode compiler and VM — not for production use.
///
/// Compiled in only with `--features bench`. It lets Criterion benchmarks drive
/// the compile-then-run pipeline directly (for comparison against the
/// tree-walking evaluator) while keeping `compiler` and `vm` private modules.
#[cfg(feature = "bench")]
pub mod _bench {
    pub use crate::evaluator::EvaluatorError;

    use crate::ast::AstNode;
    use crate::compiler::BytecodeCompiler;
    use crate::value::JValue;
    use crate::vm::{BytecodeProgram, Vm};

    /// Opaque wrapper around a compiled bytecode program.
    ///
    /// The inner program is private so the VM's types never leak into the
    /// public API.
    pub struct CompiledProgram(BytecodeProgram);

    /// Lower `ast` to bytecode.
    ///
    /// Yields `None` when `try_compile_expr` rejects the expression, i.e. it
    /// uses constructs the compiler doesn't handle (e.g. wildcards, `$eval`,
    /// higher-order functions at the top level). Callers should then fall back
    /// to `Evaluator::new().evaluate()`.
    pub fn compile(ast: &AstNode) -> Option<CompiledProgram> {
        let compiled_expr = crate::evaluator::try_compile_expr(ast)?;
        Some(CompiledProgram(BytecodeCompiler::compile(&compiled_expr)))
    }

    /// Run a previously compiled program with `data` as the input document.
    ///
    /// No variable bindings are supplied to the VM.
    pub fn run(prog: &CompiledProgram, data: &JValue) -> Result<JValue, EvaluatorError> {
        let CompiledProgram(bytecode) = prog;
        Vm::new(bytecode).run(data, None)
    }
}
75
76// ── Python bindings (only when the "python" feature is enabled) ───────────────
77
/// The JSONata reference implementation version this library targets.
///
/// Exported to Python as the module attribute `__jsonata_version__` by the
/// `_jsonatapy` module initializer.
const JSONATA_REFERENCE_VERSION: &str = "2.1.0";
80
81#[cfg(feature = "python")]
82use crate::value::JValue;
83#[cfg(feature = "python")]
84use indexmap::IndexMap;
85#[cfg(feature = "python")]
86use pyo3::exceptions::{PyTypeError, PyValueError};
87#[cfg(feature = "python")]
88use pyo3::prelude::*;
89#[cfg(feature = "python")]
90use pyo3::types::{PyBool, PyDict, PyFloat, PyInt, PyList, PyString};
91
/// Pre-converted data handle for efficient repeated evaluation.
///
/// Convert Python data to the internal `JValue` representation once, then
/// reuse it across multiple evaluations to avoid repeated Python↔Rust
/// conversion overhead. Instances are consumed by
/// `JsonataExpression.evaluate_with_data` and
/// `JsonataExpression.evaluate_data_to_json`.
///
/// The class is declared `unsendable`, which in PyO3 restricts access to the
/// thread that created the object.
///
/// # Examples
///
/// ```python
/// import jsonatapy
///
/// data = jsonatapy.JsonataData({"orders": [{"price": 150}, {"price": 50}]})
/// expr = jsonatapy.compile("orders[price > 100]")
/// result = expr.evaluate_with_data(data)
/// ```
#[cfg(feature = "python")]
#[pyclass(unsendable)]
struct JsonataData {
    // The already-converted input document handed to the evaluator as-is.
    data: JValue,
}
111
#[cfg(feature = "python")]
#[pymethods]
impl JsonataData {
    /// Build a handle from a Python object (dict, list, scalar, ...).
    ///
    /// The object is converted eagerly; conversion failures are propagated
    /// to the caller unchanged.
    #[new]
    fn new(py: Python, data: Py<PyAny>) -> PyResult<Self> {
        python_to_json(py, &data).map(|converted| JsonataData { data: converted })
    }

    /// Build a handle by parsing a JSON string (fastest path — no Python
    /// object traversal).
    ///
    /// Raises `ValueError` when `json_str` is not valid JSON.
    #[staticmethod]
    fn from_json(json_str: &str) -> PyResult<Self> {
        match JValue::from_json_str(json_str) {
            Ok(data) => Ok(JsonataData { data }),
            Err(e) => Err(PyValueError::new_err(format!("Invalid JSON: {}", e))),
        }
    }
}
130
/// A compiled JSONata expression that can be evaluated against data.
///
/// This is the main entry point for using JSONata. Compile an expression once,
/// then evaluate it multiple times against different data.
///
/// The class is declared `unsendable`, which in PyO3 restricts access to the
/// thread that created the object; this is also what permits the
/// single-threaded `OnceCell` used for the bytecode cache.
///
/// # Examples
///
/// ```python
/// import jsonatapy
///
/// # Compile once
/// expr = jsonatapy.compile("orders[price > 100].product")
///
/// # Evaluate many times
/// data1 = {"orders": [{"product": "A", "price": 150}]}
/// result1 = expr.evaluate(data1)
///
/// data2 = {"orders": [{"product": "B", "price": 50}]}
/// result2 = expr.evaluate(data2)
/// ```
#[cfg(feature = "python")]
#[pyclass(unsendable)]
struct JsonataExpression {
    /// The parsed Abstract Syntax Tree
    ast: ast::AstNode,
    /// Lazily compiled bytecode — populated the first time the expression is
    /// evaluated without bindings (evaluations with bindings never touch it).
    /// `Some(bc)` = fast VM path; `None` = must use tree-walker.
    /// `OnceCell` ensures compilation happens at most once per expression instance.
    bytecode: std::cell::OnceCell<Option<vm::BytecodeProgram>>,
}
161
#[cfg(feature = "python")]
impl JsonataExpression {
    /// Shared evaluation core used by every Python-facing `evaluate*` method.
    ///
    /// * With `bindings`: a tree-walking evaluator is built from them and used
    ///   directly; the bytecode cache is not consulted.
    /// * Without `bindings`: the expression is compiled to bytecode on first
    ///   use (cached in `self.bytecode`) and run on the VM; if it cannot be
    ///   compiled, a fresh tree-walking evaluator is used instead.
    ///
    /// Evaluator errors are converted to Python exceptions via
    /// `evaluator_error_to_py`.
    fn run_eval(&self, py: Python, data: &JValue, bindings: Option<Py<PyAny>>) -> PyResult<JValue> {
        // Caller-supplied bindings always take the tree-walker route.
        if bindings.is_some() {
            let mut walker = create_evaluator(py, bindings)?;
            return walker
                .evaluate(&self.ast, data)
                .map_err(evaluator_error_to_py);
        }

        // Compile at most once; a cached `None` records "not compilable".
        let cached = self.bytecode.get_or_init(|| {
            let compiled_expr = evaluator::try_compile_expr(&self.ast)?;
            Some(compiler::BytecodeCompiler::compile(&compiled_expr))
        });

        let outcome = match cached {
            Some(program) => vm::Vm::new(program).run(data, None),
            None => evaluator::Evaluator::new().evaluate(&self.ast, data),
        };
        outcome.map_err(evaluator_error_to_py)
    }
}
186
#[cfg(feature = "python")]
#[pymethods]
impl JsonataExpression {
    /// Evaluate the expression against a Python object.
    ///
    /// # Arguments
    ///
    /// * `data` - A Python object (typically dict) to query/transform
    /// * `bindings` - Optional dict of variable bindings (default: None)
    ///
    /// # Returns
    ///
    /// The result of evaluating the expression, converted back to Python
    ///
    /// # Errors
    ///
    /// Returns ValueError if evaluation fails; input conversion errors are
    /// propagated as raised
    #[pyo3(signature = (data, bindings=None))]
    fn evaluate(
        &self,
        py: Python,
        data: Py<PyAny>,
        bindings: Option<Py<PyAny>>,
    ) -> PyResult<Py<PyAny>> {
        let input = python_to_json(py, &data)?;
        let output = self.run_eval(py, &input, bindings)?;
        json_to_python(py, &output)
    }

    /// Evaluate against a pre-converted data handle (fastest for repeated evaluation).
    ///
    /// Skips the Python→Rust input conversion; only the result is converted
    /// back to Python.
    ///
    /// # Arguments
    ///
    /// * `data` - A JsonataData handle (pre-converted from Python to internal format)
    /// * `bindings` - Optional additional variable bindings
    ///
    /// # Returns
    ///
    /// The result of evaluating the expression
    #[pyo3(signature = (data, bindings=None))]
    fn evaluate_with_data(
        &self,
        py: Python,
        data: &JsonataData,
        bindings: Option<Py<PyAny>>,
    ) -> PyResult<Py<PyAny>> {
        let output = self.run_eval(py, &data.data, bindings)?;
        json_to_python(py, &output)
    }

    /// Evaluate against a pre-converted data handle and return the result as a JSON string.
    ///
    /// Neither the input nor the output is converted to Python objects.
    ///
    /// # Arguments
    ///
    /// * `data` - A JsonataData handle (pre-converted from Python to internal format)
    /// * `bindings` - Optional additional variable bindings
    ///
    /// # Returns
    ///
    /// The result as a JSON string
    ///
    /// # Errors
    ///
    /// Returns ValueError if evaluation or result serialization fails
    #[pyo3(signature = (data, bindings=None))]
    fn evaluate_data_to_json(
        &self,
        py: Python,
        data: &JsonataData,
        bindings: Option<Py<PyAny>>,
    ) -> PyResult<String> {
        let output = self.run_eval(py, &data.data, bindings)?;
        output
            .to_json_string()
            .map_err(|e| PyValueError::new_err(format!("Failed to serialize result: {}", e)))
    }

    /// Evaluate with JSON string input and JSON string output (faster for large data).
    ///
    /// Accepting and returning JSON text avoids the Python↔Rust object
    /// conversion on both sides of the call.
    ///
    /// # Arguments
    ///
    /// * `json_str` - Input data as a JSON string
    /// * `bindings` - Optional dict of variable bindings (default: None)
    ///
    /// # Returns
    ///
    /// The result as a JSON string
    ///
    /// # Errors
    ///
    /// Returns ValueError if JSON parsing, evaluation, or result serialization fails
    #[pyo3(signature = (json_str, bindings=None))]
    fn evaluate_json(
        &self,
        py: Python,
        json_str: &str,
        bindings: Option<Py<PyAny>>,
    ) -> PyResult<String> {
        let input = match JValue::from_json_str(json_str) {
            Ok(parsed) => parsed,
            Err(e) => return Err(PyValueError::new_err(format!("Invalid JSON: {}", e))),
        };
        let output = self.run_eval(py, &input, bindings)?;
        output
            .to_json_string()
            .map_err(|e| PyValueError::new_err(format!("Failed to serialize result: {}", e)))
    }
}
275
/// Parse a JSONata expression and wrap it in a reusable `JsonataExpression`.
///
/// Only parsing happens here; bytecode compilation is deferred until the
/// expression is first evaluated without bindings.
///
/// # Arguments
///
/// * `expression` - A JSONata query/transformation expression string
///
/// # Returns
///
/// A compiled JsonataExpression that can be evaluated
///
/// # Errors
///
/// Returns ValueError if the expression cannot be parsed
///
/// # Examples
///
/// ```python
/// import jsonatapy
///
/// expr = jsonatapy.compile("$.name")
/// result = expr.evaluate({"name": "Alice"})
/// print(result)  # "Alice"
/// ```
#[cfg(feature = "python")]
#[pyfunction]
fn compile(expression: &str) -> PyResult<JsonataExpression> {
    match parser::parse(expression) {
        Ok(ast) => Ok(JsonataExpression {
            ast,
            // Left empty on purpose: filled lazily at evaluation time.
            bytecode: std::cell::OnceCell::new(),
        }),
        Err(e) => Err(PyValueError::new_err(format!("Parse error: {}", e))),
    }
}
310
/// Compile and evaluate a JSONata expression in a single call.
///
/// Convenience wrapper around `compile()` followed by
/// `JsonataExpression.evaluate()`. The expression is re-parsed on every call,
/// so prefer `compile()` when the same expression is evaluated repeatedly.
///
/// # Arguments
///
/// * `expression` - A JSONata query/transformation expression string
/// * `data` - A Python object (typically dict) to query/transform
/// * `bindings` - Optional additional variable bindings
///
/// # Returns
///
/// The result of evaluating the expression
///
/// # Errors
///
/// Returns ValueError if parsing or evaluation fails
///
/// # Examples
///
/// ```python
/// import jsonatapy
///
/// result = jsonatapy.evaluate("$uppercase(name)", {"name": "alice"})
/// print(result)  # "ALICE"
/// ```
#[cfg(feature = "python")]
#[pyfunction]
#[pyo3(signature = (expression, data, bindings=None))]
fn evaluate(
    py: Python,
    expression: &str,
    data: Py<PyAny>,
    bindings: Option<Py<PyAny>>,
) -> PyResult<Py<PyAny>> {
    compile(expression).and_then(|compiled| compiled.evaluate(py, data, bindings))
}
350
/// Convert an owned Python object reference into a `JValue`.
///
/// Binds `obj` to the current interpreter token and delegates to
/// `python_to_json_bound`, which maps:
/// - None -> Null
/// - bool -> Bool (checked before int since bool is a subclass of int)
/// - int, float -> Number
/// - str -> String
/// - list -> Array
/// - dict -> Object
#[cfg(feature = "python")]
fn python_to_json(py: Python, obj: &Py<PyAny>) -> PyResult<JValue> {
    let bound = obj.bind(py);
    python_to_json_bound(bound)
}
364
/// Inner conversion using Bound API for zero-overhead type checks.
///
/// Uses is_instance_of::<T>() which compiles to C-level type pointer comparisons
/// (PyBool_Check, PyLong_Check, etc.) — single pointer comparison vs qualname()
/// which allocates a Python string and does string comparison.
///
/// Conversion rules, tried in order:
/// - `None` -> `Null`
/// - `bool` -> `Bool` (before `int`, because `bool` subclasses `int`)
/// - `int` -> `Number`; values outside the `i64` range are converted through
///   Python's float conversion instead of failing
/// - `float` -> `Number`
/// - `str` -> `String`
/// - `list` -> `Array` (recursively)
/// - `dict` -> `Object` (recursively; keys must extract as `str`)
/// - anything else: best-effort extraction as bool, integer, float, then string
///
/// # Errors
///
/// Returns `TypeError` when the object matches none of the rules above, and
/// propagates any error raised while extracting a value (for example a
/// non-string dict key, or an integer too large to be represented as a float).
#[cfg(feature = "python")]
fn python_to_json_bound(obj: &Bound<'_, PyAny>) -> PyResult<JValue> {
    if obj.is_none() {
        return Ok(JValue::Null);
    }

    // Check bool before int — Python bool is a subclass of int
    if obj.is_instance_of::<PyBool>() {
        return Ok(JValue::Bool(obj.extract::<bool>()?));
    }
    if obj.is_instance_of::<PyInt>() {
        return match obj.extract::<i64>() {
            Ok(i) => Ok(JValue::Number(i as f64)),
            // Python ints are arbitrary precision. The target type is f64
            // anyway, so a value that overflows i64 is converted via float
            // extraction rather than surfacing an OverflowError.
            Err(_) => Ok(JValue::Number(obj.extract::<f64>()?)),
        };
    }
    if obj.is_instance_of::<PyFloat>() {
        return Ok(JValue::Number(obj.extract::<f64>()?));
    }
    if obj.is_instance_of::<PyString>() {
        return Ok(JValue::string(obj.extract::<String>()?));
    }
    if let Ok(list) = obj.cast::<PyList>() {
        let mut result = Vec::with_capacity(list.len());
        for item in list.iter() {
            result.push(python_to_json_bound(&item)?);
        }
        return Ok(JValue::array(result));
    }
    if let Ok(dict) = obj.cast::<PyDict>() {
        let mut result = IndexMap::with_capacity(dict.len());
        for (key, value) in dict.iter() {
            let key_str = key.extract::<String>()?;
            result.insert(key_str, python_to_json_bound(&value)?);
        }
        return Ok(JValue::object(result));
    }

    // Fallback for subclasses, numpy types, etc.
    if let Ok(b) = obj.extract::<bool>() {
        return Ok(JValue::Bool(b));
    }
    if let Ok(i) = obj.extract::<i64>() {
        return Ok(JValue::Number(i as f64));
    }
    if let Ok(f) = obj.extract::<f64>() {
        return Ok(JValue::Number(f));
    }
    if let Ok(s) = obj.extract::<String>() {
        return Ok(JValue::string(s));
    }

    Err(PyTypeError::new_err(format!(
        "Cannot convert Python object to JSON: {}",
        obj.get_type().name()?
    )))
}
424
/// Convert a JValue to a Python object.
///
/// Handles conversion of JValue variants to Python types:
/// - Null/Undefined -> None
/// - Bool -> bool
/// - Number -> int (if a whole number strictly inside the i64 range) or float
/// - String -> str
/// - Array -> list (batch-constructed via PyList::new for fewer C API calls)
/// - Object -> dict (keys inserted in the object's own iteration order)
/// - Lambda/Builtin/Regex -> None
///
/// Arrays of two or more objects take a fast path that creates each key of
/// the first object as a Python string once and reuses it for every element;
/// the resulting dicts are otherwise identical to the general path, including
/// key order.
///
/// # Errors
///
/// Propagates any Python error raised while building lists or dicts.
#[cfg(feature = "python")]
fn json_to_python(py: Python, value: &JValue) -> PyResult<Py<PyAny>> {
    match value {
        JValue::Null | JValue::Undefined => Ok(py.None()),

        JValue::Bool(b) => Ok(b.into_pyobject(py).unwrap().to_owned().into_any().unbind()),

        JValue::Number(n) => {
            // Whole numbers that fit in i64 become Python ints. The upper
            // bound is strict: `i64::MAX as f64` rounds up to 2^63, which is
            // NOT representable as i64 — `as i64` would saturate and silently
            // return 2^63 - 1 instead of the real value.
            if n.fract() == 0.0 && n.is_finite() && *n >= i64::MIN as f64 && *n < i64::MAX as f64 {
                Ok((*n as i64).into_pyobject(py).unwrap().into_any().unbind())
            } else {
                Ok(n.into_pyobject(py).unwrap().into_any().unbind())
            }
        }

        JValue::String(s) => Ok((&**s).into_pyobject(py).unwrap().into_any().unbind()),

        JValue::Array(arr) => {
            // Array of objects with shared keys: intern first object's keys as
            // Python strings to avoid repeated UTF-8 -> PyString conversion.
            let all_objects =
                arr.len() >= 2 && arr.iter().all(|item| matches!(item, JValue::Object(_)));
            if all_objects {
                let first_obj = match arr.first() {
                    Some(JValue::Object(obj)) => obj,
                    _ => unreachable!("all_objects guard ensures first element is an object"),
                };

                // Interned keys, indexed by name. The &str keys borrow from
                // first_obj (and therefore from arr), so no String is cloned.
                let interned_keys: std::collections::HashMap<&str, Py<PyString>> = first_obj
                    .keys()
                    .map(|k| (k.as_str(), PyString::new(py, k).unbind()))
                    .collect();

                let items: Vec<Py<PyAny>> = arr
                    .iter()
                    .map(|item| -> PyResult<Py<PyAny>> {
                        let obj = match item {
                            JValue::Object(obj) => obj,
                            _ => unreachable!("all_objects guard ensures every element is an object"),
                        };
                        let dict = PyDict::new(py);
                        // Walk each object in ITS OWN key order so the dict
                        // matches what the single-object path would produce;
                        // reuse the interned Python key when one exists.
                        for (key, value) in obj.iter() {
                            let py_value = json_to_python(py, value)?;
                            match interned_keys.get(key.as_str()) {
                                Some(py_key) => dict.set_item(py_key.bind(py), py_value)?,
                                None => dict.set_item(key, py_value)?,
                            }
                        }
                        Ok(dict.unbind().into())
                    })
                    .collect::<PyResult<Vec<_>>>()?;
                return Ok(PyList::new(py, &items)?.unbind().into());
            }

            // General array: batch construction
            let items: Vec<Py<PyAny>> = arr
                .iter()
                .map(|item| json_to_python(py, item))
                .collect::<PyResult<Vec<_>>>()?;
            Ok(PyList::new(py, &items)?.unbind().into())
        }

        JValue::Object(obj) => {
            let dict = PyDict::new(py);
            for (key, value) in obj.iter() {
                dict.set_item(key, json_to_python(py, value)?)?;
            }
            Ok(dict.unbind().into())
        }

        // Function-like and regex values have no Python counterpart here.
        JValue::Lambda { .. } | JValue::Builtin { .. } | JValue::Regex { .. } => Ok(py.None()),
    }
}
516
/// Build a tree-walking evaluator, seeding it with Python-supplied bindings if any.
///
/// * `bindings == None` — returns a default evaluator.
/// * `bindings == Some(obj)` — `obj` is converted to a `JValue`; every entry
///   of the resulting object is bound by name in a fresh context, which the
///   returned evaluator then uses.
///
/// # Errors
///
/// Returns `TypeError("bindings must be a dictionary")` when the converted
/// bindings are not an object, and propagates conversion errors.
#[cfg(feature = "python")]
fn create_evaluator(py: Python, bindings: Option<Py<PyAny>>) -> PyResult<evaluator::Evaluator> {
    let Some(bindings_obj) = bindings else {
        return Ok(evaluator::Evaluator::new());
    };

    let JValue::Object(entries) = python_to_json(py, &bindings_obj)? else {
        return Err(PyTypeError::new_err("bindings must be a dictionary"));
    };

    let mut context = evaluator::Context::new();
    for (name, bound_value) in entries.iter() {
        context.bind(name.clone(), bound_value.clone());
    }
    Ok(evaluator::Evaluator::with_context(context))
}
536
/// Translate an `EvaluatorError` into a Python exception.
///
/// Every variant (type, reference, and evaluation errors) surfaces in Python
/// as a `ValueError` carrying the original message.
#[cfg(feature = "python")]
fn evaluator_error_to_py(e: evaluator::EvaluatorError) -> PyErr {
    use evaluator::EvaluatorError as Kind;

    match e {
        Kind::TypeError(msg) | Kind::ReferenceError(msg) | Kind::EvaluationError(msg) => {
            PyValueError::new_err(msg)
        }
    }
}
546
/// JSONata Python module
///
/// Module initializer for the `_jsonatapy` extension. Registers the
/// `compile` and `evaluate` functions, the `JsonataExpression` and
/// `JsonataData` classes, and two version attributes.
#[cfg(feature = "python")]
#[pymodule]
fn _jsonatapy(m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_function(wrap_pyfunction!(compile, m)?)?;
    m.add_function(wrap_pyfunction!(evaluate, m)?)?;
    m.add_class::<JsonataExpression>()?;
    m.add_class::<JsonataData>()?;

    // Add version info:
    // `__version__` is this crate's version, captured from Cargo at build time;
    // `__jsonata_version__` is the JSONata reference version the crate targets.
    m.add("__version__", env!("CARGO_PKG_VERSION"))?;
    m.add("__jsonata_version__", JSONATA_REFERENCE_VERSION)?;

    Ok(())
}
562
// Crate-root unit tests. Only a build-level smoke test lives here.
#[cfg(test)]
mod tests {
    /// Checks that Cargo injected a non-empty package version at compile time.
    #[test]
    fn test_module_creation() {
        // Basic smoke test
        assert!(!env!("CARGO_PKG_VERSION").is_empty());
    }
}