Skip to main content

jsonata_core/
lib.rs

1// jsonatapy - High-performance Python implementation of JSONata
2// Copyright (c) 2025 jsonatapy contributors
3// Licensed under the MIT License
4
5//! # jsonatapy
6//!
7//! A high-performance Rust implementation of JSONata - the JSON query and
8//! transformation language - with optional Python bindings via PyO3.
9//!
10//! ## Rust API
11//!
12//! ```rust,ignore
13//! use jsonata_core::parser;
14//! use jsonata_core::evaluator::Evaluator;
15//! use jsonata_core::value::JValue;
16//!
17//! let ast = parser::parse("user.name").unwrap();
18//! let data = JValue::from_json_str(r#"{"user":{"name":"Alice"}}"#).unwrap();
19//! let result = Evaluator::new().evaluate(&ast, &data).unwrap();
20//! ```
21//!
22//! ## Architecture
23//!
24//! - `parser` - Expression parser (converts JSONata strings to AST)
25//! - `evaluator` - Expression evaluator (executes AST against data)
26//! - `functions` - Built-in function implementations
27//! - `datetime` - Date/time handling functions
28//! - `signature` - Function signature validation
29//! - `ast` - Abstract Syntax Tree definitions
30//! - `value` - JValue type (the runtime value representation)
31
32pub mod ast;
33mod compiler;
34mod datetime;
35pub mod evaluator;
36pub mod functions;
37pub mod parser;
38mod signature;
39pub mod value;
40mod vm;
41
42// ── Benchmarking facade (only when the "bench" feature is enabled) ────────────
43//
44// Exposes the compiler/VM pipeline for Criterion benchmarks without making
45// the internals part of the permanent public API.
46
/// Benchmark-only facade over the bytecode compiler and VM — not for production code.
///
/// Compiled only with `--features bench`. It lets Criterion benchmarks drive the
/// compile-then-run pipeline directly, so the VM can be compared against the
/// tree-walking evaluator without exposing `compiler` / `vm` publicly.
#[cfg(feature = "bench")]
pub mod _bench {
    pub use crate::evaluator::EvaluatorError;

    use crate::ast::AstNode;
    use crate::compiler::BytecodeCompiler;
    use crate::value::JValue;
    use crate::vm::{BytecodeProgram, Vm};

    /// Opaque wrapper around a compiled bytecode program.
    pub struct CompiledProgram(BytecodeProgram);

    /// Lower an AST node to bytecode.
    ///
    /// Yields `None` when `try_compile_expr` declines the expression (e.g.
    /// wildcards, `$eval`, higher-order functions at the top level). Callers
    /// should then fall back to `Evaluator::new().evaluate()`.
    pub fn compile(ast: &AstNode) -> Option<CompiledProgram> {
        let compiled_expr = crate::evaluator::try_compile_expr(ast)?;
        let program = BytecodeCompiler::compile(&compiled_expr);
        Some(CompiledProgram(program))
    }

    /// Run a previously compiled program against `data` on a fresh VM.
    pub fn run(prog: &CompiledProgram, data: &JValue) -> Result<JValue, EvaluatorError> {
        let CompiledProgram(program) = prog;
        Vm::new(program).run(data, None)
    }
}
75
76// ── Python bindings (only when the "python" feature is enabled) ───────────────
77
78#[cfg(feature = "python")]
79use crate::value::JValue;
80#[cfg(feature = "python")]
81use indexmap::IndexMap;
82#[cfg(feature = "python")]
83use pyo3::exceptions::{PyTypeError, PyValueError};
84#[cfg(feature = "python")]
85use pyo3::prelude::*;
86#[cfg(feature = "python")]
87use pyo3::types::{PyBool, PyDict, PyFloat, PyInt, PyList, PyString};
88
/// Handle to input data that has already been converted to the internal `JValue` form.
///
/// Build it once and pass it to `evaluate_with_data` / `evaluate_data_to_json`
/// so repeated evaluations skip the Python→Rust conversion of the input.
///
/// # Examples
///
/// ```python
/// import jsonatapy
///
/// data = jsonatapy.JsonataData({"orders": [{"price": 150}, {"price": 50}]})
/// expr = jsonatapy.compile("orders[price > 100]")
/// result = expr.evaluate_with_data(data)
/// ```
#[cfg(feature = "python")]
#[pyclass(unsendable)]
struct JsonataData {
    /// The converted value that evaluations read from.
    data: JValue,
}

#[cfg(feature = "python")]
#[pymethods]
impl JsonataData {
    /// Build a handle from a Python object (dict, list, str, number, ...).
    ///
    /// Conversion errors from `python_to_json` are propagated unchanged.
    #[new]
    fn new(py: Python, data: Py<PyAny>) -> PyResult<Self> {
        python_to_json(py, &data).map(|converted| JsonataData { data: converted })
    }

    /// Build a handle straight from a JSON string (fastest path).
    ///
    /// Raises `ValueError` ("Invalid JSON: ...") when the text cannot be parsed.
    #[staticmethod]
    fn from_json(json_str: &str) -> PyResult<Self> {
        match JValue::from_json_str(json_str) {
            Ok(parsed) => Ok(JsonataData { data: parsed }),
            Err(e) => Err(PyValueError::new_err(format!("Invalid JSON: {}", e))),
        }
    }
}
127
/// A compiled JSONata expression that can be evaluated against data.
///
/// This is the main entry point for using JSONata. Compile an expression once,
/// then evaluate it multiple times against different data.
///
/// # Examples
///
/// ```python
/// import jsonatapy
///
/// # Compile once
/// expr = jsonatapy.compile("orders[price > 100].product")
///
/// # Evaluate many times
/// data1 = {"orders": [{"product": "A", "price": 150}]}
/// result1 = expr.evaluate(data1)
///
/// data2 = {"orders": [{"product": "B", "price": 50}]}
/// result2 = expr.evaluate(data2)
/// ```
#[cfg(feature = "python")]
#[pyclass(unsendable)]
struct JsonataExpression {
    /// The parsed Abstract Syntax Tree
    ast: ast::AstNode,
    /// Lazily compiled bytecode — populated on the first evaluation without bindings.
    /// `Some(bc)` = fast VM path; `None` = must use tree-walker.
    /// `OnceCell` ensures compilation happens at most once per expression instance.
    bytecode: std::cell::OnceCell<Option<vm::BytecodeProgram>>,
}

// Rust-only helpers. They live outside the `#[pymethods]` block on purpose:
// every method inside that block is exported to Python, and these take/return
// internal types that have no Python representation.
#[cfg(feature = "python")]
impl JsonataExpression {
    /// Evaluate the expression against an already-converted input value.
    ///
    /// Shared core of every Python-facing `evaluate*` method:
    /// - with `bindings`, a tree-walking evaluator configured by
    ///   `create_evaluator` is always used;
    /// - without `bindings`, the expression is compiled to bytecode on first
    ///   use and run on the VM, falling back to a plain tree-walking
    ///   evaluator when the expression is not compilable.
    ///
    /// Evaluation failures are converted with `evaluator_error_to_py`.
    fn eval_jvalue(
        &self,
        py: Python,
        input: &JValue,
        bindings: Option<Py<PyAny>>,
    ) -> PyResult<JValue> {
        if bindings.is_some() {
            let mut ev = create_evaluator(py, bindings)?;
            return ev
                .evaluate(&self.ast, input)
                .map_err(evaluator_error_to_py);
        }

        let bytecode = self.bytecode.get_or_init(|| {
            evaluator::try_compile_expr(&self.ast)
                .map(|ce| compiler::BytecodeCompiler::compile(&ce))
        });
        let outcome = match bytecode {
            Some(bc) => vm::Vm::new(bc).run(input, None),
            None => {
                let mut ev = evaluator::Evaluator::new();
                ev.evaluate(&self.ast, input)
            }
        };
        outcome.map_err(evaluator_error_to_py)
    }

    /// Serialise an evaluation result to a JSON string, mapping failures to `ValueError`.
    fn result_to_json(result: JValue) -> PyResult<String> {
        result
            .to_json_string()
            .map_err(|e| PyValueError::new_err(format!("Failed to serialize result: {}", e)))
    }
}

#[cfg(feature = "python")]
#[pymethods]
impl JsonataExpression {
    /// Evaluate this expression against the provided data.
    ///
    /// # Arguments
    ///
    /// * `data` - A Python object (typically dict) to query/transform
    /// * `bindings` - Optional additional variable bindings
    ///
    /// # Returns
    ///
    /// The result of evaluating the expression
    ///
    /// # Errors
    ///
    /// Returns ValueError if evaluation fails
    #[pyo3(signature = (data, bindings=None))]
    fn evaluate(
        &self,
        py: Python,
        data: Py<PyAny>,
        bindings: Option<Py<PyAny>>,
    ) -> PyResult<Py<PyAny>> {
        let json_data = python_to_json(py, &data)?;
        let result = self.eval_jvalue(py, &json_data, bindings)?;
        json_to_python(py, &result)
    }

    /// Evaluate with a pre-converted data handle (fastest for repeated evaluation).
    ///
    /// # Arguments
    ///
    /// * `data` - A JsonataData handle (pre-converted from Python to internal format)
    /// * `bindings` - Optional additional variable bindings
    ///
    /// # Returns
    ///
    /// The result of evaluating the expression
    ///
    /// # Errors
    ///
    /// Returns ValueError if evaluation fails
    #[pyo3(signature = (data, bindings=None))]
    fn evaluate_with_data(
        &self,
        py: Python,
        data: &JsonataData,
        bindings: Option<Py<PyAny>>,
    ) -> PyResult<Py<PyAny>> {
        let result = self.eval_jvalue(py, &data.data, bindings)?;
        json_to_python(py, &result)
    }

    /// Evaluate with a pre-converted data handle, return JSON string (zero-overhead output).
    ///
    /// # Arguments
    ///
    /// * `data` - A JsonataData handle (pre-converted from Python to internal format)
    /// * `bindings` - Optional additional variable bindings
    ///
    /// # Returns
    ///
    /// The result as a JSON string
    ///
    /// # Errors
    ///
    /// Returns ValueError if evaluation or result serialization fails
    #[pyo3(signature = (data, bindings=None))]
    fn evaluate_data_to_json(
        &self,
        py: Python,
        data: &JsonataData,
        bindings: Option<Py<PyAny>>,
    ) -> PyResult<String> {
        let result = self.eval_jvalue(py, &data.data, bindings)?;
        Self::result_to_json(result)
    }

    /// Evaluate the expression with JSON string input/output (faster for large data).
    ///
    /// This method avoids Python↔Rust conversion overhead by accepting and returning
    /// JSON strings directly. This is significantly faster for large datasets.
    ///
    /// # Arguments
    ///
    /// * `json_str` - Input data as a JSON string
    /// * `bindings` - Optional dict of variable bindings (default: None)
    ///
    /// # Returns
    ///
    /// The result as a JSON string
    ///
    /// # Errors
    ///
    /// Returns ValueError if JSON parsing, evaluation, or result serialization fails
    #[pyo3(signature = (json_str, bindings=None))]
    fn evaluate_json(
        &self,
        py: Python,
        json_str: &str,
        bindings: Option<Py<PyAny>>,
    ) -> PyResult<String> {
        let json_data = JValue::from_json_str(json_str)
            .map_err(|e| PyValueError::new_err(format!("Invalid JSON: {}", e)))?;
        let result = self.eval_jvalue(py, &json_data, bindings)?;
        Self::result_to_json(result)
    }
}
336
/// Parse a JSONata expression and wrap it in a reusable `JsonataExpression`.
///
/// Only parsing happens here; the bytecode cache on the returned object starts
/// empty and is filled lazily by the evaluation methods.
///
/// # Arguments
///
/// * `expression` - A JSONata query/transformation expression string
///
/// # Returns
///
/// A compiled JsonataExpression that can be evaluated
///
/// # Errors
///
/// Returns ValueError ("Parse error: ...") if the expression cannot be parsed
///
/// # Examples
///
/// ```python
/// import jsonatapy
///
/// expr = jsonatapy.compile("$.name")
/// result = expr.evaluate({"name": "Alice"})
/// print(result)  # "Alice"
/// ```
#[cfg(feature = "python")]
#[pyfunction]
fn compile(expression: &str) -> PyResult<JsonataExpression> {
    match parser::parse(expression) {
        Ok(ast) => Ok(JsonataExpression {
            ast,
            bytecode: std::cell::OnceCell::new(),
        }),
        Err(e) => Err(PyValueError::new_err(format!("Parse error: {}", e))),
    }
}
371
/// One-shot helper: compile `expression` and immediately evaluate it against `data`.
///
/// The compiled expression is discarded afterwards, so for repeated
/// evaluations of the same expression prefer `compile()` and reuse the result.
///
/// # Arguments
///
/// * `expression` - A JSONata query/transformation expression string
/// * `data` - A Python object (typically dict) to query/transform
/// * `bindings` - Optional additional variable bindings
///
/// # Returns
///
/// The result of evaluating the expression
///
/// # Errors
///
/// Returns ValueError if parsing or evaluation fails
///
/// # Examples
///
/// ```python
/// import jsonatapy
///
/// result = jsonatapy.evaluate("$uppercase(name)", {"name": "alice"})
/// print(result)  # "ALICE"
/// ```
#[cfg(feature = "python")]
#[pyfunction]
#[pyo3(signature = (expression, data, bindings=None))]
fn evaluate(
    py: Python,
    expression: &str,
    data: Py<PyAny>,
    bindings: Option<Py<PyAny>>,
) -> PyResult<Py<PyAny>> {
    compile(expression)?.evaluate(py, data, bindings)
}
411
/// Convert an owned Python object reference into a `JValue`.
///
/// Thin adapter: binds `obj` to the current interpreter token and delegates
/// to `python_to_json_bound`, which performs the actual conversion:
/// - None -> Null
/// - bool -> Bool (checked before int since bool is a subclass of int)
/// - int, float -> Number
/// - str -> String
/// - list -> Array
/// - dict -> Object
#[cfg(feature = "python")]
fn python_to_json(py: Python, obj: &Py<PyAny>) -> PyResult<JValue> {
    let bound = obj.bind(py);
    python_to_json_bound(bound)
}
425
/// Inner conversion using the Bound API for cheap type checks.
///
/// Uses `is_instance_of::<T>()` / `cast::<T>()`, which compile to C-level type
/// checks (PyBool_Check, PyLong_Check, ...) rather than `qualname()`, which
/// allocates a Python string and does a string comparison.
///
/// Conversion rules, tried in order:
/// - None -> Null
/// - bool -> Bool (checked before int since bool is a subclass of int)
/// - int -> Number; ints outside the i64 range are converted through f64
///   instead of failing, since the target representation is an f64 anyway
/// - float -> Number
/// - str -> String
/// - list (including subclasses) -> Array
/// - dict (including subclasses such as OrderedDict) -> Object
/// - anything else that extracts as bool / i64 / f64 / String (e.g. numpy scalars)
///
/// # Errors
///
/// Returns `TypeError` when no rule applies. Errors from nested conversions
/// and from non-string dict keys are propagated.
#[cfg(feature = "python")]
fn python_to_json_bound(obj: &Bound<'_, PyAny>) -> PyResult<JValue> {
    if obj.is_none() {
        return Ok(JValue::Null);
    }

    // Check bool before int — Python bool is a subclass of int
    if obj.is_instance_of::<PyBool>() {
        return Ok(JValue::Bool(obj.extract::<bool>()?));
    }
    if obj.is_instance_of::<PyInt>() {
        // Python ints are arbitrary precision: i64 extraction raises
        // OverflowError beyond ±2^63. Fall back to a direct float conversion
        // so large-but-representable values (e.g. 2**70) are accepted.
        let number = match obj.extract::<i64>() {
            Ok(i) => i as f64,
            Err(_) => obj.extract::<f64>()?,
        };
        return Ok(JValue::Number(number));
    }
    if obj.is_instance_of::<PyFloat>() {
        return Ok(JValue::Number(obj.extract::<f64>()?));
    }
    if obj.is_instance_of::<PyString>() {
        return Ok(JValue::string(obj.extract::<String>()?));
    }
    // `cast` (not `cast_exact`) so list/dict subclasses are converted too
    // instead of falling through to the TypeError at the bottom.
    if let Ok(list) = obj.cast::<PyList>() {
        let mut result = Vec::with_capacity(list.len());
        for item in list.iter() {
            result.push(python_to_json_bound(&item)?);
        }
        return Ok(JValue::array(result));
    }
    if let Ok(dict) = obj.cast::<PyDict>() {
        let mut result = IndexMap::with_capacity(dict.len());
        for (key, value) in dict.iter() {
            let key_str = key.extract::<String>()?;
            result.insert(key_str, python_to_json_bound(&value)?);
        }
        return Ok(JValue::object(result));
    }

    // Fallback for objects that merely convert to a scalar (numpy types, etc.)
    if let Ok(b) = obj.extract::<bool>() {
        return Ok(JValue::Bool(b));
    }
    if let Ok(i) = obj.extract::<i64>() {
        return Ok(JValue::Number(i as f64));
    }
    if let Ok(f) = obj.extract::<f64>() {
        return Ok(JValue::Number(f));
    }
    if let Ok(s) = obj.extract::<String>() {
        return Ok(JValue::string(s));
    }

    Err(PyTypeError::new_err(format!(
        "Cannot convert Python object to JSON: {}",
        obj.get_type().name()?
    )))
}
485
/// Convert a JValue to a Python object.
///
/// Handles conversion of JValue variants to Python types:
/// - Null/Undefined -> None
/// - Bool -> bool
/// - Number -> int (if a whole number strictly inside the i64 range) or float
/// - String -> str
/// - Array -> list (batch-constructed via PyList::new for fewer C API calls)
/// - Object -> dict (keys keep each object's own insertion order)
/// - Lambda/Builtin/Regex -> None
///
/// # Errors
///
/// Propagates any Python error raised while building lists or dicts.
#[cfg(feature = "python")]
fn json_to_python(py: Python, value: &JValue) -> PyResult<Py<PyAny>> {
    match value {
        JValue::Null | JValue::Undefined => Ok(py.None()),

        JValue::Bool(b) => Ok(b.into_pyobject(py).unwrap().to_owned().into_any().unbind()),

        JValue::Number(n) => {
            // Whole numbers that fit in i64 become Python ints. The upper bound
            // must be exclusive: `i64::MAX as f64` rounds up to 2^63, which is
            // not an i64, and `as i64` would silently saturate it to 2^63 - 1.
            let fits_i64 = n.is_finite()
                && n.fract() == 0.0
                && *n >= i64::MIN as f64
                && *n < i64::MAX as f64;
            if fits_i64 {
                Ok((*n as i64).into_pyobject(py).unwrap().into_any().unbind())
            } else {
                Ok(n.into_pyobject(py).unwrap().into_any().unbind())
            }
        }

        JValue::String(s) => Ok((&**s).into_pyobject(py).unwrap().into_any().unbind()),

        JValue::Array(arr) => {
            // Array of objects with shared keys: intern first object's keys as
            // Python strings to avoid repeated UTF-8 -> PyString conversion.
            let all_objects =
                arr.len() >= 2 && arr.iter().all(|item| matches!(item, JValue::Object(_)));
            if all_objects {
                let first_obj = match arr.first() {
                    Some(JValue::Object(obj)) => obj,
                    _ => unreachable!("all_objects guard ensures first element is an object"),
                };

                // Interned keys indexed by name. The `&str` keys borrow from
                // `first_obj`, which lives in `arr` and outlives this block.
                let interned_keys: std::collections::HashMap<&str, Py<PyString>> = first_obj
                    .keys()
                    .map(|k| (k.as_str(), PyString::new(py, k).unbind()))
                    .collect();

                let items: Vec<Py<PyAny>> = arr
                    .iter()
                    .map(|item| {
                        let obj = match item {
                            JValue::Object(obj) => obj,
                            _ => unreachable!("all_objects guard ensures every element is an object"),
                        };
                        let dict = PyDict::new(py);
                        // Walk the object's OWN keys so the resulting dict keeps
                        // this object's key order; reuse the interned Python
                        // string whenever the key also appears in the first object.
                        for (key, value) in obj.iter() {
                            let py_value = json_to_python(py, value)?;
                            match interned_keys.get(key.as_str()) {
                                Some(py_key) => dict.set_item(py_key.bind(py), py_value)?,
                                None => dict.set_item(key, py_value)?,
                            }
                        }
                        Ok(dict.unbind().into())
                    })
                    .collect::<PyResult<Vec<_>>>()?;
                return Ok(PyList::new(py, &items)?.unbind().into());
            }

            // General array: batch construction
            let items: Vec<Py<PyAny>> = arr
                .iter()
                .map(|item| json_to_python(py, item))
                .collect::<PyResult<Vec<_>>>()?;
            Ok(PyList::new(py, &items)?.unbind().into())
        }

        JValue::Object(obj) => {
            let dict = PyDict::new(py);
            for (key, value) in obj.iter() {
                dict.set_item(key, json_to_python(py, value)?)?;
            }
            Ok(dict.unbind().into())
        }

        // Function-like and regex values have no Python representation here.
        JValue::Lambda { .. } | JValue::Builtin { .. } | JValue::Regex { .. } => Ok(py.None()),
    }
}
577
/// Build a tree-walking evaluator, pre-loading variable bindings when given.
///
/// With `bindings == None` a default evaluator is returned. Otherwise the
/// Python object is converted with `python_to_json`; it must convert to an
/// object, and each of its entries is bound in a fresh context.
///
/// # Errors
///
/// Returns `TypeError` ("bindings must be a dictionary") when the converted
/// bindings are not an object, and propagates conversion errors.
#[cfg(feature = "python")]
fn create_evaluator(py: Python, bindings: Option<Py<PyAny>>) -> PyResult<evaluator::Evaluator> {
    let Some(bindings_obj) = bindings else {
        return Ok(evaluator::Evaluator::new());
    };

    let JValue::Object(entries) = python_to_json(py, &bindings_obj)? else {
        return Err(PyTypeError::new_err("bindings must be a dictionary"));
    };

    let mut context = evaluator::Context::new();
    for (name, bound_value) in entries.iter() {
        context.bind(name.clone(), bound_value.clone());
    }
    Ok(evaluator::Evaluator::with_context(context))
}
597
/// Translate an `EvaluatorError` into a Python exception.
///
/// Every variant currently surfaces as `ValueError` carrying the original
/// message. The match lists each variant explicitly, so adding a new variant
/// is a compile error here rather than a silent default.
#[cfg(feature = "python")]
fn evaluator_error_to_py(e: evaluator::EvaluatorError) -> PyErr {
    use evaluator::EvaluatorError as Failure;

    match e {
        Failure::TypeError(message)
        | Failure::ReferenceError(message)
        | Failure::EvaluationError(message) => PyValueError::new_err(message),
    }
}
607
/// Initialiser for the `_jsonatapy` Python extension module.
///
/// Registers the `compile` and `evaluate` functions, the `JsonataExpression`
/// and `JsonataData` classes, and two version attributes.
#[cfg(feature = "python")]
#[pymodule]
fn _jsonatapy(m: &Bound<'_, PyModule>) -> PyResult<()> {
    // Module-level functions
    let compile_fn = wrap_pyfunction!(compile, m)?;
    m.add_function(compile_fn)?;
    let evaluate_fn = wrap_pyfunction!(evaluate, m)?;
    m.add_function(evaluate_fn)?;

    // Classes
    m.add_class::<JsonataExpression>()?;
    m.add_class::<JsonataData>()?;

    // Version info: the crate's own version, then the version of the
    // reference implementation.
    let crate_version = env!("CARGO_PKG_VERSION");
    m.add("__version__", crate_version)?;
    m.add("__jsonata_version__", "2.1.0")?;

    Ok(())
}
623
#[cfg(test)]
mod tests {
    /// Smoke test: the crate builds and reports the version this test expects.
    #[test]
    fn test_module_creation() {
        let crate_version: &str = env!("CARGO_PKG_VERSION");
        assert_eq!(crate_version, "2.1.2");
    }
}
631}