// jsonata_core/lib.rs

1// jsonatapy - High-performance Python implementation of JSONata
2// Copyright (c) 2025 jsonatapy contributors
3// Licensed under the MIT License
4
5//! # jsonatapy
6//!
7//! A high-performance Rust implementation of JSONata - the JSON query and
8//! transformation language - with optional Python bindings via PyO3.
9//!
10//! ## Rust API
11//!
12//! ```rust,ignore
13//! use jsonata_core::parser;
14//! use jsonata_core::evaluator::Evaluator;
15//! use jsonata_core::value::JValue;
16//!
17//! let ast = parser::parse("user.name").unwrap();
18//! let data = JValue::from_json_str(r#"{"user":{"name":"Alice"}}"#).unwrap();
19//! let result = Evaluator::new().evaluate(&ast, &data).unwrap();
20//! ```
21//!
22//! ## Architecture
23//!
24//! - `parser` - Expression parser (converts JSONata strings to AST)
25//! - `evaluator` - Expression evaluator (executes AST against data)
26//! - `functions` - Built-in function implementations
27//! - `datetime` - Date/time handling functions
28//! - `signature` - Function signature validation
29//! - `ast` - Abstract Syntax Tree definitions
30//! - `value` - JValue type (the runtime value representation)
31
32pub mod ast;
33mod datetime;
34pub mod evaluator;
35pub mod functions;
36pub mod parser;
37mod signature;
38pub mod value;
39
40// ── Python bindings (only when the "python" feature is enabled) ───────────────
41
42#[cfg(feature = "python")]
43use crate::value::JValue;
44#[cfg(feature = "python")]
45use indexmap::IndexMap;
46#[cfg(feature = "python")]
47use pyo3::exceptions::{PyTypeError, PyValueError};
48#[cfg(feature = "python")]
49use pyo3::prelude::*;
50#[cfg(feature = "python")]
51use pyo3::types::{PyBool, PyDict, PyFloat, PyInt, PyList, PyString};
52
/// Handle holding input data that has already been converted to a `JValue`.
///
/// Build it once — from a Python object or from a JSON string — and hand it to
/// `evaluate_with_data` / `evaluate_data_to_json` so the Python→Rust conversion
/// is not repeated on every evaluation.
///
/// # Examples
///
/// ```python
/// import jsonatapy
///
/// data = jsonatapy.JsonataData({"orders": [{"price": 150}, {"price": 50}]})
/// expr = jsonatapy.compile("orders[price > 100]")
/// result = expr.evaluate_with_data(data)
/// ```
#[cfg(feature = "python")]
#[pyclass(unsendable)]
struct JsonataData {
    /// The converted value that evaluations read from.
    data: JValue,
}
72
#[cfg(feature = "python")]
#[pymethods]
impl JsonataData {
    /// Build a handle by converting a Python object (dict, list, scalar, ...).
    ///
    /// Fails with whatever error the Python→`JValue` conversion reports.
    #[new]
    fn new(py: Python, data: PyObject) -> PyResult<Self> {
        python_to_json(py, &data).map(|converted| JsonataData { data: converted })
    }

    /// Build a handle by parsing a JSON string, without going through Python objects.
    ///
    /// Raises `ValueError` when the string is not valid JSON.
    #[staticmethod]
    fn from_json(json_str: &str) -> PyResult<Self> {
        match JValue::from_json_str(json_str) {
            Ok(data) => Ok(JsonataData { data }),
            Err(e) => Err(PyValueError::new_err(format!("Invalid JSON: {}", e))),
        }
    }
}
91
/// A parsed JSONata expression, ready to be evaluated any number of times.
///
/// Obtain one through `compile()`, then call one of its `evaluate*` methods for
/// each input document; the expression string is parsed only once.
///
/// # Examples
///
/// ```python
/// import jsonatapy
///
/// # Compile once
/// expr = jsonatapy.compile("orders[price > 100].product")
///
/// # Evaluate many times
/// data1 = {"orders": [{"product": "A", "price": 150}]}
/// result1 = expr.evaluate(data1)
///
/// data2 = {"orders": [{"product": "B", "price": 50}]}
/// result2 = expr.evaluate(data2)
/// ```
// NOTE(review): `vm` and `compiler` are referenced by this type and its methods
// but are not among the `mod` declarations visible in this file — confirm they
// are in scope when the "python" feature is enabled.
#[cfg(feature = "python")]
#[pyclass(unsendable)]
struct JsonataExpression {
    /// Abstract syntax tree produced by the parser.
    ast: ast::AstNode,
    /// Bytecode cache, filled in by the first evaluation that runs without bindings.
    ///
    /// * `Some(program)` — the expression compiled to bytecode and is run by the VM.
    /// * `None` — compilation was not possible; the tree-walking evaluator is used.
    ///
    /// The `OnceCell` guarantees the compilation attempt is made at most once per
    /// expression instance.
    bytecode: std::cell::OnceCell<Option<vm::BytecodeProgram>>,
}
122
// Rust-only helpers. They live in a plain `impl` block (not `#[pymethods]`) so
// they are not exported to Python.
#[cfg(feature = "python")]
impl JsonataExpression {
    /// Evaluate the expression against an already-converted input value.
    ///
    /// This is the single dispatch point shared by every `evaluate*` method:
    ///
    /// * with `bindings`, a tree-walking evaluator configured with those
    ///   bindings is always used;
    /// * without `bindings`, the cached bytecode is run by the VM when the
    ///   expression could be compiled, otherwise a plain tree-walking evaluator
    ///   is used.
    ///
    /// Evaluation failures are mapped through `evaluator_error_to_py`.
    fn eval_converted(
        &self,
        py: Python,
        input: &JValue,
        bindings: Option<PyObject>,
    ) -> PyResult<JValue> {
        if bindings.is_some() {
            let mut ev = create_evaluator(py, bindings)?;
            return ev.evaluate(&self.ast, input).map_err(evaluator_error_to_py);
        }

        // The compilation attempt happens at most once; its outcome
        // (including "not compilable") is cached in the OnceCell.
        let bytecode = self.bytecode.get_or_init(|| {
            evaluator::try_compile_expr(&self.ast)
                .map(|ce| compiler::BytecodeCompiler::compile(&ce))
        });

        match bytecode {
            Some(bc) => vm::Vm::new(bc)
                .run(input, None)
                .map_err(evaluator_error_to_py),
            None => {
                let mut ev = evaluator::Evaluator::new();
                ev.evaluate(&self.ast, input).map_err(evaluator_error_to_py)
            }
        }
    }
}

#[cfg(feature = "python")]
#[pymethods]
impl JsonataExpression {
    /// Evaluate this expression against the provided data.
    ///
    /// # Arguments
    ///
    /// * `data` - A Python object (typically dict) to query/transform
    /// * `bindings` - Optional additional variable bindings
    ///
    /// # Returns
    ///
    /// The result of evaluating the expression
    ///
    /// # Errors
    ///
    /// Returns ValueError if evaluation fails; conversion of `data` or
    /// `bindings` may raise the error reported by the converter.
    #[pyo3(signature = (data, bindings=None))]
    fn evaluate(
        &self,
        py: Python,
        data: PyObject,
        bindings: Option<PyObject>,
    ) -> PyResult<PyObject> {
        let json_data = python_to_json(py, &data)?;
        let result = self.eval_converted(py, &json_data, bindings)?;
        json_to_python(py, &result)
    }

    /// Evaluate with a pre-converted data handle (skips the input conversion).
    ///
    /// # Arguments
    ///
    /// * `data` - A JsonataData handle (pre-converted from Python to internal format)
    /// * `bindings` - Optional additional variable bindings
    ///
    /// # Returns
    ///
    /// The result of evaluating the expression
    ///
    /// # Errors
    ///
    /// Returns ValueError if evaluation fails
    #[pyo3(signature = (data, bindings=None))]
    fn evaluate_with_data(
        &self,
        py: Python,
        data: &JsonataData,
        bindings: Option<PyObject>,
    ) -> PyResult<PyObject> {
        let result = self.eval_converted(py, &data.data, bindings)?;
        json_to_python(py, &result)
    }

    /// Evaluate with a pre-converted data handle and return the result as a
    /// JSON string (skips both the input and the output conversion).
    ///
    /// # Arguments
    ///
    /// * `data` - A JsonataData handle (pre-converted from Python to internal format)
    /// * `bindings` - Optional additional variable bindings
    ///
    /// # Returns
    ///
    /// The result as a JSON string
    ///
    /// # Errors
    ///
    /// Returns ValueError if evaluation or serialization of the result fails
    #[pyo3(signature = (data, bindings=None))]
    fn evaluate_data_to_json(
        &self,
        py: Python,
        data: &JsonataData,
        bindings: Option<PyObject>,
    ) -> PyResult<String> {
        self.eval_converted(py, &data.data, bindings)?
            .to_json_string()
            .map_err(|e| PyValueError::new_err(format!("Failed to serialize result: {}", e)))
    }

    /// Evaluate the expression with JSON string input/output.
    ///
    /// This method accepts and returns JSON strings directly, so no Python
    /// objects are converted in either direction.
    ///
    /// # Arguments
    ///
    /// * `json_str` - Input data as a JSON string
    /// * `bindings` - Optional dict of variable bindings (default: None)
    ///
    /// # Returns
    ///
    /// The result as a JSON string
    ///
    /// # Errors
    ///
    /// Returns ValueError if JSON parsing, evaluation, or serialization fails
    #[pyo3(signature = (json_str, bindings=None))]
    fn evaluate_json(
        &self,
        py: Python,
        json_str: &str,
        bindings: Option<PyObject>,
    ) -> PyResult<String> {
        let json_data = JValue::from_json_str(json_str)
            .map_err(|e| PyValueError::new_err(format!("Invalid JSON: {}", e)))?;
        self.eval_converted(py, &json_data, bindings)?
            .to_json_string()
            .map_err(|e| PyValueError::new_err(format!("Failed to serialize result: {}", e)))
    }
}
292
/// Parse a JSONata expression string into a reusable `JsonataExpression`.
///
/// # Arguments
///
/// * `expression` - A JSONata query/transformation expression string
///
/// # Returns
///
/// A `JsonataExpression` whose bytecode cache is still empty; it is filled in
/// lazily by the first evaluation that runs without bindings.
///
/// # Errors
///
/// Returns ValueError if the expression cannot be parsed
///
/// # Examples
///
/// ```python
/// import jsonatapy
///
/// expr = jsonatapy.compile("$.name")
/// result = expr.evaluate({"name": "Alice"})
/// print(result)  # "Alice"
/// ```
#[cfg(feature = "python")]
#[pyfunction]
fn compile(expression: &str) -> PyResult<JsonataExpression> {
    match parser::parse(expression) {
        Ok(ast) => Ok(JsonataExpression {
            ast,
            bytecode: std::cell::OnceCell::new(),
        }),
        Err(e) => Err(PyValueError::new_err(format!("Parse error: {}", e))),
    }
}
327
/// Compile and evaluate a JSONata expression in a single call.
///
/// Convenience wrapper around `compile()` followed by
/// `JsonataExpression.evaluate()`. The compiled expression is discarded
/// afterwards, so prefer `compile()` when the same expression is evaluated
/// more than once.
///
/// # Arguments
///
/// * `expression` - A JSONata query/transformation expression string
/// * `data` - A Python object (typically dict) to query/transform
/// * `bindings` - Optional additional variable bindings
///
/// # Returns
///
/// The result of evaluating the expression
///
/// # Errors
///
/// Returns ValueError if parsing or evaluation fails
///
/// # Examples
///
/// ```python
/// import jsonatapy
///
/// result = jsonatapy.evaluate("$uppercase(name)", {"name": "alice"})
/// print(result)  # "ALICE"
/// ```
#[cfg(feature = "python")]
#[pyfunction]
#[pyo3(signature = (expression, data, bindings=None))]
fn evaluate(
    py: Python,
    expression: &str,
    data: PyObject,
    bindings: Option<PyObject>,
) -> PyResult<PyObject> {
    compile(expression)?.evaluate(py, data, bindings)
}
367
/// Convert an owned Python object reference into a `JValue`.
///
/// Thin adapter: binds `obj` to the GIL token and delegates to
/// `python_to_json_bound`, which performs the actual type mapping
/// (None, bool, int/float, str, list, dict).
#[cfg(feature = "python")]
fn python_to_json(py: Python, obj: &PyObject) -> PyResult<JValue> {
    let bound = obj.bind(py);
    python_to_json_bound(bound)
}
381
/// Recursively convert a bound Python object into a `JValue`.
///
/// Mapping, checked in this order:
/// - `None` -> `Null`
/// - `bool` -> `Bool` (tested before `int`, because Python `bool` is a subclass of `int`)
/// - `int` -> `Number` (as `f64`; ints outside the `i64` range are converted
///   through Python's float conversion instead of failing)
/// - `float` -> `Number`
/// - `str` -> `String`
/// - `list` -> `Array` (elements converted recursively)
/// - `dict` -> `Object` (keys must extract as `String`; values converted recursively)
/// - anything else: tried as bool, i64, f64 and String via `extract`, in that order
///
/// Type checks use `is_instance_of::<T>()` / `downcast` on the bound object
/// rather than comparing type names.
///
/// # Errors
///
/// * Propagates the extraction error for a dict key that is not a string and
///   for an `int` that cannot be represented as a float.
/// * Returns `TypeError` naming the Python type when no conversion applies.
#[cfg(feature = "python")]
fn python_to_json_bound(obj: &Bound<'_, PyAny>) -> PyResult<JValue> {
    if obj.is_none() {
        return Ok(JValue::Null);
    }

    // Check bool before int — Python bool is a subclass of int
    if obj.is_instance_of::<PyBool>() {
        return Ok(JValue::Bool(obj.extract::<bool>()?));
    }
    if obj.is_instance_of::<PyInt>() {
        // Python ints are arbitrary precision while the target is f64. An int
        // that does not fit in i64 used to abort the conversion with an
        // overflow error; fall back to float conversion for those values and
        // only fail if that conversion fails too.
        let number = match obj.extract::<i64>() {
            Ok(i) => i as f64,
            Err(_) => obj.extract::<f64>()?,
        };
        return Ok(JValue::Number(number));
    }
    if obj.is_instance_of::<PyFloat>() {
        return Ok(JValue::Number(obj.extract::<f64>()?));
    }
    if obj.is_instance_of::<PyString>() {
        return Ok(JValue::string(obj.extract::<String>()?));
    }
    if let Ok(list) = obj.downcast::<PyList>() {
        let mut result = Vec::with_capacity(list.len());
        for item in list.iter() {
            result.push(python_to_json_bound(&item)?);
        }
        return Ok(JValue::array(result));
    }
    if let Ok(dict) = obj.downcast::<PyDict>() {
        let mut result = IndexMap::with_capacity(dict.len());
        for (key, value) in dict.iter() {
            let key_str = key.extract::<String>()?;
            result.insert(key_str, python_to_json_bound(&value)?);
        }
        return Ok(JValue::object(result));
    }

    // Fallback for objects that matched none of the checks above but still
    // extract as a primitive (numpy scalars, etc.).
    if let Ok(b) = obj.extract::<bool>() {
        return Ok(JValue::Bool(b));
    }
    if let Ok(i) = obj.extract::<i64>() {
        return Ok(JValue::Number(i as f64));
    }
    if let Ok(f) = obj.extract::<f64>() {
        return Ok(JValue::Number(f));
    }
    if let Ok(s) = obj.extract::<String>() {
        return Ok(JValue::string(s));
    }

    Err(PyTypeError::new_err(format!(
        "Cannot convert Python object to JSON: {}",
        obj.get_type().name()?
    )))
}
441
/// Convert a `JValue` into a Python object.
///
/// Mapping:
/// - `Null` / `Undefined` -> `None`
/// - `Bool` -> `bool`
/// - `Number` -> `int` when it is a whole number inside the `i64` range,
///   otherwise `float`
/// - `String` -> `str`
/// - `Array` -> `list` (built in one `PyList::new` call from the converted items)
/// - `Object` -> `dict` (keys inserted in the object's own iteration order)
/// - `Lambda` / `Builtin` / `Regex` -> `None`
///
/// # Errors
///
/// Propagates any error raised while building the Python list/dict objects.
#[cfg(feature = "python")]
fn json_to_python(py: Python, value: &JValue) -> PyResult<PyObject> {
    match value {
        JValue::Null | JValue::Undefined => Ok(py.None()),

        JValue::Bool(b) => Ok(b.into_pyobject(py).unwrap().to_owned().into_any().unbind()),

        JValue::Number(n) => {
            // `i64::MAX as f64` rounds up to 2^63, which is one past the largest
            // i64, so the upper bound has to be exclusive: with `<=`, 2^63 was
            // accepted and the saturating `as i64` cast silently turned it into
            // 2^63 - 1. `i64::MIN as f64` is exactly -2^63, so the lower bound
            // stays inclusive.
            let fits_i64 = n.is_finite()
                && n.fract() == 0.0
                && *n >= i64::MIN as f64
                && *n < i64::MAX as f64;
            if fits_i64 {
                Ok((*n as i64).into_pyobject(py).unwrap().into_any().unbind())
            } else {
                Ok(n.into_pyobject(py).unwrap().into_any().unbind())
            }
        }

        JValue::String(s) => Ok((&**s).into_pyobject(py).unwrap().into_any().unbind()),

        JValue::Array(arr) => {
            // Array of objects with shared keys: create the Python strings for
            // the first object's keys once and reuse them for every element, to
            // avoid repeated UTF-8 -> PyString conversion.
            let all_objects = arr.len() >= 2
                && arr.iter().all(|item| matches!(item, JValue::Object(_)));
            if all_objects {
                let first_obj = match arr.first() {
                    Some(JValue::Object(obj)) => obj,
                    _ => unreachable!("all_objects guard ensures first element is an object"),
                };

                // interned_keys[i] is the Python string for the key stored at
                // position i of first_obj.
                let interned_keys: Vec<Py<PyString>> = first_obj
                    .keys()
                    .map(|k| PyString::new(py, k).unbind())
                    .collect();

                let items: Vec<PyObject> = arr
                    .iter()
                    .map(|item| {
                        let obj = match item {
                            JValue::Object(obj) => obj,
                            _ => unreachable!("all_objects guard ensures every element is an object"),
                        };
                        let dict = PyDict::new(py);
                        // Walk the element's OWN entries so the dict keeps that
                        // object's key order (previously every dict was rebuilt
                        // in the first object's key order). A key that also
                        // exists in the first object reuses its interned
                        // Python string; any other key is converted on the spot.
                        for (key, value) in obj.iter() {
                            let py_value = json_to_python(py, value)?;
                            match first_obj.get_index_of(key) {
                                Some(idx) => dict.set_item(interned_keys[idx].bind(py), py_value)?,
                                None => dict.set_item(key, py_value)?,
                            }
                        }
                        Ok(dict.unbind().into())
                    })
                    .collect::<PyResult<Vec<_>>>()?;
                return Ok(PyList::new(py, &items)?.unbind().into());
            }

            // General array: convert every item, then build the list in one call
            let items: Vec<PyObject> = arr
                .iter()
                .map(|item| json_to_python(py, item))
                .collect::<PyResult<Vec<_>>>()?;
            Ok(PyList::new(py, &items)?.unbind().into())
        }

        JValue::Object(obj) => {
            let dict = PyDict::new(py);
            for (key, value) in obj.iter() {
                dict.set_item(key, json_to_python(py, value)?)?;
            }
            Ok(dict.unbind().into())
        }

        // Function-like and regex values have no Python representation here.
        JValue::Lambda { .. } | JValue::Builtin { .. } | JValue::Regex { .. } => Ok(py.None()),
    }
}
534
/// Build an evaluator, pre-loading variable bindings when some are supplied.
///
/// * `bindings == None` — returns a default evaluator.
/// * `bindings == Some(obj)` — `obj` is converted to a `JValue`; it must convert
///   to an object, and each of its entries is bound by name in a fresh context.
///
/// # Errors
///
/// Returns `TypeError` ("bindings must be a dictionary") when the converted
/// bindings are not an object, or the conversion error itself.
#[cfg(feature = "python")]
fn create_evaluator(py: Python, bindings: Option<PyObject>) -> PyResult<evaluator::Evaluator> {
    let Some(bindings_obj) = bindings else {
        return Ok(evaluator::Evaluator::new());
    };

    let JValue::Object(entries) = python_to_json(py, &bindings_obj)? else {
        return Err(PyTypeError::new_err("bindings must be a dictionary"));
    };

    let mut context = evaluator::Context::new();
    for (name, bound_value) in entries.iter() {
        context.bind(name.clone(), bound_value.clone());
    }
    Ok(evaluator::Evaluator::with_context(context))
}
554
/// Translate an evaluator failure into a Python exception.
///
/// Every `EvaluatorError` variant becomes a `ValueError` carrying the
/// variant's message unchanged.
#[cfg(feature = "python")]
fn evaluator_error_to_py(e: evaluator::EvaluatorError) -> PyErr {
    use crate::evaluator::EvaluatorError as Failure;

    match e {
        Failure::TypeError(message) => PyValueError::new_err(message),
        Failure::ReferenceError(message) => PyValueError::new_err(message),
        Failure::EvaluationError(message) => PyValueError::new_err(message),
    }
}
564
/// Initializer for the `_jsonatapy` Python extension module.
///
/// Registers the `compile` and `evaluate` functions, the `JsonataExpression`
/// and `JsonataData` classes, and two version attributes.
#[cfg(feature = "python")]
#[pymodule]
fn _jsonatapy(m: &Bound<'_, PyModule>) -> PyResult<()> {
    // Module-level functions
    let compile_fn = wrap_pyfunction!(compile, m)?;
    m.add_function(compile_fn)?;
    let evaluate_fn = wrap_pyfunction!(evaluate, m)?;
    m.add_function(evaluate_fn)?;

    // Classes
    m.add_class::<JsonataExpression>()?;
    m.add_class::<JsonataData>()?;

    // Version info: the crate's own version, and the version of the JSONata
    // reference implementation
    let crate_version = env!("CARGO_PKG_VERSION");
    m.add("__version__", crate_version)?;
    m.add("__jsonata_version__", "2.1.0")?;

    Ok(())
}
580
#[cfg(test)]
mod tests {
    /// Smoke test: the version Cargo bakes into the build has a numeric
    /// `MAJOR.MINOR.PATCH` core.
    ///
    /// The check is on the shape of the version rather than on a literal
    /// value, so the test does not have to be edited on every version bump.
    #[test]
    fn test_module_creation() {
        let version = env!("CARGO_PKG_VERSION");

        // Ignore optional pre-release ("-rc.1") and build ("+meta") suffixes.
        let core = version
            .split(|c| c == '-' || c == '+')
            .next()
            .unwrap_or_default();
        let parts: Vec<&str> = core.split('.').collect();

        assert_eq!(parts.len(), 3, "unexpected version format: {version}");
        assert!(
            parts
                .iter()
                .all(|part| !part.is_empty() && part.bytes().all(|b| b.is_ascii_digit())),
            "non-numeric version component in: {version}"
        );
    }
}
588}