Skip to main content

jsonata_core/
lib.rs

1// jsonatapy - High-performance Python implementation of JSONata
2// Copyright (c) 2025 jsonatapy contributors
3// Licensed under the MIT License
4
5//! # jsonatapy
6//!
7//! A high-performance Rust implementation of JSONata - the JSON query and
8//! transformation language - with optional Python bindings via PyO3.
9//!
10//! ## Rust API
11//!
12//! ```rust,ignore
13//! use jsonata_core::parser;
14//! use jsonata_core::evaluator::Evaluator;
15//! use jsonata_core::value::JValue;
16//!
17//! let ast = parser::parse("user.name").unwrap();
18//! let data = JValue::from_json_str(r#"{"user":{"name":"Alice"}}"#).unwrap();
19//! let result = Evaluator::new().evaluate(&ast, &data).unwrap();
20//! ```
21//!
22//! ## Architecture
23//!
24//! - `parser` - Expression parser (converts JSONata strings to AST)
25//! - `evaluator` - Expression evaluator (executes AST against data)
26//! - `functions` - Built-in function implementations
27//! - `datetime` - Date/time handling functions
28//! - `signature` - Function signature validation
29//! - `ast` - Abstract Syntax Tree definitions
30//! - `value` - JValue type (the runtime value representation)
31
32pub mod ast;
33mod compiler;
34mod datetime;
35pub mod evaluator;
36pub mod functions;
37pub mod parser;
38mod signature;
39pub mod value;
40mod vm;
41
42// ── Python bindings (only when the "python" feature is enabled) ───────────────
43
44#[cfg(feature = "python")]
45use crate::value::JValue;
46#[cfg(feature = "python")]
47use indexmap::IndexMap;
48#[cfg(feature = "python")]
49use pyo3::exceptions::{PyTypeError, PyValueError};
50#[cfg(feature = "python")]
51use pyo3::prelude::*;
52#[cfg(feature = "python")]
53use pyo3::types::{PyBool, PyDict, PyFloat, PyInt, PyList, PyString};
54
/// Reusable handle holding input data already converted to the internal
/// `JValue` representation.
///
/// Build the handle once and pass it to the `evaluate_with_data` /
/// `evaluate_data_to_json` methods of a compiled expression so the
/// Python→Rust conversion is not repeated on every evaluation.
///
/// # Examples
///
/// ```python
/// import jsonatapy
///
/// data = jsonatapy.JsonataData({"orders": [{"price": 150}, {"price": 50}]})
/// expr = jsonatapy.compile("orders[price > 100]")
/// result = expr.evaluate_with_data(data)
/// ```
#[cfg(feature = "python")]
#[pyclass(unsendable)]
struct JsonataData {
    data: JValue,
}

#[cfg(feature = "python")]
#[pymethods]
impl JsonataData {
    /// Build a handle by converting a Python object (dict, list, etc.).
    ///
    /// Conversion errors from `python_to_json` are propagated unchanged.
    #[new]
    fn new(py: Python, data: PyObject) -> PyResult<Self> {
        python_to_json(py, &data).map(|converted| JsonataData { data: converted })
    }

    /// Build a handle by parsing a JSON string (fastest path).
    ///
    /// Raises `ValueError` when the string is not valid JSON.
    #[staticmethod]
    fn from_json(json_str: &str) -> PyResult<Self> {
        match JValue::from_json_str(json_str) {
            Ok(data) => Ok(JsonataData { data }),
            Err(e) => Err(PyValueError::new_err(format!("Invalid JSON: {}", e))),
        }
    }
}
93
/// A compiled JSONata expression that can be evaluated against data.
///
/// This is the main entry point for using JSONata. Compile an expression once,
/// then evaluate it multiple times against different data.
///
/// # Examples
///
/// ```python
/// import jsonatapy
///
/// # Compile once
/// expr = jsonatapy.compile("orders[price > 100].product")
///
/// # Evaluate many times
/// data1 = {"orders": [{"product": "A", "price": 150}]}
/// result1 = expr.evaluate(data1)
///
/// data2 = {"orders": [{"product": "B", "price": 50}]}
/// result2 = expr.evaluate(data2)
/// ```
#[cfg(feature = "python")]
#[pyclass(unsendable)]
struct JsonataExpression {
    /// The parsed Abstract Syntax Tree produced by `parser::parse`.
    ast: ast::AstNode,
    /// Lazily compiled bytecode, filled in by the first evaluation that is
    /// made *without* bindings (any of the `evaluate*` methods).
    ///
    /// - `Some(bc)`: `evaluator::try_compile_expr` accepted the AST and the
    ///   expression runs on the VM.
    /// - `None`: the AST could not be compiled; the tree-walking evaluator is
    ///   used instead.
    ///
    /// `OnceCell` ensures compilation is attempted at most once per expression
    /// instance. Evaluations with bindings never touch this cache.
    bytecode: std::cell::OnceCell<Option<vm::BytecodeProgram>>,
}
124
#[cfg(feature = "python")]
#[pymethods]
impl JsonataExpression {
    /// Evaluate this expression against the provided data.
    ///
    /// # Arguments
    ///
    /// * `data` - A Python object (typically dict) to query/transform
    /// * `bindings` - Optional additional variable bindings
    ///
    /// # Returns
    ///
    /// The result of evaluating the expression
    ///
    /// # Errors
    ///
    /// Returns ValueError if evaluation fails, or TypeError if `data` or
    /// `bindings` cannot be converted to the internal representation.
    #[pyo3(signature = (data, bindings=None))]
    fn evaluate(
        &self,
        py: Python,
        data: PyObject,
        bindings: Option<PyObject>,
    ) -> PyResult<PyObject> {
        let json_data = python_to_json(py, &data)?;
        let result = self.evaluate_jvalue(py, &json_data, bindings)?;
        json_to_python(py, &result)
    }

    /// Evaluate with a pre-converted data handle (fastest for repeated evaluation).
    ///
    /// # Arguments
    ///
    /// * `data` - A JsonataData handle (pre-converted from Python to internal format)
    /// * `bindings` - Optional additional variable bindings
    ///
    /// # Returns
    ///
    /// The result of evaluating the expression
    ///
    /// # Errors
    ///
    /// Returns ValueError if evaluation fails
    #[pyo3(signature = (data, bindings=None))]
    fn evaluate_with_data(
        &self,
        py: Python,
        data: &JsonataData,
        bindings: Option<PyObject>,
    ) -> PyResult<PyObject> {
        let result = self.evaluate_jvalue(py, &data.data, bindings)?;
        json_to_python(py, &result)
    }

    /// Evaluate with a pre-converted data handle, return JSON string (zero-overhead output).
    ///
    /// # Arguments
    ///
    /// * `data` - A JsonataData handle (pre-converted from Python to internal format)
    /// * `bindings` - Optional additional variable bindings
    ///
    /// # Returns
    ///
    /// The result as a JSON string
    ///
    /// # Errors
    ///
    /// Returns ValueError if evaluation or result serialization fails
    #[pyo3(signature = (data, bindings=None))]
    fn evaluate_data_to_json(
        &self,
        py: Python,
        data: &JsonataData,
        bindings: Option<PyObject>,
    ) -> PyResult<String> {
        let result = self.evaluate_jvalue(py, &data.data, bindings)?;
        Self::serialize_result(&result)
    }

    /// Evaluate the expression with JSON string input/output (faster for large data).
    ///
    /// This method avoids Python↔Rust conversion overhead by accepting and returning
    /// JSON strings directly. This is significantly faster for large datasets.
    ///
    /// # Arguments
    ///
    /// * `json_str` - Input data as a JSON string
    /// * `bindings` - Optional dict of variable bindings (default: None)
    ///
    /// # Returns
    ///
    /// The result as a JSON string
    ///
    /// # Errors
    ///
    /// Returns ValueError if JSON parsing, evaluation, or result serialization fails
    #[pyo3(signature = (json_str, bindings=None))]
    fn evaluate_json(
        &self,
        py: Python,
        json_str: &str,
        bindings: Option<PyObject>,
    ) -> PyResult<String> {
        let json_data = JValue::from_json_str(json_str)
            .map_err(|e| PyValueError::new_err(format!("Invalid JSON: {}", e)))?;
        let result = self.evaluate_jvalue(py, &json_data, bindings)?;
        Self::serialize_result(&result)
    }
}

// Rust-only helpers. They live outside the `#[pymethods]` block because their
// signatures use `JValue`, which is not a Python-facing argument type.
#[cfg(feature = "python")]
impl JsonataExpression {
    /// Run the expression against already-converted data.
    ///
    /// This is the single evaluation path shared by every Python-facing
    /// `evaluate*` method:
    ///
    /// * With bindings: a tree-walking evaluator is built from the bindings
    ///   via `create_evaluator`; the bytecode cache is not consulted.
    /// * Without bindings: the bytecode program is compiled on first use and
    ///   cached in `self.bytecode`; if the AST cannot be compiled, a fresh
    ///   tree-walking evaluator is used instead.
    ///
    /// Evaluation errors are mapped to Python exceptions with
    /// `evaluator_error_to_py`.
    fn evaluate_jvalue(
        &self,
        py: Python,
        data: &JValue,
        bindings: Option<PyObject>,
    ) -> PyResult<JValue> {
        if bindings.is_some() {
            let mut ev = create_evaluator(py, bindings)?;
            return ev.evaluate(&self.ast, data).map_err(evaluator_error_to_py);
        }

        let bytecode = self.bytecode.get_or_init(|| {
            evaluator::try_compile_expr(&self.ast)
                .map(|ce| compiler::BytecodeCompiler::compile(&ce))
        });
        match bytecode {
            Some(bc) => vm::Vm::new(bc)
                .run(data, None)
                .map_err(evaluator_error_to_py),
            None => {
                let mut ev = evaluator::Evaluator::new();
                ev.evaluate(&self.ast, data).map_err(evaluator_error_to_py)
            }
        }
    }

    /// Serialize an evaluation result to a JSON string, mapping failures to ValueError.
    fn serialize_result(result: &JValue) -> PyResult<String> {
        result
            .to_json_string()
            .map_err(|e| PyValueError::new_err(format!("Failed to serialize result: {}", e)))
    }
}
294
/// Parse a JSONata expression and wrap it in a reusable `JsonataExpression`.
///
/// Only parsing happens here; the bytecode cache of the returned expression
/// starts out empty.
///
/// # Arguments
///
/// * `expression` - A JSONata query/transformation expression string
///
/// # Returns
///
/// A compiled JsonataExpression that can be evaluated
///
/// # Errors
///
/// Returns ValueError (prefixed with "Parse error: ") if the expression
/// cannot be parsed
///
/// # Examples
///
/// ```python
/// import jsonatapy
///
/// expr = jsonatapy.compile("$.name")
/// result = expr.evaluate({"name": "Alice"})
/// print(result)  # "Alice"
/// ```
#[cfg(feature = "python")]
#[pyfunction]
fn compile(expression: &str) -> PyResult<JsonataExpression> {
    match parser::parse(expression) {
        Ok(ast) => Ok(JsonataExpression {
            ast,
            bytecode: std::cell::OnceCell::new(),
        }),
        Err(e) => Err(PyValueError::new_err(format!("Parse error: {}", e))),
    }
}
329
/// Compile and evaluate a JSONata expression in a single call.
///
/// Convenience wrapper around `compile()` followed by
/// `JsonataExpression.evaluate()`. When the same expression is evaluated
/// repeatedly, call `compile()` once and reuse the result instead.
///
/// # Arguments
///
/// * `expression` - A JSONata query/transformation expression string
/// * `data` - A Python object (typically dict) to query/transform
/// * `bindings` - Optional additional variable bindings
///
/// # Returns
///
/// The result of evaluating the expression
///
/// # Errors
///
/// Returns ValueError if parsing or evaluation fails
///
/// # Examples
///
/// ```python
/// import jsonatapy
///
/// result = jsonatapy.evaluate("$uppercase(name)", {"name": "alice"})
/// print(result)  # "ALICE"
/// ```
#[cfg(feature = "python")]
#[pyfunction]
#[pyo3(signature = (expression, data, bindings=None))]
fn evaluate(
    py: Python,
    expression: &str,
    data: PyObject,
    bindings: Option<PyObject>,
) -> PyResult<PyObject> {
    compile(expression).and_then(|compiled| compiled.evaluate(py, data, bindings))
}
369
/// Convert an owned Python object reference into a `JValue`.
///
/// Binds `obj` to the GIL token and delegates to `python_to_json_bound`,
/// which performs the actual mapping:
/// - None -> Null
/// - bool -> Bool (checked before int since bool is a subclass of int)
/// - int, float -> Number
/// - str -> String
/// - list -> Array
/// - dict -> Object
#[cfg(feature = "python")]
fn python_to_json(py: Python, obj: &PyObject) -> PyResult<JValue> {
    let bound = obj.bind(py);
    python_to_json_bound(bound)
}
383
/// Inner conversion using Bound API for zero-overhead type checks.
///
/// Uses is_instance_of::<T>() which compiles to C-level type pointer comparisons
/// (PyBool_Check, PyLong_Check, etc.) — single pointer comparison vs qualname()
/// which allocates a Python string and does string comparison.
///
/// Lists and dicts are converted recursively; dict keys must be extractable
/// as `str`, otherwise the extraction error is propagated.
///
/// # Errors
///
/// Returns TypeError when the object (or a nested element) has no supported
/// conversion. Extraction errors raised by PyO3 are propagated unchanged.
#[cfg(feature = "python")]
fn python_to_json_bound(obj: &Bound<'_, PyAny>) -> PyResult<JValue> {
    if obj.is_none() {
        return Ok(JValue::Null);
    }

    // Check bool before int — Python bool is a subclass of int
    if obj.is_instance_of::<PyBool>() {
        return Ok(JValue::Bool(obj.extract::<bool>()?));
    }
    if obj.is_instance_of::<PyInt>() {
        // Numbers are stored as f64. Python ints are arbitrary precision, so an
        // int outside the i64 range must not fail just because the fast i64
        // extraction overflows: fall back to float conversion, which only
        // errors when the magnitude does not fit in an f64 at all.
        let number = match obj.extract::<i64>() {
            Ok(i) => i as f64,
            Err(_) => obj.extract::<f64>()?,
        };
        return Ok(JValue::Number(number));
    }
    if obj.is_instance_of::<PyFloat>() {
        return Ok(JValue::Number(obj.extract::<f64>()?));
    }
    if obj.is_instance_of::<PyString>() {
        return Ok(JValue::string(obj.extract::<String>()?));
    }
    if let Ok(list) = obj.downcast::<PyList>() {
        let mut result = Vec::with_capacity(list.len());
        for item in list.iter() {
            result.push(python_to_json_bound(&item)?);
        }
        return Ok(JValue::array(result));
    }
    if let Ok(dict) = obj.downcast::<PyDict>() {
        let mut result = IndexMap::with_capacity(dict.len());
        for (key, value) in dict.iter() {
            let key_str = key.extract::<String>()?;
            result.insert(key_str, python_to_json_bound(&value)?);
        }
        return Ok(JValue::object(result));
    }

    // Fallback for subclasses, numpy types, etc.: try each scalar extraction
    // in the same bool → int → float → str order as the fast path above.
    if let Ok(b) = obj.extract::<bool>() {
        return Ok(JValue::Bool(b));
    }
    if let Ok(i) = obj.extract::<i64>() {
        return Ok(JValue::Number(i as f64));
    }
    if let Ok(f) = obj.extract::<f64>() {
        return Ok(JValue::Number(f));
    }
    if let Ok(s) = obj.extract::<String>() {
        return Ok(JValue::string(s));
    }

    Err(PyTypeError::new_err(format!(
        "Cannot convert Python object to JSON: {}",
        obj.get_type().name()?
    )))
}
443
/// Convert a JValue to a Python object.
///
/// Handles conversion of JValue variants to Python types:
/// - Null/Undefined -> None
/// - Bool -> bool
/// - Number -> int (if whole number that fits in i64) or float
/// - String -> str
/// - Array -> list (batch-constructed via PyList::new for fewer C API calls)
/// - Object -> dict (key order of the source object is preserved)
/// - Lambda/Builtin/Regex -> None
#[cfg(feature = "python")]
fn json_to_python(py: Python, value: &JValue) -> PyResult<PyObject> {
    match value {
        JValue::Null | JValue::Undefined => Ok(py.None()),

        JValue::Bool(b) => Ok(b.into_pyobject(py).unwrap().to_owned().into_any().unbind()),

        JValue::Number(n) => {
            // If it's a whole number that fits in i64, return as Python int.
            // The upper bound is strict: `i64::MAX as f64` rounds up to 2^63,
            // which is NOT representable as i64, and `as i64` would silently
            // saturate it to 2^63 - 1 (an off-by-one result).
            if n.fract() == 0.0 && n.is_finite() && *n >= i64::MIN as f64 && *n < i64::MAX as f64
            {
                Ok((*n as i64).into_pyobject(py).unwrap().into_any().unbind())
            } else {
                Ok(n.into_pyobject(py).unwrap().into_any().unbind())
            }
        }

        JValue::String(s) => Ok((&**s).into_pyobject(py).unwrap().into_any().unbind()),

        JValue::Array(arr) => {
            // Array of objects with shared keys: build the first object's keys
            // as Python strings once and reuse them for every element, to
            // avoid repeated UTF-8 -> PyString conversion.
            let all_objects = arr.len() >= 2
                && arr.iter().all(|item| matches!(item, JValue::Object(_)));
            if all_objects {
                let first_obj = match arr.first() {
                    Some(JValue::Object(obj)) => obj,
                    _ => unreachable!("all_objects guard ensures first element is an object"),
                };

                // Key text -> pre-built Python string. Borrowing the &str is
                // fine: first_obj borrows from arr, which outlives this block.
                let interned_keys: std::collections::HashMap<&str, Py<PyString>> = first_obj
                    .keys()
                    .map(|k| (k.as_str(), PyString::new(py, k).unbind()))
                    .collect();

                let items: Vec<PyObject> = arr
                    .iter()
                    .map(|item| {
                        let obj = match item {
                            JValue::Object(obj) => obj,
                            _ => unreachable!("all_objects guard ensures every element is an object"),
                        };
                        let dict = PyDict::new(py);
                        // Walk each object's OWN keys so the resulting dict keeps
                        // that object's key order (matching the plain Object
                        // branch below), reusing a pre-built key when available.
                        for (key, value) in obj.iter() {
                            let py_value = json_to_python(py, value)?;
                            match interned_keys.get(key.as_str()) {
                                Some(py_key) => dict.set_item(py_key.bind(py), py_value)?,
                                None => dict.set_item(key, py_value)?,
                            }
                        }
                        Ok(dict.unbind().into())
                    })
                    .collect::<PyResult<Vec<_>>>()?;
                return Ok(PyList::new(py, &items)?.unbind().into());
            }

            // General array: batch construction
            let items: Vec<PyObject> = arr
                .iter()
                .map(|item| json_to_python(py, item))
                .collect::<PyResult<Vec<_>>>()?;
            Ok(PyList::new(py, &items)?.unbind().into())
        }

        JValue::Object(obj) => {
            let dict = PyDict::new(py);
            for (key, value) in obj.iter() {
                dict.set_item(key, json_to_python(py, value)?)?;
            }
            Ok(dict.unbind().into())
        }

        // Function-like and regex values have no Python counterpart here.
        JValue::Lambda { .. } | JValue::Builtin { .. } | JValue::Regex { .. } => Ok(py.None()),
    }
}
536
/// Build an evaluator, seeding its context from optional Python bindings.
///
/// * `None` yields a default evaluator.
/// * `Some(obj)` is converted with `python_to_json`; the result must be an
///   object (Python dict), and each entry is bound by name in a fresh
///   context. Anything else raises TypeError.
#[cfg(feature = "python")]
fn create_evaluator(py: Python, bindings: Option<PyObject>) -> PyResult<evaluator::Evaluator> {
    let Some(bindings_obj) = bindings else {
        return Ok(evaluator::Evaluator::new());
    };

    let JValue::Object(entries) = python_to_json(py, &bindings_obj)? else {
        return Err(PyTypeError::new_err("bindings must be a dictionary"));
    };

    let mut context = evaluator::Context::new();
    for (name, bound_value) in entries.iter() {
        context.bind(name.clone(), bound_value.clone());
    }
    Ok(evaluator::Evaluator::with_context(context))
}
556
/// Translate an evaluator failure into a Python exception.
///
/// Every `EvaluatorError` variant is surfaced to Python as a `ValueError`
/// carrying the variant's message.
#[cfg(feature = "python")]
fn evaluator_error_to_py(e: evaluator::EvaluatorError) -> PyErr {
    use crate::evaluator::EvaluatorError::{EvaluationError, ReferenceError, TypeError};

    match e {
        EvaluationError(msg) => PyValueError::new_err(msg),
        ReferenceError(msg) => PyValueError::new_err(msg),
        TypeError(msg) => PyValueError::new_err(msg),
    }
}
566
/// Entry point of the `_jsonatapy` extension module.
///
/// Registers the `compile` and `evaluate` functions, the `JsonataExpression`
/// and `JsonataData` classes, and two version attributes.
#[cfg(feature = "python")]
#[pymodule]
fn _jsonatapy(m: &Bound<'_, PyModule>) -> PyResult<()> {
    // Module-level functions
    let compile_fn = wrap_pyfunction!(compile, m)?;
    m.add_function(compile_fn)?;
    let evaluate_fn = wrap_pyfunction!(evaluate, m)?;
    m.add_function(evaluate_fn)?;

    // Classes
    m.add_class::<JsonataExpression>()?;
    m.add_class::<JsonataData>()?;

    // Version metadata: crate version from Cargo, plus the version of the
    // JSONata reference implementation.
    let crate_version = env!("CARGO_PKG_VERSION");
    m.add("__version__", crate_version)?;
    m.add("__jsonata_version__", "2.1.0")?;

    Ok(())
}
582
#[cfg(test)]
mod tests {
    /// Smoke test: the crate version injected by Cargo matches the pinned value.
    ///
    /// The expected literal is hard-coded, so it has to be updated whenever
    /// the package version changes.
    #[test]
    fn test_module_creation() {
        const EXPECTED_VERSION: &str = "2.1.0";
        let crate_version = env!("CARGO_PKG_VERSION");
        assert_eq!(crate_version, EXPECTED_VERSION);
    }
}