jsonata_core/lib.rs
1// jsonatapy - High-performance Python implementation of JSONata
2// Copyright (c) 2025 jsonatapy contributors
3// Licensed under the MIT License
4
5//! # jsonatapy
6//!
7//! A high-performance Rust implementation of JSONata - the JSON query and
8//! transformation language - with optional Python bindings via PyO3.
9//!
10//! ## Rust API
11//!
12//! ```rust,ignore
13//! use jsonata_core::parser;
14//! use jsonata_core::evaluator::Evaluator;
15//! use jsonata_core::value::JValue;
16//!
17//! let ast = parser::parse("user.name").unwrap();
18//! let data = JValue::from_json_str(r#"{"user":{"name":"Alice"}}"#).unwrap();
19//! let result = Evaluator::new().evaluate(&ast, &data).unwrap();
20//! ```
21//!
22//! ## Architecture
23//!
24//! - `parser` - Expression parser (converts JSONata strings to AST)
25//! - `evaluator` - Expression evaluator (executes AST against data)
26//! - `functions` - Built-in function implementations
27//! - `datetime` - Date/time handling functions
28//! - `signature` - Function signature validation
29//! - `ast` - Abstract Syntax Tree definitions
30//! - `value` - JValue type (the runtime value representation)
31
32pub mod ast;
33mod compiler;
34mod datetime;
35pub mod evaluator;
36pub mod functions;
37pub mod parser;
38mod signature;
39pub mod value;
40mod vm;
41
42// ── Benchmarking facade (only when the "bench" feature is enabled) ────────────
43//
44// Exposes the compiler/VM pipeline for Criterion benchmarks without making
45// the internals part of the permanent public API.
46
/// Benchmark-only facade over the bytecode compiler and VM.
///
/// Compiled only when the crate is built with `--features bench`. It exists
/// so Criterion benchmarks can drive the compile/run pipeline directly and
/// compare it with the tree-walking evaluator. Not intended for production use.
#[cfg(feature = "bench")]
pub mod _bench {
    pub use crate::evaluator::EvaluatorError;

    use crate::ast::AstNode;
    use crate::compiler::BytecodeCompiler;
    use crate::value::JValue;
    use crate::vm::{BytecodeProgram, Vm};

    /// Opaque wrapper around a compiled bytecode program.
    pub struct CompiledProgram(BytecodeProgram);

    /// Try to compile `ast` into bytecode.
    ///
    /// Yields `None` when `try_compile_expr` declines the expression; callers
    /// should then fall back to `Evaluator::new().evaluate()`.
    pub fn compile(ast: &AstNode) -> Option<CompiledProgram> {
        let compiled_expr = crate::evaluator::try_compile_expr(ast)?;
        let bytecode = BytecodeCompiler::compile(&compiled_expr);
        Some(CompiledProgram(bytecode))
    }

    /// Run a previously compiled program against `data` on a fresh VM.
    pub fn run(prog: &CompiledProgram, data: &JValue) -> Result<JValue, EvaluatorError> {
        let CompiledProgram(bytecode) = prog;
        Vm::new(bytecode).run(data, None)
    }
}
75
76// ── Python bindings (only when the "python" feature is enabled) ───────────────
77
/// The JSONata reference implementation version this library targets.
///
/// Exposed to Python as the module attribute `__jsonata_version__`.
// The only consumer in this file is the Python module initialiser, so builds
// without the "python" feature would otherwise report the constant as dead code.
#[cfg_attr(not(feature = "python"), allow(dead_code))]
const JSONATA_REFERENCE_VERSION: &str = "2.1.0";
80
81#[cfg(feature = "python")]
82use crate::value::JValue;
83#[cfg(feature = "python")]
84use indexmap::IndexMap;
85#[cfg(feature = "python")]
86use pyo3::exceptions::{PyTypeError, PyValueError};
87#[cfg(feature = "python")]
88use pyo3::prelude::*;
89#[cfg(feature = "python")]
90use pyo3::types::{PyBool, PyDict, PyFloat, PyInt, PyList, PyString};
91
/// Handle holding input data that has already been converted to a `JValue`.
///
/// Build it once from a Python object or a JSON string, then pass it to the
/// `evaluate_with_data` / `evaluate_data_to_json` methods of a compiled
/// expression so the Python-to-Rust conversion is not repeated on every call.
///
/// # Examples
///
/// ```python
/// import jsonatapy
///
/// data = jsonatapy.JsonataData({"orders": [{"price": 150}, {"price": 50}]})
/// expr = jsonatapy.compile("orders[price > 100]")
/// result = expr.evaluate_with_data(data)
/// ```
#[cfg(feature = "python")]
#[pyclass(unsendable)]
struct JsonataData {
    /// The converted document, stored in the evaluator's native representation.
    data: JValue,
}

#[cfg(feature = "python")]
#[pymethods]
impl JsonataData {
    /// Build a handle from a Python object (dict, list, scalar, ...).
    ///
    /// Propagates whatever error the Python-to-`JValue` conversion raises.
    #[new]
    fn new(py: Python, data: Py<PyAny>) -> PyResult<Self> {
        python_to_json(py, &data).map(|converted| Self { data: converted })
    }

    /// Build a handle by parsing a JSON string, skipping Python object traversal.
    ///
    /// Raises `ValueError` when `json_str` is not valid JSON.
    #[staticmethod]
    fn from_json(json_str: &str) -> PyResult<Self> {
        match JValue::from_json_str(json_str) {
            Ok(parsed) => Ok(Self { data: parsed }),
            Err(e) => Err(PyValueError::new_err(format!("Invalid JSON: {}", e))),
        }
    }
}
130
/// A parsed JSONata expression, ready to be evaluated against data.
///
/// Obtain one from `compile()`, then call one of the `evaluate*` methods as
/// many times as needed; the expression is parsed only once.
///
/// # Examples
///
/// ```python
/// import jsonatapy
///
/// # Compile once
/// expr = jsonatapy.compile("orders[price > 100].product")
///
/// # Evaluate many times
/// data1 = {"orders": [{"product": "A", "price": 150}]}
/// result1 = expr.evaluate(data1)
///
/// data2 = {"orders": [{"product": "B", "price": 50}]}
/// result2 = expr.evaluate(data2)
/// ```
#[cfg(feature = "python")]
#[pyclass(unsendable)]
struct JsonataExpression {
    /// Abstract Syntax Tree produced by the parser.
    ast: ast::AstNode,
    /// Bytecode cache, filled the first time the expression is evaluated
    /// without bindings. `Some(program)` means the VM can run the expression;
    /// `None` means compilation was declined and the tree-walker must be used.
    /// The `OnceCell` guarantees the compile attempt happens at most once per
    /// instance.
    bytecode: std::cell::OnceCell<Option<vm::BytecodeProgram>>,
}

#[cfg(feature = "python")]
impl JsonataExpression {
    /// Evaluate this expression against already-converted `data`.
    ///
    /// Without bindings, the cached bytecode is run on the VM when available,
    /// otherwise a fresh tree-walking evaluator is used. With bindings, the
    /// tree-walker is always used, configured via `create_evaluator`.
    /// Evaluator errors are converted to Python exceptions.
    fn run_eval(&self, py: Python, data: &JValue, bindings: Option<Py<PyAny>>) -> PyResult<JValue> {
        let outcome = match bindings {
            Some(user_bindings) => {
                let mut walker = create_evaluator(py, Some(user_bindings))?;
                walker.evaluate(&self.ast, data)
            }
            None => {
                // Compile lazily; the closure runs only on the first call.
                let program = self.bytecode.get_or_init(|| {
                    let compiled = evaluator::try_compile_expr(&self.ast)?;
                    Some(compiler::BytecodeCompiler::compile(&compiled))
                });
                match program {
                    Some(bc) => vm::Vm::new(bc).run(data, None),
                    None => {
                        let mut walker = evaluator::Evaluator::new();
                        walker.evaluate(&self.ast, data)
                    }
                }
            }
        };
        outcome.map_err(evaluator_error_to_py)
    }
}
186
#[cfg(feature = "python")]
#[pymethods]
impl JsonataExpression {
    /// Evaluate the expression against a Python object.
    ///
    /// # Arguments
    ///
    /// * `data` - A Python object (typically dict) to query/transform
    /// * `bindings` - Optional dict of additional variable bindings
    ///
    /// # Returns
    ///
    /// The evaluation result converted back to a Python object
    ///
    /// # Errors
    ///
    /// Returns ValueError if evaluation fails; TypeError if `data` or
    /// `bindings` cannot be converted.
    #[pyo3(signature = (data, bindings=None))]
    fn evaluate(
        &self,
        py: Python,
        data: Py<PyAny>,
        bindings: Option<Py<PyAny>>,
    ) -> PyResult<Py<PyAny>> {
        let input = python_to_json(py, &data)?;
        let output = self.run_eval(py, &input, bindings)?;
        json_to_python(py, &output)
    }

    /// Evaluate against a pre-converted data handle, returning a Python object.
    ///
    /// Skips the Python-to-Rust input conversion that `evaluate` performs.
    ///
    /// # Arguments
    ///
    /// * `data` - A JsonataData handle (pre-converted from Python to internal format)
    /// * `bindings` - Optional additional variable bindings
    ///
    /// # Returns
    ///
    /// The result of evaluating the expression
    #[pyo3(signature = (data, bindings=None))]
    fn evaluate_with_data(
        &self,
        py: Python,
        data: &JsonataData,
        bindings: Option<Py<PyAny>>,
    ) -> PyResult<Py<PyAny>> {
        let output = self.run_eval(py, &data.data, bindings)?;
        json_to_python(py, &output)
    }

    /// Evaluate against a pre-converted data handle, returning a JSON string.
    ///
    /// Neither the input nor the output is converted to or from Python objects.
    ///
    /// # Arguments
    ///
    /// * `data` - A JsonataData handle (pre-converted from Python to internal format)
    /// * `bindings` - Optional additional variable bindings
    ///
    /// # Returns
    ///
    /// The result as a JSON string
    ///
    /// # Errors
    ///
    /// Returns ValueError if evaluation or serialization fails
    #[pyo3(signature = (data, bindings=None))]
    fn evaluate_data_to_json(
        &self,
        py: Python,
        data: &JsonataData,
        bindings: Option<Py<PyAny>>,
    ) -> PyResult<String> {
        let output = self.run_eval(py, &data.data, bindings)?;
        output
            .to_json_string()
            .map_err(|e| PyValueError::new_err(format!("Failed to serialize result: {}", e)))
    }

    /// Evaluate with JSON text as both input and output.
    ///
    /// The input is parsed straight into the internal representation and the
    /// result is serialized straight back to text, so no Python objects are
    /// built for the data.
    ///
    /// # Arguments
    ///
    /// * `json_str` - Input data as a JSON string
    /// * `bindings` - Optional dict of variable bindings (default: None)
    ///
    /// # Returns
    ///
    /// The result as a JSON string
    ///
    /// # Errors
    ///
    /// Returns ValueError if JSON parsing, evaluation, or serialization fails
    #[pyo3(signature = (json_str, bindings=None))]
    fn evaluate_json(
        &self,
        py: Python,
        json_str: &str,
        bindings: Option<Py<PyAny>>,
    ) -> PyResult<String> {
        let input = match JValue::from_json_str(json_str) {
            Ok(parsed) => parsed,
            Err(e) => return Err(PyValueError::new_err(format!("Invalid JSON: {}", e))),
        };
        let output = self.run_eval(py, &input, bindings)?;
        output
            .to_json_string()
            .map_err(|e| PyValueError::new_err(format!("Failed to serialize result: {}", e)))
    }
}
275
/// Parse a JSONata expression and wrap it for repeated evaluation.
///
/// Only parsing happens here; the bytecode cache of the returned object
/// starts out empty.
///
/// # Arguments
///
/// * `expression` - A JSONata query/transformation expression string
///
/// # Returns
///
/// A compiled JsonataExpression that can be evaluated
///
/// # Errors
///
/// Returns ValueError if the expression cannot be parsed
///
/// # Examples
///
/// ```python
/// import jsonatapy
///
/// expr = jsonatapy.compile("$.name")
/// result = expr.evaluate({"name": "Alice"})
/// print(result)  # "Alice"
/// ```
#[cfg(feature = "python")]
#[pyfunction]
fn compile(expression: &str) -> PyResult<JsonataExpression> {
    match parser::parse(expression) {
        Ok(ast) => Ok(JsonataExpression {
            ast,
            bytecode: std::cell::OnceCell::new(),
        }),
        Err(e) => Err(PyValueError::new_err(format!("Parse error: {}", e))),
    }
}
310
/// Compile and evaluate a JSONata expression in a single call.
///
/// Convenience wrapper around `compile()` followed by `evaluate()` on the
/// result. The expression is re-parsed on every call, so prefer `compile()`
/// when the same expression is evaluated repeatedly.
///
/// # Arguments
///
/// * `expression` - A JSONata query/transformation expression string
/// * `data` - A Python object (typically dict) to query/transform
/// * `bindings` - Optional additional variable bindings
///
/// # Returns
///
/// The result of evaluating the expression
///
/// # Errors
///
/// Returns ValueError if parsing or evaluation fails
///
/// # Examples
///
/// ```python
/// import jsonatapy
///
/// result = jsonatapy.evaluate("$uppercase(name)", {"name": "alice"})
/// print(result)  # "ALICE"
/// ```
#[cfg(feature = "python")]
#[pyfunction]
#[pyo3(signature = (expression, data, bindings=None))]
fn evaluate(
    py: Python,
    expression: &str,
    data: Py<PyAny>,
    bindings: Option<Py<PyAny>>,
) -> PyResult<Py<PyAny>> {
    compile(expression)?.evaluate(py, data, bindings)
}
350
/// Convert an owned Python object reference into a `JValue`.
///
/// Thin entry point: binds `obj` to the current GIL token and delegates to
/// `python_to_json_bound`, which maps:
/// - None -> Null
/// - bool -> Bool (checked before int since bool is a subclass of int)
/// - int, float -> Number
/// - str -> String
/// - list -> Array
/// - dict -> Object
#[cfg(feature = "python")]
fn python_to_json(py: Python, obj: &Py<PyAny>) -> PyResult<JValue> {
    let bound = obj.bind(py);
    python_to_json_bound(bound)
}
364
/// Recursively convert a bound Python object into a `JValue`.
///
/// Type dispatch uses `is_instance_of::<T>()` / `cast::<T>()` rather than
/// comparing type names, so the common cases avoid string allocation and
/// comparison.
///
/// Mapping:
/// - `None` -> `Null`
/// - `bool` -> `Bool` (tested before `int`, because `bool` subclasses `int`)
/// - `int` -> `Number`; values outside the `i64` range are converted through
///   Python's own int-to-float conversion instead of failing
/// - `float` -> `Number`
/// - `str` -> `String`
/// - `list` -> `Array` (elements converted recursively)
/// - `dict` -> `Object` (keys must be extractable as `str`)
/// - anything else: best-effort extraction as bool, integer, float, then
///   string, in that order
///
/// # Errors
///
/// Returns `TypeError` when no conversion applies, and propagates any error
/// raised while extracting a value (for example a non-string dict key, or an
/// integer too large to be represented as a float).
#[cfg(feature = "python")]
fn python_to_json_bound(obj: &Bound<'_, PyAny>) -> PyResult<JValue> {
    if obj.is_none() {
        return Ok(JValue::Null);
    }

    // Check bool before int — Python bool is a subclass of int
    if obj.is_instance_of::<PyBool>() {
        return Ok(JValue::Bool(obj.extract::<bool>()?));
    }
    if obj.is_instance_of::<PyInt>() {
        // Python ints are arbitrary precision. The target is f64 anyway, so an
        // int that does not fit in i64 is converted via float extraction
        // rather than aborting the whole conversion with OverflowError.
        let number = match obj.extract::<i64>() {
            Ok(small) => small as f64,
            Err(_) => obj.extract::<f64>()?,
        };
        return Ok(JValue::Number(number));
    }
    if obj.is_instance_of::<PyFloat>() {
        return Ok(JValue::Number(obj.extract::<f64>()?));
    }
    if obj.is_instance_of::<PyString>() {
        return Ok(JValue::string(obj.extract::<String>()?));
    }
    if let Ok(list) = obj.cast::<PyList>() {
        let mut result = Vec::with_capacity(list.len());
        for item in list.iter() {
            result.push(python_to_json_bound(&item)?);
        }
        return Ok(JValue::array(result));
    }
    if let Ok(dict) = obj.cast::<PyDict>() {
        let mut result = IndexMap::with_capacity(dict.len());
        for (key, value) in dict.iter() {
            let key_str = key.extract::<String>()?;
            result.insert(key_str, python_to_json_bound(&value)?);
        }
        return Ok(JValue::object(result));
    }

    // Fallback for objects that are not one of the concrete types above but
    // still support extraction (numpy scalars and similar).
    if let Ok(b) = obj.extract::<bool>() {
        return Ok(JValue::Bool(b));
    }
    if let Ok(i) = obj.extract::<i64>() {
        return Ok(JValue::Number(i as f64));
    }
    if let Ok(f) = obj.extract::<f64>() {
        return Ok(JValue::Number(f));
    }
    if let Ok(s) = obj.extract::<String>() {
        return Ok(JValue::string(s));
    }

    Err(PyTypeError::new_err(format!(
        "Cannot convert Python object to JSON: {}",
        obj.get_type().name()?
    )))
}
424
/// Convert a `JValue` to a Python object.
///
/// Mapping:
/// - Null/Undefined -> None
/// - Bool -> bool
/// - Number -> int when the value is a whole number that fits in `i64`,
///   otherwise float
/// - String -> str
/// - Array -> list (built in one batch via `PyList::new`)
/// - Object -> dict (keys inserted in the object's own iteration order)
/// - Lambda/Builtin/Regex -> None
///
/// # Errors
///
/// Propagates any Python error raised while building lists or dicts.
#[cfg(feature = "python")]
fn json_to_python(py: Python, value: &JValue) -> PyResult<Py<PyAny>> {
    match value {
        JValue::Null | JValue::Undefined => Ok(py.None()),

        JValue::Bool(b) => Ok(b.into_pyobject(py).unwrap().to_owned().into_any().unbind()),

        JValue::Number(n) => {
            // `i64::MAX as f64` rounds up to 2^63, which is NOT representable
            // as i64, so the upper bound must be exclusive. With `<=`, 2^63
            // would be accepted and the saturating `as i64` cast would
            // silently yield 2^63 - 1. `i64::MIN as f64` is exactly -2^63, so
            // the lower bound stays inclusive.
            let fits_i64 = n.fract() == 0.0
                && n.is_finite()
                && *n >= i64::MIN as f64
                && *n < i64::MAX as f64;
            if fits_i64 {
                Ok((*n as i64).into_pyobject(py).unwrap().into_any().unbind())
            } else {
                Ok(n.into_pyobject(py).unwrap().into_any().unbind())
            }
        }

        JValue::String(s) => Ok((&**s).into_pyobject(py).unwrap().into_any().unbind()),

        JValue::Array(arr) => {
            // Array of objects with shared keys: create the Python key strings
            // for the first object's keys once and reuse them for every
            // element, avoiding repeated UTF-8 -> PyString conversion.
            let all_objects =
                arr.len() >= 2 && arr.iter().all(|item| matches!(item, JValue::Object(_)));
            if all_objects {
                let first_obj = match arr.first() {
                    Some(JValue::Object(obj)) => obj,
                    _ => unreachable!("all_objects guard ensures first element is an object"),
                };

                // Keys borrow from `first_obj` (and therefore from `arr`),
                // which outlives this block, so no String clone is needed.
                let interned_keys: std::collections::HashMap<&str, Py<PyString>> = first_obj
                    .keys()
                    .map(|k| (k.as_str(), PyString::new(py, k).unbind()))
                    .collect();

                let items: Vec<Py<PyAny>> = arr
                    .iter()
                    .map(|item| {
                        let obj = match item {
                            JValue::Object(obj) => obj,
                            _ => unreachable!("all_objects guard ensures every element is an object"),
                        };
                        let dict = PyDict::new(py);
                        // Walk each object's OWN keys so the resulting dict
                        // keeps that object's key order (matching the plain
                        // Object branch below); reuse the shared key string
                        // when one exists.
                        for (key, value) in obj.iter() {
                            let py_value = json_to_python(py, value)?;
                            match interned_keys.get(key.as_str()) {
                                Some(py_key) => dict.set_item(py_key.bind(py), py_value)?,
                                None => dict.set_item(key, py_value)?,
                            }
                        }
                        Ok(dict.unbind().into())
                    })
                    .collect::<PyResult<Vec<_>>>()?;
                return Ok(PyList::new(py, &items)?.unbind().into());
            }

            // General array: batch construction
            let items: Vec<Py<PyAny>> = arr
                .iter()
                .map(|item| json_to_python(py, item))
                .collect::<PyResult<Vec<_>>>()?;
            Ok(PyList::new(py, &items)?.unbind().into())
        }

        JValue::Object(obj) => {
            let dict = PyDict::new(py);
            for (key, value) in obj.iter() {
                dict.set_item(key, json_to_python(py, value)?)?;
            }
            Ok(dict.unbind().into())
        }

        // Function-like and regex values have no Python counterpart here.
        JValue::Lambda { .. } | JValue::Builtin { .. } | JValue::Regex { .. } => Ok(py.None()),
    }
}
516
/// Build a tree-walking evaluator, seeding it with Python bindings if given.
///
/// With `bindings == None` a default evaluator is returned. Otherwise the
/// bindings object is converted to a `JValue`; it must convert to an object
/// (i.e. be a dict), and each of its entries is bound in a fresh context.
///
/// # Errors
///
/// Returns `TypeError` when the bindings are not a dictionary, and propagates
/// any error from converting the bindings.
#[cfg(feature = "python")]
fn create_evaluator(py: Python, bindings: Option<Py<PyAny>>) -> PyResult<evaluator::Evaluator> {
    let Some(bindings_obj) = bindings else {
        return Ok(evaluator::Evaluator::new());
    };

    let JValue::Object(entries) = python_to_json(py, &bindings_obj)? else {
        return Err(PyTypeError::new_err("bindings must be a dictionary"));
    };

    let mut context = evaluator::Context::new();
    for (name, bound_value) in entries.iter() {
        context.bind(name.clone(), bound_value.clone());
    }
    Ok(evaluator::Evaluator::with_context(context))
}
536
/// Translate an evaluator failure into a Python exception.
///
/// Every variant is surfaced to Python as `ValueError` carrying the original
/// message.
#[cfg(feature = "python")]
fn evaluator_error_to_py(e: evaluator::EvaluatorError) -> PyErr {
    use crate::evaluator::EvaluatorError as Failure;

    match e {
        Failure::TypeError(message) => PyValueError::new_err(message),
        Failure::ReferenceError(message) => PyValueError::new_err(message),
        Failure::EvaluationError(message) => PyValueError::new_err(message),
    }
}
546
/// Initialiser for the `_jsonatapy` Python extension module.
///
/// Registers the `compile` and `evaluate` functions, the `JsonataExpression`
/// and `JsonataData` classes, and the `__version__` / `__jsonata_version__`
/// attributes.
#[cfg(feature = "python")]
#[pymodule]
fn _jsonatapy(m: &Bound<'_, PyModule>) -> PyResult<()> {
    // Module-level functions
    let compile_fn = wrap_pyfunction!(compile, m)?;
    m.add_function(compile_fn)?;
    let evaluate_fn = wrap_pyfunction!(evaluate, m)?;
    m.add_function(evaluate_fn)?;

    // Classes
    m.add_class::<JsonataExpression>()?;
    m.add_class::<JsonataData>()?;

    // Version metadata: crate version and targeted JSONata reference version
    let crate_version = env!("CARGO_PKG_VERSION");
    m.add("__version__", crate_version)?;
    m.add("__jsonata_version__", JSONATA_REFERENCE_VERSION)?;

    Ok(())
}
562
#[cfg(test)]
mod tests {
    /// Smoke test: the crate version supplied by Cargo at build time is non-empty.
    #[test]
    fn test_module_creation() {
        let crate_version: &str = env!("CARGO_PKG_VERSION");
        assert!(!crate_version.is_empty());
    }
}
570}