Skip to main content

jetro_core/
lib.rs

1//! Jetro core — parser, compiler, and VM for the Jetro JSON query language.
2//!
3//! This crate is storage-free.  For the embedded B+ tree store, named
4//! expressions, graph queries, joins, and [`Session`](../jetrodb/struct.Session.html),
5//! depend on the sibling `jetrodb` crate, or pull the umbrella `jetro` crate
6//! which re-exports both.
7//!
8//! # Architecture
9//!
10//! ```text
11//!   source text
12//!       │
13//!       ▼
14//!   parser.rs  ── pest grammar → [ast::Expr] tree
15//!       │
16//!       ▼
17//!   vm::Compiler::emit      ── Expr → Vec<Opcode>
18//!       │
19//!       ▼
20//!   vm::Compiler::optimize  ── peephole passes (root_chain, filter/count,
21//!                              filter/map fusion, strength reduction,
22//!                              constant folding, nullness-driven specialisation)
23//!       │
24//!       ▼
25//!   Compiler::compile runs:
26//!       • AST rewrite: reorder_and_operands        (selectivity-based)
27//!       • post-pass  : analysis::dedup_subprograms (CSE on Arc<Program>)
28//!       │
29//!       ▼
30//!   vm::VM::execute          ── stack machine over &serde_json::Value
31//!                                with thread-local pointer cache.
32//! ```
33//!
34//! # Quick start
35//!
36//! ```rust
37//! use jetro_core::Jetro;
38//! use serde_json::json;
39//!
40//! let j = Jetro::new(json!({
41//!     "store": {
42//!         "books": [
43//!             {"title": "Dune",        "price": 12.99},
44//!             {"title": "Foundation",  "price":  9.99}
45//!         ]
46//!     }
47//! }));
48//!
49//! let count = j.collect("$.store.books.len()").unwrap();
50//! assert_eq!(count, json!(2));
51//! ```
52
53pub mod ast;
54pub mod engine;
55pub mod eval;
56pub mod expr;
57pub mod graph;
58pub mod parser;
59pub mod vm;
60pub mod analysis;
61pub mod schema;
62pub mod plan;
63pub mod cfg;
64pub mod ssa;
65pub mod scan;
66pub mod strref;
67
68#[cfg(test)]
69mod tests;
70#[cfg(test)]
71mod examples;
72
73use std::cell::{OnceCell, RefCell};
74use std::sync::Arc;
75use serde_json::Value;
76use eval::Val;
77
78pub use engine::Engine;
79pub use eval::EvalError;
80pub use eval::{Method, MethodRegistry, Val as JetroVal};
81pub use expr::Expr;
82pub use graph::Graph;
83pub use parser::ParseError;
84pub use vm::{VM, Compiler, Program};
85
/// Trait implemented by `#[derive(JetroSchema)]` — pairs a type with a
/// fixed set of named expressions.
///
/// ```ignore
/// use jetro_core::JetroSchema;
///
/// #[derive(JetroSchema)]
/// #[expr(titles = "$.books.map(title)")]
/// #[expr(count  = "$.books.len()")]
/// struct BookView;
///
/// for (name, src) in BookView::exprs() { /* register on a bucket */ }
/// ```
pub trait JetroSchema {
    /// Table of string pairs attached to the implementing type.
    ///
    /// NOTE(review): presumably `(name, expression source)` in the same shape
    /// that [`JetroSchema::exprs`] yields — confirm against the derive macro.
    const EXPRS: &'static [(&'static str, &'static str)];
    /// Returns the `(name, source)` pairs declared on the type, as iterated
    /// in the trait-level example.
    ///
    /// NOTE(review): presumably this returns `Self::EXPRS`; the generated
    /// implementation is not visible from this file.
    fn exprs() -> &'static [(&'static str, &'static str)];
    /// Returns a static slice of names.
    ///
    /// NOTE(review): presumably the first element of every `exprs()` pair, in
    /// declaration order — confirm against the derive macro.
    fn names() -> &'static [&'static str];
}
104
105// ── Error ─────────────────────────────────────────────────────────────────────
106
/// Engine-side error type.  Either a parse failure or an evaluation failure.
///
/// Storage and IO errors are carried by `jetrodb::DbError` in the sibling
/// crate.  The umbrella `jetro` crate unifies both into a flatter
/// `jetro::Error` for callers that want a single match arm per variant.
#[derive(Debug)]
pub enum Error {
    /// The expression text could not be parsed; wraps the parser's error.
    Parse(ParseError),
    /// The expression parsed but evaluation failed; wraps the evaluator's error.
    Eval(EvalError),
}
117
/// Crate-level result alias: a `std::result::Result` whose error type is
/// fixed to this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
119
120impl std::fmt::Display for Error {
121    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
122        match self {
123            Error::Parse(e) => write!(f, "{}", e),
124            Error::Eval(e)  => write!(f, "{}", e),
125        }
126    }
127}
128impl std::error::Error for Error {
129    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
130        match self {
131            Error::Parse(e) => Some(e),
132            Error::Eval(_)  => None,
133        }
134    }
135}
136
137impl From<ParseError> for Error { fn from(e: ParseError) -> Self { Error::Parse(e) } }
138impl From<EvalError>  for Error { fn from(e: EvalError)  -> Self { Error::Eval(e)  } }
139
140/// Evaluate a Jetro expression against a JSON value.
141pub fn query(expr: &str, doc: &Value) -> Result<Value> {
142    let ast = parser::parse(expr)?;
143    Ok(eval::evaluate(&ast, doc)?)
144}
145
146/// Evaluate a Jetro expression with a custom method registry.
147pub fn query_with(expr: &str, doc: &Value, registry: Arc<MethodRegistry>) -> Result<Value> {
148    let ast = parser::parse(expr)?;
149    Ok(eval::evaluate_with(&ast, doc, registry)?)
150}
151
152// ── Jetro ─────────────────────────────────────────────────────────────────────
153
// One `VM` per thread, created with `VM::new()`.  It sits behind a `RefCell`
// because evaluation needs mutable access to it; the `Jetro` methods below
// acquire it with `try_borrow_mut`, so an evaluation that starts while the
// cell is already borrowed is detected instead of panicking.
thread_local! {
    static THREAD_VM: RefCell<VM> = RefCell::new(VM::new());
}
157
/// Primary entry point for evaluating Jetro expressions.
///
/// Holds a JSON document and evaluates expressions against it.  Internally
/// delegates to a thread-local [`VM`] so the compile cache and resolution
/// cache accumulate over the lifetime of the thread.
pub struct Jetro {
    /// The parsed JSON document that expressions are evaluated against.
    document: Value,
    /// Cached `Val` tree — built on first `collect()` and reused across
    /// subsequent calls, amortising the `Val::from(&Value)` walk.
    root_val: OnceCell<Val>,
    /// Retained JSON source bytes when the caller built via
    /// [`Jetro::from_bytes`] / [`Jetro::from_slice`].  Enables SIMD
    /// byte-scan fast paths for `$..key` queries.
    raw_bytes: Option<Arc<[u8]>>,
}
173
174impl Jetro {
175    pub fn new(document: Value) -> Self {
176        Self { document, root_val: OnceCell::new(), raw_bytes: None }
177    }
178
179    /// Parse JSON bytes and retain them alongside the parsed document.
180    /// Descendant queries (`$..key`) can then take the SIMD byte-scan path
181    /// instead of walking the tree.
182    pub fn from_bytes(bytes: Vec<u8>) -> std::result::Result<Self, serde_json::Error> {
183        let document: Value = serde_json::from_slice(&bytes)?;
184        Ok(Self {
185            document,
186            root_val: OnceCell::new(),
187            raw_bytes: Some(Arc::from(bytes.into_boxed_slice())),
188        })
189    }
190
191    /// Parse JSON from a slice, retaining a copy of the bytes.
192    pub fn from_slice(bytes: &[u8]) -> std::result::Result<Self, serde_json::Error> {
193        Self::from_bytes(bytes.to_vec())
194    }
195
196    fn root_val(&self) -> Val {
197        self.root_val.get_or_init(|| Val::from(&self.document)).clone()
198    }
199
200    /// Evaluate `expr` against the document.  Routes through the thread-local
201    /// VM (compile + path caches); when the Jetro handle carries raw bytes
202    /// the VM executes on an env with `raw_bytes` set so `Opcode::Descendant`
203    /// can take the SIMD byte-scan fast path.
204    pub fn collect<S: AsRef<str>>(&self, expr: S) -> std::result::Result<Value, EvalError> {
205        let expr = expr.as_ref();
206        THREAD_VM.with(|cell| match (cell.try_borrow_mut(), &self.raw_bytes) {
207            (Ok(mut vm), Some(bytes)) => {
208                let prog = vm.get_or_compile(expr)?;
209                vm.execute_val_with_raw(&prog, self.root_val(), Arc::clone(bytes))
210            }
211            (Ok(mut vm), None) => {
212                let prog = vm.get_or_compile(expr)?;
213                vm.execute_val(&prog, self.root_val())
214            }
215            (Err(_), Some(bytes)) => VM::new().run_str_with_raw(expr, &self.document, Arc::clone(bytes)),
216            (Err(_), None)        => VM::new().run_str(expr, &self.document),
217        })
218    }
219
220    /// Evaluate `expr` and return the raw `Val` without converting to
221    /// `serde_json::Value`.  For large structural results (e.g. `group_by`
222    /// on 20k+ items) this avoids an expensive materialisation that
223    /// otherwise dominates runtime.  The returned `Val` supports cheap
224    /// `Arc`-clone and shares structure with the source document.
225    ///
226    /// Prefer this over `collect` when the caller consumes the result
227    /// structurally (further queries, custom walk, re-evaluation) rather
228    /// than handing it to `serde_json`-aware code.
229    pub fn collect_val<S: AsRef<str>>(&self, expr: S) -> std::result::Result<JetroVal, EvalError> {
230        let expr = expr.as_ref();
231        THREAD_VM.with(|cell| {
232            let mut vm = cell.try_borrow_mut().map_err(|_| EvalError("VM in use".into()))?;
233            let prog = vm.get_or_compile(expr)?;
234            vm.execute_val_raw(&prog, self.root_val())
235        })
236    }
237}
238
239impl From<Value> for Jetro {
240    fn from(v: Value) -> Self {
241        Self::new(v)
242    }
243}