jetro_core/lib.rs
1//! Jetro core — parser, compiler, and VM for the Jetro JSON query language.
2//!
3//! This crate is storage-free. For the embedded B+ tree store, named
4//! expressions, graph queries, joins, and [`Session`](../jetrodb/struct.Session.html),
5//! depend on the sibling `jetrodb` crate, or pull the umbrella `jetro` crate
6//! which re-exports both.
7//!
8//! # Architecture
9//!
10//! ```text
11//! source text
12//! │
13//! ▼
14//! parser.rs ── pest grammar → [ast::Expr] tree
15//! │
16//! ▼
17//! vm::Compiler::emit ── Expr → Vec<Opcode>
18//! │
19//! ▼
20//! vm::Compiler::optimize ── peephole passes (root_chain, filter/count,
21//! filter/map fusion, strength reduction,
22//! constant folding, nullness-driven specialisation)
23//! │
24//! ▼
25//! Compiler::compile runs:
26//! • AST rewrite: reorder_and_operands (selectivity-based)
27//! • post-pass : analysis::dedup_subprograms (CSE on Arc<Program>)
28//! │
29//! ▼
30//! vm::VM::execute ── stack machine over &serde_json::Value
31//! with thread-local pointer cache.
32//! ```
33//!
34//! # Quick start
35//!
36//! ```rust
37//! use jetro_core::Jetro;
38//! use serde_json::json;
39//!
40//! let j = Jetro::new(json!({
41//! "store": {
42//! "books": [
43//! {"title": "Dune", "price": 12.99},
44//! {"title": "Foundation", "price": 9.99}
45//! ]
46//! }
47//! }));
48//!
49//! let count = j.collect("$.store.books.len()").unwrap();
50//! assert_eq!(count, json!(2));
51//! ```
52
53pub mod ast;
54pub mod engine;
55pub mod eval;
56pub mod expr;
57pub mod graph;
58pub mod parser;
59pub mod vm;
60pub mod analysis;
61pub mod schema;
62pub mod plan;
63pub mod cfg;
64pub mod ssa;
65pub mod scan;
66pub mod strref;
67
68#[cfg(test)]
69mod tests;
70#[cfg(test)]
71mod examples;
72
73use std::cell::{OnceCell, RefCell};
74use std::sync::Arc;
75use serde_json::Value;
76use eval::Val;
77
78pub use engine::Engine;
79pub use eval::EvalError;
80pub use eval::{Method, MethodRegistry, Val as JetroVal};
81pub use expr::Expr;
82pub use graph::Graph;
83pub use parser::ParseError;
84pub use vm::{VM, Compiler, Program};
85
/// Contract generated by `#[derive(JetroSchema)]`: ties a type to a fixed,
/// compile-time list of named Jetro expressions.
///
/// ```ignore
/// use jetro_core::JetroSchema;
///
/// #[derive(JetroSchema)]
/// #[expr(titles = "$.books.map(title)")]
/// #[expr(count = "$.books.len()")]
/// struct BookView;
///
/// for (name, src) in BookView::exprs() { /* register on a bucket */ }
/// ```
pub trait JetroSchema {
    /// Static table of string pairs. Judging by the derive example above,
    /// each entry is `(name, expression source)`.
    const EXPRS: &'static [(&'static str, &'static str)];

    /// Returns the expression pairs as a `'static` slice.
    ///
    /// NOTE(review): presumably the same table as [`Self::EXPRS`]; the
    /// derive macro is not visible from here, so confirm.
    fn exprs() -> &'static [(&'static str, &'static str)];

    /// Returns a `'static` slice of names; presumably the first element of
    /// every pair, to be confirmed against the derive macro.
    fn names() -> &'static [&'static str];
}
104
105// ── Error ─────────────────────────────────────────────────────────────────────
106
107/// Engine-side error type. Either a parse failure or an evaluation failure.
108///
109/// Storage and IO errors are carried by `jetrodb::DbError` in the sibling
110/// crate. The umbrella `jetro` crate unifies both into a flatter
111/// `jetro::Error` for callers that want a single match arm per variant.
112#[derive(Debug)]
113pub enum Error {
114 Parse(ParseError),
115 Eval(EvalError),
116}
117
118pub type Result<T> = std::result::Result<T, Error>;
119
120impl std::fmt::Display for Error {
121 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
122 match self {
123 Error::Parse(e) => write!(f, "{}", e),
124 Error::Eval(e) => write!(f, "{}", e),
125 }
126 }
127}
128impl std::error::Error for Error {
129 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
130 match self {
131 Error::Parse(e) => Some(e),
132 Error::Eval(_) => None,
133 }
134 }
135}
136
137impl From<ParseError> for Error { fn from(e: ParseError) -> Self { Error::Parse(e) } }
138impl From<EvalError> for Error { fn from(e: EvalError) -> Self { Error::Eval(e) } }
139
140/// Evaluate a Jetro expression against a JSON value.
141pub fn query(expr: &str, doc: &Value) -> Result<Value> {
142 let ast = parser::parse(expr)?;
143 Ok(eval::evaluate(&ast, doc)?)
144}
145
146/// Evaluate a Jetro expression with a custom method registry.
147pub fn query_with(expr: &str, doc: &Value, registry: Arc<MethodRegistry>) -> Result<Value> {
148 let ast = parser::parse(expr)?;
149 Ok(eval::evaluate_with(&ast, doc, registry)?)
150}
151
152// ── Jetro ─────────────────────────────────────────────────────────────────────
153
154thread_local! {
155 static THREAD_VM: RefCell<VM> = RefCell::new(VM::new());
156}
157
158/// Primary entry point for evaluating Jetro expressions.
159///
160/// Holds a JSON document and evaluates expressions against it. Internally
161/// delegates to a thread-local [`VM`] so the compile cache and resolution
162/// cache accumulate over the lifetime of the thread.
163pub struct Jetro {
164 document: Value,
165 /// Cached `Val` tree — built on first `collect()` and reused across
166 /// subsequent calls, amortising the `Val::from(&Value)` walk.
167 root_val: OnceCell<Val>,
168 /// Retained JSON source bytes when the caller built via
169 /// [`Jetro::from_bytes`] / [`Jetro::from_slice`]. Enables SIMD
170 /// byte-scan fast paths for `$..key` queries.
171 raw_bytes: Option<Arc<[u8]>>,
172}
173
174impl Jetro {
175 pub fn new(document: Value) -> Self {
176 Self { document, root_val: OnceCell::new(), raw_bytes: None }
177 }
178
179 /// Parse JSON bytes and retain them alongside the parsed document.
180 /// Descendant queries (`$..key`) can then take the SIMD byte-scan path
181 /// instead of walking the tree.
182 pub fn from_bytes(bytes: Vec<u8>) -> std::result::Result<Self, serde_json::Error> {
183 let document: Value = serde_json::from_slice(&bytes)?;
184 Ok(Self {
185 document,
186 root_val: OnceCell::new(),
187 raw_bytes: Some(Arc::from(bytes.into_boxed_slice())),
188 })
189 }
190
191 /// Parse JSON from a slice, retaining a copy of the bytes.
192 pub fn from_slice(bytes: &[u8]) -> std::result::Result<Self, serde_json::Error> {
193 Self::from_bytes(bytes.to_vec())
194 }
195
196 fn root_val(&self) -> Val {
197 self.root_val.get_or_init(|| Val::from(&self.document)).clone()
198 }
199
200 /// Evaluate `expr` against the document. Routes through the thread-local
201 /// VM (compile + path caches); when the Jetro handle carries raw bytes
202 /// the VM executes on an env with `raw_bytes` set so `Opcode::Descendant`
203 /// can take the SIMD byte-scan fast path.
204 pub fn collect<S: AsRef<str>>(&self, expr: S) -> std::result::Result<Value, EvalError> {
205 let expr = expr.as_ref();
206 THREAD_VM.with(|cell| match (cell.try_borrow_mut(), &self.raw_bytes) {
207 (Ok(mut vm), Some(bytes)) => {
208 let prog = vm.get_or_compile(expr)?;
209 vm.execute_val_with_raw(&prog, self.root_val(), Arc::clone(bytes))
210 }
211 (Ok(mut vm), None) => {
212 let prog = vm.get_or_compile(expr)?;
213 vm.execute_val(&prog, self.root_val())
214 }
215 (Err(_), Some(bytes)) => VM::new().run_str_with_raw(expr, &self.document, Arc::clone(bytes)),
216 (Err(_), None) => VM::new().run_str(expr, &self.document),
217 })
218 }
219
220 /// Evaluate `expr` and return the raw `Val` without converting to
221 /// `serde_json::Value`. For large structural results (e.g. `group_by`
222 /// on 20k+ items) this avoids an expensive materialisation that
223 /// otherwise dominates runtime. The returned `Val` supports cheap
224 /// `Arc`-clone and shares structure with the source document.
225 ///
226 /// Prefer this over `collect` when the caller consumes the result
227 /// structurally (further queries, custom walk, re-evaluation) rather
228 /// than handing it to `serde_json`-aware code.
229 pub fn collect_val<S: AsRef<str>>(&self, expr: S) -> std::result::Result<JetroVal, EvalError> {
230 let expr = expr.as_ref();
231 THREAD_VM.with(|cell| {
232 let mut vm = cell.try_borrow_mut().map_err(|_| EvalError("VM in use".into()))?;
233 let prog = vm.get_or_compile(expr)?;
234 vm.execute_val_raw(&prog, self.root_val())
235 })
236 }
237}
238
239impl From<Value> for Jetro {
240 fn from(v: Value) -> Self {
241 Self::new(v)
242 }
243}