Skip to main content

jetro_core/
lib.rs

1//! Jetro core — parser, compiler, and VM for the Jetro JSON query language.
2//!
3//! # Execution path
4//!
5//! ```text
6//! source text
7//!   │  parse::parser::parse() → Expr AST
8//!   │  plan::physical::plan_query() → QueryPlan (physical IR)
9//!   │  exec::router::collect_*() → dispatches to:
10//!   │    StructuralIndex backend  (jetro-experimental bitmap)
11//!   │    ViewPipeline backend     (borrowed tape/Val navigation)
12//!   │    Pipeline backend         (pull-based composed stages)
13//!   └─  VM fallback               (bytecode stack machine)
14//! ```
15//!
16//! # Quick start
17//!
18//! ```rust
19//! use jetro_core::Jetro;
20//! let j = Jetro::from_bytes(br#"{"books":[{"price":12}]}"#.to_vec()).unwrap();
21//! assert_eq!(j.collect("$.books.len()").unwrap(), serde_json::json!(1));
22//! ```
23//!
24//! ```rust
25//! use jetro_core::JetroEngine;
26//! use std::io::Cursor;
27//!
28//! let engine = JetroEngine::new();
29//! let rows = Cursor::new(br#"{"name":"Ada"}
30//! {"name":"Bob"}
31//! "#);
32//! let names = engine.collect_ndjson(rows, "name").unwrap();
33//! assert_eq!(names, vec![serde_json::json!("Ada"), serde_json::json!("Bob")]);
34//! ```
35//!
36//! Match-limited NDJSON helpers evaluate a predicate per row, return the
37//! original full row for truthy matches, and stop after the requested number of
38//! matches:
39//!
40//! ```rust
41//! use jetro_core::JetroEngine;
42//! use std::io::Cursor;
43//!
44//! let engine = JetroEngine::new();
45//! let rows = Cursor::new(br#"{"id":1,"active":true}
46//! {"id":2,"active":false}
47//! {"id":3,"active":true}
48//! "#);
49//! let first_two = engine.collect_ndjson_matches(rows, "active", 2).unwrap();
50//! assert_eq!(first_two, vec![
51//!     serde_json::json!({"id": 1, "active": true}),
52//!     serde_json::json!({"id": 3, "active": true}),
53//! ]);
54//! ```
55
56pub(crate) mod builtins;
57pub(crate) mod compile;
58pub(crate) mod data;
59pub(crate) mod exec;
60pub mod io;
61pub(crate) mod ir;
62pub(crate) mod parse;
63pub(crate) mod plan;
64pub(crate) mod util;
65pub(crate) mod vm;
66
67#[cfg(test)]
68mod tests;
69
70use data::value::Val;
71use serde_json::Value;
72use std::cell::{OnceCell, RefCell};
73use std::collections::HashMap;
74use std::sync::Arc;
75use std::sync::Mutex;
76
77pub use data::context::EvalError;
78#[cfg(test)]
79use parse::parser::ParseError;
80use vm::VM;
81
/// Internal parser surface re-exported only when the `fuzz_internal` feature
/// is enabled. Used by the `cargo-fuzz` harness to reach the PEG parser
/// without going through `Jetro::collect`. NOT a stable public API.
#[cfg(feature = "fuzz_internal")]
pub mod __fuzz_internal {
    // Parser entry point and its error type.
    pub use crate::parse::parser::{parse, ParseError};
    // Physical planner entry point.
    pub use crate::plan::physical::plan_query;
}
90
/// Test-only umbrella error that can carry either a parser failure or an
/// evaluation failure.
#[cfg(test)]
#[derive(Debug)]
pub(crate) enum Error {
    /// Wraps a `ParseError` produced by `parse::parser`.
    Parse(ParseError),
    /// Wraps an `EvalError` produced while evaluating an expression.
    Eval(EvalError),
}
97
#[cfg(test)]
impl std::fmt::Display for Error {
    /// Prints the wrapped error's own `Display` text, with no extra prefix.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let inner: &dyn std::fmt::Display = match self {
            Error::Parse(parse_err) => parse_err,
            Error::Eval(eval_err) => eval_err,
        };
        write!(f, "{}", inner)
    }
}
#[cfg(test)]
impl std::error::Error for Error {
    /// Exposes the wrapped `ParseError` as the source; evaluation errors
    /// report no source.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Error::Eval(_) => None,
            Error::Parse(parse_err) => Some(parse_err),
        }
    }
}
116
#[cfg(test)]
impl From<ParseError> for Error {
    /// Wraps a parser failure so `?` can be used in tests.
    fn from(parse_err: ParseError) -> Self {
        Self::Parse(parse_err)
    }
}
#[cfg(test)]
impl From<EvalError> for Error {
    /// Wraps an evaluation failure so `?` can be used in tests.
    fn from(eval_err: EvalError) -> Self {
        Self::Eval(eval_err)
    }
}
129
130/// Primary entry point. Holds a JSON document and evaluates expressions against
131/// it. Lazy fields (`root_val`, `tape`, `structural_index`, `objvec_cache`)
132/// are populated on first use so callers only pay for the representations a
133/// particular query actually needs.
134pub struct Jetro {
135    /// The `serde_json::Value` root document; unused when `simd-json` is enabled
136    /// (the tape is the authoritative source in that case).
137    document: Value,
138    /// Cached `Val` tree — built once and reused across `collect()` calls.
139    root_val: OnceCell<Val>,
140    /// Retained raw bytes for lazy tape and structural-index materialisation.
141    raw_bytes: Option<Arc<[u8]>>,
142
143    /// Lazily parsed simd-json tape; `Err` is cached to avoid re-parsing after failure.
144    #[cfg(feature = "simd-json")]
145    tape: OnceCell<std::result::Result<Arc<crate::data::tape::TapeData>, String>>,
146    /// Unused placeholder so the field name is consistent regardless of features.
147    #[cfg(not(feature = "simd-json"))]
148    #[allow(dead_code)]
149    tape: OnceCell<()>,
150
151    /// Lazily built bitmap structural index for accelerated key-presence queries.
152    structural_index:
153        OnceCell<std::result::Result<Arc<jetro_experimental::StructuralIndex>, String>>,
154
155    /// Per-document cache from `Arc<Vec<Val>>` pointer addresses to promoted
156    /// `ObjVecData` columnar representations; keyed by pointer to avoid re-promotion.
157    pub(crate) objvec_cache:
158        std::sync::Mutex<std::collections::HashMap<usize, Arc<crate::data::value::ObjVecData>>>,
159
160    /// Per-document VM cache used by `Jetro::collect`; not shared across document handles.
161    vm: RefCell<VM>,
162}
163
/// Long-lived multi-document query engine with an explicit plan cache.
/// Use when the same process evaluates many expressions over many documents —
/// parse/lower/compile work is amortised by this object, not hidden in
/// thread-local state.
pub struct JetroEngine {
    /// Maps `"<context_key>\0<expr>"` to compiled `QueryPlan`; evicted wholesale when full.
    plan_cache: Mutex<HashMap<String, ir::physical::QueryPlan>>,
    /// Maximum number of entries before the cache is cleared; 0 disables caching.
    plan_cache_limit: usize,
    /// The shared `VM` used by all `collect*` calls on this engine instance.
    /// It sits behind a `Mutex` that is held for the whole of each prepared
    /// evaluation, so concurrent callers on one engine are serialised.
    vm: Mutex<VM>,
    /// Engine-owned JSON object-key intern cache. Used by [`JetroEngine::parse_value`]
    /// and [`JetroEngine::parse_bytes`] (and the `collect_*` shortcuts that go through
    /// them) so each engine instance has an isolated key cache. Documents built via
    /// the standalone `Jetro::from_bytes`/`From<serde_json::Value>` paths use the
    /// process-wide [`crate::data::intern::default_cache`] instead.
    keys: Arc<crate::data::intern::KeyCache>,
}
182
/// Error returned by the fallible `JetroEngine` entry points — `parse_bytes`,
/// `collect_bytes`, and the NDJSON `run_*` / `collect_*` / `for_each_*`
/// helpers. It distinguishes JSON parsing, stream I/O, per-row NDJSON, and
/// expression-evaluation failures.
#[derive(Debug)]
pub enum JetroEngineError {
    /// JSON parsing failed before evaluation could begin.
    Json(serde_json::Error),
    /// Reading from a stream or writing results failed.
    Io(std::io::Error),
    /// NDJSON row parsing failed with row context.
    Ndjson(io::RowError),
    /// Expression evaluation failed (the JSON was valid but the query errored).
    Eval(EvalError),
}
196
197impl std::fmt::Display for JetroEngineError {
198    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
199        match self {
200            Self::Json(err) => write!(f, "{}", err),
201            Self::Io(err) => write!(f, "{}", err),
202            Self::Ndjson(err) => write!(f, "{}", err),
203            Self::Eval(err) => write!(f, "{}", err),
204        }
205    }
206}
207
208impl std::error::Error for JetroEngineError {
209    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
210        match self {
211            Self::Json(err) => Some(err),
212            Self::Io(err) => Some(err),
213            Self::Ndjson(err) => Some(err),
214            Self::Eval(_) => None,
215        }
216    }
217}
218
219impl From<serde_json::Error> for JetroEngineError {
220    fn from(err: serde_json::Error) -> Self {
221        Self::Json(err)
222    }
223}
224
225impl From<std::io::Error> for JetroEngineError {
226    fn from(err: std::io::Error) -> Self {
227        Self::Io(err)
228    }
229}
230
231impl From<io::RowError> for JetroEngineError {
232    fn from(err: io::RowError) -> Self {
233        Self::Ndjson(err)
234    }
235}
236
237impl From<EvalError> for JetroEngineError {
238    fn from(err: EvalError) -> Self {
239        Self::Eval(err)
240    }
241}
242
243impl Default for JetroEngine {
244    fn default() -> Self {
245        Self::new()
246    }
247}
248
249impl JetroEngine {
250    /// Default maximum plan-cache size; the cache is cleared wholesale when reached.
251    const DEFAULT_PLAN_CACHE_LIMIT: usize = 256;
252
253    /// Create a `JetroEngine` with the default plan-cache limit of 256 entries.
254    pub fn new() -> Self {
255        Self::with_plan_cache_limit(Self::DEFAULT_PLAN_CACHE_LIMIT)
256    }
257
258    /// Create a `JetroEngine` with an explicit plan-cache capacity.
259    /// Set `plan_cache_limit` to 0 to disable caching entirely.
260    pub fn with_plan_cache_limit(plan_cache_limit: usize) -> Self {
261        Self {
262            plan_cache: Mutex::new(HashMap::new()),
263            plan_cache_limit,
264            vm: Mutex::new(VM::new()),
265            keys: crate::data::intern::KeyCache::new(),
266        }
267    }
268
269    /// Borrow this engine's JSON key-intern cache.
270    pub fn keys(&self) -> &Arc<crate::data::intern::KeyCache> {
271        &self.keys
272    }
273
274    /// Discard all cached query plans and the engine's key-intern cache,
275    /// forcing re-compilation and re-interning on the next call.
276    pub fn clear_cache(&self) {
277        self.plan_cache.lock().expect("plan cache poisoned").clear();
278        self.keys.clear();
279    }
280
281    /// Build a `Jetro` document from a `serde_json::Value` with object keys
282    /// interned into this engine's key cache. Use this in place of
283    /// `Jetro::from(...)` / the `From<serde_json::Value>` impl when
284    /// per-engine key isolation is required.
285    pub fn parse_value(&self, document: Value) -> Jetro {
286        let root = Val::from_value_with(&self.keys, &document);
287        Jetro::from_val_and_value(root, document)
288    }
289
290    /// Parse raw JSON bytes into a `Jetro` document with object keys
291    /// interned into this engine's key cache. With `simd-json`, the tape
292    /// is materialised eagerly so interning happens once at parse time
293    /// (subsequent `collect` calls reuse the cached `Val` tree).
294    pub fn parse_bytes(&self, bytes: Vec<u8>) -> std::result::Result<Jetro, JetroEngineError> {
295        let document = Jetro::from_bytes(bytes)?;
296        // Force materialisation so keys are interned through this
297        // engine's cache rather than the default thread-local one when
298        // `collect` later asks for `root_val`.
299        let _ = document.root_val_with(&self.keys)?;
300        Ok(document)
301    }
302
303    /// Parse raw JSON bytes into a `Jetro` document without forcing the `Val`
304    /// tree. This keeps byte-backed callers eligible for tape/view execution;
305    /// object keys are interned only if execution later materialises the row.
306    pub(crate) fn parse_bytes_lazy(
307        &self,
308        bytes: Vec<u8>,
309    ) -> std::result::Result<Jetro, JetroEngineError> {
310        Ok(Jetro::from_bytes(bytes)?)
311    }
312
313    /// Evaluate a Jetro expression against an already-constructed `Jetro` document,
314    /// using the engine's shared plan cache and `VM`.
315    pub fn collect<S: AsRef<str>>(
316        &self,
317        document: &Jetro,
318        expr: S,
319    ) -> std::result::Result<Value, EvalError> {
320        let plan = self.cached_plan(expr.as_ref(), exec::router::planning_context(document));
321        self.collect_prepared(document, &plan)
322    }
323
324    pub(crate) fn collect_prepared(
325        &self,
326        document: &Jetro,
327        plan: &ir::physical::QueryPlan,
328    ) -> std::result::Result<Value, EvalError> {
329        self.collect_prepared_val(document, plan).map(Value::from)
330    }
331
332    pub(crate) fn collect_prepared_val(
333        &self,
334        document: &Jetro,
335        plan: &ir::physical::QueryPlan,
336    ) -> std::result::Result<Val, EvalError> {
337        let mut vm = self.vm.lock().expect("vm cache poisoned");
338        exec::router::collect_plan_val_with_vm(document, plan, &mut vm)
339    }
340
341    pub(crate) fn lock_vm(&self) -> std::sync::MutexGuard<'_, VM> {
342        self.vm.lock().expect("vm cache poisoned")
343    }
344
345    /// Convenience wrapper: wrap a `serde_json::Value` in a `Jetro` and evaluate `expr`.
346    /// Routes through [`JetroEngine::parse_value`] so the document's object keys are
347    /// interned into this engine's key cache.
348    pub fn collect_value<S: AsRef<str>>(
349        &self,
350        document: Value,
351        expr: S,
352    ) -> std::result::Result<Value, EvalError> {
353        let document = self.parse_value(document);
354        self.collect(&document, expr)
355    }
356
357    /// Parse raw JSON bytes into a `Jetro` document and evaluate `expr`,
358    /// returning a `JetroEngineError` on either parse or evaluation failure.
359    /// Routes through [`JetroEngine::parse_bytes`] so the document's object keys
360    /// are interned into this engine's key cache.
361    pub fn collect_bytes<S: AsRef<str>>(
362        &self,
363        bytes: Vec<u8>,
364        expr: S,
365    ) -> std::result::Result<Value, JetroEngineError> {
366        let document = self.parse_bytes(bytes)?;
367        Ok(self.collect(&document, expr)?)
368    }
369
370    /// Evaluate `query` independently for every non-empty NDJSON row and write
371    /// one JSON result per output line.
372    pub fn run_ndjson<R, W>(
373        &self,
374        reader: R,
375        query: &str,
376        writer: W,
377    ) -> std::result::Result<usize, JetroEngineError>
378    where
379        R: std::io::BufRead,
380        W: std::io::Write,
381    {
382        io::run_ndjson(self, reader, query, writer)
383    }
384
385    /// Open an NDJSON file and evaluate `query` independently for every
386    /// non-empty row, writing one JSON result per output line.
387    pub fn run_ndjson_file<P, W>(
388        &self,
389        path: P,
390        query: &str,
391        writer: W,
392    ) -> std::result::Result<usize, JetroEngineError>
393    where
394        P: AsRef<std::path::Path>,
395        W: std::io::Write,
396    {
397        io::run_ndjson_file(self, path, query, writer)
398    }
399
400    /// Like [`JetroEngine::run_ndjson_file`] with explicit NDJSON reader options.
401    pub fn run_ndjson_file_with_options<P, W>(
402        &self,
403        path: P,
404        query: &str,
405        writer: W,
406        options: io::NdjsonOptions,
407    ) -> std::result::Result<usize, JetroEngineError>
408    where
409        P: AsRef<std::path::Path>,
410        W: std::io::Write,
411    {
412        io::run_ndjson_file_with_options(self, path, query, writer, options)
413    }
414
415    /// Open an NDJSON file, write at most `limit` query results, and stop reading.
416    pub fn run_ndjson_file_limit<P, W>(
417        &self,
418        path: P,
419        query: &str,
420        limit: usize,
421        writer: W,
422    ) -> std::result::Result<usize, JetroEngineError>
423    where
424        P: AsRef<std::path::Path>,
425        W: std::io::Write,
426    {
427        io::run_ndjson_file_limit(self, path, query, limit, writer)
428    }
429
430    /// Like [`JetroEngine::run_ndjson_file_limit`] with explicit NDJSON reader options.
431    pub fn run_ndjson_file_limit_with_options<P, W>(
432        &self,
433        path: P,
434        query: &str,
435        limit: usize,
436        writer: W,
437        options: io::NdjsonOptions,
438    ) -> std::result::Result<usize, JetroEngineError>
439    where
440        P: AsRef<std::path::Path>,
441        W: std::io::Write,
442    {
443        io::run_ndjson_file_limit_with_options(self, path, query, limit, writer, options)
444    }
445
446    /// Evaluate `query` independently for every row from an [`io::NdjsonSource`].
447    pub fn run_ndjson_source<W>(
448        &self,
449        source: io::NdjsonSource,
450        query: &str,
451        writer: W,
452    ) -> std::result::Result<usize, JetroEngineError>
453    where
454        W: std::io::Write,
455    {
456        io::run_ndjson_source(self, source, query, writer)
457    }
458
459    /// Like [`JetroEngine::run_ndjson_source`] with explicit NDJSON reader options.
460    pub fn run_ndjson_source_with_options<W>(
461        &self,
462        source: io::NdjsonSource,
463        query: &str,
464        writer: W,
465        options: io::NdjsonOptions,
466    ) -> std::result::Result<usize, JetroEngineError>
467    where
468        W: std::io::Write,
469    {
470        io::run_ndjson_source_with_options(self, source, query, writer, options)
471    }
472
473    /// Evaluate `query` for rows from an [`io::NdjsonSource`], write at most
474    /// `limit` results, and stop reading.
475    pub fn run_ndjson_source_limit<W>(
476        &self,
477        source: io::NdjsonSource,
478        query: &str,
479        limit: usize,
480        writer: W,
481    ) -> std::result::Result<usize, JetroEngineError>
482    where
483        W: std::io::Write,
484    {
485        io::run_ndjson_source_limit(self, source, query, limit, writer)
486    }
487
488    /// Like [`JetroEngine::run_ndjson_source_limit`] with explicit NDJSON reader options.
489    pub fn run_ndjson_source_limit_with_options<W>(
490        &self,
491        source: io::NdjsonSource,
492        query: &str,
493        limit: usize,
494        writer: W,
495        options: io::NdjsonOptions,
496    ) -> std::result::Result<usize, JetroEngineError>
497    where
498        W: std::io::Write,
499    {
500        io::run_ndjson_source_limit_with_options(self, source, query, limit, writer, options)
501    }
502
503    /// Read an NDJSON file from tail to head and write one query result per row.
504    pub fn run_ndjson_rev<P, W>(
505        &self,
506        path: P,
507        query: &str,
508        writer: W,
509    ) -> std::result::Result<usize, JetroEngineError>
510    where
511        P: AsRef<std::path::Path>,
512        W: std::io::Write,
513    {
514        io::run_ndjson_rev(self, path, query, writer)
515    }
516
517    /// Like [`JetroEngine::run_ndjson_rev`] with explicit NDJSON reader options.
518    pub fn run_ndjson_rev_with_options<P, W>(
519        &self,
520        path: P,
521        query: &str,
522        writer: W,
523        options: io::NdjsonOptions,
524    ) -> std::result::Result<usize, JetroEngineError>
525    where
526        P: AsRef<std::path::Path>,
527        W: std::io::Write,
528    {
529        io::run_ndjson_rev_with_options(self, path, query, writer, options)
530    }
531
532    /// Read an NDJSON file from tail to head, write at most `limit` query
533    /// results, and stop reading.
534    pub fn run_ndjson_rev_limit<P, W>(
535        &self,
536        path: P,
537        query: &str,
538        limit: usize,
539        writer: W,
540    ) -> std::result::Result<usize, JetroEngineError>
541    where
542        P: AsRef<std::path::Path>,
543        W: std::io::Write,
544    {
545        io::run_ndjson_rev_limit(self, path, query, limit, writer)
546    }
547
548    /// Like [`JetroEngine::run_ndjson_rev_limit`] with explicit NDJSON reader options.
549    pub fn run_ndjson_rev_limit_with_options<P, W>(
550        &self,
551        path: P,
552        query: &str,
553        limit: usize,
554        writer: W,
555        options: io::NdjsonOptions,
556    ) -> std::result::Result<usize, JetroEngineError>
557    where
558        P: AsRef<std::path::Path>,
559        W: std::io::Write,
560    {
561        io::run_ndjson_rev_limit_with_options(self, path, query, limit, writer, options)
562    }
563
564    /// Like [`JetroEngine::run_ndjson`] with explicit NDJSON reader options.
565    pub fn run_ndjson_with_options<R, W>(
566        &self,
567        reader: R,
568        query: &str,
569        writer: W,
570        options: io::NdjsonOptions,
571    ) -> std::result::Result<usize, JetroEngineError>
572    where
573        R: std::io::BufRead,
574        W: std::io::Write,
575    {
576        io::run_ndjson_with_options(self, reader, query, writer, options)
577    }
578
579    /// Evaluate `query` for NDJSON rows, write at most `limit` results, and stop reading.
580    pub fn run_ndjson_limit<R, W>(
581        &self,
582        reader: R,
583        query: &str,
584        limit: usize,
585        writer: W,
586    ) -> std::result::Result<usize, JetroEngineError>
587    where
588        R: std::io::BufRead,
589        W: std::io::Write,
590    {
591        io::run_ndjson_limit(self, reader, query, limit, writer)
592    }
593
594    /// Like [`JetroEngine::run_ndjson_limit`] with explicit NDJSON reader options.
595    pub fn run_ndjson_limit_with_options<R, W>(
596        &self,
597        reader: R,
598        query: &str,
599        limit: usize,
600        writer: W,
601        options: io::NdjsonOptions,
602    ) -> std::result::Result<usize, JetroEngineError>
603    where
604        R: std::io::BufRead,
605        W: std::io::Write,
606    {
607        io::run_ndjson_limit_with_options(self, reader, query, limit, writer, options)
608    }
609
610    /// Evaluate `predicate` for each NDJSON row, write matching original rows,
611    /// and stop after `limit` matches.
612    pub fn run_ndjson_matches<R, W>(
613        &self,
614        reader: R,
615        predicate: &str,
616        limit: usize,
617        writer: W,
618    ) -> std::result::Result<usize, JetroEngineError>
619    where
620        R: std::io::BufRead,
621        W: std::io::Write,
622    {
623        io::run_ndjson_matches(self, reader, predicate, limit, writer)
624    }
625
626    /// Like [`JetroEngine::run_ndjson_matches`] with explicit NDJSON reader options.
627    pub fn run_ndjson_matches_with_options<R, W>(
628        &self,
629        reader: R,
630        predicate: &str,
631        limit: usize,
632        writer: W,
633        options: io::NdjsonOptions,
634    ) -> std::result::Result<usize, JetroEngineError>
635    where
636        R: std::io::BufRead,
637        W: std::io::Write,
638    {
639        io::run_ndjson_matches_with_options(self, reader, predicate, limit, writer, options)
640    }
641
642    /// Open an NDJSON file, write matching original rows, and stop after `limit` matches.
643    pub fn run_ndjson_matches_file<P, W>(
644        &self,
645        path: P,
646        predicate: &str,
647        limit: usize,
648        writer: W,
649    ) -> std::result::Result<usize, JetroEngineError>
650    where
651        P: AsRef<std::path::Path>,
652        W: std::io::Write,
653    {
654        io::run_ndjson_matches_file(self, path, predicate, limit, writer)
655    }
656
657    /// Like [`JetroEngine::run_ndjson_matches_file`] with explicit NDJSON reader options.
658    pub fn run_ndjson_matches_file_with_options<P, W>(
659        &self,
660        path: P,
661        predicate: &str,
662        limit: usize,
663        writer: W,
664        options: io::NdjsonOptions,
665    ) -> std::result::Result<usize, JetroEngineError>
666    where
667        P: AsRef<std::path::Path>,
668        W: std::io::Write,
669    {
670        io::run_ndjson_matches_file_with_options(self, path, predicate, limit, writer, options)
671    }
672
673    /// Evaluate `predicate` against each row from an [`io::NdjsonSource`], write
674    /// matching original rows, and stop after `limit` matches.
675    pub fn run_ndjson_matches_source<W>(
676        &self,
677        source: io::NdjsonSource,
678        predicate: &str,
679        limit: usize,
680        writer: W,
681    ) -> std::result::Result<usize, JetroEngineError>
682    where
683        W: std::io::Write,
684    {
685        io::run_ndjson_matches_source(self, source, predicate, limit, writer)
686    }
687
688    /// Like [`JetroEngine::run_ndjson_matches_source`] with explicit NDJSON reader options.
689    pub fn run_ndjson_matches_source_with_options<W>(
690        &self,
691        source: io::NdjsonSource,
692        predicate: &str,
693        limit: usize,
694        writer: W,
695        options: io::NdjsonOptions,
696    ) -> std::result::Result<usize, JetroEngineError>
697    where
698        W: std::io::Write,
699    {
700        io::run_ndjson_matches_source_with_options(self, source, predicate, limit, writer, options)
701    }
702
703    /// Read an NDJSON file from tail to head, write matching original rows, and
704    /// stop after `limit` matches.
705    pub fn run_ndjson_rev_matches<P, W>(
706        &self,
707        path: P,
708        predicate: &str,
709        limit: usize,
710        writer: W,
711    ) -> std::result::Result<usize, JetroEngineError>
712    where
713        P: AsRef<std::path::Path>,
714        W: std::io::Write,
715    {
716        io::run_ndjson_rev_matches(self, path, predicate, limit, writer)
717    }
718
719    /// Like [`JetroEngine::run_ndjson_rev_matches`] with explicit NDJSON reader options.
720    pub fn run_ndjson_rev_matches_with_options<P, W>(
721        &self,
722        path: P,
723        predicate: &str,
724        limit: usize,
725        writer: W,
726        options: io::NdjsonOptions,
727    ) -> std::result::Result<usize, JetroEngineError>
728    where
729        P: AsRef<std::path::Path>,
730        W: std::io::Write,
731    {
732        io::run_ndjson_rev_matches_with_options(self, path, predicate, limit, writer, options)
733    }
734
735    /// Evaluate `query` independently for every non-empty NDJSON row and collect
736    /// the per-row results.
737    pub fn collect_ndjson<R>(
738        &self,
739        reader: R,
740        query: &str,
741    ) -> std::result::Result<Vec<Value>, JetroEngineError>
742    where
743        R: std::io::BufRead,
744    {
745        io::collect_ndjson(self, reader, query)
746    }
747
748    /// Open an NDJSON file and collect per-row query results.
749    pub fn collect_ndjson_file<P>(
750        &self,
751        path: P,
752        query: &str,
753    ) -> std::result::Result<Vec<Value>, JetroEngineError>
754    where
755        P: AsRef<std::path::Path>,
756    {
757        io::collect_ndjson_file(self, path, query)
758    }
759
760    /// Like [`JetroEngine::collect_ndjson_file`] with explicit NDJSON reader options.
761    pub fn collect_ndjson_file_with_options<P>(
762        &self,
763        path: P,
764        query: &str,
765        options: io::NdjsonOptions,
766    ) -> std::result::Result<Vec<Value>, JetroEngineError>
767    where
768        P: AsRef<std::path::Path>,
769    {
770        io::collect_ndjson_file_with_options(self, path, query, options)
771    }
772
773    /// Collect per-row query results from an [`io::NdjsonSource`].
774    pub fn collect_ndjson_source(
775        &self,
776        source: io::NdjsonSource,
777        query: &str,
778    ) -> std::result::Result<Vec<Value>, JetroEngineError> {
779        io::collect_ndjson_source(self, source, query)
780    }
781
782    /// Like [`JetroEngine::collect_ndjson_source`] with explicit NDJSON reader options.
783    pub fn collect_ndjson_source_with_options(
784        &self,
785        source: io::NdjsonSource,
786        query: &str,
787        options: io::NdjsonOptions,
788    ) -> std::result::Result<Vec<Value>, JetroEngineError> {
789        io::collect_ndjson_source_with_options(self, source, query, options)
790    }
791
792    /// Read an NDJSON file from tail to head and collect per-row query results.
793    pub fn collect_ndjson_rev<P>(
794        &self,
795        path: P,
796        query: &str,
797    ) -> std::result::Result<Vec<Value>, JetroEngineError>
798    where
799        P: AsRef<std::path::Path>,
800    {
801        io::collect_ndjson_rev(self, path, query)
802    }
803
804    /// Like [`JetroEngine::collect_ndjson_rev`] with explicit NDJSON reader options.
805    pub fn collect_ndjson_rev_with_options<P>(
806        &self,
807        path: P,
808        query: &str,
809        options: io::NdjsonOptions,
810    ) -> std::result::Result<Vec<Value>, JetroEngineError>
811    where
812        P: AsRef<std::path::Path>,
813    {
814        io::collect_ndjson_rev_with_options(self, path, query, options)
815    }
816
817    /// Read an NDJSON file from tail to head and call `f` with each query result
818    /// as it is produced.
819    pub fn for_each_ndjson_rev<P, F>(
820        &self,
821        path: P,
822        query: &str,
823        f: F,
824    ) -> std::result::Result<usize, JetroEngineError>
825    where
826        P: AsRef<std::path::Path>,
827        F: FnMut(Value),
828    {
829        io::for_each_ndjson_rev(self, path, query, f)
830    }
831
832    /// Read an NDJSON file from tail to head and call `f` until it returns
833    /// [`io::NdjsonControl::Stop`] or input is exhausted.
834    pub fn for_each_ndjson_rev_until<P, F>(
835        &self,
836        path: P,
837        query: &str,
838        f: F,
839    ) -> std::result::Result<usize, JetroEngineError>
840    where
841        P: AsRef<std::path::Path>,
842        F: FnMut(Value) -> std::result::Result<io::NdjsonControl, JetroEngineError>,
843    {
844        io::for_each_ndjson_rev_with_options(self, path, query, io::NdjsonOptions::default(), f)
845    }
846
847    /// Like [`JetroEngine::for_each_ndjson_rev_until`] with explicit NDJSON reader options.
848    pub fn for_each_ndjson_rev_until_with_options<P, F>(
849        &self,
850        path: P,
851        query: &str,
852        options: io::NdjsonOptions,
853        f: F,
854    ) -> std::result::Result<usize, JetroEngineError>
855    where
856        P: AsRef<std::path::Path>,
857        F: FnMut(Value) -> std::result::Result<io::NdjsonControl, JetroEngineError>,
858    {
859        io::for_each_ndjson_rev_with_options(self, path, query, options, f)
860    }
861
862    /// Like [`JetroEngine::for_each_ndjson_rev`] with explicit NDJSON reader options.
863    pub fn for_each_ndjson_rev_with_options<P, F>(
864        &self,
865        path: P,
866        query: &str,
867        options: io::NdjsonOptions,
868        mut f: F,
869    ) -> std::result::Result<usize, JetroEngineError>
870    where
871        P: AsRef<std::path::Path>,
872        F: FnMut(Value),
873    {
874        io::for_each_ndjson_rev_with_options(self, path, query, options, |value| {
875            f(value);
876            Ok(io::NdjsonControl::Continue)
877        })
878    }
879
880    /// Like [`JetroEngine::collect_ndjson`] with explicit NDJSON reader options.
881    pub fn collect_ndjson_with_options<R>(
882        &self,
883        reader: R,
884        query: &str,
885        options: io::NdjsonOptions,
886    ) -> std::result::Result<Vec<Value>, JetroEngineError>
887    where
888        R: std::io::BufRead,
889    {
890        io::collect_ndjson_with_options(self, reader, query, options)
891    }
892
893    /// Evaluate `predicate` for each NDJSON row, collect matching original
894    /// rows, and stop after `limit` matches.
895    pub fn collect_ndjson_matches<R>(
896        &self,
897        reader: R,
898        predicate: &str,
899        limit: usize,
900    ) -> std::result::Result<Vec<Value>, JetroEngineError>
901    where
902        R: std::io::BufRead,
903    {
904        io::collect_ndjson_matches(self, reader, predicate, limit)
905    }
906
    /// Like [`JetroEngine::collect_ndjson_matches`] with explicit NDJSON reader options.
    ///
    /// Thin wrapper: all arguments are forwarded unchanged to
    /// `io::collect_ndjson_matches_with_options`, and its result is returned as-is.
    ///
    /// # Errors
    ///
    /// Propagates any [`JetroEngineError`] reported by the underlying `io` call.
    pub fn collect_ndjson_matches_with_options<R>(
        &self,
        reader: R,
        predicate: &str,
        limit: usize,
        options: io::NdjsonOptions,
    ) -> std::result::Result<Vec<Value>, JetroEngineError>
    where
        R: std::io::BufRead,
    {
        io::collect_ndjson_matches_with_options(self, reader, predicate, limit, options)
    }
920
    /// Open an NDJSON file, collect matching original rows, and stop after `limit` matches.
    ///
    /// File-path counterpart of [`JetroEngine::collect_ndjson_matches`]; `path`,
    /// `predicate`, and `limit` are handed to `io::collect_ndjson_matches_file`.
    ///
    /// # Errors
    ///
    /// Propagates any [`JetroEngineError`] reported by the underlying `io` call.
    pub fn collect_ndjson_matches_file<P>(
        &self,
        path: P,
        predicate: &str,
        limit: usize,
    ) -> std::result::Result<Vec<Value>, JetroEngineError>
    where
        P: AsRef<std::path::Path>,
    {
        io::collect_ndjson_matches_file(self, path, predicate, limit)
    }
933
    /// Like [`JetroEngine::collect_ndjson_matches_file`] with explicit NDJSON reader options.
    ///
    /// Thin wrapper: all arguments are forwarded unchanged to
    /// `io::collect_ndjson_matches_file_with_options`, and its result is returned as-is.
    ///
    /// # Errors
    ///
    /// Propagates any [`JetroEngineError`] reported by the underlying `io` call.
    pub fn collect_ndjson_matches_file_with_options<P>(
        &self,
        path: P,
        predicate: &str,
        limit: usize,
        options: io::NdjsonOptions,
    ) -> std::result::Result<Vec<Value>, JetroEngineError>
    where
        P: AsRef<std::path::Path>,
    {
        io::collect_ndjson_matches_file_with_options(self, path, predicate, limit, options)
    }
947
    /// Evaluate `predicate` against each row from an [`io::NdjsonSource`],
    /// collect matching original rows, and stop after `limit` matches.
    ///
    /// `source` is taken by value and passed on to
    /// `io::collect_ndjson_matches_source`, which performs the evaluation.
    ///
    /// # Errors
    ///
    /// Propagates any [`JetroEngineError`] reported by the underlying `io` call.
    pub fn collect_ndjson_matches_source(
        &self,
        source: io::NdjsonSource,
        predicate: &str,
        limit: usize,
    ) -> std::result::Result<Vec<Value>, JetroEngineError> {
        io::collect_ndjson_matches_source(self, source, predicate, limit)
    }
958
    /// Like [`JetroEngine::collect_ndjson_matches_source`] with explicit NDJSON reader options.
    ///
    /// Thin wrapper: all arguments are forwarded unchanged to
    /// `io::collect_ndjson_matches_source_with_options`, and its result is returned as-is.
    ///
    /// # Errors
    ///
    /// Propagates any [`JetroEngineError`] reported by the underlying `io` call.
    pub fn collect_ndjson_matches_source_with_options(
        &self,
        source: io::NdjsonSource,
        predicate: &str,
        limit: usize,
        options: io::NdjsonOptions,
    ) -> std::result::Result<Vec<Value>, JetroEngineError> {
        io::collect_ndjson_matches_source_with_options(self, source, predicate, limit, options)
    }
969
    /// Read an NDJSON file from tail to head, collect matching original rows,
    /// and stop after `limit` matches.
    ///
    /// Reverse-order counterpart of [`JetroEngine::collect_ndjson_matches_file`];
    /// the traversal itself is implemented by `io::collect_ndjson_rev_matches`.
    ///
    /// # Errors
    ///
    /// Propagates any [`JetroEngineError`] reported by the underlying `io` call.
    pub fn collect_ndjson_rev_matches<P>(
        &self,
        path: P,
        predicate: &str,
        limit: usize,
    ) -> std::result::Result<Vec<Value>, JetroEngineError>
    where
        P: AsRef<std::path::Path>,
    {
        io::collect_ndjson_rev_matches(self, path, predicate, limit)
    }
983
    /// Like [`JetroEngine::collect_ndjson_rev_matches`] with explicit NDJSON reader options.
    ///
    /// Thin wrapper: all arguments are forwarded unchanged to
    /// `io::collect_ndjson_rev_matches_with_options`, and its result is returned as-is.
    ///
    /// # Errors
    ///
    /// Propagates any [`JetroEngineError`] reported by the underlying `io` call.
    pub fn collect_ndjson_rev_matches_with_options<P>(
        &self,
        path: P,
        predicate: &str,
        limit: usize,
        options: io::NdjsonOptions,
    ) -> std::result::Result<Vec<Value>, JetroEngineError>
    where
        P: AsRef<std::path::Path>,
    {
        io::collect_ndjson_rev_matches_with_options(self, path, predicate, limit, options)
    }
997
    /// Evaluate `query` independently for every non-empty NDJSON row and call
    /// `f` with each result as it is produced.
    ///
    /// Delegates to `io::for_each_ndjson`. The returned `usize` is whatever that
    /// function reports (presumably the number of rows processed — confirm in
    /// the `io` module).
    ///
    /// # Errors
    ///
    /// Propagates any [`JetroEngineError`] reported by the underlying `io` call.
    pub fn for_each_ndjson<R, F>(
        &self,
        reader: R,
        query: &str,
        f: F,
    ) -> std::result::Result<usize, JetroEngineError>
    where
        R: std::io::BufRead,
        F: FnMut(Value),
    {
        io::for_each_ndjson(self, reader, query, f)
    }
1012
    /// Evaluate `query` independently for every non-empty NDJSON row and call
    /// `f` until it returns [`io::NdjsonControl::Stop`] or input is exhausted.
    ///
    /// Unlike [`JetroEngine::for_each_ndjson`], the callback is fallible and
    /// decides after each value whether iteration continues. Delegates to
    /// `io::for_each_ndjson_until`; the returned `usize` is whatever that function
    /// reports (presumably the number of rows processed — confirm in the `io` module).
    ///
    /// # Errors
    ///
    /// Propagates any [`JetroEngineError`] reported by the underlying `io` call.
    pub fn for_each_ndjson_until<R, F>(
        &self,
        reader: R,
        query: &str,
        f: F,
    ) -> std::result::Result<usize, JetroEngineError>
    where
        R: std::io::BufRead,
        F: FnMut(Value) -> std::result::Result<io::NdjsonControl, JetroEngineError>,
    {
        io::for_each_ndjson_until(self, reader, query, f)
    }
1027
    /// Evaluate `query` for every row from an [`io::NdjsonSource`] and call
    /// `f` with each result as it is produced.
    ///
    /// `source` is taken by value and passed to `io::for_each_ndjson_source`.
    /// The returned `usize` is whatever that function reports (presumably the
    /// number of rows processed — confirm in the `io` module).
    ///
    /// # Errors
    ///
    /// Propagates any [`JetroEngineError`] reported by the underlying `io` call.
    pub fn for_each_ndjson_source<F>(
        &self,
        source: io::NdjsonSource,
        query: &str,
        f: F,
    ) -> std::result::Result<usize, JetroEngineError>
    where
        F: FnMut(Value),
    {
        io::for_each_ndjson_source(self, source, query, f)
    }
1041
    /// Evaluate `query` for every row from an [`io::NdjsonSource`] and call
    /// `f` until it returns [`io::NdjsonControl::Stop`] or input is exhausted.
    ///
    /// Delegates to `io::for_each_ndjson_source_until`. The returned `usize` is
    /// whatever that function reports (presumably the number of rows processed —
    /// confirm in the `io` module).
    ///
    /// # Errors
    ///
    /// Propagates any [`JetroEngineError`] reported by the underlying `io` call.
    pub fn for_each_ndjson_source_until<F>(
        &self,
        source: io::NdjsonSource,
        query: &str,
        f: F,
    ) -> std::result::Result<usize, JetroEngineError>
    where
        F: FnMut(Value) -> std::result::Result<io::NdjsonControl, JetroEngineError>,
    {
        io::for_each_ndjson_source_until(self, source, query, f)
    }
1055
    /// Like [`JetroEngine::for_each_ndjson_source_until`] with explicit NDJSON reader options.
    ///
    /// Thin wrapper: all arguments are forwarded unchanged to
    /// `io::for_each_ndjson_source_until_with_options`, and its result is returned as-is.
    ///
    /// # Errors
    ///
    /// Propagates any [`JetroEngineError`] reported by the underlying `io` call.
    pub fn for_each_ndjson_source_until_with_options<F>(
        &self,
        source: io::NdjsonSource,
        query: &str,
        options: io::NdjsonOptions,
        f: F,
    ) -> std::result::Result<usize, JetroEngineError>
    where
        F: FnMut(Value) -> std::result::Result<io::NdjsonControl, JetroEngineError>,
    {
        io::for_each_ndjson_source_until_with_options(self, source, query, options, f)
    }
1069
    /// Like [`JetroEngine::for_each_ndjson_source`] with explicit NDJSON reader options.
    ///
    /// Thin wrapper: all arguments are forwarded unchanged to
    /// `io::for_each_ndjson_source_with_options`, and its result is returned as-is.
    ///
    /// # Errors
    ///
    /// Propagates any [`JetroEngineError`] reported by the underlying `io` call.
    pub fn for_each_ndjson_source_with_options<F>(
        &self,
        source: io::NdjsonSource,
        query: &str,
        options: io::NdjsonOptions,
        f: F,
    ) -> std::result::Result<usize, JetroEngineError>
    where
        F: FnMut(Value),
    {
        io::for_each_ndjson_source_with_options(self, source, query, options, f)
    }
1083
    /// Like [`JetroEngine::for_each_ndjson`] with explicit NDJSON reader options.
    ///
    /// Thin wrapper: all arguments are forwarded unchanged to
    /// `io::for_each_ndjson_with_options`, and its result is returned as-is.
    ///
    /// # Errors
    ///
    /// Propagates any [`JetroEngineError`] reported by the underlying `io` call.
    pub fn for_each_ndjson_with_options<R, F>(
        &self,
        reader: R,
        query: &str,
        options: io::NdjsonOptions,
        f: F,
    ) -> std::result::Result<usize, JetroEngineError>
    where
        R: std::io::BufRead,
        F: FnMut(Value),
    {
        io::for_each_ndjson_with_options(self, reader, query, options, f)
    }
1098
    /// Like [`JetroEngine::for_each_ndjson_until`] with explicit NDJSON reader options.
    ///
    /// Thin wrapper: all arguments are forwarded unchanged to
    /// `io::for_each_ndjson_until_with_options`, and its result is returned as-is.
    ///
    /// # Errors
    ///
    /// Propagates any [`JetroEngineError`] reported by the underlying `io` call.
    pub fn for_each_ndjson_until_with_options<R, F>(
        &self,
        reader: R,
        query: &str,
        options: io::NdjsonOptions,
        f: F,
    ) -> std::result::Result<usize, JetroEngineError>
    where
        R: std::io::BufRead,
        F: FnMut(Value) -> std::result::Result<io::NdjsonControl, JetroEngineError>,
    {
        io::for_each_ndjson_until_with_options(self, reader, query, options, f)
    }
1113
1114    /// Look up a compiled `QueryPlan` by expression string and planning context,
1115    /// compiling and inserting it if not already cached; evicts the whole cache if full.
1116    pub(crate) fn cached_plan(
1117        &self,
1118        expr: &str,
1119        context: plan::physical::PlanningContext,
1120    ) -> ir::physical::QueryPlan {
1121        let mut cache = self.plan_cache.lock().expect("plan cache poisoned");
1122        let cache_key = format!("{}\0{}", context.cache_key(), expr);
1123        if let Some(plan) = cache.get(&cache_key) {
1124            return plan.clone();
1125        }
1126
1127        let plan = plan::physical::plan_query_with_context(expr, context);
1128        if self.plan_cache_limit > 0 {
1129            if cache.len() >= self.plan_cache_limit {
1130                cache.clear();
1131            }
1132            cache.insert(cache_key, plan.clone());
1133        }
1134        plan
1135    }
1136}
1137
/// `PipelineData` implementation backed by this document's `ObjVecData` cache.
impl exec::pipeline::PipelineData for Jetro {
    /// Forward to [`Jetro::get_or_promote_objvec`], so promoted arrays are cached
    /// on the document handle; returns `None` when that method does.
    fn promote_objvec(&self, arr: &Arc<Vec<Val>>) -> Option<Arc<crate::data::value::ObjVecData>> {
        self.get_or_promote_objvec(arr)
    }
}
1143
1144impl Jetro {
1145    /// Return a reference to the lazily parsed simd-json `TapeData`, parsing raw bytes
1146    /// on first access. Returns `Ok(None)` when no raw bytes are stored.
1147    #[cfg(feature = "simd-json")]
1148    pub(crate) fn lazy_tape(
1149        &self,
1150    ) -> std::result::Result<Option<&Arc<crate::data::tape::TapeData>>, EvalError> {
1151        if let Some(result) = self.tape.get() {
1152            return result
1153                .as_ref()
1154                .map(Some)
1155                .map_err(|err| EvalError(format!("Invalid JSON: {err}")));
1156        }
1157        let Some(raw) = self.raw_bytes.as_ref() else {
1158            return Ok(None);
1159        };
1160        let bytes: Vec<u8> = (**raw).to_vec();
1161        let parsed = crate::data::tape::TapeData::parse(bytes).map_err(|err| err.to_string());
1162        let _ = self.tape.set(parsed);
1163        self.tape
1164            .get()
1165            .expect("tape cache initialized")
1166            .as_ref()
1167            .map(Some)
1168            .map_err(|err| EvalError(format!("Invalid JSON: {err}")))
1169    }
1170
1171    /// Look up or build an `ObjVecData` columnar representation for the given
1172    /// `Arc<Vec<Val>>` array, caching the result by pointer address.
1173    pub(crate) fn get_or_promote_objvec(
1174        &self,
1175        arr: &Arc<Vec<Val>>,
1176    ) -> Option<Arc<crate::data::value::ObjVecData>> {
1177        let key = Arc::as_ptr(arr) as usize;
1178        if let Ok(cache) = self.objvec_cache.lock() {
1179            if let Some(d) = cache.get(&key) {
1180                return Some(Arc::clone(d));
1181            }
1182        }
1183        let promoted = exec::pipeline::Pipeline::try_promote_objvec_arr(arr)?;
1184        if let Ok(mut cache) = self.objvec_cache.lock() {
1185            cache.entry(key).or_insert_with(|| Arc::clone(&promoted));
1186        }
1187        Some(promoted)
1188    }
1189
    /// Internal constructor that wraps a `serde_json::Value` without raw bytes.
    ///
    /// Every lazy cache starts empty and the handle gets a fresh `VM`. Because
    /// `raw_bytes` is `None`, [`Jetro::lazy_structural_index`] (and `lazy_tape`
    /// under `simd-json`) return `Ok(None)` for handles built this way.
    pub(crate) fn new(document: Value) -> Self {
        Self {
            document,
            root_val: OnceCell::new(),
            objvec_cache: Default::default(),
            // No source bytes: the byte-backed lazy caches below never get populated.
            raw_bytes: None,
            tape: OnceCell::new(),
            structural_index: OnceCell::new(),
            vm: RefCell::new(VM::new()),
        }
    }
1202
1203    /// Build a `Jetro` whose `root_val` is pre-cached with `root` (constructed by the
1204    /// caller, typically via [`Val::from_value_with`] using an engine-owned key cache).
1205    /// `document` is retained for back-compat with non-`simd-json` callers and tests
1206    /// that read the original `serde_json::Value`.
1207    pub(crate) fn from_val_and_value(root: Val, document: Value) -> Self {
1208        let root_val = OnceCell::new();
1209        let _ = root_val.set(root);
1210        Self {
1211            document,
1212            root_val,
1213            objvec_cache: Default::default(),
1214            raw_bytes: None,
1215            tape: OnceCell::new(),
1216            structural_index: OnceCell::new(),
1217            vm: RefCell::new(VM::new()),
1218        }
1219    }
1220
1221    /// Build a `Jetro` handle around an already parsed tape. This is used by
1222    /// NDJSON row execution, where the row buffer is already owned and can be
1223    /// consumed directly by simd-json instead of first being copied into
1224    /// `raw_bytes` and copied again by `lazy_tape`.
1225    #[cfg(feature = "simd-json")]
1226    pub(crate) fn from_tape_data(tape: Arc<crate::data::tape::TapeData>) -> Self {
1227        let tape_cell = OnceCell::new();
1228        let _ = tape_cell.set(Ok(tape));
1229        Self {
1230            document: Value::Null,
1231            root_val: OnceCell::new(),
1232            objvec_cache: Default::default(),
1233            raw_bytes: None,
1234            tape: tape_cell,
1235            structural_index: OnceCell::new(),
1236            vm: RefCell::new(VM::new()),
1237        }
1238    }
1239
1240    /// Like [`Jetro::root_val`] but interns object keys through `keys` instead of the
1241    /// process-wide default. Used by [`JetroEngine::parse_bytes`] to materialise the
1242    /// `Val` tree once at parse time so subsequent `collect` calls find a populated
1243    /// `root_val` cache and skip re-interning.
1244    pub(crate) fn root_val_with(
1245        &self,
1246        keys: &crate::data::intern::KeyCache,
1247    ) -> std::result::Result<Val, EvalError> {
1248        if let Some(root) = self.root_val.get() {
1249            return Ok(root.clone());
1250        }
1251        let root = {
1252            #[cfg(feature = "simd-json")]
1253            {
1254                if let Some(tape) = self.lazy_tape()? {
1255                    Val::from_tape_data_with(keys, tape)
1256                } else {
1257                    Val::from_value_with(keys, &self.document)
1258                }
1259            }
1260            #[cfg(not(feature = "simd-json"))]
1261            {
1262                Val::from_value_with(keys, &self.document)
1263            }
1264        };
1265        let _ = self.root_val.set(root);
1266        Ok(self.root_val.get().expect("root val initialized").clone())
1267    }
1268
1269    /// Parse raw JSON bytes and build a `Jetro` query handle.
1270    /// When the `simd-json` feature is enabled the bytes are not parsed eagerly;
1271    /// the tape is built lazily on the first query that needs it.
1272    pub fn from_bytes(bytes: Vec<u8>) -> std::result::Result<Self, serde_json::Error> {
1273        #[cfg(feature = "simd-json")]
1274        {
1275            return Ok(Self {
1276                document: Value::Null,
1277                root_val: OnceCell::new(),
1278                objvec_cache: Default::default(),
1279                raw_bytes: Some(Arc::from(bytes.into_boxed_slice())),
1280                tape: OnceCell::new(),
1281                structural_index: OnceCell::new(),
1282                vm: RefCell::new(VM::new()),
1283            });
1284        }
1285        #[allow(unreachable_code)]
1286        {
1287            let document: Value = serde_json::from_slice(&bytes)?;
1288            Ok(Self {
1289                document,
1290                root_val: OnceCell::new(),
1291                objvec_cache: Default::default(),
1292                raw_bytes: Some(Arc::from(bytes.into_boxed_slice())),
1293                tape: OnceCell::new(),
1294                structural_index: OnceCell::new(),
1295                vm: RefCell::new(VM::new()),
1296            })
1297        }
1298    }
1299
1300    /// Borrow this document's VM cache, falling back to a temporary VM on re-entrant use.
1301    pub(crate) fn with_vm<F, R>(&self, f: F) -> R
1302    where
1303        F: FnOnce(&mut VM) -> R,
1304    {
1305        match self.vm.try_borrow_mut() {
1306            Ok(mut vm) => f(&mut vm),
1307            Err(_) => {
1308                let mut vm = VM::new();
1309                f(&mut vm)
1310            }
1311        }
1312    }
1313
    /// Return the raw JSON byte slice if this handle was constructed from bytes
    /// (see [`Jetro::from_bytes`]), or `None` for handles built without them
    /// (`new`, `from_val_and_value`, `from_tape_data`).
    pub(crate) fn raw_bytes(&self) -> Option<&[u8]> {
        self.raw_bytes.as_deref()
    }
1319
1320    /// Return a reference to the lazily built `StructuralIndex` for key-presence
1321    /// queries, constructing it from raw bytes on first access if available.
1322    pub(crate) fn lazy_structural_index(
1323        &self,
1324    ) -> std::result::Result<Option<&Arc<jetro_experimental::StructuralIndex>>, EvalError> {
1325        if let Some(result) = self.structural_index.get() {
1326            return result
1327                .as_ref()
1328                .map(Some)
1329                .map_err(|err| EvalError(format!("Invalid JSON: {err}")));
1330        }
1331        let Some(raw) = self.raw_bytes.as_ref() else {
1332            return Ok(None);
1333        };
1334        let built = jetro_experimental::from_bytes_with(
1335            raw.as_ref(),
1336            jetro_experimental::BuildOptions::keys_only(),
1337        )
1338        .map(Arc::new)
1339        .map_err(|err| err.to_string());
1340        let _ = self.structural_index.set(built);
1341        self.structural_index
1342            .get()
1343            .expect("structural index cache initialized")
1344            .as_ref()
1345            .map(Some)
1346            .map_err(|err| EvalError(format!("Invalid JSON: {err}")))
1347    }
1348
1349    /// Return the root `Val` for the document, building and caching it from the
1350    /// tape (simd-json) or from the `serde_json::Value` on first access.
1351    pub(crate) fn root_val(&self) -> std::result::Result<Val, EvalError> {
1352        if let Some(root) = self.root_val.get() {
1353            return Ok(root.clone());
1354        }
1355        let root = {
1356            #[cfg(feature = "simd-json")]
1357            {
1358                if let Some(tape) = self.lazy_tape()? {
1359                    Val::from_tape_data(tape)
1360                } else {
1361                    Val::from(&self.document)
1362                }
1363            }
1364            #[cfg(not(feature = "simd-json"))]
1365            {
1366                Val::from(&self.document)
1367            }
1368        };
1369        let _ = self.root_val.set(root);
1370        Ok(self.root_val.get().expect("root val initialized").clone())
1371    }
1372
    /// Return `true` if the `Val` tree has already been materialised, i.e. the
    /// `root_val` cell is populated. Test-only; used to assert that lazy
    /// evaluation is working correctly.
    #[cfg(test)]
    pub(crate) fn root_val_is_materialized(&self) -> bool {
        self.root_val.get().is_some()
    }
1379
    /// Return `true` if the `structural_index` cell is populated, whether with a
    /// built index or a cached build error. Test-only.
    #[cfg(test)]
    pub(crate) fn structural_index_is_built(&self) -> bool {
        self.structural_index.get().is_some()
    }
1384
    /// Return `true` if the `tape` cell is populated, whether with a parsed tape
    /// or a cached parse error. Test-only, `simd-json` builds only.
    #[cfg(all(test, feature = "simd-json"))]
    pub(crate) fn tape_is_built(&self) -> bool {
        self.tape.get().is_some()
    }
1389
    /// Call `reset_materialized_subtrees` on the tape when `lazy_tape` yields
    /// one; a missing tape or a parse error is silently ignored. Note that this
    /// goes through `lazy_tape`, so it parses the tape if it has not been built
    /// yet. Test-only, `simd-json` builds only.
    #[cfg(all(test, feature = "simd-json"))]
    pub(crate) fn reset_tape_materialized_subtrees(&self) {
        if let Ok(Some(tape)) = self.lazy_tape() {
            tape.reset_materialized_subtrees();
        }
    }
1396
1397    #[cfg(all(test, feature = "simd-json"))]
1398    pub(crate) fn tape_materialized_subtrees(&self) -> usize {
1399        self.lazy_tape()
1400            .ok()
1401            .flatten()
1402            .map(|tape| tape.materialized_subtrees())
1403            .unwrap_or(0)
1404    }
1405
    /// Evaluate a Jetro expression against this document and return the result
    /// as a `serde_json::Value`. Uses this document's VM with compile and
    /// path-resolution caches for repeated calls.
    ///
    /// `expr` may be anything that derefs to `&str`; evaluation is delegated to
    /// `exec::router::collect_json`.
    ///
    /// # Errors
    ///
    /// Returns the [`EvalError`] produced by `exec::router::collect_json`.
    pub fn collect<S: AsRef<str>>(&self, expr: S) -> std::result::Result<Value, EvalError> {
        exec::router::collect_json(self, expr.as_ref())
    }
1412}
1413
/// Wrap an existing `serde_json::Value` in a `Jetro` handle without raw bytes.
/// Prefer `Jetro::from_bytes` when you have the original JSON source, as it
/// enables the tape and structural-index lazy backends.
impl From<Value> for Jetro {
    /// Convert a `serde_json::Value` into a `Jetro` query handle.
    ///
    /// Equivalent to the crate-internal `Jetro::new(v)`: the handle keeps `v` as
    /// its document and stores no raw bytes.
    fn from(v: Value) -> Self {
        Self::new(v)
    }
}