1pub(crate) mod builtins;
25pub(crate) mod compile;
26pub(crate) mod data;
27pub(crate) mod exec;
28pub(crate) mod ir;
29pub(crate) mod parse;
30pub(crate) mod plan;
31pub(crate) mod util;
32pub(crate) mod vm;
33
34#[cfg(test)]
35mod tests;
36
37use serde_json::Value;
38use std::cell::{OnceCell, RefCell};
39use std::collections::HashMap;
40use std::sync::Arc;
41use std::sync::Mutex;
42use data::value::Val;
43
44pub use data::context::EvalError;
45#[cfg(test)]
46use parse::parser::ParseError;
47use vm::VM;
48
/// Re-exports of internal parser and planner entry points.
///
/// Only compiled when the `fuzz_internal` feature is enabled.
#[cfg(feature = "fuzz_internal")]
pub mod __fuzz_internal {
    // Parser entry point together with its error type.
    pub use crate::parse::parser::{parse, ParseError};
    // Physical planner entry point.
    pub use crate::plan::physical::plan_query;
}
57
58
/// Test-only error type that wraps either a parse error or an evaluation error.
#[cfg(test)]
#[derive(Debug)]
pub(crate) enum Error {
    /// Wraps a [`ParseError`] from the parser module.
    Parse(ParseError),
    /// Wraps an [`EvalError`].
    Eval(EvalError),
}
65
/// Displays the wrapped error's own `Display` text, with nothing added around it.
#[cfg(test)]
impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the payload first, then format it in a single place.
        let inner: &dyn std::fmt::Display = match self {
            Self::Parse(parse_err) => parse_err,
            Self::Eval(eval_err) => eval_err,
        };
        write!(f, "{inner}")
    }
}
/// Exposes the wrapped parse error as the source; evaluation errors report no source.
#[cfg(test)]
impl std::error::Error for Error {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::Eval(_) => None,
            Self::Parse(parse_err) => Some(parse_err),
        }
    }
}
84
/// Lets `?` lift a [`ParseError`] into the test-only [`Error`].
#[cfg(test)]
impl From<ParseError> for Error {
    fn from(parse_err: ParseError) -> Self {
        Self::Parse(parse_err)
    }
}
/// Lets `?` lift an [`EvalError`] into the test-only [`Error`].
#[cfg(test)]
impl From<EvalError> for Error {
    fn from(eval_err: EvalError) -> Self {
        Self::Eval(eval_err)
    }
}
97
98pub struct Jetro {
103 document: Value,
106 root_val: OnceCell<Val>,
108 raw_bytes: Option<Arc<[u8]>>,
110
111 #[cfg(feature = "simd-json")]
113 tape: OnceCell<std::result::Result<Arc<crate::data::tape::TapeData>, String>>,
114 #[cfg(not(feature = "simd-json"))]
116 #[allow(dead_code)]
117 tape: OnceCell<()>,
118
119 structural_index:
121 OnceCell<std::result::Result<Arc<jetro_experimental::StructuralIndex>, String>>,
122
123 pub(crate) objvec_cache:
126 std::sync::Mutex<std::collections::HashMap<usize, Arc<crate::data::value::ObjVecData>>>,
127
128 vm: RefCell<VM>,
130}
131
132
/// Query engine that bundles a plan cache, a `VM` and a key cache, all usable through `&self`.
pub struct JetroEngine {
    /// Query plans keyed by the planning context's cache key joined with the expression text.
    plan_cache: Mutex<HashMap<String, ir::physical::QueryPlan>>,
    /// Entry count at which the plan cache is emptied before inserting; `0` disables caching.
    plan_cache_limit: usize,
    /// VM that `collect` locks for the duration of one evaluation.
    vm: Mutex<VM>,
    /// Key cache passed to the `Val` builders when documents are parsed through the engine.
    keys: Arc<crate::data::intern::KeyCache>,
}
151
/// Error returned by the byte-oriented [`JetroEngine`] entry points.
#[derive(Debug)]
pub enum JetroEngineError {
    /// A `serde_json` error, as produced by `Jetro::from_bytes`.
    Json(serde_json::Error),
    /// An evaluation error.
    Eval(EvalError),
}
161
162impl std::fmt::Display for JetroEngineError {
163 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
164 match self {
165 Self::Json(err) => write!(f, "{}", err),
166 Self::Eval(err) => write!(f, "{}", err),
167 }
168 }
169}
170
171impl std::error::Error for JetroEngineError {
172 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
173 match self {
174 Self::Json(err) => Some(err),
175 Self::Eval(_) => None,
176 }
177 }
178}
179
180impl From<serde_json::Error> for JetroEngineError {
181 fn from(err: serde_json::Error) -> Self {
182 Self::Json(err)
183 }
184}
185
186impl From<EvalError> for JetroEngineError {
187 fn from(err: EvalError) -> Self {
188 Self::Eval(err)
189 }
190}
191
192impl Default for JetroEngine {
193 fn default() -> Self {
194 Self::new()
195 }
196}
197
198impl JetroEngine {
    /// Plan-cache limit used by [`JetroEngine::new`].
    const DEFAULT_PLAN_CACHE_LIMIT: usize = 256;
201
202 pub fn new() -> Self {
204 Self::with_plan_cache_limit(Self::DEFAULT_PLAN_CACHE_LIMIT)
205 }
206
207 pub fn with_plan_cache_limit(plan_cache_limit: usize) -> Self {
210 Self {
211 plan_cache: Mutex::new(HashMap::new()),
212 plan_cache_limit,
213 vm: Mutex::new(VM::new()),
214 keys: crate::data::intern::KeyCache::new(),
215 }
216 }
217
    /// Returns the engine's shared key cache.
    pub fn keys(&self) -> &Arc<crate::data::intern::KeyCache> {
        &self.keys
    }
222
223 pub fn clear_cache(&self) {
226 self.plan_cache.lock().expect("plan cache poisoned").clear();
227 self.keys.clear();
228 }
229
230 pub fn parse_value(&self, document: Value) -> Jetro {
235 let root = Val::from_value_with(&self.keys, &document);
236 Jetro::from_val_and_value(root, document)
237 }
238
239 pub fn parse_bytes(
244 &self,
245 bytes: Vec<u8>,
246 ) -> std::result::Result<Jetro, JetroEngineError> {
247 let document = Jetro::from_bytes(bytes)?;
248 let _ = document.root_val_with(&self.keys)?;
252 Ok(document)
253 }
254
255 pub fn collect<S: AsRef<str>>(
258 &self,
259 document: &Jetro,
260 expr: S,
261 ) -> std::result::Result<Value, EvalError> {
262 let plan = self.cached_plan(expr.as_ref(), exec::router::planning_context(document));
263 let mut vm = self.vm.lock().expect("vm cache poisoned");
264 exec::router::collect_plan_json_with_vm(document, &plan, &mut vm)
265 }
266
267 pub fn collect_value<S: AsRef<str>>(
271 &self,
272 document: Value,
273 expr: S,
274 ) -> std::result::Result<Value, EvalError> {
275 let document = self.parse_value(document);
276 self.collect(&document, expr)
277 }
278
279 pub fn collect_bytes<S: AsRef<str>>(
284 &self,
285 bytes: Vec<u8>,
286 expr: S,
287 ) -> std::result::Result<Value, JetroEngineError> {
288 let document = self.parse_bytes(bytes)?;
289 Ok(self.collect(&document, expr)?)
290 }
291
292 fn cached_plan(&self, expr: &str, context: plan::physical::PlanningContext) -> ir::physical::QueryPlan {
295 let mut cache = self.plan_cache.lock().expect("plan cache poisoned");
296 let cache_key = format!("{}\0{}", context.cache_key(), expr);
297 if let Some(plan) = cache.get(&cache_key) {
298 return plan.clone();
299 }
300
301 let plan = plan::physical::plan_query_with_context(expr, context);
302 if self.plan_cache_limit > 0 {
303 if cache.len() >= self.plan_cache_limit {
304 cache.clear();
305 }
306 cache.insert(cache_key, plan.clone());
307 }
308 plan
309 }
310}
311
312impl exec::pipeline::PipelineData for Jetro {
313 fn promote_objvec(&self, arr: &Arc<Vec<Val>>) -> Option<Arc<crate::data::value::ObjVecData>> {
314 self.get_or_promote_objvec(arr)
315 }
316}
317
318impl Jetro {
319 #[cfg(feature = "simd-json")]
322 pub(crate) fn lazy_tape(
323 &self,
324 ) -> std::result::Result<Option<&Arc<crate::data::tape::TapeData>>, EvalError> {
325 if let Some(result) = self.tape.get() {
326 return result
327 .as_ref()
328 .map(Some)
329 .map_err(|err| EvalError(format!("Invalid JSON: {err}")));
330 }
331 let Some(raw) = self.raw_bytes.as_ref() else {
332 return Ok(None);
333 };
334 let bytes: Vec<u8> = (**raw).to_vec();
335 let parsed = crate::data::tape::TapeData::parse(bytes).map_err(|err| err.to_string());
336 let _ = self.tape.set(parsed);
337 self.tape
338 .get()
339 .expect("tape cache initialized")
340 .as_ref()
341 .map(Some)
342 .map_err(|err| EvalError(format!("Invalid JSON: {err}")))
343 }
344
345 pub(crate) fn get_or_promote_objvec(
348 &self,
349 arr: &Arc<Vec<Val>>,
350 ) -> Option<Arc<crate::data::value::ObjVecData>> {
351 let key = Arc::as_ptr(arr) as usize;
352 if let Ok(cache) = self.objvec_cache.lock() {
353 if let Some(d) = cache.get(&key) {
354 return Some(Arc::clone(d));
355 }
356 }
357 let promoted = exec::pipeline::Pipeline::try_promote_objvec_arr(arr)?;
358 if let Ok(mut cache) = self.objvec_cache.lock() {
359 cache.entry(key).or_insert_with(|| Arc::clone(&promoted));
360 }
361 Some(promoted)
362 }
363
364 pub(crate) fn new(document: Value) -> Self {
366 Self {
367 document,
368 root_val: OnceCell::new(),
369 objvec_cache: Default::default(),
370 raw_bytes: None,
371 tape: OnceCell::new(),
372 structural_index: OnceCell::new(),
373 vm: RefCell::new(VM::new()),
374 }
375 }
376
377 pub(crate) fn from_val_and_value(root: Val, document: Value) -> Self {
382 let root_val = OnceCell::new();
383 let _ = root_val.set(root);
384 Self {
385 document,
386 root_val,
387 objvec_cache: Default::default(),
388 raw_bytes: None,
389 tape: OnceCell::new(),
390 structural_index: OnceCell::new(),
391 vm: RefCell::new(VM::new()),
392 }
393 }
394
395 pub(crate) fn root_val_with(
400 &self,
401 keys: &crate::data::intern::KeyCache,
402 ) -> std::result::Result<Val, EvalError> {
403 if let Some(root) = self.root_val.get() {
404 return Ok(root.clone());
405 }
406 let root = {
407 #[cfg(feature = "simd-json")]
408 {
409 if let Some(tape) = self.lazy_tape()? {
410 Val::from_tape_data_with(keys, tape)
411 } else {
412 Val::from_value_with(keys, &self.document)
413 }
414 }
415 #[cfg(not(feature = "simd-json"))]
416 {
417 Val::from_value_with(keys, &self.document)
418 }
419 };
420 let _ = self.root_val.set(root);
421 Ok(self.root_val.get().expect("root val initialized").clone())
422 }
423
424 pub fn from_bytes(bytes: Vec<u8>) -> std::result::Result<Self, serde_json::Error> {
428
429
430 #[cfg(feature = "simd-json")]
431 {
432 return Ok(Self {
433 document: Value::Null,
434 root_val: OnceCell::new(),
435 objvec_cache: Default::default(),
436 raw_bytes: Some(Arc::from(bytes.into_boxed_slice())),
437 tape: OnceCell::new(),
438 structural_index: OnceCell::new(),
439 vm: RefCell::new(VM::new()),
440 });
441 }
442 #[allow(unreachable_code)]
443 {
444 let document: Value = serde_json::from_slice(&bytes)?;
445 Ok(Self {
446 document,
447 root_val: OnceCell::new(),
448 objvec_cache: Default::default(),
449 raw_bytes: Some(Arc::from(bytes.into_boxed_slice())),
450 tape: OnceCell::new(),
451 structural_index: OnceCell::new(),
452 vm: RefCell::new(VM::new()),
453 })
454 }
455 }
456
457 pub(crate) fn with_vm<F, R>(&self, f: F) -> R
459 where
460 F: FnOnce(&mut VM) -> R,
461 {
462 match self.vm.try_borrow_mut() {
463 Ok(mut vm) => f(&mut vm),
464 Err(_) => {
465 let mut vm = VM::new();
466 f(&mut vm)
467 }
468 }
469 }
470
471 pub(crate) fn raw_bytes(&self) -> Option<&[u8]> {
474 self.raw_bytes.as_deref()
475 }
476
477 pub(crate) fn lazy_structural_index(
480 &self,
481 ) -> std::result::Result<Option<&Arc<jetro_experimental::StructuralIndex>>, EvalError> {
482 if let Some(result) = self.structural_index.get() {
483 return result
484 .as_ref()
485 .map(Some)
486 .map_err(|err| EvalError(format!("Invalid JSON: {err}")));
487 }
488 let Some(raw) = self.raw_bytes.as_ref() else {
489 return Ok(None);
490 };
491 let built = jetro_experimental::from_bytes_with(
492 raw.as_ref(),
493 jetro_experimental::BuildOptions::keys_only(),
494 )
495 .map(Arc::new)
496 .map_err(|err| err.to_string());
497 let _ = self.structural_index.set(built);
498 self.structural_index
499 .get()
500 .expect("structural index cache initialized")
501 .as_ref()
502 .map(Some)
503 .map_err(|err| EvalError(format!("Invalid JSON: {err}")))
504 }
505
506 pub(crate) fn root_val(&self) -> std::result::Result<Val, EvalError> {
509 if let Some(root) = self.root_val.get() {
510 return Ok(root.clone());
511 }
512 let root = {
513 #[cfg(feature = "simd-json")]
514 {
515 if let Some(tape) = self.lazy_tape()? {
516 Val::from_tape_data(tape)
517 } else {
518 Val::from(&self.document)
519 }
520 }
521 #[cfg(not(feature = "simd-json"))]
522 {
523 Val::from(&self.document)
524 }
525 };
526 let _ = self.root_val.set(root);
527 Ok(self.root_val.get().expect("root val initialized").clone())
528 }
529
    /// Test probe: reports whether the root `Val` cache has been filled.
    #[cfg(test)]
    pub(crate) fn root_val_is_materialized(&self) -> bool {
        self.root_val.get().is_some()
    }
536
    /// Test probe: reports whether a structural-index build outcome (success or
    /// failure) has been cached.
    #[cfg(test)]
    pub(crate) fn structural_index_is_built(&self) -> bool {
        self.structural_index.get().is_some()
    }
541
    /// Test probe: reports whether a tape parse outcome (success or failure) has
    /// been cached.
    #[cfg(all(test, feature = "simd-json"))]
    pub(crate) fn tape_is_built(&self) -> bool {
        self.tape.get().is_some()
    }
546
547 #[cfg(all(test, feature = "simd-json"))]
548 pub(crate) fn reset_tape_materialized_subtrees(&self) {
549 if let Ok(Some(tape)) = self.lazy_tape() {
550 tape.reset_materialized_subtrees();
551 }
552 }
553
554 #[cfg(all(test, feature = "simd-json"))]
555 pub(crate) fn tape_materialized_subtrees(&self) -> usize {
556 self.lazy_tape()
557 .ok()
558 .flatten()
559 .map(|tape| tape.materialized_subtrees())
560 .unwrap_or(0)
561 }
562
563 pub fn collect<S: AsRef<str>>(&self, expr: S) -> std::result::Result<Value, EvalError> {
567 exec::router::collect_json(self, expr.as_ref())
568 }
569}
570
571impl From<Value> for Jetro {
575 fn from(v: Value) -> Self {
577 Self::new(v)
578 }
579}