datalogic_rs/compiled.rs
1use crate::{ContextStack, DataLogic, Result, opcode::OpCode};
2use regex::Regex;
3use serde_json::{Value, json};
4use std::sync::Arc;
5
/// A pre-parsed path segment for compiled variable access.
///
/// Segments are produced at compile time (see `parse_path_segments` and
/// `val_arg_to_segment`) and stored inside `CompiledNode::CompiledVar` /
/// `CompiledNode::CompiledExists`.
#[derive(Debug, Clone)]
pub enum PathSegment {
    /// Object field access by key.
    Field(Box<str>),
    /// Array element access by index.
    Index(usize),
    /// Try as object key first, then as array index (for segments that could be either).
    ///
    /// Carries the original text together with the index parsed from it, so the
    /// index is pre-parsed at compile time and no runtime parsing is needed.
    FieldOrIndex(Box<str>, usize),
}
17
/// Hint for reduce context resolution, detected at compile time.
///
/// The hint is derived from the first path component of a `var`/`val` access
/// (`"current"` or `"accumulator"`); every other path gets [`ReduceHint::None`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReduceHint {
    /// Normal path access (no reduce context).
    None,
    /// Path is exactly "current" — return reduce_current directly.
    Current,
    /// Path is exactly "accumulator" — return reduce_accumulator directly.
    Accumulator,
    /// Path starts with "current." — segments[0] is "current", use segments[1..] from reduce_current.
    CurrentPath,
    /// Path starts with "accumulator." — segments[0] is "accumulator", use segments[1..] from reduce_accumulator.
    AccumulatorPath,
}
32
/// Hint for metadata access (index/key), detected at compile time.
///
/// Set by `try_compile_val` for the two-argument level form of `val` whose
/// second argument is the literal string `"index"` or `"key"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MetadataHint {
    /// Normal data access.
    None,
    /// Access frame index metadata.
    Index,
    /// Access frame key metadata.
    Key,
}
43
/// A compiled node representing a single operation or value in the logic tree.
///
/// Nodes are created during the compilation phase and evaluated during execution.
/// Each node type is optimized for its specific purpose:
///
/// - **Value**: Static JSON values that don't require evaluation
/// - **Array**: Collections of nodes evaluated sequentially
/// - **BuiltinOperator**: Fast OpCode-based dispatch for built-in operators
/// - **CustomOperator**: User-defined operators with dynamic dispatch
/// - **StructuredObject**: Template objects for structure preservation
/// - **CompiledVar**: `var`/`val` access with a path pre-parsed at compile time
/// - **CompiledExists**: `exists` check with a path pre-parsed at compile time
/// - **CompiledSplitRegex**: `split` whose regex delimiter was compiled once
/// - **CompiledThrow**: `throw` with a pre-built static error object
#[derive(Debug, Clone)]
pub enum CompiledNode {
    /// A static JSON value that requires no evaluation.
    ///
    /// Used for literals like numbers, strings, booleans, and null.
    Value { value: Value },

    /// An array of compiled nodes.
    ///
    /// Each node is evaluated in sequence, and the results are collected into a JSON array.
    /// Uses `Box<[CompiledNode]>` for memory efficiency.
    Array { nodes: Box<[CompiledNode]> },

    /// A built-in operator optimized with OpCode dispatch.
    ///
    /// The OpCode enum enables direct dispatch without string lookups,
    /// significantly improving performance for the 50+ built-in operators.
    BuiltinOperator {
        opcode: OpCode,
        args: Box<[CompiledNode]>,
    },

    /// A custom operator registered via `DataLogic::add_operator`.
    ///
    /// Custom operators use dynamic dispatch and are looked up by name
    /// from the engine's operator registry.
    CustomOperator {
        name: String,
        args: Box<[CompiledNode]>,
    },

    /// A structured object template for preserve_structure mode.
    ///
    /// When structure preservation is enabled, objects with keys that are not
    /// built-in operators or registered custom operators are preserved as templates.
    /// Each field is evaluated independently, allowing for dynamic object generation.
    ///
    /// Note: Custom operators are checked before treating keys as structured fields,
    /// ensuring they work correctly within preserved structures.
    StructuredObject {
        fields: Box<[(String, CompiledNode)]>,
    },

    /// A pre-compiled variable access (unified var/val).
    ///
    /// scope_level 0 = current context (var-style), N = go up N levels (val with [[N], ...]).
    /// Segments are pre-parsed at compile time to avoid runtime string splitting.
    CompiledVar {
        /// How many context levels to go up before resolving the path (0 = current).
        scope_level: u32,
        /// The pre-parsed path; empty when the operator had no arguments.
        segments: Box<[PathSegment]>,
        /// Whether the path targets the reduce `current`/`accumulator` values.
        reduce_hint: ReduceHint,
        /// Whether the access targets frame metadata (`index`/`key`).
        metadata_hint: MetadataHint,
        /// The second `var` argument, if one was supplied.
        default_value: Option<Box<CompiledNode>>,
    },

    /// A pre-compiled exists check.
    ///
    /// scope_level 0 = current context, N = go up N levels.
    /// Segments are pre-parsed at compile time.
    CompiledExists {
        scope_level: u32,
        segments: Box<[PathSegment]>,
    },

    /// A pre-compiled split with regex pattern.
    ///
    /// When the split operator's delimiter is a static regex pattern with named
    /// capture groups, the regex is compiled once during the compilation phase
    /// instead of on every evaluation.
    CompiledSplitRegex {
        /// The text argument (only the first arg of split)
        args: Box<[CompiledNode]>,
        /// Pre-compiled regex pattern
        regex: Arc<Regex>,
        /// Pre-extracted capture group names
        capture_names: Box<[Box<str>]>,
    },

    /// A pre-compiled throw with a static error object.
    ///
    /// When `throw` is called with a literal string, the error object
    /// `{"type": "..."}` is pre-built at compile time.
    CompiledThrow { error_obj: Value },
}
138
/// Compiled logic that can be evaluated multiple times across different data.
///
/// `CompiledLogic` represents a pre-processed JSONLogic expression that has been
/// optimized for repeated evaluation. It's thread-safe and can be shared across
/// threads using `Arc`.
///
/// # Performance Benefits
///
/// - **Parse once, evaluate many**: Avoid repeated JSON parsing
/// - **Static evaluation**: Constant expressions are pre-computed (when compiled
///   with an engine, see `compile_with_static_eval`)
/// - **OpCode dispatch**: Built-in operators use fast enum dispatch
/// - **Thread-safe sharing**: Use `Arc` to share across threads
///
/// # Example
///
/// ```rust
/// use datalogic_rs::DataLogic;
/// use serde_json::json;
/// use std::sync::Arc;
///
/// let engine = DataLogic::new();
/// let logic = json!({">": [{"var": "score"}, 90]});
/// let compiled = engine.compile(&logic).unwrap(); // Returns Arc<CompiledLogic>
///
/// // Can be shared across threads
/// let compiled_clone = Arc::clone(&compiled);
/// std::thread::spawn(move || {
///     let data = json!({"score": 95});
///     let result = engine.evaluate_owned(&compiled_clone, data);
/// });
/// ```
#[derive(Debug, Clone)]
pub struct CompiledLogic {
    /// The root node of the compiled logic tree
    pub root: CompiledNode,
}
175
176impl CompiledLogic {
177 /// Creates a new compiled logic from a root node.
178 ///
179 /// # Arguments
180 ///
181 /// * `root` - The root node of the compiled logic tree
182 pub fn new(root: CompiledNode) -> Self {
183 Self { root }
184 }
185
186 /// Compiles a JSON value into a compiled logic structure.
187 ///
188 /// This method performs basic compilation without static evaluation.
189 /// For optimal performance, use `compile_with_static_eval` instead.
190 ///
191 /// # Arguments
192 ///
193 /// * `logic` - The JSON logic expression to compile
194 ///
195 /// # Returns
196 ///
197 /// A compiled logic structure, or an error if compilation fails.
198 pub fn compile(logic: &Value) -> Result<Self> {
199 let root = Self::compile_node(logic, None, false)?;
200 Ok(Self::new(root))
201 }
202
203 /// Compiles for tracing without static evaluation.
204 ///
205 /// This method compiles the logic without performing static evaluation,
206 /// ensuring that all operators remain in the tree for step-by-step debugging.
207 /// Use this when you need to trace execution through operators that would
208 /// otherwise be pre-evaluated at compile time.
209 ///
210 /// # Arguments
211 ///
212 /// * `logic` - The JSON logic expression to compile
213 /// * `preserve_structure` - Whether to preserve unknown object structure
214 ///
215 /// # Returns
216 ///
217 /// A compiled logic structure without static optimizations.
218 pub fn compile_for_trace(logic: &Value, preserve_structure: bool) -> Result<Self> {
219 let root = Self::compile_node(logic, None, preserve_structure)?;
220 Ok(Self::new(root))
221 }
222
223 /// Compiles with static evaluation using the provided engine.
224 ///
225 /// This method performs optimizations including:
226 /// - Static evaluation of constant expressions
227 /// - OpCode assignment for built-in operators
228 /// - Structure preservation based on engine settings
229 ///
230 /// # Arguments
231 ///
232 /// * `logic` - The JSON logic expression to compile
233 /// * `engine` - The DataLogic engine for static evaluation
234 ///
235 /// # Returns
236 ///
237 /// An optimized compiled logic structure, or an error if compilation fails.
238 pub fn compile_with_static_eval(logic: &Value, engine: &DataLogic) -> Result<Self> {
239 let root = Self::compile_node(logic, Some(engine), engine.preserve_structure())?;
240 Ok(Self::new(root))
241 }
242
243 /// Compiles a single JSON value into a CompiledNode.
244 ///
245 /// This recursive method handles all node types:
246 /// - Objects with operators
247 /// - Arrays
248 /// - Primitive values
249 /// - Structured objects (in preserve mode)
250 ///
251 /// # Arguments
252 ///
253 /// * `value` - The JSON value to compile
254 /// * `engine` - Optional engine for static evaluation
255 /// * `preserve_structure` - Whether to preserve unknown object structure
256 ///
257 /// # Returns
258 ///
259 /// A compiled node, or an error if the value is invalid.
260 fn compile_node(
261 value: &Value,
262 engine: Option<&DataLogic>,
263 preserve_structure: bool,
264 ) -> Result<CompiledNode> {
265 match value {
266 Value::Object(obj) if obj.len() > 1 => {
267 if preserve_structure {
268 // In preserve_structure mode, treat multi-key objects as structured objects
269 // We'll create a special StructuredObject node that gets evaluated field by field
270 let fields: Vec<_> = obj
271 .iter()
272 .map(|(key, val)| {
273 Self::compile_node(val, engine, preserve_structure)
274 .map(|compiled_val| (key.clone(), compiled_val))
275 })
276 .collect::<Result<Vec<_>>>()?;
277 Ok(CompiledNode::StructuredObject {
278 fields: fields.into_boxed_slice(),
279 })
280 } else {
281 // Multi-key objects are not valid operators
282 Err(crate::error::Error::InvalidOperator(
283 "Unknown Operator".to_string(),
284 ))
285 }
286 }
287 Value::Object(obj) if obj.len() == 1 => {
288 // Single key object is an operator
289 let (op_name, args_value) = obj.iter().next().unwrap();
290
291 // Try to parse as built-in operator first
292 if let Ok(opcode) = op_name.parse::<OpCode>() {
293 // Check if this operator requires array arguments
294 let requires_array = matches!(opcode, OpCode::And | OpCode::Or | OpCode::If);
295
296 // For operators that require arrays, check the raw value
297 if requires_array && !matches!(args_value, Value::Array(_)) {
298 // Create a special marker node for invalid arguments
299 let invalid_value = json!({
300 "__invalid_args__": true,
301 "value": args_value
302 });
303 let value_node = CompiledNode::Value {
304 value: invalid_value,
305 };
306 let args = vec![value_node].into_boxed_slice();
307 return Ok(CompiledNode::BuiltinOperator { opcode, args });
308 }
309
310 // Special handling for preserve operator - don't compile its arguments
311 let args = if opcode == OpCode::Preserve {
312 // Preserve takes raw values, not compiled logic
313 match args_value {
314 Value::Array(arr) => arr
315 .iter()
316 .map(|v| CompiledNode::Value { value: v.clone() })
317 .collect::<Vec<_>>()
318 .into_boxed_slice(),
319 _ => vec![CompiledNode::Value {
320 value: args_value.clone(),
321 }]
322 .into_boxed_slice(),
323 }
324 } else {
325 Self::compile_args(args_value, engine, preserve_structure)?
326 };
327 // Try to optimize variable access operators at compile time
328 if matches!(opcode, OpCode::Var | OpCode::Val | OpCode::Exists) {
329 let optimized = match opcode {
330 OpCode::Var => Self::try_compile_var(&args),
331 OpCode::Val => Self::try_compile_val(&args),
332 OpCode::Exists => Self::try_compile_exists(&args),
333 _ => None,
334 };
335 if let Some(node) = optimized {
336 return Ok(node);
337 }
338 }
339
340 // Pre-compile regex for split operator when delimiter is a static pattern
341 if opcode == OpCode::Split
342 && let Some(node) = Self::try_compile_split_regex(&args)
343 {
344 return Ok(node);
345 }
346
347 // Pre-compile throw with literal string into CompiledThrow
348 if opcode == OpCode::Throw
349 && args.len() == 1
350 && let CompiledNode::Value {
351 value: Value::String(s),
352 } = &args[0]
353 {
354 return Ok(CompiledNode::CompiledThrow {
355 error_obj: serde_json::json!({"type": s}),
356 });
357 }
358
359 let node = CompiledNode::BuiltinOperator { opcode, args };
360
361 // If engine is provided and node is static, evaluate it
362 if let std::option::Option::Some(eng) = engine
363 && Self::node_is_static(&node)
364 {
365 // Evaluate with empty context since it's static
366 let mut context = ContextStack::new(Arc::new(Value::Null));
367 match eng.evaluate_node(&node, &mut context) {
368 Ok(value) => {
369 return Ok(CompiledNode::Value { value });
370 }
371 // If evaluation fails, keep as operator node
372 Err(_) => return Ok(node),
373 }
374 }
375
376 Ok(node)
377 } else if preserve_structure {
378 // In preserve_structure mode, we need to distinguish between:
379 // 1. Custom operators (should be evaluated as operators)
380 // 2. Unknown keys (should be preserved as structured object fields)
381 //
382 // Check if this is a custom operator first
383 if let Some(eng) = engine
384 && eng.has_custom_operator(op_name)
385 {
386 // It's a registered custom operator - compile as CustomOperator
387 // This ensures custom operators work correctly in preserve_structure mode,
388 // e.g., {"result": {"custom_op": arg}} will evaluate custom_op properly
389 let args = Self::compile_args(args_value, engine, preserve_structure)?;
390 return Ok(CompiledNode::CustomOperator {
391 name: op_name.clone(),
392 args,
393 });
394 }
395 // Not a built-in operator or custom operator - treat as structured object field
396 // This allows dynamic object generation like {"name": {"var": "user.name"}}
397 let compiled_val = Self::compile_node(args_value, engine, preserve_structure)?;
398 let fields = vec![(op_name.clone(), compiled_val)].into_boxed_slice();
399 Ok(CompiledNode::StructuredObject { fields })
400 } else {
401 let args = Self::compile_args(args_value, engine, preserve_structure)?;
402 // Fall back to custom operator - don't pre-evaluate custom operators
403 Ok(CompiledNode::CustomOperator {
404 name: op_name.clone(),
405 args,
406 })
407 }
408 }
409 Value::Array(arr) => {
410 // Array of logic expressions
411 let nodes = arr
412 .iter()
413 .map(|v| Self::compile_node(v, engine, preserve_structure))
414 .collect::<Result<Vec<_>>>()?;
415
416 let nodes_boxed = nodes.into_boxed_slice();
417 let node = CompiledNode::Array { nodes: nodes_boxed };
418
419 // If engine is provided and array is static, evaluate it
420 if let std::option::Option::Some(eng) = engine
421 && Self::node_is_static(&node)
422 {
423 let mut context = ContextStack::new(Arc::new(Value::Null));
424 if let Ok(value) = eng.evaluate_node(&node, &mut context) {
425 return Ok(CompiledNode::Value { value });
426 }
427 }
428
429 Ok(node)
430 }
431 _ => {
432 // Static value
433 Ok(CompiledNode::Value {
434 value: value.clone(),
435 })
436 }
437 }
438 }
439
440 /// Compile operator arguments
441 fn compile_args(
442 value: &Value,
443 engine: Option<&DataLogic>,
444 preserve_structure: bool,
445 ) -> Result<Box<[CompiledNode]>> {
446 match value {
447 Value::Array(arr) => arr
448 .iter()
449 .map(|v| Self::compile_node(v, engine, preserve_structure))
450 .collect::<Result<Vec<_>>>()
451 .map(Vec::into_boxed_slice),
452 _ => {
453 // Single argument - compile it
454 Ok(vec![Self::compile_node(value, engine, preserve_structure)?].into_boxed_slice())
455 }
456 }
457 }
458
459 /// Check if this compiled logic is static (can be evaluated without context)
460 pub fn is_static(&self) -> bool {
461 Self::node_is_static(&self.root)
462 }
463
464 fn node_is_static(node: &CompiledNode) -> bool {
465 match node {
466 CompiledNode::Value { .. } => true,
467 CompiledNode::Array { nodes, .. } => nodes.iter().all(Self::node_is_static),
468 CompiledNode::BuiltinOperator { opcode, args, .. } => {
469 Self::opcode_is_static(opcode, args)
470 }
471 CompiledNode::CustomOperator { .. } => false, // Unknown operators are non-static
472 CompiledNode::CompiledVar { .. } | CompiledNode::CompiledExists { .. } => false, // Context-dependent
473 CompiledNode::CompiledSplitRegex { args, .. } => args.iter().all(Self::node_is_static),
474 CompiledNode::CompiledThrow { .. } => false, // Error-producing, non-static
475 CompiledNode::StructuredObject { fields, .. } => {
476 fields.iter().all(|(_, node)| Self::node_is_static(node))
477 }
478 }
479 }
480
481 /// Check if an operator can be statically evaluated at compile time.
482 ///
483 /// Static operators can be pre-computed during compilation when their arguments
484 /// are also static, eliminating runtime evaluation overhead.
485 ///
486 /// # Classification Criteria
487 ///
488 /// An operator is **non-static** (dynamic) if it:
489 /// 1. Reads from the data context (`var`, `val`, `missing`, `exists`)
490 /// 2. Uses iterative callbacks with changing context (`map`, `filter`, `reduce`)
491 /// 3. Has side effects or error handling (`try`, `throw`)
492 /// 4. Depends on runtime state (`now` for current time)
493 /// 5. Needs runtime disambiguation (`preserve`, `merge`, `min`, `max`)
494 ///
495 /// All other operators are **static** when their arguments are static.
496 /// Parse a dot-separated path into pre-parsed segments (for var, which uses dot notation).
497 /// Numeric segments become FieldOrIndex to handle both object keys and array indices.
498 fn parse_path_segments(path: &str) -> Vec<PathSegment> {
499 if path.is_empty() {
500 return Vec::new();
501 }
502 if !path.contains('.') {
503 if let Ok(idx) = path.parse::<usize>() {
504 return vec![PathSegment::FieldOrIndex(path.into(), idx)];
505 }
506 return vec![PathSegment::Field(path.into())];
507 }
508 path.split('.')
509 .map(|part| {
510 if let Ok(idx) = part.parse::<usize>() {
511 PathSegment::FieldOrIndex(part.into(), idx)
512 } else {
513 PathSegment::Field(part.into())
514 }
515 })
516 .collect()
517 }
518
519 /// Parse a var path and determine the reduce hint.
520 fn parse_var_path(path: &str) -> (ReduceHint, Vec<PathSegment>) {
521 if path == "current" {
522 (
523 ReduceHint::Current,
524 vec![PathSegment::Field("current".into())],
525 )
526 } else if path == "accumulator" {
527 (
528 ReduceHint::Accumulator,
529 vec![PathSegment::Field("accumulator".into())],
530 )
531 } else if let Some(rest) = path.strip_prefix("current.") {
532 let mut segs = vec![PathSegment::Field("current".into())];
533 segs.extend(Self::parse_path_segments(rest));
534 (ReduceHint::CurrentPath, segs)
535 } else if let Some(rest) = path.strip_prefix("accumulator.") {
536 let mut segs = vec![PathSegment::Field("accumulator".into())];
537 segs.extend(Self::parse_path_segments(rest));
538 (ReduceHint::AccumulatorPath, segs)
539 } else {
540 (ReduceHint::None, Self::parse_path_segments(path))
541 }
542 }
543
544 /// Try to compile a var operator into a CompiledVar node.
545 fn try_compile_var(args: &[CompiledNode]) -> Option<CompiledNode> {
546 if args.is_empty() {
547 return Some(CompiledNode::CompiledVar {
548 scope_level: 0,
549 segments: Box::new([]),
550 reduce_hint: ReduceHint::None,
551 metadata_hint: MetadataHint::None,
552 default_value: None,
553 });
554 }
555
556 let (segments, reduce_hint) = match &args[0] {
557 CompiledNode::Value {
558 value: Value::String(s),
559 } => {
560 let (hint, segs) = Self::parse_var_path(s);
561 (segs, hint)
562 }
563 CompiledNode::Value {
564 value: Value::Number(n),
565 } => {
566 let s = n.to_string();
567 let segs = Self::parse_path_segments(&s);
568 (segs, ReduceHint::None)
569 }
570 _ => return None, // dynamic path
571 };
572
573 let default_value = if args.len() > 1 {
574 Some(Box::new(args[1].clone()))
575 } else {
576 None
577 };
578
579 Some(CompiledNode::CompiledVar {
580 scope_level: 0,
581 segments: segments.into_boxed_slice(),
582 reduce_hint,
583 metadata_hint: MetadataHint::None,
584 default_value,
585 })
586 }
587
588 /// Try to compile a val operator into a CompiledVar node.
589 fn try_compile_val(args: &[CompiledNode]) -> Option<CompiledNode> {
590 if args.is_empty() {
591 return Some(CompiledNode::CompiledVar {
592 scope_level: 0,
593 segments: Box::new([]),
594 reduce_hint: ReduceHint::None,
595 metadata_hint: MetadataHint::None,
596 default_value: None,
597 });
598 }
599
600 // Val does NOT support dot-path notation. Each arg is a literal key/index.
601
602 // Case 2: Single non-empty string → single Field segment (literal key)
603 // Empty string has dual behavior (try key "" then whole-context fallback) — keep as BuiltinOperator.
604 if args.len() == 1 {
605 if let CompiledNode::Value {
606 value: Value::String(s),
607 } = &args[0]
608 && !s.is_empty()
609 {
610 let reduce_hint = if s == "current" {
611 ReduceHint::Current
612 } else if s == "accumulator" {
613 ReduceHint::Accumulator
614 } else {
615 ReduceHint::None
616 };
617 let segment = if let Ok(idx) = s.parse::<usize>() {
618 PathSegment::FieldOrIndex(s.as_str().into(), idx)
619 } else {
620 PathSegment::Field(s.as_str().into())
621 };
622 return Some(CompiledNode::CompiledVar {
623 scope_level: 0,
624 segments: vec![segment].into_boxed_slice(),
625 reduce_hint,
626 metadata_hint: MetadataHint::None,
627 default_value: None,
628 });
629 }
630 return None;
631 }
632
633 // Case 3: First arg is [[level]] array
634 if let CompiledNode::Value {
635 value: Value::Array(level_arr),
636 } = &args[0]
637 && let Some(Value::Number(level_num)) = level_arr.first()
638 && let Some(level) = level_num.as_i64()
639 {
640 let scope_level = level.unsigned_abs() as u32;
641
642 // Check metadata hints for 2-arg case
643 let mut metadata_hint = MetadataHint::None;
644 if args.len() == 2
645 && let CompiledNode::Value {
646 value: Value::String(s),
647 } = &args[1]
648 {
649 if s == "index" {
650 metadata_hint = MetadataHint::Index;
651 } else if s == "key" {
652 metadata_hint = MetadataHint::Key;
653 }
654 }
655
656 return Self::try_compile_val_segments(&args[1..], scope_level, metadata_hint);
657 }
658
659 // Case 4: 2+ args with all literal path segments — compile as path chain.
660 if let Some(first_seg) = Self::val_arg_to_segment(&args[0]) {
661 let reduce_hint = match &args[0] {
662 CompiledNode::Value {
663 value: Value::String(s),
664 } if s == "current" => ReduceHint::CurrentPath,
665 CompiledNode::Value {
666 value: Value::String(s),
667 } if s == "accumulator" => ReduceHint::AccumulatorPath,
668 _ => ReduceHint::None,
669 };
670
671 let mut segments = vec![first_seg];
672 if let Some(compiled) =
673 Self::try_collect_val_segments(&args[1..], &mut segments, reduce_hint)
674 {
675 return Some(compiled);
676 }
677 }
678
679 None
680 }
681
682 /// Convert a val argument into a PathSegment.
683 /// Val treats string args as literal keys (no dot-splitting), and numbers as indices.
684 /// Numeric strings get FieldOrIndex to handle both object key and array index access.
685 fn val_arg_to_segment(arg: &CompiledNode) -> Option<PathSegment> {
686 match arg {
687 CompiledNode::Value {
688 value: Value::String(s),
689 } => {
690 if let Ok(idx) = s.parse::<usize>() {
691 Some(PathSegment::FieldOrIndex(s.as_str().into(), idx))
692 } else {
693 Some(PathSegment::Field(s.as_str().into()))
694 }
695 }
696 CompiledNode::Value {
697 value: Value::Number(n),
698 } => n.as_u64().map(|idx| PathSegment::Index(idx as usize)),
699 _ => None,
700 }
701 }
702
703 /// Try to compile val path segments (used by level-access and path-chain cases).
704 fn try_compile_val_segments(
705 args: &[CompiledNode],
706 scope_level: u32,
707 metadata_hint: MetadataHint,
708 ) -> Option<CompiledNode> {
709 let mut segments = Vec::new();
710 for arg in args {
711 segments.push(Self::val_arg_to_segment(arg)?);
712 }
713
714 Some(CompiledNode::CompiledVar {
715 scope_level,
716 segments: segments.into_boxed_slice(),
717 reduce_hint: ReduceHint::None,
718 metadata_hint,
719 default_value: None,
720 })
721 }
722
723 /// Try to collect remaining val args into segments and build a CompiledVar.
724 fn try_collect_val_segments(
725 args: &[CompiledNode],
726 segments: &mut Vec<PathSegment>,
727 reduce_hint: ReduceHint,
728 ) -> Option<CompiledNode> {
729 for arg in args {
730 segments.push(Self::val_arg_to_segment(arg)?);
731 }
732
733 Some(CompiledNode::CompiledVar {
734 scope_level: 0,
735 segments: std::mem::take(segments).into_boxed_slice(),
736 reduce_hint,
737 metadata_hint: MetadataHint::None,
738 default_value: None,
739 })
740 }
741
742 /// Try to compile an exists operator into a CompiledExists node.
743 fn try_compile_exists(args: &[CompiledNode]) -> Option<CompiledNode> {
744 if args.is_empty() {
745 return Some(CompiledNode::CompiledExists {
746 scope_level: 0,
747 segments: Box::new([]),
748 });
749 }
750
751 if args.len() == 1 {
752 if let CompiledNode::Value {
753 value: Value::String(s),
754 } = &args[0]
755 {
756 return Some(CompiledNode::CompiledExists {
757 scope_level: 0,
758 segments: vec![PathSegment::Field(s.as_str().into())].into_boxed_slice(),
759 });
760 }
761 return None;
762 }
763
764 // Multiple args - all must be literal strings
765 let mut segments = Vec::new();
766 for arg in args {
767 if let CompiledNode::Value {
768 value: Value::String(s),
769 } = arg
770 {
771 segments.push(PathSegment::Field(s.as_str().into()));
772 } else {
773 return None;
774 }
775 }
776
777 Some(CompiledNode::CompiledExists {
778 scope_level: 0,
779 segments: segments.into_boxed_slice(),
780 })
781 }
782
783 /// Try to pre-compile a split operator's regex pattern at compile time.
784 ///
785 /// When the delimiter (second arg) is a static string containing named capture
786 /// groups (`(?P<...)`), the regex is compiled once here instead of on every evaluation.
787 fn try_compile_split_regex(args: &[CompiledNode]) -> Option<CompiledNode> {
788 if args.len() < 2 {
789 return None;
790 }
791
792 // Check if the delimiter is a static string with named capture groups
793 let pattern = match &args[1] {
794 CompiledNode::Value {
795 value: Value::String(s),
796 } if s.contains("(?P<") => s.as_str(),
797 _ => return None,
798 };
799
800 // Try to compile the regex
801 let re = Regex::new(pattern).ok()?;
802 let capture_names: Vec<Box<str>> = re.capture_names().flatten().map(|n| n.into()).collect();
803
804 // Only optimize if there are named capture groups
805 if capture_names.is_empty() {
806 return None;
807 }
808
809 // Keep only the text argument (first arg)
810 let text_args = vec![args[0].clone()].into_boxed_slice();
811
812 Some(CompiledNode::CompiledSplitRegex {
813 args: text_args,
814 regex: Arc::new(re),
815 capture_names: capture_names.into_boxed_slice(),
816 })
817 }
818
    /// Check if an operator can be statically evaluated at compile time.
    ///
    /// Static operators can be pre-computed during compilation when their arguments
    /// are also static, eliminating runtime evaluation overhead.
    ///
    /// # Classification Criteria
    ///
    /// An operator is **non-static** (dynamic) if it:
    /// 1. Reads from the data context (`var`, `val`, `missing`, `exists`)
    /// 2. Uses iterative callbacks with changing context (`map`, `filter`, `reduce`)
    /// 3. Has side effects or error handling (`try`, `throw`)
    /// 4. Depends on runtime state (`now` for current time)
    /// 5. Needs runtime disambiguation (`preserve`, `merge`, `min`, `max`)
    ///
    /// All other operators are **static** when their arguments are static.
    ///
    /// The match below has no wildcard arm, so every opcode is classified explicitly.
    fn opcode_is_static(opcode: &OpCode, args: &[CompiledNode]) -> bool {
        // NOTE: this glob import brings `OpCode::Some` and `OpCode::None` into
        // scope, shadowing the `Option` variants inside this function.
        use OpCode::*;

        // Check if all arguments are static first (common pattern)
        // Kept as a closure so the argument scan only runs for pure operators.
        let args_static = || args.iter().all(Self::node_is_static);

        match opcode {
            // Context-dependent: These operators read from the data context, which is
            // not available at compile time. They must remain dynamic.
            Var | Val | Missing | MissingSome | Exists => false,

            // Iteration operators: These push new contexts for each iteration and use
            // callbacks that may reference the iteration variable. Even with static
            // arrays, the callback logic depends on the per-element context.
            Map | Filter | Reduce | All | Some | None => false,

            // Error handling: These have control flow effects (early exit, error propagation)
            // that should be preserved for runtime execution.
            Try | Throw => false,

            // Time-dependent: Returns current UTC time, inherently non-static.
            Now => false,

            // Runtime disambiguation needed:
            // - Preserve: Must know it was explicitly used as an operator, not inferred
            // - Merge/Min/Max: Need to distinguish [1,2,3] literal from operator arguments
            //   at runtime to handle nested arrays correctly
            Preserve => false,
            Merge | Min | Max => false,

            // Pure operators: Static when all arguments are static. These perform
            // deterministic transformations without side effects or context access.
            Type | StartsWith | EndsWith | Upper | Lower | Trim | Split | Datetime | Timestamp
            | ParseDate | FormatDate | DateDiff | Abs | Ceil | Floor | Add | Subtract
            | Multiply | Divide | Modulo | Equals | StrictEquals | NotEquals | StrictNotEquals
            | GreaterThan | GreaterThanEqual | LessThan | LessThanEqual | Not | DoubleNot | And
            | Or | Ternary | If | Cat | Substr | In | Length | Sort | Slice | Coalesce | Switch => {
                args_static()
            }
        }
    }
860}