Skip to main content

datalogic_rs/
compiled.rs

1use crate::{ContextStack, DataLogic, Result, opcode::OpCode};
2use regex::Regex;
3use serde_json::{Value, json};
4use std::sync::Arc;
5
/// A pre-parsed path segment for compiled variable access.
///
/// Segments are built during compilation (see `parse_path_segments` for
/// dot-separated `var` paths and `val_arg_to_segment` for `val` arguments).
#[derive(Debug, Clone)]
pub enum PathSegment {
    /// Object field access by key
    Field(Box<str>),
    /// Array element access by index
    Index(usize),
    /// Try as object key first, then as array index (for segments that could be either).
    /// Pre-parses the index at compile time to avoid runtime parsing.
    ///
    /// Carries both the original segment text and its parsed `usize` value.
    FieldOrIndex(Box<str>, usize),
}
17
/// Hint for reduce context resolution, detected at compile time.
///
/// The hint is derived purely from the literal path text: `parse_var_path`
/// assigns it for `var`, and `try_compile_val` assigns it for `val`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReduceHint {
    /// Normal path access (no reduce context)
    None,
    /// Path is exactly "current" — return reduce_current directly
    Current,
    /// Path is exactly "accumulator" — return reduce_accumulator directly
    Accumulator,
    /// Path starts with "current." — segments[0] is "current", use segments[1..] from reduce_current
    CurrentPath,
    /// Path starts with "accumulator." — segments[0] is "accumulator", use segments[1..] from reduce_accumulator
    AccumulatorPath,
}
32
/// Hint for metadata access (index/key), detected at compile time.
///
/// `try_compile_val` sets a non-`None` hint only for the level-access form
/// with exactly two arguments whose second argument is the literal string
/// `"index"` or `"key"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MetadataHint {
    /// Normal data access
    None,
    /// Access frame index metadata
    Index,
    /// Access frame key metadata
    Key,
}
43
/// A compiled node representing a single operation or value in the logic tree.
///
/// Nodes are created during the compilation phase and evaluated during execution.
/// Each node type is optimized for its specific purpose:
///
/// - **Value**: Static JSON values that don't require evaluation
/// - **Array**: Collections of nodes evaluated sequentially
/// - **BuiltinOperator**: Fast OpCode-based dispatch for built-in operators
/// - **CustomOperator**: User-defined operators with dynamic dispatch
/// - **StructuredObject**: Template objects for structure preservation
/// - **CompiledVar**: `var`/`val` access with a path parsed at compile time
/// - **CompiledExists**: `exists` check with a path parsed at compile time
/// - **CompiledSplitRegex**: `split` with a regex compiled at compile time
/// - **CompiledThrow**: `throw` with a pre-built error object
#[derive(Debug, Clone)]
pub enum CompiledNode {
    /// A static JSON value that requires no evaluation.
    ///
    /// Used for literals like numbers, strings, booleans, and null.
    Value { value: Value },

    /// An array of compiled nodes.
    ///
    /// Each node is evaluated in sequence, and the results are collected into a JSON array.
    /// Uses `Box<[CompiledNode]>` for memory efficiency.
    Array { nodes: Box<[CompiledNode]> },

    /// A built-in operator optimized with OpCode dispatch.
    ///
    /// The OpCode enum enables direct dispatch without string lookups,
    /// significantly improving performance for the 50+ built-in operators.
    BuiltinOperator {
        opcode: OpCode,
        args: Box<[CompiledNode]>,
    },

    /// A custom operator registered via `DataLogic::add_operator`.
    ///
    /// Custom operators use dynamic dispatch and are looked up by name
    /// from the engine's operator registry.
    CustomOperator {
        name: String,
        args: Box<[CompiledNode]>,
    },

    /// A structured object template for preserve_structure mode.
    ///
    /// When structure preservation is enabled, objects with keys that are not
    /// built-in operators or registered custom operators are preserved as templates.
    /// Each field is evaluated independently, allowing for dynamic object generation.
    ///
    /// Note: Custom operators are checked before treating keys as structured fields,
    /// ensuring they work correctly within preserved structures.
    StructuredObject {
        fields: Box<[(String, CompiledNode)]>,
    },

    /// A pre-compiled variable access (unified var/val).
    ///
    /// scope_level 0 = current context (var-style), N = go up N levels (val with [[N], ...]).
    /// Segments are pre-parsed at compile time to avoid runtime string splitting.
    ///
    /// `default_value` is only populated for `var` (its second argument);
    /// the `val` compilation paths always leave it as `None`.
    CompiledVar {
        scope_level: u32,
        segments: Box<[PathSegment]>,
        reduce_hint: ReduceHint,
        metadata_hint: MetadataHint,
        default_value: Option<Box<CompiledNode>>,
    },

    /// A pre-compiled exists check.
    ///
    /// scope_level 0 = current context, N = go up N levels.
    /// Segments are pre-parsed at compile time.
    CompiledExists {
        scope_level: u32,
        segments: Box<[PathSegment]>,
    },

    /// A pre-compiled split with regex pattern.
    ///
    /// When the split operator's delimiter is a static regex pattern with named
    /// capture groups, the regex is compiled once during the compilation phase
    /// instead of on every evaluation.
    CompiledSplitRegex {
        /// The text argument (only the first arg of split)
        args: Box<[CompiledNode]>,
        /// Pre-compiled regex pattern
        regex: Arc<Regex>,
        /// Pre-extracted capture group names
        capture_names: Box<[Box<str>]>,
    },

    /// A pre-compiled throw with a static error object.
    ///
    /// When `throw` is called with a literal string, the error object
    /// `{"type": "..."}` is pre-built at compile time.
    CompiledThrow { error_obj: Value },
}
138
/// Compiled logic that can be evaluated multiple times across different data.
///
/// `CompiledLogic` represents a pre-processed JSONLogic expression that has been
/// optimized for repeated evaluation. It's thread-safe and can be shared across
/// threads using `Arc`.
///
/// # Performance Benefits
///
/// - **Parse once, evaluate many**: Avoid repeated JSON parsing
/// - **Static evaluation**: Constant expressions are pre-computed
/// - **OpCode dispatch**: Built-in operators use fast enum dispatch
/// - **Thread-safe sharing**: Use `Arc` to share across threads
///
/// # Example
///
/// ```rust
/// use datalogic_rs::DataLogic;
/// use serde_json::json;
/// use std::sync::Arc;
///
/// let engine = DataLogic::new();
/// let logic = json!({">": [{"var": "score"}, 90]});
/// let compiled = engine.compile(&logic).unwrap(); // Returns Arc<CompiledLogic>
///
/// // Can be shared across threads
/// let compiled_clone = Arc::clone(&compiled);
/// std::thread::spawn(move || {
///     let data = json!({"score": 95});
///     let result = engine.evaluate_owned(&compiled_clone, data);
/// });
/// ```
#[derive(Debug, Clone)]
pub struct CompiledLogic {
    /// The root node of the compiled logic tree.
    ///
    /// This is the node that `is_static` inspects.
    pub root: CompiledNode,
}
175
176impl CompiledLogic {
177    /// Creates a new compiled logic from a root node.
178    ///
179    /// # Arguments
180    ///
181    /// * `root` - The root node of the compiled logic tree
182    pub fn new(root: CompiledNode) -> Self {
183        Self { root }
184    }
185
186    /// Compiles a JSON value into a compiled logic structure.
187    ///
188    /// This method performs basic compilation without static evaluation.
189    /// For optimal performance, use `compile_with_static_eval` instead.
190    ///
191    /// # Arguments
192    ///
193    /// * `logic` - The JSON logic expression to compile
194    ///
195    /// # Returns
196    ///
197    /// A compiled logic structure, or an error if compilation fails.
198    pub fn compile(logic: &Value) -> Result<Self> {
199        let root = Self::compile_node(logic, None, false)?;
200        Ok(Self::new(root))
201    }
202
203    /// Compiles for tracing without static evaluation.
204    ///
205    /// This method compiles the logic without performing static evaluation,
206    /// ensuring that all operators remain in the tree for step-by-step debugging.
207    /// Use this when you need to trace execution through operators that would
208    /// otherwise be pre-evaluated at compile time.
209    ///
210    /// # Arguments
211    ///
212    /// * `logic` - The JSON logic expression to compile
213    /// * `preserve_structure` - Whether to preserve unknown object structure
214    ///
215    /// # Returns
216    ///
217    /// A compiled logic structure without static optimizations.
218    pub fn compile_for_trace(logic: &Value, preserve_structure: bool) -> Result<Self> {
219        let root = Self::compile_node(logic, None, preserve_structure)?;
220        Ok(Self::new(root))
221    }
222
223    /// Compiles with static evaluation using the provided engine.
224    ///
225    /// This method performs optimizations including:
226    /// - Static evaluation of constant expressions
227    /// - OpCode assignment for built-in operators
228    /// - Structure preservation based on engine settings
229    ///
230    /// # Arguments
231    ///
232    /// * `logic` - The JSON logic expression to compile
233    /// * `engine` - The DataLogic engine for static evaluation
234    ///
235    /// # Returns
236    ///
237    /// An optimized compiled logic structure, or an error if compilation fails.
238    pub fn compile_with_static_eval(logic: &Value, engine: &DataLogic) -> Result<Self> {
239        let root = Self::compile_node(logic, Some(engine), engine.preserve_structure())?;
240        Ok(Self::new(root))
241    }
242
243    /// Compiles a single JSON value into a CompiledNode.
244    ///
245    /// This recursive method handles all node types:
246    /// - Objects with operators
247    /// - Arrays
248    /// - Primitive values
249    /// - Structured objects (in preserve mode)
250    ///
251    /// # Arguments
252    ///
253    /// * `value` - The JSON value to compile
254    /// * `engine` - Optional engine for static evaluation
255    /// * `preserve_structure` - Whether to preserve unknown object structure
256    ///
257    /// # Returns
258    ///
259    /// A compiled node, or an error if the value is invalid.
260    fn compile_node(
261        value: &Value,
262        engine: Option<&DataLogic>,
263        preserve_structure: bool,
264    ) -> Result<CompiledNode> {
265        match value {
266            Value::Object(obj) if obj.len() > 1 => {
267                if preserve_structure {
268                    // In preserve_structure mode, treat multi-key objects as structured objects
269                    // We'll create a special StructuredObject node that gets evaluated field by field
270                    let fields: Vec<_> = obj
271                        .iter()
272                        .map(|(key, val)| {
273                            Self::compile_node(val, engine, preserve_structure)
274                                .map(|compiled_val| (key.clone(), compiled_val))
275                        })
276                        .collect::<Result<Vec<_>>>()?;
277                    Ok(CompiledNode::StructuredObject {
278                        fields: fields.into_boxed_slice(),
279                    })
280                } else {
281                    // Multi-key objects are not valid operators
282                    Err(crate::error::Error::InvalidOperator(
283                        "Unknown Operator".to_string(),
284                    ))
285                }
286            }
287            Value::Object(obj) if obj.len() == 1 => {
288                // Single key object is an operator
289                let (op_name, args_value) = obj.iter().next().unwrap();
290
291                // Try to parse as built-in operator first
292                if let Ok(opcode) = op_name.parse::<OpCode>() {
293                    // Check if this operator requires array arguments
294                    let requires_array = matches!(opcode, OpCode::And | OpCode::Or | OpCode::If);
295
296                    // For operators that require arrays, check the raw value
297                    if requires_array && !matches!(args_value, Value::Array(_)) {
298                        // Create a special marker node for invalid arguments
299                        let invalid_value = json!({
300                            "__invalid_args__": true,
301                            "value": args_value
302                        });
303                        let value_node = CompiledNode::Value {
304                            value: invalid_value,
305                        };
306                        let args = vec![value_node].into_boxed_slice();
307                        return Ok(CompiledNode::BuiltinOperator { opcode, args });
308                    }
309
310                    // Special handling for preserve operator - don't compile its arguments
311                    let args = if opcode == OpCode::Preserve {
312                        // Preserve takes raw values, not compiled logic
313                        match args_value {
314                            Value::Array(arr) => arr
315                                .iter()
316                                .map(|v| CompiledNode::Value { value: v.clone() })
317                                .collect::<Vec<_>>()
318                                .into_boxed_slice(),
319                            _ => vec![CompiledNode::Value {
320                                value: args_value.clone(),
321                            }]
322                            .into_boxed_slice(),
323                        }
324                    } else {
325                        Self::compile_args(args_value, engine, preserve_structure)?
326                    };
327                    // Try to optimize variable access operators at compile time
328                    if matches!(opcode, OpCode::Var | OpCode::Val | OpCode::Exists) {
329                        let optimized = match opcode {
330                            OpCode::Var => Self::try_compile_var(&args),
331                            OpCode::Val => Self::try_compile_val(&args),
332                            OpCode::Exists => Self::try_compile_exists(&args),
333                            _ => None,
334                        };
335                        if let Some(node) = optimized {
336                            return Ok(node);
337                        }
338                    }
339
340                    // Pre-compile regex for split operator when delimiter is a static pattern
341                    if opcode == OpCode::Split
342                        && let Some(node) = Self::try_compile_split_regex(&args)
343                    {
344                        return Ok(node);
345                    }
346
347                    // Pre-compile throw with literal string into CompiledThrow
348                    if opcode == OpCode::Throw
349                        && args.len() == 1
350                        && let CompiledNode::Value {
351                            value: Value::String(s),
352                        } = &args[0]
353                    {
354                        return Ok(CompiledNode::CompiledThrow {
355                            error_obj: serde_json::json!({"type": s}),
356                        });
357                    }
358
359                    let node = CompiledNode::BuiltinOperator { opcode, args };
360
361                    // If engine is provided and node is static, evaluate it
362                    if let std::option::Option::Some(eng) = engine
363                        && Self::node_is_static(&node)
364                    {
365                        // Evaluate with empty context since it's static
366                        let mut context = ContextStack::new(Arc::new(Value::Null));
367                        match eng.evaluate_node(&node, &mut context) {
368                            Ok(value) => {
369                                return Ok(CompiledNode::Value { value });
370                            }
371                            // If evaluation fails, keep as operator node
372                            Err(_) => return Ok(node),
373                        }
374                    }
375
376                    Ok(node)
377                } else if preserve_structure {
378                    // In preserve_structure mode, we need to distinguish between:
379                    // 1. Custom operators (should be evaluated as operators)
380                    // 2. Unknown keys (should be preserved as structured object fields)
381                    //
382                    // Check if this is a custom operator first
383                    if let Some(eng) = engine
384                        && eng.has_custom_operator(op_name)
385                    {
386                        // It's a registered custom operator - compile as CustomOperator
387                        // This ensures custom operators work correctly in preserve_structure mode,
388                        // e.g., {"result": {"custom_op": arg}} will evaluate custom_op properly
389                        let args = Self::compile_args(args_value, engine, preserve_structure)?;
390                        return Ok(CompiledNode::CustomOperator {
391                            name: op_name.clone(),
392                            args,
393                        });
394                    }
395                    // Not a built-in operator or custom operator - treat as structured object field
396                    // This allows dynamic object generation like {"name": {"var": "user.name"}}
397                    let compiled_val = Self::compile_node(args_value, engine, preserve_structure)?;
398                    let fields = vec![(op_name.clone(), compiled_val)].into_boxed_slice();
399                    Ok(CompiledNode::StructuredObject { fields })
400                } else {
401                    let args = Self::compile_args(args_value, engine, preserve_structure)?;
402                    // Fall back to custom operator - don't pre-evaluate custom operators
403                    Ok(CompiledNode::CustomOperator {
404                        name: op_name.clone(),
405                        args,
406                    })
407                }
408            }
409            Value::Array(arr) => {
410                // Array of logic expressions
411                let nodes = arr
412                    .iter()
413                    .map(|v| Self::compile_node(v, engine, preserve_structure))
414                    .collect::<Result<Vec<_>>>()?;
415
416                let nodes_boxed = nodes.into_boxed_slice();
417                let node = CompiledNode::Array { nodes: nodes_boxed };
418
419                // If engine is provided and array is static, evaluate it
420                if let std::option::Option::Some(eng) = engine
421                    && Self::node_is_static(&node)
422                {
423                    let mut context = ContextStack::new(Arc::new(Value::Null));
424                    if let Ok(value) = eng.evaluate_node(&node, &mut context) {
425                        return Ok(CompiledNode::Value { value });
426                    }
427                }
428
429                Ok(node)
430            }
431            _ => {
432                // Static value
433                Ok(CompiledNode::Value {
434                    value: value.clone(),
435                })
436            }
437        }
438    }
439
440    /// Compile operator arguments
441    fn compile_args(
442        value: &Value,
443        engine: Option<&DataLogic>,
444        preserve_structure: bool,
445    ) -> Result<Box<[CompiledNode]>> {
446        match value {
447            Value::Array(arr) => arr
448                .iter()
449                .map(|v| Self::compile_node(v, engine, preserve_structure))
450                .collect::<Result<Vec<_>>>()
451                .map(Vec::into_boxed_slice),
452            _ => {
453                // Single argument - compile it
454                Ok(vec![Self::compile_node(value, engine, preserve_structure)?].into_boxed_slice())
455            }
456        }
457    }
458
459    /// Check if this compiled logic is static (can be evaluated without context)
460    pub fn is_static(&self) -> bool {
461        Self::node_is_static(&self.root)
462    }
463
464    fn node_is_static(node: &CompiledNode) -> bool {
465        match node {
466            CompiledNode::Value { .. } => true,
467            CompiledNode::Array { nodes, .. } => nodes.iter().all(Self::node_is_static),
468            CompiledNode::BuiltinOperator { opcode, args, .. } => {
469                Self::opcode_is_static(opcode, args)
470            }
471            CompiledNode::CustomOperator { .. } => false, // Unknown operators are non-static
472            CompiledNode::CompiledVar { .. } | CompiledNode::CompiledExists { .. } => false, // Context-dependent
473            CompiledNode::CompiledSplitRegex { args, .. } => args.iter().all(Self::node_is_static),
474            CompiledNode::CompiledThrow { .. } => false, // Error-producing, non-static
475            CompiledNode::StructuredObject { fields, .. } => {
476                fields.iter().all(|(_, node)| Self::node_is_static(node))
477            }
478        }
479    }
480
481    /// Check if an operator can be statically evaluated at compile time.
482    ///
483    /// Static operators can be pre-computed during compilation when their arguments
484    /// are also static, eliminating runtime evaluation overhead.
485    ///
486    /// # Classification Criteria
487    ///
488    /// An operator is **non-static** (dynamic) if it:
489    /// 1. Reads from the data context (`var`, `val`, `missing`, `exists`)
490    /// 2. Uses iterative callbacks with changing context (`map`, `filter`, `reduce`)
491    /// 3. Has side effects or error handling (`try`, `throw`)
492    /// 4. Depends on runtime state (`now` for current time)
493    /// 5. Needs runtime disambiguation (`preserve`, `merge`, `min`, `max`)
494    ///
495    /// All other operators are **static** when their arguments are static.
496    /// Parse a dot-separated path into pre-parsed segments (for var, which uses dot notation).
497    /// Numeric segments become FieldOrIndex to handle both object keys and array indices.
498    fn parse_path_segments(path: &str) -> Vec<PathSegment> {
499        if path.is_empty() {
500            return Vec::new();
501        }
502        if !path.contains('.') {
503            if let Ok(idx) = path.parse::<usize>() {
504                return vec![PathSegment::FieldOrIndex(path.into(), idx)];
505            }
506            return vec![PathSegment::Field(path.into())];
507        }
508        path.split('.')
509            .map(|part| {
510                if let Ok(idx) = part.parse::<usize>() {
511                    PathSegment::FieldOrIndex(part.into(), idx)
512                } else {
513                    PathSegment::Field(part.into())
514                }
515            })
516            .collect()
517    }
518
519    /// Parse a var path and determine the reduce hint.
520    fn parse_var_path(path: &str) -> (ReduceHint, Vec<PathSegment>) {
521        if path == "current" {
522            (
523                ReduceHint::Current,
524                vec![PathSegment::Field("current".into())],
525            )
526        } else if path == "accumulator" {
527            (
528                ReduceHint::Accumulator,
529                vec![PathSegment::Field("accumulator".into())],
530            )
531        } else if let Some(rest) = path.strip_prefix("current.") {
532            let mut segs = vec![PathSegment::Field("current".into())];
533            segs.extend(Self::parse_path_segments(rest));
534            (ReduceHint::CurrentPath, segs)
535        } else if let Some(rest) = path.strip_prefix("accumulator.") {
536            let mut segs = vec![PathSegment::Field("accumulator".into())];
537            segs.extend(Self::parse_path_segments(rest));
538            (ReduceHint::AccumulatorPath, segs)
539        } else {
540            (ReduceHint::None, Self::parse_path_segments(path))
541        }
542    }
543
544    /// Try to compile a var operator into a CompiledVar node.
545    fn try_compile_var(args: &[CompiledNode]) -> Option<CompiledNode> {
546        if args.is_empty() {
547            return Some(CompiledNode::CompiledVar {
548                scope_level: 0,
549                segments: Box::new([]),
550                reduce_hint: ReduceHint::None,
551                metadata_hint: MetadataHint::None,
552                default_value: None,
553            });
554        }
555
556        let (segments, reduce_hint) = match &args[0] {
557            CompiledNode::Value {
558                value: Value::String(s),
559            } => {
560                let (hint, segs) = Self::parse_var_path(s);
561                (segs, hint)
562            }
563            CompiledNode::Value {
564                value: Value::Number(n),
565            } => {
566                let s = n.to_string();
567                let segs = Self::parse_path_segments(&s);
568                (segs, ReduceHint::None)
569            }
570            _ => return None, // dynamic path
571        };
572
573        let default_value = if args.len() > 1 {
574            Some(Box::new(args[1].clone()))
575        } else {
576            None
577        };
578
579        Some(CompiledNode::CompiledVar {
580            scope_level: 0,
581            segments: segments.into_boxed_slice(),
582            reduce_hint,
583            metadata_hint: MetadataHint::None,
584            default_value,
585        })
586    }
587
588    /// Try to compile a val operator into a CompiledVar node.
589    fn try_compile_val(args: &[CompiledNode]) -> Option<CompiledNode> {
590        if args.is_empty() {
591            return Some(CompiledNode::CompiledVar {
592                scope_level: 0,
593                segments: Box::new([]),
594                reduce_hint: ReduceHint::None,
595                metadata_hint: MetadataHint::None,
596                default_value: None,
597            });
598        }
599
600        // Val does NOT support dot-path notation. Each arg is a literal key/index.
601
602        // Case 2: Single non-empty string → single Field segment (literal key)
603        // Empty string has dual behavior (try key "" then whole-context fallback) — keep as BuiltinOperator.
604        if args.len() == 1 {
605            if let CompiledNode::Value {
606                value: Value::String(s),
607            } = &args[0]
608                && !s.is_empty()
609            {
610                let reduce_hint = if s == "current" {
611                    ReduceHint::Current
612                } else if s == "accumulator" {
613                    ReduceHint::Accumulator
614                } else {
615                    ReduceHint::None
616                };
617                let segment = if let Ok(idx) = s.parse::<usize>() {
618                    PathSegment::FieldOrIndex(s.as_str().into(), idx)
619                } else {
620                    PathSegment::Field(s.as_str().into())
621                };
622                return Some(CompiledNode::CompiledVar {
623                    scope_level: 0,
624                    segments: vec![segment].into_boxed_slice(),
625                    reduce_hint,
626                    metadata_hint: MetadataHint::None,
627                    default_value: None,
628                });
629            }
630            return None;
631        }
632
633        // Case 3: First arg is [[level]] array
634        if let CompiledNode::Value {
635            value: Value::Array(level_arr),
636        } = &args[0]
637            && let Some(Value::Number(level_num)) = level_arr.first()
638            && let Some(level) = level_num.as_i64()
639        {
640            let scope_level = level.unsigned_abs() as u32;
641
642            // Check metadata hints for 2-arg case
643            let mut metadata_hint = MetadataHint::None;
644            if args.len() == 2
645                && let CompiledNode::Value {
646                    value: Value::String(s),
647                } = &args[1]
648            {
649                if s == "index" {
650                    metadata_hint = MetadataHint::Index;
651                } else if s == "key" {
652                    metadata_hint = MetadataHint::Key;
653                }
654            }
655
656            return Self::try_compile_val_segments(&args[1..], scope_level, metadata_hint);
657        }
658
659        // Case 4: 2+ args with all literal path segments — compile as path chain.
660        if let Some(first_seg) = Self::val_arg_to_segment(&args[0]) {
661            let reduce_hint = match &args[0] {
662                CompiledNode::Value {
663                    value: Value::String(s),
664                } if s == "current" => ReduceHint::CurrentPath,
665                CompiledNode::Value {
666                    value: Value::String(s),
667                } if s == "accumulator" => ReduceHint::AccumulatorPath,
668                _ => ReduceHint::None,
669            };
670
671            let mut segments = vec![first_seg];
672            if let Some(compiled) =
673                Self::try_collect_val_segments(&args[1..], &mut segments, reduce_hint)
674            {
675                return Some(compiled);
676            }
677        }
678
679        None
680    }
681
682    /// Convert a val argument into a PathSegment.
683    /// Val treats string args as literal keys (no dot-splitting), and numbers as indices.
684    /// Numeric strings get FieldOrIndex to handle both object key and array index access.
685    fn val_arg_to_segment(arg: &CompiledNode) -> Option<PathSegment> {
686        match arg {
687            CompiledNode::Value {
688                value: Value::String(s),
689            } => {
690                if let Ok(idx) = s.parse::<usize>() {
691                    Some(PathSegment::FieldOrIndex(s.as_str().into(), idx))
692                } else {
693                    Some(PathSegment::Field(s.as_str().into()))
694                }
695            }
696            CompiledNode::Value {
697                value: Value::Number(n),
698            } => n.as_u64().map(|idx| PathSegment::Index(idx as usize)),
699            _ => None,
700        }
701    }
702
703    /// Try to compile val path segments (used by level-access and path-chain cases).
704    fn try_compile_val_segments(
705        args: &[CompiledNode],
706        scope_level: u32,
707        metadata_hint: MetadataHint,
708    ) -> Option<CompiledNode> {
709        let mut segments = Vec::new();
710        for arg in args {
711            segments.push(Self::val_arg_to_segment(arg)?);
712        }
713
714        Some(CompiledNode::CompiledVar {
715            scope_level,
716            segments: segments.into_boxed_slice(),
717            reduce_hint: ReduceHint::None,
718            metadata_hint,
719            default_value: None,
720        })
721    }
722
723    /// Try to collect remaining val args into segments and build a CompiledVar.
724    fn try_collect_val_segments(
725        args: &[CompiledNode],
726        segments: &mut Vec<PathSegment>,
727        reduce_hint: ReduceHint,
728    ) -> Option<CompiledNode> {
729        for arg in args {
730            segments.push(Self::val_arg_to_segment(arg)?);
731        }
732
733        Some(CompiledNode::CompiledVar {
734            scope_level: 0,
735            segments: std::mem::take(segments).into_boxed_slice(),
736            reduce_hint,
737            metadata_hint: MetadataHint::None,
738            default_value: None,
739        })
740    }
741
742    /// Try to compile an exists operator into a CompiledExists node.
743    fn try_compile_exists(args: &[CompiledNode]) -> Option<CompiledNode> {
744        if args.is_empty() {
745            return Some(CompiledNode::CompiledExists {
746                scope_level: 0,
747                segments: Box::new([]),
748            });
749        }
750
751        if args.len() == 1 {
752            if let CompiledNode::Value {
753                value: Value::String(s),
754            } = &args[0]
755            {
756                return Some(CompiledNode::CompiledExists {
757                    scope_level: 0,
758                    segments: vec![PathSegment::Field(s.as_str().into())].into_boxed_slice(),
759                });
760            }
761            return None;
762        }
763
764        // Multiple args - all must be literal strings
765        let mut segments = Vec::new();
766        for arg in args {
767            if let CompiledNode::Value {
768                value: Value::String(s),
769            } = arg
770            {
771                segments.push(PathSegment::Field(s.as_str().into()));
772            } else {
773                return None;
774            }
775        }
776
777        Some(CompiledNode::CompiledExists {
778            scope_level: 0,
779            segments: segments.into_boxed_slice(),
780        })
781    }
782
783    /// Try to pre-compile a split operator's regex pattern at compile time.
784    ///
785    /// When the delimiter (second arg) is a static string containing named capture
786    /// groups (`(?P<...)`), the regex is compiled once here instead of on every evaluation.
787    fn try_compile_split_regex(args: &[CompiledNode]) -> Option<CompiledNode> {
788        if args.len() < 2 {
789            return None;
790        }
791
792        // Check if the delimiter is a static string with named capture groups
793        let pattern = match &args[1] {
794            CompiledNode::Value {
795                value: Value::String(s),
796            } if s.contains("(?P<") => s.as_str(),
797            _ => return None,
798        };
799
800        // Try to compile the regex
801        let re = Regex::new(pattern).ok()?;
802        let capture_names: Vec<Box<str>> = re.capture_names().flatten().map(|n| n.into()).collect();
803
804        // Only optimize if there are named capture groups
805        if capture_names.is_empty() {
806            return None;
807        }
808
809        // Keep only the text argument (first arg)
810        let text_args = vec![args[0].clone()].into_boxed_slice();
811
812        Some(CompiledNode::CompiledSplitRegex {
813            args: text_args,
814            regex: Arc::new(re),
815            capture_names: capture_names.into_boxed_slice(),
816        })
817    }
818
819    fn opcode_is_static(opcode: &OpCode, args: &[CompiledNode]) -> bool {
820        use OpCode::*;
821
822        // Check if all arguments are static first (common pattern)
823        let args_static = || args.iter().all(Self::node_is_static);
824
825        match opcode {
826            // Context-dependent: These operators read from the data context, which is
827            // not available at compile time. They must remain dynamic.
828            Var | Val | Missing | MissingSome | Exists => false,
829
830            // Iteration operators: These push new contexts for each iteration and use
831            // callbacks that may reference the iteration variable. Even with static
832            // arrays, the callback logic depends on the per-element context.
833            Map | Filter | Reduce | All | Some | None => false,
834
835            // Error handling: These have control flow effects (early exit, error propagation)
836            // that should be preserved for runtime execution.
837            Try | Throw => false,
838
839            // Time-dependent: Returns current UTC time, inherently non-static.
840            Now => false,
841
842            // Runtime disambiguation needed:
843            // - Preserve: Must know it was explicitly used as an operator, not inferred
844            // - Merge/Min/Max: Need to distinguish [1,2,3] literal from operator arguments
845            //   at runtime to handle nested arrays correctly
846            Preserve => false,
847            Merge | Min | Max => false,
848
849            // Pure operators: Static when all arguments are static. These perform
850            // deterministic transformations without side effects or context access.
851            Type | StartsWith | EndsWith | Upper | Lower | Trim | Split | Datetime | Timestamp
852            | ParseDate | FormatDate | DateDiff | Abs | Ceil | Floor | Add | Subtract
853            | Multiply | Divide | Modulo | Equals | StrictEquals | NotEquals | StrictNotEquals
854            | GreaterThan | GreaterThanEqual | LessThan | LessThanEqual | Not | DoubleNot | And
855            | Or | Ternary | If | Cat | Substr | In | Length | Sort | Slice | Coalesce | Switch => {
856                args_static()
857            }
858        }
859    }
860}