Skip to main content

datalogic_rs/compile/
mod.rs

1pub mod optimize;
2
3use crate::node::{
4    CompiledLogic, CompiledNode, MetadataHint, PathSegment, ReduceHint, node_is_static,
5};
6use crate::opcode::OpCode;
7use crate::{ContextStack, DataLogic, Result};
8use regex::Regex;
9use serde_json::{Value, json};
10use std::sync::Arc;
11
12impl CompiledLogic {
13    /// Compiles a JSON value into a compiled logic structure.
14    ///
15    /// This method performs basic compilation without static evaluation.
16    /// For optimal performance, use `compile_with_static_eval` instead.
17    ///
18    /// # Arguments
19    ///
20    /// * `logic` - The JSON logic expression to compile
21    ///
22    /// # Returns
23    ///
24    /// A compiled logic structure, or an error if compilation fails.
25    pub fn compile(logic: &Value) -> Result<Self> {
26        let root = Self::compile_node(logic, None, false)?;
27        Ok(Self::new(root))
28    }
29
30    /// Compiles for tracing without static evaluation.
31    ///
32    /// This method compiles the logic without performing static evaluation,
33    /// ensuring that all operators remain in the tree for step-by-step debugging.
34    /// Use this when you need to trace execution through operators that would
35    /// otherwise be pre-evaluated at compile time.
36    ///
37    /// # Arguments
38    ///
39    /// * `logic` - The JSON logic expression to compile
40    /// * `preserve_structure` - Whether to preserve unknown object structure
41    ///
42    /// # Returns
43    ///
44    /// A compiled logic structure without static optimizations.
45    pub fn compile_for_trace(logic: &Value, preserve_structure: bool) -> Result<Self> {
46        let root = Self::compile_node(logic, None, preserve_structure)?;
47        Ok(Self::new(root))
48    }
49
50    /// Compiles with static evaluation using the provided engine.
51    ///
52    /// This method performs optimizations including:
53    /// - Static evaluation of constant expressions
54    /// - OpCode assignment for built-in operators
55    /// - Structure preservation based on engine settings
56    ///
57    /// # Arguments
58    ///
59    /// * `logic` - The JSON logic expression to compile
60    /// * `engine` - The DataLogic engine for static evaluation
61    ///
62    /// # Returns
63    ///
64    /// An optimized compiled logic structure, or an error if compilation fails.
65    pub fn compile_with_static_eval(logic: &Value, engine: &DataLogic) -> Result<Self> {
66        let root = Self::compile_node(logic, Some(engine), engine.preserve_structure())?;
67        Ok(Self::new(root))
68    }
69
    /// Compiles a single JSON value into a CompiledNode.
    ///
    /// This recursive method handles all node types:
    /// - Multi-key objects: a `StructuredObject` in preserve mode, otherwise an
    ///   `InvalidOperator` error
    /// - Single-key objects: a built-in operator, a custom operator, or (in
    ///   preserve mode) a one-field `StructuredObject`
    /// - Arrays: each element is compiled as its own logic expression
    /// - Everything else (primitives and the empty object): a static `Value`
    ///
    /// When an engine is supplied, built-in operator nodes go through the
    /// `optimize` passes, and operator/array nodes that `node_is_static` accepts
    /// are evaluated once against a `Null` context and replaced by their value.
    ///
    /// # Arguments
    ///
    /// * `value` - The JSON value to compile
    /// * `engine` - Optional engine for static evaluation
    /// * `preserve_structure` - Whether to preserve unknown object structure
    ///
    /// # Returns
    ///
    /// A compiled node, or an error if the value is invalid.
    fn compile_node(
        value: &Value,
        engine: Option<&DataLogic>,
        preserve_structure: bool,
    ) -> Result<CompiledNode> {
        match value {
            Value::Object(obj) if obj.len() > 1 => {
                if preserve_structure {
                    // In preserve_structure mode, treat multi-key objects as structured objects
                    // We'll create a special StructuredObject node that gets evaluated field by field
                    // Each field value is compiled recursively; the first failure aborts.
                    let fields: Vec<_> = obj
                        .iter()
                        .map(|(key, val)| {
                            Self::compile_node(val, engine, preserve_structure)
                                .map(|compiled_val| (key.clone(), compiled_val))
                        })
                        .collect::<Result<Vec<_>>>()?;
                    Ok(CompiledNode::StructuredObject(Box::new(
                        crate::node::StructuredObjectData {
                            fields: fields.into_boxed_slice(),
                        },
                    )))
                } else {
                    // Multi-key objects are not valid operators
                    Err(crate::error::Error::InvalidOperator(
                        "Unknown Operator".to_string(),
                    ))
                }
            }
            Value::Object(obj) if obj.len() == 1 => {
                // Single key object is an operator
                // The guard above guarantees exactly one entry, so `unwrap` cannot fail.
                let (op_name, args_value) = obj.iter().next().unwrap();

                // Try to parse as built-in operator first
                if let Ok(opcode) = op_name.parse::<OpCode>() {
                    // Check if this operator requires array arguments
                    let requires_array = matches!(opcode, OpCode::And | OpCode::Or | OpCode::If);

                    // For operators that require arrays, check the raw value
                    if requires_array && !matches!(args_value, Value::Array(_)) {
                        // Create a special marker node for invalid arguments
                        // The raw argument is wrapped (not compiled) and the operator node is
                        // returned as-is, bypassing every optimization below.
                        let invalid_value = json!({
                            "__invalid_args__": true,
                            "value": args_value
                        });
                        let value_node = CompiledNode::Value {
                            value: invalid_value,
                        };
                        let args = vec![value_node].into_boxed_slice();
                        return Ok(CompiledNode::BuiltinOperator { opcode, args });
                    }

                    // Special handling for preserve operator - don't compile its arguments
                    let args = if opcode == OpCode::Preserve {
                        // Preserve takes raw values, not compiled logic
                        match args_value {
                            Value::Array(arr) => arr
                                .iter()
                                .map(|v| CompiledNode::Value { value: v.clone() })
                                .collect::<Vec<_>>()
                                .into_boxed_slice(),
                            _ => vec![CompiledNode::Value {
                                value: args_value.clone(),
                            }]
                            .into_boxed_slice(),
                        }
                    } else {
                        Self::compile_args(args_value, engine, preserve_structure)?
                    };
                    // Try to optimize variable access operators at compile time
                    // A `None` from the helper means the path is not fully literal; the
                    // generic operator node is built further down instead.
                    if matches!(opcode, OpCode::Var | OpCode::Val | OpCode::Exists) {
                        let optimized = match opcode {
                            OpCode::Var => Self::try_compile_var(&args),
                            OpCode::Val => Self::try_compile_val(&args),
                            OpCode::Exists => Self::try_compile_exists(&args),
                            _ => None,
                        };
                        if let Some(node) = optimized {
                            return Ok(node);
                        }
                    }

                    // Pre-compile regex for split operator when delimiter is a static pattern
                    if opcode == OpCode::Split
                        && let Some(node) = Self::try_compile_split_regex(&args)
                    {
                        return Ok(node);
                    }

                    // Pre-compile throw with literal string into CompiledThrow
                    // The literal becomes the `type` field of the pre-built error object.
                    if opcode == OpCode::Throw
                        && args.len() == 1
                        && let CompiledNode::Value {
                            value: Value::String(s),
                        } = &args[0]
                    {
                        return Ok(CompiledNode::CompiledThrow(Box::new(
                            serde_json::json!({"type": s}),
                        )));
                    }

                    let mut node = CompiledNode::BuiltinOperator { opcode, args };

                    // Run optimization passes when engine is available
                    if let std::option::Option::Some(eng) = engine {
                        node = optimize::optimize(node, eng);
                    }

                    // If engine is provided and node is static, evaluate it
                    if let std::option::Option::Some(eng) = engine
                        && node_is_static(&node)
                    {
                        // Evaluate with empty context since it's static
                        let mut context = ContextStack::new(Arc::new(Value::Null));
                        match eng.evaluate_node(&node, &mut context) {
                            Ok(value) => {
                                return Ok(CompiledNode::Value { value });
                            }
                            // If evaluation fails, keep as operator node
                            Err(_) => return Ok(node),
                        }
                    }

                    Ok(node)
                } else if preserve_structure {
                    // In preserve_structure mode, we need to distinguish between:
                    // 1. Custom operators (should be evaluated as operators)
                    // 2. Unknown keys (should be preserved as structured object fields)
                    //
                    // Check if this is a custom operator first
                    // Without an engine there is no registry to consult, so every unknown
                    // key falls through to the structured-object branch.
                    if let Some(eng) = engine
                        && eng.has_custom_operator(op_name)
                    {
                        // It's a registered custom operator - compile as CustomOperator
                        // This ensures custom operators work correctly in preserve_structure mode,
                        // e.g., {"result": {"custom_op": arg}} will evaluate custom_op properly
                        let args = Self::compile_args(args_value, engine, preserve_structure)?;
                        return Ok(CompiledNode::CustomOperator(Box::new(
                            crate::node::CustomOperatorData {
                                name: op_name.clone(),
                                args,
                            },
                        )));
                    }
                    // Not a built-in operator or custom operator - treat as structured object field
                    // This allows dynamic object generation like {"name": {"var": "user.name"}}
                    let compiled_val = Self::compile_node(args_value, engine, preserve_structure)?;
                    let fields = vec![(op_name.clone(), compiled_val)].into_boxed_slice();
                    Ok(CompiledNode::StructuredObject(Box::new(
                        crate::node::StructuredObjectData { fields },
                    )))
                } else {
                    let args = Self::compile_args(args_value, engine, preserve_structure)?;
                    // Fall back to custom operator - don't pre-evaluate custom operators
                    Ok(CompiledNode::CustomOperator(Box::new(
                        crate::node::CustomOperatorData {
                            name: op_name.clone(),
                            args,
                        },
                    )))
                }
            }
            Value::Array(arr) => {
                // Array of logic expressions
                let nodes = arr
                    .iter()
                    .map(|v| Self::compile_node(v, engine, preserve_structure))
                    .collect::<Result<Vec<_>>>()?;

                let nodes_boxed = nodes.into_boxed_slice();
                let node = CompiledNode::Array { nodes: nodes_boxed };

                // If engine is provided and array is static, evaluate it
                // A failed evaluation is ignored and the unevaluated array node is kept.
                if let std::option::Option::Some(eng) = engine
                    && node_is_static(&node)
                {
                    let mut context = ContextStack::new(Arc::new(Value::Null));
                    if let Ok(value) = eng.evaluate_node(&node, &mut context) {
                        return Ok(CompiledNode::Value { value });
                    }
                }

                Ok(node)
            }
            _ => {
                // Static value
                // Also reached for the empty object `{}`, which matches neither guard above.
                Ok(CompiledNode::Value {
                    value: value.clone(),
                })
            }
        }
    }
279
280    /// Compile operator arguments
281    fn compile_args(
282        value: &Value,
283        engine: Option<&DataLogic>,
284        preserve_structure: bool,
285    ) -> Result<Box<[CompiledNode]>> {
286        match value {
287            Value::Array(arr) => arr
288                .iter()
289                .map(|v| Self::compile_node(v, engine, preserve_structure))
290                .collect::<Result<Vec<_>>>()
291                .map(Vec::into_boxed_slice),
292            _ => {
293                // Single argument - compile it
294                Ok(vec![Self::compile_node(value, engine, preserve_structure)?].into_boxed_slice())
295            }
296        }
297    }
298
299    /// Parse a dot-separated path into pre-parsed segments (for var, which uses dot notation).
300    /// Numeric segments become FieldOrIndex to handle both object keys and array indices.
301    fn parse_path_segments(path: &str) -> Vec<PathSegment> {
302        if path.is_empty() {
303            return Vec::new();
304        }
305        if !path.contains('.') {
306            if let Ok(idx) = path.parse::<usize>() {
307                return vec![PathSegment::FieldOrIndex(path.into(), idx)];
308            }
309            return vec![PathSegment::Field(path.into())];
310        }
311        path.split('.')
312            .map(|part| {
313                if let Ok(idx) = part.parse::<usize>() {
314                    PathSegment::FieldOrIndex(part.into(), idx)
315                } else {
316                    PathSegment::Field(part.into())
317                }
318            })
319            .collect()
320    }
321
322    /// Parse a var path and determine the reduce hint.
323    fn parse_var_path(path: &str) -> (ReduceHint, Vec<PathSegment>) {
324        if path == "current" {
325            (
326                ReduceHint::Current,
327                vec![PathSegment::Field("current".into())],
328            )
329        } else if path == "accumulator" {
330            (
331                ReduceHint::Accumulator,
332                vec![PathSegment::Field("accumulator".into())],
333            )
334        } else if let Some(rest) = path.strip_prefix("current.") {
335            let mut segs = vec![PathSegment::Field("current".into())];
336            segs.extend(Self::parse_path_segments(rest));
337            (ReduceHint::CurrentPath, segs)
338        } else if let Some(rest) = path.strip_prefix("accumulator.") {
339            let mut segs = vec![PathSegment::Field("accumulator".into())];
340            segs.extend(Self::parse_path_segments(rest));
341            (ReduceHint::AccumulatorPath, segs)
342        } else {
343            (ReduceHint::None, Self::parse_path_segments(path))
344        }
345    }
346
347    /// Try to compile a var operator into a CompiledVar node.
348    fn try_compile_var(args: &[CompiledNode]) -> Option<CompiledNode> {
349        if args.is_empty() {
350            return Some(CompiledNode::CompiledVar {
351                scope_level: 0,
352                segments: Box::new([]),
353                reduce_hint: ReduceHint::None,
354                metadata_hint: MetadataHint::None,
355                default_value: None,
356            });
357        }
358
359        let (segments, reduce_hint) = match &args[0] {
360            CompiledNode::Value {
361                value: Value::String(s),
362            } => {
363                let (hint, segs) = Self::parse_var_path(s);
364                (segs, hint)
365            }
366            CompiledNode::Value {
367                value: Value::Number(n),
368            } => {
369                let s = n.to_string();
370                let segs = Self::parse_path_segments(&s);
371                (segs, ReduceHint::None)
372            }
373            _ => return None, // dynamic path
374        };
375
376        let default_value = if args.len() > 1 {
377            Some(Box::new(args[1].clone()))
378        } else {
379            None
380        };
381
382        Some(CompiledNode::CompiledVar {
383            scope_level: 0,
384            segments: segments.into_boxed_slice(),
385            reduce_hint,
386            metadata_hint: MetadataHint::None,
387            default_value,
388        })
389    }
390
391    /// Try to compile a val operator into a CompiledVar node.
392    fn try_compile_val(args: &[CompiledNode]) -> Option<CompiledNode> {
393        if args.is_empty() {
394            return Some(CompiledNode::CompiledVar {
395                scope_level: 0,
396                segments: Box::new([]),
397                reduce_hint: ReduceHint::None,
398                metadata_hint: MetadataHint::None,
399                default_value: None,
400            });
401        }
402
403        // Val does NOT support dot-path notation. Each arg is a literal key/index.
404
405        // Case 2: Single non-empty string → single Field segment (literal key)
406        // Empty string has dual behavior (try key "" then whole-context fallback) — keep as BuiltinOperator.
407        if args.len() == 1 {
408            if let CompiledNode::Value {
409                value: Value::String(s),
410            } = &args[0]
411                && !s.is_empty()
412            {
413                let reduce_hint = if s == "current" {
414                    ReduceHint::Current
415                } else if s == "accumulator" {
416                    ReduceHint::Accumulator
417                } else {
418                    ReduceHint::None
419                };
420                let segment = if let Ok(idx) = s.parse::<usize>() {
421                    PathSegment::FieldOrIndex(s.as_str().into(), idx)
422                } else {
423                    PathSegment::Field(s.as_str().into())
424                };
425                return Some(CompiledNode::CompiledVar {
426                    scope_level: 0,
427                    segments: vec![segment].into_boxed_slice(),
428                    reduce_hint,
429                    metadata_hint: MetadataHint::None,
430                    default_value: None,
431                });
432            }
433            return None;
434        }
435
436        // Case 3: First arg is [[level]] array
437        if let CompiledNode::Value {
438            value: Value::Array(level_arr),
439        } = &args[0]
440            && let Some(Value::Number(level_num)) = level_arr.first()
441            && let Some(level) = level_num.as_i64()
442        {
443            let scope_level = level.unsigned_abs() as u32;
444
445            // Check metadata hints for 2-arg case
446            let mut metadata_hint = MetadataHint::None;
447            if args.len() == 2
448                && let CompiledNode::Value {
449                    value: Value::String(s),
450                } = &args[1]
451            {
452                if s == "index" {
453                    metadata_hint = MetadataHint::Index;
454                } else if s == "key" {
455                    metadata_hint = MetadataHint::Key;
456                }
457            }
458
459            return Self::try_compile_val_segments(&args[1..], scope_level, metadata_hint);
460        }
461
462        // Case 4: 2+ args with all literal path segments — compile as path chain.
463        if let Some(first_seg) = Self::val_arg_to_segment(&args[0]) {
464            let reduce_hint = match &args[0] {
465                CompiledNode::Value {
466                    value: Value::String(s),
467                } if s == "current" => ReduceHint::CurrentPath,
468                CompiledNode::Value {
469                    value: Value::String(s),
470                } if s == "accumulator" => ReduceHint::AccumulatorPath,
471                _ => ReduceHint::None,
472            };
473
474            let mut segments = vec![first_seg];
475            if let Some(compiled) =
476                Self::try_collect_val_segments(&args[1..], &mut segments, reduce_hint)
477            {
478                return Some(compiled);
479            }
480        }
481
482        None
483    }
484
485    /// Convert a val argument into a PathSegment.
486    /// Val treats string args as literal keys (no dot-splitting), and numbers as indices.
487    /// Numeric strings get FieldOrIndex to handle both object key and array index access.
488    fn val_arg_to_segment(arg: &CompiledNode) -> Option<PathSegment> {
489        match arg {
490            CompiledNode::Value {
491                value: Value::String(s),
492            } => {
493                if let Ok(idx) = s.parse::<usize>() {
494                    Some(PathSegment::FieldOrIndex(s.as_str().into(), idx))
495                } else {
496                    Some(PathSegment::Field(s.as_str().into()))
497                }
498            }
499            CompiledNode::Value {
500                value: Value::Number(n),
501            } => n.as_u64().map(|idx| PathSegment::Index(idx as usize)),
502            _ => None,
503        }
504    }
505
506    /// Try to compile val path segments (used by level-access and path-chain cases).
507    fn try_compile_val_segments(
508        args: &[CompiledNode],
509        scope_level: u32,
510        metadata_hint: MetadataHint,
511    ) -> Option<CompiledNode> {
512        let mut segments = Vec::new();
513        for arg in args {
514            segments.push(Self::val_arg_to_segment(arg)?);
515        }
516
517        Some(CompiledNode::CompiledVar {
518            scope_level,
519            segments: segments.into_boxed_slice(),
520            reduce_hint: ReduceHint::None,
521            metadata_hint,
522            default_value: None,
523        })
524    }
525
526    /// Try to collect remaining val args into segments and build a CompiledVar.
527    fn try_collect_val_segments(
528        args: &[CompiledNode],
529        segments: &mut Vec<PathSegment>,
530        reduce_hint: ReduceHint,
531    ) -> Option<CompiledNode> {
532        for arg in args {
533            segments.push(Self::val_arg_to_segment(arg)?);
534        }
535
536        Some(CompiledNode::CompiledVar {
537            scope_level: 0,
538            segments: std::mem::take(segments).into_boxed_slice(),
539            reduce_hint,
540            metadata_hint: MetadataHint::None,
541            default_value: None,
542        })
543    }
544
545    /// Try to compile an exists operator into a CompiledExists node.
546    fn try_compile_exists(args: &[CompiledNode]) -> Option<CompiledNode> {
547        if args.is_empty() {
548            return Some(CompiledNode::CompiledExists(Box::new(
549                crate::node::CompiledExistsData {
550                    scope_level: 0,
551                    segments: Box::new([]),
552                },
553            )));
554        }
555
556        if args.len() == 1 {
557            if let CompiledNode::Value {
558                value: Value::String(s),
559            } = &args[0]
560            {
561                return Some(CompiledNode::CompiledExists(Box::new(
562                    crate::node::CompiledExistsData {
563                        scope_level: 0,
564                        segments: vec![PathSegment::Field(s.as_str().into())].into_boxed_slice(),
565                    },
566                )));
567            }
568            return None;
569        }
570
571        // Multiple args - all must be literal strings
572        let mut segments = Vec::new();
573        for arg in args {
574            if let CompiledNode::Value {
575                value: Value::String(s),
576            } = arg
577            {
578                segments.push(PathSegment::Field(s.as_str().into()));
579            } else {
580                return None;
581            }
582        }
583
584        Some(CompiledNode::CompiledExists(Box::new(
585            crate::node::CompiledExistsData {
586                scope_level: 0,
587                segments: segments.into_boxed_slice(),
588            },
589        )))
590    }
591
592    /// Try to pre-compile a split operator's regex pattern at compile time.
593    ///
594    /// When the delimiter (second arg) is a static string containing named capture
595    /// groups (`(?P<...>`), the regex is compiled once here instead of on every evaluation.
596    fn try_compile_split_regex(args: &[CompiledNode]) -> Option<CompiledNode> {
597        if args.len() < 2 {
598            return None;
599        }
600
601        // Check if the delimiter is a static string with named capture groups
602        let pattern = match &args[1] {
603            CompiledNode::Value {
604                value: Value::String(s),
605            } if s.contains("(?P<") => s.as_str(),
606            _ => return None,
607        };
608
609        // Try to compile the regex
610        let re = Regex::new(pattern).ok()?;
611        let capture_names: Vec<Box<str>> = re.capture_names().flatten().map(|n| n.into()).collect();
612
613        // Only optimize if there are named capture groups
614        if capture_names.is_empty() {
615            return None;
616        }
617
618        // Keep only the text argument (first arg)
619        let text_args = vec![args[0].clone()].into_boxed_slice();
620
621        Some(CompiledNode::CompiledSplitRegex(Box::new(
622            crate::node::CompiledSplitRegexData {
623                args: text_args,
624                regex: Arc::new(re),
625                capture_names: capture_names.into_boxed_slice(),
626            },
627        )))
628    }
629}