Skip to main content

datalogic_rs/
node.rs

1use crate::opcode::OpCode;
2use regex::Regex;
3use serde_json::Value;
4use std::sync::Arc;
5
/// One pre-parsed step of a compiled variable path.
///
/// Paths are split into segments at compile time so that evaluation does not
/// have to split or parse strings.
///
/// Segments compare by value (`PartialEq`/`Eq`), matching the sibling hint
/// enums, so compiled paths can be asserted on directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathSegment {
    /// Object field access by key.
    Field(Box<str>),
    /// Array element access by index.
    Index(usize),
    /// A segment that could be either: tried as an object key first, then as
    /// an array index. The index is parsed once at compile time and stored
    /// next to the original text so it is not re-parsed at runtime.
    FieldOrIndex(Box<str>, usize),
}
17
/// Compile-time classification of how a variable path relates to the reduce
/// context (`current` / `accumulator`).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ReduceHint {
    /// Ordinary path access; no reduce context involved.
    None,
    /// The path is exactly `"current"`: return `reduce_current` directly.
    Current,
    /// The path is exactly `"accumulator"`: return `reduce_accumulator` directly.
    Accumulator,
    /// The path starts with `"current."`: `segments[0]` is `"current"` and
    /// `segments[1..]` are resolved against `reduce_current`.
    CurrentPath,
    /// The path starts with `"accumulator."`: `segments[0]` is `"accumulator"`
    /// and `segments[1..]` are resolved against `reduce_accumulator`.
    AccumulatorPath,
}
32
/// Compile-time classification of whether a variable access targets frame
/// metadata (index/key) instead of regular data.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum MetadataHint {
    /// Regular data access.
    None,
    /// Reads the frame's index metadata.
    Index,
    /// Reads the frame's key metadata.
    Key,
}
43
44/// Data for a custom operator (boxed inside CompiledNode to reduce enum size).
45#[derive(Debug, Clone)]
46pub struct CustomOperatorData {
47    pub name: String,
48    pub args: Box<[CompiledNode]>,
49}
50
51/// Data for a structured object template (boxed inside CompiledNode to reduce enum size).
52#[derive(Debug, Clone)]
53pub struct StructuredObjectData {
54    pub fields: Box<[(String, CompiledNode)]>,
55}
56
57/// Data for a pre-compiled exists check (boxed inside CompiledNode to reduce enum size).
58#[derive(Debug, Clone)]
59pub struct CompiledExistsData {
60    pub scope_level: u32,
61    pub segments: Box<[PathSegment]>,
62}
63
64/// Data for a pre-compiled split with regex (boxed inside CompiledNode to reduce enum size).
65#[derive(Debug, Clone)]
66pub struct CompiledSplitRegexData {
67    pub args: Box<[CompiledNode]>,
68    pub regex: Arc<Regex>,
69    pub capture_names: Box<[Box<str>]>,
70}
71
72/// A compiled node representing a single operation or value in the logic tree.
73///
74/// Nodes are created during the compilation phase and evaluated during execution.
75/// Each node type is optimized for its specific purpose:
76///
77/// - **Value**: Static JSON values that don't require evaluation
78/// - **Array**: Collections of nodes evaluated sequentially
79/// - **BuiltinOperator**: Fast OpCode-based dispatch for built-in operators
80/// - **CustomOperator**: User-defined operators with dynamic dispatch
81/// - **StructuredObject**: Template objects for structure preservation
82#[derive(Debug, Clone)]
83pub enum CompiledNode {
84    /// A static JSON value that requires no evaluation.
85    ///
86    /// Used for literals like numbers, strings, booleans, and null.
87    Value { value: Value },
88
89    /// An array of compiled nodes.
90    ///
91    /// Each node is evaluated in sequence, and the results are collected into a JSON array.
92    /// Uses `Box<[CompiledNode]>` for memory efficiency.
93    Array { nodes: Box<[CompiledNode]> },
94
95    /// A built-in operator optimized with OpCode dispatch.
96    ///
97    /// The OpCode enum enables direct dispatch without string lookups,
98    /// significantly improving performance for the 50+ built-in operators.
99    BuiltinOperator {
100        opcode: OpCode,
101        args: Box<[CompiledNode]>,
102    },
103
104    /// A custom operator registered via `DataLogic::add_operator`.
105    /// Boxed to reduce enum size (rare variant).
106    CustomOperator(Box<CustomOperatorData>),
107
108    /// A structured object template for preserve_structure mode.
109    /// Boxed to reduce enum size (rare variant).
110    StructuredObject(Box<StructuredObjectData>),
111
112    /// A pre-compiled variable access (unified var/val).
113    ///
114    /// scope_level 0 = current context (var-style), N = go up N levels (val with [[N], ...]).
115    /// Segments are pre-parsed at compile time to avoid runtime string splitting.
116    CompiledVar {
117        scope_level: u32,
118        segments: Box<[PathSegment]>,
119        reduce_hint: ReduceHint,
120        metadata_hint: MetadataHint,
121        default_value: Option<Box<CompiledNode>>,
122    },
123
124    /// A pre-compiled exists check.
125    /// Boxed to reduce enum size (rare variant).
126    CompiledExists(Box<CompiledExistsData>),
127
128    /// A pre-compiled split with regex pattern.
129    /// Boxed to reduce enum size (rare variant).
130    CompiledSplitRegex(Box<CompiledSplitRegexData>),
131
132    /// A pre-compiled throw with a static error object.
133    /// Boxed to reduce enum size (rare variant).
134    CompiledThrow(Box<Value>),
135}
136
137/// Compiled logic that can be evaluated multiple times across different data.
138///
139/// `CompiledLogic` represents a pre-processed JSONLogic expression that has been
140/// optimized for repeated evaluation. It's thread-safe and can be shared across
141/// threads using `Arc`.
142///
143/// # Performance Benefits
144///
145/// - **Parse once, evaluate many**: Avoid repeated JSON parsing
146/// - **Static evaluation**: Constant expressions are pre-computed
147/// - **OpCode dispatch**: Built-in operators use fast enum dispatch
148/// - **Thread-safe sharing**: Use `Arc` to share across threads
149///
150/// # Example
151///
152/// ```rust
153/// use datalogic_rs::DataLogic;
154/// use serde_json::json;
155/// use std::sync::Arc;
156///
157/// let engine = DataLogic::new();
158/// let logic = json!({">": [{"var": "score"}, 90]});
159/// let compiled = engine.compile(&logic).unwrap(); // Returns Arc<CompiledLogic>
160///
161/// // Can be shared across threads
162/// let compiled_clone = Arc::clone(&compiled);
163/// std::thread::spawn(move || {
164///     let data = json!({"score": 95});
165///     let result = engine.evaluate_owned(&compiled_clone, data);
166/// });
167/// ```
168#[derive(Debug, Clone)]
169pub struct CompiledLogic {
170    /// The root node of the compiled logic tree
171    pub root: CompiledNode,
172}
173
174impl CompiledLogic {
175    /// Creates a new compiled logic from a root node.
176    ///
177    /// # Arguments
178    ///
179    /// * `root` - The root node of the compiled logic tree
180    pub fn new(root: CompiledNode) -> Self {
181        Self { root }
182    }
183
184    /// Check if this compiled logic is static (can be evaluated without context)
185    pub fn is_static(&self) -> bool {
186        node_is_static(&self.root)
187    }
188}
189
190/// Check if a compiled node is static (can be evaluated without runtime context).
191pub(crate) fn node_is_static(node: &CompiledNode) -> bool {
192    match node {
193        CompiledNode::Value { .. } => true,
194        CompiledNode::Array { nodes, .. } => nodes.iter().all(node_is_static),
195        CompiledNode::BuiltinOperator { opcode, args, .. } => opcode_is_static(opcode, args),
196        CompiledNode::CustomOperator(_) => false,
197        CompiledNode::CompiledVar { .. } | CompiledNode::CompiledExists(_) => false,
198        CompiledNode::CompiledSplitRegex(data) => data.args.iter().all(node_is_static),
199        CompiledNode::CompiledThrow(_) => false,
200        CompiledNode::StructuredObject(data) => {
201            data.fields.iter().all(|(_, node)| node_is_static(node))
202        }
203    }
204}
205
206/// Check if an operator can be statically evaluated at compile time.
207///
208/// Static operators can be pre-computed during compilation when their arguments
209/// are also static, eliminating runtime evaluation overhead.
210///
211/// # Classification Criteria
212///
213/// An operator is **non-static** (dynamic) if it:
214/// 1. Reads from the data context (`var`, `val`, `missing`, `exists`)
215/// 2. Uses iterative callbacks with changing context (`map`, `filter`, `reduce`)
216/// 3. Has side effects or error handling (`try`, `throw`)
217/// 4. Depends on runtime state (`now` for current time)
218/// 5. Needs runtime disambiguation (`preserve`, `merge`, `min`, `max`)
219///
220/// All other operators are **static** when their arguments are static.
221pub(crate) fn opcode_is_static(opcode: &OpCode, args: &[CompiledNode]) -> bool {
222    use OpCode::*;
223
224    // Check if all arguments are static first (common pattern)
225    let args_static = || args.iter().all(node_is_static);
226
227    match opcode {
228        // Context-dependent: These operators read from the data context, which is
229        // not available at compile time. They must remain dynamic.
230        Var | Val | Missing | MissingSome | Exists => false,
231
232        // Iteration operators: These push new contexts for each iteration and use
233        // callbacks that may reference the iteration variable. Even with static
234        // arrays, the callback logic depends on the per-element context.
235        Map | Filter | Reduce | All | Some | None => false,
236
237        // Error handling: These have control flow effects (early exit, error propagation)
238        // that should be preserved for runtime execution.
239        Try | Throw => false,
240
241        // Time-dependent: Returns current UTC time, inherently non-static.
242        Now => false,
243
244        // Runtime disambiguation needed:
245        // - Preserve: Must know it was explicitly used as an operator, not inferred
246        // - Merge/Min/Max: Need to distinguish [1,2,3] literal from operator arguments
247        //   at runtime to handle nested arrays correctly
248        Preserve => false,
249        Merge | Min | Max => false,
250
251        // Pure operators: Static when all arguments are static. These perform
252        // deterministic transformations without side effects or context access.
253        Type | StartsWith | EndsWith | Upper | Lower | Trim | Split | Datetime | Timestamp
254        | ParseDate | FormatDate | DateDiff | Abs | Ceil | Floor | Add | Subtract | Multiply
255        | Divide | Modulo | Equals | StrictEquals | NotEquals | StrictNotEquals | GreaterThan
256        | GreaterThanEqual | LessThan | LessThanEqual | Not | DoubleNot | And | Or | Ternary
257        | If | Cat | Substr | In | Length | Sort | Slice | Coalesce | Switch => args_static(),
258    }
259}
260
261/// Convert path segments back to a dot-separated path string.
262pub(crate) fn segments_to_dot_path(segments: &[PathSegment]) -> String {
263    segments
264        .iter()
265        .map(|seg| match seg {
266            PathSegment::Field(s) | PathSegment::FieldOrIndex(s, _) => s.to_string(),
267            PathSegment::Index(i) => i.to_string(),
268        })
269        .collect::<Vec<_>>()
270        .join(".")
271}
272
273/// Convert a path segment to a JSON value.
274pub(crate) fn segment_to_value(seg: &PathSegment) -> Value {
275    match seg {
276        PathSegment::Field(s) | PathSegment::FieldOrIndex(s, _) => Value::String(s.to_string()),
277        PathSegment::Index(i) => Value::Number((*i as u64).into()),
278    }
279}
280
281/// Convert a compiled node back to a JSON value (for custom operators).
282pub(crate) fn node_to_value(node: &CompiledNode) -> Value {
283    match node {
284        CompiledNode::Value { value, .. } => value.clone(),
285        CompiledNode::Array { nodes, .. } => {
286            Value::Array(nodes.iter().map(node_to_value).collect())
287        }
288        CompiledNode::BuiltinOperator { opcode, args, .. } => {
289            let mut obj = serde_json::Map::new();
290            let args_value = if args.len() == 1 {
291                node_to_value(&args[0])
292            } else {
293                Value::Array(args.iter().map(node_to_value).collect())
294            };
295            obj.insert(opcode.as_str().into(), args_value);
296            Value::Object(obj)
297        }
298        CompiledNode::CustomOperator(data) => {
299            let mut obj = serde_json::Map::new();
300            let args_value = if data.args.len() == 1 {
301                node_to_value(&data.args[0])
302            } else {
303                Value::Array(data.args.iter().map(node_to_value).collect())
304            };
305            obj.insert(data.name.clone(), args_value);
306            Value::Object(obj)
307        }
308        CompiledNode::StructuredObject(data) => {
309            let mut obj = serde_json::Map::new();
310            for (key, node) in data.fields.iter() {
311                obj.insert(key.clone(), node_to_value(node));
312            }
313            Value::Object(obj)
314        }
315        CompiledNode::CompiledVar {
316            scope_level,
317            segments,
318            default_value,
319            ..
320        } => {
321            let mut obj = serde_json::Map::new();
322            if *scope_level == 0 {
323                // Reconstruct as var
324                let path = segments_to_dot_path(segments);
325                match default_value {
326                    Some(def) => {
327                        obj.insert(
328                            "var".into(),
329                            Value::Array(vec![Value::String(path), node_to_value(def)]),
330                        );
331                    }
332                    None => {
333                        obj.insert("var".into(), Value::String(path));
334                    }
335                }
336            } else {
337                // Reconstruct as val with level
338                let mut arr: Vec<Value> = vec![Value::Array(vec![Value::Number(
339                    (*scope_level as u64).into(),
340                )])];
341                for seg in segments.iter() {
342                    arr.push(segment_to_value(seg));
343                }
344                obj.insert("val".into(), Value::Array(arr));
345            }
346            Value::Object(obj)
347        }
348        CompiledNode::CompiledExists(data) => {
349            let mut obj = serde_json::Map::new();
350            if data.segments.len() == 1 {
351                obj.insert("exists".into(), segment_to_value(&data.segments[0]));
352            } else {
353                let arr: Vec<Value> = data.segments.iter().map(segment_to_value).collect();
354                obj.insert("exists".into(), Value::Array(arr));
355            }
356            Value::Object(obj)
357        }
358        CompiledNode::CompiledSplitRegex(data) => {
359            let mut obj = serde_json::Map::new();
360            let mut arr = vec![node_to_value(&data.args[0])];
361            arr.push(Value::String(data.regex.as_str().to_string()));
362            obj.insert("split".into(), Value::Array(arr));
363            Value::Object(obj)
364        }
365        CompiledNode::CompiledThrow(error_obj) => {
366            let mut obj = serde_json::Map::new();
367            if let Value::Object(err_map) = error_obj.as_ref() {
368                if let Some(Value::String(s)) = err_map.get("type") {
369                    obj.insert("throw".into(), Value::String(s.clone()));
370                } else {
371                    obj.insert("throw".into(), error_obj.as_ref().clone());
372                }
373            } else {
374                obj.insert("throw".into(), error_obj.as_ref().clone());
375            }
376            Value::Object(obj)
377        }
378    }
379}