datalogic_rs/compiled.rs
1use crate::{ContextStack, DataLogic, Result, opcode::OpCode};
2use serde_json::{Value, json};
3use std::sync::Arc;
4
5/// A compiled node representing a single operation or value in the logic tree.
6///
7/// Nodes are created during the compilation phase and evaluated during execution.
8/// Each node type is optimized for its specific purpose:
9///
10/// - **Value**: Static JSON values that don't require evaluation
11/// - **Array**: Collections of nodes evaluated sequentially
12/// - **BuiltinOperator**: Fast OpCode-based dispatch for built-in operators
13/// - **CustomOperator**: User-defined operators with dynamic dispatch
14/// - **StructuredObject**: Template objects for structure preservation
15#[derive(Debug, Clone)]
16pub enum CompiledNode {
17 /// A static JSON value that requires no evaluation.
18 ///
19 /// Used for literals like numbers, strings, booleans, and null.
20 Value { value: Value },
21
22 /// An array of compiled nodes.
23 ///
24 /// Each node is evaluated in sequence, and the results are collected into a JSON array.
25 /// Uses `Box<[CompiledNode]>` for memory efficiency.
26 Array { nodes: Box<[CompiledNode]> },
27
28 /// A built-in operator optimized with OpCode dispatch.
29 ///
30 /// The OpCode enum enables direct dispatch without string lookups,
31 /// significantly improving performance for the 50+ built-in operators.
32 BuiltinOperator {
33 opcode: OpCode,
34 args: Vec<CompiledNode>,
35 },
36
37 /// A custom operator registered via `DataLogic::add_operator`.
38 ///
39 /// Custom operators use dynamic dispatch and are looked up by name
40 /// from the engine's operator registry.
41 CustomOperator {
42 name: String,
43 args: Vec<CompiledNode>,
44 },
45
46 /// A structured object template for preserve_structure mode.
47 ///
48 /// When structure preservation is enabled, objects with keys that are not
49 /// built-in operators or registered custom operators are preserved as templates.
50 /// Each field is evaluated independently, allowing for dynamic object generation.
51 ///
52 /// Note: Custom operators are checked before treating keys as structured fields,
53 /// ensuring they work correctly within preserved structures.
54 StructuredObject { fields: Vec<(String, CompiledNode)> },
55}
56
// Hash methods and hash functions were removed; they are no longer needed.
60
61/// Compiled logic that can be evaluated multiple times across different data.
62///
63/// `CompiledLogic` represents a pre-processed JSONLogic expression that has been
64/// optimized for repeated evaluation. It's thread-safe and can be shared across
65/// threads using `Arc`.
66///
67/// # Performance Benefits
68///
69/// - **Parse once, evaluate many**: Avoid repeated JSON parsing
70/// - **Static evaluation**: Constant expressions are pre-computed
71/// - **OpCode dispatch**: Built-in operators use fast enum dispatch
72/// - **Thread-safe sharing**: Use `Arc` to share across threads
73///
74/// # Example
75///
76/// ```rust
77/// use datalogic_rs::DataLogic;
78/// use serde_json::json;
79/// use std::sync::Arc;
80///
81/// let engine = DataLogic::new();
82/// let logic = json!({">": [{"var": "score"}, 90]});
83/// let compiled = engine.compile(&logic).unwrap(); // Returns Arc<CompiledLogic>
84///
85/// // Can be shared across threads
86/// let compiled_clone = Arc::clone(&compiled);
87/// std::thread::spawn(move || {
88/// let data = json!({"score": 95});
89/// let result = engine.evaluate_owned(&compiled_clone, data);
90/// });
91/// ```
92#[derive(Debug, Clone)]
93pub struct CompiledLogic {
94 /// The root node of the compiled logic tree
95 pub root: CompiledNode,
96}
97
98impl CompiledLogic {
99 /// Creates a new compiled logic from a root node.
100 ///
101 /// # Arguments
102 ///
103 /// * `root` - The root node of the compiled logic tree
104 pub fn new(root: CompiledNode) -> Self {
105 Self { root }
106 }
107
108 /// Compiles a JSON value into a compiled logic structure.
109 ///
110 /// This method performs basic compilation without static evaluation.
111 /// For optimal performance, use `compile_with_static_eval` instead.
112 ///
113 /// # Arguments
114 ///
115 /// * `logic` - The JSON logic expression to compile
116 ///
117 /// # Returns
118 ///
119 /// A compiled logic structure, or an error if compilation fails.
120 pub fn compile(logic: &Value) -> Result<Self> {
121 let root = Self::compile_node(logic, None, false)?;
122 Ok(Self::new(root))
123 }
124
125 /// Compiles for tracing without static evaluation.
126 ///
127 /// This method compiles the logic without performing static evaluation,
128 /// ensuring that all operators remain in the tree for step-by-step debugging.
129 /// Use this when you need to trace execution through operators that would
130 /// otherwise be pre-evaluated at compile time.
131 ///
132 /// # Arguments
133 ///
134 /// * `logic` - The JSON logic expression to compile
135 /// * `preserve_structure` - Whether to preserve unknown object structure
136 ///
137 /// # Returns
138 ///
139 /// A compiled logic structure without static optimizations.
140 pub fn compile_for_trace(logic: &Value, preserve_structure: bool) -> Result<Self> {
141 let root = Self::compile_node(logic, None, preserve_structure)?;
142 Ok(Self::new(root))
143 }
144
145 /// Compiles with static evaluation using the provided engine.
146 ///
147 /// This method performs optimizations including:
148 /// - Static evaluation of constant expressions
149 /// - OpCode assignment for built-in operators
150 /// - Structure preservation based on engine settings
151 ///
152 /// # Arguments
153 ///
154 /// * `logic` - The JSON logic expression to compile
155 /// * `engine` - The DataLogic engine for static evaluation
156 ///
157 /// # Returns
158 ///
159 /// An optimized compiled logic structure, or an error if compilation fails.
160 pub fn compile_with_static_eval(logic: &Value, engine: &DataLogic) -> Result<Self> {
161 let root = Self::compile_node(logic, Some(engine), engine.preserve_structure())?;
162 Ok(Self::new(root))
163 }
164
165 /// Compiles a single JSON value into a CompiledNode.
166 ///
167 /// This recursive method handles all node types:
168 /// - Objects with operators
169 /// - Arrays
170 /// - Primitive values
171 /// - Structured objects (in preserve mode)
172 ///
173 /// # Arguments
174 ///
175 /// * `value` - The JSON value to compile
176 /// * `engine` - Optional engine for static evaluation
177 /// * `preserve_structure` - Whether to preserve unknown object structure
178 ///
179 /// # Returns
180 ///
181 /// A compiled node, or an error if the value is invalid.
182 fn compile_node(
183 value: &Value,
184 engine: Option<&DataLogic>,
185 preserve_structure: bool,
186 ) -> Result<CompiledNode> {
187 match value {
188 Value::Object(obj) if obj.len() > 1 => {
189 if preserve_structure {
190 // In preserve_structure mode, treat multi-key objects as structured objects
191 // We'll create a special StructuredObject node that gets evaluated field by field
192 let mut fields = Vec::new();
193 for (key, val) in obj.iter() {
194 let compiled_val = Self::compile_node(val, engine, preserve_structure)?;
195 fields.push((key.clone(), compiled_val));
196 }
197 Ok(CompiledNode::StructuredObject { fields })
198 } else {
199 // Multi-key objects are not valid operators
200 Err(crate::error::Error::InvalidOperator(
201 "Unknown Operator".to_string(),
202 ))
203 }
204 }
205 Value::Object(obj) if obj.len() == 1 => {
206 // Single key object is an operator
207 let (op_name, args_value) = obj.iter().next().unwrap();
208
209 // Try to parse as built-in operator first
210 if let Ok(opcode) = op_name.parse::<OpCode>() {
211 // Check if this operator requires array arguments
212 let requires_array = matches!(opcode, OpCode::And | OpCode::Or | OpCode::If);
213
214 // For operators that require arrays, check the raw value
215 if requires_array && !matches!(args_value, Value::Array(_)) {
216 // Create a special marker node for invalid arguments
217 let invalid_value = json!({
218 "__invalid_args__": true,
219 "value": args_value
220 });
221 let value_node = CompiledNode::Value {
222 value: invalid_value,
223 };
224 let args = vec![value_node];
225 return Ok(CompiledNode::BuiltinOperator { opcode, args });
226 }
227
228 // Special handling for preserve operator - don't compile its arguments
229 let args = if opcode == OpCode::Preserve {
230 // Preserve takes raw values, not compiled logic
231 match args_value {
232 Value::Array(arr) => arr
233 .iter()
234 .map(|v| CompiledNode::Value { value: v.clone() })
235 .collect(),
236 _ => {
237 vec![CompiledNode::Value {
238 value: args_value.clone(),
239 }]
240 }
241 }
242 } else {
243 Self::compile_args(args_value, engine, preserve_structure)?
244 };
245 let node = CompiledNode::BuiltinOperator { opcode, args };
246
247 // If engine is provided and node is static, evaluate it
248 if let std::option::Option::Some(eng) = engine
249 && Self::node_is_static(&node)
250 {
251 // Evaluate with empty context since it's static
252 let mut context = ContextStack::new(Arc::new(Value::Null));
253 match eng.evaluate_node(&node, &mut context) {
254 Ok(value) => {
255 return Ok(CompiledNode::Value { value });
256 }
257 // If evaluation fails, keep as operator node
258 Err(_) => return Ok(node),
259 }
260 }
261
262 Ok(node)
263 } else if preserve_structure {
264 // In preserve_structure mode, we need to distinguish between:
265 // 1. Custom operators (should be evaluated as operators)
266 // 2. Unknown keys (should be preserved as structured object fields)
267 //
268 // Check if this is a custom operator first
269 if let Some(eng) = engine
270 && eng.has_custom_operator(op_name)
271 {
272 // It's a registered custom operator - compile as CustomOperator
273 // This ensures custom operators work correctly in preserve_structure mode,
274 // e.g., {"result": {"custom_op": arg}} will evaluate custom_op properly
275 let args = Self::compile_args(args_value, engine, preserve_structure)?;
276 return Ok(CompiledNode::CustomOperator {
277 name: op_name.clone(),
278 args,
279 });
280 }
281 // Not a built-in operator or custom operator - treat as structured object field
282 // This allows dynamic object generation like {"name": {"var": "user.name"}}
283 let compiled_val = Self::compile_node(args_value, engine, preserve_structure)?;
284 let fields = vec![(op_name.clone(), compiled_val)];
285 Ok(CompiledNode::StructuredObject { fields })
286 } else {
287 let args = Self::compile_args(args_value, engine, preserve_structure)?;
288 // Fall back to custom operator - don't pre-evaluate custom operators
289 Ok(CompiledNode::CustomOperator {
290 name: op_name.clone(),
291 args,
292 })
293 }
294 }
295 Value::Array(arr) => {
296 // Array of logic expressions
297 let nodes = arr
298 .iter()
299 .map(|v| Self::compile_node(v, engine, preserve_structure))
300 .collect::<Result<Vec<_>>>()?;
301
302 let nodes_boxed = nodes.into_boxed_slice();
303 let node = CompiledNode::Array { nodes: nodes_boxed };
304
305 // If engine is provided and array is static, evaluate it
306 if let std::option::Option::Some(eng) = engine
307 && Self::node_is_static(&node)
308 {
309 let mut context = ContextStack::new(Arc::new(Value::Null));
310 if let Ok(value) = eng.evaluate_node(&node, &mut context) {
311 return Ok(CompiledNode::Value { value });
312 }
313 }
314
315 Ok(node)
316 }
317 _ => {
318 // Static value
319 Ok(CompiledNode::Value {
320 value: value.clone(),
321 })
322 }
323 }
324 }
325
326 /// Compile operator arguments
327 fn compile_args(
328 value: &Value,
329 engine: Option<&DataLogic>,
330 preserve_structure: bool,
331 ) -> Result<Vec<CompiledNode>> {
332 match value {
333 Value::Array(arr) => arr
334 .iter()
335 .map(|v| Self::compile_node(v, engine, preserve_structure))
336 .collect::<Result<Vec<_>>>(),
337 _ => {
338 // Single argument - compile it
339 Ok(vec![Self::compile_node(value, engine, preserve_structure)?])
340 }
341 }
342 }
343
344 /// Check if this compiled logic is static (can be evaluated without context)
345 pub fn is_static(&self) -> bool {
346 Self::node_is_static(&self.root)
347 }
348
349 fn node_is_static(node: &CompiledNode) -> bool {
350 match node {
351 CompiledNode::Value { .. } => true,
352 CompiledNode::Array { nodes, .. } => nodes.iter().all(Self::node_is_static),
353 CompiledNode::BuiltinOperator { opcode, args, .. } => {
354 Self::opcode_is_static(opcode, args)
355 }
356 CompiledNode::CustomOperator { .. } => false, // Unknown operators are non-static
357 CompiledNode::StructuredObject { fields, .. } => {
358 fields.iter().all(|(_, node)| Self::node_is_static(node))
359 }
360 }
361 }
362
363 /// Check if an operator can be statically evaluated at compile time.
364 ///
365 /// Static operators can be pre-computed during compilation when their arguments
366 /// are also static, eliminating runtime evaluation overhead.
367 ///
368 /// # Classification Criteria
369 ///
370 /// An operator is **non-static** (dynamic) if it:
371 /// 1. Reads from the data context (`var`, `val`, `missing`, `exists`)
372 /// 2. Uses iterative callbacks with changing context (`map`, `filter`, `reduce`)
373 /// 3. Has side effects or error handling (`try`, `throw`)
374 /// 4. Depends on runtime state (`now` for current time)
375 /// 5. Needs runtime disambiguation (`preserve`, `merge`, `min`, `max`)
376 ///
377 /// All other operators are **static** when their arguments are static.
378 fn opcode_is_static(opcode: &OpCode, args: &[CompiledNode]) -> bool {
379 use OpCode::*;
380
381 // Check if all arguments are static first (common pattern)
382 let args_static = || args.iter().all(Self::node_is_static);
383
384 match opcode {
385 // Context-dependent: These operators read from the data context, which is
386 // not available at compile time. They must remain dynamic.
387 Var | Val | Missing | MissingSome | Exists => false,
388
389 // Iteration operators: These push new contexts for each iteration and use
390 // callbacks that may reference the iteration variable. Even with static
391 // arrays, the callback logic depends on the per-element context.
392 Map | Filter | Reduce | All | Some | None => false,
393
394 // Error handling: These have control flow effects (early exit, error propagation)
395 // that should be preserved for runtime execution.
396 Try | Throw => false,
397
398 // Time-dependent: Returns current UTC time, inherently non-static.
399 Now => false,
400
401 // Runtime disambiguation needed:
402 // - Preserve: Must know it was explicitly used as an operator, not inferred
403 // - Merge/Min/Max: Need to distinguish [1,2,3] literal from operator arguments
404 // at runtime to handle nested arrays correctly
405 Preserve => false,
406 Merge | Min | Max => false,
407
408 // Pure operators: Static when all arguments are static. These perform
409 // deterministic transformations without side effects or context access.
410 Type | StartsWith | EndsWith | Upper | Lower | Trim | Split | Datetime | Timestamp
411 | ParseDate | FormatDate | DateDiff | Abs | Ceil | Floor | Add | Subtract
412 | Multiply | Divide | Modulo | Equals | StrictEquals | NotEquals | StrictNotEquals
413 | GreaterThan | GreaterThanEqual | LessThan | LessThanEqual | Not | DoubleNot | And
414 | Or | Ternary | If | Cat | Substr | In | Length | Sort | Slice | Coalesce => {
415 args_static()
416 }
417 }
418 }
419}