datalogic_rs/node.rs
1use crate::opcode::OpCode;
2use regex::Regex;
3use serde_json::Value;
4use std::sync::Arc;
5
/// One step of a variable path, parsed once at compile time.
///
/// A compiled path is a sequence of these segments; each segment says how to
/// descend one level into the data.
#[derive(Debug, Clone)]
pub enum PathSegment {
    /// Look the segment up as an object key.
    Field(Box<str>),
    /// Look the segment up as an array position.
    Index(usize),
    /// Ambiguous segment: tried as an object key first, then as an array index.
    ///
    /// Carries both the textual key and the index already parsed from it, so
    /// no number parsing is needed at runtime.
    FieldOrIndex(Box<str>, usize),
}
17
/// Compile-time classification of how a path relates to the reduce context.
///
/// Computed once during compilation so that evaluation does not have to
/// inspect the path text again.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReduceHint {
    /// Ordinary path access; the reduce context is not involved.
    None,
    /// The whole path is `"current"`: resolve directly to `reduce_current`.
    Current,
    /// The whole path is `"accumulator"`: resolve directly to `reduce_accumulator`.
    Accumulator,
    /// The path begins with `"current."`: `segments[0]` is `"current"` and
    /// `segments[1..]` are resolved against `reduce_current`.
    CurrentPath,
    /// The path begins with `"accumulator."`: `segments[0]` is `"accumulator"`
    /// and `segments[1..]` are resolved against `reduce_accumulator`.
    AccumulatorPath,
}
32
/// Compile-time marker telling whether a variable access targets frame
/// metadata (index/key) instead of regular data.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MetadataHint {
    /// Regular data access; no metadata involved.
    None,
    /// Read the index metadata of the frame.
    Index,
    /// Read the key metadata of the frame.
    Key,
}
43
44/// Data for a custom operator (boxed inside CompiledNode to reduce enum size).
45#[derive(Debug, Clone)]
46pub struct CustomOperatorData {
47 pub name: String,
48 pub args: Box<[CompiledNode]>,
49}
50
51/// Data for a structured object template (boxed inside CompiledNode to reduce enum size).
52#[derive(Debug, Clone)]
53pub struct StructuredObjectData {
54 pub fields: Box<[(String, CompiledNode)]>,
55}
56
57/// Data for a pre-compiled exists check (boxed inside CompiledNode to reduce enum size).
58#[derive(Debug, Clone)]
59pub struct CompiledExistsData {
60 pub scope_level: u32,
61 pub segments: Box<[PathSegment]>,
62}
63
64/// Data for a pre-compiled split with regex (boxed inside CompiledNode to reduce enum size).
65#[derive(Debug, Clone)]
66pub struct CompiledSplitRegexData {
67 pub args: Box<[CompiledNode]>,
68 pub regex: Arc<Regex>,
69 pub capture_names: Box<[Box<str>]>,
70}
71
72/// A compiled node representing a single operation or value in the logic tree.
73///
74/// Nodes are created during the compilation phase and evaluated during execution.
75/// Each node type is optimized for its specific purpose:
76///
77/// - **Value**: Static JSON values that don't require evaluation
78/// - **Array**: Collections of nodes evaluated sequentially
79/// - **BuiltinOperator**: Fast OpCode-based dispatch for built-in operators
80/// - **CustomOperator**: User-defined operators with dynamic dispatch
81/// - **StructuredObject**: Template objects for structure preservation
82#[derive(Debug, Clone)]
83pub enum CompiledNode {
84 /// A static JSON value that requires no evaluation.
85 ///
86 /// Used for literals like numbers, strings, booleans, and null.
87 Value { value: Value },
88
89 /// An array of compiled nodes.
90 ///
91 /// Each node is evaluated in sequence, and the results are collected into a JSON array.
92 /// Uses `Box<[CompiledNode]>` for memory efficiency.
93 Array { nodes: Box<[CompiledNode]> },
94
95 /// A built-in operator optimized with OpCode dispatch.
96 ///
97 /// The OpCode enum enables direct dispatch without string lookups,
98 /// significantly improving performance for the 50+ built-in operators.
99 BuiltinOperator {
100 opcode: OpCode,
101 args: Box<[CompiledNode]>,
102 },
103
104 /// A custom operator registered via `DataLogic::add_operator`.
105 /// Boxed to reduce enum size (rare variant).
106 CustomOperator(Box<CustomOperatorData>),
107
108 /// A structured object template for preserve_structure mode.
109 /// Boxed to reduce enum size (rare variant).
110 StructuredObject(Box<StructuredObjectData>),
111
112 /// A pre-compiled variable access (unified var/val).
113 ///
114 /// scope_level 0 = current context (var-style), N = go up N levels (val with [[N], ...]).
115 /// Segments are pre-parsed at compile time to avoid runtime string splitting.
116 CompiledVar {
117 scope_level: u32,
118 segments: Box<[PathSegment]>,
119 reduce_hint: ReduceHint,
120 metadata_hint: MetadataHint,
121 default_value: Option<Box<CompiledNode>>,
122 },
123
124 /// A pre-compiled exists check.
125 /// Boxed to reduce enum size (rare variant).
126 CompiledExists(Box<CompiledExistsData>),
127
128 /// A pre-compiled split with regex pattern.
129 /// Boxed to reduce enum size (rare variant).
130 CompiledSplitRegex(Box<CompiledSplitRegexData>),
131
132 /// A pre-compiled throw with a static error object.
133 /// Boxed to reduce enum size (rare variant).
134 CompiledThrow(Box<Value>),
135}
136
137/// Compiled logic that can be evaluated multiple times across different data.
138///
139/// `CompiledLogic` represents a pre-processed JSONLogic expression that has been
140/// optimized for repeated evaluation. It's thread-safe and can be shared across
141/// threads using `Arc`.
142///
143/// # Performance Benefits
144///
145/// - **Parse once, evaluate many**: Avoid repeated JSON parsing
146/// - **Static evaluation**: Constant expressions are pre-computed
147/// - **OpCode dispatch**: Built-in operators use fast enum dispatch
148/// - **Thread-safe sharing**: Use `Arc` to share across threads
149///
150/// # Example
151///
152/// ```rust
153/// use datalogic_rs::DataLogic;
154/// use serde_json::json;
155/// use std::sync::Arc;
156///
157/// let engine = DataLogic::new();
158/// let logic = json!({">": [{"var": "score"}, 90]});
159/// let compiled = engine.compile(&logic).unwrap(); // Returns Arc<CompiledLogic>
160///
161/// // Can be shared across threads
162/// let compiled_clone = Arc::clone(&compiled);
163/// std::thread::spawn(move || {
164/// let data = json!({"score": 95});
165/// let result = engine.evaluate_owned(&compiled_clone, data);
166/// });
167/// ```
168#[derive(Debug, Clone)]
169pub struct CompiledLogic {
170 /// The root node of the compiled logic tree
171 pub root: CompiledNode,
172}
173
174impl CompiledLogic {
175 /// Creates a new compiled logic from a root node.
176 ///
177 /// # Arguments
178 ///
179 /// * `root` - The root node of the compiled logic tree
180 pub fn new(root: CompiledNode) -> Self {
181 Self { root }
182 }
183
184 /// Check if this compiled logic is static (can be evaluated without context)
185 pub fn is_static(&self) -> bool {
186 node_is_static(&self.root)
187 }
188}
189
190/// Check if a compiled node is static (can be evaluated without runtime context).
191pub(crate) fn node_is_static(node: &CompiledNode) -> bool {
192 match node {
193 CompiledNode::Value { .. } => true,
194 CompiledNode::Array { nodes, .. } => nodes.iter().all(node_is_static),
195 CompiledNode::BuiltinOperator { opcode, args, .. } => opcode_is_static(opcode, args),
196 CompiledNode::CustomOperator(_) => false,
197 CompiledNode::CompiledVar { .. } | CompiledNode::CompiledExists(_) => false,
198 CompiledNode::CompiledSplitRegex(data) => data.args.iter().all(node_is_static),
199 CompiledNode::CompiledThrow(_) => false,
200 CompiledNode::StructuredObject(data) => {
201 data.fields.iter().all(|(_, node)| node_is_static(node))
202 }
203 }
204}
205
206/// Check if an operator can be statically evaluated at compile time.
207///
208/// Static operators can be pre-computed during compilation when their arguments
209/// are also static, eliminating runtime evaluation overhead.
210///
211/// # Classification Criteria
212///
213/// An operator is **non-static** (dynamic) if it:
214/// 1. Reads from the data context (`var`, `val`, `missing`, `exists`)
215/// 2. Uses iterative callbacks with changing context (`map`, `filter`, `reduce`)
216/// 3. Has side effects or error handling (`try`, `throw`)
217/// 4. Depends on runtime state (`now` for current time)
218/// 5. Needs runtime disambiguation (`preserve`, `merge`, `min`, `max`)
219///
220/// All other operators are **static** when their arguments are static.
221pub(crate) fn opcode_is_static(opcode: &OpCode, args: &[CompiledNode]) -> bool {
222 use OpCode::*;
223
224 // Check if all arguments are static first (common pattern)
225 let args_static = || args.iter().all(node_is_static);
226
227 match opcode {
228 // Context-dependent: These operators read from the data context, which is
229 // not available at compile time. They must remain dynamic.
230 Var | Val | Missing | MissingSome | Exists => false,
231
232 // Iteration operators: These push new contexts for each iteration and use
233 // callbacks that may reference the iteration variable. Even with static
234 // arrays, the callback logic depends on the per-element context.
235 Map | Filter | Reduce | All | Some | None => false,
236
237 // Error handling: These have control flow effects (early exit, error propagation)
238 // that should be preserved for runtime execution.
239 Try | Throw => false,
240
241 // Time-dependent: Returns current UTC time, inherently non-static.
242 Now => false,
243
244 // Runtime disambiguation needed:
245 // - Preserve: Must know it was explicitly used as an operator, not inferred
246 // - Merge/Min/Max: Need to distinguish [1,2,3] literal from operator arguments
247 // at runtime to handle nested arrays correctly
248 Preserve => false,
249 Merge | Min | Max => false,
250
251 // Pure operators: Static when all arguments are static. These perform
252 // deterministic transformations without side effects or context access.
253 Type | StartsWith | EndsWith | Upper | Lower | Trim | Split | Datetime | Timestamp
254 | ParseDate | FormatDate | DateDiff | Abs | Ceil | Floor | Add | Subtract | Multiply
255 | Divide | Modulo | Equals | StrictEquals | NotEquals | StrictNotEquals | GreaterThan
256 | GreaterThanEqual | LessThan | LessThanEqual | Not | DoubleNot | And | Or | Ternary
257 | If | Cat | Substr | In | Length | Sort | Slice | Coalesce | Switch => args_static(),
258 }
259}
260
261/// Convert path segments back to a dot-separated path string.
262pub(crate) fn segments_to_dot_path(segments: &[PathSegment]) -> String {
263 segments
264 .iter()
265 .map(|seg| match seg {
266 PathSegment::Field(s) | PathSegment::FieldOrIndex(s, _) => s.to_string(),
267 PathSegment::Index(i) => i.to_string(),
268 })
269 .collect::<Vec<_>>()
270 .join(".")
271}
272
273/// Convert a path segment to a JSON value.
274pub(crate) fn segment_to_value(seg: &PathSegment) -> Value {
275 match seg {
276 PathSegment::Field(s) | PathSegment::FieldOrIndex(s, _) => Value::String(s.to_string()),
277 PathSegment::Index(i) => Value::Number((*i as u64).into()),
278 }
279}
280
281/// Convert a compiled node back to a JSON value (for custom operators).
282pub(crate) fn node_to_value(node: &CompiledNode) -> Value {
283 match node {
284 CompiledNode::Value { value, .. } => value.clone(),
285 CompiledNode::Array { nodes, .. } => {
286 Value::Array(nodes.iter().map(node_to_value).collect())
287 }
288 CompiledNode::BuiltinOperator { opcode, args, .. } => {
289 let mut obj = serde_json::Map::new();
290 let args_value = if args.len() == 1 {
291 node_to_value(&args[0])
292 } else {
293 Value::Array(args.iter().map(node_to_value).collect())
294 };
295 obj.insert(opcode.as_str().into(), args_value);
296 Value::Object(obj)
297 }
298 CompiledNode::CustomOperator(data) => {
299 let mut obj = serde_json::Map::new();
300 let args_value = if data.args.len() == 1 {
301 node_to_value(&data.args[0])
302 } else {
303 Value::Array(data.args.iter().map(node_to_value).collect())
304 };
305 obj.insert(data.name.clone(), args_value);
306 Value::Object(obj)
307 }
308 CompiledNode::StructuredObject(data) => {
309 let mut obj = serde_json::Map::new();
310 for (key, node) in data.fields.iter() {
311 obj.insert(key.clone(), node_to_value(node));
312 }
313 Value::Object(obj)
314 }
315 CompiledNode::CompiledVar {
316 scope_level,
317 segments,
318 default_value,
319 ..
320 } => {
321 let mut obj = serde_json::Map::new();
322 if *scope_level == 0 {
323 // Reconstruct as var
324 let path = segments_to_dot_path(segments);
325 match default_value {
326 Some(def) => {
327 obj.insert(
328 "var".into(),
329 Value::Array(vec![Value::String(path), node_to_value(def)]),
330 );
331 }
332 None => {
333 obj.insert("var".into(), Value::String(path));
334 }
335 }
336 } else {
337 // Reconstruct as val with level
338 let mut arr: Vec<Value> = vec![Value::Array(vec![Value::Number(
339 (*scope_level as u64).into(),
340 )])];
341 for seg in segments.iter() {
342 arr.push(segment_to_value(seg));
343 }
344 obj.insert("val".into(), Value::Array(arr));
345 }
346 Value::Object(obj)
347 }
348 CompiledNode::CompiledExists(data) => {
349 let mut obj = serde_json::Map::new();
350 if data.segments.len() == 1 {
351 obj.insert("exists".into(), segment_to_value(&data.segments[0]));
352 } else {
353 let arr: Vec<Value> = data.segments.iter().map(segment_to_value).collect();
354 obj.insert("exists".into(), Value::Array(arr));
355 }
356 Value::Object(obj)
357 }
358 CompiledNode::CompiledSplitRegex(data) => {
359 let mut obj = serde_json::Map::new();
360 let mut arr = vec![node_to_value(&data.args[0])];
361 arr.push(Value::String(data.regex.as_str().to_string()));
362 obj.insert("split".into(), Value::Array(arr));
363 Value::Object(obj)
364 }
365 CompiledNode::CompiledThrow(error_obj) => {
366 let mut obj = serde_json::Map::new();
367 if let Value::Object(err_map) = error_obj.as_ref() {
368 if let Some(Value::String(s)) = err_map.get("type") {
369 obj.insert("throw".into(), Value::String(s.clone()));
370 } else {
371 obj.insert("throw".into(), error_obj.as_ref().clone());
372 }
373 } else {
374 obj.insert("throw".into(), error_obj.as_ref().clone());
375 }
376 Value::Object(obj)
377 }
378 }
379}