dataflow_rs/engine/
utils.rs

1//! # Utility Functions Module
2//!
3//! This module contains common utility functions used throughout the engine.
4//! These utilities provide helper functionality for:
5//! - JSON value manipulation and path navigation
6//! - Value truthiness evaluation
7//! - Nested data structure access and modification
8
9use serde_json::Value;
10
11/// Get nested value from JSON using dot notation path
12///
13/// Supports both object property access and array indexing:
14/// - `"user.name"` - Access object property
15/// - `"items.0"` - Access array element by index
16/// - `"user.addresses.0.city"` - Combined object and array access
17/// - `"data.#20"` - Access field named "20" (# prefix removed)
18/// - `"data.##"` - Access field named "#" (first # removed, second # kept)
19///
20/// # Arguments
21/// * `data` - The JSON value to navigate
22/// * `path` - Dot-separated path to the target value
23///
24/// # Returns
25/// * `Option<&Value>` - Reference to the value if found, None otherwise
26///
27/// # Safety
28/// Returns None for invalid array indices or missing keys rather than panicking
29pub fn get_nested_value<'b>(data: &'b Value, path: &str) -> Option<&'b Value> {
30    if path.is_empty() {
31        return Some(data);
32    }
33
34    let parts: Vec<&str> = path.split('.').collect();
35    let mut current = data;
36
37    for part in parts {
38        match current {
39            Value::Object(map) => {
40                // Handle # prefix for numeric or special field names
41                let field_name = if let Some(stripped) = part.strip_prefix('#') {
42                    stripped // Remove the first # character
43                } else {
44                    part
45                };
46                current = map.get(field_name)?;
47            }
48            Value::Array(arr) => {
49                // For arrays, try to parse as index (no # prefix handling needed)
50                let index = match part.parse::<usize>() {
51                    Ok(idx) => idx,
52                    Err(_) => return None, // Invalid index format
53                };
54
55                // Bounds check before access
56                if index >= arr.len() {
57                    return None; // Index out of bounds
58                }
59
60                current = arr.get(index)?;
61            }
62            _ => return None, // Can't navigate further
63        }
64    }
65
66    Some(current)
67}
68
69/// Set nested value in JSON using dot notation path
70///
71/// Creates intermediate objects or arrays as needed when navigating the path.
72/// Supports setting values in nested objects and arrays with automatic expansion.
73///
74/// # Path Syntax
75/// - `"user.name"` - Set object property
76/// - `"items.0"` - Set array element
77/// - `"data.#20"` - Set field named "20" (# prefix removed)
78/// - `"data.##"` - Set field named "#" (first # removed)
79///
80/// # Arguments
81/// * `data` - The JSON value to modify
82/// * `path` - Dot-separated path to the target location
83/// * `value` - The value to set at the target location
84///
85/// # Example
86/// ```
87/// use serde_json::json;
88/// use dataflow_rs::engine::utils::set_nested_value;
89///
90/// let mut data = json!({});
91/// set_nested_value(&mut data, "user.name", json!("Alice"));
92/// assert_eq!(data, json!({"user": {"name": "Alice"}}));
93/// ```
94pub fn set_nested_value(data: &mut Value, path: &str, value: Value) {
95    let parts: Vec<&str> = path.split('.').collect();
96    let mut current = data;
97
98    for (i, part) in parts.iter().enumerate() {
99        if i == parts.len() - 1 {
100            // Last part - set the value
101            match current {
102                Value::Object(map) => {
103                    // Handle # prefix for field names
104                    let field_name = if let Some(stripped) = part.strip_prefix('#') {
105                        stripped // Remove the first # character
106                    } else {
107                        part
108                    };
109                    map.insert(field_name.to_string(), value);
110                }
111                Value::Array(arr) => {
112                    // Try to parse as array index (no # prefix for arrays)
113                    if let Ok(index) = part.parse::<usize>() {
114                        // Expand array if necessary (fill with nulls)
115                        while arr.len() <= index {
116                            arr.push(Value::Null);
117                        }
118                        if index < arr.len() {
119                            arr[index] = value;
120                        }
121                    }
122                }
123                _ => {}
124            }
125            return;
126        }
127
128        // Navigate to the next level
129        // Check if next part is a number (array index)
130        let next_is_array = parts
131            .get(i + 1)
132            .and_then(|p| p.parse::<usize>().ok())
133            .is_some();
134
135        match current {
136            Value::Object(map) => {
137                // Handle # prefix for field names
138                let field_name = if let Some(stripped) = part.strip_prefix('#') {
139                    stripped // Remove the first # character
140                } else {
141                    // Check if current part is meant to be an array index
142                    if let Ok(_index) = part.parse::<usize>() {
143                        // This shouldn't happen in a well-formed path for objects
144                        return;
145                    }
146                    part
147                };
148
149                // Create the appropriate structure for the next level
150                current = map.entry(field_name.to_string()).or_insert_with(|| {
151                    if next_is_array {
152                        Value::Array(Vec::new())
153                    } else {
154                        Value::Object(serde_json::Map::new())
155                    }
156                });
157            }
158            Value::Array(arr) => {
159                // Parse current part as array index
160                if let Ok(index) = part.parse::<usize>() {
161                    // Expand array if necessary
162                    while arr.len() <= index {
163                        arr.push(Value::Null);
164                    }
165
166                    // Ensure the element at index exists and is the right type
167                    if arr[index].is_null() {
168                        arr[index] = if next_is_array {
169                            Value::Array(Vec::new())
170                        } else {
171                            Value::Object(serde_json::Map::new())
172                        };
173                    }
174
175                    current = &mut arr[index];
176                } else {
177                    // Can't use string key on array
178                    return;
179                }
180            }
181            _ => {
182                // Current value is neither object nor array, can't navigate
183                return;
184            }
185        }
186    }
187}
188
189/// Clone a value at a nested path
190///
191/// Combines `get_nested_value` with cloning for convenience.
192///
193/// # Arguments
194/// * `data` - The JSON value to read from
195/// * `path` - Dot-separated path to the target value
196///
197/// # Returns
198/// * `Option<Value>` - Cloned value if found, None otherwise
199pub fn get_nested_value_cloned(data: &Value, path: &str) -> Option<Value> {
200    get_nested_value(data, path).cloned()
201}
202
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::{json, Value};

    /// Checks a table of `(path, expected)` lookups against `data`.
    ///
    /// `None` in the table means the path must not resolve.
    fn assert_lookups(data: &Value, cases: &[(&str, Option<Value>)]) {
        for (path, expected) in cases {
            assert_eq!(
                get_nested_value(data, path),
                expected.as_ref(),
                "path: {:?}",
                path
            );
        }
    }

    #[test]
    fn test_get_nested_value() {
        let data = json!({
            "user": {
                "name": "John",
                "age": 30,
                "addresses": [
                    {"city": "New York", "zip": "10001"},
                    {"city": "San Francisco", "zip": "94102"}
                ],
                "preferences": {
                    "theme": "dark",
                    "notifications": true
                }
            },
            "items": [1, 2, 3]
        });

        assert_lookups(
            &data,
            &[
                // Object property access
                ("user.name", Some(json!("John"))),
                ("user.age", Some(json!(30))),
                // Nested object access
                ("user.preferences.theme", Some(json!("dark"))),
                ("user.preferences.notifications", Some(json!(true))),
                // Array element access
                ("items.0", Some(json!(1))),
                ("items.2", Some(json!(3))),
                // Combined object and array access
                ("user.addresses.0.city", Some(json!("New York"))),
                ("user.addresses.1.zip", Some(json!("94102"))),
                // Non-existent paths
                ("user.missing", None),
                ("items.10", None),
                ("user.addresses.2.city", None),
                ("nonexistent.path", None),
            ],
        );
    }

    #[test]
    fn test_set_nested_value() {
        let mut doc = json!({});

        // A single-segment path writes straight into the root object
        set_nested_value(&mut doc, "name", json!("Alice"));
        assert_eq!(doc, json!({"name": "Alice"}));

        // Missing intermediate objects are created on demand
        set_nested_value(&mut doc, "user.email", json!("alice@example.com"));
        let expected = json!({
            "name": "Alice",
            "user": {"email": "alice@example.com"}
        });
        assert_eq!(doc, expected);

        // Writing to an existing key replaces its value
        set_nested_value(&mut doc, "name", json!("Bob"));
        let expected = json!({
            "name": "Bob",
            "user": {"email": "alice@example.com"}
        });
        assert_eq!(doc, expected);

        // Several levels can be created in one call
        set_nested_value(&mut doc, "settings.theme.mode", json!("dark"));
        assert_eq!(doc["settings"]["theme"]["mode"], json!("dark"));

        // New keys land next to existing siblings without disturbing them
        set_nested_value(&mut doc, "user.age", json!(25));
        let user = &doc["user"];
        assert_eq!(user["age"], json!(25));
        assert_eq!(user["email"], json!("alice@example.com"));
    }

    #[test]
    fn test_set_nested_value_with_arrays() {
        let mut doc = json!({ "items": [1, 2, 3] });

        // Overwrite an element that already exists
        set_nested_value(&mut doc, "items.0", json!(10));
        assert_eq!(doc["items"], json!([10, 2, 3]));

        // Writing past the end pads the gap with nulls
        set_nested_value(&mut doc, "items.5", json!(50));
        assert_eq!(doc["items"], json!([10, 2, 3, null, null, 50]));

        // Nested arrays are created from consecutive numeric segments
        let mut grid = json!({});
        for &(path, cell) in [("matrix.0.0", 1), ("matrix.0.1", 2), ("matrix.1.0", 3)].iter() {
            set_nested_value(&mut grid, path, json!(cell));
        }
        assert_eq!(grid, json!({ "matrix": [[1, 2], [3]] }));
    }

    #[test]
    fn test_set_nested_value_array_expansion() {
        // Index 2 on a fresh array leaves nulls at positions 0 and 1
        let mut sparse = json!({});
        set_nested_value(&mut sparse, "array.2", json!("value"));
        assert_eq!(sparse, json!({ "array": [null, null, "value"] }));

        // Objects and arrays can alternate along a newly created path
        let mut deep = json!({});
        set_nested_value(&mut deep, "deep.nested.0.field", json!("test"));
        let expected = json!({
            "deep": {
                "nested": [{"field": "test"}]
            }
        });
        assert_eq!(deep, expected);
    }

    #[test]
    fn test_get_nested_value_cloned() {
        let data = json!({
            "user": {
                "profile": {
                    "name": "Alice",
                    "settings": {"theme": "dark"}
                }
            }
        });

        let cases = [
            // Scalar leaf
            ("user.profile.name", Some(json!("Alice"))),
            // Whole sub-object
            ("user.profile.settings", Some(json!({"theme": "dark"}))),
            // Path that does not resolve
            ("user.missing", None),
        ];
        for (path, expected) in cases.iter() {
            assert_eq!(&get_nested_value_cloned(&data, path), expected);
        }
    }

    #[test]
    fn test_get_nested_value_bounds_checking() {
        let data = json!({
            "items": [1, 2, 3],
            "nested": {
                "array": [
                    {"id": 1},
                    {"id": 2}
                ]
            }
        });

        assert_lookups(
            &data,
            &[
                // In-range indices
                ("items.0", Some(json!(1))),
                ("items.2", Some(json!(3))),
                // Out-of-range indices resolve to None instead of panicking
                ("items.10", None),
                ("items.999999", None),
                // Segments that are not valid indices
                ("items.abc", None),
                ("items.-1", None),
                ("items.2.5", None),
                // Bounds are also enforced on arrays further down the path
                ("nested.array.0.id", Some(json!(1))),
                ("nested.array.5.id", None),
            ],
        );

        // The empty path addresses the document itself
        assert_eq!(get_nested_value(&data, ""), Some(&data));
    }

    #[test]
    fn test_set_nested_value_bounds_safety() {
        // A large index on a new array fills every earlier slot with null
        let mut doc = json!({});
        set_nested_value(&mut doc, "large.10", json!("value"));
        let large = doc["large"].as_array().unwrap();
        assert_eq!(large.len(), 11);
        assert_eq!(large[10], json!("value"));
        assert!(large[..10].iter().all(Value::is_null));

        // Padding applies at every array level of the path
        let mut grid = json!({ "matrix": [] });
        set_nested_value(&mut grid, "matrix.2.1", json!(5));
        let matrix = &grid["matrix"];
        for row in 0..2 {
            assert_eq!(matrix[row], Value::Null);
        }
        assert_eq!(matrix[2][0], Value::Null);
        assert_eq!(matrix[2][1], json!(5));

        // Existing elements are replaced in place
        let mut list = json!({ "arr": [1, 2, 3] });
        set_nested_value(&mut list, "arr.1", json!("replaced"));
        assert_eq!(list["arr"], json!([1, "replaced", 3]));
    }

    #[test]
    fn test_hash_prefix_in_paths() {
        let data = json!({
            "fields": {
                "20": "numeric field name",
                "#": "hash field",
                "##": "double hash field",
                "normal": "normal field"
            }
        });

        assert_lookups(
            &data,
            &[
                // "#20" addresses the field named "20"
                ("fields.#20", Some(json!("numeric field name"))),
                // "##" addresses the field named "#"
                ("fields.##", Some(json!("hash field"))),
                // "###" addresses the field named "##"
                ("fields.###", Some(json!("double hash field"))),
                // Unprefixed names are unaffected
                ("fields.normal", Some(json!("normal field"))),
                // A prefixed name that does not exist
                ("fields.#999", None),
            ],
        );
    }

    #[test]
    fn test_set_hash_prefix_in_paths() {
        let mut data = json!({});

        // (path written, key expected under "fields", value)
        let writes = [
            ("fields.#20", "20", "value for 20"),
            ("fields.##", "#", "hash value"),
            ("fields.###", "##", "double hash value"),
            ("fields.normal", "normal", "normal value"),
        ];
        for &(path, key, text) in writes.iter() {
            set_nested_value(&mut data, path, json!(text));
            assert_eq!(data["fields"][key], json!(text));
        }

        // No stray keys were created along the way
        let expected = json!({
            "fields": {
                "20": "value for 20",
                "#": "hash value",
                "##": "double hash value",
                "normal": "normal value"
            }
        });
        assert_eq!(data, expected);
    }

    #[test]
    fn test_hash_prefix_with_arrays() {
        let mut data = json!({
            "items": [
                {"0": "field named zero", "id": 1},
                {"1": "field named one", "id": 2}
            ]
        });

        // Index into the array, then use the prefix to reach numeric-named fields
        assert_lookups(
            &data,
            &[
                ("items.0.#0", Some(json!("field named zero"))),
                ("items.1.#1", Some(json!("field named one"))),
            ],
        );

        // The prefix works the same way when writing
        set_nested_value(&mut data, "items.0.#2", json!("field named two"));
        assert_eq!(data["items"][0]["2"], json!("field named two"));

        // Plain array indices keep working without a prefix
        assert_lookups(
            &data,
            &[
                ("items.0.id", Some(json!(1))),
                ("items.1.id", Some(json!(2))),
            ],
        );
    }

    #[test]
    fn test_hash_prefix_field_with_array_value() {
        // "data.fields.#72.0" means: field named "72", then array index 0
        let data = json!({
            "data": {
                "fields": {
                    "72": ["first", "second", "third"],
                    "100": ["alpha", "beta", "gamma"],
                    "normal": ["one", "two", "three"]
                }
            }
        });

        assert_lookups(
            &data,
            &[
                // Field "72" followed by each array index
                ("data.fields.#72.0", Some(json!("first"))),
                ("data.fields.#72.1", Some(json!("second"))),
                ("data.fields.#72.2", Some(json!("third"))),
                // Field "100" followed by array indices
                ("data.fields.#100.0", Some(json!("alpha"))),
                ("data.fields.#100.1", Some(json!("beta"))),
                // Unprefixed field followed by an index
                ("data.fields.normal.0", Some(json!("one"))),
            ],
        );

        // Writing through a prefixed field into an existing array
        let mut edited = data.clone();
        set_nested_value(&mut edited, "data.fields.#72.0", json!("modified"));
        assert_eq!(edited["data"]["fields"]["72"][0], json!("modified"));

        // Writing through a prefixed field that does not exist yet creates an array
        set_nested_value(&mut edited, "data.fields.#999.0", json!("new value"));
        assert_eq!(edited["data"]["fields"]["999"][0], json!("new value"));

        // Objects nested inside an array reached via a prefixed field
        let complex_data = json!({
            "fields": {
                "42": [
                    {"name": "item1", "value": 100},
                    {"name": "item2", "value": 200}
                ]
            }
        });
        assert_lookups(
            &complex_data,
            &[
                ("fields.#42.0.name", Some(json!("item1"))),
                ("fields.#42.1.value", Some(json!(200))),
            ],
        );

        // Several prefixed segments in one path
        let multi_hash_data = json!({
            "data": {
                "#fields": {
                    "##": ["hash array"],
                    "10": ["numeric array"]
                }
            }
        });
        assert_lookups(
            &multi_hash_data,
            &[
                // "##fields" addresses the field named "#fields"
                ("data.##fields.###.0", Some(json!("hash array"))),
                ("data.##fields.#10.0", Some(json!("numeric array"))),
            ],
        );
    }
}