Skip to main content

json_eval_rs/jsoneval/
path_utils.rs

1//! Path utilities for JSON pointer operations
2//!
3//! This module provides JSON pointer normalization and access functions
4//! for efficient native serde_json operations.
5
6use serde_json::Value;
7
/// Normalize a path into JSON pointer form (`/a/b/c`).
///
/// Accepted input formats:
/// - JSON Schema refs: `#/$params/constants/DEATH_SA` -> `/$params/constants/DEATH_SA`
/// - Dotted paths: `user.name` -> `/user/name`
/// - Existing JSON pointers: `/user/name` -> `/user/name` (no change)
/// - Simple field names: `field` -> `/field`
///
/// Runs of consecutive slashes are collapsed into a single slash. Dots are only
/// treated as separators when the input is neither a `#/` reference nor a
/// `/`-prefixed pointer.
///
/// The empty string is returned for the empty input and for every spelling of
/// the root (`/`, `#/`, `#//`, ...), so the result never equals a bare `/`.
#[inline]
pub fn normalize_to_json_pointer(path: &str) -> String {
    if path.is_empty() {
        return String::new();
    }

    // `Some(..)` when the input is already pointer-shaped (optionally behind a
    // schema `#`); `None` when it has to be treated as a dotted/simple path.
    let pointer = match path.strip_prefix('#') {
        Some(rest) if rest.starts_with('/') => Some(rest),
        _ if path.starts_with('/') => Some(path),
        _ => None,
    };

    // Fast paths: pointer-shaped input that needs no slash clean-up.
    if let Some(ptr) = pointer {
        if ptr == "/" {
            // Root pointer, including the schema form `#/`.
            return String::new();
        }
        if !ptr.contains("//") {
            return ptr.to_string();
        }
    }

    // Slow path: build the result in one pass, turning dots into slashes for
    // dotted input and skipping any slash that directly follows another slash.
    let is_dotted = pointer.is_none();
    let source = pointer.unwrap_or(path);
    let mut normalized = String::with_capacity(source.len() + 1);
    if is_dotted {
        normalized.push('/');
    }
    for ch in source.chars() {
        let ch = if is_dotted && ch == '.' { '/' } else { ch };
        if ch == '/' && normalized.ends_with('/') {
            continue;
        }
        normalized.push(ch);
    }

    if normalized == "/" {
        String::new()
    } else {
        normalized
    }
}
62
/// Convert a dotted path to JSON Schema pointer format.
///
/// Used for schema paths where nested fields live under `/properties/`.
///
/// Rules:
/// - Input starting with `#` or `/` is returned unchanged.
/// - The first segment is emitted directly after `#`; every later segment is
///   prefixed with `/properties`, unless the whole path starts with `$`, in
///   which case segments are joined with plain `/`.
/// - Segments that are literally `properties` are skipped so they are not
///   emitted twice.
///
/// Examples:
/// - "illustration.insured.name" -> "#/illustration/properties/insured/properties/name"
/// - "header.form_number" -> "#/header/properties/form_number"
/// - "$params.constants.X" -> "#/$params/constants/X"
/// - "#/already/formatted" -> "#/already/formatted" (no change)
/// - "" -> "#/"
#[inline]
pub fn dot_notation_to_schema_pointer(path: &str) -> String {
    // Already a pointer or schema ref: nothing to convert.
    if path.starts_with('#') || path.starts_with('/') {
        return path.to_string();
    }

    // `$`-prefixed paths are direct JSON pointers and get no `/properties` nesting.
    let nest_under_properties = !path.starts_with('$');

    let mut result = String::from("#");
    // `split` always yields at least one segment (an empty one for ""), so an
    // empty input naturally produces "#/".
    for (i, part) in path.split('.').enumerate() {
        if part == "properties" {
            continue;
        }

        if i > 0 && nest_under_properties {
            result.push_str("/properties");
        }
        result.push('/');
        result.push_str(part);
    }

    result
}
102
/// Turn a JSON pointer or schema pointer into dotted notation.
///
/// A leading `#/`, `/` or `#` is dropped (checked in that order) and every
/// remaining `/` becomes a `.`. Input without such a prefix, including the
/// empty string, is returned unchanged.
///
/// Examples:
/// - "#/illustration/properties/insured/properties/ins_corrname" -> "illustration.properties.insured.properties.ins_corrname"
/// - "/user/name" -> "user.name"
/// - "person.name" -> "person.name" (already dotted, no change)
#[inline]
pub fn pointer_to_dot_notation(path: &str) -> String {
    // Try the longest prefix first so "#/" is removed as a unit.
    let without_prefix = path
        .strip_prefix("#/")
        .or_else(|| path.strip_prefix('/'))
        .or_else(|| path.strip_prefix('#'));

    match without_prefix {
        // Pointer-style input: separators become dots.
        Some(rest) => rest.replace('/', "."),
        // No pointer prefix: treat as already dotted.
        None => path.to_string(),
    }
}
136
137/// Fast JSON pointer-based value access using serde's native implementation
138///
139/// This is significantly faster than manual path traversal for deeply nested objects
140#[inline]
141pub fn get_value_by_pointer<'a>(data: &'a Value, pointer: &str) -> Option<&'a Value> {
142    if pointer.is_empty() {
143        Some(data)
144    } else {
145        data.pointer(pointer)
146    }
147}
148
149#[inline]
150pub fn get_value_by_pointer_without_properties<'a>(
151    data: &'a Value,
152    pointer: &str,
153) -> Option<&'a Value> {
154    if pointer.is_empty() {
155        Some(data)
156    } else {
157        data.pointer(&pointer.replace("properties/", ""))
158    }
159}
160
161/// Batch pointer resolution for multiple paths
162pub fn get_values_by_pointers<'a>(data: &'a Value, pointers: &[String]) -> Vec<Option<&'a Value>> {
163    pointers
164        .iter()
165        .map(|pointer| get_value_by_pointer(data, pointer))
166        .collect()
167}
168
169/// Fast array indexing helper for JSON arrays
170///
171/// Returns None if not an array or index out of bounds
172#[inline]
173pub fn get_array_element<'a>(data: &'a Value, index: usize) -> Option<&'a Value> {
174    data.as_array()?.get(index)
175}
176
177/// Fast array indexing with JSON pointer path
178///
179/// Example: get_array_element_by_pointer(data, "/$params/tables", 0)
180#[inline]
181pub fn get_array_element_by_pointer<'a>(
182    data: &'a Value,
183    pointer: &str,
184    index: usize,
185) -> Option<&'a Value> {
186    get_value_by_pointer(data, pointer)?.as_array()?.get(index)
187}
188
/// Cached description of a JSON array, produced by `ArrayMetadata::build`.
#[derive(Clone, Debug)]
pub struct ArrayMetadata {
    /// JSON pointer that was used to locate the array.
    pub pointer: String,
    /// Number of elements the array had when the metadata was built.
    pub length: usize,
    /// Keys of the first element when it is an object; empty otherwise.
    pub column_names: Vec<String>,
    /// Whether every element shares the structure of the first one.
    pub is_uniform: bool,
}
201
202impl ArrayMetadata {
203    /// Build metadata for an array at the given pointer
204    pub fn build(data: &Value, pointer: &str) -> Option<Self> {
205        let array = get_value_by_pointer(data, pointer)?.as_array()?;
206
207        let length = array.len();
208        if length == 0 {
209            return Some(ArrayMetadata {
210                pointer: pointer.to_string(),
211                length: 0,
212                column_names: Vec::new(),
213                is_uniform: true,
214            });
215        }
216
217        // Analyze first element to determine structure
218        let first_element = &array[0];
219        let column_names = if let Value::Object(obj) = first_element {
220            obj.keys().cloned().collect()
221        } else {
222            Vec::new()
223        };
224
225        // Check if all elements have the same structure (uniform array)
226        let is_uniform = if !column_names.is_empty() {
227            array.iter().all(|elem| {
228                if let Value::Object(obj) = elem {
229                    obj.keys().len() == column_names.len()
230                        && column_names.iter().all(|col| obj.contains_key(col))
231                } else {
232                    false
233                }
234            })
235        } else {
236            // Non-object arrays are considered uniform if all elements have same type
237            let first_type = std::mem::discriminant(first_element);
238            array
239                .iter()
240                .all(|elem| std::mem::discriminant(elem) == first_type)
241        };
242
243        Some(ArrayMetadata {
244            pointer: pointer.to_string(),
245            length,
246            column_names,
247            is_uniform,
248        })
249    }
250
251    /// Fast column access for uniform object arrays
252    #[inline]
253    pub fn get_column_value<'a>(
254        &self,
255        data: &'a Value,
256        row_index: usize,
257        column: &str,
258    ) -> Option<&'a Value> {
259        if !self.is_uniform || row_index >= self.length {
260            return None;
261        }
262
263        get_array_element_by_pointer(data, &self.pointer, row_index)?
264            .as_object()?
265            .get(column)
266    }
267
268    /// Fast bounds checking
269    #[inline]
270    pub fn is_valid_index(&self, index: usize) -> bool {
271        index < self.length
272    }
273}