Skip to main content

json_eval_rs/jsoneval/
path_utils.rs

1use serde_json::Value;
2use std::borrow::Cow;
3
/// Normalize a path to JSON pointer format for efficient native access.
///
/// Handles various input formats:
/// - JSON Schema refs: `#/$params/constants/DEATH_SA` -> `/$params/constants/DEATH_SA`
/// - Dotted paths: `user.name` -> `/user/name`
/// - Already normalized paths (no-op)
/// - Simple field names: `field` -> `/field`
/// - Root references: `""`, `/` and `#/` all normalize to `""`
///
/// Runs of consecutive slashes are collapsed into one. Dots are treated as
/// separators only when the input starts with neither `/` nor `#`; inside an
/// explicit pointer or schema ref they are kept verbatim.
///
/// Returns `Cow::Borrowed` for already-normalized paths to avoid heap allocation.
#[inline]
pub fn normalize_to_json_pointer(path: &str) -> Cow<'_, str> {
    // A schema ref "#/..." is a plain pointer once the leading '#' is dropped.
    // A '#' that is not followed by '/' is left alone and handled below.
    let pointer = match path.strip_prefix('#') {
        Some(rest) if rest.starts_with('/') => rest,
        _ => path,
    };

    // Every spelling of the document root maps to the empty pointer. Checking
    // this after the '#' strip keeps "#/" consistent with "/" (a lone "/"
    // would otherwise address the empty-string key instead of the root).
    if pointer.is_empty() || pointer == "/" {
        return Cow::Borrowed("");
    }

    // Fast path: already a well-formed pointer, hand back a slice of the input.
    if pointer.starts_with('/') && !pointer.contains("//") {
        return Cow::Borrowed(pointer);
    }

    // Slow path: rebuild the pointer, converting dots (for dotted input only)
    // and collapsing repeated slashes.
    let dots_are_separators = !path.starts_with('/') && !path.starts_with('#');
    let mut normalized = String::with_capacity(pointer.len() + 1);
    if !pointer.starts_with('/') {
        normalized.push('/');
    }

    let mut prev_slash = normalized.ends_with('/');
    for ch in pointer.chars() {
        let is_separator = ch == '/' || (ch == '.' && dots_are_separators);
        if is_separator {
            if !prev_slash {
                normalized.push('/');
            }
            prev_slash = true;
        } else {
            normalized.push(ch);
            prev_slash = false;
        }
    }

    // Input made only of separators (e.g. "//" or ".") collapses to the root.
    if normalized == "/" {
        Cow::Borrowed("")
    } else {
        Cow::Owned(normalized)
    }
}
68
/// Turn a dotted data path into a JSON Schema pointer.
///
/// Schema paths nest child fields under `/properties/`, so every segment
/// after the first is prefixed with `/properties`. Inputs that already look
/// like a pointer (leading `#` or `/`) are returned unchanged. Paths starting
/// with `$` are joined with plain `/` and get no `/properties` injection.
/// Segments literally named `properties` are dropped from the input.
///
/// Examples:
/// - "illustration.insured.name" -> "#/illustration/properties/insured/properties/name"
/// - "header.form_number" -> "#/header/properties/form_number"
/// - "$params.constants" -> "#/$params/constants"
/// - "#/already/formatted" -> "#/already/formatted" (no change)
#[inline]
pub fn dot_notation_to_schema_pointer(path: &str) -> String {
    // Pointer-shaped input passes straight through.
    if matches!(path.chars().next(), Some('#') | Some('/')) {
        return path.to_owned();
    }

    // `$`-rooted paths are direct pointers: no `/properties` between segments.
    let nest_under_properties = !path.starts_with('$');

    let mut pointer = String::from("#");
    let segments = path
        .split('.')
        .enumerate()
        .filter(|(_, segment)| *segment != "properties");

    // The index is the segment's position in the *original* path, so a
    // skipped `properties` segment still counts towards "not the first".
    for (position, segment) in segments {
        if nest_under_properties && position != 0 {
            pointer.push_str("/properties");
        }
        pointer.push('/');
        pointer.push_str(segment);
    }

    pointer
}
108
/// Rewrite a JSON pointer or schema pointer as a dotted path.
///
/// A leading `#/`, `/` or `#` is removed and every remaining `/` becomes a
/// `.`. Input without such a prefix is assumed to be dotted already and is
/// returned unchanged. `properties` segments are kept as-is.
///
/// Examples:
/// - "#/illustration/properties/insured/properties/ins_corrname" -> "illustration.properties.insured.properties.ins_corrname"
/// - "/user/name" -> "user.name"
/// - "person.name" -> "person.name" (already dotted, no change)
#[inline]
pub fn pointer_to_dot_notation(path: &str) -> String {
    // Try the longest prefix first so "#/x" loses both characters.
    let without_prefix = path
        .strip_prefix("#/")
        .or_else(|| path.strip_prefix('/'))
        .or_else(|| path.strip_prefix('#'));

    match without_prefix {
        Some(segments) => segments.replace('/', "."),
        // No pointer prefix (this also covers the empty string).
        None => path.to_owned(),
    }
}
142
/// Produce the canonical schema-lookup form of a path.
///
/// The conversion:
/// 1. Strips a leading `#/`, `/` or `#` and rebuilds the path as a JSON pointer
///    starting with `/`.
/// 2. Splits on both `/` and `.`, dropping empty segments and segments named
///    `properties`, then re-inserts `properties/` before every segment after
///    the first (e.g. `a.b.c` -> `/a/properties/b/properties/c`).
/// 3. Leaves system paths (those rooted at `$`, `/$` or `#/$`) without any
///    `properties/` injection (e.g. `$params.x` -> `/$params/x`).
///
/// An empty input yields `""`. Returns `Cow::Borrowed` when the input is
/// already in canonical form, otherwise an owned string.
pub fn canonicalize_schema_path(path: &str) -> Cow<'_, str> {
    fn is_separator(c: char) -> bool {
        c == '/' || c == '.'
    }

    if path.is_empty() {
        return Cow::Borrowed("");
    }

    // Shortcut: a slash-form system path with nothing to clean up.
    if path.starts_with("/$") && !(path.contains('.') || path.contains("//")) {
        return Cow::Borrowed(path);
    }

    let system_path = ["$", "/$", "#/$"]
        .iter()
        .any(|prefix| path.starts_with(*prefix));

    // Drop the pointer/ref prefix, longest candidate first.
    let body = path
        .strip_prefix("#/")
        .or_else(|| path.strip_prefix('/'))
        .or_else(|| path.strip_prefix('#'))
        .unwrap_or(path);

    // Single top-level field: the answer is just "/<field>".
    if !system_path && !body.is_empty() && !body.contains(is_separator) {
        // Exactly one stripped character that was a '/' means the input
        // already had the canonical shape.
        let already_canonical = path.len() == body.len() + 1 && path.starts_with('/');
        return if already_canonical {
            Cow::Borrowed(path)
        } else {
            Cow::Owned(["/", body].concat())
        };
    }

    // General case: take the path apart and put it back together.
    let mut canonical = String::with_capacity(path.len() * 2);
    let segments = body
        .split(is_separator)
        .filter(|segment| !segment.is_empty() && *segment != "properties");

    for (position, segment) in segments.enumerate() {
        canonical.push('/');
        if position > 0 && !system_path {
            canonical.push_str("properties/");
        }
        canonical.push_str(segment);
    }

    // No usable segments at all: the result is a bare slash.
    if canonical.is_empty() {
        canonical.push('/');
    }

    if canonical == path {
        Cow::Borrowed(path)
    } else {
        Cow::Owned(canonical)
    }
}
223
224/// Fast JSON pointer-based value access using serde's native implementation
225///
226/// This is significantly faster than manual path traversal for deeply nested objects
227#[inline]
228pub fn get_value_by_pointer<'a>(data: &'a Value, pointer: &str) -> Option<&'a Value> {
229    if pointer.is_empty() {
230        Some(data)
231    } else {
232        data.pointer(pointer)
233    }
234}
235
236#[inline]
237pub fn get_value_by_pointer_without_properties<'a>(
238    data: &'a Value,
239    pointer: &str,
240) -> Option<&'a Value> {
241    if pointer.is_empty() {
242        Some(data)
243    } else {
244        data.pointer(&pointer.replace("properties/", ""))
245    }
246}
247
248/// Batch pointer resolution for multiple paths
249pub fn get_values_by_pointers<'a>(data: &'a Value, pointers: &[String]) -> Vec<Option<&'a Value>> {
250    pointers
251        .iter()
252        .map(|pointer| get_value_by_pointer(data, pointer))
253        .collect()
254}
255
256/// Fast array indexing helper for JSON arrays
257///
258/// Returns None if not an array or index out of bounds
259#[inline]
260pub fn get_array_element<'a>(data: &'a Value, index: usize) -> Option<&'a Value> {
261    data.as_array()?.get(index)
262}
263
264/// Fast array indexing with JSON pointer path
265///
266/// Example: get_array_element_by_pointer(data, "/$params/tables", 0)
267#[inline]
268pub fn get_array_element_by_pointer<'a>(
269    data: &'a Value,
270    pointer: &str,
271    index: usize,
272) -> Option<&'a Value> {
273    get_value_by_pointer(data, pointer)?.as_array()?.get(index)
274}
275
/// Table metadata extracted for fast array operations during schema parsing.
///
/// Describes one JSON array: where it lives, how long it was when inspected,
/// and (for arrays of objects) which keys its rows carry. Values are a
/// snapshot taken by `ArrayMetadata::build`; they are not updated if the
/// underlying document changes afterwards.
#[derive(Debug, Clone)]
pub struct ArrayMetadata {
    /// Pointer to the array location, stored exactly as passed to `build`
    pub pointer: String,
    /// Array length (cached for fast bounds checking)
    pub length: usize,
    /// Column names for object arrays (cached for fast field access);
    /// taken from the first element's keys, empty for non-object arrays
    pub column_names: Vec<String>,
    /// Whether this is a uniform object array (all elements have same structure)
    pub is_uniform: bool,
}
288
289impl ArrayMetadata {
290    /// Build metadata for an array at the given pointer
291    pub fn build(data: &Value, pointer: &str) -> Option<Self> {
292        let array = get_value_by_pointer(data, pointer)?.as_array()?;
293
294        let length = array.len();
295        if length == 0 {
296            return Some(ArrayMetadata {
297                pointer: pointer.to_string(),
298                length: 0,
299                column_names: Vec::new(),
300                is_uniform: true,
301            });
302        }
303
304        // Analyze first element to determine structure
305        let first_element = &array[0];
306        let column_names = if let Value::Object(obj) = first_element {
307            obj.keys().cloned().collect()
308        } else {
309            Vec::new()
310        };
311
312        // Check if all elements have the same structure (uniform array)
313        let is_uniform = if !column_names.is_empty() {
314            array.iter().all(|elem| {
315                if let Value::Object(obj) = elem {
316                    obj.keys().len() == column_names.len()
317                        && column_names.iter().all(|col| obj.contains_key(col))
318                } else {
319                    false
320                }
321            })
322        } else {
323            // Non-object arrays are considered uniform if all elements have same type
324            let first_type = std::mem::discriminant(first_element);
325            array
326                .iter()
327                .all(|elem| std::mem::discriminant(elem) == first_type)
328        };
329
330        Some(ArrayMetadata {
331            pointer: pointer.to_string(),
332            length,
333            column_names,
334            is_uniform,
335        })
336    }
337
338    /// Fast column access for uniform object arrays
339    #[inline]
340    pub fn get_column_value<'a>(
341        &self,
342        data: &'a Value,
343        row_index: usize,
344        column: &str,
345    ) -> Option<&'a Value> {
346        if !self.is_uniform || row_index >= self.length {
347            return None;
348        }
349
350        get_array_element_by_pointer(data, &self.pointer, row_index)?
351            .as_object()?
352            .get(column)
353    }
354
355    /// Fast bounds checking
356    #[inline]
357    pub fn is_valid_index(&self, index: usize) -> bool {
358        index < self.length
359    }
360}