Skip to main content

json_eval_rs/jsoneval/
path_utils.rs

1use std::borrow::Cow;
2use serde_json::Value;
3
/// Normalize a path into JSON pointer form suitable for `Value::pointer`.
///
/// Accepted input shapes:
/// - JSON Schema refs: `#/$params/constants/DEATH_SA` -> `/$params/constants/DEATH_SA`
/// - Dotted paths: `user.name` -> `/user/name`
/// - Simple field names: `field` -> `/field`
/// - Already normalized pointers: returned unchanged
///
/// Additional rules:
/// - Runs of consecutive `/` are collapsed into a single `/`.
/// - Dots are only treated as separators when the input starts with neither
///   `/` nor `#`; inside pointers and schema refs they are kept verbatim.
/// - The root is always represented by the empty string, so `""`, `"/"`,
///   `"#/"` and `"#//"` all normalize to `""`.
///
/// Returns `Cow::Borrowed` whenever the result is a sub-slice of `path`
/// (no heap allocation), and `Cow::Owned` only when the path had to be rebuilt.
#[inline]
pub fn normalize_to_json_pointer(path: &str) -> Cow<'_, str> {
    if path.is_empty() {
        return Cow::Borrowed("");
    }

    // A schema ref `#/...` is a pointer once the leading `#` is dropped.
    let pointer = if path.starts_with("#/") {
        &path[1..]
    } else {
        path
    };

    // Fast path: already a clean pointer, so hand back a borrowed slice.
    // A lone "/" (including the one left over from "#/") denotes the root.
    if pointer.starts_with('/') && !pointer.contains("//") {
        return Cow::Borrowed(if pointer == "/" { "" } else { pointer });
    }

    // Slow path: rebuild the pointer character by character.
    // Both flags are loop-invariant, so they are computed once up front.
    let dots_are_separators = !path.starts_with('/') && !path.starts_with('#');
    let needs_leading_slash = !pointer.starts_with('/');

    let mut normalized = String::with_capacity(pointer.len() + 1);
    if needs_leading_slash {
        normalized.push('/');
    }

    // Tracks whether the last emitted character was '/', to collapse runs.
    let mut prev_slash = needs_leading_slash;
    for ch in pointer.chars() {
        let is_separator = ch == '/' || (ch == '.' && dots_are_separators);
        if is_separator {
            if !prev_slash {
                normalized.push('/');
            }
            prev_slash = true;
        } else {
            normalized.push(ch);
            prev_slash = false;
        }
    }

    if normalized == "/" {
        Cow::Borrowed("")
    } else {
        Cow::Owned(normalized)
    }
}
68
69
/// Turn a dotted data path into a JSON Schema pointer.
///
/// Every segment after the first is nested under a `/properties` level,
/// mirroring how object fields are laid out inside a schema.
///
/// Rules applied:
/// - Inputs that start with `#` or `/` are treated as pointers already and
///   are returned unchanged.
/// - Segments that are literally `properties` are skipped.
/// - When the path starts with `$`, no `/properties` levels are inserted;
///   the dots are simply turned into `/`.
///
/// Examples:
/// - "illustration.insured.name" -> "#/illustration/properties/insured/properties/name"
/// - "header.form_number" -> "#/header/properties/form_number"
/// - "$params.constants.X" -> "#/$params/constants/X"
/// - "#/already/formatted" -> "#/already/formatted" (no change)
#[inline]
pub fn dot_notation_to_schema_pointer(path: &str) -> String {
    let already_pointer = path.starts_with(|c: char| c == '#' || c == '/');
    if already_pointer {
        return path.to_owned();
    }

    // `$`-prefixed paths address the document directly, without `/properties`.
    let nest_under_properties = !path.starts_with('$');

    let mut pointer = String::from("#");
    for (position, segment) in path.split('.').enumerate() {
        if segment == "properties" {
            continue;
        }

        // `position` counts skipped segments too, so only the very first
        // segment of the input is exempt from the `/properties` level.
        if nest_under_properties && position > 0 {
            pointer.push_str("/properties");
        }
        pointer.push('/');
        pointer.push_str(segment);
    }

    pointer
}
109
/// Convert a JSON pointer or schema pointer into dotted notation.
///
/// A leading `#/`, `/` or `#` is removed (at most one of them, tried in that
/// order) and every remaining `/` becomes a `.`. Input without such a prefix
/// is considered dotted already and is returned unchanged, as is the empty
/// string. `properties` segments are not removed.
///
/// Examples:
/// - "#/illustration/properties/insured/properties/ins_corrname" -> "illustration.properties.insured.properties.ins_corrname"
/// - "/user/name" -> "user.name"
/// - "person.name" -> "person.name" (already dotted, no change)
#[inline]
pub fn pointer_to_dot_notation(path: &str) -> String {
    // Try the longest prefix first so "#/x" loses both characters.
    let without_prefix = path
        .strip_prefix("#/")
        .or_else(|| path.strip_prefix('/'))
        .or_else(|| path.strip_prefix('#'));

    match without_prefix {
        Some(rest) => rest.replace('/', "."),
        // No pointer prefix: already dotted (this also covers the empty string).
        None => path.to_string(),
    }
}
143
/// Canonicalize a path for schema lookups.
///
/// The input may be dotted (`a.b.c`), a JSON pointer (`/a/b`), or a schema
/// ref (`#/a/b`). The result is a pointer starting with `/` in which:
/// 1. Data paths get a `properties` level between consecutive segments
///    (e.g. `a.b.c` -> `/a/properties/b/properties/c`).
/// 2. System paths (those whose first segment starts with `$`, with or
///    without a `/` or `#/` prefix) are only re-joined with `/`
///    (e.g. `$params.x` -> `/$params/x`).
/// 3. Empty segments and segments literally named `properties` found in the
///    input are discarded before rebuilding, so already-canonical pointers
///    round-trip unchanged.
///
/// Two shortcuts bypass the rebuild and therefore keep their input verbatim:
/// a `/$...` pointer without `.` or `//`, and a single non-system field name.
///
/// The empty string maps to the empty string. A path with no usable segments
/// (for example `/`) yields `/`.
///
/// Returns `Cow::Borrowed` if the path is already canonical.
pub fn canonicalize_schema_path(path: &str) -> Cow<'_, str> {
    /// Characters that delimit path segments in any accepted input form.
    fn is_separator(c: char) -> bool {
        c == '/' || c == '.'
    }

    if path.is_empty() {
        return Cow::Borrowed("");
    }

    // Shortcut: clean system pointers are returned untouched.
    let is_system_pointer = path.starts_with("/$");
    if is_system_pointer && !path.contains('.') && !path.contains("//") {
        return Cow::Borrowed(path);
    }

    let is_system = is_system_pointer || path.starts_with('$') || path.starts_with("#/$");

    // Drop a single leading `#/`, `/` or `#` marker.
    let body = path
        .strip_prefix("#/")
        .or_else(|| path.strip_prefix('/'))
        .or_else(|| path.strip_prefix('#'))
        .unwrap_or(path);

    // Shortcut: a lone data field only needs a leading slash.
    if !is_system && !body.is_empty() && !body.contains(is_separator) {
        // `/field` is the only input shape that is exactly one byte longer
        // than its body and starts with '/', so it can be borrowed as-is.
        if path.starts_with('/') && path.len() == body.len() + 1 {
            return Cow::Borrowed(path);
        }
        let mut pointer = String::with_capacity(body.len() + 1);
        pointer.push('/');
        pointer.push_str(body);
        return Cow::Owned(pointer);
    }

    // General case: split into segments and rebuild from scratch.
    let segments = body
        .split(is_separator)
        .filter(|segment| !segment.is_empty() && *segment != "properties");

    let mut canonical = String::with_capacity(path.len() * 2);
    for (position, segment) in segments.enumerate() {
        canonical.push('/');
        if position > 0 && !is_system {
            canonical.push_str("properties/");
        }
        canonical.push_str(segment);
    }

    // Nothing survived the filter: the result is the bare "/".
    if canonical.is_empty() {
        canonical.push('/');
    }

    // Avoid handing out an owned copy of something identical to the input.
    if canonical == path {
        Cow::Borrowed(path)
    } else {
        Cow::Owned(canonical)
    }
}
224
225/// Fast JSON pointer-based value access using serde's native implementation
226///
227/// This is significantly faster than manual path traversal for deeply nested objects
228#[inline]
229pub fn get_value_by_pointer<'a>(data: &'a Value, pointer: &str) -> Option<&'a Value> {
230    if pointer.is_empty() {
231        Some(data)
232    } else {
233        data.pointer(pointer)
234    }
235}
236
237#[inline]
238pub fn get_value_by_pointer_without_properties<'a>(
239    data: &'a Value,
240    pointer: &str,
241) -> Option<&'a Value> {
242    if pointer.is_empty() {
243        Some(data)
244    } else {
245        data.pointer(&pointer.replace("properties/", ""))
246    }
247}
248
249/// Batch pointer resolution for multiple paths
250pub fn get_values_by_pointers<'a>(data: &'a Value, pointers: &[String]) -> Vec<Option<&'a Value>> {
251    pointers
252        .iter()
253        .map(|pointer| get_value_by_pointer(data, pointer))
254        .collect()
255}
256
257/// Fast array indexing helper for JSON arrays
258///
259/// Returns None if not an array or index out of bounds
260#[inline]
261pub fn get_array_element<'a>(data: &'a Value, index: usize) -> Option<&'a Value> {
262    data.as_array()?.get(index)
263}
264
265/// Fast array indexing with JSON pointer path
266///
267/// Example: get_array_element_by_pointer(data, "/$params/tables", 0)
268#[inline]
269pub fn get_array_element_by_pointer<'a>(
270    data: &'a Value,
271    pointer: &str,
272    index: usize,
273) -> Option<&'a Value> {
274    get_value_by_pointer(data, pointer)?.as_array()?.get(index)
275}
276
/// Cached description of a JSON array, captured once so later row/column
/// lookups can skip re-inspecting the array's shape.
#[derive(Clone, Debug)]
pub struct ArrayMetadata {
    /// JSON pointer at which the array lives inside the document.
    pub pointer: String,
    /// Number of elements the array had when the metadata was built.
    pub length: usize,
    /// Keys of the first element when it is an object; empty otherwise.
    pub column_names: Vec<String>,
    /// `true` when every element shares the first element's structure.
    pub is_uniform: bool,
}
289
290impl ArrayMetadata {
291    /// Build metadata for an array at the given pointer
292    pub fn build(data: &Value, pointer: &str) -> Option<Self> {
293        let array = get_value_by_pointer(data, pointer)?.as_array()?;
294
295        let length = array.len();
296        if length == 0 {
297            return Some(ArrayMetadata {
298                pointer: pointer.to_string(),
299                length: 0,
300                column_names: Vec::new(),
301                is_uniform: true,
302            });
303        }
304
305        // Analyze first element to determine structure
306        let first_element = &array[0];
307        let column_names = if let Value::Object(obj) = first_element {
308            obj.keys().cloned().collect()
309        } else {
310            Vec::new()
311        };
312
313        // Check if all elements have the same structure (uniform array)
314        let is_uniform = if !column_names.is_empty() {
315            array.iter().all(|elem| {
316                if let Value::Object(obj) = elem {
317                    obj.keys().len() == column_names.len()
318                        && column_names.iter().all(|col| obj.contains_key(col))
319                } else {
320                    false
321                }
322            })
323        } else {
324            // Non-object arrays are considered uniform if all elements have same type
325            let first_type = std::mem::discriminant(first_element);
326            array
327                .iter()
328                .all(|elem| std::mem::discriminant(elem) == first_type)
329        };
330
331        Some(ArrayMetadata {
332            pointer: pointer.to_string(),
333            length,
334            column_names,
335            is_uniform,
336        })
337    }
338
339    /// Fast column access for uniform object arrays
340    #[inline]
341    pub fn get_column_value<'a>(
342        &self,
343        data: &'a Value,
344        row_index: usize,
345        column: &str,
346    ) -> Option<&'a Value> {
347        if !self.is_uniform || row_index >= self.length {
348            return None;
349        }
350
351        get_array_element_by_pointer(data, &self.pointer, row_index)?
352            .as_object()?
353            .get(column)
354    }
355
356    /// Fast bounds checking
357    #[inline]
358    pub fn is_valid_index(&self, index: usize) -> bool {
359        index < self.length
360    }
361}