json_eval_rs/jsoneval/
path_utils.rs

1//! Path utilities for JSON pointer operations
2//!
3//! This module provides JSON pointer normalization and access functions
4//! for efficient native serde_json operations.
5
6use serde_json::Value;
7
/// Rewrite a path in any supported notation as a JSON pointer string.
///
/// Accepted inputs and their results:
/// - empty string -> empty string (the root pointer)
/// - schema reference: #/$params/constants/DEATH_SA -> /$params/constants/DEATH_SA
/// - dotted path: user.name -> /user/name
/// - bare field name: field -> /field
/// - pointer that already starts with `/`: kept (dots inside it are NOT converted)
///
/// After conversion, every run of consecutive slashes is collapsed into a
/// single slash, and a result consisting only of `/` becomes the empty root pointer.
#[inline]
pub fn normalize_to_json_pointer(path: &str) -> String {
    if path.is_empty() {
        return String::new();
    }

    let pointer = match path.strip_prefix('#') {
        // "#/..." schema reference: drop only the '#', keep the leading slash.
        Some(rest) if rest.starts_with('/') => rest.to_owned(),
        // Already pointer-shaped.
        _ if path.starts_with('/') => path.to_owned(),
        // Dotted path or bare field name; replacing dots is a no-op for the latter.
        _ => format!("/{}", path.replace('.', "/")),
    };

    // Collapse runs of slashes into one.
    let mut collapsed = String::with_capacity(pointer.len());
    for ch in pointer.chars() {
        if ch == '/' && collapsed.ends_with('/') {
            continue;
        }
        collapsed.push(ch);
    }

    // A lone slash designates the document root, which is spelled "".
    if collapsed == "/" {
        String::new()
    } else {
        collapsed
    }
}
48
/// Convert a dotted path to JSON Schema pointer format.
///
/// Schema paths nest every field below the first one under `/properties/`.
/// Paths that start with `$` (e.g. `$params`) address plain data rather than
/// schema properties, so their segments are joined with a single `/` instead.
///
/// Segments that are literally `properties` are dropped from the input, because
/// the separator already supplies them.
///
/// Inputs that already start with `#` or `/` are returned unchanged.
///
/// Examples:
/// - "illustration.insured.name" -> "#/illustration/properties/insured/properties/name"
/// - "header.form_number" -> "#/header/properties/form_number"
/// - "$params.constants.DEATH_SA" -> "#/$params/constants/DEATH_SA"
/// - "#/already/formatted" -> "#/already/formatted" (no change)
/// - "" -> "#/"
#[inline]
pub fn dot_notation_to_schema_pointer(path: &str) -> String {
    // Already a pointer: nothing to convert.
    if path.starts_with('#') || path.starts_with('/') {
        return path.to_string();
    }

    // `$`-rooted paths are direct pointers, so they get a bare slash between
    // segments; without any separator the segments would be fused together.
    let separator = if path.starts_with('$') {
        "/"
    } else {
        "/properties/"
    };

    let mut result = String::from("#/");
    for (i, part) in path.split('.').enumerate() {
        if part == "properties" {
            continue;
        }

        // The first segment sits directly after "#/".
        if i > 0 {
            result.push_str(separator);
        }
        result.push_str(part);
    }

    result
}
87
/// Turn a JSON pointer or schema pointer into dotted notation.
///
/// A leading `#/`, `/` or `#` is removed (only one of them, tried in that
/// order) and every remaining `/` becomes a `.`. Input without such a prefix,
/// including the empty string, is returned unchanged. `properties` segments are
/// kept as ordinary segments.
///
/// Examples:
/// - "#/illustration/properties/insured/properties/ins_corrname" -> "illustration.properties.insured.properties.ins_corrname"
/// - "/user/name" -> "user.name"
/// - "person.name" -> "person.name" (already dotted, no change)
#[inline]
pub fn pointer_to_dot_notation(path: &str) -> String {
    let without_prefix = path
        .strip_prefix("#/")
        .or_else(|| path.strip_prefix('/'))
        .or_else(|| path.strip_prefix('#'));

    match without_prefix {
        // Pointer-shaped input: swap the separators.
        Some(rest) => rest.replace('/', "."),
        // No pointer prefix: treat as already dotted (covers "" as well).
        None => path.to_owned(),
    }
}
121
122/// Fast JSON pointer-based value access using serde's native implementation
123///
124/// This is significantly faster than manual path traversal for deeply nested objects
125#[inline]
126pub fn get_value_by_pointer<'a>(data: &'a Value, pointer: &str) -> Option<&'a Value> {
127    if pointer.is_empty() {
128        Some(data)
129    } else {
130        data.pointer(pointer)
131    }
132}
133
134#[inline]
135pub fn get_value_by_pointer_without_properties<'a>(
136    data: &'a Value,
137    pointer: &str,
138) -> Option<&'a Value> {
139    if pointer.is_empty() {
140        Some(data)
141    } else {
142        data.pointer(&pointer.replace("properties/", ""))
143    }
144}
145
146/// Batch pointer resolution for multiple paths
147pub fn get_values_by_pointers<'a>(data: &'a Value, pointers: &[String]) -> Vec<Option<&'a Value>> {
148    pointers
149        .iter()
150        .map(|pointer| get_value_by_pointer(data, pointer))
151        .collect()
152}
153
154/// Fast array indexing helper for JSON arrays
155///
156/// Returns None if not an array or index out of bounds
157#[inline]
158pub fn get_array_element<'a>(data: &'a Value, index: usize) -> Option<&'a Value> {
159    data.as_array()?.get(index)
160}
161
162/// Fast array indexing with JSON pointer path
163///
164/// Example: get_array_element_by_pointer(data, "/$params/tables", 0)
165#[inline]
166pub fn get_array_element_by_pointer<'a>(
167    data: &'a Value,
168    pointer: &str,
169    index: usize,
170) -> Option<&'a Value> {
171    get_value_by_pointer(data, pointer)?.as_array()?.get(index)
172}
173
/// Cached description of a JSON array, produced by `ArrayMetadata::build`.
///
/// The values are a snapshot taken when `build` ran; they are not refreshed
/// if the underlying document changes afterwards.
#[derive(Debug, Clone)]
pub struct ArrayMetadata {
    /// JSON pointer that was used to locate the array
    pub pointer: String,
    /// Number of elements the array had when the metadata was built
    /// (used for bounds checks without touching the data)
    pub length: usize,
    /// Keys of the first element when it is an object; empty otherwise
    pub column_names: Vec<String>,
    /// Whether all elements share the same structure, as determined by `build`
    pub is_uniform: bool,
}
186
187impl ArrayMetadata {
188    /// Build metadata for an array at the given pointer
189    pub fn build(data: &Value, pointer: &str) -> Option<Self> {
190        let array = get_value_by_pointer(data, pointer)?.as_array()?;
191
192        let length = array.len();
193        if length == 0 {
194            return Some(ArrayMetadata {
195                pointer: pointer.to_string(),
196                length: 0,
197                column_names: Vec::new(),
198                is_uniform: true,
199            });
200        }
201
202        // Analyze first element to determine structure
203        let first_element = &array[0];
204        let column_names = if let Value::Object(obj) = first_element {
205            obj.keys().cloned().collect()
206        } else {
207            Vec::new()
208        };
209
210        // Check if all elements have the same structure (uniform array)
211        let is_uniform = if !column_names.is_empty() {
212            array.iter().all(|elem| {
213                if let Value::Object(obj) = elem {
214                    obj.keys().len() == column_names.len()
215                        && column_names.iter().all(|col| obj.contains_key(col))
216                } else {
217                    false
218                }
219            })
220        } else {
221            // Non-object arrays are considered uniform if all elements have same type
222            let first_type = std::mem::discriminant(first_element);
223            array
224                .iter()
225                .all(|elem| std::mem::discriminant(elem) == first_type)
226        };
227
228        Some(ArrayMetadata {
229            pointer: pointer.to_string(),
230            length,
231            column_names,
232            is_uniform,
233        })
234    }
235
236    /// Fast column access for uniform object arrays
237    #[inline]
238    pub fn get_column_value<'a>(
239        &self,
240        data: &'a Value,
241        row_index: usize,
242        column: &str,
243    ) -> Option<&'a Value> {
244        if !self.is_uniform || row_index >= self.length {
245            return None;
246        }
247
248        get_array_element_by_pointer(data, &self.pointer, row_index)?
249            .as_object()?
250            .get(column)
251    }
252
253    /// Fast bounds checking
254    #[inline]
255    pub fn is_valid_index(&self, index: usize) -> bool {
256        index < self.length
257    }
258}