json_eval_rs/jsoneval/
path_utils.rs

1//! Path utilities for JSON pointer operations
2//!
3//! This module provides JSON pointer normalization and access functions
4//! for efficient native serde_json operations.
5
6use serde_json::Value;
7
/// Normalize a path into JSON pointer form for use with native pointer lookups.
///
/// Accepted input formats:
/// - JSON Schema refs: `#/$params/constants/DEATH_SA` -> `/$params/constants/DEATH_SA`
/// - Bare root ref: `#` -> `` (empty pointer, i.e. the document root)
/// - Dotted paths: `user.name` -> `/user/name`
/// - Already normalized pointers: `/user/name` (returned unchanged)
/// - Simple field names: `field` -> `/field`
///
/// Runs of consecutive slashes are collapsed to a single slash, and a result
/// consisting of only `/` is returned as the empty (root) pointer.
///
/// Dots are only treated as separators when the input is neither a `#/` ref
/// nor an absolute pointer; `/a.b` keeps its dot.
#[inline]
pub fn normalize_to_json_pointer(path: &str) -> String {
    // "" and the bare fragment "#" both denote the document root.
    if path.is_empty() || path == "#" {
        return String::new();
    }

    // Decide which slice to copy and whether '.' acts as a separator.
    let (source, dots_are_separators) = match path.strip_prefix('#') {
        // "#/..." -> drop the '#', keep the leading '/'.
        Some(rest) if rest.starts_with('/') => (rest, false),
        // Absolute pointers are kept verbatim; everything else is dotted/simple.
        _ => (path, !path.starts_with('/')),
    };

    let mut pointer = String::with_capacity(source.len() + 1);
    if dots_are_separators {
        pointer.push('/');
    }

    // Single pass: translate separators and collapse duplicate slashes as we go.
    for ch in source.chars() {
        let ch = if dots_are_separators && ch == '.' { '/' } else { ch };
        if ch == '/' && pointer.ends_with('/') {
            continue;
        }
        pointer.push(ch);
    }

    // A lone "/" is a root reference, which JSON pointer spells as "".
    if pointer == "/" {
        pointer.clear();
    }
    pointer
}
48
/// Convert a dotted path to JSON Schema pointer format.
///
/// This is used for schema paths where nested fields live under `/properties/`.
/// The first dotted part is emitted directly under `#/`; every later part is
/// prefixed with `/properties` unless the whole path starts with `$`, in which
/// case the parts are joined with plain `/`.
///
/// Parts that are literally `properties` are dropped so that an input which
/// already spells them out does not get them doubled.
///
/// Inputs that already start with `#` or `/` are returned unchanged.
///
/// Examples:
/// - "illustration.insured.name" -> "#/illustration/properties/insured/properties/name"
/// - "header.form_number" -> "#/header/properties/form_number"
/// - "$params.constants.X" -> "#/$params/constants/X"
/// - "#/already/formatted" -> "#/already/formatted" (no change)
#[inline]
pub fn dot_notation_to_schema_pointer(path: &str) -> String {
    // Already a pointer (starts with # or /): nothing to convert.
    if path.starts_with('#') || path.starts_with('/') {
        return path.to_string();
    }

    // `$`-prefixed paths are direct JSON pointers and get no `/properties` nesting.
    let nest_under_properties = !path.starts_with('$');

    let mut result = String::with_capacity(path.len() + 1);
    result.push('#');

    // The index counts every dotted part, including skipped `properties`
    // parts, so only the very first part of the input is treated as root-level.
    for (i, part) in path.split('.').enumerate() {
        if part == "properties" {
            continue;
        }

        if i > 0 && nest_under_properties {
            result.push_str("/properties");
        }
        result.push('/');
        result.push_str(part);
    }

    result
}
88
/// Convert a JSON pointer or schema pointer to dotted notation.
///
/// A single leading `#/`, `/` or `#` is removed and every remaining `/` is
/// replaced with `.`. Input that has none of those prefixes is treated as
/// already dotted and returned unchanged (this includes the empty string).
/// `properties` segments are kept as-is.
///
/// Examples:
/// - "#/illustration/properties/insured/properties/ins_corrname" -> "illustration.properties.insured.properties.ins_corrname"
/// - "/user/name" -> "user.name"
/// - "person.name" -> "person.name" (already dotted, no change)
#[inline]
pub fn pointer_to_dot_notation(path: &str) -> String {
    // Try the longest prefix first so "#/a" loses both characters.
    let without_prefix = path
        .strip_prefix("#/")
        .or_else(|| path.strip_prefix('/'))
        .or_else(|| path.strip_prefix('#'));

    match without_prefix {
        Some(rest) => rest.replace('/', "."),
        // No pointer prefix: already dotted notation.
        None => path.to_string(),
    }
}
122
123/// Fast JSON pointer-based value access using serde's native implementation
124///
125/// This is significantly faster than manual path traversal for deeply nested objects
126#[inline]
127pub fn get_value_by_pointer<'a>(data: &'a Value, pointer: &str) -> Option<&'a Value> {
128    if pointer.is_empty() {
129        Some(data)
130    } else {
131        data.pointer(pointer)
132    }
133}
134
135#[inline]
136pub fn get_value_by_pointer_without_properties<'a>(
137    data: &'a Value,
138    pointer: &str,
139) -> Option<&'a Value> {
140    if pointer.is_empty() {
141        Some(data)
142    } else {
143        data.pointer(&pointer.replace("properties/", ""))
144    }
145}
146
147/// Batch pointer resolution for multiple paths
148pub fn get_values_by_pointers<'a>(data: &'a Value, pointers: &[String]) -> Vec<Option<&'a Value>> {
149    pointers
150        .iter()
151        .map(|pointer| get_value_by_pointer(data, pointer))
152        .collect()
153}
154
155/// Fast array indexing helper for JSON arrays
156///
157/// Returns None if not an array or index out of bounds
158#[inline]
159pub fn get_array_element<'a>(data: &'a Value, index: usize) -> Option<&'a Value> {
160    data.as_array()?.get(index)
161}
162
163/// Fast array indexing with JSON pointer path
164///
165/// Example: get_array_element_by_pointer(data, "/$params/tables", 0)
166#[inline]
167pub fn get_array_element_by_pointer<'a>(
168    data: &'a Value,
169    pointer: &str,
170    index: usize,
171) -> Option<&'a Value> {
172    get_value_by_pointer(data, pointer)?.as_array()?.get(index)
173}
174
/// Cached description of a JSON array, produced by `ArrayMetadata::build`.
///
/// The values are a snapshot taken when the metadata was built; they are not
/// updated if the underlying document changes afterwards.
#[derive(Clone, Debug)]
pub struct ArrayMetadata {
    /// JSON pointer identifying where the array lives in the document
    pub pointer: String,
    /// Number of elements the array had when the metadata was built
    pub length: usize,
    /// Keys of the first element when it is an object; empty otherwise
    pub column_names: Vec<String>,
    /// Whether every element was found to share the first element's structure
    pub is_uniform: bool,
}
187
188impl ArrayMetadata {
189    /// Build metadata for an array at the given pointer
190    pub fn build(data: &Value, pointer: &str) -> Option<Self> {
191        let array = get_value_by_pointer(data, pointer)?.as_array()?;
192
193        let length = array.len();
194        if length == 0 {
195            return Some(ArrayMetadata {
196                pointer: pointer.to_string(),
197                length: 0,
198                column_names: Vec::new(),
199                is_uniform: true,
200            });
201        }
202
203        // Analyze first element to determine structure
204        let first_element = &array[0];
205        let column_names = if let Value::Object(obj) = first_element {
206            obj.keys().cloned().collect()
207        } else {
208            Vec::new()
209        };
210
211        // Check if all elements have the same structure (uniform array)
212        let is_uniform = if !column_names.is_empty() {
213            array.iter().all(|elem| {
214                if let Value::Object(obj) = elem {
215                    obj.keys().len() == column_names.len()
216                        && column_names.iter().all(|col| obj.contains_key(col))
217                } else {
218                    false
219                }
220            })
221        } else {
222            // Non-object arrays are considered uniform if all elements have same type
223            let first_type = std::mem::discriminant(first_element);
224            array
225                .iter()
226                .all(|elem| std::mem::discriminant(elem) == first_type)
227        };
228
229        Some(ArrayMetadata {
230            pointer: pointer.to_string(),
231            length,
232            column_names,
233            is_uniform,
234        })
235    }
236
237    /// Fast column access for uniform object arrays
238    #[inline]
239    pub fn get_column_value<'a>(
240        &self,
241        data: &'a Value,
242        row_index: usize,
243        column: &str,
244    ) -> Option<&'a Value> {
245        if !self.is_uniform || row_index >= self.length {
246            return None;
247        }
248
249        get_array_element_by_pointer(data, &self.pointer, row_index)?
250            .as_object()?
251            .get(column)
252    }
253
254    /// Fast bounds checking
255    #[inline]
256    pub fn is_valid_index(&self, index: usize) -> bool {
257        index < self.length
258    }
259}