Skip to main content

ai_lib_rust/utils/
json_path.rs

1//! JSONPath evaluator and path mapper for frame selection and field extraction
2//!
3//! Inspired by ai-lib's PathMapper implementation, with support for:
4//! - Nested path access (e.g., "a.b.c")
5//! - Array indexing (e.g., "choices[0].delta.content")
6//! - Condition evaluation (exists, ==, !=, in, &&, ||, >, <, >=, <=)
//! - Basic pattern matching via `=~` (`*` / `?` wildcards or substring search; not a full regex engine)
8
9use serde_json::{json, Value};
10use std::collections::HashMap;
11
/// Errors returned by [`PathMapper`] when a path cannot be used to write a value.
#[derive(Debug, thiserror::Error)]
pub enum PathMapperError {
    /// The path itself is malformed: it is empty or contains an empty segment.
    /// The payload is a human-readable description of the problem.
    #[error("Invalid path: {0}")]
    InvalidPath(String),

    /// The path is well-formed but the target location could not be reached
    /// as a JSON object. The payload describes the location that failed.
    #[error("Cannot set value at path: {0}")]
    CannotSetValue(String),
}
21
/// Path mapper for extracting and setting values in JSON using dot-notation paths.
///
/// This is a stateless unit struct: all functionality is exposed as associated
/// functions (e.g. `PathMapper::get_path`), so no instance is ever required.
pub struct PathMapper;
24
25impl PathMapper {
26    /// Get value from JSON using dot-notation path (supports array indexing)
27    ///
28    /// Examples:
29    /// - "choices[0].delta.content"
30    /// - "input.temperature"
31    /// - "delta.text"
32    pub fn get_path<'a>(obj: &'a Value, path: &str) -> Option<&'a Value> {
33        if path.is_empty() {
34            return None;
35        }
36
37        // Remove leading "$." if present (JSONPath style)
38        let normalized = path.trim().trim_start_matches("$.").to_string();
39        let parts: Vec<&str> = normalized.split('.').collect();
40        let mut current = obj;
41
42        for part in parts {
43            if part.is_empty() {
44                return None;
45            }
46
47            // Check if part contains array index, e.g., "choices[0]"
48            if let Some(bracket_pos) = part.find('[') {
49                // Extract key and index
50                let key = &part[..bracket_pos];
51                let idx_str = part[bracket_pos + 1..].trim_end_matches(']');
52
53                // First access the object key
54                if !key.is_empty() {
55                    match current {
56                        Value::Object(map) => {
57                            current = map.get(key)?;
58                        }
59                        _ => return None,
60                    }
61                }
62
63                // Then access the array index
64                if let Ok(idx) = idx_str.parse::<usize>() {
65                    match current {
66                        Value::Array(arr) => {
67                            current = arr.get(idx)?;
68                        }
69                        _ => return None,
70                    }
71                } else if idx_str == "*" {
72                    // Wildcard: get first element
73                    match current {
74                        Value::Array(arr) => {
75                            current = arr.first()?;
76                        }
77                        _ => return None,
78                    }
79                } else {
80                    return None;
81                }
82            } else {
83                // Simple key access OR dot-index access (e.g. "choices.0.delta")
84                match current {
85                    Value::Object(map) => {
86                        current = map.get(part)?;
87                    }
88                    Value::Array(arr) => {
89                        // Support "0" / "1" style index segments (common in some JSONPath variants)
90                        if let Ok(idx) = part.parse::<usize>() {
91                            current = arr.get(idx)?;
92                        } else if part == "*" {
93                            current = arr.first()?;
94                        } else {
95                            return None;
96                        }
97                    }
98                    _ => return None,
99                }
100            }
101        }
102
103        Some(current)
104    }
105
106    /// Get string value from path (converts number to string if needed)
107    pub fn get_string(obj: &Value, path: &str) -> Option<String> {
108        Self::get_path(obj, path).and_then(|v| {
109            if v.is_string() {
110                v.as_str().map(|s| s.to_string())
111            } else {
112                serde_json::to_string(v).ok()
113            }
114        })
115    }
116
117    /// Set value at nested path in JSON object
118    ///
119    /// Examples:
120    /// - "input.temperature" -> sets obj["input"]["temperature"]
121    /// - "generationConfig.maxOutputTokens" -> sets obj["generationConfig"]["maxOutputTokens"]
122    pub fn set_path(obj: &mut Value, path: &str, value: Value) -> Result<(), PathMapperError> {
123        if path.is_empty() {
124            return Err(PathMapperError::InvalidPath("Empty path".to_string()));
125        }
126
127        // Remove leading "$." if present
128        let normalized = path.trim().trim_start_matches("$.").to_string();
129        let parts: Vec<&str> = normalized.split('.').collect();
130
131        if parts.is_empty() {
132            return Err(PathMapperError::InvalidPath("Empty path parts".to_string()));
133        }
134
135        // Ensure root object is Object
136        if !obj.is_object() {
137            *obj = json!({});
138        }
139
140        let mut current = obj
141            .as_object_mut()
142            .ok_or_else(|| PathMapperError::CannotSetValue("Root is not an object".to_string()))?;
143
144        // Process all but the last path segment
145        for (idx, part) in parts.iter().enumerate().take(parts.len() - 1) {
146            if part.is_empty() {
147                return Err(PathMapperError::InvalidPath(format!(
148                    "Empty path part at index {}",
149                    idx
150                )));
151            }
152
153            // If path doesn't exist or is not an object, create new object
154            if !current.contains_key(*part) || !current[*part].is_object() {
155                current.insert(part.to_string(), json!({}));
156            }
157
158            // Move to next level
159            current = current[*part].as_object_mut().ok_or_else(|| {
160                PathMapperError::CannotSetValue(format!("Cannot access object at path: {}", part))
161            })?;
162        }
163
164        // Set the last path segment's value
165        let last_part = parts
166            .last()
167            .ok_or_else(|| PathMapperError::InvalidPath("No last part".to_string()))?;
168
169        if last_part.is_empty() {
170            return Err(PathMapperError::InvalidPath(
171                "Last path part is empty".to_string(),
172            ));
173        }
174
175        current.insert(last_part.to_string(), value);
176        Ok(())
177    }
178
179    /// Batch set multiple paths
180    pub fn set_paths(
181        obj: &mut Value,
182        paths: &HashMap<String, Value>,
183    ) -> Result<(), PathMapperError> {
184        for (path, value) in paths {
185            Self::set_path(obj, path, value.clone())?;
186        }
187        Ok(())
188    }
189}
190
/// JSONPath evaluator for condition matching.
///
/// Supports: exists, ==, !=, in, &&, ||, >, <, >=, <=, and `=~` pattern
/// matching (basic `*` / `?` wildcards or substring search — not a full regex
/// engine).
#[derive(Clone)]
pub struct JsonPathEvaluator {
    /// The raw expression text, stored as given and re-parsed on every evaluation.
    expression: String,
}
197
198impl JsonPathEvaluator {
199    pub fn new(expression: &str) -> Result<Self, String> {
200        if expression.is_empty() {
201            return Err("Empty expression".to_string());
202        }
203        Ok(Self {
204            expression: expression.to_string(),
205        })
206    }
207
208    /// Check if expression matches the JSON value
209    ///
210    /// Supports:
211    /// - exists($.path) - check if path exists
212    /// - $.path == "value" - equality check
213    /// - $.path != "value" - inequality check
214    /// - $.path in ['value1', 'value2'] - list membership
215    /// - $.path != null / $.path == null - null check
216    /// - $.path > 10 / $.path < 10 - numeric comparison
217    /// - $.path >= 10 / $.path <= 10 - numeric comparison
218    /// - $.path =~ /pattern/ - regex matching
219    /// - && and || for logical combination
220    pub fn matches(&self, value: &Value) -> bool {
221        Self::evaluate_match(&self.expression, value)
222    }
223
224    /// Extract string value from JSON using path
225    pub fn extract_string(&self, value: &Value) -> Option<String> {
226        // If expression is a simple path, extract it
227        if self.expression.starts_with("$.") {
228            return PathMapper::get_string(value, &self.expression);
229        }
230        None
231    }
232
233    /// Evaluate match expression with support for numeric comparisons and regex
234    fn evaluate_match(expr: &str, root: &Value) -> bool {
235        // Split by OR
236        let or_parts: Vec<&str> = expr.split("||").collect();
237        for or_part in or_parts {
238            let mut ok = true;
239            // Split by AND
240            let and_parts: Vec<&str> = or_part.split("&&").collect();
241            for part in and_parts {
242                let cond = part.trim();
243                if cond.is_empty() {
244                    continue;
245                }
246
247                // exists() check
248                if cond.starts_with("exists(") && cond.ends_with(')') {
249                    let path = cond.trim_start_matches("exists(").trim_end_matches(')');
250                    if PathMapper::get_path(root, path).is_none() {
251                        ok = false;
252                        break;
253                    }
254                    continue;
255                }
256
257                // Regex matching: $.path =~ /pattern/
258                if let Some(idx) = cond.find("=~") {
259                    let (path, rest) = cond.split_at(idx);
260                    let path = path.trim();
261                    let pattern_str = rest.trim_start_matches("=~").trim();
262
263                    // Extract pattern from /pattern/ or "pattern"
264                    let pattern = pattern_str
265                        .trim_start_matches('/')
266                        .trim_end_matches('/')
267                        .trim_matches('"')
268                        .trim_matches('\'');
269
270                    if let Some(actual) = PathMapper::get_string(root, path) {
271                        // Simple regex matching (for full implementation, use regex crate)
272                        // For now, support basic wildcard patterns
273                        if !Self::simple_regex_match(&actual, pattern) {
274                            ok = false;
275                            break;
276                        }
277                    } else {
278                        ok = false;
279                        break;
280                    }
281                    continue;
282                }
283
284                // Numeric comparisons: >, <, >=, <=
285                for op in &[">=", "<=", ">", "<"] {
286                    if let Some(idx) = cond.find(op) {
287                        let (path, rest) = cond.split_at(idx);
288                        let path = path.trim();
289                        let target_str = rest
290                            .trim_start_matches(op)
291                            .trim()
292                            .trim_matches('"')
293                            .trim_matches('\'');
294
295                        if let Ok(target_num) = target_str.parse::<f64>() {
296                            if let Some(actual_val) = PathMapper::get_path(root, path) {
297                                let actual_num = actual_val.as_f64().or_else(|| {
298                                    actual_val.as_str().and_then(|s| s.parse::<f64>().ok())
299                                });
300
301                                if let Some(actual) = actual_num {
302                                    let matches = match *op {
303                                        ">" => actual > target_num,
304                                        "<" => actual < target_num,
305                                        ">=" => actual >= target_num,
306                                        "<=" => actual <= target_num,
307                                        _ => false,
308                                    };
309                                    if !matches {
310                                        ok = false;
311                                        break;
312                                    }
313                                    continue;
314                                }
315                            }
316                        }
317                        ok = false;
318                        break;
319                    }
320                }
321
322                // "in" list check
323                if let Some(idx) = cond.find(" in ") {
324                    let (path, rest) = cond.split_at(idx);
325                    let path = path.trim();
326                    let list_str = rest.trim_start_matches(" in ").trim();
327                    let list_str = list_str.trim_start_matches('[').trim_end_matches(']');
328                    let values: Vec<String> = list_str
329                        .split(',')
330                        .filter_map(|v| v.trim().trim_matches('\'').trim_matches('"').parse().ok())
331                        .collect();
332                    let actual = PathMapper::get_string(root, path);
333                    if !actual.map(|a| values.contains(&a)).unwrap_or(false) {
334                        ok = false;
335                        break;
336                    }
337                    continue;
338                }
339
340                // "!= null" check
341                if let Some(idx) = cond.find("!= null") {
342                    let path = cond[..idx].trim();
343                    let val = PathMapper::get_path(root, path);
344                    if val.is_none() || val == Some(&Value::Null) {
345                        ok = false;
346                        break;
347                    }
348                    continue;
349                }
350
351                // "== null" check
352                if let Some(idx) = cond.find("== null") {
353                    let path = cond[..idx].trim();
354                    let val = PathMapper::get_path(root, path);
355                    if val.is_some() && val != Some(&Value::Null) {
356                        ok = false;
357                        break;
358                    }
359                    continue;
360                }
361
362                // "==" equality check
363                if let Some(idx) = cond.find("==") {
364                    let (path, value_part) = cond.split_at(idx);
365                    let path = path.trim();
366                    let target = value_part
367                        .trim_start_matches("==")
368                        .trim()
369                        .trim_matches('\'')
370                        .trim_matches('"');
371                    let actual = PathMapper::get_string(root, path);
372                    if actual.as_deref() != Some(target) {
373                        ok = false;
374                        break;
375                    }
376                    continue;
377                }
378
379                // "!=" inequality check
380                if let Some(idx) = cond.find("!=") {
381                    let (path, value_part) = cond.split_at(idx);
382                    let path = path.trim();
383                    let target = value_part
384                        .trim_start_matches("!=")
385                        .trim()
386                        .trim_matches('\'')
387                        .trim_matches('"');
388                    let actual = PathMapper::get_string(root, path);
389                    if actual.as_deref() == Some(target) {
390                        ok = false;
391                        break;
392                    }
393                    continue;
394                }
395            }
396            if ok {
397                return true;
398            }
399        }
400        false
401    }
402
403    /// Simple regex matching (supports basic wildcards)
404    /// For full regex support, use the `regex` crate
405    fn simple_regex_match(text: &str, pattern: &str) -> bool {
406        // Simple wildcard matching: * matches any sequence, ? matches any character
407        if pattern.contains('*') || pattern.contains('?') {
408            // Basic wildcard implementation
409            let mut text_chars = text.chars();
410            let mut pattern_chars = pattern.chars();
411
412            while let Some(p) = pattern_chars.next() {
413                match p {
414                    '*' => {
415                        // Match zero or more characters
416                        if let Some(next_p) = pattern_chars.next() {
417                            // Find next character in pattern
418                            while let Some(t) = text_chars.next() {
419                                if t == next_p {
420                                    break;
421                                }
422                            }
423                        } else {
424                            // * at end matches rest
425                            return true;
426                        }
427                    }
428                    '?' => {
429                        // Match any single character
430                        if text_chars.next().is_none() {
431                            return false;
432                        }
433                    }
434                    c => {
435                        if text_chars.next() != Some(c) {
436                            return false;
437                        }
438                    }
439                }
440            }
441            text_chars.next().is_none()
442        } else {
443            // Simple substring match
444            text.contains(pattern)
445        }
446    }
447}