ai_lib_rust/utils/json_path.rs
1//! JSONPath evaluator and path mapper for frame selection and field extraction
2//!
3//! Inspired by ai-lib's PathMapper implementation, with support for:
4//! - Nested path access (e.g., "a.b.c")
5//! - Array indexing (e.g., "choices[0].delta.content")
6//! - Condition evaluation (exists, ==, !=, in, &&, ||, >, <, >=, <=)
//! - Pattern matching via `=~` (`*` / `?` wildcards or substring test; not full regular expressions)
8
9use serde_json::{json, Value};
10use std::collections::HashMap;
11
12/// Path mapper error
13#[derive(Debug, thiserror::Error)]
14pub enum PathMapperError {
15 #[error("Invalid path: {0}")]
16 InvalidPath(String),
17
18 #[error("Cannot set value at path: {0}")]
19 CannotSetValue(String),
20}
21
/// Path mapper for extracting and setting values in JSON using dot-notation paths
///
/// The type carries no state; it only groups the path helpers. The derives make it
/// printable and freely copyable like any other unit marker type.
#[derive(Debug, Clone, Copy, Default)]
pub struct PathMapper;
24
25impl PathMapper {
26 /// Get value from JSON using dot-notation path (supports array indexing)
27 ///
28 /// Examples:
29 /// - "choices[0].delta.content"
30 /// - "input.temperature"
31 /// - "delta.text"
32 pub fn get_path<'a>(obj: &'a Value, path: &str) -> Option<&'a Value> {
33 if path.is_empty() {
34 return None;
35 }
36
37 // Remove leading "$." if present (JSONPath style)
38 let normalized = path.trim().trim_start_matches("$.").to_string();
39 let parts: Vec<&str> = normalized.split('.').collect();
40 let mut current = obj;
41
42 for part in parts {
43 if part.is_empty() {
44 return None;
45 }
46
47 // Check if part contains array index, e.g., "choices[0]"
48 if let Some(bracket_pos) = part.find('[') {
49 // Extract key and index
50 let key = &part[..bracket_pos];
51 let idx_str = part[bracket_pos + 1..].trim_end_matches(']');
52
53 // First access the object key
54 if !key.is_empty() {
55 match current {
56 Value::Object(map) => {
57 current = map.get(key)?;
58 }
59 _ => return None,
60 }
61 }
62
63 // Then access the array index
64 if let Ok(idx) = idx_str.parse::<usize>() {
65 match current {
66 Value::Array(arr) => {
67 current = arr.get(idx)?;
68 }
69 _ => return None,
70 }
71 } else if idx_str == "*" {
72 // Wildcard: get first element
73 match current {
74 Value::Array(arr) => {
75 current = arr.first()?;
76 }
77 _ => return None,
78 }
79 } else {
80 return None;
81 }
82 } else {
83 // Simple key access OR dot-index access (e.g. "choices.0.delta")
84 match current {
85 Value::Object(map) => {
86 current = map.get(part)?;
87 }
88 Value::Array(arr) => {
89 // Support "0" / "1" style index segments (common in some JSONPath variants)
90 if let Ok(idx) = part.parse::<usize>() {
91 current = arr.get(idx)?;
92 } else if part == "*" {
93 current = arr.first()?;
94 } else {
95 return None;
96 }
97 }
98 _ => return None,
99 }
100 }
101 }
102
103 Some(current)
104 }
105
106 /// Get string value from path (converts number to string if needed)
107 pub fn get_string(obj: &Value, path: &str) -> Option<String> {
108 Self::get_path(obj, path).and_then(|v| {
109 if v.is_string() {
110 v.as_str().map(|s| s.to_string())
111 } else {
112 serde_json::to_string(v).ok()
113 }
114 })
115 }
116
117 /// Set value at nested path in JSON object
118 ///
119 /// Examples:
120 /// - "input.temperature" -> sets obj["input"]["temperature"]
121 /// - "generationConfig.maxOutputTokens" -> sets obj["generationConfig"]["maxOutputTokens"]
122 pub fn set_path(obj: &mut Value, path: &str, value: Value) -> Result<(), PathMapperError> {
123 if path.is_empty() {
124 return Err(PathMapperError::InvalidPath("Empty path".to_string()));
125 }
126
127 // Remove leading "$." if present
128 let normalized = path.trim().trim_start_matches("$.").to_string();
129 let parts: Vec<&str> = normalized.split('.').collect();
130
131 if parts.is_empty() {
132 return Err(PathMapperError::InvalidPath("Empty path parts".to_string()));
133 }
134
135 // Ensure root object is Object
136 if !obj.is_object() {
137 *obj = json!({});
138 }
139
140 let mut current = obj
141 .as_object_mut()
142 .ok_or_else(|| PathMapperError::CannotSetValue("Root is not an object".to_string()))?;
143
144 // Process all but the last path segment
145 for (idx, part) in parts.iter().enumerate().take(parts.len() - 1) {
146 if part.is_empty() {
147 return Err(PathMapperError::InvalidPath(format!(
148 "Empty path part at index {}",
149 idx
150 )));
151 }
152
153 // If path doesn't exist or is not an object, create new object
154 if !current.contains_key(*part) || !current[*part].is_object() {
155 current.insert(part.to_string(), json!({}));
156 }
157
158 // Move to next level
159 current = current[*part].as_object_mut().ok_or_else(|| {
160 PathMapperError::CannotSetValue(format!("Cannot access object at path: {}", part))
161 })?;
162 }
163
164 // Set the last path segment's value
165 let last_part = parts
166 .last()
167 .ok_or_else(|| PathMapperError::InvalidPath("No last part".to_string()))?;
168
169 if last_part.is_empty() {
170 return Err(PathMapperError::InvalidPath(
171 "Last path part is empty".to_string(),
172 ));
173 }
174
175 current.insert(last_part.to_string(), value);
176 Ok(())
177 }
178
179 /// Batch set multiple paths
180 pub fn set_paths(
181 obj: &mut Value,
182 paths: &HashMap<String, Value>,
183 ) -> Result<(), PathMapperError> {
184 for (path, value) in paths {
185 Self::set_path(obj, path, value.clone())?;
186 }
187 Ok(())
188 }
189}
190
/// JSONPath evaluator for condition matching
///
/// Holds one condition expression as text.
/// Supports: exists, ==, !=, in, &&, ||, >, <, >=, <=, and `=~` pattern matching
/// (wildcards / substring, not full regular expressions).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct JsonPathEvaluator {
    /// Raw expression text exactly as supplied to the constructor
    expression: String,
}
197
198impl JsonPathEvaluator {
199 pub fn new(expression: &str) -> Result<Self, String> {
200 if expression.is_empty() {
201 return Err("Empty expression".to_string());
202 }
203 Ok(Self {
204 expression: expression.to_string(),
205 })
206 }
207
208 /// Check if expression matches the JSON value
209 ///
210 /// Supports:
211 /// - exists($.path) - check if path exists
212 /// - $.path == "value" - equality check
213 /// - $.path != "value" - inequality check
214 /// - $.path in ['value1', 'value2'] - list membership
215 /// - $.path != null / $.path == null - null check
216 /// - $.path > 10 / $.path < 10 - numeric comparison
217 /// - $.path >= 10 / $.path <= 10 - numeric comparison
218 /// - $.path =~ /pattern/ - regex matching
219 /// - && and || for logical combination
220 pub fn matches(&self, value: &Value) -> bool {
221 Self::evaluate_match(&self.expression, value)
222 }
223
224 /// Extract string value from JSON using path
225 pub fn extract_string(&self, value: &Value) -> Option<String> {
226 // If expression is a simple path, extract it
227 if self.expression.starts_with("$.") {
228 return PathMapper::get_string(value, &self.expression);
229 }
230 None
231 }
232
233 /// Evaluate match expression with support for numeric comparisons and regex
234 fn evaluate_match(expr: &str, root: &Value) -> bool {
235 // Split by OR
236 let or_parts: Vec<&str> = expr.split("||").collect();
237 for or_part in or_parts {
238 let mut ok = true;
239 // Split by AND
240 let and_parts: Vec<&str> = or_part.split("&&").collect();
241 for part in and_parts {
242 let cond = part.trim();
243 if cond.is_empty() {
244 continue;
245 }
246
247 // exists() check
248 if cond.starts_with("exists(") && cond.ends_with(')') {
249 let path = cond.trim_start_matches("exists(").trim_end_matches(')');
250 if PathMapper::get_path(root, path).is_none() {
251 ok = false;
252 break;
253 }
254 continue;
255 }
256
257 // Regex matching: $.path =~ /pattern/
258 if let Some(idx) = cond.find("=~") {
259 let (path, rest) = cond.split_at(idx);
260 let path = path.trim();
261 let pattern_str = rest.trim_start_matches("=~").trim();
262
263 // Extract pattern from /pattern/ or "pattern"
264 let pattern = pattern_str
265 .trim_start_matches('/')
266 .trim_end_matches('/')
267 .trim_matches('"')
268 .trim_matches('\'');
269
270 if let Some(actual) = PathMapper::get_string(root, path) {
271 // Simple regex matching (for full implementation, use regex crate)
272 // For now, support basic wildcard patterns
273 if !Self::simple_regex_match(&actual, pattern) {
274 ok = false;
275 break;
276 }
277 } else {
278 ok = false;
279 break;
280 }
281 continue;
282 }
283
284 // Numeric comparisons: >, <, >=, <=
285 for op in &[">=", "<=", ">", "<"] {
286 if let Some(idx) = cond.find(op) {
287 let (path, rest) = cond.split_at(idx);
288 let path = path.trim();
289 let target_str = rest
290 .trim_start_matches(op)
291 .trim()
292 .trim_matches('"')
293 .trim_matches('\'');
294
295 if let Ok(target_num) = target_str.parse::<f64>() {
296 if let Some(actual_val) = PathMapper::get_path(root, path) {
297 let actual_num = actual_val.as_f64().or_else(|| {
298 actual_val.as_str().and_then(|s| s.parse::<f64>().ok())
299 });
300
301 if let Some(actual) = actual_num {
302 let matches = match *op {
303 ">" => actual > target_num,
304 "<" => actual < target_num,
305 ">=" => actual >= target_num,
306 "<=" => actual <= target_num,
307 _ => false,
308 };
309 if !matches {
310 ok = false;
311 break;
312 }
313 continue;
314 }
315 }
316 }
317 ok = false;
318 break;
319 }
320 }
321
322 // "in" list check
323 if let Some(idx) = cond.find(" in ") {
324 let (path, rest) = cond.split_at(idx);
325 let path = path.trim();
326 let list_str = rest.trim_start_matches(" in ").trim();
327 let list_str = list_str.trim_start_matches('[').trim_end_matches(']');
328 let values: Vec<String> = list_str
329 .split(',')
330 .filter_map(|v| v.trim().trim_matches('\'').trim_matches('"').parse().ok())
331 .collect();
332 let actual = PathMapper::get_string(root, path);
333 if !actual.map(|a| values.contains(&a)).unwrap_or(false) {
334 ok = false;
335 break;
336 }
337 continue;
338 }
339
340 // "!= null" check
341 if let Some(idx) = cond.find("!= null") {
342 let path = cond[..idx].trim();
343 let val = PathMapper::get_path(root, path);
344 if val.is_none() || val == Some(&Value::Null) {
345 ok = false;
346 break;
347 }
348 continue;
349 }
350
351 // "== null" check
352 if let Some(idx) = cond.find("== null") {
353 let path = cond[..idx].trim();
354 let val = PathMapper::get_path(root, path);
355 if val.is_some() && val != Some(&Value::Null) {
356 ok = false;
357 break;
358 }
359 continue;
360 }
361
362 // "==" equality check
363 if let Some(idx) = cond.find("==") {
364 let (path, value_part) = cond.split_at(idx);
365 let path = path.trim();
366 let target = value_part
367 .trim_start_matches("==")
368 .trim()
369 .trim_matches('\'')
370 .trim_matches('"');
371 let actual = PathMapper::get_string(root, path);
372 if actual.as_deref() != Some(target) {
373 ok = false;
374 break;
375 }
376 continue;
377 }
378
379 // "!=" inequality check
380 if let Some(idx) = cond.find("!=") {
381 let (path, value_part) = cond.split_at(idx);
382 let path = path.trim();
383 let target = value_part
384 .trim_start_matches("!=")
385 .trim()
386 .trim_matches('\'')
387 .trim_matches('"');
388 let actual = PathMapper::get_string(root, path);
389 if actual.as_deref() == Some(target) {
390 ok = false;
391 break;
392 }
393 continue;
394 }
395 }
396 if ok {
397 return true;
398 }
399 }
400 false
401 }
402
/// Simple pattern matching (wildcards or substring; not a real regex engine)
/// For full regex support, use the `regex` crate
///
/// - If `pattern` contains `*` or `?`, it is matched against the WHOLE of `text`:
///   `*` matches any sequence of characters (including none), `?` matches exactly
///   one character, everything else matches literally.
/// - Otherwise the result is whether `text` contains `pattern` as a substring.
fn simple_regex_match(text: &str, pattern: &str) -> bool {
    if !pattern.contains('*') && !pattern.contains('?') {
        // Simple substring match
        return text.contains(pattern);
    }

    let text: Vec<char> = text.chars().collect();
    let pattern: Vec<char> = pattern.chars().collect();

    let mut t = 0; // next text position to match
    let mut p = 0; // next pattern position to match
    // Most recent `*`: (its pattern index, text index where it currently stops)
    let mut last_star: Option<(usize, usize)> = None;

    while t < text.len() {
        match pattern.get(p).copied() {
            Some('*') => {
                // Start by letting the star match nothing; remember it for backtracking
                last_star = Some((p, t));
                p += 1;
            }
            Some(c) if c == '?' || c == text[t] => {
                t += 1;
                p += 1;
            }
            _ => match last_star {
                // Mismatch: let the last star swallow one more character and retry
                Some((star_p, star_t)) => {
                    last_star = Some((star_p, star_t + 1));
                    p = star_p + 1;
                    t = star_t + 1;
                }
                None => return false,
            },
        }
    }

    // Text is consumed; only trailing stars may remain in the pattern
    pattern[p..].iter().all(|&c| c == '*')
}
447}