Skip to main content

shard_den_json_extractor/
lib.rs

1//! JSON Extractor - Extract fields from JSON using path syntax
2//!
3//! Supports JSONPath-like syntax:
4//! - `key` - Get key value
5//! - `*` - Wildcard
6//! - `[]` - Array iteration
7//! - `[0]` - Array index
8//! - `..` - Recursive descent
9
10#[cfg(feature = "wasm")]
11use wasm_bindgen::prelude::*;
12
13pub mod extract;
14pub mod format;
15pub mod path;
16
17pub use extract::{ExtractResult, Extractor};
18pub use format::{Formatter, OutputFormat};
19pub use path::{JsonPath, PathParser};
20
21use shard_den_core::ShardDenError;
22
23/// Maximum allowed JSON nesting depth to prevent stack overflow
24const MAX_JSON_DEPTH: usize = 128;
25
26/// Check JSON value depth recursively
27fn check_json_depth(value: &serde_json::Value, depth: usize) -> Result<(), String> {
28    if depth > MAX_JSON_DEPTH {
29        return Err(format!("JSON too deeply nested (max: {})", MAX_JSON_DEPTH));
30    }
31
32    match value {
33        serde_json::Value::Array(arr) => {
34            for item in arr {
35                check_json_depth(item, depth + 1)?;
36            }
37        }
38        serde_json::Value::Object(obj) => {
39            for (_, v) in obj {
40                check_json_depth(v, depth + 1)?;
41            }
42        }
43        _ => {}
44    }
45    Ok(())
46}
47
/// Split a comma-separated list of paths into individual path strings.
///
/// Parsing rules:
/// - `,` separates paths unless it is escaped or inside double quotes.
/// - `"` toggles quoted mode and is not copied to the output.
/// - `\` escapes the next character, which is copied verbatim (so `\,` and
///   `\"` yield a literal comma / quote); the backslash itself is dropped.
///   A trailing lone backslash is discarded.
/// - Each path is trimmed of surrounding whitespace.
/// - Segments that are empty or contain only whitespace are skipped, so the
///   result never contains an empty path.
///
/// Returns the paths in the order they appear in `input`.
pub fn parse_paths(input: &str) -> Vec<String> {
    /// Move the trimmed contents of `segment` into `paths` (unless blank) and
    /// reset `segment` for the next path.
    fn flush(segment: &mut String, paths: &mut Vec<String>) {
        let trimmed = segment.trim();
        // Check emptiness *after* trimming so that inputs such as "a, ,b" or
        // "a,  " do not produce empty path strings.
        if !trimmed.is_empty() {
            paths.push(trimmed.to_string());
        }
        segment.clear();
    }

    let mut paths = Vec::new();
    let mut current = String::new();
    let mut in_quotes = false;
    let mut escape_next = false;

    for ch in input.chars() {
        match (ch, escape_next, in_quotes) {
            // An unescaped backslash escapes whatever comes next.
            ('\\', false, _) => escape_next = true,
            // An unescaped quote toggles quoted mode.
            ('"', false, _) => in_quotes = !in_quotes,
            // An unescaped comma outside quotes ends the current path.
            (',', false, false) => flush(&mut current, &mut paths),
            // Everything else, including any escaped character, is literal.
            _ => {
                current.push(ch);
                escape_next = false;
            }
        }
    }

    flush(&mut current, &mut paths);
    paths
}
86
/// Pure Rust JSON Extractor (for CLI)
///
/// Bundles the three collaborators used by the methods of this type: an
/// [`Extractor`] that evaluates paths, a [`Formatter`] that renders results,
/// and a [`PathParser`] that discovers paths in a document.
// NOTE(review): every field is read by a method in this file, so this lint
// suppression may no longer be needed — confirm before removing.
#[allow(dead_code)]
pub struct JsonExtractorCore {
    /// Evaluates the requested paths against a parsed document.
    extractor: Extractor,
    /// Renders extracted values in the requested output format.
    formatter: Formatter,
    /// Used by `detect_paths` to list the paths present in a document.
    path_parser: PathParser,
}
94
95impl JsonExtractorCore {
96    pub fn new() -> Self {
97        Self {
98            extractor: Extractor::new(),
99            formatter: Formatter::new(),
100            path_parser: PathParser::new(),
101        }
102    }
103
104    pub fn extract(&self, json: &str, paths: &str) -> shard_den_core::Result<String> {
105        let paths_vec = parse_paths(paths);
106
107        let value: serde_json::Value = serde_json::from_str(json)?;
108        check_json_depth(&value, 0).map_err(ShardDenError::invalid_input)?;
109        let result = self.extractor.extract(&value, &paths_vec)?;
110
111        // Return just the extracted values as JSON array
112        // Each path returns an array of values from jsonpath-rust
113        let mut all_values: Vec<serde_json::Value> = Vec::new();
114        for extracted in &result.values {
115            if let serde_json::Value::Array(arr) = &extracted.value {
116                all_values.extend(arr.clone());
117            } else {
118                all_values.push(extracted.value.clone());
119            }
120        }
121        serde_json::to_string(&all_values).map_err(Into::into)
122    }
123
124    pub fn extract_with_format(
125        &self, json: &str, paths: &str, format: OutputFormat,
126    ) -> shard_den_core::Result<String> {
127        let paths_vec = parse_paths(paths);
128
129        let value: serde_json::Value = serde_json::from_str(json)?;
130        check_json_depth(&value, 0).map_err(ShardDenError::invalid_input)?;
131        let result = self.extractor.extract(&value, &paths_vec)?;
132
133        // Get all extracted values (flatten arrays from jsonpath)
134        let mut all_values: Vec<serde_json::Value> = Vec::new();
135        for extracted in &result.values {
136            if let serde_json::Value::Array(arr) = &extracted.value {
137                all_values.extend(arr.clone());
138            } else {
139                all_values.push(extracted.value.clone());
140            }
141        }
142
143        // Convert to JSON Value for formatting
144        let json_value: serde_json::Value = serde_json::to_value(&all_values)?;
145        self.formatter.format(&json_value, format)
146    }
147
148    pub fn detect_paths(&self, json: &str) -> shard_den_core::Result<Vec<String>> {
149        let value: serde_json::Value = serde_json::from_str(json)?;
150        check_json_depth(&value, 0).map_err(ShardDenError::invalid_input)?;
151        Ok(self.path_parser.detect_paths(&value))
152    }
153}
154
155impl Default for JsonExtractorCore {
156    fn default() -> Self {
157        Self::new()
158    }
159}
160
#[cfg(feature = "wasm")]
/// WASM-compatible JSON Extractor
///
/// Only compiled when the `wasm` feature is enabled. Holds the same three
/// collaborators as [`JsonExtractorCore`].
#[wasm_bindgen]
// NOTE(review): every field is read by a method in this file, so this lint
// suppression may no longer be needed — confirm before removing.
#[allow(dead_code)]
pub struct JsonExtractor {
    /// Evaluates the requested paths against a parsed document.
    extractor: Extractor,
    /// Renders extracted values in the requested output format.
    formatter: Formatter,
    /// Used by `detect_paths` to list the paths present in a document.
    path_parser: PathParser,
}
170
#[cfg(feature = "wasm")]
#[wasm_bindgen]
impl JsonExtractor {
    /// Create a new extractor
    #[wasm_bindgen(constructor)]
    pub fn new() -> Self {
        Self {
            extractor: Extractor::new(),
            formatter: Formatter::new(),
            path_parser: PathParser::new(),
        }
    }

    /// Extract fields from JSON
    ///
    /// `paths` is a comma-separated list of paths. Returns the result as a
    /// JSON string: the bare value when exactly one path yields exactly one
    /// value, otherwise a JSON array of all flattened values. Errors are
    /// reported as a string `JsValue`.
    pub fn extract(&self, json: &str, paths: &str) -> Result<String, JsValue> {
        let json_value = self.extract_value(json, paths)?;
        serde_json::to_string(&json_value).map_err(|e| JsValue::from_str(&e.to_string()))
    }

    /// Extract with format
    ///
    /// `format` is matched case-insensitively against `"csv"`, `"text"` and
    /// `"yaml"`; any other string selects JSON output.
    pub fn extract_with_format(
        &self, json: &str, paths: &str, format: &str,
    ) -> Result<String, JsValue> {
        let json_value = self.extract_value(json, paths)?;

        let output_format = match format.to_lowercase().as_str() {
            "csv" => OutputFormat::Csv,
            "text" => OutputFormat::Text,
            "yaml" => OutputFormat::Yaml,
            _ => OutputFormat::Json,
        };

        self.formatter
            .format(&json_value, output_format)
            .map_err(|e| JsValue::from_str(&e.to_string()))
    }

    /// Auto-detect available paths in JSON
    ///
    /// Returns the detected paths serialized as a JSON array string.
    pub fn detect_paths(&self, json: &str) -> Result<String, JsValue> {
        let value = Self::parse_checked(json)?;

        let paths = self.path_parser.detect_paths(&value);

        serde_json::to_string(&paths).map_err(|e| JsValue::from_str(&e.to_string()))
    }

    /// Identifier of this tool.
    #[wasm_bindgen(getter)]
    pub fn name(&self) -> String {
        "json-extractor".to_string()
    }

    /// Short human-readable description of this tool.
    #[wasm_bindgen(getter)]
    pub fn description(&self) -> String {
        "Extract fields from JSON using path syntax".to_string()
    }
}

// Private helpers live in a separate impl block so they stay out of the
// `#[wasm_bindgen]`-processed block above.
#[cfg(feature = "wasm")]
impl JsonExtractor {
    /// Parse `json` and reject documents that exceed the nesting limit.
    fn parse_checked(json: &str) -> Result<serde_json::Value, JsValue> {
        let value: serde_json::Value =
            serde_json::from_str(json).map_err(|e| JsValue::from_str(&e.to_string()))?;
        check_json_depth(&value, 0).map_err(|e| JsValue::from_str(&e))?;
        Ok(value)
    }

    /// Run the extraction shared by `extract` and `extract_with_format`.
    ///
    /// Array-valued results are flattened one level into a single list. When
    /// exactly one path was requested and exactly one value was produced,
    /// that value is returned unwrapped; otherwise the list is returned as a
    /// JSON array.
    fn extract_value(&self, json: &str, paths: &str) -> Result<serde_json::Value, JsValue> {
        let paths_vec = parse_paths(paths);
        let value = Self::parse_checked(json)?;

        let result = self
            .extractor
            .extract(&value, &paths_vec)
            .map_err(|e| JsValue::from_str(&e.to_string()))?;

        let mut all_values: Vec<serde_json::Value> = Vec::new();
        for extracted in &result.values {
            match &extracted.value {
                // Clone element by element rather than cloning the whole
                // array into a temporary first.
                serde_json::Value::Array(arr) => all_values.extend(arr.iter().cloned()),
                other => all_values.push(other.clone()),
            }
        }

        if paths_vec.len() == 1 && all_values.len() == 1 {
            // Length was just checked, so index 0 is always present.
            Ok(all_values.swap_remove(0))
        } else {
            Ok(serde_json::Value::Array(all_values))
        }
    }
}
282
/// `Default` builds the same value as [`JsonExtractor::new`].
#[cfg(feature = "wasm")]
impl Default for JsonExtractor {
    fn default() -> Self {
        JsonExtractor::new()
    }
}
289
/// Unit tests for `JsonExtractorCore`, `parse_paths` and `check_json_depth`.
///
/// Most extraction tests only assert that the call succeeds; they do not pin
/// the exact output.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_extractor_creation() {
        let extractor = JsonExtractorCore::new();
        let json = r#"{"name": "test"}"#;
        // JSONPath requires $ prefix
        let result = extractor.extract(json, "$.name");
        assert!(result.is_ok());
    }

    // NOTE(review): this test has the same body as `test_extractor_creation`.
    #[test]
    fn test_extract_placeholder() {
        let extractor = JsonExtractorCore::new();
        let json = r#"{"name": "test"}"#;
        // JSONPath requires $ prefix
        let result = extractor.extract(json, "$.name");
        assert!(result.is_ok());
    }

    #[test]
    fn test_extract_with_format_json() {
        let extractor = JsonExtractorCore::new();
        let json = r#"{"items": [{"id": 1}]}"#;
        let result = extractor.extract_with_format(json, "$.items[*].id", OutputFormat::Json);
        assert!(result.is_ok());
    }

    #[test]
    fn test_extract_with_format_csv() {
        let extractor = JsonExtractorCore::new();
        let json = r#"{"items": [{"id": 1}]}"#;
        let result = extractor.extract_with_format(json, "$.items[*].id", OutputFormat::Csv);
        assert!(result.is_ok());
    }

    #[test]
    fn test_extract_with_format_text() {
        let extractor = JsonExtractorCore::new();
        let json = r#"{"items": [{"id": 1}]}"#;
        let result = extractor.extract_with_format(json, "$.items[*].id", OutputFormat::Text);
        assert!(result.is_ok());
    }

    #[test]
    fn test_extract_with_format_yaml() {
        let extractor = JsonExtractorCore::new();
        let json = r#"{"items": [{"id": 1}]}"#;
        let result = extractor.extract_with_format(json, "$.items[*].id", OutputFormat::Yaml);
        assert!(result.is_ok());
    }

    #[test]
    fn test_detect_paths() {
        let extractor = JsonExtractorCore::new();
        let json = r#"{"name": "test", "data": {"id": 1}}"#;
        let result = extractor.detect_paths(json);
        assert!(result.is_ok());
        let paths = result.unwrap();
        assert!(paths.contains(&"$.name".to_string()));
        assert!(paths.contains(&"$.data".to_string()));
    }

    #[test]
    fn test_detect_paths_invalid_json() {
        let extractor = JsonExtractorCore::new();
        let json = r#"not json"#;
        let result = extractor.detect_paths(json);
        assert!(result.is_err());
    }

    #[test]
    fn test_extract_invalid_json() {
        let extractor = JsonExtractorCore::new();
        let json = r#"not json"#;
        let result = extractor.extract(json, "$.name");
        assert!(result.is_err());
    }

    #[test]
    fn test_extract_multiple_paths() {
        let extractor = JsonExtractorCore::new();
        let json = r#"{"name": "test", "value": 42}"#;
        // Two comma-separated paths in one request
        let result = extractor.extract(json, "$.name,$.value");
        assert!(result.is_ok());
    }

    #[test]
    fn test_extractor_default() {
        // Test Default implementation
        let extractor = JsonExtractorCore::default();
        let json = r#"{"name": "test"}"#;
        let result = extractor.extract(json, "$.name");
        assert!(result.is_ok());
    }

    #[test]
    fn test_extract_single_value_non_array() {
        // Extract a single scalar via a single path. Only success is asserted;
        // note that `JsonExtractorCore::extract` serializes its collected
        // values as a JSON array, so the output itself is array-wrapped.
        let extractor = JsonExtractorCore::new();
        let json = r#"{"name": "test", "count": 5}"#;
        let result = extractor.extract(json, "$.count");
        assert!(result.is_ok());
    }

    #[test]
    fn test_extract_with_format_single_value() {
        // Test extract_with_format with single value
        let extractor = JsonExtractorCore::new();
        let json = r#"{"value": 42}"#;
        let result = extractor.extract_with_format(json, "$.value", OutputFormat::Text);
        assert!(result.is_ok());
    }

    #[test]
    fn test_json_depth_limit() {
        // Test that deeply nested JSON is rejected
        // NOTE(review): the error may already come from the JSON parser rather
        // than from `check_json_depth` — verify which layer rejects the input.
        let extractor = JsonExtractorCore::new();

        // Create JSON with 200 levels of nesting
        let mut json = "{\"a\":".to_string();
        for _ in 0..199 {
            json.push_str("{\"a\":");
        }
        json.push_str("1");
        for _ in 0..200 {
            json.push_str("}");
        }

        let result = extractor.extract(&json, "$.a");
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_paths_basic() {
        let paths = parse_paths("$.name,$.value");
        assert_eq!(paths, vec!["$.name", "$.value"]);
    }

    #[test]
    fn test_parse_paths_with_quoted_comma() {
        // Test quoted string with comma inside
        let paths = parse_paths("\"a,b\",c");
        assert_eq!(paths, vec!["a,b", "c"]);
    }

    #[test]
    fn test_parse_paths_with_spaces() {
        // Surrounding whitespace is trimmed from every path
        let paths = parse_paths("  $.name  ,  $.value  ");
        assert_eq!(paths, vec!["$.name", "$.value"]);
    }

    #[test]
    fn test_parse_paths_empty() {
        let paths = parse_paths("");
        assert!(paths.is_empty());
    }

    #[test]
    fn test_parse_paths_single() {
        let paths = parse_paths("$.name");
        assert_eq!(paths, vec!["$.name"]);
    }

    #[test]
    fn test_parse_paths_with_escape() {
        // A backslash-escaped comma is kept as a literal comma
        let paths = parse_paths("a\\,b,c");
        assert_eq!(paths, vec!["a,b", "c"]);
    }

    #[test]
    fn test_parse_paths_with_escaped_quote() {
        // A backslash-escaped quote is kept as a literal quote. The input has
        // no surrounding quotes, so this exercises the escape outside quoted
        // mode.
        let paths = parse_paths(r#"a\"b,c"#);
        assert_eq!(paths, vec![r#"a"b"#, "c"]);
    }

    #[test]
    fn test_extract_scalar_value_not_array() {
        // Extract a scalar result and check that it appears in the output
        let extractor = JsonExtractorCore::new();
        let json = r#"{"value": 42}"#;
        let result = extractor.extract(json, "$.value");
        assert!(result.is_ok());
        let output = result.unwrap();
        // Should contain the value
        assert!(output.contains("42"));
    }

    #[test]
    fn test_extract_with_format_scalar_value() {
        // Test extract_with_format with a string-valued field
        let extractor = JsonExtractorCore::new();
        let json = r#"{"name": "test"}"#;
        let result = extractor.extract_with_format(json, "$.name", OutputFormat::Text);
        assert!(result.is_ok());
    }

    // Tests for check_json_depth function
    #[test]
    fn test_check_json_depth_valid() {
        let json = serde_json::json!({
            "level1": {
                "level2": {
                    "level3": "value"
                }
            }
        });
        let result = check_json_depth(&json, 0);
        assert!(result.is_ok());
    }

    #[test]
    fn test_check_json_depth_array() {
        // Arrays count as a nesting level just like objects
        let json = serde_json::json!({
            "items": [{"a": 1}, {"a": 2}]
        });
        let result = check_json_depth(&json, 0);
        assert!(result.is_ok());
    }

    #[test]
    fn test_check_json_depth_exceeds_limit() {
        // Build the value directly (no parsing): one object wrapped 130 more
        // times, which is deeper than MAX_JSON_DEPTH
        let mut json = serde_json::json!({"a": 1});
        for _ in 0..130 {
            json = serde_json::json!({"a": json});
        }
        let result = check_json_depth(&json, 0);
        assert!(result.is_err());
    }
}