shard_den_json_extractor/
lib.rs1#[cfg(feature = "wasm")]
11use wasm_bindgen::prelude::*;
12
13pub mod extract;
14pub mod format;
15pub mod path;
16
17pub use extract::{ExtractResult, Extractor};
18pub use format::{Formatter, OutputFormat};
19pub use path::{JsonPath, PathParser};
20
21use shard_den_core::ShardDenError;
22
23const MAX_JSON_DEPTH: usize = 128;
25
26fn check_json_depth(value: &serde_json::Value, depth: usize) -> Result<(), String> {
28 if depth > MAX_JSON_DEPTH {
29 return Err(format!("JSON too deeply nested (max: {})", MAX_JSON_DEPTH));
30 }
31
32 match value {
33 serde_json::Value::Array(arr) => {
34 for item in arr {
35 check_json_depth(item, depth + 1)?;
36 }
37 }
38 serde_json::Value::Object(obj) => {
39 for (_, v) in obj {
40 check_json_depth(v, depth + 1)?;
41 }
42 }
43 _ => {}
44 }
45 Ok(())
46}
47
/// Splits a comma-separated list of path expressions into individual paths.
///
/// Parsing rules:
///
/// * `,` ends the current path unless it appears inside double quotes or is
///   preceded by a backslash.
/// * `"` toggles quoted mode; the quote character itself is not kept.
/// * `\` escapes the next character, which is then kept literally (this is how
///   a literal `,`, `"` or `\` is written). A trailing lone backslash is
///   dropped.
/// * Every path is trimmed of surrounding whitespace. Segments that are empty
///   or contain only whitespace are skipped, so `"a, ,b"` and `"a,,b"` both
///   yield `["a", "b"]` and no empty path is ever returned.
///
/// An unterminated quote is not an error: the rest of the input simply becomes
/// part of the last path.
pub fn parse_paths(input: &str) -> Vec<String> {
    /// Moves the trimmed pending segment into `paths` (unless nothing is left
    /// after trimming) and resets the buffer for the next segment.
    fn flush(segment: &mut String, paths: &mut Vec<String>) {
        let trimmed = segment.trim();
        if !trimmed.is_empty() {
            paths.push(trimmed.to_string());
        }
        segment.clear();
    }

    let mut paths = Vec::new();
    let mut current = String::new();
    let mut in_quotes = false;
    let mut escape_next = false;

    for ch in input.chars() {
        // An escaped character is always taken literally, whatever it is.
        if escape_next {
            current.push(ch);
            escape_next = false;
            continue;
        }

        match ch {
            '\\' => escape_next = true,
            '"' => in_quotes = !in_quotes,
            ',' if !in_quotes => flush(&mut current, &mut paths),
            _ => current.push(ch),
        }
    }

    flush(&mut current, &mut paths);
    paths
}
86
87#[allow(dead_code)]
89pub struct JsonExtractorCore {
90 extractor: Extractor,
91 formatter: Formatter,
92 path_parser: PathParser,
93}
94
95impl JsonExtractorCore {
96 pub fn new() -> Self {
97 Self {
98 extractor: Extractor::new(),
99 formatter: Formatter::new(),
100 path_parser: PathParser::new(),
101 }
102 }
103
104 pub fn extract(&self, json: &str, paths: &str) -> shard_den_core::Result<String> {
105 let paths_vec = parse_paths(paths);
106
107 let value: serde_json::Value = serde_json::from_str(json)?;
108 check_json_depth(&value, 0).map_err(ShardDenError::invalid_input)?;
109 let result = self.extractor.extract(&value, &paths_vec)?;
110
111 let mut all_values: Vec<serde_json::Value> = Vec::new();
114 for extracted in &result.values {
115 if let serde_json::Value::Array(arr) = &extracted.value {
116 all_values.extend(arr.clone());
117 } else {
118 all_values.push(extracted.value.clone());
119 }
120 }
121 serde_json::to_string(&all_values).map_err(Into::into)
122 }
123
124 pub fn extract_with_format(
125 &self, json: &str, paths: &str, format: OutputFormat,
126 ) -> shard_den_core::Result<String> {
127 let paths_vec = parse_paths(paths);
128
129 let value: serde_json::Value = serde_json::from_str(json)?;
130 check_json_depth(&value, 0).map_err(ShardDenError::invalid_input)?;
131 let result = self.extractor.extract(&value, &paths_vec)?;
132
133 let mut all_values: Vec<serde_json::Value> = Vec::new();
135 for extracted in &result.values {
136 if let serde_json::Value::Array(arr) = &extracted.value {
137 all_values.extend(arr.clone());
138 } else {
139 all_values.push(extracted.value.clone());
140 }
141 }
142
143 let json_value: serde_json::Value = serde_json::to_value(&all_values)?;
145 self.formatter.format(&json_value, format)
146 }
147
148 pub fn detect_paths(&self, json: &str) -> shard_den_core::Result<Vec<String>> {
149 let value: serde_json::Value = serde_json::from_str(json)?;
150 check_json_depth(&value, 0).map_err(ShardDenError::invalid_input)?;
151 Ok(self.path_parser.detect_paths(&value))
152 }
153}
154
155impl Default for JsonExtractorCore {
156 fn default() -> Self {
157 Self::new()
158 }
159}
160
/// WebAssembly-facing JSON extractor, exported through `wasm_bindgen`.
///
/// Only compiled with the `wasm` feature. All failures are reported to the
/// caller as a `JsValue` string carrying the error message.
// NOTE(review): every field is read by a method below, so this `allow` looks
// unnecessary — confirm before removing it.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
#[allow(dead_code)]
pub struct JsonExtractor {
    extractor: Extractor,
    formatter: Formatter,
    path_parser: PathParser,
}

#[cfg(feature = "wasm")]
#[wasm_bindgen]
impl JsonExtractor {
    /// Creates an extractor with freshly constructed components.
    #[wasm_bindgen(constructor)]
    pub fn new() -> Self {
        Self {
            extractor: Extractor::new(),
            formatter: Formatter::new(),
            path_parser: PathParser::new(),
        }
    }

    /// Extracts the values matched by the comma-separated `paths` from `json`
    /// and returns them as JSON text.
    ///
    /// Array matches are flattened one level. When exactly one path was given
    /// and exactly one value remains after flattening, that value is returned
    /// on its own; otherwise the result is a JSON array of all values.
    ///
    /// Fails with the error message when `json` is invalid or too deeply
    /// nested, when extraction fails, or when serialisation fails.
    pub fn extract(&self, json: &str, paths: &str) -> Result<String, JsValue> {
        let json_value = self.extract_value(json, paths)?;
        serde_json::to_string(&json_value).map_err(Self::js_error)
    }

    /// Same as `extract`, but renders the result in the named `format`.
    ///
    /// `format` is matched case-insensitively against `"csv"`, `"text"` and
    /// `"yaml"`; any other name selects JSON output.
    pub fn extract_with_format(
        &self,
        json: &str,
        paths: &str,
        format: &str,
    ) -> Result<String, JsValue> {
        let json_value = self.extract_value(json, paths)?;

        let output_format = match format.to_lowercase().as_str() {
            "csv" => OutputFormat::Csv,
            "text" => OutputFormat::Text,
            "yaml" => OutputFormat::Yaml,
            _ => OutputFormat::Json,
        };

        self.formatter
            .format(&json_value, output_format)
            .map_err(Self::js_error)
    }

    /// Returns the paths detected in `json`, serialised as a JSON array of
    /// strings.
    ///
    /// Fails with the error message when `json` is invalid or too deeply
    /// nested, or when serialisation fails.
    pub fn detect_paths(&self, json: &str) -> Result<String, JsValue> {
        let value = Self::parse_json(json)?;
        let paths = self.path_parser.detect_paths(&value);
        serde_json::to_string(&paths).map_err(Self::js_error)
    }

    /// Tool identifier.
    #[wasm_bindgen(getter)]
    pub fn name(&self) -> String {
        "json-extractor".to_string()
    }

    /// One-line description of the tool.
    #[wasm_bindgen(getter)]
    pub fn description(&self) -> String {
        "Extract fields from JSON using path syntax".to_string()
    }
}

// Private helpers live in a plain `impl` block so that they are not part of
// the `wasm_bindgen` export surface.
#[cfg(feature = "wasm")]
impl JsonExtractor {
    /// Converts any error into a `JsValue` string carrying its message.
    fn js_error<E: ToString>(err: E) -> JsValue {
        JsValue::from_str(&err.to_string())
    }

    /// Parses `json` and enforces the nesting limit.
    fn parse_json(json: &str) -> Result<serde_json::Value, JsValue> {
        let value: serde_json::Value = serde_json::from_str(json).map_err(Self::js_error)?;
        check_json_depth(&value, 0).map_err(Self::js_error)?;
        Ok(value)
    }

    /// Runs the shared pipeline: split `paths`, parse and depth-check `json`,
    /// extract, flatten array matches one level, then collapse a lone value
    /// from a single path into that value itself.
    fn extract_value(&self, json: &str, paths: &str) -> Result<serde_json::Value, JsValue> {
        let paths_vec = parse_paths(paths);
        let value = Self::parse_json(json)?;

        let result = self
            .extractor
            .extract(&value, &paths_vec)
            .map_err(Self::js_error)?;

        let mut all_values: Vec<serde_json::Value> = Vec::new();
        for extracted in &result.values {
            match &extracted.value {
                // Clone element by element; no temporary copy of the array.
                serde_json::Value::Array(items) => all_values.extend(items.iter().cloned()),
                other => all_values.push(other.clone()),
            }
        }

        let single_path = paths_vec.len() == 1;
        Ok(match all_values.len() {
            // Exactly one element exists here, so index 0 is always valid.
            1 if single_path => all_values.swap_remove(0),
            // The matches are already `Value`s; wrap them without
            // re-serialising.
            _ => serde_json::Value::Array(all_values),
        })
    }
}

#[cfg(feature = "wasm")]
impl Default for JsonExtractor {
    /// Equivalent to [`JsonExtractor::new`].
    fn default() -> Self {
        Self::new()
    }
}
289
#[cfg(test)]
mod tests {
    use super::*;

    /// Object with a single string field.
    const NAME_DOC: &str = r#"{"name": "test"}"#;
    /// Object with a single numeric field.
    const VALUE_DOC: &str = r#"{"value": 42}"#;
    /// Object holding a one-element array of objects.
    const ITEMS_DOC: &str = r#"{"items": [{"id": 1}]}"#;
    /// Wildcard path selecting every `id` inside [`ITEMS_DOC`].
    const ITEM_IDS_PATH: &str = "$.items[*].id";
    /// Input that is not JSON at all.
    const NOT_JSON: &str = r#"not json"#;

    /// Asserts that `extractor.extract(json, paths)` succeeds.
    fn assert_extract_ok(extractor: &JsonExtractorCore, json: &str, paths: &str) {
        assert!(extractor.extract(json, paths).is_ok());
    }

    /// Asserts that a fresh extractor can render `paths` of `json` in `format`.
    fn assert_format_ok(json: &str, paths: &str, format: OutputFormat) {
        let outcome = JsonExtractorCore::new().extract_with_format(json, paths, format);
        assert!(outcome.is_ok());
    }

    /// An extractor built with `new` handles a trivial extraction.
    #[test]
    fn test_extractor_creation() {
        assert_extract_ok(&JsonExtractorCore::new(), NAME_DOC, "$.name");
    }

    /// Same scenario as `test_extractor_creation`, kept under its own name.
    #[test]
    fn test_extract_placeholder() {
        assert_extract_ok(&JsonExtractorCore::new(), NAME_DOC, "$.name");
    }

    /// Wildcard extraction can be rendered as JSON.
    #[test]
    fn test_extract_with_format_json() {
        assert_format_ok(ITEMS_DOC, ITEM_IDS_PATH, OutputFormat::Json);
    }

    /// Wildcard extraction can be rendered as CSV.
    #[test]
    fn test_extract_with_format_csv() {
        assert_format_ok(ITEMS_DOC, ITEM_IDS_PATH, OutputFormat::Csv);
    }

    /// Wildcard extraction can be rendered as plain text.
    #[test]
    fn test_extract_with_format_text() {
        assert_format_ok(ITEMS_DOC, ITEM_IDS_PATH, OutputFormat::Text);
    }

    /// Wildcard extraction can be rendered as YAML.
    #[test]
    fn test_extract_with_format_yaml() {
        assert_format_ok(ITEMS_DOC, ITEM_IDS_PATH, OutputFormat::Yaml);
    }

    /// Path detection reports both top-level keys of the document.
    #[test]
    fn test_detect_paths() {
        let document = r#"{"name": "test", "data": {"id": 1}}"#;
        let detected = JsonExtractorCore::new()
            .detect_paths(document)
            .expect("path detection should accept valid JSON");

        for expected in ["$.name", "$.data"] {
            assert!(detected.contains(&expected.to_string()));
        }
    }

    /// Path detection rejects input that is not JSON.
    #[test]
    fn test_detect_paths_invalid_json() {
        assert!(JsonExtractorCore::new().detect_paths(NOT_JSON).is_err());
    }

    /// Extraction rejects input that is not JSON.
    #[test]
    fn test_extract_invalid_json() {
        assert!(JsonExtractorCore::new().extract(NOT_JSON, "$.name").is_err());
    }

    /// Two comma-separated paths can be extracted in one call.
    #[test]
    fn test_extract_multiple_paths() {
        let document = r#"{"name": "test", "value": 42}"#;
        assert_extract_ok(&JsonExtractorCore::new(), document, "$.name,$.value");
    }

    /// An extractor built through `Default` behaves like one built with `new`.
    #[test]
    fn test_extractor_default() {
        assert_extract_ok(&JsonExtractorCore::default(), NAME_DOC, "$.name");
    }

    /// A path that resolves to a scalar (not an array) extracts successfully.
    #[test]
    fn test_extract_single_value_non_array() {
        let document = r#"{"name": "test", "count": 5}"#;
        assert_extract_ok(&JsonExtractorCore::new(), document, "$.count");
    }

    /// A single numeric value can be rendered as text.
    #[test]
    fn test_extract_with_format_single_value() {
        assert_format_ok(VALUE_DOC, "$.value", OutputFormat::Text);
    }

    /// A document nested 200 levels deep, beyond `MAX_JSON_DEPTH`, is rejected.
    // NOTE(review): serde_json's own parser recursion limit may reject this
    // input before `check_json_depth` runs; the test only asserts that some
    // error is returned — confirm which layer is meant to be covered.
    #[test]
    fn test_json_depth_limit() {
        let levels = 200;
        let document = format!("{}1{}", "{\"a\":".repeat(levels), "}".repeat(levels));

        assert!(JsonExtractorCore::new().extract(&document, "$.a").is_err());
    }

    /// Plain comma-separated paths are split at the comma.
    #[test]
    fn test_parse_paths_basic() {
        assert_eq!(parse_paths("$.name,$.value"), vec!["$.name", "$.value"]);
    }

    /// A comma inside double quotes does not split the path.
    #[test]
    fn test_parse_paths_with_quoted_comma() {
        assert_eq!(parse_paths("\"a,b\",c"), vec!["a,b", "c"]);
    }

    /// Whitespace around each path is trimmed.
    #[test]
    fn test_parse_paths_with_spaces() {
        assert_eq!(parse_paths(" $.name , $.value "), vec!["$.name", "$.value"]);
    }

    /// Empty input yields no paths.
    #[test]
    fn test_parse_paths_empty() {
        assert!(parse_paths("").is_empty());
    }

    /// Input without a comma yields exactly one path.
    #[test]
    fn test_parse_paths_single() {
        assert_eq!(parse_paths("$.name"), vec!["$.name"]);
    }

    /// A backslash-escaped comma is kept as part of the path.
    #[test]
    fn test_parse_paths_with_escape() {
        assert_eq!(parse_paths("a\\,b,c"), vec!["a,b", "c"]);
    }

    /// A backslash-escaped quote is kept as a literal quote character.
    #[test]
    fn test_parse_paths_with_escaped_quote() {
        assert_eq!(parse_paths(r#"a\"b,c"#), vec![r#"a"b"#, "c"]);
    }

    /// Extracting a scalar produces output that contains the scalar.
    #[test]
    fn test_extract_scalar_value_not_array() {
        let outcome = JsonExtractorCore::new().extract(VALUE_DOC, "$.value");

        assert!(outcome.is_ok());
        assert!(outcome.unwrap().contains("42"));
    }

    /// A single string value can be rendered as text.
    #[test]
    fn test_extract_with_format_scalar_value() {
        assert_format_ok(NAME_DOC, "$.name", OutputFormat::Text);
    }

    /// Three levels of object nesting are well within the limit.
    #[test]
    fn test_check_json_depth_valid() {
        let document = serde_json::json!({
            "level1": {
                "level2": {
                    "level3": "value"
                }
            }
        });

        assert!(check_json_depth(&document, 0).is_ok());
    }

    /// Arrays of objects are traversed and accepted when shallow.
    #[test]
    fn test_check_json_depth_array() {
        let document = serde_json::json!({
            "items": [{"a": 1}, {"a": 2}]
        });

        assert!(check_json_depth(&document, 0).is_ok());
    }

    /// Wrapping an object 130 more times pushes it past the limit.
    #[test]
    fn test_check_json_depth_exceeds_limit() {
        let document = (0..130).fold(serde_json::json!({"a": 1}), |inner, _| {
            serde_json::json!({"a": inner})
        });

        assert!(check_json_depth(&document, 0).is_err());
    }
}