#[cfg(feature = "wasm")]
use wasm_bindgen::prelude::*;
pub mod extract;
pub mod format;
pub mod path;
pub use extract::{ExtractResult, Extractor};
pub use format::{Formatter, OutputFormat};
pub use path::{JsonPath, PathParser};
use shard_den_core::ShardDenError;
/// Deepest nesting level `check_json_depth` accepts.
///
/// The checked root value is passed in at level 0 by the callers in this file,
/// and every array element or object member is one level deeper than its
/// container; a value whose level is greater than this constant is rejected.
const MAX_JSON_DEPTH: usize = 128;
/// Verifies that nothing inside `value` is nested deeper than `MAX_JSON_DEPTH`.
///
/// `depth` is the nesting level assigned to `value` itself. Array elements and
/// object members are checked recursively at `depth + 1`; scalars have no
/// children and only their own level is checked.
///
/// # Errors
///
/// Returns a human-readable message as soon as a value whose level exceeds
/// `MAX_JSON_DEPTH` is reached.
fn check_json_depth(value: &serde_json::Value, depth: usize) -> Result<(), String> {
    if depth > MAX_JSON_DEPTH {
        return Err(format!("JSON too deeply nested (max: {})", MAX_JSON_DEPTH));
    }

    // Only reached when `depth <= MAX_JSON_DEPTH`, so the increment cannot overflow.
    let child_depth = depth + 1;
    match value {
        serde_json::Value::Array(items) => items
            .iter()
            .try_for_each(|item| check_json_depth(item, child_depth)),
        serde_json::Value::Object(members) => members
            .values()
            .try_for_each(|member| check_json_depth(member, child_depth)),
        _ => Ok(()),
    }
}
/// Splits a comma-separated list of paths into individual, trimmed path strings.
///
/// Splitting rules:
/// - `,` separates paths unless it is escaped or appears inside double quotes.
/// - `"` toggles quoted mode and is not copied to the output.
/// - `\` escapes the following character, which is then copied literally
///   (`\,` yields `,`, `\"` yields `"`, `\\` yields `\`). A lone backslash at
///   the very end of the input is dropped.
/// - Every segment is trimmed of surrounding whitespace, and segments that are
///   empty after trimming are skipped, so `"a, ,b"` yields `["a", "b"]` and a
///   whitespace-only input yields no paths at all.
pub fn parse_paths(input: &str) -> Vec<String> {
    // Stores the trimmed segment unless nothing but whitespace is left of it.
    fn push_segment(paths: &mut Vec<String>, segment: &str) {
        let segment = segment.trim();
        if !segment.is_empty() {
            paths.push(segment.to_string());
        }
    }

    let mut paths = Vec::new();
    let mut current = String::new();
    let mut in_quotes = false;
    let mut escape_next = false;

    for ch in input.chars() {
        match (ch, escape_next, in_quotes) {
            // An unescaped backslash arms the escape for the next character.
            ('\\', false, _) => escape_next = true,
            // An unescaped quote only switches quoted mode on or off.
            ('"', false, _) => in_quotes = !in_quotes,
            // An unescaped comma outside quotes ends the current segment.
            (',', false, false) => {
                push_segment(&mut paths, &current);
                // Always reset, otherwise a whitespace-only segment would leak
                // into the next one.
                current.clear();
            }
            // Everything else, including any escaped character, is literal.
            _ => {
                current.push(ch);
                escape_next = false;
            }
        }
    }

    push_segment(&mut paths, &current);
    paths
}
/// JSON extraction front end with a plain Rust API (no `wasm_bindgen` attributes).
///
/// Bundles the three helpers its methods use: an [`Extractor`], a
/// [`Formatter`] and a [`PathParser`].
// NOTE(review): all three fields are read by `impl JsonExtractorCore` in this
// file, so this `allow(dead_code)` looks unnecessary — TODO confirm and remove.
#[allow(dead_code)]
pub struct JsonExtractorCore {
/// Evaluates the parsed paths against a JSON value (used by `extract*`).
extractor: Extractor,
/// Renders extracted values in the requested `OutputFormat`.
formatter: Formatter,
/// Lists the paths present in a JSON value (used by `detect_paths`).
path_parser: PathParser,
}
impl JsonExtractorCore {
pub fn new() -> Self {
Self {
extractor: Extractor::new(),
formatter: Formatter::new(),
path_parser: PathParser::new(),
}
}
pub fn extract(&self, json: &str, paths: &str) -> shard_den_core::Result<String> {
let paths_vec = parse_paths(paths);
let value: serde_json::Value = serde_json::from_str(json)?;
check_json_depth(&value, 0).map_err(ShardDenError::invalid_input)?;
let result = self.extractor.extract(&value, &paths_vec)?;
let mut all_values: Vec<serde_json::Value> = Vec::new();
for extracted in &result.values {
if let serde_json::Value::Array(arr) = &extracted.value {
all_values.extend(arr.clone());
} else {
all_values.push(extracted.value.clone());
}
}
serde_json::to_string(&all_values).map_err(Into::into)
}
pub fn extract_with_format(
&self, json: &str, paths: &str, format: OutputFormat,
) -> shard_den_core::Result<String> {
let paths_vec = parse_paths(paths);
let value: serde_json::Value = serde_json::from_str(json)?;
check_json_depth(&value, 0).map_err(ShardDenError::invalid_input)?;
let result = self.extractor.extract(&value, &paths_vec)?;
let mut all_values: Vec<serde_json::Value> = Vec::new();
for extracted in &result.values {
if let serde_json::Value::Array(arr) = &extracted.value {
all_values.extend(arr.clone());
} else {
all_values.push(extracted.value.clone());
}
}
let json_value: serde_json::Value = serde_json::to_value(&all_values)?;
self.formatter.format(&json_value, format)
}
pub fn detect_paths(&self, json: &str) -> shard_den_core::Result<Vec<String>> {
let value: serde_json::Value = serde_json::from_str(json)?;
check_json_depth(&value, 0).map_err(ShardDenError::invalid_input)?;
Ok(self.path_parser.detect_paths(&value))
}
}
impl Default for JsonExtractorCore {
/// Equivalent to [`JsonExtractorCore::new`].
fn default() -> Self {
Self::new()
}
}
/// JSON extraction front end exported through `wasm_bindgen`.
///
/// Only compiled when the `wasm` feature is enabled. Holds the same three
/// helpers as `JsonExtractorCore`: an [`Extractor`], a [`Formatter`] and a
/// [`PathParser`].
// NOTE(review): all three fields are read by `impl JsonExtractor` in this
// file, so the `allow(dead_code)` below looks unnecessary — TODO confirm.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
#[allow(dead_code)]
pub struct JsonExtractor {
/// Evaluates the parsed paths against a JSON value (used by `extract*`).
extractor: Extractor,
/// Renders extracted values in the requested `OutputFormat`.
formatter: Formatter,
/// Lists the paths present in a JSON value (used by `detect_paths`).
path_parser: PathParser,
}
#[cfg(feature = "wasm")]
#[wasm_bindgen]
impl JsonExtractor {
    /// Creates an extractor backed by freshly constructed `Extractor`,
    /// `Formatter` and `PathParser` instances.
    #[wasm_bindgen(constructor)]
    pub fn new() -> Self {
        Self {
            extractor: Extractor::new(),
            formatter: Formatter::new(),
            path_parser: PathParser::new(),
        }
    }

    /// Extracts the values addressed by `paths` from `json` and returns them
    /// as JSON text.
    ///
    /// `paths` is a comma-separated list understood by `parse_paths`. When
    /// exactly one path was given and it produced exactly one value, that
    /// value is serialized on its own; otherwise the output is a JSON array
    /// of all values, with array results flattened one level.
    ///
    /// Every failure (invalid JSON, nesting deeper than `MAX_JSON_DEPTH`,
    /// extractor error, serialization error) is returned as a string
    /// `JsValue` carrying the error message.
    pub fn extract(&self, json: &str, paths: &str) -> Result<String, JsValue> {
        let json_value = self.extract_value(json, paths)?;
        serde_json::to_string(&json_value).map_err(|e| JsValue::from_str(&e.to_string()))
    }

    /// Same extraction as `extract`, rendered by the formatter.
    ///
    /// `format` is matched case-insensitively against `"csv"`, `"text"` and
    /// `"yaml"`; any other value, including `"json"`, selects JSON output.
    ///
    /// Failures are returned as a string `JsValue` carrying the error message.
    pub fn extract_with_format(
        &self,
        json: &str,
        paths: &str,
        format: &str,
    ) -> Result<String, JsValue> {
        let json_value = self.extract_value(json, paths)?;
        let output_format = match format.to_lowercase().as_str() {
            "csv" => OutputFormat::Csv,
            "text" => OutputFormat::Text,
            "yaml" => OutputFormat::Yaml,
            // Unrecognized names deliberately fall back to JSON.
            _ => OutputFormat::Json,
        };
        self.formatter
            .format(&json_value, output_format)
            .map_err(|e| JsValue::from_str(&e.to_string()))
    }

    /// Parses `json` and returns the paths the path parser finds in it,
    /// serialized as a JSON array of strings.
    ///
    /// Failures (invalid JSON, nesting deeper than `MAX_JSON_DEPTH`,
    /// serialization error) are returned as a string `JsValue`.
    pub fn detect_paths(&self, json: &str) -> Result<String, JsValue> {
        let value: serde_json::Value =
            serde_json::from_str(json).map_err(|e| JsValue::from_str(&e.to_string()))?;
        check_json_depth(&value, 0).map_err(|e| JsValue::from_str(&e))?;
        let paths = self.path_parser.detect_paths(&value);
        serde_json::to_string(&paths).map_err(|e| JsValue::from_str(&e.to_string()))
    }

    /// Fixed identifier of this tool: `"json-extractor"`.
    #[wasm_bindgen(getter)]
    pub fn name(&self) -> String {
        "json-extractor".to_string()
    }

    /// Fixed one-line description of this tool.
    #[wasm_bindgen(getter)]
    pub fn description(&self) -> String {
        "Extract fields from JSON using path syntax".to_string()
    }
}

// Kept in a separate, non-`wasm_bindgen` impl block so the helper is plain
// Rust and is not part of the exported JavaScript surface.
#[cfg(feature = "wasm")]
impl JsonExtractor {
    /// Shared pipeline behind `extract` and `extract_with_format`: split the
    /// path list, parse and depth-check the document, run the extractor,
    /// flatten array results one level, then pick the output shape.
    ///
    /// Returns the single value itself when one path yielded exactly one
    /// value, and a JSON array of all values otherwise.
    fn extract_value(&self, json: &str, paths: &str) -> Result<serde_json::Value, JsValue> {
        let paths_vec = parse_paths(paths);
        let value: serde_json::Value =
            serde_json::from_str(json).map_err(|e| JsValue::from_str(&e.to_string()))?;
        check_json_depth(&value, 0).map_err(|e| JsValue::from_str(&e))?;
        let result = self
            .extractor
            .extract(&value, &paths_vec)
            .map_err(|e| JsValue::from_str(&e.to_string()))?;

        let mut all_values: Vec<serde_json::Value> = Vec::new();
        for extracted in &result.values {
            if let serde_json::Value::Array(arr) = &extracted.value {
                // Clone element by element rather than cloning the whole
                // array into a temporary vector first.
                all_values.extend(arr.iter().cloned());
            } else {
                all_values.push(extracted.value.clone());
            }
        }

        if paths_vec.len() == 1 && all_values.len() == 1 {
            // Take the only element without a panicking `unwrap`.
            if let Some(only) = all_values.pop() {
                return Ok(only);
            }
        }
        // The values are already `serde_json::Value`s, so wrap them directly
        // instead of re-serializing the vector through `to_value`.
        Ok(serde_json::Value::Array(all_values))
    }
}
#[cfg(feature = "wasm")]
impl Default for JsonExtractor {
/// Equivalent to `JsonExtractor::new`.
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Object with a single string member, used by the basic smoke tests.
    const NAME_DOC: &str = r#"{"name": "test"}"#;
    /// Object with a single numeric member.
    const VALUE_DOC: &str = r#"{"value": 42}"#;
    /// Object holding a one-element array of objects.
    const ITEMS_DOC: &str = r#"{"items": [{"id": 1}]}"#;
    /// Wildcard path selecting every `id` inside `items`.
    const ITEMS_ID_PATH: &str = "$.items[*].id";

    /// Reports whether a fresh core extractor extracts `paths` from `json` without error.
    fn extract_succeeds(json: &str, paths: &str) -> bool {
        JsonExtractorCore::new().extract(json, paths).is_ok()
    }

    /// Reports whether a fresh core extractor can render `paths` from `json` in `format`.
    fn format_succeeds(json: &str, paths: &str, format: OutputFormat) -> bool {
        JsonExtractorCore::new()
            .extract_with_format(json, paths, format)
            .is_ok()
    }

    /// A freshly built extractor handles a simple path.
    #[test]
    fn test_extractor_creation() {
        assert!(extract_succeeds(NAME_DOC, "$.name"));
    }

    /// Same scenario as `test_extractor_creation`.
    #[test]
    fn test_extract_placeholder() {
        assert!(extract_succeeds(NAME_DOC, "$.name"));
    }

    /// Wildcard extraction rendered as JSON succeeds.
    #[test]
    fn test_extract_with_format_json() {
        assert!(format_succeeds(ITEMS_DOC, ITEMS_ID_PATH, OutputFormat::Json));
    }

    /// Wildcard extraction rendered as CSV succeeds.
    #[test]
    fn test_extract_with_format_csv() {
        assert!(format_succeeds(ITEMS_DOC, ITEMS_ID_PATH, OutputFormat::Csv));
    }

    /// Wildcard extraction rendered as text succeeds.
    #[test]
    fn test_extract_with_format_text() {
        assert!(format_succeeds(ITEMS_DOC, ITEMS_ID_PATH, OutputFormat::Text));
    }

    /// Wildcard extraction rendered as YAML succeeds.
    #[test]
    fn test_extract_with_format_yaml() {
        assert!(format_succeeds(ITEMS_DOC, ITEMS_ID_PATH, OutputFormat::Yaml));
    }

    /// Path detection reports both top-level members of the document.
    #[test]
    fn test_detect_paths() {
        let detected =
            JsonExtractorCore::new().detect_paths(r#"{"name": "test", "data": {"id": 1}}"#);
        assert!(detected.is_ok());
        let found = detected.unwrap();
        assert!(found.contains(&String::from("$.name")));
        assert!(found.contains(&String::from("$.data")));
    }

    /// Path detection rejects input that is not JSON.
    #[test]
    fn test_detect_paths_invalid_json() {
        assert!(JsonExtractorCore::new().detect_paths("not json").is_err());
    }

    /// Extraction rejects input that is not JSON.
    #[test]
    fn test_extract_invalid_json() {
        assert!(!extract_succeeds("not json", "$.name"));
    }

    /// A comma-separated list of two paths is accepted.
    #[test]
    fn test_extract_multiple_paths() {
        assert!(extract_succeeds(
            r#"{"name": "test", "value": 42}"#,
            "$.name,$.value"
        ));
    }

    /// An extractor obtained through `Default` works like one from `new`.
    #[test]
    fn test_extractor_default() {
        let from_default = JsonExtractorCore::default();
        assert!(from_default.extract(NAME_DOC, "$.name").is_ok());
    }

    /// Extracting a scalar member succeeds.
    #[test]
    fn test_extract_single_value_non_array() {
        assert!(extract_succeeds(
            r#"{"name": "test", "count": 5}"#,
            "$.count"
        ));
    }

    /// A single numeric value can be rendered as text.
    #[test]
    fn test_extract_with_format_single_value() {
        assert!(format_succeeds(VALUE_DOC, "$.value", OutputFormat::Text));
    }

    /// A document made of 200 nested objects is rejected.
    #[test]
    fn test_json_depth_limit() {
        let nested = format!("{}1{}", "{\"a\":".repeat(200), "}".repeat(200));
        assert!(!extract_succeeds(&nested, "$.a"));
    }

    /// Two plain paths are split at the comma.
    #[test]
    fn test_parse_paths_basic() {
        assert_eq!(parse_paths("$.name,$.value"), vec!["$.name", "$.value"]);
    }

    /// A comma inside double quotes does not split, and the quotes are dropped.
    #[test]
    fn test_parse_paths_with_quoted_comma() {
        assert_eq!(parse_paths("\"a,b\",c"), vec!["a,b", "c"]);
    }

    /// Whitespace around each path is trimmed.
    #[test]
    fn test_parse_paths_with_spaces() {
        assert_eq!(
            parse_paths(" $.name , $.value "),
            vec!["$.name", "$.value"]
        );
    }

    /// Empty input produces no paths.
    #[test]
    fn test_parse_paths_empty() {
        assert!(parse_paths("").is_empty());
    }

    /// Input without a comma is returned as one path.
    #[test]
    fn test_parse_paths_single() {
        assert_eq!(parse_paths("$.name"), vec!["$.name"]);
    }

    /// A backslash-escaped comma is kept as a literal comma.
    #[test]
    fn test_parse_paths_with_escape() {
        assert_eq!(parse_paths("a\\,b,c"), vec!["a,b", "c"]);
    }

    /// A backslash-escaped quote is kept as a literal quote.
    #[test]
    fn test_parse_paths_with_escaped_quote() {
        assert_eq!(parse_paths(r#"a\"b,c"#), vec![r#"a"b"#, "c"]);
    }

    /// The serialized output of a scalar extraction contains the value.
    #[test]
    fn test_extract_scalar_value_not_array() {
        let extracted = JsonExtractorCore::new().extract(VALUE_DOC, "$.value");
        assert!(extracted.is_ok());
        assert!(extracted.unwrap().contains("42"));
    }

    /// A single string value can be rendered as text.
    #[test]
    fn test_extract_with_format_scalar_value() {
        assert!(format_succeeds(NAME_DOC, "$.name", OutputFormat::Text));
    }

    /// Three levels of nested objects pass the depth check.
    #[test]
    fn test_check_json_depth_valid() {
        let shallow = serde_json::json!({
            "level1": { "level2": { "level3": "value" } }
        });
        assert!(check_json_depth(&shallow, 0).is_ok());
    }

    /// Objects nested inside an array pass the depth check.
    #[test]
    fn test_check_json_depth_array() {
        let with_array = serde_json::json!({ "items": [{"a": 1}, {"a": 2}] });
        assert!(check_json_depth(&with_array, 0).is_ok());
    }

    /// An object wrapped 130 more times fails the depth check.
    #[test]
    fn test_check_json_depth_exceeds_limit() {
        let too_deep = (0..130).fold(serde_json::json!({"a": 1}), |inner, _| {
            serde_json::json!({ "a": inner })
        });
        assert!(check_json_depth(&too_deep, 0).is_err());
    }
}