jshape 0.1.0

Repair malformed JSON and render a stable, human-readable structural outline.
Documentation
use indexmap::IndexMap;
use json_repair::repair_json_string;
use serde_json::Value;
use std::collections::HashSet;

/// Structural summary of a JSON value, as built by `extract_schema`.
#[derive(Debug, Clone)]
pub enum Schema {
    /// JSON `null`. Also used as the item placeholder of an empty array.
    Null,
    /// A boolean. `Some(value)` keeps the observed value; `None` is used for the
    /// items of a boolean array, where no single example is kept.
    Bool(Option<bool>),
    /// A number representable as `i64`, with the example values collected so far.
    Int(Vec<i64>),
    /// Any other number, with the example values collected so far.
    Float(Vec<f64>),
    /// A string, with the example values collected so far.
    Str(Vec<String>),
    /// An array: the item schema, the element count (the largest count when several
    /// arrays were merged), and a copy of the item's optional keys when the item is
    /// an object that has at least one optional key.
    Array(Box<Schema>, usize, Option<HashSet<String>>),
    /// An object: field schemas by key, plus the set of keys treated as optional.
    Object(IndexMap<String, Schema>, HashSet<String>),
}

/// Parses `input` as JSON via `repair_json_string`.
///
/// # Errors
///
/// Returns a human-readable message, prefixed with
/// `"Failed to parse or repair JSON: "`, when `repair_json_string` fails.
pub fn repair_and_parse_json(input: &str) -> Result<Value, String> {
    match repair_json_string(input) {
        Ok(parsed) => Ok(parsed),
        Err(cause) => Err(format!("Failed to parse or repair JSON: {}", cause)),
    }
}

/// Infers a [`Schema`] from a parsed JSON value.
///
/// Scalars become single-example schemas and objects recurse into every field,
/// starting with no optional keys. An array is summarised by one item schema that
/// is seeded from its first element:
///
/// * string and number arrays collect the distinct values of the same kind as
///   examples, skipping elements of any other kind;
/// * an integer array that also contains a non-integer number is reported as
///   `Float`, matching the `Int` + `Float` promotion done by `merge_schema`;
/// * boolean arrays keep no example (`Bool(None)`);
/// * for any other first element (null, array, object) every later element is
///   folded in with `merge_schema`.
///
/// An empty array yields `Array(Null, 0, None)`.
pub fn extract_schema(value: &Value) -> Schema {
    match value {
        Value::Null => Schema::Null,
        Value::Bool(b) => Schema::Bool(Some(*b)),
        Value::Number(n) => match n.as_i64() {
            Some(i) => Schema::Int(vec![i]),
            // `as_f64` returns an `Option`; a number it cannot represent becomes a
            // `Float` without examples instead of panicking on user input.
            None => Schema::Float(n.as_f64().into_iter().collect()),
        },
        Value::String(s) => Schema::Str(vec![s.clone()]),
        Value::Array(arr) => {
            let Some((first, rest)) = arr.split_first() else {
                return Schema::Array(Box::new(Schema::Null), 0, None);
            };

            let item_schema = match extract_schema(first) {
                Schema::Str(mut examples) => {
                    for v in rest {
                        if let Value::String(s) = v {
                            if !examples.contains(s) {
                                examples.push(s.clone());
                            }
                        }
                    }
                    Schema::Str(examples)
                }
                Schema::Int(mut examples) => {
                    let has_non_integer = rest.iter().any(|v| v.is_number() && !v.is_i64());
                    if has_non_integer {
                        // Promote to Float so the outline does not claim `int` for
                        // data such as `[1, 2.5]`. The cast mirrors what `as_f64`
                        // does for integer-valued numbers.
                        let mut floats: Vec<f64> = examples.iter().map(|&i| i as f64).collect();
                        for f in rest.iter().filter_map(Value::as_f64) {
                            push_unique_example(&mut floats, f);
                        }
                        Schema::Float(floats)
                    } else {
                        for i in rest.iter().filter_map(Value::as_i64) {
                            push_unique_example(&mut examples, i);
                        }
                        Schema::Int(examples)
                    }
                }
                Schema::Float(mut examples) => {
                    for f in rest.iter().filter_map(Value::as_f64) {
                        push_unique_example(&mut examples, f);
                    }
                    Schema::Float(examples)
                }
                // Boolean arrays never show an example value.
                Schema::Bool(_) => Schema::Bool(None),
                mut merged => {
                    for v in rest {
                        merge_schema(&mut merged, &extract_schema(v));
                    }
                    merged
                }
            };

            // Mirror the item object's optional keys on the array itself.
            let opt_keys = match &item_schema {
                Schema::Object(_, opt) if !opt.is_empty() => Some(opt.clone()),
                _ => None,
            };

            Schema::Array(Box::new(item_schema), arr.len(), opt_keys)
        }
        Value::Object(map) => {
            let mut fields = IndexMap::new();
            for (k, v) in map {
                fields.insert(k.clone(), extract_schema(v));
            }
            Schema::Object(fields, HashSet::new())
        }
    }
}

/// Appends `candidate` to `examples` unless an equal value is already present.
fn push_unique_example<T: PartialEq>(examples: &mut Vec<T>, candidate: T) {
    if !examples.contains(&candidate) {
        examples.push(candidate);
    }
}

/// Renders `schema` as a text outline by delegating to `render_root`.
///
/// When `show_examples` is true, scalar types are replaced by their collected
/// example values where any are available.
pub fn format_schema(schema: &Schema, show_examples: bool) -> String {
    render_root(schema, show_examples)
}

/// Parses (repairing if needed) `input`, infers its schema and renders the outline.
///
/// # Errors
///
/// Propagates the error message from `repair_and_parse_json` unchanged.
pub fn analyze_json(input: &str, show_examples: bool) -> Result<String, String> {
    repair_and_parse_json(input)
        .map(|parsed| format_schema(&extract_schema(&parsed), show_examples))
}

fn merge_schema(a: &mut Schema, b: &Schema) {
    use Schema::*;
    match (&mut *a, b) {
        (Int(_), Float(_)) => {
            *a = Float(vec![]);
        }
        (Array(item_a, len_a, opt_a), Array(item_b, len_b, opt_b)) => {
            *len_a = (*len_a).max(*len_b);
            if let (Some(opt_a), Some(opt_b)) = (opt_a, opt_b) {
                let intersection: HashSet<_> = opt_a.intersection(opt_b).cloned().collect();
                *opt_a = intersection;
            }
            merge_schema(item_a, item_b);
        }
        (Object(fields_a, opt_a), Object(fields_b, opt_b)) => {
            for (k, v) in fields_b {
                if let Some(va) = fields_a.get_mut(k) {
                    merge_schema(va, v);
                } else {
                    opt_a.insert(k.clone());
                    fields_a.insert(k.clone(), v.clone());
                }
            }
            let keys_in_b: HashSet<_> = fields_b.keys().cloned().collect();
            for k in fields_a.keys().cloned().collect::<Vec<_>>() {
                if !keys_in_b.contains(&k) {
                    opt_a.insert(k);
                }
            }
            for k in opt_b {
                opt_a.insert(k.clone());
            }
        }
        _ => {}
    }
}

/// Renders the top-level schema with a two-space indent step.
///
/// A root object starts at column zero; any other root schema is rendered one
/// indentation level deep.
fn render_root(schema: &Schema, show_examples: bool) -> String {
    const INDENT: usize = 2;

    // At depth 0 the shared object renderer emits "{", the fields indented by one
    // step, and "}", which is exactly the root-object layout.
    let depth = match schema {
        Schema::Object(..) => 0,
        _ => 1,
    };
    format_schema_at_depth(schema, depth, INDENT, show_examples)
}

fn format_scalar_value(schema: &Schema, show_examples: bool) -> Option<String> {
    match schema {
        Schema::Null => Some("null".to_string()),
        Schema::Bool(b) => Some(if show_examples {
            if let Some(val) = b {
                val.to_string()
            } else {
                "bool".to_string()
            }
        } else {
            "bool".to_string()
        }),
        Schema::Int(examples) => Some(if show_examples && !examples.is_empty() {
            examples.iter().map(|e| e.to_string()).collect::<Vec<_>>().join(", ")
        } else {
            "int".to_string()
        }),
        Schema::Float(examples) => Some(if show_examples && !examples.is_empty() {
            examples.iter().map(|e| e.to_string()).collect::<Vec<_>>().join(", ")
        } else {
            "float".to_string()
        }),
        Schema::Str(examples) => Some(if show_examples && !examples.is_empty() {
            examples.iter().map(|s| format!("\"{}\"", s)).collect::<Vec<_>>().join(", ")
        } else {
            "str".to_string()
        }),
        _ => None,
    }
}

/// Renders an object key as a JSON string literal, followed by `?` when the key
/// is optional.
fn format_key(key: &str, is_optional: bool) -> String {
    let mut rendered =
        serde_json::to_string(key).expect("schema keys should serialize to JSON strings");
    if is_optional {
        rendered.push('?');
    }
    rendered
}

/// Renders an array or object so it can follow a `key: ` prefix on the same line.
///
/// The schema is rendered at `depth`, then the padding of the first line only is
/// removed; all following lines keep their indentation.
fn format_block_field_value(schema: &Schema, depth: usize, indent: usize, show_examples: bool) -> String {
    let rendered = format_schema_at_depth(schema, depth, indent, show_examples);
    let leading = " ".repeat(depth * indent);

    let mut rows = rendered.lines();
    let Some(head) = rows.next() else {
        return String::new();
    };

    // The key already sits at the right column, so the opening row drops its pad.
    let head = head.strip_prefix(leading.as_str()).unwrap_or(head);
    std::iter::once(head).chain(rows).collect::<Vec<_>>().join("\n")
}

/// Renders an array schema as a bracketed block padded by `depth * indent` spaces.
///
/// * Object items: one representative object is printed inside the brackets when
///   `len > 0`. A key is marked optional when it is in the item's own optional set
///   or in `opt_keys`. With `show_examples` and more than one element, a trailing
///   `...  // N items` line is added.
/// * Array items: the nested array is rendered one level deeper.
/// * Scalar items: the scalar is printed on a single line one level deeper.
fn format_array(
    item: &Schema,
    len: usize,
    opt_keys: Option<&HashSet<String>>,
    depth: usize,
    indent: usize,
    show_examples: bool,
) -> String {
    let pad = " ".repeat(depth * indent);
    let inner_pad = " ".repeat((depth + 1) * indent);

    match item {
        Schema::Object(fields, obj_opt) => {
            let mut lines = vec![format!("{}[", pad)];
            if len > 0 {
                let field_pad = " ".repeat((depth + 2) * indent);
                lines.push(format!("{}{{", inner_pad));
                let entry_count = fields.len();
                for (index, (k, v)) in fields.iter().enumerate() {
                    let is_optional = obj_opt.contains(k)
                        || opt_keys.is_some_and(|keys| keys.contains(k));
                    let val_str = format_field_value(v, depth + 2, indent, show_examples);
                    let suffix = if index + 1 < entry_count { "," } else { "" };
                    lines.push(format!(
                        "{}{}: {}{}",
                        field_pad,
                        format_key(k, is_optional),
                        val_str,
                        suffix
                    ));
                }
                if len > 1 && show_examples {
                    lines.push(format!("{}}},", inner_pad));
                    // The summary line uses the array's own padding, i.e. the
                    // same column as the brackets. `len > 1` here, so the noun
                    // is always plural.
                    lines.push(format!("{}...  // {} items", pad, len));
                } else {
                    lines.push(format!("{}}}", inner_pad));
                }
            }
            lines.push(format!("{}]", pad));
            lines.join("\n")
        }
        Schema::Array(_, _, _) => {
            let mut lines = vec![format!("{}[", pad)];
            let inner = format_schema_at_depth(item, depth + 1, indent, show_examples);
            lines.extend(inner.lines().map(str::to_owned));
            lines.push(format!("{}]", pad));
            lines.join("\n")
        }
        _ => {
            // NOTE(review): an empty array reaches this branch with a `Null` item and
            // therefore prints `null` between the brackets; confirm that is intended.
            let inner = format_scalar_value(item, show_examples)
                .expect("array primitive branch should only receive scalar item schemas");
            // Use the computed inner padding so the item honours `indent`.
            format!("{}[\n{}{}\n{}]", pad, inner_pad, inner, pad)
        }
    }
}

/// Renders `schema` as a block whose outer lines are padded by `depth * indent`
/// spaces.
///
/// Scalars become a single padded line, arrays are delegated to `format_array`,
/// and objects are printed as `{`, one `key: value` line per field (comma after
/// every field except the last), and `}`.
fn format_schema_at_depth(schema: &Schema, depth: usize, indent: usize, show_examples: bool) -> String {
    let pad = " ".repeat(depth * indent);

    let (fields, opt_fields) = match schema {
        Schema::Array(item, len, opt_keys) => {
            return format_array(item, *len, opt_keys.as_ref(), depth, indent, show_examples);
        }
        Schema::Object(fields, opt_fields) => (fields, opt_fields),
        scalar => {
            return match format_scalar_value(scalar, show_examples) {
                Some(text) => format!("{}{}", pad, text),
                None => unreachable!("non-scalar schemas are handled above"),
            };
        }
    };

    let field_pad = " ".repeat((depth + 1) * indent);
    let total = fields.len();

    let mut rows = Vec::with_capacity(total + 2);
    rows.push(format!("{}{{", pad));
    rows.extend(fields.iter().enumerate().map(|(position, (key, field))| {
        let rendered = format_field_value(field, depth + 1, indent, show_examples);
        let comma = if position + 1 == total { "" } else { "," };
        format!(
            "{}{}: {}{}",
            field_pad,
            format_key(key, opt_fields.contains(key)),
            rendered,
            comma
        )
    }));
    rows.push(format!("{}}}", pad));
    rows.join("\n")
}

/// Renders the value part of a `key: value` line.
///
/// Scalars are returned without padding; arrays and objects are rendered as
/// blocks whose first line has its padding removed so it can follow the key.
fn format_field_value(schema: &Schema, depth: usize, indent: usize, show_examples: bool) -> String {
    match format_scalar_value(schema, show_examples) {
        Some(text) => text,
        None => match schema {
            Schema::Object(..) | Schema::Array(..) => {
                format_block_field_value(schema, depth, indent, show_examples)
            }
            _ => unreachable!("scalar schemas are returned above"),
        },
    }
}