/// Upper bound, in bytes, on the input the compaction entry points will process (4 MiB).
///
/// `compact_json` and `compact_jsonl` return `None` for anything longer than this.
const MAX_INPUT_BYTES: usize = 4 << 20;
/// Returns `input` with every space, tab, line feed and carriage return that lies
/// outside a double-quoted string removed.
///
/// Characters inside a string literal (including backslash escapes such as `\"`)
/// are copied through untouched. No validation is performed here; the public
/// entry points in this file parse the text with `serde_json` before calling this.
fn strip_insignificant_ws(input: &str) -> String {
    /// Position of the scanner relative to JSON string literals.
    #[derive(Clone, Copy)]
    enum Scan {
        /// Not inside any string literal.
        Outside,
        /// Inside a string literal, previous character was not an unconsumed backslash.
        InString,
        /// Inside a string literal, immediately after a backslash.
        AfterBackslash,
    }

    let mut compacted = String::with_capacity(input.len());
    let mut state = Scan::Outside;

    for ch in input.chars() {
        state = match (state, ch) {
            // The only characters ever dropped: the four whitespace characters, outside strings.
            (Scan::Outside, ' ' | '\t' | '\n' | '\r') => continue,
            (Scan::Outside, '"') => Scan::InString,
            (Scan::Outside, _) => Scan::Outside,
            // Whatever follows a backslash is taken literally and never closes the string.
            (Scan::AfterBackslash, _) => Scan::InString,
            (Scan::InString, '\\') => Scan::AfterBackslash,
            (Scan::InString, '"') => Scan::Outside,
            (Scan::InString, _) => Scan::InString,
        };
        compacted.push(ch);
    }

    compacted
}
/// Minifies a JSON document by removing whitespace between tokens.
///
/// Returns `Some(compacted)` only when all of the following hold:
/// - `input` is at most `MAX_INPUT_BYTES` bytes long,
/// - its first non-whitespace character is `{` or `[` (scalars are skipped),
/// - the whole of `input` parses as JSON,
/// - the compacted text is strictly shorter than `input`.
///
/// Otherwise returns `None`. The text is never re-serialized: key order, number
/// spelling and string contents are carried over as written.
#[must_use]
pub fn compact_json(input: &str) -> Option<String> {
    // Cheap rejections first: size limit, then a peek at the leading token.
    let opens_container = || matches!(input.trim_start().chars().next(), Some('{' | '['));
    if input.len() > MAX_INPUT_BYTES || !opens_container() {
        return None;
    }

    // Parse purely as a validity check; the parsed value itself is discarded.
    if serde_json::from_str::<serde_json::Value>(input).is_err() {
        return None;
    }

    let minified = strip_insignificant_ws(input);
    if minified.len() < input.len() {
        Some(minified)
    } else {
        None
    }
}
/// Minifies newline-delimited JSON (JSONL / NDJSON), one record per line.
///
/// Each non-blank line must parse as a JSON value on its own. Blank lines are
/// dropped, every record is stripped of whitespace between tokens, and the
/// records are joined with a single `\n` (no trailing newline).
///
/// Returns `None` when `input` exceeds `MAX_INPUT_BYTES`, when any non-blank line
/// is not valid JSON, when there are no records at all, or when the result would
/// not be strictly shorter than `input`.
#[must_use]
pub fn compact_jsonl(input: &str) -> Option<String> {
    if input.len() > MAX_INPUT_BYTES {
        return None;
    }

    let mut out = String::with_capacity(input.len());
    for line in input.lines() {
        // Trim only the four whitespace characters JSON itself permits (RFC 8259 §2).
        // `str::trim` would also remove NBSP, form feed, vertical tab, etc., letting a
        // line that is not valid JSON slip past validation and be silently rewritten;
        // `compact_json` rejects such input, and this function must agree with it.
        let record = line.trim_matches(|c: char| matches!(c, ' ' | '\t' | '\n' | '\r'));
        if record.is_empty() {
            continue;
        }

        // Parse purely as a validity check; a single bad line aborts the whole input.
        serde_json::from_str::<serde_json::Value>(record).ok()?;

        // A valid record is never empty, so a non-empty buffer means "not the first record".
        if !out.is_empty() {
            out.push('\n');
        }
        out.push_str(&strip_insignificant_ws(record));
    }

    if out.is_empty() {
        // Nothing but blank lines (or empty input): there is no record to emit.
        return None;
    }
    (out.len() < input.len()).then_some(out)
}
/// Picks a compaction strategy from the file extension and applies it to `content`.
///
/// An `ext` of exactly `"jsonl"` or `"ndjson"` routes to `compact_jsonl`; every
/// other value, including `None`, is handled by `compact_json`. The result is
/// whatever the chosen function returns.
#[must_use]
pub fn compact_structured(content: &str, ext: Option<&str>) -> Option<String> {
    match ext {
        Some("jsonl" | "ndjson") => compact_jsonl(content),
        _ => compact_json(content),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `s` as JSON so tests can compare values rather than text.
    fn parse(s: &str) -> serde_json::Value {
        serde_json::from_str(s).expect("valid json")
    }

    #[test]
    fn compacts_pretty_object_losslessly() {
        let pretty = "{\n \"name\": \"lean-ctx\",\n \"version\": 3,\n \"tags\": [\n \"a\",\n \"b\"\n ]\n}";
        let out = compact_json(pretty).expect("should compact");
        assert!(out.len() < pretty.len());
        assert_eq!(parse(&out), parse(pretty), "value must be identical");
        assert!(!out.contains('\n'));
    }

    #[test]
    fn preserves_key_order() {
        let pretty = "{\n \"zebra\": 1,\n \"alpha\": 2,\n \"mike\": 3\n}";
        let out = compact_json(pretty).expect("should compact");
        assert_eq!(out, r#"{"zebra":1,"alpha":2,"mike":3}"#);
    }

    #[test]
    fn preserves_number_formatting() {
        let pretty = "{\n \"a\": 1.0,\n \"b\": 1e3,\n \"c\": 0.50\n}";
        let out = compact_json(pretty).expect("should compact");
        assert_eq!(out, r#"{"a":1.0,"b":1e3,"c":0.50}"#);
    }

    #[test]
    fn whitespace_inside_strings_is_kept() {
        let input = "{\n \"msg\": \"hello world\\n\\ttab\"\n}";
        let out = compact_json(input).expect("should compact");
        assert_eq!(parse(&out), parse(input));
        assert!(out.contains("hello world"), "inner spaces preserved");
        assert!(out.contains("\\n\\ttab"), "escapes preserved");
    }

    #[test]
    fn escaped_quote_does_not_end_string() {
        let input = "{\n \"q\": \"a \\\" b : c\"\n}";
        let out = compact_json(input).expect("should compact");
        assert_eq!(parse(&out), parse(input));
        assert_eq!(out, r#"{"q":"a \" b : c"}"#);
    }

    #[test]
    fn already_minified_returns_none() {
        let min = r#"{"a":1,"b":[2,3]}"#;
        assert!(compact_json(min).is_none(), "no smaller form available");
    }

    #[test]
    fn invalid_json_is_never_touched() {
        assert!(compact_json("{not valid json").is_none());
        assert!(compact_json("{\"a\": }").is_none());
        assert!(compact_json("just text with spaces").is_none());
    }

    #[test]
    fn scalars_and_non_json_skipped() {
        assert!(compact_json("42").is_none());
        assert!(compact_json("\"a string\"").is_none());
        assert!(compact_json(" ").is_none());
    }

    #[test]
    fn jsonl_compacts_each_line() {
        let input = "{ \"a\": 1 }\n{ \"b\": 2 }\n\n{ \"c\": 3 }";
        let out = compact_jsonl(input).expect("should compact");
        assert_eq!(out, "{\"a\":1}\n{\"b\":2}\n{\"c\":3}");
    }

    #[test]
    fn jsonl_with_invalid_line_returns_none() {
        let input = "{\"a\":1}\nnot json\n{\"b\":2}";
        assert!(compact_jsonl(input).is_none());
    }

    #[test]
    fn compact_structured_dispatches_by_ext() {
        let pretty = "{\n \"x\": 1\n}";
        assert!(compact_structured(pretty, Some("json")).is_some());
        assert!(compact_structured("{ \"x\": 1 }\n{ \"y\": 2 }", Some("jsonl")).is_some());
        assert!(compact_structured(pretty, None).is_some());
        assert!(compact_structured("def f(): pass", Some("py")).is_none());
    }

    #[test]
    fn idempotent_on_compacted_output() {
        let pretty = "{\n \"a\": [1, 2, 3],\n \"b\": { \"c\": 4 }\n}";
        let once = compact_json(pretty).expect("compact once");
        assert!(compact_json(&once).is_none(), "second pass finds nothing");
    }

    #[test]
    fn oversized_input_bails() {
        // The padding sits OUTSIDE any string literal, so it is strippable: without the
        // size guard these inputs would compact to `{"a":1}`. (Padding placed inside a
        // string would yield `None` regardless, because nothing could be removed, and
        // the test would pass even with the guard deleted.)
        let over = format!("{{\"a\":1{}}}", " ".repeat(MAX_INPUT_BYTES));
        assert!(over.len() > MAX_INPUT_BYTES);
        assert!(compact_json(&over).is_none());
        assert!(compact_jsonl(&over).is_none());

        // Exactly at the limit is still accepted: `{"a":1}` is 7 bytes, the rest is padding.
        let at_limit = format!("{{\"a\":1{}}}", " ".repeat(MAX_INPUT_BYTES - 7));
        assert_eq!(at_limit.len(), MAX_INPUT_BYTES);
        assert_eq!(compact_json(&at_limit).as_deref(), Some(r#"{"a":1}"#));
    }
}