// lean_ctx/core/structured_compact.rs

/// Largest input, in bytes, that the compaction entry points will process (4 MiB).
///
/// `compact_json` and `compact_jsonl` return `None` for anything longer.
const MAX_INPUT_BYTES: usize = 4 * 1024 * 1024;
21
/// Copies `input` while dropping every space, tab, line feed and carriage
/// return that sits outside a double-quoted string.
///
/// Characters inside a string are copied verbatim. A backslash inside a string
/// makes the following character part of the string unconditionally, so an
/// escaped quote (`\"`) does not terminate it. No validation is performed here;
/// the scanner simply tracks quote state.
fn strip_insignificant_ws(input: &str) -> String {
    /// Position of the scanner relative to string literals.
    #[derive(Clone, Copy)]
    enum Scan {
        /// Between tokens: whitespace here is dropped.
        Outside,
        /// Inside a string literal.
        InString,
        /// Inside a string literal, directly after a backslash.
        AfterBackslash,
    }

    let mut state = Scan::Outside;
    let mut compacted = String::with_capacity(input.len());

    for ch in input.chars() {
        state = match (state, ch) {
            // The only arm that emits nothing.
            (Scan::Outside, ' ' | '\t' | '\n' | '\r') => continue,
            (Scan::Outside, '"') => Scan::InString,
            (Scan::Outside, _) => Scan::Outside,
            // Whatever follows a backslash is consumed as part of the string.
            (Scan::AfterBackslash, _) => Scan::InString,
            (Scan::InString, '\\') => Scan::AfterBackslash,
            (Scan::InString, '"') => Scan::Outside,
            (Scan::InString, _) => Scan::InString,
        };
        compacted.push(ch);
    }

    compacted
}
57
58#[must_use]
64pub fn compact_json(input: &str) -> Option<String> {
65 if input.len() > MAX_INPUT_BYTES {
66 return None;
67 }
68 let trimmed = input.trim_start();
69 if !trimmed.starts_with('{') && !trimmed.starts_with('[') {
72 return None;
73 }
74 serde_json::from_str::<serde_json::Value>(input).ok()?;
76
77 let compact = strip_insignificant_ws(input);
78 (compact.len() < input.len()).then_some(compact)
79}
80
81#[must_use]
84pub fn compact_jsonl(input: &str) -> Option<String> {
85 if input.len() > MAX_INPUT_BYTES {
86 return None;
87 }
88 let mut out = String::with_capacity(input.len());
89 let mut any = false;
90
91 for line in input.lines() {
92 let t = line.trim();
93 if t.is_empty() {
94 continue;
95 }
96 serde_json::from_str::<serde_json::Value>(t).ok()?;
97 if any {
98 out.push('\n');
99 }
100 out.push_str(&strip_insignificant_ws(t));
101 any = true;
102 }
103
104 if !any {
105 return None;
106 }
107 (out.len() < input.len()).then_some(out)
108}
109
110#[must_use]
116pub fn compact_structured(content: &str, ext: Option<&str>) -> Option<String> {
117 if matches!(ext, Some("jsonl" | "ndjson")) {
118 return compact_jsonl(content);
119 }
120 compact_json(content)
123}
124
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `text` as JSON, panicking if it is not valid.
    fn to_value(text: &str) -> serde_json::Value {
        serde_json::from_str(text).expect("valid json")
    }

    #[test]
    fn compacts_pretty_object_losslessly() {
        let src = "{\n \"name\": \"lean-ctx\",\n \"version\": 3,\n \"tags\": [\n \"a\",\n \"b\"\n ]\n}";
        let compacted = compact_json(src).expect("should compact");
        assert!(compacted.len() < src.len());
        assert_eq!(to_value(&compacted), to_value(src), "value must be identical");
        assert!(!compacted.contains('\n'));
    }

    #[test]
    fn preserves_key_order() {
        let src = "{\n \"zebra\": 1,\n \"alpha\": 2,\n \"mike\": 3\n}";
        let compacted = compact_json(src).expect("should compact");
        assert_eq!(compacted, r#"{"zebra":1,"alpha":2,"mike":3}"#);
    }

    #[test]
    fn preserves_number_formatting() {
        // Number tokens must be copied through as written, not re-serialized.
        let src = "{\n \"a\": 1.0,\n \"b\": 1e3,\n \"c\": 0.50\n}";
        let compacted = compact_json(src).expect("should compact");
        assert_eq!(compacted, r#"{"a":1.0,"b":1e3,"c":0.50}"#);
    }

    #[test]
    fn whitespace_inside_strings_is_kept() {
        let src = "{\n \"msg\": \"hello world\\n\\ttab\"\n}";
        let compacted = compact_json(src).expect("should compact");
        assert_eq!(to_value(&compacted), to_value(src));
        assert!(compacted.contains("hello world"), "inner spaces preserved");
        assert!(compacted.contains("\\n\\ttab"), "escapes preserved");
    }

    #[test]
    fn escaped_quote_does_not_end_string() {
        let src = "{\n \"q\": \"a \\\" b : c\"\n}";
        let compacted = compact_json(src).expect("should compact");
        assert_eq!(to_value(&compacted), to_value(src));
        assert_eq!(compacted, r#"{"q":"a \" b : c"}"#);
    }

    #[test]
    fn already_minified_returns_none() {
        let minified = r#"{"a":1,"b":[2,3]}"#;
        assert!(compact_json(minified).is_none(), "no smaller form available");
    }

    #[test]
    fn invalid_json_is_never_touched() {
        for bad in ["{not valid json", "{\"a\": }", "just text with spaces"] {
            assert!(compact_json(bad).is_none());
        }
    }

    #[test]
    fn scalars_and_non_json_skipped() {
        for skipped in ["42", "\"a string\"", " "] {
            assert!(compact_json(skipped).is_none());
        }
    }

    #[test]
    fn jsonl_compacts_each_line() {
        let src = "{ \"a\": 1 }\n{ \"b\": 2 }\n\n{ \"c\": 3 }";
        let compacted = compact_jsonl(src).expect("should compact");
        assert_eq!(compacted, "{\"a\":1}\n{\"b\":2}\n{\"c\":3}");
    }

    #[test]
    fn jsonl_with_invalid_line_returns_none() {
        let src = "{\"a\":1}\nnot json\n{\"b\":2}";
        assert!(compact_jsonl(src).is_none());
    }

    #[test]
    fn compact_structured_dispatches_by_ext() {
        let src = "{\n \"x\": 1\n}";
        assert!(compact_structured(src, Some("json")).is_some());
        assert!(compact_structured("{ \"x\": 1 }\n{ \"y\": 2 }", Some("jsonl")).is_some());
        assert!(compact_structured(src, None).is_some());
        assert!(compact_structured("def f(): pass", Some("py")).is_none());
    }

    #[test]
    fn idempotent_on_compacted_output() {
        let src = "{\n \"a\": [1, 2, 3],\n \"b\": { \"c\": 4 }\n}";
        let first_pass = compact_json(src).expect("compact once");
        assert!(compact_json(&first_pass).is_none(), "second pass finds nothing");
    }

    #[test]
    fn oversized_input_bails() {
        // The padding alone already reaches the limit, so the document exceeds it.
        let padding = " ".repeat(MAX_INPUT_BYTES);
        let oversized = format!("{{\"a\":\"{}\"}}", padding);
        assert!(compact_json(&oversized).is_none());
    }
}