1pub fn sanitize_json(input: &str) -> String {
90 let trimmed = input.trim();
91 if trimmed.is_empty() {
92 return String::new();
93 }
94
95 let with_delimiters = fix_missing_delimiters(trimmed);
97
98 remove_trailing_commas(&with_delimiters)
100}
101
/// Returns a copy of `input` without the commas that directly precede a
/// closing `}` or `]`.
///
/// Whitespace between the comma and the closer is allowed and is kept in the
/// output; only the comma itself is removed. Commas inside string literals
/// are never touched: the scan tracks double quotes and backslash escapes so
/// that an escaped quote does not end the string.
///
/// A comma followed by anything else (including another comma or the end of
/// the input) is copied through unchanged.
fn remove_trailing_commas(input: &str) -> String {
    let mut cleaned = String::with_capacity(input.len());
    let mut inside_string = false;
    let mut escaped = false;

    for (offset, ch) in input.char_indices() {
        if escaped {
            // The character after a backslash is copied verbatim.
            escaped = false;
        } else if inside_string {
            match ch {
                '\\' => escaped = true,
                '"' => inside_string = false,
                _ => {}
            }
        } else if ch == '"' {
            inside_string = true;
        } else if ch == ',' {
            // A comma is one byte wide, so `offset + 1` is a char boundary.
            let upcoming = input[offset + 1..].trim_start().chars().next();
            if matches!(upcoming, Some('}') | Some(']')) {
                continue;
            }
        }
        cleaned.push(ch);
    }

    cleaned
}
152
/// Closes whatever a truncated JSON text left open at its end.
///
/// The input is scanned once, tracking whether the cursor is inside a string
/// literal (honouring backslash escapes) and which `{` / `[` containers have
/// not been closed yet. The result is `input` followed by:
///
/// 1. a closing `"` if the text ended inside a string, and
/// 2. the closer for every still-open container, innermost first.
///
/// If the text ends immediately after a backslash inside a string, that
/// dangling backslash is dropped. Keeping it would escape the appended quote,
/// leaving the string unterminated and swallowing the appended closers.
///
/// A `}` or `]` that does not match the innermost open container is ignored;
/// nothing other than missing terminators is repaired.
fn fix_missing_delimiters(input: &str) -> String {
    let mut in_string = false;
    let mut escape_next = false;
    // Closers still owed, outermost first.
    let mut pending: Vec<char> = Vec::new();

    for c in input.chars() {
        if escape_next {
            escape_next = false;
            continue;
        }

        match c {
            '\\' if in_string => escape_next = true,
            '"' => in_string = !in_string,
            '{' if !in_string => pending.push('}'),
            '[' if !in_string => pending.push(']'),
            '}' | ']' if !in_string => {
                if pending.last() == Some(&c) {
                    pending.pop();
                }
            }
            _ => {}
        }
    }

    // `escape_next` still being set here means the very last character was an
    // escape-introducing backslash with nothing after it.
    let body = if in_string && escape_next {
        input.strip_suffix('\\').unwrap_or(input)
    } else {
        input
    };

    let mut result = String::with_capacity(body.len() + pending.len() + 1);
    result.push_str(body);
    if in_string {
        result.push('"');
    }
    result.extend(pending.into_iter().rev());
    result
}
217
#[cfg(test)]
mod tests {
    //! Unit tests for `sanitize_json`, grouped by the kind of repair exercised.

    use super::*;

    // ---- Trailing comma removal ----

    #[test]
    fn test_trailing_comma_object() {
        assert_eq!(sanitize_json(r#"{"a": 1,}"#), r#"{"a": 1}"#);
    }

    #[test]
    fn test_trailing_comma_array() {
        assert_eq!(sanitize_json(r#"[1, 2, 3,]"#), r#"[1, 2, 3]"#);
    }

    #[test]
    fn test_trailing_comma_nested_object() {
        assert_eq!(
            sanitize_json(r#"{"outer": {"inner": 1,},}"#),
            r#"{"outer": {"inner": 1}}"#
        );
    }

    #[test]
    fn test_trailing_comma_nested_array() {
        assert_eq!(sanitize_json(r#"[[1, 2,], [3,],]"#), r#"[[1, 2], [3]]"#);
    }

    #[test]
    fn test_trailing_comma_mixed() {
        assert_eq!(
            sanitize_json(r#"{"items": [1, 2,], "name": "test",}"#),
            r#"{"items": [1, 2], "name": "test"}"#
        );
    }

    #[test]
    fn test_trailing_comma_with_whitespace() {
        // Only the comma is removed; the whitespace on both sides of it stays,
        // so the space before and the space after end up adjacent.
        assert_eq!(sanitize_json(r#"{"a": 1 , }"#), r#"{"a": 1  }"#);
        assert_eq!(sanitize_json("{\n \"a\": 1,\n}"), "{\n \"a\": 1\n}");
    }

    #[test]
    fn test_comma_in_string_preserved() {
        assert_eq!(
            sanitize_json(r#"{"msg": "hello, world"}"#),
            r#"{"msg": "hello, world"}"#
        );
        // A `,}` sequence inside a string literal is data, not a trailing comma.
        assert_eq!(sanitize_json(r#"{"msg": "a,}"}"#), r#"{"msg": "a,}"}"#);
    }

    #[test]
    fn test_no_trailing_comma() {
        assert_eq!(sanitize_json(r#"{"a": 1}"#), r#"{"a": 1}"#);
        assert_eq!(sanitize_json(r#"[1, 2, 3]"#), r#"[1, 2, 3]"#);
    }

    // ---- Missing closing delimiters ----

    #[test]
    fn test_missing_closing_brace() {
        assert_eq!(sanitize_json(r#"{"a": 1"#), r#"{"a": 1}"#);
    }

    #[test]
    fn test_missing_closing_bracket() {
        assert_eq!(sanitize_json(r#"["a", "b""#), r#"["a", "b"]"#);
    }

    #[test]
    fn test_missing_multiple_braces() {
        assert_eq!(sanitize_json(r#"{"a": {"b": 1"#), r#"{"a": {"b": 1}}"#);
    }

    #[test]
    fn test_missing_multiple_brackets() {
        assert_eq!(sanitize_json(r#"[[1, 2], [3"#), r#"[[1, 2], [3]]"#);
    }

    #[test]
    fn test_missing_mixed_delimiters() {
        assert_eq!(sanitize_json(r#"{"items": [1, 2"#), r#"{"items": [1, 2]}"#);
    }

    #[test]
    fn test_brace_in_string_ignored() {
        // The `{` inside the string value must not be counted as an open object.
        assert_eq!(sanitize_json(r#"{"msg": "{"}"#), r#"{"msg": "{"}"#);
    }

    #[test]
    fn test_no_missing_delimiters() {
        assert_eq!(sanitize_json(r#"{"a": 1}"#), r#"{"a": 1}"#);
        assert_eq!(sanitize_json(r#"[1, 2]"#), r#"[1, 2]"#);
    }

    // ---- Both repairs combined ----

    #[test]
    fn test_trailing_comma_and_missing_brace() {
        assert_eq!(sanitize_json(r#"{"a": 1,"#), r#"{"a": 1}"#);
    }

    #[test]
    fn test_trailing_comma_and_missing_bracket() {
        assert_eq!(sanitize_json(r#"[1, 2,"#), r#"[1, 2]"#);
    }

    #[test]
    fn test_complex_llm_output() {
        // The whitespace after the final comma is trimmed before repair, so the
        // closing brace is appended directly after the array.
        let input = r#"{
            "type": "AddDerive",
            "target": "User",
            "derives": ["Debug", "Clone",],
        "#;
        let expected = r#"{
            "type": "AddDerive",
            "target": "User",
            "derives": ["Debug", "Clone"]}"#;
        assert_eq!(sanitize_json(input), expected);
    }

    // ---- Edge cases ----

    #[test]
    fn test_empty_input() {
        assert_eq!(sanitize_json(""), "");
        assert_eq!(sanitize_json(" "), "");
    }

    #[test]
    fn test_whitespace_only() {
        assert_eq!(sanitize_json(" \n\t "), "");
    }

    #[test]
    fn test_simple_values() {
        assert_eq!(sanitize_json("null"), "null");
        assert_eq!(sanitize_json("true"), "true");
        assert_eq!(sanitize_json("123"), "123");
        assert_eq!(sanitize_json(r#""string""#), r#""string""#);
    }

    #[test]
    fn test_escaped_quote_in_string() {
        assert_eq!(
            sanitize_json(r#"{"msg": "say \"hello\""}"#),
            r#"{"msg": "say \"hello\""}"#
        );
    }

    #[test]
    fn test_escaped_backslash_in_string() {
        assert_eq!(
            sanitize_json(r#"{"path": "C:\\Users\\test"}"#),
            r#"{"path": "C:\\Users\\test"}"#
        );
    }

    #[test]
    fn test_unclosed_string() {
        // The string is closed first, then the enclosing object.
        assert_eq!(sanitize_json(r#"{"a": "test"#), r#"{"a": "test"}"#);
    }

    #[test]
    fn test_deeply_nested() {
        assert_eq!(
            sanitize_json(r#"{"a": {"b": {"c": [1, 2,],"#),
            r#"{"a": {"b": {"c": [1, 2]}}}"#
        );
    }

    // ---- Realistic LLM responses ----

    #[test]
    fn test_llm_truncated_response() {
        let input = r#"{"type": "RenameIdent", "from": "old_name", "to": "new_na"#;
        let fixed = sanitize_json(input);
        assert_eq!(
            fixed,
            r#"{"type": "RenameIdent", "from": "old_name", "to": "new_na"}"#
        );
    }

    #[test]
    fn test_llm_array_with_trailing_comma() {
        let input = r#"{"intents": [
            {"type": "AddDerive", "target": "User",},
            {"type": "AddDerive", "target": "Post",},
        ]}"#;
        let fixed = sanitize_json(input);
        assert!(fixed.contains(r#""target": "User"}"#));
        assert!(fixed.contains(r#""target": "Post"}"#));
        assert!(!fixed.contains(",}"));
        assert!(!fixed.contains(",]"));
    }
}