ralph/queue/
json_repair.rs1use regex::Regex;
17use std::sync::LazyLock;
18
19static SINGLE_QUOTED_STRING_RE: LazyLock<Regex> =
20 LazyLock::new(|| Regex::new(r"(^|[^a-zA-Z0-9])'([^']*?)'([^a-zA-Z0-9]|$)").unwrap());
21
22static UNQUOTED_KEY_RE: LazyLock<Regex> =
23 LazyLock::new(|| Regex::new(r"([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*:").unwrap());
24
25static TRAILING_COMMA_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r",(\s*[}\]])").unwrap());
26
27static TRAILING_COMMA_NEWLINE_RE: LazyLock<Regex> =
28 LazyLock::new(|| Regex::new(r",(\s*)\n(\s*[}\]])").unwrap());
29
30pub fn attempt_json_repair(raw: &str) -> Option<String> {
33 let mut repaired = raw.to_string();
34 let original = raw.to_string();
35
36 if SINGLE_QUOTED_STRING_RE.is_match(&repaired) {
42 log::debug!("JSON repair: converting single-quoted strings to double-quoted");
43 repaired = SINGLE_QUOTED_STRING_RE
44 .replace_all(&repaired, |caps: ®ex::Captures| {
45 let prefix = &caps[1];
46 let content = &caps[2];
47 let suffix = &caps[3];
48 let escaped = content.replace('"', "\\\"");
49 format!("{}\"{}\"{}", prefix, escaped, suffix)
50 })
51 .to_string();
52 }
53
54 if UNQUOTED_KEY_RE.is_match(&repaired) {
58 log::debug!("JSON repair: adding quotes around unquoted object keys");
59 repaired = UNQUOTED_KEY_RE
60 .replace_all(&repaired, "$1\"$2\":")
61 .to_string();
62 }
63
64 repaired = repair_unescaped_newlines(&repaired);
68
69 repaired = repair_unescaped_quotes(&repaired);
72
73 if TRAILING_COMMA_RE.is_match(&repaired) {
76 log::debug!("JSON repair: removing trailing commas");
77 repaired = TRAILING_COMMA_RE.replace_all(&repaired, "$1").to_string();
78 }
79
80 if TRAILING_COMMA_NEWLINE_RE.is_match(&repaired) {
84 log::debug!("JSON repair: removing trailing commas before newlines");
85 repaired = TRAILING_COMMA_NEWLINE_RE
86 .replace_all(&repaired, "$1\n$2")
87 .to_string();
88 }
89
90 let open_brackets = repaired.matches('[').count();
92 let close_brackets = repaired.matches(']').count();
93 let open_braces = repaired.matches('{').count();
94 let close_braces = repaired.matches('}').count();
95
96 if open_brackets > close_brackets {
97 log::debug!(
98 "JSON repair: adding {} missing closing bracket(s)",
99 open_brackets - close_brackets
100 );
101 repaired.push_str(&"]".repeat(open_brackets - close_brackets));
102 }
103 if open_braces > close_braces {
104 log::debug!(
105 "JSON repair: adding {} missing closing brace(s)",
106 open_braces - close_braces
107 );
108 repaired.push_str(&"}".repeat(open_braces - close_braces));
109 }
110
111 if repaired != original {
112 Some(repaired)
113 } else {
114 None
115 }
116}
117
118fn repair_unescaped_newlines(raw: &str) -> String {
121 let mut result = String::with_capacity(raw.len());
122 let mut in_string = false;
123 let mut escaped = false;
124
125 for ch in raw.chars() {
126 if escaped {
127 result.push(ch);
129 escaped = false;
130 continue;
131 }
132
133 match ch {
134 '\\' => {
135 escaped = true;
136 result.push(ch);
137 }
138 '"' => {
139 in_string = !in_string;
140 result.push(ch);
141 }
142 '\n' if in_string => {
143 log::trace!("JSON repair: escaping unescaped newline in string");
145 result.push_str("\\n");
146 }
147 '\r' if in_string => {
148 log::trace!("JSON repair: escaping unescaped carriage return in string");
150 result.push_str("\\r");
151 }
152 _ => {
153 result.push(ch);
154 }
155 }
156 }
157
158 result
159}
160
/// Repair step for unescaped double quotes inside string values.
///
/// Currently a no-op: the returned `String` is an owned copy of `raw` with no
/// characters changed.
fn repair_unescaped_quotes(raw: &str) -> String {
    String::from(raw)
}
175
#[cfg(test)]
mod tests {
    use super::*;
    use crate::contracts::QueueFile;

    /// Runs the repair and unwraps the result, failing the calling test when
    /// the input comes back as `None`.
    #[track_caller]
    fn repair(input: &str) -> String {
        attempt_json_repair(input).expect("should repair")
    }

    /// Deserializes `json` into a `QueueFile`, panicking with `msg` on failure.
    #[track_caller]
    fn assert_queue_file(json: &str, msg: &str) {
        let _: QueueFile = serde_json::from_str(json).expect(msg);
    }

    // --- trailing commas ---

    #[test]
    fn attempt_json_repair_fixes_trailing_comma_in_array() {
        let fixed = repair(r#"{"tasks": [{"id": "RQ-0001", "tags": ["a", "b",]}]}"#);
        assert!(fixed.contains("\"tags\": [\"a\", \"b\"]"));
        assert!(!fixed.contains("\"b\","));
    }

    #[test]
    fn attempt_json_repair_fixes_trailing_comma_in_object() {
        let fixed = repair(r#"{"tasks": [{"id": "RQ-0001", "title": "Test",}]}"#);
        assert!(fixed.contains("\"title\": \"Test\"}"));
        assert!(!fixed.contains("\"Test\","));
    }

    #[test]
    fn attempt_json_repair_returns_none_for_valid_json() {
        // Already-valid input must be reported as "nothing to repair".
        let valid = r#"{"tasks": [{"id": "RQ-0001", "title": "Test"}]}"#;
        assert!(attempt_json_repair(valid).is_none());
    }

    #[test]
    fn attempt_json_repair_fixes_multiple_trailing_commas() {
        let fixed = repair(
            r#"{"version": 1, "tasks": [{"id": "RQ-0001", "title": "Test", "status": "todo", "tags": ["a", "b",], "scope": ["file",],}]}"#,
        );
        assert_queue_file(&fixed, "repaired should be valid JSON");
    }

    // --- single-quoted strings ---

    #[test]
    fn attempt_json_repair_fixes_single_quoted_strings() {
        let fixed = repair(r#"{'version': 1, 'tasks': [{'id': 'RQ-0001', 'title': 'Test'}]}"#);
        assert_queue_file(&fixed, "repaired should be valid JSON");
        for needle in [
            "\"version\"",
            "\"tasks\"",
            "\"id\"",
            "\"RQ-0001\"",
            "\"title\"",
            "\"Test\"",
        ] {
            assert!(fixed.contains(needle));
        }
    }

    #[test]
    fn attempt_json_repair_preserves_apostrophes_in_words() {
        // The apostrophe in "Don't" must not be mistaken for a quote.
        let valid = r#"{"tasks": [{"id": "RQ-0001", "title": "Don't break this"}]}"#;
        assert!(attempt_json_repair(valid).is_none());
    }

    // --- unquoted keys ---

    #[test]
    fn attempt_json_repair_fixes_unquoted_object_keys() {
        let fixed = repair(r#"{version: 1, tasks: [{id: "RQ-0001", title: "Test"}]}"#);
        assert_queue_file(&fixed, "repaired should be valid JSON");
        for needle in ["\"version\"", "\"tasks\"", "\"id\"", "\"title\""] {
            assert!(fixed.contains(needle));
        }
    }

    #[test]
    fn attempt_json_repair_fixes_unquoted_keys_after_comma() {
        let fixed = repair(
            r#"{"version": 1, tasks: [{"id": "RQ-0001", "title": "Test", status: "todo"}]}"#,
        );
        assert_queue_file(&fixed, "repaired should be valid JSON");
        for needle in ["\"tasks\"", "\"status\""] {
            assert!(fixed.contains(needle));
        }
    }

    // --- raw line breaks inside strings ---

    #[test]
    fn attempt_json_repair_fixes_unescaped_newlines_in_strings() {
        let fixed = repair(
            "{\"version\": 1, \"tasks\": [{\"id\": \"RQ-0001\", \"title\": \"Line one\nLine two\"}]}",
        );
        assert!(fixed.contains("Line one\\nLine two"));
        assert!(!fixed.contains("Line one\nLine two"));
        assert_queue_file(&fixed, "repaired should be valid JSON");
    }

    #[test]
    fn attempt_json_repair_fixes_unescaped_carriage_returns_in_strings() {
        let fixed = repair(
            "{\"version\": 1, \"tasks\": [{\"id\": \"RQ-0001\", \"title\": \"Line one\rLine two\"}]}",
        );
        assert!(fixed.contains("Line one\\rLine two"));
        assert!(!fixed.contains("Line one\rLine two"));
    }

    // --- combinations ---

    #[test]
    fn attempt_json_repair_handles_multiple_errors() {
        let fixed = repair(
            r#"{'version': 1, tasks: [{'id': 'RQ-0001', 'title': 'Test', 'status': 'todo', 'tags': [], 'scope': [], 'evidence': [], 'plan': [],}]}"#,
        );
        assert_queue_file(&fixed, "repaired should be valid JSON");
        for needle in ["\"version\"", "\"tasks\"", "\"id\"", "\"RQ-0001\""] {
            assert!(fixed.contains(needle));
        }
    }

    #[test]
    fn attempt_json_repair_escapes_double_quotes_in_single_quoted_strings() {
        let fixed =
            repair(r#"{'version': 1, 'tasks': [{'id': 'RQ-0001', 'title': 'Say "hello"'}]}"#);
        assert!(fixed.contains("\"Say \\\"hello\\\"\""));
    }

    #[test]
    fn attempt_json_repair_handles_empty_single_quoted_string() {
        let fixed = repair(r#"{'version': 1, 'tasks': [{'id': '', 'title': ''}]}"#);
        assert_queue_file(&fixed, "repaired should be valid JSON");
        assert!(fixed.contains("\"id\": \"\""));
    }

    #[test]
    fn attempt_json_repair_preserves_single_quote_then_unquoted_key_order() {
        let fixed = repair(
            r#"{'version': 1, 'tasks': [{'id': 'RQ-0001', 'title': 'Test', 'status': 'todo', 'tags': [], 'scope': [], 'evidence': [], 'plan': [], 'created_at': '2026-01-01T00:00:00Z', 'updated_at': '2026-01-01T00:00:00Z'}]}"#,
        );
        assert!(fixed.contains(r#""tasks""#));
        assert!(fixed.contains(r#""id": "RQ-0001""#));
        assert_queue_file(&fixed, "repaired should parse as JSON");
    }

    #[test]
    fn attempt_json_repair_handles_multiple_ordered_errors() {
        let fixed = repair(
            r#"{'version': 1, tasks: [{id: 'RQ-0001', title: 'A', status: 'todo', tags: ['bug',], scope: [], evidence: [], plan: [], created_at: '2026-01-01T00:00:00Z', updated_at: '2026-01-01T00:00:00Z'}]}"#,
        );
        assert_queue_file(&fixed, "repaired should parse as JSON");
        for needle in [
            r#""version""#,
            r#""tasks""#,
            r#""title""#,
            r#""tags": ["bug"]"#,
        ] {
            assert!(fixed.contains(needle));
        }
    }

    // --- manual performance smoke test ---

    #[test]
    #[ignore = "perf-smoke: run manually when tuning hot-path: cargo test -p ralph-agent-loop queue::json_repair::tests::attempt_json_repair_perf_smoke -- --ignored"]
    fn attempt_json_repair_perf_smoke() {
        let payload = r#"{'version': 1, 'tasks': [{'id': 'RQ-0001', 'title': 'A', 'status': 'todo', 'scope': ['x',], 'evidence': ['a',], 'plan': ['x',], 'created_at': '2026-01-01T00:00:00Z', 'updated_at': '2026-01-01T00:00:00Z'}]}"#;
        let timer = std::time::Instant::now();
        (0..20_000).for_each(|_| {
            let _ = attempt_json_repair(payload);
        });
        // The duration is measured but intentionally not asserted on.
        let _elapsed = timer.elapsed();
    }
}