Skip to main content

ralph/queue/
json_repair.rs

1//! JSON repair utilities for fixing common agent-induced JSON errors.
2//!
3//! Responsibilities:
4//! - Attempt to repair malformed JSON caused by common agent mistakes.
5//! - Fix single-quoted strings, unquoted object keys, trailing commas,
6//!   unescaped newlines, and missing closing brackets/braces.
7//!
8//! Not handled here:
9//! - JSONC parsing with comments (handled by `crate::jsonc`).
10//! - Semantic validation of queue content.
11//!
12//! Invariants/assumptions:
13//! - Repair functions return `None` if no changes were made.
14//! - Repairs are conservative; they should not make valid JSON invalid.
15
16use regex::Regex;
17use std::sync::LazyLock;
18
19static SINGLE_QUOTED_STRING_RE: LazyLock<Regex> =
20    LazyLock::new(|| Regex::new(r"(^|[^a-zA-Z0-9])'([^']*?)'([^a-zA-Z0-9]|$)").unwrap());
21
22static UNQUOTED_KEY_RE: LazyLock<Regex> =
23    LazyLock::new(|| Regex::new(r"([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*:").unwrap());
24
25static TRAILING_COMMA_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r",(\s*[}\]])").unwrap());
26
27static TRAILING_COMMA_NEWLINE_RE: LazyLock<Regex> =
28    LazyLock::new(|| Regex::new(r",(\s*)\n(\s*[}\]])").unwrap());
29
30/// Attempt to repair common JSON errors induced by agents.
31/// Returns Some(repaired_json) if repairs were made, None if no repairs possible.
32pub fn attempt_json_repair(raw: &str) -> Option<String> {
33    let mut repaired = raw.to_string();
34    let original = raw.to_string();
35
36    // Repair 1: Convert single-quoted strings to double-quoted
37    // Pattern: 'value' (but not apostrophes within words like "don't")
38    // We match single quotes that appear to be string delimiters
39    // Match '...' where the content doesn't contain ' and is not preceded/followed by alphanumeric
40    // Use ^ or non-alphanumeric before, and non-alphanumeric or $ after
41    if SINGLE_QUOTED_STRING_RE.is_match(&repaired) {
42        log::debug!("JSON repair: converting single-quoted strings to double-quoted");
43        repaired = SINGLE_QUOTED_STRING_RE
44            .replace_all(&repaired, |caps: &regex::Captures| {
45                let prefix = &caps[1];
46                let content = &caps[2];
47                let suffix = &caps[3];
48                let escaped = content.replace('"', "\\\"");
49                format!("{}\"{}\"{}", prefix, escaped, suffix)
50            })
51            .to_string();
52    }
53
54    // Repair 2: Add missing quotes around unquoted object keys
55    // Pattern: {[ or , followed by whitespace, then identifier followed by colon
56    // Matches: {key: or ,key: or { key: or , key:
57    if UNQUOTED_KEY_RE.is_match(&repaired) {
58        log::debug!("JSON repair: adding quotes around unquoted object keys");
59        repaired = UNQUOTED_KEY_RE
60            .replace_all(&repaired, "$1\"$2\":")
61            .to_string();
62    }
63
64    // Repair 3: Fix unescaped newlines within string values
65    // This is a common error when agents paste multi-line content
66    // We need to find newlines that are inside string contexts and escape them
67    repaired = repair_unescaped_newlines(&repaired);
68
69    // Repair 4: Fix unescaped quotes within string values
70    // Find quotes inside strings that aren't escaped and escape them
71    repaired = repair_unescaped_quotes(&repaired);
72
73    // Repair 5: Remove trailing commas before ] or }
74    // Pattern: ,\s*] or ,\s*}
75    if TRAILING_COMMA_RE.is_match(&repaired) {
76        log::debug!("JSON repair: removing trailing commas");
77        repaired = TRAILING_COMMA_RE.replace_all(&repaired, "$1").to_string();
78    }
79
80    // Repair 6: Remove trailing commas at end of arrays/objects (more aggressive)
81    // This handles cases where there might be newlines between comma and bracket
82    // Pattern: ,(\s*)\n(\s*[}\]])
83    if TRAILING_COMMA_NEWLINE_RE.is_match(&repaired) {
84        log::debug!("JSON repair: removing trailing commas before newlines");
85        repaired = TRAILING_COMMA_NEWLINE_RE
86            .replace_all(&repaired, "$1\n$2")
87            .to_string();
88    }
89
90    // Repair 7: Fix missing closing bracket at end of file
91    let open_brackets = repaired.matches('[').count();
92    let close_brackets = repaired.matches(']').count();
93    let open_braces = repaired.matches('{').count();
94    let close_braces = repaired.matches('}').count();
95
96    if open_brackets > close_brackets {
97        log::debug!(
98            "JSON repair: adding {} missing closing bracket(s)",
99            open_brackets - close_brackets
100        );
101        repaired.push_str(&"]".repeat(open_brackets - close_brackets));
102    }
103    if open_braces > close_braces {
104        log::debug!(
105            "JSON repair: adding {} missing closing brace(s)",
106            open_braces - close_braces
107        );
108        repaired.push_str(&"}".repeat(open_braces - close_braces));
109    }
110
111    if repaired != original {
112        Some(repaired)
113    } else {
114        None
115    }
116}
117
118/// Fix unescaped newlines within JSON string values.
119/// Uses a simple state machine to track whether we're inside a string.
120fn repair_unescaped_newlines(raw: &str) -> String {
121    let mut result = String::with_capacity(raw.len());
122    let mut in_string = false;
123    let mut escaped = false;
124
125    for ch in raw.chars() {
126        if escaped {
127            // Previous char was backslash, this char is escaped
128            result.push(ch);
129            escaped = false;
130            continue;
131        }
132
133        match ch {
134            '\\' => {
135                escaped = true;
136                result.push(ch);
137            }
138            '"' => {
139                in_string = !in_string;
140                result.push(ch);
141            }
142            '\n' if in_string => {
143                // Newline inside string - escape it
144                log::trace!("JSON repair: escaping unescaped newline in string");
145                result.push_str("\\n");
146            }
147            '\r' if in_string => {
148                // Carriage return inside string - escape it
149                log::trace!("JSON repair: escaping unescaped carriage return in string");
150                result.push_str("\\r");
151            }
152            _ => {
153                result.push(ch);
154            }
155        }
156    }
157
158    result
159}
160
/// Reserved hook for escaping stray double quotes inside JSON string values.
///
/// At present this is a pass-through: the input is returned as an owned copy with
/// no modifications. Telling a closing quote (typically followed by a structural
/// character such as `:`, `,`, `}` or `]`) apart from a quote that is really
/// content needs look-ahead heuristics, and getting that wrong would over-escape
/// and break JSON that is already valid, so no rewrite is attempted yet.
fn repair_unescaped_quotes(raw: &str) -> String {
    // Intentionally a no-op until a safe look-ahead heuristic exists.
    raw.to_owned()
}
175
#[cfg(test)]
mod tests {
    use super::*;
    use crate::contracts::QueueFile;

    /// A dangling comma after the last array element is removed.
    #[test]
    fn attempt_json_repair_fixes_trailing_comma_in_array() {
        let broken = r#"{"tasks": [{"id": "RQ-0001", "tags": ["a", "b",]}]}"#;
        let fixed = attempt_json_repair(broken).expect("should repair");
        assert!(fixed.contains(r#""tags": ["a", "b"]"#));
        assert!(!fixed.contains(r#""b","#));
    }

    /// A dangling comma after the last object member is removed.
    #[test]
    fn attempt_json_repair_fixes_trailing_comma_in_object() {
        let broken = r#"{"tasks": [{"id": "RQ-0001", "title": "Test",}]}"#;
        let fixed = attempt_json_repair(broken).expect("should repair");
        assert!(fixed.contains(r#""title": "Test"}"#));
        assert!(!fixed.contains(r#""Test","#));
    }

    /// Input that needs no repair yields `None`.
    #[test]
    fn attempt_json_repair_returns_none_for_valid_json() {
        let already_valid = r#"{"tasks": [{"id": "RQ-0001", "title": "Test"}]}"#;
        assert!(attempt_json_repair(already_valid).is_none());
    }

    /// Several trailing commas in one document are all removed.
    #[test]
    fn attempt_json_repair_fixes_multiple_trailing_commas() {
        // Uses a complete task structure so the result can be parsed as a queue file.
        let broken = r#"{"version": 1, "tasks": [{"id": "RQ-0001", "title": "Test", "status": "todo", "tags": ["a", "b",], "scope": ["file",],}]}"#;
        let fixed = attempt_json_repair(broken).expect("should repair");
        // The repaired text must deserialize.
        let _ = serde_json::from_str::<QueueFile>(&fixed).expect("repaired should be valid JSON");
    }

    // Tests for enhanced JSON repair (RQ-0362)

    /// Single-quoted keys and values are rewritten with double quotes.
    #[test]
    fn attempt_json_repair_fixes_single_quoted_strings() {
        let broken = r#"{'version': 1, 'tasks': [{'id': 'RQ-0001', 'title': 'Test'}]}"#;
        let fixed = attempt_json_repair(broken).expect("should repair");
        // The repaired text must deserialize.
        let _ = serde_json::from_str::<QueueFile>(&fixed).expect("repaired should be valid JSON");
        // Every formerly single-quoted token now appears double-quoted.
        for token in [
            r#""version""#,
            r#""tasks""#,
            r#""id""#,
            r#""RQ-0001""#,
            r#""title""#,
            r#""Test""#,
        ] {
            assert!(fixed.contains(token));
        }
    }

    /// An apostrophe inside a word is not treated as a string delimiter.
    #[test]
    fn attempt_json_repair_preserves_apostrophes_in_words() {
        let already_valid = r#"{"tasks": [{"id": "RQ-0001", "title": "Don't break this"}]}"#;
        // Already valid JSON, so nothing should be changed.
        assert!(attempt_json_repair(already_valid).is_none());
    }

    /// Bare identifiers used as object keys are wrapped in double quotes.
    #[test]
    fn attempt_json_repair_fixes_unquoted_object_keys() {
        let broken = r#"{version: 1, tasks: [{id: "RQ-0001", title: "Test"}]}"#;
        let fixed = attempt_json_repair(broken).expect("should repair");
        // The repaired text must deserialize.
        let _ = serde_json::from_str::<QueueFile>(&fixed).expect("repaired should be valid JSON");
        // Each key is now quoted.
        for key in [r#""version""#, r#""tasks""#, r#""id""#, r#""title""#] {
            assert!(fixed.contains(key));
        }
    }

    /// Bare keys that follow a comma are quoted as well.
    #[test]
    fn attempt_json_repair_fixes_unquoted_keys_after_comma() {
        let broken =
            r#"{"version": 1, tasks: [{"id": "RQ-0001", "title": "Test", status: "todo"}]}"#;
        let fixed = attempt_json_repair(broken).expect("should repair");
        let _ = serde_json::from_str::<QueueFile>(&fixed).expect("repaired should be valid JSON");
        for key in [r#""tasks""#, r#""status""#] {
            assert!(fixed.contains(key));
        }
    }

    /// A raw line feed inside a string value is replaced by the `\n` escape.
    #[test]
    fn attempt_json_repair_fixes_unescaped_newlines_in_strings() {
        // Multi-line content pasted without escaping.
        let broken = "{\"version\": 1, \"tasks\": [{\"id\": \"RQ-0001\", \"title\": \"Line one\nLine two\"}]}";
        let fixed = attempt_json_repair(broken).expect("should repair");
        // The escape sequence is present and the raw newline is gone.
        assert!(fixed.contains(r"Line one\nLine two"));
        assert!(!fixed.contains("Line one\nLine two"));
        // The repaired text must deserialize.
        let _ = serde_json::from_str::<QueueFile>(&fixed).expect("repaired should be valid JSON");
    }

    /// A raw carriage return inside a string value is replaced by the `\r` escape.
    #[test]
    fn attempt_json_repair_fixes_unescaped_carriage_returns_in_strings() {
        let broken = "{\"version\": 1, \"tasks\": [{\"id\": \"RQ-0001\", \"title\": \"Line one\rLine two\"}]}";
        let fixed = attempt_json_repair(broken).expect("should repair");
        assert!(fixed.contains(r"Line one\rLine two"));
        assert!(!fixed.contains("Line one\rLine two"));
    }

    /// Single quotes, a bare key and a trailing comma are repaired in one call.
    #[test]
    fn attempt_json_repair_handles_multiple_errors() {
        let broken = r#"{'version': 1, tasks: [{'id': 'RQ-0001', 'title': 'Test', 'status': 'todo', 'tags': [], 'scope': [], 'evidence': [], 'plan': [],}]}"#;
        let fixed = attempt_json_repair(broken).expect("should repair");
        let _ = serde_json::from_str::<QueueFile>(&fixed).expect("repaired should be valid JSON");
        for token in [
            r#""version""#,
            r#""tasks""#,
            r#""id""#,
            r#""RQ-0001""#,
        ] {
            assert!(fixed.contains(token));
        }
    }

    /// Double quotes inside a single-quoted string are backslash-escaped on conversion.
    #[test]
    fn attempt_json_repair_escapes_double_quotes_in_single_quoted_strings() {
        let broken = r#"{'version': 1, 'tasks': [{'id': 'RQ-0001', 'title': 'Say "hello"'}]}"#;
        let fixed = attempt_json_repair(broken).expect("should repair");
        assert!(fixed.contains("\"Say \\\"hello\\\"\""));
    }

    /// An empty single-quoted string becomes an empty double-quoted string.
    #[test]
    fn attempt_json_repair_handles_empty_single_quoted_string() {
        let broken = r#"{'version': 1, 'tasks': [{'id': '', 'title': ''}]}"#;
        let fixed = attempt_json_repair(broken).expect("should repair");
        let _ = serde_json::from_str::<QueueFile>(&fixed).expect("repaired should be valid JSON");
        assert!(fixed.contains("\"id\": \"\""));
    }

    /// A fully single-quoted document with timestamps converts and parses.
    #[test]
    fn attempt_json_repair_preserves_single_quote_then_unquoted_key_order() {
        let broken = r#"{'version': 1, 'tasks': [{'id': 'RQ-0001', 'title': 'Test', 'status': 'todo', 'tags': [], 'scope': [], 'evidence': [], 'plan': [], 'created_at': '2026-01-01T00:00:00Z', 'updated_at': '2026-01-01T00:00:00Z'}]}"#;
        let fixed = attempt_json_repair(broken).expect("should repair");
        assert!(fixed.contains("\"tasks\""));
        assert!(fixed.contains("\"id\": \"RQ-0001\""));
        let _ = serde_json::from_str::<QueueFile>(&fixed).expect("repaired should parse as JSON");
    }

    /// Mixed single quotes, bare keys and a trailing comma yield a parseable document.
    #[test]
    fn attempt_json_repair_handles_multiple_ordered_errors() {
        let broken = r#"{'version': 1, tasks: [{id: 'RQ-0001', title: 'A', status: 'todo', tags: ['bug',], scope: [], evidence: [], plan: [], created_at: '2026-01-01T00:00:00Z', updated_at: '2026-01-01T00:00:00Z'}]}"#;
        let fixed = attempt_json_repair(broken).expect("should repair");
        let _ = serde_json::from_str::<QueueFile>(&fixed).expect("repaired should parse as JSON");
        for token in [
            "\"version\"",
            "\"tasks\"",
            "\"title\"",
            "\"tags\": [\"bug\"]",
        ] {
            assert!(fixed.contains(token));
        }
    }

    /// Manual timing loop; ignored by default and makes no assertions.
    #[test]
    #[ignore = "perf-smoke: run manually when tuning hot-path: cargo test -p ralph-agent-loop queue::json_repair::tests::attempt_json_repair_perf_smoke -- --ignored"]
    fn attempt_json_repair_perf_smoke() {
        let broken = r#"{'version': 1, 'tasks': [{'id': 'RQ-0001', 'title': 'A', 'status': 'todo', 'scope': ['x',], 'evidence': ['a',], 'plan': ['x',], 'created_at': '2026-01-01T00:00:00Z', 'updated_at': '2026-01-01T00:00:00Z'}]}"#;
        let timer = std::time::Instant::now();
        (0..20_000).for_each(|_| {
            let _ = attempt_json_repair(broken);
        });
        let _elapsed = timer.elapsed();
    }
}