use regex::Regex;
use std::sync::LazyLock;
/// Matches a single-quoted run `'...'` that contains no `'` itself.
///
/// The opening quote must sit at the start of the input or directly after a
/// character that is not an ASCII letter or digit, and the closing quote must sit
/// at the end of the input or directly before such a character. Apostrophes with an
/// ASCII letter or digit on the relevant side (as in `Don't`) therefore do not match.
///
/// Capture groups: 1 = leading boundary (possibly empty), 2 = string content,
/// 3 = trailing boundary (possibly empty). Both boundary characters are part of
/// the match, so they are consumed by it.
static SINGLE_QUOTED_STRING_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(^|[^a-zA-Z0-9])'([^']*?)'([^a-zA-Z0-9]|$)").unwrap());
/// Matches a bare identifier used as an object key: `{` or `,`, optional
/// whitespace, an identifier (`[a-zA-Z_][a-zA-Z0-9_]*`), optional whitespace, `:`.
///
/// Capture groups: 1 = the `{`/`,` plus following whitespace, 2 = the identifier.
static UNQUOTED_KEY_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*:").unwrap());
/// Matches a comma followed by optional whitespace and a closing `}` or `]`.
///
/// Capture group 1 holds the whitespace and the closing delimiter (everything
/// except the comma).
static TRAILING_COMMA_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r",(\s*[}\]])").unwrap());
/// Matches a comma, optional whitespace, a newline, optional whitespace and a
/// closing `}` or `]`.
///
/// Capture groups: 1 = whitespace before the newline, 2 = whitespace after the
/// newline plus the closing delimiter.
static TRAILING_COMMA_NEWLINE_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r",(\s*)\n(\s*[}\]])").unwrap());
pub fn attempt_json_repair(raw: &str) -> Option<String> {
let mut repaired = raw.to_string();
let original = raw.to_string();
if SINGLE_QUOTED_STRING_RE.is_match(&repaired) {
log::debug!("JSON repair: converting single-quoted strings to double-quoted");
repaired = SINGLE_QUOTED_STRING_RE
.replace_all(&repaired, |caps: ®ex::Captures| {
let prefix = &caps[1];
let content = &caps[2];
let suffix = &caps[3];
let escaped = content.replace('"', "\\\"");
format!("{}\"{}\"{}", prefix, escaped, suffix)
})
.to_string();
}
if UNQUOTED_KEY_RE.is_match(&repaired) {
log::debug!("JSON repair: adding quotes around unquoted object keys");
repaired = UNQUOTED_KEY_RE
.replace_all(&repaired, "$1\"$2\":")
.to_string();
}
repaired = repair_unescaped_newlines(&repaired);
repaired = repair_unescaped_quotes(&repaired);
if TRAILING_COMMA_RE.is_match(&repaired) {
log::debug!("JSON repair: removing trailing commas");
repaired = TRAILING_COMMA_RE.replace_all(&repaired, "$1").to_string();
}
if TRAILING_COMMA_NEWLINE_RE.is_match(&repaired) {
log::debug!("JSON repair: removing trailing commas before newlines");
repaired = TRAILING_COMMA_NEWLINE_RE
.replace_all(&repaired, "$1\n$2")
.to_string();
}
let open_brackets = repaired.matches('[').count();
let close_brackets = repaired.matches(']').count();
let open_braces = repaired.matches('{').count();
let close_braces = repaired.matches('}').count();
if open_brackets > close_brackets {
log::debug!(
"JSON repair: adding {} missing closing bracket(s)",
open_brackets - close_brackets
);
repaired.push_str(&"]".repeat(open_brackets - close_brackets));
}
if open_braces > close_braces {
log::debug!(
"JSON repair: adding {} missing closing brace(s)",
open_braces - close_braces
);
repaired.push_str(&"}".repeat(open_braces - close_braces));
}
if repaired != original {
Some(repaired)
} else {
None
}
}
/// Escapes raw line feeds and carriage returns that occur inside double-quoted
/// strings.
///
/// The input is scanned once, character by character. A `"` that is not preceded
/// by a backslash toggles the "inside a string" state; a backslash causes the next
/// character to be copied through verbatim. While inside a string, a raw `\n`
/// becomes the two characters `\` `n` and a raw `\r` becomes `\` `r`. Every other
/// character is copied unchanged.
fn repair_unescaped_newlines(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len());
    let mut inside_string = false;
    let mut after_backslash = false;

    for c in raw.chars() {
        if after_backslash {
            // Whatever follows a backslash is copied as-is and ends the escape.
            after_backslash = false;
            out.push(c);
        } else if c == '\\' {
            after_backslash = true;
            out.push(c);
        } else if c == '"' {
            inside_string = !inside_string;
            out.push(c);
        } else if inside_string && c == '\n' {
            log::trace!("JSON repair: escaping unescaped newline in string");
            out.push_str("\\n");
        } else if inside_string && c == '\r' {
            log::trace!("JSON repair: escaping unescaped carriage return in string");
            out.push_str("\\r");
        } else {
            out.push(c);
        }
    }

    out
}
/// Pass-through stage for unescaped double quotes inside strings.
///
/// No repair is implemented here: the input is returned as an owned copy with
/// its content unchanged.
fn repair_unescaped_quotes(raw: &str) -> String {
    String::from(raw)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::contracts::QueueFile;

    /// Runs the repair and unwraps the result, failing the test when the input
    /// came back unchanged.
    #[track_caller]
    fn repair(malformed: &str) -> String {
        attempt_json_repair(malformed).expect("should repair")
    }

    /// Deserializes `json` as a `QueueFile`, panicking with `failure` on error.
    #[track_caller]
    fn assert_parses_as_queue(json: &str, failure: &str) {
        let _: QueueFile = serde_json::from_str(json).expect(failure);
    }

    // A comma before `]` is dropped.
    #[test]
    fn attempt_json_repair_fixes_trailing_comma_in_array() {
        let fixed = repair(r#"{"tasks": [{"id": "RQ-0001", "tags": ["a", "b",]}]}"#);
        assert!(fixed.contains("\"tags\": [\"a\", \"b\"]"));
        assert!(!fixed.contains("\"b\","));
    }

    // A comma before `}` is dropped.
    #[test]
    fn attempt_json_repair_fixes_trailing_comma_in_object() {
        let fixed = repair(r#"{"tasks": [{"id": "RQ-0001", "title": "Test",}]}"#);
        assert!(fixed.contains("\"title\": \"Test\"}"));
        assert!(!fixed.contains("\"Test\","));
    }

    // Well-formed input yields `None`.
    #[test]
    fn attempt_json_repair_returns_none_for_valid_json() {
        let outcome = attempt_json_repair(r#"{"tasks": [{"id": "RQ-0001", "title": "Test"}]}"#);
        assert!(outcome.is_none());
    }

    // Several trailing commas in one document are all removed.
    #[test]
    fn attempt_json_repair_fixes_multiple_trailing_commas() {
        let fixed = repair(
            r#"{"version": 1, "tasks": [{"id": "RQ-0001", "title": "Test", "status": "todo", "tags": ["a", "b",], "scope": ["file",],}]}"#,
        );
        assert_parses_as_queue(&fixed, "repaired should be valid JSON");
    }

    // Single-quoted keys and values are converted to double-quoted ones.
    #[test]
    fn attempt_json_repair_fixes_single_quoted_strings() {
        let fixed = repair(r#"{'version': 1, 'tasks': [{'id': 'RQ-0001', 'title': 'Test'}]}"#);
        assert_parses_as_queue(&fixed, "repaired should be valid JSON");
        assert!(fixed.contains("\"version\""));
        assert!(fixed.contains("\"tasks\""));
        assert!(fixed.contains("\"id\""));
        assert!(fixed.contains("\"RQ-0001\""));
        assert!(fixed.contains("\"title\""));
        assert!(fixed.contains("\"Test\""));
    }

    // An apostrophe inside a word must not be treated as a string delimiter.
    #[test]
    fn attempt_json_repair_preserves_apostrophes_in_words() {
        let outcome =
            attempt_json_repair(r#"{"tasks": [{"id": "RQ-0001", "title": "Don't break this"}]}"#);
        assert!(outcome.is_none());
    }

    // Bare identifier keys gain double quotes.
    #[test]
    fn attempt_json_repair_fixes_unquoted_object_keys() {
        let fixed = repair(r#"{version: 1, tasks: [{id: "RQ-0001", title: "Test"}]}"#);
        assert_parses_as_queue(&fixed, "repaired should be valid JSON");
        assert!(fixed.contains("\"version\""));
        assert!(fixed.contains("\"tasks\""));
        assert!(fixed.contains("\"id\""));
        assert!(fixed.contains("\"title\""));
    }

    // Bare keys that follow a comma (not only `{`) are quoted as well.
    #[test]
    fn attempt_json_repair_fixes_unquoted_keys_after_comma() {
        let fixed = repair(
            r#"{"version": 1, tasks: [{"id": "RQ-0001", "title": "Test", status: "todo"}]}"#,
        );
        assert_parses_as_queue(&fixed, "repaired should be valid JSON");
        assert!(fixed.contains("\"tasks\""));
        assert!(fixed.contains("\"status\""));
    }

    // A raw line feed inside a string value is escaped.
    #[test]
    fn attempt_json_repair_fixes_unescaped_newlines_in_strings() {
        let fixed = repair(
            "{\"version\": 1, \"tasks\": [{\"id\": \"RQ-0001\", \"title\": \"Line one\nLine two\"}]}",
        );
        assert!(fixed.contains("Line one\\nLine two"));
        assert!(!fixed.contains("Line one\nLine two"));
        assert_parses_as_queue(&fixed, "repaired should be valid JSON");
    }

    // A raw carriage return inside a string value is escaped.
    #[test]
    fn attempt_json_repair_fixes_unescaped_carriage_returns_in_strings() {
        let fixed = repair(
            "{\"version\": 1, \"tasks\": [{\"id\": \"RQ-0001\", \"title\": \"Line one\rLine two\"}]}",
        );
        assert!(fixed.contains("Line one\\rLine two"));
        assert!(!fixed.contains("Line one\rLine two"));
    }

    // Single quotes, a bare key and a trailing comma in the same document.
    #[test]
    fn attempt_json_repair_handles_multiple_errors() {
        let fixed = repair(
            r#"{'version': 1, tasks: [{'id': 'RQ-0001', 'title': 'Test', 'status': 'todo', 'tags': [], 'scope': [], 'evidence': [], 'plan': [],}]}"#,
        );
        assert_parses_as_queue(&fixed, "repaired should be valid JSON");
        assert!(fixed.contains("\"version\""));
        assert!(fixed.contains("\"tasks\""));
        assert!(fixed.contains("\"id\""));
        assert!(fixed.contains("\"RQ-0001\""));
    }

    // Double quotes embedded in a single-quoted string are backslash-escaped.
    #[test]
    fn attempt_json_repair_escapes_double_quotes_in_single_quoted_strings() {
        let fixed =
            repair(r#"{'version': 1, 'tasks': [{'id': 'RQ-0001', 'title': 'Say "hello"'}]}"#);
        assert!(fixed.contains("\"Say \\\"hello\\\"\""));
    }

    // `''` becomes `""`.
    #[test]
    fn attempt_json_repair_handles_empty_single_quoted_string() {
        let fixed = repair(r#"{'version': 1, 'tasks': [{'id': '', 'title': ''}]}"#);
        assert_parses_as_queue(&fixed, "repaired should be valid JSON");
        assert!(fixed.contains("\"id\": \"\""));
    }

    // A fully single-quoted document converts cleanly and still parses.
    #[test]
    fn attempt_json_repair_preserves_single_quote_then_unquoted_key_order() {
        let fixed = repair(
            r#"{'version': 1, 'tasks': [{'id': 'RQ-0001', 'title': 'Test', 'status': 'todo', 'tags': [], 'scope': [], 'evidence': [], 'plan': [], 'created_at': '2026-01-01T00:00:00Z', 'updated_at': '2026-01-01T00:00:00Z'}]}"#,
        );
        assert!(fixed.contains(r#""tasks""#));
        assert!(fixed.contains(r#""id": "RQ-0001""#));
        assert_parses_as_queue(&fixed, "repaired should parse as JSON");
    }

    // Mixed single quotes, bare keys and a trailing comma, applied in sequence.
    #[test]
    fn attempt_json_repair_handles_multiple_ordered_errors() {
        let fixed = repair(
            r#"{'version': 1, tasks: [{id: 'RQ-0001', title: 'A', status: 'todo', tags: ['bug',], scope: [], evidence: [], plan: [], created_at: '2026-01-01T00:00:00Z', updated_at: '2026-01-01T00:00:00Z'}]}"#,
        );
        assert_parses_as_queue(&fixed, "repaired should parse as JSON");
        assert!(fixed.contains(r#""version""#));
        assert!(fixed.contains(r#""tasks""#));
        assert!(fixed.contains(r#""title""#));
        assert!(fixed.contains(r#""tags": ["bug"]"#));
    }

    // Manual smoke run: repeats the repair many times; the elapsed time is
    // measured but not asserted on.
    #[test]
    #[ignore = "perf-smoke: run manually when tuning hot-path: cargo test -p ralph-agent-loop queue::json_repair::tests::attempt_json_repair_perf_smoke -- --ignored"]
    fn attempt_json_repair_perf_smoke() {
        let sample = r#"{'version': 1, 'tasks': [{'id': 'RQ-0001', 'title': 'A', 'status': 'todo', 'scope': ['x',], 'evidence': ['a',], 'plan': ['x',], 'created_at': '2026-01-01T00:00:00Z', 'updated_at': '2026-01-01T00:00:00Z'}]}"#;
        let timer = std::time::Instant::now();
        (0..20_000).for_each(|_| {
            let _ = attempt_json_repair(sample);
        });
        let _elapsed = timer.elapsed();
    }
}