use fancy_regex::{Captures, Regex};
use serde_json;
/// Outcome of a single [`fix_json_syntax`] run.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FixReport {
    /// The input exactly as it was handed to the fixer.
    pub original: String,
    /// The text after all repairs; pretty-printed JSON when `success` is `true`.
    pub fixed: String,
    /// Human-readable description of every repair that changed the text, in order.
    pub steps: Vec<String>,
    /// Whether `fixed` parsed as JSON at the end of the run.
    pub success: bool,
}
/// Names a repair step; a value of this type is passed to `apply_step`,
/// which accepts it without inspecting it.
#[derive(Debug)]
enum FixStep {
/// Tags the rewrite in `fix_json_syntax` that turns `\",` into `",`.
RemoveEscapedQuoteComma,
}
/// Feeds `input` through the transformation `f` and hands back whatever it returns.
///
/// `_step` names the repair being performed; it is not inspected here.
fn apply_step<F>(input: String, _step: FixStep, f: F) -> String
where
    F: FnOnce(String) -> String,
{
    let transform = f;
    transform(input)
}
/// Attempts to repair malformed JSON text with an ordered series of heuristic rewrites.
///
/// The input is trimmed and any surrounding Markdown code fence is removed. If the
/// text is then already a valid JSON document whose top-level value is not a string,
/// it is pretty-printed and returned untouched by the heuristics. Otherwise each
/// repair runs in a fixed order; every repair that changes the text is recorded in
/// `steps`, and the pipeline stops as soon as the text becomes a valid non-string
/// JSON document. Stopping early matters because the later, more aggressive repairs
/// can damage text that is already well-formed.
///
/// A top-level JSON *string* never triggers the early stop, so that stringified JSON
/// (`"{\"a\":1}"`) still reaches the unwrapping step.
///
/// # Returns
///
/// A [`FixReport`] holding the original input, the final text (pretty-printed when
/// parsing succeeded), the list of applied repairs and the success flag.
///
/// # Side effects
///
/// When the final text still fails to parse, the parse error, its position and the
/// faulty text are printed to standard output.
///
/// # Panics
///
/// Panics only if one of the hard-coded regular expressions fails to compile.
pub fn fix_json_syntax(input: &str) -> FixReport {
    /// Pretty-prints `text` when it parses as JSON with a non-string top-level value.
    fn pretty_if_structured(text: &str) -> Option<String> {
        let value = serde_json::from_str::<serde_json::Value>(text).ok()?;
        if value.is_string() {
            // Leave top-level strings to the stringified-JSON unwrapping step.
            return None;
        }
        serde_json::to_string_pretty(&value).ok()
    }

    /// Replaces every match of the hard-coded `pattern` in `text` with `replacement`.
    fn regex_replace(pattern: &str, text: &str, replacement: &str) -> String {
        Regex::new(pattern)
            .expect("hard-coded repair pattern must compile")
            .replace_all(text, replacement)
            .to_string()
    }

    /// Drops closing brackets that do not match the innermost open one and appends
    /// closers for anything left open.
    ///
    /// Brackets inside string literals are counted too: at this point in the
    /// pipeline quotes may still be unbalanced, so string tracking is not reliable.
    fn balance_brackets(text: &str) -> String {
        let mut open = Vec::new();
        let mut out = String::with_capacity(text.len());
        for c in text.chars() {
            match c {
                '{' | '[' => {
                    open.push(c);
                    out.push(c);
                }
                '}' => {
                    if open.last() == Some(&'{') {
                        open.pop();
                        out.push('}');
                    }
                }
                ']' => {
                    if open.last() == Some(&'[') {
                        open.pop();
                        out.push(']');
                    }
                }
                _ => out.push(c),
            }
        }
        for opener in open.into_iter().rev() {
            out.push(if opener == '{' { '}' } else { ']' });
        }
        out
    }

    /// Mutable state threaded through the repair pipeline.
    struct Repair {
        /// Current working text.
        text: String,
        /// Descriptions of the repairs that changed the text so far.
        steps: Vec<String>,
        /// Pretty-printed result once the text has become valid JSON.
        finished: Option<String>,
    }

    impl Repair {
        /// Runs `transform` unless the text is already valid; records `label` and
        /// re-checks validity when the transform changed the text.
        fn apply<F>(&mut self, label: &str, transform: F)
        where
            F: FnOnce(&str) -> String,
        {
            if self.finished.is_some() {
                return;
            }
            let candidate = transform(&self.text);
            if candidate != self.text {
                self.text = candidate;
                self.steps.push(label.to_string());
                self.finished = pretty_if_structured(&self.text);
            }
        }
    }

    let mut text = input.trim().to_string();
    let mut steps = Vec::new();
    if text.starts_with("```") {
        text = text
            .trim_start_matches("```json")
            .trim_start_matches("```")
            .trim_end_matches("```")
            .trim()
            .to_string();
        steps.push("Stripped markdown wrappers or triple quotes".to_string());
    }

    // Valid input must not be run through the heuristics: several of them rewrite
    // perfectly good `"a", "b"` sequences and would turn valid JSON into garbage.
    let finished = pretty_if_structured(&text);
    let mut repair = Repair {
        text,
        steps,
        finished,
    };

    repair.apply("Fixed missing commas between fields", |s| {
        regex_replace(r#"(\"[^\"]+\"\s*:\s*\"[^\"]+\")\s+\""#, s, "$1,\n\"")
    });
    repair.apply("Fixed adjacent object blocks", |s| {
        s.replace("}\n{", "},\n{")
    });
    repair.apply("Fixed generic missing commas between quoted values", |s| {
        regex_replace(r#""\s+""#, s, "\",\n\"")
    });
    repair.apply("Removed trailing commas", |s| {
        regex_replace(r",\s*([\]}])", s, "$1")
    });
    repair.apply("Converted single quotes to double quotes", |s| {
        regex_replace(r"'([^']*)'", s, "\"$1\"")
    });
    repair.apply("Converted curly quotes and weird symbols", |s| {
        s.replace('“', "\"")
            .replace('”', "\"")
            .replace('‘', "'")
            .replace('’', "'")
    });
    repair.apply("Fixed broken contractions written with double quotes", |s| {
        regex_replace(r#"(\b\w+)"(\w+)"#, s, "$1'$2")
    });
    repair.apply("Fixed broken apostrophes written as quotes", |s| {
        regex_replace(r#"(\w)"([sdmt])\b"#, s, "$1'$2")
    });
    repair.apply("Handled escaped stringified JSON", |s| {
        if s.starts_with('"') && s.ends_with('"') {
            if let Ok(unescaped) = serde_json::from_str::<String>(s) {
                return unescaped;
            }
        }
        s.to_string()
    });
    repair.apply("Quoted unquoted keys", |s| {
        // The pattern consumes the opening quote of the value, so the replacement
        // has to put it back; otherwise `{a: "x"}` would become `{"a": x"}`.
        regex_replace(
            r#"(?m)(^|[{,\s])(\w+)(\s*:\s*)""#,
            s,
            "$1\"$2\"$3\"",
        )
    });
    repair.apply("Escaped unescaped inner double quotes within values", |s| {
        regex_replace(
            r#":\s*"([^"]*?)"([^\\"][^"]*?)""#,
            s,
            r#": "$1\"$2""#,
        )
    });
    repair.apply("Removed invalid escape sequences", |s| {
        // `u` is excluded so that valid `\uXXXX` escapes survive.
        regex_replace(r#"\\[^"\\/bfnrtu]"#, s, "")
    });
    repair.apply("Converted raw newlines in strings to \\n", |s| {
        Regex::new(r#""([^"]*?)\n([^"]*?)""#)
            .expect("hard-coded repair pattern must compile")
            .replace_all(s, |caps: &Captures| {
                let first = caps[1].replace('\n', "\\n");
                let second = caps[2].replace('\n', "\\n");
                format!("\"{}\\n{}\"", first, second)
            })
            .to_string()
    });
    repair.apply("Fixed embedded key-value pair trapped inside string", |s| {
        Regex::new(
            r#""(?P<key1>\w+)"\s*:\s*"(?P<val>[^"]*?),\s*\\?"(?P<key2>\w+)"\s*:\s*(?P<val2>[^"{}\[\],]+)"#,
        )
        .expect("hard-coded repair pattern must compile")
        .replace_all(s, |caps: &Captures| {
            format!(
                r#""{}": "{}", "{}": {}"#,
                &caps["key1"],
                caps["val"].trim(),
                &caps["key2"],
                &caps["val2"]
            )
        })
        .to_string()
    });
    repair.apply("Fixed embedded key start inside unescaped value", |s| {
        Regex::new(
            r#""(?P<key1>\w+)"\s*:\s*"(?P<val>[^"]*?),\s*"(?P<key2>\w+)"\s*:\s*(?P<val2>[^"{}\[\],]+)"#,
        )
        .expect("hard-coded repair pattern must compile")
        .replace_all(s, |caps: &Captures| {
            format!(
                r#""{}": "{}", "{}": {}"#,
                &caps["key1"],
                caps["val"].trim(),
                &caps["key2"],
                caps["val2"].trim()
            )
        })
        .to_string()
    });
    repair.apply("Fixed misescaped internal quote sequences", |s| {
        regex_replace(
            r#"(?P<key>:\s*")(?P<val>[^"]*?)\\",\s*(?P<rest>")"#,
            s,
            "${key}${val}, ${rest}",
        )
    });
    repair.apply("Escaped unescaped double quotes inside string values", |s| {
        regex_replace(
            r#"(".*?:\s*")((?:[^"\\]|\\.)*?)"((?:[^"\\]|\\.)*?)""#,
            s,
            "$1$2\\\"$3\"",
        )
    });
    repair.apply("Auto-fixed dangling or mismatched brackets/braces", |s| {
        balance_brackets(s)
    });
    repair.apply("Normalized line breaks between quoted array elements", |s| {
        regex_replace(r#"(\"\s*),\s*\\n\s*(\")"#, s, "$1, $2")
    });
    repair.apply(
        "Normalized overly escaped array strings with embedded linebreaks",
        |s| regex_replace(r#"\",\s*\\n\s*\""#, s, "\", \""),
    );
    repair.apply("Normalized line breaks between array items", |s| {
        regex_replace(r#"\",\s*\n\s*\""#, s, "\", \"")
    });
    // Earlier repairs can leave `\",` behind where a plain closing quote was meant;
    // this rewrite is recorded like every other repair so `steps` stays complete.
    repair.apply("Removed stray backslashes before quote-comma sequences", |s| {
        apply_step(s.to_string(), FixStep::RemoveEscapedQuoteComma, |t| {
            t.replace("\\\",", "\",")
        })
    });

    let Repair {
        text,
        steps,
        finished,
    } = repair;

    if let Some(pretty) = finished {
        return FixReport {
            original: input.to_string(),
            fixed: pretty,
            steps,
            success: true,
        };
    }

    // Last chance: accept any JSON value (including a top-level string) and report
    // the parse error when the text is still broken.
    let parsed = serde_json::from_str::<serde_json::Value>(&text);
    let (fixed, success) = match parsed {
        Ok(value) => match serde_json::to_string_pretty(&value) {
            Ok(pretty) => (pretty, true),
            Err(_) => (text, false),
        },
        Err(e) => {
            println!("❌ Final JSON parse error: {}", e);
            println!(
                "📍 Error occurred at line {}, column {}",
                e.line(),
                e.column()
            );
            println!("📍 Faulty fixed JSON:\n{:#?}", text);
            (text, false)
        }
    };

    FixReport {
        original: input.to_string(),
        fixed,
        steps,
        success,
    }
}