1use fancy_regex::{Captures, Regex};
2use serde_json;
3
/// Outcome of one JSON repair attempt.
///
/// Derives `Clone`/`PartialEq`/`Eq` in addition to `Debug` so callers can
/// store, duplicate, and compare reports (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FixReport {
    /// The input text exactly as it was handed to the repair routine.
    pub original: String,
    /// The text after all repair passes; pretty-printed JSON when `success` is `true`.
    pub fixed: String,
    /// Human-readable labels of the repair passes that changed the text, in order.
    pub steps: Vec<String>,
    /// Whether `fixed` parsed as JSON at the end of the repair attempt.
    pub success: bool,
}
13
/// Tag identifying a named repair transformation.
///
/// The enum carries no data, so it is `Copy` and comparable; that lets a tag be
/// passed by value and matched or asserted on without ceremony.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FixStep {
    /// Rewrites the sequence `\",` to `",`.
    RemoveEscapedQuoteComma,
}
19
20fn apply_step<F: FnOnce(String) -> String>(input: String, _step: FixStep, f: F) -> String {
21 f(input)
22}
23
24pub fn fix_json_syntax(input: &str) -> FixReport {
25 let mut steps = Vec::new();
26 let mut fixed = input.trim().to_string();
27
28 if fixed.starts_with("```json") || fixed.starts_with("```") {
30 fixed = fixed
31 .trim_start_matches("```json")
32 .trim_start_matches("```")
33 .trim_end_matches("```")
34 .trim()
35 .to_string();
36 steps.push("Stripped markdown wrappers or triple quotes".to_string());
37 }
38
39 let re_missing_commas = Regex::new(r#"(\"[^\"]+\"\s*:\s*\"[^\"]+\")\s+\""#).unwrap();
41 let new_fixed = re_missing_commas.replace_all(&fixed, "$1,\n\"").to_string();
42 if new_fixed != fixed {
43 fixed = new_fixed;
44 steps.push("Fixed missing commas between fields".to_string());
45 }
46
47 let new_fixed = fixed.replace("}\n{", "},\n{");
49 if new_fixed != fixed {
50 fixed = new_fixed;
51 steps.push("Fixed adjacent object blocks".to_string());
52 }
53
54 let re_adjacent_quoted = Regex::new(r#""\s+""#).unwrap();
56 let new_fixed = re_adjacent_quoted
57 .replace_all(&fixed, "\",\n\"")
58 .to_string();
59 if new_fixed != fixed {
60 fixed = new_fixed;
61 steps.push("Fixed generic missing commas between quoted values".to_string());
62 }
63
64 let re_trailing_commas = Regex::new(r",\s*([\]}])").unwrap();
66 let new_fixed = re_trailing_commas.replace_all(&fixed, "$1").to_string();
67 if new_fixed != fixed {
68 fixed = new_fixed;
69 steps.push("Removed trailing commas".to_string());
70 }
71
72 let re_single_quotes = Regex::new(r"'([^']*)'").unwrap();
74 let new_fixed = re_single_quotes.replace_all(&fixed, "\"$1\"").to_string();
75 if new_fixed != fixed {
76 fixed = new_fixed;
77 steps.push("Converted single quotes to double quotes".to_string());
78 }
79
80 let new_fixed = fixed
82 .replace('“', "\"")
83 .replace('”', "\"")
84 .replace('‘', "'")
85 .replace('’', "'");
86 if new_fixed != fixed {
87 fixed = new_fixed;
88 steps.push("Converted curly quotes and weird symbols".to_string());
89 }
90
91 let re_broken_contractions = Regex::new(r#"(\b\w+)"(\w+)"#).unwrap();
95 let new_fixed = re_broken_contractions
96 .replace_all(&fixed, "$1'$2")
97 .to_string();
98 if new_fixed != fixed {
99 fixed = new_fixed;
100 steps.push("Fixed broken contractions written with double quotes".to_string());
101 }
102
103 let re_broken_apostrophes = Regex::new(r#"(\w)"([sdmt])\b"#).unwrap();
105 let new_fixed = re_broken_apostrophes
106 .replace_all(&fixed, "$1'$2")
107 .to_string();
108 if new_fixed != fixed {
109 fixed = new_fixed;
110 steps.push("Fixed broken apostrophes written as quotes".to_string());
111 }
112
113 if fixed.starts_with('\"') && fixed.ends_with('\"') {
115 if let Ok(unescaped) = serde_json::from_str::<String>(&fixed) {
116 if unescaped != fixed {
117 fixed = unescaped;
118 steps.push("Handled escaped stringified JSON".to_string());
119 }
120 }
121 }
122
123 let re_unquoted_keys = Regex::new(r#"(?m)(^|[{,\s])(\w+)(\s*:\s*)""#).unwrap();
125 let new_fixed = re_unquoted_keys
126 .replace_all(&fixed, "$1\"$2\"$3")
127 .to_string();
128 if new_fixed != fixed {
129 fixed = new_fixed;
130 steps.push("Quoted unquoted keys".to_string());
131 }
132
133 let re_unescaped_inner_quotes = Regex::new(r#":\s*"([^"]*?)"([^\\"][^"]*?)""#).unwrap();
137 let new_fixed = re_unescaped_inner_quotes
138 .replace_all(&fixed, r#": "$1\"$2""#)
139 .to_string();
140 if new_fixed != fixed {
141 fixed = new_fixed;
142 steps.push("Escaped unescaped inner double quotes within values".to_string());
143 }
144
145 let re_invalid_escapes = Regex::new(r#"\\[^"\\/bfnrt]"#).unwrap();
147 let new_fixed = re_invalid_escapes.replace_all(&fixed, "").to_string();
148 if new_fixed != fixed {
149 fixed = new_fixed;
150 steps.push("Removed invalid escape sequences".to_string());
151 }
152
153 let re_multiline_strings = Regex::new(r#""([^"]*?)\n([^"]*?)""#).unwrap();
156 let new_fixed = re_multiline_strings
157 .replace_all(&fixed, |caps: &Captures| {
158 let first = &caps[1].replace('\n', "\\n");
159 let second = &caps[2].replace('\n', "\\n");
160 format!("\"{}\\n{}\"", first, second)
161 })
162 .to_string();
163 if new_fixed != fixed {
164 fixed = new_fixed;
165 steps.push("Converted raw newlines in strings to \\n".to_string());
166 }
167
168 let re_embedded_key_start = Regex::new(
170 r#""(?P<key1>\w+)"\s*:\s*"(?P<val>[^"]*?),\s*\\?"(?P<key2>\w+)"\s*:\s*(?P<val2>[^"{}\[\],]+)"#
171 ).unwrap();
172
173 let new_fixed = re_embedded_key_start
174 .replace_all(&fixed, |caps: &Captures| {
175 let key1 = &caps["key1"];
176 let val = &caps["val"];
177 let key2 = &caps["key2"];
178 let val2 = &caps["val2"];
179 format!(r#""{}": "{}", "{}": {}"#, key1, val.trim(), key2, val2)
180 })
181 .to_string();
182
183 if new_fixed != fixed {
184 fixed = new_fixed;
185 steps.push("Fixed embedded key-value pair trapped inside string".to_string());
186 }
187
188 let re_embedded_key_start = Regex::new(
190 r#""(?P<key1>\w+)"\s*:\s*"(?P<val>[^"]*?),\s*"(?P<key2>\w+)"\s*:\s*(?P<val2>[^"{}\[\],]+)"#,
191 )
192 .unwrap();
193
194 let new_fixed = re_embedded_key_start
195 .replace_all(&fixed, |caps: &Captures| {
196 let key1 = &caps["key1"];
197 let val = &caps["val"];
198 let key2 = &caps["key2"];
199 let val2 = &caps["val2"];
200 format!(
201 r#""{}": "{}", "{}": {}"#,
202 key1,
203 val.trim(),
204 key2,
205 val2.trim()
206 )
207 })
208 .to_string();
209 if new_fixed != fixed {
210 fixed = new_fixed;
211 steps.push("Fixed embedded key start inside unescaped value".to_string());
212 }
213
214 let re_misescaped = Regex::new(r#"(?P<key>:\s*")(?P<val>[^"]*?)\\",\s*(?P<rest>")"#).unwrap();
216 let new_fixed = re_misescaped
217 .replace_all(&fixed, "${key}${val}, ${rest}")
218 .to_string();
219 if new_fixed != fixed {
220 fixed = new_fixed;
221 steps.push("Fixed misescaped internal quote sequences".to_string());
222 }
223
224 let re_inner_unescaped_quotes =
228 Regex::new(r#"(".*?:\s*")((?:[^"\\]|\\.)*?)"((?:[^"\\]|\\.)*?)""#).unwrap();
229 let new_fixed = re_inner_unescaped_quotes
230 .replace_all(&fixed, "$1$2\\\"$3\"")
231 .to_string();
232 if new_fixed != fixed {
233 fixed = new_fixed;
234 steps.push("Escaped unescaped double quotes inside string values".to_string());
235 }
236
237 let mut stack = vec![];
239 let mut cleaned = String::new();
240 for c in fixed.chars() {
241 match c {
242 '{' | '[' => {
243 stack.push(c);
244 cleaned.push(c);
245 }
246 '}' => {
247 if stack.last() == Some(&'{') {
248 stack.pop();
249 cleaned.push('}');
250 }
251 }
253 ']' => {
254 if stack.last() == Some(&'[') {
255 stack.pop();
256 cleaned.push(']');
257 }
258 }
260 _ => cleaned.push(c),
261 }
262 }
263 while let Some(c) = stack.pop() {
265 match c {
266 '{' => cleaned.push('}'),
267 '[' => cleaned.push(']'),
268 _ => {}
269 }
270 }
271 if cleaned != fixed {
272 fixed = cleaned;
273 steps.push("Auto-fixed dangling or mismatched brackets/braces".to_string());
274 }
275
276 let re_stray_array_linebreaks = Regex::new(r#"(\"\s*),\s*\\n\s*(\")"#).unwrap();
278 let new_fixed = re_stray_array_linebreaks
279 .replace_all(&fixed, "$1, $2")
280 .to_string();
281 if new_fixed != fixed {
282 fixed = new_fixed;
283 steps.push("Normalized line breaks between quoted array elements".to_string());
284 }
285
286 let re_array_line_merger = Regex::new(r#"\",\s*\\n\s*\""#).unwrap();
288 let new_fixed = re_array_line_merger
289 .replace_all(&fixed, "\", \"")
290 .to_string();
291 if new_fixed != fixed {
292 fixed = new_fixed;
293 steps.push("Normalized overly escaped array strings with embedded linebreaks".to_string());
294 }
295
296 let re_linebreaks_between_items = Regex::new(r#"\",\s*\n\s*\""#).unwrap();
298 let new_fixed = re_linebreaks_between_items
299 .replace_all(&fixed, "\", \"")
300 .to_string();
301 if new_fixed != fixed {
302 fixed = new_fixed;
303 steps.push("Normalized line breaks between array items".to_string());
304 }
305
306 fixed = apply_step(fixed, FixStep::RemoveEscapedQuoteComma, |s| {
308 s.replace("\\\",", "\",")
309 });
310
311 let success = if let Ok(val) = serde_json::from_str::<serde_json::Value>(&fixed) {
314 if let Ok(re) = serde_json::to_string_pretty(&val) {
315 fixed = re;
316 true
317 } else {
318 false
319 }
320 } else {
321 if let Err(e) = serde_json::from_str::<serde_json::Value>(&fixed) {
322 println!("❌ Final JSON parse error: {}", e);
323 let line = e.line();
324 let column = e.column();
325 println!("📍 Error occurred at line {}, column {}", line, column);
326 println!("📍 Faulty fixed JSON:\n{:#?}", fixed);
327 }
328 false
329 };
330
331 FixReport {
332 original: input.to_string(),
333 fixed,
334 steps,
335 success,
336 }
337}