1pub fn preprocess_semicolons(source: &str) -> String {
12 let lines: Vec<&str> = source.split('\n').collect();
13 if lines.len() <= 1 {
14 return source.to_string();
15 }
16
17 let mut result = String::with_capacity(source.len() + 64);
18 let mut in_block_comment = false;
19 let mut in_triple_string = false;
20
21 for i in 0..lines.len() {
22 let line = lines[i];
23
24 let last_char = effective_last_char(line, &mut in_block_comment, &mut in_triple_string);
26
27 let needs_semicolon = if let Some(ch) = last_char {
28 is_statement_ender(ch)
29 && next_nonblank_starts_with_bracket_or_paren(&lines, i + 1)
30 } else {
31 false
32 };
33
34 result.push_str(line);
35 if needs_semicolon {
36 result.push(';');
37 }
38 if i < lines.len() - 1 {
39 result.push('\n');
40 }
41 }
42
43 result
44}
45
/// Returns the last significant character of `line`, ignoring whitespace,
/// comments and the contents of string literals.
///
/// `in_block_comment` and `in_triple_string` are read on entry and updated
/// on exit, so a `/* ... */` comment or a `"""` string that spans several
/// lines is tracked when the caller feeds the lines in order. A simple
/// `"..."` string is tracked only within the line.
///
/// What counts as the "last significant" character:
///
/// * the closing quote of a simple or triple-quoted string is reported as `"`;
/// * the opening of a string, and everything inside strings or block
///   comments, reports nothing;
/// * `//` ends the scan, so the rest of the line is ignored;
/// * any other character that is not ASCII whitespace is reported as itself.
///
/// Returns `None` when the line holds no significant character.
///
/// The delimiters are all ASCII, so they are matched on raw bytes. Ordinary
/// code characters are decoded as full UTF-8 scalar values, so a line ending
/// in `é` reports `é` and not the Latin-1 reading of its final byte.
fn effective_last_char(
    line: &str,
    in_block_comment: &mut bool,
    in_triple_string: &mut bool,
) -> Option<char> {
    const TRIPLE_QUOTE: &[u8] = b"\"\"\"";

    let bytes = line.as_bytes();
    let mut last_significant: Option<char> = None;
    let mut in_simple_string = false;
    let mut i = 0;

    while i < bytes.len() {
        let rest = &bytes[i..];

        if *in_triple_string {
            if rest.starts_with(TRIPLE_QUOTE) {
                *in_triple_string = false;
                last_significant = Some('"');
                i += 3;
            } else {
                i += 1;
            }
            continue;
        }

        if *in_block_comment {
            if rest.starts_with(b"*/") {
                *in_block_comment = false;
                i += 2;
            } else {
                i += 1;
            }
            continue;
        }

        if in_simple_string {
            match rest[0] {
                // Skip the escaped byte so `\"` does not close the string.
                b'\\' => i += 2,
                b'"' => {
                    in_simple_string = false;
                    last_significant = Some('"');
                    i += 1;
                }
                _ => i += 1,
            }
            continue;
        }

        // Plain code. `i` is on a char boundary here: this state is only
        // entered at the start of the line or right after an ASCII delimiter,
        // and each step below advances by whole characters.
        if rest.starts_with(TRIPLE_QUOTE) {
            *in_triple_string = true;
            i += 3;
        } else if rest[0] == b'"' {
            in_simple_string = true;
            i += 1;
        } else if rest.starts_with(b"//") {
            // Line comment: nothing after this point is significant.
            break;
        } else if rest.starts_with(b"/*") {
            *in_block_comment = true;
            i += 2;
        } else {
            match line.get(i..).and_then(|tail| tail.chars().next()) {
                Some(ch) => {
                    if !ch.is_ascii_whitespace() {
                        last_significant = Some(ch);
                    }
                    i += ch.len_utf8();
                }
                // Defensive only: step one byte if `i` is ever off-boundary.
                None => i += 1,
            }
        }
    }

    last_significant
}
134
/// Reports whether `ch` can be the final character of a complete statement:
/// an alphanumeric character, `_`, a closing `)`, `]` or `}`, or a closing
/// double quote.
fn is_statement_ender(ch: char) -> bool {
    matches!(ch, '_' | ')' | ']' | '}' | '"') || ch.is_alphanumeric()
}
139
/// Looks at the first line at or after index `from` that is not blank once
/// trimmed, and reports whether it begins with `[` or `(`.
///
/// Returns `false` when there is no such line, including when `from` is past
/// the end of `lines`.
fn next_nonblank_starts_with_bracket_or_paren(lines: &[&str], from: usize) -> bool {
    lines
        .iter()
        .skip(from)
        .map(|candidate| candidate.trim())
        .find(|text| !text.is_empty())
        .map_or(false, |text| text.starts_with('[') || text.starts_with('('))
}
150
#[cfg(test)]
mod tests {
    use super::*;

    /// An identifier at end of line followed by a `[` line gets a semicolon.
    #[test]
    fn test_insert_after_identifier_before_bracket() {
        let rewritten = preprocess_semicolons("let x = foo\n[1, 2]");
        assert_eq!(rewritten, "let x = foo;\n[1, 2]");
    }

    /// A closing `)` at end of line followed by a `[` line gets a semicolon.
    #[test]
    fn test_insert_after_paren_before_bracket() {
        let src = "let m = HashMap().set(\"x\", None)\n[m.has(\"x\"), m.get(\"x\")]";
        let rewritten = preprocess_semicolons(src);
        assert!(
            rewritten.contains(");\n["),
            "should insert ; after closing paren"
        );
    }

    /// A trailing comma marks a continuation, so nothing is inserted.
    #[test]
    fn test_no_insert_when_line_ends_with_comma() {
        let src = "foo(a,\n[1, 2])";
        assert_eq!(preprocess_semicolons(src), src, "comma means continuation");
    }

    /// A trailing dot marks a method chain, so nothing is inserted.
    #[test]
    fn test_no_insert_when_line_ends_with_dot() {
        let src = "foo.\n[0]";
        assert_eq!(
            preprocess_semicolons(src),
            src,
            "dot means method chain continuation"
        );
    }

    /// Nothing is inserted when the following line does not open a bracket.
    #[test]
    fn test_no_insert_when_next_line_not_bracket() {
        let src = "let x = 5\nlet y = 10";
        assert_eq!(preprocess_semicolons(src), src, "no bracket on next line");
    }

    /// Blank lines between the statement and the bracket line are skipped.
    #[test]
    fn test_skips_blank_lines() {
        let rewritten = preprocess_semicolons("let x = foo\n\n[1, 2]");
        assert_eq!(rewritten, "let x = foo;\n\n[1, 2]");
    }

    /// A trailing `//` comment is ignored when finding the last character;
    /// the semicolon is appended at the end of the physical line.
    #[test]
    fn test_line_comment_stripped() {
        let rewritten = preprocess_semicolons("let x = foo // comment\n[1, 2]");
        assert_eq!(rewritten, "let x = foo // comment;\n[1, 2]");
    }

    /// A block comment spanning two lines is tracked across the line break.
    #[test]
    fn test_block_comment_tracked() {
        let src = "let x = foo /* start\nend */ [1, 2]";
        assert_eq!(preprocess_semicolons(src), src, "inside block comment");
    }

    /// `//` inside a string literal is not treated as a comment start.
    #[test]
    fn test_string_not_confused_with_comment() {
        let src = "let x = \"//not a comment\"\n[1, 2]";
        let rewritten = preprocess_semicolons(src);
        assert!(
            rewritten.contains("\";\n["),
            "string ending with quote is a statement ender"
        );
    }

    /// A closing `]` ends a statement.
    #[test]
    fn test_closing_bracket_before_bracket() {
        let src = "let a = [10, 20, 30]\n[a.first(), a.last()]";
        let rewritten = preprocess_semicolons(src);
        assert!(rewritten.contains("];\n["), "closing ] is a statement ender");
    }

    /// A closing `}` ends a statement.
    #[test]
    fn test_closing_brace_before_bracket() {
        let src = "let f = { x: 1 }\n[1, 2]";
        let rewritten = preprocess_semicolons(src);
        assert!(rewritten.contains("};\n["), "closing }} is a statement ender");
    }

    /// A trailing binary operator means the expression continues.
    #[test]
    fn test_no_insert_after_operator() {
        let src = "let x = a +\n[1, 2]";
        assert_eq!(preprocess_semicolons(src), src, "+ means expression continues");
    }

    /// Single-line input is returned untouched.
    #[test]
    fn test_single_line_unchanged() {
        let src = "let x = [1, 2, 3]";
        assert_eq!(preprocess_semicolons(src), src);
    }

    /// Empty input yields empty output.
    #[test]
    fn test_empty_input() {
        assert_eq!(preprocess_semicolons(""), "");
    }

    /// Lines inside a triple-quoted string are left alone; the line that
    /// closes the string still gets its semicolon.
    #[test]
    fn test_no_insert_inside_triple_string() {
        let src = "let s = \"\"\"\nfoo\n[bar]\n\"\"\"\n[1, 2]";
        let rewritten = preprocess_semicolons(src);
        assert!(
            rewritten.contains("\"\"\";\n[1, 2]"),
            "semicolon after triple string close, got: {}",
            rewritten
        );
        assert!(
            !rewritten.contains("foo;\n"),
            "no insertion inside triple string"
        );
    }

    /// A `(` on the following line triggers insertion just like `[`.
    #[test]
    fn test_insert_before_paren_on_new_line() {
        let rewritten = preprocess_semicolons("let b = Pt { x: 10.0, y: 20.0 }\n(a + b).x");
        assert_eq!(rewritten, "let b = Pt { x: 10.0, y: 20.0 };\n(a + b).x");
    }

    /// An identifier followed by a `(` line gets a semicolon.
    #[test]
    fn test_insert_before_paren_after_identifier() {
        let rewritten =
            preprocess_semicolons("let dy = self.y2 - self.y1\n(dx * dx + dy * dy)");
        assert_eq!(
            rewritten,
            "let dy = self.y2 - self.y1;\n(dx * dx + dy * dy)"
        );
    }

    /// A trailing operator before a `(` line is a continuation.
    #[test]
    fn test_no_insert_before_paren_after_operator() {
        let src = "let x = a +\n(b + c)";
        assert_eq!(preprocess_semicolons(src), src, "+ means expression continues");
    }

    /// A trailing comma before a `(` line is a continuation.
    #[test]
    fn test_no_insert_before_paren_after_comma() {
        let src = "foo(a,\n(b + c))";
        assert_eq!(preprocess_semicolons(src), src, "comma means continuation");
    }

    /// A trailing `=` before a `(` line is a continuation.
    #[test]
    fn test_no_insert_before_paren_after_equals() {
        let src = "let x =\n(1 + 2)";
        assert_eq!(preprocess_semicolons(src), src, "= means assignment continues");
    }

    /// End-to-end check on a method-chain statement followed by an array line.
    #[test]
    fn test_real_hashmap_pattern() {
        let src = r#"let m = HashMap().set("x", None)
[m.has("x"), m.get("x") == None]"#;
        let want = r#"let m = HashMap().set("x", None);
[m.has("x"), m.get("x") == None]"#;
        assert_eq!(preprocess_semicolons(src), want);
    }

    /// Bracket-looking lines inside a multi-line triple string cause no edits.
    #[test]
    fn test_triple_string_multiline_with_bracket_inside() {
        let src = r#"let s = """
 this has
 [brackets inside]
 the string
"""
let x = 5"#;
        let rewritten = preprocess_semicolons(src);
        assert!(
            !rewritten.contains("has;\n"),
            "no insertion inside triple string"
        );
        assert!(
            !rewritten.contains("inside];\n"),
            "no insertion inside triple string before brackets"
        );
        assert_eq!(rewritten, src, "no changes needed here");
    }

    /// A triple string opened and closed on one line ends a statement.
    #[test]
    fn test_triple_string_ending_then_array_on_next_line() {
        let src = "let s = \"\"\"hello\"\"\"\n[1, 2]";
        let rewritten = preprocess_semicolons(src);
        assert!(
            rewritten.contains("\"\"\";\n[1, 2]"),
            "semicolon after triple string close before [, got: {}",
            rewritten
        );
    }

    /// The line that closes a multi-line triple string ends a statement.
    #[test]
    fn test_triple_string_multiline_close_then_array() {
        let src = "let s = \"\"\"\n content\n \"\"\"\n[1, 2]";
        let rewritten = preprocess_semicolons(src);
        assert!(
            rewritten.contains("\"\"\";\n[1, 2]"),
            "semicolon after closing triple-quote line, got: {}",
            rewritten
        );
    }

    /// Indented content inside a triple string is never edited.
    #[test]
    fn test_triple_string_with_indented_bracket_lines() {
        let src = "let a_str = \"\"\"\n this is\n a multiline\n string.\n -it should indent\n \"\"\"\n[a_str.length]";
        let rewritten = preprocess_semicolons(src);
        assert!(
            rewritten.contains("\"\"\";\n[a_str"),
            "semicolon after triple string, got: {}",
            rewritten
        );
        assert!(!rewritten.contains("is;\n"), "no insertion inside string");
        assert!(!rewritten.contains("indent;\n"), "no insertion inside string");
    }

    /// An `f`-prefixed triple string is tracked like a plain one.
    #[test]
    fn test_formatted_triple_string_tracked() {
        let src = "let s = f\"\"\"\n value: {x}\n [y]\n \"\"\"\n[1, 2]";
        let rewritten = preprocess_semicolons(src);
        assert!(
            !rewritten.contains("{x};\n"),
            "no insertion inside f-triple string"
        );
        assert!(
            rewritten.contains("\"\"\";\n[1, 2]"),
            "semicolon after f-triple string close, got: {}",
            rewritten
        );
    }

    /// State resets correctly between two consecutive triple strings.
    #[test]
    fn test_multiple_triple_strings_in_sequence() {
        let src = "let a = \"\"\"\n [inside a]\n \"\"\"\nlet b = \"\"\"\n [inside b]\n \"\"\"\n[1, 2]";
        let rewritten = preprocess_semicolons(src);
        assert!(
            !rewritten.contains("a];\n"),
            "no insertion inside first string"
        );
        assert!(
            !rewritten.contains("b];\n"),
            "no insertion inside second string"
        );
        assert!(
            rewritten.contains("\"\"\";\n[1, 2]"),
            "semicolon before final array, got: {}",
            rewritten
        );
    }
}