1pub fn preprocess_semicolons(source: &str) -> String {
12 let lines: Vec<&str> = source.split('\n').collect();
13 if lines.len() <= 1 {
14 return source.to_string();
15 }
16
17 let mut result = String::with_capacity(source.len() + 64);
18 let mut in_block_comment = false;
19 let mut in_triple_string = false;
20
21 for i in 0..lines.len() {
22 let line = lines[i];
23
24 let last_char = effective_last_char(line, &mut in_block_comment, &mut in_triple_string);
26
27 let needs_semicolon = if let Some(ch) = last_char {
28 is_statement_ender(ch) && next_nonblank_starts_with_bracket_or_paren(&lines, i + 1)
29 } else {
30 false
31 };
32
33 result.push_str(line);
34 if needs_semicolon {
35 result.push(';');
36 }
37 if i < lines.len() - 1 {
38 result.push('\n');
39 }
40 }
41
42 result
43}
44
/// Scans one line and returns its last significant character, if any.
///
/// "Significant" means a non-whitespace character that is outside of comments
/// and outside of string contents. A string literal contributes exactly one
/// significant character: the `"` of its closing delimiter.
///
/// * `line` - a single source line (without its trailing newline).
/// * `in_block_comment` - in/out flag: `true` while inside `/* ... */`. Updated so
///   the caller can pass it on to the next line.
/// * `in_triple_string` - in/out flag: `true` while inside `""" ... """`. Updated
///   the same way.
///
/// Scanning stops at a `//` line comment. Single-quoted-by-`"` strings are tracked
/// per line only; a backslash inside one skips the following character.
///
/// The line is walked as Unicode scalar values, so a line ending in a multi-byte
/// character (e.g. `é`) reports that character rather than one of its UTF-8 bytes.
/// All delimiters are ASCII, so lookahead is done on the raw bytes.
fn effective_last_char(
    line: &str,
    in_block_comment: &mut bool,
    in_triple_string: &mut bool,
) -> Option<char> {
    let bytes = line.as_bytes();
    let mut last_significant: Option<char> = None;
    let mut in_simple_string = false;
    // Byte offset before which characters have already been consumed as part of
    // a multi-character token (`"""`, `/*`, `*/`, or a backslash escape).
    let mut resume_at = 0usize;

    for (idx, ch) in line.char_indices() {
        if idx < resume_at {
            continue;
        }
        let rest = &bytes[idx..];

        if *in_triple_string {
            if rest.starts_with(b"\"\"\"") {
                *in_triple_string = false;
                last_significant = Some('"');
                resume_at = idx + 3;
            }
            continue;
        }

        if *in_block_comment {
            if rest.starts_with(b"*/") {
                *in_block_comment = false;
                resume_at = idx + 2;
            }
            continue;
        }

        if in_simple_string {
            match ch {
                // Skip whatever character follows the backslash.
                '\\' => resume_at = idx + 2,
                '"' => {
                    in_simple_string = false;
                    last_significant = Some('"');
                }
                _ => {}
            }
            continue;
        }

        match ch {
            '"' => {
                if rest.starts_with(b"\"\"\"") {
                    *in_triple_string = true;
                    resume_at = idx + 3;
                } else {
                    in_simple_string = true;
                }
            }
            // Line comment: nothing after this point is significant.
            '/' if rest.starts_with(b"//") => break,
            '/' if rest.starts_with(b"/*") => {
                *in_block_comment = true;
                resume_at = idx + 2;
            }
            _ => {
                // Includes a lone `/` (division operator).
                if !ch.is_ascii_whitespace() {
                    last_significant = Some(ch);
                }
            }
        }
    }

    last_significant
}
133
/// Returns `true` when `ch` can be the final character of a complete statement:
/// an alphanumeric character, `_`, a closing `)`, `]`, `}`, or a closing `"`.
fn is_statement_ender(ch: char) -> bool {
    matches!(ch, '_' | ')' | ']' | '}' | '"') || ch.is_alphanumeric()
}
138
/// Looks at `lines[from..]`, finds the first line that is not empty after
/// trimming, and reports whether its trimmed text begins with `[` or `(`.
///
/// Returns `false` when every remaining line is blank or when `from` is past
/// the end of `lines`.
fn next_nonblank_starts_with_bracket_or_paren(lines: &[&str], from: usize) -> bool {
    lines
        .iter()
        .skip(from)
        .map(|candidate| candidate.trim())
        .find(|text| !text.is_empty())
        .map_or(false, |text| {
            text.starts_with(|first: char| first == '[' || first == '(')
        })
}
149
#[cfg(test)]
mod tests {
    use super::*;

    // ---- insertion before `[` ----

    /// An identifier at end of line followed by a `[` line gets a semicolon.
    #[test]
    fn test_insert_after_identifier_before_bracket() {
        let rewritten = preprocess_semicolons("let x = foo\n[1, 2]");
        assert_eq!(rewritten, "let x = foo;\n[1, 2]");
    }

    /// A closing `)` at end of line followed by a `[` line gets a semicolon.
    #[test]
    fn test_insert_after_paren_before_bracket() {
        let src = "let m = HashMap().set(\"x\", None)\n[m.has(\"x\"), m.get(\"x\")]";
        let rewritten = preprocess_semicolons(src);
        assert!(
            rewritten.contains(");\n["),
            "should insert ; after closing paren"
        );
    }

    /// A trailing comma marks a continuation; nothing is inserted.
    #[test]
    fn test_no_insert_when_line_ends_with_comma() {
        let src = "foo(a,\n[1, 2])";
        assert_eq!(preprocess_semicolons(src), src, "comma means continuation");
    }

    /// A trailing dot marks a continuation; nothing is inserted.
    #[test]
    fn test_no_insert_when_line_ends_with_dot() {
        let src = "foo.\n[0]";
        assert_eq!(
            preprocess_semicolons(src),
            src,
            "dot means method chain continuation"
        );
    }

    /// Nothing is inserted when the next line does not start with `[` or `(`.
    #[test]
    fn test_no_insert_when_next_line_not_bracket() {
        let src = "let x = 5\nlet y = 10";
        assert_eq!(preprocess_semicolons(src), src, "no bracket on next line");
    }

    /// Blank lines between the statement and the `[` line are skipped over.
    #[test]
    fn test_skips_blank_lines() {
        let rewritten = preprocess_semicolons("let x = foo\n\n[1, 2]");
        assert_eq!(rewritten, "let x = foo;\n\n[1, 2]");
    }

    /// A trailing `//` comment is ignored when deciding; the `;` is appended at
    /// the very end of the line text.
    #[test]
    fn test_line_comment_stripped() {
        let rewritten = preprocess_semicolons("let x = foo // comment\n[1, 2]");
        assert_eq!(rewritten, "let x = foo // comment;\n[1, 2]");
    }

    /// Block-comment state is carried from one line to the next.
    #[test]
    fn test_block_comment_tracked() {
        let src = "let x = foo /* start\nend */ [1, 2]";
        assert_eq!(preprocess_semicolons(src), src, "inside block comment");
    }

    /// `//` inside a string literal does not start a comment.
    #[test]
    fn test_string_not_confused_with_comment() {
        let rewritten = preprocess_semicolons("let x = \"//not a comment\"\n[1, 2]");
        assert!(
            rewritten.contains("\";\n["),
            "string ending with quote is a statement ender"
        );
    }

    /// A closing `]` ends a statement.
    #[test]
    fn test_closing_bracket_before_bracket() {
        let rewritten = preprocess_semicolons("let a = [10, 20, 30]\n[a.first(), a.last()]");
        assert!(rewritten.contains("];\n["), "closing ] is a statement ender");
    }

    /// A closing `}` ends a statement.
    #[test]
    fn test_closing_brace_before_bracket() {
        let rewritten = preprocess_semicolons("let f = { x: 1 }\n[1, 2]");
        assert!(rewritten.contains("};\n["), "closing }} is a statement ender");
    }

    /// A trailing binary operator marks a continuation.
    #[test]
    fn test_no_insert_after_operator() {
        let src = "let x = a +\n[1, 2]";
        assert_eq!(preprocess_semicolons(src), src, "+ means expression continues");
    }

    // ---- trivial inputs ----

    /// Input without a newline is returned unchanged.
    #[test]
    fn test_single_line_unchanged() {
        let src = "let x = [1, 2, 3]";
        assert_eq!(preprocess_semicolons(src), src);
    }

    /// Empty input yields empty output.
    #[test]
    fn test_empty_input() {
        assert_eq!(preprocess_semicolons(""), "");
    }

    /// Lines inside a `"""` string are left alone; the closing line is handled.
    #[test]
    fn test_no_insert_inside_triple_string() {
        let src = "let s = \"\"\"\nfoo\n[bar]\n\"\"\"\n[1, 2]";
        let rewritten = preprocess_semicolons(src);
        assert!(
            rewritten.contains("\"\"\";\n[1, 2]"),
            "semicolon after triple string close, got: {}",
            rewritten
        );
        assert!(
            !rewritten.contains("foo;\n"),
            "no insertion inside triple string"
        );
    }

    // ---- insertion before `(` ----

    /// A closing `}` followed by a `(` line gets a semicolon.
    #[test]
    fn test_insert_before_paren_on_new_line() {
        let rewritten = preprocess_semicolons("let b = Pt { x: 10.0, y: 20.0 }\n(a + b).x");
        assert_eq!(rewritten, "let b = Pt { x: 10.0, y: 20.0 };\n(a + b).x");
    }

    /// An identifier followed by a `(` line gets a semicolon.
    #[test]
    fn test_insert_before_paren_after_identifier() {
        let rewritten = preprocess_semicolons("let dy = self.y2 - self.y1\n(dx * dx + dy * dy)");
        assert_eq!(rewritten, "let dy = self.y2 - self.y1;\n(dx * dx + dy * dy)");
    }

    /// A trailing operator before a `(` line is a continuation.
    #[test]
    fn test_no_insert_before_paren_after_operator() {
        let src = "let x = a +\n(b + c)";
        assert_eq!(preprocess_semicolons(src), src, "+ means expression continues");
    }

    /// A trailing comma before a `(` line is a continuation.
    #[test]
    fn test_no_insert_before_paren_after_comma() {
        let src = "foo(a,\n(b + c))";
        assert_eq!(preprocess_semicolons(src), src, "comma means continuation");
    }

    /// A trailing `=` before a `(` line is a continuation.
    #[test]
    fn test_no_insert_before_paren_after_equals() {
        let src = "let x =\n(1 + 2)";
        assert_eq!(preprocess_semicolons(src), src, "= means assignment continues");
    }

    // ---- larger fixtures ----

    /// Two-line fixture: a method-call statement followed by an array literal.
    #[test]
    fn test_real_hashmap_pattern() {
        let src = r#"let m = HashMap().set("x", None)
[m.has("x"), m.get("x") == None]"#;
        let want = r#"let m = HashMap().set("x", None);
[m.has("x"), m.get("x") == None]"#;
        assert_eq!(preprocess_semicolons(src), want);
    }

    /// A `[` line that is part of a multi-line `"""` string triggers nothing.
    #[test]
    fn test_triple_string_multiline_with_bracket_inside() {
        let src = r#"let s = """
 this has
 [brackets inside]
 the string
"""
let x = 5"#;
        let rewritten = preprocess_semicolons(src);
        assert!(
            !rewritten.contains("has;\n"),
            "no insertion inside triple string"
        );
        assert!(
            !rewritten.contains("inside];\n"),
            "no insertion inside triple string before brackets"
        );
        assert_eq!(rewritten, src, "no changes needed here");
    }

    /// A `"""` string opened and closed on one line ends the statement.
    #[test]
    fn test_triple_string_ending_then_array_on_next_line() {
        let rewritten = preprocess_semicolons("let s = \"\"\"hello\"\"\"\n[1, 2]");
        assert!(
            rewritten.contains("\"\"\";\n[1, 2]"),
            "semicolon after triple string close before [, got: {}",
            rewritten
        );
    }

    /// A closing `"""` on its own line ends the statement.
    #[test]
    fn test_triple_string_multiline_close_then_array() {
        let rewritten = preprocess_semicolons("let s = \"\"\"\n content\n \"\"\"\n[1, 2]");
        assert!(
            rewritten.contains("\"\"\";\n[1, 2]"),
            "semicolon after closing triple-quote line, got: {}",
            rewritten
        );
    }

    /// Word-ending lines inside a `"""` string are not treated as statements.
    #[test]
    fn test_triple_string_with_indented_bracket_lines() {
        let src = "let a_str = \"\"\"\n this is\n a multiline\n string.\n -it should indent\n \"\"\"\n[a_str.length]";
        let rewritten = preprocess_semicolons(src);
        assert!(
            rewritten.contains("\"\"\";\n[a_str"),
            "semicolon after triple string, got: {}",
            rewritten
        );
        assert!(!rewritten.contains("is;\n"), "no insertion inside string");
        assert!(!rewritten.contains("indent;\n"), "no insertion inside string");
    }

    /// An `f"""` prefixed string is tracked like a plain `"""` string.
    #[test]
    fn test_formatted_triple_string_tracked() {
        let src = "let s = f\"\"\"\n value: {x}\n [y]\n \"\"\"\n[1, 2]";
        let rewritten = preprocess_semicolons(src);
        assert!(
            !rewritten.contains("{x};\n"),
            "no insertion inside f-triple string"
        );
        assert!(
            rewritten.contains("\"\"\";\n[1, 2]"),
            "semicolon after f-triple string close, got: {}",
            rewritten
        );
    }

    /// String state resets correctly across several `"""` strings in a row.
    #[test]
    fn test_multiple_triple_strings_in_sequence() {
        let src = "let a = \"\"\"\n [inside a]\n \"\"\"\nlet b = \"\"\"\n [inside b]\n \"\"\"\n[1, 2]";
        let rewritten = preprocess_semicolons(src);
        assert!(
            !rewritten.contains("a];\n"),
            "no insertion inside first string"
        );
        assert!(
            !rewritten.contains("b];\n"),
            "no insertion inside second string"
        );
        assert!(
            rewritten.contains("\"\"\";\n[1, 2]"),
            "semicolon before final array, got: {}",
            rewritten
        );
    }
}