masm_formatter/
lib.rs

1use std::{
2    fs::File,
3    io::{self, BufRead, BufReader, Write},
4    path::Path,
5};
6
7use once_cell::sync::Lazy;
8use regex::Regex;
9
10static SINGLE_LINE_EXPORT_REGEX: Lazy<Regex> =
11    Lazy::new(|| Regex::new(r"^export\..*(?:(?:::)|(?:->)).*$").unwrap());
12
/// Block-structuring keywords that drive indentation in `format_code`.
///
/// The type is a fieldless tag, so it is `Copy`: values can be stored on the
/// construct stack and still be inspected afterwards without cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ConstructType {
    /// `proc` (procedure definition).
    Proc,
    /// `export` (exported procedure definition).
    Export,
    /// `begin`.
    Begin,
    /// `end` (closes the innermost open construct).
    End,
    /// `while`.
    While,
    /// `repeat`.
    Repeat,
    /// `if`.
    If,
    /// `else`.
    Else,
}
24
25impl ConstructType {
26    fn from_str(s: &str) -> Option<Self> {
27        match s {
28            "proc" => Some(Self::Proc),
29            "export" => Some(Self::Export),
30            "begin" => Some(Self::Begin),
31            "end" => Some(Self::End),
32            "while" => Some(Self::While),
33            "repeat" => Some(Self::Repeat),
34            "if" => Some(Self::If),
35            "else" => Some(Self::Else),
36            _ => None,
37        }
38    }
39}
40
/// One level of indentation in the formatted output: four spaces.
const INDENT: &str = "    ";
42
/// Returns `true` when the first non-whitespace character of `line` is `#`.
///
/// Lines that merely carry a trailing `# ...` comment after code do not count.
fn is_comment(line: &str) -> bool {
    matches!(line.trim_start().chars().next(), Some('#'))
}
46
/// Returns `true` for stack-state comments, i.e. lines that (after leading
/// whitespace) start with `# => [` or `#! => [`.
fn is_stack_comment(line: &str) -> bool {
    const STACK_PREFIXES: [&str; 2] = ["# => [", "#! => ["];

    let body = line.trim_start();
    STACK_PREFIXES.iter().any(|prefix| body.starts_with(*prefix))
}
51
52fn is_single_export_line(line: &str) -> bool {
53    SINGLE_LINE_EXPORT_REGEX.is_match(line)
54}
55
/// Returns `true` when `line`, ignoring leading whitespace, begins with
/// the import prefix `use.`.
fn is_use_statement(line: &str) -> bool {
    line.trim_start().strip_prefix("use.").is_some()
}
59
/// Returns `true` when `line`, ignoring leading whitespace, starts with `@`.
///
/// A line whose first non-blank character is `@` can never be a comment
/// (comments start with `#`), so no separate comment check is required.
fn is_decorator(line: &str) -> bool {
    line.trim_start().starts_with('@')
}
64
/// Returns `true` when the trimmed `line` opens a procedure declaration,
/// recognised by one of the prefixes `proc.`, `export.`, `proc ` or
/// `pub proc `.
fn is_proc_or_export(line: &str) -> bool {
    const DECLARATION_PREFIXES: [&str; 4] = ["proc.", "export.", "proc ", "pub proc "];

    let body = line.trim();
    DECLARATION_PREFIXES
        .iter()
        .any(|prefix| body.starts_with(*prefix))
}
72
/// Returns `true` for section-separator comments: lines that, ignoring
/// leading whitespace, start with `# ====` or `#! ====`.
///
/// Either prefix already contains `====`, so no further substring check is
/// needed.
fn is_section_separator_comment(line: &str) -> bool {
    let trimmed = line.trim_start();
    trimmed.starts_with("# ====") || trimmed.starts_with("#! ====")
}
77
/// Classification of a single source line, as produced by `classify_line`.
///
/// The payload of each non-empty variant is the line with surrounding
/// whitespace removed. `PartialEq`/`Eq` are derived so classifications can be
/// compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
enum LineType {
    /// A `use.` import statement.
    Import(String),
    /// A line whose first non-blank character is `#`.
    Comment(String),
    /// A line that is empty or whitespace-only.
    Empty,
    /// Anything else.
    Other(String),
}
85
86fn classify_line(line: &str) -> LineType {
87    let trimmed = line.trim();
88    if trimmed.is_empty() {
89        LineType::Empty
90    } else if is_use_statement(trimmed) {
91        LineType::Import(trimmed.to_string())
92    } else if is_comment(trimmed) {
93        LineType::Comment(trimmed.to_string())
94    } else {
95        LineType::Other(trimmed.to_string())
96    }
97}
98
99fn process_import_section(lines: &[&str]) -> (Vec<String>, usize) {
100    let mut result = Vec::new();
101    let mut current_import_group = Vec::new();
102    let mut end_index = 0;
103
104    for (i, line) in lines.iter().enumerate() {
105        let line_type = classify_line(line);
106
107        match line_type {
108            LineType::Import(import) => {
109                current_import_group.push(import);
110                end_index = i + 1;
111            }
112            LineType::Comment(comment) => {
113                // If we have imports in the current group, sort and add them
114                if !current_import_group.is_empty() {
115                    current_import_group.sort();
116                    result.extend(current_import_group.drain(..));
117                    // Add empty line after imports before comment
118                    result.push(String::new());
119                }
120                // Add the comment
121                result.push(comment);
122                end_index = i + 1;
123            }
124            LineType::Empty => {
125                // Empty lines are preserved in their position, but avoid multiple consecutive empty lines
126                if !result.is_empty() && !result.last().map_or(false, |s| s.is_empty()) {
127                    result.push(String::new());
128                    end_index = i + 1;
129                }
130            }
131            LineType::Other(content) => {
132                // Stop processing when we hit const or other non-import content
133                if content.starts_with("const.") {
134                    break;
135                }
136                // If we have imports in the current group, sort and add them
137                if !current_import_group.is_empty() {
138                    current_import_group.sort();
139                    result.extend(current_import_group.drain(..));
140                }
141                break;
142            }
143        }
144    }
145
146    // Handle any remaining imports in the current group
147    if !current_import_group.is_empty() {
148        current_import_group.sort();
149        result.extend(current_import_group);
150    }
151
152    (result, end_index)
153}
154
155pub fn format_code(code: &str) -> String {
156    let lines: Vec<&str> = code.lines().collect();
157
158    // Extract and sort imports
159    let (sorted_imports, import_end_index) = process_import_section(&lines);
160
161    let mut formatted_code = String::new();
162    let mut indentation_level = 0;
163    let mut construct_stack = Vec::new();
164    let mut last_line_was_empty = false;
165    let mut last_was_export_line = false;
166    let mut last_line_was_stack_comment = false;
167
168    // Add sorted imports first
169    for import in sorted_imports {
170        formatted_code.push_str(&import);
171        formatted_code.push('\n');
172    }
173
174    // Add empty line after imports if there were any and the next line exists
175    if import_end_index > 0 && import_end_index < lines.len() {
176        // Always add empty line after imports, unless the next line is already empty
177        let next_line = lines[import_end_index].trim();
178        if !next_line.is_empty() {
179            formatted_code.push('\n');
180        }
181    }
182
183    // Process remaining lines (skip the import section)
184    let remaining_lines = &lines[import_end_index..];
185
186    for (i, line) in remaining_lines.iter().enumerate() {
187        let trimmed_line = line.trim();
188
189        if !trimmed_line.is_empty() {
190            if is_decorator(trimmed_line) {
191                // Handle decorators - they should be formatted at the current indentation level
192                formatted_code.push_str(&INDENT.repeat(indentation_level));
193                formatted_code.push_str(trimmed_line);
194                formatted_code.push('\n');
195                last_line_was_empty = false;
196                continue;
197            }
198
199            if is_comment(trimmed_line) {
200                last_line_was_stack_comment = is_stack_comment(trimmed_line);
201
202                if last_was_export_line {
203                    formatted_code.push_str(trimmed_line);
204                } else {
205                    if let Some(prev_line) = formatted_code.lines().last() {
206                        let prev_indent_level =
207                            prev_line.chars().take_while(|&c| c == ' ').count() / 4;
208                        if prev_line.trim_start().starts_with("export") {
209                            formatted_code.push_str(&INDENT.repeat(prev_indent_level + 1));
210                        } else {
211                            formatted_code.push_str(&INDENT.repeat(indentation_level));
212                        }
213                    } else {
214                        formatted_code.push_str(&INDENT.repeat(indentation_level));
215                    }
216                    formatted_code.push_str(trimmed_line);
217                }
218                formatted_code.push('\n');
219                last_line_was_empty = false;
220                continue;
221            }
222
223            if is_single_export_line(trimmed_line) {
224                formatted_code.push_str(trimmed_line);
225                formatted_code.push('\n');
226                last_line_was_empty = false;
227                last_was_export_line = true;
228                continue;
229            }
230
231            last_was_export_line = false;
232
233            // Remove inline comment for keyword extraction.
234            let code_without_comment = trimmed_line.split('#').next().unwrap().trim();
235
236            // Handle new procedure syntax (proc name or pub proc name)
237            let first_word = if code_without_comment.starts_with("pub proc ") {
238                Some("proc")
239            } else if code_without_comment.starts_with("proc ") {
240                Some("proc")
241            } else {
242                code_without_comment.split('.').next()
243            };
244
245            // Special handling for stack comment newline
246            if last_line_was_stack_comment {
247                if let Some(word) = first_word
248                    && word != "end"
249                    && word != "else"
250                    && !last_line_was_empty
251                {
252                    formatted_code.push('\n');
253                }
254                last_line_was_stack_comment = false;
255            }
256
257            if let Some(word) = first_word
258                && let Some(construct) = ConstructType::from_str(word)
259            {
260                match construct {
261                    ConstructType::End => {
262                        let was_proc_or_export_end =
263                            if let Some(last_construct) = construct_stack.pop() {
264                                let is_proc_or_export = matches!(
265                                    last_construct,
266                                    ConstructType::Proc | ConstructType::Export
267                                );
268                                if last_construct != ConstructType::End && indentation_level > 0 {
269                                    indentation_level -= 1;
270                                }
271                                is_proc_or_export
272                            } else {
273                                false
274                            };
275
276                        formatted_code.push_str(&INDENT.repeat(indentation_level));
277                        formatted_code.push_str(trimmed_line);
278                        formatted_code.push('\n');
279                        last_line_was_empty = false;
280
281                        // Add blank line after procedure/export end if there's more content
282                        if was_proc_or_export_end && i + 1 < remaining_lines.len() {
283                            let next_line = remaining_lines[i + 1].trim();
284                            if !next_line.is_empty() {
285                                formatted_code.push('\n');
286                                last_line_was_empty = true;
287                            }
288                        }
289
290                        continue;
291                    }
292                    ConstructType::Else => {
293                        if let Some(last_construct) = construct_stack.last()
294                            && *last_construct == ConstructType::If
295                            && indentation_level > 0
296                        {
297                            indentation_level -= 1;
298                        }
299                    }
300                    _ => {
301                        construct_stack.push(construct.clone());
302                    }
303                }
304
305                formatted_code.push_str(&INDENT.repeat(indentation_level));
306                formatted_code.push_str(trimmed_line);
307                formatted_code.push('\n');
308                last_line_was_empty = false;
309
310                match construct {
311                    ConstructType::Begin
312                    | ConstructType::If
313                    | ConstructType::Proc
314                    | ConstructType::Export
315                    | ConstructType::Repeat
316                    | ConstructType::While
317                    | ConstructType::Else => {
318                        indentation_level += 1;
319                    }
320                    _ => {}
321                }
322
323                continue;
324            }
325
326            formatted_code.push_str(&INDENT.repeat(indentation_level));
327            formatted_code.push_str(trimmed_line);
328            formatted_code.push('\n');
329            last_line_was_empty = false;
330        } else {
331            // This is an empty line in the input
332            // Check if we should skip adding it (e.g., between comment and const)
333            let should_skip_empty_line = if i + 1 < remaining_lines.len() && !last_line_was_empty {
334                let next_line = remaining_lines[i + 1].trim();
335                let prev_lines: Vec<&str> = formatted_code.lines().collect();
336                let prev_line = prev_lines.last().map(|l| l.trim()).unwrap_or("");
337
338                // Skip empty line if previous line is a comment and next line is a const
339                is_comment(prev_line) && next_line.starts_with("const.")
340            } else {
341                false
342            };
343
344            if !should_skip_empty_line && !last_line_was_empty {
345                formatted_code.push('\n');
346                last_line_was_empty = true;
347            }
348        }
349    }
350
351    // Ensure the output ends with exactly one newline.
352    while formatted_code.ends_with('\n') {
353        formatted_code.pop();
354    }
355    formatted_code.push('\n');
356
357    // Final pass: collapse any remaining multiple consecutive empty lines (3+ becomes 1)
358    // Also prevent blank lines between comments and proc/export declarations
359    let lines: Vec<&str> = formatted_code.lines().collect();
360    let mut final_output = String::new();
361    let mut consecutive_empty_count = 0;
362
363    for (i, line) in lines.iter().enumerate() {
364        let is_empty = line.trim().is_empty();
365
366        if is_empty {
367            consecutive_empty_count += 1;
368
369            // Check if this empty line is between a comment and proc/export/const
370            let should_skip_empty_line = if i > 0 && i + 1 < lines.len() {
371                let prev_line = lines[i - 1].trim();
372                let next_line = lines[i + 1].trim();
373                // Skip empty lines between regular comments and proc/export/const, but preserve them after section separators
374                is_comment(prev_line)
375                    && (is_proc_or_export(next_line) || next_line.starts_with("const."))
376                    && !is_section_separator_comment(prev_line)
377            } else {
378                false
379            };
380
381            // Allow up to 1 empty line, collapse 2+ into 1, but skip if between comment and proc/export
382            if consecutive_empty_count <= 1 && !should_skip_empty_line {
383                final_output.push_str(line);
384                final_output.push('\n');
385            }
386            // Skip additional consecutive empty lines (2nd, 3rd, etc.) or comment-proc gaps
387        } else {
388            final_output.push_str(line);
389            final_output.push('\n');
390            consecutive_empty_count = 0;
391        }
392    }
393
394    // Ensure the final output ends with exactly one newline
395    while final_output.ends_with('\n') {
396        final_output.pop();
397    }
398    final_output.push('\n');
399
400    final_output
401}
402
403pub fn format_file(file_path: &Path) -> io::Result<()> {
404    let file = File::open(file_path)?;
405    let mut input_code = String::new();
406
407    let reader = BufReader::new(file);
408    for line in reader.lines() {
409        input_code.push_str(&line?);
410        input_code.push('\n');
411    }
412
413    let formatted_code = format_code(&input_code);
414
415    let mut file = File::create(file_path)?;
416    file.write_all(formatted_code.as_bytes())?;
417
418    Ok(())
419}