masm_formatter/
lib.rs

1use std::{
2    fs::File,
3    io::{self, BufRead, BufReader, Write},
4    path::Path,
5};
6
7use once_cell::sync::Lazy;
8use regex::Regex;
9
/// Matches a single-line export: text that begins with `export.` and contains `::` or `->`
/// somewhere after that prefix. Compiled once on first use.
static SINGLE_LINE_EXPORT_REGEX: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"^export\..*(?:(?:::)|(?:->)).*$").unwrap());
12
/// Block-structuring keywords the formatter recognises at the start of a line.
#[derive(Debug, PartialEq, Clone)]
enum ConstructType {
    Proc,
    Export,
    Begin,
    End,
    While,
    Repeat,
    If,
    Else,
}

impl ConstructType {
    /// Keyword spelling paired with the construct it denotes.
    const KEYWORDS: [(&'static str, ConstructType); 8] = [
        ("proc", ConstructType::Proc),
        ("export", ConstructType::Export),
        ("begin", ConstructType::Begin),
        ("end", ConstructType::End),
        ("while", ConstructType::While),
        ("repeat", ConstructType::Repeat),
        ("if", ConstructType::If),
        ("else", ConstructType::Else),
    ];

    /// Returns the construct whose keyword is exactly `s` (case-sensitive, no trimming),
    /// or `None` when `s` is not one of the recognised keywords.
    fn from_str(s: &str) -> Option<Self> {
        Self::KEYWORDS
            .iter()
            .find(|(keyword, _)| *keyword == s)
            .map(|(_, construct)| construct.clone())
    }
}
40
/// One level of indentation in the formatted output (four spaces).
const INDENT: &str = "    ";
42
/// Returns `true` when the first non-whitespace character of `line` is `#`.
fn is_comment(line: &str) -> bool {
    line.trim_start().chars().next() == Some('#')
}
46
/// Returns `true` when `line`, ignoring leading whitespace, starts with a stack-state
/// comment marker: `# => [` or `#! => [`.
fn is_stack_comment(line: &str) -> bool {
    const STACK_PREFIXES: [&str; 2] = ["# => [", "#! => ["];

    let body = line.trim_start();
    STACK_PREFIXES.iter().any(|prefix| body.starts_with(*prefix))
}
51
/// Returns `true` when `line` matches `SINGLE_LINE_EXPORT_REGEX`, i.e. it begins with
/// `export.` and contains `::` or `->` after that prefix.
///
/// The pattern is anchored at the start, so callers pass an already-trimmed line.
fn is_single_export_line(line: &str) -> bool {
    SINGLE_LINE_EXPORT_REGEX.is_match(line)
}
55
/// Returns `true` when `line`, ignoring leading whitespace, begins with `use.`.
fn is_use_statement(line: &str) -> bool {
    line.trim_start().strip_prefix("use.").is_some()
}
59
/// Returns `true` when `line`, ignoring surrounding whitespace, begins with `proc.` or
/// `export.`.
fn is_proc_or_export(line: &str) -> bool {
    let head = line.trim();
    ["proc.", "export."]
        .iter()
        .any(|keyword| head.starts_with(*keyword))
}
64
/// Returns `true` when `line`, ignoring leading whitespace, begins with a section-separator
/// marker: `# ====` or `#! ====`.
fn is_section_separator_comment(line: &str) -> bool {
    let body = line.trim_start();
    // Both accepted prefixes already contain `====`, so matching the prefix is sufficient.
    body.starts_with("# ====") || body.starts_with("#! ====")
}
69
/// Classification of one source line, as produced by `classify_line`.
#[derive(Debug, Clone)]
enum LineType {
    /// A `use.` statement; holds the trimmed line text.
    Import(String),
    /// A line whose first non-whitespace character is `#`; holds the trimmed line text.
    Comment(String),
    /// A line that is empty or contains only whitespace.
    Empty,
    /// Any other line; holds the trimmed line text.
    Other(String),
}
77
78fn classify_line(line: &str) -> LineType {
79    let trimmed = line.trim();
80    if trimmed.is_empty() {
81        LineType::Empty
82    } else if is_use_statement(trimmed) {
83        LineType::Import(trimmed.to_string())
84    } else if is_comment(trimmed) {
85        LineType::Comment(trimmed.to_string())
86    } else {
87        LineType::Other(trimmed.to_string())
88    }
89}
90
91fn process_import_section(lines: &[&str]) -> (Vec<String>, usize) {
92    let mut result = Vec::new();
93    let mut current_import_group = Vec::new();
94    let mut end_index = 0;
95
96    for (i, line) in lines.iter().enumerate() {
97        let line_type = classify_line(line);
98
99        match line_type {
100            LineType::Import(import) => {
101                current_import_group.push(import);
102                end_index = i + 1;
103            }
104            LineType::Comment(comment) => {
105                // If we have imports in the current group, sort and add them
106                if !current_import_group.is_empty() {
107                    current_import_group.sort();
108                    result.extend(current_import_group.drain(..));
109                    // Add empty line after imports before comment
110                    result.push(String::new());
111                }
112                // Add the comment
113                result.push(comment);
114                end_index = i + 1;
115            }
116            LineType::Empty => {
117                // Empty lines are preserved in their position, but avoid multiple consecutive empty lines
118                if !result.is_empty() && !result.last().map_or(false, |s| s.is_empty()) {
119                    result.push(String::new());
120                    end_index = i + 1;
121                }
122            }
123            LineType::Other(content) => {
124                // Stop processing when we hit const or other non-import content
125                if content.starts_with("const.") {
126                    break;
127                }
128                // If we have imports in the current group, sort and add them
129                if !current_import_group.is_empty() {
130                    current_import_group.sort();
131                    result.extend(current_import_group.drain(..));
132                }
133                break;
134            }
135        }
136    }
137
138    // Handle any remaining imports in the current group
139    if !current_import_group.is_empty() {
140        current_import_group.sort();
141        result.extend(current_import_group);
142    }
143
144    (result, end_index)
145}
146
147pub fn format_code(code: &str) -> String {
148    let lines: Vec<&str> = code.lines().collect();
149
150    // Extract and sort imports
151    let (sorted_imports, import_end_index) = process_import_section(&lines);
152
153    let mut formatted_code = String::new();
154    let mut indentation_level = 0;
155    let mut construct_stack = Vec::new();
156    let mut last_line_was_empty = false;
157    let mut last_was_export_line = false;
158    let mut last_line_was_stack_comment = false;
159
160    // Add sorted imports first
161    for import in sorted_imports {
162        formatted_code.push_str(&import);
163        formatted_code.push('\n');
164    }
165
166    // Add empty line after imports if there were any and the next line exists
167    if import_end_index > 0 && import_end_index < lines.len() {
168        // Always add empty line after imports, unless the next line is already empty
169        let next_line = lines[import_end_index].trim();
170        if !next_line.is_empty() {
171            formatted_code.push('\n');
172        }
173    }
174
175    // Process remaining lines (skip the import section)
176    let remaining_lines = &lines[import_end_index..];
177
178    for (i, line) in remaining_lines.iter().enumerate() {
179        let trimmed_line = line.trim();
180
181        if !trimmed_line.is_empty() {
182            if is_comment(trimmed_line) {
183                last_line_was_stack_comment = is_stack_comment(trimmed_line);
184
185                if last_was_export_line {
186                    formatted_code.push_str(trimmed_line);
187                } else {
188                    if let Some(prev_line) = formatted_code.lines().last() {
189                        let prev_indent_level =
190                            prev_line.chars().take_while(|&c| c == ' ').count() / 4;
191                        if prev_line.trim_start().starts_with("export") {
192                            formatted_code.push_str(&INDENT.repeat(prev_indent_level + 1));
193                        } else {
194                            formatted_code.push_str(&INDENT.repeat(indentation_level));
195                        }
196                    } else {
197                        formatted_code.push_str(&INDENT.repeat(indentation_level));
198                    }
199                    formatted_code.push_str(trimmed_line);
200                }
201                formatted_code.push('\n');
202                last_line_was_empty = false;
203                continue;
204            }
205
206            if is_single_export_line(trimmed_line) {
207                formatted_code.push_str(trimmed_line);
208                formatted_code.push('\n');
209                last_line_was_empty = false;
210                last_was_export_line = true;
211                continue;
212            }
213
214            last_was_export_line = false;
215
216            // Remove inline comment for keyword extraction.
217            let code_without_comment = trimmed_line.split('#').next().unwrap().trim();
218            let first_word = code_without_comment.split('.').next();
219
220            // Special handling for stack comment newline
221            if last_line_was_stack_comment {
222                if let Some(word) = first_word
223                    && word != "end"
224                    && word != "else"
225                    && !last_line_was_empty
226                {
227                    formatted_code.push('\n');
228                }
229                last_line_was_stack_comment = false;
230            }
231
232            if let Some(word) = first_word
233                && let Some(construct) = ConstructType::from_str(word)
234            {
235                match construct {
236                    ConstructType::End => {
237                        let was_proc_or_export_end =
238                            if let Some(last_construct) = construct_stack.pop() {
239                                let is_proc_or_export = matches!(
240                                    last_construct,
241                                    ConstructType::Proc | ConstructType::Export
242                                );
243                                if last_construct != ConstructType::End && indentation_level > 0 {
244                                    indentation_level -= 1;
245                                }
246                                is_proc_or_export
247                            } else {
248                                false
249                            };
250
251                        formatted_code.push_str(&INDENT.repeat(indentation_level));
252                        formatted_code.push_str(trimmed_line);
253                        formatted_code.push('\n');
254                        last_line_was_empty = false;
255
256                        // Add blank line after procedure/export end if there's more content
257                        if was_proc_or_export_end && i + 1 < remaining_lines.len() {
258                            let next_line = remaining_lines[i + 1].trim();
259                            if !next_line.is_empty() {
260                                formatted_code.push('\n');
261                                last_line_was_empty = true;
262                            }
263                        }
264
265                        continue;
266                    }
267                    ConstructType::Else => {
268                        if let Some(last_construct) = construct_stack.last()
269                            && *last_construct == ConstructType::If
270                            && indentation_level > 0
271                        {
272                            indentation_level -= 1;
273                        }
274                    }
275                    _ => {
276                        construct_stack.push(construct.clone());
277                    }
278                }
279
280                formatted_code.push_str(&INDENT.repeat(indentation_level));
281                formatted_code.push_str(trimmed_line);
282                formatted_code.push('\n');
283                last_line_was_empty = false;
284
285                match construct {
286                    ConstructType::Begin
287                    | ConstructType::If
288                    | ConstructType::Proc
289                    | ConstructType::Export
290                    | ConstructType::Repeat
291                    | ConstructType::While
292                    | ConstructType::Else => {
293                        indentation_level += 1;
294                    }
295                    _ => {}
296                }
297
298                continue;
299            }
300
301            formatted_code.push_str(&INDENT.repeat(indentation_level));
302            formatted_code.push_str(trimmed_line);
303            formatted_code.push('\n');
304            last_line_was_empty = false;
305        } else {
306            // This is an empty line in the input
307            // Check if we should skip adding it (e.g., between comment and const)
308            let should_skip_empty_line = if i + 1 < remaining_lines.len() && !last_line_was_empty {
309                let next_line = remaining_lines[i + 1].trim();
310                let prev_lines: Vec<&str> = formatted_code.lines().collect();
311                let prev_line = prev_lines.last().map(|l| l.trim()).unwrap_or("");
312
313                // Skip empty line if previous line is a comment and next line is a const
314                is_comment(prev_line) && next_line.starts_with("const.")
315            } else {
316                false
317            };
318
319            if !should_skip_empty_line && !last_line_was_empty {
320                formatted_code.push('\n');
321                last_line_was_empty = true;
322            }
323        }
324    }
325
326    // Ensure the output ends with exactly one newline.
327    while formatted_code.ends_with('\n') {
328        formatted_code.pop();
329    }
330    formatted_code.push('\n');
331
332    // Final pass: collapse any remaining multiple consecutive empty lines (3+ becomes 1)
333    // Also prevent blank lines between comments and proc/export declarations
334    let lines: Vec<&str> = formatted_code.lines().collect();
335    let mut final_output = String::new();
336    let mut consecutive_empty_count = 0;
337
338    for (i, line) in lines.iter().enumerate() {
339        let is_empty = line.trim().is_empty();
340
341        if is_empty {
342            consecutive_empty_count += 1;
343
344            // Check if this empty line is between a comment and proc/export/const
345            let should_skip_empty_line = if i > 0 && i + 1 < lines.len() {
346                let prev_line = lines[i - 1].trim();
347                let next_line = lines[i + 1].trim();
348                // Skip empty lines between regular comments and proc/export/const, but preserve them after section separators
349                is_comment(prev_line)
350                    && (is_proc_or_export(next_line) || next_line.starts_with("const."))
351                    && !is_section_separator_comment(prev_line)
352            } else {
353                false
354            };
355
356            // Allow up to 1 empty line, collapse 2+ into 1, but skip if between comment and proc/export
357            if consecutive_empty_count <= 1 && !should_skip_empty_line {
358                final_output.push_str(line);
359                final_output.push('\n');
360            }
361            // Skip additional consecutive empty lines (2nd, 3rd, etc.) or comment-proc gaps
362        } else {
363            final_output.push_str(line);
364            final_output.push('\n');
365            consecutive_empty_count = 0;
366        }
367    }
368
369    // Ensure the final output ends with exactly one newline
370    while final_output.ends_with('\n') {
371        final_output.pop();
372    }
373    final_output.push('\n');
374
375    final_output
376}
377
378pub fn format_file(file_path: &Path) -> io::Result<()> {
379    let file = File::open(file_path)?;
380    let mut input_code = String::new();
381
382    let reader = BufReader::new(file);
383    for line in reader.lines() {
384        input_code.push_str(&line?);
385        input_code.push('\n');
386    }
387
388    let formatted_code = format_code(&input_code);
389
390    let mut file = File::create(file_path)?;
391    file.write_all(formatted_code.as_bytes())?;
392
393    Ok(())
394}