ebnf_fmt/
formatter.rs

1use std::vec;
2
3#[cfg(debug_assertions)]
4use ebnf_parser::Token;
5use ebnf_parser::{ast::*, CommentMap, ParseResult, TokenKind};
6
7use crate::configuration::{Configuration, NewlineKind, QuoteStyle};
8
/// Layout directives that are expanded into whitespace when pushed to the
/// formatter output.
///
/// The type is plain data, so it derives the usual value-type traits to make it
/// printable in diagnostics, freely copyable and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Special {
    /// A newline according to the current config
    Newline,
    /// The current indent as spaces
    Indent,
    /// A Newline followed by an Indent
    NewlineIndent,
    /// The current indent as spaces minus the given length
    RestIndent(usize),
    /// A MergingSpace or NewlineIndent depending on the current line length
    SpaceOrNewline,
    /// A space when the previous character is not a space
    MergingSpace,
}
23
/// Something that can be appended to the formatter output.
enum PushKind<'a> {
    /// A single character
    Char(char),
    /// A borrowed string slice
    Str(&'a str),
    /// A layout directive, see [`Special`]
    Special(Special),
}
29
30impl From<char> for PushKind<'_> {
31    fn from(c: char) -> Self {
32        Self::Char(c)
33    }
34}
35
36impl<'a> From<&'a str> for PushKind<'a> {
37    fn from(s: &'a str) -> Self {
38        Self::Str(s)
39    }
40}
41
42impl From<Special> for PushKind<'_> {
43    fn from(s: Special) -> Self {
44        Self::Special(s)
45    }
46}
47
/// Formats a parsed grammar back into text.
///
/// Built with `Formatter::new` and consumed by `Formatter::format`, which returns the formatted
/// output.
pub struct Formatter<'src, 'config, CommentFormatter>
where
    CommentFormatter: FnMut(String) -> String,
{
    /// The parsed syntax tree; set to `Some(..)` on construction and taken out when formatting.
    syntax: Option<Syntax<'src>>,
    /// The original source text, sliced by node and comment spans.
    text: &'src str,
    /// The formatting configuration.
    config: &'config Configuration,
    /// The current indent in spaces, used by `Special::Indent` and `Special::RestIndent`.
    indent: usize,
    /// The formatted text produced so far.
    output: String,
    /// The length of the output line that is currently being written.
    curr_line_len: usize,
    /// The parsed tokens, only kept in debug builds to verify the emitted token order.
    #[cfg(debug_assertions)]
    tokens: vec::IntoIter<Token<'src>>,
    /// The token that is expected to be pushed next, only kept in debug builds.
    #[cfg(debug_assertions)]
    curr_tok: Option<Token<'src>>,
    /// The index of the token that is pushed next; used as key into `comments`.
    tok_index: usize,
    /// The comments of the source, looked up by token index.
    comments: CommentMap<'src>,
    /// Is true while ignoring formatting for a rule to prevent pushing to `output` while still
    /// progressing `tokens`.
    no_push: bool,
    /// A custom function to format the text inside multiline comments
    comment_formatter: CommentFormatter,
}
70
71impl<'src, 'config, CommentFormatter> Formatter<'src, 'config, CommentFormatter>
72where
73    CommentFormatter: FnMut(String) -> String,
74{
75    pub fn new(
76        parse_result: ParseResult<'src>,
77        text: &'src str,
78        config: &'config Configuration,
79        comment_formatter: CommentFormatter,
80    ) -> Self {
81        Self {
82            syntax: Some(parse_result.syntax),
83            text,
84            config,
85            indent: 0,
86            output: String::new(),
87            curr_line_len: 0,
88            #[cfg(debug_assertions)]
89            tokens: parse_result.tokens.into_iter(),
90            #[cfg(debug_assertions)]
91            curr_tok: None,
92            tok_index: usize::MAX,
93            comments: parse_result.comments,
94            no_push: false,
95            comment_formatter,
96        }
97    }
98
99    pub fn format(mut self) -> String {
100        self.next_tok();
101        let syntax = self
102            .syntax
103            .take()
104            .expect("set to Some(..) in Formatter::new and this method is only called once");
105        self.format_syntax(syntax);
106        self.output
107    }
108
109    fn next_tok(&mut self) {
110        #[cfg(debug_assertions)]
111        {
112            self.curr_tok = self.tokens.next();
113        }
114        self.tok_index = self.tok_index.wrapping_add(1);
115    }
116
117    fn push(&mut self, kind: PushKind) {
118        match kind {
119            PushKind::Char(c) => self.push_char(c),
120            PushKind::Str(s) => self.push_str(s),
121            PushKind::Special(s) => self.push_special(s),
122        }
123    }
124
125    fn push_char(&mut self, char: char) {
126        if self.no_push {
127            return;
128        }
129        self.curr_line_len += 1;
130        self.output.push(char);
131    }
132
133    fn push_str(&mut self, text: &str) {
134        if self.no_push {
135            return;
136        }
137        self.curr_line_len += text.chars().count();
138        self.output.push_str(text);
139    }
140
141    fn push_special(&mut self, special: Special) {
142        if self.no_push {
143            return;
144        }
145        match special {
146            Special::Newline => {
147                // Trim trailing spaces
148                self.output
149                    .truncate(self.output.trim_end_matches(' ').len());
150
151                match self.config.newline_kind {
152                    NewlineKind::Unix => self.output.push('\n'),
153                    NewlineKind::Windows => self.output.push_str("\r\n"),
154                };
155                self.curr_line_len = 0;
156            }
157            Special::Indent => self.push_str(&" ".repeat(self.indent)),
158            Special::NewlineIndent => {
159                self.push_special(Special::Newline);
160                self.push_special(Special::Indent);
161            }
162            Special::RestIndent(len) => self.push_str(&" ".repeat(self.indent - len)),
163            Special::SpaceOrNewline => {
164                if self.curr_line_len >= self.config.line_width {
165                    self.push_special(Special::NewlineIndent);
166                } else {
167                    self.push_special(Special::MergingSpace);
168                }
169            }
170            Special::MergingSpace => {
171                if !self.output.ends_with(' ') {
172                    self.push_char(' ');
173                }
174            }
175        }
176    }
177
178    fn push_token(&mut self, token: TokenKind, prefix: Option<PushKind>, suffix: Option<PushKind>) {
179        self.check_comments();
180        #[cfg(debug_assertions)]
181        {
182            debug_assert_eq!(
183                token,
184                self.curr_tok
185                    .as_ref()
186                    .unwrap_or_else(|| panic!("expected TokenKind {:?} but was None", token))
187                    .kind
188            );
189        }
190        self.next_tok();
191
192        if let Some(prefix) = prefix {
193            self.push(prefix);
194        }
195        match token {
196            TokenKind::Terminal(text) => {
197                let quote = match self.config.quote_style {
198                    QuoteStyle::Single if text.contains('\'') => '"',
199                    QuoteStyle::Single => '\'',
200                    QuoteStyle::Double if text.contains('"') => '\'',
201                    QuoteStyle::Double => '"',
202                };
203                self.push_char(quote);
204                self.push_str(text);
205                self.push_char(quote);
206            }
207            _ => self.push_str(&token.to_string()),
208        }
209        if let Some(suffix) = suffix {
210            self.push(suffix);
211        }
212    }
213
214    fn check_comments(&mut self) {
215        if let Some(comments) = self.comments.remove(&self.tok_index) {
216            let mut prev_comment: Option<Comment> = None;
217            for comment in comments {
218                // Insert blank line when there was one before
219                if let Some(prev_comment) = prev_comment {
220                    let text_between = &self.text[prev_comment.span.end..comment.span.start];
221                    if text_between.contains("\n\n") || text_between.contains("\r\n\r\n") {
222                        self.push_special(Special::Newline);
223                    }
224                }
225
226                self.format_comment(comment.text);
227                prev_comment = Some(comment);
228            }
229        }
230    }
231
232    fn format_syntax(&mut self, node: Syntax) {
233        let mut blocks: Vec<Vec<SyntaxRule>> = vec![vec![]];
234        for node in node.rules {
235            if let Some(prev_node) = blocks
236                .last()
237                .expect("Vector initialized with one element and never remove any element")
238                .last()
239            {
240                let text_between = &self.text[prev_node.span.end..node.span.start];
241                if text_between.contains("\n\n") || text_between.contains("\r\n\r\n") {
242                    blocks.push(vec![]);
243                }
244            }
245            blocks
246                .last_mut()
247                .expect("Vector initialized with one element and never remove any element")
248                .push(node);
249        }
250        let last = blocks.len().saturating_sub(1);
251        for (index, block) in blocks.into_iter().enumerate() {
252            self.format_rule_block(block);
253            if index != last {
254                self.push_special(Special::Newline);
255            }
256        }
257        self.check_comments();
258    }
259
260    fn format_rule_block(&mut self, block: Vec<SyntaxRule>) {
261        self.indent = block
262            .iter()
263            .map(|rule| rule.name.len())
264            .max()
265            .expect("Every block consists of at least one rule")
266            + 1;
267        for rule in block {
268            self.format_syntax_rule(rule);
269        }
270    }
271
272    fn format_syntax_rule(&mut self, node: SyntaxRule) {
273        // Check for ignore comment
274        if let Some(comments) = self.comments.get(&self.tok_index) {
275            if comments
276                .iter()
277                .any(|comment| comment.text.contains(&self.config.ignore_rule_comment_text))
278            {
279                self.check_comments();
280                let raw_text = &self.text[node.span.start..node.span.end];
281                for line in raw_text.split('\n') {
282                    self.push_str(line.trim_end_matches('\r'));
283                    self.push_special(Special::Newline);
284                }
285                self.no_push = true;
286            }
287        }
288
289        // Format
290        self.push_token(TokenKind::Identifier(node.name), None, None);
291        self.push_special(Special::RestIndent(node.name.len()));
292        self.push_token(TokenKind::Equal, None, Some(' '.into()));
293        self.format_definitions_list(node.definitions);
294        self.push_token(
295            TokenKind::Semicolon,
296            Some(Special::MergingSpace.into()),
297            None,
298        );
299        self.push_special(Special::Newline);
300
301        // Allow further formatting
302        self.no_push = false;
303    }
304
305    fn format_definitions_list(&mut self, node: Vec<SingleDefinition>) {
306        // Format inline when every definition has length 1
307        let inline = node.iter().all(|node| node.terms.len() == 1);
308
309        let last = node.len().saturating_sub(1);
310        for (index, node) in node.into_iter().enumerate() {
311            self.format_single_definition(node);
312            if index != last {
313                self.push_token(
314                    TokenKind::Pipe,
315                    Some(match inline {
316                        true => Special::SpaceOrNewline.into(),
317                        false => Special::NewlineIndent.into(),
318                    }),
319                    Some(' '.into()),
320                );
321            }
322        }
323    }
324
325    fn format_single_definition(&mut self, node: SingleDefinition) {
326        let last = node.terms.len().saturating_sub(1);
327        for (index, node) in node.terms.into_iter().enumerate() {
328            self.format_syntactic_term(node);
329            if index != last {
330                self.push_token(
331                    TokenKind::Comma,
332                    Some(Special::SpaceOrNewline.into()),
333                    Some(' '.into()),
334                );
335            }
336        }
337    }
338
339    fn format_syntactic_term(&mut self, node: SyntacticTerm) {
340        let prefix = match (&node.factor, &node.exception) {
341            (
342                SyntacticFactor {
343                    primary:
344                        SyntacticPrimary {
345                            kind: SyntacticPrimaryKind::RepeatedSequence(_),
346                            ..
347                        },
348                    ..
349                },
350                ..,
351            ) => None,
352            _ => Some(Special::MergingSpace.into()),
353        };
354
355        self.format_syntactic_factor(node.factor);
356        if let Some(exception) = node.exception {
357            self.push_token(TokenKind::Dash, prefix, Some(' '.into()));
358            self.format_syntactic_factor(exception);
359        }
360    }
361
362    fn format_syntactic_factor(&mut self, node: SyntacticFactor) {
363        if let Some(repetition) = node.repetition {
364            self.push_token(TokenKind::Integer(repetition), None, None);
365            self.push_token(
366                TokenKind::Star,
367                Some(Special::MergingSpace.into()),
368                Some(' '.into()),
369            );
370        }
371        self.format_syntactic_primary(node.primary);
372    }
373
374    fn format_syntactic_primary(&mut self, node: SyntacticPrimary) {
375        match node.kind {
376            SyntacticPrimaryKind::OptionalSequence(node) => self.format_delimited_definitions_list(
377                node,
378                TokenKind::LBracket,
379                TokenKind::RBracket,
380            ),
381            SyntacticPrimaryKind::RepeatedSequence(node) => {
382                self.format_delimited_definitions_list(node, TokenKind::LBrace, TokenKind::RBrace)
383            }
384            SyntacticPrimaryKind::GroupedSequence(node) => {
385                self.format_delimited_definitions_list(node, TokenKind::LParen, TokenKind::RParen)
386            }
387            SyntacticPrimaryKind::MetaIdentifier(name) => {
388                self.push_token(TokenKind::Identifier(name), None, None)
389            }
390            SyntacticPrimaryKind::TerminalString(text) => {
391                self.push_token(TokenKind::Terminal(text), None, None)
392            }
393            SyntacticPrimaryKind::SpecialSequence(text) => {
394                self.push_token(TokenKind::SpecialSeq(text), None, None)
395            }
396            SyntacticPrimaryKind::EmptySequence => {}
397        }
398    }
399
400    fn format_delimited_definitions_list(
401        &mut self,
402        node: Vec<SingleDefinition>,
403        open: TokenKind,
404        close: TokenKind,
405    ) {
406        let saved_indent = self.indent;
407        self.indent = self.curr_line_len;
408        self.push_token(open, None, Some(' '.into()));
409        self.format_definitions_list(node);
410        self.push_token(close, Some(Special::MergingSpace.into()), None);
411        self.indent = saved_indent;
412    }
413
414    fn format_comment(&mut self, mut text: &str) {
415        if self.curr_line_len != 0 {
416            self.push_special(Special::MergingSpace);
417            self.push_str("(* ");
418            self.push_str(text.trim());
419            self.push_str(" *) ");
420        } else if text.contains('\n') {
421            let saved_indent = self.indent;
422            self.indent = self.config.mutliline_comment_indent;
423
424            self.push_str("(*");
425            self.push_special(Special::Newline);
426
427            let current_comment_indent = text
428                .trim_start_matches(|c| c == '\n' || c == '\r')
429                .chars()
430                .take_while(|c| *c == ' ')
431                .count();
432            text = text.trim();
433
434            let mut trimmed_lines = vec![];
435            for line in text.split('\n') {
436                // Trim any existing indent up to `current_comment_indent`
437                let mut line_start = 0;
438                while line_start < current_comment_indent
439                    && line.as_bytes().get(line_start) == Some(&b' ')
440                {
441                    line_start += 1;
442                }
443
444                trimmed_lines.push(line[line_start..].trim_end_matches('\r'));
445            }
446
447            let formatted = (self.comment_formatter)(trimmed_lines.join("\n"));
448            for line in formatted.trim().split('\n') {
449                if !line.trim().is_empty() {
450                    self.push_special(Special::Indent);
451                }
452                self.push_str(line.trim_end_matches('\r'));
453                self.push_special(Special::Newline);
454            }
455
456            self.push_str("*)");
457            self.indent = saved_indent;
458            self.push_special(Special::Newline);
459        } else {
460            self.push_str("(* ");
461            self.push_str(text.trim());
462            self.push_str(" *)");
463            self.push_special(Special::Newline);
464        }
465    }
466}