1use std::vec;
2
3#[cfg(debug_assertions)]
4use ebnf_parser::Token;
5use ebnf_parser::{ast::*, CommentMap, ParseResult, TokenKind};
6
7use crate::configuration::{Configuration, NewlineKind, QuoteStyle};
8
/// Layout directives understood by `Formatter::push_special`.
///
/// Unlike plain characters or strings, each variant is resolved against the
/// formatter's current state (indentation, tracked line length, trailing
/// output) at the moment it is pushed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Special {
    /// Strip trailing spaces from the output, emit the configured newline
    /// sequence and reset the tracked line length.
    Newline,
    /// Emit as many spaces as the current indentation.
    Indent,
    /// A `Newline` immediately followed by an `Indent`.
    NewlineIndent,
    /// Emit the current indentation minus the given length, i.e. pad text of
    /// that length up to the indentation column.
    RestIndent(usize),
    /// Break the line (like `NewlineIndent`) once the current line has
    /// reached the configured width, otherwise behave like `MergingSpace`.
    SpaceOrNewline,
    /// Emit a single space unless the output already ends with one.
    MergingSpace,
}
23
24enum PushKind<'a> {
25 Char(char),
26 Str(&'a str),
27 Special(Special),
28}
29
30impl From<char> for PushKind<'_> {
31 fn from(c: char) -> Self {
32 Self::Char(c)
33 }
34}
35
36impl<'a> From<&'a str> for PushKind<'a> {
37 fn from(s: &'a str) -> Self {
38 Self::Str(s)
39 }
40}
41
42impl From<Special> for PushKind<'_> {
43 fn from(s: Special) -> Self {
44 Self::Special(s)
45 }
46}
47
48pub struct Formatter<'src, 'config, CommentFormatter>
49where
50 CommentFormatter: FnMut(String) -> String,
51{
52 syntax: Option<Syntax<'src>>,
53 text: &'src str,
54 config: &'config Configuration,
55 indent: usize,
56 output: String,
57 curr_line_len: usize,
58 #[cfg(debug_assertions)]
59 tokens: vec::IntoIter<Token<'src>>,
60 #[cfg(debug_assertions)]
61 curr_tok: Option<Token<'src>>,
62 tok_index: usize,
63 comments: CommentMap<'src>,
64 no_push: bool,
67 comment_formatter: CommentFormatter,
69}
70
71impl<'src, 'config, CommentFormatter> Formatter<'src, 'config, CommentFormatter>
72where
73 CommentFormatter: FnMut(String) -> String,
74{
75 pub fn new(
76 parse_result: ParseResult<'src>,
77 text: &'src str,
78 config: &'config Configuration,
79 comment_formatter: CommentFormatter,
80 ) -> Self {
81 Self {
82 syntax: Some(parse_result.syntax),
83 text,
84 config,
85 indent: 0,
86 output: String::new(),
87 curr_line_len: 0,
88 #[cfg(debug_assertions)]
89 tokens: parse_result.tokens.into_iter(),
90 #[cfg(debug_assertions)]
91 curr_tok: None,
92 tok_index: usize::MAX,
93 comments: parse_result.comments,
94 no_push: false,
95 comment_formatter,
96 }
97 }
98
99 pub fn format(mut self) -> String {
100 self.next_tok();
101 let syntax = self
102 .syntax
103 .take()
104 .expect("set to Some(..) in Formatter::new and this method is only called once");
105 self.format_syntax(syntax);
106 self.output
107 }
108
109 fn next_tok(&mut self) {
110 #[cfg(debug_assertions)]
111 {
112 self.curr_tok = self.tokens.next();
113 }
114 self.tok_index = self.tok_index.wrapping_add(1);
115 }
116
117 fn push(&mut self, kind: PushKind) {
118 match kind {
119 PushKind::Char(c) => self.push_char(c),
120 PushKind::Str(s) => self.push_str(s),
121 PushKind::Special(s) => self.push_special(s),
122 }
123 }
124
125 fn push_char(&mut self, char: char) {
126 if self.no_push {
127 return;
128 }
129 self.curr_line_len += 1;
130 self.output.push(char);
131 }
132
133 fn push_str(&mut self, text: &str) {
134 if self.no_push {
135 return;
136 }
137 self.curr_line_len += text.chars().count();
138 self.output.push_str(text);
139 }
140
141 fn push_special(&mut self, special: Special) {
142 if self.no_push {
143 return;
144 }
145 match special {
146 Special::Newline => {
147 self.output
149 .truncate(self.output.trim_end_matches(' ').len());
150
151 match self.config.newline_kind {
152 NewlineKind::Unix => self.output.push('\n'),
153 NewlineKind::Windows => self.output.push_str("\r\n"),
154 };
155 self.curr_line_len = 0;
156 }
157 Special::Indent => self.push_str(&" ".repeat(self.indent)),
158 Special::NewlineIndent => {
159 self.push_special(Special::Newline);
160 self.push_special(Special::Indent);
161 }
162 Special::RestIndent(len) => self.push_str(&" ".repeat(self.indent - len)),
163 Special::SpaceOrNewline => {
164 if self.curr_line_len >= self.config.line_width {
165 self.push_special(Special::NewlineIndent);
166 } else {
167 self.push_special(Special::MergingSpace);
168 }
169 }
170 Special::MergingSpace => {
171 if !self.output.ends_with(' ') {
172 self.push_char(' ');
173 }
174 }
175 }
176 }
177
178 fn push_token(&mut self, token: TokenKind, prefix: Option<PushKind>, suffix: Option<PushKind>) {
179 self.check_comments();
180 #[cfg(debug_assertions)]
181 {
182 debug_assert_eq!(
183 token,
184 self.curr_tok
185 .as_ref()
186 .unwrap_or_else(|| panic!("expected TokenKind {:?} but was None", token))
187 .kind
188 );
189 }
190 self.next_tok();
191
192 if let Some(prefix) = prefix {
193 self.push(prefix);
194 }
195 match token {
196 TokenKind::Terminal(text) => {
197 let quote = match self.config.quote_style {
198 QuoteStyle::Single if text.contains('\'') => '"',
199 QuoteStyle::Single => '\'',
200 QuoteStyle::Double if text.contains('"') => '\'',
201 QuoteStyle::Double => '"',
202 };
203 self.push_char(quote);
204 self.push_str(text);
205 self.push_char(quote);
206 }
207 _ => self.push_str(&token.to_string()),
208 }
209 if let Some(suffix) = suffix {
210 self.push(suffix);
211 }
212 }
213
214 fn check_comments(&mut self) {
215 if let Some(comments) = self.comments.remove(&self.tok_index) {
216 let mut prev_comment: Option<Comment> = None;
217 for comment in comments {
218 if let Some(prev_comment) = prev_comment {
220 let text_between = &self.text[prev_comment.span.end..comment.span.start];
221 if text_between.contains("\n\n") || text_between.contains("\r\n\r\n") {
222 self.push_special(Special::Newline);
223 }
224 }
225
226 self.format_comment(comment.text);
227 prev_comment = Some(comment);
228 }
229 }
230 }
231
232 fn format_syntax(&mut self, node: Syntax) {
233 let mut blocks: Vec<Vec<SyntaxRule>> = vec![vec![]];
234 for node in node.rules {
235 if let Some(prev_node) = blocks
236 .last()
237 .expect("Vector initialized with one element and never remove any element")
238 .last()
239 {
240 let text_between = &self.text[prev_node.span.end..node.span.start];
241 if text_between.contains("\n\n") || text_between.contains("\r\n\r\n") {
242 blocks.push(vec![]);
243 }
244 }
245 blocks
246 .last_mut()
247 .expect("Vector initialized with one element and never remove any element")
248 .push(node);
249 }
250 let last = blocks.len().saturating_sub(1);
251 for (index, block) in blocks.into_iter().enumerate() {
252 self.format_rule_block(block);
253 if index != last {
254 self.push_special(Special::Newline);
255 }
256 }
257 self.check_comments();
258 }
259
260 fn format_rule_block(&mut self, block: Vec<SyntaxRule>) {
261 self.indent = block
262 .iter()
263 .map(|rule| rule.name.len())
264 .max()
265 .expect("Every block consists of at least one rule")
266 + 1;
267 for rule in block {
268 self.format_syntax_rule(rule);
269 }
270 }
271
272 fn format_syntax_rule(&mut self, node: SyntaxRule) {
273 if let Some(comments) = self.comments.get(&self.tok_index) {
275 if comments
276 .iter()
277 .any(|comment| comment.text.contains(&self.config.ignore_rule_comment_text))
278 {
279 self.check_comments();
280 let raw_text = &self.text[node.span.start..node.span.end];
281 for line in raw_text.split('\n') {
282 self.push_str(line.trim_end_matches('\r'));
283 self.push_special(Special::Newline);
284 }
285 self.no_push = true;
286 }
287 }
288
289 self.push_token(TokenKind::Identifier(node.name), None, None);
291 self.push_special(Special::RestIndent(node.name.len()));
292 self.push_token(TokenKind::Equal, None, Some(' '.into()));
293 self.format_definitions_list(node.definitions);
294 self.push_token(
295 TokenKind::Semicolon,
296 Some(Special::MergingSpace.into()),
297 None,
298 );
299 self.push_special(Special::Newline);
300
301 self.no_push = false;
303 }
304
305 fn format_definitions_list(&mut self, node: Vec<SingleDefinition>) {
306 let inline = node.iter().all(|node| node.terms.len() == 1);
308
309 let last = node.len().saturating_sub(1);
310 for (index, node) in node.into_iter().enumerate() {
311 self.format_single_definition(node);
312 if index != last {
313 self.push_token(
314 TokenKind::Pipe,
315 Some(match inline {
316 true => Special::SpaceOrNewline.into(),
317 false => Special::NewlineIndent.into(),
318 }),
319 Some(' '.into()),
320 );
321 }
322 }
323 }
324
325 fn format_single_definition(&mut self, node: SingleDefinition) {
326 let last = node.terms.len().saturating_sub(1);
327 for (index, node) in node.terms.into_iter().enumerate() {
328 self.format_syntactic_term(node);
329 if index != last {
330 self.push_token(
331 TokenKind::Comma,
332 Some(Special::SpaceOrNewline.into()),
333 Some(' '.into()),
334 );
335 }
336 }
337 }
338
339 fn format_syntactic_term(&mut self, node: SyntacticTerm) {
340 let prefix = match (&node.factor, &node.exception) {
341 (
342 SyntacticFactor {
343 primary:
344 SyntacticPrimary {
345 kind: SyntacticPrimaryKind::RepeatedSequence(_),
346 ..
347 },
348 ..
349 },
350 ..,
351 ) => None,
352 _ => Some(Special::MergingSpace.into()),
353 };
354
355 self.format_syntactic_factor(node.factor);
356 if let Some(exception) = node.exception {
357 self.push_token(TokenKind::Dash, prefix, Some(' '.into()));
358 self.format_syntactic_factor(exception);
359 }
360 }
361
362 fn format_syntactic_factor(&mut self, node: SyntacticFactor) {
363 if let Some(repetition) = node.repetition {
364 self.push_token(TokenKind::Integer(repetition), None, None);
365 self.push_token(
366 TokenKind::Star,
367 Some(Special::MergingSpace.into()),
368 Some(' '.into()),
369 );
370 }
371 self.format_syntactic_primary(node.primary);
372 }
373
374 fn format_syntactic_primary(&mut self, node: SyntacticPrimary) {
375 match node.kind {
376 SyntacticPrimaryKind::OptionalSequence(node) => self.format_delimited_definitions_list(
377 node,
378 TokenKind::LBracket,
379 TokenKind::RBracket,
380 ),
381 SyntacticPrimaryKind::RepeatedSequence(node) => {
382 self.format_delimited_definitions_list(node, TokenKind::LBrace, TokenKind::RBrace)
383 }
384 SyntacticPrimaryKind::GroupedSequence(node) => {
385 self.format_delimited_definitions_list(node, TokenKind::LParen, TokenKind::RParen)
386 }
387 SyntacticPrimaryKind::MetaIdentifier(name) => {
388 self.push_token(TokenKind::Identifier(name), None, None)
389 }
390 SyntacticPrimaryKind::TerminalString(text) => {
391 self.push_token(TokenKind::Terminal(text), None, None)
392 }
393 SyntacticPrimaryKind::SpecialSequence(text) => {
394 self.push_token(TokenKind::SpecialSeq(text), None, None)
395 }
396 SyntacticPrimaryKind::EmptySequence => {}
397 }
398 }
399
400 fn format_delimited_definitions_list(
401 &mut self,
402 node: Vec<SingleDefinition>,
403 open: TokenKind,
404 close: TokenKind,
405 ) {
406 let saved_indent = self.indent;
407 self.indent = self.curr_line_len;
408 self.push_token(open, None, Some(' '.into()));
409 self.format_definitions_list(node);
410 self.push_token(close, Some(Special::MergingSpace.into()), None);
411 self.indent = saved_indent;
412 }
413
414 fn format_comment(&mut self, mut text: &str) {
415 if self.curr_line_len != 0 {
416 self.push_special(Special::MergingSpace);
417 self.push_str("(* ");
418 self.push_str(text.trim());
419 self.push_str(" *) ");
420 } else if text.contains('\n') {
421 let saved_indent = self.indent;
422 self.indent = self.config.mutliline_comment_indent;
423
424 self.push_str("(*");
425 self.push_special(Special::Newline);
426
427 let current_comment_indent = text
428 .trim_start_matches(|c| c == '\n' || c == '\r')
429 .chars()
430 .take_while(|c| *c == ' ')
431 .count();
432 text = text.trim();
433
434 let mut trimmed_lines = vec![];
435 for line in text.split('\n') {
436 let mut line_start = 0;
438 while line_start < current_comment_indent
439 && line.as_bytes().get(line_start) == Some(&b' ')
440 {
441 line_start += 1;
442 }
443
444 trimmed_lines.push(line[line_start..].trim_end_matches('\r'));
445 }
446
447 let formatted = (self.comment_formatter)(trimmed_lines.join("\n"));
448 for line in formatted.trim().split('\n') {
449 if !line.trim().is_empty() {
450 self.push_special(Special::Indent);
451 }
452 self.push_str(line.trim_end_matches('\r'));
453 self.push_special(Special::Newline);
454 }
455
456 self.push_str("*)");
457 self.indent = saved_indent;
458 self.push_special(Special::Newline);
459 } else {
460 self.push_str("(* ");
461 self.push_str(text.trim());
462 self.push_str(" *)");
463 self.push_special(Special::Newline);
464 }
465 }
466}