wdl_format/token/
post.rs

//! Postprocessed tokens.
//!
//! Generally speaking, unless you are working with the internals of code
//! formatting, you're not going to be working with these.
5
6use std::collections::HashSet;
7use std::fmt::Display;
8use std::rc::Rc;
9
10use wdl_ast::SyntaxKind;
11
12use crate::Comment;
13use crate::Config;
14use crate::NEWLINE;
15use crate::PreToken;
16use crate::SPACE;
17use crate::Token;
18use crate::TokenStream;
19use crate::Trivia;
20use crate::TriviaBlankLineSpacingPolicy;
21
22/// [`PostToken`]s that precede an inline comment.
23const INLINE_COMMENT_PRECEDING_TOKENS: [PostToken; 2] = [PostToken::Space, PostToken::Space];
24
/// A token produced by postprocessing.
///
/// Each variant is one unit of final output: either a piece of whitespace or
/// a piece of literal text.
#[derive(Clone, PartialEq, Eq)]
pub enum PostToken {
    /// A single space.
    Space,

    /// A line break.
    Newline,

    /// One level of indentation.
    Indent,

    /// Temporary indentation text.
    ///
    /// This is added after a [`PostToken::Indent`] during the formatting of
    /// command sections.
    TempIndent(Rc<String>),

    /// Literal text that is emitted as-is.
    Literal(Rc<String>),
}
46
47impl std::fmt::Debug for PostToken {
48    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
49        match self {
50            Self::Space => write!(f, "<SPACE>"),
51            Self::Newline => write!(f, "<NEWLINE>"),
52            Self::Indent => write!(f, "<INDENT>"),
53            Self::TempIndent(value) => write!(f, "<TEMP_INDENT@{value}>"),
54            Self::Literal(value) => write!(f, "<LITERAL@{value}>"),
55        }
56    }
57}
58
59impl Token for PostToken {
60    /// Returns a displayable version of the token.
61    fn display<'a>(&'a self, config: &'a Config) -> impl Display + 'a {
62        /// A displayable version of a [`PostToken`].
63        struct Display<'a> {
64            /// The token to display.
65            token: &'a PostToken,
66            /// The configuration to use.
67            config: &'a Config,
68        }
69
70        impl std::fmt::Display for Display<'_> {
71            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72                match self.token {
73                    PostToken::Space => write!(f, "{SPACE}"),
74                    PostToken::Newline => write!(f, "{NEWLINE}"),
75                    PostToken::Indent => {
76                        write!(f, "{indent}", indent = self.config.indent().string())
77                    }
78                    PostToken::TempIndent(value) => write!(f, "{value}"),
79                    PostToken::Literal(value) => write!(f, "{value}"),
80                }
81            }
82        }
83
84        Display {
85            token: self,
86            config,
87        }
88    }
89}
90
91impl PostToken {
92    /// Gets the width of the [`PostToken`].
93    fn width(&self, config: &crate::Config) -> usize {
94        match self {
95            Self::Space => SPACE.len(),
96            Self::Newline => 0,
97            Self::Indent => config.indent().num(),
98            Self::TempIndent(value) => value.len(),
99            Self::Literal(value) => value.len(),
100        }
101    }
102}
103
104impl TokenStream<PostToken> {
105    /// Gets the maximum width of the [`TokenStream`].
106    ///
107    /// This is suitable to call if the stream represents multiple lines.
108    fn max_width(&self, config: &Config) -> usize {
109        let mut max: usize = 0;
110        let mut cur_width: usize = 0;
111        for token in self.iter() {
112            cur_width += token.width(config);
113            if token == &PostToken::Newline {
114                max = max.max(cur_width);
115                cur_width = 0;
116            }
117        }
118        max.max(cur_width)
119    }
120
121    /// Gets the width of the last line of the [`TokenStream`].
122    fn last_line_width(&self, config: &Config) -> usize {
123        let mut width = 0;
124        for token in self.iter().rev() {
125            if token == &PostToken::Newline {
126                break;
127            }
128            width += token.width(config);
129        }
130        width
131    }
132}
133
/// Where a line break may be placed relative to a token.
enum LineBreak {
    /// The break may be inserted immediately before the token.
    Before,
    /// The break may be inserted immediately after the token.
    After,
}
141
142/// Returns whether a token can be line broken.
143fn can_be_line_broken(kind: SyntaxKind) -> Option<LineBreak> {
144    match kind {
145        SyntaxKind::CloseBrace
146        | SyntaxKind::CloseBracket
147        | SyntaxKind::CloseParen
148        | SyntaxKind::CloseHeredoc
149        | SyntaxKind::Assignment
150        | SyntaxKind::Plus
151        | SyntaxKind::Minus
152        | SyntaxKind::Asterisk
153        | SyntaxKind::Slash
154        | SyntaxKind::Percent
155        | SyntaxKind::Exponentiation
156        | SyntaxKind::Equal
157        | SyntaxKind::NotEqual
158        | SyntaxKind::Less
159        | SyntaxKind::LessEqual
160        | SyntaxKind::Greater
161        | SyntaxKind::GreaterEqual
162        | SyntaxKind::LogicalAnd
163        | SyntaxKind::LogicalOr
164        | SyntaxKind::AfterKeyword
165        | SyntaxKind::AsKeyword
166        | SyntaxKind::IfKeyword
167        | SyntaxKind::ElseKeyword
168        | SyntaxKind::ThenKeyword => Some(LineBreak::Before),
169        SyntaxKind::OpenBrace
170        | SyntaxKind::OpenBracket
171        | SyntaxKind::OpenParen
172        | SyntaxKind::OpenHeredoc
173        | SyntaxKind::Colon
174        | SyntaxKind::PlaceholderOpen
175        | SyntaxKind::Comma => Some(LineBreak::After),
176        _ => None,
177    }
178}
179
/// Where the postprocessor currently is within the output line.
#[derive(Default, PartialEq, Eq)]
enum LinePosition {
    /// Nothing other than indentation has been written to the line yet.
    #[default]
    StartOfLine,

    /// Content has already been written to the line.
    MiddleOfLine,
}
190
191/// A postprocessor of [tokens](PreToken).
192#[derive(Default)]
193pub struct Postprocessor {
194    /// The current position in the line.
195    position: LinePosition,
196
197    /// The current indentation level.
198    indent_level: usize,
199
200    /// Whether the current line has been interrupted by trivia.
201    interrupted: bool,
202
203    /// The current trivial blank line spacing policy.
204    line_spacing_policy: TriviaBlankLineSpacingPolicy,
205
206    /// Whether temporary indentation is needed.
207    temp_indent_needed: bool,
208
209    /// Temporary indentation to add.
210    temp_indent: Rc<String>,
211}
212
213impl Postprocessor {
214    /// Runs the postprocessor.
215    pub fn run(&mut self, input: TokenStream<PreToken>, config: &Config) -> TokenStream<PostToken> {
216        let mut output = TokenStream::<PostToken>::default();
217        let mut buffer = TokenStream::<PreToken>::default();
218
219        for token in input {
220            match token {
221                PreToken::LineEnd => {
222                    self.flush(&buffer, &mut output, config);
223                    self.trim_whitespace(&mut output);
224                    output.push(PostToken::Newline);
225
226                    buffer.clear();
227                    self.interrupted = false;
228                    self.position = LinePosition::StartOfLine;
229                }
230                _ => {
231                    buffer.push(token);
232                }
233            }
234        }
235
236        output
237    }
238
239    /// Takes a step of a [`PreToken`] stream and processes the appropriate
240    /// [`PostToken`]s.
241    pub fn step(
242        &mut self,
243        token: PreToken,
244        next: Option<&PreToken>,
245        stream: &mut TokenStream<PostToken>,
246    ) {
247        if stream.is_empty() {
248            self.interrupted = false;
249            self.position = LinePosition::StartOfLine;
250            self.indent(stream);
251        }
252        match token {
253            PreToken::BlankLine => {
254                self.blank_line(stream);
255            }
256            PreToken::LineEnd => {
257                self.interrupted = false;
258                self.end_line(stream);
259            }
260            PreToken::WordEnd => {
261                stream.trim_end(&PostToken::Space);
262
263                if self.position == LinePosition::MiddleOfLine {
264                    stream.push(PostToken::Space);
265                } else {
266                    // We're at the start of a line, so we don't need to add a
267                    // space.
268                }
269            }
270            PreToken::IndentStart => {
271                self.indent_level += 1;
272                self.end_line(stream);
273            }
274            PreToken::IndentEnd => {
275                self.indent_level = self.indent_level.saturating_sub(1);
276                self.end_line(stream);
277            }
278            PreToken::LineSpacingPolicy(policy) => {
279                self.line_spacing_policy = policy;
280            }
281            PreToken::Literal(value, kind) => {
282                assert!(!kind.is_trivia());
283
284                // This is special handling for inserting the empty string.
285                // We remove any indentation or spaces from the end of the
286                // stream and then add the empty string as a literal.
287                // Then we set the position to [`LinePosition::MiddleOfLine`]
288                // in order to trigger a newline being added before the next
289                // token.
290                if value.is_empty() {
291                    self.trim_last_line(stream);
292                    stream.push(PostToken::Literal(value));
293                    self.position = LinePosition::MiddleOfLine;
294                    return;
295                }
296
297                if self.interrupted
298                    && matches!(
299                        kind,
300                        SyntaxKind::OpenBrace
301                            | SyntaxKind::OpenBracket
302                            | SyntaxKind::OpenParen
303                            | SyntaxKind::OpenHeredoc
304                    )
305                    && matches!(
306                        stream.0.last(),
307                        Some(&PostToken::Indent) | Some(&PostToken::TempIndent(_))
308                    )
309                {
310                    stream.0.pop();
311                }
312
313                if kind == SyntaxKind::LiteralCommandText {
314                    self.temp_indent = Rc::new(
315                        value
316                            .chars()
317                            .take_while(|c| matches!(c.to_string().as_str(), SPACE | crate::TAB))
318                            .collect(),
319                    );
320                }
321
322                stream.push(PostToken::Literal(value));
323                self.position = LinePosition::MiddleOfLine;
324            }
325            PreToken::Trivia(trivia) => match trivia {
326                Trivia::BlankLine => match self.line_spacing_policy {
327                    TriviaBlankLineSpacingPolicy::Always => {
328                        self.blank_line(stream);
329                    }
330                    TriviaBlankLineSpacingPolicy::RemoveTrailingBlanks => {
331                        if matches!(next, Some(&PreToken::Trivia(Trivia::Comment(_)))) {
332                            self.blank_line(stream);
333                        }
334                    }
335                },
336                Trivia::Comment(comment) => {
337                    match comment {
338                        Comment::Preceding(value) => {
339                            if !matches!(
340                                stream.0.last(),
341                                Some(&PostToken::Newline)
342                                    | Some(&PostToken::Indent)
343                                    | Some(&PostToken::TempIndent(_))
344                                    | None
345                            ) {
346                                self.interrupted = true;
347                            }
348                            self.end_line(stream);
349                            stream.push(PostToken::Literal(value));
350                        }
351                        Comment::Inline(value) => {
352                            assert!(self.position == LinePosition::MiddleOfLine);
353                            if let Some(next) = next {
354                                if next != &PreToken::LineEnd {
355                                    self.interrupted = true;
356                                }
357                            }
358                            self.trim_last_line(stream);
359                            for token in INLINE_COMMENT_PRECEDING_TOKENS.iter() {
360                                stream.push(token.clone());
361                            }
362                            stream.push(PostToken::Literal(value));
363                        }
364                    }
365                    self.position = LinePosition::MiddleOfLine;
366                    self.end_line(stream);
367                }
368            },
369            PreToken::TempIndentStart => {
370                self.temp_indent_needed = true;
371            }
372            PreToken::TempIndentEnd => {
373                self.temp_indent_needed = false;
374            }
375        }
376    }
377
378    /// Flushes the `in_stream` buffer to the `out_stream`.
379    fn flush(
380        &mut self,
381        in_stream: &TokenStream<PreToken>,
382        out_stream: &mut TokenStream<PostToken>,
383        config: &Config,
384    ) {
385        assert!(!self.interrupted);
386        assert!(self.position == LinePosition::StartOfLine);
387        let mut post_buffer = TokenStream::<PostToken>::default();
388        let mut pre_buffer = in_stream.iter().peekable();
389        let starting_indent = self.indent_level;
390        while let Some(token) = pre_buffer.next() {
391            let next = pre_buffer.peek().copied();
392            self.step(token.clone(), next, &mut post_buffer);
393        }
394
395        // If all lines are short enough, we can just add the post_buffer to the
396        // out_stream and be done.
397        if config.max_line_length().is_none()
398            || post_buffer.max_width(config) <= config.max_line_length().unwrap()
399        {
400            out_stream.extend(post_buffer);
401            return;
402        }
403
404        // At least one line in the post_buffer is too long.
405        // We iterate through the in_stream to find potential line breaks,
406        // and then we iterate through the in_stream again to actually insert
407        // them in the proper places.
408
409        let max_length = config.max_line_length().unwrap();
410
411        let mut potential_line_breaks: HashSet<usize> = HashSet::new();
412        for (i, token) in in_stream.iter().enumerate() {
413            if let PreToken::Literal(_, kind) = token {
414                match can_be_line_broken(*kind) {
415                    Some(LineBreak::Before) => {
416                        potential_line_breaks.insert(i);
417                    }
418                    Some(LineBreak::After) => {
419                        potential_line_breaks.insert(i + 1);
420                    }
421                    None => {}
422                }
423            }
424        }
425
426        if potential_line_breaks.is_empty() {
427            // There are no potential line breaks, so we can't do anything.
428            out_stream.extend(post_buffer);
429            return;
430        }
431
432        // Set up the buffers for the second pass.
433        post_buffer.clear();
434        let mut pre_buffer = in_stream.iter().enumerate().peekable();
435
436        // Reset the indent level.
437        self.indent_level = starting_indent;
438
439        while let Some((i, token)) = pre_buffer.next() {
440            let mut cache = None;
441            if potential_line_breaks.contains(&i) {
442                if post_buffer.last_line_width(config) > max_length {
443                    // The line is already too long, and taking the next step
444                    // can only make it worse. Insert a line break here.
445                    self.interrupted = true;
446                    self.end_line(&mut post_buffer);
447                } else {
448                    // The line is not too long yet, but it might be after the
449                    // next step. Cache the current state so we can revert to it
450                    // if necessary.
451                    cache = Some(post_buffer.clone());
452                }
453            }
454            self.step(
455                token.clone(),
456                pre_buffer.peek().map(|(_, v)| &**v),
457                &mut post_buffer,
458            );
459
460            if let Some(cache) = cache {
461                if post_buffer.last_line_width(config) > max_length {
462                    // The line is too long after the next step. Revert to the
463                    // cached state and insert a line break.
464                    post_buffer = cache;
465                    self.interrupted = true;
466                    self.end_line(&mut post_buffer);
467                    self.step(
468                        token.clone(),
469                        pre_buffer.peek().map(|(_, v)| &**v),
470                        &mut post_buffer,
471                    );
472                }
473            }
474        }
475
476        out_stream.extend(post_buffer);
477    }
478
479    /// Trims any and all whitespace from the end of the stream.
480    fn trim_whitespace(&self, stream: &mut TokenStream<PostToken>) {
481        stream.trim_while(|token| {
482            matches!(
483                token,
484                PostToken::Space
485                    | PostToken::Newline
486                    | PostToken::Indent
487                    | PostToken::TempIndent(_)
488            )
489        });
490    }
491
492    /// Trims spaces and indents (and not newlines) from the end of the stream.
493    fn trim_last_line(&mut self, stream: &mut TokenStream<PostToken>) {
494        stream.trim_while(|token| {
495            matches!(
496                token,
497                PostToken::Space | PostToken::Indent | PostToken::TempIndent(_)
498            )
499        });
500    }
501
502    /// Ends the current line without resetting the interrupted flag.
503    ///
504    /// Removes any trailing spaces or indents and adds a newline only if state
505    /// is not [`LinePosition::StartOfLine`]. State is then set to
506    /// [`LinePosition::StartOfLine`]. Safe to call multiple times in a row.
507    fn end_line(&mut self, stream: &mut TokenStream<PostToken>) {
508        self.trim_last_line(stream);
509        if self.position != LinePosition::StartOfLine {
510            stream.push(PostToken::Newline);
511        }
512        self.position = LinePosition::StartOfLine;
513        self.indent(stream);
514    }
515
516    /// Pushes the current indentation level to the stream.
517    /// This should only be called when the state is
518    /// [`LinePosition::StartOfLine`]. This does not change the state.
519    fn indent(&self, stream: &mut TokenStream<PostToken>) {
520        assert!(self.position == LinePosition::StartOfLine);
521
522        let level = if self.interrupted {
523            self.indent_level + 1
524        } else {
525            self.indent_level
526        };
527
528        for _ in 0..level {
529            stream.push(PostToken::Indent);
530        }
531
532        if self.temp_indent_needed {
533            stream.push(PostToken::TempIndent(self.temp_indent.clone()));
534        }
535    }
536
537    /// Creates a blank line and then indents.
538    fn blank_line(&mut self, stream: &mut TokenStream<PostToken>) {
539        self.trim_whitespace(stream);
540        if !stream.is_empty() {
541            stream.push(PostToken::Newline);
542        }
543        stream.push(PostToken::Newline);
544        self.position = LinePosition::StartOfLine;
545        self.indent(stream);
546    }
547}