wdl_format/token/
post.rs

1//! Postprocessed tokens.
2//!
3//! Generally speaking, unless you are working with the internals of code
4//! formatting, you're not going to be working with these.
5
6use std::collections::HashSet;
7use std::fmt::Display;
8use std::rc::Rc;
9
10use wdl_ast::SyntaxKind;
11
12use crate::Comment;
13use crate::Config;
14use crate::NEWLINE;
15use crate::PreToken;
16use crate::SPACE;
17use crate::Token;
18use crate::TokenStream;
19use crate::Trivia;
20use crate::TriviaBlankLineSpacingPolicy;
21
/// The [`PostToken`]s pushed immediately before an inline comment.
///
/// These two spaces separate the end of a code line from the inline comment
/// that follows it on the same line.
const INLINE_COMMENT_PRECEDING_TOKENS: [PostToken; 2] = [PostToken::Space, PostToken::Space];
24
/// A postprocessed token.
///
/// A stream of these is what the [`Postprocessor`] produces from a stream of
/// [`PreToken`]s; each one renders to concrete text via its [`Token`]
/// implementation.
#[derive(Clone, Eq, PartialEq)]
pub enum PostToken {
    /// A space.
    Space,

    /// A newline.
    Newline,

    /// One indentation.
    ///
    /// The text it renders to comes from the formatting [`Config`].
    Indent,

    /// A temporary indent.
    ///
    /// This is added after a [`PostToken::Indent`] during the formatting of
    /// command sections. The payload is the whitespace to emit verbatim.
    TempIndent(Rc<String>),

    /// A string literal.
    ///
    /// The payload is emitted verbatim.
    Literal(Rc<String>),
}
46
47impl std::fmt::Debug for PostToken {
48    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
49        match self {
50            Self::Space => write!(f, "<SPACE>"),
51            Self::Newline => write!(f, "<NEWLINE>"),
52            Self::Indent => write!(f, "<INDENT>"),
53            Self::TempIndent(value) => write!(f, "<TEMP_INDENT@{value}>"),
54            Self::Literal(value) => write!(f, "<LITERAL@{value}>"),
55        }
56    }
57}
58
59impl Token for PostToken {
60    /// Returns a displayable version of the token.
61    fn display<'a>(&'a self, config: &'a Config) -> impl Display + 'a {
62        /// A displayable version of a [`PostToken`].
63        struct Display<'a> {
64            /// The token to display.
65            token: &'a PostToken,
66            /// The configuration to use.
67            config: &'a Config,
68        }
69
70        impl std::fmt::Display for Display<'_> {
71            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72                match self.token {
73                    PostToken::Space => write!(f, "{SPACE}"),
74                    PostToken::Newline => write!(f, "{NEWLINE}"),
75                    PostToken::Indent => {
76                        write!(f, "{indent}", indent = self.config.indent().string())
77                    }
78                    PostToken::TempIndent(value) => write!(f, "{value}"),
79                    PostToken::Literal(value) => write!(f, "{value}"),
80                }
81            }
82        }
83
84        Display {
85            token: self,
86            config,
87        }
88    }
89}
90
91impl PostToken {
92    /// Gets the width of the [`PostToken`].
93    ///
94    /// This is used to determine how much space the token takes up _within a
95    /// single line_ for the purposes of respecting the maximum line length.
96    /// As such, newlines are considered zero-width tokens.
97    fn width(&self, config: &crate::Config) -> usize {
98        match self {
99            Self::Space => SPACE.len(), // 1 character
100            Self::Newline => 0,
101            Self::Indent => config.indent().num(),
102            Self::TempIndent(value) => value.len(),
103            Self::Literal(value) => value.len(),
104        }
105    }
106}
107
108impl TokenStream<PostToken> {
109    /// Gets the maximum width of the [`TokenStream`].
110    ///
111    /// This is suitable to call if the stream represents multiple lines.
112    fn max_width(&self, config: &Config) -> usize {
113        let mut max: usize = 0;
114        let mut cur_width: usize = 0;
115        for token in self.iter() {
116            cur_width += token.width(config);
117            if token == &PostToken::Newline {
118                max = max.max(cur_width);
119                cur_width = 0;
120            }
121        }
122        max.max(cur_width)
123    }
124
125    /// Gets the width of the last line of the [`TokenStream`].
126    fn last_line_width(&self, config: &Config) -> usize {
127        let mut width = 0;
128        for token in self.iter().rev() {
129            if token == &PostToken::Newline {
130                break;
131            }
132            width += token.width(config);
133        }
134        width
135    }
136}
137
/// Where a line break may be inserted relative to a token.
enum LineBreak {
    /// A line break that can be inserted before a token.
    Before,
    /// A line break that can be inserted after a token.
    After,
}
145
146/// Returns whether a token can be line broken.
147fn can_be_line_broken(kind: SyntaxKind) -> Option<LineBreak> {
148    match kind {
149        SyntaxKind::CloseBrace
150        | SyntaxKind::CloseBracket
151        | SyntaxKind::CloseParen
152        | SyntaxKind::CloseHeredoc
153        | SyntaxKind::Assignment
154        | SyntaxKind::Plus
155        | SyntaxKind::Minus
156        | SyntaxKind::Asterisk
157        | SyntaxKind::Slash
158        | SyntaxKind::Percent
159        | SyntaxKind::Exponentiation
160        | SyntaxKind::Equal
161        | SyntaxKind::NotEqual
162        | SyntaxKind::Less
163        | SyntaxKind::LessEqual
164        | SyntaxKind::Greater
165        | SyntaxKind::GreaterEqual
166        | SyntaxKind::LogicalAnd
167        | SyntaxKind::LogicalOr
168        | SyntaxKind::AfterKeyword
169        | SyntaxKind::AsKeyword
170        | SyntaxKind::IfKeyword
171        | SyntaxKind::ElseKeyword
172        | SyntaxKind::ThenKeyword => Some(LineBreak::Before),
173        SyntaxKind::OpenBrace
174        | SyntaxKind::OpenBracket
175        | SyntaxKind::OpenParen
176        | SyntaxKind::OpenHeredoc
177        | SyntaxKind::Colon
178        | SyntaxKind::PlaceholderOpen
179        | SyntaxKind::Comma => Some(LineBreak::After),
180        _ => None,
181    }
182}
183
/// Current position in a line.
///
/// The postprocessor uses this to decide whether a newline or a separating
/// space needs to be emitted.
#[derive(Default, Eq, PartialEq)]
enum LinePosition {
    /// The start of a line: nothing other than indentation has been emitted
    /// on the current line yet.
    #[default]
    StartOfLine,

    /// The middle of a line: a literal has already been emitted on the
    /// current line.
    MiddleOfLine,
}
194
/// A postprocessor of [tokens](PreToken).
///
/// It converts a stream of [`PreToken`]s into a stream of [`PostToken`]s,
/// tracking the line position and indentation state as it goes.
#[derive(Default)]
pub struct Postprocessor {
    /// The current position in the line.
    position: LinePosition,

    /// The current indentation level.
    indent_level: usize,

    /// Whether the current line has been interrupted by trivia.
    ///
    /// This is also set when a long line is split. While set, lines are
    /// indented one level deeper than `indent_level`.
    interrupted: bool,

    /// The current trivial blank line spacing policy.
    line_spacing_policy: TriviaBlankLineSpacingPolicy,

    /// Whether temporary indentation is needed.
    ///
    /// While set, `temp_indent` is appended after the regular indentation.
    temp_indent_needed: bool,

    /// Temporary indentation to add.
    ///
    /// This is the leading whitespace of the most recently seen command text
    /// literal.
    temp_indent: Rc<String>,
}
216
217impl Postprocessor {
218    /// Runs the postprocessor.
219    pub fn run(&mut self, input: TokenStream<PreToken>, config: &Config) -> TokenStream<PostToken> {
220        let mut output = TokenStream::<PostToken>::default();
221        let mut buffer = TokenStream::<PreToken>::default();
222
223        for token in input {
224            match token {
225                PreToken::LineEnd => {
226                    self.flush(&buffer, &mut output, config);
227                    self.trim_whitespace(&mut output);
228                    output.push(PostToken::Newline);
229
230                    buffer.clear();
231                    self.interrupted = false;
232                    self.position = LinePosition::StartOfLine;
233                }
234                _ => {
235                    buffer.push(token);
236                }
237            }
238        }
239
240        // TODO: bug where trailing trivia is not processed
241        // https://github.com/stjude-rust-labs/wdl/issues/497
242
243        output
244    }
245
246    /// Takes a step of a [`PreToken`] stream and processes the appropriate
247    /// [`PostToken`]s.
248    pub fn step(
249        &mut self,
250        token: PreToken,
251        next: Option<&PreToken>,
252        stream: &mut TokenStream<PostToken>,
253    ) {
254        if stream.is_empty() {
255            self.interrupted = false;
256            self.position = LinePosition::StartOfLine;
257            self.indent(stream);
258        }
259        match token {
260            PreToken::BlankLine => {
261                self.blank_line(stream);
262            }
263            PreToken::LineEnd => {
264                self.interrupted = false;
265                self.end_line(stream);
266            }
267            PreToken::WordEnd => {
268                stream.trim_end(&PostToken::Space);
269
270                if self.position == LinePosition::MiddleOfLine {
271                    stream.push(PostToken::Space);
272                } else {
273                    // We're at the start of a line, so we don't need to add a
274                    // space.
275                }
276            }
277            PreToken::IndentStart => {
278                self.indent_level += 1;
279                self.end_line(stream);
280            }
281            PreToken::IndentEnd => {
282                self.indent_level = self.indent_level.saturating_sub(1);
283                self.end_line(stream);
284            }
285            PreToken::LineSpacingPolicy(policy) => {
286                self.line_spacing_policy = policy;
287            }
288            PreToken::Literal(value, kind) => {
289                assert!(!kind.is_trivia());
290
291                // This is special handling for inserting the empty string.
292                // We remove any indentation or spaces from the end of the
293                // stream and then add the empty string as a literal.
294                // Then we set the position to [`LinePosition::MiddleOfLine`]
295                // in order to trigger a newline being added before the next
296                // token.
297                if value.is_empty() {
298                    self.trim_last_line(stream);
299                    stream.push(PostToken::Literal(value));
300                    self.position = LinePosition::MiddleOfLine;
301                    return;
302                }
303
304                if self.interrupted
305                    && matches!(
306                        kind,
307                        SyntaxKind::OpenBrace
308                            | SyntaxKind::OpenBracket
309                            | SyntaxKind::OpenParen
310                            | SyntaxKind::OpenHeredoc
311                    )
312                    && matches!(
313                        stream.0.last(),
314                        Some(&PostToken::Indent) | Some(&PostToken::TempIndent(_))
315                    )
316                {
317                    stream.0.pop();
318                }
319
320                if kind == SyntaxKind::LiteralCommandText {
321                    self.temp_indent = Rc::new(
322                        value
323                            .chars()
324                            .take_while(|c| matches!(c.to_string().as_str(), SPACE | crate::TAB))
325                            .collect(),
326                    );
327                }
328
329                stream.push(PostToken::Literal(value));
330                self.position = LinePosition::MiddleOfLine;
331            }
332            PreToken::Trivia(trivia) => match trivia {
333                Trivia::BlankLine => match self.line_spacing_policy {
334                    TriviaBlankLineSpacingPolicy::Always => {
335                        self.blank_line(stream);
336                    }
337                    TriviaBlankLineSpacingPolicy::RemoveTrailingBlanks => {
338                        if matches!(next, Some(&PreToken::Trivia(Trivia::Comment(_)))) {
339                            self.blank_line(stream);
340                        }
341                    }
342                },
343                Trivia::Comment(comment) => {
344                    match comment {
345                        Comment::Preceding(value) => {
346                            if !matches!(
347                                stream.0.last(),
348                                Some(&PostToken::Newline)
349                                    | Some(&PostToken::Indent)
350                                    | Some(&PostToken::TempIndent(_))
351                                    | None
352                            ) {
353                                self.interrupted = true;
354                            }
355                            self.end_line(stream);
356                            stream.push(PostToken::Literal(value));
357                            self.position = LinePosition::MiddleOfLine;
358                        }
359                        Comment::Inline(value) => {
360                            assert!(self.position == LinePosition::MiddleOfLine);
361                            if let Some(next) = next {
362                                if next != &PreToken::LineEnd {
363                                    self.interrupted = true;
364                                }
365                            }
366                            self.trim_last_line(stream);
367                            for token in INLINE_COMMENT_PRECEDING_TOKENS.iter() {
368                                stream.push(token.clone());
369                            }
370                            stream.push(PostToken::Literal(value));
371                        }
372                    }
373                    self.end_line(stream);
374                }
375            },
376            PreToken::TempIndentStart => {
377                self.temp_indent_needed = true;
378            }
379            PreToken::TempIndentEnd => {
380                self.temp_indent_needed = false;
381            }
382        }
383    }
384
385    /// Flushes the `in_stream` buffer to the `out_stream`.
386    fn flush(
387        &mut self,
388        in_stream: &TokenStream<PreToken>,
389        out_stream: &mut TokenStream<PostToken>,
390        config: &Config,
391    ) {
392        assert!(!self.interrupted);
393        assert!(self.position == LinePosition::StartOfLine);
394        let mut post_buffer = TokenStream::<PostToken>::default();
395        let mut pre_buffer = in_stream.iter().peekable();
396        let starting_indent = self.indent_level;
397        while let Some(token) = pre_buffer.next() {
398            let next = pre_buffer.peek().copied();
399            self.step(token.clone(), next, &mut post_buffer);
400        }
401
402        // If all lines are short enough, we can just add the post_buffer to the
403        // out_stream and be done.
404        if config.max_line_length().is_none()
405            || post_buffer.max_width(config) <= config.max_line_length().unwrap()
406        {
407            out_stream.extend(post_buffer);
408            return;
409        }
410
411        // At least one line in the post_buffer is too long.
412        // We iterate through the in_stream to find potential line breaks,
413        // and then we iterate through the in_stream again to actually insert
414        // them in the proper places.
415
416        let max_length = config.max_line_length().unwrap();
417
418        let mut potential_line_breaks: HashSet<usize> = HashSet::new();
419        for (i, token) in in_stream.iter().enumerate() {
420            if let PreToken::Literal(_, kind) = token {
421                match can_be_line_broken(*kind) {
422                    Some(LineBreak::Before) => {
423                        potential_line_breaks.insert(i);
424                    }
425                    Some(LineBreak::After) => {
426                        potential_line_breaks.insert(i + 1);
427                    }
428                    None => {}
429                }
430            }
431        }
432
433        if potential_line_breaks.is_empty() {
434            // There are no potential line breaks, so we can't do anything.
435            out_stream.extend(post_buffer);
436            return;
437        }
438
439        // Set up the buffers for the second pass.
440        post_buffer.clear();
441        let mut pre_buffer = in_stream.iter().enumerate().peekable();
442
443        // Reset the indent level.
444        self.indent_level = starting_indent;
445
446        while let Some((i, token)) = pre_buffer.next() {
447            let mut cache = None;
448            if potential_line_breaks.contains(&i) {
449                if post_buffer.last_line_width(config) > max_length {
450                    // The line is already too long, and taking the next step
451                    // can only make it worse. Insert a line break here.
452                    self.interrupted = true;
453                    self.end_line(&mut post_buffer);
454                } else {
455                    // The line is not too long yet, but it might be after the
456                    // next step. Cache the current state so we can revert to it
457                    // if necessary.
458                    cache = Some(post_buffer.clone());
459                }
460            }
461            self.step(
462                token.clone(),
463                pre_buffer.peek().map(|(_, v)| &**v),
464                &mut post_buffer,
465            );
466
467            if let Some(cache) = cache {
468                if post_buffer.last_line_width(config) > max_length {
469                    // The line is too long after the next step. Revert to the
470                    // cached state and insert a line break.
471                    post_buffer = cache;
472                    self.interrupted = true;
473                    self.end_line(&mut post_buffer);
474                    self.step(
475                        token.clone(),
476                        pre_buffer.peek().map(|(_, v)| &**v),
477                        &mut post_buffer,
478                    );
479                }
480            }
481        }
482
483        out_stream.extend(post_buffer);
484    }
485
486    /// Trims any and all whitespace from the end of the stream.
487    fn trim_whitespace(&self, stream: &mut TokenStream<PostToken>) {
488        stream.trim_while(|token| {
489            matches!(
490                token,
491                PostToken::Space
492                    | PostToken::Newline
493                    | PostToken::Indent
494                    | PostToken::TempIndent(_)
495            )
496        });
497    }
498
499    /// Trims spaces and indents (and not newlines) from the end of the stream.
500    fn trim_last_line(&mut self, stream: &mut TokenStream<PostToken>) {
501        stream.trim_while(|token| {
502            matches!(
503                token,
504                PostToken::Space | PostToken::Indent | PostToken::TempIndent(_)
505            )
506        });
507    }
508
509    /// Ends the current line without resetting the interrupted flag.
510    ///
511    /// Removes any trailing spaces or indents and adds a newline only if state
512    /// is not [`LinePosition::StartOfLine`]. State is then set to
513    /// [`LinePosition::StartOfLine`]. Finally, indentation is added. Safe to
514    /// call multiple times in a row.
515    fn end_line(&mut self, stream: &mut TokenStream<PostToken>) {
516        self.trim_last_line(stream);
517        if self.position != LinePosition::StartOfLine {
518            stream.push(PostToken::Newline);
519        }
520        self.position = LinePosition::StartOfLine;
521        self.indent(stream);
522    }
523
524    /// Pushes the current indentation level to the stream.
525    /// This should only be called when the state is
526    /// [`LinePosition::StartOfLine`]. This does not change the state.
527    fn indent(&self, stream: &mut TokenStream<PostToken>) {
528        assert!(self.position == LinePosition::StartOfLine);
529
530        let level = if self.interrupted {
531            self.indent_level + 1
532        } else {
533            self.indent_level
534        };
535
536        for _ in 0..level {
537            stream.push(PostToken::Indent);
538        }
539
540        if self.temp_indent_needed {
541            stream.push(PostToken::TempIndent(self.temp_indent.clone()));
542        }
543    }
544
545    /// Creates a blank line and then indents.
546    fn blank_line(&mut self, stream: &mut TokenStream<PostToken>) {
547        self.trim_whitespace(stream);
548        if !stream.is_empty() {
549            stream.push(PostToken::Newline);
550        }
551        stream.push(PostToken::Newline);
552        self.position = LinePosition::StartOfLine;
553        self.indent(stream);
554    }
555}