wdl_format/token/
post.rs

1//! Postprocessed tokens.
2//!
3//! Generally speaking, unless you are working with the internals of code
4//! formatting, you're not going to be working with these.
5
6use std::collections::HashSet;
7use std::fmt::Display;
8use std::rc::Rc;
9
10use wdl_ast::SyntaxKind;
11
12use crate::Comment;
13use crate::Config;
14use crate::NEWLINE;
15use crate::PreToken;
16use crate::SPACE;
17use crate::Token;
18use crate::TokenStream;
19use crate::Trivia;
20use crate::TriviaBlankLineSpacingPolicy;
21
/// The [`PostToken`]s pushed immediately before an inline comment.
///
/// Trailing spaces and indents are trimmed from the current line first, so
/// these two spaces are the only separation between the code and the comment.
const INLINE_COMMENT_PRECEDING_TOKENS: [PostToken; 2] = [PostToken::Space, PostToken::Space];
24
/// A postprocessed token.
///
/// These are the tokens emitted by the [`Postprocessor`]; each one renders to
/// a concrete piece of output text via the [`Token`] implementation.
#[derive(Clone, Eq, PartialEq)]
pub enum PostToken {
    /// A space.
    Space,

    /// A newline.
    Newline,

    /// One indentation.
    ///
    /// The rendered text is taken from the configured indent.
    Indent,

    /// A temporary indent.
    ///
    /// This is added after a [`PostToken::Indent`] during the formatting of
    /// command sections. The wrapped string is rendered verbatim.
    TempIndent(Rc<String>),

    /// A string literal.
    ///
    /// The wrapped string is rendered verbatim.
    Literal(Rc<String>),
}
46
47impl std::fmt::Debug for PostToken {
48    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
49        match self {
50            Self::Space => write!(f, "<SPACE>"),
51            Self::Newline => write!(f, "<NEWLINE>"),
52            Self::Indent => write!(f, "<INDENT>"),
53            Self::TempIndent(value) => write!(f, "<TEMP_INDENT@{value}>"),
54            Self::Literal(value) => write!(f, "<LITERAL@{value}>"),
55        }
56    }
57}
58
59impl Token for PostToken {
60    /// Returns a displayable version of the token.
61    fn display<'a>(&'a self, config: &'a Config) -> impl Display + 'a {
62        /// A displayable version of a [`PostToken`].
63        struct Display<'a> {
64            /// The token to display.
65            token: &'a PostToken,
66            /// The configuration to use.
67            config: &'a Config,
68        }
69
70        impl std::fmt::Display for Display<'_> {
71            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72                match self.token {
73                    PostToken::Space => write!(f, "{SPACE}"),
74                    PostToken::Newline => write!(f, "{NEWLINE}"),
75                    PostToken::Indent => {
76                        write!(f, "{indent}", indent = self.config.indent().string())
77                    }
78                    PostToken::TempIndent(value) => write!(f, "{value}"),
79                    PostToken::Literal(value) => write!(f, "{value}"),
80                }
81            }
82        }
83
84        Display {
85            token: self,
86            config,
87        }
88    }
89}
90
91impl PostToken {
92    /// Gets the width of the [`PostToken`].
93    ///
94    /// This is used to determine how much space the token takes up _within a
95    /// single line_ for the purposes of respecting the maximum line length.
96    /// As such, newlines are considered zero-width tokens.
97    fn width(&self, config: &crate::Config) -> usize {
98        match self {
99            Self::Space => SPACE.len(), // 1 character
100            Self::Newline => 0,
101            Self::Indent => config.indent().num(),
102            Self::TempIndent(value) => value.len(),
103            Self::Literal(value) => value.len(),
104        }
105    }
106}
107
108impl TokenStream<PostToken> {
109    /// Gets the maximum width of the [`TokenStream`].
110    ///
111    /// This is suitable to call if the stream represents multiple lines.
112    fn max_width(&self, config: &Config) -> usize {
113        let mut max: usize = 0;
114        let mut cur_width: usize = 0;
115        for token in self.iter() {
116            cur_width += token.width(config);
117            if token == &PostToken::Newline {
118                max = max.max(cur_width);
119                cur_width = 0;
120            }
121        }
122        max.max(cur_width)
123    }
124
125    /// Gets the width of the last line of the [`TokenStream`].
126    fn last_line_width(&self, config: &Config) -> usize {
127        let mut width = 0;
128        for token in self.iter().rev() {
129            if token == &PostToken::Newline {
130                break;
131            }
132            width += token.width(config);
133        }
134        width
135    }
136}
137
/// A line break.
///
/// Describes on which side of a token a line break may be inserted when a
/// line exceeds the maximum length.
enum LineBreak {
    /// A line break that can be inserted before a token.
    Before,
    /// A line break that can be inserted after a token.
    After,
}
145
146/// Returns whether a token can be line broken.
147fn can_be_line_broken(kind: SyntaxKind) -> Option<LineBreak> {
148    match kind {
149        SyntaxKind::CloseBrace
150        | SyntaxKind::CloseBracket
151        | SyntaxKind::CloseParen
152        | SyntaxKind::CloseHeredoc
153        | SyntaxKind::Assignment
154        | SyntaxKind::Plus
155        | SyntaxKind::Minus
156        | SyntaxKind::Asterisk
157        | SyntaxKind::Slash
158        | SyntaxKind::Percent
159        | SyntaxKind::Exponentiation
160        | SyntaxKind::Equal
161        | SyntaxKind::NotEqual
162        | SyntaxKind::Less
163        | SyntaxKind::LessEqual
164        | SyntaxKind::Greater
165        | SyntaxKind::GreaterEqual
166        | SyntaxKind::LogicalAnd
167        | SyntaxKind::LogicalOr
168        | SyntaxKind::AfterKeyword
169        | SyntaxKind::AsKeyword
170        | SyntaxKind::IfKeyword
171        | SyntaxKind::ElseKeyword
172        | SyntaxKind::ThenKeyword => Some(LineBreak::Before),
173        SyntaxKind::OpenBrace
174        | SyntaxKind::OpenBracket
175        | SyntaxKind::OpenParen
176        | SyntaxKind::OpenHeredoc
177        | SyntaxKind::Colon
178        | SyntaxKind::PlaceholderOpen
179        | SyntaxKind::Comma => Some(LineBreak::After),
180        _ => None,
181    }
182}
183
/// Current position in a line.
///
/// The [`Postprocessor`] uses this to decide whether a newline must be
/// emitted when a line is ended and whether a separating space is needed.
#[derive(Default, Eq, PartialEq)]
enum LinePosition {
    /// The start of a line.
    ///
    /// Nothing other than indentation has been written on the current line.
    #[default]
    StartOfLine,

    /// The middle of a line.
    ///
    /// A literal or comment has already been written on the current line.
    MiddleOfLine,
}
194
/// A postprocessor of [tokens](PreToken).
///
/// Converts a stream of [`PreToken`]s into a stream of [`PostToken`]s while
/// tracking line position, indentation, and comment interruptions.
#[derive(Default)]
pub struct Postprocessor {
    /// The current position in the line.
    position: LinePosition,

    /// The current indentation level.
    ///
    /// Incremented on `IndentStart` and decremented (saturating at zero) on
    /// `IndentEnd`.
    indent_level: usize,

    /// Whether the current line has been interrupted by trivia.
    ///
    /// It is also set when a line break is inserted to respect the maximum
    /// line length. While set, one extra level of indentation is emitted.
    interrupted: bool,

    /// The current trivia blank line spacing policy.
    ///
    /// Updated by `LineSpacingPolicy` tokens and consulted when a blank-line
    /// trivia token is processed.
    line_spacing_policy: TriviaBlankLineSpacingPolicy,

    /// Whether temporary indentation is needed.
    ///
    /// Toggled by `TempIndentStart` / `TempIndentEnd` tokens.
    temp_indent_needed: bool,

    /// Temporary indentation to add.
    ///
    /// Holds the leading spaces and tabs of the most recently seen command
    /// text literal.
    temp_indent: Rc<String>,
}
216
217impl Postprocessor {
218    /// Runs the postprocessor.
219    pub fn run(&mut self, input: TokenStream<PreToken>, config: &Config) -> TokenStream<PostToken> {
220        let mut output = TokenStream::<PostToken>::default();
221        let mut buffer = TokenStream::<PreToken>::default();
222
223        for token in input {
224            match token {
225                PreToken::LineEnd => {
226                    self.flush(&buffer, &mut output, config);
227                    self.trim_whitespace(&mut output);
228                    output.push(PostToken::Newline);
229
230                    buffer.clear();
231                    self.interrupted = false;
232                    self.position = LinePosition::StartOfLine;
233                }
234                _ => {
235                    buffer.push(token);
236                }
237            }
238        }
239
240        output
241    }
242
243    /// Takes a step of a [`PreToken`] stream and processes the appropriate
244    /// [`PostToken`]s.
245    fn step(
246        &mut self,
247        token: PreToken,
248        next: Option<&PreToken>,
249        stream: &mut TokenStream<PostToken>,
250    ) {
251        if stream.is_empty() {
252            self.interrupted = false;
253            self.position = LinePosition::StartOfLine;
254            self.indent(stream);
255        }
256        match token {
257            PreToken::BlankLine => {
258                self.blank_line(stream);
259            }
260            PreToken::LineEnd => {
261                self.interrupted = false;
262                self.end_line(stream);
263            }
264            PreToken::WordEnd => {
265                stream.trim_end(&PostToken::Space);
266
267                if self.position == LinePosition::MiddleOfLine {
268                    stream.push(PostToken::Space);
269                } else {
270                    // We're at the start of a line, so we don't need to add a
271                    // space.
272                }
273            }
274            PreToken::IndentStart => {
275                self.indent_level += 1;
276                self.end_line(stream);
277            }
278            PreToken::IndentEnd => {
279                self.indent_level = self.indent_level.saturating_sub(1);
280                self.end_line(stream);
281            }
282            PreToken::LineSpacingPolicy(policy) => {
283                self.line_spacing_policy = policy;
284            }
285            PreToken::Literal(value, kind) => {
286                assert!(!kind.is_trivia());
287
288                // This is special handling for inserting the empty string.
289                // We remove any indentation or spaces from the end of the
290                // stream and then add the empty string as a literal.
291                // Then we set the position to [`LinePosition::MiddleOfLine`]
292                // in order to trigger a newline being added before the next
293                // token.
294                if value.is_empty() {
295                    self.trim_last_line(stream);
296                    stream.push(PostToken::Literal(value));
297                    self.position = LinePosition::MiddleOfLine;
298                    return;
299                }
300
301                if self.interrupted
302                    && matches!(
303                        kind,
304                        SyntaxKind::OpenBrace
305                            | SyntaxKind::OpenBracket
306                            | SyntaxKind::OpenParen
307                            | SyntaxKind::OpenHeredoc
308                    )
309                    && matches!(
310                        stream.0.last(),
311                        Some(&PostToken::Indent) | Some(&PostToken::TempIndent(_))
312                    )
313                {
314                    stream.0.pop();
315                }
316
317                if kind == SyntaxKind::LiteralCommandText {
318                    self.temp_indent = Rc::new(
319                        value
320                            .chars()
321                            .take_while(|c| matches!(c.to_string().as_str(), SPACE | crate::TAB))
322                            .collect(),
323                    );
324                }
325
326                stream.push(PostToken::Literal(value));
327                self.position = LinePosition::MiddleOfLine;
328            }
329            PreToken::Trivia(trivia) => match trivia {
330                Trivia::BlankLine => match self.line_spacing_policy {
331                    TriviaBlankLineSpacingPolicy::Always => {
332                        self.blank_line(stream);
333                    }
334                    TriviaBlankLineSpacingPolicy::RemoveTrailingBlanks => {
335                        if matches!(next, Some(&PreToken::Trivia(Trivia::Comment(_)))) {
336                            self.blank_line(stream);
337                        }
338                    }
339                },
340                Trivia::Comment(comment) => {
341                    match comment {
342                        Comment::Preceding(value) => {
343                            if !matches!(
344                                stream.0.last(),
345                                Some(&PostToken::Newline)
346                                    | Some(&PostToken::Indent)
347                                    | Some(&PostToken::TempIndent(_))
348                                    | None
349                            ) {
350                                self.interrupted = true;
351                            }
352                            self.end_line(stream);
353                            stream.push(PostToken::Literal(value));
354                            self.position = LinePosition::MiddleOfLine;
355                        }
356                        Comment::Inline(value) => {
357                            assert!(self.position == LinePosition::MiddleOfLine);
358                            if let Some(next) = next
359                                && next != &PreToken::LineEnd
360                            {
361                                self.interrupted = true;
362                            }
363                            self.trim_last_line(stream);
364                            for token in INLINE_COMMENT_PRECEDING_TOKENS.iter() {
365                                stream.push(token.clone());
366                            }
367                            stream.push(PostToken::Literal(value));
368                        }
369                    }
370                    self.end_line(stream);
371                }
372            },
373            PreToken::TempIndentStart => {
374                self.temp_indent_needed = true;
375            }
376            PreToken::TempIndentEnd => {
377                self.temp_indent_needed = false;
378            }
379        }
380    }
381
382    /// Flushes the `in_stream` buffer to the `out_stream`.
383    fn flush(
384        &mut self,
385        in_stream: &TokenStream<PreToken>,
386        out_stream: &mut TokenStream<PostToken>,
387        config: &Config,
388    ) {
389        assert!(!self.interrupted);
390        assert!(self.position == LinePosition::StartOfLine);
391        let mut post_buffer = TokenStream::<PostToken>::default();
392        let mut pre_buffer = in_stream.iter().peekable();
393        let starting_indent = self.indent_level;
394        while let Some(token) = pre_buffer.next() {
395            let next = pre_buffer.peek().copied();
396            self.step(token.clone(), next, &mut post_buffer);
397        }
398
399        // If all lines are short enough, we can just add the post_buffer to the
400        // out_stream and be done.
401        if config.max_line_length().is_none()
402            || post_buffer.max_width(config) <= config.max_line_length().unwrap()
403        {
404            out_stream.extend(post_buffer);
405            return;
406        }
407
408        // At least one line in the post_buffer is too long.
409        // We iterate through the in_stream to find potential line breaks,
410        // and then we iterate through the in_stream again to actually insert
411        // them in the proper places.
412
413        let max_length = config.max_line_length().unwrap();
414
415        let mut potential_line_breaks: HashSet<usize> = HashSet::new();
416        for (i, token) in in_stream.iter().enumerate() {
417            if let PreToken::Literal(_, kind) = token {
418                match can_be_line_broken(*kind) {
419                    Some(LineBreak::Before) => {
420                        potential_line_breaks.insert(i);
421                    }
422                    Some(LineBreak::After) => {
423                        potential_line_breaks.insert(i + 1);
424                    }
425                    None => {}
426                }
427            }
428        }
429
430        if potential_line_breaks.is_empty() {
431            // There are no potential line breaks, so we can't do anything.
432            out_stream.extend(post_buffer);
433            return;
434        }
435
436        // Set up the buffers for the second pass.
437        post_buffer.clear();
438        let mut pre_buffer = in_stream.iter().enumerate().peekable();
439
440        // Reset the indent level.
441        self.indent_level = starting_indent;
442
443        while let Some((i, token)) = pre_buffer.next() {
444            let mut cache = None;
445            if potential_line_breaks.contains(&i) {
446                if post_buffer.last_line_width(config) > max_length {
447                    // The line is already too long, and taking the next step
448                    // can only make it worse. Insert a line break here.
449                    self.interrupted = true;
450                    self.end_line(&mut post_buffer);
451                } else {
452                    // The line is not too long yet, but it might be after the
453                    // next step. Cache the current state so we can revert to it
454                    // if necessary.
455                    cache = Some(post_buffer.clone());
456                }
457            }
458            self.step(
459                token.clone(),
460                pre_buffer.peek().map(|(_, v)| &**v),
461                &mut post_buffer,
462            );
463
464            if let Some(cache) = cache
465                && post_buffer.last_line_width(config) > max_length
466            {
467                // The line is too long after the next step. Revert to the
468                // cached state and insert a line break.
469                post_buffer = cache;
470                self.interrupted = true;
471                self.end_line(&mut post_buffer);
472                self.step(
473                    token.clone(),
474                    pre_buffer.peek().map(|(_, v)| &**v),
475                    &mut post_buffer,
476                );
477            }
478        }
479
480        out_stream.extend(post_buffer);
481    }
482
483    /// Trims any and all whitespace from the end of the stream.
484    fn trim_whitespace(&self, stream: &mut TokenStream<PostToken>) {
485        stream.trim_while(|token| {
486            matches!(
487                token,
488                PostToken::Space
489                    | PostToken::Newline
490                    | PostToken::Indent
491                    | PostToken::TempIndent(_)
492            )
493        });
494    }
495
496    /// Trims spaces and indents (and not newlines) from the end of the stream.
497    fn trim_last_line(&mut self, stream: &mut TokenStream<PostToken>) {
498        stream.trim_while(|token| {
499            matches!(
500                token,
501                PostToken::Space | PostToken::Indent | PostToken::TempIndent(_)
502            )
503        });
504    }
505
506    /// Ends the current line without resetting the interrupted flag.
507    ///
508    /// Removes any trailing spaces or indents and adds a newline only if state
509    /// is not [`LinePosition::StartOfLine`]. State is then set to
510    /// [`LinePosition::StartOfLine`]. Finally, indentation is added. Safe to
511    /// call multiple times in a row.
512    fn end_line(&mut self, stream: &mut TokenStream<PostToken>) {
513        self.trim_last_line(stream);
514        if self.position != LinePosition::StartOfLine {
515            stream.push(PostToken::Newline);
516        }
517        self.position = LinePosition::StartOfLine;
518        self.indent(stream);
519    }
520
521    /// Pushes the current indentation level to the stream.
522    /// This should only be called when the state is
523    /// [`LinePosition::StartOfLine`]. This does not change the state.
524    fn indent(&self, stream: &mut TokenStream<PostToken>) {
525        assert!(self.position == LinePosition::StartOfLine);
526
527        let level = if self.interrupted {
528            self.indent_level + 1
529        } else {
530            self.indent_level
531        };
532
533        for _ in 0..level {
534            stream.push(PostToken::Indent);
535        }
536
537        if self.temp_indent_needed {
538            stream.push(PostToken::TempIndent(self.temp_indent.clone()));
539        }
540    }
541
542    /// Creates a blank line and then indents.
543    fn blank_line(&mut self, stream: &mut TokenStream<PostToken>) {
544        self.trim_whitespace(stream);
545        if !stream.is_empty() {
546            stream.push(PostToken::Newline);
547        }
548        stream.push(PostToken::Newline);
549        self.position = LinePosition::StartOfLine;
550        self.indent(stream);
551    }
552}