Skip to main content

wdl_format/token/
post.rs

1//! Postprocessed tokens.
2//!
3//! Generally speaking, unless you are working with the internals of code
4//! formatting, you're not going to be working with these.
5
6use std::collections::HashMap;
7use std::fmt::Display;
8use std::rc::Rc;
9
10use wdl_ast::DIRECTIVE_COMMENT_PREFIX;
11use wdl_ast::DIRECTIVE_DELIMITER;
12use wdl_ast::DOC_COMMENT_PREFIX;
13use wdl_ast::Directive;
14use wdl_ast::SyntaxKind;
15
16use crate::Comment;
17use crate::Config;
18use crate::NEWLINE;
19use crate::PreToken;
20use crate::SPACE;
21use crate::Token;
22use crate::TokenStream;
23use crate::Trivia;
24use crate::TriviaBlankLineSpacingPolicy;
25
26/// [`PostToken`]s that precede an inline comment.
27const INLINE_COMMENT_PRECEDING_TOKENS: [PostToken; 2] = [PostToken::Space, PostToken::Space];
28
29/// A postprocessed token.
30#[derive(Clone, Eq, PartialEq)]
31pub enum PostToken {
32    /// A space.
33    Space,
34
35    /// A newline.
36    Newline,
37
38    /// One indentation.
39    Indent,
40
41    /// A temporary indent.
42    ///
43    /// This is added after a [`PostToken::Indent`] during the formatting of
44    /// command sections.
45    TempIndent(Rc<String>),
46
47    /// A string literal.
48    Literal(Rc<String>),
49
50    /// A doc comment block.
51    Documentation {
52        /// The current indent level.
53        num_indents: usize,
54        /// The contents of the doc comment block.
55        contents: Rc<String>,
56    },
57
58    /// A directive comment.
59    Directive {
60        /// The current indent level.
61        num_indents: usize,
62        /// The directive.
63        directive: Rc<Directive>,
64    },
65}
66
67impl std::fmt::Debug for PostToken {
68    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
69        match self {
70            Self::Space => write!(f, "<SPACE>"),
71            Self::Newline => write!(f, "<NEWLINE>"),
72            Self::Indent => write!(f, "<INDENT>"),
73            Self::TempIndent(value) => write!(f, "<TEMP_INDENT@{value}>"),
74            Self::Literal(value) => write!(f, "<LITERAL@{value}>"),
75            Self::Directive { directive, .. } => write!(f, "<DIRECTIVE@{directive:?}>"),
76            Self::Documentation { contents, .. } => write!(f, "<DOCUMENTATION@{contents}>"),
77        }
78    }
79}
80
81impl Token for PostToken {
82    /// Returns a displayable version of the token.
83    fn display<'a>(&'a self, config: &'a Config) -> impl Display + 'a {
84        /// A displayable version of a [`PostToken`].
85        struct Display<'a> {
86            /// The token to display.
87            token: &'a PostToken,
88            /// The configuration to use.
89            config: &'a Config,
90        }
91
92        fn write_indents(
93            f: &mut std::fmt::Formatter<'_>,
94            indent: &str,
95            num_indents: usize,
96        ) -> std::fmt::Result {
97            for _ in 0usize..num_indents {
98                write!(f, "{indent}")?;
99            }
100            Ok(())
101        }
102
103        impl std::fmt::Display for Display<'_> {
104            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
105                match self.token {
106                    PostToken::Space => write!(f, "{SPACE}"),
107                    PostToken::Newline => write!(f, "{NEWLINE}"),
108                    PostToken::Indent => {
109                        write!(f, "{indent}", indent = self.config.indent.string())
110                    }
111                    PostToken::TempIndent(value) => write!(f, "{value}"),
112                    PostToken::Literal(value) => write!(f, "{value}"),
113                    PostToken::Documentation {
114                        num_indents,
115                        contents: markdown,
116                    } => {
117                        let prefix = DOC_COMMENT_PREFIX;
118                        write!(f, "{prefix}")?;
119                        let mut lines = markdown.lines().peekable();
120                        while let Some(cur) = lines.next() {
121                            write!(f, "{cur}")?;
122                            if lines.peek().is_some() {
123                                write!(f, "{NEWLINE}")?;
124                                write_indents(f, &self.config.indent.string(), *num_indents)?;
125                                write!(f, "{prefix}")?;
126                            }
127                        }
128                        Ok(())
129                    }
130                    PostToken::Directive {
131                        num_indents,
132                        directive,
133                    } => {
134                        let mut prefix = format!("{} ", DIRECTIVE_COMMENT_PREFIX);
135                        match &**directive {
136                            Directive::Except(exceptions) => {
137                                prefix.push_str("except");
138                                prefix.push_str(DIRECTIVE_DELIMITER);
139                                prefix.push(' ');
140                                let mut rules: Vec<String> = exceptions.iter().cloned().collect();
141                                rules.sort();
142                                write!(f, "{prefix}")?;
143                                if let Some(max) = self.config.max_line_length.get() {
144                                    let indent_width = self.config.indent.num() * num_indents;
145                                    let start_width = indent_width + prefix.len();
146                                    let mut remaining = max.saturating_sub(start_width);
147                                    let mut written_to_cur_line = 0usize;
148                                    for rule in rules {
149                                        let cur_len = rule.len();
150                                        if written_to_cur_line == 0 {
151                                            write!(f, "{rule}")?;
152                                            remaining = remaining.saturating_sub(cur_len);
153                                            written_to_cur_line += 1;
154                                        } else if remaining.saturating_sub(cur_len + 2) > 0 {
155                                            // Current rule fits
156                                            write!(f, ", {rule}")?;
157                                            remaining = remaining.saturating_sub(cur_len + 2);
158                                            written_to_cur_line += 1;
159                                        } else {
160                                            // Current rule does not fit
161                                            write!(f, "{NEWLINE}")?;
162                                            write_indents(
163                                                f,
164                                                &self.config.indent.string(),
165                                                *num_indents,
166                                            )?;
167                                            write!(f, "{prefix}{rule}")?;
168                                            written_to_cur_line = 1;
169                                            remaining = max.saturating_sub(start_width + cur_len);
170                                        }
171                                    }
172                                    Ok(())
173                                } else {
174                                    write!(f, "{rules}", rules = rules.join(", "))
175                                }
176                            }
177                        }
178                    }
179                }
180            }
181        }
182
183        Display {
184            token: self,
185            config,
186        }
187    }
188}
189
190impl PostToken {
191    /// Gets the width of the [`PostToken`].
192    ///
193    /// This is used to determine how much space the token takes up _within a
194    /// single line_ for the purposes of respecting the maximum line length.
195    /// As such, newlines are considered zero-width tokens. Similarly, doc
196    /// comments and directive comments are considered zero-width as they always
197    /// appear on their own lines.
198    fn width(&self, config: &crate::Config) -> usize {
199        match self {
200            Self::Space => SPACE.len(), // 1 character
201            Self::Newline => 0,
202            Self::Indent => config.indent.num(),
203            Self::TempIndent(value) => value.len(),
204            Self::Literal(value) => value.len(),
205            Self::Directive { .. } => 0,
206            Self::Documentation { .. } => 0,
207        }
208    }
209}
210
211impl TokenStream<PostToken> {
212    /// Gets the maximum width of the [`TokenStream`].
213    ///
214    /// This is suitable to call if the stream represents multiple lines.
215    fn max_width(&self, config: &Config) -> usize {
216        let mut max: usize = 0;
217        let mut cur_width: usize = 0;
218        for token in self.iter() {
219            cur_width += token.width(config);
220            if token == &PostToken::Newline {
221                max = max.max(cur_width);
222                cur_width = 0;
223            }
224        }
225        max.max(cur_width)
226    }
227
228    /// Gets the width of the last line of the [`TokenStream`].
229    fn last_line_width(&self, config: &Config) -> usize {
230        let mut width = 0;
231        for token in self.iter().rev() {
232            if token == &PostToken::Newline {
233                break;
234            }
235            width += token.width(config);
236        }
237        width
238    }
239}
240
/// Where a line break may be placed relative to a token.
enum LineBreak {
    /// The line may be broken immediately before the token.
    Before,
    /// The line may be broken immediately after the token.
    After,
}
248
249/// Returns whether a token can be line broken.
250fn can_be_line_broken(kind: SyntaxKind) -> Option<LineBreak> {
251    match kind {
252        SyntaxKind::CloseBrace
253        | SyntaxKind::CloseBracket
254        | SyntaxKind::CloseParen
255        | SyntaxKind::CloseHeredoc
256        | SyntaxKind::Assignment
257        | SyntaxKind::Plus
258        | SyntaxKind::Minus
259        | SyntaxKind::Asterisk
260        | SyntaxKind::Slash
261        | SyntaxKind::Percent
262        | SyntaxKind::Exponentiation
263        | SyntaxKind::Equal
264        | SyntaxKind::NotEqual
265        | SyntaxKind::Less
266        | SyntaxKind::LessEqual
267        | SyntaxKind::Greater
268        | SyntaxKind::GreaterEqual
269        | SyntaxKind::LogicalAnd
270        | SyntaxKind::LogicalOr
271        | SyntaxKind::AfterKeyword
272        | SyntaxKind::AsKeyword
273        | SyntaxKind::IfKeyword
274        | SyntaxKind::ElseKeyword
275        | SyntaxKind::ThenKeyword => Some(LineBreak::Before),
276        SyntaxKind::OpenBrace
277        | SyntaxKind::OpenBracket
278        | SyntaxKind::OpenParen
279        | SyntaxKind::OpenHeredoc
280        | SyntaxKind::Colon
281        | SyntaxKind::PlaceholderOpen
282        | SyntaxKind::Comma => Some(LineBreak::After),
283        _ => None,
284    }
285}
286
287/// Gets the corresponding [`SyntaxKind`] that should be line broken in tandem
288/// with the provided [`SyntaxKind`].
289fn tandem_line_break(kind: SyntaxKind) -> Option<SyntaxKind> {
290    match kind {
291        SyntaxKind::OpenBrace => Some(SyntaxKind::CloseBrace),
292        SyntaxKind::OpenBracket => Some(SyntaxKind::CloseBracket),
293        SyntaxKind::OpenParen => Some(SyntaxKind::CloseParen),
294        SyntaxKind::OpenHeredoc => Some(SyntaxKind::CloseHeredoc),
295        SyntaxKind::PlaceholderOpen => Some(SyntaxKind::CloseBrace),
296        _ => None,
297    }
298}
299
300/// Tokens that should have a single indent popped from the
301/// stream if they are being added at the start of a line.
302fn should_deindent(kind: SyntaxKind) -> bool {
303    matches!(
304        kind,
305        SyntaxKind::OpenBrace
306            | SyntaxKind::OpenBracket
307            | SyntaxKind::OpenParen
308            | SyntaxKind::OpenHeredoc
309            | SyntaxKind::CloseBrace
310            | SyntaxKind::CloseBracket
311            | SyntaxKind::CloseParen
312            | SyntaxKind::CloseHeredoc
313    )
314}
315
316/// Tracks a tandem break.
317struct TandemBreak {
318    /// The [`SyntaxKind`] which opened this tandem break.
319    pub open: SyntaxKind,
320    /// The [`SyntaxKind`] which will close this tandem break.
321    pub close: SyntaxKind,
322    /// Token depth since opening the break.
323    ///
324    /// The close break is only added when `depth == 0`.
325    /// This is incremented by one for every token matching `open` after the
326    /// break is initiated. It is decremented by one for every token
327    /// matching `close` after the break is initiated.
328    pub depth: usize,
329}
330
/// Where the postprocessor currently is within the line being built.
#[derive(Default, PartialEq, Eq)]
enum LinePosition {
    /// Nothing other than indentation has been emitted on the line yet.
    ///
    /// This is the default position.
    #[default]
    StartOfLine,

    /// Content has already been emitted on the line.
    MiddleOfLine,
}
341
342/// A postprocessor of [tokens](PreToken).
343#[derive(Default)]
344pub struct Postprocessor {
345    /// The current position in the line.
346    position: LinePosition,
347
348    /// The current indentation level.
349    indent_level: usize,
350
351    /// Whether the current line has been interrupted by trivia.
352    interrupted: bool,
353
354    /// The current trivial blank line spacing policy.
355    line_spacing_policy: TriviaBlankLineSpacingPolicy,
356
357    /// Temporary indentation to add.
358    temp_indent: Option<Rc<String>>,
359}
360
361impl Postprocessor {
362    /// Runs the postprocessor.
363    pub fn run(&mut self, input: TokenStream<PreToken>, config: &Config) -> TokenStream<PostToken> {
364        let mut output = TokenStream::<PostToken>::default();
365        let mut buffer = TokenStream::<PreToken>::default();
366
367        for token in input {
368            match token {
369                PreToken::LineEnd => {
370                    self.flush(&buffer, &mut output, config);
371                    self.trim_whitespace(&mut output);
372                    output.push(PostToken::Newline);
373
374                    buffer.clear();
375                    self.interrupted = false;
376                    self.position = LinePosition::StartOfLine;
377                }
378                _ => {
379                    buffer.push(token);
380                }
381            }
382        }
383
384        output
385    }
386
387    /// Takes a step of a [`PreToken`] stream and processes the appropriate
388    /// [`PostToken`]s.
389    fn step(
390        &mut self,
391        token: PreToken,
392        next: Option<&PreToken>,
393        stream: &mut TokenStream<PostToken>,
394    ) {
395        if stream.is_empty() {
396            self.interrupted = false;
397            self.position = LinePosition::StartOfLine;
398            self.indent(stream);
399        }
400        match token {
401            PreToken::BlankLine => {
402                self.blank_line(stream);
403            }
404            PreToken::LineEnd => {
405                self.interrupted = false;
406                self.end_line(stream);
407            }
408            PreToken::WordEnd => {
409                stream.trim_end(&PostToken::Space);
410
411                if self.position == LinePosition::MiddleOfLine {
412                    stream.push(PostToken::Space);
413                } else {
414                    // We're at the start of a line, so we don't need to add a
415                    // space.
416                }
417            }
418            PreToken::IndentStart => {
419                self.indent_level += 1;
420                self.end_line(stream);
421            }
422            PreToken::IndentEnd => {
423                self.indent_level = self.indent_level.saturating_sub(1);
424                self.end_line(stream);
425            }
426            PreToken::LineSpacingPolicy(policy) => {
427                self.line_spacing_policy = policy;
428            }
429            PreToken::Literal(value, kind) => {
430                assert!(!kind.is_trivia());
431
432                // This is special handling for inserting the empty string.
433                // We remove any indentation or spaces from the end of the
434                // stream before adding the empty string as a literal.
435                if value.is_empty() {
436                    self.trim_last_line(stream);
437                }
438
439                if self.interrupted
440                    && should_deindent(kind)
441                    && matches!(
442                        stream.0.last(),
443                        Some(&PostToken::Indent) | Some(&PostToken::TempIndent(_))
444                    )
445                {
446                    let popped = stream.0.pop().unwrap();
447                    // We don't actually want to pop the TempIndent token,
448                    // but rather a regular Indent token before the temp indent.
449                    if matches!(popped, PostToken::TempIndent(_)) {
450                        stream.0.pop_if(|t| matches!(t, PostToken::Indent));
451                        // Restore the popped TempIndent
452                        stream.0.push(popped);
453                    }
454                }
455
456                stream.push(PostToken::Literal(value));
457                self.position = LinePosition::MiddleOfLine;
458            }
459            PreToken::Trivia(trivia) => match trivia {
460                Trivia::BlankLine => match self.line_spacing_policy {
461                    TriviaBlankLineSpacingPolicy::Always => {
462                        self.blank_line(stream);
463                    }
464                    TriviaBlankLineSpacingPolicy::RemoveTrailingBlanks => {
465                        if matches!(next, Some(&PreToken::Trivia(Trivia::Comment(_)))) {
466                            self.blank_line(stream);
467                        }
468                    }
469                },
470                Trivia::Comment(comment) => {
471                    match comment {
472                        Comment::Preceding(value) => {
473                            if self.position == LinePosition::MiddleOfLine {
474                                self.interrupted = true;
475                                self.end_line(stream);
476                            }
477                            stream.push(PostToken::Literal(value));
478                        }
479                        Comment::Inline(value) => {
480                            assert!(self.position == LinePosition::MiddleOfLine);
481                            if let Some(next) = next
482                                && next != &PreToken::LineEnd
483                            {
484                                self.interrupted = true;
485                            }
486                            self.trim_last_line(stream);
487                            for token in INLINE_COMMENT_PRECEDING_TOKENS.iter() {
488                                stream.push(token.clone());
489                            }
490                            stream.push(PostToken::Literal(value));
491                        }
492                        Comment::Documentation(contents) => {
493                            if self.position == LinePosition::MiddleOfLine {
494                                self.interrupted = true;
495                                self.end_line(stream);
496                            }
497                            stream.push(PostToken::Documentation {
498                                num_indents: self.indent_level,
499                                contents,
500                            });
501                        }
502                        Comment::Directive(directive) => {
503                            if self.position == LinePosition::MiddleOfLine {
504                                self.interrupted = true;
505                                self.end_line(stream);
506                            }
507                            stream.push(PostToken::Directive {
508                                num_indents: self.indent_level,
509                                directive,
510                            });
511                        }
512                    }
513                    self.position = LinePosition::MiddleOfLine;
514                    self.end_line(stream);
515                }
516            },
517            PreToken::TempIndentStart(bash_indent) => {
518                self.temp_indent = Some(bash_indent);
519            }
520            PreToken::TempIndentEnd => {
521                self.temp_indent = None;
522            }
523        }
524    }
525
526    /// Flushes the `in_stream` buffer to the `out_stream`.
527    fn flush(
528        &mut self,
529        in_stream: &TokenStream<PreToken>,
530        out_stream: &mut TokenStream<PostToken>,
531        config: &Config,
532    ) {
533        assert!(!self.interrupted);
534        assert!(self.position == LinePosition::StartOfLine);
535        let mut post_buffer = TokenStream::<PostToken>::default();
536        let mut pre_buffer = in_stream.iter().peekable();
537        let starting_indent = self.indent_level;
538        let starting_temp_indent = self.temp_indent.clone();
539        while let Some(token) = pre_buffer.next() {
540            let next = pre_buffer.peek().copied();
541            self.step(token.clone(), next, &mut post_buffer);
542        }
543
544        // If all lines are short enough, we can just add the post_buffer to the
545        // out_stream and be done.
546        if config.max_line_length.get().is_none()
547            || post_buffer.max_width(config) <= config.max_line_length.get().unwrap()
548        {
549            out_stream.extend(post_buffer);
550            return;
551        }
552
553        // At least one line in the post_buffer is too long.
554        // We iterate through the in_stream to find potential line breaks,
555        // and then we iterate through the in_stream again to actually insert
556        // them in the proper places.
557
558        let max_length = config.max_line_length.get().unwrap();
559
560        let mut potential_line_breaks: HashMap<usize, SyntaxKind> = HashMap::new();
561        for (i, token) in in_stream.iter().enumerate() {
562            if let PreToken::Literal(_, kind) = token {
563                match can_be_line_broken(*kind) {
564                    Some(LineBreak::Before) => {
565                        potential_line_breaks.insert(i, *kind);
566                    }
567                    Some(LineBreak::After) => {
568                        potential_line_breaks.insert(i + 1, *kind);
569                    }
570                    None => {}
571                }
572            }
573        }
574
575        if potential_line_breaks.is_empty() {
576            // There are no potential line breaks, so we can't do anything.
577            out_stream.extend(post_buffer);
578            return;
579        }
580
581        // Set up the buffers for the second pass.
582        post_buffer.clear();
583        let mut pre_buffer = in_stream.iter().enumerate().peekable();
584
585        // Reset self.
586        self.interrupted = false;
587        self.position = LinePosition::StartOfLine;
588        self.temp_indent = starting_temp_indent;
589        self.indent_level = starting_indent;
590
591        let mut break_stack: Vec<TandemBreak> = Vec::new();
592
593        while let Some((i, token)) = pre_buffer.next() {
594            let mut cache = None;
595            if let Some(break_kind) = potential_line_breaks.get(&i) {
596                // Check if we need a break to match a prior tandem break
597                if let Some(top_of_stack) = break_stack.last_mut() {
598                    if *break_kind == top_of_stack.close {
599                        if top_of_stack.depth > 0 {
600                            top_of_stack.depth -= 1;
601                        } else {
602                            break_stack.pop();
603                            self.indent_level -= 1;
604                            self.end_line(&mut post_buffer);
605                        }
606                    } else if *break_kind == top_of_stack.open {
607                        top_of_stack.depth += 1;
608                    }
609                }
610                // Cache the current state so we can revert to it if
611                // necessary.
612                cache = Some(post_buffer.clone());
613            }
614
615            self.step(
616                token.clone(),
617                pre_buffer.peek().map(|(_, v)| &**v),
618                &mut post_buffer,
619            );
620
621            if let Some(cache) = cache
622                && post_buffer.last_line_width(config) > max_length
623            {
624                // The line is too long after the next step. Revert to the
625                // cached state and insert a line break.
626                post_buffer = cache;
627                self.interrupted = true;
628                self.end_line(&mut post_buffer);
629                self.step(
630                    token.clone(),
631                    pre_buffer.peek().map(|(_, v)| &**v),
632                    &mut post_buffer,
633                );
634
635                // Check if this introduces a tandem break
636                // SAFETY: if cache is Some(_) this step must have a potential line break
637                let break_kind = potential_line_breaks.get(&i).unwrap();
638                if let Some(also_break_on) = tandem_line_break(*break_kind) {
639                    let tandem_break = TandemBreak {
640                        open: *break_kind,
641                        close: also_break_on,
642                        depth: 0,
643                    };
644                    break_stack.push(tandem_break);
645                    self.indent_level += 1;
646                }
647            }
648        }
649
650        // reduce indent for breaks never added
651        for _ in break_stack {
652            self.indent_level = self.indent_level.saturating_sub(1);
653        }
654        out_stream.extend(post_buffer);
655    }
656
657    /// Trims any and all whitespace from the end of the stream.
658    fn trim_whitespace(&self, stream: &mut TokenStream<PostToken>) {
659        stream.trim_while(|token| {
660            matches!(
661                token,
662                PostToken::Space
663                    | PostToken::Newline
664                    | PostToken::Indent
665                    | PostToken::TempIndent(_)
666            )
667        });
668    }
669
670    /// Trims spaces and indents (and not newlines) from the end of the stream.
671    fn trim_last_line(&self, stream: &mut TokenStream<PostToken>) {
672        stream.trim_while(|token| {
673            matches!(
674                token,
675                PostToken::Space | PostToken::Indent | PostToken::TempIndent(_)
676            )
677        });
678    }
679
680    /// Ends the current line without resetting the interrupted flag.
681    ///
682    /// Removes any trailing spaces or indents and adds a newline only if state
683    /// is not [`LinePosition::StartOfLine`]. State is then set to
684    /// [`LinePosition::StartOfLine`]. Finally, indentation is added. Safe to
685    /// call multiple times in a row.
686    fn end_line(&mut self, stream: &mut TokenStream<PostToken>) {
687        self.trim_last_line(stream);
688        if self.position != LinePosition::StartOfLine {
689            stream.push(PostToken::Newline);
690        }
691        self.position = LinePosition::StartOfLine;
692        self.indent(stream);
693    }
694
695    /// Pushes the current indentation level to the stream.
696    ///
697    /// This should only be called when the state is
698    /// [`LinePosition::StartOfLine`]. This does not change the state
699    /// and is safe to call multiple times in a row.
700    fn indent(&self, stream: &mut TokenStream<PostToken>) {
701        assert!(self.position == LinePosition::StartOfLine);
702
703        self.trim_last_line(stream);
704
705        let level = if self.interrupted {
706            self.indent_level + 1
707        } else {
708            self.indent_level
709        };
710
711        for _ in 0..level {
712            stream.push(PostToken::Indent);
713        }
714
715        if let Some(ref temp_indent) = self.temp_indent {
716            stream.push(PostToken::TempIndent(temp_indent.clone()));
717        }
718    }
719
720    /// Creates a blank line and then indents.
721    fn blank_line(&mut self, stream: &mut TokenStream<PostToken>) {
722        self.trim_whitespace(stream);
723        if !stream.is_empty() {
724            stream.push(PostToken::Newline);
725        }
726        stream.push(PostToken::Newline);
727        self.position = LinePosition::StartOfLine;
728        self.indent(stream);
729    }
730}