Skip to main content

wdl_format/token/
post.rs

1//! Postprocessed tokens.
2//!
3//! Generally speaking, unless you are working with the internals of code
4//! formatting, you're not going to be working with these.
5
6use std::collections::HashMap;
7use std::fmt::Display;
8use std::rc::Rc;
9
10use wdl_ast::DIRECTIVE_COMMENT_PREFIX;
11use wdl_ast::DIRECTIVE_DELIMITER;
12use wdl_ast::DOC_COMMENT_PREFIX;
13use wdl_ast::Directive;
14use wdl_ast::SyntaxKind;
15
16use crate::Comment;
17use crate::Config;
18use crate::PreToken;
19use crate::SPACE;
20use crate::Token;
21use crate::TokenStream;
22use crate::Trivia;
23use crate::TriviaBlankLineSpacingPolicy;
24
/// [`PostToken`]s that precede an inline comment.
///
/// These are pushed onto the output stream, in order, immediately before the
/// literal text of an inline comment, so the comment is separated from the
/// code that precedes it by exactly two spaces.
const INLINE_COMMENT_PRECEDING_TOKENS: [PostToken; 2] = [PostToken::Space, PostToken::Space];
27
/// A postprocessed token.
///
/// A stream of these is what the postprocessor emits; each token knows how to
/// render itself for a given configuration through its [`Token`]
/// implementation.
#[derive(Clone, Eq, PartialEq)]
pub enum PostToken {
    /// A space.
    Space,

    /// A newline.
    ///
    /// Rendered using the newline style from the configuration.
    Newline,

    /// One indentation.
    ///
    /// Rendered as a single unit of the indentation string from the
    /// configuration.
    Indent,

    /// A temporary indent.
    ///
    /// This is added after a [`PostToken::Indent`] during the formatting of
    /// command sections. The contained string is rendered verbatim.
    TempIndent(Rc<String>),

    /// A string literal.
    ///
    /// The contained string is rendered verbatim.
    Literal(Rc<String>),

    /// A doc comment block.
    ///
    /// Every line of `contents` is rendered with the doc comment prefix.
    Documentation {
        /// The current indent level.
        ///
        /// Used to re-indent every line after the first when the block is
        /// rendered.
        num_indents: usize,
        /// The contents of the doc comment block.
        contents: Rc<String>,
    },

    /// A directive comment.
    Directive {
        /// The current indent level.
        ///
        /// Used to re-indent continuation lines when the directive has to be
        /// wrapped across several lines.
        num_indents: usize,
        /// The directive.
        directive: Rc<Directive>,
    },
}
65
66impl std::fmt::Debug for PostToken {
67    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
68        match self {
69            Self::Space => write!(f, "<SPACE>"),
70            Self::Newline => write!(f, "<NEWLINE>"),
71            Self::Indent => write!(f, "<INDENT>"),
72            Self::TempIndent(value) => write!(f, "<TEMP_INDENT@{value}>"),
73            Self::Literal(value) => write!(f, "<LITERAL@{value}>"),
74            Self::Directive { directive, .. } => write!(f, "<DIRECTIVE@{directive:?}>"),
75            Self::Documentation { contents, .. } => write!(f, "<DOCUMENTATION@{contents}>"),
76        }
77    }
78}
79
80impl Token for PostToken {
81    /// Returns a displayable version of the token.
82    fn display<'a>(&'a self, config: &'a Config) -> impl Display + 'a {
83        /// A displayable version of a [`PostToken`].
84        struct Display<'a> {
85            /// The token to display.
86            token: &'a PostToken,
87            /// The configuration to use.
88            config: &'a Config,
89        }
90
91        fn write_indents(
92            f: &mut std::fmt::Formatter<'_>,
93            indent: &str,
94            num_indents: usize,
95        ) -> std::fmt::Result {
96            for _ in 0usize..num_indents {
97                write!(f, "{indent}")?;
98            }
99            Ok(())
100        }
101
102        impl std::fmt::Display for Display<'_> {
103            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
104                match self.token {
105                    PostToken::Space => write!(f, "{SPACE}"),
106                    PostToken::Newline => write!(f, "{}", self.config.newline_style.as_str()),
107                    PostToken::Indent => {
108                        write!(f, "{indent}", indent = self.config.indent.string())
109                    }
110                    PostToken::TempIndent(value) => write!(f, "{value}"),
111                    PostToken::Literal(value) => write!(f, "{value}"),
112                    PostToken::Documentation {
113                        num_indents,
114                        contents: markdown,
115                    } => {
116                        let prefix = DOC_COMMENT_PREFIX;
117                        write!(f, "{prefix}")?;
118                        let mut lines = markdown.lines().peekable();
119                        while let Some(cur) = lines.next() {
120                            write!(f, "{cur}")?;
121                            if lines.peek().is_some() {
122                                write!(f, "{}", self.config.newline_style.as_str())?;
123                                write_indents(f, &self.config.indent.string(), *num_indents)?;
124                                write!(f, "{prefix}")?;
125                            }
126                        }
127                        Ok(())
128                    }
129                    PostToken::Directive {
130                        num_indents,
131                        directive,
132                    } => {
133                        let mut prefix = format!("{} ", DIRECTIVE_COMMENT_PREFIX);
134                        match &**directive {
135                            Directive::Except(exceptions) => {
136                                prefix.push_str("except");
137                                prefix.push_str(DIRECTIVE_DELIMITER);
138                                prefix.push(' ');
139                                let mut rules: Vec<String> = exceptions.iter().cloned().collect();
140                                rules.sort();
141                                write!(f, "{prefix}")?;
142                                if let Some(max) = self.config.max_line_length.get() {
143                                    let indent_width = self.config.indent.num() * num_indents;
144                                    let start_width = indent_width + prefix.len();
145                                    let mut remaining = max.saturating_sub(start_width);
146                                    let mut written_to_cur_line = 0usize;
147                                    for rule in rules {
148                                        let cur_len = rule.len();
149                                        if written_to_cur_line == 0 {
150                                            write!(f, "{rule}")?;
151                                            remaining = remaining.saturating_sub(cur_len);
152                                            written_to_cur_line += 1;
153                                        } else if remaining.saturating_sub(cur_len + 2) > 0 {
154                                            // NOTE: the `+ 2` accounts for
155                                            // the `", "` separator written
156                                            // before each subsequent rule.
157                                            write!(f, ", {rule}")?;
158                                            remaining = remaining.saturating_sub(cur_len + 2);
159                                            written_to_cur_line += 1;
160                                        } else {
161                                            // Current rule does not fit
162                                            write!(f, "{}", self.config.newline_style.as_str())?;
163                                            write_indents(
164                                                f,
165                                                &self.config.indent.string(),
166                                                *num_indents,
167                                            )?;
168                                            write!(f, "{prefix}{rule}")?;
169                                            written_to_cur_line = 1;
170                                            remaining = max.saturating_sub(start_width + cur_len);
171                                        }
172                                    }
173                                    Ok(())
174                                } else {
175                                    write!(f, "{rules}", rules = rules.join(", "))
176                                }
177                            }
178                        }
179                    }
180                }
181            }
182        }
183
184        Display {
185            token: self,
186            config,
187        }
188    }
189}
190
191impl PostToken {
192    /// Gets the width of the [`PostToken`].
193    ///
194    /// This is used to determine how much space the token takes up _within a
195    /// single line_ for the purposes of respecting the maximum line length.
196    /// As such, newlines are considered zero-width tokens. Similarly, doc
197    /// comments and directive comments are considered zero-width as they always
198    /// appear on their own lines.
199    fn width(&self, config: &crate::Config) -> usize {
200        match self {
201            Self::Space => SPACE.len(), // 1 character
202            Self::Newline => 0,
203            Self::Indent => config.indent.num(),
204            Self::TempIndent(value) => value.len(),
205            Self::Literal(value) => value.len(),
206            Self::Directive { .. } => 0,
207            Self::Documentation { .. } => 0,
208        }
209    }
210}
211
212impl TokenStream<PostToken> {
213    /// Gets the maximum width of the [`TokenStream`].
214    ///
215    /// This is suitable to call if the stream represents multiple lines.
216    fn max_width(&self, config: &Config) -> usize {
217        let mut max: usize = 0;
218        let mut cur_width: usize = 0;
219        for token in self.iter() {
220            cur_width += token.width(config);
221            if token == &PostToken::Newline {
222                max = max.max(cur_width);
223                cur_width = 0;
224            }
225        }
226        max.max(cur_width)
227    }
228
229    /// Gets the width of the last line of the [`TokenStream`].
230    fn last_line_width(&self, config: &Config) -> usize {
231        let mut width = 0;
232        for token in self.iter().rev() {
233            if token == &PostToken::Newline {
234                break;
235            }
236            width += token.width(config);
237        }
238        width
239    }
240}
241
/// A line break.
///
/// Describes on which side of a token a line break may be inserted when a
/// line has to be wrapped.
enum LineBreak {
    /// A line break that can be inserted before a token.
    Before,
    /// A line break that can be inserted after a token.
    After,
}
249
250/// Returns whether a token can be line broken.
251fn can_be_line_broken(kind: SyntaxKind) -> Option<LineBreak> {
252    match kind {
253        SyntaxKind::CloseBrace
254        | SyntaxKind::CloseBracket
255        | SyntaxKind::CloseParen
256        | SyntaxKind::CloseHeredoc
257        | SyntaxKind::Assignment
258        | SyntaxKind::Plus
259        | SyntaxKind::Minus
260        | SyntaxKind::Asterisk
261        | SyntaxKind::Slash
262        | SyntaxKind::Percent
263        | SyntaxKind::Exponentiation
264        | SyntaxKind::Equal
265        | SyntaxKind::NotEqual
266        | SyntaxKind::Less
267        | SyntaxKind::LessEqual
268        | SyntaxKind::Greater
269        | SyntaxKind::GreaterEqual
270        | SyntaxKind::LogicalAnd
271        | SyntaxKind::LogicalOr
272        | SyntaxKind::AfterKeyword
273        | SyntaxKind::AsKeyword
274        | SyntaxKind::IfKeyword
275        | SyntaxKind::ElseKeyword
276        | SyntaxKind::ThenKeyword => Some(LineBreak::Before),
277        SyntaxKind::OpenBrace
278        | SyntaxKind::OpenBracket
279        | SyntaxKind::OpenParen
280        | SyntaxKind::OpenHeredoc
281        | SyntaxKind::Colon
282        | SyntaxKind::PlaceholderOpen
283        | SyntaxKind::Comma => Some(LineBreak::After),
284        _ => None,
285    }
286}
287
288/// Gets the corresponding [`SyntaxKind`] that should be line broken in tandem
289/// with the provided [`SyntaxKind`].
290fn tandem_line_break(kind: SyntaxKind) -> Option<SyntaxKind> {
291    match kind {
292        SyntaxKind::OpenBrace => Some(SyntaxKind::CloseBrace),
293        SyntaxKind::OpenBracket => Some(SyntaxKind::CloseBracket),
294        SyntaxKind::OpenParen => Some(SyntaxKind::CloseParen),
295        SyntaxKind::OpenHeredoc => Some(SyntaxKind::CloseHeredoc),
296        SyntaxKind::PlaceholderOpen => Some(SyntaxKind::CloseBrace),
297        _ => None,
298    }
299}
300
301/// Tokens that should have a single indent popped from the
302/// stream if they are being added at the start of a line.
303fn should_deindent(kind: SyntaxKind) -> bool {
304    matches!(
305        kind,
306        SyntaxKind::OpenBrace
307            | SyntaxKind::OpenBracket
308            | SyntaxKind::OpenParen
309            | SyntaxKind::OpenHeredoc
310            | SyntaxKind::CloseBrace
311            | SyntaxKind::CloseBracket
312            | SyntaxKind::CloseParen
313            | SyntaxKind::CloseHeredoc
314    )
315}
316
/// Tracks a tandem break.
///
/// A tandem break is recorded when a line break is inserted at a token whose
/// kind has a tandem partner (see [`tandem_line_break`]); it remembers that a
/// matching line break must be inserted when the partner token is reached.
struct TandemBreak {
    /// The [`SyntaxKind`] which opened this tandem break.
    pub open: SyntaxKind,
    /// The [`SyntaxKind`] which will close this tandem break.
    pub close: SyntaxKind,
    /// Token depth since opening the break.
    ///
    /// The close break is only added when `depth == 0`.
    /// This is incremented by one for every token matching `open` after the
    /// break is initiated. It is decremented by one for every token
    /// matching `close` after the break is initiated.
    pub depth: usize,
}
331
/// Current position in a line.
///
/// The postprocessor uses this to decide whether a newline has to be emitted
/// when a line is ended and whether a separating space is needed between
/// words.
#[derive(Default, Eq, PartialEq)]
enum LinePosition {
    /// The start of a line.
    ///
    /// This is the default position.
    #[default]
    StartOfLine,

    /// The middle of a line.
    MiddleOfLine,
}
342
/// A postprocessor of [tokens](PreToken).
///
/// Converts a stream of [`PreToken`]s into a stream of [`PostToken`]s while
/// tracking the indentation and line state needed to do so.
#[derive(Default)]
pub struct Postprocessor {
    /// The current position in the line.
    position: LinePosition,

    /// The current indentation level.
    ///
    /// This is the number of [`PostToken::Indent`]s pushed at the start of
    /// each line (plus one more while the line is interrupted).
    indent_level: usize,

    /// Whether the current line has been interrupted by trivia.
    ///
    /// While set, lines are indented one level deeper than `indent_level`.
    interrupted: bool,

    /// The current trivial blank line spacing policy.
    line_spacing_policy: TriviaBlankLineSpacingPolicy,

    /// Temporary indentation to add.
    ///
    /// When set, it is pushed as a [`PostToken::TempIndent`] after the regular
    /// indentation of each line.
    temp_indent: Option<Rc<String>>,
}
361
362impl Postprocessor {
363    /// Runs the postprocessor.
364    pub fn run(&mut self, input: TokenStream<PreToken>, config: &Config) -> TokenStream<PostToken> {
365        let mut output = TokenStream::<PostToken>::default();
366        let mut buffer = TokenStream::<PreToken>::default();
367
368        for token in input {
369            match token {
370                PreToken::LineEnd => {
371                    self.flush(&buffer, &mut output, config);
372                    self.trim_whitespace(&mut output);
373                    output.push(PostToken::Newline);
374
375                    buffer.clear();
376                    self.interrupted = false;
377                    self.position = LinePosition::StartOfLine;
378                }
379                _ => {
380                    buffer.push(token);
381                }
382            }
383        }
384
385        output
386    }
387
    /// Takes a step of a [`PreToken`] stream and processes the appropriate
    /// [`PostToken`]s.
    ///
    /// `token` is the token to process, `next` is the token that follows it
    /// (if any), and `stream` is the output stream that receives the resulting
    /// [`PostToken`]s. Besides writing to `stream`, this updates the
    /// postprocessor's own state (position, indent level, interrupted flag,
    /// spacing policy and temporary indent).
    ///
    /// # Panics
    ///
    /// Panics if a literal has a trivia [`SyntaxKind`], or if an inline
    /// comment is encountered while at the start of a line.
    fn step(
        &mut self,
        token: PreToken,
        next: Option<&PreToken>,
        stream: &mut TokenStream<PostToken>,
    ) {
        // An empty stream means we are at the very beginning of the output
        // being built: reset the line state and emit the initial indentation.
        if stream.is_empty() {
            self.interrupted = false;
            self.position = LinePosition::StartOfLine;
            self.indent(stream);
        }
        match token {
            PreToken::BlankLine => {
                self.blank_line(stream);
            }
            PreToken::LineEnd => {
                self.interrupted = false;
                self.end_line(stream);
            }
            PreToken::WordEnd => {
                // Collapse any run of trailing spaces into at most one.
                stream.trim_end(&PostToken::Space);

                if self.position == LinePosition::MiddleOfLine {
                    stream.push(PostToken::Space);
                } else {
                    // We're at the start of a line, so we don't need to add a
                    // space.
                }
            }
            PreToken::IndentStart => {
                self.indent_level += 1;
                self.end_line(stream);
            }
            PreToken::IndentEnd => {
                // Saturate rather than underflow on an unbalanced indent end.
                self.indent_level = self.indent_level.saturating_sub(1);
                self.end_line(stream);
            }
            PreToken::LineSpacingPolicy(policy) => {
                self.line_spacing_policy = policy;
            }
            PreToken::Literal(value, kind) => {
                // Trivia must arrive as `PreToken::Trivia`, never as a literal.
                assert!(!kind.is_trivia());

                // This is special handling for inserting the empty string.
                // We remove any indentation or spaces from the end of the
                // stream before adding the empty string as a literal.
                if value.is_empty() {
                    self.trim_last_line(stream);
                }

                // On an interrupted line, delimiters that start the line are
                // pulled back by one indent level.
                if self.interrupted
                    && should_deindent(kind)
                    && matches!(
                        stream.0.last(),
                        Some(&PostToken::Indent) | Some(&PostToken::TempIndent(_))
                    )
                {
                    let popped = stream.0.pop().unwrap();
                    // We don't actually want to pop the TempIndent token,
                    // but rather a regular Indent token before the temp indent.
                    if matches!(popped, PostToken::TempIndent(_)) {
                        stream.0.pop_if(|t| matches!(t, PostToken::Indent));
                        // Restore the popped TempIndent
                        stream.0.push(popped);
                    }
                }

                stream.push(PostToken::Literal(value));
                self.position = LinePosition::MiddleOfLine;
            }
            PreToken::Trivia(trivia) => match trivia {
                Trivia::BlankLine => match self.line_spacing_policy {
                    TriviaBlankLineSpacingPolicy::Always => {
                        self.blank_line(stream);
                    }
                    TriviaBlankLineSpacingPolicy::RemoveTrailingBlanks => {
                        // Under this policy a blank line is only kept when it
                        // is immediately followed by a comment.
                        if matches!(next, Some(&PreToken::Trivia(Trivia::Comment(_)))) {
                            self.blank_line(stream);
                        }
                    }
                },
                Trivia::Comment(comment) => {
                    match comment {
                        Comment::Preceding(value) => {
                            // A preceding comment gets a line of its own; if
                            // it appears mid-line, the line is interrupted.
                            if self.position == LinePosition::MiddleOfLine {
                                self.interrupted = true;
                                self.end_line(stream);
                            }
                            stream.push(PostToken::Literal(value));
                        }
                        Comment::Inline(value) => {
                            assert!(self.position == LinePosition::MiddleOfLine);
                            // If more tokens follow on this logical line, the
                            // comment interrupts it.
                            if let Some(next) = next
                                && next != &PreToken::LineEnd
                            {
                                self.interrupted = true;
                            }
                            // Replace any trailing spaces/indents with the
                            // fixed separator that precedes inline comments.
                            self.trim_last_line(stream);
                            for token in INLINE_COMMENT_PRECEDING_TOKENS.iter() {
                                stream.push(token.clone());
                            }
                            stream.push(PostToken::Literal(value));
                        }
                        Comment::Documentation(contents) => {
                            if self.position == LinePosition::MiddleOfLine {
                                self.interrupted = true;
                                self.end_line(stream);
                            }
                            stream.push(PostToken::Documentation {
                                num_indents: self.indent_level,
                                contents,
                            });
                        }
                        Comment::Directive(directive) => {
                            if self.position == LinePosition::MiddleOfLine {
                                self.interrupted = true;
                                self.end_line(stream);
                            }
                            stream.push(PostToken::Directive {
                                num_indents: self.indent_level,
                                directive,
                            });
                        }
                    }
                    // A comment always terminates its line. The position is
                    // forced to the middle of the line first so that
                    // `end_line` emits the newline.
                    self.position = LinePosition::MiddleOfLine;
                    self.end_line(stream);
                }
            },
            PreToken::TempIndentStart(bash_indent) => {
                self.temp_indent = Some(bash_indent);
            }
            PreToken::TempIndentEnd => {
                self.temp_indent = None;
            }
        }
    }
526
527    /// Flushes the `in_stream` buffer to the `out_stream`.
528    fn flush(
529        &mut self,
530        in_stream: &TokenStream<PreToken>,
531        out_stream: &mut TokenStream<PostToken>,
532        config: &Config,
533    ) {
534        assert!(!self.interrupted);
535        assert!(self.position == LinePosition::StartOfLine);
536        let mut post_buffer = TokenStream::<PostToken>::default();
537        let mut pre_buffer = in_stream.iter().peekable();
538        let starting_indent = self.indent_level;
539        let starting_temp_indent = self.temp_indent.clone();
540        while let Some(token) = pre_buffer.next() {
541            let next = pre_buffer.peek().copied();
542            self.step(token.clone(), next, &mut post_buffer);
543        }
544
545        // If all lines are short enough, we can just add the post_buffer to the
546        // out_stream and be done.
547        if config.max_line_length.get().is_none()
548            || post_buffer.max_width(config) <= config.max_line_length.get().unwrap()
549        {
550            out_stream.extend(post_buffer);
551            return;
552        }
553
554        // At least one line in the post_buffer is too long.
555        // We iterate through the in_stream to find potential line breaks,
556        // and then we iterate through the in_stream again to actually insert
557        // them in the proper places.
558
559        let max_length = config.max_line_length.get().unwrap();
560
561        let mut potential_line_breaks: HashMap<usize, SyntaxKind> = HashMap::new();
562        for (i, token) in in_stream.iter().enumerate() {
563            if let PreToken::Literal(_, kind) = token {
564                match can_be_line_broken(*kind) {
565                    Some(LineBreak::Before) => {
566                        potential_line_breaks.insert(i, *kind);
567                    }
568                    Some(LineBreak::After) => {
569                        potential_line_breaks.insert(i + 1, *kind);
570                    }
571                    None => {}
572                }
573            }
574        }
575
576        if potential_line_breaks.is_empty() {
577            // There are no potential line breaks, so we can't do anything.
578            out_stream.extend(post_buffer);
579            return;
580        }
581
582        // Set up the buffers for the second pass.
583        post_buffer.clear();
584        let mut pre_buffer = in_stream.iter().enumerate().peekable();
585
586        // Reset self.
587        self.interrupted = false;
588        self.position = LinePosition::StartOfLine;
589        self.temp_indent = starting_temp_indent;
590        self.indent_level = starting_indent;
591
592        let mut break_stack: Vec<TandemBreak> = Vec::new();
593
594        while let Some((i, token)) = pre_buffer.next() {
595            let mut cache = None;
596            if let Some(break_kind) = potential_line_breaks.get(&i) {
597                // Check if we need a break to match a prior tandem break
598                if let Some(top_of_stack) = break_stack.last_mut() {
599                    if *break_kind == top_of_stack.close {
600                        if top_of_stack.depth > 0 {
601                            top_of_stack.depth -= 1;
602                        } else {
603                            break_stack.pop();
604                            self.indent_level -= 1;
605                            self.end_line(&mut post_buffer);
606                        }
607                    } else if *break_kind == top_of_stack.open {
608                        top_of_stack.depth += 1;
609                    }
610                }
611                // Cache the current state so we can revert to it if
612                // necessary.
613                cache = Some(post_buffer.clone());
614            }
615
616            self.step(
617                token.clone(),
618                pre_buffer.peek().map(|(_, v)| &**v),
619                &mut post_buffer,
620            );
621
622            if let Some(cache) = cache
623                && post_buffer.last_line_width(config) > max_length
624            {
625                // The line is too long after the next step. Revert to the
626                // cached state and insert a line break.
627                post_buffer = cache;
628                self.interrupted = true;
629                self.end_line(&mut post_buffer);
630                self.step(
631                    token.clone(),
632                    pre_buffer.peek().map(|(_, v)| &**v),
633                    &mut post_buffer,
634                );
635
636                // Check if this introduces a tandem break
637                // SAFETY: if cache is Some(_) this step must have a potential line break
638                let break_kind = potential_line_breaks.get(&i).unwrap();
639                if let Some(also_break_on) = tandem_line_break(*break_kind) {
640                    let tandem_break = TandemBreak {
641                        open: *break_kind,
642                        close: also_break_on,
643                        depth: 0,
644                    };
645                    break_stack.push(tandem_break);
646                    self.indent_level += 1;
647                }
648            }
649        }
650
651        // reduce indent for breaks never added
652        for _ in break_stack {
653            self.indent_level = self.indent_level.saturating_sub(1);
654        }
655        out_stream.extend(post_buffer);
656    }
657
658    /// Trims any and all whitespace from the end of the stream.
659    fn trim_whitespace(&self, stream: &mut TokenStream<PostToken>) {
660        stream.trim_while(|token| {
661            matches!(
662                token,
663                PostToken::Space
664                    | PostToken::Newline
665                    | PostToken::Indent
666                    | PostToken::TempIndent(_)
667            )
668        });
669    }
670
671    /// Trims spaces and indents (and not newlines) from the end of the stream.
672    fn trim_last_line(&self, stream: &mut TokenStream<PostToken>) {
673        stream.trim_while(|token| {
674            matches!(
675                token,
676                PostToken::Space | PostToken::Indent | PostToken::TempIndent(_)
677            )
678        });
679    }
680
681    /// Ends the current line without resetting the interrupted flag.
682    ///
683    /// Removes any trailing spaces or indents and adds a newline only if state
684    /// is not [`LinePosition::StartOfLine`]. State is then set to
685    /// [`LinePosition::StartOfLine`]. Finally, indentation is added. Safe to
686    /// call multiple times in a row.
687    fn end_line(&mut self, stream: &mut TokenStream<PostToken>) {
688        self.trim_last_line(stream);
689        if self.position != LinePosition::StartOfLine {
690            stream.push(PostToken::Newline);
691        }
692        self.position = LinePosition::StartOfLine;
693        self.indent(stream);
694    }
695
696    /// Pushes the current indentation level to the stream.
697    ///
698    /// This should only be called when the state is
699    /// [`LinePosition::StartOfLine`]. This does not change the state
700    /// and is safe to call multiple times in a row.
701    fn indent(&self, stream: &mut TokenStream<PostToken>) {
702        assert!(self.position == LinePosition::StartOfLine);
703
704        self.trim_last_line(stream);
705
706        let level = if self.interrupted {
707            self.indent_level + 1
708        } else {
709            self.indent_level
710        };
711
712        for _ in 0..level {
713            stream.push(PostToken::Indent);
714        }
715
716        if let Some(ref temp_indent) = self.temp_indent {
717            stream.push(PostToken::TempIndent(temp_indent.clone()));
718        }
719    }
720
721    /// Creates a blank line and then indents.
722    fn blank_line(&mut self, stream: &mut TokenStream<PostToken>) {
723        self.trim_whitespace(stream);
724        if !stream.is_empty() {
725            stream.push(PostToken::Newline);
726        }
727        stream.push(PostToken::Newline);
728        self.position = LinePosition::StartOfLine;
729        self.indent(stream);
730    }
731}