wdl_format/token/
post.rs

1//! Postprocessed tokens.
2//!
3//! Generally speaking, unless you are working with the internals of code
4//! formatting, you're not going to be working with these.
5
6use std::collections::HashSet;
7use std::fmt::Display;
8use std::rc::Rc;
9
10use wdl_ast::SyntaxKind;
11
12use crate::Comment;
13use crate::Config;
14use crate::NEWLINE;
15use crate::PreToken;
16use crate::SPACE;
17use crate::Token;
18use crate::TokenStream;
19use crate::Trivia;
20use crate::TriviaBlankLineSpacingPolicy;
21
/// The [`PostToken`]s pushed in front of an inline comment: two spaces that
/// separate the comment from whatever precedes it on the line.
const INLINE_COMMENT_PRECEDING_TOKENS: [PostToken; 2] = [PostToken::Space, PostToken::Space];

/// A token emitted by the postprocessor.
///
/// Whitespace is represented symbolically (space, newline, indent) and only
/// turned into concrete text when the token is displayed.
#[derive(Clone, PartialEq, Eq)]
pub enum PostToken {
    /// A single space.
    Space,

    /// A line terminator.
    Newline,

    /// One level of indentation.
    Indent,

    /// A temporary indent.
    ///
    /// Pushed after any [`PostToken::Indent`]s while temporary indentation is
    /// active (used when formatting command sections); carries the exact
    /// whitespace to emit.
    TempIndent(Rc<String>),

    /// Literal text, emitted verbatim.
    Literal(Rc<String>),
}
46
47impl std::fmt::Debug for PostToken {
48    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
49        match self {
50            Self::Space => write!(f, "<SPACE>"),
51            Self::Newline => write!(f, "<NEWLINE>"),
52            Self::Indent => write!(f, "<INDENT>"),
53            Self::TempIndent(value) => write!(f, "<TEMP_INDENT@{value}>"),
54            Self::Literal(value) => write!(f, "<LITERAL@{value}>"),
55        }
56    }
57}
58
59impl Token for PostToken {
60    /// Returns a displayable version of the token.
61    fn display<'a>(&'a self, config: &'a Config) -> impl Display + 'a {
62        /// A displayable version of a [`PostToken`].
63        struct Display<'a> {
64            /// The token to display.
65            token: &'a PostToken,
66            /// The configuration to use.
67            config: &'a Config,
68        }
69
70        impl std::fmt::Display for Display<'_> {
71            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72                match self.token {
73                    PostToken::Space => write!(f, "{SPACE}"),
74                    PostToken::Newline => write!(f, "{NEWLINE}"),
75                    PostToken::Indent => {
76                        write!(f, "{indent}", indent = self.config.indent().string())
77                    }
78                    PostToken::TempIndent(value) => write!(f, "{value}"),
79                    PostToken::Literal(value) => write!(f, "{value}"),
80                }
81            }
82        }
83
84        Display {
85            token: self,
86            config,
87        }
88    }
89}
90
91impl PostToken {
92    /// Gets the width of the [`PostToken`].
93    ///
94    /// This is used to determine how much space the token takes up _within a
95    /// single line_ for the purposes of respecting the maximum line length.
96    /// As such, newlines are considered zero-width tokens.
97    fn width(&self, config: &crate::Config) -> usize {
98        match self {
99            Self::Space => SPACE.len(), // 1 character
100            Self::Newline => 0,
101            Self::Indent => config.indent().num(),
102            Self::TempIndent(value) => value.len(),
103            Self::Literal(value) => value.len(),
104        }
105    }
106}
107
108impl TokenStream<PostToken> {
109    /// Gets the maximum width of the [`TokenStream`].
110    ///
111    /// This is suitable to call if the stream represents multiple lines.
112    fn max_width(&self, config: &Config) -> usize {
113        let mut max: usize = 0;
114        let mut cur_width: usize = 0;
115        for token in self.iter() {
116            cur_width += token.width(config);
117            if token == &PostToken::Newline {
118                max = max.max(cur_width);
119                cur_width = 0;
120            }
121        }
122        max.max(cur_width)
123    }
124
125    /// Gets the width of the last line of the [`TokenStream`].
126    fn last_line_width(&self, config: &Config) -> usize {
127        let mut width = 0;
128        for token in self.iter().rev() {
129            if token == &PostToken::Newline {
130                break;
131            }
132            width += token.width(config);
133        }
134        width
135    }
136}
137
/// Where a line break may be placed relative to a token.
enum LineBreak {
    /// The break may go immediately before the token.
    Before,
    /// The break may go immediately after the token.
    After,
}
145
146/// Returns whether a token can be line broken.
147fn can_be_line_broken(kind: SyntaxKind) -> Option<LineBreak> {
148    match kind {
149        SyntaxKind::CloseBrace
150        | SyntaxKind::CloseBracket
151        | SyntaxKind::CloseParen
152        | SyntaxKind::CloseHeredoc
153        | SyntaxKind::Assignment
154        | SyntaxKind::Plus
155        | SyntaxKind::Minus
156        | SyntaxKind::Asterisk
157        | SyntaxKind::Slash
158        | SyntaxKind::Percent
159        | SyntaxKind::Exponentiation
160        | SyntaxKind::Equal
161        | SyntaxKind::NotEqual
162        | SyntaxKind::Less
163        | SyntaxKind::LessEqual
164        | SyntaxKind::Greater
165        | SyntaxKind::GreaterEqual
166        | SyntaxKind::LogicalAnd
167        | SyntaxKind::LogicalOr
168        | SyntaxKind::AfterKeyword
169        | SyntaxKind::AsKeyword
170        | SyntaxKind::IfKeyword
171        | SyntaxKind::ElseKeyword
172        | SyntaxKind::ThenKeyword => Some(LineBreak::Before),
173        SyntaxKind::OpenBrace
174        | SyntaxKind::OpenBracket
175        | SyntaxKind::OpenParen
176        | SyntaxKind::OpenHeredoc
177        | SyntaxKind::Colon
178        | SyntaxKind::PlaceholderOpen
179        | SyntaxKind::Comma => Some(LineBreak::After),
180        _ => None,
181    }
182}
183
/// Attempts to split a `#@ except:` directive into multiple lines when it
/// exceeds `max_len`.
///
/// Returns `None` if `value` is not a `#@ except:` directive, if it already
/// fits within `max_len` bytes, or if it lists no rules. Otherwise returns
/// the replacement lines, each of the form `#@ except: A, B, ...`.
///
/// Splitting occurs only at comma boundaries. Rules are packed greedily, and
/// every line holds at least one rule even when that single rule makes the
/// line longer than `max_len`. Rule names are trimmed and empty entries
/// (e.g. from `A,,B`) are dropped, so the output is normalized even when the
/// result is a single line.
fn split_except_directive_lines(value: &str, max_len: usize) -> Option<Vec<String>> {
    /// The normalized prefix that starts every emitted line.
    const PREFIX: &str = "#@ except: ";
    /// The separator placed between rules that share a line.
    const SEPARATOR: &str = ", ";

    let rules_text = value
        .strip_prefix("#@")?
        .trim_start()
        .strip_prefix("except:")?;

    // If the whole directive fits, no splitting is needed.
    if value.len() <= max_len {
        return None;
    }

    let mut rules = rules_text
        .split(',')
        .map(str::trim)
        .filter(|rule| !rule.is_empty())
        .peekable();

    // Nothing to lay out if the directive names no rules.
    rules.peek()?;

    let mut lines = Vec::new();
    // The line currently being assembled; empty until its first rule is added.
    let mut current = String::new();

    for rule in rules {
        if current.is_empty() {
            // The first rule on a line is always accepted, even if too long.
            current.push_str(PREFIX);
            current.push_str(rule);
        } else if current.len() + SEPARATOR.len() + rule.len() <= max_len {
            current.push_str(SEPARATOR);
            current.push_str(rule);
        } else {
            // The rule does not fit: finish this line and start the next one.
            lines.push(std::mem::replace(&mut current, format!("{PREFIX}{rule}")));
        }
    }

    // At least one rule exists, so `current` always holds a final line.
    lines.push(current);

    Some(lines)
}
245
/// Tracks whether anything has been emitted on the current line yet.
#[derive(Default, PartialEq, Eq)]
enum LinePosition {
    /// Nothing has been emitted on the current line (the initial state).
    #[default]
    StartOfLine,

    /// Content has already been emitted on the current line.
    MiddleOfLine,
}
256
257/// A postprocessor of [tokens](PreToken).
258#[derive(Default)]
259pub struct Postprocessor {
260    /// The current position in the line.
261    position: LinePosition,
262
263    /// The current indentation level.
264    indent_level: usize,
265
266    /// Whether the current line has been interrupted by trivia.
267    interrupted: bool,
268
269    /// The current trivial blank line spacing policy.
270    line_spacing_policy: TriviaBlankLineSpacingPolicy,
271
272    /// Whether temporary indentation is needed.
273    temp_indent_needed: bool,
274
275    /// Temporary indentation to add.
276    temp_indent: Rc<String>,
277}
278
279impl Postprocessor {
280    /// Runs the postprocessor.
281    pub fn run(&mut self, input: TokenStream<PreToken>, config: &Config) -> TokenStream<PostToken> {
282        let mut output = TokenStream::<PostToken>::default();
283        let mut buffer = TokenStream::<PreToken>::default();
284
285        for token in input {
286            match token {
287                PreToken::LineEnd => {
288                    self.flush(&buffer, &mut output, config);
289                    self.trim_whitespace(&mut output);
290                    output.push(PostToken::Newline);
291
292                    buffer.clear();
293                    self.interrupted = false;
294                    self.position = LinePosition::StartOfLine;
295                }
296                _ => {
297                    buffer.push(token);
298                }
299            }
300        }
301
302        output
303    }
304
305    /// Takes a step of a [`PreToken`] stream and processes the appropriate
306    /// [`PostToken`]s.
307    fn step(
308        &mut self,
309        token: PreToken,
310        next: Option<&PreToken>,
311        stream: &mut TokenStream<PostToken>,
312    ) {
313        if stream.is_empty() {
314            self.interrupted = false;
315            self.position = LinePosition::StartOfLine;
316            self.indent(stream);
317        }
318        match token {
319            PreToken::BlankLine => {
320                self.blank_line(stream);
321            }
322            PreToken::LineEnd => {
323                self.interrupted = false;
324                self.end_line(stream);
325            }
326            PreToken::WordEnd => {
327                stream.trim_end(&PostToken::Space);
328
329                if self.position == LinePosition::MiddleOfLine {
330                    stream.push(PostToken::Space);
331                } else {
332                    // We're at the start of a line, so we don't need to add a
333                    // space.
334                }
335            }
336            PreToken::IndentStart => {
337                self.indent_level += 1;
338                self.end_line(stream);
339            }
340            PreToken::IndentEnd => {
341                self.indent_level = self.indent_level.saturating_sub(1);
342                self.end_line(stream);
343            }
344            PreToken::LineSpacingPolicy(policy) => {
345                self.line_spacing_policy = policy;
346            }
347            PreToken::Literal(value, kind) => {
348                assert!(!kind.is_trivia());
349
350                // This is special handling for inserting the empty string.
351                // We remove any indentation or spaces from the end of the
352                // stream and then add the empty string as a literal.
353                // Then we set the position to [`LinePosition::MiddleOfLine`]
354                // in order to trigger a newline being added before the next
355                // token.
356                if value.is_empty() {
357                    self.trim_last_line(stream);
358                    stream.push(PostToken::Literal(value));
359                    self.position = LinePosition::MiddleOfLine;
360                    return;
361                }
362
363                if self.interrupted
364                    && matches!(
365                        kind,
366                        SyntaxKind::OpenBrace
367                            | SyntaxKind::OpenBracket
368                            | SyntaxKind::OpenParen
369                            | SyntaxKind::OpenHeredoc
370                    )
371                    && matches!(
372                        stream.0.last(),
373                        Some(&PostToken::Indent) | Some(&PostToken::TempIndent(_))
374                    )
375                {
376                    stream.0.pop();
377                }
378
379                if kind == SyntaxKind::LiteralCommandText {
380                    self.temp_indent = Rc::new(
381                        value
382                            .chars()
383                            .take_while(|c| matches!(c.to_string().as_str(), SPACE | crate::TAB))
384                            .collect(),
385                    );
386                }
387
388                stream.push(PostToken::Literal(value));
389                self.position = LinePosition::MiddleOfLine;
390            }
391            PreToken::Trivia(trivia) => match trivia {
392                Trivia::BlankLine => match self.line_spacing_policy {
393                    TriviaBlankLineSpacingPolicy::Always => {
394                        self.blank_line(stream);
395                    }
396                    TriviaBlankLineSpacingPolicy::RemoveTrailingBlanks => {
397                        if matches!(next, Some(&PreToken::Trivia(Trivia::Comment(_)))) {
398                            self.blank_line(stream);
399                        }
400                    }
401                },
402                Trivia::Comment(comment) => {
403                    match comment {
404                        Comment::Preceding(value) => {
405                            if !matches!(
406                                stream.0.last(),
407                                Some(&PostToken::Newline)
408                                    | Some(&PostToken::Indent)
409                                    | Some(&PostToken::TempIndent(_))
410                                    | None
411                            ) {
412                                self.interrupted = true;
413                            }
414                            self.end_line(stream);
415                            stream.push(PostToken::Literal(value));
416                            self.position = LinePosition::MiddleOfLine;
417                        }
418                        Comment::Inline(value) => {
419                            assert!(self.position == LinePosition::MiddleOfLine);
420                            if let Some(next) = next
421                                && next != &PreToken::LineEnd
422                            {
423                                self.interrupted = true;
424                            }
425                            self.trim_last_line(stream);
426                            for token in INLINE_COMMENT_PRECEDING_TOKENS.iter() {
427                                stream.push(token.clone());
428                            }
429                            stream.push(PostToken::Literal(value));
430                        }
431                    }
432                    self.end_line(stream);
433                }
434            },
435            PreToken::TempIndentStart => {
436                self.temp_indent_needed = true;
437            }
438            PreToken::TempIndentEnd => {
439                self.temp_indent_needed = false;
440            }
441        }
442    }
443
444    /// Flushes the `in_stream` buffer to the `out_stream`.
445    fn flush(
446        &mut self,
447        in_stream: &TokenStream<PreToken>,
448        out_stream: &mut TokenStream<PostToken>,
449        config: &Config,
450    ) {
451        assert!(!self.interrupted);
452        assert!(self.position == LinePosition::StartOfLine);
453        // Preprocess the input stream to split long except directives if needed
454        let mut expanded_stream = TokenStream::<PreToken>::default();
455        let in_stream = if let Some(max_len) = config.max_line_length() {
456            for token in in_stream.iter() {
457                let PreToken::Trivia(Trivia::Comment(Comment::Preceding(value))) = token else {
458                    expanded_stream.push(token.clone());
459                    continue;
460                };
461
462                if !value.starts_with("#@") {
463                    expanded_stream.push(token.clone());
464                    continue;
465                }
466
467                if let Some(lines) = split_except_directive_lines(value, max_len) {
468                    for line in lines {
469                        expanded_stream.push(PreToken::Trivia(Trivia::Comment(
470                            Comment::Preceding(Rc::new(line)),
471                        )));
472                    }
473                } else {
474                    expanded_stream.push(token.clone());
475                }
476            }
477            &expanded_stream
478        } else {
479            // No max line length configured, use original stream
480            in_stream
481        };
482        let mut post_buffer = TokenStream::<PostToken>::default();
483        let mut pre_buffer = in_stream.iter().peekable();
484        let starting_indent = self.indent_level;
485        while let Some(token) = pre_buffer.next() {
486            let next = pre_buffer.peek().copied();
487            self.step(token.clone(), next, &mut post_buffer);
488        }
489
490        // If all lines are short enough, we can just add the post_buffer to the
491        // out_stream and be done.
492        if config.max_line_length().is_none()
493            || post_buffer.max_width(config) <= config.max_line_length().unwrap()
494        {
495            out_stream.extend(post_buffer);
496            return;
497        }
498
499        // At least one line in the post_buffer is too long.
500        // We iterate through the in_stream to find potential line breaks,
501        // and then we iterate through the in_stream again to actually insert
502        // them in the proper places.
503
504        let max_length = config.max_line_length().unwrap();
505
506        let mut potential_line_breaks: HashSet<usize> = HashSet::new();
507        for (i, token) in in_stream.iter().enumerate() {
508            if let PreToken::Literal(_, kind) = token {
509                match can_be_line_broken(*kind) {
510                    Some(LineBreak::Before) => {
511                        potential_line_breaks.insert(i);
512                    }
513                    Some(LineBreak::After) => {
514                        potential_line_breaks.insert(i + 1);
515                    }
516                    None => {}
517                }
518            }
519        }
520
521        if potential_line_breaks.is_empty() {
522            // There are no potential line breaks, so we can't do anything.
523            out_stream.extend(post_buffer);
524            return;
525        }
526
527        // Set up the buffers for the second pass.
528        post_buffer.clear();
529        let mut pre_buffer = in_stream.iter().enumerate().peekable();
530
531        // Reset the indent level.
532        self.indent_level = starting_indent;
533
534        while let Some((i, token)) = pre_buffer.next() {
535            let mut cache = None;
536            if potential_line_breaks.contains(&i) {
537                if post_buffer.last_line_width(config) > max_length {
538                    // The line is already too long, and taking the next step
539                    // can only make it worse. Insert a line break here.
540                    self.interrupted = true;
541                    self.end_line(&mut post_buffer);
542                } else {
543                    // The line is not too long yet, but it might be after the
544                    // next step. Cache the current state so we can revert to it
545                    // if necessary.
546                    cache = Some(post_buffer.clone());
547                }
548            }
549            self.step(
550                token.clone(),
551                pre_buffer.peek().map(|(_, v)| &**v),
552                &mut post_buffer,
553            );
554
555            if let Some(cache) = cache
556                && post_buffer.last_line_width(config) > max_length
557            {
558                // The line is too long after the next step. Revert to the
559                // cached state and insert a line break.
560                post_buffer = cache;
561                self.interrupted = true;
562                self.end_line(&mut post_buffer);
563                self.step(
564                    token.clone(),
565                    pre_buffer.peek().map(|(_, v)| &**v),
566                    &mut post_buffer,
567                );
568            }
569        }
570
571        out_stream.extend(post_buffer);
572    }
573
574    /// Trims any and all whitespace from the end of the stream.
575    fn trim_whitespace(&self, stream: &mut TokenStream<PostToken>) {
576        stream.trim_while(|token| {
577            matches!(
578                token,
579                PostToken::Space
580                    | PostToken::Newline
581                    | PostToken::Indent
582                    | PostToken::TempIndent(_)
583            )
584        });
585    }
586
587    /// Trims spaces and indents (and not newlines) from the end of the stream.
588    fn trim_last_line(&mut self, stream: &mut TokenStream<PostToken>) {
589        stream.trim_while(|token| {
590            matches!(
591                token,
592                PostToken::Space | PostToken::Indent | PostToken::TempIndent(_)
593            )
594        });
595    }
596
597    /// Ends the current line without resetting the interrupted flag.
598    ///
599    /// Removes any trailing spaces or indents and adds a newline only if state
600    /// is not [`LinePosition::StartOfLine`]. State is then set to
601    /// [`LinePosition::StartOfLine`]. Finally, indentation is added. Safe to
602    /// call multiple times in a row.
603    fn end_line(&mut self, stream: &mut TokenStream<PostToken>) {
604        self.trim_last_line(stream);
605        if self.position != LinePosition::StartOfLine {
606            stream.push(PostToken::Newline);
607        }
608        self.position = LinePosition::StartOfLine;
609        self.indent(stream);
610    }
611
612    /// Pushes the current indentation level to the stream.
613    /// This should only be called when the state is
614    /// [`LinePosition::StartOfLine`]. This does not change the state.
615    fn indent(&self, stream: &mut TokenStream<PostToken>) {
616        assert!(self.position == LinePosition::StartOfLine);
617
618        let level = if self.interrupted {
619            self.indent_level + 1
620        } else {
621            self.indent_level
622        };
623
624        for _ in 0..level {
625            stream.push(PostToken::Indent);
626        }
627
628        if self.temp_indent_needed {
629            stream.push(PostToken::TempIndent(self.temp_indent.clone()));
630        }
631    }
632
633    /// Creates a blank line and then indents.
634    fn blank_line(&mut self, stream: &mut TokenStream<PostToken>) {
635        self.trim_whitespace(stream);
636        if !stream.is_empty() {
637            stream.push(PostToken::Newline);
638        }
639        stream.push(PostToken::Newline);
640        self.position = LinePosition::StartOfLine;
641        self.indent(stream);
642    }
643}
wdl_format/token/post.rs

wdl_format/token/
post.rs