tergo_parser/
pre_parsing_hooks.rs

1use tokenizer::tokens::CommentedToken;
2use tokenizer::tokens::Token;
3
/// This function aims to squeeze the comments into the tokens, so the
/// parser doesn't have to worry about comments.
///
/// It achieves this by attaching all the comments that precede
/// a token, and the inline comments that follow a token to the token itself.
/// Thus, all the comments are attached to non-comment tokens.
/// The comments are then unfurled in the formatting stage.
///
/// Returns references to the surviving non-comment tokens, in order.
/// Blank lines that are deliberately preserved inside/before a comment
/// block are represented as `""` entries in `leading_comments`.
///
/// NOTE(review): `tokens[it]` after the inner loop assumes the stream never
/// ends on a `Comment`/`Newline` run (i.e. a trailing non-comment token such
/// as EOF exists), and `tokens[it - 1]` assumes an `InlineComment` is never
/// the first token — TODO confirm the tokenizer guarantees both.
pub fn pre_parse<'a>(tokens: &'a mut [CommentedToken<'a>]) -> Vec<&'a CommentedToken<'a>> {
    // Index of the token currently being inspected.
    let mut it = 0;
    // Indices (into `tokens`) of the tokens to keep in the output.
    let mut tokens_without_comments = vec![];
    while it < tokens.len() {
        if let Token::Comment(comment) = tokens[it].token {
            // Collect this comment and any comment lines that directly follow;
            // they will all become `leading_comments` of the next real token.
            let mut comments = vec![];
            // Pop all newlines except the last one to collapse whitespace
            // before comments. This handles such cases as:
            // TRUE
            //
            //
            // # Leading comment
            // FALSE
            if it > 1
                && matches!(tokens[it - 1].token, Token::Newline)
                && matches!(tokens[it - 2].token, Token::Newline)
            {
                // An empty entry stands for one preserved blank line.
                comments.push("");
            }
            comments.push(comment);
            // Drop the newline tokens already queued for output, so the
            // comment block is not separated from the token it precedes.
            while let Some(&last_added) = tokens_without_comments.last() {
                let last_token: &CommentedToken<'a> = &tokens[last_added];
                if matches!(last_token.token, Token::Newline) {
                    tokens_without_comments.pop();
                } else {
                    break;
                }
            }
            // Re-add the single token right before the comment (keeps exactly
            // one of the popped newlines). NOTE(review): assumes the token at
            // `it - 1` is a newline here — otherwise it would already be in
            // the output and get duplicated; TODO confirm against tokenizer.
            if it > 0 {
                tokens_without_comments.push(it - 1);
            }

            it += 1;
            // Consume the rest of the comment block: further comments are
            // accumulated, and blank lines inside the block become "" entries.
            loop {
                match tokens[it].token {
                    Token::Newline => {
                        if matches!(tokens[it - 1].token, Token::Newline) {
                            comments.push("");
                        }
                    }
                    Token::Comment(comment) => comments.push(comment),
                    _ => break,
                }
                it += 1;
            }
            // Attach the collected block to the first non-comment,
            // non-newline token that follows it, and keep that token.
            tokens[it].leading_comments = Some(comments);
            tokens_without_comments.push(it);
        } else if let Token::InlineComment(comment) = tokens[it].token {
            // An inline comment belongs to the token it follows; the
            // comment token itself is dropped from the output.
            tokens[it - 1].inline_comment = Some(comment);
        } else {
            // Ordinary token: keep it as-is.
            tokens_without_comments.push(it);
        }
        it += 1;
    }

    // Resolve the kept indices into references to the (now annotated) tokens.
    tokens_without_comments
        .into_iter()
        .map(|id| &tokens[id])
        .collect()
}
71
72#[cfg(test)]
73mod tests {
74    use super::*;
75    use tokenizer::tokens::commented_tokens;
76
77    #[test]
78    fn test_pre_parse() {
79        let mut tokens = commented_tokens![
80            Token::Comment("Comment"),
81            Token::Newline,
82            Token::Symbol("7"),
83            Token::InlineComment("Inline comment")
84        ];
85        let commented_tokens = pre_parse(&mut tokens);
86        assert!(commented_tokens.len() == 1);
87        let res_token = commented_tokens[0];
88
89        // Comments
90        assert_eq!(
91            res_token.leading_comments,
92            Some(vec!["Comment"]),
93            "The length of the leading comments does not match"
94        );
95
96        // Inlined comments
97        assert!(res_token.inline_comment.is_some());
98        assert!(matches!(
99            res_token.inline_comment.unwrap(),
100            "Inline comment"
101        ));
102    }
103}