tergo_parser/
pre_parsing_hooks.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
use tokenizer::tokens::CommentedToken;
use tokenizer::tokens::Token;

/// This function aims to squeeze the comments into the tokens, so the
/// parser doesn't have to worry about comments.
///
/// It achieves this by attaching all the comments that precede
/// a token, and the inline comments that follow a token to the token itself.
/// Thus, all the comments are attached to non-comment tokens.
/// The comments are then unfurled in the formatting stage.
///
/// Returns references to the surviving non-comment tokens, in order.
///
/// NOTE(review): the inner scan below indexes `tokens[it]` without a bounds
/// check, so this assumes a comment run is always followed by a non-comment
/// token (e.g. an EOF token emitted by the tokenizer) — confirm upstream.
pub fn pre_parse<'a>(tokens: &'a mut [CommentedToken<'a>]) -> Vec<&'a CommentedToken<'a>> {
    let mut it = 0;
    // Indices into `tokens` of the tokens that survive comment-squeezing.
    let mut tokens_without_comments = vec![];
    while it < tokens.len() {
        if let Token::Comment(comment) = tokens[it].token {
            // Comment texts to attach to the next non-comment token.
            // An empty string entry represents a preserved blank line.
            let mut comments = vec![];
            // Pop all newlines except the last one to collapse whitespace
            // before comments. This handles such cases as:
            // TRUE
            //
            //
            // # Leading comment
            // FALSE
            if it > 1
                && matches!(tokens[it - 1].token, Token::Newline)
                && matches!(tokens[it - 2].token, Token::Newline)
            {
                // Two or more newlines preceded this comment: keep a single
                // blank line above it, encoded as an empty comment string.
                comments.push("");
            }
            comments.push(comment);
            // Drop any trailing newline tokens already queued for output;
            // they are collapsed into the comment attachment instead.
            while let Some(&last_added) = tokens_without_comments.last() {
                let last_token: &CommentedToken<'a> = &tokens[last_added];
                if matches!(last_token.token, Token::Newline) {
                    tokens_without_comments.pop();
                } else {
                    break;
                }
            }
            // Re-queue the token immediately before the comment (normally
            // the single newline separating it from the preceding code).
            // NOTE(review): if `tokens[it - 1]` is not a newline it was never
            // popped above, so this would queue its index twice — presumably
            // the tokenizer guarantees a standalone comment follows a
            // newline; verify against the tokenizer.
            if it > 0 {
                tokens_without_comments.push(it - 1);
            }

            it += 1;
            // Consume the following run of comments/newlines, recording each
            // blank line (two consecutive newlines) as an empty string.
            loop {
                match tokens[it].token {
                    Token::Newline => {
                        if matches!(tokens[it - 1].token, Token::Newline) {
                            comments.push("");
                        }
                    }
                    Token::Comment(comment) => comments.push(comment),
                    _ => break,
                }
                it += 1;
            }
            // Attach the accumulated run to the first non-comment token that
            // follows it, and keep that token in the output.
            tokens[it].leading_comments = Some(comments);
            tokens_without_comments.push(it);
        } else if let Token::InlineComment(comment) = tokens[it].token {
            // An inline comment belongs to the token it trails on the same
            // line. NOTE(review): assumes an inline comment is never the very
            // first token (`it > 0`) — confirm against the tokenizer.
            tokens[it - 1].inline_comment = Some(comment);
        } else {
            // Ordinary token: keep as-is.
            tokens_without_comments.push(it);
        }
        it += 1;
    }

    // Resolve the collected indices into references.
    tokens_without_comments
        .into_iter()
        .map(|id| &tokens[id])
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;
    use tokenizer::tokens::commented_tokens;

    #[test]
    fn test_pre_parse() {
        // A leading comment, the newline that ends it, the symbol the
        // comment should attach to, and an inline comment trailing it.
        let mut tokens = commented_tokens![
            Token::Comment("Comment"),
            Token::Newline,
            Token::Symbol("7"),
            Token::InlineComment("Inline comment")
        ];

        let squeezed = pre_parse(&mut tokens);

        // Only the symbol survives; every comment is folded into it.
        assert_eq!(squeezed.len(), 1);
        let token = squeezed[0];

        // The standalone comment becomes the symbol's leading comment.
        assert_eq!(
            token.leading_comments,
            Some(vec!["Comment"]),
            "The length of the leading comments does not match"
        );

        // The inline comment trails the symbol.
        assert_eq!(token.inline_comment, Some("Inline comment"));
    }
}