emmylua_parser/parser/
lua_parser.rs

1use crate::{
2    grammar::parse_chunk,
3    kind::LuaTokenKind,
4    lexer::{LuaLexer, LuaTokenData},
5    parser_error::LuaParseError,
6    text::SourceRange,
7    LuaSyntaxTree, LuaTreeBuilder,
8};
9
10use super::{
11    lua_doc_parser::LuaDocParser,
12    marker::{MarkEvent, MarkerEventContainer},
13    parser_config::ParserConfig,
14};
15
16#[allow(unused)]
17pub struct LuaParser<'a> {
18    text: &'a str,
19    events: Vec<MarkEvent>,
20    tokens: Vec<LuaTokenData>,
21    token_index: usize,
22    current_token: LuaTokenKind,
23    mark_level: usize,
24    pub parse_config: ParserConfig<'a>,
25    pub(crate) errors: &'a mut Vec<LuaParseError>,
26}
27
28impl MarkerEventContainer for LuaParser<'_> {
29    fn get_mark_level(&self) -> usize {
30        self.mark_level
31    }
32
33    fn incr_mark_level(&mut self) {
34        self.mark_level += 1;
35    }
36
37    fn decr_mark_level(&mut self) {
38        self.mark_level -= 1;
39    }
40
41    fn get_events(&mut self) -> &mut Vec<MarkEvent> {
42        &mut self.events
43    }
44}
45
46impl<'a> LuaParser<'a> {
47    #[allow(unused)]
48    pub fn parse(text: &'a str, config: ParserConfig) -> LuaSyntaxTree {
49        let mut errors: Vec<LuaParseError> = Vec::new();
50        let tokens = {
51            let mut lexer = LuaLexer::new(text, config.lexer_config(), &mut errors);
52            lexer.tokenize()
53        };
54
55        let mut parser = LuaParser {
56            text,
57            events: Vec::new(),
58            tokens,
59            token_index: 0,
60            current_token: LuaTokenKind::None,
61            parse_config: config,
62            mark_level: 0,
63            errors: &mut errors,
64        };
65
66        parse_chunk(&mut parser);
67        let errors = parser.get_errors();
68        let root = {
69            let mut builder = LuaTreeBuilder::new(
70                parser.origin_text(),
71                parser.events,
72                parser.parse_config.node_cache(),
73            );
74            builder.build();
75            builder.finish()
76        };
77        LuaSyntaxTree::new(root, errors)
78    }
79
80    pub fn init(&mut self) {
81        if self.tokens.is_empty() {
82            self.current_token = LuaTokenKind::TkEof;
83        } else {
84            self.current_token = self.tokens[0].kind;
85        }
86
87        if is_trivia_kind(self.current_token) {
88            self.bump();
89        }
90    }
91
92    pub fn origin_text(&self) -> &'a str {
93        self.text
94    }
95
96    pub fn current_token(&self) -> LuaTokenKind {
97        self.current_token
98    }
99
100    pub fn current_token_index(&self) -> usize {
101        self.token_index
102    }
103
104    pub fn current_token_range(&self) -> SourceRange {
105        if self.token_index >= self.tokens.len() {
106            if self.tokens.is_empty() {
107                return SourceRange::EMPTY;
108            } else {
109                return self.tokens[self.tokens.len() - 1].range;
110            }
111        }
112
113        self.tokens[self.token_index].range
114    }
115
116    #[allow(unused)]
117    pub fn current_token_text(&self) -> &str {
118        let range = &self.tokens[self.token_index].range;
119        &self.text[range.start_offset..range.end_offset()]
120    }
121
122    pub fn bump(&mut self) {
123        if !is_invalid_kind(self.current_token) && self.token_index < self.tokens.len() {
124            let token = &self.tokens[self.token_index];
125            self.events.push(MarkEvent::EatToken {
126                kind: token.kind,
127                range: token.range,
128            });
129        }
130
131        let mut next_index = self.token_index + 1;
132        self.skip_trivia(&mut next_index);
133        self.parse_trivia_tokens(next_index);
134        self.token_index = next_index;
135
136        if self.token_index >= self.tokens.len() {
137            self.current_token = LuaTokenKind::TkEof;
138            return;
139        }
140
141        self.current_token = self.tokens[self.token_index].kind;
142    }
143
144    pub fn peek_next_token(&self) -> LuaTokenKind {
145        let mut next_index = self.token_index + 1;
146        self.skip_trivia(&mut next_index);
147
148        if next_index >= self.tokens.len() {
149            LuaTokenKind::None
150        } else {
151            self.tokens[next_index].kind
152        }
153    }
154
155    fn skip_trivia(&self, index: &mut usize) {
156        if index >= &mut self.tokens.len() {
157            return;
158        }
159
160        let mut kind = self.tokens[*index].kind;
161        while is_trivia_kind(kind) {
162            *index += 1;
163            if *index >= self.tokens.len() {
164                break;
165            }
166            kind = self.tokens[*index].kind;
167        }
168    }
169
170    // Analyze consecutive whitespace/comments
171    // At this point, comments may be in the wrong parent node, adjustments will be made in the subsequent treeBuilder
172    fn parse_trivia_tokens(&mut self, next_index: usize) {
173        let mut line_count = 0;
174        let start = self.token_index;
175        let mut doc_tokens: Vec<LuaTokenData> = Vec::new();
176        for i in start..next_index {
177            let token = &self.tokens[i];
178            match token.kind {
179                LuaTokenKind::TkShortComment | LuaTokenKind::TkLongComment => {
180                    line_count = 0;
181                    doc_tokens.push(*token);
182                }
183                LuaTokenKind::TkEndOfLine => {
184                    line_count += 1;
185
186                    if doc_tokens.is_empty() {
187                        self.events.push(MarkEvent::EatToken {
188                            kind: token.kind,
189                            range: token.range,
190                        });
191                    } else {
192                        doc_tokens.push(*token);
193                    }
194
195                    // If there are two EOFs after the comment, the previous comment is considered a group of comments
196                    if line_count > 1 && !doc_tokens.is_empty() {
197                        self.parse_comments(&doc_tokens);
198                        doc_tokens.clear();
199                    }
200                    // check if the comment is an inline comment
201                    // first is comment, second is endofline
202                    else if doc_tokens.len() == 2 && i >= 2 {
203                        let mut temp_index = i as isize - 2;
204                        let mut inline_comment = false;
205                        while temp_index >= 0 {
206                            let kind = self.tokens[temp_index as usize].kind;
207                            match kind {
208                                LuaTokenKind::TkEndOfLine => {
209                                    break;
210                                }
211                                LuaTokenKind::TkWhitespace => {
212                                    temp_index -= 1;
213                                    continue;
214                                }
215                                _ => {
216                                    inline_comment = true;
217                                    break;
218                                }
219                            }
220                        }
221
222                        if inline_comment {
223                            self.parse_comments(&doc_tokens);
224                            doc_tokens.clear();
225                        }
226                    }
227                }
228                LuaTokenKind::TkShebang | LuaTokenKind::TkWhitespace => {
229                    if doc_tokens.is_empty() {
230                        self.events.push(MarkEvent::EatToken {
231                            kind: token.kind,
232                            range: token.range,
233                        });
234                    } else {
235                        doc_tokens.push(*token);
236                    }
237                }
238                _ => {
239                    if !doc_tokens.is_empty() {
240                        self.parse_comments(&doc_tokens);
241                        doc_tokens.clear();
242                    }
243                }
244            }
245        }
246
247        if !doc_tokens.is_empty() {
248            self.parse_comments(&doc_tokens);
249        }
250    }
251
252    fn parse_comments(&mut self, comment_tokens: &Vec<LuaTokenData>) {
253        let mut trivia_token_start = comment_tokens.len();
254        // Reverse iterate over comment_tokens, removing whitespace and end-of-line tokens
255        for i in (0..comment_tokens.len()).rev() {
256            if matches!(
257                comment_tokens[i].kind,
258                LuaTokenKind::TkWhitespace | LuaTokenKind::TkEndOfLine
259            ) {
260                trivia_token_start = i;
261            } else {
262                break;
263            }
264        }
265
266        let tokens = &comment_tokens[..trivia_token_start];
267        LuaDocParser::parse(self, tokens);
268
269        for i in trivia_token_start..comment_tokens.len() {
270            let token = &comment_tokens[i];
271            self.events.push(MarkEvent::EatToken {
272                kind: token.kind,
273                range: token.range,
274            });
275        }
276    }
277
278    pub fn push_error(&mut self, err: LuaParseError) {
279        self.errors.push(err);
280    }
281
282    pub fn has_error(&self) -> bool {
283        !self.errors.is_empty()
284    }
285
286    pub fn get_errors(&self) -> Vec<LuaParseError> {
287        self.errors.clone()
288    }
289}
290
291fn is_trivia_kind(kind: LuaTokenKind) -> bool {
292    matches!(
293        kind,
294        LuaTokenKind::TkShortComment
295            | LuaTokenKind::TkLongComment
296            | LuaTokenKind::TkEndOfLine
297            | LuaTokenKind::TkWhitespace
298            | LuaTokenKind::TkShebang
299    )
300}
301
302fn is_invalid_kind(kind: LuaTokenKind) -> bool {
303    matches!(
304        kind,
305        LuaTokenKind::None
306            | LuaTokenKind::TkEof
307            | LuaTokenKind::TkWhitespace
308            | LuaTokenKind::TkShebang
309            | LuaTokenKind::TkEndOfLine
310            | LuaTokenKind::TkShortComment
311            | LuaTokenKind::TkLongComment
312    )
313}
314
#[cfg(test)]
mod tests {
    use crate::{
        kind::LuaTokenKind, lexer::LuaLexer, parser::ParserConfig, parser_error::LuaParseError,
        LuaParser,
    };

    /// Builds an initialized parser over `text` without running the grammar.
    ///
    /// With `show_tokens` set, every lexed token is printed first.
    #[allow(unused)]
    fn new_parser<'a>(
        text: &'a str,
        config: ParserConfig<'a>,
        errors: &'a mut Vec<LuaParseError>,
        show_tokens: bool,
    ) -> LuaParser<'a> {
        let tokens = {
            let mut lexer = LuaLexer::new(text, config.lexer_config(), errors);
            lexer.tokenize()
        };

        if show_tokens {
            println!("tokens: ");
            tokens.iter().for_each(|token| println!("{:?}", token));
        }

        let mut parser = LuaParser {
            text,
            events: Vec::new(),
            tokens,
            token_index: 0,
            current_token: LuaTokenKind::None,
            mark_level: 0,
            parse_config: config,
            errors,
        };
        parser.init();
        parser
    }

    /// Parses `lua_code` with the default configuration and prints the resulting red tree.
    fn parse_and_dump(lua_code: &str) {
        let tree = LuaParser::parse(lua_code, ParserConfig::default());
        println!("{:#?}", tree.get_red_root());
    }

    #[test]
    fn test_parse_and_ast() {
        parse_and_dump(
            r#"
            function foo(a, b)
                return a + b
            end
        "#,
        );
    }

    #[test]
    fn test_parse_and_ast_with_error() {
        // The function is intentionally left without its closing `end`.
        parse_and_dump(
            r#"
            function foo(a, b)
                return a + b
        "#,
        );
    }

    #[test]
    fn test_parse_comment() {
        parse_and_dump(
            r#"
            -- comment
            local t
            -- inline comment
        "#,
        );
    }

    #[test]
    fn test_parse_empty_file() {
        parse_and_dump(r#""#);
    }
}