emmylua_parser/parser/
lua_parser.rs

1use crate::{
2    LuaSyntaxTree, LuaTreeBuilder,
3    grammar::parse_chunk,
4    kind::LuaTokenKind,
5    lexer::{LuaLexer, LuaTokenData},
6    parser_error::LuaParseError,
7    text::SourceRange,
8};
9
10use super::{
11    lua_doc_parser::LuaDocParser,
12    marker::{MarkEvent, MarkerEventContainer},
13    parser_config::ParserConfig,
14};
15
/// Token-level parser state for a single Lua source text.
///
/// The parser walks the lexed token list, recording `MarkEvent`s that
/// `LuaParser::parse` later hands to `LuaTreeBuilder` to build the tree.
#[allow(unused)]
pub struct LuaParser<'a> {
    /// Source text being parsed; token ranges are used to slice into it.
    text: &'a str,
    /// Events recorded so far (e.g. `MarkEvent::EatToken` pushed by `bump`).
    events: Vec<MarkEvent>,
    /// Every token produced by `LuaLexer::tokenize`, trivia included.
    tokens: Vec<LuaTokenData>,
    /// Index into `tokens` of the current token; can be `>= tokens.len()` once
    /// the input is exhausted.
    token_index: usize,
    /// Kind of the current token; `TkEof` once `token_index` is past the end.
    current_token: LuaTokenKind,
    /// Counter exposed through the `MarkerEventContainer` implementation.
    mark_level: usize,
    /// Configuration supplying `lexer_config()` and `node_cache()`.
    pub parse_config: ParserConfig<'a>,
    /// Borrowed error list that `push_error` appends to.
    pub(crate) errors: &'a mut Vec<LuaParseError>,
}
27
/// Exposes the parser's mark-level counter and event buffer through the
/// `MarkerEventContainer` trait.
impl MarkerEventContainer for LuaParser<'_> {
    /// Returns the current mark level.
    fn get_mark_level(&self) -> usize {
        self.mark_level
    }

    /// Raises the mark level by one.
    fn incr_mark_level(&mut self) {
        self.mark_level += 1;
    }

    /// Lowers the mark level by one.
    // NOTE(review): there is no underflow guard on this `usize`; presumably
    // every call is paired with an earlier `incr_mark_level` — confirm.
    fn decr_mark_level(&mut self) {
        self.mark_level -= 1;
    }

    /// Gives mutable access to the recorded event buffer.
    fn get_events(&mut self) -> &mut Vec<MarkEvent> {
        &mut self.events
    }
}
45
46impl<'a> LuaParser<'a> {
47    #[allow(unused)]
48    pub fn parse(text: &'a str, config: ParserConfig) -> LuaSyntaxTree {
49        let mut errors: Vec<LuaParseError> = Vec::new();
50        let tokens = {
51            let mut lexer = LuaLexer::new(text, config.lexer_config(), &mut errors);
52            lexer.tokenize()
53        };
54
55        let mut parser = LuaParser {
56            text,
57            events: Vec::new(),
58            tokens,
59            token_index: 0,
60            current_token: LuaTokenKind::None,
61            parse_config: config,
62            mark_level: 0,
63            errors: &mut errors,
64        };
65
66        parse_chunk(&mut parser);
67        let errors = parser.get_errors();
68        let root = {
69            let mut builder = LuaTreeBuilder::new(
70                parser.origin_text(),
71                parser.events,
72                parser.parse_config.node_cache(),
73            );
74            builder.build();
75            builder.finish()
76        };
77        LuaSyntaxTree::new(root, errors)
78    }
79
80    pub fn init(&mut self) {
81        if self.tokens.is_empty() {
82            self.current_token = LuaTokenKind::TkEof;
83        } else {
84            self.current_token = self.tokens[0].kind;
85        }
86
87        if is_trivia_kind(self.current_token) {
88            self.bump();
89        }
90    }
91
92    pub fn origin_text(&self) -> &'a str {
93        self.text
94    }
95
96    pub fn current_token(&self) -> LuaTokenKind {
97        self.current_token
98    }
99
100    pub fn current_token_index(&self) -> usize {
101        self.token_index
102    }
103
104    pub fn current_token_range(&self) -> SourceRange {
105        if self.token_index >= self.tokens.len() {
106            if self.tokens.is_empty() {
107                return SourceRange::EMPTY;
108            } else {
109                return self.tokens[self.tokens.len() - 1].range;
110            }
111        }
112
113        self.tokens[self.token_index].range
114    }
115
116    pub fn current_token_text(&self) -> &str {
117        let range = &self.tokens[self.token_index].range;
118        &self.text[range.start_offset..range.end_offset()]
119    }
120
121    pub fn set_current_token_kind(&mut self, kind: LuaTokenKind) {
122        if self.token_index < self.tokens.len() {
123            self.tokens[self.token_index].kind = kind;
124            self.current_token = kind;
125        }
126    }
127
128    pub fn bump(&mut self) {
129        if !is_invalid_kind(self.current_token) && self.token_index < self.tokens.len() {
130            let token = &self.tokens[self.token_index];
131            self.events.push(MarkEvent::EatToken {
132                kind: token.kind,
133                range: token.range,
134            });
135        }
136
137        let mut next_index = self.token_index + 1;
138        self.skip_trivia(&mut next_index);
139        self.parse_trivia_tokens(next_index);
140        self.token_index = next_index;
141
142        if self.token_index >= self.tokens.len() {
143            self.current_token = LuaTokenKind::TkEof;
144            return;
145        }
146
147        self.current_token = self.tokens[self.token_index].kind;
148    }
149
150    pub fn peek_next_token(&self) -> LuaTokenKind {
151        let mut next_index = self.token_index + 1;
152        self.skip_trivia(&mut next_index);
153
154        if next_index >= self.tokens.len() {
155            LuaTokenKind::None
156        } else {
157            self.tokens[next_index].kind
158        }
159    }
160
161    fn skip_trivia(&self, index: &mut usize) {
162        if index >= &mut self.tokens.len() {
163            return;
164        }
165
166        let mut kind = self.tokens[*index].kind;
167        while is_trivia_kind(kind) {
168            *index += 1;
169            if *index >= self.tokens.len() {
170                break;
171            }
172            kind = self.tokens[*index].kind;
173        }
174    }
175
176    // Analyze consecutive whitespace/comments
177    // At this point, comments may be in the wrong parent node, adjustments will be made in the subsequent treeBuilder
178    fn parse_trivia_tokens(&mut self, next_index: usize) {
179        let mut line_count = 0;
180        let start = self.token_index;
181        let mut doc_tokens: Vec<LuaTokenData> = Vec::new();
182        for i in start..next_index {
183            let token = &self.tokens[i];
184            match token.kind {
185                LuaTokenKind::TkShortComment | LuaTokenKind::TkLongComment => {
186                    line_count = 0;
187                    doc_tokens.push(*token);
188                }
189                LuaTokenKind::TkEndOfLine => {
190                    line_count += 1;
191
192                    if doc_tokens.is_empty() {
193                        self.events.push(MarkEvent::EatToken {
194                            kind: token.kind,
195                            range: token.range,
196                        });
197                    } else {
198                        doc_tokens.push(*token);
199                    }
200
201                    // If there are two EOFs after the comment, the previous comment is considered a group of comments
202                    if line_count > 1 && !doc_tokens.is_empty() {
203                        self.parse_comments(&doc_tokens);
204                        doc_tokens.clear();
205                    }
206                    // check if the comment is an inline comment
207                    // first is comment, second is endofline
208                    else if doc_tokens.len() == 2 && i >= 2 {
209                        let mut temp_index = i as isize - 2;
210                        let mut inline_comment = false;
211                        while temp_index >= 0 {
212                            let kind = self.tokens[temp_index as usize].kind;
213                            match kind {
214                                LuaTokenKind::TkEndOfLine => {
215                                    break;
216                                }
217                                LuaTokenKind::TkWhitespace => {
218                                    temp_index -= 1;
219                                    continue;
220                                }
221                                _ => {
222                                    inline_comment = true;
223                                    break;
224                                }
225                            }
226                        }
227
228                        if inline_comment {
229                            self.parse_comments(&doc_tokens);
230                            doc_tokens.clear();
231                        }
232                    }
233                }
234                LuaTokenKind::TkShebang | LuaTokenKind::TkWhitespace => {
235                    if doc_tokens.is_empty() {
236                        self.events.push(MarkEvent::EatToken {
237                            kind: token.kind,
238                            range: token.range,
239                        });
240                    } else {
241                        doc_tokens.push(*token);
242                    }
243                }
244                _ => {
245                    if !doc_tokens.is_empty() {
246                        self.parse_comments(&doc_tokens);
247                        doc_tokens.clear();
248                    }
249                }
250            }
251        }
252
253        if !doc_tokens.is_empty() {
254            self.parse_comments(&doc_tokens);
255        }
256    }
257
258    fn parse_comments(&mut self, comment_tokens: &Vec<LuaTokenData>) {
259        let mut trivia_token_start = comment_tokens.len();
260        // Reverse iterate over comment_tokens, removing whitespace and end-of-line tokens
261        for i in (0..comment_tokens.len()).rev() {
262            if matches!(
263                comment_tokens[i].kind,
264                LuaTokenKind::TkWhitespace | LuaTokenKind::TkEndOfLine
265            ) {
266                trivia_token_start = i;
267            } else {
268                break;
269            }
270        }
271
272        let tokens = &comment_tokens[..trivia_token_start];
273        LuaDocParser::parse(self, tokens);
274
275        for i in trivia_token_start..comment_tokens.len() {
276            let token = &comment_tokens[i];
277            self.events.push(MarkEvent::EatToken {
278                kind: token.kind,
279                range: token.range,
280            });
281        }
282    }
283
284    pub fn push_error(&mut self, err: LuaParseError) {
285        self.errors.push(err);
286    }
287
288    pub fn has_error(&self) -> bool {
289        !self.errors.is_empty()
290    }
291
292    pub fn get_errors(&self) -> Vec<LuaParseError> {
293        self.errors.clone()
294    }
295}
296
297fn is_trivia_kind(kind: LuaTokenKind) -> bool {
298    matches!(
299        kind,
300        LuaTokenKind::TkShortComment
301            | LuaTokenKind::TkLongComment
302            | LuaTokenKind::TkEndOfLine
303            | LuaTokenKind::TkWhitespace
304            | LuaTokenKind::TkShebang
305    )
306}
307
308fn is_invalid_kind(kind: LuaTokenKind) -> bool {
309    matches!(
310        kind,
311        LuaTokenKind::None
312            | LuaTokenKind::TkEof
313            | LuaTokenKind::TkWhitespace
314            | LuaTokenKind::TkShebang
315            | LuaTokenKind::TkEndOfLine
316            | LuaTokenKind::TkShortComment
317            | LuaTokenKind::TkLongComment
318    )
319}
320
#[cfg(test)]
mod tests {
    use crate::{
        LuaParser, kind::LuaTokenKind, lexer::LuaLexer, parser::ParserConfig,
        parser_error::LuaParseError,
    };

    /// Lexes `text` and returns a parser that has already been `init`-ed,
    /// optionally printing every lexed token first.
    #[allow(unused)]
    fn new_parser<'a>(
        text: &'a str,
        config: ParserConfig<'a>,
        errors: &'a mut Vec<LuaParseError>,
        show_tokens: bool,
    ) -> LuaParser<'a> {
        let tokens = {
            let mut lexer = LuaLexer::new(text, config.lexer_config(), errors);
            lexer.tokenize()
        };

        if show_tokens {
            println!("tokens: ");
            tokens.iter().for_each(|token| println!("{:?}", token));
        }

        let mut parser = LuaParser {
            text,
            tokens,
            errors,
            parse_config: config,
            events: Vec::new(),
            current_token: LuaTokenKind::None,
            token_index: 0,
            mark_level: 0,
        };
        parser.init();

        parser
    }

    /// Parses `lua_code` with the default configuration and prints the red
    /// root of the resulting tree.
    fn parse_and_dump(lua_code: &str) {
        let tree = LuaParser::parse(lua_code, ParserConfig::default());
        println!("{:#?}", tree.get_red_root());
    }

    // A complete function definition.
    #[test]
    fn test_parse_and_ast() {
        let lua_code = r#"
            function foo(a, b)
                return a + b
            end
        "#;

        parse_and_dump(lua_code);
    }

    // A function definition whose closing `end` is missing.
    #[test]
    fn test_parse_and_ast_with_error() {
        let lua_code = r#"
            function foo(a, b)
                return a + b
        "#;

        parse_and_dump(lua_code);
    }

    // Comments before and after a statement.
    #[test]
    fn test_parse_comment() {
        let lua_code = r#"
            -- comment
            local t
            -- inline comment
        "#;

        parse_and_dump(lua_code);
    }

    // Empty input.
    #[test]
    fn test_parse_empty_file() {
        let lua_code = r#""#;

        parse_and_dump(lua_code);
    }
}