// luaparse_rs/parser/mod.rs

1use alloc::vec::Vec;
2
3use crate::{
4    ast::*, lexer::Token, marker::LuaVersion,
5    LexError,
6    ParseError,
7    Span,
8};
9use std::marker::PhantomData;
10
11pub mod helpers;
12pub mod types;
13pub mod stmt;
14pub mod expr;
15
16/// The main parser. Feed it Lua source code and get back an [`Ast`].
17///
18/// The type parameter `V` selects which Lua version's grammar to use.
19/// Pick one of [`Luau`](crate::Luau), [`Lua51`](crate::Lua51),
20/// [`Lua52`](crate::Lua52), [`Lua53`](crate::Lua53), or [`Lua54`](crate::Lua54).
21///
22/// # Example
23///
24/// ```rust
25/// use luaparse_rs::{Parser, Luau};
26///
27/// let parser = Parser::<Luau>::new("local x = 1").unwrap();
28/// let ast = parser.parse().unwrap();
29/// ```
30pub struct Parser<'src, V: LuaVersion> {
31    tokens: Vec<(Token, Span)>,
32    position: usize,
33    source: &'src str,
34    comments: Vec<Comment>,
35    _version: PhantomData<V>,
36}
37
38impl<'src, V: LuaVersion> Parser<'src, V> {
39    /// Creates a new parser from a source string.
40    ///
41    /// This tokenizes the input immediately. If the source contains invalid
42    /// tokens (like an unterminated string), you'll get a [`LexError`] here.
43    pub fn new(source: &'src str) -> Result<Self, LexError> {
44        let tokens = crate::lexer::lex_for_version::<V>(source)?;
45        Ok(Self {
46            tokens,
47            position: 0,
48            source,
49            comments: Vec::new(),
50            _version: PhantomData,
51        })
52    }
53    
54    /// Parses the source and returns the full syntax tree.
55    ///
56    /// This consumes the parser. If you need type declarations (Luau),
57    /// use [`parse_with_types`](Self::parse_with_types) instead.
58    pub fn parse(mut self) -> Result<Ast, ParseError> {
59        let start = 0;
60        let mut statements = Vec::new();
61        
62        while !self.is_eof() {
63            if let Token::Comment(content) = self.current() {
64                let span = self.current_span();
65                let is_block = content.contains('\n');
66                self.comments.push(Comment::new(content.clone(), is_block, span));
67                self.advance();
68                continue;
69            }
70            
71            while matches!(self.current(), Token::Semi) {
72                self.advance();
73                self.skip_comments();
74            }
75            
76            if self.is_eof() {
77                break;
78            }
79            
80            statements.push(self.parse_statement()?);
81            
82            // return/break/continue must be the last statement in a block
83            if matches!(
84                statements.last().map(|s| &s.kind),
85                Some(StmtKind::ReturnStatement(_))
86                    | Some(StmtKind::BreakStatement)
87                    | Some(StmtKind::ContinueStatement)
88            ) {
89                while matches!(self.current(), Token::Semi) {
90                    self.advance();
91                }
92                break;
93            }
94            
95            if matches!(self.current(), Token::Semi) {
96                self.advance();
97            }
98        }
99        
100        // skip trailing comments
101        self.skip_comments();
102        
103        // reject any trailing tokens after the block
104        if !self.is_eof() {
105            return Err(ParseError::InvalidSyntax {
106                message: "unexpected statement after return".to_string(),
107                span: self.current_span(),
108                help: Some("return must be the last statement in a block".to_string()),
109            });
110        }
111        
112        let end = self.source.len();
113        let block = Block::new(statements, start..end);
114        
115        Ok(Ast::new(block, self.comments))
116    }
117    
118    /// Parses the source and returns the syntax tree along with type declarations.
119    ///
120    /// This is the Luau variant of [`parse`](Self::parse). It pulls `type` and
121    /// `export type` declarations into a separate list so you can work with them
122    /// independently from the rest of the code.
123    pub fn parse_with_types(mut self) -> Result<AstWithTypes, ParseError> {
124        let mut type_declarations = Vec::new();
125        let start = 0;
126        let mut statements = Vec::new();
127        
128        while !self.is_eof() {
129            if let Token::Comment(content) = self.current() {
130                let span = self.current_span();
131                let is_block = content.contains('\n');
132                self.comments.push(Comment::new(content.clone(), is_block, span));
133                self.advance();
134                continue;
135            }
136            
137            // handle export type
138            if matches!(self.current(), Token::Export) && V::HAS_EXPORT {
139                let checkpoint = self.checkpoint();
140                self.advance();
141                
142                if matches!(self.current(), Token::Type) && V::HAS_TYPE_ANNOTATIONS {
143                    self.advance();
144                    let mut type_decl = self.parse_type_declaration_full()?;
145                    type_decl.exported = true;
146                    type_declarations.push(type_decl);
147                    continue;
148                } else {
149                    self.restore(checkpoint);
150                }
151            }
152            
153            // check for type dec.
154            if matches!(self.current(), Token::Type) && V::HAS_TYPE_ANNOTATIONS {
155                self.advance();
156                let type_decl = self.parse_type_declaration_full()?;
157                type_declarations.push(type_decl);
158                continue;
159            }
160            
161            statements.push(self.parse_statement()?);
162            
163            // return/break/continue must be the last statement in a block
164            if matches!(
165                statements.last().map(|s| &s.kind),
166                Some(StmtKind::ReturnStatement(_))
167                    | Some(StmtKind::BreakStatement)
168                    | Some(StmtKind::ContinueStatement)
169            ) {
170                while matches!(self.current(), Token::Semi) {
171                    self.advance();
172                }
173                break;
174            }
175        }
176        
177        // skip trailing comments
178        self.skip_comments();
179        
180        // reject any trailing tokens after the block
181        if !self.is_eof() {
182            return Err(ParseError::InvalidSyntax {
183                message: "unexpected statement after return".to_string(),
184                span: self.current_span(),
185                help: Some("return must be the last statement in a block".to_string()),
186            });
187        }
188        
189        let end = self.source.len();
190        let block = Block::new(statements, start..end);
191        let ast = Ast::new(block, self.comments);
192        
193        Ok(AstWithTypes::new(ast, type_declarations))
194    }
195    
196    fn current(&self) -> &Token {
197        self.tokens
198            .get(self.position)
199            .map(|(t, _)| t)
200            .unwrap_or(&Token::Eof)
201    }
202    
203    fn current_span(&self) -> Span {
204        self.tokens
205            .get(self.position)
206            .map(|(_, s)| s.clone())
207            .unwrap_or_else(|| {
208                let pos = self.source.len();
209                pos..pos
210            })
211    }
212    
213    fn peek(&self, n: usize) -> &Token {
214        self.tokens
215            .get(self.position + n)
216            .map(|(t, _)| t)
217            .unwrap_or(&Token::Eof)
218    }
219    
220    fn advance(&mut self) -> (Token, Span) {
221        let result = self.tokens
222            .get(self.position)
223            .cloned()
224            .unwrap_or_else(|| {
225                let pos = self.source.len();
226                (Token::Eof, pos..pos)
227            });
228        self.position += 1;
229        result
230    }
231    
232    pub(super) fn split_greater_eq(&mut self) {
233        if matches!(self.current(), Token::GreaterEq) {
234            let span = self.current_span();
235            self.tokens[self.position] = (Token::Greater, span.clone());
236            self.tokens.insert(self.position + 1, (Token::Eq, span));
237        }
238    }
239    
240    pub(super) fn split_right_shift(&mut self) {
241        if matches!(self.current(), Token::RightShift) {
242            let span = self.current_span();
243            let mid = span.start + 1;
244            self.tokens[self.position] = (Token::Greater, span.start..mid);
245            self.tokens.insert(self.position + 1, (Token::Greater, mid..span.end));
246        }
247    }
248
249    fn is_eof(&self) -> bool {
250        matches!(self.current(), Token::Eof)
251    }
252    
253    fn expect(&mut self, expected: Token) -> Result<Span, ParseError> {
254        if self.current() == &expected {
255            let (_, span) = self.advance();
256            Ok(span)
257        } else {
258            Err(ParseError::UnexpectedToken {
259                expected: vec![format!("{:?}", expected)],
260                found: format!("{:?}", self.current()),
261                span: self.current_span(),
262            })
263        }
264    }
265    
266    fn consume(&mut self, token: Token) -> bool {
267        if self.current() == &token {
268            self.advance();
269            true
270        } else {
271            false
272        }
273    }
274    
275    fn checkpoint(&self) -> usize {
276        self.position
277    }
278    
279    fn restore(&mut self, checkpoint: usize) {
280        self.position = checkpoint;
281    }
282    
283    fn try_parse<T, F>(&mut self, f: F) -> Option<T>
284    where
285        F: FnOnce(&mut Self) -> Result<T, ParseError>,
286    {
287        let checkpoint = self.checkpoint();
288        match f(self) {
289            Ok(value) => Some(value),
290            Err(_) => {
291                self.restore(checkpoint);
292                None
293            }
294        }
295    }
296    
297    fn skip_comments(&mut self) {
298        while matches!(self.current(), Token::Comment(_)) {
299            let (token, span) = self.advance();
300            if let Token::Comment(content) = token {
301                let is_block = content.contains('\n');
302                self.comments.push(Comment::new(content, is_block, span));
303            }
304        }
305    }
306    
307    fn parse_statement(&mut self) -> Result<Stmt, ParseError> {
308        self.skip_comments();
309        stmt::parse_statement(self)
310    }
311    
312    fn parse_expression(&mut self) -> Result<Expr, ParseError> {
313        self.skip_comments();
314        expr::parse_expression(self)
315    }
316}