loxcraft/syntax/lexer.rs

use std::num::ParseFloatError;

use logos::Logos;

use crate::error::{Error, ErrorS, SyntaxError};
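
/// A spanned lexer for Lox source, built on [`logos`].
///
/// Tokens are yielded as `(start, token, end)` triples, the shape expected by
/// LALRPOP-style external lexers. A minimal usage sketch:
///
/// ```ignore
/// let tokens: Vec<_> = Lexer::new("print 1;").collect();
/// assert!(tokens.iter().all(|t| t.is_ok()));
/// ```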
#[derive(Debug)]
pub struct Lexer<'a> {
    inner: logos::Lexer<'a, Token>,
    /// One token of look-ahead, buffered while recovering from an error.
    pending: Option<(usize, Token, usize)>,
}

impl<'a> Lexer<'a> {
    pub fn new(source: &'a str) -> Self {
        Self { inner: Token::lexer(source), pending: None }
    }
}

impl<'a> Iterator for Lexer<'a> {
    type Item = Result<(usize, Token, usize), ErrorS>;

    fn next(&mut self) -> Option<Self::Item> {
        if let Some((start, token, end)) = self.pending.take() {
            // A buffered token can itself be invalid, e.g. when two bad
            // characters are separated by whitespace; report it as an error
            // rather than yielding `Token::Error` as if it were a valid token
            // (see `lex_separated_invalid_tokens` in the tests below).
            if token == Token::Error {
                return Some(Err((
                    Error::SyntaxError(SyntaxError::UnexpectedInput {
                        token: self.inner.source()[start..end].to_string(),
                    }),
                    start..end,
                )));
            }
            return Some(Ok((start, token, end)));
        }

        match self.inner.next()? {
            Token::Error => {
                let mut span = self.inner.span();

                // Check for an unterminated string.
                if self.inner.slice().starts_with('"') {
                    return Some(Err((Error::SyntaxError(SyntaxError::UnterminatedString), span)));
                }

                // Recover from the error: grow the span over any input that
                // abuts it byte-for-byte, and buffer the first non-adjacent
                // token in `pending` so it is not lost.
                while let Some(token) = self.inner.next() {
                    let span_new = self.inner.span();
                    if span.end == span_new.start {
                        span.end = span_new.end;
                    } else {
                        self.pending = Some((span_new.start, token, span_new.end));
                        break;
                    }
                }

                Some(Err((
                    Error::SyntaxError(SyntaxError::UnexpectedInput {
                        token: self.inner.source()[span.start..span.end].to_string(),
                    }),
                    span,
                )))
            }
            token => {
                let span = self.inner.span();
                Some(Ok((span.start, token, span.end)))
            }
        }
    }
}

#[derive(Clone, Debug, Logos, PartialEq)]
pub enum Token {
    // Single-character tokens.
    #[token("(")]
    LtParen,
    #[token(")")]
    RtParen,
    #[token("{")]
    LtBrace,
    #[token("}")]
    RtBrace,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    #[token("-")]
    Minus,
    #[token("+")]
    Plus,
    #[token(";")]
    Semicolon,
    #[token("/")]
    Slash,
    #[token("*")]
    Asterisk,

    // One or two character tokens.
    #[token("!")]
    Bang,
    #[token("!=")]
    BangEqual,
    #[token("=")]
    Equal,
    #[token("==")]
    EqualEqual,
    #[token(">")]
    Greater,
    #[token(">=")]
    GreaterEqual,
    #[token("<")]
    Less,
    #[token("<=")]
    LessEqual,

    // Literals.
    #[regex("[a-zA-Z_][a-zA-Z0-9_]*", lex_identifier)]
    Identifier(String),
    #[regex(r#""[^"]*""#, lex_string)]
    String(String),
    #[regex(r#"[0-9]+(\.[0-9]+)?"#, lex_number)]
    Number(f64),

    // Keywords.
    #[token("and")]
    And,
    #[token("class")]
    Class,
    #[token("else")]
    Else,
    #[token("false")]
    False,
    #[token("for")]
    For,
    #[token("fun")]
    Fun,
    #[token("if")]
    If,
    #[token("nil")]
    Nil,
    #[token("or")]
    Or,
    #[token("print")]
    Print,
    #[token("return")]
    Return,
    #[token("super")]
    Super,
    #[token("this")]
    This,
    #[token("true")]
    True,
    #[token("var")]
    Var,
    #[token("while")]
    While,

    #[regex(r"//.*", logos::skip)]
    #[regex(r"[ \r\n\t\f]+", logos::skip)]
    #[error]
    Error,
}

/// Parses the matched digits as an `f64`.
fn lex_number(lexer: &mut logos::Lexer<Token>) -> Result<f64, ParseFloatError> {
    let slice = lexer.slice();
    slice.parse::<f64>()
}

/// Strips the surrounding quotes from a string literal. Lox strings have no
/// escape sequences, so the body is taken verbatim.
fn lex_string(lexer: &mut logos::Lexer<Token>) -> String {
    let slice = lexer.slice();
    slice[1..slice.len() - 1].to_string()
}

/// Copies the matched identifier out of the source.
fn lex_identifier(lexer: &mut logos::Lexer<Token>) -> String {
    let slice = lexer.slice();
    slice.to_string()
}

#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use super::*;

    #[test]
    fn lex_invalid_token() {
        let exp = vec![
            Err((
                Error::SyntaxError(SyntaxError::UnexpectedInput { token: "@foo".to_string() }),
                0..4,
            )),
            Ok((5, Token::Identifier("bar".to_string()), 8)),
        ];
        let got = Lexer::new("@foo bar").collect::<Vec<_>>();
        assert_eq!(exp, got);
    }
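
    // Invalid characters separated by whitespace are not merged: recovery only
    // grows an error span over input that touches it, and the buffered token
    // is reported as its own error. A sketch of the `pending` handling in
    // `next`, assuming logos emits one `Error` per unmatched character.
    #[test]
    fn lex_separated_invalid_tokens() {
        let exp = vec![
            Err((
                Error::SyntaxError(SyntaxError::UnexpectedInput { token: "@".to_string() }),
                0..1,
            )),
            Err((
                Error::SyntaxError(SyntaxError::UnexpectedInput { token: "@".to_string() }),
                2..3,
            )),
        ];
        let got = Lexer::new("@ @").collect::<Vec<_>>();
        assert_eq!(exp, got);
    }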

    #[test]
    fn lex_unterminated_string() {
        let exp = vec![Err((Error::SyntaxError(SyntaxError::UnterminatedString), 0..5))];
        let got = Lexer::new("\"\nfoo").collect::<Vec<_>>();
        assert_eq!(exp, got);
    }
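
    // Happy path: every token arrives as `Ok((start, token, end))` with byte
    // offsets into the source; comments and whitespace are skipped outright.
    #[test]
    fn lex_spanned_tokens() {
        let exp = vec![
            Ok((0, Token::Var, 3)),
            Ok((4, Token::Identifier("x".to_string()), 5)),
            Ok((6, Token::Equal, 7)),
            Ok((8, Token::Number(1.5), 11)),
            Ok((11, Token::Semicolon, 12)),
        ];
        let got = Lexer::new("var x = 1.5; // trailing comment").collect::<Vec<_>>();
        assert_eq!(exp, got);
    }

    // String literals lose only their surrounding quotes; there is no escape
    // processing, so the body comes back verbatim.
    #[test]
    fn lex_string_literal() {
        let exp = vec![Ok((0, Token::String("hello".to_string()), 7))];
        let got = Lexer::new("\"hello\"").collect::<Vec<_>>();
        assert_eq!(exp, got);
    }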
}
194}