luna_lib/luna_impl/
lexer.rs

1use std::{
2    error::Error,
3    fmt::Display,
4    iter::Peekable,
5    num::{ParseFloatError, ParseIntError},
6    str::Chars,
7};
8
9use crate::lang::tokens::Token;
10
11use super::position::{Located, Position};
12
/// Character-level tokenizer over a borrowed source string.
///
/// The lexer walks the source one `char` at a time with a single character of
/// lookahead and keeps a running line/column pair describing where the next
/// character to be read is located.
#[derive(Clone, Debug)]
pub struct Lexer<'source> {
    /// Remaining characters of the input, peekable for one-character lookahead.
    source: Peekable<Chars<'source>>,
    /// Current line index; starts at 0 and grows by one per consumed newline.
    ln: usize,
    /// Current column index; reset to 0 whenever a newline is consumed.
    col: usize,
}
/// Everything that can go wrong while turning source text into tokens.
#[derive(Clone, Debug, PartialEq)]
pub enum LexError {
    /// The input ended right after a backslash inside a char or string literal.
    ExpectedEscape,
    /// The input ended right after the opening quote of a char literal.
    ExpectedCharacter,
    /// A char literal was not terminated by a closing `'`.
    UnclosedCharacter,
    /// A string literal was not terminated by a closing `"`.
    UnclosedString,
    /// A character that does not start any known token.
    BadCharacter(char),
    /// The digits of an integer literal could not be converted to an integer.
    ParseIntError(ParseIntError),
    /// The text of a float literal could not be converted to a float.
    ParseFloatError(ParseFloatError),
}
29impl<'source> Lexer<'source> {
30    pub fn advance(&mut self) {
31        if self.source.peek() == Some(&'\n') {
32            self.ln += 1;
33            self.col = 0;
34        } else {
35            self.col += 1;
36        }
37    }
38    pub fn pos(&self) -> Position {
39        Position::single(self.ln, self.col)
40    }
41    pub fn lex(&mut self) -> Result<Vec<Located<Token>>, Located<LexError>> {
42        let mut tokens = vec![];
43        for res in self.by_ref() {
44            tokens.push(res?);
45        }
46        Ok(tokens)
47    }
48}
49impl<'source> Iterator for Lexer<'source> {
50    type Item = Result<Located<Token>, Located<LexError>>;
51    fn next(&mut self) -> Option<Self::Item> {
52        while self
53            .source
54            .peek()
55            .map(|c| c.is_ascii_whitespace())
56            .unwrap_or_default()
57        {
58            self.advance();
59            self.source.next();
60        }
61        while self.source.peek() == Some(&'#') {
62            self.advance();
63            self.source.next()?;
64            while self.source.peek().map(|c| *c != '\n').unwrap_or_default() {
65                self.advance();
66                self.source.next();
67            }
68            self.advance();
69            self.source.next();
70            while self
71                .source
72                .peek()
73                .map(|c| c.is_ascii_whitespace())
74                .unwrap_or_default()
75            {
76                self.advance();
77                self.source.next();
78            }
79        }
80        let mut pos = self.pos();
81        self.advance();
82        match self.source.next()? {
83            '=' => {
84                if self.source.peek() == Some(&'=') {
85                    self.source.next();
86                    pos.extend(&self.pos());
87                    self.advance();
88                    Some(Ok(Located::new(Token::EqualEqual, pos)))
89                } else if self.source.peek() == Some(&'>') {
90                    self.source.next();
91                    pos.extend(&self.pos());
92                    self.advance();
93                    Some(Ok(Located::new(Token::EqualArrow, pos)))
94                } else {
95                    Some(Ok(Located::new(Token::Equal, pos)))
96                }
97            }
98            ',' => Some(Ok(Located::new(Token::Comma, pos))),
99            '.' => Some(Ok(Located::new(Token::Dot, pos))),
100            ':' => Some(Ok(Located::new(Token::Colon, pos))),
101            '!' => {
102                if self.source.peek() == Some(&'=') {
103                    self.source.next();
104                    pos.extend(&self.pos());
105                    self.advance();
106                    Some(Ok(Located::new(Token::ExclamationEqual, pos)))
107                } else {
108                    Some(Ok(Located::new(Token::Exclamation, pos)))
109                }
110            }
111            '(' => Some(Ok(Located::new(Token::ParanLeft, pos))),
112            ')' => Some(Ok(Located::new(Token::ParanRight, pos))),
113            '[' => Some(Ok(Located::new(Token::BracketLeft, pos))),
114            ']' => Some(Ok(Located::new(Token::BracketRight, pos))),
115            '{' => Some(Ok(Located::new(Token::BraceLeft, pos))),
116            '}' => Some(Ok(Located::new(Token::BraceRight, pos))),
117            '+' => {
118                if self.source.peek() == Some(&'=') {
119                    self.source.next();
120                    pos.extend(&self.pos());
121                    self.advance();
122                    Some(Ok(Located::new(Token::PlusEqual, pos)))
123                } else {
124                    Some(Ok(Located::new(Token::Plus, pos)))
125                }
126            }
127            '-' => {
128                if self.source.peek() == Some(&'=') {
129                    self.source.next();
130                    pos.extend(&self.pos());
131                    self.advance();
132                    Some(Ok(Located::new(Token::MinusEqual, pos)))
133                } else {
134                    Some(Ok(Located::new(Token::Minus, pos)))
135                }
136            }
137            '*' => {
138                if self.source.peek() == Some(&'=') {
139                    self.source.next();
140                    pos.extend(&self.pos());
141                    self.advance();
142                    Some(Ok(Located::new(Token::StarEqual, pos)))
143                } else {
144                    Some(Ok(Located::new(Token::Star, pos)))
145                }
146            }
147            '/' => {
148                if self.source.peek() == Some(&'=') {
149                    self.source.next();
150                    pos.extend(&self.pos());
151                    self.advance();
152                    Some(Ok(Located::new(Token::SlashEqual, pos)))
153                } else {
154                    Some(Ok(Located::new(Token::Slash, pos)))
155                }
156            }
157            '%' => {
158                if self.source.peek() == Some(&'=') {
159                    self.source.next();
160                    pos.extend(&self.pos());
161                    self.advance();
162                    Some(Ok(Located::new(Token::PercentEqual, pos)))
163                } else {
164                    Some(Ok(Located::new(Token::Percent, pos)))
165                }
166            }
167            '^' => {
168                if self.source.peek() == Some(&'=') {
169                    self.source.next();
170                    pos.extend(&self.pos());
171                    self.advance();
172                    Some(Ok(Located::new(Token::ExponentEqual, pos)))
173                } else {
174                    Some(Ok(Located::new(Token::Exponent, pos)))
175                }
176            }
177            '<' => {
178                if self.source.peek() == Some(&'=') {
179                    self.source.next();
180                    pos.extend(&self.pos());
181                    self.advance();
182                    Some(Ok(Located::new(Token::LessEqual, pos)))
183                } else {
184                    Some(Ok(Located::new(Token::Less, pos)))
185                }
186            }
187            '>' => {
188                if self.source.peek() == Some(&'=') {
189                    self.source.next();
190                    pos.extend(&self.pos());
191                    self.advance();
192                    Some(Ok(Located::new(Token::GreaterEqual, pos)))
193                } else {
194                    Some(Ok(Located::new(Token::Greater, pos)))
195                }
196            }
197            '&' => Some(Ok(Located::new(Token::Ampersand, pos))),
198            '|' => Some(Ok(Located::new(Token::Pipe, pos))),
199            '\'' => {
200                let c = match self
201                    .source
202                    .next()
203                    .ok_or(LexError::ExpectedCharacter)
204                    .map_err(|err| Located::new(err, self.pos()))
205                {
206                    Ok(c) => match c {
207                        '\\' => {
208                            self.advance();
209                            let c = match self.source.peek() {
210                                Some('n') => '\n',
211                                Some('t') => '\t',
212                                Some('r') => '\r',
213                                Some('0') => '\0',
214                                Some(c) => *c,
215                                None => {
216                                    return Some(Err(Located::new(
217                                        LexError::ExpectedEscape,
218                                        self.pos(),
219                                    )))
220                                }
221                            };
222                            self.source.next();
223                            c
224                        }
225                        c => c,
226                    },
227                    Err(err) => return Some(Err(err)),
228                };
229                self.advance();
230                if self.source.next_if(|c| *c == '\'').is_none() {
231                    return Some(Err(Located::new(LexError::UnclosedCharacter, pos)));
232                }
233                Some(Ok(Located::new(Token::Char(c), pos)))
234            }
235            '"' => {
236                let mut string = String::new();
237                while let Some(c) = self.source.peek() {
238                    if *c == '"' {
239                        break;
240                    }
241                    string.push(match *c {
242                        '\\' => {
243                            self.source.next()?;
244                            self.advance();
245                            match self.source.peek() {
246                                Some('n') => '\n',
247                                Some('t') => '\t',
248                                Some('r') => '\r',
249                                Some('0') => '\0',
250                                Some(c) => *c,
251                                None => {
252                                    return Some(Err(Located::new(
253                                        LexError::ExpectedEscape,
254                                        self.pos(),
255                                    )))
256                                }
257                            }
258                        }
259                        c => c,
260                    });
261                    pos.extend(&self.pos());
262                    self.advance();
263                    self.source.next();
264                }
265                if self.source.next_if(|c| *c == '"').is_none() {
266                    return Some(Err(Located::new(LexError::UnclosedString, pos)));
267                }
268                Some(Ok(Located::new(Token::String(string), pos)))
269            }
270            c if c.is_ascii_digit() => {
271                let mut number = String::from(c);
272                while let Some(c) = self.source.peek() {
273                    if c == &'_' {
274                        pos.extend(&self.pos());
275                        self.advance();
276                        self.source.next();
277                        continue;
278                    }
279                    if !c.is_ascii_digit() {
280                        break;
281                    }
282                    number.push(*c);
283                    pos.extend(&self.pos());
284                    self.advance();
285                    self.source.next();
286                }
287                if self.source.next_if(|c| *c == '.').is_some() {
288                    number.push('.');
289                    pos.extend(&self.pos());
290                    self.advance();
291                    while let Some(c) = self.source.peek() {
292                        if c == &'_' {
293                            pos.extend(&self.pos());
294                            self.advance();
295                            self.source.next();
296                            continue;
297                        }
298                        if !c.is_ascii_digit() {
299                            break;
300                        }
301                        number.push(*c);
302                        pos.extend(&self.pos());
303                        self.advance();
304                        self.source.next();
305                    }
306                    Some(Ok(Located::new(
307                        Token::Float(
308                            match number
309                                .parse()
310                                .map_err(LexError::ParseFloatError)
311                                .map_err(|err| Located::new(err, pos.clone()))
312                            {
313                                Ok(number) => number,
314                                Err(err) => return Some(Err(err)),
315                            },
316                        ),
317                        pos,
318                    )))
319                } else if self.source.next_if(|c| *c == 'b').is_some() && number.as_str() == "0" {
320                    number.clear();
321                    pos.extend(&self.pos());
322                    self.advance();
323                    while let Some(c) = self.source.peek() {
324                        if c == &'_' {
325                            pos.extend(&self.pos());
326                            self.advance();
327                            self.source.next();
328                            continue;
329                        }
330                        if !c.is_digit(2) {
331                            break;
332                        }
333                        number.push(*c);
334                        pos.extend(&self.pos());
335                        self.advance();
336                        self.source.next();
337                    }
338                    Some(Ok(Located::new(
339                        Token::Int(
340                            match i64::from_str_radix(&number, 2)
341                                .map_err(LexError::ParseIntError)
342                                .map_err(|err| Located::new(err, pos.clone()))
343                            {
344                                Ok(number) => number,
345                                Err(err) => return Some(Err(err)),
346                            },
347                        ),
348                        pos,
349                    )))
350                } else if self.source.next_if(|c| *c == 'x').is_some() && number.as_str() == "0" {
351                    number.clear();
352                    pos.extend(&self.pos());
353                    self.advance();
354                    while let Some(c) = self.source.peek() {
355                        if c == &'_' {
356                            pos.extend(&self.pos());
357                            self.advance();
358                            self.source.next();
359                            continue;
360                        }
361                        if !c.is_ascii_hexdigit() {
362                            break;
363                        }
364                        number.push(*c);
365                        pos.extend(&self.pos());
366                        self.advance();
367                        self.source.next();
368                    }
369                    Some(Ok(Located::new(
370                        Token::Int(
371                            match i64::from_str_radix(&number, 16)
372                                .map_err(LexError::ParseIntError)
373                                .map_err(|err| Located::new(err, pos.clone()))
374                            {
375                                Ok(number) => number,
376                                Err(err) => return Some(Err(err)),
377                            },
378                        ),
379                        pos,
380                    )))
381                } else {
382                    Some(Ok(Located::new(
383                        Token::Int(
384                            match number
385                                .parse()
386                                .map_err(LexError::ParseIntError)
387                                .map_err(|err| Located::new(err, pos.clone()))
388                            {
389                                Ok(number) => number,
390                                Err(err) => return Some(Err(err)),
391                            },
392                        ),
393                        pos,
394                    )))
395                }
396            }
397            c if c.is_ascii_alphanumeric() || c == '_' => {
398                let mut ident = String::from(c);
399                while let Some(c) = self.source.peek() {
400                    if !c.is_ascii_alphanumeric() && *c != '_' {
401                        break;
402                    }
403                    ident.push(*c);
404                    pos.extend(&self.pos());
405                    self.advance();
406                    self.source.next();
407                }
408                Some(Ok(Located::new(Token::ident(ident), pos)))
409            }
410            c => Some(Err(Located::new(LexError::BadCharacter(c), pos))),
411        }
412    }
413}
414impl<'source> From<&'source str> for Lexer<'source> {
415    fn from(value: &'source str) -> Self {
416        Self {
417            source: value.chars().peekable(),
418            ln: 0,
419            col: 0,
420        }
421    }
422}
423impl Display for LexError {
424    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
425        match self {
426            LexError::ExpectedEscape => write!(f, "expected escape character"),
427            LexError::ExpectedCharacter => write!(f, "expected a character"),
428            LexError::UnclosedCharacter => write!(f, "unclosed character"),
429            LexError::UnclosedString => write!(f, "unclosed string"),
430            LexError::BadCharacter(c) => write!(f, "bad character {c:?}"),
431            LexError::ParseIntError(err) => write!(f, "error while parsing to int: {err}"),
432            LexError::ParseFloatError(err) => write!(f, "error while parsing to float: {err}"),
433        }
434    }
435}
436impl Error for LexError {}