s1mple/
lexer.rs

/// A literal value carried by a literal token.
///
/// `PartialEq` is derived so literal payloads can be compared directly
/// (for example with `assert_eq!` in tests).
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum Value {
    /// A signed 64-bit integer literal.
    I64(i64),
}
5
/// Operators recognised by the lexer.
///
/// Derives the comparison and hashing traits so operators can be compared
/// in tests and used as map/set keys.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Operator {
    /// The addition operator, written `+`.
    Add,
}
10
/// Reserved words of the language.
///
/// Derives the comparison and hashing traits so keywords can be compared
/// in tests and used as map/set keys.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Keyword {
    /// The `fn` keyword.
    Fn,
}
15
/// Structural punctuation tokens.
///
/// Derives the comparison and hashing traits so punctuation can be compared
/// in tests and used as map/set keys.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Punctuation {
    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,
    /// Presumably `[` (inferred from the variant name; confirm against the lexer).
    LeftBracket,
    /// Presumably `]` (inferred from the variant name; confirm against the lexer).
    RightBracket,
    /// `->`
    ReturnArrow,
}
26
27#[derive(Debug, Clone)]
28pub enum Token {
29    Keyword(Keyword),
30    Operator(Operator),
31    Literal(Value),
32    Ident(String),
33    Punctuation(Punctuation),
34    EOF,
35}
36
/// Cursor over a pre-split list of source words.
///
/// `Debug` and `Clone` are derived so a lexer can be inspected while
/// debugging and snapshotted by callers.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// The source words, in input order.
    code: Vec<String>,
    /// Index into `code` of the next word to hand out.
    idx: usize,
}
41
42impl Lexer {
43    pub fn from(raw_code: &str) -> Self {
44        let code = raw_code
45            .split_ascii_whitespace()
46            .map(|s| s.to_owned())
47            .collect();
48        Lexer { code, idx: 0 }
49    }
50
51    pub fn peek(&self) -> String {
52        self.code[self.idx].clone()
53    }
54
55    pub fn get_token(&mut self) -> Token {
56        if self.idx >= self.code.len() {
57            return Token::EOF;
58        }
59
60        let raw_token = self.peek();
61        self.idx += 1;
62        let first_letter = raw_token.chars().next().unwrap();
63
64        // handle keywords or ident
65        if first_letter >= 'a' && first_letter <= 'z' || first_letter >= 'A' && first_letter <= 'Z'
66        {
67            if raw_token == "fn" {
68                Token::Keyword(Keyword::Fn)
69            } else {
70                Token::Ident(raw_token)
71            }
72        }
73        // handle literals
74        else if let Ok(val) = raw_token.parse::<i64>() {
75            Token::Literal(Value::I64(val))
76        } else {
77            // handle operators
78            if raw_token == "+" {
79                Token::Operator(Operator::Add)
80            } else if raw_token == "{" {
81                Token::Punctuation(Punctuation::LeftBrace)
82            } else if raw_token == "}" {
83                Token::Punctuation(Punctuation::RightBrace)
84            } else if raw_token == "(" {
85                Token::Punctuation(Punctuation::LeftParen)
86            } else if raw_token == ")" {
87                Token::Punctuation(Punctuation::RightParen)
88            } else if raw_token == "->" {
89                Token::Punctuation(Punctuation::ReturnArrow)
90            } else {
91                panic!("invalid token '{}'", raw_token)
92            }
93        }
94    }
95}
96
97pub fn lex(raw_code: &str) -> Vec<Token> {
98    let mut lexer = Lexer::from(raw_code);
99    let mut tok = lexer.get_token();
100    let mut tokens = Vec::with_capacity(lexer.code.len());
101    loop {
102        // println!("idx is {}", lexer.idx);
103        match tok {
104            Token::EOF => {
105                tokens.push(tok);
106                return tokens;
107            }
108            _ => {
109                tokens.push(tok);
110            }
111        }
112        tok = lexer.get_token();
113    }
114}