rawk_core/lexer.rs

use crate::token::{Token, TokenKind};

/// A byte-oriented lexer over rawk source text.
pub struct Lexer<'a> {
    /// The source text being tokenized.
    input: &'a str,
    /// Byte offset of the character currently under examination.
    position: usize,
    /// Byte offset of the next character to be read.
    read_position: usize,
    /// The current character, or `None` once the input is exhausted.
    ch: Option<u8>,
}

impl<'a> Lexer<'a> {
    /// Creates a lexer positioned on the first character of `src`.
    pub fn new(src: &'a str) -> Self {
        let mut lexer = Lexer {
            input: src,
            position: 0,
            read_position: 0,
            ch: None,
        };

        lexer.read_char();
        lexer
    }

    /// Returns the next token and advances past the current character.
    ///
    /// Unrecognized characters yield `TokenKind::Illegal`; the end of the
    /// input yields `TokenKind::Eof`.
    pub fn next_token(&mut self) -> Token {
        let token = match self.ch {
            Some(b'{') => Token {
                kind: TokenKind::LeftCurlyBrace,
                literal: "{",
            },
            Some(b'}') => Token {
                kind: TokenKind::RightCurlyBrace,
                literal: "}",
            },
            Some(b'(') => Token {
                kind: TokenKind::LeftParen,
                literal: "(",
            },
            Some(b')') => Token {
                kind: TokenKind::RightParen,
                literal: ")",
            },
            Some(b'[') => Token {
                kind: TokenKind::LeftSquareBracket,
                literal: "[",
            },
            Some(b']') => Token {
                kind: TokenKind::RightSquareBracket,
                literal: "]",
            },
            Some(b',') => Token {
                kind: TokenKind::Comma,
                literal: ",",
            },
            Some(b';') => Token {
                kind: TokenKind::Semicolon,
                literal: ";",
            },
            Some(b'\n') => Token {
                kind: TokenKind::NewLine,
                literal: "<newline>",
            },
            Some(b'+') => Token {
                kind: TokenKind::Plus,
                literal: "+",
            },
            Some(b'-') => Token {
                kind: TokenKind::Minus,
                literal: "-",
            },
            Some(b'*') => Token {
                kind: TokenKind::Asterisk,
                literal: "*",
            },
            Some(b'%') => Token {
                kind: TokenKind::Percent,
                literal: "%",
            },
            Some(b'^') => Token {
                kind: TokenKind::Caret,
                literal: "^",
            },
            Some(b'!') => Token {
                kind: TokenKind::ExclamationMark,
                literal: "!",
            },
            Some(b'>') => Token {
                kind: TokenKind::GreaterThan,
                literal: ">",
            },
            Some(b'<') => Token {
                kind: TokenKind::LessThan,
                literal: "<",
            },
            Some(b'|') => Token {
                kind: TokenKind::Pipe,
                literal: "|",
            },
            Some(b'?') => Token {
                kind: TokenKind::QuestionMark,
                literal: "?",
            },
            Some(b':') => Token {
                kind: TokenKind::Colon,
                literal: ":",
            },
            Some(b'~') => Token {
                kind: TokenKind::Tilde,
                literal: "~",
            },
            Some(b'$') => Token {
                kind: TokenKind::DollarSign,
                literal: "$",
            },
            Some(b'=') => Token {
                kind: TokenKind::Equal,
                literal: "=",
            },
            None => Token {
                kind: TokenKind::Eof,
                literal: "",
            },
            _ => Token {
                kind: TokenKind::Illegal,
                literal: "<illegal>",
            },
        };

        self.read_char();
        token
    }

    /// Advances the lexer by one byte, setting `ch` to `None` once the end of
    /// the input is reached.
    fn read_char(&mut self) {
        if self.read_position >= self.input.len() {
            self.ch = None;
        } else {
            self.ch = Some(self.input.as_bytes()[self.read_position]);
        }
        self.position = self.read_position;
        self.read_position += 1;
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn next_left_curly_brace_token() {
        let expected = Token {
            kind: TokenKind::LeftCurlyBrace,
            literal: "{",
        };
        let input = "{";
        let mut lexer = Lexer::new(input);

        let token = lexer.next_token();

        assert_eq!(expected, token);
    }

    #[test]
    fn next_right_curly_brace_token() {
        let expected = Token {
            kind: TokenKind::RightCurlyBrace,
            literal: "}",
        };
        let input = "}";
        let mut lexer = Lexer::new(input);

        let token = lexer.next_token();

        assert_eq!(expected, token);
    }

    #[test]
    fn next_one_character_token() {
        let input = "{}()[],;\n+-*%^!><|?:~$=";
        let mut lexer = Lexer::new(input);

        let expected_tokens = vec![
            Token {
                kind: TokenKind::LeftCurlyBrace,
                literal: "{",
            },
            Token {
                kind: TokenKind::RightCurlyBrace,
                literal: "}",
            },
            Token {
                kind: TokenKind::LeftParen,
                literal: "(",
            },
            Token {
                kind: TokenKind::RightParen,
                literal: ")",
            },
            Token {
                kind: TokenKind::LeftSquareBracket,
                literal: "[",
            },
            Token {
                kind: TokenKind::RightSquareBracket,
                literal: "]",
            },
            Token {
                kind: TokenKind::Comma,
                literal: ",",
            },
            Token {
                kind: TokenKind::Semicolon,
                literal: ";",
            },
            Token {
                kind: TokenKind::NewLine,
                literal: "<newline>",
            },
            Token {
                kind: TokenKind::Plus,
                literal: "+",
            },
            Token {
                kind: TokenKind::Minus,
                literal: "-",
            },
            Token {
                kind: TokenKind::Asterisk,
                literal: "*",
            },
            Token {
                kind: TokenKind::Percent,
                literal: "%",
            },
            Token {
                kind: TokenKind::Caret,
                literal: "^",
            },
            Token {
                kind: TokenKind::ExclamationMark,
                literal: "!",
            },
            Token {
                kind: TokenKind::GreaterThan,
                literal: ">",
            },
            Token {
                kind: TokenKind::LessThan,
                literal: "<",
            },
            Token {
                kind: TokenKind::Pipe,
                literal: "|",
            },
            Token {
                kind: TokenKind::QuestionMark,
                literal: "?",
            },
            Token {
                kind: TokenKind::Colon,
                literal: ":",
            },
            Token {
                kind: TokenKind::Tilde,
                literal: "~",
            },
            Token {
                kind: TokenKind::DollarSign,
                literal: "$",
            },
            Token {
                kind: TokenKind::Equal,
                literal: "=",
            },
            Token {
                kind: TokenKind::Eof,
                literal: "",
            },
        ];

        for expected in expected_tokens {
            let token = lexer.next_token();
            assert_eq!(expected, token);
        }
    }
}
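
The lexer and its tests depend on `Token` and `TokenKind` from `crate::token`, which are not shown here. A minimal sketch of definitions consistent with this usage might look like the following: string-slice literals (`&'static str` is assumed here, though the real type may carry a lifetime) and `Debug`/`PartialEq` derives for the `assert_eq!` calls. The names below are inferred from `lexer.rs`; the actual `token.rs` may differ.

// Hypothetical sketch of crate::token (presumably rawk_core/token.rs),
// inferred from how lexer.rs and its tests use it; the real file may differ.

/// A single lexical token paired with the source text it was read from.
#[derive(Debug, PartialEq)]
pub struct Token {
    pub kind: TokenKind,
    pub literal: &'static str,
}

/// Every kind of token the lexer currently emits.
#[derive(Debug, PartialEq)]
pub enum TokenKind {
    LeftCurlyBrace,
    RightCurlyBrace,
    LeftParen,
    RightParen,
    LeftSquareBracket,
    RightSquareBracket,
    Comma,
    Semicolon,
    NewLine,
    Plus,
    Minus,
    Asterisk,
    Percent,
    Caret,
    ExclamationMark,
    GreaterThan,
    LessThan,
    Pipe,
    QuestionMark,
    Colon,
    Tilde,
    DollarSign,
    Equal,
    Eof,
    Illegal,
}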