1use std::{iter::Peekable, str::Chars};
2
3use crate::{common::Pos, error::TokenizeError};
4
/// Reserved words of the language.
///
/// An identifier-shaped lexeme whose text matches one of these spellings is
/// emitted as a keyword token instead of an identifier.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Keyword {
    /// `fn`
    Fn,
    /// `if`
    If,
    /// `else`
    Else,
    /// `while`
    While,
    /// `for`
    For,
    /// `in`
    In,
    /// `continue`
    Continue,
    /// `break`
    Break,
    /// `return`
    Return,
    /// `true`
    True,
    /// `false`
    False,
    /// `null`
    Null,
}
20
/// Unary and binary operators produced by the tokenizer.
///
/// The variant docs give the source spelling each operator is lexed from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Operator {
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `%`
    Percent,
    /// `^`
    Caret,
    /// `!`
    Exclamation,
    /// `==`
    Equal,
    /// `!=`
    Unequal,
    /// `<`
    LessThan,
    /// `>`
    GreaterThan,
    /// `<=`
    LessThanOrEqual,
    /// `>=`
    GreaterThanOrEqual,
    /// `&&`
    And,
    /// `||`
    Or,
}
54
55#[derive(Debug, Clone, PartialEq)]
56pub enum TokenKind {
57 Keyword(Keyword),
58 Identifier(String),
59 Number(f64),
60 String(String),
61 Operator(Operator),
62 Comma,
64 LParen,
66 RParen,
68 LBrace,
70 RBrace,
72 LBracket,
74 RBracket,
76 Equal,
78 Dot,
80 Colon,
82 Newline,
83 Eof,
85}
86
87#[derive(Debug, Clone, PartialEq)]
88pub struct Token {
89 pub pos: Pos,
90 pub kind: TokenKind,
91}
92
93pub fn tokenize(s: &str) -> Result<Vec<Token>, TokenizeError> {
94 Tokenizer::new(s).tokenize()
95}
96
/// Cursor state for a single tokenization pass over a borrowed string.
struct Tokenizer<'a> {
    /// Remaining input, with one character of lookahead.
    chars: Peekable<Chars<'a>>,
    /// Character-based cursor into the input, bumped by `Tokenizer::next`;
    /// token and error positions are derived from it.
    idx: usize,
}
101
102impl<'a> Tokenizer<'a> {
103 fn new(s: &'a str) -> Self {
104 Self {
105 chars: s.chars().peekable(),
106 idx: 0,
107 }
108 }
109
110 fn next(&mut self) -> Option<char> {
111 self.idx += 1;
112 self.chars.next()
113 }
114
115 fn peek(&mut self) -> Option<&char> {
116 self.chars.peek()
117 }
118
119 fn tokenize(&mut self) -> Result<Vec<Token>, TokenizeError> {
120 let mut tokens = vec![];
121
122 while let Some(c) = self.next() {
123 let token_start_pos = self.idx - 1;
124 let token_kind = match (c, self.peek()) {
125 ('/', Some('/')) => {
126 self.next();
127 while let Some(c) = self.peek() {
128 if *c == '\n' {
129 break;
130 }
131 self.next();
132 }
133 continue;
134 }
135 ('!', Some('=')) => {
136 self.next();
137 TokenKind::Operator(Operator::Unequal)
138 }
139 ('=', Some('=')) => {
140 self.next();
141 TokenKind::Operator(Operator::Equal)
142 }
143 ('<', Some('=')) => {
144 self.next();
145 TokenKind::Operator(Operator::LessThanOrEqual)
146 }
147 ('>', Some('=')) => {
148 self.next();
149 TokenKind::Operator(Operator::GreaterThanOrEqual)
150 }
151 ('&', Some('&')) => {
152 self.next();
153 TokenKind::Operator(Operator::And)
154 }
155 ('|', Some('|')) => {
156 self.next();
157 TokenKind::Operator(Operator::Or)
158 }
159 ('!', _) => TokenKind::Operator(Operator::Exclamation),
160 ('<', _) => TokenKind::Operator(Operator::LessThan),
161 ('>', _) => TokenKind::Operator(Operator::GreaterThan),
162 ('+', _) => TokenKind::Operator(Operator::Plus),
163 ('-', _) => TokenKind::Operator(Operator::Minus),
164 ('*', _) => TokenKind::Operator(Operator::Star),
165 ('/', _) => TokenKind::Operator(Operator::Slash),
166 ('^', _) => TokenKind::Operator(Operator::Caret),
167 ('%', _) => TokenKind::Operator(Operator::Percent),
168 (',', _) => TokenKind::Comma,
169 ('(', _) => TokenKind::LParen,
170 (')', _) => TokenKind::RParen,
171 ('{', _) => TokenKind::LBrace,
172 ('}', _) => TokenKind::RBrace,
173 ('[', _) => TokenKind::LBracket,
174 (']', _) => TokenKind::RBracket,
175 ('=', _) => TokenKind::Equal,
176 (c @ ('0'..='9'), _) | (c @ '.', Some('0'..='9')) => {
177 let mut has_dot = c == '.';
178 let mut has_e = false;
179
180 let mut num = String::new();
181 num.push(c);
182 while let Some(c) = self.peek() {
183 match c {
184 '.' => {
185 if has_dot || has_e {
186 return Err(TokenizeError::UnexpectedChar {
187 got: '.',
188 pos: self.idx,
189 });
190 }
191 has_dot = true;
192 }
193 'e' => {
194 if has_e {
195 return Err(TokenizeError::UnexpectedChar {
196 got: 'e',
197 pos: self.idx,
198 });
199 }
200 has_e = true;
201 }
202 '0'..='9' => (),
203 _ => break,
204 }
205 let c = self.next().unwrap();
206 num.push(c);
207
208 if c == 'e' && self.peek() == Some(&'-') {
209 num.push(self.next().unwrap());
210 }
211 }
212
213 if num == "." {
214 return Err(TokenizeError::UnexpectedChar {
215 got: '.',
216 pos: self.idx - 1,
217 });
218 }
219
220 let Ok(n) = num.parse() else {
221 return Err(TokenizeError::MalformedNumber {
222 number_str: num,
223 pos: token_start_pos,
224 });
225 };
226 TokenKind::Number(n)
227 }
228 ('.', _) => TokenKind::Dot,
229 (':', _) => TokenKind::Colon,
230 ('"', _) => {
231 let mut str = String::new();
232 let mut terminated = false;
233 while let Some(c) = self.next() {
234 match c {
235 '"' => {
236 terminated = true;
237 break;
238 }
239 '\n' => break,
240 '\\' => match self.next() {
241 Some('"') => str.push('"'),
242 Some('n') => str.push('\n'),
243 Some('r') => str.push('\r'),
244 Some('t') => str.push('\t'),
245 Some('\\') => str.push('\\'),
246 Some('0') => str.push('\0'),
247 Some(c) => {
248 return Err(TokenizeError::UnexpectedChar {
249 got: c,
250 pos: self.idx - 1,
251 })
252 }
253 None => break,
254 },
255 _ => str.push(c),
256 }
257 }
258
259 if !terminated {
260 return Err(TokenizeError::UnterminatedString {
261 pos: token_start_pos,
262 });
263 }
264
265 TokenKind::String(str)
266 }
267 (c @ ('a'..='z' | 'A'..='Z' | '_'), _) => {
268 let mut ident = String::new();
269 ident.push(c);
270 while let Some(c) = self.peek() {
271 match c {
272 'a'..='z' | 'A'..='Z' | '_' | '0'..='9' => {
273 let c = self.next().unwrap();
274 ident.push(c);
275 }
276 _ => break,
277 }
278 }
279
280 match ident.as_str() {
281 "fn" => TokenKind::Keyword(Keyword::Fn),
282 "if" => TokenKind::Keyword(Keyword::If),
283 "else" => TokenKind::Keyword(Keyword::Else),
284 "while" => TokenKind::Keyword(Keyword::While),
285 "for" => TokenKind::Keyword(Keyword::For),
286 "in" => TokenKind::Keyword(Keyword::In),
287 "continue" => TokenKind::Keyword(Keyword::Continue),
288 "break" => TokenKind::Keyword(Keyword::Break),
289 "return" => TokenKind::Keyword(Keyword::Return),
290 "true" => TokenKind::Keyword(Keyword::True),
291 "false" => TokenKind::Keyword(Keyword::False),
292 "null" => TokenKind::Keyword(Keyword::Null),
293 _ => TokenKind::Identifier(ident),
294 }
295 }
296 ('\n', _) => TokenKind::Newline,
298 (c, _) if c.is_ascii_whitespace() => continue,
299 (c, _) => {
300 return Err(TokenizeError::UnexpectedChar {
301 got: c,
302 pos: self.idx - 1,
303 })
304 }
305 };
306
307 let token = Token {
308 kind: token_kind,
309 pos: token_start_pos,
310 };
311 tokens.push(token);
312 }
313
314 let eof_token = Token {
315 kind: TokenKind::Eof,
316 pos: self.idx - 1,
317 };
318 tokens.push(eof_token);
319
320 Ok(tokens)
321 }
322}