1use std::{
2 error::Error,
3 fmt::Display,
4 iter::Peekable,
5 num::{ParseFloatError, ParseIntError},
6 str::Chars,
7};
8
9use crate::lang::tokens::Token;
10
11use super::position::{Located, Position};
12
/// Character-level lexer over a borrowed source string.
///
/// Created through `From<&str>`; iterating over it yields
/// `Result<Located<Token>, Located<LexError>>` items.
#[derive(Debug, Clone)]
pub struct Lexer<'source> {
    /// Remaining characters of the input, with one character of lookahead.
    source: Peekable<Chars<'source>>,
    /// Current line, starting at 0; bumped by `advance` when a `'\n'` is next in `source`.
    ln: usize,
    /// Current column, starting at 0; reset to 0 by `advance` on every line break.
    col: usize,
}
/// Everything that can go wrong while turning source text into tokens.
///
/// Equality is total (every payload is `Eq`), so the type derives `Eq` alongside
/// `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexError {
    /// The input ended right after a backslash inside a character or string literal.
    ExpectedEscape,
    /// The input ended right after the opening `'` of a character literal.
    ExpectedCharacter,
    /// A character literal was not terminated by a closing `'`.
    UnclosedCharacter,
    /// A string literal was not terminated by a closing `"`.
    UnclosedString,
    /// A character that cannot start any token.
    BadCharacter(char),
    /// An integer literal could not be converted to its numeric value.
    ParseIntError(ParseIntError),
    /// A float literal could not be converted to its numeric value.
    ParseFloatError(ParseFloatError),
}
29impl<'source> Lexer<'source> {
30 pub fn advance(&mut self) {
31 if self.source.peek() == Some(&'\n') {
32 self.ln += 1;
33 self.col = 0;
34 } else {
35 self.col += 1;
36 }
37 }
38 pub fn pos(&self) -> Position {
39 Position::single(self.ln, self.col)
40 }
41 pub fn lex(&mut self) -> Result<Vec<Located<Token>>, Located<LexError>> {
42 let mut tokens = vec![];
43 for res in self.by_ref() {
44 tokens.push(res?);
45 }
46 Ok(tokens)
47 }
48}
49impl<'source> Iterator for Lexer<'source> {
50 type Item = Result<Located<Token>, Located<LexError>>;
51 fn next(&mut self) -> Option<Self::Item> {
52 while self
53 .source
54 .peek()
55 .map(|c| c.is_ascii_whitespace())
56 .unwrap_or_default()
57 {
58 self.advance();
59 self.source.next();
60 }
61 while self.source.peek() == Some(&'#') {
62 self.advance();
63 self.source.next()?;
64 while self.source.peek().map(|c| *c != '\n').unwrap_or_default() {
65 self.advance();
66 self.source.next();
67 }
68 self.advance();
69 self.source.next();
70 while self
71 .source
72 .peek()
73 .map(|c| c.is_ascii_whitespace())
74 .unwrap_or_default()
75 {
76 self.advance();
77 self.source.next();
78 }
79 }
80 let mut pos = self.pos();
81 self.advance();
82 match self.source.next()? {
83 '=' => {
84 if self.source.peek() == Some(&'=') {
85 self.source.next();
86 pos.extend(&self.pos());
87 self.advance();
88 Some(Ok(Located::new(Token::EqualEqual, pos)))
89 } else if self.source.peek() == Some(&'>') {
90 self.source.next();
91 pos.extend(&self.pos());
92 self.advance();
93 Some(Ok(Located::new(Token::EqualArrow, pos)))
94 } else {
95 Some(Ok(Located::new(Token::Equal, pos)))
96 }
97 }
98 ',' => Some(Ok(Located::new(Token::Comma, pos))),
99 '.' => Some(Ok(Located::new(Token::Dot, pos))),
100 ':' => Some(Ok(Located::new(Token::Colon, pos))),
101 '!' => {
102 if self.source.peek() == Some(&'=') {
103 self.source.next();
104 pos.extend(&self.pos());
105 self.advance();
106 Some(Ok(Located::new(Token::ExclamationEqual, pos)))
107 } else {
108 Some(Ok(Located::new(Token::Exclamation, pos)))
109 }
110 }
111 '(' => Some(Ok(Located::new(Token::ParanLeft, pos))),
112 ')' => Some(Ok(Located::new(Token::ParanRight, pos))),
113 '[' => Some(Ok(Located::new(Token::BracketLeft, pos))),
114 ']' => Some(Ok(Located::new(Token::BracketRight, pos))),
115 '{' => Some(Ok(Located::new(Token::BraceLeft, pos))),
116 '}' => Some(Ok(Located::new(Token::BraceRight, pos))),
117 '+' => {
118 if self.source.peek() == Some(&'=') {
119 self.source.next();
120 pos.extend(&self.pos());
121 self.advance();
122 Some(Ok(Located::new(Token::PlusEqual, pos)))
123 } else {
124 Some(Ok(Located::new(Token::Plus, pos)))
125 }
126 }
127 '-' => {
128 if self.source.peek() == Some(&'=') {
129 self.source.next();
130 pos.extend(&self.pos());
131 self.advance();
132 Some(Ok(Located::new(Token::MinusEqual, pos)))
133 } else {
134 Some(Ok(Located::new(Token::Minus, pos)))
135 }
136 }
137 '*' => {
138 if self.source.peek() == Some(&'=') {
139 self.source.next();
140 pos.extend(&self.pos());
141 self.advance();
142 Some(Ok(Located::new(Token::StarEqual, pos)))
143 } else {
144 Some(Ok(Located::new(Token::Star, pos)))
145 }
146 }
147 '/' => {
148 if self.source.peek() == Some(&'=') {
149 self.source.next();
150 pos.extend(&self.pos());
151 self.advance();
152 Some(Ok(Located::new(Token::SlashEqual, pos)))
153 } else {
154 Some(Ok(Located::new(Token::Slash, pos)))
155 }
156 }
157 '%' => {
158 if self.source.peek() == Some(&'=') {
159 self.source.next();
160 pos.extend(&self.pos());
161 self.advance();
162 Some(Ok(Located::new(Token::PercentEqual, pos)))
163 } else {
164 Some(Ok(Located::new(Token::Percent, pos)))
165 }
166 }
167 '^' => {
168 if self.source.peek() == Some(&'=') {
169 self.source.next();
170 pos.extend(&self.pos());
171 self.advance();
172 Some(Ok(Located::new(Token::ExponentEqual, pos)))
173 } else {
174 Some(Ok(Located::new(Token::Exponent, pos)))
175 }
176 }
177 '<' => {
178 if self.source.peek() == Some(&'=') {
179 self.source.next();
180 pos.extend(&self.pos());
181 self.advance();
182 Some(Ok(Located::new(Token::LessEqual, pos)))
183 } else {
184 Some(Ok(Located::new(Token::Less, pos)))
185 }
186 }
187 '>' => {
188 if self.source.peek() == Some(&'=') {
189 self.source.next();
190 pos.extend(&self.pos());
191 self.advance();
192 Some(Ok(Located::new(Token::GreaterEqual, pos)))
193 } else {
194 Some(Ok(Located::new(Token::Greater, pos)))
195 }
196 }
197 '&' => Some(Ok(Located::new(Token::Ampersand, pos))),
198 '|' => Some(Ok(Located::new(Token::Pipe, pos))),
199 '\'' => {
200 let c = match self
201 .source
202 .next()
203 .ok_or(LexError::ExpectedCharacter)
204 .map_err(|err| Located::new(err, self.pos()))
205 {
206 Ok(c) => match c {
207 '\\' => {
208 self.advance();
209 let c = match self.source.peek() {
210 Some('n') => '\n',
211 Some('t') => '\t',
212 Some('r') => '\r',
213 Some('0') => '\0',
214 Some(c) => *c,
215 None => {
216 return Some(Err(Located::new(
217 LexError::ExpectedEscape,
218 self.pos(),
219 )))
220 }
221 };
222 self.source.next();
223 c
224 }
225 c => c,
226 },
227 Err(err) => return Some(Err(err)),
228 };
229 self.advance();
230 if self.source.next_if(|c| *c == '\'').is_none() {
231 return Some(Err(Located::new(LexError::UnclosedCharacter, pos)));
232 }
233 Some(Ok(Located::new(Token::Char(c), pos)))
234 }
235 '"' => {
236 let mut string = String::new();
237 while let Some(c) = self.source.peek() {
238 if *c == '"' {
239 break;
240 }
241 string.push(match *c {
242 '\\' => {
243 self.source.next()?;
244 self.advance();
245 match self.source.peek() {
246 Some('n') => '\n',
247 Some('t') => '\t',
248 Some('r') => '\r',
249 Some('0') => '\0',
250 Some(c) => *c,
251 None => {
252 return Some(Err(Located::new(
253 LexError::ExpectedEscape,
254 self.pos(),
255 )))
256 }
257 }
258 }
259 c => c,
260 });
261 pos.extend(&self.pos());
262 self.advance();
263 self.source.next();
264 }
265 if self.source.next_if(|c| *c == '"').is_none() {
266 return Some(Err(Located::new(LexError::UnclosedString, pos)));
267 }
268 Some(Ok(Located::new(Token::String(string), pos)))
269 }
270 c if c.is_ascii_digit() => {
271 let mut number = String::from(c);
272 while let Some(c) = self.source.peek() {
273 if c == &'_' {
274 pos.extend(&self.pos());
275 self.advance();
276 self.source.next();
277 continue;
278 }
279 if !c.is_ascii_digit() {
280 break;
281 }
282 number.push(*c);
283 pos.extend(&self.pos());
284 self.advance();
285 self.source.next();
286 }
287 if self.source.next_if(|c| *c == '.').is_some() {
288 number.push('.');
289 pos.extend(&self.pos());
290 self.advance();
291 while let Some(c) = self.source.peek() {
292 if c == &'_' {
293 pos.extend(&self.pos());
294 self.advance();
295 self.source.next();
296 continue;
297 }
298 if !c.is_ascii_digit() {
299 break;
300 }
301 number.push(*c);
302 pos.extend(&self.pos());
303 self.advance();
304 self.source.next();
305 }
306 Some(Ok(Located::new(
307 Token::Float(
308 match number
309 .parse()
310 .map_err(LexError::ParseFloatError)
311 .map_err(|err| Located::new(err, pos.clone()))
312 {
313 Ok(number) => number,
314 Err(err) => return Some(Err(err)),
315 },
316 ),
317 pos,
318 )))
319 } else if self.source.next_if(|c| *c == 'b').is_some() && number.as_str() == "0" {
320 number.clear();
321 pos.extend(&self.pos());
322 self.advance();
323 while let Some(c) = self.source.peek() {
324 if c == &'_' {
325 pos.extend(&self.pos());
326 self.advance();
327 self.source.next();
328 continue;
329 }
330 if !c.is_digit(2) {
331 break;
332 }
333 number.push(*c);
334 pos.extend(&self.pos());
335 self.advance();
336 self.source.next();
337 }
338 Some(Ok(Located::new(
339 Token::Int(
340 match i64::from_str_radix(&number, 2)
341 .map_err(LexError::ParseIntError)
342 .map_err(|err| Located::new(err, pos.clone()))
343 {
344 Ok(number) => number,
345 Err(err) => return Some(Err(err)),
346 },
347 ),
348 pos,
349 )))
350 } else if self.source.next_if(|c| *c == 'x').is_some() && number.as_str() == "0" {
351 number.clear();
352 pos.extend(&self.pos());
353 self.advance();
354 while let Some(c) = self.source.peek() {
355 if c == &'_' {
356 pos.extend(&self.pos());
357 self.advance();
358 self.source.next();
359 continue;
360 }
361 if !c.is_ascii_hexdigit() {
362 break;
363 }
364 number.push(*c);
365 pos.extend(&self.pos());
366 self.advance();
367 self.source.next();
368 }
369 Some(Ok(Located::new(
370 Token::Int(
371 match i64::from_str_radix(&number, 16)
372 .map_err(LexError::ParseIntError)
373 .map_err(|err| Located::new(err, pos.clone()))
374 {
375 Ok(number) => number,
376 Err(err) => return Some(Err(err)),
377 },
378 ),
379 pos,
380 )))
381 } else {
382 Some(Ok(Located::new(
383 Token::Int(
384 match number
385 .parse()
386 .map_err(LexError::ParseIntError)
387 .map_err(|err| Located::new(err, pos.clone()))
388 {
389 Ok(number) => number,
390 Err(err) => return Some(Err(err)),
391 },
392 ),
393 pos,
394 )))
395 }
396 }
397 c if c.is_ascii_alphanumeric() || c == '_' => {
398 let mut ident = String::from(c);
399 while let Some(c) = self.source.peek() {
400 if !c.is_ascii_alphanumeric() && *c != '_' {
401 break;
402 }
403 ident.push(*c);
404 pos.extend(&self.pos());
405 self.advance();
406 self.source.next();
407 }
408 Some(Ok(Located::new(Token::ident(ident), pos)))
409 }
410 c => Some(Err(Located::new(LexError::BadCharacter(c), pos))),
411 }
412 }
413}
414impl<'source> From<&'source str> for Lexer<'source> {
415 fn from(value: &'source str) -> Self {
416 Self {
417 source: value.chars().peekable(),
418 ln: 0,
419 col: 0,
420 }
421 }
422}
423impl Display for LexError {
424 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
425 match self {
426 LexError::ExpectedEscape => write!(f, "expected escape character"),
427 LexError::ExpectedCharacter => write!(f, "expected a character"),
428 LexError::UnclosedCharacter => write!(f, "unclosed character"),
429 LexError::UnclosedString => write!(f, "unclosed string"),
430 LexError::BadCharacter(c) => write!(f, "bad character {c:?}"),
431 LexError::ParseIntError(err) => write!(f, "error while parsing to int: {err}"),
432 LexError::ParseFloatError(err) => write!(f, "error while parsing to float: {err}"),
433 }
434 }
435}
// Marks `LexError` as a standard error type using the trait's default methods; wrapped
// parse errors are reported through its `Display` output rather than through `source()`.
impl Error for LexError {}