1use std::{mem, str::Chars};
2
3use crate::{
4 error::SyntaxError,
5 span::Span,
6 token::{Token, TokenKind},
7};
8
/// Lexes a one-character token.
///
/// Remembers the lexer's current byte offset, advances the lexer by exactly
/// one character via `next()`, and evaluates to `Ok(Token)` of kind `$token`
/// whose span runs from the remembered offset to the offset after the advance.
macro_rules! simple_token {
    ($self:ident, $token:expr) => {{
        let span_start = $self.index;
        $self.next();
        // The kind expression is evaluated after the advance and before the
        // span is built.
        let kind = $token;
        let span = Span::new(span_start, $self.index);
        Ok(Token::new(kind, span))
    }};
}
16
/// A lexer over a borrowed source string with one character of lookahead.
///
/// Token contents are borrowed from the source text, so tokens cannot outlive
/// the `'src` lifetime.
#[derive(Debug, Clone)]
pub struct Lexer<'src> {
    /// The complete input; token contents are sliced out of it by byte offset.
    text: &'src str,
    /// Iterator supplying the characters that follow `next_char`.
    src: Chars<'src>,
    /// The character currently being examined, or `None` at end of input.
    curr_char: Option<char>,
    /// The character after `curr_char`, used as one character of lookahead.
    next_char: Option<char>,
    /// Byte offset of `curr_char` within `text`.
    pub(crate) index: usize,
}
24
25impl<'src> Lexer<'src> {
26 pub fn new(text: &'src str) -> Self {
27 let mut lexer = Lexer {
28 text,
29 src: text.chars(),
30 curr_char: None,
31 next_char: None,
32 index: 0,
33 };
34 lexer.next();
35 lexer.next();
36 lexer
37 }
38
39 fn next(&mut self) {
40 if let Some(curr_char) = self.curr_char {
41 self.index += curr_char.len_utf8();
42 }
43 mem::swap(&mut self.curr_char, &mut self.next_char);
44 self.next_char = self.src.next();
45 }
46
47 pub fn next_token(&mut self) -> Result<Option<Token<'src>>, SyntaxError> {
48 while let Some(' ' | '\n' | '\t' | '\r') = self.curr_char {
49 self.next();
50 }
51 if let Some(curr_char) = self.curr_char {
52 let token_result = match curr_char {
53 '{' => simple_token!(self, TokenKind::LBrace),
54 '}' => simple_token!(self, TokenKind::RBrace),
55 '[' => simple_token!(self, TokenKind::LBracket),
56 ']' => simple_token!(self, TokenKind::RBracket),
57 '(' if self.next_char != Some('*') => simple_token!(self, TokenKind::LParen),
58 ')' => simple_token!(self, TokenKind::RParen),
59 '|' => simple_token!(self, TokenKind::Pipe),
60 ',' => simple_token!(self, TokenKind::Comma),
61 ';' => simple_token!(self, TokenKind::Semicolon),
62 '=' => simple_token!(self, TokenKind::Equal),
63 '*' => simple_token!(self, TokenKind::Star),
64 '-' => simple_token!(self, TokenKind::Dash),
65 '(' => self.parse_comment(),
66 '\'' | '"' => self.parse_terminal(),
67 '?' => self.parse_special_seq(),
68 c if c.is_ascii_alphabetic() => self.parse_identifier(),
69 c if c.is_ascii_digit() => self.parse_integer(),
70 c => {
71 let span_start = self.index;
72 self.next();
73 Err(SyntaxError::new(
74 Span::new(span_start, self.index),
75 format!("Illegal character '{}'", c).into(),
76 ))
77 }
78 };
79 match token_result {
80 Ok(token) => Ok(Some(token)),
81 Err(err) => Err(err),
82 }
83 } else {
84 Ok(None)
85 }
86 }
87
88 fn delimeted_str(&mut self, delimeter: Option<char>) -> &'src str {
89 self.next(); let content_start = self.index;
91 while self.curr_char.is_some() && self.curr_char != delimeter {
92 self.next();
93 }
94 let content_end = self.index;
95 self.next(); &self.text[content_start..content_end]
97 }
98
99 fn parse_comment(&mut self) -> Result<Token<'src>, SyntaxError> {
100 debug_assert!(
101 self.curr_char == Some('(') && self.next_char == Some('*'),
102 "Expected '(' and '*', was {:?} and {:?}",
103 self.curr_char,
104 self.next_char,
105 );
106
107 let span_start = self.index;
108
109 self.next();
110 self.next();
111 let content_start = self.index;
112 while self.curr_char.is_some()
113 && !(self.curr_char == Some('*') && self.next_char == Some(')'))
114 {
115 self.next();
116 }
117 let content_end = self.index;
118 self.next();
119 self.next();
120
121 let content = &self.text[content_start..content_end];
122
123 Ok(Token::new(
124 TokenKind::Comment(content),
125 Span::new(span_start, self.index),
126 ))
127 }
128
129 fn parse_terminal(&mut self) -> Result<Token<'src>, SyntaxError> {
130 debug_assert!(
131 self.curr_char == Some('\'') || self.curr_char == Some('"'),
132 "Expected quote, was {:?}",
133 self.curr_char,
134 );
135
136 let quote = self.curr_char;
137 let span_start = self.index;
138 let content = self.delimeted_str(quote).trim();
139
140 Ok(Token::new(
141 TokenKind::Terminal(content),
142 Span::new(span_start, self.index),
143 ))
144 }
145
146 fn parse_special_seq(&mut self) -> Result<Token<'src>, SyntaxError> {
147 debug_assert!(
148 self.curr_char == Some('?'),
149 "Expected '?', was {:?}",
150 self.curr_char,
151 );
152
153 let span_start = self.index;
154 let content = self.delimeted_str(Some('?')).trim();
155
156 Ok(Token::new(
157 TokenKind::SpecialSeq(content),
158 Span::new(span_start, self.index),
159 ))
160 }
161
162 fn parse_identifier(&mut self) -> Result<Token<'src>, SyntaxError> {
163 debug_assert!(
164 self.curr_char.map_or(false, |c| c.is_ascii_alphabetic()),
165 "Expected letter, was {:?}",
166 self.curr_char,
167 );
168
169 let span_start = self.index;
170 let content_start = self.index;
171 self.next(); while self
173 .curr_char
174 .map_or(false, |c| c.is_ascii_alphanumeric() || c == '_')
175 {
176 self.next();
177 }
178 let content_end = self.index;
179 let content = &self.text[content_start..content_end];
180
181 Ok(Token::new(
182 TokenKind::Identifier(content),
183 Span::new(span_start, self.index),
184 ))
185 }
186
187 fn parse_integer(&mut self) -> Result<Token<'src>, SyntaxError> {
188 debug_assert!(
189 self.curr_char.map_or(false, |c| c.is_ascii_digit()),
190 "Expected digit, was {:?}",
191 self.curr_char,
192 );
193
194 let span_start = self.index;
195 let content_start = self.index;
196 self.next(); while self.curr_char.map_or(false, |c| c.is_ascii_digit()) {
198 self.next();
199 }
200 let content_end = self.index;
201 let slice = &self.text[content_start..content_end];
202 let num = match slice.parse() {
203 Ok(num) => num,
204 Err(_) => {
205 return Err(SyntaxError::new(
206 Span::new(span_start, self.index),
207 "Number does not fit into `usize` type".into(),
208 ))
209 }
210 };
211
212 Ok(Token::new(
213 TokenKind::Integer(num),
214 Span::new(span_start, self.index),
215 ))
216 }
217}