1use std::{ops::RangeBounds, usize, vec};
2
3use super::span::{self, Span};
4use crate::syntax_error::SyntaxError;
5
6#[derive(Debug, Clone)]
7pub struct Token {
8 pub kind: String,
9 pub raw: String,
10 pub span: span::Span,
11}
12
13impl Token {
14 fn new<A: ToString>(kind: A, cursor: &Cursor, span: span::Span) -> Self {
15 Self {
16 kind: kind.to_string(),
17 raw: cursor.get_by_span(&span),
18 span,
19 }
20 }
21}
22
23#[derive(Debug)]
24struct Cursor {
25 payload: String,
26 index: usize,
27}
28
29impl Cursor {
30 fn current_char(&self) -> Option<char> {
31 self.payload.chars().nth(self.index)
32 }
33
34 fn current_char_expected(&self, val: char) {
35 if !self.current_matches_char(val) {
36 todo!(
37 "Require error expected the char {} (ascii code {})",
38 val,
39 val as usize
40 );
41 }
42 }
43
44 fn current_matches_char(&self, val: char) -> bool {
45 if let Some(c) = self.current_char() {
46 c == val
47 } else {
48 false
49 }
50 }
51
52 pub fn current_matches_range_char<T>(&self, vec_ranges: &Vec<T>) -> bool
53 where
54 T: RangeBounds<char> + std::fmt::Debug,
55 {
56 let current_char = self.current_char();
57 if let Some(current_char) = current_char {
58 for e in vec_ranges {
59 if e.contains(¤t_char) {
60 return true;
61 }
62 }
63 }
64
65 return false;
66 }
67
68 fn current_range_char_expected<T>(&self, vec_ranges: &Vec<T>)
69 where
70 T: RangeBounds<char> + std::fmt::Debug,
71 {
72 if !self.current_matches_range_char(&vec_ranges) {
73 todo!("Require error expected the char {:?}", &vec_ranges);
74 }
75 }
76
77 fn forward(&mut self, positions: usize) {
78 self.index = self.index + positions;
79 }
80
81 fn get_by_span(&self, span: &span::Span) -> String {
82 unsafe { self.payload.get_unchecked(span.start..span.end).to_string() }
83 }
84
85 fn has_current<'a>(&'a self) -> bool {
86 self.payload.len() > self.index
87 }
88
89 fn has_next<'a>(&'a self) -> bool {
90 (self.payload.len() > (self.index + 1)).clone()
91 }
92
93 fn new<A>(index: usize, payload: A) -> Self
94 where
95 A: ToString,
96 {
97 Self {
98 payload: payload.to_string(),
99 index,
100 }
101 }
102
103 fn next_char(&self) -> Option<char> {
104 self.payload.chars().nth(self.index + 1)
105 }
106
107 fn next_matches_char(&self, val: char) -> bool {
108 if let Some(c) = self.next_char() {
109 c == val
110 } else {
111 false
112 }
113 }
114
115 pub fn next_matches_range_char<T>(&self, vec_ranges: &Vec<T>) -> bool
116 where
117 T: RangeBounds<char> + std::fmt::Debug,
118 {
119 let current_char = self.next_char();
120 if let Some(current_char) = current_char {
121 for e in vec_ranges {
122 if e.contains(¤t_char) {
123 return true;
124 }
125 }
126 }
127
128 return false;
129 }
130}
131
132pub struct Tokenizer {}
133
134impl Tokenizer {
135 pub fn parse<A>(payload: A) -> Result<Vec<Token>, SyntaxError>
136 where
137 A: ToString,
138 {
139 let ref mut cursor = Cursor::new(0, payload);
140 return Ok(Self::parse_by_cursor(cursor)?);
141 }
142
143 fn parse_by_cursor(cursor: &mut Cursor) -> Result<Vec<Token>, SyntaxError> {
144 let mut tokens: Vec<Token> = vec![];
145
146 while cursor.has_current() {
147 if cursor.current_matches_char('#') {
148 tokens.extend(Self::parse_comment(cursor)?);
149 continue;
150 }
151
152 if cursor.current_matches_char('\n') {
153 tokens.extend(Self::parse_newline(cursor)?);
154 continue;
155 }
156
157 if cursor.current_matches_range_char(&vec![' '..=' ', '\t'..='\t']) {
158 tokens.extend(Self::parse_spaces(cursor)?);
159 continue;
160 }
161
162 if cursor.current_matches_char(':') {
163 tokens.extend(Self::parse_colon(cursor)?);
164 continue;
165 }
166
167 if cursor.current_matches_char('=') {
168 tokens.extend(Self::parse_equal(cursor)?);
169 continue;
170 }
171
172 if cursor.current_matches_char('"') {
173 tokens.extend(Self::parse_string(cursor)?);
174 continue;
175 }
176
177 if cursor.current_matches_range_char(&vec!['a'..='z', 'A'..='Z', '_'..='_']) {
178 tokens.extend(Self::parse_keyword(cursor)?);
179 continue;
180 }
181
182 if cursor.current_matches_range_char(&vec!['0'..='9']) {
183 tokens.extend(Self::parse_number(cursor)?);
184 continue;
185 }
186
187 if cursor.current_matches_range_char(&vec!['?'..='?']) {
188 tokens.extend(Self::parse_question_mark(cursor)?);
189 continue;
190 }
191
192 if cursor.current_matches_range_char(&vec!['<'..='<']) {
193 tokens.extend(Self::parse_less_than(cursor)?);
194 continue;
195 }
196
197 if cursor.current_matches_range_char(&vec!['>'..='>']) {
198 tokens.extend(Self::parse_greater_than(cursor)?);
199 continue;
200 }
201
202 do yeet SyntaxError::new(
206 "Unexpected token",
207 Span {
208 start: cursor.index,
209 end: cursor.index + 1,
210 },
211 )
212 }
213
214 Ok(tokens)
215 }
216
217 fn parse_comment(cursor: &mut Cursor) -> Result<Vec<Token>, SyntaxError> {
218 cursor.current_char_expected('#');
219 let span_start = cursor.index;
220
221 while cursor.has_current() {
222 cursor.forward(1);
223 if cursor.current_matches_char('\n') {
224 break;
225 }
226 }
227
228 let span = span::Span {
229 start: span_start,
230 end: cursor.index,
231 };
232 let comment_token = Token::new("comment", &cursor, span);
233 Ok(vec![comment_token])
234 }
235
236 fn parse_newline(cursor: &mut Cursor) -> Result<Vec<Token>, SyntaxError> {
237 cursor.current_char_expected('\n');
238 let span_start = cursor.index;
239 cursor.forward(1);
240 let span = span::Span {
241 start: span_start,
242 end: cursor.index,
243 };
244 let newline_token = Token::new("newline", &cursor, span);
245 Ok(vec![newline_token])
246 }
247
248 fn parse_spaces(cursor: &mut Cursor) -> Result<Vec<Token>, SyntaxError> {
249 cursor.current_range_char_expected(&vec![' '..=' ', '\t'..='\t']);
250 let span_start = cursor.index;
251 while cursor.has_current() {
252 if cursor.current_matches_range_char(&vec![' '..=' ', '\t'..='\t']) {
253 cursor.forward(1);
254 continue;
255 }
256 break;
257 }
258 let span = span::Span {
259 start: span_start,
260 end: cursor.index,
261 };
262 let newline_token = Token::new("space", &cursor, span);
263 Ok(vec![newline_token])
264 }
265
266 fn parse_equal(cursor: &mut Cursor) -> Result<Vec<Token>, SyntaxError> {
267 cursor.current_char_expected('=');
268 let span_start = cursor.index;
269 cursor.forward(1);
270 let span = span::Span {
271 start: span_start,
272 end: cursor.index,
273 };
274 let newline_token = Token::new("equal", &cursor, span);
275 Ok(vec![newline_token])
276 }
277
278 fn parse_colon(cursor: &mut Cursor) -> Result<Vec<Token>, SyntaxError> {
279 cursor.current_char_expected(':');
280 let span_start = cursor.index;
281 cursor.forward(1);
282 let span = span::Span {
283 start: span_start,
284 end: cursor.index,
285 };
286 let newline_token = Token::new("colon", &cursor, span);
287 Ok(vec![newline_token])
288 }
289
290 fn parse_question_mark(cursor: &mut Cursor) -> Result<Vec<Token>, SyntaxError> {
291 cursor.current_char_expected('?');
292 let span_start = cursor.index;
293 cursor.forward(1);
294 let span = span::Span {
295 start: span_start,
296 end: cursor.index,
297 };
298 let newline_token = Token::new("question_mark", &cursor, span);
299 Ok(vec![newline_token])
300 }
301
302 fn parse_less_than(cursor: &mut Cursor) -> Result<Vec<Token>, SyntaxError> {
303 cursor.current_char_expected('<');
304 let span_start = cursor.index;
305 cursor.forward(1);
306 let span = span::Span {
307 start: span_start,
308 end: cursor.index,
309 };
310 let newline_token = Token::new("less_than", &cursor, span);
311 Ok(vec![newline_token])
312 }
313
314 fn parse_greater_than(cursor: &mut Cursor) -> Result<Vec<Token>, SyntaxError> {
315 cursor.current_char_expected('>');
316 let span_start = cursor.index;
317 cursor.forward(1);
318 let span = span::Span {
319 start: span_start,
320 end: cursor.index,
321 };
322 let newline_token = Token::new("greater_than", &cursor, span);
323 Ok(vec![newline_token])
324 }
325
326 fn parse_string(cursor: &mut Cursor) -> Result<Vec<Token>, SyntaxError> {
327 cursor.current_char_expected('"');
328 cursor.forward(1);
329 let span_start = cursor.index;
330
331 while cursor.has_current() {
332 if cursor.current_matches_char('\\') {
333 cursor.forward(2);
334 continue;
335 }
336 if cursor.current_matches_char('"') {
337 break;
338 }
339 cursor.forward(1);
340 }
341
342 cursor.current_char_expected('"');
343
344 let span = span::Span {
345 start: span_start,
346 end: cursor.index,
347 };
348 cursor.forward(1);
349 let newline_token = Token::new("string", &cursor, span);
350 Ok(vec![newline_token])
351 }
352
353 fn parse_keyword(cursor: &mut Cursor) -> Result<Vec<Token>, SyntaxError> {
354 cursor.current_range_char_expected(&vec!['a'..='z', 'A'..='Z', '0'..='9', '_'..='_']);
355 let span_start = cursor.index;
356
357 while cursor.has_current() {
358 if !cursor.current_matches_range_char(&vec!['a'..='z', 'A'..='Z', '0'..='9', '_'..='_'])
359 {
360 break;
361 }
362 cursor.forward(1);
363 }
364
365 let span = span::Span {
366 start: span_start,
367 end: cursor.index,
368 };
369 let newline_token = Token::new("keyword", &cursor, span);
370 Ok(vec![newline_token])
371 }
372
373 fn parse_number(cursor: &mut Cursor) -> Result<Vec<Token>, SyntaxError> {
374 cursor.current_range_char_expected(&vec!['0'..='9']);
375 let span_start = cursor.index;
376 let mut decimal = false;
377
378 while cursor.has_current() {
379 if cursor.current_matches_range_char(&vec!['0'..='9']) {
380 cursor.forward(1);
381 continue;
382 }
383 if cursor.current_matches_char('_') {
384 if !cursor.next_matches_range_char(&vec!['0'..='9']) {
385 do yeet SyntaxError::new(
386 "Only one underscore is allowed as numeric separator",
387 Span {
388 start: span_start,
389 end: cursor.index,
390 },
391 )
392 }
393 cursor.forward(1);
394 continue;
395 }
396 if cursor.current_matches_char('.') {
397 if decimal {
398 do yeet SyntaxError::new(
399 "Unexpected token",
400 Span {
401 start: span_start,
402 end: cursor.index,
403 },
404 )
405 }
406 if !cursor.next_matches_range_char(&vec!['0'..='9']) {
407 do yeet SyntaxError::new(
408 "Invalid or unexpected token",
409 Span {
410 start: span_start,
411 end: cursor.index,
412 },
413 )
414 }
415 decimal = true;
416 cursor.forward(1);
417 continue;
418 }
419 break;
420 }
421
422 let span = span::Span {
423 start: span_start,
424 end: cursor.index,
425 };
426 let newline_token = Token::new("number", &cursor, span);
427 Ok(vec![newline_token])
428 }
429}