1use crate::error::{Result, SchemaError};
4use crate::span::Span;
5use crate::token::{Token, TokenKind};
6
/// Streaming lexer over schema source text.
///
/// Keeps a byte offset (`pos`) alongside a `Chars` iterator so tokens can
/// carry byte-accurate `Span`s while still stepping through the input one
/// `char` at a time. `pos`, `chars`, and `peeked` move together: `peek`
/// pulls a character from `chars` into `peeked` without touching `pos`,
/// and `advance` commits it by bumping `pos` and clearing the cache.
pub struct Lexer<'a> {
    /// Full input text; spans and token text slices index into this.
    source: &'a str,
    /// Byte offset of the next unconsumed character in `source`.
    pos: usize,
    /// Iterator over the not-yet-peeked remainder of `source`.
    chars: std::str::Chars<'a>,
    /// One-character lookahead: pulled from `chars` but not yet consumed.
    peeked: Option<char>,
}
18
19impl<'a> Lexer<'a> {
20 pub fn new(source: &'a str) -> Self {
22 Self {
23 source,
24 pos: 0,
25 chars: source.chars(),
26 peeked: None,
27 }
28 }
29
30 pub fn next_token(&mut self) -> Result<Token> {
32 self.skip_whitespace();
33
34 while self.peek() == Some('/')
35 && (self.peek_n(1) == Some('/') || self.peek_n(1) == Some('*'))
36 {
37 self.skip_comment()?;
38 self.skip_whitespace();
39 }
40
41 let start = self.pos;
42
43 if self.is_at_end() {
44 return Ok(Token::new(TokenKind::Eof, Span::new(start, start)));
45 }
46
47 let ch = self.peek().unwrap();
48
49 if ch == '\n' {
50 self.advance();
51 return Ok(Token::new(TokenKind::Newline, Span::new(start, self.pos)));
52 }
53
54 if ch == '"' {
55 return self.lex_string(start);
56 }
57
58 if ch.is_ascii_digit() {
59 return self.lex_number(start);
60 }
61
62 if ch.is_alphabetic() || ch == '_' {
63 return self.lex_identifier_or_keyword(start);
64 }
65
66 if ch == '@' {
67 self.advance();
68 if self.peek() == Some('@') {
69 self.advance();
70 return Ok(Token::new(TokenKind::AtAt, Span::new(start, self.pos)));
71 }
72 return Ok(Token::new(TokenKind::At, Span::new(start, self.pos)));
73 }
74
75 let kind = match ch {
76 '{' => {
77 self.advance();
78 TokenKind::LBrace
79 }
80 '}' => {
81 self.advance();
82 TokenKind::RBrace
83 }
84 '[' => {
85 self.advance();
86 TokenKind::LBracket
87 }
88 ']' => {
89 self.advance();
90 TokenKind::RBracket
91 }
92 '(' => {
93 self.advance();
94 TokenKind::LParen
95 }
96 ')' => {
97 self.advance();
98 TokenKind::RParen
99 }
100 ',' => {
101 self.advance();
102 TokenKind::Comma
103 }
104 ':' => {
105 self.advance();
106 TokenKind::Colon
107 }
108 '=' => {
109 self.advance();
110 TokenKind::Equal
111 }
112 '?' => {
113 self.advance();
114 TokenKind::Question
115 }
116 '!' => {
117 self.advance();
118 if self.peek() == Some('=') {
119 self.advance();
120 TokenKind::BangEqual
121 } else {
122 TokenKind::Bang
123 }
124 }
125 '.' => {
126 self.advance();
127 TokenKind::Dot
128 }
129 '*' => {
130 self.advance();
131 TokenKind::Star
132 }
133 '+' => {
134 self.advance();
135 TokenKind::Plus
136 }
137 '-' => {
138 self.advance();
139 TokenKind::Minus
140 }
141 '<' => {
142 self.advance();
143 if self.peek() == Some('=') {
144 self.advance();
145 TokenKind::LessEqual
146 } else {
147 TokenKind::LAngle
148 }
149 }
150 '>' => {
151 self.advance();
152 if self.peek() == Some('=') {
153 self.advance();
154 TokenKind::GreaterEqual
155 } else {
156 TokenKind::RAngle
157 }
158 }
159 '%' => {
160 self.advance();
161 TokenKind::Percent
162 }
163 '|' => {
164 self.advance();
165 if self.peek() == Some('|') {
166 self.advance();
167 TokenKind::DoublePipe
168 } else {
169 TokenKind::Pipe
170 }
171 }
172 _ => {
173 self.advance();
174 return Err(SchemaError::UnexpectedCharacter(ch, Span::single(start)));
175 }
176 };
177
178 Ok(Token::new(kind, Span::new(start, self.pos)))
179 }
180
181 fn lex_identifier_or_keyword(&mut self, start: usize) -> Result<Token> {
183 while let Some(ch) = self.peek() {
184 if ch.is_alphanumeric() || ch == '_' {
185 self.advance();
186 } else {
187 break;
188 }
189 }
190
191 let text = &self.source[start..self.pos];
192 let kind = TokenKind::from_ident(text);
193 Ok(Token::new(kind, Span::new(start, self.pos)))
194 }
195
196 fn lex_string(&mut self, start: usize) -> Result<Token> {
198 self.advance();
199
200 let mut value = String::new();
201
202 loop {
203 match self.peek() {
204 None | Some('\n') => {
205 return Err(SchemaError::UnterminatedString(Span::new(start, self.pos)));
206 }
207 Some('"') => {
208 self.advance();
209 break;
210 }
211 Some('\\') => {
212 self.advance();
213 match self.peek() {
214 Some(ch) => {
215 let escaped = match ch {
216 'n' => '\n',
217 't' => '\t',
218 'r' => '\r',
219 '\\' => '\\',
220 '"' => '"',
221 _ => ch,
222 };
223 value.push(escaped);
224 self.advance();
225 }
226 None => {
227 return Err(SchemaError::UnterminatedString(Span::new(
228 start, self.pos,
229 )));
230 }
231 }
232 }
233 Some(ch) => {
234 value.push(ch);
235 self.advance();
236 }
237 }
238 }
239
240 Ok(Token::new(
241 TokenKind::String(value),
242 Span::new(start, self.pos),
243 ))
244 }
245
246 fn lex_number(&mut self, start: usize) -> Result<Token> {
248 while let Some(ch) = self.peek() {
249 if ch.is_ascii_digit() {
250 self.advance();
251 } else {
252 break;
253 }
254 }
255
256 if self.peek() == Some('.') && self.peek_n(1).is_some_and(|ch| ch.is_ascii_digit()) {
257 self.advance(); while let Some(ch) = self.peek() {
260 if ch.is_ascii_digit() {
261 self.advance();
262 } else {
263 break;
264 }
265 }
266 }
267
268 let text = &self.source[start..self.pos];
269
270 if text.parse::<f64>().is_err() {
271 return Err(SchemaError::InvalidNumber(
272 text.to_string(),
273 Span::new(start, self.pos),
274 ));
275 }
276
277 Ok(Token::new(
278 TokenKind::Number(text.to_string()),
279 Span::new(start, self.pos),
280 ))
281 }
282
283 fn skip_whitespace(&mut self) {
285 while let Some(ch) = self.peek() {
286 if ch == ' ' || ch == '\t' || ch == '\r' {
287 self.advance();
288 } else {
289 break;
290 }
291 }
292 }
293
294 fn skip_comment(&mut self) -> Result<()> {
296 if self.peek() != Some('/') {
297 return Ok(());
298 }
299
300 let start = self.pos;
301 self.advance(); match self.peek() {
304 Some('/') => {
305 self.advance(); while let Some(ch) = self.peek() {
307 if ch == '\n' {
308 break;
309 }
310 self.advance();
311 }
312 }
313 Some('*') => {
314 self.advance(); loop {
317 match self.peek() {
318 None => {
319 return Err(SchemaError::Lexer(
320 "Unterminated block comment".to_string(),
321 Span::new(start, self.pos),
322 ));
323 }
324 Some('*') => {
325 self.advance();
326 if self.peek() == Some('/') {
327 self.advance();
328 break;
329 }
330 }
331 Some(_) => {
332 self.advance();
333 }
334 }
335 }
336 }
337 _ => {}
338 }
339
340 Ok(())
341 }
342
343 fn peek(&mut self) -> Option<char> {
345 if self.peeked.is_none() {
346 self.peeked = self.chars.next();
347 }
348 self.peeked
349 }
350
351 fn peek_n(&self, n: usize) -> Option<char> {
353 self.source[self.pos..].chars().nth(n)
354 }
355
356 fn advance(&mut self) -> Option<char> {
358 let ch = self.peek()?;
359 self.pos += ch.len_utf8();
360 self.peeked = None;
361 Some(ch)
362 }
363
364 fn is_at_end(&mut self) -> bool {
366 self.peek().is_none()
367 }
368}
369
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the lexer over `source`, collecting every token kind up to,
    /// but not including, `Eof`.
    fn tokenize(source: &str) -> Result<Vec<TokenKind>> {
        let mut lexer = Lexer::new(source);
        let mut kinds = Vec::new();
        loop {
            match lexer.next_token()?.kind {
                TokenKind::Eof => return Ok(kinds),
                other => kinds.push(other),
            }
        }
    }

    // Small builders to keep expected-token lists readable.
    fn ident(name: &str) -> TokenKind {
        TokenKind::Ident(name.to_string())
    }

    fn string(value: &str) -> TokenKind {
        TokenKind::String(value.to_string())
    }

    fn number(text: &str) -> TokenKind {
        TokenKind::Number(text.to_string())
    }

    #[test]
    fn test_keywords() {
        assert_eq!(
            tokenize("datasource generator model enum").unwrap(),
            [
                TokenKind::Datasource,
                TokenKind::Generator,
                TokenKind::Model,
                TokenKind::Enum,
            ]
        );
    }

    #[test]
    fn test_identifiers() {
        assert_eq!(
            tokenize("User email_address _private").unwrap(),
            [ident("User"), ident("email_address"), ident("_private")]
        );
    }

    #[test]
    fn test_string_literals() {
        assert_eq!(
            tokenize(r#""hello" "world""#).unwrap(),
            [string("hello"), string("world")]
        );
    }

    #[test]
    fn test_string_escapes() {
        assert_eq!(
            tokenize(r#""hello \"world\"""#).unwrap(),
            [string("hello \"world\"")]
        );
        assert_eq!(
            tokenize(r#""line1\nline2""#).unwrap(),
            [string("line1\nline2")]
        );
    }

    #[test]
    fn test_numbers() {
        assert_eq!(
            tokenize("42 3.14 100").unwrap(),
            [number("42"), number("3.14"), number("100")]
        );
    }

    #[test]
    fn test_punctuation() {
        assert_eq!(
            tokenize("{ } [ ] ( ) , : = ? .").unwrap(),
            [
                TokenKind::LBrace,
                TokenKind::RBrace,
                TokenKind::LBracket,
                TokenKind::RBracket,
                TokenKind::LParen,
                TokenKind::RParen,
                TokenKind::Comma,
                TokenKind::Colon,
                TokenKind::Equal,
                TokenKind::Question,
                TokenKind::Dot,
            ]
        );
    }

    #[test]
    fn test_attributes() {
        assert_eq!(
            tokenize("@ @@ @id @@map").unwrap(),
            [
                TokenKind::At,
                TokenKind::AtAt,
                TokenKind::At,
                ident("id"),
                TokenKind::AtAt,
                ident("map"),
            ]
        );
    }

    #[test]
    fn test_single_line_comment() {
        // The comment body is dropped; the newline that ends it survives.
        assert_eq!(
            tokenize("model // this is a comment\nUser").unwrap(),
            [TokenKind::Model, TokenKind::Newline, ident("User")]
        );
    }

    #[test]
    fn test_block_comment() {
        assert_eq!(
            tokenize("model /* comment */ User").unwrap(),
            [TokenKind::Model, ident("User")]
        );
    }

    #[test]
    fn test_multiline_block_comment() {
        // Newlines inside a block comment are swallowed with the comment.
        assert_eq!(
            tokenize("model /* line 1\nline 2\nline 3 */ User").unwrap(),
            [TokenKind::Model, ident("User")]
        );
    }

    #[test]
    fn test_unterminated_string() {
        assert!(matches!(
            tokenize(r#""hello"#),
            Err(SchemaError::UnterminatedString(_))
        ));
    }

    #[test]
    fn test_unexpected_character() {
        assert!(matches!(
            tokenize("model #"),
            Err(SchemaError::UnexpectedCharacter('#', _))
        ));
    }

    #[test]
    fn test_newlines() {
        assert_eq!(
            tokenize("model\nUser\n").unwrap(),
            [
                TokenKind::Model,
                TokenKind::Newline,
                ident("User"),
                TokenKind::Newline,
            ]
        );
    }

    #[test]
    fn test_schema_snippet() {
        let source = r#"
model User {
  id Int @id
  email String @unique
}
"#;
        let kinds = tokenize(source).unwrap();
        for expected in [
            TokenKind::Model,
            ident("User"),
            TokenKind::LBrace,
            TokenKind::At,
        ] {
            assert!(kinds.contains(&expected), "missing {expected:?}");
        }
    }
}
569}