rust_ts_json_compiler/lexer/
lexer.rs

1use std::iter::Peekable;
2use std::str::Chars;
3
4use super::Token;
5
6pub struct Lexer<'a> {
7    input: Peekable<Chars<'a>>,
8
9    ch: u8,
10}
11
12#[allow(dead_code)]
13impl<'a> Lexer<'a> {
14    pub fn new(input: &'a str) -> Self {
15        let mut lexer = Self {
16            input: input.chars().peekable(),
17            ch: 0,
18        };
19        lexer.next_char();
20        lexer
21    }
22
23    fn next_char(&mut self) {
24        self.ch = match self.input.peek() {
25            Some(ch) => *ch as u8,
26            None => 0,
27        };
28
29        self.input.next();
30    }
31
32    pub fn peek(&mut self) -> u8 {
33        match self.input.peek() {
34            Some(ch) => *ch as u8,
35            None => 0,
36        }
37    }
38
39    pub fn next_token(&mut self) -> Token {
40        self.skip_whitespace();
41
42        match self.ch {
43            b'a'..=b'z' | b'A'..=b'Z' | b'_' => self.consume_ident(),
44            b'.' => {
45                self.next_char();
46                Token::Dot
47            }
48            b':' => {
49                self.next_char();
50                Token::Colon
51            }
52            b'0'..=b'9' => self.consume_int(),
53            b',' => {
54                self.next_char();
55                Token::Comma
56            }
57            b'(' => {
58                self.next_char();
59                Token::LRound
60            }
61            b')' => {
62                self.next_char();
63                Token::RRound
64            }
65            b'{' => {
66                self.next_char();
67                Token::LCurly
68            }
69            b'}' => {
70                self.next_char();
71                Token::RCurly
72            }
73            b'[' => {
74                self.next_char();
75                Token::LSquare
76            }
77            b']' => {
78                self.next_char();
79                Token::RSquare
80            }
81            b'"' | b'\'' => self.consume_string(),
82            0 => Token::Eof,
83            _ => {
84                self.next_char();
85                Token::Illegal
86            }
87        }
88    }
89
90    fn consume_string(&mut self) -> Token {
91        let mut value = String::new();
92        let quote_type = self.ch;
93        let mut next_skip = false;
94
95        self.next_char();
96
97        while self.ch != quote_type || next_skip {
98            next_skip = self.ch == b'\\';
99            value.push(self.ch as char);
100            self.next_char();
101        }
102
103        self.next_char();
104
105        Token::Str(value)
106    }
107
108    fn consume_ident(&mut self) -> Token {
109        let mut value = String::new();
110
111        while self.ch.is_ascii_alphanumeric() || self.ch == b'_' {
112            value.push(self.ch as char);
113            self.next_char();
114        }
115
116        Token::Ident(value)
117    }
118
119    fn skip_whitespace(&mut self) {
120        while let b' ' | b'\t' | b'\n' | b'\r' = self.ch {
121            self.next_char();
122        }
123    }
124
125    fn consume_int(&mut self) -> Token {
126        let mut value = String::new();
127
128        while self.ch.is_ascii_digit() || self.ch == b'.' {
129            value.push(self.ch as char);
130            self.next_char();
131        }
132
133        Token::Int(value)
134    }
135}