tiny_json_rs/lexer/
mod.rs
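//! A byte-oriented lexer for JSON. `Lexer::tokenize` walks the input one
//! byte at a time, driving a small state machine keyed on the type of the
//! token currently being built, and returns the accumulated `Token` list.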

use alloc::string::{String, ToString};
use alloc::vec::Vec;

const SIMPLE_ALPHA: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_";
const DIGITS: &str = "0123456789";

// Structural characters the lexer dispatches on; any other byte is a plain `Char`.
pub enum Chars {
    LBrace,
    RBrace,
    LBracket,
    RBracket,
    Colon,
    Comma,
    Quote,
    NewLine,
    MinusSign,
    Dot,
    Space,
    Char(u8),
}

impl Chars {
    pub fn from(ch: u8) -> Chars {
        match ch {
            b'{' => Chars::LBrace,
            b'}' => Chars::RBrace,
            b'[' => Chars::LBracket,
            b']' => Chars::RBracket,
            b':' => Chars::Colon,
            b',' => Chars::Comma,
            b'"' => Chars::Quote,
            b'\n' => Chars::NewLine,
            b'-' => Chars::MinusSign,
            b'.' => Chars::Dot,
            b' ' => Chars::Space,
            _ => Chars::Char(ch),
        }
    }

    pub fn to_string(&self) -> String {
        self.to_char().to_string()
    }

    pub fn to_char(&self) -> char {
        match self {
            Chars::LBrace => '{',
            Chars::RBrace => '}',
            Chars::LBracket => '[',
            Chars::RBracket => ']',
            Chars::Colon => ':',
            Chars::Comma => ',',
            Chars::Quote => '"',
            Chars::NewLine => '\n',
            Chars::MinusSign => '-',
            Chars::Dot => '.',
            Chars::Space => ' ',
            Chars::Char(ch) => *ch as char,
        }
    }
}

#[derive(Clone, Debug, PartialEq)]
pub enum StringType {
    /// Contains only `SIMPLE_ALPHA` characters and digits.
    SimpleString,
    /// Contains at least one character outside that set (spaces, punctuation, ...).
    ComplexString,
}

#[derive(Clone, Debug, PartialEq)]
pub enum TokenType {
    None,
    Int,
    String(StringType),
    /// A bare word such as `true`, `false`, or `null`; validated by the parser.
    ReservedString,
    Float,
    LBrace,
    RBrace,
    LBracket,
    RBracket,
    Colon,
    Comma,
}

#[derive(Clone, Debug, PartialEq)]
pub struct Token {
    pub token_type: TokenType,
    pub literal: String,
}

impl Default for Token {
    fn default() -> Token {
        Token {
            token_type: TokenType::None,
            literal: String::new(),
        }
    }
}

#[derive(Clone, Debug)]
pub enum LexerError {
    /// An unexpected character; carries the offending character as a string.
    InvalidTokenError(String),
    /// Attempted to read past the end of the input.
    OutOfRangeError,
}

#[derive(Clone, Debug)]
pub struct Lexer {
    pub input: String,
    /// Byte offset of the next character to read.
    pub position: usize,
    pub token_list: Vec<Token>,
    /// The token currently being assembled; `TokenType::None` when idle.
    pub current_token: Token,
    /// Zero-based line counter, advanced on `\n` between tokens.
    pub line: usize,
}

impl Lexer {
    pub fn new(input: String) -> Lexer {
        Lexer {
            input,
            position: 0,
            token_list: Vec::new(),
            line: 0,
            current_token: Token::default(),
        }
    }

    fn read_char(&mut self) -> Result<Chars, LexerError> {
        if self.position >= self.input.len() {
            return Err(LexerError::OutOfRangeError);
        }

        // NOTE: the lexer walks raw bytes, so only ASCII input survives
        // intact; multi-byte UTF-8 sequences are split into single bytes.
        let ch = self.input.as_bytes()[self.position];
        self.position += 1;
        Ok(Chars::from(ch))
    }

    // Reads one character and decides which kind of token it begins.
    fn process_start_token(&mut self) -> Result<(), LexerError> {
        let ch = self.read_char()?;
        match ch {
            Chars::LBrace => {
                self.current_token.token_type = TokenType::LBrace;
                self.current_token.literal = ch.to_string();
                self.token_list.push(self.current_token.clone());
            }
            Chars::RBrace => {
                self.current_token.token_type = TokenType::RBrace;
                self.current_token.literal = ch.to_string();
                self.token_list.push(self.current_token.clone());
            }
            Chars::LBracket => {
                self.current_token.token_type = TokenType::LBracket;
                self.current_token.literal = ch.to_string();
                self.token_list.push(self.current_token.clone());
            }
            Chars::RBracket => {
                self.current_token.token_type = TokenType::RBracket;
                self.current_token.literal = ch.to_string();
                self.token_list.push(self.current_token.clone());
            }
            Chars::Colon => {
                self.current_token.token_type = TokenType::Colon;
                self.current_token.literal = ch.to_string();
                self.token_list.push(self.current_token.clone());
            }
            Chars::Comma => {
                self.current_token.token_type = TokenType::Comma;
                self.current_token.literal = ch.to_string();
                self.token_list.push(self.current_token.clone());
            }
            Chars::Quote => {
                // Opening quote: start a string token. The quotes themselves
                // are not part of the literal.
                self.current_token.token_type = TokenType::String(StringType::SimpleString);
                self.current_token.literal = String::new();
            }
            Chars::NewLine => {
                self.line += 1;
            }
            Chars::MinusSign => {
                // A leading minus starts a number; the token is pushed only
                // once the digits end, so do not push it here.
                self.current_token.token_type = TokenType::Int;
                self.current_token.literal = ch.to_string();
            }
            Chars::Space => {}
            Chars::Char(c) => {
                if DIGITS.contains(c as char) {
                    self.current_token.token_type = TokenType::Int;
                    self.current_token.literal = ch.to_string();
                } else if SIMPLE_ALPHA.contains(c as char) {
                    self.current_token.token_type = TokenType::ReservedString;
                    self.current_token.literal = ch.to_string();
                } else if c == b'\t' || c == b'\r' {
                    // Treat tabs and carriage returns between tokens as
                    // insignificant whitespace, like `Chars::Space`.
                } else {
                    return Err(LexerError::InvalidTokenError(ch.to_string()));
                }
            }
            _ => {
                // Only `Chars::Dot` reaches here: a number may not start with '.'.
                return Err(LexerError::InvalidTokenError(ch.to_string()));
            }
        }
        Ok(())
    }

    fn process_int_token(&mut self) -> Result<(), LexerError> {
        let ch = self.read_char()?;
        match ch {
            Chars::Char(_) => {
                if DIGITS.contains(ch.to_char()) {
                    self.current_token.literal.push(ch.to_char());
                } else {
                    return Err(LexerError::InvalidTokenError(ch.to_string()));
                }
            }
            Chars::Dot => {
                // A decimal point promotes the token from Int to Float.
                self.current_token.token_type = TokenType::Float;
                self.current_token.literal.push('.');
            }
            _ => {
                // Any structural character ends the number: push the token
                // and step back one byte so the terminator is read again as
                // the start of the next token.
                self.token_list.push(self.current_token.clone());
                self.current_token = Token::default();
                self.position -= 1;
            }
        }

        Ok(())
    }

    fn process_string_token(&mut self) -> Result<(), LexerError> {
        let ch = self.read_char()?;
        match ch {
            Chars::Char(c) => {
                if SIMPLE_ALPHA.contains(c as char) || DIGITS.contains(c as char) {
                    self.current_token.literal.push(ch.to_char());
                } else {
                    // Any byte outside the simple set demotes the string to
                    // ComplexString. NOTE: escape sequences are not handled;
                    // a backslash is stored verbatim, so `\"` still closes
                    // the string.
                    self.current_token.token_type = TokenType::String(StringType::ComplexString);
                    self.current_token.literal.push(ch.to_char());
                }
            }
            Chars::Quote => {
                // Closing quote: the string token is complete.
                self.token_list.push(self.current_token.clone());
                self.current_token = Token::default();
            }
            _ => {
                self.current_token.token_type = TokenType::String(StringType::ComplexString);
                self.current_token.literal.push(ch.to_char());
            }
        }

        Ok(())
    }

    fn process_reserved_string(&mut self) -> Result<(), LexerError> {
        let ch = self.read_char()?;
        match ch {
            Chars::Char(c) => {
                if SIMPLE_ALPHA.contains(c as char) || DIGITS.contains(c as char) {
                    self.current_token.literal.push(ch.to_char());
                } else {
                    return Err(LexerError::InvalidTokenError(ch.to_string()));
                }
            }
            _ => {
                // A structural character ends the bare word; push it and
                // re-read the terminator.
                self.token_list.push(self.current_token.clone());
                self.current_token = Token::default();
                self.position -= 1;
            }
        }

        Ok(())
    }

    fn process_float_token(&mut self) -> Result<(), LexerError> {
        let ch = self.read_char()?;
        match ch {
            Chars::Char(_) => {
                if DIGITS.contains(ch.to_char()) {
                    self.current_token.literal.push(ch.to_char());
                } else {
                    return Err(LexerError::InvalidTokenError(ch.to_string()));
                }
            }
            _ => {
                self.token_list.push(self.current_token.clone());
                self.current_token = Token::default();
                self.position -= 1;
            }
        }

        Ok(())
    }

    // Dispatches on the kind of token currently being built. The lexer is a
    // small state machine: `current_token.token_type` is the state, and each
    // `process_*` method consumes one character in that state.
    fn process_next_token(&mut self) -> Result<(), LexerError> {
        match self.current_token.token_type {
            TokenType::None => {
                self.process_start_token()?;
            }
            TokenType::Int => {
                self.process_int_token()?;
            }
            TokenType::String(_) => {
                self.process_string_token()?;
            }
            TokenType::ReservedString => {
                self.process_reserved_string()?;
            }
            TokenType::Float => {
                self.process_float_token()?;
            }
            _ => {
                // A single-character token was just pushed; reset the state
                // without consuming any input.
                self.current_token = Token::default();
            }
        }

        Ok(())
    }

    pub fn tokenize(&mut self) -> Result<Vec<Token>, LexerError> {
        self.token_list = Vec::new();
        self.current_token = Token::default();
        while self.position < self.input.len() {
            self.process_next_token()?;
        }

        // Flush a token that was still being assembled when the input ended
        // (e.g. a trailing number or bare word); an unterminated string is
        // an error.
        match self.current_token.token_type {
            TokenType::Int | TokenType::Float | TokenType::ReservedString => {
                self.token_list.push(self.current_token.clone());
                self.current_token = Token::default();
            }
            TokenType::String(_) => return Err(LexerError::OutOfRangeError),
            _ => {}
        }

        Ok(self.token_list.clone())
    }
}
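
// A small test module sketching how the lexer is driven end to end. These
// tests are additions for illustration, not part of the original module:
// the inputs and expected token sequences are assumptions chosen to
// exercise one token of each kind.
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::string::ToString;
    use alloc::vec::Vec;

    #[test]
    fn tokenizes_a_flat_object() {
        let mut lexer = Lexer::new("{\"name\":\"Bob\",\"age\":30}".to_string());
        let tokens = lexer.tokenize().unwrap();
        let types: Vec<TokenType> = tokens.iter().map(|t| t.token_type.clone()).collect();
        assert_eq!(
            types,
            [
                TokenType::LBrace,
                TokenType::String(StringType::SimpleString),
                TokenType::Colon,
                TokenType::String(StringType::SimpleString),
                TokenType::Comma,
                TokenType::String(StringType::SimpleString),
                TokenType::Colon,
                TokenType::Int,
                TokenType::RBrace,
            ]
        );
        assert_eq!(tokens[1].literal, "name");
        assert_eq!(tokens[7].literal, "30");
    }

    #[test]
    fn tokenizes_negative_floats_and_bare_words() {
        let mut lexer = Lexer::new("[-1.5,true]".to_string());
        let tokens = lexer.tokenize().unwrap();
        // The '-' is folded into the number rather than emitted on its own.
        assert_eq!(tokens[1].token_type, TokenType::Float);
        assert_eq!(tokens[1].literal, "-1.5");
        assert_eq!(tokens[3].token_type, TokenType::ReservedString);
        assert_eq!(tokens[3].literal, "true");
    }

    #[test]
    fn flushes_a_trailing_scalar() {
        // A scalar that runs to the end of the input is flushed after the loop.
        let mut lexer = Lexer::new("42".to_string());
        let tokens = lexer.tokenize().unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].literal, "42");
    }
}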