use alloc::string::{String, ToString};
use alloc::vec::Vec;

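// Byte alphabets used for classification: SIMPLE_ALPHA covers bare words and
// "simple" string content, DIGITS covers the numeric characters.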
const SIMPLE_ALPHA: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_";
const DIGITS: &str = "0123456789";
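/// A single input byte, classified into the characters the lexer treats
/// specially; anything else falls through to `Char`.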
pub enum Chars {
    LBrace,
    RBrace,
    LBracket,
    RBracket,
    Colon,
    Comma,
    Quote,
    NewLine,
    MinusSign,
    Dot,
    Space,
    Char(u8),
}

impl Chars {
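    /// Classifies one raw input byte.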
    pub fn from(ch: u8) -> Chars {
        match ch {
            b'{' => Chars::LBrace,
            b'}' => Chars::RBrace,
            b'[' => Chars::LBracket,
            b']' => Chars::RBracket,
            b':' => Chars::Colon,
            b',' => Chars::Comma,
            b'"' => Chars::Quote,
            b'\n' => Chars::NewLine,
            b'-' => Chars::MinusSign,
            b'.' => Chars::Dot,
            b' ' => Chars::Space,
            _ => Chars::Char(ch),
        }
    }

    pub fn to_string(&self) -> String {
        self.to_char().to_string()
    }

    pub fn to_char(&self) -> char {
        match self {
            Chars::LBrace => '{',
            Chars::RBrace => '}',
            Chars::LBracket => '[',
            Chars::RBracket => ']',
            Chars::Colon => ':',
            Chars::Comma => ',',
            Chars::Quote => '"',
            Chars::NewLine => '\n',
            Chars::MinusSign => '-',
            Chars::Dot => '.',
            Chars::Space => ' ',
            Chars::Char(ch) => *ch as char,
        }
    }
}

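/// Whether a string contains only `SIMPLE_ALPHA`/`DIGITS` characters
/// (`SimpleString`) or anything else (`ComplexString`).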
#[derive(Clone, Debug, PartialEq)]
pub enum StringType {
    SimpleString,
    ComplexString,
}

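/// The kind of token being built; `None` marks the gap between tokens.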
#[derive(Clone, Debug, PartialEq)]
pub enum TokenType {
    None,
    Int,
    String(StringType),
    ReservedString,
    Float,
    LBrace,
    RBrace,
    LBracket,
    RBracket,
    Colon,
    Comma,
}

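/// A lexed token: its kind plus the literal text it was read from.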
#[derive(Clone, Debug, PartialEq)]
pub struct Token {
    pub token_type: TokenType,
    pub literal: String,
}

impl Default for Token {
    fn default() -> Token {
        Token {
            token_type: TokenType::None,
            literal: String::new(),
        }
    }
}

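/// Errors raised during tokenizing: an unexpected character, or a read past
/// the end of the input.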
#[derive(Clone, Debug)]
pub enum LexerError {
    InvalidTokenError(String),
    OutOfRangeError,
}

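/// A byte-oriented lexer over `input`: `position` is the read cursor,
/// `current_token` accumulates the token in progress, and `line` counts
/// newlines seen.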
#[derive(Clone, Debug)]
pub struct Lexer {
    pub input: String,
    pub position: usize,
    pub token_list: Vec<Token>,
    pub current_token: Token,
    pub line: usize,
}

impl Lexer {
    pub fn new(input: String) -> Lexer {
        Lexer {
            input,
            position: 0,
            token_list: Vec::new(),
            line: 0,
            current_token: Token::default(),
        }
    }

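    /// Returns the byte at `position` (classified) and advances the cursor.
    /// Note the lexer works on raw bytes, so multi-byte UTF-8 sequences are
    /// seen one byte at a time.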
    fn read_char(&mut self) -> Result<Chars, LexerError> {
        if self.position >= self.input.len() {
            return Err(LexerError::OutOfRangeError);
        }

        let ch = self.input.as_bytes()[self.position];
        self.position += 1;
        Ok(Chars::from(ch))
    }

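    /// Dispatches on the first character of a new token: single-character
    /// punctuation is emitted immediately, while quotes, digits, '-', and
    /// letters open multi-character tokens.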
    fn process_start_token(&mut self) -> Result<(), LexerError> {
        let ch = self.read_char()?;
        match ch {
            Chars::LBrace => {
                self.current_token.token_type = TokenType::LBrace;
                self.current_token.literal = ch.to_string();
                self.token_list.push(self.current_token.clone());
            }
            Chars::RBrace => {
                self.current_token.token_type = TokenType::RBrace;
                self.current_token.literal = ch.to_string();
                self.token_list.push(self.current_token.clone());
            }
            Chars::LBracket => {
                self.current_token.token_type = TokenType::LBracket;
                self.current_token.literal = ch.to_string();
                self.token_list.push(self.current_token.clone());
            }
            Chars::RBracket => {
                self.current_token.token_type = TokenType::RBracket;
                self.current_token.literal = ch.to_string();
                self.token_list.push(self.current_token.clone());
            }
            Chars::Colon => {
                self.current_token.token_type = TokenType::Colon;
                self.current_token.literal = ch.to_string();
                self.token_list.push(self.current_token.clone());
            }
            Chars::Comma => {
                self.current_token.token_type = TokenType::Comma;
                self.current_token.literal = ch.to_string();
                self.token_list.push(self.current_token.clone());
            }
            Chars::Quote => {
                // Opening quote: start accumulating a string literal.
                self.current_token.token_type = TokenType::String(StringType::SimpleString);
                self.current_token.literal = String::new();
            }
            Chars::NewLine => {
                self.line += 1;
            }
            Chars::MinusSign => {
                // A leading '-' starts a number; keep accumulating digits
                // instead of emitting the bare sign as its own token.
                self.current_token.token_type = TokenType::Int;
                self.current_token.literal = ch.to_string();
            }
            Chars::Space => {}
            Chars::Char(c) => {
                if DIGITS.contains(c as char) {
                    self.current_token.token_type = TokenType::Int;
                    self.current_token.literal = ch.to_string();
                } else if SIMPLE_ALPHA.contains(c as char) {
                    self.current_token.token_type = TokenType::ReservedString;
                    self.current_token.literal = ch.to_string();
                } else {
                    return Err(LexerError::InvalidTokenError(ch.to_string()));
                }
            }
            // Only `Chars::Dot` reaches here; a token cannot start with '.'.
            _ => {
                return Err(LexerError::InvalidTokenError(ch.to_string()));
            }
        }
        Ok(())
    }

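    /// Continues an integer: digits accumulate, a '.' upgrades the token to
    /// a float, and any other separator ends the number.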
    fn process_int_token(&mut self) -> Result<(), LexerError> {
        let ch = self.read_char()?;
        match ch {
            Chars::Char(_) => {
                if DIGITS.contains(ch.to_char()) {
                    self.current_token.literal.push(ch.to_char());
                } else {
                    return Err(LexerError::InvalidTokenError(ch.to_string()));
                }
            }
            Chars::Dot => {
                // A decimal point upgrades the token from Int to Float.
                self.current_token.token_type = TokenType::Float;
                self.current_token.literal.push('.');
            }
            _ => {
                // Any other character ends the number: emit the token and
                // rewind so the terminator is re-read as a new token.
                self.token_list.push(self.current_token.clone());
                self.current_token = Token::default();
                self.position -= 1;
            }
        }

        Ok(())
    }

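    /// Continues a quoted string until the closing quote. There is no escape
    /// handling, so a `\"` sequence still ends the string at the quote.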
    fn process_string_token(&mut self) -> Result<(), LexerError> {
        let ch = self.read_char()?;
        match ch {
            Chars::Char(c) => {
                if SIMPLE_ALPHA.contains(c as char) || DIGITS.contains(c as char) {
                    self.current_token.literal.push(ch.to_char());
                } else {
                    // Any byte outside the simple alphabet demotes the
                    // string to a ComplexString.
                    self.current_token.token_type = TokenType::String(StringType::ComplexString);
                    self.current_token.literal.push(ch.to_char());
                }
            }
            Chars::Quote => {
                // Closing quote: the string token is complete.
                self.token_list.push(self.current_token.clone());
                self.current_token = Token::default();
            }
            _ => {
                self.current_token.token_type = TokenType::String(StringType::ComplexString);
                self.current_token.literal.push(ch.to_char());
            }
        }

        Ok(())
    }

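    /// Continues a bare word (reserved strings such as `true`, `false`, or
    /// `null` in JSON-like input): only `SIMPLE_ALPHA` and digit characters
    /// may follow.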
    fn process_reserved_string(&mut self) -> Result<(), LexerError> {
        let ch = self.read_char()?;
        match ch {
            Chars::Char(c) => {
                if SIMPLE_ALPHA.contains(c as char) || DIGITS.contains(c as char) {
                    self.current_token.literal.push(ch.to_char());
                } else {
                    return Err(LexerError::InvalidTokenError(ch.to_string()));
                }
            }
            _ => {
                self.token_list.push(self.current_token.clone());
                self.current_token = Token::default();
                self.position -= 1;
            }
        }

        Ok(())
    }

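    /// Continues a float after the '.': digits accumulate, a letter is an
    /// error, and any other character ends the number.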
    fn process_float_token(&mut self) -> Result<(), LexerError> {
        let ch = self.read_char()?;
        match ch {
            Chars::Char(_) => {
                if DIGITS.contains(ch.to_char()) {
                    self.current_token.literal.push(ch.to_char());
                } else {
                    return Err(LexerError::InvalidTokenError(ch.to_string()));
                }
            }
            _ => {
                self.token_list.push(self.current_token.clone());
                self.current_token = Token::default();
                self.position -= 1;
            }
        }

        Ok(())
    }

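    /// Advances the state machine by one character, dispatching on the kind
    /// of token currently being accumulated.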
    fn process_next_token(&mut self) -> Result<(), LexerError> {
        match self.current_token.token_type {
            TokenType::None => self.process_start_token()?,
            TokenType::Int => self.process_int_token()?,
            TokenType::String(_) => self.process_string_token()?,
            TokenType::ReservedString => self.process_reserved_string()?,
            TokenType::Float => self.process_float_token()?,
            // A single-character token was just emitted; reset so the next
            // byte starts a fresh token.
            _ => self.current_token = Token::default(),
        }

        Ok(())
    }

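    /// Runs the lexer over the whole input and returns the token list.
    ///
    /// A minimal sketch of intended use (marked `ignore` since the crate
    /// path is not shown here):
    ///
    /// ```ignore
    /// let mut lexer = Lexer::new("{\"key\": 42}".to_string());
    /// let tokens = lexer.tokenize().unwrap();
    /// assert_eq!(tokens.len(), 5); // {, "key", :, 42, }
    /// ```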
    pub fn tokenize(&mut self) -> Result<Vec<Token>, LexerError> {
        self.token_list = Vec::new();
        self.current_token = Token::default();
        while self.position < self.input.len() {
            self.process_next_token()?;
        }

        // Flush a number or bare word still being accumulated when the
        // input ends; otherwise a trailing token such as `123` is dropped.
        if matches!(
            self.current_token.token_type,
            TokenType::Int | TokenType::Float | TokenType::ReservedString
        ) {
            self.token_list.push(self.current_token.clone());
        }

        Ok(self.token_list.clone())
    }
}