tiny_json/
token.rs

1use std::fmt::{Display, Formatter};
2use crate::parser::Parser;
3use crate::util::*;
4
/// The lexical category of a [`Token`] produced by the tokenizer.
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
pub enum TokenType {
    /// `{`
    BracesStart,
    /// `}`
    BracesEnd,
    /// `[`
    BracketsStart,
    /// `]`
    BracketsEnd,
    /// `:`
    Separator,
    /// `,`
    Comma,
    /// A double-quoted string.
    String,
    /// A numeric literal (optional leading `-`, optional fraction and exponent).
    Number,
    /// A run of word characters (as classified by `is_word_char`).
    Word,
    /// Not produced by the tokenizer in this file; presumably marks the
    /// state before the first token is read (verify against the parser).
    StartF,
    /// End of input.
    EndF,
    /// Not produced by the tokenizer in this file; presumably a placeholder
    /// for unrecognised input (verify against the parser).
    Unknown,
}
20
/// The kind of literal the tokenizer was reading when it hit malformed input.
///
/// Passed to the parser's `unexpected_token_type_kind` error reporter.
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
pub enum TokenTypeKind {
    /// A string literal (for example, one with no closing quote).
    String,
    /// A numeric literal (for example, one missing a required digit).
    Number,
}
26
27// #[derive(Clone)]
28pub struct Token {
29    pub token_type: TokenType,
30    pub value: String,
31    pub start: usize,
32    pub end: usize,
33}
34
35impl Token {
36    pub fn new(token_type: TokenType) -> Self {
37        Token {
38            token_type,
39            value: String::from(""),
40            start: 0,
41            end: 0,
42        }
43    }
44
45    pub fn create(token_type: TokenType, value: &str, start: usize, end: usize) -> Self {
46        Token {
47            token_type,
48            value: value.to_string(),
49            start,
50            end,
51        }
52    }
53
54    pub fn to_string(&self) -> String {
55        format!("Token[{}] ({}, {})", self.value, self.start, self.end)
56    }
57}
58
59impl Clone for Token {
60    fn clone(&self) -> Self {
61        Token::create(
62            self.token_type,
63            &self.value.to_string(),
64            self.start,
65            self.end,
66        )
67    }
68}
69
70impl Parser {
71    pub fn move_next(&mut self) {
72        let token: Token;
73        self.last_token = self.current_token.clone();
74
75        let mut start_pos: i32 = -1;
76        while self.is_valid_pos() && start_pos != (self.pos as i32) {
77            start_pos = self.pos as i32;
78            self.skip_space();
79            self.skip_line_comment();
80            self.skip_block_comment();
81        }
82
83        if !self.is_valid_pos() {
84            token = Token::create(TokenType::EndF, &"EOF".to_string(), self.length, self.length);
85        } else {
86            token = self.read_token();
87        }
88        self.current_token = token;
89    }
90
91    pub fn read_token(&mut self) -> Token {
92        let code = self.get_current_code();
93        if is_number_char(code) || code == 45 { // 0-9 or '-'
94            return self.read_number_token();
95        } else if is_word_char(code) { // A-Z or a-z
96            return self.read_word_token();
97        }
98
99        let start_pos = self.pos;
100        match code {
101            123 => { // '{'
102                self.pos += 1;
103                Token::create(TokenType::BracesStart, "{", start_pos, self.pos)
104            }
105            125 => { // '}'
106                self.pos += 1;
107                Token::create(TokenType::BracesEnd, "}", start_pos, self.pos)
108            }
109            91 => { // '['
110                self.pos += 1;
111                Token::create(TokenType::BracketsStart, "[", start_pos, self.pos)
112            }
113            93 => { // ']'
114                self.pos += 1;
115                Token::create(TokenType::BracketsEnd, "]", start_pos, self.pos)
116            }
117            58 => { // ':'
118                self.pos += 1;
119                Token::create(TokenType::Separator, ":", start_pos, self.pos)
120            }
121            44 => { // ','
122                self.pos += 1;
123                Token::create(TokenType::Comma, ",", start_pos, self.pos)
124            }
125            34 => { // '"'
126                self.read_string_token()
127            }
128            _ => self.unexpected_pos(self.pos)
129        }
130    }
131
132    pub fn read_string_token(&mut self) -> Token {
133        let mut token = Token::new(TokenType::String);
134        token.start = self.pos;
135        self.pos += 1;
136
137        let mut chunk_start = self.pos;
138        let mut code: usize = 0;
139        let mut value = String::new();
140        let mut is_escape_char = false;
141
142        while self.is_valid_pos() {
143            if is_escape_char {
144                is_escape_char = false;
145                self.pos += 1;
146                continue;
147            }
148
149            code = self.get_current_code();
150            if code == 34 { // '"'
151                break;
152            }
153            if code == 92 { // '\': escape
154                is_escape_char = true;
155                value.push_str(&self.slice_str(chunk_start, self.pos));
156                self.pos += 1;
157                chunk_start = self.pos;
158            } else {
159                self.pos += 1;
160            }
161        }
162
163        if code != 34 { // '"'
164            self.unexpected_token_type_kind(&TokenTypeKind::String, self.pos);
165        }
166
167        value.push_str(&self.slice_str(chunk_start, self.pos));
168        self.pos += 1;
169
170        token.value = value.to_string();
171        token.end = self.pos;
172        token
173    }
174
175    pub fn read_number_token(&mut self) -> Token {
176        let chunk_start = self.pos;
177        let mut count = 0;
178        let mut allow_dot = false;
179        let mut allow_e = false;
180        let mut expect_a_number = true;
181        let mut code = self.get_current_code();
182
183        if code == 45 { // '-'
184            self.pos += 1;
185        }
186
187        while self.is_valid_pos() {
188            code = self.get_current_code();
189            count = count + 1;
190
191            if is_number_char(code) {
192                if count == 1 {
193                    allow_dot = true;
194                    allow_e = true;
195                }
196                expect_a_number = false;
197                self.pos += 1;
198            } else if expect_a_number {
199                break;
200            } else if allow_e && (code == 69 || code == 101) { // 'E' or 'e'
201                allow_e = false;
202                allow_dot = false;
203                expect_a_number = true;
204                self.pos += 1;
205                if self.is_valid_pos() && (self.get_current_code() == 43 || self.get_current_code() == 45) { // '+' or '-'
206                    self.pos += 1;
207                }
208            } else if allow_dot && code == 46 { // '.'
209                allow_dot = false;
210                expect_a_number = true;
211                self.pos += 1;
212            } else {
213                break;
214            }
215        }
216
217        // check
218        if expect_a_number {
219            self.unexpected_token_type_kind(&TokenTypeKind::Number, self.pos);
220        }
221
222        let value = &self.slice_str(chunk_start, self.pos);
223        Token::create(TokenType::Number, value, chunk_start, self.pos)
224    }
225
226    pub fn read_word_token(&mut self) -> Token {
227        let chunk_start = self.pos;
228        while self.is_valid_pos() && is_word_char(self.get_code_at(self.pos)) {
229            self.pos += 1;
230        }
231        let value = &self.slice_str(chunk_start, self.pos);
232        Token::create(TokenType::Word, value, chunk_start, self.pos)
233    }
234}