1use std::iter::Peekable;
7use std::str::Chars;
8
9use super::jsonfixer_error::{SyntaxError, JsonFixerError};
10
/// A location in the tokenizer's input.
///
/// Values are produced by `JsonTokenizer::current_position`, which copies the
/// tokenizer's running `line` and `column` counters.
#[derive(Debug, PartialEq, Clone)]
pub struct Position {
    /// Line counter; the tokenizer starts at 1 and bumps it on every `'\n'`.
    pub line: usize,
    /// Column counter within the line, advanced once per consumed `char`.
    pub column: usize,
}
19
20#[derive(Debug, PartialEq, Clone)]
21pub enum Token {
22 LeftBrace(Position), RightBrace(Position), LeftBracket(Position), RightBracket(Position), Colon(Position), Comma(Position), String(String, Position), Number(String, Position), Boolean(bool, Position), Null(Position),
32 Whitespace(String, Position), UnquotedString(String, Position), }
35
36impl Token {
37 pub fn get(&self) -> String {
39 match self {
40 Self::LeftBrace(_) => "'{'".to_string(),
41 Self::RightBrace(_) => "'}'".to_string(),
42 Self::LeftBracket(_) => "'['".to_string(),
43 Self::RightBracket(_) => "']'".to_string(),
44 Self::Colon(_) => "':'".to_string(),
45 Self::Comma(_) => "','".to_string(),
46 Self::String(s, _) => format!("String({s})"),
47 Self::Number(n, _) => format!("Number({n})"),
48 Self::Boolean(b, _) => format!("Boolean({b})"),
49 Self::Null(_) => "null".to_string(),
50 Self::Whitespace(s, _) => format!("{}", s),
51 Self::UnquotedString(s, _) => format!("{}", s),
52 }
53 }
54 pub fn pos(&self) -> &Position {
55 match self {
56 Self::LeftBrace(pos) => pos,
57 Self::RightBrace(pos) => pos,
58 Self::LeftBracket(pos) => pos,
59 Self::RightBracket(pos) => pos,
60 Self::Colon(pos) => pos,
61 Self::Comma(pos) => pos,
62 Self::String(_, pos) => pos,
63 Self::Number(_, pos) => pos,
64 Self::Boolean(_, pos) => pos,
65 Self::Null(pos) => pos,
66 Self::Whitespace(_, pos) => pos,
67 Self::UnquotedString(_, pos) => pos,
68 }
69 }
70}
71
/// Character-level scanner that turns JSON-like text into tokens.
pub struct JsonTokenizer<'a> {
    /// Remaining input, with one character of lookahead.
    input: Peekable<Chars<'a>>,
    /// Running line counter.
    line: usize,
    /// Running column counter.
    column: usize,
}
78
79impl<'a> JsonTokenizer<'a> {
80 pub fn new(input: &'a str) -> Self {
82 Self {
83 input: input.chars().peekable(),
84 line: 1,
85 column: 0,
86 }
87 }
88
89 pub fn next_token(&mut self) -> Result<Option<Token>, JsonFixerError> {
95 if let Some(ch) = self.advance() {
96 match ch {
97 ch if ch.is_whitespace() => self.tokenize_whitespaces(ch).map(Some),
98 '{' => Ok(Some(Token::LeftBrace(self.current_position()))),
99 '}' => Ok(Some(Token::RightBrace(self.current_position()))),
100 '[' => Ok(Some(Token::LeftBracket(self.current_position()))),
101 ']' => Ok(Some(Token::RightBracket(self.current_position()))),
102 ':' => Ok(Some(Token::Colon(self.current_position()))),
103 ',' => Ok(Some(Token::Comma(self.current_position()))),
104 '\'' | '"' => self.tokenize_string(ch).map(Some),
105 '.' | '+' | '-' | '0'..='9' => self.tokenize_number(ch).map(Some),
106 'a'..='z' | 'A'..='Z' | '_' => self.tokenize_identifier(ch).map(Some),
107 ch => Err(JsonFixerError::Syntax(SyntaxError::UnexpectedCharacter(
108 ch,
109 Position {
110 line: self.line,
111 column: self.column,
112 },
113 ))),
114 }
115 } else {
116 Ok(None)
117 }
118 }
119
120 fn tokenize_whitespaces(&mut self, first_space: char) -> Result<Token, JsonFixerError> {
121 let start_pos = self.current_position();
122 let mut whitespaces = String::new();
123 whitespaces.push(first_space);
124
125 while let Some(next_ch) = self.input.peek() {
126 if !next_ch.is_whitespace() {
127 break;
128 }
129
130 whitespaces.push(self.advance().unwrap());
131 }
132
133 Ok(Token::Whitespace(whitespaces, start_pos))
134 }
135
136 fn peek(&mut self) -> Option<&char> {
137 self.input.peek()
138 }
139 fn advance(&mut self) -> Option<char> {
140 if let Some(ch) = self.input.next() {
141 self.column += 1;
142
143 if ch == '\n' {
144 self.line += 1;
145 self.column = 1;
146 }
147 Some(ch)
148 } else {
149 None
150 }
151 }
152 pub fn current_position(&self) -> Position {
153 Position {
154 line: self.line,
155 column: self.column,
156 }
157 }
158 fn tokenize_string(&mut self, quote_char: char) -> Result<Token, JsonFixerError> {
159 let start_pos = self.current_position();
160 let mut result = String::new();
161
162 while let Some(ch) = self.advance() {
163 match ch {
164 ch if ch == quote_char => return Ok(Token::String(result, start_pos)),
165 '\\' => {
166 if let Some(next_ch) = self.advance() {
167 match next_ch {
168 '"' | '\\' | '/' => result.push(next_ch),
169 'b' => result.push('\x08'), 'f' => result.push('\x0C'),
172 'n' => result.push('\n'),
173 'r' => result.push('\r'),
174 't' => result.push('\t'),
175 'u' => {
176 let mut hex = String::with_capacity(4);
178 for _ in 0..4 {
179 if let Some(h) = self.advance() {
180 hex.push(h);
181 }
182 }
183 if let Ok(code) = u32::from_str_radix(&hex, 16) {
184 if let Some(chr) = std::char::from_u32(code) {
185 result.push(chr);
186 }
187 }
188 }
189 _ => result.push(next_ch),
190 }
191 }
192 }
193 _ => result.push(ch),
194 }
195 }
196 Err(JsonFixerError::Syntax(SyntaxError::UnmatchedQuotes(
197 start_pos,
198 ))) }
200
201 fn tokenize_number(&mut self, first_char: char) -> Result<Token, JsonFixerError> {
202 let start_pos = self.current_position();
203 let mut number = String::from(first_char);
204
205 if first_char == '+' || first_char == '.' {
207 if let Some(next_char) = self.peek() {
209 if !next_char.is_digit(10) {
210 return Err(JsonFixerError::Syntax(SyntaxError::InvalidNumber(
211 number, start_pos,
212 )));
213 }
214 } else {
215 return Err(JsonFixerError::Syntax(SyntaxError::InvalidNumber(
216 number, start_pos,
217 )));
218 }
219
220 if first_char == '+' {
221 number.clear();
223 }
224
225 if first_char == '.' {
226 number.clear();
228 number.push('0');
229 number.push('.');
230 }
231 }
232
233 let mut multi_dots = false;
234 while let Some(&ch) = self.peek() {
235 if !ch.is_digit(10) && ch != '.' && ch != 'e' && ch != 'E' && ch != '+' && ch != '-' {
236 break;
237 }
238 if first_char == '.' && ch == '.' {
239 multi_dots = true;
241 }
242
243 number.push(self.advance().unwrap());
244 }
245
246 if multi_dots {
248 return Err(JsonFixerError::Syntax(SyntaxError::InvalidNumber(
249 number, start_pos,
250 )));
251 }
252
253 if number.chars().last().unwrap() == '.' {
254 number.pop();
256 }
257
258 Ok(Token::Number(number, self.current_position()))
259 }
260
261 fn tokenize_identifier(&mut self, first_char: char) -> Result<Token, JsonFixerError> {
262 let start_pos = self.current_position();
263 let mut ident = String::from(first_char);
264 while let Some(&ch) = self.input.peek() {
265 if !ch.is_alphanumeric() && ch != '_' {
266 break;
267 }
268
269 ident.push(self.advance().unwrap());
270 }
271
272 match ident.as_str() {
273 "true" => Ok(Token::Boolean(true, start_pos)),
274 "false" => Ok(Token::Boolean(false, start_pos)),
275 "null" => Ok(Token::Null(start_pos)),
276 _ => Ok(Token::UnquotedString(ident, start_pos)),
277 }
278 }
279}