json_fixer/jsonfixer/
jsonparser.rs

1//! A robust JSON parser and fixer that handles malformed JSON input.
2//!
3//! This module provides functionality to parse and fix JSON data that may be slightly malformed,
4//! such as missing commas, extra commas, or unquoted identifiers. It attempts to produce valid
5//! JSON output while maintaining the original data structure.
6
7use std::fmt::Write;
8
9use super::{
10    json_tokenizer::{JsonTokenizer, Token},
11    jsonfixer_config::JsonFixerConfig,
12    jsonfixer_error::{JsonFixerError, SyntaxError},
13    jsonformatter::{Formatter, JsonFormatter},
14};
15
16#[derive(Debug, Clone, PartialEq)]
17pub enum JsonValue {
18    Null,
19    Boolean(bool),
20    Number(String),
21    String(String),
22    Array(Vec<JsonEntryValue>),
23    Object(Vec<JsonEntryValue>),
24    Space(String),
25}
26
27/*
28************************** JsonParser *************************
29*/
30
31#[derive(Debug, Clone, PartialEq)]
32pub struct JsonEntryValue {
33    pub space_bf_key: Option<String>,
34    pub key: Option<String>,
35    pub space_af_key: Option<String>,
36    pub space_bf_val: Option<String>,
37    pub value: Option<JsonValue>,
38    pub space_af_val: Option<String>,
39}
40
41impl JsonEntryValue {
42    fn new() -> Self {
43        Self {
44            space_bf_key: None,
45            key: None,
46            space_af_key: None,
47            space_bf_val: None,
48            value: None,
49            space_af_val: None,
50        }
51    }
52
53    pub fn get_sp_bf_key(&self) -> String {
54        let sp = self.space_bf_key.clone();
55        sp.unwrap_or_default()
56    }
57    pub fn get_key(&self) -> String {
58        let key = self.key.clone();
59        key.unwrap_or_default()
60    }
61    pub fn get_sp_af_key(&self) -> String {
62        let sp = self.space_af_key.clone();
63        sp.unwrap_or_default()
64    }
65
66    pub fn get_value(&self) -> JsonValue {
67        let val = self.value.clone();
68        val.unwrap()
69    }
70    pub fn get_sp_bf_val(&self) -> String {
71        let sp = self.space_bf_val.clone();
72        sp.unwrap_or_default()
73    }
74    pub fn get_sp_af_val(&self) -> String {
75        let sp = self.space_af_val.clone();
76        sp.unwrap_or_default()
77    }
78}
79
80/// Internal parser that handles the actual JSON parsing and fixing.
81pub struct JsonParser<'a> {
82    tokenizer: JsonTokenizer<'a>,
83    current_token: Option<Token>,
84    config: JsonFixerConfig,
85}
86
87impl<'a> JsonParser<'a> {
88    /// Creates a new parser instance and advances to the first token.
89    pub fn new(input: &'a str, config: JsonFixerConfig) -> Self {
90        let mut parser = Self {
91            tokenizer: JsonTokenizer::new(input),
92            current_token: None,
93            config: config,
94        };
95
96        let _ = parser.advance();
97        parser
98    }
99
100    /// Advances to the next token in the input stream.
101    fn advance(&mut self) -> Result<(), JsonFixerError> {
102        self.current_token = self.tokenizer.next_token()?;
103
104        Ok(())
105    }
106
107    /// Parses the entire JSON input and returns the fixed JSON string.
108    pub fn parse(&mut self) -> Result<String, JsonFixerError> {
109        let mut output = String::new();
110        // Input can be whitespace-value-whitespace
111        // Handle white space if any
112        if let Some(Token::Whitespace(_sp, _)) = &self.current_token {
113            // Ignore spaces before an actual value
114            self.advance()?; // Consume spaces
115        }
116        let config = self.config.clone();
117        // Handle JsonValue
118        let value = self.parse_value()?;
119        self.advance()?; // Consume value
120
121        // Format the output
122        let formetter = JsonFormatter;
123        write!(output, "{}", formetter.format(&value, &config)?)
124            .map_err(|err| JsonFixerError::IO(err))?;
125
126        loop {
127            match &self.current_token {
128                Some(Token::Whitespace(_sp, _)) => {
129                    // Ignore spaces before an actual value
130                    self.advance()?; // Consume spaces
131                    continue;
132                }
133                Some(token) => {
134                    // Error if there is anything else after a value was found
135                    return Err(JsonFixerError::Syntax(SyntaxError::UnexpectedToken(
136                        format!("\nExpected  EOF but found {}", token.get()),
137                        token.pos().clone(),
138                    )));
139                }
140                None => break, // EOF
141            }
142        }
143
144        Ok(output)
145    }
146
147    /// Parses a JSON value (object, array, string, number, boolean, or null).
148    fn parse_value(&mut self) -> Result<JsonValue, JsonFixerError> {
149        match &self.current_token {
150            Some(Token::LeftBrace(_)) => self.parse_object(),
151            Some(Token::LeftBracket(_)) => self.parse_array(),
152            Some(Token::String(s, _)) => Ok(JsonValue::String(s.replace('"', "\\\""))),
153            Some(Token::Number(n, pos)) => {
154                let _result: f64 = n.parse().map_err(|_| {
155                    JsonFixerError::Syntax(SyntaxError::InvalidNumber(n.clone(), pos.clone()))
156                })?;
157
158                Ok(JsonValue::Number(n.to_string()))
159            }
160            Some(Token::Boolean(b, _)) => Ok(JsonValue::Boolean(*b)),
161            Some(Token::Null(_)) => Ok(JsonValue::Null),
162
163            Some(Token::UnquotedString(s, pos)) => {
164                //println!("Here....");
165                Err(JsonFixerError::Syntax(SyntaxError::UnexpectedToken(
166                    s.to_string(),
167                    pos.clone(),
168                )))
169            }
170            None => Err(JsonFixerError::Syntax(SyntaxError::UnexpectedEndOfInput(
171                self.tokenizer.current_position(),
172            ))),
173
174            // Should be reached
175            Some(unexpect_token) => {
176                //println!("There....");
177                Err(JsonFixerError::Syntax(SyntaxError::UnexpectedToken(
178                    unexpect_token.get(),
179                    unexpect_token.pos().clone(),
180                )))
181            }
182        }
183    }
184
185    /// Parses a JSON object, handling potential formatting issues.
186    /// Supports unquoted keys and trailing/multiple commas.
187    fn parse_object(&mut self) -> Result<JsonValue, JsonFixerError> {
188        let mut obj = Vec::new();
189        self.advance()?; // Consume {
190
191        //let go_next_token = true;
192        while !self.current_token.is_none() {
193            let mut entry = JsonEntryValue::new();
194            //println!("Obj: {:?}", obj);
195            //println!("Current_token: {:?}", &self.current_token);
196
197            match &self.current_token {
198                Some(Token::RightBrace(_)) => break,
199                Some(Token::Comma(_)) => {
200                    // Empty entry
201                    // Consume consecutive commas (e.g., {,,})
202                    self.advance()?;
203                    continue;
204                }
205                Some(Token::Whitespace(sp, _)) => {
206                    // Consume spaces before 'Key' if any
207                    entry.space_bf_key = Some(sp.to_string());
208                    self.advance()?;
209                }
210                _ => (),
211            }
212
213            // parse key
214            match &self.current_token {
215                Some(Token::RightBrace(_)) => {
216                    // Empty object with inside spaces eg. {   }
217                    entry.value = None;
218                    obj.push(entry);
219                    break;
220                }
221                Some(Token::Comma(_)) => {
222                    // Empty entry
223                    // Consume consecutive commas (e.g., {,,})
224                    entry.value = None;
225                    obj.push(entry);
226                    self.advance()?;
227                    continue;
228                }
229                Some(Token::String(k, _)) | Some(Token::UnquotedString(k, _)) => {
230                    entry.key = Some(k.to_string());
231
232                    self.advance()?; // Consume the key
233                }
234                token => {
235                    if let Some(t) = &token {
236                        return Err(JsonFixerError::Syntax(SyntaxError::UnexpectedToken(
237                            format!("\nExpected a 'Key' after '{}' but found {}", '{', t.get()),
238                            t.pos().clone(),
239                        )));
240                    } else {
241                        // Reach the EOF with no closing } a no key
242                        // Empty object with inside spaces and not closed eg. {
243                        entry.value = None;
244                        obj.push(entry);
245                        break;
246                    }
247                }
248            }
249
250            // Consume spaces before ':' if any
251            if let Some(Token::Whitespace(sp, _)) = &self.current_token {
252                entry.space_af_key = Some(sp.to_string());
253                self.advance()?;
254            }
255
256            // Expect colon
257            match &self.current_token {
258                Some(Token::Colon(_)) => {
259                    self.advance()?; // Consume the : 
260                }
261                Some(unexped_token) => {
262                    return Err(JsonFixerError::Syntax(SyntaxError::UnexpectedToken(
263                        format!(
264                            "\nExpected ':' after a 'key' but found {}",
265                            unexped_token.get()
266                        ),
267                        unexped_token.pos().clone(),
268                    )));
269                }
270                None => {
271                    // Unexpected end of the input
272                    return Err(JsonFixerError::Syntax(SyntaxError::UnexpectedEndOfInput(
273                        self.tokenizer.current_position(),
274                    )));
275                }
276            }
277
278            // Consume spaces before Value if any
279            if let Some(Token::Whitespace(sp, _)) = &self.current_token {
280                entry.space_bf_val = Some(sp.to_string());
281                self.advance()?;
282            }
283
284            // Parse value
285            entry.value = Some(self.parse_value()?);
286
287            // Consume spaces After Value if any
288            if let Some(Token::Whitespace(sp, _)) = &self.current_token {
289                entry.space_af_val = Some(sp.to_string());
290                self.advance()?;
291            }
292
293            self.advance()?;
294            // Push the entry
295            obj.push(entry);
296        }
297
298        self.advance()?; // Consume }
299        Ok(JsonValue::Object(obj))
300    }
301
302    /// Parses a JSON array, handling trailing/multiple commas.
303    fn parse_array(&mut self) -> Result<JsonValue, JsonFixerError> {
304        let mut arr = Vec::new();
305        self.advance()?; // Consume [
306
307        while !self.current_token.is_none() {
308            let mut entry = JsonEntryValue::new();
309
310            match &self.current_token {
311                Some(Token::RightBracket(_)) => break, // Empty array without spaces
312                Some(Token::Comma(_)) => {
313                    // Consume consecutive commas (e.g., [,,])
314                    self.advance()?;
315                    continue;
316                }
317                Some(Token::Whitespace(sp, _)) => {
318                    // Consume spaces
319                    entry.space_bf_val = Some(sp.to_string());
320                    self.advance()?;
321                }
322                _ => (),
323            }
324
325            match &self.current_token {
326                Some(Token::RightBracket(_)) => {
327                    // Empty array with spaces inside it
328                    entry.value = None;
329                    arr.push(entry);
330                    break;
331                }
332                Some(Token::Comma(_)) => {
333                    // Empty array with spaces inside it and commas
334                    // Consume consecutive commas (e.g., [,,])
335                    entry.value = None;
336                    arr.push(entry);
337                    self.advance()?;
338                    continue;
339                }
340                _ => {
341                    //println!("current_token : {:?}", self.current_token);
342                    // Get the value
343                    let curr_t = self.current_token.clone();
344                    entry.value = Some(self.parse_value()?);
345
346                    // Primitive value needs to be consumed after parse value
347                    if curr_t == self.current_token {
348                        self.advance()?;
349                    }
350
351                    // Consume spaces After Value if any
352                    if let Some(Token::Whitespace(sp, _)) = &self.current_token {
353                        entry.space_af_val = Some(sp.to_string());
354                        self.advance()?;
355                    }
356
357                    arr.push(entry);
358                }
359            }
360        }
361
362        self.advance()?; // Consume ]
363
364        Ok(JsonValue::Array(arr))
365    }
366}