partial_json_fixer/
lib.rs

1//! Partial JSON fixer
2//!
3//! This is a zero dependency partial json fixer.
4//! It is very lenient, and will accept some erroneous JSON too. For example, {key: "value"} would be valid.
5//!
6//! This can be used to parse partial json coming from a stream.
7
8use std::{fmt::Display, str::CharIndices};
9
10/// Takes a partial JSON string, kinda parses it and returns a complete JSON object
11/// The JSON is tokenized and parsed. It can then be converted to a string with `.to_string()`
12/// method
13pub fn fix_json_parse(partial_json: &str) -> JResult<JsonValue> {
14    let tokenizer = JsonTokenizer::new(partial_json);
15    let parser = JsonParser::new(tokenizer);
16
17    let value = parser.parse()?;
18    Ok(value)
19}
20
/// Takes a partial JSON string and returns a complete JSON string.
///
/// The input is kept as text: a single pass tracks the open brackets, strings, escapes and
/// pending object keys/values on a stack, and whatever is still open at the end of the input
/// is closed (a key without a value gets `null`).
///
/// Finishing touches applied to the end of the input, only when it does not end inside a
/// string:
/// * a trailing comma (and the whitespace after it) is removed;
/// * a truncated `true` / `false` / `null` literal is completed;
/// * a number cut off after `-`, `.`, or an exponent marker gets a `0` appended.
///
/// This approach is likely faster than the parsing approach of `fix_json_parse`, but it does
/// no validation: the input is assumed to be a prefix of a valid JSON document.
/// A string cut off in the middle of a `\uXXXX` escape is not repaired.
pub fn fix_json(partial_json: &str) -> String {
    /// One entry per construct that is still open at the current position.
    #[derive(Clone, Copy)]
    enum Wrapper {
        Brace,
        SquareBracket,
        Quote,
        Escape,
        ObjectKey,
        ObjectValue,
    }

    /// Drops the "waiting for a value" marker once a value has started or finished.
    fn finish_value(stack: &mut Vec<Wrapper>) {
        if matches!(stack.last(), Some(Wrapper::ObjectValue)) {
            stack.pop();
        }
    }

    /// Returns the text needed to complete a truncated literal or number at the end of
    /// `text`, or `""` when the trailing token is already complete (or is not one).
    fn literal_completion(text: &str) -> &'static str {
        let tail_start = text
            .char_indices()
            .rev()
            .take_while(|&(_, c)| c.is_ascii_alphanumeric() || matches!(c, '.' | '+' | '-'))
            .last()
            .map_or(text.len(), |(index, _)| index);
        let tail = &text[tail_start..];
        if tail.is_empty() {
            return "";
        }
        for literal in ["true", "false", "null"] {
            if let Some(missing) = literal.strip_prefix(tail) {
                return missing;
            }
        }
        let looks_numeric = tail.starts_with(|c: char| c == '-' || c.is_ascii_digit());
        let cut_short = tail.ends_with(|c: char| matches!(c, '.' | 'e' | 'E' | '+' | '-'));
        if looks_numeric && cut_short {
            "0"
        } else {
            ""
        }
    }

    let mut wrappers: Vec<Wrapper> = Vec::new();
    for c in partial_json.chars() {
        match wrappers.last().copied() {
            Some(Wrapper::Quote) => match c {
                '"' => {
                    wrappers.pop();
                }
                '\\' => wrappers.push(Wrapper::Escape),
                _ => {}
            },
            Some(Wrapper::Escape) => {
                // The escaped character itself is irrelevant; just leave escape mode.
                wrappers.pop();
            }
            top => match c {
                '{' => wrappers.push(Wrapper::Brace),
                '[' => wrappers.push(Wrapper::SquareBracket),
                '}' | ']' => {
                    // The input is assumed well formed, so this closes the innermost container.
                    wrappers.pop();
                    finish_value(&mut wrappers);
                }
                '"' => {
                    match top {
                        // First string inside an object position is a key.
                        Some(Wrapper::Brace) => wrappers.push(Wrapper::ObjectKey),
                        // The awaited value is this string.
                        Some(Wrapper::ObjectValue) => {
                            wrappers.pop();
                        }
                        _ => {}
                    }
                    wrappers.push(Wrapper::Quote);
                }
                ':' => {
                    // Only a pending key is replaced; popping unconditionally would discard
                    // the enclosing `{` when the key was written without quotes.
                    if matches!(top, Some(Wrapper::ObjectKey)) {
                        wrappers.pop();
                    }
                    wrappers.push(Wrapper::ObjectValue);
                }
                w if w.is_whitespace() => {}
                // A comma or the first character of a bare value (number, literal).
                _ => finish_value(&mut wrappers),
            },
        }
    }

    // A comma or whitespace at the end of an unterminated string is string content and must
    // be preserved; only a structural trailing comma is dropped.
    let in_string = matches!(
        wrappers.last(),
        Some(Wrapper::Quote | Wrapper::Escape)
    );
    let kept = if in_string {
        partial_json
    } else {
        partial_json
            .trim_end()
            .strip_suffix(',')
            .unwrap_or(partial_json)
    };

    let mut final_json = String::with_capacity(kept.len() + wrappers.len() + 8);
    final_json.push_str(kept);
    if !in_string {
        final_json.push_str(literal_completion(kept));
    }

    while let Some(wrapper) = wrappers.pop() {
        match wrapper {
            Wrapper::Brace => {
                final_json.push('}');
                finish_value(&mut wrappers);
            }
            Wrapper::SquareBracket => {
                final_json.push(']');
                finish_value(&mut wrappers);
            }
            Wrapper::Quote => {
                final_json.push('"');
                finish_value(&mut wrappers);
            }
            // The input ended right after a backslash: escape the backslash itself.
            Wrapper::Escape => final_json.push('\\'),
            Wrapper::ObjectKey => final_json.push_str(": null"),
            Wrapper::ObjectValue => final_json.push_str(" null"),
        }
    }

    final_json
}
141
/// Recursive-descent parser over the tokens of a (possibly truncated) JSON document.
///
/// Running out of input is not an error inside a container: whatever was parsed so far is
/// kept, and an object key without a value is given `null`.
struct JsonParser<'a> {
    tokenizer: JsonTokenizer<'a>,
}

impl<'a> JsonParser<'a> {
    /// Wraps a tokenizer positioned at the start of the document.
    fn new(tokenizer: JsonTokenizer<'a>) -> Self {
        Self { tokenizer }
    }

    /// Parses the single top-level value of the document.
    ///
    /// Returns [`JsonError::UnexpectedEnd`] for empty input and
    /// [`JsonError::ExpectedToken`] when the document starts with a separator or closer.
    fn parse(mut self) -> JResult<JsonValue<'a>> {
        self.parse_value()
    }

    /// Parses one value: a scalar token, an object, or an array.
    fn parse_value(&mut self) -> JResult<JsonValue<'a>> {
        let token = self.tokenizer.next().ok_or(JsonError::UnexpectedEnd)?;

        let value = match token.kind {
            JsonTokenKind::Null | JsonTokenKind::String | JsonTokenKind::Number => {
                JsonValue::Unit(self.token_as_unit(&token))
            }
            JsonTokenKind::OpeningBrace => JsonValue::Object(self.parse_object()?),
            JsonTokenKind::OpeningSquareBracket => JsonValue::Array(self.parse_array()?),
            JsonTokenKind::Comma
            | JsonTokenKind::Colon
            | JsonTokenKind::ClosingBrace
            | JsonTokenKind::ClosingSquareBracket => {
                return Err(JsonError::ExpectedToken {
                    got: token,
                    expected: None,
                })
            }
        };
        Ok(value)
    }

    /// Classifies a scalar token by its source text.
    ///
    /// Strings lose their delimiters (and any incomplete trailing escape); `true` / `false`
    /// map to their variants; numbers, including fractions, exponents and arbitrarily large
    /// integers, are kept as text; anything else becomes `null`.
    fn token_as_unit(&self, token: &JsonToken) -> JsonUnit<'a> {
        let source = self.tokenizer.span_source(token);
        if let Some(body) = source.strip_prefix('"') {
            return JsonUnit::String(string_contents(body));
        }
        match source {
            "true" => JsonUnit::True,
            "false" => JsonUnit::False,
            _ => complete_number(source).map_or(JsonUnit::Null, JsonUnit::Number),
        }
    }

    /// Parses an object key.
    ///
    /// Quoted keys are used as written. A bare word (the lenient `{key: "value"}` form) is
    /// turned into a string so the rendered object has a quoted key.
    fn parse_key(&mut self) -> JResult<JsonUnit<'a>> {
        let token = self.tokenizer.next().ok_or(JsonError::UnexpectedEnd)?;
        match token.kind {
            JsonTokenKind::String => Ok(self.token_as_unit(&token)),
            JsonTokenKind::Number | JsonTokenKind::Null => {
                Ok(JsonUnit::String(self.tokenizer.span_source(&token)))
            }
            JsonTokenKind::OpeningBrace
            | JsonTokenKind::ClosingBrace
            | JsonTokenKind::OpeningSquareBracket
            | JsonTokenKind::ClosingSquareBracket
            | JsonTokenKind::Comma
            | JsonTokenKind::Colon => Err(JsonError::ExpectedToken {
                got: token,
                expected: None,
            }),
        }
    }

    /// Parses array members; the opening `[` has already been consumed.
    fn parse_array(&mut self) -> JResult<JsonArray<'a>> {
        let mut members = Vec::new();
        loop {
            if self.tokenizer.is_next_closing_square_bracket() {
                // Consume the `]`; leaving it behind would make the enclosing container
                // mistake it for its own terminator.
                self.tokenizer.next();
                break;
            }
            match self.parse_value() {
                Ok(value) => members.push(value),
                // End of input or an unusable token: keep what was collected so far.
                Err(_) => break,
            }
            match self.tokenizer.next() {
                None => break,
                Some(token) => match token.kind {
                    JsonTokenKind::ClosingSquareBracket => break,
                    JsonTokenKind::Comma => {}
                    _ => {
                        return Err(JsonError::ExpectedToken {
                            got: token,
                            expected: Some(JsonTokenKind::ClosingSquareBracket),
                        })
                    }
                },
            }
        }
        Ok(JsonArray { members })
    }

    /// Parses object entries; the opening `{` has already been consumed.
    fn parse_object(&mut self) -> JResult<JsonObject<'a>> {
        let mut values = Vec::new();
        loop {
            if self.tokenizer.is_next_closing_brace() {
                // Consume the `}` for the same reason as `]` in `parse_array`.
                self.tokenizer.next();
                break;
            }
            let key = match self.parse_key() {
                Ok(key) => key,
                Err(_) => break,
            };
            // Key/value separator. Lenient: whatever token follows the key is accepted.
            if self.tokenizer.next().is_none() {
                values.push((key, JsonValue::Null));
                break;
            }
            match self.parse_value() {
                Ok(value) => values.push((key, value)),
                Err(_) => {
                    values.push((key, JsonValue::Null));
                    break;
                }
            }
            match self.tokenizer.next() {
                None => break,
                Some(token) => match token.kind {
                    JsonTokenKind::ClosingBrace => break,
                    JsonTokenKind::Comma => {}
                    _ => {
                        return Err(JsonError::ExpectedToken {
                            got: token,
                            expected: Some(JsonTokenKind::ClosingBrace),
                        })
                    }
                },
            }
        }
        Ok(JsonObject { values })
    }
}

/// Returns the contents of a string token, given the text after its opening quote.
///
/// The text stops at the first unescaped `"`. If the string was cut off inside an escape
/// sequence (`\` or a short `\uXXXX`), that incomplete escape is dropped so the contents can
/// be re-quoted safely.
fn string_contents(body: &str) -> &str {
    let bytes = body.as_bytes();
    let mut at = 0;
    while at < bytes.len() {
        match bytes[at] {
            b'"' => return &body[..at],
            b'\\' => {
                let escape_len = if bytes.get(at + 1) == Some(&b'u') { 6 } else { 2 };
                if at + escape_len > bytes.len() {
                    return &body[..at];
                }
                at += escape_len;
            }
            _ => at += 1,
        }
    }
    // Slices above are taken only at `"` or `\`, which are always char boundaries.
    body
}

/// Returns the numeric part of a bare token, or `None` if the token is not a number.
///
/// Accepts an optional sign, digits, an optional fraction and an optional exponent. A token
/// that stops in the middle of a fraction or exponent (`1.`, `2e`, `3e-`) yields the part
/// that is already complete.
fn complete_number(source: &str) -> Option<&str> {
    let bytes = source.as_bytes();
    let digits_from = |mut at: usize| {
        while at < bytes.len() && bytes[at].is_ascii_digit() {
            at += 1;
        }
        at
    };

    let int_start = usize::from(matches!(bytes.first(), Some(b'+' | b'-')));
    let mut end = digits_from(int_start);
    if end == int_start {
        return None;
    }

    if bytes.get(end) == Some(&b'.') {
        let fraction_end = digits_from(end + 1);
        if fraction_end > end + 1 {
            end = fraction_end;
        }
    }
    if matches!(bytes.get(end), Some(b'e' | b'E')) {
        let signed = matches!(bytes.get(end + 1), Some(b'+' | b'-'));
        let exponent_start = end + 1 + usize::from(signed);
        let exponent_end = digits_from(exponent_start);
        if exponent_end > exponent_start {
            end = exponent_end;
        }
    }

    let unfinished = matches!(
        &source[end..],
        "" | "." | "e" | "E" | "e+" | "e-" | "E+" | "E-"
    );
    if unfinished {
        Some(&source[..end])
    } else {
        None
    }
}

type JResult<T> = Result<T, JsonError>;

/// Errors reported while parsing.
#[derive(Debug)]
pub enum JsonError {
    /// The input ended where a value was required.
    UnexpectedEnd,
    /// A token appeared where it is not allowed; `expected` names the wanted token if known.
    ExpectedToken {
        got: JsonToken,
        expected: Option<JsonTokenKind>,
    },
}
impl std::error::Error for JsonError {}

impl Display for JsonError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            JsonError::UnexpectedEnd => write!(f, "Unexpected end of input"),
            JsonError::ExpectedToken {
                got,
                expected: Some(expected),
            } => write!(
                f,
                "Expected token {:?} at char {}, got {:?}",
                expected, got.span.start, got.kind
            ),
            JsonError::ExpectedToken {
                got,
                expected: None,
            } => write!(
                f,
                "Unexpected token {:?} at char {}",
                got.kind, got.span.start
            ),
        }
    }
}

/// A parsed JSON value borrowing its scalar text from the input.
#[derive(Debug)]
pub enum JsonValue<'a> {
    Array(JsonArray<'a>),
    Object(JsonObject<'a>),
    Unit(JsonUnit<'a>),
    /// Placeholder for a value that was missing from the partial input.
    Null,
}

impl<'a> Display for JsonValue<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            JsonValue::Unit(unit) => write!(f, "{unit}"),
            JsonValue::Object(object) => write!(f, "{object}"),
            JsonValue::Array(array) => write!(f, "{array}"),
            JsonValue::Null => write!(f, "null"),
        }
    }
}

/// A JSON array.
#[derive(Debug)]
pub struct JsonArray<'a> {
    pub members: Vec<JsonValue<'a>>,
}

impl<'a> Display for JsonArray<'a> {
    /// Renders the members separated by `", "` inside square brackets.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("[")?;
        for (position, member) in self.members.iter().enumerate() {
            if position > 0 {
                f.write_str(", ")?;
            }
            write!(f, "{member}")?;
        }
        f.write_str("]")
    }
}

/// A JSON object; entries keep their input order.
#[derive(Debug)]
pub struct JsonObject<'a> {
    pub values: Vec<(JsonUnit<'a>, JsonValue<'a>)>,
}
impl<'a> Display for JsonObject<'a> {
    /// Renders the entries as `key: value` separated by `", "` inside braces.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("{")?;
        for (position, (key, value)) in self.values.iter().enumerate() {
            if position > 0 {
                f.write_str(", ")?;
            }
            write!(f, "{}: {}", key, value)?;
        }
        f.write_str("}")
    }
}

/// A scalar JSON value. Numbers and strings are slices of the input text.
#[derive(Debug)]
pub enum JsonUnit<'a> {
    Null,
    Number(&'a str),
    /// String contents without the surrounding quotes, escapes left as written.
    String(&'a str),
    True,
    False,
}

impl<'a> Display for JsonUnit<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::True => write!(f, "true"),
            Self::False => write!(f, "false"),
            Self::Null => write!(f, "null"),
            Self::Number(n) => write!(f, "{n}"),
            Self::String(s) => write!(f, "\"{s}\""),
        }
    }
}

/// Splits the input into tokens on demand; spans are byte offsets into `source`.
struct JsonTokenizer<'a> {
    source: &'a str,
    char_indices: CharIndices<'a>,
}

impl<'a> JsonTokenizer<'a> {
    /// Creates a tokenizer positioned at the start of `source`.
    fn new(source: &'a str) -> Self {
        Self {
            source,
            char_indices: source.char_indices(),
        }
    }

    /// Returns the source text covered by `token`.
    fn span_source(&self, token: &JsonToken) -> &'a str {
        &self.source[token.span.start..token.span.end]
    }

    /// Consumes whitespace and returns the first non-whitespace character with its offset.
    fn skip_whitespace_and_next(&mut self) -> Option<(usize, char)> {
        self.char_indices.by_ref().find(|&(_, c)| !c.is_whitespace())
    }

    /// Consumes the rest of a bare token (number, literal, or unquoted word) whose first
    /// character, at `first_index`, has already been consumed.
    ///
    /// The token continues over alphanumerics and `.`, `+`, `-`, `_`, so fractions,
    /// exponents and identifier-like keys stay in one token.
    fn consume_number_or_null(&mut self, first_index: usize) -> Option<JsonToken> {
        // If the input ends inside the token, the span ends at the end of the source. Adding
        // one to the last char's offset would split a multi-byte character.
        let mut end = self.source.len();
        loop {
            let mut lookahead = self.char_indices.clone();
            match lookahead.next() {
                Some((_, c)) if c.is_alphanumeric() || matches!(c, '.' | '+' | '-' | '_') => {
                    self.char_indices = lookahead;
                }
                Some((index, _)) => {
                    end = index;
                    break;
                }
                None => break,
            }
        }
        Some(JsonToken {
            kind: JsonTokenKind::Number,
            span: Span {
                start: first_index,
                end,
            },
        })
    }

    /// Whether the next token is `}` (leading whitespace ignored); consumes nothing.
    fn is_next_closing_brace(&self) -> bool {
        self.char_indices.as_str().trim_start().starts_with('}')
    }

    /// Whether the next token is `]` (leading whitespace ignored); consumes nothing.
    fn is_next_closing_square_bracket(&self) -> bool {
        self.char_indices.as_str().trim_start().starts_with(']')
    }

    /// Returns the next token, or `None` at the end of the input.
    fn next(&mut self) -> Option<JsonToken> {
        let (start, c) = self.skip_whitespace_and_next()?;

        let punctuation = match c {
            '{' => Some(JsonTokenKind::OpeningBrace),
            '}' => Some(JsonTokenKind::ClosingBrace),
            '[' => Some(JsonTokenKind::OpeningSquareBracket),
            ']' => Some(JsonTokenKind::ClosingSquareBracket),
            ',' => Some(JsonTokenKind::Comma),
            ':' => Some(JsonTokenKind::Colon),
            _ => None,
        };
        if let Some(kind) = punctuation {
            return Some(JsonToken {
                kind,
                span: Span {
                    start,
                    end: start + c.len_utf8(),
                },
            });
        }

        if c != '"' {
            // Anything else is treated as a bare token.
            return self.consume_number_or_null(start);
        }

        // Consume the string up to its closing quote or the end of the input. `escaped` is
        // toggled per character so that the quote in `\\"` terminates the string, while the
        // one in `\"` does not.
        let mut end = start + c.len_utf8();
        let mut escaped = false;
        for (index, ch) in self.char_indices.by_ref() {
            end = index + ch.len_utf8();
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                break;
            }
        }
        Some(JsonToken {
            kind: JsonTokenKind::String,
            span: Span { start, end },
        })
    }
}

/// Half-open byte range `[start, end)` into the source text.
#[derive(Clone, Copy, Debug)]
pub struct Span {
    start: usize,
    end: usize,
}

/// A token kind together with the source range it covers.
#[derive(Clone, Copy, Debug)]
pub struct JsonToken {
    kind: JsonTokenKind,
    span: Span,
}

/// Token categories. The tokenizer reports every bare token (numbers, `true`, `false`,
/// `null`, unquoted words) as `Number`; the parser tells them apart by their text.
#[derive(Clone, Copy, Debug)]
pub enum JsonTokenKind {
    OpeningBrace,
    ClosingBrace,
    OpeningSquareBracket,
    ClosingSquareBracket,
    Comma,
    Colon,
    String,
    Number,
    Null,
}