toy_json_formatter/
lib.rs

1#![doc = include_str!("../README.md")]
2
3mod tokenizer;
4use parser::{JsonParser, Value};
5use tokenizer::{JsonParseErr, JsonTokenKind, JsonTokenizer};
6pub mod parser;
7
8use std::error::Error;
9
/// Controls how [`format()`] lays out its output.
///
/// `FormatOptions::default()` yields pretty-printed output indented with one
/// tab per nesting level, which is also what [`format()`] uses when it is
/// given `None`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FormatOptions<'a> {
    /// When `true`, the formatter emits no newlines, no indentation and no
    /// space after `:`.
    pub compact: bool,
    /// String repeated once per nesting level at the start of each line.
    /// It is not used when `compact` is `true`.
    pub indent_str: &'a str,
}

impl Default for FormatOptions<'_> {
    /// Pretty (non-compact) output indented with a single tab per level.
    fn default() -> Self {
        Self {
            compact: false,
            indent_str: "\t",
        }
    }
}
16
17pub fn format(json: &str, options: Option<FormatOptions<'_>>) -> (String, Vec<Box<dyn Error>>) {
18    let mut result = String::new();
19    let tokenizer = JsonTokenizer::new(json);
20    let mut errs = Vec::new();
21    let mut indent = 0;
22    let mut previous = None;
23
24    let chosen_options = options.unwrap_or_else(|| FormatOptions {
25        compact: false,
26        indent_str: "\t",
27    });
28
29    let compact_mode = chosen_options.compact;
30    let indent_str = chosen_options.indent_str;
31
32    for token in tokenizer.into_iter() {
33        match token {
34            Err(err) => match &err {
35                JsonParseErr::UnexpectedCharacters(span) => {
36                    result.push_str(&json[span.as_range()]);
37                    errs.push(err);
38                }
39                JsonParseErr::TrailingComma(position) => {
40                    result.push_str(&json[position.byte_index()..(position.byte_index() + 1)])
41                }
42                _ => {
43                    errs.push(err);
44                }
45            },
46            Ok(token) => {
47                match token.kind {
48                    JsonTokenKind::ObjectStart => {
49                        if !compact_mode {
50                            if let Some(JsonTokenKind::ObjectStart | JsonTokenKind::ArrayStart) =
51                                previous
52                            {
53                                result.push('\n');
54                                for _ in 0..indent {
55                                    result.push_str(indent_str);
56                                }
57                            }
58                        }
59                        indent += 1;
60                        result.push('{');
61                    }
62                    JsonTokenKind::ObjectEnd => {
63                        if indent > 0 {
64                            indent -= 1;
65                        }
66                        if let Some(JsonTokenKind::ObjectStart) = previous {
67                            result.push('}');
68                        } else {
69                            if !compact_mode {
70                                result.push('\n');
71                                for _ in 0..indent {
72                                    result.push_str(indent_str);
73                                }
74                            }
75                            result.push('}');
76                        }
77                    }
78                    JsonTokenKind::ArrayStart => {
79                        if !compact_mode {
80                            if let Some(JsonTokenKind::ObjectStart | JsonTokenKind::ArrayStart) =
81                                previous
82                            {
83                                result.push('\n');
84                                for _ in 0..indent {
85                                    result.push_str(indent_str);
86                                }
87                            }
88                        }
89                        indent += 1;
90                        result.push('[');
91                    }
92                    JsonTokenKind::ArrayEnd => {
93                        if indent > 0 {
94                            indent -= 1;
95                        }
96                        if let Some(JsonTokenKind::ArrayStart) = previous {
97                            result.push(']');
98                        } else {
99                            if !compact_mode {
100                                result.push('\n');
101                                for _ in 0..indent {
102                                    result.push_str(indent_str);
103                                }
104                            }
105                            result.push(']');
106                        }
107                    }
108                    JsonTokenKind::Colon => {
109                        result.push(':');
110                        if !compact_mode {
111                            result.push(' ');
112                        }
113                    }
114                    JsonTokenKind::Comma => {
115                        result.push(',');
116                        if !compact_mode {
117                            result.push('\n');
118                            for _ in 0..indent {
119                                result.push_str(indent_str);
120                            }
121                        }
122                    }
123                    JsonTokenKind::String => {
124                        if !compact_mode {
125                            if let Some(JsonTokenKind::ObjectStart | JsonTokenKind::ArrayStart) =
126                                previous
127                            {
128                                result.push('\n');
129                                for _ in 0..indent {
130                                    result.push_str(indent_str);
131                                }
132                            }
133                        }
134                        result.push_str(&json[token.span.as_range()]);
135                    }
136                    JsonTokenKind::Number => {
137                        if let Some(JsonTokenKind::ObjectStart | JsonTokenKind::ArrayStart) =
138                            previous
139                        {
140                            if !compact_mode {
141                                result.push('\n');
142                                for _ in 0..indent {
143                                    result.push_str(indent_str);
144                                }
145                            }
146                        }
147                        result.push_str(&json[token.span.as_range()])
148                    }
149                    JsonTokenKind::True => {
150                        if !compact_mode {
151                            if let Some(JsonTokenKind::ObjectStart | JsonTokenKind::ArrayStart) =
152                                previous
153                            {
154                                result.push('\n');
155                                for _ in 0..indent {
156                                    result.push_str(indent_str);
157                                }
158                            }
159                        }
160                        result.push_str("true")
161                    }
162                    JsonTokenKind::False => {
163                        if !compact_mode {
164                            if let Some(JsonTokenKind::ObjectStart | JsonTokenKind::ArrayStart) =
165                                previous
166                            {
167                                result.push('\n');
168                                for _ in 0..indent {
169                                    result.push_str(indent_str);
170                                }
171                            }
172                        }
173                        result.push_str("false")
174                    }
175                    JsonTokenKind::Null => {
176                        if !compact_mode {
177                            if let Some(JsonTokenKind::ObjectStart | JsonTokenKind::ArrayStart) =
178                                previous
179                            {
180                                result.push('\n');
181                                for _ in 0..indent {
182                                    result.push_str(indent_str);
183                                }
184                            }
185                        }
186                        result.push_str("null");
187                    }
188                }
189                // we only need this for whitespace decisions, so skip if in compact mode
190                if !compact_mode {
191                    previous = Some(token.kind);
192                }
193            }
194        }
195    }
196
197    (
198        result,
199        if !errs.is_empty() {
200            errs.into_iter()
201                .map(|err| Box::new(err) as Box<dyn Error>)
202                .collect()
203        } else {
204            Vec::with_capacity(0)
205        },
206    )
207}
208
209pub fn parse<'json>(json: &'json str) -> (Value<'json>, Vec<Box<dyn Error>>) {
210    let (value, errs) = JsonParser::parse(json);
211
212    (
213        value,
214        errs.into_iter()
215            .map(|err| Box::new(err) as Box<dyn Error>)
216            .collect(),
217    )
218}
219
/// Crate-internal set of parse states.
///
/// NOTE(review): nothing in this file reads this enum; it is presumably
/// consumed by the `parser` or `tokenizer` module. Confirm before changing
/// or reordering variants.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum JsonParseState {
    Object,
    Array,
    KeyValuePairColon,
    KeyValuePairKey,
    Value,
    AfterValue,
}
229
#[cfg(test)]
mod tests {
    use std::{
        borrow::Cow,
        io::{BufRead, Write},
    };

    use crate::{
        parser::JsonString,
        tokenizer::{JsonParseErr, JsonTokenizer},
        FormatOptions,
    };

    /// Parses `input` and renders the resulting value with `to_string`.
    fn parsed_text(input: &str) -> String {
        super::parse(input).0.to_string()
    }

    /// Formats `input` in compact mode and returns only the formatted text.
    fn format_compact(input: &str) -> String {
        let options = FormatOptions {
            compact: true,
            indent_str: "\t",
        };
        super::format(input, Some(options)).0
    }

    // Interactive helper: reads JSON lines from stdin and prints the formatted
    // result until an empty line is entered. Deliberately not marked `#[test]`,
    // hence the `dead_code` allowance.
    #[allow(dead_code)]
    fn test_bench() {
        loop {
            println!("Enter your JSON:");
            let mut stdin = std::io::stdin().lock();
            let mut input = String::new();
            stdin
                .read_line(&mut input)
                .expect("Failed to read from stdin.");

            if input.trim().is_empty() {
                break;
            }

            let (formatted, errs) = super::format(&input, None);
            let mut stdout = std::io::stdout();
            stdout
                .write_all(formatted.as_bytes())
                .expect("Failed to write to stdout.");

            // The returned byte count is deliberately discarded; an I/O error still panics.
            let _ = stdout.write(b"\n").expect("Failed to write to stdout");
            if !errs.is_empty() {
                let mut stdout = std::io::stdout();
                for err in errs {
                    writeln!(stdout, "{}", err).expect("Failed to write to stdout.");
                }
                stdout.flush().expect("Failed to flush to stdout.");
            }
        }
    }

    /// `JsonString::unescape` resolves every escape sequence in the sample.
    #[test]
    fn escape_sequences() {
        let unescaped = JsonString::unescape("\\\\\\u0020\\\"\\b\\f\\n\\t\\r");
        let expected: Cow<'static, str> =
            Cow::Owned("\\ \"\u{0008}\u{000c}\n\t\r".to_string());
        assert_eq!(expected, unescaped.to_owned());
    }

    /// Dumps the token stream and the pretty-printed parse of a known-bad
    /// fixture; it has no assertions and only checks that nothing panics.
    #[test]
    fn test_json() {
        let source = include_str!("../../bad_test2.json");
        for result in JsonTokenizer::new(source) {
            match result {
                Ok(token) => {
                    let range = token.span.as_range();
                    println!("{:?}", range);
                    print!("{:?} ", token.kind);
                    println!("/{}/", &source[range]);
                }
                Err(JsonParseErr::UnexpectedCharacters(span)) => {
                    println!("Unexpected chars: {}", &source[span.as_range()]);
                }
                Err(JsonParseErr::InvalidUnicodeEscapeSequence(span)) => {
                    println!(
                        "Invalid unicode escape sequence: {}",
                        &source[span.as_range()]
                    )
                }
                Err(err) => {
                    println!("{:?}", err);
                }
            }
        }

        let pretty = super::parse(source).0.to_string_pretty();
        println!("{}", pretty);

        println!("{}", super::parse("[true,[]]").0.to_string_pretty());
    }

    /// Unbalanced or misplaced brackets must survive formatting unchanged.
    #[test]
    fn doesnt_lose_chars_when_brackets_are_involved() {
        // (input, expected pretty output with default options)
        let pretty_cases = [
            (
                "{\"test\": \"value\",[]]}",
                "{\n\t\"test\": \"value\",\n\t[]]\n}",
            ),
            ("[[\"test\",}]]", "[\n\t[\n\t\t\"test\",}\n\t]\n]"),
        ];
        for &(input, expected) in pretty_cases.iter() {
            let (output, _) = super::format(input, None);
            assert_eq!(expected, output);
        }

        // In compact mode each of these inputs must round-trip byte for byte.
        let compact_cases = [
            "{{{}}", "[[[]]", "}}}", "{{{", "]]]", "[[[", "{{}}}", "[[]]]",
        ];
        for &input in compact_cases.iter() {
            assert_eq!(input, format_compact(input));
        }
    }

    /// Bare words and unterminated strings are echoed unchanged.
    #[test]
    fn handles_simple_inputs_correctly() {
        for &input in ["test", "\"test", "test\""].iter() {
            let (output, _) = super::format(input, None);
            assert_eq!(input, output);
        }
    }

    /// Parsing and re-serializing yields the minified form of the document.
    #[test]
    fn parses_valid_json() {
        // Already-minified documents come back unchanged.
        let round_trips = [
            "[[[[[]]]]]",
            "{\"key\":{\"key\":{\"key\":{\"key\":{}}}}}",
            "\"test\"",
            "true",
            "false",
            "null",
        ];
        for &input in round_trips.iter() {
            assert_eq!(input, parsed_text(input));
        }

        // NOTE(review): this input lacks a comma after "key6" and has a trailing
        // comma after "key7", so it is not strictly valid JSON. Confirm whether
        // that is intentional.
        let input = r#"{
    "list": [
        true,
        false,
        null,
        "hello world!",
        2.312812e-1283,
        {},
        {
            "key": null,
            "key2": true,
            "key3": false,
            "key4": "hello, world!",
            "key5": [],
            "key6": {}
            "key7": 2.312812e-1283,
        }
    ]
}"#;

        assert_eq!(
            r#"{"list":[true,false,null,"hello world!",2.312812e-1283,{},{"key":null,"key2":true,"key3":false,"key4":"hello, world!","key5":[],"key6":{},"key7":2.312812e-1283}]}"#,
            parsed_text(input)
        );
    }

    /// Colliding keys are expected to come back renamed with `0` suffixes.
    #[test]
    fn handles_key_collisions() {
        let rendered = parsed_text(r#"{"key": "value", "key": "test", "key0": "other_test"}"#);
        assert_eq!(
            "{\"key\":\"value\",\"key0\":\"test\",\"key00\":\"other_test\"}",
            rendered
        );
    }

    /// Unknown escapes get their backslash doubled; valid ones are kept.
    #[test]
    fn double_escapes_bad_escape_sequences() {
        let rendered = parsed_text("\"\\h\\k\\u012\\\"\\u0020\"");
        assert_eq!("\"\\\\h\\\\k\\\\u012\\\"\\u0020\"", rendered);
    }

    /// Redundant leading zeroes are stripped from numbers.
    #[test]
    fn removes_leading_zeroes() {
        let cases = [
            ("00000000001.0123e-123", "1.0123e-123"),
            ("0000000000000.123", "0.123"),
            ("0000000000e-12", "0e-12"),
            ("0", "0"),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(expected, parsed_text(input));
        }
    }

    /// Valid escape sequences are preserved, and re-parsing pretty output of
    /// an escaped backslash reports no errors.
    #[test]
    fn leaves_escape_sequences() {
        let rendered = parsed_text("\\\"\\\\\\/\\b\\n\\n\\r\\t\u{0000}");
        assert_eq!("\"\\\"\\\\\\/\\b\\n\\n\\r\\t\\u0000\"", rendered);

        let (value, _) = super::parse("\"\\\\\"");
        let (_, errs) = super::parse(&value.to_string_pretty());
        assert!(errs.is_empty(), "found error");
    }

    /// Smoke test: pretty-printing a key containing an escaped backslash.
    #[test]
    fn to_string_on_escaped_strings() {
        let (value, _) = super::parse("{ \"test\\\\string\": 1 }");
        println!("{}", value.to_string_pretty());
    }

    /// Runs `format` and `parse` over the fuzzer's samples and prints every
    /// sample that makes either of them panic.
    #[test]
    #[allow(unused)]
    fn fuzzed() {
        for sample in json_fuzzer::fuzz() {
            let format_panicked = std::panic::catch_unwind(|| {
                super::format(&sample, None);
            })
            .is_err();
            if format_panicked {
                println!("FAILURE IN FORMAT. String: {}", &sample);
            }

            let parse_panicked = std::panic::catch_unwind(|| {
                super::parse(&sample);
            })
            .is_err();
            if parse_panicked {
                println!("FAILURE IN PARSE. String: {}", &sample);
            }
        }
    }

    /// Prints byte lengths and char indices of multi-byte input, then parses
    /// a string containing multi-byte characters.
    #[test]
    fn char_indices_test() {
        println!("{}", "\u{009d}".len());
        let text = "\u{009d} \u{009d}";
        println!("{}", text.len());
        println!("{}", text.char_indices().count());
        for (index, ch) in text.char_indices() {
            println!("{:?}, {:?}", index, ch);
        }

        super::parse("\"\u{009d} \u{faed4}\"");
    }
}