json_fmt/
lib.rs

1use std::io::{self, BufRead, BufReader, Read, Result, Write};
2
/// Parser position used by `format_json`'s explicit work stack.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// A JSON value is expected next.
    Value,
    /// Inside an object: a key string or the closing `}` is expected.
    Object,
    /// A key/value pair was just emitted: `,` or `}` is expected.
    Pair,
    /// Inside an array: an element or the closing `]` is expected.
    Array,
    /// An array element was just emitted: `,` or `]` is expected.
    Elem,
}
11
/// Wraps a plain message in an `io::Error` of kind `Other`.
fn error_msg(msg: String) -> io::Error {
    let kind = io::ErrorKind::Other;
    io::Error::new(kind, msg)
}
15
/// Sentinel byte (NUL) that `peek_char` returns when no input byte is available.
const BAD_CHAR: u8 = 0;
17
/// Returns `true` for the four bytes treated as insignificant whitespace:
/// tab, space, line feed and carriage return.
#[inline(always)]
fn is_whitespace(c: u8) -> bool {
    match c {
        b' ' | b'\t' | b'\r' | b'\n' => true,
        _ => false,
    }
}
22
23fn peek_char(r: &mut BufReader<&mut dyn Read>) -> u8 {
24    if let Ok(b) = r.fill_buf() {
25        return *b.get(0).unwrap_or(&BAD_CHAR);
26    }
27    BAD_CHAR
28}
29
30fn skip_whitespace(br: &mut BufReader<&mut dyn Read>) -> Result<()> {
31    loop {
32        let buf = br.fill_buf()?;
33        let mut i = 0usize;
34        while i < buf.len() {
35            if !is_whitespace(buf[i]) {
36                br.consume(i);
37                return Ok(());
38            }
39            i += 1;
40        }
41        br.consume(i);
42    }
43}
44
/// Copies one double-quoted string token verbatim from `br` to `w`.
///
/// The reader must be positioned on the opening `"`. Bytes are copied up to
/// and including the first `"` that is not preceded by an unescaped backslash;
/// escape sequences are passed through untouched, not decoded.
///
/// # Errors
///
/// Returns `UnexpectedEof` if the input ends before the closing quote (the
/// bytes read so far have already been written), and propagates I/O errors.
///
/// # Panics
///
/// Panics if the next byte of `br` is not `"` (including when `br` is empty).
fn write_string(w: &mut dyn Write, br: &mut BufReader<&mut dyn Read>) -> Result<()> {
    let mut buf = br.fill_buf()?;
    assert!(buf[0] == b'"');
    // Scanning starts just past the opening quote.
    let mut pos = 1usize;
    // True while the previous byte was a backslash that itself was not escaped.
    let mut escaped = false;
    loop {
        if pos >= buf.len() {
            // Buffer exhausted mid-string: flush it and pull in more input.
            w.write_all(buf)?;
            br.consume(pos);
            buf = br.fill_buf()?;
            if buf.is_empty() {
                return Err(io::ErrorKind::UnexpectedEof.into());
            }
            pos = 0;
        }
        let byte = buf[pos];
        pos += 1;
        if escaped {
            // Whatever follows an escaping backslash is taken literally.
            escaped = false;
        } else if byte == b'"' {
            break;
        } else {
            escaped = byte == b'\\';
        }
    }
    w.write_all(&buf[..pos])?;
    br.consume(pos);
    Ok(())
}
74
/// Copies one number token verbatim from `br` to `w`.
///
/// The first byte is accepted without inspection; after that, bytes are copied
/// for as long as they are ASCII digits or one of `-`, `.`, `+`, `e`, `E`.
/// The first other byte is left unread. Reaching end of input simply ends the
/// token. The JSON number grammar is not validated.
///
/// # Errors
///
/// Propagates I/O errors from reading or writing.
fn write_number(w: &mut dyn Write, br: &mut BufReader<&mut dyn Read>) -> Result<()> {
    let mut chunk = br.fill_buf()?;
    // Index 0 is the byte the caller already identified as starting a number.
    let mut end = 1usize;
    loop {
        if end >= chunk.len() {
            // The token may continue in the next buffer: flush and refill.
            w.write_all(chunk)?;
            br.consume(end);
            chunk = br.fill_buf()?;
            if chunk.is_empty() {
                return Ok(());
            }
            end = 0;
        }
        let in_number = matches!(chunk[end], b'0'..=b'9' | b'-' | b'.' | b'+' | b'e' | b'E');
        if !in_number {
            w.write_all(&chunk[..end])?;
            br.consume(end);
            return Ok(());
        }
        end += 1;
    }
}
98
99fn write_expected(
100    w: &mut dyn Write,
101    br: &mut BufReader<&mut dyn Read>,
102    expect: &[u8],
103) -> Result<()> {
104    const MAX_ID_LEN: usize = 5;
105    assert!(expect.len() - 1 <= MAX_ID_LEN);
106    let mut buf = [0u8; MAX_ID_LEN];
107    br.read_exact(&mut buf[..expect.len()])?;
108    if expect != &buf[..expect.len()] {
109        for (i, &c) in expect.iter().enumerate() {
110            if c != buf[i] {
111                return Err(error_msg(format!("invalid input: {}", buf[i] as char)));
112            }
113        }
114    }
115    w.write_all(expect)
116}
117
/// Indentation state shared with the formatters: one indentation unit plus the
/// prefix for the current nesting depth.
#[derive(Debug, Clone)]
pub struct Indent {
    /// The text of a single indentation level (`width` spaces).
    one_tab: String,
    /// `one_tab` repeated once per currently open level.
    prefix: String,
}

impl Indent {
    /// Creates an indenter at depth zero whose levels are `width` spaces wide.
    pub fn new(width: usize) -> Self {
        Self {
            one_tab: " ".repeat(width),
            prefix: String::new(),
        }
    }

    /// Enters one nesting level.
    fn push(&mut self) {
        self.prefix.push_str(&self.one_tab);
    }

    /// Leaves one nesting level.
    ///
    /// Calling this at depth zero is a no-op. Unbalanced input (a stray `}` or
    /// `]`) can trigger that, and a plain subtraction would underflow and
    /// panic in debug builds.
    fn pop(&mut self) {
        let shorter = self.prefix.len().saturating_sub(self.one_tab.len());
        self.prefix.truncate(shorter);
    }

    /// Writes the prefix for the current depth to `w`.
    fn write_to(&self, w: &mut dyn Write) -> Result<()> {
        w.write_all(self.prefix.as_bytes())
    }
}
143
144pub fn format_json(
145    w: &mut dyn Write,
146    br: &mut BufReader<&mut dyn Read>,
147    indent: &mut Indent,
148) -> Result<()> {
149    let mut stack = vec![State::Value];
150    while let Some(state) = stack.pop() {
151        skip_whitespace(br)?;
152        match state {
153            State::Value => match peek_char(br) {
154                c if c == b'{' || c == b'[' => {
155                    br.consume(1);
156                    w.write(&[c, b'\n'])?;
157                    indent.push();
158                    stack.push(if c == b'{' {
159                        State::Object
160                    } else {
161                        State::Array
162                    });
163                }
164                b'"' => write_string(w, br)?,
165                c if c.is_ascii_digit() || c == b'-' => write_number(w, br)?,
166                b'n' => write_expected(w, br, b"null")?,
167                b't' => write_expected(w, br, b"true")?,
168                b'f' => write_expected(w, br, b"false")?,
169                c => return Err(error_msg(format!("unexpected input: '{}'", c as char))),
170            },
171            State::Pair | State::Elem => {
172                let c = peek_char(br);
173                if c == b',' {
174                    br.consume(1);
175                    w.write(b",")?;
176                } else if state == State::Pair && c != b'}' || state == State::Elem && c != b']' {
177                    return Err(error_msg(format!("unexpected input: '{}'", c as char)));
178                }
179                w.write(b"\n")?;
180                stack.push(if state == State::Pair {
181                    State::Object
182                } else {
183                    State::Array
184                });
185            }
186            State::Object => match peek_char(br) {
187                b'"' => {
188                    indent.write_to(w)?;
189                    write_string(w, br)?;
190                    skip_whitespace(br)?;
191                    write_expected(w, br, b":")?;
192                    w.write(b" ")?;
193                    stack.push(State::Pair);
194                    stack.push(State::Value);
195                }
196                b'}' => {
197                    br.consume(1);
198                    indent.pop();
199                    indent.write_to(w)?;
200                    w.write(b"}")?;
201                }
202                c => return Err(error_msg(format!("unexpected input: '{}'", c as char))),
203            },
204            State::Array => match peek_char(br) {
205                b']' => {
206                    br.consume(1);
207                    indent.pop();
208                    indent.write_to(w)?;
209                    w.write(b"]")?;
210                }
211                _ => {
212                    indent.write_to(w)?;
213                    stack.push(State::Elem);
214                    stack.push(State::Value);
215                }
216            },
217        }
218    }
219    Ok(())
220}
221
222pub fn format_json_fast(
223    w: &mut dyn Write,
224    br: &mut BufReader<&mut dyn Read>,
225    indent: &mut Indent,
226) -> Result<()> {
227    let mut new_line = false;
228    let mut buf = br.fill_buf()?;
229    let mut i = 0usize;
230    loop {
231        if i >= buf.len() {
232            br.consume(i);
233            buf = br.fill_buf()?;
234            if buf.is_empty() {
235                break;
236            }
237            i = 0;
238        }
239
240        let c = buf[i];
241        if c == b'}' || c == b']' {
242            indent.pop();
243            if !new_line {
244                w.write(b"\n")?;
245                indent.write_to(w)?;
246            }
247        }
248        if new_line {
249            indent.write_to(w)?;
250            new_line = false;
251        }
252        match c {
253            c if c == b'"' || c.is_ascii_digit() || c == b'-' => {
254                br.consume(i);
255
256                if c == b'"' {
257                    write_string(w, br)?;
258                } else {
259                    write_number(w, br)?;
260                }
261
262                buf = br.fill_buf()?;
263                i = 0;
264                continue;
265            }
266            b'{' | b'[' => {
267                indent.push();
268                w.write(&[c, b'\n'])?;
269                new_line = true;
270            }
271            b',' => {
272                w.write(b",\n")?;
273                new_line = true;
274            }
275            b':' => {
276                w.write(b": ")?;
277            }
278            c if is_whitespace(c) => {
279                i += 1;
280                while i < buf.len() {
281                    if !is_whitespace(buf[i]) {
282                        break;
283                    }
284                    i += 1;
285                }
286                continue;
287            }
288            _ => {
289                w.write(&[c])?;
290            }
291        }
292        i += 1;
293    }
294    Ok(())
295}
296
#[cfg(test)]
mod tests {
    use super::*;

    /// Compact input covering scalars, an escaped string, irregular spacing,
    /// an empty object and nested empty arrays.
    const TEST_CASE1_IN: &[u8] = br#"{ "a" : 1 , "b": "test \\", "c": false, "d": null, "e": 1.234e5, "f":  [ 1, 2  ] , "g"   : {},"h": [[[[]]]]}"#;
    /// Expected rendering of `TEST_CASE1_IN` with a four-space indent.
    const TEST_CASE1_OUT: &[u8] = br#"{
    "a": 1,
    "b": "test \\",
    "c": false,
    "d": null,
    "e": 1.234e5,
    "f": [
        1,
        2
    ],
    "g": {
    },
    "h": [
        [
            [
                [
                ]
            ]
        ]
    ]
}"#;

    /// The stack-based formatter reproduces the reference output.
    #[test]
    fn test_format_json() {
        let mut input = TEST_CASE1_IN;
        let source: &mut dyn Read = &mut input;
        let mut reader = BufReader::new(source);
        let mut indent = Indent::new(4);
        let mut formatted: Vec<u8> = Vec::new();

        format_json(&mut formatted, &mut reader, &mut indent).unwrap();

        println!(">> {}", std::str::from_utf8(&formatted).unwrap());
        assert_eq!(formatted.as_slice(), TEST_CASE1_OUT);
    }

    /// The single-pass formatter reproduces the same reference output.
    #[test]
    fn test_format_json_fast() {
        let mut input = TEST_CASE1_IN;
        let source: &mut dyn Read = &mut input;
        let mut reader = BufReader::new(source);
        let mut indent = Indent::new(4);
        let mut formatted: Vec<u8> = Vec::new();

        format_json_fast(&mut formatted, &mut reader, &mut indent).unwrap();

        println!(">> {}", std::str::from_utf8(&formatted).unwrap());
        assert_eq!(formatted.as_slice(), TEST_CASE1_OUT);
    }
}