// nu_json/util.rs

1use std::io;
2use std::str;
3
4use super::error::{Error, ErrorCode, Result};
5
/// Byte reader that tracks a line/column position and keeps a lookahead buffer.
pub struct StringReader<Iter>
where
    Iter: Iterator<Item = u8>,
{
    /// Underlying source of input bytes.
    iter: Iter,
    /// Line counter; starts at 1 and grows by one for every `b'\n'` pulled from `iter`.
    line: usize,
    /// Column counter; reset to 0 on `b'\n'`, incremented for every other byte pulled.
    col: usize,
    /// Bytes already pulled from `iter` but not yet consumed (front of the vector is next).
    ch: Vec<u8>,
}
12
13impl<Iter> StringReader<Iter>
14where
15    Iter: Iterator<Item = u8>,
16{
17    #[inline]
18    pub fn new(iter: Iter) -> Self {
19        StringReader {
20            iter,
21            line: 1,
22            col: 0,
23            ch: Vec::new(),
24        }
25    }
26
27    fn next(&mut self) -> Option<io::Result<u8>> {
28        match self.iter.next() {
29            None => None,
30            Some(b'\n') => {
31                self.line += 1;
32                self.col = 0;
33                Some(Ok(b'\n'))
34            }
35            Some(c) => {
36                self.col += 1;
37                Some(Ok(c))
38            }
39        }
40    }
41
42    pub fn pos(&mut self) -> (usize, usize) {
43        (self.line, self.col)
44    }
45
46    pub fn eof(&mut self) -> Result<bool> {
47        let ch = self.peek()?;
48        Ok(matches!(ch, None | Some(b'\x00')))
49    }
50
51    pub fn peek_next(&mut self, idx: usize) -> Result<Option<u8>> {
52        while self.ch.len() <= idx {
53            match self.next() {
54                Some(Err(err)) => return Err(Error::Io(err)),
55                Some(Ok(ch)) => self.ch.push(ch),
56                None => return Ok(None),
57            }
58        }
59        Ok(Some(self.ch[idx]))
60    }
61
62    pub fn peek(&mut self) -> Result<Option<u8>> {
63        self.peek_next(0)
64    }
65
66    pub fn peek_or_null(&mut self) -> Result<u8> {
67        Ok(self.peek()?.unwrap_or(b'\x00'))
68    }
69
70    pub fn eat_char(&mut self) -> u8 {
71        self.ch.remove(0)
72    }
73
74    pub fn uneat_char(&mut self, ch: u8) {
75        self.ch.insert(0, ch);
76    }
77
78    pub fn next_char(&mut self) -> Result<Option<u8>> {
79        match self.ch.first() {
80            Some(&ch) => {
81                self.eat_char();
82                Ok(Some(ch))
83            }
84            None => match self.next() {
85                Some(Err(err)) => Err(Error::Io(err)),
86                Some(Ok(ch)) => Ok(Some(ch)),
87                None => Ok(None),
88            },
89        }
90    }
91
92    pub fn next_char_or_null(&mut self) -> Result<u8> {
93        Ok(self.next_char()?.unwrap_or(b'\x00'))
94    }
95
96    fn eat_line(&mut self) -> Result<()> {
97        loop {
98            match self.peek()? {
99                Some(b'\n') | None => return Ok(()),
100                _ => {}
101            }
102            self.eat_char();
103        }
104    }
105
106    pub fn parse_whitespace(&mut self) -> Result<()> {
107        loop {
108            match self.peek_or_null()? {
109                b' ' | b'\n' | b'\t' | b'\r' => {
110                    self.eat_char();
111                }
112                b'#' => self.eat_line()?,
113                b'/' => {
114                    match self.peek_next(1)? {
115                        Some(b'/') => self.eat_line()?,
116                        Some(b'*') => {
117                            self.eat_char();
118                            self.eat_char();
119                            while !(self.peek()?.unwrap_or(b'*') == b'*'
120                                && self.peek_next(1)?.unwrap_or(b'/') == b'/')
121                            {
122                                self.eat_char();
123                            }
124                            self.eat_char();
125                            self.eat_char();
126                        }
127                        Some(_) => {
128                            self.eat_char();
129                        }
130                        None => return Err(self.error(ErrorCode::TrailingCharacters)), //todo
131                    }
132                }
133                _ => {
134                    return Ok(());
135                }
136            }
137        }
138    }
139
140    pub fn error(&mut self, reason: ErrorCode) -> Error {
141        Error::Syntax(reason, self.line, self.col)
142    }
143}
144
/// A parsed numeric value.
///
/// `ParseNumber::parse` yields `I64` for integers written with a leading `-`
/// that fit in `i64`, `U64` for other integers that fit in `u64`, and `F64`
/// for everything else it accepts.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Number {
    /// Signed 64-bit integer.
    I64(i64),
    /// Unsigned 64-bit integer.
    U64(u64),
    /// 64-bit floating point value.
    F64(f64),
}
150
151pub struct ParseNumber<Iter: Iterator<Item = u8>> {
152    rdr: StringReader<Iter>,
153    result: Vec<u8>,
154}
155
156impl<Iter: Iterator<Item = u8>> ParseNumber<Iter> {
157    #[inline]
158    pub fn new(iter: Iter) -> Self {
159        ParseNumber {
160            rdr: StringReader::new(iter),
161            result: Vec::new(),
162        }
163    }
164
165    pub fn parse(&mut self, stop_at_next: bool) -> Result<Number> {
166        match self.try_parse() {
167            Ok(()) => {
168                self.rdr.parse_whitespace()?;
169
170                let mut ch = self.rdr.next_char_or_null()?;
171
172                if stop_at_next {
173                    let ch2 = self.rdr.peek_or_null()?;
174                    // end scan if we find a punctuator character like ,}] or a comment
175                    if ch == b','
176                        || ch == b'}'
177                        || ch == b']'
178                        || ch == b'#'
179                        || ch == b'/' && (ch2 == b'/' || ch2 == b'*')
180                    {
181                        ch = b'\x00';
182                    }
183                }
184
185                match ch {
186                    b'\x00' => {
187                        let res =
188                            str::from_utf8(&self.result).expect("Internal error: json parsing");
189
190                        let mut is_float = false;
191                        for ch in res.chars() {
192                            if ch == '.' || ch == 'e' || ch == 'E' {
193                                is_float = true;
194                                break;
195                            }
196                        }
197
198                        if !is_float {
199                            if res.starts_with('-') {
200                                if let Ok(n) = res.parse::<i64>() {
201                                    return Ok(Number::I64(n));
202                                }
203                            } else if let Ok(n) = res.parse::<u64>() {
204                                return Ok(Number::U64(n));
205                            }
206                        }
207
208                        match res.parse::<f64>() {
209                            Ok(n) => Ok(Number::F64(n)),
210                            _ => Err(Error::Syntax(ErrorCode::InvalidNumber, 0, 0)),
211                        }
212                    }
213                    _ => Err(Error::Syntax(ErrorCode::InvalidNumber, 0, 0)),
214                }
215            }
216            Err(e) => Err(e),
217        }
218    }
219
220    fn try_parse(&mut self) -> Result<()> {
221        if self.rdr.peek_or_null()? == b'-' {
222            self.result.push(self.rdr.eat_char());
223        }
224
225        let mut has_value = false;
226
227        if self.rdr.peek_or_null()? == b'0' {
228            self.result.push(self.rdr.eat_char());
229            has_value = true;
230
231            // There can be only one leading '0'.
232            if let b'0'..=b'9' = self.rdr.peek_or_null()? {
233                return Err(Error::Syntax(ErrorCode::InvalidNumber, 0, 0));
234            }
235        }
236
237        loop {
238            match self.rdr.peek_or_null()? {
239                b'0'..=b'9' => {
240                    self.result.push(self.rdr.eat_char());
241                    has_value = true;
242                }
243                b'.' => {
244                    if !has_value {
245                        return Err(Error::Syntax(ErrorCode::InvalidNumber, 0, 0));
246                    }
247                    self.rdr.eat_char();
248                    return self.try_decimal();
249                }
250                b'e' | b'E' => {
251                    if !has_value {
252                        return Err(Error::Syntax(ErrorCode::InvalidNumber, 0, 0));
253                    }
254                    self.rdr.eat_char();
255                    return self.try_exponent();
256                }
257                _ => {
258                    if !has_value {
259                        return Err(Error::Syntax(ErrorCode::InvalidNumber, 0, 0));
260                    }
261                    return Ok(());
262                }
263            }
264        }
265    }
266
267    fn try_decimal(&mut self) -> Result<()> {
268        self.result.push(b'.');
269
270        // Make sure a digit follows the decimal place.
271        match self.rdr.next_char_or_null()? {
272            c @ b'0'..=b'9' => {
273                self.result.push(c);
274            }
275            _ => {
276                return Err(Error::Syntax(ErrorCode::InvalidNumber, 0, 0));
277            }
278        };
279
280        while let b'0'..=b'9' = self.rdr.peek_or_null()? {
281            self.result.push(self.rdr.eat_char());
282        }
283
284        match self.rdr.peek_or_null()? {
285            b'e' | b'E' => {
286                self.rdr.eat_char();
287                self.try_exponent()
288            }
289            _ => Ok(()),
290        }
291    }
292
293    fn try_exponent(&mut self) -> Result<()> {
294        self.result.push(b'e');
295
296        match self.rdr.peek_or_null()? {
297            b'+' => {
298                self.result.push(self.rdr.eat_char());
299            }
300            b'-' => {
301                self.result.push(self.rdr.eat_char());
302            }
303            _ => {}
304        };
305
306        // Make sure a digit follows the exponent place.
307        match self.rdr.next_char_or_null()? {
308            c @ b'0'..=b'9' => {
309                self.result.push(c);
310            }
311            _ => {
312                return Err(Error::Syntax(ErrorCode::InvalidNumber, 0, 0));
313            }
314        };
315
316        while let b'0'..=b'9' = self.rdr.peek_or_null()? {
317            self.result.push(self.rdr.eat_char());
318        }
319
320        Ok(())
321    }
322}