sonic_rs/
error.rs

1//! Errors.
2
// The code is cloned from [serde_json](https://github.com/serde-rs/json), with the necessary parts modified.
4
5use core::{
6    fmt::{self, Debug, Display},
7    result,
8};
9use std::{borrow::Cow, error, fmt::Result as FmtResult, str::FromStr};
10
11use serde::{
12    de::{self, Unexpected},
13    ser,
14};
15use sonic_number::Error as NumberError;
16use thiserror::Error as ErrorTrait;
17
18use crate::reader::Position;
19
/// This type represents all possible errors that can occur when serializing or
/// deserializing JSON data.
///
/// The error code, the position information and the optional input excerpt
/// are stored in a boxed `ErrorImpl`; use the accessor methods on this type
/// to inspect them.
pub struct Error {
    /// This `Box` allows us to keep the size of `Error` as small as possible. A
    /// larger `Error` type was substantially slower due to all the functions
    /// that pass around `Result<T, Error>`.
    err: Box<ErrorImpl>,
}
28
/// Alias for a `Result` whose error type is `sonic_rs::Error`.
pub type Result<T> = result::Result<T, Error>;
31
32impl Error {
33    /// One-based line number at which the error was detected.
34    ///
35    /// Characters in the first line of the input (before the first newline
36    /// character) are in line 1.
37    pub fn line(&self) -> usize {
38        self.err.line
39    }
40
41    /// One-based column number at which the error was detected.
42    ///
43    /// The first character in the input and any characters immediately
44    /// following a newline character are in column 1.
45    ///
46    /// Note that errors may occur in column 0, for example if a read from an
47    /// I/O stream fails immediately following a previously read newline
48    /// character.
49    pub fn column(&self) -> usize {
50        self.err.column
51    }
52
53    /// The kind reported by the underlying standard library I/O error, if this
54    /// error was caused by a failure to read or write bytes on an I/O stream.
55    pub fn io_error_kind(&self) -> Option<std::io::ErrorKind> {
56        if let ErrorCode::Io(io_error) = &self.err.code {
57            Some(io_error.kind())
58        } else {
59            None
60        }
61    }
62
63    /// Categorizes the cause of this error.
64    ///
65    /// - `Category::Io` - failure to read or write bytes on an I/O stream
66    /// - `Category::Syntax` - input that is not syntactically valid JSON
67    /// - `Category::Data` - input data that is semantically incorrect
68    /// - `Category::Eof` - unexpected end of the input data
69    pub fn classify(&self) -> Category {
70        match self.err.code {
71            ErrorCode::Message(_) | ErrorCode::UnexpectedVisitType => Category::TypeUnmatched,
72            ErrorCode::GetInEmptyObject
73            | ErrorCode::GetInEmptyArray
74            | ErrorCode::GetIndexOutOfArray
75            | ErrorCode::GetUnknownKeyInObject => Category::NotFound,
76            ErrorCode::Io(_) => Category::Io,
77            ErrorCode::EofWhileParsing => Category::Eof,
78            ErrorCode::ExpectedColon
79            | ErrorCode::ExpectedObjectCommaOrEnd
80            | ErrorCode::InvalidEscape
81            | ErrorCode::InvalidJsonValue
82            | ErrorCode::InvalidLiteral
83            | ErrorCode::InvalidUTF8
84            | ErrorCode::InvalidNumber
85            | ErrorCode::NumberOutOfRange
86            | ErrorCode::InvalidUnicodeCodePoint
87            | ErrorCode::ControlCharacterWhileParsingString
88            | ErrorCode::TrailingComma
89            | ErrorCode::TrailingCharacters
90            | ErrorCode::ExpectObjectKeyOrEnd
91            | ErrorCode::ExpectedArrayCommaOrEnd
92            | ErrorCode::ExpectedArrayStart
93            | ErrorCode::ExpectedObjectStart
94            | ErrorCode::InvalidSurrogateUnicodeCodePoint
95            | ErrorCode::SerExpectKeyIsStrOrNum(_)
96            | ErrorCode::FloatMustBeFinite
97            | ErrorCode::ExpectedQuote
98            | ErrorCode::ExpectedNumericKey
99            | ErrorCode::RecursionLimitExceeded => Category::Syntax,
100        }
101    }
102
103    /// Returns true if this error was caused by a failure to read or write
104    /// bytes on an I/O stream.
105    pub fn is_io(&self) -> bool {
106        self.classify() == Category::Io
107    }
108
109    /// Returns true if this error was caused by input that was not
110    /// syntactically valid JSON.
111    pub fn is_syntax(&self) -> bool {
112        self.classify() == Category::Syntax
113    }
114
115    /// Returns true when the input data is unmatched for expected type.
116    ///
117    /// For example, JSON containing a number  when the type being deserialized into holds a String.
118    pub fn is_unmatched_type(&self) -> bool {
119        self.classify() == Category::TypeUnmatched
120    }
121
122    /// Return true when the target field was not found from JSON.
123    ///
124    /// For example:
125    /// When using `get*` APIs, it gets a unknown keys from JSON text, or get
126    /// a index out of the array.
127    pub fn is_not_found(&self) -> bool {
128        self.classify() == Category::NotFound
129    }
130
131    /// Returns true if this error was caused by prematurely reaching the end of
132    /// the input data.
133    ///
134    /// Callers that process streaming input may be interested in retrying the
135    /// deserialization once more data is available.
136    pub fn is_eof(&self) -> bool {
137        self.classify() == Category::Eof
138    }
139
140    /// Returens the offset of the error position from the starting of JSON text.
141    pub fn offset(&self) -> usize {
142        self.err.index
143    }
144}
145
146#[allow(clippy::fallible_impl_from)]
147impl From<Error> for std::io::Error {
148    /// Convert a `sonic_rs::Error` into an `std::io::Error`.
149    ///
150    /// JSON syntax and data errors are turned into `InvalidData` I/O errors.
151    /// EOF errors are turned into `UnexpectedEof` I/O errors.
152    fn from(j: Error) -> Self {
153        match j.err.code {
154            ErrorCode::Io(err) => err,
155            ErrorCode::EofWhileParsing => std::io::Error::new(std::io::ErrorKind::UnexpectedEof, j),
156            _ => std::io::Error::new(std::io::ErrorKind::InvalidData, j),
157        }
158    }
159}
160
/// Categorizes the cause of a `sonic_rs::Error`.
///
/// The enum is `#[non_exhaustive]`, so matches outside this crate need a
/// wildcard arm.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[non_exhaustive]
pub enum Category {
    /// The error was caused by a failure to read or write bytes on an I/O
    /// stream.
    Io,

    /// The error was caused by input that was not syntactically valid JSON.
    Syntax,

    /// The error was caused by input data that does not match the expected
    /// type.
    ///
    /// For example, JSON containing a number when the type being deserialized
    /// into holds a String.
    TypeUnmatched,

    /// The error was caused by the target field not being found in the JSON.
    ///
    /// For example, when a `get*` API is asked for an unknown key of an
    /// object, or for an index outside of an array.
    NotFound,

    /// The error was caused by prematurely reaching the end of the input data.
    ///
    /// Callers that process streaming input may be interested in retrying the
    /// deserialization once more data is available.
    Eof,
}
191
/// Boxed payload of `Error`: the error code plus where it happened.
struct ErrorImpl {
    // What went wrong.
    code: ErrorCode,
    // Byte offset of the error; exposed through `Error::offset`.
    index: usize,
    // Line of the error; 0 when no position is known (the `Display` impl
    // then prints only the code).
    line: usize,
    // Column of the error.
    column: usize,
    // Excerpt of the input around the error position with a `^` marker line,
    // if one was generated.
    descript: Option<String>,
}
200
/// Crate-internal error codes.
///
/// The `#[error(...)]` attribute on each variant supplies its `Display`
/// message; `{{` and `}}` inside those messages are escapes for literal
/// braces.
#[derive(ErrorTrait, Debug)]
pub(crate) enum ErrorCode {
    #[error("{0}")]
    Message(Cow<'static, str>),

    #[error("io error while serializing or deserializing")]
    Io(std::io::Error),

    #[error("EOF while parsing")]
    EofWhileParsing,

    #[error("Expected this character to be a ':' while parsing")]
    ExpectedColon,

    #[error("Expected this character to be either a ',' or a ']' while parsing")]
    ExpectedArrayCommaOrEnd,

    #[error("Expected this character to be either a ',' or a '}}' while parsing")]
    ExpectedObjectCommaOrEnd,

    #[error("Invalid literal (`true`, `false`, or a `null`) while parsing")]
    InvalidLiteral,

    #[error("Invalid JSON value")]
    InvalidJsonValue,

    #[error("Expected this character to be '{{'")]
    ExpectedObjectStart,

    #[error("Expected this character to be '['")]
    ExpectedArrayStart,

    #[error("Invalid escape chars")]
    InvalidEscape,

    #[error("Invalid number")]
    InvalidNumber,

    #[error("Number is bigger than the maximum value of its type")]
    NumberOutOfRange,

    #[error("Invalid unicode code point")]
    InvalidUnicodeCodePoint,

    #[error("Invalid UTF-8 characters in json")]
    InvalidUTF8,

    #[error("Control character found while parsing a string")]
    ControlCharacterWhileParsingString,

    #[error("Expected this character to be '\"' or '}}'")]
    ExpectObjectKeyOrEnd,

    #[error("JSON has a comma after the last value in an array or object")]
    TrailingComma,

    #[error("JSON has non-whitespace trailing characters after the value")]
    TrailingCharacters,

    #[error("Encountered nesting of JSON maps and arrays more than 128 layers deep")]
    RecursionLimitExceeded,

    #[error("Get value from an empty object")]
    GetInEmptyObject,

    #[error("Get unknown key from the object")]
    GetUnknownKeyInObject,

    #[error("Get value from an empty array")]
    GetInEmptyArray,

    #[error("Get index out of the array")]
    GetIndexOutOfArray,

    #[error("Unexpected visited type in JSON visitor")]
    UnexpectedVisitType,

    #[error("Invalid surrogate Unicode code point")]
    InvalidSurrogateUnicodeCodePoint,

    #[error("Float number must be finite, not be Infinity or NaN")]
    FloatMustBeFinite,

    #[error("Expect a numeric key in Value")]
    ExpectedNumericKey,

    #[error("Expect a quote")]
    ExpectedQuote,

    #[error("Expected the key to be string/bool/number when serializing map, now is {0}")]
    SerExpectKeyIsStrOrNum(Unexpected<'static>),
}
293
294impl From<NumberError> for ErrorCode {
295    fn from(err: NumberError) -> Self {
296        match err {
297            NumberError::InvalidNumber => ErrorCode::InvalidNumber,
298            NumberError::FloatMustBeFinite => ErrorCode::FloatMustBeFinite,
299        }
300    }
301}
302
303impl Error {
304    #[cold]
305    pub(crate) fn syntax(code: ErrorCode, json: &[u8], index: usize) -> Self {
306        let position = Position::from_index(index, json);
307        // generate descript about 16 characters
308        let mut start = if index < 8 { 0 } else { index - 8 };
309        let mut end = if index + 8 > json.len() {
310            json.len()
311        } else {
312            index + 8
313        };
314
315        // find the nearest valid utf-8 character
316        while start > 0 && index - start <= 16 && (json[start] & 0b1100_0000) == 0b1000_0000 {
317            start -= 1;
318        }
319
320        // find the nearest valid utf-8 character
321        while end < json.len() && end - index <= 16 && (json[end - 1] & 0b1100_0000) == 0b1000_0000
322        {
323            end += 1;
324        }
325
326        let fragment = String::from_utf8_lossy(&json[start..end]).to_string();
327        let left = index - start;
328        let right = if end - index > 1 {
329            end - (index + 1)
330        } else {
331            0
332        };
333        let mask = ".".repeat(left) + "^" + &".".repeat(right);
334        let descript = format!("\n\n\t{}\n\t{}\n", fragment, mask);
335
336        Error {
337            err: Box::new(ErrorImpl {
338                code,
339                line: position.line,
340                column: position.column,
341                index,
342                descript: Some(descript),
343            }),
344        }
345    }
346
347    #[cold]
348    pub(crate) fn ser_error(code: ErrorCode) -> Self {
349        Error {
350            err: Box::new(ErrorImpl {
351                code,
352                line: 0,
353                column: 0,
354                index: 0,
355                descript: None,
356            }),
357        }
358    }
359
360    #[cold]
361    pub(crate) fn io(error: std::io::Error) -> Self {
362        Error {
363            err: Box::new(ErrorImpl {
364                code: ErrorCode::Io(error),
365                line: 0,
366                index: 0,
367                column: 0,
368                descript: None,
369            }),
370        }
371    }
372
373    #[cold]
374    pub(crate) fn error_code(self) -> ErrorCode {
375        self.err.code
376    }
377}
378
379impl serde::de::StdError for Error {
380    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
381        match &self.err.code {
382            ErrorCode::Io(err) => err.source(),
383            _ => None,
384        }
385    }
386}
387
388impl Display for Error {
389    fn fmt(&self, f: &mut fmt::Formatter) -> FmtResult {
390        Display::fmt(&*self.err, f)
391    }
392}
393
394impl Display for ErrorImpl {
395    fn fmt(&self, f: &mut fmt::Formatter) -> FmtResult {
396        if self.line != 0 {
397            write!(
398                f,
399                "{} at line {} column {}{}",
400                self.code,
401                self.line,
402                self.column,
403                self.descript.as_ref().unwrap_or(&"".to_string())
404            )
405        } else {
406            write!(f, "{}", self.code)
407        }
408    }
409}
410
411// Remove two layers of verbosity from the debug representation. Humans often
412// end up seeing this representation because it is what unwrap() shows.
413impl Debug for Error {
414    fn fmt(&self, f: &mut fmt::Formatter) -> FmtResult {
415        Display::fmt(&self, f)
416    }
417}
418
419impl de::Error for Error {
420    #[cold]
421    fn custom<T: Display>(msg: T) -> Error {
422        make_error(msg.to_string())
423    }
424
425    #[cold]
426    fn invalid_type(unexp: de::Unexpected, exp: &dyn de::Expected) -> Self {
427        if let de::Unexpected::Unit = unexp {
428            Error::custom(format_args!("invalid type: null, expected {}", exp))
429        } else {
430            Error::custom(format_args!("invalid type: {}, expected {}", unexp, exp))
431        }
432    }
433}
434
435impl ser::Error for Error {
436    #[cold]
437    fn custom<T: Display>(msg: T) -> Error {
438        make_error(msg.to_string())
439    }
440}
441
442// TODO: remove me in 0.4 version.
443#[cold]
444pub(crate) fn make_error(mut msg: String) -> Error {
445    let (line, column) = parse_line_col(&mut msg).unwrap_or((0, 0));
446    Error {
447        err: Box::new(ErrorImpl {
448            code: ErrorCode::Message(msg.into()),
449            line,
450            index: 0,
451            column,
452            descript: None,
453        }),
454    }
455}
456
/// Splits a trailing ` at line L column C` suffix off `msg`.
///
/// On success the suffix is removed from `msg` and `(L, C)` is returned.
/// When the message does not end in such a suffix, or a number is missing or
/// does not fit into `usize`, `msg` is left untouched and `None` is returned.
fn parse_line_col(msg: &mut String) -> Option<(usize, usize)> {
    const LINE_MARKER: &str = " at line ";
    const COLUMN_MARKER: &str = " column ";

    // Returns the index just past the run of ASCII digits starting at `from`.
    fn digit_run_end(text: &str, from: usize) -> usize {
        let mut pos = from;
        while starts_with_digit(&text[pos..]) {
            pos += 1;
        }
        pos
    }

    let (suffix_start, line, column) = {
        let text = msg.as_str();

        // Only the last occurrence can be a suffix.
        let suffix_start = text.rfind(LINE_MARKER)?;

        let line_start = suffix_start + LINE_MARKER.len();
        let line_end = digit_run_end(text, line_start);

        if !text[line_end..].starts_with(COLUMN_MARKER) {
            return None;
        }

        let column_start = line_end + COLUMN_MARKER.len();
        let column_end = digit_run_end(text, column_start);

        // Anything after the column number means this is not a suffix.
        if column_end < text.len() {
            return None;
        }

        let line = usize::from_str(&text[line_start..line_end]).ok()?;
        let column = usize::from_str(&text[column_start..column_end]).ok()?;
        (suffix_start, line, column)
    };

    msg.truncate(suffix_start);
    Some((line, column))
}

/// Returns true if the first byte of `slice` is an ASCII digit; false for an
/// empty slice.
fn starts_with_digit(slice: &str) -> bool {
    slice.as_bytes().first().map_or(false, u8::is_ascii_digit)
}
502
503pub(crate) fn invalid_utf8(json: &[u8], index: usize) -> Error {
504    Error::syntax(ErrorCode::InvalidUTF8, json, index)
505}
506
// Pins the exact `Display` text of errors, including the input excerpt and
// the `^` marker line.
#[cfg(test)]
mod test {
    use crate::{from_slice, from_str, Deserialize};

    #[test]
    fn test_serde_errors_display() {
        #[allow(unused)]
        #[derive(Debug, Deserialize)]
        struct Foo {
            a: Vec<i32>,
            c: String,
        }

        // The object contains neither `a` nor `c`.
        let err = from_str::<Foo>("{ \"b\":[]}").unwrap_err();
        assert_eq!(
            format!("{}", err),
            "missing field `a` at line 1 column 8\n\n\t{ \"b\":[]}\n\t........^\n"
        );

        // Stray `x` directly after an array element.
        let err = from_str::<Foo>("{\"a\": [1, 2x, 3, 4, 5]}").unwrap_err();
        println!("{}", err);
        assert_eq!(
            format!("{}", err),
            "Expected this character to be either a ',' or a ']' while parsing at line 1 column \
             11\n\n\t\": [1, 2x, 3, 4,\n\t........^.......\n"
        );

        // `null` where a sequence is expected.
        let err = from_str::<Foo>("{\"a\": null}").unwrap_err();
        assert_eq!(
            format!("{}", err),
            "invalid type: null, expected a sequence at line 1 column 9\n\n\t\"a\": \
             null}\n\t........^.\n"
        );

        // The array is closed with `}` instead of `]`.
        let err = from_str::<Foo>("{\"a\": [1,2,3  }").unwrap_err();
        assert_eq!(
            format!("{}", err),
            "Expected this character to be either a ',' or a ']' while parsing at line 1 column \
             14\n\n\t[1,2,3  }\n\t........^\n"
        );

        // A string element where an `i32` is expected.
        let err = from_str::<Foo>("{\"a\": [\"123\"]}").unwrap_err();
        assert_eq!(
            format!("{}", err),
            "invalid type: string \"123\", expected i32 at line 1 column 11\n\n\t\": \
             [\"123\"]}\n\t........^..\n"
        );

        // The input ends right after the array is opened.
        let err = from_str::<Foo>("{\"a\": [").unwrap_err();
        assert_eq!(
            format!("{}", err),
            "EOF while parsing at line 1 column 6\n\n\t{\"a\": [\n\t......^\n"
        );

        // Number with leading zeros.
        let err = from_str::<Foo>("{\"a\": [000]}").unwrap_err();
        assert_eq!(
            format!("{}", err),
            "Expected this character to be either a ',' or a ']' while parsing at line 1 column \
             8\n\n\t{\"a\": [000]}\n\t........^...\n"
        );

        // A minus sign without digits.
        let err = from_str::<Foo>("{\"a\": [-]}").unwrap_err();
        assert_eq!(
            format!("{}", err),
            "Invalid number at line 1 column 7\n\n\t{\"a\": [-]}\n\t.......^..\n"
        );

        // An exponent marker without digits.
        let err = from_str::<Foo>("{\"a\": [-1.23e]}").unwrap_err();
        assert_eq!(
            format!("{}", err),
            "Invalid number at line 1 column 12\n\n\t: [-1.23e]}\n\t........^..\n"
        );

        // Unterminated string made of multi-byte characters; the marker line
        // counts bytes, hence nine dots for three characters.
        let err = from_str::<Foo>("{\"c\": \"哈哈哈哈哈哈}").unwrap_err();
        assert_eq!(
            format!("{}", err),
            "EOF while parsing at line 1 column 25\n\n\t哈哈哈}\n\t.........^\n"
        );

        // A lone `0x80` byte is not valid UTF-8.
        let err = from_slice::<Foo>(b"{\"b\":\"\x80\"}").unwrap_err();
        assert_eq!(
            format!("{}", err),
            "Invalid UTF-8 characters in json at line 1 column 6\n\n\t{\"b\":\"�\"}\n\t......^..\n"
        );
    }

    #[test]
    fn test_other_errors() {
        // Converting NaN into a `Value` must fail with exactly this message.
        let err = crate::Value::try_from(f64::NAN).unwrap_err();
        assert_eq!(
            format!("{}", err),
            "NaN or Infinity is not a valid JSON value"
        );
    }
}