sonic_rs/
error.rs

1//! Errors.
2
// This code is adapted from [serde_json](https://github.com/serde-rs/json), with modifications where necessary.
4
5use core::{
6    fmt::{self, Debug, Display},
7    result,
8};
9use std::{borrow::Cow, error, fmt::Result as FmtResult, str::FromStr};
10
11use serde::{
12    de::{self, Unexpected},
13    ser,
14};
15use sonic_number::Error as NumberError;
16use thiserror::Error as ErrorTrait;
17
18use crate::reader::Position;
19
/// This type represents all possible errors that can occur when serializing or
/// deserializing JSON data.
///
/// The error code and position are kept in a private, heap-allocated
/// `ErrorImpl`; they are read through the accessor methods on this type.
pub struct Error {
    /// This `Box` allows us to keep the size of `Error` as small as possible. A
    /// larger `Error` type was substantially slower due to all the functions
    /// that pass around `Result<T, Error>`.
    err: Box<ErrorImpl>,
}
28
/// Alias for a `Result` with the error type `sonic_rs::Error`.
///
/// Equivalent to `core::result::Result<T, Error>`.
pub type Result<T> = result::Result<T, Error>;
31
32impl Error {
33    /// One-based line number at which the error was detected.
34    ///
35    /// Characters in the first line of the input (before the first newline
36    /// character) are in line 1.
37    pub fn line(&self) -> usize {
38        self.err.line
39    }
40
41    /// One-based column number at which the error was detected.
42    ///
43    /// The first character in the input and any characters immediately
44    /// following a newline character are in column 1.
45    ///
46    /// Note that errors may occur in column 0, for example if a read from an
47    /// I/O stream fails immediately following a previously read newline
48    /// character.
49    pub fn column(&self) -> usize {
50        self.err.column
51    }
52
53    /// The kind reported by the underlying standard library I/O error, if this
54    /// error was caused by a failure to read or write bytes on an I/O stream.
55    pub fn io_error_kind(&self) -> Option<std::io::ErrorKind> {
56        if let ErrorCode::Io(io_error) = &self.err.code {
57            Some(io_error.kind())
58        } else {
59            None
60        }
61    }
62
63    /// Categorizes the cause of this error.
64    ///
65    /// - `Category::Io` - failure to read or write bytes on an I/O stream
66    /// - `Category::Syntax` - input that is not syntactically valid JSON
67    /// - `Category::Data` - input data that is semantically incorrect
68    /// - `Category::Eof` - unexpected end of the input data
69    pub fn classify(&self) -> Category {
70        match self.err.code {
71            ErrorCode::Message(_) | ErrorCode::UnexpectedVisitType => Category::TypeUnmatched,
72            ErrorCode::GetInEmptyObject
73            | ErrorCode::GetInEmptyArray
74            | ErrorCode::GetIndexOutOfArray
75            | ErrorCode::GetUnknownKeyInObject => Category::NotFound,
76            ErrorCode::Io(_) => Category::Io,
77            ErrorCode::EofWhileParsing => Category::Eof,
78            ErrorCode::ExpectedColon
79            | ErrorCode::ExpectedObjectCommaOrEnd
80            | ErrorCode::InvalidEscape
81            | ErrorCode::InvalidJsonValue
82            | ErrorCode::InvalidLiteral
83            | ErrorCode::InvalidUTF8
84            | ErrorCode::InvalidNumber
85            | ErrorCode::NumberOutOfRange
86            | ErrorCode::InvalidUnicodeCodePoint
87            | ErrorCode::ControlCharacterWhileParsingString
88            | ErrorCode::TrailingComma
89            | ErrorCode::TrailingCharacters
90            | ErrorCode::ExpectObjectKeyOrEnd
91            | ErrorCode::ExpectedArrayCommaOrEnd
92            | ErrorCode::ExpectedArrayStart
93            | ErrorCode::ExpectedObjectStart
94            | ErrorCode::InvalidSurrogateUnicodeCodePoint
95            | ErrorCode::SerExpectKeyIsStrOrNum(_)
96            | ErrorCode::FloatMustBeFinite
97            | ErrorCode::ExpectedQuote
98            | ErrorCode::ExpectedNumericKey
99            | ErrorCode::RecursionLimitExceeded => Category::Syntax,
100        }
101    }
102
103    /// Returns true if this error was caused by a failure to read or write
104    /// bytes on an I/O stream.
105    pub fn is_io(&self) -> bool {
106        self.classify() == Category::Io
107    }
108
109    /// Returns true if this error was caused by input that was not
110    /// syntactically valid JSON.
111    pub fn is_syntax(&self) -> bool {
112        self.classify() == Category::Syntax
113    }
114
115    /// Returns true when the input data is unmatched for expected type.
116    ///
117    /// For example, JSON containing a number  when the type being deserialized into holds a String.
118    pub fn is_unmatched_type(&self) -> bool {
119        self.classify() == Category::TypeUnmatched
120    }
121
122    /// Return true when the target field was not found from JSON.
123    ///
124    /// For example:
125    /// When using `get*` APIs, it gets a unknown keys from JSON text, or get
126    /// a index out of the array.
127    pub fn is_not_found(&self) -> bool {
128        self.classify() == Category::NotFound
129    }
130
131    /// Returns true if this error was caused by prematurely reaching the end of
132    /// the input data.
133    ///
134    /// Callers that process streaming input may be interested in retrying the
135    /// deserialization once more data is available.
136    pub fn is_eof(&self) -> bool {
137        self.classify() == Category::Eof
138    }
139
140    /// Returens the offset of the error position from the starting of JSON text.
141    pub fn offset(&self) -> usize {
142        self.err.index
143    }
144}
145
146#[allow(clippy::fallible_impl_from)]
147impl From<Error> for std::io::Error {
148    /// Convert a `sonic_rs::Error` into an `std::io::Error`.
149    ///
150    /// JSON syntax and data errors are turned into `InvalidData` I/O errors.
151    /// EOF errors are turned into `UnexpectedEof` I/O errors.
152    fn from(j: Error) -> Self {
153        match j.err.code {
154            ErrorCode::Io(err) => err,
155            ErrorCode::EofWhileParsing => std::io::Error::new(std::io::ErrorKind::UnexpectedEof, j),
156            _ => std::io::Error::new(std::io::ErrorKind::InvalidData, j),
157        }
158    }
159}
160
/// Categorizes the cause of a `sonic_rs::Error`.
///
/// Marked `#[non_exhaustive]`, so matches outside this crate need a wildcard
/// arm.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[non_exhaustive]
pub enum Category {
    /// The error was caused by a failure to read or write bytes on an I/O
    /// stream.
    Io,

    /// The error was caused by input that was not syntactically valid JSON.
    Syntax,

    /// The error was caused by input data that does not match the expected
    /// type.
    ///
    /// For example, JSON containing a number when the type being deserialized
    /// into holds a `String`.
    TypeUnmatched,

    /// The error was caused by the target field not being found in the JSON.
    ///
    /// For example, a `get*` API was asked for an unknown key of an object or
    /// for an index outside of an array.
    NotFound,

    /// The error was caused by prematurely reaching the end of the input data.
    ///
    /// Callers that process streaming input may be interested in retrying the
    /// deserialization once more data is available.
    Eof,
}
191
/// Heap-allocated payload of an `Error`: the error code together with the
/// position it was reported at.
struct ErrorImpl {
    // What went wrong.
    code: ErrorCode,
    // Offset of the error within the JSON input; 0 for errors built without a
    // position.
    index: usize,
    // Line of the error; 0 means no position is available, in which case
    // `Display` prints only the code.
    line: usize,
    // Column of the error within its line.
    column: usize,
    // Description of the error position: a fragment of the input around the
    // error plus a caret marker line, appended to the `Display` output.
    descript: Option<String>,
}
200
/// The specific reason an `Error` was raised.
///
/// The `Display` text of each variant comes from its `#[error(...)]`
/// attribute (derived through `thiserror`). `Error::classify` groups these
/// codes into the coarser `Category`.
#[derive(ErrorTrait, Debug)]
#[non_exhaustive]
pub enum ErrorCode {
    // Free-form message, e.g. one produced through serde's `custom` hooks.
    #[error("{0}")]
    Message(Cow<'static, str>),

    // Wraps the underlying standard library I/O error.
    #[error("io error while serializing or deserializing")]
    Io(std::io::Error),

    #[error("EOF while parsing")]
    EofWhileParsing,

    #[error("Expected this character to be a ':' while parsing")]
    ExpectedColon,

    #[error("Expected this character to be either a ',' or a ']' while parsing")]
    ExpectedArrayCommaOrEnd,

    // `}}` is the escaped form of a literal `}` in the format string.
    #[error("Expected this character to be either a ',' or a '}}' while parsing")]
    ExpectedObjectCommaOrEnd,

    #[error("Invalid literal (`true`, `false`, or a `null`) while parsing")]
    InvalidLiteral,

    #[error("Invalid JSON value")]
    InvalidJsonValue,

    // `{{` is the escaped form of a literal `{` in the format string.
    #[error("Expected this character to be '{{'")]
    ExpectedObjectStart,

    #[error("Expected this character to be '['")]
    ExpectedArrayStart,

    #[error("Invalid escape chars")]
    InvalidEscape,

    #[error("Invalid number")]
    InvalidNumber,

    #[error("Number is bigger than the maximum value of its type")]
    NumberOutOfRange,

    #[error("Invalid unicode code point")]
    InvalidUnicodeCodePoint,

    #[error("Invalid UTF-8 characters in json")]
    InvalidUTF8,

    #[error("Control character found while parsing a string")]
    ControlCharacterWhileParsingString,

    #[error("Expected this character to be '\"' or '}}'")]
    ExpectObjectKeyOrEnd,

    #[error("JSON has a comma after the last value in an array or object")]
    TrailingComma,

    #[error("JSON has non-whitespace trailing characters after the value")]
    TrailingCharacters,

    #[error("Encountered nesting of JSON maps and arrays more than 128 layers deep")]
    RecursionLimitExceeded,

    #[error("Get value from an empty object")]
    GetInEmptyObject,

    #[error("Get unknown key from the object")]
    GetUnknownKeyInObject,

    #[error("Get value from an empty array")]
    GetInEmptyArray,

    #[error("Get index out of the array")]
    GetIndexOutOfArray,

    #[error("Unexpected visited type")]
    UnexpectedVisitType,

    #[error("Invalid surrogate Unicode code point")]
    InvalidSurrogateUnicodeCodePoint,

    #[error("Float number must be finite, not be Infinity or NaN")]
    FloatMustBeFinite,

    #[error("Expect a numeric key in Value")]
    ExpectedNumericKey,

    #[error("Expect a quote")]
    ExpectedQuote,

    // Carries a description of the offending key, shown in the message.
    #[error("Expected the key to be string/bool/number when serializing map, now is {0}")]
    SerExpectKeyIsStrOrNum(Unexpected<'static>),
}
294
295impl From<NumberError> for ErrorCode {
296    fn from(err: NumberError) -> Self {
297        match err {
298            NumberError::InvalidNumber => ErrorCode::InvalidNumber,
299            NumberError::FloatMustBeFinite => ErrorCode::FloatMustBeFinite,
300        }
301    }
302}
303
304impl Error {
305    #[cold]
306    pub(crate) fn syntax(code: ErrorCode, json: &[u8], index: usize) -> Self {
307        let position = Position::from_index(index, json);
308        // generate descript about 16 characters
309        let mut start = index.saturating_sub(8);
310        let mut end = if index + 8 > json.len() {
311            json.len()
312        } else {
313            index + 8
314        };
315
316        // find the nearest valid utf-8 character
317        while start > 0 && index - start <= 16 && (json[start] & 0b1100_0000) == 0b1000_0000 {
318            start -= 1;
319        }
320
321        // find the nearest valid utf-8 character
322        while end < json.len() && end - index <= 16 && (json[end - 1] & 0b1100_0000) == 0b1000_0000
323        {
324            end += 1;
325        }
326
327        let fragment = String::from_utf8_lossy(&json[start..end]).to_string();
328        let left = index - start;
329        let right = if end - index > 1 {
330            end - (index + 1)
331        } else {
332            0
333        };
334        let mask = ".".repeat(left) + "^" + &".".repeat(right);
335        let descript = format!("\n\n\t{fragment}\n\t{mask}\n");
336
337        Error {
338            err: Box::new(ErrorImpl {
339                code,
340                line: position.line,
341                column: position.column,
342                index,
343                descript: Some(descript),
344            }),
345        }
346    }
347
348    #[cold]
349    pub(crate) fn ser_error(code: ErrorCode) -> Self {
350        Error {
351            err: Box::new(ErrorImpl {
352                code,
353                line: 0,
354                column: 0,
355                index: 0,
356                descript: None,
357            }),
358        }
359    }
360
361    #[cold]
362    pub(crate) fn io(error: std::io::Error) -> Self {
363        Error {
364            err: Box::new(ErrorImpl {
365                code: ErrorCode::Io(error),
366                line: 0,
367                index: 0,
368                column: 0,
369                descript: None,
370            }),
371        }
372    }
373
374    #[cold]
375    pub(crate) fn error_code(self) -> ErrorCode {
376        self.err.code
377    }
378}
379
380impl serde::de::StdError for Error {
381    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
382        match &self.err.code {
383            ErrorCode::Io(err) => err.source(),
384            _ => None,
385        }
386    }
387}
388
389impl Display for Error {
390    fn fmt(&self, f: &mut fmt::Formatter) -> FmtResult {
391        Display::fmt(&*self.err, f)
392    }
393}
394
395impl Display for ErrorImpl {
396    fn fmt(&self, f: &mut fmt::Formatter) -> FmtResult {
397        if self.line != 0 {
398            write!(
399                f,
400                "{} at line {} column {}{}",
401                self.code,
402                self.line,
403                self.column,
404                self.descript.as_ref().unwrap_or(&"".to_string())
405            )
406        } else {
407            write!(f, "{}", self.code)
408        }
409    }
410}
411
412// Remove two layers of verbosity from the debug representation. Humans often
413// end up seeing this representation because it is what unwrap() shows.
414impl Debug for Error {
415    fn fmt(&self, f: &mut fmt::Formatter) -> FmtResult {
416        Display::fmt(&self, f)
417    }
418}
419
420impl de::Error for Error {
421    #[cold]
422    fn custom<T: Display>(msg: T) -> Error {
423        make_error(msg.to_string())
424    }
425
426    #[cold]
427    fn invalid_type(unexp: de::Unexpected, exp: &dyn de::Expected) -> Self {
428        if let de::Unexpected::Unit = unexp {
429            Error::custom(format_args!("invalid type: null, expected {exp}"))
430        } else {
431            Error::custom(format_args!("invalid type: {unexp}, expected {exp}"))
432        }
433    }
434}
435
436impl ser::Error for Error {
437    #[cold]
438    fn custom<T: Display>(msg: T) -> Error {
439        make_error(msg.to_string())
440    }
441}
442
443// TODO: remove me in 0.4 version.
444#[cold]
445pub(crate) fn make_error(mut msg: String) -> Error {
446    let (line, column) = parse_line_col(&mut msg).unwrap_or((0, 0));
447    Error {
448        err: Box::new(ErrorImpl {
449            code: ErrorCode::Message(msg.into()),
450            line,
451            index: 0,
452            column,
453            descript: None,
454        }),
455    }
456}
457
458fn parse_line_col(msg: &mut String) -> Option<(usize, usize)> {
459    let start_of_suffix = msg.rfind(" at line ")?;
460
461    // Find start and end of line number.
462    let start_of_line = start_of_suffix + " at line ".len();
463    let mut end_of_line = start_of_line;
464    while starts_with_digit(&msg[end_of_line..]) {
465        end_of_line += 1;
466    }
467
468    if !msg[end_of_line..].starts_with(" column ") {
469        return None;
470    }
471
472    // Find start and end of column number.
473    let start_of_column = end_of_line + " column ".len();
474    let mut end_of_column = start_of_column;
475    while starts_with_digit(&msg[end_of_column..]) {
476        end_of_column += 1;
477    }
478
479    if end_of_column < msg.len() {
480        return None;
481    }
482
483    // Parse numbers.
484    let line = match usize::from_str(&msg[start_of_line..end_of_line]) {
485        Ok(line) => line,
486        Err(_) => return None,
487    };
488    let column = match usize::from_str(&msg[start_of_column..end_of_column]) {
489        Ok(column) => column,
490        Err(_) => return None,
491    };
492
493    msg.truncate(start_of_suffix);
494    Some((line, column))
495}
496
/// Returns true when the first byte of `slice` is an ASCII digit; an empty
/// slice yields false.
fn starts_with_digit(slice: &str) -> bool {
    slice
        .as_bytes()
        .first()
        .map_or(false, |byte| byte.is_ascii_digit())
}
503
#[cfg(test)]
mod test {
    use crate::{from_slice, from_str, Deserialize};

    // Pins the exact `Display` output (message, line/column, input fragment and
    // caret marker line) for a range of malformed or mismatching inputs.
    #[test]
    fn test_serde_errors_display() {
        #[allow(unused)]
        #[derive(Debug, Deserialize)]
        struct Foo {
            a: Vec<i32>,
            c: String,
        }

        // Missing required field.
        let err = from_str::<Foo>("{ \"b\":[]}").unwrap_err();
        assert_eq!(
            format!("{err}"),
            "missing field `a` at line 1 column 9\n\n\t{ \"b\":[]}\n\t........^\n"
        );

        // Garbage after a number inside an array.
        let err = from_str::<Foo>("{\"a\": [1, 2x, 3, 4, 5]}").unwrap_err();
        println!("{err}");
        assert_eq!(
            format!("{err}"),
            "Expected this character to be either a ',' or a ']' while parsing at line 1 column \
             12\n\n\t\": [1, 2x, 3, 4,\n\t........^.......\n"
        );

        // `null` where a sequence is expected.
        let err = from_str::<Foo>("{\"a\": null}").unwrap_err();
        assert_eq!(
            format!("{err}"),
            "invalid type: null, expected a sequence at line 1 column 10\n\n\t\"a\": \
             null}\n\t........^.\n"
        );

        // Array closed with the wrong bracket.
        let err = from_str::<Foo>("{\"a\": [1,2,3  }").unwrap_err();
        assert_eq!(
            format!("{err}"),
            "Expected this character to be either a ',' or a ']' while parsing at line 1 column \
             15\n\n\t[1,2,3  }\n\t........^\n"
        );

        // String where an integer is expected.
        let err = from_str::<Foo>("{\"a\": [\"123\"]}").unwrap_err();
        assert_eq!(
            format!("{err}"),
            "invalid type: string \"123\", expected i32 at line 1 column 12\n\n\t\": \
             [\"123\"]}\n\t........^..\n"
        );

        // Input ends inside an array.
        let err = from_str::<Foo>("{\"a\": [").unwrap_err();
        assert_eq!(
            format!("{err}"),
            "EOF while parsing at line 1 column 7\n\n\t{\"a\": [\n\t......^\n"
        );

        // Number with leading zeros.
        let err = from_str::<Foo>("{\"a\": [000]}").unwrap_err();
        assert_eq!(
            format!("{err}"),
            "Expected this character to be either a ',' or a ']' while parsing at line 1 column \
             9\n\n\t{\"a\": [000]}\n\t........^...\n"
        );

        // Lone minus sign.
        let err = from_str::<Foo>("{\"a\": [-]}").unwrap_err();
        assert_eq!(
            format!("{err}"),
            "Invalid number at line 1 column 8\n\n\t{\"a\": [-]}\n\t.......^..\n"
        );

        // Exponent without digits.
        let err = from_str::<Foo>("{\"a\": [-1.23e]}").unwrap_err();
        assert_eq!(
            format!("{err}"),
            "Invalid number at line 1 column 13\n\n\t: [-1.23e]}\n\t........^..\n"
        );

        // Unterminated string made of multi-byte characters; the marker line
        // counts bytes, so it is wider than the fragment.
        let err = from_str::<Foo>("{\"c\": \"哈哈哈哈哈哈}").unwrap_err();
        assert_eq!(
            format!("{err}"),
            "EOF while parsing at line 1 column 26\n\n\t哈哈哈}\n\t.........^\n"
        );

        // Invalid UTF-8 byte inside a string; it is shown as U+FFFD in the fragment.
        let err = from_slice::<Foo>(b"{\"b\":\"\x80\"}").unwrap_err();
        assert_eq!(
            format!("{err}"),
            "Invalid UTF-8 characters in json at line 1 column 7\n\n\t{\"b\":\"�\"}\n\t......^..\n"
        );
    }

    // Converting NaN into a `Value` fails with a fixed message that carries no
    // position information.
    #[test]
    fn test_other_errors() {
        let err = crate::Value::try_from(f64::NAN).unwrap_err();
        assert_eq!(
            format!("{err}"),
            "NaN or Infinity is not a valid JSON value"
        );
    }

    // The error sits on the third line of a multi-line input; the expected
    // column (13) is the position of the stray `,` within that line.
    #[test]
    fn test_error_column() {
        let json_str = r#"
{
    "key": [, 1, 2, 3]
}
"#;
        let err = from_str::<crate::Value>(json_str).unwrap_err();
        assert_eq!(err.column(), 13);
    }
}