serde_encom/
error.rs

//! When serializing or deserializing EnCom goes wrong.
2
3use crate::io;
4use alloc::boxed::Box;
5use alloc::string::{String, ToString};
6use atoi_simd::AtoiSimdError;
7use core::fmt::{self, Debug, Display};
8use core::result;
9use core::str::FromStr;
10use serde::{de, ser};
11#[cfg(feature = "std")]
12use std::error;
13#[cfg(feature = "std")]
14use std::io::ErrorKind;
15
/// This type represents all possible errors that can occur when serializing or
/// deserializing EnCom data.
///
/// The error code and the line/column position live in a boxed `ErrorImpl`;
/// they are exposed through the accessor methods on this type.
pub struct Error {
    /// This `Box` allows us to keep the size of `Error` as small as possible. A
    /// larger `Error` type was substantially slower due to all the functions
    /// that pass around `Result<T, Error>`.
    err: Box<ErrorImpl>,
}
24
/// Alias for a `Result` with the error type `serde_encom::Error`.
///
/// The success type stays generic; only the error type is fixed.
pub type Result<T> = result::Result<T, Error>;
27
28impl Error {
29    /// One-based line number at which the error was detected.
30    ///
31    /// Characters in the first line of the input (before the first newline
32    /// character) are in line 1.
33    pub fn line(&self) -> usize {
34        self.err.line
35    }
36
37    /// One-based column number at which the error was detected.
38    ///
39    /// The first character in the input and any characters immediately
40    /// following a newline character are in column 1.
41    ///
42    /// Note that errors may occur in column 0, for example if a read from an IO
43    /// stream fails immediately following a previously read newline character.
44    pub fn column(&self) -> usize {
45        self.err.column
46    }
47
48    /// Categorizes the cause of this error.
49    ///
50    /// - `ErrorCategory::Io` - failure to read or write bytes on an IO stream
51    /// - `ErrorCategory::Syntax` - input that is not syntactically valid EnCom
52    /// - `ErrorCategory::Data` - input data that is semantically incorrect
53    /// - `ErrorCategory::Eof` - unexpected end of the input data
54    pub fn classify(&self) -> ErrorCategory {
55        match self.err.code {
56            ErrorCode::Message(_) => ErrorCategory::Data,
57            ErrorCode::Io(_) => ErrorCategory::Io,
58            ErrorCode::EofWhileParsingList
59            | ErrorCode::EofWhileParsingObject
60            | ErrorCode::EofWhileParsingString
61            | ErrorCode::EofWhileParsingValue => ErrorCategory::Eof,
62            ErrorCode::ExpectedColon
63            // | ErrorCode::ExpectedListCommaOrEnd
64            | ErrorCode::ExpectedObjectCommaOrEnd
65            | ErrorCode::ExpectedSomeIdent
66            | ErrorCode::ExpectedSomeValue
67            | ErrorCode::ExpectedDoubleQuote
68            | ErrorCode::InvalidEscape
69            | ErrorCode::InvalidNumber
70            | ErrorCode::NumberOutOfRange
71            | ErrorCode::InvalidUnicodeCodePoint
72            | ErrorCode::ControlCharacterWhileParsingString
73            | ErrorCode::KeyMustBeAString
74            | ErrorCode::ExpectedNumericKey
75            | ErrorCode::FloatKeyMustBeFinite
76            // | ErrorCode::LoneLeadingSurrogateInHexEscape
77            | ErrorCode::TrailingComma
78            | ErrorCode::TrailingCharacters
79            // | ErrorCode::UnexpectedEndOfHexEscape
80            | ErrorCode::UnexpectedEndOfString
81            | ErrorCode::RecursionLimitExceeded => ErrorCategory::Syntax,
82        }
83    }
84
85    /// Returns true if this error was caused by a failure to read or write
86    /// bytes on an IO stream.
87    pub fn is_io(&self) -> bool {
88        self.classify() == ErrorCategory::Io
89    }
90
91    /// Returns true if this error was caused by input that was not
92    /// syntactically valid EnCom.
93    pub fn is_syntax(&self) -> bool {
94        self.classify() == ErrorCategory::Syntax
95    }
96
97    /// Returns true if this error was caused by input data that was
98    /// semantically incorrect.
99    ///
100    /// For example, EnCom containing a number is semantically incorrect when the
101    /// type being deserialized into holds a String.
102    pub fn is_data(&self) -> bool {
103        self.classify() == ErrorCategory::Data
104    }
105
106    /// Returns true if this error was caused by prematurely reaching the end of
107    /// the input data.
108    ///
109    /// Callers that process streaming input may be interested in retrying the
110    /// deserialization once more data is available.
111    pub fn is_eof(&self) -> bool {
112        self.classify() == ErrorCategory::Eof
113    }
114
115    /// The kind reported by the underlying standard library I/O error, if this
116    /// error was caused by a failure to read or write bytes on an I/O stream.
117    ///
118    /// # Example
119    ///
120    /// ```
121    /// use serde_encom::Value;
122    /// use std::io::{self, ErrorKind, Read};
123    /// use std::process;
124    ///
125    /// struct ReaderThatWillTimeOut<'a>(&'a [u8]);
126    ///
127    /// impl<'a> Read for ReaderThatWillTimeOut<'a> {
128    ///     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
129    ///         if self.0.is_empty() {
130    ///             Err(io::Error::new(ErrorKind::TimedOut, "timed out"))
131    ///         } else {
132    ///             self.0.read(buf)
133    ///         }
134    ///     }
135    /// }
136    ///
137    /// fn main() {
138    ///     let reader = ReaderThatWillTimeOut(br#" {"k": "#);
139    ///
140    ///     let _: Value = match serde_encom::from_reader(reader) {
141    ///         Ok(value) => value,
142    ///         Err(error) => {
143    ///             if error.io_error_kind() == Some(ErrorKind::TimedOut) {
144    ///                 // Maybe this application needs to retry certain kinds of errors.
145    ///
146    ///                 # return;
147    ///             } else {
148    ///                 eprintln!("error: {}", error);
149    ///                 process::exit(1);
150    ///             }
151    ///         }
152    ///     };
153    /// }
154    /// ```
155    #[cfg(feature = "std")]
156    pub fn io_error_kind(&self) -> Option<ErrorKind> {
157        if let ErrorCode::Io(io_error) = &self.err.code {
158            Some(io_error.kind())
159        } else {
160            None
161        }
162    }
163}
164
/// Categorizes the cause of a `serde_encom::Error`.
///
/// Returned by `Error::classify`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum ErrorCategory {
    /// The error was caused by a failure to read or write bytes on an IO
    /// stream.
    Io,

    /// The error was caused by input that was not syntactically valid EnCom.
    Syntax,

    /// The error was caused by input data that was semantically incorrect.
    ///
    /// For example, EnCom containing a number is semantically incorrect when the
    /// type being deserialized into holds a String.
    Data,

    /// The error was caused by prematurely reaching the end of the input data.
    ///
    /// Callers that process streaming input may be interested in retrying the
    /// deserialization once more data is available.
    Eof,
}
187
#[cfg(feature = "std")]
#[allow(clippy::fallible_impl_from)]
impl From<Error> for io::Error {
    /// Convert a `serde_encom::Error` into an `io::Error`.
    ///
    /// An error that wraps an I/O error yields that original I/O error.
    /// EnCom syntax and data errors are turned into `InvalidData` IO errors.
    /// EOF errors are turned into `UnexpectedEof` IO errors.
    ///
    /// ```
    /// use std::io;
    ///
    /// enum MyError {
    ///     Io(io::Error),
    ///     EnCom(serde_encom::Error),
    /// }
    ///
    /// impl From<serde_encom::Error> for MyError {
    ///     fn from(err: serde_encom::Error) -> MyError {
    ///         use serde_encom::ErrorCategory;
    ///         match err.classify() {
    ///             ErrorCategory::Io => {
    ///                 MyError::Io(err.into())
    ///             }
    ///             ErrorCategory::Syntax | ErrorCategory::Data | ErrorCategory::Eof => {
    ///                 MyError::EnCom(err)
    ///             }
    ///         }
    ///     }
    /// }
    /// ```
    fn from(j: Error) -> Self {
        let kind = match j.classify() {
            ErrorCategory::Io => {
                // `classify` reports `Io` only for `ErrorCode::Io`, so the
                // wrapped I/O error can be handed back unchanged.
                return match j.err.code {
                    ErrorCode::Io(err) => err,
                    _ => unreachable!(),
                };
            }
            ErrorCategory::Syntax | ErrorCategory::Data => ErrorKind::InvalidData,
            ErrorCategory::Eof => ErrorKind::UnexpectedEof,
        };
        io::Error::new(kind, j)
    }
}
232
/// Boxed payload of an `Error`: the error code together with the position at
/// which it was detected.
struct ErrorImpl {
    /// What went wrong.
    code: ErrorCode,
    /// Line of the error. `0` means no position is recorded; the `Display`
    /// impl then prints the code without an "at line … column …" suffix.
    line: usize,
    /// Column of the error.
    column: usize,
}
238
/// The specific reason an `Error` was raised.
///
/// `Error::classify` groups these codes into an `ErrorCategory`, and the
/// `Display` impl provides the human-readable text for each of them.
pub(crate) enum ErrorCode {
    /// Free-form message, built from `de::Error::custom` /
    /// `ser::Error::custom`. Classified as `ErrorCategory::Data`.
    Message(Box<str>),

    /// Some IO error occurred while serializing or deserializing.
    Io(io::Error),

    /// EOF while parsing a list.
    EofWhileParsingList,

    /// EOF while parsing an object.
    EofWhileParsingObject,

    /// EOF while parsing a string.
    EofWhileParsingString,

    /// EOF while parsing an EnCom value.
    EofWhileParsingValue,

    /// Expected this character to be a `':'`.
    ExpectedColon,

    // Disabled variant `ExpectedListCommaOrEnd`: expected this character to
    // be either a `','` or a `']'`.

    /// Expected this character to be either a `','` or a `'}'`.
    ExpectedObjectCommaOrEnd,

    /// Expected to parse either a `true`, `false`, or a `null`.
    ExpectedSomeIdent,

    /// Expected this character to start an EnCom value.
    ExpectedSomeValue,

    /// Expected this character to be a `"`.
    ExpectedDoubleQuote,

    /// Invalid hex escape code.
    InvalidEscape,

    /// Invalid number.
    InvalidNumber,

    /// Number is bigger than the maximum value of its type.
    NumberOutOfRange,

    /// Invalid unicode code point.
    InvalidUnicodeCodePoint,

    /// Control character found while parsing a string.
    ControlCharacterWhileParsingString,

    /// Object key is not a string.
    KeyMustBeAString,

    /// Contents of key were supposed to be a number.
    ExpectedNumericKey,

    /// Object key is a non-finite float value.
    FloatKeyMustBeFinite,

    // Disabled variant `LoneLeadingSurrogateInHexEscape`: lone leading
    // surrogate in hex escape.

    /// EnCom has a comma after the last value in an array or map.
    TrailingComma,

    /// EnCom has non-whitespace trailing characters after the value.
    TrailingCharacters,

    // Disabled variant `UnexpectedEndOfHexEscape`: unexpected end of hex
    // escape.

    /// Unexpected end of string.
    UnexpectedEndOfString,

    /// Encountered nesting of EnCom maps and arrays more than 128 layers deep.
    RecursionLimitExceeded,
}
318
319impl Error {
320    #[cold]
321    pub(crate) fn syntax(code: ErrorCode, line: usize, column: usize) -> Self {
322        Error {
323            err: Box::new(ErrorImpl { code, line, column }),
324        }
325    }
326
327    // Not public API. Should be pub(crate).
328    //
329    // Update `eager_json` crate when this function changes.
330    #[doc(hidden)]
331    #[cold]
332    pub fn io(error: io::Error) -> Self {
333        Error {
334            err: Box::new(ErrorImpl {
335                code: ErrorCode::Io(error),
336                line: 0,
337                column: 0,
338            }),
339        }
340    }
341
342    #[cold]
343    pub(crate) fn fix_position<F>(self, f: F) -> Self
344    where
345        F: FnOnce(ErrorCode) -> Error,
346    {
347        if self.err.line == 0 {
348            f(self.err.code)
349        } else {
350            self
351        }
352    }
353}
354
355impl Display for ErrorCode {
356    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
357        match self {
358            ErrorCode::Message(msg) => f.write_str(msg),
359            ErrorCode::Io(err) => Display::fmt(err, f),
360            ErrorCode::EofWhileParsingList => f.write_str("EOF while parsing a list"),
361            ErrorCode::EofWhileParsingObject => f.write_str("EOF while parsing an object"),
362            ErrorCode::EofWhileParsingString => f.write_str("EOF while parsing a string"),
363            ErrorCode::EofWhileParsingValue => f.write_str("EOF while parsing a value"),
364            ErrorCode::ExpectedColon => f.write_str("expected `:`"),
365            // ErrorCode::ExpectedListCommaOrEnd => f.write_str("expected `,` or `]`"),
366            ErrorCode::ExpectedObjectCommaOrEnd => f.write_str("expected `,` or `}`"),
367            ErrorCode::ExpectedSomeIdent => f.write_str("expected ident"),
368            ErrorCode::ExpectedSomeValue => f.write_str("expected value"),
369            ErrorCode::ExpectedDoubleQuote => f.write_str("expected `\"`"),
370            ErrorCode::InvalidEscape => f.write_str("invalid escape"),
371            ErrorCode::InvalidNumber => f.write_str("invalid number"),
372            ErrorCode::NumberOutOfRange => f.write_str("number out of range"),
373            ErrorCode::InvalidUnicodeCodePoint => f.write_str("invalid unicode code point"),
374            ErrorCode::ControlCharacterWhileParsingString => {
375                f.write_str("control character (\\u0000-\\u001F) found while parsing a string")
376            }
377            ErrorCode::KeyMustBeAString => f.write_str("key must be a string"),
378            ErrorCode::ExpectedNumericKey => {
379                f.write_str("invalid value: expected key to be a number in quotes")
380            }
381            ErrorCode::FloatKeyMustBeFinite => {
382                f.write_str("float key must be finite (got NaN or +/-inf)")
383            }
384            /* ErrorCode::LoneLeadingSurrogateInHexEscape => {
385                f.write_str("lone leading surrogate in hex escape")
386            } */
387            ErrorCode::TrailingComma => f.write_str("trailing comma"),
388            ErrorCode::TrailingCharacters => f.write_str("trailing characters"),
389            // ErrorCode::UnexpectedEndOfHexEscape => f.write_str("unexpected end of hex escape"),
390            ErrorCode::UnexpectedEndOfString => f.write_str("unexpected end of string"),
391            ErrorCode::RecursionLimitExceeded => f.write_str("recursion limit exceeded"),
392        }
393    }
394}
395
396impl From<AtoiSimdError<'_>> for ErrorCode {
397    fn from(e: AtoiSimdError) -> Self {
398        match e {
399            AtoiSimdError::Empty => ErrorCode::EofWhileParsingValue,
400            AtoiSimdError::Size(_, _) => ErrorCode::NumberOutOfRange, //todo: new error
401            AtoiSimdError::Overflow(_) => ErrorCode::NumberOutOfRange,
402            AtoiSimdError::Invalid64(_, _, _) | AtoiSimdError::Invalid128(_, _, _) => {
403                ErrorCode::InvalidNumber
404            }
405        }
406    }
407}
408
409impl From<AtoiSimdError<'_>> for Error {
410    fn from(e: AtoiSimdError<'_>) -> Self {
411        Self {
412            err: Box::new(ErrorImpl {
413                code: e.into(),
414                line: 0,
415                column: 0,
416            }),
417        }
418    }
419}
420
421impl serde::de::StdError for Error {
422    #[cfg(feature = "std")]
423    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
424        match &self.err.code {
425            ErrorCode::Io(err) => err.source(),
426            _ => None,
427        }
428    }
429}
430
431impl Display for Error {
432    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
433        Display::fmt(&*self.err, f)
434    }
435}
436
437impl Display for ErrorImpl {
438    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
439        if self.line == 0 {
440            Display::fmt(&self.code, f)
441        } else {
442            write!(
443                f,
444                "{} at line {} column {}",
445                self.code, self.line, self.column
446            )
447        }
448    }
449}
450
451// Remove two layers of verbosity from the debug representation. Humans often
452// end up seeing this representation because it is what unwrap() shows.
453impl Debug for Error {
454    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
455        write!(
456            f,
457            "Error({:?}, line: {}, column: {})",
458            self.err.code.to_string(),
459            self.err.line,
460            self.err.column
461        )
462    }
463}
464
465impl de::Error for Error {
466    #[cold]
467    fn custom<T: Display>(msg: T) -> Error {
468        make_error(msg.to_string())
469    }
470
471    #[cold]
472    fn invalid_type(unexp: de::Unexpected, exp: &dyn de::Expected) -> Self {
473        if let de::Unexpected::Unit = unexp {
474            Error::custom(format_args!("invalid type: null, expected {}", exp))
475        } else {
476            Error::custom(format_args!("invalid type: {}, expected {}", unexp, exp))
477        }
478    }
479}
480
481impl ser::Error for Error {
482    #[cold]
483    fn custom<T: Display>(msg: T) -> Error {
484        make_error(msg.to_string())
485    }
486}
487
488// Parse our own error message that looks like "{} at line {} column {}" to work
489// around erased-serde round-tripping the error through de::Error::custom.
490fn make_error(mut msg: String) -> Error {
491    let (line, column) = parse_line_col(&mut msg).unwrap_or((0, 0));
492    Error {
493        err: Box::new(ErrorImpl {
494            code: ErrorCode::Message(msg.into_boxed_str()),
495            line,
496            column,
497        }),
498    }
499}
500
501fn parse_line_col(msg: &mut String) -> Option<(usize, usize)> {
502    let start_of_suffix = msg.rfind(" at line ")?;
503
504    // Find start and end of line number.
505    let start_of_line = start_of_suffix + " at line ".len();
506    let mut end_of_line = start_of_line;
507    while starts_with_digit(&msg[end_of_line..]) {
508        end_of_line += 1;
509    }
510
511    if !msg[end_of_line..].starts_with(" column ") {
512        return None;
513    }
514
515    // Find start and end of column number.
516    let start_of_column = end_of_line + " column ".len();
517    let mut end_of_column = start_of_column;
518    while starts_with_digit(&msg[end_of_column..]) {
519        end_of_column += 1;
520    }
521
522    if end_of_column < msg.len() {
523        return None;
524    }
525
526    // Parse numbers.
527    let line = match usize::from_str(&msg[start_of_line..end_of_line]) {
528        Ok(line) => line,
529        Err(_) => return None,
530    };
531    let column = match usize::from_str(&msg[start_of_column..end_of_column]) {
532        Ok(column) => column,
533        Err(_) => return None,
534    };
535
536    msg.truncate(start_of_suffix);
537    Some((line, column))
538}
539
/// Returns true when the first byte of `slice` is an ASCII digit (`0`-`9`);
/// an empty slice yields false.
fn starts_with_digit(slice: &str) -> bool {
    matches!(slice.as_bytes().first(), Some(b'0'..=b'9'))
}