serde_encom/error.rs
1//! When serializing or deserializing EnCom goes wrong.
2
3use crate::io;
4use alloc::boxed::Box;
5use alloc::string::{String, ToString};
6use atoi_simd::AtoiSimdError;
7use core::fmt::{self, Debug, Display};
8use core::result;
9use core::str::FromStr;
10use serde::{de, ser};
11#[cfg(feature = "std")]
12use std::error;
13#[cfg(feature = "std")]
14use std::io::ErrorKind;
15
/// This type represents all possible errors that can occur when serializing or
/// deserializing EnCom data.
///
/// The error code and the position at which the error was detected are kept
/// in a private, boxed `ErrorImpl`; they are exposed through the accessor
/// methods on `Error` rather than through public fields.
pub struct Error {
    /// This `Box` allows us to keep the size of `Error` as small as possible. A
    /// larger `Error` type was substantially slower due to all the functions
    /// that pass around `Result<T, Error>`.
    err: Box<ErrorImpl>,
}
24
/// Alias for a `Result` with the error type `serde_encom::Error`.
///
/// The success type stays generic; only the error type is fixed.
pub type Result<T> = result::Result<T, Error>;
27
28impl Error {
29 /// One-based line number at which the error was detected.
30 ///
31 /// Characters in the first line of the input (before the first newline
32 /// character) are in line 1.
33 pub fn line(&self) -> usize {
34 self.err.line
35 }
36
37 /// One-based column number at which the error was detected.
38 ///
39 /// The first character in the input and any characters immediately
40 /// following a newline character are in column 1.
41 ///
42 /// Note that errors may occur in column 0, for example if a read from an IO
43 /// stream fails immediately following a previously read newline character.
44 pub fn column(&self) -> usize {
45 self.err.column
46 }
47
48 /// Categorizes the cause of this error.
49 ///
50 /// - `ErrorCategory::Io` - failure to read or write bytes on an IO stream
51 /// - `ErrorCategory::Syntax` - input that is not syntactically valid EnCom
52 /// - `ErrorCategory::Data` - input data that is semantically incorrect
53 /// - `ErrorCategory::Eof` - unexpected end of the input data
54 pub fn classify(&self) -> ErrorCategory {
55 match self.err.code {
56 ErrorCode::Message(_) => ErrorCategory::Data,
57 ErrorCode::Io(_) => ErrorCategory::Io,
58 ErrorCode::EofWhileParsingList
59 | ErrorCode::EofWhileParsingObject
60 | ErrorCode::EofWhileParsingString
61 | ErrorCode::EofWhileParsingValue => ErrorCategory::Eof,
62 ErrorCode::ExpectedColon
63 // | ErrorCode::ExpectedListCommaOrEnd
64 | ErrorCode::ExpectedObjectCommaOrEnd
65 | ErrorCode::ExpectedSomeIdent
66 | ErrorCode::ExpectedSomeValue
67 | ErrorCode::ExpectedDoubleQuote
68 | ErrorCode::InvalidEscape
69 | ErrorCode::InvalidNumber
70 | ErrorCode::NumberOutOfRange
71 | ErrorCode::InvalidUnicodeCodePoint
72 | ErrorCode::ControlCharacterWhileParsingString
73 | ErrorCode::KeyMustBeAString
74 | ErrorCode::ExpectedNumericKey
75 | ErrorCode::FloatKeyMustBeFinite
76 // | ErrorCode::LoneLeadingSurrogateInHexEscape
77 | ErrorCode::TrailingComma
78 | ErrorCode::TrailingCharacters
79 // | ErrorCode::UnexpectedEndOfHexEscape
80 | ErrorCode::UnexpectedEndOfString
81 | ErrorCode::RecursionLimitExceeded => ErrorCategory::Syntax,
82 }
83 }
84
85 /// Returns true if this error was caused by a failure to read or write
86 /// bytes on an IO stream.
87 pub fn is_io(&self) -> bool {
88 self.classify() == ErrorCategory::Io
89 }
90
91 /// Returns true if this error was caused by input that was not
92 /// syntactically valid EnCom.
93 pub fn is_syntax(&self) -> bool {
94 self.classify() == ErrorCategory::Syntax
95 }
96
97 /// Returns true if this error was caused by input data that was
98 /// semantically incorrect.
99 ///
100 /// For example, EnCom containing a number is semantically incorrect when the
101 /// type being deserialized into holds a String.
102 pub fn is_data(&self) -> bool {
103 self.classify() == ErrorCategory::Data
104 }
105
106 /// Returns true if this error was caused by prematurely reaching the end of
107 /// the input data.
108 ///
109 /// Callers that process streaming input may be interested in retrying the
110 /// deserialization once more data is available.
111 pub fn is_eof(&self) -> bool {
112 self.classify() == ErrorCategory::Eof
113 }
114
115 /// The kind reported by the underlying standard library I/O error, if this
116 /// error was caused by a failure to read or write bytes on an I/O stream.
117 ///
118 /// # Example
119 ///
120 /// ```
121 /// use serde_encom::Value;
122 /// use std::io::{self, ErrorKind, Read};
123 /// use std::process;
124 ///
125 /// struct ReaderThatWillTimeOut<'a>(&'a [u8]);
126 ///
127 /// impl<'a> Read for ReaderThatWillTimeOut<'a> {
128 /// fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
129 /// if self.0.is_empty() {
130 /// Err(io::Error::new(ErrorKind::TimedOut, "timed out"))
131 /// } else {
132 /// self.0.read(buf)
133 /// }
134 /// }
135 /// }
136 ///
137 /// fn main() {
138 /// let reader = ReaderThatWillTimeOut(br#" {"k": "#);
139 ///
140 /// let _: Value = match serde_encom::from_reader(reader) {
141 /// Ok(value) => value,
142 /// Err(error) => {
143 /// if error.io_error_kind() == Some(ErrorKind::TimedOut) {
144 /// // Maybe this application needs to retry certain kinds of errors.
145 ///
146 /// # return;
147 /// } else {
148 /// eprintln!("error: {}", error);
149 /// process::exit(1);
150 /// }
151 /// }
152 /// };
153 /// }
154 /// ```
155 #[cfg(feature = "std")]
156 pub fn io_error_kind(&self) -> Option<ErrorKind> {
157 if let ErrorCode::Io(io_error) = &self.err.code {
158 Some(io_error.kind())
159 } else {
160 None
161 }
162 }
163}
164
/// Categorizes the cause of a `serde_encom::Error`.
///
/// This is the value returned by `Error::classify`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum ErrorCategory {
    /// The error was caused by a failure to read or write bytes on an IO
    /// stream.
    Io,

    /// The error was caused by input that was not syntactically valid EnCom.
    Syntax,

    /// The error was caused by input data that was semantically incorrect.
    ///
    /// For example, EnCom containing a number is semantically incorrect when the
    /// type being deserialized into holds a String.
    Data,

    /// The error was caused by prematurely reaching the end of the input data.
    ///
    /// Callers that process streaming input may be interested in retrying the
    /// deserialization once more data is available.
    Eof,
}
187
#[cfg(feature = "std")]
// The `unreachable!()` below is what trips this lint; that arm can never run
// because I/O errors are unwrapped before the category is consulted.
#[allow(clippy::fallible_impl_from)]
impl From<Error> for io::Error {
    /// Convert a `serde_encom::Error` into an `io::Error`.
    ///
    /// An error that wraps an I/O error yields that original I/O error.
    /// EnCom syntax and data errors are turned into `InvalidData` IO errors.
    /// EOF errors are turned into `UnexpectedEof` IO errors.
    ///
    /// ```
    /// use std::io;
    ///
    /// enum MyError {
    ///     Io(io::Error),
    ///     EnCom(serde_encom::Error),
    /// }
    ///
    /// impl From<serde_encom::Error> for MyError {
    ///     fn from(err: serde_encom::Error) -> MyError {
    ///         use serde_encom::ErrorCategory;
    ///         match err.classify() {
    ///             ErrorCategory::Io => {
    ///                 MyError::Io(err.into())
    ///             }
    ///             ErrorCategory::Syntax | ErrorCategory::Data | ErrorCategory::Eof => {
    ///                 MyError::EnCom(err)
    ///             }
    ///         }
    ///     }
    /// }
    /// ```
    fn from(j: Error) -> Self {
        match j.err.code {
            // Hand back the wrapped I/O error untouched.
            ErrorCode::Io(err) => err,
            // Nothing was moved out of `j` on this path, so the whole error
            // can become the payload of a fresh `io::Error`.
            _ => {
                let kind = match j.classify() {
                    ErrorCategory::Io => unreachable!(),
                    ErrorCategory::Syntax | ErrorCategory::Data => ErrorKind::InvalidData,
                    ErrorCategory::Eof => ErrorKind::UnexpectedEof,
                };
                io::Error::new(kind, j)
            }
        }
    }
}
232
/// Boxed payload of an `Error`: what went wrong and where it was detected.
struct ErrorImpl {
    /// The specific failure.
    code: ErrorCode,
    /// Line at which the error was detected. A value of `0` is used when no
    /// position is attached: `Display` then omits the position and
    /// `Error::fix_position` treats the error as still needing one.
    line: usize,
    /// Column at which the error was detected.
    column: usize,
}
238
/// The specific failure carried by an `Error`.
pub(crate) enum ErrorCode {
    /// Catch-all for free-form messages, such as those created through
    /// `de::Error::custom` and `ser::Error::custom`.
    Message(Box<str>),

    /// Some IO error occurred while serializing or deserializing.
    Io(io::Error),

    /// EOF while parsing a list.
    EofWhileParsingList,

    /// EOF while parsing an object.
    EofWhileParsingObject,

    /// EOF while parsing a string.
    EofWhileParsingString,

    /// EOF while parsing an EnCom value.
    EofWhileParsingValue,

    /// Expected this character to be a `':'`.
    ExpectedColon,

    // Disabled variant: expected this character to be either a `','` or a `']'`.
    // ExpectedListCommaOrEnd,

    /// Expected this character to be either a `','` or a `'}'`.
    ExpectedObjectCommaOrEnd,

    /// Expected to parse either a `true`, `false`, or a `null`.
    ExpectedSomeIdent,

    /// Expected this character to start an EnCom value.
    ExpectedSomeValue,

    /// Expected this character to be a `"`.
    ExpectedDoubleQuote,

    /// Invalid escape code.
    InvalidEscape,

    /// Invalid number.
    InvalidNumber,

    /// Number is bigger than the maximum value of its type.
    NumberOutOfRange,

    /// Invalid unicode code point.
    InvalidUnicodeCodePoint,

    /// Control character found while parsing a string.
    ControlCharacterWhileParsingString,

    /// Object key is not a string.
    KeyMustBeAString,

    /// Contents of key were supposed to be a number.
    ExpectedNumericKey,

    /// Object key is a non-finite float value.
    FloatKeyMustBeFinite,

    // Disabled variant: lone leading surrogate in hex escape.
    // LoneLeadingSurrogateInHexEscape,

    /// EnCom has a comma after the last value in an array or map.
    TrailingComma,

    /// EnCom has non-whitespace trailing characters after the value.
    TrailingCharacters,

    // Disabled variant: unexpected end of hex escape.
    // UnexpectedEndOfHexEscape,

    /// Unexpected end of string.
    UnexpectedEndOfString,

    /// Encountered nesting of EnCom maps and arrays more than 128 layers deep.
    RecursionLimitExceeded,
}
318
319impl Error {
320 #[cold]
321 pub(crate) fn syntax(code: ErrorCode, line: usize, column: usize) -> Self {
322 Error {
323 err: Box::new(ErrorImpl { code, line, column }),
324 }
325 }
326
327 // Not public API. Should be pub(crate).
328 //
329 // Update `eager_json` crate when this function changes.
330 #[doc(hidden)]
331 #[cold]
332 pub fn io(error: io::Error) -> Self {
333 Error {
334 err: Box::new(ErrorImpl {
335 code: ErrorCode::Io(error),
336 line: 0,
337 column: 0,
338 }),
339 }
340 }
341
342 #[cold]
343 pub(crate) fn fix_position<F>(self, f: F) -> Self
344 where
345 F: FnOnce(ErrorCode) -> Error,
346 {
347 if self.err.line == 0 {
348 f(self.err.code)
349 } else {
350 self
351 }
352 }
353}
354
355impl Display for ErrorCode {
356 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
357 match self {
358 ErrorCode::Message(msg) => f.write_str(msg),
359 ErrorCode::Io(err) => Display::fmt(err, f),
360 ErrorCode::EofWhileParsingList => f.write_str("EOF while parsing a list"),
361 ErrorCode::EofWhileParsingObject => f.write_str("EOF while parsing an object"),
362 ErrorCode::EofWhileParsingString => f.write_str("EOF while parsing a string"),
363 ErrorCode::EofWhileParsingValue => f.write_str("EOF while parsing a value"),
364 ErrorCode::ExpectedColon => f.write_str("expected `:`"),
365 // ErrorCode::ExpectedListCommaOrEnd => f.write_str("expected `,` or `]`"),
366 ErrorCode::ExpectedObjectCommaOrEnd => f.write_str("expected `,` or `}`"),
367 ErrorCode::ExpectedSomeIdent => f.write_str("expected ident"),
368 ErrorCode::ExpectedSomeValue => f.write_str("expected value"),
369 ErrorCode::ExpectedDoubleQuote => f.write_str("expected `\"`"),
370 ErrorCode::InvalidEscape => f.write_str("invalid escape"),
371 ErrorCode::InvalidNumber => f.write_str("invalid number"),
372 ErrorCode::NumberOutOfRange => f.write_str("number out of range"),
373 ErrorCode::InvalidUnicodeCodePoint => f.write_str("invalid unicode code point"),
374 ErrorCode::ControlCharacterWhileParsingString => {
375 f.write_str("control character (\\u0000-\\u001F) found while parsing a string")
376 }
377 ErrorCode::KeyMustBeAString => f.write_str("key must be a string"),
378 ErrorCode::ExpectedNumericKey => {
379 f.write_str("invalid value: expected key to be a number in quotes")
380 }
381 ErrorCode::FloatKeyMustBeFinite => {
382 f.write_str("float key must be finite (got NaN or +/-inf)")
383 }
384 /* ErrorCode::LoneLeadingSurrogateInHexEscape => {
385 f.write_str("lone leading surrogate in hex escape")
386 } */
387 ErrorCode::TrailingComma => f.write_str("trailing comma"),
388 ErrorCode::TrailingCharacters => f.write_str("trailing characters"),
389 // ErrorCode::UnexpectedEndOfHexEscape => f.write_str("unexpected end of hex escape"),
390 ErrorCode::UnexpectedEndOfString => f.write_str("unexpected end of string"),
391 ErrorCode::RecursionLimitExceeded => f.write_str("recursion limit exceeded"),
392 }
393 }
394}
395
396impl From<AtoiSimdError<'_>> for ErrorCode {
397 fn from(e: AtoiSimdError) -> Self {
398 match e {
399 AtoiSimdError::Empty => ErrorCode::EofWhileParsingValue,
400 AtoiSimdError::Size(_, _) => ErrorCode::NumberOutOfRange, //todo: new error
401 AtoiSimdError::Overflow(_) => ErrorCode::NumberOutOfRange,
402 AtoiSimdError::Invalid64(_, _, _) | AtoiSimdError::Invalid128(_, _, _) => {
403 ErrorCode::InvalidNumber
404 }
405 }
406 }
407}
408
409impl From<AtoiSimdError<'_>> for Error {
410 fn from(e: AtoiSimdError<'_>) -> Self {
411 Self {
412 err: Box::new(ErrorImpl {
413 code: e.into(),
414 line: 0,
415 column: 0,
416 }),
417 }
418 }
419}
420
421impl serde::de::StdError for Error {
422 #[cfg(feature = "std")]
423 fn source(&self) -> Option<&(dyn error::Error + 'static)> {
424 match &self.err.code {
425 ErrorCode::Io(err) => err.source(),
426 _ => None,
427 }
428 }
429}
430
431impl Display for Error {
432 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
433 Display::fmt(&*self.err, f)
434 }
435}
436
437impl Display for ErrorImpl {
438 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
439 if self.line == 0 {
440 Display::fmt(&self.code, f)
441 } else {
442 write!(
443 f,
444 "{} at line {} column {}",
445 self.code, self.line, self.column
446 )
447 }
448 }
449}
450
451// Remove two layers of verbosity from the debug representation. Humans often
452// end up seeing this representation because it is what unwrap() shows.
453impl Debug for Error {
454 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
455 write!(
456 f,
457 "Error({:?}, line: {}, column: {})",
458 self.err.code.to_string(),
459 self.err.line,
460 self.err.column
461 )
462 }
463}
464
465impl de::Error for Error {
466 #[cold]
467 fn custom<T: Display>(msg: T) -> Error {
468 make_error(msg.to_string())
469 }
470
471 #[cold]
472 fn invalid_type(unexp: de::Unexpected, exp: &dyn de::Expected) -> Self {
473 if let de::Unexpected::Unit = unexp {
474 Error::custom(format_args!("invalid type: null, expected {}", exp))
475 } else {
476 Error::custom(format_args!("invalid type: {}, expected {}", unexp, exp))
477 }
478 }
479}
480
481impl ser::Error for Error {
482 #[cold]
483 fn custom<T: Display>(msg: T) -> Error {
484 make_error(msg.to_string())
485 }
486}
487
488// Parse our own error message that looks like "{} at line {} column {}" to work
489// around erased-serde round-tripping the error through de::Error::custom.
490fn make_error(mut msg: String) -> Error {
491 let (line, column) = parse_line_col(&mut msg).unwrap_or((0, 0));
492 Error {
493 err: Box::new(ErrorImpl {
494 code: ErrorCode::Message(msg.into_boxed_str()),
495 line,
496 column,
497 }),
498 }
499}
500
501fn parse_line_col(msg: &mut String) -> Option<(usize, usize)> {
502 let start_of_suffix = msg.rfind(" at line ")?;
503
504 // Find start and end of line number.
505 let start_of_line = start_of_suffix + " at line ".len();
506 let mut end_of_line = start_of_line;
507 while starts_with_digit(&msg[end_of_line..]) {
508 end_of_line += 1;
509 }
510
511 if !msg[end_of_line..].starts_with(" column ") {
512 return None;
513 }
514
515 // Find start and end of column number.
516 let start_of_column = end_of_line + " column ".len();
517 let mut end_of_column = start_of_column;
518 while starts_with_digit(&msg[end_of_column..]) {
519 end_of_column += 1;
520 }
521
522 if end_of_column < msg.len() {
523 return None;
524 }
525
526 // Parse numbers.
527 let line = match usize::from_str(&msg[start_of_line..end_of_line]) {
528 Ok(line) => line,
529 Err(_) => return None,
530 };
531 let column = match usize::from_str(&msg[start_of_column..end_of_column]) {
532 Ok(column) => column,
533 Err(_) => return None,
534 };
535
536 msg.truncate(start_of_suffix);
537 Some((line, column))
538}
539
/// Returns true if the first byte of `slice` is an ASCII digit (`0`-`9`).
/// An empty slice yields false.
fn starts_with_digit(slice: &str) -> bool {
    slice.as_bytes().first().map_or(false, u8::is_ascii_digit)
}
545}