utf8_bufread/
error.rs

1use std::io::ErrorKind;
2use std::str::Utf8Error;
3use std::string::FromUtf8Error;
4use std::{error, fmt, io};
5
6/// The error type for operations of the [`BufRead`] trait and associated iterators.
7///
8/// It can be created from an [`io::Error`] or an [`ErrorKind`].
9///
10/// Instances of this error may contain some bytes that would have been lost otherwise.
11///
12/// # Examples
13///
14/// ```
15/// use utf8_bufread::Error;
16/// use std::io::ErrorKind;
17///
18/// let error = Error::from(ErrorKind::InvalidData);
19/// ```
20///
21/// [`Utf8Error`]: std::str::Utf8Error
22/// [`BufRead`]: crate::BufRead
23// TODO: Add tests for Error
24pub struct Error {
25    leftovers: Option<Vec<u8>>,
26    repr: Repr,
27}
28
/// Internal representation of an [`Error`].
enum Repr {
    /// The error carries nothing but its [`ErrorKind`].
    Simple(ErrorKind),
    /// The error wraps another error.
    Custom {
        /// The kind reported for the wrapped error.
        kind: ErrorKind,
        /// The wrapped error itself.
        inner: Box<dyn error::Error + Send + Sync>,
    },
}
36
37impl Error {
38    // TODO: Redirect to [`leftovers`] for more info about what we call leftovers
39
40    pub(crate) fn with_leftovers(mut self, bytes: Vec<u8>) -> Self {
41        debug_assert!(!bytes.is_empty());
42        if self.leftovers.is_some() {
43            panic!("This error already had leftover bytes assigned, we won't drop them !")
44        }
45        self.leftovers = Some(bytes);
46        self
47    }
48
49    /// Get the "leftover" bytes stored in this error.
50    ///
51    /// Leftover bytes are bytes that were read from the inner reader of a type implementing
52    /// [`io::BufRead`], before clearing the buffer and filling it again, in a call to one of
53    /// [`BufRead`]'s functions that returned an error. This means that they form an invalid or
54    /// incomplete codepoint but would be lost if not returned with this error, as the call cleared
55    /// the buffer they were coming from.
56    ///
57    /// It is guaranteed that, if the error contains a non-zero amount of leftover bytes, the
58    /// following read operation on the reader that returned the error will not return any of those
59    /// bytes, nor "skip" bytes from the reader.
60    ///
61    /// It is also guaranteed that, if the error contains a non-zero amount of leftover bytes,
62    /// their amount is of the expected length of a codepoint, based on the first invalid byte
63    /// read, i.e. the first of the leftover bytes.
64    ///
65    /// If you want to be sure not to lose any bytes from the inner reader, you should check if
66    /// the error is holding "leftovers" with `error.leftovers.`[`is_empty`]`()`.
67    ///
68    /// # Examples
69    ///
70    /// The following example plays with buffer capacity to purposefully trigger a read that will
71    /// return an error holding leftover bytes. The user should not bother thinking about buffer
72    /// capacity in most cases, so this example may be a bit harder to follow along.
73    ///
74    /// ```
75    /// use std::io::{BufReader, Read};
76    /// use utf8_bufread::BufRead;
77    ///
78    /// let input = "💖💖";
79    /// assert_eq!(input.len(), 8);
80    /// // The reader will successfully read the first codepoint, but trying to read the second one
81    /// // will result in an error since '💖' is 4 byte long, and we only take the first 7 bytes.
82    /// // Since the reader as a buffer capacity of 6, it will have to clear and refill its buffer
83    /// // to attempt reading the incomplete codepoint, then fail.
84    /// let mut reader = BufReader::with_capacity(6, &input.as_bytes()[..7]);
85    /// // First read is successful
86    /// let s = reader.read_str().unwrap();
87    /// assert_eq!(s.as_ref(), "💖");
88    /// // Storing how many bytes were read with the first call for later use
89    /// let first_read_len = s.len();
90    /// // Second read gives us an error
91    /// let err = reader.read_str();
92    /// assert!(err.is_err());
93    /// let err = err.unwrap_err();
94    /// // Since the reader had to clear and re-fill its buffer, the error will contained leftover
95    /// // bytes
96    /// assert!(!err.leftovers().is_empty());
97    /// // We can still "manually" read from the reader, but any bytes read before clearing the
98    /// // inner buffer are "lost" (they are stored as leftovers in previously returned error)
99    /// let mut buf: [u8; 8] = Default::default();
100    /// // If the reader didn't had to clear its buffer, we should have read 3 bytes.
101    /// // But since it did, we have 2 bytes stored in the error, hence why we only read 1 byte
102    /// assert_eq!(1, reader.read(&mut buf).unwrap());
103    /// // The input was truncated to 7 bytes, and we did read all 7 bytes
104    /// assert_eq!(7, first_read_len + err.leftovers().len() + 1)
105    /// ```
106    ///
107    /// [`is_empty`]: slice::is_empty
108    /// [`BufRead`]: crate::BufRead
109    pub fn leftovers(&self) -> &[u8] {
110        match &self.repr {
111            Repr::Simple(_) => {
112                if let Some(l) = &self.leftovers {
113                    l.as_slice()
114                } else {
115                    &[]
116                }
117            }
118            Repr::Custom { inner, .. } => {
119                if let Some(e) = inner.downcast_ref::<FromUtf8Error>() {
120                    e.as_bytes()
121                } else {
122                    &[]
123                }
124            }
125        }
126    }
127
128    /// Returns the corresponding [`ErrorKind`] for this error.
129    ///
130    /// # Examples
131    ///
132    /// ```
133    /// use std::io::{Error, ErrorKind};
134    ///
135    /// fn print_error(err: Error) {
136    ///     println!("{:?}", err.kind());
137    /// }
138    ///
139    /// fn main() {
140    ///     // Will print "AddrInUse".
141    ///     print_error(Error::from(ErrorKind::AddrInUse));
142    /// }
143    /// ```
144    pub fn kind(&self) -> ErrorKind {
145        match self.repr {
146            Repr::Simple(kind) => kind,
147            Repr::Custom { kind, .. } => kind,
148        }
149    }
150
151    /// Consumes the `Error`, returning its inner error (if any).
152    ///
153    /// If this [`Error`] was constructed from an [`ErrorKind`], then this function will
154    /// return [`None`], otherwise it will return [`Some`].
155    ///
156    /// # Panics
157    ///
158    /// This function will panic if this error is holding "leftover" bytes.
159    ///
160    /// # Examples
161    ///
162    /// ```
163    /// use std::io::{self, ErrorKind};
164    /// use utf8_bufread::Error;
165    ///
166    /// fn print_error(err: Error) {
167    /// if let Some(inner_err) = err.into_inner() {
168    ///         println!("Inner error: {}", inner_err);
169    ///     } else {
170    ///         println!("No inner error");
171    ///     }
172    /// }
173    ///
174    /// fn main() {
175    ///     // Will print "No inner error".
176    ///     print_error(Error::from(ErrorKind::Other));
177    ///     // Will print "Inner error: ...".
178    ///     print_error(Error::from(io::Error::from(ErrorKind::AddrInUse)));
179    /// }
180    /// ```
181    pub fn into_inner(self) -> Option<Box<dyn error::Error + Send + Sync>> {
182        if let Ok(inner) = self.into_inner_checked() {
183            inner
184        } else {
185            panic!("This error is holding leftover bytes, we won't drop them !")
186        }
187    }
188
189    /// Consumes the `Error`, returning its inner error (if any).
190    ///
191    /// If this [`Error`] was constructed from an [`ErrorKind`] or is holding "leftover" bytes,
192    /// then this function will return [`None`], otherwise it will return [`Some`].
193    ///
194    /// # Examples
195    ///
196    /// ```
197    /// use std::io::{self, ErrorKind};
198    /// use utf8_bufread::Error;
199    ///
200    /// fn print_error(err: Error) {
201    ///     if let Some(inner_err) = err.into_inner_checked().ok().flatten() {
202    ///         println!("Inner error: {}", inner_err);
203    ///     } else {
204    ///         println!("No inner error, or transforming the error would cause data loss");
205    ///     }
206    /// }
207    ///
208    /// fn main() {
209    ///     // Will print "No inner error".
210    ///     print_error(Error::from(ErrorKind::Other));
211    ///     // Will print "Inner error: ...".
212    ///     print_error(Error::from(io::Error::from(ErrorKind::AddrInUse)));
213    /// }
214    /// ```
215    pub fn into_inner_checked(
216        self,
217    ) -> std::result::Result<Option<Box<dyn error::Error + Send + Sync>>, Self> {
218        if self.leftovers.is_some() {
219            Err(self)
220        } else {
221            match self.repr {
222                Repr::Simple(_) => Ok(None),
223                Repr::Custom { inner, .. } if !inner.is::<FromUtf8Error>() => Ok(Some(inner)),
224                // inner must be a `FromUtf8Error`, which has leftovers stored in it
225                Repr::Custom { .. } => Err(self),
226            }
227        }
228    }
229
230    /// Consumes the `Error`, returning its inner error (if any).
231    ///
232    /// If this [`Error`] was constructed from an [`ErrorKind`], then this function will
233    /// return [`None`], otherwise it will return [`Some`]. Any leftover bytes held by this error
234    /// are lost in the process.
235    ///
236    ///
237    /// # Examples
238    ///
239    /// ```
240    /// use std::io::{self, ErrorKind};
241    /// use utf8_bufread::Error;
242    ///
243    /// fn print_error(err: Error) {
244    ///     if let Some(inner_err) = err.into_inner() {
245    ///         println!("Inner error: {}", inner_err);
246    ///     } else {
247    ///         println!("No inner error");
248    ///     }
249    /// }
250    ///
251    /// fn main() {
252    ///     // Will print "No inner error".
253    ///     print_error(Error::from(ErrorKind::Other));
254    ///     // Will print "Inner error: ...".
255    ///     print_error(Error::from(io::Error::from(ErrorKind::AddrInUse)));
256    /// }
257    /// ```
258    pub fn into_inner_lossy(self) -> Option<Box<dyn error::Error + Send + Sync>> {
259        if let Repr::Custom { inner, .. } = self.repr {
260            Some(inner)
261        } else {
262            None
263        }
264    }
265}
266
267impl fmt::Debug for Error {
268    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
269        match &self.repr {
270            Repr::Simple(kind) => f
271                .debug_struct("Error")
272                .field("leftover bytes", &self.leftovers)
273                .field("kind", &kind)
274                .finish(),
275            Repr::Custom { inner, .. } => fmt::Debug::fmt(&inner, f),
276        }
277    }
278}
279
280impl fmt::Display for Error {
281    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
282        match &self.repr {
283            Repr::Simple(kind) => fmt::Display::fmt(&io::Error::from(*kind), f),
284            Repr::Custom { inner, .. } => fmt::Display::fmt(&inner, f),
285        }
286    }
287}
288
289impl From<ErrorKind> for Error {
290    fn from(kind: ErrorKind) -> Self {
291        Error {
292            leftovers: None,
293            repr: Repr::Simple(kind),
294        }
295    }
296}
297
298impl From<io::Error> for Error {
299    fn from(err: io::Error) -> Self {
300        Error {
301            leftovers: None,
302            repr: Repr::Custom {
303                kind: err.kind(),
304                inner: err.into(),
305            },
306        }
307    }
308}
309
310impl From<Utf8Error> for Error {
311    fn from(err: Utf8Error) -> Self {
312        Error {
313            leftovers: None,
314            repr: Repr::Custom {
315                kind: ErrorKind::InvalidData,
316                inner: err.into(),
317            },
318        }
319    }
320}
321
322impl From<std::string::FromUtf8Error> for Error {
323    fn from(err: std::string::FromUtf8Error) -> Self {
324        Error {
325            leftovers: None,
326            repr: Repr::Custom {
327                kind: ErrorKind::InvalidData,
328                inner: err.into(),
329            },
330        }
331    }
332}
333
334impl error::Error for Error {
335    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
336        match &self.repr {
337            Repr::Simple(_) => None,
338            Repr::Custom { inner, .. } => inner.source(),
339        }
340    }
341}
342
343pub(crate) type Result<T> = std::result::Result<T, Error>;
344
#[cfg(test)]
mod with_leftovers_tests {
    use crate::error::Repr;
    use crate::Error;
    use std::io::ErrorKind;

    /// Assigning leftover bytes to an error that already holds some must panic instead of
    /// silently dropping the first ones.
    #[test]
    #[should_panic(expected = "already had leftover bytes")]
    fn double_call_with_leftovers() {
        // The vectors must not be empty: an empty one trips the `debug_assert!` of the first
        // `with_leftovers` call, and the test would then pass without ever reaching the
        // double-assignment check it is meant to cover.
        let _ = Error {
            leftovers: None,
            repr: Repr::Simple(ErrorKind::Interrupted),
        }
        .with_leftovers(vec![0xF0])
        .with_leftovers(vec![0x9F]);
    }
}