1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
use std::io::ErrorKind;
use std::str::Utf8Error;
use std::string::FromUtf8Error;
use std::{error, fmt, io};

/// The error type for operations of the [`BufRead`] trait and associated iterators.
///
/// It can be created from an [`io::Error`], an [`ErrorKind`], a [`Utf8Error`] or a
/// [`FromUtf8Error`].
///
/// Instances of this error may contain some bytes that would have been lost otherwise.
///
/// # Examples
///
/// ```
/// use utf8_bufread::Error;
/// use std::io::ErrorKind;
///
/// let error = Error::from(ErrorKind::InvalidData);
/// ```
///
/// [`Utf8Error`]: std::str::Utf8Error
/// [`FromUtf8Error`]: std::string::FromUtf8Error
/// [`BufRead`]: crate::BufRead
// TODO: Add tests for Error
pub struct Error {
    /// Bytes explicitly attached to this error; every `From` conversion in this module starts
    /// with `None`.
    leftovers: Option<Vec<u8>>,
    /// Either a bare [`ErrorKind`], or a boxed inner error stored together with its kind.
    repr: Repr,
}

/// Internal representation of an [`Error`].
enum Repr {
    /// An error described only by its [`ErrorKind`]; there is no inner error.
    Simple(ErrorKind),
    /// An error wrapping another error.
    Custom {
        /// The kind reported for the wrapped error.
        kind: ErrorKind,
        /// The wrapped error itself.
        inner: Box<dyn error::Error + Send + Sync>,
    },
}

impl Error {
    /// Attaches "leftover" bytes to this error and hands the error back.
    ///
    /// See [`leftovers`](Self::leftovers) for more info about what we call leftovers.
    ///
    /// # Panics
    ///
    /// Panics if this error already had leftover bytes assigned, since overwriting them would
    /// drop them. When debug assertions are enabled, also panics if `bytes` is empty.
    pub(crate) fn with_leftovers(mut self, bytes: Vec<u8>) -> Self {
        debug_assert!(!bytes.is_empty());
        if self.leftovers.is_some() {
            panic!("This error already had leftover bytes assigned, we won't drop them !")
        }
        self.leftovers = Some(bytes);
        self
    }

    /// Get the "leftover" bytes stored in this error.
    ///
    /// Leftover bytes are bytes that were read from the inner reader of a type implementing
    /// [`io::BufRead`], before clearing the buffer and filling it again, in a call to one of
    /// [`BufRead`]'s functions that returned an error. This means that they form an invalid or
    /// incomplete codepoint but would be lost if not returned with this error, as the call cleared
    /// the buffer they were coming from.
    ///
    /// If this error wraps a [`FromUtf8Error`], the bytes of the failed conversion are returned.
    /// Otherwise the bytes attached to this error are returned, or an empty slice if there are
    /// none.
    ///
    /// It is guaranteed that, if the error contains a non-zero amount of leftover bytes, the
    /// following read operation on the reader that returned the error will not return any of those
    /// bytes, nor "skip" bytes from the reader.
    ///
    /// It is also guaranteed that, if the error contains a non-zero amount of leftover bytes,
    /// their amount is of the expected length of a codepoint, based on the first invalid byte
    /// read, i.e. the first of the leftover bytes.
    ///
    /// If you want to be sure not to lose any bytes from the inner reader, you should check if
    /// the error is holding "leftovers" with `error.leftovers().`[`is_empty`]`()`.
    ///
    /// # Examples
    ///
    /// The following example plays with buffer capacity to purposefully trigger a read that will
    /// return an error holding leftover bytes. The user should not bother thinking about buffer
    /// capacity in most cases, so this example may be a bit harder to follow along.
    ///
    /// ```
    /// use std::io::{BufReader, Read};
    /// use utf8_bufread::BufRead;
    ///
    /// let input = "💖💖";
    /// assert_eq!(input.len(), 8);
    /// // The reader will successfully read the first codepoint, but trying to read the second one
    /// // will result in an error since '💖' is 4 bytes long, and we only take the first 7 bytes.
    /// // Since the reader has a buffer capacity of 6, it will have to clear and refill its buffer
    /// // to attempt reading the incomplete codepoint, then fail.
    /// let mut reader = BufReader::with_capacity(6, &input.as_bytes()[..7]);
    /// // First read is successful
    /// let s = reader.read_str().unwrap();
    /// assert_eq!(s.as_ref(), "💖");
    /// // Storing how many bytes were read with the first call for later use
    /// let first_read_len = s.len();
    /// // Second read gives us an error
    /// let err = reader.read_str();
    /// assert!(err.is_err());
    /// let err = err.unwrap_err();
    /// // Since the reader had to clear and re-fill its buffer, the error will contain leftover
    /// // bytes
    /// assert!(!err.leftovers().is_empty());
    /// // We can still "manually" read from the reader, but any bytes read before clearing the
    /// // inner buffer are "lost" (they are stored as leftovers in previously returned error)
    /// let mut buf: [u8; 8] = Default::default();
    /// // If the reader didn't have to clear its buffer, we should have read 3 bytes.
    /// // But since it did, we have 2 bytes stored in the error, hence why we only read 1 byte
    /// assert_eq!(1, reader.read(&mut buf).unwrap());
    /// // The input was truncated to 7 bytes, and we did read all 7 bytes
    /// assert_eq!(7, first_read_len + err.leftovers().len() + 1)
    /// ```
    ///
    /// [`is_empty`]: slice::is_empty
    /// [`BufRead`]: crate::BufRead
    pub fn leftovers(&self) -> &[u8] {
        // A wrapped `FromUtf8Error` owns the bytes of the failed conversion; those take
        // precedence, exactly as before.
        if let Repr::Custom { inner, .. } = &self.repr {
            if let Some(e) = inner.downcast_ref::<FromUtf8Error>() {
                return e.as_bytes();
            }
        }
        // Otherwise report whatever was attached through `with_leftovers`, whichever
        // representation the error uses. `into_inner_checked` refuses to drop these bytes, so
        // they must be observable here as well.
        self.leftovers.as_deref().unwrap_or(&[])
    }

    /// Returns the corresponding [`ErrorKind`] for this error.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::io::ErrorKind;
    /// use utf8_bufread::Error;
    ///
    /// fn print_error(err: Error) {
    ///     println!("{:?}", err.kind());
    /// }
    ///
    /// fn main() {
    ///     // Will print "AddrInUse".
    ///     print_error(Error::from(ErrorKind::AddrInUse));
    /// }
    /// ```
    pub fn kind(&self) -> ErrorKind {
        match &self.repr {
            Repr::Simple(kind) | Repr::Custom { kind, .. } => *kind,
        }
    }

    /// Consumes the `Error`, returning its inner error (if any).
    ///
    /// If this [`Error`] was constructed from an [`ErrorKind`], then this function will
    /// return [`None`], otherwise it will return [`Some`].
    ///
    /// # Panics
    ///
    /// This function will panic if this error is holding "leftover" bytes, including the bytes
    /// held by a wrapped [`FromUtf8Error`]. Use
    /// [`into_inner_checked`](Self::into_inner_checked) to get the error back instead of
    /// panicking, or [`into_inner_lossy`](Self::into_inner_lossy) to deliberately drop the bytes.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::io::{self, ErrorKind};
    /// use utf8_bufread::Error;
    ///
    /// fn print_error(err: Error) {
    ///     if let Some(inner_err) = err.into_inner() {
    ///         println!("Inner error: {}", inner_err);
    ///     } else {
    ///         println!("No inner error");
    ///     }
    /// }
    ///
    /// fn main() {
    ///     // Will print "No inner error".
    ///     print_error(Error::from(ErrorKind::Other));
    ///     // Will print "Inner error: ...".
    ///     print_error(Error::from(io::Error::from(ErrorKind::AddrInUse)));
    /// }
    /// ```
    pub fn into_inner(self) -> Option<Box<dyn error::Error + Send + Sync>> {
        match self.into_inner_checked() {
            Ok(inner) => inner,
            Err(_) => panic!("This error is holding leftover bytes, we won't drop them !"),
        }
    }

    /// Consumes the `Error`, returning its inner error (if any), unless doing so would drop
    /// "leftover" bytes.
    ///
    /// If this [`Error`] was constructed from an [`ErrorKind`], then this function will return
    /// `Ok(None)`, otherwise it will return `Ok(Some(inner))`.
    ///
    /// # Errors
    ///
    /// If this error is holding "leftover" bytes, either attached to it or stored in a wrapped
    /// [`FromUtf8Error`], the error itself is handed back unchanged as `Err(self)` so that no
    /// bytes are lost.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::io::{self, ErrorKind};
    /// use utf8_bufread::Error;
    ///
    /// fn print_error(err: Error) {
    ///     if let Some(inner_err) = err.into_inner_checked().ok().flatten() {
    ///         println!("Inner error: {}", inner_err);
    ///     } else {
    ///         println!("No inner error, or transforming the error would cause data loss");
    ///     }
    /// }
    ///
    /// fn main() {
    ///     // Will print "No inner error".
    ///     print_error(Error::from(ErrorKind::Other));
    ///     // Will print "Inner error: ...".
    ///     print_error(Error::from(io::Error::from(ErrorKind::AddrInUse)));
    /// }
    /// ```
    pub fn into_inner_checked(
        self,
    ) -> std::result::Result<Option<Box<dyn error::Error + Send + Sync>>, Self> {
        if self.leftovers.is_some() {
            return Err(self);
        }
        match self.repr {
            Repr::Simple(_) => Ok(None),
            Repr::Custom { inner, .. } if !inner.is::<FromUtf8Error>() => Ok(Some(inner)),
            // inner must be a `FromUtf8Error`, which has leftovers stored in it
            Repr::Custom { .. } => Err(self),
        }
    }

    /// Consumes the `Error`, returning its inner error (if any).
    ///
    /// If this [`Error`] was constructed from an [`ErrorKind`], then this function will
    /// return [`None`], otherwise it will return [`Some`]. Any leftover bytes attached to this
    /// error are lost in the process.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::io::{self, ErrorKind};
    /// use utf8_bufread::Error;
    ///
    /// fn print_error(err: Error) {
    ///     if let Some(inner_err) = err.into_inner_lossy() {
    ///         println!("Inner error: {}", inner_err);
    ///     } else {
    ///         println!("No inner error");
    ///     }
    /// }
    ///
    /// fn main() {
    ///     // Will print "No inner error".
    ///     print_error(Error::from(ErrorKind::Other));
    ///     // Will print "Inner error: ...".
    ///     print_error(Error::from(io::Error::from(ErrorKind::AddrInUse)));
    /// }
    /// ```
    pub fn into_inner_lossy(self) -> Option<Box<dyn error::Error + Send + Sync>> {
        match self.repr {
            Repr::Custom { inner, .. } => Some(inner),
            Repr::Simple(_) => None,
        }
    }
}

/// Debug output: a wrapping error prints exactly like its inner error (attached leftover bytes
/// are not shown in that case); a kind-only error prints as an `Error` struct with its leftover
/// bytes and kind.
impl fmt::Debug for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let kind = match &self.repr {
            // Delegate entirely to the wrapped error.
            Repr::Custom { inner, .. } => return fmt::Debug::fmt(inner, f),
            Repr::Simple(kind) => kind,
        };
        let mut builder = f.debug_struct("Error");
        builder.field("leftover bytes", &self.leftovers);
        builder.field("kind", kind);
        builder.finish()
    }
}

/// User-facing message: a wrapping error displays like its inner error, a kind-only error
/// displays like the [`io::Error`] built from the same [`ErrorKind`].
impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let kind = match &self.repr {
            Repr::Custom { inner, .. } => return fmt::Display::fmt(inner, f),
            Repr::Simple(kind) => *kind,
        };
        // Borrow the standard library's wording for the bare kind.
        let as_io = io::Error::from(kind);
        fmt::Display::fmt(&as_io, f)
    }
}

/// Builds a kind-only error: no inner error and no leftover bytes.
impl From<ErrorKind> for Error {
    fn from(kind: ErrorKind) -> Self {
        let repr = Repr::Simple(kind);
        Self {
            leftovers: None,
            repr,
        }
    }
}

/// Wraps an [`io::Error`], keeping its [`ErrorKind`]; the new error holds no leftover bytes.
impl From<io::Error> for Error {
    fn from(err: io::Error) -> Self {
        // Read the kind before `err` is moved into the box.
        let kind = err.kind();
        let inner: Box<dyn error::Error + Send + Sync> = Box::new(err);
        Self {
            leftovers: None,
            repr: Repr::Custom { kind, inner },
        }
    }
}

/// Wraps a [`Utf8Error`] as an [`ErrorKind::InvalidData`] error holding no leftover bytes.
impl From<Utf8Error> for Error {
    fn from(err: Utf8Error) -> Self {
        let inner: Box<dyn error::Error + Send + Sync> = Box::new(err);
        Self {
            leftovers: None,
            repr: Repr::Custom {
                kind: ErrorKind::InvalidData,
                inner,
            },
        }
    }
}

impl From<std::string::FromUtf8Error> for Error {
    fn from(err: std::string::FromUtf8Error) -> Self {
        Error {
            leftovers: None,
            repr: Repr::Custom {
                kind: ErrorKind::InvalidData,
                inner: err.into(),
            },
        }
    }
}

impl error::Error for Error {
    /// Returns the source of the wrapped error, if there is a wrapped error and it has one.
    ///
    /// The wrapped error itself is not reported as the source; its own source is.
    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
        if let Repr::Custom { inner, .. } = &self.repr {
            inner.source()
        } else {
            None
        }
    }
}

/// Crate-internal shorthand for a [`std::result::Result`] whose error type is this module's
/// [`Error`].
pub(crate) type Result<T> = std::result::Result<T, Error>;

#[cfg(test)]
mod with_leftovers_tests {
    use crate::error::Repr;
    use crate::Error;
    use std::io::ErrorKind;

    /// Attaching leftovers a second time must panic rather than silently drop the first batch.
    #[test]
    #[should_panic(expected = "already had leftover bytes")]
    fn double_call_with_leftovers() {
        // The vectors are non-empty on purpose: an empty one trips the `debug_assert!` in the
        // very first call, which would satisfy a bare `#[should_panic]` without ever reaching
        // the double-assignment guard this test is about.
        let _ = Error {
            leftovers: None,
            repr: Repr::Simple(ErrorKind::Interrupted),
        }
        .with_leftovers(vec![0xF0, 0x9F])
        .with_leftovers(vec![0x92]);
    }

    /// Bytes attached to a kind-only error are handed back by `leftovers`.
    #[test]
    fn attached_leftovers_are_returned() {
        let err = Error {
            leftovers: None,
            repr: Repr::Simple(ErrorKind::InvalidData),
        }
        .with_leftovers(vec![0xF0, 0x9F]);
        assert_eq!(err.leftovers(), &[0xF0, 0x9F][..]);
    }
}