1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361
use std::io::ErrorKind;
use std::str::Utf8Error;
use std::string::FromUtf8Error;
use std::{error, fmt, io};

/// The error type for operations of the [`BufRead`] trait and associated iterators.
///
/// It can be created from an [`io::Error`], an [`ErrorKind`], a [`Utf8Error`] or a
/// [`FromUtf8Error`].
///
/// Instances of this error may contain some bytes that would have been lost otherwise.
///
/// # Examples
///
/// ```
/// use utf8_bufread::Error;
/// use std::io::ErrorKind;
///
/// let error = Error::from(ErrorKind::InvalidData);
/// ```
///
/// [`Utf8Error`]: std::str::Utf8Error
/// [`FromUtf8Error`]: std::string::FromUtf8Error
/// [`BufRead`]: crate::BufRead
// TODO: Add tests for Error
pub struct Error {
    /// Bytes explicitly attached to this error with `with_leftovers`, if any.
    leftovers: Option<Vec<u8>>,
    /// What kind of error this is, and the error it wraps (if any).
    repr: Repr,
}

/// Internal representation of an [`Error`].
enum Repr {
    /// The error was built from a bare [`ErrorKind`] and wraps nothing.
    Simple(ErrorKind),
    /// The error wraps another error, categorized by `kind`.
    Custom {
        kind: ErrorKind,
        inner: Box<dyn error::Error + Send + Sync>,
    },
}

impl Error {
    /// Attaches leftover bytes to this error and returns it.
    ///
    /// See [`leftovers`](Error::leftovers) for more info about what we call leftovers.
    ///
    /// # Panics
    ///
    /// Panics if this error already had leftover bytes attached, as replacing them would
    /// silently drop data. In builds with debug assertions enabled, it also panics if `bytes`
    /// is empty.
    pub(crate) fn with_leftovers(mut self, bytes: Vec<u8>) -> Self {
        debug_assert!(!bytes.is_empty());
        if self.leftovers.is_some() {
            panic!("This error already had leftover bytes assigned, we won't drop them !")
        }
        self.leftovers = Some(bytes);
        self
    }

    /// Get the "leftover" bytes stored in this error.
    ///
    /// Leftover bytes are bytes that were read from the inner reader of a type implementing
    /// [`io::BufRead`], before clearing the buffer and filling it again, in a call to one of
    /// [`BufRead`]'s functions that returned an error. This means that they form an invalid or
    /// incomplete codepoint but would be lost if not returned with this error, as the call cleared
    /// the buffer they were coming from.
    ///
    /// It is guaranteed that, if the error contains a non-zero amount of leftover bytes, the
    /// following read operation on the reader that returned the error will not return any of those
    /// bytes, nor "skip" bytes from the reader.
    ///
    /// It is also guaranteed that, if the error contains a non-zero amount of leftover bytes,
    /// their amount is of the expected length of a codepoint, based on the first invalid byte
    /// read, i.e. the first of the leftover bytes.
    ///
    /// If this error wraps a [`FromUtf8Error`], the bytes of the failed conversion are returned.
    /// Otherwise the bytes attached to this error are returned, whatever the error was created
    /// from, or an empty slice if there are none.
    ///
    /// If you want to be sure not to lose any bytes from the inner reader, you should check if
    /// the error is holding "leftovers" with `error.leftovers().`[`is_empty`]`()`.
    ///
    /// # Examples
    ///
    /// The following example plays with buffer capacity to purposefully trigger a read that will
    /// return an error holding leftover bytes. The user should not bother thinking about buffer
    /// capacity in most cases, so this example may be a bit harder to follow along.
    ///
    /// ```
    /// use std::io::{BufReader, Read};
    /// use utf8_bufread::BufRead;
    ///
    /// let input = "💖💖";
    /// assert_eq!(input.len(), 8);
    /// // The reader will successfully read the first codepoint, but trying to read the second one
    /// // will result in an error since '💖' is 4 byte long, and we only take the first 7 bytes.
    /// // Since the reader has a buffer capacity of 6, it will have to clear and refill its buffer
    /// // to attempt reading the incomplete codepoint, then fail.
    /// let mut reader = BufReader::with_capacity(6, &input.as_bytes()[..7]);
    /// // First read is successful
    /// let s = reader.read_str().unwrap();
    /// assert_eq!(s.as_ref(), "💖");
    /// // Storing how many bytes were read with the first call for later use
    /// let first_read_len = s.len();
    /// // Second read gives us an error
    /// let err = reader.read_str();
    /// assert!(err.is_err());
    /// let err = err.unwrap_err();
    /// // Since the reader had to clear and re-fill its buffer, the error will contain leftover
    /// // bytes
    /// assert!(!err.leftovers().is_empty());
    /// // We can still "manually" read from the reader, but any bytes read before clearing the
    /// // inner buffer are "lost" (they are stored as leftovers in previously returned error)
    /// let mut buf: [u8; 8] = Default::default();
    /// // If the reader didn't have to clear its buffer, we should have read 3 bytes.
    /// // But since it did, we have 2 bytes stored in the error, hence why we only read 1 byte
    /// assert_eq!(1, reader.read(&mut buf).unwrap());
    /// // The input was truncated to 7 bytes, and we did read all 7 bytes
    /// assert_eq!(7, first_read_len + err.leftovers().len() + 1)
    /// ```
    ///
    /// [`is_empty`]: slice::is_empty
    /// [`BufRead`]: crate::BufRead
    pub fn leftovers(&self) -> &[u8] {
        // A wrapped `FromUtf8Error` owns the bytes of the failed conversion.
        if let Repr::Custom { inner, .. } = &self.repr {
            if let Some(utf8_err) = inner.downcast_ref::<FromUtf8Error>() {
                return utf8_err.as_bytes();
            }
        }
        // Bytes attached with `with_leftovers` are reported for every representation, so they
        // are never hidden behind a wrapped `io::Error` or `Utf8Error`.
        self.leftovers.as_deref().unwrap_or(&[])
    }

    /// Returns the corresponding [`ErrorKind`] for this error.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::io::ErrorKind;
    /// use utf8_bufread::Error;
    ///
    /// fn print_error(err: Error) {
    ///     println!("{:?}", err.kind());
    /// }
    ///
    /// fn main() {
    ///     // Will print "AddrInUse".
    ///     print_error(Error::from(ErrorKind::AddrInUse));
    /// }
    /// ```
    pub fn kind(&self) -> ErrorKind {
        match &self.repr {
            Repr::Simple(kind) | Repr::Custom { kind, .. } => *kind,
        }
    }

    /// Consumes the `Error`, returning its inner error (if any).
    ///
    /// If this [`Error`] was constructed from an [`ErrorKind`], then this function will
    /// return [`None`], otherwise it will return [`Some`].
    ///
    /// # Panics
    ///
    /// This function will panic if this error is holding "leftover" bytes.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::io::{self, ErrorKind};
    /// use utf8_bufread::Error;
    ///
    /// fn print_error(err: Error) {
    ///     if let Some(inner_err) = err.into_inner() {
    ///         println!("Inner error: {}", inner_err);
    ///     } else {
    ///         println!("No inner error");
    ///     }
    /// }
    ///
    /// fn main() {
    ///     // Will print "No inner error".
    ///     print_error(Error::from(ErrorKind::Other));
    ///     // Will print "Inner error: ...".
    ///     print_error(Error::from(io::Error::from(ErrorKind::AddrInUse)));
    /// }
    /// ```
    pub fn into_inner(self) -> Option<Box<dyn error::Error + Send + Sync>> {
        match self.into_inner_checked() {
            Ok(inner) => inner,
            Err(_) => panic!("This error is holding leftover bytes, we won't drop them !"),
        }
    }

    /// Consumes the `Error`, returning its inner error (if any), unless that would lose bytes.
    ///
    /// If this [`Error`] was constructed from an [`ErrorKind`], then this function will
    /// return `Ok(None)`, otherwise it will return `Ok(Some(_))`.
    ///
    /// # Errors
    ///
    /// If this error is holding "leftover" bytes, either attached to it or stored in a wrapped
    /// [`FromUtf8Error`], the error itself is handed back unchanged in `Err` so that no data is
    /// dropped.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::io::{self, ErrorKind};
    /// use utf8_bufread::Error;
    ///
    /// fn print_error(err: Error) {
    ///     if let Some(inner_err) = err.into_inner_checked().ok().flatten() {
    ///         println!("Inner error: {}", inner_err);
    ///     } else {
    ///         println!("No inner error, or transforming the error would cause data loss");
    ///     }
    /// }
    ///
    /// fn main() {
    ///     // Will print "No inner error".
    ///     print_error(Error::from(ErrorKind::Other));
    ///     // Will print "Inner error: ...".
    ///     print_error(Error::from(io::Error::from(ErrorKind::AddrInUse)));
    /// }
    /// ```
    pub fn into_inner_checked(
        self,
    ) -> std::result::Result<Option<Box<dyn error::Error + Send + Sync>>, Self> {
        // A wrapped `FromUtf8Error` has leftovers stored in it, so it counts as holding bytes.
        let wraps_utf8_bytes = matches!(
            &self.repr,
            Repr::Custom { inner, .. } if inner.is::<FromUtf8Error>()
        );
        if self.leftovers.is_some() || wraps_utf8_bytes {
            return Err(self);
        }
        Ok(match self.repr {
            Repr::Simple(_) => None,
            Repr::Custom { inner, .. } => Some(inner),
        })
    }

    /// Consumes the `Error`, returning its inner error (if any).
    ///
    /// If this [`Error`] was constructed from an [`ErrorKind`], then this function will
    /// return [`None`], otherwise it will return [`Some`]. Any leftover bytes held by this error
    /// are lost in the process.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::io::{self, ErrorKind};
    /// use utf8_bufread::Error;
    ///
    /// fn print_error(err: Error) {
    ///     if let Some(inner_err) = err.into_inner_lossy() {
    ///         println!("Inner error: {}", inner_err);
    ///     } else {
    ///         println!("No inner error");
    ///     }
    /// }
    ///
    /// fn main() {
    ///     // Will print "No inner error".
    ///     print_error(Error::from(ErrorKind::Other));
    ///     // Will print "Inner error: ...".
    ///     print_error(Error::from(io::Error::from(ErrorKind::AddrInUse)));
    /// }
    /// ```
    pub fn into_inner_lossy(self) -> Option<Box<dyn error::Error + Send + Sync>> {
        match self.repr {
            Repr::Custom { inner, .. } => Some(inner),
            Repr::Simple(_) => None,
        }
    }
}

impl fmt::Debug for Error {
    /// Formats simple errors as a struct showing their leftover bytes and kind; errors wrapping
    /// another error defer to the wrapped error's `Debug` output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &self.repr {
            Repr::Simple(kind) => f
                .debug_struct("Error")
                .field("leftover bytes", &self.leftovers)
                .field("kind", kind)
                .finish(),
            Repr::Custom { inner, .. } => fmt::Debug::fmt(inner, f),
        }
    }
}

impl fmt::Display for Error {
    /// Formats simple errors like the [`io::Error`] of the same kind; errors wrapping another
    /// error defer to the wrapped error's `Display` output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &self.repr {
            Repr::Simple(kind) => fmt::Display::fmt(&io::Error::from(*kind), f),
            Repr::Custom { inner, .. } => fmt::Display::fmt(inner, f),
        }
    }
}

impl From<ErrorKind> for Error {
    /// Creates an error of the given kind, wrapping no other error and holding no bytes.
    fn from(kind: ErrorKind) -> Self {
        Error {
            leftovers: None,
            repr: Repr::Simple(kind),
        }
    }
}

impl From<io::Error> for Error {
    /// Wraps an [`io::Error`], keeping its [`ErrorKind`].
    fn from(err: io::Error) -> Self {
        Error {
            leftovers: None,
            repr: Repr::Custom {
                kind: err.kind(),
                inner: err.into(),
            },
        }
    }
}

impl From<Utf8Error> for Error {
    /// Wraps a [`Utf8Error`] as an [`ErrorKind::InvalidData`] error.
    fn from(err: Utf8Error) -> Self {
        Error {
            leftovers: None,
            repr: Repr::Custom {
                kind: ErrorKind::InvalidData,
                inner: err.into(),
            },
        }
    }
}

impl From<FromUtf8Error> for Error {
    /// Wraps a [`FromUtf8Error`] as an [`ErrorKind::InvalidData`] error. The bytes of the failed
    /// conversion stay inside the wrapped error and are exposed by [`Error::leftovers`].
    fn from(err: FromUtf8Error) -> Self {
        Error {
            leftovers: None,
            repr: Repr::Custom {
                kind: ErrorKind::InvalidData,
                inner: err.into(),
            },
        }
    }
}

impl error::Error for Error {
    /// Returns the source of the wrapped error, if any. The wrapped error itself is not
    /// reported as a source because `Display` already defers to it.
    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
        match &self.repr {
            Repr::Simple(_) => None,
            Repr::Custom { inner, .. } => inner.source(),
        }
    }
}

/// Shorthand for a [`std::result::Result`] whose error type is [`Error`].
pub(crate) type Result<T> = std::result::Result<T, Error>;

#[cfg(test)]
mod with_leftovers_tests {
    use super::{Error, Repr};
    use std::io::ErrorKind;

    // Non-empty byte vectors are used so the panic comes from the second call (double
    // assignment) and not from the `debug_assert!` that rejects empty leftovers.
    #[test]
    #[should_panic(expected = "already had leftover bytes")]
    fn double_call_with_leftovers() {
        Error {
            leftovers: None,
            repr: Repr::Simple(ErrorKind::Interrupted),
        }
        .with_leftovers(vec![0xF0])
        .with_leftovers(vec![0x9F]);
    }
}