data_streams/
error.rs

1// Copyright 2024 - Strixpyrr
2// SPDX-License-Identifier: Apache-2.0
3
4#[cfg(feature = "alloc")]
5use alloc::collections::TryReserveError;
6#[cfg(feature = "unstable_ascii_char")]
7use core::ascii;
8#[cfg(feature = "utf8")]
9pub use simdutf8::compat::Utf8Error as SimdUtf8Error;
10use core::fmt::{Display, Formatter, Result as FmtResult};
11#[cfg(feature = "utf8")]
12use core::num::NonZeroU8;
13
/// A stream error.
///
/// Several variants are only compiled in when the matching Cargo feature (`std`,
/// `unstable_ascii_char`, `utf8` or `alloc`) is enabled. The enum is marked
/// `#[non_exhaustive]`, so further variants may be added over time.
#[derive(Debug)]
#[non_exhaustive]
pub enum Error {
	/// An IO error.
	#[cfg(feature = "std")]
	Io(std::io::Error),
	/// An invalid ASCII byte was encountered.
	#[cfg(feature = "unstable_ascii_char")]
	Ascii(AsciiError),
	/// Invalid UTF-8 bytes were encountered.
	#[cfg(feature = "utf8")]
	Utf8(Utf8Error),
	/// Error while attempting to reserve capacity.
	#[cfg(feature = "alloc")]
	Allocation(TryReserveError),
	/// A sink reached a hard storage limit, causing an overflow while writing. An
	/// example is a mutable slice, which can't write more bytes than its length.
	Overflow {
		/// The byte count remaining in the attempted write operation.
		remaining: usize
	},
	/// Premature end-of-stream.
	End {
		/// The total required byte count.
		required_count: usize
	},
	/// A "read to end" method was called on a source with no defined end.
	NoEnd,
	/// Buffer size is insufficient to buffer a read operation.
	InsufficientBuffer {
		/// The buffer's spare capacity.
		spare_capacity: usize,
		/// The total required byte count.
		required_count: usize
	},
}
51
52impl Error {
53	/// Creates an ASCII error.
54	#[allow(clippy::missing_panics_doc)]
55	#[inline]
56	#[cfg(feature = "unstable_ascii_char")]
57	pub const fn invalid_ascii(invalid_byte: u8, valid_up_to: usize, consumed_count: usize) -> Self {
58		assert!(consumed_count >= valid_up_to, "at least `valid_up_to` bytes must be consumed");
59		Self::Ascii(AsciiError { invalid_byte, valid_up_to, consumed_count })
60	}
61	/// Creates an overflow error.
62	#[inline]
63	pub const fn overflow(remaining: usize) -> Self {
64		Self::Overflow { remaining }
65	}
66	/// Creates an end-of-stream error.
67	#[inline]
68	pub const fn end(required_count: usize) -> Self {
69		Self::End { required_count }
70	}
71	/// Creates an insufficient buffer capacity error.
72	#[inline]
73	pub const fn insufficient_buffer(spare_capacity: usize, required_count: usize) -> Self {
74		Self::InsufficientBuffer { spare_capacity, required_count }
75	}
76}
77
#[allow(clippy::std_instead_of_core, reason = "Error trait in core is not yet stable")]
#[cfg(feature = "std")]
impl std::error::Error for Error {
	/// Returns the underlying cause of this error, if there is one.
	///
	/// IO and allocation errors are returned directly. A UTF-8 error forwards to
	/// its own `source`. All other variants have no source.
	fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
		match self {
			Self::Io(io) => Some(io),
			#[cfg(feature = "utf8")]
			Self::Utf8(utf8) => std::error::Error::source(utf8),
			#[cfg(feature = "alloc")]
			Self::Allocation(reserve) => Some(reserve),
			#[cfg(feature = "unstable_ascii_char")]
			Self::Ascii(_) => None,
			Self::Overflow { .. }
			| Self::End { .. }
			| Self::NoEnd
			| Self::InsufficientBuffer { .. } => None,
		}
	}
}
97
98impl Display for Error {
99	fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
100		match self {
101			#[cfg(feature = "std")]
102			Self::Io(error) => Display::fmt(error, f),
103			#[cfg(feature = "unstable_ascii_char")]
104			Self::Ascii(error) => Display::fmt(error, f),
105			#[cfg(feature = "utf8")]
106			Self::Utf8(error) => Display::fmt(error, f),
107			#[cfg(feature = "alloc")]
108			Self::Allocation(error) => Display::fmt(error, f),
109			Self::Overflow { remaining } => write!(f, "sink overflowed with {remaining} bytes remaining to write"),
110			Self::End { required_count } => write!(f, "premature end-of-stream when reading {required_count} bytes"),
111			Self::NoEnd => write!(f, "cannot read to end of infinite source"),
112			Self::InsufficientBuffer {
113				spare_capacity, required_count
114			} => write!(f, "insufficient buffer capacity ({spare_capacity}) to read {required_count} bytes"),
115		}
116	}
117}
118
#[cfg(feature = "std")]
impl From<std::io::Error> for Error {
	/// Wraps an IO error in [`Error::Io`].
	#[inline]
	fn from(io_error: std::io::Error) -> Self {
		Error::Io(io_error)
	}
}
126
#[cfg(feature = "utf8")]
impl From<SimdUtf8Error> for Error {
	/// Converts the raw validation error into a [`Utf8Error`] and wraps it in
	/// [`Error::Utf8`].
	#[inline]
	fn from(simd_error: SimdUtf8Error) -> Self {
		Error::Utf8(Utf8Error::from(simd_error))
	}
}
134
#[cfg(feature = "utf8")]
impl From<Utf8Error> for Error {
	/// Wraps a UTF-8 error in [`Error::Utf8`].
	#[inline]
	fn from(utf8_error: Utf8Error) -> Self {
		Error::Utf8(utf8_error)
	}
}
142
#[cfg(feature = "unstable_ascii_char")]
impl From<AsciiError> for Error {
	/// Wraps an ASCII error in [`Error::Ascii`].
	#[inline]
	fn from(ascii_error: AsciiError) -> Self {
		Error::Ascii(ascii_error)
	}
}
150
#[cfg(feature = "alloc")]
impl From<TryReserveError> for Error {
	/// Wraps a failed capacity reservation in [`Error::Allocation`].
	#[inline]
	fn from(reserve_error: TryReserveError) -> Self {
		Error::Allocation(reserve_error)
	}
}
158
/// A UTF-8 validation error.
///
/// Pairs the last [`SimdUtf8Error`] with an offset, which is added to that
/// error's own index to give the position reported by `valid_up_to`.
#[cfg(feature = "utf8")]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Utf8Error {
	/// Added to the inner error's `valid_up_to` when reporting a position.
	offset: usize,
	/// The last validation error, whose index does not include `offset`.
	inner: SimdUtf8Error,
}
165
/// A kind of UTF-8 error.
#[allow(clippy::exhaustive_enums)]
#[cfg(feature = "utf8")]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Utf8ErrorKind {
	/// The end of the input was reached while reading a character.
	IncompleteChar,
	/// The next `1` to `3` bytes are invalid. The value is the length of the
	/// invalid byte sequence.
	InvalidBytes(NonZeroU8),
}
176
#[cfg(feature = "utf8")]
impl Utf8Error {
	/// Returns the index in the input to which valid UTF-8 was verified before the
	/// last error. This is the amount added to the last error's own index to
	/// produce [`valid_up_to`](Self::valid_up_to).
	#[inline]
	#[must_use]
	pub const fn offset(&self) -> usize { self.offset }
	/// Returns the index in the input to which valid UTF-8 was verified.
	#[inline]
	#[must_use]
	pub fn valid_up_to(&self) -> usize {
		self.offset + self.inner.valid_up_to()
	}
	/// Returns the length, in range `1..=3`, of the invalid byte sequence, if any.
	/// Reading may continue with these removed. If `None` is returned, an incomplete
	/// character sequence was encountered. This could be a valid character whose
	/// sequence spans multiple buffer chunks.
	#[inline]
	#[must_use]
	pub fn error_len(&self) -> Option<usize> {
		self.inner.error_len()
	}
	/// Returns the last [`Utf8Error`](SimdUtf8Error) without the offset. Calling
	/// [`valid_up_to`] may be meaningless, because multiple UTF-8 validations may
	/// have taken place while reading.
	///
	/// [`valid_up_to`]: SimdUtf8Error::valid_up_to
	#[inline]
	#[must_use]
	pub const fn last_error(&self) -> SimdUtf8Error { self.inner }
	/// Returns the kind of error encountered.
	///
	/// An error with a known length maps to [`Utf8ErrorKind::InvalidBytes`];
	/// otherwise the kind is [`Utf8ErrorKind::IncompleteChar`].
	#[inline]
	#[must_use]
	pub fn error_kind(&self) -> Utf8ErrorKind {
		match self.inner.error_len() {
			Some(len) => Utf8ErrorKind::InvalidBytes(
				// SAFETY: core::str::from_utf8 (used by simdutf8 to get the error)
				// never returns an error_len outside the range 1..=3, so the cast
				// never truncates and conversion to non-zero is safe.
				unsafe {
					NonZeroU8::new_unchecked(len as u8)
				}
			),
			None => Utf8ErrorKind::IncompleteChar
		}
	}
	/// Returns the validated part of a slice as UTF-8, assuming it has identical
	/// contents from the slice which produced the error.
	///
	/// # Safety
	///
	/// The slice length and contents must be identical to the slice which produced
	/// the error. Passing a shorter and/or unvalidated slice may cause UB, because
	/// it may index out-of-bounds or invalidate the result.
	///
	/// For a safe alternative, re-validate the prefix of the slice with
	/// [`core::str::from_utf8`].
	#[must_use]
	pub unsafe fn valid_slice_unchecked<'a>(&self, bytes: &'a [u8]) -> &'a str {
		// SAFETY: the caller guarantees `bytes` is the slice which produced this
		// error, so `valid_up_to` is in bounds and the prefix is valid UTF-8.
		unsafe {
			core::str::from_utf8_unchecked(bytes.get_unchecked(..self.valid_up_to()))
		}
	}
	/// Splits a slice at the valid UTF-8 index, returning the first slice as a
	/// string.
	///
	/// # Safety
	///
	/// The caller promises the slice has exactly the same contents and length as
	/// the slice passed to the method which produced the error. Passing another
	/// slice may cause undefined behavior, such as the string containing invalid
	/// UTF-8, or reading out-of-bounds if the slice is shorter than the valid
	/// length.
	#[must_use]
	pub unsafe fn split_valid<'a>(&self, bytes: &'a [u8]) -> (&'a str, &'a [u8]) {
		// SAFETY: the caller guarantees `bytes` is the slice which produced this
		// error, so the split index is in bounds and the head is valid UTF-8.
		unsafe {
			let (valid, invalid) = bytes.split_at_unchecked(self.valid_up_to());
			(core::str::from_utf8_unchecked(valid), invalid)
		}
	}
	/// Splits a mutable slice at the valid UTF-8 index, returning the first slice
	/// as a string.
	///
	/// # Safety
	///
	/// The caller promises the slice has exactly the same contents and length as
	/// the slice passed to the method which produced the error. Passing another
	/// slice may cause undefined behavior, such as the string containing invalid
	/// UTF-8, or reading out-of-bounds if the slice is shorter than the valid
	/// length.
	#[must_use]
	pub unsafe fn split_valid_mut<'a>(&self, bytes: &'a mut [u8]) -> (&'a mut str, &'a mut [u8]) {
		// SAFETY: the caller guarantees `bytes` is the slice which produced this
		// error, so the split index is in bounds and the head is valid UTF-8.
		unsafe {
			let (valid, invalid) = bytes.split_at_mut_unchecked(self.valid_up_to());
			(core::str::from_utf8_unchecked_mut(valid), invalid)
		}
	}
}
266
#[cfg(feature = "utf8")]
impl Utf8Error {
	/// Advances the stored offset by `offset`.
	///
	/// Despite the name, the value is added to the current offset rather than
	/// replacing it.
	#[cfg(any(feature = "unstable_specialization", feature = "alloc"))]
	pub(crate) fn set_offset(&mut self, offset: usize) {
		self.offset += offset;
	}
	/// By-value form of [`set_offset`](Self::set_offset): returns the error with
	/// its offset advanced by `offset`.
	#[cfg(feature = "unstable_specialization")]
	pub(crate) fn with_offset(self, offset: usize) -> Self {
		let mut shifted = self;
		shifted.set_offset(offset);
		shifted
	}
}
279
#[allow(clippy::std_instead_of_core, reason = "Error trait in core is not yet stable")]
#[cfg(all(feature = "std", feature = "utf8"))]
impl std::error::Error for Utf8Error {
	/// Returns the wrapped validation error as the source.
	fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
		let cause: &(dyn std::error::Error + 'static) = &self.inner;
		Some(cause)
	}
}
287
#[cfg(feature = "utf8")]
impl Display for Utf8Error {
	/// Writes a message with the index to which the input was valid and, when
	/// known, the length of the invalid byte sequence.
	fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
		let valid_up_to = self.valid_up_to();
		if let Some(len) = self.error_len() {
			write!(f, "invalid UTF-8 sequence of {len} bytes from index {valid_up_to}")
		} else {
			write!(f, "incomplete UTF-8 byte sequence from index {valid_up_to}")
		}
	}
}
298
#[cfg(feature = "utf8")]
impl From<SimdUtf8Error> for Utf8Error {
	/// Wraps a raw validation error with an offset of zero.
	#[inline]
	fn from(simd_error: SimdUtf8Error) -> Self {
		Utf8Error { inner: simd_error, offset: 0 }
	}
}
306
/// An invalid ASCII byte error.
///
/// Records the offending byte, the index up to which the input was valid ASCII,
/// and how many bytes were consumed from the source in total.
#[cfg(feature = "unstable_ascii_char")]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct AsciiError {
	/// The invalid byte, in range `128..256`.
	pub invalid_byte: u8,
	/// The index of the invalid byte; the input before it was valid ASCII.
	pub valid_up_to: usize,
	/// The number of bytes consumed from the source, including any unchecked bytes
	/// after the invalid byte.
	pub consumed_count: usize,
}
318
#[cfg(feature = "unstable_ascii_char")]
impl AsciiError {
	/// Returns the invalid byte.
	#[inline]
	#[must_use]
	pub const fn invalid_byte(&self) -> u8 { self.invalid_byte }
	/// Returns the index in the input to which valid ASCII was verified.
	#[inline]
	#[must_use]
	pub const fn valid_up_to(&self) -> usize { self.valid_up_to }
	/// Returns the total number of bytes consumed from the source, including any
	/// unchecked bytes after the invalid byte.
	#[inline]
	#[must_use]
	pub const fn consumed_count(&self) -> usize { self.consumed_count }
	/// Returns the number of bytes consumed from the source after the invalid byte
	/// which haven't been checked.
	///
	/// The result saturates at zero when fewer than `valid_up_to + 1` bytes were
	/// consumed.
	#[inline]
	#[must_use]
	pub const fn unchecked_count(&self) -> usize {
		// Subtract in two saturating steps rather than computing `1 + valid_up_to`,
		// which would overflow if the public field holds `usize::MAX`.
		self.consumed_count
			.saturating_sub(self.valid_up_to)
			.saturating_sub(1)
	}
	/// Returns the validated part of a slice as ASCII.
	///
	/// # Panics
	///
	/// Panics if the slice does not contain valid bytes up to the valid length in
	/// the error.
	#[must_use]
	pub fn valid_slice<'a>(&self, bytes: &'a [u8]) -> &'a [ascii::Char] {
		assert!(bytes.len() >= self.valid_up_to, "the slice must contain at least `valid_up_to` bytes");
		assert!(bytes[..self.valid_up_to].is_ascii(), "the valid slice must be ASCII");
		// SAFETY: the assertions above checked that `valid_up_to` is in bounds and
		// that the prefix is ASCII.
		unsafe {
			self.valid_slice_unchecked(bytes)
		}
	}
	/// Returns the validated part of a slice as ASCII, assuming it has identical
	/// contents from the slice which produced the error.
	///
	/// # Safety
	///
	/// The slice length and contents must be identical to the slice which produced
	/// the error. Passing a shorter and/or unvalidated slice may cause UB, because
	/// it may index out-of-bounds or invalidate the result.
	///
	/// For a safe alternative, use [`valid_slice`](Self::valid_slice).
	#[must_use]
	pub unsafe fn valid_slice_unchecked<'a>(&self, bytes: &'a [u8]) -> &'a [ascii::Char] {
		// SAFETY: the caller guarantees `bytes` holds at least `valid_up_to` bytes
		// and that this prefix is ASCII.
		unsafe {
			bytes.get_unchecked(..self.valid_up_to).as_ascii_unchecked()
		}
	}
	/// Splits a slice at the valid ASCII index, returning the first slice as an
	/// [`ascii::Char`] slice. The second slice ends at the consumed count.
	///
	/// # Panics
	///
	/// Panics if the slice does not contain valid bytes up to the valid length in
	/// the error, or if shorter than the consumed count. Also panics if the
	/// consumed count is less than the valid length.
	#[must_use]
	pub fn split_valid<'a>(&self, bytes: &'a [u8]) -> (&'a [ascii::Char], &'a [u8]) {
		assert!(self.consumed_count >= self.valid_up_to, "at least `valid_up_to` bytes must be consumed");
		assert!(bytes.len() >= self.consumed_count, "the slice length must be longer than the consumed count");
		assert!(bytes[..self.valid_up_to].is_ascii(), "the valid slice must be ASCII");
		// SAFETY: the assertions above checked that `valid_up_to <= consumed_count
		// <= bytes.len()` and that the prefix is ASCII.
		unsafe {
			self.split_valid_unchecked(bytes)
		}
	}
	/// Splits a slice at the valid ASCII index, returning the first slice as an
	/// [`ascii::Char`] slice. Assumes the slice has identical contents from the
	/// slice which produced the error.
	///
	/// # Safety
	///
	/// The slice length and contents must be identical to the slice which produced
	/// the error. Passing a shorter and/or unvalidated slice may cause UB, because
	/// it may index out-of-bounds or invalidate the result.
	///
	/// Because the fields are public, the caller must also ensure that
	/// `consumed_count` is not less than `valid_up_to` and not greater than the
	/// slice length; otherwise the second range is invalid.
	///
	/// For a safe alternative, use [`split_valid`](Self::split_valid).
	#[must_use]
	pub unsafe fn split_valid_unchecked<'a>(&self, bytes: &'a [u8]) -> (&'a [ascii::Char], &'a [u8]) {
		// SAFETY: the caller guarantees `valid_up_to <= consumed_count <=
		// bytes.len()` and that the first `valid_up_to` bytes are ASCII.
		unsafe {
			(self.valid_slice_unchecked(bytes),
			 bytes.get_unchecked(self.valid_up_to..self.consumed_count))
		}
	}
}
402
// No `source` override: an ASCII error wraps no underlying error.
#[allow(clippy::std_instead_of_core, reason = "Error trait in core is not yet stable")]
#[cfg(all(feature = "std", feature = "unstable_ascii_char"))]
impl std::error::Error for AsciiError {}
406
#[cfg(feature = "unstable_ascii_char")]
impl Display for AsciiError {
	/// Writes the invalid byte in upper-case hexadecimal along with its index.
	fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
		let invalid_byte = self.invalid_byte;
		let valid_up_to = self.valid_up_to;
		write!(f, "non-ASCII byte {invalid_byte:#X} at index {valid_up_to}")
	}
}
413}