data_streams/
lib.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! This crate provides stream traits for conveniently reading and writing many data types: bytes,
4//! little or big-endian integers, and UTF-8 strings. [`DataSource`] reads from a stream, [`DataSink`]
5//! writes to a stream.
6//!
7//! Implementations for byte slices and `std::io`'s buffered readers and writers are provided, but
8//! it's easy to write your own implementations:
9//!
10//! ```ignore
11//! # use data_streams::{DataSource, DataSink, Result};
12//!
13//! struct MySource {
14//!     buffer: Vec<u8>,
15//!     // ...
16//! }
17//!
18//! impl DataSource for MySource {
19//!     fn available(&self) -> usize {
20//!         self.buffer.len()
21//!     }
22//!
23//!     fn request(&mut self, count: usize) -> Result<bool> {
24//!         if self.available() < count {
25//!             // Fill the buffer...
26//!         }
27//!
28//!         Ok(self.available() >= count)
29//!     }
30//!
31//!     fn read_bytes<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a [u8]> {
32//!         let count = self.available().min(buf.len());
33//!         buf[..count].copy_from_slice(&self.buffer[..count]);
34//!         self.buffer.drain(..count);
35//!         Ok(&buf[..count])
36//!     }
37//!
38//!     fn read_utf8_to_end<'a>(&mut self, buf: &'a mut String) -> Result<&'a str> {
39//!         self.read_utf8(self.available(), buf)
40//!     }
41//! }
42//!
43//! struct MySink {
44//!     buffer: Vec<u8>,
45//!     // ...
46//! }
47//!
48//! impl DataSink for MySink {
49//!     fn write_bytes(&mut self, buf: &[u8]) -> Result {
50//!         self.buffer.extend_from_slice(buf);
51//!         // Flush the buffer?
52//!         Ok(())
53//!     }
54//! }
55//! ```
56
57#![cfg_attr(not(feature = "std"), no_std)]
58
59#[cfg(not(feature = "std"))]
60extern crate core;
61#[cfg(feature = "alloc")]
62extern crate alloc;
63
64mod slice;
65mod std_io;
66
67#[cfg(feature = "alloc")]
68use alloc::{string::String, vec::Vec};
69use core::fmt;
70#[cfg(feature = "std")]
71use std::io;
72use bytemuck::{bytes_of, bytes_of_mut, Pod};
73use num_traits::PrimInt;
74#[cfg(feature = "alloc")]
75use simdutf8::compat::Utf8Error;
76
/// Errors produced while reading from or writing to a data stream.
#[derive(Debug)]
pub enum Error {
	/// Wraps an [`io::Error`]. Only present with the `std` feature.
	#[cfg(feature = "std")]
	Io(io::Error),
	/// Wraps a UTF-8 validation error. Only present with the `alloc` feature.
	#[cfg(feature = "alloc")]
	Utf8(Utf8Error),
	/// The stream could not provide the number of bytes that were required.
	End {
		/// The number of bytes that were required.
		required_count: usize,
	},
}
87
#[cfg(feature = "std")]
impl std::error::Error for Error {
	/// Returns the wrapped lower-level error, or `None` for [`Error::End`],
	/// which wraps nothing.
	fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
		let cause: &(dyn std::error::Error + 'static) = match self {
			#[cfg(feature = "std")]
			Self::Io(io_error) => io_error,
			#[cfg(feature = "alloc")]
			Self::Utf8(utf8_error) => utf8_error,
			Self::End { .. } => return None,
		};
		Some(cause)
	}
}
100
101impl fmt::Display for Error {
102	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
103		match self {
104			#[cfg(feature = "std")]
105			Self::Io(error) => fmt::Display::fmt(error, f),
106			#[cfg(feature = "alloc")]
107			Self::Utf8(error) => fmt::Display::fmt(error, f),
108			Self::End { required_count } => write!(f, "premature end-of-stream when reading {required_count} bytes"),
109		}
110	}
111}
112
#[cfg(feature = "std")]
impl From<io::Error> for Error {
	/// Wraps an I/O error in [`Error::Io`], which lets `?` convert it.
	fn from(error: io::Error) -> Self {
		Error::Io(error)
	}
}
119
#[cfg(feature = "alloc")]
impl From<Utf8Error> for Error {
	/// Wraps a UTF-8 validation error in [`Error::Utf8`], which lets `?`
	/// convert it.
	fn from(error: Utf8Error) -> Self {
		Error::Utf8(error)
	}
}
126
127pub type Result<T = (), E = Error> = core::result::Result<T, E>;
128
129/// A source stream of data.
130pub trait DataSource {
131	/// Returns the number of bytes available for reading.
132	fn available(&self) -> usize;
133	/// Reads at most `count` bytes into an internal buffer, returning whether
134	/// enough bytes are available. To return an end-of-stream error, use [`require`]
135	/// instead.
136	///
137	/// Note that a request returning `false` doesn't necessarily mean the stream
138	/// has ended. More bytes may be read after.
139	///
140	/// [`require`]: Self::require
141	fn request(&mut self, count: usize) -> Result<bool>;
142	/// Reads at least `count` bytes into an internal buffer, returning the available
143	/// count if successful, or an end-of-stream error if not. For a softer version
144	/// that returns whether enough bytes are available, use [`request`].
145	///
146	/// [`request`]: Self::request
147	fn require(&mut self, count: usize) -> Result {
148		if self.request(count)? {
149			Ok(())
150		} else {
151			Err(Error::End { required_count: count })
152		}
153	}
154
155	/// Reads bytes into a slice, returning the bytes read.
156	fn read_bytes<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a [u8]>;
157	/// Reads the exact length of bytes into a slice, returning the bytes read if
158	/// successful, or an end-of-stream error if not. Bytes are not consumed if an
159	/// end-of-stream error is returned.
160	fn read_exact_bytes<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a [u8]> {
161		let len = buf.len();
162		self.require(len)?;
163		let bytes = self.read_bytes(buf)?;
164		assert_eq!(bytes.len(), len);
165		Ok(bytes)
166	}
167	/// Reads an array with a size of `N` bytes.
168	fn read_array<const N: usize>(&mut self) -> Result<[u8; N]> where Self: Sized {
169		let mut array = [0; N];
170		self.read_exact_bytes(&mut array)?;
171		Ok(array)
172	}
173
174	/// Reads a [`u8`].
175	fn read_u8(&mut self) -> Result<u8> { self.read_int_be_spec() }
176	/// Reads an [`i8`].
177	fn read_i8(&mut self) -> Result<i8> { self.read_int_be_spec() }
178	/// Reads a big-endian [`u16`].
179	fn read_u16(&mut self) -> Result<u16> { self.read_int_be_spec() }
180	/// Reads a big-endian [`i16`].
181	fn read_i16(&mut self) -> Result<i16> { self.read_int_be_spec() }
182	/// Reads a little-endian [`u16`].
183	fn read_u16_le(&mut self) -> Result<u16> { self.read_int_le_spec() }
184	/// Reads a little-endian [`i16`].
185	fn read_i16_le(&mut self) -> Result<i16> { self.read_int_le_spec() }
186	/// Reads a big-endian [`u32`].
187	fn read_u32(&mut self) -> Result<u32> { self.read_int_be_spec() }
188	/// Reads a big-endian [`i32`].
189	fn read_i32(&mut self) -> Result<i32> { self.read_int_be_spec() }
190	/// Reads a little-endian [`u32`].
191	fn read_u32_le(&mut self) -> Result<u32> { self.read_int_le_spec() }
192	/// Reads a little-endian [`i32`].
193	fn read_i32_le(&mut self) -> Result<i32> { self.read_int_le_spec() }
194	/// Reads a big-endian [`u64`].
195	fn read_u64(&mut self) -> Result<u64> { self.read_int_be_spec() }
196	/// Reads a big-endian [`i64`].
197	fn read_i64(&mut self) -> Result<i64> { self.read_int_be_spec() }
198	/// Reads a little-endian [`u64`].
199	fn read_u64_le(&mut self) -> Result<u64> { self.read_int_le_spec() }
200	/// Reads a little-endian [`i64`].
201	fn read_i64_le(&mut self) -> Result<i64> { self.read_int_le_spec() }
202	/// Reads a big-endian [`u128`].
203	fn read_u128(&mut self) -> Result<u128> { self.read_int_be_spec() }
204	/// Reads a big-endian [`i128`].
205	fn read_i128(&mut self) -> Result<i128> { self.read_int_be_spec() }
206	/// Reads a little-endian [`u128`].
207	fn read_u128_le(&mut self) -> Result<u128> { self.read_int_le_spec() }
208	/// Reads a little-endian [`i128`].
209	fn read_i128_le(&mut self) -> Result<i128> { self.read_int_le_spec() }
210	/// Reads a big-endian [`usize`]. To make streams consistent across platforms,
211	/// [`usize`] is fixed to the size of [`u64`] regardless of the target platform.
212	fn read_usize(&mut self) -> Result<usize> {
213		self.read_u64().map(|i| i as usize)
214	}
215	/// Reads a big-endian [`isize`]. To make streams consistent across platforms,
216	/// [`isize`] is fixed to the size of [`i64`] regardless of the target platform.
217	fn read_isize(&mut self) -> Result<isize> {
218		self.read_i64().map(|i| i as isize)
219	}
220	/// Reads a little-endian [`usize`]. To make streams consistent across platforms,
221	/// [`usize`] is fixed to the size of [`u64`] regardless of the target platform.
222	fn read_usize_le(&mut self) -> Result<usize> {
223		self.read_u64_le().map(|i| i as usize)
224	}
225	/// Reads a little-endian [`isize`]. To make streams consistent across platforms,
226	/// [`isize`] is fixed to the size of [`i64`] regardless of the target platform.
227	fn read_isize_le(&mut self) -> Result<isize> {
228		self.read_i64_le().map(|i| i as isize)
229	}
230
231	/// Reads a big-endian integer.
232	fn read_int<T: PrimInt + Pod>(&mut self) -> Result<T> where Self: Sized {
233		self.read_int_be_spec()
234	}
235	/// Reads a little-endian integer.
236	fn read_int_le<T: PrimInt + Pod>(&mut self) -> Result<T> where Self: Sized {
237		self.read_int_le_spec()
238	}
239
240	/// Reads a value of generic type `T` supporting an arbitrary bit pattern. See
241	/// [`Pod`].
242	fn read_data<T: Pod>(&mut self) -> Result<T> where Self: Sized {
243		self.read_data_spec()
244	}
245
246	/// Reads up to `count` bytes of UTF-8 into `buf`, returning the string read.
247	/// If invalid bytes are encountered, an error is returned and `buf` is unchanged.
248	/// In this case, the stream is left in a state with up to `count` bytes consumed
249	/// from it, including the invalid bytes and any subsequent bytes.
250	#[cfg(feature = "alloc")]
251	fn read_utf8<'a>(&mut self, count: usize, buf: &'a mut String) -> Result<&'a str> {
252		buf.reserve(count);
253		unsafe {
254			append_utf8(buf, |b| {
255				let len = b.len();
256				b.set_len(len + count);
257				self.read_bytes(&mut b[len..])
258					.map(<[u8]>::len)
259			})
260		}
261	}
262
263	/// Reads UTF-8 bytes into `buf` until the end of the stream, returning the
264	/// string read. If invalid bytes are encountered, an error is returned and
265	/// `buf` is unchanged. In this case, the stream is left in a state with an
266	/// undefined number of bytes read.
267	#[cfg(feature = "alloc")]
268	fn read_utf8_to_end<'a>(&mut self, buf: &'a mut String) -> Result<&'a str>;
269}
270
271/// Helper extension trait for reading generic data from an unsized source.
272trait ReadSpec<T: Pod>: DataSource {
273	fn read_int_be_spec(&mut self) -> Result<T> where T: PrimInt {
274		self.read_data_spec().map(T::from_be)
275	}
276	fn read_int_le_spec(&mut self) -> Result<T> where T: PrimInt {
277		self.read_data_spec().map(T::from_le)
278	}
279	fn read_data_spec(&mut self) -> Result<T> {
280		let mut value = T::zeroed();
281		self.read_exact_bytes(bytes_of_mut(&mut value))?;
282		Ok(value)
283	}
284}
285
286impl<S: DataSource + ?Sized, T: Pod> ReadSpec<T> for S { }
287
288pub trait DataSink {
289	/// Writes all bytes from `buf`. Equivalent to [`Write::write_all`].
290	/// 
291	/// [`Write::write_all`]: io::Write::write_all
292	fn write_bytes(&mut self, buf: &[u8]) -> Result;
293
294	/// Writes a [`u8`].
295	fn write_u8(&mut self, value: u8) -> Result { self.write_int_be_spec(value) }
296	/// Writes an [`i8`].
297	fn write_i8(&mut self, value: i8) -> Result { self.write_int_be_spec(value) }
298	/// Writes a big-endian [`u16`].
299	fn write_u16(&mut self, value: u16) -> Result { self.write_int_be_spec(value) }
300	/// Writes a big-endian [`i16`].
301	fn write_i16(&mut self, value: i16) -> Result { self.write_int_be_spec(value) }
302	/// Writes a little-endian [`u16`].
303	fn write_u16_le(&mut self, value: u16) -> Result { self.write_int_le_spec(value) }
304	/// Writes a little-endian [`i16`].
305	fn write_i16_le(&mut self, value: i16) -> Result { self.write_int_le_spec(value) }
306	/// Writes a big-endian [`u32`].
307	fn write_u32(&mut self, value: u32) -> Result { self.write_int_be_spec(value) }
308	/// Writes a big-endian [`i32`].
309	fn write_i32(&mut self, value: i32) -> Result { self.write_int_be_spec(value) }
310	/// Writes a little-endian [`u32`].
311	fn write_u32_le(&mut self, value: u32) -> Result { self.write_int_le_spec(value) }
312	/// Writes a little-endian [`i32`].
313	fn write_i32_le(&mut self, value: i32) -> Result { self.write_int_le_spec(value) }
314	/// Writes a big-endian [`u64`].
315	fn write_u64(&mut self, value: u64) -> Result { self.write_int_be_spec(value) }
316	/// Writes a big-endian [`i64`].
317	fn write_i64(&mut self, value: i64) -> Result { self.write_int_be_spec(value) }
318	/// Writes a little-endian [`u64`].
319	fn write_u64_le(&mut self, value: u64) -> Result { self.write_int_le_spec(value) }
320	/// Writes a little-endian [`i64`].
321	fn write_i64_le(&mut self, value: i64) -> Result { self.write_int_le_spec(value) }
322	/// Writes a big-endian [`u128`].
323	fn write_u128(&mut self, value: u128) -> Result { self.write_int_be_spec(value) }
324	/// Writes a big-endian [`i128`].
325	fn write_i128(&mut self, value: i128) -> Result { self.write_int_be_spec(value) }
326	/// Writes a little-endian [`u128`].
327	fn write_u128_le(&mut self, value: u128) -> Result { self.write_int_le_spec(value) }
328	/// Writes a little-endian [`i128`].
329	fn write_i128_le(&mut self, value: i128) -> Result { self.write_int_le_spec(value) }
330	/// Writes a big-endian [`usize`]. To make streams consistent across platforms,
331	/// [`usize`] is fixed to the size of [`u64`] regardless of the target platform.
332	fn write_usize(&mut self, value: usize) -> Result {
333		self.write_u64(value as u64)
334	}
335	/// Writes a big-endian [`isize`]. To make streams consistent across platforms,
336	/// [`isize`] is fixed to the size of [`i64`] regardless of the target platform.
337	fn write_isize(&mut self, value: isize) -> Result {
338		self.write_i64(value as i64)
339	}
340	/// Writes a little-endian [`usize`]. To make streams consistent across platforms,
341	/// [`usize`] is fixed to the size of [`u64`] regardless of the target platform.
342	fn write_usize_le(&mut self, value: usize) -> Result {
343		self.write_u64_le(value as u64)
344	}
345	/// Writes a little-endian [`isize`]. To make streams consistent across platforms,
346	/// [`isize`] is fixed to the size of [`i64`] regardless of the target platform.
347	fn write_isize_le(&mut self, value: isize) -> Result {
348		self.write_i64_le(value as i64)
349	}
350
351	/// Writes a big-endian integer.
352	fn write_int<T: PrimInt + Pod>(&mut self, value: T) -> Result where Self: Sized {
353		self.write_int_be_spec(value)
354	}
355	/// Writes a little-endian integer.
356	fn write_int_le<T: PrimInt + Pod>(&mut self, value: T) -> Result where Self: Sized {
357		self.write_int_le_spec(value)
358	}
359	/// Writes a value of an arbitrary bit pattern. See [`Pod`].
360	fn write_data<T: Pod>(&mut self, value: T) -> Result where Self: Sized {
361		self.write_data_spec(value)
362	}
363	/// Writes a UTF-8 string.
364	fn write_utf8(&mut self, value: &str) -> Result {
365		self.write_bytes(value.as_bytes())
366	}
367}
368
369trait WriteSpec<T: Pod>: DataSink {
370	fn write_int_be_spec(&mut self, value: T) -> Result where T: PrimInt {
371		self.write_data_spec(value.to_be())
372	}
373	fn write_int_le_spec(&mut self, value: T) -> Result where T: PrimInt {
374		self.write_data_spec(value.to_le())
375	}
376	fn write_data_spec(&mut self, value: T) -> Result {
377		self.write_bytes(bytes_of(&value))
378	}
379}
380
381impl<S: DataSink + ?Sized, T: Pod> WriteSpec<T> for S { }
382
/// Runs `read` on the byte vector behind `buf` and keeps the bytes it reports
/// only if they are valid UTF-8, returning the newly appended text.
///
/// `read` returns how many bytes, counted from the string's previous end, it
/// wants to keep. If `read` fails or those bytes are not valid UTF-8, `buf`
/// is truncated back to its previous length and the error is returned.
///
/// # Safety
///
/// `read` must only append to the vector: it must not modify, remove or
/// reallocate away the bytes that were already part of `buf`, because the
/// vector's length is unconditionally reset to at least its previous value.
#[cfg(feature = "alloc")]
unsafe fn append_utf8<R>(buf: &mut String, read: R) -> Result<&str>
where
	R: FnOnce(&mut Vec<u8>) -> Result<usize>,
{
	use simdutf8::compat::from_utf8;

	// Drop guard that resets the vector's length when it leaves scope. Its
	// `len` starts at the string's original length and is only advanced once
	// the new bytes have been checked, so every exit path, including early
	// returns through `?`, leaves the string holding valid UTF-8.
	struct Guard<'a> {
		len: usize,
		buf: &'a mut Vec<u8>,
	}

	impl Drop for Guard<'_> {
		fn drop(&mut self) {
			// SAFETY: `len` is either the original length or the original
			// length plus a count of bytes that were sliced from the vector
			// and validated, so it covers only initialized bytes.
			unsafe {
				self.buf.set_len(self.len);
			}
		}
	}

	let original_len = buf.len();
	{
		// SAFETY: the guard restores a length under which the contents are
		// valid UTF-8 before the vector is viewed as a `String` again.
		let bytes = unsafe { buf.as_mut_vec() };
		let mut guard = Guard { len: original_len, buf: bytes };
		let appended = read(guard.buf)?;
		let candidate = &guard.buf[guard.len..][..appended];
		from_utf8(candidate)?;
		// Validation passed: let the guard keep the new bytes.
		guard.len += appended;
	}
	Ok(&buf[original_len..])
}