data_streams/
source.rs

// Copyright 2025 - Strixpyrr
// SPDX-License-Identifier: Apache-2.0
3
4use bytemuck::{bytes_of_mut, cast_slice_mut, Pod};
5#[cfg(feature = "unstable_ascii_char")]
6use core::ascii;
7use bytemuck::cast_slice;
8use num_traits::PrimInt;
9#[cfg(feature = "utf8")]
10use simdutf8::compat::from_utf8;
11use crate::{Error, Result};
12#[cfg(feature = "utf8")]
13use crate::utf8::utf8_char_width;
14
15mod exact_size;
16mod impls;
17pub mod markers;
18
19/// A source stream of data.
20pub trait DataSource {
21	/// Returns the number of bytes available for reading. This does not necessarily
22	/// mean more data isn't available, just that *at least* this count is may be
23	/// read.
24	/// 
25	/// # Example
26	/// 
27	/// ```
28	/// use data_streams::DataSource;
29	/// 
30	/// let buf: &[u8] = b"Hello!";
31	/// assert_eq!(buf.available(), 6);
32	/// ```
33	fn available(&self) -> usize;
34	/// Reads at most `count` bytes into an internal buffer, returning whether
35	/// enough bytes are available. To return an end-of-stream error, use [`require`]
36	/// instead.
37	///
38	/// Note that a request returning `false` doesn't necessarily mean the stream
39	/// has ended. More bytes may be read after.
40	///
41	/// # Errors
42	///
43	/// If the byte count exceeds the spare buffer capacity, [`Error::InsufficientBuffer`]
44	/// is returned and both the internal buffer and underlying streams remain unchanged.
45	///
46	/// [`require`]: Self::require
47	/// 
48	/// # Example
49	/// 
50	/// ```
51	/// # use data_streams::Error;
52	/// use data_streams::DataSource;
53	///
54	/// let mut buf: &[u8] = b"Hello!";
55	/// assert_eq!(buf.request(3)?, true);
56	/// assert_eq!(buf.request(50)?, false);
57	/// # Ok::<_, Error>(())
58	/// ```
59	fn request(&mut self, count: usize) -> Result<bool>;
60	/// Reads at least `count` bytes into an internal buffer, returning `Ok` if
61	/// successful, or an end-of-stream error if not. For a softer version that
62	/// returns whether enough bytes are available, use [`request`].
63	///
64	/// # Errors
65	///
66	/// Returns [`Error::End`] if the stream ended before `count` bytes could be
67	/// read. If the byte count exceeds the spare buffer capacity, [`Error::InsufficientBuffer`]
68	/// is returned instead.
69	///
70	/// [`request`]: Self::request
71	/// 
72	/// # Example
73	/// 
74	/// ```
75	/// use data_streams::{DataSource, Error};
76	/// 
77	/// let mut buf: &[u8] = b"Hello!";
78	/// assert!(buf.require(3).is_ok());
79	/// assert!(matches!(buf.require(50), Err(Error::End { .. })));
80	/// ```
81	fn require(&mut self, count: usize) -> Result {
82		if self.request(count)? {
83			Ok(())
84		} else {
85			Err(Error::end(count))
86		}
87	}
88
89	/// Consumes up to `count` bytes in the stream, returning the number of bytes
90	/// consumed if successful. At least the available count may be consumed.
91	///
92	/// # Errors
93	///
94	/// Returns any IO errors encountered.
95	/// 
96	/// # Example
97	/// 
98	/// ```
99	/// # use data_streams::Error;
100	/// use data_streams::DataSource;
101	/// 
102	/// let mut buf: &[u8] = b"Hello!";
103	/// assert_eq!(buf.skip(3)?, 3);
104	/// assert_eq!(buf.skip(8)?, 3);
105	/// # Ok::<_, Error>(())
106	/// ```
107	fn skip(&mut self, count: usize) -> Result<usize>;
108	/// Reads bytes into a slice, returning the bytes read. This method is greedy;
109	/// it consumes as many bytes as it can, until `buf` is filled or no more bytes
110	/// are read.
111	///
112	/// # Errors
113	///
114	/// Returns any IO errors encountered.
115	/// 
116	/// # Example
117	/// 
118	/// ```
119	/// # use data_streams::Error;
120	/// use data_streams::DataSource;
121	/// 
122	/// let mut input: &[u8] = b"Hello!";
123	/// let buf: &mut [u8] = &mut [0; 5];
124	/// assert_eq!(input.read_bytes(&mut buf[..3])?, b"Hel");
125	/// assert_eq!(input.read_bytes(buf)?, b"lo!");
126	/// # Ok::<_, Error>(())
127	/// ```
128	fn read_bytes<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a [u8]>;
129	/// Reads the exact length of bytes into a slice, returning the bytes read if
130	/// successful, or an end-of-stream error if not. Bytes are not consumed if an
131	/// end-of-stream error is returned.
132	///
133	/// # Errors
134	///
135	/// Returns [`Error::End`] with the slice length if the exact number of bytes
136	/// cannot be read. The bytes that were read remain in the buffer, but have
137	/// been consumed from the source.
138	/// 
139	/// # Example
140	/// 
141	/// ```
142	/// use data_streams::{DataSource, Error};
143	/// 
144	/// let mut input: &[u8] = b"Hello!";
145	/// let buf: &mut [u8] = &mut [0; 5];
146	/// assert_eq!(input.read_exact_bytes(&mut buf[..3])?, b"Hel");
147	/// assert!(matches!(input.read_exact_bytes(buf), Err(Error::End { .. })));
148	/// # Ok::<_, Error>(())
149	/// ```
150	fn read_exact_bytes<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a [u8]> {
151		default_read_exact_bytes(self, buf)
152	}
153	/// Reads bytes into a slice in multiples of `alignment`, returning the bytes
154	/// read. This method is greedy; it consumes as many bytes as it can, until
155	/// `buf` is filled or less than `alignment` bytes could be read.
156	/// 
157	/// If the alignment is zero, the returned slice is empty.
158	/// 
159	/// # Errors
160	/// 
161	/// Returns any IO errors encountered.
162	/// 
163	/// # Example
164	/// 
165	/// ```
166	/// # use data_streams::Error;
167	/// use data_streams::DataSource;
168	/// 
169	/// let mut input: &[u8] = b"Hello?!";
170	/// let buf: &mut [u8] = &mut [0; 10];
171	/// assert_eq!(input.read_aligned_bytes(buf, 2)?, b"Hello?");
172	/// assert_eq!(input.read_aligned_bytes(buf, 2)?, b"");
173	/// # Ok::<_, Error>(())
174	/// ```
175	fn read_aligned_bytes<'a>(&mut self, buf: &'a mut [u8], alignment: usize) -> Result<&'a [u8]> {
176		default_read_aligned_bytes(self, buf, alignment)
177	}
178	/// Reads an array with a size of `N` bytes.
179	///
180	/// # Errors
181	///
182	/// Returns [`Error::End`] with the array length if [`N`] bytes cannot be read.
183	/// 
184	/// # Example
185	/// 
186	/// ```
187	/// # use data_streams::Error;
188	/// use data_streams::DataSource;
189	/// 
190	/// let mut input: &[u8] = b"Hello!";
191	/// assert_eq!(input.read_array::<3>()?, *b"Hel");
192	/// # Ok::<_, Error>(())
193	/// ```
194	fn read_array<const N: usize>(&mut self) -> Result<[u8; N]>
195	where
196		Self: Sized
197	{
198		default_read_array(self)
199	}
200
201	/// Reads a [`u8`].
202	///
203	/// # Errors
204	///
205	/// Returns [`Error::End`] if the stream ends before exactly `1` byte can be
206	/// read.
207	/// 
208	/// # Example
209	/// 
210	/// ```
211	/// use data_streams::DataSource;
212	/// 
213	/// let mut buf: &[u8] = &[2, 3, 5, 7, 11];
214	/// 
215	/// let mut sum = 0;
216	/// while let Ok(byte) = buf.read_u8() {
217 	///     sum += byte;
218	/// }
219	/// assert_eq!(sum, 28);
220	/// ```
221	fn read_u8(&mut self) -> Result<u8> { self.read_data() }
222	/// Reads an [`i8`].
223	///
224	/// # Errors
225	///
226	/// Returns [`Error::End`] if the stream ends before exactly `1` byte can be
227	/// read.
228	/// 
229	/// ```
230	/// use data_streams::DataSource;
231	///
232	/// let mut buf: &[u8] = &[2, (-3i8) as u8, 5, (-7i8) as u8, 11];
233	/// 
234	/// let mut sum = 0;
235	/// while let Ok(byte) = buf.read_i8() {
236	///     sum += byte;
237	/// }
238	/// assert_eq!(sum, 8);
239	/// ```
240	fn read_i8(&mut self) -> Result<i8> { self.read_data() }
241	/// Reads a big-endian [`u16`].
242	///
243	/// # Errors
244	///
245	/// Returns [`Error::End`] if the stream ends before exactly `2` bytes can be
246	/// read.
247	/// 
248	/// # Example
249	/// 
250	/// ```
251	/// # use data_streams::Error;
252	/// use data_streams::DataSource;
253	/// 
254	/// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
255	/// assert_eq!(buf.read_u16()?, 0x1234);
256	/// assert_eq!(buf.read_u16()?, 0x5678);
257	/// # Ok::<_, Error>(())
258	/// ```
259	fn read_u16(&mut self) -> Result<u16> { self.read_int() }
260	/// Reads a big-endian [`i16`].
261	///
262	/// # Errors
263	///
264	/// Returns [`Error::End`] if the stream ends before exactly `2` bytes can be
265	/// read.
266	///
267	/// # Example
268	///
269	/// ```
270	/// # use data_streams::Error;
271	/// use data_streams::DataSource;
272	///
273	/// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
274	/// assert_eq!(buf.read_i16()?, 0x1234);
275	/// assert_eq!(buf.read_i16()?, 0x5678);
276	/// # Ok::<_, Error>(())
277	/// ```
278	fn read_i16(&mut self) -> Result<i16> { self.read_int() }
279	/// Reads a little-endian [`u16`].
280	///
281	/// # Errors
282	///
283	/// Returns [`Error::End`] if the stream ends before exactly `2` bytes can be
284	/// read.
285	///
286	/// # Example
287	///
288	/// ```
289	/// # use data_streams::Error;
290	/// use data_streams::DataSource;
291	///
292	/// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
293	/// assert_eq!(buf.read_u16_le()?, 0x3412);
294	/// assert_eq!(buf.read_u16_le()?, 0x7856);
295	/// # Ok::<_, Error>(())
296	/// ```
297	fn read_u16_le(&mut self) -> Result<u16> { self.read_int_le() }
298	/// Reads a little-endian [`i16`].
299	///
300	/// # Errors
301	///
302	/// Returns [`Error::End`] if the stream ends before exactly `2` bytes can be
303	/// read.
304	///
305	/// # Example
306	///
307	/// ```
308	/// # use data_streams::Error;
309	/// use data_streams::DataSource;
310	///
311	/// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
312	/// assert_eq!(buf.read_i16_le()?, 0x3412);
313	/// assert_eq!(buf.read_i16_le()?, 0x7856);
314	/// # Ok::<_, Error>(())
315	/// ```
316	fn read_i16_le(&mut self) -> Result<i16> { self.read_int_le() }
317	/// Reads a big-endian [`u32`].
318	///
319	/// # Errors
320	///
321	/// Returns [`Error::End`] if the stream ends before exactly `4` bytes can be
322	/// read.
323	///
324	/// # Example
325	///
326	/// ```
327	/// # use data_streams::Error;
328	/// use data_streams::DataSource;
329	///
330	/// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
331	/// assert_eq!(buf.read_u32()?, 0x12345678);
332	/// # Ok::<_, Error>(())
333	/// ```
334	fn read_u32(&mut self) -> Result<u32> { self.read_int() }
335	/// Reads a big-endian [`i32`].
336	///
337	/// # Errors
338	///
339	/// Returns [`Error::End`] if the stream ends before exactly `4` bytes can be
340	/// read.
341	///
342	/// # Example
343	///
344	/// ```
345	/// # use data_streams::Error;
346	/// use data_streams::DataSource;
347	///
348	/// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
349	/// assert_eq!(buf.read_i32()?, 0x12345678);
350	/// # Ok::<_, Error>(())
351	/// ```
352	fn read_i32(&mut self) -> Result<i32> { self.read_int() }
353	/// Reads a little-endian [`u32`].
354	///
355	/// # Errors
356	///
357	/// Returns [`Error::End`] if the stream ends before exactly `4` bytes can be
358	/// read.
359	///
360	/// # Example
361	///
362	/// ```
363	/// # use data_streams::Error;
364	/// use data_streams::DataSource;
365	///
366	/// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
367	/// assert_eq!(buf.read_u32_le()?, 0x78563412);
368	/// # Ok::<_, Error>(())
369	/// ```
370	fn read_u32_le(&mut self) -> Result<u32> { self.read_int_le() }
371	/// Reads a little-endian [`i32`].
372	///
373	/// # Errors
374	///
375	/// Returns [`Error::End`] if the stream ends before exactly `4` bytes can be
376	/// read.
377	///
378	/// # Example
379	///
380	/// ```
381	/// # use data_streams::Error;
382	/// use data_streams::DataSource;
383	///
384	/// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
385	/// assert_eq!(buf.read_i32_le()?, 0x78563412);
386	/// # Ok::<_, Error>(())
387	/// ```
388	fn read_i32_le(&mut self) -> Result<i32> { self.read_int_le() }
389	/// Reads a big-endian [`u64`].
390	///
391	/// # Errors
392	///
393	/// Returns [`Error::End`] if the stream ends before exactly `8` bytes can be
394	/// read.
395	///
396	/// # Example
397	///
398	/// ```
399	/// # use data_streams::Error;
400	/// use data_streams::DataSource;
401	///
402	/// let mut buf: &[u8] = &[
403	///     0x12, 0x34, 0x56, 0x78,
404	///     0x9A, 0xBC, 0xDE, 0xF0
405	/// ];
406	/// assert_eq!(buf.read_u64()?, 0x1234_5678_9ABC_DEF0);
407	/// # Ok::<_, Error>(())
408	/// ```
409	fn read_u64(&mut self) -> Result<u64> { self.read_int() }
410	/// Reads a big-endian [`i64`].
411	///
412	/// # Errors
413	///
414	/// Returns [`Error::End`] if the stream ends before exactly `8` bytes can be
415	/// read.
416	///
417	/// # Example
418	///
419	/// ```
420	/// # use data_streams::Error;
421	/// use data_streams::DataSource;
422	///
423	/// let mut buf: &[u8] = &[
424	///     0x12, 0x34, 0x56, 0x78,
425	///     0x9A, 0xBC, 0xDE, 0xF0
426	/// ];
427	/// assert_eq!(buf.read_i64()?, 0x1234_5678_9ABC_DEF0);
428	/// # Ok::<_, Error>(())
429	/// ```
430	fn read_i64(&mut self) -> Result<i64> { self.read_int() }
431	/// Reads a little-endian [`u64`].
432	///
433	/// # Errors
434	///
435	/// Returns [`Error::End`] if the stream ends before exactly `8` bytes can be
436	/// read.
437	///
438	/// # Example
439	///
440	/// ```
441	/// # use data_streams::Error;
442	/// use data_streams::DataSource;
443	///
444	/// let mut buf: &[u8] = &[
445	///     0x12, 0x34, 0x56, 0x78,
446	///     0x9A, 0xBC, 0xDE, 0xF0
447	/// ];
448	/// assert_eq!(buf.read_u64_le()?, 0xF0DE_BC9A_7856_3412);
449	/// # Ok::<_, Error>(())
450	/// ```
451	fn read_u64_le(&mut self) -> Result<u64> { self.read_int_le() }
452	/// Reads a little-endian [`i64`].
453	///
454	/// # Errors
455	///
456	/// Returns [`Error::End`] if the stream ends before exactly `8` bytes can be
457	/// read.
458	///
459	/// # Example
460	///
461	/// ```
462	/// # use data_streams::Error;
463	/// use data_streams::DataSource;
464	///
465	/// let mut buf: &[u8] = &[
466	///     0x12, 0x34, 0x56, 0x78,
467	///     0x9A, 0xBC, 0xDE, 0xF0
468	/// ];
469	/// assert_eq!(buf.read_i64_le()?, 0xF0DE_BC9A_7856_3412u64 as i64);
470	/// # Ok::<_, Error>(())
471	/// ```
472	fn read_i64_le(&mut self) -> Result<i64> { self.read_int_le() }
473	/// Reads a big-endian [`u128`].
474	///
475	/// # Errors
476	///
477	/// Returns [`Error::End`] if the stream ends before exactly `16` bytes can be
478	/// read.
479	///
480	/// # Example
481	///
482	/// ```
483	/// # use data_streams::Error;
484	/// use data_streams::DataSource;
485	///
486	/// let mut buf: &[u8] = &[
487	///     0x12, 0x34, 0x56, 0x78,
488	///     0x9A, 0xBC, 0xDE, 0xF0,
489	///     0x0F, 0xED, 0xCB, 0xA9,
490	///     0x87, 0x65, 0x43, 0x21
491	/// ];
492	/// assert_eq!(buf.read_u128()?, 0x1234_5678_9ABC_DEF0_0FED_CBA9_8765_4321);
493	/// # Ok::<_, Error>(())
494	/// ```
495	fn read_u128(&mut self) -> Result<u128> { self.read_int() }
496	/// Reads a big-endian [`i128`].
497	///
498	/// # Errors
499	///
500	/// Returns [`Error::End`] if the stream ends before exactly `16` bytes can be
501	/// read.
502	///
503	/// # Example
504	///
505	/// ```
506	/// # use data_streams::Error;
507	/// use data_streams::DataSource;
508	///
509	/// let mut buf: &[u8] = &[
510	///     0x12, 0x34, 0x56, 0x78,
511	///     0x9A, 0xBC, 0xDE, 0xF0,
512	///     0x0F, 0xED, 0xCB, 0xA9,
513	///     0x87, 0x65, 0x43, 0x21
514	/// ];
515	/// assert_eq!(buf.read_i128()?, 0x1234_5678_9ABC_DEF0_0FED_CBA9_8765_4321);
516	/// # Ok::<_, Error>(())
517	/// ```
518	fn read_i128(&mut self) -> Result<i128> { self.read_int() }
519	/// Reads a little-endian [`u128`].
520	///
521	/// # Errors
522	///
523	/// Returns [`Error::End`] if the stream ends before exactly `16` bytes can be
524	/// read.
525	///
526	/// # Example
527	///
528	/// ```
529	/// # use data_streams::Error;
530	/// use data_streams::DataSource;
531	///
532	/// let mut buf: &[u8] = &[
533	///     0x12, 0x34, 0x56, 0x78,
534	///     0x9A, 0xBC, 0xDE, 0xF0,
535	///     0x0F, 0xED, 0xCB, 0xA9,
536	///     0x87, 0x65, 0x43, 0x21
537	/// ];
538	/// assert_eq!(buf.read_u128_le()?, 0x2143_6587_A9CB_ED0F_F0DE_BC9A_7856_3412);
539	/// # Ok::<_, Error>(())
540	/// ```
541	fn read_u128_le(&mut self) -> Result<u128> { self.read_int_le() }
542	/// Reads a little-endian [`i128`].
543	///
544	/// # Errors
545	///
546	/// Returns [`Error::End`] if the stream ends before exactly `16` bytes can be
547	/// read.
548	///
549	/// # Example
550	///
551	/// ```
552	/// # use data_streams::Error;
553	/// use data_streams::DataSource;
554	///
555	/// let mut buf: &[u8] = &[
556	///     0x12, 0x34, 0x56, 0x78,
557	///     0x9A, 0xBC, 0xDE, 0xF0,
558	///     0x0F, 0xED, 0xCB, 0xA9,
559	///     0x87, 0x65, 0x43, 0x21
560	/// ];
561	/// assert_eq!(buf.read_i128_le()?, 0x2143_6587_A9CB_ED0F_F0DE_BC9A_7856_3412);
562	/// # Ok::<_, Error>(())
563	/// ```
564	fn read_i128_le(&mut self) -> Result<i128> { self.read_int_le() }
565	/// Reads a big-endian [`usize`]. To make streams consistent across platforms,
566	/// [`usize`] is fixed to the size of [`u64`] regardless of the target platform.
567	///
568	/// # Errors
569	///
570	/// Returns [`Error::End`] if the stream ends before exactly `8` bytes can be
571	/// read.
572	///
573	/// # Example
574	///
575	/// ```
576	/// # use data_streams::Error;
577	/// use data_streams::DataSource;
578	///
579	/// let mut buf: &[u8] = &[
580	///     0x12, 0x34, 0x56, 0x78,
581	///     0x9A, 0xBC, 0xDE, 0xF0
582	/// ];
583	/// assert_eq!(buf.read_usize()?, 0x1234_5678_9ABC_DEF0);
584	/// # Ok::<_, Error>(())
585	/// ```
586	fn read_usize(&mut self) -> Result<usize> {
587		self.read_u64().map(|i| i as usize)
588	}
589	/// Reads a big-endian [`isize`]. To make streams consistent across platforms,
590	/// [`isize`] is fixed to the size of [`i64`] regardless of the target platform.
591	///
592	/// # Errors
593	///
594	/// Returns [`Error::End`] if the stream ends before exactly `8` bytes can be
595	/// read.
596	///
597	/// # Example
598	///
599	/// ```
600	/// # use data_streams::Error;
601	/// use data_streams::DataSource;
602	///
603	/// let mut buf: &[u8] = &[
604	///     0x12, 0x34, 0x56, 0x78,
605	///     0x9A, 0xBC, 0xDE, 0xF0
606	/// ];
607	/// assert_eq!(buf.read_isize()?, 0x1234_5678_9ABC_DEF0);
608	/// # Ok::<_, Error>(())
609	/// ```
610	fn read_isize(&mut self) -> Result<isize> {
611		self.read_i64().map(|i| i as isize)
612	}
613	/// Reads a little-endian [`usize`]. To make streams consistent across platforms,
614	/// [`usize`] is fixed to the size of [`u64`] regardless of the target platform.
615	///
616	/// # Errors
617	///
618	/// Returns [`Error::End`] if the stream ends before exactly `8` bytes can be
619	/// read.
620	///
621	/// # Example
622	///
623	/// ```
624	/// # use data_streams::Error;
625	/// use data_streams::DataSource;
626	///
627	/// let mut buf: &[u8] = &[
628	///     0x12, 0x34, 0x56, 0x78,
629	///     0x9A, 0xBC, 0xDE, 0xF0
630	/// ];
631	/// assert_eq!(buf.read_usize_le()?, 0xF0DE_BC9A_7856_3412);
632	/// # Ok::<_, Error>(())
633	/// ```
634	fn read_usize_le(&mut self) -> Result<usize> {
635		self.read_u64_le().map(|i| i as usize)
636	}
637	/// Reads a little-endian [`isize`]. To make streams consistent across platforms,
638	/// [`isize`] is fixed to the size of [`i64`] regardless of the target platform.
639	///
640	/// # Errors
641	///
642	/// Returns [`Error::End`] if the stream ends before exactly `8` bytes can be
643	/// read.
644	///
645	/// # Example
646	///
647	/// ```
648	/// # use data_streams::Error;
649	/// use data_streams::DataSource;
650	///
651	/// let mut buf: &[u8] = &[
652	///     0x12, 0x34, 0x56, 0x78,
653	///     0x9A, 0xBC, 0xDE, 0xF0
654	/// ];
655	/// assert_eq!(buf.read_isize_le()?, 0xF0DE_BC9A_7856_3412usize as isize);
656	/// # Ok::<_, Error>(())
657	/// ```
658	fn read_isize_le(&mut self) -> Result<isize> {
659		self.read_i64_le().map(|i| i as isize)
660	}
661
662	/// Reads bytes into a slice, returning them as a UTF-8 string if valid.
663	///
664	/// # Errors
665	///
666	/// Returns [`Error::Utf8`] if invalid UTF-8 is read. The stream is left in an
667	/// undefined state with up to `buf.len()` bytes consumed, including invalid
668	/// bytes and any subsequent bytes. `buf` contains at least any valid UTF-8
669	/// read before invalid bytes were encountered. The valid UTF-8 length is given
670	/// by the error, [`Utf8Error::valid_up_to`]. This slice can be safely converted
671	/// to a string with [`from_utf8_unchecked`] or [`Utf8Error::split_valid`]:
672	///
673	/// ```
674	/// # use data_streams::{DataSource, Error};
675	/// # let mut source = &[b'h', b'e', b'l', b'l', b'o', 0xFF][..];
676	/// # let buffer = &mut [0; 6];
677	/// let str: &str = match source.read_utf8(buffer) {
678	///     Ok(str) => str,
679	///     Err(Error::Utf8(error)) => {
680	///         let (valid, invalid) = unsafe {
681	///             // Safe because the buffer has been validated up to this point,
682	///             // according to the error.
683	///             error.split_valid(buffer)
684	///         };
685	///         // Do something with invalid bytes...
686	///         valid
687	///     }
688	///     Err(error) => return Err(error)
689	/// };
690	/// # assert_eq!(str, "hello");
691	/// # Ok::<_, Error>(())
692	/// ```
693	///
694	/// [`from_utf8_unchecked`]: core::str::from_utf8_unchecked
695	///
696	/// # Example
697	///
698	/// ```
699	/// # use data_streams::Error;
700	/// use data_streams::DataSource;
701	///
702	/// let mut input: &[u8] = "Hello! 👋".as_bytes();
703	/// let buf: &mut [u8] = &mut [0; 11];
704	///
705	/// assert_eq!(input.read_utf8(buf)?, "Hello! 👋");
706	/// # Ok::<_, Error>(())
707	/// ```
708	///
709	/// # Implementation
710	///
711	/// The default implementation uses a very fast UTF-8 validator ([`simdutf8`]),
712	/// so overriding is unlikely to be useful.
713	///
714	/// [`simdutf8`]: https://crates.io/crates/simdutf8
715	#[cfg(feature = "utf8")]
716	fn read_utf8<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a str> {
717		let bytes = self.read_bytes(buf)?;
718		let utf8 = from_utf8(bytes)?;
719		Ok(utf8)
720	}
721	/// Reads a single UTF-8 codepoint, returning a [`char`] if valid.
722	///
723	/// # Errors
724	///
725	/// Returns [`Error::Utf8`] if invalid UTF-8 is read. The stream is left with
726	/// one to four bytes consumed, depending on the UTF-8 character width encoded
727	/// in the first byte. `buf` contains any consumed bytes.
728	///
729	/// Returns [`Error::End`] if the end-of-stream is reached before the full
730	/// character width is read. `buf` is empty or contains exactly one byte.
731	///
732	/// # Example
733	///
734	/// ```
735	/// # use data_streams::Error;
736	/// use data_streams::DataSource;
737	///
738	/// let mut input: &[u8] = "🍉".as_bytes();
739	/// assert_eq!(input.read_utf8_codepoint(&mut [0; 4])?, '🍉');
740	/// # Ok::<_, Error>(())
741	/// ```
742	#[cfg(feature = "utf8")]
743	fn read_utf8_codepoint(&mut self, buf: &mut [u8; 4]) -> Result<char> {
744		let Ok(char) = default_read_utf8_codepoint(self, buf)?.parse() else {
745			// Safety: this function promises to produce a UTF-8 string with exactly one character.
746			unreachable!()
747		};
748		Ok(char)
749	}
750	/// Reads bytes into a slice, returning them as an ASCII slice if valid.
751	///
752	/// # Errors
753	///
754	/// Returns [`Error::Ascii`] if a non-ASCII byte is found. The stream is left
755	/// in an undefined state with up to `buf.len()` bytes consumed, including the
756	/// invalid byte and any subsequent bytes. `buf` contains all consumed bytes.
757	/// The valid ASCII length is given by the error, [`AsciiError::valid_up_to`].
758	/// The number of bytes consumed after the invalid byte is given by
759	/// [`AsciiError::unchecked_count`]. These slices can be safely split with
760	/// [`AsciiError::split_valid`]:
761	///
762	/// ```
763	/// #![feature(ascii_char)]
764	///
765	/// # use data_streams::{DataSource, Error};
766	/// # use core::ascii;
767	/// # let mut source = &[b'h', b'e', b'l', b'l', b'o', 0xFF][..];
768	/// # let buffer = &mut [0; 6];
769	/// let str: &[ascii::Char] = match source.read_ascii(buffer) {
770	///     Ok(str) => str,
771	///     Err(Error::Ascii(error)) => {
772	///         let (valid, invalid) = error.split_valid(buffer);
773	///         // Do something with invalid bytes...
774	///         valid
775	///     }
776	///     Err(error) => return Err(error)
777	/// };
778	/// # assert_eq!(str.as_str(), "hello");
779	/// # Ok::<_, Error>(())
780	/// ```
781	///
782	/// # Example
783	///
784	/// ```
785	/// #![feature(ascii_char)]
786	///
787	/// # use data_streams::Error;
788	/// use data_streams::DataSource;
789	///
790	/// let mut input: &[u8] = b"Hello!";
791	/// let buf: &mut [u8] = &mut [0; 6];
792	/// 
793	/// assert_eq!(input.read_ascii(buf)?.as_str(), "Hello!");
794	/// # Ok::<_, Error>(())
795	/// ```
796	#[cfg(feature = "unstable_ascii_char")]
797	fn read_ascii<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a [ascii::Char]> {
798		default_read_ascii(self, buf)
799	}
800}
801
/// A helper macro which conditionally disables the default body of a method if
/// the specialization feature-gate is not enabled.
///
/// It accepts a single method of the shape
/// `fn name<'lt>(&mut self, arg: Ty) -> Ret { body }` preceded by at least one
/// attribute (doc comments count), and emits it twice with the same attributes:
/// once with the body when `unstable_specialization` is enabled, and once as a
/// required (body-less) method when it is not.
#[cfg(feature = "alloc")]
macro_rules! spec_default {
	($(#[$meta:meta])+fn $name:ident<$lt:lifetime>(&mut $self:ident, $arg:ident: $arg_ty:ty) -> $result:ty $body:block) => {
		// Provided method: the default body is kept.
		$(#[$meta])+
		#[cfg(feature = "unstable_specialization")]
		fn $name<$lt>(&mut $self, $arg: $arg_ty) -> $result $body
		// Required method: the body is dropped, so implementors must supply one.
		$(#[$meta])+
		#[cfg(not(feature = "unstable_specialization"))]
		fn $name<$lt>(&mut $self, $arg: $arg_ty) -> $result;
	};
}
815
/// A source stream reading data into vectors.
#[cfg(feature = "alloc")]
pub trait VecSource: DataSource {
	spec_default! {
	/// Reads bytes into `buf` until the presumptive end of the stream, returning
	/// the bytes read. If an error is returned, any bytes read remain in `buf`.
	///
	/// Note that the stream may not necessarily have ended; more bytes may still
	/// be read in subsequent calls. The stream's end is only *presumed* to be
	/// reached. For example, a TCP socket may read no data signaling an end, but
	/// later begin reading again.
	///
	/// # Errors
	///
	/// Returns any IO errors encountered.
	///
	/// # Example
	///
	/// ```
	/// # use data_streams::Error;
	/// # #[cfg(feature = "unstable_specialization")]
	/// # {
	/// use data_streams::VecSource;
	///
	/// let mut input: &[u8] = b"Hello!";
	/// let mut buf = Vec::new();
	/// assert_eq!(input.read_to_end(&mut buf)?, b"Hello!");
	/// # }
	/// # Ok::<_, Error>(())
	/// ```
	fn read_to_end<'a>(&mut self, buf: &'a mut alloc::vec::Vec<u8>) -> Result<&'a [u8]> {
		impls::read_to_end(self, buf, 0)
	}
	}

	spec_default! {
	/// Reads UTF-8 bytes into `buf` until the end of the stream, returning the
	/// string read. If invalid bytes are encountered, an error is returned and
	/// `buf` is unchanged. In this case, the stream is left in a state with an
	/// undefined number of bytes read.
	///
	/// # Errors
	///
	/// Returns [`Error::Utf8`] if invalid UTF-8 is read. The stream is left in a
	/// state with all bytes consumed from it. `buf` contains the read UTF-8 string
	/// up to the invalid bytes.
	///
	/// # Example
	///
	/// ```
	/// # use data_streams::Error;
	/// use data_streams::VecSource;
	///
	/// let mut input: &[u8] = b"Hello!";
	/// let mut buf = String::new();
	/// assert_eq!(input.read_utf8_to_end(&mut buf)?, "Hello!");
	/// # Ok::<_, Error>(())
	/// ```
	// NOTE(review): the summary says `buf` is unchanged on invalid UTF-8 and the
	// number of bytes read is undefined, while the Errors section says `buf`
	// holds the valid prefix and all bytes are consumed. Confirm against
	// `append_utf8` and reconcile the two paragraphs.
	#[cfg(feature = "utf8")]
	fn read_utf8_to_end<'a>(&mut self, buf: &'a mut alloc::string::String) -> Result<&'a str> {
		// SAFETY: this function only modifies the string's bytes if the new bytes are found to be
		//  valid UTF-8.
		unsafe {
			// The closure reads to the end of the stream and reports how many bytes it read.
			append_utf8(buf, |buf| impls::read_to_end(self, buf, 0).map(<[u8]>::len))
		}
	}
	}
}
884
885/// Reads generic data from a [source](DataSource).
886pub trait GenericDataSource: DataSource {
887	/// Reads a big-endian integer.
888	///
889	/// # Errors
890	///
891	/// Returns [`Error::End`] if the stream ends before exactly the type's size in
892	/// bytes can be read.
893	///
894	/// # Example
895	/// 
896	/// ```
897	/// # use data_streams::Error;
898	/// use data_streams::GenericDataSource;
899	/// 
900	/// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
901	/// let int: u32 = buf.read_int()?;
902	/// assert_eq!(int, 0x12345678);
903	/// # Ok::<_, Error>(())
904	/// ```
905	fn read_int<T: Pod + PrimInt>(&mut self) -> Result<T> {
906		self.read_data().map(T::from_be)
907	}
908
909	/// Reads a little-endian integer.
910	///
911	/// # Errors
912	///
913	/// Returns [`Error::End`] if the stream ends before exactly the type's size in
914	/// bytes can be read.
915	///
916	/// # Example
917	///
918	/// ```
919	/// # use data_streams::Error;
920	/// use data_streams::GenericDataSource;
921	///
922	/// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
923	/// let int: u32 = buf.read_int_le()?;
924	/// assert_eq!(int, 0x78563412);
925	/// # Ok::<_, Error>(())
926	/// ```
927	fn read_int_le<T: Pod + PrimInt>(&mut self) -> Result<T> {
928		self.read_data().map(T::from_le)
929	}
930
931	/// Reads a value of generic type `T` supporting an arbitrary bit pattern. See
932	/// [`Pod`].
933	///
934	/// # Errors
935	///
936	/// Returns [`Error::End`] if the stream ends before exactly the type's size in
937	/// bytes can be read.
938	///
939	/// # Example
940	/// 
941	/// ```
942	/// # use data_streams::Error;
943	/// # #[cfg(target_endian = "little")]
944	/// # {
945	/// use data_streams::GenericDataSource;
946	///
947	/// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
948	/// let int: u32 = buf.read_data()?;
949	/// assert_eq!(int, 0x78563412);
950	/// # }
951	/// # Ok::<_, Error>(())
952	/// ```
953	fn read_data<T: Pod>(&mut self) -> Result<T> {
954		let mut value = T::zeroed();
955		self.read_exact_bytes(bytes_of_mut(&mut value))?;
956		Ok(value)
957	}
958	
959	/// Reads multiple values of generic type `T` supporting an arbitrary bit pattern,
960	/// returning the read values.
961	/// 
962	/// # Errors
963	/// 
964	/// Returns any IO errors encountered.
965	/// 
966	/// # Panics
967	/// 
968	/// Panics if the [`DataSource::read_aligned_bytes`] implementation returns an unaligned slice.
969	///
970	/// # Example
971	///
972	/// ```
973	/// # use data_streams::Error;
974	/// # #[cfg(target_endian = "little")]
975	/// # {
976	/// use data_streams::GenericDataSource;
977	///
978	/// let mut input: &[u8] = &[0x12, 0x34, 0x56, 0x78, 0xFF];
979	/// let buf: &mut [u16] = &mut [0; 3];
980	/// assert_eq!(input.read_data_slice(buf)?, [0x3412, 0x7856]);
981	/// # }
982	/// # Ok::<_, Error>(())
983	/// ```
984	fn read_data_slice<'a, T: Pod>(&mut self, buf: &'a mut [T]) -> Result<&'a [T]> {
985		let bytes = self.read_aligned_bytes(cast_slice_mut(buf), size_of::<T>())?;
986		assert_eq!(bytes.len() % size_of::<T>(), 0, "unaligned read implementation");
987		Ok(cast_slice(bytes))
988	}
989}
990
991impl<S: DataSource + ?Sized> GenericDataSource for S { }
992
993/// Accesses a source's internal buffer.
994pub trait BufferAccess: DataSource {
995	/// Returns the capacity of the internal buffer.
996	/// 
997	/// # Example
998	/// 
999	/// ```
1000	/// # #[cfg(feature = "alloc")]
1001	/// # {
1002	/// use data_streams::BufferAccess;
1003	///
1004	/// let buf = Vec::<u8>::with_capacity(16);
1005	/// assert_eq!(buf.buffer_capacity(), 16);
1006	/// # }
1007	/// ```
1008	fn buffer_capacity(&self) -> usize;
1009	/// Returns the byte count contained in the internal buffer.
1010	/// 
1011	/// # Example
1012	/// 
1013	/// ```
1014	/// use data_streams::BufferAccess;
1015	/// 
1016	/// let buf: &[u8] = &[0; 16];
1017	/// assert_eq!(buf.buffer_count(), 16);
1018	/// ```
1019	fn buffer_count(&self) -> usize { self.buffer().len() }
1020	/// Returns a slice over the filled portion of the internal buffer. This slice
1021	/// may not contain the whole buffer, for example if it can't be represented as
1022	/// just one slice.
1023	/// 
1024	/// # Example
1025	/// 
1026	/// ```
1027	/// use data_streams::BufferAccess;
1028	/// 
1029	/// let buf: &[u8] = b"Hello!";
1030	/// assert_eq!(buf.buffer(), b"Hello!");
1031	/// ```
1032	fn buffer(&self) -> &[u8];
1033	/// Fills the internal buffer from the underlying stream, returning its contents
1034	/// if successful.
1035	/// 
1036	/// # Errors
1037	/// 
1038	/// Returns any IO errors encountered.
1039	///
1040	/// # Example
1041	///
1042	/// ```no_run
1043	/// # use data_streams::Error;
1044	/// # #[cfg(feature = "std")]
1045	/// # {
1046	/// use std::{fs::File, io::BufReader};
1047	/// use data_streams::BufferAccess;
1048	/// 
1049	/// let mut source = BufReader::new(File::open("file.txt")?);
1050	/// source.fill_buffer()?;
1051	/// # }
1052	/// # Ok::<_, Error>(())
1053	/// ```
1054	fn fill_buffer(&mut self) -> Result<&[u8]>;
1055	/// Clears the internal buffer.
1056	/// 
1057	/// # Example
1058	/// 
1059	/// ```no_run
1060	/// # use data_streams::Error;
1061	/// # #[cfg(feature = "std")]
1062	/// # {
1063	/// use std::{fs::File, io::BufReader};
1064	/// use data_streams::BufferAccess;
1065	///
1066	/// let mut source = BufReader::new(File::open("file.txt")?);
1067	/// source.fill_buffer()?;
1068	/// 
1069	/// source.clear_buffer();
1070	/// assert_eq!(source.buffer_count(), 0);
1071	/// # }
1072	/// # Ok::<_, Error>(())
1073	/// ```
1074	fn clear_buffer(&mut self) {
1075		self.drain_buffer(self.buffer_count());
1076	}
1077	/// Consumes `count` bytes from the internal buffer. The `count` must be `<=`
1078	/// the length of the slice returned by either [`buffer`](Self::buffer) or
1079	/// [`fill_buffer`](Self::fill_buffer)
1080	/// 
1081	/// # Panics
1082	/// 
1083	/// This method panics if `count` exceeds the buffer length.
1084	///
1085	/// # Example
1086	///
1087	/// ```no_run
1088	/// # use data_streams::Error;
1089	/// # #[cfg(feature = "std")]
1090	/// # {
1091	/// use std::{fs::File, io::BufReader};
1092	/// use data_streams::BufferAccess;
1093	///
1094	/// let mut source = BufReader::new(File::open("file.txt")?);
1095	/// source.fill_buffer()?;
1096	///
1097	/// source.drain_buffer(512);
1098	/// # }
1099	/// # Ok::<_, Error>(())
1100	/// ```
1101	fn drain_buffer(&mut self, count: usize);
1102	/// Bypasses the internal buffer by returning the underlying source, or `self`
1103	/// if this behavior is not supported. Note that not fully draining the buffer
1104	/// before bypassing it will cause data loss.
1105	fn bypass_buffer(&mut self) -> &mut impl DataSource where Self: Sized {
1106		self.clear_buffer();
1107		self
1108	}
1109}
1110
// Blanket, specializable `DataSource` implementation for anything exposing its
// internal buffer. Every method is `default`, so concrete sources may override
// whichever ones they can implement more efficiently.
#[cfg(feature = "unstable_specialization")]
impl<T: BufferAccess + ?Sized> DataSource for T {
	/// Reports the number of bytes currently held in the internal buffer.
	default fn available(&self) -> usize {
		self.buffer_count()
	}

	/// Delegates to `default_request`, which fills the internal buffer when
	/// fewer than `count` bytes are available.
	default fn request(&mut self, count: usize) -> Result<bool> {
		default_request(self, count)
	}

	/// Delegates to `default_skip`, which drains bytes from the internal buffer.
	default fn skip(&mut self, count: usize) -> Result<usize> {
		Ok(default_skip(self, count))
	}

	/// Copies buffered bytes into `buf` chunk by chunk until `buf` is full or the
	/// internal buffer stays empty after a fill attempt.
	default fn read_bytes<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a [u8]> {
		buf_read_bytes(
			self,
			buf,
			<[u8]>::is_empty,
			// Each buffered chunk is itself a byte-slice source, so the copy is
			// delegated to the slice implementation.
			|mut source, buf|
				source.read_bytes(buf).map(<[u8]>::len)
		)
	}

	/// Fills all of `buf`, draining the internal buffer first when `buf` is too
	/// large to be buffered in one piece.
	default fn read_exact_bytes<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a [u8]> {
		buf_read_exact_bytes(self, buf)
	}

	/// Reads bytes into a slice in multiples of `alignment`, returning the bytes
	/// read. This method is greedy; it consumes as many bytes as it can, until
	/// `buf` is filled or less than `alignment` bytes could be read.
	///
	/// If the alignment is zero or `buf`'s length is less than the alignment, the returned slice is
	/// empty.
	///
	/// # Errors
	///
	/// Returns any IO errors encountered.
	///
	/// [`Error::InsufficientBuffer`] is returned without reading if the buffer [capacity] is not
	/// large enough to hold at least one `alignment` width.
	///
	/// [capacity]: Self::buffer_capacity
	default fn read_aligned_bytes<'a>(&mut self, buf: &'a mut [u8], alignment: usize) -> Result<&'a [u8]> {
		if alignment == 0 { return Ok(&[]) }
		if self.buffer_capacity() < alignment {
			let spare_capacity = self.buffer_capacity() - self.buffer_count();
			return Err(Error::InsufficientBuffer {
				spare_capacity,
				required_count: alignment
			})
		}

		// Only whole `alignment`-sized units may be written, so ignore any tail of
		// `buf` that is too short to hold one.
		let len = max_multiple_of(buf.len(), alignment);
		buf_read_bytes(
			self,
			&mut buf[..len],
			// Stop as soon as either side holds less than one full unit.
			|buf| buf.len() < alignment,
			|mut source, buf|
				source.read_aligned_bytes(buf, alignment).map(<[u8]>::len)
		)
	}

	/// Reads buffered bytes into `buf` and validates them as UTF-8, returning the
	/// validated text. UTF-8 errors are adjusted by the number of bytes validated
	/// in earlier chunks.
	#[cfg(feature = "utf8")]
	default fn read_utf8<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a str> {
		// Running total of bytes accepted by the per-chunk validation.
		let mut valid_len = 0;
		let slice = buf_read_bytes(
			self,
			buf,
			<[u8]>::is_empty,
			|mut source, buf|
				match source.read_utf8(buf) {
					Ok(str) => {
						let len = str.len();
						valid_len += len;
						Ok(len)
					}
					// Offsets in the error are relative to the current chunk; shift
					// them past the bytes that were already accepted.
					Err(Error::Utf8(error)) =>
						Err(error.with_offset(valid_len).into()),
					Err(error) => Err(error)
				}
		)?;

		// Safety: valid_len bytes have been validated as UTF-8.
		Ok(unsafe { core::str::from_utf8_unchecked(slice) })
	}

	/// Reads a single UTF-8 codepoint, using `buf` as scratch space for its bytes.
	///
	/// When the internal buffer is non-empty, the first buffered byte determines
	/// how many bytes to read; otherwise the generic byte-by-byte path is used.
	#[cfg(feature = "utf8")]
	default fn read_utf8_codepoint(&mut self, buf: &mut [u8; 4]) -> Result<char> {
		let str = match self.buffer() {
			&[first_byte, ..] => {
				// NOTE(review): `utf8_char_width` is defined elsewhere; this assumes it
				// may report a width of zero for a byte that cannot start a sequence,
				// as the equivalent table in `core` does. Reading zero bytes would
				// produce an empty string and trip the `expect` below, so always read
				// at least the offending byte and let validation report the error.
				let char_width = utf8_char_width(first_byte).max(1);
				self.read_utf8(&mut buf[..char_width])?
			},
			[] => default_read_utf8_codepoint(self, buf)?
		};

		Ok(str.parse().expect("bytes read by `read_utf8` must be valid UTF-8 codepoints"))
	}

	/// Reads bytes into `buf` and checks that all of them are ASCII.
	#[cfg(feature = "unstable_ascii_char")]
	default fn read_ascii<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a [ascii::Char]> {
		default_read_ascii(self, buf)
	}
}
1216
// Specializable `VecSource` implementation for buffered sources; both methods
// hand off to the buffer-aware helpers in `impls`.
#[cfg(all(feature = "alloc", feature = "unstable_specialization"))]
impl<T> VecSource for T
where
	T: BufferAccess
{
	/// Appends the rest of the stream to `buf` via `impls::buf_read_to_end`.
	default fn read_to_end<'a>(&mut self, buf: &'a mut alloc::vec::Vec<u8>) -> Result<&'a [u8]> {
		impls::buf_read_to_end(self, buf)
	}

	/// Appends the rest of the stream to `buf` as text via
	/// `impls::buf_read_utf8_to_end`.
	#[cfg(feature = "utf8")]
	default fn read_utf8_to_end<'a>(&mut self, buf: &'a mut alloc::string::String) -> Result<&'a str> {
		impls::buf_read_utf8_to_end(self, buf)
	}
}
1228
/// Rounds `value` down to the nearest multiple of `factor`.
///
/// # Panics
///
/// Panics if `factor` is zero, since the rounding divides by it.
pub(crate) const fn max_multiple_of(value: usize, factor: usize) -> usize {
	// Integer division discards the remainder, so scaling the quotient back up
	// gives the largest multiple that fits. When the factor is a power of 2 this
	// optimizes to a single AND with the negated factor.
	let whole_factors = value / factor;
	whole_factors * factor
}
1234
1235#[allow(dead_code)]
1236pub(crate) fn default_request(source: &mut (impl BufferAccess + ?Sized), count: usize) -> Result<bool> {
1237	if source.available() < count {
1238		let buf_len = source.buffer_count();
1239		let spare_capacity = source.buffer_capacity() - buf_len;
1240		if source.buffer_capacity() > 0 && count < spare_capacity {
1241			Ok(source.fill_buffer()?.len() >= count)
1242		} else {
1243			Err(Error::InsufficientBuffer {
1244				spare_capacity,
1245				required_count: count - buf_len,
1246			})
1247		}
1248	} else {
1249		Ok(true)
1250	}
1251}
1252
1253// Todo: after consuming, loop fill_buf and consume.
1254#[allow(dead_code)]
1255pub(crate) fn default_skip(source: &mut (impl BufferAccess + ?Sized), mut count: usize) -> usize {
1256	let avail = source.available();
1257	count = count.min(avail);
1258	source.drain_buffer(count);
1259	// Guard against faulty implementations by verifying that the buffered
1260	// bytes were removed.
1261	assert_eq!(
1262		source.available(),
1263		avail.saturating_sub(count),
1264		"`drain_buffer` must remove buffered bytes"
1265	);
1266	avail
1267}
1268
1269pub(crate) fn default_read_array<const N: usize>(source: &mut (impl DataSource + ?Sized)) -> Result<[u8; N]> {
1270	let mut array = [0; N];
1271	source.read_exact_bytes(&mut array)?;
1272	Ok(array)
1273}
1274
1275fn try_read_exact_contiguous<'a>(source: &mut (impl DataSource + ?Sized), buf: &'a mut [u8]) -> Result<&'a [u8]> {
1276	let len = buf.len();
1277	let bytes = source.read_bytes(buf)?;
1278	assert_eq!(
1279		bytes.len(),
1280		len,
1281		"read_bytes should be greedy; at least {available} bytes were available \
1282		in the buffer, but only {read_len} bytes of the required {len} were read",
1283		available = source.available(),
1284		read_len = bytes.len()
1285	);
1286	Ok(bytes)
1287}
1288
1289#[allow(clippy::panic, reason = "can't use assert here")]
1290fn try_read_exact_discontiguous<'a>(
1291	source: &mut (impl DataSource + ?Sized),
1292	buf: &'a mut [u8],
1293	remaining: usize
1294) -> Result<&'a [u8]> {
1295	let filled = buf.len() - remaining;
1296	let read_count = source.read_bytes(&mut buf[filled..])?.len();
1297	if read_count < remaining {
1298		if source.available() < remaining {
1299			// Buffer was exhausted, meaning the stream ended prematurely
1300			Err(Error::End { required_count: buf.len() })
1301		} else {
1302			// read_bytes wasn't greedy, there were enough bytes in the buffer >:(
1303			panic!("read_bytes should have read {remaining} buffered bytes")
1304		}
1305	} else {
1306		// The whole slice has been confirmed to be filled.
1307		Ok(buf)
1308	}
1309}
1310
1311fn default_read_exact_bytes<'a>(source: &mut (impl DataSource + ?Sized), buf: &'a mut [u8]) -> Result<&'a [u8]> {
1312	let len = buf.len();
1313	match source.require(len) {
1314		Ok(()) => try_read_exact_contiguous(source, buf),
1315		Err(Error::InsufficientBuffer { .. }) => {
1316			// The buffer is not large enough to read the slice contiguously, and
1317			// we have no access to the buffer to drain it. So just try reading and
1318			// check if all bytes were read.
1319			let remaining = buf.len();
1320			try_read_exact_discontiguous(source, buf, remaining)
1321		}
1322		Err(error) => Err(error)
1323	}
1324}
1325
1326fn default_read_aligned_bytes<'a>(source: &mut (impl DataSource + ?Sized), buf: &'a mut [u8], alignment: usize) -> Result<&'a [u8]> {
1327	if alignment == 0 {
1328		return Ok(&[])
1329	}
1330	
1331	let len = max_multiple_of(buf.len(), alignment);
1332	let mut slice = &mut buf[..len];
1333	let mut count = 0;
1334	while !slice.is_empty() && source.request(alignment)? {
1335		let avail = slice.len().min(max_multiple_of(source.available(), alignment));
1336		source.read_exact_bytes(&mut slice[..avail])?;
1337		count += avail;
1338		slice = &mut slice[avail..];
1339	}
1340	
1341	Ok(&buf[..count])
1342}
1343
/// Fills all of `buf` from a source with an accessible internal buffer.
///
/// # Errors
///
/// Propagates errors from `require` other than [`Error::InsufficientBuffer`],
/// and any error from the reads themselves.
#[cfg(feature = "unstable_specialization")]
fn buf_read_exact_bytes<'a>(source: &mut (impl BufferAccess + ?Sized), buf: &'a mut [u8]) -> Result<&'a [u8]> {
	let wanted = buf.len();
	match source.require(wanted) {
		Ok(()) => return try_read_exact_contiguous(source, buf),
		// We're doing a large read. Drain the internal buffer, then try reading.
		// Most default implementations of read_bytes optimize for this case by
		// skipping the buffer.
		Err(Error::InsufficientBuffer { .. }) => { }
		Err(error) => return Err(error)
	}

	// Move whatever is buffered into the front of `buf`, one buffer slice at a
	// time, until either side runs out.
	let mut filled = 0;
	loop {
		let mut buffered = source.buffer();
		if filled == wanted || buffered.is_empty() {
			break
		}

		let copied = buffered.read_bytes(&mut buf[filled..])?.len();
		source.drain_buffer(copied);
		filled += copied;
	}

	try_read_exact_discontiguous(source, buf, wanted - filled)
}
1369
/// Shared read loop for sources with an accessible internal buffer.
///
/// Repeatedly obtains the buffered bytes, hands them to `slice_read_bytes`
/// together with the unfilled rest of `buf`, and drains however many bytes that
/// callback reports as consumed. The loop ends when `is_empty` holds for either
/// the unfilled rest of `buf` or the buffered bytes. Returns the filled prefix
/// of `buf`.
///
/// # Errors
///
/// Propagates errors from `request` (other than [`Error::InsufficientBuffer`],
/// which triggers a plain buffer fill instead), `fill_buffer` and
/// `slice_read_bytes`.
#[cfg(feature = "unstable_specialization")]
fn buf_read_bytes<'a>(
	source: &mut (impl BufferAccess + ?Sized),
	buf: &'a mut [u8],
	mut is_empty: impl FnMut(&[u8]) -> bool,
	mut slice_read_bytes: impl FnMut(&[u8], &mut [u8]) -> Result<usize>,
) -> Result<&'a [u8]> {
	let mut target = &mut *buf;
	loop {
		if is_empty(target) {
			break
		}

		let buffered = match source.request(target.len()) {
			// The rest of the target doesn't fit in the buffer at once; take
			// whatever a fill provides.
			Err(Error::InsufficientBuffer { .. }) => source.fill_buffer()?,
			Err(error) => return Err(error),
			Ok(_) => source.buffer()
		};
		if is_empty(buffered) {
			break
		}

		let consumed = slice_read_bytes(buffered, target)?;
		source.drain_buffer(consumed);
		target = &mut target[consumed..];
	}

	// Everything ahead of the unfilled tail has been written.
	let unfilled = target.len();
	Ok(&buf[..buf.len() - unfilled])
}
1397
/// Reads up to `count` bytes from `source`, appending them to `buf` if they are
/// valid UTF-8, and returns the appended text.
///
/// # Errors
///
/// Propagates read errors and UTF-8 validation errors from `append_utf8`.
#[cfg(all(feature = "alloc", feature = "utf8"))]
#[allow(dead_code, clippy::multiple_unsafe_ops_per_block)]
pub(crate) fn default_read_utf8<'a>(
	source: &mut (impl DataSource + ?Sized),
	count: usize,
	buf: &'a mut alloc::string::String
) -> Result<&'a str> {
	buf.reserve(count);
	// Safety: this function only modifies the string's bytes if the new bytes are found to be
	//  valid UTF-8. The closure leaves the existing bytes untouched and returns the
	//  number of bytes written directly after them.
	unsafe {
		append_utf8(buf, |b| {
			let len = b.len();
			// Zero-fill the read window instead of exposing uninitialized capacity:
			// `Vec::set_len` requires the newly covered elements to be initialized,
			// which they are not before the read. The capacity was reserved above,
			// so this does not reallocate.
			b.resize(len + count, 0);
			source.read_bytes(&mut b[len..])
				  .map(<[u8]>::len)
		})
	}
}
1417
/// Reads one UTF-8 codepoint from `source` into `buf`, returning it as a string
/// slice over the bytes read.
///
/// The first byte is read on its own to learn the width of the sequence, then
/// the rest of the sequence is read and the whole is validated.
///
/// # Errors
///
/// Propagates read errors, and returns a UTF-8 error if the bytes read are not
/// a valid sequence.
#[cfg(feature = "utf8")]
fn default_read_utf8_codepoint<'a>(source: &mut (impl DataSource + ?Sized), buf: &'a mut [u8; 4]) -> Result<&'a str> {
	let (first_byte, remaining) = buf.split_at_mut(1);
	source.read_exact_bytes(first_byte)?;
	// NOTE(review): `utf8_char_width` is defined elsewhere; this assumes it may
	// report a width of zero for a byte that cannot start a sequence, as the
	// equivalent table in `core` does. Clamping to one keeps `char_width - 1`
	// from underflowing and lets the validation below reject the byte.
	let char_width = utf8_char_width(first_byte[0]).max(1);
	source.read_exact_bytes(&mut remaining[..char_width - 1])?;
	Ok(from_utf8(&buf[..char_width])?)
}
1426
/// Reads bytes from `source` into `buf` and returns them as ASCII characters.
///
/// # Errors
///
/// Propagates read errors, and returns an invalid-ASCII error built from the
/// byte and index reported by `count_ascii` if not every byte read is ASCII.
#[cfg(feature = "unstable_ascii_char")]
fn default_read_ascii<'a>(source: &mut (impl DataSource + ?Sized), buf: &'a mut [u8]) -> Result<&'a [ascii::Char]> {
	let read = source.read_bytes(buf)?;
	let ascii_len = count_ascii(read);
	if ascii_len != read.len() {
		return Err(Error::invalid_ascii(read[ascii_len], ascii_len, read.len()))
	}

	// Safety: all bytes have been checked as valid ASCII.
	Ok(unsafe { read.as_ascii_unchecked() })
}
1438
1439#[cfg(feature = "unstable_ascii_char")]
/// Returns the length of the leading run of ASCII bytes in `slice`, which is
/// also the index of the first non-ASCII byte. For an all-ASCII slice this is
/// the slice's length.
pub(crate) fn count_ascii(slice: &[u8]) -> usize {
	// Fast path: the whole-slice check is cheaper than a byte-by-byte scan, and
	// fully ASCII input is the expected case.
	if slice.is_ascii() {
		return slice.len()
	}

	// Search from the front: callers treat the result as the number of valid
	// bytes before the first offender, so the last offender is the wrong answer.
	slice
		.iter()
		.position(|byte| !byte.is_ascii())
		.unwrap_or(slice.len())
}
1448
/// Appends bytes produced by `read` to `buf`, keeping them only if they are
/// valid UTF-8, and returns the appended text.
///
/// `read` receives the string's underlying byte vector and returns how many
/// bytes it wrote directly after the original contents. If `read` fails, or the
/// new bytes are not valid UTF-8, the string is cut back to its original length.
///
/// # Safety
///
/// Only the bytes after the original length are validated, so `read` must leave
/// the existing contents of the vector unchanged.
///
/// # Errors
///
/// Propagates the error returned by `read`, and returns a UTF-8 error if the
/// new bytes fail validation.
#[cfg(all(feature = "alloc", feature = "utf8"))]
#[allow(dead_code)]
pub(crate) unsafe fn append_utf8<R>(buf: &mut alloc::string::String, read: R) -> Result<&str>
where
	R: FnOnce(&mut alloc::vec::Vec<u8>) -> Result<usize> {
	use simdutf8::compat::from_utf8;

	// Drop guard that cuts the byte vector back to its known-valid length when it
	// leaves scope. That length starts at the original string length and is only
	// advanced once the new bytes have passed validation.
	struct TruncateOnDrop<'a> {
		valid_len: usize,
		bytes: &'a mut alloc::vec::Vec<u8>
	}

	impl Drop for TruncateOnDrop<'_> {
		fn drop(&mut self) {
			// Safety: exactly `valid_len` bytes have been written.
			unsafe {
				self.bytes.set_len(self.valid_len);
			}
		}
	}

	let appended_at = {
		let mut guard = TruncateOnDrop { valid_len: buf.len(), bytes: buf.as_mut_vec() };
		let appended = read(guard.bytes)?;
		let original_len = guard.valid_len;
		from_utf8(&guard.bytes[original_len..][..appended])?;
		// Validation passed, so let the guard keep the new bytes.
		guard.valid_len = original_len + appended;
		original_len
	};
	Ok(&buf[appended_at..])
}
1483
// Property tests for the exact-read helpers, covering both the generic path and
// the buffer-aware path.
#[cfg(all(
	test,
	feature = "std",
	feature = "alloc",
	feature = "unstable_specialization"
))]
mod read_exact_test {
	use std::assert_matches::assert_matches;
	use proptest::prelude::*;
	use alloc::vec::from_elem;
	use std::iter::repeat;
	use proptest::collection::vec;
	use crate::{BufferAccess, DataSource, Result};

	/// A buffered source backed by two vectors: `source` holds the bytes not yet
	/// buffered, and `buffer` holds the buffered bytes, bounded by its capacity.
	struct FakeBufSource {
		source: Vec<u8>,
		buffer: Vec<u8>
	}

	impl BufferAccess for FakeBufSource {
		fn buffer_capacity(&self) -> usize {
			self.buffer.capacity()
		}

		fn buffer(&self) -> &[u8] {
			self.buffer.as_slice()
		}

		fn fill_buffer(&mut self) -> Result<&[u8]> {
			let old_len = self.buffer.len();
			// Zero-pad up to capacity so the spare space can be written through a
			// slice, then cut the padding back to what was actually read.
			let spare = self.buffer.capacity() - old_len;
			self.buffer.extend(repeat(0).take(spare));
			let mut pending = self.source.as_slice();
			let consumed = pending.read_bytes(&mut self.buffer[old_len..])?.len();
			self.source.drain_buffer(consumed);
			self.buffer.truncate(old_len + consumed);
			Ok(&self.buffer)
		}

		fn clear_buffer(&mut self) {
			self.buffer.clear();
		}

		fn drain_buffer(&mut self, count: usize) {
			self.buffer.drain_buffer(count);
		}
	}

	proptest! {
		// Asking for one byte more than the source holds must report the end of
		// the stream.
		#[test]
		fn read_exact_end_of_stream(source in vec(any::<u8>(), 1..=256)) {
			let mut target = from_elem(0, source.len() + 1);
			let result = super::default_read_exact_bytes(&mut &*source, &mut target);
			assert_matches!(result, Err(super::Error::End { .. }));
		}

		// Same as above, through the buffer-aware implementation.
		#[test]
		fn buf_read_exact_end_of_stream(source in vec(any::<u8>(), 1..=256)) {
			let mut target = from_elem(0, source.len() + 1);
			let result = super::buf_read_exact_bytes(&mut &*source, &mut target);
			assert_matches!(result, Err(super::Error::End { .. }));
		}

		// A read larger than the internal buffer must still succeed.
		#[test]
		fn read_exact_insufficient_buffer(source in vec(any::<u8>(), 2..=256)) {
			let total = source.len();
			let buffer = Vec::with_capacity(total - 1);
			let mut fake = FakeBufSource { source, buffer };
			let mut target = from_elem(0, total);
			let _read = fake.read_exact_bytes(&mut target).unwrap();
		}

		// A read that fits within the internal buffer must succeed.
		#[test]
		fn read_exact_buffered(source in vec(any::<u8>(), 1..=256)) {
			let total = source.len();
			let buffer = Vec::with_capacity(total + 1);
			let mut fake = FakeBufSource { source, buffer };
			let mut target = from_elem(0, total);
			let _read = fake.read_exact_bytes(&mut target).unwrap();
		}
	}
}
1576
// Property tests for aligned reads from byte slices.
#[cfg(all(
	test,
	feature = "std",
	feature = "alloc",
))]
mod read_aligned_test {
	use proptest::arbitrary::any;
	use proptest::collection::vec;
	use proptest::{prop_assert_eq, prop_assume, proptest};
	use crate::DataSource;

	proptest! {
		// Whatever is read must be a whole multiple of the alignment.
		#[test]
		fn read_aligned(source in vec(any::<u8>(), 16..=256), alignment in 1usize..=16) {
			let mut storage = [0_u8; 256];
			let target = &mut storage[..source.len()];
			let mut input = source.as_slice();
			let bytes = input.read_aligned_bytes(target, alignment).unwrap();
			prop_assert_eq!(bytes.len() % alignment, 0);
		}

		// A target shorter than one alignment unit can't receive anything.
		#[test]
		fn read_aligned_truncated(buf_size in 0usize..=15, alignment in 1usize..=16) {
			prop_assume!(buf_size < alignment);
			let mut storage = [0_u8; 15];
			let target = &mut storage[..buf_size];
			let mut input: &[u8] = &[0; 16];
			let bytes = input.read_aligned_bytes(target, alignment).unwrap();
			prop_assert_eq!(bytes.len(), 0);
		}
	}
}