data_streams/source.rs
1// Copyright 2025 - Strixpyrr
2// SPDX-License-Identifier: Apache-2.0
3
4use bytemuck::{bytes_of_mut, cast_slice_mut, Pod};
5#[cfg(feature = "unstable_ascii_char")]
6use core::ascii;
7use bytemuck::cast_slice;
8use num_traits::PrimInt;
9#[cfg(feature = "utf8")]
10use simdutf8::compat::from_utf8;
11use crate::{Error, Result};
12#[cfg(feature = "utf8")]
13use crate::utf8::utf8_char_width;
14
15mod exact_size;
16mod impls;
17pub mod markers;
18
/// A source stream of data.
pub trait DataSource {
    /// Returns the number of bytes available for reading. This does not necessarily
    /// mean more data isn't available, just that *at least* this count may be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// use data_streams::DataSource;
    ///
    /// let buf: &[u8] = b"Hello!";
    /// assert_eq!(buf.available(), 6);
    /// ```
    fn available(&self) -> usize;
    /// Reads at most `count` bytes into an internal buffer, returning whether
    /// enough bytes are available. To return an end-of-stream error, use [`require`]
    /// instead.
    ///
    /// Note that a request returning `false` doesn't necessarily mean the stream
    /// has ended. More bytes may be read after.
    ///
    /// # Errors
    ///
    /// If the byte count exceeds the spare buffer capacity, [`Error::InsufficientBuffer`]
    /// is returned and both the internal buffer and underlying streams remain unchanged.
    ///
    /// [`require`]: Self::require
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = b"Hello!";
    /// assert_eq!(buf.request(3)?, true);
    /// assert_eq!(buf.request(50)?, false);
    /// # Ok::<_, Error>(())
    /// ```
    fn request(&mut self, count: usize) -> Result<bool>;
    /// Reads at least `count` bytes into an internal buffer, returning `Ok` if
    /// successful, or an end-of-stream error if not. For a softer version that
    /// returns whether enough bytes are available, use [`request`].
    ///
    /// The default implementation calls [`request`] and maps a `false` result to
    /// an end-of-stream error carrying `count`.
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ended before `count` bytes could be
    /// read. If the byte count exceeds the spare buffer capacity, [`Error::InsufficientBuffer`]
    /// is returned instead.
    ///
    /// [`request`]: Self::request
    ///
    /// # Example
    ///
    /// ```
    /// use data_streams::{DataSource, Error};
    ///
    /// let mut buf: &[u8] = b"Hello!";
    /// assert!(buf.require(3).is_ok());
    /// assert!(matches!(buf.require(50), Err(Error::End { .. })));
    /// ```
    fn require(&mut self, count: usize) -> Result {
        if self.request(count)? {
            Ok(())
        } else {
            Err(Error::end(count))
        }
    }

    /// Consumes up to `count` bytes in the stream, returning the number of bytes
    /// consumed if successful. At least the available count may be consumed.
    ///
    /// # Errors
    ///
    /// Returns any IO errors encountered.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = b"Hello!";
    /// assert_eq!(buf.skip(3)?, 3);
    /// assert_eq!(buf.skip(8)?, 3);
    /// # Ok::<_, Error>(())
    /// ```
    fn skip(&mut self, count: usize) -> Result<usize>;
    /// Reads bytes into a slice, returning the bytes read. This method is greedy;
    /// it consumes as many bytes as it can, until `buf` is filled or no more bytes
    /// are read.
    ///
    /// # Errors
    ///
    /// Returns any IO errors encountered.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut input: &[u8] = b"Hello!";
    /// let buf: &mut [u8] = &mut [0; 5];
    /// assert_eq!(input.read_bytes(&mut buf[..3])?, b"Hel");
    /// assert_eq!(input.read_bytes(buf)?, b"lo!");
    /// # Ok::<_, Error>(())
    /// ```
    fn read_bytes<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a [u8]>;
    /// Reads the exact length of bytes into a slice, returning the bytes read if
    /// successful, or an end-of-stream error if not. Bytes are not consumed if an
    /// end-of-stream error is returned.
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] with the slice length if the exact number of bytes
    /// cannot be read. The bytes that were read remain in the buffer, but have
    /// been consumed from the source.
    ///
    /// # Example
    ///
    /// ```
    /// use data_streams::{DataSource, Error};
    ///
    /// let mut input: &[u8] = b"Hello!";
    /// let buf: &mut [u8] = &mut [0; 5];
    /// assert_eq!(input.read_exact_bytes(&mut buf[..3])?, b"Hel");
    /// assert!(matches!(input.read_exact_bytes(buf), Err(Error::End { .. })));
    /// # Ok::<_, Error>(())
    /// ```
    // NOTE(review): the summary above says bytes are not consumed on an
    // end-of-stream error, while the `# Errors` section says the bytes read have
    // been consumed from the source. The default body defers entirely to
    // `default_read_exact_bytes`; confirm which statement holds and align the docs.
    fn read_exact_bytes<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a [u8]> {
        default_read_exact_bytes(self, buf)
    }
    /// Reads bytes into a slice in multiples of `alignment`, returning the bytes
    /// read. This method is greedy; it consumes as many bytes as it can, until
    /// `buf` is filled or less than `alignment` bytes could be read.
    ///
    /// If the alignment is zero, the returned slice is empty.
    ///
    /// # Errors
    ///
    /// Returns any IO errors encountered.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut input: &[u8] = b"Hello?!";
    /// let buf: &mut [u8] = &mut [0; 10];
    /// assert_eq!(input.read_aligned_bytes(buf, 2)?, b"Hello?");
    /// assert_eq!(input.read_aligned_bytes(buf, 2)?, b"");
    /// # Ok::<_, Error>(())
    /// ```
    fn read_aligned_bytes<'a>(&mut self, buf: &'a mut [u8], alignment: usize) -> Result<&'a [u8]> {
        default_read_aligned_bytes(self, buf, alignment)
    }
    /// Reads an array with a size of `N` bytes.
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] with the array length if `N` bytes cannot be read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut input: &[u8] = b"Hello!";
    /// assert_eq!(input.read_array::<3>()?, *b"Hel");
    /// # Ok::<_, Error>(())
    /// ```
    fn read_array<const N: usize>(&mut self) -> Result<[u8; N]>
    where
        Self: Sized
    {
        default_read_array(self)
    }

    /// Reads a [`u8`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `1` byte can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[2, 3, 5, 7, 11];
    ///
    /// let mut sum = 0;
    /// while let Ok(byte) = buf.read_u8() {
    ///     sum += byte;
    /// }
    /// assert_eq!(sum, 28);
    /// ```
    fn read_u8(&mut self) -> Result<u8> { self.read_data() }
    /// Reads an [`i8`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `1` byte can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[2, (-3i8) as u8, 5, (-7i8) as u8, 11];
    ///
    /// let mut sum = 0;
    /// while let Ok(byte) = buf.read_i8() {
    ///     sum += byte;
    /// }
    /// assert_eq!(sum, 8);
    /// ```
    fn read_i8(&mut self) -> Result<i8> { self.read_data() }
    /// Reads a big-endian [`u16`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `2` bytes can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
    /// assert_eq!(buf.read_u16()?, 0x1234);
    /// assert_eq!(buf.read_u16()?, 0x5678);
    /// # Ok::<_, Error>(())
    /// ```
    fn read_u16(&mut self) -> Result<u16> { self.read_int() }
    /// Reads a big-endian [`i16`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `2` bytes can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
    /// assert_eq!(buf.read_i16()?, 0x1234);
    /// assert_eq!(buf.read_i16()?, 0x5678);
    /// # Ok::<_, Error>(())
    /// ```
    fn read_i16(&mut self) -> Result<i16> { self.read_int() }
    /// Reads a little-endian [`u16`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `2` bytes can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
    /// assert_eq!(buf.read_u16_le()?, 0x3412);
    /// assert_eq!(buf.read_u16_le()?, 0x7856);
    /// # Ok::<_, Error>(())
    /// ```
    fn read_u16_le(&mut self) -> Result<u16> { self.read_int_le() }
    /// Reads a little-endian [`i16`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `2` bytes can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
    /// assert_eq!(buf.read_i16_le()?, 0x3412);
    /// assert_eq!(buf.read_i16_le()?, 0x7856);
    /// # Ok::<_, Error>(())
    /// ```
    fn read_i16_le(&mut self) -> Result<i16> { self.read_int_le() }
    /// Reads a big-endian [`u32`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `4` bytes can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
    /// assert_eq!(buf.read_u32()?, 0x12345678);
    /// # Ok::<_, Error>(())
    /// ```
    fn read_u32(&mut self) -> Result<u32> { self.read_int() }
    /// Reads a big-endian [`i32`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `4` bytes can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
    /// assert_eq!(buf.read_i32()?, 0x12345678);
    /// # Ok::<_, Error>(())
    /// ```
    fn read_i32(&mut self) -> Result<i32> { self.read_int() }
    /// Reads a little-endian [`u32`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `4` bytes can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
    /// assert_eq!(buf.read_u32_le()?, 0x78563412);
    /// # Ok::<_, Error>(())
    /// ```
    fn read_u32_le(&mut self) -> Result<u32> { self.read_int_le() }
    /// Reads a little-endian [`i32`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `4` bytes can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
    /// assert_eq!(buf.read_i32_le()?, 0x78563412);
    /// # Ok::<_, Error>(())
    /// ```
    fn read_i32_le(&mut self) -> Result<i32> { self.read_int_le() }
    /// Reads a big-endian [`u64`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `8` bytes can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[
    ///     0x12, 0x34, 0x56, 0x78,
    ///     0x9A, 0xBC, 0xDE, 0xF0
    /// ];
    /// assert_eq!(buf.read_u64()?, 0x1234_5678_9ABC_DEF0);
    /// # Ok::<_, Error>(())
    /// ```
    fn read_u64(&mut self) -> Result<u64> { self.read_int() }
    /// Reads a big-endian [`i64`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `8` bytes can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[
    ///     0x12, 0x34, 0x56, 0x78,
    ///     0x9A, 0xBC, 0xDE, 0xF0
    /// ];
    /// assert_eq!(buf.read_i64()?, 0x1234_5678_9ABC_DEF0);
    /// # Ok::<_, Error>(())
    /// ```
    fn read_i64(&mut self) -> Result<i64> { self.read_int() }
    /// Reads a little-endian [`u64`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `8` bytes can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[
    ///     0x12, 0x34, 0x56, 0x78,
    ///     0x9A, 0xBC, 0xDE, 0xF0
    /// ];
    /// assert_eq!(buf.read_u64_le()?, 0xF0DE_BC9A_7856_3412);
    /// # Ok::<_, Error>(())
    /// ```
    fn read_u64_le(&mut self) -> Result<u64> { self.read_int_le() }
    /// Reads a little-endian [`i64`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `8` bytes can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[
    ///     0x12, 0x34, 0x56, 0x78,
    ///     0x9A, 0xBC, 0xDE, 0xF0
    /// ];
    /// assert_eq!(buf.read_i64_le()?, 0xF0DE_BC9A_7856_3412u64 as i64);
    /// # Ok::<_, Error>(())
    /// ```
    fn read_i64_le(&mut self) -> Result<i64> { self.read_int_le() }
    /// Reads a big-endian [`u128`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `16` bytes can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[
    ///     0x12, 0x34, 0x56, 0x78,
    ///     0x9A, 0xBC, 0xDE, 0xF0,
    ///     0x0F, 0xED, 0xCB, 0xA9,
    ///     0x87, 0x65, 0x43, 0x21
    /// ];
    /// assert_eq!(buf.read_u128()?, 0x1234_5678_9ABC_DEF0_0FED_CBA9_8765_4321);
    /// # Ok::<_, Error>(())
    /// ```
    fn read_u128(&mut self) -> Result<u128> { self.read_int() }
    /// Reads a big-endian [`i128`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `16` bytes can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[
    ///     0x12, 0x34, 0x56, 0x78,
    ///     0x9A, 0xBC, 0xDE, 0xF0,
    ///     0x0F, 0xED, 0xCB, 0xA9,
    ///     0x87, 0x65, 0x43, 0x21
    /// ];
    /// assert_eq!(buf.read_i128()?, 0x1234_5678_9ABC_DEF0_0FED_CBA9_8765_4321);
    /// # Ok::<_, Error>(())
    /// ```
    fn read_i128(&mut self) -> Result<i128> { self.read_int() }
    /// Reads a little-endian [`u128`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `16` bytes can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[
    ///     0x12, 0x34, 0x56, 0x78,
    ///     0x9A, 0xBC, 0xDE, 0xF0,
    ///     0x0F, 0xED, 0xCB, 0xA9,
    ///     0x87, 0x65, 0x43, 0x21
    /// ];
    /// assert_eq!(buf.read_u128_le()?, 0x2143_6587_A9CB_ED0F_F0DE_BC9A_7856_3412);
    /// # Ok::<_, Error>(())
    /// ```
    fn read_u128_le(&mut self) -> Result<u128> { self.read_int_le() }
    /// Reads a little-endian [`i128`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `16` bytes can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[
    ///     0x12, 0x34, 0x56, 0x78,
    ///     0x9A, 0xBC, 0xDE, 0xF0,
    ///     0x0F, 0xED, 0xCB, 0xA9,
    ///     0x87, 0x65, 0x43, 0x21
    /// ];
    /// assert_eq!(buf.read_i128_le()?, 0x2143_6587_A9CB_ED0F_F0DE_BC9A_7856_3412);
    /// # Ok::<_, Error>(())
    /// ```
    fn read_i128_le(&mut self) -> Result<i128> { self.read_int_le() }
    /// Reads a big-endian [`usize`]. To make streams consistent across platforms,
    /// [`usize`] is fixed to the size of [`u64`] regardless of the target platform.
    ///
    /// The value is read as a [`u64`] and converted with an `as` cast, so on
    /// targets where [`usize`] is narrower than 64 bits the upper bits are
    /// silently discarded.
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `8` bytes can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[
    ///     0x12, 0x34, 0x56, 0x78,
    ///     0x9A, 0xBC, 0xDE, 0xF0
    /// ];
    /// assert_eq!(buf.read_usize()?, 0x1234_5678_9ABC_DEF0);
    /// # Ok::<_, Error>(())
    /// ```
    fn read_usize(&mut self) -> Result<usize> {
        self.read_u64().map(|i| i as usize)
    }
    /// Reads a big-endian [`isize`]. To make streams consistent across platforms,
    /// [`isize`] is fixed to the size of [`i64`] regardless of the target platform.
    ///
    /// The value is read as an [`i64`] and converted with an `as` cast, so on
    /// targets where [`isize`] is narrower than 64 bits the upper bits are
    /// silently discarded.
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `8` bytes can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[
    ///     0x12, 0x34, 0x56, 0x78,
    ///     0x9A, 0xBC, 0xDE, 0xF0
    /// ];
    /// assert_eq!(buf.read_isize()?, 0x1234_5678_9ABC_DEF0);
    /// # Ok::<_, Error>(())
    /// ```
    fn read_isize(&mut self) -> Result<isize> {
        self.read_i64().map(|i| i as isize)
    }
    /// Reads a little-endian [`usize`]. To make streams consistent across platforms,
    /// [`usize`] is fixed to the size of [`u64`] regardless of the target platform.
    ///
    /// The value is read as a [`u64`] and converted with an `as` cast, so on
    /// targets where [`usize`] is narrower than 64 bits the upper bits are
    /// silently discarded.
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `8` bytes can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[
    ///     0x12, 0x34, 0x56, 0x78,
    ///     0x9A, 0xBC, 0xDE, 0xF0
    /// ];
    /// assert_eq!(buf.read_usize_le()?, 0xF0DE_BC9A_7856_3412);
    /// # Ok::<_, Error>(())
    /// ```
    fn read_usize_le(&mut self) -> Result<usize> {
        self.read_u64_le().map(|i| i as usize)
    }
    /// Reads a little-endian [`isize`]. To make streams consistent across platforms,
    /// [`isize`] is fixed to the size of [`i64`] regardless of the target platform.
    ///
    /// The value is read as an [`i64`] and converted with an `as` cast, so on
    /// targets where [`isize`] is narrower than 64 bits the upper bits are
    /// silently discarded.
    ///
    /// # Errors
    ///
    /// Returns [`Error::End`] if the stream ends before exactly `8` bytes can be
    /// read.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut buf: &[u8] = &[
    ///     0x12, 0x34, 0x56, 0x78,
    ///     0x9A, 0xBC, 0xDE, 0xF0
    /// ];
    /// assert_eq!(buf.read_isize_le()?, 0xF0DE_BC9A_7856_3412usize as isize);
    /// # Ok::<_, Error>(())
    /// ```
    fn read_isize_le(&mut self) -> Result<isize> {
        self.read_i64_le().map(|i| i as isize)
    }

    /// Reads bytes into a slice, returning them as a UTF-8 string if valid.
    ///
    /// # Errors
    ///
    /// Returns [`Error::Utf8`] if invalid UTF-8 is read. The stream is left in an
    /// undefined state with up to `buf.len()` bytes consumed, including invalid
    /// bytes and any subsequent bytes. `buf` contains at least any valid UTF-8
    /// read before invalid bytes were encountered. The valid UTF-8 length is given
    /// by the error, [`Utf8Error::valid_up_to`]. This slice can be safely converted
    /// to a string with [`from_utf8_unchecked`] or [`Utf8Error::split_valid`]:
    ///
    /// ```
    /// # use data_streams::{DataSource, Error};
    /// # let mut source = &[b'h', b'e', b'l', b'l', b'o', 0xFF][..];
    /// # let buffer = &mut [0; 6];
    /// let str: &str = match source.read_utf8(buffer) {
    ///     Ok(str) => str,
    ///     Err(Error::Utf8(error)) => {
    ///         let (valid, invalid) = unsafe {
    ///             // Safe because the buffer has been validated up to this point,
    ///             // according to the error.
    ///             error.split_valid(buffer)
    ///         };
    ///         // Do something with invalid bytes...
    ///         valid
    ///     }
    ///     Err(error) => return Err(error)
    /// };
    /// # assert_eq!(str, "hello");
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// [`from_utf8_unchecked`]: core::str::from_utf8_unchecked
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut input: &[u8] = "Hello! 👋".as_bytes();
    /// let buf: &mut [u8] = &mut [0; 11];
    ///
    /// assert_eq!(input.read_utf8(buf)?, "Hello! 👋");
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// # Implementation
    ///
    /// The default implementation uses a very fast UTF-8 validator ([`simdutf8`]),
    /// so overriding is unlikely to be useful.
    ///
    /// [`simdutf8`]: https://crates.io/crates/simdutf8
    #[cfg(feature = "utf8")]
    fn read_utf8<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a str> {
        // Greedy read first; validation only covers the bytes actually read.
        let bytes = self.read_bytes(buf)?;
        let utf8 = from_utf8(bytes)?;
        Ok(utf8)
    }
    /// Reads a single UTF-8 codepoint, returning a [`char`] if valid.
    ///
    /// # Errors
    ///
    /// Returns [`Error::Utf8`] if invalid UTF-8 is read. The stream is left with
    /// one to four bytes consumed, depending on the UTF-8 character width encoded
    /// in the first byte. `buf` contains any consumed bytes.
    ///
    /// Returns [`Error::End`] if the end-of-stream is reached before the full
    /// character width is read. `buf` is empty or contains exactly one byte.
    ///
    /// # Example
    ///
    /// ```
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut input: &[u8] = "🍉".as_bytes();
    /// assert_eq!(input.read_utf8_codepoint(&mut [0; 4])?, '🍉');
    /// # Ok::<_, Error>(())
    /// ```
    #[cfg(feature = "utf8")]
    fn read_utf8_codepoint(&mut self, buf: &mut [u8; 4]) -> Result<char> {
        let Ok(char) = default_read_utf8_codepoint(self, buf)?.parse() else {
            // Invariant: `default_read_utf8_codepoint` promises to produce a UTF-8
            // string with exactly one character, so parsing it as a `char` cannot
            // fail.
            unreachable!()
        };
        Ok(char)
    }
    /// Reads bytes into a slice, returning them as an ASCII slice if valid.
    ///
    /// # Errors
    ///
    /// Returns [`Error::Ascii`] if a non-ASCII byte is found. The stream is left
    /// in an undefined state with up to `buf.len()` bytes consumed, including the
    /// invalid byte and any subsequent bytes. `buf` contains all consumed bytes.
    /// The valid ASCII length is given by the error, [`AsciiError::valid_up_to`].
    /// The number of bytes consumed after the invalid byte is given by
    /// [`AsciiError::unchecked_count`]. These slices can be safely split with
    /// [`AsciiError::split_valid`]:
    ///
    /// ```
    /// #![feature(ascii_char)]
    ///
    /// # use data_streams::{DataSource, Error};
    /// # use core::ascii;
    /// # let mut source = &[b'h', b'e', b'l', b'l', b'o', 0xFF][..];
    /// # let buffer = &mut [0; 6];
    /// let str: &[ascii::Char] = match source.read_ascii(buffer) {
    ///     Ok(str) => str,
    ///     Err(Error::Ascii(error)) => {
    ///         let (valid, invalid) = error.split_valid(buffer);
    ///         // Do something with invalid bytes...
    ///         valid
    ///     }
    ///     Err(error) => return Err(error)
    /// };
    /// # assert_eq!(str.as_str(), "hello");
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// # Example
    ///
    /// ```
    /// #![feature(ascii_char)]
    ///
    /// # use data_streams::Error;
    /// use data_streams::DataSource;
    ///
    /// let mut input: &[u8] = b"Hello!";
    /// let buf: &mut [u8] = &mut [0; 6];
    ///
    /// assert_eq!(input.read_ascii(buf)?.as_str(), "Hello!");
    /// # Ok::<_, Error>(())
    /// ```
    #[cfg(feature = "unstable_ascii_char")]
    fn read_ascii<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a [ascii::Char]> {
        default_read_ascii(self, buf)
    }
}
801
/// A helper macro which conditionally disables the default body of a method if
/// the specialization feature-gate is not enabled.
///
/// It accepts a single method of the shape
/// `fn name<'lt>(&mut self, arg: Ty) -> Ret { body }` preceded by at least one
/// attribute (doc comments count), and emits the method twice: once with its
/// body under `cfg(feature = "unstable_specialization")`, and once as a
/// body-less required method under the negated `cfg`. The captured attributes
/// are repeated on both copies.
#[cfg(feature = "alloc")]
macro_rules! spec_default {
    ($(#[$meta:meta])+fn $name:ident<$lt:lifetime>(&mut $self:ident, $arg:ident: $arg_ty:ty) -> $result:ty $body:block) => {
        // With specialization: provide the default body.
        $(#[$meta])+
        #[cfg(feature = "unstable_specialization")]
        fn $name<$lt>(&mut $self, $arg: $arg_ty) -> $result $body
        // Without specialization: declaration only, so implementors must supply it.
        $(#[$meta])+
        #[cfg(not(feature = "unstable_specialization"))]
        fn $name<$lt>(&mut $self, $arg: $arg_ty) -> $result;
    };
}
815
/// A source stream reading data into vectors.
///
/// Both methods are declared through `spec_default!`, so their default bodies
/// only exist when the `unstable_specialization` feature is enabled; otherwise
/// they are required methods.
#[cfg(feature = "alloc")]
pub trait VecSource: DataSource {
    spec_default! {
        /// Reads bytes into `buf` until the presumptive end of the stream, returning
        /// the bytes read. If an error is returned, any bytes read remain in `buf`.
        ///
        /// Note that the stream may not necessarily have ended; more bytes may still
        /// be read in subsequent calls. The stream's end is only *presumed* to be
        /// reached. For example, a TCP socket may read no data signaling an end, but
        /// later begin reading again.
        ///
        /// # Errors
        ///
        /// Returns any IO errors encountered.
        ///
        /// # Example
        ///
        /// ```
        /// # use data_streams::Error;
        /// # #[cfg(feature = "unstable_specialization")]
        /// # {
        /// use data_streams::VecSource;
        ///
        /// let mut input: &[u8] = b"Hello!";
        /// let mut buf = Vec::new();
        /// assert_eq!(input.read_to_end(&mut buf)?, b"Hello!");
        /// # }
        /// # Ok::<_, Error>(())
        /// ```
        fn read_to_end<'a>(&mut self, buf: &'a mut alloc::vec::Vec<u8>) -> Result<&'a [u8]> {
            impls::read_to_end(self, buf, 0)
        }
    }

    spec_default! {
        /// Reads UTF-8 bytes into `buf` until the end of the stream, returning the
        /// string read. If invalid bytes are encountered, an error is returned and
        /// `buf` is unchanged. In this case, the stream is left in a state with an
        /// undefined number of bytes read.
        ///
        /// # Errors
        ///
        /// Returns [`Error::Utf8`] if invalid UTF-8 is read. The stream is left in a
        /// state with all bytes consumed from it. `buf` contains the read UTF-8 string
        /// up to the invalid bytes.
        ///
        /// # Example
        ///
        /// ```
        /// # use data_streams::Error;
        /// use data_streams::VecSource;
        ///
        /// let mut input: &[u8] = b"Hello!";
        /// let mut buf = String::new();
        /// assert_eq!(input.read_utf8_to_end(&mut buf)?, "Hello!");
        /// # Ok::<_, Error>(())
        /// ```
        #[cfg(feature = "utf8")]
        fn read_utf8_to_end<'a>(&mut self, buf: &'a mut alloc::string::String) -> Result<&'a str> {
            // NOTE(review): the docs above disagree with themselves. The summary
            // says `buf` is unchanged and an undefined number of bytes is read on
            // invalid UTF-8, while `# Errors` says `buf` holds the valid prefix and
            // the whole stream is consumed. The outcome is decided by `append_utf8`,
            // which is not visible here; confirm and reconcile the two paragraphs.
            //
            // SAFETY: this function only modifies the string's bytes if the new bytes are found to be
            // valid UTF-8.
            unsafe {
                // The closure reads to the end into the string's byte vector and
                // reports how many bytes it produced.
                append_utf8(buf, |buf| impls::read_to_end(self, buf, 0).map(<[u8]>::len))
            }
        }
    }
}
884
885/// Reads generic data from a [source](DataSource).
886pub trait GenericDataSource: DataSource {
887 /// Reads a big-endian integer.
888 ///
889 /// # Errors
890 ///
891 /// Returns [`Error::End`] if the stream ends before exactly the type's size in
892 /// bytes can be read.
893 ///
894 /// # Example
895 ///
896 /// ```
897 /// # use data_streams::Error;
898 /// use data_streams::GenericDataSource;
899 ///
900 /// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
901 /// let int: u32 = buf.read_int()?;
902 /// assert_eq!(int, 0x12345678);
903 /// # Ok::<_, Error>(())
904 /// ```
905 fn read_int<T: Pod + PrimInt>(&mut self) -> Result<T> {
906 self.read_data().map(T::from_be)
907 }
908
909 /// Reads a little-endian integer.
910 ///
911 /// # Errors
912 ///
913 /// Returns [`Error::End`] if the stream ends before exactly the type's size in
914 /// bytes can be read.
915 ///
916 /// # Example
917 ///
918 /// ```
919 /// # use data_streams::Error;
920 /// use data_streams::GenericDataSource;
921 ///
922 /// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
923 /// let int: u32 = buf.read_int_le()?;
924 /// assert_eq!(int, 0x78563412);
925 /// # Ok::<_, Error>(())
926 /// ```
927 fn read_int_le<T: Pod + PrimInt>(&mut self) -> Result<T> {
928 self.read_data().map(T::from_le)
929 }
930
931 /// Reads a value of generic type `T` supporting an arbitrary bit pattern. See
932 /// [`Pod`].
933 ///
934 /// # Errors
935 ///
936 /// Returns [`Error::End`] if the stream ends before exactly the type's size in
937 /// bytes can be read.
938 ///
939 /// # Example
940 ///
941 /// ```
942 /// # use data_streams::Error;
943 /// # #[cfg(target_endian = "little")]
944 /// # {
945 /// use data_streams::GenericDataSource;
946 ///
947 /// let mut buf: &[u8] = &[0x12, 0x34, 0x56, 0x78];
948 /// let int: u32 = buf.read_data()?;
949 /// assert_eq!(int, 0x78563412);
950 /// # }
951 /// # Ok::<_, Error>(())
952 /// ```
953 fn read_data<T: Pod>(&mut self) -> Result<T> {
954 let mut value = T::zeroed();
955 self.read_exact_bytes(bytes_of_mut(&mut value))?;
956 Ok(value)
957 }
958
959 /// Reads multiple values of generic type `T` supporting an arbitrary bit pattern,
960 /// returning the read values.
961 ///
962 /// # Errors
963 ///
964 /// Returns any IO errors encountered.
965 ///
966 /// # Panics
967 ///
968 /// Panics if the [`DataSource::read_aligned_bytes`] implementation returns an unaligned slice.
969 ///
970 /// # Example
971 ///
972 /// ```
973 /// # use data_streams::Error;
974 /// # #[cfg(target_endian = "little")]
975 /// # {
976 /// use data_streams::GenericDataSource;
977 ///
978 /// let mut input: &[u8] = &[0x12, 0x34, 0x56, 0x78, 0xFF];
979 /// let buf: &mut [u16] = &mut [0; 3];
980 /// assert_eq!(input.read_data_slice(buf)?, [0x3412, 0x7856]);
981 /// # }
982 /// # Ok::<_, Error>(())
983 /// ```
984 fn read_data_slice<'a, T: Pod>(&mut self, buf: &'a mut [T]) -> Result<&'a [T]> {
985 let bytes = self.read_aligned_bytes(cast_slice_mut(buf), size_of::<T>())?;
986 assert_eq!(bytes.len() % size_of::<T>(), 0, "unaligned read implementation");
987 Ok(cast_slice(bytes))
988 }
989}
990
// Blanket implementation: every `DataSource`, sized or not (`?Sized`), gets the
// generic readers of `GenericDataSource` through its default method bodies.
impl<S: DataSource + ?Sized> GenericDataSource for S { }
992
993/// Accesses a source's internal buffer.
994pub trait BufferAccess: DataSource {
/// Returns the capacity of the internal buffer.
///
/// This is a required method; there is no default body.
///
/// # Example
///
/// ```
/// # #[cfg(feature = "alloc")]
/// # {
/// use data_streams::BufferAccess;
///
/// let buf = Vec::<u8>::with_capacity(16);
/// assert_eq!(buf.buffer_capacity(), 16);
/// # }
/// ```
fn buffer_capacity(&self) -> usize;
1009 /// Returns the byte count contained in the internal buffer.
1010 ///
1011 /// # Example
1012 ///
1013 /// ```
1014 /// use data_streams::BufferAccess;
1015 ///
1016 /// let buf: &[u8] = &[0; 16];
1017 /// assert_eq!(buf.buffer_count(), 16);
1018 /// ```
1019 fn buffer_count(&self) -> usize { self.buffer().len() }
/// Returns a slice over the filled portion of the internal buffer. This slice
/// may not contain the whole buffer, for example if it can't be represented as
/// just one slice.
///
/// This is a required method; there is no default body.
///
/// # Example
///
/// ```
/// use data_streams::BufferAccess;
///
/// let buf: &[u8] = b"Hello!";
/// assert_eq!(buf.buffer(), b"Hello!");
/// ```
fn buffer(&self) -> &[u8];
/// Fills the internal buffer from the underlying stream, returning its contents
/// if successful.
///
/// This is a required method; there is no default body.
///
/// # Errors
///
/// Returns any IO errors encountered.
///
/// # Example
///
/// ```no_run
/// # use data_streams::Error;
/// # #[cfg(feature = "std")]
/// # {
/// use std::{fs::File, io::BufReader};
/// use data_streams::BufferAccess;
///
/// let mut source = BufReader::new(File::open("file.txt")?);
/// source.fill_buffer()?;
/// # }
/// # Ok::<_, Error>(())
/// ```
fn fill_buffer(&mut self) -> Result<&[u8]>;
1055 /// Clears the internal buffer.
1056 ///
1057 /// # Example
1058 ///
1059 /// ```no_run
1060 /// # use data_streams::Error;
1061 /// # #[cfg(feature = "std")]
1062 /// # {
1063 /// use std::{fs::File, io::BufReader};
1064 /// use data_streams::BufferAccess;
1065 ///
1066 /// let mut source = BufReader::new(File::open("file.txt")?);
1067 /// source.fill_buffer()?;
1068 ///
1069 /// source.clear_buffer();
1070 /// assert_eq!(source.buffer_count(), 0);
1071 /// # }
1072 /// # Ok::<_, Error>(())
1073 /// ```
1074 fn clear_buffer(&mut self) {
1075 self.drain_buffer(self.buffer_count());
1076 }
1077 /// Consumes `count` bytes from the internal buffer. The `count` must be `<=`
1078 /// the length of the slice returned by either [`buffer`](Self::buffer) or
1079 /// [`fill_buffer`](Self::fill_buffer)
1080 ///
1081 /// # Panics
1082 ///
1083 /// This method panics if `count` exceeds the buffer length.
1084 ///
1085 /// # Example
1086 ///
1087 /// ```no_run
1088 /// # use data_streams::Error;
1089 /// # #[cfg(feature = "std")]
1090 /// # {
1091 /// use std::{fs::File, io::BufReader};
1092 /// use data_streams::BufferAccess;
1093 ///
1094 /// let mut source = BufReader::new(File::open("file.txt")?);
1095 /// source.fill_buffer()?;
1096 ///
1097 /// source.drain_buffer(512);
1098 /// # }
1099 /// # Ok::<_, Error>(())
1100 /// ```
1101 fn drain_buffer(&mut self, count: usize);
1102 /// Bypasses the internal buffer by returning the underlying source, or `self`
1103 /// if this behavior is not supported. Note that not fully draining the buffer
1104 /// before bypassing it will cause data loss.
1105 fn bypass_buffer(&mut self) -> &mut impl DataSource where Self: Sized {
1106 self.clear_buffer();
1107 self
1108 }
1109}
1110
// Specializable `DataSource` implementation for anything exposing its buffer
// through `BufferAccess`. Every method is `default` so concrete sources can
// override individual operations.
#[cfg(feature = "unstable_specialization")]
impl<T: BufferAccess + ?Sized> DataSource for T {
    default fn available(&self) -> usize {
        self.buffer_count()
    }

    default fn request(&mut self, count: usize) -> Result<bool> {
        default_request(self, count)
    }

    default fn skip(&mut self, count: usize) -> Result<usize> {
        let skipped = default_skip(self, count);
        Ok(skipped)
    }

    default fn read_bytes<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a [u8]> {
        // Stop once the target is full; each step copies from the buffered chunk.
        buf_read_bytes(self, buf, <[u8]>::is_empty, |mut chunk, dest| {
            chunk.read_bytes(dest).map(<[u8]>::len)
        })
    }

    default fn read_exact_bytes<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a [u8]> {
        buf_read_exact_bytes(self, buf)
    }

    /// Greedily reads bytes into `buf` in whole multiples of `alignment` and
    /// returns the bytes read. Reading stops when `buf` is full or when fewer
    /// than `alignment` bytes could be read.
    ///
    /// The returned slice is empty if the alignment is zero or `buf` is shorter
    /// than one alignment width.
    ///
    /// # Errors
    ///
    /// Returns any IO errors encountered.
    ///
    /// [`Error::InsufficientBuffer`] is returned without reading if the buffer [capacity] is not
    /// large enough to hold at least one `alignment` width.
    ///
    /// [capacity]: Self::buffer_capacity
    default fn read_aligned_bytes<'a>(&mut self, buf: &'a mut [u8], alignment: usize) -> Result<&'a [u8]> {
        if alignment == 0 {
            return Ok(&[]);
        }

        let capacity = self.buffer_capacity();
        if capacity < alignment {
            return Err(Error::InsufficientBuffer {
                spare_capacity: capacity - self.buffer_count(),
                required_count: alignment,
            });
        }

        // Only the prefix made of whole alignment units is eligible for filling.
        let aligned_len = max_multiple_of(buf.len(), alignment);
        let target = &mut buf[..aligned_len];
        buf_read_bytes(
            self,
            target,
            |bytes| bytes.len() < alignment,
            |mut chunk, dest| chunk.read_aligned_bytes(dest, alignment).map(<[u8]>::len),
        )
    }

    #[cfg(feature = "utf8")]
    default fn read_utf8<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a str> {
        // Running total of bytes accepted so far; used to offset UTF-8 errors
        // reported for later chunks.
        let mut validated = 0;
        let bytes = buf_read_bytes(self, buf, <[u8]>::is_empty, |mut chunk, dest| {
            match chunk.read_utf8(dest) {
                Ok(text) => {
                    let len = text.len();
                    validated += len;
                    Ok(len)
                }
                Err(Error::Utf8(error)) => Err(error.with_offset(validated).into()),
                Err(error) => Err(error),
            }
        })?;

        // Safety: every chunk making up `bytes` was returned as a `str` by
        // `read_utf8` above.
        Ok(unsafe { core::str::from_utf8_unchecked(bytes) })
    }

    #[cfg(feature = "utf8")]
    default fn read_utf8_codepoint(&mut self, buf: &mut [u8; 4]) -> Result<char> {
        // Peek at the first buffered byte (if any) to learn the sequence width.
        let first_byte = self.buffer().first().copied();
        let text = match first_byte {
            Some(first_byte) => {
                let char_width = utf8_char_width(first_byte);
                self.read_utf8(&mut buf[..char_width])?
            }
            // Nothing is buffered, so fall back to reading the lead byte directly.
            None => default_read_utf8_codepoint(self, buf)?,
        };

        Ok(text.parse().expect("bytes read by `read_utf8` must be valid UTF-8 codepoints"))
    }

    #[cfg(feature = "unstable_ascii_char")]
    default fn read_ascii<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a [ascii::Char]> {
        default_read_ascii(self, buf)
    }
}
1216
// Specializable `VecSource` implementation for buffered sources; both methods
// delegate to the buffer-aware helpers in `impls`.
#[cfg(all(feature = "alloc", feature = "unstable_specialization"))]
impl<T> VecSource for T
where
    T: BufferAccess,
{
    default fn read_to_end<'a>(
        &mut self,
        buf: &'a mut alloc::vec::Vec<u8>,
    ) -> Result<&'a [u8]> {
        impls::buf_read_to_end(self, buf)
    }

    #[cfg(feature = "utf8")]
    default fn read_utf8_to_end<'a>(
        &mut self,
        buf: &'a mut alloc::string::String,
    ) -> Result<&'a str> {
        impls::buf_read_utf8_to_end(self, buf)
    }
}
1228
/// Rounds `value` down to the nearest multiple of `factor`.
///
/// # Panics
///
/// Panics if `factor` is zero, since the remainder is then undefined.
pub(crate) const fn max_multiple_of(value: usize, factor: usize) -> usize {
    // Dropping the remainder leaves the largest multiple not exceeding `value`.
    // For power-of-two factors this is expected to compile down to a single mask.
    value - value % factor
}
1234
1235#[allow(dead_code)]
1236pub(crate) fn default_request(source: &mut (impl BufferAccess + ?Sized), count: usize) -> Result<bool> {
1237 if source.available() < count {
1238 let buf_len = source.buffer_count();
1239 let spare_capacity = source.buffer_capacity() - buf_len;
1240 if source.buffer_capacity() > 0 && count < spare_capacity {
1241 Ok(source.fill_buffer()?.len() >= count)
1242 } else {
1243 Err(Error::InsufficientBuffer {
1244 spare_capacity,
1245 required_count: count - buf_len,
1246 })
1247 }
1248 } else {
1249 Ok(true)
1250 }
1251}
1252
1253// Todo: after consuming, loop fill_buf and consume.
1254#[allow(dead_code)]
1255pub(crate) fn default_skip(source: &mut (impl BufferAccess + ?Sized), mut count: usize) -> usize {
1256 let avail = source.available();
1257 count = count.min(avail);
1258 source.drain_buffer(count);
1259 // Guard against faulty implementations by verifying that the buffered
1260 // bytes were removed.
1261 assert_eq!(
1262 source.available(),
1263 avail.saturating_sub(count),
1264 "`drain_buffer` must remove buffered bytes"
1265 );
1266 avail
1267}
1268
1269pub(crate) fn default_read_array<const N: usize>(source: &mut (impl DataSource + ?Sized)) -> Result<[u8; N]> {
1270 let mut array = [0; N];
1271 source.read_exact_bytes(&mut array)?;
1272 Ok(array)
1273}
1274
1275fn try_read_exact_contiguous<'a>(source: &mut (impl DataSource + ?Sized), buf: &'a mut [u8]) -> Result<&'a [u8]> {
1276 let len = buf.len();
1277 let bytes = source.read_bytes(buf)?;
1278 assert_eq!(
1279 bytes.len(),
1280 len,
1281 "read_bytes should be greedy; at least {available} bytes were available \
1282 in the buffer, but only {read_len} bytes of the required {len} were read",
1283 available = source.available(),
1284 read_len = bytes.len()
1285 );
1286 Ok(bytes)
1287}
1288
1289#[allow(clippy::panic, reason = "can't use assert here")]
1290fn try_read_exact_discontiguous<'a>(
1291 source: &mut (impl DataSource + ?Sized),
1292 buf: &'a mut [u8],
1293 remaining: usize
1294) -> Result<&'a [u8]> {
1295 let filled = buf.len() - remaining;
1296 let read_count = source.read_bytes(&mut buf[filled..])?.len();
1297 if read_count < remaining {
1298 if source.available() < remaining {
1299 // Buffer was exhausted, meaning the stream ended prematurely
1300 Err(Error::End { required_count: buf.len() })
1301 } else {
1302 // read_bytes wasn't greedy, there were enough bytes in the buffer >:(
1303 panic!("read_bytes should have read {remaining} buffered bytes")
1304 }
1305 } else {
1306 // The whole slice has been confirmed to be filled.
1307 Ok(buf)
1308 }
1309}
1310
1311fn default_read_exact_bytes<'a>(source: &mut (impl DataSource + ?Sized), buf: &'a mut [u8]) -> Result<&'a [u8]> {
1312 let len = buf.len();
1313 match source.require(len) {
1314 Ok(()) => try_read_exact_contiguous(source, buf),
1315 Err(Error::InsufficientBuffer { .. }) => {
1316 // The buffer is not large enough to read the slice contiguously, and
1317 // we have no access to the buffer to drain it. So just try reading and
1318 // check if all bytes were read.
1319 let remaining = buf.len();
1320 try_read_exact_discontiguous(source, buf, remaining)
1321 }
1322 Err(error) => Err(error)
1323 }
1324}
1325
1326fn default_read_aligned_bytes<'a>(source: &mut (impl DataSource + ?Sized), buf: &'a mut [u8], alignment: usize) -> Result<&'a [u8]> {
1327 if alignment == 0 {
1328 return Ok(&[])
1329 }
1330
1331 let len = max_multiple_of(buf.len(), alignment);
1332 let mut slice = &mut buf[..len];
1333 let mut count = 0;
1334 while !slice.is_empty() && source.request(alignment)? {
1335 let avail = slice.len().min(max_multiple_of(source.available(), alignment));
1336 source.read_exact_bytes(&mut slice[..avail])?;
1337 count += avail;
1338 slice = &mut slice[avail..];
1339 }
1340
1341 Ok(&buf[..count])
1342}
1343
/// Exact read for buffered sources: fills all of `buf` or fails.
///
/// # Errors
///
/// Propagates errors from `require` (other than an insufficient buffer) and
/// from the underlying reads.
#[cfg(feature = "unstable_specialization")]
fn buf_read_exact_bytes<'a>(source: &mut (impl BufferAccess + ?Sized), buf: &'a mut [u8]) -> Result<&'a [u8]> {
    match source.require(buf.len()) {
        Ok(()) => try_read_exact_contiguous(source, buf),
        Err(Error::InsufficientBuffer { .. }) => {
            // Large read: copy out whatever is buffered first, then read the
            // rest. Most default implementations of read_bytes optimize for
            // this case by skipping the buffer.
            let mut filled = 0;
            loop {
                let mut buffered = source.buffer();
                if filled == buf.len() || buffered.is_empty() {
                    break;
                }

                let copied = buffered.read_bytes(&mut buf[filled..])?.len();
                source.drain_buffer(copied);
                filled += copied;
            }

            let remaining = buf.len() - filled;
            try_read_exact_discontiguous(source, buf, remaining)
        }
        Err(error) => Err(error),
    }
}
1369
/// Shared read loop for buffered sources.
///
/// Each pass requests enough data for the unfilled part of `buf`, falling back
/// to one `fill_buffer` call when the request reports an insufficient buffer.
/// It then hands the buffered bytes and the unfilled target to
/// `slice_read_bytes` and drains the count that callback returns.
///
/// `is_empty` decides when a slice is too small to continue; it is applied to
/// both the unfilled target and the buffered bytes. The filled prefix of `buf`
/// is returned.
///
/// # Errors
///
/// Propagates errors from `request`, `fill_buffer` and `slice_read_bytes`.
#[cfg(feature = "unstable_specialization")]
fn buf_read_bytes<'a>(
    source: &mut (impl BufferAccess + ?Sized),
    buf: &'a mut [u8],
    mut is_empty: impl FnMut(&[u8]) -> bool,
    mut slice_read_bytes: impl FnMut(&[u8], &mut [u8]) -> Result<usize>,
) -> Result<&'a [u8]> {
    let mut filled = 0;
    while !is_empty(&buf[filled..]) {
        let wanted = buf.len() - filled;
        let chunk = match source.request(wanted) {
            Ok(_) => source.buffer(),
            Err(Error::InsufficientBuffer { .. }) => source.fill_buffer()?,
            Err(error) => return Err(error),
        };
        if is_empty(chunk) {
            break;
        }

        let count = slice_read_bytes(chunk, &mut buf[filled..])?;
        source.drain_buffer(count);
        filled += count;
    }

    Ok(&buf[..filled])
}
1397
/// Reads up to `count` bytes from `source`, validates them as UTF-8 and appends
/// them to `buf`, returning the newly appended text.
///
/// If reading or validation fails, `buf` keeps its original contents.
///
/// # Errors
///
/// Propagates errors from `read_bytes`, plus the UTF-8 error raised when the
/// bytes read are not valid UTF-8.
#[cfg(all(feature = "alloc", feature = "utf8"))]
#[allow(dead_code, clippy::multiple_unsafe_ops_per_block)]
pub(crate) fn default_read_utf8<'a>(
    source: &mut (impl DataSource + ?Sized),
    count: usize,
    buf: &'a mut alloc::string::String
) -> Result<&'a str> {
    // Safety: this function only modifies the string's bytes if the new bytes are found to be
    // valid UTF-8; `append_utf8` truncates back to the validated length otherwise.
    unsafe {
        append_utf8(buf, |b| {
            let len = b.len();
            // Zero-fill the read window instead of calling `set_len`, which
            // would expose uninitialized memory as `&mut [u8]`.
            b.resize(len + count, 0);
            source.read_bytes(&mut b[len..])
                .map(<[u8]>::len)
        })
    }
}
1417
/// Reads one UTF-8 encoded codepoint from `source` into `buf` and returns it as
/// a string slice borrowed from `buf`.
///
/// The lead byte is read first to learn the sequence width, then the
/// continuation bytes are read and the whole sequence is validated.
///
/// # Errors
///
/// Propagates errors from `read_exact_bytes`, plus the UTF-8 error raised when
/// the bytes read do not form a valid sequence.
#[cfg(feature = "utf8")]
fn default_read_utf8_codepoint<'a>(source: &mut (impl DataSource + ?Sized), buf: &'a mut [u8; 4]) -> Result<&'a str> {
    let (first_byte, remaining) = buf.split_at_mut(1);
    source.read_exact_bytes(first_byte)?;
    // NOTE(review): `utf8_char_width` presumably reports 0 for bytes that cannot
    // start a sequence — confirm. Clamp to one byte so `char_width - 1` cannot
    // underflow; validation below then rejects the invalid lead byte.
    let char_width = utf8_char_width(first_byte[0]).max(1);
    source.read_exact_bytes(&mut remaining[..char_width - 1])?;
    Ok(from_utf8(&buf[..char_width])?)
}
1426
/// Default ASCII read: reads bytes into `buf` and returns them as ASCII
/// characters once every byte read is confirmed to be ASCII.
///
/// # Errors
///
/// Propagates errors from `read_bytes`. Returns an invalid-ASCII error built
/// from the byte and index reported by `count_ascii` when not all bytes are
/// ASCII.
#[cfg(feature = "unstable_ascii_char")]
fn default_read_ascii<'a>(source: &mut (impl DataSource + ?Sized), buf: &'a mut [u8]) -> Result<&'a [ascii::Char]> {
    let bytes = source.read_bytes(buf)?;
    let total = bytes.len();
    match count_ascii(bytes) {
        // Safety: all bytes have been checked as valid ASCII.
        valid if valid == total => Ok(unsafe { bytes.as_ascii_unchecked() }),
        invalid_at => Err(Error::invalid_ascii(bytes[invalid_at], invalid_at, total)),
    }
}
1438
1439#[cfg(feature = "unstable_ascii_char")]
/// Returns the length of the leading run of ASCII bytes in `slice`.
///
/// This equals `slice.len()` when every byte is ASCII, and otherwise the index
/// of the first non-ASCII byte.
pub(crate) fn count_ascii(slice: &[u8]) -> usize {
    if slice.is_ascii() {
        slice.len()
    } else {
        // Search from the front: the count is the index of the *first*
        // offending byte, so everything before it is ASCII.
        // Safety: is_ascii indicates there is a non-ASCII character somewhere.
        unsafe { slice.iter().position(|b| !b.is_ascii()).unwrap_unchecked() }
    }
}
1448
/// Appends bytes produced by `read` to `buf`, keeping them only if they are
/// valid UTF-8, and returns the appended text.
///
/// `read` receives the string's byte vector and returns how many bytes it wrote
/// after the original length. Those bytes are validated; on any failure the
/// string is truncated back to its original length.
///
/// # Safety
///
/// Only the `count` bytes following the original length are validated, and the
/// vector's length is reset without further checks. `read` must therefore leave
/// the existing contents intact and report a `count` covering only bytes it
/// actually wrote.
#[cfg(all(feature = "alloc", feature = "utf8"))]
#[allow(dead_code)]
pub(crate) unsafe fn append_utf8<R>(buf: &mut alloc::string::String, read: R) -> Result<&str>
where
    R: FnOnce(&mut alloc::vec::Vec<u8>) -> Result<usize> {
    use simdutf8::compat::from_utf8;

    // Drop guard that resets the vector to its last known-valid length whenever
    // it goes out of scope. That length starts as the original one and only
    // grows after the new bytes pass validation.
    struct TruncateOnDrop<'a> {
        valid_len: usize,
        bytes: &'a mut alloc::vec::Vec<u8>
    }

    impl Drop for TruncateOnDrop<'_> {
        fn drop(&mut self) {
            // Safety: exactly `valid_len` bytes have been written.
            unsafe {
                self.bytes.set_len(self.valid_len);
            }
        }
    }

    let appended_at = buf.len();
    {
        let mut guard = TruncateOnDrop { valid_len: appended_at, bytes: buf.as_mut_vec() };
        let count = read(guard.bytes)?;
        from_utf8(&guard.bytes[appended_at..][..count])?;
        // Validation passed, so let the guard keep the new bytes.
        guard.valid_len = appended_at + count;
    }
    Ok(&buf[appended_at..])
}
1483
// Property tests for the exact-read helpers, run against plain slices and a
// fake buffered source whose buffer capacity is chosen per test.
#[cfg(all(
    test,
    feature = "std",
    feature = "alloc",
    feature = "unstable_specialization"
))]
mod read_exact_test {
    use alloc::vec::from_elem;
    use proptest::collection::vec;
    use proptest::prelude::*;
    use std::assert_matches::assert_matches;
    use crate::{BufferAccess, DataSource, Result};

    /// A buffered source backed by two vectors: `source` holds the bytes not yet
    /// buffered, `buffer` holds the buffered bytes and its capacity is the
    /// buffer capacity.
    struct FakeBufSource {
        source: Vec<u8>,
        buffer: Vec<u8>
    }

    impl BufferAccess for FakeBufSource {
        fn buffer_capacity(&self) -> usize {
            self.buffer.capacity()
        }

        fn buffer(&self) -> &[u8] {
            &self.buffer
        }

        fn fill_buffer(&mut self) -> Result<&[u8]> {
            let Self { source, buffer } = self;
            let start = buffer.len();
            // Zero-extend up to capacity so the spare space can be read into.
            buffer.resize(buffer.capacity(), 0);
            let mut pending = source.as_slice();
            let consumed = pending.read_bytes(&mut buffer[start..])?.len();
            source.drain_buffer(consumed);
            buffer.truncate(consumed + start);
            Ok(buffer)
        }

        fn clear_buffer(&mut self) {
            self.buffer.clear();
        }

        fn drain_buffer(&mut self, count: usize) {
            self.buffer.drain_buffer(count);
        }
    }

    proptest! {
        #[test]
        fn read_exact_end_of_stream(source in vec(any::<u8>(), 1..=256)) {
            // One byte more than the source holds can never be satisfied.
            let mut target = from_elem(0, source.len() + 1);
            assert_matches!(
                super::default_read_exact_bytes(&mut &*source, &mut target),
                Err(super::Error::End { .. })
            );
        }

        #[test]
        fn buf_read_exact_end_of_stream(source in vec(any::<u8>(), 1..=256)) {
            let mut target = from_elem(0, source.len() + 1);
            assert_matches!(
                super::buf_read_exact_bytes(&mut &*source, &mut target),
                Err(super::Error::End { .. })
            );
        }

        #[test]
        fn read_exact_insufficient_buffer(source in vec(any::<u8>(), 2..=256)) {
            // The buffer is one byte too small to hold the whole read.
            let total = source.len();
            let buffer = Vec::with_capacity(total - 1);
            let mut fake = FakeBufSource { source, buffer };
            let mut target = from_elem(0, total);
            fake.read_exact_bytes(&mut target).map(<[u8]>::len).unwrap();
        }

        #[test]
        fn read_exact_buffered(source in vec(any::<u8>(), 1..=256)) {
            // The buffer has room for the whole read plus one spare byte.
            let total = source.len();
            let buffer = Vec::with_capacity(total + 1);
            let mut fake = FakeBufSource { source, buffer };
            let mut target = from_elem(0, total);
            fake.read_exact_bytes(&mut target).map(<[u8]>::len).unwrap();
        }
    }
}
1576
// Property tests for aligned reads from byte slices.
#[cfg(all(
    test,
    feature = "std",
    feature = "alloc",
))]
mod read_aligned_test {
    use proptest::arbitrary::any;
    use proptest::collection::vec;
    use proptest::{prop_assert_eq, prop_assume, proptest};
    use crate::DataSource;

    proptest! {
        #[test]
        fn read_aligned(source in vec(any::<u8>(), 16..=256), alignment in 1usize..=16) {
            // Whatever is read must be a whole number of alignment units.
            let target = &mut [0; 256][..source.len()];
            let read = (&source[..]).read_aligned_bytes(target, alignment).unwrap();
            prop_assert_eq!(read.len() % alignment, 0);
        }

        #[test]
        fn read_aligned_truncated(buf_size in 0usize..=15, alignment in 1usize..=16) {
            // A target shorter than one alignment unit receives nothing.
            prop_assume!(buf_size < alignment);
            let target = &mut [0; 15][..buf_size];
            let read = (&[0; 16][..]).read_aligned_bytes(target, alignment).unwrap();
            prop_assert_eq!(read.len(), 0);
        }
    }
}