Skip to main content

qubit_io/ext/
read_ext.rs

1/*******************************************************************************
2 *
3 *    Copyright (c) 2026 Haixing Hu.
4 *
5 *    SPDX-License-Identifier: Apache-2.0
6 *
7 *    Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10use std::io::{
11    Error,
12    ErrorKind,
13    Read,
14    Result,
15    Write,
16    copy as copy_all,
17};
18use std::string::FromUtf8Error;
19
20use crate::Streams;
21
/// Size in bytes (8 KiB) of the stack buffer that discard operations read
/// into before dropping the data.
const DISCARD_BUFFER_SIZE: usize = 8 * 1024;

/// Size in bytes (8 KiB) of the stack buffer used by the limited read-to-end
/// operations. It also caps the initial capacity reserved for the vector
/// returned by those operations.
const READ_TO_END_BUFFER_SIZE: usize = 8 * 1024;
27
/// Extension methods for [`Read`] values.
///
/// `ReadExt` fills small semantic gaps in the standard [`Read`] trait while
/// keeping the same blocking and error model. A blanket implementation covers
/// every type that implements [`Read`], including unsized types such as
/// `dyn Read` trait objects, so importing the trait is enough to use the
/// methods.
///
/// # Examples
/// ```
/// use qubit_io::ReadExt;
/// use std::io::Cursor;
///
/// let mut input = Cursor::new(b"abcdef".to_vec());
/// let header = input.read_exact_array::<2>()?;
/// let payload = input.read_exact_vec_limited(4, 16)?;
///
/// assert_eq!(*b"ab", header);
/// assert_eq!(b"cdef", payload.as_slice());
/// # Ok::<(), std::io::Error>(())
/// ```
pub trait ReadExt: Read {
    /// Reads bytes until `buffer` is full or EOF is reached.
    ///
    /// This method differs from [`Read::read_exact`] by treating EOF as a
    /// successful partial result. It keeps retrying short reads until the
    /// caller-provided buffer is full, EOF is reached, or a non-interrupted
    /// I/O error occurs. An empty `buffer` returns `Ok(0)` without reading.
    ///
    /// # Parameters
    /// - `buffer`: Destination buffer to fill.
    ///
    /// # Returns
    /// The number of bytes written into `buffer`. The value is in
    /// `0..=buffer.len()`; a value smaller than `buffer.len()` means EOF was
    /// reached first.
    ///
    /// # Errors
    /// Returns the first non-[`ErrorKind::Interrupted`] error reported by the
    /// underlying reader. Interrupted reads are retried. The number of bytes
    /// read before the error is not reported.
    fn read_exact_or_eof(&mut self, buffer: &mut [u8]) -> Result<usize>;

    /// Reads exactly `N` bytes into a stack-allocated array.
    ///
    /// This method uses [`Read::read_exact`] and therefore requires the reader
    /// to provide exactly `N` bytes before EOF.
    ///
    /// # Returns
    /// An array containing exactly `N` bytes read from this reader.
    ///
    /// # Errors
    /// Returns the error reported by [`Read::read_exact`], including
    /// [`ErrorKind::UnexpectedEof`] when EOF is reached before the array is
    /// full. On error the partially filled array is dropped, so bytes consumed
    /// before the failure are not returned.
    fn read_exact_array<const N: usize>(&mut self) -> Result<[u8; N]>;

    /// Reads exactly `len` bytes into a new vector after checking a limit.
    ///
    /// If `len` is greater than `max_len`, this method returns
    /// [`ErrorKind::InvalidData`] before reading any bytes. The limit is
    /// inclusive: `len == max_len` is accepted.
    ///
    /// # Parameters
    /// - `len`: Exact number of bytes to read.
    /// - `max_len`: Maximum accepted exact read length.
    ///
    /// # Returns
    /// A vector containing exactly `len` bytes.
    ///
    /// # Errors
    /// Returns [`ErrorKind::InvalidData`] when `len > max_len`. Returns the
    /// error reported by [`Read::read_exact`], including
    /// [`ErrorKind::UnexpectedEof`] when EOF is reached before `len` bytes are
    /// read.
    fn read_exact_vec_limited(&mut self, len: usize, max_len: usize) -> Result<Vec<u8>>;

    /// Reads exactly `len` bytes and appends them to `output`.
    ///
    /// If `len` is greater than `max_len`, this method returns
    /// [`ErrorKind::InvalidData`] before reading any bytes and leaves `output`
    /// unchanged. On a read error, `output` is truncated back to its original
    /// length. The underlying reader may still have consumed bytes before the
    /// error because [`Read`] does not provide rollback.
    ///
    /// # Parameters
    /// - `output`: Destination vector to append to. Existing content is kept.
    /// - `len`: Exact number of bytes to read.
    /// - `max_len`: Maximum accepted exact read length (inclusive).
    ///
    /// # Returns
    /// `Ok(())` once exactly `len` bytes have been appended to `output`.
    ///
    /// # Errors
    /// Returns [`ErrorKind::InvalidData`] when `len > max_len`. Returns the
    /// error reported by [`Read::read_exact`], including
    /// [`ErrorKind::UnexpectedEof`] when EOF is reached before `len` bytes are
    /// read.
    fn read_exact_vec_limited_into(
        &mut self,
        output: &mut Vec<u8>,
        len: usize,
        max_len: usize,
    ) -> Result<()>;

    /// Discards up to `bytes` bytes from this reader.
    ///
    /// The method repeatedly reads into an internal stack buffer until the
    /// requested number of bytes has been consumed or EOF is reached. It does
    /// not allocate and does not require seeking support. Passing `0` returns
    /// `Ok(0)` without reading.
    ///
    /// # Parameters
    /// - `bytes`: Maximum number of bytes to discard.
    ///
    /// # Returns
    /// The number of bytes actually discarded. The value may be smaller than
    /// `bytes` when EOF is reached first.
    ///
    /// # Errors
    /// Returns the first non-[`ErrorKind::Interrupted`] error reported by the
    /// underlying reader. Interrupted reads are retried.
    fn discard_exact_or_eof(&mut self, bytes: u64) -> Result<u64>;

    /// Copies all remaining bytes from this reader into `writer`.
    ///
    /// This method is a method-style wrapper around [`std::io::copy`]. It
    /// copies from the current reader position until EOF and does not close or
    /// flush either stream.
    ///
    /// # Parameters
    /// - `writer`: Destination writer.
    ///
    /// # Returns
    /// The number of bytes copied.
    ///
    /// # Errors
    /// Returns the first read or write error reported by the underlying
    /// streams, using the same error behavior as [`std::io::copy`].
    fn copy_to(&mut self, writer: &mut dyn Write) -> Result<u64>;

    /// Copies at most `max_bytes` bytes from this reader into `writer`.
    ///
    /// This method stops successfully when either EOF is reached or
    /// `max_bytes` bytes have been copied. It does not close or flush either
    /// stream. The blanket implementation delegates to
    /// `Streams::copy_at_most`.
    ///
    /// # Parameters
    /// - `writer`: Destination writer.
    /// - `max_bytes`: Maximum number of bytes to copy.
    ///
    /// # Returns
    /// The number of bytes copied.
    ///
    /// # Errors
    /// Returns the first non-[`ErrorKind::Interrupted`] read error or write
    /// error reported by the underlying streams. Interrupted reads are retried.
    fn copy_to_at_most(&mut self, writer: &mut dyn Write, max_bytes: u64) -> Result<u64>;

    /// Copies the remaining input if its total length is at most `max_bytes`.
    ///
    /// This method copies from the current reader position until EOF. If EOF is
    /// not reached within `max_bytes` bytes, it returns
    /// [`ErrorKind::InvalidData`]. Detecting oversized input consumes one
    /// excess byte from this reader; that excess byte is not written to
    /// `writer`. The blanket implementation delegates to
    /// `Streams::copy_to_end_limited`.
    ///
    /// # Parameters
    /// - `writer`: Destination writer.
    /// - `max_bytes`: Maximum accepted number of bytes in the remaining input.
    ///
    /// # Returns
    /// The number of bytes copied when EOF is reached within the limit.
    ///
    /// # Errors
    /// Returns [`ErrorKind::InvalidData`] when the remaining input is longer
    /// than `max_bytes`. Returns the first non-[`ErrorKind::Interrupted`] read
    /// error or write error reported by the underlying streams. Interrupted
    /// reads are retried.
    fn copy_to_end_limited(&mut self, writer: &mut dyn Write, max_bytes: u64) -> Result<u64>;

    /// Reads the remaining bytes into a vector with a maximum accepted length.
    ///
    /// This method consumes bytes from the current reader position until EOF is
    /// reached. If the stream contains more than `max_len` bytes, it returns
    /// [`ErrorKind::InvalidData`] after detecting the first excess byte. On any
    /// error the bytes collected so far are dropped together with the
    /// temporary vector.
    ///
    /// # Parameters
    /// - `max_len`: Maximum number of bytes accepted in the returned vector.
    ///
    /// # Returns
    /// A vector containing all remaining bytes when the stream length is within
    /// the limit.
    ///
    /// # Errors
    /// Returns [`ErrorKind::InvalidData`] when the stream contains more than
    /// `max_len` bytes. Returns the first non-[`ErrorKind::Interrupted`] error
    /// reported by the underlying reader; interrupted reads are retried.
    fn read_to_end_limited(&mut self, max_len: usize) -> Result<Vec<u8>>;

    /// Reads the remaining bytes into `output` with a maximum accepted length.
    ///
    /// This method appends at most `max_len` bytes from the current reader
    /// position to `output`. If the stream contains more than `max_len` bytes,
    /// it returns [`ErrorKind::InvalidData`] after detecting the first excess
    /// byte. In that case, the accepted prefix may already have been appended
    /// to `output`, and one excess byte may have been consumed from the reader.
    /// Bytes appended before a read error likewise stay in `output`; this
    /// method never truncates it.
    ///
    /// # Parameters
    /// - `output`: Destination vector to append to.
    /// - `max_len`: Maximum number of bytes accepted from this reader.
    ///
    /// # Returns
    /// The number of bytes appended to `output`.
    ///
    /// # Errors
    /// Returns [`ErrorKind::InvalidData`] when the stream contains more than
    /// `max_len` bytes. Returns the first non-[`ErrorKind::Interrupted`] error
    /// reported by the underlying reader; interrupted reads are retried.
    fn read_to_end_limited_into(&mut self, output: &mut Vec<u8>, max_len: usize) -> Result<usize>;

    /// Reads the remaining bytes as UTF-8 text with a maximum accepted length.
    ///
    /// This method has the same size limit and read semantics as
    /// [`ReadExt::read_to_end_limited`], then validates the collected bytes as
    /// UTF-8. The limit is expressed in bytes, not in characters.
    ///
    /// # Parameters
    /// - `max_len`: Maximum number of bytes accepted before UTF-8 decoding.
    ///
    /// # Returns
    /// The decoded UTF-8 string.
    ///
    /// # Errors
    /// Returns [`ErrorKind::InvalidData`] when the stream contains more than
    /// `max_len` bytes or when the collected bytes are not valid UTF-8. Returns
    /// the first non-[`ErrorKind::Interrupted`] error reported by the
    /// underlying reader; interrupted reads are retried.
    fn read_to_string_limited(&mut self, max_len: usize) -> Result<String>;

    /// Reads the remaining bytes as UTF-8 text and appends to `output`.
    ///
    /// This method accepts at most `max_len` bytes from the current reader
    /// position, validates them as UTF-8, and appends the decoded text to
    /// `output`. If the input is oversized or invalid UTF-8, `output` is left
    /// unchanged. Oversized input may still consume up to `max_len + 1` bytes
    /// from the reader while detecting the limit violation. A read error also
    /// leaves `output` unchanged, because the text is appended only after the
    /// whole input has been collected and validated.
    ///
    /// # Parameters
    /// - `output`: Destination string to append to.
    /// - `max_len`: Maximum number of bytes accepted before UTF-8 decoding.
    ///
    /// # Returns
    /// The number of bytes appended to `output`.
    ///
    /// # Errors
    /// Returns [`ErrorKind::InvalidData`] when the stream contains more than
    /// `max_len` bytes or when the collected bytes are not valid UTF-8. Returns
    /// the first non-[`ErrorKind::Interrupted`] error reported by the
    /// underlying reader; interrupted reads are retried.
    fn read_to_string_limited_into(&mut self, output: &mut String, max_len: usize)
    -> Result<usize>;
}
282
283impl<T> ReadExt for T
284where
285    T: Read + ?Sized,
286{
287    #[inline]
288    fn read_exact_or_eof(&mut self, buffer: &mut [u8]) -> Result<usize> {
289        let mut reader = self;
290        read_exact_or_eof_impl(&mut reader, buffer)
291    }
292
293    #[inline]
294    fn read_exact_array<const N: usize>(&mut self) -> Result<[u8; N]> {
295        let mut reader = self;
296        read_exact_array_impl::<N>(&mut reader)
297    }
298
299    #[inline]
300    fn read_exact_vec_limited(&mut self, len: usize, max_len: usize) -> Result<Vec<u8>> {
301        let mut reader = self;
302        read_exact_vec_limited_impl(&mut reader, len, max_len)
303    }
304
305    #[inline]
306    fn read_exact_vec_limited_into(
307        &mut self,
308        output: &mut Vec<u8>,
309        len: usize,
310        max_len: usize,
311    ) -> Result<()> {
312        let mut reader = self;
313        read_exact_vec_limited_into_impl(&mut reader, output, len, max_len)
314    }
315
316    #[inline]
317    fn discard_exact_or_eof(&mut self, bytes: u64) -> Result<u64> {
318        let mut reader = self;
319        discard_exact_or_eof_impl(&mut reader, bytes)
320    }
321
322    #[inline]
323    fn copy_to(&mut self, writer: &mut dyn Write) -> Result<u64> {
324        let mut reader = self;
325        copy_to_impl(&mut reader, writer)
326    }
327
328    #[inline]
329    fn copy_to_at_most(&mut self, writer: &mut dyn Write, max_bytes: u64) -> Result<u64> {
330        let mut reader = self;
331        Streams::copy_at_most(&mut reader, writer, max_bytes)
332    }
333
334    #[inline]
335    fn copy_to_end_limited(&mut self, writer: &mut dyn Write, max_bytes: u64) -> Result<u64> {
336        let mut reader = self;
337        Streams::copy_to_end_limited(&mut reader, writer, max_bytes)
338    }
339
340    #[inline]
341    fn read_to_end_limited(&mut self, max_len: usize) -> Result<Vec<u8>> {
342        let mut reader = self;
343        read_to_end_limited_impl(&mut reader, max_len)
344    }
345
346    #[inline]
347    fn read_to_end_limited_into(&mut self, output: &mut Vec<u8>, max_len: usize) -> Result<usize> {
348        let mut reader = self;
349        read_to_end_limited_into_impl(&mut reader, output, max_len)
350    }
351
352    #[inline]
353    fn read_to_string_limited(&mut self, max_len: usize) -> Result<String> {
354        let mut reader = self;
355        read_to_string_limited_impl(&mut reader, max_len)
356    }
357
358    #[inline]
359    fn read_to_string_limited_into(
360        &mut self,
361        output: &mut String,
362        max_len: usize,
363    ) -> Result<usize> {
364        let mut reader = self;
365        read_to_string_limited_into_impl(&mut reader, output, max_len)
366    }
367}
368
/// Fills `buffer` from `reader`, stopping early only at EOF.
///
/// # Parameters
/// - `reader`: Source reader.
/// - `buffer`: Destination buffer to fill.
///
/// # Returns
/// The number of bytes stored in `buffer`; less than `buffer.len()` only when
/// the reader reported EOF first.
///
/// # Errors
/// Returns the first read error that is not [`ErrorKind::Interrupted`];
/// interrupted reads are retried.
fn read_exact_or_eof_impl(reader: &mut dyn Read, buffer: &mut [u8]) -> Result<usize> {
    let wanted = buffer.len();
    let mut filled = 0;
    loop {
        if filled >= wanted {
            return Ok(filled);
        }
        match reader.read(&mut buffer[filled..]) {
            // A zero-length read on a non-empty slice signals EOF.
            Ok(0) => return Ok(filled),
            Ok(received) => filled += received,
            // Interrupted reads carry no data; simply try again.
            Err(error) if error.kind() == ErrorKind::Interrupted => {}
            Err(error) => return Err(error),
        }
    }
}
396
/// Reads exactly `N` bytes from `reader` and returns them as an array.
///
/// # Parameters
/// - `reader`: Source reader.
///
/// # Returns
/// A stack-allocated array holding the `N` bytes that were read.
///
/// # Errors
/// Returns whatever [`Read::read_exact`] reports, for example an
/// unexpected-EOF error when fewer than `N` bytes are available.
fn read_exact_array_impl<const N: usize>(reader: &mut dyn Read) -> Result<[u8; N]> {
    let mut bytes = [0u8; N];
    reader.read_exact(&mut bytes).map(|()| bytes)
}
412
413/// Reads exactly `len` bytes from `reader` when `len` is within `max_len`.
414///
415/// # Parameters
416/// - `reader`: Source reader.
417/// - `len`: Exact number of bytes to read.
418/// - `max_len`: Maximum accepted exact read length.
419///
420/// # Returns
421/// A vector containing exactly `len` bytes.
422///
423/// # Errors
424/// Returns [`ErrorKind::InvalidData`] when `len > max_len`. Returns the error
425/// reported by [`Read::read_exact`] for read failures.
426fn read_exact_vec_limited_impl(
427    reader: &mut dyn Read,
428    len: usize,
429    max_len: usize,
430) -> Result<Vec<u8>> {
431    validate_exact_read_len(len, max_len)?;
432    let mut output = Vec::with_capacity(len);
433    read_exact_vec_limited_into_impl(reader, &mut output, len, max_len)?;
434    Ok(output)
435}
436
437/// Reads exactly `len` bytes from `reader` and appends them to `output`.
438///
439/// # Parameters
440/// - `reader`: Source reader.
441/// - `output`: Destination vector to append to.
442/// - `len`: Exact number of bytes to read.
443/// - `max_len`: Maximum accepted exact read length.
444///
445/// # Errors
446/// Returns [`ErrorKind::InvalidData`] when `len > max_len` before reading and
447/// leaves `output` unchanged. Returns the error reported by
448/// [`Read::read_exact`] for read failures and truncates `output` back to its
449/// original length.
450fn read_exact_vec_limited_into_impl(
451    reader: &mut dyn Read,
452    output: &mut Vec<u8>,
453    len: usize,
454    max_len: usize,
455) -> Result<()> {
456    validate_exact_read_len(len, max_len)?;
457    let original_len = output.len();
458    output.resize(original_len + len, 0);
459    match reader.read_exact(&mut output[original_len..]) {
460        Ok(()) => Ok(()),
461        Err(error) => {
462            output.truncate(original_len);
463            Err(error)
464        }
465    }
466}
467
/// Checks that a requested exact read length does not exceed its limit.
///
/// # Parameters
/// - `len`: Exact number of bytes requested by the caller.
/// - `max_len`: Maximum accepted exact read length (inclusive).
///
/// # Errors
/// Returns [`ErrorKind::InvalidData`] when `len > max_len`.
fn validate_exact_read_len(len: usize, max_len: usize) -> Result<()> {
    if len <= max_len {
        Ok(())
    } else {
        let message = format!("requested length {len} exceeds maximum length {max_len}");
        Err(Error::new(ErrorKind::InvalidData, message))
    }
}
485
486/// Discards up to `bytes` bytes from `reader`.
487///
488/// # Parameters
489/// - `reader`: Source reader.
490/// - `bytes`: Maximum number of bytes to discard.
491///
492/// # Returns
493/// The number of bytes actually discarded.
494///
495/// # Errors
496/// Returns the first non-interrupted read error reported by `reader`.
497fn discard_exact_or_eof_impl(reader: &mut dyn Read, bytes: u64) -> Result<u64> {
498    let mut buffer = [0; DISCARD_BUFFER_SIZE];
499    let mut remaining = bytes;
500    let mut discarded = 0;
501    while remaining > 0 {
502        let requested = remaining.min(DISCARD_BUFFER_SIZE as u64) as usize;
503        match reader.read(&mut buffer[..requested]) {
504            Ok(0) => break,
505            Ok(count) => {
506                let count = count as u64;
507                remaining -= count;
508                discarded += count;
509            }
510            Err(error) => {
511                if error.kind() == ErrorKind::Interrupted {
512                    continue;
513                }
514                return Err(error);
515            }
516        }
517    }
518    Ok(discarded)
519}
520
521/// Reads all remaining bytes from `reader` when the result fits `max_len`.
522///
523/// # Parameters
524/// - `reader`: Source reader.
525/// - `max_len`: Maximum accepted result length.
526///
527/// # Returns
528/// A vector containing all remaining bytes.
529///
530/// # Errors
531/// Returns [`ErrorKind::InvalidData`] after detecting that the input contains
532/// more than `max_len` bytes. Returns the first non-interrupted read error
533/// reported by `reader`.
534fn read_to_end_limited_impl(reader: &mut dyn Read, max_len: usize) -> Result<Vec<u8>> {
535    let mut output = Vec::with_capacity(max_len.min(READ_TO_END_BUFFER_SIZE));
536    read_to_end_limited_into_impl(reader, &mut output, max_len)?;
537    Ok(output)
538}
539
540/// Reads all remaining bytes from `reader` into `output` when the input fits.
541///
542/// # Parameters
543/// - `reader`: Source reader.
544/// - `output`: Destination vector to append to.
545/// - `max_len`: Maximum accepted input length in bytes.
546///
547/// # Returns
548/// The number of bytes appended to `output`.
549///
550/// # Errors
551/// Returns [`ErrorKind::InvalidData`] after detecting that the input contains
552/// more than `max_len` bytes. Returns the first non-interrupted read error
553/// reported by `reader`.
554fn read_to_end_limited_into_impl(
555    reader: &mut dyn Read,
556    output: &mut Vec<u8>,
557    max_len: usize,
558) -> Result<usize> {
559    let mut buffer = [0; READ_TO_END_BUFFER_SIZE];
560    let mut appended = 0;
561    loop {
562        let remaining = max_len.saturating_sub(appended);
563        let requested = remaining.saturating_add(1).min(READ_TO_END_BUFFER_SIZE);
564        match reader.read(&mut buffer[..requested]) {
565            Ok(0) => return Ok(appended),
566            Ok(count) if count <= remaining => {
567                output.extend_from_slice(&buffer[..count]);
568                appended += count;
569            }
570            Ok(_) => {
571                if remaining > 0 {
572                    output.extend_from_slice(&buffer[..remaining]);
573                }
574                return Err(Error::new(
575                    ErrorKind::InvalidData,
576                    format!("input exceeds maximum length of {max_len} bytes"),
577                ));
578            }
579            Err(error) => {
580                if error.kind() == ErrorKind::Interrupted {
581                    continue;
582                }
583                return Err(error);
584            }
585        }
586    }
587}
588
589/// Reads all remaining bytes from `reader` as UTF-8 when the input fits `max_len`.
590///
591/// # Parameters
592/// - `reader`: Source reader.
593/// - `max_len`: Maximum accepted input length in bytes.
594///
595/// # Returns
596/// Decoded UTF-8 string.
597///
598/// # Errors
599/// Returns [`ErrorKind::InvalidData`] when the input is oversized or is not
600/// valid UTF-8. Returns the first non-interrupted read error reported by
601/// `reader`.
602fn read_to_string_limited_impl(reader: &mut dyn Read, max_len: usize) -> Result<String> {
603    let bytes = read_to_end_limited_impl(reader, max_len)?;
604    String::from_utf8(bytes).map_err(invalid_utf8_error)
605}
606
607/// Reads all remaining UTF-8 text from `reader` into `output`.
608///
609/// # Parameters
610/// - `reader`: Source reader.
611/// - `output`: Destination string to append to.
612/// - `max_len`: Maximum accepted input length in bytes.
613///
614/// # Returns
615/// The number of bytes appended to `output`.
616///
617/// # Errors
618/// Returns [`ErrorKind::InvalidData`] when the input is oversized or is not
619/// valid UTF-8. Returns the first non-interrupted read error reported by
620/// `reader`.
621fn read_to_string_limited_into_impl(
622    reader: &mut dyn Read,
623    output: &mut String,
624    max_len: usize,
625) -> Result<usize> {
626    let bytes = read_to_end_limited_impl(reader, max_len)?;
627    let text = String::from_utf8(bytes).map_err(invalid_utf8_error)?;
628    let count = text.len();
629    output.push_str(&text);
630    Ok(count)
631}
632
/// Streams every remaining byte of `reader` into `writer` via
/// [`std::io::copy`].
///
/// # Parameters
/// - `reader`: Source reader.
/// - `writer`: Destination writer.
///
/// # Returns
/// The number of bytes copied.
///
/// # Errors
/// Returns the first read or write error reported by the underlying streams.
fn copy_to_impl(reader: &mut dyn Read, writer: &mut dyn Write) -> Result<u64> {
    let copied = copy_all(reader, writer)?;
    Ok(copied)
}
647
/// Wraps a UTF-8 decoding failure in an I/O error.
///
/// # Parameters
/// - `error`: UTF-8 conversion error.
///
/// # Returns
/// An [`ErrorKind::InvalidData`] error whose message embeds the description of
/// the UTF-8 error.
fn invalid_utf8_error(error: FromUtf8Error) -> Error {
    let message = format!("limited input is not valid UTF-8: {error}");
    Error::new(ErrorKind::InvalidData, message)
}