qubit_io/ext/read_ext.rs
1/*******************************************************************************
2 *
3 * Copyright (c) 2026 Haixing Hu.
4 *
5 * SPDX-License-Identifier: Apache-2.0
6 *
7 * Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10use std::io::{
11 Error,
12 ErrorKind,
13 Read,
14 Result,
15 Write,
16 copy as copy_all,
17};
18use std::string::FromUtf8Error;
19
20use crate::Streams;
21
/// Size in bytes of the stack scratch buffer used when discarding input.
const DISCARD_BUFFER_SIZE: usize = 8 * 1024;

/// Size in bytes of the stack chunk buffer used by the limited read-to-end
/// helpers; it also caps the initial capacity of the vector they allocate.
const READ_TO_END_BUFFER_SIZE: usize = 8 * 1024;
27
/// Convenience extensions for any [`Read`] implementation.
///
/// `ReadExt` adds a handful of operations that the standard [`Read`] trait
/// does not offer directly (EOF-tolerant exact reads, length-limited reads,
/// discarding and copying) while keeping the blocking and error model of the
/// underlying reader. A blanket implementation covers every type that
/// implements [`Read`], unsized ones such as `dyn Read` included.
///
/// # Examples
/// ```
/// use qubit_io::ReadExt;
/// use std::io::Cursor;
///
/// let mut input = Cursor::new(b"abcdef".to_vec());
/// let header = input.read_exact_array::<2>()?;
/// let payload = input.read_exact_vec_limited(4, 16)?;
///
/// assert_eq!(*b"ab", header);
/// assert_eq!(b"cdef", payload.as_slice());
/// # Ok::<(), std::io::Error>(())
/// ```
pub trait ReadExt: Read {
    /// Fills `buffer` from this reader, stopping early only at EOF.
    ///
    /// Unlike [`Read::read_exact`], running out of input is not an error:
    /// the bytes read so far are kept and their count is returned. Short
    /// reads are retried until the buffer is full, EOF is reached, or a
    /// non-interrupted I/O error occurs.
    ///
    /// # Parameters
    /// - `buffer`: Destination buffer to fill.
    ///
    /// # Returns
    /// How many bytes were stored in `buffer`; always within
    /// `0..=buffer.len()`.
    ///
    /// # Errors
    /// Propagates the first error from the underlying reader whose kind is
    /// not [`ErrorKind::Interrupted`]. Interrupted reads are retried.
    fn read_exact_or_eof(&mut self, buffer: &mut [u8]) -> Result<usize>;

    /// Reads exactly `N` bytes and returns them as a stack-allocated array.
    ///
    /// The read is performed with [`Read::read_exact`], so the reader must
    /// supply all `N` bytes before reaching EOF.
    ///
    /// # Returns
    /// An array holding the `N` bytes that were read.
    ///
    /// # Errors
    /// Propagates the error from [`Read::read_exact`]; in particular
    /// [`ErrorKind::UnexpectedEof`] when the input ends before the array is
    /// full.
    fn read_exact_array<const N: usize>(&mut self) -> Result<[u8; N]>;

    /// Reads exactly `len` bytes into a fresh vector, enforcing a limit.
    ///
    /// The limit is checked first: when `len` exceeds `max_len` the method
    /// fails with [`ErrorKind::InvalidData`] without reading anything.
    ///
    /// # Parameters
    /// - `len`: Exact number of bytes to read.
    /// - `max_len`: Largest value of `len` that is accepted.
    ///
    /// # Returns
    /// A vector of exactly `len` bytes.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when `len > max_len`. Otherwise
    /// propagates the error from [`Read::read_exact`], including
    /// [`ErrorKind::UnexpectedEof`] when the input ends before `len` bytes
    /// have been read.
    fn read_exact_vec_limited(&mut self, len: usize, max_len: usize) -> Result<Vec<u8>>;

    /// Reads exactly `len` bytes and appends them to `output`.
    ///
    /// When `len` exceeds `max_len` the method fails with
    /// [`ErrorKind::InvalidData`] before reading and `output` is not
    /// modified. When the read itself fails, `output` is truncated back to
    /// the length it had on entry. Bytes already consumed from the reader
    /// are not restored, because [`Read`] offers no rollback.
    ///
    /// # Parameters
    /// - `output`: Vector that receives the bytes at its end.
    /// - `len`: Exact number of bytes to read.
    /// - `max_len`: Largest value of `len` that is accepted.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when `len > max_len`. Otherwise
    /// propagates the error from [`Read::read_exact`], including
    /// [`ErrorKind::UnexpectedEof`] when the input ends before `len` bytes
    /// have been read.
    fn read_exact_vec_limited_into(
        &mut self,
        output: &mut Vec<u8>,
        len: usize,
        max_len: usize,
    ) -> Result<()>;

    /// Consumes and throws away up to `bytes` bytes of input.
    ///
    /// Input is read repeatedly into an internal stack buffer until the
    /// requested amount has been consumed or EOF is reached. No heap
    /// allocation and no seeking support is needed.
    ///
    /// # Parameters
    /// - `bytes`: Upper bound on the number of bytes to discard.
    ///
    /// # Returns
    /// The number of bytes actually discarded, which is less than `bytes`
    /// when EOF comes first.
    ///
    /// # Errors
    /// Propagates the first error from the underlying reader whose kind is
    /// not [`ErrorKind::Interrupted`]. Interrupted reads are retried.
    fn discard_exact_or_eof(&mut self, bytes: u64) -> Result<u64>;

    /// Copies everything that is left in this reader into `writer`.
    ///
    /// This is [`std::io::copy`] in method form: it copies from the current
    /// position until EOF and neither flushes nor closes either stream.
    ///
    /// # Parameters
    /// - `writer`: Destination writer.
    ///
    /// # Returns
    /// The number of bytes copied.
    ///
    /// # Errors
    /// Propagates the first read or write error, with the same error
    /// behavior as [`std::io::copy`].
    fn copy_to(&mut self, writer: &mut dyn Write) -> Result<u64>;

    /// Copies no more than `max_bytes` bytes from this reader into `writer`.
    ///
    /// Copying ends successfully at EOF or once `max_bytes` bytes have been
    /// copied, whichever happens first. Neither stream is flushed or closed.
    ///
    /// # Parameters
    /// - `writer`: Destination writer.
    /// - `max_bytes`: Upper bound on the number of bytes to copy.
    ///
    /// # Returns
    /// The number of bytes copied.
    ///
    /// # Errors
    /// Propagates the first write error or the first read error whose kind
    /// is not [`ErrorKind::Interrupted`]. Interrupted reads are retried.
    fn copy_to_at_most(&mut self, writer: &mut dyn Write, max_bytes: u64) -> Result<u64>;

    /// Copies the rest of the input, provided it is at most `max_bytes` long.
    ///
    /// Copying runs from the current position until EOF. If EOF has not been
    /// seen after `max_bytes` bytes, the method fails with
    /// [`ErrorKind::InvalidData`]. Detecting the overrun consumes one excess
    /// byte from this reader; that byte is not written to `writer`.
    ///
    /// # Parameters
    /// - `writer`: Destination writer.
    /// - `max_bytes`: Largest accepted length of the remaining input.
    ///
    /// # Returns
    /// The number of bytes copied when EOF is reached within the limit.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when the remaining input is
    /// longer than `max_bytes`. Otherwise propagates the first write error
    /// or the first read error whose kind is not
    /// [`ErrorKind::Interrupted`]. Interrupted reads are retried.
    fn copy_to_end_limited(&mut self, writer: &mut dyn Write, max_bytes: u64) -> Result<u64>;

    /// Collects the rest of the input into a vector, enforcing a size limit.
    ///
    /// Bytes are consumed from the current position until EOF. Input longer
    /// than `max_len` bytes is rejected with [`ErrorKind::InvalidData`] as
    /// soon as the first excess byte is seen.
    ///
    /// # Parameters
    /// - `max_len`: Largest number of bytes the returned vector may hold.
    ///
    /// # Returns
    /// All remaining bytes, when there are at most `max_len` of them.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when more than `max_len` bytes
    /// remain. Otherwise propagates the first error from the underlying
    /// reader whose kind is not [`ErrorKind::Interrupted`]; interrupted
    /// reads are retried.
    fn read_to_end_limited(&mut self, max_len: usize) -> Result<Vec<u8>>;

    /// Appends the rest of the input to `output`, enforcing a size limit.
    ///
    /// At most `max_len` bytes are appended. Input longer than that is
    /// rejected with [`ErrorKind::InvalidData`] once the first excess byte
    /// is seen; by then the accepted prefix may already be in `output` and
    /// one excess byte may have been consumed from the reader.
    ///
    /// # Parameters
    /// - `output`: Vector that receives the bytes at its end.
    /// - `max_len`: Largest number of bytes accepted from this reader.
    ///
    /// # Returns
    /// The number of bytes appended to `output`.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when more than `max_len` bytes
    /// remain. Otherwise propagates the first error from the underlying
    /// reader whose kind is not [`ErrorKind::Interrupted`]; interrupted
    /// reads are retried.
    fn read_to_end_limited_into(&mut self, output: &mut Vec<u8>, max_len: usize) -> Result<usize>;

    /// Collects the rest of the input as UTF-8 text, enforcing a size limit.
    ///
    /// Reading and limit handling match [`ReadExt::read_to_end_limited`];
    /// the collected bytes are then validated as UTF-8.
    ///
    /// # Parameters
    /// - `max_len`: Largest number of bytes accepted before UTF-8 decoding.
    ///
    /// # Returns
    /// The decoded UTF-8 string.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when more than `max_len` bytes
    /// remain or when the collected bytes are not valid UTF-8. Otherwise
    /// propagates the first error from the underlying reader whose kind is
    /// not [`ErrorKind::Interrupted`]; interrupted reads are retried.
    fn read_to_string_limited(&mut self, max_len: usize) -> Result<String>;

    /// Appends the rest of the input to `output` as UTF-8 text.
    ///
    /// Up to `max_len` bytes are accepted from the current position,
    /// validated as UTF-8 and appended to `output`. `output` is left
    /// unchanged when the input is oversized or not valid UTF-8. Oversized
    /// input may still consume up to `max_len + 1` bytes from the reader
    /// while the limit violation is being detected.
    ///
    /// # Parameters
    /// - `output`: String that receives the decoded text at its end.
    /// - `max_len`: Largest number of bytes accepted before UTF-8 decoding.
    ///
    /// # Returns
    /// The number of bytes appended to `output`.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when more than `max_len` bytes
    /// remain or when the collected bytes are not valid UTF-8. Otherwise
    /// propagates the first error from the underlying reader whose kind is
    /// not [`ErrorKind::Interrupted`]; interrupted reads are retried.
    fn read_to_string_limited_into(
        &mut self,
        output: &mut String,
        max_len: usize,
    ) -> Result<usize>;
}
282
283impl<T> ReadExt for T
284where
285 T: Read + ?Sized,
286{
287 #[inline]
288 fn read_exact_or_eof(&mut self, buffer: &mut [u8]) -> Result<usize> {
289 let mut reader = self;
290 read_exact_or_eof_impl(&mut reader, buffer)
291 }
292
293 #[inline]
294 fn read_exact_array<const N: usize>(&mut self) -> Result<[u8; N]> {
295 let mut reader = self;
296 read_exact_array_impl::<N>(&mut reader)
297 }
298
299 #[inline]
300 fn read_exact_vec_limited(&mut self, len: usize, max_len: usize) -> Result<Vec<u8>> {
301 let mut reader = self;
302 read_exact_vec_limited_impl(&mut reader, len, max_len)
303 }
304
305 #[inline]
306 fn read_exact_vec_limited_into(
307 &mut self,
308 output: &mut Vec<u8>,
309 len: usize,
310 max_len: usize,
311 ) -> Result<()> {
312 let mut reader = self;
313 read_exact_vec_limited_into_impl(&mut reader, output, len, max_len)
314 }
315
316 #[inline]
317 fn discard_exact_or_eof(&mut self, bytes: u64) -> Result<u64> {
318 let mut reader = self;
319 discard_exact_or_eof_impl(&mut reader, bytes)
320 }
321
322 #[inline]
323 fn copy_to(&mut self, writer: &mut dyn Write) -> Result<u64> {
324 let mut reader = self;
325 copy_to_impl(&mut reader, writer)
326 }
327
328 #[inline]
329 fn copy_to_at_most(&mut self, writer: &mut dyn Write, max_bytes: u64) -> Result<u64> {
330 let mut reader = self;
331 Streams::copy_at_most(&mut reader, writer, max_bytes)
332 }
333
334 #[inline]
335 fn copy_to_end_limited(&mut self, writer: &mut dyn Write, max_bytes: u64) -> Result<u64> {
336 let mut reader = self;
337 Streams::copy_to_end_limited(&mut reader, writer, max_bytes)
338 }
339
340 #[inline]
341 fn read_to_end_limited(&mut self, max_len: usize) -> Result<Vec<u8>> {
342 let mut reader = self;
343 read_to_end_limited_impl(&mut reader, max_len)
344 }
345
346 #[inline]
347 fn read_to_end_limited_into(&mut self, output: &mut Vec<u8>, max_len: usize) -> Result<usize> {
348 let mut reader = self;
349 read_to_end_limited_into_impl(&mut reader, output, max_len)
350 }
351
352 #[inline]
353 fn read_to_string_limited(&mut self, max_len: usize) -> Result<String> {
354 let mut reader = self;
355 read_to_string_limited_impl(&mut reader, max_len)
356 }
357
358 #[inline]
359 fn read_to_string_limited_into(
360 &mut self,
361 output: &mut String,
362 max_len: usize,
363 ) -> Result<usize> {
364 let mut reader = self;
365 read_to_string_limited_into_impl(&mut reader, output, max_len)
366 }
367}
368
/// Fills `buffer` from `reader`, stopping early only when EOF is reached.
///
/// # Parameters
/// - `reader`: Source reader.
/// - `buffer`: Destination buffer to fill.
///
/// # Returns
/// The number of bytes stored in `buffer`.
///
/// # Errors
/// Propagates the first read error from `reader` whose kind is not
/// [`ErrorKind::Interrupted`].
fn read_exact_or_eof_impl(reader: &mut dyn Read, buffer: &mut [u8]) -> Result<usize> {
    let capacity = buffer.len();
    let mut filled = 0;
    while filled < capacity {
        match reader.read(&mut buffer[filled..]) {
            // A zero-length read on a non-empty slice signals EOF.
            Ok(0) => return Ok(filled),
            Ok(read) => filled += read,
            // Interrupted reads transferred nothing; simply try again.
            Err(error) if error.kind() == ErrorKind::Interrupted => {}
            Err(error) => return Err(error),
        }
    }
    Ok(filled)
}
396
/// Reads exactly `N` bytes from `reader` and returns them as an array.
///
/// # Parameters
/// - `reader`: Source reader.
///
/// # Returns
/// A stack-allocated array holding the `N` bytes that were read.
///
/// # Errors
/// Propagates the error reported by [`Read::read_exact`].
fn read_exact_array_impl<const N: usize>(reader: &mut dyn Read) -> Result<[u8; N]> {
    let mut bytes = [0u8; N];
    reader.read_exact(&mut bytes).map(|()| bytes)
}
412
413/// Reads exactly `len` bytes from `reader` when `len` is within `max_len`.
414///
415/// # Parameters
416/// - `reader`: Source reader.
417/// - `len`: Exact number of bytes to read.
418/// - `max_len`: Maximum accepted exact read length.
419///
420/// # Returns
421/// A vector containing exactly `len` bytes.
422///
423/// # Errors
424/// Returns [`ErrorKind::InvalidData`] when `len > max_len`. Returns the error
425/// reported by [`Read::read_exact`] for read failures.
426fn read_exact_vec_limited_impl(
427 reader: &mut dyn Read,
428 len: usize,
429 max_len: usize,
430) -> Result<Vec<u8>> {
431 validate_exact_read_len(len, max_len)?;
432 let mut output = Vec::with_capacity(len);
433 read_exact_vec_limited_into_impl(reader, &mut output, len, max_len)?;
434 Ok(output)
435}
436
/// Reads exactly `len` bytes from `reader` and appends them to `output`.
///
/// # Parameters
/// - `reader`: Source reader.
/// - `output`: Destination vector to append to.
/// - `len`: Exact number of bytes to read.
/// - `max_len`: Maximum accepted exact read length.
///
/// # Errors
/// Returns [`ErrorKind::InvalidData`] when `len > max_len` before reading and
/// leaves `output` unchanged. Returns [`ErrorKind::OutOfMemory`] when
/// `output` cannot grow by `len` bytes (allocation failure, or a combined
/// length that does not fit the vector's capacity limit); `output` is
/// unchanged and nothing has been read in that case. Returns the error
/// reported by [`Read::read_exact`] for read failures and truncates `output`
/// back to its original length.
fn read_exact_vec_limited_into_impl(
    reader: &mut dyn Read,
    output: &mut Vec<u8>,
    len: usize,
    max_len: usize,
) -> Result<()> {
    validate_exact_read_len(len, max_len)?;
    let original_len = output.len();
    // Grow fallibly before touching the contents. `try_reserve` rejects a
    // combined length that overflows `usize`, which would otherwise wrap in
    // release builds and make `resize` silently truncate `output`.
    output
        .try_reserve(len)
        .map_err(|error| Error::new(ErrorKind::OutOfMemory, error))?;
    // Cannot overflow: the reservation above guarantees capacity for the sum.
    output.resize(original_len + len, 0);
    if let Err(error) = reader.read_exact(&mut output[original_len..]) {
        // Drop the zero-filled (possibly partially written) tail again.
        output.truncate(original_len);
        return Err(error);
    }
    Ok(())
}

/// Validates that an exact read length is within the configured maximum.
///
/// # Parameters
/// - `len`: Exact number of bytes requested by the caller.
/// - `max_len`: Maximum accepted exact read length.
///
/// # Errors
/// Returns [`ErrorKind::InvalidData`] when `len > max_len`.
fn validate_exact_read_len(len: usize, max_len: usize) -> Result<()> {
    if len <= max_len {
        return Ok(());
    }
    Err(Error::new(
        ErrorKind::InvalidData,
        format!("requested length {len} exceeds maximum length {max_len}"),
    ))
}
485
486/// Discards up to `bytes` bytes from `reader`.
487///
488/// # Parameters
489/// - `reader`: Source reader.
490/// - `bytes`: Maximum number of bytes to discard.
491///
492/// # Returns
493/// The number of bytes actually discarded.
494///
495/// # Errors
496/// Returns the first non-interrupted read error reported by `reader`.
497fn discard_exact_or_eof_impl(reader: &mut dyn Read, bytes: u64) -> Result<u64> {
498 let mut buffer = [0; DISCARD_BUFFER_SIZE];
499 let mut remaining = bytes;
500 let mut discarded = 0;
501 while remaining > 0 {
502 let requested = remaining.min(DISCARD_BUFFER_SIZE as u64) as usize;
503 match reader.read(&mut buffer[..requested]) {
504 Ok(0) => break,
505 Ok(count) => {
506 let count = count as u64;
507 remaining -= count;
508 discarded += count;
509 }
510 Err(error) => {
511 if error.kind() == ErrorKind::Interrupted {
512 continue;
513 }
514 return Err(error);
515 }
516 }
517 }
518 Ok(discarded)
519}
520
521/// Reads all remaining bytes from `reader` when the result fits `max_len`.
522///
523/// # Parameters
524/// - `reader`: Source reader.
525/// - `max_len`: Maximum accepted result length.
526///
527/// # Returns
528/// A vector containing all remaining bytes.
529///
530/// # Errors
531/// Returns [`ErrorKind::InvalidData`] after detecting that the input contains
532/// more than `max_len` bytes. Returns the first non-interrupted read error
533/// reported by `reader`.
534fn read_to_end_limited_impl(reader: &mut dyn Read, max_len: usize) -> Result<Vec<u8>> {
535 let mut output = Vec::with_capacity(max_len.min(READ_TO_END_BUFFER_SIZE));
536 read_to_end_limited_into_impl(reader, &mut output, max_len)?;
537 Ok(output)
538}
539
540/// Reads all remaining bytes from `reader` into `output` when the input fits.
541///
542/// # Parameters
543/// - `reader`: Source reader.
544/// - `output`: Destination vector to append to.
545/// - `max_len`: Maximum accepted input length in bytes.
546///
547/// # Returns
548/// The number of bytes appended to `output`.
549///
550/// # Errors
551/// Returns [`ErrorKind::InvalidData`] after detecting that the input contains
552/// more than `max_len` bytes. Returns the first non-interrupted read error
553/// reported by `reader`.
554fn read_to_end_limited_into_impl(
555 reader: &mut dyn Read,
556 output: &mut Vec<u8>,
557 max_len: usize,
558) -> Result<usize> {
559 let mut buffer = [0; READ_TO_END_BUFFER_SIZE];
560 let mut appended = 0;
561 loop {
562 let remaining = max_len.saturating_sub(appended);
563 let requested = remaining.saturating_add(1).min(READ_TO_END_BUFFER_SIZE);
564 match reader.read(&mut buffer[..requested]) {
565 Ok(0) => return Ok(appended),
566 Ok(count) if count <= remaining => {
567 output.extend_from_slice(&buffer[..count]);
568 appended += count;
569 }
570 Ok(_) => {
571 if remaining > 0 {
572 output.extend_from_slice(&buffer[..remaining]);
573 }
574 return Err(Error::new(
575 ErrorKind::InvalidData,
576 format!("input exceeds maximum length of {max_len} bytes"),
577 ));
578 }
579 Err(error) => {
580 if error.kind() == ErrorKind::Interrupted {
581 continue;
582 }
583 return Err(error);
584 }
585 }
586 }
587}
588
589/// Reads all remaining bytes from `reader` as UTF-8 when the input fits `max_len`.
590///
591/// # Parameters
592/// - `reader`: Source reader.
593/// - `max_len`: Maximum accepted input length in bytes.
594///
595/// # Returns
596/// Decoded UTF-8 string.
597///
598/// # Errors
599/// Returns [`ErrorKind::InvalidData`] when the input is oversized or is not
600/// valid UTF-8. Returns the first non-interrupted read error reported by
601/// `reader`.
602fn read_to_string_limited_impl(reader: &mut dyn Read, max_len: usize) -> Result<String> {
603 let bytes = read_to_end_limited_impl(reader, max_len)?;
604 String::from_utf8(bytes).map_err(invalid_utf8_error)
605}
606
607/// Reads all remaining UTF-8 text from `reader` into `output`.
608///
609/// # Parameters
610/// - `reader`: Source reader.
611/// - `output`: Destination string to append to.
612/// - `max_len`: Maximum accepted input length in bytes.
613///
614/// # Returns
615/// The number of bytes appended to `output`.
616///
617/// # Errors
618/// Returns [`ErrorKind::InvalidData`] when the input is oversized or is not
619/// valid UTF-8. Returns the first non-interrupted read error reported by
620/// `reader`.
621fn read_to_string_limited_into_impl(
622 reader: &mut dyn Read,
623 output: &mut String,
624 max_len: usize,
625) -> Result<usize> {
626 let bytes = read_to_end_limited_impl(reader, max_len)?;
627 let text = String::from_utf8(bytes).map_err(invalid_utf8_error)?;
628 let count = text.len();
629 output.push_str(&text);
630 Ok(count)
631}
632
/// Copies everything left in `reader` into `writer` via [`std::io::copy`].
///
/// # Parameters
/// - `reader`: Source reader.
/// - `writer`: Destination writer.
///
/// # Returns
/// The number of bytes copied.
///
/// # Errors
/// Propagates the first read or write error reported by the underlying
/// streams.
fn copy_to_impl(reader: &mut dyn Read, writer: &mut dyn Write) -> Result<u64> {
    let copied = std::io::copy(reader, writer)?;
    Ok(copied)
}
647
/// Wraps a UTF-8 decoding failure in an I/O error.
///
/// # Parameters
/// - `error`: UTF-8 conversion error.
///
/// # Returns
/// An [`ErrorKind::InvalidData`] error whose message embeds the display text
/// of `error`.
fn invalid_utf8_error(error: FromUtf8Error) -> Error {
    let message = format!("limited input is not valid UTF-8: {error}");
    Error::new(ErrorKind::InvalidData, message)
}