Skip to main content

ax_io/read/
mod.rs

1#[cfg(feature = "alloc")]
2use alloc::{string::String, vec::Vec};
3use core::io::BorrowedCursor;
4
5use crate::{Chain, Error, Result, Take};
6
7mod impls;
8
9/// Default [`Read::read_exact`] implementation.
10pub fn default_read_exact<R: Read + ?Sized>(this: &mut R, mut buf: &mut [u8]) -> Result<()> {
11    while !buf.is_empty() {
12        match this.read(buf) {
13            Ok(0) => break,
14            Ok(n) => {
15                buf = &mut buf[n..];
16            }
17            Err(e) if e.canonicalize() == Error::Interrupted => continue,
18            Err(e) => return Err(e),
19        }
20    }
21    if !buf.is_empty() {
22        Err(Error::UnexpectedEof)
23    } else {
24        Ok(())
25    }
26}
27
28/// Default [`Read::read_buf`] implementation.
29pub fn default_read_buf<F>(read: F, mut cursor: BorrowedCursor<'_>) -> Result<()>
30where
31    F: FnOnce(&mut [u8]) -> Result<usize>,
32{
33    let n = read(cursor.ensure_init())?;
34    cursor.advance_checked(n);
35    Ok(())
36}
37
38/// Default [`Read::read_buf_exact`] implementation.
39pub fn default_read_buf_exact<R: Read + ?Sized>(
40    this: &mut R,
41    mut cursor: BorrowedCursor<'_>,
42) -> Result<()> {
43    while cursor.capacity() > 0 {
44        let prev_written = cursor.written();
45        match this.read_buf(cursor.reborrow()) {
46            Ok(()) => {}
47            Err(e) if e.canonicalize() == Error::Interrupted => continue,
48            Err(e) => return Err(e),
49        }
50
51        if cursor.written() == prev_written {
52            return Err(Error::UnexpectedEof);
53        }
54    }
55
56    Ok(())
57}
58
/// Default [`Read::read_to_end`] implementation with optional size hint.
///
/// Reads from `r` until a read reports zero bytes, appending everything to
/// the end of `buf`, and returns the number of bytes appended by this call.
///
/// `size_hint`, when present, is the caller's estimate of how many bytes
/// remain. It only influences how large the individual reads are allowed to
/// be; it is never treated as a hard limit.
///
/// # Errors
///
/// * [`Error::NoMemory`] if growing `buf` fails.
/// * Any error returned by the reader, except [`Error::Interrupted`], which
///   is retried. Bytes that were written before the error was reported stay
///   in `buf`.
#[cfg(feature = "alloc")]
pub fn default_read_to_end<R: Read + ?Sized>(
    r: &mut R,
    buf: &mut Vec<u8>,
    size_hint: Option<usize>,
) -> Result<usize> {
    use core::io::BorrowedBuf;

    use crate::DEFAULT_BUF_SIZE;

    // Remember the initial state so the number of appended bytes can be
    // reported and so we can tell whether the vector has been reallocated.
    let start_len = buf.len();
    let start_cap = buf.capacity();
    // Optionally limit the maximum bytes read on each iteration.
    // This adds an arbitrary fiddle factor to allow for more data than we expect.
    // If the arithmetic overflows, or no hint was given, fall back to
    // `DEFAULT_BUF_SIZE`.
    let mut max_read_size = size_hint
        .and_then(|s| {
            s.checked_add(1024)?
                .checked_next_multiple_of(DEFAULT_BUF_SIZE)
        })
        .unwrap_or(DEFAULT_BUF_SIZE);

    // Size of the stack buffer used to test for EOF without growing `buf`.
    const PROBE_SIZE: usize = 32;

    // Reads up to `PROBE_SIZE` bytes into a stack buffer, appends them to
    // `buf` and returns how many bytes were read. `Interrupted` is retried.
    fn small_probe_read<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<usize> {
        let mut probe = [0u8; PROBE_SIZE];

        loop {
            match r.read(&mut probe) {
                Ok(n) => {
                    // there is no way to recover from allocation failure here
                    // because the data has already been read.
                    buf.extend_from_slice(&probe[..n]);
                    return Ok(n);
                }
                Err(e) if e.canonicalize() == Error::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
    }

    // With no useful hint and less than `PROBE_SIZE` bytes of spare capacity,
    // probe first: if the reader is already at EOF we return without ever
    // growing `buf`.
    if (size_hint.is_none() || size_hint == Some(0)) && buf.capacity() - buf.len() < PROBE_SIZE {
        let read = small_probe_read(r, buf)?;

        if read == 0 {
            return Ok(0);
        }
    }

    loop {
        if buf.len() == buf.capacity() && buf.capacity() == start_cap {
            // The buffer might be an exact fit. Let's read into a probe buffer
            // and see if it returns `Ok(0)`. If so, we've avoided an
            // unnecessary doubling of the capacity. But if not, append the
            // probe buffer to the primary buffer and let its capacity grow.
            let read = small_probe_read(r, buf)?;

            if read == 0 {
                return Ok(buf.len() - start_len);
            }
        }

        if buf.len() == buf.capacity() {
            // buf is full, need more space
            buf.try_reserve(PROBE_SIZE).map_err(|_| Error::NoMemory)?;
        }

        // Expose at most `max_read_size` bytes of the vector's spare capacity
        // to the reader through a `BorrowedBuf`.
        let mut spare = buf.spare_capacity_mut();
        let buf_len = spare.len().min(max_read_size);
        spare = &mut spare[..buf_len];
        let mut read_buf: BorrowedBuf<'_> = spare.into();

        // Note that we don't track already initialized bytes here, but this is fine
        // because we explicitly limit the read size
        let mut cursor = read_buf.unfilled();
        let result = loop {
            match r.read_buf(cursor.reborrow()) {
                Err(e) if e.canonicalize() == Error::Interrupted => continue,
                // Do not stop now in case of error: we might have received both data
                // and an error
                res => break res,
            }
        };

        let bytes_read = cursor.written();
        // NOTE(review): presumably reports whether the reader initialized the
        // whole borrowed buffer (e.g. via `ensure_init`) - confirm against the
        // `BorrowedBuf` API of the toolchain in use.
        let is_init = read_buf.is_init();

        // SAFETY: BorrowedBuf's invariants mean this much memory is initialized.
        unsafe {
            let new_len = bytes_read + buf.len();
            buf.set_len(new_len);
        }

        // Now that all data is pushed to the vector, we can fail without data loss
        result?;

        // A successful read of zero bytes is EOF.
        if bytes_read == 0 {
            return Ok(buf.len() - start_len);
        }

        // Use heuristics to determine the max read size if no initial size hint was provided
        if size_hint.is_none() {
            // The reader left the borrowed buffer not fully initialized, so it
            // does not appear to call ensure_init(). In that case we no longer
            // need to restrict read sizes to avoid initialization costs.
            // Note that the cap is lifted on the first such read; no count of
            // consecutive short reads is kept here.
            if !is_init {
                max_read_size = usize::MAX;
            }
            // we have passed a larger buffer than previously and the
            // reader still hasn't returned a short read
            else if buf_len >= max_read_size && bytes_read == buf_len {
                max_read_size = max_read_size.saturating_mul(2);
            }
        }
    }
}
178
/// Runs `f` on the raw bytes of `buf` and keeps what it appended only if the
/// appended bytes are valid UTF-8.
///
/// On success the result of `f` is returned unchanged. If the appended bytes
/// are not valid UTF-8, `buf` is truncated back to its original length and
/// [`Error::IllegalBytes`] is returned, unless `f` itself failed, in which
/// case that error takes precedence. The truncation is performed by a drop
/// guard, so it also happens if `f` unwinds.
///
/// # Safety
///
/// `f` must only append to the vector it is given. Only the bytes past the
/// original length are validated, so modifying the existing contents could
/// leave `buf` holding invalid UTF-8.
#[cfg(feature = "alloc")]
pub(crate) unsafe fn append_to_string<F>(buf: &mut String, f: F) -> Result<usize>
where
    F: FnOnce(&mut Vec<u8>) -> Result<usize>,
{
    /// Restores the vector's length to `keep` when dropped.
    struct Rollback<'a> {
        bytes: &'a mut Vec<u8>,
        keep: usize,
    }

    impl Drop for Rollback<'_> {
        fn drop(&mut self) {
            // SAFETY: `keep` is either the original length or the length after
            // the appended bytes were validated; this relies on the caller's
            // promise that `f` only appends.
            unsafe {
                self.bytes.set_len(self.keep);
            }
        }
    }

    let keep = buf.len();
    // SAFETY: the guard below cuts off any appended bytes that fail UTF-8
    // validation before the `String` can be observed again.
    let bytes = unsafe { buf.as_mut_vec() };
    let mut rollback = Rollback { bytes, keep };

    let outcome = f(rollback.bytes);

    // SAFETY: the caller promises to only append data to `buf`
    let tail = unsafe { rollback.bytes.get_unchecked(rollback.keep..) };
    if str::from_utf8(tail).is_ok() {
        // Commit: the guard now preserves the appended bytes.
        rollback.keep = rollback.bytes.len();
        outcome
    } else {
        outcome.and(Err(Error::IllegalBytes))
    }
}
212
/// Default [`Read::read_to_string`] implementation with optional size hint.
///
/// Reads `r` to the end, appending to `buf`, and returns the number of bytes
/// appended. `size_hint` is forwarded to [`default_read_to_end`].
///
/// # Errors
///
/// Returns any error from the underlying read, or the error produced by
/// `append_to_string` when the appended bytes are not valid UTF-8; in both
/// cases `buf` is left at its original length.
#[cfg(feature = "alloc")]
pub fn default_read_to_string<R: Read + ?Sized>(
    r: &mut R,
    buf: &mut String,
    size_hint: Option<usize>,
) -> Result<usize> {
    // We deliberately bypass `r.read_to_end()`: it would receive the raw
    // `Vec<u8>` behind `buf`, and an arbitrary implementation could rewrite
    // the existing contents instead of only appending to them.
    //
    // Going through our own `default_read_to_end` avoids re-validating the
    // whole buffer, because it is known to write only past the current end.
    let fill = |bytes: &mut Vec<u8>| default_read_to_end(r, bytes, size_hint);

    // SAFETY: `default_read_to_end` only appends to the vector.
    unsafe { append_to_string(buf, fill) }
}
231
232/// The `Read` trait allows for reading bytes from a source.
233///
234/// See [`std::io::Read`] for more details.
235pub trait Read {
236    /// Pull some bytes from this source into the specified buffer, returning
237    /// how many bytes were read.
238    fn read(&mut self, buf: &mut [u8]) -> Result<usize>;
239
240    /// Read the exact number of bytes required to fill `buf`.
241    fn read_exact(&mut self, buf: &mut [u8]) -> Result<()> {
242        default_read_exact(self, buf)
243    }
244
245    /// Pull some bytes from this source into the specified buffer.
246    ///
247    /// This method makes it possible to return both data and an error but it is advised against.
248    fn read_buf(&mut self, buf: BorrowedCursor<'_>) -> Result<()> {
249        default_read_buf(|b| self.read(b), buf)
250    }
251
252    /// Reads the exact number of bytes required to fill `cursor`.
253    ///
254    /// If this function returns an error, all bytes read will be appended to `cursor`.
255    fn read_buf_exact(&mut self, cursor: BorrowedCursor<'_>) -> Result<()> {
256        default_read_buf_exact(self, cursor)
257    }
258
259    /// Read all bytes until EOF in this source, placing them into `buf`.
260    #[cfg(feature = "alloc")]
261    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<usize> {
262        default_read_to_end(self, buf, None)
263    }
264
265    /// Read all bytes until EOF in this source, appending them to `buf`.
266    #[cfg(feature = "alloc")]
267    fn read_to_string(&mut self, buf: &mut String) -> Result<usize> {
268        default_read_to_string(self, buf, None)
269    }
270
271    /// Creates a "by reference" adapter for this instance of `Read`.
272    ///
273    /// The returned `adapter` also implements Read and will simply borrow this
274    /// current reader.
275    fn by_ref(&mut self) -> &mut Self
276    where
277        Self: Sized,
278    {
279        self
280    }
281
282    /// Creates an adapter which will chain this stream with another.
283    ///
284    /// The returned `Read` instance will first read all bytes from this object
285    /// until EOF is encountered. Afterwards the output is equivalent to the
286    /// output of `next`.
287    fn chain<R: Read>(self, next: R) -> Chain<Self, R>
288    where
289        Self: Sized,
290    {
291        Chain::new(self, next)
292    }
293
294    /// Creates an adapter which will read at most `limit` bytes from it.
295    ///
296    /// This function returns a new instance of `Read` which will read at most
297    /// `limit` bytes, after which it will always return EOF ([`Ok(0)`]). Any
298    /// read errors will not count towards the number of bytes read and future
299    /// calls to [`read()`] may succeed.
300    ///
301    /// [`Ok(0)`]: Ok
302    /// [`read()`]: Read::read
303    fn take(self, limit: u64) -> Take<Self>
304    where
305        Self: Sized,
306    {
307        Take::new(self, limit)
308    }
309}
310
/// Reads everything from a [reader][Read] into a freshly allocated
/// [`String`].
///
/// This is a convenience wrapper around [`Read::read_to_string`].
///
/// See [`std::io::read_to_string`] for more details.
///
/// # Errors
///
/// Returns any error reported by [`Read::read_to_string`]; the partially
/// filled string is discarded in that case.
#[cfg(feature = "alloc")]
pub fn read_to_string<R: Read>(mut reader: R) -> Result<String> {
    let mut text = String::new();
    reader.read_to_string(&mut text).map(|_| text)
}
322
323/// A `BufRead` is a type of `Read`er which has an internal buffer, allowing it
324/// to perform extra ways of reading.
325///
326/// See [`std::io::BufRead`] for more details.
327pub trait BufRead: Read {
328    /// Returns the contents of the internal buffer, filling it with more data, via `Read` methods,
329    /// if empty.
330    fn fill_buf(&mut self) -> Result<&[u8]>;
331
332    /// Marks the given `amount` of additional bytes from the internal buffer as having been read.
333    /// Subsequent calls to `read` only return bytes that have not been marked as read.
334    fn consume(&mut self, amount: usize);
335
336    /// Checks if there is any data left to be `read`.
337    fn has_data_left(&mut self) -> Result<bool> {
338        self.fill_buf().map(|b| !b.is_empty())
339    }
340
341    /// Skips all bytes until the delimiter `byte` or EOF is reached.
342    fn skip_until(&mut self, byte: u8) -> Result<usize> {
343        let mut read = 0;
344        loop {
345            let (done, used) = {
346                let available = self.fill_buf()?;
347                match memchr::memchr(byte, available) {
348                    Some(i) => (true, i + 1),
349                    None => (false, available.len()),
350                }
351            };
352            self.consume(used);
353            read += used;
354            if done || used == 0 {
355                return Ok(read);
356            }
357        }
358    }
359
360    /// Read all bytes into `buf` until the delimiter `byte` or EOF is reached.
361    #[cfg(feature = "alloc")]
362    fn read_until(&mut self, byte: u8, buf: &mut Vec<u8>) -> Result<usize> {
363        let mut read = 0;
364        loop {
365            let (done, used) = {
366                let available = self.fill_buf()?;
367                match memchr::memchr(byte, available) {
368                    Some(i) => {
369                        buf.extend_from_slice(&available[..=i]);
370                        (true, i + 1)
371                    }
372                    None => {
373                        buf.extend_from_slice(available);
374                        (false, available.len())
375                    }
376                }
377            };
378            self.consume(used);
379            read += used;
380            if done || used == 0 {
381                return Ok(read);
382            }
383        }
384    }
385
386    /// Read all bytes until a newline (the `0xA` byte) is reached, and append
387    /// them to the provided `String` buffer.
388    #[cfg(feature = "alloc")]
389    fn read_line(&mut self, buf: &mut String) -> Result<usize> {
390        unsafe { super::append_to_string(buf, |b| self.read_until(b'\n', b)) }
391    }
392
393    /// Returns an iterator over the contents of this reader split on the byte
394    /// `byte`.
395    #[cfg(feature = "alloc")]
396    fn split(self, byte: u8) -> Split<Self>
397    where
398        Self: Sized,
399    {
400        Split {
401            buf: self,
402            delim: byte,
403        }
404    }
405
406    /// Returns an iterator over the lines of this reader.
407    #[cfg(feature = "alloc")]
408    fn lines(self) -> Lines<Self>
409    where
410        Self: Sized,
411    {
412        Lines { buf: self }
413    }
414}
415
/// Iterator yielding the byte chunks of a `BufRead` separated by a given
/// delimiter byte.
///
/// Values of this type are normally obtained from [`split`] on a `BufRead`;
/// see the documentation of [`split`] for more details.
///
/// [`split`]: BufRead::split
#[cfg(feature = "alloc")]
#[derive(Debug)]
pub struct Split<B> {
    buf: B,
    delim: u8,
}

#[cfg(feature = "alloc")]
impl<B: BufRead> Iterator for Split<B> {
    type Item = Result<Vec<u8>>;

    /// Reads up to and including the next delimiter and yields the chunk
    /// without the trailing delimiter. Yields `None` once nothing more can be
    /// read, and `Some(Err(_))` when the underlying read fails.
    fn next(&mut self) -> Option<Result<Vec<u8>>> {
        let mut chunk = Vec::new();
        match self.buf.read_until(self.delim, &mut chunk) {
            Err(e) => Some(Err(e)),
            Ok(0) => None,
            Ok(_) => {
                // The final chunk before EOF may not end with the delimiter.
                let last = chunk.len() - 1;
                if chunk[last] == self.delim {
                    chunk.pop();
                }
                Some(Ok(chunk))
            }
        }
    }
}
448
/// Iterator yielding the lines of a `BufRead`.
///
/// Values of this type are normally obtained from [`lines`] on a `BufRead`;
/// see the documentation of [`lines`] for more details.
///
/// [`lines`]: BufRead::lines
#[cfg(feature = "alloc")]
#[derive(Debug)]
pub struct Lines<B> {
    buf: B,
}

#[cfg(feature = "alloc")]
impl<B: BufRead> Iterator for Lines<B> {
    type Item = Result<String>;

    /// Reads the next line and yields it without its line terminator (`\n`,
    /// or `\r\n`). Yields `None` once nothing more can be read, and
    /// `Some(Err(_))` when the underlying read fails.
    fn next(&mut self) -> Option<Result<String>> {
        let mut line = String::new();
        let outcome = self.buf.read_line(&mut line);
        match outcome {
            Err(e) => Some(Err(e)),
            Ok(0) => None,
            Ok(_) => {
                // A carriage return is dropped only when it directly precedes
                // the newline that was just removed.
                if line.ends_with('\n') {
                    line.pop();
                    if line.ends_with('\r') {
                        line.pop();
                    }
                }
                Some(Ok(line))
            }
        }
    }
}