nom_bufreader/
bufreader.rs

1// originally extracted from Rust's std::io::BufReader
2//
3// this version allows refilling even if the buffer still has some data
4
5use std::cmp;
6use std::fmt;
7use std::io::{self, BufRead, Error, ErrorKind, IoSliceMut, Read, Result, Seek, SeekFrom};
8
/// Capacity, in bytes, of the internal buffer allocated by `BufReader::new` (8 KiB).
pub(crate) const DEFAULT_BUF_SIZE: usize = 8 * 1024;
10
/// Fills `buf` completely from `this`, issuing as many `read` calls as needed.
///
/// Reads that fail with [`ErrorKind::Interrupted`] are retried; any other error is
/// returned unchanged. If the reader reports end of input (`Ok(0)`) while `buf` still
/// has room, an [`ErrorKind::UnexpectedEof`] error is returned. An empty `buf`
/// succeeds without touching the reader.
fn default_read_exact<R: Read + ?Sized>(this: &mut R, mut buf: &mut [u8]) -> Result<()> {
    loop {
        if buf.is_empty() {
            return Ok(());
        }
        match this.read(buf) {
            // End of input while bytes are still owed to the caller.
            Ok(0) => {
                return Err(Error::new(
                    ErrorKind::UnexpectedEof,
                    "failed to fill whole buffer",
                ));
            }
            Ok(filled) => {
                // Move the slice out of `buf` so its unfilled tail can be re-borrowed.
                let whole = std::mem::take(&mut buf);
                buf = &mut whole[filled..];
            }
            Err(err) => {
                // An interrupted read is simply retried on the next iteration.
                if err.kind() != ErrorKind::Interrupted {
                    return Err(err);
                }
            }
        }
    }
}
32
/// The `BufReader<R>` struct adds buffering to any reader.
///
/// It can be excessively inefficient to work directly with a [`Read`] instance.
/// For example, every call to [`read`][`TcpStream::read`] on [`TcpStream`]
/// results in a system call. A `BufReader<R>` performs large, infrequent reads on
/// the underlying [`Read`] and maintains an in-memory buffer of the results.
///
/// `BufReader<R>` can improve the speed of programs that make *small* and
/// *repeated* read calls to the same file or network socket. It does not
/// help when reading very large amounts at once, or reading just one or a few
/// times. It also provides no advantage when reading from a source that is
/// already in memory, like a [`Vec`]`<u8>`.
///
/// When the `BufReader<R>` is dropped, the contents of its buffer will be
/// discarded. Creating multiple instances of a `BufReader<R>` on the same
/// stream can cause data loss. Reading from the underlying reader after
/// unwrapping the `BufReader<R>` with [`BufReader::into_inner`] can also cause
/// data loss.
///
/// **Note: this is a fork of `std::io::BufReader` that reads more data in
/// `fill_buf` even if there is already some data in the buffer.**
///
/// [`TcpStream::read`]: std::net::TcpStream
/// [`TcpStream`]: std::net::TcpStream
///
/// # Examples
///
/// ```no_run
/// use std::io::prelude::*;
/// use std::io::BufReader;
/// use std::fs::File;
///
/// fn main() -> std::io::Result<()> {
///     let f = File::open("log.txt")?;
///     let mut reader = BufReader::new(f);
///
///     let mut line = String::new();
///     let len = reader.read_line(&mut line)?;
///     println!("First line is {} bytes long", len);
///     Ok(())
/// }
/// ```
pub struct BufReader<R> {
    /// The wrapped reader that buffered data is pulled from.
    inner: R,
    /// Backing storage; its length is fixed at construction and is the buffer capacity.
    buf: Vec<u8>,
    /// Index into `buf` of the first byte that has not been consumed yet.
    pos: usize,
    /// Index into `buf` one past the last valid byte; `buf[pos..cap]` is the buffered data.
    cap: usize,
}
82
83impl<R: Read> BufReader<R> {
84    /// Creates a new `BufReader<R>` with a default buffer capacity. The default is currently 8 KB,
85    /// but may change in the future.
86    ///
87    /// # Examples
88    ///
89    /// ```no_run
90    /// use std::io::BufReader;
91    /// use std::fs::File;
92    ///
93    /// fn main() -> std::io::Result<()> {
94    ///     let f = File::open("log.txt")?;
95    ///     let reader = BufReader::new(f);
96    ///     Ok(())
97    /// }
98    /// ```
99    pub fn new(inner: R) -> BufReader<R> {
100        BufReader::with_capacity(DEFAULT_BUF_SIZE, inner)
101    }
102
103    /// Creates a new `BufReader<R>` with the specified buffer capacity.
104    ///
105    /// # Examples
106    ///
107    /// Creating a buffer with ten bytes of capacity:
108    ///
109    /// ```no_run
110    /// use std::io::BufReader;
111    /// use std::fs::File;
112    ///
113    /// fn main() -> std::io::Result<()> {
114    ///     let f = File::open("log.txt")?;
115    ///     let reader = BufReader::with_capacity(10, f);
116    ///     Ok(())
117    /// }
118    /// ```
119    pub fn with_capacity(capacity: usize, inner: R) -> BufReader<R> {
120        let buf = vec![0; capacity];
121        BufReader {
122            inner,
123            buf,
124            pos: 0,
125            cap: 0,
126        }
127    }
128}
129
130impl<R> BufReader<R> {
131    /// Gets a reference to the underlying reader.
132    ///
133    /// It is inadvisable to directly read from the underlying reader.
134    ///
135    /// # Examples
136    ///
137    /// ```no_run
138    /// use std::io::BufReader;
139    /// use std::fs::File;
140    ///
141    /// fn main() -> std::io::Result<()> {
142    ///     let f1 = File::open("log.txt")?;
143    ///     let reader = BufReader::new(f1);
144    ///
145    ///     let f2 = reader.get_ref();
146    ///     Ok(())
147    /// }
148    /// ```
149    pub fn get_ref(&self) -> &R {
150        &self.inner
151    }
152
153    /// Gets a mutable reference to the underlying reader.
154    ///
155    /// It is inadvisable to directly read from the underlying reader.
156    ///
157    /// # Examples
158    ///
159    /// ```no_run
160    /// use std::io::BufReader;
161    /// use std::fs::File;
162    ///
163    /// fn main() -> std::io::Result<()> {
164    ///     let f1 = File::open("log.txt")?;
165    ///     let mut reader = BufReader::new(f1);
166    ///
167    ///     let f2 = reader.get_mut();
168    ///     Ok(())
169    /// }
170    /// ```
171    pub fn get_mut(&mut self) -> &mut R {
172        &mut self.inner
173    }
174
175    /// Returns a reference to the internally buffered data.
176    ///
177    /// Unlike [`fill_buf`], this will not attempt to fill the buffer if it is empty.
178    ///
179    /// [`fill_buf`]: BufRead::fill_buf
180    ///
181    /// # Examples
182    ///
183    /// ```no_run
184    /// use std::io::{BufReader, BufRead};
185    /// use std::fs::File;
186    ///
187    /// fn main() -> std::io::Result<()> {
188    ///     let f = File::open("log.txt")?;
189    ///     let mut reader = BufReader::new(f);
190    ///     assert!(reader.buffer().is_empty());
191    ///
192    ///     if reader.fill_buf()?.len() > 0 {
193    ///         assert!(!reader.buffer().is_empty());
194    ///     }
195    ///     Ok(())
196    /// }
197    /// ```
198    pub fn buffer(&self) -> &[u8] {
199        &self.buf[self.pos..self.cap]
200    }
201
202    /// Returns the number of bytes the internal buffer can hold at once.
203    ///
204    /// # Examples
205    ///
206    /// ```no_run
207    /// use std::io::{BufReader, BufRead};
208    /// use std::fs::File;
209    ///
210    /// fn main() -> std::io::Result<()> {
211    ///     let f = File::open("log.txt")?;
212    ///     let mut reader = BufReader::new(f);
213    ///
214    ///     let capacity = reader.capacity();
215    ///     let buffer = reader.fill_buf()?;
216    ///     assert!(buffer.len() <= capacity);
217    ///     Ok(())
218    /// }
219    /// ```
220    pub fn capacity(&self) -> usize {
221        self.buf.len()
222    }
223
224    /// Unwraps this `BufReader<R>`, returning the underlying reader.
225    ///
226    /// Note that any leftover data in the internal buffer is lost. Therefore,
227    /// a following read from the underlying reader may lead to data loss.
228    ///
229    /// # Examples
230    ///
231    /// ```no_run
232    /// use std::io::BufReader;
233    /// use std::fs::File;
234    ///
235    /// fn main() -> std::io::Result<()> {
236    ///     let f1 = File::open("log.txt")?;
237    ///     let reader = BufReader::new(f1);
238    ///
239    ///     let f2 = reader.into_inner();
240    ///     Ok(())
241    /// }
242    /// ```
243    pub fn into_inner(self) -> R {
244        self.inner
245    }
246
247    /// Invalidates all data in the internal buffer.
248    #[inline]
249    fn discard_buffer(&mut self) {
250        self.pos = 0;
251        self.cap = 0;
252    }
253
254    fn reset_buffer_position(&mut self) {
255        //println!("resetting buffer at pos: {} capacity: {}", self.pos, self.cap);
256        if self.cap - self.pos > 0 {
257            for i in 0..(self.cap - self.pos) {
258                //println!("buf[{}] = buf[{}]", i, self.pos + i);
259                self.buf[i] = self.buf[self.pos + i];
260            }
261        }
262        self.cap = self.cap - self.pos;
263        self.pos = 0;
264    }
265}
266
267impl<R: Seek> BufReader<R> {
268    /// Seeks relative to the current position. If the new position lies within the buffer,
269    /// the buffer will not be flushed, allowing for more efficient seeks.
270    /// This method does not return the location of the underlying reader, so the caller
271    /// must track this information themselves if it is required.
272    pub fn seek_relative(&mut self, offset: i64) -> io::Result<()> {
273        let pos = self.pos as u64;
274        if offset < 0 {
275            if let Some(new_pos) = pos.checked_sub((-offset) as u64) {
276                self.pos = new_pos as usize;
277                return Ok(());
278            }
279        } else {
280            if let Some(new_pos) = pos.checked_add(offset as u64) {
281                if new_pos <= self.cap as u64 {
282                    self.pos = new_pos as usize;
283                    return Ok(());
284                }
285            }
286        }
287        self.seek(SeekFrom::Current(offset)).map(drop)
288    }
289}
290
291impl<R: Read> Read for BufReader<R> {
292    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
293        // If we don't have any buffered data and we're doing a massive read
294        // (larger than our internal buffer), bypass our internal buffer
295        // entirely.
296        if self.pos == self.cap && buf.len() >= self.buf.len() {
297            self.discard_buffer();
298            return self.inner.read(buf);
299        }
300        let nread = {
301            let mut rem = self.fill_buf()?;
302            rem.read(buf)?
303        };
304        self.consume(nread);
305        Ok(nread)
306    }
307
308    // Small read_exacts from a BufReader are extremely common when used with a deserializer.
309    // The default implementation calls read in a loop, which results in surprisingly poor code
310    // generation for the common path where the buffer has enough bytes to fill the passed-in
311    // buffer.
312    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
313        if self.buffer().len() >= buf.len() {
314            buf.copy_from_slice(&self.buffer()[..buf.len()]);
315            self.consume(buf.len());
316            return Ok(());
317        }
318
319        default_read_exact(self, buf)
320    }
321
322    fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> {
323        let total_len = bufs.iter().map(|b| b.len()).sum::<usize>();
324        if self.pos == self.cap && total_len >= self.buf.len() {
325            self.discard_buffer();
326            return self.inner.read_vectored(bufs);
327        }
328        let nread = {
329            let mut rem = self.fill_buf()?;
330            rem.read_vectored(bufs)?
331        };
332        self.consume(nread);
333        Ok(nread)
334    }
335}
336
337impl<R: Read> BufRead for BufReader<R> {
338    fn fill_buf(&mut self) -> io::Result<&[u8]> {
339        if self.cap == self.buf.len() {
340            if self.pos == 0 {
341                return Err(io::Error::new(
342                    io::ErrorKind::Interrupted,
343                    "buffer completely filled",
344                ));
345            } else {
346                self.reset_buffer_position();
347            }
348        }
349
350        let read = self.inner.read(&mut self.buf[self.cap..])?;
351        self.cap += read;
352        Ok(&self.buf[self.pos..self.cap])
353    }
354
355    fn consume(&mut self, amt: usize) {
356        self.pos = cmp::min(self.pos + amt, self.cap);
357    }
358}
359
360impl<R> fmt::Debug for BufReader<R>
361where
362    R: fmt::Debug,
363{
364    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
365        fmt.debug_struct("BufReader")
366            .field("reader", &self.inner)
367            .field(
368                "buffer",
369                &format_args!("{}/{}", self.cap - self.pos, self.buf.len()),
370            )
371            .finish()
372    }
373}
374
375impl<R: Seek> Seek for BufReader<R> {
376    /// Seek to an offset, in bytes, in the underlying reader.
377    ///
378    /// The position used for seeking with [`SeekFrom::Current`]`(_)` is the
379    /// position the underlying reader would be at if the `BufReader<R>` had no
380    /// internal buffer.
381    ///
382    /// Seeking always discards the internal buffer, even if the seek position
383    /// would otherwise fall within it. This guarantees that calling
384    /// [`BufReader::into_inner()`] immediately after a seek yields the underlying reader
385    /// at the same position.
386    ///
387    /// To seek without discarding the internal buffer, use [`BufReader::seek_relative`].
388    ///
389    /// See [`std::io::Seek`] for more details.
390    ///
391    /// Note: In the edge case where you're seeking with [`SeekFrom::Current`]`(n)`
392    /// where `n` minus the internal buffer length overflows an `i64`, two
393    /// seeks will be performed instead of one. If the second seek returns
394    /// [`Err`], the underlying reader will be left at the same position it would
395    /// have if you called `seek` with [`SeekFrom::Current`]`(0)`.
396    ///
397    /// [`std::io::Seek`]: Seek
398    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
399        let result: u64;
400        if let SeekFrom::Current(n) = pos {
401            let remainder = (self.cap - self.pos) as i64;
402            // it should be safe to assume that remainder fits within an i64 as the alternative
403            // means we managed to allocate 8 exbibytes and that's absurd.
404            // But it's not out of the realm of possibility for some weird underlying reader to
405            // support seeking by i64::MIN so we need to handle underflow when subtracting
406            // remainder.
407            if let Some(offset) = n.checked_sub(remainder) {
408                result = self.inner.seek(SeekFrom::Current(offset))?;
409            } else {
410                // seek backwards by our remainder, and then by the offset
411                self.inner.seek(SeekFrom::Current(-remainder))?;
412                self.discard_buffer();
413                result = self.inner.seek(SeekFrom::Current(n))?;
414            }
415        } else {
416            // Seeking with Start/End doesn't care about our buffer length.
417            result = self.inner.seek(pos)?;
418        }
419        self.discard_buffer();
420        Ok(result)
421    }
422
423    /// Returns the current seek position from the start of the stream.
424    ///
425    /// The value returned is equivalent to `self.seek(SeekFrom::Current(0))`
426    /// but does not flush the internal buffer. Due to this optimization the
427    /// function does not guarantee that calling `.into_inner()` immediately
428    /// afterwards will yield the underlying reader at the same position. Use
429    /// [`BufReader::seek`] instead if you require that guarantee.
430    ///
431    /// # Panics
432    ///
433    /// This function will panic if the position of the inner reader is smaller
434    /// than the amount of buffered data. That can happen if the inner reader
435    /// has an incorrect implementation of [`Seek::stream_position`], or if the
436    /// position has gone out of sync due to calling [`Seek::seek`] directly on
437    /// the underlying reader.
438    ///
439    /// # Example
440    ///
441    /// ```no_run
442    /// use std::{
443    ///     io::{self, BufRead, BufReader, Seek},
444    ///     fs::File,
445    /// };
446    ///
447    /// fn main() -> io::Result<()> {
448    ///     let mut f = BufReader::new(File::open("foo.txt")?);
449    ///
450    ///     let before = f.stream_position()?;
451    ///     f.read_line(&mut String::new())?;
452    ///     let after = f.stream_position()?;
453    ///
454    ///     println!("The first line was {} bytes long", after - before);
455    ///     Ok(())
456    /// }
457    /// ```
458    fn stream_position(&mut self) -> io::Result<u64> {
459        let remainder = (self.cap - self.pos) as u64;
460        self.inner.stream_position().map(|pos| {
461            pos.checked_sub(remainder).expect(
462                "overflow when subtracting remaining buffer size from inner stream position",
463            )
464        })
465    }
466}