nom_bufreader/bufreader.rs
1// originally extracted from Rust's std::io::BufReader
2//
3// this version allows refilling even if the buffer still has some data
4
5use std::cmp;
6use std::fmt;
7use std::io::{self, BufRead, Error, ErrorKind, IoSliceMut, Read, Result, Seek, SeekFrom};
8
/// Default capacity, in bytes, of the internal buffer allocated by
/// `BufReader::new` (8 KiB).
pub(crate) const DEFAULT_BUF_SIZE: usize = 8 * 1024;
10
/// Reads from `this` until `buf` has been filled completely.
///
/// Reads that fail with [`ErrorKind::Interrupted`] are retried. Any other
/// error is returned as-is. If the reader reports end of input (a read of
/// zero bytes) before `buf` is full, an [`ErrorKind::UnexpectedEof`] error is
/// returned; the bytes read so far remain in the front of `buf`.
fn default_read_exact<R: Read + ?Sized>(this: &mut R, mut buf: &mut [u8]) -> Result<()> {
    loop {
        if buf.is_empty() {
            return Ok(());
        }
        match this.read(buf) {
            // End of input while bytes are still missing.
            Ok(0) => {
                return Err(Error::new(
                    ErrorKind::UnexpectedEof,
                    "failed to fill whole buffer",
                ));
            }
            Ok(filled) => {
                // Move the slice out before re-borrowing its tail so that the
                // borrow checker accepts the reassignment.
                let whole = buf;
                buf = &mut whole[filled..];
            }
            Err(err) => {
                if err.kind() != ErrorKind::Interrupted {
                    return Err(err);
                }
                // Interrupted: simply try again.
            }
        }
    }
}
32
/// The `BufReader<R>` struct adds buffering to any reader.
///
/// It can be excessively inefficient to work directly with a [`Read`] instance.
/// For example, every call to [`read`][`TcpStream::read`] on [`TcpStream`]
/// results in a system call. A `BufReader<R>` performs large, infrequent reads on
/// the underlying [`Read`] and maintains an in-memory buffer of the results.
///
/// `BufReader<R>` can improve the speed of programs that make *small* and
/// *repeated* read calls to the same file or network socket. It does not
/// help when reading very large amounts at once, or reading just one or a few
/// times. It also provides no advantage when reading from a source that is
/// already in memory, like a [`Vec`]`<u8>`.
///
/// When the `BufReader<R>` is dropped, the contents of its buffer will be
/// discarded. Creating multiple instances of a `BufReader<R>` on the same
/// stream can cause data loss. Reading from the underlying reader after
/// unwrapping the `BufReader<R>` with [`BufReader::into_inner`] can also cause
/// data loss.
///
/// **Note: this is a fork from `std::io::BufReader` that reads more data in
/// `fill_buf` even if there is already some data in the buffer**
///
/// [`TcpStream::read`]: std::net::TcpStream::read
/// [`TcpStream`]: std::net::TcpStream
///
/// # Examples
///
/// ```no_run
/// use std::io::prelude::*;
/// use std::io::BufReader;
/// use std::fs::File;
///
/// fn main() -> std::io::Result<()> {
///     let f = File::open("log.txt")?;
///     let mut reader = BufReader::new(f);
///
///     let mut line = String::new();
///     let len = reader.read_line(&mut line)?;
///     println!("First line is {} bytes long", len);
///     Ok(())
/// }
/// ```
pub struct BufReader<R> {
    // The wrapped reader that data is pulled from.
    inner: R,
    // Backing storage; its length is the fixed capacity of the buffer.
    buf: Vec<u8>,
    // Index in `buf` of the first byte that has not been consumed yet.
    pos: usize,
    // Index in `buf` one past the last byte read from `inner`;
    // the unread data is `buf[pos..cap]`.
    cap: usize,
}
82
83impl<R: Read> BufReader<R> {
84 /// Creates a new `BufReader<R>` with a default buffer capacity. The default is currently 8 KB,
85 /// but may change in the future.
86 ///
87 /// # Examples
88 ///
89 /// ```no_run
90 /// use std::io::BufReader;
91 /// use std::fs::File;
92 ///
93 /// fn main() -> std::io::Result<()> {
94 /// let f = File::open("log.txt")?;
95 /// let reader = BufReader::new(f);
96 /// Ok(())
97 /// }
98 /// ```
99 pub fn new(inner: R) -> BufReader<R> {
100 BufReader::with_capacity(DEFAULT_BUF_SIZE, inner)
101 }
102
103 /// Creates a new `BufReader<R>` with the specified buffer capacity.
104 ///
105 /// # Examples
106 ///
107 /// Creating a buffer with ten bytes of capacity:
108 ///
109 /// ```no_run
110 /// use std::io::BufReader;
111 /// use std::fs::File;
112 ///
113 /// fn main() -> std::io::Result<()> {
114 /// let f = File::open("log.txt")?;
115 /// let reader = BufReader::with_capacity(10, f);
116 /// Ok(())
117 /// }
118 /// ```
119 pub fn with_capacity(capacity: usize, inner: R) -> BufReader<R> {
120 let buf = vec![0; capacity];
121 BufReader {
122 inner,
123 buf,
124 pos: 0,
125 cap: 0,
126 }
127 }
128}
129
130impl<R> BufReader<R> {
131 /// Gets a reference to the underlying reader.
132 ///
133 /// It is inadvisable to directly read from the underlying reader.
134 ///
135 /// # Examples
136 ///
137 /// ```no_run
138 /// use std::io::BufReader;
139 /// use std::fs::File;
140 ///
141 /// fn main() -> std::io::Result<()> {
142 /// let f1 = File::open("log.txt")?;
143 /// let reader = BufReader::new(f1);
144 ///
145 /// let f2 = reader.get_ref();
146 /// Ok(())
147 /// }
148 /// ```
149 pub fn get_ref(&self) -> &R {
150 &self.inner
151 }
152
153 /// Gets a mutable reference to the underlying reader.
154 ///
155 /// It is inadvisable to directly read from the underlying reader.
156 ///
157 /// # Examples
158 ///
159 /// ```no_run
160 /// use std::io::BufReader;
161 /// use std::fs::File;
162 ///
163 /// fn main() -> std::io::Result<()> {
164 /// let f1 = File::open("log.txt")?;
165 /// let mut reader = BufReader::new(f1);
166 ///
167 /// let f2 = reader.get_mut();
168 /// Ok(())
169 /// }
170 /// ```
171 pub fn get_mut(&mut self) -> &mut R {
172 &mut self.inner
173 }
174
175 /// Returns a reference to the internally buffered data.
176 ///
177 /// Unlike [`fill_buf`], this will not attempt to fill the buffer if it is empty.
178 ///
179 /// [`fill_buf`]: BufRead::fill_buf
180 ///
181 /// # Examples
182 ///
183 /// ```no_run
184 /// use std::io::{BufReader, BufRead};
185 /// use std::fs::File;
186 ///
187 /// fn main() -> std::io::Result<()> {
188 /// let f = File::open("log.txt")?;
189 /// let mut reader = BufReader::new(f);
190 /// assert!(reader.buffer().is_empty());
191 ///
192 /// if reader.fill_buf()?.len() > 0 {
193 /// assert!(!reader.buffer().is_empty());
194 /// }
195 /// Ok(())
196 /// }
197 /// ```
198 pub fn buffer(&self) -> &[u8] {
199 &self.buf[self.pos..self.cap]
200 }
201
202 /// Returns the number of bytes the internal buffer can hold at once.
203 ///
204 /// # Examples
205 ///
206 /// ```no_run
207 /// use std::io::{BufReader, BufRead};
208 /// use std::fs::File;
209 ///
210 /// fn main() -> std::io::Result<()> {
211 /// let f = File::open("log.txt")?;
212 /// let mut reader = BufReader::new(f);
213 ///
214 /// let capacity = reader.capacity();
215 /// let buffer = reader.fill_buf()?;
216 /// assert!(buffer.len() <= capacity);
217 /// Ok(())
218 /// }
219 /// ```
220 pub fn capacity(&self) -> usize {
221 self.buf.len()
222 }
223
224 /// Unwraps this `BufReader<R>`, returning the underlying reader.
225 ///
226 /// Note that any leftover data in the internal buffer is lost. Therefore,
227 /// a following read from the underlying reader may lead to data loss.
228 ///
229 /// # Examples
230 ///
231 /// ```no_run
232 /// use std::io::BufReader;
233 /// use std::fs::File;
234 ///
235 /// fn main() -> std::io::Result<()> {
236 /// let f1 = File::open("log.txt")?;
237 /// let reader = BufReader::new(f1);
238 ///
239 /// let f2 = reader.into_inner();
240 /// Ok(())
241 /// }
242 /// ```
243 pub fn into_inner(self) -> R {
244 self.inner
245 }
246
247 /// Invalidates all data in the internal buffer.
248 #[inline]
249 fn discard_buffer(&mut self) {
250 self.pos = 0;
251 self.cap = 0;
252 }
253
254 fn reset_buffer_position(&mut self) {
255 //println!("resetting buffer at pos: {} capacity: {}", self.pos, self.cap);
256 if self.cap - self.pos > 0 {
257 for i in 0..(self.cap - self.pos) {
258 //println!("buf[{}] = buf[{}]", i, self.pos + i);
259 self.buf[i] = self.buf[self.pos + i];
260 }
261 }
262 self.cap = self.cap - self.pos;
263 self.pos = 0;
264 }
265}
266
267impl<R: Seek> BufReader<R> {
268 /// Seeks relative to the current position. If the new position lies within the buffer,
269 /// the buffer will not be flushed, allowing for more efficient seeks.
270 /// This method does not return the location of the underlying reader, so the caller
271 /// must track this information themselves if it is required.
272 pub fn seek_relative(&mut self, offset: i64) -> io::Result<()> {
273 let pos = self.pos as u64;
274 if offset < 0 {
275 if let Some(new_pos) = pos.checked_sub((-offset) as u64) {
276 self.pos = new_pos as usize;
277 return Ok(());
278 }
279 } else {
280 if let Some(new_pos) = pos.checked_add(offset as u64) {
281 if new_pos <= self.cap as u64 {
282 self.pos = new_pos as usize;
283 return Ok(());
284 }
285 }
286 }
287 self.seek(SeekFrom::Current(offset)).map(drop)
288 }
289}
290
291impl<R: Read> Read for BufReader<R> {
292 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
293 // If we don't have any buffered data and we're doing a massive read
294 // (larger than our internal buffer), bypass our internal buffer
295 // entirely.
296 if self.pos == self.cap && buf.len() >= self.buf.len() {
297 self.discard_buffer();
298 return self.inner.read(buf);
299 }
300 let nread = {
301 let mut rem = self.fill_buf()?;
302 rem.read(buf)?
303 };
304 self.consume(nread);
305 Ok(nread)
306 }
307
308 // Small read_exacts from a BufReader are extremely common when used with a deserializer.
309 // The default implementation calls read in a loop, which results in surprisingly poor code
310 // generation for the common path where the buffer has enough bytes to fill the passed-in
311 // buffer.
312 fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
313 if self.buffer().len() >= buf.len() {
314 buf.copy_from_slice(&self.buffer()[..buf.len()]);
315 self.consume(buf.len());
316 return Ok(());
317 }
318
319 default_read_exact(self, buf)
320 }
321
322 fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> {
323 let total_len = bufs.iter().map(|b| b.len()).sum::<usize>();
324 if self.pos == self.cap && total_len >= self.buf.len() {
325 self.discard_buffer();
326 return self.inner.read_vectored(bufs);
327 }
328 let nread = {
329 let mut rem = self.fill_buf()?;
330 rem.read_vectored(bufs)?
331 };
332 self.consume(nread);
333 Ok(nread)
334 }
335}
336
337impl<R: Read> BufRead for BufReader<R> {
338 fn fill_buf(&mut self) -> io::Result<&[u8]> {
339 if self.cap == self.buf.len() {
340 if self.pos == 0 {
341 return Err(io::Error::new(
342 io::ErrorKind::Interrupted,
343 "buffer completely filled",
344 ));
345 } else {
346 self.reset_buffer_position();
347 }
348 }
349
350 let read = self.inner.read(&mut self.buf[self.cap..])?;
351 self.cap += read;
352 Ok(&self.buf[self.pos..self.cap])
353 }
354
355 fn consume(&mut self, amt: usize) {
356 self.pos = cmp::min(self.pos + amt, self.cap);
357 }
358}
359
360impl<R> fmt::Debug for BufReader<R>
361where
362 R: fmt::Debug,
363{
364 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
365 fmt.debug_struct("BufReader")
366 .field("reader", &self.inner)
367 .field(
368 "buffer",
369 &format_args!("{}/{}", self.cap - self.pos, self.buf.len()),
370 )
371 .finish()
372 }
373}
374
375impl<R: Seek> Seek for BufReader<R> {
376 /// Seek to an offset, in bytes, in the underlying reader.
377 ///
378 /// The position used for seeking with [`SeekFrom::Current`]`(_)` is the
379 /// position the underlying reader would be at if the `BufReader<R>` had no
380 /// internal buffer.
381 ///
382 /// Seeking always discards the internal buffer, even if the seek position
383 /// would otherwise fall within it. This guarantees that calling
384 /// [`BufReader::into_inner()`] immediately after a seek yields the underlying reader
385 /// at the same position.
386 ///
387 /// To seek without discarding the internal buffer, use [`BufReader::seek_relative`].
388 ///
389 /// See [`std::io::Seek`] for more details.
390 ///
391 /// Note: In the edge case where you're seeking with [`SeekFrom::Current`]`(n)`
392 /// where `n` minus the internal buffer length overflows an `i64`, two
393 /// seeks will be performed instead of one. If the second seek returns
394 /// [`Err`], the underlying reader will be left at the same position it would
395 /// have if you called `seek` with [`SeekFrom::Current`]`(0)`.
396 ///
397 /// [`std::io::Seek`]: Seek
398 fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
399 let result: u64;
400 if let SeekFrom::Current(n) = pos {
401 let remainder = (self.cap - self.pos) as i64;
402 // it should be safe to assume that remainder fits within an i64 as the alternative
403 // means we managed to allocate 8 exbibytes and that's absurd.
404 // But it's not out of the realm of possibility for some weird underlying reader to
405 // support seeking by i64::MIN so we need to handle underflow when subtracting
406 // remainder.
407 if let Some(offset) = n.checked_sub(remainder) {
408 result = self.inner.seek(SeekFrom::Current(offset))?;
409 } else {
410 // seek backwards by our remainder, and then by the offset
411 self.inner.seek(SeekFrom::Current(-remainder))?;
412 self.discard_buffer();
413 result = self.inner.seek(SeekFrom::Current(n))?;
414 }
415 } else {
416 // Seeking with Start/End doesn't care about our buffer length.
417 result = self.inner.seek(pos)?;
418 }
419 self.discard_buffer();
420 Ok(result)
421 }
422
423 /// Returns the current seek position from the start of the stream.
424 ///
425 /// The value returned is equivalent to `self.seek(SeekFrom::Current(0))`
426 /// but does not flush the internal buffer. Due to this optimization the
427 /// function does not guarantee that calling `.into_inner()` immediately
428 /// afterwards will yield the underlying reader at the same position. Use
429 /// [`BufReader::seek`] instead if you require that guarantee.
430 ///
431 /// # Panics
432 ///
433 /// This function will panic if the position of the inner reader is smaller
434 /// than the amount of buffered data. That can happen if the inner reader
435 /// has an incorrect implementation of [`Seek::stream_position`], or if the
436 /// position has gone out of sync due to calling [`Seek::seek`] directly on
437 /// the underlying reader.
438 ///
439 /// # Example
440 ///
441 /// ```no_run
442 /// use std::{
443 /// io::{self, BufRead, BufReader, Seek},
444 /// fs::File,
445 /// };
446 ///
447 /// fn main() -> io::Result<()> {
448 /// let mut f = BufReader::new(File::open("foo.txt")?);
449 ///
450 /// let before = f.stream_position()?;
451 /// f.read_line(&mut String::new())?;
452 /// let after = f.stream_position()?;
453 ///
454 /// println!("The first line was {} bytes long", after - before);
455 /// Ok(())
456 /// }
457 /// ```
458 fn stream_position(&mut self) -> io::Result<u64> {
459 let remainder = (self.cap - self.pos) as u64;
460 self.inner.stream_position().map(|pos| {
461 pos.checked_sub(remainder).expect(
462 "overflow when subtracting remaining buffer size from inner stream position",
463 )
464 })
465 }
466}