1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
//! Streaming parser extensions for `BufRead`.
//!
//! This is a one-off experiment to see if we can extend the `std::io::BufRead`
//! traits with better parsing capabilities. The name is a riff on the
//! [`nom`](https://docs.rs/nom) parser, which you should probably check out.
//!
//! # Why?
//!
//! The purpose of this crate is to make authoring streaming parsers easier. And
//! the way we do this is by providing more operations that decouple
//! "looking at bytes" from "consuming bytes". So hence we introduce `fill_`
//! counterparts for `read_until` and `read_exact`. And two new methods:
//! `read_while` and `fill_while` that read bytes into a buffer based on a
//! predicate.
//!
//! Together this should make it easier to parse bytes from streams.
//!
//! # Methods
//!
//! Methods prefixed with `fill_` don't [`consume`] bytes. This means that in order to move the
//! `BufRead` cursor forward, the `consume` method needs to be called. This means the same bytes
//! can be read multiple times.
//!
//! Methods prefixed with `read_` *do* `consume` bytes. This means when this method is called, the
//! cursor moves forward, which means the same bytes *cannot* be read multiple times.
//!
//! - [`BufReadExt::fill_exact`] reads bytes until a buffer has been filled, doesn't consume bytes.
//! - [`BufReadExt::fill_until`] reads bytes until a byte has been encountered, doesn't consume bytes.
//! - [`BufReadExt::fill_while`] reads bytes based on a predicate, doesn't consume bytes.
//! - [`BufReadExt::read_while`] reads bytes based on a predicate, consumes bytes.
//!
//! [`consume`]: https://doc.rust-lang.org/std/io/trait.BufRead.html#tymethod.consume
//! [`BufReadExt::fill_exact`]: trait.BufReadExt.html#method.fill_exact
//! [`BufReadExt::fill_until`]: trait.BufReadExt.html#method.fill_until
//! [`BufReadExt::fill_while`]: trait.BufReadExt.html#method.fill_while
//! [`BufReadExt::read_while`]: trait.BufReadExt.html#method.read_while
//!
//! # Todos
//!
//! - `AsyncRead` support
//! - [byte-ordered reads/writes](https://github.com/async-rs/async-std/issues/578)
//!
//! # Examples
//!
//! ```
//! use std::io::{self, BufRead};
//! use omnom::prelude::*;
//!
//! let mut cursor = io::Cursor::new(b"lorem-ipsum");
//! let mut buf = vec![];
//!
//! // cursor is at 'l'
//! let num_bytes = cursor.fill_until(b'-', &mut buf)
//!     .expect("reading from cursor won't fail");
//! assert_eq!(buf, b"lorem-");
//! assert_eq!(num_bytes, 6);
//! cursor.consume(num_bytes);
//! buf.clear();
//!
//! // cursor is at 'i'
//! let num_bytes = cursor.fill_until(b'-', &mut buf)
//!     .expect("reading from cursor won't fail");
//! assert_eq!(buf, b"ipsum");
//! assert_eq!(num_bytes, 5);
//! cursor.consume(num_bytes);
//! buf.clear();
//!
//! // cursor is at EOF
//! let num_bytes = cursor.fill_until(b'-', &mut buf)
//!     .expect("reading from cursor won't fail");
//! assert_eq!(num_bytes, 0);
//! assert_eq!(buf, b"");
//! ```

#![forbid(unsafe_code, future_incompatible, rust_2018_idioms)]
#![deny(missing_debug_implementations, nonstandard_style)]
#![warn(missing_docs, missing_doc_code_examples, unreachable_pub)]

use std::io::{self, BufRead, ErrorKind, Read};
use std::slice;

/// The `omnom` prelude.
pub mod prelude {
    pub use crate::BufReadExt;
}

/// Extend `BufRead` with methods for streaming parsing.
pub trait BufReadExt: BufRead {
    /// Read bytes based on a predicate.
    ///
    /// `read_while` takes a predicate as an argument.
    /// It will call this on each byte, and copy it to the slice if the
    /// predicate evaluates to `true`. Returns the amount of bytes read.
    ///
    /// # Errors
    ///
    /// If this function encounters an error of the kind
    /// `ErrorKind::Interrupted` then the error is ignored and the operation
    /// will continue.
    ///
    /// If any other read error is encountered then this function immediately
    /// returns. Any bytes which have already been read will be appended to
    /// `buf`.
    ///
    /// # Examples
    ///
    /// [`std::io::Cursor`][`Cursor`] is a type that implements `BufRead`. In
    /// this example, we use [`Cursor`] to read bytes in a byte slice until
    /// we encounter a hyphen:
    ///
    /// ```
    /// use std::io::{self, BufRead};
    /// use omnom::prelude::*;
    ///
    /// let mut cursor = io::Cursor::new(b"lorem-ipsum");
    /// let mut buf = vec![];
    ///
    /// let num_bytes = cursor.read_while(&mut buf, |b| b != b'-')
    ///     .expect("reading from cursor won't fail");
    /// assert_eq!(buf, b"lorem");
    /// ```
    fn read_while<P>(&mut self, buf: &mut Vec<u8>, mut predicate: P) -> io::Result<usize>
    where
        P: FnMut(u8) -> bool,
    {
        let mut read = 0;

        loop {
            let mut byte = 0;

            match self.read(slice::from_mut(&mut byte)) {
                Ok(0) => break,
                Ok(_) => {
                    if predicate(byte) {
                        buf.extend_from_slice(&[byte]);
                        read += 1;
                    } else {
                        read += 1;
                        break;
                    }
                }
                Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
                Err(e) => {
                    self.consume(read);
                    return Err(e);
                }
            };
        }

        self.consume(read);
        Ok(read)
    }

    /// Try reading based on a predicate.
    ///
    /// `read_while` takes a predicate as an argument.
    /// It will call this on each byte, and copy it to the slice if the
    /// predicate evaluates to `true`. Returns the amount of bytes read.
    ///
    /// Unlike `read_while` after consuming bytes through this method you'll
    /// have to manually call [`BufRead::consume`](https://doc.rust-lang.org/std/io/trait.BufRead.html#tymethod.consume).
    ///
    /// # Errors
    ///
    /// If this function encounters an error of the kind
    /// `ErrorKind::Interrupted` then the error is ignored and the operation
    /// will continue.
    ///
    /// If any other read error is encountered then this function immediately
    /// returns. Any bytes which have already been read will be appended to
    /// `buf`.
    ///
    /// # Examples
    ///
    /// [`std::io::Cursor`][`Cursor`] is a type that implements `BufRead`. In
    /// this example, we use [`Cursor`] to read bytes in a byte slice until
    /// we encounter a hyphen:
    ///
    /// [`Cursor`]: https://doc.rust-lang.org/std/io/struct.Cursor.html
    ///
    /// ```
    /// use std::io::{self, BufRead};
    /// use omnom::prelude::*;
    ///
    /// let mut cursor = io::Cursor::new(b"lorem-ipsum");
    /// let mut buf = vec![];
    ///
    /// let num_bytes = cursor.fill_while(&mut buf, |b| b != b'-')
    ///     .expect("reading from cursor won't fail");
    /// assert_eq!(buf, b"lorem");
    /// cursor.consume(num_bytes);
    /// ```
    fn fill_while<P>(&mut self, buf: &mut Vec<u8>, mut predicate: P) -> io::Result<usize>
    where
        Self: Read,
        P: FnMut(u8) -> bool,
    {
        let mut read = 0;

        loop {
            let mut byte = 0;

            match self.read(slice::from_mut(&mut byte)) {
                Ok(0) => break,
                Ok(_) => {
                    if predicate(byte) {
                        buf.extend_from_slice(&[byte]);
                        read += 1;
                    } else {
                        read += 1;
                        break;
                    }
                }
                Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            };
        }
        Ok(read)
    }

    /// Read all bytes into `buf` until the delimiter `byte` or EOF is reached.
    ///
    /// This function will read bytes from the underlying stream until the
    /// delimiter or EOF is found. Once found, all bytes up to, and including,
    /// the delimiter (if found) will be appended to `buf`.
    ///
    /// Unlike `read_until` after consuming bytes through this method you'll
    /// have to manually call [`BufRead::consume`].
    ///
    /// If successful, this function will return the total number of bytes read.
    ///
    /// # Errors
    ///
    /// This function will ignore all instances of [`ErrorKind::Interrupted`] and
    /// will otherwise return any errors returned by [`BufRead::fill_buf`].
    ///
    /// If an I/O error is encountered then all bytes read so far will be
    /// present in `buf` and its length will have been adjusted appropriately.
    ///
    /// [`BufRead::consume`]: https://doc.rust-lang.org/std/io/trait.BufRead.html#tymethod.consume
    /// [`BufRead::fill_buf`]: https://doc.rust-lang.org/std/io/trait.BufRead.html#tymethod.consume
    /// [`ErrorKind::Interrupted`]: https://doc.rust-lang.org/std/io/enum.ErrorKind.html#variant.Interrupted
    ///
    /// # Examples
    ///
    /// [`std::io::Cursor`][`Cursor`] is a type that implements `BufRead`. In
    /// this example, we use [`Cursor`] to read all the bytes in a byte slice
    /// in hyphen delimited segments:
    ///
    /// [`Cursor`]: https://doc.rust-lang.org/std/io/struct.Cursor.html
    ///
    /// ```
    /// use std::io::{self, BufRead};
    /// use omnom::prelude::*;
    ///
    /// let mut cursor = io::Cursor::new(b"lorem-ipsum");
    /// let mut buf = vec![];
    ///
    /// // cursor is at 'l'
    /// let num_bytes = cursor.fill_until(b'-', &mut buf)
    ///     .expect("reading from cursor won't fail");
    /// assert_eq!(buf, b"lorem-");
    /// assert_eq!(num_bytes, 6);
    /// cursor.consume(num_bytes);
    /// buf.clear();
    ///
    /// // cursor is at 'i'
    /// let num_bytes = cursor.fill_until(b'-', &mut buf)
    ///     .expect("reading from cursor won't fail");
    /// assert_eq!(buf, b"ipsum");
    /// assert_eq!(num_bytes, 5);
    /// cursor.consume(num_bytes);
    /// buf.clear();
    ///
    /// // cursor is at EOF
    /// let num_bytes = cursor.fill_until(b'-', &mut buf)
    ///     .expect("reading from cursor won't fail");
    /// assert_eq!(num_bytes, 0);
    /// assert_eq!(buf, b"");
    /// ```
    fn fill_until(&mut self, byte: u8, buf: &mut Vec<u8>) -> io::Result<usize> {
        let mut read = 0;
        loop {
            let available = match self.fill_buf() {
                Ok(n) => n,
                Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            };

            let available = &available[read..];

            let (done, used) = match memchr::memchr(byte, available) {
                Some(i) => {
                    buf.extend_from_slice(&available[..=i]);
                    (true, i + 1)
                }
                None => {
                    buf.extend_from_slice(available);
                    (false, available.len())
                }
            };

            read += used;
            if done || used == 0 {
                return Ok(read);
            }
        }
    }

    /// Read the exact number of bytes required to fill `buf`.
    ///
    /// This function reads as many bytes as necessary to completely fill the
    /// specified buffer `buf`.
    ///
    /// Unlike `read_exact`, after reading bytes through this method you'll
    /// have to manually call [`BufRead::consume`].
    ///
    /// No guarantees are provided about the contents of `buf` when this
    /// function is called, implementations cannot rely on any property of the
    /// contents of `buf` being true. It is recommended that implementations
    /// only write data to `buf` instead of reading its contents.
    ///
    /// # Errors
    ///
    /// If this function encounters an error of the kind
    /// [`ErrorKind::Interrupted`] then the error is ignored and the operation
    /// will continue.
    ///
    /// If this function encounters an "end of file" before completely filling
    /// the buffer, it returns an error of the kind [`ErrorKind::UnexpectedEof`].
    /// The contents of `buf` are unspecified in this case.
    ///
    /// If any other read error is encountered then this function immediately
    /// returns. The contents of `buf` are unspecified in this case.
    ///
    /// If this function returns an error, it is unspecified how many bytes it
    /// has read, but it will never read more than would be necessary to
    /// completely fill the buffer.
    ///
    /// # Examples
    ///
    /// [`File`]s implement `Read`:
    ///
    /// [`File`]: https://doc.rust-lang.org/std/fs/struct.File.html
    /// [`ErrorKind::Interrupted`]: https://doc.rust-lang.org/std/io/enum.ErrorKind.html#variant.Interrupted
    /// [`ErrorKind::UnexpectedEof`]: https://doc.rust-lang.org/std/io/enum.ErrorKind.html#variant.UnexpectedEof
    /// [`ErrorKind::UnexpectedEof`]: ../../std/io/enum.ErrorKind.html#variant.UnexpectedEof
    ///
    /// ```no_run
    /// use std::io::{self, BufReader};
    /// use std::io::prelude::*;
    /// use std::fs::File;
    /// use omnom::prelude::*;
    ///
    /// let mut cursor = io::Cursor::new(b"lorem-ipsum");
    /// let mut buf = vec![0; 5];
    ///
    /// // read exactly 5 bytes
    /// cursor.fill_exact(&mut buf).unwrap();
    /// assert_eq!(buf, b"lorem");
    /// buf.clear();
    ///
    /// // the same bytes can be read again
    /// cursor.fill_exact(&mut buf).unwrap();
    /// assert_eq!(buf, b"lorem");
    /// buf.clear();
    /// cursor.consume(5);
    ///
    /// // after consuming bytes we read new bytes
    /// cursor.fill_exact(&mut buf).unwrap();
    /// assert_eq!(buf, b"-ipsu");
    /// ```
    fn fill_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
        loop {
            let available = match self.fill_buf() {
                Ok(n) => n,
                Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            };

            if available.len() >= buf.len() {
                buf.copy_from_slice(&available[..buf.len()]);
                break;
            }
        }

        Ok(())
    }
}

impl<T: BufRead> BufReadExt for T {}