1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
//! Alternative implementation of many functions found in [`std::io`][stdio],
//! but suitable for blocking IO over networks.
//!
//! The main reason for this crate is the handling of [`ErrorKind::Interrupted`][errorkind] in
//! `std::io`.
//! Except for [`Read::read()`][readread] and [`Write::write()`][writewrite], almost all functions
//! will ignore interrupts and just retry.
//!
//! This crate provides alternative implementations with a similar API that allow for interrupts
//! without losing any content.
//!
//! Most functions are based on [`BufRead`][bufread] instead of [`Read`][read] to ensure that no
//! content is lost on retry.
//!
//! [stdio]: https://doc.rust-lang.org/nightly/std/io/index.html
//! [errorkind]: https://doc.rust-lang.org/nightly/std/io/enum.ErrorKind.html
//! [read]: https://doc.rust-lang.org/nightly/std/io/trait.Read.html
//! [bufread]: https://doc.rust-lang.org/nightly/std/io/trait.BufRead.html
//! [readread]: https://doc.rust-lang.org/nightly/std/io/trait.Read.html#tymethod.read
//! [writewrite]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html#tymethod.write

extern crate memchr;

use std::{io, mem};

/// Copies the entire content of a buffered reader into a writer.
///
/// Similar to [`std::io::copy`][copy], this function will continuously read data from reader and
/// then write it into writer in a streaming fashion until reader returns EOF.
///
/// Errors
/// ======
/// This function will return an error immediately if any call to [`fill_buf`][fillbuf] or
/// [`write`][writewrite] returns any kind of error.
/// Instances of [`ErrorKind::Interrupted`][errorkind] are *not* handled by this function.
///
/// If the writer accepts zero bytes of a non-empty buffer, an error of kind
/// [`ErrorKind::WriteZero`][errorkind] is returned instead of retrying forever.
///
/// All bytes consumed from the buffered reader will be written to the specified writer and vice
/// versa.
/// It is guaranteed that no data is lost in case of error.
///
/// Differences to `std::io::copy`
/// ==============================
/// - Does not retry on [`ErrorKind::Interrupted`][errorkind].
/// - Uses [`BufRead`][bufread] instead of [`Read`][read].
/// - Does not return the number of bytes that are copied.
///
/// Advantages
/// ----------
/// - Allows for reliable retry on errors.
/// - Function is interruptable, e.g. to allow graceful shutdown for server applications.
/// - Avoids double buffering if the source already implements [`BufRead`][bufread].
/// - Allows different buffer sizes by using [`BufReader::with_capacity`][withcap].
///
/// Disadvantages
/// -------------
/// The fact that it does not return the number of bytes copied stems from the fact that it cannot
/// return this information in case of error.
/// This would go against the goal of allowing reliable retry.
///
/// [copy]: https://doc.rust-lang.org/nightly/std/io/fn.copy.html
/// [errorkind]: https://doc.rust-lang.org/nightly/std/io/enum.ErrorKind.html
/// [fillbuf]: https://doc.rust-lang.org/nightly/std/io/trait.BufRead.html#tymethod.fill_buf
/// [writewrite]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html#tymethod.write
/// [bufread]: https://doc.rust-lang.org/nightly/std/io/trait.BufRead.html
/// [read]: https://doc.rust-lang.org/nightly/std/io/trait.Read.html
/// [withcap]: https://doc.rust-lang.org/nightly/std/io/struct.BufReader.html#method.with_capacity
pub fn copy<R: ?Sized, W: ?Sized>(reader: &mut R, writer: &mut W) -> io::Result<()>
    where R: io::BufRead, W: io::Write
{
    loop {
        let written = {
            let buf = reader.fill_buf()?;
            if buf.is_empty() {
                // An empty buffer signals EOF on the reader.
                return Ok(());
            }
            writer.write(buf)?
        };
        if written == 0 {
            // The writer made no progress on a non-empty buffer. Looping again would present the
            // very same data and could spin forever, so report it like `Write::write_all` does.
            // Nothing has been consumed, so no data is lost.
            return Err(io::Error::new(io::ErrorKind::WriteZero,
                                      "failed to write the buffered data"));
        }
        // Only mark as consumed what the writer actually accepted.
        reader.consume(written);
    }
}

/// Copies the content of a buffered reader into a writer until a delimiter is reached.
///
/// This function will continuously read data from reader and then write it into writer in a
/// streaming fashion until until the delimiter or EOF is found.
///
/// Errors
/// ======
/// This function will return an error immediately if any call to [`fill_buf`][fillbuf] or
/// [`write`][writewrite] returns any kind of error.
/// Instances of [`ErrorKind::Interrupted`][errorkind] are *not* handled by this function.
///
/// All bytes consumed from the buffered reader will be written to the specified writer and vice
/// versa.
/// It is guaranteed that no data is lost in case of error.
///
/// [errorkind]: https://doc.rust-lang.org/nightly/std/io/enum.ErrorKind.html
/// [fillbuf]: https://doc.rust-lang.org/nightly/std/io/trait.BufRead.html#tymethod.fill_buf
/// [writewrite]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html#tymethod.write
pub fn copy_until<R: ?Sized, W: ?Sized>(reader: &mut R, writer: &mut W, delim: u8) -> io::Result<bool>
    where R: io::BufRead, W: io::Write
{
    loop {
        let (done, used) = {
            let buf = try!(reader.fill_buf());
            match memchr::memchr(delim, buf) {
                Some(i) => (true, try!(writer.write(&buf[..i + 1]))),
                None =>    (false, try!(writer.write(buf))),
            }
        };
        reader.consume(used);
        if done || used == 0 {
            return Ok(used == 0);
        }
    }
}

/// Extension methods for `std::io::Read`
///
/// This trait is automatically implemented for all types that implement `std::io::Read`.
pub trait ReadExt : io::Read {
    /// Creates a buffered reader with default capacity
    ///
    /// Consumes `self` and wraps it using `BufReader::new`.
    /// Please see the documentation of [`BufReader`][bufreader] for more details.
    ///
    /// [bufreader]: https://doc.rust-lang.org/nightly/std/io/struct.BufReader.html
    fn buffer(self) -> io::BufReader<Self> where Self: Sized {
        io::BufReader::new(self)
    }

    /// Transforms this reader into a reader that automatically retries on interrupts
    ///
    /// The returned adapter will behave identically to the original reader, except that it retries
    /// the reading operation automatically if an error of kind
    /// [`ErrorKind::Interrupted`][errorkind] occurs.
    ///
    /// Note
    /// ----
    /// Methods that are already expected to retry (`read_to_end`, `read_to_string`, `read_exact`
    /// and, for buffered readers, `read_until` and `read_line`) are forwarded directly to the
    /// underlying reader.
    ///
    /// [errorkind]: https://doc.rust-lang.org/nightly/std/io/enum.ErrorKind.html
    fn retry(self) -> RetryReader<Self> where Self: Sized {
        RetryReader { inner: self }
    }
}

// Blanket implementation: every `io::Read` type gets the extension methods.
impl<R: io::Read> ReadExt for R { }

/// Extension methods for `std::io::BufRead`
///
/// This trait is automatically implemented for all types that implement `std::io::BufRead`.
pub trait BufReadExt : io::BufRead {
    //=============================================================================================
    // Methods originally implemented in std::io::Read
    //=============================================================================================

    /// Read all bytes until EOF in this source, placing them into `buf`.
    ///
    /// Similar to [`std::io::Read::read_to_end`][readtoend], all bytes read from this source will
    /// be appended to the specified buffer `buf`.
    ///
    /// This function will continuously call [`fill_buf`][fillbuf] and [`consume`][consume] to
    /// append more data to `buf` until [`fill_buf`][fillbuf] returns either `Ok(&[])` or any kind
    /// of error.
    ///
    /// Errors
    /// ======
    /// This function will return an error immediately if any call to [`fill_buf`][fillbuf] returns
    /// any kind of error.
    /// Instances of [`ErrorKind::Interrupted`][errorkind] are *not* handled by this function.
    ///
    /// All bytes consumed from the buffered reader will be written to the specified buffer and
    /// vice versa.
    /// It is guaranteed that no data is lost in case of error.
    ///
    /// Differences to `std::io::Read::read_to_end`
    /// ===========================================
    /// - Does not retry on [`ErrorKind::Interrupted`][errorkind].
    /// - Uses [`BufRead`][bufread] instead of [`Read`][read].
    /// - Does not return the number of bytes that are copied.
    /// - Different reallocation behavior of the buffer.
    ///
    /// Advantages
    /// ----------
    /// - Function is interruptable, e.g. to allow graceful shutdown for server applications.
    /// - Avoids double buffering if the source already implements [`BufRead`][bufread].
    /// - Allows different buffer sizes by using [`BufReader::with_capacity`][withcap].
    ///
    /// Disadvantages
    /// -------------
    /// The fact that it does not return the number of bytes copied stems from the fact that it
    /// cannot return this information in case of error.
    /// This would go against the goal of allowing reliable retry.
    ///
    /// [readtoend]: https://doc.rust-lang.org/nightly/std/io/trait.Read.html#method.read_to_end
    /// [errorkind]: https://doc.rust-lang.org/nightly/std/io/enum.ErrorKind.html
    /// [fillbuf]: https://doc.rust-lang.org/nightly/std/io/trait.BufRead.html#tymethod.fill_buf
    /// [consume]: https://doc.rust-lang.org/nightly/std/io/trait.BufRead.html#tymethod.consume
    /// [bufread]: https://doc.rust-lang.org/nightly/std/io/trait.BufRead.html
    /// [read]: https://doc.rust-lang.org/nightly/std/io/trait.Read.html
    /// [withcap]: https://doc.rust-lang.org/nightly/std/io/struct.BufReader.html#method.with_capacity
    fn read_to_end_net(&mut self, buf: &mut Vec<u8>) -> io::Result<()> {
        copy(self, buf)
    }

    /// Skip all bytes until EOF in this source.
    ///
    /// Acts like [`read_to_end_net`][readtoendnet], but all bytes read from this source are
    /// discarded.
    ///
    /// This function will continuously call [`fill_buf`][fillbuf] and [`consume`][consume] until
    /// [`fill_buf`][fillbuf] returns either `Ok(&[])` or any kind of error.
    ///
    /// Errors
    /// ======
    /// This function will return an error immediately if any call to [`fill_buf`][fillbuf] returns
    /// any kind of error.
    /// Instances of [`ErrorKind::Interrupted`][errorkind] are *not* handled by this function.
    ///
    /// [readtoendnet]: #method.read_to_end_net
    /// [errorkind]: https://doc.rust-lang.org/nightly/std/io/enum.ErrorKind.html
    /// [fillbuf]: https://doc.rust-lang.org/nightly/std/io/trait.BufRead.html#tymethod.fill_buf
    /// [consume]: https://doc.rust-lang.org/nightly/std/io/trait.BufRead.html#tymethod.consume
    fn skip_to_end_net(&mut self) -> io::Result<()> {
        copy(self, &mut io::sink())
    }

    /// Transforms this [`BufRead`][bufread] instance to an [`Iterator`][iterator] over its bytes.
    ///
    /// This method is approximately equivalent to [`std::io::Read::bytes`][bytes].
    ///
    /// The returned type implements [`Iterator`][iterator] where the `Item` is `io::Result<u8>`.
    /// The yielded item is `Ok` if a byte was successfully read and
    /// `Err` otherwise for I/O errors. EOF is mapped to returning `None` from
    /// this iterator.
    ///
    /// Errors
    /// ======
    /// If `fill_buf` returns any kind of error, the iterator yields `Some(Err)`. In case of error
    /// it is safe to iterate further to retry the reading operation.
    /// Instances of [`ErrorKind::Interrupted`][errorkind] are *not* handled by the iterator.
    ///
    /// Differences to `std::io::Read::bytes`
    /// =====================================
    /// - Uses [`BufRead`][bufread] instead of [`Read`][read].
    ///
    /// Advantages
    /// ----------
    /// - No accidentally unbuffered reading of single bytes
    ///
    /// [iterator]: https://doc.rust-lang.org/nightly/std/iter/trait.Iterator.html
    /// [bufread]: https://doc.rust-lang.org/nightly/std/io/trait.BufRead.html
    /// [bytes]: https://doc.rust-lang.org/nightly/std/io/trait.Read.html#method.bytes
    /// [errorkind]: https://doc.rust-lang.org/nightly/std/io/enum.ErrorKind.html
    /// [read]: https://doc.rust-lang.org/nightly/std/io/trait.Read.html
    fn bytes_net(self) -> Bytes<Self> where Self: Sized {
        Bytes { inner: self }
    }

    //=============================================================================================
    // Methods originally implemented in std::io::BufRead
    //=============================================================================================

    /// Read all bytes into a buffer until a delimiter is reached.
    ///
    /// Similar to [`std::io::BufRead::read_until`][readuntil], this function will read bytes from
    /// the underlying stream and push them to the specified buffer `buf`, until the delimiter
    /// `delim` or EOF is found. If the delimiter is found, it is also part of the result.
    ///
    /// If this reader has reached EOF then this function will return `Ok(true)`.
    /// If reading stopped because the delimiter was found, it returns `Ok(false)`.
    ///
    /// Errors
    /// ======
    /// This function will return an error immediately if any call to [`fill_buf`][fillbuf] returns
    /// any kind of error.
    /// Instances of [`ErrorKind::Interrupted`][errorkind] are *not* handled by this function.
    ///
    /// All bytes consumed from the buffered reader will be written to the specified buffer and
    /// vice versa.
    /// It is guaranteed that no data is lost in case of error.
    ///
    /// Differences to `std::io::BufRead::read_until`
    /// =============================================
    /// - Does not retry on [`ErrorKind::Interrupted`][errorkind].
    /// - Does not return the number of bytes that are read.
    ///
    /// Advantages
    /// ----------
    /// - Function is interruptable, e.g. to allow graceful shutdown for server applications.
    ///
    /// Disadvantages
    /// -------------
    /// The fact that it does not return the number of bytes copied stems from the fact that it
    /// cannot return this information in case of error.
    /// This would go against the goal of allowing reliable retry.
    ///
    /// [readuntil]: https://doc.rust-lang.org/nightly/std/io/trait.BufRead.html#method.read_until
    /// [errorkind]: https://doc.rust-lang.org/nightly/std/io/enum.ErrorKind.html
    /// [fillbuf]: https://doc.rust-lang.org/nightly/std/io/trait.BufRead.html#tymethod.fill_buf
    fn read_until_net(&mut self, delim: u8, buf: &mut Vec<u8>) -> io::Result<bool> {
        copy_until(self, buf, delim)
    }

    /// Skips all bytes until a delimiter is reached.
    ///
    /// This function will discard bytes from the underlying stream until the delimiter `delim` or
    /// EOF is found.
    ///
    /// Acts like [`read_until_net`][readuntilnet], but all bytes read from this source are
    /// discarded.
    ///
    /// If this reader has reached EOF then this function will return `Ok(true)`.
    /// If skipping stopped because the delimiter was found, it returns `Ok(false)`.
    ///
    /// Errors
    /// ======
    /// This function will return an error immediately if any call to [`fill_buf`][fillbuf] returns
    /// any kind of error.
    /// Instances of [`ErrorKind::Interrupted`][errorkind] are *not* handled by this function.
    ///
    /// [readuntilnet]: #method.read_until_net
    /// [errorkind]: https://doc.rust-lang.org/nightly/std/io/enum.ErrorKind.html
    /// [fillbuf]: https://doc.rust-lang.org/nightly/std/io/trait.BufRead.html#tymethod.fill_buf
    fn skip_until_net(&mut self, delim: u8) -> io::Result<bool> {
        copy_until(self, &mut io::sink(), delim)
    }

    /// Returns an iterator over the contents of this reader split on a delimiter.
    ///
    /// The iterator returned from this function will return instances of
    /// `io::Result<Vec<u8>>`. Each vector returned will *not* have the
    /// delimiter byte at the end.
    ///
    /// Errors
    /// ======
    /// The iterator will yield an error whenever [`read_until_net`][readuntilnet] would have also
    /// returned an error.
    ///
    /// [readuntilnet]: #method.read_until_net
    fn split_net(self, byte: u8) -> Split<Self> where Self: Sized {
        Split { reader: self, delim: byte, buf: Vec::new() }
    }
}

// Blanket implementation: every `io::BufRead` type gets the extension methods.
impl<R: io::BufRead> BufReadExt for R { }

/// An iterator over `u8` values of a reader.
///
/// This struct is generally created by calling [`bytes_net()`][bytesnet] on a buffered reader.
/// Please see the documentation of `bytes_net()` for more details.
///
/// [bytesnet]: trait.BufReadExt.html#method.bytes_net
pub struct Bytes<R> {
    inner: R,
}

impl<R: io::BufRead> Iterator for Bytes<R> {
    type Item = io::Result<u8>;

    /// Yields the next byte of the underlying reader.
    ///
    /// Returns `None` at EOF and `Some(Err(_))` if `fill_buf` fails. A byte is only consumed
    /// from the reader when it is actually yielded, so iterating again after an error retries
    /// the read without losing data.
    fn next(&mut self) -> Option<io::Result<u8>> {
        let byte = match self.inner.fill_buf() {
            Ok(buf) => match buf.first() {
                Some(&b) => b,
                // Empty buffer means EOF. Nothing may be consumed here: `consume` must not be
                // called with more bytes than `fill_buf` returned (it panics for `&[u8]`).
                None => return None,
            },
            // On error nothing was read, so nothing is consumed.
            Err(e) => return Some(Err(e)),
        };
        self.inner.consume(1);
        Some(Ok(byte))
    }
}

/// An adapter that retries reading operations of the underlying reader
///
/// This struct is generally created by calling [`retry()`][retry] on a reader.
/// Please see the documentation of `retry()` for more details.
///
/// [retry]: trait.ReadExt.html#method.retry
pub struct RetryReader<I> {
    inner: I,
}

impl<I: io::Read> io::Read for RetryReader<I> {
    /// Reads from the underlying reader, repeating the call for as long as it fails with
    /// `ErrorKind::Interrupted`. Any other result is returned unchanged.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        loop {
            match self.inner.read(buf) {
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
                other => return other,
            }
        }
    }

    // The remaining methods are forwarded unchanged to the underlying reader.

    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
        self.inner.read_to_end(buf)
    }

    fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
        self.inner.read_to_string(buf)
    }

    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
        self.inner.read_exact(buf)
    }
}

impl<R: io::BufRead> io::BufRead for RetryReader<R> {
    /// Fills the buffer of the underlying reader, repeating the call for as long as it fails
    /// with `ErrorKind::Interrupted`.
    ///
    /// Any other error is returned as is. EOF (an empty buffer) is reported directly without
    /// issuing another call on the underlying reader.
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        loop {
            match self.inner.fill_buf() {
                Ok(buf) => {
                    if buf.is_empty() {
                        // EOF: hand out a static empty slice so that no further call, which
                        // could be interrupted again, is needed.
                        return Ok(&[]);
                    }
                    break;
                }
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
                // Report real errors instead of dropping them and re-issuing the call.
                Err(e) => return Err(e),
            }
        }
        // The borrow checker rejects returning the slice from inside the loop, so it is fetched
        // again here. The buffer is known to be non-empty, and `fill_buf` only reads from the
        // underlying source when its buffer is empty.
        self.inner.fill_buf()
    }

    fn consume(&mut self, amt: usize) {
        self.inner.consume(amt)
    }

    // The remaining methods are forwarded unchanged to the underlying reader.

    fn read_until(&mut self, byte: u8, buf: &mut Vec<u8>) -> io::Result<usize> {
        self.inner.read_until(byte, buf)
    }

    fn read_line(&mut self, buf: &mut String) -> io::Result<usize> {
        self.inner.read_line(buf)
    }
}

/// An iterator over the contents of an instance of `BufRead` split on a
/// particular byte.
///
/// This struct is generally created by calling [`split_net()`][splitnet] on a
/// `BufRead`. Please see the documentation of `split_net()` for more details.
///
/// [splitnet]: trait.BufReadExt.html#method.split_net
pub struct Split<B> {
    reader: B,
    delim: u8,
    buf: Vec<u8>,
}

impl<B: io::BufRead> Iterator for Split<B> {
    type Item = io::Result<Vec<u8>>;

    /// Yields the next segment without its trailing delimiter.
    ///
    /// Bytes read so far are kept in the internal buffer when an error is yielded, so calling
    /// `next` again continues the same segment.
    fn next(&mut self) -> Option<io::Result<Vec<u8>>> {
        let at_eof = match self.reader.read_until_net(self.delim, &mut self.buf) {
            Ok(flag) => flag,
            Err(err) => return Some(Err(err)),
        };
        if at_eof {
            // EOF with nothing pending ends the iteration; otherwise the unterminated tail is
            // yielded as the last segment.
            if self.buf.is_empty() {
                return None;
            }
        } else {
            // The delimiter was found and stored as the last byte; strip it.
            self.buf.pop();
        }
        // Hand out the collected segment and leave a fresh buffer behind.
        Some(Ok(mem::replace(&mut self.buf, Vec::new())))
    }
}

// Unit tests for the `BufReadExt` extension methods, using in-memory cursors as readers.
#[cfg(test)]
mod tests {
    use std::io;
    use super::BufReadExt;

    #[test]
    fn read_until_net() {
        // No delimiter in the input: everything is read and EOF (`true`) is reported.
        let mut buf = io::Cursor::new(&b"12"[..]);
        let mut v = Vec::new();
        assert_eq!(buf.read_until_net(b'3', &mut v).unwrap(), true);
        assert_eq!(v, b"12");

        // Two delimiters: each call stops after one delimiter (`false`), the delimiter is part
        // of the result, and the final call reports EOF with no data.
        let mut buf = io::Cursor::new(&b"1233"[..]);
        let mut v = Vec::new();
        assert_eq!(buf.read_until_net(b'3', &mut v).unwrap(), false);
        assert_eq!(v, b"123");
        v.truncate(0);
        assert_eq!(buf.read_until_net(b'3', &mut v).unwrap(), false);
        assert_eq!(v, b"3");
        v.truncate(0);
        assert_eq!(buf.read_until_net(b'3', &mut v).unwrap(), true);
        assert_eq!(v, []);
    }

    #[test]
    fn split_net() {
        // Input without delimiter yields a single segment.
        let buf = io::Cursor::new(&b"12"[..]);
        let mut s = buf.split_net(b'3');
        assert_eq!(s.next().unwrap().unwrap(), vec![b'1', b'2']);
        assert!(s.next().is_none());

        // Segments are yielded without the delimiter; two adjacent delimiters produce an empty
        // segment, and nothing is yielded after the trailing delimiter.
        let buf = io::Cursor::new(&b"1233"[..]);
        let mut s = buf.split_net(b'3');
        assert_eq!(s.next().unwrap().unwrap(), vec![b'1', b'2']);
        assert_eq!(s.next().unwrap().unwrap(), vec![]);
        assert!(s.next().is_none());
    }

    #[test]
    fn read_to_end_net() {
        // Empty input leaves the buffer empty.
        let mut c = io::Cursor::new(&b""[..]);
        let mut v = Vec::new();
        c.read_to_end_net(&mut v).unwrap();
        assert_eq!(v, []);

        // Single byte input.
        let mut c = io::Cursor::new(&b"1"[..]);
        let mut v = Vec::new();
        c.read_to_end_net(&mut v).unwrap();
        assert_eq!(v, b"1");

        // Large input read in two halves: the second call must append to the existing content.
        let cap = 1024 * 1024;
        let data = (0..cap).map(|i| (i / 3) as u8).collect::<Vec<_>>();
        let mut v = Vec::new();
        let (a, b) = data.split_at(data.len() / 2);
        io::Cursor::new(a).read_to_end_net(&mut v).unwrap();
        io::Cursor::new(b).read_to_end_net(&mut v).unwrap();
        assert_eq!(v, data);
    }
}