buf_read_ext/
lib.rs

// Copyright © 2016 by Michael Dilger (of New Zealand) and other buf-read-ext Developers
//
// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.

use std::io::{BufRead, ErrorKind, Result, Write};

/// Extends any type that implements BufRead with a stream_until_token() function.
pub trait BufReadExt: BufRead {
    /// Streams all bytes to `out` until the `token` delimiter or EOF is reached.
    ///
    /// This function will continue to read (and stream) bytes from the underlying stream until
    /// the token or end-of-file is found. Once found, all bytes up to (but not including) the
    /// token will have been streamed to `out`, and the input stream will have advanced past the
    /// token.
    ///
    /// This function will return the number of bytes that were streamed to `out` (excluding the
    /// token bytes, if the token was found) and whether or not the token was found.
    ///
    /// # Errors
    ///
    /// This function will ignore all instances of `ErrorKind::Interrupted` and will otherwise
    /// return any error returned by `fill_buf` or by writing to `out`.
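    ///
    /// # Examples
    ///
    /// A minimal sketch of typical usage with an in-memory cursor (mirroring the tests at the
    /// bottom of this file; assumes the crate is built as `buf_read_ext`):
    ///
    /// ```
    /// use buf_read_ext::BufReadExt;
    /// use std::io::Cursor;
    ///
    /// let mut input = Cursor::new(&b"Hello TOKEN world"[..]);
    /// let mut output: Vec<u8> = Vec::new();
    /// let (count, found) = input.stream_until_token(b"TOKEN", &mut output).unwrap();
    /// assert_eq!((count, found), (6, true));
    /// assert_eq!(output, b"Hello ");
    /// ```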
    fn stream_until_token<W: Write>(&mut self, token: &[u8], out: &mut W) -> Result<(usize, bool)>
    {
        stream_until_token(self, token, out)
    }
}

// Implement BufReadExt for everything that implements BufRead.
impl<T: BufRead> BufReadExt for T { }

fn stream_until_token<R: BufRead + ?Sized, W: Write>(stream: &mut R, token: &[u8], out: &mut W)
                                                     -> Result<(usize, bool)>
{
    let mut read = 0;
    // Represents the sizes of possible token prefixes found at the end of the last buffer, usually
    // empty. If not empty, the beginning of this buffer is checked for the matching suffixes to
    // find tokens that straddle two buffers. Entries should be in longest prefix to shortest
    // prefix order.
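    // (Illustration, assuming no prefix is already pending: with token b"barbarian", a previous
    // buffer ending in b"...barbar" leaves this holding [6, 3].)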
    let mut prefix_lengths: Vec<usize> = Vec::new();
    let mut found: bool;
    let mut used: usize;

    'stream:
    loop {
        found = false;
        used = 0;

        // This is not actually meant to repeat; we only need the break functionality of a loop.
        // The reader is encouraged to try their hand at coding this better, noting that `buffer`
        // must drop out of scope before `stream` can be used again.
        let mut do_once = true;
        'buffer:
        while do_once {
            do_once = false;

            // Fill the buffer (without consuming)
            let buffer = match stream.fill_buf() {
                Ok(n) => n,
                Err(ref err) if err.kind() == ErrorKind::Interrupted => continue 'stream,
                Err(err) => return Err(err)
            };
            if buffer.len() == 0 {
                break 'stream;
            }

            // If the buffer starts with a token suffix matching a token prefix from the end of the
            // previous buffer, then we have found a token.
            if !prefix_lengths.is_empty() {
                let drain: Vec<usize> = prefix_lengths.drain(..).collect();

                for index in 0..drain.len() {
                    let prefix_len = drain[index];

                    let mut prefix_failed: bool = true;

                    // If the buffer is too small to fit an entire suffix
                    if buffer.len() < token.len() - prefix_len {
                        if buffer[..] == token[prefix_len..prefix_len + buffer.len()] {
                            // that prefix just got bigger and needs to be preserved
                            prefix_lengths.push(prefix_len + buffer.len());
                            prefix_failed = false;
                        }
                    } else {
                        // If we find a complete suffix at the front of the buffer for this
                        // prefix...
                        if buffer[..token.len() - prefix_len] == token[prefix_len..] {
                            found = true;
                            used = token.len() - prefix_len;
                            break 'buffer;
                        }
                    }

                    if prefix_failed {
                        // This prefix length doesn't work. We should write the bytes...
                        if index == drain.len() - 1 {
                            // ...of this prefix length
                            out.write_all(&token[..prefix_len])?;
                        } else {
                            // ...from this prefix length to the next
                            let next_prefix_len = drain[index+1];
                            out.write_all(&token[..prefix_len - next_prefix_len])?;
                        }
                    }
                }
            }

            // Get the index of the first token in the middle of the buffer, if any
            let index = buffer
                .windows(token.len())
                .enumerate()
                .filter(|&(_, t)| t == token)
                .map(|(i, _)| i)
                .next();

            if let Some(index) = index {
                out.write_all(&buffer[..index])?;
                found = true;
                used = index + token.len();
                break 'buffer;
            }

            // Check for token prefixes at the end of the buffer.
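            // (Illustration, assuming no pending prefix: with token b"TOKEN" and a buffer ending
            // in b"...TOK", the loop below pushes 3 onto prefix_lengths and sets reserve to 3, so
            // the trailing "TOK" is held back instead of being written to `out`.)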
            let mut window = token.len() - 1;
            if buffer.len() < window {
                window = buffer.len();
            }
            // Remember the largest prefix for writing later if it didn't match
            // (we don't write it now just in case it turns out to be the token)
            let mut reserve = if !prefix_lengths.is_empty() {
                buffer.len()
            } else {
                0
            };
            for prefix in (1..window+1).rev()
                .filter(|&w| token[..w] == buffer[buffer.len() - w..])
            {
                if reserve == 0 {
                    reserve = prefix;
                }
                prefix_lengths.push(prefix);
            }

            out.write_all(&buffer[..buffer.len()-reserve])?;
            used = buffer.len();
        }

        stream.consume(used);
        read += used;

        if found || used == 0 {
            break;
        }
    }

    Ok((if found { read - token.len() } else { read }, found))
}

#[cfg(test)]
mod tests {
    use super::*;

    use std::io::{BufReader, Cursor};

    #[test]
    fn stream_until_token() {
        let mut buf = Cursor::new(&b"123456"[..]);
        let mut result: Vec<u8> = Vec::new();
        assert_eq!(buf.stream_until_token(b"78", &mut result).unwrap(), (6, false));
        assert_eq!(result, b"123456");

        let mut buf = Cursor::new(&b"12345678"[..]);
        let mut result: Vec<u8> = Vec::new();
        assert_eq!(buf.stream_until_token(b"34", &mut result).unwrap(), (2, true));
        assert_eq!(result, b"12");

        result.truncate(0);
        assert_eq!(buf.stream_until_token(b"78", &mut result).unwrap(), (2, true));
        assert_eq!(result, b"56");

        let mut buf = Cursor::new(&b"bananas for nana"[..]);
        let mut result: Vec<u8> = Vec::new();
        assert_eq!(buf.stream_until_token(b"nan", &mut result).unwrap(), (2, true));
        assert_eq!(result, b"ba");

        result.truncate(0);
        assert_eq!(buf.stream_until_token(b"nan", &mut result).unwrap(), (7, true));
        assert_eq!(result, b"as for ");

        result.truncate(0);
        assert_eq!(buf.stream_until_token(b"nan", &mut result).unwrap(), (1, false));
        assert_eq!(result, b"a");

        result.truncate(0);
        assert_eq!(buf.stream_until_token(b"nan", &mut result).unwrap(), (0, false));
        assert_eq!(result, b"");
    }

    #[test]
    fn stream_until_token_straddle_test() {
        let cursor = Cursor::new(&b"12345TOKEN345678"[..]);
        let mut buf = BufReader::with_capacity(8, cursor);
        let mut result: Vec<u8> = Vec::new();
        assert_eq!(buf.stream_until_token(b"TOKEN", &mut result).unwrap(), (5, true));
        assert_eq!(result, b"12345");

        result.truncate(0);
        assert_eq!(buf.stream_until_token(b"TOKEN", &mut result).unwrap(), (6, false));
        assert_eq!(result, b"345678");

        result.truncate(0);
        assert_eq!(buf.stream_until_token(b"TOKEN", &mut result).unwrap(), (0, false));
        assert_eq!(result, b"");

        //                          <------><------><------>
        let cursor = Cursor::new(&b"12345TOKE23456781TOKEN78"[..]);
        let mut buf = BufReader::with_capacity(8, cursor);
        let mut result: Vec<u8> = Vec::new();
        assert_eq!(buf.stream_until_token(b"TOKEN", &mut result).unwrap(), (17, true));
        assert_eq!(result, b"12345TOKE23456781");
    }

    // This tests against mikedilger/formdata github issue #1
    #[test]
    fn stream_until_token_large_token_test() {
        let cursor = Cursor::new(&b"IAMALARGETOKEN7812345678"[..]);
        let mut buf = BufReader::with_capacity(8, cursor);
        let mut v: Vec<u8> = Vec::new();
        assert_eq!(buf.stream_until_token(b"IAMALARGETOKEN", &mut v).unwrap(), (0, true));
        assert_eq!(v, b"");
        assert_eq!(buf.stream_until_token(b"IAMALARGETOKEN", &mut v).unwrap(), (10, false));
        assert_eq!(v, b"7812345678");

        let cursor = Cursor::new(&b"0IAMALARGERTOKEN12345678"[..]);
        let mut buf = BufReader::with_capacity(8, cursor);
        let mut v: Vec<u8> = Vec::new();
        assert_eq!(buf.stream_until_token(b"IAMALARGERTOKEN", &mut v).unwrap(), (1, true));
        assert_eq!(v, b"0");
        v.truncate(0);
        assert_eq!(buf.stream_until_token(b"IAMALARGERTOKEN", &mut v).unwrap(), (8, false));
        assert_eq!(v, b"12345678");
    }

    // This tests against mikedilger/formdata github issue #11
    #[test]
    fn stream_until_token_double_straddle_test() {
        let cursor = Cursor::new(&b"12345IAMALARGETOKEN4567"[..]);
        let mut buf = BufReader::with_capacity(8, cursor);
        let mut v: Vec<u8> = Vec::new();
        assert_eq!(buf.stream_until_token(b"IAMALARGETOKEN", &mut v).unwrap(), (5, true));
        assert_eq!(v, b"12345");
        v.truncate(0);
        assert_eq!(buf.stream_until_token(b"IAMALARGETOKEN", &mut v).unwrap(), (4, false));
        assert_eq!(v, b"4567");
    }

    // This tests against mikedilger/formdata github issue #12
    #[test]
    fn stream_until_token_multiple_prefix_test() {
        let cursor = Cursor::new(&b"12barbarian4567"[..]);
        let mut buf = BufReader::with_capacity(8, cursor);
        let mut v: Vec<u8> = Vec::new();
        assert_eq!(buf.stream_until_token(b"barbarian", &mut v).unwrap(), (2, true));
        assert_eq!(v, b"12");

        let cursor = Cursor::new(&b"12barbarbarian7812"[..]);
        let mut buf = BufReader::with_capacity(8, cursor);
        let mut v: Vec<u8> = Vec::new();
        assert_eq!(buf.stream_until_token(b"barbarian", &mut v).unwrap(), (5, true));
        assert_eq!(v, b"12bar");
    }

    #[test]
    fn stream_until_token_complex_test() {
        //                                             <-TOKEN->
        //                          <--><--><--><--><--><--><--><-->
        let cursor = Cursor::new(&b"A SANTA BARBARA BARBARBARIANEND"[..]);
        let mut buf = BufReader::with_capacity(4, cursor);
        let mut v: Vec<u8> = Vec::new();
        assert_eq!(buf.stream_until_token(b"BARBARIAN", &mut v).unwrap(), (19, true));
        assert_eq!(v, b"A SANTA BARBARA BAR");

        /*            prefix lens:   out:
        "A SA"        []             "A SA"
        "NTA "        []             "NTA "
        "BARB"        [4, 1]         ""
        "ARA "        []             "BARB"  "ARA "
        "BARB"        [4, 1]         ""
        "ARBA"        [5, 2]         "BAR"
        "RIAN"
         */

        v.truncate(0);
        assert_eq!(buf.stream_until_token(b"BARBARIAN", &mut v).unwrap(), (3, false));
        assert_eq!(v, b"END");
    }
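
    // A sketch added alongside the original tests: it exercises the documented promise that a
    // transient ErrorKind::Interrupted from fill_buf() is retried rather than surfaced.
    // `InterruptOnce` is a hypothetical helper reader defined only for this test.
    struct InterruptOnce<R> {
        inner: R,
        interrupted: bool,
    }

    impl<R: std::io::Read> std::io::Read for InterruptOnce<R> {
        fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
            self.inner.read(buf)
        }
    }

    impl<R: std::io::BufRead> std::io::BufRead for InterruptOnce<R> {
        fn fill_buf(&mut self) -> std::io::Result<&[u8]> {
            if !self.interrupted {
                // Fail exactly once, then defer to the inner reader.
                self.interrupted = true;
                return Err(std::io::Error::new(std::io::ErrorKind::Interrupted, "interrupted"));
            }
            self.inner.fill_buf()
        }

        fn consume(&mut self, amt: usize) {
            self.inner.consume(amt)
        }
    }

    #[test]
    fn stream_until_token_interrupted_test() {
        let mut buf = InterruptOnce { inner: Cursor::new(&b"12345678"[..]), interrupted: false };
        let mut v: Vec<u8> = Vec::new();
        assert_eq!(buf.stream_until_token(b"34", &mut v).unwrap(), (2, true));
        assert_eq!(v, b"12");
    }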
}