nu_utils/
split_read.rs

1use std::io::{BufRead, ErrorKind};
2
3use memchr::memmem::Finder;
4
5pub struct SplitRead<R> {
6    reader: Option<R>,
7    buf: Option<Vec<u8>>,
8    finder: Finder<'static>,
9}
10
11impl<R: BufRead> SplitRead<R> {
12    pub fn new(reader: R, delim: impl AsRef<[u8]>) -> Self {
13        // empty delimiter results in an infinite stream of empty items
14        debug_assert!(!delim.as_ref().is_empty(), "delimiter can't be empty");
15        Self {
16            reader: Some(reader),
17            buf: Some(Vec::new()),
18            finder: Finder::new(delim.as_ref()).into_owned(),
19        }
20    }
21}
22
23impl<R: BufRead> Iterator for SplitRead<R> {
24    type Item = Result<Vec<u8>, std::io::Error>;
25
26    fn next(&mut self) -> Option<Self::Item> {
27        let buf = self.buf.as_mut()?;
28        let mut search_start = 0usize;
29
30        loop {
31            if let Some(i) = self.finder.find(&buf[search_start..]) {
32                let needle_idx = search_start + i;
33                let right = buf.split_off(needle_idx + self.finder.needle().len());
34                buf.truncate(needle_idx);
35                let left = std::mem::replace(buf, right);
36                return Some(Ok(left));
37            }
38
39            if let Some(mut r) = self.reader.take() {
40                search_start = buf.len().saturating_sub(self.finder.needle().len() + 1);
41                let available = match r.fill_buf() {
42                    Ok(n) => n,
43                    Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
44                    Err(e) => return Some(Err(e)),
45                };
46
47                buf.extend_from_slice(available);
48                let used = available.len();
49                r.consume(used);
50                if used != 0 {
51                    self.reader = Some(r);
52                }
53                continue;
54            } else {
55                return self.buf.take().map(Ok);
56            }
57        }
58    }
59}
60
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{BufRead, Cursor, Read};

    /// Runs a `SplitRead` over `reader` until it is exhausted and returns every item
    /// decoded as UTF-8. Panics on read errors or invalid UTF-8.
    fn split_all(reader: impl BufRead, delim: &str) -> Vec<String> {
        SplitRead::new(reader, delim)
            .map(|item| String::from_utf8(item.unwrap()).unwrap())
            .collect()
    }

    #[test]
    fn simple() {
        let cursor = Cursor::new(String::from("foo-bar-baz"));
        assert_eq!(split_all(cursor, "-"), ["foo", "bar", "baz"]);
    }

    #[test]
    fn with_empty_fields() {
        let cursor = Cursor::new(String::from("\0\0foo\0\0bar\0\0\0\0baz\0\0"));
        assert_eq!(
            split_all(cursor, "\0\0"),
            ["", "foo", "bar", "", "baz", ""]
        );
    }

    #[test]
    fn complex_delimiter() {
        let cursor = Cursor::new(String::from("<|>foo<|>bar<|><|>baz<|>"));
        assert_eq!(
            split_all(cursor, "<|>"),
            ["", "foo", "bar", "", "baz", ""]
        );
    }

    #[test]
    fn all_empty() {
        let cursor = Cursor::new(String::from("<><>"));
        assert_eq!(split_all(cursor, "<>"), ["", "", ""]);
    }

    // The empty-delimiter check in `SplitRead::new` is a `debug_assert!`, which is
    // compiled out when debug assertions are disabled (e.g. `cargo test --release`).
    // The expected panic can therefore only be observed in builds that keep them.
    #[cfg(debug_assertions)]
    #[test]
    #[should_panic = "delimiter can't be empty"]
    fn empty_delimiter() {
        let cursor = Cursor::new(String::from("abc"));
        let _split = SplitRead::new(cursor, "");
    }

    #[test]
    fn delimiter_spread_across_reads() {
        // Each chained cursor ends in the middle of a delimiter, so matches have to
        // be found across separate `fill_buf` results.
        let reader = Cursor::new("<|>foo<|")
            .chain(Cursor::new(">bar<|><"))
            .chain(Cursor::new("|>baz<|>"));

        assert_eq!(
            split_all(reader, "<|>"),
            ["", "foo", "bar", "", "baz", ""]
        );
    }
}