1use std::io::{BufRead, ErrorKind};
2
3use memchr::memmem::Finder;
4
5pub struct SplitRead<R> {
6 reader: Option<R>,
7 buf: Option<Vec<u8>>,
8 finder: Finder<'static>,
9}
10
11impl<R: BufRead> SplitRead<R> {
12 pub fn new(reader: R, delim: impl AsRef<[u8]>) -> Self {
13 debug_assert!(!delim.as_ref().is_empty(), "delimiter can't be empty");
15 Self {
16 reader: Some(reader),
17 buf: Some(Vec::new()),
18 finder: Finder::new(delim.as_ref()).into_owned(),
19 }
20 }
21}
22
23impl<R: BufRead> Iterator for SplitRead<R> {
24 type Item = Result<Vec<u8>, std::io::Error>;
25
26 fn next(&mut self) -> Option<Self::Item> {
27 let buf = self.buf.as_mut()?;
28 let mut search_start = 0usize;
29
30 loop {
31 if let Some(i) = self.finder.find(&buf[search_start..]) {
32 let needle_idx = search_start + i;
33 let right = buf.split_off(needle_idx + self.finder.needle().len());
34 buf.truncate(needle_idx);
35 let left = std::mem::replace(buf, right);
36 return Some(Ok(left));
37 }
38
39 if let Some(mut r) = self.reader.take() {
40 search_start = buf.len().saturating_sub(self.finder.needle().len() + 1);
41 let available = match r.fill_buf() {
42 Ok(n) => n,
43 Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
44 Err(e) => return Some(Err(e)),
45 };
46
47 buf.extend_from_slice(available);
48 let used = available.len();
49 r.consume(used);
50 if used != 0 {
51 self.reader = Some(r);
52 }
53 continue;
54 } else {
55 return self.buf.take().map(Ok);
56 }
57 }
58 }
59}
60
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{self, Cursor, Read};

    /// Runs `SplitRead` over `reader` to completion and returns every chunk
    /// decoded as UTF-8, panicking on any I/O or decoding error.
    fn collect_fields<R: io::BufRead>(reader: R, delim: &str) -> Vec<String> {
        SplitRead::new(reader, delim)
            .map(|chunk| String::from_utf8(chunk.unwrap()).unwrap())
            .collect()
    }

    /// Convenience wrapper for splitting an in-memory string.
    fn split_text(text: &str, delim: &str) -> Vec<String> {
        collect_fields(Cursor::new(String::from(text)), delim)
    }

    #[test]
    fn simple() {
        assert_eq!(split_text("foo-bar-baz", "-"), ["foo", "bar", "baz"]);
    }

    #[test]
    fn with_empty_fields() {
        assert_eq!(
            split_text("\0\0foo\0\0bar\0\0\0\0baz\0\0", "\0\0"),
            ["", "foo", "bar", "", "baz", ""],
        );
    }

    #[test]
    fn complex_delimiter() {
        assert_eq!(
            split_text("<|>foo<|>bar<|><|>baz<|>", "<|>"),
            ["", "foo", "bar", "", "baz", ""],
        );
    }

    #[test]
    fn all_empty() {
        assert_eq!(split_text("<><>", "<>"), ["", "", ""]);
    }

    #[test]
    #[should_panic(expected = "delimiter can't be empty")]
    fn empty_delimiter() {
        let source = Cursor::new(String::from("abc"));
        let _unused = SplitRead::new(source, "").map(|item| item.unwrap());
    }

    #[test]
    fn delimiter_spread_across_reads() {
        // Each cursor boundary falls in the middle of a delimiter, so a match
        // has to be assembled from two consecutive reads.
        let first = Cursor::new("<|>foo<|");
        let second = Cursor::new(">bar<|><");
        let third = Cursor::new("|>baz<|>");
        let reader = first.chain(second).chain(third);

        assert_eq!(
            collect_fields(reader, "<|>"),
            ["", "foo", "bar", "", "baz", ""],
        );
    }
}