escaped_delimiter/
lib.rs

1//! `escaped-delimiter` provides an iterator of a delimited slice, considering an escape character.
2//!
3//! See the examples below.
4//!
5//! # Examples
6//!
7//! ```
8//! use escaped_delimiter::iter;
9//!
10//! // Without escape characters
11//! let s = b"The quick brown fox";
12//! let s_vec: Vec<_> = iter(s, b' ', b'\\').collect();
13//! assert_eq!(s_vec, &[&b"The"[..], &b"quick"[..], &b"brown"[..], &b"fox"[..]]);
14//!
15//! // Reverse it (`DoubleEndedIterator`)
16//! let s = b"The quick brown fox";
17//! let s_vec: Vec<_> = iter(s, b' ', b'\\').rev().collect();
18//! assert_eq!(s_vec, &[&b"fox"[..], &b"brown"[..], &b"quick"[..], &b"The"[..]]);
19//!
20//! // With escape characters
21//! let s = b"a\\ b\\\\ c\\\\\\ d\\\\\\\\ e";
22//! let s_vec: Vec<_> = iter(s, b' ', b'\\').collect();
23//! assert_eq!(s_vec, &[&b"a\\ b\\\\"[..], &b"c\\\\\\ d\\\\\\\\"[..], &b"e"[..]]);
24//! ```
25
26use std::num::NonZeroUsize;
27
28pub fn iter(slice: &[u8], delim: u8, escape: u8) -> Iter<'_> {
29    Iter::from_slice(slice, delim, escape)
30}
31
/// Double-ended iterator over the pieces of a byte slice split at unescaped delimiters.
///
/// Created by [`iter`] or [`Iter::from_slice`]. Items borrow from the original slice; the part
/// that has not been yielded yet is available through [`Iter::as_slice`].
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Iter<'a> {
    /// Byte that separates items.
    delim: u8,
    /// Byte that keeps a directly following `delim` inside the current item.
    escape: u8,
    /// Part of the input that has not been yielded from either end yet.
    inner: &'a [u8],
}
38
39impl<'a> Iterator for Iter<'a> {
40    type Item = &'a [u8];
41
42    fn next(&mut self) -> Option<Self::Item> {
43        let pos = self.find_bow();
44        self.inner = &self.inner[pos..];
45
46        let pos = self.find_eow()?.get();
47        let inner = &self.inner[..pos];
48        self.inner = &self.inner[pos..];
49
50        Some(inner)
51    }
52}
53
54impl<'a> DoubleEndedIterator for Iter<'a> {
55    fn next_back(&mut self) -> Option<Self::Item> {
56        let pos = self.rfind_eow()?.get();
57        self.inner = &self.inner[..pos];
58
59        let pos = self.rfind_bow();
60        let inner = &self.inner[pos..];
61        self.inner = &self.inner[..pos];
62
63        Some(inner)
64    }
65}
66
67impl Iter<'_> {
68    #[inline]
69    fn len(self) -> usize {
70        self.inner.len()
71    }
72
73    #[inline]
74    fn is_empty(self) -> bool {
75        self.inner.is_empty()
76    }
77
78    fn enumerate(&self) -> impl DoubleEndedIterator<Item = (usize, u8)> + '_ {
79        self.inner.iter().copied().enumerate()
80    }
81
82    #[inline]
83    fn renumerate(&self) -> impl Iterator<Item = (usize, u8)> + '_ {
84        self.enumerate().rev()
85    }
86
87    fn find_bow(&self) -> usize {
88        let mut it = self.enumerate().skip_while(|&(_, c)| c == self.delim);
89
90        if let Some((i, _)) = it.next() {
91            i
92        } else {
93            self.len()
94        }
95    }
96
97    fn find_eow(&self) -> Option<NonZeroUsize> {
98        if self.is_empty() {
99            return None;
100        }
101
102        let mut prev_char = 0u8;
103        for (i, c) in self.enumerate() {
104            if c == self.delim && prev_char != self.escape {
105                // SAFETY: self.inner[0] != DELIM
106                return unsafe { Some(NonZeroUsize::new_unchecked(i)) };
107            }
108
109            prev_char = if c == self.escape && prev_char == self.escape {
110                0
111            } else {
112                c
113            };
114        }
115
116        // SAFETY: self.inner.len() > 0
117        unsafe { Some(NonZeroUsize::new_unchecked(self.len())) }
118    }
119
120    fn rfind_eow(&self) -> Option<NonZeroUsize> {
121        let mut it = self.renumerate().skip_while(|&(_, c)| c == self.delim);
122
123        if let Some((i, c)) = it.next() {
124            if c == self.escape {
125                // [^ESCAPE] ESCAPE ESCAPE* ESCAPE \t*
126                //             ^              ^
127                //             j              i
128                let last = match it.filter(|&(_, c)| c == self.escape).last() {
129                    Some((j, _)) if !iso_parity(i, j) => i + 1,
130                    _ => i + 2,
131                };
132                // SAFETY: i + 1 > 0
133                unsafe { Some(NonZeroUsize::new_unchecked(last)) }
134            } else {
135                // SAFETY: i + 1 > 0
136                unsafe { Some(NonZeroUsize::new_unchecked(i + 1)) }
137            }
138        } else {
139            None
140        }
141    }
142
143    fn rfind_bow(&self) -> usize {
144        let mut delim = 0;
145        let mut delim_found = false;
146        let mut broken = false;
147
148        for (i, c) in self.renumerate() {
149            if delim_found && c != self.escape {
150                // [^ESCAPE] ESCAPE* DELIM
151                //     ^               ^
152                //     i             delim
153                if iso_parity(i, delim) {
154                    // # of ESCAPE's is odd
155                    delim_found = false;
156                } else {
157                    // # of ESCAPE's is even
158                    broken = true;
159                    break;
160                }
161            }
162            if c == self.delim {
163                delim_found = true;
164                delim = i;
165            }
166        }
167
168        if delim_found && (broken || iso_parity(delim, 0)) {
169            delim + 1
170        } else {
171            0
172        }
173    }
174}
175
/// Returns `true` when `i` and `j` are both even or both odd.
#[inline]
fn iso_parity(i: usize, j: usize) -> bool {
    // The lowest bit of the XOR is set exactly when the parities differ.
    ((i ^ j) & 1) == 0
}
180
181impl<'a> Iter<'a> {
182    /// Returns the rest of the inner slice.
183    ///
184    /// ```
185    /// use escaped_delimiter::iter;
186    ///
187    /// let s = b"abc";
188    /// let it = iter(s, b' ', b'\\');
189    /// assert_eq!(it.as_slice(), &b"abc"[..]);
190    ///
191    /// let s = b"a b c d";
192    /// let mut it = iter(s, b' ', b'\\');
193    /// it.next(); // consumes b'a'
194    /// it.next_back(); // consumes b'd'
195    /// assert_eq!(it.as_slice(), &b" b c "[..]);
196    /// ```
197    #[inline]
198    pub fn as_slice(&self) -> &'a [u8] {
199        self.inner
200    }
201
202    /// See the examples at the top of this doc page.
203    #[inline]
204    pub fn from_slice(inner: &'a [u8], delim: u8, escape: u8) -> Self {
205        Self {
206            inner,
207            delim,
208            escape,
209        }
210    }
211}
212
#[cfg(test)]
mod test {
    use super::*;

    /// Collects all items front to back, with `X` as the delimiter and `Y` as the escape byte.
    fn forward(input: &[u8]) -> Vec<&[u8]> {
        Iter::from_slice(input, b'X', b'Y').collect()
    }

    /// Collects all items back to front, with `X` as the delimiter and `Y` as the escape byte.
    fn backward(input: &[u8]) -> Vec<&[u8]> {
        Iter::from_slice(input, b'X', b'Y').rev().collect()
    }

    #[test]
    fn test_words() {
        assert!(forward(b"").is_empty());

        assert_eq!(forward(b"abc"), [&b"abc"[..]]);

        // A trailing delimiter does not produce an empty item.
        assert_eq!(forward(b"abcX"), [&b"abc"[..]]);

        // Consecutive delimiters collapse.
        assert_eq!(
            forward(b"abcXdefXXhX jklm"),
            [&b"abc"[..], &b"def"[..], &b"h"[..], &b" jklm"[..]]
        );

        // Escaped delimiters stay inside their item; paired escapes do not escape.
        assert_eq!(
            forward(b"abXYXcdeXYfYXXYYYXgYYX"),
            [&b"ab"[..], &b"YXcde"[..], &b"YfYX"[..], &b"YYYXgYY"[..]]
        );
    }

    #[test]
    fn test_words_rev() {
        assert!(backward(b"").is_empty());

        assert_eq!(
            backward(b"abcXdefXXhX jklm"),
            [&b" jklm"[..], &b"h"[..], &b"def"[..], &b"abc"[..]]
        );

        assert_eq!(
            backward(b"XXabXYXcdeXYfYXXYYYXgYYX"),
            [&b"YYYXgYY"[..], &b"YfYX"[..], &b"YXcde"[..], &b"ab"[..]]
        );

        // Delimiter at the very start, preceded by zero, one and two escapes.
        assert_eq!(backward(b"Xa"), [&b"a"[..]]);
        assert_eq!(backward(b"YXa"), [&b"YXa"[..]]);
        assert_eq!(backward(b"YYXa"), [&b"a"[..], &b"YY"[..]]);
    }

    #[test]
    fn test_words_mixed() {
        let mut words = Iter::from_slice(b"abcXdefXXhX jklm", b'X', b'Y');

        // Alternate between both ends until they meet.
        assert_eq!(words.next(), Some(&b"abc"[..]));
        assert_eq!(words.next_back(), Some(&b" jklm"[..]));
        assert_eq!(words.next(), Some(&b"def"[..]));
        assert_eq!(words.next_back(), Some(&b"h"[..]));

        assert_eq!(words.next(), None);
        assert_eq!(words.next_back(), None);
    }
}