regex_split/
bytes.rs

1use std::iter::FusedIterator;
2
3use regex::bytes::{Matches, Regex};
4
/// Extension trait that adds match-preserving split iterators over byte
/// strings.
///
/// Both methods borrow the receiver for `'r` and the input for `'t`; the
/// returned iterator is tied to both lifetimes and yields sub-slices of
/// `text`.
pub trait RegexSplit {
    /// Returns a [`SplitInclusive`] iterator over `text`, intended to keep
    /// each match attached to the end of the piece that precedes it.
    fn split_inclusive<'r, 't>(&'r self, text: &'t [u8]) -> SplitInclusive<'r, 't>;
    /// Returns a [`SplitInclusiveLeft`] iterator over `text`, intended to
    /// keep each match attached to the start of the piece that follows it.
    fn split_inclusive_left<'r, 't>(&'r self, text: &'t [u8]) -> SplitInclusiveLeft<'r, 't>;
}
9
/// Yields all substrings delimited by a regular expression match inclusive of
/// the match.
///
/// Each yielded slice ends with the match that delimits it; whatever follows
/// the last match is yielded as a final slice.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the byte string being split.
#[derive(Debug)]
pub struct SplitInclusive<'r, 't> {
    // Source of the delimiter matches, consumed one match per yielded slice.
    finder: Matches<'r, 't>,
    // Byte offset in `text` where the next yielded slice starts. Set to
    // `text.len() + 1` once the final slice has been handed out, which marks
    // the iterator as finished.
    last: usize,

    // The internals of finder are private, meaning we need to keep a reference
    // to the text for ourselves. This differs from the previous
    // implementation.
    text: &'t [u8],
}
25
26impl<'r, 't> Iterator for SplitInclusive<'r, 't> {
27    type Item = &'t [u8];
28
29    fn next(&mut self) -> Option<Self::Item> {
30        match self.finder.next() {
31            None => {
32                if self.last > self.text.len() {
33                    None
34                } else {
35                    let s = &self.text[self.last..];
36                    self.last = self.text.len() + 1; // Next call will return None
37                    Some(s)
38                }
39            }
40            Some(m) => {
41                let matched = &self.text[self.last..m.end()];
42                self.last = m.end();
43                Some(matched)
44            }
45        }
46    }
47}
48
49impl<'r, 't> FusedIterator for SplitInclusive<'r, 't> {}
50
/// Yields all substrings delimited by a regular expression match, keeping
/// each match at the start of the substring that follows it.
///
/// The first yielded slice is whatever precedes the first match; every later
/// slice begins with a match and runs up to the start of the next one (or to
/// the end of the text).
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the byte string being split.
#[derive(Debug)]
pub struct SplitInclusiveLeft<'r, 't> {
    // Source of the delimiter matches, consumed one match per yielded slice.
    finder: Matches<'r, 't>,
    // Byte offset in `text` where the next yielded slice starts: the start of
    // the most recent match, or 0 before any match has been seen. Set to
    // `text.len() + 1` once the final slice has been handed out, which marks
    // the iterator as finished.
    last: usize,

    // The internals of finder are private, meaning we need to keep a reference
    // to the text for ourselves. This differs from the previous
    // implementation.
    text: &'t [u8],
}
66
67impl<'r, 't> Iterator for SplitInclusiveLeft<'r, 't> {
68    type Item = &'t [u8];
69
70    fn next(&mut self) -> Option<Self::Item> {
71        match self.finder.next() {
72            None => {
73                if self.last > self.text.len() {
74                    None
75                } else {
76                    let s = &self.text[self.last..];
77                    self.last = self.text.len() + 1; // Next call will return None
78                    Some(s)
79                }
80            }
81            Some(m) => {
82                let matched = &self.text[self.last..m.start()];
83                self.last = m.start();
84                Some(matched)
85            }
86        }
87    }
88}
89
90impl<'r, 't> FusedIterator for SplitInclusiveLeft<'r, 't> {}
91
92impl RegexSplit for Regex {
93    /// Returns an iterator of substrings of `text` separated by a match of the
94    /// regular expression. Differs from the iterator produced by split in that
95    /// split_inclusive leaves the matched part as the terminator of the
96    /// substring.
97    ///
98    /// This method will *not* copy the text given.
99    ///
100    /// # Example
101    ///
102    /// ```rust
103    /// # use regex::bytes::Regex;
104    /// # use crate::regex_split::bytes::RegexSplit;
105    /// # fn main() {
106    /// let re = Regex::new(r"\r?\n").unwrap();
107    /// let text = b"Mary had a little lamb\nlittle lamb\r\nlittle lamb.";
108    /// let v: Vec<&[u8]> = re.split_inclusive(text).collect();
109    /// assert_eq!(v, [
110    ///     &b"Mary had a little lamb\n"[..],
111    ///     &b"little lamb\r\n"[..],
112    ///     &b"little lamb."[..]
113    /// ]);
114    /// # }
115    /// ```
116    fn split_inclusive<'r, 't>(&'r self, text: &'t [u8]) -> SplitInclusive<'r, 't> {
117        SplitInclusive {
118            finder: self.find_iter(text),
119            last: 0,
120            text,
121        }
122    }
123
124    /// Returns an iterator of substrings of `text` separated by a match of the
125    /// regular expression. Differs from the iterator produced by split in that
126    /// split_inclusive leaves the matched part as the terminator of the
127    /// substring.
128    ///
129    /// This method will *not* copy the text given.
130    ///
131    /// # Example
132    ///
133    /// ```rust
134    /// # use regex::bytes::Regex;
135    /// # use crate::regex_split::bytes::RegexSplit;
136    /// # fn main() {
137    /// let re = Regex::new(r"\r?\n").unwrap();
138    /// let text = b"Mary had a little lamb\nlittle lamb\r\nlittle lamb.";
139    /// let v: Vec<&[u8]> = re.split_inclusive_left(text).collect();
140    /// assert_eq!(v, [
141    ///     &b"Mary had a little lamb"[..],
142    ///     &b"\nlittle lamb"[..],
143    ///     &b"\r\nlittle lamb."[..]
144    /// ]);
145    /// # }
146    /// ```
147    fn split_inclusive_left<'r, 't>(&'r self, text: &'t [u8]) -> SplitInclusiveLeft<'r, 't> {
148        SplitInclusiveLeft {
149            finder: self.find_iter(text),
150            last: 0,
151            text,
152        }
153    }
154}