regex_split/bytes.rs
1use std::iter::FusedIterator;
2
3use regex::bytes::{Matches, Regex};
4
5pub trait RegexSplit {
6 fn split_inclusive<'r, 't>(&'r self, text: &'t [u8]) -> SplitInclusive<'r, 't>;
7 fn split_inclusive_left<'r, 't>(&'r self, text: &'t [u8]) -> SplitInclusiveLeft<'r, 't>;
8}
9
10/// Yields all substrings delimited by a regular expression match inclusive of
11/// the match.
12///
13/// `'r` is the lifetime of the compiled regular expression and `'t` is the
14/// lifetime of the byte string being split.
15#[derive(Debug)]
16pub struct SplitInclusive<'r, 't> {
17 finder: Matches<'r, 't>,
18 last: usize,
19
20 // The internals of finder are private, meaning we need to keep a reference
21 // to the text for ourselves. This differs from the previous
22 // implementation.
23 text: &'t [u8],
24}
25
26impl<'r, 't> Iterator for SplitInclusive<'r, 't> {
27 type Item = &'t [u8];
28
29 fn next(&mut self) -> Option<Self::Item> {
30 match self.finder.next() {
31 None => {
32 if self.last > self.text.len() {
33 None
34 } else {
35 let s = &self.text[self.last..];
36 self.last = self.text.len() + 1; // Next call will return None
37 Some(s)
38 }
39 }
40 Some(m) => {
41 let matched = &self.text[self.last..m.end()];
42 self.last = m.end();
43 Some(matched)
44 }
45 }
46 }
47}
48
49impl<'r, 't> FusedIterator for SplitInclusive<'r, 't> {}
50
51/// Yields all substrings delimited by a regular expression match inclusive of
52/// the match.
53///
54/// `'r` is the lifetime of the compiled regular expression and `'t` is the
55/// lifetime of the byte string being split.
56#[derive(Debug)]
57pub struct SplitInclusiveLeft<'r, 't> {
58 finder: Matches<'r, 't>,
59 last: usize,
60
61 // The internals of finder are private, meaning we need to keep a reference
62 // to the text for ourselves. This differs from the previous
63 // implementation.
64 text: &'t [u8],
65}
66
67impl<'r, 't> Iterator for SplitInclusiveLeft<'r, 't> {
68 type Item = &'t [u8];
69
70 fn next(&mut self) -> Option<Self::Item> {
71 match self.finder.next() {
72 None => {
73 if self.last > self.text.len() {
74 None
75 } else {
76 let s = &self.text[self.last..];
77 self.last = self.text.len() + 1; // Next call will return None
78 Some(s)
79 }
80 }
81 Some(m) => {
82 let matched = &self.text[self.last..m.start()];
83 self.last = m.start();
84 Some(matched)
85 }
86 }
87 }
88}
89
90impl<'r, 't> FusedIterator for SplitInclusiveLeft<'r, 't> {}
91
92impl RegexSplit for Regex {
93 /// Returns an iterator of substrings of `text` separated by a match of the
94 /// regular expression. Differs from the iterator produced by split in that
95 /// split_inclusive leaves the matched part as the terminator of the
96 /// substring.
97 ///
98 /// This method will *not* copy the text given.
99 ///
100 /// # Example
101 ///
102 /// ```rust
103 /// # use regex::bytes::Regex;
104 /// # use crate::regex_split::bytes::RegexSplit;
105 /// # fn main() {
106 /// let re = Regex::new(r"\r?\n").unwrap();
107 /// let text = b"Mary had a little lamb\nlittle lamb\r\nlittle lamb.";
108 /// let v: Vec<&[u8]> = re.split_inclusive(text).collect();
109 /// assert_eq!(v, [
110 /// &b"Mary had a little lamb\n"[..],
111 /// &b"little lamb\r\n"[..],
112 /// &b"little lamb."[..]
113 /// ]);
114 /// # }
115 /// ```
116 fn split_inclusive<'r, 't>(&'r self, text: &'t [u8]) -> SplitInclusive<'r, 't> {
117 SplitInclusive {
118 finder: self.find_iter(text),
119 last: 0,
120 text,
121 }
122 }
123
124 /// Returns an iterator of substrings of `text` separated by a match of the
125 /// regular expression. Differs from the iterator produced by split in that
126 /// split_inclusive leaves the matched part as the terminator of the
127 /// substring.
128 ///
129 /// This method will *not* copy the text given.
130 ///
131 /// # Example
132 ///
133 /// ```rust
134 /// # use regex::bytes::Regex;
135 /// # use crate::regex_split::bytes::RegexSplit;
136 /// # fn main() {
137 /// let re = Regex::new(r"\r?\n").unwrap();
138 /// let text = b"Mary had a little lamb\nlittle lamb\r\nlittle lamb.";
139 /// let v: Vec<&[u8]> = re.split_inclusive_left(text).collect();
140 /// assert_eq!(v, [
141 /// &b"Mary had a little lamb"[..],
142 /// &b"\nlittle lamb"[..],
143 /// &b"\r\nlittle lamb."[..]
144 /// ]);
145 /// # }
146 /// ```
147 fn split_inclusive_left<'r, 't>(&'r self, text: &'t [u8]) -> SplitInclusiveLeft<'r, 't> {
148 SplitInclusiveLeft {
149 finder: self.find_iter(text),
150 last: 0,
151 text,
152 }
153 }
154}