regex_split/lib.rs
//! The trait `RegexSplit` adds a pair of split options to `Regex`: `split_inclusive` and
//! `split_inclusive_left`. `split_inclusive` works similarly to the method of the same name in
//! std, whereas `split_inclusive_left` includes the delimiter at the front of each substring
//! instead of at the back.
//!
//! ## `split_inclusive`
//!
//! It's possible to roll your own `lines()` function. Why? I don't know, but you can.
//!
//! ```rust
//! # use regex::Regex;
//! # use crate::regex_split::RegexSplit;
//! # fn main() {
//! let re = Regex::new("\r?\n").unwrap();
//! let text = "This is just\na set of lines\r\nwith different newlines.";
//! let v: Vec<&str> = re.split_inclusive(text).collect();
//! assert_eq!(v, [
//!     "This is just\n",
//!     "a set of lines\r\n",
//!     "with different newlines.",
//! ]);
//! # }
//! ```
//!
//! ## `split_inclusive_left`
//!
//! This is useful if your delimiter includes some context that is associated with the substring
//! to the *right.* Is that useful? No, not generally--but there's really no substitute if you
//! need it.
//!
//! ```rust
//! # use regex::Regex;
//! # use crate::regex_split::RegexSplit;
//! # fn main() {
//! let re = Regex::new("(?m)^-").unwrap();
//! let text = "List of fruits:\n-apple\n-pear\n-banana";
//! let v: Vec<&str> = re.split_inclusive_left(text).collect();
//! assert_eq!(v, [
//!     "List of fruits:\n",
//!     "-apple\n",
//!     "-pear\n",
//!     "-banana",
//! ]);
//! # }
//! ```
//!
//! Use `regex_split::bytes::RegexSplit` for `regex::bytes::Regex`.
48
49pub mod bytes;
50
51use std::iter::FusedIterator;
52
53use regex::{Matches, Regex};
54
55pub trait RegexSplit {
56 fn split_inclusive<'r, 't>(&'r self, text: &'t str) -> SplitInclusive<'r, 't>;
57 fn split_inclusive_left<'r, 't>(&'r self, text: &'t str) -> SplitInclusiveLeft<'r, 't>;
58}
59
60/// Yields all substrings delimited by a regular expression match inclusive of
61/// the match.
62///
63/// `'r` is the lifetime of the compiled regular expression and `'t` is the
64/// lifetime of the byte string being split.
65#[derive(Debug)]
66pub struct SplitInclusive<'r, 't> {
67 finder: Matches<'r, 't>,
68 last: usize,
69
70 // The internals of finder are private, meaning we need to keep a reference
71 // to the text for ourselves. This differs from the previous
72 // implementation.
73 text: &'t str,
74}
75
76impl<'r, 't> Iterator for SplitInclusive<'r, 't> {
77 type Item = &'t str;
78
79 fn next(&mut self) -> Option<Self::Item> {
80 match self.finder.next() {
81 None => {
82 if self.last > self.text.len() {
83 None
84 } else {
85 let s = &self.text[self.last..];
86 self.last = self.text.len() + 1; // Next call will return None
87 Some(s)
88 }
89 }
90 Some(m) => {
91 let matched = &self.text[self.last..m.end()];
92 self.last = m.end();
93 Some(matched)
94 }
95 }
96 }
97}
98
99impl<'r, 't> FusedIterator for SplitInclusive<'r, 't> {}
100
101/// Yields all substrings delimited by a regular expression match inclusive of
102/// the match.
103///
104/// `'r` is the lifetime of the compiled regular expression and `'t` is the
105/// lifetime of the byte string being split.
106#[derive(Debug)]
107pub struct SplitInclusiveLeft<'r, 't> {
108 finder: Matches<'r, 't>,
109 last: usize,
110
111 // The internals of finder are private, meaning we need to keep a reference
112 // to the text for ourselves. This differs from the previous
113 // implementation.
114 text: &'t str,
115}
116
117impl<'r, 't> Iterator for SplitInclusiveLeft<'r, 't> {
118 type Item = &'t str;
119
120 fn next(&mut self) -> Option<Self::Item> {
121 match self.finder.next() {
122 None => {
123 if self.last > self.text.len() {
124 None
125 } else {
126 let s = &self.text[self.last..];
127 self.last = self.text.len() + 1; // Next call will return None
128 Some(s)
129 }
130 }
131 Some(m) => {
132 let matched = &self.text[self.last..m.start()];
133 self.last = m.start();
134 Some(matched)
135 }
136 }
137 }
138}
139
140impl<'r, 't> FusedIterator for SplitInclusiveLeft<'r, 't> {}
141
142impl RegexSplit for Regex {
143 /// Returns an iterator of substrings of `text` separated by a match of the
144 /// regular expression. Differs from the iterator produced by split in that
145 /// split_inclusive leaves the matched part as the terminator of the
146 /// substring.
147 ///
148 /// This method will *not* copy the text given.
149 ///
150 /// # Example
151 ///
152 /// ```rust
153 /// # use regex::Regex;
154 /// # use crate::regex_split::RegexSplit;
155 /// # fn main() {
156 /// let re = Regex::new(r"\r?\n").unwrap();
157 /// let text = "Mary had a little lamb\nlittle lamb\r\nlittle lamb.";
158 /// let v: Vec<&str> = re.split_inclusive(text).collect();
159 /// assert_eq!(v, [
160 /// "Mary had a little lamb\n",
161 /// "little lamb\r\n",
162 /// "little lamb.",
163 /// ]);
164 /// # }
165 /// ```
166 fn split_inclusive<'r, 't>(&'r self, text: &'t str) -> SplitInclusive<'r, 't> {
167 SplitInclusive {
168 finder: self.find_iter(text),
169 last: 0,
170 text,
171 }
172 }
173
174 /// Returns an iterator of substrings of `text` separated by a match of the
175 /// regular expression. Differs from the iterator produced by split in that
176 /// split_inclusive leaves the matched part as the terminator of the
177 /// substring.
178 ///
179 /// This method will *not* copy the text given.
180 ///
181 /// # Example
182 ///
183 /// ```rust
184 /// # use regex::Regex;
185 /// # use crate::regex_split::RegexSplit;
186 /// # fn main() {
187 /// let re = Regex::new(r"\r?\n").unwrap();
188 /// let text = "Mary had a little lamb\nlittle lamb\r\nlittle lamb.";
189 /// let v: Vec<&str> = re.split_inclusive_left(text).collect();
190 /// assert_eq!(v, [
191 /// "Mary had a little lamb",
192 /// "\nlittle lamb",
193 /// "\r\nlittle lamb.",
194 /// ]);
195 /// # }
196 /// ```
197 fn split_inclusive_left<'r, 't>(&'r self, text: &'t str) -> SplitInclusiveLeft<'r, 't> {
198 SplitInclusiveLeft {
199 finder: self.find_iter(text),
200 last: 0,
201 text,
202 }
203 }
204}