regex_split/
lib.rs

//! The trait `RegexSplit` adds a pair of split options to `Regex`: `split_inclusive` and
//! `split_inclusive_left`. `split_inclusive` works similarly to the method of the same name in
//! std, whereas `split_inclusive_left` includes the delimiter at the front of each substring
//! instead of at the back.
5//! 
6//! ## `split_inclusive`
7//! 
8//! It's possible to roll your own `lines()` function. Why? I don't know, but you can.
9//! 
10//! ```rust
11//! # use regex::Regex;
12//! # use crate::regex_split::RegexSplit;
13//! # fn main() {
14//! let re = Regex::new("\r?\n").unwrap();
15//! let text = "This is just\na set of lines\r\nwith different newlines.";
16//! let v: Vec<&str> = re.split_inclusive(text).collect();
17//! assert_eq!(v, [
18//!     "This is just\n",
19//!     "a set of lines\r\n",
20//!     "with different newlines.",
21//! ]);
22//! # }
23//! ```
24//! 
25//! ## `split_inclusive_left`
26//! 
27//! This is useful if your delimiter includes some context that is associated with the substring
28//! to the *right.* Is that useful? No, not generally--but there's really no substitute if you
29//! need it.
30//! 
31//! ```rust
32//! # use regex::Regex;
33//! # use crate::regex_split::RegexSplit;
34//! # fn main() {
35//! let re = Regex::new("(?m)^-").unwrap();
36//! let text = "List of fruits:\n-apple\n-pear\n-banana";
37//! let v: Vec<&str> = re.split_inclusive_left(text).collect();
38//! assert_eq!(v, [
39//!     "List of fruits:\n",
40//!     "-apple\n",
41//!     "-pear\n",
42//!     "-banana",
43//! ]);
44//! # }
45//! ```
46//!  
47//! Use `regex_split::bytes::RegexSplit` for `regex::bytes::Regex`.
48
49pub mod bytes;
50
51use std::iter::FusedIterator;
52
53use regex::{Matches, Regex};
54
/// Extension trait that adds delimiter-preserving split iterators to a regular
/// expression type.
///
/// This file implements it for `regex::Regex`.
pub trait RegexSplit {
    /// Returns an iterator over the substrings of `text` separated by matches
    /// of the regular expression, where each match is kept at the *end* of the
    /// substring that precedes it.
    fn split_inclusive<'r, 't>(&'r self, text: &'t str) -> SplitInclusive<'r, 't>;
    /// Returns an iterator over the substrings of `text` separated by matches
    /// of the regular expression, where each match is kept at the *start* of
    /// the substring that follows it.
    fn split_inclusive_left<'r, 't>(&'r self, text: &'t str) -> SplitInclusiveLeft<'r, 't>;
}
59
/// Yields all substrings delimited by a regular expression match, with each
/// match kept at the end of the substring it terminates.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the string being split.
#[derive(Debug)]
pub struct SplitInclusive<'r, 't> {
    // Iterator over the successive regex matches found in `text`.
    finder: Matches<'r, 't>,
    // Byte offset in `text` at which the next yielded substring starts.
    last: usize,

    // The internals of `finder` are private, so the text it searches cannot be
    // read back from it; the iterator keeps its own reference in order to
    // slice substrings out of the input.
    text: &'t str,
}
75
76impl<'r, 't> Iterator for SplitInclusive<'r, 't> {
77    type Item = &'t str;
78
79    fn next(&mut self) -> Option<Self::Item> {
80        match self.finder.next() {
81            None => {
82                if self.last > self.text.len() {
83                    None
84                } else {
85                    let s = &self.text[self.last..];
86                    self.last = self.text.len() + 1; // Next call will return None
87                    Some(s)
88                }
89            }
90            Some(m) => {
91                let matched = &self.text[self.last..m.end()];
92                self.last = m.end();
93                Some(matched)
94            }
95        }
96    }
97}
98
99impl<'r, 't> FusedIterator for SplitInclusive<'r, 't> {}
100
/// Yields all substrings delimited by a regular expression match, with each
/// match kept at the start of the substring that follows it.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the string being split.
#[derive(Debug)]
pub struct SplitInclusiveLeft<'r, 't> {
    // Iterator over the successive regex matches found in `text`.
    finder: Matches<'r, 't>,
    // Byte offset in `text` at which the next yielded substring starts.
    last: usize,

    // The internals of `finder` are private, so the text it searches cannot be
    // read back from it; the iterator keeps its own reference in order to
    // slice substrings out of the input.
    text: &'t str,
}
116
117impl<'r, 't> Iterator for SplitInclusiveLeft<'r, 't> {
118    type Item = &'t str;
119
120    fn next(&mut self) -> Option<Self::Item> {
121        match self.finder.next() {
122            None => {
123                if self.last > self.text.len() {
124                    None
125                } else {
126                    let s = &self.text[self.last..];
127                    self.last = self.text.len() + 1; // Next call will return None
128                    Some(s)
129                }
130            }
131            Some(m) => {
132                let matched = &self.text[self.last..m.start()];
133                self.last = m.start();
134                Some(matched)
135            }
136        }
137    }
138}
139
140impl<'r, 't> FusedIterator for SplitInclusiveLeft<'r, 't> {}
141
142impl RegexSplit for Regex {
143    /// Returns an iterator of substrings of `text` separated by a match of the
144    /// regular expression. Differs from the iterator produced by split in that
145    /// split_inclusive leaves the matched part as the terminator of the
146    /// substring.
147    ///
148    /// This method will *not* copy the text given.
149    ///
150    /// # Example
151    ///
152    /// ```rust
153    /// # use regex::Regex;
154    /// # use crate::regex_split::RegexSplit;
155    /// # fn main() {
156    /// let re = Regex::new(r"\r?\n").unwrap();
157    /// let text = "Mary had a little lamb\nlittle lamb\r\nlittle lamb.";
158    /// let v: Vec<&str> = re.split_inclusive(text).collect();
159    /// assert_eq!(v, [
160    ///     "Mary had a little lamb\n",
161    ///     "little lamb\r\n",
162    ///     "little lamb.",
163    /// ]);
164    /// # }
165    /// ```
166    fn split_inclusive<'r, 't>(&'r self, text: &'t str) -> SplitInclusive<'r, 't> {
167        SplitInclusive {
168            finder: self.find_iter(text),
169            last: 0,
170            text,
171        }
172    }
173
174    /// Returns an iterator of substrings of `text` separated by a match of the
175    /// regular expression. Differs from the iterator produced by split in that
176    /// split_inclusive leaves the matched part as the terminator of the
177    /// substring.
178    ///
179    /// This method will *not* copy the text given.
180    ///
181    /// # Example
182    ///
183    /// ```rust
184    /// # use regex::Regex;
185    /// # use crate::regex_split::RegexSplit;
186    /// # fn main() {
187    /// let re = Regex::new(r"\r?\n").unwrap();
188    /// let text = "Mary had a little lamb\nlittle lamb\r\nlittle lamb.";
189    /// let v: Vec<&str> = re.split_inclusive_left(text).collect();
190    /// assert_eq!(v, [
191    ///     "Mary had a little lamb",
192    ///     "\nlittle lamb",
193    ///     "\r\nlittle lamb.",
194    /// ]);
195    /// # }
196    /// ```
197    fn split_inclusive_left<'r, 't>(&'r self, text: &'t str) -> SplitInclusiveLeft<'r, 't> {
198        SplitInclusiveLeft {
199            finder: self.find_iter(text),
200            last: 0,
201            text,
202        }
203    }
204}