split_preserve/lib.rs
//! This module provides an iterator over strings that splits on whitespace
//! but, unlike the version in
//! [std](https://doc.rust-lang.org/std/primitive.str.html#method.split_whitespace),
//! doesn't throw the whitespace away.
5
6//! An iterator over the whitespace and non-whitespace sub-strings of a string, separated by any
7//! amount of whitespace.
8pub struct SplitPreserveWS<'a> {
9 string: Option<Token<'a>>,
10}
11
/// The token returned by the `SplitPreserveWS` iterator. It can be either
/// `Whitespace` or `Other`.
///
/// Both variants only hold a borrowed string slice, so a `Token` is `Copy` and
/// can be passed around by value without cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Token<'a> {
    /// A sub-slice that the iterator classified as whitespace.
    Whitespace(&'a str),
    /// A sub-slice that the iterator classified as non-whitespace.
    Other(&'a str),
}
19
20impl<'a> SplitPreserveWS<'a> {
21 /// Splits a string slice by whitespace.
22 ///
23 /// The iterator returned will return string slices that are sub-slices of the original string
24 /// slice, annotated as `Whitespace` or `Other` using the `Token` enum.
25 ///
26 /// 'Whitespace' is defined according to the terms of the Unicode Derived Core Property
27 /// `White_Space`.
28 ///
29 /// ```rust
30 /// use split_preserve::{SplitPreserveWS, Token};
31 ///
32 /// assert_eq!(SplitPreserveWS::new("aa ").next(), Some(Token::Other("aa")))
33 /// ```
34 pub fn new(string: &'a str) -> Self {
35 if string.is_empty() {
36 Self { string: None }
37 } else if string.starts_with(char::is_whitespace) {
38 Self {
39 string: Some(Token::Whitespace(string)),
40 }
41 } else {
42 Self {
43 string: Some(Token::Other(string)),
44 }
45 }
46 }
47
48 /// Maps over the `Token::Other` elements of the iterator.
49 ///
50 /// This will allocate a new string for each of the tokens in the iterator
51 ///
52 /// ```rust
53 /// use split_preserve::{SplitPreserveWS, Token};
54 ///
55 /// assert_eq!(
56 /// SplitPreserveWS::new("Line\twith\nweird whitespace")
57 /// .map_words(|f| f.chars().rev().collect::<String>())
58 /// .collect::<String>(),
59 /// "eniL\thtiw\ndriew ecapsetihw"
60 /// )
61 /// ```
62 pub fn map_words<S>(self, mut f: S) -> std::iter::Map<Self, impl FnMut(Token<'a>) -> String>
63 where
64 S: FnMut(&str) -> String,
65 {
66 self.map(move |t: Token<'a>| match t {
67 Token::Other(s) => f(s),
68 Token::Whitespace(s) => s.to_string(),
69 })
70 }
71
72 /// Maps over the `Token::Whitespace` elements of the iterator.
73 ///
74 /// This will allocate a new string for each of the tokens in the iterator
75 ///
76 /// ```rust
77 /// use split_preserve::{SplitPreserveWS, Token};
78 ///
79 /// assert_eq!(
80 /// SplitPreserveWS::new("Line\twith\nweird whitespace")
81 /// .map_whitespace(|_| String::from(" "))
82 /// .collect::<String>(),
83 /// "Line with weird whitespace"
84 /// )
85 /// ```
86 pub fn map_whitespace<S>(
87 self,
88 mut f: S,
89 ) -> std::iter::Map<Self, impl FnMut(Token<'a>) -> String>
90 where
91 S: FnMut(&str) -> String,
92 {
93 self.map(move |t: Token<'a>| match t {
94 Token::Other(s) => s.to_string(),
95 Token::Whitespace(s) => f(s),
96 })
97 }
98}
99
100impl<'a> Iterator for SplitPreserveWS<'a> {
101 type Item = Token<'a>;
102
103 fn next(&mut self) -> Option<Self::Item> {
104 self.string.take().map(|t| match t {
105 Token::Whitespace(s) => {
106 let (token, rest) = match s.find(|c: char| !c.is_whitespace()) {
107 Some(i) => {
108 let (a, b) = s.split_at(i);
109 (a, Some(Token::Other(b)))
110 }
111 None => (s, None),
112 };
113 self.string = rest;
114 Token::Whitespace(token)
115 }
116 Token::Other(s) => {
117 let (token, rest) = match s.find(char::is_whitespace) {
118 Some(i) => {
119 let (a, b) = s.split_at(i);
120 (a, Some(Token::Whitespace(b)))
121 }
122 None => (s, None),
123 };
124 self.string = rest;
125 Token::Other(token)
126 }
127 })
128 }
129}