split_preserve/
lib.rs

//! This module provides an iterator over strings that splits on whitespace
//! but, unlike
//! [`str::split_whitespace`](https://doc.rust-lang.org/std/primitive.str.html#method.split_whitespace)
//! in std, doesn't throw the whitespace away.
5
6//! An iterator over the whitespace and non-whitespace sub-strings of a string, separated by any
7//! amount of whitespace.
8pub struct SplitPreserveWS<'a> {
9    string: Option<Token<'a>>,
10}
11
/// The token returned by the `SplitPreserveWS` iterator. It can be either
/// `Whitespace` or `Other`.
///
/// A token only wraps a borrowed string slice, so it is `Copy` and can be passed
/// around by value without cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Token<'a> {
    /// A run of whitespace characters.
    Whitespace(&'a str),
    /// A run of non-whitespace characters.
    Other(&'a str),
}
19
20impl<'a> SplitPreserveWS<'a> {
21    /// Splits a string slice by whitespace.
22    ///
23    /// The iterator returned will return string slices that are sub-slices of the original string
24    /// slice, annotated as `Whitespace` or `Other` using the `Token` enum.
25    ///
26    /// 'Whitespace' is defined according to the terms of the Unicode Derived Core Property
27    /// `White_Space`.
28    ///
29    /// ```rust
30    /// use split_preserve::{SplitPreserveWS, Token};
31    ///
32    /// assert_eq!(SplitPreserveWS::new("aa  ").next(), Some(Token::Other("aa")))
33    /// ```
34    pub fn new(string: &'a str) -> Self {
35        if string.is_empty() {
36            Self { string: None }
37        } else if string.starts_with(char::is_whitespace) {
38            Self {
39                string: Some(Token::Whitespace(string)),
40            }
41        } else {
42            Self {
43                string: Some(Token::Other(string)),
44            }
45        }
46    }
47
48    /// Maps over the `Token::Other` elements of the iterator.
49    ///
50    /// This will allocate a new string for each of the tokens in the iterator
51    ///
52    /// ```rust
53    /// use split_preserve::{SplitPreserveWS, Token};
54    ///
55    /// assert_eq!(
56    ///     SplitPreserveWS::new("Line\twith\nweird whitespace")
57    ///         .map_words(|f| f.chars().rev().collect::<String>())
58    ///         .collect::<String>(),
59    ///     "eniL\thtiw\ndriew ecapsetihw"
60    /// )
61    /// ```
62    pub fn map_words<S>(self, mut f: S) -> std::iter::Map<Self, impl FnMut(Token<'a>) -> String>
63    where
64        S: FnMut(&str) -> String,
65    {
66        self.map(move |t: Token<'a>| match t {
67            Token::Other(s) => f(s),
68            Token::Whitespace(s) => s.to_string(),
69        })
70    }
71
72    /// Maps over the `Token::Whitespace` elements of the iterator.
73    ///
74    /// This will allocate a new string for each of the tokens in the iterator
75    ///
76    /// ```rust
77    /// use split_preserve::{SplitPreserveWS, Token};
78    ///
79    /// assert_eq!(
80    ///     SplitPreserveWS::new("Line\twith\nweird whitespace")
81    ///         .map_whitespace(|_| String::from(" "))
82    ///         .collect::<String>(),
83    ///     "Line with weird whitespace"
84    /// )
85    /// ```
86    pub fn map_whitespace<S>(
87        self,
88        mut f: S,
89    ) -> std::iter::Map<Self, impl FnMut(Token<'a>) -> String>
90    where
91        S: FnMut(&str) -> String,
92    {
93        self.map(move |t: Token<'a>| match t {
94            Token::Other(s) => s.to_string(),
95            Token::Whitespace(s) => f(s),
96        })
97    }
98}
99
100impl<'a> Iterator for SplitPreserveWS<'a> {
101    type Item = Token<'a>;
102
103    fn next(&mut self) -> Option<Self::Item> {
104        self.string.take().map(|t| match t {
105            Token::Whitespace(s) => {
106                let (token, rest) = match s.find(|c: char| !c.is_whitespace()) {
107                    Some(i) => {
108                        let (a, b) = s.split_at(i);
109                        (a, Some(Token::Other(b)))
110                    }
111                    None => (s, None),
112                };
113                self.string = rest;
114                Token::Whitespace(token)
115            }
116            Token::Other(s) => {
117                let (token, rest) = match s.find(char::is_whitespace) {
118                    Some(i) => {
119                        let (a, b) = s.split_at(i);
120                        (a, Some(Token::Whitespace(b)))
121                    }
122                    None => (s, None),
123                };
124                self.string = rest;
125                Token::Other(token)
126            }
127        })
128    }
129}