1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that the first word produced for `input` is `expected`.
    fn tst(input: &str, expected: &str) {
        let first = input.words().next();
        assert_eq!(first.unwrap(), expected);
    }

    // A plain two-word input yields its first word.
    #[test]
    fn next_word() {
        tst("hello world", "hello");
    }

    // Leading spaces and newlines are dropped before the first word.
    #[test]
    fn cut_whitespace() {
        tst("  \nhello world", "hello");
    }

    // Leading punctuation is dropped before the first word.
    #[test]
    fn remove_redundant_characters() {
        tst(".hello world", "hello");
    }

    // Commas, full stops and spaces all act as separators.
    #[test]
    fn sentence() {
        let words: Vec<&str> = "Hello world, my name is Nils Martel. I love coding in rust"
            .words()
            .collect();

        assert_eq!(
            words,
            [
                "Hello", "world", "my", "name", "is", "Nils", "Martel", "I", "love", "coding",
                "in", "rust",
            ]
        );
    }

    // Multi-byte alphabetic characters stay inside their words.
    #[test]
    fn non_ascii() {
        let words: Vec<&str> = "Hellö wörld, my name is Nils Martel. I löve cöding in rust"
            .words()
            .collect();

        assert_eq!(
            words,
            [
                "Hellö", "wörld", "my", "name", "is", "Nils", "Martel", "I", "löve", "cöding",
                "in", "rust",
            ]
        );
    }
}

/// Splits the first word off the front of `i`.
///
/// A word is a maximal run of characters for which `char::is_alphabetic`
/// returns `true`. Everything before the first such character is discarded.
///
/// Returns `Some((word, rest))`, where `rest` is the input that follows the
/// word, or `None` when `i` contains no alphabetic character at all.
fn next(i: &str) -> Option<(&str, &str)> {
    let is_separator = |ch: char| !ch.is_alphabetic();

    // Drop every leading character that cannot be part of a word.
    let trimmed = i.trim_start_matches(is_separator);
    if trimmed.is_empty() {
        // Nothing alphabetic was left, so there is no further word.
        return None;
    }

    // The word ends at the first separator, or at the end of the input.
    let word_len = trimmed.find(is_separator).unwrap_or(trimmed.len());
    Some(trimmed.split_at(word_len))
}

/// Extension trait that adds word iteration to a type.
///
/// The only implementation visible in this file is for `&'a str`.
pub trait Words<'a> {
    /// Consumes `self` and returns a [`WordIter`] that yields `&'a str` items.
    fn words(self) -> WordIter<'a>;
}

/// Lets any string slice be iterated word by word via `.words()`.
impl<'a> Words<'a> for &'a str {
    /// Starts a [`WordIter`] with the whole slice still to be scanned.
    fn words(self) -> WordIter<'a> {
        let rest = self;
        WordIter { rest }
    }
}

/// Iterator over the words of a string slice, created by [`Words::words`].
///
/// Each item is a sub-slice of the original input, so no allocation takes
/// place. `Debug` and `Clone` are derived so the iterator can be inspected
/// and duplicated like the standard library's string iterators.
#[derive(Debug, Clone)]
pub struct WordIter<'a> {
    /// The part of the input that has not been scanned yet.
    rest: &'a str,
}

impl<'a> Iterator for WordIter<'a> {
    type Item = &'a str;

    /// Yields the next word and advances past it.
    ///
    /// When the free function `next` finds no further word, `None` is
    /// returned and the remaining input is left unchanged.
    fn next(&mut self) -> Option<Self::Item> {
        // The bare `next` here is the free function, not this method.
        next(self.rest).map(|(word, remainder)| {
            self.rest = remainder;
            word
        })
    }
}