1#![warn(clippy::all, missing_docs, nonstandard_style, future_incompatible)]
26
27pub fn extract_words(text: &str) -> impl Iterator<Item = &str> {
29 Entries::new(text).filter_map(|e| match e {
30 Entry::Word(s) => Some(s),
31 Entry::Other(_) => None,
32 })
33}
34
35pub struct Entries<'a> {
37 text: &'a str,
38 char_indices: std::str::CharIndices<'a>,
39 cur_entry: CurEntry,
40}
41
/// A contiguous piece of the input text, tagged with the kind of characters
/// it is made of.
///
/// The type only wraps a borrowed string slice, so it is cheap to copy and can
/// be compared, hashed and stored in collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Entry<'a> {
    /// A run of non-alphanumeric characters (punctuation, whitespace, ...).
    Other(&'a str),
    /// A run of alphanumeric characters.
    Word(&'a str),
}
50
/// Scanner state: which kind of run is in progress and where it started.
///
/// The `usize` payloads are byte offsets into the text being scanned.
enum CurEntry {
    /// No run is in progress (nothing scanned yet, or the input is used up).
    None,
    /// Inside a run of alphanumeric characters that began at the given offset.
    Word(usize),
    /// Inside a run of non-alphanumeric characters that began at the given offset.
    Other(usize),
}
56
57impl<'a> Entries<'a> {
58 pub fn new(text: &'a str) -> Self {
60 Entries {
61 text,
62 char_indices: text.char_indices(),
63 cur_entry: CurEntry::None,
64 }
65 }
66}
67
68impl<'a> Iterator for Entries<'a> {
69 type Item = Entry<'a>;
70
71 fn next(&mut self) -> Option<Self::Item> {
72 for (i, c) in self.char_indices.by_ref() {
73 if c.is_alphanumeric() {
74 match self.cur_entry {
75 CurEntry::None => self.cur_entry = CurEntry::Word(i),
76 CurEntry::Other(start) => {
77 self.cur_entry = CurEntry::Word(i);
78 return Some(Entry::Other(&self.text[start..i]));
79 }
80 CurEntry::Word(_) => (),
81 }
82 } else {
83 match self.cur_entry {
84 CurEntry::None => self.cur_entry = CurEntry::Other(i),
85 CurEntry::Other(_) => (),
86 CurEntry::Word(start) => {
87 self.cur_entry = CurEntry::Other(i);
88 return Some(Entry::Word(&self.text[start..i]));
89 }
90 }
91 }
92 }
93
94 match self.cur_entry {
95 CurEntry::None => None,
96 CurEntry::Other(start) => {
97 self.cur_entry = CurEntry::None;
98 if start < self.text.len() {
99 Some(Entry::Other(&self.text[start..]))
100 } else {
101 None
102 }
103 }
104 CurEntry::Word(start) => {
105 self.cur_entry = CurEntry::None;
106 if start < self.text.len() {
107 Some(Entry::Word(&self.text[start..]))
108 } else {
109 None
110 }
111 }
112 }
113 }
114}
115
116impl<'a> AsRef<str> for Entry<'a> {
117 fn as_ref(&self) -> &str {
118 match self {
119 Entry::Other(s) => s,
120 Entry::Word(s) => s,
121 }
122 }
123}
124
#[cfg(test)]
mod tests {
    use super::extract_words;

    /// Extracts the words of `input` and checks them against `expected`.
    #[track_caller]
    fn assert_words(input: &str, expected: &[&str]) {
        let found: Vec<&str> = extract_words(input).collect();
        assert_eq!(found, expected);
    }

    // An empty input contains no words.
    #[test]
    fn test_empty_string() {
        assert_words("", &[]);
    }

    // Separators alone never produce a word.
    #[test]
    fn test_punctuation_only() {
        assert_words(".,!?-", &[]);
    }

    // Words are split on punctuation and whitespace; accented letters are kept.
    #[test]
    fn test_mixed_input() {
        assert_words(
            "Hola,mundo! ¿Cómo estás?",
            &["Hola", "mundo", "Cómo", "estás"],
        );
    }

    // Long separator runs collapse, and a trailing word is still reported.
    #[test]
    fn test_multiple_delimiters() {
        assert_words("Hola, mundo!¿ .. !¿á", &["Hola", "mundo", "á"]);
    }
}