1#![forbid(unsafe_code)]
2#![doc = include_str!("../README.md")]
3
4use std::collections::HashSet;
5
6#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
8pub struct Word {
9 text: String,
10}
11
12impl Word {
13 pub fn new(input: &str) -> Option<Self> {
15 let text = normalize_word(input);
16 (!text.is_empty()).then_some(Self { text })
17 }
18
19 pub fn as_str(&self) -> &str {
21 &self.text
22 }
23
24 pub fn into_string(self) -> String {
26 self.text
27 }
28}
29
30#[derive(Clone, Copy, Debug, Eq, PartialEq)]
32pub struct WordStats {
33 pub total: usize,
35 pub unique: usize,
37}
38
39impl WordStats {
40 pub fn from_text(input: &str) -> Self {
42 let all_words = words(input);
43 let total = all_words.len();
44 let unique = unique_words(input).len();
45 Self { total, unique }
46 }
47}
48
49pub fn word_count(input: &str) -> usize {
51 words(input).len()
52}
53
54pub fn unique_words(input: &str) -> Vec<Word> {
56 let mut seen = HashSet::new();
57 let mut unique = Vec::new();
58
59 for word in words(input) {
60 if seen.insert(word.text.clone()) {
61 unique.push(word);
62 }
63 }
64
65 unique
66}
67
68pub fn normalize_word(input: &str) -> String {
70 let characters: Vec<char> = input.trim().chars().collect();
71 let mut output = String::new();
72
73 for (index, character) in characters.iter().copied().enumerate() {
74 let previous = index
75 .checked_sub(1)
76 .and_then(|value| characters.get(value))
77 .copied();
78 let next = characters.get(index + 1).copied();
79
80 if character.is_alphanumeric() || is_apostrophe(previous, character, next) {
81 output.extend(character.to_lowercase());
82 }
83 }
84
85 output
86}
87
88pub fn contains_word(input: &str, target: &str) -> bool {
90 let normalized_target = normalize_word(target);
91 !normalized_target.is_empty()
92 && words(input)
93 .iter()
94 .any(|word| word.as_str() == normalized_target)
95}
96
97pub fn starts_with_word(input: &str, target: &str) -> bool {
99 let normalized_target = normalize_word(target);
100 !normalized_target.is_empty()
101 && words(input)
102 .first()
103 .is_some_and(|word| word.as_str() == normalized_target)
104}
105
106pub fn ends_with_word(input: &str, target: &str) -> bool {
108 let normalized_target = normalize_word(target);
109 !normalized_target.is_empty()
110 && words(input)
111 .last()
112 .is_some_and(|word| word.as_str() == normalized_target)
113}
114
115pub fn words(input: &str) -> Vec<Word> {
117 word_ranges(input)
118 .into_iter()
119 .filter_map(|(start, end)| Word::new(&input[start..end]))
120 .collect()
121}
122
123fn word_ranges(input: &str) -> Vec<(usize, usize)> {
124 let characters: Vec<(usize, char)> = input.char_indices().collect();
125 let mut ranges = Vec::new();
126 let mut start = None;
127
128 for (index, (byte_index, character)) in characters.iter().copied().enumerate() {
129 let previous = index.checked_sub(1).map(|value| characters[value].1);
130 let next = characters.get(index + 1).map(|(_, value)| *value);
131 let is_word_character =
132 character.is_alphanumeric() || is_apostrophe(previous, character, next);
133
134 if is_word_character {
135 if start.is_none() {
136 start = Some(byte_index);
137 }
138 } else if let Some(word_start) = start.take() {
139 ranges.push((word_start, byte_index));
140 }
141 }
142
143 if let Some(word_start) = start {
144 ranges.push((word_start, input.len()));
145 }
146
147 ranges
148}
149
/// Decides whether `current` is an apostrophe that belongs to a word.
///
/// That holds only when `current` is `'` or `’` and both neighbours exist and
/// are alphanumeric, which excludes quotes at the start or end of a word.
fn is_apostrophe(previous: Option<char>, current: char, next: Option<char>) -> bool {
    if current != '\'' && current != '’' {
        return false;
    }

    match (previous, next) {
        (Some(before), Some(after)) => before.is_alphanumeric() && after.is_alphanumeric(),
        _ => false,
    }
}
155
#[cfg(test)]
mod tests {
    use super::{
        Word, WordStats, contains_word, ends_with_word, normalize_word, starts_with_word,
        unique_words, word_count, words,
    };

    /// Borrows the normalized text of each word so lists can be compared compactly.
    fn texts_of(list: &[Word]) -> Vec<&str> {
        list.iter().map(Word::as_str).collect()
    }

    #[test]
    fn handles_empty_and_whitespace_only_input() {
        let blank = " \n";

        assert_eq!(word_count(""), 0);
        assert!(words(blank).is_empty());
        assert_eq!(normalize_word(blank), "");
    }

    #[test]
    fn normalizes_ascii_words_and_punctuation() {
        for (raw, expected) in [("Hello!", "hello"), ("don't", "don't")] {
            assert_eq!(normalize_word(raw), expected);
        }

        assert_eq!(word_count("Hello, hello world"), 3);
    }

    #[test]
    fn preserves_first_seen_unique_words() {
        let deduplicated = unique_words("Hello, hello world world");
        assert_eq!(texts_of(&deduplicated), ["hello", "world"]);
    }

    #[test]
    fn checks_word_boundaries() {
        assert!(contains_word("Hello, world", "world"));
        // A substring of a longer word must not count as a match.
        assert!(!contains_word("cartwheel", "art"));
        assert!(starts_with_word("Hello world", "hello"));
        assert!(ends_with_word("Hello world!", "world"));
    }

    #[test]
    fn handles_multiline_and_unicode_input() {
        let extracted = words("Straße\ncafé");
        assert_eq!(texts_of(&extracted), ["straße", "café"]);

        let WordStats { total, unique } = WordStats::from_text("Straße\ncafé café");
        assert_eq!(total, 3);
        assert_eq!(unique, 2);
    }
}