anagrambot/anagram.rs
1//! Utilities for finding and verifying anagrams
2//!
3//! For the purposes of this module, an anagram is defined as such:
4//! two strings are "anagrams" of each other if they both contain the same characters,
5//! but in a different order.
6//!
7//! For example:
8//! - "race" and "care" are anagrams, because they contain the same characters and are not identical
9//! - "race" and "race" are not anagrams, because words cannot be anagrams of themselves
10//! - "asdf" and "safd" are anagrams, because they contain the same characters and are not identical
11//!
12//! This last point introduces the need for a more strict form of anagram, which this
13//! module calls a "proper anagram": proper anagrams must be actual words. That is,
14//! anagrams can only be considered proper anagrams if they appear in a list of defined words.
15//! Each proper anagram related function requires a [Wordlist] to check if a given string is a word.
16//!
17//! For example:
18//! - "race" and "care" are proper anagrams because they are anagrams and both words
19//! - "race" and "reca" are not proper anagrams because "reca" is not a word
20
21use crate::wordlist::Wordlist;
22
23use std::collections::BTreeMap;
24
25/// Type representing the set of characters a word contains
26///
27/// Each key is a character that appears in the word. Each value
28/// is the number of times that character appears
29type Charmap = BTreeMap<char, u32>;
30
31pub mod loose_anagram;
32pub use loose_anagram::{are_loose_anagrams, are_loose_anagrams_strict, find_loose_anagrams};
33
34/// Returns a [Charmap] with the number of times each character appears in `word`
35///
36/// The resulting [Charmap] has a key for each character in `word`, with the value
37/// being the number of times that character appears in `word`
38///
39/// If `ignore_spaces` is true, space characters `' '` will be entirely skipped over
40///
41/// If `case_sensitive` is true, characters of different case will be treated as different.
42/// If `case_sensitive` is false, characters of different case will be treated as the same.
43fn get_charcount_map(word: &str, ignore_spaces: bool, case_sensitive: bool) -> Charmap {
44 let mut lettercount_map = Charmap::new();
45
46 let mut insert_closure = |letter| match lettercount_map.get_mut(&letter) {
47 None => {
48 lettercount_map.insert(letter, 1);
49 }
50 Some(count) => *count += 1
51 };
52
53 for letter in word.chars() {
54 if ignore_spaces && letter == ' ' {
55 continue;
56 } else if case_sensitive {
57 insert_closure(letter);
58 } else {
59 for lower_letter in letter.to_lowercase() {
60 insert_closure(lower_letter);
61 }
62 }
63 }
64
65 lettercount_map
66}
67
68/// Caching object for word charmaps, do not use directly
69struct WordWithCharmap<'a> {
70 word: &'a str,
71 word_charmap: Option<Charmap>,
72 case_sensitive: bool
73}
74
75impl<'a> WordWithCharmap<'a> {
76 pub fn new(word: &'a str, case_sensitive: bool) -> Self {
77 Self {
78 word,
79 word_charmap: None,
80 case_sensitive
81 }
82 }
83 pub fn get_word(&self) -> &'a str {
84 self.word
85 }
86 pub fn get_charmap(&mut self) -> &Charmap {
87 if self.word_charmap == None {
88 self.word_charmap = Some(get_charcount_map(self.word, false, self.case_sensitive));
89 }
90
91 self.word_charmap.as_ref().unwrap()
92 }
93}
94
95/// Returns true if two words are anagrams
96///
97/// `word_a` and `word_b` are the two words to check
98///
99/// If `case_sensitive` is `true`, uppercase and lowercase forms of the
100/// same letter will be considered different. If `case_sensitive` is `false`,
101/// uppercase and lowercase forms of th same letter will be considered the same.
102///
103/// This tests for standard anagrams, not proper anagrams. This means
104/// that non-word character sequences that nonetheless contain the same
105/// characters in a different order will result in `true`
106///
107/// Note that two identical words are not considered anagrams
108///
109///# Examples
110/// ```
111/// use anagrambot::anagram::are_anagrams;
112///
113/// const CASE_SENSITIVE: bool = true;
114///
115/// //proper anagram
116/// assert!(are_anagrams("race", "care", CASE_SENSITIVE));
117/// //non-proper anagram
118/// assert!(are_anagrams("aabc", "caab", CASE_SENSITIVE));
119///
120/// //non-anagram due to different letters
121/// assert!(!are_anagrams("race", "cow", CASE_SENSITIVE));
122/// //non-anagram due to being identical
123/// assert!(!are_anagrams("race", "race", CASE_SENSITIVE));
124/// ```
125pub fn are_anagrams(word_a: &str, word_b: &str, case_sensitive: bool) -> bool {
126 let mut word_a = WordWithCharmap::new(word_a, case_sensitive);
127 let mut word_b = WordWithCharmap::new(word_b, case_sensitive);
128
129 are_anagrams_internal(&mut word_a, &mut word_b)
130}
131
132/// internal body of [are_anagrams]; do not use directly
133///
134/// takes in WordWithCharmap structs instead of words
135///
136///# Panics
137///
138/// This function panics if the `case_sensitive` members of both words don't match
139fn are_anagrams_internal(word_a: &mut WordWithCharmap, word_b: &mut WordWithCharmap) -> bool {
140 assert_eq!(word_a.case_sensitive, word_b.case_sensitive);
141
142 let word_a_internal = word_a.get_word();
143 let word_b_internal = word_b.get_word();
144
145 //words can't be anagrams if their lengths are different
146 //it's ok to use byte length here when case sensitivity is enabled
147 //and we can skip checking word_b case sensitivity because we already asserted they were equal
148 if word_a.case_sensitive && word_a_internal.len() != word_b_internal.len()
149 //two identical words are not anagrams
150 || word_a_internal == word_b_internal
151 {
152 return false;
153 }
154
155 //note that we don't do the length check if case sensitivity is disabled
156 //this is because we would need a case-agnostic char count, meaning
157 //we would have to create the charmap anyway just to check the length,
158 //defeating the purpose of the length check
159
160 //words are anagrams if both previous conditions weren't true
161 //and the counts of each of their letters are identical
162 word_a.get_charmap() == word_b.get_charmap()
163}
164
165/// Similar to [are_anagrams] but checks that both words are real words
166///
167/// This function will return false if either `word_a`, `word_b`, or both
168/// are not found in the specified `wordlist`.
169///
170/// `wordlist` must implement the [Wordlist] trait (for example, the
171/// [default wordlist](crate::default_wordlist::default_wordlist) if present)
172///# Examples
173/// ```
174/// use anagrambot::anagram::are_proper_anagrams;
175/// use anagrambot::wordlist::BorrowedWordList;
176///
177/// const CASE_SENSITIVE: bool = true;
178///
179/// // you can use anagrambot::default_wordlist::default_wordlist()
180/// // to get the default Wordlist instead of generating your own,
181/// // as long as the `no-default-wordlist` feature is not enabled
182/// const TEST_WORD_SET: [&str; 3] = ["race", "care", "cow"];
183/// let wordlist: BorrowedWordList = TEST_WORD_SET.into_iter().collect();
184///
185/// //proper anagram
186/// assert!(are_proper_anagrams("race", "care", &wordlist, CASE_SENSITIVE));
187///
188/// //non-proper anagram
189/// assert!(!are_proper_anagrams("aabc", "caab", &wordlist, CASE_SENSITIVE));
190/// //non-anagram due to different letters
191/// assert!(!are_proper_anagrams("race", "cow", &wordlist, CASE_SENSITIVE));
192/// //non-anagram due to being identical
193/// assert!(!are_proper_anagrams("race", "race", &wordlist, CASE_SENSITIVE));
194/// ```
195pub fn are_proper_anagrams<'a>(
196 word_a: &str,
197 word_b: &str,
198 wordlist: &impl Wordlist<'a>,
199 case_sensitive: bool
200) -> bool {
201 //return false if either word is not found in wordlist
202 if !wordlist.includes_word(word_a) || !wordlist.includes_word(word_b) {
203 return false;
204 }
205
206 //now that we ensured both words are real words, use the standard are_anagrams function
207 are_anagrams(word_a, word_b, case_sensitive)
208}
209
210/// An iterator over all standard anagrams of a word
211///
212/// The return value of [find_anagrams]
213pub struct AnagramsIter {
214 chars: Vec<char>,
215 stack_state: Vec<usize>,
216 i: usize
217}
218
219impl AnagramsIter {
220 pub fn new(word: &str) -> Self {
221 let chars: Vec<char> = word.chars().collect();
222 let stack_state = vec![0; chars.len()];
223
224 Self {
225 chars,
226 stack_state,
227 i: 1
228 }
229 }
230}
231
232impl Iterator for AnagramsIter {
233 type Item = String;
234
235 // heaps algorithm graciously taken from wikipedia
236 // and modified to function as a rust iterator
237 // https://en.wikipedia.org/wiki/Heap's_algorithm
238 fn next(&mut self) -> Option<Self::Item> {
239 let seq_len = self.chars.len();
240 if seq_len <= 1 {
241 return None;
242 }
243 while self.i < seq_len {
244 let k = self.stack_state.get_mut(self.i).unwrap();
245 if *k < self.i {
246 if (self.i & 1) == 0 {
247 self.chars.swap(0, self.i);
248 } else {
249 self.chars.swap(*k, self.i);
250 }
251 // Swap has occurred ending the for-loop. Simulate the increment of the for-loop counter
252 *k += 1;
253 // Simulate recursive call reaching the base case by bringing the pointer to the base case analog in the array
254 self.i = 1;
255
256 return Some(self.chars.iter().collect());
257 } else {
258 // Calling generate(i+1, A) has ended as the for-loop terminated. Reset the state and simulate popping the stack by incrementing the pointer.
259 *k = 0;
260 self.i += 1;
261 }
262 }
263 None
264 }
265}
266
267/// Returns an [AnagramsIter] over all the standard anagrams of a word
268///
269/// Effectively returns an iterator over all permutations of word's characters,
270/// except the original permutation (which is skipped because a word cannot be an anagram
271/// of itself)
272///
273///# Notes
274///
275/// For a word of length `n`, there are `n! - 1` standard anagrams (`n!` meaning `factorial(n)`).
276/// Factorials get up to extremely high output values for relatively low input values.
277/// Be mindful of this if you plan to fill a vector with standard anagrams:
278/// storing ***all*** standard anagrams of a word may require multiple gigabytes of memory.
279pub fn find_anagrams(word: &str) -> impl Iterator<Item = String> {
280 AnagramsIter::new(word)
281}
282
283/// An iterator over all the proper anagrams of a word
284///
285/// The return value of [find_proper_anagrams]
286pub struct ProperAnagramsIter<'a, 'b, T>
287where
288 T: Iterator<Item = &'a str>
289{
290 word: WordWithCharmap<'b>,
291 wordlist_iter: T,
292 case_sensitive: bool
293}
294
295impl<'a, 'b, T> Iterator for ProperAnagramsIter<'a, 'b, T>
296where
297 T: Iterator<Item = &'a str>
298{
299 type Item = &'a str;
300 fn next(&mut self) -> Option<Self::Item> {
301 for next_word in self.wordlist_iter.by_ref() {
302 let mut next_word_with_charmap = WordWithCharmap::new(next_word, self.case_sensitive);
303 if are_anagrams_internal(&mut self.word, &mut next_word_with_charmap) {
304 return Some(next_word);
305 }
306 }
307 None
308 }
309}
310
311/// Returns a [ProperAnagramsIter] over all proper anagrams of `word`
312///
313/// Note that this method does not check if `word` is present in `wordlist`;
314/// this is the responsibility of the caller (if desired)
315///
316///# Examples
317/// ```
318/// use anagrambot::anagram::find_proper_anagrams;
319/// use anagrambot::wordlist::BorrowedWordList;
320///
321/// const CASE_SENSITIVE: bool = true;
322///
323/// // you can use anagrambot::default_wordlist::default_wordlist()
324/// // to get the default Wordlist instead of generating your own,
325/// // as long as the `no-default-wordlist` feature is not enabled
326/// const TEST_WORD_SET: [&str; 5] = ["aster", "taser", "tears", "race", "cow"];
327/// let wordlist: BorrowedWordList = TEST_WORD_SET.into_iter().collect();
328///
329/// let mut proper_anagrams = find_proper_anagrams("tears", &wordlist, CASE_SENSITIVE);
330///
331/// assert_eq!(proper_anagrams.next(), Some("aster"));
332/// assert_eq!(proper_anagrams.next(), Some("taser"));
333/// assert_eq!(proper_anagrams.next(), None);
334///
335/// // note that the original word "tears" is not included because
336/// // two identical words are not considered anagrams
337/// ```
338pub fn find_proper_anagrams<'a, 'b, T>(
339 word: &'b str,
340 wordlist: &'a T,
341 case_sensitive: bool
342) -> ProperAnagramsIter<'a, 'b, impl Iterator<Item = &'a str>>
343where
344 T: Wordlist<'a>
345{
346 let word_with_charmap = WordWithCharmap::new(word, case_sensitive);
347 ProperAnagramsIter {
348 word: word_with_charmap,
349 wordlist_iter: wordlist.iter(),
350 case_sensitive
351 }
352}