anagrambot/
anagram.rs

1//! Utilities for finding and verifying anagrams
2//!
3//! For the purposes of this module, an anagram is defined as such:
4//! two strings are "anagrams" of each other if they both contain the same characters,
5//! but in a different order.
6//!
7//! For example:
8//! - "race" and "care" are anagrams, because they contain the same characters and are not identical
9//! - "race" and "race" are not anagrams, because words cannot be anagrams of themselves
10//! - "asdf" and "safd" are anagrams, because they contain the same characters and are not identical
11//!
12//! This last point introduces the need for a more strict form of anagram, which this
13//! module calls a "proper anagram": proper anagrams must be actual words. That is,
14//! anagrams can only be considered proper anagrams if they appear in a list of defined words.
15//! Each proper anagram related function requires a [Wordlist] to check if a given string is a word.
16//!
17//! For example:
18//! - "race" and "care" are proper anagrams because they are anagrams and both words
19//! - "race" and "reca" are not proper anagrams because "reca" is not a word
20
21use crate::wordlist::Wordlist;
22
23use std::collections::BTreeMap;
24
25/// Type representing the set of characters a word contains
26///
27/// Each key is a character that appears in the word. Each value
28/// is the number of times that character appears
29type Charmap = BTreeMap<char, u32>;
30
31pub mod loose_anagram;
32pub use loose_anagram::{are_loose_anagrams, are_loose_anagrams_strict, find_loose_anagrams};
33
34/// Returns a [Charmap] with the number of times each character appears in `word`
35///
36/// The resulting [Charmap] has a key for each character in `word`, with the value
37/// being the number of times that character appears in `word`
38///
39/// If `ignore_spaces` is true, space characters `' '` will be entirely skipped over
40///
41/// If `case_sensitive` is true, characters of different case will be treated as different.
42/// If `case_sensitive` is false, characters of different case will be treated as the same.
43fn get_charcount_map(word: &str, ignore_spaces: bool, case_sensitive: bool) -> Charmap {
44    let mut lettercount_map = Charmap::new();
45
46    let mut insert_closure = |letter| match lettercount_map.get_mut(&letter) {
47        None => {
48            lettercount_map.insert(letter, 1);
49        }
50        Some(count) => *count += 1
51    };
52
53    for letter in word.chars() {
54        if ignore_spaces && letter == ' ' {
55            continue;
56        } else if case_sensitive {
57            insert_closure(letter);
58        } else {
59            for lower_letter in letter.to_lowercase() {
60                insert_closure(lower_letter);
61            }
62        }
63    }
64
65    lettercount_map
66}
67
68/// Caching object for word charmaps, do not use directly
69struct WordWithCharmap<'a> {
70    word: &'a str,
71    word_charmap: Option<Charmap>,
72    case_sensitive: bool
73}
74
75impl<'a> WordWithCharmap<'a> {
76    pub fn new(word: &'a str, case_sensitive: bool) -> Self {
77        Self {
78            word,
79            word_charmap: None,
80            case_sensitive
81        }
82    }
83    pub fn get_word(&self) -> &'a str {
84        self.word
85    }
86    pub fn get_charmap(&mut self) -> &Charmap {
87        if self.word_charmap == None {
88            self.word_charmap = Some(get_charcount_map(self.word, false, self.case_sensitive));
89        }
90
91        self.word_charmap.as_ref().unwrap()
92    }
93}
94
95/// Returns true if two words are anagrams
96///
97/// `word_a` and `word_b` are the two words to check
98///
99/// If `case_sensitive` is `true`, uppercase and lowercase forms of the
100/// same letter will be considered different. If `case_sensitive` is `false`,
101/// uppercase and lowercase forms of th same letter will be considered the same.
102///
103/// This tests for standard anagrams, not proper anagrams. This means
104/// that non-word character sequences that nonetheless contain the same
105/// characters in a different order will result in `true`
106///
107/// Note that two identical words are not considered anagrams
108///
109///# Examples
110/// ```
111/// use anagrambot::anagram::are_anagrams;
112///
113/// const CASE_SENSITIVE: bool = true;
114///
115/// //proper anagram
116/// assert!(are_anagrams("race", "care", CASE_SENSITIVE));
117/// //non-proper anagram
118/// assert!(are_anagrams("aabc", "caab", CASE_SENSITIVE));
119///
120/// //non-anagram due to different letters
121/// assert!(!are_anagrams("race", "cow", CASE_SENSITIVE));
122/// //non-anagram due to being identical
123/// assert!(!are_anagrams("race", "race", CASE_SENSITIVE));
124/// ```
125pub fn are_anagrams(word_a: &str, word_b: &str, case_sensitive: bool) -> bool {
126    let mut word_a = WordWithCharmap::new(word_a, case_sensitive);
127    let mut word_b = WordWithCharmap::new(word_b, case_sensitive);
128
129    are_anagrams_internal(&mut word_a, &mut word_b)
130}
131
132/// internal body of [are_anagrams]; do not use directly
133///
134/// takes in WordWithCharmap structs instead of words
135///
136///# Panics
137///
138/// This function panics if the `case_sensitive` members of both words don't match
139fn are_anagrams_internal(word_a: &mut WordWithCharmap, word_b: &mut WordWithCharmap) -> bool {
140    assert_eq!(word_a.case_sensitive, word_b.case_sensitive);
141
142    let word_a_internal = word_a.get_word();
143    let word_b_internal = word_b.get_word();
144
145    //words can't be anagrams if their lengths are different
146    //it's ok to use byte length here when case sensitivity is enabled
147    //and we can skip checking word_b case sensitivity because we already asserted they were equal
148    if word_a.case_sensitive && word_a_internal.len() != word_b_internal.len()
149    //two identical words are not anagrams
150    || word_a_internal == word_b_internal
151    {
152        return false;
153    }
154
155    //note that we don't do the length check if case sensitivity is disabled
156    //this is because we would need a case-agnostic char count, meaning
157    //we would have to create the charmap anyway just to check the length,
158    //defeating the purpose of the length check
159
160    //words are anagrams if both previous conditions weren't true
161    //and the counts of each of their letters are identical
162    word_a.get_charmap() == word_b.get_charmap()
163}
164
165/// Similar to [are_anagrams] but checks that both words are real words
166///
167/// This function will return false if either `word_a`, `word_b`, or both
168/// are not found in the specified `wordlist`.
169///
170/// `wordlist` must implement the [Wordlist] trait (for example, the
171/// [default wordlist](crate::default_wordlist::default_wordlist) if present)
172///# Examples
173/// ```
174/// use anagrambot::anagram::are_proper_anagrams;
175/// use anagrambot::wordlist::BorrowedWordList;
176///
177/// const CASE_SENSITIVE: bool = true;
178///
179/// // you can use anagrambot::default_wordlist::default_wordlist()
180/// // to get the default Wordlist instead of generating your own,
181/// // as long as the `no-default-wordlist` feature is not enabled
182/// const TEST_WORD_SET: [&str; 3] = ["race", "care", "cow"];
183/// let wordlist: BorrowedWordList = TEST_WORD_SET.into_iter().collect();
184///
185/// //proper anagram
186/// assert!(are_proper_anagrams("race", "care", &wordlist, CASE_SENSITIVE));
187///
188/// //non-proper anagram
189/// assert!(!are_proper_anagrams("aabc", "caab", &wordlist, CASE_SENSITIVE));
190/// //non-anagram due to different letters
191/// assert!(!are_proper_anagrams("race", "cow", &wordlist, CASE_SENSITIVE));
192/// //non-anagram due to being identical
193/// assert!(!are_proper_anagrams("race", "race", &wordlist, CASE_SENSITIVE));
194/// ```
195pub fn are_proper_anagrams<'a>(
196    word_a: &str,
197    word_b: &str,
198    wordlist: &impl Wordlist<'a>,
199    case_sensitive: bool
200) -> bool {
201    //return false if either word is not found in wordlist
202    if !wordlist.includes_word(word_a) || !wordlist.includes_word(word_b) {
203        return false;
204    }
205
206    //now that we ensured both words are real words, use the standard are_anagrams function
207    are_anagrams(word_a, word_b, case_sensitive)
208}
209
210/// An iterator over all standard anagrams of a word
211///
212/// The return value of [find_anagrams]
213pub struct AnagramsIter {
214    chars: Vec<char>,
215    stack_state: Vec<usize>,
216    i: usize
217}
218
219impl AnagramsIter {
220    pub fn new(word: &str) -> Self {
221        let chars: Vec<char> = word.chars().collect();
222        let stack_state = vec![0; chars.len()];
223
224        Self {
225            chars,
226            stack_state,
227            i: 1
228        }
229    }
230}
231
232impl Iterator for AnagramsIter {
233    type Item = String;
234
235    // heaps algorithm graciously taken from wikipedia
236    // and modified to function as a rust iterator
237    // https://en.wikipedia.org/wiki/Heap's_algorithm
238    fn next(&mut self) -> Option<Self::Item> {
239        let seq_len = self.chars.len();
240        if seq_len <= 1 {
241            return None;
242        }
243        while self.i < seq_len {
244            let k = self.stack_state.get_mut(self.i).unwrap();
245            if *k < self.i {
246                if (self.i & 1) == 0 {
247                    self.chars.swap(0, self.i);
248                } else {
249                    self.chars.swap(*k, self.i);
250                }
251                // Swap has occurred ending the for-loop. Simulate the increment of the for-loop counter
252                *k += 1;
253                // Simulate recursive call reaching the base case by bringing the pointer to the base case analog in the array
254                self.i = 1;
255
256                return Some(self.chars.iter().collect());
257            } else {
258                // Calling generate(i+1, A) has ended as the for-loop terminated. Reset the state and simulate popping the stack by incrementing the pointer.
259                *k = 0;
260                self.i += 1;
261            }
262        }
263        None
264    }
265}
266
267/// Returns an [AnagramsIter] over all the standard anagrams of a word
268///
269/// Effectively returns an iterator over all permutations of word's characters,
270/// except the original permutation (which is skipped because a word cannot be an anagram
271/// of itself)
272///
273///# Notes
274///
275/// For a word of length `n`, there are `n! - 1` standard anagrams (`n!` meaning `factorial(n)`).
276/// Factorials get up to extremely high output values for relatively low input values.
277/// Be mindful of this if you plan to fill a vector with standard anagrams:
278/// storing ***all*** standard anagrams of a word may require multiple gigabytes of memory.
279pub fn find_anagrams(word: &str) -> impl Iterator<Item = String> {
280    AnagramsIter::new(word)
281}
282
283/// An iterator over all the proper anagrams of a word
284///
285/// The return value of [find_proper_anagrams]
286pub struct ProperAnagramsIter<'a, 'b, T>
287where
288    T: Iterator<Item = &'a str>
289{
290    word: WordWithCharmap<'b>,
291    wordlist_iter: T,
292    case_sensitive: bool
293}
294
295impl<'a, 'b, T> Iterator for ProperAnagramsIter<'a, 'b, T>
296where
297    T: Iterator<Item = &'a str>
298{
299    type Item = &'a str;
300    fn next(&mut self) -> Option<Self::Item> {
301        for next_word in self.wordlist_iter.by_ref() {
302            let mut next_word_with_charmap = WordWithCharmap::new(next_word, self.case_sensitive);
303            if are_anagrams_internal(&mut self.word, &mut next_word_with_charmap) {
304                return Some(next_word);
305            }
306        }
307        None
308    }
309}
310
311/// Returns a [ProperAnagramsIter] over all proper anagrams of `word`
312///
313/// Note that this method does not check if `word` is present in `wordlist`;
314/// this is the responsibility of the caller (if desired)
315///
316///# Examples
317/// ```
318/// use anagrambot::anagram::find_proper_anagrams;
319/// use anagrambot::wordlist::BorrowedWordList;
320///
321/// const CASE_SENSITIVE: bool = true;
322///
323/// // you can use anagrambot::default_wordlist::default_wordlist()
324/// // to get the default Wordlist instead of generating your own,
325/// // as long as the `no-default-wordlist` feature is not enabled
326/// const TEST_WORD_SET: [&str; 5] = ["aster", "taser", "tears", "race", "cow"];
327/// let wordlist: BorrowedWordList = TEST_WORD_SET.into_iter().collect();
328///
329/// let mut proper_anagrams = find_proper_anagrams("tears", &wordlist, CASE_SENSITIVE);
330///
331/// assert_eq!(proper_anagrams.next(), Some("aster"));
332/// assert_eq!(proper_anagrams.next(), Some("taser"));
333/// assert_eq!(proper_anagrams.next(), None);
334///
335/// // note that the original word "tears" is not included because
336/// // two identical words are not considered anagrams
337/// ```
338pub fn find_proper_anagrams<'a, 'b, T>(
339    word: &'b str,
340    wordlist: &'a T,
341    case_sensitive: bool
342) -> ProperAnagramsIter<'a, 'b, impl Iterator<Item = &'a str>>
343where
344    T: Wordlist<'a>
345{
346    let word_with_charmap = WordWithCharmap::new(word, case_sensitive);
347    ProperAnagramsIter {
348        word: word_with_charmap,
349        wordlist_iter: wordlist.iter(),
350        case_sensitive
351    }
352}