1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
//! `allwords` is a crate that allows you to generate words over a given alphabet.
//!
//! Word generation can be useful in several scenarios:
//!
//!  - Pseudo-random data generation (e.g. testing / mocking)
//!  - Brute forcing of keys / passwords
//!  - Id or Serial number generation
//!
//! The basic idea for using this library is that you create an [`Alphabet`] with a set of
//! characters and then you can use it to generate a [`WordsIterator`]. You can use the iterator
//! to generate all the possible words over the alphabet.
//!
//! For instance if you want to generate all the possible words containing `"a"`, `"b"`, `"c"` with
//! a maximum length of 3 chars:
//!
//! ```rust
//! use allwords::{Alphabet};
//!
//! let a = Alphabet::from_chars_in_str("abc").unwrap();
//!
//! let words: Vec<String> = a.all_words(Some(3)).collect();
//!
//! let expected_words: Vec<String> = [
//!     "a", "b", "c",
//!     "aa", "ab", "ac", "ba", "bb", "bc", "ca", "cb", "cc",
//!     "aaa", "aab", "aac", "aba", "abb", "abc", "aca", "acb", "acc",
//!     "baa", "bab", "bac", "bba", "bbb", "bbc", "bca", "bcb", "bcc",
//!     "caa", "cab", "cac", "cba", "cbb", "cbc", "cca", "ccb", "ccc"]
//!     .iter()
//!     .map(|s| s.to_string())
//!     .collect();
//!
//! assert_eq!(words, expected_words);
//! ```

use std::collections::{HashMap, VecDeque};
use std::str;

/// A representation of an alphabet
pub struct Alphabet {
    /// An hashmap used to track what's the next character for every given character.
    /// The last caracter will point to None.
    pub next_char_map: HashMap<char, Option<char>>,
    /// The first character in the alphabet
    pub first_char: char,
}

/// A iterator that can generate words for a given alphabet
pub struct WordsIterator<'a> {
    /// The reference alphabet instance
    pub alphabet: &'a Alphabet,
    max_len: Option<usize>,
    next_item: String,
}

impl Alphabet {
    /// Creates a new alphabet starting from the unique characters found in a given string.
    ///
    /// This function will extract all the unique characters found in order in the given string.
    /// It will return an `Err` if there are less than 2 unique characters in the given string.
    ///
    /// # Arguments
    ///
    /// * `alphabet_str` - A string-like instance that contains the sequence of characters that
    ///     we want to use to initialize our `Alphabet` instance
    ///
    /// # Returns
    ///
    /// It returns a Result containing the new `Alphabet` instance in case of success.
    ///
    /// # Examples
    ///
    /// Creates an alphabet using characters from `'a'` to `'f'`:
    ///
    /// ```rust
    /// use allwords::{Alphabet};
    ///
    /// let alphabet = Alphabet::from_chars_in_str("abcdef").unwrap();
    /// ```
    ///
    /// Passing an empty string or a string with less than 2 unique chars will return an error:
    ///
    /// ```rust
    /// use allwords::{Alphabet};
    ///
    /// let a = Alphabet::from_chars_in_str("zzzzzzzzzzzzzz");
    ///
    /// match a {
    ///     Ok(_) => panic!("An alphabet was created when we expected an error"),
    ///     Err(e) => assert_eq!(
    ///         e,
    ///         String::from("Invalid alphabet string. Found less than 2 unique chars")
    ///     ),
    /// };
    /// ```
    ///
    /// Since `Alphabet` implements `str::FromStr`, you can also do the following:
    ///
    /// ```rust
    /// use allwords::{Alphabet};
    ///
    /// let value = "abcdef";
    /// let alphabet = value.parse::<Alphabet>().unwrap(); // long life to the turbofish!
    /// ```
    pub fn from_chars_in_str<T: AsRef<str>>(alphabet_str: T) -> Result<Self, String> {
        // creates the map of next characters removing duplicates
        let mut next_char_map = HashMap::new();
        let mut first_char: Option<char> = None;

        let mut previous_char: Option<char> = None;
        for c in alphabet_str.as_ref().chars() {
            if first_char.is_none() {
                first_char = Some(c);
                previous_char = Some(c);
            } else if previous_char.is_some()
                && previous_char.unwrap() != c
                && !next_char_map.contains_key(&c)
            {
                next_char_map.insert(previous_char.unwrap(), Some(c));
                previous_char = Some(c);
            }
        }
        // adds last char if hasn't been added yet
        if let Some(pc) = previous_char {
            next_char_map.entry(pc).or_insert(None);
        }

        if next_char_map.keys().len() < 2 {
            return Err(String::from(
                "Invalid alphabet string. Found less than 2 unique chars",
            ));
        }

        Ok(Alphabet {
            next_char_map,
            first_char: first_char.unwrap(),
        })
    }

    /// Creates an iterator that will generate all the words for a given alphabet. You can optionally
    /// specifify a maximum length, after which, the iterator will terminate.
    ///
    /// # Arguments
    ///
    /// * `max_len` - an optional `usize` that, if present, will specify the maximum length of the
    ///     generated string. If `None` the iterator will be endless.
    ///
    /// # Returns
    ///
    /// An instance of a [`WordsIterator`].
    ///
    /// # Examples
    ///
    /// Creates an iterator with `max_len` = `2` for a given alphabet:
    ///
    /// ```rust
    /// use allwords::{Alphabet};
    ///
    /// let alphabet = Alphabet::from_chars_in_str("01").unwrap();
    /// let iterator = alphabet.all_words(Some(2));
    /// let words: Vec<String> = iterator.collect();
    /// assert_eq!(words, vec!["0", "1", "00", "01", "10", "11"]);
    /// ```
    pub fn all_words(&self, max_len: Option<usize>) -> WordsIterator {
        WordsIterator {
            alphabet: self,
            max_len,
            next_item: String::from(self.first_char),
        }
    }

    /// A shortcut for creating an unbound (endless) iterator for the given alphabet.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use allwords::{Alphabet};
    ///
    /// let alphabet = Alphabet::from_chars_in_str("01").unwrap();
    /// let iterator = alphabet.all_words_unbound(); // equivalent to `alphabet.all_words(None)`
    /// ```
    pub fn all_words_unbound(&self) -> WordsIterator {
        WordsIterator {
            alphabet: self,
            max_len: None,
            next_item: String::from(self.first_char),
        }
    }

    /// Creates an iterator that will generate all the words for a given alphabet starting from a given word.
    /// This method can be useful in case you want to restart a partially completed iteration from another execution or
    /// if you want to distribute computation across indepentend processes or threads.
    ///
    /// **Note:** this method does not check that the starting word complies with the alphabet. If there are characters
    /// in the string that are NOT present in the alphabet, the iterator will consider these characters as last character and
    /// restart the sequence from the first character in the alphabet.
    ///
    /// # Arguments
    ///
    /// * `start_word` - a `String` instance representing the starting string. This string will be returned by the
    ///     first `.next()` call to the iterator).
    /// * `max_len` - an optional `usize` that, if present, will specify the maximum length of the
    ///     generated string. If `None` the iterator will be endless.
    ///
    /// # Returns
    ///
    /// An instance of a [`WordsIterator`].
    ///
    /// # Examples
    ///
    /// Creates an iterator with `max_len` = `2` starting from "01" for a given alphabet:
    ///
    /// ```rust
    /// use allwords::{Alphabet};
    ///
    /// let alphabet = Alphabet::from_chars_in_str("01").unwrap();
    /// let iterator = alphabet.all_words_starting_from(String::from("01"), Some(2));
    /// let words: Vec<String> = iterator.collect();
    /// assert_eq!(words, vec!["01", "10", "11"]);
    /// ```
    pub fn all_words_starting_from(
        &self,
        start_word: String,
        max_len: Option<usize>,
    ) -> WordsIterator {
        WordsIterator {
            alphabet: self,
            max_len,
            next_item: start_word,
        }
    }

    /// Creates an iterator that will generate all the words for a given alphabet starting from the first word with
    /// a given minimum length.
    ///
    /// # Arguments
    ///
    /// * `start_len` - a `usize` defining the minimum length of the first word emitted by the iterator.
    /// * `max_len` - an optional `usize` that, if present, will specify the maximum length of the
    ///     generated string. If `None` the iterator will be endless.
    ///
    /// # Returns
    ///
    /// An instance of a [`WordsIterator`].
    ///
    /// # Examples
    ///
    /// Creates an iterator for all the words with length bethween 2 and 3 chars:
    ///
    /// ```rust
    /// use allwords::{Alphabet};
    ///
    /// let alphabet = Alphabet::from_chars_in_str("01").unwrap();
    /// let iterator = alphabet.all_words_with_len(2, Some(3));
    /// let words: Vec<String> = iterator.collect();
    /// assert_eq!(words, vec!["00", "01", "10", "11", "000", "001", "010", "011", "100", "101", "110", "111"]);
    /// ```
    pub fn all_words_with_len(&self, start_len: usize, max_len: Option<usize>) -> WordsIterator {
        WordsIterator {
            alphabet: self,
            max_len,
            next_item: (0..start_len).map(|_| self.first_char).collect::<String>(),
        }
    }
}

impl<'a> Iterator for WordsIterator<'a> {
    type Item = String;

    fn next(&mut self) -> Option<String> {
        if self.max_len.is_some() && self.max_len.unwrap() < self.next_item.len() {
            return None;
        }

        let current_item = self.next_item.clone();
        let mut next_item: VecDeque<char> = VecDeque::with_capacity(current_item.len() + 1);
        let mut carry = true;
        for c in current_item.chars().rev() {
            if carry {
                let next_char = self.alphabet.next_char_map.get(&c).unwrap_or(&None);
                let next_char = match next_char {
                    Some(c) => {
                        carry = false;
                        *c
                    }
                    None => {
                        carry = true;
                        self.alphabet.first_char
                    }
                };
                next_item.push_front(next_char);
            } else {
                next_item.push_front(c);
            }
        }
        if carry {
            next_item.push_front(self.alphabet.first_char);
        }
        let next_item: String = next_item.iter().collect();
        self.next_item = next_item;

        Some(current_item)
    }
}

impl str::FromStr for Alphabet {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Alphabet::from_chars_in_str(&String::from(s))
    }
}

#[cfg(test)]
mod test;