1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315
//! `allwords` is a crate that allows you to generate words over a given alphabet.
//!
//! Word generation can be useful in several scenarios:
//!
//! - Pseudo-random data generation (e.g. testing / mocking)
//! - Brute forcing of keys / passwords
//! - Id or Serial number generation
//!
//! The basic idea for using this library is that you create an [`Alphabet`] with a set of
//! characters and then you can use it to generate a [`WordsIterator`]. You can use the iterator
//! to generate all the possible words over the alphabet.
//!
//! For instance if you want to generate all the possible words containing `"a"`, `"b"`, `"c"` with
//! a maximum length of 3 chars:
//!
//! ```rust
//! use allwords::{Alphabet};
//!
//! let a = Alphabet::from_chars_in_str("abc").unwrap();
//!
//! let words: Vec<String> = a.all_words(Some(3)).collect();
//!
//! let expected_words: Vec<String> = [
//! "a", "b", "c",
//! "aa", "ab", "ac", "ba", "bb", "bc", "ca", "cb", "cc",
//! "aaa", "aab", "aac", "aba", "abb", "abc", "aca", "acb", "acc",
//! "baa", "bab", "bac", "bba", "bbb", "bbc", "bca", "bcb", "bcc",
//! "caa", "cab", "cac", "cba", "cbb", "cbc", "cca", "ccb", "ccc"]
//! .iter()
//! .map(|s| s.to_string())
//! .collect();
//!
//! assert_eq!(words, expected_words);
//! ```
use std::collections::{HashMap, VecDeque};
use std::str;
/// A representation of an alphabet
pub struct Alphabet {
/// An hashmap used to track what's the next character for every given character.
/// The last caracter will point to None.
pub next_char_map: HashMap<char, Option<char>>,
/// The first character in the alphabet
pub first_char: char,
}
/// A iterator that can generate words for a given alphabet
pub struct WordsIterator<'a> {
/// The reference alphabet instance
pub alphabet: &'a Alphabet,
max_len: Option<usize>,
next_item: String,
}
impl Alphabet {
/// Creates a new alphabet starting from the unique characters found in a given string.
///
/// This function will extract all the unique characters found in order in the given string.
/// It will return an `Err` if there are less than 2 unique characters in the given string.
///
/// # Arguments
///
/// * `alphabet_str` - A string-like instance that contains the sequence of characters that
/// we want to use to initialize our `Alphabet` instance
///
/// # Returns
///
/// It returns a Result containing the new `Alphabet` instance in case of success.
///
/// # Examples
///
/// Creates an alphabet using characters from `'a'` to `'f'`:
///
/// ```rust
/// use allwords::{Alphabet};
///
/// let alphabet = Alphabet::from_chars_in_str("abcdef").unwrap();
/// ```
///
/// Passing an empty string or a string with less than 2 unique chars will return an error:
///
/// ```rust
/// use allwords::{Alphabet};
///
/// let a = Alphabet::from_chars_in_str("zzzzzzzzzzzzzz");
///
/// match a {
/// Ok(_) => panic!("An alphabet was created when we expected an error"),
/// Err(e) => assert_eq!(
/// e,
/// String::from("Invalid alphabet string. Found less than 2 unique chars")
/// ),
/// };
/// ```
///
/// Since `Alphabet` implements `str::FromStr`, you can also do the following:
///
/// ```rust
/// use allwords::{Alphabet};
///
/// let value = "abcdef";
/// let alphabet = value.parse::<Alphabet>().unwrap(); // long life to the turbofish!
/// ```
pub fn from_chars_in_str<T: AsRef<str>>(alphabet_str: T) -> Result<Self, String> {
// creates the map of next characters removing duplicates
let mut next_char_map = HashMap::new();
let mut first_char: Option<char> = None;
let mut previous_char: Option<char> = None;
for c in alphabet_str.as_ref().chars() {
if first_char.is_none() {
first_char = Some(c);
previous_char = Some(c);
} else if previous_char.is_some()
&& previous_char.unwrap() != c
&& !next_char_map.contains_key(&c)
{
next_char_map.insert(previous_char.unwrap(), Some(c));
previous_char = Some(c);
}
}
// adds last char if hasn't been added yet
if let Some(pc) = previous_char {
next_char_map.entry(pc).or_insert(None);
}
if next_char_map.keys().len() < 2 {
return Err(String::from(
"Invalid alphabet string. Found less than 2 unique chars",
));
}
Ok(Alphabet {
next_char_map,
first_char: first_char.unwrap(),
})
}
/// Creates an iterator that will generate all the words for a given alphabet. You can optionally
/// specifify a maximum length, after which, the iterator will terminate.
///
/// # Arguments
///
/// * `max_len` - an optional `usize` that, if present, will specify the maximum length of the
/// generated string. If `None` the iterator will be endless.
///
/// # Returns
///
/// An instance of a [`WordsIterator`].
///
/// # Examples
///
/// Creates an iterator with `max_len` = `2` for a given alphabet:
///
/// ```rust
/// use allwords::{Alphabet};
///
/// let alphabet = Alphabet::from_chars_in_str("01").unwrap();
/// let iterator = alphabet.all_words(Some(2));
/// let words: Vec<String> = iterator.collect();
/// assert_eq!(words, vec!["0", "1", "00", "01", "10", "11"]);
/// ```
pub fn all_words(&self, max_len: Option<usize>) -> WordsIterator {
WordsIterator {
alphabet: self,
max_len,
next_item: String::from(self.first_char),
}
}
/// A shortcut for creating an unbound (endless) iterator for the given alphabet.
///
/// # Examples
///
/// ```rust
/// use allwords::{Alphabet};
///
/// let alphabet = Alphabet::from_chars_in_str("01").unwrap();
/// let iterator = alphabet.all_words_unbound(); // equivalent to `alphabet.all_words(None)`
/// ```
pub fn all_words_unbound(&self) -> WordsIterator {
WordsIterator {
alphabet: self,
max_len: None,
next_item: String::from(self.first_char),
}
}
/// Creates an iterator that will generate all the words for a given alphabet starting from a given word.
/// This method can be useful in case you want to restart a partially completed iteration from another execution or
/// if you want to distribute computation across indepentend processes or threads.
///
/// **Note:** this method does not check that the starting word complies with the alphabet. If there are characters
/// in the string that are NOT present in the alphabet, the iterator will consider these characters as last character and
/// restart the sequence from the first character in the alphabet.
///
/// # Arguments
///
/// * `start_word` - a `String` instance representing the starting string. This string will be returned by the
/// first `.next()` call to the iterator).
/// * `max_len` - an optional `usize` that, if present, will specify the maximum length of the
/// generated string. If `None` the iterator will be endless.
///
/// # Returns
///
/// An instance of a [`WordsIterator`].
///
/// # Examples
///
/// Creates an iterator with `max_len` = `2` starting from "01" for a given alphabet:
///
/// ```rust
/// use allwords::{Alphabet};
///
/// let alphabet = Alphabet::from_chars_in_str("01").unwrap();
/// let iterator = alphabet.all_words_starting_from(String::from("01"), Some(2));
/// let words: Vec<String> = iterator.collect();
/// assert_eq!(words, vec!["01", "10", "11"]);
/// ```
pub fn all_words_starting_from(
&self,
start_word: String,
max_len: Option<usize>,
) -> WordsIterator {
WordsIterator {
alphabet: self,
max_len,
next_item: start_word,
}
}
/// Creates an iterator that will generate all the words for a given alphabet starting from the first word with
/// a given minimum length.
///
/// # Arguments
///
/// * `start_len` - a `usize` defining the minimum length of the first word emitted by the iterator.
/// * `max_len` - an optional `usize` that, if present, will specify the maximum length of the
/// generated string. If `None` the iterator will be endless.
///
/// # Returns
///
/// An instance of a [`WordsIterator`].
///
/// # Examples
///
/// Creates an iterator for all the words with length bethween 2 and 3 chars:
///
/// ```rust
/// use allwords::{Alphabet};
///
/// let alphabet = Alphabet::from_chars_in_str("01").unwrap();
/// let iterator = alphabet.all_words_with_len(2, Some(3));
/// let words: Vec<String> = iterator.collect();
/// assert_eq!(words, vec!["00", "01", "10", "11", "000", "001", "010", "011", "100", "101", "110", "111"]);
/// ```
pub fn all_words_with_len(&self, start_len: usize, max_len: Option<usize>) -> WordsIterator {
WordsIterator {
alphabet: self,
max_len,
next_item: (0..start_len).map(|_| self.first_char).collect::<String>(),
}
}
}
impl<'a> Iterator for WordsIterator<'a> {
type Item = String;
fn next(&mut self) -> Option<String> {
if self.max_len.is_some() && self.max_len.unwrap() < self.next_item.len() {
return None;
}
let current_item = self.next_item.clone();
let mut next_item: VecDeque<char> = VecDeque::with_capacity(current_item.len() + 1);
let mut carry = true;
for c in current_item.chars().rev() {
if carry {
let next_char = self.alphabet.next_char_map.get(&c).unwrap_or(&None);
let next_char = match next_char {
Some(c) => {
carry = false;
*c
}
None => {
carry = true;
self.alphabet.first_char
}
};
next_item.push_front(next_char);
} else {
next_item.push_front(c);
}
}
if carry {
next_item.push_front(self.alphabet.first_char);
}
let next_item: String = next_item.iter().collect();
self.next_item = next_item;
Some(current_item)
}
}
impl str::FromStr for Alphabet {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Alphabet::from_chars_in_str(&String::from(s))
}
}
#[cfg(test)]
mod test;