use std::io::Write;
use std::rc::Rc;
use num_bigint::{BigUint, ToBigUint};
use crate::charsets::Charset;
use crate::mask::{parse_mask, validate_charsets, validate_wordlists, MaskOp};
use crate::stackbuf::StackBuf;
use crate::wordlists::{Wordlist, WordlistIterator};
use crate::{BoxResult, MAX_WORD_SIZE};
/// Common interface of the word generators defined in this file.
pub trait WordGenerator {
/// Writes every generated word to `out`.
///
/// Both implementations in this file emit each word followed by a `\n`
/// byte and propagate any I/O error returned by the writer.
fn gen<'b>(&self, out: &mut Box<dyn Write + 'b>) -> Result<(), std::io::Error>;
/// Returns the number of combinations covered by the generator as an
/// arbitrary-precision integer.
fn combinations(&self) -> BigUint;
}
/// Generator for masks that consist only of literal characters and charsets
/// (no wordlists).
pub struct CharsetGenerator {
/// The parsed mask this generator was built from.
pub mask: Vec<MaskOp>,
/// Shortest word length generated (inclusive).
pub minlen: usize,
/// Longest word length generated (inclusive).
pub maxlen: usize,
// One charset per mask operation, in mask order.
charsets: Vec<Charset>,
// The `min_char` of every charset, in mask order; the first word of each length
// is a prefix of this.
min_word: Vec<u8>,
}
/// Generator for masks that reference at least one wordlist, optionally mixed
/// with literal characters and charsets.
pub struct WordlistGenerator {
/// The parsed mask this generator was built from.
pub mask: Vec<MaskOp>,
// One entry per mask operation, in mask order: either a charset or a shared wordlist.
items: Vec<WordlistItem>,
}
// One mask position of a `WordlistGenerator`.
// NOTE(review): the lint is silenced presumably because `Charset` is much larger than
// the `Rc` pointer and is intentionally stored inline — confirm against `Charset`.
#[allow(clippy::large_enum_variant)]
enum WordlistItem {
// A single-byte position drawn from a charset (also used for literal mask characters).
Charset(Charset),
// A variable-length position drawn from a wordlist; the `Rc` lets several mask
// positions share one loaded wordlist.
Wordlist(Rc<Wordlist>),
}
// Iteration cursor for one mask position while `WordlistGenerator::gen_words` runs.
enum Position<'a> {
// Cursor over a charset position.
CharsetPos {
// The charset this position draws from.
charset: &'a Charset,
// The byte currently emitted at this position.
chr: u8,
},
// Cursor over a wordlist position.
WordlistPos {
// The wordlist this position draws from; used to restart `idx` once it is exhausted.
wordlist: &'a Rc<Wordlist>,
// Iterator yielding the words of `wordlist`.
idx: WordlistIterator<'a>,
},
}
/// Parses `mask` and returns the generator able to enumerate it.
///
/// A mask without wordlist operations is handled by [`CharsetGenerator`];
/// any other mask is handled by [`WordlistGenerator`].
///
/// # Errors
///
/// Fails when the mask cannot be parsed, when it refers to a custom charset
/// or wordlist that was not supplied, when `minlen`/`maxlen` are given
/// together with a wordlist mask, or when the selected generator cannot be
/// constructed.
pub fn get_word_generator<'a>(
    mask: &'a str,
    minlen: Option<usize>,
    maxlen: Option<usize>,
    custom_charsets: &[&'a str],
    wordlists_fnames: &[&'a str],
) -> BoxResult<Box<dyn WordGenerator + 'a>> {
    let mask_ops = parse_mask(mask)?;
    validate_charsets(&mask_ops, custom_charsets.len())?;
    validate_wordlists(&mask_ops, wordlists_fnames.len())?;

    let uses_wordlists = mask_ops
        .iter()
        .any(|op| matches!(op, MaskOp::Wordlist(_)));

    // Pure charset masks take the fixed-width generator.
    if !uses_wordlists {
        let generator = CharsetGenerator::new(mask_ops, minlen, maxlen, custom_charsets)?;
        return Ok(Box::new(generator));
    }

    // Length limits are only accepted for pure charset masks.
    if minlen.is_some() || maxlen.is_some() {
        bail!("cannot set minlen or maxlen with wordlists");
    }

    let generator = WordlistGenerator::new(mask_ops, wordlists_fnames, custom_charsets)?;
    Ok(Box::new(generator))
}
impl CharsetGenerator {
    /// Builds a generator for a mask made only of literal characters and
    /// charsets.
    ///
    /// `minlen` and `maxlen` default to the number of mask operations when
    /// they are `None`.
    ///
    /// # Errors
    ///
    /// Returns an error when `minlen` is zero, greater than `maxlen` or
    /// greater than the mask length, or when `maxlen` is greater than the
    /// mask length.
    ///
    /// # Panics
    ///
    /// Panics if `mask` contains a [`MaskOp::Wordlist`] operation, or if a
    /// custom charset index is out of range for `custom_charsets`.
    pub fn new(
        mask: Vec<MaskOp>,
        minlen: Option<usize>,
        maxlen: Option<usize>,
        custom_charsets: &[&str],
    ) -> BoxResult<CharsetGenerator> {
        let charsets: Vec<_> = mask
            .iter()
            .map(|op| match op {
                // A literal character is a charset with exactly one member; a
                // stack array avoids allocating a Vec for a single byte.
                MaskOp::Char(ch) => Charset::from_chars(&[*ch as u8]),
                MaskOp::BuiltinCharset(ch) => Charset::from_symbol(*ch),
                MaskOp::CustomCharset(idx) => Charset::from_chars(custom_charsets[*idx].as_bytes()),
                MaskOp::Wordlist(_) => unreachable!("cant handle wordlists"),
            })
            .collect();

        let mask_len = charsets.len();
        let minlen = minlen.unwrap_or(mask_len);
        let maxlen = maxlen.unwrap_or(mask_len);
        if !(0 < minlen && minlen <= maxlen && minlen <= mask_len) {
            bail!("minlen is invalid");
        }
        if maxlen > mask_len {
            bail!("maxlen is invalid");
        }

        let min_word: Vec<u8> = charsets.iter().map(|c| c.min_char).collect();
        Ok(CharsetGenerator {
            mask,
            minlen,
            maxlen,
            charsets,
            min_word,
        })
    }

    /// Writes every word of exactly `pwdlen` bytes, built from the first
    /// `pwdlen` charsets, to `out`; each word is followed by `\n`.
    ///
    /// Words are accumulated in a `StackBuf` and flushed to `out` in batches.
    ///
    /// # Panics
    ///
    /// Panics if `pwdlen` is not smaller than `MAX_WORD_SIZE` or exceeds the
    /// number of charsets.
    // The writer type mirrors `WordGenerator::gen`, which hands over a
    // `&mut Box<dyn Write>`, hence the silenced lint.
    #[allow(clippy::borrowed_box)]
    fn gen_by_length<'b>(
        &self,
        pwdlen: usize,
        out: &mut Box<dyn Write + 'b>,
    ) -> Result<(), std::io::Error> {
        let mut buf = StackBuf::new();
        // Number of (word + newline) records written per flush.
        // NOTE(review): assumes `buf.len()` is the buffer capacity and is at
        // least `pwdlen + 1`; a zero batch size would never make progress.
        let batch_size = buf.len() / (pwdlen + 1);

        // The scratch word is pre-filled with '\n', so the byte after the
        // last character already is the line terminator.
        let mut word_storage = [b'\n'; MAX_WORD_SIZE];
        let word = &mut word_storage[..=pwdlen];
        word[..pwdlen].copy_from_slice(&self.min_word[..pwdlen]);

        'outer_loop: loop {
            'batch_for: for _ in 0..batch_size {
                buf.write(word);
                // Advance like an odometer, starting from the last position.
                for pos in (0..pwdlen).rev() {
                    let chr = word[pos];
                    // Indexing a charset by the current byte yields the byte
                    // that follows it; a result that is not greater means the
                    // position wrapped around and the carry moves left.
                    let next_chr = self.charsets[pos][chr as usize];
                    word[pos] = next_chr;
                    if chr < next_chr {
                        continue 'batch_for;
                    }
                }
                // Every position wrapped: the last word has been written.
                break 'outer_loop;
            }
            out.write_all(buf.getdata())?;
            buf.clear();
        }
        // Flush the final, possibly partial, batch.
        out.write_all(buf.getdata())?;
        Ok(())
    }
}
impl WordGenerator for CharsetGenerator {
    /// Writes all words of every length from `minlen` to `maxlen`
    /// (inclusive), shortest length first.
    fn gen<'b>(&self, out: &mut Box<dyn Write + 'b>) -> Result<(), std::io::Error> {
        for pwdlen in self.minlen..=self.maxlen {
            self.gen_by_length(pwdlen, out)?;
        }
        Ok(())
    }

    /// Returns the total number of words: for each length in
    /// `minlen..=maxlen`, the product of the sizes of the first `length`
    /// charsets, summed over all lengths.
    fn combinations(&self) -> BigUint {
        let mut combs: BigUint = 0.to_biguint().unwrap();
        for len in self.minlen..=self.maxlen {
            // The product is already a `BigUint`; converting it again would
            // only clone it at every step.
            combs += self
                .charsets
                .iter()
                .take(len)
                .fold(1.to_biguint().unwrap(), |acc, charset| acc * charset.len);
        }
        combs
    }
}
impl WordlistGenerator {
    /// Builds a generator for a mask that mixes wordlists with literal
    /// characters and charsets.
    ///
    /// Every file in `wordlists_fnames` is loaded once; mask positions that
    /// refer to the same wordlist share the loaded data.
    ///
    /// # Errors
    ///
    /// Returns the error reported by `Wordlist::from_file` when a wordlist
    /// cannot be loaded.
    ///
    /// # Panics
    ///
    /// Panics if the mask refers to a custom charset or wordlist index that
    /// is out of range for the supplied slices.
    pub fn new(
        mask: Vec<MaskOp>,
        wordlists_fnames: &[&str],
        custom_charsets: &[&str],
    ) -> BoxResult<WordlistGenerator> {
        let mut wordlists_data = Vec::with_capacity(wordlists_fnames.len());
        for fname in wordlists_fnames {
            wordlists_data.push(Rc::new(Wordlist::from_file(fname)?));
        }

        let items: Vec<WordlistItem> = mask
            .iter()
            .map(|op| match op {
                // A literal character is a charset with exactly one member; a
                // stack array avoids allocating a Vec for a single byte.
                MaskOp::Char(ch) => WordlistItem::Charset(Charset::from_chars(&[*ch as u8])),
                MaskOp::BuiltinCharset(ch) => WordlistItem::Charset(Charset::from_symbol(*ch)),
                MaskOp::CustomCharset(idx) => {
                    WordlistItem::Charset(Charset::from_chars(custom_charsets[*idx].as_bytes()))
                }
                MaskOp::Wordlist(idx) => WordlistItem::Wordlist(Rc::clone(&wordlists_data[*idx])),
            })
            .collect();

        Ok(WordlistGenerator { mask, items })
    }

    /// Writes every combination of the mask items to `out`, one word per
    /// line, with the rightmost item varying fastest.
    ///
    /// The current word lives in a fixed scratch buffer. Charset positions
    /// occupy one byte; wordlist positions occupy as many bytes as their
    /// current word, so the bytes to the right are shifted whenever a
    /// wordlist position switches to a word of a different length.
    ///
    /// # Panics
    ///
    /// Panics if a wordlist yields no words, or if a generated word plus its
    /// newline does not fit in `MAX_WORD_SIZE` bytes.
    // The writer type mirrors `WordGenerator::gen`, which hands over a
    // `&mut Box<dyn Write>`, hence the silenced lint.
    #[allow(clippy::borrowed_box)]
    fn gen_words<'b>(&self, out: &mut Box<dyn Write + 'b>) -> Result<(), std::io::Error> {
        let mut buf = StackBuf::new();
        let mut word_buf = [b'\n'; MAX_WORD_SIZE];
        let word = &mut word_buf[..];

        // One cursor per mask item, each starting at its first value.
        let mut positions: Vec<_> = self
            .items
            .iter()
            .map(|item| match item {
                WordlistItem::Charset(charset) => Position::CharsetPos {
                    charset,
                    chr: charset.min_char,
                },
                WordlistItem::Wordlist(wordlist) => Position::WordlistPos {
                    wordlist,
                    idx: wordlist.iter(),
                },
            })
            .collect();

        // Assemble the first word; this also advances every wordlist
        // iterator onto its first entry.
        let mut min_word = vec![];
        for pos in positions.iter_mut() {
            match pos {
                Position::CharsetPos { chr, .. } => min_word.push(*chr),
                Position::WordlistPos { idx, .. } => min_word
                    .extend_from_slice(idx.next().expect("wordlist must contain at least one word")),
            }
        }
        min_word.push(b'\n');

        // Length of the current word including its trailing newline.
        let mut word_len = min_word.len();
        word[..word_len].copy_from_slice(&min_word);

        'outer_loop: loop {
            // Flush before the next word would no longer fit in the buffer.
            if buf.pos() + word_len >= buf.len() {
                out.write_all(buf.getdata())?;
                buf.clear();
            }
            buf.write(&word[..word_len]);

            // `end` is the exclusive end offset of the item being advanced.
            // Tracking an exclusive bound keeps every offset non-negative, so
            // the arithmetic is the same in debug and release builds, even
            // when the leftmost bytes of the word are consumed.
            let mut end = word_len - 1;
            for itempos in positions.iter_mut().rev() {
                match itempos {
                    Position::CharsetPos { charset, chr } => {
                        let prev_chr = *chr;
                        // Indexing a charset by the current byte yields the
                        // byte that follows it in the charset.
                        *chr = charset[prev_chr as usize];
                        word[end - 1] = *chr;
                        if prev_chr < *chr {
                            continue 'outer_loop;
                        }
                        // Wrapped around: carry into the item to the left.
                        end -= 1;
                    }
                    Position::WordlistPos { wordlist, idx } => {
                        let prev_len = idx.current_len();
                        let (wordlist_word, finished) = match idx.next() {
                            Some(w) => (w, false),
                            None => {
                                // Exhausted: restart from the first entry and
                                // carry into the item to the left.
                                *idx = wordlist.iter();
                                (idx.next().unwrap(), true)
                            }
                        };
                        let wlen = wordlist_word.len();

                        // The previous entry occupies `start..end`.
                        let start = end - prev_len;
                        let new_end = start + wlen;
                        if prev_len != wlen {
                            // Move everything right of this item (including
                            // the newline) so it follows the new entry.
                            word.copy_within(end..word_len, new_end);
                            word_len = word_len - prev_len + wlen;
                        }
                        word[start..new_end].copy_from_slice(wordlist_word);
                        if !finished {
                            continue 'outer_loop;
                        }
                        end = start;
                    }
                }
            }
            // Every item wrapped around: the enumeration is complete.
            break;
        }
        // Flush whatever is left in the buffer.
        out.write_all(buf.getdata())?;
        Ok(())
    }
}
impl<'a> WordGenerator for WordlistGenerator {
    /// Writes every combination of the mask items to `out`.
    fn gen<'b>(&self, out: &mut Box<dyn Write + 'b>) -> Result<(), std::io::Error> {
        self.gen_words(out)
    }

    /// Returns the product of the number of choices of every mask item:
    /// the word count for wordlists and the charset size for charsets.
    fn combinations(&self) -> BigUint {
        let mut total = 1.to_biguint().unwrap();
        for item in &self.items {
            let choices = match item {
                WordlistItem::Charset(charset) => charset.len.to_biguint().unwrap(),
                WordlistItem::Wordlist(wordlist) => wordlist.len().to_biguint().unwrap(),
            };
            total = total * choices;
        }
        total
    }
}
// Unit tests for the generators. Several tests compare the generated output with
// fixture files resolved through `crate::test_util::wordlist_fname`.
#[cfg(test)]
mod tests {
use std::fs;
use std::io::{Cursor, Write};
use num_bigint::{BigUint, ToBigUint};
use crate::generators::get_word_generator;
use crate::mask::parse_mask;
use crate::test_util::wordlist_fname;
use super::{CharsetGenerator, WordGenerator};
// A single `?d` position with default lengths yields the ten digits in order.
#[test]
fn test_gen_words_single_digit() {
let mask = parse_mask("?d").unwrap();
let word_gen = CharsetGenerator::new(mask.to_vec(), None, None, &vec![]).unwrap();
assert_eq!(word_gen.mask, mask);
assert_eq!(word_gen.minlen, 1);
assert_eq!(word_gen.maxlen, 1);
assert_eq!(word_gen.charsets.len(), 1);
assert_eq!(word_gen.min_word, "0".as_bytes());
let res = assert_gen(Box::new(word_gen), "single-digits.txt");
assert_eq!(res, "0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n");
}
// With `minlen = 1` and no `maxlen`, every length from 1 up to the mask length is generated.
#[test]
fn test_gen_upper_lower_1_4() {
let mask = parse_mask("?u?l?u?l").unwrap();
let word_gen = CharsetGenerator::new(mask.to_vec(), Some(1), None, &vec![]).unwrap();
assert_eq!(word_gen.mask, mask);
assert_eq!(word_gen.minlen, 1);
assert_eq!(word_gen.maxlen, 4);
assert_eq!(word_gen.charsets.len(), 4);
assert_eq!(word_gen.min_word, "AaAa".as_bytes());
assert_gen(Box::new(word_gen), "upper-lower-1-4.txt");
}
// Literal characters, builtin charsets and the custom charset `?1` combined in one mask.
#[test]
fn test_gen_pwd_upper_lower_year_1_4() {
let mask = parse_mask("pwd?u?l201?1").unwrap();
let word_gen = CharsetGenerator::new(mask.to_vec(), Some(1), None, &vec!["56789"]).unwrap();
assert_eq!(word_gen.mask, mask);
assert_eq!(word_gen.minlen, 1);
assert_eq!(word_gen.maxlen, 9);
assert_eq!(word_gen.charsets.len(), 9);
assert_eq!(word_gen.min_word, "pwdAa2015".as_bytes());
assert_gen(Box::new(word_gen), "upper-lower-year-1-4.txt");
}
// A mask without wordlists is accepted together with an explicit `minlen`.
#[test]
fn test_get_word_generator_charset() {
let mask = "?d?d?d?d";
let word_gen =
get_word_generator(mask, Some(4), None, vec![].as_ref(), vec![].as_ref()).unwrap();
assert_eq!(word_gen.combinations(), 10000.to_biguint().unwrap());
}
// A mask with a wordlist multiplies the charset sizes by the wordlist size.
#[test]
fn test_get_word_generator_wordlist() {
let mask = "?d?d?d?d?w1";
let wordlist_fname = wordlist_fname("wordlist1.txt");
let wordlists = vec![wordlist_fname.to_str().unwrap()];
let word_gen =
get_word_generator(mask, None, None, vec![].as_ref(), wordlists.as_ref()).unwrap();
assert_eq!(word_gen.combinations(), 100000.to_biguint().unwrap());
}
// A mask consisting of a single wordlist reproduces that wordlist.
#[test]
fn test_word_generator_wordlist_simple() {
let mask = "?w1";
let wordlist1 = wordlist_fname("wordlist1.txt");
let wordlists = vec![wordlist1.to_str().unwrap()];
let word_gen =
get_word_generator(mask, None, None, vec![].as_ref(), wordlists.as_ref()).unwrap();
assert_eq!(word_gen.combinations(), 10.to_biguint().unwrap());
assert_gen(word_gen, "wordlist-simple.txt");
}
// Two wordlists (one of them used twice) interleaved with builtin and custom charsets.
#[test]
fn test_word_generator_wordlist_and_custom_charset() {
let mask = "?w1?d?w2?l?w1?1";
let wordlist1 = wordlist_fname("wordlist1.txt");
let wordlist2 = wordlist_fname("wordlist2.txt");
let charsets = vec!["!@#"];
let wordlists = vec![wordlist1.to_str().unwrap(), wordlist2.to_str().unwrap()];
let word_gen =
get_word_generator(mask, None, None, charsets.as_ref(), wordlists.as_ref()).unwrap();
assert_eq!(
word_gen.combinations(),
(10 * 10 * 12 * 26 * 10 * 3).to_biguint().unwrap()
);
assert_gen(word_gen, "wordlists-mix.txt");
}
// The mask refers to `?w2` but only one wordlist is supplied, so construction must fail.
#[test]
fn test_word_generator_invalid_wordlist_mask() {
let mask = "?w1?d?w2?l?w1?1";
let wordlist1 = wordlist_fname("wordlist1.txt");
let charsets = vec!["!@#"];
let wordlists = vec![wordlist1.to_str().unwrap()];
let word_gen = get_word_generator(mask, None, None, charsets.as_ref(), wordlists.as_ref());
assert!(word_gen.is_err());
}
// The mask refers to `?3` but only two custom charsets are supplied, so construction must fail.
#[test]
fn test_word_generator_invalid_custom_charset_mask() {
let mask = "a?1?2?l?3";
let charsets = vec!["!@#", "abc"];
let word_gen = get_word_generator(mask, None, None, charsets.as_ref(), vec![].as_ref());
assert!(word_gen.is_err());
}
// Runs the generator into an in-memory buffer, asserts that the output equals the
// contents of the fixture file `fname`, and returns the generated text.
// NOTE(review): as a side effect the *actual* output is written next to the fixture as
// `<fname>_expected.txt` on every run, presumably as a debugging aid — confirm that
// writing into the fixture directory is intended.
fn assert_gen<'a>(w: Box<dyn WordGenerator + 'a>, fname: &str) -> String {
let mut buf: Vec<u8> = Vec::new();
{
// Scoped so the mutable borrow of `buf` ends before it is read below.
let mut cur: Box<dyn Write> = Box::new(Cursor::new(&mut buf));
w.gen(&mut cur).unwrap();
}
let result = String::from_utf8(buf).unwrap();
let expected = fs::read_to_string(wordlist_fname(fname)).unwrap();
let mut s2 = fname.to_owned();
s2.push_str("_expected.txt");
fs::write(wordlist_fname(&s2), &result).unwrap();
assert_eq!(result, expected);
result
}
// Checks `combinations()` against known totals for several masks and length limits.
#[test]
fn test_gen_stats() {
let custom_charsets = vec!["abcd", "01"];
// Each entry is (mask, expected combinations as a decimal string, minlen, maxlen).
let combinations = vec![
("?d?s?u?l?a?b", "5368197120", None, None),
("?d?d?d?d?d?d?d?d", "111111110", Some(1), Some(8)),
("?d?d?d?d?d?d?d?d", "10000", Some(4), Some(4)),
("?d?d?d?d?d?d?d?d", "100000000", None, Some(8)),
("?1?2", "8", None, None),
("?1?2abc", "8", None, None),
("?d?1?2", "80", None, None),
("?d?s?u?l?a?b?1?2", "42945576960", None, None),
("?d?1?2?d", "930", Some(1), None),
(
"?b?b?b?b?b?b?b?b?b?b",
"1208925819614629174706176",
None,
None,
),
];
for (mask_str, result, minlen, maxlen) in combinations {
let mask = parse_mask(mask_str).unwrap();
let word_gen = CharsetGenerator::new(mask, minlen, maxlen, &custom_charsets).unwrap();
assert_eq!(
word_gen.combinations(),
BigUint::parse_bytes(result.as_bytes(), 10).unwrap()
);
}
}
}