use std::{
collections::HashMap,
io::{self, prelude::*},
};
use rand::distributions::WeightedIndex;
use rand::prelude::*;
pub mod langs;
/// Character-level transition table used to generate words.
///
/// Each key of `table` is a context of `accuracy` consecutive characters; the
/// associated map counts how many times each character was seen immediately
/// after that context in the text the table was built from.
#[derive(Debug, Clone)]
pub struct ProbabilityTable {
/// Context string -> (following character -> number of occurrences).
pub(crate) table: HashMap<String, HashMap<char, u32>>,
/// Number of characters in every context key.
pub(crate) accuracy: usize,
}
impl ProbabilityTable {
    /// Creates a table with no recorded transitions for contexts of
    /// `accuracy` characters.
    fn new(accuracy: usize) -> Self {
        Self {
            accuracy,
            table: HashMap::new(),
        }
    }

    /// Builds a table from `reader`, treating every line as one word.
    ///
    /// # Errors
    ///
    /// Returns the first I/O error produced while reading lines from `reader`.
    ///
    /// # Panics
    ///
    /// Panics if `accuracy` is `0`.
    pub fn from_reader<T: BufRead>(reader: T, accuracy: usize) -> io::Result<Self> {
        assert!(accuracy >= 1);
        let spaced_text = add_space(reader, accuracy)?;
        Ok(generate_table(spaced_text, accuracy))
    }

    /// Generates `amount` words from the transitions stored in this table.
    pub fn generate_words(&self, amount: u32) -> Vec<String> {
        generate_multiple_words(self, amount)
    }
}
/// Reads every line of `reader`, lowercases it and concatenates the lines into
/// a single string, each one preceded by a run of spaces.
///
/// The run is `accuracy` spaces long, but never shorter than one space. No
/// separator is written after the last line.
///
/// # Errors
///
/// Returns the first error yielded while reading a line.
fn add_space<T: BufRead>(reader: T, accuracy: usize) -> io::Result<String> {
    // Padding a single space to a minimum width of `accuracy` always yields at
    // least that one space, even when `accuracy` is zero.
    let separator = " ".repeat(accuracy.max(1));
    let mut spaced = String::new();
    for line in reader.lines() {
        let word = line?.to_lowercase();
        spaced.push_str(&separator);
        spaced.push_str(&word);
    }
    Ok(spaced)
}
/// Builds the transition table for `spaced_file`.
///
/// Every run of `accuracy + 1` consecutive characters contributes one
/// observation: the first `accuracy` characters form the context key and the
/// last character is counted as a successor of that context.
///
/// A text with `accuracy` characters or fewer (including an empty one)
/// contains no such run and produces an empty table.
fn generate_table(spaced_file: String, accuracy: usize) -> ProbabilityTable {
    let mut table = ProbabilityTable::new(accuracy);
    let chars: Vec<char> = spaced_file.chars().collect();
    // `windows` yields nothing when the slice is shorter than the window, so
    // short inputs need no length arithmetic that could underflow.
    for window in chars.windows(accuracy + 1) {
        let (context, next) = window.split_at(accuracy);
        let key: String = context.iter().collect();
        *table
            .table
            .entry(key)
            .or_default()
            .entry(next[0])
            .or_default() += 1;
    }
    table
}
/// Generates a single word by repeatedly sampling the next character from
/// `table`.
///
/// The word starts from a context of `table.accuracy` spaces. At each step the
/// last `table.accuracy` characters are looked up in the table and the next
/// character is drawn with probability proportional to its recorded count.
/// Generation stops when a space is drawn, or when the current context has no
/// recorded successor (a dead end, which includes the case of an empty table).
/// The result is returned with surrounding whitespace trimmed.
fn generate_word(table: &ProbabilityTable, rng: &mut ThreadRng) -> String {
    let mut out = " ".repeat(table.accuracy);
    loop {
        // Byte offset at which the last `accuracy` characters of `out` begin.
        let start = match table.accuracy {
            0 => out.len(),
            n => out
                .char_indices()
                .rev()
                .nth(n - 1)
                .map_or(0, |(idx, _)| idx),
        };
        // A context that was never followed by anything in the source text
        // ends the word instead of panicking.
        let choices = match table.table.get(&out[start..]) {
            Some(choices) => choices,
            None => break,
        };
        // Collect letters and weights in one pass so index `i` of both
        // vectors refers to the same map entry.
        let (letters, weights): (Vec<char>, Vec<u32>) = choices
            .iter()
            .map(|(&letter, &count)| (letter, count))
            .unzip();
        let next_letter = match WeightedIndex::new(&weights) {
            Ok(distribution) => letters[distribution.sample(rng)],
            // No usable weights for this context: treat it as a dead end too.
            Err(_) => break,
        };
        out.push(next_letter);
        if out.ends_with(' ') {
            break;
        }
    }
    out.trim().to_string()
}
/// Generates `number` words from `matrix`, drawing all of them from a single
/// thread-local random number generator.
fn generate_multiple_words(matrix: &ProbabilityTable, number: u32) -> Vec<String> {
    let mut rng = thread_rng();
    (0..number)
        .map(|_| generate_word(matrix, &mut rng))
        .collect()
}
/// Reads a word list from `reader` (one word per line), builds a transition
/// table with contexts of `accuracy` characters and generates `amout` words.
///
/// The returned words are ordered by increasing byte length; words of equal
/// length keep the order in which they were generated.
///
/// # Errors
///
/// Returns the first I/O error produced while reading lines from `reader`.
pub fn generate_words<T: BufRead>(
    reader: T,
    accuracy: usize,
    amout: u32,
) -> io::Result<Vec<String>> {
    let spaced_text = add_space(reader, accuracy)?;
    let table = generate_table(spaced_text, accuracy);
    let mut words = generate_multiple_words(&table, amout);
    // Stable sort on `String::len`, i.e. length in bytes.
    words.sort_by(|left, right| left.len().cmp(&right.len()));
    Ok(words)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{self, BufReader};

    /// A table built from the bundled French word list yields exactly the
    /// requested number of words.
    #[test]
    fn table_generate_words_works() -> io::Result<()> {
        let reader = BufReader::new(langs::FR_TXT);
        let table = ProbabilityTable::from_reader(reader, 3)?;
        assert_eq!(table.generate_words(15).len(), 15);
        Ok(())
    }

    /// Every context key of a table built with accuracy 3 is exactly three
    /// characters long.
    ///
    /// All keys are checked, so the test is deterministic and does not depend
    /// on how many keys the table holds.
    #[test]
    fn table_consticency() -> io::Result<()> {
        let reader = BufReader::new(langs::FR_TXT);
        let table = ProbabilityTable::from_reader(reader, 3)?;
        assert!(!table.table.is_empty());
        for key in table.table.keys() {
            assert_eq!(key.chars().count(), 3, "unexpected key length for {:?}", key);
        }
        Ok(())
    }
}