use rand::{Rng, RngCore};
use std::{
collections::*,
fs::File,
io::{prelude::*, BufReader},
iter::{once, repeat},
path::Path,
};
#[cfg(feature = "hashbrown")]
use hashbrown::HashMap;
/// Marker trait with no methods; it is the bound on `Storage::Prefix`.
///
/// NOTE(review): nothing in the visible code implements this trait yet — presumably a
/// placeholder for pluggable prefix-table storage; confirm before relying on it.
pub trait PrefixStorage {}
/// Backing store for a table of per-character counts.
pub trait FrequencyStorage {
    /// Records `count` for the character `c`.
    fn insert(&mut self, c: char, count: usize);

    /// Iterates over the stored `(character, count)` pairs.
    fn iter(&self) -> impl Iterator<Item = (char, usize)>;

    /// Finds the entry whose cumulative count range contains `index`.
    ///
    /// Entries are visited in [`iter`](Self::iter) order and each one covers `count`
    /// consecutive indices, so an entry with a larger count is hit by more indices.
    /// Returns `None` when `index` is at or beyond the sum of all counts.
    fn nth(&self, index: usize) -> Option<(char, usize)> {
        // Distance still to travel before reaching the entry that owns `index`.
        let mut remaining = index;
        self.iter().find(|&(_, weight)| {
            if remaining < weight {
                true
            } else {
                // Cannot underflow: this branch is only taken when `remaining >= weight`.
                remaining -= weight;
                false
            }
        })
    }
}
/// Bundles the two storage back-ends as associated types.
///
/// NOTE(review): no implementation or use of this trait is visible in this file —
/// presumably groundwork for making the model generic over its storage; confirm.
pub trait Storage {
/// Storage type for the prefix table; must implement `PrefixStorage`.
type Prefix: PrefixStorage;
/// Storage type for per-character counts; must implement `FrequencyStorage`.
type Frequency: FrequencyStorage;
}
/// Keeps per-character counts directly in a hash map keyed by the character.
impl FrequencyStorage for HashMap<char, usize> {
    /// Adds `count` to the entry for `c`, starting from zero when `c` is not present yet.
    fn insert(&mut self, c: char, count: usize) {
        *self.entry(c).or_default() += count;
    }

    /// Yields a copy of every `(character, count)` pair in the map's iteration order.
    fn iter(&self) -> impl Iterator<Item = (char, usize)> {
        // Spell out the inherent `HashMap::iter` so this does not read as a recursive call.
        HashMap::iter(self).map(|(&character, &count)| (character, count))
    }
}
/// Per-character counts together with a running total, generic over the backing store `S`
/// (a `HashMap<char, usize>` unless specified otherwise).
#[derive(Debug, Clone, Default, PartialEq)]
pub struct Frequency<S: FrequencyStorage = HashMap<char, usize>> {
/// Backing store holding the count recorded for each character.
table: S,
/// Running sum of every `count` passed to `Frequency::insert`.
total: usize,
}
impl Frequency {
pub fn insert(&mut self, c: char, count: usize) {
self.table.insert(c, count);
self.total += count;
}
}
/// Newtype around a boxed slice of characters.
///
/// NOTE(review): not referenced anywhere else in the visible code — presumably meant to wrap
/// the sliding window that `Markov::window` returns and `Markov::generate` updates; confirm.
pub struct Window(Box<[char]>);
/// Character-level Markov model: for every `depth`-character prefix it records how often
/// each character followed that prefix.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct Markov {
/// Number of preceding characters that form the lookup key for the next character.
depth: usize,
/// Counts of the following character, keyed by the `depth` characters that precede it.
prefix: HashMap<Box<[char]>, Frequency>,
}
impl Markov {
    /// Creates an empty model that keys every character on the `depth` characters before it.
    ///
    /// A `depth` of `0` is allowed and yields a model with a single, prefix-less table.
    pub fn new(depth: usize) -> Self {
        Self {
            depth,
            prefix: Default::default(),
        }
    }

    /// Builds a model from the file at `path`, inserting every line as one word with count 1.
    ///
    /// # Errors
    ///
    /// Returns the underlying I/O error if the file cannot be opened or a line cannot be read.
    pub fn load(path: &Path, depth: usize) -> Result<Self, std::io::Error> {
        let reader = BufReader::new(File::open(path)?);
        let mut model = Self::new(depth);
        for line in reader.lines() {
            model.insert(&line?, 1);
        }
        Ok(model)
    }

    /// Adds `count` occurrences of `word` to the model.
    ///
    /// The word is padded with `depth` leading NUL characters (so its first character has a
    /// full prefix) and one trailing NUL that marks the end of the word. Every run of
    /// `depth + 1` consecutive characters is then recorded as "prefix followed by character".
    pub fn insert(&mut self, word: &str, count: usize) {
        let chars: Vec<char> = repeat('\0')
            .take(self.depth)
            .chain(word.chars())
            .chain(once('\0'))
            .collect();
        for sequence in chars.windows(self.depth + 1) {
            self.insert_sequence(sequence, count);
        }
    }

    /// Records that the last character of `sequence` followed its first `depth` characters
    /// `count` more times.
    fn insert_sequence(&mut self, sequence: &[char], count: usize) {
        let prefix = &sequence[0..self.depth];
        // Look the prefix up by slice first so that a boxed key is only allocated the first
        // time a prefix is seen, not on every insertion.
        let frequency = match self.prefix.get_mut(prefix) {
            Some(frequency) => frequency,
            None => self.prefix.entry(prefix.into()).or_default(),
        };
        frequency.insert(sequence[self.depth], count);
    }

    /// Returns a fresh window of `depth` NUL characters, i.e. the state at the start of a word.
    pub fn window(&self) -> Box<[char]> {
        vec!['\0'; self.depth].into_boxed_slice()
    }

    /// Samples the character that follows `window` and slides `window` forward by one.
    ///
    /// The next character is drawn at random, weighted by how often each character followed
    /// the current window contents. `window` is then shifted left by one position and the
    /// sampled character is stored in its last slot; for a model of depth `0` the window is
    /// empty and is left untouched.
    ///
    /// Returns `(character, ratio)`:
    /// * `character` is `None` when the NUL end-of-word marker was drawn, otherwise the
    ///   sampled character.
    /// * `ratio` is the prefix's total count divided by the drawn character's count, i.e. the
    ///   reciprocal of that character's observed relative frequency.
    ///
    /// # Panics
    ///
    /// Panics if `window` does not hold at least `depth` characters, if its contents were
    /// never inserted as a prefix, or if the matching table has a total count of zero.
    pub fn generate(&self, rng: &mut dyn RngCore, window: &mut [char]) -> (Option<char>, f64) {
        debug_assert!(window.len() == self.depth);
        let frequency = self
            .prefix
            .get(&window[0..self.depth])
            .expect("no frequency table recorded for the current window");
        let index = rng.gen_range(0..frequency.total);
        let (character, count) = frequency
            .table
            .nth(index)
            .expect("sampled index must fall inside the frequency table");
        // A depth-0 model has an empty window: there is nothing to shift or overwrite, and
        // both `1..0` and `depth - 1` would panic.
        if self.depth > 0 {
            window.copy_within(1..self.depth, 0);
            window[self.depth - 1] = character;
        }
        let entropy = frequency.total as f64 / count as f64;
        let character = (character != '\0').then_some(character);
        (character, entropy)
    }
}