use crate::language::{Letter, LetterGroup};
use rand::{distr::weighted::WeightedIndex, prelude::Distribution};
use std::collections::BTreeMap;
/// Weighted random sampler over a set of characters.
///
/// `alphabet` and `weights` are parallel collections: `sample` draws an index
/// from `weights` and returns `alphabet[index]`, so both must always describe
/// the same number of entries.
#[derive(Debug, Clone)]
pub struct LetterSampler {
/// Characters that can be drawn; position `i` pairs with weight `i`.
pub alphabet: Vec<char>,
/// Weighted distribution over positions in `alphabet`.
pub weights: WeightedIndex<f32>,
}
impl LetterSampler {
pub fn new(alphabet: &BTreeMap<char, Letter>) -> Self {
let (alphabet_chars, base_weights): (Vec<char>, Vec<f32>) =
alphabet.iter().map(|(c, l)| (*c, l.frequency)).unzip();
let weights = WeightedIndex::new(&base_weights).unwrap();
Self {
alphabet: alphabet_chars,
weights,
}
}
pub fn from_digraphs(letter: &Letter) -> Self {
let mut digraphs = letter.digraphs.clone();
digraphs.sort_by_key(|d| d.letter);
let (alphabet, base_weights): (Vec<char>, Vec<f32>) = digraphs
.into_iter()
.map(|d| (d.letter, d.frequency))
.unzip();
let weights = WeightedIndex::new(&base_weights).unwrap();
Self { alphabet, weights }
}
pub fn sample(&self, rng: &mut impl rand::Rng) -> char {
self.alphabet[self.weights.sample(rng)]
}
pub fn add_letters_with_freq(&mut self, letters: Vec<(char, &Letter)>) {
let mut current_weights: Vec<f32> = self.weights.weights().into_iter().map(|w| w).collect();
for (char, letter_data) in letters {
if !self.alphabet.contains(&char) {
self.alphabet.push(char);
current_weights.push(letter_data.frequency);
}
}
self.weights = WeightedIndex::new(¤t_weights).unwrap();
}
pub fn remove_group(&mut self, group: &LetterGroup) {
let (new_alphabet, new_weights): (Vec<char>, Vec<f32>) = self
.alphabet
.iter()
.zip(self.weights.weights())
.filter(|(c, _)| !group.letters.contains(c))
.map(|(c, w)| (*c, w))
.unzip();
self.alphabet = new_alphabet;
self.weights = WeightedIndex::new(new_weights).unwrap();
}
pub fn remove_char(&mut self, letter: char) {
let mut new_alphabet = Vec::new();
let mut new_weights = Vec::new();
for (i, l) in self.alphabet.iter().enumerate() {
if *l != letter {
new_alphabet.push(*l);
new_weights.push(self.weights.weight(i).unwrap());
}
}
self.alphabet = new_alphabet;
self.weights = WeightedIndex::new(&new_weights).unwrap();
}
}