// engish 0.3.9
//
// A language utility for sampling and building words.
// Documentation
use crate::language::{Letter, LetterGroup};
use rand::{distr::weighted::WeightedIndex, prelude::Distribution};
use std::collections::BTreeMap;

/// A neat little struct to quickly sample letters based on frequency.
///
/// `alphabet` and `weights` are parallel collections: [`LetterSampler::sample`]
/// draws an index from `weights` and uses it to index `alphabet`, so entry `i`
/// of the weighted index must describe the letter stored at `alphabet[i]`.
/// Both fields are public; code that edits one directly is responsible for
/// keeping the other in step.
#[derive(Debug, Clone)]
pub struct LetterSampler {
    /// The alphabet to sample from, in the same order as the entries of `weights`.
    pub alphabet: Vec<char>,
    /// The weighted index over `alphabet`; it yields positions, not characters.
    pub weights: WeightedIndex<f32>,
}

impl LetterSampler {
    /// Makes a new letter sampler from the given BTreeMap.
    /// Using a BTreeMap ensures that the iteration order is deterministic.
    pub fn new(alphabet: &BTreeMap<char, Letter>) -> Self {
        let (alphabet_chars, base_weights): (Vec<char>, Vec<f32>) =
            alphabet.iter().map(|(c, l)| (*c, l.frequency)).unzip();
        let weights = WeightedIndex::new(&base_weights).unwrap();
        Self {
            alphabet: alphabet_chars,
            weights,
        }
    }

    /// Makes a new, deterministic letter sampler from the given letter's potential digraphs.
    pub fn from_digraphs(letter: &Letter) -> Self {
        let mut digraphs = letter.digraphs.clone();
        // Sort by character to ensure deterministic order.
        digraphs.sort_by_key(|d| d.letter);
        let (alphabet, base_weights): (Vec<char>, Vec<f32>) = digraphs
            .into_iter()
            .map(|d| (d.letter, d.frequency))
            .unzip();
        let weights = WeightedIndex::new(&base_weights).unwrap();
        Self { alphabet, weights }
    }

    /// Takes a random value using a weighted frequency.
    pub fn sample(&self, rng: &mut impl rand::Rng) -> char {
        self.alphabet[self.weights.sample(rng)]
    }

    /// Introduces a list of letters with their frequencies to the sampler.
    pub fn add_letters_with_freq(&mut self, letters: Vec<(char, &Letter)>) {
        let mut current_weights: Vec<f32> = self.weights.weights().into_iter().map(|w| w).collect();

        for (char, letter_data) in letters {
            if !self.alphabet.contains(&char) {
                self.alphabet.push(char);
                current_weights.push(letter_data.frequency);
            }
        }
        self.weights = WeightedIndex::new(&current_weights).unwrap();
    }

    /// Filters out any letters in the given group from this sampler.
    pub fn remove_group(&mut self, group: &LetterGroup) {
        let (new_alphabet, new_weights): (Vec<char>, Vec<f32>) = self
            .alphabet
            .iter()
            .zip(self.weights.weights())
            .filter(|(c, _)| !group.letters.contains(c))
            .map(|(c, w)| (*c, w))
            .unzip();

        self.alphabet = new_alphabet;
        self.weights = WeightedIndex::new(new_weights).unwrap();
    }

    /// Removes the given letter from this sampler.
    pub fn remove_char(&mut self, letter: char) {
        let mut new_alphabet = Vec::new();
        let mut new_weights = Vec::new();
        for (i, l) in self.alphabet.iter().enumerate() {
            if *l != letter {
                new_alphabet.push(*l);
                new_weights.push(self.weights.weight(i).unwrap());
            }
        }
        self.alphabet = new_alphabet;
        self.weights = WeightedIndex::new(&new_weights).unwrap();
    }
}