use std::collections::HashMap;
use once_cell::sync::Lazy;
use std::sync::Arc;
/// Total number of words in the dictionary (2^16 — one word per `u16` index).
pub const WORD_COUNT: usize = 65_536;
/// Bits encoded by a single word index (log2 of `WORD_COUNT`).
pub const BITS_PER_WORD: usize = 16;
/// Bits encoded by a sequence of three word indices (3 * `BITS_PER_WORD`).
pub const BITS_THREE_WORDS: usize = 48;
/// Highest valid word index (`WORD_COUNT - 1` == 65_535 == `u16::MAX`).
pub const MAX_WORD_INDEX: u16 = (WORD_COUNT - 1) as u16;
/// Errors returned by [`Dictionary65K`] construction, lookups, and the
/// global-dictionary accessor.
#[derive(Debug, thiserror::Error)]
pub enum Dictionary65KError {
    /// An index lookup was outside the valid range (fields: index, max).
    #[error("Invalid word index: {0} (max: {1})")]
    InvalidIndex(u16, u16),
    /// A reverse lookup was attempted for a word not in the dictionary.
    #[error("Unknown word: {0}")]
    UnknownWord(String),
    /// The global dictionary is unavailable.
    /// NOTE(review): this variant is never constructed anywhere in this file
    /// as written — confirm intended use or remove.
    #[error("Dictionary not initialized")]
    NotInitialized,
    /// The embedded wordlist did not contain exactly the expected number of
    /// entries.
    #[error("Invalid word count: expected {expected}, got {actual}")]
    InvalidWordCount { expected: usize, actual: usize },
    /// The embedded wordlist contained a malformed entry.
    #[error("Dictionary source file not found or invalid")]
    SourceFileError,
}
/// Fixed 65,536-word dictionary providing bidirectional word <-> `u16`-index
/// lookup. Built from a wordlist embedded at compile time (see `new`).
#[derive(Debug, Clone)]
pub struct Dictionary65K {
    // Words in index order: position i holds the word for index i.
    words: Vec<String>,
    // Reverse map from word to its u16 index.
    word_to_index: HashMap<String, u16>,
}
impl Dictionary65K {
    /// Builds the dictionary from the wordlist embedded at compile time.
    ///
    /// # Errors
    ///
    /// * [`Dictionary65KError::InvalidWordCount`] if the embedded list does
    ///   not contain exactly [`WORD_COUNT`] non-empty lines.
    /// * [`Dictionary65KError::SourceFileError`] if any entry is longer than
    ///   25 bytes (a sanity bound on the source data).
    pub fn new() -> Result<Self, Dictionary65KError> {
        // Embedded at compile time, so no runtime file I/O is required.
        let wordlist_data = include_str!("../GOLD_WORDLIST.txt");
        let words: Vec<String> = wordlist_data
            .lines()
            .map(|s| s.trim().to_string())
            .filter(|s| !s.is_empty())
            .collect();
        if words.len() != WORD_COUNT {
            return Err(Dictionary65KError::InvalidWordCount {
                expected: WORD_COUNT,
                actual: words.len(),
            });
        }
        // Empty entries were already filtered out above; the upper bound
        // guards against a corrupted wordlist.
        if words.iter().any(|w| w.is_empty() || w.len() > 25) {
            return Err(Dictionary65KError::SourceFileError);
        }
        // `i as u16` cannot truncate: words.len() == WORD_COUNT == 65_536,
        // so the largest index is 65_535 == u16::MAX.
        // NOTE(review): a duplicate word would silently shrink this map and
        // break index round-trips — the source list is presumed unique.
        let word_to_index = words
            .iter()
            .enumerate()
            .map(|(i, word)| (word.clone(), i as u16))
            .collect();
        Ok(Self {
            words,
            word_to_index,
        })
    }

    /// Returns the word stored at `index`.
    ///
    /// # Errors
    /// [`Dictionary65KError::InvalidIndex`] if `index` is out of range
    /// (only possible while the word list is shorter than `WORD_COUNT`).
    pub fn get_word(&self, index: u16) -> Result<&str, Dictionary65KError> {
        self.words
            .get(index as usize)
            .map(|s| s.as_str())
            .ok_or(Dictionary65KError::InvalidIndex(index, MAX_WORD_INDEX))
    }

    /// Returns the index of `word`.
    ///
    /// # Errors
    /// [`Dictionary65KError::UnknownWord`] if `word` is not in the dictionary.
    pub fn get_index(&self, word: &str) -> Result<u16, Dictionary65KError> {
        self.word_to_index
            .get(word)
            .copied()
            .ok_or_else(|| Dictionary65KError::UnknownWord(word.to_string()))
    }

    /// Number of words held (always `WORD_COUNT` after a successful `new`).
    pub fn len(&self) -> usize {
        self.words.len()
    }

    /// True if the dictionary holds no words.
    pub fn is_empty(&self) -> bool {
        self.words.is_empty()
    }

    /// True if `word` is present in the dictionary.
    pub fn contains_word(&self, word: &str) -> bool {
        self.word_to_index.contains_key(word)
    }

    /// Computes summary statistics (lengths in bytes) over the word list.
    pub fn stats(&self) -> Dictionary65KStats {
        let mut length_distribution = HashMap::new();
        let mut total_chars = 0;
        for word in &self.words {
            let len = word.len();
            *length_distribution.entry(len).or_insert(0) += 1;
            total_chars += len;
        }
        let min_length = self.words.iter().map(|w| w.len()).min().unwrap_or(0);
        let max_length = self.words.iter().map(|w| w.len()).max().unwrap_or(0);
        // Guard the division: an empty word list would otherwise produce NaN.
        let avg_length = if self.words.is_empty() {
            0.0
        } else {
            total_chars as f64 / self.words.len() as f64
        };
        Dictionary65KStats {
            total_words: self.words.len(),
            min_length,
            max_length,
            avg_length,
            length_distribution,
        }
    }

    /// Returns up to `count` words sampled at an even stride across the list.
    ///
    /// Returns an empty vector when `count` is 0 or the dictionary is empty;
    /// the previous implementation divided by zero and panicked in both
    /// cases. When `count` exceeds the word count, every word is returned.
    pub fn sample_words(&self, count: usize) -> Vec<&str> {
        if count == 0 || self.words.is_empty() {
            return Vec::new();
        }
        let step = self.words.len() / count.min(self.words.len());
        self.words
            .iter()
            .step_by(step.max(1))
            .take(count)
            .map(|s| s.as_str())
            .collect()
    }
}
/// Summary statistics produced by [`Dictionary65K::stats`].
#[derive(Debug, Clone)]
pub struct Dictionary65KStats {
    /// Total number of words in the dictionary.
    pub total_words: usize,
    /// Length in bytes of the shortest word (0 for an empty dictionary).
    pub min_length: usize,
    /// Length in bytes of the longest word (0 for an empty dictionary).
    pub max_length: usize,
    /// Mean word length in bytes.
    pub avg_length: f64,
    /// Map from word length (bytes) to the number of words of that length.
    pub length_distribution: HashMap<usize, usize>,
}
static GLOBAL_DICTIONARY: Lazy<Arc<Dictionary65K>> = Lazy::new(|| {
Arc::new(Dictionary65K::new().expect("Failed to initialize 65K dictionary"))
});
pub fn get_global_dictionary() -> Result<&'static Dictionary65K, Dictionary65KError> {
Ok(&**GLOBAL_DICTIONARY)
}
/// Convenience wrappers around the process-wide global dictionary.
pub mod utils {
    use super::*;

    /// Looks up the word for `index` in the global dictionary.
    pub fn index_to_word(index: u16) -> Result<&'static str, Dictionary65KError> {
        get_global_dictionary()?.get_word(index)
    }

    /// Looks up the index for `word` in the global dictionary.
    pub fn word_to_index(word: &str) -> Result<u16, Dictionary65KError> {
        get_global_dictionary()?.get_index(word)
    }

    /// Converts three word indices into their owned word strings.
    pub fn indices_to_words(indices: [u16; 3]) -> Result<[String; 3], Dictionary65KError> {
        let dict = get_global_dictionary()?;
        let [a, b, c] = indices;
        Ok([
            dict.get_word(a)?.to_string(),
            dict.get_word(b)?.to_string(),
            dict.get_word(c)?.to_string(),
        ])
    }

    /// Converts three words into their `u16` indices.
    pub fn words_to_indices(words: [&str; 3]) -> Result<[u16; 3], Dictionary65KError> {
        let dict = get_global_dictionary()?;
        let [a, b, c] = words;
        Ok([dict.get_index(a)?, dict.get_index(b)?, dict.get_index(c)?])
    }

    /// Succeeds iff every word in `words` exists in the global dictionary;
    /// returns the first lookup error otherwise.
    pub fn validate_words(words: &[&str]) -> Result<(), Dictionary65KError> {
        let dict = get_global_dictionary()?;
        words
            .iter()
            .try_for_each(|word| dict.get_index(word).map(drop))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a fresh dictionary, panicking if the embedded wordlist is bad.
    fn fresh_dict() -> Dictionary65K {
        Dictionary65K::new().unwrap()
    }

    #[test]
    fn test_dictionary_creation() {
        let d = fresh_dict();
        assert!(!d.is_empty());
        assert_eq!(d.len(), WORD_COUNT);
    }

    #[test]
    fn test_word_lookup() {
        let d = fresh_dict();
        let first = d.get_word(0).unwrap();
        assert!(!first.is_empty());
        assert!(!d.get_word(MAX_WORD_INDEX).unwrap().is_empty());
        // Round-trip: word -> index -> word.
        let idx = d.get_index(first).unwrap();
        assert_eq!(idx, 0);
        assert_eq!(d.get_word(idx).unwrap(), first);
    }

    #[test]
    fn test_constants() {
        assert_eq!(WORD_COUNT, 65_536);
        assert_eq!(BITS_PER_WORD, 16);
        assert_eq!(BITS_THREE_WORDS, 48);
        assert_eq!(MAX_WORD_INDEX, 65_535);
    }

    #[test]
    fn test_global_dictionary() {
        // Two fetches must observe the same shared instance contents.
        let a = get_global_dictionary().unwrap();
        let b = get_global_dictionary().unwrap();
        assert_eq!(a.len(), b.len());
        assert_eq!(a.get_word(0).unwrap(), b.get_word(0).unwrap());
    }

    #[test]
    fn test_utility_functions() {
        let word = utils::index_to_word(0).unwrap();
        assert!(!word.is_empty());
        assert_eq!(utils::word_to_index(word).unwrap(), 0);

        let indices: [u16; 3] = [0, 1, 2];
        let words = utils::indices_to_words(indices).unwrap();
        assert_eq!(words.len(), 3);
        let refs = [words[0].as_str(), words[1].as_str(), words[2].as_str()];
        assert_eq!(utils::words_to_indices(refs).unwrap(), indices);
    }

    #[test]
    fn test_sample_words() {
        let d = fresh_dict();
        let samples = d.sample_words(10);
        assert_eq!(samples.len(), 10);
        assert!(samples.iter().all(|w| d.contains_word(w)));
    }
}