/// Raw dictionary bytes, embedded into the binary at compile time from `dictionary.bin`.
///
/// `word` slices this blob as fixed-width records: the words of one length are stored
/// back-to-back, starting at the offset given by `OFFSETS_BY_LENGTH` for that length.
/// The unit tests in this file expect the blob to be exactly 122,784 bytes long.
///
/// NOTE(review): the table values and total size look like the Brotli (RFC 7932) static
/// dictionary; confirm the provenance of `dictionary.bin`.
pub(crate) const DATA: &[u8] = include_bytes!("dictionary.bin");
/// Base-2 logarithm of the number of words stored for each word length.
///
/// Indexed by word length in bytes: a non-zero entry `b` at index `len` means there are
/// `1 << b` words of that length. `word` treats an entry of `0` as "no words of this
/// length" and returns `None` for it.
pub(crate) const SIZE_BITS_BY_LENGTH: [u8; 32] = [
0, 0, 0, 0, 10, 10, 11, 11, 10, 10, 10, 10, 10, 9, 9, 8, 7, 7, 8, 7, 7, 6, 6, 5, 5, 0, 0, 0, 0,
0, 0, 0,
];
/// Byte offset into `DATA` at which the group of words of each length begins.
///
/// Indexed by word length in bytes. Word `idx` of length `len` occupies the `len` bytes
/// starting at `OFFSETS_BY_LENGTH[len] + idx * len`.
///
/// For the populated lengths the groups are contiguous: each entry from index 5 through 25
/// equals the previous entry plus `(1 << SIZE_BITS_BY_LENGTH[len - 1]) * (len - 1)`.
/// The trailing entries all equal 122,784, the total size the unit tests expect for `DATA`.
pub(crate) const OFFSETS_BY_LENGTH: [u32; 32] = [
0, 0, 0, 0, 0, 4096, 9216, 21504, 35840, 44032, 53248, 63488, 74752, 87040, 93696, 100864,
104704, 106752, 108928, 113536, 115968, 118528, 119872, 121280, 122016, 122784, 122784, 122784,
122784, 122784, 122784, 122784,
];
/// Shortest word length, in bytes, that `word` accepts (inclusive lower bound).
pub(crate) const MIN_DICTIONARY_WORD_LENGTH: usize = 4;
/// Longest word length, in bytes, that `word` accepts (inclusive upper bound).
pub(crate) const MAX_DICTIONARY_WORD_LENGTH: usize = 24;
/// Looks up a single dictionary word by its byte length and its index within that
/// length's group.
///
/// # Parameters
/// * `len` - length of the requested word in bytes.
/// * `idx` - zero-based position of the word among all words of length `len`.
///
/// # Returns
/// `Some(bytes)`, a `len`-byte slice borrowed from [`DATA`], when the lookup is valid.
/// `None` when `len` lies outside
/// [`MIN_DICTIONARY_WORD_LENGTH`]`..=`[`MAX_DICTIONARY_WORD_LENGTH`], when the table lists
/// no words of that length, when `idx` is not below the word count for that length, or
/// when the computed byte range would run past the end of [`DATA`].
pub(crate) fn word(len: usize, idx: u32) -> Option<&'static [u8]> {
    if !matches!(
        len,
        MIN_DICTIONARY_WORD_LENGTH..=MAX_DICTIONARY_WORD_LENGTH
    ) {
        return None;
    }

    // A zero entry marks a length for which the dictionary holds no words.
    let size_bits = match SIZE_BITS_BY_LENGTH[len] {
        0 => return None,
        nonzero => nonzero,
    };

    // Each populated length group holds exactly 2^size_bits words.
    if idx >= (1u32 << size_bits) {
        return None;
    }

    // Words of one length are fixed-width records laid out back-to-back.
    let start = OFFSETS_BY_LENGTH[len] as usize + len * (idx as usize);

    // `get` yields `None` instead of panicking if the range exceeds the embedded blob.
    DATA.get(start..start + len)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The embedded blob must have exactly the expected total byte length.
    #[test]
    fn dictionary_size_is_canonical() {
        let total_bytes = DATA.len();
        assert_eq!(total_bytes, 122_784);
    }

    /// The very first record of the shortest length group is the word "time".
    #[test]
    fn first_word_is_time() {
        assert_eq!(word(4, 0), Some(&b"time"[..]));
    }

    /// For every populated length, the last valid index resolves to a word of that
    /// length and the index one past it is rejected.
    #[test]
    fn word_bounds_match_size_bits() {
        let populated = (MIN_DICTIONARY_WORD_LENGTH..=MAX_DICTIONARY_WORD_LENGTH)
            .map(|len| (len, u32::from(SIZE_BITS_BY_LENGTH[len])))
            .filter(|&(_, size_bits)| size_bits != 0);

        for (len, size_bits) in populated {
            let word_count = 1u32 << size_bits;

            let last_len = word(len, word_count - 1).map(<[u8]>::len);
            assert_eq!(last_len, Some(len));

            assert_eq!(word(len, word_count), None);
        }
    }
}