use std::fs::File;
use std::io::Read;
use std::path::Path;
use boolinator::Boolinator;
use byteorder::{LittleEndian, ReadBytesExt};
use flate2::read::GzDecoder;
use dawg::units;
/// In-memory dictionary stored as a flat array of 32-bit units.
///
/// Lookups start at `root` and move from unit to unit; the bit layout of a
/// unit is interpreted by the helpers in `dawg::units`.
#[derive(Debug, Clone)]
pub struct Dictionary {
/// Index of the unit where key lookups begin (`from_reader` sets it to 0).
pub root: u32,
/// The unit array, in the order it was read from the input stream.
pub units: Vec<u32>,
}
impl Dictionary {
    /// Loads a dictionary from the gzip-compressed file at `p`.
    ///
    /// The file is opened, wrapped in a `GzDecoder` and passed to
    /// `from_reader`.
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be opened, or if `from_reader` fails while
    /// reading the decompressed stream.
    pub fn from_file(p: &Path) -> Self {
        let file = File::open(p).unwrap_or_else(|err| {
            panic!("failed to open dictionary file {}: {}", p.display(), err)
        });
        Self::from_reader(&mut GzDecoder::new(file))
    }

    /// Reads a dictionary from `reader`.
    ///
    /// The expected layout is a little-endian `u32` unit count followed by
    /// that many little-endian `u32` units. The root index is set to 0.
    ///
    /// # Panics
    ///
    /// Panics if the count or any unit cannot be read (for example because
    /// the stream ends early).
    pub fn from_reader<T>(reader: &mut T) -> Self
    where
        T: Read,
    {
        let size = reader
            .read_u32::<LittleEndian>()
            .expect("failed to read dictionary unit count");
        let units = (0..size)
            .map(|_| {
                reader
                    .read_u32::<LittleEndian>()
                    .expect("failed to read dictionary unit")
            })
            .collect();
        Dictionary { root: 0, units }
    }

    /// Returns `true` if `units::has_leaf` reports a leaf for the unit at
    /// `index`.
    ///
    /// An `index` outside the unit array yields `false` instead of panicking.
    pub fn has_value(&self, index: u32) -> bool {
        self.units
            .get(index as usize)
            .map_or(false, |&unit| units::has_leaf(unit))
    }

    /// Returns the value reachable from the unit at `index`.
    ///
    /// The value unit is located at `(index ^ offset) & PRECISION_MASK`,
    /// where `offset` comes from the unit at `index`. No check is made that
    /// the unit actually has a value; use `try_value` for a checked lookup.
    ///
    /// # Panics
    ///
    /// Panics if `index` or the derived value index lies outside the unit
    /// array.
    pub fn value(&self, index: u32) -> u32 {
        let offset = units::offset(self.units[index as usize]);
        let value_index = (index ^ offset) & units::PRECISION_MASK;
        units::value(self.units[value_index as usize])
    }

    /// Returns the value attached to the unit at `index`, or `None` if that
    /// unit has no value.
    ///
    /// Unlike `value`, this never panics: an out-of-range `index` or value
    /// index also yields `None`.
    pub fn try_value(&self, index: u32) -> Option<u32> {
        let unit = *self.units.get(index as usize)?;
        units::has_leaf(unit)
            // Only derive the value index once the unit is known to carry one.
            .as_some_from(|| (index ^ units::offset(unit)) & units::PRECISION_MASK)
            .and_then(|value_index| self.units.get(value_index as usize))
            .map(|&value_unit| units::value(value_unit))
    }

    /// Returns `true` if `key` can be followed from the root and ends on a
    /// unit that has a value.
    pub fn contains(&self, key: &str) -> bool {
        self.follow_bytes(key, self.root)
            .map_or(false, |index| self.has_value(index))
    }

    /// Looks up `key` from the root and returns its value, or `None` if the
    /// key cannot be followed or its final unit has no value.
    pub fn find(&self, key: &str) -> Option<u32> {
        self.follow_bytes(key, self.root)
            .and_then(|index| self.try_value(index))
    }

    /// Follows a single byte `label` from the unit at `index`.
    ///
    /// The candidate index is `(index ^ offset ^ label) & PRECISION_MASK`.
    /// It is returned only when the unit stored there carries the same
    /// label; otherwise the result is `None`. Indices that fall outside the
    /// unit array are treated as "no transition" rather than panicking.
    pub fn follow_char(&self, label: u8, index: u32) -> Option<u32> {
        trace!(
            r#"Dictionary::follow_char() label: {:x}, index = {:x} "#,
            label,
            index
        );
        let unit = *self.units.get(index as usize)?;
        trace!(r#"Dictionary::follow_char() unit: {:x} "#, unit);
        let offset = units::offset(unit);
        trace!(r#"Dictionary::follow_char() offset: {:x} "#, offset);
        let next_index = (index ^ offset ^ u32::from(label)) & units::PRECISION_MASK;
        let next_unit = *self.units.get(next_index as usize)?;
        let leaf_label = units::label(next_unit);
        trace!(r#"Dictionary::follow_char() leaf_label: {:x} "#, leaf_label);
        if leaf_label == u32::from(label) {
            Some(next_index)
        } else {
            None
        }
    }

    /// Follows every byte of `key` in order, starting from `index`.
    ///
    /// Returns the index reached after the last byte, or `None` as soon as
    /// one byte cannot be followed. An empty `key` returns `Some(index)`.
    pub fn follow_bytes(&self, key: &str, mut index: u32) -> Option<u32> {
        for byte in key.bytes() {
            index = self.follow_char(byte, index)?;
        }
        Some(index)
    }
}