use crate::{Error, Result};
use memmap2::Mmap;
use std::sync::Arc;
use super::DictionaryEntry;
use super::sys_dic::SysDic;
/// Dictionary wrapper around a [`SysDic`].
///
/// When built through `UnknownDictionary::from_mmap`, the wrapped dictionary's
/// type tag is checked against `MECAB_UNK_DIC` before the value is returned.
pub struct UnknownDictionary {
/// Underlying dictionary that every query on this type is delegated to.
inner: SysDic,
}
impl std::fmt::Debug for UnknownDictionary {
    /// Formats the value as a struct named `UnknownDictionary` with a single
    /// `inner` field, using the wrapped dictionary's own `Debug` output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Exhaustive destructuring: adding a field to the struct will fail to
        // compile here until the debug output is updated as well.
        let Self { inner } = self;

        let mut builder = f.debug_struct("UnknownDictionary");
        builder.field("inner", inner);
        builder.finish()
    }
}
impl UnknownDictionary {
pub fn from_mmap(mmap: Arc<Mmap>) -> Result<Self> {
let inner = SysDic::from_mmap(mmap)?;
if inner.dict_type() != super::MECAB_UNK_DIC {
return Err(Error::InvalidDictionaryFormat(format!(
"Expected unknown dictionary (type=2), got type={}",
inner.dict_type()
)));
}
Ok(Self { inner })
}
pub fn lookup(&self, category_name: &str) -> Vec<DictionaryEntry> {
self.inner.common_prefix_search(category_name)
}
pub fn get_entries_for_category(&self, category_name: &str) -> Vec<DictionaryEntry> {
self.inner
.common_prefix_search(category_name)
.into_iter()
.filter(|e| e.length == category_name.len())
.collect()
}
pub fn charset(&self) -> &str {
self.inner.charset()
}
pub fn generate_entries(
&self,
category: super::CharCategory,
length: usize,
) -> Vec<DictionaryEntry> {
let category_name = match category {
super::CharCategory::Default => "DEFAULT",
super::CharCategory::Space => "SPACE",
super::CharCategory::Kanji => "KANJI",
super::CharCategory::Symbol => "SYMBOL",
super::CharCategory::Numeric => "NUMERIC",
super::CharCategory::Alpha => "ALPHA",
super::CharCategory::Hiragana => "HIRAGANA",
super::CharCategory::Katakana => "KATAKANA",
super::CharCategory::Kanjinumeric => "KANJINUMERIC",
super::CharCategory::Greek => "GREEK",
super::CharCategory::Cyrillic => "CYRILLIC",
};
self.get_entries_for_category(category_name)
.into_iter()
.map(|mut e| {
e.length = length;
e
})
.collect()
}
}
#[cfg(test)]
mod tests {
    /// Pins the numeric value of `MECAB_UNK_DIC`, which the error message in
    /// `UnknownDictionary::from_mmap` spells out literally as `type=2`.
    #[test]
    fn test_unknown_dic_type() {
        let expected_tag = 2;
        assert_eq!(super::super::MECAB_UNK_DIC, expected_tag);
    }
}