pub mod dictionary;
mod jp_common;
mod mapping;
mod mecab;
mod model;
mod njd;
#[cfg(test)]
mod tests;
use crate::errors::HaqumeiError;
use crate::ffi;
use crate::open_jtalk::{
jp_common::JpCommon,
model::MecabModel,
njd::{Njd, apply_plus_rules, njd_to_features},
};
use crate::utils::default_is_non_pause_symbol;
use crate::{NjdFeature, WordPhonemeDetail, WordPhonemeMap, WordPhonemePair};
use arc_swap::ArcSwap;
use mecab::Mecab;
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use std::cell::Cell;
use std::ffi::{CStr, CString, c_char};
use std::marker::PhantomData;
use std::path::Path;
use std::sync::{Arc, LazyLock};
pub use dictionary::{Dictionary, MecabDictIndexCompiler};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// Process-wide Mecab dictionary slot, hot-swappable at runtime via
/// [`update_global_dictionary`] / [`unset_user_dictionary`].
///
/// With the `embed-dictionary` feature the embedded dictionary is loaded on
/// first access. Without it, the slot starts with an uninitialized
/// placeholder, and `OpenJTalk::new` returns
/// `GlobalDictionaryNotInitialized` until a real dictionary is stored.
pub static GLOBAL_MECAB_DICTIONARY: LazyLock<ArcSwap<Dictionary>> = LazyLock::new(|| {
    #[cfg(feature = "embed-dictionary")]
    {
        let default_dict = Dictionary::from_embedded()
            .expect("Failed to load embedded dictionary. This should not happen.");
        ArcSwap::from(Arc::new(default_dict))
    }
    #[cfg(not(feature = "embed-dictionary"))]
    {
        // Placeholder: the model is uninitialized and dict_dir is empty;
        // consumers must check `model.is_initialized()` before use.
        let dummy_model = MecabModel::new_uninitialized();
        let dummy_dict = Dictionary {
            model: Arc::new(dummy_model),
            dict_dir: std::path::PathBuf::new(),
        };
        ArcSwap::from(Arc::new(dummy_dict))
    }
});
// SAFETY(review): these wrappers hold raw pointers into Open JTalk's C
// structures, so Send is asserted manually to allow moving instances across
// threads. This is sound only if each wrapper exclusively owns its native
// handle and the C code keeps no hidden shared state between handles —
// TODO confirm against the wrapper definitions in `mecab`, `njd`,
// `jp_common`, and `model`.
unsafe impl Send for Mecab {}
unsafe impl Send for Njd {}
unsafe impl Send for JpCommon {}
// SAFETY(review): `MecabModel` is additionally shared across threads behind
// `Arc` (see `GLOBAL_MECAB_DICTIONARY` and `Mecab::from_model`), hence the
// extra `Sync` assertion; this requires the model to be read-only after
// initialization — TODO confirm.
unsafe impl Send for MecabModel {}
unsafe impl Sync for MecabModel {}
/// Atomically replaces the process-wide Mecab dictionary.
///
/// Existing `OpenJTalk` instances that track the global dictionary pick up
/// the replacement lazily through `ensure_dictionary_is_latest`.
pub fn update_global_dictionary(new_dict: Dictionary) {
    let shared = Arc::new(new_dict);
    GLOBAL_MECAB_DICTIONARY.store(shared);
}
/// Reloads the global dictionary from its current directory without a user
/// dictionary, effectively discarding any user dictionary in effect.
///
/// # Errors
/// Propagates any `Dictionary::from_path` failure (e.g. the directory no
/// longer exists or cannot be loaded).
pub fn unset_user_dictionary() -> Result<(), HaqumeiError> {
    let current = GLOBAL_MECAB_DICTIONARY.load_full();
    let reloaded = Dictionary::from_path(&current.dict_dir, None)?;
    GLOBAL_MECAB_DICTIONARY.store(Arc::new(reloaded));
    Ok(())
}
/// Japanese text-processing frontend wrapping the Open JTalk C pipeline:
/// Mecab morphological analysis -> NJD normalization -> JPCommon labels.
///
/// The `PhantomData<Cell<()>>` marker suppresses `Sync` (a `Cell` is never
/// `Sync`), so one instance cannot be shared between threads; `Send` is
/// provided by the manual impls on the wrapped handles.
#[derive(Debug)]
pub struct OpenJTalk {
    pub(crate) mecab: Mecab,
    pub(crate) njd: Njd,
    pub(crate) jp_common: JpCommon,
    // `Some` when this instance tracks GLOBAL_MECAB_DICTIONARY (hot-swap via
    // `ensure_dictionary_is_latest`); `None` for path-loaded instances.
    pub(crate) dict: Option<Arc<Dictionary>>,
    _marker: PhantomData<Cell<()>>,
}
/// One morpheme from Mecab analysis, with lattice metadata
/// (see `run_mecab_detailed`).
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct MecabMorph {
    /// Surface form as it appeared in the (normalized) input text.
    pub surface: String,
    /// Combined "surface,feature-csv" string, the format the NJD stage
    /// consumes.
    pub feature: String,
    /// Left context id (`lcAttr`) from the Mecab lattice node.
    pub left_id: u16,
    /// Right context id (`rcAttr`) from the Mecab lattice node.
    pub right_id: u16,
    /// Part-of-speech id (`posid`) from the Mecab lattice node.
    pub pos_id: u16,
    /// Word cost (`wcost`) assigned by the dictionary.
    pub word_cost: i16,
    /// True when the node came from unknown-word handling (`stat == 1`).
    pub is_unknown: bool,
    /// True for whitespace tokens (feature contains "記号,空白"); these are
    /// filtered out before the NJD stage.
    pub is_ignored: bool,
}
impl OpenJTalk {
/// Creates an instance bound to the global dictionary.
///
/// # Errors
/// Returns `GlobalDictionaryNotInitialized` if the global slot still holds
/// the uninitialized placeholder, or propagates failures from the native
/// Mecab/NJD/JPCommon constructors.
pub fn new() -> Result<Self, HaqumeiError> {
    let dict = GLOBAL_MECAB_DICTIONARY.load_full();
    if !dict.model.is_initialized() {
        return Err(HaqumeiError::GlobalDictionaryNotInitialized);
    }
    Ok(Self {
        mecab: Mecab::from_model(&dict.model)?,
        njd: Njd::new()?,
        jp_common: JpCommon::new()?,
        dict: Some(dict),
        _marker: PhantomData,
    })
}
/// Re-initializes Mecab if the global dictionary was swapped since this
/// instance last synchronized with it.
///
/// Instances created from an explicit path (`dict == None`) never follow
/// the global dictionary and return immediately.
pub(crate) fn ensure_dictionary_is_latest(&mut self) -> Result<(), HaqumeiError> {
    let latest_dict = GLOBAL_MECAB_DICTIONARY.load();
    let Some(active_dict) = &self.dict else {
        return Ok(());
    };
    if Arc::ptr_eq(active_dict, &*latest_dict) {
        return Ok(());
    }
    log::info!("OpenJTalk instance detected a dictionary update. Re-initializing Mecab.");
    // Build the replacement first so a failure leaves the old state intact.
    let rebuilt = Mecab::from_model(&latest_dict.model)?;
    self.dict = Some(latest_dict.clone());
    self.mecab = rebuilt;
    Ok(())
}
pub fn from_dictionary(dict: Dictionary) -> Result<Self, HaqumeiError> {
let mecab = Mecab::from_model(&dict.model)?;
let njd = Njd::new()?;
let jp_common = JpCommon::new()?;
Ok(Self {
mecab,
njd,
jp_common,
dict: Some(Arc::new(dict)),
_marker: PhantomData,
})
}
/// Creates an instance from a shared dictionary handle.
///
/// Note: the resulting instance still tracks `GLOBAL_MECAB_DICTIONARY`
/// (its `dict` field is `Some`), so a later global swap will re-initialize
/// its Mecab state via `ensure_dictionary_is_latest`.
///
/// # Errors
/// Propagates failures from the native Mecab/NJD/JPCommon constructors.
pub fn from_shared_dictionary(dict: Arc<Dictionary>) -> Result<Self, HaqumeiError> {
    let mecab = Mecab::from_model(&dict.model)?;
    Ok(Self {
        mecab,
        njd: Njd::new()?,
        jp_common: JpCommon::new()?,
        dict: Some(dict),
        _marker: PhantomData,
    })
}
/// Creates an instance from a system dictionary directory, independent of
/// the global dictionary (the instance's `dict` stays `None`, so it never
/// hot-swaps).
///
/// # Errors
/// Returns `DictionaryNotFound` if the directory does not exist, or a load
/// error from the native Mecab loader.
pub fn from_path<P: AsRef<Path>>(dict_dir: P) -> Result<Self, HaqumeiError> {
    Self::from_path_inner(dict_dir, None::<P>)
}
/// Like `from_path`, but also loads a compiled user dictionary on top of
/// the system dictionary.
///
/// # Errors
/// Returns `DictionaryNotFound` if either path does not exist, or a load
/// error from the native Mecab loader.
pub fn from_path_with_userdict<P: AsRef<Path>, Q: AsRef<Path>>(
    dict_dir: P,
    user_dict: Q,
) -> Result<Self, HaqumeiError> {
    Self::from_path_inner(dict_dir, Some(user_dict))
}
/// Shared implementation for `from_path` / `from_path_with_userdict`:
/// loads a dictionary directory (and optionally a user dictionary) through
/// the Mecab FFI, bypassing the global dictionary.
///
/// # Errors
/// - `DictionaryNotFound` if either path does not exist.
/// - `InvalidDictionaryPath` if a path cannot become a C string.
/// - `MecabLoadError` if the native loader rejects the dictionary.
fn from_path_inner<P: AsRef<Path>, Q: AsRef<Path>>(
    dict_dir: P,
    user_dict: Option<Q>,
) -> Result<Self, HaqumeiError> {
    let mecab = Mecab::new()?;
    let njd = Njd::new()?;
    let jp_common = JpCommon::new()?;
    let dict_dir = dict_dir.as_ref();
    if !dict_dir.exists() {
        return Err(HaqumeiError::DictionaryNotFound {
            path: dict_dir.to_path_buf(),
        });
    }
    if let Some(user_dict) = user_dict.as_ref()
        && !user_dict.as_ref().exists()
    {
        // Bug fix: report the user-dictionary path (the one that is actually
        // missing) instead of the system dictionary directory.
        return Err(HaqumeiError::DictionaryNotFound {
            path: user_dict.as_ref().to_path_buf(),
        });
    }
    // Converts a path into a NUL-terminated C string for the FFI loader.
    let path_to_cstring = |p: &Path| -> Result<CString, HaqumeiError> {
        let p = p.canonicalize()?;
        let path_str = p.as_os_str();
        #[cfg(unix)]
        {
            use std::os::unix::ffi::OsStrExt;
            CString::new(path_str.as_bytes()).map_err(|_| {
                HaqumeiError::InvalidDictionaryPath(path_str.to_string_lossy().to_string())
            })
        }
        #[cfg(windows)]
        {
            let mut s = path_str.to_str().ok_or_else(|| {
                HaqumeiError::InvalidDictionaryPath(path_str.to_string_lossy().to_string())
            })?;
            // canonicalize() on Windows produces a verbatim (`\\?\`) path,
            // which the C loader does not understand; strip the prefix.
            if let Some(stripped) = s.strip_prefix(r"\\?\") {
                s = stripped;
            }
            CString::new(s).map_err(|_| HaqumeiError::InvalidDictionaryPath(s.to_string()))
        }
    };
    let c_dict_dir = path_to_cstring(dict_dir)?;
    let c_user_dict: Option<CString> = user_dict
        .as_ref()
        .map(|p| path_to_cstring(p.as_ref()))
        .transpose()?;
    // SAFETY: `mecab.inner` is a valid handle and the CStrings outlive the
    // calls. The casts to *mut c_char match the C signatures; the strings
    // are presumably not mutated by the loader — TODO confirm.
    let result = unsafe {
        if let Some(user_dict) = c_user_dict.as_ref().filter(|s| !s.to_bytes().is_empty()) {
            ffi::Mecab_load_with_userdic(
                mecab.inner.as_ptr(),
                c_dict_dir.as_ptr() as *mut c_char,
                user_dict.as_ptr() as *mut c_char,
            )
        } else {
            ffi::Mecab_load(mecab.inner.as_ptr(), c_dict_dir.as_ptr() as *mut c_char)
        }
    };
    if result != 1 {
        return Err(HaqumeiError::MecabLoadError);
    }
    Ok(Self {
        mecab,
        njd,
        jp_common,
        // Path-loaded instances never track the global dictionary.
        dict: None,
        _marker: PhantomData,
    })
}
/// Runs the full text-analysis frontend (Mecab -> NJD) and returns the
/// normalized NJD features; empty input yields an empty vector.
///
/// # Errors
/// Propagates any Mecab or NJD pipeline error.
pub fn run_frontend(&mut self, text: &str) -> Result<Vec<NjdFeature>, HaqumeiError> {
    self.ensure_dictionary_is_latest()?;
    if text.is_empty() {
        Ok(Vec::new())
    } else {
        let features = self.run_mecab(text)?;
        self.run_njd_from_mecab(&features)
    }
}
/// Like `run_frontend`, but also returns the raw Mecab morphemes.
///
/// Whitespace morphemes (`is_ignored`) are kept in the returned morph list
/// but excluded from the NJD stage, matching `run_mecab`'s filtering.
///
/// # Errors
/// Propagates any Mecab or NJD pipeline error.
pub fn run_frontend_detailed(
    &mut self,
    text: &str,
) -> Result<(Vec<NjdFeature>, Vec<MecabMorph>), HaqumeiError> {
    self.ensure_dictionary_is_latest()?;
    if text.is_empty() {
        return Ok((Vec::new(), Vec::new()));
    }
    let morphs = self.run_mecab_detailed(text)?;
    let kept = morphs.iter().filter(|m| !m.is_ignored).map(|m| &m.feature);
    let njd_features = self.run_njd_from_mecab(kept)?;
    Ok((njd_features, morphs))
}
/// Runs the frontend and converts the result into full-context label
/// strings (see `make_label`).
///
/// # Errors
/// Propagates any frontend or label-generation error.
pub fn extract_fullcontext(&mut self, text: &str) -> Result<Vec<String>, HaqumeiError> {
    let features = self.run_frontend(text)?;
    self.make_label(&features)
}
/// Converts `text` to a flat phoneme list.
///
/// Now delegates to [`Self::run_frontend`], which already performs the
/// dictionary refresh, empty-input short-circuit, and the Mecab -> NJD
/// pipeline this method previously duplicated (including a redundant
/// `text.as_ref()` on a `&str`).
///
/// # Errors
/// Propagates any frontend or phoneme-extraction error.
pub fn g2p(&mut self, text: &str) -> Result<Vec<String>, HaqumeiError> {
    let njd_features = self.run_frontend(text)?;
    if njd_features.is_empty() {
        return Ok(Vec::new());
    }
    self.extract_phonemes(&njd_features)
}
/// Flattens the per-word phoneme mapping of `g2p_mapping` into a single
/// phoneme list.
///
/// # Errors
/// Propagates any error from `g2p_mapping`.
pub fn g2p_detailed(&mut self, text: &str) -> Result<Vec<String>, HaqumeiError> {
    let detailed_mapping = self.g2p_mapping(text)?;
    let flattened = detailed_mapping
        .into_iter()
        .flat_map(|map| map.phonemes)
        .collect();
    Ok(flattened)
}
/// Converts `text` to a single kana reading string.
///
/// Symbol tokens (POS "記号") contribute their surface string; all other
/// tokens contribute their pronunciation. The '’' marker is stripped
/// (presumably the devoicing apostrophe — confirm against NJD output).
///
/// Now delegates to [`Self::run_frontend`], which already performs the
/// dictionary refresh, empty-input short-circuit, and the Mecab -> NJD
/// pipeline this method previously duplicated; an empty feature list
/// naturally collects into an empty string.
///
/// # Errors
/// Propagates any frontend error.
pub fn g2p_kana(&mut self, text: &str) -> Result<String, HaqumeiError> {
    let njd_features = self.run_frontend(text)?;
    let kana_string: String = njd_features
        .iter()
        .map(|f| {
            let p = if f.pos == "記号" {
                &f.string
            } else {
                &f.pron
            };
            p.replace('’', "")
        })
        .collect();
    Ok(kana_string)
}
/// Like `g2p_kana`, but returns one kana string per NJD word instead of a
/// single concatenated string.
///
/// # Errors
/// Propagates any frontend error.
pub fn g2p_kana_per_word(&mut self, text: &str) -> Result<Vec<String>, HaqumeiError> {
    if text.is_empty() {
        return Ok(Vec::new());
    }
    let features = self.run_frontend(text)?;
    let mut kana_list = Vec::with_capacity(features.len());
    for f in &features {
        // Symbols keep their surface; everything else uses pronunciation.
        let reading = if f.pos == "記号" { &f.string } else { &f.pron };
        kana_list.push(reading.replace('’', ""));
    }
    Ok(kana_list)
}
/// Returns the phoneme lists of `g2p_pairs`, discarding the word text.
///
/// # Errors
/// Propagates any error from `g2p_pairs`.
pub fn g2p_per_word(&mut self, text: &str) -> Result<Vec<Vec<String>>, HaqumeiError> {
    let pairs = self.g2p_pairs(text)?;
    let phoneme_lists = pairs.into_iter().map(|pair| pair.phonemes).collect();
    Ok(phoneme_lists)
}
/// Returns word/phoneme pairs for `text`, using the default non-pause
/// symbol predicate.
///
/// Now delegates to [`Self::run_frontend`], which already performs the
/// dictionary refresh, empty-input short-circuit, and the Mecab -> NJD
/// pipeline this method previously duplicated (including a redundant
/// `text.as_ref()` on a `&str`).
///
/// # Errors
/// Propagates any frontend or pairing error.
pub fn g2p_pairs(&mut self, text: &str) -> Result<Vec<WordPhonemePair>, HaqumeiError> {
    let njd_features = self.run_frontend(text)?;
    if njd_features.is_empty() {
        return Ok(Vec::new());
    }
    self.g2p_pairs_inner(&njd_features, default_is_non_pause_symbol)
}
/// Builds a word -> phonemes mapping for `text`, correlating raw Mecab
/// morphemes with the NJD-derived phoneme pairs.
///
/// Unlike `g2p_pairs`, this keeps the detailed morph list because
/// `make_phoneme_mapping` needs it.
///
/// # Errors
/// Propagates any Mecab/NJD/mapping error.
pub fn g2p_mapping(&mut self, text: &str) -> Result<Vec<WordPhonemeMap>, HaqumeiError> {
    self.ensure_dictionary_is_latest()?;
    if text.is_empty() {
        return Ok(Vec::new());
    }
    let morphs = self.run_mecab_detailed(text)?;
    // Whitespace morphemes stay in `morphs` but are excluded from NJD.
    let njd_input = morphs
        .iter()
        .filter_map(|m| (!m.is_ignored).then(|| m.feature.as_str()));
    let njd_features = self.run_njd_from_mecab(njd_input)?;
    if njd_features.is_empty() {
        return Ok(Vec::new());
    }
    let pairs = self.g2p_pairs_inner(&njd_features, default_is_non_pause_symbol)?;
    self.make_phoneme_mapping(morphs, pairs)
}
/// Detailed variant of `g2p_mapping`: runs the detailed frontend and
/// correlates its morphemes with the per-word phoneme mapping.
///
/// # Errors
/// Propagates any frontend or mapping error.
pub fn g2p_mapping_detailed(
    &mut self,
    text: &str,
) -> Result<Vec<WordPhonemeDetail>, HaqumeiError> {
    if text.is_empty() {
        return Ok(Vec::new());
    }
    let (njd_features, morphs) = self.run_frontend_detailed(text)?;
    let word_mapping = self.g2p_mapping_inner(&njd_features, default_is_non_pause_symbol)?;
    self.make_phoneme_mapping(morphs, word_mapping)
}
// Scratch-buffer size in bytes for text2mecab's normalized output; inputs
// that normalize beyond this produce a RANGE_ERROR ("Text is too long").
const BUFFER_SIZE: usize = 16384;
/// Runs Mecab analysis on `text` and returns one feature string per token
/// in "surface,feature-csv" form, skipping whitespace tokens
/// (features containing "記号,空白").
///
/// # Errors
/// Fails if `text` contains a NUL byte, if `text2mecab` rejects the text or
/// it exceeds `BUFFER_SIZE`, or if `Mecab_analysis` fails.
pub fn run_mecab(&mut self, text: &str) -> Result<Vec<String>, HaqumeiError> {
    self.ensure_dictionary_is_latest()?;
    let c_text = CString::new(text)?;
    let mut buffer = vec![0u8; Self::BUFFER_SIZE];
    // SAFETY: `buffer` is BUFFER_SIZE bytes and `c_text` is NUL-terminated;
    // text2mecab writes the normalized text into `buffer` within that bound.
    let result = unsafe {
        ffi::text2mecab(
            buffer.as_mut_ptr() as *mut i8,
            Self::BUFFER_SIZE,
            c_text.as_ptr(),
        )
    };
    match result {
        ffi::text2mecab_result_t_TEXT2MECAB_RESULT_SUCCESS => {}
        ffi::text2mecab_result_t_TEXT2MECAB_RESULT_RANGE_ERROR => {
            return Err(HaqumeiError::Text2MecabError(
                "Text is too long".to_string(),
            ));
        }
        ffi::text2mecab_result_t_TEXT2MECAB_RESULT_INVALID_ARGUMENT => {
            return Err(HaqumeiError::Text2MecabError(
                "Invalid argument for text2mecab".to_string(),
            ));
        }
        _ => {
            return Err(HaqumeiError::Text2MecabError(format!(
                "Unknown error from text2mecab: {}",
                result
            )));
        }
    }
    // SAFETY: `buffer` now holds the NUL-terminated normalized text.
    let result =
        unsafe { ffi::Mecab_analysis(self.mecab.inner.as_ptr(), buffer.as_ptr() as *const i8) };
    if result != 1 {
        return Err(HaqumeiError::MecabError(
            "Mecab_analysis failed to parse the text".to_string(),
        ));
    }
    let mut result_vec = Vec::new();
    // SAFETY: analysis succeeded, so the lattice and its node list are valid
    // until Mecab_refresh below; nodes are only read, never written.
    unsafe {
        let mecab_ptr = self.mecab.inner.as_ptr();
        let lattice = (*mecab_ptr).lattice as *mut ffi::mecab_lattice_t;
        let mut node = ffi::mecab_lattice_get_bos_node(lattice);
        while !node.is_null() {
            let stat = (*node).stat;
            // stat 2/3 are presumably the BOS/EOS sentinel nodes
            // (MECAB_BOS_NODE / MECAB_EOS_NODE) — skip them.
            if stat != 2 && stat != 3 {
                let feat_ptr = (*node).feature;
                if !feat_ptr.is_null() {
                    let c_feature = CStr::from_ptr(feat_ptr);
                    let feature_str = c_feature.to_string_lossy();
                    // Drop whitespace tokens entirely.
                    if !feature_str.contains("記号,空白") {
                        // `surface` is not NUL-terminated: it points into
                        // the sentence buffer with an explicit byte length.
                        let surface_ptr = (*node).surface;
                        let length = (*node).length as usize;
                        let surface = if !surface_ptr.is_null() && length > 0 {
                            let bytes =
                                std::slice::from_raw_parts(surface_ptr as *const u8, length);
                            String::from_utf8_lossy(bytes)
                        } else {
                            std::borrow::Cow::Borrowed("")
                        };
                        result_vec.push(format!("{},{}", surface, feature_str));
                    }
                }
            }
            node = (*node).next;
        }
        // Release the lattice now that all strings are copied out.
        ffi::Mecab_refresh(mecab_ptr);
    }
    Ok(result_vec)
}
/// Like `run_mecab`, but returns structured [`MecabMorph`]s with lattice
/// metadata. Whitespace tokens are kept and flagged `is_ignored` instead of
/// being dropped.
///
/// # Errors
/// Fails if `text` contains a NUL byte, if `text2mecab` rejects the text or
/// it exceeds `BUFFER_SIZE`, or if `Mecab_analysis` fails.
pub fn run_mecab_detailed(&mut self, text: &str) -> Result<Vec<MecabMorph>, HaqumeiError> {
    self.ensure_dictionary_is_latest()?;
    let c_text = CString::new(text)?;
    let mut buffer = vec![0u8; Self::BUFFER_SIZE];
    // SAFETY: `buffer` is BUFFER_SIZE bytes and `c_text` is NUL-terminated;
    // text2mecab writes the normalized text into `buffer` within that bound.
    let result = unsafe {
        ffi::text2mecab(
            buffer.as_mut_ptr() as *mut i8,
            Self::BUFFER_SIZE,
            c_text.as_ptr(),
        )
    };
    match result {
        ffi::text2mecab_result_t_TEXT2MECAB_RESULT_SUCCESS => {}
        ffi::text2mecab_result_t_TEXT2MECAB_RESULT_RANGE_ERROR => {
            return Err(HaqumeiError::Text2MecabError(
                "Text is too long".to_string(),
            ));
        }
        ffi::text2mecab_result_t_TEXT2MECAB_RESULT_INVALID_ARGUMENT => {
            return Err(HaqumeiError::Text2MecabError(
                "Invalid argument for text2mecab".to_string(),
            ));
        }
        _ => {
            return Err(HaqumeiError::Text2MecabError(format!(
                "Unknown error from text2mecab: {}",
                result
            )));
        }
    }
    // SAFETY: `buffer` now holds the NUL-terminated normalized text.
    let result =
        unsafe { ffi::Mecab_analysis(self.mecab.inner.as_ptr(), buffer.as_ptr() as *const i8) };
    if result != 1 {
        return Err(HaqumeiError::MecabError(
            "Mecab_analysis failed to parse the text".to_string(),
        ));
    }
    // SAFETY: analysis succeeded, so the lattice and its node list are valid
    // until Mecab_refresh below; nodes are only read, never written.
    let morphs = unsafe {
        let mecab_ptr = self.mecab.inner.as_ptr();
        let lattice = (*mecab_ptr).lattice as *mut ffi::mecab_lattice_t;
        let mut node = ffi::mecab_lattice_get_bos_node(lattice);
        let mut results = Vec::new();
        while !node.is_null() {
            let stat = (*node).stat;
            // stat 2/3 are presumably the BOS/EOS sentinel nodes
            // (MECAB_BOS_NODE / MECAB_EOS_NODE) — skip them.
            if stat != 2 && stat != 3 {
                // `surface` is not NUL-terminated: it points into the
                // sentence buffer with an explicit byte length.
                let surface_ptr = (*node).surface;
                let length = (*node).length as usize;
                let surface = if !surface_ptr.is_null() && length > 0 {
                    let bytes = std::slice::from_raw_parts(surface_ptr as *const u8, length);
                    String::from_utf8_lossy(bytes)
                } else {
                    std::borrow::Cow::Borrowed("")
                };
                let feat_ptr = (*node).feature;
                let raw_feature = if !feat_ptr.is_null() {
                    CStr::from_ptr(feat_ptr).to_string_lossy()
                } else {
                    std::borrow::Cow::Borrowed("")
                };
                // Same "surface,feature-csv" shape run_mecab produces, so
                // either output can feed run_njd_from_mecab.
                let compatible_feature = format!("{},{}", surface, raw_feature);
                // stat == 1 marks unknown-word nodes (presumably
                // MECAB_UNK_NODE — confirm against mecab_node_t).
                let is_unknown = stat == 1;
                // Whitespace tokens are flagged rather than dropped.
                let is_ignored = raw_feature.contains("記号,空白");
                results.push(MecabMorph {
                    surface: surface.to_string(),
                    feature: compatible_feature,
                    left_id: (*node).lcAttr,
                    right_id: (*node).rcAttr,
                    pos_id: (*node).posid,
                    word_cost: (*node).wcost,
                    is_unknown,
                    is_ignored,
                });
            }
            node = (*node).next;
        }
        results
    };
    // SAFETY: release the lattice now that all strings are copied out.
    unsafe {
        ffi::Mecab_refresh(self.mecab.inner.as_ptr());
    }
    Ok(morphs)
}
/// Feeds Mecab feature strings through the NJD normalization pipeline:
/// mecab2njd -> set_pronunciation -> Rust-side `apply_plus_rules` ->
/// digit/accent/unvoiced/long-vowel passes -> feature extraction.
///
/// # Errors
/// Fails if any feature string contains an interior NUL byte (directly or
/// after `apply_plus_rules` via `features_to_njd`).
pub(crate) fn run_njd_from_mecab<'a, I>(
    &mut self,
    mecab_features: I,
) -> Result<Vec<NjdFeature>, HaqumeiError>
where
    I: IntoIterator,
    I::Item: AsRef<str> + 'a,
{
    // Feature strings must be NUL-free to cross the FFI boundary.
    let c_strings: Vec<CString> = mecab_features
        .into_iter()
        .map(|s| CString::new(s.as_ref()))
        .collect::<Result<Vec<_>, _>>()?;
    if c_strings.is_empty() {
        return Ok(Vec::new());
    }
    let mut c_string_pointers: Vec<*const c_char> =
        c_strings.iter().map(|cs| cs.as_ptr()).collect();
    // SAFETY: the pointer array and the CStrings it points into outlive the
    // call; the *mut casts match the C signature, and the strings are
    // presumably not mutated by mecab2njd — TODO confirm.
    unsafe {
        ffi::mecab2njd(
            self.njd.inner.as_mut(),
            c_string_pointers.as_mut_ptr() as *mut *mut c_char,
            c_string_pointers.len() as i32,
        );
        ffi::njd_set_pronunciation(self.njd.inner.as_mut());
    }
    // Round-trip through Rust so apply_plus_rules can rewrite the features
    // between the pronunciation pass and the remaining NJD passes.
    let mut features = njd_to_features(&self.njd);
    apply_plus_rules(&mut features);
    Self::features_to_njd(&features, &mut self.njd)?;
    // SAFETY: `njd` is valid; these passes must run in this order (digit ->
    // accent phrase -> accent type -> unvoiced vowel -> long vowel),
    // mirroring Open JTalk's own pipeline.
    unsafe {
        ffi::njd_set_digit(self.njd.inner.as_mut());
        ffi::njd_set_accent_phrase(self.njd.inner.as_mut());
        ffi::njd_set_accent_type(self.njd.inner.as_mut());
        ffi::njd_set_unvoiced_vowel(self.njd.inner.as_mut());
        ffi::njd_set_long_vowel(self.njd.inner.as_mut());
    }
    let final_features = njd_to_features(&self.njd);
    // SAFETY: clear the native NJD list so this instance can be reused.
    unsafe {
        ffi::NJD_refresh(self.njd.inner.as_mut());
    }
    Ok(final_features)
}
/// Generates full-context label strings from NJD features via JPCommon.
///
/// # Errors
/// Fails if any feature field contains an interior NUL byte
/// (via `features_to_njd`).
pub fn make_label(&mut self, features: &[NjdFeature]) -> Result<Vec<String>, HaqumeiError> {
    Self::features_to_njd(features, &mut self.njd)?;
    // SAFETY: njd/jp_common are valid initialized handles; the call order
    // (njd2jpcommon -> make_label -> getters) matches Open JTalk's pipeline.
    let (label_size, label_feature_ptr) = unsafe {
        ffi::njd2jpcommon(self.jp_common.inner.as_mut(), self.njd.inner.as_mut());
        ffi::JPCommon_make_label(self.jp_common.inner.as_mut());
        let size = ffi::JPCommon_get_label_size(self.jp_common.inner.as_mut());
        let ptr = ffi::JPCommon_get_label_feature(self.jp_common.inner.as_mut());
        (size, ptr)
    };
    // NOTE(review): this early return skips the refresh calls below, leaving
    // the NJD/JPCommon lists populated until features_to_njd clears NJD on
    // the next call — confirm intentional.
    if label_feature_ptr.is_null() {
        return Ok(Vec::new());
    }
    // SAFETY: the pointer is non-null and the library reports `label_size`
    // entries; each entry is a NUL-terminated C string owned by jp_common
    // and valid until JPCommon_refresh below.
    let labels = unsafe {
        let mut result = Vec::with_capacity(label_size as usize);
        for i in 0..(label_size as isize) {
            let label_ptr = *label_feature_ptr.offset(i);
            let c_label = CStr::from_ptr(label_ptr);
            result.push(c_label.to_string_lossy().into_owned());
        }
        result
    };
    // SAFETY: reset both structures so the instance can be reused.
    unsafe {
        ffi::JPCommon_refresh(self.jp_common.inner.as_mut());
        ffi::NJD_refresh(self.njd.inner.as_mut());
    }
    Ok(labels)
}
/// Converts NJD features directly into a flat phoneme list by pushing each
/// JPCommon word into a label structure and reading its phoneme linked
/// list — skipping the full-context label formatting that `make_label`
/// performs (NOTE(review): appears to mirror the first phase of
/// JPCommon_make_label — confirm against the C source).
///
/// # Errors
/// Fails if any feature field contains an interior NUL byte, or if the
/// label structure cannot be allocated.
pub fn extract_phonemes(
    &mut self,
    features: &[NjdFeature],
) -> Result<Vec<String>, HaqumeiError> {
    if features.is_empty() {
        return Ok(Vec::new());
    }
    // SAFETY: njd/jp_common are valid initialized handles; the native
    // structures are only accessed through this instance (which is !Sync).
    let result = unsafe {
        Self::features_to_njd(features, &mut self.njd)?;
        let jp = self.jp_common.inner.as_mut();
        let njd = self.njd.inner.as_mut();
        ffi::njd2jpcommon(jp, njd);
        // Reuse an existing JPCommonLabel if present; otherwise allocate a
        // zeroed one with calloc so the C library can manage/free it.
        if !jp.label.is_null() {
            ffi::JPCommonLabel_clear(jp.label);
        } else {
            let ptr = libc::calloc(1, std::mem::size_of::<ffi::JPCommonLabel>());
            if ptr.is_null() {
                return Err(HaqumeiError::AllocationError("ffi::JPCommonLabel"));
            }
            jp.label = ptr as *mut ffi::JPCommonLabel;
        }
        ffi::JPCommonLabel_initialize(jp.label);
        // Push every word in the JPCommon node list into the label.
        let mut node = jp.head;
        while !node.is_null() {
            ffi::JPCommonLabel_push_word(
                jp.label,
                ffi::JPCommonNode_get_pron(node),
                ffi::JPCommonNode_get_pos(node),
                ffi::JPCommonNode_get_ctype(node),
                ffi::JPCommonNode_get_cform(node),
                ffi::JPCommonNode_get_acc(node),
                ffi::JPCommonNode_get_chain_flag(node),
            );
            node = (*node).next;
        }
        // Collect phoneme strings from the label's phoneme linked list.
        // Capacity is a heuristic (~3 phonemes per word).
        let mut result_vec = Vec::with_capacity(features.len() * 3);
        let mut p = (*jp.label).phoneme_head;
        while !p.is_null() {
            let s_ptr = (*p).phoneme;
            if !s_ptr.is_null() {
                let s = CStr::from_ptr(s_ptr).to_string_lossy().into_owned();
                result_vec.push(s);
            }
            p = (*p).next;
        }
        // Reset both structures so the instance can be reused.
        ffi::JPCommon_refresh(jp);
        ffi::NJD_refresh(self.njd.inner.as_mut());
        result_vec
    };
    Ok(result)
}
/// Rebuilds the native NJD node list from Rust-side features: clears any
/// existing nodes, then allocates one `NJDNode` per feature with calloc
/// (matching the C library's allocation style) and transfers ownership to
/// the list via `NJD_push_node` — the nodes are freed by the next
/// clear/refresh, not by Rust.
///
/// # Errors
/// Fails if any feature field contains an interior NUL byte, or if a node
/// cannot be allocated.
pub(crate) fn features_to_njd(
    features: &[NjdFeature],
    njd: &mut Njd,
) -> Result<(), HaqumeiError> {
    // SAFETY: `njd` wraps a valid initialized NJD struct.
    unsafe {
        ffi::NJD_clear(njd.inner.as_mut());
    }
    for feature in features {
        // Every string field must be NUL-free to become a C string; the
        // setters below copy the data, so the CStrings may drop afterwards.
        let c_string = CString::new(feature.string.as_str())?;
        let c_pos = CString::new(feature.pos.as_str())?;
        let c_pos_group1 = CString::new(feature.pos_group1.as_str())?;
        let c_pos_group2 = CString::new(feature.pos_group2.as_str())?;
        let c_pos_group3 = CString::new(feature.pos_group3.as_str())?;
        let c_ctype = CString::new(feature.ctype.as_str())?;
        let c_cform = CString::new(feature.cform.as_str())?;
        let c_orig = CString::new(feature.orig.as_str())?;
        let c_read = CString::new(feature.read.as_str())?;
        let c_pron = CString::new(feature.pron.as_str())?;
        let c_chain_rule = CString::new(feature.chain_rule.as_str())?;
        // SAFETY: the node is zero-allocated then initialized before any
        // setter runs; NJD_push_node takes ownership of the node.
        // (NOTE(review): the setters presumably duplicate the passed
        // strings — confirm against NJDNode_set_* in the C source.)
        unsafe {
            let node =
                libc::calloc(1, std::mem::size_of::<ffi::NJDNode>()) as *mut ffi::NJDNode;
            if node.is_null() {
                return Err(HaqumeiError::AllocationError("ffi::NJDNode"));
            }
            ffi::NJDNode_initialize(node);
            ffi::NJDNode_set_string(node, c_string.as_ptr());
            ffi::NJDNode_set_pos(node, c_pos.as_ptr());
            ffi::NJDNode_set_pos_group1(node, c_pos_group1.as_ptr());
            ffi::NJDNode_set_pos_group2(node, c_pos_group2.as_ptr());
            ffi::NJDNode_set_pos_group3(node, c_pos_group3.as_ptr());
            ffi::NJDNode_set_ctype(node, c_ctype.as_ptr());
            ffi::NJDNode_set_cform(node, c_cform.as_ptr());
            ffi::NJDNode_set_orig(node, c_orig.as_ptr());
            ffi::NJDNode_set_read(node, c_read.as_ptr());
            ffi::NJDNode_set_pron(node, c_pron.as_ptr());
            ffi::NJDNode_set_acc(node, feature.acc);
            ffi::NJDNode_set_mora_size(node, feature.mora_size);
            ffi::NJDNode_set_chain_rule(node, c_chain_rule.as_ptr());
            ffi::NJDNode_set_chain_flag(node, feature.chain_flag);
            ffi::NJD_push_node(njd.inner.as_mut(), node);
        }
    }
    Ok(())
}
// Batch variants: each invocation generates a `<name>_batch` method that
// applies the corresponding single-text method to a collection of inputs
// (see the `impl_batch_method_openjtalk!` macro definition for the exact
// signature; the `rayon` imports at the top of the file suggest parallel
// iteration — confirm against the macro).
impl_batch_method_openjtalk!(
    run_frontend_batch => run_frontend -> Vec<NjdFeature>
);
impl_batch_method_openjtalk!(
    run_frontend_detailed_batch => run_frontend_detailed -> (Vec<NjdFeature>, Vec<MecabMorph>)
);
impl_batch_method_openjtalk!(
    g2p_batch => g2p -> Vec<String>
);
impl_batch_method_openjtalk!(
    g2p_detailed_batch => g2p_detailed -> Vec<String>
);
impl_batch_method_openjtalk!(
    g2p_kana_batch => g2p_kana -> String
);
impl_batch_method_openjtalk!(
    g2p_kana_per_word_batch => g2p_kana_per_word -> Vec<String>
);
impl_batch_method_openjtalk!(
    g2p_per_word_batch => g2p_per_word -> Vec<Vec<String>>
);
impl_batch_method_openjtalk!(
    g2p_pairs_batch => g2p_pairs -> Vec<WordPhonemePair>
);
impl_batch_method_openjtalk!(
    g2p_mapping_batch => g2p_mapping -> Vec<WordPhonemeMap>
);
impl_batch_method_openjtalk!(
    g2p_mapping_detailed_batch => g2p_mapping_detailed -> Vec<WordPhonemeDetail>
);
impl_batch_method_openjtalk!(
    extract_fullcontext_batch => extract_fullcontext -> Vec<String>
);
}
/// Compiles the Mecab dictionary sources in `path`, writing the compiled
/// index files back into the same directory.
///
/// # Errors
/// Propagates any `DictCompilerError` from the index compiler.
pub fn build_mecab_dictionary<P: AsRef<Path>>(
    path: P,
) -> Result<(), dictionary::DictCompilerError> {
    MecabDictIndexCompiler::new()
        .dict_dir(&path)
        .out_dir(&path)
        .run()
}