pub mod dictionary;
mod jp_common;
mod mecab;
mod model;
mod njd;
#[cfg(test)]
mod tests;
use crate::errors::HaqumeiError;
use crate::ffi;
use crate::open_jtalk::{
jp_common::JpCommon,
model::MecabModel,
njd::{Njd, apply_plus_rules, njd_to_features},
};
use crate::{NjdFeature, WordPhonemeDetail, WordPhonemeMap};
use arc_swap::ArcSwap;
use mecab::Mecab;
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use std::cell::Cell;
use std::collections::HashMap;
use std::ffi::{CStr, CString, c_char};
use std::marker::PhantomData;
use std::path::Path;
use std::sync::{Arc, LazyLock};
pub use dictionary::{Dictionary, MecabDictIndexCompiler};
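/// Process-wide Mecab dictionary shared by every `OpenJTalk` instance and
/// hot-swappable at runtime via `update_global_dictionary`.
///
/// With the `embed-dictionary` feature the embedded dictionary is loaded on
/// first access; otherwise this starts as an uninitialized placeholder that
/// must be replaced before `OpenJTalk::new` can succeed.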
pub static GLOBAL_MECAB_DICTIONARY: LazyLock<ArcSwap<Dictionary>> = LazyLock::new(|| {
#[cfg(feature = "embed-dictionary")]
{
let default_dict = Dictionary::from_embedded()
.expect("Failed to load embedded dictionary. This should not happen.");
ArcSwap::from(Arc::new(default_dict))
}
#[cfg(not(feature = "embed-dictionary"))]
{
let dummy_model = MecabModel::new_uninitialized();
let dummy_dict = Dictionary {
model: Arc::new(dummy_model),
dict_dir: std::path::PathBuf::new(),
};
ArcSwap::from(Arc::new(dummy_dict))
}
});
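// SAFETY: each wrapper below exclusively owns its underlying Open JTalk C
// struct, and those structs carry no thread-affine state, so moving them
// across threads is sound. `MecabModel` is also marked `Sync` on the
// assumption that the loaded model is immutable and safe to share (which is
// what the shared `Arc<MecabModel>` usage relies on). `OpenJTalk` itself
// remains `!Sync` through its `PhantomData<Cell<()>>` marker.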
unsafe impl Send for Mecab {}
unsafe impl Send for Njd {}
unsafe impl Send for JpCommon {}
unsafe impl Send for MecabModel {}
unsafe impl Sync for MecabModel {}
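/// Replaces the global dictionary. Existing `OpenJTalk` instances pick up the
/// change lazily on their next call (see `ensure_dictionary_is_latest`).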
pub fn update_global_dictionary(new_dict: Dictionary) {
GLOBAL_MECAB_DICTIONARY.store(Arc::new(new_dict));
}
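/// Reloads the current dictionary directory without a user dictionary and
/// stores the result as the new global dictionary.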
pub fn unset_user_dictionary() -> Result<(), HaqumeiError> {
GLOBAL_MECAB_DICTIONARY.store(Arc::new(Dictionary::from_path(
&GLOBAL_MECAB_DICTIONARY.load_full().dict_dir,
None,
)?));
Ok(())
}
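/// A single Open JTalk text-analysis pipeline: Mecab (morphological analysis)
/// -> NJD (pronunciation rules) -> JPCommon (label generation).
///
/// The type is `Send` but deliberately `!Sync` (see the `_marker` field).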
#[derive(Debug)]
pub struct OpenJTalk {
pub(crate) mecab: Mecab,
pub(crate) njd: Njd,
pub(crate) jp_common: JpCommon,
pub(crate) dict: Option<Arc<Dictionary>>,
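// Keeps the type `!Sync` without affecting `Send`.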
_marker: PhantomData<Cell<()>>,
}
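/// One morpheme from `OpenJTalk::run_mecab_detailed`: the surface form, the
/// surface-prefixed CSV feature string, whether Mecab treated it as an
/// unknown word, and whether it is filtered out (whitespace symbols).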
#[derive(Debug, Clone, PartialEq)]
pub struct MecabMorph {
pub surface: String,
pub feature: String,
pub is_unknown: bool,
pub is_ignored: bool,
}
impl OpenJTalk {
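/// Creates a pipeline bound to the current global dictionary.
///
/// Fails with `HaqumeiError::GlobalDictionaryNotInitialized` when the global
/// dictionary is still the uninitialized placeholder (builds without the
/// `embed-dictionary` feature that have not yet called
/// `update_global_dictionary`).
///
/// A minimal usage sketch (the crate name `haqumei` is an assumption):
/// ```ignore
/// use haqumei::open_jtalk::OpenJTalk;
///
/// let mut ojt = OpenJTalk::new()?;
/// let phonemes = ojt.g2p("こんにちは")?; // flat phoneme sequence
/// ```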
pub fn new() -> Result<Self, HaqumeiError> {
let initial_dict = GLOBAL_MECAB_DICTIONARY.load_full();
if !initial_dict.model.is_initialized() {
return Err(HaqumeiError::GlobalDictionaryNotInitialized);
}
let mecab = Mecab::from_model(&initial_dict.model)?;
let njd = Njd::new()?;
let jp_common = JpCommon::new()?;
Ok(Self {
mecab,
njd,
jp_common,
dict: Some(initial_dict),
_marker: PhantomData,
})
}
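/// Rebuilds the Mecab handle from the global model when the dictionary `Arc`
/// tracked by this instance is no longer the one in `GLOBAL_MECAB_DICTIONARY`.
/// Instances created with `from_path` (`dict == None`) are never switched.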
pub(crate) fn ensure_dictionary_is_latest(&mut self) -> Result<(), HaqumeiError> {
let latest_dict = GLOBAL_MECAB_DICTIONARY.load();
if let Some(active_dict) = &self.dict
&& !Arc::ptr_eq(active_dict, &*latest_dict)
{
log::info!("OpenJTalk instance detected a dictionary update. Re-initializing Mecab.");
let new_mecab = Mecab::from_model(&latest_dict.model)?;
self.dict = Some(latest_dict.clone());
self.mecab = new_mecab;
}
Ok(())
}
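/// Builds a pipeline that uses the given dictionary.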
pub fn from_dictionary(dict: Dictionary) -> Result<Self, HaqumeiError> {
let mecab = Mecab::from_model(&dict.model)?;
let njd = Njd::new()?;
let jp_common = JpCommon::new()?;
Ok(Self {
mecab,
njd,
jp_common,
dict: Some(Arc::new(dict)),
_marker: PhantomData,
})
}
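/// Loads a dictionary directory (and optional user dictionary) directly via
/// `Mecab_load`, bypassing the shared model. An empty user-dictionary path is
/// treated as absent. The resulting instance does not track the global
/// dictionary and is never auto-updated.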
pub fn from_path<P: AsRef<Path>>(
dict_dir: P,
user_dict: Option<P>,
) -> Result<Self, HaqumeiError> {
let mecab = Mecab::new()?;
let njd = Njd::new()?;
let jp_common = JpCommon::new()?;
let path_to_cstring = |p: &Path| -> Result<CString, HaqumeiError> {
let path_str = p.to_str().ok_or_else(|| {
HaqumeiError::InvalidDictionaryPath(p.to_string_lossy().into_owned())
})?;
CString::new(path_str)
.map_err(|_| HaqumeiError::InvalidDictionaryPath(path_str.to_string()))
};
let c_dict_dir = path_to_cstring(dict_dir.as_ref())?;
let c_user_dict: Option<CString> = user_dict
.as_ref()
.map(|p| path_to_cstring(p.as_ref()))
.transpose()?;
let result = unsafe {
if let Some(user_dict) = c_user_dict.as_ref().filter(|s| !s.to_bytes().is_empty()) {
ffi::Mecab_load_with_userdic(
mecab.inner.as_ptr(),
c_dict_dir.as_ptr() as *mut c_char,
user_dict.as_ptr() as *mut c_char,
)
} else {
ffi::Mecab_load(mecab.inner.as_ptr(), c_dict_dir.as_ptr() as *mut c_char)
}
};
if result != 1 {
return Err(HaqumeiError::MecabLoadError);
}
Ok(Self {
mecab,
njd,
jp_common,
dict: None,
_marker: PhantomData,
})
}
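/// Builds a pipeline that shares an already-loaded dictionary.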
pub fn from_shared_dictionary(dict: Arc<Dictionary>) -> Result<Self, HaqumeiError> {
let mecab = Mecab::from_model(&dict.model)?;
let njd = Njd::new()?;
let jp_common = JpCommon::new()?;
Ok(Self {
mecab,
njd,
jp_common,
dict: Some(dict),
_marker: PhantomData,
})
}
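/// Runs the text front end: Mecab analysis followed by the NJD rule passes,
/// yielding one `NjdFeature` per resulting word.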
pub fn run_frontend(&mut self, text: &str) -> Result<Vec<NjdFeature>, HaqumeiError> {
self.ensure_dictionary_is_latest()?;
let mecab_features = self.run_mecab(text)?;
self.run_njd_from_mecab(&mecab_features)
}
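/// Converts text to full-context labels.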
pub fn extract_fullcontext(&mut self, text: &str) -> Result<Vec<String>, HaqumeiError> {
let njd_features = self.run_frontend(text)?;
self.make_label(&njd_features)
}
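/// Converts text to a flat phoneme sequence (grapheme-to-phoneme). Returns an
/// empty vector when the text yields no NJD features.
///
/// A sketch of the expected output shape (values illustrative):
/// ```ignore
/// let phonemes = ojt.g2p("音声")?; // e.g. ["o", "N", "s", "e", "i"]
/// ```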
pub fn g2p(&mut self, text: &str) -> Result<Vec<String>, HaqumeiError> {
self.ensure_dictionary_is_latest()?;
let mecab_features = self.run_mecab(text)?;
let njd_features = self.run_njd_from_mecab(&mecab_features)?;
if njd_features.is_empty() {
return Ok(Vec::new());
}
self.extract_phonemes(&njd_features)
}
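/// Like `g2p`, but flattened from the detailed mapping so unknown words
/// surface as "unk" and ignored whitespace as "sp".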
pub fn g2p_detailed(&mut self, text: &str) -> Result<Vec<String>, HaqumeiError> {
let detailed_mapping = self.g2p_mapping_detailed(text)?;
let mut result_phonemes = Vec::new();
for map in detailed_mapping {
result_phonemes.extend(map.phonemes);
}
Ok(result_phonemes)
}
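/// Converts text to a katakana pronunciation string. Symbol tokens (POS
/// "記号") keep their surface form; the devoicing mark `’` is stripped.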
pub fn g2p_kana(&mut self, text: &str) -> Result<String, HaqumeiError> {
self.ensure_dictionary_is_latest()?;
let mecab_features = self.run_mecab(text)?;
let njd_features = self.run_njd_from_mecab(&mecab_features)?;
if njd_features.is_empty() {
return Ok(String::new());
}
let kana_string: String = njd_features
.iter()
.map(|f| {
let p = if f.pos == "記号" {
&f.string
} else {
&f.pron
};
p.replace('’', "")
})
.collect();
Ok(kana_string)
}
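/// Returns the phoneme sequence grouped per word.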
pub fn g2p_per_word(&mut self, text: &str) -> Result<Vec<Vec<String>>, HaqumeiError> {
let mapping = self.g2p_mapping(text)?;
let result = mapping.into_iter().map(|m| m.phonemes).collect();
Ok(result)
}
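/// Maps each analyzed word to the phonemes it produced.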
pub fn g2p_mapping(&mut self, text: &str) -> Result<Vec<WordPhonemeMap>, HaqumeiError> {
self.ensure_dictionary_is_latest()?;
let mecab_features = self.run_mecab(text)?;
let njd_features = self.run_njd_from_mecab(&mecab_features)?;
if njd_features.is_empty() {
return Ok(Vec::new());
}
self.g2p_mapping_inner(&njd_features)
}
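/// Like `g2p_mapping`, but aligned against the raw Mecab morphemes so each
/// entry also reports whether the word was unknown to the dictionary or
/// ignored (whitespace). Unknown words with no usable phonemes map to "unk";
/// ignored morphemes map to "sp".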
pub fn g2p_mapping_detailed(
&mut self,
text: &str,
) -> Result<Vec<WordPhonemeDetail>, HaqumeiError> {
self.ensure_dictionary_is_latest()?;
let morphs = self.run_mecab_detailed(text)?;
let valid_features_str: Vec<String> = morphs
.iter()
.filter(|m| !m.is_ignored)
.map(|m| m.feature.clone())
.collect();
if valid_features_str.is_empty() {
return Ok(morphs
.into_iter()
.map(|m| WordPhonemeDetail {
word: m.surface,
phonemes: vec!["sp".to_string()],
is_unknown: m.is_unknown,
is_ignored: true,
})
.collect());
}
let njd_features = self.run_njd_from_mecab(&valid_features_str)?;
if njd_features.is_empty() {
return Ok(Vec::new());
}
let mapping = self.g2p_mapping_inner(&njd_features)?;
let mut result = Vec::with_capacity(morphs.len());
let mut morph_idx = 0;
for map in mapping {
while let Some(m) = morphs.get(morph_idx) {
if m.is_ignored {
result.push(WordPhonemeDetail {
word: m.surface.clone(),
phonemes: vec!["sp".to_string()],
is_unknown: m.is_unknown,
is_ignored: true,
});
morph_idx += 1;
} else {
break;
}
}
let current_map_word = &map.word;
if let Some(morph) = morphs.get(morph_idx) {
if current_map_word == &morph.surface {
let mut phonemes = map.phonemes.clone();
if morph.is_unknown && (phonemes.is_empty() || phonemes == ["pau"]) {
phonemes = vec!["unk".to_string()];
}
result.push(WordPhonemeDetail {
word: map.word.clone(),
phonemes,
is_unknown: morph.is_unknown,
is_ignored: map.phonemes.is_empty(),
});
morph_idx += 1;
} else if current_map_word.starts_with(&morph.surface) {
let mut is_unknown_word = false;
let mut matched_len = 0;
while let Some(inner_morph) = morphs.get(morph_idx) {
if inner_morph.is_ignored {
result.push(WordPhonemeDetail {
word: inner_morph.surface.clone(),
phonemes: vec!["sp".to_string()],
is_unknown: inner_morph.is_unknown,
is_ignored: true,
});
morph_idx += 1;
continue;
}
let remaining = &current_map_word[matched_len..];
if remaining.starts_with(&inner_morph.surface) {
is_unknown_word |= inner_morph.is_unknown;
matched_len += inner_morph.surface.len();
morph_idx += 1;
if matched_len == current_map_word.len() {
break;
}
} else {
break;
}
}
let mut phonemes = map.phonemes.clone();
if is_unknown_word && (phonemes.is_empty() || phonemes == ["pau"]) {
phonemes = vec!["unk".to_string()];
}
result.push(WordPhonemeDetail {
word: map.word.clone(),
phonemes,
is_unknown: is_unknown_word,
is_ignored: map.phonemes.is_empty(),
});
} else {
result.push(WordPhonemeDetail {
word: map.word.clone(),
phonemes: map.phonemes.clone(),
is_unknown: false,
is_ignored: map.phonemes.is_empty(),
});
}
}
}
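// Flush trailing ignored morphemes (e.g., whitespace) left after the last mapped word.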
while let Some(m) = morphs.get(morph_idx) {
if m.is_ignored {
result.push(WordPhonemeDetail {
word: m.surface.clone(),
phonemes: vec!["sp".to_string()],
is_unknown: m.is_unknown,
is_ignored: true,
});
}
morph_idx += 1;
}
Ok(result)
}
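/// Attributes each phoneme in the freshly built JPCommon phoneme list back to
/// the NJD word that produced it, via the word-pointer map from
/// `prepare_jpcommon_label_internal`. Pause pronunciations ("、", "?", "!")
/// are recorded as "pau". When NJD merged words away (fewer label words than
/// features), entries left without phonemes are folded into the previous word.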
pub(crate) fn g2p_mapping_inner(
&mut self,
njd_features: &[NjdFeature],
) -> Result<Vec<WordPhonemeMap>, HaqumeiError> {
unsafe {
let ptr_to_idx = self.prepare_jpcommon_label_internal(njd_features)?;
let jp = self.jp_common.inner.as_mut();
let mut mapping: Vec<WordPhonemeMap> = njd_features
.iter()
.map(|f| WordPhonemeMap {
word: f.string.clone(),
phonemes: Vec::new(),
})
.collect();
let mut pause_count = 0;
for (f_idx, f) in njd_features.iter().enumerate() {
let is_pause_pron = f.pron == "、" || f.pron == "?" || f.pron == "!";
if is_pause_pron {
mapping[f_idx].phonemes.push("pau".to_string());
pause_count += 1;
}
}
let needs_merge = njd_features.len() > ptr_to_idx.len() + pause_count;
let mut p = (*jp.label).phoneme_head;
while !p.is_null() {
let s_ptr = (*p).phoneme;
if !s_ptr.is_null() {
let s = CStr::from_ptr(s_ptr).to_string_lossy().into_owned();
if s == "pau" {
p = (*p).next;
continue;
}
let mut current_word_ptr = 0usize;
let mora = (*p).up;
if !mora.is_null() {
let word = (*mora).up;
if !word.is_null() {
current_word_ptr = word as usize;
}
}
if current_word_ptr != 0
&& let Some(&idx) = ptr_to_idx.get(&current_word_ptr)
&& let Some(target) = mapping.get_mut(idx)
{
target.phonemes.push(s);
}
}
p = (*p).next;
}
ffi::JPCommon_refresh(jp);
ffi::NJD_refresh(self.njd.inner.as_mut());
if needs_merge {
let mut write_idx = 0;
for read_idx in 0..mapping.len() {
if read_idx > 0 && mapping[read_idx].phonemes.is_empty() {
let prev_is_pause = mapping[write_idx - 1].phonemes.len() == 1
&& mapping[write_idx - 1].phonemes[0] == "pau";
if !prev_is_pause {
let text_to_merge = std::mem::take(&mut mapping[read_idx].word);
mapping[write_idx - 1].word.push_str(&text_to_merge);
continue;
}
}
if write_idx != read_idx {
mapping.swap(write_idx, read_idx);
}
write_idx += 1;
}
mapping.truncate(write_idx);
}
Ok(mapping)
}
}
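/// Size in bytes of the scratch buffer handed to `text2mecab` (16 KiB).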
const BUFFER_SIZE: usize = 16384;
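/// Normalizes `text` with `text2mecab`, runs Mecab analysis, and returns one
/// CSV feature string per morpheme, dropping whitespace symbols ("記号,空白").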
pub fn run_mecab(&mut self, text: &str) -> Result<Vec<String>, HaqumeiError> {
self.ensure_dictionary_is_latest()?;
let c_text = CString::new(text)?;
let mut buffer = vec![0u8; Self::BUFFER_SIZE];
let result = unsafe {
ffi::text2mecab(
buffer.as_mut_ptr() as *mut c_char,
Self::BUFFER_SIZE,
c_text.as_ptr(),
)
};
match result {
ffi::text2mecab_result_t_TEXT2MECAB_RESULT_SUCCESS => {}
ffi::text2mecab_result_t_TEXT2MECAB_RESULT_RANGE_ERROR => {
return Err(HaqumeiError::Text2MecabError(
"Text is too long".to_string(),
));
}
ffi::text2mecab_result_t_TEXT2MECAB_RESULT_INVALID_ARGUMENT => {
return Err(HaqumeiError::Text2MecabError(
"Invalid argument for text2mecab".to_string(),
));
}
_ => {
return Err(HaqumeiError::Text2MecabError(format!(
"Unknown error from text2mecab: {}",
result
)));
}
}
let result =
unsafe { ffi::Mecab_analysis(self.mecab.inner.as_ptr(), buffer.as_ptr() as *const c_char) };
if result != 1 {
return Err(HaqumeiError::MecabError(
"Mecab_analysis failed to parse the text".to_string(),
));
}
let morphs = unsafe {
let size = ffi::Mecab_get_size(self.mecab.inner.as_ptr()) as usize;
let features_ptr = ffi::Mecab_get_feature(self.mecab.inner.as_ptr());
let mut result_vec = Vec::with_capacity(size);
for i in 0..size {
let c_feature_ptr = *features_ptr.add(i);
let c_feature = CStr::from_ptr(c_feature_ptr);
result_vec.push(c_feature.to_string_lossy().into_owned());
}
result_vec
};
unsafe {
ffi::Mecab_refresh(self.mecab.inner.as_ptr());
}
let filtered_morphs: Vec<String> = morphs
.into_iter()
.filter(|m| !m.contains("記号,空白"))
.collect();
Ok(filtered_morphs)
}
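/// Like `run_mecab`, but walks the raw Mecab lattice so the surface string
/// and unknown-word status of each morpheme are preserved.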
pub fn run_mecab_detailed(&mut self, text: &str) -> Result<Vec<MecabMorph>, HaqumeiError> {
self.ensure_dictionary_is_latest()?;
let c_text = CString::new(text)?;
let mut buffer = vec![0u8; Self::BUFFER_SIZE];
let result = unsafe {
ffi::text2mecab(
buffer.as_mut_ptr() as *mut c_char,
Self::BUFFER_SIZE,
c_text.as_ptr(),
)
};
match result {
ffi::text2mecab_result_t_TEXT2MECAB_RESULT_SUCCESS => {}
ffi::text2mecab_result_t_TEXT2MECAB_RESULT_RANGE_ERROR => {
return Err(HaqumeiError::Text2MecabError(
"Text is too long".to_string(),
));
}
ffi::text2mecab_result_t_TEXT2MECAB_RESULT_INVALID_ARGUMENT => {
return Err(HaqumeiError::Text2MecabError(
"Invalid argument for text2mecab".to_string(),
));
}
_ => {
return Err(HaqumeiError::Text2MecabError(format!(
"Unknown error from text2mecab: {}",
result
)));
}
}
let result =
unsafe { ffi::Mecab_analysis(self.mecab.inner.as_ptr(), buffer.as_ptr() as *const c_char) };
if result != 1 {
return Err(HaqumeiError::MecabError(
"Mecab_analysis failed to parse the text".to_string(),
));
}
let morphs = unsafe {
let mecab_ptr = self.mecab.inner.as_ptr();
let lattice = (*mecab_ptr).lattice as *mut ffi::mecab_lattice_t;
let mut node = ffi::mecab_lattice_get_bos_node(lattice);
let mut results = Vec::new();
while !node.is_null() {
let stat = (*node).stat;
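// Skip the BOS (stat == 2) and EOS (stat == 3) lattice nodes.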
if stat != 2 && stat != 3 {
let surf_ptr = (*node).surface;
let length = (*node).length as usize;
let surface = if !surf_ptr.is_null() && length > 0 {
let bytes = std::slice::from_raw_parts(surf_ptr as *const u8, length);
String::from_utf8_lossy(bytes).into_owned()
} else {
String::new()
};
let feat_ptr = (*node).feature;
let raw_feature = if !feat_ptr.is_null() {
CStr::from_ptr(feat_ptr).to_string_lossy()
} else {
std::borrow::Cow::Borrowed("")
};
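// Open JTalk's Mecab wrapper exposes "surface,feature" strings; rebuild
// that shape from the raw lattice node so downstream NJD parsing matches.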
let compatible_feature = format!("{},{}", surface, raw_feature);
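// MECAB_UNK_NODE (stat == 1): the morpheme came from unknown-word handling.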
let is_unknown = stat == 1;
let is_ignored = raw_feature.contains("記号,空白");
results.push(MecabMorph {
surface,
feature: compatible_feature,
is_unknown,
is_ignored,
});
}
node = (*node).next;
}
results
};
unsafe {
ffi::Mecab_refresh(self.mecab.inner.as_ptr());
}
Ok(morphs)
}
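/// Converts Mecab feature strings into final NJD features. The pronunciation
/// pass runs first, the Rust-side "plus rules" are applied to the
/// intermediate features, and the digit/accent/unvoiced/long-vowel passes
/// then produce the returned features.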
pub fn run_njd_from_mecab(
&mut self,
mecab_features: &[String],
) -> Result<Vec<NjdFeature>, HaqumeiError> {
if mecab_features.is_empty() {
return Ok(Vec::new());
}
let c_strings: Vec<CString> = mecab_features
.iter()
.map(|s| CString::new(s.as_str()))
.collect::<Result<Vec<_>, _>>()?;
let mut c_string_pointers: Vec<*const c_char> =
c_strings.iter().map(|cs| cs.as_ptr()).collect();
unsafe {
ffi::mecab2njd(
self.njd.inner.as_mut(),
c_string_pointers.as_mut_ptr() as *mut *mut c_char,
c_string_pointers.len() as i32,
);
ffi::njd_set_pronunciation(self.njd.inner.as_mut());
}
let mut features = njd_to_features(&self.njd);
apply_plus_rules(&mut features);
Self::features_to_njd(&features, &mut self.njd)?;
unsafe {
ffi::njd_set_digit(self.njd.inner.as_mut());
ffi::njd_set_accent_phrase(self.njd.inner.as_mut());
ffi::njd_set_accent_type(self.njd.inner.as_mut());
ffi::njd_set_unvoiced_vowel(self.njd.inner.as_mut());
ffi::njd_set_long_vowel(self.njd.inner.as_mut());
}
let final_features = njd_to_features(&self.njd);
unsafe {
ffi::NJD_refresh(self.njd.inner.as_mut());
}
Ok(final_features)
}
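/// Turns NJD features into full-context label strings via `njd2jpcommon` and
/// `JPCommon_make_label`.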
pub(crate) fn make_label(
&mut self,
features: &[NjdFeature],
) -> Result<Vec<String>, HaqumeiError> {
Self::features_to_njd(features, &mut self.njd)?;
let (label_size, label_feature_ptr) = unsafe {
ffi::njd2jpcommon(self.jp_common.inner.as_mut(), self.njd.inner.as_mut());
ffi::JPCommon_make_label(self.jp_common.inner.as_mut());
let size = ffi::JPCommon_get_label_size(self.jp_common.inner.as_mut());
let ptr = ffi::JPCommon_get_label_feature(self.jp_common.inner.as_mut());
(size, ptr)
};
if label_feature_ptr.is_null() {
return Ok(Vec::new());
}
let labels = unsafe {
let mut result = Vec::with_capacity(label_size as usize);
for i in 0..(label_size as isize) {
let label_ptr = *label_feature_ptr.offset(i);
let c_label = CStr::from_ptr(label_ptr);
result.push(c_label.to_string_lossy().into_owned());
}
result
};
unsafe {
ffi::JPCommon_refresh(self.jp_common.inner.as_mut());
ffi::NJD_refresh(self.njd.inner.as_mut());
}
Ok(labels)
}
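/// Replays the word-push stage of `JPCommon_make_label`, recording which
/// `JPCommonLabelWord` pointer each pushed feature produced so phonemes can
/// later be attributed to their source word. Returns a map from word pointer
/// (as `usize`) to feature index.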
unsafe fn prepare_jpcommon_label_internal(
&mut self,
features: &[NjdFeature],
) -> Result<HashMap<usize, usize>, HaqumeiError> {
Self::features_to_njd(features, &mut self.njd)?;
let mut ptr_to_idx = HashMap::with_capacity(features.len());
unsafe {
let jp = self.jp_common.inner.as_mut();
let njd = self.njd.inner.as_mut();
ffi::njd2jpcommon(jp, njd);
if !jp.label.is_null() {
ffi::JPCommonLabel_clear(jp.label);
} else {
let ptr = libc::calloc(1, std::mem::size_of::<ffi::JPCommonLabel>());
if ptr.is_null() {
return Err(HaqumeiError::AllocationError("ffi::JPCommonLabel"));
}
jp.label = ptr as *mut ffi::JPCommonLabel;
}
ffi::JPCommonLabel_initialize(jp.label);
let mut node = jp.head;
let mut f_idx = 0;
while !node.is_null() {
let prev_word_tail = (*jp.label).word_tail;
ffi::JPCommonLabel_push_word(
jp.label,
ffi::JPCommonNode_get_pron(node),
ffi::JPCommonNode_get_pos(node),
ffi::JPCommonNode_get_ctype(node),
ffi::JPCommonNode_get_cform(node),
ffi::JPCommonNode_get_acc(node),
ffi::JPCommonNode_get_chain_flag(node),
);
let curr_word_tail = (*jp.label).word_tail;
if prev_word_tail != curr_word_tail && !curr_word_tail.is_null() {
ptr_to_idx.insert(curr_word_tail as usize, f_idx);
}
node = (*node).next;
f_idx += 1;
}
}
Ok(ptr_to_idx)
}
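/// Converts NJD features directly to a phoneme list by pushing each word into
/// a fresh JPCommon label and walking the resulting phoneme chain.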
pub fn extract_phonemes(
&mut self,
features: &[NjdFeature],
) -> Result<Vec<String>, HaqumeiError> {
if features.is_empty() {
return Ok(Vec::new());
}
let result = unsafe {
Self::features_to_njd(features, &mut self.njd)?;
let jp = self.jp_common.inner.as_mut();
let njd = self.njd.inner.as_mut();
ffi::njd2jpcommon(jp, njd);
if !jp.label.is_null() {
ffi::JPCommonLabel_clear(jp.label);
} else {
let ptr = libc::calloc(1, std::mem::size_of::<ffi::JPCommonLabel>());
if ptr.is_null() {
return Err(HaqumeiError::AllocationError("ffi::JPCommonLabel"));
}
jp.label = ptr as *mut ffi::JPCommonLabel;
}
ffi::JPCommonLabel_initialize(jp.label);
let mut node = jp.head;
while !node.is_null() {
ffi::JPCommonLabel_push_word(
jp.label,
ffi::JPCommonNode_get_pron(node),
ffi::JPCommonNode_get_pos(node),
ffi::JPCommonNode_get_ctype(node),
ffi::JPCommonNode_get_cform(node),
ffi::JPCommonNode_get_acc(node),
ffi::JPCommonNode_get_chain_flag(node),
);
node = (*node).next;
}
let mut result_vec = Vec::with_capacity(features.len() * 3);
let mut p = (*jp.label).phoneme_head;
while !p.is_null() {
let s_ptr = (*p).phoneme;
if !s_ptr.is_null() {
let s = CStr::from_ptr(s_ptr).to_string_lossy().into_owned();
result_vec.push(s);
}
p = (*p).next;
}
ffi::JPCommon_refresh(jp);
ffi::NJD_refresh(self.njd.inner.as_mut());
result_vec
};
Ok(result)
}
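/// Rebuilds the C-side NJD node list from Rust `NjdFeature`s. Each node is
/// allocated with `libc::calloc` so that the C library can release it when
/// the list is cleared.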
pub(crate) fn features_to_njd(
features: &[NjdFeature],
njd: &mut Njd,
) -> Result<(), HaqumeiError> {
unsafe {
ffi::NJD_clear(njd.inner.as_mut());
}
for feature in features {
let c_string = CString::new(feature.string.as_str())?;
let c_pos = CString::new(feature.pos.as_str())?;
let c_pos_group1 = CString::new(feature.pos_group1.as_str())?;
let c_pos_group2 = CString::new(feature.pos_group2.as_str())?;
let c_pos_group3 = CString::new(feature.pos_group3.as_str())?;
let c_ctype = CString::new(feature.ctype.as_str())?;
let c_cform = CString::new(feature.cform.as_str())?;
let c_orig = CString::new(feature.orig.as_str())?;
let c_read = CString::new(feature.read.as_str())?;
let c_pron = CString::new(feature.pron.as_str())?;
let c_chain_rule = CString::new(feature.chain_rule.as_str())?;
unsafe {
let node =
libc::calloc(1, std::mem::size_of::<ffi::NJDNode>()) as *mut ffi::NJDNode;
if node.is_null() {
return Err(HaqumeiError::AllocationError("ffi::NJDNode"));
}
ffi::NJDNode_initialize(node);
ffi::NJDNode_set_string(node, c_string.as_ptr());
ffi::NJDNode_set_pos(node, c_pos.as_ptr());
ffi::NJDNode_set_pos_group1(node, c_pos_group1.as_ptr());
ffi::NJDNode_set_pos_group2(node, c_pos_group2.as_ptr());
ffi::NJDNode_set_pos_group3(node, c_pos_group3.as_ptr());
ffi::NJDNode_set_ctype(node, c_ctype.as_ptr());
ffi::NJDNode_set_cform(node, c_cform.as_ptr());
ffi::NJDNode_set_orig(node, c_orig.as_ptr());
ffi::NJDNode_set_read(node, c_read.as_ptr());
ffi::NJDNode_set_pron(node, c_pron.as_ptr());
ffi::NJDNode_set_acc(node, feature.acc);
ffi::NJDNode_set_mora_size(node, feature.mora_size);
ffi::NJDNode_set_chain_rule(node, c_chain_rule.as_ptr());
ffi::NJDNode_set_chain_flag(node, feature.chain_flag);
ffi::NJD_push_node(njd.inner.as_mut(), node);
}
}
Ok(())
}
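// Batch variants of the single-text APIs above, generated by
// `impl_batch_method_openjtalk!` (presumably parallelized via the rayon
// imports at the top of the module).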
impl_batch_method_openjtalk!(
g2p_batch => g2p -> Vec<String>
);
impl_batch_method_openjtalk!(
g2p_detailed_batch => g2p_detailed -> Vec<String>
);
impl_batch_method_openjtalk!(
g2p_kana_batch => g2p_kana -> String
);
impl_batch_method_openjtalk!(
g2p_per_word_batch => g2p_per_word -> Vec<Vec<String>>
);
impl_batch_method_openjtalk!(
g2p_mapping_batch => g2p_mapping -> Vec<WordPhonemeMap>
);
impl_batch_method_openjtalk!(
g2p_mapping_detailed_batch => g2p_mapping_detailed -> Vec<WordPhonemeDetail>
);
impl_batch_method_openjtalk!(
extract_fullcontext_batch => extract_fullcontext -> Vec<String>
);
}
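/// Compiles the Mecab dictionary index for `path`, writing the output into
/// the same directory.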
pub fn build_mecab_dictionary<P: AsRef<Path>>(
path: P,
) -> Result<(), dictionary::DictCompilerError> {
MecabDictIndexCompiler::new()
.dict_dir(&path)
.out_dir(&path)
.run()
}