// Raw FFI bindings, kept in a private module so the lint relaxations below
// apply only to the included file.
mod ffi {
// The included bindings do not follow Rust naming conventions and are not
// necessarily all used, so the corresponding lints are silenced here.
#![allow(non_upper_case_globals)]
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
#![allow(dead_code)]
// `bindings.rs` is read from the build's `OUT_DIR`
// (presumably written there by the build script — TODO confirm).
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
}
// Functions implemented outside Rust and resolved at link time.
// NOTE(review): judging by the names, these presumably install / remove a
// redirection of the native library's output towards `haqumei_rust_print`;
// their implementation is not visible in this file — confirm on the C/C++ side.
unsafe extern "C" {
fn setup_cpp_redirect();
fn teardown_cpp_redirect();
}
/// Logging callback exported under the unmangled symbol `haqumei_rust_print`.
///
/// The message is decoded lossily as UTF-8, stripped of trailing whitespace and
/// forwarded to the `log` crate with an `[OpenJTalk]` prefix: at `warn` level
/// when `is_stderr` is non-zero, at `info` level otherwise. A null `msg` is
/// ignored.
///
/// # Safety
///
/// `msg` must either be null or point to a valid NUL-terminated C string that
/// remains alive and unmodified for the duration of the call.
#[unsafe(no_mangle)]
unsafe extern "C" fn haqumei_rust_print(msg: *const libc::c_char, is_stderr: libc::c_int) {
    if msg.is_null() {
        return;
    }
    // SAFETY: `msg` is non-null (checked above) and, per this function's
    // contract, points to a valid NUL-terminated string.
    let raw = unsafe { std::ffi::CStr::from_ptr(msg) };
    let decoded = raw.to_string_lossy();
    let line = decoded.trim_end();
    match is_stderr {
        0 => log::info!("[OpenJTalk] {}", line),
        _ => log::warn!("[OpenJTalk] {}", line),
    }
}
mod data;
pub mod errors;
pub mod features;
#[macro_use]
mod macros;
pub mod nani_predict;
pub mod open_jtalk;
mod postprocess;
pub mod utils;
pub mod word_phoneme;
use std::{
path::{Path, PathBuf},
sync::{Arc, LazyLock, Mutex, OnceLock},
thread,
};
use crossbeam_channel::{Sender, bounded};
use moka::sync::Cache;
pub use features::NjdFeature;
pub use open_jtalk::{
MecabDictIndexCompiler, MecabMorph, OpenJTalk, unset_user_dictionary, update_global_dictionary,
};
pub use word_phoneme::{WordPhonemeDetail, WordPhonemeMap, WordPhonemePair};
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use vibrato_rkyv::dictionary::PresetDictionaryKind;
use crate::{
errors::HaqumeiError,
features::UnidicFeature,
nani_predict::NaniPredictor,
open_jtalk::{Dictionary, GLOBAL_MECAB_DICTIONARY},
postprocess::{
modify_acc_after_chaining, modify_filler_accent, process_odori_features, retreat_acc_nuc,
vibrato_analysis,
},
utils::default_is_non_pause_symbol,
};
/// Process-wide cache from a `String` key to a list of `UnidicFeature`s, built
/// lazily with `Cache::new(1000)`.
// NOTE(review): not read in this part of the file; presumably keyed by the
// analysed text — confirm against the code that uses it.
static VIBRATO_CACHE: LazyLock<Cache<String, Vec<UnidicFeature>>> =
LazyLock::new(|| Cache::new(1000));
/// Process-wide cache of `NaniPredictor` verdicts keyed by the preceding
/// `NjdFeature` (see `Haqumei::predict_is_nan`), built with `Cache::new(1000)`.
static NANI_PREDICTOR_CACHE: LazyLock<Cache<NjdFeature, bool>> = LazyLock::new(|| Cache::new(1000));
/// Lazily constructed, mutex-guarded `NaniPredictor` shared by all instances.
/// First access panics if `NaniPredictor::new()` fails.
static NANI_PREDICTOR: LazyLock<Mutex<NaniPredictor>> = LazyLock::new(|| {
Mutex::new(NaniPredictor::new().expect("Failed to initialize NaniPredictor models"))
});
/// Directory used for downloaded dictionaries; set once by
/// `Haqumei::init_tokenizer_if_needed` to `<dirs::cache_dir()>/haqumei`.
static CACHE_DIR: OnceLock<PathBuf> = OnceLock::new();
/// A unit of work for the vibrato worker threads: the text to analyse and the
/// channel on which the resulting features are sent back.
type VibratoTask = (String, Sender<Vec<UnidicFeature>>);
/// Sending half of the shared vibrato task queue; initialised on the first call
/// to `init_vibrato_workers_if_needed`.
static VIBRATO_TASK_TX: OnceLock<Sender<VibratoTask>> = OnceLock::new();
/// Starts the background vibrato worker threads the first time it is called.
///
/// On the first call a bounded task queue is created, a fixed number of
/// threads is spawned (each with its own clone of `tokenizer` and its own
/// worker), and the queue's sender is stored in `VIBRATO_TASK_TX`. Later calls
/// do nothing, so the tokenizer passed on the first call is the one every
/// worker keeps using.
///
/// Each thread loops until the task queue is disconnected, analysing the
/// received text and sending the features back on the per-task reply channel;
/// a failed reply (receiver already dropped) is ignored.
pub(crate) fn init_vibrato_workers_if_needed(tokenizer: &vibrato_rkyv::Tokenizer) {
    const WORKER_COUNT: usize = 8;
    const TASK_QUEUE_CAPACITY: usize = 1024;

    VIBRATO_TASK_TX.get_or_init(|| {
        let (task_tx, task_rx) = bounded::<VibratoTask>(TASK_QUEUE_CAPACITY);
        (0..WORKER_COUNT).for_each(|_| {
            let task_rx = task_rx.clone();
            let tokenizer = tokenizer.clone();
            thread::spawn(move || {
                let mut worker = tokenizer.new_worker();
                while let Ok((text, reply_tx)) = task_rx.recv() {
                    let features = vibrato_analysis(&mut worker, &text);
                    // The requester may have gone away; that is not an error here.
                    let _ = reply_tx.send(features);
                }
            });
        });
        task_tx
    });
}
/// Grapheme-to-phoneme front end combining an `OpenJTalk` instance with the
/// optional post-processing steps selected in `HaqumeiOptions`.
pub struct Haqumei {
/// The underlying OpenJTalk wrapper that performs analysis and labelling.
pub(crate) open_jtalk: OpenJTalk,
/// Vibrato tokenizer; `None` until `init_tokenizer_if_needed` has run.
pub(crate) tokenizer: Option<vibrato_rkyv::Tokenizer>,
/// Receiving end of the reply channel for the vibrato task queued by the most
/// recent frontend call; `None` when `use_unidic_yomi` is disabled.
pub(crate) rx: Option<crossbeam_channel::Receiver<Vec<UnidicFeature>>>,
/// Processing options; public so callers can adjust them after construction.
pub options: HaqumeiOptions,
}
/// Switches controlling normalisation and the post-processing pipeline of
/// `Haqumei`. See the `Default` impl for the default values.
#[derive(Debug, Clone, Copy)]
pub struct HaqumeiOptions {
/// Unicode normalisation mode for input text.
// NOTE(review): consumed by `normalize_unicode_if_needed`, which is not
// visible in this part of the file.
pub normalize_unicode: UnicodeNormalization,
/// If this, `revert_long_vowels` or `revert_yotsugana` is set, the frontend
/// methods call `revert_pron_to_read` on the NJD features.
pub use_read_as_pron: bool,
/// See `use_read_as_pron`.
pub revert_long_vowels: bool,
/// See `use_read_as_pron`.
pub revert_yotsugana: bool,
/// Run `postprocess::modify_filler_accent` during post-processing.
pub modify_filler_accent: bool,
/// Run `predict_nani_reading` during post-processing.
pub predict_nani: bool,
/// Initialise the vibrato tokenizer, queue a vibrato analysis per input and
/// run `modify_kanji_yomi` during post-processing.
pub use_unidic_yomi: bool,
/// Run `postprocess::retreat_acc_nuc` during post-processing.
pub retreat_acc_nuc: bool,
/// Run `postprocess::modify_acc_after_chaining` during post-processing.
pub modify_acc_after_chaining: bool,
/// Run `postprocess::process_odori_features` during post-processing.
pub process_odoriji: bool,
/// Predicate handed to `g2p_pairs_inner` / `g2p_mapping_inner`.
// NOTE(review): presumably returns true for symbols that must not be turned
// into a pause — confirm against `utils::default_is_non_pause_symbol`.
pub is_non_pause_symbol: fn(&str) -> bool,
}
/// Default configuration: no Unicode normalisation, none of the "revert"
/// switches, no UniDic readings, and every other post-processing step enabled.
impl Default for HaqumeiOptions {
fn default() -> Self {
Self {
normalize_unicode: UnicodeNormalization::None,
// Pronunciation-reverting switches are opt-in.
use_read_as_pron: false,
revert_long_vowels: false,
revert_yotsugana: false,
modify_filler_accent: true,
predict_nani: true,
// Opt-in: enabling this makes construction initialise the tokenizer.
use_unidic_yomi: false,
retreat_acc_nuc: true,
modify_acc_after_chaining: true,
process_odoriji: true,
is_non_pause_symbol: default_is_non_pause_symbol,
}
}
}
/// Unicode normalisation mode selectable through
/// `HaqumeiOptions::normalize_unicode`.
// NOTE(review): the code that applies the normalisation is not visible here;
// `Nfc` / `Nfkc` presumably map to the Unicode NFC / NFKC forms — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum UnicodeNormalization {
/// Default variant.
#[default]
None,
Nfc,
Nfkc,
}
impl Haqumei {
/// Creates an instance backed by `OpenJTalk::new()` with default options.
///
/// # Errors
///
/// Returns any error produced by `OpenJTalk::new` or `from_open_jtalk`.
pub fn new() -> Result<Self, HaqumeiError> {
    let open_jtalk = OpenJTalk::new()?;
    Self::from_open_jtalk(open_jtalk, HaqumeiOptions::default())
}
/// Creates an instance backed by `OpenJTalk::new()` with the given options.
///
/// # Errors
///
/// Returns any error produced by `OpenJTalk::new` or `from_open_jtalk`.
pub fn with_options(options: HaqumeiOptions) -> Result<Self, HaqumeiError> {
    let open_jtalk = OpenJTalk::new()?;
    Self::from_open_jtalk(open_jtalk, options)
}
#[inline]
pub fn from_open_jtalk(
open_jtalk: OpenJTalk,
options: HaqumeiOptions,
) -> Result<Self, HaqumeiError> {
let mut haqumei = Haqumei {
open_jtalk,
tokenizer: None,
rx: None,
options,
};
if options.use_unidic_yomi {
haqumei.init_tokenizer_if_needed()?;
}
Ok(haqumei)
}
/// Loads the UniDic-CSJ preset dictionary and builds the vibrato tokenizer,
/// unless this instance already has one.
///
/// The dictionary is stored under `<platform cache dir>/haqumei/<preset name>`;
/// the base directory is resolved once per process and remembered in
/// `CACHE_DIR`.
///
/// # Errors
///
/// * `HaqumeiError::CacheDirectoryNotFound` if the platform cache directory
///   cannot be determined.
/// * Any error from `Dictionary::from_preset_with_download`.
pub(crate) fn init_tokenizer_if_needed(&mut self) -> Result<(), HaqumeiError> {
    if self.tokenizer.is_some() {
        return Ok(());
    }

    let cache_dir = match CACHE_DIR.get() {
        Some(dir) => dir,
        None => {
            let base = dirs::cache_dir().ok_or(HaqumeiError::CacheDirectoryNotFound)?;
            CACHE_DIR.get_or_init(|| base.join("haqumei"))
        }
    };

    let kind = PresetDictionaryKind::UnidicCsj;
    let dict_name = kind.name();
    log::info!("Downloading {} dictionary...", dict_name);
    let dictionary =
        vibrato_rkyv::Dictionary::from_preset_with_download(kind, cache_dir.join(&dict_name))?;
    log::info!("Downloaded {} dictionary.", dict_name);

    self.tokenizer = Some(vibrato_rkyv::Tokenizer::new(dictionary));
    Ok(())
}
/// Returns a clone of the tokenizer when `use_unidic_yomi` is enabled,
/// initialising it first if necessary; returns `Ok(None)` when the option is
/// disabled.
///
/// # Errors
///
/// Propagates the error from `init_tokenizer_if_needed`.
pub(crate) fn init_tokenizer_if_needed_and_modify_kanji_yomi_enabled(
    &mut self,
) -> Result<Option<vibrato_rkyv::Tokenizer>, HaqumeiError> {
    if !self.options.use_unidic_yomi {
        return Ok(None);
    }
    self.init_tokenizer_if_needed()?;
    Ok(self.tokenizer.clone())
}
/// Creates an instance from an owned `Dictionary`.
///
/// # Errors
///
/// Returns any error from `OpenJTalk::from_dictionary` or `from_open_jtalk`.
pub fn from_dictionary(
    dict: Dictionary,
    options: HaqumeiOptions,
) -> Result<Self, HaqumeiError> {
    let open_jtalk = OpenJTalk::from_dictionary(dict)?;
    Self::from_open_jtalk(open_jtalk, options)
}
/// Creates an instance from a reference-counted, shareable `Dictionary`.
///
/// # Errors
///
/// Returns any error from `OpenJTalk::from_shared_dictionary` or
/// `from_open_jtalk`.
pub fn from_shared_dictionary(
    dict: Arc<Dictionary>,
    options: HaqumeiOptions,
) -> Result<Self, HaqumeiError> {
    let open_jtalk = OpenJTalk::from_shared_dictionary(dict)?;
    Self::from_open_jtalk(open_jtalk, options)
}
/// Creates an instance using the dictionary located at `dict_dir`.
///
/// # Errors
///
/// Returns any error from `OpenJTalk::from_path` or `from_open_jtalk`.
pub fn from_path<P: AsRef<Path>>(
    dict_dir: P,
    options: HaqumeiOptions,
) -> Result<Self, HaqumeiError> {
    let open_jtalk = OpenJTalk::from_path(dict_dir)?;
    Self::from_open_jtalk(open_jtalk, options)
}
/// Creates an instance using the dictionary at `dict_dir` together with the
/// user dictionary at `user_dict`.
///
/// # Errors
///
/// Returns any error from `OpenJTalk::from_path_with_userdict` or
/// `from_open_jtalk`.
pub fn from_path_with_userdict<P: AsRef<Path>, Q: AsRef<Path>>(
    dict_dir: P,
    user_dict: Q,
    options: HaqumeiOptions,
) -> Result<Self, HaqumeiError> {
    let open_jtalk = OpenJTalk::from_path_with_userdict(dict_dir, user_dict)?;
    Self::from_open_jtalk(open_jtalk, options)
}
/// Converts `text` to a flat phoneme sequence.
///
/// The text goes through `run_frontend`; the resulting features are passed
/// to `OpenJTalk::extract_phonemes`. Empty input, or input that yields no
/// features, produces an empty vector. For empty input
/// `ensure_dictionary_is_latest` is still invoked.
///
/// # Errors
///
/// Propagates errors from the frontend and from phoneme extraction.
pub fn g2p(&mut self, text: &str) -> Result<Vec<String>, HaqumeiError> {
    if text.is_empty() {
        self.open_jtalk.ensure_dictionary_is_latest()?;
        return Ok(Vec::new());
    }

    let njd_features = self.run_frontend(text)?;
    if njd_features.is_empty() {
        Ok(Vec::new())
    } else {
        self.open_jtalk.extract_phonemes(&njd_features)
    }
}
/// Converts `text` to a flat phoneme sequence by concatenating, in order,
/// the `phonemes` of every entry returned by `g2p_mapping`.
///
/// Empty input yields an empty vector after `ensure_dictionary_is_latest`
/// has been invoked.
///
/// # Errors
///
/// Propagates errors from `g2p_mapping`.
pub fn g2p_detailed(&mut self, text: &str) -> Result<Vec<String>, HaqumeiError> {
    if text.is_empty() {
        self.open_jtalk.ensure_dictionary_is_latest()?;
        return Ok(Vec::new());
    }

    let phonemes = self
        .g2p_mapping(text)?
        .into_iter()
        .flat_map(|entry| entry.phonemes)
        .collect();
    Ok(phonemes)
}
/// Converts `text` to a single kana string.
///
/// For each feature from `run_frontend`, the surface `string` is used when
/// its part of speech is `記号`, and `pron` otherwise; every `’` character is
/// removed and the pieces are concatenated in order.
///
/// Empty input yields an empty string after `ensure_dictionary_is_latest`
/// has been invoked.
///
/// # Errors
///
/// Propagates errors from `run_frontend`.
pub fn g2p_kana(&mut self, text: &str) -> Result<String, HaqumeiError> {
    if text.is_empty() {
        self.open_jtalk.ensure_dictionary_is_latest()?;
        return Ok(String::new());
    }

    let njd_features = self.run_frontend(text)?;
    let mut kana = String::new();
    for feature in &njd_features {
        let piece = if feature.pos == "記号" {
            &feature.string
        } else {
            &feature.pron
        };
        kana.extend(piece.chars().filter(|&ch| ch != '’'));
    }
    Ok(kana)
}
/// Converts `text` to kana, one string per feature returned by `run_frontend`.
///
/// For each feature, the surface `string` is used when its part of speech is
/// `記号`, and `pron` otherwise; every `’` character is removed.
///
/// Empty input yields an empty vector after `ensure_dictionary_is_latest`
/// has been invoked.
///
/// # Errors
///
/// Propagates errors from `run_frontend`.
pub fn g2p_kana_per_word(&mut self, text: &str) -> Result<Vec<String>, HaqumeiError> {
    if text.is_empty() {
        self.open_jtalk.ensure_dictionary_is_latest()?;
        return Ok(Vec::new());
    }

    let njd_features = self.run_frontend(text)?;
    let mut kana_per_word = Vec::with_capacity(njd_features.len());
    for feature in &njd_features {
        let piece = if feature.pos == "記号" {
            &feature.string
        } else {
            &feature.pron
        };
        kana_per_word.push(piece.replace('’', ""));
    }
    Ok(kana_per_word)
}
/// Converts `text` to phonemes grouped per entry of `g2p_pairs`: each inner
/// vector is the `phonemes` field of one pair, in order.
///
/// Empty input yields an empty vector after `ensure_dictionary_is_latest`
/// has been invoked.
///
/// # Errors
///
/// Propagates errors from `g2p_pairs`.
pub fn g2p_per_word(&mut self, text: &str) -> Result<Vec<Vec<String>>, HaqumeiError> {
    if text.is_empty() {
        self.open_jtalk.ensure_dictionary_is_latest()?;
        return Ok(Vec::new());
    }

    let pairs = self.g2p_pairs(text)?;
    Ok(pairs.into_iter().map(|pair| pair.phonemes).collect())
}
/// Converts `text` to word/phoneme pairs.
///
/// The features from `run_frontend` are handed to
/// `OpenJTalk::g2p_pairs_inner` together with the configured
/// `is_non_pause_symbol` predicate. Empty input, or input that yields no
/// features, produces an empty vector; for empty input
/// `ensure_dictionary_is_latest` is still invoked.
///
/// # Errors
///
/// Propagates errors from the frontend and from `g2p_pairs_inner`.
pub fn g2p_pairs(&mut self, text: &str) -> Result<Vec<WordPhonemePair>, HaqumeiError> {
    if text.is_empty() {
        self.open_jtalk.ensure_dictionary_is_latest()?;
        return Ok(Vec::new());
    }

    let njd_features = self.run_frontend(text)?;
    if njd_features.is_empty() {
        return Ok(Vec::new());
    }

    let is_non_pause_symbol = self.options.is_non_pause_symbol;
    self.open_jtalk
        .g2p_pairs_inner(&njd_features, is_non_pause_symbol)
}
pub fn g2p_mapping(&mut self, text: &str) -> Result<Vec<WordPhonemeMap>, HaqumeiError> {
if text.is_empty() {
self.open_jtalk.ensure_dictionary_is_latest()?;
return Ok(Vec::new());
}
let text = &self.normalize_unicode_if_needed(text);
let text = text.as_ref();
self.rx = if let Some(tokenizer) =
self.init_tokenizer_if_needed_and_modify_kanji_yomi_enabled()?
{
init_vibrato_workers_if_needed(&tokenizer);
let (tx, rx) = bounded(1);
if let Some(task_tx) = VIBRATO_TASK_TX.get() {
let _ = task_tx.send((text.to_string(), tx));
}
Some(rx)
} else {
None
};
let morphs = self.open_jtalk.run_mecab_detailed(text)?;
let njd_features = self.open_jtalk.run_njd_from_mecab(
morphs
.iter()
.filter(|m| !m.is_ignored)
.map(|morph| morph.feature.as_str()),
)?;
if njd_features.is_empty() {
return Ok(Vec::new());
}
let mut njd_features = self.apply_postprocessing(text, njd_features)?;
let options = &self.options;
if options.use_read_as_pron | options.revert_long_vowels | options.revert_yotsugana {
self.revert_pron_to_read(&mut njd_features);
}
let mapping = self
.open_jtalk
.g2p_pairs_inner(&njd_features, self.options.is_non_pause_symbol)?;
self.open_jtalk.make_phoneme_mapping(morphs, mapping)
}
/// Converts `text` to a detailed mapping between MeCab morphs and phonemes.
///
/// Runs `run_frontend_detailed`, passes the NJD features to
/// `OpenJTalk::g2p_mapping_inner` with the configured `is_non_pause_symbol`
/// predicate, and combines the result with the morphs via
/// `make_phoneme_mapping`.
///
/// Empty input yields an empty vector after `ensure_dictionary_is_latest`
/// has been invoked.
///
/// # Errors
///
/// Propagates errors from the frontend and from the OpenJTalk mapping steps.
pub fn g2p_mapping_detailed(
    &mut self,
    text: &str,
) -> Result<Vec<WordPhonemeDetail>, HaqumeiError> {
    if text.is_empty() {
        self.open_jtalk.ensure_dictionary_is_latest()?;
        return Ok(Vec::new());
    }

    let (njd_features, morphs) = self.run_frontend_detailed(text)?;
    let is_non_pause_symbol = self.options.is_non_pause_symbol;
    let details = self
        .open_jtalk
        .g2p_mapping_inner(&njd_features, is_non_pause_symbol)?;
    self.open_jtalk.make_phoneme_mapping(morphs, details)
}
pub fn run_frontend(&mut self, text: &str) -> Result<Vec<NjdFeature>, HaqumeiError> {
if text.is_empty() {
self.open_jtalk.ensure_dictionary_is_latest()?;
return Ok(Vec::new());
}
let text = self.normalize_unicode_if_needed(text);
let text = text.as_ref();
self.rx = if let Some(tokenizer) =
self.init_tokenizer_if_needed_and_modify_kanji_yomi_enabled()?
{
init_vibrato_workers_if_needed(&tokenizer);
let (tx, rx) = bounded(1);
if let Some(task_tx) = VIBRATO_TASK_TX.get() {
let _ = task_tx.send((text.to_string(), tx));
}
Some(rx)
} else {
None
};
let mut njd_features = self.open_jtalk.run_frontend(text)?;
let options = &self.options;
if options.use_read_as_pron | options.revert_long_vowels | options.revert_yotsugana {
self.revert_pron_to_read(&mut njd_features);
}
self.apply_postprocessing(text, njd_features)
}
pub fn run_frontend_detailed(
&mut self,
text: &str,
) -> Result<(Vec<NjdFeature>, Vec<MecabMorph>), HaqumeiError> {
if text.is_empty() {
self.open_jtalk.ensure_dictionary_is_latest()?;
return Ok((Vec::new(), Vec::new()));
}
let text = self.normalize_unicode_if_needed(text);
let text = text.as_ref();
self.rx = if let Some(tokenizer) =
self.init_tokenizer_if_needed_and_modify_kanji_yomi_enabled()?
{
init_vibrato_workers_if_needed(&tokenizer);
let (tx, rx) = bounded(1);
if let Some(task_tx) = VIBRATO_TASK_TX.get() {
let _ = task_tx.send((text.to_string(), tx));
}
Some(rx)
} else {
None
};
let (mut njd_features, mecab_morphs) = self.open_jtalk.run_frontend_detailed(text)?;
let options = &self.options;
if options.use_read_as_pron | options.revert_long_vowels | options.revert_yotsugana {
self.revert_pron_to_read(&mut njd_features);
}
Ok((self.apply_postprocessing(text, njd_features)?, mecab_morphs))
}
/// Produces the labels returned by `OpenJTalk::make_label` for the features
/// that `run_frontend` yields for `text`.
///
/// Empty input yields an empty vector after `ensure_dictionary_is_latest`
/// has been invoked.
///
/// # Errors
///
/// Propagates errors from `run_frontend` and `make_label`.
pub fn extract_fullcontext(&mut self, text: &str) -> Result<Vec<String>, HaqumeiError> {
    if text.is_empty() {
        self.open_jtalk.ensure_dictionary_is_latest()?;
        return Ok(Vec::new());
    }

    let features = self.run_frontend(text)?;
    self.open_jtalk.make_label(&features)
}
fn apply_postprocessing(
&mut self,
text: &str,
mut njd_features: Vec<NjdFeature>,
) -> Result<Vec<NjdFeature>, HaqumeiError> {
let options = self.options;
if options.modify_filler_accent {
modify_filler_accent(&mut njd_features);
}
if options.predict_nani {
self.predict_nani_reading(&mut njd_features);
}
if options.use_unidic_yomi {
self.modify_kanji_yomi(text, &mut njd_features);
}
if options.retreat_acc_nuc {
retreat_acc_nuc(&mut njd_features);
}
if options.modify_acc_after_chaining {
modify_acc_after_chaining(&mut njd_features);
}
if options.process_odoriji {
process_odori_features(&mut njd_features, &mut self.open_jtalk)?;
}
Ok(njd_features)
}
/// Returns the shared `NaniPredictor`'s verdict for `prev_node`, memoised in
/// `NANI_PREDICTOR_CACHE` with a clone of the node as the key.
///
/// Returns `false` without consulting the predictor when `prev_node` is
/// `None`.
///
/// # Panics
///
/// Panics if the `NANI_PREDICTOR` mutex is poisoned.
pub(crate) fn predict_is_nan(&mut self, prev_node: Option<&NjdFeature>) -> bool {
    let Some(node) = prev_node else {
        return false;
    };

    // The closure only runs on a cache miss, so the lock is taken only then.
    let compute = || NANI_PREDICTOR.lock().unwrap().predict_is_nan(Some(node));
    NANI_PREDICTOR_CACHE.get_with(node.clone(), compute)
}
// Batch variants of the single-text methods above. Each invocation has the
// form `<batch name> => <single-text method> -> <per-item result type>`.
// NOTE(review): `impl_batch_method_haqumei!` is defined in the `macros` module
// and is not visible here; the exact signature and execution strategy of the
// generated methods should be confirmed there.
impl_batch_method_haqumei!(
run_frontend_batch => run_frontend -> Vec<NjdFeature>
);
impl_batch_method_haqumei!(
run_frontend_detailed_batch => run_frontend_detailed -> (Vec<NjdFeature>, Vec<MecabMorph>)
);
impl_batch_method_haqumei!(
g2p_batch => g2p -> Vec<String>
);
impl_batch_method_haqumei!(
g2p_detailed_batch => g2p_detailed -> Vec<String>
);
impl_batch_method_haqumei!(
g2p_kana_batch => g2p_kana -> String
);
impl_batch_method_haqumei!(
g2p_kana_per_word_batch => g2p_kana_per_word -> Vec<String>
);
impl_batch_method_haqumei!(
g2p_per_word_batch => g2p_per_word -> Vec<Vec<String>>
);
impl_batch_method_haqumei!(
g2p_pairs_batch => g2p_pairs -> Vec<WordPhonemePair>
);
impl_batch_method_haqumei!(
g2p_mapping_batch => g2p_mapping -> Vec<WordPhonemeMap>
);
impl_batch_method_haqumei!(
g2p_mapping_detailed_batch => g2p_mapping_detailed -> Vec<WordPhonemeDetail>
);
impl_batch_method_haqumei!(
extract_fullcontext_batch => extract_fullcontext -> Vec<String>
);
}