#![allow(non_upper_case_globals)]
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
include!("./voicevox_core.rs");
use std::ffi::CStr;
// Short public names for option structs that are not declared in this file
// (presumably they come from the included `voicevox_core.rs` bindings).
/// Options consumed by [`VoicevoxCore::new`].
pub type InitializeOptions = VoicevoxInitializeOptions;
/// Options consumed by [`VoicevoxCore::audio_query`].
pub type AudioQueryOptions = VoicevoxAudioQueryOptions;
/// Options consumed by [`VoicevoxCore::synthesis`].
pub type SynthesisOptions = VoicevoxSynthesisOptions;
/// Options consumed by [`VoicevoxCore::tts`].
pub type TtsOptions = VoicevoxTtsOptions;
/// Owning handle for a buffer of `T` that was handed over as a raw pointer.
///
/// The wrapper remembers the pointer, the element count and the function that
/// releases the buffer; that function is invoked when the wrapper is dropped.
pub struct CPointerWrap<T> {
    bytes: *mut T,
    length: usize,
    free_fn: fn(*mut T),
}

impl<T> CPointerWrap<T> {
    /// Wraps `bytes`, which must address `length` readable elements of `T`
    /// (or be null), and arranges for `free_fn` to release it on drop.
    pub fn new(bytes: *mut T, length: usize, free_fn: fn(*mut T)) -> Self {
        Self {
            bytes,
            length,
            free_fn,
        }
    }

    /// Borrows the wrapped buffer as a slice.
    ///
    /// A null pointer or a zero length yields an empty slice.
    pub fn as_slice(&self) -> &[T] {
        // `slice::from_raw_parts` requires a non-null pointer even when the
        // length is zero, so a null buffer must never reach it.
        if self.bytes.is_null() || self.length == 0 {
            return &[];
        }
        // SAFETY: the pointer is non-null here, and the constructor's contract
        // is that it addresses `length` elements that stay alive until drop.
        unsafe { std::slice::from_raw_parts(self.bytes, self.length) }
    }
}

impl<T> Drop for CPointerWrap<T> {
    fn drop(&mut self) {
        // There is nothing to release for a null buffer, and not every
        // release function tolerates a null argument.
        if !self.bytes.is_null() {
            (self.free_fn)(self.bytes);
        }
    }
}
/// Owning handle for a NUL-terminated C string handed over as a raw pointer.
///
/// The release function supplied at construction runs when the wrapper is
/// dropped.
pub struct CStrWrap {
    string: *mut std::os::raw::c_char,
    free_fn: fn(*mut std::os::raw::c_char),
}

impl CStrWrap {
    /// Wraps `string`, which must be null or point at a NUL-terminated string
    /// that stays alive until `free_fn` releases it.
    pub fn new(string: *mut std::os::raw::c_char, free_fn: fn(*mut std::os::raw::c_char)) -> Self {
        Self { string, free_fn }
    }

    /// Borrows the wrapped string as `&str`.
    ///
    /// A null pointer reads as the empty string.
    ///
    /// # Panics
    /// Panics if the bytes are not valid UTF-8.
    pub fn as_str(&self) -> &str {
        // `CStr::from_ptr` dereferences its argument, so null must be
        // filtered out before calling it.
        if self.string.is_null() {
            return "";
        }
        // SAFETY: non-null, and the constructor's contract is that the pointer
        // addresses a NUL-terminated string that outlives this wrapper.
        let text = unsafe { CStr::from_ptr(self.string) };
        text.to_str().expect("wrapped C string is not valid UTF-8")
    }
}

impl Drop for CStrWrap {
    fn drop(&mut self) {
        // Nothing was allocated for a null string; skip the release call.
        if !self.string.is_null() {
            (self.free_fn)(self.string);
        }
    }
}
/// Result codes used by the wrapper, stored as `i32` discriminants.
///
/// `Ok` is zero; every other variant is a distinct failure code. The type is
/// `Copy`, so a code can still be inspected after being passed by value.
#[repr(i32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ResultCode {
    Ok = 0,
    NotLoadedOpenjtalkDictError = 1,
    LoadModelError = 2,
    GetSupportedDevicesError = 3,
    GpuSupportError = 4,
    LoadMetasError = 5,
    UninitializedStatusError = 6,
    InvalidSpeakerIdError = 7,
    InvalidModelIndexError = 8,
    InferenceError = 9,
    ExtractFullContextLabelError = 10,
    InvalidUtf8InputError = 11,
    ParseKanaError = 12,
    InvalidAudioQueryError = 13,
}

/// Checked conversion from a raw code.
///
/// Unlike a `transmute`, an integer that matches no variant is handed back as
/// the error value instead of producing an invalid enum.
impl std::convert::TryFrom<i32> for ResultCode {
    type Error = i32;

    fn try_from(raw: i32) -> Result<Self, Self::Error> {
        let code = match raw {
            0 => ResultCode::Ok,
            1 => ResultCode::NotLoadedOpenjtalkDictError,
            2 => ResultCode::LoadModelError,
            3 => ResultCode::GetSupportedDevicesError,
            4 => ResultCode::GpuSupportError,
            5 => ResultCode::LoadMetasError,
            6 => ResultCode::UninitializedStatusError,
            7 => ResultCode::InvalidSpeakerIdError,
            8 => ResultCode::InvalidModelIndexError,
            9 => ResultCode::InferenceError,
            10 => ResultCode::ExtractFullContextLabelError,
            11 => ResultCode::InvalidUtf8InputError,
            12 => ResultCode::ParseKanaError,
            13 => ResultCode::InvalidAudioQueryError,
            unknown => return Err(unknown),
        };
        Ok(code)
    }
}
/// Acceleration mode selector.
///
/// Each discriminant is taken from the matching
/// `VoicevoxAccelerationMode_VOICEVOX_ACCELERATION_MODE_*` binding constant, so
/// casting a variant to an integer yields the value the bindings define.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum AccelerationMode {
    Auto = VoicevoxAccelerationMode_VOICEVOX_ACCELERATION_MODE_AUTO as isize,
    CPU = VoicevoxAccelerationMode_VOICEVOX_ACCELERATION_MODE_CPU as isize,
    GPU = VoicevoxAccelerationMode_VOICEVOX_ACCELERATION_MODE_GPU as isize,
}
/// Handle representing an initialized core.
///
/// Construction calls `voicevox_initialize`; dropping the handle calls
/// `voicevox_finalize`.
pub struct VoicevoxCore;

impl VoicevoxCore {
    /// Initializes the core with `opt`.
    ///
    /// # Errors
    /// Any non-zero result of `voicevox_initialize` is returned as a
    /// [`ResultCode`].
    pub fn new(opt: InitializeOptions) -> Result<Self, ResultCode> {
        let status = unsafe { voicevox_initialize(opt) };
        if status == 0 {
            return Ok(Self);
        }
        // SAFETY: NOTE(review) — sound only while the core returns codes that
        // are declared in `ResultCode`; confirm against the linked library.
        Err(unsafe { std::mem::transmute(status) })
    }

    /// Builds an [`InitializeOptions`] from the individual settings and
    /// forwards it to [`VoicevoxCore::new`].
    ///
    /// `open_jtalk_dict_dir` is only borrowed: its pointer is placed in the
    /// options and used during this call.
    pub fn new_from_options(
        acceleration_mode: AccelerationMode,
        cpu_num_threads: u16,
        load_all_models: bool,
        open_jtalk_dict_dir: &std::ffi::CStr,
    ) -> Result<Self, ResultCode> {
        Self::new(InitializeOptions {
            acceleration_mode: acceleration_mode as i32,
            cpu_num_threads,
            load_all_models,
            open_jtalk_dict_dir: open_jtalk_dict_dir.as_ptr(),
        })
    }
}

impl Drop for VoicevoxCore {
    /// Finalizes the core when the handle goes out of scope.
    fn drop(&mut self) {
        unsafe {
            voicevox_finalize();
        }
    }
}
impl VoicevoxCore {
/// Returns the value produced by `voicevox_make_default_initialize_options`.
pub fn make_default_initialize_options() -> InitializeOptions {
unsafe { voicevox_make_default_initialize_options() }
}
/// Returns the value produced by `voicevox_make_default_tts_options`.
pub fn make_default_tts_options() -> TtsOptions {
unsafe { voicevox_make_default_tts_options() }
}
/// Returns the value produced by `voicevox_make_default_audio_query_options`.
pub fn make_default_audio_query_options() -> AudioQueryOptions {
unsafe { voicevox_make_default_audio_query_options() }
}
/// Returns the value produced by `voicevox_make_default_synthesis_options`.
pub fn make_default_synthesis_options() -> SynthesisOptions {
unsafe { voicevox_make_default_synthesis_options() }
}
/// Returns the string reported by `voicevox_get_version`.
///
/// # Panics
/// Panics if the returned bytes are not valid UTF-8.
pub fn get_version() -> &'static str {
    let raw = unsafe { voicevox_get_version() };
    // SAFETY: NOTE(review) — assumes the core returns a non-null,
    // NUL-terminated string that stays valid for the rest of the program.
    let version = unsafe { CStr::from_ptr(raw) };
    version.to_str().unwrap()
}

/// Returns the string reported by `voicevox_get_metas_json`.
///
/// # Panics
/// Panics if the returned bytes are not valid UTF-8.
pub fn get_metas_json() -> &'static str {
    let raw = unsafe { voicevox_get_metas_json() };
    // SAFETY: NOTE(review) — same assumption as `get_version`: non-null,
    // NUL-terminated and valid for the rest of the program.
    let metas = unsafe { CStr::from_ptr(raw) };
    metas.to_str().unwrap()
}

/// Returns the string reported by `voicevox_get_supported_devices_json`.
///
/// # Panics
/// Panics if the returned bytes are not valid UTF-8.
pub fn get_supported_devices_json() -> &'static str {
    let raw = unsafe { voicevox_get_supported_devices_json() };
    // SAFETY: NOTE(review) — same assumption as `get_version`: non-null,
    // NUL-terminated and valid for the rest of the program.
    let devices = unsafe { CStr::from_ptr(raw) };
    devices.to_str().unwrap()
}
/// Asks the core to load the model for `speaker_id` via `voicevox_load_model`.
///
/// # Errors
/// Any non-zero result code is returned as a [`ResultCode`].
pub fn load_model(&self, speaker_id: u32) -> Result<(), ResultCode> {
    match unsafe { voicevox_load_model(speaker_id) } {
        0 => Ok(()),
        // SAFETY: NOTE(review) — sound only while the core returns codes that
        // are declared in `ResultCode`; confirm against the linked library.
        failure => Err(unsafe { std::mem::transmute(failure) }),
    }
}
/// Returns the flag reported by `voicevox_is_gpu_mode`.
pub fn is_gpu_mode(&self) -> bool {
unsafe { voicevox_is_gpu_mode() }
}
/// Returns the flag reported by `voicevox_is_model_loaded` for `speaker_id`.
pub fn is_model_loaded(&self, speaker_id: u32) -> bool {
unsafe { voicevox_is_model_loaded(speaker_id) }
}
/// Runs `voicevox_predict_duration` on `phoneme_vector` for `speaker_id`.
///
/// On success the output buffer is returned in a [`CPointerWrap`] that
/// releases it with `voicevox_predict_duration_data_free` when dropped.
///
/// # Errors
/// Any non-zero result code is returned as a [`ResultCode`].
pub fn predict_duration(
    &self,
    phoneme_vector: &[i64],
    speaker_id: u32,
) -> Result<CPointerWrap<f32>, ResultCode> {
    // Out-parameters filled in by the core.
    let mut out_len: usize = 0;
    let mut out_data: *mut f32 = std::ptr::null_mut();
    let status = unsafe {
        voicevox_predict_duration(
            phoneme_vector.len(),
            // The binding takes `*mut`; the slice is only lent for the call.
            phoneme_vector.as_ptr() as *mut i64,
            speaker_id,
            &mut out_len,
            &mut out_data,
        )
    };
    if status != 0 {
        // SAFETY: NOTE(review) — sound only while the core returns codes that
        // are declared in `ResultCode`; confirm against the linked library.
        return Err(unsafe { std::mem::transmute(status) });
    }
    Ok(CPointerWrap::new(out_data, out_len, |p| unsafe {
        voicevox_predict_duration_data_free(p)
    }))
}
/// Runs `voicevox_predict_intonation` for `speaker_id`.
///
/// The length of `vowel_phoneme_vector` is the single element count passed to
/// the core alongside all six input pointers. On success the output buffer is
/// returned in a [`CPointerWrap`] that releases it with
/// `voicevox_predict_intonation_data_free` when dropped.
///
/// # Errors
/// Any non-zero result code is returned as a [`ResultCode`].
///
/// # Panics
/// Panics if any of the other five vectors is shorter than
/// `vowel_phoneme_vector`; forwarding such a slice with the shared length would
/// let the core read past its end.
#[allow(clippy::too_many_arguments)]
pub fn predict_intonation(
    &self,
    vowel_phoneme_vector: &[i64],
    consonant_phoneme_vector: &[i64],
    start_accent_vector: &[i64],
    end_accent_vector: &[i64],
    start_accent_phrase_vector: &[i64],
    end_accent_phrase_vector: &[i64],
    speaker_id: u32,
) -> Result<CPointerWrap<f32>, ResultCode> {
    let length = vowel_phoneme_vector.len();

    // Only one length crosses the FFI boundary, so every companion slice must
    // cover at least that many elements. Longer slices keep working as before.
    let companions: [(&str, &[i64]); 5] = [
        ("consonant_phoneme_vector", consonant_phoneme_vector),
        ("start_accent_vector", start_accent_vector),
        ("end_accent_vector", end_accent_vector),
        ("start_accent_phrase_vector", start_accent_phrase_vector),
        ("end_accent_phrase_vector", end_accent_phrase_vector),
    ];
    for &(name, values) in companions.iter() {
        assert!(
            values.len() >= length,
            "{} has {} elements, fewer than the {} in vowel_phoneme_vector",
            name,
            values.len(),
            length
        );
    }

    // Out-parameters filled in by the core.
    let mut output_predict_intonation_data_length: usize = 0;
    let mut output_predict_intonation_data: *mut f32 = std::ptr::null_mut();
    let result_code = unsafe {
        voicevox_predict_intonation(
            length,
            // The binding takes `*mut`; the slices are only lent for the call.
            vowel_phoneme_vector.as_ptr() as *mut i64,
            consonant_phoneme_vector.as_ptr() as *mut i64,
            start_accent_vector.as_ptr() as *mut i64,
            end_accent_vector.as_ptr() as *mut i64,
            start_accent_phrase_vector.as_ptr() as *mut i64,
            end_accent_phrase_vector.as_ptr() as *mut i64,
            speaker_id,
            &mut output_predict_intonation_data_length,
            &mut output_predict_intonation_data,
        )
    };
    match result_code {
        0 => Ok(CPointerWrap::new(
            output_predict_intonation_data,
            output_predict_intonation_data_length,
            |p| unsafe { voicevox_predict_intonation_data_free(p) },
        )),
        // SAFETY: NOTE(review) — sound only while the core returns codes that
        // are declared in `ResultCode`; confirm against the linked library.
        e => Err(unsafe { std::mem::transmute(e) }),
    }
}
/// Runs `voicevox_decode` for `speaker_id`.
///
/// The call receives, in this order: `phoneme_vectors.len()`, the integer
/// quotient `f0.len() / phoneme_vectors.len()`, the `phoneme_vectors` pointer
/// and the `f0` pointer. An empty `phoneme_vectors` forwards a quotient of 0
/// instead of panicking on the division, leaving the verdict to the core.
///
/// NOTE(review): confirm that this argument order matches the parameter order
/// of `voicevox_decode` in the bindings; the slice named `phoneme_vectors`
/// supplies the first length and the first pointer.
///
/// On success the output buffer is returned in a [`CPointerWrap`] that
/// releases it with `voicevox_decode_data_free` when dropped.
///
/// # Errors
/// Any non-zero result code is returned as a [`ResultCode`].
pub fn decode(
    &self,
    phoneme_vectors: &[f32],
    f0: &[f32],
    speaker_id: u32,
) -> Result<CPointerWrap<f32>, ResultCode> {
    let phoneme_size = phoneme_vectors.len();
    // `checked_div` avoids the division-by-zero panic on empty input.
    let quotient = f0.len().checked_div(phoneme_size).unwrap_or(0);

    // Out-parameters filled in by the core.
    let mut data_length: usize = 0;
    let mut data_ptr: *mut f32 = std::ptr::null_mut();
    let result_code = unsafe {
        voicevox_decode(
            phoneme_size,
            quotient,
            // The binding takes `*mut`; the slices are only lent for the call.
            phoneme_vectors.as_ptr() as *mut f32,
            f0.as_ptr() as *mut f32,
            speaker_id,
            &mut data_length,
            &mut data_ptr,
        )
    };
    match result_code {
        0 => Ok(CPointerWrap::new(data_ptr, data_length, |p| unsafe {
            voicevox_decode_data_free(p)
        })),
        // SAFETY: NOTE(review) — sound only while the core returns codes that
        // are declared in `ResultCode`; confirm against the linked library.
        e => Err(unsafe { std::mem::transmute(e) }),
    }
}
pub fn synthesis(
&self,
audio_query: &str,
speaker_id: u32,
options: SynthesisOptions,
) -> Result<CPointerWrap<u8>, ResultCode> {
let audio_query_c_str = std::ffi::CString::new(audio_query).unwrap();
let mut output_wav_ptr: *mut u8 = std::ptr::null_mut();
let mut output_wav_length: usize = 0;
let result_code = unsafe {
voicevox_synthesis(
audio_query_c_str.as_ptr(),
speaker_id,
options,
&mut output_wav_length,
&mut output_wav_ptr,
)
};
match result_code {
0 => {
let wav = CPointerWrap::<u8>::new(output_wav_ptr, output_wav_length, |p| unsafe {
voicevox_wav_free(p)
});
Ok(wav)
}
e => Err(unsafe { std::mem::transmute(e) }),
}
}
pub fn audio_query(
&self,
text: &str,
speaker_id: u32,
options: AudioQueryOptions,
) -> Result<CStrWrap, ResultCode> {
let c_str = std::ffi::CString::new(text).unwrap();
let mut output_ptr: *mut std::os::raw::c_char = std::ptr::null_mut();
let result_code =
unsafe { voicevox_audio_query(c_str.as_ptr(), speaker_id, options, &mut output_ptr) };
match result_code {
0 => {
let ptr_wrap =
CStrWrap::new(output_ptr, |p| unsafe { voicevox_audio_query_json_free(p) });
Ok(ptr_wrap)
}
e => Err(unsafe { std::mem::transmute(e) }),
}
}
/// Same as [`VoicevoxCore::tts`], using the options returned by
/// [`VoicevoxCore::make_default_tts_options`].
pub fn tts_simple(&self, text: &str, speaker_id: u32) -> Result<CPointerWrap<u8>, ResultCode> {
    let defaults = Self::make_default_tts_options();
    self.tts(text, speaker_id, defaults)
}

/// Passes `text` to `voicevox_tts` for `speaker_id` with the given `options`
/// and returns the output bytes.
///
/// # Errors
/// Any non-zero result code is returned as a [`ResultCode`].
pub fn tts(
    &self,
    text: &str,
    speaker_id: u32,
    options: TtsOptions,
) -> Result<CPointerWrap<u8>, ResultCode> {
    // The shared implementation takes no `self`.
    Self::_tts(text, speaker_id, options)
}
fn _tts(
text: &str,
speaker_id: u32,
options: TtsOptions,
) -> Result<CPointerWrap<u8>, ResultCode> {
let c_str = std::ffi::CString::new(text).unwrap();
let mut out_length: usize = 0;
let mut out_wav: *mut u8 = std::ptr::null_mut();
let result = unsafe {
voicevox_tts(
c_str.as_ptr(),
speaker_id,
options,
&mut out_length,
&mut out_wav,
)
};
match result {
0 => {
let wav = CPointerWrap::<u8>::new(out_wav, out_length, |p| unsafe {
voicevox_wav_free(p)
});
Ok(wav)
}
e => Err(unsafe { std::mem::transmute(e) }),
}
}
/// Returns the message `voicevox_error_result_to_message` reports for
/// `result_code`.
///
/// # Panics
/// Panics if the returned bytes are not valid UTF-8.
pub fn error_result_to_message(result_code: ResultCode) -> &'static str {
    let raw_message = unsafe { voicevox_error_result_to_message(result_code as i32) };
    // SAFETY: NOTE(review) — assumes the core returns a non-null,
    // NUL-terminated string that stays valid for the rest of the program.
    let message = unsafe { CStr::from_ptr(raw_message) };
    message.to_str().unwrap()
}
}