1#![allow(
18 clippy::missing_panics_doc,
19 clippy::not_unsafe_ptr_arg_deref,
20 clippy::cast_possible_truncation,
21 clippy::cast_possible_wrap,
22 clippy::cast_sign_loss,
23 clippy::ptr_as_ptr,
24 clippy::cast_ptr_alignment,
25 clippy::doc_markdown,
26 clippy::multiple_crate_versions,
27 clippy::field_reassign_with_default,
28 non_camel_case_types,
29 dead_code
30)]
31
32#[cfg(feature = "cloud")]
33mod cloud_engine;
34pub mod engine;
35pub mod factory;
36#[cfg(feature = "sherpaonnx")]
37mod sherpaonnx_engine;
38#[cfg(feature = "system")]
39mod system_engine;
40pub mod types;
41
42use std::ffi::{CStr, CString};
43use std::os::raw::c_char;
44use std::ptr;
45use std::sync::Mutex;
46
47use engine::TtsEngine;
48use factory::create_engine;
49
50type BoxedEngine = Box<dyn TtsEngine>;
51
52pub type CAudioCb = Option<extern "C" fn(*const u8, usize, *mut std::ffi::c_void)>;
54pub type CBoundaryCb = Option<extern "C" fn(*const c_char, f32, f32, *mut std::ffi::c_void)>;
55type BoxedAudioCb = Box<dyn FnMut(&[u8])>;
56type BoxedBoundaryCb = Box<dyn FnMut(&str, f32, f32)>;
57
58pub struct tts_ctx {
59 engine: Mutex<BoxedEngine>,
60 voice_id: Mutex<Option<String>>,
61 rate: Mutex<f32>,
62 pitch: Mutex<f32>,
63 volume: Mutex<f32>,
64 last_error: Mutex<String>,
65 on_audio: Mutex<CAudioCb>,
66 on_audio_userdata: Mutex<*mut std::ffi::c_void>,
67 on_boundary: Mutex<CBoundaryCb>,
68 on_boundary_userdata: Mutex<*mut std::ffi::c_void>,
69}
70
/// Process-wide slot holding the most recent error message recorded by `set_error`.
static LAST_ERROR: Mutex<Option<CString>> = Mutex::new(None);

/// Records `msg` as the most recent error in `LAST_ERROR`.
///
/// A C string cannot contain interior NUL bytes, so any NUL in `msg` is
/// dropped and the rest of the text is kept (previously the whole message was
/// replaced by a generic `"error"`).
fn set_error(msg: &str) {
    let sanitized: Vec<u8> = msg.bytes().filter(|&byte| byte != 0).collect();
    // No NUL bytes remain, so `CString::new` cannot fail here; the default is
    // only there to avoid a panic path.
    let message = CString::new(sanitized).unwrap_or_default();
    // The slot is replaced wholesale, so a poisoned lock carries no broken
    // invariant; recover the guard instead of silently losing the message.
    let mut slot = LAST_ERROR
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    *slot = Some(message);
}
78
79#[no_mangle]
89pub extern "C" fn tts_create(
90 engine_id: *const c_char,
91 credentials_json: *const c_char,
92) -> *mut tts_ctx {
93 if engine_id.is_null() {
94 set_error("engine_id is null");
95 return ptr::null_mut();
96 }
97 let engine_id_str = unsafe { CStr::from_ptr(engine_id) }
98 .to_string_lossy()
99 .into_owned();
100 let creds = if credentials_json.is_null() {
101 String::new()
102 } else {
103 unsafe { CStr::from_ptr(credentials_json) }
104 .to_string_lossy()
105 .into_owned()
106 };
107
108 if let Some(engine) = create_engine(&engine_id_str, &creds) {
109 let ctx = Box::new(tts_ctx {
110 engine: Mutex::new(engine),
111 voice_id: Mutex::new(None),
112 rate: Mutex::new(1.0),
113 pitch: Mutex::new(1.0),
114 volume: Mutex::new(1.0),
115 last_error: Mutex::new(String::new()),
116 on_audio: Mutex::new(None),
117 on_audio_userdata: Mutex::new(ptr::null_mut()),
118 on_boundary: Mutex::new(None),
119 on_boundary_userdata: Mutex::new(ptr::null_mut()),
120 });
121 Box::into_raw(ctx)
122 } else {
123 set_error(&format!("Unknown engine: {engine_id_str}"));
124 ptr::null_mut()
125 }
126}
127
128#[no_mangle]
135pub extern "C" fn tts_destroy(ctx: *mut tts_ctx) {
136 if !ctx.is_null() {
137 unsafe {
138 drop(Box::from_raw(ctx));
139 }
140 }
141}
142
143#[no_mangle]
152pub extern "C" fn tts_speak(ctx: *mut tts_ctx, text: *const c_char) -> i32 {
153 if ctx.is_null() || text.is_null() {
154 return -1;
155 }
156 let ctx_ref = unsafe { &*ctx };
157 let text_str = unsafe { CStr::from_ptr(text) }
158 .to_string_lossy()
159 .into_owned();
160 let voice = ctx_ref.voice_id.lock().unwrap().clone();
161 let rate = *ctx_ref.rate.lock().unwrap();
162 let pitch = *ctx_ref.pitch.lock().unwrap();
163 let volume = *ctx_ref.volume.lock().unwrap();
164
165 let audio_cb = *ctx_ref.on_audio.lock().unwrap();
166 let audio_userdata = *ctx_ref.on_audio_userdata.lock().unwrap();
167 let boundary_cb = *ctx_ref.on_boundary.lock().unwrap();
168 let boundary_userdata = *ctx_ref.on_boundary_userdata.lock().unwrap();
169
170 let mut on_audio_closure: Option<BoxedAudioCb> = match audio_cb {
171 Some(cb) => Some(Box::new(move |bytes: &[u8]| {
172 cb(bytes.as_ptr(), bytes.len(), audio_userdata);
173 })),
174 None => None,
175 };
176
177 let mut on_boundary_closure: Option<BoxedBoundaryCb> = match boundary_cb {
178 Some(cb) => Some(Box::new(move |word: &str, start: f32, end: f32| {
179 if let Ok(c_word) = CString::new(word) {
180 cb(c_word.as_ptr(), start, end, boundary_userdata);
181 }
182 })),
183 None => None,
184 };
185
186 let engine = ctx_ref.engine.lock().unwrap();
187 match engine.speak(
188 &text_str,
189 voice.as_deref(),
190 rate,
191 pitch,
192 volume,
193 on_audio_closure
194 .as_mut()
195 .map(|f| &mut **f as &mut dyn FnMut(&[u8])),
196 on_boundary_closure
197 .as_mut()
198 .map(|f| &mut **f as &mut dyn FnMut(&str, f32, f32)),
199 ) {
200 Ok(()) => 0,
201 Err(e) => {
202 *ctx_ref.last_error.lock().unwrap() = e.to_string();
203 -1
204 }
205 }
206}
207
208#[no_mangle]
217pub extern "C" fn tts_speak_sync(ctx: *mut tts_ctx, text: *const c_char) -> i32 {
218 if ctx.is_null() || text.is_null() {
219 return -1;
220 }
221 let ctx_ref = unsafe { &*ctx };
222 let text_str = unsafe { CStr::from_ptr(text) }
223 .to_string_lossy()
224 .into_owned();
225 let voice = ctx_ref.voice_id.lock().unwrap().clone();
226 let rate = *ctx_ref.rate.lock().unwrap();
227 let pitch = *ctx_ref.pitch.lock().unwrap();
228 let volume = *ctx_ref.volume.lock().unwrap();
229
230 let audio_cb = *ctx_ref.on_audio.lock().unwrap();
231 let audio_userdata = *ctx_ref.on_audio_userdata.lock().unwrap();
232 let boundary_cb = *ctx_ref.on_boundary.lock().unwrap();
233 let boundary_userdata = *ctx_ref.on_boundary_userdata.lock().unwrap();
234
235 let mut on_audio_closure: Option<BoxedAudioCb> = match audio_cb {
236 Some(cb) => Some(Box::new(move |bytes: &[u8]| {
237 cb(bytes.as_ptr(), bytes.len(), audio_userdata);
238 })),
239 None => None,
240 };
241
242 let mut on_boundary_closure: Option<BoxedBoundaryCb> = match boundary_cb {
243 Some(cb) => Some(Box::new(move |word: &str, start: f32, end: f32| {
244 if let Ok(c_word) = CString::new(word) {
245 cb(c_word.as_ptr(), start, end, boundary_userdata);
246 }
247 })),
248 None => None,
249 };
250
251 let engine = ctx_ref.engine.lock().unwrap();
252 match engine.speak_sync(
253 &text_str,
254 voice.as_deref(),
255 rate,
256 pitch,
257 volume,
258 on_audio_closure
259 .as_mut()
260 .map(|f| &mut **f as &mut dyn FnMut(&[u8])),
261 on_boundary_closure
262 .as_mut()
263 .map(|f| &mut **f as &mut dyn FnMut(&str, f32, f32)),
264 ) {
265 Ok(()) => 0,
266 Err(e) => {
267 *ctx_ref.last_error.lock().unwrap() = e.to_string();
268 -1
269 }
270 }
271}
272
273#[no_mangle]
279pub extern "C" fn tts_stop(ctx: *mut tts_ctx) {
280 if ctx.is_null() {
281 return;
282 }
283 let ctx_ref = unsafe { &*ctx };
284 let engine = ctx_ref.engine.lock().unwrap();
285 let _ = engine.stop();
286}
287
288#[no_mangle]
299pub extern "C" fn tts_get_voices(
300 ctx: *mut tts_ctx,
301 out_voices: *mut *mut types::tts_voice,
302 out_count: *mut i32,
303) -> i32 {
304 if ctx.is_null() || out_voices.is_null() || out_count.is_null() {
305 return -1;
306 }
307 let ctx_ref = unsafe { &*ctx };
308 let engine = ctx_ref.engine.lock().unwrap();
309 match engine.get_voices() {
310 Ok(voices) => {
311 let len = voices.len();
312 if len == 0 {
313 unsafe {
314 *out_voices = ptr::null_mut();
315 *out_count = 0;
316 }
317 return 0;
318 }
319 let layout = std::alloc::Layout::array::<types::tts_voice>(len).unwrap();
320 let arr_ptr = unsafe { std::alloc::alloc(layout).cast::<types::tts_voice>() };
321 for (i, v) in voices.iter().enumerate() {
322 unsafe {
323 let entry = arr_ptr.add(i);
324 std::ptr::write(
325 entry,
326 types::tts_voice {
327 id: CString::new(v.id.clone()).unwrap().into_raw(),
328 name: CString::new(v.name.clone()).unwrap().into_raw(),
329 language: CString::new(v.primary_language().to_string())
330 .unwrap()
331 .into_raw(),
332 gender: CString::new(v.gender.to_string()).unwrap().into_raw(),
333 engine: CString::new(v.provider.clone()).unwrap().into_raw(),
334 },
335 );
336 }
337 }
338 unsafe {
339 *out_voices = arr_ptr;
340 *out_count = len as i32;
341 }
342 0
343 }
344 Err(e) => {
345 *ctx_ref.last_error.lock().unwrap() = e.to_string();
346 -1
347 }
348 }
349}
350
351#[no_mangle]
357pub extern "C" fn tts_free_voices(voices: *mut types::tts_voice, count: i32) {
358 if voices.is_null() || count <= 0 {
359 return;
360 }
361 for i in 0..count {
362 unsafe {
363 let v = voices.add(i as usize);
364 if !(*v).id.is_null() {
365 let _ = CString::from_raw((*v).id);
366 }
367 if !(*v).name.is_null() {
368 let _ = CString::from_raw((*v).name);
369 }
370 if !(*v).language.is_null() {
371 let _ = CString::from_raw((*v).language);
372 }
373 if !(*v).gender.is_null() {
374 let _ = CString::from_raw((*v).gender);
375 }
376 if !(*v).engine.is_null() {
377 let _ = CString::from_raw((*v).engine);
378 }
379 }
380 }
381 let layout = std::alloc::Layout::array::<types::tts_voice>(count as usize).unwrap();
382 unsafe {
383 std::alloc::dealloc(voices.cast::<u8>(), layout);
384 }
385}
386
387#[no_mangle]
393pub extern "C" fn tts_set_voice(ctx: *mut tts_ctx, voice_id: *const c_char) {
394 if ctx.is_null() || voice_id.is_null() {
395 return;
396 }
397 let ctx_ref = unsafe { &*ctx };
398 let id = unsafe { CStr::from_ptr(voice_id) }
399 .to_string_lossy()
400 .into_owned();
401 *ctx_ref.voice_id.lock().unwrap() = Some(id);
402}
403
404#[no_mangle]
410pub extern "C" fn tts_set_rate(ctx: *mut tts_ctx, rate: f32) {
411 if ctx.is_null() {
412 return;
413 }
414 *unsafe { &*ctx }.rate.lock().unwrap() = rate;
415}
416
417#[no_mangle]
423pub extern "C" fn tts_set_pitch(ctx: *mut tts_ctx, pitch: f32) {
424 if ctx.is_null() {
425 return;
426 }
427 *unsafe { &*ctx }.pitch.lock().unwrap() = pitch;
428}
429
430#[no_mangle]
436pub extern "C" fn tts_set_volume(ctx: *mut tts_ctx, volume: f32) {
437 if ctx.is_null() {
438 return;
439 }
440 *unsafe { &*ctx }.volume.lock().unwrap() = volume;
441}
442
443#[no_mangle]
448pub extern "C" fn tts_set_on_audio(
449 ctx: *mut tts_ctx,
450 cb: CAudioCb,
451 userdata: *mut std::ffi::c_void,
452) {
453 if ctx.is_null() {
454 return;
455 }
456 let ctx_ref = unsafe { &*ctx };
457 *ctx_ref.on_audio.lock().unwrap() = cb;
458 *ctx_ref.on_audio_userdata.lock().unwrap() = userdata;
459}
460
461#[no_mangle]
466pub extern "C" fn tts_set_on_boundary(
467 ctx: *mut tts_ctx,
468 cb: CBoundaryCb,
469 userdata: *mut std::ffi::c_void,
470) {
471 if ctx.is_null() {
472 return;
473 }
474 let ctx_ref = unsafe { &*ctx };
475 *ctx_ref.on_boundary.lock().unwrap() = cb;
476 *ctx_ref.on_boundary_userdata.lock().unwrap() = userdata;
477}
478
479#[no_mangle]
481pub extern "C" fn tts_get_engine_count() -> i32 {
482 factory::engine_count() as i32
483}
484
485#[no_mangle]
494pub extern "C" fn tts_get_engines(out_engines: *mut types::tts_engine_info) {
495 if out_engines.is_null() {
496 return;
497 }
498 let engines = factory::engine_list();
499 for (i, e) in engines.iter().enumerate() {
500 unsafe {
501 let entry = out_engines.add(i);
502 std::ptr::write(
503 entry,
504 types::tts_engine_info {
505 id: CString::new(e.id.clone()).unwrap().into_raw(),
506 name: CString::new(e.name.clone()).unwrap().into_raw(),
507 needs_credentials: e.needs_credentials,
508 credential_keys_json: CString::new(e.credential_keys_json.clone())
509 .unwrap()
510 .into_raw(),
511 },
512 );
513 }
514 }
515}
516
517#[no_mangle]
523pub extern "C" fn tts_free_engine_info(engines: *mut types::tts_engine_info, count: i32) {
524 if engines.is_null() || count <= 0 {
525 return;
526 }
527 for i in 0..count {
528 unsafe {
529 let e = engines.add(i as usize);
530 if !(*e).id.is_null() {
531 let _ = CString::from_raw((*e).id);
532 }
533 if !(*e).name.is_null() {
534 let _ = CString::from_raw((*e).name);
535 }
536 if !(*e).credential_keys_json.is_null() {
537 let _ = CString::from_raw((*e).credential_keys_json);
538 }
539 }
540 }
541 let layout = std::alloc::Layout::array::<types::tts_engine_info>(count as usize).unwrap();
542 unsafe {
543 std::alloc::dealloc(engines.cast::<u8>(), layout);
544 }
545}
546
547#[no_mangle]
551pub extern "C" fn tts_get_last_error() -> *const c_char {
552 match LAST_ERROR.lock() {
553 Ok(guard) => match guard.as_ref() {
554 Some(cs) => cs.as_ptr(),
555 None => ptr::null(),
556 },
557 Err(_) => ptr::null(),
558 }
559}
560
561#[no_mangle]
566pub extern "C" fn tts_pause(ctx: *mut tts_ctx) {
567 if ctx.is_null() {
568 return;
569 }
570 let ctx_ref = unsafe { &*ctx };
571 let engine = ctx_ref.engine.lock().unwrap();
572 let _ = engine.pause();
573}
574
575#[no_mangle]
580pub extern "C" fn tts_resume(ctx: *mut tts_ctx) {
581 if ctx.is_null() {
582 return;
583 }
584 let ctx_ref = unsafe { &*ctx };
585 let engine = ctx_ref.engine.lock().unwrap();
586 let _ = engine.resume();
587}
588
589#[no_mangle]
597pub extern "C" fn tts_synth_to_bytes(
598 ctx: *mut tts_ctx,
599 text: *const c_char,
600 out_bytes: *mut *mut u8,
601 out_len: *mut usize,
602) -> i32 {
603 if ctx.is_null() || text.is_null() || out_bytes.is_null() || out_len.is_null() {
604 return -1;
605 }
606 let ctx_ref = unsafe { &*ctx };
607 let text_str = unsafe { CStr::from_ptr(text) }
608 .to_string_lossy()
609 .into_owned();
610 let voice = ctx_ref.voice_id.lock().unwrap().clone();
611 let rate = *ctx_ref.rate.lock().unwrap();
612 let pitch = *ctx_ref.pitch.lock().unwrap();
613 let volume = *ctx_ref.volume.lock().unwrap();
614
615 let engine = ctx_ref.engine.lock().unwrap();
616 match engine.synth_to_bytes(&text_str, voice.as_deref(), rate, pitch, volume) {
617 Ok(data) => {
618 if data.is_empty() {
619 unsafe {
620 *out_bytes = ptr::null_mut();
621 *out_len = 0;
622 }
623 return 0;
624 }
625 let len = data.len();
626 let layout = std::alloc::Layout::array::<u8>(len).unwrap();
627 let ptr = unsafe { std::alloc::alloc(layout) };
628 unsafe {
629 ptr::copy_nonoverlapping(data.as_ptr(), ptr, len);
630 *out_bytes = ptr;
631 *out_len = len;
632 }
633 0
634 }
635 Err(e) => {
636 *ctx_ref.last_error.lock().unwrap() = e.to_string();
637 -1
638 }
639 }
640}
641
/// Releases a byte buffer of `len` bytes through the global allocator, using
/// the layout `Layout::array::<u8>(len)`.
///
/// Nothing happens when `bytes` is null or `len` is zero. `len` must be the
/// exact length the buffer was allocated with.
#[no_mangle]
pub extern "C" fn tts_free_bytes(bytes: *mut u8, len: usize) {
    let nothing_to_free = bytes.is_null() || len == 0;
    if !nothing_to_free {
        let layout = std::alloc::Layout::array::<u8>(len).unwrap();
        // SAFETY: the caller must pass a buffer allocated by the global
        // allocator with exactly this layout, and must not free it twice.
        unsafe { std::alloc::dealloc(bytes, layout) };
    }
}