use serde::de::DeserializeOwned;
use std::sync::{Arc, Mutex, RwLock};
use std::time::{Duration, Instant};
use tauri::{plugin::PluginApi, AppHandle, Emitter, Runtime};
use tts::{Features, Tts as TtsEngine};
use crate::models::*;
/// Snapshot of the engine's voice list together with the instant it was
/// captured, so repeated `get_voices` calls can skip the native voice
/// enumeration while the snapshot is still fresh (see `VoiceCache::TTL`).
struct VoiceCache {
    // Voices as reported by the native engine at `cached_at`.
    voices: Vec<Voice>,
    // When the snapshot was taken; compared against `TTL` in `is_valid`.
    cached_at: Instant,
}
impl VoiceCache {
    /// How long a cached voice snapshot remains usable.
    const TTL: Duration = Duration::from_secs(60);

    /// Wraps `voices` in a snapshot stamped with the current instant.
    fn new(voices: Vec<Voice>) -> Self {
        let cached_at = Instant::now();
        Self { voices, cached_at }
    }

    /// Returns `true` while the snapshot is younger than [`Self::TTL`].
    fn is_valid(&self) -> bool {
        let age = self.cached_at.elapsed();
        age < Self::TTL
    }
}
/// Holds an app handle so the engine's utterance callbacks (which outlive
/// `init`) can emit `tts://…` events; shared between callbacks via `Arc`.
struct EventEmitter<R: Runtime> {
    app: AppHandle<R>,
}
impl<R: Runtime> EventEmitter<R> {
    /// Emits `event` on the namespaced `tts://<event_name>` channel.
    /// Emission failures are logged and otherwise ignored (best effort).
    fn emit(&self, event_name: &str, event: TtsEventPayload) {
        let channel = format!("tts://{}", event_name);
        match self.app.emit(&channel, event) {
            Ok(()) => {}
            Err(err) => log::warn!("Failed to emit TTS event '{}': {}", event_name, err),
        }
    }
}
/// Maps the cross-platform user rate (0.25..=4.0, where 1.0 is "normal")
/// onto the engine's native rate range, piecewise-linearly around the
/// engine's normal rate. Out-of-range inputs are clamped to the endpoints.
fn normalize_rate_for_platform(engine: &TtsEngine, user_rate: f32) -> f32 {
    let normal = engine.normal_rate();
    let (start, end, t) = if user_rate <= 1.0 {
        // 0.25..=1.0 maps onto min..=normal.
        (engine.min_rate(), normal, (user_rate - 0.25) / 0.75)
    } else {
        // 1.0..=4.0 maps onto normal..=max.
        (normal, engine.max_rate(), (user_rate - 1.0) / 3.0)
    };
    start + t.clamp(0.0, 1.0) * (end - start)
}
/// Initializes the desktop TTS plugin: constructs the native engine, wires up
/// utterance end/stop callbacks (when the platform supports them) so the
/// frontend receives `tts://speech:finish` / `tts://speech:cancel` events,
/// and returns the plugin state.
///
/// # Errors
///
/// Returns an error when the native engine cannot be constructed; on Linux a
/// missing Speech Dispatcher is reported with installation instructions.
pub fn init<R: Runtime, C: DeserializeOwned>(
    app: &AppHandle<R>,
    _api: PluginApi<R, C>,
) -> crate::Result<Tts<R>> {
    let engine = TtsEngine::default().map_err(|e| {
        #[cfg(target_os = "linux")]
        {
            let err_msg = e.to_string();
            if err_msg.contains("speech-dispatcher") || err_msg.contains("Speech Dispatcher") {
                return crate::Error::OperationFailed(
                    "Speech Dispatcher not available. Please install it:\n\
                     Ubuntu/Debian: sudo apt install speech-dispatcher\n\
                     Fedora: sudo dnf install speech-dispatcher\n\
                     Arch: sudo pacman -S speech-dispatcher"
                        .to_string(),
                );
            }
        }
        crate::Error::from(e)
    })?;
    let Features {
        utterance_callbacks,
        ..
    } = engine.supported_features();
    let emitter = Arc::new(EventEmitter { app: app.clone() });
    // Shared with the callbacks below so finish/cancel events can be tagged
    // with the id of the utterance most recently started by `speak`.
    let current_utterance_id: Arc<Mutex<Option<String>>> = Arc::new(Mutex::new(None));
    if utterance_callbacks {
        let end_emitter = Arc::clone(&emitter);
        let stop_emitter = Arc::clone(&emitter);
        // BUGFIX: these two clones were garbled to `Arc::clone(¤t_utterance_id)`
        // (mojibake of `&current_utterance_id`), which does not compile.
        let end_id = Arc::clone(&current_utterance_id);
        let stop_id = Arc::clone(&current_utterance_id);
        if let Err(e) = engine.on_utterance_end(Some(Box::new(move |_utterance_id| {
            let id = end_id.lock().ok().and_then(|g| g.clone());
            end_emitter.emit(
                "speech:finish",
                TtsEventPayload {
                    event_type: "speech:finish".to_string(),
                    id,
                    ..Default::default()
                },
            );
        }))) {
            log::warn!("Failed to set on_utterance_end callback: {:?}", e);
        }
        if let Err(e) = engine.on_utterance_stop(Some(Box::new(move |_utterance_id| {
            let id = stop_id.lock().ok().and_then(|g| g.clone());
            stop_emitter.emit(
                "speech:cancel",
                TtsEventPayload {
                    event_type: "speech:cancel".to_string(),
                    id,
                    ..Default::default()
                },
            );
        }))) {
            log::warn!("Failed to set on_utterance_stop callback: {:?}", e);
        }
        log::info!("TTS utterance callbacks enabled for speech:finish events");
    } else {
        log::warn!("TTS engine does not support utterance callbacks - speech:finish events will not be emitted");
    }
    Ok(Tts {
        app: app.clone(),
        engine: Mutex::new(engine),
        voice_cache: RwLock::new(None),
        has_utterance_callbacks: utterance_callbacks,
        current_utterance_id,
    })
}
/// Desktop TTS plugin state; created by `init` and shared with command handlers.
pub struct Tts<R: Runtime> {
    // Used to emit `tts://…` events from command methods.
    app: AppHandle<R>,
    // Native engine; all access goes through `with_engine` for locking.
    engine: Mutex<TtsEngine>,
    // Time-limited voice snapshot populated by `get_voices`.
    voice_cache: RwLock<Option<VoiceCache>>,
    // Whether the engine reports utterance callbacks; when false, `stop`
    // emits the cancel event itself.
    has_utterance_callbacks: bool,
    // Id of the most recently started utterance, shared with the engine
    // callbacks registered in `init`.
    current_utterance_id: Arc<Mutex<Option<String>>>,
}
impl<R: Runtime> Tts<R> {
fn with_engine<T, F>(&self, f: F) -> crate::Result<T>
where
F: FnOnce(&mut TtsEngine) -> crate::Result<T>,
{
let mut engine = self
.engine
.lock()
.map_err(|_| crate::Error::MutexPoisoned)?;
f(&mut engine)
}
fn emit_event(&self, event_name: &str, event: TtsEventPayload) {
let full_event_name = format!("tts://{}", event_name);
if let Err(e) = self.app.emit(&full_event_name, event) {
log::warn!("Failed to emit TTS event '{}': {}", event_name, e);
}
}
pub fn speak(&self, payload: SpeakRequest) -> crate::Result<SpeakResponse> {
let validated = payload.validate()?;
let utterance_id = uuid::Uuid::now_v7().to_string();
if let Ok(mut guard) = self.current_utterance_id.lock() {
*guard = Some(utterance_id.clone());
}
let result = self.with_engine(|engine| {
if let Some(ref voice_id) = validated.voice_id {
if let Ok(voices) = engine.voices() {
if let Some(voice) = voices.into_iter().find(|v| v.id() == *voice_id) {
let _ = engine.set_voice(&voice);
}
}
}
let all_defaults =
validated.rate == 1.0 && validated.pitch == 1.0 && validated.volume == 1.0;
if !all_defaults {
if validated.rate != 1.0 {
let rate_to_set = normalize_rate_for_platform(engine, validated.rate);
let _ = engine.set_rate(rate_to_set);
}
if validated.pitch != 1.0 {
let _ = engine.set_pitch(validated.pitch);
}
if validated.volume != 1.0 {
let _ = engine.set_volume(validated.volume);
}
}
let interrupt = validated.queue_mode != QueueMode::Add;
engine.speak(&validated.text, interrupt)?;
Ok(SpeakResponse {
success: true,
warning: None,
})
});
if result.is_ok() {
self.emit_event(
"speech:start",
TtsEventPayload {
event_type: "speech:start".to_string(),
id: Some(utterance_id),
..Default::default()
},
);
}
result
}
pub fn stop(&self) -> crate::Result<StopResponse> {
self.with_engine(|engine| {
engine.stop()?;
Ok(())
})?;
if !self.has_utterance_callbacks {
self.emit_event(
"speech:cancel",
TtsEventPayload {
event_type: "speech:cancel".to_string(),
..Default::default()
},
);
}
Ok(StopResponse { success: true })
}
pub fn get_voices(&self, payload: GetVoicesRequest) -> crate::Result<GetVoicesResponse> {
{
let cache = self
.voice_cache
.read()
.map_err(|_| crate::Error::MutexPoisoned)?;
if let Some(ref c) = *cache {
if c.is_valid() {
return Ok(self.filter_voices(&c.voices, &payload.language));
}
}
}
let voices = self.with_engine(|engine| {
let native_voices = engine.voices()?;
Ok(native_voices
.into_iter()
.map(|v| Voice {
id: v.id().to_string(),
name: v.name().to_string(),
language: v.language().to_string(),
})
.collect::<Vec<Voice>>())
})?;
{
let mut cache = self
.voice_cache
.write()
.map_err(|_| crate::Error::MutexPoisoned)?;
*cache = Some(VoiceCache::new(voices.clone()));
}
Ok(self.filter_voices(&voices, &payload.language))
}
fn filter_voices(&self, voices: &[Voice], language: &Option<String>) -> GetVoicesResponse {
let filtered: Vec<Voice> = voices
.iter()
.filter(|v| {
if let Some(ref lang_filter) = language {
v.language
.to_lowercase()
.contains(&lang_filter.to_lowercase())
} else {
true
}
})
.cloned()
.collect();
GetVoicesResponse { voices: filtered }
}
pub fn is_speaking(&self) -> crate::Result<IsSpeakingResponse> {
self.with_engine(|engine| {
let speaking = engine.is_speaking()?;
Ok(IsSpeakingResponse { speaking })
})
}
pub fn is_initialized(&self) -> crate::Result<IsInitializedResponse> {
let voice_count = self
.get_voices(GetVoicesRequest { language: None })
.map(|r| r.voices.len() as u32)
.unwrap_or(0);
Ok(IsInitializedResponse {
initialized: true,
voice_count,
})
}
pub fn pause_speaking(&self) -> crate::Result<PauseResumeResponse> {
Ok(PauseResumeResponse {
success: false,
reason: Some("Pause is not supported on desktop platform".to_string()),
})
}
pub fn resume_speaking(&self) -> crate::Result<PauseResumeResponse> {
Ok(PauseResumeResponse {
success: false,
reason: Some("Resume is not supported on desktop platform".to_string()),
})
}
pub fn preview_voice(&self, payload: PreviewVoiceRequest) -> crate::Result<SpeakResponse> {
payload.validate()?;
let speak_request = SpeakRequest {
text: payload.sample_text().into_owned(),
language: None,
voice_id: Some(payload.voice_id),
rate: 1.0,
pitch: 1.0,
volume: 1.0,
queue_mode: QueueMode::Flush,
};
self.speak(speak_request)
}
pub fn set_background_behavior(
&self,
_payload: SetBackgroundBehaviorRequest,
) -> crate::Result<SetBackgroundBehaviorResponse> {
Ok(SetBackgroundBehaviorResponse { success: true })
}
}