tts/
lib.rs

//! A Text-To-Speech (TTS) library providing high-level interfaces to a variety of backends.
//!
//! Currently supported backends are:
//!
//! * Windows
//!   * Screen readers/SAPI via Tolk (requires `tolk` Cargo feature)
//!   * WinRT
//! * Linux via [Speech Dispatcher](https://freebsoft.org/speechd)
//! * MacOS/iOS
//!   * AppKit on MacOS 10.13 and below
//!   * AVFoundation on MacOS 10.14 and above, and iOS
//! * Android
//! * WebAssembly
12
13use std::collections::HashMap;
14#[cfg(target_os = "macos")]
15use std::ffi::CStr;
16use std::fmt;
17use std::rc::Rc;
18#[cfg(windows)]
19use std::string::FromUtf16Error;
20use std::sync::Mutex;
21use std::{boxed::Box, sync::RwLock};
22
23#[cfg(any(target_os = "macos", target_os = "ios"))]
24use cocoa_foundation::base::id;
25use dyn_clonable::*;
26use lazy_static::lazy_static;
27#[cfg(target_os = "macos")]
28use libc::c_char;
29#[cfg(target_os = "macos")]
30use objc::{class, msg_send, sel, sel_impl};
31pub use oxilangtag::LanguageTag;
32#[cfg(target_os = "linux")]
33use speech_dispatcher::Error as SpeechDispatcherError;
34use thiserror::Error;
35#[cfg(all(windows, feature = "tolk"))]
36use tolk::Tolk;
37
38mod backends;
39
/// The speech backends that can be requested from [`Tts::new`].
///
/// Each variant is only compiled in on the target (and with the Cargo
/// feature) named in its `cfg` attribute.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Backends {
    #[cfg(target_os = "android")]
    Android,
    #[cfg(target_os = "macos")]
    AppKit,
    #[cfg(any(target_os = "macos", target_os = "ios"))]
    AvFoundation,
    #[cfg(target_os = "linux")]
    SpeechDispatcher,
    #[cfg(all(windows, feature = "tolk"))]
    Tolk,
    #[cfg(target_arch = "wasm32")]
    Web,
    #[cfg(windows)]
    WinRt,
}

impl fmt::Display for Backends {
    /// Writes the human-readable backend name.
    ///
    /// No trailing newline is emitted, so the value can be embedded in a
    /// larger message or passed to `println!` without producing a blank line.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            #[cfg(target_os = "android")]
            Backends::Android => f.write_str("Android"),
            #[cfg(target_os = "macos")]
            Backends::AppKit => f.write_str("AppKit"),
            #[cfg(any(target_os = "macos", target_os = "ios"))]
            Backends::AvFoundation => f.write_str("AVFoundation"),
            #[cfg(target_os = "linux")]
            Backends::SpeechDispatcher => f.write_str("Speech Dispatcher"),
            #[cfg(all(windows, feature = "tolk"))]
            Backends::Tolk => f.write_str("Tolk"),
            #[cfg(target_arch = "wasm32")]
            Backends::Web => f.write_str("Web"),
            #[cfg(windows)]
            Backends::WinRt => f.write_str("Windows Runtime"),
        }
    }
}
79
/// Identifies one backend instance.
///
/// The value is used as the key under which a backend's utterance callbacks
/// are stored in the global callback registry.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum BackendId {
    #[cfg(target_os = "android")]
    Android(u64),
    #[cfg(any(target_os = "macos", target_os = "ios"))]
    AvFoundation(u64),
    #[cfg(target_os = "linux")]
    SpeechDispatcher(usize),
    #[cfg(target_arch = "wasm32")]
    Web(u64),
    #[cfg(windows)]
    WinRt(u64),
}

impl fmt::Display for BackendId {
    /// Writes the id as `Variant(number)`.
    ///
    /// No trailing newline is emitted, so the value can be embedded in a
    /// larger message.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            #[cfg(target_os = "android")]
            BackendId::Android(id) => write!(f, "Android({id})"),
            #[cfg(any(target_os = "macos", target_os = "ios"))]
            BackendId::AvFoundation(id) => write!(f, "AvFoundation({id})"),
            #[cfg(target_os = "linux")]
            BackendId::SpeechDispatcher(id) => write!(f, "SpeechDispatcher({id})"),
            #[cfg(target_arch = "wasm32")]
            BackendId::Web(id) => write!(f, "Web({id})"),
            #[cfg(windows)]
            BackendId::WinRt(id) => write!(f, "WinRT({id})"),
        }
    }
}
111
/// Identifies a single utterance handed to a backend.
///
/// Values of this type are returned by `speak` and passed to the utterance
/// callbacks.
///
/// # Note
///
/// On macOS and iOS the payload is a `cocoa_foundation::base::id`, which
/// blocks most trait implementations: only `Debug` is derived there. On every
/// other target the usual value-type traits (and, with the `serde` feature,
/// serialization) are derived as well.
#[derive(Debug)]
#[cfg_attr(
    not(any(target_os = "macos", target_os = "ios")),
    derive(Clone, Copy, Eq, Hash, PartialEq, PartialOrd, Ord)
)]
#[cfg_attr(
    all(feature = "serde", not(any(target_os = "macos", target_os = "ios"))),
    derive(serde::Serialize, serde::Deserialize)
)]
pub enum UtteranceId {
    #[cfg(target_os = "android")]
    Android(u64),
    #[cfg(any(target_os = "macos", target_os = "ios"))]
    AvFoundation(id),
    #[cfg(target_os = "linux")]
    SpeechDispatcher(u64),
    #[cfg(target_arch = "wasm32")]
    Web(u64),
    #[cfg(windows)]
    WinRt(u64),
}

// `Display` is not available for `cocoa_foundation::base::id`, so this impl
// is only provided where the payload is a plain integer.
#[cfg(not(any(target_os = "macos", target_os = "ios")))]
impl fmt::Display for UtteranceId {
    /// Writes the id as `Variant(number)`.
    ///
    /// No trailing newline is emitted, so the value can be embedded in a
    /// larger message.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            #[cfg(target_os = "android")]
            UtteranceId::Android(id) => write!(f, "Android({id})"),
            #[cfg(target_os = "linux")]
            UtteranceId::SpeechDispatcher(id) => write!(f, "SpeechDispatcher({id})"),
            #[cfg(target_arch = "wasm32")]
            UtteranceId::Web(id) => write!(f, "Web({id})"),
            #[cfg(windows)]
            UtteranceId::WinRt(id) => write!(f, "WinRt({id})"),
        }
    }
}

// SAFETY: the integer-carrying variants are plain data. NOTE(review): the
// `AvFoundation(id)` variant carries an Objective-C object handle; these
// impls assume it is only ever used as an opaque identifier across threads —
// confirm against the AVFoundation backend.
unsafe impl Send for UtteranceId {}

// SAFETY: see the note on the `Send` impl above.
unsafe impl Sync for UtteranceId {}
161
/// Flags describing which optional capabilities a backend supports.
///
/// [`Tts`] consults these flags before forwarding a call to the backend and
/// returns `Error::UnsupportedFeature` when the relevant flag is `false`.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Features {
    /// Whether the backend can report if it is currently speaking.
    pub is_speaking: bool,
    /// Whether the pitch can be read and set.
    pub pitch: bool,
    /// Whether the rate can be read and set.
    pub rate: bool,
    /// Whether speech can be stopped.
    pub stop: bool,
    /// Whether utterance begin/end/stop callbacks can be registered.
    pub utterance_callbacks: bool,
    /// Whether voices can be listed and selected.
    pub voice: bool,
    /// Whether the current voice can be queried.
    pub get_voice: bool,
    /// Whether the volume can be read and set.
    pub volume: bool,
}

impl fmt::Display for Features {
    /// Writes the pretty-printed `Debug` representation.
    ///
    /// No trailing newline is emitted, so the value can be embedded in a
    /// larger message.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{self:#?}")
    }
}

impl Features {
    /// Returns a `Features` value with every capability disabled.
    pub fn new() -> Self {
        Self::default()
    }
}
186
187#[derive(Debug, Error)]
188pub enum Error {
189    #[error("IO error: {0}")]
190    Io(#[from] std::io::Error),
191    #[error("Value not received")]
192    NoneError,
193    #[error("Operation failed")]
194    OperationFailed,
195    #[cfg(target_arch = "wasm32")]
196    #[error("JavaScript error: [0]")]
197    JavaScriptError(wasm_bindgen::JsValue),
198    #[cfg(target_os = "linux")]
199    #[error("Speech Dispatcher error: {0}")]
200    SpeechDispatcher(#[from] SpeechDispatcherError),
201    #[cfg(windows)]
202    #[error("WinRT error")]
203    WinRt(windows::core::Error),
204    #[cfg(windows)]
205    #[error("UTF string conversion failed")]
206    UtfStringConversionFailed(#[from] FromUtf16Error),
207    #[error("Unsupported feature")]
208    UnsupportedFeature,
209    #[error("Out of range")]
210    OutOfRange,
211    #[cfg(target_os = "android")]
212    #[error("JNI error: [0])]")]
213    JNI(#[from] jni::errors::Error),
214}
215
216#[clonable]
217pub trait Backend: Clone {
218    fn id(&self) -> Option<BackendId>;
219    fn supported_features(&self) -> Features;
220    fn speak(&mut self, text: &str, interrupt: bool) -> Result<Option<UtteranceId>, Error>;
221    fn stop(&mut self) -> Result<(), Error>;
222    fn min_rate(&self) -> f32;
223    fn max_rate(&self) -> f32;
224    fn normal_rate(&self) -> f32;
225    fn get_rate(&self) -> Result<f32, Error>;
226    fn set_rate(&mut self, rate: f32) -> Result<(), Error>;
227    fn min_pitch(&self) -> f32;
228    fn max_pitch(&self) -> f32;
229    fn normal_pitch(&self) -> f32;
230    fn get_pitch(&self) -> Result<f32, Error>;
231    fn set_pitch(&mut self, pitch: f32) -> Result<(), Error>;
232    fn min_volume(&self) -> f32;
233    fn max_volume(&self) -> f32;
234    fn normal_volume(&self) -> f32;
235    fn get_volume(&self) -> Result<f32, Error>;
236    fn set_volume(&mut self, volume: f32) -> Result<(), Error>;
237    fn is_speaking(&self) -> Result<bool, Error>;
238    fn voices(&self) -> Result<Vec<Voice>, Error>;
239    fn voice(&self) -> Result<Option<Voice>, Error>;
240    fn set_voice(&mut self, voice: &Voice) -> Result<(), Error>;
241}
242
243#[derive(Default)]
244struct Callbacks {
245    utterance_begin: Option<Box<dyn FnMut(UtteranceId)>>,
246    utterance_end: Option<Box<dyn FnMut(UtteranceId)>>,
247    utterance_stop: Option<Box<dyn FnMut(UtteranceId)>>,
248}
249
250unsafe impl Send for Callbacks {}
251
252unsafe impl Sync for Callbacks {}
253
254lazy_static! {
255    static ref CALLBACKS: Mutex<HashMap<BackendId, Callbacks>> = {
256        let m: HashMap<BackendId, Callbacks> = HashMap::new();
257        Mutex::new(m)
258    };
259}
260
261#[derive(Clone)]
262pub struct Tts(Rc<RwLock<Box<dyn Backend>>>);
263
264unsafe impl Send for Tts {}
265
266unsafe impl Sync for Tts {}
267
268impl Tts {
269    /// Create a new `TTS` instance with the specified backend.
270    pub fn new(backend: Backends) -> Result<Tts, Error> {
271        let backend = match backend {
272            #[cfg(target_os = "linux")]
273            Backends::SpeechDispatcher => {
274                let tts = backends::SpeechDispatcher::new()?;
275                Ok(Tts(Rc::new(RwLock::new(Box::new(tts)))))
276            }
277            #[cfg(target_arch = "wasm32")]
278            Backends::Web => {
279                let tts = backends::Web::new()?;
280                Ok(Tts(Rc::new(RwLock::new(Box::new(tts)))))
281            }
282            #[cfg(all(windows, feature = "tolk"))]
283            Backends::Tolk => {
284                let tts = backends::Tolk::new();
285                if let Some(tts) = tts {
286                    Ok(Tts(Rc::new(RwLock::new(Box::new(tts)))))
287                } else {
288                    Err(Error::NoneError)
289                }
290            }
291            #[cfg(windows)]
292            Backends::WinRt => {
293                let tts = backends::WinRt::new()?;
294                Ok(Tts(Rc::new(RwLock::new(Box::new(tts)))))
295            }
296            #[cfg(target_os = "macos")]
297            Backends::AppKit => Ok(Tts(Rc::new(RwLock::new(
298                Box::new(backends::AppKit::new()?),
299            )))),
300            #[cfg(any(target_os = "macos", target_os = "ios"))]
301            Backends::AvFoundation => Ok(Tts(Rc::new(RwLock::new(Box::new(
302                backends::AvFoundation::new()?,
303            ))))),
304            #[cfg(target_os = "android")]
305            Backends::Android => {
306                let tts = backends::Android::new()?;
307                Ok(Tts(Rc::new(RwLock::new(Box::new(tts)))))
308            }
309        };
310        if let Ok(backend) = backend {
311            if let Some(id) = backend.0.read().unwrap().id() {
312                let mut callbacks = CALLBACKS.lock().unwrap();
313                callbacks.insert(id, Callbacks::default());
314            }
315            Ok(backend)
316        } else {
317            backend
318        }
319    }
320
321    #[allow(clippy::should_implement_trait)]
322    pub fn default() -> Result<Tts, Error> {
323        #[cfg(target_os = "linux")]
324        let tts = Tts::new(Backends::SpeechDispatcher);
325        #[cfg(all(windows, feature = "tolk"))]
326        let tts = if let Ok(tts) = Tts::new(Backends::Tolk) {
327            Ok(tts)
328        } else {
329            Tts::new(Backends::WinRt)
330        };
331        #[cfg(all(windows, not(feature = "tolk")))]
332        let tts = Tts::new(Backends::WinRt);
333        #[cfg(target_arch = "wasm32")]
334        let tts = Tts::new(Backends::Web);
335        #[cfg(target_os = "macos")]
336        let tts = unsafe {
337            // Needed because the Rust NSProcessInfo structs report bogus values, and I don't want to pull in a full bindgen stack.
338            let pi: id = msg_send![class!(NSProcessInfo), new];
339            let version: id = msg_send![pi, operatingSystemVersionString];
340            let str: *const c_char = msg_send![version, UTF8String];
341            let str = CStr::from_ptr(str);
342            let str = str.to_string_lossy();
343            let version: Vec<&str> = str.split(' ').collect();
344            let version = version[1];
345            let version_parts: Vec<&str> = version.split('.').collect();
346            let major_version: i8 = version_parts[0].parse().unwrap();
347            let minor_version: i8 = version_parts[1].parse().unwrap();
348            if major_version >= 11 || minor_version >= 14 {
349                Tts::new(Backends::AvFoundation)
350            } else {
351                Tts::new(Backends::AppKit)
352            }
353        };
354        #[cfg(target_os = "ios")]
355        let tts = Tts::new(Backends::AvFoundation);
356        #[cfg(target_os = "android")]
357        let tts = Tts::new(Backends::Android);
358        tts
359    }
360
361    /// Returns the features supported by this TTS engine
362    pub fn supported_features(&self) -> Features {
363        self.0.read().unwrap().supported_features()
364    }
365
366    /// Speaks the specified text, optionally interrupting current speech.
367    pub fn speak<S: Into<String>>(
368        &mut self,
369        text: S,
370        interrupt: bool,
371    ) -> Result<Option<UtteranceId>, Error> {
372        self.0
373            .write()
374            .unwrap()
375            .speak(text.into().as_str(), interrupt)
376    }
377
378    /// Stops current speech.
379    pub fn stop(&mut self) -> Result<&Self, Error> {
380        let Features { stop, .. } = self.supported_features();
381        if stop {
382            self.0.write().unwrap().stop()?;
383            Ok(self)
384        } else {
385            Err(Error::UnsupportedFeature)
386        }
387    }
388
389    /// Returns the minimum rate for this speech synthesizer.
390    pub fn min_rate(&self) -> f32 {
391        self.0.read().unwrap().min_rate()
392    }
393
394    /// Returns the maximum rate for this speech synthesizer.
395    pub fn max_rate(&self) -> f32 {
396        self.0.read().unwrap().max_rate()
397    }
398
399    /// Returns the normal rate for this speech synthesizer.
400    pub fn normal_rate(&self) -> f32 {
401        self.0.read().unwrap().normal_rate()
402    }
403
404    /// Gets the current speech rate.
405    pub fn get_rate(&self) -> Result<f32, Error> {
406        let Features { rate, .. } = self.supported_features();
407        if rate {
408            self.0.read().unwrap().get_rate()
409        } else {
410            Err(Error::UnsupportedFeature)
411        }
412    }
413
414    /// Sets the desired speech rate.
415    pub fn set_rate(&mut self, rate: f32) -> Result<&Self, Error> {
416        let Features {
417            rate: rate_feature, ..
418        } = self.supported_features();
419        if rate_feature {
420            let mut backend = self.0.write().unwrap();
421            if rate < backend.min_rate() || rate > backend.max_rate() {
422                Err(Error::OutOfRange)
423            } else {
424                backend.set_rate(rate)?;
425                Ok(self)
426            }
427        } else {
428            Err(Error::UnsupportedFeature)
429        }
430    }
431
432    /// Returns the minimum pitch for this speech synthesizer.
433    pub fn min_pitch(&self) -> f32 {
434        self.0.read().unwrap().min_pitch()
435    }
436
437    /// Returns the maximum pitch for this speech synthesizer.
438    pub fn max_pitch(&self) -> f32 {
439        self.0.read().unwrap().max_pitch()
440    }
441
442    /// Returns the normal pitch for this speech synthesizer.
443    pub fn normal_pitch(&self) -> f32 {
444        self.0.read().unwrap().normal_pitch()
445    }
446
447    /// Gets the current speech pitch.
448    pub fn get_pitch(&self) -> Result<f32, Error> {
449        let Features { pitch, .. } = self.supported_features();
450        if pitch {
451            self.0.read().unwrap().get_pitch()
452        } else {
453            Err(Error::UnsupportedFeature)
454        }
455    }
456
457    /// Sets the desired speech pitch.
458    pub fn set_pitch(&mut self, pitch: f32) -> Result<&Self, Error> {
459        let Features {
460            pitch: pitch_feature,
461            ..
462        } = self.supported_features();
463        if pitch_feature {
464            let mut backend = self.0.write().unwrap();
465            if pitch < backend.min_pitch() || pitch > backend.max_pitch() {
466                Err(Error::OutOfRange)
467            } else {
468                backend.set_pitch(pitch)?;
469                Ok(self)
470            }
471        } else {
472            Err(Error::UnsupportedFeature)
473        }
474    }
475
476    /// Returns the minimum volume for this speech synthesizer.
477    pub fn min_volume(&self) -> f32 {
478        self.0.read().unwrap().min_volume()
479    }
480
481    /// Returns the maximum volume for this speech synthesizer.
482    pub fn max_volume(&self) -> f32 {
483        self.0.read().unwrap().max_volume()
484    }
485
486    /// Returns the normal volume for this speech synthesizer.
487    pub fn normal_volume(&self) -> f32 {
488        self.0.read().unwrap().normal_volume()
489    }
490
491    /// Gets the current speech volume.
492    pub fn get_volume(&self) -> Result<f32, Error> {
493        let Features { volume, .. } = self.supported_features();
494        if volume {
495            self.0.read().unwrap().get_volume()
496        } else {
497            Err(Error::UnsupportedFeature)
498        }
499    }
500
501    /// Sets the desired speech volume.
502    pub fn set_volume(&mut self, volume: f32) -> Result<&Self, Error> {
503        let Features {
504            volume: volume_feature,
505            ..
506        } = self.supported_features();
507        if volume_feature {
508            let mut backend = self.0.write().unwrap();
509            if volume < backend.min_volume() || volume > backend.max_volume() {
510                Err(Error::OutOfRange)
511            } else {
512                backend.set_volume(volume)?;
513                Ok(self)
514            }
515        } else {
516            Err(Error::UnsupportedFeature)
517        }
518    }
519
520    /// Returns whether this speech synthesizer is speaking.
521    pub fn is_speaking(&self) -> Result<bool, Error> {
522        let Features { is_speaking, .. } = self.supported_features();
523        if is_speaking {
524            self.0.read().unwrap().is_speaking()
525        } else {
526            Err(Error::UnsupportedFeature)
527        }
528    }
529
530    /// Returns list of available voices.
531    pub fn voices(&self) -> Result<Vec<Voice>, Error> {
532        let Features { voice, .. } = self.supported_features();
533        if voice {
534            self.0.read().unwrap().voices()
535        } else {
536            Err(Error::UnsupportedFeature)
537        }
538    }
539
540    /// Return the current speaking voice.
541    pub fn voice(&self) -> Result<Option<Voice>, Error> {
542        let Features { get_voice, .. } = self.supported_features();
543        if get_voice {
544            self.0.read().unwrap().voice()
545        } else {
546            Err(Error::UnsupportedFeature)
547        }
548    }
549
550    /// Set speaking voice.
551    pub fn set_voice(&mut self, voice: &Voice) -> Result<(), Error> {
552        let Features {
553            voice: voice_feature,
554            ..
555        } = self.supported_features();
556        if voice_feature {
557            self.0.write().unwrap().set_voice(voice)
558        } else {
559            Err(Error::UnsupportedFeature)
560        }
561    }
562
563    /// Called when this speech synthesizer begins speaking an utterance.
564    pub fn on_utterance_begin(
565        &self,
566        callback: Option<Box<dyn FnMut(UtteranceId)>>,
567    ) -> Result<(), Error> {
568        let Features {
569            utterance_callbacks,
570            ..
571        } = self.supported_features();
572        if utterance_callbacks {
573            let mut callbacks = CALLBACKS.lock().unwrap();
574            let id = self.0.read().unwrap().id().unwrap();
575            let callbacks = callbacks.get_mut(&id).unwrap();
576            callbacks.utterance_begin = callback;
577            Ok(())
578        } else {
579            Err(Error::UnsupportedFeature)
580        }
581    }
582
583    /// Called when this speech synthesizer finishes speaking an utterance.
584    pub fn on_utterance_end(
585        &self,
586        callback: Option<Box<dyn FnMut(UtteranceId)>>,
587    ) -> Result<(), Error> {
588        let Features {
589            utterance_callbacks,
590            ..
591        } = self.supported_features();
592        if utterance_callbacks {
593            let mut callbacks = CALLBACKS.lock().unwrap();
594            let id = self.0.read().unwrap().id().unwrap();
595            let callbacks = callbacks.get_mut(&id).unwrap();
596            callbacks.utterance_end = callback;
597            Ok(())
598        } else {
599            Err(Error::UnsupportedFeature)
600        }
601    }
602
603    /// Called when this speech synthesizer is stopped and still has utterances in its queue.
604    pub fn on_utterance_stop(
605        &self,
606        callback: Option<Box<dyn FnMut(UtteranceId)>>,
607    ) -> Result<(), Error> {
608        let Features {
609            utterance_callbacks,
610            ..
611        } = self.supported_features();
612        if utterance_callbacks {
613            let mut callbacks = CALLBACKS.lock().unwrap();
614            let id = self.0.read().unwrap().id().unwrap();
615            let callbacks = callbacks.get_mut(&id).unwrap();
616            callbacks.utterance_stop = callback;
617            Ok(())
618        } else {
619            Err(Error::UnsupportedFeature)
620        }
621    }
622
623    /*
624     * Returns `true` if a screen reader is available to provide speech.
625     */
626    #[allow(unreachable_code)]
627    pub fn screen_reader_available() -> bool {
628        #[cfg(target_os = "windows")]
629        {
630            #[cfg(feature = "tolk")]
631            {
632                let tolk = Tolk::new();
633                return tolk.detect_screen_reader().is_some();
634            }
635            #[cfg(not(feature = "tolk"))]
636            return false;
637        }
638        false
639    }
640}
641
642impl Drop for Tts {
643    fn drop(&mut self) {
644        if Rc::strong_count(&self.0) <= 1 {
645            if let Some(id) = self.0.read().unwrap().id() {
646                let mut callbacks = CALLBACKS.lock().unwrap();
647                callbacks.remove(&id);
648            }
649        }
650    }
651}
652
/// The gender associated with a [`Voice`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Gender {
    /// A male voice.
    Male,
    /// A female voice.
    Female,
}
658
659#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
660pub struct Voice {
661    pub(crate) id: String,
662    pub(crate) name: String,
663    pub(crate) gender: Option<Gender>,
664    pub(crate) language: LanguageTag<String>,
665}
666
667impl Voice {
668    pub fn id(&self) -> String {
669        self.id.clone()
670    }
671
672    pub fn name(&self) -> String {
673        self.name.clone()
674    }
675
676    pub fn gender(&self) -> Option<Gender> {
677        self.gender
678    }
679
680    pub fn language(&self) -> LanguageTag<String> {
681        self.language.clone()
682    }
683}