use std::collections::HashMap;
use std::fmt;
use std::os::raw::c_char;
/// Gender label attached to a voice.
///
/// `Unknown` is also what [`normalize_gender`] returns for any input it does
/// not recognise.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Gender {
    /// Male voice.
    Male,
    /// Female voice.
    Female,
    /// Gender not specified or not recognised.
    Unknown,
}
impl fmt::Display for Gender {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Male => write!(f, "Male"),
Self::Female => write!(f, "Female"),
Self::Unknown => write!(f, "Unknown"),
}
}
}
/// Maps a free-form gender string onto a [`Gender`] variant.
///
/// The comparison ignores ASCII case, so `"female"`, `"Female"` and
/// `"FEMALE"` all yield [`Gender::Female`] (likewise for `"male"`).
/// The input is not trimmed: anything other than those two words —
/// including the empty string or a value with surrounding whitespace —
/// yields [`Gender::Unknown`].
#[must_use]
pub fn normalize_gender(value: &str) -> Gender {
    // `eq_ignore_ascii_case` compares in place, so no temporary lowercase
    // `String` is allocated per call.
    if value.eq_ignore_ascii_case("female") {
        Gender::Female
    } else if value.eq_ignore_ascii_case("male") {
        Gender::Male
    } else {
        Gender::Unknown
    }
}
/// One language entry for a voice, carried in three string forms.
///
/// NOTE(review): the exact format of each field is set by the code that
/// builds these values, which is not visible here — the field docs below
/// are read off the field names and should be confirmed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LanguageCode {
    /// Language tag; presumably BCP 47. This is the value returned by
    /// `Voice::primary_language` for the first entry.
    pub bcp47: String,
    /// Language code; presumably ISO 639-3.
    pub iso639_3: String,
    /// Presumably a human-readable language name.
    pub display: String,
}
/// Description of a single voice.
#[derive(Debug, Clone)]
pub struct Voice {
    /// Identifier of the voice.
    pub id: String,
    /// Name of the voice.
    pub name: String,
    /// Gender label of the voice.
    pub gender: Gender,
    /// Provider of the voice (free-form string; values are set by callers
    /// outside this file).
    pub provider: String,
    /// Languages of the voice; `primary_language` reads the first entry.
    pub language_codes: Vec<LanguageCode>,
}
impl Voice {
    /// Returns the `bcp47` string of the first entry in `language_codes`.
    ///
    /// When the voice has no language entries, the empty string is returned.
    #[must_use]
    pub fn primary_language(&self) -> &str {
        match self.language_codes.first() {
            Some(code) => code.bcp47.as_str(),
            None => "",
        }
    }
}
/// Audio formats that can be named in speak options.
///
/// The `Display` impl renders each variant as its lowercase name
/// (`"mp3"`, `"wav"`, ...).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AudioFormat {
    /// Displayed as `"mp3"`.
    Mp3,
    /// Displayed as `"wav"`.
    Wav,
    /// Displayed as `"ogg"`.
    Ogg,
    /// Displayed as `"opus"`.
    Opus,
    /// Displayed as `"aac"`.
    Aac,
    /// Displayed as `"flac"`.
    Flac,
    /// Displayed as `"pcm"`.
    Pcm,
}
impl fmt::Display for AudioFormat {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Mp3 => write!(f, "mp3"),
Self::Wav => write!(f, "wav"),
Self::Ogg => write!(f, "ogg"),
Self::Opus => write!(f, "opus"),
Self::Aac => write!(f, "aac"),
Self::Flac => write!(f, "flac"),
Self::Pcm => write!(f, "pcm"),
}
}
}
/// Named speaking-rate presets.
///
/// `SpeechRate::rate_value` converts each preset to a numeric multiplier
/// (`0.5` for `XSlow` up to `1.5` for `XFast`, with `Medium` at `1.0`).
///
/// `Eq` is derived alongside `PartialEq`: the enum has no fields, so
/// equality is total, and this matches the other field-less enums in this
/// file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SpeechRate {
    /// Slowest preset (`0.5`).
    XSlow,
    /// Slow preset (`0.75`).
    Slow,
    /// Neutral preset (`1.0`).
    Medium,
    /// Fast preset (`1.25`).
    Fast,
    /// Fastest preset (`1.5`).
    XFast,
}
impl SpeechRate {
#[must_use]
pub fn rate_value(self) -> f32 {
match self {
Self::XSlow => 0.5,
Self::Slow => 0.75,
Self::Medium => 1.0,
Self::Fast => 1.25,
Self::XFast => 1.5,
}
}
}
/// Named pitch presets.
///
/// `SpeechPitch::pitch_value` converts each preset to a numeric multiplier
/// (`0.5` for `XLow` up to `1.5` for `XHigh`, with `Medium` at `1.0`).
///
/// `Eq` is derived alongside `PartialEq`: the enum has no fields, so
/// equality is total, and this matches the other field-less enums in this
/// file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SpeechPitch {
    /// Lowest preset (`0.5`).
    XLow,
    /// Low preset (`0.75`).
    Low,
    /// Neutral preset (`1.0`).
    Medium,
    /// High preset (`1.25`).
    High,
    /// Highest preset (`1.5`).
    XHigh,
}
impl SpeechPitch {
#[must_use]
pub fn pitch_value(self) -> f32 {
match self {
Self::XLow => 0.5,
Self::Low => 0.75,
Self::Medium => 1.0,
Self::High => 1.25,
Self::XHigh => 1.5,
}
}
}
/// Optional settings for a speak request.
///
/// Every field has a "not set" default (`None`, `false`, or an empty map),
/// so `SpeakOptions::default()` can be used as a base for struct-update
/// syntax.
#[derive(Debug, Clone, Default)]
pub struct SpeakOptions {
    /// Explicit numeric rate; `effective_rate` prefers this over
    /// `speech_rate`.
    pub rate: Option<f32>,
    /// Rate preset, used by `effective_rate` only when `rate` is `None`.
    pub speech_rate: Option<SpeechRate>,
    /// Explicit numeric pitch; `effective_pitch` prefers this over
    /// `speech_pitch`.
    pub pitch: Option<f32>,
    /// Pitch preset, used by `effective_pitch` only when `pitch` is `None`.
    pub speech_pitch: Option<SpeechPitch>,
    /// Volume; `effective_volume` falls back to `1.0` when unset.
    pub volume: Option<f32>,
    /// Voice selection (presumably a voice id — TODO confirm against callers).
    pub voice: Option<String>,
    /// Requested audio format, if any.
    pub format: Option<AudioFormat>,
    /// NOTE(review): consumers of the three flags below are outside this
    /// file; meanings are read off the names and should be confirmed.
    pub use_speech_markdown: bool,
    /// See the note on `use_speech_markdown`.
    pub use_word_boundary: bool,
    /// See the note on `use_speech_markdown`.
    pub raw_ssml: bool,
    /// Additional string key/value options; not interpreted in this file.
    pub extra: HashMap<String, String>,
}
impl SpeakOptions {
#[must_use]
pub fn effective_rate(&self) -> f32 {
self.rate
.or_else(|| self.speech_rate.map(SpeechRate::rate_value))
.unwrap_or(1.0)
}
#[must_use]
pub fn effective_pitch(&self) -> f32 {
self.pitch
.or_else(|| self.speech_pitch.map(SpeechPitch::pitch_value))
.unwrap_or(1.0)
}
#[must_use]
pub fn effective_volume(&self) -> f32 {
self.volume.unwrap_or(1.0)
}
}
/// A word-boundary record: a piece of text with an offset and a duration.
///
/// `Eq` is derived alongside `PartialEq` because every field (`String`,
/// `u64`, `u64`) has total equality.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WordBoundary {
    /// The text this boundary refers to.
    pub text: String,
    /// Offset of the boundary.
    /// NOTE(review): the unit is not established in this file — confirm.
    pub offset: u64,
    /// Duration of the boundary.
    /// NOTE(review): the unit is not established in this file — confirm.
    pub duration: u64,
}
/// Owned description of an engine.
///
/// The field names mirror those of the `#[repr(C)]` struct
/// `tts_engine_info` declared later in this file.
#[derive(Debug, Clone)]
pub struct EngineDescriptor {
    /// Identifier of the engine.
    pub id: String,
    /// Name of the engine.
    pub name: String,
    /// Whether the engine needs credentials.
    pub needs_credentials: bool,
    /// Credential keys as a string; presumably JSON-encoded, going by the
    /// field name — the exact schema is not visible here.
    pub credential_keys_json: String,
}
/// Metadata record for a Sherpa model.
///
/// NOTE(review): the source of these records is outside this file; field
/// docs below are read off names and types and should be confirmed.
#[derive(Debug, Clone)]
pub struct SherpaModelInfo {
    /// Identifier of the model.
    pub id: String,
    /// Model type, as a free-form string.
    pub model_type: String,
    /// Name of the model.
    pub name: String,
    /// Languages associated with the model.
    pub language: Vec<SherpaLanguage>,
    /// Sample rate (presumably in Hz).
    pub sample_rate: u32,
    /// Number of speakers.
    pub num_speakers: u32,
    /// URL for the model (presumably its download location).
    pub url: String,
    /// Compression flag (presumably whether the download is compressed).
    pub compression: bool,
    /// File size; megabytes, going by the field name.
    pub filesize_mb: f64,
}
/// Language entry used in `SherpaModelInfo::language`.
#[derive(Debug, Clone)]
pub struct SherpaLanguage {
    /// Language code (format not established in this file).
    pub lang_code: String,
    /// Name of the language.
    pub language_name: String,
    /// Country associated with the entry.
    pub country: String,
}
/// C-layout voice record made of five `char*` fields.
///
/// `#[repr(C)]` fixes the field order and layout, so the fields must stay
/// in this order.
///
/// NOTE(review): the pointers are presumably NUL-terminated C strings; who
/// allocates and frees them is decided by FFI code outside this file —
/// confirm before dereferencing or freeing.
#[repr(C)]
pub struct tts_voice {
    /// Voice identifier.
    pub id: *mut c_char,
    /// Voice name.
    pub name: *mut c_char,
    /// Voice language.
    pub language: *mut c_char,
    /// Voice gender.
    pub gender: *mut c_char,
    /// Engine the voice is associated with.
    pub engine: *mut c_char,
}
/// C-layout engine record.
///
/// The field names mirror those of `EngineDescriptor`, with the strings
/// held as `char*`. `#[repr(C)]` fixes the field order and layout, so the
/// fields must stay in this order.
///
/// NOTE(review): the pointers are presumably NUL-terminated C strings; who
/// allocates and frees them is decided by FFI code outside this file —
/// confirm before dereferencing or freeing.
#[repr(C)]
pub struct tts_engine_info {
    /// Engine identifier.
    pub id: *mut c_char,
    /// Engine name.
    pub name: *mut c_char,
    /// Whether the engine needs credentials.
    pub needs_credentials: bool,
    /// Credential keys string (presumably JSON, going by the field name).
    pub credential_keys_json: *mut c_char,
}
/// Error type of this module: a newtype around the error message.
///
/// The wrapped `String` is public and is exactly what the `Display` impl
/// prints.
#[derive(Debug)]
pub struct TtsError(
    /// The error message.
    pub String,
);
impl fmt::Display for TtsError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
impl std::error::Error for TtsError {}
impl From<anyhow::Error> for TtsError {
fn from(e: anyhow::Error) -> Self {
TtsError(e.to_string())
}
}
/// Shorthand for a `Result` whose error type is [`TtsError`].
pub type TtsResult<T> = std::result::Result<T, TtsError>;