use std::pin::Pin;
use async_trait::async_trait;
use futures::Stream;
use crate::codec::AudioFormat;
use crate::error::AudioResult;
use crate::frame::AudioFrame;
/// Emotional tone tag that can be attached to a [`TtsRequest`].
///
/// This is a plain field-less enum. How (or whether) a particular provider
/// renders each tone is not defined in this file.
///
/// `Hash` is derived alongside `Eq` so the tag can be used directly as a
/// `HashMap`/`HashSet` key (for example, to index per-emotion settings).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Emotion {
    /// Neutral tone.
    Neutral,
    /// Happy tone.
    Happy,
    /// Sad tone.
    Sad,
    /// Angry tone.
    Angry,
    /// Whispered delivery.
    Whisper,
    /// Excited tone.
    Excited,
    /// Calm tone.
    Calm,
}
/// Description of a single voice, as listed by [`TtsProvider::voice_catalog`].
///
/// All fields are owned strings, so the type derives structural equality and
/// hashing; two `Voice` values are equal only when every field matches.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Voice {
    /// Identifier of the voice.
    // NOTE(review): presumably the value expected in `TtsRequest::voice` — confirm.
    pub id: String,
    /// Name of the voice.
    pub name: String,
    /// Language of the voice.
    // NOTE(review): the tag format (e.g. BCP 47) is not fixed by this file — confirm.
    pub language: String,
    /// Gender label of the voice, if one is provided.
    pub gender: Option<String>,
}
/// Parameters of a single text-to-speech synthesis call.
///
/// Passed by reference to [`TtsProvider::synthesize`] and
/// [`TtsProvider::synthesize_stream`]. A `Default` implementation exists in
/// this file, so callers can use struct-update syntax
/// (`TtsRequest { text, ..Default::default() }`).
#[derive(Debug, Clone)]
pub struct TtsRequest {
    /// Text to be synthesized.
    pub text: String,
    /// Voice to synthesize with.
    // NOTE(review): presumably matches a `Voice::id` from the provider's catalog — confirm.
    pub voice: String,
    /// Optional language for the request.
    pub language: Option<String>,
    /// Speaking speed; the `Default` implementation uses `1.0`.
    // NOTE(review): looks like a rate multiplier, but units/range are not shown here — confirm.
    pub speed: f32,
    /// Optional pitch setting.
    // NOTE(review): units and valid range are provider-defined as far as this file shows.
    pub pitch: Option<f32>,
    /// Optional emotional tone.
    pub emotion: Option<Emotion>,
    /// Audio format requested for the output.
    pub output_format: AudioFormat,
}
impl Default for TtsRequest {
fn default() -> Self {
Self {
text: String::new(),
voice: String::new(),
language: None,
speed: 1.0,
pitch: None,
emotion: None,
output_format: AudioFormat::Pcm16,
}
}
}
/// Interface implemented by text-to-speech backends.
///
/// Implementors must be `Send + Sync`. The async methods are provided through
/// the `async_trait` attribute macro.
#[async_trait]
pub trait TtsProvider: Send + Sync {
    /// Synthesizes `request` and resolves to a single [`AudioFrame`].
    ///
    /// # Errors
    ///
    /// Returns the `Err` variant of [`AudioResult`] when synthesis fails; the
    /// concrete failure conditions are defined by each implementation.
    async fn synthesize(&self, request: &TtsRequest) -> AudioResult<AudioFrame>;

    /// Synthesizes `request` and resolves to a stream of audio frames.
    ///
    /// The returned stream is pinned, boxed and `Send`; each item is itself an
    /// [`AudioResult`], so an error can be reported either when creating the
    /// stream or per yielded item.
    ///
    /// # Errors
    ///
    /// Returns the `Err` variant of [`AudioResult`] if the stream cannot be
    /// created; the concrete failure conditions are implementation-defined.
    async fn synthesize_stream(
        &self,
        request: &TtsRequest,
    ) -> AudioResult<Pin<Box<dyn Stream<Item = AudioResult<AudioFrame>> + Send>>>;

    /// Returns the voices this provider exposes, borrowed from `self`.
    fn voice_catalog(&self) -> &[Voice];
}