use crate::tts::{AudioFormat, Speaker, SynthesizedAudio};
use crate::{Result, VoiceConfig, VoiceError};
use async_trait::async_trait;
use futures::StreamExt;
use serde::Serialize;
/// Text-to-speech speaker that synthesizes audio through the ElevenLabs
/// streaming HTTP endpoint.
///
/// `Debug` is implemented by hand for this type so that the API key is
/// printed as `<redacted>` rather than in clear text.
pub struct ElevenLabsSpeaker {
/// HTTP client used to send synthesis requests.
client: reqwest::Client,
/// Secret sent in the `xi-api-key` request header.
api_key: String,
/// Voice identifier interpolated into the text-to-speech URL path.
voice_id: String,
/// Model name sent as `model_id` in the request body.
model: String,
/// Tuning values sent as the `voice_settings` object in the request body.
voice_settings: VoiceSettings,
}
impl std::fmt::Debug for ElevenLabsSpeaker {
    /// Formats the speaker for diagnostics.
    ///
    /// The HTTP client is shown as a fixed placeholder and the API key is
    /// always rendered as `<redacted>`, so the secret never reaches logs.
    /// The voice id, model and voice settings are printed as-is.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut repr = f.debug_struct("ElevenLabsSpeaker");

        // Placeholders: neither field carries its real contents.
        repr.field("client", &"reqwest::Client");
        repr.field("api_key", &"<redacted>");

        // Non-sensitive configuration is printed verbatim.
        repr.field("voice_id", &self.voice_id);
        repr.field("model", &self.model);
        repr.field("voice_settings", &self.voice_settings);

        repr.finish()
    }
}
/// Voice tuning values forwarded as the `voice_settings` object of each
/// synthesis request.
///
/// The `Default` implementation yields stability `0.45`, similarity boost
/// `0.85` and style `0.20`.
///
/// NOTE(review): no range validation happens in this file; the values are
/// presumably expected to lie in `0.0..=1.0` by the remote API — confirm.
#[derive(Debug, Clone, Serialize)]
pub struct VoiceSettings {
/// Sent as `voice_settings.stability`.
pub stability: f32,
/// Sent as `voice_settings.similarity_boost`.
pub similarity_boost: f32,
/// Sent as `voice_settings.style`.
pub style: f32,
}
impl Default for VoiceSettings {
fn default() -> Self {
Self {
stability: 0.45,
similarity_boost: 0.85,
style: 0.20,
}
}
}
impl ElevenLabsSpeaker {
    /// Creates a speaker from explicit credentials, using the default
    /// [`VoiceSettings`].
    ///
    /// # Panics
    ///
    /// Panics if the underlying `reqwest::Client` cannot be initialised
    /// (this is the documented behaviour of `reqwest::Client::new`). Use
    /// [`ElevenLabsSpeaker::from_config`] to get an error instead.
    pub fn new(
        api_key: impl Into<String>,
        voice_id: impl Into<String>,
        model: impl Into<String>,
    ) -> Self {
        Self {
            client: reqwest::Client::new(),
            api_key: api_key.into(),
            voice_id: voice_id.into(),
            model: model.into(),
            voice_settings: VoiceSettings::default(),
        }
    }

    /// Builds a speaker from a [`VoiceConfig`].
    ///
    /// The API key is taken from `config.elevenlabs_api_key` when it is
    /// present and non-empty; otherwise it is looked up through
    /// `car_secrets::resolve_env_or_keychain("ELEVENLABS_API_KEY")`.
    /// The voice id and model are copied from the config, and the default
    /// [`VoiceSettings`] are used.
    ///
    /// # Errors
    ///
    /// Returns [`VoiceError::Config`] when no API key can be resolved, or
    /// when the HTTP client cannot be constructed.
    pub fn from_config(config: &VoiceConfig) -> Result<Self> {
        let api_key = config
            .elevenlabs_api_key
            .clone()
            // An empty string is treated the same as "not configured".
            .filter(|k| !k.is_empty())
            .or_else(|| car_secrets::resolve_env_or_keychain("ELEVENLABS_API_KEY"))
            .ok_or_else(|| {
                VoiceError::Config(
                    "ELEVENLABS_API_KEY not set; set the env var or store it via \
                     `car secrets put ELEVENLABS_API_KEY`"
                        .into(),
                )
            })?;

        // `reqwest::Client::new()` panics when the client cannot be
        // initialised. This constructor is fallible, so surface that
        // failure as a configuration error instead of aborting the caller.
        let client = reqwest::Client::builder()
            .build()
            .map_err(|e| VoiceError::Config(format!("failed to build HTTP client: {e}")))?;

        Ok(Self {
            client,
            api_key,
            voice_id: config.elevenlabs_voice_id.clone(),
            model: config.elevenlabs_tts_model.clone(),
            voice_settings: VoiceSettings::default(),
        })
    }

    /// Replaces the voice settings and returns the updated speaker
    /// (consuming builder style).
    #[must_use]
    pub fn with_voice_settings(mut self, settings: VoiceSettings) -> Self {
        self.voice_settings = settings;
        self
    }

    /// Builds the JSON request body for `text`.
    ///
    /// The object contains `text`, `model_id` and a `voice_settings` object
    /// with `stability`, `similarity_boost` and `style`.
    pub fn request_body(&self, text: &str) -> serde_json::Value {
        // Serialise `VoiceSettings` through its `Serialize` impl rather than
        // listing the fields by hand, so the payload cannot drift from the
        // struct definition.
        serde_json::json!({
            "text": text,
            "model_id": self.model,
            "voice_settings": self.voice_settings,
        })
    }

    /// Returns the streaming text-to-speech URL for the configured voice.
    ///
    /// The `output_format` query parameter requests MP3, which is what
    /// `synth` labels the returned audio as; keep the two in sync.
    pub fn stream_url(&self) -> String {
        format!(
            "https://api.elevenlabs.io/v1/text-to-speech/{}/stream?output_format=mp3_44100_128",
            self.voice_id
        )
    }
}
#[async_trait]
impl Speaker for ElevenLabsSpeaker {
    /// Synthesizes `text` into MP3 audio via the streaming endpoint.
    ///
    /// The whole response stream is buffered in memory before returning.
    ///
    /// # Errors
    ///
    /// Returns [`VoiceError::Tts`] when:
    /// - `text` is empty or whitespace-only (no request is sent),
    /// - the HTTP request fails,
    /// - the server answers with a non-success status (the response body is
    ///   included in the message on a best-effort basis),
    /// - reading the response stream fails,
    /// - the response contains no audio bytes.
    async fn synth(&self, text: &str) -> Result<SynthesizedAudio> {
        if text.trim().is_empty() {
            return Err(VoiceError::Tts("empty text".into()));
        }

        let request = self
            .client
            .post(self.stream_url())
            .header("xi-api-key", &self.api_key)
            .header("accept", "audio/mpeg")
            .json(&self.request_body(text));

        let response = match request.send().await {
            Ok(response) => response,
            Err(e) => return Err(VoiceError::Tts(format!("http: {e}"))),
        };

        let status = response.status();
        if !status.is_success() {
            // Best effort: an unreadable error body becomes an empty string.
            let body = response.text().await.unwrap_or_default();
            return Err(VoiceError::Tts(format!("API {status}: {body}")));
        }

        // Drain the chunked body into a single contiguous buffer.
        let mut audio = Vec::with_capacity(64 * 1024);
        let mut body_stream = response.bytes_stream();
        loop {
            match body_stream.next().await {
                Some(Ok(piece)) => audio.extend_from_slice(&piece),
                Some(Err(e)) => return Err(VoiceError::Tts(format!("stream: {e}"))),
                None => break,
            }
        }

        if audio.is_empty() {
            return Err(VoiceError::Tts("empty audio response".into()));
        }

        Ok(SynthesizedAudio {
            bytes: audio,
            format: AudioFormat::Mp3,
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Environment variable manipulated by the `from_config` test.
    const API_KEY_VAR: &str = "ELEVENLABS_API_KEY";

    /// Restores an environment variable to its captured state when dropped.
    ///
    /// Restoration happens in `Drop`, so it also runs when an assertion in
    /// the test panics; otherwise a failing test would leave the process
    /// environment modified for every test that runs after it.
    struct EnvVarGuard {
        key: &'static str,
        saved: Option<std::ffi::OsString>,
    }

    impl EnvVarGuard {
        /// Captures the current value of `key` (including non-UTF-8 values).
        fn capture(key: &'static str) -> Self {
            Self {
                key,
                saved: std::env::var_os(key),
            }
        }
    }

    impl Drop for EnvVarGuard {
        fn drop(&mut self) {
            match self.saved.take() {
                Some(value) => std::env::set_var(self.key, value),
                None => std::env::remove_var(self.key),
            }
        }
    }

    /// The request body carries the text, the model id and the default
    /// voice settings.
    #[test]
    fn request_body_includes_text_model_and_voice_settings() {
        let speaker = ElevenLabsSpeaker::new("k", "v", "eleven_turbo_v2_5");
        let body = speaker.request_body("hello tokhn");
        assert_eq!(body["text"], "hello tokhn");
        assert_eq!(body["model_id"], "eleven_turbo_v2_5");
        let stability = body["voice_settings"]["stability"].as_f64().unwrap();
        let similarity = body["voice_settings"]["similarity_boost"].as_f64().unwrap();
        let style = body["voice_settings"]["style"].as_f64().unwrap();
        // f32 values widened to f64, hence the tolerance.
        assert!((stability - 0.45).abs() < 1e-4);
        assert!((similarity - 0.85).abs() < 1e-4);
        assert!((style - 0.20).abs() < 1e-4);
    }

    /// The stream URL embeds the voice id and requests the MP3 format.
    #[test]
    fn stream_url_uses_voice_id_and_format_query() {
        let speaker = ElevenLabsSpeaker::new("k", "voice_xyz", "m");
        let url = speaker.stream_url();
        assert!(url.contains("/text-to-speech/voice_xyz/stream"));
        assert!(url.contains("output_format=mp3_44100_128"));
    }

    /// Exercises the three key-resolution outcomes of `from_config`:
    /// explicit config key, environment fallback, and missing key.
    #[test]
    fn from_config_env_var_fallback_behavior() {
        // Declared first so it is dropped last and restores the variable
        // even if one of the assertions below panics.
        let _restore = EnvVarGuard::capture(API_KEY_VAR);

        // 1. A key present in the config is used as-is.
        let cfg = VoiceConfig {
            elevenlabs_api_key: Some("explicit-key".into()),
            ..VoiceConfig::default()
        };
        let speaker = ElevenLabsSpeaker::from_config(&cfg).unwrap();
        assert_eq!(speaker.api_key, "explicit-key");

        // 2. Without a config key, the environment variable is used.
        std::env::set_var(API_KEY_VAR, "env-key");
        let cfg = VoiceConfig {
            elevenlabs_api_key: None,
            ..VoiceConfig::default()
        };
        let speaker = ElevenLabsSpeaker::from_config(&cfg).unwrap();
        assert_eq!(speaker.api_key, "env-key");

        // 3. With neither source available, construction fails.
        // NOTE(review): `resolve_env_or_keychain` presumably also consults a
        // keychain; this assertion assumes no entry is stored there — confirm.
        std::env::remove_var(API_KEY_VAR);
        let cfg = VoiceConfig {
            elevenlabs_api_key: None,
            ..VoiceConfig::default()
        };
        let err = ElevenLabsSpeaker::from_config(&cfg).unwrap_err();
        assert!(matches!(err, VoiceError::Config(_)));
    }

    /// Whitespace-only text is rejected before any request is sent.
    #[tokio::test]
    async fn synth_rejects_empty_text() {
        let speaker = ElevenLabsSpeaker::new("k", "v", "m");
        let err = speaker.synth(" ").await.unwrap_err();
        assert!(matches!(err, VoiceError::Tts(_)));
    }
}