use anyhow::{anyhow, Context, Result};
use reqwest::Client;
use serde::Deserialize;
use serde_json::json;
use std::time::Duration;
/// Thin asynchronous client for the ElevenLabs HTTP API.
///
/// Holds a reusable `reqwest` client together with the API key that is sent
/// in the `xi-api-key` header of every request.
pub struct ElevenLabsClient {
    /// Shared HTTP client; reused for every request made by this value.
    http: Client,
    /// Secret key sent as the `xi-api-key` header.
    api_key: String,
}
/// One element of the `voices` array returned by the voices endpoint.
///
/// Only the two fields below are read; any other keys in the JSON object are
/// ignored during deserialization.
#[derive(Deserialize, Debug)]
pub struct Voice {
    /// Identifier used in the text-to-speech URL path.
    pub voice_id: String,
    /// Name of the voice as reported by the API.
    pub name: String,
}
impl ElevenLabsClient {
pub fn new(api_key: impl Into<String>) -> Result<Self> {
let api_key = api_key.into();
if api_key.is_empty() {
return Err(anyhow!("ElevenLabs API key is empty"));
}
let http = Client::builder()
.timeout(Duration::from_secs(30))
.build()
.context("build reqwest client")?;
Ok(Self { http, api_key })
}
pub async fn list_voices(&self) -> Result<Vec<Voice>> {
let resp = self
.http
.get("https://api.elevenlabs.io/v1/voices")
.header("xi-api-key", &self.api_key)
.send()
.await
.context("ElevenLabs list_voices")?;
if !resp.status().is_success() {
let body = resp.text().await.unwrap_or_default();
return Err(anyhow!("list_voices HTTP error: {}", body));
}
let data: serde_json::Value = resp.json().await.context("parse voices")?;
let voices = data["voices"]
.as_array()
.ok_or_else(|| anyhow!("unexpected response shape"))?
.iter()
.filter_map(|v| serde_json::from_value(v.clone()).ok())
.collect();
Ok(voices)
}
pub async fn synthesize(&self, text: &str, voice_id: &str) -> Result<Vec<u8>> {
let endpoint = format!(
"https://api.elevenlabs.io/v1/text-to-speech/{}",
voice_id
);
let body = json!({
"text": text,
"model_id": "eleven_multilingual_v2",
"voice_settings": {
"stability": 0.5,
"similarity_boost": 0.75
}
});
let resp = self
.http
.post(&endpoint)
.header("xi-api-key", &self.api_key)
.header("Content-Type", "application/json")
.json(&body)
.send()
.await
.context("ElevenLabs TTS request")?;
if !resp.status().is_success() {
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
return Err(anyhow!(
"TTS HTTP {}: {}",
status,
body.chars().take(300).collect::<String>()
));
}
let audio = resp.bytes().await.context("read audio bytes")?;
Ok(audio.to_vec())
}
pub async fn speak(&self, text: &str, voice_id: &str) -> Result<()> {
let audio = self.synthesize(text, voice_id).await?;
play_audio_bytes(&audio).await
}
}
async fn play_audio_bytes(audio: &[u8]) -> Result<()> {
let tmp = std::env::temp_dir().join(format!("asurada_tts_{}.mp3", std::process::id()));
tokio::fs::write(&tmp, audio).await.context("write tmp mp3")?;
let result = tokio::task::spawn_blocking({
let path = tmp.clone();
move || play_blocking(&path)
})
.await
.context("playback task")?;
let _ = tokio::fs::remove_file(&tmp).await;
result
}
/// Plays the audio file at `path` by running `afplay` and waiting for it to
/// exit (macOS build).
///
/// # Errors
/// Returns an error if `afplay` cannot be started or exits unsuccessfully.
#[cfg(target_os = "macos")]
fn play_blocking(path: &std::path::Path) -> Result<()> {
    let mut afplay = std::process::Command::new("afplay");
    afplay.arg(path);
    let exit = afplay.status().context("afplay")?;
    if exit.success() {
        Ok(())
    } else {
        Err(anyhow!("afplay exited {}", exit))
    }
}
/// Plays the audio file at `path` with the first command-line player that
/// succeeds, trying `mpg123`, `mpg321` and `paplay` in that order (non-macOS
/// build).
///
/// # Errors
/// Returns "No audio player found" only when none of the players could be
/// located. If a player was found but could not be started or exited
/// unsuccessfully, the error lists what each attempted player reported.
#[cfg(not(target_os = "macos"))]
fn play_blocking(path: &std::path::Path) -> Result<()> {
    const PLAYERS: [&str; 3] = ["mpg123", "mpg321", "paplay"];
    let mut failures: Vec<String> = Vec::new();
    for player in PLAYERS.iter() {
        match std::process::Command::new(player).arg(path).status() {
            Ok(status) if status.success() => return Ok(()),
            // The player exists but could not play the file; remember why and
            // fall through to the next candidate.
            Ok(status) => failures.push(format!("{} exited {}", player, status)),
            // Not installed: nothing worth reporting, try the next candidate.
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
            Err(err) => failures.push(format!("{}: {}", player, err)),
        }
    }
    if failures.is_empty() {
        Err(anyhow!("No audio player found. Install mpg123."))
    } else {
        Err(anyhow!("audio playback failed: {}", failures.join("; ")))
    }
}