use crate::cli::TextToSpeechArgs;
use crate::output::{print_info, print_success};
use crate::utils::{
confirm_overwrite, format_to_extension, generate_output_filename, get_input_text,
parse_output_format, validate_text_length, write_bytes_to_file,
};
use crate::validation::validate_voice_settings;
use anyhow::Result;
use colored::*;
use elevenlabs_rs::{
endpoints::genai::tts::{TextToSpeech, TextToSpeechBody, TextToSpeechQuery},
ElevenLabsClient, Model, VoiceSettings,
};
use std::path::Path;
/// Runs the text-to-speech command: synthesises audio for the input text and
/// writes it to a file, optionally playing it afterwards.
///
/// # Arguments
///
/// * `args` - Parsed command-line options (text or input file, voice, model,
///   optional voice settings, language, seed, output path, and play flag).
/// * `api_key` - Key handed to `ElevenLabsClient::new`.
/// * `output_format` - Format name; parsed for the request query and also used
///   to pick the extension of a generated output filename.
/// * `assume_yes` - Forwarded to `confirm_overwrite`.
///
/// # Errors
///
/// Propagates any error from reading or validating the input text, parsing the
/// output format, validating the voice settings, confirming the overwrite, the
/// API request, writing the audio file, or audio playback.
pub async fn execute(
    args: TextToSpeechArgs,
    api_key: &str,
    output_format: &str,
    assume_yes: bool,
) -> Result<()> {
    let text = get_input_text(args.text, args.file)?;
    validate_text_length(&text)?;

    let client = ElevenLabsClient::new(api_key);
    let model = parse_model(&args.model);
    let format = parse_output_format(output_format)?;
    validate_voice_settings(args.stability, args.similarity_boost, args.style)?;

    // Resolve the destination and settle the overwrite question BEFORE the
    // request is sent, so that declining does not cost a synthesis call whose
    // audio would only be thrown away.
    let output_path = args
        .output
        .unwrap_or_else(|| generate_output_filename("speech", format_to_extension(output_format)));
    let path = Path::new(&output_path);
    if !confirm_overwrite(path, assume_yes)? {
        print_info("Cancelled");
        return Ok(());
    }

    // Only send explicit voice settings when at least one override was given;
    // otherwise the request body carries none.
    let has_overrides = args.stability.is_some()
        || args.similarity_boost.is_some()
        || args.style.is_some()
        || args.speaker_boost;
    let voice_settings = if has_overrides {
        let mut settings = VoiceSettings::default();
        if let Some(stability) = args.stability {
            settings = settings.with_stability(stability);
        }
        if let Some(similarity) = args.similarity_boost {
            settings = settings.with_similarity_boost(similarity);
        }
        if let Some(style) = args.style {
            settings = settings.with_style(style);
        }
        settings = settings.use_speaker_boost(args.speaker_boost);
        Some(settings)
    } else {
        None
    };

    let mut body = TextToSpeechBody::new(&text).with_model_id(model);
    if let Some(settings) = voice_settings {
        body = body.with_voice_settings(settings);
    }
    if let Some(lang) = &args.language {
        body = body.with_language_code(lang);
    }
    if let Some(seed) = args.seed {
        body = body.with_seed(seed as u64);
    }

    print_info(&format!(
        "Generating speech with voice '{}'...",
        args.voice.cyan()
    ));
    print_info(&format!("Model: {}", args.model.yellow()));
    // `str::len` is the UTF-8 byte length; count scalar values so the figure
    // labelled "Characters" is right for non-ASCII text as well.
    print_info(&format!(
        "Characters: {}",
        text.chars().count().to_string().yellow()
    ));

    let query = TextToSpeechQuery::default().with_output_format(format);
    let endpoint = TextToSpeech::new(&args.voice, body).with_query(query);

    // Time only the request itself.
    let start_time = std::time::Instant::now();
    let audio = client.hit(endpoint).await.map_err(|e| anyhow::anyhow!(e))?;
    let duration = start_time.elapsed();

    write_bytes_to_file(&audio, path)?;
    print_success(&format!(
        "Generated speech in {:.2}s -> {}",
        duration.as_secs_f64(),
        output_path.green()
    ));

    if args.play {
        print_info("Playing audio...");
        elevenlabs_rs::utils::play(audio).map_err(|e| anyhow::anyhow!(e))?;
    }
    Ok(())
}
/// Maps a model identifier string to the client library's `Model` variant.
///
/// `"eleven_multilingual_v2"`, `"eleven_v3"`, and every unrecognised
/// identifier all resolve to `Model::ElevenMultilingualV2`.
// NOTE(review): "eleven_v3" is silently mapped to the multilingual v2 model —
// confirm this is intended rather than a placeholder.
fn parse_model(model: &str) -> Model {
    match model {
        // Flash family
        "eleven_flash_v2" => Model::ElevenFlashV2,
        "eleven_flash_v2_5" => Model::ElevenFlashV2_5,
        // Turbo family
        "eleven_turbo_v2" => Model::ElevenTurboV2,
        "eleven_turbo_v2_5" => Model::ElevenTurboV2_5,
        // Speech-to-speech identifiers
        "eleven_english_sts_v2" => Model::ElevenEnglishV2,
        "eleven_multilingual_sts_v2" => Model::ElevenMultilingualV2STS,
        // Default: multilingual v2 (also covers "eleven_multilingual_v2" and
        // "eleven_v3", which map to the same variant).
        _ => Model::ElevenMultilingualV2,
    }
}