use crate::cli::VoiceChangerArgs;
use crate::output::{print_info, print_success};
use crate::utils::{
confirm_overwrite, format_to_extension, parse_output_format, write_bytes_to_file,
};
use crate::validation::validate_voice_settings;
#[cfg(feature = "audio")]
use crate::audio::audio_io;
use anyhow::Result;
use colored::*;
use elevenlabs_rs::{
endpoints::genai::voice_changer::{
VoiceChanger as VoiceChangerEndpoint, VoiceChangerBody, VoiceChangerQuery,
},
ElevenLabsClient, Model, VoiceSettings,
};
use std::path::Path;
/// Runs the voice-changer command: uploads an audio file to the voice changer
/// endpoint and writes the transformed audio to disk.
///
/// When `args.record` is set, control is handed to the microphone recording
/// flow instead (only available when built with the `audio` feature).
///
/// * `args` - parsed command-line arguments for this command.
/// * `api_key` - key used to construct the ElevenLabs client.
/// * `output_format` - format name; parsed for the request query and also used
///   to derive the default output file extension.
/// * `assume_yes` - forwarded to `confirm_overwrite`.
///
/// The output path is `args.output` when given, otherwise
/// `<input stem>_transformed.<ext>`.
///
/// # Errors
///
/// Returns an error when recording is requested without the `audio` feature,
/// when no input file is given or it does not exist, when the output format or
/// voice settings fail validation, when the API request fails, or when the
/// result cannot be written.
pub async fn execute(
    args: VoiceChangerArgs,
    api_key: &str,
    output_format: &str,
    assume_yes: bool,
) -> Result<()> {
    #[cfg(feature = "audio")]
    if args.record {
        return record_and_transform(args, api_key).await;
    }

    #[cfg(not(feature = "audio"))]
    if args.record {
        return Err(anyhow::anyhow!(
            "Recording not available. Rebuild with --features audio"
        ));
    }

    let file = args
        .file
        .as_ref()
        .ok_or_else(|| anyhow::anyhow!("No audio file specified. Use --file or --record"))?;

    let file_path = Path::new(file);
    if !file_path.exists() {
        return Err(anyhow::anyhow!("File not found: {}", file));
    }
    let file_size = std::fs::metadata(file_path)?.len();

    // Validate all user-supplied options before printing progress or prompting.
    let format = parse_output_format(output_format)?;
    validate_voice_settings(args.stability, args.similarity_boost, args.style)?;

    print_info(&format!("Transforming voice in '{}'...", file.cyan()));
    // Two decimals so that files well under 1 MB are not reported as "0 MB".
    print_info(&format!(
        "File size: {:.2} MB",
        file_size as f64 / 1_048_576.0
    ));
    print_info(&format!("Target voice: {}", args.voice.yellow()));
    print_info(&format!("Model: {}", args.model.yellow()));

    // Resolve the destination and ask about overwriting *before* calling the
    // API, so a cancelled run does not spend a request whose result would be
    // thrown away.
    let output_path = match args.output {
        Some(output) => output,
        None => {
            let stem = file_path
                .file_stem()
                .and_then(|s| s.to_str())
                .unwrap_or("audio");
            format!("{}_transformed.{}", stem, format_to_extension(output_format))
        }
    };
    let path = Path::new(&output_path);
    if !confirm_overwrite(path, assume_yes)? {
        print_info("Cancelled");
        return Ok(());
    }

    // Any model name other than the English STS one maps to the multilingual
    // STS model.
    let model = match args.model.as_str() {
        "eleven_english_sts_v2" => Model::ElevenEnglishV2,
        _ => Model::ElevenMultilingualV2STS,
    };

    let mut body = VoiceChangerBody::new(file.clone()).with_model_id(model);

    // Only attach voice settings when at least one override was given;
    // unspecified fields keep the `VoiceSettings::default()` values.
    let has_overrides =
        args.stability.is_some() || args.similarity_boost.is_some() || args.style.is_some();
    if has_overrides {
        let mut settings = VoiceSettings::default();
        if let Some(s) = args.stability {
            settings = settings.with_stability(s);
        }
        if let Some(sb) = args.similarity_boost {
            settings = settings.with_similarity_boost(sb);
        }
        if let Some(st) = args.style {
            settings = settings.with_style(st);
        }
        body = body.with_voice_settings(settings);
    }

    let query = VoiceChangerQuery::default().with_output_format(format);
    let endpoint = VoiceChangerEndpoint::new(&args.voice, body).with_query(query);

    let client = ElevenLabsClient::new(api_key);
    // The reported duration covers the API round trip only.
    let start_time = std::time::Instant::now();
    let audio = client.hit(endpoint).await.map_err(|e| anyhow::anyhow!(e))?;
    let duration = start_time.elapsed();

    write_bytes_to_file(&audio, path)?;
    print_success(&format!(
        "Voice transformed in {:.2}s -> {}",
        duration.as_secs_f64(),
        output_path.green()
    ));

    #[cfg(feature = "audio")]
    if args.play {
        print_info("Playing audio...");
        // Playback is best-effort: the file is already saved, so only report.
        if let Err(e) = audio_io::play_to_speaker(&audio) {
            print_info(&format!("Could not play audio: {}", e));
        }
    }

    Ok(())
}
/// Records audio from the microphone for `args.duration` seconds, sends it to
/// the voice changer endpoint, and optionally plays the result.
///
/// The recording is written to a temporary file because the request body is
/// built from a file path. The temporary file lives until this function
/// returns.
///
/// NOTE(review): the transformed audio is never written to disk here, so it is
/// only observable when `args.play` is set. The stability / similarity / style
/// arguments are also not applied in this mode. Confirm whether that is
/// intended.
///
/// # Errors
///
/// Returns an error when recording fails, when the temporary file cannot be
/// created or written, when its path is not valid UTF-8, or when the API
/// request fails.
#[cfg(feature = "audio")]
async fn record_and_transform(args: VoiceChangerArgs, api_key: &str) -> Result<()> {
    use std::io::Write;
    use tempfile::Builder;

    print_info(&format!(
        "Recording from microphone for {} seconds...",
        args.duration
    ));
    let audio_data = audio_io::record_from_microphone(args.duration)
        .map_err(|e| anyhow::anyhow!("Failed to record audio: {}", e))?;

    print_info("Recording complete. Saving to temporary file...");
    // Give the temp file an audio extension so that anything inspecting the
    // path (e.g. to derive a content type) can recognise it.
    // NOTE(review): assumes `record_from_microphone` yields WAV-encoded bytes
    // and that the client looks at the extension - confirm both.
    let mut temp_file = Builder::new().suffix(".wav").tempfile()?;
    temp_file.write_all(&audio_data)?;
    // Make sure the bytes are handed to the OS before the file is re-read by path.
    temp_file.flush()?;

    // Fail loudly instead of substituting an unrelated relative path that
    // would not point at the recording.
    let temp_path = temp_file
        .path()
        .to_str()
        .ok_or_else(|| anyhow::anyhow!("Temporary file path is not valid UTF-8"))?;

    print_info(&format!("Transforming voice with '{}'...", args.voice));

    // Any model name other than the English STS one maps to the multilingual
    // STS model.
    let model = match args.model.as_str() {
        "eleven_english_sts_v2" => Model::ElevenEnglishV2,
        _ => Model::ElevenMultilingualV2STS,
    };
    let body = VoiceChangerBody::new(temp_path).with_model_id(model);
    let endpoint = VoiceChangerEndpoint::new(&args.voice, body);

    let client = ElevenLabsClient::new(api_key);
    let audio = client.hit(endpoint).await.map_err(|e| anyhow::anyhow!(e))?;
    print_success(&format!("Transformed {} bytes of audio", audio.len()));

    if args.play {
        print_info("Playing transformed audio...");
        // Playback is best-effort; a failure is reported but is not an error.
        if let Err(e) = audio_io::play_to_speaker(&audio) {
            print_info(&format!("Could not play audio: {}", e));
        }
    }

    Ok(())
}