use anyhow::Result;
use colored::*;
use std::io::{self, Write};
pub async fn handle_transcribe(
audio_files: Vec<String>,
model: Option<String>,
provider: Option<String>,
language: Option<String>,
prompt: Option<String>,
format: Option<String>,
temperature: Option<f32>,
output: Option<String>,
debug: bool,
) -> Result<()> {
if debug {
crate::utils::cli_utils::set_debug_mode(true);
}
if audio_files.is_empty() {
anyhow::bail!("No audio files provided for transcription");
}
let config = crate::config::Config::load()?;
let model_str = model.unwrap_or_else(|| "whisper-1".to_string());
let format_str = format.unwrap_or_else(|| "text".to_string());
let (provider_name, model_name) = if let Some(p) = provider {
(p, model_str)
} else {
let provider_name = config
.providers
.iter()
.find(|(_, pc)| pc.models.iter().any(|m| m.contains("whisper")))
.map(|(name, _)| name.clone())
.unwrap_or_else(|| "openai".to_string());
(provider_name, model_str)
};
let provider_config = config.get_provider_with_auth(&provider_name)?;
let header_has_resolved_key = provider_config.headers.iter().any(|(k, v)| {
let k_l = k.to_lowercase();
(k_l.contains("key") || k_l.contains("token") || k_l.contains("auth"))
&& !v.trim().is_empty()
&& !v.contains("${api_key}")
});
if provider_config.api_key.is_none() && !header_has_resolved_key {
anyhow::bail!(
"No API key configured for provider '{}'. Add one with 'lc keys add {}'",
provider_name,
provider_name
);
}
let mut config_mut = config.clone();
let client = crate::core::chat::create_authenticated_client(&mut config_mut, &provider_name).await?;
if config_mut.get_cached_token(&provider_name) != config.get_cached_token(&provider_name) {
config_mut.save()?;
}
println!(
"{} Transcribing {} audio file(s)",
"🎤".blue(),
audio_files.len()
);
println!("{} Model: {}", "🤖".blue(), model_name);
println!("{} Provider: {}", "🏭".blue(), provider_name);
if let Some(ref lang) = language {
println!("{} Language: {}", "🌐".blue(), lang);
}
println!("{} Format: {}", "📄".blue(), format_str);
let mut all_transcriptions = Vec::new();
for (i, audio_file) in audio_files.iter().enumerate() {
println!(
"\n{} Processing file {}/{}: {}",
"📁".blue(),
i + 1,
audio_files.len(),
audio_file
);
print!("{} ", "Transcribing...".dimmed());
io::stdout().flush()?;
let audio_data = if audio_file.starts_with("http://") || audio_file.starts_with("https://") {
crate::utils::audio::process_audio_url(audio_file)?
} else {
crate::utils::audio::process_audio_file(std::path::Path::new(audio_file))?
};
let transcription_request = crate::core::provider::AudioTranscriptionRequest {
file: audio_data,
model: model_name.clone(),
language: language.clone(),
prompt: prompt.clone(),
response_format: Some(format_str.clone()),
temperature,
};
match client.transcribe_audio(&transcription_request).await {
Ok(response) => {
print!("\r{}\r", " ".repeat(20)); println!("{} Transcription complete!", "✅".green());
let transcription_text = response.text;
if let Some(ref output_file) = output {
let mut file = std::fs::OpenOptions::new()
.create(true)
.append(true)
.open(output_file)?;
if audio_files.len() > 1 {
writeln!(file, "\n=== {} ===", audio_file)?;
}
writeln!(file, "{}", transcription_text)?;
all_transcriptions.push(transcription_text);
} else {
if audio_files.len() > 1 {
println!("\n{} Transcription for {}:", "📝".blue(), audio_file);
} else {
println!("\n{} Transcription:", "📝".blue());
}
println!("{}", transcription_text);
all_transcriptions.push(transcription_text);
}
}
Err(e) => {
print!("\r{}\r", " ".repeat(20)); eprintln!("{} Failed to transcribe {}: {}", "❌".red(), audio_file, e);
}
}
}
if let Some(output_file) = output {
println!(
"\n{} All transcriptions saved to: {}",
"💾".green(),
output_file
);
}
Ok(())
}
pub async fn handle_tts(
text: String,
model: Option<String>,
provider: Option<String>,
voice: Option<String>,
format: Option<String>,
speed: Option<f32>,
output: Option<String>,
debug: bool,
) -> Result<()> {
if debug {
crate::utils::cli_utils::set_debug_mode(true);
}
let config = crate::config::Config::load()?;
let model_str = model.unwrap_or_else(|| "tts-1".to_string());
let voice_str = voice.unwrap_or_else(|| "alloy".to_string());
let format_str = format.unwrap_or_else(|| "mp3".to_string());
let output_path = output.unwrap_or_else(|| {
let timestamp = chrono::Local::now().format("%Y%m%d_%H%M%S");
format!("speech_{}.{}", timestamp, format_str)
});
let (provider_name, model_name) = if let Some(p) = provider {
(p, model_str)
} else {
let provider_name = config
.providers
.iter()
.find(|(_, pc)| pc.models.iter().any(|m| m.contains("tts")))
.map(|(name, _)| name.clone())
.unwrap_or_else(|| "openai".to_string());
(provider_name, model_str)
};
let provider_config = config.get_provider_with_auth(&provider_name)?;
let header_has_resolved_key = provider_config.headers.iter().any(|(k, v)| {
let k_l = k.to_lowercase();
(k_l.contains("key") || k_l.contains("token") || k_l.contains("auth"))
&& !v.trim().is_empty()
&& !v.contains("${api_key}")
});
if provider_config.api_key.is_none() && !header_has_resolved_key {
anyhow::bail!(
"No API key configured for provider '{}'. Add one with 'lc keys add {}'",
provider_name,
provider_name
);
}
let mut config_mut = config.clone();
let client = crate::core::chat::create_authenticated_client(&mut config_mut, &provider_name).await?;
if config_mut.get_cached_token(&provider_name) != config.get_cached_token(&provider_name) {
config_mut.save()?;
}
let display_text = if text.len() > 100 {
format!("{}...", &text[..100])
} else {
text.clone()
};
println!("{} Generating speech", "🔊".blue());
println!("{} Text: \"{}\"", "📝".blue(), display_text);
println!("{} Model: {}", "🤖".blue(), model_name);
println!("{} Provider: {}", "🏭".blue(), provider_name);
println!("{} Voice: {}", "🎭".blue(), voice_str);
println!("{} Format: {}", "🎵".blue(), format_str);
if let Some(s) = speed {
println!("{} Speed: {}x", "⚡".blue(), s);
}
print!("{} ", "Generating speech...".dimmed());
io::stdout().flush()?;
let tts_request = crate::core::provider::AudioSpeechRequest {
model: model_name,
input: text,
voice: voice_str,
response_format: Some(format_str.clone()),
speed,
};
match client.generate_speech(&tts_request).await {
Ok(audio_bytes) => {
print!("\r{}\r", " ".repeat(25));
let detected_extension = crate::utils::audio::get_audio_file_extension(&audio_bytes, Some(&format_str));
let is_pcm_conversion_needed = crate::utils::audio::is_likely_pcm(&audio_bytes) || format_str.to_lowercase() == "pcm";
let (final_audio_data, final_extension, conversion_info) = if is_pcm_conversion_needed {
let wav_data = crate::utils::audio::pcm_to_wav(&audio_bytes, None, None, None);
(wav_data, "wav", Some("Converted PCM to WAV for better compatibility"))
} else {
(audio_bytes, detected_extension, None)
};
let final_output = if output_path.ends_with(&format!(".{}", final_extension)) {
output_path
} else {
let path = std::path::Path::new(&output_path);
if let Some(stem) = path.file_stem() {
if let Some(parent) = path.parent() {
parent.join(format!("{}.{}", stem.to_string_lossy(), final_extension)).to_string_lossy().to_string()
} else {
format!("{}.{}", stem.to_string_lossy(), final_extension)
}
} else {
format!("{}.{}", output_path, final_extension)
}
};
std::fs::write(&final_output, &final_audio_data)?;
println!(
"{} Speech generated successfully!",
"✅".green()
);
println!("{} Saved to: {}", "💾".green(), final_output);
if let Some(info) = conversion_info {
println!("{} {}", "🔄".blue(), info);
}
let metadata = std::fs::metadata(&final_output)?;
let size_kb = metadata.len() as f64 / 1024.0;
println!("{} File size: {:.2} KB", "📊".blue(), size_kb);
println!("{} Format: {} ({})", "🎵".blue(), final_extension.to_uppercase(),
if is_pcm_conversion_needed { "24kHz, 16-bit, Mono" } else { "Original format" });
}
Err(e) => {
print!("\r{}\r", " ".repeat(25)); anyhow::bail!("Failed to generate speech: {}", e);
}
}
Ok(())
}