1use anyhow::Result;
4use colored::*;
5use std::io::{self, Write};
6
7pub async fn handle_transcribe(
9 audio_files: Vec<String>,
10 model: Option<String>,
11 provider: Option<String>,
12 language: Option<String>,
13 prompt: Option<String>,
14 format: Option<String>,
15 temperature: Option<f32>,
16 output: Option<String>,
17 debug: bool,
18) -> Result<()> {
19 if debug {
21 crate::utils::cli_utils::set_debug_mode(true);
22 }
23
24 if audio_files.is_empty() {
25 anyhow::bail!("No audio files provided for transcription");
26 }
27
28 let config = crate::config::Config::load()?;
29
30 let model_str = model.unwrap_or_else(|| "whisper-1".to_string());
32 let format_str = format.unwrap_or_else(|| "text".to_string());
33
34 let (provider_name, model_name) = if let Some(p) = provider {
36 (p, model_str)
37 } else {
38 let provider_name = config
40 .providers
41 .iter()
42 .find(|(_, pc)| pc.models.iter().any(|m| m.contains("whisper")))
43 .map(|(name, _)| name.clone())
44 .unwrap_or_else(|| "openai".to_string());
45 (provider_name, model_str)
46 };
47
48 let provider_config = config.get_provider_with_auth(&provider_name)?;
50
51 let header_has_resolved_key = provider_config.headers.iter().any(|(k, v)| {
53 let k_l = k.to_lowercase();
54 (k_l.contains("key") || k_l.contains("token") || k_l.contains("auth"))
55 && !v.trim().is_empty()
56 && !v.contains("${api_key}")
57 });
58 if provider_config.api_key.is_none() && !header_has_resolved_key {
59 anyhow::bail!(
60 "No API key configured for provider '{}'. Add one with 'lc keys add {}'",
61 provider_name,
62 provider_name
63 );
64 }
65
66 let mut config_mut = config.clone();
67 let client = crate::core::chat::create_authenticated_client(&mut config_mut, &provider_name).await?;
68
69 if config_mut.get_cached_token(&provider_name) != config.get_cached_token(&provider_name) {
71 config_mut.save()?;
72 }
73
74 println!(
75 "{} Transcribing {} audio file(s)",
76 "🎤".blue(),
77 audio_files.len()
78 );
79 println!("{} Model: {}", "🤖".blue(), model_name);
80 println!("{} Provider: {}", "🏭".blue(), provider_name);
81 if let Some(ref lang) = language {
82 println!("{} Language: {}", "🌐".blue(), lang);
83 }
84 println!("{} Format: {}", "📄".blue(), format_str);
85
86 let mut all_transcriptions = Vec::new();
87
88 for (i, audio_file) in audio_files.iter().enumerate() {
89 println!(
90 "\n{} Processing file {}/{}: {}",
91 "📁".blue(),
92 i + 1,
93 audio_files.len(),
94 audio_file
95 );
96
97 print!("{} ", "Transcribing...".dimmed());
98 io::stdout().flush()?;
99
100 let audio_data = if audio_file.starts_with("http://") || audio_file.starts_with("https://") {
102 crate::utils::audio::process_audio_url(audio_file)?
103 } else {
104 crate::utils::audio::process_audio_file(std::path::Path::new(audio_file))?
105 };
106
107 let transcription_request = crate::core::provider::AudioTranscriptionRequest {
109 file: audio_data,
110 model: model_name.clone(),
111 language: language.clone(),
112 prompt: prompt.clone(),
113 response_format: Some(format_str.clone()),
114 temperature,
115 };
116
117 match client.transcribe_audio(&transcription_request).await {
119 Ok(response) => {
120 print!("\r{}\r", " ".repeat(20)); println!("{} Transcription complete!", "✅".green());
122
123 let transcription_text = response.text;
125
126 if let Some(ref output_file) = output {
127 let mut file = std::fs::OpenOptions::new()
129 .create(true)
130 .append(true)
131 .open(output_file)?;
132
133 if audio_files.len() > 1 {
134 writeln!(file, "\n=== {} ===", audio_file)?;
135 }
136 writeln!(file, "{}", transcription_text)?;
137
138 all_transcriptions.push(transcription_text);
139 } else {
140 if audio_files.len() > 1 {
142 println!("\n{} Transcription for {}:", "📝".blue(), audio_file);
143 } else {
144 println!("\n{} Transcription:", "📝".blue());
145 }
146 println!("{}", transcription_text);
147
148 all_transcriptions.push(transcription_text);
149 }
150 }
151 Err(e) => {
152 print!("\r{}\r", " ".repeat(20)); eprintln!("{} Failed to transcribe {}: {}", "❌".red(), audio_file, e);
154 }
155 }
156 }
157
158 if let Some(output_file) = output {
159 println!(
160 "\n{} All transcriptions saved to: {}",
161 "💾".green(),
162 output_file
163 );
164 }
165
166 Ok(())
167}
168
169pub async fn handle_tts(
171 text: String,
172 model: Option<String>,
173 provider: Option<String>,
174 voice: Option<String>,
175 format: Option<String>,
176 speed: Option<f32>,
177 output: Option<String>,
178 debug: bool,
179) -> Result<()> {
180 if debug {
182 crate::utils::cli_utils::set_debug_mode(true);
183 }
184
185 let config = crate::config::Config::load()?;
186
187 let model_str = model.unwrap_or_else(|| "tts-1".to_string());
189 let voice_str = voice.unwrap_or_else(|| "alloy".to_string());
190 let format_str = format.unwrap_or_else(|| "mp3".to_string());
191
192 let output_path = output.unwrap_or_else(|| {
194 let timestamp = chrono::Local::now().format("%Y%m%d_%H%M%S");
195 format!("speech_{}.{}", timestamp, format_str)
196 });
197
198 let (provider_name, model_name) = if let Some(p) = provider {
200 (p, model_str)
201 } else {
202 let provider_name = config
204 .providers
205 .iter()
206 .find(|(_, pc)| pc.models.iter().any(|m| m.contains("tts")))
207 .map(|(name, _)| name.clone())
208 .unwrap_or_else(|| "openai".to_string());
209 (provider_name, model_str)
210 };
211
212 let provider_config = config.get_provider_with_auth(&provider_name)?;
214
215 let header_has_resolved_key = provider_config.headers.iter().any(|(k, v)| {
217 let k_l = k.to_lowercase();
218 (k_l.contains("key") || k_l.contains("token") || k_l.contains("auth"))
219 && !v.trim().is_empty()
220 && !v.contains("${api_key}")
221 });
222 if provider_config.api_key.is_none() && !header_has_resolved_key {
223 anyhow::bail!(
224 "No API key configured for provider '{}'. Add one with 'lc keys add {}'",
225 provider_name,
226 provider_name
227 );
228 }
229
230 let mut config_mut = config.clone();
231 let client = crate::core::chat::create_authenticated_client(&mut config_mut, &provider_name).await?;
232
233 if config_mut.get_cached_token(&provider_name) != config.get_cached_token(&provider_name) {
235 config_mut.save()?;
236 }
237
238 let display_text = if text.len() > 100 {
240 format!("{}...", &text[..100])
241 } else {
242 text.clone()
243 };
244
245 println!("{} Generating speech", "🔊".blue());
246 println!("{} Text: \"{}\"", "📝".blue(), display_text);
247 println!("{} Model: {}", "🤖".blue(), model_name);
248 println!("{} Provider: {}", "🏭".blue(), provider_name);
249 println!("{} Voice: {}", "🎭".blue(), voice_str);
250 println!("{} Format: {}", "🎵".blue(), format_str);
251 if let Some(s) = speed {
252 println!("{} Speed: {}x", "⚡".blue(), s);
253 }
254
255 print!("{} ", "Generating speech...".dimmed());
256 io::stdout().flush()?;
257
258 let tts_request = crate::core::provider::AudioSpeechRequest {
260 model: model_name,
261 input: text,
262 voice: voice_str,
263 response_format: Some(format_str.clone()),
264 speed,
265 };
266
267 match client.generate_speech(&tts_request).await {
269 Ok(audio_bytes) => {
270 print!("\r{}\r", " ".repeat(25)); let detected_extension = crate::utils::audio::get_audio_file_extension(&audio_bytes, Some(&format_str));
274 let is_pcm_conversion_needed = crate::utils::audio::is_likely_pcm(&audio_bytes) || format_str.to_lowercase() == "pcm";
275
276 let (final_audio_data, final_extension, conversion_info) = if is_pcm_conversion_needed {
278 let wav_data = crate::utils::audio::pcm_to_wav(&audio_bytes, None, None, None);
280 (wav_data, "wav", Some("Converted PCM to WAV for better compatibility"))
281 } else {
282 (audio_bytes, detected_extension, None)
283 };
284
285 let final_output = if output_path.ends_with(&format!(".{}", final_extension)) {
287 output_path
288 } else {
289 let path = std::path::Path::new(&output_path);
291 if let Some(stem) = path.file_stem() {
292 if let Some(parent) = path.parent() {
293 parent.join(format!("{}.{}", stem.to_string_lossy(), final_extension)).to_string_lossy().to_string()
294 } else {
295 format!("{}.{}", stem.to_string_lossy(), final_extension)
296 }
297 } else {
298 format!("{}.{}", output_path, final_extension)
299 }
300 };
301
302 std::fs::write(&final_output, &final_audio_data)?;
304
305 println!(
306 "{} Speech generated successfully!",
307 "✅".green()
308 );
309 println!("{} Saved to: {}", "💾".green(), final_output);
310
311 if let Some(info) = conversion_info {
313 println!("{} {}", "🔄".blue(), info);
314 }
315
316 let metadata = std::fs::metadata(&final_output)?;
318 let size_kb = metadata.len() as f64 / 1024.0;
319 println!("{} File size: {:.2} KB", "📊".blue(), size_kb);
320
321 println!("{} Format: {} ({})", "🎵".blue(), final_extension.to_uppercase(),
323 if is_pcm_conversion_needed { "24kHz, 16-bit, Mono" } else { "Original format" });
324 }
325 Err(e) => {
326 print!("\r{}\r", " ".repeat(25)); anyhow::bail!("Failed to generate speech: {}", e);
328 }
329 }
330
331 Ok(())
332}