lc/cli/
audio.rs

1//! Audio processing commands (transcribe, TTS)
2
3use anyhow::Result;
4use colored::*;
5use std::io::{self, Write};
6
7/// Handle transcribe command
8pub async fn handle_transcribe(
9    audio_files: Vec<String>,
10    model: Option<String>,
11    provider: Option<String>,
12    language: Option<String>,
13    prompt: Option<String>,
14    format: Option<String>,
15    temperature: Option<f32>,
16    output: Option<String>,
17    debug: bool,
18) -> Result<()> {
19    // Set debug mode if requested
20    if debug {
21        crate::utils::cli_utils::set_debug_mode(true);
22    }
23
24    if audio_files.is_empty() {
25        anyhow::bail!("No audio files provided for transcription");
26    }
27
28    let config = crate::config::Config::load()?;
29
30    // Default to whisper-1 model if not specified
31    let model_str = model.unwrap_or_else(|| "whisper-1".to_string());
32    let format_str = format.unwrap_or_else(|| "text".to_string());
33    
34    // Resolve provider and model
35    let (provider_name, model_name) = if let Some(p) = provider {
36        (p, model_str)
37    } else {
38        // Try to find a provider that has the whisper model
39        let provider_name = config
40            .providers
41            .iter()
42            .find(|(_, pc)| pc.models.iter().any(|m| m.contains("whisper")))
43            .map(|(name, _)| name.clone())
44            .unwrap_or_else(|| "openai".to_string());
45        (provider_name, model_str)
46    };
47
48    // Get provider config with authentication
49    let provider_config = config.get_provider_with_auth(&provider_name)?;
50    
51    // Check for API key or custom auth headers
52    let header_has_resolved_key = provider_config.headers.iter().any(|(k, v)| {
53        let k_l = k.to_lowercase();
54        (k_l.contains("key") || k_l.contains("token") || k_l.contains("auth"))
55            && !v.trim().is_empty()
56            && !v.contains("${api_key}")
57    });
58    if provider_config.api_key.is_none() && !header_has_resolved_key {
59        anyhow::bail!(
60            "No API key configured for provider '{}'. Add one with 'lc keys add {}'",
61            provider_name,
62            provider_name
63        );
64    }
65
66    let mut config_mut = config.clone();
67    let client = crate::core::chat::create_authenticated_client(&mut config_mut, &provider_name).await?;
68
69    // Save config if tokens were updated
70    if config_mut.get_cached_token(&provider_name) != config.get_cached_token(&provider_name) {
71        config_mut.save()?;
72    }
73
74    println!(
75        "{} Transcribing {} audio file(s)",
76        "🎤".blue(),
77        audio_files.len()
78    );
79    println!("{} Model: {}", "🤖".blue(), model_name);
80    println!("{} Provider: {}", "🏭".blue(), provider_name);
81    if let Some(ref lang) = language {
82        println!("{} Language: {}", "🌐".blue(), lang);
83    }
84    println!("{} Format: {}", "📄".blue(), format_str);
85
86    let mut all_transcriptions = Vec::new();
87
88    for (i, audio_file) in audio_files.iter().enumerate() {
89        println!(
90            "\n{} Processing file {}/{}: {}",
91            "📁".blue(),
92            i + 1,
93            audio_files.len(),
94            audio_file
95        );
96
97        print!("{} ", "Transcribing...".dimmed());
98        io::stdout().flush()?;
99
100        // Process audio file (handles both local files and URLs)
101        let audio_data = if audio_file.starts_with("http://") || audio_file.starts_with("https://") {
102            crate::utils::audio::process_audio_url(audio_file)?
103        } else {
104            crate::utils::audio::process_audio_file(std::path::Path::new(audio_file))?
105        };
106
107        // Create transcription request
108        let transcription_request = crate::core::provider::AudioTranscriptionRequest {
109            file: audio_data,
110            model: model_name.clone(),
111            language: language.clone(),
112            prompt: prompt.clone(),
113            response_format: Some(format_str.clone()),
114            temperature,
115        };
116
117        // Transcribe audio
118        match client.transcribe_audio(&transcription_request).await {
119            Ok(response) => {
120                print!("\r{}\r", " ".repeat(20)); // Clear "Transcribing..."
121                println!("{} Transcription complete!", "✅".green());
122                
123                // Display or save transcription
124                let transcription_text = response.text;
125                
126                if let Some(ref output_file) = output {
127                    // Append to output file if multiple files
128                    let mut file = std::fs::OpenOptions::new()
129                        .create(true)
130                        .append(true)
131                        .open(output_file)?;
132                    
133                    if audio_files.len() > 1 {
134                        writeln!(file, "\n=== {} ===", audio_file)?;
135                    }
136                    writeln!(file, "{}", transcription_text)?;
137                    
138                    all_transcriptions.push(transcription_text);
139                } else {
140                    // Print to stdout
141                    if audio_files.len() > 1 {
142                        println!("\n{} Transcription for {}:", "📝".blue(), audio_file);
143                    } else {
144                        println!("\n{} Transcription:", "📝".blue());
145                    }
146                    println!("{}", transcription_text);
147                    
148                    all_transcriptions.push(transcription_text);
149                }
150            }
151            Err(e) => {
152                print!("\r{}\r", " ".repeat(20)); // Clear "Transcribing..."
153                eprintln!("{} Failed to transcribe {}: {}", "❌".red(), audio_file, e);
154            }
155        }
156    }
157
158    if let Some(output_file) = output {
159        println!(
160            "\n{} All transcriptions saved to: {}",
161            "💾".green(),
162            output_file
163        );
164    }
165
166    Ok(())
167}
168
169/// Handle TTS (text-to-speech) command
170pub async fn handle_tts(
171    text: String,
172    model: Option<String>,
173    provider: Option<String>,
174    voice: Option<String>,
175    format: Option<String>,
176    speed: Option<f32>,
177    output: Option<String>,
178    debug: bool,
179) -> Result<()> {
180    // Set debug mode if requested
181    if debug {
182        crate::utils::cli_utils::set_debug_mode(true);
183    }
184
185    let config = crate::config::Config::load()?;
186
187    // Default to tts-1 model if not specified
188    let model_str = model.unwrap_or_else(|| "tts-1".to_string());
189    let voice_str = voice.unwrap_or_else(|| "alloy".to_string());
190    let format_str = format.unwrap_or_else(|| "mp3".to_string());
191    
192    // Generate default output filename
193    let output_path = output.unwrap_or_else(|| {
194        let timestamp = chrono::Local::now().format("%Y%m%d_%H%M%S");
195        format!("speech_{}.{}", timestamp, format_str)
196    });
197    
198    // Resolve provider and model
199    let (provider_name, model_name) = if let Some(p) = provider {
200        (p, model_str)
201    } else {
202        // Try to find a provider that has TTS models
203        let provider_name = config
204            .providers
205            .iter()
206            .find(|(_, pc)| pc.models.iter().any(|m| m.contains("tts")))
207            .map(|(name, _)| name.clone())
208            .unwrap_or_else(|| "openai".to_string());
209        (provider_name, model_str)
210    };
211
212    // Get provider config with authentication
213    let provider_config = config.get_provider_with_auth(&provider_name)?;
214    
215    // Check for API key or custom auth headers
216    let header_has_resolved_key = provider_config.headers.iter().any(|(k, v)| {
217        let k_l = k.to_lowercase();
218        (k_l.contains("key") || k_l.contains("token") || k_l.contains("auth"))
219            && !v.trim().is_empty()
220            && !v.contains("${api_key}")
221    });
222    if provider_config.api_key.is_none() && !header_has_resolved_key {
223        anyhow::bail!(
224            "No API key configured for provider '{}'. Add one with 'lc keys add {}'",
225            provider_name,
226            provider_name
227        );
228    }
229
230    let mut config_mut = config.clone();
231    let client = crate::core::chat::create_authenticated_client(&mut config_mut, &provider_name).await?;
232
233    // Save config if tokens were updated
234    if config_mut.get_cached_token(&provider_name) != config.get_cached_token(&provider_name) {
235        config_mut.save()?;
236    }
237
238    // Truncate text for display if it's too long
239    let display_text = if text.len() > 100 {
240        format!("{}...", &text[..100])
241    } else {
242        text.clone()
243    };
244
245    println!("{} Generating speech", "🔊".blue());
246    println!("{} Text: \"{}\"", "📝".blue(), display_text);
247    println!("{} Model: {}", "🤖".blue(), model_name);
248    println!("{} Provider: {}", "🏭".blue(), provider_name);
249    println!("{} Voice: {}", "🎭".blue(), voice_str);
250    println!("{} Format: {}", "🎵".blue(), format_str);
251    if let Some(s) = speed {
252        println!("{} Speed: {}x", "⚡".blue(), s);
253    }
254
255    print!("{} ", "Generating speech...".dimmed());
256    io::stdout().flush()?;
257
258    // Create TTS request
259    let tts_request = crate::core::provider::AudioSpeechRequest {
260        model: model_name,
261        input: text,
262        voice: voice_str,
263        response_format: Some(format_str.clone()),
264        speed,
265    };
266
267    // Generate speech
268    match client.generate_speech(&tts_request).await {
269        Ok(audio_bytes) => {
270            print!("\r{}\r", " ".repeat(25)); // Clear "Generating speech..."
271            
272            // Determine the appropriate file extension and format
273            let detected_extension = crate::utils::audio::get_audio_file_extension(&audio_bytes, Some(&format_str));
274            let is_pcm_conversion_needed = crate::utils::audio::is_likely_pcm(&audio_bytes) || format_str.to_lowercase() == "pcm";
275            
276            // Process audio data for better compatibility
277            let (final_audio_data, final_extension, conversion_info) = if is_pcm_conversion_needed {
278                // Convert PCM to WAV for better playability
279                let wav_data = crate::utils::audio::pcm_to_wav(&audio_bytes, None, None, None);
280                (wav_data, "wav", Some("Converted PCM to WAV for better compatibility"))
281            } else {
282                (audio_bytes, detected_extension, None)
283            };
284            
285            // Determine final output filename
286            let final_output = if output_path.ends_with(&format!(".{}", final_extension)) {
287                output_path
288            } else {
289                // Replace or add the correct extension
290                let path = std::path::Path::new(&output_path);
291                if let Some(stem) = path.file_stem() {
292                    if let Some(parent) = path.parent() {
293                        parent.join(format!("{}.{}", stem.to_string_lossy(), final_extension)).to_string_lossy().to_string()
294                    } else {
295                        format!("{}.{}", stem.to_string_lossy(), final_extension)
296                    }
297                } else {
298                    format!("{}.{}", output_path, final_extension)
299                }
300            };
301            
302            // Save audio to file
303            std::fs::write(&final_output, &final_audio_data)?;
304            
305            println!(
306                "{} Speech generated successfully!",
307                "✅".green()
308            );
309            println!("{} Saved to: {}", "💾".green(), final_output);
310            
311            // Show conversion info if applicable
312            if let Some(info) = conversion_info {
313                println!("{} {}", "🔄".blue(), info);
314            }
315            
316            // Show file size
317            let metadata = std::fs::metadata(&final_output)?;
318            let size_kb = metadata.len() as f64 / 1024.0;
319            println!("{} File size: {:.2} KB", "📊".blue(), size_kb);
320            
321            // Show format info
322            println!("{} Format: {} ({})", "🎵".blue(), final_extension.to_uppercase(),
323                if is_pcm_conversion_needed { "24kHz, 16-bit, Mono" } else { "Original format" });
324        }
325        Err(e) => {
326            print!("\r{}\r", " ".repeat(25)); // Clear "Generating speech..."
327            anyhow::bail!("Failed to generate speech: {}", e);
328        }
329    }
330
331    Ok(())
332}