Skip to main content

lc/utils/
audio.rs

1use anyhow::Result;
2use std::path::Path;
3
/// Audio payload bundled with the metadata needed to describe it.
///
/// All fields are plain owned strings, so the type derives the common
/// value-type traits (`Debug`, `Clone`, equality) to make it easy to log,
/// copy and compare in tests.
// Nothing in this file constructs the struct, hence the lint suppression.
// NOTE(review): confirm whether another module uses it; if not, consider removing it.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AudioData {
    /// Base64 encoded audio data.
    pub data: String,
    /// Name of the audio file (presumably the original file name; confirm with callers).
    pub filename: String,
    /// MIME type of the audio (presumably a value such as `audio/wav`; confirm with callers).
    pub mime_type: String,
}
11
12/// Process an audio file and return base64 encoded data
13pub fn process_audio_file(file_path: &Path) -> Result<String> {
14    let audio_bytes = std::fs::read(file_path)?;
15
16    // Encode to base64
17    use base64::{engine::general_purpose, Engine as _};
18    let base64_data = general_purpose::STANDARD.encode(&audio_bytes);
19
20    // Create data URL with appropriate MIME type based on file extension
21    let extension = file_path
22        .extension()
23        .and_then(|ext| ext.to_str())
24        .unwrap_or("")
25        .to_lowercase();
26
27    let mime_type = match extension.as_str() {
28        "mp3" => "audio/mpeg",
29        "wav" => "audio/wav",
30        "flac" => "audio/flac",
31        "ogg" => "audio/ogg",
32        "m4a" | "mp4" => "audio/mp4",
33        "webm" => "audio/webm",
34        _ => "audio/wav", // Default to WAV
35    };
36
37    Ok(format!("data:{};base64,{}", mime_type, base64_data))
38}
39
40/// Process an audio URL and return base64 encoded data
41pub fn process_audio_url(url: &str) -> Result<String> {
42    // For now, just return the URL as-is
43    // In a full implementation, you might want to download and encode the audio
44    Ok(url.to_string())
45}
46
/// Generate a 44-byte WAV (RIFF) header for uncompressed PCM audio data.
///
/// The layout is the canonical PCM header: `RIFF` chunk, `WAVE` tag, a
/// 16-byte `fmt ` sub-chunk with audio format 1 (PCM), and the `data`
/// sub-chunk tag followed by its size. All integers are little-endian.
///
/// # Parameters
///
/// * `data_size` - number of PCM bytes that will follow the header.
/// * `sample_rate` - samples per second (Gemini's audio uses 24 kHz).
/// * `channels` - number of interleaved channels (1 = mono).
/// * `bits_per_sample` - bit depth of one sample (Gemini's audio uses 16).
///
/// # Overflow behaviour
///
/// Derived fields are computed in wider integer types and saturate at the
/// maximum of their on-disk width instead of overflowing, so the function
/// never panics regardless of input:
///
/// * RIFF chunk size = `data_size + 36`, capped at `u32::MAX`.
/// * byte rate = `sample_rate * channels * bits_per_sample / 8`, capped at `u32::MAX`.
/// * block align = `channels * bits_per_sample / 8`, capped at `u16::MAX`.
pub fn generate_wav_header(
    data_size: u32,
    sample_rate: u32,
    channels: u16,
    bits_per_sample: u16,
) -> Vec<u8> {
    // Needed on editions where `TryFrom` is not part of the prelude.
    use std::convert::TryFrom;

    // Total header length, and the part of it counted by the RIFF chunk size
    // (everything after the 8-byte "RIFF" + size prefix).
    const HEADER_LEN: usize = 44;
    const RIFF_SIZE_OVERHEAD: u32 = 36;

    // RIFF chunk size: header remainder plus audio payload.
    let riff_size = data_size.saturating_add(RIFF_SIZE_OVERHEAD);

    // Multiply first and divide last (as the WAV formula specifies), in u64 so
    // the intermediate product cannot overflow.
    let byte_rate_wide =
        u64::from(sample_rate) * u64::from(channels) * u64::from(bits_per_sample) / 8;
    let byte_rate = u32::try_from(byte_rate_wide).unwrap_or(u32::MAX);

    // Bytes per sample frame across all channels.
    let block_align_wide = u32::from(channels) * u32::from(bits_per_sample) / 8;
    let block_align = u16::try_from(block_align_wide).unwrap_or(u16::MAX);

    let mut header = Vec::with_capacity(HEADER_LEN);

    // RIFF chunk descriptor
    header.extend_from_slice(b"RIFF");
    header.extend_from_slice(&riff_size.to_le_bytes());
    header.extend_from_slice(b"WAVE");

    // "fmt " sub-chunk
    header.extend_from_slice(b"fmt ");
    header.extend_from_slice(&16u32.to_le_bytes()); // sub-chunk size (16 for PCM)
    header.extend_from_slice(&1u16.to_le_bytes()); // audio format (1 = PCM)
    header.extend_from_slice(&channels.to_le_bytes());
    header.extend_from_slice(&sample_rate.to_le_bytes());
    header.extend_from_slice(&byte_rate.to_le_bytes());
    header.extend_from_slice(&block_align.to_le_bytes());
    header.extend_from_slice(&bits_per_sample.to_le_bytes());

    // "data" sub-chunk header; the PCM payload itself is appended by the caller.
    header.extend_from_slice(b"data");
    header.extend_from_slice(&data_size.to_le_bytes());

    header
}
103
104/// Convert PCM audio data to WAV format
105///
106/// This function takes raw PCM audio data and wraps it with a proper WAV header
107/// to make it playable by standard media players.
108///
109/// Default parameters are based on Gemini's audio format:
110/// - 24kHz sample rate
111/// - 16-bit depth
112/// - Mono channel
113pub fn pcm_to_wav(
114    pcm_data: &[u8],
115    sample_rate: Option<u32>,
116    channels: Option<u16>,
117    bits_per_sample: Option<u16>,
118) -> Vec<u8> {
119    let sample_rate = sample_rate.unwrap_or(24000); // Default to 24kHz (Gemini's format)
120    let channels = channels.unwrap_or(1); // Default to mono
121    let bits_per_sample = bits_per_sample.unwrap_or(16); // Default to 16-bit
122
123    let data_size = pcm_data.len() as u32;
124    let header = generate_wav_header(data_size, sample_rate, channels, bits_per_sample);
125
126    let mut wav_data = Vec::with_capacity(header.len() + pcm_data.len());
127    wav_data.extend_from_slice(&header);
128    wav_data.extend_from_slice(pcm_data);
129
130    wav_data
131}
132
/// Heuristically decide whether `data` is headerless (raw) PCM audio.
///
/// Only the first four bytes are inspected; no statistical analysis of the
/// samples is performed. The result is `false` when:
///
/// - fewer than four bytes are available, or
/// - the data starts with a recognised signature: `RIFF` (WAV), `ID3` or an
///   MPEG sync pattern (`0xFF` followed by a byte whose top three bits are
///   set) for MP3, `fLaC` (FLAC), or `OggS` (OGG).
///
/// Everything else is assumed to be raw PCM and yields `true`.
pub fn is_likely_pcm(data: &[u8]) -> bool {
    match data {
        // WAV container
        [b'R', b'I', b'F', b'F', ..] => false,
        // MP3 with a leading ID3 tag
        [b'I', b'D', b'3', _, ..] => false,
        // MP3 sync frame
        [0xFF, sync, _, _, ..] if (*sync & 0xE0) == 0xE0 => false,
        // FLAC stream marker
        [b'f', b'L', b'a', b'C', ..] => false,
        // OGG page capture pattern
        [b'O', b'g', b'g', b'S', ..] => false,
        // At least four bytes and no known signature: treat as PCM
        [_, _, _, _, ..] => true,
        // Too short to judge
        _ => false,
    }
}
172
/// Pick the file extension to use when saving audio data.
///
/// # Parameters
///
/// * `data` - the audio bytes; only consulted when `requested_format` is `None`.
/// * `requested_format` - an explicit format name, compared case-insensitively.
///
/// # Returns
///
/// With a requested format: `mp3`, `wav`, `flac`, `ogg`, `aac` and `opus` map
/// to themselves; `pcm` and any unknown name map to `"wav"` (raw PCM is meant
/// to be wrapped in a WAV container for better compatibility).
///
/// Without a requested format the first four bytes of `data` are sniffed:
/// `ID3` or an MPEG sync pattern gives `"mp3"`, `fLaC` gives `"flac"`, `OggS`
/// gives `"ogg"`. Everything else, including `RIFF` data, unrecognised data
/// (treated as raw PCM) and inputs shorter than four bytes, gives `"wav"`.
pub fn get_audio_file_extension(data: &[u8], requested_format: Option<&str>) -> &'static str {
    // An explicit request always wins over sniffing.
    if let Some(format) = requested_format {
        return match format.to_lowercase().as_str() {
            "mp3" => "mp3",
            "flac" => "flac",
            "ogg" => "ogg",
            "aac" => "aac",
            "opus" => "opus",
            // "wav" itself, "pcm" (converted to WAV) and unknown formats.
            _ => "wav",
        };
    }

    // Single pass over the magic bytes. Each pattern needs at least four
    // bytes, matching the minimum length the sniffing has always required.
    match data {
        [b'I', b'D', b'3', _, ..] => "mp3",
        [0xFF, sync, _, _, ..] if (*sync & 0xE0) == 0xE0 => "mp3",
        [b'f', b'L', b'a', b'C', ..] => "flac",
        [b'O', b'g', b'g', b'S', ..] => "ogg",
        // RIFF/WAV, raw PCM (to be wrapped as WAV) and too-short input.
        _ => "wav",
    }
}
214
#[cfg(test)]
mod tests {
    use super::*;

    /// The generated header is 44 bytes long and carries the RIFF/WAVE/fmt tags.
    #[test]
    fn test_wav_header_generation() {
        let wav_header = generate_wav_header(1000, 44100, 2, 16);

        assert_eq!(wav_header.len(), 44);
        assert!(wav_header.starts_with(b"RIFF"));
        assert_eq!(&wav_header[8..12], b"WAVE");
        assert_eq!(&wav_header[12..16], b"fmt ");
    }

    /// Wrapping PCM data prepends a 44-byte header and keeps the payload length.
    #[test]
    fn test_pcm_to_wav_conversion() {
        // 1000 bytes of silence
        let silence = [0u8; 1000];
        let wav = pcm_to_wav(&silence, Some(44100), Some(2), Some(16));

        assert_eq!(wav.len(), 44 + silence.len());
        assert!(wav.starts_with(b"RIFF"));
        assert_eq!(&wav[8..12], b"WAVE");
    }

    /// Data with a RIFF signature is not PCM; unrecognised bytes are.
    #[test]
    fn test_pcm_detection() {
        let riff_prefix: &[u8] = b"RIFF\x24\x08\x00\x00WAVE";
        assert!(!is_likely_pcm(riff_prefix));

        let unrecognised: [u8; 6] = [0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC];
        assert!(is_likely_pcm(&unrecognised));
    }

    /// Extension selection by sniffing and by explicit request.
    #[test]
    fn test_file_extension_detection() {
        let raw_pcm: [u8; 4] = [0x12, 0x34, 0x56, 0x78];
        let riff_prefix: &[u8] = b"RIFF\x24\x08\x00\x00WAVE";

        // Sniffed from the data
        assert_eq!(get_audio_file_extension(&raw_pcm, None), "wav");
        assert_eq!(get_audio_file_extension(riff_prefix, None), "wav");

        // Explicit request overrides sniffing
        assert_eq!(get_audio_file_extension(&raw_pcm, Some("mp3")), "mp3");
        assert_eq!(get_audio_file_extension(&raw_pcm, Some("pcm")), "wav");
    }
}