livespeech_sdk/
audio.rs

1//! Audio encoding utilities for the LiveSpeech SDK
2
3use base64::{engine::general_purpose::STANDARD, Engine};
4
5/// Encode audio data to base64
6pub fn encode_to_base64(data: &[u8]) -> String {
7    STANDARD.encode(data)
8}
9
10/// Decode base64 to audio data
11pub fn decode_from_base64(base64: &str) -> Result<Vec<u8>, base64::DecodeError> {
12    STANDARD.decode(base64)
13}
14
/// Convert `f32` audio samples to signed 16-bit PCM.
///
/// Every sample is first clamped to `[-1.0, 1.0]`. Negative values are scaled by
/// `32768` and non-negative values by `32767`, so `-1.0` becomes `i16::MIN` and
/// `1.0` becomes `i16::MAX`. The `as i16` cast drops the fractional part
/// (truncation toward zero) and turns `NaN` into `0`.
pub fn float32_to_int16(samples: &[f32]) -> Vec<i16> {
    let mut pcm = Vec::with_capacity(samples.len());
    for &value in samples {
        let limited = value.clamp(-1.0, 1.0);
        // The i16 range is asymmetric, so each sign gets its own full-scale factor.
        let scale = if limited < 0.0 { 32768.0 } else { 32767.0 };
        pcm.push((limited * scale) as i16);
    }
    pcm
}
29
/// Convert signed 16-bit PCM samples to `f32` audio samples.
///
/// Negative samples are divided by `32768` and non-negative samples by `32767`,
/// so `i16::MIN` maps to `-1.0` and `i16::MAX` maps to `1.0`.
pub fn int16_to_float32(samples: &[i16]) -> Vec<f32> {
    let mut floats = Vec::with_capacity(samples.len());
    for &pcm in samples {
        // The i16 range is asymmetric, so each sign has its own full-scale divisor.
        let full_scale = if pcm < 0 { 32768.0 } else { 32767.0 };
        floats.push(f32::from(pcm) / full_scale);
    }
    floats
}
43
/// Serialize `i16` samples into bytes, two bytes per sample in little-endian order.
pub fn int16_to_bytes(samples: &[i16]) -> Vec<u8> {
    samples
        .iter()
        .flat_map(|sample| sample.to_le_bytes())
        .collect()
}
52
/// Parse bytes into `i16` samples, reading each pair of bytes as little-endian.
///
/// Only complete two-byte pairs are converted; when `bytes` has an odd length the
/// final byte is ignored.
pub fn bytes_to_int16(bytes: &[u8]) -> Vec<i16> {
    let mut samples = Vec::with_capacity(bytes.len() / 2);
    for pair in bytes.chunks_exact(2) {
        samples.push(i16::from_le_bytes([pair[0], pair[1]]));
    }
    samples
}
60
/// Create the 44-byte RIFF/WAVE header for PCM audio.
///
/// The header consists of the `RIFF` chunk descriptor, a 16-byte `fmt ` sub-chunk
/// with audio format `1` (PCM) and the `data` sub-chunk header. All integer fields
/// are written little-endian.
///
/// # Arguments
///
/// * `data_length` - size in bytes of the PCM payload that follows the header.
/// * `sample_rate` - value of the sample-rate field.
/// * `channels` - value of the channel-count field.
/// * `bit_depth` - value of the bits-per-sample field.
///
/// # Overflow behaviour
///
/// The derived fields (RIFF chunk size, byte rate, block align) saturate at the
/// maximum of their field type instead of overflowing, so extreme arguments never
/// panic or wrap around.
pub fn create_wav_header(data_length: u32, sample_rate: u32, channels: u16, bit_depth: u16) -> Vec<u8> {
    // Bytes of header that follow the RIFF chunk-size field itself (44 - 8).
    const RIFF_OVERHEAD: u32 = 36;

    // Widen before multiplying so the intermediate product cannot overflow; the
    // `min` guarantees the narrowing cast below is lossless.
    let byte_rate = (u64::from(sample_rate) * u64::from(channels) * u64::from(bit_depth) / 8)
        .min(u64::from(u32::MAX)) as u32;
    let block_align =
        (u32::from(channels) * u32::from(bit_depth) / 8).min(u32::from(u16::MAX)) as u16;
    let riff_size = data_length.saturating_add(RIFF_OVERHEAD);

    let mut header = Vec::with_capacity(44);

    // RIFF chunk descriptor
    header.extend_from_slice(b"RIFF");
    header.extend_from_slice(&riff_size.to_le_bytes());
    header.extend_from_slice(b"WAVE");

    // fmt sub-chunk
    header.extend_from_slice(b"fmt ");
    header.extend_from_slice(&16u32.to_le_bytes()); // Sub-chunk size
    header.extend_from_slice(&1u16.to_le_bytes()); // Audio format (1 = PCM)
    header.extend_from_slice(&channels.to_le_bytes());
    header.extend_from_slice(&sample_rate.to_le_bytes());
    header.extend_from_slice(&byte_rate.to_le_bytes());
    header.extend_from_slice(&block_align.to_le_bytes());
    header.extend_from_slice(&bit_depth.to_le_bytes());

    // data sub-chunk header
    header.extend_from_slice(b"data");
    header.extend_from_slice(&data_length.to_le_bytes());

    header
}
89
90/// Wrap PCM data in a WAV container
91pub fn wrap_pcm_in_wav(pcm_data: &[u8], sample_rate: u32, channels: u16, bit_depth: u16) -> Vec<u8> {
92    let header = create_wav_header(pcm_data.len() as u32, sample_rate, channels, bit_depth);
93    let mut wav = header;
94    wav.extend_from_slice(pcm_data);
95    wav
96}
97
/// Audio encoder/decoder.
///
/// Stores the PCM format parameters that `wrap_wav` passes on to the WAV header.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AudioEncoder {
    /// Sample rate written to the WAV header (samples per second).
    pub sample_rate: u32,
    /// Channel count written to the WAV header.
    pub channels: u16,
    /// Bits per sample written to the WAV header.
    pub bit_depth: u16,
}
105
106impl Default for AudioEncoder {
107    fn default() -> Self {
108        Self {
109            sample_rate: 16000,
110            channels: 1,
111            bit_depth: 16,
112        }
113    }
114}
115
116impl AudioEncoder {
117    /// Create a new audio encoder with default settings
118    pub fn new() -> Self {
119        Self::default()
120    }
121
122    /// Create with custom settings
123    pub fn with_settings(sample_rate: u32, channels: u16, bit_depth: u16) -> Self {
124        Self {
125            sample_rate,
126            channels,
127            bit_depth,
128        }
129    }
130
131    /// Encode audio bytes to base64
132    pub fn encode(&self, data: &[u8]) -> String {
133        encode_to_base64(data)
134    }
135
136    /// Decode base64 to audio bytes
137    pub fn decode(&self, base64: &str) -> Result<Vec<u8>, base64::DecodeError> {
138        decode_from_base64(base64)
139    }
140
141    /// Convert f32 samples to bytes for transmission
142    pub fn from_float32(&self, samples: &[f32]) -> Vec<u8> {
143        let int16 = float32_to_int16(samples);
144        int16_to_bytes(&int16)
145    }
146
147    /// Convert received bytes to f32 samples
148    pub fn to_float32(&self, bytes: &[u8]) -> Vec<f32> {
149        let int16 = bytes_to_int16(bytes);
150        int16_to_float32(&int16)
151    }
152
153    /// Wrap PCM data in WAV format
154    pub fn wrap_wav(&self, pcm_data: &[u8]) -> Vec<u8> {
155        wrap_pcm_in_wav(pcm_data, self.sample_rate, self.channels, self.bit_depth)
156    }
157}
158
#[cfg(test)]
mod tests {
    use super::*;

    /// Base64 encoding produces the expected text and decodes back to the input.
    #[test]
    fn test_base64_roundtrip() {
        let data = vec![1u8, 2, 3, 4, 5];
        let encoded = encode_to_base64(&data);
        assert_eq!(encoded, "AQIDBAU=");
        let decoded = decode_from_base64(&encoded).unwrap();
        assert_eq!(data, decoded);
    }

    /// Input that is not valid base64 is reported as an error, not a panic.
    #[test]
    fn test_base64_invalid_input() {
        assert!(decode_from_base64("not base64!").is_err());
    }

    /// f32 -> i16 -> f32 stays within tolerance and hits the i16 endpoints exactly.
    #[test]
    fn test_float_int_roundtrip() {
        let floats = [0.0_f32, 0.5, -0.5, 1.0, -1.0];
        let int16 = float32_to_int16(&floats);
        assert_eq!(int16[0], 0);
        assert_eq!(int16[3], i16::MAX);
        assert_eq!(int16[4], i16::MIN);

        let back = int16_to_float32(&int16);
        // `zip` stops at the shorter side, so check the lengths explicitly.
        assert_eq!(floats.len(), back.len());

        for (orig, converted) in floats.iter().zip(back.iter()) {
            assert!((orig - converted).abs() < 0.001);
        }
    }

    /// Out-of-range samples are clamped to the i16 limits.
    #[test]
    fn test_float_to_int_clamps() {
        assert_eq!(float32_to_int16(&[2.0, -2.0]), vec![i16::MAX, i16::MIN]);
    }

    /// i16 <-> little-endian bytes roundtrip; a trailing odd byte is dropped.
    #[test]
    fn test_int16_bytes_roundtrip() {
        let samples = [1i16, -2, 0x1234, i16::MIN, i16::MAX];
        let bytes = int16_to_bytes(&samples);
        assert_eq!(bytes.len(), samples.len() * 2);
        assert_eq!(&bytes[0..2], &[0x01u8, 0x00]);
        assert_eq!(bytes_to_int16(&bytes), samples.to_vec());

        assert_eq!(bytes_to_int16(&[0x01, 0x00, 0xFF]), vec![1i16]);
    }

    /// Every field of the 44-byte header carries the expected value.
    #[test]
    fn test_wav_header() {
        let header = create_wav_header(1000, 16000, 1, 16);
        assert_eq!(header.len(), 44);
        assert_eq!(&header[0..4], b"RIFF");
        assert_eq!(&header[4..8], &1036u32.to_le_bytes());
        assert_eq!(&header[8..12], b"WAVE");
        assert_eq!(&header[12..16], b"fmt ");
        assert_eq!(&header[16..20], &16u32.to_le_bytes());
        assert_eq!(&header[20..22], &1u16.to_le_bytes());
        assert_eq!(&header[22..24], &1u16.to_le_bytes());
        assert_eq!(&header[24..28], &16000u32.to_le_bytes());
        assert_eq!(&header[28..32], &32000u32.to_le_bytes());
        assert_eq!(&header[32..34], &2u16.to_le_bytes());
        assert_eq!(&header[34..36], &16u16.to_le_bytes());
        assert_eq!(&header[36..40], b"data");
        assert_eq!(&header[40..44], &1000u32.to_le_bytes());
    }

    /// Wrapping prepends the header and keeps the payload intact.
    #[test]
    fn test_wrap_pcm_in_wav() {
        let pcm = [1u8, 2, 3, 4];
        let wav = wrap_pcm_in_wav(&pcm, 16000, 1, 16);
        assert_eq!(wav.len(), 44 + pcm.len());
        assert_eq!(&wav[40..44], &4u32.to_le_bytes());
        assert_eq!(&wav[44..], &pcm[..]);
    }

    /// The encoder's defaults and its conversion helpers agree with the free functions.
    #[test]
    fn test_audio_encoder() {
        let encoder = AudioEncoder::new();
        assert_eq!(encoder.sample_rate, 16000);
        assert_eq!(encoder.channels, 1);
        assert_eq!(encoder.bit_depth, 16);

        let bytes = encoder.from_float32(&[0.0, 1.0, -1.0]);
        assert_eq!(bytes, int16_to_bytes(&[0, i16::MAX, i16::MIN]));
        assert_eq!(encoder.to_float32(&bytes), vec![0.0_f32, 1.0, -1.0]);
    }
}
189}