rtmp_rs/media/
aac.rs

//! AAC audio parsing
//!
//! RTMP transports AAC audio in raw format (without ADTS headers).
//!
//! AAC audio packet structure:
//! ```text
//! +-------------+-----------+-----------+-----------+---------------+---------
//! | SoundFormat | SoundRate | SoundSize | SoundType | AACPacketType | AACData
//! |  (4 bits)   | (2 bits)  |  (1 bit)  |  (1 bit)  |   (1 byte)    |
//! +-------------+-----------+-----------+-----------+---------------+---------
//! ```
//!
//! AACPacketType:
//! - 0: AAC sequence header (AudioSpecificConfig)
//! - 1: AAC raw frame data
16
17use bytes::{Buf, Bytes};
18
19use crate::error::{MediaError, Result};
20
/// Kind of payload carried by an AAC audio packet.
///
/// The discriminants are the byte values used on the wire.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AacPacketType {
    /// The payload is an AudioSpecificConfig (sequence header)
    SequenceHeader = 0,
    /// The payload is a raw AAC frame
    Raw = 1,
}

impl AacPacketType {
    /// Maps a wire byte to its packet type.
    ///
    /// Returns `None` for any byte that is not the discriminant of a variant.
    pub fn from_byte(b: u8) -> Option<Self> {
        // Search the variants for the one whose discriminant equals the byte.
        [Self::SequenceHeader, Self::Raw]
            .iter()
            .copied()
            .find(|kind| *kind as u8 == b)
    }
}
39
/// AAC profile, identified by its audio object type value.
///
/// Each discriminant is the audio object type number of that profile.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AacProfile {
    /// AAC Main
    Main = 1,
    /// AAC LC (Low Complexity) - most common
    Lc = 2,
    /// AAC SSR (Scalable Sample Rate)
    Ssr = 3,
    /// AAC LTP (Long Term Prediction)
    Ltp = 4,
    /// SBR (Spectral Band Replication) - HE-AAC
    Sbr = 5,
    /// AAC Scalable
    Scalable = 6,
}

impl AacProfile {
    /// Every profile this enum can represent, in discriminant order.
    const ALL: [AacProfile; 6] = [
        AacProfile::Main,
        AacProfile::Lc,
        AacProfile::Ssr,
        AacProfile::Ltp,
        AacProfile::Sbr,
        AacProfile::Scalable,
    ];

    /// Looks up the profile for an audio object type.
    ///
    /// Returns `None` when `ot` is not one of the values `1..=6`.
    pub fn from_object_type(ot: u8) -> Option<Self> {
        Self::ALL
            .iter()
            .copied()
            .find(|profile| *profile as u8 == ot)
    }

    /// Human-readable name of the profile.
    pub fn name(&self) -> &'static str {
        match *self {
            Self::Main => "AAC Main",
            Self::Lc => "AAC LC",
            Self::Ssr => "AAC SSR",
            Self::Ltp => "AAC LTP",
            Self::Sbr => "HE-AAC",
            Self::Scalable => "AAC Scalable",
        }
    }
}
81
82/// AudioSpecificConfig (from sequence header)
83#[derive(Debug, Clone)]
84pub struct AudioSpecificConfig {
85    /// Audio object type (profile)
86    pub audio_object_type: u8,
87    /// Sampling frequency index
88    pub sampling_frequency_index: u8,
89    /// Sampling frequency in Hz
90    pub sampling_frequency: u32,
91    /// Channel configuration (1=mono, 2=stereo, etc.)
92    pub channel_configuration: u8,
93    /// Frame length flag (960 or 1024 samples)
94    pub frame_length_flag: bool,
95    /// Depends on core coder flag
96    pub depends_on_core_coder: bool,
97    /// Extension flag
98    pub extension_flag: bool,
99    /// Raw config bytes
100    pub raw: Bytes,
101}
102
103impl AudioSpecificConfig {
104    /// Standard sampling frequencies by index
105    const SAMPLING_FREQUENCIES: [u32; 16] = [
106        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000, 7350, 0,
107        0, 0,
108    ];
109
110    /// Parse from AAC sequence header data
111    pub fn parse(data: Bytes) -> Result<Self> {
112        if data.len() < 2 {
113            return Err(MediaError::InvalidAacPacket.into());
114        }
115
116        // AudioSpecificConfig is bit-packed
117        // audioObjectType: 5 bits
118        // samplingFrequencyIndex: 4 bits
119        // if (samplingFrequencyIndex == 0xf) samplingFrequency: 24 bits
120        // channelConfiguration: 4 bits
121        // ... more optional fields
122
123        let b0 = data[0];
124        let b1 = data[1];
125
126        let audio_object_type = (b0 >> 3) & 0x1F;
127        let sampling_frequency_index = ((b0 & 0x07) << 1) | ((b1 >> 7) & 0x01);
128
129        let sampling_frequency = if sampling_frequency_index == 0x0F {
130            // Explicit frequency in next 24 bits
131            if data.len() < 5 {
132                return Err(MediaError::InvalidAacPacket.into());
133            }
134            let f0 = (data[1] & 0x7F) as u32;
135            let f1 = data[2] as u32;
136            let f2 = data[3] as u32;
137            let f3 = (data[4] >> 1) as u32;
138            (f0 << 17) | (f1 << 9) | (f2 << 1) | f3
139        } else if (sampling_frequency_index as usize) < Self::SAMPLING_FREQUENCIES.len() {
140            Self::SAMPLING_FREQUENCIES[sampling_frequency_index as usize]
141        } else {
142            return Err(MediaError::InvalidAacPacket.into());
143        };
144
145        let channel_configuration = (b1 >> 3) & 0x0F;
146        let frame_length_flag = (b1 & 0x04) != 0;
147        let depends_on_core_coder = (b1 & 0x02) != 0;
148        let extension_flag = (b1 & 0x01) != 0;
149
150        Ok(AudioSpecificConfig {
151            audio_object_type,
152            sampling_frequency_index,
153            sampling_frequency,
154            channel_configuration,
155            frame_length_flag,
156            depends_on_core_coder,
157            extension_flag,
158            raw: data,
159        })
160    }
161
162    /// Get the profile
163    pub fn profile(&self) -> Option<AacProfile> {
164        AacProfile::from_object_type(self.audio_object_type)
165    }
166
167    /// Get channel count
168    pub fn channels(&self) -> u8 {
169        match self.channel_configuration {
170            0 => 0, // Defined in stream
171            1 => 1, // Mono
172            2 => 2, // Stereo
173            3 => 3, // 3.0
174            4 => 4, // 4.0
175            5 => 5, // 5.0
176            6 => 6, // 5.1
177            7 => 8, // 7.1
178            _ => 0,
179        }
180    }
181
182    /// Get samples per frame
183    pub fn samples_per_frame(&self) -> u32 {
184        if self.frame_length_flag {
185            960
186        } else {
187            1024
188        }
189    }
190}
191
192/// Parsed AAC data
193#[derive(Debug, Clone)]
194pub enum AacData {
195    /// Sequence header (AudioSpecificConfig)
196    SequenceHeader(AudioSpecificConfig),
197
198    /// Raw AAC frame
199    Frame {
200        /// Raw AAC data (without ADTS header)
201        data: Bytes,
202    },
203}
204
205impl AacData {
206    /// Parse from RTMP audio data (after format byte)
207    pub fn parse(mut data: Bytes) -> Result<Self> {
208        if data.is_empty() {
209            return Err(MediaError::InvalidAacPacket.into());
210        }
211
212        let packet_type = data.get_u8();
213
214        match AacPacketType::from_byte(packet_type) {
215            Some(AacPacketType::SequenceHeader) => {
216                let config = AudioSpecificConfig::parse(data)?;
217                Ok(AacData::SequenceHeader(config))
218            }
219            Some(AacPacketType::Raw) => Ok(AacData::Frame { data }),
220            None => Err(MediaError::InvalidAacPacket.into()),
221        }
222    }
223
224    /// Check if this is a sequence header
225    pub fn is_sequence_header(&self) -> bool {
226        matches!(self, AacData::SequenceHeader(_))
227    }
228}
229
230/// Generate ADTS header for a raw AAC frame
231///
232/// This is useful when writing AAC to a file that requires ADTS headers.
233pub fn generate_adts_header(config: &AudioSpecificConfig, frame_length: usize) -> [u8; 7] {
234    let profile = config.audio_object_type.saturating_sub(1); // ADTS uses profile - 1
235    let freq_idx = config.sampling_frequency_index;
236    let channels = config.channel_configuration;
237
238    // ADTS header is 7 bytes (without CRC)
239    let frame_len = frame_length + 7;
240
241    let mut header = [0u8; 7];
242
243    // Syncword (12 bits) + ID (1 bit) + Layer (2 bits) + Protection (1 bit)
244    header[0] = 0xFF;
245    header[1] = 0xF1; // MPEG-4, Layer 0, no CRC
246
247    // Profile (2 bits) + Freq (4 bits) + Private (1 bit) + Channels (1 bit)
248    header[2] = ((profile & 0x03) << 6) | ((freq_idx & 0x0F) << 2) | ((channels >> 2) & 0x01);
249
250    // Channels (3 bits) + Original (1 bit) + Home (1 bit) + Copyright (1 bit) + Length (2 bits)
251    header[3] = ((channels & 0x03) << 6) | ((frame_len >> 11) & 0x03) as u8;
252
253    // Length (8 bits)
254    header[4] = ((frame_len >> 3) & 0xFF) as u8;
255
256    // Length (3 bits) + Buffer fullness (5 bits)
257    header[5] = (((frame_len & 0x07) << 5) | 0x1F) as u8;
258
259    // Buffer fullness (6 bits) + Number of frames (2 bits)
260    header[6] = 0xFC;
261
262    header
263}
264
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an AAC-LC, 44.1 kHz config with the given channel configuration
    /// and frame length flag; all other flags are clear and `raw` is empty.
    fn lc_44k_config(channel_configuration: u8, frame_length_flag: bool) -> AudioSpecificConfig {
        AudioSpecificConfig {
            audio_object_type: 2,
            sampling_frequency_index: 4,
            sampling_frequency: 44100,
            channel_configuration,
            frame_length_flag,
            depends_on_core_coder: false,
            extension_flag: false,
            raw: Bytes::new(),
        }
    }

    /// Reassembles the 13-bit frame length stored across header bytes 3..=5.
    fn adts_frame_length(header: &[u8; 7]) -> usize {
        (usize::from(header[3] & 0x03) << 11)
            | (usize::from(header[4]) << 3)
            | usize::from(header[5] >> 5)
    }

    #[test]
    fn test_audio_specific_config_parse() {
        // AAC-LC, 44100 Hz, Stereo
        let data = Bytes::from_static(&[0x12, 0x10]);

        let config = AudioSpecificConfig::parse(data).unwrap();
        assert_eq!(config.audio_object_type, 2); // AAC-LC
        assert_eq!(config.sampling_frequency_index, 4); // 44100 Hz
        assert_eq!(config.sampling_frequency, 44100);
        assert_eq!(config.channel_configuration, 2); // Stereo
        assert_eq!(config.channels(), 2);
        assert_eq!(config.profile(), Some(AacProfile::Lc));
        assert!(!config.frame_length_flag);
        assert!(!config.depends_on_core_coder);
        assert!(!config.extension_flag);
        assert_eq!(config.samples_per_frame(), 1024);
    }

    #[test]
    fn test_adts_header() {
        let config = lc_44k_config(2, false);

        let header = generate_adts_header(&config, 100);

        // profile = 1, freq index = 4, channels = 2, total length = 107
        assert_eq!(header, [0xFF, 0xF1, 0x50, 0x80, 0x0D, 0x7F, 0xFC]);
    }

    #[test]
    fn test_aac_packet_type() {
        assert_eq!(
            AacPacketType::from_byte(0),
            Some(AacPacketType::SequenceHeader)
        );
        assert_eq!(AacPacketType::from_byte(1), Some(AacPacketType::Raw));
        assert_eq!(AacPacketType::from_byte(2), None);
        assert_eq!(AacPacketType::from_byte(255), None);
    }

    #[test]
    fn test_aac_profile_from_object_type() {
        assert_eq!(AacProfile::from_object_type(1), Some(AacProfile::Main));
        assert_eq!(AacProfile::from_object_type(2), Some(AacProfile::Lc));
        assert_eq!(AacProfile::from_object_type(3), Some(AacProfile::Ssr));
        assert_eq!(AacProfile::from_object_type(4), Some(AacProfile::Ltp));
        assert_eq!(AacProfile::from_object_type(5), Some(AacProfile::Sbr));
        assert_eq!(AacProfile::from_object_type(6), Some(AacProfile::Scalable));
        assert_eq!(AacProfile::from_object_type(0), None);
        assert_eq!(AacProfile::from_object_type(7), None);
    }

    #[test]
    fn test_aac_profile_names() {
        assert_eq!(AacProfile::Main.name(), "AAC Main");
        assert_eq!(AacProfile::Lc.name(), "AAC LC");
        assert_eq!(AacProfile::Ssr.name(), "AAC SSR");
        assert_eq!(AacProfile::Ltp.name(), "AAC LTP");
        assert_eq!(AacProfile::Sbr.name(), "HE-AAC");
        assert_eq!(AacProfile::Scalable.name(), "AAC Scalable");
    }

    #[test]
    fn test_audio_specific_config_various_rates() {
        // AudioSpecificConfig bit layout:
        // - audioObjectType: 5 bits
        // - samplingFrequencyIndex: 4 bits
        // - channelConfiguration: 4 bits
        //
        // For [0x12, 0x10]:
        // b0 = 0001_0010, b1 = 0001_0000
        // audioObjectType = (0x12 >> 3) = 2 (AAC-LC)
        // samplingFrequencyIndex = ((0x12 & 7) << 1) | (0x10 >> 7) = 4 (44100 Hz)
        // channelConfiguration = (0x10 >> 3) & 0xF = 2 (stereo)

        // Test cases: [bytes], expected_freq, expected_channels
        let test_cases = [
            // AAC-LC, 44.1kHz, stereo
            (&[0x12, 0x10][..], 44100, 2),
            // AAC-LC, 48kHz, stereo: obj=2, freq_idx=3, ch=2
            // b0 = (2 << 3) | (3 >> 1) = 0x11, b1 = ((3 & 1) << 7) | (2 << 3) = 0x90
            (&[0x11, 0x90][..], 48000, 2),
            // AAC-LC, 48kHz, mono: obj=2, freq_idx=3, ch=1
            // b0 = (2 << 3) | (3 >> 1) = 0x11, b1 = ((3 & 1) << 7) | (1 << 3) = 0x88
            (&[0x11, 0x88][..], 48000, 1),
        ];

        for (data, expected_freq, expected_channels) in test_cases {
            let config = AudioSpecificConfig::parse(Bytes::copy_from_slice(data)).unwrap();
            assert_eq!(
                config.sampling_frequency, expected_freq,
                "sampling_frequency mismatch for {:02X?}",
                data
            );
            assert_eq!(
                config.channel_configuration, expected_channels,
                "channel_configuration mismatch for {:02X?}",
                data
            );
        }
    }

    #[test]
    fn test_audio_specific_config_channels() {
        // (channel_configuration, expected channel count)
        let channel_tests = [
            (0, 0), // Defined in stream
            (1, 1), // Mono
            (2, 2), // Stereo
            (3, 3), // 3.0
            (4, 4), // 4.0
            (5, 5), // 5.0
            (6, 6), // 5.1
            (7, 8), // 7.1
            (8, 0), // Unknown
        ];

        for (config_value, expected_channels) in channel_tests {
            let config = lc_44k_config(config_value, false);
            assert_eq!(
                config.channels(),
                expected_channels,
                "channel count mismatch for configuration {}",
                config_value
            );
        }
    }

    #[test]
    fn test_audio_specific_config_samples_per_frame() {
        // Flag clear: 1024 samples
        assert_eq!(lc_44k_config(2, false).samples_per_frame(), 1024);
        // Flag set: 960 samples
        assert_eq!(lc_44k_config(2, true).samples_per_frame(), 960);
    }

    #[test]
    fn test_audio_specific_config_profile() {
        let config = lc_44k_config(2, false);
        assert_eq!(config.profile(), Some(AacProfile::Lc));

        let config_unknown = AudioSpecificConfig {
            audio_object_type: 99, // Unknown
            ..lc_44k_config(2, false)
        };
        assert!(config_unknown.profile().is_none());
    }

    #[test]
    fn test_aac_data_sequence_header() {
        let data = Bytes::from_static(&[
            0x00, // Sequence header
            0x12, 0x10, // AudioSpecificConfig: AAC LC, 44.1kHz, stereo
        ]);

        let aac = AacData::parse(data).unwrap();
        assert!(aac.is_sequence_header());

        if let AacData::SequenceHeader(config) = aac {
            assert_eq!(config.audio_object_type, 2);
            assert_eq!(config.sampling_frequency, 44100);
            assert_eq!(config.channel_configuration, 2);
        } else {
            panic!("Expected SequenceHeader");
        }
    }

    #[test]
    fn test_aac_data_raw_frame() {
        let data = Bytes::from_static(&[
            0x01, // Raw frame
            0x21, 0x00, 0x49, 0x90, 0x02, // AAC frame data
        ]);

        let aac = AacData::parse(data).unwrap();
        assert!(!aac.is_sequence_header());

        if let AacData::Frame { data } = aac {
            // The packet type byte is stripped; the payload is untouched.
            assert_eq!(&data[..], &[0x21u8, 0x00, 0x49, 0x90, 0x02][..]);
        } else {
            panic!("Expected Frame");
        }
    }

    #[test]
    fn test_aac_data_invalid_packet_type() {
        let data = Bytes::from_static(&[0x02, 0x00, 0x00]); // Invalid type
        let result = AacData::parse(data);
        assert!(result.is_err());
    }

    #[test]
    fn test_aac_data_empty() {
        let data = Bytes::new();
        let result = AacData::parse(data);
        assert!(result.is_err());
    }

    #[test]
    fn test_audio_specific_config_too_short() {
        let data = Bytes::from_static(&[0x12]); // Only 1 byte
        let result = AudioSpecificConfig::parse(data);
        assert!(result.is_err());
    }

    #[test]
    fn test_adts_header_frame_length() {
        let config = lc_44k_config(2, false);

        let header1 = generate_adts_header(&config, 100);
        let header2 = generate_adts_header(&config, 500);

        // The encoded length is the payload length plus the 7 header bytes.
        assert_eq!(adts_frame_length(&header1), 107);
        assert_eq!(adts_frame_length(&header2), 507);

        // Everything outside the length field is independent of the length.
        assert_eq!(header1[..3], header2[..3]);
        assert_eq!(header1[3] & 0xFC, header2[3] & 0xFC);
        assert_eq!(header1[5] & 0x1F, header2[5] & 0x1F);
        assert_eq!(header1[6], header2[6]);
    }

    #[test]
    fn test_audio_specific_config_all_sampling_frequencies() {
        // The first 13 indices have defined frequencies
        let expected_freqs = [
            96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000, 7350,
        ];

        for (index, &expected) in expected_freqs.iter().enumerate() {
            assert_eq!(
                AudioSpecificConfig::SAMPLING_FREQUENCIES[index],
                expected,
                "frequency mismatch at index {}",
                index
            );
        }

        // The remaining table entries are placeholders
        for &freq in &AudioSpecificConfig::SAMPLING_FREQUENCIES[expected_freqs.len()..] {
            assert_eq!(freq, 0);
        }
    }

    #[test]
    fn test_aac_data_raw_stores_config_bytes() {
        let config = AudioSpecificConfig::parse(Bytes::from_static(&[0x12, 0x10])).unwrap();

        // The raw field should contain the original bytes
        assert_eq!(&config.raw[..], &[0x12u8, 0x10][..]);
    }
}