rtmp_rs/media/
aac.rs

//! AAC audio parsing
//!
//! RTMP transports AAC audio in raw format (without ADTS headers).
//!
//! AAC audio packet structure:
//! ```text
//! +-------------+-----------+-----------+-----------+---------------+---------+
//! | SoundFormat | SoundRate | SoundSize | SoundType | AACPacketType | AACData |
//! |  (4 bits)   | (2 bits)  |  (1 bit)  |  (1 bit)  |   (1 byte)    |         |
//! +-------------+-----------+-----------+-----------+---------------+---------+
//! ```
//!
//! AACPacketType:
//! - 0: AAC sequence header (AudioSpecificConfig)
//! - 1: AAC raw frame data
16
17use bytes::{Buf, Bytes};
18
19use crate::error::{MediaError, Result};
20
/// Kind of payload carried by an AAC audio packet.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AacPacketType {
    /// The payload is a sequence header (AudioSpecificConfig).
    SequenceHeader = 0,
    /// The payload is raw AAC frame data.
    Raw = 1,
}

impl AacPacketType {
    /// Maps a packet-type byte onto its variant.
    ///
    /// Returns `None` for every value that is not the discriminant of a
    /// known variant (anything other than 0 or 1).
    pub fn from_byte(b: u8) -> Option<Self> {
        [Self::SequenceHeader, Self::Raw]
            .iter()
            .copied()
            .find(|kind| *kind as u8 == b)
    }
}
39
/// AAC profile, identified by its audio object type value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AacProfile {
    /// AAC Main
    Main = 1,
    /// AAC LC (Low Complexity) - most common
    Lc = 2,
    /// AAC SSR (Scalable Sample Rate)
    Ssr = 3,
    /// AAC LTP (Long Term Prediction)
    Ltp = 4,
    /// SBR (Spectral Band Replication) - HE-AAC
    Sbr = 5,
    /// AAC Scalable
    Scalable = 6,
}

impl AacProfile {
    /// Looks up the profile whose discriminant equals the given audio
    /// object type.
    ///
    /// Returns `None` for object types outside `1..=6`.
    pub fn from_object_type(ot: u8) -> Option<Self> {
        const ALL: [AacProfile; 6] = [
            AacProfile::Main,
            AacProfile::Lc,
            AacProfile::Ssr,
            AacProfile::Ltp,
            AacProfile::Sbr,
            AacProfile::Scalable,
        ];

        ALL.iter().copied().find(|profile| *profile as u8 == ot)
    }

    /// Human-readable name of the profile.
    pub fn name(&self) -> &'static str {
        match *self {
            Self::Main => "AAC Main",
            Self::Lc => "AAC LC",
            Self::Ssr => "AAC SSR",
            Self::Ltp => "AAC LTP",
            Self::Sbr => "HE-AAC",
            Self::Scalable => "AAC Scalable",
        }
    }
}
81
82/// AudioSpecificConfig (from sequence header)
83#[derive(Debug, Clone)]
84pub struct AudioSpecificConfig {
85    /// Audio object type (profile)
86    pub audio_object_type: u8,
87    /// Sampling frequency index
88    pub sampling_frequency_index: u8,
89    /// Sampling frequency in Hz
90    pub sampling_frequency: u32,
91    /// Channel configuration (1=mono, 2=stereo, etc.)
92    pub channel_configuration: u8,
93    /// Frame length flag (960 or 1024 samples)
94    pub frame_length_flag: bool,
95    /// Depends on core coder flag
96    pub depends_on_core_coder: bool,
97    /// Extension flag
98    pub extension_flag: bool,
99    /// Raw config bytes
100    pub raw: Bytes,
101}
102
103impl AudioSpecificConfig {
104    /// Standard sampling frequencies by index
105    const SAMPLING_FREQUENCIES: [u32; 16] = [
106        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000, 7350, 0,
107        0, 0,
108    ];
109
110    /// Parse from AAC sequence header data
111    pub fn parse(data: Bytes) -> Result<Self> {
112        if data.len() < 2 {
113            return Err(MediaError::InvalidAacPacket.into());
114        }
115
116        // AudioSpecificConfig is bit-packed
117        // audioObjectType: 5 bits
118        // samplingFrequencyIndex: 4 bits
119        // if (samplingFrequencyIndex == 0xf) samplingFrequency: 24 bits
120        // channelConfiguration: 4 bits
121        // ... more optional fields
122
123        let b0 = data[0];
124        let b1 = data[1];
125
126        let audio_object_type = (b0 >> 3) & 0x1F;
127        let sampling_frequency_index = ((b0 & 0x07) << 1) | ((b1 >> 7) & 0x01);
128
129        let sampling_frequency = if sampling_frequency_index == 0x0F {
130            // Explicit frequency in next 24 bits
131            if data.len() < 5 {
132                return Err(MediaError::InvalidAacPacket.into());
133            }
134            let f0 = (data[1] & 0x7F) as u32;
135            let f1 = data[2] as u32;
136            let f2 = data[3] as u32;
137            let f3 = (data[4] >> 1) as u32;
138            (f0 << 17) | (f1 << 9) | (f2 << 1) | f3
139        } else if (sampling_frequency_index as usize) < Self::SAMPLING_FREQUENCIES.len() {
140            Self::SAMPLING_FREQUENCIES[sampling_frequency_index as usize]
141        } else {
142            return Err(MediaError::InvalidAacPacket.into());
143        };
144
145        let channel_configuration = (b1 >> 3) & 0x0F;
146        let frame_length_flag = (b1 & 0x04) != 0;
147        let depends_on_core_coder = (b1 & 0x02) != 0;
148        let extension_flag = (b1 & 0x01) != 0;
149
150        Ok(AudioSpecificConfig {
151            audio_object_type,
152            sampling_frequency_index,
153            sampling_frequency,
154            channel_configuration,
155            frame_length_flag,
156            depends_on_core_coder,
157            extension_flag,
158            raw: data,
159        })
160    }
161
162    /// Get the profile
163    pub fn profile(&self) -> Option<AacProfile> {
164        AacProfile::from_object_type(self.audio_object_type)
165    }
166
167    /// Get channel count
168    pub fn channels(&self) -> u8 {
169        match self.channel_configuration {
170            0 => 0, // Defined in stream
171            1 => 1, // Mono
172            2 => 2, // Stereo
173            3 => 3, // 3.0
174            4 => 4, // 4.0
175            5 => 5, // 5.0
176            6 => 6, // 5.1
177            7 => 8, // 7.1
178            _ => 0,
179        }
180    }
181
182    /// Get samples per frame
183    pub fn samples_per_frame(&self) -> u32 {
184        if self.frame_length_flag {
185            960
186        } else {
187            1024
188        }
189    }
190}
191
192/// Parsed AAC data
193#[derive(Debug, Clone)]
194pub enum AacData {
195    /// Sequence header (AudioSpecificConfig)
196    SequenceHeader(AudioSpecificConfig),
197
198    /// Raw AAC frame
199    Frame {
200        /// Raw AAC data (without ADTS header)
201        data: Bytes,
202    },
203}
204
205impl AacData {
206    /// Parse from RTMP audio data (after format byte)
207    pub fn parse(mut data: Bytes) -> Result<Self> {
208        if data.is_empty() {
209            return Err(MediaError::InvalidAacPacket.into());
210        }
211
212        let packet_type = data.get_u8();
213
214        match AacPacketType::from_byte(packet_type) {
215            Some(AacPacketType::SequenceHeader) => {
216                let config = AudioSpecificConfig::parse(data)?;
217                Ok(AacData::SequenceHeader(config))
218            }
219            Some(AacPacketType::Raw) => Ok(AacData::Frame { data }),
220            None => Err(MediaError::InvalidAacPacket.into()),
221        }
222    }
223
224    /// Check if this is a sequence header
225    pub fn is_sequence_header(&self) -> bool {
226        matches!(self, AacData::SequenceHeader(_))
227    }
228}
229
230/// Generate ADTS header for a raw AAC frame
231///
232/// This is useful when writing AAC to a file that requires ADTS headers.
233pub fn generate_adts_header(config: &AudioSpecificConfig, frame_length: usize) -> [u8; 7] {
234    let profile = config.audio_object_type.saturating_sub(1); // ADTS uses profile - 1
235    let freq_idx = config.sampling_frequency_index;
236    let channels = config.channel_configuration;
237
238    // ADTS header is 7 bytes (without CRC)
239    let frame_len = frame_length + 7;
240
241    let mut header = [0u8; 7];
242
243    // Syncword (12 bits) + ID (1 bit) + Layer (2 bits) + Protection (1 bit)
244    header[0] = 0xFF;
245    header[1] = 0xF1; // MPEG-4, Layer 0, no CRC
246
247    // Profile (2 bits) + Freq (4 bits) + Private (1 bit) + Channels (1 bit)
248    header[2] = ((profile & 0x03) << 6) | ((freq_idx & 0x0F) << 2) | ((channels >> 2) & 0x01);
249
250    // Channels (3 bits) + Original (1 bit) + Home (1 bit) + Copyright (1 bit) + Length (2 bits)
251    header[3] = ((channels & 0x03) << 6) | ((frame_len >> 11) & 0x03) as u8;
252
253    // Length (8 bits)
254    header[4] = ((frame_len >> 3) & 0xFF) as u8;
255
256    // Length (3 bits) + Buffer fullness (5 bits)
257    header[5] = (((frame_len & 0x07) << 5) | 0x1F) as u8;
258
259    // Buffer fullness (6 bits) + Number of frames (2 bits)
260    header[6] = 0xFC;
261
262    header
263}
264
#[cfg(test)]
mod tests {
    use super::*;

    /// AAC-LC, 44100 Hz, stereo, 1024 samples per frame.
    fn lc_stereo_config() -> AudioSpecificConfig {
        AudioSpecificConfig {
            audio_object_type: 2,
            sampling_frequency_index: 4,
            sampling_frequency: 44100,
            channel_configuration: 2,
            frame_length_flag: false,
            depends_on_core_coder: false,
            extension_flag: false,
            raw: Bytes::new(),
        }
    }

    #[test]
    fn test_audio_specific_config_parse() {
        // AAC-LC, 44100 Hz, Stereo
        let data = Bytes::from_static(&[0x12, 0x10]);

        let config = AudioSpecificConfig::parse(data).unwrap();
        assert_eq!(config.audio_object_type, 2); // AAC-LC
        assert_eq!(config.sampling_frequency_index, 4); // 44100 Hz
        assert_eq!(config.sampling_frequency, 44100);
        assert_eq!(config.channel_configuration, 2); // Stereo
        assert_eq!(config.channels(), 2);
        assert_eq!(config.profile(), Some(AacProfile::Lc));

        // Trailing flag bits are all clear in this config.
        assert!(!config.frame_length_flag);
        assert!(!config.depends_on_core_coder);
        assert!(!config.extension_flag);
        assert_eq!(config.samples_per_frame(), 1024);

        // The input bytes are retained verbatim.
        assert_eq!(&config.raw[..], &[0x12_u8, 0x10][..]);
    }

    #[test]
    fn test_audio_specific_config_too_short() {
        assert!(AudioSpecificConfig::parse(Bytes::new()).is_err());
        assert!(AudioSpecificConfig::parse(Bytes::from_static(&[0x12])).is_err());
    }

    #[test]
    fn test_aac_data_parse() {
        // Packet type 0 followed by an AudioSpecificConfig.
        let header = AacData::parse(Bytes::from_static(&[0x00, 0x12, 0x10])).unwrap();
        assert!(header.is_sequence_header());

        // Packet type 1: the payload is passed through without the type byte.
        let frame = AacData::parse(Bytes::from_static(&[0x01, 0xAA, 0xBB])).unwrap();
        assert!(!frame.is_sequence_header());
        match frame {
            AacData::Frame { data } => assert_eq!(&data[..], &[0xAA_u8, 0xBB][..]),
            other => panic!("expected raw frame, got {:?}", other),
        }

        // Empty input and unknown packet types are rejected.
        assert!(AacData::parse(Bytes::new()).is_err());
        assert!(AacData::parse(Bytes::from_static(&[0x02])).is_err());
    }

    #[test]
    fn test_channel_mapping() {
        let mut config = lc_stereo_config();

        config.channel_configuration = 0;
        assert_eq!(config.channels(), 0);
        config.channel_configuration = 6;
        assert_eq!(config.channels(), 6);
        config.channel_configuration = 7;
        assert_eq!(config.channels(), 8);
        config.channel_configuration = 8;
        assert_eq!(config.channels(), 0);
    }

    #[test]
    fn test_adts_header() {
        let config = lc_stereo_config();

        let header = generate_adts_header(&config, 100);

        // Check syncword
        assert_eq!(header[0], 0xFF);
        assert_eq!(header[1] & 0xF0, 0xF0);

        // Profile 1 (LC), frequency index 4, channel config 2,
        // total length 107 = 100 payload bytes + 7 header bytes.
        assert_eq!(header, [0xFF, 0xF1, 0x50, 0x80, 0x0D, 0x7F, 0xFC]);

        // The 13-bit length field reassembles to the total frame size.
        let length = (usize::from(header[3] & 0x03) << 11)
            | (usize::from(header[4]) << 3)
            | usize::from(header[5] >> 5);
        assert_eq!(length, 107);
    }
}