// xaac_rs/encoder.rs

1use std::ffi::c_void;
2use std::mem::zeroed;
3use std::ptr;
4
5use crate::error::{Error, Result};
6use crate::ffi::{check_encoder, encoder_version};
7use crate::util::{VersionInfo, encoder_alloc, encoder_free};
8
// Raw values returned by `UsacCodecMode::raw` and written to `input_config.codec_mode`.
const USAC_ONLY_SWITCHED: i32 = 0;
const USAC_ONLY_FD: i32 = 1;
const USAC_ONLY_TD: i32 = 2;

// Raw values returned by `UsacFrameLengthIndex::raw` and written to `input_config.ccfl_idx`.
const NO_SBR_CCFL_768: i32 = 0;
const NO_SBR_CCFL_1024: i32 = 1;
const SBR_8_3_CCFL_768: i32 = 2;
const SBR_2_1_CCFL_1024: i32 = 3;
const SBR_4_1_CCFL_1024: i32 = 4;

// Bit-reservoir sizes used by `Encoder::new` when `EncoderConfig::bit_reservoir_size` is `None`:
// the LD value for the `AacLd`/`AacEld` profiles, the LC value for every other profile.
const DEFAULT_LC_BITRESERVOIR: i32 = 768;
const DEFAULT_LD_BITRESERVOIR: i32 = 384;
// Fixed value `Encoder::new` writes to `input_config.method_def`.
const METHOD_DEFINITION_PROGRAM_LOUDNESS: u32 = 1;
// Fixed value `Encoder::new` writes to `input_config.measurement_system`.
const MEASUREMENT_SYSTEM_BS_1770_3: u32 = 2;
// Default for `EncoderConfig::random_access_interval_ms`.
const DEFAULT_RAP_INTERVAL_IN_MS: i32 = -1;
/// Codec profile an [`Encoder`] is created for.
///
/// Each variant maps to one `libxaac_sys::AOT_*` constant (see `Profile::aot`).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Profile {
    /// Mapped to `AOT_AAC_LC`.
    AacLc,
    /// Mapped to `AOT_SBR`.
    HeAacV1,
    /// Mapped to `AOT_AAC_LD`.
    AacLd,
    /// Mapped to `AOT_PS`.
    HeAacV2,
    /// Mapped to `AOT_AAC_ELD`.
    AacEld,
    /// Mapped to `AOT_USAC`.
    Usac,
}
33
34impl Profile {
35    fn aot(self) -> i32 {
36        match self {
37            Self::AacLc => libxaac_sys::AOT_AAC_LC as i32,
38            Self::HeAacV1 => libxaac_sys::AOT_SBR as i32,
39            Self::AacLd => libxaac_sys::AOT_AAC_LD as i32,
40            Self::HeAacV2 => libxaac_sys::AOT_PS as i32,
41            Self::AacEld => libxaac_sys::AOT_AAC_ELD as i32,
42            Self::Usac => libxaac_sys::AOT_USAC as i32,
43        }
44    }
45
46    fn default_frame_length(self) -> u16 {
47        match self {
48            Self::AacLc | Self::HeAacV1 | Self::HeAacV2 => 1024,
49            Self::AacLd | Self::AacEld => 512,
50            Self::Usac => 1024,
51        }
52    }
53
54    fn validate_frame_length(self, frame_length: u16) -> bool {
55        match self {
56            Self::AacLc | Self::HeAacV1 | Self::HeAacV2 => matches!(frame_length, 960 | 1024),
57            Self::AacLd | Self::AacEld => matches!(frame_length, 480 | 512),
58            Self::Usac => matches!(frame_length, 768 | 1024),
59        }
60    }
61}
62
/// Bitstream framing requested from the encoder.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum OutputFormat {
    /// `Encoder::new` sets `input_config.i_use_es` to 1 for this variant.
    Raw,
    /// `Encoder::new` sets `input_config.i_use_adts` and `aac_config.use_adts` to 1 for this
    /// variant.
    Adts,
}
68
/// Selector forwarded to `aac_config.inv_quant` (see `InverseQuantizationMode::raw`).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum InverseQuantizationMode {
    /// Raw value 0.
    Off,
    /// Raw value 1.
    On,
    /// Raw value 2.
    High,
}
75
76impl InverseQuantizationMode {
77    fn raw(self) -> i32 {
78        match self {
79            Self::Off => 0,
80            Self::On => 1,
81            Self::High => 2,
82        }
83    }
84}
85
/// Selector forwarded to `input_config.codec_mode` (see `UsacCodecMode::raw`).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum UsacCodecMode {
    /// Raw value `USAC_ONLY_SWITCHED`.
    Switched,
    /// Raw value `USAC_ONLY_FD`.
    FrequencyDomain,
    /// Raw value `USAC_ONLY_TD`.
    TimeDomain,
}
92
93impl UsacCodecMode {
94    fn raw(self) -> i32 {
95        match self {
96            Self::Switched => USAC_ONLY_SWITCHED,
97            Self::FrequencyDomain => USAC_ONLY_FD,
98            Self::TimeDomain => USAC_ONLY_TD,
99        }
100    }
101}
102
/// Selector forwarded to `input_config.ccfl_idx` (see `UsacFrameLengthIndex::raw`).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum UsacFrameLengthIndex {
    /// Raw value `NO_SBR_CCFL_768`.
    NoSbr768,
    /// Raw value `NO_SBR_CCFL_1024`.
    NoSbr1024,
    /// Raw value `SBR_8_3_CCFL_768`.
    Esbr8_3_768,
    /// Raw value `SBR_2_1_CCFL_1024`.
    Esbr2_1_1024,
    /// Raw value `SBR_4_1_CCFL_1024`.
    Esbr4_1_1024,
}
111
112impl UsacFrameLengthIndex {
113    fn raw(self) -> i32 {
114        match self {
115            Self::NoSbr768 => NO_SBR_CCFL_768,
116            Self::NoSbr1024 => NO_SBR_CCFL_1024,
117            Self::Esbr8_3_768 => SBR_8_3_CCFL_768,
118            Self::Esbr2_1_1024 => SBR_2_1_CCFL_1024,
119            Self::Esbr4_1_1024 => SBR_4_1_CCFL_1024,
120        }
121    }
122}
123
/// DRC-related settings copied into the FFI input config by `Encoder::new`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct EncoderDrcConfig {
    /// Forwarded as the 0/1 flag `input_config.use_drc_element`.
    pub use_drc_element: bool,
    /// Forwarded as `input_config.drc_frame_size`; `None` is sent as 0.
    pub frame_size: Option<i32>,
    /// When non-empty, a pointer to a copy of these bytes is stored in
    /// `input_config.pv_drc_cfg`; when empty that pointer is null.
    pub config_payload: Vec<u8>,
}
130
131#[derive(Debug, Clone)]
132pub struct EncoderConfig {
133    pub profile: Profile,
134    pub sample_rate: u32,
135    pub native_sample_rate: Option<u32>,
136    pub channels: u16,
137    pub channel_mask: u32,
138    pub num_coupling_channels: u16,
139    pub bitrate: u32,
140    pub pcm_word_size: u16,
141    pub frame_length: Option<u16>,
142    pub output_format: OutputFormat,
143    pub bandwidth: Option<u32>,
144    pub dual_mono: bool,
145    pub use_tns: bool,
146    pub noise_filling: bool,
147    pub full_bandwidth: bool,
148    pub private_bit: bool,
149    pub copyright_bit: bool,
150    pub original_copy_bit: bool,
151    pub no_stereo_preprocessing: bool,
152    pub use_mps: bool,
153    pub mps_tree_config: Option<i32>,
154    pub complex_prediction: bool,
155    pub bit_reservoir_size: Option<i32>,
156    pub inverse_quantization: InverseQuantizationMode,
157    pub enable_esbr: Option<bool>,
158    pub high_quality_esbr: bool,
159    pub pvc: bool,
160    pub harmonic_sbr: bool,
161    pub inter_tes: bool,
162    pub usac_mode: UsacCodecMode,
163    pub usac_frame_length_index: UsacFrameLengthIndex,
164    pub random_access_interval_ms: i32,
165    pub stream_id: u16,
166    pub use_delay_adjustment: bool,
167    pub write_program_config_element: bool,
168    pub measured_loudness: Option<f64>,
169    pub sample_peak_level: Option<f32>,
170    pub drc: Option<EncoderDrcConfig>,
171}
172
173impl Default for EncoderConfig {
174    fn default() -> Self {
175        Self {
176            profile: Profile::AacLc,
177            sample_rate: 44_100,
178            native_sample_rate: None,
179            channels: 2,
180            channel_mask: 0,
181            num_coupling_channels: 0,
182            bitrate: 48_000,
183            pcm_word_size: 16,
184            frame_length: None,
185            output_format: OutputFormat::Raw,
186            bandwidth: None,
187            dual_mono: false,
188            use_tns: true,
189            noise_filling: false,
190            full_bandwidth: false,
191            private_bit: false,
192            copyright_bit: false,
193            original_copy_bit: false,
194            no_stereo_preprocessing: false,
195            use_mps: false,
196            mps_tree_config: None,
197            complex_prediction: false,
198            bit_reservoir_size: None,
199            inverse_quantization: InverseQuantizationMode::High,
200            enable_esbr: None,
201            high_quality_esbr: false,
202            pvc: false,
203            harmonic_sbr: false,
204            inter_tes: false,
205            usac_mode: UsacCodecMode::FrequencyDomain,
206            usac_frame_length_index: UsacFrameLengthIndex::NoSbr1024,
207            random_access_interval_ms: DEFAULT_RAP_INTERVAL_IN_MS,
208            stream_id: 0,
209            use_delay_adjustment: true,
210            write_program_config_element: false,
211            measured_loudness: None,
212            sample_peak_level: None,
213            drc: None,
214        }
215    }
216}
217
218impl EncoderConfig {
219    fn validate(&self) -> Result<u16> {
220        if self.channels == 0 {
221            return Err(Error::InvalidConfig(
222                "channel count must be greater than zero",
223            ));
224        }
225        if self.bitrate == 0 {
226            return Err(Error::InvalidConfig("bitrate must be greater than zero"));
227        }
228        if !matches!(self.pcm_word_size, 16 | 24 | 32) {
229            return Err(Error::InvalidConfig("pcm_word_size must be 16, 24, or 32"));
230        }
231        let frame_length = self
232            .frame_length
233            .unwrap_or_else(|| self.profile.default_frame_length());
234        if !self.profile.validate_frame_length(frame_length) {
235            return Err(Error::InvalidConfig(
236                "frame_length is not valid for the selected profile",
237            ));
238        }
239        if self.profile != Profile::Usac && self.output_format == OutputFormat::Adts && self.use_mps
240        {
241            return Err(Error::InvalidConfig(
242                "MPS is only usable with elementary streams",
243            ));
244        }
245        if self.bandwidth == Some(0) {
246            return Err(Error::InvalidConfig("bandwidth must be greater than zero"));
247        }
248        Ok(frame_length)
249    }
250}
251
/// Result of one encoder process call.
#[derive(Clone, Debug)]
pub struct EncodedPacket {
    /// Copy of the bytes the library reported in its output buffer for this call.
    pub data: Vec<u8>,
    /// The library's `i_bytes_consumed` for this call, with negative values clamped to zero.
    pub bytes_consumed: usize,
}
257
258#[derive(Debug, Clone)]
259pub struct EncodedFrame {
260    pub packet: EncodedPacket,
261    pub padded_input_bytes: usize,
262}
263
/// Encoder handle built around a boxed `libxaac_sys::ixheaace_user_config_struct`.
///
/// Created with `Encoder::new`; the `Drop` impl calls `ixheaace_delete` on the output config.
#[derive(Debug)]
pub struct Encoder {
    // Configuration the encoder was created with; exposed through `config()`.
    config: EncoderConfig,
    // FFI config block whose `input_config` / `output_config` are passed to
    // `ixheaace_create`, `ixheaace_process`, and `ixheaace_delete`.
    ffi: Box<libxaac_sys::ixheaace_user_config_struct>,
    // `output_config.input_size` captured right after `ixheaace_create`; the byte length
    // every full input frame must have.
    input_size: usize,
    // Bytes copied out of the library's output buffer right after `ixheaace_create`.
    audio_specific_config: Vec<u8>,
    // Result of `encoder_version()` at construction time.
    version: VersionInfo,
    // Owns the DRC payload whose address is stored in `ffi.input_config.pv_drc_cfg`, so the
    // pointer stays valid for as long as the encoder exists.
    _drc_payload: Option<Box<[u8]>>,
}
273
274impl Encoder {
275    pub fn new(config: EncoderConfig) -> Result<Self> {
276        let frame_length = config.validate()?;
277        let mut ffi: Box<libxaac_sys::ixheaace_user_config_struct> = Box::new(unsafe { zeroed() });
278        let version = encoder_version()?;
279        let drc_payload = config
280            .drc
281            .as_ref()
282            .filter(|drc| !drc.config_payload.is_empty())
283            .map(|drc| drc.config_payload.clone().into_boxed_slice());
284
285        ffi.output_config.malloc_xheaace = encoder_alloc();
286        ffi.output_config.free_xheaace = encoder_free();
287
288        ffi.input_config.ui_pcm_wd_sz = config.pcm_word_size as u32;
289        ffi.input_config.i_bitrate = config.bitrate as i32;
290        ffi.input_config.frame_length = frame_length as i32;
291        ffi.input_config.frame_cmd_flag = i32::from(config.frame_length.is_some());
292        ffi.input_config.aot = config.profile.aot();
293        ffi.input_config.i_mps_tree_config = config.mps_tree_config.unwrap_or(-1);
294        ffi.input_config.esbr_flag = i32::from(
295            config
296                .enable_esbr
297                .unwrap_or(config.profile == Profile::Usac),
298        );
299        ffi.input_config.i_channels = i32::from(config.channels);
300        ffi.input_config.i_samp_freq = config.sample_rate;
301        ffi.input_config.i_native_samp_freq =
302            config.native_sample_rate.unwrap_or(config.sample_rate) as i32;
303        ffi.input_config.i_channels_mask = config.channel_mask as i32;
304        ffi.input_config.i_num_coupling_chan = i32::from(config.num_coupling_channels);
305        ffi.input_config.i_use_mps = i32::from(config.use_mps);
306        ffi.input_config.i_use_adts = i32::from(config.output_format == OutputFormat::Adts);
307        ffi.input_config.i_use_es = i32::from(config.output_format == OutputFormat::Raw);
308        ffi.input_config.usac_en = i32::from(config.profile == Profile::Usac);
309        ffi.input_config.codec_mode = config.usac_mode.raw();
310        ffi.input_config.cplx_pred = i32::from(config.complex_prediction);
311        ffi.input_config.ccfl_idx = config.usac_frame_length_index.raw();
312        ffi.input_config.pvc_active = i32::from(config.pvc);
313        ffi.input_config.harmonic_sbr = i32::from(config.harmonic_sbr);
314        ffi.input_config.inter_tes_active = i32::from(config.inter_tes);
315        ffi.input_config.pv_drc_cfg = drc_payload
316            .as_ref()
317            .map_or(ptr::null_mut(), |payload| payload.as_ptr() as *mut _);
318        ffi.input_config.use_drc_element = i32::from(
319            config
320                .drc
321                .as_ref()
322                .map(|drc| drc.use_drc_element)
323                .unwrap_or(false),
324        );
325        ffi.input_config.drc_frame_size = config
326            .drc
327            .as_ref()
328            .and_then(|drc| drc.frame_size)
329            .unwrap_or(0);
330        ffi.input_config.hq_esbr = i32::from(config.high_quality_esbr);
331        ffi.input_config.write_program_config_element =
332            i32::from(config.write_program_config_element);
333        ffi.input_config.random_access_interval = config.random_access_interval_ms;
334        ffi.input_config.method_def = METHOD_DEFINITION_PROGRAM_LOUDNESS;
335        ffi.input_config.measured_loudness = config.measured_loudness.unwrap_or(0.0);
336        ffi.input_config.measurement_system = MEASUREMENT_SYSTEM_BS_1770_3;
337        ffi.input_config.sample_peak_level = config.sample_peak_level.unwrap_or(0.0);
338        ffi.input_config.stream_id = config.stream_id;
339        ffi.input_config.use_delay_adjustment = i32::from(config.use_delay_adjustment);
340        ffi.input_config.user_tns_flag = 1;
341        ffi.input_config.user_esbr_flag = i32::from(config.enable_esbr.is_some());
342
343        ffi.input_config.aac_config.bitrate = config.bitrate as i32;
344        ffi.input_config.aac_config.sample_rate = config.sample_rate as i32;
345        ffi.input_config.aac_config.num_channels_in = i32::from(config.channels);
346        ffi.input_config.aac_config.num_channels_out = i32::from(config.channels);
347        ffi.input_config.aac_config.bandwidth = config.bandwidth.unwrap_or_default() as i32;
348        ffi.input_config.aac_config.dual_mono = i32::from(config.dual_mono);
349        ffi.input_config.aac_config.use_tns = i32::from(config.use_tns);
350        ffi.input_config.aac_config.noise_filling = i32::from(config.noise_filling);
351        ffi.input_config.aac_config.private_bit = i32::from(config.private_bit);
352        ffi.input_config.aac_config.copyright_bit = i32::from(config.copyright_bit);
353        ffi.input_config.aac_config.original_copy_bit = i32::from(config.original_copy_bit);
354        ffi.input_config.aac_config.f_no_stereo_preprocessing =
355            i32::from(config.no_stereo_preprocessing);
356        ffi.input_config.aac_config.full_bandwidth = i32::from(config.full_bandwidth);
357        ffi.input_config.aac_config.inv_quant = config.inverse_quantization.raw();
358        ffi.input_config.aac_config.bitreservoir_size =
359            config
360                .bit_reservoir_size
361                .unwrap_or_else(|| match config.profile {
362                    Profile::AacLd | Profile::AacEld => DEFAULT_LD_BITRESERVOIR,
363                    _ => DEFAULT_LC_BITRESERVOIR,
364                });
365        ffi.input_config.aac_config.length = 0;
366        ffi.input_config.aac_config.use_adts =
367            i32::from(config.output_format == OutputFormat::Adts);
368
369        let create_status = unsafe {
370            libxaac_sys::ixheaace_create(
371                (&mut ffi.input_config as *mut _) as *mut c_void,
372                (&mut ffi.output_config as *mut _) as *mut c_void,
373            )
374        };
375        check_encoder(create_status)?;
376
377        let input_size = ffi.output_config.input_size as usize;
378        let asc_len = ffi.output_config.i_out_bytes.max(0) as usize;
379        let out_ptr = ffi.output_config.mem_info_table[libxaac_sys::IA_MEMTYPE_OUTPUT as usize]
380            .mem_ptr as *const u8;
381        let audio_specific_config = if asc_len == 0 || out_ptr.is_null() {
382            Vec::new()
383        } else {
384            unsafe { std::slice::from_raw_parts(out_ptr, asc_len) }.to_vec()
385        };
386
387        Ok(Self {
388            config,
389            ffi,
390            input_size,
391            audio_specific_config,
392            version,
393            _drc_payload: drc_payload,
394        })
395    }
396
397    pub fn config(&self) -> &EncoderConfig {
398        &self.config
399    }
400
401    pub fn version(&self) -> &VersionInfo {
402        &self.version
403    }
404
405    pub fn input_frame_bytes(&self) -> usize {
406        self.input_size
407    }
408
409    pub fn input_samples_per_channel(&self) -> usize {
410        self.ffi.input_config.frame_length as usize
411    }
412
413    pub fn expected_frame_count(&self) -> usize {
414        self.ffi.output_config.expected_frame_count.max(0) as usize
415    }
416
417    pub fn audio_specific_config(&self) -> &[u8] {
418        &self.audio_specific_config
419    }
420
421    pub fn encode_pcm_bytes(&mut self, pcm: &[u8]) -> Result<EncodedPacket> {
422        if pcm.len() != self.input_size {
423            return Err(Error::InvalidInput {
424                expected: self.input_size,
425                actual: pcm.len(),
426                context: "encoder frame",
427            });
428        }
429        self.encode_pcm_bytes_inner(pcm)
430    }
431
432    pub fn encode_pcm_bytes_with_padding(&mut self, pcm: &[u8]) -> Result<EncodedFrame> {
433        if pcm.len() > self.input_size {
434            return Err(Error::InvalidInput {
435                expected: self.input_size,
436                actual: pcm.len(),
437                context: "encoder partial frame",
438            });
439        }
440
441        let mut padded = vec![0u8; self.input_size];
442        padded[..pcm.len()].copy_from_slice(pcm);
443        let packet = self.encode_pcm_bytes_inner(&padded)?;
444        Ok(EncodedFrame {
445            packet,
446            padded_input_bytes: self.input_size - pcm.len(),
447        })
448    }
449
450    pub fn encode_i16_interleaved(&mut self, pcm: &[i16]) -> Result<EncodedPacket> {
451        if self.config.pcm_word_size != 16 {
452            return Err(Error::InvalidConfig(
453                "encode_i16_interleaved requires pcm_word_size = 16",
454            ));
455        }
456        let expected_samples = self.input_size / std::mem::size_of::<i16>();
457        if pcm.len() != expected_samples {
458            return Err(Error::InvalidInput {
459                expected: expected_samples,
460                actual: pcm.len(),
461                context: "encoder i16 frame",
462            });
463        }
464        let bytes = unsafe {
465            std::slice::from_raw_parts(pcm.as_ptr().cast::<u8>(), std::mem::size_of_val(pcm))
466        };
467        self.encode_pcm_bytes_inner(bytes)
468    }
469
470    pub fn encode_i24_interleaved(&mut self, pcm: &[[u8; 3]]) -> Result<EncodedPacket> {
471        if self.config.pcm_word_size != 24 {
472            return Err(Error::InvalidConfig(
473                "encode_i24_interleaved requires pcm_word_size = 24",
474            ));
475        }
476        let expected_samples = self.input_size / 3;
477        if pcm.len() != expected_samples {
478            return Err(Error::InvalidInput {
479                expected: expected_samples,
480                actual: pcm.len(),
481                context: "encoder i24 frame",
482            });
483        }
484        let bytes = unsafe { std::slice::from_raw_parts(pcm.as_ptr().cast::<u8>(), pcm.len() * 3) };
485        self.encode_pcm_bytes_inner(bytes)
486    }
487
488    pub fn encode_i32_interleaved(&mut self, pcm: &[i32]) -> Result<EncodedPacket> {
489        if self.config.pcm_word_size != 32 {
490            return Err(Error::InvalidConfig(
491                "encode_i32_interleaved requires pcm_word_size = 32",
492            ));
493        }
494        let expected_samples = self.input_size / std::mem::size_of::<i32>();
495        if pcm.len() != expected_samples {
496            return Err(Error::InvalidInput {
497                expected: expected_samples,
498                actual: pcm.len(),
499                context: "encoder i32 frame",
500            });
501        }
502        let bytes = unsafe {
503            std::slice::from_raw_parts(pcm.as_ptr().cast::<u8>(), std::mem::size_of_val(pcm))
504        };
505        self.encode_pcm_bytes_inner(bytes)
506    }
507
508    fn encode_pcm_bytes_inner(&mut self, pcm: &[u8]) -> Result<EncodedPacket> {
509        let in_ptr = self.ffi.output_config.mem_info_table[libxaac_sys::IA_MEMTYPE_INPUT as usize]
510            .mem_ptr as *mut u8;
511        let out_ptr = self.ffi.output_config.mem_info_table[libxaac_sys::IA_MEMTYPE_OUTPUT as usize]
512            .mem_ptr as *const u8;
513        if in_ptr.is_null() || out_ptr.is_null() {
514            return Err(Error::InvalidConfig("encoder buffers were not allocated"));
515        }
516
517        unsafe {
518            ptr::copy_nonoverlapping(pcm.as_ptr(), in_ptr, pcm.len());
519        }
520
521        let status = unsafe {
522            libxaac_sys::ixheaace_process(
523                self.ffi.output_config.pv_ia_process_api_obj,
524                (&mut self.ffi.input_config as *mut _) as *mut c_void,
525                (&mut self.ffi.output_config as *mut _) as *mut c_void,
526            )
527        };
528        check_encoder(status)?;
529
530        let len = self.ffi.output_config.i_out_bytes.max(0) as usize;
531        let data = unsafe { std::slice::from_raw_parts(out_ptr, len) }.to_vec();
532        Ok(EncodedPacket {
533            data,
534            bytes_consumed: self.ffi.output_config.i_bytes_consumed.max(0) as usize,
535        })
536    }
537}
538
539impl Drop for Encoder {
540    fn drop(&mut self) {
541        let _ = unsafe {
542            libxaac_sys::ixheaace_delete((&mut self.ffi.output_config as *mut _) as *mut c_void)
543        };
544    }
545}