1use std::ffi::c_void;
2use std::mem::zeroed;
3use std::ptr;
4
5use crate::error::{Error, Result};
6use crate::ffi::{check_encoder, encoder_version};
7use crate::util::{VersionInfo, encoder_alloc, encoder_free};
8
// Raw `codec_mode` values, one per `UsacCodecMode` variant.
const USAC_ONLY_SWITCHED: i32 = 0;
const USAC_ONLY_FD: i32 = 1;
const USAC_ONLY_TD: i32 = 2;

// Raw `ccfl_idx` values, one per `UsacFrameLengthIndex` variant.
const NO_SBR_CCFL_768: i32 = 0;
const NO_SBR_CCFL_1024: i32 = 1;
const SBR_8_3_CCFL_768: i32 = 2;
const SBR_2_1_CCFL_1024: i32 = 3;
const SBR_4_1_CCFL_1024: i32 = 4;

/// Bit-reservoir size used when none is configured and the profile is neither AAC-LD nor AAC-ELD.
const DEFAULT_LC_BITRESERVOIR: i32 = 768;
/// Bit-reservoir size used when none is configured and the profile is AAC-LD or AAC-ELD.
const DEFAULT_LD_BITRESERVOIR: i32 = 384;
/// Value always written to the native `method_def` field.
const METHOD_DEFINITION_PROGRAM_LOUDNESS: u32 = 1;
/// Value always written to the native `measurement_system` field.
const MEASUREMENT_SYSTEM_BS_1770_3: u32 = 2;
/// Default for `EncoderConfig::random_access_interval_ms`.
const DEFAULT_RAP_INTERVAL_IN_MS: i32 = -1;
24#[derive(Debug, Clone, Copy, PartialEq, Eq)]
25pub enum Profile {
26 AacLc,
27 HeAacV1,
28 AacLd,
29 HeAacV2,
30 AacEld,
31 Usac,
32}
33
34impl Profile {
35 fn aot(self) -> i32 {
36 match self {
37 Self::AacLc => libxaac_sys::AOT_AAC_LC as i32,
38 Self::HeAacV1 => libxaac_sys::AOT_SBR as i32,
39 Self::AacLd => libxaac_sys::AOT_AAC_LD as i32,
40 Self::HeAacV2 => libxaac_sys::AOT_PS as i32,
41 Self::AacEld => libxaac_sys::AOT_AAC_ELD as i32,
42 Self::Usac => libxaac_sys::AOT_USAC as i32,
43 }
44 }
45
46 fn default_frame_length(self) -> u16 {
47 match self {
48 Self::AacLc | Self::HeAacV1 | Self::HeAacV2 => 1024,
49 Self::AacLd | Self::AacEld => 512,
50 Self::Usac => 1024,
51 }
52 }
53
54 fn validate_frame_length(self, frame_length: u16) -> bool {
55 match self {
56 Self::AacLc | Self::HeAacV1 | Self::HeAacV2 => matches!(frame_length, 960 | 1024),
57 Self::AacLd | Self::AacEld => matches!(frame_length, 480 | 512),
58 Self::Usac => matches!(frame_length, 768 | 1024),
59 }
60 }
61}
62
/// Bitstream framing requested from the encoder.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    /// Sets the native `i_use_es` flag.
    Raw,
    /// Sets the native `i_use_adts` and `aac_config.use_adts` flags.
    Adts,
}
68
/// Setting forwarded to the native `aac_config.inv_quant` field.
///
/// The discriminants are the raw values handed to the native library.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InverseQuantizationMode {
    Off = 0,
    On = 1,
    High = 2,
}

impl InverseQuantizationMode {
    /// Returns the raw integer for this mode (`Off` = 0, `On` = 1, `High` = 2).
    fn raw(self) -> i32 {
        self as i32
    }
}
85
86#[derive(Debug, Clone, Copy, PartialEq, Eq)]
87pub enum UsacCodecMode {
88 Switched,
89 FrequencyDomain,
90 TimeDomain,
91}
92
93impl UsacCodecMode {
94 fn raw(self) -> i32 {
95 match self {
96 Self::Switched => USAC_ONLY_SWITCHED,
97 Self::FrequencyDomain => USAC_ONLY_FD,
98 Self::TimeDomain => USAC_ONLY_TD,
99 }
100 }
101}
102
103#[derive(Debug, Clone, Copy, PartialEq, Eq)]
104pub enum UsacFrameLengthIndex {
105 NoSbr768,
106 NoSbr1024,
107 Esbr8_3_768,
108 Esbr2_1_1024,
109 Esbr4_1_1024,
110}
111
112impl UsacFrameLengthIndex {
113 fn raw(self) -> i32 {
114 match self {
115 Self::NoSbr768 => NO_SBR_CCFL_768,
116 Self::NoSbr1024 => NO_SBR_CCFL_1024,
117 Self::Esbr8_3_768 => SBR_8_3_CCFL_768,
118 Self::Esbr2_1_1024 => SBR_2_1_CCFL_1024,
119 Self::Esbr4_1_1024 => SBR_4_1_CCFL_1024,
120 }
121 }
122}
123
/// Optional DRC settings attached to an `EncoderConfig`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EncoderDrcConfig {
    /// Forwarded to the native `use_drc_element` flag.
    pub use_drc_element: bool,
    /// Forwarded to the native `drc_frame_size` field; `None` is sent as 0.
    pub frame_size: Option<i32>,
    /// Bytes whose address is passed as the native `pv_drc_cfg` pointer.
    /// An empty payload results in a null pointer.
    // NOTE(review): the byte layout the native library expects is not visible here — confirm.
    pub config_payload: Vec<u8>,
}
130
131#[derive(Debug, Clone)]
132pub struct EncoderConfig {
133 pub profile: Profile,
134 pub sample_rate: u32,
135 pub native_sample_rate: Option<u32>,
136 pub channels: u16,
137 pub channel_mask: u32,
138 pub num_coupling_channels: u16,
139 pub bitrate: u32,
140 pub pcm_word_size: u16,
141 pub frame_length: Option<u16>,
142 pub output_format: OutputFormat,
143 pub bandwidth: Option<u32>,
144 pub dual_mono: bool,
145 pub use_tns: bool,
146 pub noise_filling: bool,
147 pub full_bandwidth: bool,
148 pub private_bit: bool,
149 pub copyright_bit: bool,
150 pub original_copy_bit: bool,
151 pub no_stereo_preprocessing: bool,
152 pub use_mps: bool,
153 pub mps_tree_config: Option<i32>,
154 pub complex_prediction: bool,
155 pub bit_reservoir_size: Option<i32>,
156 pub inverse_quantization: InverseQuantizationMode,
157 pub enable_esbr: Option<bool>,
158 pub high_quality_esbr: bool,
159 pub pvc: bool,
160 pub harmonic_sbr: bool,
161 pub inter_tes: bool,
162 pub usac_mode: UsacCodecMode,
163 pub usac_frame_length_index: UsacFrameLengthIndex,
164 pub random_access_interval_ms: i32,
165 pub stream_id: u16,
166 pub use_delay_adjustment: bool,
167 pub write_program_config_element: bool,
168 pub measured_loudness: Option<f64>,
169 pub sample_peak_level: Option<f32>,
170 pub drc: Option<EncoderDrcConfig>,
171}
172
173impl Default for EncoderConfig {
174 fn default() -> Self {
175 Self {
176 profile: Profile::AacLc,
177 sample_rate: 44_100,
178 native_sample_rate: None,
179 channels: 2,
180 channel_mask: 0,
181 num_coupling_channels: 0,
182 bitrate: 48_000,
183 pcm_word_size: 16,
184 frame_length: None,
185 output_format: OutputFormat::Raw,
186 bandwidth: None,
187 dual_mono: false,
188 use_tns: true,
189 noise_filling: false,
190 full_bandwidth: false,
191 private_bit: false,
192 copyright_bit: false,
193 original_copy_bit: false,
194 no_stereo_preprocessing: false,
195 use_mps: false,
196 mps_tree_config: None,
197 complex_prediction: false,
198 bit_reservoir_size: None,
199 inverse_quantization: InverseQuantizationMode::High,
200 enable_esbr: None,
201 high_quality_esbr: false,
202 pvc: false,
203 harmonic_sbr: false,
204 inter_tes: false,
205 usac_mode: UsacCodecMode::FrequencyDomain,
206 usac_frame_length_index: UsacFrameLengthIndex::NoSbr1024,
207 random_access_interval_ms: DEFAULT_RAP_INTERVAL_IN_MS,
208 stream_id: 0,
209 use_delay_adjustment: true,
210 write_program_config_element: false,
211 measured_loudness: None,
212 sample_peak_level: None,
213 drc: None,
214 }
215 }
216}
217
218impl EncoderConfig {
219 fn validate(&self) -> Result<u16> {
220 if self.channels == 0 {
221 return Err(Error::InvalidConfig(
222 "channel count must be greater than zero",
223 ));
224 }
225 if self.bitrate == 0 {
226 return Err(Error::InvalidConfig("bitrate must be greater than zero"));
227 }
228 if !matches!(self.pcm_word_size, 16 | 24 | 32) {
229 return Err(Error::InvalidConfig("pcm_word_size must be 16, 24, or 32"));
230 }
231 let frame_length = self
232 .frame_length
233 .unwrap_or_else(|| self.profile.default_frame_length());
234 if !self.profile.validate_frame_length(frame_length) {
235 return Err(Error::InvalidConfig(
236 "frame_length is not valid for the selected profile",
237 ));
238 }
239 if self.profile != Profile::Usac && self.output_format == OutputFormat::Adts && self.use_mps
240 {
241 return Err(Error::InvalidConfig(
242 "MPS is only usable with elementary streams",
243 ));
244 }
245 if self.bandwidth == Some(0) {
246 return Err(Error::InvalidConfig("bandwidth must be greater than zero"));
247 }
248 Ok(frame_length)
249 }
250}
251
/// Output of one encoder call.
#[derive(Debug, Clone)]
pub struct EncodedPacket {
    /// Bytes copied out of the native output buffer; may be empty.
    pub data: Vec<u8>,
    /// Value of the native `i_bytes_consumed` field after the call, clamped at zero.
    pub bytes_consumed: usize,
}
257
258#[derive(Debug, Clone)]
259pub struct EncodedFrame {
260 pub packet: EncodedPacket,
261 pub padded_input_bytes: usize,
262}
263
264#[derive(Debug)]
265pub struct Encoder {
266 config: EncoderConfig,
267 ffi: Box<libxaac_sys::ixheaace_user_config_struct>,
268 input_size: usize,
269 audio_specific_config: Vec<u8>,
270 version: VersionInfo,
271 _drc_payload: Option<Box<[u8]>>,
272}
273
274impl Encoder {
275 pub fn new(config: EncoderConfig) -> Result<Self> {
276 let frame_length = config.validate()?;
277 let mut ffi: Box<libxaac_sys::ixheaace_user_config_struct> = Box::new(unsafe { zeroed() });
278 let version = encoder_version()?;
279 let drc_payload = config
280 .drc
281 .as_ref()
282 .filter(|drc| !drc.config_payload.is_empty())
283 .map(|drc| drc.config_payload.clone().into_boxed_slice());
284
285 ffi.output_config.malloc_xheaace = encoder_alloc();
286 ffi.output_config.free_xheaace = encoder_free();
287
288 ffi.input_config.ui_pcm_wd_sz = config.pcm_word_size as u32;
289 ffi.input_config.i_bitrate = config.bitrate as i32;
290 ffi.input_config.frame_length = frame_length as i32;
291 ffi.input_config.frame_cmd_flag = i32::from(config.frame_length.is_some());
292 ffi.input_config.aot = config.profile.aot();
293 ffi.input_config.i_mps_tree_config = config.mps_tree_config.unwrap_or(-1);
294 ffi.input_config.esbr_flag = i32::from(
295 config
296 .enable_esbr
297 .unwrap_or(config.profile == Profile::Usac),
298 );
299 ffi.input_config.i_channels = i32::from(config.channels);
300 ffi.input_config.i_samp_freq = config.sample_rate;
301 ffi.input_config.i_native_samp_freq =
302 config.native_sample_rate.unwrap_or(config.sample_rate) as i32;
303 ffi.input_config.i_channels_mask = config.channel_mask as i32;
304 ffi.input_config.i_num_coupling_chan = i32::from(config.num_coupling_channels);
305 ffi.input_config.i_use_mps = i32::from(config.use_mps);
306 ffi.input_config.i_use_adts = i32::from(config.output_format == OutputFormat::Adts);
307 ffi.input_config.i_use_es = i32::from(config.output_format == OutputFormat::Raw);
308 ffi.input_config.usac_en = i32::from(config.profile == Profile::Usac);
309 ffi.input_config.codec_mode = config.usac_mode.raw();
310 ffi.input_config.cplx_pred = i32::from(config.complex_prediction);
311 ffi.input_config.ccfl_idx = config.usac_frame_length_index.raw();
312 ffi.input_config.pvc_active = i32::from(config.pvc);
313 ffi.input_config.harmonic_sbr = i32::from(config.harmonic_sbr);
314 ffi.input_config.inter_tes_active = i32::from(config.inter_tes);
315 ffi.input_config.pv_drc_cfg = drc_payload
316 .as_ref()
317 .map_or(ptr::null_mut(), |payload| payload.as_ptr() as *mut _);
318 ffi.input_config.use_drc_element = i32::from(
319 config
320 .drc
321 .as_ref()
322 .map(|drc| drc.use_drc_element)
323 .unwrap_or(false),
324 );
325 ffi.input_config.drc_frame_size = config
326 .drc
327 .as_ref()
328 .and_then(|drc| drc.frame_size)
329 .unwrap_or(0);
330 ffi.input_config.hq_esbr = i32::from(config.high_quality_esbr);
331 ffi.input_config.write_program_config_element =
332 i32::from(config.write_program_config_element);
333 ffi.input_config.random_access_interval = config.random_access_interval_ms;
334 ffi.input_config.method_def = METHOD_DEFINITION_PROGRAM_LOUDNESS;
335 ffi.input_config.measured_loudness = config.measured_loudness.unwrap_or(0.0);
336 ffi.input_config.measurement_system = MEASUREMENT_SYSTEM_BS_1770_3;
337 ffi.input_config.sample_peak_level = config.sample_peak_level.unwrap_or(0.0);
338 ffi.input_config.stream_id = config.stream_id;
339 ffi.input_config.use_delay_adjustment = i32::from(config.use_delay_adjustment);
340 ffi.input_config.user_tns_flag = 1;
341 ffi.input_config.user_esbr_flag = i32::from(config.enable_esbr.is_some());
342
343 ffi.input_config.aac_config.bitrate = config.bitrate as i32;
344 ffi.input_config.aac_config.sample_rate = config.sample_rate as i32;
345 ffi.input_config.aac_config.num_channels_in = i32::from(config.channels);
346 ffi.input_config.aac_config.num_channels_out = i32::from(config.channels);
347 ffi.input_config.aac_config.bandwidth = config.bandwidth.unwrap_or_default() as i32;
348 ffi.input_config.aac_config.dual_mono = i32::from(config.dual_mono);
349 ffi.input_config.aac_config.use_tns = i32::from(config.use_tns);
350 ffi.input_config.aac_config.noise_filling = i32::from(config.noise_filling);
351 ffi.input_config.aac_config.private_bit = i32::from(config.private_bit);
352 ffi.input_config.aac_config.copyright_bit = i32::from(config.copyright_bit);
353 ffi.input_config.aac_config.original_copy_bit = i32::from(config.original_copy_bit);
354 ffi.input_config.aac_config.f_no_stereo_preprocessing =
355 i32::from(config.no_stereo_preprocessing);
356 ffi.input_config.aac_config.full_bandwidth = i32::from(config.full_bandwidth);
357 ffi.input_config.aac_config.inv_quant = config.inverse_quantization.raw();
358 ffi.input_config.aac_config.bitreservoir_size =
359 config
360 .bit_reservoir_size
361 .unwrap_or_else(|| match config.profile {
362 Profile::AacLd | Profile::AacEld => DEFAULT_LD_BITRESERVOIR,
363 _ => DEFAULT_LC_BITRESERVOIR,
364 });
365 ffi.input_config.aac_config.length = 0;
366 ffi.input_config.aac_config.use_adts =
367 i32::from(config.output_format == OutputFormat::Adts);
368
369 let create_status = unsafe {
370 libxaac_sys::ixheaace_create(
371 (&mut ffi.input_config as *mut _) as *mut c_void,
372 (&mut ffi.output_config as *mut _) as *mut c_void,
373 )
374 };
375 check_encoder(create_status)?;
376
377 let input_size = ffi.output_config.input_size as usize;
378 let asc_len = ffi.output_config.i_out_bytes.max(0) as usize;
379 let out_ptr = ffi.output_config.mem_info_table[libxaac_sys::IA_MEMTYPE_OUTPUT as usize]
380 .mem_ptr as *const u8;
381 let audio_specific_config = if asc_len == 0 || out_ptr.is_null() {
382 Vec::new()
383 } else {
384 unsafe { std::slice::from_raw_parts(out_ptr, asc_len) }.to_vec()
385 };
386
387 Ok(Self {
388 config,
389 ffi,
390 input_size,
391 audio_specific_config,
392 version,
393 _drc_payload: drc_payload,
394 })
395 }
396
397 pub fn config(&self) -> &EncoderConfig {
398 &self.config
399 }
400
401 pub fn version(&self) -> &VersionInfo {
402 &self.version
403 }
404
405 pub fn input_frame_bytes(&self) -> usize {
406 self.input_size
407 }
408
409 pub fn input_samples_per_channel(&self) -> usize {
410 self.ffi.input_config.frame_length as usize
411 }
412
413 pub fn expected_frame_count(&self) -> usize {
414 self.ffi.output_config.expected_frame_count.max(0) as usize
415 }
416
417 pub fn audio_specific_config(&self) -> &[u8] {
418 &self.audio_specific_config
419 }
420
421 pub fn encode_pcm_bytes(&mut self, pcm: &[u8]) -> Result<EncodedPacket> {
422 if pcm.len() != self.input_size {
423 return Err(Error::InvalidInput {
424 expected: self.input_size,
425 actual: pcm.len(),
426 context: "encoder frame",
427 });
428 }
429 self.encode_pcm_bytes_inner(pcm)
430 }
431
432 pub fn encode_pcm_bytes_with_padding(&mut self, pcm: &[u8]) -> Result<EncodedFrame> {
433 if pcm.len() > self.input_size {
434 return Err(Error::InvalidInput {
435 expected: self.input_size,
436 actual: pcm.len(),
437 context: "encoder partial frame",
438 });
439 }
440
441 let mut padded = vec![0u8; self.input_size];
442 padded[..pcm.len()].copy_from_slice(pcm);
443 let packet = self.encode_pcm_bytes_inner(&padded)?;
444 Ok(EncodedFrame {
445 packet,
446 padded_input_bytes: self.input_size - pcm.len(),
447 })
448 }
449
450 pub fn encode_i16_interleaved(&mut self, pcm: &[i16]) -> Result<EncodedPacket> {
451 if self.config.pcm_word_size != 16 {
452 return Err(Error::InvalidConfig(
453 "encode_i16_interleaved requires pcm_word_size = 16",
454 ));
455 }
456 let expected_samples = self.input_size / std::mem::size_of::<i16>();
457 if pcm.len() != expected_samples {
458 return Err(Error::InvalidInput {
459 expected: expected_samples,
460 actual: pcm.len(),
461 context: "encoder i16 frame",
462 });
463 }
464 let bytes = unsafe {
465 std::slice::from_raw_parts(pcm.as_ptr().cast::<u8>(), std::mem::size_of_val(pcm))
466 };
467 self.encode_pcm_bytes_inner(bytes)
468 }
469
470 pub fn encode_i24_interleaved(&mut self, pcm: &[[u8; 3]]) -> Result<EncodedPacket> {
471 if self.config.pcm_word_size != 24 {
472 return Err(Error::InvalidConfig(
473 "encode_i24_interleaved requires pcm_word_size = 24",
474 ));
475 }
476 let expected_samples = self.input_size / 3;
477 if pcm.len() != expected_samples {
478 return Err(Error::InvalidInput {
479 expected: expected_samples,
480 actual: pcm.len(),
481 context: "encoder i24 frame",
482 });
483 }
484 let bytes = unsafe { std::slice::from_raw_parts(pcm.as_ptr().cast::<u8>(), pcm.len() * 3) };
485 self.encode_pcm_bytes_inner(bytes)
486 }
487
488 pub fn encode_i32_interleaved(&mut self, pcm: &[i32]) -> Result<EncodedPacket> {
489 if self.config.pcm_word_size != 32 {
490 return Err(Error::InvalidConfig(
491 "encode_i32_interleaved requires pcm_word_size = 32",
492 ));
493 }
494 let expected_samples = self.input_size / std::mem::size_of::<i32>();
495 if pcm.len() != expected_samples {
496 return Err(Error::InvalidInput {
497 expected: expected_samples,
498 actual: pcm.len(),
499 context: "encoder i32 frame",
500 });
501 }
502 let bytes = unsafe {
503 std::slice::from_raw_parts(pcm.as_ptr().cast::<u8>(), std::mem::size_of_val(pcm))
504 };
505 self.encode_pcm_bytes_inner(bytes)
506 }
507
508 fn encode_pcm_bytes_inner(&mut self, pcm: &[u8]) -> Result<EncodedPacket> {
509 let in_ptr = self.ffi.output_config.mem_info_table[libxaac_sys::IA_MEMTYPE_INPUT as usize]
510 .mem_ptr as *mut u8;
511 let out_ptr = self.ffi.output_config.mem_info_table[libxaac_sys::IA_MEMTYPE_OUTPUT as usize]
512 .mem_ptr as *const u8;
513 if in_ptr.is_null() || out_ptr.is_null() {
514 return Err(Error::InvalidConfig("encoder buffers were not allocated"));
515 }
516
517 unsafe {
518 ptr::copy_nonoverlapping(pcm.as_ptr(), in_ptr, pcm.len());
519 }
520
521 let status = unsafe {
522 libxaac_sys::ixheaace_process(
523 self.ffi.output_config.pv_ia_process_api_obj,
524 (&mut self.ffi.input_config as *mut _) as *mut c_void,
525 (&mut self.ffi.output_config as *mut _) as *mut c_void,
526 )
527 };
528 check_encoder(status)?;
529
530 let len = self.ffi.output_config.i_out_bytes.max(0) as usize;
531 let data = unsafe { std::slice::from_raw_parts(out_ptr, len) }.to_vec();
532 Ok(EncodedPacket {
533 data,
534 bytes_consumed: self.ffi.output_config.i_bytes_consumed.max(0) as usize,
535 })
536 }
537}
538
539impl Drop for Encoder {
540 fn drop(&mut self) {
541 let _ = unsafe {
542 libxaac_sys::ixheaace_delete((&mut self.ffi.output_config as *mut _) as *mut c_void)
543 };
544 }
545}