use mp4::Mp4Reader;
use matroska_demuxer::{Frame as MkvFrame, MatroskaFile, TrackType as MkvTrackType};
use std::io::Cursor;
use super::AudioTrack;
mod aac;
mod opus;
mod ac3;
#[cfg(test)]
mod tests;
use aac::{extract_aac_asc, mp4_has_aac_sample_entry, decode_asc_sample_rate, decode_asc_channels, hex_prefix};
use opus::{extract_mp4_opus_dops_body, dops_to_opus_head};
use ac3::{extract_mp4_ac3_dac3_body, extract_mp4_eac3_dec3_body};
pub(crate) use ac3::{ac3_sample_rate_channels_from_dac3, eac3_sample_rate_channels_from_dec3};
/// Upper bound on speculative `Vec` preallocation driven by the container-declared sample
/// count. The count comes straight from the (untrusted) file, so it must not be allowed to
/// request an arbitrarily large allocation up front; vectors still grow past this on demand.
const MP4_SAMPLE_PREALLOC_CAP: usize = 1 << 16;

/// Returns a preallocation size for `sample_count` entries, bounded by
/// [`MP4_SAMPLE_PREALLOC_CAP`].
fn mp4_sample_capacity(sample_count: u32) -> usize {
    usize::try_from(sample_count)
        .map_or(MP4_SAMPLE_PREALLOC_CAP, |n| n.min(MP4_SAMPLE_PREALLOC_CAP))
}

/// Reads samples `1..=sample_count` of `track_id` through the `mp4` crate's `read_sample`.
///
/// This is the all-or-nothing reader shared by the AC-3, E-AC-3 and Opus paths: it returns
/// `None` when the header cannot be re-parsed, when any `read_sample` call fails (the error is
/// logged, tagged with `codec`), or when no sample was read at all. On success it returns the
/// sample payloads and their durations as reported by the crate, in matching order.
fn read_mp4_samples_strict(
    data: &[u8],
    track_id: u32,
    sample_count: u32,
    codec: &str,
) -> Option<(Vec<Vec<u8>>, Vec<u32>)> {
    let mut cursor = Cursor::new(data);
    let mut reader = Mp4Reader::read_header(&mut cursor, data.len() as u64).ok()?;
    let capacity = mp4_sample_capacity(sample_count);
    let mut samples = Vec::with_capacity(capacity);
    let mut durations = Vec::with_capacity(capacity);
    for idx in 1..=sample_count {
        match reader.read_sample(track_id, idx) {
            Ok(Some(sample)) => {
                durations.push(sample.duration);
                samples.push(sample.bytes.to_vec());
            }
            Ok(None) => break,
            Err(e) => {
                tracing::warn!(
                    codec,
                    track_id,
                    idx,
                    error = %e,
                    "audio passthrough: read_sample error mid-track; dropping audio"
                );
                return None;
            }
        }
    }
    if samples.is_empty() {
        return None;
    }
    Some((samples, durations))
}

/// Extracts the first audio track of an in-memory MP4 file for passthrough.
///
/// The codec is chosen in this order: AAC (reported by the `mp4` crate or found by the manual
/// sample-entry scan), then AC-3 (`dac3` body present), then E-AC-3 (`dec3` body present),
/// then Opus (`dOps` body present). Any other codec is logged and skipped.
///
/// Returns `None` when the header cannot be parsed, there is no audio track, the codec is
/// unsupported, the codec configuration is missing or undecodable, or no sample could be read.
///
/// For AAC, samples come from the fragmented sample table when
/// `build_fragmented_sample_table` yields one, otherwise from the `mp4` crate's sample table;
/// in the latter case a mid-track read error keeps the samples read so far. The other codecs
/// are read all-or-nothing via [`read_mp4_samples_strict`].
pub(super) fn extract_mp4_audio(data: &[u8]) -> Option<AudioTrack> {
    let size = data.len() as u64;
    let reader = Mp4Reader::read_header(Cursor::new(data), size).ok()?;
    let track = reader
        .tracks()
        .values()
        .find(|t| t.track_type().ok() == Some(mp4::TrackType::Audio))?;
    let track_id = track.track_id();

    // Codec configuration boxes are located by scanning the raw bytes, independently of what
    // the mp4 crate manages to classify.
    let opus_dops = extract_mp4_opus_dops_body(data);
    let ac3_cfg = extract_mp4_ac3_dac3_body(data);
    let eac3_cfg = extract_mp4_eac3_dec3_body(data);

    let media_type = track.media_type();
    let crate_says_aac = matches!(&media_type, Ok(mp4::MediaType::AAC));
    let manual_says_aac = mp4_has_aac_sample_entry(data);
    let is_aac = crate_says_aac || manual_says_aac;

    if !is_aac && opus_dops.is_none() && ac3_cfg.is_none() && eac3_cfg.is_none() {
        match media_type {
            Ok(mt) => tracing::warn!(
                codec = ?mt,
                "audio passthrough skipped: only AAC / Opus / AC-3 / E-AC-3 are supported"
            ),
            Err(e) => tracing::warn!(
                error = ?e,
                "audio passthrough skipped: mp4 crate could not classify audio sample entry, \
                 and manual stsd walk found no recognized 4cc"
            ),
        }
        return None;
    }

    let timescale = track.timescale();
    let sample_count = track.sample_count();

    if is_aac {
        let asc = extract_aac_asc(data)?;
        if asc.is_empty() {
            tracing::warn!(
                "AAC track found but AudioSpecificConfig is empty; dropping. \
                 Source has an esds box but its DecoderSpecificInfo descriptor is \
                 zero-length."
            );
            return None;
        }

        // Prefer the full ASC parser (SBR rate first, then the core rate); fall back to the
        // minimal decoder when the full parser rejects the config.
        let parsed = crate::aac_asc::parse_aac_asc(&asc);
        let decoded_rate = parsed
            .as_ref()
            .and_then(|p| p.sbr_sample_rate.or(Some(p.sample_rate)))
            .or_else(|| decode_asc_sample_rate(&asc));
        let Some(sample_rate) = decoded_rate else {
            tracing::warn!(
                asc_hex = %hex_prefix(&asc, 16),
                "AAC ASC sample rate could not be decoded; dropping audio. \
                 Likely an extended sampling-frequency-index escape (0x0F) \
                 pointing at unsupported bytes, or a malformed ASC."
            );
            return None;
        };
        let channels = parsed
            .as_ref()
            .map(crate::aac_asc::effective_output_channels)
            .or_else(|| decode_asc_channels(&asc))
            .unwrap_or(2);

        let capacity = mp4_sample_capacity(sample_count);
        let mut samples = Vec::with_capacity(capacity);
        let mut durations = Vec::with_capacity(capacity);
        // Duration substituted only when the container reports a zero duration for a sample.
        const AAC_LC_CORE_FRAME_SIZE_TICKS: u32 = 1024;

        if let Some(frag) = super::mp4::build_fragmented_sample_table(data, track_id, 0, 0) {
            tracing::info!(
                track_id,
                sample_count = frag.len(),
                "fragmented MP4 audio: built sample table from moof/traf/trun"
            );
            for s in &frag {
                let off = s.offset as usize;
                let sz = s.size as usize;
                let end = match off.checked_add(sz) {
                    Some(e) if e <= data.len() => e,
                    _ => {
                        tracing::warn!(
                            track_id,
                            offset = s.offset,
                            size = s.size,
                            data_len = data.len(),
                            "fragmented audio sample range out of bounds; truncating track"
                        );
                        break;
                    }
                };
                // Honour the duration from the fragment table and only fall back to the
                // nominal frame size when it is zero, matching the non-fragmented path below.
                let dur = if s.duration_ticks == 0 {
                    AAC_LC_CORE_FRAME_SIZE_TICKS
                } else {
                    s.duration_ticks
                };
                durations.push(dur);
                samples.push(data[off..end].to_vec());
            }
        } else {
            let mut cursor = Cursor::new(data);
            let mut reader = match Mp4Reader::read_header(&mut cursor, size) {
                Ok(r) => r,
                Err(e) => {
                    tracing::warn!(error = %e, "audio passthrough: re-opening MP4 for sample read failed; dropping audio");
                    return None;
                }
            };
            for idx in 1..=sample_count {
                match reader.read_sample(track_id, idx) {
                    Ok(Some(sample)) => {
                        let dur = if sample.duration == 0 {
                            AAC_LC_CORE_FRAME_SIZE_TICKS
                        } else {
                            sample.duration
                        };
                        durations.push(dur);
                        samples.push(sample.bytes.to_vec());
                    }
                    Ok(None) => break,
                    Err(e) => {
                        // Best effort: keep whatever was read before the failure.
                        tracing::warn!(
                            track_id,
                            idx,
                            error = %e,
                            "audio passthrough: read_sample error mid-track; \
                             keeping samples read so far ({} of {}) and continuing",
                            samples.len(),
                            sample_count
                        );
                        break;
                    }
                }
            }
        }

        if samples.is_empty() {
            tracing::warn!(
                track_id,
                sample_count,
                "AAC track parsed (ASC + sample table) but read_sample returned 0 \
                 samples — possible mp4 crate stsd / stco parse failure on the source"
            );
            return None;
        }
        return Some(AudioTrack {
            codec: "aac".into(),
            samples,
            sample_rate,
            channels,
            asc,
            codec_private: Vec::new(),
            timescale,
            durations,
        });
    }

    if let Some(dac3_body) = ac3_cfg {
        if dac3_body.len() < 3 {
            tracing::warn!("MP4 AC-3 dac3 body shorter than 3 bytes — dropping audio");
            return None;
        }
        let (sr, ch) = ac3_sample_rate_channels_from_dac3(&dac3_body)?;
        let (samples, durations) = read_mp4_samples_strict(data, track_id, sample_count, "ac3")?;
        return Some(AudioTrack {
            codec: "ac3".into(),
            samples,
            sample_rate: sr,
            channels: ch,
            asc: Vec::new(),
            // Only the first three bytes of the dac3 body are carried forward.
            codec_private: dac3_body[..3].to_vec(),
            timescale,
            durations,
        });
    }

    if let Some(dec3_body) = eac3_cfg {
        if dec3_body.len() < 5 {
            tracing::warn!("MP4 E-AC-3 dec3 body shorter than 5 bytes — dropping audio");
            return None;
        }
        let (sr, ch) = eac3_sample_rate_channels_from_dec3(&dec3_body)?;
        let (samples, durations) = read_mp4_samples_strict(data, track_id, sample_count, "eac3")?;
        return Some(AudioTrack {
            codec: "eac3".into(),
            samples,
            sample_rate: sr,
            channels: ch,
            asc: Vec::new(),
            codec_private: dec3_body,
            timescale,
            durations,
        });
    }

    // Opus is the only remaining possibility after the support check above.
    let dops_body = opus_dops?;
    let opus_head = dops_to_opus_head(&dops_body)?;
    // Byte 1 is read as the channel count and bytes 4..8 as the little-endian input sample
    // rate. Checked access turns a too-short head into `None` instead of a panic.
    let channels = u16::from(*opus_head.get(1)?);
    let rate_bytes: [u8; 4] = opus_head.get(4..8)?.try_into().ok()?;
    let input_sample_rate = u32::from_le_bytes(rate_bytes);
    let (samples, durations) = read_mp4_samples_strict(data, track_id, sample_count, "opus")?;
    Some(AudioTrack {
        codec: "opus".into(),
        samples,
        sample_rate: input_sample_rate,
        channels,
        asc: Vec::new(),
        codec_private: opus_head,
        timescale,
        durations,
    })
}
/// Extracts the first audio track of an in-memory Matroska file for passthrough.
///
/// Supported codec IDs are `A_AAC`, `A_OPUS`, `A_AC3` and `A_EAC3`; any other codec is logged
/// and skipped. Returns `None` when the file cannot be opened, there is no audio track, the
/// codec is unsupported, required codec-private data is missing or empty (AAC / Opus), the
/// first AC-3 / E-AC-3 frame's sync header cannot be parsed, or no frame was read.
///
/// Frame durations are collected in nanoseconds and converted (rounded to nearest, saturating,
/// minimum 1) into ticks of the timescale that is actually returned in the `AudioTrack`, so
/// `durations` and `timescale` always agree even when the rate derived from the codec
/// configuration differs from the rate in the track header.
pub(super) fn extract_mkv_audio(data: &[u8]) -> Option<AudioTrack> {
    enum MkvAudioKind {
        Aac,
        Opus,
        Ac3,
        Eac3,
    }

    const NANOS_PER_SECOND: u128 = 1_000_000_000;

    let cursor = Cursor::new(data);
    let mut mkv = MatroskaFile::open(cursor).ok()?;

    // Everything needed from the track entry is copied out here so the borrow of `mkv` ends
    // before frames are pulled from it.
    let (track_number, kind, codec_private_or_empty, sample_rate, channels, default_duration) = {
        let track = mkv
            .tracks()
            .iter()
            .find(|t| t.track_type() == MkvTrackType::Audio)?;
        let kind = match track.codec_id() {
            "A_AAC" => MkvAudioKind::Aac,
            "A_OPUS" => MkvAudioKind::Opus,
            "A_AC3" => MkvAudioKind::Ac3,
            "A_EAC3" => MkvAudioKind::Eac3,
            other => {
                tracing::warn!(
                    codec = other,
                    "audio passthrough skipped: only AAC / Opus / AC-3 / E-AC-3 are supported"
                );
                return None;
            }
        };
        let codec_private = match kind {
            MkvAudioKind::Aac => {
                let cp = track.codec_private()?.to_vec();
                if cp.is_empty() {
                    return None;
                }
                cp
            }
            MkvAudioKind::Opus => {
                // Drop a leading "OpusHead" magic when present; what remains must be non-empty.
                let raw = track.codec_private()?;
                let body = raw.strip_prefix(b"OpusHead").unwrap_or(raw);
                if body.is_empty() {
                    return None;
                }
                body.to_vec()
            }
            // Codec-private data is optional here; the config is rebuilt from the first frame.
            MkvAudioKind::Ac3 | MkvAudioKind::Eac3 => track
                .codec_private()
                .map(|p| p.to_vec())
                .unwrap_or_default(),
        };
        let audio = track.audio()?;
        let sr = audio.sampling_frequency() as u32;
        let ch = audio.channels().get() as u16;
        let default_duration = track.default_duration().map(|d| d.get());
        (
            track.track_number().get(),
            kind,
            codec_private,
            sr,
            ch,
            default_duration,
        )
    };

    // Rate used only to estimate a frame duration when neither the frame nor the track
    // supplies one; a zero header rate falls back to 48 kHz to avoid dividing by zero.
    let header_timescale = match kind {
        MkvAudioKind::Opus => 48_000,
        MkvAudioKind::Aac | MkvAudioKind::Ac3 | MkvAudioKind::Eac3 => sample_rate,
    };
    let nominal_frame_samples: u64 = match kind {
        MkvAudioKind::Aac => 1024,
        MkvAudioKind::Opus => 960,
        MkvAudioKind::Ac3 | MkvAudioKind::Eac3 => 1536,
    };
    let fallback_rate = if header_timescale == 0 {
        48_000
    } else {
        header_timescale
    };
    let fallback_duration_ns = 1_000_000_000u64 * nominal_frame_samples / u64::from(fallback_rate);

    let mut samples: Vec<Vec<u8>> = Vec::new();
    let mut durations_ns: Vec<u64> = Vec::new();
    let mut frame = MkvFrame::default();
    loop {
        match mkv.next_frame(&mut frame) {
            Ok(true) => {
                if frame.track != track_number {
                    continue;
                }
                // NOTE(review): `frame.duration` is treated as nanoseconds, the same unit the
                // code uses for the track default duration — confirm against the
                // matroska_demuxer documentation.
                durations_ns.push(
                    frame
                        .duration
                        .or(default_duration)
                        .unwrap_or(fallback_duration_ns),
                );
                samples.push(std::mem::take(&mut frame.data));
            }
            Ok(false) => break,
            Err(e) => {
                // Best effort: keep the frames read so far, but do not hide the failure.
                tracing::warn!(
                    error = ?e,
                    frames_read = samples.len(),
                    "MKV audio: demuxer error while reading frames; keeping frames read so far"
                );
                break;
            }
        }
    }
    if samples.is_empty() {
        return None;
    }

    // Converts the collected nanosecond durations into ticks of `timescale`. Rounds to nearest
    // so e.g. 21_333_333 ns at 48 kHz yields 1024 rather than 1023, saturates instead of
    // wrapping on overflow, and never emits a zero-length sample.
    let durations_in = |timescale: u32| -> Vec<u32> {
        durations_ns
            .iter()
            .map(|&ns| {
                let ticks = (u128::from(ns) * u128::from(timescale) + NANOS_PER_SECOND / 2)
                    / NANOS_PER_SECOND;
                u32::try_from(ticks).unwrap_or(u32::MAX).max(1)
            })
            .collect()
    };

    Some(match kind {
        MkvAudioKind::Aac => {
            // The ASC, when parseable, overrides the header's channel count and sample rate.
            let parsed = crate::aac_asc::parse_aac_asc(&codec_private_or_empty);
            let aac_channels = parsed
                .as_ref()
                .map(crate::aac_asc::effective_output_channels)
                .unwrap_or(channels);
            let aac_sample_rate = parsed
                .as_ref()
                .and_then(|p| p.sbr_sample_rate.or(Some(p.sample_rate)))
                .unwrap_or(sample_rate);
            let durations = durations_in(aac_sample_rate);
            AudioTrack {
                codec: "aac".into(),
                samples,
                sample_rate: aac_sample_rate,
                channels: aac_channels,
                asc: codec_private_or_empty,
                codec_private: Vec::new(),
                timescale: aac_sample_rate,
                durations,
            }
        }
        MkvAudioKind::Opus => {
            let durations = durations_in(header_timescale);
            AudioTrack {
                codec: "opus".into(),
                samples,
                sample_rate,
                channels,
                asc: Vec::new(),
                codec_private: codec_private_or_empty,
                timescale: header_timescale,
                durations,
            }
        }
        MkvAudioKind::Ac3 => {
            // The dac3 body is rebuilt from the first frame's sync header.
            let first_sync = samples
                .first()
                .and_then(|f| crate::ac3_sync::parse_sync_info(f).ok());
            let dac3 = match first_sync {
                Some(crate::ac3_sync::SyncInfo::Ac3(s)) => {
                    crate::mux::dac3_body_from_sync(&s).to_vec()
                }
                _ => {
                    tracing::warn!(
                        "MKV A_AC3: failed to parse first frame sync header — dropping audio"
                    );
                    return None;
                }
            };
            let (sr, ch) =
                ac3_sample_rate_channels_from_dac3(&dac3).unwrap_or((sample_rate, channels));
            let durations = durations_in(sr);
            AudioTrack {
                codec: "ac3".into(),
                samples,
                sample_rate: sr,
                channels: ch,
                asc: Vec::new(),
                codec_private: dac3,
                timescale: sr,
                durations,
            }
        }
        MkvAudioKind::Eac3 => {
            // The dec3 body, sample rate and channel count all come from the first frame's
            // sync header.
            let first_sync = samples
                .first()
                .and_then(|f| crate::ac3_sync::parse_sync_info(f).ok());
            let (dec3, sr, ch) = match first_sync {
                Some(crate::ac3_sync::SyncInfo::Eac3(s)) => {
                    let sr = crate::ac3_sync::eac3_sample_rate_hz(s.fscod, s.fscod2);
                    let spf = crate::ac3_sync::eac3_samples_per_frame(s.numblkscod) as u64;
                    let frame_bytes = ((s.frmsiz as u64) + 1) * 2;
                    // Bitrate estimated from the frame size and samples per frame.
                    let bitrate_kbps = if spf > 0 && sr > 0 {
                        (frame_bytes * 8 * sr as u64) / spf / 1000
                    } else {
                        0
                    };
                    let data_rate = bitrate_kbps.div_ceil(2) as u16;
                    let dec3 = crate::mux::dec3_body_from_sync(&s, data_rate).to_vec();
                    let ch = crate::ac3_sync::channel_count(s.acmod, s.lfeon);
                    (dec3, sr, ch)
                }
                _ => {
                    tracing::warn!(
                        "MKV A_EAC3: failed to parse first frame sync header — dropping audio"
                    );
                    return None;
                }
            };
            let durations = durations_in(sr);
            AudioTrack {
                codec: "eac3".into(),
                samples,
                sample_rate: sr,
                channels: ch,
                asc: Vec::new(),
                codec_private: dec3,
                timescale: sr,
                durations,
            }
        }
    })
}