use bytes::{BufMut, Bytes, BytesMut};
use super::flv::{AudioConfig, VideoConfig};
/// Appends one ISO BMFF box to `buf`.
///
/// Writes a 4-byte size placeholder and the 4-byte `box_type`, runs `f` to
/// append the payload (which may itself contain nested boxes), and then
/// back-patches the placeholder with the total box length (header included)
/// as a big-endian `u32`.
///
/// # Panics
///
/// Panics if the finished box is longer than `u32::MAX` bytes. The 32-bit size
/// field cannot describe such a box, and truncating the length would emit a
/// corrupt header.
fn write_box(buf: &mut BytesMut, box_type: &[u8; 4], f: impl FnOnce(&mut BytesMut)) {
    let start = buf.len();
    // The size is only known once the payload has been written.
    buf.put_u32(0);
    buf.put_slice(box_type);
    f(buf);
    let size = buf.len() - start;
    assert!(size <= u32::MAX as usize, "ISO BMFF box exceeds 32-bit size field");
    buf[start..start + 4].copy_from_slice(&(size as u32).to_be_bytes());
}
/// Appends a "full box": a box whose payload begins with a one-byte `version`
/// followed by the low 24 bits of `flags`, then whatever `f` writes.
///
/// Bits 24..32 of `flags` are discarded.
fn write_full_box(buf: &mut BytesMut, box_type: &[u8; 4], version: u8, flags: u32, f: impl FnOnce(&mut BytesMut)) {
    // Version occupies the top byte and the flags the lower three, so a single
    // big-endian word produces the same four bytes as writing them one by one.
    let version_and_flags = (u32::from(version) << 24) | (flags & 0x00FF_FFFF);
    write_box(buf, box_type, move |body| {
        body.put_u32(version_and_flags);
        f(body);
    });
}
/// Appends an MPEG-4 descriptor: the `tag` byte, a length field, and the
/// payload produced by `f`.
///
/// The length is always emitted in the fixed four-byte form: 7 bits of the
/// payload length per byte, most significant group first, with the high bit
/// set on every byte except the last.
///
/// # Panics
///
/// Panics if the payload written by `f` is 2^28 bytes or longer, since it
/// would not fit in the 28 available length bits.
fn write_mpeg4_descriptor(buf: &mut BytesMut, tag: u8, f: impl FnOnce(&mut BytesMut)) {
    buf.put_u8(tag);
    let len_at = buf.len();
    // Placeholder for the four length bytes, filled in after the payload is known.
    buf.put_slice(&[0u8; 4]);
    let body_at = buf.len();
    f(buf);
    let body_len = buf.len() - body_at;
    assert!(body_len < (1 << 28), "MPEG-4 descriptor payload exceeds 28-bit size field");
    let encoded = [
        0x80 | ((body_len >> 21) & 0x7F) as u8,
        0x80 | ((body_len >> 14) & 0x7F) as u8,
        0x80 | ((body_len >> 7) & 0x7F) as u8,
        (body_len & 0x7F) as u8,
    ];
    buf[len_at..body_at].copy_from_slice(&encoded);
}
/// Builds a video initialization segment without recording picture dimensions.
///
/// Delegates to [`video_init_segment_with_size`] with both `width` and
/// `height` set to 0.
pub fn video_init_segment(config: &VideoConfig) -> Bytes {
    let (width, height): (u16, u16) = (0, 0);
    video_init_segment_with_size(config, width, height)
}
/// Builds a fragmented-MP4 initialization segment (`ftyp` + `moov`) describing
/// a single video track with an `avc1` sample entry.
///
/// The track has ID 1 and a 90 kHz timescale. The `avcC` record is assembled
/// from `config` (profile, compatibility, level, SPS list, PPS list). All
/// sample tables are written empty and an `mvex`/`trex` pair is appended for
/// track 1.
///
/// `width` and `height` are written to `tkhd` as 16.16 fixed-point values and
/// to the `avc1` sample entry as plain integers.
pub fn video_init_segment_with_size(config: &VideoConfig, width: u16, height: u16) -> Bytes {
    const TIMESCALE: u32 = 90000;
    const TRACK_ID: u32 = 1;
    const COMPATIBLE_BRANDS: [&[u8; 4]; 4] = [b"isom", b"iso6", b"msdh", b"msix"];
    // Identity transform in the 16.16 / 2.30 fixed-point layout used by mvhd and tkhd.
    const UNITY_MATRIX: [u32; 9] = [0x00010000, 0, 0, 0, 0x00010000, 0, 0, 0, 0x40000000];

    let mut out = BytesMut::with_capacity(512);

    write_box(&mut out, b"ftyp", |ftyp| {
        ftyp.put_slice(b"isom"); // major_brand
        ftyp.put_u32(0); // minor_version
        for brand in COMPATIBLE_BRANDS.iter() {
            ftyp.put_slice(*brand);
        }
    });

    write_box(&mut out, b"moov", |moov| {
        write_full_box(moov, b"mvhd", 0, 0, |mvhd| {
            mvhd.put_u32(0); // creation_time
            mvhd.put_u32(0); // modification_time
            mvhd.put_u32(TIMESCALE);
            mvhd.put_u32(0); // duration
            mvhd.put_u32(0x00010000); // rate (1.0)
            mvhd.put_u16(0x0100); // volume (1.0)
            mvhd.put_bytes(0, 10); // reserved
            for &cell in UNITY_MATRIX.iter() {
                mvhd.put_u32(cell);
            }
            mvhd.put_bytes(0, 24); // pre_defined
            mvhd.put_u32(2); // next_track_ID
        });

        write_box(moov, b"trak", |trak| {
            write_full_box(trak, b"tkhd", 0, 0x03, |tkhd| {
                tkhd.put_u32(0); // creation_time
                tkhd.put_u32(0); // modification_time
                tkhd.put_u32(TRACK_ID);
                tkhd.put_u32(0); // reserved
                tkhd.put_u32(0); // duration
                tkhd.put_bytes(0, 8); // reserved
                tkhd.put_u16(0); // layer
                tkhd.put_u16(0); // alternate_group
                tkhd.put_u16(0); // volume
                tkhd.put_u16(0); // reserved
                for &cell in UNITY_MATRIX.iter() {
                    tkhd.put_u32(cell);
                }
                tkhd.put_u32(u32::from(width) << 16);
                tkhd.put_u32(u32::from(height) << 16);
            });

            write_box(trak, b"mdia", |mdia| {
                write_full_box(mdia, b"mdhd", 0, 0, |mdhd| {
                    mdhd.put_u32(0); // creation_time
                    mdhd.put_u32(0); // modification_time
                    mdhd.put_u32(TIMESCALE);
                    mdhd.put_u32(0); // duration
                    mdhd.put_u32(0x55C40000); // language code 0x55C4 ("und") + pre_defined
                });

                write_full_box(mdia, b"hdlr", 0, 0, |hdlr| {
                    hdlr.put_u32(0); // pre_defined
                    hdlr.put_slice(b"vide"); // handler_type
                    hdlr.put_bytes(0, 12); // reserved
                    hdlr.put_slice(b"LVQR Video\0"); // name, NUL-terminated
                });

                write_box(mdia, b"minf", |minf| {
                    write_full_box(minf, b"vmhd", 0, 1, |vmhd| {
                        vmhd.put_u16(0); // graphicsmode
                        vmhd.put_bytes(0, 6); // opcolor
                    });

                    write_box(minf, b"dinf", |dinf| {
                        write_full_box(dinf, b"dref", 0, 0, |dref| {
                            dref.put_u32(1); // entry_count
                            // Flag 1 with an empty payload: media is in the same file.
                            write_full_box(dref, b"url ", 0, 1, |_url| {});
                        });
                    });

                    write_box(minf, b"stbl", |stbl| {
                        write_full_box(stbl, b"stsd", 0, 0, |stsd| {
                            stsd.put_u32(1); // entry_count
                            write_box(stsd, b"avc1", |avc1| {
                                avc1.put_bytes(0, 6); // reserved
                                avc1.put_u16(1); // data_reference_index
                                avc1.put_bytes(0, 16); // pre_defined + reserved
                                avc1.put_u16(width);
                                avc1.put_u16(height);
                                avc1.put_u32(0x00480000); // horizresolution (72 dpi)
                                avc1.put_u32(0x00480000); // vertresolution (72 dpi)
                                avc1.put_u32(0); // reserved
                                avc1.put_u16(1); // frame_count
                                avc1.put_bytes(0, 32); // compressorname
                                avc1.put_u16(0x0018); // depth
                                avc1.put_i16(-1); // pre_defined

                                write_box(avc1, b"avcC", |avcc| {
                                    avcc.put_u8(1); // configurationVersion
                                    avcc.put_u8(config.profile);
                                    avcc.put_u8(config.compat);
                                    avcc.put_u8(config.level);
                                    // Reserved bits plus lengthSizeMinusOne = 3 (4-byte NAL lengths).
                                    // NOTE(review): config.nalu_length_size is not consulted here;
                                    // confirm samples are always written with 4-byte lengths.
                                    avcc.put_u8(0xFF);
                                    let sps_count = config.sps_list.len() as u8;
                                    avcc.put_u8(0xE0 | sps_count);
                                    for sps in &config.sps_list {
                                        avcc.put_u16(sps.len() as u16);
                                        avcc.put_slice(sps);
                                    }
                                    let pps_count = config.pps_list.len() as u8;
                                    avcc.put_u8(pps_count);
                                    for pps in &config.pps_list {
                                        avcc.put_u16(pps.len() as u16);
                                        avcc.put_slice(pps);
                                    }
                                });
                            });
                        });

                        // Sample tables stay empty in the init segment.
                        write_full_box(stbl, b"stts", 0, 0, |stts| stts.put_u32(0));
                        write_full_box(stbl, b"stsc", 0, 0, |stsc| stsc.put_u32(0));
                        write_full_box(stbl, b"stsz", 0, 0, |stsz| {
                            stsz.put_u32(0); // sample_size
                            stsz.put_u32(0); // sample_count
                        });
                        write_full_box(stbl, b"stco", 0, 0, |stco| stco.put_u32(0));
                    });
                });
            });
        });

        write_box(moov, b"mvex", |mvex| {
            write_full_box(mvex, b"trex", 0, 0, |trex| {
                trex.put_u32(TRACK_ID);
                trex.put_u32(1); // default_sample_description_index
                trex.put_u32(0); // default_sample_duration
                trex.put_u32(0); // default_sample_size
                trex.put_u32(0); // default_sample_flags
            });
        });
    });

    out.freeze()
}
/// Builds a fragmented-MP4 initialization segment (`ftyp` + `moov`) describing
/// a single audio track with an `mp4a` sample entry.
///
/// The track has ID 1 and uses `config.sample_rate` as both the movie and the
/// media timescale. The `esds` box wraps `config.asc` in a DecoderSpecificInfo
/// descriptor (tag 0x05). All sample tables are written empty and an
/// `mvex`/`trex` pair is appended for track 1.
///
/// The `mp4a` sample-rate field is 16.16 fixed point, so its integer part
/// holds at most 65535. Rates above that are written as 0 rather than shifted
/// into the field, because the shift would discard the high bits and advertise
/// an unrelated rate (96000 Hz would appear as 30464 Hz). The timescales still
/// carry the full `config.sample_rate` in that case.
pub fn audio_init_segment(config: &AudioConfig) -> Bytes {
    const TRACK_ID: u32 = 1;
    const COMPATIBLE_BRANDS: [&[u8; 4]; 4] = [b"isom", b"iso6", b"msdh", b"msix"];
    // Identity transform in the 16.16 / 2.30 fixed-point layout used by mvhd and tkhd.
    const UNITY_MATRIX: [u32; 9] = [0x00010000, 0, 0, 0, 0x00010000, 0, 0, 0, 0x40000000];

    let timescale = config.sample_rate;
    let sample_rate_16_16 = if config.sample_rate <= u32::from(u16::MAX) {
        config.sample_rate << 16
    } else {
        0
    };

    let mut out = BytesMut::with_capacity(512);

    write_box(&mut out, b"ftyp", |ftyp| {
        ftyp.put_slice(b"isom"); // major_brand
        ftyp.put_u32(0); // minor_version
        for brand in COMPATIBLE_BRANDS.iter() {
            ftyp.put_slice(*brand);
        }
    });

    write_box(&mut out, b"moov", |moov| {
        write_full_box(moov, b"mvhd", 0, 0, |mvhd| {
            mvhd.put_u32(0); // creation_time
            mvhd.put_u32(0); // modification_time
            mvhd.put_u32(timescale);
            mvhd.put_u32(0); // duration
            mvhd.put_u32(0x00010000); // rate (1.0)
            mvhd.put_u16(0x0100); // volume (1.0)
            mvhd.put_bytes(0, 10); // reserved
            for &cell in UNITY_MATRIX.iter() {
                mvhd.put_u32(cell);
            }
            mvhd.put_bytes(0, 24); // pre_defined
            mvhd.put_u32(2); // next_track_ID
        });

        write_box(moov, b"trak", |trak| {
            write_full_box(trak, b"tkhd", 0, 0x03, |tkhd| {
                tkhd.put_u32(0); // creation_time
                tkhd.put_u32(0); // modification_time
                tkhd.put_u32(TRACK_ID);
                tkhd.put_u32(0); // reserved
                tkhd.put_u32(0); // duration
                tkhd.put_bytes(0, 8); // reserved
                tkhd.put_u16(0); // layer
                tkhd.put_u16(0); // alternate_group
                tkhd.put_u16(0x0100); // volume (1.0)
                tkhd.put_u16(0); // reserved
                for &cell in UNITY_MATRIX.iter() {
                    tkhd.put_u32(cell);
                }
                tkhd.put_u32(0); // width
                tkhd.put_u32(0); // height
            });

            write_box(trak, b"mdia", |mdia| {
                write_full_box(mdia, b"mdhd", 0, 0, |mdhd| {
                    mdhd.put_u32(0); // creation_time
                    mdhd.put_u32(0); // modification_time
                    mdhd.put_u32(timescale);
                    mdhd.put_u32(0); // duration
                    mdhd.put_u32(0x55C40000); // language code 0x55C4 ("und") + pre_defined
                });

                write_full_box(mdia, b"hdlr", 0, 0, |hdlr| {
                    hdlr.put_u32(0); // pre_defined
                    hdlr.put_slice(b"soun"); // handler_type
                    hdlr.put_bytes(0, 12); // reserved
                    hdlr.put_slice(b"LVQR Audio\0"); // name, NUL-terminated
                });

                write_box(mdia, b"minf", |minf| {
                    write_full_box(minf, b"smhd", 0, 0, |smhd| {
                        smhd.put_u16(0); // balance
                        smhd.put_u16(0); // reserved
                    });

                    write_box(minf, b"dinf", |dinf| {
                        write_full_box(dinf, b"dref", 0, 0, |dref| {
                            dref.put_u32(1); // entry_count
                            // Flag 1 with an empty payload: media is in the same file.
                            write_full_box(dref, b"url ", 0, 1, |_url| {});
                        });
                    });

                    write_box(minf, b"stbl", |stbl| {
                        write_full_box(stbl, b"stsd", 0, 0, |stsd| {
                            stsd.put_u32(1); // entry_count
                            write_box(stsd, b"mp4a", |mp4a| {
                                mp4a.put_bytes(0, 6); // reserved
                                mp4a.put_u16(1); // data_reference_index
                                mp4a.put_bytes(0, 8); // reserved
                                mp4a.put_u16(config.channels as u16); // channelcount
                                mp4a.put_u16(16); // samplesize (bits)
                                mp4a.put_u16(0); // pre_defined
                                mp4a.put_u16(0); // reserved
                                mp4a.put_u32(sample_rate_16_16);

                                write_full_box(mp4a, b"esds", 0, 0, |esds| {
                                    // ES_Descriptor
                                    write_mpeg4_descriptor(esds, 0x03, |es| {
                                        es.put_u16(1); // ES_ID
                                        es.put_u8(0); // stream flags
                                        // DecoderConfigDescriptor
                                        write_mpeg4_descriptor(es, 0x04, |dec| {
                                            dec.put_u8(0x40); // objectTypeIndication
                                            dec.put_u8(0x15); // streamType
                                            dec.put_u8(0); // bufferSizeDB, high byte
                                            dec.put_u16(0); // bufferSizeDB, low bytes
                                            dec.put_u32(0); // maxBitrate
                                            dec.put_u32(0); // avgBitrate
                                            // DecoderSpecificInfo
                                            write_mpeg4_descriptor(dec, 0x05, |asc| {
                                                asc.put_slice(&config.asc);
                                            });
                                        });
                                        // SLConfigDescriptor
                                        write_mpeg4_descriptor(es, 0x06, |sl| {
                                            sl.put_u8(0x02); // predefined
                                        });
                                    });
                                });
                            });
                        });

                        // Sample tables stay empty in the init segment.
                        write_full_box(stbl, b"stts", 0, 0, |stts| stts.put_u32(0));
                        write_full_box(stbl, b"stsc", 0, 0, |stsc| stsc.put_u32(0));
                        write_full_box(stbl, b"stsz", 0, 0, |stsz| {
                            stsz.put_u32(0); // sample_size
                            stsz.put_u32(0); // sample_count
                        });
                        write_full_box(stbl, b"stco", 0, 0, |stco| stco.put_u32(0));
                    });
                });
            });
        });

        write_box(moov, b"mvex", |mvex| {
            write_full_box(mvex, b"trex", 0, 0, |trex| {
                trex.put_u32(TRACK_ID);
                trex.put_u32(1); // default_sample_description_index
                trex.put_u32(0); // default_sample_duration
                trex.put_u32(0); // default_sample_size
                trex.put_u32(0); // default_sample_flags
            });
        });
    });

    out.freeze()
}
/// Builds one media segment (`moof` + `mdat`) carrying a single sample for
/// track 1.
///
/// * `sequence` is written to `mfhd`.
/// * `base_dts` is written to a version-1 `tfdt` as a 64-bit value.
/// * `duration` and `data.len()` become the only sample record in `trun`.
/// * `data` is copied verbatim into `mdat`.
///
/// The `trun` data offset is patched after the `moof` is complete so that it
/// equals the `moof` length plus the 8-byte `mdat` header.
pub fn audio_segment(sequence: u32, base_dts: u64, duration: u32, data: &Bytes) -> Bytes {
    const TRACK_ID: u32 = 1;
    const MDAT_HEADER_LEN: usize = 8;
    const TFHD_DEFAULT_BASE_IS_MOOF: u32 = 0x020000;
    const TRUN_DATA_OFFSET_PRESENT: u32 = 0x000001;
    const TRUN_SAMPLE_DURATION_PRESENT: u32 = 0x000100;
    const TRUN_SAMPLE_SIZE_PRESENT: u32 = 0x000200;

    let mut out = BytesMut::with_capacity(128 + data.len());
    let moof_start = out.len();

    write_box(&mut out, b"moof", |moof| {
        write_full_box(moof, b"mfhd", 0, 0, |mfhd| mfhd.put_u32(sequence));

        write_box(moof, b"traf", |traf| {
            write_full_box(traf, b"tfhd", 0, TFHD_DEFAULT_BASE_IS_MOOF, |tfhd| {
                tfhd.put_u32(TRACK_ID);
            });
            write_full_box(traf, b"tfdt", 1, 0, |tfdt| tfdt.put_u64(base_dts));

            let trun_flags: u32 =
                TRUN_DATA_OFFSET_PRESENT | TRUN_SAMPLE_DURATION_PRESENT | TRUN_SAMPLE_SIZE_PRESENT;
            write_full_box(traf, b"trun", 0, trun_flags, |trun| {
                trun.put_u32(1); // sample_count
                trun.put_i32(0); // data_offset placeholder, patched below
                trun.put_u32(duration);
                trun.put_u32(data.len() as u32);
            });
        });
    });

    // The payload begins right after the mdat header that follows the moof.
    let moof_len = out.len() - moof_start;
    let payload_offset = (moof_len + MDAT_HEADER_LEN) as i32;
    patch_trun_data_offset(&mut out, moof_start, payload_offset);

    write_box(&mut out, b"mdat", |mdat| mdat.put_slice(data));

    out.freeze()
}
/// Reads the header of the box starting at `at`.
///
/// Returns the box's total size and four-byte type, or `None` when the header
/// is incomplete, the declared size is smaller than a header, or the box would
/// extend past `end`.
fn box_header_at(bytes: &[u8], at: usize, end: usize) -> Option<(usize, [u8; 4])> {
    let header = bytes.get(at..at.checked_add(8)?)?;
    let size = u32::from_be_bytes([header[0], header[1], header[2], header[3]]) as usize;
    let fits = size >= 8 && at.checked_add(size).map_or(false, |box_end| box_end <= end);
    if !fits {
        return None;
    }
    Some((size, [header[4], header[5], header[6], header[7]]))
}

/// Overwrites the `data_offset` field of the first `trun` box found inside a
/// `traf` child of the `moof` that starts at `moof_start`.
///
/// The walk starts just past the 8-byte `moof` header and steps from box to
/// box using each box's declared size. It stops, leaving `buf` untouched, as
/// soon as it meets a box whose header is malformed: a size below 8 (a size of
/// 0 would otherwise never advance the walk) or a size that runs past the
/// enclosing region. The first `trun` found ends the search; it is patched
/// only if its flags declare a data offset (bit 0x000001) and the box is long
/// enough to contain one.
fn patch_trun_data_offset(buf: &mut BytesMut, moof_start: usize, data_offset: i32) {
    // Box header (8) + version/flags (4) + sample_count (4) precede data_offset.
    const DATA_OFFSET_POS: usize = 8 + 4 + 4;
    const TRUN_MIN_LEN: usize = DATA_OFFSET_POS + 4;

    let end = buf.len();
    let mut pos = moof_start + 8;
    while let Some((size, kind)) = box_header_at(&buf[..], pos, end) {
        if &kind == b"traf" {
            let traf_end = pos + size;
            let mut inner = pos + 8;
            while let Some((inner_size, inner_kind)) = box_header_at(&buf[..], inner, traf_end) {
                if &inner_kind == b"trun" {
                    // The low flag byte sits at header (8) + version (1) + two flag bytes (2).
                    let has_data_offset = inner_size >= TRUN_MIN_LEN && buf[inner + 11] & 0x01 != 0;
                    if has_data_offset {
                        let offset_pos = inner + DATA_OFFSET_POS;
                        buf[offset_pos..offset_pos + 4].copy_from_slice(&data_offset.to_be_bytes());
                    }
                    return;
                }
                inner += inner_size;
            }
        }
        pos += size;
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::remux::flv;

    /// Reads the box header at `offset`, returning `(size, type, payload)`.
    ///
    /// Returns `None` when fewer than 8 bytes remain, the declared size is
    /// below 8, or the box would run past the end of `data`.
    fn read_box_at(data: &[u8], offset: usize) -> Option<(usize, &[u8; 4], &[u8])> {
        if offset + 8 > data.len() {
            return None;
        }
        let size = u32::from_be_bytes([data[offset], data[offset + 1], data[offset + 2], data[offset + 3]]) as usize;
        if offset + size > data.len() || size < 8 {
            return None;
        }
        let box_type: &[u8; 4] = data[offset + 4..offset + 8].try_into().ok()?;
        let payload = &data[offset + 8..offset + size];
        Some((size, box_type, payload))
    }

    /// Scans the top-level boxes of `data` for `target`, returning its
    /// starting offset and payload.
    fn find_box<'a>(data: &'a [u8], target: &[u8; 4]) -> Option<(usize, &'a [u8])> {
        let mut pos = 0;
        while let Some((size, box_type, payload)) = read_box_at(data, pos) {
            if box_type == target {
                return Some((pos, payload));
            }
            pos += size;
        }
        None
    }

    /// Returns the offset of the first occurrence of `needle` in `data`.
    fn find_bytes(data: &[u8], needle: &[u8]) -> Option<usize> {
        data.windows(needle.len()).position(|w| w == needle)
    }

    fn test_video_config() -> flv::VideoConfig {
        flv::VideoConfig {
            sps_list: vec![vec![0x67, 0x64, 0x00, 0x1F, 0xAC, 0xD9]],
            pps_list: vec![vec![0x68, 0xEE, 0x3C, 0x80]],
            profile: 0x64,
            compat: 0x00,
            level: 0x1F,
            nalu_length_size: 4,
        }
    }

    fn test_audio_config() -> flv::AudioConfig {
        flv::AudioConfig {
            asc: vec![0x12, 0x10],
            sample_rate: 44100,
            channels: 2,
            object_type: 2,
        }
    }

    #[test]
    fn video_init_starts_with_ftyp() {
        let init = video_init_segment(&test_video_config());
        assert!(init.len() > 8);
        assert_eq!(&init[4..8], b"ftyp");
    }

    #[test]
    fn video_init_contains_moov() {
        let init = video_init_segment(&test_video_config());
        assert!(find_box(&init, b"moov").is_some());
    }

    #[test]
    fn video_init_contains_avcc_with_sps_pps() {
        let config = test_video_config();
        let init = video_init_segment(&config);
        let init_bytes = &init[..];
        let pos = find_bytes(init_bytes, b"avcC").expect("avcC box not found");
        let record = &init_bytes[pos + 4..];

        // Fixed header of the configuration record.
        assert_eq!(record[0], 1);
        assert_eq!(record[1], config.profile);
        assert_eq!(record[2], config.compat);
        assert_eq!(record[3], config.level);
        assert_eq!(record[4], 0xFF);
        assert_eq!(record[5], 0xE0 | config.sps_list.len() as u8);

        // One length-prefixed SPS.
        let sps = &config.sps_list[0];
        assert_eq!(&record[6..8], &(sps.len() as u16).to_be_bytes()[..]);
        let sps_end = 8 + sps.len();
        assert_eq!(&record[8..sps_end], &sps[..]);

        // PPS count followed by one length-prefixed PPS.
        let pps = &config.pps_list[0];
        assert_eq!(record[sps_end], config.pps_list.len() as u8);
        assert_eq!(&record[sps_end + 1..sps_end + 3], &(pps.len() as u16).to_be_bytes()[..]);
        let pps_start = sps_end + 3;
        assert_eq!(&record[pps_start..pps_start + pps.len()], &pps[..]);

        // The avcC size field covers the header plus exactly the record above.
        let avcc_size =
            u32::from_be_bytes([init_bytes[pos - 4], init_bytes[pos - 3], init_bytes[pos - 2], init_bytes[pos - 1]])
                as usize;
        assert_eq!(avcc_size, 8 + pps_start + pps.len());
    }

    #[test]
    fn video_init_with_size_records_dimensions_in_avc1() {
        let init = video_init_segment_with_size(&test_video_config(), 1280, 720);
        let init_bytes = &init[..];
        let pos = find_bytes(init_bytes, b"avc1").expect("avc1 box not found");
        // type (4) + reserved (6) + data_reference_index (2) + pre_defined/reserved (16)
        let width_at = pos + 28;
        assert_eq!(&init_bytes[width_at..width_at + 2], &1280u16.to_be_bytes()[..]);
        assert_eq!(&init_bytes[width_at + 2..width_at + 4], &720u16.to_be_bytes()[..]);
    }

    #[test]
    fn audio_init_starts_with_ftyp() {
        let init = audio_init_segment(&test_audio_config());
        assert_eq!(&init[4..8], b"ftyp");
    }

    #[test]
    fn audio_init_contains_esds() {
        let config = test_audio_config();
        let init = audio_init_segment(&config);
        let init_bytes = &init[..];
        let esds_pos = find_bytes(init_bytes, b"esds").expect("esds box not found in audio init");

        // The DecoderSpecificInfo descriptor must carry the configured ASC verbatim.
        let mut needle = vec![0x05, 0x80, 0x80, 0x80, config.asc.len() as u8];
        needle.extend_from_slice(&config.asc);
        assert!(
            find_bytes(&init_bytes[esds_pos..], &needle).is_some(),
            "DecoderSpecificInfo with ASC not found in esds"
        );
    }

    #[test]
    fn mpeg4_descriptor_length_encoding_round_trips_large_payloads() {
        let payload: Vec<u8> = (0..200u8).collect();
        let mut buf = BytesMut::new();
        write_mpeg4_descriptor(&mut buf, 0x05, |inner| {
            inner.put_slice(&payload);
        });
        assert_eq!(buf.len(), 1 + 4 + payload.len());
        assert_eq!(buf[0], 0x05, "tag byte");
        // 200 = (1 << 7) | 0x48, spread over the two least significant groups.
        assert_eq!(buf[1], 0x80);
        assert_eq!(buf[2], 0x80);
        assert_eq!(buf[3], 0x81);
        assert_eq!(buf[4], 0x48);
        assert_eq!(&buf[5..], payload.as_slice());
    }

    #[test]
    fn audio_segment_structure() {
        let data = Bytes::from(vec![0x01, 0x02, 0x03, 0x04]);
        let seg = audio_segment(1, 0, 1024, &data);
        assert_eq!(&seg[4..8], b"moof");
        let moof_size = u32::from_be_bytes([seg[0], seg[1], seg[2], seg[3]]) as usize;
        assert_eq!(&seg[moof_size + 4..moof_size + 8], b"mdat");

        // The mdat box closes the segment and holds the payload unchanged.
        let mdat_size =
            u32::from_be_bytes([seg[moof_size], seg[moof_size + 1], seg[moof_size + 2], seg[moof_size + 3]]) as usize;
        assert_eq!(moof_size + mdat_size, seg.len());
        assert_eq!(&seg[moof_size + 8..], &data[..]);
    }

    #[test]
    fn audio_segment_data_offset_points_at_payload() {
        let data = Bytes::from(vec![0x01, 0x02, 0x03, 0x04]);
        let seg = audio_segment(7, 90_000, 1024, &data);
        let seg_bytes = &seg[..];
        let moof_size = u32::from_be_bytes([seg_bytes[0], seg_bytes[1], seg_bytes[2], seg_bytes[3]]) as usize;

        let trun_type = find_bytes(seg_bytes, b"trun").expect("trun box not found");
        // type (4) + version/flags (4) + sample_count (4)
        let offset_at = trun_type + 12;
        let data_offset = i32::from_be_bytes([
            seg_bytes[offset_at],
            seg_bytes[offset_at + 1],
            seg_bytes[offset_at + 2],
            seg_bytes[offset_at + 3],
        ]);
        assert_eq!(data_offset as usize, moof_size + 8);
        assert_eq!(&seg_bytes[data_offset as usize..], &data[..]);
    }
}