#![warn(missing_docs)]
extern crate self as mp4san;
#[macro_use]
extern crate mediasan_common;
pub mod error;
pub mod parse;
mod util;
use std::io::Read;
use std::pin::Pin;
use derive_builder::Builder;
use derive_more::Display;
use futures_util::io::BufReader;
use futures_util::{pin_mut, AsyncBufReadExt, AsyncRead};
use mediasan_common::sync;
use mediasan_common::util::{checked_add_signed, IoResultExt};
use mediasan_common::AsyncSkipExt;
use crate::error::Report;
use crate::parse::error::{MultipleBoxes, WhileParsingBox};
use crate::parse::{BoxHeader, BoxType, FourCC, FtypBox, MoovBox, Mp4Box, Mp4Value, ParseError, StblCoMut};
pub use crate::error::Error;
/// Configuration for the sanitizer.
///
/// Construct one with [`Config::builder`] or [`Config::default`].
#[derive(Builder, Clone)]
#[builder(build_fn(name = "try_build"))]
pub struct Config {
/// Size limit handed to the box reader when the `moov` box is read into memory.
///
/// Defaults to 1 GiB (`1024 * 1024 * 1024`).
#[builder(default = "1024 * 1024 * 1024")]
pub max_metadata_size: u64,
/// Replacement size for an `mdat` box whose header carries no explicit data size.
///
/// When set, the header of such an `mdat` box is overwritten with this size before the box is
/// skipped. When `None` (the default), the box is skipped up to the end of the stream.
#[builder(default = None)]
pub cumulative_mdat_box_size: Option<u32>,
}
/// The result of sanitizing an MP4 input.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SanitizedMetadata {
/// Rewritten metadata to place in front of [`data`](Self::data).
///
/// `None` when the input's `moov` box already precedes its media data, in which case nothing
/// was rewritten. Otherwise contains the serialized `ftyp` and `moov` boxes, followed by a
/// `free` padding box when padding was needed.
pub metadata: Option<Vec<u8>>,
/// The span of the input covering the contiguous `mdat` boxes, including any `free`, `skip`,
/// `meta` or `meco` boxes that directly follow that span.
pub data: InputSpan,
}
pub use mediasan_common::{AsyncSkip, InputSpan, SeekSkipAdapter, Skip};
/// The brand (`isom`) that must appear among an input's `ftyp` compatible brands.
///
/// Inputs whose `ftyp` box does not list it are rejected with `ParseError::UnsupportedFormat`.
pub const COMPATIBLE_BRAND: FourCC = FourCC { value: *b"isom" };
/// Displayable error detail of the form `box data too large: {0} > {1}`.
///
/// NOTE(review): presumably `.0` is the offending size and `.1` the limit; this type is not
/// referenced anywhere else in the visible part of this file -- confirm it is still needed.
#[derive(Clone, Copy, Debug, Display)]
#[display(fmt = "box data too large: {} > {}", _0, _1)]
struct BoxDataTooLarge(u64, u64);
/// Size limit, in bytes, handed to the box reader when the `ftyp` box is read into memory.
const MAX_FTYP_SIZE: u64 = 1024;
pub fn sanitize<R: Read + Skip + Unpin>(input: R) -> Result<SanitizedMetadata, Error> {
sync::sanitize(input, sanitize_async)
}
pub fn sanitize_with_config<R: Read + Skip + Unpin>(input: R, config: Config) -> Result<SanitizedMetadata, Error> {
sync::sanitize(input, |input| sanitize_async_with_config(input, config))
}
pub async fn sanitize_async<R: AsyncRead + AsyncSkip>(input: R) -> Result<SanitizedMetadata, Error> {
sanitize_async_with_config(input, Config::default()).await
}
/// Sanitize an MP4 read asynchronously from `input`, using the given `config`.
///
/// Top-level boxes are read one at a time until the end of the input:
///
/// - `free` / `skip` boxes are skipped; when one directly follows the media data span it is
///   folded into that span.
/// - `ftyp` must occur exactly once, before any box other than `free` / `skip`, and must list
///   [`COMPATIBLE_BRAND`] among its compatible brands.
/// - `mdat` boxes are skipped and recorded as the media data span; every further `mdat` must
///   start exactly where the span currently ends.
/// - `moov` must occur at most once and is read into memory, bounded by
///   `config.max_metadata_size`.
/// - `meta` / `meco` boxes are treated like `free` / `skip`, but only after `ftyp`.
/// - Any other box type is rejected.
///
/// If `moov` already precedes the media data, the input needs no rewriting and the returned
/// [`SanitizedMetadata::metadata`] is `None`. Otherwise new metadata (`ftyp` + `moov`) is
/// produced to be placed in front of the media data: either a `free` padding box is appended so
/// that the media data keeps its original offset, or every chunk offset in `moov` is displaced
/// by the distance the media data moves.
///
/// # Errors
///
/// Returns [`Error::Parse`] wrapping
///
/// - `ParseError::TruncatedBox` when the input ends inside a box header or box body,
/// - `ParseError::InvalidBoxLayout` for a duplicate `ftyp` or `moov` box, or when `ftyp` is not
///   the first significant box,
/// - `ParseError::UnsupportedFormat` when `ftyp` does not list [`COMPATIBLE_BRAND`],
/// - `ParseError::UnsupportedBoxLayout` for discontiguous `mdat` boxes or when the media data
///   would be displaced further than an `i32` can express,
/// - `ParseError::UnsupportedBox` for any unrecognized top-level box,
/// - `ParseError::MissingRequiredBox` when `ftyp`, `moov` or `mdat` is absent,
/// - `ParseError::InvalidInput` when adjusting a chunk offset overflows,
///
/// as well as any error produced while reading from `input` or parsing box contents.
pub async fn sanitize_async_with_config<R: AsyncRead + AsyncSkip>(
    input: R,
    config: Config,
) -> Result<SanitizedMetadata, Error> {
    let reader = BufReader::with_capacity(BoxHeader::MAX_SIZE as usize, input);
    pin_mut!(reader);

    let mut ftyp: Option<Mp4Box<FtypBox>> = None;
    let mut moov: Option<Mp4Box<MoovBox>> = None;
    let mut data: Option<InputSpan> = None;
    let mut moov_offset = None;

    // An empty buffer after `fill_buf` means the end of the input has been reached.
    while !reader.as_mut().fill_buf().await?.is_empty() {
        let start_pos = reader.as_mut().stream_position().await?;

        let mut header = BoxHeader::read(&mut reader)
            .await
            .map_eof(|_| Error::Parse(report_attach!(ParseError::TruncatedBox, "while parsing box header")))?;

        match header.box_type() {
            // Matched before the `ftyp` guard below, so these are allowed ahead of `ftyp`.
            name @ (BoxType::FREE | BoxType::SKIP) => {
                let box_size = skip_box(reader.as_mut(), &header).await? + header.encoded_len();
                log::info!("{name} @ 0x{start_pos:08x}: {box_size} bytes");
                absorb_adjacent_box(&mut data, start_pos, box_size);
            }

            BoxType::FTYP => {
                ensure_attach!(
                    ftyp.is_none(),
                    ParseError::InvalidBoxLayout,
                    MultipleBoxes(BoxType::FTYP)
                );
                let mut read_ftyp = Mp4Box::read_data(reader.as_mut(), header, MAX_FTYP_SIZE).await?;
                let ftyp_data: &mut FtypBox = read_ftyp.data.parse()?;
                let compatible_brand_count = ftyp_data.compatible_brands().len();
                let FtypBox { major_brand, minor_version, .. } = ftyp_data;
                log::info!("ftyp @ 0x{start_pos:08x}: {major_brand} version {minor_version}, {compatible_brand_count} compatible brands");

                ensure_attach!(
                    ftyp_data.compatible_brands().any(|b| b == COMPATIBLE_BRAND),
                    ParseError::UnsupportedFormat(ftyp_data.major_brand)
                );

                ftyp = Some(read_ftyp);
            }

            // Every arm below this guard requires `ftyp` to have been seen already.
            _ if ftyp.is_none() => {
                bail_attach!(ParseError::InvalidBoxLayout, "ftyp is not the first significant box");
            }

            BoxType::MDAT => {
                // An `mdat` without an explicit data size may be given one through the config.
                if let Ok(None) = header.box_data_size() {
                    if let Some(t) = config.cumulative_mdat_box_size {
                        header.overwrite_size(t);
                    }
                }
                let box_size = skip_box(reader.as_mut(), &header).await? + header.encoded_len();
                log::info!("mdat @ 0x{start_pos:08x}: {box_size} bytes");

                if let Some(data) = &mut data {
                    // Only a single contiguous span of media data can be reported.
                    ensure_attach!(
                        data.offset + data.len == start_pos,
                        ParseError::UnsupportedBoxLayout,
                        "discontiguous mdat boxes",
                    );
                    data.len += box_size;
                } else {
                    data = Some(InputSpan { offset: start_pos, len: box_size });
                }
            }

            BoxType::MOOV => {
                // Reject a second `moov` instead of silently replacing the first one, mirroring
                // the duplicate check performed for `ftyp`.
                ensure_attach!(
                    moov.is_none(),
                    ParseError::InvalidBoxLayout,
                    MultipleBoxes(BoxType::MOOV)
                );
                let mut read_moov = Mp4Box::read_data(reader.as_mut(), header, config.max_metadata_size).await?;
                let moov_data: &mut MoovBox = read_moov.data.parse()?;

                // Walking every trak's chunk offset box also surfaces parse errors early; the
                // resulting total is only used for logging.
                let trak_chunk_counts = moov_data
                    .traks()
                    .map(|trak| Ok::<_, Report<_>>(trak?.co_mut()?.entry_count()));
                let chunk_count = trak_chunk_counts.reduce(|a, b| Ok(a? + b?)).unwrap_or(Ok(0))?;
                let trak_count = moov_data.traks().count();
                log::info!("moov @ 0x{start_pos:08x}: {trak_count} traks {chunk_count} chunks");

                moov = Some(read_moov);
                moov_offset = Some(start_pos);
            }

            name @ (BoxType::META | BoxType::MECO) => {
                let box_size = skip_box(reader.as_mut(), &header).await? + header.encoded_len();
                log::info!("{name} @ 0x{start_pos:08x}: {box_size} bytes");
                absorb_adjacent_box(&mut data, start_pos, box_size);
            }

            name => {
                let box_size = skip_box(reader.as_mut(), &header).await? + header.encoded_len();
                log::info!("{name} @ 0x{start_pos:08x}: {box_size} bytes");
                bail_attach!(ParseError::UnsupportedBox(name));
            }
        }
    }

    let Some(ftyp) = ftyp else {
        bail_attach!(ParseError::MissingRequiredBox(BoxType::FTYP));
    };
    let (Some(moov), Some(moov_offset)) = (moov, moov_offset) else {
        bail_attach!(ParseError::MissingRequiredBox(BoxType::MOOV));
    };
    let Some(data) = data else {
        bail_attach!(ParseError::MissingRequiredBox(BoxType::MDAT));
    };

    // The metadata already precedes the media data: the input can be used as it is.
    if moov_offset < data.offset {
        log::info!("metadata: nothing to sanitize");
        return Ok(SanitizedMetadata { metadata: None, data });
    }

    // Re-wrap the parsed box data in freshly built boxes for serialization.
    let ftyp = Mp4Box::with_data(ftyp.data)?;
    let mut moov = Mp4Box::with_data(moov.data)?;
    let metadata_len = ftyp.encoded_len() + moov.encoded_len();

    let mut pad_size = 0;
    const PAD_HEADER_SIZE: u64 = BoxHeader::with_u32_data_size(BoxType::FREE, 0).encoded_len();
    const MAX_PAD_SIZE: u64 = u32::MAX as u64 - PAD_HEADER_SIZE;

    match data.offset.checked_sub(metadata_len) {
        // The new metadata ends exactly where the media data begins.
        Some(0) => {
            log::info!("metadata: 0x{metadata_len:08x} bytes");
        }
        // The gap is large enough to hold a `free` box: pad so the media data stays in place.
        Some(size @ PAD_HEADER_SIZE..=MAX_PAD_SIZE) => {
            pad_size = size;
            log::info!("metadata: 0x{metadata_len:08x} bytes; adding padding of 0x{pad_size:08x} bytes");
        }
        // Otherwise the media data moves: backwards when the gap cannot be padded (`Some`),
        // forwards when the metadata is longer than the original media data offset (`None`).
        mdat_backward_displacement => {
            let mdat_displacement = match mdat_backward_displacement {
                Some(mdat_backward_displacement) => {
                    mdat_backward_displacement.try_into().ok().and_then(i32::checked_neg)
                }
                // `checked_sub` above returned `None`, so `metadata_len > data.offset` and this
                // subtraction cannot underflow.
                None => metadata_len.checked_sub(data.offset).unwrap().try_into().ok(),
            };
            let mdat_displacement: i32 = mdat_displacement
                .ok_or_else(|| report_attach!(ParseError::UnsupportedBoxLayout, "mdat displaced too far"))?;

            log::info!("metadata: 0x{metadata_len:08x} bytes; displacing chunk offsets by 0x{mdat_displacement:08x}");

            for trak in &mut moov.data.parse()?.traks() {
                let co = trak?.co_mut()?;
                if let StblCoMut::Stco(stco) = co {
                    for mut entry in &mut stco.entries_mut() {
                        let value = entry.get().unwrap_or_else(|_| unreachable!());
                        entry.set(
                            checked_add_signed(value, mdat_displacement).ok_or_else(|| {
                                report_attach!(ParseError::InvalidInput, "chunk offset not within mdat")
                            })?,
                        );
                    }
                } else if let StblCoMut::Co64(co64) = co {
                    for mut entry in &mut co64.entries_mut() {
                        let value = entry.get().unwrap_or_else(|_| unreachable!());
                        entry.set(
                            checked_add_signed(value, mdat_displacement.into()).ok_or_else(|| {
                                report_attach!(ParseError::InvalidInput, "chunk offset not within mdat")
                            })?,
                        );
                    }
                }
            }
        }
    }

    let mut metadata = Vec::with_capacity((metadata_len + pad_size) as usize);
    ftyp.put_buf(&mut metadata);
    moov.put_buf(&mut metadata);
    if pad_size != 0 {
        // Write the `free` box header, then zero-fill the remainder of the padding.
        let pad_header = BoxHeader::with_u32_data_size(BoxType::FREE, (pad_size - PAD_HEADER_SIZE) as u32);
        pad_header.put_buf(&mut metadata);
        metadata.resize((metadata_len + pad_size) as usize, 0);
    }

    Ok(SanitizedMetadata { metadata: Some(metadata), data })
}

/// Extend `data` over a skipped box when that box starts exactly where the span currently ends.
///
/// Boxes that are not adjacent to the span, or that are seen before any span exists, leave
/// `data` untouched.
fn absorb_adjacent_box(data: &mut Option<InputSpan>, start_pos: u64, box_size: u64) {
    if let Some(span) = data {
        if span.offset + span.len == start_pos {
            span.len += box_size;
        }
    }
}
impl Config {
pub fn builder() -> ConfigBuilder {
ConfigBuilder::default()
}
}
impl Default for Config {
fn default() -> Self {
Self::builder().build()
}
}
impl ConfigBuilder {
    /// Build a [`Config`] from the options set so far.
    ///
    /// Options that were not set take their defaults. Every field of [`Config`] declares a
    /// builder default, so the underlying fallible `try_build` has nothing left to reject.
    pub fn build(&self) -> Config {
        self.try_build()
            .expect("every Config field has a builder default, so try_build cannot fail")
    }
}
/// Skip over the body of the box described by `header`, returning the number of bytes skipped.
///
/// When the header carries no explicit data size, the box is taken to extend to the end of the
/// stream. Reaching the end of the input while skipping is reported as
/// `ParseError::TruncatedBox`, annotated with the box type.
async fn skip_box<R: AsyncRead + AsyncSkip>(
    mut reader: Pin<&mut BufReader<R>>,
    header: &BoxHeader,
) -> Result<u64, Error> {
    let box_data_size = if let Some(explicit_size) = header.box_data_size()? {
        explicit_size
    } else {
        // No explicit size: everything from the current position to the end of the stream.
        let total_len = reader.as_mut().stream_len().await?;
        let current_pos = reader.as_mut().stream_position().await?;
        total_len - current_pos
    };

    let skipped = reader.skip(box_data_size).await;
    skipped.map_eof(|_| {
        Error::Parse(report_attach!(
            ParseError::TruncatedBox,
            WhileParsingBox(header.box_type())
        ))
    })?;

    Ok(box_data_size)
}
// Only compiled under `cfg(doctest)`: attaches the README as this module's documentation so
// that rustdoc treats the code blocks in the README as doctests.
#[cfg(doctest)]
#[doc = include_str!("../README.md")]
pub mod readme {}
#[cfg(test)]
mod test {
use std::io;
use assert_matches::assert_matches;
use crate::parse::box_type::{CO64, FREE, FTYP, MDAT, MDIA, MECO, META, MINF, MOOV, SKIP, STBL, STCO, TRAK};
use crate::util::test::{
init_logger, sanitized_data, test_ftyp, test_moov, test_mp4, write_test_mdat, ISOM, MP41, MP42, TEST_UUID,
};
use super::*;
#[test]
fn until_eof_sized_moov() {
init_logger();
let mut data = vec![];
let mut metadata = vec![];
test_ftyp().build().put_buf(&mut data);
test_ftyp().build().put_buf(&mut metadata);
let mdat = write_test_mdat(&mut data, b"abcdefg");
let moov_pos = data.len();
test_moov().build().put_buf(&mut data);
test_moov().build().put_buf(&mut metadata);
BoxHeader::until_eof(MOOV).put_buf(&mut &mut data[moov_pos..]);
let sanitized = sanitize(io::Cursor::new(&data)).unwrap();
assert_eq!(sanitized.data, mdat);
assert_eq!(sanitized.metadata, Some(metadata));
sanitize(io::Cursor::new(sanitized_data(sanitized, &data))).unwrap();
}
#[test]
fn until_eof_sized_mdat() {
let test = test_mp4()
.boxes(&[FTYP, MOOV, MDAT][..])
.mdat_data(&b"abcdefg"[..])
.mdat_data_until_eof()
.build();
test.sanitize_ok_noop();
}
#[test]
fn skip() {
test_mp4().mdat_data(&b"abcdefg"[..]).build().sanitize_ok();
}
#[test]
fn max_input_length() {
let mut test = test_mp4().boxes(&[FTYP, MOOV, MDAT][..]).mdat_data(vec![]).clone();
let test_data_len = test.mdat_data_len(u64::MAX - 16).build().data.len() as u64;
let test = test.mdat_data_len(u64::MAX - test_data_len).build();
let sanitized = sanitize(test.clone()).unwrap();
assert_eq!(sanitized.data, test.mdat);
assert_eq!(sanitized.data.offset + sanitized.data.len, u64::MAX);
assert_eq!(sanitized.metadata, None);
}
#[test]
fn input_length_overflow() {
let mut test = test_mp4().mdat_data(vec![]).clone();
let test_data_len = test.mdat_data_len(u64::MAX - 16).build().data.len() as u64;
let test = test.mdat_data_len(u64::MAX - test_data_len + 1).build();
sanitize(test).unwrap_err();
}
#[test]
fn box_size_overflow() {
let test = test_mp4().mdat_data_len(u64::MAX - 16).build();
sanitize(test).unwrap_err();
}
#[test]
fn ftyp_too_large() {
let mut compatible_brands = vec![];
while compatible_brands.len() * COMPATIBLE_BRAND.value.len() < MAX_FTYP_SIZE as usize {
compatible_brands.push(COMPATIBLE_BRAND);
}
let test = test_mp4()
.ftyp(test_ftyp().compatible_brands(compatible_brands).clone())
.build();
assert_matches!(sanitize(test).unwrap_err(), Error::Parse(err) => {
assert_matches!(err.into_inner(), ParseError::InvalidInput);
});
}
#[test]
fn max_moov_size() {
let test_spec = test_mp4().build_spec().unwrap();
let config = Config::builder()
.max_metadata_size(test_spec.moov().build().encoded_len())
.build();
test_spec.build().sanitize_ok_with_config(config);
}
#[test]
fn moov_too_large() {
let test_spec = test_mp4().build_spec().unwrap();
let config = Config::builder()
.max_metadata_size(test_spec.moov().build().data.encoded_len() - 1)
.build();
let test = test_spec.build();
test.sanitize_ok();
assert_matches!(sanitize_with_config(test, config).unwrap_err(), Error::Parse(err) => {
assert_matches!(err.into_inner(), ParseError::InvalidInput);
});
}
#[test]
fn mdat_after_moov() {
test_mp4().boxes(&[FTYP, MOOV, MDAT][..]).build().sanitize_ok_noop();
}
#[test]
fn no_ftyp() {
let test = test_mp4().boxes(&[MOOV, MDAT][..]).build();
assert_matches!(sanitize(test).unwrap_err(), Error::Parse(err) => {
assert_matches!(err.into_inner(), ParseError::InvalidBoxLayout);
});
}
#[test]
fn multiple_ftyp() {
let test = test_mp4().boxes(&[FTYP, FTYP, MOOV, MDAT][..]).build();
assert_matches!(sanitize(test).unwrap_err(), Error::Parse(err) => {
assert_matches!(err.into_inner(), ParseError::InvalidBoxLayout);
});
}
#[test]
fn ftyp_not_first_box() {
let test = test_mp4().boxes(&[FREE, FREE, FTYP, MDAT, MOOV][..]).build();
test.sanitize_ok();
}
#[test]
fn ftyp_not_first_significant_box() {
let test = test_mp4().boxes(&[MOOV, FTYP, MDAT][..]).build();
assert_matches!(sanitize(test).unwrap_err(), Error::Parse(err) => {
assert_matches!(err.into_inner(), ParseError::InvalidBoxLayout);
});
}
#[test]
fn no_moov() {
let test = test_mp4().boxes(&[FTYP, MDAT][..]).build();
assert_matches!(sanitize(test).unwrap_err(), Error::Parse(err) => {
assert_matches!(err.into_inner(), ParseError::MissingRequiredBox(MOOV));
});
}
#[test]
fn no_mdat() {
let test = test_mp4().boxes(&[FTYP, MOOV][..]).build();
assert_matches!(sanitize(test).unwrap_err(), Error::Parse(err) => {
assert_matches!(err.into_inner(), ParseError::MissingRequiredBox(MDAT));
});
}
#[test]
fn free_boxes_in_metadata() {
let test = test_mp4().boxes(&[FTYP, FREE, SKIP, MDAT, MOOV, FREE][..]).build();
test.sanitize_ok();
}
#[test]
fn free_boxes_after_mdat() {
let test = test_mp4().boxes(&[FTYP, MDAT, SKIP, FREE, MOOV][..]).build();
test.sanitize_ok();
}
#[test]
fn meta_boxes_in_metadata() {
let test = test_mp4().boxes(&[FTYP, MDAT, MOOV, META, MECO][..]).build();
test.sanitize_ok();
}
#[test]
fn meta_boxes_after_mdat() {
let test = test_mp4().boxes(&[FTYP, MDAT, META, MDAT, MECO, MOOV][..]).build();
test.sanitize_ok();
}
#[test]
fn multiple_mdat() {
test_mp4()
.boxes(&[FTYP, MDAT, FREE, MDAT, MDAT, FREE, MOOV][..])
.build()
.sanitize_ok();
}
#[test]
fn uuid() {
let test = test_mp4().boxes(&[FTYP, MOOV, TEST_UUID, MDAT][..]).build();
assert_matches!(sanitize(test).unwrap_err(), Error::Parse(err) => {
assert_matches!(err.into_inner(), ParseError::UnsupportedBox(TEST_UUID));
});
}
#[test]
fn mp41() {
let test = test_mp4()
.ftyp(test_ftyp().major_brand(MP41).add_compatible_brand(MP41).clone())
.build();
assert_matches!(sanitize(test).unwrap_err(), Error::Parse(err) => {
assert_matches!(err.into_inner(), ParseError::UnsupportedFormat(MP41));
});
}
#[test]
fn mp42() {
let ftyp = test_ftyp()
.major_brand(MP42)
.compatible_brands(vec![MP42, ISOM])
.clone();
let test = test_mp4().ftyp(ftyp).build();
test.sanitize_ok();
}
#[test]
fn no_compatible_brands() {
let test = test_mp4()
.ftyp(test_ftyp().major_brand(ISOM).compatible_brands(vec![]).clone())
.build();
assert_matches!(sanitize(test).unwrap_err(), Error::Parse(err) => {
assert_matches!(err.into_inner(), ParseError::UnsupportedFormat(ISOM));
});
}
#[test]
fn no_trak() {
let test = test_mp4().moov(test_moov().trak(false).clone()).build();
assert_matches!(sanitize(test).unwrap_err(), Error::Parse(err) => {
assert_matches!(err.into_inner(), ParseError::MissingRequiredBox(TRAK));
});
}
#[test]
fn no_mdia() {
let test = test_mp4()
.boxes(&[FTYP, MDAT, MOOV][..])
.moov(test_moov().mdia(false).clone())
.build();
assert_matches!(sanitize(test).unwrap_err(), Error::Parse(err) => {
assert_matches!(err.into_inner(), ParseError::MissingRequiredBox(MDIA));
});
}
#[test]
fn no_minf() {
let test = test_mp4()
.boxes(&[FTYP, MDAT, MOOV][..])
.moov(test_moov().minf(false).clone())
.build();
assert_matches!(sanitize(test).unwrap_err(), Error::Parse(err) => {
assert_matches!(err.into_inner(), ParseError::MissingRequiredBox(MINF));
});
}
#[test]
fn no_stbl() {
let test = test_mp4()
.boxes(&[FTYP, MDAT, MOOV][..])
.moov(test_moov().stbl(false).clone())
.build();
assert_matches!(sanitize(test).unwrap_err(), Error::Parse(err) => {
assert_matches!(err.into_inner(), ParseError::MissingRequiredBox(STBL));
});
}
#[test]
fn no_stco() {
let test = test_mp4()
.boxes(&[FTYP, MDAT, MOOV][..])
.moov(test_moov().stco(false).clone())
.build();
assert_matches!(sanitize(test).unwrap_err(), Error::Parse(err) => {
assert_matches!(err.into_inner(), ParseError::MissingRequiredBox(STCO | CO64));
});
}
#[test]
fn co64() {
test_mp4()
.boxes(&[FTYP, MDAT, MOOV][..])
.moov(test_moov().stco(false).co64(true).clone())
.build()
.sanitize_ok();
}
#[test]
fn stco_and_co64() {
let test = test_mp4()
.boxes(&[FTYP, MDAT, MOOV][..])
.moov(test_moov().co64(true).clone())
.build();
assert_matches!(sanitize(test).unwrap_err(), Error::Parse(err) => {
assert_matches!(err.into_inner(), ParseError::InvalidBoxLayout);
});
}
#[test]
fn cumulative_mdat_box_size() {
let test_spec = test_mp4().mdat_data_until_eof().build_spec().unwrap();
let test_1 = test_spec.build();
let mdat_box_length = test_1.mdat.len as u32;
let config_bad = Config::builder().build();
assert_matches!(sanitize_with_config(test_1, config_bad).unwrap_err(), Error::Parse(err) => {
assert_matches!(err.into_inner(), ParseError::MissingRequiredBox(_));
});
let test_2 = test_spec.build();
let config_good = Config::builder()
.cumulative_mdat_box_size(Some(mdat_box_length))
.build();
test_2.sanitize_ok_with_config(config_good);
}
}