use crate::aac::AacFile;
use crate::ape::ApeFile;
use crate::config::{ParseOptions, global_options};
use crate::error::Result;
use crate::file::{AudioFile, BoundTaggedFile, FileType, FileTypeGuessResult, TaggedFile};
use crate::flac::FlacFile;
use crate::iff::aiff::AiffFile;
use crate::iff::wav::WavFile;
use crate::macros::err;
use crate::mp4::Mp4File;
use crate::mpeg::MpegFile;
use crate::mpeg::header::search_for_frame_sync;
use crate::musepack::MpcFile;
use crate::ogg::opus::OpusFile;
use crate::ogg::speex::SpeexFile;
use crate::ogg::vorbis::VorbisFile;
use crate::resolve::custom_resolvers;
use crate::wavpack::WavPackFile;
use crate::io::FileLike;
use std::fs::File;
use std::io::{BufReader, Cursor, Read, Seek, SeekFrom};
use std::path::Path;
/// A format probe: a reader bundled with an optional [`FileType`] and optional
/// [`ParseOptions`].
///
/// The file type may be supplied up front, derived from a path, or guessed from the
/// stream's content; it is then used to select the reader implementation that parses
/// the stream.
pub struct Probe<R: Read> {
/// The wrapped reader that is inspected and parsed.
inner: R,
/// Parse options supplied by the caller; `None` until `options` is called, in which
/// case defaults are substituted when they are needed.
options: Option<ParseOptions>,
/// The file type that was set explicitly or guessed, if any.
f_ty: Option<FileType>,
}
impl<R: Read> Probe<R> {
    /// Creates a probe around `reader` with neither a file type nor parse options set.
    #[must_use]
    pub const fn new(reader: R) -> Self {
        Self {
            f_ty: None,
            options: None,
            inner: reader,
        }
    }

    /// Creates a probe around `reader` whose file type is already known to be `file_type`.
    ///
    /// No parse options are set.
    pub fn with_file_type(reader: R, file_type: FileType) -> Self {
        Self::new(reader).set_file_type(file_type)
    }

    /// Returns the file type currently stored on the probe, if any.
    pub fn file_type(&self) -> Option<FileType> {
        self.f_ty
    }

    /// Replaces the stored file type with `file_type` and hands the probe back.
    pub fn set_file_type(self, file_type: FileType) -> Self {
        Self {
            f_ty: Some(file_type),
            ..self
        }
    }

    /// Stores `options` as the parse options to use and hands the probe back.
    #[must_use]
    pub fn options(self, options: ParseOptions) -> Self {
        Self {
            options: Some(options),
            ..self
        }
    }

    /// Consumes the probe and returns the wrapped reader.
    pub fn into_inner(self) -> R {
        let Self { inner, .. } = self;
        inner
    }
}
impl Probe<BufReader<File>> {
    /// Opens the file at `path` for buffered reading and wraps it in a probe.
    ///
    /// The file type is taken from [`FileType::from_path`]; the content of the file is
    /// not inspected here. No parse options are set.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened.
    pub fn open<P>(path: P) -> Result<Self>
    where
        P: AsRef<Path>,
    {
        let file_path: &Path = path.as_ref();
        log::debug!("Probe: Opening `{}` for reading", file_path.display());

        // The type guess (and its log line) happens before the file is actually opened
        let guessed_type = FileType::from_path(file_path);
        log::debug!("Probe: Guessed file type `{:?}` from extension", guessed_type);

        let reader = BufReader::new(File::open(file_path)?);
        Ok(Self {
            inner: reader,
            options: None,
            f_ty: guessed_type,
        })
    }
}
impl<R: Read + Seek> Probe<R> {
    /// Attempts to determine the file type from the content of the reader.
    ///
    /// A successful content-based guess replaces whatever file type is stored on the
    /// probe; if nothing can be determined, the stored value is left as it was. The
    /// junk-byte search limit is taken from the configured [`ParseOptions`], or
    /// [`ParseOptions::DEFAULT_MAX_JUNK_BYTES`] when no options were set.
    ///
    /// # Errors
    ///
    /// Returns any I/O error raised while reading from or seeking within the reader.
    pub fn guess_file_type(mut self) -> std::io::Result<Self> {
        let max_junk_bytes = match self.options {
            Some(options) => options.max_junk_bytes,
            None => ParseOptions::DEFAULT_MAX_JUNK_BYTES,
        };

        if let Some(detected) = self.guess_inner(max_junk_bytes)? {
            self.f_ty = Some(detected);
        }

        log::debug!("Probe: Guessed file type: {:?}", self.f_ty);
        Ok(self)
    }

    /// Copies at most `dest.len()` bytes from `reader` into the front of `dest` and
    /// returns how many bytes were copied.
    ///
    /// The reader is left wherever the copy stopped; callers seek back themselves.
    fn copy_prefix_into(reader: &mut R, dest: &mut [u8]) -> std::io::Result<usize> {
        let limit = dest.len() as u64;
        let copied = std::io::copy(&mut reader.by_ref().take(limit), &mut Cursor::new(dest))?;
        Ok(copied as usize)
    }

    /// Inspects the start of the stream and returns the detected file type, if any.
    ///
    /// Up to 36 bytes are sniffed. Custom resolvers (when enabled globally) are consulted
    /// first, then [`FileType::from_buffer_inner`]. If that reports a leading ID3v2 tag or
    /// possible junk, the stream is searched further for a known signature or frame sync.
    ///
    /// Every non-error return leaves the reader at the position it had on entry.
    #[allow(clippy::shadow_unrelated)]
    fn guess_inner(&mut self, max_junk_bytes: usize) -> std::io::Result<Option<FileType>> {
        let origin = self.inner.stream_position()?;

        // Sniff the first (up to) 36 bytes, then rewind before doing anything else
        let mut header = [0u8; 36];
        let header_len = Self::copy_prefix_into(&mut self.inner, &mut header)?;
        self.inner.seek(SeekFrom::Start(origin))?;
        let sniffed = &header[..header_len];

        // Custom resolvers get the first chance to claim the stream.
        // NOTE(review): the safety contract of `global_options()` is defined outside this block.
        let resolvers_enabled = unsafe { global_options().use_custom_resolvers };
        if resolvers_enabled {
            // A lock that cannot be acquired is skipped rather than treated as an error
            if let Ok(registry) = custom_resolvers().lock() {
                let custom_guess = registry
                    .iter()
                    .find_map(|(_, resolver)| resolver.guess(sniffed));
                if custom_guess.is_some() {
                    return Ok(custom_guess);
                }
            }
        }

        match FileType::from_buffer_inner(sniffed) {
            None => Ok(None),
            Some(FileTypeGuessResult::Determined(file_ty)) => Ok(Some(file_ty)),
            Some(FileTypeGuessResult::MaybePrecededById3(id3_len)) => {
                // Skip the tag; the extra 10 bytes are presumably the ID3v2 header,
                // which `id3_len` does not appear to include.
                log::debug!("Probe: ID3v2 tag detected, skipping {} bytes", 10 + id3_len);
                let after_id3 = self
                    .inner
                    .seek(SeekFrom::Current(i64::from(10 + id3_len)))?;

                // Peek at the 4 bytes following the tag, then return to just after the tag
                let mut ident = [0u8; 4];
                Self::copy_prefix_into(&mut self.inner, &mut ident)?;
                self.inner.seek(SeekFrom::Start(after_id3))?;

                let guess = if ident.starts_with(b"MAC") {
                    Ok(Some(FileType::Ape))
                } else if &ident == b"fLaC" {
                    Ok(Some(FileType::Flac))
                } else if &ident == b"MPCK" || ident.starts_with(b"MP+") {
                    Ok(Some(FileType::Mpc))
                } else {
                    self.check_mpeg_or_aac(max_junk_bytes)
                };

                // Rewind before handing back the result, whether it is Ok or Err
                self.inner.seek(SeekFrom::Start(origin))?;
                guess
            },
            Some(FileTypeGuessResult::MaybePrecededByJunk) => {
                log::debug!(
                    "Probe: Possible junk bytes detected, searching up to {} bytes",
                    max_junk_bytes
                );
                let guess = self.check_mpeg_or_aac(max_junk_bytes);

                // Rewind before handing back the result, whether it is Ok or Err
                self.inner.seek(SeekFrom::Start(origin))?;
                guess
            },
        }
    }

    /// Searches at most `max_junk_bytes` bytes ahead for a frame sync and classifies the
    /// stream as AAC or MPEG based on the second sync byte.
    ///
    /// Returns `Ok(None)` when no frame sync is found within the limit. The reader is not
    /// rewound by this method.
    fn check_mpeg_or_aac(&mut self, max_junk_bytes: usize) -> std::io::Result<Option<FileType>> {
        let sync_found = {
            let mut window = self.inner.by_ref().take(max_junk_bytes as u64);
            search_for_frame_sync(&mut window)?.is_some()
        };
        if !sync_found {
            return Ok(None);
        }

        // Step back two bytes to re-read the sync; presumably the search stops right
        // after the two sync bytes.
        let sync_pos = self.inner.seek(SeekFrom::Current(-2))?;
        log::debug!("Probe: Found possible frame sync at position {}", sync_pos);

        let mut sync_bytes = [0u8; 2];
        self.inner.read_exact(&mut sync_bytes)?;

        let second = sync_bytes[1];
        let bit_four_set = (second & 0b10000) > 0;
        let layer_bits_clear = (second & 0b110) == 0;

        let detected = if bit_four_set && layer_bits_clear {
            FileType::Aac
        } else {
            FileType::Mpeg
        };
        Ok(Some(detected))
    }

    /// Parses the stream according to the stored file type and returns the tagged file.
    ///
    /// # Errors
    ///
    /// Returns an error if no file type is set, if the type is a custom one while custom
    /// resolvers are disabled, or if the format-specific reader fails.
    pub fn read(self) -> Result<TaggedFile> {
        let (tagged_file, _reader) = self.read_inner()?;
        Ok(tagged_file)
    }

    /// Parses the stream and returns both the tagged file and the reader it came from.
    ///
    /// Parse options default to `ParseOptions::default()` when none were set.
    fn read_inner(mut self) -> Result<(TaggedFile, R)> {
        let options = self.options.unwrap_or_default();
        if !(options.read_tags || options.read_properties) {
            log::warn!("Skipping both tag and property reading, file will be empty");
        }

        let Some(f_type) = self.f_ty else {
            err!(UnknownFormat)
        };

        let reader = &mut self.inner;
        let tagged_file: TaggedFile = match f_type {
            FileType::Aac => AacFile::read_from(reader, options)?.into(),
            FileType::Aiff => AiffFile::read_from(reader, options)?.into(),
            FileType::Ape => ApeFile::read_from(reader, options)?.into(),
            FileType::Flac => FlacFile::read_from(reader, options)?.into(),
            FileType::Mpeg => MpegFile::read_from(reader, options)?.into(),
            FileType::Opus => OpusFile::read_from(reader, options)?.into(),
            FileType::Vorbis => VorbisFile::read_from(reader, options)?.into(),
            FileType::Wav => WavFile::read_from(reader, options)?.into(),
            FileType::Mp4 => Mp4File::read_from(reader, options)?.into(),
            FileType::Mpc => MpcFile::read_from(reader, options)?.into(),
            FileType::Speex => SpeexFile::read_from(reader, options)?.into(),
            FileType::WavPack => WavPackFile::read_from(reader, options)?.into(),
            FileType::Custom(c) => {
                // NOTE(review): the safety contract of `global_options()` is defined outside this block.
                let resolvers_enabled = unsafe { global_options().use_custom_resolvers };
                if !resolvers_enabled {
                    err!(UnknownFormat)
                }

                crate::resolve::lookup_resolver(c).read_from(reader, options)?
            },
        };

        Ok((tagged_file, self.inner))
    }
}
impl<F: FileLike> Probe<F> {
    /// Parses the stream like `read`, but keeps the underlying file handle and returns
    /// it together with the parsed file as a [`BoundTaggedFile`].
    ///
    /// # Errors
    ///
    /// Fails under the same conditions as the internal read: no file type set, custom
    /// resolvers disabled for a custom type, or a format-specific read error.
    pub fn read_bound(self) -> Result<BoundTaggedFile<F>> {
        self.read_inner()
            .map(|(inner, file_handle)| BoundTaggedFile { inner, file_handle })
    }
}
/// Reads a [`TaggedFile`] from an already opened [`File`].
///
/// The file is wrapped in a [`BufReader`], its type is guessed from its content, and it
/// is then parsed with that type.
///
/// # Errors
///
/// Returns an error if guessing the file type hits an I/O error or if reading fails.
pub fn read_from(file: &mut File) -> Result<TaggedFile> {
    let probe = Probe::new(BufReader::new(file)).guess_file_type()?;
    probe.read()
}
/// Reads a [`TaggedFile`] from the file at `path`.
///
/// The file type is whatever [`Probe::open`] derives from the path; the content is not
/// used to guess the type here.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or if reading fails.
pub fn read_from_path<P>(path: P) -> Result<TaggedFile>
where
    P: AsRef<Path>,
{
    Probe::open(path).and_then(Probe::read)
}
// Unit tests for `Probe`: content-based type guessing on in-memory data and on asset
// files, plus the interaction between reading and the global allocation limit.
#[cfg(test)]
mod tests {
use crate::config::{GlobalOptions, ParseOptions};
use crate::file::FileType;
use crate::probe::Probe;
use std::fs::File;
// An ID3v2 tag followed by four junk bytes and then an MPEG frame must still be
// guessed as MPEG.
#[test_log::test]
fn mp3_id3v2_trailing_junk() {
let data: [&[u8]; 4] = [
// "ID3" header, version byte 0x03, size bytes ending in 0x23 (35)
&[0x49, 0x44, 0x33, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23],
// A "TALB" frame: 10 header bytes followed by 0x19 (25) bytes of content
&[
0x54, 0x41, 0x4C, 0x42, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x01, 0xFF, 0xFE, 0x61,
0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, 0x61,
0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00,
],
// Four space characters acting as junk between the tag and the audio data
&[0x20, 0x20, 0x20, 0x20],
// Start of the audio data, beginning with the bytes 0xFF 0xFB
&[
0xFF, 0xFB, 0x50, 0xC4, 0x00, 0x03, 0xC0, 0x00, 0x01, 0xA4, 0x00, 0x00, 0x00, 0x20,
0x00, 0x00, 0x34, 0x80, 0x00, 0x00, 0x04,
],
];
let data: Vec<u8> = data.into_iter().flatten().copied().collect();
let data = std::io::Cursor::new(&data);
let probe = Probe::new(data).guess_file_type().unwrap();
assert_eq!(probe.file_type(), Some(FileType::Mpeg));
}
// Reading must succeed or fail depending on the globally configured allocation limit.
// NOTE(review): this test changes global options via `apply_global_options`; it sets
// the limit back to the default at the end, but may interact with tests run in parallel.
#[test_log::test]
fn parse_options_allocation_limit() {
use crate::id3::v2::util::synchsafe::SynchsafeInteger;
// Builds a frame with ID "SMTH": a 10-byte header, 5 zeroed flag-data bytes and
// `size` zeroed content bytes.
fn create_encrypted_frame(size: usize) -> Vec<u8> {
let flag_data = vec![0; 5];
let bytes = vec![0; size];
let frame_length_synch = ((bytes.len() + flag_data.len()) as u32)
.synch()
.unwrap()
.to_be_bytes();
// The final header byte (0b0000_0101) presumably sets the "encrypted" and
// "data length indicator" format flags — confirm against the ID3v2.4 spec.
let frame_header = vec![
b'S',
b'M',
b'T',
b'H',
frame_length_synch[0],
frame_length_synch[1],
frame_length_synch[2],
frame_length_synch[3],
0x00,
0b0000_0101, ];
[frame_header, flag_data, bytes].concat()
}
// Builds an "ID3" header (version byte 0x04) sized for one frame of `frame_size`
// content bytes, followed by that frame and the start of the audio data.
fn create_fake_mp3(frame_size: u32) -> Vec<u8> {
// Tag size = content + 5 flag-data bytes + 10 frame-header bytes
let id3v2_tag_length = (frame_size + 5 + 10).synch().unwrap().to_be_bytes();
[
vec![
0x49,
0x44,
0x33,
0x04,
0x00,
0x00,
id3v2_tag_length[0],
id3v2_tag_length[1],
id3v2_tag_length[2],
id3v2_tag_length[3],
],
create_encrypted_frame(frame_size as usize),
vec![
0xFF, 0xFB, 0x50, 0xC4, 0x00, 0x03, 0xC0, 0x00, 0x01, 0xA4, 0x00, 0x00, 0x00,
0x20, 0x00, 0x00, 0x34, 0x80, 0x00, 0x00, 0x04,
],
]
.into_iter()
.flatten()
.collect::<Vec<u8>>()
}
let parse_options = ParseOptions::new().read_properties(false);
// Lower the global allocation limit to 50
let mut global_options = GlobalOptions::new().allocation_limit(50);
crate::config::apply_global_options(global_options);
// A frame of 40 content bytes is expected to read fine under that limit
let within_limits = create_fake_mp3(40);
let probe = Probe::new(std::io::Cursor::new(&within_limits))
.set_file_type(FileType::Mpeg)
.options(parse_options);
assert!(probe.read().is_ok());
// A frame of 60 content bytes is expected to be rejected under that limit
let too_big = create_fake_mp3(60);
let probe = Probe::new(std::io::Cursor::new(&too_big))
.set_file_type(FileType::Mpeg)
.options(parse_options);
assert!(probe.read().is_err());
// With the default limit restored, the same 60-byte frame is expected to read fine
global_options.allocation_limit = GlobalOptions::DEFAULT_ALLOCATION_LIMIT;
crate::config::apply_global_options(global_options);
let probe = Probe::new(std::io::Cursor::new(&too_big))
.set_file_type(FileType::Mpeg)
.options(parse_options);
assert!(probe.read().is_ok());
}
// Checks the guessed type both via content sniffing and via `Probe::open`.
fn test_probe(path: &str, expected_file_type_guess: FileType) {
test_probe_file(path, expected_file_type_guess);
test_probe_path(path, expected_file_type_guess);
}
// Opens the file and guesses its type from content only (`guess_file_type`).
fn test_probe_file(path: &str, expected_file_type_guess: FileType) {
let mut f = File::open(path).unwrap();
let probe = Probe::new(&mut f).guess_file_type().unwrap();
assert_eq!(probe.file_type(), Some(expected_file_type_guess));
}
// Uses `Probe::open`, which takes the type from the path without sniffing content.
fn test_probe_path(path: &str, expected_file_type_guess: FileType) {
let probe = Probe::open(path).unwrap();
assert_eq!(probe.file_type(), Some(expected_file_type_guess));
}
// The tests below each probe one asset file and assert the expected file type.
#[test_log::test]
fn probe_aac() {
test_probe("tests/files/assets/minimal/untagged.aac", FileType::Aac);
}
#[test_log::test]
fn probe_aac_with_id3v2() {
test_probe("tests/files/assets/minimal/full_test.aac", FileType::Aac);
}
#[test_log::test]
fn probe_aiff() {
test_probe("tests/files/assets/minimal/full_test.aiff", FileType::Aiff);
}
#[test_log::test]
fn probe_ape_with_id3v2() {
test_probe("tests/files/assets/minimal/full_test.ape", FileType::Ape);
}
#[test_log::test]
fn probe_flac() {
test_probe("tests/files/assets/minimal/full_test.flac", FileType::Flac);
}
#[test_log::test]
fn probe_flac_with_id3v2() {
test_probe("tests/files/assets/flac_with_id3v2.flac", FileType::Flac);
}
#[test_log::test]
fn probe_mp3_with_id3v2() {
test_probe("tests/files/assets/minimal/full_test.mp3", FileType::Mpeg);
}
#[test_log::test]
fn probe_mp3_with_lots_of_junk() {
test_probe("tests/files/assets/junk.mp3", FileType::Mpeg);
}
#[test_log::test]
fn probe_vorbis() {
test_probe("tests/files/assets/minimal/full_test.ogg", FileType::Vorbis);
}
#[test_log::test]
fn probe_opus() {
test_probe("tests/files/assets/minimal/full_test.opus", FileType::Opus);
}
#[test_log::test]
fn probe_speex() {
test_probe("tests/files/assets/minimal/full_test.spx", FileType::Speex);
}
#[test_log::test]
fn probe_mp4() {
test_probe(
"tests/files/assets/minimal/m4a_codec_aac.m4a",
FileType::Mp4,
);
}
#[test_log::test]
fn probe_wav() {
test_probe(
"tests/files/assets/minimal/wav_format_pcm.wav",
FileType::Wav,
);
}
}