#![cfg_attr(feature = "nightly", feature(test))]
#![cfg_attr(feature = "nightly", feature(macro_metavar_expr))]
use crate::description::FileTypeDescription;
use crate::matcher::{FileTypeMatcher, RelativePosition, Step, TestResult};
pub use crate::types::FileRootType;
pub use crate::types::FileType;
use std::collections::HashSet;
use std::io::{ErrorKind, Read, Seek, SeekFrom};
use std::prelude::rust_2021::TryFrom;
#[cfg(any(feature = "mime", feature = "mediatype"))]
mod conv;
pub mod description;
pub mod matcher;
pub mod types;
/// A single file type reported by the detection functions, together with a
/// flag telling whether it was matched in full.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct FileTypeMatch {
/// The file type that matched.
pub file_type: FileType,
/// Whether the match is a full one. The start/end detectors in this file set
/// it when the matcher reports `TestResult::Matched`; `detect` recomputes it
/// when it merges start and end results.
pub full_match: bool,
}
impl FileTypeMatch {
pub fn new(file_type: FileType, full_match: bool) -> FileTypeMatch {
FileTypeMatch {
file_type,
full_match,
}
}
}
/// The outcome of a detection run: the most likely file types plus every
/// match that was recorded along the way.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct FileTypeMatches {
/// The file types considered most likely. The start/end detectors in this
/// file fill it with the fully matched types when there are any, and with
/// every matched type otherwise.
pub likely_to_be: Vec<FileType>,
/// Every recorded match, each with its own full-match flag.
pub all_matches: Vec<FileTypeMatch>,
}
impl FileTypeMatches {
pub fn new(likely_to_be: Vec<FileType>, all_matches: Vec<FileTypeMatch>) -> FileTypeMatches {
FileTypeMatches {
likely_to_be,
all_matches,
}
}
}
pub fn detect<R>(mut read: R) -> Result<Option<FileTypeMatches>, std::io::Error>
where
R: Read,
R: Seek,
{
let at_start = detect_at_start_from_ref(&mut read)?;
if let Some(start) = at_start {
let types: Vec<FileType> = start.all_matches.iter().map(|s| s.file_type).collect();
let at_end = detect_variants_at_end_from_ref(&mut read, types.as_slice())?;
if let Some(at_end) = at_end {
let start_matches: Vec<FileType> =
start.all_matches.iter().map(|c| c.file_type).collect();
let perfect: Vec<FileType> = at_end
.all_matches
.iter()
.map(|t| t.file_type)
.filter(|c| start_matches.contains(c))
.collect();
let mut all_likely: Vec<FileType> = vec![];
all_likely.extend(start.likely_to_be);
all_likely.extend(perfect);
all_likely.dedup_by_key(|v| *v);
let mut merged: Vec<FileTypeMatch> = vec![];
merged.extend(start.all_matches);
merged.extend(at_end.all_matches);
let mut mapped_merged_items = merged
.iter()
.map(|v| FileTypeMatch {
file_type: v.file_type,
full_match: all_likely.contains(&v.file_type),
})
.collect::<Vec<FileTypeMatch>>();
mapped_merged_items.dedup_by_key(|v| v.file_type);
Ok(Some(FileTypeMatches {
likely_to_be: all_likely,
all_matches: mapped_merged_items,
}))
} else {
Ok(Some(start))
}
} else {
Ok(None)
}
}
/// Detects the file type from the beginning of `read`, consuming the reader.
///
/// The data is tested against the list returned by `FileType::variants()`.
///
/// # Errors
///
/// Returns any I/O error raised while reading.
pub fn detect_at_start<R>(mut read: R) -> Result<Option<FileTypeMatches>, std::io::Error>
where
    R: Read,
{
    let candidates = FileType::variants();
    detect_variants_at_start_from_ref(&mut read, &candidates)
}
/// Detects the file type from the beginning of `read` without taking
/// ownership of the reader.
///
/// The data is tested against the list returned by `FileType::variants()`.
///
/// # Errors
///
/// Returns any I/O error raised while reading.
pub fn detect_at_start_from_ref<R>(read: &mut R) -> Result<Option<FileTypeMatches>, std::io::Error>
where
    R: Read,
{
    let candidates = FileType::variants();
    detect_variants_at_start_from_ref(read, &candidates)
}
/// Detects the file type from the beginning of `read`, consuming the reader
/// and considering only the given `variants`.
///
/// # Errors
///
/// Returns any I/O error raised while reading.
pub fn detect_variants_at_start<R>(
    mut read: R,
    variants: &[FileType],
) -> Result<Option<FileTypeMatches>, std::io::Error>
where
    R: Read,
{
    let reader = &mut read;
    detect_variants_at_start_from_ref(reader, variants)
}
/// Reads from `read` into `buf` until the buffer is full or the reader reports
/// end of input, retrying reads that fail with `ErrorKind::Interrupted`.
///
/// Returns the number of bytes stored at the front of `buf`.
fn read_until_full<R>(read: &mut R, buf: &mut [u8]) -> Result<usize, std::io::Error>
where
    R: Read,
{
    let mut filled = 0usize;
    while filled < buf.len() {
        match read.read(&mut buf[filled..]) {
            Ok(0) => break,
            Ok(count) => filled += count,
            Err(e) if e.kind() == ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(filled)
}

/// Detects the file type from the beginning of `read`, considering only the
/// given `variants`.
///
/// Detection runs in up to two steps:
///
/// 1. **Small step** – reads up to the size reported by
///    `FileType::ideal_block_size_of_variants` and tests it with `Step::Small`.
///    If any type matches in full, those types are returned right away as
///    `likely_to_be`.
/// 2. **Large step** – extends the data up to the size reported by
///    `FileType::maximum_block_size_of_variants` and, if any new bytes arrived,
///    tests the whole prefix with `Step::Large`.
///
/// When no full match was found in the small step, every recorded match is
/// reported as likely. `Ok(None)` means nothing matched.
///
/// # Errors
///
/// Returns any I/O error raised while reading, except `Interrupted`, which is
/// retried.
pub fn detect_variants_at_start_from_ref<R>(
    read: &mut R,
    variants: &[FileType],
) -> Result<Option<FileTypeMatches>, std::io::Error>
where
    R: Read,
{
    let start_position = RelativePosition::Start;
    let small = FileType::ideal_block_size_of_variants(&start_position, variants);
    let mut matches: Vec<FileTypeMatch> = vec![];
    let mut read_data: Vec<u8> = vec![];

    if let Some((size, types)) = small {
        // A single `read` may legally return fewer bytes than requested even
        // when more are available, so keep reading until the block is full or
        // the input ends.
        read_data.resize(size, 0);
        let filled = read_until_full(read, &mut read_data)?;
        read_data.truncate(filled);
        push_matched_types_into(
            &mut matches,
            &read_data,
            &start_position,
            &Step::Small,
            &types,
        );
    }

    let perfect: Vec<FileType> = matches
        .iter()
        .filter(|m| m.full_match)
        .map(|m| m.file_type)
        .collect();
    if !perfect.is_empty() {
        return Ok(Some(FileTypeMatches {
            likely_to_be: perfect,
            all_matches: matches,
        }));
    }

    let big = FileType::maximum_block_size_of_variants(&start_position, variants);
    if let Some((size, types)) = big {
        let already_read = read_data.len();
        // Saturate so that a maximum block smaller than what is already
        // buffered simply requests nothing instead of underflowing.
        let missing = size.saturating_sub(already_read);
        read_data.resize(already_read + missing, 0);
        // Progress is measured against the missing part only; the bytes from
        // the small step must not count towards it.
        let filled = read_until_full(read, &mut read_data[already_read..])?;
        read_data.truncate(already_read + filled);
        // The large test only runs when the large step brought in new bytes.
        if filled != 0 {
            push_matched_types_into(
                &mut matches,
                &read_data,
                &start_position,
                &Step::Large,
                &types,
            );
        }
    }

    if matches.is_empty() {
        return Ok(None);
    }
    let likely_to_be: Vec<FileType> = matches.iter().map(|m| m.file_type).collect();
    Ok(Some(FileTypeMatches {
        likely_to_be,
        all_matches: matches,
    }))
}
/// Detects the file type from the end of `read`, consuming the reader.
///
/// The data is tested against the list returned by `FileType::variants()`.
///
/// # Errors
///
/// Returns any I/O error raised while seeking or reading.
pub fn detect_at_end<R>(mut read: R) -> Result<Option<FileTypeMatches>, std::io::Error>
where
    R: Read,
    R: Seek,
{
    let candidates = FileType::variants();
    detect_variants_at_end_from_ref(&mut read, &candidates)
}
/// Detects the file type from the end of `read`, consuming the reader and
/// considering only the given `variants`.
///
/// # Errors
///
/// Returns any I/O error raised while seeking or reading.
pub fn detect_variants_at_end<R>(
    mut read: R,
    variants: &[FileType],
) -> Result<Option<FileTypeMatches>, std::io::Error>
where
    R: Read,
    R: Seek,
{
    let reader = &mut read;
    detect_variants_at_end_from_ref(reader, variants)
}
/// Detects the file type from the end of `read` without taking ownership of
/// the reader.
///
/// The data is tested against the list returned by `FileType::variants()`.
///
/// # Errors
///
/// Returns any I/O error raised while seeking or reading.
pub fn detect_at_end_from_ref<R>(read: &mut R) -> Result<Option<FileTypeMatches>, std::io::Error>
where
    R: Read,
    R: Seek,
{
    let candidates = FileType::variants();
    detect_variants_at_end_from_ref(read, &candidates)
}
/// Detects the file type from the end of `read`, considering only the given
/// `variants`.
///
/// A window of at most the size reported by
/// `FileType::ideal_block_size_of_variants` is placed at the very end of the
/// stream and tested with `Step::Small`. The window start is then moved one
/// byte towards the beginning and the test repeated, until the number of
/// recorded matches equals the number of candidate types or the window start
/// reaches offset 0.
///
/// If any recorded match is a full match, only the fully matched types are
/// reported as likely; otherwise every recorded match is. `Ok(None)` means
/// nothing matched.
///
/// NOTE(review): a window starting at offset 0 is never tested, so a stream
/// that is no longer than the block size yields `Ok(None)` — confirm this is
/// intended.
///
/// NOTE(review): every window appends its matches, so the same type can be
/// recorded more than once and the "all types matched" stop condition counts
/// those repetitions — confirm this is intended.
///
/// # Errors
///
/// Returns any I/O error raised while seeking or reading, except
/// `Interrupted` reads, which are retried.
pub fn detect_variants_at_end_from_ref<R>(
    read: &mut R,
    variants: &[FileType],
) -> Result<Option<FileTypeMatches>, std::io::Error>
where
    R: Read,
    R: Seek,
{
    let end_position = RelativePosition::End;
    let small = FileType::ideal_block_size_of_variants(&end_position, variants);
    let mut matches: Vec<FileTypeMatch> = vec![];

    if let Some((size, types)) = small {
        // Seeking to the end yields the stream length; the window can never be
        // larger than the stream itself.
        let stream_len = read.seek(SeekFrom::End(0))?;
        let window = if stream_len > u64::try_from(size).unwrap() {
            size
        } else {
            usize::try_from(stream_len).unwrap()
        };
        let mut buff = vec![0u8; window];
        // Negative distance from the end at which the current window starts.
        let mut back = -i64::try_from(window).unwrap();
        let mut offset = read.seek(SeekFrom::End(back))?;

        while offset > 0 {
            let read_bytes = loop {
                match read.read(&mut buff[..]) {
                    Ok(count) => break count,
                    Err(e) if e.kind() == ErrorKind::Interrupted => continue,
                    Err(e) => return Err(e),
                };
            };
            push_matched_types_into(
                &mut matches,
                &buff[..read_bytes],
                &end_position,
                &Step::Small,
                &types,
            );
            if types.len() == matches.len() {
                break;
            }
            // Slide the window one byte towards the start of the stream.
            back -= 1;
            offset = read.seek(SeekFrom::End(back))?;
        }
    }

    if matches.is_empty() {
        return Ok(None);
    }
    let perfect: Vec<FileType> = matches
        .iter()
        .filter(|m| m.full_match)
        .map(|m| m.file_type)
        .collect();
    let likely_to_be: Vec<FileType> = if perfect.is_empty() {
        matches.iter().map(|m| m.file_type).collect()
    } else {
        perfect
    };
    Ok(Some(FileTypeMatches {
        likely_to_be,
        all_matches: matches,
    }))
}
/// Tests `bytes` against each of `types` and appends every hit to `matches`.
///
/// Types are tried in slice order. Once a type has matched (fully or not), any
/// later type sharing the same `root()` is skipped for this call. A hit is
/// recorded with `full_match` set when the matcher reports
/// `TestResult::Matched`; `TestResult::NotMatched` records nothing.
///
/// Existing entries in `matches` are left untouched and are not consulted.
fn push_matched_types_into(
    matches: &mut Vec<FileTypeMatch>,
    bytes: &[u8],
    relative_position: &RelativePosition,
    step: &Step,
    types: &[FileType],
) {
    // Roots that already produced a match during this call.
    let mut matched_roots = HashSet::new();
    for file_type in types {
        let root = file_type.root();
        if matched_roots.contains(&root) {
            continue;
        }
        let outcome = file_type.test(relative_position, step, bytes);
        if outcome == TestResult::NotMatched {
            continue;
        }
        matches.push(FileTypeMatch {
            file_type: *file_type,
            full_match: outcome == TestResult::Matched,
        });
        matched_roots.insert(root);
    }
}
#[cfg(test)]
mod tests {
    //! Detection tests driven by the fixture files under `files/`.

    use crate::types::FileType;
    use crate::{
        detect, detect_at_end_from_ref, detect_at_start_from_ref, FileTypeMatch, FileTypeMatches,
    };
    use std::collections::HashSet;
    use std::fs::File;
    use std::io::{BufReader, Error, ErrorKind};
    use std::path::Path;

    #[test]
    fn test_zip_detect() {
        test_detect_match("files/hello.zip", FileType::Zip);
    }

    #[test]
    fn test_rar_detect() {
        test_detect_match("files/hello.rar", FileType::Rar5);
    }

    #[test]
    fn test_rar_sfx_detect() {
        test_detect_match_n(
            "files/hello-world.exe",
            vec![
                FileTypeMatch::new(FileType::Rar5, true),
                FileTypeMatch::new(FileType::DosMzExecutable, false),
            ],
        );
    }

    #[test]
    fn test_fast_rar_sfx_detect() {
        test_fast_detect_match_n(
            "files/hello-world.exe",
            vec![
                FileTypeMatch::new(FileType::Rar5, true),
                FileTypeMatch::new(FileType::DosMzExecutable, false),
            ],
        );
    }

    #[test]
    fn test_2mib_rar_sfx_detect() {
        test_fast_detect_match_n(
            "files/2mib.exe",
            vec![
                FileTypeMatch::new(FileType::Rar5, true),
                FileTypeMatch::new(FileType::DosMzExecutable, false),
            ],
        );
    }

    #[test]
    fn test_png_detect() {
        test_detect_match("files/rust-logo.png", FileType::Png);
    }

    #[test]
    fn test_jpg_detect() {
        test_detect_match("files/rust-logo.jpg", FileType::Jpg);
    }

    #[test]
    fn test_7z_detect() {
        test_detect_match("files/rust-logo.7z", FileType::_7z);
    }

    #[test]
    fn test_opus_detect() {
        test_detect_match("files/test-opus.opus", FileType::Opus);
    }

    #[test]
    fn test_vorbis_detect() {
        test_detect_match("files/test-vorbis.ogg", FileType::Vorbis);
    }

    #[test]
    fn test_mp3_detect() {
        test_detect_match("files/test-mp3.mp3", FileType::Mp3);
    }

    #[test]
    fn test_webp_detect() {
        test_detect_match("files/rust-logo.webp", FileType::Webp);
    }

    #[test]
    fn test_flac_detect() {
        test_detect_match_maybe("files/test-flac.flac", FileType::Flac);
    }

    #[test]
    fn test_wasm_detect() {
        test_detect_match("files/test-wasm.wasm", FileType::Wasm);
    }

    #[test]
    fn test_class_detect() {
        test_detect_match("files/test-class.class", FileType::Class);
    }

    #[test]
    fn so_class_detect() {
        test_detect_match("files/test-so.so", FileType::Elf);
    }

    #[test]
    fn wav_class_detect() {
        test_detect_match("files/test-wav.wav", FileType::Wav);
    }

    #[test]
    fn avi_class_detect() {
        test_detect_match("files/test-avi.avi", FileType::Avi);
    }

    #[test]
    fn aif_class_detect() {
        test_detect_match("files/test-aif.aif", FileType::Aiff);
    }

    #[test]
    fn tiff_class_detect() {
        test_detect_match("files/rust-logo.tiff", FileType::Tiff);
    }

    #[test]
    fn sqlite3_class_detect() {
        test_detect_match("files/test-db.db", FileType::Sqlite3);
    }

    #[test]
    fn test_flac_txt_detect() {
        test_detect_match_maybe("files/test-flac.txt", FileType::Flac);
    }

    #[test]
    fn test_pdf_detect() {
        test_detect_match_maybe("files/rust-logo.pdf", FileType::Pdf);
    }

    #[test]
    fn test_mka_detect() {
        test_detect_match("files/test-mka.mka", FileType::Matroska);
    }

    #[test]
    fn test_ico_detect() {
        test_detect_match("files/rust-logo.ico", FileType::Ico);
    }

    #[test]
    fn test_tasty_detect() {
        test_detect_match("files/test-tasty.tasty", FileType::Tasty);
    }

    #[test]
    fn test_xcf_detect() {
        test_detect_match_maybe("files/rust-logo.xcf", FileType::Xcf);
    }

    #[test]
    fn test_gif_detect() {
        test_detect_match_maybe("files/rust-logo.gif", FileType::Gif);
    }

    #[test]
    fn test_bmp_detect() {
        test_detect_match_maybe("files/rust-logo.bmp", FileType::Bmp);
    }

    #[test]
    fn test_iso_detect() {
        test_detect_match("files/test-iso.iso", FileType::Iso);
    }

    #[test]
    fn test_txt_no_match() {
        test_detect_no_match("files/text");
    }

    #[test]
    fn test_tar_detect() {
        test_detect_match("files/hello.tar", FileType::Tar);
        test_detect_match("files/test.tar", FileType::Tar);
        test_detect_match("files/test-0.tar", FileType::Tar);
    }

    #[test]
    fn test_lzma_detect() {
        test_detect_match("files/test.tar.lzma", FileType::Lzma);
    }

    #[test]
    fn test_xz_detect() {
        test_detect_match("files/test-xz.xz", FileType::Xz);
    }

    #[test]
    fn test_zst_detect() {
        test_detect_match("files/ex.tar.zst", FileType::Zst);
    }

    #[test]
    fn test_gpg_detect() {
        test_detect_match("files/test-db.db.gpg", FileType::Gpg);
    }

    #[test]
    fn test_armored_gpg_detect() {
        test_detect_match("files/test-db.db.asc", FileType::ArmoredGpg);
    }

    /// Opens the fixture at `path` for reading, wrapped in a `BufReader`.
    fn open_buffered<P>(path: P) -> BufReader<File>
    where
        P: AsRef<Path>,
    {
        let file = File::open(path).expect("test fixture should open for reading");
        BufReader::new(file)
    }

    /// Runs the combined start + end detection on the fixture at `path`.
    fn test_detect<P>(path: P) -> Result<Option<FileTypeMatches>, Error>
    where
        P: AsRef<Path>,
    {
        detect(open_buffered(path))
    }

    /// Runs the start-only detection on the fixture at `path`.
    fn test_fast_detect<P>(path: P) -> Result<Option<FileTypeMatches>, Error>
    where
        P: AsRef<Path>,
    {
        let mut buf = open_buffered(path);
        detect_at_start_from_ref(&mut buf)
    }

    /// Runs the end-only (sliding window) detection on the fixture at `path`.
    fn test_detect_sliding<P>(path: P) -> Result<Option<FileTypeMatches>, Error>
    where
        P: AsRef<Path>,
    {
        let mut buf = open_buffered(path);
        detect_at_end_from_ref(&mut buf)
    }

    /// Asserts that `detected` holds exactly one match for `file_type` whose
    /// flag equals `full_match`, and that the type is the only likely one.
    fn assert_single_match(
        detected: Result<Option<FileTypeMatches>, Error>,
        file_type: FileType,
        full_match: bool,
    ) {
        let expected: Result<Option<FileTypeMatches>, ErrorKind> = Ok(Some(FileTypeMatches::new(
            vec![file_type],
            vec![FileTypeMatch::new(file_type, full_match)],
        )));
        assert_eq!(detected.map_err(|e| e.kind()), expected);
    }

    /// Asserts that `detected` succeeded with a result whose likely types and
    /// matches equal `file_type_match`, ignoring order and repetitions.
    fn assert_matches_n(
        detected: Result<Option<FileTypeMatches>, Error>,
        file_type_match: Vec<FileTypeMatch>,
    ) {
        let detected = detected
            .expect("detection should not fail")
            .expect("detection should report at least one match");
        let types: HashSet<FileType> = file_type_match.iter().map(|v| v.file_type).collect();
        let likely_types: HashSet<FileType> = detected.likely_to_be.iter().copied().collect();
        assert_eq!(types, likely_types);
        let should_match: HashSet<FileTypeMatch> = file_type_match.into_iter().collect();
        let matches: HashSet<FileTypeMatch> = detected.all_matches.into_iter().collect();
        assert_eq!(should_match, matches);
    }

    /// Expects the combined detection to report `file_type` as a single full match.
    fn test_detect_match<P>(path: P, file_type: FileType)
    where
        P: AsRef<Path>,
    {
        assert_single_match(test_detect(path), file_type, true);
    }

    /// Expects the combined detection to report exactly `file_type_match`.
    fn test_detect_match_n<P>(path: P, file_type_match: Vec<FileTypeMatch>)
    where
        P: AsRef<Path>,
    {
        assert_matches_n(test_detect(path), file_type_match);
    }

    /// Expects the start-only detection to report exactly `file_type_match`.
    fn test_fast_detect_match_n<P>(path: P, file_type_match: Vec<FileTypeMatch>)
    where
        P: AsRef<Path>,
    {
        assert_matches_n(test_fast_detect(path), file_type_match);
    }

    /// Expects the combined detection to report `file_type` as a single
    /// match that is not a full one.
    fn test_detect_match_maybe<P>(path: P, file_type: FileType)
    where
        P: AsRef<Path>,
    {
        assert_single_match(test_detect(path), file_type, false);
    }

    /// Expects the combined detection to report no match at all.
    fn test_detect_no_match<P>(path: P)
    where
        P: AsRef<Path>,
    {
        let detect = test_detect(path).map_err(|e| e.kind());
        let expected: Result<Option<FileTypeMatches>, ErrorKind> = Ok(None);
        assert_eq!(detect, expected);
    }

    /// Expects the end-only detection to report `file_type` as a single full match.
    // No test currently exercises the sliding detection; the helper is kept
    // ready for when one is added.
    #[allow(dead_code)]
    fn test_detect_match_sliding<P>(path: P, file_type: FileType)
    where
        P: AsRef<Path>,
    {
        assert_single_match(test_detect_sliding(path), file_type, true);
    }
}