use std::fmt::Display;
use std::io::Read;
use std::path::Path;
use anyhow::Result;
use clap::ValueEnum;
use serde::{Deserialize, Serialize};
/// File formats that this module can recognise from the leading bytes of a file.
///
/// Several variants overlap on purpose: `ELF` is the generic form, while
/// `ELF32`/`ELF64`, `ELF_LSB`/`ELF_MSB` and the four combined variants narrow
/// it by class and/or byte order. Likewise `EXE`, `PE32`, `PE32Native` and
/// `PE32DotNet` describe increasingly specific MZ-based executables.
///
/// The type derives `ValueEnum` (usable as a command-line value) and
/// `Serialize`/`Deserialize`.
// NOTE(review): the `manual_non_exhaustive` allow is presumably needed because
// of the hidden trailing `NotSet` variant -- confirm before removing it.
#[allow(clippy::manual_non_exhaustive)]
#[derive(ValueEnum, Copy, Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
pub enum FileType {
// Variants are described with plain `//` comments rather than `///`: clap's
// `ValueEnum` derive uses variant doc comments as help text for the possible
// values, so `///` here would change the CLI help output.

// Data starting with `DOCFILE_MAGIC`.
DOCFILE,
// Any data starting with `ELF_MAGIC`, regardless of class or byte order.
ELF,
// ELF whose class byte (offset 4) is 1.
ELF32,
// ELF whose class byte (offset 4) is 2.
ELF64,
// ELF whose data-encoding byte (offset 5) is 1.
#[allow(non_camel_case_types)] ELF_LSB,
// ELF whose data-encoding byte (offset 5) is 2.
#[allow(non_camel_case_types)] ELF_MSB,
// ELF with class byte 1 and data-encoding byte 1.
#[allow(non_camel_case_types)] ELF32_LSB,
// ELF with class byte 2 and data-encoding byte 1.
#[allow(non_camel_case_types)] ELF64_LSB,
// ELF with class byte 1 and data-encoding byte 2.
#[allow(non_camel_case_types)] ELF32_MSB,
// ELF with class byte 2 and data-encoding byte 2.
#[allow(non_camel_case_types)] ELF64_MSB,
// Data starting with one of `EXE_MAGICS` (`MZ` or `ZM`).
EXE,
// Data starting with one of `MACH_O_MAGICS`, or with `FAT_MACHO` when the
// fat-header check passes.
MachO,
// Data starting with `PDF_MAGIC`.
PDF,
// MZ executable that also carries the `PE_MAGIC` signature. `from_bytes`
// never returns this variant (it reports `PE32Native` or `PE32DotNet`
// instead); it is only meaningful as an argument to `matches`.
PE32,
// PE32 image that `is_dotnet` identifies as .NET.
PE32DotNet,
// What `from_bytes` reports for a PE32 image that `is_dotnet` does not
// identify as .NET.
PE32Native,
// Data starting with `RTF_MAGIC`.
RTF,
// Placeholder value: hidden from rustdoc, skipped by serde and not offered as
// a clap value. `matches` panics (`unreachable!`) if called on it.
#[doc(hidden)]
#[serde(skip)]
#[clap(skip)]
NotSet,
}
/// Number of leading bytes read from a file by `FileType::from_path` and
/// `FileType::matches_path` before running detection.
const FILE_DETECTION_BUFFER_SIZE: usize = 384;

/// Mach-O signature `FE ED FA CE`.
const MAGIC32: [u8; 4] = [0xFE, 0xED, 0xFA, 0xCE];
/// `MAGIC32` with its bytes in reverse order.
const CIGAM32: [u8; 4] = [0xCE, 0xFA, 0xED, 0xFE];
/// Mach-O signature `FE ED FA CF`.
const MAGIC64: [u8; 4] = [0xFE, 0xED, 0xFA, 0xCF];
/// `MAGIC64` with its bytes in reverse order.
const CIGAM64: [u8; 4] = [0xCF, 0xFA, 0xED, 0xFE];
/// Signature checked together with `FileType::is_fat_macho` to recognise fat
/// (multi-architecture) Mach-O files.
const FAT_MACHO: [u8; 4] = [0xCA, 0xFE, 0xBA, 0xBE];
/// Every single-architecture Mach-O signature accepted by the detector.
const MACH_O_MAGICS: [[u8; 4]; 4] = [MAGIC32, CIGAM32, MAGIC64, CIGAM64];

/// ELF signature: `0x7F` followed by the ASCII letters `ELF`.
const ELF_MAGIC: [u8; 4] = *b"\x7fELF";
/// Signatures accepted for MZ executables: ASCII `MZ` and the swapped `ZM`.
const EXE_MAGICS: [[u8; 2]; 2] = [*b"MZ", *b"ZM"];
/// PE signature: ASCII `PE` followed by two NUL bytes.
const PE_MAGIC: [u8; 4] = *b"PE\0\0";

/// PDF signature: ASCII `%PDF`.
const PDF_MAGIC: [u8; 4] = *b"%PDF";
/// RTF signature: ASCII `{\rt`.
const RTF_MAGIC: [u8; 4] = *b"{\\rt";

/// Eight-byte signature used to recognise `FileType::DOCFILE`.
const DOCFILE_MAGIC: [u8; 8] = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1];
impl FileType {
#[inline]
#[must_use]
pub fn from_bytes(bytes: &[u8]) -> Option<Self> {
if bytes.starts_with(&DOCFILE_MAGIC) {
return Some(Self::DOCFILE);
}
if bytes.starts_with(&ELF_MAGIC) {
if bytes[0x4] == 1 && bytes[0x5] == 1 {
return Some(Self::ELF32_LSB);
}
if bytes[0x4] == 1 && bytes[0x5] == 2 {
return Some(Self::ELF32_MSB);
}
if bytes[0x4] == 2 && bytes[0x5] == 1 {
return Some(Self::ELF64_LSB);
}
if bytes[0x4] == 2 && bytes[0x5] == 2 {
return Some(Self::ELF64_MSB);
}
if bytes[0x5] == 1 {
return Some(Self::ELF_LSB);
}
if bytes[0x5] == 2 {
return Some(Self::ELF_MSB);
}
if bytes[0x4] == 1 {
return Some(Self::ELF32);
}
if bytes[0x4] == 2 {
return Some(Self::ELF64);
}
return Some(Self::ELF);
}
if MACH_O_MAGICS.iter().any(|magic| bytes.starts_with(magic)) {
return Some(Self::MachO);
}
if bytes.starts_with(&FAT_MACHO) && Self::is_fat_macho(bytes) {
return Some(Self::MachO);
}
if bytes.starts_with(&PDF_MAGIC) {
return Some(Self::PDF);
}
if EXE_MAGICS.iter().any(|magic| bytes.starts_with(magic)) {
if Self::is_pe32(bytes) {
if Self::is_dotnet(bytes) {
return Some(Self::PE32DotNet);
}
return Some(Self::PE32Native);
}
return Some(Self::EXE);
}
if bytes.starts_with(&RTF_MAGIC) {
return Some(Self::RTF);
}
None
}
#[inline]
pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Option<Self>> {
let mut file = std::fs::File::open(path)?;
let mut buffer = [0; FILE_DETECTION_BUFFER_SIZE];
let read = file.read(&mut buffer)?;
Ok(Self::from_bytes(&buffer[..read]))
}
#[must_use]
pub fn matches(&self, bytes: &[u8]) -> bool {
match self {
FileType::DOCFILE => bytes.starts_with(&DOCFILE_MAGIC),
FileType::ELF => bytes.starts_with(&ELF_MAGIC),
FileType::ELF_LSB => bytes.starts_with(&ELF_MAGIC) && bytes[0x5] == 1,
FileType::ELF_MSB => bytes.starts_with(&ELF_MAGIC) && bytes[0x5] == 2,
FileType::ELF32 => bytes.starts_with(&ELF_MAGIC) && bytes[0x4] == 1,
FileType::ELF64 => bytes.starts_with(&ELF_MAGIC) && bytes[0x4] == 2,
FileType::ELF32_LSB => {
bytes.starts_with(&ELF_MAGIC) && bytes[0x4] == 1 && bytes[0x5] == 1
}
FileType::ELF32_MSB => {
bytes.starts_with(&ELF_MAGIC) && bytes[0x4] == 1 && bytes[0x5] == 2
}
FileType::ELF64_LSB => {
bytes.starts_with(&ELF_MAGIC) && bytes[0x4] == 2 && bytes[0x5] == 1
}
FileType::ELF64_MSB => {
bytes.starts_with(&ELF_MAGIC) && bytes[0x4] == 2 && bytes[0x5] == 2
}
FileType::EXE => EXE_MAGICS.iter().any(|magic| bytes.starts_with(magic)),
FileType::MachO => {
MACH_O_MAGICS.iter().any(|magic| bytes.starts_with(magic))
|| bytes.starts_with(&FAT_MACHO) && Self::is_fat_macho(bytes)
}
FileType::PDF => bytes.starts_with(&PDF_MAGIC),
FileType::PE32 => {
EXE_MAGICS.iter().any(|magic| bytes.starts_with(magic)) && Self::is_pe32(bytes)
}
FileType::PE32Native => {
EXE_MAGICS.iter().any(|magic| bytes.starts_with(magic)) && !Self::is_pe32(bytes)
}
FileType::PE32DotNet => {
EXE_MAGICS.iter().any(|magic| bytes.starts_with(magic)) && Self::is_dotnet(bytes)
}
FileType::RTF => bytes.starts_with(&RTF_MAGIC),
FileType::NotSet => unreachable!("`FileType::NotSet` should never be used"),
}
}
#[inline]
pub fn matches_path<P: AsRef<Path>>(&self, path: P) -> Result<bool> {
let mut file = std::fs::File::open(path)?;
let mut buffer = [0; FILE_DETECTION_BUFFER_SIZE];
let read = file.read(&mut buffer)?;
Ok(self.matches(&buffer[..read]))
}
#[inline]
fn is_pe32(bytes: &[u8]) -> bool {
if bytes.len() < 0x40 {
return false;
}
let pe_magic_offset = u32::from_le_bytes([
bytes[0x3C],
bytes[0x3C + 1],
bytes[0x3C + 2],
bytes[0x3C + 3],
]) as usize;
pe_magic_offset < bytes.len()
&& pe_magic_offset + PE_MAGIC.len() < bytes.len()
&& bytes[pe_magic_offset..pe_magic_offset + 4] == PE_MAGIC
}
#[inline]
fn is_dotnet(bytes: &[u8]) -> bool {
if let Ok(pe32) = malwaredb_types::exec::pe32::EXE::from(bytes) {
pe32.sub_type == malwaredb_types::exec::pe32::SubType::DotNet
} else {
false
}
}
#[inline]
fn is_fat_macho(bytes: &[u8]) -> bool {
u32::from_be_bytes([
bytes[0x04],
bytes[0x04 + 1],
bytes[0x04 + 2],
bytes[0x04 + 3],
]) < 0x20
}
}
impl From<FileType> for &'static str {
fn from(ft: FileType) -> &'static str {
match ft {
FileType::DOCFILE => "DOCFILE",
FileType::ELF => "ELF",
FileType::ELF_LSB => "ELF_LSB",
FileType::ELF_MSB => "ELF_MSB",
FileType::ELF32 => "ELF32",
FileType::ELF64 => "ELF64",
FileType::ELF32_LSB => "ELF32_LSB",
FileType::ELF64_LSB => "ELF64_LSB",
FileType::ELF32_MSB => "ELF32_MSB",
FileType::ELF64_MSB => "ELF64_MSB",
FileType::EXE => "EXE",
FileType::MachO => "MachO",
FileType::PDF => "PDF",
FileType::PE32 => "PE32",
FileType::PE32DotNet => "PE32DotNet",
FileType::PE32Native => "PE32Native",
FileType::RTF => "RTF",
FileType::NotSet => "NotSet",
}
}
}
impl Display for FileType {
    /// Writes the canonical name of the file type (the same string produced by
    /// the `From<FileType> for &'static str` conversion).
    ///
    /// The name is emitted through `Formatter::pad`, so width, fill, alignment
    /// and precision given by the caller (e.g. `{:<12}`) are honoured; with a
    /// plain `{}` the output is just the name.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name: &'static str = (*self).into();
        f.pad(name)
    }
}