malwaredb-types 0.3.3

Data types and parsers for MalwareDB.
Documentation
// SPDX-License-Identifier: Apache-2.0

#![doc = include_str!("../README.md")]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![deny(clippy::all)]
#![deny(clippy::pedantic)]
#![deny(missing_docs)]

/// Document file types and the `DocumentFile` trait used to access them generically
pub mod doc;

/// Executable file types and the `ExecutableFile` trait used to access them generically
pub mod exec;

/// Convenience functions for reading data types from binary blobs
pub mod utils;

use crate::{doc::DocumentFile, exec::ExecutableFile};

use std::fmt::{Display, Formatter};

use anyhow::Result;
use chrono::{DateTime, Utc};
use tracing::instrument;

/// MDB version, taken from this crate's `CARGO_PKG_VERSION` at compile time
pub const MDB_VERSION: &str = env!("CARGO_PKG_VERSION");

/// Common functions for all file types parsed by Malware DB
pub trait SpecimenFile {
    /// Magic numbers: the byte sequences at the beginning of a file which identify the
    /// file format. Some file types have more than one possible magic number, so each
    /// entry of the slice is a complete alternative.
    const MAGIC: &'static [&'static [u8]];

    /// Common name for a specific file type
    fn type_name(&self) -> &'static str;
}

/// Types known to Malware DB
///
/// Each variant wraps the parsed representation of one file format and borrows the
/// sample's bytes for the lifetime `'a`. Format-specific variants are only compiled
/// when the Cargo feature of the same name is enabled; [`KnownType::Unknown`] is
/// always available.
// The size-difference lint is silenced on purpose.
// NOTE(review): presumably some parser structs are much larger than others, and boxing
// them would change the public variant types -- confirm before removing the `allow`.
#[allow(clippy::large_enum_variant)]
#[derive(Clone, Debug)]
pub enum KnownType<'a> {
    /// Linux, *BSD, Haiku, Solaris, etc. binaries
    #[cfg(feature = "elf")]
    ELF(exec::elf::Elf<'a>),

    /// Windows, DOS, OS/2 Executables. Anything ending with:
    /// * .cpl
    /// * .dll
    /// * .exe
    /// * .ocx
    /// * .sys
    #[cfg(feature = "pe32")]
    EXE(exec::pe32::EXE<'a>),

    /// Single architecture macOS (and derivatives) binaries
    #[cfg(feature = "macho")]
    MachO(exec::macho::Macho<'a>),

    /// Multiple architecture macOS (and derivatives) binaries
    #[cfg(feature = "macho")]
    FatMachO(exec::macho::fat::FatMacho<'a>),

    /// Classic Mac OS and BeOS binaries
    #[cfg(feature = "pef")]
    PEF(exec::pef::Pef<'a>),

    /// Microsoft Office Compound Document Format
    #[cfg(feature = "office95")]
    Office95(doc::office95::Office95<'a>),

    /// Adobe PDF document
    #[cfg(feature = "pdf")]
    PDF(doc::pdf::PDF<'a>),

    /// Rich Text File
    #[cfg(feature = "rtf")]
    RTF(doc::rtf::Rtf<'a>),

    /// Files for which we don't have an analytic or feature extractor, or are of an unknown type.
    /// Holds the sample's raw bytes unchanged.
    Unknown(&'a [u8]),
}

impl<'a> KnownType<'a> {
    /// Known type from a sequence of bytes
    ///
    /// The beginning of `data` is compared against every magic number declared by each
    /// enabled file type, in a fixed order. The parser of the first type with a matching
    /// magic number is run. Data matching no magic number is returned unparsed as
    /// [`KnownType::Unknown`].
    ///
    /// # Errors
    ///
    /// Returns an error if the parser fails to process the detected type.
    #[instrument(name = "KnownType detector", skip(data))]
    pub fn new(data: &'a [u8]) -> Result<Self> {
        // Every type is checked against all of its magic numbers rather than fixed
        // indices, so a type which gains a magic number is still detected and a short
        // `MAGIC` list can never cause an out-of-bounds panic.
        // TODO: Replace the checking of byte arrays with a hashing mechanism for faster matching
        #[cfg(feature = "elf")]
        if exec::elf::Elf::MAGIC
            .iter()
            .any(|magic| data.starts_with(magic))
        {
            return Ok(Self::ELF(exec::elf::Elf::from(data)?));
        }

        #[cfg(feature = "pe32")]
        if exec::pe32::EXE::MAGIC
            .iter()
            .any(|magic| data.starts_with(magic))
        {
            return Ok(Self::EXE(exec::pe32::EXE::from(data)?));
        }

        #[cfg(feature = "macho")]
        if exec::macho::Macho::MAGIC
            .iter()
            .any(|magic| data.starts_with(magic))
        {
            return Ok(Self::MachO(exec::macho::Macho::from(data)?));
        }

        #[cfg(feature = "macho")]
        if exec::macho::fat::FatMacho::MAGIC
            .iter()
            .any(|magic| data.starts_with(magic))
        {
            return Ok(Self::FatMachO(exec::macho::fat::FatMacho::from(data)?));
        }

        #[cfg(feature = "office95")]
        if doc::office95::Office95::MAGIC
            .iter()
            .any(|magic| data.starts_with(magic))
        {
            return Ok(Self::Office95(doc::office95::Office95::from(data)?));
        }

        #[cfg(feature = "pdf")]
        if doc::pdf::PDF::MAGIC
            .iter()
            .any(|magic| data.starts_with(magic))
        {
            return Ok(Self::PDF(doc::pdf::PDF::from(data)?));
        }

        #[cfg(feature = "rtf")]
        if doc::rtf::Rtf::MAGIC
            .iter()
            .any(|magic| data.starts_with(magic))
        {
            return Ok(Self::RTF(doc::rtf::Rtf::from(data)?));
        }

        #[cfg(feature = "pef")]
        if exec::pef::Pef::MAGIC
            .iter()
            .any(|magic| data.starts_with(magic))
        {
            return Ok(Self::PEF(exec::pef::Pef::from(data)?));
        }

        Ok(Self::Unknown(data))
    }

    /// Whether the sample is an executable file
    ///
    /// True for the ELF, EXE, Mach-O, Fat Mach-O, and PEF variants; false otherwise.
    #[must_use]
    pub fn is_exec(&self) -> bool {
        match self {
            #[cfg(feature = "elf")]
            KnownType::ELF(_) => true,

            #[cfg(feature = "pe32")]
            KnownType::EXE(_) => true,

            #[cfg(feature = "macho")]
            KnownType::MachO(_) => true,

            #[cfg(feature = "macho")]
            KnownType::FatMachO(_) => true,

            #[cfg(feature = "pef")]
            KnownType::PEF(_) => true,

            // The wildcard is required: which arms exist depends on the enabled features.
            _ => false,
        }
    }

    /// Whether the sample is a document type
    ///
    /// True for the PDF, RTF, and Office95 variants; false otherwise.
    #[must_use]
    pub fn is_doc(&self) -> bool {
        match self {
            #[cfg(feature = "pdf")]
            KnownType::PDF(_) => true,

            #[cfg(feature = "rtf")]
            KnownType::RTF(_) => true,

            #[cfg(feature = "office95")]
            KnownType::Office95(_) => true,

            _ => false,
        }
    }

    /// When the file was created
    ///
    /// Uses the compiled timestamp for EXE and PEF files and the creation date for PDF
    /// documents. Every other variant returns `None`.
    #[must_use]
    pub fn created(&self) -> Option<DateTime<Utc>> {
        match self {
            #[cfg(feature = "pe32")]
            KnownType::EXE(e) => e.compiled_timestamp(),

            #[cfg(feature = "pef")]
            KnownType::PEF(p) => p.compiled_timestamp(),

            #[cfg(feature = "pdf")]
            KnownType::PDF(p) => p.creation_date,

            _ => None,
        }
    }

    /// Get the file's inner executable type
    ///
    /// Consumes `self`. Returns `None` if the sample is not one of the executable variants.
    #[must_use]
    pub fn exec(self) -> Option<Box<dyn ExecutableFile + Send + 'a>> {
        match self {
            #[cfg(feature = "elf")]
            KnownType::ELF(e) => Some(Box::new(e)),

            #[cfg(feature = "pe32")]
            KnownType::EXE(e) => Some(Box::new(e)),

            #[cfg(feature = "macho")]
            KnownType::MachO(m) => Some(Box::new(m)),

            #[cfg(feature = "macho")]
            KnownType::FatMachO(m) => Some(Box::new(m)),

            #[cfg(feature = "pef")]
            KnownType::PEF(p) => Some(Box::new(p)),

            _ => None,
        }
    }

    /// If the sample has a child [`KnownType`], currently only supports [`crate::exec::macho::fat::FatMacho`]
    ///
    /// For a Fat Mach-O sample, each entry of its `binaries` is cloned and returned as a
    /// [`KnownType::MachO`]. Every other variant returns `None`.
    #[must_use]
    pub fn children(&self) -> Option<Vec<KnownType<'_>>> {
        match self {
            #[cfg(feature = "macho")]
            KnownType::FatMachO(m) => Some(
                m.binaries
                    .iter()
                    .map(|b| KnownType::MachO(b.clone()))
                    .collect(),
            ),

            _ => None,
        }
    }

    /// Raw bytes of the sample
    ///
    /// Returns the `contents` of the wrapped type, or the stored bytes for
    /// [`KnownType::Unknown`].
    #[must_use]
    pub fn contents(&self) -> &'a [u8] {
        match self {
            #[cfg(feature = "elf")]
            KnownType::ELF(e) => e.contents,

            #[cfg(feature = "pe32")]
            KnownType::EXE(e) => e.contents,

            #[cfg(feature = "macho")]
            KnownType::MachO(m) => m.contents,

            #[cfg(feature = "macho")]
            KnownType::FatMachO(m) => m.contents,

            #[cfg(feature = "pef")]
            KnownType::PEF(p) => p.contents,

            #[cfg(feature = "office95")]
            KnownType::Office95(p) => p.contents,

            #[cfg(feature = "pdf")]
            KnownType::PDF(p) => p.contents,

            #[cfg(feature = "rtf")]
            KnownType::RTF(r) => r.contents,

            KnownType::Unknown(u) => u,
        }
    }

    /// Get the inner document type
    ///
    /// Consumes `self`. Returns `None` if the sample is not one of the document variants.
    #[must_use]
    pub fn doc(self) -> Option<Box<dyn DocumentFile + Send + 'a>> {
        match self {
            #[cfg(feature = "office95")]
            KnownType::Office95(o) => Some(Box::new(o)),

            #[cfg(feature = "pdf")]
            KnownType::PDF(p) => Some(Box::new(p)),

            #[cfg(feature = "rtf")]
            KnownType::RTF(r) => Some(Box::new(r)),

            _ => None,
        }
    }
}

/// Byte ordering
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Ordering {
    /// Big Endian: the Most Significant Byte (MSB) comes first
    BigEndian,

    /// Little Endian: the Least Significant Byte (LSB) comes first
    LittleEndian,

    /// Bi-Endian: an application which may use both orderings in the same file
    BiEndian,
}

impl Display for Ordering {
    /// Writes the human-readable name of the byte ordering.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // Pick the label first so the formatter is written to in exactly one place.
        let label = match self {
            Self::BigEndian => "Big Endian",
            Self::LittleEndian => "Little Endian",
            Self::BiEndian => "Bi-Endian",
        };
        f.write_str(label)
    }
}