// malwaredb-types 0.3.4
//
// Data types and parsers for MalwareDB.
// Documentation
// SPDX-License-Identifier: Apache-2.0

/// Fat Mach-O parsing (multiple Mach-O programs in one binary)
pub mod fat;

use crate::exec::{
    Architecture, ExecutableFile, ExecutableType, Imports, OperatingSystem, Sections,
};
use crate::utils::{u16_from_offset, u32_from_offset, EntropyCalc};
use crate::{Ordering, SpecimenFile};

use std::fmt::{Display, Formatter};

use anyhow::{anyhow, bail, Context, Result};
use chrono::{DateTime, Utc};
use tracing::{instrument, warn};
use uuid::Uuid;

// Apple's Mach-O loader:
// https://opensource.apple.com/source/xnu/xnu-2050.18.24/EXTERNAL_HEADERS/mach-o/loader.h

/// Magic bytes of a 32-bit Mach-O; `Macho::from` treats this byte order as big-endian.
const MAGIC32: [u8; 4] = [0xfe, 0xed, 0xfa, 0xce];
/// Byte-swapped 32-bit magic; `Macho::from` treats this byte order as little-endian.
const CIGAM32: [u8; 4] = [0xce, 0xfa, 0xed, 0xfe];
/// Magic bytes of a 64-bit Mach-O; `Macho::from` treats this byte order as big-endian.
const MAGIC64: [u8; 4] = [0xfe, 0xed, 0xfa, 0xcf];
/// Byte-swapped 64-bit magic; `Macho::from` treats this byte order as little-endian.
const CIGAM64: [u8; 4] = [0xcf, 0xfa, 0xed, 0xfe];

/// A struct presenting Mach Objects (Mach-O) files
///
/// These are executables or libraries used on macOS, iOS, iPadOS, tvOS, watchOS, etc. They began
/// as the file format for `NeXTSTEP`.
///
/// Because of the different architectures, these files could be bi-endian.
#[derive(Clone, Debug)]
pub struct Macho<'a> {
    /// If the program is 64-bit, as indicated by the magic number at the start of the file
    pub is64bit: bool,

    /// Instruction set architecture for this binary
    pub arch: Architecture,

    /// If the binary has extra data after the last section, could be used to hide something.
    /// `Macho::from` does not compute this and leaves it as `None`.
    pub has_overlay: Option<bool>,

    /// Byte ordering for this binary, derived from the magic number
    pub ordering: Ordering,

    /// Executable subtype: Program, Library, or Core file?
    pub executable_type: ExecutableType,

    /// Operating System for this binary, going to be Mac OS or some derivative, could be `NeXTSTEP`
    pub os: OperatingSystem,

    /// Sections of this binary. `Macho::from` does not parse these and leaves it as `None`.
    pub sections: Option<Sections<'a>>,

    /// External libraries used by this application or library. `Macho::from` does not parse
    /// these and leaves it as `None`.
    pub imports: Option<Imports>,

    /// The array containing the raw bytes used to parse this program
    pub contents: &'a [u8],
}

impl<'a> Macho<'a> {
    /// Mach-O parsed from a sequence of bytes
    ///
    /// Only the fixed header is inspected: the magic number (offset 0) gives the word size and
    /// byte order, the CPU type (offset 4) gives the architecture, and the file type
    /// (offset 12) gives the executable subtype. Sections, imports and overlay detection are
    /// not parsed here, so those fields are returned as `None`.
    ///
    /// # Errors
    ///
    /// Returns an error if `contents` is too short to hold the magic number, the CPU type or
    /// the file type, or if it does not begin with one of the four Mach-O magic numbers.
    #[instrument(name = "Mach-O parser", skip(contents))]
    pub fn from(contents: &'a [u8]) -> Result<Self> {
        // Checked slicing: plain indexing would panic on a buffer shorter than the magic
        // number instead of reporting the error below.
        let magic = contents
            .get(..MAGIC32.len())
            .and_then(|bytes| <[u8; 4]>::try_from(bytes).ok())
            .context("Mach-O buffer too small for a magic number")?;

        let (is_64bit, ordering) = match magic {
            MAGIC32 => (false, Ordering::BigEndian),
            CIGAM32 => (false, Ordering::LittleEndian),
            MAGIC64 => (true, Ordering::BigEndian),
            CIGAM64 => (true, Ordering::LittleEndian),
            _ => bail!("Not a Mach-O file"),
        };

        // The CPU type has an upper end flag to indicate if 64-bit, in addition
        // to the different machine number. Check it, and clear it.
        let cpu_type = u32_from_offset(contents, 4, ordering)
            .ok_or_else(|| anyhow!("Mach-O buffer too small for getting architecture"))?;
        let should_be_64bit = (cpu_type & 0x0100_0000) == 0x0100_0000;
        if !is_64bit && should_be_64bit {
            warn!("Mach-O has a 32-bit magic number but the CPU type has the 64-bit flag set.");
        }
        if is_64bit && !should_be_64bit {
            warn!("Mach-O has a 64-bit magic number but the CPU type lacks the 64-bit flag.");
        }

        // Machine number with the ABI flag bits cleared, paired with the word size taken
        // from the magic number.
        let arch = match (cpu_type & 0x00FF_FFFF, is_64bit) {
            (0x06, _) => Architecture::M68k,
            (0x07, false) => Architecture::X86,
            (0x07, true) => Architecture::X86_64,
            (0x08, false) => Architecture::MIPS,
            (0x08, true) => Architecture::MIPS64,
            (0x0C, false) => Architecture::ARM,
            (0x0C, true) => Architecture::ARM64,
            (0x0D, _) => Architecture::M88k,
            (0x0E, false) => Architecture::Sparc,
            (0x0E, true) => Architecture::Sparc64,
            (0x10, _) => Architecture::Alpha,
            (0x12, false) => Architecture::PowerPC,
            (0x12, true) => Architecture::PowerPC64,
            (other, _) => Architecture::Other(other),
        };

        // The file type is the 32-bit field at offset 12 (after magic, cputype and
        // cpusubtype). Every file type defined in loader.h fits in 16 bits, so read the
        // half-word that holds the low-order bits for this byte order.
        let file_type_offset = if matches!(ordering, Ordering::BigEndian) {
            14
        } else {
            12
        };
        let file_type = u16_from_offset(contents, file_type_offset, ordering)
            .ok_or_else(|| anyhow!("Mach-O buffer too small for executable type"))?;
        // File types are enumerated values, not bit flags, so compare for equality.
        let executable_type = match file_type {
            // MH_EXECUTE, MH_PRELOAD
            0x02 | 0x05 => ExecutableType::Program,
            // MH_DYLIB, MH_DYLIB_STUB
            0x06 | 0x09 => ExecutableType::Library,
            // MH_CORE
            0x04 => ExecutableType::Core,
            other => ExecutableType::Unknown(other),
        };

        Ok(Self {
            is64bit: is_64bit,
            arch,
            has_overlay: None,
            ordering,
            executable_type,
            os: OperatingSystem::MacOS,
            sections: None,
            imports: None,
            contents,
        })
    }
}

impl ExecutableFile for Macho<'_> {
    /// Architecture recorded when the header was parsed; always present for Mach-O.
    fn architecture(&self) -> Option<Architecture> {
        Some(self.arch)
    }

    /// Pointer width in bits: 64 when `is64bit` is set, 32 otherwise.
    fn pointer_size(&self) -> usize {
        const BITS_32: usize = 32;
        const BITS_64: usize = 64;

        if self.is64bit {
            BITS_64
        } else {
            BITS_32
        }
    }

    /// Operating system stored in this struct.
    fn operating_system(&self) -> OperatingSystem {
        self.os
    }

    /// No compilation timestamp is reported for Mach-O files.
    fn compiled_timestamp(&self) -> Option<DateTime<Utc>> {
        None
    }

    /// Number of parsed sections, or the length of a default `Sections` when none were parsed.
    #[allow(clippy::cast_possible_truncation)]
    fn num_sections(&self) -> u32 {
        let count = match &self.sections {
            Some(sections) => sections.len(),
            None => Sections::default().len(),
        };
        count as u32
    }

    /// Borrowed view of the parsed sections, if any.
    fn sections(&self) -> Option<&Sections<'_>> {
        self.sections.as_ref()
    }

    /// Hash of the imports, if imports were parsed.
    fn import_hash(&self) -> Option<Uuid> {
        let imports = self.imports.as_ref()?;
        Some(Imports::hash(imports))
    }

    /// Fuzzy hash of the imports, if imports were parsed.
    fn fuzzy_imports(&self) -> Option<String> {
        let imports = self.imports.as_ref()?;
        Some(Imports::fuzzy_hash(imports))
    }
}

impl SpecimenFile for Macho<'_> {
    /// The four magic byte sequences that identify a Mach-O file.
    const MAGIC: &'static [&'static [u8]] = &[
        // 32-bit magic
        &MAGIC32,
        // 64-bit magic
        &MAGIC64,
        // 32-bit magic, byte-swapped
        &CIGAM32,
        // 64-bit magic, byte-swapped
        &CIGAM64,
    ];

    /// Human-readable name of this file format.
    fn type_name(&self) -> &'static str {
        "Mach-O"
    }
}

impl Display for Macho<'_> {
    /// Writes a multi-line, tab-indented summary: OS, architecture, byte ordering and type,
    /// then any parsed sections, an overlay note when applicable, and finally the size and
    /// entropy of the raw contents.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // Fixed header fields.
        writeln!(f, "Mach-O file:")?;
        writeln!(f, "\tOS: {}", self.os)?;
        writeln!(f, "\tArchitecture: {}", self.arch)?;
        writeln!(f, "\tOrdering: {}", self.ordering)?;
        writeln!(f, "\tType: {}", self.executable_type)?;

        // Section listing, only when sections were parsed.
        if let Some(sections) = self.sections.as_ref() {
            writeln!(f, "\t{} sections:", sections.len())?;
            for section in sections {
                writeln!(f, "\t\t{section}")?;
            }
        }

        // An unknown overlay status (`None`) prints nothing, same as `Some(false)`.
        if matches!(self.has_overlay, Some(true)) {
            writeln!(f, "\tHas extra bytes at the end (overlay).")?;
        }

        // Statistics over the whole buffer.
        writeln!(f, "\tSize: {}", self.contents.len())?;
        writeln!(f, "\tEntropy: {:.4}", self.contents.entropy())?;
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Parses an embedded sample, panicking if the parser rejects it.
    #[track_caller]
    fn parse(sample: &'static [u8]) -> Macho<'static> {
        Macho::from(sample).unwrap()
    }

    #[test]
    fn arm64() {
        let parsed = parse(include_bytes!("../../../testdata/macho/macho_arm64"));

        assert!(parsed.is64bit);
        assert_eq!(parsed.executable_type, ExecutableType::Program);
        assert_eq!(parsed.arch, Architecture::ARM64);
    }

    #[test]
    fn ppc() {
        let parsed = parse(include_bytes!("../../../testdata/macho/macho_ppc"));

        assert!(!parsed.is64bit);
        // NOTE(review): executable type check is disabled for this sample.
        //assert_eq!(parsed.executable_type, ExecutableType::Program);
        assert_eq!(parsed.arch, Architecture::PowerPC);
    }

    #[test]
    fn ppc64() {
        let parsed = parse(include_bytes!("../../../testdata/macho/macho_ppc64"));

        assert!(parsed.is64bit);
        // NOTE(review): executable type check is disabled for this sample.
        //assert_eq!(parsed.executable_type, ExecutableType::Program);
        assert_eq!(parsed.arch, Architecture::PowerPC64);
    }

    #[test]
    fn x86_64() {
        let parsed = parse(include_bytes!("../../../testdata/macho/macho_x86_64"));

        assert!(parsed.is64bit);
        assert_eq!(parsed.executable_type, ExecutableType::Program);
        assert_eq!(parsed.arch, Architecture::X86_64);
    }
}