malwaredb_types/
utils.rs

// Convenience functions for reading data types from binary blobs

use crate::Ordering;

/// Decodes the two bytes of `contents` starting at `offset` as a [`u16`].
///
/// The bytes are read as big-endian when `endian` is `Ordering::BigEndian`; any
/// other ordering is decoded as little-endian.
///
/// # Panics
///
/// Panics if `contents` does not hold two bytes starting at `offset`.
#[inline]
pub fn u16_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> u16 {
    let mut raw = [0u8; 2];
    raw.copy_from_slice(&contents[offset..offset + 2]);
    match endian {
        Ordering::BigEndian => u16::from_be_bytes(raw),
        _ => u16::from_le_bytes(raw),
    }
}

/// Decodes the four bytes of `contents` starting at `offset` as a [`u32`].
///
/// The bytes are read as big-endian when `endian` is `Ordering::BigEndian`; any
/// other ordering is decoded as little-endian.
///
/// # Panics
///
/// Panics if `contents` does not hold four bytes starting at `offset`.
#[inline]
pub fn u32_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> u32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&contents[offset..offset + 4]);
    match endian {
        Ordering::BigEndian => u32::from_be_bytes(raw),
        _ => u32::from_le_bytes(raw),
    }
}

/// Decodes the four bytes of `contents` starting at `offset` as an [`i32`].
///
/// The bytes are read as big-endian when `endian` is `Ordering::BigEndian`; any
/// other ordering is decoded as little-endian.
///
/// # Panics
///
/// Panics if `contents` does not hold four bytes starting at `offset`.
#[inline]
pub fn i32_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> i32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&contents[offset..offset + 4]);
    match endian {
        Ordering::BigEndian => i32::from_be_bytes(raw),
        _ => i32::from_le_bytes(raw),
    }
}

/// Decodes the eight bytes of `contents` starting at `offset` as a [`u64`].
///
/// The bytes are read as big-endian when `endian` is `Ordering::BigEndian`; any
/// other ordering is decoded as little-endian.
///
/// # Panics
///
/// Panics if `contents` does not hold eight bytes starting at `offset`.
#[inline]
pub fn u64_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> u64 {
    let mut raw = [0u8; 8];
    raw.copy_from_slice(&contents[offset..offset + 8]);
    match endian {
        Ordering::BigEndian => u64::from_be_bytes(raw),
        _ => u64::from_le_bytes(raw),
    }
}

/// Decodes the four bytes of `contents` starting at `offset` as an [`f32`].
///
/// The bytes are taken as the raw bit pattern of the float: big-endian when
/// `endian` is `Ordering::BigEndian`, little-endian for any other ordering.
///
/// # Panics
///
/// Panics if `contents` does not hold four bytes starting at `offset`.
#[inline]
pub fn f32_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> f32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&contents[offset..offset + 4]);
    match endian {
        Ordering::BigEndian => f32::from_be_bytes(raw),
        _ => f32::from_le_bytes(raw),
    }
}

/// Decodes the eight bytes of `contents` starting at `offset` as an [`f64`].
///
/// The bytes are taken as the raw bit pattern of the float: big-endian when
/// `endian` is `Ordering::BigEndian`, little-endian for any other ordering.
///
/// # Panics
///
/// Panics if `contents` does not hold eight bytes starting at `offset`.
#[inline]
pub fn f64_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> f64 {
    let mut raw = [0u8; 8];
    raw.copy_from_slice(&contents[offset..offset + 8]);
    match endian {
        Ordering::BigEndian => f64::from_be_bytes(raw),
        _ => f64::from_le_bytes(raw),
    }
}

/// Reads a NUL-terminated string from `contents`, starting at `offset`.
///
/// Bytes are taken from `offset` up to, but not including, the first zero byte, or
/// to the end of the buffer when no terminator is present. Valid UTF-8 is returned
/// unchanged; invalid sequences are replaced with U+FFFD (lossy conversion).
///
/// An `offset` at or beyond the end of `contents`, or one that points directly at a
/// zero byte, yields an empty string. This function does not panic.
#[inline]
pub fn string_from_offset(contents: &[u8], offset: usize) -> String {
    // `get` instead of indexing so an out-of-range offset cannot panic.
    let tail = contents.get(offset..).unwrap_or(&[]);

    // Stop at the terminator if there is one, otherwise consume the rest of the buffer.
    let end = tail
        .iter()
        .position(|&byte| byte == 0)
        .unwrap_or(tail.len());

    // No hex fallback is needed: a lossy conversion of non-empty input always
    // yields at least one character, so it can never come back empty.
    String::from_utf8_lossy(&tail[..end]).into_owned()
}

/// Reports whether `needle` appears in `haystack` beginning exactly at `offset`.
///
/// Returns `false` when `offset` is at or past the end of `haystack`, or when fewer
/// than `needle.len()` bytes remain from `offset`.
#[inline]
pub fn bytes_offset_match(haystack: &[u8], offset: usize, needle: &[u8]) -> bool {
    match haystack.get(offset..) {
        // An offset equal to the length gives an empty tail, which never matches.
        Some(tail) if !tail.is_empty() => tail.starts_with(needle),
        _ => false,
    }
}

/// Finds the first index at which `needle` occurs as a contiguous run in `haystack`.
///
/// Returns `None` when `needle` does not occur, including when it is longer than
/// `haystack`. An empty `needle` matches at index 0.
///
/// Adapted from
/// <https://stackoverflow.com/questions/35901547/how-can-i-find-a-subsequence-in-a-u8-slice>.
#[inline]
pub fn find_subsequence<T>(haystack: &[T], needle: &[T]) -> Option<usize>
where
    for<'a> &'a [T]: PartialEq,
{
    // `slice::windows` panics on a window size of zero, so answer that case directly.
    if needle.is_empty() {
        return Some(0);
    }

    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}

/// Calculates the Shannon entropy of `data`, in bits per byte.
///
/// The result ranges from 0.0 (every byte identical) to 8.0 (all 256 byte values
/// equally frequent). Empty input yields 0.0.
#[inline]
pub fn entropy_calc(data: &[u8]) -> f32 {
    if data.is_empty() {
        return 0.0;
    }

    // Single pass over the data to count every byte value, 0x00 through 0xFF inclusive.
    let mut counts = [0usize; 256];
    for &byte in data {
        counts[usize::from(byte)] += 1;
    }

    let len = data.len() as f32;
    let mut e = 0.0;
    for &count in counts.iter() {
        // Byte values that never occur contribute nothing (and log2(0) is undefined).
        if count > 0 {
            let p = count as f32 / len;
            e -= p * p.log2();
        }
    }
    e
}

/// Calculate the entropy of bytes.
///
/// Implementors expose the entropy of the byte sequence they hold through a single
/// method, so callers can write `buffer.entropy()`.
pub trait EntropyCalc {
    /// Calculate entropy (0-8) for some sequence.
    ///
    /// Borrows `self` immutably and returns the entropy as an `f32`.
    fn entropy(&self) -> f32;
}

/// Entropy of an owned byte vector.
impl EntropyCalc for Vec<u8> {
    /// Passes the vector's contents to [`entropy_calc`] and returns its result.
    fn entropy(&self) -> f32 {
        entropy_calc(self.as_slice())
    }
}

/// Entropy of a borrowed byte slice.
impl EntropyCalc for &[u8] {
    /// Passes the slice to [`entropy_calc`] and returns its result.
    fn entropy(&self) -> f32 {
        entropy_calc(*self)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // Shared fixtures: the same leading bytes, two and four bytes wide.
    const TWO_BYTES: [u8; 2] = [0x12, 0x34];
    const FOUR_BYTES: [u8; 4] = [0x12, 0x34, 0x56, 0x78];

    /// Little-endian: the second byte becomes the high byte.
    #[test]
    fn u16_le() {
        let decoded = u16_from_offset(&TWO_BYTES, 0, Ordering::LittleEndian);
        assert_eq!(decoded, 0x3412);
    }

    /// Big-endian: bytes are read in the order they appear.
    #[test]
    fn u16_be() {
        let decoded = u16_from_offset(&TWO_BYTES, 0, Ordering::BigEndian);
        assert_eq!(decoded, 0x1234);
    }

    #[test]
    fn u32_le() {
        let decoded = u32_from_offset(&FOUR_BYTES, 0, Ordering::LittleEndian);
        assert_eq!(decoded, 0x7856_3412);
    }

    #[test]
    fn u32_be() {
        let decoded = u32_from_offset(&FOUR_BYTES, 0, Ordering::BigEndian);
        assert_eq!(decoded, 0x1234_5678);
    }

    #[test]
    fn f32_le() {
        let expected: f32 = "1.73782444e+34".parse().unwrap();
        let decoded = f32_from_offset(&FOUR_BYTES, 0, Ordering::LittleEndian);
        assert_eq!(decoded, expected);
    }

    #[test]
    fn f32_be() {
        let expected: f32 = "5.69045661e-28".parse().unwrap();
        let decoded = f32_from_offset(&FOUR_BYTES, 0, Ordering::BigEndian);
        assert_eq!(decoded, expected);
    }

    /// A buffer of identical bytes carries (almost) no entropy.
    #[test]
    fn zero_entropy() {
        let zeros = vec![0u8; 100];
        assert!(zeros.entropy() < 0.1);
    }

    /// The bundled PDF fixture is expected to be high-entropy but below the 8-bit maximum.
    #[test]
    fn pdf_entropy() {
        let pdf = include_bytes!("../testdata/pdf/test.pdf").to_vec();
        let measured = pdf.entropy();
        assert!(measured > 7.7 && measured < 8.0);
    }
}
malwaredb_types/utils.rs

malwaredb_types/
utils.rs