malwaredb_types/
utils.rs

1// SPDX-License-Identifier: Apache-2.0
2
3// Convenience functions for reading data types from binary blobs
4
5use crate::Ordering;
6
7/// Convenience function for [u16] from a buffer with specified [endian] ordering
8#[inline]
9#[must_use]
10pub fn u16_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> u16 {
11    let bytes: [u8; 2] = [contents[offset], contents[offset + 1]];
12    if endian == Ordering::BigEndian {
13        u16::from_be_bytes(bytes)
14    } else {
15        u16::from_le_bytes(bytes)
16    }
17}
18
19/// Convenience function for [u32] from a buffer with specified [endian] ordering
20#[inline]
21#[must_use]
22pub fn u32_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> u32 {
23    let bytes: [u8; 4] = [
24        contents[offset],
25        contents[offset + 1],
26        contents[offset + 2],
27        contents[offset + 3],
28    ];
29    if endian == Ordering::BigEndian {
30        u32::from_be_bytes(bytes)
31    } else {
32        u32::from_le_bytes(bytes)
33    }
34}
35
36/// Convenience function for [i32] from a buffer with specified [endian] ordering
37#[inline]
38#[must_use]
39pub fn i32_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> i32 {
40    let bytes: [u8; 4] = [
41        contents[offset],
42        contents[offset + 1],
43        contents[offset + 2],
44        contents[offset + 3],
45    ];
46    if endian == Ordering::BigEndian {
47        i32::from_be_bytes(bytes)
48    } else {
49        i32::from_le_bytes(bytes)
50    }
51}
52
53/// Convenience function for [u64] from a buffer with specified [endian] ordering
54#[inline]
55#[must_use]
56pub fn u64_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> u64 {
57    let bytes: [u8; 8] = [
58        contents[offset],
59        contents[offset + 1],
60        contents[offset + 2],
61        contents[offset + 3],
62        contents[offset + 4],
63        contents[offset + 5],
64        contents[offset + 6],
65        contents[offset + 7],
66    ];
67    if endian == Ordering::BigEndian {
68        u64::from_be_bytes(bytes)
69    } else {
70        u64::from_le_bytes(bytes)
71    }
72}
73
74/// Convenience function for [f32] from a buffer with specified [endian] ordering
75#[inline]
76#[must_use]
77pub fn f32_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> f32 {
78    let bytes: [u8; 4] = [
79        contents[offset],
80        contents[offset + 1],
81        contents[offset + 2],
82        contents[offset + 3],
83    ];
84    if endian == Ordering::BigEndian {
85        f32::from_be_bytes(bytes)
86    } else {
87        f32::from_le_bytes(bytes)
88    }
89}
90
91/// Convenience function for [f64] from a buffer with specified [endian] ordering
92#[inline]
93#[must_use]
94pub fn f64_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> f64 {
95    let bytes: [u8; 8] = [
96        contents[offset],
97        contents[offset + 1],
98        contents[offset + 2],
99        contents[offset + 3],
100        contents[offset + 4],
101        contents[offset + 5],
102        contents[offset + 6],
103        contents[offset + 7],
104    ];
105    if endian == Ordering::BigEndian {
106        f64::from_be_bytes(bytes)
107    } else {
108        f64::from_le_bytes(bytes)
109    }
110}
111
/// Reads a NUL-terminated string from `contents`, starting at `offset`.
///
/// The string ends at the first zero byte at or after `offset`, or at the end of the
/// buffer when no terminator is present. Valid UTF-8 is returned unchanged; invalid
/// sequences are replaced with U+FFFD (the Unicode replacement character).
///
/// An empty string is returned when the byte at `offset` is already the terminator, or
/// when `offset` lies at or beyond the end of `contents`.
#[inline]
#[must_use]
pub fn string_from_offset(contents: &[u8], offset: usize) -> String {
    // `get` keeps an out-of-range offset from panicking; treat it as "no string here".
    let tail = contents.get(offset..).unwrap_or(&[]);

    // Stop at the terminator if there is one, otherwise take everything that is left.
    let end = tail.iter().position(|&byte| byte == 0).unwrap_or(tail.len());

    // Lossy decoding yields the exact string for valid UTF-8 and is never empty for
    // non-empty invalid input, so no further fallback is required.
    String::from_utf8_lossy(&tail[..end]).into_owned()
}
139
/// Reports whether `needle` occurs in `haystack` exactly at position `offset`.
///
/// Returns `false` when `offset` is at or past the end of `haystack` (even for an empty
/// `needle`), or when fewer than `needle.len()` bytes remain after `offset`.
#[inline]
#[must_use]
pub fn bytes_offset_match(haystack: &[u8], offset: usize, needle: &[u8]) -> bool {
    match haystack.get(offset..) {
        // A non-empty tail means `offset` is strictly inside the haystack;
        // `starts_with` rejects a needle longer than what is left.
        Some(tail) if !tail.is_empty() => tail.starts_with(needle),
        _ => false,
    }
}
159
/// Finds the first position at which `needle` occurs as a contiguous run inside `haystack`.
///
/// Returns `Some(index)` of the first match, or `None` when `needle` does not occur
/// (including when it is longer than `haystack`). An empty `needle` matches at index 0.
///
/// <https://stackoverflow.com/questions/35901547/how-can-i-find-a-subsequence-in-a-u8-slice>
#[inline]
pub fn find_subsequence<T>(haystack: &[T], needle: &[T]) -> Option<usize>
where
    for<'a> &'a [T]: PartialEq,
{
    // `slice::windows` panics on a window size of zero, so answer that case up front.
    if needle.is_empty() {
        return Some(0);
    }

    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
171
/// Calculates the Shannon entropy of `data` in bits per byte.
///
/// The result ranges from 0.0 (every byte identical, or `data` empty) up to 8.0
/// (all 256 byte values equally frequent).
#[inline]
#[must_use]
pub fn entropy_calc(data: &[u8]) -> f32 {
    // One pass to count every byte value, 0x00 through 0xFF inclusive.
    let mut histogram = [0usize; 256];
    for &byte in data {
        histogram[usize::from(byte)] += 1;
    }

    let len = data.len() as f32;
    histogram
        .iter()
        // Values that never occur contribute nothing; skipping them also keeps
        // empty input from dividing zero by zero.
        .filter(|&&count| count > 0)
        .fold(0.0, |entropy, &count| {
            let p = count as f32 / len;
            entropy - p * p.log2()
        })
}
186
/// Entropy calculation for types that hold a sequence of bytes.
///
/// This file implements it for `Vec<u8>` and `&[u8]`, both by delegating to
/// [`entropy_calc`].
pub trait EntropyCalc {
    /// Returns the entropy of the sequence, on a scale of 0 to 8.
    fn entropy(&self) -> f32;
}
192
193impl EntropyCalc for Vec<u8> {
194    fn entropy(&self) -> f32 {
195        entropy_calc(self)
196    }
197}
198
199impl EntropyCalc for &[u8] {
200    fn entropy(&self) -> f32 {
201        entropy_calc(self)
202    }
203}
204
#[cfg(test)]
mod tests {
    use super::*;
    use std::str::FromStr;

    const TWO_BYTES: [u8; 2] = [0x12, 0x34];
    const FOUR_BYTES: [u8; 4] = [0x12, 0x34, 0x56, 0x78];
    const EIGHT_BYTES: [u8; 8] = [0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xF0];

    /// Asserts that `actual` is within one part per million of `expected`.
    ///
    /// The tolerance is relative because the expected values span dozens of orders of
    /// magnitude; a fixed absolute tolerance would accept any value near zero for the
    /// small ones.
    fn assert_f32_close(actual: f32, expected: f32) {
        let tolerance = expected.abs() * 1.0e-6;
        assert!(
            (actual - expected).abs() <= tolerance,
            "expected {expected:e}, got {actual:e}"
        );
    }

    #[test]
    fn u16_le() {
        assert_eq!(
            u16_from_offset(&TWO_BYTES, 0, Ordering::LittleEndian),
            13330
        );
    }

    #[test]
    fn u16_be() {
        assert_eq!(u16_from_offset(&TWO_BYTES, 0, Ordering::BigEndian), 4660);
    }

    #[test]
    fn u32_le() {
        assert_eq!(
            u32_from_offset(&FOUR_BYTES, 0, Ordering::LittleEndian),
            2_018_915_346
        );
    }

    #[test]
    fn u32_be() {
        assert_eq!(
            u32_from_offset(&FOUR_BYTES, 0, Ordering::BigEndian),
            305_419_896
        );
    }

    #[test]
    fn i32_le() {
        assert_eq!(
            i32_from_offset(&[0xFF, 0xFF, 0xFF, 0xFE], 0, Ordering::LittleEndian),
            -16_777_217
        );
    }

    #[test]
    fn i32_be() {
        assert_eq!(
            i32_from_offset(&[0xFF, 0xFF, 0xFF, 0xFE], 0, Ordering::BigEndian),
            -2
        );
    }

    #[test]
    fn u64_le() {
        assert_eq!(
            u64_from_offset(&EIGHT_BYTES, 0, Ordering::LittleEndian),
            0xF0DE_BC9A_7856_3412
        );
    }

    #[test]
    fn u64_be() {
        assert_eq!(
            u64_from_offset(&EIGHT_BYTES, 0, Ordering::BigEndian),
            0x1234_5678_9ABC_DEF0
        );
    }

    #[test]
    fn f32_le() {
        assert_f32_close(
            f32_from_offset(&FOUR_BYTES, 0, Ordering::LittleEndian),
            f32::from_str("1.73782444e+34").unwrap(),
        );
    }

    #[test]
    fn f32_be() {
        assert_f32_close(
            f32_from_offset(&FOUR_BYTES, 0, Ordering::BigEndian),
            f32::from_str("5.69045661e-28").unwrap(),
        );
    }

    #[test]
    fn f64_be() {
        // 0x3FF0_0000_0000_0000 is the bit pattern of 1.0.
        let one = [0x3F, 0xF0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00];
        assert_eq!(
            f64_from_offset(&one, 0, Ordering::BigEndian).to_bits(),
            1.0_f64.to_bits()
        );
    }

    #[test]
    fn f64_le() {
        let one = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x3F];
        assert_eq!(
            f64_from_offset(&one, 0, Ordering::LittleEndian).to_bits(),
            1.0_f64.to_bits()
        );
    }

    #[test]
    fn string_stops_at_nul() {
        assert_eq!(string_from_offset(b"ab\0cd\0", 0), "ab");
        assert_eq!(string_from_offset(b"ab\0cd\0", 3), "cd");
    }

    #[test]
    fn bytes_match_at_offset() {
        assert!(bytes_offset_match(b"hello world", 6, b"world"));
        assert!(!bytes_offset_match(b"hello world", 6, b"worlds"));
        assert!(!bytes_offset_match(b"hello", 9, b"o"));
    }

    #[test]
    fn subsequence_position() {
        assert_eq!(find_subsequence(&b"hello world"[..], &b"wor"[..]), Some(6));
        assert_eq!(find_subsequence(&b"hello"[..], &b"xyz"[..]), None);
    }

    #[test]
    fn zero_entropy() {
        let d = vec![0u8; 100];
        assert!(d.entropy() < 0.1);
    }

    #[test]
    fn pdf_entropy() {
        let pdf = include_bytes!("../testdata/pdf/test.pdf").to_vec();
        let entropy = pdf.entropy();
        assert!(entropy > 7.7 && entropy < 8.0, "unexpected entropy {entropy}");
    }
}
271}