malwaredb_types/
utils.rs

1// SPDX-License-Identifier: Apache-2.0
2
3// Convenience functions for reading data types from binary blobs
4
5use crate::Ordering;
6
7/// Convenience function for [u16] from a buffer with specified [endian] ordering
8#[inline]
9#[must_use]
10pub fn u16_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> Option<u16> {
11    const SIZE: usize = 2;
12
13    if offset + SIZE > contents.len() {
14        return None;
15    }
16
17    let bytes: [u8; SIZE] = [contents[offset], contents[offset + 1]];
18    Some(if endian == Ordering::BigEndian {
19        u16::from_be_bytes(bytes)
20    } else {
21        u16::from_le_bytes(bytes)
22    })
23}
24
25/// Convenience function for [u32] from a buffer with specified [endian] ordering
26#[inline]
27#[must_use]
28pub fn u32_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> Option<u32> {
29    const SIZE: usize = 4;
30
31    if offset + SIZE > contents.len() {
32        return None;
33    }
34
35    let bytes: [u8; SIZE] = [
36        contents[offset],
37        contents[offset + 1],
38        contents[offset + 2],
39        contents[offset + 3],
40    ];
41    Some(if endian == Ordering::BigEndian {
42        u32::from_be_bytes(bytes)
43    } else {
44        u32::from_le_bytes(bytes)
45    })
46}
47
48/// Convenience function for [i32] from a buffer with specified [endian] ordering
49#[inline]
50#[must_use]
51pub fn i32_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> Option<i32> {
52    const SIZE: usize = 4;
53
54    if offset + SIZE > contents.len() {
55        return None;
56    }
57
58    let bytes: [u8; SIZE] = [
59        contents[offset],
60        contents[offset + 1],
61        contents[offset + 2],
62        contents[offset + 3],
63    ];
64    Some(if endian == Ordering::BigEndian {
65        i32::from_be_bytes(bytes)
66    } else {
67        i32::from_le_bytes(bytes)
68    })
69}
70
71/// Convenience function for [u64] from a buffer with specified [endian] ordering
72#[inline]
73#[must_use]
74pub fn u64_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> Option<u64> {
75    const SIZE: usize = 8;
76
77    if offset + SIZE > contents.len() {
78        return None;
79    }
80
81    let bytes: [u8; SIZE] = [
82        contents[offset],
83        contents[offset + 1],
84        contents[offset + 2],
85        contents[offset + 3],
86        contents[offset + 4],
87        contents[offset + 5],
88        contents[offset + 6],
89        contents[offset + 7],
90    ];
91    Some(if endian == Ordering::BigEndian {
92        u64::from_be_bytes(bytes)
93    } else {
94        u64::from_le_bytes(bytes)
95    })
96}
97
98/// Convenience function for [f32] from a buffer with specified [endian] ordering
99#[inline]
100#[must_use]
101pub fn f32_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> Option<f32> {
102    const SIZE: usize = 4;
103
104    if offset + SIZE > contents.len() {
105        return None;
106    }
107
108    let bytes: [u8; SIZE] = [
109        contents[offset],
110        contents[offset + 1],
111        contents[offset + 2],
112        contents[offset + 3],
113    ];
114    Some(if endian == Ordering::BigEndian {
115        f32::from_be_bytes(bytes)
116    } else {
117        f32::from_le_bytes(bytes)
118    })
119}
120
121/// Convenience function for [f64] from a buffer with specified [endian] ordering
122#[inline]
123#[must_use]
124pub fn f64_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> Option<f64> {
125    const SIZE: usize = 8;
126
127    if offset + SIZE > contents.len() {
128        return None;
129    }
130
131    let bytes: [u8; SIZE] = [
132        contents[offset],
133        contents[offset + 1],
134        contents[offset + 2],
135        contents[offset + 3],
136        contents[offset + 4],
137        contents[offset + 5],
138        contents[offset + 6],
139        contents[offset + 7],
140    ];
141    Some(if endian == Ordering::BigEndian {
142        f64::from_be_bytes(bytes)
143    } else {
144        f64::from_le_bytes(bytes)
145    })
146}
147
/// Reads a NUL-terminated string from `contents`, starting at `offset`.
///
/// The string ends just before the first `0` byte at or after `offset`, or at the end of the
/// buffer when no terminator is present. Valid UTF-8 is returned unchanged; otherwise each
/// invalid sequence is replaced with U+FFFD (lossy conversion).
///
/// Returns `None` when `offset` is at or past the end of `contents`. A NUL byte located
/// exactly at `offset` yields `Some` of an empty string.
#[inline]
#[must_use]
pub fn string_from_offset(contents: &[u8], offset: usize) -> Option<String> {
    if offset >= contents.len() {
        return None;
    }

    let tail = &contents[offset..];
    // Treat a NUL at `offset` itself as the terminator too, so an empty string does not
    // swallow the NUL and run on into whatever string follows it.
    let end = tail
        .iter()
        .position(|&byte| byte == 0)
        .unwrap_or(tail.len());

    // Lossy decoding borrows valid UTF-8 as-is and never produces an empty string from
    // non-empty input, so no further fallback (such as hex encoding) is ever reached.
    Some(String::from_utf8_lossy(&tail[..end]).into_owned())
}
179
/// Reports whether `needle` appears in `haystack` beginning exactly at `offset`.
///
/// Returns `false` when `offset` is at or beyond the end of `haystack` (even for an empty
/// `needle`), or when fewer than `needle.len()` bytes remain after `offset`.
#[inline]
#[must_use]
pub fn bytes_offset_match(haystack: &[u8], offset: usize, needle: &[u8]) -> bool {
    match haystack.get(offset..) {
        // `get` also succeeds with an empty tail when `offset == haystack.len()`;
        // that position is treated as out of range.
        Some(tail) if !tail.is_empty() => tail.starts_with(needle),
        _ => false,
    }
}
199
/// Finds the first position at which `needle` occurs as a contiguous run inside `haystack`.
///
/// Returns the index of the first element of the match, or `None` when `needle` does not
/// occur. An empty `needle` matches at index `0`.
///
/// <https://stackoverflow.com/questions/35901547/how-can-i-find-a-subsequence-in-a-u8-slice>
#[inline]
#[must_use]
pub fn find_subsequence<T>(haystack: &[T], needle: &[T]) -> Option<usize>
where
    for<'a> &'a [T]: PartialEq,
{
    // `slice::windows` panics when asked for zero-sized windows, so handle the empty needle
    // up front; it trivially matches at the start.
    if needle.is_empty() {
        return Some(0);
    }

    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
211
/// Calculates the Shannon entropy of a byte sequence, in bits per byte.
///
/// The result lies between `0.0` (every byte is identical) and `8.0` (all 256 byte values
/// occur equally often). Empty input yields `0.0`.
// Counts and lengths are converted to `f32` on purpose: the result is only an estimate, so
// the precision lost on very large inputs is acceptable.
#[allow(clippy::cast_precision_loss)]
#[inline]
#[must_use]
pub fn entropy_calc(data: &[u8]) -> f32 {
    if data.is_empty() {
        return 0.0;
    }

    // Build the byte histogram in one pass rather than rescanning `data` once per byte value.
    let mut counts = [0usize; 256];
    for &byte in data {
        counts[usize::from(byte)] += 1;
    }

    let len = data.len() as f32;
    let mut entropy = 0.0;
    // Byte values that never occur contribute nothing (and `log2(0)` is not finite).
    for &count in counts.iter().filter(|&&count| count > 0) {
        let p = count as f32 / len;
        entropy -= p * p.log2();
    }
    entropy
}
227
/// Calculate the entropy of bytes.
///
/// Implemented in this file for `Vec<u8>` and `&[u8]`; both implementations forward to
/// [`entropy_calc`].
pub trait EntropyCalc {
    /// Calculate the entropy (0-8) of this byte sequence.
    fn entropy(&self) -> f32;
}
233
234impl EntropyCalc for Vec<u8> {
235    fn entropy(&self) -> f32 {
236        entropy_calc(self)
237    }
238}
239
240impl EntropyCalc for &[u8] {
241    fn entropy(&self) -> f32 {
242        entropy_calc(self)
243    }
244}
245
#[cfg(test)]
mod tests {
    use super::*;
    use std::str::FromStr;

    const TWO_BYTES: [u8; 2] = [0x12, 0x34];
    const FOUR_BYTES: [u8; 4] = [0x12, 0x34, 0x56, 0x78];

    /// Asserts that `actual` is within a few ULPs of `expected`.
    ///
    /// The tolerance scales with the magnitude of `expected`. A fixed absolute tolerance
    /// would be meaningless for very large values and vacuous for very small ones (a tiny
    /// expected value would "match" `0.0`).
    fn assert_f32_close(actual: f32, expected: f32) {
        let tolerance = expected.abs() * f32::EPSILON * 4.0;
        assert!(
            (actual - expected).abs() <= tolerance,
            "expected {expected:e}, got {actual:e}"
        );
    }

    #[test]
    fn u16_none() {
        const BYTES: [u8; 1] = [0x00];

        assert!(u16_from_offset(&BYTES, 0, Ordering::LittleEndian).is_none());
    }

    #[test]
    fn u16_le() {
        assert_eq!(
            u16_from_offset(&TWO_BYTES, 0, Ordering::LittleEndian).unwrap(),
            13330
        );
    }

    #[test]
    fn u16_be() {
        assert_eq!(
            u16_from_offset(&TWO_BYTES, 0, Ordering::BigEndian).unwrap(),
            4660
        );
    }

    #[test]
    fn u32_le() {
        assert_eq!(
            u32_from_offset(&FOUR_BYTES, 0, Ordering::LittleEndian).unwrap(),
            2_018_915_346
        );
    }

    #[test]
    fn u32_be() {
        assert_eq!(
            u32_from_offset(&FOUR_BYTES, 0, Ordering::BigEndian).unwrap(),
            305_419_896
        );
    }

    #[test]
    fn i32_negative() {
        const BYTES: [u8; 4] = [0xff, 0xff, 0xff, 0xfe];

        assert_eq!(i32_from_offset(&BYTES, 0, Ordering::BigEndian), Some(-2));
        assert_eq!(
            i32_from_offset(&BYTES, 0, Ordering::LittleEndian),
            Some(-16_777_217)
        );
    }

    #[test]
    fn u64_both_orders() {
        const EIGHT_BYTES: [u8; 8] = [0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0];

        assert_eq!(
            u64_from_offset(&EIGHT_BYTES, 0, Ordering::BigEndian),
            Some(0x1234_5678_9abc_def0)
        );
        assert_eq!(
            u64_from_offset(&EIGHT_BYTES, 0, Ordering::LittleEndian),
            Some(0xf0de_bc9a_7856_3412)
        );
        assert!(u64_from_offset(&EIGHT_BYTES, 1, Ordering::BigEndian).is_none());
    }

    #[test]
    fn f32_le() {
        assert_f32_close(
            f32_from_offset(&FOUR_BYTES, 0, Ordering::LittleEndian).unwrap(),
            f32::from_str("1.73782444e+34").unwrap(),
        );
    }

    #[test]
    fn f32_be() {
        assert_f32_close(
            f32_from_offset(&FOUR_BYTES, 0, Ordering::BigEndian).unwrap(),
            f32::from_str("5.69045661e-28").unwrap(),
        );
    }

    #[test]
    fn string_until_nul() {
        const BYTES: &[u8] = b"abc\0def";

        assert_eq!(string_from_offset(BYTES, 0).as_deref(), Some("abc"));
        assert_eq!(string_from_offset(BYTES, 4).as_deref(), Some("def"));
        assert!(string_from_offset(BYTES, BYTES.len()).is_none());
    }

    #[test]
    fn bytes_match_at_offset() {
        const BYTES: &[u8] = b"\x7fELF\x02";

        assert!(bytes_offset_match(BYTES, 1, b"ELF"));
        assert!(!bytes_offset_match(BYTES, 0, b"ELF"));
        assert!(!bytes_offset_match(BYTES, 3, b"ELF"));
    }

    #[test]
    fn subsequence_position() {
        let haystack: &[u8] = b"hello world";

        assert_eq!(find_subsequence(haystack, &b"world"[..]), Some(6));
        assert_eq!(find_subsequence(haystack, &b"xyz"[..]), None);
    }

    #[test]
    fn zero_entropy() {
        let d = vec![0u8; 100];
        assert!(d.entropy() < 0.1);
    }

    #[test]
    fn pdf_entropy() {
        let pdf = include_bytes!("../testdata/pdf/test.pdf").to_vec();
        // Compute once; the scan over the whole file is the expensive part.
        let entropy = pdf.entropy();
        assert!(entropy > 7.7 && entropy < 8.0);
    }
}