malwaredb_types/
utils.rs

1// SPDX-License-Identifier: Apache-2.0
2
3use crate::Ordering;
4
5/// Convenience function for [u16] from a buffer with specified endian [Ordering]
6#[inline]
7#[must_use]
8pub fn u16_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> Option<u16> {
9    const SIZE: usize = 2;
10
11    if offset + SIZE > contents.len() {
12        return None;
13    }
14
15    let bytes: [u8; SIZE] = [contents[offset], contents[offset + 1]];
16    Some(if endian == Ordering::BigEndian {
17        u16::from_be_bytes(bytes)
18    } else {
19        u16::from_le_bytes(bytes)
20    })
21}
22
23/// Convenience function for [u32] from a buffer with specified endian [Ordering]
24#[inline]
25#[must_use]
26pub fn u32_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> Option<u32> {
27    const SIZE: usize = 4;
28
29    if offset + SIZE > contents.len() {
30        return None;
31    }
32
33    let bytes: [u8; SIZE] = [
34        contents[offset],
35        contents[offset + 1],
36        contents[offset + 2],
37        contents[offset + 3],
38    ];
39    Some(if endian == Ordering::BigEndian {
40        u32::from_be_bytes(bytes)
41    } else {
42        u32::from_le_bytes(bytes)
43    })
44}
45
46/// Convenience function for [i32] from a buffer with specified endian [Ordering]
47#[inline]
48#[must_use]
49pub fn i32_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> Option<i32> {
50    const SIZE: usize = 4;
51
52    if offset + SIZE > contents.len() {
53        return None;
54    }
55
56    let bytes: [u8; SIZE] = [
57        contents[offset],
58        contents[offset + 1],
59        contents[offset + 2],
60        contents[offset + 3],
61    ];
62    Some(if endian == Ordering::BigEndian {
63        i32::from_be_bytes(bytes)
64    } else {
65        i32::from_le_bytes(bytes)
66    })
67}
68
69/// Convenience function for [u64] from a buffer with specified endian [Ordering]
70#[inline]
71#[must_use]
72pub fn u64_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> Option<u64> {
73    const SIZE: usize = 8;
74
75    if offset + SIZE > contents.len() {
76        return None;
77    }
78
79    let bytes: [u8; SIZE] = [
80        contents[offset],
81        contents[offset + 1],
82        contents[offset + 2],
83        contents[offset + 3],
84        contents[offset + 4],
85        contents[offset + 5],
86        contents[offset + 6],
87        contents[offset + 7],
88    ];
89    Some(if endian == Ordering::BigEndian {
90        u64::from_be_bytes(bytes)
91    } else {
92        u64::from_le_bytes(bytes)
93    })
94}
95
96/// Convenience function for [f32] from a buffer with specified endian [Ordering]
97#[inline]
98#[must_use]
99pub fn f32_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> Option<f32> {
100    const SIZE: usize = 4;
101
102    if offset + SIZE > contents.len() {
103        return None;
104    }
105
106    let bytes: [u8; SIZE] = [
107        contents[offset],
108        contents[offset + 1],
109        contents[offset + 2],
110        contents[offset + 3],
111    ];
112    Some(if endian == Ordering::BigEndian {
113        f32::from_be_bytes(bytes)
114    } else {
115        f32::from_le_bytes(bytes)
116    })
117}
118
119/// Convenience function for [f64] from a buffer with specified endian [Ordering]
120#[inline]
121#[must_use]
122pub fn f64_from_offset(contents: &[u8], offset: usize, endian: Ordering) -> Option<f64> {
123    const SIZE: usize = 8;
124
125    if offset + SIZE > contents.len() {
126        return None;
127    }
128
129    let bytes: [u8; SIZE] = [
130        contents[offset],
131        contents[offset + 1],
132        contents[offset + 2],
133        contents[offset + 3],
134        contents[offset + 4],
135        contents[offset + 5],
136        contents[offset + 6],
137        contents[offset + 7],
138    ];
139    Some(if endian == Ordering::BigEndian {
140        f64::from_be_bytes(bytes)
141    } else {
142        f64::from_le_bytes(bytes)
143    })
144}
145
/// Try to get a String from a byte buffer, falling back to a lossy String if the bytes
/// are not valid UTF-8.
///
/// The string starts at `offset` and runs up to (not including) the next NUL byte, or to
/// the end of `contents` when there is no NUL. The byte at `offset` itself is always part
/// of the result, even if it is NUL; the terminator search begins with the byte after it.
///
/// Returns `None` when `offset` is at or beyond the end of `contents`. In the lossy case,
/// invalid UTF-8 sequences are replaced with U+FFFD.
#[inline]
#[must_use]
pub fn string_from_offset(contents: &[u8], offset: usize) -> Option<String> {
    // `get` handles `offset > len`; the filter handles `offset == len` (an empty tail).
    let tail = contents.get(offset..).filter(|tail| !tail.is_empty())?;

    // The first byte is kept unconditionally, so look for the terminator one byte in.
    let end = tail[1..]
        .iter()
        .position(|&byte| byte == 0)
        .map_or(tail.len(), |nul| nul + 1);

    // Converting straight from the borrowed slice avoids an intermediate `Vec` and its clone.
    // The selected bytes are never empty, so the lossy conversion is never empty either and
    // no further fallback is required.
    Some(String::from_utf8_lossy(&tail[..end]).into_owned())
}
177
/// Checks whether the bytes of `haystack` beginning at `offset` start with `needle`
///
/// Yields `false` when `offset` is at or past the end of `haystack` (even for an empty
/// `needle`), or when fewer than `needle.len()` bytes remain from `offset` onwards.
#[inline]
#[must_use]
pub fn bytes_offset_match(haystack: &[u8], offset: usize, needle: &[u8]) -> bool {
    match haystack.get(offset..) {
        // `starts_with` already answers `false` when the remainder is shorter than `needle`
        Some(remainder) if !remainder.is_empty() => remainder.starts_with(needle),
        // `offset` beyond the buffer, or exactly at its end
        _ => false,
    }
}
197
/// Convenience to see if a smaller sequence is in the larger sequence
///
/// Returns the index in `haystack` where the first occurrence of `needle` begins, or `None`
/// when `needle` does not occur (which includes `needle` being longer than `haystack`).
/// An empty `needle` is reported at index `0`.
/// <https://stackoverflow.com/questions/35901547/how-can-i-find-a-subsequence-in-a-u8-slice>
#[inline]
pub fn find_subsequence<T>(haystack: &[T], needle: &[T]) -> Option<usize>
where
    for<'a> &'a [T]: PartialEq,
{
    // `slice::windows` panics when asked for zero-sized windows, so the empty needle is
    // answered before reaching it.
    if needle.is_empty() {
        return Some(0);
    }

    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
209
/// Calculate entropy (0-8) for a byte sequence
///
/// This is the Shannon entropy in bits per byte: the sum of `-p * log2(p)` over every byte
/// value that occurs in `data`, where `p` is that value's share of the sequence. Empty
/// input yields `0.0`.
// The byte counts and the length are deliberately converted to `f32`; the rounding this
// introduces for very large inputs is accepted.
#[allow(clippy::cast_precision_loss)]
#[inline]
#[must_use]
pub fn entropy_calc(data: &[u8]) -> f32 {
    // Without this guard every probability would be 0/0 (NaN).
    if data.is_empty() {
        return 0.0;
    }

    // One pass over the data builds the whole histogram, instead of one scan per byte value.
    let mut counts = [0usize; 256];
    for &byte in data {
        counts[usize::from(byte)] += 1;
    }

    let len = data.len() as f32;
    let mut e = 0.0_f32;
    for &count in counts.iter() {
        // Byte values that never occur contribute nothing (and log2(0) is not finite).
        if count > 0 {
            let p = count as f32 / len;
            e -= p * p.log2();
        }
    }
    e
}
225
226/// Calculate the entropy of bytes
227pub trait EntropyCalc {
228    /// Calculate entropy (0-8) for some sequence
229    fn entropy(&self) -> f32;
230}
231
232impl EntropyCalc for Vec<u8> {
233    fn entropy(&self) -> f32 {
234        entropy_calc(self)
235    }
236}
237
238impl EntropyCalc for &[u8] {
239    fn entropy(&self) -> f32 {
240        entropy_calc(self)
241    }
242}
243
#[cfg(test)]
mod tests {
    use super::*;

    const TWO_BYTES: [u8; 2] = [0x12, 0x34];
    const FOUR_BYTES: [u8; 4] = [0x12, 0x34, 0x56, 0x78];
    const EIGHT_BYTES: [u8; 8] = [0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0];

    #[test]
    fn u16_none() {
        const BYTES: [u8; 1] = [0x00];

        assert!(u16_from_offset(&BYTES, 0, Ordering::LittleEndian).is_none());
    }

    #[test]
    fn u16_le() {
        assert_eq!(
            u16_from_offset(&TWO_BYTES, 0, Ordering::LittleEndian).unwrap(),
            13330
        );
    }

    #[test]
    fn u16_be() {
        assert_eq!(
            u16_from_offset(&TWO_BYTES, 0, Ordering::BigEndian).unwrap(),
            4660
        );
    }

    #[test]
    fn u32_none() {
        assert!(u32_from_offset(&FOUR_BYTES, 1, Ordering::LittleEndian).is_none());
    }

    #[test]
    fn u32_le() {
        assert_eq!(
            u32_from_offset(&FOUR_BYTES, 0, Ordering::LittleEndian).unwrap(),
            2_018_915_346
        );
    }

    #[test]
    fn u32_be() {
        assert_eq!(
            u32_from_offset(&FOUR_BYTES, 0, Ordering::BigEndian).unwrap(),
            305_419_896
        );
    }

    #[test]
    fn i32_le() {
        assert_eq!(
            i32_from_offset(&FOUR_BYTES, 0, Ordering::LittleEndian).unwrap(),
            2_018_915_346
        );
    }

    #[test]
    fn i32_be() {
        assert_eq!(
            i32_from_offset(&FOUR_BYTES, 0, Ordering::BigEndian).unwrap(),
            305_419_896
        );
    }

    #[test]
    fn i32_negative() {
        const BYTES: [u8; 4] = [0xfe, 0xff, 0xff, 0xff];

        assert_eq!(
            i32_from_offset(&BYTES, 0, Ordering::LittleEndian).unwrap(),
            -2
        );
    }

    #[test]
    fn u64_le() {
        assert_eq!(
            u64_from_offset(&EIGHT_BYTES, 0, Ordering::LittleEndian).unwrap(),
            0xf0de_bc9a_7856_3412
        );
    }

    #[test]
    fn u64_be() {
        assert_eq!(
            u64_from_offset(&EIGHT_BYTES, 0, Ordering::BigEndian).unwrap(),
            0x1234_5678_9abc_def0
        );
    }

    // The float tests compare raw bit patterns: an absolute tolerance cannot work for both
    // ~1.7e34 and ~5.7e-28, and an exact bit comparison is what a byte decoder should satisfy.
    #[test]
    fn f32_le() {
        // 0x7856_3412 is approximately 1.73782444e+34
        assert_eq!(
            f32_from_offset(&FOUR_BYTES, 0, Ordering::LittleEndian)
                .unwrap()
                .to_bits(),
            0x7856_3412
        );
    }

    #[test]
    fn f32_be() {
        // 0x1234_5678 is approximately 5.69045661e-28
        assert_eq!(
            f32_from_offset(&FOUR_BYTES, 0, Ordering::BigEndian)
                .unwrap()
                .to_bits(),
            0x1234_5678
        );
    }

    #[test]
    fn f64_le() {
        assert_eq!(
            f64_from_offset(&EIGHT_BYTES, 0, Ordering::LittleEndian)
                .unwrap()
                .to_bits(),
            0xf0de_bc9a_7856_3412
        );
    }

    #[test]
    fn f64_be() {
        assert_eq!(
            f64_from_offset(&EIGHT_BYTES, 0, Ordering::BigEndian)
                .unwrap()
                .to_bits(),
            0x1234_5678_9abc_def0
        );
    }

    #[test]
    fn string_until_nul() {
        let buffer: &[u8] = b"hello\0world";

        assert_eq!(string_from_offset(buffer, 0).unwrap(), "hello");
        assert_eq!(string_from_offset(buffer, 6).unwrap(), "world");
        assert!(string_from_offset(buffer, 11).is_none());
    }

    #[test]
    fn string_lossy() {
        const BYTES: [u8; 2] = [0xff, 0x00];

        assert_eq!(string_from_offset(&BYTES, 0).unwrap(), "\u{FFFD}");
    }

    #[test]
    fn bytes_match() {
        let buffer: &[u8] = b"hello world";

        assert!(bytes_offset_match(buffer, 6, b"world"));
        assert!(!bytes_offset_match(buffer, 6, b"worlds"));
        assert!(!bytes_offset_match(buffer, 0, b"help"));
        assert!(!bytes_offset_match(buffer, 11, b""));
    }

    #[test]
    fn subsequence() {
        let buffer: &[u8] = b"hello world";

        assert_eq!(find_subsequence(buffer, &b"world"[..]), Some(6));
        assert_eq!(find_subsequence(buffer, &b"mars"[..]), None);
    }

    #[test]
    fn zero_entropy() {
        let d = vec![0u8; 100];
        assert!(d.entropy() < 0.1);
    }

    #[test]
    fn uniform_entropy() {
        let d: Vec<u8> = (0..=255u8).collect();
        assert!((d.entropy() - 8.0).abs() < 0.001);
    }

    #[test]
    fn pdf_entropy() {
        let pdf = include_bytes!("../testdata/pdf/test.pdf").to_vec();
        let entropy = pdf.entropy();
        assert!(entropy > 7.7 && entropy < 8.0);
    }
}