Skip to main content

malwaredb_api/
digest.rs

1// SPDX-License-Identifier: Apache-2.0
2
3use std::borrow::Borrow;
4use std::error::Error;
5use std::fmt::{Display, Formatter};
6use std::ops::Deref;
7
8use base64::{engine::general_purpose, Engine};
9use serde::{Deserialize, Deserializer, Serialize, Serializer};
10use uuid::Uuid;
11
12// Adapted from
13// https://github.com/profianinc/steward/commit/69a4f297e06cbc95f327d271a691198230c97429#diff-adf0e917b493348b9f22a754b89ff8644fd3af28a769f75caaec2ffd47edfea4
14// Idea for this Digest struct by Roman Volosatovs <roman@profian.com>
15
/// Fixed-size digest whose length in bytes is the const generic `N`.
///
/// Values are serialized and deserialized as hexadecimal strings.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Digest<const N: usize>(pub [u8; N]);
19
20impl<'de, const N: usize> Deserialize<'de> for Digest<N> {
21    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
22    where
23        D: Deserializer<'de>,
24    {
25        use serde::de::Error;
26
27        let dig: String = Deserialize::deserialize(deserializer)?;
28        let dig = hex::decode(dig).map_err(|e| Error::custom(format!("invalid hex: {e}")))?;
29        let dig = dig.try_into().map_err(|v: Vec<_>| {
30            Error::custom(format!(
31                "expected digest to have length of {N}, got {}",
32                v.len()
33            ))
34        })?;
35        Ok(Digest(dig))
36    }
37}
38
39impl<const N: usize> Serialize for Digest<N> {
40    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
41    where
42        S: Serializer,
43    {
44        let hex = self.to_string();
45        serializer.serialize_str(&hex)
46    }
47}
48
49impl<const N: usize> AsRef<[u8; N]> for Digest<N> {
50    fn as_ref(&self) -> &[u8; N] {
51        &self.0
52    }
53}
54
55impl<const N: usize> Borrow<[u8; N]> for Digest<N> {
56    fn borrow(&self) -> &[u8; N] {
57        &self.0
58    }
59}
60
61impl<const N: usize> Deref for Digest<N> {
62    type Target = [u8; N];
63
64    fn deref(&self) -> &Self::Target {
65        &self.0
66    }
67}
68
69impl From<Uuid> for Digest<16> {
70    fn from(uuid: Uuid) -> Self {
71        let bytes = uuid.into_bytes();
72        let mut array = [0u8; 16];
73        array.copy_from_slice(&bytes[..16]);
74        Digest(array)
75    }
76}
77
/// Error raised by digest conversions, generally for a hash of an unexpected size.
///
/// Carries a human-readable message that is printed verbatim by `Display`.
#[derive(Clone, Debug)]
pub struct DigestError(String);

impl Display for DigestError {
    /// Writes the stored message as-is.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}

impl Error for DigestError {}
89
90impl<const N: usize> TryFrom<Vec<u8>> for Digest<N> {
91    type Error = DigestError;
92
93    fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> {
94        let len = value.len();
95        let array: [u8; N] = value
96            .try_into()
97            .map_err(|_| DigestError(format!("Expected a Vec of length {N} but it was {len}")))?;
98        Ok(Digest(array))
99    }
100}
101
102impl<const N: usize> Display for Digest<N> {
103    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
104        write!(f, "{}", hex::encode(self.0))
105    }
106}
107
108/// The hash by which a sample is identified
109#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq, Ord, PartialOrd, Hash)]
110pub enum HashType {
111    /// MD5
112    Md5(Digest<16>),
113
114    /// SHA-1
115    SHA1(Digest<20>),
116
117    /// SHA-256, assumed to be SHA2-256
118    SHA256(Digest<32>),
119
120    /// SHA-384, assumed to be SHA2-384
121    SHA384(Digest<48>),
122
123    /// SHA-512, assumed to be SHA2-512
124    SHA512(Digest<64>),
125}
126
127impl Display for HashType {
128    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
129        match self {
130            HashType::Md5(h) => write!(f, "MD5: {h}"),
131            HashType::SHA1(h) => write!(f, "SHA-1: {h}"),
132            HashType::SHA256(h) => write!(f, "SHA-256: {h}"),
133            HashType::SHA384(h) => write!(f, "SHA-384: {h}"),
134            HashType::SHA512(h) => write!(f, "SHA-512: {h}"),
135        }
136    }
137}
138
139impl HashType {
140    /// Get the hash type from the `content-digest` header.
141    ///
142    /// # Errors
143    ///
144    /// Returns an error if the header is malformed or if the base64 decoding fails.
145    pub fn from_content_digest_header(s: &str) -> Result<Self, DigestError> {
146        let parts: Vec<&str> = s.splitn(2, '=').collect();
147        if parts.len() != 2 {
148            return Err(DigestError("Invalid header".into()));
149        }
150
151        let first_colon = parts[1]
152            .find(':')
153            .ok_or_else(|| DigestError("Invalid header".into()))?;
154        let second_colon = parts[1]
155            .rfind(':')
156            .ok_or_else(|| DigestError("Invalid header".into()))?;
157
158        let file_contents_b64 = general_purpose::STANDARD
159            .decode(&parts[1][first_colon + 1..second_colon])
160            .map_err(|_| DigestError("Invalid base64".into()))?;
161
162        match parts[0] {
163            "md5" | "md-5" => Ok(HashType::Md5(file_contents_b64.try_into()?)),
164            "sha1" | "sha-1" => Ok(HashType::SHA1(file_contents_b64.try_into()?)),
165            "sha256" | "sha-256" => Ok(HashType::SHA256(file_contents_b64.try_into()?)),
166            "sha384" | "sha-384" => Ok(HashType::SHA384(file_contents_b64.try_into()?)),
167            "sha512" | "sha-512" => Ok(HashType::SHA512(file_contents_b64.try_into()?)),
168            _ => Err(DigestError("Invalid hash type".into())),
169        }
170    }
171
172    /// Return the name of the hash type, used to decide
173    /// on the database field to find the match
174    #[inline]
175    #[must_use]
176    pub fn name(&self) -> &'static str {
177        match self {
178            HashType::Md5(_) => "md5",
179            HashType::SHA1(_) => "sha1",
180            HashType::SHA256(_) => "sha256",
181            HashType::SHA384(_) => "sha384",
182            HashType::SHA512(_) => "sha512",
183        }
184    }
185
186    /// Unwrap the hash from the enum's types
187    #[inline]
188    #[must_use]
189    pub fn the_hash(&self) -> String {
190        match self {
191            HashType::Md5(h) => h.to_string(),
192            HashType::SHA1(h) => h.to_string(),
193            HashType::SHA256(h) => h.to_string(),
194            HashType::SHA384(h) => h.to_string(),
195            HashType::SHA512(h) => h.to_string(),
196        }
197    }
198
199    /// Get the inner bytes of the hash
200    #[inline]
201    #[must_use]
202    pub fn bytes(&self) -> &[u8] {
203        match self {
204            HashType::Md5(h) => &h.0,
205            HashType::SHA1(h) => &h.0,
206            HashType::SHA256(h) => &h.0,
207            HashType::SHA384(h) => &h.0,
208            HashType::SHA512(h) => &h.0,
209        }
210    }
211
212    /// Create a `content-digest` header from the hash type.
213    #[inline]
214    #[must_use]
215    pub fn content_digest_header(&self) -> String {
216        format!(
217            "{}={}",
218            self.name(),
219            general_purpose::STANDARD.encode(self.the_hash())
220        )
221    }
222
223    /// Test that this hash matches the given bytes.
224    #[must_use]
225    pub fn verify(&self, bytes: &[u8]) -> bool {
226        use md5::Digest;
227
228        match self {
229            HashType::Md5(h) => md5::Md5::digest(bytes).as_slice().eq(&h.0),
230            HashType::SHA1(h) => sha1::Sha1::digest(bytes).as_slice().eq(&h.0),
231            HashType::SHA256(h) => sha2::Sha256::digest(bytes).as_slice().eq(&h.0),
232            HashType::SHA384(h) => sha2::Sha384::digest(bytes).as_slice().eq(&h.0),
233            HashType::SHA512(h) => sha2::Sha512::digest(bytes).as_slice().eq(&h.0),
234        }
235    }
236}
237
238impl TryFrom<&str> for HashType {
239    type Error = DigestError;
240
241    fn try_from(value: &str) -> Result<Self, Self::Error> {
242        let decoded = hex::decode(value).map_err(|e| DigestError(e.to_string()))?;
243        Ok(match decoded.len() {
244            16 => HashType::Md5(Digest::try_from(decoded)?),
245            20 => HashType::SHA1(Digest::try_from(decoded)?),
246            32 => HashType::SHA256(Digest::try_from(decoded)?),
247            48 => HashType::SHA384(Digest::try_from(decoded)?),
248            64 => HashType::SHA512(Digest::try_from(decoded)?),
249            _ => return Err(DigestError(format!("unknown hash size {}", value.len()))),
250        })
251    }
252}
253
254impl TryFrom<&[u8]> for HashType {
255    type Error = DigestError;
256    fn try_from(digest: &[u8]) -> Result<Self, Self::Error> {
257        Ok(match digest.len() {
258            16 => HashType::Md5(Digest(
259                digest
260                    .try_into()
261                    .map_err(|_| DigestError("Invalid MD5".into()))?,
262            )),
263            20 => HashType::SHA1(Digest(
264                digest
265                    .try_into()
266                    .map_err(|_| DigestError("Invalid SHA1".into()))?,
267            )),
268            32 => HashType::SHA256(Digest(
269                digest
270                    .try_into()
271                    .map_err(|_| DigestError("Invalid SHA-256".into()))?,
272            )),
273            48 => HashType::SHA384(Digest(
274                digest
275                    .try_into()
276                    .map_err(|_| DigestError("Invalid SHA-384".into()))?,
277            )),
278            64 => HashType::SHA512(Digest(
279                digest
280                    .try_into()
281                    .map_err(|_| DigestError("Invalid SHA-512".into()))?,
282            )),
283            _ => return Err(DigestError(format!("unknown hash size {}", digest.len()))),
284        })
285    }
286}
287
288impl From<Uuid> for HashType {
289    fn from(uuid: Uuid) -> Self {
290        HashType::Md5(Digest::from(uuid))
291    }
292}
293
#[cfg(test)]
mod tests {
    use super::*;

    /// A 4-byte digest renders as hex, but is not accepted as any hash type.
    #[test]
    fn strings() {
        let short = Digest([0x00, 0x11, 0x22, 0x33]);
        assert_eq!(short.to_string(), "00112233");
        assert!(HashType::try_from("00112233").is_err());
    }

    /// A 40-character hex string must be recognised as SHA-1 and nothing else.
    #[test]
    fn sha1() {
        const TEST: &str = "3204c1ca863c2068214900e831fb8047b934bf88";

        let parsed = HashType::try_from(TEST).unwrap();
        assert_eq!(parsed.name(), "sha1");

        match parsed {
            HashType::SHA1(_) => {}
            HashType::Md5(_) => panic!("Failed: SHA-1 hash was made into MD-5"),
            HashType::SHA256(_) => panic!("Failed: SHA-1 hash was made into SHA-256"),
            HashType::SHA384(_) => panic!("Failed: SHA-1 hash was made into SHA-384"),
            HashType::SHA512(_) => panic!("Failed: SHA-1 hash was made into SHA-512"),
        }
    }

    /// A 64-character hex string must be recognised as SHA-256 and nothing else.
    #[test]
    fn sha256() {
        const TEST: &str = "d154b8420fc56a629df2e6d918be53310d8ac39a926aa5f60ae59a66298969a0";

        let parsed = HashType::try_from(TEST).unwrap();
        assert_eq!(parsed.name(), "sha256");

        match parsed {
            HashType::SHA256(_) => {}
            HashType::Md5(_) => panic!("Failed: SHA-256 hash was made into MD-5"),
            HashType::SHA1(_) => panic!("Failed: SHA-256 hash was made into SHA-1"),
            HashType::SHA384(_) => panic!("Failed: SHA-256 hash was made into SHA-384"),
            HashType::SHA512(_) => panic!("Failed: SHA-256 hash was made into SHA-512"),
        }
    }

    /// A 128-character hex string must be recognised as SHA-512 and nothing else.
    #[test]
    fn sha512() {
        const TEST: &str = "dafe60f7d02b0151909550d6f20343d0fe374b044d40221c13295a312489e1b702edbeac99ffda85f61b812b1ddd0c9394cda0c1162bffb716f04d996ff73cdf";

        let parsed = HashType::try_from(TEST).unwrap();
        assert_eq!(parsed.name(), "sha512");

        match parsed {
            HashType::SHA512(_) => {}
            HashType::Md5(_) => panic!("Failed: SHA-512 hash was made into MD-5"),
            HashType::SHA1(_) => panic!("Failed: SHA-512 hash was made into SHA-1"),
            HashType::SHA256(_) => panic!("Failed: SHA-512 hash was made into SHA-256"),
            HashType::SHA384(_) => panic!("Failed: SHA-512 hash was made into SHA-384"),
        }
    }
}