malwaredb_types/
lib.rs

1// SPDX-License-Identifier: Apache-2.0
2
3#![doc = include_str!("../README.md")]
4#![deny(clippy::all)]
5#![deny(clippy::pedantic)]
6#![deny(missing_docs)]
7
8/// Document types
9pub mod doc;
10
11/// Executable types
12pub mod exec;
13
14/// Convenience functions to aid with parsing
15pub mod utils;
16
17use crate::exec::ExecutableFile;
18
19use std::fmt::{Display, Formatter};
20
21use crate::doc::DocumentFile;
22use anyhow::Result;
23use chrono::{DateTime, Utc};
24use tracing::instrument;
25
/// MDB version, taken from this crate's `CARGO_PKG_VERSION` at compile time
pub const MDB_VERSION: &str = env!("CARGO_PKG_VERSION");
28
/// Common functions for all file types parsed by `MalwareDB`
///
/// An implementor declares the magic numbers which identify its format and provides a
/// human-readable name for the file type.
pub trait SpecimenFile {
    /// Magic numbers: the byte sequences at the beginning of the file which identify the
    /// file format.
    ///
    /// This is a list because some file types have more than one possible magic number.
    const MAGIC: &'static [&'static [u8]];

    /// Common name for a specific file type
    fn type_name(&self) -> &'static str;
}
38
/// Types known to `MalwareDB`
///
/// Each variant wraps the parsed representation of one file format. Every variant except
/// `Unknown` is only compiled in when its Cargo feature is enabled.
// NOTE(review): `large_enum_variant` is silenced instead of boxing the variants; presumably the
// parsed types are much larger than `Unknown` -- confirm that this trade-off is intentional.
#[allow(clippy::large_enum_variant)]
#[derive(Clone, Debug)]
pub enum KnownType<'a> {
    /// Linux, *BSD, Haiku, Solaris, etc. binaries
    #[cfg(feature = "elf")]
    ELF(exec::elf::Elf<'a>),

    /// Windows, DOS, OS/2 Executables. Anything ending with:
    /// * .cpl
    /// * .dll
    /// * .exe
    /// * .ocx
    /// * .sys
    #[cfg(feature = "pe32")]
    EXE(exec::pe32::EXE<'a>),

    /// Single architecture macOS (and derivatives) binaries
    #[cfg(feature = "macho")]
    MachO(exec::macho::Macho<'a>),

    /// Multiple architecture macOS (and derivatives) binaries
    #[cfg(feature = "macho")]
    FatMachO(exec::macho::fat::FatMacho<'a>),

    /// Classic Mac OS and Be OS
    #[cfg(feature = "pef")]
    PEF(exec::pef::Pef<'a>),

    /// Microsoft Office Compound Document Format
    #[cfg(feature = "office95")]
    Office95(doc::office95::Office95<'a>),

    /// Adobe PDF document
    #[cfg(feature = "pdf")]
    PDF(doc::pdf::PDF<'a>),

    /// Rich Text File
    #[cfg(feature = "rtf")]
    RTF(doc::rtf::Rtf<'a>),

    /// Files for which we don't have an analytic or feature extractor, or which are of an
    /// unknown type. Holds the borrowed bytes as-is.
    Unknown(&'a [u8]),
}
83
84impl<'a> KnownType<'a> {
85    /// Known type from a sequence of bytes
86    ///
87    /// # Errors
88    ///
89    /// Returns an error if the parser fails to process the detect type.
90    #[instrument(name = "KnownType detector", skip(data))]
91    pub fn new(data: &'a [u8]) -> Result<Self> {
92        // TODO: Replace the checking of byte arrays with a hashing mechanism for faster matching
93        #[cfg(feature = "elf")]
94        if data.starts_with(exec::elf::Elf::MAGIC[0]) {
95            return Ok(Self::ELF(exec::elf::Elf::from(data)?));
96        }
97
98        #[cfg(feature = "pe32")]
99        if data.starts_with(exec::pe32::EXE::MAGIC[0])
100            || data.starts_with(exec::pe32::EXE::MAGIC[1])
101        {
102            return Ok(Self::EXE(exec::pe32::EXE::from(data)?));
103        }
104
105        #[cfg(feature = "macho")]
106        for mach_magic in exec::macho::Macho::MAGIC {
107            if data.starts_with(mach_magic) {
108                return Ok(Self::MachO(exec::macho::Macho::from(data)?));
109            }
110        }
111
112        #[cfg(feature = "macho")]
113        for mach_magic in exec::macho::fat::FatMacho::MAGIC {
114            if data.starts_with(mach_magic) {
115                return Ok(Self::FatMachO(exec::macho::fat::FatMacho::from(data)?));
116            }
117        }
118
119        #[cfg(feature = "office95")]
120        if data.starts_with(doc::office95::Office95::MAGIC[0]) {
121            return Ok(Self::Office95(doc::office95::Office95::from(data)?));
122        }
123
124        #[cfg(feature = "pdf")]
125        if data.starts_with(doc::pdf::PDF::MAGIC[0]) {
126            return Ok(Self::PDF(doc::pdf::PDF::from(data)?));
127        }
128
129        #[cfg(feature = "rtf")]
130        if data.starts_with(doc::rtf::Rtf::MAGIC[0]) {
131            return Ok(Self::RTF(doc::rtf::Rtf::from(data)?));
132        }
133
134        #[cfg(feature = "pef")]
135        if data.starts_with(exec::pef::Pef::MAGIC[0]) {
136            return Ok(Self::PEF(exec::pef::Pef::from(data)?));
137        }
138
139        Ok(Self::Unknown(data))
140    }
141
142    /// Whether the sample is an executable file
143    #[must_use]
144    pub fn is_exec(&self) -> bool {
145        match self {
146            #[cfg(feature = "elf")]
147            KnownType::ELF(_) => true,
148
149            #[cfg(feature = "pe32")]
150            KnownType::EXE(_) => true,
151
152            #[cfg(feature = "macho")]
153            KnownType::MachO(_) => true,
154
155            #[cfg(feature = "macho")]
156            KnownType::FatMachO(_) => true,
157
158            #[cfg(feature = "pef")]
159            KnownType::PEF(_) => true,
160
161            _ => false,
162        }
163    }
164
165    /// Whether the sample is a document type
166    #[must_use]
167    pub fn is_doc(&self) -> bool {
168        match self {
169            #[cfg(feature = "pdf")]
170            KnownType::PDF(_) => true,
171
172            #[cfg(feature = "rtf")]
173            KnownType::RTF(_) => true,
174
175            #[cfg(feature = "office95")]
176            KnownType::Office95(_) => true,
177
178            _ => false,
179        }
180    }
181
182    /// When the file was created
183    #[must_use]
184    pub fn created(&self) -> Option<DateTime<Utc>> {
185        match self {
186            #[cfg(feature = "pe32")]
187            KnownType::EXE(e) => e.compiled_timestamp(),
188
189            #[cfg(feature = "pef")]
190            KnownType::PEF(p) => p.compiled_timestamp(),
191
192            #[cfg(feature = "pdf")]
193            KnownType::PDF(p) => p.creation_date,
194
195            _ => None,
196        }
197    }
198
199    /// Get the file's inner executable type
200    #[must_use]
201    pub fn exec(self) -> Option<Box<dyn ExecutableFile + Send + 'a>> {
202        match self {
203            #[cfg(feature = "elf")]
204            KnownType::ELF(e) => Some(Box::new(e)),
205
206            #[cfg(feature = "pe32")]
207            KnownType::EXE(e) => Some(Box::new(e)),
208
209            #[cfg(feature = "macho")]
210            KnownType::MachO(m) => Some(Box::new(m)),
211
212            #[cfg(feature = "macho")]
213            KnownType::FatMachO(m) => Some(Box::new(m)),
214
215            #[cfg(feature = "pef")]
216            KnownType::PEF(p) => Some(Box::new(p)),
217            _ => None,
218        }
219    }
220
221    /// If the sample has a child [`KnownType`], currently only supports [`FatMachO`]
222    #[must_use]
223    pub fn children(&self) -> Option<Vec<KnownType>> {
224        match self {
225            #[cfg(feature = "macho")]
226            KnownType::FatMachO(m) => Some(
227                m.binaries
228                    .iter()
229                    .map(|b| KnownType::MachO(b.clone()))
230                    .collect(),
231            ),
232
233            _ => None,
234        }
235    }
236
237    /// Raw bytes of the sample
238    #[must_use]
239    pub fn contents(&self) -> &'a [u8] {
240        match self {
241            #[cfg(feature = "elf")]
242            KnownType::ELF(e) => e.contents,
243
244            #[cfg(feature = "pe32")]
245            KnownType::EXE(e) => e.contents,
246
247            #[cfg(feature = "macho")]
248            KnownType::MachO(m) => m.contents,
249
250            #[cfg(feature = "macho")]
251            KnownType::FatMachO(m) => m.contents,
252
253            #[cfg(feature = "pef")]
254            KnownType::PEF(p) => p.contents,
255
256            #[cfg(feature = "office95")]
257            KnownType::Office95(p) => p.contents,
258
259            #[cfg(feature = "pdf")]
260            KnownType::PDF(p) => p.contents,
261
262            #[cfg(feature = "rtf")]
263            KnownType::RTF(r) => r.contents,
264
265            KnownType::Unknown(u) => u,
266        }
267    }
268
269    /// Get the inner document type
270    #[must_use]
271    pub fn doc(self) -> Option<Box<dyn DocumentFile + Send + 'a>> {
272        match self {
273            #[cfg(feature = "office95")]
274            KnownType::Office95(o) => Some(Box::new(o)),
275
276            #[cfg(feature = "pdf")]
277            KnownType::PDF(p) => Some(Box::new(p)),
278
279            #[cfg(feature = "rtf")]
280            KnownType::RTF(r) => Some(Box::new(r)),
281
282            _ => None,
283        }
284    }
285}
286
/// Byte ordering
///
/// Implements `Display`, which writes the human-readable name of the ordering.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Ordering {
    /// Big Endian, Most Significant Byte (MSB) is first
    BigEndian,

    /// Little Endian, Least Significant Byte (LSB) is first
    LittleEndian,

    /// An application which may use both orderings in the same file
    BiEndian,
}
299
300impl Display for Ordering {
301    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
302        match self {
303            Ordering::BigEndian => write!(f, "Big Endian"),
304            Ordering::LittleEndian => write!(f, "Little Endian"),
305            Ordering::BiEndian => write!(f, "Bi-Endian"),
306        }
307    }
308}