malwaredb_types/
lib.rs

1// SPDX-License-Identifier: Apache-2.0
2
3#![doc = include_str!("../README.md")]
4#![deny(clippy::all)]
5//#![deny(clippy::pedantic)]
6#![deny(missing_docs)]
7
8/// Document types
9pub mod doc;
10
11/// Executable types
12pub mod exec;
13
14/// Convenience functions to aid with parsing
15pub mod utils;
16
17use crate::exec::ExecutableFile;
18
19use std::fmt::{Display, Formatter};
20
21use crate::doc::DocumentFile;
22use anyhow::Result;
23use chrono::{DateTime, Utc};
24use tracing::instrument;
25
26/// MDB version
27pub const MDB_VERSION: &str = env!("CARGO_PKG_VERSION");
28
/// Behaviour shared by every file type which `MalwareDB` is able to parse
pub trait SpecimenFile {
    /// Magic numbers: the byte sequences found at the very start of a file which identify
    /// its format. A format may have several valid magic numbers, so each entry of the
    /// outer slice is one accepted sequence.
    const MAGIC: &'static [&'static [u8]];

    /// Commonly used name of this specific file type
    fn type_name(&self) -> &'static str;
}
38
/// Every file type which `MalwareDB` can recognise.
///
/// Each format-specific variant only exists when its Cargo feature is enabled;
/// [`KnownType::Unknown`] is always available.
// The parsed variants are stored inline rather than boxed, so the size-difference lint is
// silenced here. NOTE(review): presumably a deliberate trade-off -- confirm before boxing.
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Clone)]
pub enum KnownType<'a> {
    /// Binaries for Linux, *BSD, Haiku, Solaris, and similar systems
    #[cfg(feature = "elf")]
    ELF(exec::elf::Elf<'a>),

    /// Executables for Windows, DOS, and OS/2. Covers anything ending with:
    /// * .cpl
    /// * .dll
    /// * .exe
    /// * .ocx
    /// * .sys
    #[cfg(feature = "pe32")]
    EXE(exec::pe32::EXE<'a>),

    /// macOS (and derivatives) binary for a single architecture
    #[cfg(feature = "macho")]
    MachO(exec::macho::Macho<'a>),

    /// macOS (and derivatives) binary bundling multiple architectures
    #[cfg(feature = "macho")]
    FatMachO(exec::macho::fat::FatMacho<'a>),

    /// Binaries for Classic Mac OS and Be OS
    #[cfg(feature = "pef")]
    PEF(exec::pef::Pef<'a>),

    /// Microsoft Office Compound Document Format
    #[cfg(feature = "office95")]
    Office95(doc::office95::Office95<'a>),

    /// Adobe PDF document
    #[cfg(feature = "pdf")]
    PDF(doc::pdf::PDF<'a>),

    /// Rich Text File
    #[cfg(feature = "rtf")]
    RTF(doc::rtf::Rtf<'a>),

    /// Raw bytes of a file whose type is unknown, or for which there is no analytic or
    /// feature extractor
    Unknown(&'a [u8]),
}
83
84impl<'a> KnownType<'a> {
85    /// Known type from a sequence of bytes
86    #[instrument(name = "KnownType detector", skip(data))]
87    pub fn new(data: &'a [u8]) -> Result<Self> {
88        // TODO: Replace the checking of byte arrays with a hashing mechanism for faster matching
89        #[cfg(feature = "elf")]
90        if data.starts_with(exec::elf::Elf::MAGIC[0]) {
91            return Ok(Self::ELF(exec::elf::Elf::from(data)?));
92        }
93
94        #[cfg(feature = "pe32")]
95        if data.starts_with(exec::pe32::EXE::MAGIC[0])
96            || data.starts_with(exec::pe32::EXE::MAGIC[1])
97        {
98            return Ok(Self::EXE(exec::pe32::EXE::from(data)?));
99        }
100
101        #[cfg(feature = "macho")]
102        for mach_magic in exec::macho::Macho::MAGIC {
103            if data.starts_with(mach_magic) {
104                return Ok(Self::MachO(exec::macho::Macho::from(data)?));
105            }
106        }
107
108        #[cfg(feature = "macho")]
109        for mach_magic in exec::macho::fat::FatMacho::MAGIC {
110            if data.starts_with(mach_magic) {
111                return Ok(Self::FatMachO(exec::macho::fat::FatMacho::from(data)?));
112            }
113        }
114
115        #[cfg(feature = "office95")]
116        if data.starts_with(doc::office95::Office95::MAGIC[0]) {
117            return Ok(Self::Office95(doc::office95::Office95::from(data)?));
118        }
119
120        #[cfg(feature = "pdf")]
121        if data.starts_with(doc::pdf::PDF::MAGIC[0]) {
122            return Ok(Self::PDF(doc::pdf::PDF::from(data)?));
123        }
124
125        #[cfg(feature = "rtf")]
126        if data.starts_with(doc::rtf::Rtf::MAGIC[0]) {
127            return Ok(Self::RTF(doc::rtf::Rtf::from(data)?));
128        }
129
130        #[cfg(feature = "pef")]
131        if data.starts_with(exec::pef::Pef::MAGIC[0]) {
132            return Ok(Self::PEF(exec::pef::Pef::from(data)?));
133        }
134
135        Ok(Self::Unknown(data))
136    }
137
138    /// Whether the sample is an executable file
139    #[must_use]
140    pub fn is_exec(&self) -> bool {
141        match self {
142            #[cfg(feature = "elf")]
143            KnownType::ELF(_) => true,
144
145            #[cfg(feature = "pe32")]
146            KnownType::EXE(_) => true,
147
148            #[cfg(feature = "macho")]
149            KnownType::MachO(_) => true,
150
151            #[cfg(feature = "macho")]
152            KnownType::FatMachO(_) => true,
153
154            #[cfg(feature = "pef")]
155            KnownType::PEF(_) => true,
156
157            _ => false,
158        }
159    }
160
161    /// Whether the sample is a document type
162    #[must_use]
163    pub fn is_doc(&self) -> bool {
164        match self {
165            #[cfg(feature = "pdf")]
166            KnownType::PDF(_) => true,
167
168            #[cfg(feature = "rtf")]
169            KnownType::RTF(_) => true,
170
171            #[cfg(feature = "office95")]
172            KnownType::Office95(_) => true,
173
174            _ => false,
175        }
176    }
177
178    /// When the file was created
179    #[must_use]
180    pub fn created(&self) -> Option<DateTime<Utc>> {
181        match self {
182            #[cfg(feature = "pe32")]
183            KnownType::EXE(e) => e.compiled_timestamp(),
184
185            #[cfg(feature = "pef")]
186            KnownType::PEF(p) => p.compiled_timestamp(),
187
188            #[cfg(feature = "pdf")]
189            KnownType::PDF(p) => p.creation_date,
190
191            _ => None,
192        }
193    }
194
195    /// Get the file's inner executable type
196    #[must_use]
197    pub fn exec(self) -> Option<Box<dyn ExecutableFile + Send + 'a>> {
198        match self {
199            #[cfg(feature = "elf")]
200            KnownType::ELF(e) => Some(Box::new(e)),
201
202            #[cfg(feature = "pe32")]
203            KnownType::EXE(e) => Some(Box::new(e)),
204
205            #[cfg(feature = "macho")]
206            KnownType::MachO(m) => Some(Box::new(m)),
207
208            #[cfg(feature = "macho")]
209            KnownType::FatMachO(m) => Some(Box::new(m)),
210
211            #[cfg(feature = "pef")]
212            KnownType::PEF(p) => Some(Box::new(p)),
213            _ => None,
214        }
215    }
216
217    /// If the sample has a child [`KnownType`], currently only supports [`FatMachO`]
218    #[must_use]
219    pub fn children(&self) -> Option<Vec<KnownType>> {
220        match self {
221            #[cfg(feature = "macho")]
222            KnownType::FatMachO(m) => Some(
223                m.binaries
224                    .iter()
225                    .map(|b| KnownType::MachO(b.clone()))
226                    .collect(),
227            ),
228
229            _ => None,
230        }
231    }
232
233    /// Raw bytes of the sample
234    #[must_use]
235    pub fn contents(&self) -> &'a [u8] {
236        match self {
237            #[cfg(feature = "elf")]
238            KnownType::ELF(e) => e.contents,
239
240            #[cfg(feature = "pe32")]
241            KnownType::EXE(e) => e.contents,
242
243            #[cfg(feature = "macho")]
244            KnownType::MachO(m) => m.contents,
245
246            #[cfg(feature = "macho")]
247            KnownType::FatMachO(m) => m.contents,
248
249            #[cfg(feature = "pef")]
250            KnownType::PEF(p) => p.contents,
251
252            #[cfg(feature = "office95")]
253            KnownType::Office95(p) => p.contents,
254
255            #[cfg(feature = "pdf")]
256            KnownType::PDF(p) => p.contents,
257
258            #[cfg(feature = "rtf")]
259            KnownType::RTF(r) => r.contents,
260
261            KnownType::Unknown(u) => u,
262        }
263    }
264
265    /// Get the inner document type
266    #[must_use]
267    pub fn doc(self) -> Option<Box<dyn DocumentFile + Send + 'a>> {
268        match self {
269            #[cfg(feature = "office95")]
270            KnownType::Office95(o) => Some(Box::new(o)),
271
272            #[cfg(feature = "pdf")]
273            KnownType::PDF(p) => Some(Box::new(p)),
274
275            #[cfg(feature = "rtf")]
276            KnownType::RTF(r) => Some(Box::new(r)),
277
278            _ => None,
279        }
280    }
281}
282
/// Order in which the bytes of a multi-byte value are stored
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Ordering {
    /// Big Endian: the Most Significant Byte (MSB) comes first
    BigEndian,

    /// Little Endian: the Least Significant Byte (LSB) comes first
    LittleEndian,

    /// Both orderings may be used by an application within the same file
    BiEndian,
}
295
296impl Display for Ordering {
297    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
298        match self {
299            Ordering::BigEndian => write!(f, "Big Endian"),
300            Ordering::LittleEndian => write!(f, "Little Endian"),
301            Ordering::BiEndian => write!(f, "Bi-Endian"),
302        }
303    }
304}