malwaredb_types/exec/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2
3use std::collections::{HashMap, HashSet};
4use std::fmt::{Display, Formatter};
5
6use chrono::{DateTime, Utc};
7use fuzzyhash::FuzzyHash;
8use md5::{Digest, Md5};
9
10/// ELF parsing
11#[cfg(feature = "elf")]
12pub mod elf;
13
14/// Mach-O parsing
15#[cfg(feature = "macho")]
16pub mod macho;
17
18/// Parsing of PE32 & EXE files, and anything else with an MZ header
19#[cfg(feature = "pe32")]
20pub mod pe32;
21
22/// PEF (Preferred Executable File) parsing
23#[cfg(feature = "pef")]
24pub mod pef;
25
/// CPU architectures an executable can target.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub enum Architecture {
    /// DEC Alpha.
    /// <https://en.wikipedia.org/wiki/DEC_Alpha>
    Alpha,

    /// 32-bit ARM.
    /// <https://en.wikipedia.org/wiki/ARM_architecture_family>
    ARM,

    /// ARM Thumb instruction set.
    /// <https://en.wikipedia.org/wiki/ARM_architecture_family>
    ARMThumb,

    /// 64-bit ARM, also called AArch64.
    /// <https://en.wikipedia.org/wiki/ARM_architecture_family>
    ARM64,

    /// Hitachi SuperH SH3.
    /// <https://en.wikipedia.org/wiki/SuperH>
    HitachiSH3,

    /// Hitachi SuperH SH4.
    /// <https://en.wikipedia.org/wiki/SuperH>
    HitachiSH4,

    /// Hitachi SuperH SH5.
    /// <https://en.wikipedia.org/wiki/SuperH>
    HitachiSH5,

    /// AT&T Hobbit.
    /// <https://en.wikipedia.org/wiki/AT%26T_Hobbit>
    Hobbit,

    /// Intel Itanium.
    /// <https://en.wikipedia.org/wiki/Itanium>
    Itanium,

    /// 32-bit Loongson (LoongArch).
    /// <https://en.wikipedia.org/wiki/Loongson>
    LoongArch32,

    /// 64-bit Loongson (LoongArch).
    /// <https://en.wikipedia.org/wiki/Loongson>
    LoongArch64,

    /// Motorola 68000 family (68k).
    /// <https://en.wikipedia.org/wiki/Motorola_68000>
    M68k,

    /// Motorola 88000 family (88k).
    /// <https://en.wikipedia.org/wiki/Motorola_88000>
    M88k,

    /// 32-bit MIPS, big endian.
    /// <https://en.wikipedia.org/wiki/MIPS_architecture>
    MIPS,

    /// 64-bit MIPS, big endian.
    /// <https://en.wikipedia.org/wiki/MIPS_architecture>
    MIPS64,

    /// 32-bit MIPS, little endian.
    /// <https://en.wikipedia.org/wiki/MIPS_architecture>
    MIPSEL,

    /// 64-bit MIPS, little endian.
    /// <https://en.wikipedia.org/wiki/MIPS_architecture>
    MIPSEL64,

    /// 32-bit IBM PowerPC, big endian.
    /// <https://en.wikipedia.org/wiki/PowerPC>
    PowerPC,

    /// 64-bit IBM PowerPC, big endian.
    /// <https://en.wikipedia.org/wiki/PowerPC>
    PowerPC64,

    /// 32-bit IBM PowerPC, little endian.
    /// <https://en.wikipedia.org/wiki/PowerPC>
    PowerPCLE,

    /// 64-bit IBM PowerPC, little endian.
    /// <https://en.wikipedia.org/wiki/PowerPC>
    PowerPC64LE,

    /// 32-bit RISC-V.
    /// <https://en.wikipedia.org/wiki/RISC-V>
    RISCV,

    /// 64-bit RISC-V.
    /// <https://en.wikipedia.org/wiki/RISC-V>
    RISCV64,

    /// 128-bit RISC-V.
    /// <https://en.wikipedia.org/wiki/RISC-V>
    RISCV128,

    /// 32-bit SPARC, from Sun (now Oracle).
    /// <https://en.wikipedia.org/wiki/SPARC>
    Sparc,

    /// 64-bit SPARC, from Sun (now Oracle).
    /// <https://en.wikipedia.org/wiki/SPARC>
    Sparc64,

    /// 32-bit IBM s390 mainframe.
    /// <https://en.wikipedia.org/wiki/IBM_System/390>
    S390,

    /// 64-bit IBM s390x mainframe.
    /// <https://en.wikipedia.org/wiki/IBM_System/390>
    S390x,

    /// 32-bit x86, from Intel or AMD.
    /// <https://en.wikipedia.org/wiki/X86>
    X86,

    /// 64-bit x86, from Intel or AMD.
    /// <https://en.wikipedia.org/wiki/X86-64>
    X86_64,

    /// Some other CPU type; the wrapped number is shown in hex by `Display`.
    Other(u32),

    /// The CPU could not be determined.
    Unknown,
}

impl Architecture {
    /// Fixed, human-readable name of the architecture.
    ///
    /// The number carried by [`Architecture::Other`] is not part of this
    /// string; the `Display` implementation includes it.
    #[must_use]
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Alpha => "DEC Alpha",
            Self::ARM => "ARM",
            Self::ARMThumb => "ARM Thumb",
            Self::ARM64 => "ARM64",
            Self::HitachiSH3 => "Hitachi SH3",
            Self::HitachiSH4 => "Hitachi SH4",
            Self::HitachiSH5 => "Hitachi SH5",
            Self::Hobbit => "AT&T Hobbit",
            Self::Itanium => "Intel Itanium",
            Self::LoongArch32 => "LoongArch",
            Self::LoongArch64 => "LoongArch64",
            Self::M68k => "M68k",
            Self::M88k => "M88k",
            Self::MIPS => "MIPS",
            Self::MIPS64 => "MIPS64",
            Self::MIPSEL => "MIPSEL",
            Self::MIPSEL64 => "MIPSEL64",
            Self::PowerPC => "PowerPC",
            Self::PowerPC64 => "PowerPC64",
            Self::PowerPCLE => "PowerPCLE",
            Self::PowerPC64LE => "PowerPC64LE",
            Self::RISCV => "RISC-V",
            Self::RISCV64 => "RISC-V 64",
            Self::RISCV128 => "RISC-V 128",
            Self::Sparc => "Sparc",
            Self::Sparc64 => "Sparc64",
            Self::S390 => "S390",
            Self::S390x => "S390x",
            Self::X86 => "x86",
            Self::X86_64 => "x86_64",
            Self::Other(_) => "Other",
            Self::Unknown => "Unknown architecture, or architecture-independent",
        }
    }
}

impl Display for Architecture {
    /// Writes the static name, except for `Other`, which also shows its number
    /// as upper-case hex padded to at least two digits.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        if let Self::Other(other) = self {
            write!(f, "Other: 0x{other:02X}")
        } else {
            f.write_str(self.as_str())
        }
    }
}
205
/// Operating systems an executable can target.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub enum OperatingSystem {
    /// AIX, from IBM.
    AIX,

    /// Linux; also covers the "System V" type found in ELFs.
    Linux,

    /// FreeBSD.
    FreeBSD,

    /// OpenBSD.
    OpenBSD,

    /// NetBSD.
    NetBSD,

    /// HP-UX, from HP.
    HPUX,

    /// Irix, from SGI.
    Irix,

    /// Solaris, from Sun and later Oracle.
    Solaris,

    /// A Unix or Unix-like system which could not be identified further.
    UnknownUnixLike,

    /// Haiku, the successor to Be OS.
    Haiku,

    /// Apple's Mac OS X, now called macOS.
    MacOS,

    /// Apple's older Mac OS, now referred to as Classic Mac OS.
    #[allow(non_camel_case_types)]
    MacOS_Classic,

    /// MS-DOS, IBM-DOS, or Free DOS.
    DOS,

    /// Microsoft Windows.
    Windows,

    /// Some other system; the wrapped number is shown in hex by `Display`.
    Other(u16),
}

impl OperatingSystem {
    /// Fixed, human-readable name of the operating system.
    ///
    /// The number carried by [`OperatingSystem::Other`] is not part of this
    /// string; the `Display` implementation includes it.
    #[must_use]
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::AIX => "AIX",
            Self::Linux => "Linux",
            Self::FreeBSD => "FreeBSD",
            Self::OpenBSD => "OpenBSD",
            Self::NetBSD => "NetBSD",
            Self::HPUX => "HP-UX",
            Self::Irix => "Irix",
            Self::Solaris => "Solaris",
            Self::UnknownUnixLike => "Unknown Unix or Unix-like",
            Self::Haiku => "Haiku",
            Self::MacOS => "Mac OS (or maybe iOS)",
            Self::MacOS_Classic => "Classic Mac OS (7.0 - 9.2)",
            Self::DOS => "MS-DOS or compatible",
            Self::Windows => "Windows",
            Self::Other(_) => "Other",
        }
    }
}

impl Display for OperatingSystem {
    /// Writes the static name, except for `Other`, which also shows its number
    /// as upper-case hex padded to at least two digits.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        if let Self::Other(other) = self {
            write!(f, "Other: 0x{other:02X}")
        } else {
            f.write_str(self.as_str())
        }
    }
}
288
/// Kind of binary file holding machine code.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum ExecutableType {
    /// Core file, produced by a crash.
    Core,

    /// Shared library.
    Library,

    /// Program or application which can be executed directly.
    Program,

    /// Some other kind; the wrapped number is shown in hex by `Display`.
    Unknown(u16),
}

impl ExecutableType {
    /// Fixed, human-readable name of the executable type.
    ///
    /// The number carried by [`ExecutableType::Unknown`] is not part of this
    /// string; the `Display` implementation includes it.
    #[must_use]
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Core => "Core file",
            Self::Library => "Shared library",
            Self::Program => "Program/Application",
            Self::Unknown(_) => "Unknown",
        }
    }
}

impl Display for ExecutableType {
    /// Writes the static name, except for `Unknown`, which also shows its
    /// number as upper-case hex padded to at least two digits.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        if let Self::Unknown(other) = self {
            write!(f, "Unknown 0x{other:02X}")
        } else {
            f.write_str(self.as_str())
        }
    }
}
326
/// Common accessors shared by the parsers of the different executable file formats.
///
/// Every method takes `&self` only, so callers can query a parsed binary
/// without knowing which concrete format it is.
pub trait ExecutableFile {
    /// Get the CPU architecture, or `None` if the implementation cannot provide one.
    fn architecture(&self) -> Option<Architecture>;

    /// Get the pointer size of the binary (32- or 64-bit).
    // NOTE(review): presumably expressed in bits (32/64) rather than bytes -- confirm against implementors.
    fn pointer_size(&self) -> usize;

    /// Get the operating system type for a binary.
    fn operating_system(&self) -> OperatingSystem;

    /// Get the compilation timestamp in UTC, if available.
    fn compiled_timestamp(&self) -> Option<DateTime<Utc>>;

    /// Number of sections for a binary.
    fn num_sections(&self) -> u32;

    /// Borrowed list of the binary's sections, if available.
    fn sections(&self) -> Option<&Sections>;

    /// Import hash of the binary, if available.
    // NOTE(review): presumably a text rendering of `Imports::hash` -- confirm the encoding with implementors.
    fn import_hash(&self) -> Option<String>;

    /// `SSDeep` fuzzy hash of the binary's imports, if available.
    // NOTE(review): presumably the value of `Imports::fuzzy_hash` -- confirm with implementors.
    fn fuzzy_imports(&self) -> Option<String>;
}
353
/// One section of an executable file.
#[derive(Clone, Debug, PartialEq)]
pub struct Section<'a> {
    /// Section name; may be empty and is not a reliable way to identify the section's attributes
    pub name: String,

    /// `true` when an execute bit was set on the section
    pub is_executable: bool,

    /// Size of the section
    pub size: usize,

    /// Offset in the file at which the section starts
    pub offset: usize,

    /// Address of the section once loaded into memory; not used by all executable types
    pub virtual_address: u32,

    /// Size of the section once loaded into memory; not used by all executable types
    pub virtual_size: u32,

    /// Entropy of the section
    pub entropy: f32,

    /// Borrowed slice of the section's bytes, when available
    pub data: Option<&'a [u8]>,
}

type Sections<'a> = Vec<Section<'a>>;

impl Display for Section<'_> {
    /// Writes name, file offset, size and entropy, followed by the virtual
    /// address when it is non-zero and an "executable" marker when the flag
    /// is set.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self {
            name,
            is_executable,
            size,
            offset,
            virtual_address,
            entropy,
            ..
        } = self;

        write!(
            f,
            "{} at 0x{:02x}, size 0x{:02x}, entropy {:.2}",
            name, offset, size, entropy
        )?;

        // A zero virtual address is treated as "not present" and left out.
        if *virtual_address != 0 {
            write!(f, ", v address: 0x{:02x}", virtual_address)?;
        }

        if *is_executable {
            f.write_str(" - executable")?;
        }

        Ok(())
    }
}
400
/// A single imported function, normalized across executable file formats.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Import {
    /// Library the function comes from: .dll in Windows, .so in Unix/Linux, .dylib in macOS
    pub library: String,

    /// Name of the imported function
    pub function: String,
}

impl Display for Import {
    /// Writes the import as `library:function`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { library, function } = self;
        write!(f, "{}:{}", library, function)
    }
}
416
417/// Collection of import data, normalized across various executable file formats
418#[derive(Clone, Debug, Default, Eq, PartialEq)]
419pub struct Imports {
420    /// The collection of found imports
421    pub imports: Vec<Import>,
422
423    /// The total number of imports which should have been found, in case some couldn't be parsed
424    pub expected_imports: u32,
425}
426
427impl Display for Imports {
428    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
429        for import in &self.imports {
430            writeln!(f, "{import}")?;
431        }
432        Ok(())
433    }
434}
435
436impl Imports {
437    /// Build a string with library.function for each pair, sorted.
438    #[allow(clippy::case_sensitive_file_extension_comparisons)]
439    #[must_use]
440    pub fn build_import_string(&self) -> String {
441        // A HashSet probably isn't needed, but malware might do something funny.
442        let mut imports_map: HashMap<String, HashSet<String>> = HashMap::new();
443
444        // Collect all function names by library
445        for import in &self.imports {
446            let mut lib = import.library.to_lowercase();
447            // These aren't actual file names from disk, but from a parsed binary.
448            if lib.ends_with(".dll") {
449                lib = lib.replace(".dll", "");
450            } else if lib.ends_with(".sys") {
451                lib = lib.replace(".sys", "");
452            } else if let Some(idx) = lib.find(".so") {
453                lib.truncate(lib.len() - idx);
454            }
455
456            if !imports_map.contains_key(&lib) {
457                imports_map.insert(lib.clone(), HashSet::new());
458            }
459
460            if let Some(import_ref) = imports_map.get_mut(&lib) {
461                import_ref.insert(import.function.to_lowercase());
462            }
463        }
464
465        // Sort the libraries
466        let mut libs: Vec<&String> = imports_map.keys().collect();
467        libs.sort();
468
469        // Get the mapping of lib.func
470        let mut imports_string = Vec::new();
471        for lib in libs {
472            // Sort the functions
473            if let Some(functions) = imports_map.get(lib) {
474                let mut functions = Vec::from_iter(functions);
475                functions.sort();
476                for function in &functions {
477                    imports_string.push(format!("{lib}.{function}"));
478                }
479            }
480        }
481
482        imports_string.join(",")
483    }
484
485    /// The Import Hash, or `ImpHash` is the MD5 of the imports string
486    #[must_use]
487    pub fn hash(&self) -> Vec<u8> {
488        let mut hasher = Md5::new();
489        hasher.update(self.build_import_string());
490        let result = hasher.finalize();
491        result.to_vec()
492    }
493
494    /// The fuzzy import hash is the `SSDeep` hash of the import string
495    #[must_use]
496    pub fn fuzzy_hash(&self) -> String {
497        let import_string = self.build_import_string();
498        let fuzzy = FuzzyHash::new(import_string.into_bytes());
499        fuzzy.to_string()
500    }
501}