Skip to main content

malwaredb_types/exec/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2
3use std::collections::{HashMap, HashSet};
4use std::fmt::{Display, Formatter};
5
6use chrono::{DateTime, Utc};
7use fuzzyhash::FuzzyHash;
8use md5::{Digest, Md5};
9use uuid::Uuid;
10
/// Parsing of ELF files
#[cfg(feature = "elf")]
#[cfg_attr(docsrs, doc(cfg(feature = "elf")))]
pub mod elf;

/// Parsing of Mach-O files
#[cfg(feature = "macho")]
#[cfg_attr(docsrs, doc(cfg(feature = "macho")))]
pub mod macho;

/// Parsing of PE32 & EXE files, as well as anything else carrying an MZ header
#[cfg(feature = "pe32")]
#[cfg_attr(docsrs, doc(cfg(feature = "pe32")))]
pub mod pe32;

/// Parsing of PEF (Preferred Executable File) files, a legacy format
#[cfg(feature = "pef")]
#[cfg_attr(docsrs, doc(cfg(feature = "pef")))]
pub mod pef;
30
/// CPU architectures an executable file can target
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Architecture {
    /// DEC Alpha
    ///
    /// See <https://en.wikipedia.org/wiki/DEC_Alpha>
    Alpha,

    /// DEC Alpha, 64-bit
    Alpha64,

    /// ARM, 32-bit
    ///
    /// See <https://en.wikipedia.org/wiki/ARM_architecture_family>
    ARM,

    /// ARM Thumb
    ///
    /// See <https://en.wikipedia.org/wiki/ARM_architecture_family>
    ARMThumb,

    /// ARM, 64-bit (also called Aarch64)
    ///
    /// See <https://en.wikipedia.org/wiki/ARM_architecture_family>
    ARM64,

    /// Hitachi SH3
    ///
    /// See <https://en.wikipedia.org/wiki/SuperH>
    HitachiSH3,

    /// Hitachi SH4
    ///
    /// See <https://en.wikipedia.org/wiki/SuperH>
    HitachiSH4,

    /// Hitachi SH5
    ///
    /// See <https://en.wikipedia.org/wiki/SuperH>
    HitachiSH5,

    /// AT&T Hobbit
    ///
    /// See <https://en.wikipedia.org/wiki/AT%26T_Hobbit>
    Hobbit,

    /// Intel Itanium
    ///
    /// See <https://en.wikipedia.org/wiki/Itanium>
    Itanium,

    /// LoongArch (Loongson), 32-bit
    ///
    /// See <https://en.wikipedia.org/wiki/Loongson>
    LoongArch32,

    /// LoongArch (Loongson), 64-bit
    ///
    /// See <https://en.wikipedia.org/wiki/Loongson>
    LoongArch64,

    /// Motorola 68000 (68k)
    ///
    /// See <https://en.wikipedia.org/wiki/Motorola_68000>
    M68k,

    /// Motorola 88000 (88k)
    ///
    /// See <https://en.wikipedia.org/wiki/Motorola_88000>
    M88k,

    /// MIPS, 32-bit, big endian
    ///
    /// See <https://en.wikipedia.org/wiki/MIPS_architecture>
    MIPS,

    /// MIPS, 64-bit, big endian
    ///
    /// See <https://en.wikipedia.org/wiki/MIPS_architecture>
    MIPS64,

    /// MIPS, 32-bit, little endian
    ///
    /// See <https://en.wikipedia.org/wiki/MIPS_architecture>
    MIPSEL,

    /// MIPS, 64-bit, little endian
    ///
    /// See <https://en.wikipedia.org/wiki/MIPS_architecture>
    MIPSEL64,

    /// IBM Power PC, 32-bit, big endian
    ///
    /// See <https://en.wikipedia.org/wiki/PowerPC>
    PowerPC,

    /// IBM Power PC, 64-bit, big endian
    ///
    /// See <https://en.wikipedia.org/wiki/PowerPC>
    PowerPC64,

    /// IBM Power PC, 32-bit, little endian
    ///
    /// See <https://en.wikipedia.org/wiki/PowerPC>
    PowerPCLE,

    /// IBM Power PC, 64-bit, little endian
    ///
    /// See <https://en.wikipedia.org/wiki/PowerPC>
    PowerPC64LE,

    /// RISC-V, 32-bit
    ///
    /// See <https://en.wikipedia.org/wiki/RISC-V>
    RISCV,

    /// RISC-V, 64-bit
    ///
    /// See <https://en.wikipedia.org/wiki/RISC-V>
    RISCV64,

    /// RISC-V, 128-bit
    ///
    /// See <https://en.wikipedia.org/wiki/RISC-V>
    RISCV128,

    /// Sun (now Oracle) Sparc, 32-bit
    ///
    /// See <https://en.wikipedia.org/wiki/SPARC>
    Sparc,

    /// Sun (now Oracle) Sparc, 64-bit
    ///
    /// See <https://en.wikipedia.org/wiki/SPARC>
    Sparc64,

    /// IBM s390 mainframe, 32-bit
    ///
    /// See <https://en.wikipedia.org/wiki/IBM_System/390>
    S390,

    /// IBM s390x mainframe, 64-bit
    ///
    /// See <https://en.wikipedia.org/wiki/IBM_System/390>
    S390x,

    /// Intel (or AMD) x86, 32-bit
    ///
    /// See <https://en.wikipedia.org/wiki/X86>
    X86,

    /// Intel (or AMD) x86, 64-bit
    ///
    /// See <https://en.wikipedia.org/wiki/X86-64>
    X86_64,

    /// Some other CPU type.
    ///
    /// NOTE(review): the payload is presumably the raw machine identifier read
    /// from the file header; confirm against the format parsers.
    Other(u32),

    /// The CPU type is not known
    Unknown,
}
163
164impl Architecture {
165    /// Static string representation
166    #[must_use]
167    pub fn as_str(&self) -> &'static str {
168        match self {
169            Architecture::Alpha => "DEC Alpha",
170            Architecture::Alpha64 => "DEC Alpha64",
171            Architecture::ARM => "ARM",
172            Architecture::ARMThumb => "ARM Thumb",
173            Architecture::ARM64 => "ARM64",
174            Architecture::HitachiSH3 => "Hitachi SH3",
175            Architecture::HitachiSH4 => "Hitachi SH4",
176            Architecture::HitachiSH5 => "Hitachi SH5",
177            Architecture::Hobbit => "AT&T Hobbit",
178            Architecture::Itanium => "Intel Itanium",
179            Architecture::LoongArch32 => "LoongArch",
180            Architecture::LoongArch64 => "LoongArch64",
181            Architecture::M68k => "M68k",
182            Architecture::M88k => "M88k",
183            Architecture::MIPS => "MIPS",
184            Architecture::MIPS64 => "MIPS64",
185            Architecture::MIPSEL => "MIPSEL",
186            Architecture::MIPSEL64 => "MIPSEL64",
187            Architecture::PowerPC => "PowerPC",
188            Architecture::PowerPC64 => "PowerPC64",
189            Architecture::PowerPCLE => "PowerPCLE",
190            Architecture::PowerPC64LE => "PowerPC64LE",
191            Architecture::RISCV => "RISC-V",
192            Architecture::RISCV64 => "RISC-V 64",
193            Architecture::RISCV128 => "RISC-V 128",
194            Architecture::Sparc => "Sparc",
195            Architecture::Sparc64 => "Sparc64",
196            Architecture::S390 => "S390",
197            Architecture::S390x => "S390x",
198            Architecture::X86 => "x86",
199            Architecture::X86_64 => "x86_64",
200            Architecture::Other(_) => "Other",
201            Architecture::Unknown => "Unknown architecture, or architecture-independent",
202        }
203    }
204}
205
206impl Display for Architecture {
207    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
208        match self {
209            Architecture::Other(other) => write!(f, "Other: 0x{other:02X}"),
210            a => write!(f, "{}", a.as_str()),
211        }
212    }
213}
214
/// Operating systems an executable file can target
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OperatingSystem {
    /// AIX, from IBM
    AIX,

    /// Linux; also covers the "System V" type found in ELFs
    Linux,

    /// FreeBSD
    FreeBSD,

    /// OpenBSD
    OpenBSD,

    /// NetBSD
    NetBSD,

    /// HP-UX, from HP
    HPUX,

    /// Irix, from SGI
    Irix,

    /// Solaris, from Sun and later Oracle
    Solaris,

    /// A Unix or Unix-like system which could not be identified further
    UnknownUnixLike,

    /// Haiku, the successor to Be OS
    Haiku,

    /// Apple's Mac OS X, now called macOS
    MacOS,

    /// Apple's older Mac OS, now referred to as Classic Mac OS
    // The underscore is part of the established public variant name.
    #[allow(non_camel_case_types)]
    MacOS_Classic,

    /// MS-DOS, IBM-DOS, or Free DOS
    DOS,

    /// Microsoft Windows
    Windows,

    /// Something else.
    ///
    /// NOTE(review): the payload is presumably the raw OS identifier read from
    /// the file header; confirm against the format parsers.
    Other(u16),
}
264
265impl OperatingSystem {
266    /// Static string representation
267    #[must_use]
268    pub fn as_str(&self) -> &'static str {
269        match self {
270            OperatingSystem::AIX => "AIX",
271            OperatingSystem::Linux => "Linux",
272            OperatingSystem::FreeBSD => "FreeBSD",
273            OperatingSystem::OpenBSD => "OpenBSD",
274            OperatingSystem::NetBSD => "NetBSD",
275            OperatingSystem::HPUX => "HP-UX",
276            OperatingSystem::Irix => "Irix",
277            OperatingSystem::Solaris => "Solaris",
278            OperatingSystem::UnknownUnixLike => "Unknown Unix or Unix-like",
279            OperatingSystem::Haiku => "Haiku",
280            OperatingSystem::MacOS => "Mac OS (or maybe iOS)",
281            OperatingSystem::MacOS_Classic => "Classic Mac OS (7.0 - 9.2)",
282            OperatingSystem::DOS => "MS-DOS or compatible",
283            OperatingSystem::Windows => "Windows",
284            OperatingSystem::Other(_) => "Other",
285        }
286    }
287}
288
289impl Display for OperatingSystem {
290    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
291        match self {
292            OperatingSystem::Other(other) => write!(f, "Other: 0x{other:02X}"),
293            o => write!(f, "{}", o.as_str()),
294        }
295    }
296}
297
/// Kind of binary file which contains machine code
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExecutableType {
    /// Core file, produced by a crash
    Core,

    /// Shared library
    Library,

    /// Program or application which can be executed directly
    Program,

    /// Something else.
    ///
    /// NOTE(review): the payload is presumably the raw type value read from
    /// the file header; confirm against the format parsers.
    Unknown(u16),
}
313
314impl ExecutableType {
315    /// Static string representation
316    #[must_use]
317    pub fn as_str(&self) -> &'static str {
318        match self {
319            ExecutableType::Core => "Core file",
320            ExecutableType::Library => "Shared library",
321            ExecutableType::Program => "Program/Application",
322            ExecutableType::Unknown(_) => "Unknown",
323        }
324    }
325}
326
327impl Display for ExecutableType {
328    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
329        match self {
330            ExecutableType::Unknown(other) => write!(f, "Unknown 0x{other:02X}"),
331            x => write!(f, "{}", x.as_str()),
332        }
333    }
334}
335
336/// Common functions for executable files
337pub trait ExecutableFile {
338    /// Get the architecture type
339    fn architecture(&self) -> Option<Architecture>;
340
341    /// Get the pointer size, 32- or 64-bit
342    fn pointer_size(&self) -> usize;
343
344    /// Get the operating system type for a binary
345    fn operating_system(&self) -> OperatingSystem;
346
347    /// Get the compilation timestamp, if available
348    fn compiled_timestamp(&self) -> Option<DateTime<Utc>>;
349
350    /// Number of sections for a binary
351    fn num_sections(&self) -> u32;
352
353    /// Vec of sections for the binary
354    fn sections(&self) -> Option<&Sections<'_>>;
355
356    /// Import hash of the binary
357    fn import_hash(&self) -> Option<Uuid>;
358
359    /// `SSDeep` fuzzy hash of the binary
360    fn fuzzy_imports(&self) -> Option<String>;
361}
362
/// One section of an executable file
#[derive(Debug, Clone, PartialEq)]
pub struct Section<'a> {
    /// Section name; may be empty, and is not a reliable way to identify the section's attributes
    pub name: String,

    /// `true` when the execute bit was set
    pub is_executable: bool,

    /// Size of the section
    pub size: usize,

    /// Offset within the file at which the section starts
    pub offset: usize,

    /// Address of the section once loaded into memory; not used by all executable types
    pub virtual_address: u32,

    /// Size of the section once loaded into memory; not used by all executable types
    pub virtual_size: u32,

    /// Entropy of the section
    pub entropy: f32,

    /// Borrowed slice of the section's bytes, when present
    pub data: Option<&'a [u8]>,
}

/// Collection of [`Section`]s
type Sections<'a> = Vec<Section<'a>>;
392
393impl Display for Section<'_> {
394    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
395        write!(
396            f,
397            "{} at 0x{:02x}, size 0x{:02x}, entropy {:.2}",
398            self.name, self.offset, self.size, self.entropy
399        )?;
400        if self.virtual_address > 0 {
401            write!(f, ", v address: 0x{:02x}", self.virtual_address)?;
402        }
403        if self.is_executable {
404            write!(f, " - executable")?;
405        }
406        Ok(())
407    }
408}
409
/// A single import, normalized across the various executable file formats
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Import {
    /// Library file: .dll in Windows, .so in Unix/Linux, .dylib in macOS
    pub library: String,

    /// Name of the imported function
    pub function: String,
}
419
420impl Display for Import {
421    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
422        write!(f, "{}:{}", self.library, self.function)
423    }
424}
425
426/// Collection of import data, normalized across various executable file formats
427#[derive(Clone, Debug, Default, Eq, PartialEq)]
428pub struct Imports {
429    /// The collection of found imports
430    pub imports: Vec<Import>,
431
432    /// The total number of imports which should have been found, in case some couldn't be parsed
433    pub expected_imports: u32,
434}
435
436impl Display for Imports {
437    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
438        for import in &self.imports {
439            writeln!(f, "{import}")?;
440        }
441        Ok(())
442    }
443}
444
445impl Imports {
446    /// Build a string with library.function for each pair, sorted.
447    #[allow(clippy::case_sensitive_file_extension_comparisons)]
448    #[must_use]
449    pub fn build_import_string(&self) -> String {
450        // A HashSet probably isn't needed, but malware might do something funny.
451        let mut imports_map: HashMap<String, HashSet<String>> = HashMap::new();
452
453        // Collect all function names by library
454        for import in &self.imports {
455            let mut lib = import.library.to_lowercase();
456            // These aren't actual file names from disk, but from a parsed binary.
457            if lib.ends_with(".dll") {
458                lib = lib.replace(".dll", "");
459            } else if lib.ends_with(".sys") {
460                lib = lib.replace(".sys", "");
461            } else if let Some(idx) = lib.find(".so") {
462                lib.truncate(lib.len() - idx);
463            }
464
465            if !imports_map.contains_key(&lib) {
466                imports_map.insert(lib.clone(), HashSet::new());
467            }
468
469            if let Some(import_ref) = imports_map.get_mut(&lib) {
470                import_ref.insert(import.function.to_lowercase());
471            }
472        }
473
474        // Sort the libraries
475        let mut libs: Vec<&String> = imports_map.keys().collect();
476        libs.sort();
477
478        // Get the mapping of lib.func
479        let mut imports_string = Vec::new();
480        for lib in libs {
481            // Sort the functions
482            if let Some(functions) = imports_map.get(lib) {
483                let mut functions = Vec::from_iter(functions);
484                functions.sort();
485                for function in &functions {
486                    imports_string.push(format!("{lib}.{function}"));
487                }
488            }
489        }
490
491        imports_string.join(",")
492    }
493
494    /// The Import Hash, or `ImpHash` is the MD5 of the imports string
495    #[must_use]
496    pub fn hash(&self) -> Uuid {
497        let mut hasher = Md5::new();
498        hasher.update(self.build_import_string());
499        let result = hasher.finalize();
500
501        // This won't panic since the MD5 hash is 16 bytes long
502        Uuid::from_bytes(uuid::Bytes::from(result))
503    }
504
505    /// The fuzzy import hash is the `SSDeep` hash of the import string
506    #[must_use]
507    pub fn fuzzy_hash(&self) -> String {
508        let import_string = self.build_import_string();
509        let fuzzy = FuzzyHash::new(import_string.into_bytes());
510        fuzzy.to_string()
511    }
512}