malwaredb_types/exec/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2
3use std::collections::{HashMap, HashSet};
4use std::fmt::{Display, Formatter};
5
6use chrono::{DateTime, Utc};
7use fuzzyhash::FuzzyHash;
8use md5::{Digest, Md5};
9
10/// ELF parsing
11#[cfg_attr(docsrs, doc(cfg(feature = "elf")))]
12#[cfg(feature = "elf")]
13pub mod elf;
14
15/// Mach-O parsing
16#[cfg_attr(docsrs, doc(cfg(feature = "macho")))]
17#[cfg(feature = "macho")]
18pub mod macho;
19
20/// Parsing of PE32 & EXE files, and anything else with an MZ header
21#[cfg_attr(docsrs, doc(cfg(feature = "pe32")))]
22#[cfg(feature = "pe32")]
23pub mod pe32;
24
25/// PEF (Preferred Executable File) parsing, a legacy format
26#[cfg_attr(docsrs, doc(cfg(feature = "pef")))]
27#[cfg(feature = "pef")]
28pub mod pef;
29
/// CPU architectures an executable can target.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub enum Architecture {
    /// DEC Alpha.
    /// <https://en.wikipedia.org/wiki/DEC_Alpha>
    Alpha,

    /// DEC Alpha, 64-bit.
    Alpha64,

    /// ARM, 32-bit.
    /// <https://en.wikipedia.org/wiki/ARM_architecture_family>
    ARM,

    /// ARM Thumb.
    /// <https://en.wikipedia.org/wiki/ARM_architecture_family>
    ARMThumb,

    /// ARM, 64-bit (also called Aarch64).
    /// <https://en.wikipedia.org/wiki/ARM_architecture_family>
    ARM64,

    /// Hitachi SH3.
    /// <https://en.wikipedia.org/wiki/SuperH>
    HitachiSH3,

    /// Hitachi SH4.
    /// <https://en.wikipedia.org/wiki/SuperH>
    HitachiSH4,

    /// Hitachi SH5.
    /// <https://en.wikipedia.org/wiki/SuperH>
    HitachiSH5,

    /// AT&T Hobbit.
    /// <https://en.wikipedia.org/wiki/AT%26T_Hobbit>
    Hobbit,

    /// Intel Itanium.
    /// <https://en.wikipedia.org/wiki/Itanium>
    Itanium,

    /// LoongArch (Loongson), 32-bit.
    /// <https://en.wikipedia.org/wiki/Loongson>
    LoongArch32,

    /// LoongArch (Loongson), 64-bit.
    /// <https://en.wikipedia.org/wiki/Loongson>
    LoongArch64,

    /// Motorola 68000 family (68k).
    /// <https://en.wikipedia.org/wiki/Motorola_68000>
    M68k,

    /// Motorola 88000 family (88k).
    /// <https://en.wikipedia.org/wiki/Motorola_88000>
    M88k,

    /// MIPS, 32-bit, big endian.
    /// <https://en.wikipedia.org/wiki/MIPS_architecture>
    MIPS,

    /// MIPS, 64-bit, big endian.
    /// <https://en.wikipedia.org/wiki/MIPS_architecture>
    MIPS64,

    /// MIPS, 32-bit, little endian.
    /// <https://en.wikipedia.org/wiki/MIPS_architecture>
    MIPSEL,

    /// MIPS, 64-bit, little endian.
    /// <https://en.wikipedia.org/wiki/MIPS_architecture>
    MIPSEL64,

    /// IBM PowerPC, 32-bit, big endian.
    /// <https://en.wikipedia.org/wiki/PowerPC>
    PowerPC,

    /// IBM PowerPC, 64-bit, big endian.
    /// <https://en.wikipedia.org/wiki/PowerPC>
    PowerPC64,

    /// IBM PowerPC, 32-bit, little endian.
    /// <https://en.wikipedia.org/wiki/PowerPC>
    PowerPCLE,

    /// IBM PowerPC, 64-bit, little endian.
    /// <https://en.wikipedia.org/wiki/PowerPC>
    PowerPC64LE,

    /// RISC-V, 32-bit.
    /// <https://en.wikipedia.org/wiki/RISC-V>
    RISCV,

    /// RISC-V, 64-bit.
    /// <https://en.wikipedia.org/wiki/RISC-V>
    RISCV64,

    /// RISC-V, 128-bit.
    /// <https://en.wikipedia.org/wiki/RISC-V>
    RISCV128,

    /// Sun (now Oracle) Sparc, 32-bit.
    /// <https://en.wikipedia.org/wiki/SPARC>
    Sparc,

    /// Sun (now Oracle) Sparc, 64-bit.
    /// <https://en.wikipedia.org/wiki/SPARC>
    Sparc64,

    /// IBM s390 mainframe, 32-bit.
    /// <https://en.wikipedia.org/wiki/IBM_System/390>
    S390,

    /// IBM s390x mainframe, 64-bit.
    /// <https://en.wikipedia.org/wiki/IBM_System/390>
    S390x,

    /// Intel (or AMD) x86, 32-bit.
    /// <https://en.wikipedia.org/wiki/X86>
    X86,

    /// Intel (or AMD) x86, 64-bit.
    /// <https://en.wikipedia.org/wiki/X86-64>
    X86_64,

    /// A CPU type without a dedicated variant; carries a raw numeric identifier.
    Other(u32),

    /// The CPU could not be determined.
    Unknown,
}

impl Architecture {
    /// Fixed, human-readable label for the architecture.
    ///
    /// [`Architecture::Other`] always yields `"Other"` here; the numeric payload is only
    /// rendered by the [`Display`] implementation.
    #[must_use]
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Alpha => "DEC Alpha",
            Self::Alpha64 => "DEC Alpha64",
            Self::ARM => "ARM",
            Self::ARMThumb => "ARM Thumb",
            Self::ARM64 => "ARM64",
            Self::HitachiSH3 => "Hitachi SH3",
            Self::HitachiSH4 => "Hitachi SH4",
            Self::HitachiSH5 => "Hitachi SH5",
            Self::Hobbit => "AT&T Hobbit",
            Self::Itanium => "Intel Itanium",
            Self::LoongArch32 => "LoongArch",
            Self::LoongArch64 => "LoongArch64",
            Self::M68k => "M68k",
            Self::M88k => "M88k",
            Self::MIPS => "MIPS",
            Self::MIPS64 => "MIPS64",
            Self::MIPSEL => "MIPSEL",
            Self::MIPSEL64 => "MIPSEL64",
            Self::PowerPC => "PowerPC",
            Self::PowerPC64 => "PowerPC64",
            Self::PowerPCLE => "PowerPCLE",
            Self::PowerPC64LE => "PowerPC64LE",
            Self::RISCV => "RISC-V",
            Self::RISCV64 => "RISC-V 64",
            Self::RISCV128 => "RISC-V 128",
            Self::Sparc => "Sparc",
            Self::Sparc64 => "Sparc64",
            Self::S390 => "S390",
            Self::S390x => "S390x",
            Self::X86 => "x86",
            Self::X86_64 => "x86_64",
            Self::Other(_) => "Other",
            Self::Unknown => "Unknown architecture, or architecture-independent",
        }
    }
}

impl Display for Architecture {
    /// Writes the static label, except for `Other`, which also shows its value as
    /// upper-case hexadecimal padded to at least two digits.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        if let Self::Other(code) = self {
            write!(f, "Other: 0x{code:02X}")
        } else {
            f.write_str(self.as_str())
        }
    }
}
213
/// Operating systems an executable can target.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub enum OperatingSystem {
    /// IBM AIX.
    AIX,

    /// Linux (also covers the "System V" type found in ELFs).
    Linux,

    /// FreeBSD.
    FreeBSD,

    /// OpenBSD.
    OpenBSD,

    /// NetBSD.
    NetBSD,

    /// HP-UX from HP.
    HPUX,

    /// Irix from SGI.
    Irix,

    /// Solaris, from Sun and later Oracle.
    Solaris,

    /// A Unix or Unix-like system that could not be identified further.
    UnknownUnixLike,

    /// Haiku, the successor to Be OS.
    Haiku,

    /// Apple's Mac OS X (now macOS).
    MacOS,

    /// Apple's older Mac OS, now referred to as Classic Mac OS.
    // The underscore keeps this variant visually distinct from `MacOS`.
    #[allow(non_camel_case_types)]
    MacOS_Classic,

    /// MS-DOS, IBM-DOS, or Free DOS.
    DOS,

    /// Microsoft Windows.
    Windows,

    /// An operating system without a dedicated variant; carries a raw numeric identifier.
    Other(u16),
}

impl OperatingSystem {
    /// Fixed, human-readable label for the operating system.
    ///
    /// [`OperatingSystem::Other`] always yields `"Other"` here; the numeric payload is only
    /// rendered by the [`Display`] implementation.
    #[must_use]
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::AIX => "AIX",
            Self::Linux => "Linux",
            Self::FreeBSD => "FreeBSD",
            Self::OpenBSD => "OpenBSD",
            Self::NetBSD => "NetBSD",
            Self::HPUX => "HP-UX",
            Self::Irix => "Irix",
            Self::Solaris => "Solaris",
            Self::UnknownUnixLike => "Unknown Unix or Unix-like",
            Self::Haiku => "Haiku",
            Self::MacOS => "Mac OS (or maybe iOS)",
            Self::MacOS_Classic => "Classic Mac OS (7.0 - 9.2)",
            Self::DOS => "MS-DOS or compatible",
            Self::Windows => "Windows",
            Self::Other(_) => "Other",
        }
    }
}

impl Display for OperatingSystem {
    /// Writes the static label, except for `Other`, which also shows its value as
    /// upper-case hexadecimal padded to at least two digits.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        if let Self::Other(code) = self {
            write!(f, "Other: 0x{code:02X}")
        } else {
            f.write_str(self.as_str())
        }
    }
}
296
/// Type of binary file containing machine code.
///
/// Derives `Hash` like the sibling [`Architecture`] and [`OperatingSystem`] enums, so values
/// can be used as `HashMap`/`HashSet` keys.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub enum ExecutableType {
    /// Core file, from a crash.
    Core,

    /// Shared library.
    Library,

    /// Directly executable program or application.
    Program,

    /// A type without a dedicated variant; carries a raw numeric identifier.
    Unknown(u16),
}

impl ExecutableType {
    /// Fixed, human-readable label for the executable type.
    ///
    /// [`ExecutableType::Unknown`] always yields `"Unknown"` here; the numeric payload is only
    /// rendered by the [`Display`] implementation.
    #[must_use]
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Core => "Core file",
            Self::Library => "Shared library",
            Self::Program => "Program/Application",
            Self::Unknown(_) => "Unknown",
        }
    }
}

impl Display for ExecutableType {
    /// Writes the static label, except for `Unknown`, which also shows its value as
    /// upper-case hexadecimal padded to at least two digits.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        if let Self::Unknown(code) = self {
            write!(f, "Unknown 0x{code:02X}")
        } else {
            f.write_str(self.as_str())
        }
    }
}
334
/// Common functions for executable files
///
/// Each method is a read-only accessor (`&self`) over an already-parsed binary; the
/// format-specific parsers are expected to implement this trait.
pub trait ExecutableFile {
    /// Get the architecture type, or `None` if the implementation has none to report
    fn architecture(&self) -> Option<Architecture>;

    /// Get the pointer size, 32- or 64-bit
    fn pointer_size(&self) -> usize;

    /// Get the operating system type for a binary
    fn operating_system(&self) -> OperatingSystem;

    /// Get the compilation timestamp (UTC), if available
    fn compiled_timestamp(&self) -> Option<DateTime<Utc>>;

    /// Number of sections for a binary
    fn num_sections(&self) -> u32;

    /// Borrowed `Vec` of sections for the binary, if the implementation has one
    fn sections(&self) -> Option<&Sections<'_>>;

    /// Import hash of the binary, if available
    // NOTE(review): presumably a textual encoding of `Imports::hash` -- confirm with implementors.
    fn import_hash(&self) -> Option<String>;

    /// `SSDeep` fuzzy hash of the binary's imports, if available
    // NOTE(review): presumably produced by `Imports::fuzzy_hash` -- confirm with implementors.
    fn fuzzy_imports(&self) -> Option<String>;
}
361
/// One section of an executable file.
#[derive(Clone, Debug, PartialEq)]
pub struct Section<'a> {
    /// Section name; may be empty and is not a reliable way to identify the section's attributes
    pub name: String,

    /// Whether the execute bit was set
    pub is_executable: bool,

    /// Size of the section
    pub size: usize,

    /// Offset in the file where the section starts
    pub offset: usize,

    /// Address of the section once loaded into memory; not present for all executable types
    pub virtual_address: u32,

    /// Size of the section once loaded into memory; not present for all executable types
    pub virtual_size: u32,

    /// Entropy of the section
    pub entropy: f32,

    /// Borrowed slice of this section's bytes, when available
    pub data: Option<&'a [u8]>,
}

/// The sections of one executable, as a `Vec` of [`Section`].
type Sections<'a> = Vec<Section<'a>>;

impl Display for Section<'_> {
    /// One-line summary: name, file offset, size and entropy, followed by the virtual
    /// address when it is non-zero and an "executable" marker when the flag is set.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self {
            name,
            offset,
            size,
            entropy,
            virtual_address,
            is_executable,
            ..
        } = self;

        write!(
            f,
            "{name} at 0x{offset:02x}, size 0x{size:02x}, entropy {entropy:.2}"
        )?;
        if *virtual_address != 0 {
            write!(f, ", v address: 0x{virtual_address:02x}")?;
        }
        if *is_executable {
            f.write_str(" - executable")?;
        }
        Ok(())
    }
}
408
/// A single imported function, normalized across various executable file formats.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Import {
    /// Library file: .dll in Windows, .so in Unix/Linux, .dylib in macOS
    pub library: String,

    /// Name of the imported function
    pub function: String,
}

impl Display for Import {
    /// Renders the pair as `library:function`, with both names written verbatim.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { library, function } = self;
        write!(f, "{library}:{function}")
    }
}
424
425/// Collection of import data, normalized across various executable file formats
426#[derive(Clone, Debug, Default, Eq, PartialEq)]
427pub struct Imports {
428    /// The collection of found imports
429    pub imports: Vec<Import>,
430
431    /// The total number of imports which should have been found, in case some couldn't be parsed
432    pub expected_imports: u32,
433}
434
435impl Display for Imports {
436    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
437        for import in &self.imports {
438            writeln!(f, "{import}")?;
439        }
440        Ok(())
441    }
442}
443
444impl Imports {
445    /// Build a string with library.function for each pair, sorted.
446    #[allow(clippy::case_sensitive_file_extension_comparisons)]
447    #[must_use]
448    pub fn build_import_string(&self) -> String {
449        // A HashSet probably isn't needed, but malware might do something funny.
450        let mut imports_map: HashMap<String, HashSet<String>> = HashMap::new();
451
452        // Collect all function names by library
453        for import in &self.imports {
454            let mut lib = import.library.to_lowercase();
455            // These aren't actual file names from disk, but from a parsed binary.
456            if lib.ends_with(".dll") {
457                lib = lib.replace(".dll", "");
458            } else if lib.ends_with(".sys") {
459                lib = lib.replace(".sys", "");
460            } else if let Some(idx) = lib.find(".so") {
461                lib.truncate(lib.len() - idx);
462            }
463
464            if !imports_map.contains_key(&lib) {
465                imports_map.insert(lib.clone(), HashSet::new());
466            }
467
468            if let Some(import_ref) = imports_map.get_mut(&lib) {
469                import_ref.insert(import.function.to_lowercase());
470            }
471        }
472
473        // Sort the libraries
474        let mut libs: Vec<&String> = imports_map.keys().collect();
475        libs.sort();
476
477        // Get the mapping of lib.func
478        let mut imports_string = Vec::new();
479        for lib in libs {
480            // Sort the functions
481            if let Some(functions) = imports_map.get(lib) {
482                let mut functions = Vec::from_iter(functions);
483                functions.sort();
484                for function in &functions {
485                    imports_string.push(format!("{lib}.{function}"));
486                }
487            }
488        }
489
490        imports_string.join(",")
491    }
492
493    /// The Import Hash, or `ImpHash` is the MD5 of the imports string
494    #[must_use]
495    pub fn hash(&self) -> Vec<u8> {
496        let mut hasher = Md5::new();
497        hasher.update(self.build_import_string());
498        let result = hasher.finalize();
499        result.to_vec()
500    }
501
502    /// The fuzzy import hash is the `SSDeep` hash of the import string
503    #[must_use]
504    pub fn fuzzy_hash(&self) -> String {
505        let import_string = self.build_import_string();
506        let fuzzy = FuzzyHash::new(import_string.into_bytes());
507        fuzzy.to_string()
508    }
509}