// malwaredb_types/exec/elf/mod.rs

1// SPDX-License-Identifier: Apache-2.0
2
3/// Flags for some fields within an ELF header
4pub mod fields;
5
6use crate::exec::{
7    Architecture, ExecutableFile, ExecutableType, Imports, OperatingSystem, Section, Sections,
8};
9use crate::utils::{
10    bytes_offset_match, string_from_offset, u16_from_offset, u32_from_offset, u64_from_offset,
11    EntropyCalc,
12};
13use crate::{Ordering, SpecimenFile};
14
15use std::fmt::{Display, Formatter};
16
17use anyhow::{bail, Result};
18use chrono::{DateTime, Utc};
19use flagset::FlagSet;
20use tracing::{error, info, instrument};
21
22// Wikipedia article:
23// https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
24
25const MAGIC: [u8; 4] = [0x7f, 0x45, 0x4c, 0x46];
26
27/// The struct to partially represent the ELF (Executable and Linkable File) format
28///
29/// This is the file type used for Linux, *BSD (FreeBSD, OpenBSD, NetBSD, etc), Haiku, Solaris,
30/// and possibly some others.
31///
32/// Effort is made to fail gracefully, since malware may not obey all the rules, and some information
33/// is better than none because some part of the data wasn't parsed correctly.
34#[derive(Clone, Debug)]
35pub struct Elf<'a> {
36    /// If the program is 64-bit
37    pub is64bit: bool,
38
39    /// Instruction set architecture for this binary
40    pub arch: Architecture,
41
42    /// If the binary has extra data after the last section, could be used to hide something
43    pub has_overlay: Option<bool>,
44
45    /// Byte ordering for this binary
46    pub ordering: Ordering,
47
48    /// Executable subtype: Program, Library, or Core file?
49    pub executable_type: ExecutableType,
50
51    /// Operating System for this binary
52    pub os: OperatingSystem,
53
54    /// Sections of this binary
55    pub sections: Option<Sections<'a>>,
56
57    /// External libraries used by this application or library
58    pub imports: Option<Imports>,
59
60    /// The path for the ELF loader (or interpreter)
61    pub interpreter: Option<String>,
62
63    /// The array containing the raw bytes used to parse this program
64    pub contents: &'a [u8],
65}
66
67impl<'a> Elf<'a> {
68    /// ELF parsed from a sequence of bytes
69    #[instrument(name = "ELF parser", skip(contents))]
70    pub fn from(contents: &'a [u8]) -> Result<Self> {
71        if !bytes_offset_match(contents, 0, &MAGIC) {
72            bail!("Not an ELF file");
73        }
74
75        let is_64bit = contents[4] == 2;
76        let ordering = {
77            if contents[5] == 2 {
78                Ordering::BigEndian
79            } else {
80                Ordering::LittleEndian
81            }
82        };
83        let mut os = match contents[7] {
84            1 => OperatingSystem::HPUX,
85            2 => OperatingSystem::NetBSD,
86
87            // Assume SystemV is Linux, some compilers don't set this correctly
88            // https://unix.stackexchange.com/questions/132036/why-does-readelf-show-system-v-as-my-os-instead-of-linux
89            0 | 3 => OperatingSystem::Linux,
90            6 => OperatingSystem::Solaris,
91            7 => OperatingSystem::AIX,
92            8 => OperatingSystem::Irix,
93            9 => OperatingSystem::FreeBSD,
94            0xC => OperatingSystem::OpenBSD,
95            other => OperatingSystem::Other(u16::from(other)),
96        };
97
98        let elf_type = match u16_from_offset(contents, 0x10, ordering) {
99            1 | 2 => ExecutableType::Program,
100            3 => ExecutableType::Library,
101            4 => ExecutableType::Core,
102            other => ExecutableType::Unknown(other),
103        };
104
105        let arch = match u16_from_offset(contents, 0x12, ordering) {
106            // https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
107            0 => Architecture::Unknown,
108            2 => Architecture::Sparc,
109            3 => Architecture::X86,
110            4 => Architecture::M68k,
111            5 => Architecture::M88k,
112            8 => {
113                if is_64bit {
114                    Architecture::MIPS64
115                } else {
116                    Architecture::MIPS
117                }
118            }
119            0x0A => {
120                if is_64bit {
121                    Architecture::MIPSEL64
122                } else {
123                    Architecture::MIPSEL
124                }
125            }
126            0x14 => {
127                if ordering == Ordering::BigEndian {
128                    Architecture::PowerPC
129                } else {
130                    Architecture::PowerPCLE
131                }
132            }
133            0x15 => {
134                if ordering == Ordering::BigEndian {
135                    Architecture::PowerPC64
136                } else {
137                    Architecture::PowerPC64LE
138                }
139            }
140            0x16 => {
141                if is_64bit {
142                    Architecture::S390x
143                } else {
144                    Architecture::S390
145                }
146            }
147
148            0x28 => Architecture::ARM,
149            0x29 => Architecture::Alpha,
150            0x2b => Architecture::Sparc64,
151
152            0x32 => Architecture::Itanium,
153            0x3E => Architecture::X86_64,
154
155            0xB7 => Architecture::ARM64,
156
157            0xF3 => {
158                if is_64bit {
159                    Architecture::RISCV64
160                } else {
161                    Architecture::RISCV
162                }
163            }
164
165            0x39d => Architecture::Hobbit,
166
167            other => Architecture::Other(other),
168        };
169
170        // Offset for section table
171        let e_shoff = {
172            if is_64bit {
173                u64_from_offset(contents, 0x28, ordering) as usize
174            } else {
175                u32_from_offset(contents, 0x20, ordering) as usize
176            }
177        };
178
179        let e_phentsize = {
180            if is_64bit {
181                u16_from_offset(contents, 0x36, ordering) as usize
182            } else {
183                u16_from_offset(contents, 0x2A, ordering) as usize
184            }
185        };
186
187        let e_phnum = {
188            if is_64bit {
189                u16_from_offset(contents, 0x38, ordering) as usize
190            } else {
191                u16_from_offset(contents, 0x2C, ordering) as usize
192            }
193        };
194
195        let mut interpreter = None;
196        for p_header_index in 0..e_phnum {
197            let start_index = {
198                if is_64bit {
199                    0x40 + p_header_index * e_phentsize
200                } else {
201                    0x34 + p_header_index * e_phentsize
202                }
203            };
204            let p_type = FlagSet::<fields::ProgramHeaderFlags>::new_truncated(u32_from_offset(
205                contents,
206                start_index,
207                ordering,
208            ));
209
210            if p_type.contains(fields::ProgramHeaderFlags::Interpreter) {
211                let header = &contents[start_index..start_index + e_phentsize];
212                let p_offset = {
213                    if is_64bit {
214                        u64_from_offset(header, 0x08, ordering) as usize
215                    } else {
216                        u32_from_offset(header, 0x04, ordering) as usize
217                    }
218                };
219                let p_filesz = {
220                    if is_64bit {
221                        u64_from_offset(header, 0x20, ordering) as usize
222                    } else {
223                        u32_from_offset(header, 0x10, ordering) as usize
224                    }
225                };
226
227                if p_offset > 0 && p_filesz > 0 {
228                    let interpreter_path =
229                        String::from_utf8(Vec::from(&contents[p_offset..p_offset + p_filesz]))
230                            .map_err(|e| {
231                                error!(
232                                    "Interpreter error {e}, bytes: {:?}",
233                                    &contents[p_offset..p_offset + p_filesz]
234                                );
235                            })
236                            .unwrap_or_default();
237                    if !interpreter_path.is_empty() {
238                        if interpreter_path.contains("/system/runtime_loader") {
239                            os = OperatingSystem::Haiku;
240                        }
241                        interpreter = Some(interpreter_path);
242                    }
243                    break;
244                }
245            }
246        }
247
248        // Size of a section header table entry
249        let e_shentsize = {
250            if is_64bit {
251                u16_from_offset(contents, 0x3A, ordering)
252            } else {
253                u16_from_offset(contents, 0x2E, ordering)
254            }
255        };
256
257        // Number of entries in the section header table
258        let e_shnum = {
259            if is_64bit {
260                u16_from_offset(contents, 0x3C, ordering)
261            } else {
262                u16_from_offset(contents, 0x30, ordering)
263            }
264        };
265
266        // Index of the section header table with section names
267        let e_shstrndx = {
268            if is_64bit {
269                u16_from_offset(contents, 0x3E, ordering)
270            } else {
271                u16_from_offset(contents, 0x32, ordering)
272            }
273        };
274
275        // Find the offset for section name strings
276        let section_names_offset = {
277            if is_64bit {
278                u64_from_offset(
279                    contents,
280                    e_shoff + (e_shstrndx * e_shentsize) as usize + 0x18,
281                    ordering,
282                ) as usize
283            } else {
284                u32_from_offset(
285                    contents,
286                    e_shoff + (e_shstrndx * e_shentsize) as usize + 0x10,
287                    ordering,
288                ) as usize
289            }
290        };
291
292        let mut section_offset = e_shoff;
293        let mut sections = Sections::default();
294        for section_index in 0..e_shnum {
295            let section_name_offset =
296                u32_from_offset(contents, section_offset, ordering) as usize + section_names_offset;
297            let section_name = if section_name_offset < contents.len() {
298                string_from_offset(contents, section_name_offset)
299            } else {
300                info!(
301                    "ELF: section name offset {section_name_offset} greater than buffer length {}.",
302                    contents.len()
303                );
304                String::new()
305            };
306
307            let section_type = FlagSet::<fields::SectionHeaderTypes>::new_truncated(
308                u32_from_offset(contents, section_offset + 0x4, ordering),
309            );
310
311            if section_type.contains(fields::SectionHeaderTypes::DynamicSymbolsTable) {
312                // Parse imports here
313            }
314
315            let this_section_offset = {
316                if is_64bit {
317                    u64_from_offset(contents, section_offset + 0x18, ordering) as usize
318                } else {
319                    u32_from_offset(contents, section_offset + 0x10, ordering) as usize
320                }
321            };
322            let section_size = {
323                if is_64bit {
324                    u64_from_offset(contents, section_offset + 0x20, ordering) as usize
325                } else {
326                    u32_from_offset(contents, section_offset + 0x14, ordering) as usize
327                }
328            };
329            let section_flags = {
330                if is_64bit {
331                    u64_from_offset(contents, section_offset + 0x08, ordering) as usize
332                } else {
333                    u32_from_offset(contents, section_offset + 0x08, ordering) as usize
334                }
335            };
336
337            if this_section_offset + section_size <= contents.len() {
338                let section_bytes =
339                    &contents[this_section_offset..this_section_offset + section_size];
340
341                sections.push(Section {
342                    name: section_name,
343                    is_executable: (section_flags & 4) != 0,
344                    size: section_size,
345                    offset: this_section_offset,
346                    virtual_address: 0,
347                    virtual_size: 0,
348                    entropy: section_bytes.to_vec().entropy(),
349                    data: Some(section_bytes),
350                });
351            } else {
352                error!("Section {section_index}: {section_name} offset {this_section_offset} + size {section_size} (end {}) is beyond the ELF buffer {}!", this_section_offset + section_size, contents.len());
353            }
354
355            // Advance to the next section
356            section_offset += e_shentsize as usize;
357        }
358
359        Ok(Self {
360            is64bit: is_64bit,
361            arch,
362            has_overlay: Some(section_offset < contents.len()),
363            ordering,
364            executable_type: elf_type,
365            os,
366            sections: Some(sections),
367            imports: None,
368            interpreter,
369            contents,
370        })
371    }
372}
373
374impl ExecutableFile for Elf<'_> {
375    fn architecture(&self) -> Architecture {
376        self.arch
377    }
378
379    fn pointer_size(&self) -> usize {
380        if self.is64bit {
381            64
382        } else {
383            32
384        }
385    }
386
387    fn operating_system(&self) -> OperatingSystem {
388        self.os
389    }
390
391    fn compiled_timestamp(&self) -> Option<DateTime<Utc>> {
392        None
393    }
394
395    fn num_sections(&self) -> u32 {
396        self.sections.as_ref().unwrap_or(&Sections::default()).len() as u32
397    }
398
399    fn sections(&self) -> Option<&Sections> {
400        self.sections.as_ref()
401    }
402
403    fn import_hash(&self) -> Option<String> {
404        self.imports.as_ref().map(|i| hex::encode(i.hash()))
405    }
406
407    fn fuzzy_imports(&self) -> Option<String> {
408        self.imports.as_ref().map(Imports::fuzzy_hash)
409    }
410}
411
412impl SpecimenFile for Elf<'_> {
413    const MAGIC: &'static [&'static [u8]] = &[&MAGIC];
414
415    fn type_name(&self) -> &'static str {
416        "ELF"
417    }
418}
419
420impl Display for Elf<'_> {
421    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
422        writeln!(f, "ELF file:")?;
423        writeln!(f, "\tOS: {}", self.os)?;
424        writeln!(f, "\tArchitecture: {}", self.arch)?;
425        writeln!(f, "\tOrdering: {}", self.ordering)?;
426        writeln!(f, "\tType: {}", self.executable_type)?;
427        if let Some(interp) = &self.interpreter {
428            writeln!(f, "\tInterpreter: {interp}")?;
429        }
430        if let Some(sections) = &self.sections {
431            writeln!(f, "\t{} sections:", sections.len())?;
432            for section in sections {
433                writeln!(f, "\t\t{section}")?;
434            }
435        }
436        if self.has_overlay == Some(true) {
437            writeln!(f, "\tHas extra bytes at the end (overlay).")?;
438        }
439        writeln!(f, "\tSize: {}", self.contents.len())?;
440        writeln!(f, "\tEntropy: {:.4}", self.contents.entropy())
441    }
442}
443
444#[cfg(test)]
445mod tests {
446    use super::*;
447
448    use rstest::rstest;
449
450    #[rstest]
451    #[case::arm32(include_bytes!("../../../testdata/elf/elf_linux_arm"), false, OperatingSystem::Linux, Architecture::ARM, Ordering::LittleEndian, ExecutableType::Program)]
452    #[case::arm64(include_bytes!("../../../testdata/elf/elf_linux_arm64"), true, OperatingSystem::Linux, Architecture::ARM64, Ordering::LittleEndian, ExecutableType::Library /* Not really a library, but compiler said it is*/)]
453    #[case::mips32(include_bytes!("../../../testdata/elf/elf_linux_mips"), false, OperatingSystem::Linux, Architecture::MIPS, Ordering::BigEndian, ExecutableType::Program)]
454    #[case::mips64(include_bytes!("../../../testdata/elf/elf_linux_mips64"), true, OperatingSystem::Linux, Architecture::MIPS64, Ordering::BigEndian, ExecutableType::Program)]
455    #[case::ppc64le(include_bytes!("../../../testdata/elf/elf_linux_ppc64le"), true, OperatingSystem::Linux, Architecture::PowerPC64LE, Ordering::LittleEndian, ExecutableType::Program)]
456    #[case::ppc64le_lib(include_bytes!("../../../testdata/elf/elf_linux_ppc64le.so"), true, OperatingSystem::Linux, Architecture::PowerPC64LE, Ordering::LittleEndian, ExecutableType::Library)]
457    #[case::riscv(include_bytes!("../../../testdata/elf/elf_linux_riscv64"), true, OperatingSystem::Linux, Architecture::RISCV64, Ordering::LittleEndian, ExecutableType::Library /* Not really a library, but compiler said it is*/)]
458    #[case::s390x(include_bytes!("../../../testdata/elf/elf_linux_s390x"), true, OperatingSystem::Linux, Architecture::S390x, Ordering::BigEndian, ExecutableType::Library /* Not really a library, but compiler said it is*/)]
459    #[case::x86_haiku(include_bytes!("../../../testdata/elf/elf_haiku_x86"), false, OperatingSystem::Haiku, Architecture::X86, Ordering::LittleEndian, ExecutableType::Library /* Not really a library, but compiler said it is*/)]
460    #[case::x86_64_freebsd(include_bytes!("../../../testdata/elf/elf_freebsd_x86_64"), true, OperatingSystem::FreeBSD, Architecture::X86_64, Ordering::LittleEndian, ExecutableType::Program)]
461    #[test]
462    fn binaries(
463        #[case] bytes: &[u8],
464        #[case] is_64bit: bool,
465        #[case] os: OperatingSystem,
466        #[case] arch: Architecture,
467        #[case] ordering: Ordering,
468        #[case] elf_type: ExecutableType,
469    ) {
470        let elf = Elf::from(bytes).unwrap();
471        eprintln!("{elf}");
472        assert_eq!(elf.is64bit, is_64bit);
473        assert_eq!(elf.os, os);
474        if elf_type == ExecutableType::Program
475            && arch != Architecture::ARM64
476            && arch != Architecture::RISCV64
477            && arch != Architecture::S390x
478        {
479            assert!(elf.interpreter.is_some());
480        }
481        assert_eq!(elf.executable_type, elf_type);
482        assert_eq!(elf.ordering, ordering);
483        assert_eq!(elf.arch, arch);
484    }
485
486    #[test]
487    fn hobbit() {
488        // This test is separate, as the parser reports this program is for Linux when it's for BeOS.
489        // Interpreter not found.
490        const BYTES: &[u8] = include_bytes!("../../../testdata/elf/elf_aclock_hobbit_beos");
491
492        let elf = Elf::from(BYTES).unwrap();
493        eprintln!("{elf}");
494        assert!(!elf.is64bit);
495        assert_eq!(elf.ordering, Ordering::BigEndian);
496        assert_eq!(elf.arch, Architecture::Hobbit);
497    }
498}