malwaredb_types/exec/elf/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2
3/// Flags for some fields within an ELF header
4pub mod fields;
5
6use crate::exec::{
7    Architecture, ExecutableFile, ExecutableType, Imports, OperatingSystem, Section, Sections,
8};
9use crate::utils::{
10    bytes_offset_match, string_from_offset, u16_from_offset, u32_from_offset, u64_from_offset,
11    EntropyCalc,
12};
13use crate::{Ordering, SpecimenFile};
14
15use std::fmt::{Display, Formatter};
16
17use anyhow::{anyhow, bail, Result};
18use chrono::{DateTime, Utc};
19use flagset::FlagSet;
20use tracing::{error, info, instrument};
21
22// Wikipedia article:
23// https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
24
25const MAGIC: [u8; 4] = [0x7f, 0x45, 0x4c, 0x46];
26
/// The struct to partially represent the ELF (Executable and Linkable Format) file format
///
/// This is the file type used for Linux, *BSD (FreeBSD, OpenBSD, NetBSD, etc), Haiku, Solaris,
/// and possibly some others.
///
/// Effort is made to fail gracefully, since malware may not obey all the rules, and some information
/// is better than none because some part of the data wasn't parsed correctly.
///
/// The struct borrows the parsed buffer for `'a`; nothing in it owns the file contents.
#[derive(Clone, Debug)]
pub struct Elf<'a> {
    /// If the program is 64-bit (the parser sets this when the header byte at offset 4 is `2`)
    pub is64bit: bool,

    /// Instruction set architecture for this binary
    pub arch: Architecture,

    /// If the binary has extra data after the last section, could be used to hide something
    ///
    /// The parser in this file derives this by comparing the offset reached after walking the
    /// section header table against the buffer length.
    pub has_overlay: Option<bool>,

    /// Byte ordering for this binary
    pub ordering: Ordering,

    /// Executable subtype: Program, Library, or Core file?
    pub executable_type: ExecutableType,

    /// Operating System for this binary
    ///
    /// Taken from the header byte at offset 7, and overridden to Haiku when the interpreter
    /// path contains `/system/runtime_loader`.
    pub os: OperatingSystem,

    /// Sections of this binary
    pub sections: Option<Sections<'a>>,

    /// External libraries used by this application or library
    ///
    /// The parser in this file currently always stores `None` here; import parsing is a TODO.
    pub imports: Option<Imports>,

    /// The path for the ELF loader (or interpreter)
    ///
    /// `None` when no program header flagged as the interpreter yielded a usable path.
    pub interpreter: Option<String>,

    /// The array containing the raw bytes used to parse this program
    pub contents: &'a [u8],
}
66
67impl<'a> Elf<'a> {
68    /// ELF parsed from a sequence of bytes
69    ///
70    /// # Errors
71    ///
72    /// Returns an error if the parsing fails.
73    #[allow(
74        clippy::too_many_lines,
75        clippy::cast_possible_truncation,
76        clippy::similar_names
77    )] // TODO: Revisit these
78    #[instrument(name = "ELF parser", skip(contents))]
79    pub fn from(contents: &'a [u8]) -> Result<Self> {
80        if !bytes_offset_match(contents, 0, &MAGIC) {
81            bail!("Not an ELF file");
82        }
83
84        let is_64bit = contents[4] == 2;
85        let ordering = {
86            if contents[5] == 2 {
87                Ordering::BigEndian
88            } else {
89                Ordering::LittleEndian
90            }
91        };
92        let mut os = match contents[7] {
93            1 => OperatingSystem::HPUX,
94            2 => OperatingSystem::NetBSD,
95
96            // Assume SystemV is Linux, some compilers don't set this correctly
97            // https://unix.stackexchange.com/questions/132036/why-does-readelf-show-system-v-as-my-os-instead-of-linux
98            0 | 3 => OperatingSystem::Linux,
99            6 => OperatingSystem::Solaris,
100            7 => OperatingSystem::AIX,
101            8 => OperatingSystem::Irix,
102            9 => OperatingSystem::FreeBSD,
103            0xC => OperatingSystem::OpenBSD,
104            other => OperatingSystem::Other(u16::from(other)),
105        };
106
107        let elf_type = match u16_from_offset(contents, 0x10, ordering)
108            .ok_or(anyhow!("ELF buffer too small for elf type"))?
109        {
110            1 | 2 => ExecutableType::Program,
111            3 => ExecutableType::Library,
112            4 => ExecutableType::Core,
113            other => ExecutableType::Unknown(other),
114        };
115
116        let arch = match u16_from_offset(contents, 0x12, ordering)
117            .ok_or(anyhow!("ELF buffer too small for architecture"))?
118        {
119            // https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
120            0 => Architecture::Unknown,
121            2 => Architecture::Sparc,
122            3 => Architecture::X86,
123            4 => Architecture::M68k,
124            5 => Architecture::M88k,
125            8 => {
126                if is_64bit {
127                    Architecture::MIPS64
128                } else {
129                    Architecture::MIPS
130                }
131            }
132            0x0A => {
133                if is_64bit {
134                    Architecture::MIPSEL64
135                } else {
136                    Architecture::MIPSEL
137                }
138            }
139            0x14 => {
140                if ordering == Ordering::BigEndian {
141                    Architecture::PowerPC
142                } else {
143                    Architecture::PowerPCLE
144                }
145            }
146            0x15 => {
147                if ordering == Ordering::BigEndian {
148                    Architecture::PowerPC64
149                } else {
150                    Architecture::PowerPC64LE
151                }
152            }
153            0x16 => {
154                if is_64bit {
155                    Architecture::S390x
156                } else {
157                    Architecture::S390
158                }
159            }
160
161            0x28 => Architecture::ARM,
162            0x29 => {
163                if is_64bit {
164                    Architecture::Alpha64
165                } else {
166                    Architecture::Alpha
167                }
168            }
169            0x2b => Architecture::Sparc64,
170
171            0x32 => Architecture::Itanium,
172            0x3E => Architecture::X86_64,
173
174            0xB7 => Architecture::ARM64,
175
176            0xF3 => {
177                if is_64bit {
178                    Architecture::RISCV64
179                } else {
180                    Architecture::RISCV
181                }
182            }
183
184            0x39d => Architecture::Hobbit,
185
186            other => Architecture::Other(u32::from(other)),
187        };
188
189        // Offset for section table
190        let e_shoff = {
191            if is_64bit {
192                u64_from_offset(contents, 0x28, ordering)
193                    .ok_or(anyhow!("ELF contents too short for section offset"))?
194                    as usize
195            } else {
196                u32_from_offset(contents, 0x20, ordering)
197                    .ok_or(anyhow!("ELF contents too short for section offset"))?
198                    as usize
199            }
200        };
201
202        let e_phentsize = {
203            if is_64bit {
204                u16_from_offset(contents, 0x36, ordering)
205            } else {
206                u16_from_offset(contents, 0x2A, ordering)
207            }
208        }
209        .ok_or(anyhow!("ELF contents too short program entry size"))?
210            as usize;
211
212        let e_phnum = {
213            if is_64bit {
214                u16_from_offset(contents, 0x38, ordering)
215            } else {
216                u16_from_offset(contents, 0x2C, ordering)
217            }
218        }
219        .ok_or(anyhow!("ELF contents too short program entries"))? as usize;
220
221        let mut interpreter = None;
222        for p_header_index in 0..e_phnum {
223            let start_index = {
224                if is_64bit {
225                    0x40 + p_header_index * e_phentsize
226                } else {
227                    0x34 + p_header_index * e_phentsize
228                }
229            };
230            let p_type = FlagSet::<fields::ProgramHeaderFlags>::new_truncated(
231                u32_from_offset(contents, start_index, ordering).unwrap_or_default(),
232            );
233
234            if p_type.contains(fields::ProgramHeaderFlags::Interpreter) {
235                let header = &contents[start_index..start_index + e_phentsize];
236                let p_offset = {
237                    if is_64bit {
238                        u64_from_offset(header, 0x08, ordering).unwrap_or_default() as usize
239                    } else {
240                        u32_from_offset(header, 0x04, ordering).unwrap_or_default() as usize
241                    }
242                };
243                let p_filesz = {
244                    if is_64bit {
245                        u64_from_offset(header, 0x20, ordering).unwrap_or_default() as usize
246                    } else {
247                        u32_from_offset(header, 0x10, ordering).unwrap_or_default() as usize
248                    }
249                };
250
251                if p_offset > 0 && p_filesz > 0 {
252                    let interpreter_path =
253                        String::from_utf8(Vec::from(&contents[p_offset..p_offset + p_filesz]))
254                            .map_err(|e| {
255                                error!(
256                                    "Interpreter error {e}, bytes: {:?}",
257                                    &contents[p_offset..p_offset + p_filesz]
258                                );
259                            })
260                            .unwrap_or_default();
261                    if !interpreter_path.is_empty() {
262                        if interpreter_path.contains("/system/runtime_loader") {
263                            os = OperatingSystem::Haiku;
264                        }
265                        interpreter = Some(interpreter_path);
266                    }
267                    break;
268                }
269            }
270        }
271
272        // Size of a section header table entry
273        let e_shentsize = {
274            if is_64bit {
275                u16_from_offset(contents, 0x3A, ordering)
276            } else {
277                u16_from_offset(contents, 0x2E, ordering)
278            }
279        }
280        .ok_or(anyhow!("ELF contents too short for section entry size"))?;
281
282        // Number of entries in the section header table
283        let e_shnum = {
284            if is_64bit {
285                u16_from_offset(contents, 0x3C, ordering)
286            } else {
287                u16_from_offset(contents, 0x30, ordering)
288            }
289        }
290        .ok_or(anyhow!("ELF contents too short for section count"))?;
291
292        // Index of the section header table with section names
293        let e_shstrndx = {
294            if is_64bit {
295                u16_from_offset(contents, 0x3E, ordering)
296            } else {
297                u16_from_offset(contents, 0x32, ordering)
298            }
299        }
300        .ok_or(anyhow!(
301            "ELF contents too short for section header table with section names"
302        ))?;
303
304        // Find the offset for section name strings
305        let section_names_offset = {
306            if is_64bit {
307                u64_from_offset(
308                    contents,
309                    e_shoff + (e_shstrndx * e_shentsize) as usize + 0x18,
310                    ordering,
311                )
312                .ok_or(anyhow!("ELF contents too short for section name"))? as usize
313            } else {
314                u32_from_offset(
315                    contents,
316                    e_shoff + (e_shstrndx * e_shentsize) as usize + 0x10,
317                    ordering,
318                )
319                .ok_or(anyhow!("ELF contents too short for section name"))? as usize
320            }
321        };
322
323        let mut section_offset = e_shoff;
324        let mut sections = Sections::default();
325        for section_index in 0..e_shnum {
326            let section_name_offset = u32_from_offset(contents, section_offset, ordering)
327                .unwrap_or_default() as usize
328                + section_names_offset;
329            let section_name = if section_name_offset < contents.len() {
330                string_from_offset(contents, section_name_offset).unwrap_or_default()
331            } else {
332                info!(
333                    "ELF: section name offset {section_name_offset} greater than buffer length {}.",
334                    contents.len()
335                );
336                String::new()
337            };
338
339            let section_type = FlagSet::<fields::SectionHeaderTypes>::new_truncated(
340                u32_from_offset(contents, section_offset + 0x4, ordering).unwrap_or_default(),
341            );
342
343            if section_type.contains(fields::SectionHeaderTypes::DynamicSymbolsTable) {
344                // TODO: Parse imports here
345            }
346
347            let this_section_offset = {
348                if is_64bit {
349                    u64_from_offset(contents, section_offset + 0x18, ordering).unwrap_or_default()
350                        as usize
351                } else {
352                    u32_from_offset(contents, section_offset + 0x10, ordering).unwrap_or_default()
353                        as usize
354                }
355            };
356            let section_size = {
357                if is_64bit {
358                    if let Some(size) = u64_from_offset(contents, section_offset + 0x20, ordering) {
359                        size as usize
360                    } else {
361                        continue;
362                    }
363                } else if let Some(size) =
364                    u32_from_offset(contents, section_offset + 0x14, ordering)
365                {
366                    size as usize
367                } else {
368                    continue;
369                }
370            };
371            let section_flags = {
372                if is_64bit {
373                    u64_from_offset(contents, section_offset + 0x08, ordering).unwrap_or_default()
374                        as usize
375                } else {
376                    u32_from_offset(contents, section_offset + 0x08, ordering).unwrap_or_default()
377                        as usize
378                }
379            };
380
381            if this_section_offset + section_size <= contents.len() {
382                let section_bytes =
383                    &contents[this_section_offset..this_section_offset + section_size];
384
385                sections.push(Section {
386                    name: section_name,
387                    is_executable: (section_flags & 4) != 0,
388                    size: section_size,
389                    offset: this_section_offset,
390                    virtual_address: 0,
391                    virtual_size: 0,
392                    entropy: section_bytes.to_vec().entropy(),
393                    data: Some(section_bytes),
394                });
395            } else {
396                error!("Section {section_index}: {section_name} offset {this_section_offset} + size {section_size} (end {}) is beyond the ELF buffer {}!", this_section_offset + section_size, contents.len());
397            }
398
399            // Advance to the next section
400            section_offset += e_shentsize as usize;
401        }
402
403        Ok(Self {
404            is64bit: is_64bit,
405            arch,
406            has_overlay: Some(section_offset < contents.len()),
407            ordering,
408            executable_type: elf_type,
409            os,
410            sections: Some(sections),
411            imports: None,
412            interpreter,
413            contents,
414        })
415    }
416}
417
418impl ExecutableFile for Elf<'_> {
419    fn architecture(&self) -> Option<Architecture> {
420        Some(self.arch)
421    }
422
423    fn pointer_size(&self) -> usize {
424        if self.is64bit {
425            64
426        } else {
427            32
428        }
429    }
430
431    fn operating_system(&self) -> OperatingSystem {
432        self.os
433    }
434
435    fn compiled_timestamp(&self) -> Option<DateTime<Utc>> {
436        None
437    }
438
439    #[allow(clippy::cast_possible_truncation)]
440    fn num_sections(&self) -> u32 {
441        self.sections.as_ref().unwrap_or(&Sections::default()).len() as u32
442    }
443
444    fn sections(&self) -> Option<&Sections<'_>> {
445        self.sections.as_ref()
446    }
447
448    fn import_hash(&self) -> Option<String> {
449        self.imports.as_ref().map(|i| hex::encode(i.hash()))
450    }
451
452    fn fuzzy_imports(&self) -> Option<String> {
453        self.imports.as_ref().map(Imports::fuzzy_hash)
454    }
455}
456
impl SpecimenFile for Elf<'_> {
    /// Byte signatures for this file type: only the four-byte ELF magic defined at the top of
    /// this module.
    const MAGIC: &'static [&'static [u8]] = &[&MAGIC];

    /// Fixed, short name for this file type.
    fn type_name(&self) -> &'static str {
        "ELF"
    }
}
464
465impl Display for Elf<'_> {
466    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
467        writeln!(f, "ELF file:")?;
468        writeln!(f, "\tOS: {}", self.os)?;
469        writeln!(f, "\tArchitecture: {}", self.arch)?;
470        writeln!(f, "\tOrdering: {}", self.ordering)?;
471        writeln!(f, "\tType: {}", self.executable_type)?;
472        if let Some(interp) = &self.interpreter {
473            writeln!(f, "\tInterpreter: {interp}")?;
474        }
475        if let Some(sections) = &self.sections {
476            writeln!(f, "\t{} sections:", sections.len())?;
477            for section in sections {
478                writeln!(f, "\t\t{section}")?;
479            }
480        }
481        if self.has_overlay == Some(true) {
482            writeln!(f, "\tHas extra bytes at the end (overlay).")?;
483        }
484        writeln!(f, "\tSize: {}", self.contents.len())?;
485        writeln!(f, "\tEntropy: {:.4}", self.contents.entropy())
486    }
487}
488
#[cfg(test)]
mod tests {
    use super::*;

    use rstest::rstest;

    // Parses every sample binary and compares the header-derived fields with the known values
    // for that sample.
    #[rstest]
    #[case::arm32(include_bytes!("../../../testdata/elf/elf_linux_arm"), false, OperatingSystem::Linux, Architecture::ARM, Ordering::LittleEndian, ExecutableType::Program)]
    #[case::arm64(include_bytes!("../../../testdata/elf/elf_linux_arm64"), true, OperatingSystem::Linux, Architecture::ARM64, Ordering::LittleEndian, ExecutableType::Library /* Not really a library, but compiler said it is*/)]
    #[case::mips32(include_bytes!("../../../testdata/elf/elf_linux_mips"), false, OperatingSystem::Linux, Architecture::MIPS, Ordering::BigEndian, ExecutableType::Program)]
    #[case::mips64(include_bytes!("../../../testdata/elf/elf_linux_mips64"), true, OperatingSystem::Linux, Architecture::MIPS64, Ordering::BigEndian, ExecutableType::Program)]
    #[case::ppc64le(include_bytes!("../../../testdata/elf/elf_linux_ppc64le"), true, OperatingSystem::Linux, Architecture::PowerPC64LE, Ordering::LittleEndian, ExecutableType::Program)]
    #[case::ppc64le_lib(include_bytes!("../../../testdata/elf/elf_linux_ppc64le.so"), true, OperatingSystem::Linux, Architecture::PowerPC64LE, Ordering::LittleEndian, ExecutableType::Library)]
    #[case::riscv(include_bytes!("../../../testdata/elf/elf_linux_riscv64"), true, OperatingSystem::Linux, Architecture::RISCV64, Ordering::LittleEndian, ExecutableType::Library /* Not really a library, but compiler said it is*/)]
    #[case::s390x(include_bytes!("../../../testdata/elf/elf_linux_s390x"), true, OperatingSystem::Linux, Architecture::S390x, Ordering::BigEndian, ExecutableType::Library /* Not really a library, but compiler said it is*/)]
    #[case::x86_haiku(include_bytes!("../../../testdata/elf/elf_haiku_x86"), false, OperatingSystem::Haiku, Architecture::X86, Ordering::LittleEndian, ExecutableType::Library /* Not really a library, but compiler said it is*/)]
    #[case::x86_64_freebsd(include_bytes!("../../../testdata/elf/elf_freebsd_x86_64"), true, OperatingSystem::FreeBSD, Architecture::X86_64, Ordering::LittleEndian, ExecutableType::Program)]
    #[test]
    fn binaries(
        #[case] bytes: &[u8],
        #[case] is_64bit: bool,
        #[case] os: OperatingSystem,
        #[case] arch: Architecture,
        #[case] ordering: Ordering,
        #[case] elf_type: ExecutableType,
    ) {
        let parsed = Elf::from(bytes).unwrap();
        eprintln!("{parsed}");

        assert_eq!(parsed.is64bit, is_64bit);
        assert_eq!(parsed.os, os);

        // The interpreter is only required for programs, and the ARM64, RISCV64 and S390x
        // cases are exempt from the check.
        let exempt_arch = matches!(
            arch,
            Architecture::ARM64 | Architecture::RISCV64 | Architecture::S390x
        );
        let expects_interpreter = elf_type == ExecutableType::Program && !exempt_arch;
        if expects_interpreter {
            assert!(parsed.interpreter.is_some());
        }

        assert_eq!(parsed.executable_type, elf_type);
        assert_eq!(parsed.ordering, ordering);
        assert_eq!(parsed.arch, arch);
    }

    #[test]
    fn hobbit() {
        // Kept apart from the table above: the parser reports this program as Linux although
        // it's for BeOS, and no interpreter is found.
        let bytes: &[u8] = include_bytes!("../../../testdata/elf/elf_aclock_hobbit_beos");

        let parsed = Elf::from(bytes).unwrap();
        eprintln!("{parsed}");
        assert!(!parsed.is64bit);
        assert_eq!(parsed.ordering, Ordering::BigEndian);
        assert_eq!(parsed.arch, Architecture::Hobbit);
    }
}