Skip to main content

malwaredb_types/exec/elf/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2
3/// Flags for some fields within an ELF header
4pub mod fields;
5
6use crate::exec::{
7    Architecture, ExecutableFile, ExecutableType, Imports, OperatingSystem, Section, Sections,
8};
9use crate::utils::{
10    bytes_offset_match, string_from_offset, u16_from_offset, u32_from_offset, u64_from_offset,
11    EntropyCalc,
12};
13use crate::{Ordering, SpecimenFile};
14
15use std::fmt::{Display, Formatter};
16
17use anyhow::{anyhow, bail, Result};
18use chrono::{DateTime, Utc};
19use flagset::FlagSet;
20use tracing::{error, info, instrument};
21use uuid::Uuid;
22
// Wikipedia article:
// https://en.wikipedia.org/wiki/Executable_and_Linkable_Format

/// The four identification bytes that open every ELF file: `0x7f` followed by ASCII `ELF`.
const MAGIC: [u8; 4] = *b"\x7fELF";
27
28/// The struct to partially represent the ELF (Executable and Linkable File) format
29///
30/// This is the file type used for Linux, *BSD (FreeBSD, OpenBSD, NetBSD, etc), Haiku, Solaris,
31/// and possibly some others.
32///
33/// Effort is made to fail gracefully, since malware may not obey all the rules, and some information
34/// is better than none because some part of the data wasn't parsed correctly.
35#[derive(Clone, Debug)]
36pub struct Elf<'a> {
37    /// If the program is 64-bit
38    pub is64bit: bool,
39
40    /// Instruction set architecture for this binary
41    pub arch: Architecture,
42
43    /// If the binary has extra data after the last section, could be used to hide something
44    pub has_overlay: Option<bool>,
45
46    /// Byte ordering for this binary
47    pub ordering: Ordering,
48
49    /// Executable subtype: Program, Library, or Core file?
50    pub executable_type: ExecutableType,
51
52    /// Operating System for this binary
53    pub os: OperatingSystem,
54
55    /// Sections of this binary
56    pub sections: Option<Sections<'a>>,
57
58    /// External libraries used by this application or library
59    pub imports: Option<Imports>,
60
61    /// The path for the ELF loader (or interpreter)
62    pub interpreter: Option<String>,
63
64    /// The array containing the raw bytes used to parse this program
65    pub contents: &'a [u8],
66}
67
68impl<'a> Elf<'a> {
69    /// ELF parsed from a sequence of bytes
70    ///
71    /// # Errors
72    ///
73    /// Returns an error if the parsing fails.
74    #[allow(
75        clippy::too_many_lines,
76        clippy::cast_possible_truncation,
77        clippy::similar_names
78    )] // TODO: Revisit these
79    #[instrument(name = "ELF parser", skip(contents))]
80    pub fn from(contents: &'a [u8]) -> Result<Self> {
81        if !bytes_offset_match(contents, 0, &MAGIC) {
82            bail!("Not an ELF file");
83        }
84
85        let is_64bit = contents[4] == 2;
86        let ordering = {
87            if contents[5] == 2 {
88                Ordering::BigEndian
89            } else {
90                Ordering::LittleEndian
91            }
92        };
93        let mut os = match contents[7] {
94            1 => OperatingSystem::HPUX,
95            2 => OperatingSystem::NetBSD,
96
97            // Assume SystemV is Linux, some compilers don't set this correctly
98            // https://unix.stackexchange.com/questions/132036/why-does-readelf-show-system-v-as-my-os-instead-of-linux
99            0 | 3 => OperatingSystem::Linux,
100            6 => OperatingSystem::Solaris,
101            7 => OperatingSystem::AIX,
102            8 => OperatingSystem::Irix,
103            9 => OperatingSystem::FreeBSD,
104            0xC => OperatingSystem::OpenBSD,
105            other => OperatingSystem::Other(u16::from(other)),
106        };
107
108        let elf_type = match u16_from_offset(contents, 0x10, ordering)
109            .ok_or(anyhow!("ELF buffer too small for elf type"))?
110        {
111            1 | 2 => ExecutableType::Program,
112            3 => ExecutableType::Library,
113            4 => ExecutableType::Core,
114            other => ExecutableType::Unknown(other),
115        };
116
117        let arch = match u16_from_offset(contents, 0x12, ordering)
118            .ok_or(anyhow!("ELF buffer too small for architecture"))?
119        {
120            // https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
121            0 => Architecture::Unknown,
122            2 => Architecture::Sparc,
123            3 => Architecture::X86,
124            4 => Architecture::M68k,
125            5 => Architecture::M88k,
126            8 => {
127                if is_64bit {
128                    Architecture::MIPS64
129                } else {
130                    Architecture::MIPS
131                }
132            }
133            0x0A => {
134                if is_64bit {
135                    Architecture::MIPSEL64
136                } else {
137                    Architecture::MIPSEL
138                }
139            }
140            0x14 => {
141                if ordering == Ordering::BigEndian {
142                    Architecture::PowerPC
143                } else {
144                    Architecture::PowerPCLE
145                }
146            }
147            0x15 => {
148                if ordering == Ordering::BigEndian {
149                    Architecture::PowerPC64
150                } else {
151                    Architecture::PowerPC64LE
152                }
153            }
154            0x16 => {
155                if is_64bit {
156                    Architecture::S390x
157                } else {
158                    Architecture::S390
159                }
160            }
161
162            0x28 => Architecture::ARM,
163            0x29 => {
164                if is_64bit {
165                    Architecture::Alpha64
166                } else {
167                    Architecture::Alpha
168                }
169            }
170            0x2b => Architecture::Sparc64,
171
172            0x32 => Architecture::Itanium,
173            0x3E => Architecture::X86_64,
174
175            0xB7 => Architecture::ARM64,
176
177            0xF3 => {
178                if is_64bit {
179                    Architecture::RISCV64
180                } else {
181                    Architecture::RISCV
182                }
183            }
184
185            0x39d => Architecture::Hobbit,
186
187            other => Architecture::Other(u32::from(other)),
188        };
189
190        // Offset for section table
191        let e_shoff = {
192            if is_64bit {
193                u64_from_offset(contents, 0x28, ordering)
194                    .ok_or(anyhow!("ELF contents too short for section offset"))?
195                    as usize
196            } else {
197                u32_from_offset(contents, 0x20, ordering)
198                    .ok_or(anyhow!("ELF contents too short for section offset"))?
199                    as usize
200            }
201        };
202
203        let e_phentsize = {
204            if is_64bit {
205                u16_from_offset(contents, 0x36, ordering)
206            } else {
207                u16_from_offset(contents, 0x2A, ordering)
208            }
209        }
210        .ok_or(anyhow!("ELF contents too short program entry size"))?
211            as usize;
212
213        let e_phnum = {
214            if is_64bit {
215                u16_from_offset(contents, 0x38, ordering)
216            } else {
217                u16_from_offset(contents, 0x2C, ordering)
218            }
219        }
220        .ok_or(anyhow!("ELF contents too short program entries"))? as usize;
221
222        let mut interpreter = None;
223        for p_header_index in 0..e_phnum {
224            let start_index = {
225                if is_64bit {
226                    0x40 + p_header_index * e_phentsize
227                } else {
228                    0x34 + p_header_index * e_phentsize
229                }
230            };
231            let p_type = FlagSet::<fields::ProgramHeaderFlags>::new_truncated(
232                u32_from_offset(contents, start_index, ordering).unwrap_or_default(),
233            );
234
235            if p_type.contains(fields::ProgramHeaderFlags::Interpreter) {
236                let header = &contents[start_index..start_index + e_phentsize];
237                let p_offset = {
238                    if is_64bit {
239                        u64_from_offset(header, 0x08, ordering).unwrap_or_default() as usize
240                    } else {
241                        u32_from_offset(header, 0x04, ordering).unwrap_or_default() as usize
242                    }
243                };
244                let p_filesz = {
245                    if is_64bit {
246                        u64_from_offset(header, 0x20, ordering).unwrap_or_default() as usize
247                    } else {
248                        u32_from_offset(header, 0x10, ordering).unwrap_or_default() as usize
249                    }
250                };
251
252                if p_offset > 0 && p_filesz > 0 {
253                    let interpreter_path =
254                        String::from_utf8(Vec::from(&contents[p_offset..p_offset + p_filesz]))
255                            .map_err(|e| {
256                                error!(
257                                    "Interpreter error {e}, bytes: {:?}",
258                                    &contents[p_offset..p_offset + p_filesz]
259                                );
260                            })
261                            .unwrap_or_default();
262                    if !interpreter_path.is_empty() {
263                        if interpreter_path.contains("/system/runtime_loader") {
264                            os = OperatingSystem::Haiku;
265                        }
266                        interpreter = Some(interpreter_path);
267                    }
268                    break;
269                }
270            }
271        }
272
273        // Size of a section header table entry
274        let e_shentsize = {
275            if is_64bit {
276                u16_from_offset(contents, 0x3A, ordering)
277            } else {
278                u16_from_offset(contents, 0x2E, ordering)
279            }
280        }
281        .ok_or(anyhow!("ELF contents too short for section entry size"))?;
282
283        // Number of entries in the section header table
284        let e_shnum = {
285            if is_64bit {
286                u16_from_offset(contents, 0x3C, ordering)
287            } else {
288                u16_from_offset(contents, 0x30, ordering)
289            }
290        }
291        .ok_or(anyhow!("ELF contents too short for section count"))?;
292
293        // Index of the section header table with section names
294        let e_shstrndx = {
295            if is_64bit {
296                u16_from_offset(contents, 0x3E, ordering)
297            } else {
298                u16_from_offset(contents, 0x32, ordering)
299            }
300        }
301        .ok_or(anyhow!(
302            "ELF contents too short for section header table with section names"
303        ))?;
304
305        // Find the offset for section name strings
306        let section_names_offset = {
307            if is_64bit {
308                u64_from_offset(
309                    contents,
310                    e_shoff + (e_shstrndx * e_shentsize) as usize + 0x18,
311                    ordering,
312                )
313                .ok_or(anyhow!("ELF contents too short for section name"))? as usize
314            } else {
315                u32_from_offset(
316                    contents,
317                    e_shoff + (e_shstrndx * e_shentsize) as usize + 0x10,
318                    ordering,
319                )
320                .ok_or(anyhow!("ELF contents too short for section name"))? as usize
321            }
322        };
323
324        let mut section_offset = e_shoff;
325        let mut sections = Sections::default();
326        for section_index in 0..e_shnum {
327            let section_name_offset = u32_from_offset(contents, section_offset, ordering)
328                .unwrap_or_default() as usize
329                + section_names_offset;
330            let section_name = if section_name_offset < contents.len() {
331                string_from_offset(contents, section_name_offset).unwrap_or_default()
332            } else {
333                info!(
334                    "ELF: section name offset {section_name_offset} greater than buffer length {}.",
335                    contents.len()
336                );
337                String::new()
338            };
339
340            let section_type = FlagSet::<fields::SectionHeaderTypes>::new_truncated(
341                u32_from_offset(contents, section_offset + 0x4, ordering).unwrap_or_default(),
342            );
343
344            if section_type.contains(fields::SectionHeaderTypes::DynamicSymbolsTable) {
345                // TODO: Parse imports here
346            }
347
348            let this_section_offset = {
349                if is_64bit {
350                    u64_from_offset(contents, section_offset + 0x18, ordering).unwrap_or_default()
351                        as usize
352                } else {
353                    u32_from_offset(contents, section_offset + 0x10, ordering).unwrap_or_default()
354                        as usize
355                }
356            };
357            let section_size = {
358                if is_64bit {
359                    if let Some(size) = u64_from_offset(contents, section_offset + 0x20, ordering) {
360                        size as usize
361                    } else {
362                        continue;
363                    }
364                } else if let Some(size) =
365                    u32_from_offset(contents, section_offset + 0x14, ordering)
366                {
367                    size as usize
368                } else {
369                    continue;
370                }
371            };
372            let section_flags = {
373                if is_64bit {
374                    u64_from_offset(contents, section_offset + 0x08, ordering).unwrap_or_default()
375                        as usize
376                } else {
377                    u32_from_offset(contents, section_offset + 0x08, ordering).unwrap_or_default()
378                        as usize
379                }
380            };
381
382            if this_section_offset + section_size <= contents.len() {
383                let section_bytes =
384                    &contents[this_section_offset..this_section_offset + section_size];
385
386                sections.push(Section {
387                    name: section_name,
388                    is_executable: (section_flags & 4) != 0,
389                    size: section_size,
390                    offset: this_section_offset,
391                    virtual_address: 0,
392                    virtual_size: 0,
393                    entropy: section_bytes.to_vec().entropy(),
394                    data: Some(section_bytes),
395                });
396            } else {
397                error!("Section {section_index}: {section_name} offset {this_section_offset} + size {section_size} (end {}) is beyond the ELF buffer {}!", this_section_offset + section_size, contents.len());
398            }
399
400            // Advance to the next section
401            section_offset += e_shentsize as usize;
402        }
403
404        Ok(Self {
405            is64bit: is_64bit,
406            arch,
407            has_overlay: Some(section_offset < contents.len()),
408            ordering,
409            executable_type: elf_type,
410            os,
411            sections: Some(sections),
412            imports: None,
413            interpreter,
414            contents,
415        })
416    }
417}
418
419impl ExecutableFile for Elf<'_> {
420    fn architecture(&self) -> Option<Architecture> {
421        Some(self.arch)
422    }
423
424    fn pointer_size(&self) -> usize {
425        if self.is64bit {
426            64
427        } else {
428            32
429        }
430    }
431
432    fn operating_system(&self) -> OperatingSystem {
433        self.os
434    }
435
436    fn compiled_timestamp(&self) -> Option<DateTime<Utc>> {
437        None
438    }
439
440    #[allow(clippy::cast_possible_truncation)]
441    fn num_sections(&self) -> u32 {
442        self.sections.as_ref().unwrap_or(&Sections::default()).len() as u32
443    }
444
445    fn sections(&self) -> Option<&Sections<'_>> {
446        self.sections.as_ref()
447    }
448
449    fn import_hash(&self) -> Option<Uuid> {
450        self.imports.as_ref().map(Imports::hash)
451    }
452
453    fn fuzzy_imports(&self) -> Option<String> {
454        self.imports.as_ref().map(Imports::fuzzy_hash)
455    }
456}
457
458impl SpecimenFile for Elf<'_> {
459    const MAGIC: &'static [&'static [u8]] = &[&MAGIC];
460
461    fn type_name(&self) -> &'static str {
462        "ELF"
463    }
464}
465
466impl Display for Elf<'_> {
467    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
468        writeln!(f, "ELF file:")?;
469        writeln!(f, "\tOS: {}", self.os)?;
470        writeln!(f, "\tArchitecture: {}", self.arch)?;
471        writeln!(f, "\tOrdering: {}", self.ordering)?;
472        writeln!(f, "\tType: {}", self.executable_type)?;
473        if let Some(interp) = &self.interpreter {
474            writeln!(f, "\tInterpreter: {interp}")?;
475        }
476        if let Some(sections) = &self.sections {
477            writeln!(f, "\t{} sections:", sections.len())?;
478            for section in sections {
479                writeln!(f, "\t\t{section}")?;
480            }
481        }
482        if self.has_overlay == Some(true) {
483            writeln!(f, "\tHas extra bytes at the end (overlay).")?;
484        }
485        writeln!(f, "\tSize: {}", self.contents.len())?;
486        writeln!(f, "\tEntropy: {:.4}", self.contents.entropy())
487    }
488}
489
#[cfg(test)]
mod tests {
    use super::*;

    use rstest::rstest;

    /// Parses each sample binary and compares the header-derived properties with the expected
    /// values supplied by the case.
    #[rstest]
    #[case::arm32(include_bytes!("../../../testdata/elf/elf_linux_arm"), false, OperatingSystem::Linux, Architecture::ARM, Ordering::LittleEndian, ExecutableType::Program)]
    #[case::arm64(include_bytes!("../../../testdata/elf/elf_linux_arm64"), true, OperatingSystem::Linux, Architecture::ARM64, Ordering::LittleEndian, ExecutableType::Library /* Not really a library, but compiler said it is*/)]
    #[case::mips32(include_bytes!("../../../testdata/elf/elf_linux_mips"), false, OperatingSystem::Linux, Architecture::MIPS, Ordering::BigEndian, ExecutableType::Program)]
    #[case::mips64(include_bytes!("../../../testdata/elf/elf_linux_mips64"), true, OperatingSystem::Linux, Architecture::MIPS64, Ordering::BigEndian, ExecutableType::Program)]
    #[case::ppc64le(include_bytes!("../../../testdata/elf/elf_linux_ppc64le"), true, OperatingSystem::Linux, Architecture::PowerPC64LE, Ordering::LittleEndian, ExecutableType::Program)]
    #[case::ppc64le_lib(include_bytes!("../../../testdata/elf/elf_linux_ppc64le.so"), true, OperatingSystem::Linux, Architecture::PowerPC64LE, Ordering::LittleEndian, ExecutableType::Library)]
    #[case::riscv(include_bytes!("../../../testdata/elf/elf_linux_riscv64"), true, OperatingSystem::Linux, Architecture::RISCV64, Ordering::LittleEndian, ExecutableType::Library /* Not really a library, but compiler said it is*/)]
    #[case::s390x(include_bytes!("../../../testdata/elf/elf_linux_s390x"), true, OperatingSystem::Linux, Architecture::S390x, Ordering::BigEndian, ExecutableType::Library /* Not really a library, but compiler said it is*/)]
    #[case::x86_haiku(include_bytes!("../../../testdata/elf/elf_haiku_x86"), false, OperatingSystem::Haiku, Architecture::X86, Ordering::LittleEndian, ExecutableType::Library /* Not really a library, but compiler said it is*/)]
    #[case::x86_64_freebsd(include_bytes!("../../../testdata/elf/elf_freebsd_x86_64"), true, OperatingSystem::FreeBSD, Architecture::X86_64, Ordering::LittleEndian, ExecutableType::Program)]
    #[test]
    fn binaries(
        #[case] bytes: &[u8],
        #[case] is_64bit: bool,
        #[case] os: OperatingSystem,
        #[case] arch: Architecture,
        #[case] ordering: Ordering,
        #[case] elf_type: ExecutableType,
    ) {
        let elf = Elf::from(bytes).unwrap();
        eprintln!("{elf}");

        assert_eq!(elf.is64bit, is_64bit);
        assert_eq!(elf.os, os);

        // An interpreter is only required for programs, and not for these architectures.
        let interpreter_exempt = [
            Architecture::ARM64,
            Architecture::RISCV64,
            Architecture::S390x,
        ]
        .contains(&arch);
        if elf_type == ExecutableType::Program && !interpreter_exempt {
            assert!(elf.interpreter.is_some());
        }

        assert_eq!(elf.executable_type, elf_type);
        assert_eq!(elf.ordering, ordering);
        assert_eq!(elf.arch, arch);
    }

    #[test]
    fn hobbit() {
        // This test is separate, as the parser reports this program is for Linux when it's for BeOS.
        // Interpreter not found.
        let bytes: &[u8] = include_bytes!("../../../testdata/elf/elf_aclock_hobbit_beos");

        let elf = Elf::from(bytes).unwrap();
        eprintln!("{elf}");

        assert!(!elf.is64bit);
        assert_eq!(elf.ordering, Ordering::BigEndian);
        assert_eq!(elf.arch, Architecture::Hobbit);
    }
}