Skip to main content

symbolic_debuginfo/
elf.rs

1//! Support for the Executable and Linkable Format, used on Linux.
2
3use std::borrow::Cow;
4use std::error::Error;
5use std::ffi::CStr;
6use std::fmt;
7
8use core::cmp;
9use flate2::{Decompress, FlushDecompress};
10use goblin::elf::compression_header::{CompressionHeader, ELFCOMPRESS_ZLIB};
11use goblin::elf::SectionHeader;
12use goblin::elf64::sym::SymIterator;
13use goblin::strtab::Strtab;
14use goblin::{
15    container::{Container, Ctx},
16    elf, strtab,
17};
18use scroll::Pread;
19use thiserror::Error;
20
21use symbolic_common::{Arch, AsSelf, CodeId, DebugId, Uuid};
22
23use crate::base::*;
24use crate::dwarf::{Dwarf, DwarfDebugSession, DwarfError, DwarfSection, Endian};
25use crate::ParseObjectOptions;
26
/// Number of bytes in a UUID. Identifiers are truncated or zero-padded to this length before
/// being converted into a `DebugId`.
const UUID_SIZE: usize = 16;
/// Maximum number of leading `.text` bytes that are hashed when a debug identifier has to be
/// synthesized from program code.
const PAGE_SIZE: usize = 4096;

/// `SHN_UNDEF` from `goblin`, widened to `usize`.
const SHN_UNDEF: usize = elf::section_header::SHN_UNDEF as usize;
/// `SHF_COMPRESSED` from `goblin`, widened to `u64` so it can be tested against `sh_flags`.
const SHF_COMPRESSED: u64 = elf::section_header::SHF_COMPRESSED as u64;

/// The ELF compression header type for `zstd`, as that is not (yet) exported by `goblin`.
pub const ELFCOMPRESS_ZSTD: u32 = 2;

/// This file follows the first MIPS 32 bit ABI
#[allow(unused)]
const EF_MIPS_ABI_O32: u32 = 0x0000_1000;
/// O32 ABI extended for 64-bit architecture.
const EF_MIPS_ABI_O64: u32 = 0x0000_2000;
/// EABI in 32 bit mode.
#[allow(unused)]
const EF_MIPS_ABI_EABI32: u32 = 0x0000_3000;
/// EABI in 64 bit mode.
const EF_MIPS_ABI_EABI64: u32 = 0x0000_4000;

/// Any flag value that might indicate 64-bit MIPS.
///
/// NOTE(review): the ABI constants above look like enumerated values of a single field rather
/// than independent bits. `EF_MIPS_ABI_EABI32` (`0x3000`) shares a bit with
/// `EF_MIPS_ABI_O64` (`0x2000`), so a plain `flags & MIPS_64_FLAGS != 0` test also matches
/// 32-bit EABI. Confirm against the MIPS ELF specification.
const MIPS_64_FLAGS: u32 = EF_MIPS_ABI_O64 | EF_MIPS_ABI_EABI64;
49
/// An error when dealing with [`ElfObject`].
///
/// The display message is always "invalid ELF file"; the underlying cause, when one was
/// recorded, is reported as the error's source.
#[derive(Debug, Error)]
#[error("invalid ELF file")]
pub struct ElfError {
    /// The underlying cause of this error, if any.
    #[source]
    source: Option<Box<dyn Error + Send + Sync + 'static>>,
}
57
58impl ElfError {
59    /// Creates a new ELF error from an arbitrary error payload.
60    fn new<E>(source: E) -> Self
61    where
62        E: Into<Box<dyn Error + Send + Sync>>,
63    {
64        let source = Some(source.into());
65        Self { source }
66    }
67}
68
/// Executable and Linkable Format, used for executables and libraries on Linux.
pub struct ElfObject<'data> {
    /// The (possibly only partially populated) `goblin` representation of the file.
    elf: elf::Elf<'data>,
    /// The raw bytes the object was parsed from.
    data: &'data [u8],
    /// Set when parsing stopped early and `elf` only contains the tables parsed up to that point.
    is_malformed: bool,
    /// Upper bound for the declared decompressed size of a compressed section; `None` disables
    /// the check.
    max_decompressed_section_size: Option<usize>,
}
76
77impl<'data> ElfObject<'data> {
78    /// Tests whether the buffer could contain an ELF object.
79    pub fn test(data: &[u8]) -> bool {
80        data.get(0..elf::header::SELFMAG)
81            .is_some_and(|data| data == elf::header::ELFMAG)
82    }
83
84    // Pulled from https://github.com/m4b/goblin/blob/master/src/elf/mod.rs#L393-L424 as it
85    // currently isn't public, but we need this to parse an ELF.
86    fn gnu_hash_len(bytes: &[u8], offset: usize, ctx: Ctx) -> goblin::error::Result<usize> {
87        let buckets_num = bytes.pread_with::<u32>(offset, ctx.le)? as usize;
88        let min_chain = bytes.pread_with::<u32>(offset + 4, ctx.le)? as usize;
89        let bloom_size = bytes.pread_with::<u32>(offset + 8, ctx.le)? as usize;
90        // We could handle min_chain==0 if we really had to, but it shouldn't happen.
91        if buckets_num == 0 || min_chain == 0 || bloom_size == 0 {
92            return Err(goblin::error::Error::Malformed(format!(
93                "Invalid DT_GNU_HASH: buckets_num={buckets_num} min_chain={min_chain} bloom_size={bloom_size}"
94            )));
95        }
96        // Find the last bucket.
97        let buckets_offset = offset + 16 + bloom_size * if ctx.container.is_big() { 8 } else { 4 };
98        let mut max_chain = 0;
99        for bucket in 0..buckets_num {
100            let chain = bytes.pread_with::<u32>(buckets_offset + bucket * 4, ctx.le)? as usize;
101            if max_chain < chain {
102                max_chain = chain;
103            }
104        }
105        if max_chain < min_chain {
106            return Ok(0);
107        }
108        // Find the last chain within the bucket.
109        let mut chain_offset = buckets_offset + buckets_num * 4 + (max_chain - min_chain) * 4;
110        loop {
111            let hash = bytes.pread_with::<u32>(chain_offset, ctx.le)?;
112            max_chain += 1;
113            chain_offset += 4;
114            if hash & 1 != 0 {
115                return Ok(max_chain);
116            }
117        }
118    }
119
120    // Pulled from https://github.com/m4b/goblin/blob/master/src/elf/mod.rs#L426-L434 as it
121    // currently isn't public, but we need this to parse an ELF.
122    fn hash_len(
123        bytes: &[u8],
124        offset: usize,
125        machine: u16,
126        ctx: Ctx,
127    ) -> goblin::error::Result<usize> {
128        // Based on readelf code.
129        let nchain = if (machine == elf::header::EM_FAKE_ALPHA || machine == elf::header::EM_S390)
130            && ctx.container.is_big()
131        {
132            bytes.pread_with::<u64>(offset.saturating_add(4), ctx.le)? as usize
133        } else {
134            bytes.pread_with::<u32>(offset.saturating_add(4), ctx.le)? as usize
135        };
136        Ok(nchain)
137    }
138
    /// Tries to parse an ELF object from the given slice.
    ///
    /// Will return a partially parsed ELF object
    /// if at least the program and section headers can be parsed.
    ///
    /// The `max_decompressed_section_size` of `opts` is stored on the returned object.
    ///
    /// # Errors
    ///
    /// Returns an [`ElfError`] if the ELF header, the program headers or the section headers
    /// cannot be parsed. A failure in any later step does not produce an error; instead, the
    /// object parsed so far is returned with [`is_malformed`](Self::is_malformed) set.
    pub fn parse_with_opts(data: &'data [u8], opts: ParseObjectOptions) -> Result<Self, ElfError> {
        let header =
            elf::Elf::parse_header(data).map_err(|_| ElfError::new("ELF header unreadable"))?;
        // Start from a dummy `Elf` that only carries the header; the remaining tables are
        // filled in step by step below.
        let mut obj =
            elf::Elf::lazy_parse(header).map_err(|_| ElfError::new("cannot parse ELF header"))?;

        // Parsing context (word size and byte order) as declared by the header.
        let ctx = Ctx {
            container: if obj.is_64 {
                Container::Big
            } else {
                Container::Little
            },
            le: if obj.little_endian {
                scroll::Endian::Little
            } else {
                scroll::Endian::Big
            },
        };

        // Evaluates a fallible parse step. On failure, returns early from `parse_with_opts`
        // with whatever has been stored in `obj` so far, flagged as malformed.
        macro_rules! return_partial_on_err {
            ($parse_func:expr) => {
                if let Ok(expected) = $parse_func {
                    expected
                } else {
                    // Hand out the partially populated object instead of an error.
                    return Ok(ElfObject {
                        elf: obj,
                        data,
                        is_malformed: true,
                        max_decompressed_section_size: opts.max_decompressed_section_size,
                    });
                }
            };
        }

        // Program headers are mandatory: failing to parse them is a hard error.
        obj.program_headers =
            elf::ProgramHeader::parse(data, header.e_phoff as usize, header.e_phnum as usize, ctx)
                .map_err(|_| ElfError::new("unable to parse program headers"))?;

        // Read the interpreter path from a non-empty PT_INTERP segment. The last byte of the
        // segment is excluded from the string. Unreadable strings are ignored.
        for ph in &obj.program_headers {
            if ph.p_type == elf::program_header::PT_INTERP && ph.p_filesz != 0 {
                let count = (ph.p_filesz - 1) as usize;
                let offset = ph.p_offset as usize;
                obj.interpreter = data
                    .pread_with::<&str>(offset, ::scroll::ctx::StrCtx::Length(count))
                    .ok();
            }
        }

        // Section headers are mandatory as well.
        obj.section_headers =
            SectionHeader::parse(data, header.e_shoff as usize, header.e_shnum as usize, ctx)
                .map_err(|_| ElfError::new("unable to parse section headers"))?;

        // Loads the string table stored in the section with the given index. An out-of-range
        // index yields an empty table rather than an error.
        let get_strtab = |section_headers: &[SectionHeader], section_idx: usize| {
            if section_idx >= section_headers.len() {
                // FIXME: warn! here
                Ok(Strtab::default())
            } else {
                let shdr = &section_headers[section_idx];
                shdr.check_size(data.len())?;
                Strtab::parse(data, shdr.sh_offset as usize, shdr.sh_size as usize, 0x0)
            }
        };

        // Section name string table.
        let strtab_idx = header.e_shstrndx as usize;
        obj.shdr_strtab = return_partial_on_err!(get_strtab(&obj.section_headers, strtab_idx));

        // Static symbol table and its string table (linked via `sh_link`). If several
        // SHT_SYMTAB sections exist, the last one wins.
        obj.syms = elf::Symtab::default();
        obj.strtab = Strtab::default();
        for shdr in &obj.section_headers {
            if shdr.sh_type == elf::section_header::SHT_SYMTAB {
                // A zero entry size results in a count of zero instead of a division by zero.
                let count = shdr.sh_size.checked_div(shdr.sh_entsize).unwrap_or(0);
                obj.syms = return_partial_on_err!(elf::Symtab::parse(
                    data,
                    shdr.sh_offset as usize,
                    count as usize,
                    ctx
                ));

                obj.strtab =
                    return_partial_on_err!(get_strtab(&obj.section_headers, shdr.sh_link as usize));
            }
        }

        // Reset everything derived from the dynamic section before attempting to parse it.
        obj.soname = None;
        obj.libraries = vec![];
        obj.dynsyms = elf::Symtab::default();
        obj.dynrelas = elf::RelocSection::default();
        obj.dynrels = elf::RelocSection::default();
        obj.pltrelocs = elf::RelocSection::default();
        obj.dynstrtab = Strtab::default();
        let dynamic = return_partial_on_err!(elf::Dynamic::parse(data, &obj.program_headers, ctx));
        if let Some(ref dynamic) = dynamic {
            let dyn_info = &dynamic.info;
            obj.dynstrtab =
                return_partial_on_err!(Strtab::parse(data, dyn_info.strtab, dyn_info.strsz, 0x0));

            if dyn_info.soname != 0 {
                // FIXME: warn! here
                obj.soname = obj.dynstrtab.get_at(dyn_info.soname);
            }
            if dyn_info.needed_count > 0 {
                obj.libraries = dynamic.get_libraries(&obj.dynstrtab);
            }
            // parse the dynamic relocations
            obj.dynrelas = return_partial_on_err!(elf::RelocSection::parse(
                data,
                dyn_info.rela,
                dyn_info.relasz,
                true,
                ctx
            ));
            obj.dynrels = return_partial_on_err!(elf::RelocSection::parse(
                data,
                dyn_info.rel,
                dyn_info.relsz,
                false,
                ctx
            ));
            let is_rela = dyn_info.pltrel == elf::dynamic::DT_RELA;
            obj.pltrelocs = return_partial_on_err!(elf::RelocSection::parse(
                data,
                dyn_info.jmprel,
                dyn_info.pltrelsz,
                is_rela,
                ctx
            ));

            // Derive the number of dynamic symbols from the GNU hash table if present,
            // otherwise from the classic hash table, otherwise start at zero.
            let mut num_syms = if let Some(gnu_hash) = dyn_info.gnu_hash {
                return_partial_on_err!(ElfObject::gnu_hash_len(data, gnu_hash as usize, ctx))
            } else if let Some(hash) = dyn_info.hash {
                return_partial_on_err!(ElfObject::hash_len(
                    data,
                    hash as usize,
                    header.e_machine,
                    ctx
                ))
            } else {
                0
            };
            // Make sure the count also covers the highest symbol index used by any relocation.
            let max_reloc_sym = obj
                .dynrelas
                .iter()
                .chain(obj.dynrels.iter())
                .chain(obj.pltrelocs.iter())
                .fold(0, |num, reloc| cmp::max(num, reloc.r_sym));
            if max_reloc_sym != 0 {
                num_syms = cmp::max(num_syms, max_reloc_sym + 1);
            }

            obj.dynsyms =
                return_partial_on_err!(elf::Symtab::parse(data, dyn_info.symtab, num_syms, ctx));
        }

        // If the dynamic symbol table is empty, try finding a SHT_DYNSYM section in the section headers.
        // See https://refspecs.linuxfoundation.org/LSB_2.1.0/LSB-Core-generic/LSB-Core-generic/elftypes.html:
        //
        // > This section holds a minimal set of symbols adequate for dynamic linking. See also SHT_SYMTAB. Currently, an object file may have either a section of SHT_SYMTAB type or a section of SHT_DYNSYM type, but not both.
        if obj.dynsyms.is_empty() {
            if let Some(shdr) = obj
                .section_headers
                .iter()
                .find(|h| h.sh_type == elf::section_header::SHT_DYNSYM)
            {
                let count = shdr.sh_size.checked_div(shdr.sh_entsize).unwrap_or(0);
                obj.dynsyms = return_partial_on_err!(elf::Symtab::parse(
                    data,
                    shdr.sh_offset as usize,
                    count as usize,
                    ctx
                ));

                obj.dynstrtab =
                    return_partial_on_err!(get_strtab(&obj.section_headers, shdr.sh_link as usize));
            }
        }

        // Relocations stored in SHT_REL / SHT_RELA sections, keyed by their section index.
        obj.shdr_relocs = vec![];
        for (idx, section) in obj.section_headers.iter().enumerate() {
            let is_rela = section.sh_type == elf::section_header::SHT_RELA;
            if is_rela || section.sh_type == elf::section_header::SHT_REL {
                return_partial_on_err!(section.check_size(data.len()));
                let sh_relocs = return_partial_on_err!(elf::RelocSection::parse(
                    data,
                    section.sh_offset as usize,
                    section.sh_size as usize,
                    is_rela,
                    ctx,
                ));
                obj.shdr_relocs.push((idx, sh_relocs));
            }
        }

        // Symbol versioning sections.
        obj.versym = return_partial_on_err!(elf::symver::VersymSection::parse(
            data,
            &obj.section_headers,
            ctx
        ));
        obj.verdef = return_partial_on_err!(elf::symver::VerdefSection::parse(
            data,
            &obj.section_headers,
            ctx
        ));
        obj.verneed = return_partial_on_err!(elf::symver::VerneedSection::parse(
            data,
            &obj.section_headers,
            ctx
        ));

        // Every step succeeded.
        Ok(ElfObject {
            elf: obj,
            data,
            is_malformed: false,
            max_decompressed_section_size: opts.max_decompressed_section_size,
        })
    }
360
361    /// Tries to parse an ELF object from the given slice, with default options.
362    ///
363    /// Will return a partially parsed ELF object
364    /// if at least the program and section headers can be parsed.
365    pub fn parse(data: &'data [u8]) -> Result<Self, ElfError> {
366        Self::parse_with_opts(data, ParseObjectOptions::default())
367    }
    /// The container file format, which is always `FileFormat::Elf`.
    ///
    /// The result does not depend on the contents of the object.
    pub fn file_format(&self) -> FileFormat {
        FileFormat::Elf
    }
372
373    /// The code identifier of this object.
374    ///
375    /// As opposed to Mach-O, ELF does not specify a unique ID for object files in
376    /// its header. Compilers and linkers usually add either `SHT_NOTE` sections or
377    /// `PT_NOTE` program header elements for this purpose.
378    pub fn code_id(&self) -> Option<CodeId> {
379        self.find_build_id()
380            .filter(|slice| !slice.is_empty())
381            .map(CodeId::from_binary)
382    }
383
384    /// The debug link of this object.
385    ///
386    /// The debug link is an alternative to the build id for specifying the location
387    /// of an ELF's debugging information. It refers to a filename that can be used
388    /// to build various debug paths where debuggers can look for the debug files.
389    ///
390    /// # Errors
391    ///
392    /// - None if there is no gnu_debuglink section
393    /// - DebugLinkError if this section exists, but is malformed
394    pub fn debug_link(&self) -> Result<Option<DebugLink<'_>>, DebugLinkError<'_>> {
395        self.section("gnu_debuglink")
396            .map(|section| DebugLink::from_data(section.data, self.endianity()))
397            .transpose()
398    }
399
    /// The binary's soname, if any.
    ///
    /// This is the `soname` recorded on the parsed ELF. Parsing fills it in from the dynamic
    /// string table when the dynamic section declares one.
    pub fn name(&self) -> Option<&'data str> {
        self.elf.soname
    }
404
405    /// The debug information identifier of an ELF object.
406    ///
407    /// The debug identifier is a rehash of the first 16 bytes of the `code_id`, if
408    /// present. Otherwise, this function will hash the first page of the `.text`
409    /// section (program code) to synthesize a unique ID. This is likely not a valid
410    /// UUID since was generated off a hash value.
411    ///
412    /// If all of the above fails, the identifier will be an empty `DebugId`.
413    pub fn debug_id(&self) -> DebugId {
414        // Search for a GNU build identifier node in the program headers or the
415        // build ID section. If errors occur during this process, fall through
416        // silently to the next method.
417        if let Some(identifier) = self.find_build_id() {
418            return self.compute_debug_id(identifier);
419        }
420
421        // We were not able to locate the build ID, so fall back to hashing the
422        // first page of the ".text" (program code) section. This algorithm XORs
423        // 16-byte chunks directly into a UUID buffer.
424        if let Some(section) = self.raw_section("text") {
425            let mut hash = [0; UUID_SIZE];
426            for i in 0..std::cmp::min(section.data.len(), PAGE_SIZE) {
427                hash[i % UUID_SIZE] ^= section.data[i];
428            }
429
430            return self.compute_debug_id(&hash);
431        }
432
433        DebugId::default()
434    }
435
436    /// The CPU architecture of this object, as specified in the ELF header.
437    pub fn arch(&self) -> Arch {
438        match self.elf.header.e_machine {
439            goblin::elf::header::EM_386 => Arch::X86,
440            goblin::elf::header::EM_X86_64 => Arch::Amd64,
441            goblin::elf::header::EM_AARCH64 => Arch::Arm64,
442            // NOTE: This could actually be any of the other 32bit ARMs. Since we don't need this
443            // information, we use the generic Arch::Arm. By reading CPU_arch and FP_arch attributes
444            // from the SHT_ARM_ATTRIBUTES section it would be possible to distinguish the ARM arch
445            // version and infer hard/soft FP.
446            //
447            // For more information, see:
448            // http://code.metager.de/source/xref/gnu/src/binutils/readelf.c#11282
449            // https://stackoverflow.com/a/20556156/4228225
450            goblin::elf::header::EM_ARM => Arch::Arm,
451            goblin::elf::header::EM_PPC => Arch::Ppc,
452            goblin::elf::header::EM_PPC64 => Arch::Ppc64,
453            goblin::elf::header::EM_MIPS | goblin::elf::header::EM_MIPS_RS3_LE => {
454                if self.elf.header.e_flags & MIPS_64_FLAGS != 0 {
455                    Arch::Mips64
456                } else {
457                    Arch::Mips
458                }
459            }
460            _ => Arch::Unknown,
461        }
462    }
463
464    /// The kind of this object, as specified in the ELF header.
465    pub fn kind(&self) -> ObjectKind {
466        const ET_SCE_DYNEXEC: u16 = 0xfe10;
467        const ET_SCE_DYNAMIC: u16 = 0xfe18;
468
469        let kind = match self.elf.header.e_type {
470            goblin::elf::header::ET_NONE => ObjectKind::None,
471            goblin::elf::header::ET_REL => ObjectKind::Relocatable,
472            goblin::elf::header::ET_EXEC => ObjectKind::Executable,
473            goblin::elf::header::ET_DYN => ObjectKind::Library,
474            goblin::elf::header::ET_CORE => ObjectKind::Dump,
475            ET_SCE_DYNEXEC => ObjectKind::Executable,
476            ET_SCE_DYNAMIC => ObjectKind::Library,
477            _ => ObjectKind::Other,
478        };
479
480        // When stripping debug information into a separate file with objcopy,
481        // the eh_type field still reads ET_EXEC. However, the interpreter is
482        // removed. Since an executable without interpreter does not make any
483        // sense, we assume ``Debug`` in this case.
484        if kind == ObjectKind::Executable && self.elf.interpreter.is_none() {
485            return ObjectKind::Debug;
486        }
487
488        // The same happens for libraries. However, here we can only check for
489        // a missing text section. If this still yields too many false positivies,
490        // we will have to check either the size or offset of that section in
491        // the future.
492        if kind == ObjectKind::Library && self.raw_section("text").is_none() {
493            return ObjectKind::Debug;
494        }
495
496        kind
497    }
498
499    /// The address at which the image prefers to be loaded into memory.
500    ///
501    /// ELF files store all internal addresses as if it was loaded at that address. When the image
502    /// is actually loaded, that spot might already be taken by other images and so it must be
503    /// relocated to a new address. At runtime, a relocation table manages the arithmetics behind
504    /// this.
505    ///
506    /// Addresses used in `symbols` or `debug_session` have already been rebased relative to that
507    /// load address, so that the caller only has to deal with addresses relative to the actual
508    /// start of the image.
509    pub fn load_address(&self) -> u64 {
510        // For non-PIC executables (e_type == ET_EXEC), the load address is
511        // the start address of the first PT_LOAD segment.  (ELF requires
512        // the segments to be sorted by load address.)  For PIC executables
513        // and dynamic libraries (e_type == ET_DYN), this address will
514        // normally be zero.
515        for phdr in &self.elf.program_headers {
516            if phdr.p_type == elf::program_header::PT_LOAD {
517                return phdr.p_vaddr;
518            }
519        }
520
521        0
522    }
523
524    /// Determines whether this object exposes a public symbol table.
525    pub fn has_symbols(&self) -> bool {
526        !self.elf.syms.is_empty() || !self.elf.dynsyms.is_empty()
527    }
528
529    /// Returns an iterator over symbols in the public symbol table.
530    pub fn symbols(&self) -> ElfSymbolIterator<'data, '_> {
531        ElfSymbolIterator {
532            symbols: self.elf.syms.iter(),
533            strtab: &self.elf.strtab,
534            dynamic_symbols: self.elf.dynsyms.iter(),
535            dynamic_strtab: &self.elf.dynstrtab,
536            sections: &self.elf.section_headers,
537            load_addr: self.load_address(),
538        }
539    }
540
541    /// Returns an ordered map of symbols in the symbol table.
542    pub fn symbol_map(&self) -> SymbolMap<'data> {
543        self.symbols().collect()
544    }
545
    /// Determines whether this object contains debug information.
    ///
    /// This only checks for the presence of a `debug_info` section.
    pub fn has_debug_info(&self) -> bool {
        self.has_section("debug_info")
    }
550
551    /// Constructs a debugging session.
552    ///
553    /// A debugging session loads certain information from the object file and creates caches for
554    /// efficient access to various records in the debug information. Since this can be quite a
555    /// costly process, try to reuse the debugging session as long as possible.
556    ///
557    /// ELF files generally use DWARF debugging information, which is also used by MachO containers
558    /// on macOS.
559    ///
560    /// Constructing this session will also work if the object does not contain debugging
561    /// information, in which case the session will be a no-op. This can be checked via
562    /// [`has_debug_info`](struct.ElfObject.html#method.has_debug_info).
563    pub fn debug_session(&self) -> Result<DwarfDebugSession<'data>, DwarfError> {
564        let symbols = self.symbol_map();
565        DwarfDebugSession::parse(self, symbols, self.load_address() as i64, self.kind())
566    }
567
568    /// Determines whether this object contains stack unwinding information.
569    pub fn has_unwind_info(&self) -> bool {
570        self.has_section("eh_frame") || self.has_section("debug_frame")
571    }
572
    /// Determines whether this object contains embedded source.
    ///
    /// Always returns `false` for ELF objects.
    pub fn has_sources(&self) -> bool {
        false
    }
577
    /// Determines whether this object is malformed and was only partially parsed.
    ///
    /// The flag is set while parsing when a step after the program and section headers fails.
    pub fn is_malformed(&self) -> bool {
        self.is_malformed
    }
582
    /// Returns the raw data of the ELF file.
    ///
    /// This is the complete slice that was originally passed to the parser.
    pub fn data(&self) -> &'data [u8] {
        self.data
    }
587
588    /// Decompresses the given compressed section data, if supported.
589    fn decompress_section(&self, section_data: &[u8]) -> Option<Vec<u8>> {
590        enum CompressionType {
591            Zlib,
592            Zstd,
593        }
594
595        let (ty, size, compressed) = if section_data.starts_with(b"ZLIB") {
596            // The GNU compression header is a 4 byte magic "ZLIB", followed by an 8-byte big-endian
597            // size prefix of the decompressed data. This adds up to 12 bytes of GNU header.
598            if section_data.len() < 12 {
599                return None;
600            }
601
602            let mut size_bytes = [0; 8];
603            size_bytes.copy_from_slice(&section_data[4..12]);
604
605            (
606                CompressionType::Zlib,
607                u64::from_be_bytes(size_bytes) as usize,
608                &section_data[12..],
609            )
610        } else {
611            let container = self.elf.header.container().ok()?;
612            let endianness = self.elf.header.endianness().ok()?;
613            let context = Ctx::new(container, endianness);
614
615            let compression = CompressionHeader::parse(section_data, 0, context).ok()?;
616            let ty = match compression.ch_type {
617                ELFCOMPRESS_ZLIB => CompressionType::Zlib,
618                ELFCOMPRESS_ZSTD => CompressionType::Zstd,
619                _ => {
620                    return None;
621                }
622            };
623
624            let compressed = &section_data[CompressionHeader::size(context)..];
625            (ty, compression.ch_size as usize, compressed)
626        };
627
628        if size > self.max_decompressed_section_size.unwrap_or(usize::MAX) {
629            return None;
630        }
631
632        let decompressed = match ty {
633            CompressionType::Zlib => {
634                let mut decompressed = Vec::with_capacity(size);
635                Decompress::new(true)
636                    .decompress_vec(compressed, &mut decompressed, FlushDecompress::Finish)
637                    .ok()?;
638                decompressed
639            }
640            CompressionType::Zstd => zstd::bulk::decompress(compressed, size).ok()?,
641        };
642
643        Some(decompressed)
644    }
645
646    /// Locates and reads a section in an ELF binary.
647    fn find_section(&self, name: &str) -> Option<(bool, DwarfSection<'data>)> {
648        for header in &self.elf.section_headers {
649            // The section type is usually SHT_PROGBITS, but some compilers also use
650            // SHT_X86_64_UNWIND and SHT_MIPS_DWARF. We apply the same approach as elfutils,
651            // matching against SHT_NOBITS, instead.
652            if header.sh_type == elf::section_header::SHT_NOBITS {
653                continue;
654            }
655
656            if let Some(section_name) = self.elf.shdr_strtab.get_at(header.sh_name) {
657                let offset = header.sh_offset as usize;
658                if offset == 0 {
659                    // We're defensive here. On darwin, dsymutil leaves phantom section headers
660                    // while stripping their data from the file by setting their offset to 0. We
661                    // know that no section can start at an absolute file offset of zero, so we can
662                    // safely skip them in case similar things happen on linux.
663                    continue;
664                }
665
666                if section_name.is_empty() {
667                    continue;
668                }
669
670                // Before SHF_COMPRESSED was a thing, compressed sections were prefixed with `.z`.
671                // Support this as an override to the flag.
672                let (compressed, section_name) = match section_name.strip_prefix(".z") {
673                    Some(name) => (true, name),
674                    None => (header.sh_flags & SHF_COMPRESSED != 0, &section_name[1..]),
675                };
676
677                if section_name != name {
678                    continue;
679                }
680
681                let size = header.sh_size as usize;
682                let data = &self.data[offset..][..size];
683                let section = DwarfSection {
684                    data: Cow::Borrowed(data),
685                    address: header.sh_addr,
686                    offset: header.sh_offset,
687                    align: header.sh_addralign,
688                };
689
690                return Some((compressed, section));
691            }
692        }
693
694        None
695    }
696
697    /// Searches for a GNU build identifier node in an ELF file.
698    ///
699    /// Depending on the compiler and linker, the build ID can be declared in a
700    /// PT_NOTE program header entry, the ".note.gnu.build-id" section, or even
701    /// both.
702    fn find_build_id(&self) -> Option<&'data [u8]> {
703        // First, search the note program headers (PT_NOTE) for a NT_GNU_BUILD_ID.
704        // We swallow all errors during this process and simply fall back to the
705        // next method below.
706        if let Some(mut notes) = self.elf.iter_note_headers(self.data) {
707            while let Some(Ok(note)) = notes.next() {
708                if note.n_type == elf::note::NT_GNU_BUILD_ID {
709                    return Some(note.desc);
710                }
711            }
712        }
713
714        // Some old linkers or compilers might not output the above PT_NOTE headers.
715        // In that case, search for a note section (SHT_NOTE). We are looking for a
716        // note within the ".note.gnu.build-id" section. Again, swallow all errors
717        // and fall through if reading the section is not possible.
718        if let Some(mut notes) = self
719            .elf
720            .iter_note_sections(self.data, Some(".note.gnu.build-id"))
721        {
722            while let Some(Ok(note)) = notes.next() {
723                if note.n_type == elf::note::NT_GNU_BUILD_ID {
724                    return Some(note.desc);
725                }
726            }
727        }
728
729        const PT_SCE_DYNLIBDATA: u32 = 0x61000000;
730
731        for ph in &self.elf.program_headers {
732            if ph.p_type == PT_SCE_DYNLIBDATA && ph.p_filesz >= 20 {
733                let offset = ph.p_offset as usize;
734                return self.data.get(offset..offset.saturating_add(20));
735            }
736        }
737
738        None
739    }
740
741    /// Converts an ELF object identifier into a `DebugId`.
742    ///
743    /// The identifier data is first truncated or extended to match 16 byte size of
744    /// Uuids. If the data is declared in little endian, the first three Uuid fields
745    /// are flipped to match the big endian expected by the breakpad processor.
746    ///
747    /// The `DebugId::appendix` field is always `0` for ELF.
748    fn compute_debug_id(&self, identifier: &[u8]) -> DebugId {
749        // Make sure that we have exactly UUID_SIZE bytes available
750        let mut data = [0; UUID_SIZE];
751        let len = std::cmp::min(identifier.len(), UUID_SIZE);
752        data[0..len].copy_from_slice(&identifier[0..len]);
753
754        if self.elf.little_endian {
755            // The file ELF file targets a little endian architecture. Convert to
756            // network byte order (big endian) to match the Breakpad processor's
757            // expectations. For big endian object files, this is not needed.
758            data[0..4].reverse(); // uuid field 1
759            data[4..6].reverse(); // uuid field 2
760            data[6..8].reverse(); // uuid field 3
761        }
762
763        Uuid::from_slice(&data)
764            .map(DebugId::from_uuid)
765            .unwrap_or_default()
766    }
767}
768
769impl fmt::Debug for ElfObject<'_> {
770    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
771        f.debug_struct("ElfObject")
772            .field("code_id", &self.code_id())
773            .field("debug_id", &self.debug_id())
774            .field("arch", &self.arch())
775            .field("kind", &self.kind())
776            .field("load_address", &format_args!("{:#x}", self.load_address()))
777            .field("has_symbols", &self.has_symbols())
778            .field("has_debug_info", &self.has_debug_info())
779            .field("has_unwind_info", &self.has_unwind_info())
780            .field("is_malformed", &self.is_malformed())
781            .finish()
782    }
783}
784
785impl<'slf, 'data: 'slf> AsSelf<'slf> for ElfObject<'data> {
786    type Ref = ElfObject<'slf>;
787
788    fn as_self(&'slf self) -> &'slf Self::Ref {
789        self
790    }
791}
792
793impl<'data> Parse<'data> for ElfObject<'data> {
794    type Error = ElfError;
795
796    fn test(data: &[u8]) -> bool {
797        Self::test(data)
798    }
799
800    fn parse_with_opts(data: &'data [u8], opts: ParseObjectOptions) -> Result<Self, ElfError> {
801        Self::parse_with_opts(data, opts)
802    }
803}
804
805impl<'data: 'object, 'object> ObjectLike<'data, 'object> for ElfObject<'data> {
806    type Error = DwarfError;
807    type Session = DwarfDebugSession<'data>;
808    type SymbolIterator = ElfSymbolIterator<'data, 'object>;
809
810    fn file_format(&self) -> FileFormat {
811        self.file_format()
812    }
813
814    fn code_id(&self) -> Option<CodeId> {
815        self.code_id()
816    }
817
818    fn debug_id(&self) -> DebugId {
819        self.debug_id()
820    }
821
822    fn arch(&self) -> Arch {
823        self.arch()
824    }
825
826    fn kind(&self) -> ObjectKind {
827        self.kind()
828    }
829
830    fn load_address(&self) -> u64 {
831        self.load_address()
832    }
833
834    fn has_symbols(&self) -> bool {
835        self.has_symbols()
836    }
837
838    fn symbols(&'object self) -> Self::SymbolIterator {
839        self.symbols()
840    }
841
842    fn symbol_map(&self) -> SymbolMap<'data> {
843        self.symbol_map()
844    }
845
846    fn has_debug_info(&self) -> bool {
847        self.has_debug_info()
848    }
849
850    fn debug_session(&self) -> Result<Self::Session, Self::Error> {
851        self.debug_session()
852    }
853
854    fn has_unwind_info(&self) -> bool {
855        self.has_unwind_info()
856    }
857
858    fn has_sources(&self) -> bool {
859        self.has_sources()
860    }
861
862    fn is_malformed(&self) -> bool {
863        self.is_malformed()
864    }
865}
866
867impl<'data> Dwarf<'data> for ElfObject<'data> {
868    fn endianity(&self) -> Endian {
869        if self.elf.little_endian {
870            Endian::Little
871        } else {
872            Endian::Big
873        }
874    }
875
876    fn raw_section(&self, name: &str) -> Option<DwarfSection<'data>> {
877        let (_, section) = self.find_section(name)?;
878        Some(section)
879    }
880
881    fn section(&self, name: &str) -> Option<DwarfSection<'data>> {
882        let (compressed, mut section) = self.find_section(name)?;
883
884        if compressed {
885            let decompressed = self.decompress_section(&section.data)?;
886            section.data = Cow::Owned(decompressed);
887        }
888
889        Some(section)
890    }
891}
892
893/// An iterator over symbols in the ELF file.
894///
895/// Returned by [`ElfObject::symbols`](struct.ElfObject.html#method.symbols).
896pub struct ElfSymbolIterator<'data, 'object> {
897    symbols: elf::sym::SymIterator<'data>,
898    strtab: &'object strtab::Strtab<'data>,
899    dynamic_symbols: elf::sym::SymIterator<'data>,
900    dynamic_strtab: &'object strtab::Strtab<'data>,
901    sections: &'object [elf::SectionHeader],
902    load_addr: u64,
903}
904
905impl<'data> Iterator for ElfSymbolIterator<'data, '_> {
906    type Item = Symbol<'data>;
907
908    fn next(&mut self) -> Option<Self::Item> {
909        fn get_symbols<'data>(
910            symbols: &mut SymIterator,
911            strtab: &Strtab<'data>,
912            load_addr: u64,
913            sections: &[SectionHeader],
914        ) -> Option<Symbol<'data>> {
915            for symbol in symbols {
916                // Only check for function symbols.
917                if symbol.st_type() != elf::sym::STT_FUNC {
918                    continue;
919                }
920
921                // Sanity check of the symbol address. Since we only intend to iterate over function
922                // symbols, they need to be mapped after the image's load address.
923                if symbol.st_value < load_addr {
924                    continue;
925                }
926
927                let section = match symbol.st_shndx {
928                    self::SHN_UNDEF => None,
929                    index => sections.get(index),
930                };
931
932                // We are only interested in symbols pointing into sections with executable flag.
933                if !section.is_some_and(|header| header.is_executable()) {
934                    continue;
935                }
936
937                let name = strtab.get_at(symbol.st_name).map(Cow::Borrowed);
938
939                return Some(Symbol {
940                    name,
941                    address: symbol.st_value - load_addr,
942                    size: symbol.st_size,
943                });
944            }
945
946            None
947        }
948
949        get_symbols(
950            &mut self.symbols,
951            self.strtab,
952            self.load_addr,
953            self.sections,
954        )
955        .or_else(|| {
956            get_symbols(
957                &mut self.dynamic_symbols,
958                self.dynamic_strtab,
959                self.load_addr,
960                self.sections,
961            )
962        })
963    }
964}
965
/// The parsed contents of a debug link section: a filename and a CRC checksum.
#[derive(Debug)]
pub struct DebugLink<'data> {
    /// NUL-terminated filename stored at the start of the section.
    filename: Cow<'data, CStr>,
    /// CRC checksum stored at the end of the section.
    crc: u32,
}
972
973impl<'data> DebugLink<'data> {
974    /// Attempts to parse a debug link section from its data.
975    ///
976    /// The expected format for the section is:
977    ///
978    /// - A filename, with any leading directory components removed, followed by a zero byte,
979    /// - zero to three bytes of padding, as needed to reach the next four-byte boundary within the section, and
980    /// - a four-byte CRC checksum, stored in the same endianness used for the executable file itself.
981    ///
982    /// (from <https://sourceware.org/gdb/current/onlinedocs/gdb/Separate-Debug-Files.html#index-_002egnu_005fdebuglink-sections>)
983    ///
984    /// # Errors
985    ///
986    /// If the section data is malformed, in particular:
987    /// - No NUL byte delimiting the filename from the CRC
988    /// - Not enough space for the CRC checksum
989    pub fn from_data(
990        data: Cow<'data, [u8]>,
991        endianity: Endian,
992    ) -> Result<Self, DebugLinkError<'data>> {
993        match data {
994            Cow::Owned(data) => {
995                let (filename, crc) = Self::from_borrowed_data(&data, endianity)
996                    .map(|(filename, crc)| (filename.to_owned(), crc))
997                    .map_err(|kind| DebugLinkError {
998                        kind,
999                        data: Cow::Owned(data),
1000                    })?;
1001                Ok(Self {
1002                    filename: Cow::Owned(filename),
1003                    crc,
1004                })
1005            }
1006            Cow::Borrowed(data) => {
1007                let (filename, crc) =
1008                    Self::from_borrowed_data(data, endianity).map_err(|kind| DebugLinkError {
1009                        kind,
1010                        data: Cow::Borrowed(data),
1011                    })?;
1012                Ok(Self {
1013                    filename: Cow::Borrowed(filename),
1014                    crc,
1015                })
1016            }
1017        }
1018    }
1019
1020    fn from_borrowed_data(
1021        data: &[u8],
1022        endianity: Endian,
1023    ) -> Result<(&CStr, u32), DebugLinkErrorKind> {
1024        let nul_pos = data
1025            .iter()
1026            .position(|byte| *byte == 0)
1027            .ok_or(DebugLinkErrorKind::MissingNul)?;
1028
1029        if nul_pos + 1 == data.len() {
1030            return Err(DebugLinkErrorKind::MissingCrc {
1031                filename_len_with_nul: nul_pos + 1,
1032            });
1033        }
1034
1035        let filename = &data[..nul_pos + 1];
1036
1037        // let's be liberal and assume that the padding is correct and all 0s,
1038        // and just check that we have enough remaining length for the CRC.
1039        let crc = data
1040            .get(nul_pos + 1..)
1041            .and_then(|crc| crc.get(crc.len() - 4..))
1042            .ok_or(DebugLinkErrorKind::MissingCrc {
1043                filename_len_with_nul: filename.len(),
1044            })?;
1045
1046        let crc: [u8; 4] = crc.try_into().map_err(|_| DebugLinkErrorKind::MissingCrc {
1047            filename_len_with_nul: filename.len(),
1048        })?;
1049
1050        let crc = match endianity {
1051            Endian::Little => u32::from_le_bytes(crc),
1052            Endian::Big => u32::from_be_bytes(crc),
1053        };
1054
1055        let filename =
1056            CStr::from_bytes_with_nul(filename).map_err(|_| DebugLinkErrorKind::MissingNul)?;
1057
1058        Ok((filename, crc))
1059    }
1060
1061    /// The debug link filename
1062    pub fn filename(&self) -> &CStr {
1063        &self.filename
1064    }
1065
1066    /// The CRC checksum associated with the debug link file
1067    pub fn crc(&self) -> u32 {
1068        self.crc
1069    }
1070}
1071
1072/// Kind of errors that can occur while parsing a debug link section.
1073#[derive(Debug, Error)]
1074pub enum DebugLinkErrorKind {
1075    /// No NUL byte delimiting the filename from the CRC
1076    #[error("missing NUL character")]
1077    MissingNul,
1078    /// Not enough space in the section data for the CRC checksum
1079    #[error("missing CRC")]
1080    MissingCrc {
1081        /// Size of the filename part of the section including the NUL character
1082        filename_len_with_nul: usize,
1083    },
1084}
1085
1086/// Errors that can occur while parsing a debug link section.
1087#[derive(Debug, Error)]
1088#[error("could not parse debug link section")]
1089pub struct DebugLinkError<'data> {
1090    #[source]
1091    /// The kind of error that occurred.
1092    pub kind: DebugLinkErrorKind,
1093    /// The original data of the debug section.
1094    pub data: Cow<'data, [u8]>,
1095}