Skip to main content

symbolic_debuginfo/
elf.rs

1//! Support for the Executable and Linkable Format, used on Linux.
2
3use std::borrow::Cow;
4use std::error::Error;
5use std::ffi::CStr;
6use std::fmt;
7
8use core::cmp;
9use flate2::{Decompress, FlushDecompress};
10use goblin::elf::compression_header::{CompressionHeader, ELFCOMPRESS_ZLIB};
11use goblin::elf::SectionHeader;
12use goblin::elf64::sym::SymIterator;
13use goblin::strtab::Strtab;
14use goblin::{
15    container::{Container, Ctx},
16    elf, strtab,
17};
18use scroll::Pread;
19use thiserror::Error;
20
21use symbolic_common::{Arch, AsSelf, CodeId, DebugId, Uuid};
22
23use crate::base::*;
24use crate::dwarf::{Dwarf, DwarfDebugSession, DwarfError, DwarfSection, Endian};
25
/// Size of a UUID in bytes. Build identifiers are truncated or zero-padded to this length
/// before they are converted into a `DebugId`.
const UUID_SIZE: usize = 16;
/// Upper bound on the number of leading `.text` bytes that are hashed when a debug identifier
/// has to be synthesized because no build ID is present.
const PAGE_SIZE: usize = 4096;

/// `SHN_UNDEF` as exported by `goblin`, widened to `usize`.
const SHN_UNDEF: usize = elf::section_header::SHN_UNDEF as usize;
/// `SHF_COMPRESSED` as exported by `goblin`, widened to `u64` so it can be masked against a
/// section header's `sh_flags`.
const SHF_COMPRESSED: u64 = elf::section_header::SHF_COMPRESSED as u64;

/// The ELF compression header type for `zstd`, as that is not (yet) exported by `goblin`.
pub const ELFCOMPRESS_ZSTD: u32 = 2;

/// This file follows the first MIPS 32 bit ABI
#[allow(unused)]
const EF_MIPS_ABI_O32: u32 = 0x0000_1000;
/// O32 ABI extended for 64-bit architecture.
const EF_MIPS_ABI_O64: u32 = 0x0000_2000;
/// EABI in 32 bit mode.
#[allow(unused)]
const EF_MIPS_ABI_EABI32: u32 = 0x0000_3000;
/// EABI in 64 bit mode.
const EF_MIPS_ABI_EABI64: u32 = 0x0000_4000;

/// Any flag value that might indicate 64-bit MIPS.
///
/// This is the bitwise OR of the two 64-bit ABI values; a header is treated as 64-bit MIPS when
/// any of these bits is set in `e_flags`.
const MIPS_64_FLAGS: u32 = EF_MIPS_ABI_O64 | EF_MIPS_ABI_EABI64;
48
/// An error when dealing with [`ElfObject`](struct.ElfObject.html).
///
/// The display message is always `invalid ELF file`. The underlying cause, if one was recorded,
/// is available as the error's source.
#[derive(Debug, Error)]
#[error("invalid ELF file")]
pub struct ElfError {
    /// The boxed cause of this error, if any.
    #[source]
    source: Option<Box<dyn Error + Send + Sync + 'static>>,
}
56
57impl ElfError {
58    /// Creates a new ELF error from an arbitrary error payload.
59    fn new<E>(source: E) -> Self
60    where
61        E: Into<Box<dyn Error + Send + Sync>>,
62    {
63        let source = Some(source.into());
64        Self { source }
65    }
66}
67
/// Executable and Linkable Format, used for executables and libraries on Linux.
pub struct ElfObject<'data> {
    /// The parsed ELF structure as produced by `goblin`. It may be only partially populated
    /// when `is_malformed` is set.
    elf: elf::Elf<'data>,
    /// The raw bytes of the whole ELF file that `elf` was parsed from.
    data: &'data [u8],
    /// Set when parsing stopped early and only a partial object could be constructed.
    is_malformed: bool,
}
74
75impl<'data> ElfObject<'data> {
76    /// Tests whether the buffer could contain an ELF object.
77    pub fn test(data: &[u8]) -> bool {
78        data.get(0..elf::header::SELFMAG)
79            .is_some_and(|data| data == elf::header::ELFMAG)
80    }
81
82    // Pulled from https://github.com/m4b/goblin/blob/master/src/elf/mod.rs#L393-L424 as it
83    // currently isn't public, but we need this to parse an ELF.
84    fn gnu_hash_len(bytes: &[u8], offset: usize, ctx: Ctx) -> goblin::error::Result<usize> {
85        let buckets_num = bytes.pread_with::<u32>(offset, ctx.le)? as usize;
86        let min_chain = bytes.pread_with::<u32>(offset + 4, ctx.le)? as usize;
87        let bloom_size = bytes.pread_with::<u32>(offset + 8, ctx.le)? as usize;
88        // We could handle min_chain==0 if we really had to, but it shouldn't happen.
89        if buckets_num == 0 || min_chain == 0 || bloom_size == 0 {
90            return Err(goblin::error::Error::Malformed(format!(
91                "Invalid DT_GNU_HASH: buckets_num={buckets_num} min_chain={min_chain} bloom_size={bloom_size}"
92            )));
93        }
94        // Find the last bucket.
95        let buckets_offset = offset + 16 + bloom_size * if ctx.container.is_big() { 8 } else { 4 };
96        let mut max_chain = 0;
97        for bucket in 0..buckets_num {
98            let chain = bytes.pread_with::<u32>(buckets_offset + bucket * 4, ctx.le)? as usize;
99            if max_chain < chain {
100                max_chain = chain;
101            }
102        }
103        if max_chain < min_chain {
104            return Ok(0);
105        }
106        // Find the last chain within the bucket.
107        let mut chain_offset = buckets_offset + buckets_num * 4 + (max_chain - min_chain) * 4;
108        loop {
109            let hash = bytes.pread_with::<u32>(chain_offset, ctx.le)?;
110            max_chain += 1;
111            chain_offset += 4;
112            if hash & 1 != 0 {
113                return Ok(max_chain);
114            }
115        }
116    }
117
118    // Pulled from https://github.com/m4b/goblin/blob/master/src/elf/mod.rs#L426-L434 as it
119    // currently isn't public, but we need this to parse an ELF.
120    fn hash_len(
121        bytes: &[u8],
122        offset: usize,
123        machine: u16,
124        ctx: Ctx,
125    ) -> goblin::error::Result<usize> {
126        // Based on readelf code.
127        let nchain = if (machine == elf::header::EM_FAKE_ALPHA || machine == elf::header::EM_S390)
128            && ctx.container.is_big()
129        {
130            bytes.pread_with::<u64>(offset.saturating_add(4), ctx.le)? as usize
131        } else {
132            bytes.pread_with::<u32>(offset.saturating_add(4), ctx.le)? as usize
133        };
134        Ok(nchain)
135    }
136
    /// Tries to parse an ELF object from the given slice. Will return a partially parsed ELF object
    /// if at least the program and section headers can be parsed.
    ///
    /// # Errors
    ///
    /// Returns an [`ElfError`] if the ELF header, the program headers or the section headers
    /// cannot be read. Failures in any later stage do not produce an error; instead, the object
    /// parsed up to that point is returned with `is_malformed` set.
    pub fn parse(data: &'data [u8]) -> Result<Self, ElfError> {
        let header =
            elf::Elf::parse_header(data).map_err(|_| ElfError::new("ELF header unreadable"))?;
        // dummy Elf with only header
        let mut obj =
            elf::Elf::lazy_parse(header).map_err(|_| ElfError::new("cannot parse ELF header"))?;

        // Parsing context (container width and byte order) shared by all parsers below.
        let ctx = Ctx {
            container: if obj.is_64 {
                Container::Big
            } else {
                Container::Little
            },
            le: if obj.little_endian {
                scroll::Endian::Little
            } else {
                scroll::Endian::Big
            },
        };

        // Unwraps the result of a parse step. On failure, this returns from `parse` with
        // whatever has been stored in `obj` so far and marks the object as malformed.
        macro_rules! return_partial_on_err {
            ($parse_func:expr) => {
                if let Ok(expected) = $parse_func {
                    expected
                } else {
                    // Hand back what was parsed up to this point instead of failing.
                    return Ok(ElfObject {
                        elf: obj,
                        data,
                        is_malformed: true,
                    });
                }
            };
        }

        // Program headers are mandatory: failing to parse them is a hard error.
        obj.program_headers =
            elf::ProgramHeader::parse(data, header.e_phoff as usize, header.e_phnum as usize, ctx)
                .map_err(|_| ElfError::new("unable to parse program headers"))?;

        // Read the interpreter path from a non-empty PT_INTERP segment. One byte is dropped
        // from the end of the segment, and a string that cannot be read is ignored.
        for ph in &obj.program_headers {
            if ph.p_type == elf::program_header::PT_INTERP && ph.p_filesz != 0 {
                let count = (ph.p_filesz - 1) as usize;
                let offset = ph.p_offset as usize;
                obj.interpreter = data
                    .pread_with::<&str>(offset, ::scroll::ctx::StrCtx::Length(count))
                    .ok();
            }
        }

        // Section headers are mandatory as well.
        obj.section_headers =
            SectionHeader::parse(data, header.e_shoff as usize, header.e_shnum as usize, ctx)
                .map_err(|_| ElfError::new("unable to parse section headers"))?;

        // Loads the string table stored in the section with the given index. An index outside
        // of the section header table yields an empty string table rather than an error.
        let get_strtab = |section_headers: &[SectionHeader], section_idx: usize| {
            if section_idx >= section_headers.len() {
                // FIXME: warn! here
                Ok(Strtab::default())
            } else {
                let shdr = &section_headers[section_idx];
                shdr.check_size(data.len())?;
                Strtab::parse(data, shdr.sh_offset as usize, shdr.sh_size as usize, 0x0)
            }
        };

        // Section header string table, which holds the section names.
        let strtab_idx = header.e_shstrndx as usize;
        obj.shdr_strtab = return_partial_on_err!(get_strtab(&obj.section_headers, strtab_idx));

        // Static symbol table and its string table. The loop does not stop at the first
        // SHT_SYMTAB section, so a later one replaces an earlier one.
        obj.syms = elf::Symtab::default();
        obj.strtab = Strtab::default();
        for shdr in &obj.section_headers {
            if shdr.sh_type == elf::section_header::SHT_SYMTAB {
                // An entry size of zero results in an empty table instead of a division error.
                let count = shdr.sh_size.checked_div(shdr.sh_entsize).unwrap_or(0);
                obj.syms = return_partial_on_err!(elf::Symtab::parse(
                    data,
                    shdr.sh_offset as usize,
                    count as usize,
                    ctx
                ));

                obj.strtab =
                    return_partial_on_err!(get_strtab(&obj.section_headers, shdr.sh_link as usize));
            }
        }

        // Reset everything derived from the dynamic section before attempting to parse it.
        obj.soname = None;
        obj.libraries = vec![];
        obj.dynsyms = elf::Symtab::default();
        obj.dynrelas = elf::RelocSection::default();
        obj.dynrels = elf::RelocSection::default();
        obj.pltrelocs = elf::RelocSection::default();
        obj.dynstrtab = Strtab::default();
        let dynamic = return_partial_on_err!(elf::Dynamic::parse(data, &obj.program_headers, ctx));
        if let Some(ref dynamic) = dynamic {
            let dyn_info = &dynamic.info;
            obj.dynstrtab =
                return_partial_on_err!(Strtab::parse(data, dyn_info.strtab, dyn_info.strsz, 0x0));

            if dyn_info.soname != 0 {
                // FIXME: warn! here
                obj.soname = obj.dynstrtab.get_at(dyn_info.soname);
            }
            if dyn_info.needed_count > 0 {
                obj.libraries = dynamic.get_libraries(&obj.dynstrtab);
            }
            // parse the dynamic relocations
            obj.dynrelas = return_partial_on_err!(elf::RelocSection::parse(
                data,
                dyn_info.rela,
                dyn_info.relasz,
                true,
                ctx
            ));
            obj.dynrels = return_partial_on_err!(elf::RelocSection::parse(
                data,
                dyn_info.rel,
                dyn_info.relsz,
                false,
                ctx
            ));
            let is_rela = dyn_info.pltrel == elf::dynamic::DT_RELA;
            obj.pltrelocs = return_partial_on_err!(elf::RelocSection::parse(
                data,
                dyn_info.jmprel,
                dyn_info.pltrelsz,
                is_rela,
                ctx
            ));

            // The dynamic section does not state the number of dynamic symbols. Take it from
            // the GNU hash table if there is one, otherwise from the plain hash table, and fall
            // back to zero.
            let mut num_syms = if let Some(gnu_hash) = dyn_info.gnu_hash {
                return_partial_on_err!(ElfObject::gnu_hash_len(data, gnu_hash as usize, ctx))
            } else if let Some(hash) = dyn_info.hash {
                return_partial_on_err!(ElfObject::hash_len(
                    data,
                    hash as usize,
                    header.e_machine,
                    ctx
                ))
            } else {
                0
            };
            // Relocations may reference symbols beyond that count, so make sure the highest
            // referenced symbol index is covered.
            let max_reloc_sym = obj
                .dynrelas
                .iter()
                .chain(obj.dynrels.iter())
                .chain(obj.pltrelocs.iter())
                .fold(0, |num, reloc| cmp::max(num, reloc.r_sym));
            if max_reloc_sym != 0 {
                num_syms = cmp::max(num_syms, max_reloc_sym + 1);
            }

            obj.dynsyms =
                return_partial_on_err!(elf::Symtab::parse(data, dyn_info.symtab, num_syms, ctx));
        }

        // If the dynamic symbol table is empty, try finding a SHT_DYNSYM section in the section headers.
        // See https://refspecs.linuxfoundation.org/LSB_2.1.0/LSB-Core-generic/LSB-Core-generic/elftypes.html:
        //
        // > This section holds a minimal set of symbols adequate for dynamic linking. See also SHT_SYMTAB. Currently, an object file may have either a section of SHT_SYMTAB type or a section of SHT_DYNSYM type, but not both.
        if obj.dynsyms.is_empty() {
            if let Some(shdr) = obj
                .section_headers
                .iter()
                .find(|h| h.sh_type == elf::section_header::SHT_DYNSYM)
            {
                let count = shdr.sh_size.checked_div(shdr.sh_entsize).unwrap_or(0);
                obj.dynsyms = return_partial_on_err!(elf::Symtab::parse(
                    data,
                    shdr.sh_offset as usize,
                    count as usize,
                    ctx
                ));

                obj.dynstrtab =
                    return_partial_on_err!(get_strtab(&obj.section_headers, shdr.sh_link as usize));
            }
        }

        // Relocations stored in SHT_REL / SHT_RELA sections, keyed by their section index.
        obj.shdr_relocs = vec![];
        for (idx, section) in obj.section_headers.iter().enumerate() {
            let is_rela = section.sh_type == elf::section_header::SHT_RELA;
            if is_rela || section.sh_type == elf::section_header::SHT_REL {
                return_partial_on_err!(section.check_size(data.len()));
                let sh_relocs = return_partial_on_err!(elf::RelocSection::parse(
                    data,
                    section.sh_offset as usize,
                    section.sh_size as usize,
                    is_rela,
                    ctx,
                ));
                obj.shdr_relocs.push((idx, sh_relocs));
            }
        }

        // Symbol versioning sections.
        obj.versym = return_partial_on_err!(elf::symver::VersymSection::parse(
            data,
            &obj.section_headers,
            ctx
        ));
        obj.verdef = return_partial_on_err!(elf::symver::VerdefSection::parse(
            data,
            &obj.section_headers,
            ctx
        ));
        obj.verneed = return_partial_on_err!(elf::symver::VerneedSection::parse(
            data,
            &obj.section_headers,
            ctx
        ));

        // Every stage succeeded.
        Ok(ElfObject {
            elf: obj,
            data,
            is_malformed: false,
        })
    }
354
    /// The container file format, which is always `FileFormat::Elf`.
    ///
    /// The returned value does not depend on the contents of the object.
    pub fn file_format(&self) -> FileFormat {
        FileFormat::Elf
    }
359
360    /// The code identifier of this object.
361    ///
362    /// As opposed to Mach-O, ELF does not specify a unique ID for object files in
363    /// its header. Compilers and linkers usually add either `SHT_NOTE` sections or
364    /// `PT_NOTE` program header elements for this purpose.
365    pub fn code_id(&self) -> Option<CodeId> {
366        self.find_build_id()
367            .filter(|slice| !slice.is_empty())
368            .map(CodeId::from_binary)
369    }
370
371    /// The debug link of this object.
372    ///
373    /// The debug link is an alternative to the build id for specifying the location
374    /// of an ELF's debugging information. It refers to a filename that can be used
375    /// to build various debug paths where debuggers can look for the debug files.
376    ///
377    /// # Errors
378    ///
379    /// - None if there is no gnu_debuglink section
380    /// - DebugLinkError if this section exists, but is malformed
381    pub fn debug_link(&self) -> Result<Option<DebugLink<'_>>, DebugLinkError<'_>> {
382        self.section("gnu_debuglink")
383            .map(|section| DebugLink::from_data(section.data, self.endianity()))
384            .transpose()
385    }
386
    /// The binary's soname, if any.
    ///
    /// This is the value recorded while parsing the dynamic section; it is `None` if the object
    /// does not declare one.
    pub fn name(&self) -> Option<&'data str> {
        self.elf.soname
    }
391
392    /// The debug information identifier of an ELF object.
393    ///
394    /// The debug identifier is a rehash of the first 16 bytes of the `code_id`, if
395    /// present. Otherwise, this function will hash the first page of the `.text`
396    /// section (program code) to synthesize a unique ID. This is likely not a valid
397    /// UUID since was generated off a hash value.
398    ///
399    /// If all of the above fails, the identifier will be an empty `DebugId`.
400    pub fn debug_id(&self) -> DebugId {
401        // Search for a GNU build identifier node in the program headers or the
402        // build ID section. If errors occur during this process, fall through
403        // silently to the next method.
404        if let Some(identifier) = self.find_build_id() {
405            return self.compute_debug_id(identifier);
406        }
407
408        // We were not able to locate the build ID, so fall back to hashing the
409        // first page of the ".text" (program code) section. This algorithm XORs
410        // 16-byte chunks directly into a UUID buffer.
411        if let Some(section) = self.raw_section("text") {
412            let mut hash = [0; UUID_SIZE];
413            for i in 0..std::cmp::min(section.data.len(), PAGE_SIZE) {
414                hash[i % UUID_SIZE] ^= section.data[i];
415            }
416
417            return self.compute_debug_id(&hash);
418        }
419
420        DebugId::default()
421    }
422
423    /// The CPU architecture of this object, as specified in the ELF header.
424    pub fn arch(&self) -> Arch {
425        match self.elf.header.e_machine {
426            goblin::elf::header::EM_386 => Arch::X86,
427            goblin::elf::header::EM_X86_64 => Arch::Amd64,
428            goblin::elf::header::EM_AARCH64 => Arch::Arm64,
429            // NOTE: This could actually be any of the other 32bit ARMs. Since we don't need this
430            // information, we use the generic Arch::Arm. By reading CPU_arch and FP_arch attributes
431            // from the SHT_ARM_ATTRIBUTES section it would be possible to distinguish the ARM arch
432            // version and infer hard/soft FP.
433            //
434            // For more information, see:
435            // http://code.metager.de/source/xref/gnu/src/binutils/readelf.c#11282
436            // https://stackoverflow.com/a/20556156/4228225
437            goblin::elf::header::EM_ARM => Arch::Arm,
438            goblin::elf::header::EM_PPC => Arch::Ppc,
439            goblin::elf::header::EM_PPC64 => Arch::Ppc64,
440            goblin::elf::header::EM_MIPS | goblin::elf::header::EM_MIPS_RS3_LE => {
441                if self.elf.header.e_flags & MIPS_64_FLAGS != 0 {
442                    Arch::Mips64
443                } else {
444                    Arch::Mips
445                }
446            }
447            _ => Arch::Unknown,
448        }
449    }
450
451    /// The kind of this object, as specified in the ELF header.
452    pub fn kind(&self) -> ObjectKind {
453        const ET_SCE_DYNEXEC: u16 = 0xfe10;
454        const ET_SCE_DYNAMIC: u16 = 0xfe18;
455
456        let kind = match self.elf.header.e_type {
457            goblin::elf::header::ET_NONE => ObjectKind::None,
458            goblin::elf::header::ET_REL => ObjectKind::Relocatable,
459            goblin::elf::header::ET_EXEC => ObjectKind::Executable,
460            goblin::elf::header::ET_DYN => ObjectKind::Library,
461            goblin::elf::header::ET_CORE => ObjectKind::Dump,
462            ET_SCE_DYNEXEC => ObjectKind::Executable,
463            ET_SCE_DYNAMIC => ObjectKind::Library,
464            _ => ObjectKind::Other,
465        };
466
467        // When stripping debug information into a separate file with objcopy,
468        // the eh_type field still reads ET_EXEC. However, the interpreter is
469        // removed. Since an executable without interpreter does not make any
470        // sense, we assume ``Debug`` in this case.
471        if kind == ObjectKind::Executable && self.elf.interpreter.is_none() {
472            return ObjectKind::Debug;
473        }
474
475        // The same happens for libraries. However, here we can only check for
476        // a missing text section. If this still yields too many false positivies,
477        // we will have to check either the size or offset of that section in
478        // the future.
479        if kind == ObjectKind::Library && self.raw_section("text").is_none() {
480            return ObjectKind::Debug;
481        }
482
483        kind
484    }
485
486    /// The address at which the image prefers to be loaded into memory.
487    ///
488    /// ELF files store all internal addresses as if it was loaded at that address. When the image
489    /// is actually loaded, that spot might already be taken by other images and so it must be
490    /// relocated to a new address. At runtime, a relocation table manages the arithmetics behind
491    /// this.
492    ///
493    /// Addresses used in `symbols` or `debug_session` have already been rebased relative to that
494    /// load address, so that the caller only has to deal with addresses relative to the actual
495    /// start of the image.
496    pub fn load_address(&self) -> u64 {
497        // For non-PIC executables (e_type == ET_EXEC), the load address is
498        // the start address of the first PT_LOAD segment.  (ELF requires
499        // the segments to be sorted by load address.)  For PIC executables
500        // and dynamic libraries (e_type == ET_DYN), this address will
501        // normally be zero.
502        for phdr in &self.elf.program_headers {
503            if phdr.p_type == elf::program_header::PT_LOAD {
504                return phdr.p_vaddr;
505            }
506        }
507
508        0
509    }
510
511    /// Determines whether this object exposes a public symbol table.
512    pub fn has_symbols(&self) -> bool {
513        !self.elf.syms.is_empty() || !self.elf.dynsyms.is_empty()
514    }
515
516    /// Returns an iterator over symbols in the public symbol table.
517    pub fn symbols(&self) -> ElfSymbolIterator<'data, '_> {
518        ElfSymbolIterator {
519            symbols: self.elf.syms.iter(),
520            strtab: &self.elf.strtab,
521            dynamic_symbols: self.elf.dynsyms.iter(),
522            dynamic_strtab: &self.elf.dynstrtab,
523            sections: &self.elf.section_headers,
524            load_addr: self.load_address(),
525        }
526    }
527
    /// Returns an ordered map of symbols in the symbol table.
    ///
    /// The map is built by collecting everything yielded by [`symbols`](Self::symbols).
    pub fn symbol_map(&self) -> SymbolMap<'data> {
        self.symbols().collect()
    }
532
    /// Determines whether this object contains debug information.
    ///
    /// This only checks for the presence of a `debug_info` section.
    pub fn has_debug_info(&self) -> bool {
        self.has_section("debug_info")
    }
537
538    /// Constructs a debugging session.
539    ///
540    /// A debugging session loads certain information from the object file and creates caches for
541    /// efficient access to various records in the debug information. Since this can be quite a
542    /// costly process, try to reuse the debugging session as long as possible.
543    ///
544    /// ELF files generally use DWARF debugging information, which is also used by MachO containers
545    /// on macOS.
546    ///
547    /// Constructing this session will also work if the object does not contain debugging
548    /// information, in which case the session will be a no-op. This can be checked via
549    /// [`has_debug_info`](struct.ElfObject.html#method.has_debug_info).
550    pub fn debug_session(&self) -> Result<DwarfDebugSession<'data>, DwarfError> {
551        let symbols = self.symbol_map();
552        DwarfDebugSession::parse(self, symbols, self.load_address() as i64, self.kind())
553    }
554
    /// Determines whether this object contains stack unwinding information.
    ///
    /// This is the case if the object has an `eh_frame` or a `debug_frame` section.
    pub fn has_unwind_info(&self) -> bool {
        self.has_section("eh_frame") || self.has_section("debug_frame")
    }
559
    /// Determines whether this object contains embedded source.
    ///
    /// This always returns `false` for ELF objects.
    pub fn has_sources(&self) -> bool {
        false
    }
564
    /// Determines whether this object is malformed and was only partially parsed.
    ///
    /// This returns the flag recorded when the object was constructed.
    pub fn is_malformed(&self) -> bool {
        self.is_malformed
    }
569
    /// Returns the raw data of the ELF file.
    ///
    /// This is the same slice that the object was parsed from.
    pub fn data(&self) -> &'data [u8] {
        self.data
    }
574
575    /// Decompresses the given compressed section data, if supported.
576    fn decompress_section(&self, section_data: &[u8]) -> Option<Vec<u8>> {
577        enum CompressionType {
578            Zlib,
579            Zstd,
580        }
581
582        let (ty, size, compressed) = if section_data.starts_with(b"ZLIB") {
583            // The GNU compression header is a 4 byte magic "ZLIB", followed by an 8-byte big-endian
584            // size prefix of the decompressed data. This adds up to 12 bytes of GNU header.
585            if section_data.len() < 12 {
586                return None;
587            }
588
589            let mut size_bytes = [0; 8];
590            size_bytes.copy_from_slice(&section_data[4..12]);
591
592            (
593                CompressionType::Zlib,
594                u64::from_be_bytes(size_bytes),
595                &section_data[12..],
596            )
597        } else {
598            let container = self.elf.header.container().ok()?;
599            let endianness = self.elf.header.endianness().ok()?;
600            let context = Ctx::new(container, endianness);
601
602            let compression = CompressionHeader::parse(section_data, 0, context).ok()?;
603            let ty = match compression.ch_type {
604                ELFCOMPRESS_ZLIB => CompressionType::Zlib,
605                ELFCOMPRESS_ZSTD => CompressionType::Zstd,
606                _ => {
607                    return None;
608                }
609            };
610
611            let compressed = &section_data[CompressionHeader::size(context)..];
612            (ty, compression.ch_size, compressed)
613        };
614
615        let decompressed = match ty {
616            CompressionType::Zlib => {
617                let mut decompressed = Vec::with_capacity(size as usize);
618                Decompress::new(true)
619                    .decompress_vec(compressed, &mut decompressed, FlushDecompress::Finish)
620                    .ok()?;
621                decompressed
622            }
623            CompressionType::Zstd => zstd::bulk::decompress(compressed, size as usize).ok()?,
624        };
625
626        Some(decompressed)
627    }
628
629    /// Locates and reads a section in an ELF binary.
630    fn find_section(&self, name: &str) -> Option<(bool, DwarfSection<'data>)> {
631        for header in &self.elf.section_headers {
632            // The section type is usually SHT_PROGBITS, but some compilers also use
633            // SHT_X86_64_UNWIND and SHT_MIPS_DWARF. We apply the same approach as elfutils,
634            // matching against SHT_NOBITS, instead.
635            if header.sh_type == elf::section_header::SHT_NOBITS {
636                continue;
637            }
638
639            if let Some(section_name) = self.elf.shdr_strtab.get_at(header.sh_name) {
640                let offset = header.sh_offset as usize;
641                if offset == 0 {
642                    // We're defensive here. On darwin, dsymutil leaves phantom section headers
643                    // while stripping their data from the file by setting their offset to 0. We
644                    // know that no section can start at an absolute file offset of zero, so we can
645                    // safely skip them in case similar things happen on linux.
646                    continue;
647                }
648
649                if section_name.is_empty() {
650                    continue;
651                }
652
653                // Before SHF_COMPRESSED was a thing, compressed sections were prefixed with `.z`.
654                // Support this as an override to the flag.
655                let (compressed, section_name) = match section_name.strip_prefix(".z") {
656                    Some(name) => (true, name),
657                    None => (header.sh_flags & SHF_COMPRESSED != 0, &section_name[1..]),
658                };
659
660                if section_name != name {
661                    continue;
662                }
663
664                let size = header.sh_size as usize;
665                let data = &self.data[offset..][..size];
666                let section = DwarfSection {
667                    data: Cow::Borrowed(data),
668                    address: header.sh_addr,
669                    offset: header.sh_offset,
670                    align: header.sh_addralign,
671                };
672
673                return Some((compressed, section));
674            }
675        }
676
677        None
678    }
679
680    /// Searches for a GNU build identifier node in an ELF file.
681    ///
682    /// Depending on the compiler and linker, the build ID can be declared in a
683    /// PT_NOTE program header entry, the ".note.gnu.build-id" section, or even
684    /// both.
685    fn find_build_id(&self) -> Option<&'data [u8]> {
686        // First, search the note program headers (PT_NOTE) for a NT_GNU_BUILD_ID.
687        // We swallow all errors during this process and simply fall back to the
688        // next method below.
689        if let Some(mut notes) = self.elf.iter_note_headers(self.data) {
690            while let Some(Ok(note)) = notes.next() {
691                if note.n_type == elf::note::NT_GNU_BUILD_ID {
692                    return Some(note.desc);
693                }
694            }
695        }
696
697        // Some old linkers or compilers might not output the above PT_NOTE headers.
698        // In that case, search for a note section (SHT_NOTE). We are looking for a
699        // note within the ".note.gnu.build-id" section. Again, swallow all errors
700        // and fall through if reading the section is not possible.
701        if let Some(mut notes) = self
702            .elf
703            .iter_note_sections(self.data, Some(".note.gnu.build-id"))
704        {
705            while let Some(Ok(note)) = notes.next() {
706                if note.n_type == elf::note::NT_GNU_BUILD_ID {
707                    return Some(note.desc);
708                }
709            }
710        }
711
712        const PT_SCE_DYNLIBDATA: u32 = 0x61000000;
713
714        for ph in &self.elf.program_headers {
715            if ph.p_type == PT_SCE_DYNLIBDATA && ph.p_filesz >= 20 {
716                let offset = ph.p_offset as usize;
717                return self.data.get(offset..offset.saturating_add(20));
718            }
719        }
720
721        None
722    }
723
724    /// Converts an ELF object identifier into a `DebugId`.
725    ///
726    /// The identifier data is first truncated or extended to match 16 byte size of
727    /// Uuids. If the data is declared in little endian, the first three Uuid fields
728    /// are flipped to match the big endian expected by the breakpad processor.
729    ///
730    /// The `DebugId::appendix` field is always `0` for ELF.
731    fn compute_debug_id(&self, identifier: &[u8]) -> DebugId {
732        // Make sure that we have exactly UUID_SIZE bytes available
733        let mut data = [0; UUID_SIZE];
734        let len = std::cmp::min(identifier.len(), UUID_SIZE);
735        data[0..len].copy_from_slice(&identifier[0..len]);
736
737        if self.elf.little_endian {
738            // The file ELF file targets a little endian architecture. Convert to
739            // network byte order (big endian) to match the Breakpad processor's
740            // expectations. For big endian object files, this is not needed.
741            data[0..4].reverse(); // uuid field 1
742            data[4..6].reverse(); // uuid field 2
743            data[6..8].reverse(); // uuid field 3
744        }
745
746        Uuid::from_slice(&data)
747            .map(DebugId::from_uuid)
748            .unwrap_or_default()
749    }
750}
751
/// Formats the object by its derived properties (identifiers, architecture, kind and feature
/// flags) instead of dumping the raw file data.
impl fmt::Debug for ElfObject<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("ElfObject")
            .field("code_id", &self.code_id())
            .field("debug_id", &self.debug_id())
            .field("arch", &self.arch())
            .field("kind", &self.kind())
            // Print the load address in hexadecimal notation.
            .field("load_address", &format_args!("{:#x}", self.load_address()))
            .field("has_symbols", &self.has_symbols())
            .field("has_debug_info", &self.has_debug_info())
            .field("has_unwind_info", &self.has_unwind_info())
            .field("is_malformed", &self.is_malformed())
            .finish()
    }
}
767
/// Lets an `ElfObject<'data>` be borrowed as an `ElfObject<'slf>` for any
/// lifetime `'slf` that `'data` outlives.
impl<'slf, 'data: 'slf> AsSelf<'slf> for ElfObject<'data> {
    /// The same object type with its data lifetime shortened to `'slf`.
    type Ref = ElfObject<'slf>;

    /// Returns `self` unchanged; only the lifetime in the type is shortened.
    fn as_self(&'slf self) -> &'slf Self::Ref {
        self
    }
}
775
/// Exposes ELF detection and parsing through the generic `Parse` trait.
///
/// Both methods forward to `Self::test` / `Self::parse`.
// NOTE(review): these presumably resolve to the inherent associated functions
// of `ElfObject` (inherent items take precedence over trait items); those are
// defined outside this section — confirm they exist, otherwise this recurses.
impl<'data> Parse<'data> for ElfObject<'data> {
    type Error = ElfError;

    /// Forwards to `Self::test` with the given buffer.
    fn test(data: &[u8]) -> bool {
        Self::test(data)
    }

    /// Forwards to `Self::parse`; the returned object borrows from `data`.
    fn parse(data: &'data [u8]) -> Result<Self, ElfError> {
        Self::parse(data)
    }
}
787
/// Implements the format-independent `ObjectLike` interface for ELF objects.
///
/// Every method forwards to the method of the same name on `self`; debug
/// sessions are DWARF sessions and failures are reported as `DwarfError`.
// NOTE(review): the forwarding calls presumably resolve to inherent methods of
// `ElfObject` (inherent methods take precedence over trait methods); those are
// defined outside this section — confirm each one exists.
impl<'data: 'object, 'object> ObjectLike<'data, 'object> for ElfObject<'data> {
    type Error = DwarfError;
    type Session = DwarfDebugSession<'data>;
    type SymbolIterator = ElfSymbolIterator<'data, 'object>;

    fn file_format(&self) -> FileFormat {
        self.file_format()
    }

    fn code_id(&self) -> Option<CodeId> {
        self.code_id()
    }

    fn debug_id(&self) -> DebugId {
        self.debug_id()
    }

    fn arch(&self) -> Arch {
        self.arch()
    }

    fn kind(&self) -> ObjectKind {
        self.kind()
    }

    fn load_address(&self) -> u64 {
        self.load_address()
    }

    fn has_symbols(&self) -> bool {
        self.has_symbols()
    }

    // Takes `&'object self` because the returned iterator borrows from the object.
    fn symbols(&'object self) -> Self::SymbolIterator {
        self.symbols()
    }

    fn symbol_map(&self) -> SymbolMap<'data> {
        self.symbol_map()
    }

    fn has_debug_info(&self) -> bool {
        self.has_debug_info()
    }

    fn debug_session(&self) -> Result<Self::Session, Self::Error> {
        self.debug_session()
    }

    fn has_unwind_info(&self) -> bool {
        self.has_unwind_info()
    }

    fn has_sources(&self) -> bool {
        self.has_sources()
    }

    fn is_malformed(&self) -> bool {
        self.is_malformed()
    }
}
849
850impl<'data> Dwarf<'data> for ElfObject<'data> {
851    fn endianity(&self) -> Endian {
852        if self.elf.little_endian {
853            Endian::Little
854        } else {
855            Endian::Big
856        }
857    }
858
859    fn raw_section(&self, name: &str) -> Option<DwarfSection<'data>> {
860        let (_, section) = self.find_section(name)?;
861        Some(section)
862    }
863
864    fn section(&self, name: &str) -> Option<DwarfSection<'data>> {
865        let (compressed, mut section) = self.find_section(name)?;
866
867        if compressed {
868            let decompressed = self.decompress_section(&section.data)?;
869            section.data = Cow::Owned(decompressed);
870        }
871
872        Some(section)
873    }
874}
875
/// An iterator over symbols in the ELF file.
///
/// Entries of `symbols` are yielded first; once those are exhausted, entries of
/// `dynamic_symbols` follow.
///
/// Returned by [`ElfObject::symbols`](struct.ElfObject.html#method.symbols).
pub struct ElfSymbolIterator<'data, 'object> {
    /// Symbol entries that are visited first.
    symbols: elf::sym::SymIterator<'data>,
    /// String table used to resolve the names of entries in `symbols`.
    strtab: &'object strtab::Strtab<'data>,
    /// Symbol entries that are visited after `symbols` is exhausted.
    dynamic_symbols: elf::sym::SymIterator<'data>,
    /// String table used to resolve the names of entries in `dynamic_symbols`.
    dynamic_strtab: &'object strtab::Strtab<'data>,
    /// Section headers, indexed by a symbol's `st_shndx`.
    sections: &'object [elf::SectionHeader],
    /// Load address that is subtracted from each symbol's `st_value`.
    load_addr: u64,
}
887
888impl<'data> Iterator for ElfSymbolIterator<'data, '_> {
889    type Item = Symbol<'data>;
890
891    fn next(&mut self) -> Option<Self::Item> {
892        fn get_symbols<'data>(
893            symbols: &mut SymIterator,
894            strtab: &Strtab<'data>,
895            load_addr: u64,
896            sections: &[SectionHeader],
897        ) -> Option<Symbol<'data>> {
898            for symbol in symbols {
899                // Only check for function symbols.
900                if symbol.st_type() != elf::sym::STT_FUNC {
901                    continue;
902                }
903
904                // Sanity check of the symbol address. Since we only intend to iterate over function
905                // symbols, they need to be mapped after the image's load address.
906                if symbol.st_value < load_addr {
907                    continue;
908                }
909
910                let section = match symbol.st_shndx {
911                    self::SHN_UNDEF => None,
912                    index => sections.get(index),
913                };
914
915                // We are only interested in symbols pointing into sections with executable flag.
916                if !section.is_some_and(|header| header.is_executable()) {
917                    continue;
918                }
919
920                let name = strtab.get_at(symbol.st_name).map(Cow::Borrowed);
921
922                return Some(Symbol {
923                    name,
924                    address: symbol.st_value - load_addr,
925                    size: symbol.st_size,
926                });
927            }
928
929            None
930        }
931
932        get_symbols(
933            &mut self.symbols,
934            self.strtab,
935            self.load_addr,
936            self.sections,
937        )
938        .or_else(|| {
939            get_symbols(
940                &mut self.dynamic_symbols,
941                self.dynamic_strtab,
942                self.load_addr,
943                self.sections,
944            )
945        })
946    }
947}
948
/// Parsed debug link section.
///
/// Holds the file name of the separate debug file and the CRC checksum stored
/// alongside it in the section.
#[derive(Debug)]
pub struct DebugLink<'data> {
    /// NUL-terminated file name; borrowed from the section data when that data
    /// was borrowed, owned otherwise.
    filename: Cow<'data, CStr>,
    /// CRC checksum read from the section.
    crc: u32,
}
955
956impl<'data> DebugLink<'data> {
957    /// Attempts to parse a debug link section from its data.
958    ///
959    /// The expected format for the section is:
960    ///
961    /// - A filename, with any leading directory components removed, followed by a zero byte,
962    /// - zero to three bytes of padding, as needed to reach the next four-byte boundary within the section, and
963    /// - a four-byte CRC checksum, stored in the same endianness used for the executable file itself.
964    ///
965    /// (from <https://sourceware.org/gdb/current/onlinedocs/gdb/Separate-Debug-Files.html#index-_002egnu_005fdebuglink-sections>)
966    ///
967    /// # Errors
968    ///
969    /// If the section data is malformed, in particular:
970    /// - No NUL byte delimiting the filename from the CRC
971    /// - Not enough space for the CRC checksum
972    pub fn from_data(
973        data: Cow<'data, [u8]>,
974        endianity: Endian,
975    ) -> Result<Self, DebugLinkError<'data>> {
976        match data {
977            Cow::Owned(data) => {
978                let (filename, crc) = Self::from_borrowed_data(&data, endianity)
979                    .map(|(filename, crc)| (filename.to_owned(), crc))
980                    .map_err(|kind| DebugLinkError {
981                        kind,
982                        data: Cow::Owned(data),
983                    })?;
984                Ok(Self {
985                    filename: Cow::Owned(filename),
986                    crc,
987                })
988            }
989            Cow::Borrowed(data) => {
990                let (filename, crc) =
991                    Self::from_borrowed_data(data, endianity).map_err(|kind| DebugLinkError {
992                        kind,
993                        data: Cow::Borrowed(data),
994                    })?;
995                Ok(Self {
996                    filename: Cow::Borrowed(filename),
997                    crc,
998                })
999            }
1000        }
1001    }
1002
1003    fn from_borrowed_data(
1004        data: &[u8],
1005        endianity: Endian,
1006    ) -> Result<(&CStr, u32), DebugLinkErrorKind> {
1007        let nul_pos = data
1008            .iter()
1009            .position(|byte| *byte == 0)
1010            .ok_or(DebugLinkErrorKind::MissingNul)?;
1011
1012        if nul_pos + 1 == data.len() {
1013            return Err(DebugLinkErrorKind::MissingCrc {
1014                filename_len_with_nul: nul_pos + 1,
1015            });
1016        }
1017
1018        let filename = &data[..nul_pos + 1];
1019
1020        // let's be liberal and assume that the padding is correct and all 0s,
1021        // and just check that we have enough remaining length for the CRC.
1022        let crc = data
1023            .get(nul_pos + 1..)
1024            .and_then(|crc| crc.get(crc.len() - 4..))
1025            .ok_or(DebugLinkErrorKind::MissingCrc {
1026                filename_len_with_nul: filename.len(),
1027            })?;
1028
1029        let crc: [u8; 4] = crc.try_into().map_err(|_| DebugLinkErrorKind::MissingCrc {
1030            filename_len_with_nul: filename.len(),
1031        })?;
1032
1033        let crc = match endianity {
1034            Endian::Little => u32::from_le_bytes(crc),
1035            Endian::Big => u32::from_be_bytes(crc),
1036        };
1037
1038        let filename =
1039            CStr::from_bytes_with_nul(filename).map_err(|_| DebugLinkErrorKind::MissingNul)?;
1040
1041        Ok((filename, crc))
1042    }
1043
1044    /// The debug link filename
1045    pub fn filename(&self) -> &CStr {
1046        &self.filename
1047    }
1048
1049    /// The CRC checksum associated with the debug link file
1050    pub fn crc(&self) -> u32 {
1051        self.crc
1052    }
1053}
1054
/// Kind of errors that can occur while parsing a debug link section.
#[derive(Debug, Error)]
pub enum DebugLinkErrorKind {
    /// No NUL byte delimiting the filename from the CRC
    #[error("missing NUL character")]
    MissingNul,
    /// Not enough space in the section data for the CRC checksum
    #[error("missing CRC")]
    MissingCrc {
        /// Size of the filename part of the section including the NUL character,
        /// i.e. the offset of the first byte following the filename.
        filename_len_with_nul: usize,
    },
}
1068
/// Errors that can occur while parsing a debug link section.
///
/// Besides the reason for the failure, the error carries the section data that
/// could not be parsed, so the caller gets it back.
#[derive(Debug, Error)]
#[error("could not parse debug link section")]
pub struct DebugLinkError<'data> {
    #[source]
    /// The kind of error that occurred.
    pub kind: DebugLinkErrorKind,
    /// The original data of the debug section, as passed to the parser.
    pub data: Cow<'data, [u8]>,
}