symbolic_debuginfo/
elf.rs

1//! Support for the Executable and Linkable Format, used on Linux.
2
3use std::borrow::Cow;
4use std::error::Error;
5use std::ffi::CStr;
6use std::fmt;
7
8use core::cmp;
9use flate2::{Decompress, FlushDecompress};
10use goblin::elf::compression_header::{CompressionHeader, ELFCOMPRESS_ZLIB};
11use goblin::elf::SectionHeader;
12use goblin::elf64::sym::SymIterator;
13use goblin::strtab::Strtab;
14use goblin::{
15    container::{Container, Ctx},
16    elf, strtab,
17};
18use scroll::Pread;
19use thiserror::Error;
20
21use symbolic_common::{Arch, AsSelf, CodeId, DebugId, Uuid};
22
23use crate::base::*;
24use crate::dwarf::{Dwarf, DwarfDebugSession, DwarfError, DwarfSection, Endian};
25
/// Size of a UUID in bytes. ELF identifiers are truncated or zero-padded to this length before
/// being turned into a `DebugId`.
const UUID_SIZE: usize = 16;
/// Upper bound on the number of `.text` bytes that are hashed when a debug identifier has to be
/// synthesized from the program code.
const PAGE_SIZE: usize = 4096;

/// `goblin`'s `SHN_UNDEF`, widened to `usize`.
const SHN_UNDEF: usize = elf::section_header::SHN_UNDEF as usize;
/// `goblin`'s `SHF_COMPRESSED`, widened to `u64` so it can be masked against `sh_flags`.
const SHF_COMPRESSED: u64 = elf::section_header::SHF_COMPRESSED as u64;

/// The ELF compression header type for `zstd`, as that is not (yet) exported by `goblin`.
pub const ELFCOMPRESS_ZSTD: u32 = 2;

/// This file follows the first MIPS 32 bit ABI
#[allow(unused)]
const EF_MIPS_ABI_O32: u32 = 0x0000_1000;
/// O32 ABI extended for 64-bit architecture.
const EF_MIPS_ABI_O64: u32 = 0x0000_2000;
/// EABI in 32 bit mode.
#[allow(unused)]
const EF_MIPS_ABI_EABI32: u32 = 0x0000_3000;
/// EABI in 64 bit mode.
const EF_MIPS_ABI_EABI64: u32 = 0x0000_4000;

/// Any flag value that might indicate 64-bit MIPS.
///
/// This is the bitwise OR of the two 64-bit ABI values, i.e. `0x6000`.
///
/// NOTE(review): `EF_MIPS_ABI_EABI32` (`0x3000`) also has bit `0x2000` set, so a plain
/// `flags & MIPS_64_FLAGS != 0` test matches 32-bit EABI objects as well.
const MIPS_64_FLAGS: u32 = EF_MIPS_ABI_O64 | EF_MIPS_ABI_EABI64;
48
/// An error when dealing with [`ElfObject`](struct.ElfObject.html).
///
/// The error always displays as `invalid ELF file`; the underlying cause, if any, is exposed
/// through the standard error source chain.
#[derive(Debug, Error)]
#[error("invalid ELF file")]
pub struct ElfError {
    /// The underlying cause of this error, if one was recorded.
    #[source]
    source: Option<Box<dyn Error + Send + Sync + 'static>>,
}
56
57impl ElfError {
58    /// Creates a new ELF error from an arbitrary error payload.
59    fn new<E>(source: E) -> Self
60    where
61        E: Into<Box<dyn Error + Send + Sync>>,
62    {
63        let source = Some(source.into());
64        Self { source }
65    }
66}
67
/// Executable and Linkable Format, used for executables and libraries on Linux.
pub struct ElfObject<'data> {
    /// The (possibly only partially populated) `goblin` representation of the file.
    elf: elf::Elf<'data>,
    /// The raw bytes the object was parsed from.
    data: &'data [u8],
    /// Set when parsing stopped early and `elf` only holds the parts parsed up to that point.
    is_malformed: bool,
}
74
75impl<'data> ElfObject<'data> {
76    /// Tests whether the buffer could contain an ELF object.
77    pub fn test(data: &[u8]) -> bool {
78        data.get(0..elf::header::SELFMAG)
79            .is_some_and(|data| data == elf::header::ELFMAG)
80    }
81
82    // Pulled from https://github.com/m4b/goblin/blob/master/src/elf/mod.rs#L393-L424 as it
83    // currently isn't public, but we need this to parse an ELF.
84    fn gnu_hash_len(bytes: &[u8], offset: usize, ctx: Ctx) -> goblin::error::Result<usize> {
85        let buckets_num = bytes.pread_with::<u32>(offset, ctx.le)? as usize;
86        let min_chain = bytes.pread_with::<u32>(offset + 4, ctx.le)? as usize;
87        let bloom_size = bytes.pread_with::<u32>(offset + 8, ctx.le)? as usize;
88        // We could handle min_chain==0 if we really had to, but it shouldn't happen.
89        if buckets_num == 0 || min_chain == 0 || bloom_size == 0 {
90            return Err(goblin::error::Error::Malformed(format!(
91                "Invalid DT_GNU_HASH: buckets_num={buckets_num} min_chain={min_chain} bloom_size={bloom_size}"
92            )));
93        }
94        // Find the last bucket.
95        let buckets_offset = offset + 16 + bloom_size * if ctx.container.is_big() { 8 } else { 4 };
96        let mut max_chain = 0;
97        for bucket in 0..buckets_num {
98            let chain = bytes.pread_with::<u32>(buckets_offset + bucket * 4, ctx.le)? as usize;
99            if max_chain < chain {
100                max_chain = chain;
101            }
102        }
103        if max_chain < min_chain {
104            return Ok(0);
105        }
106        // Find the last chain within the bucket.
107        let mut chain_offset = buckets_offset + buckets_num * 4 + (max_chain - min_chain) * 4;
108        loop {
109            let hash = bytes.pread_with::<u32>(chain_offset, ctx.le)?;
110            max_chain += 1;
111            chain_offset += 4;
112            if hash & 1 != 0 {
113                return Ok(max_chain);
114            }
115        }
116    }
117
118    // Pulled from https://github.com/m4b/goblin/blob/master/src/elf/mod.rs#L426-L434 as it
119    // currently isn't public, but we need this to parse an ELF.
120    fn hash_len(
121        bytes: &[u8],
122        offset: usize,
123        machine: u16,
124        ctx: Ctx,
125    ) -> goblin::error::Result<usize> {
126        // Based on readelf code.
127        let nchain = if (machine == elf::header::EM_FAKE_ALPHA || machine == elf::header::EM_S390)
128            && ctx.container.is_big()
129        {
130            bytes.pread_with::<u64>(offset.saturating_add(4), ctx.le)? as usize
131        } else {
132            bytes.pread_with::<u32>(offset.saturating_add(4), ctx.le)? as usize
133        };
134        Ok(nchain)
135    }
136
    /// Tries to parse an ELF object from the given slice. Will return a partially parsed ELF object
    /// if at least the program and section headers can be parsed.
    ///
    /// # Errors
    ///
    /// Returns an [`ElfError`] if the ELF header, the program headers, or the section headers
    /// cannot be read. Failures in any later step do not produce an error; instead, the object
    /// parsed up to that point is returned with its malformed flag set.
    pub fn parse(data: &'data [u8]) -> Result<Self, ElfError> {
        let header =
            elf::Elf::parse_header(data).map_err(|_| ElfError::new("ELF header unreadable"))?;
        // dummy Elf with only header
        let mut obj =
            elf::Elf::lazy_parse(header).map_err(|_| ElfError::new("cannot parse ELF header"))?;

        // Parsing context (word size and byte order) derived from the header.
        let ctx = Ctx {
            container: if obj.is_64 {
                Container::Big
            } else {
                Container::Little
            },
            le: if obj.little_endian {
                scroll::Endian::Little
            } else {
                scroll::Endian::Big
            },
        };

        // Evaluates to the `Ok` value of the given expression. On `Err`, returns early from
        // `parse` with everything stored in `obj` so far, flagged as malformed.
        macro_rules! return_partial_on_err {
            ($parse_func:expr) => {
                if let Ok(expected) = $parse_func {
                    expected
                } else {
                    // The object is handed out in whatever state it has reached at this point.
                    return Ok(ElfObject {
                        elf: obj,
                        data,
                        is_malformed: true,
                    });
                }
            };
        }

        // Program headers are mandatory: failing to read them is a hard error.
        obj.program_headers =
            elf::ProgramHeader::parse(data, header.e_phoff as usize, header.e_phnum as usize, ctx)
                .map_err(|_| ElfError::new("unable to parse program headers"))?;

        // Extract the interpreter path from a non-empty PT_INTERP segment. One byte is dropped
        // from the segment size (presumably the trailing NUL terminator); read failures simply
        // leave the interpreter unset.
        for ph in &obj.program_headers {
            if ph.p_type == elf::program_header::PT_INTERP && ph.p_filesz != 0 {
                let count = (ph.p_filesz - 1) as usize;
                let offset = ph.p_offset as usize;
                obj.interpreter = data
                    .pread_with::<&str>(offset, ::scroll::ctx::StrCtx::Length(count))
                    .ok();
            }
        }

        // Section headers are mandatory as well.
        obj.section_headers =
            SectionHeader::parse(data, header.e_shoff as usize, header.e_shnum as usize, ctx)
                .map_err(|_| ElfError::new("unable to parse section headers"))?;

        // Loads the string table stored in the section with the given index. An out-of-range
        // index yields an empty table rather than an error.
        let get_strtab = |section_headers: &[SectionHeader], section_idx: usize| {
            if section_idx >= section_headers.len() {
                // FIXME: warn! here
                Ok(Strtab::default())
            } else {
                let shdr = &section_headers[section_idx];
                shdr.check_size(data.len())?;
                Strtab::parse(data, shdr.sh_offset as usize, shdr.sh_size as usize, 0x0)
            }
        };

        // Section header string table, holding the section names.
        let strtab_idx = header.e_shstrndx as usize;
        obj.shdr_strtab = return_partial_on_err!(get_strtab(&obj.section_headers, strtab_idx));

        // Static symbol table and its string table. If several SHT_SYMTAB sections exist, the
        // last one wins.
        obj.syms = elf::Symtab::default();
        obj.strtab = Strtab::default();
        for shdr in &obj.section_headers {
            if shdr.sh_type == elf::section_header::SHT_SYMTAB {
                let size = shdr.sh_entsize;
                // Guard against a zero entry size to avoid a division by zero.
                let count = if size == 0 { 0 } else { shdr.sh_size / size };
                obj.syms = return_partial_on_err!(elf::Symtab::parse(
                    data,
                    shdr.sh_offset as usize,
                    count as usize,
                    ctx
                ));

                obj.strtab =
                    return_partial_on_err!(get_strtab(&obj.section_headers, shdr.sh_link as usize));
            }
        }

        // Dynamic linking information: soname, needed libraries, relocations and symbols.
        obj.soname = None;
        obj.libraries = vec![];
        obj.dynsyms = elf::Symtab::default();
        obj.dynrelas = elf::RelocSection::default();
        obj.dynrels = elf::RelocSection::default();
        obj.pltrelocs = elf::RelocSection::default();
        obj.dynstrtab = Strtab::default();
        let dynamic = return_partial_on_err!(elf::Dynamic::parse(data, &obj.program_headers, ctx));
        if let Some(ref dynamic) = dynamic {
            let dyn_info = &dynamic.info;
            obj.dynstrtab =
                return_partial_on_err!(Strtab::parse(data, dyn_info.strtab, dyn_info.strsz, 0x0));

            if dyn_info.soname != 0 {
                // FIXME: warn! here
                obj.soname = obj.dynstrtab.get_at(dyn_info.soname);
            }
            if dyn_info.needed_count > 0 {
                obj.libraries = dynamic.get_libraries(&obj.dynstrtab);
            }
            // parse the dynamic relocations
            obj.dynrelas = return_partial_on_err!(elf::RelocSection::parse(
                data,
                dyn_info.rela,
                dyn_info.relasz,
                true,
                ctx
            ));
            obj.dynrels = return_partial_on_err!(elf::RelocSection::parse(
                data,
                dyn_info.rel,
                dyn_info.relsz,
                false,
                ctx
            ));
            let is_rela = dyn_info.pltrel == elf::dynamic::DT_RELA;
            obj.pltrelocs = return_partial_on_err!(elf::RelocSection::parse(
                data,
                dyn_info.jmprel,
                dyn_info.pltrelsz,
                is_rela,
                ctx
            ));

            // The dynamic section does not state the number of dynamic symbols directly. Take
            // it from the GNU hash table if present, else from the SysV hash table, else zero.
            let mut num_syms = if let Some(gnu_hash) = dyn_info.gnu_hash {
                return_partial_on_err!(ElfObject::gnu_hash_len(data, gnu_hash as usize, ctx))
            } else if let Some(hash) = dyn_info.hash {
                return_partial_on_err!(ElfObject::hash_len(
                    data,
                    hash as usize,
                    header.e_machine,
                    ctx
                ))
            } else {
                0
            };
            // Relocations may reference symbols beyond that count; make sure the highest
            // referenced symbol index is covered as well.
            let max_reloc_sym = obj
                .dynrelas
                .iter()
                .chain(obj.dynrels.iter())
                .chain(obj.pltrelocs.iter())
                .fold(0, |num, reloc| cmp::max(num, reloc.r_sym));
            if max_reloc_sym != 0 {
                num_syms = cmp::max(num_syms, max_reloc_sym + 1);
            }

            obj.dynsyms =
                return_partial_on_err!(elf::Symtab::parse(data, dyn_info.symtab, num_syms, ctx));
        }

        // If the dynamic symbol table is empty, try finding a SHT_DYNSYM section in the section headers.
        // See https://refspecs.linuxfoundation.org/LSB_2.1.0/LSB-Core-generic/LSB-Core-generic/elftypes.html:
        //
        // > This section holds a minimal set of symbols adequate for dynamic linking. See also SHT_SYMTAB. Currently, an object file may have either a section of SHT_SYMTAB type or a section of SHT_DYNSYM type, but not both.
        if obj.dynsyms.is_empty() {
            if let Some(shdr) = obj
                .section_headers
                .iter()
                .find(|h| h.sh_type == elf::section_header::SHT_DYNSYM)
            {
                let size = shdr.sh_entsize;
                // Guard against a zero entry size to avoid a division by zero.
                let count = if size == 0 { 0 } else { shdr.sh_size / size };
                obj.dynsyms = return_partial_on_err!(elf::Symtab::parse(
                    data,
                    shdr.sh_offset as usize,
                    count as usize,
                    ctx
                ));

                obj.dynstrtab =
                    return_partial_on_err!(get_strtab(&obj.section_headers, shdr.sh_link as usize));
            }
        }

        // Relocations stored in SHT_REL / SHT_RELA sections, keyed by their section index.
        obj.shdr_relocs = vec![];
        for (idx, section) in obj.section_headers.iter().enumerate() {
            let is_rela = section.sh_type == elf::section_header::SHT_RELA;
            if is_rela || section.sh_type == elf::section_header::SHT_REL {
                return_partial_on_err!(section.check_size(data.len()));
                let sh_relocs = return_partial_on_err!(elf::RelocSection::parse(
                    data,
                    section.sh_offset as usize,
                    section.sh_size as usize,
                    is_rela,
                    ctx,
                ));
                obj.shdr_relocs.push((idx, sh_relocs));
            }
        }

        // Symbol versioning sections.
        obj.versym = return_partial_on_err!(elf::symver::VersymSection::parse(
            data,
            &obj.section_headers,
            ctx
        ));
        obj.verdef = return_partial_on_err!(elf::symver::VerdefSection::parse(
            data,
            &obj.section_headers,
            ctx
        ));
        obj.verneed = return_partial_on_err!(elf::symver::VerneedSection::parse(
            data,
            &obj.section_headers,
            ctx
        ));

        // Every step succeeded: the object is complete.
        Ok(ElfObject {
            elf: obj,
            data,
            is_malformed: false,
        })
    }
356
    /// The container file format, which is always `FileFormat::Elf`.
    ///
    /// The value is a constant and does not depend on the contents of the object.
    pub fn file_format(&self) -> FileFormat {
        FileFormat::Elf
    }
361
362    /// The code identifier of this object.
363    ///
364    /// As opposed to Mach-O, ELF does not specify a unique ID for object files in
365    /// its header. Compilers and linkers usually add either `SHT_NOTE` sections or
366    /// `PT_NOTE` program header elements for this purpose.
367    pub fn code_id(&self) -> Option<CodeId> {
368        self.find_build_id()
369            .filter(|slice| !slice.is_empty())
370            .map(CodeId::from_binary)
371    }
372
373    /// The debug link of this object.
374    ///
375    /// The debug link is an alternative to the build id for specifying the location
376    /// of an ELF's debugging information. It refers to a filename that can be used
377    /// to build various debug paths where debuggers can look for the debug files.
378    ///
379    /// # Errors
380    ///
381    /// - None if there is no gnu_debuglink section
382    /// - DebugLinkError if this section exists, but is malformed
383    pub fn debug_link(&self) -> Result<Option<DebugLink<'_>>, DebugLinkError<'_>> {
384        self.section("gnu_debuglink")
385            .map(|section| DebugLink::from_data(section.data, self.endianity()))
386            .transpose()
387    }
388
    /// The binary's soname, if any.
    ///
    /// This is the `soname` recorded on the parsed ELF; it is `None` when the object does not
    /// declare one.
    pub fn name(&self) -> Option<&'data str> {
        self.elf.soname
    }
393
394    /// The debug information identifier of an ELF object.
395    ///
396    /// The debug identifier is a rehash of the first 16 bytes of the `code_id`, if
397    /// present. Otherwise, this function will hash the first page of the `.text`
398    /// section (program code) to synthesize a unique ID. This is likely not a valid
399    /// UUID since was generated off a hash value.
400    ///
401    /// If all of the above fails, the identifier will be an empty `DebugId`.
402    pub fn debug_id(&self) -> DebugId {
403        // Search for a GNU build identifier node in the program headers or the
404        // build ID section. If errors occur during this process, fall through
405        // silently to the next method.
406        if let Some(identifier) = self.find_build_id() {
407            return self.compute_debug_id(identifier);
408        }
409
410        // We were not able to locate the build ID, so fall back to hashing the
411        // first page of the ".text" (program code) section. This algorithm XORs
412        // 16-byte chunks directly into a UUID buffer.
413        if let Some(section) = self.raw_section("text") {
414            let mut hash = [0; UUID_SIZE];
415            for i in 0..std::cmp::min(section.data.len(), PAGE_SIZE) {
416                hash[i % UUID_SIZE] ^= section.data[i];
417            }
418
419            return self.compute_debug_id(&hash);
420        }
421
422        DebugId::default()
423    }
424
425    /// The CPU architecture of this object, as specified in the ELF header.
426    pub fn arch(&self) -> Arch {
427        match self.elf.header.e_machine {
428            goblin::elf::header::EM_386 => Arch::X86,
429            goblin::elf::header::EM_X86_64 => Arch::Amd64,
430            goblin::elf::header::EM_AARCH64 => Arch::Arm64,
431            // NOTE: This could actually be any of the other 32bit ARMs. Since we don't need this
432            // information, we use the generic Arch::Arm. By reading CPU_arch and FP_arch attributes
433            // from the SHT_ARM_ATTRIBUTES section it would be possible to distinguish the ARM arch
434            // version and infer hard/soft FP.
435            //
436            // For more information, see:
437            // http://code.metager.de/source/xref/gnu/src/binutils/readelf.c#11282
438            // https://stackoverflow.com/a/20556156/4228225
439            goblin::elf::header::EM_ARM => Arch::Arm,
440            goblin::elf::header::EM_PPC => Arch::Ppc,
441            goblin::elf::header::EM_PPC64 => Arch::Ppc64,
442            goblin::elf::header::EM_MIPS | goblin::elf::header::EM_MIPS_RS3_LE => {
443                if self.elf.header.e_flags & MIPS_64_FLAGS != 0 {
444                    Arch::Mips64
445                } else {
446                    Arch::Mips
447                }
448            }
449            _ => Arch::Unknown,
450        }
451    }
452
453    /// The kind of this object, as specified in the ELF header.
454    pub fn kind(&self) -> ObjectKind {
455        const ET_SCE_DYNEXEC: u16 = 0xfe10;
456        const ET_SCE_DYNAMIC: u16 = 0xfe18;
457
458        let kind = match self.elf.header.e_type {
459            goblin::elf::header::ET_NONE => ObjectKind::None,
460            goblin::elf::header::ET_REL => ObjectKind::Relocatable,
461            goblin::elf::header::ET_EXEC => ObjectKind::Executable,
462            goblin::elf::header::ET_DYN => ObjectKind::Library,
463            goblin::elf::header::ET_CORE => ObjectKind::Dump,
464            ET_SCE_DYNEXEC => ObjectKind::Executable,
465            ET_SCE_DYNAMIC => ObjectKind::Library,
466            _ => ObjectKind::Other,
467        };
468
469        // When stripping debug information into a separate file with objcopy,
470        // the eh_type field still reads ET_EXEC. However, the interpreter is
471        // removed. Since an executable without interpreter does not make any
472        // sense, we assume ``Debug`` in this case.
473        if kind == ObjectKind::Executable && self.elf.interpreter.is_none() {
474            return ObjectKind::Debug;
475        }
476
477        // The same happens for libraries. However, here we can only check for
478        // a missing text section. If this still yields too many false positivies,
479        // we will have to check either the size or offset of that section in
480        // the future.
481        if kind == ObjectKind::Library && self.raw_section("text").is_none() {
482            return ObjectKind::Debug;
483        }
484
485        kind
486    }
487
488    /// The address at which the image prefers to be loaded into memory.
489    ///
490    /// ELF files store all internal addresses as if it was loaded at that address. When the image
491    /// is actually loaded, that spot might already be taken by other images and so it must be
492    /// relocated to a new address. At runtime, a relocation table manages the arithmetics behind
493    /// this.
494    ///
495    /// Addresses used in `symbols` or `debug_session` have already been rebased relative to that
496    /// load address, so that the caller only has to deal with addresses relative to the actual
497    /// start of the image.
498    pub fn load_address(&self) -> u64 {
499        // For non-PIC executables (e_type == ET_EXEC), the load address is
500        // the start address of the first PT_LOAD segment.  (ELF requires
501        // the segments to be sorted by load address.)  For PIC executables
502        // and dynamic libraries (e_type == ET_DYN), this address will
503        // normally be zero.
504        for phdr in &self.elf.program_headers {
505            if phdr.p_type == elf::program_header::PT_LOAD {
506                return phdr.p_vaddr;
507            }
508        }
509
510        0
511    }
512
513    /// Determines whether this object exposes a public symbol table.
514    pub fn has_symbols(&self) -> bool {
515        !self.elf.syms.is_empty() || !self.elf.dynsyms.is_empty()
516    }
517
518    /// Returns an iterator over symbols in the public symbol table.
519    pub fn symbols(&self) -> ElfSymbolIterator<'data, '_> {
520        ElfSymbolIterator {
521            symbols: self.elf.syms.iter(),
522            strtab: &self.elf.strtab,
523            dynamic_symbols: self.elf.dynsyms.iter(),
524            dynamic_strtab: &self.elf.dynstrtab,
525            sections: &self.elf.section_headers,
526            load_addr: self.load_address(),
527        }
528    }
529
    /// Returns an ordered map of symbols in the symbol table.
    ///
    /// The map is built by collecting everything yielded by [`symbols`](Self::symbols).
    pub fn symbol_map(&self) -> SymbolMap<'data> {
        self.symbols().collect()
    }
534
    /// Determines whether this object contains debug information.
    ///
    /// This checks for the presence of a `debug_info` section.
    pub fn has_debug_info(&self) -> bool {
        self.has_section("debug_info")
    }
539
540    /// Constructs a debugging session.
541    ///
542    /// A debugging session loads certain information from the object file and creates caches for
543    /// efficient access to various records in the debug information. Since this can be quite a
544    /// costly process, try to reuse the debugging session as long as possible.
545    ///
546    /// ELF files generally use DWARF debugging information, which is also used by MachO containers
547    /// on macOS.
548    ///
549    /// Constructing this session will also work if the object does not contain debugging
550    /// information, in which case the session will be a no-op. This can be checked via
551    /// [`has_debug_info`](struct.ElfObject.html#method.has_debug_info).
552    pub fn debug_session(&self) -> Result<DwarfDebugSession<'data>, DwarfError> {
553        let symbols = self.symbol_map();
554        DwarfDebugSession::parse(self, symbols, self.load_address() as i64, self.kind())
555    }
556
557    /// Determines whether this object contains stack unwinding information.
558    pub fn has_unwind_info(&self) -> bool {
559        self.has_section("eh_frame") || self.has_section("debug_frame")
560    }
561
    /// Determines whether this object contains embedded source.
    ///
    /// This always returns `false` for ELF objects.
    pub fn has_sources(&self) -> bool {
        false
    }
566
    /// Determines whether this object is malformed and was only partially parsed.
    ///
    /// The flag is set by [`parse`](Self::parse) when it had to stop early and return the parts
    /// of the file that had been read up to that point.
    pub fn is_malformed(&self) -> bool {
        self.is_malformed
    }
571
    /// Returns the raw data of the ELF file.
    ///
    /// This is the same slice that the object was parsed from.
    pub fn data(&self) -> &'data [u8] {
        self.data
    }
576
577    /// Decompresses the given compressed section data, if supported.
578    fn decompress_section(&self, section_data: &[u8]) -> Option<Vec<u8>> {
579        enum CompressionType {
580            Zlib,
581            Zstd,
582        }
583
584        let (ty, size, compressed) = if section_data.starts_with(b"ZLIB") {
585            // The GNU compression header is a 4 byte magic "ZLIB", followed by an 8-byte big-endian
586            // size prefix of the decompressed data. This adds up to 12 bytes of GNU header.
587            if section_data.len() < 12 {
588                return None;
589            }
590
591            let mut size_bytes = [0; 8];
592            size_bytes.copy_from_slice(&section_data[4..12]);
593
594            (
595                CompressionType::Zlib,
596                u64::from_be_bytes(size_bytes),
597                &section_data[12..],
598            )
599        } else {
600            let container = self.elf.header.container().ok()?;
601            let endianness = self.elf.header.endianness().ok()?;
602            let context = Ctx::new(container, endianness);
603
604            let compression = CompressionHeader::parse(section_data, 0, context).ok()?;
605            let ty = match compression.ch_type {
606                ELFCOMPRESS_ZLIB => CompressionType::Zlib,
607                ELFCOMPRESS_ZSTD => CompressionType::Zstd,
608                _ => {
609                    return None;
610                }
611            };
612
613            let compressed = &section_data[CompressionHeader::size(context)..];
614            (ty, compression.ch_size, compressed)
615        };
616
617        let decompressed = match ty {
618            CompressionType::Zlib => {
619                let mut decompressed = Vec::with_capacity(size as usize);
620                Decompress::new(true)
621                    .decompress_vec(compressed, &mut decompressed, FlushDecompress::Finish)
622                    .ok()?;
623                decompressed
624            }
625            CompressionType::Zstd => zstd::bulk::decompress(compressed, size as usize).ok()?,
626        };
627
628        Some(decompressed)
629    }
630
631    /// Locates and reads a section in an ELF binary.
632    fn find_section(&self, name: &str) -> Option<(bool, DwarfSection<'data>)> {
633        for header in &self.elf.section_headers {
634            // The section type is usually SHT_PROGBITS, but some compilers also use
635            // SHT_X86_64_UNWIND and SHT_MIPS_DWARF. We apply the same approach as elfutils,
636            // matching against SHT_NOBITS, instead.
637            if header.sh_type == elf::section_header::SHT_NOBITS {
638                continue;
639            }
640
641            if let Some(section_name) = self.elf.shdr_strtab.get_at(header.sh_name) {
642                let offset = header.sh_offset as usize;
643                if offset == 0 {
644                    // We're defensive here. On darwin, dsymutil leaves phantom section headers
645                    // while stripping their data from the file by setting their offset to 0. We
646                    // know that no section can start at an absolute file offset of zero, so we can
647                    // safely skip them in case similar things happen on linux.
648                    continue;
649                }
650
651                if section_name.is_empty() {
652                    continue;
653                }
654
655                // Before SHF_COMPRESSED was a thing, compressed sections were prefixed with `.z`.
656                // Support this as an override to the flag.
657                let (compressed, section_name) = match section_name.strip_prefix(".z") {
658                    Some(name) => (true, name),
659                    None => (header.sh_flags & SHF_COMPRESSED != 0, &section_name[1..]),
660                };
661
662                if section_name != name {
663                    continue;
664                }
665
666                let size = header.sh_size as usize;
667                let data = &self.data[offset..][..size];
668                let section = DwarfSection {
669                    data: Cow::Borrowed(data),
670                    address: header.sh_addr,
671                    offset: header.sh_offset,
672                    align: header.sh_addralign,
673                };
674
675                return Some((compressed, section));
676            }
677        }
678
679        None
680    }
681
682    /// Searches for a GNU build identifier node in an ELF file.
683    ///
684    /// Depending on the compiler and linker, the build ID can be declared in a
685    /// PT_NOTE program header entry, the ".note.gnu.build-id" section, or even
686    /// both.
687    fn find_build_id(&self) -> Option<&'data [u8]> {
688        // First, search the note program headers (PT_NOTE) for a NT_GNU_BUILD_ID.
689        // We swallow all errors during this process and simply fall back to the
690        // next method below.
691        if let Some(mut notes) = self.elf.iter_note_headers(self.data) {
692            while let Some(Ok(note)) = notes.next() {
693                if note.n_type == elf::note::NT_GNU_BUILD_ID {
694                    return Some(note.desc);
695                }
696            }
697        }
698
699        // Some old linkers or compilers might not output the above PT_NOTE headers.
700        // In that case, search for a note section (SHT_NOTE). We are looking for a
701        // note within the ".note.gnu.build-id" section. Again, swallow all errors
702        // and fall through if reading the section is not possible.
703        if let Some(mut notes) = self
704            .elf
705            .iter_note_sections(self.data, Some(".note.gnu.build-id"))
706        {
707            while let Some(Ok(note)) = notes.next() {
708                if note.n_type == elf::note::NT_GNU_BUILD_ID {
709                    return Some(note.desc);
710                }
711            }
712        }
713
714        const PT_SCE_DYNLIBDATA: u32 = 0x61000000;
715
716        for ph in &self.elf.program_headers {
717            if ph.p_type == PT_SCE_DYNLIBDATA && ph.p_filesz >= 20 {
718                let offset = ph.p_offset as usize;
719                return self.data.get(offset..offset.saturating_add(20));
720            }
721        }
722
723        None
724    }
725
726    /// Converts an ELF object identifier into a `DebugId`.
727    ///
728    /// The identifier data is first truncated or extended to match 16 byte size of
729    /// Uuids. If the data is declared in little endian, the first three Uuid fields
730    /// are flipped to match the big endian expected by the breakpad processor.
731    ///
732    /// The `DebugId::appendix` field is always `0` for ELF.
733    fn compute_debug_id(&self, identifier: &[u8]) -> DebugId {
734        // Make sure that we have exactly UUID_SIZE bytes available
735        let mut data = [0; UUID_SIZE];
736        let len = std::cmp::min(identifier.len(), UUID_SIZE);
737        data[0..len].copy_from_slice(&identifier[0..len]);
738
739        if self.elf.little_endian {
740            // The file ELF file targets a little endian architecture. Convert to
741            // network byte order (big endian) to match the Breakpad processor's
742            // expectations. For big endian object files, this is not needed.
743            data[0..4].reverse(); // uuid field 1
744            data[4..6].reverse(); // uuid field 2
745            data[6..8].reverse(); // uuid field 3
746        }
747
748        Uuid::from_slice(&data)
749            .map(DebugId::from_uuid)
750            .unwrap_or_default()
751    }
752}
753
754impl fmt::Debug for ElfObject<'_> {
755    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
756        f.debug_struct("ElfObject")
757            .field("code_id", &self.code_id())
758            .field("debug_id", &self.debug_id())
759            .field("arch", &self.arch())
760            .field("kind", &self.kind())
761            .field("load_address", &format_args!("{:#x}", self.load_address()))
762            .field("has_symbols", &self.has_symbols())
763            .field("has_debug_info", &self.has_debug_info())
764            .field("has_unwind_info", &self.has_unwind_info())
765            .field("is_malformed", &self.is_malformed())
766            .finish()
767    }
768}
769
770impl<'slf, 'data: 'slf> AsSelf<'slf> for ElfObject<'data> {
771    type Ref = ElfObject<'slf>;
772
773    fn as_self(&'slf self) -> &'slf Self::Ref {
774        self
775    }
776}
777
778impl<'data> Parse<'data> for ElfObject<'data> {
779    type Error = ElfError;
780
781    fn test(data: &[u8]) -> bool {
782        Self::test(data)
783    }
784
785    fn parse(data: &'data [u8]) -> Result<Self, ElfError> {
786        Self::parse(data)
787    }
788}
789
790impl<'data: 'object, 'object> ObjectLike<'data, 'object> for ElfObject<'data> {
791    type Error = DwarfError;
792    type Session = DwarfDebugSession<'data>;
793    type SymbolIterator = ElfSymbolIterator<'data, 'object>;
794
795    fn file_format(&self) -> FileFormat {
796        self.file_format()
797    }
798
799    fn code_id(&self) -> Option<CodeId> {
800        self.code_id()
801    }
802
803    fn debug_id(&self) -> DebugId {
804        self.debug_id()
805    }
806
807    fn arch(&self) -> Arch {
808        self.arch()
809    }
810
811    fn kind(&self) -> ObjectKind {
812        self.kind()
813    }
814
815    fn load_address(&self) -> u64 {
816        self.load_address()
817    }
818
819    fn has_symbols(&self) -> bool {
820        self.has_symbols()
821    }
822
823    fn symbols(&'object self) -> Self::SymbolIterator {
824        self.symbols()
825    }
826
827    fn symbol_map(&self) -> SymbolMap<'data> {
828        self.symbol_map()
829    }
830
831    fn has_debug_info(&self) -> bool {
832        self.has_debug_info()
833    }
834
835    fn debug_session(&self) -> Result<Self::Session, Self::Error> {
836        self.debug_session()
837    }
838
839    fn has_unwind_info(&self) -> bool {
840        self.has_unwind_info()
841    }
842
843    fn has_sources(&self) -> bool {
844        self.has_sources()
845    }
846
847    fn is_malformed(&self) -> bool {
848        self.is_malformed()
849    }
850}
851
852impl<'data> Dwarf<'data> for ElfObject<'data> {
853    fn endianity(&self) -> Endian {
854        if self.elf.little_endian {
855            Endian::Little
856        } else {
857            Endian::Big
858        }
859    }
860
861    fn raw_section(&self, name: &str) -> Option<DwarfSection<'data>> {
862        let (_, section) = self.find_section(name)?;
863        Some(section)
864    }
865
866    fn section(&self, name: &str) -> Option<DwarfSection<'data>> {
867        let (compressed, mut section) = self.find_section(name)?;
868
869        if compressed {
870            let decompressed = self.decompress_section(&section.data)?;
871            section.data = Cow::Owned(decompressed);
872        }
873
874        Some(section)
875    }
876}
877
878/// An iterator over symbols in the ELF file.
879///
880/// Returned by [`ElfObject::symbols`](struct.ElfObject.html#method.symbols).
881pub struct ElfSymbolIterator<'data, 'object> {
882    symbols: elf::sym::SymIterator<'data>,
883    strtab: &'object strtab::Strtab<'data>,
884    dynamic_symbols: elf::sym::SymIterator<'data>,
885    dynamic_strtab: &'object strtab::Strtab<'data>,
886    sections: &'object [elf::SectionHeader],
887    load_addr: u64,
888}
889
890impl<'data> Iterator for ElfSymbolIterator<'data, '_> {
891    type Item = Symbol<'data>;
892
893    fn next(&mut self) -> Option<Self::Item> {
894        fn get_symbols<'data>(
895            symbols: &mut SymIterator,
896            strtab: &Strtab<'data>,
897            load_addr: u64,
898            sections: &[SectionHeader],
899        ) -> Option<Symbol<'data>> {
900            for symbol in symbols {
901                // Only check for function symbols.
902                if symbol.st_type() != elf::sym::STT_FUNC {
903                    continue;
904                }
905
906                // Sanity check of the symbol address. Since we only intend to iterate over function
907                // symbols, they need to be mapped after the image's load address.
908                if symbol.st_value < load_addr {
909                    continue;
910                }
911
912                let section = match symbol.st_shndx {
913                    self::SHN_UNDEF => None,
914                    index => sections.get(index),
915                };
916
917                // We are only interested in symbols pointing into sections with executable flag.
918                if !section.is_some_and(|header| header.is_executable()) {
919                    continue;
920                }
921
922                let name = strtab.get_at(symbol.st_name).map(Cow::Borrowed);
923
924                return Some(Symbol {
925                    name,
926                    address: symbol.st_value - load_addr,
927                    size: symbol.st_size,
928                });
929            }
930
931            None
932        }
933
934        get_symbols(
935            &mut self.symbols,
936            self.strtab,
937            self.load_addr,
938            self.sections,
939        )
940        .or_else(|| {
941            get_symbols(
942                &mut self.dynamic_symbols,
943                self.dynamic_strtab,
944                self.load_addr,
945                self.sections,
946            )
947        })
948    }
949}
950
/// Parsed debug link section.
///
/// Holds the file name of the separate debug file and the CRC checksum that
/// were read from the section data.
#[derive(Debug)]
pub struct DebugLink<'data> {
    /// NUL-terminated file name. Borrowed from the section data when that data
    /// was itself borrowed, owned otherwise.
    filename: Cow<'data, CStr>,
    /// CRC checksum, decoded with the endianness passed to the parser.
    crc: u32,
}
957
958impl<'data> DebugLink<'data> {
959    /// Attempts to parse a debug link section from its data.
960    ///
961    /// The expected format for the section is:
962    ///
963    /// - A filename, with any leading directory components removed, followed by a zero byte,
964    /// - zero to three bytes of padding, as needed to reach the next four-byte boundary within the section, and
965    /// - a four-byte CRC checksum, stored in the same endianness used for the executable file itself.
966    ///
967    /// (from <https://sourceware.org/gdb/current/onlinedocs/gdb/Separate-Debug-Files.html#index-_002egnu_005fdebuglink-sections>)
968    ///
969    /// # Errors
970    ///
971    /// If the section data is malformed, in particular:
972    /// - No NUL byte delimiting the filename from the CRC
973    /// - Not enough space for the CRC checksum
974    pub fn from_data(
975        data: Cow<'data, [u8]>,
976        endianity: Endian,
977    ) -> Result<Self, DebugLinkError<'data>> {
978        match data {
979            Cow::Owned(data) => {
980                let (filename, crc) = Self::from_borrowed_data(&data, endianity)
981                    .map(|(filename, crc)| (filename.to_owned(), crc))
982                    .map_err(|kind| DebugLinkError {
983                        kind,
984                        data: Cow::Owned(data),
985                    })?;
986                Ok(Self {
987                    filename: Cow::Owned(filename),
988                    crc,
989                })
990            }
991            Cow::Borrowed(data) => {
992                let (filename, crc) =
993                    Self::from_borrowed_data(data, endianity).map_err(|kind| DebugLinkError {
994                        kind,
995                        data: Cow::Borrowed(data),
996                    })?;
997                Ok(Self {
998                    filename: Cow::Borrowed(filename),
999                    crc,
1000                })
1001            }
1002        }
1003    }
1004
1005    fn from_borrowed_data(
1006        data: &[u8],
1007        endianity: Endian,
1008    ) -> Result<(&CStr, u32), DebugLinkErrorKind> {
1009        let nul_pos = data
1010            .iter()
1011            .position(|byte| *byte == 0)
1012            .ok_or(DebugLinkErrorKind::MissingNul)?;
1013
1014        if nul_pos + 1 == data.len() {
1015            return Err(DebugLinkErrorKind::MissingCrc {
1016                filename_len_with_nul: nul_pos + 1,
1017            });
1018        }
1019
1020        let filename = &data[..nul_pos + 1];
1021
1022        // let's be liberal and assume that the padding is correct and all 0s,
1023        // and just check that we have enough remaining length for the CRC.
1024        let crc = data
1025            .get(nul_pos + 1..)
1026            .and_then(|crc| crc.get(crc.len() - 4..))
1027            .ok_or(DebugLinkErrorKind::MissingCrc {
1028                filename_len_with_nul: filename.len(),
1029            })?;
1030
1031        let crc: [u8; 4] = crc.try_into().map_err(|_| DebugLinkErrorKind::MissingCrc {
1032            filename_len_with_nul: filename.len(),
1033        })?;
1034
1035        let crc = match endianity {
1036            Endian::Little => u32::from_le_bytes(crc),
1037            Endian::Big => u32::from_be_bytes(crc),
1038        };
1039
1040        let filename =
1041            CStr::from_bytes_with_nul(filename).map_err(|_| DebugLinkErrorKind::MissingNul)?;
1042
1043        Ok((filename, crc))
1044    }
1045
1046    /// The debug link filename
1047    pub fn filename(&self) -> &CStr {
1048        &self.filename
1049    }
1050
1051    /// The CRC checksum associated with the debug link file
1052    pub fn crc(&self) -> u32 {
1053        self.crc
1054    }
1055}
1056
/// Kind of errors that can occur while parsing a debug link section.
#[derive(Debug, Error)]
pub enum DebugLinkErrorKind {
    /// No NUL byte delimiting the filename from the CRC
    #[error("missing NUL character")]
    MissingNul,
    /// Not enough space in the section data for the CRC checksum
    #[error("missing CRC")]
    MissingCrc {
        /// Size of the filename part of the section including the NUL character,
        /// i.e. the offset in the section data at which the filename ends.
        filename_len_with_nul: usize,
    },
}
1070
/// Errors that can occur while parsing a debug link section.
///
/// Besides the failure reason, the error carries the section data that was
/// passed to the parser, so the caller gets the buffer back.
#[derive(Debug, Error)]
#[error("could not parse debug link section")]
pub struct DebugLinkError<'data> {
    #[source]
    /// The kind of error that occurred.
    pub kind: DebugLinkErrorKind,
    /// The original data of the debug section.
    pub data: Cow<'data, [u8]>,
}
symbolic_debuginfo/elf.rs

symbolic_debuginfo/
elf.rs