goblin_experimental/elf/
mod.rs

1//! The generic ELF module, which gives access to ELF constants and other helper functions, which are independent of ELF bithood.  Also defines an `Elf` struct which implements a unified parser that returns a wrapped `Elf64` or `Elf32` binary.
2//!
3//! To access the exact 32-bit or 64-bit versions, use [goblin::elf32::Header](header/header32/struct.Header.html)/[goblin::elf64::Header](header/header64/struct.Header.html), etc., for the various 32/64-bit structs.
4//!
5//! # Example
6//!
7//! ```rust
8//! use goblin_experimental as goblin;
9//! use std::fs::File;
10//!
11//! pub fn read (bytes: &[u8]) {
12//!   match goblin::elf::Elf::parse(&bytes) {
13//!     Ok(binary) => {
14//!       let entry = binary.entry;
15//!       for ph in binary.program_headers {
16//!         if ph.p_type == goblin::elf::program_header::PT_LOAD {
17//!           // TODO: you should validate p_filesz before allocating.
18//!           let mut _buf = vec![0u8; ph.p_filesz as usize];
19//!           // read responsibly
20//!          }
21//!       }
22//!     },
23//!     Err(_) => ()
24//!   }
25//! }
26//! ```
27//!
28//! This will properly access the underlying 32-bit or 64-bit binary automatically. Note that since
29//! 32-bit binaries typically have shorter 32-bit values in some cases (specifically for addresses and pointer
30//! values), these values are upcasted to u64/i64s when appropriate.
31//!
32//! See [goblin::elf::Elf](struct.Elf.html) for more information.
33//!
34//! You are still free to use the specific 32-bit or 64-bit versions by accessing them through `goblin::elf64`, etc., but you will have to parse and/or construct the various components yourself.
35//! In other words, there is no unified 32/64-bit `Elf` struct.
36//!
37//! # Note
38//! To use the automagic ELF datatype union parser, you _must_ enable/opt-in to the  `elf64`, `elf32`, and
39//! `endian_fd` features if you disable `default`.
40
41#[macro_use]
42pub(crate) mod gnu_hash;
43
44// These are shareable values for the 32/64 bit implementations.
45//
46// They are publicly re-exported by the pub-using module
47pub mod compression_header;
48pub mod header;
49pub mod program_header;
50pub mod section_header;
51#[macro_use]
52pub mod sym;
53pub mod dynamic;
54#[macro_use]
55pub mod reloc;
56pub mod note;
57#[cfg(all(any(feature = "elf32", feature = "elf64"), feature = "alloc"))]
58pub mod symver;
59
/// Wraps a list of items so that each one is only compiled when the `elf32`,
/// `elf64` and `endian_fd` features are all enabled.
macro_rules! if_sylvan {
    ($($gated:item)*) => {
        $(
            #[cfg(all(feature = "elf32", feature = "elf64", feature = "endian_fd"))]
            $gated
        )*
    };
}
66
67if_sylvan! {
68    use scroll::{ctx, Pread, Endian};
69    use crate::strtab::Strtab;
70    use crate::error;
71    use crate::container::{Container, Ctx};
72    use alloc::vec::Vec;
73    use core::cmp;
74
75    pub use header::Header;
76    pub use program_header::ProgramHeader;
77    pub use section_header::SectionHeader;
78    pub use sym::Symtab;
79    pub use sym::Sym;
80    pub use dynamic::Dyn;
81    pub use dynamic::Dynamic;
82    pub use reloc::Reloc;
83    pub use reloc::RelocSection;
84    pub use symver::{VersymSection, VerdefSection, VerneedSection};
85
    /// A list of parsed program headers, as stored in `Elf::program_headers`.
    pub type ProgramHeaders = Vec<ProgramHeader>;
    /// A list of parsed section headers, as stored in `Elf::section_headers`.
    pub type SectionHeaders = Vec<SectionHeader>;
    /// An index into the section headers; used to key the entries of `Elf::shdr_relocs`.
    pub type ShdrIdx = usize;
89
    #[derive(Debug)]
    /// An ELF binary. The underlying data structures are read according to the header's byte order and container size (32 or 64).
    pub struct Elf<'a> {
        /// The ELF header, which provides a rudimentary index into the rest of the binary
        pub header: Header,
        /// The program headers; they primarily tell the kernel and the dynamic linker
        /// how to load this binary
        pub program_headers: ProgramHeaders,
        /// The section headers. These are strippable, never count on them being
        /// here unless you're a static linker!
        pub section_headers: SectionHeaders,
        /// The section header string table
        pub shdr_strtab: Strtab<'a>,
        /// The string table for the dynamically accessible symbols
        pub dynstrtab: Strtab<'a>,
        /// The dynamically accessible symbols, i.e., exports, imports.
        /// This is what the dynamic linker uses to dynamically load and link your binary,
        /// or find imported symbols for binaries which dynamically link against your library
        pub dynsyms: Symtab<'a>,
        /// The debugging symbol table
        pub syms: Symtab<'a>,
        /// The string table for the symbol table
        pub strtab: Strtab<'a>,
        /// Contains dynamic linking information, with the _DYNAMIC array + a preprocessed DynamicInfo for that array
        pub dynamic: Option<Dynamic>,
        /// The dynamic relocation entries (strings, copy-data, etc.) with an addend
        pub dynrelas: RelocSection<'a>,
        /// The dynamic relocation entries without an addend
        pub dynrels: RelocSection<'a>,
        /// The plt relocation entries (procedure linkage table). For 32-bit binaries these are usually Rel (no addend)
        pub pltrelocs: RelocSection<'a>,
        /// Section relocations by section index (only present if this is a relocatable object file)
        pub shdr_relocs: Vec<(ShdrIdx, RelocSection<'a>)>,
        /// The binary's soname, if it has one
        pub soname: Option<&'a str>,
        /// The binary's program interpreter (e.g., dynamic linker), if it has one
        pub interpreter: Option<&'a str>,
        /// A list of this binary's dynamic libraries it uses, if there are any
        pub libraries: Vec<&'a str>,
        /// A list of runtime search paths for this binary's dynamic libraries it uses, if there
        /// are any. (deprecated)
        pub rpaths: Vec<&'a str>,
        /// A list of runtime search paths for this binary's dynamic libraries it uses, if there
        /// are any.
        pub runpaths: Vec<&'a str>,
        /// Whether this is a 64-bit elf or not
        pub is_64: bool,
        /// Whether this is a shared object or not
        pub is_lib: bool,
        /// The binary's entry point address, if it has one
        pub entry: u64,
        /// Whether the binary is little endian or not
        pub little_endian: bool,
        /// Contains the symbol version information from the optional section
        /// [`SHT_GNU_VERSYM`][section_header::SHT_GNU_VERSYM] (GNU extension).
        pub versym : Option<VersymSection<'a>>,
        /// Contains the version definition information from the optional section
        /// [`SHT_GNU_VERDEF`][section_header::SHT_GNU_VERDEF] (GNU extension).
        pub verdef : Option<VerdefSection<'a>>,
        /// Contains the version needed information from the optional section
        /// [`SHT_GNU_VERNEED`][section_header::SHT_GNU_VERNEED] (GNU extension).
        pub verneed : Option<VerneedSection<'a>>,
        // The container size and byte order this binary was parsed with; handed to the
        // note iterators so they decode with the same settings.
        ctx: Ctx,
    }
154
155    impl<'a> Elf<'a> {
156        /// Try to iterate notes in PT_NOTE program headers; returns `None` if there aren't any note headers in this binary
157        pub fn iter_note_headers(&self, data: &'a [u8]) -> Option<note::NoteIterator<'a>> {
158            let mut iters = vec![];
159            for phdr in &self.program_headers {
160                if phdr.p_type == program_header::PT_NOTE {
161                    let offset = phdr.p_offset as usize;
162                    let alignment = phdr.p_align as usize;
163
164                    iters.push(note::NoteDataIterator {
165                        data,
166                        offset,
167                        size: offset.saturating_add(phdr.p_filesz as usize),
168                        ctx: (alignment, self.ctx)
169                    });
170                }
171            }
172
173            if iters.is_empty() {
174                None
175            } else {
176                Some(note::NoteIterator {
177                    iters: iters,
178                    index: 0,
179                })
180            }
181        }
182        /// Try to iterate notes in SHT_NOTE sections; returns `None` if there aren't any note sections in this binary
183        ///
184        /// If a section_name is given, only the section with the according name is iterated.
185        pub fn iter_note_sections(
186            &self,
187            data: &'a [u8],
188            section_name: Option<&str>,
189        ) -> Option<note::NoteIterator<'a>> {
190            let mut iters = vec![];
191            for sect in &self.section_headers {
192                if sect.sh_type != section_header::SHT_NOTE {
193                    continue;
194                }
195
196                if section_name.is_some() && self.shdr_strtab.get_at(sect.sh_name) != section_name {
197                    continue;
198                }
199
200                let offset = sect.sh_offset as usize;
201                let alignment = sect.sh_addralign as usize;
202                iters.push(note::NoteDataIterator {
203                    data,
204                    offset,
205                    size: offset.saturating_add(sect.sh_size as usize),
206                    ctx: (alignment, self.ctx)
207                });
208            }
209
210            if iters.is_empty() {
211                None
212            } else {
213                Some(note::NoteIterator {
214                    iters: iters,
215                    index: 0,
216                })
217            }
218        }
219        pub fn is_object_file(&self) -> bool {
220            self.header.e_type == header::ET_REL
221        }
222
223        /// Parses the contents to get the Header only. This `bytes` buffer should contain at least the length for parsing Header.
224        pub fn parse_header(bytes: &'a [u8]) -> error::Result<Header> {
225            bytes.pread::<Header>(0)
226        }
227
228        /// Lazy parse the ELF contents. This function mainly just assembles an Elf struct. Once we have the struct, we can choose to parse whatever we want.
229        pub fn lazy_parse(header: Header) -> error::Result<Self> {
230            let misc = parse_misc(&header)?;
231
232            Ok(Elf {
233                header,
234                program_headers: vec![],
235                section_headers: Default::default(),
236                shdr_strtab: Default::default(),
237                dynamic: None,
238                dynsyms: Default::default(),
239                dynstrtab: Strtab::default(),
240                syms: Default::default(),
241                strtab: Default::default(),
242                dynrelas: Default::default(),
243                dynrels: Default::default(),
244                pltrelocs: Default::default(),
245                shdr_relocs: Default::default(),
246                soname: None,
247                interpreter: None,
248                libraries: vec![],
249                rpaths: vec![],
250                runpaths: vec![],
251                is_64: misc.is_64,
252                is_lib: misc.is_lib,
253                entry: misc.entry,
254                little_endian: misc.little_endian,
255                ctx: misc.ctx,
256                versym: None,
257                verdef: None,
258                verneed: None,
259            })
260        }
261
        /// Parses the contents of the byte stream in `bytes`, and maybe returns a unified binary.
        ///
        /// Parsing proceeds in this order: the ELF header, the program headers (plus the
        /// `PT_INTERP` interpreter string), the section headers and the section header string
        /// table, the last `SHT_SYMTAB` symbol table with its string table, the dynamic array
        /// and everything derived from it, the per-section relocations, and finally the GNU
        /// symbol versioning sections.
        ///
        /// # Errors
        ///
        /// Returns the first error produced by any of the fallible parsing steps. An
        /// interpreter string that cannot be read is not an error; `interpreter` is `None`
        /// in that case.
        pub fn parse(bytes: &'a [u8]) -> error::Result<Self> {
            let header = Self::parse_header(bytes)?;
            let misc = parse_misc(&header)?;
            let ctx = misc.ctx;

            let program_headers = ProgramHeader::parse(bytes, header.e_phoff as usize, header.e_phnum as usize, ctx)?;

            let mut interpreter = None;
            for ph in &program_headers {
                // The `p_filesz != 0` test also guards the `- 1` below against underflow.
                if ph.p_type == program_header::PT_INTERP && ph.p_filesz != 0 {
                    // One byte is dropped from the end of the segment; presumably the
                    // trailing NUL terminator of the path (TODO confirm).
                    let count = (ph.p_filesz - 1) as usize;
                    let offset = ph.p_offset as usize;
                    // A read failure is deliberately turned into `None` instead of an error.
                    interpreter = bytes.pread_with::<&str>(offset, ::scroll::ctx::StrCtx::Length(count)).ok();
                }
            }

            let section_headers = SectionHeader::parse(bytes, header.e_shoff as usize, header.e_shnum as usize, ctx)?;

            // Resolves a section index to the string table stored in that section.
            // An index that does not name an existing section yields an empty table
            // rather than an error.
            let get_strtab = |section_headers: &[SectionHeader], mut section_idx: usize| {
                if section_idx == section_header::SHN_XINDEX as usize {
                    if section_headers.is_empty() {
                        return Ok(Strtab::default())
                    }
                    // For `SHN_XINDEX` the index to use is taken from `sh_link` of the
                    // first section header.
                    section_idx = section_headers[0].sh_link as usize;
                }

                if section_idx >= section_headers.len() {
                    // FIXME: warn! here
                    Ok(Strtab::default())
                } else {
                    let shdr = &section_headers[section_idx];
                    shdr.check_size(bytes.len())?;
                    Strtab::parse(bytes, shdr.sh_offset as usize, shdr.sh_size as usize, 0x0)
                }
            };

            let strtab_idx = header.e_shstrndx as usize;
            let shdr_strtab = get_strtab(&section_headers, strtab_idx)?;

            let mut syms = Symtab::default();
            let mut strtab = Strtab::default();
            // `rfind`: when several `SHT_SYMTAB` sections exist, the last one is used.
            if let Some(shdr) = section_headers.iter().rfind(|shdr| shdr.sh_type as u32 == section_header::SHT_SYMTAB) {
                let size = shdr.sh_entsize;
                // An entry size of 0 is treated as "no symbols", which also avoids a
                // division by zero.
                let count = if size == 0 { 0 } else { shdr.sh_size / size };
                syms = Symtab::parse(bytes, shdr.sh_offset as usize, count as usize, ctx)?;
                // The symbol table's `sh_link` names the section holding its string table.
                strtab = get_strtab(&section_headers, shdr.sh_link as usize)?;
            }

            let mut is_pie = false;
            let mut soname = None;
            let mut libraries = vec![];
            let mut rpaths = vec![];
            let mut runpaths = vec![];
            let mut dynsyms = Symtab::default();
            let mut dynrelas = RelocSection::default();
            let mut dynrels = RelocSection::default();
            let mut pltrelocs = RelocSection::default();
            let mut dynstrtab = Strtab::default();
            let dynamic = Dynamic::parse(bytes, &program_headers, ctx)?;
            // Everything below is derived from the dynamic array; without one, the
            // defaults assigned above are kept.
            if let Some(ref dynamic) = dynamic {
                let dyn_info = &dynamic.info;

                is_pie = dyn_info.flags_1 & dynamic::DF_1_PIE != 0;
                dynstrtab = Strtab::parse(bytes,
                                          dyn_info.strtab,
                                          dyn_info.strsz,
                                          0x0)?;

                // A soname offset of 0 is treated as "no soname".
                if dyn_info.soname != 0 {
                    // FIXME: warn! here
                    soname = dynstrtab.get_at(dyn_info.soname);
                }
                if dyn_info.needed_count > 0 {
                    libraries = dynamic.get_libraries(&dynstrtab);
                }
                // Collect the search paths; entries whose string offset cannot be
                // resolved in the dynamic string table are skipped.
                for dyn_ in &dynamic.dyns {
                    if dyn_.d_tag == dynamic::DT_RPATH {
                        if let Some(path) = dynstrtab.get_at(dyn_.d_val as usize) {
                            rpaths.push(path);
                        }
                    } else if dyn_.d_tag == dynamic::DT_RUNPATH {
                        if let Some(path) = dynstrtab.get_at(dyn_.d_val as usize) {
                            runpaths.push(path);
                        }
                    }
                }
                // parse the dynamic relocations
                dynrelas = RelocSection::parse(bytes, dyn_info.rela, dyn_info.relasz, true, ctx)?;
                dynrels = RelocSection::parse(bytes, dyn_info.rel, dyn_info.relsz, false, ctx)?;
                // The PLT relocations carry an addend only when `pltrel` says `DT_RELA`.
                let is_rela = dyn_info.pltrel as u64 == dynamic::DT_RELA;
                pltrelocs = RelocSection::parse(bytes, dyn_info.jmprel, dyn_info.pltrelsz, is_rela, ctx)?;

                // The number of dynamic symbols is taken from the GNU hash table when
                // present, otherwise from the hash table, otherwise it starts at 0.
                let mut num_syms = if let Some(gnu_hash) = dyn_info.gnu_hash {
                    gnu_hash_len(bytes, gnu_hash as usize, ctx)?
                } else if let Some(hash) = dyn_info.hash {
                    hash_len(bytes, hash as usize, header.e_machine, ctx)?
                } else {
                    0
                };
                // Relocations may reference a symbol index beyond that count; make sure
                // the symbol table covers the largest index any relocation uses.
                let max_reloc_sym = dynrelas.iter()
                    .chain(dynrels.iter())
                    .chain(pltrelocs.iter())
                    .fold(0, |num, reloc| cmp::max(num, reloc.r_sym));
                if max_reloc_sym != 0 {
                    num_syms = cmp::max(num_syms, max_reloc_sym + 1);
                }
                dynsyms = Symtab::parse(bytes, dyn_info.symtab, num_syms, ctx)?;
            }

            // Parse every `SHT_RELA` / `SHT_REL` section, remembering which section
            // index each set of relocations came from.
            let mut shdr_relocs = vec![];
            for (idx, section) in section_headers.iter().enumerate() {
                let is_rela = section.sh_type == section_header::SHT_RELA;
                if is_rela || section.sh_type == section_header::SHT_REL {
                    section.check_size(bytes.len())?;
                    let sh_relocs = RelocSection::parse(bytes, section.sh_offset as usize, section.sh_size as usize, is_rela, ctx)?;
                    shdr_relocs.push((idx, sh_relocs));
                }
            }

            let versym = symver::VersymSection::parse(bytes, &section_headers, ctx)?;
            let verdef = symver::VerdefSection::parse(bytes, &section_headers, ctx)?;
            let verneed = symver::VerneedSection::parse(bytes, &section_headers, ctx)?;

            // An `ET_DYN` binary that carries the `DF_1_PIE` flag is not reported as a library.
            let is_lib = misc.is_lib && !is_pie;

            Ok(Elf {
                header,
                program_headers,
                section_headers,
                shdr_strtab,
                dynamic,
                dynsyms,
                dynstrtab,
                syms,
                strtab,
                dynrelas,
                dynrels,
                pltrelocs,
                shdr_relocs,
                soname,
                interpreter,
                libraries,
                rpaths,
                runpaths,
                is_64: misc.is_64,
                is_lib,
                entry: misc.entry,
                little_endian: misc.little_endian,
                ctx: ctx,
                versym,
                verdef,
                verneed,
            })
        }
417    }
418
419    impl<'a> ctx::TryFromCtx<'a, (usize, Endian)> for Elf<'a> {
420        type Error = crate::error::Error;
421        fn try_from_ctx(src: &'a [u8], (_, _): (usize, Endian)) -> Result<(Elf<'a>, usize), Self::Error> {
422            let elf = Elf::parse(src)?;
423            Ok((elf, src.len()))
424        }
425    }
426
427    fn gnu_hash_len(bytes: &[u8], offset: usize, ctx: Ctx) -> error::Result<usize> {
428        let buckets_num = bytes.pread_with::<u32>(offset, ctx.le)? as usize;
429        let min_chain = bytes.pread_with::<u32>(offset + 4, ctx.le)? as usize;
430        let bloom_size = bytes.pread_with::<u32>(offset + 8, ctx.le)? as usize;
431        // We could handle min_chain==0 if we really had to, but it shouldn't happen.
432        if buckets_num == 0 || min_chain == 0 || bloom_size == 0 {
433            return Err(error::Error::Malformed(format!("Invalid DT_GNU_HASH: buckets_num={} min_chain={} bloom_size={}",
434                                                       buckets_num, min_chain, bloom_size)));
435        }
436        // Find the last bucket.
437        let buckets_offset = offset + 16 + bloom_size * if ctx.container.is_big() { 8 } else { 4 };
438        let mut max_chain = 0;
439        for bucket in 0..buckets_num {
440            let chain = bytes.pread_with::<u32>(buckets_offset + bucket * 4, ctx.le)? as usize;
441            if max_chain < chain {
442                max_chain = chain;
443            }
444        }
445        if max_chain < min_chain {
446            return Ok(0);
447        }
448        // Find the last chain within the bucket.
449        let mut chain_offset = buckets_offset + buckets_num * 4 + (max_chain - min_chain) * 4;
450        loop {
451            let hash = bytes.pread_with::<u32>(chain_offset, ctx.le)?;
452            max_chain += 1;
453            chain_offset += 4;
454            if hash & 1 != 0 {
455                return Ok(max_chain);
456            }
457        }
458    }
459
460    fn hash_len(bytes: &[u8], offset: usize, machine: u16, ctx: Ctx) -> error::Result<usize> {
461        // Based on readelf code.
462        let nchain = if (machine == header::EM_FAKE_ALPHA || machine == header::EM_S390) && ctx.container.is_big() {
463            bytes.pread_with::<u64>(offset.saturating_add(4), ctx.le)? as usize
464        } else {
465            bytes.pread_with::<u32>(offset.saturating_add(4), ctx.le)? as usize
466        };
467        Ok(nchain)
468    }
469
    /// Facts derived from the ELF header alone; produced by `parse_misc` and copied
    /// into the `Elf` struct by `Elf::lazy_parse` and `Elf::parse`.
    struct Misc {
        /// Whether the header's class is `ELFCLASS64`
        is_64: bool,
        /// Whether the header's `e_type` is `ET_DYN`
        is_lib: bool,
        /// The header's entry point address
        entry: u64,
        /// Whether the header's data encoding is `ELFDATA2LSB`
        little_endian: bool,
        /// The container size and endianness to parse the rest of the binary with
        ctx: Ctx,
    }
477
478    fn parse_misc(header: &Header) -> error::Result<Misc> {
479        let entry = header.e_entry as usize;
480        let is_lib = header.e_type == header::ET_DYN;
481        let is_lsb = header.e_ident[header::EI_DATA] == header::ELFDATA2LSB;
482        let endianness = scroll::Endian::from(is_lsb);
483        let class = header.e_ident[header::EI_CLASS];
484        if class != header::ELFCLASS64 && class != header::ELFCLASS32 {
485            return Err(error::Error::Malformed(format!("Unknown values in ELF ident header: class: {} endianness: {}",
486                                                        class,
487                                                        header.e_ident[header::EI_DATA])));
488        }
489        let is_64 = class == header::ELFCLASS64;
490        let container = if is_64 { Container::Big } else { Container::Little };
491        let ctx = Ctx::new(container, endianness);
492
493        Ok(Misc{
494            is_64,
495            is_lib,
496            entry: entry as u64,
497            little_endian:is_lsb,
498            ctx,
499        })
500    }
501}
502
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `crt1` and runs the checks shared by the 32-bit and 64-bit crt1 fixtures.
    ///
    /// `expect_64` is the expected value of `is_64`; `expected_name` is the name the
    /// symbol at index 11 must have.
    fn check_crt1(crt1: &[u8], expect_64: bool, expected_name: &str) {
        let binary = match Elf::parse(crt1) {
            Ok(binary) => binary,
            Err(err) => panic!("failed: {}", err),
        };

        assert_eq!(binary.is_64, expect_64);
        assert!(!binary.is_lib);
        assert_eq!(binary.entry, 0);
        assert!(binary.syms.get(1000).is_none());
        assert!(binary.syms.get(5).is_some());
        let syms = binary.syms.to_vec();
        assert!(!binary.section_headers.is_empty());
        if let Some(sym) = syms.get(11) {
            println!("sym: {:?}", &sym);
            assert_eq!(&binary.strtab[sym.st_name], expected_name);
        }
        assert!(!syms.is_empty());
    }

    #[test]
    fn parse_crt1_64bit() {
        let crt1: Vec<u8> = include!("../../etc/crt1.rs");
        check_crt1(&crt1, true, "_start");
    }

    #[test]
    fn parse_crt1_32bit() {
        let crt1: Vec<u8> = include!("../../etc/crt132.rs");
        check_crt1(&crt1, false, "__libc_csu_fini");
    }

    // Glob-importing both `section_header` and the `elf` module must leave
    // `SectionHeader` unambiguous.
    // See https://github.com/m4b/goblin/issues/257
    #[test]
    #[allow(unused)]
    fn no_use_statement_conflict() {
        use crate::elf::section_header::*;
        use crate::elf::*;

        fn f(_: SectionHeader) {}
    }
}