goblin_experimental/
lib.rs

1//! # libgoblin
2//!
3//! ![say the right
4//! words](https://s-media-cache-ak0.pinimg.com/736x/1b/6a/aa/1b6aaa2bae005e2fed84b1a7c32ecb1b.jpg)
5//!
6//! `libgoblin` is a cross-platform trifecta of binary parsing and loading fun.  It supports:
7//!
8//! * An ELF32/64 parser, and raw C structs
9//! * A 32/64-bit, zero-copy, endian aware, Mach-o parser, and raw C structs
10//! * A PE32/PE32+ (64-bit) parser, and raw C structs
11//! * A Unix archive parser and loader
12//!
13//! Goblin requires at least `rustc` 1.36.0, uses the 2018 rust edition, and is developed on stable.
14//!
15//! Goblin primarily supports the following important use cases:
16//!
17//! 1. Core, std-free `#[repr(C)]` structs, tiny compile time, 32/64 (or both) at your leisure
18//!
19//! 2. Type punning. Define a function once on a type, but have it work on 32 or 64-bit variants - without really changing anything, and no macros! See `examples/automagic.rs` for a basic example.
20//!
21//! 3. `std` mode. This throws in read and write impls via `Pread` and `Pwrite`, reading from file, convenience allocations, extra methods, etc. This is for clients who can allocate and want to read binaries off disk.
22//!
//! 4. `Endian_fd`. A truly terrible name :laughing: This is for binary analysis, like in [panopticon](https://github.com/das-labor/panopticon), which needs to read binaries of foreign endianness, _or_ as a basis for constructing cross-platform, foreign-architecture binutils; e.g. [cargo-sym](https://github.com/m4b/cargo-sym) and [bingrep](https://github.com/m4b/bingrep) are simple examples of this, but the sky is the limit.
24//!
25//! # Example
26//!
27//! ```rust
28//! use goblin_experimental as goblin;
29//! use goblin::{error, Object};
30//! use std::path::Path;
31//! use std::env;
32//! use std::fs;
33//!
34//! fn run () -> error::Result<()> {
35//!     for (i, arg) in env::args().enumerate() {
36//!         if i == 1 {
37//!             let path = Path::new(arg.as_str());
38//!             let buffer = fs::read(path)?;
39//!             match Object::parse(&buffer)? {
40//!                 Object::Elf(elf) => {
41//!                     println!("elf: {:#?}", &elf);
42//!                 },
43//!                 Object::PE(pe) => {
44//!                     println!("pe: {:#?}", &pe);
45//!                 },
46//!                 Object::COFF(coff) => {
47//!                     println!("coff: {:#?}", &coff);
48//!                 },
49//!                 Object::Mach(mach) => {
50//!                     println!("mach: {:#?}", &mach);
51//!                 },
52//!                 Object::Archive(archive) => {
53//!                     println!("archive: {:#?}", &archive);
54//!                 },
55//!                 Object::Unknown(magic) => { println!("unknown magic: {:#x}", magic) },
56//!                 _ => { }
57//!             }
58//!         }
59//!     }
60//!     Ok(())
61//! }
62//! ```
63//!
64//! # Feature Usage
65//!
66//! `libgoblin` is engineered to be tailored towards very different use-case scenarios, for example:
67//!
68//! * a no-std mode; just simply set default features to false
//! * endian-aware parsing and reading
70//! * for binary loaders which don't require this, simply use `elf32` and `elf64` (and `std` of course)
71//!
//! For example, if you are writing a 64-bit kernel, or just want a barebones C-like
//! header interface which defines the structures, just select `elf64`
//! (`--cfg feature="elf64"`), which will compile without `std`.
75//!
//! Similarly, if you want to use host endianness loading via the various `from_fd` methods, select
//! `std` (`--cfg feature="std"`), which will not use the `byteorder` extern crate, and will read the
//! bytes from disk in the endianness of the host machine.
79//!
//! If you want endian-aware reading, and you don't use `default`, then you need to opt in as normal
//! via the `endian_fd` feature.
82
83#![cfg_attr(not(feature = "std"), no_std)]
84
85#[cfg(feature = "std")]
86extern crate core;
87
88#[cfg(feature = "alloc")]
89#[macro_use]
90extern crate alloc;
91
92/////////////////////////
93// Misc/Helper Modules
94/////////////////////////
95
/// Wraps every item passed in so that it is only compiled when the `std` feature is enabled.
#[allow(unused)]
macro_rules! if_std {
    ($($item:item)*) => {
        $(
            #[cfg(feature = "std")]
            $item
        )*
    };
}
103
/// Wraps every item passed in so that it is only compiled when the `alloc` feature is enabled.
#[allow(unused)]
macro_rules! if_alloc {
    ($($item:item)*) => {
        $(
            #[cfg(feature = "alloc")]
            $item
        )*
    };
}
111
112#[cfg(feature = "alloc")]
113pub mod error;
114
115pub mod strtab;
116
117/// Binary container size information and byte-order context
118pub mod container {
119    pub use scroll::Endian;
120
121    #[derive(Debug, Copy, Clone, PartialEq)]
122    /// The size of a binary container
123    pub enum Container {
124        Little,
125        Big,
126    }
127
128    impl Container {
129        /// Is this a 64-bit container or not?
130        pub fn is_big(self) -> bool {
131            self == Container::Big
132        }
133    }
134
135    #[cfg(not(target_pointer_width = "64"))]
136    /// The default binary container size - either `Big` or `Little`, depending on whether the host machine's pointer size is 64 or not
137    pub const CONTAINER: Container = Container::Little;
138
139    #[cfg(target_pointer_width = "64")]
140    /// The default binary container size - either `Big` or `Little`, depending on whether the host machine's pointer size is 64 or not
141    pub const CONTAINER: Container = Container::Big;
142
143    impl Default for Container {
144        #[inline]
145        fn default() -> Self {
146            CONTAINER
147        }
148    }
149
150    #[derive(Debug, Copy, Clone, PartialEq)]
151    /// A binary parsing context, including the container size and underlying byte endianness
152    pub struct Ctx {
153        pub container: Container,
154        pub le: scroll::Endian,
155    }
156
157    impl Ctx {
158        /// Whether this binary container context is "big" or not
159        pub fn is_big(self) -> bool {
160            self.container.is_big()
161        }
162        /// Whether this binary container context is little endian or not
163        pub fn is_little_endian(self) -> bool {
164            self.le.is_little()
165        }
166        /// Create a new binary container context
167        pub fn new(container: Container, le: scroll::Endian) -> Self {
168            Ctx { container, le }
169        }
170        /// Return a dubious pointer/address byte size for the container
171        pub fn size(self) -> usize {
172            match self.container {
173                // TODO: require pointer size initialization/setting or default to container size with these values, e.g., avr pointer width will be smaller iirc
174                Container::Little => 4,
175                Container::Big => 8,
176            }
177        }
178    }
179
180    impl From<Container> for Ctx {
181        fn from(container: Container) -> Self {
182            Ctx {
183                container,
184                le: scroll::Endian::default(),
185            }
186        }
187    }
188
189    impl From<scroll::Endian> for Ctx {
190        fn from(le: scroll::Endian) -> Self {
191            Ctx {
192                container: CONTAINER,
193                le,
194            }
195        }
196    }
197
198    impl Default for Ctx {
199        #[inline]
200        fn default() -> Self {
201            Ctx {
202                container: Container::default(),
203                le: scroll::Endian::default(),
204            }
205        }
206    }
207}
208
/// Borrows the leading 16 bytes of `bytes` as a fixed-size array, which is the shape `peek_bytes` expects.
///
/// Returns `None` when `bytes` holds fewer than 16 bytes.
#[allow(unused)]
fn take_hint_bytes(bytes: &[u8]) -> Option<&[u8; 16]> {
    // `get` is the checked lookup: a short input yields `None` instead of panicking.
    let head = bytes.get(..16)?;
    // The slice is exactly 16 bytes long here, so the array conversion succeeds.
    head.try_into().ok()
}
217
/// Information obtained from a peek `Hint`
#[derive(Debug, Default)]
pub struct HintData {
    /// For ELF, `true` when the `EI_DATA` identification byte equals `ELFDATA2LSB`
    /// (see `peek_bytes`).
    pub is_lsb: bool,
    /// For ELF, `Some(true)` for `ELFCLASS64`, `Some(false)` for `ELFCLASS32`, and `None` for any
    /// other class byte (see `peek_bytes`).
    pub is_64: Option<bool>,
}
224
225#[derive(Debug)]
226#[non_exhaustive]
227/// A hint at the underlying binary format for 16 bytes of arbitrary data
228pub enum Hint {
229    Elf(HintData),
230    Mach(HintData),
231    MachFat(usize),
232    PE,
233    TE,
234    COFF,
235    Archive,
236    Unknown(u64),
237}
238
/// Wraps every item passed in so that it is only compiled when `endian_fd` and all of the
/// ELF, PE, TE, Mach-o and archive features are enabled together.
macro_rules! if_everything {
    ($($item:item)*) => {
        $(
            #[cfg(all(
                feature = "endian_fd",
                feature = "elf64",
                feature = "elf32",
                feature = "pe64",
                feature = "pe32",
                feature = "te",
                feature = "mach64",
                feature = "mach32",
                feature = "archive"
            ))]
            $item
        )*
    };
}
245
246if_everything! {
247
248    /// Peeks at `bytes`, and returns a `Hint`
249    pub fn peek_bytes(bytes: &[u8; 16]) -> error::Result<Hint> {
250        use scroll::{Pread, LE};
251        if &bytes[0..elf::header::SELFMAG] == elf::header::ELFMAG {
252            let class = bytes[elf::header::EI_CLASS];
253            let is_lsb = bytes[elf::header::EI_DATA] == elf::header::ELFDATA2LSB;
254            let is_64 =
255                if class == elf::header::ELFCLASS64 {
256                    Some (true)
257                } else if class == elf::header::ELFCLASS32 {
258                    Some (false)
259                } else { None };
260
261            Ok(Hint::Elf(HintData { is_lsb, is_64 }))
262        } else if &bytes[0..archive::SIZEOF_MAGIC] == archive::MAGIC {
263            Ok(Hint::Archive)
264        } else {
265            match *&bytes[0..2].pread_with::<u16>(0, LE)? {
266                pe::header::DOS_MAGIC => Ok(Hint::PE),
267                pe::header::TE_MAGIC => Ok(Hint::TE),
268                pe::header::COFF_MACHINE_X86 |
269                pe::header::COFF_MACHINE_X86_64 |
270                pe::header::COFF_MACHINE_ARM64 => Ok(Hint::COFF),
271                _ => mach::peek_bytes(bytes)
272            }
273        }
274    }
275
276    /// Peeks at the underlying Read object. Requires the underlying bytes to have at least 16 byte length. Resets the seek to `Start` after reading.
277    #[cfg(feature = "std")]
278    pub fn peek<R: ::std::io::Read + ::std::io::Seek>(fd: &mut R) -> error::Result<Hint> {
279        use std::io::SeekFrom;
280        let mut bytes = [0u8; 16];
281        fd.seek(SeekFrom::Start(0))?;
282        fd.read_exact(&mut bytes)?;
283        fd.seek(SeekFrom::Start(0))?;
284        peek_bytes(&bytes)
285    }
286
287    #[derive(Debug)]
288    #[allow(clippy::large_enum_variant)]
289    #[non_exhaustive]
290    /// A parseable object that goblin understands
291    pub enum Object<'a> {
292        /// An ELF32/ELF64!
293        Elf(elf::Elf<'a>),
294        /// A PE32/PE32+!
295        PE(pe::PE<'a>),
296        /// A TE!
297        TE(pe::TE<'a>),
298        /// A COFF
299        COFF(pe::Coff<'a>),
300        /// A 32/64-bit Mach-o binary _OR_ it is a multi-architecture binary container!
301        Mach(mach::Mach<'a>),
302        /// A Unix archive
303        Archive(archive::Archive<'a>),
304        /// None of the above, with the given magic value
305        Unknown(u64),
306    }
307
308    impl<'a> Object<'a> {
309        /// Tries to parse an `Object` from `bytes`
310        pub fn parse(bytes: &[u8]) -> error::Result<Object> {
311            if let Some(hint_bytes) = take_hint_bytes(bytes) {
312                match peek_bytes(hint_bytes)? {
313                    Hint::Elf(_) => Ok(Object::Elf(elf::Elf::parse(bytes)?)),
314                    Hint::Mach(_) | Hint::MachFat(_) => Ok(Object::Mach(mach::Mach::parse(bytes)?)),
315                    Hint::Archive => Ok(Object::Archive(archive::Archive::parse(bytes)?)),
316                    Hint::PE => Ok(Object::PE(pe::PE::parse(bytes)?)),
317                    Hint::TE => Ok(Object::TE(pe::TE::parse(bytes)?)),
318                    Hint::COFF => Ok(Object::COFF(pe::Coff::parse(bytes)?)),
319                    Hint::Unknown(magic) => Ok(Object::Unknown(magic)),
320                }
321            } else {
322                Err(error::Error::Malformed(format!("Object is too small.")))
323            }
324        }
325    }
} // end if_everything
327
328/////////////////////////
329// Binary Modules
330/////////////////////////
331
332#[cfg(any(feature = "elf64", feature = "elf32"))]
333#[macro_use]
334pub mod elf;
335
/// The ELF 32-bit struct definitions and associated values, re-exported for easy "type-punning"
#[cfg(feature = "elf32")]
pub mod elf32 {
    pub use crate::elf::{
        dynamic::dyn32 as dynamic, header::header32 as header, note::Nhdr32 as Note,
        program_header::program_header32 as program_header, reloc::reloc32 as reloc,
        section_header::section_header32 as section_header, sym::sym32 as sym,
    };

    /// Re-exports `hash` and expands `elf_gnu_hash_impl!` for `u32`
    pub mod gnu_hash {
        pub use crate::elf::gnu_hash::hash;
        elf_gnu_hash_impl!(u32);
    }
}
352
/// The ELF 64-bit struct definitions and associated values, re-exported for easy "type-punning"
#[cfg(feature = "elf64")]
pub mod elf64 {
    pub use crate::elf::{
        dynamic::dyn64 as dynamic, header::header64 as header, note::Nhdr64 as Note,
        program_header::program_header64 as program_header, reloc::reloc64 as reloc,
        section_header::section_header64 as section_header, sym::sym64 as sym,
    };

    /// Re-exports `hash` and expands `elf_gnu_hash_impl!` for `u64`
    pub mod gnu_hash {
        pub use crate::elf::gnu_hash::hash;
        elf_gnu_hash_impl!(u64);
    }
}
369
370#[cfg(any(feature = "mach32", feature = "mach64"))]
371pub mod mach;
372
373#[cfg(any(feature = "pe32", feature = "pe64"))]
374pub mod pe;
375
376#[cfg(feature = "archive")]
377pub mod archive;
378
#[cfg(test)]
mod tests {
    use super::*;

    if_everything! {
        // An input longer than 16 bytes yields the hint array.
        #[test]
        fn take_hint_bytes_long_enough() {
            let input = [1u8; 32];
            assert!(take_hint_bytes(&input).is_some());
        }

        // An input shorter than 16 bytes yields nothing.
        #[test]
        fn take_hint_bytes_not_long_enough() {
            let input = [1u8; 8];
            assert!(take_hint_bytes(&input).is_none());
        }
    }
}