goblin/
lib.rs

1//! # libgoblin
2//!
3//! ![say the right
4//! words](https://s-media-cache-ak0.pinimg.com/736x/1b/6a/aa/1b6aaa2bae005e2fed84b1a7c32ecb1b.jpg)
5//!
6//! `libgoblin` is a cross-platform trifecta of binary parsing and loading fun.  It supports:
7//!
8//! * An ELF32/64 parser, and raw C structs
9//! * A 32/64-bit, zero-copy, endian aware, Mach-o parser, and raw C structs
10//! * A PE32/PE32+ (64-bit) parser, and raw C structs
11//! * A Unix archive parser and loader
12//!
13//! Goblin requires at least `rustc` 1.36.0, uses the 2018 rust edition, and is developed on stable.
14//!
15//! Goblin primarily supports the following important use cases:
16//!
17//! 1. Core, std-free `#[repr(C)]` structs, tiny compile time, 32/64 (or both) at your leisure
18//!
19//! 2. Type punning. Define a function once on a type, but have it work on 32 or 64-bit variants - without really changing anything, and no macros! See `examples/automagic.rs` for a basic example.
20//!
21//! 3. `std` mode. This throws in read and write impls via `Pread` and `Pwrite`, reading from file, convenience allocations, extra methods, etc. This is for clients who can allocate and want to read binaries off disk.
22//!
23//! 4. `Endian_fd`. A truly terrible name :laughing: this is for binary analysis like in [panopticon](https://github.com/das-labor/panopticon) which needs to read binaries of foreign endianness, _or_ as a basis for constructing cross platform foreign architecture binutils, e.g. [cargo-sym](https://github.com/m4b/cargo-sym) and [bingrep](https://github.com/m4b/bingrep) are simple examples of this, but the sky is the limit.
24//!
25//! # Example
26//!
27//! ```rust
28//! use goblin::{error, Object};
29//! use std::path::Path;
30//! use std::env;
31//! use std::fs;
32//!
33//! fn run () -> error::Result<()> {
34//!     for (i, arg) in env::args().enumerate() {
35//!         if i == 1 {
36//!             let path = Path::new(arg.as_str());
37//!             let buffer = fs::read(path)?;
38//!             match Object::parse(&buffer)? {
39//!                 Object::Elf(elf) => {
40//!                     println!("elf: {:#?}", &elf);
41//!                 },
42//!                 Object::PE(pe) => {
43//!                     println!("pe: {:#?}", &pe);
44//!                 },
45//!                 Object::COFF(coff) => {
46//!                     println!("coff: {:#?}", &coff);
47//!                 },
48//!                 Object::Mach(mach) => {
49//!                     println!("mach: {:#?}", &mach);
50//!                 },
51//!                 Object::Archive(archive) => {
52//!                     println!("archive: {:#?}", &archive);
53//!                 },
54//!                 Object::Unknown(magic) => { println!("unknown magic: {:#x}", magic) },
55//!                 _ => { }
56//!             }
57//!         }
58//!     }
59//!     Ok(())
60//! }
61//! ```
62//!
63//! # Feature Usage
64//!
65//! `libgoblin` is engineered to be tailored towards very different use-case scenarios, for example:
66//!
67//! * a no-std mode; just simply set default features to false
//! * endian-aware parsing and reading
69//! * for binary loaders which don't require this, simply use `elf32` and `elf64` (and `std` of course)
70//!
//! For example, if you are writing a 64-bit kernel, or just want a barebones C-like
//! header interface which defines the structures, just select `elf64`
//! (`--cfg feature="elf64"`), which will compile without `std`.
74//!
//! Similarly, if you want to use host endianness loading via the various `from_fd` methods,
//! select `std` (`--cfg feature="std"`), which will not use the `byteorder` extern crate, and
//! reads the bytes from disk in the endianness of the host machine.
78//!
79//! If you want endian aware reading, and you don't use `default`, then you need to opt in as normal
80//! via `endian_fd`
81
82#![cfg_attr(not(feature = "std"), no_std)]
83
84#[cfg(feature = "std")]
85extern crate core;
86
87#[cfg(feature = "alloc")]
88#[macro_use]
89extern crate alloc;
90
91/////////////////////////
92// Misc/Helper Modules
93/////////////////////////
94
// Emits each item it is given behind `#[cfg(feature = "std")]`, so the items
// only exist when the `std` feature is enabled.
#[allow(unused)]
macro_rules! if_std {
    ($($gated:item)*) => {
        $(
            #[cfg(feature = "std")]
            $gated
        )*
    };
}
102
// Emits each item it is given behind `#[cfg(feature = "alloc")]`, so the items
// only exist when the `alloc` feature is enabled.
#[allow(unused)]
macro_rules! if_alloc {
    ($($gated:item)*) => {
        $(
            #[cfg(feature = "alloc")]
            $gated
        )*
    };
}
110
111#[cfg(feature = "alloc")]
112pub mod error;
113
114pub mod strtab;
115
116/// Binary container size information and byte-order context
117pub mod container {
118    pub use scroll::Endian;
119
120    #[derive(Debug, Copy, Clone, PartialEq)]
121    /// The size of a binary container
122    pub enum Container {
123        Little,
124        Big,
125    }
126
127    impl Container {
128        /// Is this a 64-bit container or not?
129        pub fn is_big(self) -> bool {
130            self == Container::Big
131        }
132    }
133
134    #[cfg(not(target_pointer_width = "64"))]
135    /// The default binary container size - either `Big` or `Little`, depending on whether the host machine's pointer size is 64 or not
136    pub const CONTAINER: Container = Container::Little;
137
138    #[cfg(target_pointer_width = "64")]
139    /// The default binary container size - either `Big` or `Little`, depending on whether the host machine's pointer size is 64 or not
140    pub const CONTAINER: Container = Container::Big;
141
142    impl Default for Container {
143        #[inline]
144        fn default() -> Self {
145            CONTAINER
146        }
147    }
148
149    #[derive(Debug, Copy, Clone, PartialEq)]
150    /// A binary parsing context, including the container size and underlying byte endianness
151    pub struct Ctx {
152        pub container: Container,
153        pub le: scroll::Endian,
154    }
155
156    impl Ctx {
157        /// Whether this binary container context is "big" or not
158        pub fn is_big(self) -> bool {
159            self.container.is_big()
160        }
161        /// Whether this binary container context is little endian or not
162        pub fn is_little_endian(self) -> bool {
163            self.le.is_little()
164        }
165        /// Create a new binary container context
166        pub fn new(container: Container, le: scroll::Endian) -> Self {
167            Ctx { container, le }
168        }
169        /// Return a dubious pointer/address byte size for the container
170        pub fn size(self) -> usize {
171            match self.container {
172                // TODO: require pointer size initialization/setting or default to container size with these values, e.g., avr pointer width will be smaller iirc
173                Container::Little => 4,
174                Container::Big => 8,
175            }
176        }
177    }
178
179    impl From<Container> for Ctx {
180        fn from(container: Container) -> Self {
181            Ctx {
182                container,
183                le: scroll::Endian::default(),
184            }
185        }
186    }
187
188    impl From<scroll::Endian> for Ctx {
189        fn from(le: scroll::Endian) -> Self {
190            Ctx {
191                container: CONTAINER,
192                le,
193            }
194        }
195    }
196
197    impl Default for Ctx {
198        #[inline]
199        fn default() -> Self {
200            Ctx {
201                container: Container::default(),
202                le: scroll::Endian::default(),
203            }
204        }
205    }
206}
207
/// Borrows the leading 16 bytes of `bytes` as a fixed-size array, in the form `peek_bytes` expects.
///
/// Returns `None` when `bytes` holds fewer than 16 bytes.
#[allow(unused)]
fn take_hint_bytes(bytes: &[u8]) -> Option<&[u8; 16]> {
    const HINT_LEN: usize = 16;
    // `get` yields `None` for a too-short slice instead of panicking.
    let prefix = bytes.get(..HINT_LEN)?;
    prefix.try_into().ok()
}
216
/// Information obtained from a peek `Hint`
///
/// Derives `Clone`, `PartialEq` and `Eq` in addition to `Debug` and `Default`,
/// so hint data can be duplicated and compared by callers.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct HintData {
    /// Whether the peeked bytes indicate least-significant-byte-first (little-endian) data
    pub is_lsb: bool,
    /// `Some(true)` for a 64-bit binary, `Some(false)` for a 32-bit one, `None` when the width could not be determined
    pub is_64: Option<bool>,
}
223
224#[derive(Debug)]
225#[non_exhaustive]
226/// A hint at the underlying binary format for 16 bytes of arbitrary data
227pub enum Hint {
228    Elf(HintData),
229    Mach(HintData),
230    MachFat(usize),
231    PE,
232    TE,
233    COFF,
234    Archive,
235    Unknown(u64),
236}
237
// Emits each item it is given only when every parser feature (and `endian_fd`)
// is enabled at once.
macro_rules! if_everything {
    ($($gated:item)*) => {
        $(
            #[cfg(all(feature = "endian_fd", feature = "elf64", feature = "elf32", feature = "pe64", feature = "pe32", feature = "te", feature = "mach64", feature = "mach32", feature = "archive"))]
            $gated
        )*
    };
}
244
245if_everything! {
246
247    /// Peeks at `bytes`, and returns a `Hint`
248    pub fn peek_bytes(bytes: &[u8; 16]) -> error::Result<Hint> {
249        use scroll::{Pread, LE};
250        if &bytes[0..elf::header::SELFMAG] == elf::header::ELFMAG {
251            let class = bytes[elf::header::EI_CLASS];
252            let is_lsb = bytes[elf::header::EI_DATA] == elf::header::ELFDATA2LSB;
253            let is_64 =
254                if class == elf::header::ELFCLASS64 {
255                    Some (true)
256                } else if class == elf::header::ELFCLASS32 {
257                    Some (false)
258                } else { None };
259
260            Ok(Hint::Elf(HintData { is_lsb, is_64 }))
261        } else if &bytes[0..archive::SIZEOF_MAGIC] == archive::MAGIC {
262            Ok(Hint::Archive)
263        } else {
264            match *&bytes[0..2].pread_with::<u16>(0, LE)? {
265                pe::header::DOS_MAGIC => Ok(Hint::PE),
266                pe::header::TE_MAGIC => Ok(Hint::TE),
267                pe::header::COFF_MACHINE_X86 |
268                pe::header::COFF_MACHINE_X86_64 |
269                pe::header::COFF_MACHINE_ARM64 => Ok(Hint::COFF),
270                _ => mach::peek_bytes(bytes)
271            }
272        }
273    }
274
275    /// Peeks at the underlying Read object. Requires the underlying bytes to have at least 16 byte length. Resets the seek to `Start` after reading.
276    #[cfg(feature = "std")]
277    pub fn peek<R: ::std::io::Read + ::std::io::Seek>(fd: &mut R) -> error::Result<Hint> {
278        use std::io::SeekFrom;
279        let mut bytes = [0u8; 16];
280        fd.seek(SeekFrom::Start(0))?;
281        fd.read_exact(&mut bytes)?;
282        fd.seek(SeekFrom::Start(0))?;
283        peek_bytes(&bytes)
284    }
285
286    #[derive(Debug)]
287    #[allow(clippy::large_enum_variant)]
288    #[non_exhaustive]
289    /// A parseable object that goblin understands
290    pub enum Object<'a> {
291        /// An ELF32/ELF64!
292        Elf(elf::Elf<'a>),
293        /// A PE32/PE32+!
294        PE(pe::PE<'a>),
295        /// A TE!
296        TE(pe::TE<'a>),
297        /// A COFF
298        COFF(pe::Coff<'a>),
299        /// A 32/64-bit Mach-o binary _OR_ it is a multi-architecture binary container!
300        Mach(mach::Mach<'a>),
301        /// A Unix archive
302        Archive(archive::Archive<'a>),
303        /// None of the above, with the given magic value
304        Unknown(u64),
305    }
306
307    impl<'a> Object<'a> {
308        /// Tries to parse an `Object` from `bytes`
309        pub fn parse(bytes: &[u8]) -> error::Result<Object> {
310            if let Some(hint_bytes) = take_hint_bytes(bytes) {
311                match peek_bytes(hint_bytes)? {
312                    Hint::Elf(_) => Ok(Object::Elf(elf::Elf::parse(bytes)?)),
313                    Hint::Mach(_) | Hint::MachFat(_) => Ok(Object::Mach(mach::Mach::parse(bytes)?)),
314                    Hint::Archive => Ok(Object::Archive(archive::Archive::parse(bytes)?)),
315                    Hint::PE => Ok(Object::PE(pe::PE::parse(bytes)?)),
316                    Hint::TE => Ok(Object::TE(pe::TE::parse(bytes)?)),
317                    Hint::COFF => Ok(Object::COFF(pe::Coff::parse(bytes)?)),
318                    Hint::Unknown(magic) => Ok(Object::Unknown(magic)),
319                }
320            } else {
321                Err(error::Error::Malformed(format!("Object is too small.")))
322            }
323        }
324    }
} // end if_everything
326
327/////////////////////////
328// Binary Modules
329/////////////////////////
330
331#[cfg(any(feature = "elf64", feature = "elf32"))]
332#[macro_use]
333pub mod elf;
334
/// The ELF 32-bit struct definitions and associated values, re-exported for easy "type-punning"
///
/// Each 32-bit submodule or type is re-exported under a width-neutral name.
#[cfg(feature = "elf32")]
pub mod elf32 {
    pub use crate::elf::{
        dynamic::dyn32 as dynamic, header::header32 as header, note::Nhdr32 as Note,
        program_header::program_header32 as program_header, reloc::reloc32 as reloc,
        section_header::section_header32 as section_header, sym::sym32 as sym,
    };

    /// The `hash` re-export plus the `elf_gnu_hash_impl!` expansion for `u32`
    pub mod gnu_hash {
        pub use crate::elf::gnu_hash::hash;
        elf_gnu_hash_impl!(u32);
    }
}
351
/// The ELF 64-bit struct definitions and associated values, re-exported for easy "type-punning"
///
/// Each 64-bit submodule or type is re-exported under a width-neutral name.
#[cfg(feature = "elf64")]
pub mod elf64 {
    pub use crate::elf::{
        dynamic::dyn64 as dynamic, header::header64 as header, note::Nhdr64 as Note,
        program_header::program_header64 as program_header, reloc::reloc64 as reloc,
        section_header::section_header64 as section_header, sym::sym64 as sym,
    };

    /// The `hash` re-export plus the `elf_gnu_hash_impl!` expansion for `u64`
    pub mod gnu_hash {
        pub use crate::elf::gnu_hash::hash;
        elf_gnu_hash_impl!(u64);
    }
}
368
369#[cfg(any(feature = "mach32", feature = "mach64"))]
370pub mod mach;
371
372#[cfg(any(feature = "pe32", feature = "pe64"))]
373pub mod pe;
374
375#[cfg(feature = "archive")]
376pub mod archive;
377
#[cfg(test)]
mod tests {
    use super::*;

    // `take_hint_bytes` is compiled under every feature combination, so these tests
    // are not feature-gated: they run no matter which parsers are enabled.

    #[test]
    fn take_hint_bytes_long_enough() {
        let bytes_array = [1; 32];
        let bytes = &bytes_array[..];
        assert!(take_hint_bytes(bytes).is_some());
    }

    #[test]
    fn take_hint_bytes_exactly_long_enough() {
        // 16 bytes is the smallest input that must be accepted.
        let bytes_array = [1; 16];
        let bytes = &bytes_array[..];
        assert!(take_hint_bytes(bytes).is_some());
    }

    #[test]
    fn take_hint_bytes_not_long_enough() {
        let bytes_array = [1; 8];
        let bytes = &bytes_array[..];
        assert!(take_hint_bytes(bytes).is_none());
    }
}