Skip to main content

bias_goblin/
lib.rs

1//! # libgoblin
2//!
3//! ![say the right
4//! words](https://s-media-cache-ak0.pinimg.com/736x/1b/6a/aa/1b6aaa2bae005e2fed84b1a7c32ecb1b.jpg)
5//!
6//! `libgoblin` is a cross-platform trifecta of binary parsing and loading fun.  It supports:
7//!
8//! * An ELF32/64 parser, and raw C structs
9//! * A 32/64-bit, zero-copy, endian aware, Mach-o parser, and raw C structs
10//! * A PE32/PE32+ (64-bit) parser, and raw C structs
11//! * A Unix archive parser and loader
12//!
13//! Goblin requires at least `rustc` 1.36.0, uses the 2018 rust edition, and is developed on stable.
14//!
15//! Goblin primarily supports the following important use cases:
16//!
17//! 1. Core, std-free `#[repr(C)]` structs, tiny compile time, 32/64 (or both) at your leisure
18//!
19//! 2. Type punning. Define a function once on a type, but have it work on 32 or 64-bit variants - without really changing anything, and no macros! See `examples/automagic.rs` for a basic example.
20//!
21//! 3. `std` mode. This throws in read and write impls via `Pread` and `Pwrite`, reading from file, convenience allocations, extra methods, etc. This is for clients who can allocate and want to read binaries off disk.
22//!
23//! 4. `Endian_fd`. A truly terrible name :laughing: this is for binary analysis like in [panopticon](https://github.com/das-labor/panopticon) which needs to read binaries of foreign endianness, _or_ as a basis for constructing cross platform foreign architecture binutils, e.g. [cargo-sym](https://github.com/m4b/cargo-sym) and [bingrep](https://github.com/m4b/bingrep) are simple examples of this, but the sky is the limit.
24//!
25//! # Example
26//!
27//! ```rust
28//! use goblin::{error, Object};
29//! use std::path::Path;
30//! use std::env;
31//! use std::fs;
32//!
33//! fn run () -> error::Result<()> {
34//!     for (i, arg) in env::args().enumerate() {
35//!         if i == 1 {
36//!             let path = Path::new(arg.as_str());
37//!             let buffer = fs::read(path)?;
38//!             match Object::parse(&buffer)? {
39//!                 Object::Elf(elf) => {
40//!                     println!("elf: {:#?}", &elf);
41//!                 },
42//!                 Object::PE(pe) => {
43//!                     println!("pe: {:#?}", &pe);
44//!                 },
45//!                 Object::TE(te) => {
46//!                     println!("te: {:#?}", &te);
47//!                 },
48//!                 Object::Mach(mach) => {
49//!                     println!("mach: {:#?}", &mach);
50//!                 },
51//!                 Object::Archive(archive) => {
52//!                     println!("archive: {:#?}", &archive);
53//!                 },
54//!                 Object::Unknown(magic) => { println!("unknown magic: {:#x}", magic) }
55//!             }
56//!         }
57//!     }
58//!     Ok(())
59//! }
60//! ```
61//!
62//! # Feature Usage
63//!
64//! `libgoblin` is engineered to be tailored towards very different use-case scenarios, for example:
65//!
66//! * a no-std mode; just simply set default features to false
//! * endian-aware parsing and reading
68//! * for binary loaders which don't require this, simply use `elf32` and `elf64` (and `std` of course)
69//!
70//! For example, if you are writing a 64-bit kernel, or just want a barebones C-like
71//! header interface which defines the structures, just select `elf64`, `--cfg
72//! feature=\"elf64\"`, which will compile without `std`.
73//!
74//! Similarly, if you want to use host endianness loading via the various `from_fd` methods, `--cfg
75//! feature=\"std\"`, which will not use the `byteorder` extern crate, and read the bytes
76//! from disk in the endianness of the host machine.
77//!
78//! If you want endian aware reading, and you don't use `default`, then you need to opt in as normal
79//! via `endian_fd`
80
81#![cfg_attr(not(feature = "std"), no_std)]
82
83#[cfg(feature = "std")]
84extern crate core;
85
86#[cfg(feature = "alloc")]
87#[macro_use]
88extern crate alloc;
89
90/////////////////////////
91// Misc/Helper Modules
92/////////////////////////
93
/// Attaches `#[cfg(feature = "std")]` to every item passed in, so the items
/// are only compiled when the `std` feature is enabled.
#[allow(unused)]
macro_rules! if_std {
    ($($item:item)*) => {
        $(
            #[cfg(feature = "std")]
            $item
        )*
    };
}
101
/// Attaches `#[cfg(feature = "alloc")]` to every item passed in, so the items
/// are only compiled when the `alloc` feature is enabled.
#[allow(unused)]
macro_rules! if_alloc {
    ($($item:item)*) => {
        $(
            #[cfg(feature = "alloc")]
            $item
        )*
    };
}
109
110#[cfg(feature = "alloc")]
111pub mod error;
112
113pub mod strtab;
114
115/// Binary container size information and byte-order context
116pub mod container {
117    pub use scroll::Endian;
118
119    #[derive(Debug, Copy, Clone, PartialEq)]
120    /// The size of a binary container
121    pub enum Container {
122        Little,
123        Big,
124    }
125
126    impl Container {
127        /// Is this a 64-bit container or not?
128        pub fn is_big(self) -> bool {
129            self == Container::Big
130        }
131    }
132
133    #[cfg(not(target_pointer_width = "64"))]
134    /// The default binary container size - either `Big` or `Little`, depending on whether the host machine's pointer size is 64 or not
135    pub const CONTAINER: Container = Container::Little;
136
137    #[cfg(target_pointer_width = "64")]
138    /// The default binary container size - either `Big` or `Little`, depending on whether the host machine's pointer size is 64 or not
139    pub const CONTAINER: Container = Container::Big;
140
141    impl Default for Container {
142        #[inline]
143        fn default() -> Self {
144            CONTAINER
145        }
146    }
147
148    #[derive(Debug, Copy, Clone, PartialEq)]
149    /// A binary parsing context, including the container size and underlying byte endianness
150    pub struct Ctx {
151        pub container: Container,
152        pub le: scroll::Endian,
153    }
154
155    impl Ctx {
156        /// Whether this binary container context is "big" or not
157        pub fn is_big(self) -> bool {
158            self.container.is_big()
159        }
160        /// Whether this binary container context is little endian or not
161        pub fn is_little_endian(self) -> bool {
162            self.le.is_little()
163        }
164        /// Create a new binary container context
165        pub fn new(container: Container, le: scroll::Endian) -> Self {
166            Ctx { container, le }
167        }
168        /// Return a dubious pointer/address byte size for the container
169        pub fn size(self) -> usize {
170            match self.container {
171                // TODO: require pointer size initialization/setting or default to container size with these values, e.g., avr pointer width will be smaller iirc
172                Container::Little => 4,
173                Container::Big => 8,
174            }
175        }
176    }
177
178    impl From<Container> for Ctx {
179        fn from(container: Container) -> Self {
180            Ctx {
181                container,
182                le: scroll::Endian::default(),
183            }
184        }
185    }
186
187    impl From<scroll::Endian> for Ctx {
188        fn from(le: scroll::Endian) -> Self {
189            Ctx {
190                container: CONTAINER,
191                le,
192            }
193        }
194    }
195
196    impl Default for Ctx {
197        #[inline]
198        fn default() -> Self {
199            Ctx {
200                container: Container::default(),
201                le: scroll::Endian::default(),
202            }
203        }
204    }
205}
206
/// Attaches a `#[cfg(all(...))]` to every item passed in that requires `endian_fd`
/// together with every binary-format feature.
///
/// `te` is part of the list because the items gated by this macro refer to the `te`
/// module, which is itself only compiled when the `te` feature is enabled; leaving it
/// out would break the build when every other feature is on but `te` is off.
macro_rules! if_everything {
    ($($i:item)*) => ($(
        #[cfg(all(feature = "endian_fd", feature = "elf64", feature = "elf32", feature = "pe64", feature = "pe32", feature = "te", feature = "mach64", feature = "mach32", feature = "archive"))]
        $i
    )*)
}
213
214if_everything! {
215
216    #[derive(Debug, Default)]
217    /// Information obtained from a peek `Hint`
218    pub struct HintData {
219        pub is_lsb: bool,
220        pub is_64: Option<bool>,
221    }
222
    #[derive(Debug)]
    /// A hint at the underlying binary format for 16 bytes of arbitrary data
    pub enum Hint {
        /// The bytes start with the ELF magic; carries the endianness and class read from the identification bytes
        Elf(HintData),
        /// The bytes start with a (32 or 64-bit) Mach-O magic; carries the endianness and size of its parsing context
        Mach(HintData),
        /// The bytes start with the fat Mach-O magic; carries the number of architectures read from the header
        MachFat(usize),
        /// The first two bytes, read as a little-endian `u16`, equal the DOS magic
        PE,
        /// The first two bytes, read as a little-endian `u16`, equal the TE magic
        TE,
        /// The bytes start with the Unix archive magic
        Archive,
        /// No known magic matched; carries the first 8 bytes read as a `u64`
        Unknown(u64),
    }
234
235    /// Peeks at `bytes`, and returns a `Hint`
236    pub fn peek_bytes(bytes: &[u8; 16]) -> error::Result<Hint> {
237        use scroll::{Pread, LE, BE};
238        use crate::mach::{fat, header};
239        if &bytes[0..elf::header::SELFMAG] == elf::header::ELFMAG {
240            let class = bytes[elf::header::EI_CLASS];
241            let is_lsb = bytes[elf::header::EI_DATA] == elf::header::ELFDATA2LSB;
242            let is_64 =
243                if class == elf::header::ELFCLASS64 {
244                    Some (true)
245                } else if class == elf::header::ELFCLASS32 {
246                    Some (false)
247                } else { None };
248
249            Ok(Hint::Elf(HintData { is_lsb, is_64 }))
250        } else if &bytes[0..archive::SIZEOF_MAGIC] == archive::MAGIC {
251            Ok(Hint::Archive)
252        } else if (&bytes[0..2]).pread_with::<u16>(0, LE)? == pe::header::DOS_MAGIC {
253            Ok(Hint::PE)
254        } else if (&bytes[0..2]).pread_with::<u16>(0, LE)? == te::header::TE_MAGIC {
255            Ok(Hint::TE)
256        } else {
257            let (magic, maybe_ctx) = mach::parse_magic_and_ctx(bytes, 0)?;
258            match magic {
259                fat::FAT_MAGIC => {
260                    // should probably verify this is always Big Endian...
261                    let narchitectures = bytes.pread_with::<u32>(4, BE)? as usize;
262                    Ok(Hint::MachFat(narchitectures))
263                },
264                header::MH_CIGAM_64 | header::MH_CIGAM | header::MH_MAGIC_64 | header::MH_MAGIC => {
265                    if let Some(ctx) = maybe_ctx {
266                        Ok(Hint::Mach(HintData { is_lsb: ctx.le.is_little(), is_64: Some(ctx.container.is_big()) }))
267                    } else {
268                        Err(error::Error::Malformed(format!("Correct mach magic {:#x} does not have a matching parsing context!", magic)))
269                    }
270                },
271                // its something else
272                _ => Ok(Hint::Unknown(bytes.pread::<u64>(0)?))
273            }
274        }
275    }
276
277    /// Peeks at the underlying Read object. Requires the underlying bytes to have at least 16 byte length. Resets the seek to `Start` after reading.
278    #[cfg(feature = "std")]
279    pub fn peek<R: ::std::io::Read + ::std::io::Seek>(fd: &mut R) -> error::Result<Hint> {
280        use std::io::SeekFrom;
281        let mut bytes = [0u8; 16];
282        fd.seek(SeekFrom::Start(0))?;
283        fd.read_exact(&mut bytes)?;
284        fd.seek(SeekFrom::Start(0))?;
285        peek_bytes(&bytes)
286    }
287
288    /// Takes a reference to the first 16 bytes of the total bytes slice and convert it to an array for `peek_bytes` to use.
289    /// Returns None if bytes's length is less than 16.
290    fn take_hint_bytes(bytes: &[u8]) -> Option<&[u8; 16]> {
291        use core::convert::TryInto;
292        bytes.get(0..16)
293            .and_then(|hint_bytes_slice| {
294                hint_bytes_slice.try_into().ok()
295            })
296    }
297
    #[derive(Debug)]
    // NOTE(review): presumably allowed because the parsed variants differ a lot in size
    // and boxing them would change the public API - confirm.
    #[allow(clippy::large_enum_variant)]
    /// A parseable object that goblin understands
    ///
    /// Produced by `Object::parse`; every variant except `Unknown` borrows from the
    /// parsed byte buffer for the lifetime `'a`.
    pub enum Object<'a> {
        /// An ELF32/ELF64!
        Elf(elf::Elf<'a>),
        /// A PE32/PE32+!
        PE(pe::PE<'a>),
        /// A TE!
        TE(te::TE<'a>),
        /// A 32/64-bit Mach-o binary _OR_ it is a multi-architecture binary container!
        Mach(mach::Mach<'a>),
        /// A Unix archive
        Archive(archive::Archive<'a>),
        /// None of the above, with the given magic value
        Unknown(u64),
    }
315
316    impl<'a> Object<'a> {
317        /// Tries to parse an `Object` from `bytes`
318        pub fn parse(bytes: &[u8]) -> error::Result<Object> {
319            if let Some(hint_bytes) = take_hint_bytes(bytes) {
320                match peek_bytes(hint_bytes)? {
321                    Hint::Elf(_) => Ok(Object::Elf(elf::Elf::parse(bytes)?)),
322                    Hint::Mach(_) | Hint::MachFat(_) => Ok(Object::Mach(mach::Mach::parse(bytes)?)),
323                    Hint::Archive => Ok(Object::Archive(archive::Archive::parse(bytes)?)),
324                    Hint::PE => Ok(Object::PE(pe::PE::parse(bytes)?)),
325                    Hint::TE => Ok(Object::TE(te::TE::parse(bytes)?)),
326                    Hint::Unknown(magic) => Ok(Object::Unknown(magic))
327                }
328            } else {
329                Err(error::Error::Malformed(format!("Object is too small.")))
330            }
331        }
332    }
} // end if_everything
334
335/////////////////////////
336// Binary Modules
337/////////////////////////
338
339#[cfg(any(feature = "elf64", feature = "elf32"))]
340#[macro_use]
341pub mod elf;
342
#[cfg(feature = "elf32")]
/// The ELF 32-bit struct definitions and associated values, re-exported for easy "type-punning"
///
/// Every 32-bit item of `crate::elf` is re-exported under a width-neutral name
/// (`dynamic`, `header`, `Note`, `program_header`, `reloc`, `section_header`, `sym`),
/// mirroring the names exposed by the `elf64` module.
pub mod elf32 {
    pub use crate::elf::dynamic::dyn32 as dynamic;
    pub use crate::elf::header::header32 as header;
    pub use crate::elf::note::Nhdr32 as Note;
    pub use crate::elf::program_header::program_header32 as program_header;
    pub use crate::elf::reloc::reloc32 as reloc;
    pub use crate::elf::section_header::section_header32 as section_header;
    pub use crate::elf::sym::sym32 as sym;

    /// GNU hash support for 32-bit ELF: the shared `hash` function plus the `u32` expansion of `elf_gnu_hash_impl!`
    pub mod gnu_hash {
        pub use crate::elf::gnu_hash::hash;
        // NOTE(review): `elf_gnu_hash_impl!` is defined outside this file; presumably it
        // generates the GNU hash table implementation for the given word type - confirm.
        elf_gnu_hash_impl!(u32);
    }
}
359
#[cfg(feature = "elf64")]
/// The ELF 64-bit struct definitions and associated values, re-exported for easy "type-punning"
///
/// Every 64-bit item of `crate::elf` is re-exported under a width-neutral name
/// (`dynamic`, `header`, `Note`, `program_header`, `reloc`, `section_header`, `sym`),
/// mirroring the names exposed by the `elf32` module.
pub mod elf64 {
    pub use crate::elf::dynamic::dyn64 as dynamic;
    pub use crate::elf::header::header64 as header;
    pub use crate::elf::note::Nhdr64 as Note;
    pub use crate::elf::program_header::program_header64 as program_header;
    pub use crate::elf::reloc::reloc64 as reloc;
    pub use crate::elf::section_header::section_header64 as section_header;
    pub use crate::elf::sym::sym64 as sym;

    /// GNU hash support for 64-bit ELF: the shared `hash` function plus the `u64` expansion of `elf_gnu_hash_impl!`
    pub mod gnu_hash {
        pub use crate::elf::gnu_hash::hash;
        // NOTE(review): `elf_gnu_hash_impl!` is defined outside this file; presumably it
        // generates the GNU hash table implementation for the given word type - confirm.
        elf_gnu_hash_impl!(u64);
    }
}
376
377#[cfg(any(feature = "mach32", feature = "mach64"))]
378pub mod mach;
379
380#[cfg(any(feature = "pe32", feature = "pe64"))]
381pub mod pe;
382
383#[cfg(feature = "te")]
384pub mod te;
385
386#[cfg(feature = "archive")]
387pub mod archive;
388
#[cfg(test)]
mod tests {
    use super::*;

    // `take_hint_bytes` only exists when every feature is enabled, so its tests are gated the same way.
    if_everything! {
        /// A buffer longer than 16 bytes yields a hint array.
        #[test]
        fn take_hint_bytes_long_enough() {
            let buffer = [1u8; 32];
            assert!(take_hint_bytes(&buffer[..]).is_some())
        }

        /// A buffer shorter than 16 bytes yields nothing.
        #[test]
        fn take_hint_bytes_not_long_enough() {
            let buffer = [1u8; 8];
            assert!(take_hint_bytes(&buffer[..]).is_none())
        }
    }
}