1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
//! # libgoblin
//!
//! ![say the right
//! words](https://s-media-cache-ak0.pinimg.com/736x/1b/6a/aa/1b6aaa2bae005e2fed84b1a7c32ecb1b.jpg)
//!
//! `libgoblin` is a cross-platform trifecta of binary parsing and loading fun.  It supports:
//!
//! * An ELF32/64 parser, and raw C structs
//! * A 32/64-bit, zero-copy, endian aware, Mach-o parser, and raw C structs
//! * A PE32/PE32+ (64-bit) parser, and raw C structs
//! * A Unix archive parser and loader
//!
//! Goblin _should_ require at least `rustc` 1.16, but is developed on stable.
//!
//! Goblin primarily supports the following important use cases:
//!
//! 1. Core, std-free `#[repr(C)]` structs, tiny compile time, 32/64 (or both) at your leisure
//!
//! 2. Type punning. Define a function once on a type, but have it work on 32 or 64-bit variants - without really changing anything, and no macros! See `examples/automagic.rs` for a basic example.
//!
//! 3. `std` mode. This throws in read and write impls via `Pread` and `Pwrite`, reading from file, convenience allocations, extra methods, etc. This is for clients who can allocate and want to read binaries off disk.
//!
//! 4. `Endian_fd`. A truly terrible name :laughing: this is for binary analysis like in [panopticon](https://github.com/das-labor/panopticon) which needs to read binaries of foreign endianness, _or_ as a basis for constructing cross platform foreign architecture binutils, e.g. [cargo-sym](https://github.com/m4b/cargo-sym) and [bingrep](https://github.com/m4b/bingrep) are simple examples of this, but the sky is the limit.
//!
//! # Example
//!
//! ```rust
//! use goblin::{error, Object};
//! use std::path::Path;
//! use std::env;
//! use std::fs::File;
//! use std::io::Read;
//!
//! fn run () -> error::Result<()> {
//!     for (i, arg) in env::args().enumerate() {
//!         if i == 1 {
//!             let path = Path::new(arg.as_str());
//!             let mut fd = File::open(path)?;
//!             let mut buffer = Vec::new();
//!             fd.read_to_end(&mut buffer)?;
//!             match Object::parse(&buffer)? {
//!                 Object::Elf(elf) => {
//!                     println!("elf: {:#?}", &elf);
//!                 },
//!                 Object::PE(pe) => {
//!                     println!("pe: {:#?}", &pe);
//!                 },
//!                 Object::Mach(mach) => {
//!                     println!("mach: {:#?}", &mach);
//!                 },
//!                 Object::Archive(archive) => {
//!                     println!("archive: {:#?}", &archive);
//!                 },
//!                 Object::Unknown(magic) => { println!("unknown magic: {:#x}", magic) }
//!             }
//!         }
//!     }
//!     Ok(())
//! }
//! ```
//!
//! # Feature Usage
//!
//! `libgoblin` is engineered to be tailored towards very different use-case scenarios, for example:
//!
//! * a no-std mode; just simply set default features to false
//! * a endian aware parsing and reading
//! * for binary loaders which don't require this, simply use `elf32` and `elf64` (and `std` of course)
//!
//! For example, if you are writing a 64-bit kernel, or just want a barebones C-like
//! header interface which defines the structures, just select `elf64`, `--cfg
//! feature=\"elf64\"`, which will compile without `std`.
//!
//! Similarly, if you want to use host endianness loading via the various `from_fd` methods, `--cfg
//! feature=\"std\"`, which will not use the `byteorder` extern crate, and read the bytes
//! from disk in the endianness of the host machine.
//!
//! If you want endian aware reading, and you don't use `default`, then you need to opt in as normal
//! via `endian_fd`

#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(all(feature = "alloc", not(feature = "std")), feature(alloc))]

extern crate plain;
#[cfg_attr(feature = "alloc", macro_use)]
extern crate scroll;

#[cfg(feature = "log")]
#[macro_use]
extern crate log;

#[cfg(feature = "std")]
extern crate core;

#[cfg(all(feature = "alloc", not(feature = "std")))]
#[macro_use]
extern crate alloc;

#[cfg(feature = "std")]
mod alloc {
    pub use std::borrow;
    pub use std::boxed;
    pub use std::string;
    pub use std::vec;
    pub use std::collections::btree_map;
}

/////////////////////////
// Misc/Helper Modules
/////////////////////////

#[allow(unused)]
macro_rules! if_std {
    ($($i:item)*) => ($(
        #[cfg(feature = "std")]
        $i
    )*)
}

#[allow(unused)]
macro_rules! if_alloc {
    ($($i:item)*) => ($(
        #[cfg(feature = "alloc")]
        $i
    )*)
}

#[cfg(feature = "alloc")]
pub mod error;

pub mod strtab;

/// Binary container size information and byte-order context
pub mod container {
    use scroll;
    pub use scroll::Endian as Endian;

    #[derive(Debug, Copy, Clone, PartialEq)]
    /// The size of a binary container
    pub enum Container {
        Little,
        Big,
    }

    impl Container {
        /// Is this a 64-bit container or not?
        pub fn is_big(&self) -> bool {
            *self == Container::Big
        }
    }

    #[cfg(not(target_pointer_width = "64"))]
    /// The default binary container size - either `Big` or `Little`, depending on whether the host machine's pointer size is 64 or not
    pub const CONTAINER: Container =  Container::Little;

    #[cfg(target_pointer_width = "64")]
    /// The default binary container size - either `Big` or `Little`, depending on whether the host machine's pointer size is 64 or not
    pub const CONTAINER: Container =  Container::Big;

    impl Default for Container {
        #[inline]
        fn default() -> Self {
            CONTAINER
        }
    }

    #[derive(Debug, Copy, Clone, PartialEq)]
    /// A binary parsing context, including the container size and underlying byte endianness
    pub struct Ctx {
        pub container: Container,
        pub le: scroll::Endian,
    }

    impl Ctx {
        /// Whether this binary container context is "big" or not
        pub fn is_big(&self) -> bool {
            self.container.is_big()
        }
        /// Whether this binary container context is little endian or not
        pub fn is_little_endian(&self) -> bool {
            self.le.is_little()
        }
        /// Create a new binary container context
        pub fn new (container: Container, le: scroll::Endian) -> Self {
            Ctx { container: container, le: le }
        }
        /// Return a dubious pointer/address byte size for the container
        pub fn size(&self) -> usize {
            match self.container {
                // TODO: require pointer size initialization/setting or default to container size with these values, e.g., avr pointer width will be smaller iirc
                Container::Little => 4,
                Container::Big    => 8,
            }
        }
    }

    impl From<Container> for Ctx {
        fn from(container: Container) -> Self {
            Ctx { container: container, le: scroll::Endian::default() }
        }
    }

    impl From<scroll::Endian> for Ctx {
        fn from(le: scroll::Endian) -> Self {
            Ctx { container: CONTAINER, le: le }
        }
    }

    impl Default for Ctx {
        #[inline]
        fn default() -> Self {
            Ctx { container: Container::default(), le: scroll::Endian::default() }
        }
    }
}

macro_rules! if_everything {
    ($($i:item)*) => ($(
        #[cfg(all(feature = "endian_fd", feature = "elf64", feature = "elf32", feature = "pe64", feature = "pe32", feature = "mach64", feature = "mach32", feature = "archive"))]
        $i
    )*)
}

if_everything! {

    #[derive(Debug, Default)]
    /// Information obtained from a peek `Hint`
    pub struct HintData {
        pub is_lsb: bool,
        pub is_64: Option<bool>,
    }

    #[derive(Debug)]
    /// A hint at the underlying binary format for 16 bytes of arbitrary data
    pub enum Hint {
        Elf(HintData),
        Mach(HintData),
        MachFat(usize),
        PE,
        Archive,
        Unknown(u64),
    }

    /// Peeks at `bytes`, and returns a `Hint`
    pub fn peek_bytes(bytes: &[u8; 16]) -> error::Result<Hint> {
        use scroll::{Pread, LE, BE};
        use mach::{fat, header};
        if &bytes[0..elf::header::SELFMAG] == elf::header::ELFMAG {
            let class = bytes[elf::header::EI_CLASS];
            let is_lsb = bytes[elf::header::EI_DATA] == elf::header::ELFDATA2LSB;
            let is_64 =
                if class == elf::header::ELFCLASS64 {
                    Some (true)
                } else if class == elf::header::ELFCLASS32 {
                    Some (false)
                } else { None };

            Ok(Hint::Elf(HintData { is_lsb, is_64 }))
        } else if &bytes[0..archive::SIZEOF_MAGIC] == archive::MAGIC {
            Ok(Hint::Archive)
        } else if (&bytes[0..2]).pread_with::<u16>(0, LE)? == pe::header::DOS_MAGIC {
            Ok(Hint::PE)
        } else {
            let (magic, maybe_ctx) = mach::parse_magic_and_ctx(bytes, 0)?;
            match magic {
                fat::FAT_MAGIC => {
                    // should probably verify this is always Big Endian...
                    let narchitectures = bytes.pread_with::<u32>(4, BE)? as usize;
                    Ok(Hint::MachFat(narchitectures))
                },
                header::MH_CIGAM_64 | header::MH_CIGAM | header::MH_MAGIC_64 | header::MH_MAGIC => {
                    if let Some(ctx) = maybe_ctx {
                        Ok(Hint::Mach(HintData { is_lsb: ctx.le.is_little(), is_64: Some(ctx.container.is_big()) }))
                    } else {
                        Err(error::Error::Malformed(format!("Correct mach magic {:#x} does not have a matching parsing context!", magic).into()))
                    }
                },
                // its something else
                _ => Ok(Hint::Unknown(bytes.pread::<u64>(0)?))
            }
        }
    }

    /// Peeks at the underlying Read object. Requires the underlying bytes to have at least 16 byte length. Resets the seek to `Start` after reading.
    #[cfg(feature = "std")]
    pub fn peek<R: ::std::io::Read + ::std::io::Seek>(fd: &mut R) -> error::Result<Hint> {
        use std::io::SeekFrom;
        let mut bytes = [0u8; 16];
        fd.seek(SeekFrom::Start(0))?;
        fd.read_exact(&mut bytes)?;
        fd.seek(SeekFrom::Start(0))?;
        peek_bytes(&bytes)
    }

    #[derive(Debug)]
    /// A parseable object that goblin understands
    pub enum Object<'a> {
        /// An ELF32/ELF64!
        Elf(elf::Elf<'a>),
        /// A PE32/PE32+!
        PE(pe::PE<'a>),
        /// A 32/64-bit Mach-o binary _OR_ it is a multi-architecture binary container!
        Mach(mach::Mach<'a>),
        /// A Unix archive
        Archive(archive::Archive<'a>),
        /// None of the above, with the given magic value
        Unknown(u64),
    }

    // TODO: this could avoid std using peek_bytes
    #[cfg(feature = "std")]
    impl<'a> Object<'a> {
        /// Tries to parse an `Object` from `bytes`
        pub fn parse(bytes: &[u8]) -> error::Result<Object> {
            use std::io::Cursor;
            match peek(&mut Cursor::new(&bytes))? {
                Hint::Elf(_) => Ok(Object::Elf(elf::Elf::parse(bytes)?)),
                Hint::Mach(_) | Hint::MachFat(_) => Ok(Object::Mach(mach::Mach::parse(bytes)?)),
                Hint::Archive => Ok(Object::Archive(archive::Archive::parse(bytes)?)),
                Hint::PE => Ok(Object::PE(pe::PE::parse(bytes)?)),
                Hint::Unknown(magic) => Ok(Object::Unknown(magic))
            }
        }
    }
} // end if_endian_fd

/////////////////////////
// Binary Modules
/////////////////////////

#[cfg(any(feature = "elf64", feature = "elf32"))]
#[macro_use]
pub mod elf;

#[cfg(feature = "elf32")]
/// The ELF 32-bit struct definitions and associated values, re-exported for easy "type-punning"
pub mod elf32 {
    pub use elf::header::header32 as header;
    pub use elf::program_header::program_header32 as program_header;
    pub use elf::section_header::section_header32 as section_header;
    pub use elf::dyn::dyn32 as dyn;
    pub use elf::sym::sym32 as sym;
    pub use elf::reloc::reloc32 as reloc;
    pub use elf::note::Nhdr32 as Note;

    pub mod gnu_hash {
        elf_gnu_hash_impl!(u32);
    }
}

#[cfg(feature = "elf64")]
/// The ELF 64-bit struct definitions and associated values, re-exported for easy "type-punning"
pub mod elf64 {
    pub use elf::header::header64 as header;
    pub use elf::program_header::program_header64 as program_header;
    pub use elf::section_header::section_header64 as section_header;
    pub use elf::dyn::dyn64 as dyn;
    pub use elf::sym::sym64 as sym;
    pub use elf::reloc::reloc64 as reloc;
    pub use elf::note::Nhdr64 as Note;

    pub mod gnu_hash {
        elf_gnu_hash_impl!(u64);
    }
}

#[cfg(any(feature = "mach32", feature = "mach64"))]
pub mod mach;

#[cfg(any(feature = "pe32", feature = "pe64"))]
pub mod pe;

#[cfg(feature = "archive")]
pub mod archive;