use std::borrow::Cow;
use std::fmt;
use std::io::Cursor;
use failure::Fail;
use goblin::{error::Error as GoblinError, mach};
use smallvec::SmallVec;
use symbolic_common::{Arch, AsSelf, CodeId, DebugId, Uuid};
use crate::base::*;
use crate::dwarf::{Dwarf, DwarfDebugSession, DwarfError, Endian};
use crate::private::{HexFmt, MonoArchive, MonoArchiveObjects, Parse};
/// Error produced when MachO data cannot be handled by this module.
///
/// This is the error type of the `parse` constructors of `MachObject`, `FatMachO` and
/// `MachArchive`, and of the iterators yielding `MachObject`s.
#[derive(Debug, Fail)]
pub enum MachError {
/// goblin rejected the data; the underlying goblin error is attached as the cause.
#[fail(display = "invalid MachO file")]
BadObject(#[fail(cause)] GoblinError),
}
/// A single MachO object parsed from a borrowed byte buffer with lifetime `'d`.
///
/// Values are created through `MachObject::parse`.
pub struct MachObject<'d> {
// Parsed representation produced by `mach::MachO::parse`.
macho: mach::MachO<'d>,
// The buffer the object was parsed from; returned unchanged by `MachObject::data`.
data: &'d [u8],
}
impl<'d> MachObject<'d> {
    /// Tests whether the buffer looks like a single (non-fat) MachO object.
    ///
    /// Only goblin's `peek` hint is consulted; nothing is parsed.
    pub fn test(data: &[u8]) -> bool {
        let mut cursor = Cursor::new(data);
        if let Ok(goblin::Hint::Mach(_)) = goblin::peek(&mut cursor) {
            true
        } else {
            false
        }
    }

    /// Parses a MachO object located at the start of `data`.
    ///
    /// # Errors
    ///
    /// Returns `MachError::BadObject` wrapping the goblin error if parsing fails.
    pub fn parse(data: &'d [u8]) -> Result<Self, MachError> {
        let macho = mach::MachO::parse(data, 0).map_err(MachError::BadObject)?;
        Ok(MachObject { macho, data })
    }

    /// The container file format, which is always `FileFormat::MachO`.
    pub fn file_format(&self) -> FileFormat {
        FileFormat::MachO
    }

    /// Looks up the first UUID load command and converts its bytes into a `Uuid`.
    ///
    /// Only the first UUID command is considered: if its bytes cannot be converted, `None` is
    /// returned without looking at later commands.
    fn find_uuid(&self) -> Option<Uuid> {
        self.macho
            .load_commands
            .iter()
            .filter_map(|cmd| match cmd.command {
                mach::load_command::CommandVariant::Uuid(ref uuid_cmd) => Some(uuid_cmd),
                _ => None,
            })
            .next()
            .and_then(|uuid_cmd| Uuid::from_slice(&uuid_cmd.uuid).ok())
    }

    /// The code identifier of this object, built from the bytes of its UUID load command.
    ///
    /// Returns `None` if no UUID could be found.
    pub fn code_id(&self) -> Option<CodeId> {
        self.find_uuid()
            .map(|uuid| CodeId::from_slice(&uuid.as_bytes()[..]))
    }

    /// The debug identifier of this object, derived from its UUID load command.
    ///
    /// Falls back to the default `DebugId` if no UUID could be found.
    pub fn debug_id(&self) -> DebugId {
        match self.find_uuid() {
            Some(uuid) => DebugId::from_uuid(uuid),
            None => DebugId::default(),
        }
    }

    /// The CPU architecture declared in the MachO header.
    ///
    /// The CPU type selects the architecture family and the CPU subtype selects the concrete
    /// variant. An unrecognized subtype of a known x86, x86-64, ARM or ARM64 family maps to that
    /// family's `*Unknown` variant; everything else maps to `Arch::Unknown`.
    pub fn arch(&self) -> Arch {
        use goblin::mach::constants::cputype;

        let header = &self.macho.header;
        let cpu_type = header.cputype();
        let cpu_subtype = header.cpusubtype();

        match cpu_type {
            cputype::CPU_TYPE_I386 => match cpu_subtype {
                cputype::CPU_SUBTYPE_I386_ALL => Arch::X86,
                _ => Arch::X86Unknown,
            },
            cputype::CPU_TYPE_X86_64 => match cpu_subtype {
                cputype::CPU_SUBTYPE_X86_64_ALL => Arch::Amd64,
                cputype::CPU_SUBTYPE_X86_64_H => Arch::Amd64h,
                _ => Arch::Amd64Unknown,
            },
            cputype::CPU_TYPE_ARM64 => match cpu_subtype {
                cputype::CPU_SUBTYPE_ARM64_ALL => Arch::Arm64,
                cputype::CPU_SUBTYPE_ARM64_V8 => Arch::Arm64V8,
                cputype::CPU_SUBTYPE_ARM64_E => Arch::Arm64e,
                _ => Arch::Arm64Unknown,
            },
            cputype::CPU_TYPE_ARM => match cpu_subtype {
                cputype::CPU_SUBTYPE_ARM_ALL => Arch::Arm,
                cputype::CPU_SUBTYPE_ARM_V5TEJ => Arch::ArmV5,
                cputype::CPU_SUBTYPE_ARM_V6 => Arch::ArmV6,
                cputype::CPU_SUBTYPE_ARM_V6M => Arch::ArmV6m,
                cputype::CPU_SUBTYPE_ARM_V7 => Arch::ArmV7,
                cputype::CPU_SUBTYPE_ARM_V7F => Arch::ArmV7f,
                cputype::CPU_SUBTYPE_ARM_V7S => Arch::ArmV7s,
                cputype::CPU_SUBTYPE_ARM_V7K => Arch::ArmV7k,
                cputype::CPU_SUBTYPE_ARM_V7M => Arch::ArmV7m,
                cputype::CPU_SUBTYPE_ARM_V7EM => Arch::ArmV7em,
                _ => Arch::ArmUnknown,
            },
            // There is no "unknown PowerPC" variant: other subtypes are plain `Unknown`.
            cputype::CPU_TYPE_POWERPC => match cpu_subtype {
                cputype::CPU_SUBTYPE_POWERPC_ALL => Arch::Ppc,
                _ => Arch::Unknown,
            },
            cputype::CPU_TYPE_POWERPC64 => match cpu_subtype {
                cputype::CPU_SUBTYPE_POWERPC_ALL => Arch::Ppc64,
                _ => Arch::Unknown,
            },
            _ => Arch::Unknown,
        }
    }

    /// The kind of this object, derived from the `filetype` field of the MachO header.
    ///
    /// File types without an explicit mapping are reported as `ObjectKind::Other`.
    pub fn kind(&self) -> ObjectKind {
        use goblin::mach::header;

        match self.macho.header.filetype {
            header::MH_OBJECT => ObjectKind::Relocatable,
            header::MH_EXECUTE => ObjectKind::Executable,
            header::MH_DYLIB => ObjectKind::Library,
            header::MH_CORE => ObjectKind::Dump,
            header::MH_DSYM => ObjectKind::Debug,
            _ => ObjectKind::Other,
        }
    }

    /// The virtual memory address of the first `__TEXT` segment, or `0` if there is none.
    pub fn load_address(&self) -> u64 {
        self.find_segment("__TEXT")
            .map(|segment| segment.vmaddr)
            .unwrap_or(0)
    }

    /// Whether the parsed object carries a symbol table.
    pub fn has_symbols(&self) -> bool {
        self.macho.symbols.is_some()
    }

    /// Returns an iterator over the symbols located in code sections.
    ///
    /// Code sections are the `__text` and `__stubs` sections of `__TEXT` segments. Their
    /// zero-based positions, counted across the sections of all segments in order, are collected
    /// up front and handed to the iterator, which compares them against each symbol's section
    /// number.
    pub fn symbols(&self) -> MachOSymbolIterator<'d> {
        let mut code_sections = SmallVec::new();
        let mut next_index = 0;

        'segments: for segment in &self.macho.segments {
            let is_text_segment = match segment.name() {
                Ok(name) => name == "__TEXT",
                Err(_) => false,
            };

            // Sections of other segments still occupy index positions.
            if !is_text_segment {
                next_index += segment.nsects as usize;
                continue;
            }

            for entry in segment {
                // Stop collecting altogether at the first section header that fails to parse:
                // indices counted past that point could no longer be trusted.
                let section = match entry {
                    Ok((section, _data)) => section,
                    Err(_) => break 'segments,
                };

                let is_code_section = match section.name() {
                    Ok("__text") | Ok("__stubs") => true,
                    _ => false,
                };
                if is_code_section {
                    code_sections.push(next_index);
                }

                next_index += 1;
            }
        }

        MachOSymbolIterator {
            symbols: self.macho.symbols(),
            sections: code_sections,
            vmaddr: self.load_address(),
        }
    }

    /// Collects all symbols of `symbols()` into a `SymbolMap`.
    pub fn symbol_map(&self) -> SymbolMap<'d> {
        self.symbols().collect()
    }

    /// Whether this object has a `debug_info` section.
    pub fn has_debug_info(&self) -> bool {
        self.has_section("debug_info")
    }

    /// Creates a DWARF debug session from this object, its symbol map and its load address.
    ///
    /// # Errors
    ///
    /// Forwards any `DwarfError` returned by `DwarfDebugSession::parse`.
    pub fn debug_session(&self) -> Result<DwarfDebugSession<'d>, DwarfError> {
        DwarfDebugSession::parse(self, self.symbol_map(), self.load_address())
    }

    /// Whether this object has an `eh_frame` or a `debug_frame` section.
    pub fn has_unwind_info(&self) -> bool {
        self.has_section("eh_frame") || self.has_section("debug_frame")
    }

    /// The raw buffer this object was parsed from.
    pub fn data(&self) -> &'d [u8] {
        self.data
    }

    /// Whether any symbol yielded by `symbols()` has a name starting with `__?hidden#`.
    pub fn requires_symbolmap(&self) -> bool {
        for symbol in self.symbols() {
            let is_hidden = symbol
                .name()
                .map_or(false, |name| name.starts_with("__?hidden#"));
            if is_hidden {
                return true;
            }
        }

        false
    }

    /// Returns the first segment with the given name.
    ///
    /// Segments whose name cannot be read never match.
    fn find_segment(&self, name: &str) -> Option<&mach::segment::Segment<'d>> {
        (&self.macho.segments)
            .into_iter()
            .find(|segment| segment.name().map(|seg| seg == name).unwrap_or(false))
    }
}
/// Debug output lists values derived from the object (identifiers, architecture, kind, load
/// address and feature flags) instead of dumping the parsed goblin structure.
impl fmt::Debug for MachObject<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut out = f.debug_struct("MachObject");
        out.field("code_id", &self.code_id());
        out.field("debug_id", &self.debug_id());
        out.field("arch", &self.arch());
        out.field("kind", &self.kind());
        // The load address goes through the `HexFmt` wrapper.
        out.field("load_address", &HexFmt(self.load_address()));
        out.field("has_symbols", &self.has_symbols());
        out.field("has_debug_info", &self.has_debug_info());
        out.field("has_unwind_info", &self.has_unwind_info());
        out.finish()
    }
}
/// Lets a `MachObject<'d>` be borrowed as a `MachObject<'slf>` for any `'slf` that `'d` outlives.
impl<'slf, 'd: 'slf> AsSelf<'slf> for MachObject<'d> {
type Ref = MachObject<'slf>;
fn as_self(&'slf self) -> &Self::Ref {
// The reference itself is returned; only the lifetime parameter of the type changes.
self
}
}
impl<'d> Parse<'d> for MachObject<'d> {
type Error = MachError;
fn test(data: &[u8]) -> bool {
Self::test(data)
}
fn parse(data: &'d [u8]) -> Result<Self, MachError> {
Self::parse(data)
}
}
impl<'d> ObjectLike for MachObject<'d> {
type Error = DwarfError;
type Session = DwarfDebugSession<'d>;
fn file_format(&self) -> FileFormat {
self.file_format()
}
fn code_id(&self) -> Option<CodeId> {
self.code_id()
}
fn debug_id(&self) -> DebugId {
self.debug_id()
}
fn arch(&self) -> Arch {
self.arch()
}
fn kind(&self) -> ObjectKind {
self.kind()
}
fn load_address(&self) -> u64 {
self.load_address()
}
fn has_symbols(&self) -> bool {
self.has_symbols()
}
fn symbols(&self) -> DynIterator<'_, Symbol<'_>> {
Box::new(self.symbols())
}
fn symbol_map(&self) -> SymbolMap<'_> {
self.symbol_map()
}
fn has_debug_info(&self) -> bool {
self.has_debug_info()
}
fn debug_session(&self) -> Result<Self::Session, Self::Error> {
self.debug_session()
}
fn has_unwind_info(&self) -> bool {
self.has_unwind_info()
}
}
/// Gives the DWARF machinery access to the byte order and raw section data of a MachO object.
impl<'d> Dwarf<'d> for MachObject<'d> {
    /// The byte order of the object, as reported by the parsed MachO.
    fn endianity(&self) -> Endian {
        if self.macho.little_endian {
            Endian::Little
        } else {
            Endian::Big
        }
    }

    /// Looks up the data of a section by its name without the two-character prefix.
    ///
    /// `section_name` is compared against each MachO section name with its first two bytes
    /// removed (e.g. `debug_info` matches `__debug_info`). `eh_frame` is searched in the
    /// `__TEXT` segment; every other name is searched in the `__DWARF` segment.
    ///
    /// Returns the section's file offset together with its data, or `None` if the segment or
    /// section does not exist. A matching section whose header records a file offset of `0` is
    /// reported as missing as well.
    fn raw_data(&self, section_name: &str) -> Option<(u64, &'d [u8])> {
        let segment_name = match section_name {
            "eh_frame" => "__TEXT",
            _ => "__DWARF",
        };

        let segment = self.find_segment(segment_name)?;

        for section in segment {
            // Sections that fail to parse or have an unreadable name are skipped.
            if let Ok((header, data)) = section {
                if let Ok(sec) = header.name() {
                    // `str::get` instead of `&sec[2..]`: section names come straight from the
                    // file, and slicing panics if byte offset 2 falls inside a multi-byte
                    // character. `get` returns `None` for that case and for names shorter than
                    // two bytes, which simply means "no match".
                    if sec.get(2..) == Some(section_name) {
                        // The first matching section decides the result, even if it is the
                        // offset-0 case; later sections with the same name are not considered.
                        return match header.offset {
                            0 => None,
                            offset => Some((offset.into(), data)),
                        };
                    }
                }
            }
        }

        None
    }
}
/// An iterator over the code symbols of a `MachObject`, created by `MachObject::symbols`.
pub struct MachOSymbolIterator<'d> {
// The underlying goblin symbol iterator.
symbols: mach::symbols::SymbolIterator<'d>,
// Zero-based indices of the sections whose symbols are yielded (compared to `n_sect - 1`).
sections: SmallVec<[usize; 2]>,
// Subtracted from every symbol value; symbols with a value below it are skipped.
vmaddr: u64,
}
impl<'d> Iterator for MachOSymbolIterator<'d> {
    type Item = Symbol<'d>;

    /// Advances to the next symbol that lives in one of the recorded code sections.
    ///
    /// Entries that fail to parse, have a value below `vmaddr`, are stabs, or do not refer to one
    /// of the recorded sections are skipped. The returned address is relative to `vmaddr`, a
    /// single leading underscore is removed from the name, and the size is always `0`.
    fn next(&mut self) -> Option<Self::Item> {
        for entry in self.symbols.by_ref() {
            let (raw_name, nlist) = match entry {
                Ok(pair) => pair,
                Err(_) => continue,
            };

            // Also guarantees that the subtraction for the relative address cannot underflow.
            if nlist.n_value < self.vmaddr {
                continue;
            }

            if nlist.is_stab() || nlist.get_type() != mach::symbols::N_SECT {
                continue;
            }

            // Must be checked before computing `n_sect - 1` below.
            if nlist.n_sect == (mach::symbols::NO_SECT as usize) {
                continue;
            }

            // Section numbers in the symbol table are one-based, the recorded indices are not.
            if !self.sections.contains(&(nlist.n_sect - 1)) {
                continue;
            }

            let name = if raw_name.starts_with('_') {
                &raw_name[1..]
            } else {
                raw_name
            };

            return Some(Symbol {
                name: Some(Cow::Borrowed(name)),
                address: nlist.n_value - self.vmaddr,
                size: 0,
            });
        }

        None
    }
}
/// An iterator over the objects of a `FatMachO`, created by `FatMachO::objects`.
pub struct FatMachObjectIterator<'d, 'a> {
// The underlying goblin iterator over architecture entries.
iter: mach::FatArchIterator<'a>,
// Number of items still to be yielded; starts at the container's `narches`.
remaining: usize,
// The buffer of the whole fat file, from which each architecture's slice is taken.
data: &'d [u8],
}
impl<'d, 'a> Iterator for FatMachObjectIterator<'d, 'a> {
    type Item = Result<MachObject<'d>, MachError>;

    /// Parses and returns the object of the next architecture entry.
    ///
    /// Both a broken architecture entry and a failure to parse the contained object are reported
    /// as `MachError::BadObject`. Every call decrements the remaining count until it reaches
    /// zero, after which `None` is returned without touching the inner iterator.
    fn next(&mut self) -> Option<Self::Item> {
        if self.remaining == 0 {
            return None;
        }
        self.remaining -= 1;

        let data = self.data;
        self.iter.next().map(|entry| {
            entry
                .map_err(MachError::BadObject)
                .and_then(|arch| MachObject::parse(arch.slice(data)))
        })
    }

    /// Reports the remaining count as both the lower and the upper bound.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let left = self.remaining;
        (left, Some(left))
    }
}
// `next` returns `None` whenever `remaining` is zero and never increases it.
// NOTE(review): fused behaviour additionally assumes the goblin iterator does not end before
// `remaining` reaches zero — confirm against goblin.
impl std::iter::FusedIterator for FatMachObjectIterator<'_, '_> {}
// `size_hint` reports `remaining` for both bounds.
// NOTE(review): exactness assumes the goblin iterator yields `narches` entries — confirm.
impl ExactSizeIterator for FatMachObjectIterator<'_, '_> {}
/// A fat MachO container holding several `MachObject`s, parsed from a borrowed byte buffer.
pub struct FatMachO<'d> {
// The multi-architecture header parsed by goblin.
fat: mach::MultiArch<'d>,
// The buffer of the whole file; architecture entries are sliced out of it.
data: &'d [u8],
}
impl<'d> FatMachO<'d> {
pub fn test(data: &[u8]) -> bool {
match goblin::peek(&mut Cursor::new(data)) {
Ok(goblin::Hint::MachFat(_)) => true,
_ => false,
}
}
pub fn parse(data: &'d [u8]) -> Result<Self, MachError> {
mach::MultiArch::new(data)
.map(|fat| FatMachO { fat, data })
.map_err(MachError::BadObject)
}
pub fn objects(&self) -> FatMachObjectIterator<'d, '_> {
FatMachObjectIterator {
iter: self.fat.iter_arches(),
remaining: self.fat.narches,
data: self.data,
}
}
pub fn object_count(&self) -> usize {
self.fat.narches
}
pub fn object_by_index(&self, index: usize) -> Result<Option<MachObject<'d>>, MachError> {
let arch = match self.fat.iter_arches().nth(index) {
Some(arch) => arch.map_err(MachError::BadObject)?,
None => return Ok(None),
};
MachObject::parse(arch.slice(self.data)).map(Some)
}
}
/// Debug output shows the parsed fat header only; the raw buffer is left out.
impl fmt::Debug for FatMachO<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut out = f.debug_struct("FatMachO");
        out.field("fat", &self.fat);
        out.finish()
    }
}
/// Lets a `FatMachO<'d>` be borrowed as a `FatMachO<'slf>` for any `'slf` that `'d` outlives.
impl<'slf, 'd: 'slf> AsSelf<'slf> for FatMachO<'d> {
type Ref = FatMachO<'slf>;
fn as_self(&'slf self) -> &Self::Ref {
// The reference itself is returned; only the lifetime parameter of the type changes.
self
}
}
// The two iterator flavours wrapped by `MachObjectIterator`, mirroring `MachArchiveInner`.
// The clippy lint about differently sized variants is silenced explicitly below.
#[allow(clippy::large_enum_variant)]
enum MachObjectIteratorInner<'d, 'a> {
// Iterator obtained from a `MonoArchive` (the non-fat case).
Single(MonoArchiveObjects<'d, MachObject<'d>>),
// Iterator obtained from a `FatMachO`.
Archive(FatMachObjectIterator<'d, 'a>),
}
/// An iterator over the objects of a `MachArchive`, created by `MachArchive::objects`.
pub struct MachObjectIterator<'d, 'a>(MachObjectIteratorInner<'d, 'a>);
impl<'d, 'a> Iterator for MachObjectIterator<'d, 'a> {
    type Item = Result<MachObject<'d>, MachError>;

    /// Forwards to whichever inner iterator this wrapper holds.
    fn next(&mut self) -> Option<Self::Item> {
        match &mut self.0 {
            MachObjectIteratorInner::Single(inner) => inner.next(),
            MachObjectIteratorInner::Archive(inner) => inner.next(),
        }
    }

    /// Forwards the size hint of whichever inner iterator this wrapper holds.
    fn size_hint(&self) -> (usize, Option<usize>) {
        match &self.0 {
            MachObjectIteratorInner::Single(inner) => inner.size_hint(),
            MachObjectIteratorInner::Archive(inner) => inner.size_hint(),
        }
    }
}
// Both `next` and `size_hint` forward to the wrapped iterator, so these markers are only as
// accurate as the inner iterators.
// NOTE(review): `MonoArchiveObjects` is defined elsewhere — confirm it is fused and exact-sized.
impl std::iter::FusedIterator for MachObjectIterator<'_, '_> {}
impl ExactSizeIterator for MachObjectIterator<'_, '_> {}
// The two representations behind `MachArchive`; chosen in `MachArchive::parse`.
#[derive(Debug)]
enum MachArchiveInner<'d> {
// Data that goblin did not identify as a fat MachO, wrapped in a `MonoArchive`.
Single(MonoArchive<'d, MachObject<'d>>),
// A fat MachO container.
Archive(FatMachO<'d>),
}
/// A uniform interface over MachO data that is either a fat container or a single object.
///
/// `objects`, `object_count` and `object_by_index` dispatch to the wrapped representation.
#[derive(Debug)]
pub struct MachArchive<'d>(MachArchiveInner<'d>);
impl<'d> MachArchive<'d> {
    /// Tests whether the buffer looks like either a single MachO object or a fat MachO container.
    ///
    /// Only goblin's `peek` hint is consulted; nothing is parsed.
    pub fn test(data: &[u8]) -> bool {
        match goblin::peek(&mut Cursor::new(data)) {
            Ok(goblin::Hint::Mach(_)) | Ok(goblin::Hint::MachFat(_)) => true,
            _ => false,
        }
    }

    /// Creates an archive from the given buffer.
    ///
    /// If goblin hints at a fat MachO, its header is parsed right away. Anything else,
    /// including data for which the hint could not be determined, is wrapped in a
    /// `MonoArchive` without being parsed here.
    ///
    /// # Errors
    ///
    /// Returns `MachError::BadObject` if the data is hinted as fat MachO but the fat header is
    /// invalid.
    pub fn parse(data: &'d [u8]) -> Result<Self, MachError> {
        let hint = goblin::peek(&mut Cursor::new(data));

        let inner = match hint {
            Ok(goblin::Hint::MachFat(_)) => MachArchiveInner::Archive(FatMachO::parse(data)?),
            _ => MachArchiveInner::Single(MonoArchive::new(data)),
        };

        Ok(MachArchive(inner))
    }

    /// Returns an iterator over the objects in this archive.
    pub fn objects(&self) -> MachObjectIterator<'d, '_> {
        let inner = match &self.0 {
            MachArchiveInner::Single(mono) => MachObjectIteratorInner::Single(mono.objects()),
            MachArchiveInner::Archive(fat) => MachObjectIteratorInner::Archive(fat.objects()),
        };

        MachObjectIterator(inner)
    }

    /// The number of objects reported by the wrapped representation.
    pub fn object_count(&self) -> usize {
        match &self.0 {
            MachArchiveInner::Single(mono) => mono.object_count(),
            MachArchiveInner::Archive(fat) => fat.object_count(),
        }
    }

    /// Parses the object at the given position by delegating to the wrapped representation.
    ///
    /// # Errors
    ///
    /// Forwards the `MachError` of the wrapped representation's `object_by_index`.
    pub fn object_by_index(&self, index: usize) -> Result<Option<MachObject<'d>>, MachError> {
        match &self.0 {
            MachArchiveInner::Single(mono) => mono.object_by_index(index),
            MachArchiveInner::Archive(fat) => fat.object_by_index(index),
        }
    }
}
/// Lets a `MachArchive<'d>` be borrowed as a `MachArchive<'slf>` for any `'slf` that `'d` outlives.
impl<'slf, 'd: 'slf> AsSelf<'slf> for MachArchive<'d> {
type Ref = MachArchive<'slf>;
fn as_self(&'slf self) -> &Self::Ref {
// The reference itself is returned; only the lifetime parameter of the type changes.
self
}
}