use std::collections::HashMap;
use zerocopy::{FromBytes, IntoBytes};
use super::constants::*;
use super::structs::*;
use crate::error::{Error, Result};
/// A parsed 64-bit segment load command together with its section headers.
#[derive(Debug, Clone)]
pub struct SegmentInfo {
    /// The segment command as decoded from the image.
    pub command: SegmentCommand64,
    /// Offset of the segment command inside the context's data buffer.
    pub command_offset: usize,
    /// Section headers decoded right after the segment command, in order.
    pub sections: Vec<SectionInfo>,
}
impl SegmentInfo {
    /// Returns the segment name as reported by the underlying command.
    pub fn name(&self) -> &str {
        let command = &self.command;
        command.name()
    }

    /// Returns the first section whose name equals `name`, if any.
    pub fn section(&self, name: &str) -> Option<&SectionInfo> {
        for candidate in &self.sections {
            if candidate.section.name() == name {
                return Some(candidate);
            }
        }
        None
    }

    /// Mutable counterpart of [`SegmentInfo::section`]: the first section
    /// whose name equals `name`, if any.
    pub fn section_mut(&mut self, name: &str) -> Option<&mut SectionInfo> {
        let index = self
            .sections
            .iter()
            .position(|candidate| candidate.section.name() == name)?;
        self.sections.get_mut(index)
    }
}
/// A parsed 64-bit section header and the place it was read from.
#[derive(Debug, Clone)]
pub struct SectionInfo {
    /// The section header as decoded from the image.
    pub section: Section64,
    /// Offset of the section header inside the context's data buffer.
    pub struct_offset: usize,
}
impl SectionInfo {
    /// Returns the section name as reported by the underlying header.
    pub fn name(&self) -> &str {
        let header = &self.section;
        header.name()
    }

    /// Returns the qualified name in `segment,section` form.
    pub fn full_name(&self) -> String {
        let segment = self.section.segment_name();
        let section = self.section.name();
        format!("{},{}", segment, section)
    }
}
/// One decoded load command.
///
/// Every variant records the offset at which the command starts inside the
/// context's data buffer (for [`LoadCommandInfo::Segment`] this lives in
/// [`SegmentInfo::command_offset`]).
#[derive(Debug, Clone)]
#[allow(missing_docs)]
pub enum LoadCommandInfo {
    /// An `LC_SEGMENT_64` command and its section headers.
    Segment(SegmentInfo),
    /// An `LC_SYMTAB` command.
    Symtab {
        command: SymtabCommand,
        offset: usize,
    },
    /// An `LC_DYSYMTAB` command.
    Dysymtab {
        command: DysymtabCommand,
        offset: usize,
    },
    /// An `LC_DYLD_INFO` or `LC_DYLD_INFO_ONLY` command.
    DyldInfo {
        command: DyldInfoCommand,
        offset: usize,
    },
    /// Any command decoded as a `LinkeditDataCommand` (code signature,
    /// function starts, exports trie, chained fixups, ...).
    LinkeditData {
        command: LinkeditDataCommand,
        offset: usize,
    },
    /// A dylib command (load, weak, re-export, lazy, upward, or id).
    ///
    /// `name` is the NUL-terminated string found at the command's name
    /// offset, decoded lossily as UTF-8; it is empty when that offset lies
    /// outside the command.
    Dylib {
        command: DylibCommand,
        name: String,
        offset: usize,
    },
    /// An `LC_UUID` command.
    Uuid {
        command: UuidCommand,
        offset: usize,
    },
    /// An `LC_BUILD_VERSION` command.
    BuildVersion {
        command: BuildVersionCommand,
        offset: usize,
    },
    /// An `LC_FILESET_ENTRY` command.
    ///
    /// `entry_id` is the NUL-terminated string found at the command's entry
    /// id offset, decoded lossily as UTF-8; it is empty when that offset lies
    /// outside the command.
    FilesetEntry {
        command: FilesetEntryCommand,
        entry_id: String,
        offset: usize,
    },
    /// Any other command; only the generic `cmd`/`cmdsize` header is kept.
    Unknown {
        cmd: u32,
        cmdsize: u32,
        offset: usize,
    },
}
impl LoadCommandInfo {
pub fn offset(&self) -> usize {
match self {
LoadCommandInfo::Segment(s) => s.command_offset,
LoadCommandInfo::Symtab { offset, .. } => *offset,
LoadCommandInfo::Dysymtab { offset, .. } => *offset,
LoadCommandInfo::DyldInfo { offset, .. } => *offset,
LoadCommandInfo::LinkeditData { offset, .. } => *offset,
LoadCommandInfo::Dylib { offset, .. } => *offset,
LoadCommandInfo::Uuid { offset, .. } => *offset,
LoadCommandInfo::BuildVersion { offset, .. } => *offset,
LoadCommandInfo::FilesetEntry { offset, .. } => *offset,
LoadCommandInfo::Unknown { offset, .. } => *offset,
}
}
pub fn size(&self) -> u32 {
match self {
LoadCommandInfo::Segment(s) => s.command.cmdsize,
LoadCommandInfo::Symtab { command, .. } => command.cmdsize,
LoadCommandInfo::Dysymtab { command, .. } => command.cmdsize,
LoadCommandInfo::DyldInfo { command, .. } => command.cmdsize,
LoadCommandInfo::LinkeditData { command, .. } => command.cmdsize,
LoadCommandInfo::Dylib { command, .. } => command.cmdsize,
LoadCommandInfo::Uuid { command, .. } => command.cmdsize,
LoadCommandInfo::BuildVersion { command, .. } => command.cmdsize,
LoadCommandInfo::FilesetEntry { command, .. } => command.cmdsize,
LoadCommandInfo::Unknown { cmdsize, .. } => *cmdsize,
}
}
}
/// Address-translation record for one segment: where it is mapped and where
/// its contents start in the file.
#[derive(Debug, Clone, Copy)]
struct SegmentRange {
    /// Virtual address at which the segment starts.
    vmaddr: u64,
    /// Size of the segment's virtual address range, in bytes.
    vmsize: u64,
    /// File offset corresponding to `vmaddr`.
    fileoff: u64,
}
/// An owned Mach-O image together with its decoded header and load commands.
#[derive(Debug)]
pub struct MachOContext {
    /// The 64-bit Mach-O header decoded from the start of `data`.
    pub header: MachHeader64,
    /// Caller-supplied base offset of this image; `from_cache_slice` passes
    /// the image's offset within the cache buffer.
    pub base_offset: usize,
    /// Owned copy of the image bytes; all command offsets index into it.
    pub data: Vec<u8>,
    /// Load commands in the order they were parsed.
    pub load_commands: Vec<LoadCommandInfo>,
    /// Segment name -> index of that segment in `load_commands`.
    segment_indices: HashMap<String, usize>,
    /// Segments with a non-zero `vmsize`, sorted by `vmaddr`, used for
    /// address-to-offset lookups.
    segment_ranges: Vec<SegmentRange>,
}
impl MachOContext {
pub fn new(data: &[u8], base_offset: usize) -> Result<Self> {
if data.len() < MachHeader64::SIZE {
return Err(Error::BufferTooSmall {
needed: MachHeader64::SIZE,
available: data.len(),
});
}
let header = MachHeader64::read_from_prefix(data)
.map_err(|_| Error::InvalidMachoMagic(0))?
.0;
if !header.is_valid() {
return Err(Error::InvalidMachoMagic(header.magic));
}
let mut ctx = Self {
header: header.clone(),
base_offset,
data: data.to_vec(),
load_commands: Vec::with_capacity(32), segment_indices: HashMap::new(),
segment_ranges: Vec::with_capacity(8), };
ctx.parse_load_commands()?;
ctx.build_segment_ranges();
Ok(ctx)
}
/// Rebuilds the address lookup table from the parsed load commands.
///
/// Only segments with a non-zero `vmsize` are recorded; the table is then
/// sorted by `vmaddr` (stable, so equal addresses keep command order).
fn build_segment_ranges(&mut self) {
    self.segment_ranges.clear();

    let mapped = self.load_commands.iter().filter_map(|lc| match lc {
        LoadCommandInfo::Segment(seg) if seg.command.vmsize > 0 => Some(SegmentRange {
            vmaddr: seg.command.vmaddr,
            vmsize: seg.command.vmsize,
            fileoff: seg.command.fileoff,
        }),
        _ => None,
    });
    self.segment_ranges.extend(mapped);

    self.segment_ranges.sort_by_key(|range| range.vmaddr);
}
pub fn from_cache_slice(cache_data: &[u8], offset: usize, size: usize) -> Result<Self> {
if offset + size > cache_data.len() {
return Err(Error::BufferTooSmall {
needed: offset + size,
available: cache_data.len(),
});
}
Self::new(&cache_data[offset..offset + size], offset)
}
fn parse_load_commands(&mut self) -> Result<()> {
let mut offset = MachHeader64::SIZE;
let end_offset = MachHeader64::SIZE + self.header.sizeofcmds as usize;
for _ in 0..self.header.ncmds {
if offset + LoadCommand::SIZE > end_offset
|| offset + LoadCommand::SIZE > self.data.len()
{
return Err(Error::LoadCommandOverflow { offset });
}
let lc = LoadCommand::read_from_prefix(&self.data[offset..])
.map_err(|_| Error::Parse {
offset,
reason: "failed to parse load command".into(),
})?
.0;
if offset + lc.cmdsize as usize > self.data.len() {
return Err(Error::LoadCommandOverflow { offset });
}
let cmd_data = &self.data[offset..offset + lc.cmdsize as usize];
let cmd_info = self.parse_load_command(lc.cmd, cmd_data, offset)?;
if let LoadCommandInfo::Segment(ref seg) = cmd_info {
self.segment_indices
.insert(seg.name().to_string(), self.load_commands.len());
}
self.load_commands.push(cmd_info);
offset += lc.cmdsize as usize;
}
Ok(())
}
/// Decodes one load command of kind `cmd` into a [`LoadCommandInfo`].
///
/// `data` is the command's own byte range and `offset` is where that range
/// starts in the context's data buffer; `offset` is stored in the result and
/// used for error reporting.
///
/// Section headers of an `LC_SEGMENT_64` command are read from `data` only, so
/// a segment whose `nsects` does not fit inside its own command is rejected
/// rather than decoded from the bytes of whatever follows it. The section
/// vector is never pre-allocated beyond what the command can hold.
///
/// # Errors
///
/// Returns [`Error::Parse`] when `data` is too short for the structure
/// selected by `cmd`, or for one of a segment's section headers.
fn parse_load_command(&self, cmd: u32, data: &[u8], offset: usize) -> Result<LoadCommandInfo> {
    /// Reads the NUL-terminated string that starts at `start` in `data`,
    /// decoding it lossily as UTF-8. A missing terminator ends the string at
    /// the end of `data`; a `start` outside `data` yields an empty string.
    fn c_string_at(data: &[u8], start: usize) -> String {
        let bytes = data.get(start..).unwrap_or(&[]);
        let end = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
        String::from_utf8_lossy(&bytes[..end]).into_owned()
    }

    // All command-level failures share the command's offset.
    let parse_error = |reason: &'static str| Error::Parse {
        offset,
        reason: reason.into(),
    };

    match cmd {
        LC_SEGMENT_64 => {
            let seg = SegmentCommand64::read_from_prefix(data)
                .map_err(|_| parse_error("failed to parse segment command"))?
                .0;

            // Bytes of this command that follow the segment header; section
            // headers must come from here and nowhere else.
            let mut remaining = data.get(SegmentCommand64::SIZE..).unwrap_or(&[]);
            // `nsects` is untrusted: cap the reservation by what can fit.
            let capacity = (seg.nsects as usize).min(remaining.len() / Section64::SIZE);
            let mut sections = Vec::with_capacity(capacity);
            let mut sect_offset = offset + SegmentCommand64::SIZE;

            for _ in 0..seg.nsects {
                let (sect, rest) =
                    Section64::read_from_prefix(remaining).map_err(|_| Error::Parse {
                        offset: sect_offset,
                        reason: "failed to parse section".into(),
                    })?;
                sections.push(SectionInfo {
                    section: sect,
                    struct_offset: sect_offset,
                });
                remaining = rest;
                sect_offset += Section64::SIZE;
            }

            Ok(LoadCommandInfo::Segment(SegmentInfo {
                command: seg,
                command_offset: offset,
                sections,
            }))
        }
        LC_SYMTAB => {
            let command = SymtabCommand::read_from_prefix(data)
                .map_err(|_| parse_error("failed to parse symtab command"))?
                .0;
            Ok(LoadCommandInfo::Symtab { command, offset })
        }
        LC_DYSYMTAB => {
            let command = DysymtabCommand::read_from_prefix(data)
                .map_err(|_| parse_error("failed to parse dysymtab command"))?
                .0;
            Ok(LoadCommandInfo::Dysymtab { command, offset })
        }
        LC_DYLD_INFO | LC_DYLD_INFO_ONLY => {
            let command = DyldInfoCommand::read_from_prefix(data)
                .map_err(|_| parse_error("failed to parse dyld info command"))?
                .0;
            Ok(LoadCommandInfo::DyldInfo { command, offset })
        }
        LC_CODE_SIGNATURE
        | LC_SEGMENT_SPLIT_INFO
        | LC_FUNCTION_STARTS
        | LC_DATA_IN_CODE
        | LC_DYLD_EXPORTS_TRIE
        | LC_DYLD_CHAINED_FIXUPS
        | LC_LINKER_OPTIMIZATION_HINT
        | LC_ATOM_INFO => {
            let command = LinkeditDataCommand::read_from_prefix(data)
                .map_err(|_| parse_error("failed to parse linkedit data command"))?
                .0;
            Ok(LoadCommandInfo::LinkeditData { command, offset })
        }
        LC_LOAD_DYLIB | LC_LOAD_WEAK_DYLIB | LC_REEXPORT_DYLIB | LC_LAZY_LOAD_DYLIB
        | LC_LOAD_UPWARD_DYLIB | LC_ID_DYLIB => {
            let command = DylibCommand::read_from_prefix(data)
                .map_err(|_| parse_error("failed to parse dylib command"))?
                .0;
            // The name offset is relative to the start of the command.
            let name = c_string_at(data, command.dylib.name_offset as usize);
            Ok(LoadCommandInfo::Dylib {
                command,
                name,
                offset,
            })
        }
        LC_UUID => {
            let command = UuidCommand::read_from_prefix(data)
                .map_err(|_| parse_error("failed to parse uuid command"))?
                .0;
            Ok(LoadCommandInfo::Uuid { command, offset })
        }
        LC_BUILD_VERSION => {
            let command = BuildVersionCommand::read_from_prefix(data)
                .map_err(|_| parse_error("failed to parse build version command"))?
                .0;
            Ok(LoadCommandInfo::BuildVersion { command, offset })
        }
        LC_FILESET_ENTRY => {
            let command = FilesetEntryCommand::read_from_prefix(data)
                .map_err(|_| parse_error("failed to parse fileset entry command"))?
                .0;
            // The entry id offset is relative to the start of the command.
            let entry_id = c_string_at(data, command.entry_id_offset as usize);
            Ok(LoadCommandInfo::FilesetEntry {
                command,
                entry_id,
                offset,
            })
        }
        _ => {
            // Unrecognized command: keep only the generic header fields.
            let lc = LoadCommand::read_from_prefix(data)
                .map_err(|_| parse_error("failed to parse load command"))?
                .0;
            Ok(LoadCommandInfo::Unknown {
                cmd,
                cmdsize: lc.cmdsize,
                offset,
            })
        }
    }
}
/// Looks up a segment by name through the name index.
///
/// Returns `None` when the name is unknown, or when the indexed slot in
/// `load_commands` no longer exists or no longer holds a segment.
/// `load_commands` is a public field, so the cached index may be stale;
/// a checked lookup avoids panicking in that case.
pub fn segment(&self, name: &str) -> Option<&SegmentInfo> {
    let idx = *self.segment_indices.get(name)?;
    match self.load_commands.get(idx)? {
        LoadCommandInfo::Segment(seg) => Some(seg),
        _ => None,
    }
}
/// Looks up a segment by name through the name index, for mutation.
///
/// Returns `None` when the name is unknown, or when the indexed slot in
/// `load_commands` no longer exists or no longer holds a segment.
/// `load_commands` is a public field, so the cached index may be stale;
/// a checked lookup avoids panicking in that case.
pub fn segment_mut(&mut self, name: &str) -> Option<&mut SegmentInfo> {
    let idx = *self.segment_indices.get(name)?;
    match self.load_commands.get_mut(idx)? {
        LoadCommandInfo::Segment(seg) => Some(seg),
        _ => None,
    }
}
/// Iterates over all segment commands, in load-command order.
pub fn segments(&self) -> impl Iterator<Item = &SegmentInfo> {
    self.load_commands.iter().filter_map(|entry| match entry {
        LoadCommandInfo::Segment(segment) => Some(segment),
        _ => None,
    })
}
/// Looks up `section` inside the segment named `segment`.
///
/// Returns `None` when either the segment or the section is not found.
pub fn section(&self, segment: &str, section: &str) -> Option<&SectionInfo> {
    self.segment(segment)
        .and_then(|found| found.section(section))
}
/// Returns the `__TEXT` segment, if the image has one.
pub fn text_segment(&self) -> Option<&SegmentInfo> {
    const SEGMENT_NAME: &str = "__TEXT";
    self.segment(SEGMENT_NAME)
}
/// Returns the `__DATA` segment, if the image has one.
pub fn data_segment(&self) -> Option<&SegmentInfo> {
    const SEGMENT_NAME: &str = "__DATA";
    self.segment(SEGMENT_NAME)
}
/// Returns the `__LINKEDIT` segment, if the image has one.
pub fn linkedit_segment(&self) -> Option<&SegmentInfo> {
    const SEGMENT_NAME: &str = "__LINKEDIT";
    self.segment(SEGMENT_NAME)
}
/// Returns the first symbol-table command, if any.
pub fn symtab(&self) -> Option<&SymtabCommand> {
    self.load_commands.iter().find_map(|entry| match entry {
        LoadCommandInfo::Symtab { command, .. } => Some(command),
        _ => None,
    })
}
/// Returns the first dynamic symbol-table command, if any.
pub fn dysymtab(&self) -> Option<&DysymtabCommand> {
    self.load_commands.iter().find_map(|entry| match entry {
        LoadCommandInfo::Dysymtab { command, .. } => Some(command),
        _ => None,
    })
}
/// Returns the first dyld-info command, if any.
pub fn dyld_info(&self) -> Option<&DyldInfoCommand> {
    self.load_commands.iter().find_map(|entry| match entry {
        LoadCommandInfo::DyldInfo { command, .. } => Some(command),
        _ => None,
    })
}
/// Iterates over every dylib command as `(name, cmd)` pairs, in
/// load-command order. This includes the image's own `LC_ID_DYLIB` entry.
pub fn dylibs(&self) -> impl Iterator<Item = (&str, u32)> {
    self.load_commands.iter().filter_map(|entry| match entry {
        LoadCommandInfo::Dylib { command, name, .. } => Some((name.as_str(), command.cmd)),
        _ => None,
    })
}
/// Collects the names of all dylibs this image loads, in load-command order.
///
/// Load, weak, re-export, lazy and upward dylib commands are included; the
/// image's own `LC_ID_DYLIB` entry is not.
pub fn dependencies(&self) -> Vec<String> {
    let mut names = Vec::new();
    for entry in &self.load_commands {
        if let LoadCommandInfo::Dylib { command, name, .. } = entry {
            let is_dependency = matches!(
                command.cmd,
                LC_LOAD_DYLIB
                    | LC_LOAD_WEAK_DYLIB
                    | LC_REEXPORT_DYLIB
                    | LC_LAZY_LOAD_DYLIB
                    | LC_LOAD_UPWARD_DYLIB
            );
            if is_dependency {
                names.push(name.clone());
            }
        }
    }
    names
}
/// Reports whether the header identifies an arm64 image.
pub fn is_arm64(&self) -> bool {
    let header = &self.header;
    header.is_arm64()
}
/// Reports whether the header identifies an arm64e image.
pub fn is_arm64e(&self) -> bool {
    let header = &self.header;
    header.is_arm64e()
}
#[inline]
pub fn read_at(&self, offset: usize, len: usize) -> Result<&[u8]> {
if offset + len > self.data.len() {
return Err(Error::BufferTooSmall {
needed: offset + len,
available: self.data.len(),
});
}
Ok(&self.data[offset..offset + len])
}
/// Reads a little-endian `u32` at `offset`.
///
/// Fails with the error from `read_at` when fewer than 4 bytes are available.
#[inline(always)]
pub fn read_u32(&self, offset: usize) -> Result<u32> {
    let mut raw = [0u8; 4];
    // `read_at` returns exactly 4 bytes on success, so the lengths match.
    raw.copy_from_slice(self.read_at(offset, 4)?);
    Ok(u32::from_le_bytes(raw))
}
/// Reads a little-endian `u64` at `offset`.
///
/// Fails with the error from `read_at` when fewer than 8 bytes are available.
#[inline(always)]
pub fn read_u64(&self, offset: usize) -> Result<u64> {
    let mut raw = [0u8; 8];
    // `read_at` returns exactly 8 bytes on success, so the lengths match.
    raw.copy_from_slice(self.read_at(offset, 8)?);
    Ok(u64::from_le_bytes(raw))
}
/// Overwrites the image bytes starting at `offset` with `data`.
///
/// The buffer is never grown; the whole of `data` must fit.
///
/// # Errors
///
/// Returns [`Error::BufferTooSmall`] when the destination range does not fit
/// in the buffer. If `offset + data.len()` overflows `usize`, `needed` is
/// reported as `usize::MAX` instead of wrapping around.
#[inline]
pub fn write_at(&mut self, offset: usize, data: &[u8]) -> Result<()> {
    // `checked_add` prevents a wrapped sum from passing the bounds check and
    // then panicking in the slice operation.
    let end = match offset.checked_add(data.len()) {
        Some(end) if end <= self.data.len() => end,
        _ => {
            return Err(Error::BufferTooSmall {
                needed: offset.saturating_add(data.len()),
                available: self.data.len(),
            })
        }
    };
    self.data[offset..end].copy_from_slice(data);
    Ok(())
}
/// Writes `value` as a little-endian `u32` at `offset`.
#[inline(always)]
pub fn write_u32(&mut self, offset: usize, value: u32) -> Result<()> {
    let encoded = value.to_le_bytes();
    self.write_at(offset, &encoded)
}
/// Writes `value` as a little-endian `u64` at `offset`.
#[inline(always)]
pub fn write_u64(&mut self, offset: usize, value: u64) -> Result<()> {
    let encoded = value.to_le_bytes();
    self.write_at(offset, &encoded)
}
/// Writes the raw bytes of `value` into the image at `offset`.
///
/// Fails with the error from `write_at` when the bytes do not fit.
pub fn write_struct<T: IntoBytes + Immutable>(
    &mut self,
    offset: usize,
    value: &T,
) -> Result<()> {
    self.write_at(offset, value.as_bytes())
}
/// Writes the in-memory `header` back over the first bytes of the image.
pub fn sync_header(&mut self) -> Result<()> {
    // Work from a copy so the header is not borrowed while `self` is
    // mutably borrowed for the write.
    let snapshot = self.header.clone();
    self.write_at(0, snapshot.as_bytes())
}
/// Returns the sum of the `cmdsize` values of all parsed load commands.
pub fn load_commands_size(&self) -> usize {
    let mut total = 0usize;
    for lc in &self.load_commands {
        total += lc.size() as usize;
    }
    total
}
/// Returns the number of bytes between the end of the current load commands
/// and the file offset of the `__TEXT` segment.
///
/// The result saturates at zero. When there is no `__TEXT` segment the limit
/// is taken to be `usize::MAX`.
pub fn available_load_command_space(&self) -> usize {
    let used = MachHeader64::SIZE + self.load_commands_size();
    let limit = match self.text_segment() {
        Some(segment) => segment.command.fileoff as usize,
        None => usize::MAX,
    };
    limit.saturating_sub(used)
}
/// Translates a virtual address into a file offset using the sorted
/// segment range table.
///
/// Returns `None` when no recorded segment contains `addr`, or when the
/// resulting offset cannot be represented (it overflows `u64` or does not fit
/// in `usize`). The offset is derived from `fileoff` and `vmsize` only; it is
/// not checked against the segment's `filesize`.
#[inline]
pub fn addr_to_offset(&self, addr: u64) -> Option<usize> {
    // Skip ranges that end at or before `addr`. This is the overflow-free
    // form of `vmaddr + vmsize <= addr`, which could wrap on hostile headers.
    let idx = self
        .segment_ranges
        .partition_point(|r| r.vmaddr <= addr && addr - r.vmaddr >= r.vmsize);
    let range = self.segment_ranges.get(idx)?;

    // `checked_sub` fails when `addr` lies below the candidate range.
    let delta = addr.checked_sub(range.vmaddr)?;
    if delta >= range.vmsize {
        return None;
    }

    let offset = range.fileoff.checked_add(delta)?;
    if offset > usize::MAX as u64 {
        // Would be silently truncated by the cast on narrower targets.
        return None;
    }
    Some(offset as usize)
}
/// Translates a file offset into a virtual address.
///
/// The first segment (in load-command order) whose file range
/// `[fileoff, fileoff + filesize)` contains `offset` is used. Returns `None`
/// when no segment contains the offset, or when the resulting address would
/// overflow `u64`.
pub fn offset_to_addr(&self, offset: usize) -> Option<u64> {
    let offset = offset as u64;
    // Comparing the distance from `fileoff` against `filesize` avoids
    // computing `fileoff + filesize`, which could wrap on hostile headers.
    let seg = self.segments().find(|seg| {
        offset
            .checked_sub(seg.command.fileoff)
            .map_or(false, |delta| delta < seg.command.filesize)
    })?;
    seg.command.vmaddr.checked_add(offset - seg.command.fileoff)
}
/// Reports whether `addr` falls inside the virtual address range
/// `[vmaddr, vmaddr + vmsize)` of any segment.
pub fn contains_addr(&self, addr: u64) -> bool {
    // Comparing the distance from `vmaddr` against `vmsize` avoids computing
    // `vmaddr + vmsize`, which could wrap on hostile headers.
    self.segments().any(|seg| {
        addr.checked_sub(seg.command.vmaddr)
            .map_or(false, |delta| delta < seg.command.vmsize)
    })
}
/// Borrows the whole image buffer.
pub fn as_bytes(&self) -> &[u8] {
    self.data.as_slice()
}
/// Mutably borrows the whole image buffer.
///
/// Changes made through this slice are not reflected in the already parsed
/// `header` or `load_commands`.
pub fn as_bytes_mut(&mut self) -> &mut [u8] {
    self.data.as_mut_slice()
}
}
use zerocopy::Immutable;
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a zero-padded 0x1000-byte image holding a header followed by a
    /// single `__TEXT` segment command.
    fn create_minimal_macho() -> Vec<u8> {
        let header = MachHeader64 {
            magic: MH_MAGIC_64,
            cputype: CPU_TYPE_ARM64,
            cpusubtype: CPU_SUBTYPE_ARM64_ALL,
            filetype: MH_DYLIB,
            ncmds: 1,
            sizeofcmds: SegmentCommand64::SIZE as u32,
            flags: 0,
            reserved: 0,
        };

        let mut text = SegmentCommand64::default();
        text.set_name("__TEXT");
        text.vmaddr = 0x100000000;
        text.vmsize = 0x1000;
        text.fileoff = 0;
        text.filesize = 0x1000;

        let mut image = vec![0u8; 0x1000];
        let (header_bytes, after_header) = image.split_at_mut(MachHeader64::SIZE);
        header_bytes.copy_from_slice(header.as_bytes());
        after_header[..SegmentCommand64::SIZE].copy_from_slice(text.as_bytes());
        image
    }

    /// A minimal image parses and exposes its header and `__TEXT` segment.
    #[test]
    fn test_parse_minimal_macho() {
        let image = create_minimal_macho();
        let ctx = MachOContext::new(&image, 0).unwrap();

        assert!(ctx.header.is_valid());
        assert!(ctx.is_arm64());
        assert_eq!(ctx.header.ncmds, 1);
        assert!(ctx.segment("__TEXT").is_some());
    }
}