use alloc::vec::Vec;
use core::fmt;

use log::debug;

use scroll::ctx::SizeWith;
use scroll::{Pread, BE};

use crate::{archive, container};
use crate::{error, take_hint_bytes};

pub mod bind_opcodes;
pub mod constants;
pub mod exports;
pub mod fat;
pub mod header;
pub mod imports;
pub mod load_command;
pub mod relocation;
pub mod segment;
pub mod symbols;

pub use self::constants::cputype;

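/// Returns the (big-endian) magic number at `offset` in `bytes`.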
pub fn peek(bytes: &[u8], offset: usize) -> error::Result<u32> {
    Ok(bytes.pread_with::<u32>(offset, scroll::BE)?)
}

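/// Parses the magic number at `offset` and, if it is a valid Mach-o magic, the parsing
/// context (container size and endianness) it implies.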
pub fn parse_magic_and_ctx(
    bytes: &[u8],
    offset: usize,
) -> error::Result<(u32, Option<container::Ctx>)> {
    use crate::container::Container;
    use crate::mach::header::*;
    let magic = bytes.pread_with::<u32>(offset, BE)?;
    let ctx = match magic {
        MH_CIGAM_64 | MH_CIGAM | MH_MAGIC_64 | MH_MAGIC => {
            let is_lsb = magic == MH_CIGAM || magic == MH_CIGAM_64;
            let le = scroll::Endian::from(is_lsb);
            let container = if magic == MH_MAGIC_64 || magic == MH_CIGAM_64 {
                Container::Big
            } else {
                Container::Little
            };
            Some(container::Ctx::new(container, le))
        }
        _ => None,
    };
    Ok((magic, ctx))
}

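/// A parsed, endian-aware, 32/64-bit Mach-o binary, borrowing (zero-copy) from the underlying bytes.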
pub struct MachO<'a> {
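    /// The Mach-o header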
    pub header: header::Header,
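    /// The load commands telling the kernel and dynamic linker how to load this binary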
    pub load_commands: Vec<load_command::LoadCommand>,
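    /// The segments of the binary, i.e., the pieces that get mapped into memory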
    pub segments: segment::Segments<'a>,
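    /// The nlist-style symbol table, if this binary has one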
    pub symbols: Option<symbols::Symbols<'a>>,
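    /// The dylibs this binary depends on; the first entry is "self" (replaced by the install name for dylibs)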
    pub libs: Vec<&'a str>,
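    /// The runtime search paths (LC_RPATH) recorded in this binary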
    pub rpaths: Vec<&'a str>,
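    /// The entry point (a virtual memory address), or 0 if there is none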
    pub entry: u64,
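    /// Whether `entry` came from an old-style LC_UNIXTHREAD command instead of LC_MAIN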
    pub old_style_entry: bool,
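    /// The install name of this dylib (from LC_ID_DYLIB), if any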
    pub name: Option<&'a str>,
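    /// Whether this binary is little-endian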
    pub little_endian: bool,
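    /// Whether this binary is 64-bit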
    pub is_64: bool,
    data: &'a [u8],
    ctx: container::Ctx,
    export_trie: Option<exports::ExportTrie<'a>>,
    bind_interpreter: Option<imports::BindInterpreter<'a>>,
}

impl<'a> fmt::Debug for MachO<'a> {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        fmt.debug_struct("MachO")
            .field("header", &self.header)
            .field("load_commands", &self.load_commands)
            .field("segments", &self.segments)
            .field("entry", &self.entry)
            .field("old_style_entry", &self.old_style_entry)
            .field("libs", &self.libs)
            .field("name", &self.name)
            .field("little_endian", &self.little_endian)
            .field("is_64", &self.is_64)
            .field("symbols()", &self.symbols().collect::<Vec<_>>())
            .field("exports()", &self.exports())
            .field("imports()", &self.imports())
            .finish()
    }
}

impl<'a> MachO<'a> {
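    /// Is this a relocatable object file (e.g., an intermediate `.o` produced by a compiler)?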
    pub fn is_object_file(&self) -> bool {
        self.header.filetype == header::MH_OBJECT
    }
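    /// Return an iterator over all the symbols in this binary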
    pub fn symbols(&self) -> symbols::SymbolIterator<'a> {
        if let Some(ref symbols) = self.symbols {
            symbols.into_iter()
        } else {
            symbols::SymbolIterator::default()
        }
    }
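    /// Return, for every section with relocation entries, its index within its segment,
    /// an iterator over its relocations, and the section itself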
    pub fn relocations(
        &self,
    ) -> error::Result<Vec<(usize, segment::RelocationIterator, segment::Section)>> {
        debug!("Iterating relocations");
        let mut relocs = Vec::new();
        for (_i, segment) in (&self.segments).into_iter().enumerate() {
            for (j, section) in segment.into_iter().enumerate() {
                let (section, _data) = section?;
                if section.nreloc > 0 {
                    relocs.push((j, section.iter_relocations(self.data, self.ctx), section));
                }
            }
        }
        Ok(relocs)
    }
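    /// Return the exported symbols in this binary, if it has an export trie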
    pub fn exports(&self) -> error::Result<Vec<exports::Export>> {
        if let Some(ref trie) = self.export_trie {
            trie.exports(self.libs.as_slice())
        } else {
            Ok(vec![])
        }
    }
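    /// Return the imported symbols in this binary, if it has dyld bind information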
    pub fn imports(&self) -> error::Result<Vec<imports::Import>> {
        if let Some(ref interpreter) = self.bind_interpreter {
            interpreter.imports(self.libs.as_slice(), self.segments.as_slice(), self.ctx)
        } else {
            Ok(vec![])
        }
    }
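    /// Parses the Mach-o binary from `bytes` at `offset`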
    pub fn parse(bytes: &'a [u8], offset: usize) -> error::Result<MachO<'a>> {
        Self::parse_impl(bytes, offset, false)
    }

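    /// Like `parse`, but lossy: recoverable errors in individual load commands are logged
    /// and skipped instead of failing the whole parse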
    pub fn parse_lossy(bytes: &'a [u8], offset: usize) -> error::Result<MachO<'a>> {
        Self::parse_impl(bytes, offset, true)
    }

    fn parse_impl(bytes: &'a [u8], mut offset: usize, lossy: bool) -> error::Result<MachO<'a>> {
        let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, offset)?;
        let ctx = if let Some(ctx) = maybe_ctx {
            ctx
        } else {
            return Err(error::Error::BadMagic(u64::from(magic)));
        };
        debug!("Ctx: {:?}", ctx);
        let offset = &mut offset;
        let header: header::Header = bytes.pread_with(*offset, ctx)?;
        debug!("Mach-o header: {:?}", header);
        let little_endian = ctx.le.is_little();
        let is_64 = ctx.container.is_big();
        *offset += header::Header::size_with(&ctx.container);
        let ncmds = header.ncmds;

        let sizeofcmds = header.sizeofcmds as usize;
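        // each load command is at least 8 bytes (cmd + cmdsize), so a valid `ncmds`
        // can never exceed `sizeofcmds / 8`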
        if ncmds > sizeofcmds / 8 || sizeofcmds > bytes.len() {
            return Err(error::Error::BufferTooShort(ncmds, "load commands"));
        }

        let mut cmds: Vec<load_command::LoadCommand> = Vec::with_capacity(ncmds);
        let mut symbols = None;
        let mut libs = vec!["self"];
        let mut rpaths = vec![];
        let mut export_trie = None;
        let mut bind_interpreter = None;
        let mut unixthread_entry_address = None;
        let mut main_entry_offset = None;
        let mut name = None;
        let mut segments = segment::Segments::new(ctx);
        for i in 0..ncmds {
            let cmd = load_command::LoadCommand::parse(bytes, offset, ctx.le)?;
            debug!("{} - {:?}", i, cmd);
            match cmd.command {
                load_command::CommandVariant::Segment32(command) => segments.push(
                    segment::Segment::from_32_impl(bytes, &command, cmd.offset, ctx, lossy)?,
                ),
                load_command::CommandVariant::Segment64(command) => segments.push(
                    segment::Segment::from_64_impl(bytes, &command, cmd.offset, ctx, lossy)?,
                ),
                load_command::CommandVariant::Symtab(command) => {
                    match symbols::Symbols::parse(bytes, &command, ctx) {
                        Ok(s) => symbols = Some(s),
                        Err(e) if lossy => {
                            debug!("CommandVariant::Symtab failed: {e}");
                        }
                        Err(e) => return Err(e),
                    }
                }
                load_command::CommandVariant::LoadDylib(command)
                | load_command::CommandVariant::LoadUpwardDylib(command)
                | load_command::CommandVariant::ReexportDylib(command)
                | load_command::CommandVariant::LoadWeakDylib(command)
                | load_command::CommandVariant::LazyLoadDylib(command) => {
                    match bytes.pread::<&str>(cmd.offset + command.dylib.name as usize) {
                        Ok(lib) => libs.push(lib),
                        Err(e) if lossy => {
                            debug!("CommandVariant::Load/Reexport Dylib failed: {e}");
                        }
                        Err(e) => return Err(e.into()),
                    }
                }
                load_command::CommandVariant::Rpath(command) => {
                    match bytes.pread::<&str>(cmd.offset + command.path as usize) {
                        Ok(rpath) => rpaths.push(rpath),
                        Err(e) if lossy => {
                            debug!("CommandVariant::Rpath failed: {e}");
                        }
                        Err(e) => return Err(e.into()),
                    }
                }
                load_command::CommandVariant::DyldInfo(command)
                | load_command::CommandVariant::DyldInfoOnly(command) => {
                    export_trie = Some(exports::ExportTrie::new(bytes, &command));
                    bind_interpreter = Some(imports::BindInterpreter::new(bytes, &command));
                }
                load_command::CommandVariant::DyldExportsTrie(command) => {
                    export_trie = Some(exports::ExportTrie::new_from_linkedit_data_command(
                        bytes, &command,
                    ));
                }
                load_command::CommandVariant::Unixthread(command) => {
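                    // only the first LC_UNIXTHREAD provides the entry point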
                    if unixthread_entry_address.is_none() {
                        unixthread_entry_address =
                            Some(command.instruction_pointer(header.cputype)?);
                    }
                }
                load_command::CommandVariant::Main(command) => {
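                    // only the first LC_MAIN provides the entry point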
                    if main_entry_offset.is_none() {
                        main_entry_offset = Some(command.entryoff);
                    }
                }
                load_command::CommandVariant::IdDylib(command) => {
                    match bytes.pread::<&str>(cmd.offset + command.dylib.name as usize) {
                        Ok(id) => {
                            libs[0] = id;
                            name = Some(id);
                        }
                        Err(e) if lossy => {
                            debug!("CommandVariant::IdDylib failed: {e}");
                        }
                        Err(e) => return Err(e.into()),
                    }
                }
                _ => (),
            }
            cmds.push(cmd)
        }

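        // LC_MAIN takes precedence over LC_UNIXTHREAD when both are present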
        let (entry, old_style_entry) = if let Some(offset) = main_entry_offset {
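            // `entryoff` from LC_MAIN is a file offset; convert it to a virtual address
            // using the __TEXT segment's vmaddr/fileoff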
            let base_address = segments
                .iter()
                .filter(|s| &s.segname[0..7] == b"__TEXT\0")
                .map(|s| s.vmaddr - s.fileoff)
                .next()
                .ok_or_else(|| {
                    error::Error::Malformed(format!(
                        "image specifies LC_MAIN offset {} but has no __TEXT segment",
                        offset
                    ))
                })?;

            (base_address + offset, false)
        } else if let Some(address) = unixthread_entry_address {
            (address, true)
        } else {
            (0, false)
        };

        Ok(MachO {
            header,
            load_commands: cmds,
            segments,
            symbols,
            libs,
            rpaths,
            export_trie,
            bind_interpreter,
            entry,
            old_style_entry,
            name,
            ctx,
            is_64,
            little_endian,
            data: bytes,
        })
    }
}

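/// A multi-architecture ("fat") Mach-o container; individual entries are parsed on demand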
pub struct MultiArch<'a> {
    data: &'a [u8],
    start: usize,
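    /// The number of architectures (fat arch headers) in this container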
    pub narches: usize,
}

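/// Iterator over the `FatArch` headers of a `MultiArch` container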
pub struct FatArchIterator<'a> {
    index: usize,
    data: &'a [u8],
    narches: usize,
    start: usize,
}

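/// A single entry of a multi-architecture ("fat") container: either a Mach-o binary or an archive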
#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum SingleArch<'a> {
    MachO(MachO<'a>),
    Archive(archive::Archive<'a>),
}

impl<'a> Iterator for FatArchIterator<'a> {
    type Item = error::Result<fat::FatArch>;
    fn next(&mut self) -> Option<Self::Item> {
        if self.index >= self.narches {
            None
        } else {
            let offset = (self.index * fat::SIZEOF_FAT_ARCH) + self.start;
            let arch = self
                .data
                .pread_with::<fat::FatArch>(offset, scroll::BE)
                .map_err(core::convert::Into::into);
            self.index += 1;
            Some(arch)
        }
    }
}

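/// Iterator over the entries of a `MultiArch` container, parsing each one as a `SingleArch`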
pub struct SingleArchIterator<'a> {
    index: usize,
    data: &'a [u8],
    narches: usize,
    start: usize,
}

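/// Peeks at the given 16 bytes and returns a hint: an archive, a Mach-o binary (with its
/// endianness and bitness), a fat container (with its number of architectures), or unknown.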
pub fn peek_bytes(bytes: &[u8; 16]) -> error::Result<crate::Hint> {
    if &bytes[0..archive::SIZEOF_MAGIC] == archive::MAGIC {
        Ok(crate::Hint::Archive)
    } else {
        let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, 0)?;
        match magic {
            header::MH_CIGAM_64 | header::MH_CIGAM | header::MH_MAGIC_64 | header::MH_MAGIC => {
                if let Some(ctx) = maybe_ctx {
                    Ok(crate::Hint::Mach(crate::HintData {
                        is_lsb: ctx.le.is_little(),
                        is_64: Some(ctx.container.is_big()),
                    }))
                } else {
                    Err(error::Error::Malformed(format!(
                        "Correct mach magic {:#x} does not have a matching parsing context!",
                        magic
                    )))
                }
            }
            fat::FAT_MAGIC => {
                let narchitectures = bytes.pread_with::<u32>(4, BE)? as usize;
                Ok(crate::Hint::MachFat(narchitectures))
            }
            _ => Ok(crate::Hint::Unknown(bytes.pread::<u64>(0)?)),
        }
    }
}

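/// Parses one fat-container entry as either a Mach-o binary or a static archive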
fn extract_multi_entry(bytes: &[u8]) -> error::Result<SingleArch> {
    if let Some(hint_bytes) = take_hint_bytes(bytes) {
        match peek_bytes(hint_bytes)? {
            crate::Hint::Mach(_) => {
                let binary = MachO::parse(bytes, 0)?;
                Ok(SingleArch::MachO(binary))
            }
            crate::Hint::Archive => {
                let archive = archive::Archive::parse(bytes)?;
                Ok(SingleArch::Archive(archive))
            }
            _ => Err(error::Error::Malformed(format!(
                "multi-arch entry must be a Mach-O binary or an archive"
            ))),
        }
    } else {
        Err(error::Error::Malformed(format!("Object is too small")))
    }
}

impl<'a> Iterator for SingleArchIterator<'a> {
    type Item = error::Result<SingleArch<'a>>;
    fn next(&mut self) -> Option<Self::Item> {
        if self.index >= self.narches {
            None
        } else {
            let index = self.index;
            let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start;
            self.index += 1;
            match self.data.pread_with::<fat::FatArch>(offset, scroll::BE) {
                Ok(arch) => {
                    let bytes = arch.slice(self.data);
                    Some(extract_multi_entry(bytes))
                }
                Err(e) => Some(Err(e.into())),
            }
        }
    }
}

impl<'a, 'b> IntoIterator for &'b MultiArch<'a> {
    type Item = error::Result<SingleArch<'a>>;
    type IntoIter = SingleArchIterator<'a>;
    fn into_iter(self) -> Self::IntoIter {
        SingleArchIterator {
            index: 0,
            data: self.data,
            narches: self.narches,
            start: self.start,
        }
    }
}

impl<'a> MultiArch<'a> {
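    /// Lazily construct `Self`: only the fat header is parsed here; individual entries are read on demand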
    pub fn new(bytes: &'a [u8]) -> error::Result<Self> {
        let header = fat::FatHeader::parse(bytes)?;
        Ok(MultiArch {
            data: bytes,
            start: fat::SIZEOF_FAT_HEADER,
            narches: header.nfat_arch as usize,
        })
    }
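    /// Iterate over every `FatArch` header in this container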
    pub fn iter_arches(&self) -> FatArchIterator {
        FatArchIterator {
            index: 0,
            data: self.data,
            narches: self.narches,
            start: self.start,
        }
    }
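    /// Return all of the `FatArch` headers in this container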
    pub fn arches(&self) -> error::Result<Vec<fat::FatArch>> {
        if self.narches > self.data.len() / fat::SIZEOF_FAT_ARCH {
            return Err(error::Error::BufferTooShort(self.narches, "arches"));
        }

        let mut arches = Vec::with_capacity(self.narches);
        for arch in self.iter_arches() {
            arches.push(arch?);
        }
        Ok(arches)
    }
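    /// Try to get the entry (Mach-o binary or archive) at `index`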
    pub fn get(&self, index: usize) -> error::Result<SingleArch<'a>> {
        if index >= self.narches {
            return Err(error::Error::Malformed(format!(
                "Requested the {}-th binary, but there are only {} architectures in this container",
                index, self.narches
            )));
        }
        let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start;
        let arch = self.data.pread_with::<fat::FatArch>(offset, scroll::BE)?;
        let bytes = arch.slice(self.data);
        extract_multi_entry(bytes)
    }

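    /// Return the first entry whose fat arch header satisfies the predicate `f`, if any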
    pub fn find<F: Fn(error::Result<fat::FatArch>) -> bool>(
        &'a self,
        f: F,
    ) -> Option<error::Result<SingleArch<'a>>> {
        for (i, arch) in self.iter_arches().enumerate() {
            if f(arch) {
                return Some(self.get(i));
            }
        }
        None
    }
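    /// Try to find the fat arch header with the given `cputype`, if any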
    pub fn find_cputype(&self, cputype: u32) -> error::Result<Option<fat::FatArch>> {
        for arch in self.iter_arches() {
            let arch = arch?;
            if arch.cputype == cputype {
                return Ok(Some(arch));
            }
        }
        Ok(None)
    }
}

impl<'a> fmt::Debug for MultiArch<'a> {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        fmt.debug_struct("MultiArch")
            .field("arches", &self.arches().unwrap_or_default())
            .field("data", &self.data.len())
            .finish()
    }
}

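/// Either a multi-architecture ("fat") container or a single Mach-o binary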
#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum Mach<'a> {
    Fat(MultiArch<'a>),
    Binary(MachO<'a>),
}

impl<'a> Mach<'a> {
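    /// Parses `bytes`, yielding either a fat container or a single Mach-o binary.
    ///
    /// A minimal usage sketch (the path below is hypothetical):
    ///
    /// ```no_run
    /// let bytes = std::fs::read("/path/to/some/binary").unwrap();
    /// match goblin::mach::Mach::parse(&bytes).unwrap() {
    ///     goblin::mach::Mach::Binary(macho) => println!("entry: {:#x}", macho.entry),
    ///     goblin::mach::Mach::Fat(fat) => println!("{} arches", fat.narches),
    /// }
    /// ```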
    pub fn parse(bytes: &'a [u8]) -> error::Result<Self> {
        Self::parse_impl(bytes, false)
    }

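    /// Like `parse`, but lossy: recoverable errors in the load commands of a contained
    /// binary are logged and skipped rather than aborting the parse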
    pub fn parse_lossy(bytes: &'a [u8]) -> error::Result<Self> {
        Self::parse_impl(bytes, true)
    }

    fn parse_impl(bytes: &'a [u8], lossy: bool) -> error::Result<Self> {
        let size = bytes.len();
        if size < 4 {
            let error = error::Error::Malformed("size is smaller than a magical number".into());
            return Err(error);
        }
        let magic = peek(bytes, 0)?;
        match magic {
            fat::FAT_MAGIC => {
                let multi = MultiArch::new(bytes)?;
                Ok(Mach::Fat(multi))
            }
            _ => {
                let binary = MachO::parse_impl(bytes, 0, lossy)?;
                Ok(Mach::Binary(binary))
            }
        }
    }
}

#[cfg(test)]
mod test {
    use super::{Mach, SingleArch};

    #[test]
    fn parse_multi_arch_of_macho_binaries() {
        let bytes = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/assets/hello_world_fat_binaries"
        ));
        let mach = Mach::parse(bytes).expect("failed to parse input file");
        match mach {
            Mach::Fat(fat) => {
                assert!(fat.into_iter().count() > 0);
                for entry in fat.into_iter() {
                    let entry = entry.expect("failed to read entry");
                    match entry {
                        SingleArch::MachO(macho) => {
                            assert!(macho.symbols().count() > 0);
                        }
                        _ => panic!("expected SingleArch::MachO, got {:?}", entry),
                    }
                }
            }
            Mach::Binary(_) => panic!("expected Mach::Fat, got Mach::Binary"),
        }
    }

    #[test]
    fn parse_multi_arch_of_archives() {
        let bytes = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/assets/hello_world_fat_archives"
        ));
        let mach = Mach::parse(bytes).expect("failed to parse input file");
        match mach {
            Mach::Fat(fat) => {
                assert!(fat.into_iter().count() > 0);
                for entry in fat.into_iter() {
                    let entry = entry.expect("failed to read entry");
                    match entry {
                        SingleArch::Archive(archive) => {
                            assert!(!archive.members().is_empty())
                        }
                        _ => panic!("expected SingleArch::Archive, got {:?}", entry),
                    }
                }
            }
            Mach::Binary(_) => panic!("expected Mach::Fat, got Mach::Binary"),
        }
    }
}