macho_unwind_info/
lib.rs

1//! A zero-copy parser for the contents of the `__unwind_info` section of a
2//! mach-O binary.
3//!
4//! Quickly look up the unwinding opcode for an address. Then parse the opcode to find
5//! out how to recover the return address and the caller frame's register values.
6//!
7//! This crate is intended to be fast enough to be used in a sampling profiler.
8//! Re-parsing from scratch is cheap and can be done on every sample.
9//!
10//! For the full unwinding experience, both `__unwind_info` and `__eh_frame` may need
11//! to be consulted. The two sections are complementary: `__unwind_info` handles the
12//! easy cases, and refers to an `__eh_frame` FDE for the hard cases. Conversely,
13//! `__eh_frame` only includes FDEs for functions whose unwinding info cannot be
14//! represented in `__unwind_info`.
15//!
16//! On x86 and x86_64, `__unwind_info` can represent most functions regardless of
17//! whether they were compiled with framepointers or without.
18//!
19//! On arm64, compiling without framepointers is strongly discouraged, and
20//! `__unwind_info` can only represent functions which have framepointers or
21//! which don't need to restore any registers. As a result, if you have an arm64
22//! binary without framepointers (rare!), then the `__unwind_info` basically just
23//! acts as an index for `__eh_frame`, similarly to `.eh_frame_hdr` for ELF.
24//!
25//! In clang's default configuration for arm64, non-leaf functions have framepointers
26//! and leaf functions without stored registers on the stack don't have framepointers.
27//! For leaf functions, the return address is kept in the `lr` register for the entire
28//! duration of the function. And the unwind info lets you discern between these two
29//! types of functions ("frame-based" and "frameless").
30//!
31//! # Example
32//!
33//! ```rust
34//! use macho_unwind_info::UnwindInfo;
35//! use macho_unwind_info::opcodes::OpcodeX86_64;
36//!
37//! # fn example(data: &[u8]) -> Result<(), macho_unwind_info::Error> {
38//! let unwind_info = UnwindInfo::parse(data)?;
39//!
40//! if let Some(function) = unwind_info.lookup(0x1234)? {
41//!     println!("Found function entry covering the address 0x1234:");
42//!     let opcode = OpcodeX86_64::parse(function.opcode);
43//!     println!("0x{:08x}..0x{:08x}: {}", function.start_address, function.end_address, opcode);
44//! }
45//! # Ok(())
46//! # }
47//! ```
48
49mod error;
50mod num_display;
51
52/// Provides architecture-specific opcode parsing.
53pub mod opcodes;
54/// Lower-level structs for interpreting the format data. Can be used if the convenience APIs are too limiting.
55pub mod raw;
56
57mod reader;
58
59pub use error::*;
60use raw::*;
61
62/// A parsed representation of the unwind info.
63///
64/// The UnwindInfo contains a list of pages, each of which contain a list of
65/// function entries.
66pub struct UnwindInfo<'a> {
67    /// The full __unwind_info section data.
68    data: &'a [u8],
69
70    /// The list of global opcodes.
71    global_opcodes: &'a [Opcode],
72
73    /// The list of page entries in this UnwindInfo.
74    pages: &'a [PageEntry],
75}
76
/// Describes one function found in the UnwindInfo.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Function {
    /// The first address that belongs to this function.
    pub start_address: u32,

    /// The address at which this function ends, with any padding after the
    /// function counted as part of it. This is really the address of the
    /// *next* function entry; for the last function it is the address of the
    /// sentinel page entry.
    pub end_address: u32,

    /// The opcode describing how to unwind out of this function. Its
    /// interpretation is architecture-specific; the [opcodes] module offers
    /// the facilities for parsing it.
    pub opcode: u32,
}
94
95impl<'a> UnwindInfo<'a> {
96    /// Create an [UnwindInfo] instance which wraps the raw bytes of a mach-O binary's
97    /// `__unwind_info` section. The data can have arbitrary alignment. The parsing done
98    /// in this function is minimal; it's basically just three bounds checks.
99    pub fn parse(data: &'a [u8]) -> Result<Self, Error> {
100        let header = CompactUnwindInfoHeader::parse(data)?;
101        let global_opcodes = header.global_opcodes(data)?;
102        let pages = header.pages(data)?;
103        Ok(Self {
104            data,
105            global_opcodes,
106            pages,
107        })
108    }
109
110    /// Returns an iterator over all the functions in this UnwindInfo.
111    pub fn functions(&self) -> FunctionIter<'a> {
112        FunctionIter {
113            data: self.data,
114            global_opcodes: self.global_opcodes,
115            pages: self.pages,
116            cur_page: None,
117        }
118    }
119
120    /// Returns the range of addresses covered by unwind information.
121    pub fn address_range(&self) -> core::ops::Range<u32> {
122        if self.pages.is_empty() {
123            return 0..0;
124        }
125        let first_page = self.pages.first().unwrap();
126        let last_page = self.pages.last().unwrap();
127        first_page.first_address()..last_page.first_address()
128    }
129
130    /// Looks up the unwind information for the function that covers the given address.
131    /// Returns `Ok(Some(function))` if a function was found.
132    /// Returns `Ok(None)` if the address was outside of the range of addresses covered
133    /// by the unwind info.
134    /// Returns `Err(error)` if there was a problem with the format of the `__unwind_info`
135    /// data.
136    ///
137    /// This lookup is architecture agnostic. The opcode is returned as a u32.
138    /// To actually perform unwinding, the opcode needs to be parsed in an
139    /// architecture-specific manner.
140    ///
141    /// The design of the compact unwinding format makes this lookup extremely cheap.
142    /// It's just two binary searches: First to find the right page, end then to find
143    /// the right function within a page. The search happens inside the wrapped data,
144    /// with no extra copies.
145    pub fn lookup(&self, pc: u32) -> Result<Option<Function>, Error> {
146        let Self {
147            pages,
148            data,
149            global_opcodes,
150        } = self;
151        let page_index = match pages.binary_search_by_key(&pc, PageEntry::first_address) {
152            Ok(i) => i,
153            Err(insertion_index) => {
154                if insertion_index == 0 {
155                    return Ok(None);
156                }
157                insertion_index - 1
158            }
159        };
160        if page_index == pages.len() - 1 {
161            // We found the sentinel last page, which just marks the end of the range.
162            // So the looked up address is at or after the end address, i.e. outside the
163            // range of addresses covered by this UnwindInfo.
164            return Ok(None);
165        }
166        let page_entry = &pages[page_index];
167        let next_page_entry = &pages[page_index + 1];
168        let page_offset = page_entry.page_offset();
169        match page_entry.page_kind(data)? {
170            consts::PAGE_KIND_REGULAR => {
171                let page = RegularPage::parse(data, page_offset.into())?;
172                let functions = page.functions(data, page_offset)?;
173                let function_index =
174                    match functions.binary_search_by_key(&pc, RegularFunctionEntry::address) {
175                        Ok(i) => i,
176                        Err(insertion_index) => {
177                            if insertion_index == 0 {
178                                return Err(Error::InvalidPageEntryFirstAddress);
179                            }
180                            insertion_index - 1
181                        }
182                    };
183                let entry = &functions[function_index];
184                let fun_address = entry.address();
185                let next_fun_address = if let Some(next_entry) = functions.get(function_index + 1) {
186                    next_entry.address()
187                } else {
188                    next_page_entry.first_address()
189                };
190                Ok(Some(Function {
191                    start_address: fun_address,
192                    end_address: next_fun_address,
193                    opcode: entry.opcode(),
194                }))
195            }
196            consts::PAGE_KIND_COMPRESSED => {
197                let page = CompressedPage::parse(data, page_offset.into())?;
198                let functions = page.functions(data, page_offset)?;
199                let page_address = page_entry.first_address();
200                let rel_pc = pc - page_address;
201                let function_index = match functions.binary_search_by_key(&rel_pc, |&entry| {
202                    CompressedFunctionEntry::new(entry.into()).relative_address()
203                }) {
204                    Ok(i) => i,
205                    Err(insertion_index) => {
206                        if insertion_index == 0 {
207                            return Err(Error::InvalidPageEntryFirstAddress);
208                        }
209                        insertion_index - 1
210                    }
211                };
212
213                let entry = CompressedFunctionEntry::new(functions[function_index].into());
214                let fun_address = page_address + entry.relative_address();
215                let next_fun_address = if let Some(next_entry) = functions.get(function_index + 1) {
216                    let next_entry = CompressedFunctionEntry::new((*next_entry).into());
217                    page_address + next_entry.relative_address()
218                } else {
219                    next_page_entry.first_address()
220                };
221
222                let opcode_index: usize = entry.opcode_index().into();
223                let opcode = if opcode_index < global_opcodes.len() {
224                    global_opcodes[opcode_index].opcode()
225                } else {
226                    let local_opcodes = page.local_opcodes(data, page_offset)?;
227                    let local_index = opcode_index - global_opcodes.len();
228                    local_opcodes[local_index].opcode()
229                };
230                Ok(Some(Function {
231                    start_address: fun_address,
232                    end_address: next_fun_address,
233                    opcode,
234                }))
235            }
236            consts::PAGE_KIND_SENTINEL => {
237                // Only the last page should be a sentinel page, and we've already checked earlier
238                // that we're not in the last page.
239                Err(Error::UnexpectedSentinelPage)
240            }
241            _ => Err(Error::InvalidPageKind),
242        }
243    }
244}
245
246/// An iterator over the functions in an UnwindInfo page.
247pub struct FunctionIter<'a> {
248    /// The full __unwind_info section data.
249    data: &'a [u8],
250
251    /// The list of global opcodes.
252    global_opcodes: &'a [Opcode],
253
254    /// The slice of the remaining to-be-iterated-over pages.
255    pages: &'a [PageEntry],
256
257    /// The page whose functions we're iterating over at the moment.
258    cur_page: Option<PageWithPartialFunctions<'a>>,
259}
260
261/// The current page of the function iterator.
262/// The functions field is the slice of the remaining to-be-iterated-over functions.
263#[derive(Clone, Copy)]
264enum PageWithPartialFunctions<'a> {
265    Regular {
266        next_page_address: u32,
267        functions: &'a [RegularFunctionEntry],
268    },
269    Compressed {
270        page_address: u32,
271        next_page_address: u32,
272        local_opcodes: &'a [Opcode],
273        functions: &'a [U32],
274    },
275}
276
277impl<'a> FunctionIter<'a> {
278    #[allow(clippy::should_implement_trait)]
279    pub fn next(&mut self) -> Result<Option<Function>, Error> {
280        loop {
281            let cur_page = if let Some(cur_page) = self.cur_page.as_mut() {
282                cur_page
283            } else {
284                let cur_page = match self.next_page()? {
285                    Some(page) => page,
286                    None => return Ok(None),
287                };
288                self.cur_page.insert(cur_page)
289            };
290
291            match cur_page {
292                PageWithPartialFunctions::Regular {
293                    next_page_address,
294                    functions,
295                } => {
296                    if let Some((entry, remainder)) = functions.split_first() {
297                        *functions = remainder;
298                        let start_address = entry.address();
299                        let end_address = remainder
300                            .first()
301                            .map(RegularFunctionEntry::address)
302                            .unwrap_or(*next_page_address);
303                        return Ok(Some(Function {
304                            start_address,
305                            end_address,
306                            opcode: entry.opcode(),
307                        }));
308                    }
309                }
310                PageWithPartialFunctions::Compressed {
311                    page_address,
312                    functions,
313                    next_page_address,
314                    local_opcodes,
315                } => {
316                    if let Some((entry, remainder)) = functions.split_first() {
317                        *functions = remainder;
318                        let entry = CompressedFunctionEntry::new((*entry).into());
319                        let start_address = *page_address + entry.relative_address();
320                        let end_address = match remainder.first() {
321                            Some(next_entry) => {
322                                let next_entry = CompressedFunctionEntry::new((*next_entry).into());
323                                *page_address + next_entry.relative_address()
324                            }
325                            None => *next_page_address,
326                        };
327                        let opcode_index: usize = entry.opcode_index().into();
328                        let opcode = if opcode_index < self.global_opcodes.len() {
329                            self.global_opcodes[opcode_index].opcode()
330                        } else {
331                            let local_index = opcode_index - self.global_opcodes.len();
332                            local_opcodes[local_index].opcode()
333                        };
334                        return Ok(Some(Function {
335                            start_address,
336                            end_address,
337                            opcode,
338                        }));
339                    }
340                }
341            }
342            self.cur_page = None;
343        }
344    }
345
346    fn next_page(&mut self) -> Result<Option<PageWithPartialFunctions<'a>>, Error> {
347        let (page_entry, remainder) = match self.pages.split_first() {
348            Some(split) => split,
349            None => return Ok(None),
350        };
351
352        self.pages = remainder;
353
354        let next_page_entry = match remainder.first() {
355            Some(entry) => entry,
356            None => return Ok(None),
357        };
358
359        let page_offset = page_entry.page_offset();
360        let page_address = page_entry.first_address();
361        let next_page_address = next_page_entry.first_address();
362        let data = self.data;
363        let cur_page = match page_entry.page_kind(data)? {
364            consts::PAGE_KIND_REGULAR => {
365                let page = RegularPage::parse(data, page_offset.into())?;
366                PageWithPartialFunctions::Regular {
367                    functions: page.functions(data, page_offset)?,
368                    next_page_address,
369                }
370            }
371            consts::PAGE_KIND_COMPRESSED => {
372                let page = CompressedPage::parse(data, page_offset.into())?;
373                PageWithPartialFunctions::Compressed {
374                    page_address,
375                    next_page_address,
376                    functions: page.functions(data, page_offset)?,
377                    local_opcodes: page.local_opcodes(data, page_offset)?,
378                }
379            }
380            consts::PAGE_KIND_SENTINEL => return Err(Error::UnexpectedSentinelPage),
381            _ => return Err(Error::InvalidPageKind),
382        };
383        Ok(Some(cur_page))
384    }
385}