// macho_unwind_info/lib.rs
//! A zero-copy parser for the contents of the `__unwind_info` section of a
//! Mach-O binary.
//!
//! Quickly look up the unwinding opcode for an address. Then parse the opcode to find
//! out how to recover the return address and the caller frame's register values.
//!
//! This crate is intended to be fast enough to be used in a sampling profiler.
//! Re-parsing from scratch is cheap and can be done on every sample.
//!
//! For the full unwinding experience, both `__unwind_info` and `__eh_frame` may need
//! to be consulted. The two sections are complementary: `__unwind_info` handles the
//! easy cases, and refers to an `__eh_frame` FDE for the hard cases. Conversely,
//! `__eh_frame` only includes FDEs for functions whose unwinding info cannot be
//! represented in `__unwind_info`.
//!
//! On x86 and x86_64, `__unwind_info` can represent most functions regardless of
//! whether they were compiled with framepointers or without.
//!
//! On arm64, compiling without framepointers is strongly discouraged, and
//! `__unwind_info` can only represent functions which have framepointers or
//! which don't need to restore any registers. As a result, if you have an arm64
//! binary without framepointers (rare!), then the `__unwind_info` basically just
//! acts as an index for `__eh_frame`, similarly to `.eh_frame_hdr` for ELF.
//!
//! In clang's default configuration for arm64, non-leaf functions have framepointers
//! and leaf functions without stored registers on the stack don't have framepointers.
//! For leaf functions, the return address is kept in the `lr` register for the entire
//! duration of the function. And the unwind info lets you discern between these two
//! types of functions ("frame-based" and "frameless").
//!
//! # Example
//!
//! ```rust
//! use macho_unwind_info::UnwindInfo;
//! use macho_unwind_info::opcodes::OpcodeX86_64;
//!
//! # fn example(data: &[u8]) -> Result<(), macho_unwind_info::Error> {
//! let unwind_info = UnwindInfo::parse(data)?;
//!
//! if let Some(function) = unwind_info.lookup(0x1234)? {
//!     println!("Found function entry covering the address 0x1234:");
//!     let opcode = OpcodeX86_64::parse(function.opcode);
//!     println!("0x{:08x}..0x{:08x}: {}", function.start_address, function.end_address, opcode);
//! }
//! # Ok(())
//! # }
//! ```

49mod error;
50mod num_display;
51
52/// Provides architecture-specific opcode parsing.
53pub mod opcodes;
54/// Lower-level structs for interpreting the format data. Can be used if the convenience APIs are too limiting.
55pub mod raw;
56
57mod reader;
58
59pub use error::*;
60use raw::*;
61
62/// A parsed representation of the unwind info.
63///
64/// The UnwindInfo contains a list of pages, each of which contain a list of
65/// function entries.
66pub struct UnwindInfo<'a> {
67 /// The full __unwind_info section data.
68 data: &'a [u8],
69
70 /// The list of global opcodes.
71 global_opcodes: &'a [Opcode],
72
73 /// The list of page entries in this UnwindInfo.
74 pages: &'a [PageEntry],
75}
76
/// Describes one function found in the unwind info: the address range it
/// covers and the opcode that says how to unwind it.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Function {
    /// First address belonging to this function.
    pub start_address: u32,

    /// End address of this function, padding after the function included.
    /// This is really the start address of the *next* function entry; for the
    /// last function it is the address of the sentinel page entry.
    pub end_address: u32,

    /// The unwinding opcode for this function. Its interpretation is
    /// architecture-specific; the [opcodes] module provides the parsers.
    pub opcode: u32,
}

95impl<'a> UnwindInfo<'a> {
96 /// Create an [UnwindInfo] instance which wraps the raw bytes of a mach-O binary's
97 /// `__unwind_info` section. The data can have arbitrary alignment. The parsing done
98 /// in this function is minimal; it's basically just three bounds checks.
99 pub fn parse(data: &'a [u8]) -> Result<Self, Error> {
100 let header = CompactUnwindInfoHeader::parse(data)?;
101 let global_opcodes = header.global_opcodes(data)?;
102 let pages = header.pages(data)?;
103 Ok(Self {
104 data,
105 global_opcodes,
106 pages,
107 })
108 }
109
110 /// Returns an iterator over all the functions in this UnwindInfo.
111 pub fn functions(&self) -> FunctionIter<'a> {
112 FunctionIter {
113 data: self.data,
114 global_opcodes: self.global_opcodes,
115 pages: self.pages,
116 cur_page: None,
117 }
118 }
119
120 /// Returns the range of addresses covered by unwind information.
121 pub fn address_range(&self) -> core::ops::Range<u32> {
122 if self.pages.is_empty() {
123 return 0..0;
124 }
125 let first_page = self.pages.first().unwrap();
126 let last_page = self.pages.last().unwrap();
127 first_page.first_address()..last_page.first_address()
128 }
129
130 /// Looks up the unwind information for the function that covers the given address.
131 /// Returns `Ok(Some(function))` if a function was found.
132 /// Returns `Ok(None)` if the address was outside of the range of addresses covered
133 /// by the unwind info.
134 /// Returns `Err(error)` if there was a problem with the format of the `__unwind_info`
135 /// data.
136 ///
137 /// This lookup is architecture agnostic. The opcode is returned as a u32.
138 /// To actually perform unwinding, the opcode needs to be parsed in an
139 /// architecture-specific manner.
140 ///
141 /// The design of the compact unwinding format makes this lookup extremely cheap.
142 /// It's just two binary searches: First to find the right page, end then to find
143 /// the right function within a page. The search happens inside the wrapped data,
144 /// with no extra copies.
145 pub fn lookup(&self, pc: u32) -> Result<Option<Function>, Error> {
146 let Self {
147 pages,
148 data,
149 global_opcodes,
150 } = self;
151 let page_index = match pages.binary_search_by_key(&pc, PageEntry::first_address) {
152 Ok(i) => i,
153 Err(insertion_index) => {
154 if insertion_index == 0 {
155 return Ok(None);
156 }
157 insertion_index - 1
158 }
159 };
160 if page_index == pages.len() - 1 {
161 // We found the sentinel last page, which just marks the end of the range.
162 // So the looked up address is at or after the end address, i.e. outside the
163 // range of addresses covered by this UnwindInfo.
164 return Ok(None);
165 }
166 let page_entry = &pages[page_index];
167 let next_page_entry = &pages[page_index + 1];
168 let page_offset = page_entry.page_offset();
169 match page_entry.page_kind(data)? {
170 consts::PAGE_KIND_REGULAR => {
171 let page = RegularPage::parse(data, page_offset.into())?;
172 let functions = page.functions(data, page_offset)?;
173 let function_index =
174 match functions.binary_search_by_key(&pc, RegularFunctionEntry::address) {
175 Ok(i) => i,
176 Err(insertion_index) => {
177 if insertion_index == 0 {
178 return Err(Error::InvalidPageEntryFirstAddress);
179 }
180 insertion_index - 1
181 }
182 };
183 let entry = &functions[function_index];
184 let fun_address = entry.address();
185 let next_fun_address = if let Some(next_entry) = functions.get(function_index + 1) {
186 next_entry.address()
187 } else {
188 next_page_entry.first_address()
189 };
190 Ok(Some(Function {
191 start_address: fun_address,
192 end_address: next_fun_address,
193 opcode: entry.opcode(),
194 }))
195 }
196 consts::PAGE_KIND_COMPRESSED => {
197 let page = CompressedPage::parse(data, page_offset.into())?;
198 let functions = page.functions(data, page_offset)?;
199 let page_address = page_entry.first_address();
200 let rel_pc = pc - page_address;
201 let function_index = match functions.binary_search_by_key(&rel_pc, |&entry| {
202 CompressedFunctionEntry::new(entry.into()).relative_address()
203 }) {
204 Ok(i) => i,
205 Err(insertion_index) => {
206 if insertion_index == 0 {
207 return Err(Error::InvalidPageEntryFirstAddress);
208 }
209 insertion_index - 1
210 }
211 };
212
213 let entry = CompressedFunctionEntry::new(functions[function_index].into());
214 let fun_address = page_address + entry.relative_address();
215 let next_fun_address = if let Some(next_entry) = functions.get(function_index + 1) {
216 let next_entry = CompressedFunctionEntry::new((*next_entry).into());
217 page_address + next_entry.relative_address()
218 } else {
219 next_page_entry.first_address()
220 };
221
222 let opcode_index: usize = entry.opcode_index().into();
223 let opcode = if opcode_index < global_opcodes.len() {
224 global_opcodes[opcode_index].opcode()
225 } else {
226 let local_opcodes = page.local_opcodes(data, page_offset)?;
227 let local_index = opcode_index - global_opcodes.len();
228 local_opcodes[local_index].opcode()
229 };
230 Ok(Some(Function {
231 start_address: fun_address,
232 end_address: next_fun_address,
233 opcode,
234 }))
235 }
236 consts::PAGE_KIND_SENTINEL => {
237 // Only the last page should be a sentinel page, and we've already checked earlier
238 // that we're not in the last page.
239 Err(Error::UnexpectedSentinelPage)
240 }
241 _ => Err(Error::InvalidPageKind),
242 }
243 }
244}
245
246/// An iterator over the functions in an UnwindInfo page.
247pub struct FunctionIter<'a> {
248 /// The full __unwind_info section data.
249 data: &'a [u8],
250
251 /// The list of global opcodes.
252 global_opcodes: &'a [Opcode],
253
254 /// The slice of the remaining to-be-iterated-over pages.
255 pages: &'a [PageEntry],
256
257 /// The page whose functions we're iterating over at the moment.
258 cur_page: Option<PageWithPartialFunctions<'a>>,
259}
260
261/// The current page of the function iterator.
262/// The functions field is the slice of the remaining to-be-iterated-over functions.
263#[derive(Clone, Copy)]
264enum PageWithPartialFunctions<'a> {
265 Regular {
266 next_page_address: u32,
267 functions: &'a [RegularFunctionEntry],
268 },
269 Compressed {
270 page_address: u32,
271 next_page_address: u32,
272 local_opcodes: &'a [Opcode],
273 functions: &'a [U32],
274 },
275}
276
277impl<'a> FunctionIter<'a> {
278 #[allow(clippy::should_implement_trait)]
279 pub fn next(&mut self) -> Result<Option<Function>, Error> {
280 loop {
281 let cur_page = if let Some(cur_page) = self.cur_page.as_mut() {
282 cur_page
283 } else {
284 let cur_page = match self.next_page()? {
285 Some(page) => page,
286 None => return Ok(None),
287 };
288 self.cur_page.insert(cur_page)
289 };
290
291 match cur_page {
292 PageWithPartialFunctions::Regular {
293 next_page_address,
294 functions,
295 } => {
296 if let Some((entry, remainder)) = functions.split_first() {
297 *functions = remainder;
298 let start_address = entry.address();
299 let end_address = remainder
300 .first()
301 .map(RegularFunctionEntry::address)
302 .unwrap_or(*next_page_address);
303 return Ok(Some(Function {
304 start_address,
305 end_address,
306 opcode: entry.opcode(),
307 }));
308 }
309 }
310 PageWithPartialFunctions::Compressed {
311 page_address,
312 functions,
313 next_page_address,
314 local_opcodes,
315 } => {
316 if let Some((entry, remainder)) = functions.split_first() {
317 *functions = remainder;
318 let entry = CompressedFunctionEntry::new((*entry).into());
319 let start_address = *page_address + entry.relative_address();
320 let end_address = match remainder.first() {
321 Some(next_entry) => {
322 let next_entry = CompressedFunctionEntry::new((*next_entry).into());
323 *page_address + next_entry.relative_address()
324 }
325 None => *next_page_address,
326 };
327 let opcode_index: usize = entry.opcode_index().into();
328 let opcode = if opcode_index < self.global_opcodes.len() {
329 self.global_opcodes[opcode_index].opcode()
330 } else {
331 let local_index = opcode_index - self.global_opcodes.len();
332 local_opcodes[local_index].opcode()
333 };
334 return Ok(Some(Function {
335 start_address,
336 end_address,
337 opcode,
338 }));
339 }
340 }
341 }
342 self.cur_page = None;
343 }
344 }
345
346 fn next_page(&mut self) -> Result<Option<PageWithPartialFunctions<'a>>, Error> {
347 let (page_entry, remainder) = match self.pages.split_first() {
348 Some(split) => split,
349 None => return Ok(None),
350 };
351
352 self.pages = remainder;
353
354 let next_page_entry = match remainder.first() {
355 Some(entry) => entry,
356 None => return Ok(None),
357 };
358
359 let page_offset = page_entry.page_offset();
360 let page_address = page_entry.first_address();
361 let next_page_address = next_page_entry.first_address();
362 let data = self.data;
363 let cur_page = match page_entry.page_kind(data)? {
364 consts::PAGE_KIND_REGULAR => {
365 let page = RegularPage::parse(data, page_offset.into())?;
366 PageWithPartialFunctions::Regular {
367 functions: page.functions(data, page_offset)?,
368 next_page_address,
369 }
370 }
371 consts::PAGE_KIND_COMPRESSED => {
372 let page = CompressedPage::parse(data, page_offset.into())?;
373 PageWithPartialFunctions::Compressed {
374 page_address,
375 next_page_address,
376 functions: page.functions(data, page_offset)?,
377 local_opcodes: page.local_opcodes(data, page_offset)?,
378 }
379 }
380 consts::PAGE_KIND_SENTINEL => return Err(Error::UnexpectedSentinelPage),
381 _ => return Err(Error::InvalidPageKind),
382 };
383 Ok(Some(cur_page))
384 }
385}