goblin/mach/
exports.rs

//! Symbols exported by this binary and available for dynamic linking are encoded in mach-o binaries using a special trie.
//!
//! **Note**: the trie is constructed lazily in case it is never used. Because computing the exports requires allocation, you need to call the export trie's [exports()](struct.ExportTrie.html#method.exports) method to compute them.
4
5// TODO:
6// (1) Weak of regular_symbol_info type probably needs to be added ?
7// (3) /usr/lib/libstdc++.6.0.9.dylib has flag 0xc at many offsets... they're weak
8
9use crate::error;
10use crate::mach::load_command;
11use alloc::string::String;
12use alloc::vec::Vec;
13use core::fmt::{self, Debug};
14use core::ops::Range;
15use scroll::{Pread, Uleb128};
16
/// Raw flags value attached to a terminal node of the export trie.
type Flag = u64;

// "The following are used on the flags byte of a terminal node
// in the export information."

/// Mask selecting the symbol-kind bits out of the flags.
pub const EXPORT_SYMBOL_FLAGS_KIND_MASK: Flag = 0b11;
/// Symbol kind (after masking): regular symbol.
pub const EXPORT_SYMBOL_FLAGS_KIND_REGULAR: Flag = 0;
/// Symbol kind (after masking): thread-local symbol.
pub const EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL: Flag = 1;
/// Symbol kind (after masking): absolute symbol.
///
/// This value is not present in `loader.h`; it only appears in the dyld compressed image
/// loader source code, where it is available through a `#define` for the export flags.
/// libobjc has been observed to use it.
pub const EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE: Flag = 2;
/// Flag bit: the export is a weak definition.
pub const EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION: Flag = 1 << 2;
/// Flag bit: the export is a re-export from another library.
pub const EXPORT_SYMBOL_FLAGS_REEXPORT: Flag = 1 << 3;
/// Flag bit: the export is described by a stub offset and a resolver offset.
pub const EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER: Flag = 1 << 4;
28
29#[derive(Debug)]
30pub enum SymbolKind {
31    Regular,
32    Absolute,
33    ThreadLocal,
34    UnknownSymbolKind(Flag),
35}
36
37impl SymbolKind {
38    pub fn new(kind: Flag) -> SymbolKind {
39        match kind & EXPORT_SYMBOL_FLAGS_KIND_MASK {
40            0x00 => SymbolKind::Regular,
41            0x01 => SymbolKind::ThreadLocal,
42            0x02 => SymbolKind::Absolute,
43            _ => SymbolKind::UnknownSymbolKind(kind),
44        }
45    }
46    pub fn to_str(&self) -> &'static str {
47        match self {
48            SymbolKind::Regular => "Regular",
49            SymbolKind::Absolute => "Absolute",
50            SymbolKind::ThreadLocal => "Thread_LOCAL",
51            SymbolKind::UnknownSymbolKind(_k) => "Unknown",
52        }
53    }
54}
55
56#[derive(Debug)]
57/// An export can be a regular export, a re-export, or a stub
58pub enum ExportInfo<'a> {
59    /// A regular exported symbol, which is an address where it is found, and the flags associated with it
60    Regular { address: u64, flags: Flag },
61    /// if lib_symbol_name None then same symbol name, otherwise reexport of lib_symbol_name with name in the trie
62    /// "If the string is zero length, then the symbol is re-export from the specified dylib with the same name"
63    Reexport {
64        lib: &'a str,
65        lib_symbol_name: Option<&'a str>,
66        flags: Flag,
67    },
68    /// If the flags is `EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER`, then following the flags are two `Uleb128`s: the stub offset and the resolver offset. The stub is used by non-lazy pointers.  The resolver is used by lazy pointers and must be called to get the actual address to use
69    Stub {
70        stub_offset: scroll::Uleb128,
71        resolver_offset: scroll::Uleb128,
72        flags: Flag,
73    },
74}
75
76impl<'a> ExportInfo<'a> {
77    /// Parse out the export info from `bytes`, at `offset`
78    pub fn parse(
79        bytes: &'a [u8],
80        libs: &[&'a str],
81        flags: Flag,
82        mut offset: usize,
83    ) -> error::Result<ExportInfo<'a>> {
84        use self::ExportInfo::*;
85        let regular = |offset| -> error::Result<ExportInfo> {
86            let address = bytes.pread::<Uleb128>(offset)?;
87            Ok(Regular {
88                address: address.into(),
89                flags,
90            })
91        };
92        let reexport = |mut offset| -> error::Result<ExportInfo<'a>> {
93            let lib_ordinal: u64 = {
94                let tmp = bytes.pread::<Uleb128>(offset)?;
95                offset += tmp.size();
96                tmp.into()
97            };
98            let lib_symbol_name = bytes.pread::<&str>(offset)?;
99            let lib = libs[lib_ordinal as usize];
100            let lib_symbol_name = if lib_symbol_name == "" {
101                None
102            } else {
103                Some(lib_symbol_name)
104            };
105            Ok(Reexport {
106                lib,
107                lib_symbol_name,
108                flags,
109            })
110        };
111        match SymbolKind::new(flags) {
112            SymbolKind::Regular => {
113                if flags & EXPORT_SYMBOL_FLAGS_REEXPORT != 0 {
114                    reexport(offset)
115                } else if flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER != 0 {
116                    // 0x10
117                    let stub_offset = bytes.pread::<Uleb128>(offset)?;
118                    offset += stub_offset.size();
119                    let resolver_offset = bytes.pread::<Uleb128>(offset)?;
120                    Ok(Stub {
121                        stub_offset,
122                        resolver_offset,
123                        flags,
124                    })
125                // else if (flags = kEXPORT_SYMBOL_FLAGS_WEAK_DEFINITION) then (*0x40 unused*)
126                } else {
127                    regular(offset)
128                }
129            }
130            SymbolKind::ThreadLocal | SymbolKind::Absolute => {
131                if flags & EXPORT_SYMBOL_FLAGS_REEXPORT != 0 {
132                    reexport(offset)
133                } else {
134                    regular(offset)
135                }
136            }
137            SymbolKind::UnknownSymbolKind(_kind) => {
138                // 0x5f causes errors, but parsing as regular symbol resolves...
139                //Err(error::Error::Malformed(format!("Unknown kind {:#x} from flags {:#x} in get_symbol_type at offset {}", kind, flags, offset)))
140                regular(offset)
141            }
142        }
143    }
144}
145
146#[derive(Debug)]
147/// A finalized symbolic export reconstructed from the export trie
148pub struct Export<'a> {
149    /// The reconsituted export name which dyld matches against
150    pub name: String,
151    /// The export info in the node data
152    pub info: ExportInfo<'a>,
153    /// How large this export is
154    pub size: usize,
155    /// The offset this symbol export is found in the binary
156    pub offset: u64,
157}
158
159impl<'a> Export<'a> {
160    /// Create a new export from `name` and `info`
161    pub fn new(name: String, info: ExportInfo<'a>) -> Export<'a> {
162        let offset = match info {
163            ExportInfo::Regular { address, .. } => address,
164            _ => 0x0,
165        };
166        Export {
167            name,
168            info,
169            size: 0,
170            offset,
171        }
172    }
173}
174
175/// An export trie efficiently encodes all of the symbols exported by this binary for dynamic linking
176pub struct ExportTrie<'a> {
177    data: &'a [u8],
178    location: Range<usize>,
179}
180
181impl<'a> ExportTrie<'a> {
182    #[inline]
183    fn walk_nodes(
184        &self,
185        libs: &[&'a str],
186        branches: Vec<(String, usize)>,
187        acc: &mut Vec<Export<'a>>,
188    ) -> error::Result<()> {
189        for (symbol, next_node) in branches {
190            self.walk_trie(libs, symbol, next_node, acc)?;
191        }
192        Ok(())
193    }
194
195    // current_symbol can be a str iiuc
196    fn walk_branches(
197        &self,
198        nbranches: usize,
199        current_symbol: String,
200        mut offset: usize,
201    ) -> error::Result<Vec<(String, usize)>> {
202        if nbranches > self.data.len() {
203            return Err(error::Error::BufferTooShort(nbranches, "branches"));
204        }
205        let mut branches = Vec::with_capacity(nbranches);
206        //println!("\t@{:#x}", *offset);
207        for _i in 0..nbranches {
208            // additional offset calculations are relative to the base we received
209            let offset = &mut offset;
210            let string = self.data.pread::<&str>(*offset)?;
211            let mut key = current_symbol.clone();
212            key.push_str(string);
213            // +1 for null terminator
214            *offset = *offset + string.len() + 1;
215            //println!("\t({}) string_len: {} offset: {:#x}", i, string.len(), *offset);
216            // value is relative to export trie base
217            let next_node = Uleb128::read(&self.data, offset)? as usize + self.location.start;
218            //println!("\t({}) string: {} next_node: {:#x}", _i, key, next_node);
219            branches.push((key, next_node));
220        }
221        Ok(branches)
222    }
223
224    fn walk_trie(
225        &self,
226        libs: &[&'a str],
227        current_symbol: String,
228        start: usize,
229        exports: &mut Vec<Export<'a>>,
230    ) -> error::Result<()> {
231        if start < self.location.end {
232            let mut offset = start;
233            let terminal_size = Uleb128::read(&self.data, &mut offset)?;
234            // let mut input = String::new();
235            // ::std::io::stdin().read_line(&mut input).unwrap();
236            // println!("@ {:#x} node: {:#x} current_symbol: {}", start, terminal_size, current_symbol);
237            if terminal_size == 0 {
238                let nbranches = Uleb128::read(&self.data, &mut offset)? as usize;
239                //println!("\t@ {:#x} BRAN {}", *offset, nbranches);
240                let branches = self.walk_branches(nbranches, current_symbol, offset)?;
241                self.walk_nodes(libs, branches, exports)
242            } else {
243                // terminal node, but the tricky part is that they can have children...
244                let pos = offset;
245                let children_start = &mut (pos + terminal_size as usize);
246                let nchildren = Uleb128::read(&self.data, children_start)? as usize;
247                let flags = Uleb128::read(&self.data, &mut offset)?;
248                //println!("\t@ {:#x} TERM {} flags: {:#x}", offset, nchildren, flags);
249                let info = ExportInfo::parse(&self.data, libs, flags, offset)?;
250                let export = Export::new(current_symbol.clone(), info);
251                //println!("\t{:?}", &export);
252                exports.push(export);
253                if nchildren == 0 {
254                    // this branch is done
255                    Ok(())
256                } else {
257                    // more branches to walk
258                    let branches =
259                        self.walk_branches(nchildren, current_symbol, *children_start)?;
260                    self.walk_nodes(libs, branches, exports)
261                }
262            }
263        } else {
264            Ok(())
265        }
266    }
267
268    fn new_impl(bytes: &'a [u8], start: usize, size: usize) -> Self {
269        // FIXME: Ideally, this should validate `command`, but the best we can
270        // do for now is return an empty `Range`.
271        let location = match start
272            .checked_add(size)
273            .and_then(|end| bytes.get(start..end).map(|_| start..end))
274        {
275            Some(location) => location,
276            None => {
277                log::warn!("Invalid `DyldInfo` `command`.");
278                0..0
279            }
280        };
281        ExportTrie {
282            data: bytes,
283            location,
284        }
285    }
286
287    /// Walk the export trie for symbols exported by this binary, using the provided `libs` to resolve re-exports
288    pub fn exports(&self, libs: &[&'a str]) -> error::Result<Vec<Export<'a>>> {
289        let offset = self.location.start;
290        let current_symbol = String::new();
291        let mut exports = Vec::new();
292        self.walk_trie(libs, current_symbol, offset, &mut exports)?;
293        Ok(exports)
294    }
295
296    /// Create a new, lazy, zero-copy export trie from the `DyldInfo` `command`
297    pub fn new(bytes: &'a [u8], command: &load_command::DyldInfoCommand) -> Self {
298        Self::new_impl(
299            bytes,
300            command.export_off as usize,
301            command.export_size as usize,
302        )
303    }
304
305    /// Create a new, lazy, zero-copy export trie from the `LinkeditDataCommand` `command`
306    pub fn new_from_linkedit_data_command(
307        bytes: &'a [u8],
308        command: &load_command::LinkeditDataCommand,
309    ) -> Self {
310        Self::new_impl(bytes, command.dataoff as usize, command.datasize as usize)
311    }
312}
313
314impl<'a> Debug for ExportTrie<'a> {
315    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
316        fmt.debug_struct("ExportTrie")
317            .field("data", &"<... redacted ...>")
318            .field(
319                "location",
320                &format_args!("{:#x}..{:#x}", self.location.start, self.location.end),
321            )
322            .finish()
323    }
324}
325
#[cfg(test)]
mod tests {
    use super::*;

    /// Export trie fixture shared by the parsing tests; walking it yields three exports.
    const EXPORTS: [u8; 64] = [
        0x00, 0x01, 0x5f, 0x00, 0x05, 0x00, 0x02, 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65,
        0x63, 0x75, 0x74, 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x00, 0x1f, 0x6d,
        0x61, 0x00, 0x23, 0x02, 0x00, 0x00, 0x00, 0x00, 0x02, 0x78, 0x69, 0x6d, 0x75, 0x6d,
        0x00, 0x30, 0x69, 0x6e, 0x00, 0x35, 0x03, 0x00, 0xc0, 0x1e, 0x00, 0x03, 0x00, 0xd0,
        0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    ];

    /// Library names handed to `ExportTrie::exports` for resolving re-exports.
    const LIBS: [&str; 2] = ["/usr/lib/libderp.so", "/usr/lib/libthuglife.so"];

    /// Prints `trie` and its exports, then checks that the fixture's three exports were found.
    fn assert_three_exports(trie: &ExportTrie<'_>) {
        println!("trie: {:#?}", trie);
        let exports = trie.exports(&LIBS).unwrap();
        println!("len: {} exports: {:#?}", exports.len(), &exports);
        assert_eq!(exports.len(), 3);
    }

    #[test]
    fn export_trie() {
        let bytes = &EXPORTS[..];
        let mut command = load_command::DyldInfoCommand::default();
        command.export_size = bytes.len() as u32;
        assert_three_exports(&ExportTrie::new(bytes, &command));
    }

    #[test]
    fn export_trie_linkedit_data() {
        let bytes = &EXPORTS[..];
        let command = load_command::LinkeditDataCommand {
            datasize: bytes.len() as u32,
            ..Default::default()
        };
        assert_three_exports(&ExportTrie::new_from_linkedit_data_command(
            bytes, &command,
        ));
    }

    #[test]
    fn invalid_range() {
        // The described range lies far outside the (empty) buffer.
        let mut command = load_command::DyldInfoCommand::default();
        command.export_off = 0xffff_ff00;
        command.export_size = 0x00ff_ff00;
        let trie = ExportTrie::new(&[], &command);
        // FIXME: it would have been nice if this were an `Err`.
        let exports = trie.exports(&[]).unwrap();
        assert_eq!(exports.len(), 0);
    }
}