stack_sizes/
lib.rs

1//! Library to parse stack usage information ([`.stack_sizes`]) emitted by LLVM
2//!
3//! [`.stack_sizes`]: https://llvm.org/docs/CodeGenerator.html#emitting-function-stack-size-information
4
5#![deny(rust_2018_idioms)]
6#![deny(missing_docs)]
7#![deny(warnings)]
8
9use core::u16;
10use std::{
11    collections::{BTreeMap, HashMap, HashSet},
12    io::Cursor,
13};
14#[cfg(feature = "tools")]
15use std::{fs, path::Path};
16
17use anyhow::{anyhow, bail};
18use byteorder::{ReadBytesExt, LE};
19use xmas_elf::{
20    header,
21    sections::SectionData,
22    symbol_table::{Entry, Type},
23    ElfFile,
24};
25
26/// Functions found after analyzing an executable
27#[derive(Clone, Debug)]
28pub struct Functions<'a> {
29    /// Whether the addresses of these functions are 32-bit or 64-bit
30    pub have_32_bit_addresses: bool,
31
32    /// "undefined" symbols, symbols that need to be dynamically loaded
33    pub undefined: HashSet<&'a str>,
34
35    /// "defined" symbols, symbols with known locations (addresses)
36    pub defined: BTreeMap<u64, Function<'a>>,
37}
38
39/// A symbol that represents a function (subroutine)
40#[derive(Clone, Debug)]
41pub struct Function<'a> {
42    names: Vec<&'a str>,
43    size: u64,
44    stack: Option<u64>,
45}
46
47impl<'a> Function<'a> {
48    /// Returns the (mangled) name of the function and its aliases
49    pub fn names(&self) -> &[&'a str] {
50        &self.names
51    }
52
53    /// Returns the size of this subroutine in bytes
54    pub fn size(&self) -> u64 {
55        self.size
56    }
57
58    /// Returns the stack usage of the function in bytes
59    pub fn stack(&self) -> Option<u64> {
60        self.stack
61    }
62}
63
64// is this symbol a tag used to delimit code / data sections within a subroutine?
65fn is_tag(name: &str) -> bool {
66    name == "$a" || name == "$t" || name == "$d" || {
67        (name.starts_with("$a.") || name.starts_with("$d.") || name.starts_with("$t."))
68            && name.splitn(2, '.').nth(1).unwrap().parse::<u64>().is_ok()
69    }
70}
71
72fn process_symtab_obj<'a, E>(
73    entries: &'a [E],
74    elf: &ElfFile<'a>,
75) -> anyhow::Result<
76    (
77        BTreeMap<u16, BTreeMap<u64, HashSet<&'a str>>>,
78        BTreeMap<u32, u16>,
79    )
80>
81where
82    E: Entry,
83{
84    let mut names: BTreeMap<_, BTreeMap<_, HashSet<_>>> = BTreeMap::new();
85    let mut shndxs = BTreeMap::new();
86
87    for (entry, i) in entries.iter().zip(0..) {
88        let name = entry.get_name(elf);
89        let shndx = entry.shndx();
90        let addr = entry.value() & !1; // clear the thumb bit
91        let ty = entry.get_type();
92
93        if shndx != 0 {
94            shndxs.insert(i, shndx);
95        }
96
97        if ty == Ok(Type::Func)
98            || (ty == Ok(Type::NoType)
99                && name
100                    .map(|name| !name.is_empty() && !is_tag(name))
101                    .unwrap_or(false))
102        {
103            let name = name.map_err(anyhow::Error::msg)?;
104
105            names
106                .entry(shndx)
107                .or_default()
108                .entry(addr)
109                .or_default()
110                .insert(name);
111        }
112    }
113
114    Ok((names, shndxs))
115}
116
117/// Parses an *input* (AKA relocatable) object file (`.o`) and returns a list of symbols and their
118/// stack usage
119pub fn analyze_object(obj: &[u8]) -> anyhow::Result<HashMap<&str, u64>> {
120    let elf = &ElfFile::new(obj).map_err(anyhow::Error::msg)?;
121
122    if elf.header.pt2.type_().as_type() != header::Type::Relocatable {
123        bail!("object file is not relocatable")
124    }
125
126    // shndx -> (address -> [symbol-name])
127    let mut is_64_bit = false;
128    let (shndx2names, symtab2shndx) = match elf
129        .find_section_by_name(".symtab")
130        .ok_or_else(|| anyhow!("`.symtab` section not found"))?
131        .get_data(elf)
132    {
133        Ok(SectionData::SymbolTable32(entries)) => process_symtab_obj(entries, elf)?,
134
135        Ok(SectionData::SymbolTable64(entries)) => {
136            is_64_bit = true;
137            process_symtab_obj(entries, elf)?
138        }
139
140        _ => bail!("malformed .symtab section"),
141    };
142
143    let mut sizes = HashMap::new();
144    let mut sections = elf.section_iter();
145    while let Some(section) = sections.next() {
146        if section.get_name(elf) == Ok(".stack_sizes") {
147            let mut stack_sizes = Cursor::new(section.raw_data(elf));
148
149            // next section should be `.rel.stack_sizes` or `.rela.stack_sizes`
150            // XXX should we check the section name?
151            let relocs: Vec<_> = match sections
152                .next()
153                .and_then(|section| section.get_data(elf).ok())
154            {
155                Some(SectionData::Rel32(rels)) if !is_64_bit => rels
156                    .iter()
157                    .map(|rel| rel.get_symbol_table_index())
158                    .collect(),
159
160                Some(SectionData::Rela32(relas)) if !is_64_bit => relas
161                    .iter()
162                    .map(|rel| rel.get_symbol_table_index())
163                    .collect(),
164
165                Some(SectionData::Rel64(rels)) if is_64_bit => rels
166                    .iter()
167                    .map(|rel| rel.get_symbol_table_index())
168                    .collect(),
169
170                Some(SectionData::Rela64(relas)) if is_64_bit => relas
171                    .iter()
172                    .map(|rel| rel.get_symbol_table_index())
173                    .collect(),
174
175                _ => bail!("expected a section with relocation information after `.stack_sizes`"),
176            };
177
178            for index in relocs {
179                let addr = if is_64_bit {
180                    stack_sizes.read_u64::<LE>()?
181                } else {
182                    u64::from(stack_sizes.read_u32::<LE>()?)
183                };
184                let stack = leb128::read::unsigned(&mut stack_sizes).unwrap();
185
186                let shndx = symtab2shndx[&index];
187                let entries = shndx2names
188                    .get(&(shndx as u16))
189                    .unwrap_or_else(|| panic!("section header with index {} not found", shndx));
190
191                assert!(sizes
192                    .insert(
193                        *entries
194                            .get(&addr)
195                            .unwrap_or_else(|| panic!(
196                                "symbol with address {} not found at section {} ({:?})",
197                                addr, shndx, entries
198                            ))
199                            .iter()
200                            .next()
201                            .unwrap(),
202                        stack
203                    )
204                    .is_none());
205            }
206
207            if stack_sizes.position() != stack_sizes.get_ref().len() as u64 {
208                bail!(
209                    "the number of relocations doesn't match the number of `.stack_sizes` entries"
210                );
211            }
212        }
213    }
214
215    Ok(sizes)
216}
217
218fn process_symtab_exec<'a, E>(
219    entries: &'a [E],
220    elf: &ElfFile<'a>,
221) -> anyhow::Result<(HashSet<&'a str>, BTreeMap<u64, Function<'a>>)>
222where
223    E: Entry + core::fmt::Debug,
224{
225    let mut defined = BTreeMap::new();
226    let mut maybe_aliases = BTreeMap::new();
227    let mut undefined = HashSet::new();
228
229    for entry in entries {
230        let ty = entry.get_type();
231        let value = entry.value();
232        let size = entry.size();
233        let name = entry.get_name(&elf);
234
235        if ty == Ok(Type::Func) {
236            let name = name.map_err(anyhow::Error::msg)?;
237
238            if value == 0 && size == 0 {
239                undefined.insert(name);
240            } else {
241                defined
242                    .entry(value)
243                    .or_insert(Function {
244                        names: vec![],
245                        size,
246                        stack: None,
247                    })
248                    .names
249                    .push(name);
250            }
251        } else if ty == Ok(Type::NoType) {
252            if let Ok(name) = name {
253                if !is_tag(name) {
254                    maybe_aliases.entry(value).or_insert(vec![]).push(name);
255                }
256            }
257        }
258    }
259
260    for (value, alias) in maybe_aliases {
261        // try with the thumb bit both set and clear
262        if let Some(sym) = defined.get_mut(&(value | 1)) {
263            sym.names.extend(alias);
264        } else if let Some(sym) = defined.get_mut(&(value & !1)) {
265            sym.names.extend(alias);
266        }
267    }
268
269    Ok((undefined, defined))
270}
271
272/// Parses an executable ELF file and returns a list of functions and their stack usage
273pub fn analyze_executable(elf: &[u8]) -> anyhow::Result<Functions<'_>> {
274    let elf = &ElfFile::new(elf).map_err(anyhow::Error::msg)?;
275
276    let mut have_32_bit_addresses = false;
277    let (undefined, mut defined) = if let Some(section) = elf.find_section_by_name(".symtab") {
278        match section.get_data(elf).map_err(anyhow::Error::msg)? {
279            SectionData::SymbolTable32(entries) => {
280                have_32_bit_addresses = true;
281
282                process_symtab_exec(entries, elf)?
283            }
284
285            SectionData::SymbolTable64(entries) => process_symtab_exec(entries, elf)?,
286            _ => bail!("malformed .symtab section"),
287        }
288    } else {
289        (HashSet::new(), BTreeMap::new())
290    };
291
292    if let Some(stack_sizes) = elf.find_section_by_name(".stack_sizes") {
293        let data = stack_sizes.raw_data(elf);
294        let end = data.len() as u64;
295        let mut cursor = Cursor::new(data);
296
297        while cursor.position() < end {
298            let address = if have_32_bit_addresses {
299                u64::from(cursor.read_u32::<LE>()?)
300            } else {
301                cursor.read_u64::<LE>()?
302            };
303            let stack = leb128::read::unsigned(&mut cursor)?;
304
305            // NOTE try with the thumb bit both set and clear
306            if let Some(sym) = defined.get_mut(&(address | 1)) {
307                sym.stack = Some(stack);
308            } else if let Some(sym) = defined.get_mut(&(address & !1)) {
309                sym.stack = Some(stack);
310            } else {
311                unreachable!()
312            }
313        }
314    }
315
316    Ok(Functions {
317        have_32_bit_addresses,
318        defined,
319        undefined,
320    })
321}
322