layout_audit/
loader.rs

1use crate::error::{Error, Result};
2use gimli::{Dwarf, EndianSlice, RunTimeEndian, SectionId};
3use memmap2::Mmap;
4use object::{Object, ObjectSection};
5use std::borrow::Cow;
6use std::collections::HashMap;
7use std::fs::File;
8use std::path::Path;
9use std::pin::Pin;
10
11pub struct BinaryData {
12    pub mmap: Mmap,
13}
14
15pub type DwarfSlice<'a> = EndianSlice<'a, RunTimeEndian>;
16
/// Storage for decompressed DWARF sections, keyed by section name.
///
/// Slices returned by `get` point into the `Vec<u8>` buffers kept here. Each
/// buffer lives in its own heap allocation, so its address does not change
/// when the map grows or when the owning `Pin<Box<Self>>` handle is moved. A
/// slice only stops being valid once its entry is replaced or the whole store
/// is dropped.
pub struct DecompressedSections {
    sections: HashMap<&'static str, Vec<u8>>,
}

impl DecompressedSections {
    /// Creates an empty store behind a pinned box.
    fn new() -> Pin<Box<Self>> {
        Box::pin(Self { sections: HashMap::new() })
    }

    /// Stores `data` under `name`, replacing any buffer already stored there.
    ///
    /// Replacing an entry drops the previous buffer, so do not insert under a
    /// name whose bytes are still referenced elsewhere.
    fn insert(self: &mut Pin<Box<Self>>, name: &'static str, data: Vec<u8>) {
        // `Self` is `Unpin` (its only field is a `HashMap`), so the safe
        // `Pin::get_mut` gives the `&mut Self` we need; no `unsafe` required.
        self.as_mut().get_mut().sections.insert(name, data);
    }

    /// Returns the bytes stored under `name`, or `None` if nothing was stored.
    fn get(&self, name: &str) -> Option<&[u8]> {
        self.sections.get(name).map(Vec::as_slice)
    }
}
38
39pub struct LoadedDwarf<'a> {
40    pub dwarf: Dwarf<DwarfSlice<'a>>,
41    pub address_size: u8,
42    pub endian: RunTimeEndian,
43    /// Pinned storage for decompressed sections. The Dwarf object holds slices
44    /// pointing into this data, so it must remain at a stable address.
45    /// Named with underscore prefix to indicate intentional non-use (kept for lifetime).
46    _decompressed_sections: Pin<Box<DecompressedSections>>,
47}
48
/// Base names of the DWARF sections the loader pre-decompresses.
///
/// Each entry is the part that follows the `.debug_` / `.zdebug_` prefix; the
/// loader builds both spellings from it.
const DEBUG_SECTIONS: &[&str] = &[
    "abbrev",
    "addr",
    "aranges",
    "info",
    "line",
    "line_str",
    "loc",
    "loclists",
    "ranges",
    "rnglists",
    "str",
    "str_offsets",
    "types",
];
65
66impl BinaryData {
67    pub fn load(path: &Path) -> Result<Self> {
68        let file = File::open(path)?;
69        // SAFETY: The file is opened read-only and we keep the mmap alive
70        // for the lifetime of BinaryData.
71        let mmap = unsafe { Mmap::map(&file)? };
72        Ok(Self { mmap })
73    }
74
75    pub fn load_dwarf(&self) -> Result<LoadedDwarf<'_>> {
76        let object = object::File::parse(&*self.mmap)?;
77
78        if !matches!(
79            object.format(),
80            object::BinaryFormat::Elf | object::BinaryFormat::MachO | object::BinaryFormat::Pe
81        ) {
82            return Err(Error::UnsupportedFormat);
83        }
84
85        let endian =
86            if object.is_little_endian() { RunTimeEndian::Little } else { RunTimeEndian::Big };
87
88        // Create pinned storage for decompressed sections
89        let mut decompressed_sections = DecompressedSections::new();
90
91        // Pre-decompress any compressed sections and store them
92        for &base_name in DEBUG_SECTIONS {
93            let debug_name = format!(".debug_{}", base_name);
94            let zdebug_name = format!(".zdebug_{}", base_name);
95
96            // Try .debug_* first, then .zdebug_*
97            for name in [&debug_name, &zdebug_name] {
98                if let Some(section) = object.section_by_name(name) {
99                    if let Ok(Cow::Owned(vec)) = section.uncompressed_data() {
100                        // Leak the string to get a 'static lifetime - this is fine since
101                        // these are a fixed set of section names used for the program lifetime
102                        let static_name: &'static str = match name.as_str() {
103                            n if n == debug_name => leak_section_name(&debug_name),
104                            _ => leak_section_name(&zdebug_name),
105                        };
106                        decompressed_sections.insert(static_name, vec);
107                    }
108                }
109            }
110        }
111
112        // Create a raw pointer to the pinned storage for use in the closure.
113        // SAFETY: The Pin<Box<DecompressedSections>> ensures the data won't move.
114        // We only read from it in the closure, and the LoadedDwarf keeps it alive.
115        let decompressed_ptr = &*decompressed_sections as *const DecompressedSections;
116
117        let load_section = |id: SectionId| -> std::result::Result<DwarfSlice<'_>, gimli::Error> {
118            let section_name = id.name();
119            let zdebug_name = section_name.replace(".debug_", ".zdebug_");
120
121            let try_load = |name: &str| -> Option<&[u8]> {
122                // SAFETY: decompressed_ptr points to pinned data that outlives this closure
123                let decompressed = unsafe { &*decompressed_ptr };
124                if let Some(slice) = decompressed.get(name) {
125                    return Some(slice);
126                }
127
128                // Fall back to borrowing directly from mmap for uncompressed sections
129                object.section_by_name(name).and_then(|s| s.uncompressed_data().ok()).and_then(
130                    |data| match data {
131                        Cow::Borrowed(b) => Some(b),
132                        Cow::Owned(_) => None,
133                    },
134                )
135            };
136
137            let slice = try_load(section_name).or_else(|| try_load(&zdebug_name)).unwrap_or(&[]);
138
139            Ok(EndianSlice::new(slice, endian))
140        };
141
142        let dwarf = Dwarf::load(load_section).map_err(|e| Error::Dwarf(e.to_string()))?;
143
144        let mut units = dwarf.units();
145        if units.next().map_err(|e| Error::Dwarf(e.to_string()))?.is_none() {
146            return Err(Error::NoDebugInfo);
147        }
148
149        Ok(LoadedDwarf {
150            dwarf,
151            address_size: if object.is_64() { 8 } else { 4 },
152            endian,
153            _decompressed_sections: decompressed_sections,
154        })
155    }
156}
157
/// Returns a `'static` copy of `name`, leaking at most one allocation per
/// distinct name.
///
/// Leaked strings are remembered in a process-wide set, so calling this again
/// with the same text returns the very same `&'static str` instead of leaking
/// a new one. The total leak is therefore bounded by the number of distinct
/// names ever passed in; the loader only passes the `.debug_*` / `.zdebug_*`
/// spellings of its fixed section list.
fn leak_section_name(name: &str) -> &'static str {
    use std::collections::HashSet;
    use std::sync::{Mutex, OnceLock, PoisonError};

    // The leaked string is its own key, so no second owned copy is needed.
    static INTERNED: OnceLock<Mutex<HashSet<&'static str>>> = OnceLock::new();

    // A poisoned lock is still usable: the set is only ever extended by a
    // single `insert`, which cannot leave it half-updated.
    let mut interned = INTERNED
        .get_or_init(|| Mutex::new(HashSet::new()))
        .lock()
        .unwrap_or_else(PoisonError::into_inner);

    if let Some(&existing) = interned.get(name) {
        return existing;
    }

    let leaked: &'static str = Box::leak(name.to_owned().into_boxed_str());
    interned.insert(leaked);
    leaked
}