Skip to main content

lightswitch_object/
object.rs

1use std::fs;
2use std::fs::File;
3use std::io::Read;
4use std::path::Path;
5
6use anyhow::{Result, anyhow};
7use memmap2::Mmap;
8use ring::digest::{Context, Digest, SHA256};
9
10use object::Endianness;
11use object::FileKind;
12use object::Object;
13use object::ObjectKind;
14use object::ObjectSection;
15use object::ObjectSymbol;
16use object::elf::{FileHeader32, FileHeader64, PF_X, PT_LOAD};
17use object::read::elf::FileHeader;
18use object::read::elf::ProgramHeader;
19
20use crate::{BuildId, ExecutableId};
21
/// ELF load segment used during address normalization to find the segment
/// that a code address falls into.
///
/// The fields mirror the program header fields of the same name.
#[derive(Debug, Clone)]
pub struct ElfLoad {
    /// Value of the segment's `p_offset` program header field.
    pub p_offset: u64,
    /// Value of the segment's `p_vaddr` program header field.
    pub p_vaddr: u64,
    /// Value of the segment's `p_filesz` program header field.
    pub p_filesz: u64,
}
30
31#[derive(Clone)]
32pub enum Runtime {
33    /// C, C++, Rust, Fortran
34    CLike,
35    /// Zig. Needs special handling because before [0] the top level frame
36    /// (`start`) didn't have the right unwind information
37    ///
38    /// [0]: https://github.com/ziglang/zig/commit/130f7c2ed8e3358e24bb2fc7cca57f7a6f1f85c3
39    Zig {
40        start_low_address: u64,
41        start_high_address: u64,
42    },
43    /// Golang
44    Go(Vec<StopUnwindingFrames>),
45    /// V8, used by Node.js which is always compiled with frame pointers and has
46    /// handwritten code sections that aren't covered by the unwind
47    /// information
48    V8,
49}
50
/// Address range of a named symbol, collected for Go binaries.
///
/// NOTE(review): judging by the name, unwinding is presumably expected to stop
/// when a frame falls inside this range; confirm against the unwinder.
#[derive(Debug, Clone)]
pub struct StopUnwindingFrames {
    /// Full symbol name as found in the symbol table.
    pub name: String,
    /// Address of the symbol.
    pub start_address: u64,
    /// Address of the symbol plus its size.
    pub end_address: u64,
}
57
/// A parsed object file backed by a memory mapping of the file on disk,
/// together with its build ID.
#[derive(Debug)]
pub struct ObjectFile {
    /// Warning! `object` must always go above `mmap` to ensure it will be
    /// dropped before. Rust guarantees that fields are dropped in the order
    /// they are defined.
    object: object::File<'static>, // Its lifetime is tied to the `mmap` below.
    /// Memory mapping of the file that `object` borrows its data from.
    mmap: Box<Mmap>,
    /// Build ID read once when the object file is created.
    build_id: BuildId,
}
67
68impl ObjectFile {
    /// Memory-maps `file`, parses it as an object file and reads its build ID.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be memory-mapped, if the mapped
    /// bytes cannot be parsed as an object file, or if the build ID cannot be
    /// read.
    pub fn new(file: &File) -> Result<Self> {
        // Rust offers no guarantees on whether a "move" is done virtually or by
        // memcpying, so to ensure that the memory value is valid we store it in
        // the heap. Safety: Memory mapping files can cause issues if the file
        // is modified or unmapped.
        let mmap = Box::new(unsafe { Mmap::map(file) }?);
        let object = object::File::parse(&**mmap)?;
        // Safety: `object` borrows its data from `mmap`, so `mmap` must outlive
        // `object`. Both end up in the same `ObjectFile`, where `object` is
        // declared before `mmap` and is therefore dropped first.
        let object =
            unsafe { std::mem::transmute::<object::File<'_>, object::File<'static>>(object) };
        let build_id = Self::read_build_id(&object)?;

        Ok(ObjectFile {
            object,
            mmap,
            build_id,
        })
    }
88
89    pub fn from_path(path: &Path) -> Result<Self> {
90        let file = fs::File::open(path)?;
91        Self::new(&file)
92    }
93
94    /// Returns an identifier for the executable using the first 8 bytes of the
95    /// build id.
96    pub fn id(&self) -> Result<ExecutableId> {
97        self.build_id.id()
98    }
99
100    /// Returns the executable build ID.
101    pub fn build_id(&self) -> &BuildId {
102        &self.build_id
103    }
104
105    /// Returns the executable build ID if present. If no GNU build ID and no Go
106    /// build ID are found it returns the hash of the text section.
107    pub fn read_build_id(object: &object::File<'static>) -> Result<BuildId> {
108        let gnu_build_id = object.build_id()?;
109
110        if let Some(data) = gnu_build_id {
111            return Ok(BuildId::gnu_from_bytes(data)?);
112        }
113
114        // Golang (the Go toolchain does not interpret these bytes as we do).
115        for section in object.sections() {
116            if section.name()? == ".note.go.buildid"
117                && let Ok(data) = section.data()
118            {
119                return Ok(BuildId::go_from_bytes(data)?);
120            }
121        }
122
123        // No build id (Rust, some compilers and Linux distributions).
124        let Some(code_hash) = code_hash(object) else {
125            return Err(anyhow!("code hash is None"));
126        };
127        Ok(BuildId::sha256_from_digest(&code_hash)?)
128    }
129
130    /// Returns whether the object has debug symbols.
131    pub fn has_debug_info(&self) -> bool {
132        self.object.has_debug_symbols()
133    }
134
135    pub fn is_dynamic(&self) -> bool {
136        self.object.kind() == ObjectKind::Dynamic
137    }
138
139    pub fn runtime(&self) -> Runtime {
140        if self.is_go() {
141            Runtime::Go(self.go_stop_unwinding_frames())
142        } else {
143            let mut is_zig = false;
144            let mut zig_first_frame = None;
145
146            for symbol in self.object.symbols() {
147                let Ok(name) = symbol.name() else { continue };
148                if name.starts_with("_ZZN2v88internal") {
149                    return Runtime::V8;
150                }
151                if name.starts_with("__zig") {
152                    is_zig = true;
153                }
154                if name == "_start" {
155                    zig_first_frame = Some((symbol.address(), symbol.address() + symbol.size()));
156                }
157
158                // Once we've found both Zig markers we are done. Not that this is a heuristic
159                // and it's possible that a Zig library is linked against code
160                // written in a C-like language. In this case we might be
161                // rewriting unwind information that's correct. This won't have a negative
162                // effect as `_start` is always the first function.
163                if is_zig && let Some((low_address, high_address)) = zig_first_frame {
164                    return Runtime::Zig {
165                        start_low_address: low_address,
166                        start_high_address: high_address,
167                    };
168                }
169            }
170            Runtime::CLike
171        }
172    }
173
174    pub fn is_go(&self) -> bool {
175        for section in self.object.sections() {
176            if let Ok(section_name) = section.name()
177                && (section_name == ".gosymtab"
178                    || section_name == ".gopclntab"
179                    || section_name == ".note.go.buildid")
180            {
181                return true;
182            }
183        }
184        false
185    }
186
187    pub fn go_stop_unwinding_frames(&self) -> Vec<StopUnwindingFrames> {
188        let mut r = Vec::new();
189
190        for symbol in self.object.symbols() {
191            let Ok(name) = symbol.name() else { continue };
192            for func in [
193                "runtime.mcall",
194                "runtime.goexit",
195                "runtime.mstart",
196                "runtime.systemstack",
197            ] {
198                // In some occasions functions might get some suffixes added to them like
199                // `runtime.mcall0`.
200                if name.starts_with(func) {
201                    r.push(StopUnwindingFrames {
202                        name: name.to_string(),
203                        start_address: symbol.address(),
204                        end_address: symbol.address() + symbol.size(),
205                    });
206                }
207            }
208        }
209
210        r
211    }
212
213    /// Retrieves the executable load segments. These are used to convert
214    /// virtual addresses to offsets in an executable during unwinding
215    /// and symbolization.
216    pub fn elf_load_segments(&self) -> Result<Vec<ElfLoad>> {
217        let mmap = &**self.mmap;
218
219        match FileKind::parse(mmap) {
220            Ok(FileKind::Elf32) => {
221                let header: &FileHeader32<Endianness> = FileHeader32::<Endianness>::parse(mmap)?;
222                let endian = header.endian()?;
223                let segments = header.program_headers(endian, mmap)?;
224
225                let mut elf_loads = Vec::new();
226                for segment in segments {
227                    if segment.p_type(endian) != PT_LOAD || segment.p_flags(endian) & PF_X == 0 {
228                        continue;
229                    }
230                    elf_loads.push(ElfLoad {
231                        p_offset: segment.p_offset(endian) as u64,
232                        p_vaddr: segment.p_vaddr(endian) as u64,
233                        p_filesz: segment.p_filesz(endian) as u64,
234                    });
235                }
236                Ok(elf_loads)
237            }
238            Ok(FileKind::Elf64) => {
239                let header: &FileHeader64<Endianness> = FileHeader64::<Endianness>::parse(mmap)?;
240                let endian = header.endian()?;
241                let segments = header.program_headers(endian, mmap)?;
242
243                let mut elf_loads = Vec::new();
244                for segment in segments {
245                    if segment.p_type(endian) != PT_LOAD || segment.p_flags(endian) & PF_X == 0 {
246                        continue;
247                    }
248                    elf_loads.push(ElfLoad {
249                        p_offset: segment.p_offset(endian),
250                        p_vaddr: segment.p_vaddr(endian),
251                        p_filesz: segment.p_filesz(endian),
252                    });
253                }
254                Ok(elf_loads)
255            }
256            Ok(other_file_kind) => Err(anyhow!(
257                "object is not an 32 or 64 bits ELF but {:?}",
258                other_file_kind
259            )),
260            Err(e) => Err(anyhow!("FileKind failed with {:?}", e)),
261        }
262    }
263}
264
265pub fn code_hash(object: &object::File) -> Option<Digest> {
266    for section in object.sections() {
267        let Ok(section_name) = section.name() else {
268            continue;
269        };
270
271        if section_name == ".text"
272            && let Ok(section) = section.data()
273        {
274            return Some(sha256_digest(section));
275        }
276    }
277
278    None
279}
280
281fn sha256_digest<R: Read>(mut reader: R) -> Digest {
282    let mut context = Context::new(&SHA256);
283    let mut buffer = [0; 1024];
284
285    loop {
286        let count = reader
287            .read(&mut buffer)
288            .expect("reading digest into buffer should not fail");
289        if count == 0 {
290            break;
291        }
292        context.update(&buffer[..count]);
293    }
294
295    context.finish()
296}