Skip to main content

lightswitch_object/
object.rs

1use std::fmt;
2use std::fs;
3use std::fs::File;
4use std::io::Read;
5use std::path::Path;
6
7use anyhow::{Result, anyhow};
8use memmap2::Mmap;
9use ring::digest::{Context, Digest, SHA256};
10
11use object::Endianness;
12use object::FileKind;
13use object::Object;
14use object::ObjectKind;
15use object::ObjectSection;
16use object::ObjectSymbol;
17use object::elf::{FileHeader32, FileHeader64, PF_X, PT_LOAD};
18use object::read::elf::FileHeader;
19use object::read::elf::ProgramHeader;
20
21use crate::{BuildId, ExecutableId};
22
/// ELF load segment used during address normalization to find the segment
/// into which an address falls.
///
/// The fields carry the program header values of the same name, widened to
/// `u64` so that 32-bit and 64-bit ELF files share one representation.
#[derive(Debug, Clone)]
pub struct ElfLoad {
    /// Value of the segment's `p_offset` program header field.
    pub p_offset: u64,
    /// Value of the segment's `p_vaddr` program header field.
    pub p_vaddr: u64,
    /// Value of the segment's `p_filesz` program header field.
    pub p_filesz: u64,
}
31
/// Language runtime detected for an object file (see `ObjectFile::runtime`).
///
/// `Debug` is implemented by hand so that the Zig addresses are printed in
/// hexadecimal.
#[derive(Clone)]
pub enum Runtime {
    /// C, C++, Rust, Fortran
    CLike,
    /// Zig. Needs special handling because before [0] the top level frame
    /// (`start`) didn't have the right unwind information
    ///
    /// [0]: https://github.com/ziglang/zig/commit/130f7c2ed8e3358e24bb2fc7cca57f7a6f1f85c3
    Zig {
        /// Address of the `_start` symbol.
        start_low_address: u64,
        /// Address of the `_start` symbol plus its size.
        start_high_address: u64,
    },
    /// Golang. Carries the symbol ranges collected by
    /// `ObjectFile::go_stop_unwinding_frames`.
    Go(Vec<StopUnwindingFrames>),
    /// V8, used by Node.js which is always compiled with frame pointers and has
    /// handwritten code sections that aren't covered by the unwind
    /// information
    V8,
}
51
52impl fmt::Debug for Runtime {
53    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
54        match self {
55            Runtime::CLike => write!(f, "CLike"),
56            Runtime::Zig {
57                start_low_address,
58                start_high_address,
59            } => f
60                .debug_struct("Zig")
61                .field(
62                    "start_low_address",
63                    &format_args!("0x{:x}", start_low_address),
64                )
65                .field(
66                    "start_high_address",
67                    &format_args!("0x{:x}", start_high_address),
68                )
69                .finish(),
70            Runtime::Go(frames) => f.debug_tuple("Go").field(frames).finish(),
71            Runtime::V8 => write!(f, "V8"),
72        }
73    }
74}
75
/// Address range of one of the Go runtime functions collected by
/// `ObjectFile::go_stop_unwinding_frames`.
///
/// NOTE(review): judging by the name, the unwinder is expected to stop once it
/// reaches one of these frames — confirm against the consumer of this type.
#[derive(Debug, Clone)]
pub struct StopUnwindingFrames {
    /// Symbol name as read from the symbol table.
    pub name: String,
    /// Address of the symbol.
    pub start_address: u64,
    /// Address of the symbol plus its size.
    pub end_address: u64,
}
82
/// A memory-mapped object file together with its parsed representation and
/// its build ID.
#[derive(Debug)]
pub struct ObjectFile {
    /// Warning! `object` must always go above `mmap` to ensure it will be
    /// dropped before. Rust guarantees that fields are dropped in the order
    /// they are defined.
    object: object::File<'static>, // Borrows from `mmap` below; `'static` is a stand-in for that borrow.
    /// Memory map that backs the data `object` was parsed from.
    mmap: Box<Mmap>,
    /// Build ID computed once when the `ObjectFile` is constructed.
    build_id: BuildId,
}
92
93impl ObjectFile {
    /// Memory-maps `file`, parses the mapping as an object file and computes
    /// its build ID.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be memory-mapped, if the mapped
    /// bytes cannot be parsed as an object file, or if no build ID can be
    /// computed (see `read_build_id`).
    pub fn new(file: &File) -> Result<Self> {
        // Rust offers no guarantees on whether a "move" is done virtually or by
        // memcpying, so to ensure that the memory value is valid we store it in
        // the heap.
        // NOTE(review): `object` is parsed from the bytes `mmap` dereferences
        // to, not from the `Mmap` value itself, so the `Box` may not be
        // strictly required — confirm before removing it.
        //
        // SAFETY: Memory mapping files can cause issues if the file is
        // modified or truncated while it is mapped; this is not guarded
        // against here.
        let mmap = Box::new(unsafe { Mmap::map(file) }?);
        let object = object::File::parse(&**mmap)?;
        // SAFETY: `object` borrows from `mmap`, so `mmap` must outlive
        // `object`. Both are stored in the same `ObjectFile`, where `object` is
        // declared before `mmap` and is therefore dropped first. On the early
        // return below, locals are dropped in reverse declaration order, so
        // `object` is again dropped before `mmap`.
        let object =
            unsafe { std::mem::transmute::<object::File<'_>, object::File<'static>>(object) };
        let build_id = Self::read_build_id(&object)?;

        Ok(ObjectFile {
            object,
            mmap,
            build_id,
        })
    }
113
114    pub fn from_path(path: &Path) -> Result<Self> {
115        let file = fs::File::open(path)?;
116        Self::new(&file)
117    }
118
    /// Returns an identifier for the executable using the first 8 bytes of the
    /// build id.
    ///
    /// This delegates to `BuildId::id` on the build ID computed at
    /// construction time.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `BuildId::id`.
    pub fn id(&self) -> Result<ExecutableId> {
        self.build_id.id()
    }
124
    /// Returns the executable build ID.
    ///
    /// The value is the one computed when this `ObjectFile` was constructed; no
    /// work is done here.
    pub fn build_id(&self) -> &BuildId {
        &self.build_id
    }
129
130    /// Returns the executable build ID if present. If no GNU build ID and no Go
131    /// build ID are found it returns the hash of the text section.
132    pub fn read_build_id(object: &object::File<'static>) -> Result<BuildId> {
133        let gnu_build_id = object.build_id()?;
134
135        if let Some(data) = gnu_build_id {
136            return Ok(BuildId::gnu_from_bytes(data)?);
137        }
138
139        // Golang (the Go toolchain does not interpret these bytes as we do).
140        for section in object.sections() {
141            if section.name()? == ".note.go.buildid"
142                && let Ok(data) = section.data()
143            {
144                return Ok(BuildId::go_from_bytes(data)?);
145            }
146        }
147
148        // No build id (Rust, some compilers and Linux distributions).
149        let Some(code_hash) = code_hash(object) else {
150            return Err(anyhow!("code hash is None"));
151        };
152        Ok(BuildId::sha256_from_digest(&code_hash)?)
153    }
154
    /// Returns whether the object has debug symbols.
    ///
    /// This is the answer given by the parsed object's `has_debug_symbols`.
    pub fn has_debug_info(&self) -> bool {
        self.object.has_debug_symbols()
    }
159
    /// Returns whether the parsed object reports its kind as
    /// `ObjectKind::Dynamic`.
    pub fn is_dynamic(&self) -> bool {
        self.object.kind() == ObjectKind::Dynamic
    }
163
164    pub fn runtime(&self) -> Runtime {
165        if self.is_go() {
166            Runtime::Go(self.go_stop_unwinding_frames())
167        } else {
168            let mut is_zig = false;
169            let mut zig_first_frame = None;
170
171            for symbol in self.object.symbols() {
172                let Ok(name) = symbol.name() else { continue };
173                if name.starts_with("_ZZN2v88internal") {
174                    return Runtime::V8;
175                }
176                if name.starts_with("__zig") {
177                    is_zig = true;
178                }
179                if name == "_start" {
180                    zig_first_frame = Some((symbol.address(), symbol.address() + symbol.size()));
181                }
182
183                // Once we've found both Zig markers we are done. Note that this is a heuristic
184                // and it's possible that a Zig library is linked against code
185                // written in a C-like language. In this case we might be
186                // rewriting unwind information that's correct. This won't have a negative
187                // effect as `_start` is always the first function.
188                if is_zig && let Some((low_address, high_address)) = zig_first_frame {
189                    return Runtime::Zig {
190                        start_low_address: low_address,
191                        start_high_address: high_address,
192                    };
193                }
194            }
195            Runtime::CLike
196        }
197    }
198
199    pub fn is_go(&self) -> bool {
200        for section in self.object.sections() {
201            if let Ok(section_name) = section.name()
202                && (section_name == ".gosymtab"
203                    || section_name == ".gopclntab"
204                    || section_name == ".note.go.buildid")
205            {
206                return true;
207            }
208        }
209        false
210    }
211
212    pub fn go_stop_unwinding_frames(&self) -> Vec<StopUnwindingFrames> {
213        let mut r = Vec::new();
214
215        for symbol in self.object.symbols() {
216            let Ok(name) = symbol.name() else { continue };
217            for func in [
218                "runtime.mcall",
219                "runtime.goexit",
220                "runtime.mstart",
221                "runtime.systemstack",
222            ] {
223                // In some occasions functions might get some suffixes added to them like
224                // `runtime.mcall0`.
225                if name.starts_with(func) {
226                    r.push(StopUnwindingFrames {
227                        name: name.to_string(),
228                        start_address: symbol.address(),
229                        end_address: symbol.address() + symbol.size(),
230                    });
231                }
232            }
233        }
234
235        r
236    }
237
238    /// Retrieves the executable load segments. These are used to convert
239    /// virtual addresses to offsets in an executable during unwinding
240    /// and symbolization.
241    pub fn elf_load_segments(&self) -> Result<Vec<ElfLoad>> {
242        let mmap = &**self.mmap;
243
244        match FileKind::parse(mmap) {
245            Ok(FileKind::Elf32) => {
246                let header: &FileHeader32<Endianness> = FileHeader32::<Endianness>::parse(mmap)?;
247                let endian = header.endian()?;
248                let segments = header.program_headers(endian, mmap)?;
249
250                let mut elf_loads = Vec::new();
251                for segment in segments {
252                    if segment.p_type(endian) != PT_LOAD || segment.p_flags(endian) & PF_X == 0 {
253                        continue;
254                    }
255                    elf_loads.push(ElfLoad {
256                        p_offset: segment.p_offset(endian) as u64,
257                        p_vaddr: segment.p_vaddr(endian) as u64,
258                        p_filesz: segment.p_filesz(endian) as u64,
259                    });
260                }
261                Ok(elf_loads)
262            }
263            Ok(FileKind::Elf64) => {
264                let header: &FileHeader64<Endianness> = FileHeader64::<Endianness>::parse(mmap)?;
265                let endian = header.endian()?;
266                let segments = header.program_headers(endian, mmap)?;
267
268                let mut elf_loads = Vec::new();
269                for segment in segments {
270                    if segment.p_type(endian) != PT_LOAD || segment.p_flags(endian) & PF_X == 0 {
271                        continue;
272                    }
273                    elf_loads.push(ElfLoad {
274                        p_offset: segment.p_offset(endian),
275                        p_vaddr: segment.p_vaddr(endian),
276                        p_filesz: segment.p_filesz(endian),
277                    });
278                }
279                Ok(elf_loads)
280            }
281            Ok(other_file_kind) => Err(anyhow!(
282                "object is not an 32 or 64 bits ELF but {:?}",
283                other_file_kind
284            )),
285            Err(e) => Err(anyhow!("FileKind failed with {:?}", e)),
286        }
287    }
288}
289
290pub fn code_hash(object: &object::File) -> Option<Digest> {
291    for section in object.sections() {
292        let Ok(section_name) = section.name() else {
293            continue;
294        };
295
296        if section_name == ".text"
297            && let Ok(section) = section.data()
298        {
299            return Some(sha256_digest(section));
300        }
301    }
302
303    None
304}
305
306fn sha256_digest<R: Read>(mut reader: R) -> Digest {
307    let mut context = Context::new(&SHA256);
308    let mut buffer = [0; 1024];
309
310    loop {
311        let count = reader
312            .read(&mut buffer)
313            .expect("reading digest into buffer should not fail");
314        if count == 0 {
315            break;
316        }
317        context.update(&buffer[..count]);
318    }
319
320    context.finish()
321}
322
#[cfg(test)]
mod tests {
    use super::*;

    /// Renders `runtime` through its `Debug` implementation.
    fn debug_of(runtime: &Runtime) -> String {
        format!("{runtime:?}")
    }

    #[test]
    fn test_runtime_debug_clike() {
        assert_eq!(debug_of(&Runtime::CLike), "CLike");
    }

    #[test]
    fn test_runtime_debug_v8() {
        assert_eq!(debug_of(&Runtime::V8), "V8");
    }

    #[test]
    fn test_runtime_debug_zig() {
        // Addresses must come out in hexadecimal, not decimal.
        let zig = Runtime::Zig {
            start_low_address: 0x7f8a_0000_1000,
            start_high_address: 0x7f8a_0000_2000,
        };
        assert_eq!(
            debug_of(&zig),
            "Zig { start_low_address: 0x7f8a00001000, start_high_address: 0x7f8a00002000 }"
        );
    }

    #[test]
    fn test_runtime_debug_go() {
        let frame = StopUnwindingFrames {
            name: "runtime.mcall".to_string(),
            start_address: 0x4000,
            end_address: 0x5000,
        };
        let rendered = debug_of(&Runtime::Go(vec![frame]));
        assert!(rendered.starts_with("Go("));
        assert!(rendered.contains("runtime.mcall"));
    }
}