hermit_entry/
elf.rs

1//! Parsing and loading kernel objects from ELF files.
2
3use core::mem::{self, MaybeUninit};
4use core::{fmt, str};
5
6use align_address::Align;
7use goblin::elf::note::Nhdr32;
8use goblin::elf::reloc::r_to_str;
9use goblin::elf::section_header::{self, SHN_UNDEF};
10use goblin::elf::sym::{self, STB_WEAK};
11use goblin::elf64::dynamic::{self, Dyn, DynamicInfo};
12use goblin::elf64::header::{self, Header};
13use goblin::elf64::program_header::{self, ProgramHeader};
14use goblin::elf64::reloc::{self, Rela};
15use goblin::elf64::section_header::SectionHeader;
16use goblin::elf64::sym::Sym;
17use log::{info, warn};
18use plain::Plain;
19
20use crate::boot_info::{LoadInfo, TlsInfo};
21use crate::HermitVersion;
22
// Architecture-specific ELF constants.
//
// `ELF_ARCH` is the expected `e_machine` value for the target architecture,
// and the `R_*` constants are the relocation types that are handled when
// loading a relocatable kernel.

// See https://refspecs.linuxbase.org/elf/x86_64-abi-0.98.pdf
#[cfg(target_arch = "x86_64")]
const ELF_ARCH: u16 = goblin::elf::header::EM_X86_64;
#[cfg(target_arch = "x86_64")]
const R_ABS64: u32 = goblin::elf::reloc::R_X86_64_64;
#[cfg(target_arch = "x86_64")]
const R_RELATIVE: u32 = goblin::elf::reloc::R_X86_64_RELATIVE;
#[cfg(target_arch = "x86_64")]
const R_GLOB_DAT: u32 = goblin::elf::reloc::R_X86_64_GLOB_DAT;

// See https://github.com/ARM-software/abi-aa/blob/2023Q3/aaelf64/aaelf64.rst#relocation
#[cfg(target_arch = "aarch64")]
const ELF_ARCH: u16 = goblin::elf::header::EM_AARCH64;
#[cfg(target_arch = "aarch64")]
const R_ABS64: u32 = goblin::elf::reloc::R_AARCH64_ABS64;
#[cfg(target_arch = "aarch64")]
const R_RELATIVE: u32 = goblin::elf::reloc::R_AARCH64_RELATIVE;
#[cfg(target_arch = "aarch64")]
const R_GLOB_DAT: u32 = goblin::elf::reloc::R_AARCH64_GLOB_DAT;

// See https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/v1.0/riscv-elf.adoc#relocations
// Note: no `R_GLOB_DAT` relocation exists for RISC-V.
#[cfg(target_arch = "riscv64")]
const ELF_ARCH: u16 = goblin::elf::header::EM_RISCV;
#[cfg(target_arch = "riscv64")]
const R_ABS64: u32 = goblin::elf::reloc::R_RISCV_64;
#[cfg(target_arch = "riscv64")]
const R_RELATIVE: u32 = goblin::elf::reloc::R_RISCV_RELATIVE;
/// A parsed kernel object ready for loading.
///
/// Created via [`Self::parse`]; loaded into memory via [`Self::load_kernel`].
pub struct KernelObject<'a> {
    /// The raw bytes of the parsed ELF file.
    elf: &'a [u8],

    /// The ELF file header at the beginning of [`Self::elf`].
    header: &'a Header,

    /// The kernel's program headers.
    ///
    /// Loadable program segments will be copied for execution.
    ///
    /// The thread-local storage segment will be used for creating [`TlsInfo`] for the kernel.
    phs: &'a [ProgramHeader],

    /// Relocations with an explicit addend.
    ///
    /// Applied in [`Self::load_kernel`] for relocatable (`ET_DYN`) kernels.
    relas: &'a [Rela],

    /// Symbol table for relocations.
    dynsyms: &'a [Sym],

    /// The kernel's Hermit version if any.
    hermit_version: Option<HermitVersion>,
}
75
76impl<'a> fmt::Debug for KernelObject<'a> {
77    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78        let start_addr = self.start_addr();
79        f.debug_struct("KernelObject")
80            .field("hermit_version", &self.hermit_version)
81            .field("start_address", &start_addr)
82            .field(
83                "entry_point",
84                &format_args!("{:#x}", self.entry_point(start_addr.unwrap_or_default())),
85            )
86            .field("tls_info", &self.tls_info(start_addr.unwrap_or_default()))
87            .finish()
88    }
89}
90
/// An iterator over the entries of an ELF note segment.
#[derive(Clone)]
struct NoteIterator<'a> {
    // The remaining, not-yet-parsed bytes of the note segment.
    bytes: &'a [u8],
    // Field alignment, taken from the note segment's `p_align`.
    align: usize,
}
96
/// A single parsed ELF note.
#[derive(Debug)]
struct Note<'a> {
    // The note's type (`n_type`), e.g. `NT_GNU_ABI_TAG`.
    ty: u32,
    // The note's owner name with the trailing NUL byte stripped.
    name: &'a str,
    // The note's descriptor (payload) bytes.
    desc: &'a [u8],
}
103
104impl<'a> Iterator for NoteIterator<'a> {
105    type Item = Note<'a>;
106
107    fn next(&mut self) -> Option<Self::Item> {
108        let header = Nhdr32::from_bytes(self.bytes).ok()?;
109        let mut offset = mem::size_of_val(header);
110        let name = str::from_utf8(&self.bytes[offset..][..header.n_namesz as usize - 1]).unwrap();
111        offset = (offset + header.n_namesz as usize).align_up(self.align);
112        let desc = &self.bytes[offset..][..header.n_descsz as usize];
113        offset = (offset + header.n_descsz as usize).align_up(self.align);
114        self.bytes = &self.bytes[offset..];
115        Some(Note {
116            ty: header.n_type,
117            name,
118            desc,
119        })
120    }
121}
122
123fn iter_notes(bytes: &[u8], align: usize) -> NoteIterator<'_> {
124    NoteIterator { bytes, align }
125}
126
/// An error returned when parsing a [`HermitVersion`] from a [`Note`] fails.
#[derive(Debug)]
struct ParseHermitVersionError;
129
130impl TryFrom<Note<'_>> for HermitVersion {
131    type Error = ParseHermitVersionError;
132
133    fn try_from(value: Note<'_>) -> Result<Self, Self::Error> {
134        if value.name != "GNU" {
135            return Err(ParseHermitVersionError);
136        }
137
138        if value.ty != crate::NT_GNU_ABI_TAG {
139            return Err(ParseHermitVersionError);
140        }
141
142        let data = <[u8; 16]>::try_from(value.desc).map_err(|_| ParseHermitVersionError)?;
143        let data = unsafe { mem::transmute::<[u8; 16], [u32; 4]>(data) };
144
145        if data[0] != crate::ELF_NOTE_OS_HERMIT {
146            return Err(ParseHermitVersionError);
147        }
148
149        Ok(Self {
150            major: data[1],
151            minor: data[2],
152            patch: data[3],
153        })
154    }
155}
156
/// An error returned when parsing a kernel ELF fails.
#[derive(Debug)]
// The inner string is a human-readable reason for the failure.
pub struct ParseKernelError(&'static str);
160
161impl fmt::Display for ParseKernelError {
162    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
163        let info = self.0;
164        write!(f, "invalid ELF: {info}")
165    }
166}
167
168impl KernelObject<'_> {
169    /// Parses raw bytes of an ELF file into a loadable kernel object.
170    pub fn parse(elf: &[u8]) -> Result<KernelObject<'_>, ParseKernelError> {
171        {
172            let range = elf.as_ptr_range();
173            let len = elf.len();
174            info!("Parsing kernel from ELF at {range:?} (len = {len:#x} B / {len} B)");
175        }
176
177        let header = plain::from_bytes::<Header>(elf).unwrap();
178
179        let phs = {
180            let start = header.e_phoff as usize;
181            let len = header.e_phnum as usize;
182            ProgramHeader::slice_from_bytes_len(&elf[start..], len).unwrap()
183        };
184
185        let shs = {
186            let start = header.e_shoff as usize;
187            let len = header.e_shnum as usize;
188            SectionHeader::slice_from_bytes_len(&elf[start..], len).unwrap()
189        };
190
191        let note_section = phs
192            .iter()
193            .find(|ph| ph.p_type == program_header::PT_NOTE)
194            .ok_or(ParseKernelError("Kernel does not have note section"))?;
195        let mut note_iter = iter_notes(
196            &elf[note_section.p_offset as usize..][..note_section.p_filesz as usize],
197            note_section.p_align as usize,
198        );
199
200        let hermit_version = note_iter
201            .clone()
202            .find_map(|note| HermitVersion::try_from(note).ok());
203        if let Some(hermit_version) = hermit_version {
204            info!("Found Hermit version {hermit_version}");
205        }
206
207        // General compatibility checks
208        {
209            let class = header.e_ident[header::EI_CLASS];
210            if class != header::ELFCLASS64 {
211                return Err(ParseKernelError("kernel ist not a 64-bit object"));
212            }
213            let data_encoding = header.e_ident[header::EI_DATA];
214
215            #[cfg(target_endian = "little")]
216            if data_encoding != header::ELFDATA2LSB {
217                return Err(ParseKernelError("kernel object is not little endian"));
218            }
219
220            #[cfg(target_endian = "big")]
221            if data_encoding != header::ELFDATA2MSB {
222                return Err(ParseKernelError("kernel object is not big endian"));
223            }
224
225            let os_abi = header.e_ident[header::EI_OSABI];
226            if os_abi != header::ELFOSABI_STANDALONE {
227                warn!("Kernel is not a hermit application");
228            }
229
230            let note = note_iter
231                .find(|note| note.name == "HERMIT" && note.ty == crate::NT_HERMIT_ENTRY_VERSION)
232                .ok_or(ParseKernelError(
233                    "Kernel does not specify hermit entry version",
234                ))?;
235            if note.desc[0] != crate::HERMIT_ENTRY_VERSION {
236                return Err(ParseKernelError("hermit entry version does not match"));
237            }
238
239            if !matches!(header.e_type, header::ET_DYN | header::ET_EXEC) {
240                return Err(ParseKernelError("kernel has unsupported ELF type"));
241            }
242
243            if header.e_machine != ELF_ARCH {
244                return Err(ParseKernelError(
245                    "kernel is not compiled for the correct architecture",
246                ));
247            }
248        }
249
250        let dyns = phs
251            .iter()
252            .find(|program_header| program_header.p_type == program_header::PT_DYNAMIC)
253            .map(|ph| {
254                let start = ph.p_offset as usize;
255                let len = ph.p_filesz as usize;
256                Dyn::slice_from_bytes(&elf[start..][..len]).unwrap()
257            })
258            .unwrap_or_default();
259
260        if dyns.iter().any(|d| d.d_tag == dynamic::DT_NEEDED) {
261            return Err(ParseKernelError(
262                "kernel was linked against dynamic libraries",
263            ));
264        }
265
266        let dynamic_info = DynamicInfo::new(dyns, phs);
267        assert_eq!(0, dynamic_info.relcount);
268
269        let relas = {
270            let start = dynamic_info.rela;
271            let len = dynamic_info.relasz;
272            Rela::slice_from_bytes(&elf[start..][..len]).unwrap()
273        };
274
275        let dynsyms = shs
276            .iter()
277            .find(|section_header| section_header.sh_type == section_header::SHT_DYNSYM)
278            .map(|sh| {
279                let start = sh.sh_offset as usize;
280                let len = sh.sh_size as usize;
281                Sym::slice_from_bytes(&elf[start..][..len]).unwrap()
282            })
283            .unwrap_or_default();
284
285        Ok(KernelObject {
286            elf,
287            header,
288            phs,
289            relas,
290            dynsyms,
291            hermit_version,
292        })
293    }
294
    /// Returns the Hermit version of this kernel if present.
    ///
    /// The version is taken from a `GNU` ABI-tag note in the kernel's note
    /// segment during [`Self::parse`].
    pub fn hermit_version(&self) -> Option<HermitVersion> {
        self.hermit_version
    }
299
300    /// Required memory size for loading.
301    pub fn mem_size(&self) -> usize {
302        let first_ph = self
303            .phs
304            .iter()
305            .find(|ph| ph.p_type == program_header::PT_LOAD)
306            .unwrap();
307        let start_addr = first_ph.p_vaddr;
308
309        let last_ph = self
310            .phs
311            .iter()
312            .rev()
313            .find(|ph| ph.p_type == program_header::PT_LOAD)
314            .unwrap();
315        let end_addr = last_ph.p_vaddr + last_ph.p_memsz;
316
317        let mem_size = end_addr - start_addr;
318        mem_size.try_into().unwrap()
319    }
320
321    fn is_relocatable(&self) -> bool {
322        match self.header.e_type {
323            header::ET_DYN => true,
324            header::ET_EXEC => false,
325            _ => unreachable!(),
326        }
327    }
328
329    /// Returns the required start address.
330    ///
331    /// If this returns [`None`], the kernel is relocatable and does not require a certain start address.
332    pub fn start_addr(&self) -> Option<u64> {
333        (!self.is_relocatable()).then(|| {
334            self.phs
335                .iter()
336                .find(|ph| ph.p_type == program_header::PT_LOAD)
337                .unwrap()
338                .p_vaddr
339        })
340    }
341
342    fn tls_info(&self, start_addr: u64) -> Option<TlsInfo> {
343        self.phs
344            .iter()
345            .find(|ph| ph.p_type == program_header::PT_TLS)
346            .map(|ph| {
347                let mut tls_start = ph.p_vaddr;
348                if self.is_relocatable() {
349                    tls_start += start_addr;
350                }
351                TlsInfo {
352                    start: tls_start,
353                    filesz: ph.p_filesz,
354                    memsz: ph.p_memsz,
355                    align: ph.p_align,
356                }
357            })
358    }
359
360    fn entry_point(&self, start_addr: u64) -> u64 {
361        let mut entry_point = self.header.e_entry;
362        if self.is_relocatable() {
363            entry_point += start_addr;
364        }
365        entry_point
366    }
367
368    /// Loads the kernel into the provided memory.
369    pub fn load_kernel(&self, memory: &mut [MaybeUninit<u8>], start_addr: u64) -> LoadedKernel {
370        info!(
371            "Loading kernel to {:?} (len = {len:#x} B / {len} B)",
372            memory.as_ptr_range(),
373            len = memory.len()
374        );
375
376        if !self.is_relocatable() {
377            assert_eq!(self.start_addr().unwrap(), start_addr);
378        }
379        assert_eq!(self.mem_size(), memory.len());
380
381        // Load program segments
382        // Contains TLS initialization image
383        let load_start_addr = self.start_addr().unwrap_or_default();
384        self.phs
385            .iter()
386            .filter(|ph| ph.p_type == program_header::PT_LOAD)
387            .for_each(|ph| {
388                let ph_memory = {
389                    let mem_start = (ph.p_vaddr - load_start_addr) as usize;
390                    let mem_len = ph.p_memsz as usize;
391                    &mut memory[mem_start..][..mem_len]
392                };
393                let file_len = ph.p_filesz as usize;
394                let ph_file = &self.elf[ph.p_offset as usize..][..file_len];
395                // FIXME: Replace with `maybe_uninit_write_slice` once stable
396                let ph_file = unsafe { mem::transmute::<&[u8], &[MaybeUninit<u8>]>(ph_file) };
397                ph_memory[..file_len].copy_from_slice(ph_file);
398                for byte in &mut ph_memory[file_len..] {
399                    byte.write(0);
400                }
401            });
402
403        if self.is_relocatable() {
404            // Perform relocations
405            self.relas.iter().for_each(|rela| {
406                match reloc::r_type(rela.r_info) {
407                    R_ABS64 => {
408                        let sym = reloc::r_sym(rela.r_info) as usize;
409                        let sym = &self.dynsyms[sym];
410
411                        if sym::st_bind(sym.st_info) == STB_WEAK
412                            && u32::from(sym.st_shndx) == SHN_UNDEF
413                        {
414                            let memory = &memory[rela.r_offset as usize..][..8];
415                            let memory =
416                                unsafe { mem::transmute::<&[MaybeUninit<u8>], &[u8]>(memory) };
417                            assert_eq!(memory, &[0; 8]);
418                            return;
419                        }
420
421                        let relocated =
422                            (start_addr as i64 + sym.st_value as i64 + rela.r_addend).to_ne_bytes();
423                        let buf = &relocated[..];
424                        // FIXME: Replace with `maybe_uninit_write_slice` once stable
425                        let buf = unsafe { mem::transmute::<&[u8], &[MaybeUninit<u8>]>(buf) };
426                        memory[rela.r_offset as usize..][..mem::size_of_val(&relocated)]
427                            .copy_from_slice(buf);
428                    }
429                    R_RELATIVE => {
430                        let relocated = (start_addr as i64 + rela.r_addend).to_ne_bytes();
431                        let buf = &relocated[..];
432                        // FIXME: Replace with `maybe_uninit_write_slice` once stable
433                        let buf = unsafe { mem::transmute::<&[u8], &[MaybeUninit<u8>]>(buf) };
434                        memory[rela.r_offset as usize..][..mem::size_of_val(&relocated)]
435                            .copy_from_slice(buf);
436                    }
437                    #[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))]
438                    R_GLOB_DAT => {
439                        let sym = reloc::r_sym(rela.r_info) as usize;
440                        let sym = &self.dynsyms[sym];
441
442                        if sym::st_bind(sym.st_info) == STB_WEAK
443                            && u32::from(sym.st_shndx) == SHN_UNDEF
444                        {
445                            let memory = &memory[rela.r_offset as usize..][..8];
446                            let memory =
447                                unsafe { mem::transmute::<&[MaybeUninit<u8>], &[u8]>(memory) };
448                            assert_eq!(memory, &[0; 8]);
449                            return;
450                        }
451
452                        let relocated =
453                            (start_addr as i64 + sym.st_value as i64 + rela.r_addend).to_ne_bytes();
454                        #[cfg(target_arch = "x86_64")]
455                        assert_eq!(rela.r_addend, 0);
456                        let buf = &relocated[..];
457                        // FIXME: Replace with `maybe_uninit_write_slice` once stable
458                        let buf = unsafe { mem::transmute::<&[u8], &[MaybeUninit<u8>]>(buf) };
459                        memory[rela.r_offset as usize..][..mem::size_of_val(&relocated)]
460                            .copy_from_slice(buf);
461                    }
462                    typ => panic!("unkown relocation type {}", r_to_str(typ, ELF_ARCH)),
463                }
464            });
465        }
466
467        let tls_info = self.tls_info(start_addr);
468
469        if let Some(tls_info) = &tls_info {
470            let range = tls_info.start as *const ()..(tls_info.start + tls_info.memsz) as *const ();
471            let len = tls_info.memsz;
472            info!("TLS is at {range:?} (len =  {len:#x} B / {len} B)",);
473        }
474
475        LoadedKernel {
476            load_info: LoadInfo {
477                kernel_image_addr_range: start_addr..start_addr + self.mem_size() as u64,
478                tls_info,
479            },
480            entry_point: self.entry_point(start_addr),
481        }
482    }
483}
484
/// Load information required by the loader.
///
/// Returned by [`KernelObject::load_kernel`].
#[derive(Debug)]
pub struct LoadedKernel {
    /// Load information required by the kernel.
    pub load_info: LoadInfo,

    /// The kernel's entry point.
    pub entry_point: u64,
}