Skip to main content

hermit_entry/
elf.rs

1//! Parsing and loading kernel objects from ELF files.
2
3use core::mem::{self, MaybeUninit};
4use core::{fmt, str};
5
6use align_address::Align;
7use goblin::elf::note::Nhdr32;
8use goblin::elf::reloc::r_to_str;
9use goblin::elf::section_header::{self, SHN_UNDEF};
10use goblin::elf::sym::{self, STB_WEAK};
11use goblin::elf64::dynamic::{self, Dyn, DynamicInfo};
12use goblin::elf64::header::{self, Header};
13use goblin::elf64::program_header::{self, ProgramHeader};
14use goblin::elf64::reloc::{self, Rela};
15use goblin::elf64::section_header::SectionHeader;
16use goblin::elf64::sym::Sym;
17use log::{info, warn};
18use plain::Plain;
19
20use crate::boot_info::{LoadInfo, TlsInfo};
21use crate::{HermitVersion, UhyveIfVersion};
22
// Per-architecture ELF constants: the expected `e_machine` value (`ELF_ARCH`) and the
// relocation types this file knows how to apply. The group matching `target_arch` is
// compiled in.

// See https://refspecs.linuxbase.org/elf/x86_64-abi-0.98.pdf
#[cfg(target_arch = "x86_64")]
const ELF_ARCH: u16 = goblin::elf::header::EM_X86_64;
#[cfg(target_arch = "x86_64")]
const R_ABS64: u32 = goblin::elf::reloc::R_X86_64_64;
#[cfg(target_arch = "x86_64")]
const R_RELATIVE: u32 = goblin::elf::reloc::R_X86_64_RELATIVE;
#[cfg(target_arch = "x86_64")]
const R_GLOB_DAT: u32 = goblin::elf::reloc::R_X86_64_GLOB_DAT;

// See https://github.com/ARM-software/abi-aa/blob/2023Q3/aaelf64/aaelf64.rst#relocation
#[cfg(target_arch = "aarch64")]
const ELF_ARCH: u16 = goblin::elf::header::EM_AARCH64;
#[cfg(target_arch = "aarch64")]
const R_ABS64: u32 = goblin::elf::reloc::R_AARCH64_ABS64;
#[cfg(target_arch = "aarch64")]
const R_RELATIVE: u32 = goblin::elf::reloc::R_AARCH64_RELATIVE;
#[cfg(target_arch = "aarch64")]
const R_GLOB_DAT: u32 = goblin::elf::reloc::R_AARCH64_GLOB_DAT;

// See https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/v1.0/riscv-elf.adoc#relocations
//
// No `R_GLOB_DAT` is defined for riscv64 here; the code that matches on it is gated on
// x86_64 / aarch64 accordingly.
#[cfg(target_arch = "riscv64")]
const ELF_ARCH: u16 = goblin::elf::header::EM_RISCV;
#[cfg(target_arch = "riscv64")]
const R_ABS64: u32 = goblin::elf::reloc::R_RISCV_64;
#[cfg(target_arch = "riscv64")]
const R_RELATIVE: u32 = goblin::elf::reloc::R_RISCV_RELATIVE;
50
/// A parsed kernel object ready for loading.
///
/// The reference and slice fields borrow from the ELF image for the lifetime `'a`;
/// nothing is copied until the kernel is loaded.
pub struct KernelObject<'a> {
    /// The raw bytes of the parsed ELF file.
    elf: &'a [u8],

    /// The ELF file header at the beginning of [`Self::elf`].
    header: &'a Header,

    /// The kernel's program headers.
    ///
    /// Loadable program segments will be copied for execution.
    ///
    /// The thread-local storage segment will be used for creating [`TlsInfo`] for the kernel.
    phs: &'a [ProgramHeader],

    /// Relocations with an explicit addend.
    relas: &'a [Rela],

    /// Dynamic symbol table, indexed by the symbol number of a relocation.
    dynsyms: &'a [Sym],

    /// The kernel's Hermit version if any.
    hermit_version: Option<HermitVersion>,

    /// The kernel's Uhyve interface version if any.
    uhyve_interface_version: Option<UhyveIfVersion>,
}
78
79impl<'a> fmt::Debug for KernelObject<'a> {
80    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
81        let start_addr = self.start_addr();
82        f.debug_struct("KernelObject")
83            .field("hermit_version", &self.hermit_version)
84            .field("start_address", &start_addr)
85            .field(
86                "entry_point",
87                &format_args!("{:#x}", self.entry_point(start_addr.unwrap_or_default())),
88            )
89            .field("tls_info", &self.tls_info(start_addr.unwrap_or_default()))
90            .finish()
91    }
92}
93
/// Iterator over the notes stored in a byte slice.
///
/// Cloning is cheap (a slice and an integer) and yields an independent cursor.
#[derive(Clone)]
struct NoteIterator<'a> {
    /// The not yet consumed bytes; always starts at the next note header.
    bytes: &'a [u8],
    /// Alignment applied to the end of each note's name and descriptor fields.
    align: usize,
}
99
/// A single ELF note borrowed from the underlying bytes.
#[derive(Debug)]
struct Note<'a> {
    /// The note type (`n_type` of the note header).
    ty: u32,
    /// The note name.
    name: &'a str,
    /// The raw descriptor bytes.
    desc: &'a [u8],
}
106
107impl<'a> Iterator for NoteIterator<'a> {
108    type Item = Note<'a>;
109
110    fn next(&mut self) -> Option<Self::Item> {
111        let header = Nhdr32::from_bytes(self.bytes).ok()?;
112        let mut offset = mem::size_of_val(header);
113        let name = str::from_utf8(&self.bytes[offset..][..header.n_namesz as usize - 1]).unwrap();
114        offset = (offset + header.n_namesz as usize).align_up(self.align);
115        let desc = &self.bytes[offset..][..header.n_descsz as usize];
116        offset = (offset + header.n_descsz as usize).align_up(self.align);
117        self.bytes = &self.bytes[offset..];
118        Some(Note {
119            ty: header.n_type,
120            name,
121            desc,
122        })
123    }
124}
125
126fn iter_notes(bytes: &[u8], align: usize) -> NoteIterator<'_> {
127    NoteIterator { bytes, align }
128}
129
130#[derive(Debug)]
131struct ParseHermitVersionError;
132
133impl TryFrom<Note<'_>> for HermitVersion {
134    type Error = ParseHermitVersionError;
135
136    fn try_from(value: Note<'_>) -> Result<Self, Self::Error> {
137        if value.name != "GNU" {
138            return Err(ParseHermitVersionError);
139        }
140
141        if value.ty != crate::NT_GNU_ABI_TAG {
142            return Err(ParseHermitVersionError);
143        }
144
145        let data = <[u8; 16]>::try_from(value.desc).map_err(|_| ParseHermitVersionError)?;
146        let data = unsafe { mem::transmute::<[u8; 16], [u32; 4]>(data) };
147
148        if data[0] != crate::ELF_NOTE_OS_HERMIT {
149            return Err(ParseHermitVersionError);
150        }
151
152        Ok(Self {
153            major: data[1],
154            minor: data[2],
155            patch: data[3],
156        })
157    }
158}
159
160#[derive(Debug)]
161struct ParseUhyveIfVersionError;
162
163impl TryFrom<Note<'_>> for UhyveIfVersion {
164    type Error = ParseUhyveIfVersionError;
165
166    fn try_from(value: Note<'_>) -> Result<Self, Self::Error> {
167        if value.name != "UHYVEIF" {
168            return Err(ParseUhyveIfVersionError);
169        }
170
171        if value.ty != crate::NT_UHYVE_INTERFACE_VERSION {
172            return Err(ParseUhyveIfVersionError);
173        }
174
175        let data = <[u8; 4]>::try_from(value.desc).map_err(|_| ParseUhyveIfVersionError)?;
176        let data = u32::from_be_bytes(data);
177
178        Ok(Self(data))
179    }
180}
181
/// An error returned when parsing a kernel ELF fails.
///
/// Carries a static, human-readable description of what is wrong with the file.
#[derive(Debug)]
pub struct ParseKernelError(&'static str);

impl fmt::Display for ParseKernelError {
    /// Writes the description prefixed with `invalid ELF: `.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(reason) = self;
        write!(f, "invalid ELF: {}", reason)
    }
}
192
193impl KernelObject<'_> {
194    /// Parses raw bytes of an ELF file into a loadable kernel object.
195    pub fn parse(elf: &[u8]) -> Result<KernelObject<'_>, ParseKernelError> {
196        {
197            let range = elf.as_ptr_range();
198            let len = elf.len();
199            info!("Parsing kernel from ELF at {range:?} (len = {len:#x} B / {len} B)");
200        }
201
202        let header = plain::from_bytes::<Header>(elf).unwrap();
203
204        let phs = {
205            let start = header.e_phoff as usize;
206            let len = header.e_phnum as usize;
207            ProgramHeader::slice_from_bytes_len(&elf[start..], len).unwrap()
208        };
209
210        let shs = {
211            let start = header.e_shoff as usize;
212            let len = header.e_shnum as usize;
213            SectionHeader::slice_from_bytes_len(&elf[start..], len).unwrap()
214        };
215
216        let note_section = phs
217            .iter()
218            .find(|ph| ph.p_type == program_header::PT_NOTE)
219            .ok_or(ParseKernelError("Kernel does not have note section"))?;
220        let mut note_iter = iter_notes(
221            &elf[note_section.p_offset as usize..][..note_section.p_filesz as usize],
222            note_section.p_align as usize,
223        );
224
225        let hermit_version = note_iter
226            .clone()
227            .find_map(|note| HermitVersion::try_from(note).ok());
228        if let Some(hermit_version) = hermit_version {
229            info!("Found Hermit version {hermit_version}");
230        }
231
232        let uhyve_interface_version: Option<UhyveIfVersion> = note_iter
233            .clone()
234            .find_map(|note| UhyveIfVersion::try_from(note).ok());
235        if let Some(uhyve_interface_version) = uhyve_interface_version {
236            info!("Found Uhyve interface version {uhyve_interface_version}");
237        }
238
239        // General compatibility checks
240        {
241            let class = header.e_ident[header::EI_CLASS];
242            if class != header::ELFCLASS64 {
243                return Err(ParseKernelError("kernel is not a 64-bit object"));
244            }
245            let data_encoding = header.e_ident[header::EI_DATA];
246
247            #[cfg(target_endian = "little")]
248            if data_encoding != header::ELFDATA2LSB {
249                return Err(ParseKernelError("kernel object is not little endian"));
250            }
251
252            #[cfg(target_endian = "big")]
253            if data_encoding != header::ELFDATA2MSB {
254                return Err(ParseKernelError("kernel object is not big endian"));
255            }
256
257            let os_abi = header.e_ident[header::EI_OSABI];
258            if os_abi != header::ELFOSABI_STANDALONE {
259                warn!("Kernel is not a hermit application");
260            }
261
262            let note = note_iter
263                .find(|note| note.name == "HERMIT" && note.ty == crate::NT_HERMIT_ENTRY_VERSION)
264                .ok_or(ParseKernelError(
265                    "Kernel does not specify hermit entry version",
266                ))?;
267            if note.desc[0] != crate::HERMIT_ENTRY_VERSION {
268                return Err(ParseKernelError("hermit entry version does not match"));
269            }
270
271            if !matches!(header.e_type, header::ET_DYN | header::ET_EXEC) {
272                return Err(ParseKernelError("kernel has unsupported ELF type"));
273            }
274
275            if header.e_machine != ELF_ARCH {
276                return Err(ParseKernelError(
277                    "kernel is not compiled for the correct architecture",
278                ));
279            }
280        }
281
282        let dyns = phs
283            .iter()
284            .find(|program_header| program_header.p_type == program_header::PT_DYNAMIC)
285            .map(|ph| {
286                let start = ph.p_offset as usize;
287                let len = ph.p_filesz as usize;
288                Dyn::slice_from_bytes(&elf[start..][..len]).unwrap()
289            })
290            .unwrap_or_default();
291
292        if dyns.iter().any(|d| d.d_tag == dynamic::DT_NEEDED) {
293            return Err(ParseKernelError(
294                "kernel was linked against dynamic libraries",
295            ));
296        }
297
298        let dynamic_info = DynamicInfo::new(dyns, phs);
299        assert_eq!(0, dynamic_info.relcount);
300
301        let relas = {
302            let start = dynamic_info.rela;
303            let len = dynamic_info.relasz;
304            Rela::slice_from_bytes(&elf[start..][..len]).unwrap()
305        };
306
307        let dynsyms = shs
308            .iter()
309            .find(|section_header| section_header.sh_type == section_header::SHT_DYNSYM)
310            .map(|sh| {
311                let start = sh.sh_offset as usize;
312                let len = sh.sh_size as usize;
313                Sym::slice_from_bytes(&elf[start..][..len]).unwrap()
314            })
315            .unwrap_or_default();
316
317        Ok(KernelObject {
318            elf,
319            header,
320            phs,
321            relas,
322            dynsyms,
323            hermit_version,
324            uhyve_interface_version,
325        })
326    }
327
    /// Returns the Hermit version of this kernel if present.
    ///
    /// This is the value stored in the object when it was created; nothing is re-parsed.
    pub fn hermit_version(&self) -> Option<HermitVersion> {
        self.hermit_version
    }
332
    /// Returns the Uhyve interface version of this kernel if present.
    ///
    /// This is the value stored in the object when it was created; nothing is re-parsed.
    pub fn uhyve_interface_version(&self) -> Option<UhyveIfVersion> {
        self.uhyve_interface_version
    }
337
338    /// Required memory size for loading.
339    pub fn mem_size(&self) -> usize {
340        let first_ph = self
341            .phs
342            .iter()
343            .find(|ph| ph.p_type == program_header::PT_LOAD)
344            .unwrap();
345        let start_addr = first_ph.p_vaddr;
346
347        let last_ph = self
348            .phs
349            .iter()
350            .rev()
351            .find(|ph| ph.p_type == program_header::PT_LOAD)
352            .unwrap();
353        let end_addr = last_ph.p_vaddr + last_ph.p_memsz;
354
355        let mem_size = end_addr - start_addr;
356        mem_size.try_into().unwrap()
357    }
358
359    fn is_relocatable(&self) -> bool {
360        match self.header.e_type {
361            header::ET_DYN => true,
362            header::ET_EXEC => false,
363            _ => unreachable!(),
364        }
365    }
366
367    /// Returns the required start address.
368    ///
369    /// If this returns [`None`], the kernel is relocatable and does not require a certain start address.
370    pub fn start_addr(&self) -> Option<u64> {
371        (!self.is_relocatable()).then(|| {
372            self.phs
373                .iter()
374                .find(|ph| ph.p_type == program_header::PT_LOAD)
375                .unwrap()
376                .p_vaddr
377        })
378    }
379
380    fn tls_info(&self, start_addr: u64) -> Option<TlsInfo> {
381        self.phs
382            .iter()
383            .find(|ph| ph.p_type == program_header::PT_TLS)
384            .map(|ph| {
385                let mut tls_start = ph.p_vaddr;
386                if self.is_relocatable() {
387                    tls_start += start_addr;
388                }
389                TlsInfo {
390                    start: tls_start,
391                    filesz: ph.p_filesz,
392                    memsz: ph.p_memsz,
393                    align: ph.p_align,
394                }
395            })
396    }
397
398    fn entry_point(&self, start_addr: u64) -> u64 {
399        let mut entry_point = self.header.e_entry;
400        if self.is_relocatable() {
401            entry_point += start_addr;
402        }
403        entry_point
404    }
405
406    /// Loads the kernel into the provided memory.
407    pub fn load_kernel(&self, memory: &mut [MaybeUninit<u8>], start_addr: u64) -> LoadedKernel {
408        info!(
409            "Loading kernel to {:?} (len = {len:#x} B / {len} B)",
410            memory.as_ptr_range(),
411            len = memory.len()
412        );
413
414        if !self.is_relocatable() {
415            assert_eq!(self.start_addr().unwrap(), start_addr);
416        }
417        assert_eq!(self.mem_size(), memory.len());
418
419        // Load program segments
420        // Contains TLS initialization image
421        let load_start_addr = self.start_addr().unwrap_or_default();
422        self.phs
423            .iter()
424            .filter(|ph| ph.p_type == program_header::PT_LOAD)
425            .for_each(|ph| {
426                let ph_memory = {
427                    let mem_start = (ph.p_vaddr - load_start_addr) as usize;
428                    let mem_len = ph.p_memsz as usize;
429                    &mut memory[mem_start..][..mem_len]
430                };
431                let file_len = ph.p_filesz as usize;
432                let ph_file = &self.elf[ph.p_offset as usize..][..file_len];
433                // FIXME: Replace with `maybe_uninit_write_slice` once stable
434                let ph_file = unsafe { mem::transmute::<&[u8], &[MaybeUninit<u8>]>(ph_file) };
435                ph_memory[..file_len].copy_from_slice(ph_file);
436                for byte in &mut ph_memory[file_len..] {
437                    byte.write(0);
438                }
439            });
440
441        if self.is_relocatable() {
442            // Perform relocations
443            self.relas.iter().for_each(|rela| {
444                match reloc::r_type(rela.r_info) {
445                    R_ABS64 => {
446                        let sym = reloc::r_sym(rela.r_info) as usize;
447                        let sym = &self.dynsyms[sym];
448
449                        if sym::st_bind(sym.st_info) == STB_WEAK
450                            && u32::from(sym.st_shndx) == SHN_UNDEF
451                        {
452                            let memory = &memory[rela.r_offset as usize..][..8];
453                            let memory =
454                                unsafe { mem::transmute::<&[MaybeUninit<u8>], &[u8]>(memory) };
455                            assert_eq!(memory, &[0; 8]);
456                            return;
457                        }
458
459                        let relocated =
460                            (start_addr as i64 + sym.st_value as i64 + rela.r_addend).to_ne_bytes();
461                        let buf = &relocated[..];
462                        // FIXME: Replace with `maybe_uninit_write_slice` once stable
463                        let buf = unsafe { mem::transmute::<&[u8], &[MaybeUninit<u8>]>(buf) };
464                        memory[rela.r_offset as usize..][..mem::size_of_val(&relocated)]
465                            .copy_from_slice(buf);
466                    }
467                    R_RELATIVE => {
468                        let relocated = (start_addr as i64 + rela.r_addend).to_ne_bytes();
469                        let buf = &relocated[..];
470                        // FIXME: Replace with `maybe_uninit_write_slice` once stable
471                        let buf = unsafe { mem::transmute::<&[u8], &[MaybeUninit<u8>]>(buf) };
472                        memory[rela.r_offset as usize..][..mem::size_of_val(&relocated)]
473                            .copy_from_slice(buf);
474                    }
475                    #[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))]
476                    R_GLOB_DAT => {
477                        let sym = reloc::r_sym(rela.r_info) as usize;
478                        let sym = &self.dynsyms[sym];
479
480                        if sym::st_bind(sym.st_info) == STB_WEAK
481                            && u32::from(sym.st_shndx) == SHN_UNDEF
482                        {
483                            let memory = &memory[rela.r_offset as usize..][..8];
484                            let memory =
485                                unsafe { mem::transmute::<&[MaybeUninit<u8>], &[u8]>(memory) };
486                            assert_eq!(memory, &[0; 8]);
487                            return;
488                        }
489
490                        let relocated =
491                            (start_addr as i64 + sym.st_value as i64 + rela.r_addend).to_ne_bytes();
492                        #[cfg(target_arch = "x86_64")]
493                        assert_eq!(rela.r_addend, 0);
494                        let buf = &relocated[..];
495                        // FIXME: Replace with `maybe_uninit_write_slice` once stable
496                        let buf = unsafe { mem::transmute::<&[u8], &[MaybeUninit<u8>]>(buf) };
497                        memory[rela.r_offset as usize..][..mem::size_of_val(&relocated)]
498                            .copy_from_slice(buf);
499                    }
500                    typ => panic!("unknown relocation type {}", r_to_str(typ, ELF_ARCH)),
501                }
502            });
503        }
504
505        let tls_info = self.tls_info(start_addr);
506
507        if let Some(tls_info) = &tls_info {
508            let range = tls_info.start as *const ()..(tls_info.start + tls_info.memsz) as *const ();
509            let len = tls_info.memsz;
510            info!("TLS is at {range:?} (len =  {len:#x} B / {len} B)",);
511        }
512
513        LoadedKernel {
514            load_info: LoadInfo {
515                kernel_image_addr_range: start_addr..start_addr + self.mem_size() as u64,
516                tls_info,
517            },
518            entry_point: self.entry_point(start_addr),
519        }
520    }
521}
522
/// Load information required by the loader.
///
/// Describes where the kernel image was placed and where execution has to start.
#[derive(Debug)]
pub struct LoadedKernel {
    /// Load information required by the kernel.
    pub load_info: LoadInfo,

    /// The kernel's entry point.
    pub entry_point: u64,
}