solana_sbpf/
elf.rs

1//! This module relocates a BPF ELF
2
// Note: Typically ELF shared objects are loaded using the program headers and
// not the section headers.  Since we are leveraging the elfkit crate it's much
// easier to use the section headers.  There are cases (reduced size, obfuscation)
// where the section headers may be removed from the ELF.  If that happens then
// this loader will need to be re-written to use the program headers instead.
8
9use crate::{
10    aligned_memory::{is_memory_aligned, AlignedMemory},
11    ebpf::{self, HOST_ALIGN, INSN_SIZE},
12    elf_parser::{
13        consts::{
14            ELFCLASS64, ELFDATA2LSB, ELFOSABI_NONE, EM_BPF, EM_SBPF, ET_DYN, R_X86_64_32,
15            R_X86_64_64, R_X86_64_NONE, R_X86_64_RELATIVE,
16        },
17        types::{Elf64Phdr, Elf64Shdr, Elf64Word},
18        Elf64, ElfParserError,
19    },
20    error::EbpfError,
21    memory_region::MemoryRegion,
22    program::{BuiltinProgram, FunctionRegistry, SBPFVersion},
23    verifier::Verifier,
24    vm::{Config, ContextObject},
25};
26
27#[cfg(all(feature = "jit", not(target_os = "windows"), target_arch = "x86_64"))]
28use crate::jit::{JitCompiler, JitProgram};
29use byteorder::{ByteOrder, LittleEndian};
30use std::{collections::BTreeMap, fmt::Debug, mem, ops::Range, str};
31
32#[cfg(not(feature = "shuttle-test"))]
33use std::sync::Arc;
34
35#[cfg(feature = "shuttle-test")]
36use shuttle::sync::Arc;
37
/// Error definitions
///
/// Everything that can go wrong while loading, validating or relocating an
/// ELF in this module. Parser-level failures (`ElfParserError`) are converted
/// into this type through the `From` implementation in this file.
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
pub enum ElfError {
    /// Failed to parse ELF file
    ///
    /// Carries the display text of the underlying parser error.
    #[error("Failed to parse ELF file: {0}")]
    FailedToParse(String),
    /// Entrypoint out of bounds
    ///
    /// The entry address in the file header is not inside the `.text` section.
    #[error("Entrypoint out of bounds")]
    EntrypointOutOfBounds,
    /// Invalid entrypoint
    ///
    /// The entry address is not a whole number of instructions away from the
    /// start of the `.text` section.
    #[error("Invalid entrypoint")]
    InvalidEntrypoint,
    /// Failed to get section
    #[error("Failed to get section {0}")]
    FailedToGetSection(String),
    /// Unresolved symbol
    ///
    /// Fields: symbol name, instruction index, ELF file offset.
    #[error("Unresolved symbol ({0}) at instruction #{1:?} (ELF file offset {2:#x})")]
    UnresolvedSymbol(String, usize, usize),
    /// Section not found
    ///
    /// Carries the name of the section that was looked up.
    #[error("Section not found: {0}")]
    SectionNotFound(String),
    /// Relative jump out of bounds
    ///
    /// Carries the index of the offending instruction.
    #[error("Relative jump out of bounds at instruction #{0}")]
    RelativeJumpOutOfBounds(usize),
    /// Symbol hash collision
    ///
    /// Carries the colliding 32-bit hash.
    #[error("Symbol hash collision {0:#x}")]
    SymbolHashCollision(u32),
    /// Incompatible ELF: wrong endianess
    #[error("Incompatible ELF: wrong endianess")]
    WrongEndianess,
    /// Incompatible ELF: wrong ABI
    #[error("Incompatible ELF: wrong ABI")]
    WrongAbi,
    /// Incompatible ELF: wrong machine
    #[error("Incompatible ELF: wrong machine")]
    WrongMachine,
    /// Incompatible ELF: wrong class
    #[error("Incompatible ELF: wrong class")]
    WrongClass,
    /// Not one text section
    ///
    /// The ELF must contain exactly one section named `.text`.
    #[error("Multiple or no text sections, consider removing llc option: -function-sections")]
    NotOneTextSection,
    /// Read-write data not supported
    ///
    /// Carries the name of the rejected section.
    #[error("Found writable section ({0}) in ELF, read-write data not supported")]
    WritableSectionNotSupported(String),
    /// Relocation failed, no loadable section contains virtual address
    #[error("Relocation failed, no loadable section contains virtual address {0:#x}")]
    AddressOutsideLoadableSection(u64),
    /// Relocation failed, invalid referenced virtual address
    #[error("Relocation failed, invalid referenced virtual address {0:#x}")]
    InvalidVirtualAddress(u64),
    /// Relocation failed, unknown type
    ///
    /// Carries the raw relocation type value.
    #[error("Relocation failed, unknown type {0:?}")]
    UnknownRelocation(u32),
    /// Failed to read relocation info
    #[error("Failed to read relocation info")]
    FailedToReadRelocationInfo,
    /// Incompatible ELF: wrong type
    #[error("Incompatible ELF: wrong type")]
    WrongType,
    /// Unknown symbol
    ///
    /// Carries the index of the symbol that could not be found.
    #[error("Unknown symbol with index {0}")]
    UnknownSymbol(usize),
    /// Offset or value is out of bounds
    #[error("Offset or value is out of bounds")]
    ValueOutOfBounds,
    /// Detected sbpf_version required by the executable which are not enabled
    ///
    /// The version encoded in the ELF flags is not listed in the loader
    /// configuration's enabled versions.
    #[error("Detected sbpf_version required by the executable which are not enabled")]
    UnsupportedSBPFVersion,
    /// Invalid program header
    #[error("Invalid ELF program header")]
    InvalidProgramHeader,
}
111
112impl From<ElfParserError> for ElfError {
113    fn from(err: ElfParserError) -> Self {
114        match err {
115            ElfParserError::InvalidSectionHeader
116            | ElfParserError::InvalidString
117            | ElfParserError::InvalidSize
118            | ElfParserError::Overlap
119            | ElfParserError::SectionNotInOrder
120            | ElfParserError::NoSectionNameStringTable
121            | ElfParserError::InvalidDynamicSectionTable
122            | ElfParserError::InvalidRelocationTable
123            | ElfParserError::InvalidAlignment
124            | ElfParserError::NoStringTable
125            | ElfParserError::NoDynamicStringTable
126            | ElfParserError::InvalidFileHeader
127            | ElfParserError::StringTooLong(_, _) => ElfError::FailedToParse(err.to_string()),
128            ElfParserError::InvalidProgramHeader => ElfError::InvalidProgramHeader,
129            ElfParserError::OutOfBounds => ElfError::ValueOutOfBounds,
130        }
131    }
132}
133
134fn get_section(elf: &Elf64, name: &[u8]) -> Result<Elf64Shdr, ElfError> {
135    for section_header in elf.section_header_table() {
136        if elf.section_name(section_header.sh_name)? == name {
137            return Ok(section_header.clone());
138        }
139    }
140
141    Err(ElfError::SectionNotFound(
142        std::str::from_utf8(name)
143            .unwrap_or("UTF-8 error")
144            .to_string(),
145    ))
146}
147
148// For more information on the BPF instruction set:
149// https://github.com/iovisor/bpf-docs/blob/master/eBPF.md
150
151// msb                                                        lsb
152// +------------------------+----------------+----+----+--------+
153// |immediate               |offset          |src |dst |opcode  |
154// +------------------------+----------------+----+----+--------+
155
156// From least significant to most significant bit:
157//   8 bit opcode
158//   4 bit destination register (dst)
159//   4 bit source register (src)
160//   16 bit offset
161//   32 bit immediate (imm)
162
/// Byte offset of the immediate field in the instruction
///
/// The immediate follows the 8-bit opcode, the two 4-bit register fields and
/// the 16-bit offset, i.e. the first four bytes of the instruction.
const BYTE_OFFSET_IMMEDIATE: usize = 4;
/// Byte length of the immediate field (32 bits)
const BYTE_LENGTH_IMMEDIATE: usize = 4;
167
/// BPF relocation types.
///
/// The discriminants are the raw relocation type numbers; see
/// `from_x86_relocation_type` for the mapping from the x86-64 constants.
#[allow(non_camel_case_types)]
#[derive(Debug, PartialEq, Copy, Clone)]
enum BpfRelocationType {
    /// No relocation, placeholder
    R_Bpf_None = 0,
    /// R_BPF_64_64 relocation type is used for ld_imm64 instruction.
    /// The actual to-be-relocated data (0 or section offset) is
    /// stored at r_offset + 4 and the read/write data bitsize is 32
    /// (4 bytes). The relocation can be resolved with the symbol
    /// value plus implicit addend.
    R_Bpf_64_64 = 1,
    /// 64 bit relocation of a ldxdw instruction.  The ldxdw
    /// instruction occupies two instruction slots. The 64-bit address
    /// to load from is split into the 32-bit imm field of each
    /// slot. The first slot's pre-relocation imm field contains the
    /// virtual address (typically same as the file offset) of the
    /// location to load. Relocation involves calculating the
    /// post-load 64-bit physical address referenced by the imm field
    /// and writing that physical address back into the imm fields of
    /// the ldxdw instruction.
    ///
    /// NOTE(review): the two-slot 64-bit immediate load is referred to as
    /// `ld_imm64` in the `R_Bpf_64_64` description above; "ldxdw" here
    /// presumably means the same instruction -- confirm.
    R_Bpf_64_Relative = 8,
    /// Relocation of a call instruction.  The existing imm field
    /// contains either an offset of the instruction to jump to (think
    /// local function call) or a special value of "-1".  If -1 the
    /// symbol must be looked up in the symbol table.  The relocation
    /// entry contains the symbol number to call.  In order to support
    /// both local jumps and calling external symbols a 32-bit hash is
    /// computed and stored in the call instruction's 32-bit imm
    /// field.  The hash is used later to look up the 64-bit address
    /// to jump to.  In the case of a local jump the hash is
    /// calculated using the current program counter and in the case
    /// of a symbol the hash is calculated using the name of the
    /// symbol.
    R_Bpf_64_32 = 10,
}
204impl BpfRelocationType {
205    fn from_x86_relocation_type(from: u32) -> Option<BpfRelocationType> {
206        match from {
207            R_X86_64_NONE => Some(BpfRelocationType::R_Bpf_None),
208            R_X86_64_64 => Some(BpfRelocationType::R_Bpf_64_64),
209            R_X86_64_RELATIVE => Some(BpfRelocationType::R_Bpf_64_Relative),
210            R_X86_64_32 => Some(BpfRelocationType::R_Bpf_64_32),
211            _ => None,
212        }
213    }
214}
215
/// ELF section
///
/// Describes where the bytes of a loaded section live: either in a buffer of
/// their own or as a byte range inside the ELF buffer they were read from.
#[derive(Debug, PartialEq)]
pub enum Section {
    /// Owned section data.
    ///
    /// The first field is virtual address of the section.
    /// The second field is the actual section data.
    Owned(usize, Vec<u8>),
    /// Borrowed section data.
    ///
    /// The first field is virtual address of the section.
    /// The second field can be used to index the input ELF buffer to
    /// retrieve the section data.
    Borrowed(usize, Range<usize>),
}
231
/// Elf loader/relocator
///
/// Holds an aligned copy of the ELF bytes together with everything derived
/// from them during loading: the text and read-only section locations, the
/// entry point, the function registry and (optionally) the JIT output.
#[derive(Debug, PartialEq)]
pub struct Executable<C: ContextObject> {
    /// Loaded and executable elf
    elf_bytes: AlignedMemory<{ HOST_ALIGN }>,
    /// Required SBPF capabilities
    ///
    /// The parsers initialise this to `SBPFVersion::Reserved`; `load()`
    /// overwrites it with the version decoded from the ELF flags.
    sbpf_version: SBPFVersion,
    /// Read-only section
    ///
    /// Either a range into `elf_bytes` or a separately owned buffer.
    ro_section: Section,
    /// Text section virtual address
    text_section_vaddr: u64,
    /// Text section range in `elf_bytes`
    text_section_range: Range<usize>,
    /// Address of the entry point
    ///
    /// Expressed as an instruction index relative to the start of the text
    /// section, not as a byte address.
    entry_pc: usize,
    /// Call resolution map (hash, pc, name)
    function_registry: FunctionRegistry<usize>,
    /// Loader built-in program
    loader: Arc<BuiltinProgram<C>>,
    /// Compiled program and argument
    ///
    /// `None` until `jit_compile()` has succeeded.
    #[cfg(all(feature = "jit", not(target_os = "windows"), target_arch = "x86_64"))]
    compiled_program: Option<JitProgram>,
}
255
256impl<C: ContextObject> Executable<C> {
    /// Get the configuration settings
    ///
    /// The configuration is owned by the loader this executable was created
    /// with; this simply forwards to it.
    pub fn get_config(&self) -> &Config {
        self.loader.get_config()
    }
261
    /// Get the executable sbpf_version
    ///
    /// Returns the stored version by value.
    pub fn get_sbpf_version(&self) -> SBPFVersion {
        self.sbpf_version
    }
266
267    /// Get the .text section virtual address and bytes
268    pub fn get_text_bytes(&self) -> (u64, &[u8]) {
269        (
270            self.text_section_vaddr,
271            &self.elf_bytes.as_slice()[self.text_section_range.clone()],
272        )
273    }
274
275    /// Get the concatenated read-only sections (including the text section)
276    pub fn get_ro_section(&self) -> &[u8] {
277        match &self.ro_section {
278            Section::Owned(_offset, data) => data.as_slice(),
279            Section::Borrowed(_offset, byte_range) => {
280                &self.elf_bytes.as_slice()[byte_range.clone()]
281            }
282        }
283    }
284
    /// Get a memory region that can be used to access the merged readonly section
    ///
    /// Delegates to the free function `get_ro_region`, passing the stored
    /// read-only section description and the loaded ELF bytes.
    pub fn get_ro_region(&self) -> MemoryRegion {
        get_ro_region(&self.ro_section, self.elf_bytes.as_slice())
    }
289
    /// Get the entry point offset into the text section
    ///
    /// The value is an instruction index (program counter), as computed by
    /// the loaders, not a byte offset.
    pub fn get_entrypoint_instruction_offset(&self) -> usize {
        self.entry_pc
    }
294
    /// Get the text section offset in the ELF file
    ///
    /// This is the start of the text section's byte range inside the loaded
    /// ELF buffer. Only available with the `debugger` feature.
    #[cfg(feature = "debugger")]
    pub fn get_text_section_offset(&self) -> u64 {
        self.text_section_range.start as u64
    }
300
    /// Get the loader built-in program
    ///
    /// Returns a reference to the shared pointer itself, so callers can clone
    /// it if they need their own handle.
    pub fn get_loader(&self) -> &Arc<BuiltinProgram<C>> {
        &self.loader
    }
305
    /// Get the JIT compiled program
    ///
    /// Returns `None` when no compiled program is stored, i.e. before
    /// `jit_compile()` has succeeded.
    #[cfg(all(feature = "jit", not(target_os = "windows"), target_arch = "x86_64"))]
    pub fn get_compiled_program(&self) -> Option<&JitProgram> {
        self.compiled_program.as_ref()
    }
311
312    /// Verify the executable
313    pub fn verify<V: Verifier>(&self) -> Result<(), EbpfError> {
314        <V as Verifier>::verify(
315            self.get_text_bytes().1,
316            self.get_config(),
317            self.get_sbpf_version(),
318            self.get_function_registry(),
319            self.loader.get_function_registry(),
320        )?;
321        Ok(())
322    }
323
324    /// JIT compile the executable
325    #[cfg(all(feature = "jit", not(target_os = "windows"), target_arch = "x86_64"))]
326    pub fn jit_compile(&mut self) -> Result<(), crate::error::EbpfError> {
327        let jit = JitCompiler::<C>::new(self)?;
328        self.compiled_program = Some(jit.compile()?);
329        Ok(())
330    }
331
    /// Get the function registry
    ///
    /// This is the registry of functions belonging to this executable; the
    /// loader's own registry is reached through `get_loader()`.
    pub fn get_function_registry(&self) -> &FunctionRegistry<usize> {
        &self.function_registry
    }
336
337    /// Create from raw text section bytes (list of instructions)
338    pub fn new_from_text_bytes(
339        text_bytes: &[u8],
340        loader: Arc<BuiltinProgram<C>>,
341        sbpf_version: SBPFVersion,
342        mut function_registry: FunctionRegistry<usize>,
343    ) -> Result<Self, ElfError> {
344        let elf_bytes = AlignedMemory::from_slice(text_bytes);
345        let entry_pc = if let Some((_name, pc)) = function_registry.lookup_by_name(b"entrypoint") {
346            pc
347        } else {
348            function_registry.register_function_hashed_legacy(
349                &loader,
350                !sbpf_version.static_syscalls(),
351                *b"entrypoint",
352                0,
353            )?;
354            0
355        };
356        Ok(Self {
357            elf_bytes,
358            sbpf_version,
359            ro_section: Section::Borrowed(ebpf::MM_RODATA_START as usize, 0..text_bytes.len()),
360            text_section_vaddr: if sbpf_version.enable_lower_bytecode_vaddr() {
361                ebpf::MM_BYTECODE_START
362            } else {
363                ebpf::MM_RODATA_START
364            },
365            text_section_range: 0..text_bytes.len(),
366            entry_pc,
367            function_registry,
368            loader,
369            #[cfg(all(feature = "jit", not(target_os = "windows"), target_arch = "x86_64"))]
370            compiled_program: None,
371        })
372    }
373
374    /// Fully loads an ELF
375    pub fn load(bytes: &[u8], loader: Arc<BuiltinProgram<C>>) -> Result<Self, ElfError> {
376        const E_FLAGS_OFFSET: usize = 48;
377        let e_flags = LittleEndian::read_u32(
378            bytes
379                .get(E_FLAGS_OFFSET..E_FLAGS_OFFSET.saturating_add(std::mem::size_of::<u32>()))
380                .ok_or(ElfParserError::OutOfBounds)?,
381        );
382        let config = loader.get_config();
383        let sbpf_version = match e_flags {
384            0 => SBPFVersion::V0,
385            1 => SBPFVersion::V1,
386            2 => SBPFVersion::V2,
387            3 => SBPFVersion::V3,
388            4 => SBPFVersion::V4,
389            _ => SBPFVersion::Reserved,
390        };
391        if !config.enabled_sbpf_versions.contains(&sbpf_version) {
392            return Err(ElfError::UnsupportedSBPFVersion);
393        }
394
395        let mut executable = if sbpf_version.enable_stricter_elf_headers() {
396            Self::load_with_strict_parser(bytes, loader)?
397        } else {
398            Self::load_with_lenient_parser(bytes, loader)?
399        };
400        executable.sbpf_version = sbpf_version;
401        Ok(executable)
402    }
403
404    /// Loads an ELF without relocation
405    pub fn load_with_strict_parser(
406        bytes: &[u8],
407        loader: Arc<BuiltinProgram<C>>,
408    ) -> Result<Self, ElfParserError> {
409        use crate::elf_parser::{
410            consts::{ELFMAG, EV_CURRENT, PF_R, PF_W, PF_X, PT_LOAD, SHN_UNDEF, STT_FUNC},
411            types::{Elf64Ehdr, Elf64Shdr, Elf64Sym},
412        };
413
414        let aligned_memory = AlignedMemory::<{ HOST_ALIGN }>::from_slice(bytes);
415        let elf_bytes = aligned_memory.as_slice();
416
417        let (file_header_range, file_header) = Elf64::parse_file_header(elf_bytes)?;
418        let program_header_table_range = mem::size_of::<Elf64Ehdr>()
419            ..mem::size_of::<Elf64Phdr>()
420                .saturating_mul(file_header.e_phnum as usize)
421                .saturating_add(mem::size_of::<Elf64Ehdr>());
422        if file_header.e_ident.ei_mag != ELFMAG
423            || file_header.e_ident.ei_class != ELFCLASS64
424            || file_header.e_ident.ei_data != ELFDATA2LSB
425            || file_header.e_ident.ei_version != EV_CURRENT as u8
426            || file_header.e_ident.ei_osabi != ELFOSABI_NONE
427            || file_header.e_ident.ei_abiversion != 0x00
428            || file_header.e_ident.ei_pad != [0x00; 7]
429            || file_header.e_type != ET_DYN
430            || file_header.e_machine != EM_SBPF
431            || file_header.e_version != EV_CURRENT
432            // file_header.e_entry
433            || file_header.e_phoff != mem::size_of::<Elf64Ehdr>() as u64
434            // file_header.e_shoff
435            // file_header.e_flags
436            || file_header.e_ehsize != mem::size_of::<Elf64Ehdr>() as u16
437            || file_header.e_phentsize != mem::size_of::<Elf64Phdr>() as u16
438            || file_header.e_phnum < EXPECTED_PROGRAM_HEADERS.len() as u16
439            || program_header_table_range.end >= elf_bytes.len()
440            || file_header.e_shentsize != mem::size_of::<Elf64Shdr>() as u16
441            // file_header.e_shnum
442            || file_header.e_shstrndx >= file_header.e_shnum
443        {
444            return Err(ElfParserError::InvalidFileHeader);
445        }
446
447        const EXPECTED_PROGRAM_HEADERS: [(u32, u64); 4] = [
448            (PF_X, ebpf::MM_BYTECODE_START),     // byte code
449            (PF_R, ebpf::MM_RODATA_START),       // read only data
450            (PF_R | PF_W, ebpf::MM_STACK_START), // stack
451            (PF_R | PF_W, ebpf::MM_HEAP_START),  // heap
452        ];
453        let program_header_table =
454            Elf64::slice_from_bytes::<Elf64Phdr>(elf_bytes, program_header_table_range.clone())?;
455        for (program_header, (p_flags, p_vaddr)) in program_header_table
456            .iter()
457            .zip(EXPECTED_PROGRAM_HEADERS.iter())
458        {
459            let p_filesz = if (*p_flags & PF_W) != 0 {
460                0
461            } else {
462                program_header.p_memsz
463            };
464            if program_header.p_type != PT_LOAD
465                || program_header.p_flags != *p_flags
466                || program_header.p_offset < program_header_table_range.end as u64
467                || program_header.p_offset >= elf_bytes.len() as u64
468                || program_header.p_offset.checked_rem(ebpf::INSN_SIZE as u64) != Some(0)
469                || program_header.p_vaddr != *p_vaddr
470                || program_header.p_paddr != *p_vaddr
471                || program_header.p_filesz != p_filesz
472                || program_header.p_filesz
473                    > (elf_bytes.len() as u64).saturating_sub(program_header.p_offset)
474                || program_header.p_filesz.checked_rem(ebpf::INSN_SIZE as u64) != Some(0)
475                || program_header.p_memsz >= ebpf::MM_REGION_SIZE
476            {
477                return Err(ElfParserError::InvalidProgramHeader);
478            }
479        }
480
481        let bytecode_header = &program_header_table[0];
482        let rodata_header = &program_header_table[1];
483        let text_section_vaddr = bytecode_header.p_vaddr;
484        let text_section_range = bytecode_header.file_range().unwrap_or_default();
485        let ro_section = Section::Borrowed(
486            rodata_header.p_vaddr as usize,
487            rodata_header.file_range().unwrap_or_default(),
488        );
489
490        if !bytecode_header.vm_range().contains(
491            &file_header
492                .e_entry
493                .saturating_add(ebpf::INSN_SIZE as u64)
494                .saturating_sub(1),
495        ) || file_header.e_entry.checked_rem(ebpf::INSN_SIZE as u64) != Some(0)
496        {
497            return Err(ElfParserError::InvalidFileHeader);
498        }
499        let entry_pc = file_header
500            .e_entry
501            .saturating_sub(bytecode_header.p_vaddr)
502            .checked_div(ebpf::INSN_SIZE as u64)
503            .unwrap_or_default() as usize;
504        let entry_insn = ebpf::get_insn(&elf_bytes[text_section_range.clone()], entry_pc);
505        if !entry_insn.is_function_start_marker() {
506            return Err(ElfParserError::InvalidFileHeader);
507        }
508
509        let mut function_registry = FunctionRegistry::<usize>::default();
510        let config = loader.get_config();
511        if config.enable_symbol_and_section_labels {
512            let (_section_header_table_range, section_header_table) =
513                Elf64::parse_section_header_table(
514                    elf_bytes,
515                    file_header_range.clone(),
516                    file_header,
517                    program_header_table_range.clone(),
518                )
519                .unwrap();
520            let section_names_section_header = (file_header.e_shstrndx != SHN_UNDEF)
521                .then(|| {
522                    section_header_table
523                        .get(file_header.e_shstrndx as usize)
524                        .ok_or(ElfParserError::OutOfBounds)
525                })
526                .transpose()?
527                .unwrap();
528            let mut symbol_names_section_header = None;
529            let mut symbol_table_section_header = None;
530            for section_header in section_header_table.iter() {
531                let section_name = Elf64::get_string_in_section(
532                    elf_bytes,
533                    section_names_section_header,
534                    section_header.sh_name,
535                    64,
536                )
537                .unwrap();
538                if section_name == b".strtab" {
539                    symbol_names_section_header = Some(section_header);
540                }
541                if section_name == b".symtab" {
542                    symbol_table_section_header = Some(section_header);
543                }
544            }
545            let symbol_names_section_header = symbol_names_section_header.unwrap();
546            let symbol_table: &[Elf64Sym] =
547                Elf64::slice_from_section_header(elf_bytes, symbol_table_section_header.unwrap())
548                    .unwrap();
549            for symbol in symbol_table {
550                if symbol.st_info & STT_FUNC == 0 {
551                    continue;
552                }
553                let target_pc = symbol
554                    .st_value
555                    .saturating_sub(bytecode_header.p_vaddr)
556                    .checked_div(ebpf::INSN_SIZE as u64)
557                    .unwrap_or_default() as usize;
558                let name = Elf64::get_string_in_section(
559                    elf_bytes,
560                    symbol_names_section_header,
561                    symbol.st_name as Elf64Word,
562                    u8::MAX as usize,
563                )
564                .unwrap();
565                function_registry
566                    .register_function(target_pc as u32, name, target_pc)
567                    .unwrap();
568            }
569        }
570
571        Ok(Self {
572            elf_bytes: aligned_memory,
573            sbpf_version: SBPFVersion::Reserved, // Is set in Self::load()
574            ro_section,
575            text_section_vaddr,
576            text_section_range,
577            entry_pc,
578            function_registry,
579            loader,
580            #[cfg(all(feature = "jit", not(target_os = "windows"), target_arch = "x86_64"))]
581            compiled_program: None,
582        })
583    }
584
585    /// Loads an ELF with relocation
586    fn load_with_lenient_parser(
587        bytes: &[u8],
588        loader: Arc<BuiltinProgram<C>>,
589    ) -> Result<Self, ElfError> {
590        // We always need one memory copy to take ownership and for relocations
591        let aligned_memory = AlignedMemory::<{ HOST_ALIGN }>::from_slice(bytes);
592        let (mut elf_bytes, unrelocated_elf_bytes) =
593            if is_memory_aligned(bytes.as_ptr() as usize, HOST_ALIGN) {
594                (aligned_memory, bytes)
595            } else {
596                // We might need another memory copy to ensure alignment
597                (aligned_memory.clone(), aligned_memory.as_slice())
598            };
599        let elf = Elf64::parse(unrelocated_elf_bytes)?;
600
601        let config = loader.get_config();
602        let header = elf.file_header();
603
604        Self::validate(&elf, elf_bytes.as_slice())?;
605
606        // calculate the text section info
607        let text_section = get_section(&elf, b".text")?;
608        let text_section_vaddr = text_section.sh_addr.saturating_add(ebpf::MM_RODATA_START);
609        if (config.reject_broken_elfs && text_section.sh_addr != text_section.sh_offset)
610            || text_section_vaddr > ebpf::MM_STACK_START
611        {
612            return Err(ElfError::ValueOutOfBounds);
613        }
614
615        // relocate symbols
616        let mut function_registry = FunctionRegistry::default();
617        Self::relocate(
618            &mut function_registry,
619            &loader,
620            &elf,
621            elf_bytes.as_slice_mut(),
622        )?;
623
624        // calculate entrypoint offset into the text section
625        let offset = header.e_entry.saturating_sub(text_section.sh_addr);
626        if offset.checked_rem(ebpf::INSN_SIZE as u64) != Some(0) {
627            return Err(ElfError::InvalidEntrypoint);
628        }
629        let entry_pc = if let Some(entry_pc) = (offset as usize).checked_div(ebpf::INSN_SIZE) {
630            function_registry.register_function_hashed_legacy(
631                &loader,
632                true,
633                *b"entrypoint",
634                entry_pc,
635            )?;
636            entry_pc
637        } else {
638            return Err(ElfError::InvalidEntrypoint);
639        };
640
641        let ro_section = Self::parse_ro_sections(
642            config,
643            elf.section_header_table()
644                .iter()
645                .map(|s| (elf.section_name(s.sh_name).ok(), s)),
646            elf_bytes.as_slice(),
647        )?;
648
649        Ok(Self {
650            elf_bytes,
651            sbpf_version: SBPFVersion::Reserved, // Is set in Self::load()
652            ro_section,
653            text_section_vaddr,
654            text_section_range: text_section.file_range().unwrap_or_default(),
655            entry_pc,
656            function_registry,
657            loader,
658            #[cfg(all(feature = "jit", not(target_os = "windows"), target_arch = "x86_64"))]
659            compiled_program: None,
660        })
661    }
662
663    /// Calculate the total memory size of the executable
664    #[rustfmt::skip]
665    #[allow(clippy::size_of_ref)]
666    pub fn mem_size(&self) -> usize {
667        let mut total = mem::size_of::<Self>();
668        total = total
669            // elf bytes
670            .saturating_add(self.elf_bytes.mem_size())
671            // ro section
672            .saturating_add(match &self.ro_section {
673                Section::Owned(_, data) => data.capacity(),
674                Section::Borrowed(_, _) => 0,
675            })
676            // bpf functions
677            .saturating_add(self.function_registry.mem_size());
678
679        #[cfg(all(feature = "jit", not(target_os = "windows"), target_arch = "x86_64"))]
680        {
681            // compiled programs
682            total = total.saturating_add(self.compiled_program.as_ref().map_or(0, |program| program.mem_size()));
683        }
684
685        total
686    }
687
688    // Functions exposed for tests
689
690    /// Validates the ELF
691    pub fn validate(elf: &Elf64, elf_bytes: &[u8]) -> Result<(), ElfError> {
692        let header = elf.file_header();
693        if header.e_ident.ei_class != ELFCLASS64 {
694            return Err(ElfError::WrongClass);
695        }
696        if header.e_ident.ei_data != ELFDATA2LSB {
697            return Err(ElfError::WrongEndianess);
698        }
699        if header.e_ident.ei_osabi != ELFOSABI_NONE {
700            return Err(ElfError::WrongAbi);
701        }
702        if header.e_machine != EM_BPF && header.e_machine != EM_SBPF {
703            return Err(ElfError::WrongMachine);
704        }
705        if header.e_type != ET_DYN {
706            return Err(ElfError::WrongType);
707        }
708
709        let num_text_sections =
710            elf.section_header_table()
711                .iter()
712                .fold(0, |count: usize, section_header| {
713                    if let Ok(this_name) = elf.section_name(section_header.sh_name) {
714                        if this_name == b".text" {
715                            return count.saturating_add(1);
716                        }
717                    }
718                    count
719                });
720        if 1 != num_text_sections {
721            return Err(ElfError::NotOneTextSection);
722        }
723
724        for section_header in elf.section_header_table().iter() {
725            if let Ok(name) = elf.section_name(section_header.sh_name) {
726                if name.starts_with(b".bss")
727                    || (section_header.is_writable()
728                        && (name.starts_with(b".data") && !name.starts_with(b".data.rel")))
729                {
730                    return Err(ElfError::WritableSectionNotSupported(
731                        String::from_utf8_lossy(name).to_string(),
732                    ));
733                }
734            }
735        }
736
737        for section_header in elf.section_header_table().iter() {
738            let start = section_header.sh_offset as usize;
739            let end = section_header
740                .sh_offset
741                .checked_add(section_header.sh_size)
742                .ok_or(ElfError::ValueOutOfBounds)? as usize;
743            let _ = elf_bytes
744                .get(start..end)
745                .ok_or(ElfError::ValueOutOfBounds)?;
746        }
747        let text_section = get_section(elf, b".text")?;
748        if !text_section.vm_range().contains(&header.e_entry) {
749            return Err(ElfError::EntrypointOutOfBounds);
750        }
751
752        Ok(())
753    }
754
755    /// Parses and concatenates the readonly data sections
756    pub fn parse_ro_sections<'a, S: IntoIterator<Item = (Option<&'a [u8]>, &'a Elf64Shdr)>>(
757        config: &Config,
758        sections: S,
759        elf_bytes: &[u8],
760    ) -> Result<Section, ElfError> {
761        // the lowest section address
762        let mut lowest_addr = usize::MAX;
763        // the highest section address
764        let mut highest_addr = 0;
765        // the aggregated section length, not including gaps between sections
766        let mut ro_fill_length = 0usize;
767        let mut invalid_offsets = false;
768
769        // keep track of where ro sections are so we can tell whether they're
770        // contiguous
771        let mut first_ro_section = 0;
772        let mut last_ro_section = 0;
773        let mut n_ro_sections = 0usize;
774
775        let mut ro_slices = vec![];
776        for (i, (name, section_header)) in sections.into_iter().enumerate() {
777            match name {
778                Some(name)
779                    if name == b".text"
780                        || name == b".rodata"
781                        || name == b".data.rel.ro"
782                        || name == b".eh_frame" => {}
783                _ => continue,
784            }
785
786            if n_ro_sections == 0 {
787                first_ro_section = i;
788            }
789            last_ro_section = i;
790            n_ro_sections = n_ro_sections.saturating_add(1);
791
792            let section_addr = section_header.sh_addr;
793
794            // sh_offset handling:
795            // section_addr must match sh_offset
796            if !invalid_offsets && section_addr != section_header.sh_offset {
797                invalid_offsets = true;
798            }
799
800            let vaddr_end = section_addr.saturating_add(ebpf::MM_RODATA_START);
801            if (config.reject_broken_elfs && invalid_offsets) || vaddr_end > ebpf::MM_STACK_START {
802                return Err(ElfError::ValueOutOfBounds);
803            }
804
805            let section_data = elf_bytes
806                .get(section_header.file_range().unwrap_or_default())
807                .ok_or(ElfError::ValueOutOfBounds)?;
808
809            let section_addr = section_addr as usize;
810            lowest_addr = lowest_addr.min(section_addr);
811            highest_addr = highest_addr.max(section_addr.saturating_add(section_data.len()));
812            ro_fill_length = ro_fill_length.saturating_add(section_data.len());
813
814            ro_slices.push((section_addr, section_data));
815        }
816
817        if config.reject_broken_elfs && lowest_addr.saturating_add(ro_fill_length) > highest_addr {
818            return Err(ElfError::ValueOutOfBounds);
819        }
820
821        let can_borrow = !invalid_offsets
822            && last_ro_section
823                .saturating_add(1)
824                .saturating_sub(first_ro_section)
825                == n_ro_sections;
826        let ro_section = if config.optimize_rodata && can_borrow {
827            // Read only sections are grouped together with no intermixed non-ro
828            // sections. We can borrow.
829
830            let addr_offset = if lowest_addr >= ebpf::MM_RODATA_START as usize {
831                // The first field of Section::Borrowed is an offset from
832                // ebpf::MM_RODATA_START so if the linker has already put the
833                // sections within ebpf::MM_RODATA_START, we need to subtract
834                // it now.
835                lowest_addr
836            } else {
837                lowest_addr.saturating_add(ebpf::MM_RODATA_START as usize)
838            };
839
840            Section::Borrowed(addr_offset, lowest_addr..highest_addr)
841        } else {
842            // Read only and other non-ro sections are mixed. Zero the non-ro
843            // sections and and copy the ro ones at their intended offsets.
844
845            if config.optimize_rodata {
846                // The rodata region starts at MM_RODATA_START + offset,
847                // [MM_RODATA_START, MM_RODATA_START + offset) is not
848                // mappable. We only need to allocate highest_addr - lowest_addr
849                // bytes.
850                highest_addr = highest_addr.saturating_sub(lowest_addr);
851            } else {
852                // For backwards compatibility, the whole [MM_RODATA_START,
853                // MM_RODATA_START + highest_addr) range is mappable. We need
854                // to allocate the whole address range.
855                lowest_addr = 0;
856            };
857
858            let buf_len = highest_addr;
859            if buf_len > elf_bytes.len() {
860                return Err(ElfError::ValueOutOfBounds);
861            }
862
863            let mut ro_section = vec![0; buf_len];
864            for (section_addr, slice) in ro_slices.iter() {
865                let buf_offset_start = section_addr.saturating_sub(lowest_addr);
866                ro_section[buf_offset_start..buf_offset_start.saturating_add(slice.len())]
867                    .copy_from_slice(slice);
868            }
869
870            let addr_offset = if lowest_addr >= ebpf::MM_RODATA_START as usize {
871                lowest_addr
872            } else {
873                lowest_addr.saturating_add(ebpf::MM_RODATA_START as usize)
874            };
875            Section::Owned(addr_offset, ro_section)
876        };
877
878        Ok(ro_section)
879    }
880
    /// Relocates the ELF in-place
    ///
    /// Works in three passes over `elf_bytes`:
    /// 1. every `CALL_IMM` instruction in `.text` whose immediate is not `-1`
    ///    gets its immediate replaced by the key returned from registering
    ///    the call target in `function_registry`,
    /// 2. every entry of the dynamic relocation table is applied,
    /// 3. if `enable_symbol_and_section_labels` is set in the loader config,
    ///    the function symbols of the symbol table are registered as well.
    ///
    /// * `function_registry` - receives the functions discovered on the way
    /// * `loader` - provides the config and the registry used to look up
    ///   syscalls
    /// * `elf` - parsed view used to find sections, relocations and symbols
    /// * `elf_bytes` - the file buffer that is patched
    ///
    /// # Errors
    ///
    /// Returns [ElfError::ValueOutOfBounds], [ElfError::RelativeJumpOutOfBounds],
    /// [ElfError::UnknownSymbol], [ElfError::InvalidVirtualAddress],
    /// [ElfError::UnresolvedSymbol] or [ElfError::UnknownRelocation] as
    /// detailed in the body, and propagates the errors of `get_section` and
    /// `register_function_hashed_legacy`.
    fn relocate(
        function_registry: &mut FunctionRegistry<usize>,
        loader: &BuiltinProgram<C>,
        elf: &Elf64,
        elf_bytes: &mut [u8],
    ) -> Result<(), ElfError> {
        // Remembers the hash computed for each symbol name index so that a
        // syscall referenced by several relocations is hashed only once
        let mut syscall_cache = BTreeMap::new();
        let text_section = get_section(elf, b".text")?;

        // Fixup all program counter relative call instructions
        let config = loader.get_config();
        let text_bytes = elf_bytes
            .get_mut(text_section.file_range().unwrap_or_default())
            .ok_or(ElfError::ValueOutOfBounds)?;
        let instruction_count = text_bytes
            .len()
            .checked_div(ebpf::INSN_SIZE)
            .ok_or(ElfError::ValueOutOfBounds)?;
        for i in 0..instruction_count {
            let insn = ebpf::get_insn(text_bytes, i);
            // Calls with imm == -1 are left untouched by this pass.
            // NOTE(review): presumably those are placeholders patched by the
            // R_Bpf_64_32 relocations below - confirm.
            if insn.opc == ebpf::CALL_IMM && insn.imm != -1 {
                // The immediate is relative to the instruction following the call
                let target_pc = (i as isize)
                    .saturating_add(1)
                    .saturating_add(insn.imm as isize);
                if target_pc < 0 || target_pc >= instruction_count as isize {
                    return Err(ElfError::RelativeJumpOutOfBounds(i));
                }
                // A readable name is only generated when labels are enabled,
                // otherwise the function is registered under an empty name
                let name = if config.enable_symbol_and_section_labels {
                    format!("function_{target_pc}")
                } else {
                    String::default()
                };
                let key = function_registry.register_function_hashed_legacy(
                    loader,
                    true,
                    name.as_bytes(),
                    target_pc as usize,
                )?;
                // Overwrite the 4 bytes starting 4 bytes into the instruction
                // slot with the key.
                // NOTE(review): assumes this is the immediate field, i.e. the
                // same position as BYTE_OFFSET_IMMEDIATE - confirm.
                let offset = i.saturating_mul(ebpf::INSN_SIZE).saturating_add(4);
                let checked_slice = text_bytes
                    .get_mut(offset..offset.saturating_add(4))
                    .ok_or(ElfError::ValueOutOfBounds)?;
                LittleEndian::write_u32(checked_slice, key);
            }
        }

        // Fixup all the relocations in the relocation section if exists
        for relocation in elf.dynamic_relocations_table().unwrap_or_default().iter() {
            // r_offset is used as an index into elf_bytes, i.e. as a file offset
            let r_offset = relocation.r_offset as usize;

            match BpfRelocationType::from_x86_relocation_type(relocation.r_type()) {
                Some(BpfRelocationType::R_Bpf_64_64) => {
                    // Offset of the immediate field
                    let imm_offset = r_offset.saturating_add(BYTE_OFFSET_IMMEDIATE);

                    // Read the instruction's immediate field which contains virtual
                    // address to convert to physical
                    let checked_slice = elf_bytes
                        .get(imm_offset..imm_offset.saturating_add(BYTE_LENGTH_IMMEDIATE))
                        .ok_or(ElfError::ValueOutOfBounds)?;
                    let refd_addr = LittleEndian::read_u32(checked_slice) as u64;

                    let symbol = elf
                        .dynamic_symbol_table()
                        .and_then(|table| table.get(relocation.r_sym() as usize).cloned())
                        .ok_or_else(|| ElfError::UnknownSymbol(relocation.r_sym() as usize))?;

                    // The relocated address is relative to the address of the
                    // symbol at index `r_sym`
                    let mut addr = symbol.st_value.saturating_add(refd_addr);

                    // The "physical address" from the VM's perspective is rooted
                    // at `MM_RODATA_START`. If the linker hasn't already put
                    // the symbol within `MM_RODATA_START`, we need to do so
                    // now.
                    if addr < ebpf::MM_RODATA_START {
                        addr = ebpf::MM_RODATA_START.saturating_add(addr);
                    }

                    // The 64 bit address is split over the immediates of two
                    // consecutive instruction slots: low half in the first,
                    // high half one INSN_SIZE further
                    let imm_low_offset = imm_offset;
                    let imm_high_offset = imm_low_offset.saturating_add(INSN_SIZE);

                    // Write the low side of the relocate address
                    let imm_slice = elf_bytes
                        .get_mut(
                            imm_low_offset..imm_low_offset.saturating_add(BYTE_LENGTH_IMMEDIATE),
                        )
                        .ok_or(ElfError::ValueOutOfBounds)?;
                    LittleEndian::write_u32(imm_slice, (addr & 0xFFFFFFFF) as u32);

                    // Write the high side of the relocate address
                    let imm_slice = elf_bytes
                        .get_mut(
                            imm_high_offset..imm_high_offset.saturating_add(BYTE_LENGTH_IMMEDIATE),
                        )
                        .ok_or(ElfError::ValueOutOfBounds)?;
                    LittleEndian::write_u32(
                        imm_slice,
                        addr.checked_shr(32).unwrap_or_default() as u32,
                    );
                }
                Some(BpfRelocationType::R_Bpf_64_Relative) => {
                    // Relocation between different sections, where the target
                    // memory is not associated to a symbol (eg some compiler
                    // generated rodata that doesn't have an explicit symbol).

                    // Offset of the immediate field
                    let imm_offset = r_offset.saturating_add(BYTE_OFFSET_IMMEDIATE);

                    // The handling depends on whether the relocated location
                    // lies inside the file range of the .text section
                    if text_section
                        .file_range()
                        .unwrap_or_default()
                        .contains(&r_offset)
                    {
                        // We're relocating a lddw instruction, which spans two
                        // instruction slots. The address to be relocated is
                        // split in two halves in the two imms of the
                        // instruction slots.
                        let imm_low_offset = imm_offset;
                        let imm_high_offset = r_offset
                            .saturating_add(INSN_SIZE)
                            .saturating_add(BYTE_OFFSET_IMMEDIATE);

                        // Read the low side of the address
                        let imm_slice = elf_bytes
                            .get(
                                imm_low_offset
                                    ..imm_low_offset.saturating_add(BYTE_LENGTH_IMMEDIATE),
                            )
                            .ok_or(ElfError::ValueOutOfBounds)?;
                        let va_low = LittleEndian::read_u32(imm_slice) as u64;

                        // Read the high side of the address
                        let imm_slice = elf_bytes
                            .get(
                                imm_high_offset
                                    ..imm_high_offset.saturating_add(BYTE_LENGTH_IMMEDIATE),
                            )
                            .ok_or(ElfError::ValueOutOfBounds)?;
                        let va_high = LittleEndian::read_u32(imm_slice) as u64;

                        // Put the address back together
                        let mut refd_addr = va_high.checked_shl(32).unwrap_or_default() | va_low;

                        // A zero address is rejected rather than relocated
                        if refd_addr == 0 {
                            return Err(ElfError::InvalidVirtualAddress(refd_addr));
                        }

                        if refd_addr < ebpf::MM_RODATA_START {
                            // The linker hasn't already placed rodata within
                            // MM_RODATA_START, so we do so now
                            refd_addr = ebpf::MM_RODATA_START.saturating_add(refd_addr);
                        }

                        // Write back the low half
                        let imm_slice = elf_bytes
                            .get_mut(
                                imm_low_offset
                                    ..imm_low_offset.saturating_add(BYTE_LENGTH_IMMEDIATE),
                            )
                            .ok_or(ElfError::ValueOutOfBounds)?;
                        LittleEndian::write_u32(imm_slice, (refd_addr & 0xFFFFFFFF) as u32);

                        // Write back the high half
                        let imm_slice = elf_bytes
                            .get_mut(
                                imm_high_offset
                                    ..imm_high_offset.saturating_add(BYTE_LENGTH_IMMEDIATE),
                            )
                            .ok_or(ElfError::ValueOutOfBounds)?;
                        LittleEndian::write_u32(
                            imm_slice,
                            refd_addr.checked_shr(32).unwrap_or_default() as u32,
                        );
                    } else {
                        // There used to be a bug in toolchains before
                        // https://github.com/solana-labs/llvm-project/pull/35 where for 64 bit
                        // relocations we were encoding only the low 32 bits, shifted 32 bits to
                        // the left. Our relocation code used to be compatible with that, so we
                        // need to keep supporting this case for backwards compatibility.
                        //
                        // Hence only the 32 bits at `imm_offset` are read here,
                        // MM_RODATA_START is added unconditionally, and the
                        // result is written back as a full 64 bit value
                        // starting at `r_offset`.
                        let addr_slice = elf_bytes
                            .get(imm_offset..imm_offset.saturating_add(BYTE_LENGTH_IMMEDIATE))
                            .ok_or(ElfError::ValueOutOfBounds)?;
                        let mut refd_addr = LittleEndian::read_u32(addr_slice) as u64;
                        refd_addr = ebpf::MM_RODATA_START.saturating_add(refd_addr);

                        let addr_slice = elf_bytes
                            .get_mut(r_offset..r_offset.saturating_add(mem::size_of::<u64>()))
                            .ok_or(ElfError::ValueOutOfBounds)?;
                        LittleEndian::write_u64(addr_slice, refd_addr);
                    }
                }
                Some(BpfRelocationType::R_Bpf_64_32) => {
                    // The .text section has an unresolved call to symbol instruction
                    // Hash the symbol name and stick it into the call instruction's imm
                    // field.  Later that hash will be used to look up the function location.

                    // Offset of the immediate field
                    let imm_offset = r_offset.saturating_add(BYTE_OFFSET_IMMEDIATE);

                    let symbol = elf
                        .dynamic_symbol_table()
                        .and_then(|table| table.get(relocation.r_sym() as usize).cloned())
                        .ok_or_else(|| ElfError::UnknownSymbol(relocation.r_sym() as usize))?;

                    let name = elf
                        .dynamic_symbol_name(symbol.st_name as Elf64Word)
                        .map_err(|_| ElfError::UnknownSymbol(symbol.st_name as usize))?;

                    // If the symbol is defined, this is a bpf-to-bpf call
                    let key = if symbol.is_function() && symbol.st_value != 0 {
                        // The callee must live inside the .text address range
                        if !text_section.vm_range().contains(&symbol.st_value) {
                            return Err(ElfError::ValueOutOfBounds);
                        }
                        // Convert the symbol address into an instruction index
                        // relative to the start of .text
                        let target_pc = (symbol.st_value.saturating_sub(text_section.sh_addr)
                            as usize)
                            .checked_div(ebpf::INSN_SIZE)
                            .unwrap_or_default();
                        function_registry
                            .register_function_hashed_legacy(loader, true, name, target_pc)?
                    } else {
                        // Else it's a syscall
                        let hash = *syscall_cache
                            .entry(symbol.st_name)
                            .or_insert_with(|| ebpf::hash_symbol_name(name));
                        // Unknown syscalls are only an error when broken ELFs
                        // are rejected; otherwise the hash is written anyway
                        if config.reject_broken_elfs
                            && loader.get_function_registry().lookup_by_key(hash).is_none()
                        {
                            return Err(ElfError::UnresolvedSymbol(
                                String::from_utf8_lossy(name).to_string(),
                                r_offset.checked_div(ebpf::INSN_SIZE).unwrap_or(0),
                                r_offset,
                            ));
                        }
                        hash
                    };

                    let checked_slice = elf_bytes
                        .get_mut(imm_offset..imm_offset.saturating_add(BYTE_LENGTH_IMMEDIATE))
                        .ok_or(ElfError::ValueOutOfBounds)?;
                    LittleEndian::write_u32(checked_slice, key);
                }
                // Any relocation type that does not map to one of the three
                // handled above aborts the load
                _ => return Err(ElfError::UnknownRelocation(relocation.r_type())),
            }
        }

        if config.enable_symbol_and_section_labels {
            // Register all known function names from the symbol table
            for symbol in elf.symbol_table().ok().flatten().unwrap_or_default().iter() {
                // The mask 0xEF clears bit 4 of st_info before comparing
                // against 0x02.
                // NOTE(review): presumably this selects function symbols
                // (type 2) with local or global binding - confirm against the
                // ELF st_info encoding.
                if symbol.st_info & 0xEF != 0x02 {
                    continue;
                }
                if !text_section.vm_range().contains(&symbol.st_value) {
                    return Err(ElfError::ValueOutOfBounds);
                }
                // Convert the symbol address into an instruction index
                // relative to the start of .text
                let target_pc = (symbol.st_value.saturating_sub(text_section.sh_addr) as usize)
                    .checked_div(ebpf::INSN_SIZE)
                    .unwrap_or_default();
                let name = elf
                    .symbol_name(symbol.st_name as Elf64Word)
                    .map_err(|_| ElfError::UnknownSymbol(symbol.st_name as usize))?;
                function_registry.register_function_hashed_legacy(loader, true, name, target_pc)?;
            }
        }

        Ok(())
    }
1149
1150    #[allow(dead_code)]
1151    fn dump_data(name: &str, prog: &[u8]) {
1152        let mut eight_bytes: Vec<u8> = Vec::new();
1153        println!("{name}");
1154        for i in prog.iter() {
1155            if eight_bytes.len() >= 7 {
1156                println!("{eight_bytes:02X?}");
1157                eight_bytes.clear();
1158            } else {
1159                eight_bytes.push(*i);
1160            }
1161        }
1162    }
1163}
1164
1165/// Creates a [MemoryRegion] for the given [Section]
1166pub fn get_ro_region(ro_section: &Section, elf: &[u8]) -> MemoryRegion {
1167    let (offset, ro_data) = match ro_section {
1168        Section::Owned(offset, data) => (*offset, data.as_slice()),
1169        Section::Borrowed(offset, byte_range) => (*offset, &elf[byte_range.clone()]),
1170    };
1171
1172    // If offset > 0, the region will start at MM_RODATA_START + the offset of
1173    // the first read only byte. [MM_RODATA_START, MM_RODATA_START + offset)
1174    // will be unmappable, see MemoryRegion::vm_to_host.
1175    MemoryRegion::new_readonly(ro_data, offset as u64)
1176}