elf_loader 0.14.0

A high-performance, no_std compliant ELF loader and JIT linker for Rust.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
//! x86-64 architecture-specific ELF relocation and dynamic linking support.
//!
//! This module provides x86-64 specific implementations for ELF relocation,
//! dynamic linking, and procedure linkage table (PLT) handling.

use crate::{
    elf::ElfRelType,
    relocation::{
        RelocHelper, RelocValue, RelocationHandler, StaticReloc, SymbolLookup, reloc_error,
    },
    segment::section::{GotEntry, PltEntry, PltGotSection},
};
use elf::abi::*;

/// The ELF machine type for x86-64 architecture.
pub const EM_ARCH: u16 = EM_X86_64;

/// Offset for TLS Dynamic Thread Vector.
///
/// This is 0 on x86-64, i.e. no bias is applied.
/// NOTE(review): an earlier comment justified the value with "the TCB comes
/// first"; x86-64 normally uses the variant II TLS layout, where the TLS
/// blocks sit below the thread pointer, so that rationale looks wrong.
/// Presumably the value is 0 simply because x86-64 does not bias DTV
/// entries — confirm against the TLS code that consumes this constant.
pub const TLS_DTV_OFFSET: usize = 0;

/// Relative relocation type - add base address to relative offset.
pub const REL_RELATIVE: u32 = R_X86_64_RELATIVE;
/// GOT entry relocation type - set GOT entry to symbol address.
pub const REL_GOT: u32 = R_X86_64_GLOB_DAT;
/// TLS DTPMOD relocation type - set to TLS module ID.
pub const REL_DTPMOD: u32 = R_X86_64_DTPMOD64;
/// Symbolic relocation type - set to absolute symbol address.
pub const REL_SYMBOLIC: u32 = R_X86_64_64;
/// PLT jump slot relocation type - set PLT entry to symbol address.
pub const REL_JUMP_SLOT: u32 = R_X86_64_JUMP_SLOT;
/// TLS DTPOFF relocation type - set to TLS offset relative to DTV.
pub const REL_DTPOFF: u32 = R_X86_64_DTPOFF64;
/// IRELATIVE relocation type - call function to get address.
pub const REL_IRELATIVE: u32 = R_X86_64_IRELATIVE;
/// COPY relocation type - copy data from shared object.
pub const REL_COPY: u32 = R_X86_64_COPY;
/// TLS TPOFF relocation type - set to TLS offset relative to thread pointer.
pub const REL_TPOFF: u32 = R_X86_64_TPOFF64;
/// TLSDESC relocation type - set to a function pointer and an argument.
pub const REL_TLSDESC: u32 = R_X86_64_TLSDESC;

/// Read the calling thread's thread pointer.
///
/// The value is loaded from the 64-bit word at `fs:0`.
/// NOTE(review): this presumes the first word of the FS segment holds the
/// thread pointer itself (the self-pointer slot used on Linux x86-64) —
/// confirm for any other target this crate supports.
///
/// # Safety
/// The FS segment base must be set up and `fs:0` must be readable on the
/// calling thread.
pub(crate) unsafe fn get_thread_pointer() -> *mut u8 {
    let thread_ptr: *mut u8;
    // SAFETY: a single load from `fs:0`; the caller guarantees the FS base is
    // valid per this function's contract.
    unsafe { core::arch::asm!("mov {}, fs:0", out(reg) thread_ptr) };
    thread_ptr
}

/// Offset in GOT for dynamic library handle.
/// NOTE(review): presumably a slot index rather than a byte offset — confirm
/// against the code that fills the GOT.
pub(crate) const DYLIB_OFFSET: usize = 1;
/// Offset in GOT for resolver function pointer.
/// NOTE(review): presumably a slot index rather than a byte offset — confirm
/// against the code that fills the GOT.
pub(crate) const RESOLVE_FUNCTION_OFFSET: usize = 2;
/// Size of each PLT entry in bytes.
pub(crate) const PLT_ENTRY_SIZE: usize = 16;

/// Template for PLT entries.
///
/// Byte layout of one entry:
/// - bytes 0..4: `endbr64` instruction for CET (Control-flow Enforcement
///   Technology)
/// - bytes 4..6: opcode bytes (`ff 25`) of a RIP-relative indirect `jmp`
/// - bytes 6..10: 32-bit displacement of that `jmp`; zero in the template and
///   patched per entry (the `R_X86_64_PLT32` handling in this file writes
///   `plt[6..10]` with `got_addr - plt_addr - 10`, 10 being the offset of the
///   end of the `jmp` instruction)
/// - bytes 10..16: `0xcc` padding
pub(crate) const PLT_ENTRY: [u8; PLT_ENTRY_SIZE] = [
    0xf3, 0x0f, 0x1e, 0xfa, // endbr64
    0xff, 0x25, 0, 0, 0, 0, // jmp *GOTPLT+idx(%rip)
    0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // (padding)
];

/// Dynamic linker runtime resolver for x86-64 PLT entries.
///
/// This trampoline is entered when a lazily bound PLT entry is called for the
/// first time. It saves the register state the pending call depends on, calls
/// `dl_fixup(link_map, reloc_idx)` to resolve the symbol, restores the state
/// and tail-jumps to the resolved function.
///
/// Preserved across the resolution:
/// - `rax`: `al` carries the number of vector registers used when the pending
///   call targets a variadic function, so it must reach the callee intact
/// - `rdi`, `rsi`, `rdx`, `rcx`, `r8`, `r9`: integer argument registers
/// - `r10`: static chain pointer
/// - `xmm0`-`xmm7`: vector argument registers (low 128 bits only; the upper
///   halves of `ymm`/`zmm` registers are not saved)
///
/// `r11` is used to hold the resolved address for the final jump and is
/// therefore clobbered. The x86-64 psABI reserves `r11` as a scratch register
/// that PLT code may use freely, so callers cannot rely on it.
///
/// # Safety
/// This function uses naked assembly and must be entered with the stack
/// layout set up by the PLT stub code: `link_map` on top of the stack,
/// `reloc_idx` below it, and the caller's return address below that.
#[unsafe(naked)]
pub(crate) extern "C" fn dl_runtime_resolve() {
    core::arch::naked_asm!(
        "
    // Save rax (al is the vector register count for variadic callees),
    // the integer argument registers and r10 (static chain)
    push rax
    push rdi
    push rsi
    push rdx
    push rcx
    push r8
    push r9
    push r10

    // Save xmm registers (arguments can be passed in xmm0-xmm7)
    // We need 128 bytes for xmm0-xmm7 + 8 bytes padding to align stack to 16 bytes
    sub rsp, 136
    movdqu [rsp + 0], xmm0
    movdqu [rsp + 16], xmm1
    movdqu [rsp + 32], xmm2
    movdqu [rsp + 48], xmm3
    movdqu [rsp + 64], xmm4
    movdqu [rsp + 80], xmm5
    movdqu [rsp + 96], xmm6
    movdqu [rsp + 112], xmm7

    // Arguments for dl_fixup(link_map, reloc_idx)
    // link_map was pushed by PLT0, reloc_idx was pushed by PLT entry
    // Stack layout now:
    // [rsp + 0..127]  : xmm0-xmm7
    // [rsp + 128..135]: padding
    // [rsp + 136..199]: r10, r9, r8, rcx, rdx, rsi, rdi, rax (8 * 8 = 64)
    // [rsp + 200]     : link_map
    // [rsp + 208]     : reloc_idx
    // [rsp + 216]     : return address to caller
    mov rdi, [rsp + 200]
    mov rsi, [rsp + 208]

    // Call the resolver
    call {0}

    // Keep the resolved address in r11 so that rax can be restored.
    // r11 is the scratch register the psABI sets aside for PLT code.
    mov r11, rax

    // Restore xmm registers
    movdqu xmm0, [rsp + 0]
    movdqu xmm1, [rsp + 16]
    movdqu xmm2, [rsp + 32]
    movdqu xmm3, [rsp + 48]
    movdqu xmm4, [rsp + 64]
    movdqu xmm5, [rsp + 80]
    movdqu xmm6, [rsp + 96]
    movdqu xmm7, [rsp + 112]
    add rsp, 136

    // Restore the saved general purpose registers
    pop r10
    pop r9
    pop r8
    pop rcx
    pop rdx
    pop rsi
    pop rdi
    pop rax

    // Clean up link_map and reloc_idx from stack
    add rsp, 16

    // Jump to the resolved function
    jmp r11
    ",
        sym crate::relocation::dl_fixup,
    )
}

/// x86-64 ELF relocator implementation.
///
/// A zero-sized marker type: it carries no state and exists only to host the
/// x86-64 implementation of the `StaticReloc` trait. That implementation
/// handles absolute (`R_X86_64_64`, `R_X86_64_32`, `R_X86_64_32S`),
/// PC-relative (`R_X86_64_PC32`), GOT (`R_X86_64_GOTPCREL`) and PLT
/// (`R_X86_64_PLT32`) relocations.
pub(crate) struct X86_64Relocator;

/// Map x86_64 relocation type value to human readable name.
///
/// This function converts numeric relocation type constants to their
/// corresponding string names for debugging and error reporting purposes.
///
/// # Arguments
/// * `r_type` - The numeric relocation type value
///
/// # Returns
/// A static string containing the relocation type name, or "UNKNOWN" for
/// unrecognized types. Values that do not fit in 32 bits are always
/// "UNKNOWN"; they are never truncated onto a known type.
pub(crate) fn rel_type_to_str(r_type: usize) -> &'static str {
    // Relocation type constants are `u32`. A plain `as u32` would silently
    // drop the high bits and could report a bogus value as a valid type.
    if r_type > u32::MAX as usize {
        return "UNKNOWN";
    }
    match r_type as u32 {
        R_X86_64_NONE => "R_X86_64_NONE",
        R_X86_64_64 => "R_X86_64_64",
        R_X86_64_PC32 => "R_X86_64_PC32",
        R_X86_64_GOT32 => "R_X86_64_GOT32",
        R_X86_64_PLT32 => "R_X86_64_PLT32",
        R_X86_64_COPY => "R_X86_64_COPY",
        R_X86_64_GLOB_DAT => "R_X86_64_GLOB_DAT",
        R_X86_64_JUMP_SLOT => "R_X86_64_JUMP_SLOT",
        R_X86_64_RELATIVE => "R_X86_64_RELATIVE",
        R_X86_64_GOTPCREL => "R_X86_64_GOTPCREL",
        R_X86_64_32 => "R_X86_64_32",
        R_X86_64_32S => "R_X86_64_32S",
        R_X86_64_IRELATIVE => "R_X86_64_IRELATIVE",
        R_X86_64_TPOFF64 => "R_X86_64_TPOFF64",
        R_X86_64_TLSDESC => "R_X86_64_TLSDESC",
        R_X86_64_DTPMOD64 => "R_X86_64_DTPMOD64",
        R_X86_64_DTPOFF64 => "R_X86_64_DTPOFF64",
        _ => "UNKNOWN",
    }
}

/// Static TLSDESC resolver for x86-64.
///
/// Calling convention (x86-64 TLSDESC ABI):
/// - on entry, `rax` holds the address of the TLSDESC structure
/// - on return, `rax` holds the value stored in the descriptor's second
///   word (`[rax + 8]`), which is returned as the offset
/// - no other register is touched
#[unsafe(naked)]
pub(crate) extern "C" fn tlsdesc_resolver_static() {
    core::arch::naked_asm!(
        "
        // The second word of the descriptor already holds the result
        mov rax, [rax + 8]
        ret
        ",
    )
}

/// Dynamic TLSDESC resolver for x86-64.
///
/// According to the x86-64 TLSDESC ABI:
/// - rax: address of the TLSDESC structure
/// - [rax + 8]: pointer to TlsDescDynamicArg
/// - returns: offset in rax (addr - TP)
/// - All other registers preserved.
///
/// The argument block is read as: first word = pointer to the
/// `tls_get_addr` function, followed at offset 8 by the TlsIndex that is
/// passed to it by address.
///
/// Because `tls_get_addr` is called with the regular C calling convention,
/// every register that convention lets a callee clobber is saved around the
/// call: the caller-saved general purpose registers and `xmm0`-`xmm15`.
/// Only the low 128 bits of the vector registers are saved; the upper halves
/// of `ymm`/`zmm` registers are not. Flags are clobbered.
///
/// The frame is sized so that `rsp` is 16-byte aligned at the inner call:
/// the entry `rsp` is 8 (mod 16) after the caller's `call`, the eight pushes
/// keep that residue, and the 264-byte area (256 bytes of xmm slots plus 8
/// bytes of padding) brings it to 0 (mod 16).
#[unsafe(naked)]
pub(crate) extern "C" fn tlsdesc_resolver_dynamic() {
    core::arch::naked_asm!(
        "
        // Save all general purpose registers that might be clobbered
        push rdi
        push rsi
        push rdx
        push rcx
        push r8
        push r9
        push r10
        push r11

        // Save xmm0-xmm15 (all are caller-saved in the C calling convention)
        // 16 * 16 = 256 bytes + 8 bytes padding to align stack to 16 bytes
        sub rsp, 264
        movdqu [rsp + 0], xmm0
        movdqu [rsp + 16], xmm1
        movdqu [rsp + 32], xmm2
        movdqu [rsp + 48], xmm3
        movdqu [rsp + 64], xmm4
        movdqu [rsp + 80], xmm5
        movdqu [rsp + 96], xmm6
        movdqu [rsp + 112], xmm7
        movdqu [rsp + 128], xmm8
        movdqu [rsp + 144], xmm9
        movdqu [rsp + 160], xmm10
        movdqu [rsp + 176], xmm11
        movdqu [rsp + 192], xmm12
        movdqu [rsp + 208], xmm13
        movdqu [rsp + 224], xmm14
        movdqu [rsp + 240], xmm15

        mov rsi, [rax + 8]   // Get TlsDescDynamicArg pointer
        mov rdx, [rsi]       // Get tls_get_addr pointer
        lea rdi, [rsi + 8]   // Get pointer to TlsIndex (first arg of tls_get_addr)
        call rdx             // Call tls_get_addr

        // TP is at fs:0
        // rcx is free to use here, its saved value is popped below
        mov rcx, fs:0
        sub rax, rcx

        // Restore everything
        movdqu xmm0, [rsp + 0]
        movdqu xmm1, [rsp + 16]
        movdqu xmm2, [rsp + 32]
        movdqu xmm3, [rsp + 48]
        movdqu xmm4, [rsp + 64]
        movdqu xmm5, [rsp + 80]
        movdqu xmm6, [rsp + 96]
        movdqu xmm7, [rsp + 112]
        movdqu xmm8, [rsp + 128]
        movdqu xmm9, [rsp + 144]
        movdqu xmm10, [rsp + 160]
        movdqu xmm11, [rsp + 176]
        movdqu xmm12, [rsp + 192]
        movdqu xmm13, [rsp + 208]
        movdqu xmm14, [rsp + 224]
        movdqu xmm15, [rsp + 240]
        add rsp, 264

        pop r11
        pop r10
        pop r9
        pop r8
        pop rcx
        pop rdx
        pop rsi
        pop rdi
        ret
        ",
    )
}

impl StaticReloc for X86_64Relocator {
    /// Perform x86-64 specific ELF relocation.
    ///
    /// With `S` the resolved symbol value, `A` the addend, `P` the address of
    /// the relocated location and `G`/`L` the address of a GOT/PLT entry
    /// allocated in `pltgot`, the handled relocation types are:
    /// - R_X86_64_64: absolute 64-bit address, `S + A`
    /// - R_X86_64_PC32: 32-bit PC-relative offset, `S + A - P`
    /// - R_X86_64_PLT32: 32-bit PC-relative offset, `S + A - P` when it fits
    ///   in 32 bits, otherwise `L + A - P` through a PLT stub whose GOT slot
    ///   holds `S`
    /// - R_X86_64_GOTPCREL: 32-bit PC-relative offset to the GOT entry,
    ///   `G + A - P`
    /// - R_X86_64_32/R_X86_64_32S: 32-bit zero-/sign-extended absolute
    ///   address, `S + A`
    ///
    /// # Arguments
    /// * `helper` - Relocation context; provides the image being relocated
    ///   (`helper.core`) and symbol lookup (`helper.find_symbol`)
    /// * `rel` - The relocation entry to process
    /// * `pltgot` - PLT/GOT section in which stub and GOT entries are
    ///   allocated on demand
    ///
    /// # Returns
    /// `Ok(())` on success, or an error if relocation fails
    ///
    /// # Errors
    /// Returns a relocation error when
    /// - the referenced symbol cannot be found,
    /// - a computed value does not fit the width of the relocation field
    ///   (including the displacement patched into a new PLT stub), or
    /// - the relocation type is not one of the types listed above.
    fn relocate<D, PreS, PostS, PreH, PostH>(
        helper: &mut RelocHelper<'_, D, PreS, PostS, PreH, PostH>,
        rel: &ElfRelType,
        pltgot: &mut PltGotSection,
    ) -> crate::Result<()>
    where
        PreS: SymbolLookup + ?Sized,
        PostS: SymbolLookup + ?Sized,
        PreH: RelocationHandler + ?Sized,
        PostH: RelocationHandler + ?Sized,
    {
        let r_sym = rel.r_symbol();
        let r_type = rel.r_type();
        let base = helper.core.base();
        let segments = helper.core.segments();
        let addend = rel.r_addend(base);
        let offset = rel.r_offset();
        // `P`: address of the location being patched.
        let p = base + rel.r_offset();
        let unknown_symbol = || reloc_error(rel, "unknown symbol", helper.core);
        // Shared by every narrowing conversion so that an out-of-range value
        // is reported to the caller instead of panicking.
        let out_of_range = || {
            reloc_error(
                rel,
                "out of range integral type conversion attempted",
                helper.core,
            )
        };
        match r_type as _ {
            R_X86_64_64 => {
                let Some(sym) = helper.find_symbol(r_sym) else {
                    return Err(unknown_symbol());
                };
                segments.write(offset, sym + addend);
            }
            R_X86_64_PC32 => {
                let Some(sym) = helper.find_symbol(r_sym) else {
                    return Err(unknown_symbol());
                };
                let val: RelocValue<i32> = (sym + addend - p)
                    .try_into()
                    .map_err(|_| out_of_range())?;
                segments.write(offset, val);
            }
            R_X86_64_PLT32 => {
                let Some(sym) = helper.find_symbol(r_sym) else {
                    return Err(unknown_symbol());
                };
                let val: RelocValue<i32> = if let Ok(val) = (sym + addend - p).try_into() {
                    // The target is reachable with a direct 32-bit displacement.
                    val
                } else {
                    // Too far for a direct branch: go through a PLT stub that
                    // jumps via a GOT slot holding the full 64-bit address.
                    let plt_entry_addr = match pltgot.add_plt_entry(r_sym) {
                        PltEntry::Occupied(plt_entry_addr) => plt_entry_addr,
                        PltEntry::Vacant { plt, mut got } => {
                            let plt_entry_addr = plt.as_ptr() as usize;
                            got.update(sym.into());
                            // The stub's `jmp` ends 10 bytes into the entry
                            // (4 bytes endbr64 + 6 bytes jmp), and its
                            // displacement is relative to that point.
                            let call_offset = got.get_addr() - plt_entry_addr - 10;
                            let call_offset_val: RelocValue<i32> =
                                call_offset.try_into().map_err(|_| out_of_range())?;
                            // Bytes 6..10 of the stub are the jmp displacement.
                            plt[6..10].copy_from_slice(&call_offset_val.0.to_ne_bytes());
                            RelocValue::new(plt_entry_addr)
                        }
                    };
                    let via_plt: RelocValue<i32> = (plt_entry_addr + addend - p)
                        .try_into()
                        .map_err(|_| out_of_range())?;
                    via_plt
                };
                segments.write(offset, val);
            }
            R_X86_64_GOTPCREL => {
                let Some(sym) = helper.find_symbol(r_sym) else {
                    return Err(unknown_symbol());
                };
                let got_entry_addr = match pltgot.add_got_entry(r_sym) {
                    GotEntry::Occupied(got_entry_addr) => got_entry_addr,
                    GotEntry::Vacant(mut got) => {
                        got.update(sym);
                        got.get_addr()
                    }
                };
                let val: RelocValue<i32> = (got_entry_addr + addend - p)
                    .try_into()
                    .map_err(|_| out_of_range())?;
                segments.write(offset, val);
            }
            R_X86_64_32 => {
                let Some(sym) = helper.find_symbol(r_sym) else {
                    return Err(unknown_symbol());
                };
                let val: RelocValue<u32> =
                    (sym + addend).try_into().map_err(|_| out_of_range())?;
                segments.write(offset, val);
            }
            R_X86_64_32S => {
                let Some(sym) = helper.find_symbol(r_sym) else {
                    return Err(unknown_symbol());
                };
                let val: RelocValue<i32> =
                    (sym + addend).try_into().map_err(|_| out_of_range())?;
                segments.write(offset, val);
            }
            _ => {
                // No symbol lookup happened here, so "unknown symbol" would
                // point the reader at the wrong problem.
                return Err(reloc_error(
                    rel,
                    "unsupported relocation type",
                    helper.core,
                ));
            }
        }
        Ok(())
    }

    /// Check if a relocation type requires a GOT entry.
    ///
    /// GOT (Global Offset Table) entries are needed for position-independent
    /// references to symbols. On x86-64, GOT entries are required for:
    /// - R_X86_64_GOTPCREL: PC-relative reference to GOT entry
    /// - R_X86_64_PLT32: PLT entry that may need GOT indirection
    ///
    /// # Arguments
    /// * `rel_type` - The relocation type to check
    ///
    /// # Returns
    /// `true` if the relocation type requires a GOT entry, `false` otherwise
    fn needs_got(rel_type: u32) -> bool {
        matches!(rel_type, R_X86_64_GOTPCREL | R_X86_64_PLT32)
    }

    /// Check if a relocation type requires a PLT entry.
    ///
    /// PLT (Procedure Linkage Table) entries are needed for function calls
    /// whose target may be out of range of a direct 32-bit branch. On
    /// x86-64, PLT entries are required for:
    /// - R_X86_64_PLT32: PC-relative call through PLT
    ///
    /// # Arguments
    /// * `rel_type` - The relocation type to check
    ///
    /// # Returns
    /// `true` if the relocation type requires a PLT entry, `false` otherwise
    fn needs_plt(rel_type: u32) -> bool {
        rel_type == R_X86_64_PLT32
    }
}