Skip to main content

svod_runtime/
jit_loader.rs

1//! JIT ELF loader: compiles C source via clang stdin→stdout, parses the
2//! relocatable ELF with the `object` crate, copies sections into an anonymous
3//! mmap, applies relocations, and returns an executable function pointer.
4//!
5//! Supports x86_64, aarch64, riscv64, loongarch64, and powerpc64le.
6
7use std::collections::HashMap;
8
9use object::read::{Object, ObjectSection, ObjectSymbol};
10use object::{Architecture, RelocationFlags, SectionKind};
11
12use crate::dispatch::KernelCif;
13
/// A compiled C kernel loaded via custom ELF relocator + mmap.
///
/// Constructed by [`JitKernel::compile`]. The value owns the mapping that the
/// entry pointer refers to, so the two travel together.
pub struct JitKernel {
    /// Mapping returned by `jit_load`. It is only stored, never read, by the
    /// code in this file; presumably it is held so the memory behind `fn_ptr`
    /// stays mapped for the lifetime of the kernel.
    _mmap: memmap2::MmapMut,
    /// Entry-point address returned by `jit_load` for the requested symbol.
    fn_ptr: *const (),
    /// Symbol name passed to `compile`.
    name: String,
    /// Variable names passed to `compile`, exposed through `var_names()`.
    var_names: Vec<String>,
    /// Call interface created with `buf_count + var_names.len()` as its size.
    cif: KernelCif,
}

// SAFETY: Function pointer points to read-only compiled code in mmap'd memory.
// Multiple threads can call it concurrently.
// NOTE(review): these impls also cover the `cif` field — confirm `KernelCif`
// is itself safe to send/share across threads.
unsafe impl Send for JitKernel {}
unsafe impl Sync for JitKernel {}
27
28impl JitKernel {
29    /// Compile C source code via clang (stdin→stdout) and load the resulting
30    /// object file into executable memory.
31    pub fn compile(src: &str, name: &str, var_names: Vec<String>, buf_count: usize) -> crate::Result<Self> {
32        let obj = compile_to_object(src)?;
33        let (fn_ptr, mmap) = jit_load(&obj, name)?;
34        let cif = KernelCif::new(buf_count + var_names.len());
35        tracing::debug!(kernel.name = %name, "JIT kernel compiled and loaded");
36        Ok(Self { _mmap: mmap, fn_ptr, name: name.to_string(), var_names, cif })
37    }
38
39    /// Execute the kernel with buffer pointers and variable values.
40    ///
41    /// # Safety
42    ///
43    /// Caller must ensure buffer pointers are valid/aligned and `vals` length
44    /// matches `var_names`.
45    pub unsafe fn execute_with_vals(&self, buffers: &[*mut u8], vals: &[i64]) -> crate::Result<()> {
46        unsafe { self.cif.dispatch(self.fn_ptr, buffers, vals, None) };
47        Ok(())
48    }
49
50    pub(crate) fn cif(&self) -> &KernelCif {
51        &self.cif
52    }
53
54    pub fn fn_ptr(&self) -> *const () {
55        self.fn_ptr
56    }
57
58    pub fn name(&self) -> &str {
59        &self.name
60    }
61
62    pub fn var_names(&self) -> &[String] {
63        &self.var_names
64    }
65}
66
67// ── Compilation ─────────────────────────────────────────────────────────────
68
/// Builds the clang `--target=<arch>-none-unknown-elf` flag for the host.
///
/// The architecture comes from `std::env::consts::ARCH`, except that a
/// little-endian `powerpc64` host is spelled `powerpc64le`.
/// Shared between the C and LLVM IR compilation paths.
pub(crate) fn elf_target_triple() -> String {
    let host_arch = std::env::consts::ARCH;
    let is_ppc64le = host_arch == "powerpc64" && cfg!(target_endian = "little");
    let triple_arch = if is_ppc64le { "powerpc64le" } else { host_arch };
    format!("--target={triple_arch}-none-unknown-elf")
}
79
/// Additional clang flags the host platform needs for correct JIT code.
/// Shared between the C and LLVM IR compilation paths.
///
/// Returns `["-ffixed-x18"]` on aarch64 macOS and an empty slice elsewhere.
pub(crate) fn platform_clang_flags() -> &'static [&'static str] {
    // x18 is reserved only on macOS ARM, where the kernel clobbers it on
    // context switch. Linux ARM treats x18 as a free GPR; Windows ARM is not a
    // target svod currently supports.
    const RESERVE_X18: &[&str] = &["-ffixed-x18"];
    const NO_EXTRA_FLAGS: &[&str] = &[];

    if cfg!(all(target_arch = "aarch64", target_os = "macos")) {
        RESERVE_X18
    } else {
        NO_EXTRA_FLAGS
    }
}
95
96/// Pipe C source to clang via stdin, receive relocatable object from stdout.
97fn compile_to_object(src: &str) -> crate::Result<Vec<u8>> {
98    use std::io::Write;
99    use std::process::{Command, Stdio};
100
101    let arch = std::env::consts::ARCH;
102
103    // Architecture-specific tuning. On ARM, `-march=native` only sets the
104    // base ISA family (e.g. `armv8-a`); CPU-specific tuning (Apple-Silicon
105    // pipelines, NEON dual-issue scheduling, FP cost model) requires
106    // `-mcpu=native`.
107    let march = match arch {
108        "x86_64" | "loongarch64" => "-march=native",
109        "riscv64" => "-march=rv64g",
110        _ => "-mcpu=native",
111    };
112
113    let target = elf_target_triple();
114
115    let mut args = vec![
116        "-c",
117        "-x",
118        "c",
119        "-O2",
120        march,
121        "-fPIC",
122        "-ffreestanding",
123        "-fno-math-errno",
124        "-fno-stack-protector",
125        "-nostdlib",
126        "-fno-ident",
127    ];
128    args.push(&target);
129    args.extend_from_slice(platform_clang_flags());
130    args.extend_from_slice(&["-", "-o", "-"]);
131
132    let mut child = Command::new("clang")
133        .args(&args)
134        .stdin(Stdio::piped())
135        .stdout(Stdio::piped())
136        .stderr(Stdio::piped())
137        .spawn()
138        .map_err(|e| crate::Error::JitCompilation {
139            reason: format!("Failed to spawn clang: {e}. Is clang installed?"),
140        })?;
141
142    child
143        .stdin
144        .take()
145        .expect("stdin was piped")
146        .write_all(src.as_bytes())
147        .map_err(|e| crate::Error::JitCompilation { reason: format!("Failed to write to clang stdin: {e}") })?;
148
149    let output = child
150        .wait_with_output()
151        .map_err(|e| crate::Error::JitCompilation { reason: format!("Failed to wait for clang: {e}") })?;
152
153    if !output.status.success() {
154        let stderr = String::from_utf8_lossy(&output.stderr);
155        return Err(crate::Error::JitCompilation {
156            reason: format!("clang compilation failed:\n{stderr}\nSource:\n{src}"),
157        });
158    }
159
160    if output.stdout.is_empty() {
161        return Err(crate::Error::JitCompilation { reason: "clang produced empty output".to_string() });
162    }
163
164    Ok(output.stdout)
165}
166
167// ── ELF Loading ─────────────────────────────────────────────────────────────
168
/// Parse an ELF relocatable object, copy loadable sections into an anonymous
/// mmap, apply relocations, mprotect to executable, and return the function
/// pointer for the named symbol.
///
/// Only sections of kind `Text`, `Data` and `ReadOnlyData` are laid out and
/// relocated. Symbols that are not defined in one of those sections are
/// resolved through `resolve_symbol`.
///
/// The returned mapping must be kept alive for as long as the returned
/// pointer is used.
///
/// # Errors
///
/// Returns `Error::JitCompilation` for parse, layout, mmap, symbol-resolution,
/// relocation and `mprotect` failures, and whatever `find_symbol_offset`
/// returns when `name` is not found in a loaded section.
pub(crate) fn jit_load(obj: &[u8], name: &str) -> crate::Result<(*const (), memmap2::MmapMut)> {
    let elf = object::File::parse(obj)
        .map_err(|e| crate::Error::JitCompilation { reason: format!("Failed to parse ELF: {e}") })?;

    let arch = elf.architecture();

    // Collect loadable sections and compute layout.
    // `section_offsets` maps each loaded section to its byte offset in the mmap.
    let mut section_offsets: HashMap<object::SectionIndex, usize> = HashMap::new();
    let mut total_size: usize = 0;

    for section in elf.sections() {
        if matches!(section.kind(), SectionKind::Text | SectionKind::Data | SectionKind::ReadOnlyData) {
            let align = section.align().max(1) as usize;
            // Round up to the section alignment; the mask form assumes `align`
            // is a power of two.
            total_size = (total_size + align - 1) & !(align - 1);
            section_offsets.insert(section.index(), total_size);
            total_size += section.size() as usize;
        }
    }

    if total_size == 0 {
        return Err(crate::Error::JitCompilation { reason: "No loadable sections in ELF".to_string() });
    }

    // Aarch64: reserve space for branch veneers (trampolines) after loadable
    // sections. CALL26/JUMP26 only reach ±128 MiB; external symbols (libm etc.)
    // are typically much farther on macOS/ARM.
    let veneer_base = if arch == Architecture::Aarch64 {
        let n = count_aarch64_external_calls(&elf, &section_offsets);
        let base = (total_size + 3) & !3; // align to 4 bytes (instruction size)
        total_size = base + n * VENEER_SIZE;
        base
    } else {
        total_size
    };

    // Allocate and populate mmap.
    let mut mmap = memmap2::MmapMut::map_anon(total_size)
        .map_err(|e| crate::Error::JitCompilation { reason: format!("mmap failed: {e}") })?;

    for section in elf.sections() {
        if let Some(&offset) = section_offsets.get(&section.index()) {
            let data = section
                .data()
                .map_err(|e| crate::Error::JitCompilation { reason: format!("Failed to read section: {e}") })?;
            mmap[offset..offset + data.len()].copy_from_slice(data);
        }
    }

    // Build symbol address table.
    // Only symbols defined in a loaded section get an entry here; everything
    // else is resolved lazily in the relocation loop below.
    let mmap_base = mmap.as_ptr() as u64;
    let mut symbol_addrs: HashMap<object::SymbolIndex, u64> = HashMap::new();

    for symbol in elf.symbols() {
        if let Some(&sec_offset) = symbol.section_index().and_then(|si| section_offsets.get(&si)) {
            symbol_addrs.insert(symbol.index(), mmap_base + sec_offset as u64 + symbol.address());
        }
    }

    // Apply relocations.
    let mut state = RelocState { veneers: VeneerPool::new(veneer_base), ..Default::default() };

    // PPC64 ELFv2: find TOC base for TOC-relative relocations.
    // Stays `None` when there is no `.TOC.` symbol in a loaded section.
    if arch == Architecture::PowerPc64 {
        state.toc_base = elf.symbols().find(|s| s.name() == Ok(".TOC.")).and_then(|s| {
            s.section_index().and_then(|si| section_offsets.get(&si)).map(|&off| mmap_base + off as u64 + s.address())
        });
    }

    for section in elf.sections() {
        if !matches!(section.kind(), SectionKind::Text | SectionKind::Data | SectionKind::ReadOnlyData) {
            continue;
        }
        let Some(&sec_offset) = section_offsets.get(&section.index()) else { continue };

        for (reloc_offset, reloc) in section.relocations() {
            // Where the patch lands: as an index into the mmap and as an
            // absolute address (the "P" of the relocation formulas).
            let patch_mmap_offset = sec_offset + reloc_offset as usize;
            let patch_addr = mmap_base + patch_mmap_offset as u64;

            // Absolute address of the relocation target (the "S" term).
            let target_addr = match reloc.target() {
                object::RelocationTarget::Symbol(sym_idx) => {
                    if let Some(&addr) = symbol_addrs.get(&sym_idx) {
                        addr
                    } else {
                        // Not defined in a loaded section: resolve by name and
                        // cache the result for later relocations.
                        let sym = elf
                            .symbol_by_index(sym_idx)
                            .map_err(|e| crate::Error::JitCompilation { reason: format!("Bad symbol index: {e}") })?;
                        let sym_name = sym
                            .name()
                            .map_err(|e| crate::Error::JitCompilation { reason: format!("Bad symbol name: {e}") })?;
                        let addr = resolve_symbol(sym_name)?;
                        symbol_addrs.insert(sym_idx, addr);
                        addr
                    }
                }
                object::RelocationTarget::Section(sec_idx) => section_offsets
                    .get(&sec_idx)
                    .map(|&off| mmap_base + off as u64)
                    .ok_or_else(|| crate::Error::JitCompilation {
                        reason: format!("Relocation references unloaded section {sec_idx:?}"),
                    })?,
                other => {
                    return Err(crate::Error::JitCompilation {
                        reason: format!("Unsupported relocation target: {other:?}"),
                    });
                }
            };

            let r_type = match reloc.flags() {
                RelocationFlags::Elf { r_type } => r_type,
                other => {
                    return Err(crate::Error::JitCompilation {
                        reason: format!("Non-ELF relocation format: {other:?}"),
                    });
                }
            };

            apply_relocation(
                &mut mmap,
                patch_mmap_offset,
                patch_addr,
                target_addr,
                reloc.addend(),
                r_type,
                arch,
                &mut state,
            )?;
        }
    }

    // Find kernel entry point.
    let fn_offset = find_symbol_offset(&elf, name, &section_offsets)?;

    // mprotect to executable.
    // The whole mapping becomes read + execute, so loaded `Data` sections are
    // no longer writable after this point.
    // SAFETY: the pointer and length describe exactly the mapping owned by
    // `mmap`, which is still alive here.
    unsafe {
        let ret = libc::mprotect(mmap.as_ptr() as *mut libc::c_void, mmap.len(), libc::PROT_READ | libc::PROT_EXEC);
        if ret != 0 {
            return Err(crate::Error::JitCompilation {
                reason: format!("mprotect failed: {}", std::io::Error::last_os_error()),
            });
        }
    }

    // Flush instruction cache on architectures with non-coherent I/D caches.
    // SAFETY: the range passed is [start, start + len) of the live mapping.
    // NOTE(review): `__clear_cache` is expected to be provided by the
    // compiler runtime at link time — confirm for every supported target.
    #[cfg(not(target_arch = "x86_64"))]
    unsafe {
        unsafe extern "C" {
            fn __clear_cache(start: *mut libc::c_void, end: *mut libc::c_void);
        }
        __clear_cache(mmap.as_ptr() as *mut _, mmap.as_ptr().add(mmap.len()) as *mut _);
    }

    Ok(((mmap_base + fn_offset as u64) as *const (), mmap))
}
325
326// ── Relocation dispatch ─────────────────────────────────────────────────────
327
/// Auxiliary state for multi-instruction relocations.
///
/// One instance is created per `jit_load` call and passed by mutable
/// reference to every relocation handler; each field is used by a single
/// architecture.
#[derive(Default)]
struct RelocState {
    /// RISC-V: maps patch address of PCREL_HI20 → full (S + A - P) value
    /// for subsequent LO12 relocations that reference the same label.
    pcrel_hi: HashMap<u64, i64>,
    /// PPC64 ELFv2: TOC base address (.TOC. symbol), needed for TOC16 relocations.
    /// `None` when no `.TOC.` symbol was found in a loaded section.
    toc_base: Option<u64>,
    /// Aarch64: veneer (branch trampoline) pool for CALL26/JUMP26 that exceed ±128 MiB.
    veneers: VeneerPool,
}
339
/// Pool of branch veneers (trampolines) for aarch64 CALL26/JUMP26 range overflow.
///
/// When the target of a direct branch is more than ±128 MiB away, we emit a
/// small trampoline that loads the full 64-bit address and does an indirect
/// branch:
///
/// ```text
///   LDR X16, [PC, #8]   // load 64-bit address from next 8 bytes
///   BR  X16              // indirect branch
///   .quad <target>       // 64-bit absolute address
/// ```
///
/// Veneers are written into space that `jit_load` reserves after the loaded
/// sections; the pool only tracks offsets into that mapping.
#[derive(Default)]
struct VeneerPool {
    /// Next available offset for a new veneer.
    next: usize,
    /// Reuse map: target address → veneer mmap offset (avoid duplicate veneers).
    map: HashMap<u64, usize>,
}

/// Bytes occupied by one veneer: two 4-byte instructions plus an 8-byte address.
const VENEER_SIZE: usize = 16; // LDR X16 + BR X16 + .quad addr
/// Largest branch displacement patched directly; `reloc_aarch64` accepts
/// offsets in `-CALL26_MAX..=CALL26_MAX` and routes anything else through a veneer.
const CALL26_MAX: i64 = (1 << 27) - 4; // ±128 MiB (signed 28-bit range, 4-byte aligned)
361
362impl VeneerPool {
363    fn new(base: usize) -> Self {
364        Self { next: base, map: HashMap::new() }
365    }
366
367    /// Get or create a veneer for `target_addr`, returning its mmap offset.
368    fn get_or_create(&mut self, mmap: &mut [u8], target_addr: u64) -> usize {
369        if let Some(&off) = self.map.get(&target_addr) {
370            return off;
371        }
372        let off = self.next;
373        self.next += VENEER_SIZE;
374        debug_assert!(self.next <= mmap.len(), "veneer pool overflow");
375
376        // LDR X16, [PC, #8]  →  0x58000050
377        mmap[off..off + 4].copy_from_slice(&0x5800_0050u32.to_le_bytes());
378        // BR X16              →  0xD61F0200
379        mmap[off + 4..off + 8].copy_from_slice(&0xD61F_0200u32.to_le_bytes());
380        // .quad target_addr
381        mmap[off + 8..off + 16].copy_from_slice(&target_addr.to_le_bytes());
382
383        self.map.insert(target_addr, off);
384        off
385    }
386}
387
388#[allow(clippy::too_many_arguments)]
389fn apply_relocation(
390    mmap: &mut memmap2::MmapMut,
391    off: usize,
392    patch: u64,
393    target: u64,
394    addend: i64,
395    r_type: u32,
396    arch: Architecture,
397    state: &mut RelocState,
398) -> crate::Result<()> {
399    match arch {
400        Architecture::X86_64 => reloc_x86_64(mmap, off, patch, target, addend, r_type),
401        Architecture::Aarch64 => reloc_aarch64(mmap, off, patch, target, addend, r_type, state),
402        Architecture::Riscv64 => reloc_riscv64(mmap, off, patch, target, addend, r_type, state),
403        Architecture::LoongArch64 => reloc_loongarch64(mmap, off, patch, target, addend, r_type),
404        Architecture::PowerPc64 => reloc_ppc64(mmap, off, patch, target, addend, r_type, state),
405        other => Err(unsupported_arch(other)),
406    }
407}
408
409// ── x86_64 relocations ─────────────────────────────────────────────────────
410
411fn reloc_x86_64(mmap: &mut [u8], off: usize, patch: u64, target: u64, addend: i64, r_type: u32) -> crate::Result<()> {
412    use object::elf::*;
413    match r_type {
414        // S + A - P, 32-bit PC-relative
415        R_X86_64_PC32 | R_X86_64_PLT32 | R_X86_64_GOTPCRELX | R_X86_64_REX_GOTPCRELX => {
416            let v = (target as i64 + addend - patch as i64) as i32;
417            mmap[off..off + 4].copy_from_slice(&v.to_le_bytes());
418        }
419        // S + A, signed 32-bit
420        R_X86_64_32S => {
421            let v = (target as i64 + addend) as i32;
422            mmap[off..off + 4].copy_from_slice(&v.to_le_bytes());
423        }
424        // S + A, unsigned 32-bit
425        R_X86_64_32 => {
426            let v = (target as i64 + addend) as u32;
427            mmap[off..off + 4].copy_from_slice(&v.to_le_bytes());
428        }
429        // S + A, 64-bit
430        R_X86_64_64 => {
431            let v = (target as i64 + addend) as u64;
432            mmap[off..off + 8].copy_from_slice(&v.to_le_bytes());
433        }
434        _ => return Err(unsupported_reloc("x86_64", r_type)),
435    }
436    Ok(())
437}
438
439// ── aarch64 relocations ────────────────────────────────────────────────────
440
441fn reloc_aarch64(
442    mmap: &mut [u8],
443    off: usize,
444    patch: u64,
445    target: u64,
446    addend: i64,
447    r_type: u32,
448    state: &mut RelocState,
449) -> crate::Result<()> {
450    use object::elf::*;
451    match r_type {
452        // 26-bit PC-relative branch: (S+A-P)>>2, encoded in [25:0]
453        // Range: ±128 MiB. Use a veneer when the target is out of range.
454        R_AARCH64_CALL26 | R_AARCH64_JUMP26 => {
455            let dest = (target as i64).wrapping_add(addend);
456            let offset = dest.wrapping_sub(patch as i64);
457            let final_offset = if !(-CALL26_MAX..=CALL26_MAX).contains(&offset) {
458                let mmap_base = patch - off as u64;
459                let veneer_off = state.veneers.get_or_create(mmap, dest as u64);
460                mmap_base as i64 + veneer_off as i64 - patch as i64
461            } else {
462                offset
463            };
464            let imm26 = (final_offset >> 2) as u32 & 0x03FF_FFFF;
465            patch_insn(mmap, off, 0xFC00_0000, imm26);
466        }
467        // ADRP page-relative: ((S+A) & ~0xFFF) - (P & ~0xFFF), split into immlo[30:29] immhi[23:5]
468        R_AARCH64_ADR_PREL_PG_HI21 => {
469            let page_delta = ((target as i64 + addend) & !0xFFF) - (patch as i64 & !0xFFF);
470            let imm = (page_delta >> 12) as u32;
471            patch_insn(mmap, off, 0x9F00_001F, ((imm & 0x3) << 29) | (((imm >> 2) & 0x7FFFF) << 5));
472        }
473        // Low 12-bit page offset for ADD/LDR/STR: (S+A) & 0xFFF, shifted by access size
474        R_AARCH64_ADD_ABS_LO12_NC
475        | R_AARCH64_LDST8_ABS_LO12_NC
476        | R_AARCH64_LDST16_ABS_LO12_NC
477        | R_AARCH64_LDST32_ABS_LO12_NC
478        | R_AARCH64_LDST64_ABS_LO12_NC
479        | R_AARCH64_LDST128_ABS_LO12_NC => {
480            let shift = match r_type {
481                R_AARCH64_LDST16_ABS_LO12_NC => 1,
482                R_AARCH64_LDST32_ABS_LO12_NC => 2,
483                R_AARCH64_LDST64_ABS_LO12_NC => 3,
484                R_AARCH64_LDST128_ABS_LO12_NC => 4,
485                _ => 0,
486            };
487            let imm12 = (((target as i64 + addend) as u32) & 0xFFF) >> shift;
488            patch_insn(mmap, off, 0xFFC0_03FF, imm12 << 10);
489        }
490        _ => return Err(unsupported_reloc("aarch64", r_type)),
491    }
492    Ok(())
493}
494
495// ── RISC-V relocations ─────────────────────────────────────────────────────
496//
497// Instruction formats referenced below:
498//   U-type (lui, auipc):  imm[31:12] | rd[11:7]  | opcode[6:0]
499//   I-type (jalr, loads):  imm[31:20] | rs1[19:15] | f3[14:12] | rd[11:7]  | opcode[6:0]
500//   S-type (stores):  imm[31:25] | rs2[24:20] | rs1[19:15] | f3[14:12] | imm[11:7] | opcode[6:0]
501//   B-type (branches): imm[31] | imm[30:25] | rs2[24:20] | rs1[19:15] | f3[14:12] | imm[11:8] | imm[7] | opcode[6:0]
502//   J-type (jal):  imm[31] | imm[30:21] | imm[20] | imm[19:12] | rd[11:7] | opcode[6:0]
503
// Masks passed to `patch_insn`: set bits are kept from the existing
// instruction, cleared bits are where the immediate is written.
/// U-type and J-type: keep bits [11:0].
const RV_U_MASK: u32 = 0x0000_0FFF; // preserve rd + opcode
/// I-type: keep bits [19:0].
const RV_I_MASK: u32 = 0x000F_FFFF; // preserve rs1 + f3 + rd + opcode
/// S-type and B-type: keep bits [24:12] and [6:0].
const RV_S_MASK: u32 = 0x01FF_F07F; // preserve rs2 + rs1 + f3 + opcode
507
508fn reloc_riscv64(
509    mmap: &mut [u8],
510    off: usize,
511    patch: u64,
512    target: u64,
513    addend: i64,
514    r_type: u32,
515    state: &mut RelocState,
516) -> crate::Result<()> {
517    use object::elf::*;
518    match r_type {
519        // auipc+jalr pair: S+A-P split into hi20 (U-type) + lo12 (I-type)
520        R_RISCV_CALL | R_RISCV_CALL_PLT => {
521            let v = target as i64 + addend - patch as i64;
522            let hi = ((v + 0x800) >> 12) as u32;
523            let lo = (v as u32) & 0xFFF;
524            patch_insn(mmap, off, RV_U_MASK, hi << 12);
525            patch_insn(mmap, off + 4, RV_I_MASK, lo << 20);
526        }
527        // PC-relative high 20 bits (auipc) — store full value for paired LO12
528        R_RISCV_PCREL_HI20 => {
529            let v = target as i64 + addend - patch as i64;
530            let hi = ((v + 0x800) >> 12) as u32;
531            patch_insn(mmap, off, RV_U_MASK, hi << 12);
532            state.pcrel_hi.insert(patch, v);
533        }
534        // PC-relative low 12 bits (I-type), paired with PCREL_HI20
535        R_RISCV_PCREL_LO12_I => {
536            let full = *state.pcrel_hi.get(&target).ok_or_else(|| crate::Error::JitCompilation {
537                reason: format!("PCREL_LO12_I: no paired HI20 at {target:#x}"),
538            })?;
539            patch_insn(mmap, off, RV_I_MASK, ((full as u32) & 0xFFF) << 20);
540        }
541        // PC-relative low 12 bits (S-type), paired with PCREL_HI20
542        R_RISCV_PCREL_LO12_S => {
543            let full = *state.pcrel_hi.get(&target).ok_or_else(|| crate::Error::JitCompilation {
544                reason: format!("PCREL_LO12_S: no paired HI20 at {target:#x}"),
545            })?;
546            let lo = (full as u32) & 0xFFF;
547            patch_insn(mmap, off, RV_S_MASK, ((lo >> 5) << 25) | ((lo & 0x1F) << 7));
548        }
549        // Absolute high 20 bits (lui)
550        R_RISCV_HI20 => {
551            let v = (target as i64 + addend) as u32;
552            patch_insn(mmap, off, RV_U_MASK, v.wrapping_add(0x800) & 0xFFFF_F000);
553        }
554        // Absolute low 12 bits (I-type)
555        R_RISCV_LO12_I => {
556            let lo = ((target as i64 + addend) as u32) & 0xFFF;
557            patch_insn(mmap, off, RV_I_MASK, lo << 20);
558        }
559        // Absolute low 12 bits (S-type)
560        R_RISCV_LO12_S => {
561            let lo = ((target as i64 + addend) as u32) & 0xFFF;
562            patch_insn(mmap, off, RV_S_MASK, ((lo >> 5) << 25) | ((lo & 0x1F) << 7));
563        }
564        // 12-bit PC-relative branch (B-type)
565        R_RISCV_BRANCH => {
566            let v = (target as i64 + addend - patch as i64) as u32;
567            let bits = ((v >> 12) & 1) << 31 | ((v >> 5) & 0x3F) << 25 | ((v >> 1) & 0xF) << 8 | ((v >> 11) & 1) << 7;
568            patch_insn(mmap, off, RV_S_MASK, bits);
569        }
570        // 20-bit PC-relative jump (J-type)
571        R_RISCV_JAL => {
572            let v = (target as i64 + addend - patch as i64) as u32;
573            let bits =
574                ((v >> 20) & 1) << 31 | ((v >> 1) & 0x3FF) << 21 | ((v >> 11) & 1) << 20 | ((v >> 12) & 0xFF) << 12;
575            patch_insn(mmap, off, RV_U_MASK, bits);
576        }
577        // Data relocations
578        R_RISCV_64 => {
579            let v = (target as i64 + addend) as u64;
580            mmap[off..off + 8].copy_from_slice(&v.to_le_bytes());
581        }
582        R_RISCV_32 => {
583            let v = (target as i64 + addend) as u32;
584            mmap[off..off + 4].copy_from_slice(&v.to_le_bytes());
585        }
586        // Linker relaxation hint — skip
587        R_RISCV_RELAX => {}
588        _ => return Err(unsupported_reloc("riscv64", r_type)),
589    }
590    Ok(())
591}
592
593// ── LoongArch relocations ──────────────────────────────────────────────────
594//
595// Instruction formats referenced below:
596//   1RI20 (pcalau12i): opcode[31:25] | si20[24:5] | rd[4:0]
597//   2RI12 (addi/ld/st): opcode[31:22] | si12[21:10] | rj[9:5] | rd[4:0]
598//   I26   (b/bl):       opcode[31:26] | offs[15:0 in 25:10] | offs[25:16 in 9:0]
599
600fn reloc_loongarch64(
601    mmap: &mut [u8],
602    off: usize,
603    patch: u64,
604    target: u64,
605    addend: i64,
606    r_type: u32,
607) -> crate::Result<()> {
608    use object::elf::*;
609    match r_type {
610        // 26-bit PC-relative branch (B/BL)
611        R_LARCH_B26 => {
612            let offs = ((target as i64 + addend - patch as i64) >> 2) as u32;
613            patch_insn(mmap, off, 0xFC00_0000, ((offs & 0xFFFF) << 10) | ((offs >> 16) & 0x3FF));
614        }
615        // PC-aligned page high 20 bits (pcalau12i): si20 in [24:5]
616        R_LARCH_PCALA_HI20 => {
617            let page_delta = ((target as i64 + addend + 0x800) >> 12) - (patch as i64 >> 12);
618            patch_insn(mmap, off, 0xFE00_001F, ((page_delta as u32) & 0xF_FFFF) << 5);
619        }
620        // Low 12 bits (2RI12 format): si12 in [21:10]
621        R_LARCH_PCALA_LO12 => {
622            let lo12 = ((target as i64 + addend) as u32) & 0xFFF;
623            patch_insn(mmap, off, 0xFFC0_03FF, lo12 << 10);
624        }
625        // Data relocations
626        R_LARCH_64 => {
627            let v = (target as i64 + addend) as u64;
628            mmap[off..off + 8].copy_from_slice(&v.to_le_bytes());
629        }
630        R_LARCH_32 => {
631            let v = (target as i64 + addend) as u32;
632            mmap[off..off + 4].copy_from_slice(&v.to_le_bytes());
633        }
634        // Linker relaxation hint — skip
635        R_LARCH_RELAX => {}
636        _ => return Err(unsupported_reloc("loongarch64", r_type)),
637    }
638    Ok(())
639}
640
641// ── PPC64 relocations ───────────────────────────────────────────────────────
642//
643// PPC64 ELFv2 ABI (little-endian). Instructions are 32-bit, stored LE on PPC64LE.
644//   I-form  (b/bl):    OPCD[31:26] | LI[25:2] | AA[1] | LK[0]
645//   D-form  (addi/lwz): OPCD[31:26] | RT[25:21] | RA[20:16] | D[15:0]
646//   DS-form (ld/std):   OPCD[31:26] | RT[25:21] | RA[20:16] | DS[15:2] | XO[1:0]
647
648fn reloc_ppc64(
649    mmap: &mut [u8],
650    off: usize,
651    patch: u64,
652    target: u64,
653    addend: i64,
654    r_type: u32,
655    state: &mut RelocState,
656) -> crate::Result<()> {
657    use object::elf::*;
658    match r_type {
659        // 24-bit PC-relative branch (bl): LI in [25:2]
660        R_PPC64_REL24 => {
661            let li = ((target as i64 + addend - patch as i64) >> 2) as u32 & 0x00FF_FFFF;
662            patch_insn(mmap, off, 0xFC00_0003, li << 2);
663        }
664        // 16-bit TOC-relative, high adjusted: ha16(S + A - .TOC.)
665        R_PPC64_TOC16_HA => {
666            let toc = toc_base(state)?;
667            let v = target as i64 + addend - toc as i64;
668            let ha = (((v >> 16) as u32).wrapping_add((v as u32 >> 15) & 1)) & 0xFFFF;
669            patch_insn(mmap, off, 0xFFFF_0000, ha);
670        }
671        // 16-bit TOC-relative, low: lo16(S + A - .TOC.)
672        R_PPC64_TOC16_LO => {
673            let toc = toc_base(state)?;
674            let lo = ((target as i64 + addend - toc as i64) as u32) & 0xFFFF;
675            patch_insn(mmap, off, 0xFFFF_0000, lo);
676        }
677        // 16-bit TOC-relative, low DS-form: lo16(S + A - .TOC.) with bits [1:0] preserved
678        R_PPC64_TOC16_LO_DS => {
679            let toc = toc_base(state)?;
680            let lo = ((target as i64 + addend - toc as i64) as u32) & 0xFFFC;
681            patch_insn(mmap, off, 0xFFFF_0003, lo);
682        }
683        // 32-bit PC-relative
684        R_PPC64_REL32 => {
685            let v = (target as i64 + addend - patch as i64) as i32;
686            mmap[off..off + 4].copy_from_slice(&v.to_le_bytes());
687        }
688        // Data relocations
689        R_PPC64_ADDR64 => {
690            let v = (target as i64 + addend) as u64;
691            mmap[off..off + 8].copy_from_slice(&v.to_le_bytes());
692        }
693        R_PPC64_ADDR32 => {
694            let v = (target as i64 + addend) as u32;
695            mmap[off..off + 4].copy_from_slice(&v.to_le_bytes());
696        }
697        // nop (ori 0,0,0) after bl — linker may rewrite for TOC restore; we skip
698        R_PPC64_TOC16 | R_PPC64_TOC16_HI => {
699            let toc = toc_base(state)?;
700            let v = target as i64 + addend - toc as i64;
701            let bits = match r_type {
702                R_PPC64_TOC16 => (v as u32) & 0xFFFF,
703                R_PPC64_TOC16_HI => ((v >> 16) as u32) & 0xFFFF,
704                _ => unreachable!(),
705            };
706            patch_insn(mmap, off, 0xFFFF_0000, bits);
707        }
708        _ => return Err(unsupported_reloc("ppc64", r_type)),
709    }
710    Ok(())
711}
712
713fn toc_base(state: &RelocState) -> crate::Result<u64> {
714    state.toc_base.ok_or_else(|| crate::Error::JitCompilation {
715        reason: "PPC64 TOC relocation but no .TOC. symbol found in ELF".to_string(),
716    })
717}
718
719// ── Helpers ─────────────────────────────────────────────────────────────────
720
/// Patch the 32-bit little-endian instruction at `mmap[off..off + 4]`:
/// bits set in `mask` are kept from the old word, then `bits` is OR-ed in.
///
/// Panics if the four bytes starting at `off` are not inside `mmap`.
fn patch_insn(mmap: &mut [u8], off: usize, mask: u32, bits: u32) {
    let word = &mut mmap[off..off + 4];
    let mut raw = [0u8; 4];
    raw.copy_from_slice(word);
    let patched = (u32::from_le_bytes(raw) & mask) | bits;
    word.copy_from_slice(&patched.to_le_bytes());
}
726
727fn unsupported_reloc(arch: &str, r_type: u32) -> crate::Error {
728    crate::Error::JitCompilation { reason: format!("Unsupported {arch} relocation type: {r_type}") }
729}
730
731fn unsupported_arch(arch: Architecture) -> crate::Error {
732    crate::Error::JitCompilation { reason: format!("Unsupported ELF architecture: {arch:?}") }
733}
734
735/// Look up a symbol's offset within the mmap by name.
736fn find_symbol_offset(
737    elf: &object::File,
738    name: &str,
739    section_offsets: &HashMap<object::SectionIndex, usize>,
740) -> crate::Result<usize> {
741    let prefixed = format!("_{name}");
742    for symbol in elf.symbols() {
743        let sym_name = symbol.name().unwrap_or("");
744        if (sym_name == name || sym_name == prefixed)
745            && let Some(&sec_offset) = symbol.section_index().and_then(|si| section_offsets.get(&si))
746        {
747            return Ok(sec_offset + symbol.address() as usize);
748        }
749    }
750    Err(crate::Error::FunctionNotFound { name: name.to_string() })
751}
752
753/// Count unique external symbols referenced by CALL26/JUMP26 in an aarch64 ELF.
754/// Used to pre-allocate veneer space before mmap.
755fn count_aarch64_external_calls(elf: &object::File, section_offsets: &HashMap<object::SectionIndex, usize>) -> usize {
756    use std::collections::HashSet;
757    let mut external = HashSet::new();
758    for section in elf.sections() {
759        if !matches!(section.kind(), SectionKind::Text | SectionKind::Data | SectionKind::ReadOnlyData) {
760            continue;
761        }
762        for (_, reloc) in section.relocations() {
763            let r_type = match reloc.flags() {
764                RelocationFlags::Elf { r_type } => r_type,
765                _ => continue,
766            };
767            if r_type != object::elf::R_AARCH64_CALL26 && r_type != object::elf::R_AARCH64_JUMP26 {
768                continue;
769            }
770            if let object::RelocationTarget::Symbol(sym_idx) = reloc.target()
771                && let Ok(sym) = elf.symbol_by_index(sym_idx)
772                && sym.section_index().and_then(|si| section_offsets.get(&si)).is_none()
773            {
774                external.insert(sym_idx);
775            }
776        }
777    }
778    external.len()
779}
780
781/// Resolve an external symbol (e.g. `sqrtf`, `expf`) via dlsym at runtime.
782fn resolve_symbol(name: &str) -> crate::Result<u64> {
783    let cname = std::ffi::CString::new(name)
784        .map_err(|e| crate::Error::JitCompilation { reason: format!("Invalid symbol name: {e}") })?;
785    let ptr = unsafe { libc::dlsym(libc::RTLD_DEFAULT, cname.as_ptr()) };
786    if ptr.is_null() {
787        return Err(crate::Error::JitCompilation { reason: format!("Cannot resolve symbol: {name}") });
788    }
789    Ok(ptr as u64)
790}
791
792#[cfg(test)]
793#[path = "test/unit/jit_loader.rs"]
794mod tests;