asmkit/core/buffer.rs

use alloc::{borrow::Cow, collections::BinaryHeap, vec::Vec};

use smallvec::SmallVec;

use crate::{
    riscv::{self},
    AsmError,
};

use super::{
    jit_allocator::{JitAllocator, Span},
    operand::{Label, Sym},
    target::Environment,
};

/// A buffer of output to be produced, fixed up, and then emitted to a CodeSink
/// in bulk.
///
/// This struct uses `SmallVec`s to support small-ish function bodies without
/// any heap allocation. As such, it will be several kilobytes large. This is
/// likely fine as long as it is stack-allocated for function emission then
/// thrown away; but beware if many buffer objects are retained persistently.
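///
/// A minimal usage sketch (illustrative only: the instruction word is a
/// placeholder zero and the label happens to resolve 4 bytes ahead):
///
/// ```ignore
/// let mut buf = CodeBuffer::new();
/// let target = buf.get_label();
/// // Record that the 4-byte word emitted next refers to `target`.
/// buf.use_label_at_offset(buf.cur_offset(), target, LabelUse::RVJal20);
/// buf.put4(0); // placeholder `jal` encoding; patched once `target` is bound
/// buf.bind_label(target);
/// let finalized = buf.finish();
/// assert_eq!(finalized.total_size(), 4);
/// ```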
#[derive(Default)]
pub struct CodeBuffer {
    env: Environment,
    data: SmallVec<[u8; 1024]>,
    relocs: SmallVec<[AsmReloc; 16]>,
    symbols: SmallVec<[SymData; 16]>,
    label_offsets: SmallVec<[CodeOffset; 16]>,
    pending_fixup_records: SmallVec<[AsmFixup; 16]>,
    pending_fixup_deadline: u32,
    pending_constants: SmallVec<[Constant; 16]>,
    pending_constants_size: CodeOffset,
    used_constants: SmallVec<[(Constant, CodeOffset); 4]>,
    constants: SmallVec<[(ConstantData, AsmConstant); 4]>,
    fixup_records: BinaryHeap<AsmFixup>,
}

#[derive(Clone, PartialEq, Eq, Hash)]
pub enum ExternalName {
    Symbol(Cow<'static, str>),
    UserName(u32),
}
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub enum RelocTarget {
    Sym(Sym),
    Label(Label),
}

#[derive(Copy, Clone, PartialEq, Eq)]
pub enum RelocDistance {
    Near,
    Far,
}

pub(crate) struct SymData {
    name: ExternalName,
    distance: RelocDistance,
}

/// A relocation resulting from emitting assembly.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub struct AsmReloc {
    pub offset: CodeOffset,
    pub kind: Reloc,
    pub addend: i64,
    pub target: RelocTarget,
}
/// A fixup to perform on the buffer once code is emitted.
/// Fixups always refer to labels and patch the code based on label offsets.
/// Hence, they are like relocations, but internal to one buffer.
#[derive(Clone, Copy, PartialEq, PartialOrd, Ord, Eq)]
pub struct AsmFixup {
    pub label: Label,
    pub offset: CodeOffset,
    pub kind: LabelUse,
}

/// Metadata about a constant.
#[derive(Clone, Copy)]
struct AsmConstant {
    /// A label which has not yet been bound which can be used for this
    /// constant.
    ///
    /// This is lazily created when a label is requested for a constant and is
    /// cleared when a constant is emitted.
    upcoming_label: Option<Label>,
    /// Required alignment.
    align: CodeOffset,
    /// The byte size of this constant.
    size: usize,
}

/// A `CodeBuffer` once emission is completed: holds generated code and records,
/// without fixups. This allows the type to be independent of the backend.
pub struct CodeBufferFinalized {
    pub(crate) data: SmallVec<[u8; 1024]>,
    pub(crate) relocs: SmallVec<[AsmReloc; 16]>,
    pub(crate) symbols: SmallVec<[SymData; 16]>,
    pub(crate) alignment: u32,
}

impl CodeBufferFinalized {
    pub fn total_size(&self) -> usize {
        self.data.len()
    }

    pub fn data(&self) -> &[u8] {
        &self.data[..]
    }

    pub fn data_mut(&mut self) -> &mut [u8] {
        &mut self.data[..]
    }

    pub fn symbol_name(&self, sym: Sym) -> &ExternalName {
        &self.symbols[sym.id() as usize].name
    }

    pub fn symbol_distance(&self, sym: Sym) -> RelocDistance {
        self.symbols[sym.id() as usize].distance
    }

    pub fn relocs(&self) -> &[AsmReloc] {
        &self.relocs[..]
    }

    pub fn alignment(&self) -> u32 {
        self.alignment
    }

    pub fn allocate(&self, jit_allocator: &mut JitAllocator) -> Result<Span, AsmError> {
        let mut span = jit_allocator.alloc(self.data().len())?;

        unsafe {
            jit_allocator.write(&mut span, |span| {
                span.rw()
                    .copy_from_nonoverlapping(self.data().as_ptr(), self.data().len());
            })?;
        }

        Ok(span)
    }
}

impl CodeBuffer {
    pub fn new() -> Self {
        Self::default()
    }

    pub fn with_env(env: Environment) -> Self {
        Self {
            env,
            ..Default::default()
        }
    }

    pub fn clear(&mut self) {
        self.data.clear();
        self.relocs.clear();
        self.label_offsets.clear();
        self.pending_fixup_records.clear();
    }
    pub fn env(&self) -> &Environment {
        &self.env
    }

    pub fn data(&self) -> &[u8] {
        &self.data
    }

    pub fn data_mut(&mut self) -> &mut [u8] {
        &mut self.data
    }

    pub fn relocs(&self) -> &[AsmReloc] {
        &self.relocs
    }

    pub fn put1(&mut self, value: u8) {
        self.data.push(value);
    }

    pub fn put2(&mut self, value: u16) {
        self.data.extend_from_slice(&value.to_ne_bytes());
    }

    pub fn put4(&mut self, value: u32) {
        self.data.extend_from_slice(&value.to_ne_bytes());
    }

    pub fn put8(&mut self, value: u64) {
        self.data.extend_from_slice(&value.to_ne_bytes());
    }

    pub fn write_u8(&mut self, value: u8) {
        self.data.push(value);
    }

    pub fn write_u16(&mut self, value: u16) {
        self.data.extend_from_slice(&value.to_ne_bytes());
    }

    pub fn write_u32(&mut self, value: u32) {
        self.data.extend_from_slice(&value.to_ne_bytes());
    }

    pub fn write_u64(&mut self, value: u64) {
        self.data.extend_from_slice(&value.to_ne_bytes());
    }

    pub fn add_symbol(&mut self, name: impl Into<ExternalName>, distance: RelocDistance) -> Sym {
        let ix = self.symbols.len();
        self.symbols.push(SymData {
            distance,
            name: name.into(),
        });

        Sym::from_id(ix as u32)
    }

    pub fn symbol_distance(&self, sym: Sym) -> RelocDistance {
        self.symbols[sym.id() as usize].distance
    }

    pub fn symbol_name(&self, sym: Sym) -> &ExternalName {
        &self.symbols[sym.id() as usize].name
    }

    pub fn get_label(&mut self) -> Label {
        let l = self.label_offsets.len();
        self.label_offsets.push(u32::MAX);
        Label::from_id(l as _)
    }

    pub fn get_label_for_constant(&mut self, constant: Constant) -> Label {
        let (
            _,
            AsmConstant {
                upcoming_label,
                align: _,
                size,
            },
        ) = self.constants[constant.0 as usize];
        if let Some(label) = upcoming_label {
            return label;
        }

        let label = self.get_label();
        self.pending_constants.push(constant);
        self.pending_constants_size += size as u32;
        self.constants[constant.0 as usize].1.upcoming_label = Some(label);
        label
    }

    pub fn add_constant(&mut self, constant: impl Into<ConstantData>) -> Constant {
        let c = self.constants.len() as u32;
        let data = constant.into();
        let x = AsmConstant {
            upcoming_label: None,
            align: data.alignment() as _,
            size: data.as_slice().len(),
        };
        self.constants.push((data, x));
        Constant(c)
    }

    pub fn use_label_at_offset(&mut self, offset: CodeOffset, label: Label, kind: LabelUse) {
        let fixup = AsmFixup {
            kind,
            label,
            offset,
        };

        self.pending_fixup_records.push(fixup);
    }

    /// Align up to the given alignment.
    pub fn align_to(&mut self, align_to: CodeOffset) {
        assert!(
            align_to.is_power_of_two(),
            "{align_to} is not a power of two"
        );
        while self.cur_offset() & (align_to - 1) != 0 {
            self.write_u8(0);
        }

        // Post-invariant: as for `put1()`.
    }

    pub fn cur_offset(&self) -> CodeOffset {
        self.data.len() as _
    }

    pub fn bind_label(&mut self, label: Label) {
        self.label_offsets[label.id() as usize] = self.cur_offset();
    }

    pub fn label_offset(&self, label: Label) -> u32 {
        self.label_offsets[label.id() as usize]
    }

    pub fn add_reloc(&mut self, kind: Reloc, target: RelocTarget, addend: i64) {
        let offset = self.cur_offset();
        self.add_reloc_at_offset(offset, kind, target, addend);
    }

    pub fn add_reloc_at_offset(
        &mut self,
        offset: CodeOffset,
        kind: Reloc,
        target: RelocTarget,
        addend: i64,
    ) {
        self.relocs.push(AsmReloc {
            addend,
            kind,
            offset,
            target,
        })
    }

    fn handle_fixup(&mut self, fixup: AsmFixup) {
        let AsmFixup {
            kind,
            label,
            offset,
        } = fixup;
        let start = offset as u32;
        let end = offset as usize + kind.patch_size();

        let label_offset = self.label_offsets[label.id() as usize];
        if label_offset != u32::MAX {
            let veneer_required = if label_offset >= offset {
                false
            } else {
                (offset - label_offset) > kind.max_neg_range()
            };

            if veneer_required {
                self.emit_veneer(label, offset, kind);
            } else {
                let slice = &mut self.data[start as usize..end as usize];

                kind.patch(slice, start, label_offset);
            }
        } else {
            // If the offset of this label is not known at this time then
            // that means that a veneer is required because after this
            // island the target can't be in range of the original target.
            self.emit_veneer(label, offset, kind);
        }
    }

    /// Emits a "veneer" so that the `kind` use at `offset` can reach `label`.
    ///
    /// This will generate extra machine code, using `kind`, to get a larger
    /// jump range than `kind` alone allows. The code at `offset` is then
    /// patched to jump to the new code, and the new code is enqueued for a
    /// fixup to get processed at some later time.
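    ///
    /// For the label uses that currently support veneers here (`RVB12`, `RVJal20`,
    /// `RVCJump`), the veneer produced by [`LabelUse::generate_veneer`] is the
    /// 8-byte `auipc x31` / `jalr x0, x31` pair, which is then registered for a
    /// new fixup with [`LabelUse::RVPCRel32`].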
    pub fn emit_veneer(&mut self, label: Label, offset: CodeOffset, kind: LabelUse) {
        // If this `kind` doesn't support a veneer then that's a bug in the
        // backend because we need to implement support for such a veneer.
        assert!(
            kind.supports_veneer(),
            "jump beyond the range of {kind:?} but a veneer isn't supported",
        );

        self.align_to(kind.align() as _);
        let veneer_offset = self.cur_offset();
        let start = offset as usize;
        let end = (offset + kind.patch_size() as u32) as usize;
        let slice = &mut self.data[start..end];

        kind.patch(slice, offset, veneer_offset);
        let veneer_slice = self.get_appended_space(kind.veneer_size() as usize);
        let (veneer_fixup_off, veneer_label_use) =
            kind.generate_veneer(veneer_slice, veneer_offset);

        // Register a new use of `label` with our new veneer fixup and
        // offset. This'll recalculate deadlines accordingly and
        // enqueue this fixup to get processed at some later
        // time.
        self.use_label_at_offset(veneer_fixup_off, label, veneer_label_use);
    }

    /// Reserve appended space and return a mutable slice referring to it.
    pub fn get_appended_space(&mut self, len: usize) -> &mut [u8] {
        let off = self.data.len();
        let new_len = self.data.len() + len;
        self.data.resize(new_len, 0);
        &mut self.data[off..]

        // Post-invariant: as for `put1()`.
    }

    /// Returns the maximal offset that islands can reach if `distance` more
    /// bytes are appended.
    ///
    /// This is used to determine if veneers need insertions since jumps that
    /// can't reach past this point must get a veneer of some form.
    fn worst_case_end_of_island(&self, distance: CodeOffset) -> CodeOffset {
        // Assume that all fixups will require veneers and that the veneers are
        // the worst-case size for each platform. This is an over-generalization
        // to avoid iterating over the `fixup_records` list or maintaining
        // information about it as we go along.
        let island_worst_case_size =
            ((self.fixup_records.len() + self.pending_fixup_records.len()) as u32) * 20
                + self.pending_constants_size;
        self.cur_offset()
            .saturating_add(distance)
            .saturating_add(island_worst_case_size)
    }

    fn should_apply_fixup(&self, fixup: &AsmFixup, forced_threshold: CodeOffset) -> bool {
        let label_offset = self.label_offset(fixup.label);
        label_offset != u32::MAX
            || fixup.offset.saturating_add(fixup.kind.max_pos_range()) < forced_threshold
    }
    /// Is an island needed within the next N bytes?
    pub fn island_needed(&mut self, distance: CodeOffset) -> bool {
        let deadline = match self.fixup_records.peek() {
            Some(fixup) => fixup
                .offset
                .saturating_add(fixup.kind.max_pos_range())
                .min(self.pending_fixup_deadline),
            None => self.pending_fixup_deadline,
        };

        deadline < u32::MAX && self.worst_case_end_of_island(distance) > deadline
    }

    /// Emit all pending constants and required pending veneers.
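    ///
    /// A sketch of how an emission loop might pair this with
    /// [`CodeBuffer::island_needed`] (here `worst_case_size` is an assumed upper
    /// bound on the bytes emitted before the next check, not something defined
    /// in this file):
    ///
    /// ```ignore
    /// let worst_case_size: CodeOffset = 8;
    /// if buf.island_needed(worst_case_size) {
    ///     buf.emit_island(worst_case_size);
    /// }
    /// ```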
    pub fn emit_island(&mut self, distance: CodeOffset) {
        let forced_threshold = self.worst_case_end_of_island(distance);

        for constant in core::mem::take(&mut self.pending_constants) {
            let (_, AsmConstant { align, size, .. }) = self.constants[constant.0 as usize];
            let label = self.constants[constant.0 as usize]
                .1
                .upcoming_label
                .take()
                .unwrap();
            self.align_to(align as _);
            self.bind_label(label);
            self.used_constants.push((constant, self.cur_offset()));
            self.get_appended_space(size);
        }
        // Either handle all pending fixups because they're ready or move them
        // onto the `BinaryHeap` tracking all pending fixups if they aren't
        // ready.
        for fixup in core::mem::take(&mut self.pending_fixup_records) {
            if self.should_apply_fixup(&fixup, forced_threshold) {
                self.handle_fixup(fixup);
            } else {
                self.fixup_records.push(fixup);
            }
        }

        self.pending_fixup_deadline = u32::MAX;

        while let Some(fixup) = self.fixup_records.peek() {
            // If this fixup shouldn't be applied, that means its label isn't
            // defined yet and there'll be remaining space to apply a veneer if
            // necessary in the future after this island. In that situation
            // because `fixup_records` is sorted by deadline this loop can
            // exit.
            if !self.should_apply_fixup(fixup, forced_threshold) {
                break;
            }
            let fixup = self.fixup_records.pop().unwrap();
            self.handle_fixup(fixup);
        }
    }

    fn finish_emission_maybe_forcing_veneers(&mut self) {
        while !self.pending_constants.is_empty()
            || !self.pending_fixup_records.is_empty()
            || !self.fixup_records.is_empty()
        {
            // `emit_island()` will emit any pending veneers and constants, and
            // as a side-effect, will also take care of any fixups with resolved
            // labels eagerly.
            self.emit_island(u32::MAX);
        }
    }

    fn finish_constants(&mut self) -> u32 {
        let mut alignment = 32;

        for (constant, offset) in core::mem::take(&mut self.used_constants) {
            let constant = &self.constants[constant.0 as usize].0;
            let data = constant.as_slice();
            self.data[offset as usize..][..data.len()].copy_from_slice(data);
            alignment = constant.alignment().max(alignment);
        }

        alignment as _
    }

    pub fn finish(mut self) -> CodeBufferFinalized {
        self.finish_emission_maybe_forcing_veneers();
        let alignment = self.finish_constants();
        CodeBufferFinalized {
            data: self.data,
            relocs: self.relocs,
            symbols: self.symbols,
            alignment,
        }
    }
}

#[derive(Clone, PartialEq, Eq, Debug, Hash)]
pub enum ConstantData {
    WellKnown(&'static [u8]),
    U64([u8; 8]),
    Bytes(Vec<u8>),
}

impl ConstantData {
    pub fn as_slice(&self) -> &[u8] {
        match self {
            ConstantData::WellKnown(data) => data,
            ConstantData::U64(data) => data.as_ref(),
            ConstantData::Bytes(data) => data,
        }
    }

    pub fn alignment(&self) -> usize {
        if self.as_slice().len() <= 8 {
            8
        } else {
            16
        }
    }
}

/// A use of a constant by one or more assembly instructions.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Constant(pub(crate) u32);

impl From<&'static str> for ConstantData {
    fn from(value: &'static str) -> Self {
        Self::WellKnown(value.as_bytes())
    }
}

impl From<[u8; 8]> for ConstantData {
    fn from(value: [u8; 8]) -> Self {
        Self::U64(value)
    }
}

impl From<Vec<u8>> for ConstantData {
    fn from(value: Vec<u8>) -> Self {
        Self::Bytes(value)
    }
}

impl From<&'static [u8]> for ConstantData {
    fn from(value: &'static [u8]) -> Self {
        Self::WellKnown(value)
    }
}

impl From<u64> for ConstantData {
    fn from(value: u64) -> Self {
        Self::U64(value.to_ne_bytes())
    }
}

/// Offset in bytes from the beginning of the function.
///
/// The assembler can be used as a cross compiler, so we don't want to use a type like `usize`,
/// which depends on the *host* platform rather than the *target* platform.
pub type CodeOffset = u32;

/// Addend to add to the symbol value.
pub type Addend = i64;

/// Relocation kinds for every ISA
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum Reloc {
    /// absolute 4-byte
    Abs4,
    /// absolute 8-byte
    Abs8,
    /// x86 PC-relative 4-byte
    X86PCRel4,
    /// x86 call to PC-relative 4-byte
    X86CallPCRel4,
    /// x86 call to PLT-relative 4-byte
    X86CallPLTRel4,
    /// x86 GOT PC-relative 4-byte
    X86GOTPCRel4,
    /// The 32-bit offset of the target from the beginning of its section.
    /// Equivalent to `IMAGE_REL_AMD64_SECREL`.
    /// See: [PE Format](https://docs.microsoft.com/en-us/windows/win32/debug/pe-format)
    X86SecRel,
    /// Arm32 call target
    Arm32Call,
    /// Arm64 call target. Encoded as bottom 26 bits of instruction. This
    /// value is sign-extended, multiplied by 4, and added to the PC of
    /// the call instruction to form the destination address.
    Arm64Call,

    /// Elf x86_64 32 bit signed PC relative offset to two GOT entries for GD symbol.
    ElfX86_64TlsGd,

    /// Mach-O x86_64 32 bit signed PC relative offset to a `__thread_vars` entry.
    MachOX86_64Tlv,

    /// Mach-O Aarch64 TLS
    /// PC-relative distance to the page of the TLVP slot.
    MachOAarch64TlsAdrPage21,

    /// Mach-O Aarch64 TLS
    /// Offset within page of TLVP slot.
    MachOAarch64TlsAdrPageOff12,

    /// Aarch64 TLSDESC Adr Page21
    /// This is equivalent to `R_AARCH64_TLSDESC_ADR_PAGE21` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#57105thread-local-storage-descriptors)
    Aarch64TlsDescAdrPage21,

    /// Aarch64 TLSDESC Ld64 Lo12
    /// This is equivalent to `R_AARCH64_TLSDESC_LD64_LO12` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#57105thread-local-storage-descriptors)
    Aarch64TlsDescLd64Lo12,

    /// Aarch64 TLSDESC Add Lo12
    /// This is equivalent to `R_AARCH64_TLSGD_ADD_LO12` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#57105thread-local-storage-descriptors)
    Aarch64TlsDescAddLo12,

    /// Aarch64 TLSDESC Call
    /// This is equivalent to `R_AARCH64_TLSDESC_CALL` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#57105thread-local-storage-descriptors)
    Aarch64TlsDescCall,

    /// AArch64 GOT Page
    /// Set the immediate value of an ADRP to bits 32:12 of X; check that -2^32 <= X < 2^32.
    /// This is equivalent to `R_AARCH64_ADR_GOT_PAGE` (311) in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#static-aarch64-relocations)
    Aarch64AdrGotPage21,

    /// AArch64 GOT Low bits
    /// Set the LD/ST immediate field to bits 11:3 of X. No overflow check; check that X&7 = 0.
    /// This is equivalent to `R_AARCH64_LD64_GOT_LO12_NC` (312) in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#static-aarch64-relocations)
    Aarch64Ld64GotLo12Nc,

    /// RISC-V Absolute address: 64-bit address.
    RiscvAbs8,

    /// RISC-V Call PLT: 32-bit PC-relative function call, macros call, tail (PIC)
    ///
    /// Despite having PLT in the name, this relocation is also used for normal calls.
    /// The non-PLT version of this relocation has been deprecated.
    ///
    /// This is the `R_RISCV_CALL_PLT` relocation from the RISC-V ELF psABI document.
    /// <https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#procedure-calls>
    RiscvCallPlt,

    /// RISC-V TLS GD: High 20 bits of 32-bit PC-relative TLS GD GOT reference.
    ///
    /// This is the `R_RISCV_TLS_GD_HI20` relocation from the RISC-V ELF psABI document.
    /// <https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#global-dynamic>
    RiscvTlsGdHi20,

    /// Low 12 bits of a 32-bit PC-relative relocation (I-Type instruction)
    ///
    /// This is the `R_RISCV_PCREL_LO12_I` relocation from the RISC-V ELF psABI document.
    /// <https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses>
    RiscvPCRelLo12I,

    /// High 20 bits of a 32-bit PC-relative GOT offset relocation
    ///
    /// This is the `R_RISCV_GOT_HI20` relocation from the RISC-V ELF psABI document.
    /// <https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses>
    RiscvGotHi20,
}

#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
pub enum LabelUse {
    X86JmpRel32,
    /// 20-bit branch offset (unconditional branches). PC-rel, offset is
    /// imm << 1. Immediate is 20 signed bits. Used in `jal` instructions.
    RVJal20,
    /// The unconditional jump instructions all use PC-relative
    /// addressing to help support position independent code. The JALR
    /// instruction was defined to enable a two-instruction sequence to
    /// jump anywhere in a 32-bit absolute address range. A LUI
    /// instruction can first load rs1 with the upper 20 bits of a
    /// target address, then JALR can add in the lower bits. Similarly,
    /// AUIPC then JALR can jump anywhere in a 32-bit pc-relative
    /// address range.
    RVPCRel32,

    /// All branch instructions use the B-type instruction format. The
    /// 12-bit B-immediate encodes signed offsets in multiples of 2, and
    /// is added to the current pc to give the target address. The
    /// conditional branch range is ±4 KiB.
    RVB12,

    /// Equivalent to the `R_RISCV_PCREL_HI20` relocation. Allows setting
    /// the immediate field of an `auipc` instruction.
    RVPCRelHi20,

    /// Similar to the `R_RISCV_PCREL_LO12_I` relocation but pointing to
    /// the final address, instead of the `PCREL_HI20` label. Allows setting
    /// the immediate field of I-type instructions such as `addi` or `lw`.
    ///
    /// Since we currently don't support offsets in labels, this relocation has
    /// an implicit offset of 4.
    RVPCRelLo12I,

    /// 11-bit PC-relative jump offset. Equivalent to the `RVC_JUMP` relocation
    RVCJump,
    /// 9-bit PC-relative branch offset.
    RVCB9,
    /// 14-bit branch offset (conditional branches). PC-rel, offset is imm <<
    /// 2. Immediate is 14 signed bits, in bits 18:5. Used by tbz and tbnz.
    A64Branch14,
    /// 19-bit branch offset (conditional branches). PC-rel, offset is imm << 2. Immediate is 19
    /// signed bits, in bits 23:5. Used by cbz, cbnz, b.cond.
    A64Branch19,
    /// 26-bit branch offset (unconditional branches). PC-rel, offset is imm << 2. Immediate is 26
    /// signed bits, in bits 25:0. Used by b, bl.
    A64Branch26,
    A64Ldr19,
    A64Adr21,
}

impl LabelUse {
    /// Maximum PC-relative range (positive), inclusive.
    pub const fn max_pos_range(self) -> CodeOffset {
        match self {
            LabelUse::RVJal20 => ((1 << 19) - 1) * 2,
            LabelUse::RVPCRelLo12I | LabelUse::RVPCRelHi20 | LabelUse::RVPCRel32 => {
                let imm20_max: i64 = ((1 << 19) - 1) << 12;
                let imm12_max = (1 << 11) - 1;
                (imm20_max + imm12_max) as _
            }
            LabelUse::RVB12 => ((1 << 11) - 1) * 2,
            LabelUse::RVCB9 => ((1 << 8) - 1) * 2,
            LabelUse::RVCJump => ((1 << 10) - 1) * 2,
            LabelUse::X86JmpRel32 => i32::MAX as _,
            _ => u32::MAX,
        }
    }

    /// Maximum PC-relative range (negative), expressed as a positive magnitude.
    pub const fn max_neg_range(self) -> CodeOffset {
        match self {
            LabelUse::RVPCRel32 => {
                let imm20_max: i64 = (1 << 19) << 12;
                let imm12_max: i64 = 1 << 11;
                // Return the magnitude of the most negative reachable offset;
                // negating before the cast would wrap and understate the range.
                (imm20_max + imm12_max) as CodeOffset
            }
            _ => self.max_pos_range() + 2,
        }
    }

    pub const fn patch_size(&self) -> usize {
        match self {
            Self::X86JmpRel32 => 4,
            Self::RVCJump | Self::RVCB9 => 2,
            Self::RVJal20 | Self::RVB12 | Self::RVPCRelHi20 | Self::RVPCRelLo12I => 4,
            Self::RVPCRel32 => 8,
            _ => 4,
        }
    }

    pub const fn align(&self) -> usize {
        match self {
            Self::X86JmpRel32 => 1,
            Self::RVCJump => 4,
            Self::RVJal20 | Self::RVB12 | Self::RVCB9 | Self::RVPCRelHi20 | Self::RVPCRelLo12I => 4,
            Self::RVPCRel32 => 4,
            _ => 4,
        }
    }

    pub const fn supports_veneer(&self) -> bool {
        match self {
            Self::RVB12 | Self::RVJal20 | Self::RVCJump => true,
            _ => false,
        }
    }

    pub const fn veneer_size(&self) -> usize {
        match self {
            Self::RVB12 | Self::RVJal20 | Self::RVCJump => 8,
            _ => unreachable!(),
        }
    }

    pub fn generate_veneer(
        &self,
        buffer: &mut [u8],
        veneer_offset: CodeOffset,
    ) -> (CodeOffset, Self) {
        if matches!(
            self,
            Self::RVB12
                | Self::RVCJump
                | Self::RVJal20
                | Self::RVPCRelHi20
                | Self::RVPCRelLo12I
                | Self::RVPCRel32
        ) {
            let base = riscv::X31;

            {
                let x = riscv::Inst::new(riscv::Opcode::AUIPC)
                    .encode()
                    .set_rd(base.id())
                    .value
                    .to_le_bytes();
                buffer[0] = x[0];
                buffer[1] = x[1];
                buffer[2] = x[2];
                buffer[3] = x[3];
            }

            {
                let x = riscv::Inst::new(riscv::Opcode::JALR)
                    .encode()
                    .set_rd(riscv::ZERO.id())
                    .set_rs1(base.id())
                    .value
                    .to_le_bytes();
                buffer[4] = x[0];
                buffer[5] = x[1];
                buffer[6] = x[2];
                buffer[7] = x[3];
            }

            (veneer_offset, LabelUse::RVPCRel32)
        } else {
            todo!()
        }
    }
    pub fn patch(&self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
        let pc_reli = (label_offset as i64) - (use_offset as i64);

        let pc_rel = pc_reli as u32;

        match self {
            Self::X86JmpRel32 => {
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);

                let value = pc_rel.wrapping_add(addend).wrapping_sub(4);

                buffer.copy_from_slice(&value.to_le_bytes());
            }

            Self::RVJal20 => {
                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let offset = pc_rel as u32;
                let v = ((offset >> 12 & 0b1111_1111) << 12)
                    | ((offset >> 11 & 0b1) << 20)
                    | ((offset >> 1 & 0b11_1111_1111) << 21)
                    | ((offset >> 20 & 0b1) << 31);
                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v));
            }

            Self::RVPCRel32 => {
                let (imm20, imm12) = generate_imm(pc_rel as u64);
                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let insn2 = u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]);

                let auipc = riscv::Inst::new(riscv::Opcode::AUIPC).encode().set_imm20(0);
                let jalr = riscv::Inst::new(riscv::Opcode::JALR)
                    .encode()
                    .set_rd(0)
                    .set_rs1(0)
                    .set_imm12(0);

                buffer[0..4].copy_from_slice(&(insn | auipc.value | imm20).to_le_bytes());
                buffer[4..8].copy_from_slice(&(insn2 | jalr.value | imm12).to_le_bytes());
            }

            Self::RVB12 => {
                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let offset = pc_rel as u32;
                let v = ((offset >> 11 & 0b1) << 7)
                    | ((offset >> 1 & 0b1111) << 8)
                    | ((offset >> 5 & 0b11_1111) << 25)
                    | ((offset >> 12 & 0b1) << 31);
                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v));
            }

            Self::RVPCRelHi20 => {
                // See https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses
                //
                // We need to add 0x800 to ensure that we land at the next page as soon as it goes out of range for the
                // Lo12 relocation. That relocation is signed and has a maximum range of -2048..2047. So when we get an
                // offset of 2048, we need to land at the next page and subtract instead.
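                //
                // Worked example (illustrative numbers): for a pc-relative offset of
                // 0x1801, hi20 = (0x1801 + 0x800) >> 12 = 0x2, and the later Lo12 patch
                // contributes 0x1801 - (0x2 << 12) = -0x7ff, so the `auipc` plus the
                // following I-type instruction together reproduce the full offset.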
                let offset = pc_reli as u32;
                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let hi20 = offset.wrapping_add(0x800) >> 12;
                let insn = (insn & 0xfff) | (hi20 << 12);
                buffer[0..4].copy_from_slice(&insn.to_le_bytes());
            }

            Self::RVPCRelLo12I => {
                // `offset` is the offset from the current instruction to the target address.
                //
                // However we are trying to compute the offset to the target address from the previous instruction.
                // The previous instruction should be the one that contains the PCRelHi20 relocation and
                // stores/references the program counter (`auipc` usually).
                //
                // Since we are trying to compute the offset from the previous instruction, we can
                // represent it as offset = target_address - (current_instruction_address - 4)
                // which is equivalent to offset = target_address - current_instruction_address + 4.
                //
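                // Illustrative numbers: with the `auipc` at 0x100, this instruction at
                // 0x104, and the target at 0x1234, `pc_reli` is 0x1234 - 0x104 = 0x1130,
                // and the low 12 bits actually needed come from 0x1130 + 4 = 0x1134, the
                // distance from the `auipc` itself.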
                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);

                let lo12 = (pc_reli + 4) as u32 & 0xfff;
                let insn = (insn & 0xFFFFF) | (lo12 << 20);
                buffer[0..4].copy_from_slice(&insn.to_le_bytes());
            }

            Self::RVCJump => {
                debug_assert!(pc_rel & 1 == 0);

                let insn = riscv::Inst::new(riscv::Opcode::CJ)
                    .encode()
                    .set_c_imm12(pc_rel as _);
                buffer[0..2].clone_from_slice(&(insn.value as u16).to_le_bytes());
            }
            _ => todo!(),
        }
    }
}

pub const fn is_imm12(val: i64) -> bool {
    val >= -2048 && val <= 2047
}

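/// Splits `value` into an upper 20-bit part and a lower signed 12-bit part such
/// that `(imm20 << 12) + imm12 == value`, returning both halves already encoded
/// into their `auipc`-style and I-type instruction fields (as used by the
/// `RVPCRel32` patch above).
///
/// Worked examples of the split (illustrative only): 6000 becomes `imm20 = 1`,
/// `imm12 = 1904` (4096 + 1904 = 6000), while 6144 becomes `imm20 = 2`,
/// `imm12 = -2048` (8192 - 2048 = 6144), since the low part must stay within
/// -2048..=2047.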
pub(crate) fn generate_imm(value: u64) -> (u32, u32) {
    if is_imm12(value as _) {
        return (
            0,
            riscv::InstructionValue::new(0)
                .set_imm12(value as i64 as i32)
                .value,
        );
    }

    let value = value as i64;

    let mod_num = 4096i64;
    let (imm20, imm12) = if value > 0 {
        let mut imm20 = value / mod_num;
        let mut imm12 = value % mod_num;

        if imm12 >= 2048 {
            imm12 -= mod_num;
            imm20 += 1;
        }

        (imm20, imm12)
    } else {
        let value_abs = value.abs();
        let imm20 = value_abs / mod_num;
        let imm12 = value_abs % mod_num;
        let mut imm20 = -imm20;
        let mut imm12 = -imm12;
        if imm12 < -2048 {
            imm12 += mod_num;
            imm20 -= 1;
        }
        (imm20, imm12)
    };
    (
        riscv::InstructionValue::new(0).set_imm20(imm20 as _).value,
        riscv::InstructionValue::new(0).set_imm12(imm12 as _).value,
    )
}

/// A generic implementation of relocation resolving.
///
/// # NOTE
///
/// Very simple and incomplete. At the moment only Abs4, Abs8, X86 and RISC-V GOT relocations are supported.
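///
/// A minimal call sketch (illustrative: `code_rw_ptr`/`code_rx_ptr` stand for the
/// writable and executable mappings of the same finalized buffer, and the
/// `resolve_*` closures are placeholders for however the caller looks up symbols,
/// GOT entries, and PLT entries):
///
/// ```ignore
/// perform_relocations(
///     code_rw_ptr,
///     code_rx_ptr,
///     finalized.relocs(),
///     |target| resolve_address(target),
///     |target| resolve_got_entry(target),
///     |target| resolve_plt_entry(target),
/// );
/// ```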
pub fn perform_relocations(
    code: *mut u8,
    code_rx: *const u8,
    relocs: &[AsmReloc],
    get_address: impl Fn(&RelocTarget) -> *const u8,
    get_got_entry: impl Fn(&RelocTarget) -> *const u8,
    get_plt_entry: impl Fn(&RelocTarget) -> *const u8,
) {
    use core::ptr::write_unaligned;

    for &AsmReloc {
        addend,
        kind,
        offset,
        ref target,
    } in relocs
    {
        let at = unsafe { code.offset(isize::try_from(offset).unwrap()) };
        let atrx = unsafe { code_rx.offset(isize::try_from(offset).unwrap()) };
        match kind {
            Reloc::Abs4 => {
                let base = get_address(target);
                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
                unsafe {
                    write_unaligned(at as *mut u32, u32::try_from(what as usize).unwrap());
                }
            }

            Reloc::Abs8 => {
                let base = get_address(target);
                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
                unsafe {
                    write_unaligned(at as *mut u64, u64::try_from(what as usize).unwrap());
                }
            }

            Reloc::X86PCRel4 | Reloc::X86CallPCRel4 => {
                let base = get_address(target);
                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
                let pcrel = i32::try_from((what as isize) - (atrx as isize)).unwrap();

                unsafe {
                    write_unaligned(at as *mut i32, pcrel);
                }
            }

            Reloc::X86GOTPCRel4 => {
                let base = get_got_entry(target);
                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
                let pcrel = i32::try_from((what as isize) - (atrx as isize)).unwrap();

                unsafe {
                    write_unaligned(at as *mut i32, pcrel);
                }
            }

            Reloc::X86CallPLTRel4 => {
                let base = get_plt_entry(target);
                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
                let pcrel = i32::try_from((what as isize) - (atrx as isize)).unwrap();
                unsafe { write_unaligned(at as *mut i32, pcrel) };
            }

            Reloc::RiscvGotHi20 => {
                let base = get_got_entry(target);
                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
                let pc_rel = i32::try_from((what as isize) - (atrx as isize)).unwrap();
                unsafe {
                    let buffer = core::slice::from_raw_parts_mut(at as *mut u8, 4);
                    let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                    let hi20 = (pc_rel as u32).wrapping_add(0x800) >> 12;
                    let insn = (insn & 0xfff) | (hi20 << 12);
                    buffer.copy_from_slice(&insn.to_le_bytes());
                }
            }

            Reloc::RiscvPCRelLo12I => {
                let base = get_got_entry(target);
                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
                let pc_rel = i32::try_from((what as isize) - (atrx as isize)).unwrap();

                unsafe {
                    let buffer = core::slice::from_raw_parts_mut(at as *mut u8, 4);
                    let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                    let lo12 = (pc_rel + 4) as u32 & 0xfff;
                    let insn = (insn & 0xFFFFF) | (lo12 << 20);
                    buffer.copy_from_slice(&insn.to_le_bytes());
                }
            }

            _ => todo!(),
        }
    }
}