// asmkit/core/buffer.rs

1use alloc::{borrow::Cow, collections::BinaryHeap, vec::Vec};
2
3use smallvec::SmallVec;
4
5use crate::AsmError;
6use crate::core::patch::{
7    PatchBlock, PatchBlockId, PatchCatalog, PatchSite, PatchSiteId, fill_with_nops,
8    minimum_patch_alignment,
9};
10#[cfg(feature = "riscv")]
11use crate::riscv;
12
13#[cfg(feature = "jit")]
14use crate::core::jit_allocator::{JitAllocator, Span};
15
16use super::{
17    operand::{Label, Sym},
18    target::Environment,
19};
20
21/// A buffer of output to be produced, fixed up, and then emitted to a CodeSink
22/// in bulk.
23///
24/// This struct uses `SmallVec`s to support small-ish function bodies without
25/// any heap allocation. As such, it will be several kilobytes large. This is
26/// likely fine as long as it is stack-allocated for function emission then
27/// thrown away; but beware if many buffer objects are retained persistently.
28#[derive(Default)]
29pub struct CodeBuffer {
30    env: Environment,
31    data: SmallVec<[u8; 1024]>,
32    relocs: SmallVec<[AsmReloc; 16]>,
33    symbols: SmallVec<[SymData; 16]>,
34    label_offsets: SmallVec<[CodeOffset; 16]>,
35    pending_fixup_records: SmallVec<[AsmFixup; 16]>,
36    pending_fixup_deadline: u32,
37    pending_constants: SmallVec<[Constant; 16]>,
38    pending_constants_size: CodeOffset,
39    used_constants: SmallVec<[(Constant, CodeOffset); 4]>,
40    constants: SmallVec<[(ConstantData, AsmConstant); 4]>,
41    fixup_records: BinaryHeap<AsmFixup>,
42    patch_blocks: SmallVec<[PendingPatchBlock; 4]>,
43    patch_sites: SmallVec<[PendingPatchSite; 8]>,
44}
45
/// A patchable region recorded during emission; resolved into a `PatchBlock`
/// when the buffer is finished.
#[derive(Clone, Copy)]
struct PendingPatchBlock {
    /// Start offset of the region within the code buffer.
    offset: CodeOffset,
    /// Size of the region in bytes.
    size: CodeOffset,
    /// Alignment (power of two) the region start satisfies.
    align: CodeOffset,
}

/// Target of a pending patch site: either a fixed code offset, or a label
/// that is resolved when the buffer is finished.
#[derive(Clone, Copy)]
enum PendingPatchTarget {
    Offset(CodeOffset),
    Label(Label),
}

/// A patchable location recorded during emission; resolved into a
/// `PatchSite` when the buffer is finished.
#[derive(Clone, Copy)]
struct PendingPatchSite {
    /// Offset of the patched bytes within the code buffer.
    offset: CodeOffset,
    /// Patch encoding/range (the same kinds used for label fixups).
    kind: LabelUse,
    /// Where the site currently points.
    target: PendingPatchTarget,
    /// Extra displacement applied when the patch is performed.
    addend: i64,
}
66
/// Name of a symbol external to the current buffer.
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum ExternalName {
    /// A textual symbol name (e.g. for linking against a named function).
    Symbol(Cow<'static, str>),
    /// An opaque, user-assigned numeric identifier.
    UserName(u32),
}
/// What a relocation refers to: an external symbol or a label within this
/// same buffer.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub enum RelocTarget {
    Sym(Sym),
    Label(Label),
}

/// Expected distance to the relocation target; `Near` targets may permit a
/// shorter PC-relative encoding.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum RelocDistance {
    Near,
    Far,
}

/// A symbol registered with the buffer: its name plus its expected distance.
#[derive(Clone, PartialEq, Eq)]
pub(crate) struct SymData {
    name: ExternalName,
    distance: RelocDistance,
}

/// A relocation resulting from emitting assembly.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub struct AsmReloc {
    /// Offset in the buffer where the relocation applies.
    pub offset: CodeOffset,
    /// Relocation kind (ISA/format specific).
    pub kind: Reloc,
    /// Constant added to the symbol value.
    pub addend: i64,
    /// Symbol or label the relocation refers to.
    pub target: RelocTarget,
}
98/// A fixup to perform on the buffer once code is emitted.
99/// Fixups always refer to labels and patch the code based on label offsets.
100/// Hence, they are like relocations, but internal to one buffer.
101#[derive(Clone, Copy, PartialEq, PartialOrd, Ord, Eq)]
102pub struct AsmFixup {
103    pub label: Label,
104    pub offset: CodeOffset,
105    pub kind: LabelUse,
106}
107
108impl AsmFixup {
109    fn deadline(&self) -> CodeOffset {
110        self.offset.saturating_sub(self.kind.max_pos_range())
111    }
112}
113
/// Metadata about a constant. Stored alongside the constant's data in
/// `CodeBuffer::constants`.
#[derive(Clone, Copy)]
struct AsmConstant {
    /// A label which has not yet been bound which can be used for this
    /// constant.
    ///
    /// This is lazily created when a label is requested for a constant and is
    /// cleared when a constant is emitted.
    upcoming_label: Option<Label>,
    /// Required alignment, in bytes.
    align: CodeOffset,
    /// The byte size of this constant.
    size: usize,
}
128
/// A `CodeBuffer` once emission is completed: holds generated code and records,
/// without fixups. This allows the type to be independent of the backend.
pub struct CodeBufferFinalized {
    /// Finalized machine-code bytes, constants already copied in.
    pub(crate) data: SmallVec<[u8; 1024]>,
    /// Relocations to apply when loading the code.
    pub(crate) relocs: SmallVec<[AsmReloc; 16]>,
    /// Symbols referenced by the relocations.
    pub(crate) symbols: SmallVec<[SymData; 16]>,
    /// Required load-time alignment in bytes.
    pub(crate) alignment: u32,
    /// Resolved patch blocks/sites for later in-place patching.
    pub(crate) patch_catalog: PatchCatalog,
}

/// Executable memory loaded from a finalized code buffer with relocations applied.
#[cfg(feature = "jit")]
pub struct LoadedRelocatedCode {
    /// The allocation holding the code followed by the GOT.
    span: Span,
    /// Size of the code portion in bytes; the GOT begins at this offset.
    code_size: usize,
    /// Targets that received GOT slots, in slot order.
    got_targets: Vec<RelocTarget>,
}
146
#[cfg(feature = "jit")]
impl LoadedRelocatedCode {
    /// Pointer to the read+execute mapping of the code.
    pub const fn rx(&self) -> *const u8 {
        self.span.rx()
    }

    /// Pointer to the read+write mapping of the code.
    pub const fn rw(&self) -> *mut u8 {
        self.span.rw()
    }

    /// The underlying allocation span (code plus GOT).
    pub const fn span(&self) -> &Span {
        &self.span
    }

    /// Size of the code portion in bytes, excluding the GOT.
    pub const fn code_size(&self) -> usize {
        self.code_size
    }

    /// Targets that received GOT slots, in slot order.
    pub fn got_targets(&self) -> &[RelocTarget] {
        &self.got_targets
    }

    /// Size of the GOT in bytes: one pointer-sized slot per target.
    pub fn got_size(&self) -> usize {
        self.got_targets.len() * core::mem::size_of::<usize>()
    }

    /// Execute-side pointer to the start of the GOT (placed right after the code).
    pub fn got_rx(&self) -> *const u8 {
        self.rx().wrapping_add(self.code_size)
    }

    /// Write-side pointer to the start of the GOT.
    pub fn got_rw(&self) -> *mut u8 {
        self.rw().wrapping_add(self.code_size)
    }
}
181
182pub fn reloc_uses_got(kind: Reloc) -> bool {
183    matches!(
184        kind,
185        Reloc::X86GOTPCRel4
186            | Reloc::RiscvGotHi20
187            | Reloc::RiscvPCRelLo12I
188            | Reloc::Aarch64AdrGotPage21
189            | Reloc::Aarch64Ld64GotLo12Nc
190    )
191}
192
193pub fn got_slot_index(got_targets: &[RelocTarget], target: &RelocTarget) -> usize {
194    got_targets
195        .iter()
196        .position(|item| item == target)
197        .expect("missing GOT target for relocation")
198}
199
impl CodeBufferFinalized {
    /// Total size, in bytes, of the emitted code (constants included).
    pub fn total_size(&self) -> usize {
        self.data.len()
    }

    /// The emitted machine-code bytes.
    pub fn data(&self) -> &[u8] {
        &self.data[..]
    }

    /// Mutable access to the emitted bytes (e.g. for applying relocations).
    pub fn data_mut(&mut self) -> &mut [u8] {
        &mut self.data[..]
    }

    /// Name of the symbol `sym` refers to.
    pub fn symbol_name(&self, sym: Sym) -> &ExternalName {
        &self.symbols[sym.id() as usize].name
    }

    /// Expected distance of the symbol `sym`.
    pub fn symbol_distance(&self, sym: Sym) -> RelocDistance {
        self.symbols[sym.id() as usize].distance
    }

    /// All relocations recorded during emission.
    pub fn relocs(&self) -> &[AsmReloc] {
        &self.relocs[..]
    }

    /// Required alignment, in bytes, for loading this code into memory.
    pub fn alignment(&self) -> u32 {
        self.alignment
    }

    /// Allocate this code buffer in executable memory and return a `Span` referring to it.
    /// This will also write the code into the allocated memory. To execute
    /// code you can simply use [`span.rx()`](Span::rx) to get a pointer to read+exec memory
    /// and transmute that to a function pointer of the appropriate type.
    #[cfg(feature = "jit")]
    pub fn allocate(&self, jit_allocator: &mut JitAllocator) -> Result<Span, AsmError> {
        let mut span = jit_allocator.alloc(self.data().len())?;

        // SAFETY: `span` was just allocated with at least `data().len()`
        // writable bytes, and source/destination cannot overlap.
        unsafe {
            jit_allocator.write(&mut span, |span| {
                span.rw()
                    .copy_from_nonoverlapping(self.data().as_ptr(), self.data().len());
            })?;
        }

        Ok(span)
    }

    /// Allocate executable memory and apply relocations, including GOT setup in JIT mode.
    ///
    /// GOT entries are created automatically for relocations that require them and populated
    /// with values returned by `get_address`.
    #[cfg(feature = "jit")]
    pub fn allocate_relocated(
        &self,
        jit_allocator: &mut JitAllocator,
        get_address: impl Fn(&RelocTarget) -> *const u8,
        get_plt_entry: impl Fn(&RelocTarget) -> *const u8,
    ) -> Result<LoadedRelocatedCode, AsmError> {
        // Collect the distinct targets needing GOT slots, preserving
        // first-use order (slot index == position in this list).
        let mut got_targets = Vec::new();

        for reloc in &self.relocs {
            if reloc_uses_got(reloc.kind) && !got_targets.iter().any(|item| item == &reloc.target) {
                got_targets.push(reloc.target.clone());
            }
        }

        // The GOT lives immediately after the code in the same allocation.
        let got_size = got_targets.len() * core::mem::size_of::<usize>();
        let total_size = self.data().len() + got_size;
        let mut span = jit_allocator.alloc(total_size)?;

        // SAFETY: `span` has `total_size` writable bytes; the code copy stays
        // within `data().len()` and the GOT writes stay within the `got_size`
        // bytes that follow it.
        unsafe {
            jit_allocator.write(&mut span, |span| {
                span.rw()
                    .copy_from_nonoverlapping(self.data().as_ptr(), self.data().len());

                // Populate each GOT slot with the resolved target address.
                let got_rw = span.rw().wrapping_add(self.data().len()) as *mut usize;
                for (index, target) in got_targets.iter().enumerate() {
                    let addr = get_address(target);
                    got_rw.wrapping_add(index).write_unaligned(addr.addr());
                }

                // Relocations see the GOT via its execute-side address.
                let got_rx = span.rx().wrapping_add(self.data().len());
                perform_relocations(
                    span.rw(),
                    span.rx(),
                    &self.relocs,
                    &get_address,
                    |target| {
                        got_rx.wrapping_add(
                            got_slot_index(&got_targets, target) * core::mem::size_of::<usize>(),
                        )
                    },
                    &get_plt_entry,
                );
            })?;
        }

        Ok(LoadedRelocatedCode {
            span,
            code_size: self.data().len(),
            got_targets,
        })
    }
}
304
305impl CodeBuffer {
    /// Create an empty buffer with a default `Environment`.
    pub fn new() -> Self {
        Self::default()
    }

    /// Create an empty buffer emitting for the given `Environment`.
    pub fn with_env(env: Environment) -> Self {
        Self {
            env,
            ..Default::default()
        }
    }
316
317    pub fn clear(&mut self) {
318        self.data.clear();
319        self.relocs.clear();
320        self.label_offsets.clear();
321        self.pending_fixup_records.clear();
322        self.constants.clear();
323        self.fixup_records.clear();
324        self.symbols.clear();
325        self.used_constants.clear();
326        self.pending_fixup_deadline = 0;
327        self.pending_constants_size = 0;
328        self.pending_constants.clear();
329        self.patch_blocks.clear();
330        self.patch_sites.clear();
331    }
    /// The target environment this buffer emits for.
    pub fn env(&self) -> &Environment {
        &self.env
    }

    /// Mutable access to the target environment.
    pub fn env_mut(&mut self) -> &mut Environment {
        &mut self.env
    }

    /// The bytes emitted so far.
    pub fn data(&self) -> &[u8] {
        &self.data
    }

    /// Mutable access to the bytes emitted so far.
    pub fn data_mut(&mut self) -> &mut [u8] {
        &mut self.data
    }

    /// The relocations recorded so far.
    pub fn relocs(&self) -> &[AsmReloc] {
        &self.relocs
    }
351
352    pub fn put1(&mut self, value: u8) {
353        self.data.push(value);
354    }
355
356    pub fn put2(&mut self, value: u16) {
357        self.data.extend_from_slice(&value.to_ne_bytes());
358    }
359
360    pub fn put4(&mut self, value: u32) {
361        self.data.extend_from_slice(&value.to_ne_bytes());
362    }
363
364    pub fn put8(&mut self, value: u64) {
365        self.data.extend_from_slice(&value.to_ne_bytes());
366    }
367
368    pub fn write_u8(&mut self, value: u8) {
369        self.data.push(value);
370    }
371
372    pub fn write_u16(&mut self, value: u16) {
373        self.data.extend_from_slice(&value.to_ne_bytes());
374    }
375
376    pub fn write_u32(&mut self, value: u32) {
377        self.data.extend_from_slice(&value.to_ne_bytes());
378    }
379
380    pub fn write_u64(&mut self, value: u64) {
381        self.data.extend_from_slice(&value.to_ne_bytes());
382    }
383
    /// Register a symbol with the given name and expected distance, returning
    /// a handle usable as a relocation target.
    pub fn add_symbol(&mut self, name: impl Into<ExternalName>, distance: RelocDistance) -> Sym {
        let ix = self.symbols.len();
        self.symbols.push(SymData {
            distance,
            name: name.into(),
        });

        Sym::from_id(ix as u32)
    }

    /// Expected distance of the symbol `sym`.
    pub fn symbol_distance(&self, sym: Sym) -> RelocDistance {
        self.symbols[sym.id() as usize].distance
    }

    /// Name of the symbol `sym`.
    pub fn symbol_name(&self, sym: Sym) -> &ExternalName {
        &self.symbols[sym.id() as usize].name
    }
401
402    pub fn get_label(&mut self) -> Label {
403        let l = self.label_offsets.len();
404        self.label_offsets.push(u32::MAX);
405        Label::from_id(l as _)
406    }
407
408    pub fn is_bound(&mut self, label: Label) -> bool {
409        self.label_offsets[label.id() as usize] != u32::MAX
410    }
411
    /// Get (or lazily create) a label that will point at `constant` once it
    /// is emitted into a constant island.
    ///
    /// The first request queues the constant for emission and caches the
    /// label in `upcoming_label`; subsequent requests return the same label
    /// until the constant is placed.
    pub fn get_label_for_constant(&mut self, constant: Constant) -> Label {
        let (
            _,
            AsmConstant {
                upcoming_label,
                align: _,
                size,
            },
        ) = self.constants[constant.0 as usize];
        if let Some(label) = upcoming_label {
            return label;
        }

        let label = self.get_label();
        self.pending_constants.push(constant);
        self.pending_constants_size += size as u32;
        self.constants[constant.0 as usize].1.upcoming_label = Some(label);
        label
    }

    /// Register constant data with the buffer, returning a handle.
    ///
    /// The constant's bytes are not emitted until a label for it is requested
    /// and an island is emitted.
    pub fn add_constant(&mut self, constant: impl Into<ConstantData>) -> Constant {
        let c = self.constants.len() as u32;
        let data = constant.into();
        let x = AsmConstant {
            upcoming_label: None,
            align: data.alignment() as _,
            size: data.as_slice().len(),
        };
        self.constants.push((data, x));
        Constant(c)
    }
443
    /// Record that the instruction bytes at `offset` reference `label` with
    /// use kind `kind`; the reference is patched once the label's offset is
    /// known (possibly via a veneer).
    pub fn use_label_at_offset(&mut self, offset: CodeOffset, label: Label, kind: LabelUse) {
        let fixup = AsmFixup {
            kind,
            label,
            offset,
        };

        self.pending_fixup_records.push(fixup);
        // Track the earliest point by which some pending fixup must be
        // resolved (or handed a veneer).
        self.pending_fixup_deadline = self.pending_fixup_deadline.min(fixup.deadline());
    }
454
455    /// Align up to the given alignment.
456    pub fn align_to(&mut self, align_to: CodeOffset) {
457        assert!(
458            align_to.is_power_of_two(),
459            "{align_to} is not a power of two"
460        );
461        while self.cur_offset() & (align_to - 1) != 0 {
462            self.write_u8(0);
463        }
464
465        // Post-invariant: as for `put1()`.
466    }
467
468    pub fn cur_offset(&self) -> CodeOffset {
469        self.data.len() as _
470    }
471
    /// Bind `label` to the current offset; fixups referring to it can then be
    /// resolved.
    pub fn bind_label(&mut self, label: Label) {
        self.label_offsets[label.id() as usize] = self.cur_offset();
    }

    /// Offset `label` is bound to, or `u32::MAX` while still unbound.
    pub fn label_offset(&self, label: Label) -> u32 {
        self.label_offsets[label.id() as usize]
    }

    /// Record a relocation of `kind` against `target` at the current offset.
    pub fn add_reloc(&mut self, kind: Reloc, target: RelocTarget, addend: i64) {
        let offset = self.cur_offset();
        self.add_reloc_at_offset(offset, kind, target, addend);
    }

    /// Record a relocation of `kind` against `target` at an explicit offset.
    pub fn add_reloc_at_offset(
        &mut self,
        offset: CodeOffset,
        kind: Reloc,
        target: RelocTarget,
        addend: i64,
    ) {
        self.relocs.push(AsmReloc {
            addend,
            kind,
            offset,
            target,
        })
    }
499
    /// Reserve a NOP-filled patchable block of `size` bytes at the current
    /// position, aligned to `align` (raised to the architecture's minimum
    /// patch alignment).
    ///
    /// # Errors
    /// Returns `AsmError::InvalidArgument` if `size` is zero or the effective
    /// alignment is not a power of two; propagates errors from NOP filling.
    pub fn reserve_patch_block(
        &mut self,
        size: CodeOffset,
        align: CodeOffset,
    ) -> Result<PatchBlockId, AsmError> {
        let min_align = minimum_patch_alignment(self.env.arch());
        let align = align.max(min_align);
        if size == 0 || !align.is_power_of_two() {
            return Err(AsmError::InvalidArgument);
        }

        // Pad to the requested alignment, then carve out the block and fill
        // it with architecture-appropriate NOPs so it is valid if never
        // patched.
        self.align_to(align);
        let arch = self.env.arch();
        let offset = self.cur_offset();
        let block = self.get_appended_space(size as usize);
        fill_with_nops(arch, block)?;

        let id = PatchBlockId::from_index(self.patch_blocks.len());
        self.patch_blocks.push(PendingPatchBlock {
            offset,
            size,
            align,
        });
        Ok(id)
    }
525
    /// Register an already-emitted region as a patch block.
    ///
    /// # Panics
    /// Panics (via `unreachable!`) if `size` is zero, `align` is not a
    /// non-zero power of two, or the region extends past the end of the
    /// buffer; callers are expected to have validated these beforehand.
    pub fn record_patch_block(
        &mut self,
        offset: CodeOffset,
        size: CodeOffset,
        align: CodeOffset,
    ) -> PatchBlockId {
        if size == 0 || align == 0 || !align.is_power_of_two() {
            unreachable!("invalid patch block with size {size} and align {align}");
        }

        let end = offset as usize + size as usize;
        if end > self.data.len() {
            unreachable!(
                "patch block at offset {offset} with size {size} exceeds code buffer size {}",
                self.data.len()
            );
        }

        let id = PatchBlockId::from_index(self.patch_blocks.len());
        self.patch_blocks.push(PendingPatchBlock {
            offset,
            size,
            align,
        });
        id
    }

    /// Record a patch site at `offset` whose target is the fixed code offset
    /// `target_offset`.
    pub fn record_patch_site(
        &mut self,
        offset: CodeOffset,
        kind: LabelUse,
        target_offset: CodeOffset,
    ) -> PatchSiteId {
        self.validate_patch_site_offset(offset, kind);
        let id = PatchSiteId::from_index(self.patch_sites.len());
        self.patch_sites.push(PendingPatchSite {
            offset,
            kind,
            target: PendingPatchTarget::Offset(target_offset),
            addend: 0,
        });
        id
    }

    /// Record a patch site at `offset` whose target is `label`; the label is
    /// resolved when the buffer is finished.
    pub fn record_label_patch_site(
        &mut self,
        offset: CodeOffset,
        label: Label,
        kind: LabelUse,
    ) -> PatchSiteId {
        self.validate_patch_site_offset(offset, kind);
        let id = PatchSiteId::from_index(self.patch_sites.len());
        self.patch_sites.push(PendingPatchSite {
            offset,
            kind,
            target: PendingPatchTarget::Label(label),
            addend: 0,
        });
        id
    }

    /// Panics if a patch of `kind` at `offset` would extend past the end of
    /// the currently emitted data.
    fn validate_patch_site_offset(&self, offset: CodeOffset, kind: LabelUse) {
        let end = offset as usize + kind.patch_size();
        if end > self.data.len() {
            unreachable!(
                "patch site at offset {offset} with size {} exceeds code buffer size {}",
                kind.patch_size(),
                self.data.len()
            );
        }
    }
597
    /// Resolve a single fixup: patch the code in place when the label is
    /// bound and within range, otherwise emit a veneer.
    fn handle_fixup(&mut self, fixup: AsmFixup) {
        let AsmFixup {
            kind,
            label,
            offset,
        } = fixup;
        let start = offset;
        let end = offset as usize + kind.patch_size();

        let label_offset = self.label_offsets[label.id() as usize];
        if label_offset != u32::MAX {
            // Label is bound. Forward targets are treated as in range here;
            // backward targets need a veneer when the distance exceeds the
            // negative reach of this use kind.
            let veneer_required = if label_offset >= offset {
                false
            } else {
                (offset - label_offset) > kind.max_neg_range()
            };

            if veneer_required {
                self.emit_veneer(label, offset, kind);
            } else {
                let slice = &mut self.data[start as usize..end];

                kind.patch(slice, start, label_offset);
            }
        } else {
            // If the offset of this label is not known at this time then
            // that means that a veneer is required because after this
            // island the target can't be in range of the original target.
            self.emit_veneer(label, offset, kind);
        }
    }
629
    /// Emits a "veneer" the `kind` code at `offset` to jump to `label`.
    ///
    /// This will generate extra machine code, using `kind`, to get a
    /// larger-jump-kind than `kind` allows. The code at `offset` is then
    /// patched to jump to our new code, and then the new code is enqueued for
    /// a fixup to get processed at some later time.
    pub fn emit_veneer(&mut self, label: Label, offset: CodeOffset, kind: LabelUse) {
        // If this `kind` doesn't support a veneer then that's a bug in the
        // backend because we need to implement support for such a veneer.
        assert!(
            kind.supports_veneer(),
            "jump beyond the range of {kind:?} but a veneer isn't supported",
        );

        // Align the veneer, then retarget the original instruction at it.
        self.align_to(kind.align() as _);
        let veneer_offset = self.cur_offset();
        let start = offset as usize;
        let end = (offset + kind.patch_size() as u32) as usize;
        let slice = &mut self.data[start..end];

        kind.patch(slice, offset, veneer_offset);
        // Write the veneer body; it yields a new, longer-range label use.
        let veneer_slice = self.get_appended_space(kind.veneer_size());
        let (veneer_fixup_off, veneer_label_use) =
            kind.generate_veneer(veneer_slice, veneer_offset);

        // Register a new use of `label` with our new veneer fixup and
        // offset. This'll recalculate deadlines accordingly and
        // enqueue this fixup to get processed at some later
        // time.
        self.use_label_at_offset(veneer_fixup_off, label, veneer_label_use);
    }
661
662    /// Reserve appended space and return a mutable slice referring to it.
663    pub fn get_appended_space(&mut self, len: usize) -> &mut [u8] {
664        let off = self.data.len();
665        let new_len = self.data.len() + len;
666        self.data.resize(new_len, 0);
667        &mut self.data[off..]
668
669        // Post-invariant: as for `put1()`.
670    }
671
672    /// Returns the maximal offset that islands can reach if `distance` more
673    /// bytes are appended.
674    ///
675    /// This is used to determine if veneers need insertions since jumps that
676    /// can't reach past this point must get a veneer of some form.
677    fn worst_case_end_of_island(&self, distance: CodeOffset) -> CodeOffset {
678        // Assume that all fixups will require veneers and that the veneers are
679        // the worst-case size for each platform. This is an over-generalization
680        // to avoid iterating over the `fixup_records` list or maintaining
681        // information about it as we go along.
682        let island_worst_case_size =
683            ((self.fixup_records.len() + self.pending_fixup_records.len()) as u32) * 20
684                + self.pending_constants_size;
685        self.cur_offset()
686            .saturating_add(distance)
687            .saturating_add(island_worst_case_size)
688    }
689
    /// Whether `fixup` should be resolved now: either its label is already
    /// bound, or the worst-case island end would put the target out of the
    /// fixup's positive reach, so a veneer must be emitted immediately.
    fn should_apply_fixup(&self, fixup: &AsmFixup, forced_threshold: CodeOffset) -> bool {
        let label_offset = self.label_offset(fixup.label);
        label_offset != u32::MAX
            || fixup.offset.saturating_add(fixup.kind.max_pos_range()) < forced_threshold
    }
    /// Is an island needed within the next N bytes?
    pub fn island_needed(&mut self, distance: CodeOffset) -> bool {
        // Earliest deadline across both the heap and the pending queue;
        // `u32::MAX` means nothing could require an island.
        let deadline = match self.fixup_records.peek() {
            Some(fixup) => fixup
                .offset
                .saturating_add(fixup.kind.max_pos_range())
                .min(self.pending_fixup_deadline),
            None => self.pending_fixup_deadline,
        };

        deadline < u32::MAX && self.worst_case_end_of_island(distance) > deadline
    }
707
    /// Emit all pending constants and required pending veneers.
    pub fn emit_island(&mut self, distance: CodeOffset) {
        // Any fixup that cannot reach past this offset must be handled now
        // (possibly via a veneer).
        let forced_threshold = self.worst_case_end_of_island(distance);

        // Place every pending constant: align, bind its label, reserve space.
        // The actual bytes are copied in later by `finish_constants`.
        for constant in core::mem::take(&mut self.pending_constants) {
            let (_, AsmConstant { align, size, .. }) = self.constants[constant.0 as usize];
            let label = self.constants[constant.0 as usize]
                .1
                .upcoming_label
                .take()
                .unwrap();
            self.align_to(align as _);
            self.bind_label(label);
            self.used_constants.push((constant, self.cur_offset()));
            self.get_appended_space(size);
        }
        // Either handle all pending fixups because they're ready or move them
        // onto the `BinaryHeap` tracking all pending fixups if they aren't
        // ready.
        for fixup in core::mem::take(&mut self.pending_fixup_records) {
            if self.should_apply_fixup(&fixup, forced_threshold) {
                self.handle_fixup(fixup);
            } else {
                self.fixup_records.push(fixup);
            }
        }

        // NOTE(review): veneers emitted above may have re-queued pending
        // fixups (via `use_label_at_offset`), whose deadlines this reset
        // discards until the next island — confirm this is intended.
        self.pending_fixup_deadline = u32::MAX;

        while let Some(fixup) = self.fixup_records.peek() {
            // If this fixup shouldn't be applied, that means its label isn't
            // defined yet and there'll be remaining space to apply a veneer if
            // necessary in the future after this island. In that situation
            // because `fixup_records` is sorted by deadline this loop can
            // exit.
            if !self.should_apply_fixup(fixup, forced_threshold) {
                break;
            }
            let fixup = self.fixup_records.pop().unwrap();
            self.handle_fixup(fixup);
        }
    }
750
    /// Repeatedly emit islands until no pending constants or fixups remain.
    /// The `u32::MAX` distance forces a veneer for any fixup whose label is
    /// still unbound.
    fn finish_emission_maybe_forcing_veneers(&mut self) {
        while !self.pending_constants.is_empty()
            || !self.pending_fixup_records.is_empty()
            || !self.fixup_records.is_empty()
        {
            // `emit_island()` will emit any pending veneers and constants, and
            // as a side-effect, will also take care of any fixups with resolved
            // labels eagerly.
            self.emit_island(u32::MAX);
        }
    }

    /// Copy every placed constant's bytes into the space reserved for it in
    /// an island, and return the overall required buffer alignment.
    fn finish_constants(&mut self) -> u32 {
        // NOTE(review): the running alignment starts at 32, so the result is
        // always >= 32 even though constants themselves only require 8 or 16
        // — presumably a conservative code-alignment baseline; confirm.
        let mut alignment = 32;

        for (constant, offset) in core::mem::take(&mut self.used_constants) {
            let constant = &self.constants[constant.0 as usize].0;
            let data = constant.as_slice();
            self.data[offset as usize..][..data.len()].copy_from_slice(data);
            alignment = constant.alignment().max(alignment);
        }

        alignment as _
    }
775
    /// Resolve all recorded patch blocks and sites into a `PatchCatalog`.
    ///
    /// Label targets are resolved to their bound offsets. Returns
    /// `AsmError::InvalidState` if a target label is still unbound, and, when
    /// `validate_ranges` is set, `AsmError::TooLarge` if a site cannot reach
    /// its target.
    fn resolve_patch_catalog(&self, validate_ranges: bool) -> Result<PatchCatalog, AsmError> {
        let mut blocks = SmallVec::new();
        let mut sites = SmallVec::new();

        for block in &self.patch_blocks {
            blocks.push(PatchBlock {
                offset: block.offset,
                size: block.size,
                align: block.align,
            });
        }

        for site in &self.patch_sites {
            // Resolve label targets to concrete offsets.
            let target_offset = match site.target {
                PendingPatchTarget::Offset(offset) => offset,
                PendingPatchTarget::Label(label) => self.label_offset(label),
            };

            if target_offset == u32::MAX {
                return Err(AsmError::InvalidState);
            }

            if validate_ranges && !site.kind.can_reach(site.offset, target_offset) {
                return Err(AsmError::TooLarge);
            }

            sites.push(PatchSite {
                offset: site.offset,
                kind: site.kind,
                current_target: target_offset,
                addend: site.addend,
            });
        }

        Ok(PatchCatalog::with_parts(self.env.arch(), blocks, sites))
    }
812
    /// Finish emission, validate that every patch site can reach its target,
    /// and consume the buffer into its finalized form.
    pub fn finish_patched(mut self) -> Result<CodeBufferFinalized, AsmError> {
        self.finish_emission_maybe_forcing_veneers();
        let patch_catalog = self.resolve_patch_catalog(true)?;
        let alignment = self.finish_constants();
        Ok(CodeBufferFinalized {
            data: self.data,
            relocs: self.relocs,
            symbols: self.symbols,
            alignment,
            patch_catalog,
        })
    }

    /// Finish emission and produce a finalized snapshot by cloning the
    /// buffer's contents; patch ranges are not re-validated here (assumed
    /// checked when recorded).
    pub fn finish(&mut self) -> CodeBufferFinalized {
        self.finish_emission_maybe_forcing_veneers();
        let patch_catalog = self
            .resolve_patch_catalog(false)
            .expect("patch metadata must be validated at registration time");
        let alignment = self.finish_constants();
        CodeBufferFinalized {
            data: self.data.clone(),
            relocs: self.relocs.clone(),
            symbols: self.symbols.clone(),
            alignment,
            patch_catalog,
        }
    }
840}
841
/// Backing storage for a constant's bytes.
#[derive(Clone, PartialEq, Eq, Debug, Hash)]
pub enum ConstantData {
    /// Statically known data, borrowed for `'static`.
    WellKnown(&'static [u8]),
    /// An 8-byte value stored inline.
    U64([u8; 8]),
    /// Arbitrary owned bytes.
    Bytes(Vec<u8>),
}

impl ConstantData {
    /// The constant's bytes, regardless of representation.
    pub fn as_slice(&self) -> &[u8] {
        match self {
            ConstantData::WellKnown(data) => data,
            ConstantData::U64(data) => data.as_ref(),
            ConstantData::Bytes(data) => data,
        }
    }

    /// Required alignment in bytes: 8 for constants of up to 8 bytes,
    /// 16 for anything larger.
    pub fn alignment(&self) -> usize {
        if self.as_slice().len() <= 8 { 8 } else { 16 }
    }
}
862
/// A use of a constant by one or more assembly instructions.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Constant(pub(crate) u32);
866
impl From<&'static str> for ConstantData {
    /// Use a static string's UTF-8 bytes as constant data.
    fn from(value: &'static str) -> Self {
        Self::WellKnown(value.as_bytes())
    }
}

impl From<[u8; 8]> for ConstantData {
    /// Store an 8-byte array inline.
    fn from(value: [u8; 8]) -> Self {
        Self::U64(value)
    }
}

impl From<Vec<u8>> for ConstantData {
    /// Take ownership of arbitrary bytes.
    fn from(value: Vec<u8>) -> Self {
        Self::Bytes(value)
    }
}

impl From<&'static [u8]> for ConstantData {
    /// Borrow statically known bytes.
    fn from(value: &'static [u8]) -> Self {
        Self::WellKnown(value)
    }
}

impl From<u64> for ConstantData {
    /// Store the integer's native-endian byte representation.
    fn from(value: u64) -> Self {
        Self::U64(value.to_ne_bytes())
    }
}
896
/// Offset in bytes from the beginning of the function.
///
/// This crate can be used as a cross-compiler, so we don't want to use a type like `usize`
/// which depends on the *host* platform, not the *target* platform.
pub type CodeOffset = u32;

/// Addend to add to the symbol value.
pub type Addend = i64;
905
/// Relocation kinds for every ISA
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum Reloc {
    /// absolute 4-byte
    Abs4,
    /// absolute 8-byte
    Abs8,
    /// x86 PC-relative 4-byte
    X86PCRel4,
    /// x86 call to PC-relative 4-byte
    X86CallPCRel4,
    /// x86 call to PLT-relative 4-byte
    X86CallPLTRel4,
    /// x86 GOT PC-relative 4-byte
    X86GOTPCRel4,
    /// The 32-bit offset of the target from the beginning of its section.
    /// Equivalent to `IMAGE_REL_AMD64_SECREL`.
    /// See: [PE Format](https://docs.microsoft.com/en-us/windows/win32/debug/pe-format)
    X86SecRel,
    /// Arm32 call target
    Arm32Call,
    /// Arm64 call target. Encoded as bottom 26 bits of instruction. This
    /// value is sign-extended, multiplied by 4, and added to the PC of
    /// the call instruction to form the destination address.
    Arm64Call,

    /// Elf x86_64 32 bit signed PC relative offset to two GOT entries for GD symbol.
    ElfX86_64TlsGd,

    /// Mach-O x86_64 32 bit signed PC relative offset to a `__thread_vars` entry.
    MachOX86_64Tlv,

    /// Mach-O Aarch64 TLS
    /// PC-relative distance to the page of the TLVP slot.
    MachOAarch64TlsAdrPage21,

    /// Mach-O Aarch64 TLS
    /// Offset within page of TLVP slot.
    MachOAarch64TlsAdrPageOff12,

    /// Aarch64 TLSDESC Adr Page21
    /// This is equivalent to `R_AARCH64_TLSDESC_ADR_PAGE21` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#57105thread-local-storage-descriptors)
    Aarch64TlsDescAdrPage21,

    /// Aarch64 TLSDESC Ld64 Lo12
    /// This is equivalent to `R_AARCH64_TLSDESC_LD64_LO12` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#57105thread-local-storage-descriptors)
    Aarch64TlsDescLd64Lo12,

    /// Aarch64 TLSDESC Add Lo12
    /// This is equivalent to `R_AARCH64_TLSGD_ADD_LO12` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#57105thread-local-storage-descriptors)
    Aarch64TlsDescAddLo12,

    /// Aarch64 TLSDESC Call
    /// This is equivalent to `R_AARCH64_TLSDESC_CALL` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#57105thread-local-storage-descriptors)
    Aarch64TlsDescCall,

    /// AArch64 GOT Page
    /// Set the immediate value of an ADRP to bits 32:12 of X; check that -2^32 <= X < 2^32
    /// This is equivalent to `R_AARCH64_ADR_GOT_PAGE` (311) in the  [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#static-aarch64-relocations)
    Aarch64AdrGotPage21,

    /// AArch64 GOT Low bits
    ///
    /// Set the LD/ST immediate field to bits 11:3 of X. No overflow check; check that X&7 = 0
    /// This is equivalent to `R_AARCH64_LD64_GOT_LO12_NC` (312) in the  [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#static-aarch64-relocations)
    Aarch64Ld64GotLo12Nc,

    /// Equivalent of `R_AARCH64_ADR_PREL_PG_HI21`.
    Aarch64AdrPrelPgHi21,
    /// Equivalent of `R_AARCH64_ADD_ABS_LO12_NC`.
    Aarch64AddAbsLo12Nc,

    /// RISC-V Absolute address: 64-bit address.
    RiscvAbs8,

    /// RISC-V Call PLT: 32-bit PC-relative function call, macros call, tail (PIC)
    ///
    /// Despite having PLT in the name, this relocation is also used for normal calls.
    /// The non-PLT version of this relocation has been deprecated.
    ///
    /// This is the `R_RISCV_CALL_PLT` relocation from the RISC-V ELF psABI document.
    /// <https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#procedure-calls>
    RiscvCallPlt,

    /// RISC-V TLS GD: High 20 bits of 32-bit PC-relative TLS GD GOT reference,
    ///
    /// This is the `R_RISCV_TLS_GD_HI20` relocation from the RISC-V ELF psABI document.
    /// <https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#global-dynamic>
    RiscvTlsGdHi20,

    /// Low 12 bits of a 32-bit PC-relative relocation (I-Type instruction)
    ///
    /// This is the `R_RISCV_PCREL_LO12_I` relocation from the RISC-V ELF psABI document.
    /// <https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses>
    RiscvPCRelLo12I,

    /// High 20 bits of a 32-bit PC-relative GOT offset relocation
    ///
    /// This is the `R_RISCV_GOT_HI20` relocation from the RISC-V ELF psABI document.
    /// <https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses>
    RiscvGotHi20,
}
1008
/// A kind of label reference (fixup) left in emitted code: identifies which
/// instruction field must be rewritten once the referenced label's offset is
/// finally known.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
pub enum LabelUse {
    /// 32-bit PC-relative displacement as used by x86 `jmp`/`jcc` rel32.
    /// The displacement is measured from the end of the 4-byte field.
    X86JmpRel32,
    /// 20-bit branch offset (unconditional branches). PC-rel, offset is
    /// imm << 1. Immediate is 20 signed bits. Use in Jal instructions.
    RVJal20,
    /// The unconditional jump instructions all use PC-relative
    /// addressing to help support position independent code. The JALR
    /// instruction was defined to enable a two-instruction sequence to
    /// jump anywhere in a 32-bit absolute address range. A LUI
    /// instruction can first load rs1 with the upper 20 bits of a
    /// target address, then JALR can add in the lower bits. Similarly,
    /// AUIPC then JALR can jump anywhere in a 32-bit pc-relative
    /// address range.
    RVPCRel32,

    /// All branch instructions use the B-type instruction format. The
    /// 12-bit B-immediate encodes signed offsets in multiples of 2, and
    /// is added to the current pc to give the target address. The
    /// conditional branch range is ±4 KiB.
    RVB12,

    /// Equivalent to the `R_RISCV_PCREL_HI20` relocation, Allows setting
    /// the immediate field of an `auipc` instruction.
    RVPCRelHi20,

    /// Similar to the `R_RISCV_PCREL_LO12_I` relocation but pointing to
    /// the final address, instead of the `PCREL_HI20` label. Allows setting
    /// the immediate field of I Type instructions such as `addi` or `lw`.
    ///
    /// Since we currently don't support offsets in labels, this relocation has
    /// an implicit offset of 4.
    RVPCRelLo12I,

    /// 11-bit PC-relative jump offset. Equivalent to the `RVC_JUMP` relocation
    RVCJump,
    /// 9-bit PC-relative branch offset.
    RVCB9,
    /// 14-bit branch offset (conditional branches). PC-rel, offset is imm <<
    /// 2. Immediate is 14 signed bits, in bits 18:5. Used by tbz and tbnz.
    A64Branch14,
    /// 19-bit branch offset (conditional branches). PC-rel, offset is imm << 2. Immediate is 19
    /// signed bits, in bits 23:5. Used by cbz, cbnz, b.cond.
    A64Branch19,
    /// 26-bit branch offset (unconditional branches). PC-rel, offset is imm << 2. Immediate is 26
    /// signed bits, in bits 25:0. Used by b, bl.
    A64Branch26,
    /// 19-bit offset for LDR (load literal). PC-rel, offset is imm << 2. Immediate is 19 signed bits,
    /// in bits 23:5.
    A64Ldr19,
    /// 21-bit offset for ADR (get address of label). PC-rel, offset is not shifted. Immediate is
    /// 21 signed bits, with high 19 bits in bits 23:5 and low 2 bits in bits 30:29.
    A64Adr21,
    /// 21-bit offset for ADRP (get address of label). PC-rel, offset is shifted. Immediate is
    /// 21 signed bits, with high 19 bits in bits 23:5 and low 2 bits in bits 30:29.
    A64Adrp21,
    /// 12-bit LDR immediate. NOTE(review): `can_reach` accepts any distance
    /// for this kind and `patch` does not yet implement it (`todo!`) —
    /// confirm intended semantics before relying on it.
    A64Ldr12,

    /// Low 12 bits of a target address, written to the immediate field of an
    /// AArch64 ADD instruction (bits 21:10).
    A64AddAbsLo12,
}
1069
1070impl LabelUse {
1071    pub fn can_reach(&self, use_offset: CodeOffset, label_offset: CodeOffset) -> bool {
1072        let delta = (label_offset as i64) - (use_offset as i64);
1073
1074        match self {
1075            Self::X86JmpRel32 => {
1076                let disp = delta - 4;
1077                i32::try_from(disp).is_ok()
1078            }
1079            Self::RVJal20 => delta % 2 == 0 && (-(1 << 20)..=((1 << 20) - 2)).contains(&delta),
1080            Self::RVB12 => delta % 2 == 0 && (-(1 << 12)..=((1 << 12) - 2)).contains(&delta),
1081            Self::RVCJump => delta % 2 == 0 && (-(1 << 11)..=((1 << 11) - 2)).contains(&delta),
1082            Self::RVCB9 => delta % 2 == 0 && (-(1 << 8)..=((1 << 8) - 2)).contains(&delta),
1083            Self::RVPCRelHi20 | Self::RVPCRelLo12I | Self::RVPCRel32 => {
1084                i32::try_from(delta).is_ok()
1085            }
1086            Self::A64Branch14 => delta % 4 == 0 && (-(1 << 15)..=((1 << 15) - 4)).contains(&delta),
1087            Self::A64Branch19 | Self::A64Ldr19 => {
1088                delta % 4 == 0 && (-(1 << 20)..=((1 << 20) - 4)).contains(&delta)
1089            }
1090            Self::A64Branch26 => delta % 4 == 0 && (-(1 << 27)..=((1 << 27) - 4)).contains(&delta),
1091            Self::A64Adr21 => (-(1 << 20)..=((1 << 20) - 1)).contains(&delta),
1092            Self::A64Adrp21 => {
1093                let page_delta = ((label_offset & !0xfff) as i64) - ((use_offset & !0xfff) as i64);
1094                page_delta % 4096 == 0 && (-(1 << 32)..=((1 << 32) - 4096)).contains(&page_delta)
1095            }
1096
1097            Self::A64AddAbsLo12 => {
1098                delta % 4096 == delta && (-(1 << 12)..=(1 << 12) - 1).contains(&delta)
1099            }
1100
1101            Self::A64Ldr12 => true,
1102        }
1103    }
1104
1105    /// Maximum PC-relative range (positive), inclusive.
1106    pub const fn max_pos_range(self) -> CodeOffset {
1107        match self {
1108            LabelUse::RVJal20 => ((1 << 19) - 1) * 2,
1109            LabelUse::RVPCRelLo12I | LabelUse::RVPCRelHi20 | LabelUse::RVPCRel32 => {
1110                let imm20_max: i64 = ((1 << 19) - 1) << 12;
1111                let imm12_max = (1 << 11) - 1;
1112                (imm20_max + imm12_max) as _
1113            }
1114            LabelUse::RVB12 => ((1 << 11) - 1) * 2,
1115            LabelUse::RVCB9 => ((1 << 8) - 1) * 2,
1116            LabelUse::RVCJump => ((1 << 10) - 1) * 2,
1117            LabelUse::X86JmpRel32 => i32::MAX as _,
1118            _ => u32::MAX,
1119        }
1120    }
1121
1122    pub const fn max_neg_range(self) -> CodeOffset {
1123        match self {
1124            LabelUse::RVPCRel32 => {
1125                let imm20_max: i64 = (1 << 19) << 12;
1126                let imm12_max = 1 << 11;
1127                (-imm20_max - imm12_max) as CodeOffset
1128            }
1129            _ => self.max_pos_range() + 2,
1130        }
1131    }
1132
1133    pub const fn patch_size(&self) -> usize {
1134        match self {
1135            Self::X86JmpRel32 => 4,
1136            Self::RVCJump | Self::RVCB9 => 2,
1137            Self::RVJal20 | Self::RVB12 | Self::RVPCRelHi20 | Self::RVPCRelLo12I => 4,
1138            Self::RVPCRel32 => 8,
1139            _ => 4,
1140        }
1141    }
1142
1143    pub const fn align(&self) -> usize {
1144        match self {
1145            Self::X86JmpRel32 => 1,
1146            Self::RVCJump => 4,
1147            Self::RVJal20 | Self::RVB12 | Self::RVCB9 | Self::RVPCRelHi20 | Self::RVPCRelLo12I => 4,
1148            Self::RVPCRel32 => 4,
1149            _ => 4,
1150        }
1151    }
1152
1153    pub const fn supports_veneer(&self) -> bool {
1154        matches!(self, Self::RVB12 | Self::RVJal20 | Self::RVCJump)
1155    }
1156
1157    pub const fn veneer_size(&self) -> usize {
1158        match self {
1159            Self::RVB12 | Self::RVJal20 | Self::RVCJump => 8,
1160            _ => unreachable!(),
1161        }
1162    }
1163
1164    pub fn generate_veneer(
1165        &self,
1166        buffer: &mut [u8],
1167        veneer_offset: CodeOffset,
1168    ) -> (CodeOffset, Self) {
1169        if matches!(
1170            self,
1171            Self::RVB12
1172                | Self::RVCJump
1173                | Self::RVJal20
1174                | Self::RVPCRelHi20
1175                | Self::RVPCRelLo12I
1176                | Self::RVPCRel32
1177        ) {
1178            #[cfg(not(feature = "riscv"))]
1179            {
1180                let _ = (buffer, veneer_offset);
1181                panic!("RISC-V veneers aren't supported without the `riscv` feature");
1182            }
1183            #[cfg(feature = "riscv")]
1184            {
1185                let base = riscv::X31;
1186
1187                {
1188                    let x = riscv::Inst::new(riscv::Opcode::AUIPC)
1189                        .encode()
1190                        .set_rd(base.id())
1191                        .value
1192                        .to_le_bytes();
1193                    buffer[0] = x[0];
1194                    buffer[1] = x[1];
1195                    buffer[2] = x[2];
1196                    buffer[3] = x[3];
1197                }
1198
1199                {
1200                    let x = riscv::Inst::new(riscv::Opcode::JALR)
1201                        .encode()
1202                        .set_rd(riscv::ZERO.id())
1203                        .set_rs1(base.id())
1204                        .value
1205                        .to_le_bytes();
1206                    buffer[4] = x[0];
1207                    buffer[5] = x[1];
1208                    buffer[6] = x[2];
1209                    buffer[7] = x[3];
1210                }
1211
1212                (veneer_offset, LabelUse::RVPCRel32)
1213            }
1214        } else {
1215            #[cfg(not(feature = "aarch64"))]
1216            {
1217                panic!("AArch64 veneers aren't supported without the `aarch64` feature");
1218            }
1219
1220            #[cfg(feature = "aarch64")]
1221            match self {
1222                LabelUse::A64Branch14 | LabelUse::A64Branch19 => {
1223                    // veneer is a Branch26 (unconditional branch). Just encode directly here -- don't
1224                    // bother with constructing an Inst.
1225                    let insn_word = 0b000101 << 26;
1226                    buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
1227                    (veneer_offset, LabelUse::A64Branch26)
1228                }
1229
1230                LabelUse::A64Branch26 => {
1231                    todo!()
1232                }
1233
1234                _ => todo!(),
1235            }
1236        }
1237    }
1238    pub fn patch(&self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
1239        let addend = match self {
1240            Self::X86JmpRel32 => i64::from(u32::from_le_bytes([
1241                buffer[0], buffer[1], buffer[2], buffer[3],
1242            ])),
1243            _ => 0,
1244        };
1245
1246        self.patch_with_addend(buffer, use_offset, label_offset, addend);
1247    }
1248
1249    pub fn patch_with_addend(
1250        &self,
1251        buffer: &mut [u8],
1252        use_offset: CodeOffset,
1253        label_offset: CodeOffset,
1254        addend: i64,
1255    ) {
1256        let pc_reli = (label_offset as i64) - (use_offset as i64);
1257
1258        let pc_rel = pc_reli as u32;
1259
1260        match self {
1261            Self::X86JmpRel32 => {
1262                let value = pc_rel.wrapping_add(addend as u32).wrapping_sub(4);
1263
1264                buffer.copy_from_slice(&value.to_le_bytes());
1265            }
1266
1267            Self::RVJal20 => {
1268                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1269                let offset = pc_rel;
1270                let v = ((offset >> 12 & 0b1111_1111) << 12)
1271                    | ((offset >> 11 & 0b1) << 20)
1272                    | ((offset >> 1 & 0b11_1111_1111) << 21)
1273                    | ((offset >> 20 & 0b1) << 31);
1274                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v));
1275            }
1276
1277            Self::RVPCRel32 => {
1278                #[cfg(feature = "riscv")]
1279                {
1280                    let (imm20, imm12) = generate_imm(pc_rel as u64);
1281                    let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1282                    let insn2 = u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]);
1283
1284                    let auipc = riscv::Inst::new(riscv::Opcode::AUIPC).encode().set_imm20(0);
1285                    let jalr = riscv::Inst::new(riscv::Opcode::JALR)
1286                        .encode()
1287                        .set_rd(0)
1288                        .set_rs1(0)
1289                        .set_imm12(0);
1290
1291                    buffer[0..4].copy_from_slice(&(insn | auipc.value | imm20).to_le_bytes());
1292                    buffer[4..8].copy_from_slice(&(insn2 | jalr.value | imm12).to_le_bytes());
1293                }
1294                #[cfg(not(feature = "riscv"))]
1295                {
1296                    panic!("RISC-V veneers aren't supported without the `riscv` feature");
1297                }
1298            }
1299
1300            Self::RVB12 => {
1301                #[cfg(feature = "riscv")]
1302                {
1303                    let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1304                    let offset = pc_rel;
1305                    let v = ((offset >> 11 & 0b1) << 7)
1306                        | ((offset >> 1 & 0b1111) << 8)
1307                        | ((offset >> 5 & 0b11_1111) << 25)
1308                        | ((offset >> 12 & 0b1) << 31);
1309                    buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v));
1310                }
1311                #[cfg(not(feature = "riscv"))]
1312                {
1313                    panic!("RISC-V veneers aren't supported without the `riscv` feature");
1314                }
1315            }
1316
1317            Self::RVPCRelHi20 => {
1318                // See https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses
1319                //
1320                // We need to add 0x800 to ensure that we land at the next page as soon as it goes out of range for the
1321                // Lo12 relocation. That relocation is signed and has a maximum range of -2048..2047. So when we get an
1322                // offset of 2048, we need to land at the next page and subtract instead.
1323                let offset = pc_reli as u32;
1324                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1325                let hi20 = offset.wrapping_add(0x800) >> 12;
1326                let insn = (insn & 0xfff) | (hi20 << 12);
1327                buffer[0..4].copy_from_slice(&insn.to_le_bytes());
1328            }
1329
1330            Self::RVPCRelLo12I => {
1331                // `offset` is the offset from the current instruction to the target address.
1332                //
1333                // However we are trying to compute the offset to the target address from the previous instruction.
1334                // The previous instruction should be the one that contains the PCRelHi20 relocation and
1335                // stores/references the program counter (`auipc` usually).
1336                //
1337                // Since we are trying to compute the offset from the previous instruction, we can
1338                // represent it as offset = target_address - (current_instruction_address - 4)
1339                // which is equivalent to offset = target_address - current_instruction_address + 4.
1340                //
1341                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1342
1343                let lo12 = (pc_reli + 4) as u32 & 0xfff;
1344                let insn = (insn & 0xFFFFF) | (lo12 << 20);
1345                buffer[0..4].copy_from_slice(&insn.to_le_bytes());
1346            }
1347
1348            Self::RVCJump => {
1349                debug_assert!(pc_rel & 1 == 0);
1350
1351                #[cfg(feature = "riscv")]
1352                {
1353                    let insn = riscv::Inst::new(riscv::Opcode::CJ)
1354                        .encode()
1355                        .set_c_imm12(pc_rel as _);
1356                    buffer[0..2].clone_from_slice(&(insn.value as u16).to_le_bytes());
1357                }
1358                #[cfg(not(feature = "riscv"))]
1359                {
1360                    panic!("RISC-V jumps aren't supported without the `riscv` feature");
1361                }
1362            }
1363
1364            Self::RVCB9 => {
1365                debug_assert!(pc_rel & 1 == 0);
1366
1367                #[cfg(feature = "riscv")]
1368                {
1369                    let insn = riscv::Inst::new(riscv::Opcode::BEQZ)
1370                        .encode()
1371                        .set_c_bimm9lohi(pc_rel as _);
1372                    buffer[0..2].clone_from_slice(&(insn.value as u16).to_le_bytes());
1373                }
1374                #[cfg(not(feature = "riscv"))]
1375                {
1376                    panic!("RISC-V veneers aren't supported without the `riscv` feature");
1377                }
1378            }
1379
1380            Self::A64Branch14 => {
1381                debug_assert!(pc_reli & 0b11 == 0);
1382
1383                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1384                let imm14 = ((pc_reli >> 2) as i32 as u32) & 0x3fff;
1385                let insn = (insn & !0x0007ffe0) | (imm14 << 5);
1386                buffer[0..4].copy_from_slice(&insn.to_le_bytes());
1387            }
1388
1389            Self::A64Branch19 | Self::A64Ldr19 => {
1390                debug_assert!(pc_reli & 0b11 == 0);
1391
1392                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1393                let imm19 = ((pc_reli >> 2) as i32 as u32) & 0x7ffff;
1394                let insn = (insn & !0x00ffffe0) | (imm19 << 5);
1395                buffer[0..4].copy_from_slice(&insn.to_le_bytes());
1396            }
1397
1398            Self::A64Branch26 => {
1399                debug_assert!(pc_reli & 0b11 == 0);
1400
1401                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1402                let imm26 = ((pc_reli >> 2) as i32 as u32) & 0x03ff_ffff;
1403                let insn = (insn & !0x03ff_ffff) | imm26;
1404                buffer[0..4].copy_from_slice(&insn.to_le_bytes());
1405            }
1406
1407            Self::A64Adr21 => {
1408                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1409                let imm21 = (pc_reli as i32 as u32) & 0x1f_ffff;
1410                let immlo = imm21 & 0x3;
1411                let immhi = (imm21 >> 2) & 0x7ffff;
1412                let insn = (insn & !0x60ff_ffe0) | (immlo << 29) | (immhi << 5);
1413                buffer[0..4].copy_from_slice(&insn.to_le_bytes());
1414            }
1415
1416            Self::A64Adrp21 => {
1417                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1418
1419                // 1. Calculate the page-aligned PC and Target
1420                let pc_page = (use_offset as i64) & !0xFFF;
1421                let target_page = ((label_offset as i64) + addend) & !0xFFF;
1422
1423                // 2. Calculate the offset in pages
1424                let page_offset = (target_page - pc_page) >> 12;
1425
1426                // 3. Encode the 21-bit signed immediate
1427                let imm21 = (page_offset as u32) & 0x1F_FFFF;
1428                let immlo = imm21 & 0x3; // Lowest 2 bits
1429                let immhi = (imm21 >> 2) & 0x7FFFF; // Upper 19 bits
1430
1431                // 4. Clear existing immediate bits and insert new ones
1432                // Bits 29..31 (immlo) and Bits 5..24 (immhi)
1433                let insn = (insn & !0x60FF_FFE0) | (immlo << 29) | (immhi << 5);
1434
1435                buffer[0..4].copy_from_slice(&insn.to_le_bytes());
1436            }
1437
1438            Self::A64AddAbsLo12 => {
1439                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1440
1441                let imm12 = ((pc_reli as i32 as u32) & 0xfff) << 10;
1442                let insn = insn | imm12;
1443                buffer[0..4].copy_from_slice(&insn.to_le_bytes());
1444            }
1445
1446            _ => todo!(),
1447        }
1448    }
1449}
1450
/// Returns `true` if `val` fits in a RISC-V signed 12-bit immediate
/// (the I-type range `-2048..=2047`).
pub const fn is_imm12(val: i64) -> bool {
    matches!(val, -2048..=2047)
}
1454
/// Splits `value` into a `(imm20, imm12)` pair such that
/// `imm20 * 4096 + imm12 == value` (with `imm12` kept in the signed
/// range `-2048..=2047`), returning each part already positioned in an
/// instruction word (U-type upper-20 field, I-type lower-12 field) ready
/// to be OR-ed into an `auipc`/`jalr`-style pair.
///
/// # Panics
///
/// Panics when built without the `riscv` feature.
#[allow(dead_code)]
pub(crate) fn generate_imm(value: u64) -> (u32, u32) {
    #[cfg(not(feature = "riscv"))]
    {
        let _ = value;
        panic!("Can't generate RISC-V immediates without the `riscv` feature");
    }
    #[cfg(feature = "riscv")]
    {
        // Fast path: the whole value fits the signed 12-bit immediate alone.
        if is_imm12(value as _) {
            return (
                0,
                riscv::InstructionValue::new(0)
                    .set_imm12(value as i64 as i32)
                    .value,
            );
        }

        let value = value as i64;

        // Divide by 4096 with the remainder nudged into -2048..=2047, since
        // the low 12-bit part is a *signed* immediate.
        let mod_num = 4096i64;
        let (imm20, imm12) = if value > 0 {
            let mut imm20 = value / mod_num;
            let mut imm12 = value % mod_num;

            if imm12 >= 2048 {
                // Remainder too large to encode positively: carry into the
                // high part and use a negative low part instead.
                imm12 -= mod_num;
                imm20 += 1;
            }

            (imm20, imm12)
        } else {
            // Mirror of the positive case, working on the magnitude.
            let value_abs = value.abs();
            let imm20 = value_abs / mod_num;
            let imm12 = value_abs % mod_num;
            let mut imm20 = -imm20;
            let mut imm12 = -imm12;
            if imm12 < -2048 {
                // Remainder below the signed range: borrow from the high part.
                imm12 += mod_num;
                imm20 -= 1;
            }
            (imm20, imm12)
        };
        (
            riscv::InstructionValue::new(0).set_imm20(imm20 as _).value,
            riscv::InstructionValue::new(0).set_imm12(imm12 as _).value,
        )
    }
}
1504
1505/// A generic implementation of relocation resolving.
1506///
1507/// # NOTE
1508///
1509/// Very simple and incomplete. At the moment only Abs4, Abs8, X86 and RISC-V GOT relocations are supported.
1510///
1511/// # Safety
1512///
1513/// Code and code_rx must be valid pointers to the beginning of the code section. They are used to compute the addresses of the instructions to patch.
1514///
1515/// get_address, get_got_entry and get_plt_entry must return valid pointers to the target addresses for the given relocation targets.
1516pub unsafe fn perform_relocations(
1517    code: *mut u8,
1518    code_rx: *const u8,
1519    relocs: &[AsmReloc],
1520    get_address: impl Fn(&RelocTarget) -> *const u8,
1521    get_got_entry: impl Fn(&RelocTarget) -> *const u8,
1522    get_plt_entry: impl Fn(&RelocTarget) -> *const u8,
1523) {
1524    use core::ptr::write_unaligned;
1525
1526    for &AsmReloc {
1527        addend,
1528        kind,
1529        offset,
1530        ref target,
1531    } in relocs
1532    {
1533        let at = unsafe { code.offset(isize::try_from(offset).unwrap()) };
1534        let atrx = unsafe { code_rx.offset(isize::try_from(offset).unwrap()) };
1535        match kind {
1536            Reloc::Abs4 => {
1537                let base = get_address(target);
1538                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
1539                unsafe {
1540                    write_unaligned(at as *mut u32, u32::try_from(what as usize).unwrap());
1541                }
1542            }
1543
1544            Reloc::Abs8 => {
1545                let base = get_address(target);
1546                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
1547                unsafe {
1548                    write_unaligned(at as *mut u64, u64::try_from(what as usize).unwrap());
1549                }
1550            }
1551
1552            Reloc::X86PCRel4 | Reloc::X86CallPCRel4 => {
1553                let base = get_address(target);
1554                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
1555                let pcrel = i32::try_from((what as isize) - (atrx as isize)).unwrap();
1556
1557                unsafe {
1558                    write_unaligned(at as *mut i32, pcrel);
1559                }
1560            }
1561
1562            Reloc::X86GOTPCRel4 => {
1563                let base = get_got_entry(target);
1564                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
1565                let pcrel = i32::try_from((what as isize) - (atrx as isize)).unwrap();
1566
1567                unsafe {
1568                    write_unaligned(at as *mut i32, pcrel);
1569                }
1570            }
1571
1572            Reloc::X86CallPLTRel4 => {
1573                let base = get_plt_entry(target);
1574                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
1575                let pcrel = i32::try_from((what as isize) - (atrx as isize)).unwrap();
1576                unsafe { write_unaligned(at as *mut i32, pcrel) };
1577            }
1578
1579            Reloc::RiscvGotHi20 => {
1580                let base = get_got_entry(target);
1581                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
1582                let pc_rel = i32::try_from((what as isize) - (atrx as isize)).unwrap();
1583                unsafe {
1584                    let buffer = core::slice::from_raw_parts_mut(at, 4);
1585                    let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1586                    let hi20 = (pc_rel as u32).wrapping_add(0x800) >> 12;
1587                    let insn = (insn & 0xfff) | (hi20 << 12);
1588                    buffer.copy_from_slice(&insn.to_le_bytes());
1589                }
1590            }
1591
1592            Reloc::RiscvPCRelLo12I => {
1593                let base = get_got_entry(target);
1594                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
1595                let pc_rel = i32::try_from((what as isize) - (atrx as isize)).unwrap();
1596
1597                unsafe {
1598                    let buffer = core::slice::from_raw_parts_mut(at, 4);
1599                    let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1600                    let lo12 = (pc_rel + 4) as u32 & 0xfff;
1601                    let insn = (insn & 0xFFFFF) | (lo12 << 20);
1602                    buffer.copy_from_slice(&insn.to_le_bytes());
1603                }
1604            }
1605
1606            Reloc::RiscvCallPlt => {
1607                #[cfg(not(feature = "riscv"))]
1608                {
1609                    panic!("RISC-V calls aren't supported without the `riscv` feature");
1610                }
1611                #[cfg(feature = "riscv")]
1612                {
1613                    // A R_RISCV_CALL_PLT relocation expects auipc+jalr instruction pair.
1614                    // It is the equivalent of two relocations:
1615                    // 1. R_RISCV_PCREL_HI20 on the `auipc`
1616                    // 2. R_RISCV_PCREL_LO12_I on the `jalr`
1617
                    // Absolute target address = symbol address + relocation addend.
                    let base = get_address(target);
                    let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
                    // PC-relative displacement from the relocation site (`atrx`);
                    // must fit in +/- 2 GiB or we panic rather than silently
                    // truncate the relocation.
                    let pcrel = i32::try_from((what as isize) - (atrx as isize)).unwrap();

                    // See https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses
                    // for a better explanation of the following code.
                    //
                    // Unlike the regular symbol relocations, here both "sub-relocations" point to the same address.
                    //
                    // `pcrel` is a signed value (+/- 2GiB range), when splitting it into two parts, we need to
                    // ensure that `hi20` is close enough to `pcrel` to be able to add `lo12` to it and still
                    // get a valid address.
                    //
                    // `lo12` is also a signed offset (+/- 2KiB range) relative to the `hi20` value.
                    //
                    // `hi20` should also be shifted right to be the "true" value. But we also need it
                    // left shifted for the `lo12` calculation and it also matches the instruction encoding.
                    //
                    // Rounding with +0x800 first guarantees that the sign-extended
                    // `lo12` added to `hi20` reproduces `pcrel` exactly.
                    let hi20 = pcrel.wrapping_add(0x800) as u32 & 0xFFFFF000u32;
                    let lo12 = (pcrel as u32).wrapping_sub(hi20) & 0xFFF;

                    // SAFETY: `at` is assumed to point at the writable, 4-byte
                    // aligned `auipc`+`jalr` pair this relocation was recorded
                    // against — guaranteed by the emitter that recorded the
                    // relocation (not visible here; TODO confirm at the recording
                    // site). The immediates are OR-ed into the existing words,
                    // so the instruction opcodes/registers are preserved.
                    unsafe {
                        let auipc_addr = at as *mut u32;
                        let auipc = riscv::Inst::new(riscv::Opcode::AUIPC)
                            .encode()
                            .set_imm20(hi20 as _)
                            .value;
                        auipc_addr.write(auipc_addr.read() | auipc);

                        // The paired `jalr` is the next 4-byte instruction word.
                        let jalr_addr = at.offset(4) as *mut u32;
                        let jalr = riscv::Inst::new(riscv::Opcode::JALR)
                            .encode()
                            .set_imm12(lo12 as _)
                            .value;
                        jalr_addr.write(jalr_addr.read() | jalr);
                    }
1653                }
1654            }
1655
            // R_AARCH64_ADR_PREL_PG_HI21: patch the 21-bit page-relative
            // immediate of an `adrp` instruction with the page delta between
            // the relocation site and the target symbol.
            Reloc::Aarch64AdrPrelPgHi21 => {
                // Absolute target address = symbol address + relocation addend.
                let base = get_address(target);
                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
                // Mask off the low 12 bits to get the 4 KiB page address.
                let get_page = |x| x & (!0xfff);
                // NOTE: This should technically be i33 given that this relocation type allows
                // a range from -4GB to +4GB, not -2GB to +2GB. But this doesn't really matter
                // as the target is unlikely to be more than 2GB from the adrp instruction. We
                // need to be careful to not cast to an unsigned int until after doing >> 12 to
                // compute the upper 21bits of the pcrel address however as otherwise the top
                // bit of the 33bit pcrel address would be forced 0 through zero extension
                // instead of being sign extended as it should be.
                let pcrel =
                    i32::try_from(get_page(what as isize) - get_page(atrx as isize)).unwrap();
                let iptr = at as *mut u32;
                // Split the 21-bit page delta into the ADRP immediate fields:
                // immlo occupies instruction bits 30:29, immhi bits 23:5.
                let hi21 = (pcrel >> 12).cast_unsigned();
                let lo = (hi21 & 0x3) << 29;
                let hi = (hi21 & 0x1ffffc) << 3;
                // SAFETY: `at` is assumed to point at the writable, 4-byte
                // aligned `adrp` recorded for this relocation (emitter's
                // responsibility — not visible here). Immediate bits are OR-ed
                // into the existing word.
                unsafe {
                    let insn = iptr.read();
                    iptr.write(insn | lo | hi);
                }
            }
1678
            // R_AARCH64_ADD_ABS_LO12_NC: patch the low 12 bits of the absolute
            // target address into the imm12 field of an `add` instruction.
            // "NC" = no overflow check, hence the plain mask with no range test.
            Reloc::Aarch64AddAbsLo12Nc => {
                // Absolute target address = symbol address + relocation addend.
                let base = get_address(target);
                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
                let iptr = at as *mut u32;
                // imm12 occupies instruction bits 21:10.
                let imm12 = (what.addr() as u32 & 0xfff) << 10;
                // SAFETY: `at` is assumed to point at the writable, 4-byte
                // aligned `add` recorded for this relocation (emitter's
                // responsibility — not visible here).
                unsafe {
                    let insn = iptr.read();
                    iptr.write(insn | imm12);
                }
            }
1689
            // R_AARCH64_ADR_GOT_PAGE: like Aarch64AdrPrelPgHi21, but the target
            // of the `adrp` is the symbol's GOT entry rather than the symbol
            // itself.
            Reloc::Aarch64AdrGotPage21 => {
                // Address of the GOT slot holding the symbol, plus addend.
                let base = get_got_entry(target);
                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
                // Mask off the low 12 bits to get the 4 KiB page address.
                let get_page = |x| x & (!0xfff);
                // Page delta must fit in i32 — see the i33 caveat on the
                // Aarch64AdrPrelPgHi21 arm, which applies here as well.
                let pcrel =
                    i32::try_from(get_page(what as isize) - get_page(atrx as isize)).unwrap();
                let iptr = at as *mut u32;
                // ADRP immediate fields: immlo at bits 30:29, immhi at bits 23:5.
                let hi21 = (pcrel >> 12).cast_unsigned();
                let lo = (hi21 & 0x3) << 29;
                let hi = (hi21 & 0x1ffffc) << 3;
                // SAFETY: `at` is assumed to point at the writable, 4-byte
                // aligned `adrp` recorded for this relocation (emitter's
                // responsibility — not visible here).
                unsafe {
                    let insn = iptr.read();
                    iptr.write(insn | lo | hi);
                }
            }
1705
            // R_AARCH64_LD64_GOT_LO12_NC: patch the low 12 bits of the GOT
            // entry address into the scaled imm12 field of a 64-bit `ldr`.
            Reloc::Aarch64Ld64GotLo12Nc => {
                // Address of the GOT slot holding the symbol, plus addend.
                let base = get_got_entry(target);
                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
                let iptr = at as *mut u32;
                // LDR (64-bit) immediates are scaled by 8: encode offset bits
                // 11:3 into instruction bits 21:10. Assumes the GOT entry is
                // 8-byte aligned so bits 2:0 are zero — TODO confirm the GOT
                // builder guarantees this alignment.
                let imm12 = ((what.addr() as u32 & 0xfff) >> 3) << 10;
                // SAFETY: `at` is assumed to point at the writable, 4-byte
                // aligned `ldr` recorded for this relocation (emitter's
                // responsibility — not visible here).
                unsafe {
                    let insn = iptr.read();
                    iptr.write(insn | imm12);
                }
            }
1716
1717            _ => todo!(),
1718        }
1719    }
1720}