Skip to main content

asmkit/core/
buffer.rs

1use alloc::{borrow::Cow, collections::BinaryHeap, vec::Vec};
2
3use smallvec::SmallVec;
4
5use crate::{
6    riscv::{self},
7    AsmError,
8};
9
10use super::{
11    jit_allocator::{JitAllocator, Span},
12    operand::{Label, Sym},
13    patch::{
14        fill_with_nops, minimum_patch_alignment, PatchBlock, PatchBlockId, PatchCatalog, PatchSite,
15        PatchSiteId,
16    },
17    target::Environment,
18};
19
/// A buffer of output to be produced, fixed up, and then emitted to a CodeSink
/// in bulk.
///
/// This struct uses `SmallVec`s to support small-ish function bodies without
/// any heap allocation. As such, it will be several kilobytes large. This is
/// likely fine as long as it is stack-allocated for function emission then
/// thrown away; but beware if many buffer objects are retained persistently.
#[derive(Default)]
pub struct CodeBuffer {
    /// Target environment (architecture etc.) code is emitted for.
    env: Environment,
    /// Raw emitted machine-code bytes.
    data: SmallVec<[u8; 1024]>,
    /// External relocations recorded against `data`.
    relocs: SmallVec<[AsmReloc; 16]>,
    /// Symbols referenced by relocations; indexed by `Sym` id.
    symbols: SmallVec<[SymData; 16]>,
    /// Offset each label is bound at; `u32::MAX` means "not yet bound".
    label_offsets: SmallVec<[CodeOffset; 16]>,
    /// Label fixups recorded since the last island was emitted.
    pending_fixup_records: SmallVec<[AsmFixup; 16]>,
    /// Offset by which pending fixups must be processed (see `island_needed`).
    pending_fixup_deadline: u32,
    /// Constants that still need space reserved in an island.
    pending_constants: SmallVec<[Constant; 16]>,
    /// Total byte size of `pending_constants`.
    pending_constants_size: CodeOffset,
    /// Constants that have been placed, with their offsets; bytes are copied
    /// in later by `finish_constants`.
    used_constants: SmallVec<[(Constant, CodeOffset); 4]>,
    /// All registered constants plus metadata; indexed by `Constant` id.
    constants: SmallVec<[(ConstantData, AsmConstant); 4]>,
    /// Fixups deferred past at least one island, kept in a heap.
    fixup_records: BinaryHeap<AsmFixup>,
    /// Patch blocks reserved/recorded so far (pre-finalization form).
    patch_blocks: SmallVec<[PendingPatchBlock; 4]>,
    /// Patch sites recorded so far (pre-finalization form).
    patch_sites: SmallVec<[PendingPatchSite; 8]>,
}
44
/// A patch block as recorded in the buffer, prior to finalization.
#[derive(Clone, Copy)]
struct PendingPatchBlock {
    /// Start offset of the block within the code buffer.
    offset: CodeOffset,
    /// Size of the block in bytes.
    size: CodeOffset,
    /// Alignment the block was reserved with (a power of two).
    align: CodeOffset,
}

/// Where a pending patch site points before the catalog is resolved.
#[derive(Clone, Copy)]
enum PendingPatchTarget {
    /// A concrete offset within the code buffer.
    Offset(CodeOffset),
    /// A label, resolved to its bound offset at finalization time.
    Label(Label),
}

/// A patch site as recorded in the buffer, prior to finalization.
#[derive(Clone, Copy)]
struct PendingPatchSite {
    /// Offset of the patched instruction/field.
    offset: CodeOffset,
    /// How the target is encoded at `offset`.
    kind: LabelUse,
    /// The (possibly unresolved) target of the patch.
    target: PendingPatchTarget,
    /// Addend carried through to the resolved `PatchSite` (always recorded
    /// as 0 by the current `record_*` entry points).
    addend: i64,
}
65
/// Name of an external symbol referenced by relocations.
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum ExternalName {
    /// A textual symbol name.
    Symbol(Cow<'static, str>),
    /// A user-defined numeric name.
    UserName(u32),
}

/// What a relocation points at: an external symbol or an internal label.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub enum RelocTarget {
    Sym(Sym),
    Label(Label),
}

/// Whether a relocation target is expected to be near or far.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum RelocDistance {
    Near,
    Far,
}

/// Per-symbol data stored in the buffer: the name plus its expected distance.
#[derive(Clone, PartialEq, Eq)]
pub(crate) struct SymData {
    name: ExternalName,
    distance: RelocDistance,
}
88
/// A relocation resulting from emitting assembly.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub struct AsmReloc {
    /// Offset in the code buffer the relocation applies at.
    pub offset: CodeOffset,
    /// The relocation kind.
    pub kind: Reloc,
    /// Addend to add to the resolved target value.
    pub addend: i64,
    /// The symbol or label the relocation refers to.
    pub target: RelocTarget,
}
/// A fixup to perform on the buffer once code is emitted.
/// Fixups always refer to labels and patch the code based on label offsets.
/// Hence, they are like relocations, but internal to one buffer.
// NOTE(review): `Ord` is derived, so the `BinaryHeap<AsmFixup>` in
// `CodeBuffer` orders by (label, offset, kind) with the largest on top, while
// `island_needed`/`emit_island` treat `peek()` as the most urgent deadline —
// confirm this ordering is intended (upstream designs order by deadline).
#[derive(Clone, Copy, PartialEq, PartialOrd, Ord, Eq)]
pub struct AsmFixup {
    /// The label whose resolved offset gets patched in.
    pub label: Label,
    /// Offset of the instruction/field to patch.
    pub offset: CodeOffset,
    /// How the label value is encoded at `offset`.
    pub kind: LabelUse,
}
106
/// Metadata about a constant.
#[derive(Clone, Copy)]
struct AsmConstant {
    /// A label which has not yet been bound which can be used for this
    /// constant.
    ///
    /// This is lazily created when a label is requested for a constant
    /// (`get_label_for_constant`) and is cleared when a constant is emitted
    /// (`emit_island` takes it when placing the constant).
    upcoming_label: Option<Label>,
    /// Required alignment.
    align: CodeOffset,
    /// The byte size of this constant.
    size: usize,
}
121
/// A `CodeBuffer` once emission is completed: holds generated code and records,
/// without fixups. This allows the type to be independent of the backend.
pub struct CodeBufferFinalized {
    /// Finalized machine-code bytes (constant data already copied in).
    pub(crate) data: SmallVec<[u8; 1024]>,
    /// External relocations to apply when loading/linking.
    pub(crate) relocs: SmallVec<[AsmReloc; 16]>,
    /// Symbols referenced by `relocs`; indexed by `Sym` id.
    pub(crate) symbols: SmallVec<[SymData; 16]>,
    /// Required alignment of the code, in bytes.
    pub(crate) alignment: u32,
    /// Resolved patch blocks/sites for later patching of the emitted code.
    pub(crate) patch_catalog: PatchCatalog,
}
131
impl CodeBufferFinalized {
    /// Total size of the finalized code, in bytes.
    pub fn total_size(&self) -> usize {
        self.data.len()
    }

    /// Borrow the finalized machine-code bytes.
    pub fn data(&self) -> &[u8] {
        &self.data[..]
    }

    /// Mutably borrow the finalized machine-code bytes.
    pub fn data_mut(&mut self) -> &mut [u8] {
        &mut self.data[..]
    }

    /// Name of the given symbol. Panics if `sym` is out of range.
    pub fn symbol_name(&self, sym: Sym) -> &ExternalName {
        &self.symbols[sym.id() as usize].name
    }

    /// Expected distance of the given symbol. Panics if `sym` is out of range.
    pub fn symbol_distance(&self, sym: Sym) -> RelocDistance {
        self.symbols[sym.id() as usize].distance
    }

    /// The relocations recorded against the code.
    pub fn relocs(&self) -> &[AsmReloc] {
        &self.relocs[..]
    }

    /// Required alignment of the code, in bytes.
    pub fn alignment(&self) -> u32 {
        self.alignment
    }

    /// Allocate this code buffer in executable memory and return a `Span` referring to it.
    /// This will also write the code into the allocated memory. To execute
    /// code you can simply use [`span.rx()`](Span::rx) to get a pointer to read+exec memory
    /// and transmute that to a function pointer of the appropriate type.
    pub fn allocate(&self, jit_allocator: &mut JitAllocator) -> Result<Span, AsmError> {
        let mut span = jit_allocator.alloc(self.data().len())?;

        // SAFETY: `span` was just allocated with room for exactly
        // `self.data().len()` bytes, and `write` provides the writable
        // mapping of that same span, so the copy stays in bounds; source and
        // destination are distinct allocations, so they cannot overlap.
        unsafe {
            jit_allocator.write(&mut span, |span| {
                span.rw()
                    .copy_from_nonoverlapping(self.data().as_ptr(), self.data().len());
            })?;
        }

        Ok(span)
    }
}
178
179impl CodeBuffer {
180    pub fn new() -> Self {
181        Self::default()
182    }
183
184    pub fn with_env(env: Environment) -> Self {
185        Self {
186            env,
187            ..Default::default()
188        }
189    }
190
191    pub fn clear(&mut self) {
192        self.data.clear();
193        self.relocs.clear();
194        self.label_offsets.clear();
195        self.pending_fixup_records.clear();
196        self.constants.clear();
197        self.fixup_records.clear();
198        self.symbols.clear();
199        self.used_constants.clear();
200        self.pending_fixup_deadline = 0;
201        self.pending_constants_size = 0;
202        self.pending_constants.clear();
203        self.patch_blocks.clear();
204        self.patch_sites.clear();
205    }
206    pub fn env(&self) -> &Environment {
207        &self.env
208    }
209
210    pub fn env_mut(&mut self) -> &mut Environment {
211        &mut self.env
212    }
213
214    pub fn data(&self) -> &[u8] {
215        &self.data
216    }
217
218    pub fn data_mut(&mut self) -> &mut [u8] {
219        &mut self.data
220    }
221
222    pub fn relocs(&self) -> &[AsmReloc] {
223        &self.relocs
224    }
225
226    pub fn put1(&mut self, value: u8) {
227        self.data.push(value);
228    }
229
230    pub fn put2(&mut self, value: u16) {
231        self.data.extend_from_slice(&value.to_ne_bytes());
232    }
233
234    pub fn put4(&mut self, value: u32) {
235        self.data.extend_from_slice(&value.to_ne_bytes());
236    }
237
238    pub fn put8(&mut self, value: u64) {
239        self.data.extend_from_slice(&value.to_ne_bytes());
240    }
241
242    pub fn write_u8(&mut self, value: u8) {
243        self.data.push(value);
244    }
245
246    pub fn write_u16(&mut self, value: u16) {
247        self.data.extend_from_slice(&value.to_ne_bytes());
248    }
249
250    pub fn write_u32(&mut self, value: u32) {
251        self.data.extend_from_slice(&value.to_ne_bytes());
252    }
253
254    pub fn write_u64(&mut self, value: u64) {
255        self.data.extend_from_slice(&value.to_ne_bytes());
256    }
257
258    pub fn add_symbol(&mut self, name: impl Into<ExternalName>, distance: RelocDistance) -> Sym {
259        let ix = self.symbols.len();
260        self.symbols.push(SymData {
261            distance,
262            name: name.into(),
263        });
264
265        Sym::from_id(ix as u32)
266    }
267
268    pub fn symbol_distance(&self, sym: Sym) -> RelocDistance {
269        self.symbols[sym.id() as usize].distance
270    }
271
272    pub fn symbol_name(&self, sym: Sym) -> &ExternalName {
273        &self.symbols[sym.id() as usize].name
274    }
275
276    pub fn get_label(&mut self) -> Label {
277        let l = self.label_offsets.len();
278        self.label_offsets.push(u32::MAX);
279        Label::from_id(l as _)
280    }
281
282    pub fn get_label_for_constant(&mut self, constant: Constant) -> Label {
283        let (
284            _,
285            AsmConstant {
286                upcoming_label,
287                align: _,
288                size,
289            },
290        ) = self.constants[constant.0 as usize];
291        if let Some(label) = upcoming_label {
292            return label;
293        }
294
295        let label = self.get_label();
296        self.pending_constants.push(constant);
297        self.pending_constants_size += size as u32;
298        self.constants[constant.0 as usize].1.upcoming_label = Some(label);
299        label
300    }
301
302    pub fn add_constant(&mut self, constant: impl Into<ConstantData>) -> Constant {
303        let c = self.constants.len() as u32;
304        let data = constant.into();
305        let x = AsmConstant {
306            upcoming_label: None,
307            align: data.alignment() as _,
308            size: data.as_slice().len(),
309        };
310        self.constants.push((data, x));
311        Constant(c)
312    }
313
314    pub fn use_label_at_offset(&mut self, offset: CodeOffset, label: Label, kind: LabelUse) {
315        let fixup = AsmFixup {
316            kind,
317            label,
318            offset,
319        };
320
321        self.pending_fixup_records.push(fixup);
322    }
323
324    /// Align up to the given alignment.
325    pub fn align_to(&mut self, align_to: CodeOffset) {
326        assert!(
327            align_to.is_power_of_two(),
328            "{align_to} is not a power of two"
329        );
330        while self.cur_offset() & (align_to - 1) != 0 {
331            self.write_u8(0);
332        }
333
334        // Post-invariant: as for `put1()`.
335    }
336
337    pub fn cur_offset(&self) -> CodeOffset {
338        self.data.len() as _
339    }
340
341    pub fn bind_label(&mut self, label: Label) {
342        self.label_offsets[label.id() as usize] = self.cur_offset();
343    }
344
345    pub fn label_offset(&self, label: Label) -> u32 {
346        self.label_offsets[label.id() as usize]
347    }
348
349    pub fn add_reloc(&mut self, kind: Reloc, target: RelocTarget, addend: i64) {
350        let offset = self.cur_offset();
351        self.add_reloc_at_offset(offset, kind, target, addend);
352    }
353
354    pub fn add_reloc_at_offset(
355        &mut self,
356        offset: CodeOffset,
357        kind: Reloc,
358        target: RelocTarget,
359        addend: i64,
360    ) {
361        self.relocs.push(AsmReloc {
362            addend,
363            kind,
364            offset,
365            target,
366        })
367    }
368
    /// Reserve a patchable block of `size` bytes aligned to `align`, filled
    /// with NOPs, and return its id.
    ///
    /// The requested alignment is clamped up to the architecture's minimum
    /// patch alignment. Returns `AsmError::InvalidArgument` if `size` is zero
    /// or the effective alignment is not a power of two (checked before any
    /// bytes are emitted, so a failed call leaves the buffer untouched).
    pub fn reserve_patch_block(
        &mut self,
        size: CodeOffset,
        align: CodeOffset,
    ) -> Result<PatchBlockId, AsmError> {
        let min_align = minimum_patch_alignment(self.env.arch());
        let align = align.max(min_align);
        if size == 0 || !align.is_power_of_two() {
            return Err(AsmError::InvalidArgument);
        }

        // Pad up to the alignment, carve out the block, and fill it with
        // architecture-appropriate NOPs so it is harmless until patched.
        self.align_to(align);
        let arch = self.env.arch();
        let offset = self.cur_offset();
        let block = self.get_appended_space(size as usize);
        fill_with_nops(arch, block)?;

        let id = PatchBlockId::from_index(self.patch_blocks.len());
        self.patch_blocks.push(PendingPatchBlock {
            offset,
            size,
            align,
        });
        Ok(id)
    }

    /// Record an already-emitted region as a patch block and return its id.
    ///
    /// Unlike `reserve_patch_block`, the bytes must already exist in the
    /// buffer; the caller is responsible for their contents. Panics (via
    /// `unreachable!`) on invalid size/alignment or an out-of-bounds range,
    /// since those indicate an emitter bug rather than recoverable input.
    pub fn record_patch_block(
        &mut self,
        offset: CodeOffset,
        size: CodeOffset,
        align: CodeOffset,
    ) -> PatchBlockId {
        if size == 0 || align == 0 || !align.is_power_of_two() {
            unreachable!("invalid patch block with size {size} and align {align}");
        }

        let end = offset as usize + size as usize;
        if end > self.data.len() {
            unreachable!(
                "patch block at offset {offset} with size {size} exceeds code buffer size {}",
                self.data.len()
            );
        }

        let id = PatchBlockId::from_index(self.patch_blocks.len());
        self.patch_blocks.push(PendingPatchBlock {
            offset,
            size,
            align,
        });
        id
    }
421
422    pub fn record_patch_site(
423        &mut self,
424        offset: CodeOffset,
425        kind: LabelUse,
426        target_offset: CodeOffset,
427    ) -> PatchSiteId {
428        self.validate_patch_site_offset(offset, kind);
429        let id = PatchSiteId::from_index(self.patch_sites.len());
430        self.patch_sites.push(PendingPatchSite {
431            offset,
432            kind,
433            target: PendingPatchTarget::Offset(target_offset),
434            addend: 0,
435        });
436        id
437    }
438
439    pub fn record_label_patch_site(
440        &mut self,
441        offset: CodeOffset,
442        label: Label,
443        kind: LabelUse,
444    ) -> PatchSiteId {
445        self.validate_patch_site_offset(offset, kind);
446        let id = PatchSiteId::from_index(self.patch_sites.len());
447        self.patch_sites.push(PendingPatchSite {
448            offset,
449            kind,
450            target: PendingPatchTarget::Label(label),
451            addend: 0,
452        });
453        id
454    }
455
456    fn validate_patch_site_offset(&self, offset: CodeOffset, kind: LabelUse) {
457        let end = offset as usize + kind.patch_size();
458        if end > self.data.len() {
459            unreachable!(
460                "patch site at offset {offset} with size {} exceeds code buffer size {}",
461                kind.patch_size(),
462                self.data.len()
463            );
464        }
465    }
466
    /// Resolve a single fixup: patch the referenced field in place if the
    /// label is bound and within range, otherwise emit a veneer.
    fn handle_fixup(&mut self, fixup: AsmFixup) {
        let AsmFixup {
            kind,
            label,
            offset,
        } = fixup;
        let start = offset as u32;
        let end = offset as usize + kind.patch_size();

        let label_offset = self.label_offsets[label.id() as usize];
        if label_offset != u32::MAX {
            // Label is bound. A veneer is only required for a *backward*
            // reference further away than the kind's negative range; forward
            // references reaching this point are treated as in range.
            let veneer_required = if label_offset >= offset {
                false
            } else {
                (offset - label_offset) > kind.max_neg_range()
            };

            if veneer_required {
                self.emit_veneer(label, offset, kind);
            } else {
                // Patch the encoded field in place with the label's offset.
                let slice = &mut self.data[start as usize..end as usize];

                kind.patch(slice, start, label_offset);
            }
        } else {
            // If the offset of this label is not known at this time then
            // that means that a veneer is required because after this
            // island the target can't be in range of the original target.
            self.emit_veneer(label, offset, kind);
        }
    }

    /// Emits a "veneer" the `kind` code at `offset` to jump to `label`.
    ///
    /// This will generate extra machine code, using `kind`, to get a
    /// larger-jump-kind than `kind` allows. The code at `offset` is then
    /// patched to jump to our new code, and then the new code is enqueued for
    /// a fixup to get processed at some later time.
    pub fn emit_veneer(&mut self, label: Label, offset: CodeOffset, kind: LabelUse) {
        // If this `kind` doesn't support a veneer then that's a bug in the
        // backend because we need to implement support for such a veneer.
        assert!(
            kind.supports_veneer(),
            "jump beyond the range of {kind:?} but a veneer isn't supported",
        );

        // The veneer is appended at the current (aligned) end of the buffer,
        // and the original site is re-patched to target the veneer instead.
        self.align_to(kind.align() as _);
        let veneer_offset = self.cur_offset();
        let start = offset as usize;
        let end = (offset + kind.patch_size() as u32) as usize;
        let slice = &mut self.data[start..end];

        kind.patch(slice, offset, veneer_offset);
        let veneer_slice = self.get_appended_space(kind.veneer_size() as usize);
        let (veneer_fixup_off, veneer_label_use) =
            kind.generate_veneer(veneer_slice, veneer_offset);

        // Register a new use of `label` with our new veneer fixup and
        // offset. This'll recalculate deadlines accordingly and
        // enqueue this fixup to get processed at some later
        // time.
        self.use_label_at_offset(veneer_fixup_off, label, veneer_label_use);
    }
530
531    /// Reserve appended space and return a mutable slice referring to it.
532    pub fn get_appended_space(&mut self, len: usize) -> &mut [u8] {
533        let off = self.data.len();
534        let new_len = self.data.len() + len;
535        self.data.resize(new_len, 0);
536        &mut self.data[off..]
537
538        // Post-invariant: as for `put1()`.
539    }
540
541    /// Returns the maximal offset that islands can reach if `distance` more
542    /// bytes are appended.
543    ///
544    /// This is used to determine if veneers need insertions since jumps that
545    /// can't reach past this point must get a veneer of some form.
546    fn worst_case_end_of_island(&self, distance: CodeOffset) -> CodeOffset {
547        // Assume that all fixups will require veneers and that the veneers are
548        // the worst-case size for each platform. This is an over-generalization
549        // to avoid iterating over the `fixup_records` list or maintaining
550        // information about it as we go along.
551        let island_worst_case_size =
552            ((self.fixup_records.len() + self.pending_fixup_records.len()) as u32) * 20
553                + self.pending_constants_size;
554        self.cur_offset()
555            .saturating_add(distance)
556            .saturating_add(island_worst_case_size)
557    }
558
559    fn should_apply_fixup(&self, fixup: &AsmFixup, forced_threshold: CodeOffset) -> bool {
560        let label_offset = self.label_offset(fixup.label);
561        label_offset != u32::MAX
562            || fixup.offset.saturating_add(fixup.kind.max_pos_range()) < forced_threshold
563    }
564    /// Is an island needed within the next N bytes?
565    pub fn island_needed(&mut self, distance: CodeOffset) -> bool {
566        let deadline = match self.fixup_records.peek() {
567            Some(fixup) => fixup
568                .offset
569                .saturating_add(fixup.kind.max_pos_range())
570                .min(self.pending_fixup_deadline),
571            None => self.pending_fixup_deadline,
572        };
573
574        deadline < u32::MAX && self.worst_case_end_of_island(distance) > deadline
575    }
576
    /// Emit all pending constants and required pending veneers.
    pub fn emit_island(&mut self, distance: CodeOffset) {
        let forced_threshold = self.worst_case_end_of_island(distance);

        // Place every queued constant: align, bind its label, and reserve
        // space. The actual bytes are copied in later by `finish_constants`.
        for constant in core::mem::take(&mut self.pending_constants) {
            let (_, AsmConstant { align, size, .. }) = self.constants[constant.0 as usize];
            let label = self.constants[constant.0 as usize]
                .1
                .upcoming_label
                .take()
                .unwrap();
            self.align_to(align as _);
            self.bind_label(label);
            self.used_constants.push((constant, self.cur_offset()));
            self.get_appended_space(size);
        }
        // Either handle all pending fixups because they're ready or move them
        // onto the `BinaryHeap` tracking all pending fixups if they aren't
        // ready.
        for fixup in core::mem::take(&mut self.pending_fixup_records) {
            if self.should_apply_fixup(&fixup, forced_threshold) {
                self.handle_fixup(fixup);
            } else {
                self.fixup_records.push(fixup);
            }
        }

        self.pending_fixup_deadline = u32::MAX;

        while let Some(fixup) = self.fixup_records.peek() {
            // If this fixup shouldn't be applied, that means its label isn't
            // defined yet and there'll be remaining space to apply a veneer if
            // necessary in the future after this island. In that situation
            // because `fixup_records` is sorted by deadline this loop can
            // exit.
            if !self.should_apply_fixup(fixup, forced_threshold) {
                break;
            }
            let fixup = self.fixup_records.pop().unwrap();
            self.handle_fixup(fixup);
        }
    }

    /// Repeatedly emit islands until no constants or fixups remain; any
    /// still-unresolved fixup gets a veneer forced by the `u32::MAX` distance.
    fn finish_emission_maybe_forcing_veneers(&mut self) {
        while !self.pending_constants.is_empty()
            || !self.pending_fixup_records.is_empty()
            || !self.fixup_records.is_empty()
        {
            // `emit_island()` will emit any pending veneers and constants, and
            // as a side-effect, will also take care of any fixups with resolved
            // labels eagerly.
            self.emit_island(u32::MAX);
        }
    }

    /// Copy the bytes of all placed constants into their reserved slots and
    /// return the buffer's required alignment.
    ///
    /// NOTE(review): the accumulator starts at 32, so the result is always
    /// >= 32 even though `ConstantData::alignment()` only yields 8 or 16 —
    /// confirm 32 is the intended base code alignment.
    fn finish_constants(&mut self) -> u32 {
        let mut alignment = 32;

        for (constant, offset) in core::mem::take(&mut self.used_constants) {
            let constant = &self.constants[constant.0 as usize].0;
            let data = constant.as_slice();
            self.data[offset as usize..][..data.len()].copy_from_slice(data);
            alignment = constant.alignment().max(alignment);
        }

        alignment as _
    }
644
645    fn resolve_patch_catalog(&self, validate_ranges: bool) -> Result<PatchCatalog, AsmError> {
646        let mut blocks = SmallVec::new();
647        let mut sites = SmallVec::new();
648
649        for block in &self.patch_blocks {
650            blocks.push(PatchBlock {
651                offset: block.offset,
652                size: block.size,
653                align: block.align,
654            });
655        }
656
657        for site in &self.patch_sites {
658            let target_offset = match site.target {
659                PendingPatchTarget::Offset(offset) => offset,
660                PendingPatchTarget::Label(label) => self.label_offset(label),
661            };
662
663            if target_offset == u32::MAX {
664                return Err(AsmError::InvalidState);
665            }
666
667            if validate_ranges && !site.kind.can_reach(site.offset, target_offset) {
668                return Err(AsmError::TooLarge);
669            }
670
671            sites.push(PatchSite {
672                offset: site.offset,
673                kind: site.kind,
674                current_target: target_offset,
675                addend: site.addend,
676            });
677        }
678
679        Ok(PatchCatalog::with_parts(self.env.arch(), blocks, sites))
680    }
681
    /// Finalize the buffer by value, validating that every recorded patch
    /// site can reach its target for its use kind.
    ///
    /// Consumes `self`, so data and records are moved (not cloned) into the
    /// result.
    pub fn finish_patched(mut self) -> Result<CodeBufferFinalized, AsmError> {
        self.finish_emission_maybe_forcing_veneers();
        let patch_catalog = self.resolve_patch_catalog(true)?;
        let alignment = self.finish_constants();
        Ok(CodeBufferFinalized {
            data: self.data,
            relocs: self.relocs,
            symbols: self.symbols,
            alignment,
            patch_catalog,
        })
    }

    /// Finalize by reference, cloning the emitted data and records.
    ///
    /// Skips patch-range validation (`resolve_patch_catalog(false)`); a
    /// failure there means a label was left unbound, which the `expect`
    /// treats as a caller bug.
    pub fn finish(&mut self) -> CodeBufferFinalized {
        self.finish_emission_maybe_forcing_veneers();
        let patch_catalog = self
            .resolve_patch_catalog(false)
            .expect("patch metadata must be validated at registration time");
        let alignment = self.finish_constants();
        CodeBufferFinalized {
            data: self.data.clone(),
            relocs: self.relocs.clone(),
            symbols: self.symbols.clone(),
            alignment,
            patch_catalog,
        }
    }
709}
710
/// The payload of a constant-pool entry.
#[derive(Clone, PartialEq, Eq, Debug, Hash)]
pub enum ConstantData {
    /// Static, well-known bytes (no allocation).
    WellKnown(&'static [u8]),
    /// An 8-byte value stored inline.
    U64([u8; 8]),
    /// Arbitrary owned bytes.
    Bytes(Vec<u8>),
}

impl ConstantData {
    /// View the constant's bytes regardless of representation.
    pub fn as_slice(&self) -> &[u8] {
        match *self {
            ConstantData::WellKnown(data) => data,
            ConstantData::U64(ref data) => &data[..],
            ConstantData::Bytes(ref data) => &data[..],
        }
    }

    /// Required alignment: 8 bytes for constants up to 8 bytes long, 16
    /// bytes for anything larger.
    pub fn alignment(&self) -> usize {
        match self.as_slice().len() {
            0..=8 => 8,
            _ => 16,
        }
    }
}

/// A use of a constant by one or more assembly instructions.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Constant(pub(crate) u32);
739
impl From<&'static str> for ConstantData {
    /// Store a static string's bytes without copying.
    fn from(value: &'static str) -> Self {
        Self::WellKnown(value.as_bytes())
    }
}

impl From<[u8; 8]> for ConstantData {
    /// Store an 8-byte array inline.
    fn from(value: [u8; 8]) -> Self {
        Self::U64(value)
    }
}

impl From<Vec<u8>> for ConstantData {
    /// Take ownership of arbitrary bytes.
    fn from(value: Vec<u8>) -> Self {
        Self::Bytes(value)
    }
}

impl From<&'static [u8]> for ConstantData {
    /// Store a static byte slice without copying.
    fn from(value: &'static [u8]) -> Self {
        Self::WellKnown(value)
    }
}

impl From<u64> for ConstantData {
    /// Store a `u64` in native byte order (matching `CodeBuffer::write_u64`,
    /// which also uses `to_ne_bytes`).
    fn from(value: u64) -> Self {
        Self::U64(value.to_ne_bytes())
    }
}
769
/// Offset in bytes from the beginning of the function.
///
/// The assembler can be used as a cross compiler, so we don't want to use a type like `usize`
/// which depends on the *host* platform, not the *target* platform.
pub type CodeOffset = u32;

/// Addend to add to the symbol value.
pub type Addend = i64;
778
/// Relocation kinds for every ISA
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum Reloc {
    /// absolute 4-byte
    Abs4,
    /// absolute 8-byte
    Abs8,
    /// x86 PC-relative 4-byte
    X86PCRel4,
    /// x86 call to PC-relative 4-byte
    X86CallPCRel4,
    /// x86 call to PLT-relative 4-byte
    X86CallPLTRel4,
    /// x86 GOT PC-relative 4-byte
    X86GOTPCRel4,
    /// The 32-bit offset of the target from the beginning of its section.
    /// Equivalent to `IMAGE_REL_AMD64_SECREL`.
    /// See: [PE Format](https://docs.microsoft.com/en-us/windows/win32/debug/pe-format)
    X86SecRel,
    /// Arm32 call target
    Arm32Call,
    /// Arm64 call target. Encoded as bottom 26 bits of instruction. This
    /// value is sign-extended, multiplied by 4, and added to the PC of
    /// the call instruction to form the destination address.
    Arm64Call,

    /// Elf x86_64 32 bit signed PC relative offset to two GOT entries for GD symbol.
    ElfX86_64TlsGd,

    /// Mach-O x86_64 32 bit signed PC relative offset to a `__thread_vars` entry.
    MachOX86_64Tlv,

    /// Mach-O Aarch64 TLS
    /// PC-relative distance to the page of the TLVP slot.
    MachOAarch64TlsAdrPage21,

    /// Mach-O Aarch64 TLS
    /// Offset within page of TLVP slot.
    MachOAarch64TlsAdrPageOff12,

    /// Aarch64 TLSDESC Adr Page21
    /// This is equivalent to `R_AARCH64_TLSDESC_ADR_PAGE21` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#57105thread-local-storage-descriptors)
    Aarch64TlsDescAdrPage21,

    /// Aarch64 TLSDESC Ld64 Lo12
    /// This is equivalent to `R_AARCH64_TLSDESC_LD64_LO12` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#57105thread-local-storage-descriptors)
    Aarch64TlsDescLd64Lo12,

    /// Aarch64 TLSDESC Add Lo12
    /// This is equivalent to `R_AARCH64_TLSGD_ADD_LO12` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#57105thread-local-storage-descriptors)
    Aarch64TlsDescAddLo12,

    /// Aarch64 TLSDESC Call
    /// This is equivalent to `R_AARCH64_TLSDESC_CALL` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#57105thread-local-storage-descriptors)
    Aarch64TlsDescCall,

    /// AArch64 GOT Page
    /// Set the immediate value of an ADRP to bits 32:12 of X; check that –2^32 <= X < 2^32
    /// This is equivalent to `R_AARCH64_ADR_GOT_PAGE` (311) in the  [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#static-aarch64-relocations)
    Aarch64AdrGotPage21,

    /// AArch64 GOT Low bits
    /// Set the LD/ST immediate field to bits 11:3 of X. No overflow check; check that X&7 = 0
    /// This is equivalent to `R_AARCH64_LD64_GOT_LO12_NC` (312) in the  [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#static-aarch64-relocations)
    Aarch64Ld64GotLo12Nc,

    /// RISC-V Absolute address: 64-bit address.
    RiscvAbs8,

    /// RISC-V Call PLT: 32-bit PC-relative function call, macros call, tail (PIC)
    ///
    /// Despite having PLT in the name, this relocation is also used for normal calls.
    /// The non-PLT version of this relocation has been deprecated.
    ///
    /// This is the `R_RISCV_CALL_PLT` relocation from the RISC-V ELF psABI document.
    /// <https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#procedure-calls>
    RiscvCallPlt,

    /// RISC-V TLS GD: High 20 bits of 32-bit PC-relative TLS GD GOT reference,
    ///
    /// This is the `R_RISCV_TLS_GD_HI20` relocation from the RISC-V ELF psABI document.
    /// <https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#global-dynamic>
    RiscvTlsGdHi20,

    /// Low 12 bits of a 32-bit PC-relative relocation (I-Type instruction)
    ///
    /// This is the `R_RISCV_PCREL_LO12_I` relocation from the RISC-V ELF psABI document.
    /// <https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses>
    RiscvPCRelLo12I,

    /// High 20 bits of a 32-bit PC-relative GOT offset relocation
    ///
    /// This is the `R_RISCV_GOT_HI20` relocation from the RISC-V ELF psABI document.
    /// <https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses>
    RiscvGotHi20,
}
876
/// A kind of label use (fixup) recorded at an instruction that references a
/// not-yet-bound [`Label`].
///
/// Each variant identifies the instruction encoding (or encoding pair) whose
/// immediate field must be rewritten once the label's final offset is known.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
pub enum LabelUse {
    /// 32-bit signed displacement of an x86 `jmp`/`call` rel32, measured
    /// from the end of the 4-byte immediate.
    X86JmpRel32,
    /// 20-bit branch offset (unconditional branches). PC-rel, offset is
    /// imm << 1. Immediate is 20 signed bits. Use in Jal instructions.
    RVJal20,
    /// The unconditional jump instructions all use PC-relative
    /// addressing to help support position independent code. The JALR
    /// instruction was defined to enable a two-instruction sequence to
    /// jump anywhere in a 32-bit absolute address range. A LUI
    /// instruction can first load rs1 with the upper 20 bits of a
    /// target address, then JALR can add in the lower bits. Similarly,
    /// AUIPC then JALR can jump anywhere in a 32-bit pc-relative
    /// address range.
    RVPCRel32,

    /// All branch instructions use the B-type instruction format. The
    /// 12-bit B-immediate encodes signed offsets in multiples of 2, and
    /// is added to the current pc to give the target address. The
    /// conditional branch range is ±4 KiB.
    RVB12,

    /// Equivalent to the `R_RISCV_PCREL_HI20` relocation, Allows setting
    /// the immediate field of an `auipc` instruction.
    RVPCRelHi20,

    /// Similar to the `R_RISCV_PCREL_LO12_I` relocation but pointing to
    /// the final address, instead of the `PCREL_HI20` label. Allows setting
    /// the immediate field of I Type instructions such as `addi` or `lw`.
    ///
    /// Since we currently don't support offsets in labels, this relocation has
    /// an implicit offset of 4.
    RVPCRelLo12I,

    /// 11-bit PC-relative jump offset. Equivalent to the `RVC_JUMP` relocation
    RVCJump,
    /// 9-bit PC-relative branch offset (compressed C.BEQZ/C.BNEZ).
    RVCB9,
    /// 14-bit branch offset (conditional branches). PC-rel, offset is imm <<
    /// 2. Immediate is 14 signed bits, in bits 18:5. Used by tbz and tbnz.
    A64Branch14,
    /// 19-bit branch offset (conditional branches). PC-rel, offset is imm << 2. Immediate is 19
    /// signed bits, in bits 23:5. Used by cbz, cbnz, b.cond.
    A64Branch19,
    /// 26-bit branch offset (unconditional branches). PC-rel, offset is imm << 2. Immediate is 26
    /// signed bits, in bits 25:0. Used by b, bl.
    A64Branch26,
    /// 19-bit offset for LDR (load literal). PC-rel, offset is imm << 2. Immediate is 19 signed bits,
    /// in bits 23:5.
    A64Ldr19,
    /// 21-bit offset for ADR (get address of label). PC-rel, offset is not shifted. Immediate is
    /// 21 signed bits, with high 19 bits in bits 23:5 and low 2 bits in bits 30:29.
    A64Adr21,
    /// 21-bit offset for ADRP (get address of label). PC-rel, offset is shifted. Immediate is
    /// 21 signed bits, with high 19 bits in bits 23:5 and low 2 bits in bits 30:29.
    A64Adrp21,
}
934
935impl LabelUse {
936    pub fn can_reach(&self, use_offset: CodeOffset, label_offset: CodeOffset) -> bool {
937        let delta = (label_offset as i64) - (use_offset as i64);
938
939        match self {
940            Self::X86JmpRel32 => {
941                let disp = delta - 4;
942                i32::try_from(disp).is_ok()
943            }
944            Self::RVJal20 => delta % 2 == 0 && (-(1 << 20)..=((1 << 20) - 2)).contains(&delta),
945            Self::RVB12 => delta % 2 == 0 && (-(1 << 12)..=((1 << 12) - 2)).contains(&delta),
946            Self::RVCJump => delta % 2 == 0 && (-(1 << 11)..=((1 << 11) - 2)).contains(&delta),
947            Self::RVCB9 => delta % 2 == 0 && (-(1 << 8)..=((1 << 8) - 2)).contains(&delta),
948            Self::RVPCRelHi20 | Self::RVPCRelLo12I | Self::RVPCRel32 => {
949                i32::try_from(delta).is_ok()
950            }
951            Self::A64Branch14 => delta % 4 == 0 && (-(1 << 15)..=((1 << 15) - 4)).contains(&delta),
952            Self::A64Branch19 | Self::A64Ldr19 => {
953                delta % 4 == 0 && (-(1 << 20)..=((1 << 20) - 4)).contains(&delta)
954            }
955            Self::A64Branch26 => delta % 4 == 0 && (-(1 << 27)..=((1 << 27) - 4)).contains(&delta),
956            Self::A64Adr21 => (-(1 << 20)..=((1 << 20) - 1)).contains(&delta),
957            Self::A64Adrp21 => {
958                let page_delta = ((label_offset & !0xfff) as i64) - ((use_offset & !0xfff) as i64);
959                page_delta % 4096 == 0 && (-(1 << 32)..=((1 << 32) - 4096)).contains(&page_delta)
960            }
961        }
962    }
963
964    /// Maximum PC-relative range (positive), inclusive.
965    pub const fn max_pos_range(self) -> CodeOffset {
966        match self {
967            LabelUse::RVJal20 => ((1 << 19) - 1) * 2,
968            LabelUse::RVPCRelLo12I | LabelUse::RVPCRelHi20 | LabelUse::RVPCRel32 => {
969                let imm20_max: i64 = ((1 << 19) - 1) << 12;
970                let imm12_max = (1 << 11) - 1;
971                (imm20_max + imm12_max) as _
972            }
973            LabelUse::RVB12 => ((1 << 11) - 1) * 2,
974            LabelUse::RVCB9 => ((1 << 8) - 1) * 2,
975            LabelUse::RVCJump => ((1 << 10) - 1) * 2,
976            LabelUse::X86JmpRel32 => i32::MAX as _,
977            _ => u32::MAX,
978        }
979    }
980
981    pub const fn max_neg_range(self) -> CodeOffset {
982        match self {
983            LabelUse::RVPCRel32 => {
984                let imm20_max: i64 = (1 << 19) << 12;
985                let imm12_max = 1 << 11;
986                (-imm20_max - imm12_max) as CodeOffset
987            }
988            _ => self.max_pos_range() + 2,
989        }
990    }
991
992    pub const fn patch_size(&self) -> usize {
993        match self {
994            Self::X86JmpRel32 => 4,
995            Self::RVCJump | Self::RVCB9 => 2,
996            Self::RVJal20 | Self::RVB12 | Self::RVPCRelHi20 | Self::RVPCRelLo12I => 4,
997            Self::RVPCRel32 => 8,
998            _ => 4,
999        }
1000    }
1001
1002    pub const fn align(&self) -> usize {
1003        match self {
1004            Self::X86JmpRel32 => 1,
1005            Self::RVCJump => 4,
1006            Self::RVJal20 | Self::RVB12 | Self::RVCB9 | Self::RVPCRelHi20 | Self::RVPCRelLo12I => 4,
1007            Self::RVPCRel32 => 4,
1008            _ => 4,
1009        }
1010    }
1011
1012    pub const fn supports_veneer(&self) -> bool {
1013        match self {
1014            Self::RVB12 | Self::RVJal20 | Self::RVCJump => true,
1015            _ => false,
1016        }
1017    }
1018
1019    pub const fn veneer_size(&self) -> usize {
1020        match self {
1021            Self::RVB12 | Self::RVJal20 | Self::RVCJump => 8,
1022            _ => unreachable!(),
1023        }
1024    }
1025
1026    pub fn generate_veneer(
1027        &self,
1028        buffer: &mut [u8],
1029        veneer_offset: CodeOffset,
1030    ) -> (CodeOffset, Self) {
1031        if matches!(
1032            self,
1033            Self::RVB12
1034                | Self::RVCJump
1035                | Self::RVJal20
1036                | Self::RVPCRelHi20
1037                | Self::RVPCRelLo12I
1038                | Self::RVPCRel32
1039        ) {
1040            let base = riscv::X31;
1041
1042            {
1043                let x = riscv::Inst::new(riscv::Opcode::AUIPC)
1044                    .encode()
1045                    .set_rd(base.id())
1046                    .value
1047                    .to_le_bytes();
1048                buffer[0] = x[0];
1049                buffer[1] = x[1];
1050                buffer[2] = x[2];
1051                buffer[3] = x[3];
1052            }
1053
1054            {
1055                let x = riscv::Inst::new(riscv::Opcode::JALR)
1056                    .encode()
1057                    .set_rd(riscv::ZERO.id())
1058                    .set_rs1(base.id())
1059                    .value
1060                    .to_le_bytes();
1061                buffer[4] = x[0];
1062                buffer[5] = x[1];
1063                buffer[6] = x[2];
1064                buffer[7] = x[3];
1065            }
1066
1067            (veneer_offset, LabelUse::RVPCRel32)
1068        } else {
1069            todo!()
1070        }
1071    }
1072    pub fn patch(&self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
1073        let addend = match self {
1074            Self::X86JmpRel32 => i64::from(u32::from_le_bytes([
1075                buffer[0], buffer[1], buffer[2], buffer[3],
1076            ])),
1077            _ => 0,
1078        };
1079
1080        self.patch_with_addend(buffer, use_offset, label_offset, addend);
1081    }
1082
1083    pub fn patch_with_addend(
1084        &self,
1085        buffer: &mut [u8],
1086        use_offset: CodeOffset,
1087        label_offset: CodeOffset,
1088        addend: i64,
1089    ) {
1090        let pc_reli = (label_offset as i64) - (use_offset as i64);
1091
1092        let pc_rel = pc_reli as u32;
1093
1094        match self {
1095            Self::X86JmpRel32 => {
1096                let value = pc_rel.wrapping_add(addend as u32).wrapping_sub(4);
1097
1098                buffer.copy_from_slice(&value.to_le_bytes());
1099            }
1100
1101            Self::RVJal20 => {
1102                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1103                let offset = pc_rel as u32;
1104                let v = ((offset >> 12 & 0b1111_1111) << 12)
1105                    | ((offset >> 11 & 0b1) << 20)
1106                    | ((offset >> 1 & 0b11_1111_1111) << 21)
1107                    | ((offset >> 20 & 0b1) << 31);
1108                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v));
1109            }
1110
1111            Self::RVPCRel32 => {
1112                let (imm20, imm12) = generate_imm(pc_rel as u64);
1113                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1114                let insn2 = u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]);
1115
1116                let auipc = riscv::Inst::new(riscv::Opcode::AUIPC).encode().set_imm20(0);
1117                let jalr = riscv::Inst::new(riscv::Opcode::JALR)
1118                    .encode()
1119                    .set_rd(0)
1120                    .set_rs1(0)
1121                    .set_imm12(0);
1122
1123                buffer[0..4].copy_from_slice(&(insn | auipc.value | imm20).to_le_bytes());
1124                buffer[4..8].copy_from_slice(&(insn2 | jalr.value | imm12).to_le_bytes());
1125            }
1126
1127            Self::RVB12 => {
1128                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1129                let offset = pc_rel as u32;
1130                let v = ((offset >> 11 & 0b1) << 7)
1131                    | ((offset >> 1 & 0b1111) << 8)
1132                    | ((offset >> 5 & 0b11_1111) << 25)
1133                    | ((offset >> 12 & 0b1) << 31);
1134                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v));
1135            }
1136
1137            Self::RVPCRelHi20 => {
1138                // See https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses
1139                //
1140                // We need to add 0x800 to ensure that we land at the next page as soon as it goes out of range for the
1141                // Lo12 relocation. That relocation is signed and has a maximum range of -2048..2047. So when we get an
1142                // offset of 2048, we need to land at the next page and subtract instead.
1143                let offset = pc_reli as u32;
1144                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1145                let hi20 = offset.wrapping_add(0x800) >> 12;
1146                let insn = (insn & 0xfff) | (hi20 << 12);
1147                buffer[0..4].copy_from_slice(&insn.to_le_bytes());
1148            }
1149
1150            Self::RVPCRelLo12I => {
1151                // `offset` is the offset from the current instruction to the target address.
1152                //
1153                // However we are trying to compute the offset to the target address from the previous instruction.
1154                // The previous instruction should be the one that contains the PCRelHi20 relocation and
1155                // stores/references the program counter (`auipc` usually).
1156                //
1157                // Since we are trying to compute the offset from the previous instruction, we can
1158                // represent it as offset = target_address - (current_instruction_address - 4)
1159                // which is equivalent to offset = target_address - current_instruction_address + 4.
1160                //
1161                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1162
1163                let lo12 = (pc_reli + 4) as u32 & 0xfff;
1164                let insn = (insn & 0xFFFFF) | (lo12 << 20);
1165                buffer[0..4].copy_from_slice(&insn.to_le_bytes());
1166            }
1167
1168            Self::RVCJump => {
1169                debug_assert!(pc_rel & 1 == 0);
1170
1171                let insn = riscv::Inst::new(riscv::Opcode::CJ)
1172                    .encode()
1173                    .set_c_imm12(pc_rel as _);
1174                buffer[0..2].clone_from_slice(&(insn.value as u16).to_le_bytes());
1175            }
1176
1177            Self::RVCB9 => {
1178                debug_assert!(pc_rel & 1 == 0);
1179
1180                let insn = riscv::Inst::new(riscv::Opcode::BEQZ)
1181                    .encode()
1182                    .set_c_bimm9lohi(pc_rel as _);
1183                buffer[0..2].clone_from_slice(&(insn.value as u16).to_le_bytes());
1184            }
1185
1186            Self::A64Branch14 => {
1187                debug_assert!(pc_reli & 0b11 == 0);
1188
1189                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1190                let imm14 = ((pc_reli >> 2) as i32 as u32) & 0x3fff;
1191                let insn = (insn & !0x0007ffe0) | (imm14 << 5);
1192                buffer[0..4].copy_from_slice(&insn.to_le_bytes());
1193            }
1194
1195            Self::A64Branch19 | Self::A64Ldr19 => {
1196                debug_assert!(pc_reli & 0b11 == 0);
1197
1198                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1199                let imm19 = ((pc_reli >> 2) as i32 as u32) & 0x7ffff;
1200                let insn = (insn & !0x00ffffe0) | (imm19 << 5);
1201                buffer[0..4].copy_from_slice(&insn.to_le_bytes());
1202            }
1203
1204            Self::A64Branch26 => {
1205                debug_assert!(pc_reli & 0b11 == 0);
1206
1207                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1208                let imm26 = ((pc_reli >> 2) as i32 as u32) & 0x03ff_ffff;
1209                let insn = (insn & !0x03ff_ffff) | imm26;
1210                buffer[0..4].copy_from_slice(&insn.to_le_bytes());
1211            }
1212
1213            Self::A64Adr21 => {
1214                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1215                let imm21 = (pc_reli as i32 as u32) & 0x1f_ffff;
1216                let immlo = imm21 & 0x3;
1217                let immhi = (imm21 >> 2) & 0x7ffff;
1218                let insn = (insn & !0x60ff_ffe0) | (immlo << 29) | (immhi << 5);
1219                buffer[0..4].copy_from_slice(&insn.to_le_bytes());
1220            }
1221
1222            Self::A64Adrp21 => {
1223                let page_delta = ((label_offset & !0xfff) as i64) - ((use_offset & !0xfff) as i64);
1224                debug_assert!(page_delta & 0xfff == 0);
1225
1226                let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1227                let imm21 = ((page_delta >> 12) as i32 as u32) & 0x1f_ffff;
1228                let immlo = imm21 & 0x3;
1229                let immhi = (imm21 >> 2) & 0x7ffff;
1230                let insn = (insn & !0x60ff_ffe0) | (immlo << 29) | (immhi << 5);
1231                buffer[0..4].copy_from_slice(&insn.to_le_bytes());
1232            }
1233        }
1234    }
1235}
1236
/// Returns whether `val` fits a RISC-V I-type immediate, i.e. a signed
/// 12-bit value in `[-2048, 2047]`.
pub const fn is_imm12(val: i64) -> bool {
    const MIN: i64 = -(1 << 11);
    const MAX: i64 = (1 << 11) - 1;
    MIN <= val && val <= MAX
}
1240
1241pub(crate) fn generate_imm(value: u64) -> (u32, u32) {
1242    if is_imm12(value as _) {
1243        return (
1244            0,
1245            riscv::InstructionValue::new(0)
1246                .set_imm12(value as i64 as i32)
1247                .value,
1248        );
1249    }
1250
1251    let value = value as i64;
1252
1253    let mod_num = 4096i64;
1254    let (imm20, imm12) = if value > 0 {
1255        let mut imm20 = value / mod_num;
1256        let mut imm12 = value % mod_num;
1257
1258        if imm12 >= 2048 {
1259            imm12 -= mod_num;
1260            imm20 += 1;
1261        }
1262
1263        (imm20, imm12)
1264    } else {
1265        let value_abs = value.abs();
1266        let imm20 = value_abs / mod_num;
1267        let imm12 = value_abs % mod_num;
1268        let mut imm20 = -imm20;
1269        let mut imm12 = -imm12;
1270        if imm12 < -2048 {
1271            imm12 += mod_num;
1272            imm20 -= 1;
1273        }
1274        (imm20, imm12)
1275    };
1276    (
1277        riscv::InstructionValue::new(0).set_imm20(imm20 as _).value,
1278        riscv::InstructionValue::new(0).set_imm12(imm12 as _).value,
1279    )
1280}
1281
/// A generic implementation of relocation resolving.
///
/// Patches are written through `code` (the writable mapping), while all
/// PC-relative distances are computed against `code_rx` (the address the
/// code will run from) — the two may differ under a dual-mapping W^X JIT
/// scheme. The three callbacks resolve a [`RelocTarget`] to, respectively,
/// the symbol's address, its GOT entry, and its PLT entry.
///
/// # NOTE
///
/// Very simple and incomplete. At the moment only Abs4, Abs8, X86 and RISC-V GOT relocations are supported.
pub fn perform_relocations(
    code: *mut u8,
    code_rx: *const u8,
    relocs: &[AsmReloc],
    get_address: impl Fn(&RelocTarget) -> *const u8,
    get_got_entry: impl Fn(&RelocTarget) -> *const u8,
    get_plt_entry: impl Fn(&RelocTarget) -> *const u8,
) {
    use core::ptr::write_unaligned;

    for &AsmReloc {
        addend,
        kind,
        offset,
        ref target,
    } in relocs
    {
        // `at`: where we write the patch; `atrx`: the same location as seen
        // at execution time, used as the base for PC-relative values.
        let at = unsafe { code.offset(isize::try_from(offset).unwrap()) };
        let atrx = unsafe { code_rx.offset(isize::try_from(offset).unwrap()) };
        match kind {
            // Absolute 32-bit: target address + addend written in place.
            Reloc::Abs4 => {
                let base = get_address(target);
                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
                unsafe {
                    write_unaligned(at as *mut u32, u32::try_from(what as usize).unwrap());
                }
            }

            // Absolute 64-bit: target address + addend written in place.
            Reloc::Abs8 => {
                let base = get_address(target);
                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
                unsafe {
                    write_unaligned(at as *mut u64, u64::try_from(what as usize).unwrap());
                }
            }

            // x86 rel32: signed displacement from the executable address of
            // the immediate to the target.
            Reloc::X86PCRel4 | Reloc::X86CallPCRel4 => {
                let base = get_address(target);
                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
                let pcrel = i32::try_from((what as isize) - (atrx as isize)).unwrap();

                unsafe {
                    write_unaligned(at as *mut i32, pcrel);
                }
            }

            // x86 GOT-relative rel32: displacement to the GOT slot rather
            // than to the symbol itself.
            Reloc::X86GOTPCRel4 => {
                let base = get_got_entry(target);
                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
                let pcrel = i32::try_from((what as isize) - (atrx as isize)).unwrap();

                unsafe {
                    write_unaligned(at as *mut i32, pcrel);
                }
            }

            // x86 call through the PLT: displacement to the PLT stub.
            Reloc::X86CallPLTRel4 => {
                let base = get_plt_entry(target);
                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
                let pcrel = i32::try_from((what as isize) - (atrx as isize)).unwrap();
                unsafe { write_unaligned(at as *mut i32, pcrel) };
            }

            // High 20 bits of the PC-relative distance to the GOT slot,
            // rounded with +0x800 so the paired signed lo12 lands correctly.
            Reloc::RiscvGotHi20 => {
                let base = get_got_entry(target);
                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
                let pc_rel = i32::try_from((what as isize) - (atrx as isize)).unwrap();
                unsafe {
                    let buffer = core::slice::from_raw_parts_mut(at as *mut u8, 4);
                    let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                    let hi20 = (pc_rel as u32).wrapping_add(0x800) >> 12;
                    let insn = (insn & 0xfff) | (hi20 << 12);
                    buffer.copy_from_slice(&insn.to_le_bytes());
                }
            }

            // Low 12 bits of the I-type immediate. The +4 matches the
            // "implicit offset of 4" convention: the paired hi20 sits on the
            // previous (4-byte) instruction.
            //
            // NOTE(review): this resolves via `get_got_entry`, so it
            // presumably only pairs with `RiscvGotHi20` here — confirm no
            // non-GOT emitter produces this relocation kind.
            Reloc::RiscvPCRelLo12I => {
                let base = get_got_entry(target);
                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
                let pc_rel = i32::try_from((what as isize) - (atrx as isize)).unwrap();

                unsafe {
                    let buffer = core::slice::from_raw_parts_mut(at as *mut u8, 4);
                    let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                    let lo12 = (pc_rel + 4) as u32 & 0xfff;
                    let insn = (insn & 0xFFFFF) | (lo12 << 20);
                    buffer.copy_from_slice(&insn.to_le_bytes());
                }
            }

            Reloc::RiscvCallPlt => {
                // A R_RISCV_CALL_PLT relocation expects auipc+jalr instruction pair.
                // It is the equivalent of two relocations:
                // 1. R_RISCV_PCREL_HI20 on the `auipc`
                // 2. R_RISCV_PCREL_LO12_I on the `jalr`
                //
                // NOTE(review): despite the name this resolves through
                // `get_address`, not `get_plt_entry` — presumably direct
                // calls are intended when no PLT indirection is needed;
                // confirm against the callers.

                let base = get_address(target);
                let what = unsafe { base.offset(isize::try_from(addend).unwrap()) };
                let pcrel = i32::try_from((what as isize) - (atrx as isize)).unwrap();

                // See https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses
                // for a better explanation of the following code.
                //
                // Unlike the regular symbol relocations, here both "sub-relocations" point to the same address.
                //
                // `pcrel` is a signed value (+/- 2GiB range), when splitting it into two parts, we need to
                // ensure that `hi20` is close enough to `pcrel` to be able to add `lo12` to it and still
                // get a valid address.
                //
                // `lo12` is also a signed offset (+/- 2KiB range) relative to the `hi20` value.
                //
                // `hi20` should also be shifted right to be the "true" value. But we also need it
                // left shifted for the `lo12` calculation and it also matches the instruction encoding.
                let hi20 = pcrel.wrapping_add(0x800) as u32 & 0xFFFFF000u32;
                let lo12 = (pcrel as u32).wrapping_sub(hi20) & 0xFFF;

                unsafe {
                    // OR the immediates into the existing auipc/jalr bytes.
                    let auipc_addr = at as *mut u32;
                    let auipc = riscv::Inst::new(riscv::Opcode::AUIPC)
                        .encode()
                        .set_imm20(hi20 as _)
                        .value;
                    auipc_addr.write(auipc_addr.read() | auipc);

                    let jalr_addr = at.offset(4) as *mut u32;
                    let jalr = riscv::Inst::new(riscv::Opcode::JALR)
                        .encode()
                        .set_imm12(lo12 as _)
                        .value;
                    jalr_addr.write(jalr_addr.read() | jalr);
                }
            }

            // Remaining relocation kinds are not yet implemented.
            _ => todo!(),
        }
    }
}