polkavm_linker/
program_from_elf.rs

1use polkavm_common::abi::{MemoryMapBuilder, VM_CODE_ADDRESS_ALIGNMENT, VM_MAX_PAGE_SIZE, VM_MIN_PAGE_SIZE};
2use polkavm_common::cast::cast;
3use polkavm_common::program::{
4    self, FrameKind, Instruction, InstructionSet, LineProgramOp, Opcode, ProgramBlob, ProgramCounter, ProgramSymbol,
5};
6use polkavm_common::utils::{align_to_next_page_u32, align_to_next_page_u64};
7use polkavm_common::varint;
8use polkavm_common::writer::{ProgramBlobBuilder, Writer};
9
10use core::ops::Range;
11use std::borrow::Cow;
12use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
13use std::sync::Arc;
14
15use crate::dwarf::Location;
16use crate::elf::{Elf, Section, SectionIndex};
17use crate::fast_range_map::RangeMap;
18use crate::riscv::DecoderConfig;
19use crate::riscv::Reg as RReg;
20use crate::riscv::{AtomicKind, BranchKind, CmovKind, Inst, LoadKind, RegImmKind, StoreKind};
21
/// Register set of the linker's intermediate representation.
///
/// The first 13 values mirror the registers supported by the VM; the `u8`
/// discriminants form a dense 0-based index used by `to_usize`/`from_usize`.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
#[repr(u8)]
enum Reg {
    // The registers supported by the VM.
    RA = 0,
    SP = 1,
    T0 = 2,
    T1 = 3,
    T2 = 4,
    S0 = 5,
    S1 = 6,
    A0 = 7,
    A1 = 8,
    A2 = 9,
    A3 = 10,
    A4 = 11,
    A5 = 12,

    // Extra fake registers. These will be stripped away before the final codegen pass.
    E0 = 13,
    E1 = 14,
    E2 = 15,
    E3 = 16,
}
46
47impl Reg {
48    fn to_usize(self) -> usize {
49        self as usize
50    }
51}
52
/// Lossless conversion from the VM's register type; every VM register has a
/// direct counterpart in the IR register set.
impl From<polkavm_common::program::Reg> for Reg {
    fn from(reg: polkavm_common::program::Reg) -> Reg {
        use polkavm_common::program::Reg as R;
        match reg {
            R::RA => Reg::RA,
            R::SP => Reg::SP,
            R::T0 => Reg::T0,
            R::T1 => Reg::T1,
            R::T2 => Reg::T2,
            R::S0 => Reg::S0,
            R::S1 => Reg::S1,
            R::A0 => Reg::A0,
            R::A1 => Reg::A1,
            R::A2 => Reg::A2,
            R::A3 => Reg::A3,
            R::A4 => Reg::A4,
            R::A5 => Reg::A5,
        }
    }
}
73
74impl From<polkavm_common::program::RawReg> for Reg {
75    fn from(reg: polkavm_common::program::RawReg) -> Reg {
76        reg.get().into()
77    }
78}
79
80impl From<polkavm_common::program::RawReg> for RegImm {
81    fn from(reg: polkavm_common::program::RawReg) -> RegImm {
82        RegImm::Reg(reg.get().into())
83    }
84}
85
impl Reg {
    /// Inverse of the `u8` discriminant: maps a dense index back to a
    /// register, returning `None` for anything above 16.
    pub const fn from_usize(value: usize) -> Option<Reg> {
        match value {
            0 => Some(Reg::RA),
            1 => Some(Reg::SP),
            2 => Some(Reg::T0),
            3 => Some(Reg::T1),
            4 => Some(Reg::T2),
            5 => Some(Reg::S0),
            6 => Some(Reg::S1),
            7 => Some(Reg::A0),
            8 => Some(Reg::A1),
            9 => Some(Reg::A2),
            10 => Some(Reg::A3),
            11 => Some(Reg::A4),
            12 => Some(Reg::A5),
            13 => Some(Reg::E0),
            14 => Some(Reg::E1),
            15 => Some(Reg::E2),
            16 => Some(Reg::E3),
            _ => None,
        }
    }

    /// Lowercase, assembly-style name of the register.
    pub const fn name(self) -> &'static str {
        use Reg::*;
        match self {
            RA => "ra",
            SP => "sp",
            T0 => "t0",
            T1 => "t1",
            T2 => "t2",
            S0 => "s0",
            S1 => "s1",
            A0 => "a0",
            A1 => "a1",
            A2 => "a2",
            A3 => "a3",
            A4 => "a4",
            A5 => "a5",

            E0 => "e0",
            E1 => "e1",
            E2 => "e2",
            E3 => "e3",
        }
    }

    /// For the fake (`E*`) registers returns their 0-based index within
    /// `Reg::FAKE`; returns `None` for real VM registers.
    fn fake_register_index(self) -> Option<usize> {
        match self {
            Reg::E0 => Some(0),
            Reg::E1 => Some(1),
            Reg::E2 => Some(2),
            Reg::E3 => Some(3),
            _ => None,
        }
    }

    // Every register, ordered by discriminant (so `ALL[i] as usize == i`).
    const ALL: [Reg; 17] = {
        use Reg::*;
        [RA, SP, T0, T1, T2, S0, S1, A0, A1, A2, A3, A4, A5, E0, E1, E2, E3]
    };

    // The fake registers which get stripped away before final codegen.
    const FAKE: [Reg; 4] = { [Reg::E0, Reg::E1, Reg::E2, Reg::E3] };
    // Argument/return register sets; lengths are checked against the VM's
    // ABI limits by the `static_assert!`s in this file.
    const INPUT_REGS: [Reg; 9] = [Reg::A0, Reg::A1, Reg::A2, Reg::A3, Reg::A4, Reg::A5, Reg::T0, Reg::T1, Reg::T2];
    const OUTPUT_REGS: [Reg; 2] = [Reg::A0, Reg::A1];
}
153
// Keep the IR's input/output register tables in sync with the VM ABI's limits.
polkavm_common::static_assert!(Reg::INPUT_REGS.len() == polkavm_common::program::Reg::MAXIMUM_INPUT_REGS);
polkavm_common::static_assert!(Reg::OUTPUT_REGS.len() == polkavm_common::program::Reg::MAXIMUM_OUTPUT_REGS);
156
/// The various ways converting an ELF file into a PolkaVM program can fail.
#[derive(Debug)]
pub enum ProgramFromElfErrorKind {
    FailedToParseElf(object::read::Error),
    FailedToParseDwarf(gimli::Error),
    FailedToParseProgram(program::ProgramParseError),
    UnsupportedSection(String),
    UnsupportedInstruction { section: String, offset: u64, instruction: u32 },
    UnsupportedRegister { reg: RReg },

    // Catch-all for errors that don't fit the categories above.
    Other(Cow<'static, str>),
}
168
169impl From<object::read::Error> for ProgramFromElfError {
170    fn from(error: object::read::Error) -> Self {
171        ProgramFromElfError(ProgramFromElfErrorKind::FailedToParseElf(error))
172    }
173}
174
175impl From<gimli::Error> for ProgramFromElfError {
176    fn from(error: gimli::Error) -> Self {
177        ProgramFromElfError(ProgramFromElfErrorKind::FailedToParseDwarf(error))
178    }
179}
180
181impl From<program::ProgramParseError> for ProgramFromElfError {
182    fn from(error: program::ProgramParseError) -> Self {
183        ProgramFromElfError(ProgramFromElfErrorKind::FailedToParseProgram(error))
184    }
185}
186
/// Opaque error type returned by the ELF-to-program conversion; the concrete
/// cause is kept private in the wrapped `ProgramFromElfErrorKind`.
#[derive(Debug)]
pub struct ProgramFromElfError(ProgramFromElfErrorKind);
189
190impl From<ProgramFromElfErrorKind> for ProgramFromElfError {
191    fn from(kind: ProgramFromElfErrorKind) -> Self {
192        Self(kind)
193    }
194}
195
196impl ProgramFromElfError {
197    pub(crate) fn other(error: impl Into<Cow<'static, str>>) -> Self {
198        Self(ProgramFromElfErrorKind::Other(error.into()))
199    }
200}
201
202impl std::error::Error for ProgramFromElfError {}
203
204impl core::fmt::Display for ProgramFromElfError {
205    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
206        match &self.0 {
207            ProgramFromElfErrorKind::FailedToParseElf(error) => write!(fmt, "failed to parse ELF file: {}", error),
208            ProgramFromElfErrorKind::FailedToParseDwarf(error) => write!(fmt, "failed to parse DWARF: {}", error),
209            ProgramFromElfErrorKind::FailedToParseProgram(error) => write!(fmt, "{}", error),
210            ProgramFromElfErrorKind::UnsupportedSection(section) => write!(fmt, "unsupported section: {}", section),
211            ProgramFromElfErrorKind::UnsupportedInstruction {
212                section,
213                offset,
214                instruction,
215            } => {
216                write!(
217                    fmt,
218                    "unsupported instruction in section '{section}' at offset 0x{offset:x}: 0x{instruction:08x}"
219                )
220            }
221            ProgramFromElfErrorKind::UnsupportedRegister { reg } => write!(fmt, "unsupported register: {reg}"),
222            ProgramFromElfErrorKind::Other(message) => fmt.write_str(message),
223        }
224    }
225}
226
/// Maps a RISC-V register to the VM's register set.
///
/// The hard-wired zero register maps to `None`; registers the VM doesn't
/// support (gp/tp/a6/a7/s2..s11/t3..t6) produce an `UnsupportedRegister` error.
fn cast_reg_non_zero(reg: RReg) -> Result<Option<Reg>, ProgramFromElfError> {
    use RReg::*;
    match reg {
        Zero => Ok(None),
        RA => Ok(Some(Reg::RA)),
        SP => Ok(Some(Reg::SP)),
        T0 => Ok(Some(Reg::T0)),
        T1 => Ok(Some(Reg::T1)),
        T2 => Ok(Some(Reg::T2)),
        S0 => Ok(Some(Reg::S0)),
        S1 => Ok(Some(Reg::S1)),
        A0 => Ok(Some(Reg::A0)),
        A1 => Ok(Some(Reg::A1)),
        A2 => Ok(Some(Reg::A2)),
        A3 => Ok(Some(Reg::A3)),
        A4 => Ok(Some(Reg::A4)),
        A5 => Ok(Some(Reg::A5)),
        GP | TP | A6 | A7 | S2 | S3 | S4 | S5 | S6 | S7 | S8 | S9 | S10 | S11 | T3 | T4 | T5 | T6 => {
            Err(ProgramFromElfErrorKind::UnsupportedRegister { reg }.into())
        }
    }
}
249
250fn cast_reg_any(reg: RReg) -> Result<RegImm, ProgramFromElfError> {
251    Ok(cast_reg_non_zero(reg)?.map_or(RegImm::Imm(0), RegImm::Reg))
252}
253
/// A byte range within a specific ELF section, used to track where a piece of
/// generated code originally came from.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
pub(crate) struct Source {
    pub(crate) section_index: SectionIndex,
    pub(crate) offset_range: AddressRange,
}
259
260impl core::fmt::Display for Source {
261    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
262        write!(
263            fmt,
264            "<{}+{}..{}>",
265            self.section_index, self.offset_range.start, self.offset_range.end
266        )
267    }
268}
269
270impl Source {
271    fn begin(&self) -> SectionTarget {
272        SectionTarget {
273            section_index: self.section_index,
274            offset: self.offset_range.start,
275        }
276    }
277
278    fn iter(&'_ self) -> impl Iterator<Item = SectionTarget> + '_ {
279        (self.offset_range.start..self.offset_range.end)
280            .step_by(2)
281            .map(|offset| SectionTarget {
282                section_index: self.section_index,
283                offset,
284            })
285    }
286}
287
// A stack of source ranges, used to track inlined provenance.
// TODO: Use smallvec.
#[derive(Clone, Debug)]
struct SourceStack(Vec<Source>);
291
292impl core::fmt::Display for SourceStack {
293    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
294        fmt.write_str("[")?;
295        let mut is_first = true;
296        for source in &self.0 {
297            if is_first {
298                is_first = false;
299            } else {
300                fmt.write_str(", ")?;
301            }
302            source.fmt(fmt)?;
303        }
304        fmt.write_str("]")
305    }
306}
307
impl SourceStack {
    /// Borrows the stack entries as a slice.
    fn as_slice(&self) -> &[Source] {
        &self.0
    }

    // NOTE(review): "top" is element 0; presumably entries are stored
    // innermost-first — confirm against how stacks are constructed.
    fn top(&self) -> &Source {
        &self.0[0]
    }

    /// Returns a new stack with `self`'s entries followed by `stack`'s.
    fn overlay_on_top_of(&self, stack: &SourceStack) -> Self {
        let mut vec = Vec::with_capacity(self.0.len() + stack.0.len());
        vec.extend(self.0.iter().copied());
        vec.extend(stack.0.iter().copied());

        SourceStack(vec)
    }

    /// In-place variant of `overlay_on_top_of`: appends `stack`'s entries to `self`.
    fn overlay_on_top_of_inplace(&mut self, stack: &SourceStack) {
        self.0.extend(stack.0.iter().copied());
    }

    /// Renders the stack like `Display`, but additionally annotates each entry
    /// with the name and relative offset of the containing function, when one
    /// is found in `section_to_function_name`.
    fn display(&self, section_to_function_name: &BTreeMap<SectionTarget, &str>) -> String {
        use core::fmt::Write;

        let mut out = String::new();
        out.push('[');
        let mut is_first = true;
        for source in &self.0 {
            if is_first {
                is_first = false;
            } else {
                out.push_str(", ");
            }
            write!(&mut out, "{}", source).unwrap();
            // Find the closest function origin at or before this source...
            if let Some((origin, name)) = section_to_function_name.range(..=source.begin()).next_back() {
                // ...but only use it if it lives in the same section.
                if origin.section_index == source.section_index {
                    write!(&mut out, " \"{name}\"+{}", source.offset_range.start - origin.offset).unwrap();
                }
            }
        }
        out.push(']');
        out
    }
}
352
353impl From<Source> for SourceStack {
354    fn from(source: Source) -> Self {
355        SourceStack(vec![source])
356    }
357}
358
/// The terminating control-flow instruction of a basic block, together with
/// the source provenance it came from. `T` is the jump-target type.
#[derive(Clone, Debug)]
struct EndOfBlock<T> {
    source: SourceStack,
    instruction: ControlInst<T>,
}
364
365impl<T> EndOfBlock<T> {
366    fn map_target<U, E>(self, map: impl Fn(T) -> Result<U, E>) -> Result<EndOfBlock<U>, E> {
367        Ok(EndOfBlock {
368            source: self.source,
369            instruction: self.instruction.map_target(map)?,
370        })
371    }
372}
373
/// A half-open `[start, end)` address range.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct AddressRange {
    pub(crate) start: u64,
    pub(crate) end: u64,
}
379
380impl AddressRange {
381    pub(crate) fn is_empty(&self) -> bool {
382        self.end == self.start
383    }
384
385    pub(crate) const fn is_overlapping(&self, other: &AddressRange) -> bool {
386        !(self.end <= other.start || self.start >= other.end)
387    }
388}
389
390impl core::fmt::Display for AddressRange {
391    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
392        write!(fmt, "0x{:x}-0x{:x}", self.start, self.end)
393    }
394}
395
396impl core::fmt::Debug for AddressRange {
397    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
398        write!(fmt, "0x{:x}-0x{:x}", self.start, self.end)
399    }
400}
401
402impl From<Range<u64>> for AddressRange {
403    fn from(range: Range<u64>) -> Self {
404        AddressRange {
405            start: range.start,
406            end: range.end,
407        }
408    }
409}
410
/// A single location inside an ELF section: a section plus a byte offset.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct SectionTarget {
    pub(crate) section_index: SectionIndex,
    pub(crate) offset: u64,
}
416
417impl core::fmt::Display for SectionTarget {
418    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
419        write!(fmt, "<{}+{}>", self.section_index, self.offset)
420    }
421}
422
423impl core::fmt::Debug for SectionTarget {
424    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
425        write!(fmt, "<{}+{}>", self.section_index, self.offset)
426    }
427}
428
429impl From<SectionTarget> for SectionIndex {
430    fn from(target: SectionTarget) -> Self {
431        target.section_index
432    }
433}
434
/// Finds the first `prefix`…`suffix` pair in `*str`.
///
/// On success returns `(text_before_prefix, text_between_delimiters)` and
/// advances `*str` past the suffix; if either delimiter is missing, `*str` is
/// left untouched and `None` is returned.
fn extract_delimited<'a>(str: &mut &'a str, prefix: &str, suffix: &str) -> Option<(&'a str, &'a str)> {
    let input = *str;
    let before_len = input.find(prefix)?;
    let inner_start = before_len + prefix.len();
    let inner_len = input[inner_start..].find(suffix)?;
    let inner_end = inner_start + inner_len;
    *str = &input[inner_end + suffix.len()..];
    Some((&input[..before_len], &input[inner_start..inner_end]))
}
443
#[test]
fn test_extract_delimited() {
    // `str` should be advanced past the matched suffix.
    let mut str = "foo <section #1234+567> bar";
    assert_eq!(extract_delimited(&mut str, "<section #", ">").unwrap(), ("foo ", "1234+567"));
    assert_eq!(str, " bar");
}
450
451impl SectionTarget {
452    fn fmt_human_readable<H>(&self, elf: &Elf<H>) -> String
453    where
454        H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
455    {
456        Self::make_human_readable_in_debug_string(elf, &self.to_string())
457    }
458
459    fn make_human_readable_in_debug_string<H>(elf: &Elf<H>, mut str: &str) -> String
460    where
461        H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
462    {
463        // A hack-ish way to make nested `Debug` error messages more readable by replacing
464        // raw section indexes and offsets with a more human readable string.
465
466        let mut output = String::new();
467        while let Some((prefix, chunk)) = extract_delimited(&mut str, "<section #", ">") {
468            output.push_str(prefix);
469
470            let mut iter = chunk.split('+');
471            if let Some(section_index) = iter.next().and_then(|s| s.parse::<usize>().ok()) {
472                if let Some(offset) = iter.next().and_then(|s| s.parse::<u64>().ok()) {
473                    if let Some(section) = elf.section_by_raw_index(section_index) {
474                        use core::fmt::Write;
475
476                        let symbol = elf.symbols().find(|symbol| {
477                            let Ok((symbol_section, symbol_offset)) = symbol.section_and_offset() else {
478                                return false;
479                            };
480                            symbol_section.index().raw() == section_index
481                                && offset >= symbol_offset
482                                && offset < (symbol_offset + symbol.size())
483                        });
484
485                        let section_name = section.name();
486                        write!(&mut output, "<section #{section_index}+{offset} ('{section_name}'").unwrap();
487                        if let Some(symbol) = symbol {
488                            if let Some(symbol_name) = symbol.name() {
489                                write!(
490                                    &mut output,
491                                    ": '{}'+{}",
492                                    symbol_name,
493                                    offset - symbol.section_and_offset().unwrap().1
494                                )
495                                .unwrap();
496                            }
497                        }
498                        output.push_str(")>");
499                        continue;
500                    }
501                }
502            }
503            output.push_str(chunk);
504        }
505
506        output.push_str(str);
507        output
508    }
509
510    fn add(self, offset: u64) -> Self {
511        SectionTarget {
512            section_index: self.section_index,
513            offset: self.offset + offset,
514        }
515    }
516
517    fn map_offset_i64(self, cb: impl FnOnce(i64) -> i64) -> Self {
518        let offset = self.offset as i64;
519        SectionTarget {
520            section_index: self.section_index,
521            offset: cb(offset) as u64,
522        }
523    }
524}
525
/// An index into the list of basic blocks; used as a resolved jump target.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
#[repr(transparent)]
struct BlockTarget {
    block_index: usize,
}
531
532impl BlockTarget {
533    fn from_raw(block_index: usize) -> Self {
534        BlockTarget { block_index }
535    }
536
537    fn index(self) -> usize {
538        self.block_index
539    }
540}
541
/// A reference to either a data location (section + offset) or a code
/// location (basic block).
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
enum AnyTarget {
    Data(SectionTarget),
    Code(BlockTarget),
}
547
/// An operand that is either a register or a 32-bit immediate.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
enum RegImm {
    Reg(Reg),
    Imm(i32),
}
553
554impl RegImm {
555    fn map_register(self, mut map: impl FnMut(Reg) -> Reg) -> RegImm {
556        match self {
557            RegImm::Reg(reg) => RegImm::Reg(map(reg)),
558            RegImm::Imm(value) => RegImm::Imm(value),
559        }
560    }
561}
562
563impl From<Reg> for RegImm {
564    fn from(reg: Reg) -> Self {
565        RegImm::Reg(reg)
566    }
567}
568
569impl From<i32> for RegImm {
570    fn from(value: i32) -> Self {
571        RegImm::Imm(value)
572    }
573}
574
/// A straight-line (non-control-flow) instruction of the linker's IR.
///
/// `T` is the target type used by the address-loading variants
/// (`LoadAddress`/`LoadAddressIndirect`).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
enum BasicInst<T> {
    LoadAbsolute {
        kind: LoadKind,
        dst: Reg,
        target: SectionTarget,
    },
    StoreAbsolute {
        kind: StoreKind,
        src: RegImm,
        target: SectionTarget,
    },
    LoadIndirect {
        kind: LoadKind,
        dst: Reg,
        base: Reg,
        offset: i32,
    },
    StoreIndirect {
        kind: StoreKind,
        src: RegImm,
        base: Reg,
        offset: i32,
    },
    LoadAddress {
        dst: Reg,
        target: T,
    },
    // This is supposed to load the address from the GOT, instead of loading it directly as an immediate.
    LoadAddressIndirect {
        dst: Reg,
        target: T,
    },
    LoadImmediate {
        dst: Reg,
        imm: i32,
    },
    LoadImmediate64 {
        dst: Reg,
        imm: i64,
    },
    MoveReg {
        dst: Reg,
        src: Reg,
    },
    Reg {
        kind: RegKind,
        dst: Reg,
        src: Reg,
    },
    RegReg {
        kind: RegRegKind,
        dst: Reg,
        src1: Reg,
        src2: Reg,
    },
    AnyAny {
        kind: AnyAnyKind,
        dst: Reg,
        src1: RegImm,
        src2: RegImm,
    },
    // Conditional move: reads `dst`, `src` and `cond` (see `src_mask`).
    Cmov {
        kind: CmovKind,
        dst: Reg,
        src: RegImm,
        cond: Reg,
    },
    // Host call; register usage is determined by the import's signature.
    Ecalli {
        nth_import: usize,
    },
    Sbrk {
        dst: Reg,
        size: Reg,
    },
    // Fixed-register memset; uses A0/A1/A2 (see `src_mask`/`dst_mask`).
    Memset,
    Nop,
    LoadHeapBase {
        dst: Reg,
    },
}
656
/// How an instruction accesses a register operand.
#[derive(Copy, Clone)]
enum OpKind {
    Read,
    Write,
    ReadWrite,
}
663
impl<T> BasicInst<T> {
    /// True if executing this instruction has no observable effect
    /// (an explicit `Nop`, or a move of a register onto itself).
    fn is_nop(&self) -> bool {
        match self {
            BasicInst::MoveReg { dst, src } => dst == src,
            BasicInst::Nop => true,
            _ => false,
        }
    }

    /// The set of registers this instruction reads. For `Ecalli` the mask
    /// comes from the import's signature; `Cmov` also counts `dst` as a
    /// source because it's a read-modify-write (see `map_register`).
    fn src_mask(&self, imports: &[Import]) -> RegMask {
        match *self {
            BasicInst::Nop
            | BasicInst::LoadHeapBase { .. }
            | BasicInst::LoadImmediate { .. }
            | BasicInst::LoadImmediate64 { .. }
            | BasicInst::LoadAbsolute { .. }
            | BasicInst::LoadAddress { .. }
            | BasicInst::LoadAddressIndirect { .. } => RegMask::empty(),
            BasicInst::MoveReg { src, .. } | BasicInst::Reg { src, .. } => RegMask::from(src),
            BasicInst::StoreAbsolute { src, .. } => RegMask::from(src),
            BasicInst::LoadIndirect { base, .. } => RegMask::from(base),
            BasicInst::StoreIndirect { src, base, .. } => RegMask::from(src) | RegMask::from(base),
            BasicInst::RegReg { src1, src2, .. } => RegMask::from(src1) | RegMask::from(src2),
            BasicInst::AnyAny { src1, src2, .. } => RegMask::from(src1) | RegMask::from(src2),
            BasicInst::Cmov { dst, src, cond, .. } => RegMask::from(dst) | RegMask::from(src) | RegMask::from(cond),
            BasicInst::Ecalli { nth_import } => imports[nth_import].src_mask(),
            BasicInst::Sbrk { size, .. } => RegMask::from(size),
            BasicInst::Memset => RegMask::from(Reg::A0) | RegMask::from(Reg::A1) | RegMask::from(Reg::A2),
        }
    }

    /// The set of registers this instruction writes.
    fn dst_mask(&self, imports: &[Import]) -> RegMask {
        match *self {
            BasicInst::Nop | BasicInst::StoreAbsolute { .. } | BasicInst::StoreIndirect { .. } => RegMask::empty(),
            BasicInst::MoveReg { dst, .. }
            | BasicInst::LoadHeapBase { dst }
            | BasicInst::LoadImmediate { dst, .. }
            | BasicInst::LoadImmediate64 { dst, .. }
            | BasicInst::LoadAbsolute { dst, .. }
            | BasicInst::LoadAddress { dst, .. }
            | BasicInst::LoadAddressIndirect { dst, .. }
            | BasicInst::LoadIndirect { dst, .. }
            | BasicInst::RegReg { dst, .. }
            | BasicInst::Cmov { dst, .. }
            | BasicInst::Reg { dst, .. }
            | BasicInst::AnyAny { dst, .. } => RegMask::from(dst),
            BasicInst::Ecalli { nth_import } => imports[nth_import].dst_mask(),
            BasicInst::Sbrk { dst, .. } => RegMask::from(dst),
            BasicInst::Memset { .. } => RegMask::from(Reg::A0) | RegMask::from(Reg::A2),
        }
    }

    /// Whether this instruction must be kept even if its results are unused.
    /// Loads only count as side-effecting when load elision is disabled.
    fn has_side_effects(&self, config: &Config) -> bool {
        match *self {
            BasicInst::Sbrk { .. }
            | BasicInst::Ecalli { .. }
            | BasicInst::StoreAbsolute { .. }
            | BasicInst::StoreIndirect { .. }
            | BasicInst::Memset { .. } => true,
            BasicInst::LoadAbsolute { .. } | BasicInst::LoadIndirect { .. } => !config.elide_unnecessary_loads,
            BasicInst::Nop
            | BasicInst::LoadHeapBase { .. }
            | BasicInst::MoveReg { .. }
            | BasicInst::Reg { .. }
            | BasicInst::LoadImmediate { .. }
            | BasicInst::LoadImmediate64 { .. }
            | BasicInst::LoadAddress { .. }
            | BasicInst::LoadAddressIndirect { .. }
            | BasicInst::RegReg { .. }
            | BasicInst::Cmov { .. }
            | BasicInst::AnyAny { .. } => false,
        }
    }

    /// Rewrites every register operand through `map`, tagging each use with
    /// its access kind. Returns `None` for `Ecalli`, whose register usage is
    /// fixed by the import and cannot be remapped.
    fn map_register(self, mut map: impl FnMut(Reg, OpKind) -> Reg) -> Option<Self> {
        // Note: ALWAYS map the inputs first; otherwise `regalloc2` might break!
        match self {
            BasicInst::LoadImmediate { dst, imm } => Some(BasicInst::LoadImmediate {
                dst: map(dst, OpKind::Write),
                imm,
            }),
            BasicInst::LoadImmediate64 { dst, imm } => Some(BasicInst::LoadImmediate64 {
                dst: map(dst, OpKind::Write),
                imm,
            }),
            BasicInst::LoadAbsolute { kind, dst, target } => Some(BasicInst::LoadAbsolute {
                kind,
                dst: map(dst, OpKind::Write),
                target,
            }),
            BasicInst::StoreAbsolute { kind, src, target } => Some(BasicInst::StoreAbsolute {
                kind,
                src: src.map_register(|reg| map(reg, OpKind::Read)),
                target,
            }),
            BasicInst::LoadAddress { dst, target } => Some(BasicInst::LoadAddress {
                dst: map(dst, OpKind::Write),
                target,
            }),
            BasicInst::LoadAddressIndirect { dst, target } => Some(BasicInst::LoadAddressIndirect {
                dst: map(dst, OpKind::Write),
                target,
            }),
            BasicInst::LoadIndirect { kind, dst, base, offset } => Some(BasicInst::LoadIndirect {
                kind,
                base: map(base, OpKind::Read),
                dst: map(dst, OpKind::Write),
                offset,
            }),
            BasicInst::StoreIndirect { kind, src, base, offset } => Some(BasicInst::StoreIndirect {
                kind,
                src: src.map_register(|reg| map(reg, OpKind::Read)),
                base: map(base, OpKind::Read),
                offset,
            }),
            BasicInst::Reg { kind, dst, src } => Some(BasicInst::Reg {
                kind,
                src: map(src, OpKind::Read),
                dst: map(dst, OpKind::Write),
            }),
            BasicInst::RegReg { kind, dst, src1, src2 } => Some(BasicInst::RegReg {
                kind,
                src1: map(src1, OpKind::Read),
                src2: map(src2, OpKind::Read),
                dst: map(dst, OpKind::Write),
            }),
            BasicInst::AnyAny { kind, dst, src1, src2 } => Some(BasicInst::AnyAny {
                kind,
                src1: src1.map_register(|reg| map(reg, OpKind::Read)),
                src2: src2.map_register(|reg| map(reg, OpKind::Read)),
                dst: map(dst, OpKind::Write),
            }),
            BasicInst::MoveReg { dst, src } => Some(BasicInst::MoveReg {
                src: map(src, OpKind::Read),
                dst: map(dst, OpKind::Write),
            }),
            BasicInst::Cmov { kind, dst, src, cond } => Some(BasicInst::Cmov {
                kind,
                src: src.map_register(|reg| map(reg, OpKind::Read)),
                cond: map(cond, OpKind::Read),
                // `dst` keeps its old value when the condition fails, so it's a read-write.
                dst: map(dst, OpKind::ReadWrite),
            }),
            BasicInst::Ecalli { .. } => None,
            BasicInst::Sbrk { dst, size } => Some(BasicInst::Sbrk {
                size: map(size, OpKind::Read),
                dst: map(dst, OpKind::Write),
            }),
            BasicInst::Memset => {
                // Memset's registers are fixed; the mapping must be the identity for them.
                assert_eq!(map(Reg::A1, OpKind::Read), Reg::A1);
                assert_eq!(map(Reg::A0, OpKind::ReadWrite), Reg::A0);
                assert_eq!(map(Reg::A2, OpKind::ReadWrite), Reg::A2);
                Some(BasicInst::Memset)
            }
            BasicInst::LoadHeapBase { dst } => Some(BasicInst::LoadHeapBase {
                dst: map(dst, OpKind::Write),
            }),
            BasicInst::Nop => Some(BasicInst::Nop),
        }
    }

    /// Iterates over `(register, access-kind)` pairs for this instruction,
    /// with all reads ordered before all writes (asserted below).
    ///
    /// NOTE(review): `list` has 8 slots, but an import may declare up to 9
    /// input plus 2 output registers (see `Reg::INPUT_REGS`/`OUTPUT_REGS`);
    /// if an import ever used that many this would index out of bounds —
    /// confirm imports are bounded elsewhere.
    fn operands(&self, imports: &[Import]) -> impl Iterator<Item = (Reg, OpKind)>
    where
        T: Clone,
    {
        let mut list = [None, None, None, None, None, None, None, None];
        let mut length = 0;
        // Abuse the `map_register` to avoid matching on everything again.
        let is_special_instruction = self
            .clone()
            .map_register(|reg, kind| {
                list[length] = Some((reg, kind));
                length += 1;
                reg
            })
            .is_none();

        if is_special_instruction {
            assert_eq!(length, 0);

            // Only `Ecalli` returns `None` from `map_register`.
            let BasicInst::Ecalli { nth_import } = *self else { unreachable!() };
            let import = &imports[nth_import];

            for reg in import.src_mask() {
                list[length] = Some((reg, OpKind::Read));
                length += 1;
            }

            for reg in import.dst_mask() {
                list[length] = Some((reg, OpKind::Write));
                length += 1;
            }
        };

        let mut seen_dst = false;
        list.into_iter().take_while(|reg| reg.is_some()).flatten().map(move |(reg, kind)| {
            let is_dst = matches!(kind, OpKind::Write | OpKind::ReadWrite);

            // Sanity check to make sure inputs always come before outputs, so that `regalloc2` doesn't break.
            if seen_dst {
                assert!(is_dst);
            }
            seen_dst |= is_dst;

            (reg, kind)
        })
    }

    /// Rewrites the `T`-typed address targets through `map`; all other
    /// variants are passed through unchanged.
    fn map_target<U, E>(self, map: impl Fn(T) -> Result<U, E>) -> Result<BasicInst<U>, E> {
        Ok(match self {
            BasicInst::MoveReg { dst, src } => BasicInst::MoveReg { dst, src },
            BasicInst::LoadImmediate { dst, imm } => BasicInst::LoadImmediate { dst, imm },
            BasicInst::LoadImmediate64 { dst, imm } => BasicInst::LoadImmediate64 { dst, imm },
            BasicInst::LoadAbsolute { kind, dst, target } => BasicInst::LoadAbsolute { kind, dst, target },
            BasicInst::StoreAbsolute { kind, src, target } => BasicInst::StoreAbsolute { kind, src, target },
            BasicInst::LoadAddress { dst, target } => BasicInst::LoadAddress { dst, target: map(target)? },
            BasicInst::LoadAddressIndirect { dst, target } => BasicInst::LoadAddressIndirect { dst, target: map(target)? },
            BasicInst::LoadIndirect { kind, dst, base, offset } => BasicInst::LoadIndirect { kind, dst, base, offset },
            BasicInst::StoreIndirect { kind, src, base, offset } => BasicInst::StoreIndirect { kind, src, base, offset },
            BasicInst::Reg { kind, dst, src } => BasicInst::Reg { kind, dst, src },
            BasicInst::RegReg { kind, dst, src1, src2 } => BasicInst::RegReg { kind, dst, src1, src2 },
            BasicInst::AnyAny { kind, dst, src1, src2 } => BasicInst::AnyAny { kind, dst, src1, src2 },
            BasicInst::Cmov { kind, dst, src, cond } => BasicInst::Cmov { kind, dst, src, cond },
            BasicInst::Ecalli { nth_import } => BasicInst::Ecalli { nth_import },
            BasicInst::Sbrk { dst, size } => BasicInst::Sbrk { dst, size },
            BasicInst::LoadHeapBase { dst } => BasicInst::LoadHeapBase { dst },
            BasicInst::Memset => BasicInst::Memset,
            BasicInst::Nop => BasicInst::Nop,
        })
    }

    /// Returns this instruction's referenced targets as
    /// `(data target, T-typed address target)`; at most one of the two is `Some`.
    fn target(&self) -> (Option<SectionTarget>, Option<T>)
    where
        T: Copy,
    {
        match self {
            BasicInst::LoadAbsolute { target, .. } | BasicInst::StoreAbsolute { target, .. } => (Some(*target), None),
            BasicInst::LoadAddress { target, .. } | BasicInst::LoadAddressIndirect { target, .. } => (None, Some(*target)),
            BasicInst::Nop
            | BasicInst::LoadHeapBase { .. }
            | BasicInst::MoveReg { .. }
            | BasicInst::LoadImmediate { .. }
            | BasicInst::LoadImmediate64 { .. }
            | BasicInst::LoadIndirect { .. }
            | BasicInst::StoreIndirect { .. }
            | BasicInst::Reg { .. }
            | BasicInst::RegReg { .. }
            | BasicInst::AnyAny { .. }
            | BasicInst::Cmov { .. }
            | BasicInst::Sbrk { .. }
            | BasicInst::Memset { .. }
            | BasicInst::Ecalli { .. } => (None, None),
        }
    }
}
918
/// A control-flow instruction terminating a basic block.
///
/// `T` is the representation of a code target (e.g. a section-relative target
/// early on, a basic-block target later in the pipeline).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
enum ControlInst<T> {
    /// An unconditional direct jump.
    Jump {
        target: T,
    },
    /// A direct call: jumps to `target`, writing into `ra` (see `dst_mask`);
    /// execution resumes at `target_return` when the callee returns.
    Call {
        ra: Reg,
        target: T,
        target_return: T,
    },
    /// An indirect jump to `base + offset`.
    JumpIndirect {
        base: Reg,
        offset: i64,
    },
    /// An indirect call to `base + offset`, writing into `ra`; execution
    /// resumes at `target_return` when the callee returns.
    CallIndirect {
        ra: Reg,
        base: Reg,
        offset: i64,
        target_return: T,
    },
    /// A conditional branch: goes to `target_true` when the comparison holds,
    /// otherwise falls through to `target_false`.
    Branch {
        kind: BranchKind,
        src1: RegImm,
        src2: RegImm,
        target_true: T,
        target_false: T,
    },
    /// A terminator for code that could not be translated; it reads and
    /// writes no registers and has no targets.
    Unimplemented,
}
948
949impl<T> ControlInst<T> {
950    fn src_mask(&self) -> RegMask {
951        match *self {
952            ControlInst::Jump { .. } | ControlInst::Call { .. } | ControlInst::Unimplemented => RegMask::empty(),
953            ControlInst::JumpIndirect { base, .. } | ControlInst::CallIndirect { base, .. } => RegMask::from(base),
954            ControlInst::Branch { src1, src2, .. } => RegMask::from(src1) | RegMask::from(src2),
955        }
956    }
957
958    fn dst_mask(&self) -> RegMask {
959        match *self {
960            ControlInst::Jump { .. } | ControlInst::JumpIndirect { .. } | ControlInst::Branch { .. } | ControlInst::Unimplemented => {
961                RegMask::empty()
962            }
963            ControlInst::Call { ra, .. } | ControlInst::CallIndirect { ra, .. } => RegMask::from(ra),
964        }
965    }
966
967    fn map_target<U, E>(self, map: impl Fn(T) -> Result<U, E>) -> Result<ControlInst<U>, E> {
968        Ok(match self {
969            ControlInst::Jump { target } => ControlInst::Jump { target: map(target)? },
970            ControlInst::Call { ra, target, target_return } => ControlInst::Call {
971                ra,
972                target: map(target)?,
973                target_return: map(target_return)?,
974            },
975            ControlInst::JumpIndirect { base, offset } => ControlInst::JumpIndirect { base, offset },
976            ControlInst::CallIndirect {
977                ra,
978                base,
979                offset,
980                target_return,
981            } => ControlInst::CallIndirect {
982                ra,
983                base,
984                offset,
985                target_return: map(target_return)?,
986            },
987            ControlInst::Branch {
988                kind,
989                src1,
990                src2,
991                target_true,
992                target_false,
993            } => ControlInst::Branch {
994                kind,
995                src1,
996                src2,
997                target_true: map(target_true)?,
998                target_false: map(target_false)?,
999            },
1000            ControlInst::Unimplemented => ControlInst::Unimplemented,
1001        })
1002    }
1003
1004    fn targets(&self) -> [Option<&T>; 2] {
1005        match self {
1006            ControlInst::Jump { target, .. } => [Some(target), None],
1007            ControlInst::Call { target, target_return, .. } => [Some(target), Some(target_return)],
1008            ControlInst::CallIndirect { target_return, .. } => [Some(target_return), None],
1009            ControlInst::Branch {
1010                target_true, target_false, ..
1011            } => [Some(target_true), Some(target_false)],
1012            ControlInst::JumpIndirect { .. } | ControlInst::Unimplemented => [None, None],
1013        }
1014    }
1015
1016    fn fallthrough_target(&self) -> Option<T>
1017    where
1018        T: Copy,
1019    {
1020        match self {
1021            ControlInst::Jump { .. } | ControlInst::JumpIndirect { .. } | ControlInst::Unimplemented => None,
1022            ControlInst::Branch { target_false: target, .. }
1023            | ControlInst::Call { target_return: target, .. }
1024            | ControlInst::CallIndirect { target_return: target, .. } => Some(*target),
1025        }
1026    }
1027
1028    fn fallthrough_target_mut(&mut self) -> Option<&mut T> {
1029        match self {
1030            ControlInst::Jump { .. } | ControlInst::JumpIndirect { .. } | ControlInst::Unimplemented => None,
1031            ControlInst::Branch { target_false: target, .. }
1032            | ControlInst::Call { target_return: target, .. }
1033            | ControlInst::CallIndirect { target_return: target, .. } => Some(target),
1034        }
1035    }
1036}
1037
/// Either a regular (straight-line) instruction or a block-terminating
/// control-flow instruction.
#[derive(Copy, Clone, Debug)]
enum InstExt<BasicT, ControlT> {
    Basic(BasicInst<BasicT>),
    Control(ControlInst<ControlT>),
}
1043
1044impl<BasicT, ControlT> InstExt<BasicT, ControlT> {
1045    fn nop() -> Self {
1046        InstExt::Basic(BasicInst::Nop)
1047    }
1048}
1049
// Allows passing a `BasicInst` anywhere an `InstExt` is expected.
impl<BasicT, ControlT> From<BasicInst<BasicT>> for InstExt<BasicT, ControlT> {
    fn from(inst: BasicInst<BasicT>) -> Self {
        InstExt::Basic(inst)
    }
}
1055
// Allows passing a `ControlInst` anywhere an `InstExt` is expected.
impl<BasicT, ControlT> From<ControlInst<ControlT>> for InstExt<BasicT, ControlT> {
    fn from(inst: ControlInst<ControlT>) -> Self {
        InstExt::Control(inst)
    }
}
1061
/// A basic block: a straight-line run of instructions (`ops`) terminated by a
/// single control-flow instruction (`next`).
#[derive(Debug)]
struct BasicBlock<BasicT, ControlT> {
    // This block's own identifier.
    target: BlockTarget,
    // Where in the input this block originated from.
    source: Source,
    // The block's body; each instruction is paired with its source stack.
    ops: Vec<(SourceStack, BasicInst<BasicT>)>,
    // The block's terminator.
    next: EndOfBlock<ControlT>,
}
1069
impl<BasicT, ControlT> BasicBlock<BasicT, ControlT> {
    /// Creates a new basic block from its constituent parts.
    fn new(target: BlockTarget, source: Source, ops: Vec<(SourceStack, BasicInst<BasicT>)>, next: EndOfBlock<ControlT>) -> Self {
        Self { target, source, ops, next }
    }
}
1075
/// Splits a (possibly mangled) function symbol into a `(prefix, suffix)` pair,
/// where the prefix is the enclosing namespace and the suffix is the final
/// path segment including the disambiguating hash (if present). Symbols with
/// no namespace are returned as `("", whole_symbol)`.
fn split_function_name(name: &str) -> (String, String) {
    let (with_hash, without_hash) = if let Ok(name) = rustc_demangle::try_demangle(name) {
        // `{:#}` is the alternate demangling, which omits the trailing hash.
        (name.to_string(), format!("{:#}", name))
    } else {
        (name.to_owned(), name.to_owned())
    };

    // Ideally we'd parse the symbol into an actual AST and use that,
    // but that's a lot of work, so for now let's just do it like this.
    //
    // Here we want to split the symbol into two parts: the namespace, and the name + hash.
    // The idea being that multiple symbols most likely share the namespace, allowing us to
    // deduplicate those strings in the output blob.
    //
    // For example, this symbol:
    //   _ZN5alloc7raw_vec19RawVec$LT$T$C$A$GT$7reserve21do_reserve_and_handle17hddecba91f804dbebE
    // can be demangled into these:
    //   with_hash    = "alloc::raw_vec::RawVec<T,A>::reserve::do_reserve_and_handle::hddecba91f804dbeb"
    //   without_hash = "alloc::raw_vec::RawVec<T,A>::reserve::do_reserve_and_handle"
    //
    // So what we want is to split it in two like this:
    //   prefix = "alloc::raw_vec::RawVec<T,A>::reserve"
    //   suffix = "do_reserve_and_handle::hddecba91f804dbeb"

    if with_hash.contains("::") {
        let suffix_index = {
            // Scan `without_hash` backwards looking for the last `::` which is
            // not nested inside generic brackets (`<...>`).
            let mut found = None;
            let mut depth = 0;
            let mut last = '\0';
            // Byte offset just *past* the character currently being examined.
            let mut index = without_hash.len();
            for ch in without_hash.chars().rev() {
                if ch == '>' {
                    depth += 1;
                } else if ch == '<' {
                    depth -= 1;
                } else if ch == ':' && depth == 0 && last == ':' {
                    // `ch` is the first ':' of a `::`, so `index` points at the
                    // second ':' and `index + 1` is where the suffix begins.
                    found = Some(index + 1);
                    break;
                }

                last = ch;
                index -= ch.len_utf8();
            }

            found
        };

        if let Some(suffix_index) = suffix_index {
            // NOTE: the index was computed on `without_hash` but is applied to
            // `with_hash`; this relies on `without_hash` being a prefix of
            // `with_hash` (the alternate demangling only drops the trailing hash).
            let prefix = &with_hash[..suffix_index - 2];
            let suffix = &with_hash[suffix_index..];
            return (prefix.to_owned(), suffix.to_owned());
        } else {
            log::warn!("Failed to split symbol: {:?}", with_hash);
        }
    }

    (String::new(), with_hash)
}
1134
/// One chunk of the output memory image: either a slice of an input section's
/// data, or zero padding.
#[derive(Clone, Debug)]
enum DataRef {
    Section { section_index: SectionIndex, range: Range<usize> },
    Padding(usize),
}
1140
impl DataRef {
    /// Returns the size of this chunk in bytes.
    fn size(&self) -> usize {
        match self {
            Self::Section { range, .. } => range.len(),
            Self::Padding(size) => *size,
        }
    }
}
1149
/// The guest memory layout extracted from the ELF file.
#[derive(Debug)]
struct MemoryConfig {
    // Chunks making up the read-only data segment.
    ro_data: Vec<DataRef>,
    // Chunks making up the initial read-write data segment.
    rw_data: Vec<DataRef>,
    // Total in-memory segment sizes; these can exceed the sum of the chunks,
    // since trailing padding is stripped from the chunk lists.
    ro_data_size: u32,
    rw_data_size: u32,
    // Minimum stack size, rounded up to a page boundary.
    min_stack_size: u32,
    // Start of the heap, as reported by the memory map builder.
    heap_base: u32,
}
1159
/// Returns how many bytes are needed to round `memory_end` up to the next
/// multiple of `align`, or `None` if it is already aligned.
fn get_padding(memory_end: u64, align: u64) -> Option<u64> {
    match memory_end % align {
        0 => None,
        misalignment => Some(align - misalignment),
    }
}
1168
/// Lays out the given sections one after another starting at `*current_address`,
/// appending the resulting data/padding chunks to `chunks` and recording every
/// section's assigned base address in `base_address_for_section`.
///
/// Returns the total in-memory size of the laid-out sections (including inner
/// padding, but with any *trailing* padding chunks stripped from `chunks`).
/// On return `*current_address` has been advanced past the end, rounded up to
/// the next max-sized page, plus one extra guard page.
fn process_sections<H>(
    elf: &Elf<H>,
    current_address: &mut u64,
    chunks: &mut Vec<DataRef>,
    base_address_for_section: &mut HashMap<SectionIndex, u64>,
    sections: impl IntoIterator<Item = SectionIndex>,
) -> u64
where
    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
{
    for section_index in sections {
        let section = elf.section_by_index(section_index);
        assert!(section.size() >= section.data().len() as u64);

        // Respect the section's alignment requirement.
        if let Some(padding) = get_padding(*current_address, section.align()) {
            *current_address += padding;
            chunks.push(DataRef::Padding(padding as usize));
        }

        let section_name = section.name();
        let section_base_address = *current_address;
        base_address_for_section.insert(section.index(), section_base_address);

        *current_address += section.size();
        if !section.data().is_empty() {
            chunks.push(DataRef::Section {
                section_index: section.index(),
                range: 0..section.data().len(),
            });
        }

        // A section can be larger in memory than its file contents (e.g. BSS);
        // represent the difference with zero padding.
        let padding = section.size() - section.data().len() as u64;
        if padding > 0 {
            chunks.push(DataRef::Padding(padding.try_into().expect("overflow")))
        }

        log::trace!(
            "Found section: '{}', original range = 0x{:x}..0x{:x} (relocated to: 0x{:x}..0x{:x}), size = 0x{:x}/0x{:x}",
            section_name,
            section.original_address(),
            section.original_address() + section.size(),
            section_base_address,
            section_base_address + section.size(),
            section.data().len(),
            section.size(),
        );
    }

    // Measure the full size *before* stripping trailing padding: the stripped
    // padding still counts towards the segment's in-memory size.
    let size_in_memory: u64 = chunks.iter().map(|chunk| chunk.size() as u64).sum();
    while let Some(DataRef::Padding(..)) = chunks.last() {
        chunks.pop();
    }

    *current_address = align_to_next_page_u64(u64::from(VM_MAX_PAGE_SIZE), *current_address).expect("overflow");
    // Add a guard page between this section and the next one.
    *current_address += u64::from(VM_MAX_PAGE_SIZE);

    size_in_memory
}
1228
/// Builds the guest memory layout (read-only data, read-write data + BSS,
/// stack and heap base) from the relevant ELF sections, recording the assigned
/// base address of every processed section into `base_address_for_section`.
///
/// `min_stack_size` is a lower bound; it can be raised by values found in the
/// dedicated min-stack-size sections.
#[allow(clippy::too_many_arguments)]
fn extract_memory_config<H>(
    elf: &Elf<H>,
    sections_ro_data: &[SectionIndex],
    sections_rw_data: &[SectionIndex],
    sections_bss: &[SectionIndex],
    sections_min_stack_size: &[SectionIndex],
    base_address_for_section: &mut HashMap<SectionIndex, u64>,
    mut min_stack_size: u32,
) -> Result<MemoryConfig, ProgramFromElfError>
where
    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
{
    // Start laying things out at the first max-page boundary.
    let mut current_address = u64::from(VM_MAX_PAGE_SIZE);

    let mut ro_data = Vec::new();
    let mut rw_data = Vec::new();
    let ro_data_address = current_address;
    let ro_data_size = process_sections(
        elf,
        &mut current_address,
        &mut ro_data,
        base_address_for_section,
        sections_ro_data.iter().copied(),
    );
    let rw_data_address = current_address;
    let rw_data_size = process_sections(
        elf,
        &mut current_address,
        &mut rw_data,
        base_address_for_section,
        sections_rw_data.iter().copied().chain(sections_bss.iter().copied()),
    );

    // Each min-stack-size section holds a list of little-endian u32 values;
    // the effective minimum is the maximum over all of them.
    for &section_index in sections_min_stack_size {
        let section = elf.section_by_index(section_index);
        let data = section.data();
        if data.len() % 4 != 0 {
            return Err(ProgramFromElfError::other(format!("section '{}' has invalid size", section.name())));
        }

        for xs in data.chunks_exact(4) {
            let value = u32::from_le_bytes([xs[0], xs[1], xs[2], xs[3]]);
            min_stack_size = core::cmp::max(min_stack_size, value);
        }
    }

    let min_stack_size =
        align_to_next_page_u32(VM_MIN_PAGE_SIZE, min_stack_size).ok_or(ProgramFromElfError::other("out of range size for the stack"))?;

    log::trace!("Configured minimum stack size: 0x{min_stack_size:x}");

    let ro_data_size = u32::try_from(ro_data_size).expect("overflow");
    let rw_data_size = u32::try_from(rw_data_size).expect("overflow");

    // Sanity check that the memory configuration is actually valid.
    let heap_base = {
        let rw_data_size_physical: u64 = rw_data.iter().map(|x| x.size() as u64).sum();
        let rw_data_size_physical = u32::try_from(rw_data_size_physical).expect("overflow");
        assert!(rw_data_size_physical <= rw_data_size);

        let config = match MemoryMapBuilder::new(VM_MAX_PAGE_SIZE)
            .ro_data_size(ro_data_size)
            .rw_data_size(rw_data_size)
            .stack_size(min_stack_size)
            .build()
        {
            Ok(config) => config,
            Err(error) => {
                return Err(ProgramFromElfError::other(error));
            }
        };

        // The builder must agree with the addresses we've already handed out.
        assert_eq!(u64::from(config.ro_data_address()), ro_data_address);
        assert_eq!(u64::from(config.rw_data_address()), rw_data_address);

        config.heap_base()
    };

    let memory_config = MemoryConfig {
        ro_data,
        rw_data,
        ro_data_size,
        rw_data_size,
        min_stack_size,
        heap_base,
    };

    Ok(memory_config)
}
1319
/// Metadata describing a single import or export, as encoded in the ELF's
/// metadata sections.
#[derive(Clone, PartialEq, Eq, Debug, Hash)]
struct ExternMetadata {
    // Explicit index, if one was specified (only present in metadata version >= 2).
    index: Option<u32>,
    // The raw symbol name bytes.
    symbol: Vec<u8>,
    // Number of registers used for inputs.
    input_regs: u8,
    // Number of registers used for outputs.
    output_regs: u8,
}
1327
/// A single export: a code location paired with its extern metadata.
#[derive(Clone, PartialEq, Eq, Debug)]
struct Export {
    // The code location this export points at.
    location: SectionTarget,
    metadata: ExternMetadata,
}
1333
1334fn extract_exports<H>(
1335    elf: &Elf<H>,
1336    relocations: &BTreeMap<SectionTarget, RelocationKind>,
1337    section: &Section,
1338) -> Result<Vec<Export>, ProgramFromElfError>
1339where
1340    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
1341{
1342    let mut b = polkavm_common::elf::Reader::from(section.data());
1343    let mut exports = Vec::new();
1344    loop {
1345        let Ok(version) = b.read_byte() else { break };
1346
1347        if version != 1 {
1348            return Err(ProgramFromElfError::other(format!(
1349                "failed to parse export metadata: unsupported export metadata version: {}",
1350                version
1351            )));
1352        }
1353
1354        let metadata = {
1355            let location = SectionTarget {
1356                section_index: section.index(),
1357                offset: b.offset() as u64,
1358            };
1359
1360            // Ignore the address as written; we'll just use the relocations instead.
1361            let address = if elf.is_64() { b.read_u64() } else { b.read_u32().map(u64::from) };
1362            let address = address.map_err(|error| ProgramFromElfError::other(format!("failed to parse export metadata: {}", error)))?;
1363
1364            let Some(relocation) = relocations.get(&location) else {
1365                return Err(ProgramFromElfError::other(format!(
1366                    "found an export without a relocation for a pointer to the metadata at {location} (found address = 0x{address:x})"
1367                )));
1368            };
1369
1370            let target = match relocation {
1371                RelocationKind::Abs {
1372                    target,
1373                    size: RelocationSize::U64,
1374                } if elf.is_64() => target,
1375                RelocationKind::Abs {
1376                    target,
1377                    size: RelocationSize::U32,
1378                } if !elf.is_64() => target,
1379                _ => {
1380                    return Err(ProgramFromElfError::other(format!(
1381                        "found an export with an unexpected relocation at {location}: {relocation:?}"
1382                    )));
1383                }
1384            };
1385
1386            parse_extern_metadata(elf, relocations, *target)?
1387        };
1388
1389        let location = SectionTarget {
1390            section_index: section.index(),
1391            offset: b.offset() as u64,
1392        };
1393
1394        // Ignore the address as written; we'll just use the relocations instead.
1395        let error = if elf.is_64() { b.read_u64().err() } else { b.read_u32().err() };
1396
1397        if let Some(error) = error {
1398            return Err(ProgramFromElfError::other(format!("failed to parse export metadata: {}", error)));
1399        }
1400
1401        let Some(relocation) = relocations.get(&location) else {
1402            return Err(ProgramFromElfError::other(format!(
1403                "found an export without a relocation for a pointer to the code at {location}"
1404            )));
1405        };
1406
1407        let target = match relocation {
1408            RelocationKind::Abs {
1409                target,
1410                size: RelocationSize::U64,
1411            } if elf.is_64() => target,
1412            RelocationKind::Abs {
1413                target,
1414                size: RelocationSize::U32,
1415            } if !elf.is_64() => target,
1416            _ => {
1417                return Err(ProgramFromElfError::other(format!(
1418                    "found an export with an unexpected relocation at {location}: {relocation:?}"
1419                )));
1420            }
1421        };
1422
1423        exports.push(Export {
1424            location: *target,
1425            metadata,
1426        });
1427    }
1428
1429    Ok(exports)
1430}
1431
/// A single import (host call) declared by the program.
#[derive(Clone, Debug)]
struct Import {
    metadata: ExternMetadata,
}
1436
// Lets the metadata's fields be accessed directly on an `Import`.
impl core::ops::Deref for Import {
    type Target = ExternMetadata;
    fn deref(&self) -> &Self::Target {
        &self.metadata
    }
}
1443
impl Import {
    /// Registers read by the import call: the declared argument registers plus
    /// the stack pointer.
    fn src(&'_ self) -> impl Iterator<Item = Reg> + '_ {
        assert!(self.metadata.input_regs as usize <= Reg::INPUT_REGS.len());
        Reg::INPUT_REGS
            .into_iter()
            .take(self.metadata.input_regs as usize)
            .chain(core::iter::once(Reg::SP))
    }

    /// Mask of registers read by the import call.
    fn src_mask(&self) -> RegMask {
        let mut mask = RegMask::empty();
        for reg in self.src() {
            mask.insert(reg);
        }

        mask
    }

    /// Registers potentially written by the import call.
    ///
    /// NOTE(review): unlike `src` this ignores `output_regs` (beyond the
    /// assert) and yields a fixed register list, presumably to model the host
    /// call as conservatively clobbering all of them — confirm before
    /// narrowing this down to `OUTPUT_REGS`.
    #[allow(clippy::unused_self)]
    fn dst(&self) -> impl Iterator<Item = Reg> {
        assert!(self.metadata.output_regs as usize <= Reg::OUTPUT_REGS.len());
        [Reg::T0, Reg::T1, Reg::T2, Reg::A0, Reg::A1, Reg::A2, Reg::A3, Reg::A4, Reg::A5].into_iter()
    }

    /// Mask of registers potentially written by the import call.
    fn dst_mask(&self) -> RegMask {
        let mut mask = RegMask::empty();
        for reg in self.dst() {
            mask.insert(reg);
        }

        mask
    }
}
1477
/// Parses a single extern (import/export) metadata record at `target`.
///
/// Record layout (little-endian): version byte (1 or 2), `flags: u32`, symbol
/// length `u32`, a pointer to the symbol (resolved via its relocation rather
/// than the written value), input/output register counts (one byte each) and —
/// for version 2 only — an optional explicit index.
fn parse_extern_metadata_impl<H>(
    elf: &Elf<H>,
    relocations: &BTreeMap<SectionTarget, RelocationKind>,
    target: SectionTarget,
) -> Result<ExternMetadata, String>
where
    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
{
    let section = elf.section_by_index(target.section_index);
    let mut b = polkavm_common::elf::Reader::from(section.data());

    // Skip `sh_offset` bytes:
    let _ = b.read(target.offset as usize)?;

    let version = b.read_byte()?;
    if version != 1 && version != 2 {
        return Err(format!("unsupported extern metadata version: '{version}' (expected '1' or '2')"));
    }

    let flags = b.read_u32()?;
    let symbol_length = b.read_u32()?;
    // The symbol pointer must be covered by a relocation; look it up at the
    // reader's current offset before consuming the pointer itself.
    let Some(symbol_relocation) = relocations.get(&SectionTarget {
        section_index: section.index(),
        offset: b.offset() as u64,
    }) else {
        return Err("missing relocation for the symbol".into());
    };

    // Ignore the address as written; we'll just use the relocations instead.
    if elf.is_64() {
        b.read_u64()?;
    } else {
        b.read_u32()?;
    };

    // Only pointer-sized absolute relocations are acceptable here.
    let symbol_location = match symbol_relocation {
        RelocationKind::Abs {
            target,
            size: RelocationSize::U64,
        } if elf.is_64() => target,
        RelocationKind::Abs {
            target,
            size: RelocationSize::U32,
        } if !elf.is_64() => target,
        _ => return Err(format!("unexpected relocation for the symbol: {symbol_relocation:?}")),
    };

    let Some(symbol) = elf
        .section_by_index(symbol_location.section_index)
        .data()
        .get(symbol_location.offset as usize..symbol_location.offset.saturating_add(u64::from(symbol_length)) as usize)
    else {
        return Err("symbol out of bounds".into());
    };

    let input_regs = b.read_byte()?;
    if input_regs as usize > Reg::INPUT_REGS.len() {
        return Err(format!("too many input registers: {input_regs}"));
    }

    let output_regs = b.read_byte()?;
    if output_regs as usize > Reg::OUTPUT_REGS.len() {
        return Err(format!("too many output registers: {output_regs}"));
    }

    // Version 2 adds an explicit index; both fields are always present in the
    // record, with `has_index` selecting whether `index` is meaningful.
    let index = if version >= 2 {
        let has_index = b.read_byte()?;
        let index = b.read_u32()?;
        if has_index > 0 {
            Some(index)
        } else {
            None
        }
    } else {
        None
    };

    // No flags are currently defined; reject anything non-zero.
    if flags != 0 {
        return Err(format!("found unsupported flags: 0x{flags:x}"));
    }

    Ok(ExternMetadata {
        index,
        symbol: symbol.to_owned(),
        input_regs,
        output_regs,
    })
}
1566
1567fn parse_extern_metadata<H>(
1568    elf: &Elf<H>,
1569    relocations: &BTreeMap<SectionTarget, RelocationKind>,
1570    target: SectionTarget,
1571) -> Result<ExternMetadata, ProgramFromElfError>
1572where
1573    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
1574{
1575    parse_extern_metadata_impl(elf, relocations, target)
1576        .map_err(|error| ProgramFromElfError::other(format!("failed to parse extern metadata: {}", error)))
1577}
1578
/// Validates the collected imports and makes sure every one of them ends up
/// with an index.
///
/// Duplicate imports are tolerated as long as their metadata matches exactly.
/// If *any* import carries an explicit index then *all* of them must, and any
/// holes in the index space are plugged with dummy imports; otherwise indexes
/// are assigned automatically to the used imports, ordered by symbol name.
fn check_imports_and_assign_indexes(imports: &mut Vec<Import>, used_imports: &HashSet<usize>) -> Result<(), ProgramFromElfError> {
    let mut import_by_symbol: HashMap<Vec<u8>, usize> = HashMap::new();
    for (nth_import, import) in imports.iter().enumerate() {
        if let Some(&old_nth_import) = import_by_symbol.get(&import.metadata.symbol) {
            let old_import = &imports[old_nth_import];
            if import.metadata == old_import.metadata {
                // Benign duplicate: identical metadata.
                continue;
            }

            return Err(ProgramFromElfError::other(format!(
                "duplicate imports with the same symbol yet different prototype: {}",
                ProgramSymbol::new(&*import.metadata.symbol)
            )));
        }

        import_by_symbol.insert(import.metadata.symbol.clone(), nth_import);
    }

    if imports.iter().any(|import| import.metadata.index.is_some()) {
        // Explicit indexes: every import must have one, and a given index may
        // only be reused by an identical import.
        let mut import_by_index: HashMap<u32, ExternMetadata> = HashMap::new();
        let mut max_index = 0;
        for import in &*imports {
            if let Some(index) = import.index {
                if let Some(old_metadata) = import_by_index.get(&index) {
                    if *old_metadata != import.metadata {
                        return Err(ProgramFromElfError::other(format!(
                            "duplicate imports with the same index yet different prototypes: {}, {}",
                            ProgramSymbol::new(&*old_metadata.symbol),
                            ProgramSymbol::new(&*import.metadata.symbol)
                        )));
                    }
                } else {
                    import_by_index.insert(index, import.metadata.clone());
                }

                max_index = core::cmp::max(max_index, index);
            } else {
                return Err(ProgramFromElfError::other(format!(
                    "import without a specified index: {}",
                    ProgramSymbol::new(&*import.metadata.symbol)
                )));
            }
        }

        // If there are any holes in the indexes then insert dummy imports.
        // (`max_index` itself was observed above, so only 0..max_index can have holes.)
        for index in 0..max_index {
            if !import_by_index.contains_key(&index) {
                imports.push(Import {
                    metadata: ExternMetadata {
                        index: Some(index),
                        symbol: Vec::new(),
                        input_regs: 0,
                        output_regs: 0,
                    },
                })
            }
        }
    } else {
        // No explicit indexes: assign them deterministically to the *used*
        // imports, ordered by symbol name.
        let mut ordered: Vec<_> = used_imports.iter().copied().collect();
        ordered.sort_by(|&a, &b| imports[a].metadata.symbol.cmp(&imports[b].metadata.symbol));

        for (assigned_index, &nth_import) in ordered.iter().enumerate() {
            imports[nth_import].metadata.index = Some(assigned_index as u32);
        }
    }

    for import in imports {
        log::debug!(
            "Import: '{}', index = {:?}, input regs = {}, output regs = {}",
            String::from_utf8_lossy(&import.metadata.symbol),
            import.metadata.index,
            import.metadata.input_regs,
            import.metadata.output_regs
        );
    }

    Ok(())
}
1657
/// Resolves a relocation to a `SectionTarget` (the target symbol's section and
/// offset, with the addend applied), or `None` for relocations which don't
/// point anywhere.
fn get_relocation_target<H>(elf: &Elf<H>, relocation: &crate::elf::Relocation) -> Result<Option<SectionTarget>, ProgramFromElfError>
where
    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
{
    match relocation.target() {
        object::RelocationTarget::Absolute => {
            if let object::RelocationFlags::Elf { r_type } = relocation.flags() {
                if r_type == object::elf::R_RISCV_NONE {
                    // GNU ld apparently turns R_RISCV_ALIGN and R_RISCV_RELAX into these.
                    return Ok(None);
                }
            }
            // Example of such relocation:
            //   Offset     Info    Type                Sym. Value  Symbol's Name + Addend
            //   00060839  00000001 R_RISCV_32                        0
            //
            // So far I've only seen these emitted for `.debug_info`.
            //
            // I'm not entirely sure what's the point of those, as they don't point to any symbol
            // and have an addend of zero.
            assert_eq!(relocation.addend(), 0);
            assert!(!relocation.has_implicit_addend());
            Ok(None)
        }
        object::RelocationTarget::Symbol(target_symbol_index) => {
            let target_symbol = elf
                .symbol_by_index(target_symbol_index)
                .map_err(|error| ProgramFromElfError::other(format!("failed to fetch relocation target: {}", error)))?;

            let (section, offset) = target_symbol.section_and_offset()?;
            log::trace!(
                "Fetched relocation target: target section = \"{}\", target symbol = \"{}\" ({}), symbol offset = 0x{:x} + 0x{:x}",
                section.name(),
                target_symbol.name().unwrap_or(""),
                target_symbol_index.0,
                offset,
                relocation.addend(),
            );

            let Some(offset) = offset.checked_add_signed(relocation.addend()) else {
                return Err(ProgramFromElfError::other(
                    "failed to add addend to the symbol's offset due to overflow",
                ));
            };

            Ok(Some(SectionTarget {
                section_index: section.index(),
                offset,
            }))
        }
        _ => Err(ProgramFromElfError::other(format!(
            "unsupported target for relocation: {:?}",
            relocation
        ))),
    }
}
1714
/// Selects which min/max operation `emit_minmax` should lower; the `64`
/// variants compare full 64-bit values, the others compare 32-bit values.
enum MinMax {
    MaxSigned,
    MinSigned,
    MaxUnsigned,
    MinUnsigned,

    MaxSigned64,
    MinSigned64,
    MaxUnsigned64,
    MinUnsigned64,
}
1726
1727fn emit_minmax(
1728    kind: MinMax,
1729    dst: Reg,
1730    src1: Option<Reg>,
1731    src2: Option<Reg>,
1732    tmp: Reg,
1733    mut emit: impl FnMut(InstExt<SectionTarget, SectionTarget>),
1734) {
1735    // This is supposed to emit something like this:
1736    //   tmp = src1 ? src2
1737    //   dst = src1
1738    //   dst = src2 if tmp == 0
1739
1740    assert_ne!(dst, tmp);
1741    assert_ne!(Some(tmp), src1);
1742    assert_ne!(Some(tmp), src2);
1743    assert_ne!(Some(dst), src2);
1744
1745    let (cmp_src1, cmp_src2, cmp_kind) = match kind {
1746        MinMax::MinUnsigned => (src1, src2, AnyAnyKind::SetLessThanUnsigned32),
1747        MinMax::MaxUnsigned => (src2, src1, AnyAnyKind::SetLessThanUnsigned32),
1748        MinMax::MinSigned => (src1, src2, AnyAnyKind::SetLessThanSigned32),
1749        MinMax::MaxSigned => (src2, src1, AnyAnyKind::SetLessThanSigned32),
1750        MinMax::MinUnsigned64 => (src1, src2, AnyAnyKind::SetLessThanUnsigned64),
1751        MinMax::MaxUnsigned64 => (src2, src1, AnyAnyKind::SetLessThanUnsigned64),
1752        MinMax::MinSigned64 => (src1, src2, AnyAnyKind::SetLessThanSigned64),
1753        MinMax::MaxSigned64 => (src2, src1, AnyAnyKind::SetLessThanSigned64),
1754    };
1755
1756    emit(InstExt::Basic(BasicInst::AnyAny {
1757        kind: cmp_kind,
1758        dst: tmp,
1759        src1: cmp_src1.map_or(RegImm::Imm(0), RegImm::Reg),
1760        src2: cmp_src2.map_or(RegImm::Imm(0), RegImm::Reg),
1761    }));
1762
1763    if let Some(src1) = src1 {
1764        emit(InstExt::Basic(BasicInst::MoveReg { dst, src: src1 }));
1765    } else {
1766        emit(InstExt::Basic(BasicInst::LoadImmediate { dst: tmp, imm: 0 }));
1767    }
1768
1769    emit(InstExt::Basic(BasicInst::Cmov {
1770        kind: CmovKind::EqZero,
1771        dst,
1772        src: src2.map_or(RegImm::Imm(0), RegImm::Reg),
1773        cond: tmp,
1774    }));
1775}
1776
1777fn resolve_simple_zero_register_usage(
1778    kind: crate::riscv::RegRegKind,
1779    dst: Reg,
1780    src1: RReg,
1781    src2: RReg,
1782    mut emit: impl FnMut(InstExt<SectionTarget, SectionTarget>),
1783) -> bool {
1784    use crate::riscv::RegRegKind as K;
1785    if kind == K::OrInverted && src1 == RReg::Zero && src2 != RReg::Zero {
1786        emit(InstExt::Basic(BasicInst::AnyAny {
1787            kind: AnyAnyKind::Xor32,
1788            dst,
1789            src1: RegImm::Imm(!0),
1790            src2: cast_reg_any(src2).unwrap(),
1791        }));
1792        return true;
1793    }
1794
1795    if kind == K::Xnor && src1 == RReg::Zero && src2 != RReg::Zero {
1796        emit(InstExt::Basic(BasicInst::AnyAny {
1797            kind: AnyAnyKind::Xor32,
1798            dst,
1799            src1: RegImm::Imm(!0),
1800            src2: cast_reg_any(src2).unwrap(),
1801        }));
1802        return true;
1803    }
1804
1805    if kind == K::Xnor && src1 != RReg::Zero && src2 == RReg::Zero {
1806        emit(InstExt::Basic(BasicInst::AnyAny {
1807            kind: AnyAnyKind::Xor32,
1808            dst,
1809            src1: cast_reg_any(src1).unwrap(),
1810            src2: RegImm::Imm(!0),
1811        }));
1812        return true;
1813    }
1814
1815    if (kind == K::Minimum || kind == K::Maximum) && (src1 == RReg::Zero || src2 == RReg::Zero) {
1816        if src1 == RReg::Zero && src2 == RReg::Zero {
1817            emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm: 0 }));
1818            return true;
1819        }
1820
1821        let tmp = Reg::E2;
1822        let src1 = cast_reg_any(src1).unwrap();
1823        let src2 = cast_reg_any(src2).unwrap();
1824        let (kind, cmp_src1, cmp_src2) = match kind {
1825            K::Minimum => (AnyAnyKind::SetLessThanSigned32, src1, src2),
1826            K::Maximum => (AnyAnyKind::SetLessThanSigned32, src2, src1),
1827            _ => unreachable!(),
1828        };
1829
1830        emit(InstExt::Basic(BasicInst::AnyAny {
1831            kind,
1832            dst: tmp,
1833            src1: cmp_src1,
1834            src2: cmp_src2,
1835        }));
1836
1837        match src1 {
1838            RegImm::Reg(src) => emit(InstExt::Basic(BasicInst::MoveReg { dst, src })),
1839            RegImm::Imm(imm) => emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm })),
1840        }
1841
1842        emit(InstExt::Basic(BasicInst::Cmov {
1843            kind: CmovKind::EqZero,
1844            dst,
1845            src: src2,
1846            cond: tmp,
1847        }));
1848
1849        return true;
1850    }
1851
1852    if matches!(kind, K::RotateLeft32AndSignExtend | K::RotateRight32AndSignExtend) && src1 != RReg::Zero && src2 == RReg::Zero {
1853        emit(InstExt::Basic(BasicInst::AnyAny {
1854            kind: AnyAnyKind::Add32AndSignExtend,
1855            dst,
1856            src1: cast_reg_any(src1).unwrap(),
1857            src2: RegImm::Imm(0),
1858        }));
1859        return true;
1860    }
1861
1862    false
1863}
1864
1865fn emit_or_combine_byte(
1866    location: SectionTarget,
1867    dst: Reg,
1868    src: Reg,
1869    rv64: bool,
1870    mut emit: impl FnMut(InstExt<SectionTarget, SectionTarget>),
1871) {
1872    let op_reg = dst;
1873    let cmp_reg = Reg::E1;
1874    let tmp_reg = Reg::E2;
1875    let mask_reg = if dst != src { src } else { Reg::E3 };
1876    let range = if rv64 { 0..64 } else { 0..32 };
1877
1878    log::warn!("Emulating orc.b at {:?} with an instruction sequence", location);
1879
1880    if dst != src {
1881        emit(InstExt::Basic(BasicInst::MoveReg { dst, src }));
1882    }
1883
1884    // Loop:
1885    // mov tmp, op
1886    // shl mask, 8
1887    // or tmp, mask
1888    // test op, mask
1889    // cmov.neq op, tmp
1890
1891    for iter in range.step_by(8) {
1892        emit(InstExt::Basic(BasicInst::MoveReg { dst: tmp_reg, src: op_reg }));
1893
1894        if iter == 0 {
1895            emit(InstExt::Basic(BasicInst::LoadImmediate { dst: mask_reg, imm: 0xff }));
1896        } else {
1897            emit(InstExt::Basic(BasicInst::AnyAny {
1898                kind: if rv64 {
1899                    AnyAnyKind::ShiftLogicalLeft64
1900                } else {
1901                    AnyAnyKind::ShiftLogicalLeft32
1902                },
1903                dst: mask_reg,
1904                src1: RegImm::Reg(mask_reg),
1905                src2: RegImm::Imm(8),
1906            }));
1907        }
1908
1909        emit(InstExt::Basic(BasicInst::AnyAny {
1910            kind: if rv64 { AnyAnyKind::Or64 } else { AnyAnyKind::Or32 },
1911            dst: tmp_reg,
1912            src1: RegImm::Reg(tmp_reg),
1913            src2: RegImm::Reg(mask_reg),
1914        }));
1915
1916        emit(InstExt::Basic(BasicInst::AnyAny {
1917            kind: if rv64 { AnyAnyKind::And64 } else { AnyAnyKind::And32 },
1918            dst: cmp_reg,
1919            src1: RegImm::Reg(op_reg),
1920            src2: RegImm::Reg(mask_reg),
1921        }));
1922
1923        emit(InstExt::Basic(BasicInst::Cmov {
1924            kind: CmovKind::NotEqZero,
1925            dst: op_reg,
1926            src: RegImm::Reg(tmp_reg),
1927            cond: cmp_reg,
1928        }));
1929    }
1930}
1931
1932fn convert_instruction<H>(
1933    elf: &Elf<H>,
1934    section: &Section,
1935    current_location: SectionTarget,
1936    instruction: Inst,
1937    instruction_size: u64,
1938    rv64: bool,
1939    mut emit: impl FnMut(InstExt<SectionTarget, SectionTarget>),
1940) -> Result<(), ProgramFromElfError>
1941where
1942    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
1943{
1944    match instruction {
1945        Inst::LoadUpperImmediate { dst, value } => {
1946            let Some(dst) = cast_reg_non_zero(dst)? else {
1947                emit(InstExt::nop());
1948                return Ok(());
1949            };
1950
1951            emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm: value as i32 }));
1952            Ok(())
1953        }
1954        Inst::JumpAndLink { dst, target } => {
1955            let target = SectionTarget {
1956                section_index: section.index(),
1957                offset: current_location.offset.wrapping_add_signed(i64::from(target as i32)),
1958            };
1959
1960            if target.offset > section.size() {
1961                return Err(ProgramFromElfError::other("out of range JAL instruction"));
1962            }
1963
1964            let next = if let Some(dst) = cast_reg_non_zero(dst)? {
1965                let target_return = current_location.add(instruction_size);
1966                ControlInst::Call {
1967                    ra: dst,
1968                    target,
1969                    target_return,
1970                }
1971            } else {
1972                ControlInst::Jump { target }
1973            };
1974
1975            emit(InstExt::Control(next));
1976            Ok(())
1977        }
1978        Inst::Branch { kind, src1, src2, target } => {
1979            let src1 = cast_reg_any(src1)?;
1980            let src2 = cast_reg_any(src2)?;
1981
1982            let target_true = SectionTarget {
1983                section_index: section.index(),
1984                offset: current_location.offset.wrapping_add_signed(i64::from(target as i32)),
1985            };
1986
1987            if target_true.offset > section.size() {
1988                return Err(ProgramFromElfError::other("out of range unrelocated branch"));
1989            }
1990
1991            let target_false = current_location.add(instruction_size);
1992            emit(InstExt::Control(ControlInst::Branch {
1993                kind,
1994                src1,
1995                src2,
1996                target_true,
1997                target_false,
1998            }));
1999            Ok(())
2000        }
2001        Inst::JumpAndLinkRegister { dst, base, value } => {
2002            let Some(base) = cast_reg_non_zero(base)? else {
2003                return Err(ProgramFromElfError::other("found an unrelocated JALR instruction"));
2004            };
2005
2006            let next = if let Some(dst) = cast_reg_non_zero(dst)? {
2007                let target_return = current_location.add(instruction_size);
2008                ControlInst::CallIndirect {
2009                    ra: dst,
2010                    base,
2011                    offset: value.into(),
2012                    target_return,
2013                }
2014            } else {
2015                ControlInst::JumpIndirect {
2016                    base,
2017                    offset: value.into(),
2018                }
2019            };
2020
2021            emit(InstExt::Control(next));
2022            Ok(())
2023        }
2024        Inst::Unimplemented => {
2025            emit(InstExt::Control(ControlInst::Unimplemented));
2026            Ok(())
2027        }
2028        Inst::FenceI | Inst::Fence { .. } => {
2029            emit(InstExt::Basic(BasicInst::Nop));
2030            Ok(())
2031        }
2032        Inst::Load { kind, dst, base, offset } => {
2033            if dst == RReg::Zero && base == RReg::Zero && offset == 0 {
2034                // These are sometimes used as a poor man's trap.
2035                emit(InstExt::Control(ControlInst::Unimplemented));
2036                return Ok(());
2037            }
2038
2039            let Some(base) = cast_reg_non_zero(base)? else {
2040                return Err(ProgramFromElfError::other(format!(
2041                    "found an unrelocated absolute load at {}",
2042                    current_location.fmt_human_readable(elf)
2043                )));
2044            };
2045
2046            // LLVM riscv-enable-dead-defs pass may rewrite dst to the zero register.
2047            match cast_reg_non_zero(dst)? {
2048                Some(dst) => emit(InstExt::Basic(BasicInst::LoadIndirect { kind, dst, base, offset })),
2049                None => emit(InstExt::Basic(BasicInst::Nop)),
2050            }
2051
2052            Ok(())
2053        }
2054        Inst::Store { kind, src, base, offset } => {
2055            if src == RReg::Zero && base == RReg::Zero && offset == 0 {
2056                emit(InstExt::Control(ControlInst::Unimplemented));
2057                return Ok(());
2058            }
2059
2060            let Some(base) = cast_reg_non_zero(base)? else {
2061                return Err(ProgramFromElfError::other(format!(
2062                    "found an unrelocated absolute store at {}",
2063                    current_location.fmt_human_readable(elf)
2064                )));
2065            };
2066
2067            let src = cast_reg_any(src)?;
2068            emit(InstExt::Basic(BasicInst::StoreIndirect { kind, src, base, offset }));
2069            Ok(())
2070        }
2071        Inst::RegImm { kind, dst, src, imm } => {
2072            let Some(dst) = cast_reg_non_zero(dst)? else {
2073                emit(InstExt::nop());
2074                return Ok(());
2075            };
2076
2077            let src = cast_reg_any(src)?;
2078            let kind = match kind {
2079                RegImmKind::Add32 => AnyAnyKind::Add32,
2080                RegImmKind::Add32AndSignExtend => AnyAnyKind::Add32AndSignExtend,
2081                RegImmKind::Add64 => AnyAnyKind::Add64,
2082                RegImmKind::And32 => AnyAnyKind::And32,
2083                RegImmKind::And64 => AnyAnyKind::And64,
2084                RegImmKind::Or32 => AnyAnyKind::Or32,
2085                RegImmKind::Or64 => AnyAnyKind::Or64,
2086                RegImmKind::Xor32 => AnyAnyKind::Xor32,
2087                RegImmKind::Xor64 => AnyAnyKind::Xor64,
2088                RegImmKind::SetLessThanUnsigned32 => AnyAnyKind::SetLessThanUnsigned32,
2089                RegImmKind::SetLessThanUnsigned64 => AnyAnyKind::SetLessThanUnsigned64,
2090                RegImmKind::SetLessThanSigned32 => AnyAnyKind::SetLessThanSigned32,
2091                RegImmKind::SetLessThanSigned64 => AnyAnyKind::SetLessThanSigned64,
2092                RegImmKind::ShiftLogicalLeft32 => AnyAnyKind::ShiftLogicalLeft32,
2093                RegImmKind::ShiftLogicalLeft32AndSignExtend => AnyAnyKind::ShiftLogicalLeft32AndSignExtend,
2094                RegImmKind::ShiftLogicalLeft64 => AnyAnyKind::ShiftLogicalLeft64,
2095                RegImmKind::ShiftLogicalRight32 => AnyAnyKind::ShiftLogicalRight32,
2096                RegImmKind::ShiftLogicalRight32AndSignExtend => AnyAnyKind::ShiftLogicalRight32AndSignExtend,
2097                RegImmKind::ShiftLogicalRight64 => AnyAnyKind::ShiftLogicalRight64,
2098                RegImmKind::ShiftArithmeticRight32 => AnyAnyKind::ShiftArithmeticRight32,
2099                RegImmKind::ShiftArithmeticRight32AndSignExtend => AnyAnyKind::ShiftArithmeticRight32AndSignExtend,
2100                RegImmKind::ShiftArithmeticRight64 => AnyAnyKind::ShiftArithmeticRight64,
2101                RegImmKind::RotateRight32 => AnyAnyKind::RotateRight32,
2102                RegImmKind::RotateRight32AndSignExtend => AnyAnyKind::RotateRight32AndSignExtend,
2103                RegImmKind::RotateRight64 => AnyAnyKind::RotateRight64,
2104            };
2105
2106            match src {
2107                RegImm::Imm(0) => {
2108                    // The optimizer can take care of this later, but doing it early here is more efficient.
2109                    emit(InstExt::Basic(BasicInst::LoadImmediate {
2110                        dst,
2111                        imm: OperationKind::from(kind)
2112                            .apply_const(0, cast(imm).to_i64_sign_extend())
2113                            .try_into()
2114                            .expect("load immediate overflow"),
2115                    }));
2116                }
2117                RegImm::Reg(src) if imm == 0 && matches!(kind, AnyAnyKind::Add32 | AnyAnyKind::Add64) => {
2118                    emit(InstExt::Basic(BasicInst::MoveReg { dst, src }));
2119                }
2120                _ => {
2121                    emit(InstExt::Basic(BasicInst::AnyAny {
2122                        kind,
2123                        dst,
2124                        src1: src,
2125                        src2: imm.into(),
2126                    }));
2127                }
2128            }
2129
2130            Ok(())
2131        }
2132        Inst::Reg { kind, dst, src } => {
2133            let Some(dst) = cast_reg_non_zero(dst)? else {
2134                emit(InstExt::nop());
2135                return Ok(());
2136            };
2137
2138            use crate::riscv::RegKind as K;
2139
2140            let Some(src) = cast_reg_non_zero(src)? else {
2141                let imm = match kind {
2142                    K::CountLeadingZeroBits32 | K::CountTrailingZeroBits32 => 32,
2143                    K::CountLeadingZeroBits64 | K::CountTrailingZeroBits64 => 64,
2144                    K::CountSetBits32 | K::CountSetBits64 => 0,
2145                    K::ReverseByte => 0,
2146                    K::OrCombineByte => 0,
2147                    K::SignExtend8 | K::SignExtend16 | K::ZeroExtend16 => 0,
2148                };
2149
2150                emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm }));
2151                return Ok(());
2152            };
2153
2154            let kind = match kind {
2155                K::CountLeadingZeroBits32 => RegKind::CountLeadingZeroBits32,
2156                K::CountLeadingZeroBits64 => RegKind::CountLeadingZeroBits64,
2157                K::CountSetBits32 => RegKind::CountSetBits32,
2158                K::CountSetBits64 => RegKind::CountSetBits64,
2159                K::CountTrailingZeroBits32 => RegKind::CountTrailingZeroBits32,
2160                K::CountTrailingZeroBits64 => RegKind::CountTrailingZeroBits64,
2161                K::ReverseByte => RegKind::ReverseByte,
2162                K::SignExtend8 => RegKind::SignExtend8,
2163                K::SignExtend16 => RegKind::SignExtend16,
2164                K::ZeroExtend16 => RegKind::ZeroExtend16,
2165                K::OrCombineByte => {
2166                    emit_or_combine_byte(current_location, dst, src, rv64, &mut emit);
2167                    return Ok(());
2168                }
2169            };
2170
2171            emit(InstExt::Basic(BasicInst::Reg { kind, dst, src }));
2172
2173            Ok(())
2174        }
2175        Inst::RegReg { kind, dst, src1, src2 } => {
2176            let Some(dst) = cast_reg_non_zero(dst)? else {
2177                emit(InstExt::nop());
2178                return Ok(());
2179            };
2180
2181            macro_rules! anyany {
2182                ($kind:ident) => {
2183                    BasicInst::AnyAny {
2184                        kind: AnyAnyKind::$kind,
2185                        dst,
2186                        src1: cast_reg_any(src1)?,
2187                        src2: cast_reg_any(src2)?,
2188                    }
2189                };
2190            }
2191
2192            macro_rules! regreg {
2193                ($kind:ident) => {
2194                    match (cast_reg_non_zero(src1)?, cast_reg_non_zero(src2)?) {
2195                        (Some(src1), Some(src2)) => BasicInst::RegReg {
2196                            kind: RegRegKind::$kind,
2197                            dst,
2198                            src1,
2199                            src2,
2200                        },
2201                        (lhs, rhs) => {
2202                            let lhs = lhs
2203                                .map(|reg| RegValue::InputReg {
2204                                    reg,
2205                                    source_block: BlockTarget::from_raw(0),
2206                                    bits_used: u64::MAX,
2207                                })
2208                                .unwrap_or(RegValue::Constant(0));
2209
2210                            let rhs = rhs
2211                                .map(|reg| RegValue::InputReg {
2212                                    reg,
2213                                    source_block: BlockTarget::from_raw(0),
2214                                    bits_used: u64::MAX,
2215                                })
2216                                .unwrap_or(RegValue::Constant(0));
2217
2218                            match OperationKind::from(RegRegKind::$kind).apply(elf, lhs, rhs) {
2219                                Some(RegValue::Constant(imm)) => {
2220                                    let imm: i32 = imm.try_into().expect("immediate operand overflow");
2221                                    BasicInst::LoadImmediate { dst, imm }
2222                                }
2223                                Some(RegValue::InputReg { reg, .. }) => BasicInst::MoveReg { dst, src: reg },
2224                                _ => {
2225                                    return Err(ProgramFromElfError::other(format!(
2226                                        "found a {:?} instruction using a zero register",
2227                                        kind
2228                                    )))
2229                                }
2230                            }
2231                        }
2232                    }
2233                };
2234            }
2235
2236            if resolve_simple_zero_register_usage(kind, dst, src1, src2, &mut emit) {
2237                emit(InstExt::nop());
2238                return Ok(());
2239            };
2240
2241            use crate::riscv::RegRegKind as K;
2242            let instruction = match kind {
2243                K::Add32 => anyany!(Add32),
2244                K::Add32AndSignExtend => anyany!(Add32AndSignExtend),
2245                K::Add64 => anyany!(Add64),
2246                K::Sub32 => anyany!(Sub32),
2247                K::Sub32AndSignExtend => anyany!(Sub32AndSignExtend),
2248                K::Sub64 => anyany!(Sub64),
2249                K::And32 => anyany!(And32),
2250                K::And64 => anyany!(And64),
2251                K::Or32 => anyany!(Or32),
2252                K::Or64 => anyany!(Or64),
2253                K::Xor32 => anyany!(Xor32),
2254                K::Xor64 => anyany!(Xor64),
2255                K::SetLessThanUnsigned32 => anyany!(SetLessThanUnsigned32),
2256                K::SetLessThanUnsigned64 => anyany!(SetLessThanUnsigned64),
2257                K::SetLessThanSigned32 => anyany!(SetLessThanSigned32),
2258                K::SetLessThanSigned64 => anyany!(SetLessThanSigned64),
2259                K::ShiftLogicalLeft32 => anyany!(ShiftLogicalLeft32),
2260                K::ShiftLogicalLeft32AndSignExtend => anyany!(ShiftLogicalLeft32AndSignExtend),
2261                K::ShiftLogicalLeft64 => anyany!(ShiftLogicalLeft64),
2262                K::ShiftLogicalRight32 => anyany!(ShiftLogicalRight32),
2263                K::ShiftLogicalRight32AndSignExtend => anyany!(ShiftLogicalRight32AndSignExtend),
2264                K::ShiftLogicalRight64 => anyany!(ShiftLogicalRight64),
2265                K::ShiftArithmeticRight32 => anyany!(ShiftArithmeticRight32),
2266                K::ShiftArithmeticRight32AndSignExtend => anyany!(ShiftArithmeticRight32AndSignExtend),
2267                K::ShiftArithmeticRight64 => anyany!(ShiftArithmeticRight64),
2268                K::Mul32 => anyany!(Mul32),
2269                K::Mul32AndSignExtend => anyany!(Mul32AndSignExtend),
2270                K::Mul64 => anyany!(Mul64),
2271                K::MulUpperSignedSigned32 => regreg!(MulUpperSignedSigned32),
2272                K::MulUpperSignedSigned64 => regreg!(MulUpperSignedSigned64),
2273                K::MulUpperUnsignedUnsigned32 => regreg!(MulUpperUnsignedUnsigned32),
2274                K::MulUpperUnsignedUnsigned64 => regreg!(MulUpperUnsignedUnsigned64),
2275                K::MulUpperSignedUnsigned32 => regreg!(MulUpperSignedUnsigned32),
2276                K::MulUpperSignedUnsigned64 => regreg!(MulUpperSignedUnsigned64),
2277                K::Div32 => regreg!(Div32),
2278                K::Div32AndSignExtend => regreg!(Div32AndSignExtend),
2279                K::Div64 => regreg!(Div64),
2280                K::DivUnsigned32 => regreg!(DivUnsigned32),
2281                K::DivUnsigned32AndSignExtend => regreg!(DivUnsigned32AndSignExtend),
2282                K::DivUnsigned64 => regreg!(DivUnsigned64),
2283                K::Rem32 => regreg!(Rem32),
2284                K::Rem32AndSignExtend => regreg!(Rem32AndSignExtend),
2285                K::Rem64 => regreg!(Rem64),
2286                K::RemUnsigned32 => regreg!(RemUnsigned32),
2287                K::RemUnsigned32AndSignExtend => regreg!(RemUnsigned32AndSignExtend),
2288                K::RemUnsigned64 => regreg!(RemUnsigned64),
2289
2290                K::AndInverted => regreg!(AndInverted),
2291                K::OrInverted => regreg!(OrInverted),
2292                K::Xnor => regreg!(Xnor),
2293                K::Maximum => regreg!(Maximum),
2294                K::MaximumUnsigned => regreg!(MaximumUnsigned),
2295                K::Minimum => regreg!(Minimum),
2296                K::MinimumUnsigned => regreg!(MinimumUnsigned),
2297                K::RotateLeft32 => regreg!(RotateLeft32),
2298                K::RotateLeft32AndSignExtend => regreg!(RotateLeft32AndSignExtend),
2299                K::RotateLeft64 => regreg!(RotateLeft64),
2300                K::RotateRight32 => anyany!(RotateRight32),
2301                K::RotateRight32AndSignExtend => anyany!(RotateRight32AndSignExtend),
2302                K::RotateRight64 => anyany!(RotateRight64),
2303            };
2304
2305            emit(InstExt::Basic(instruction));
2306            Ok(())
2307        }
2308        Inst::AddUpperImmediateToPc { .. } => Err(ProgramFromElfError::other(format!(
2309            "found an unrelocated auipc instruction at offset {} in section '{}'; is the program compiled with relocations?",
2310            current_location.offset,
2311            section.name()
2312        ))),
2313        Inst::Ecall => Err(ProgramFromElfError::other(
2314            "found a bare ecall instruction; those are not supported",
2315        )),
2316        Inst::Cmov { kind, dst, src, cond, .. } => {
2317            let Some(dst) = cast_reg_non_zero(dst)? else {
2318                emit(InstExt::Basic(BasicInst::Nop));
2319                return Ok(());
2320            };
2321
2322            match cast_reg_non_zero(cond)? {
2323                Some(cond) => {
2324                    emit(InstExt::Basic(BasicInst::Cmov {
2325                        kind,
2326                        dst,
2327                        src: cast_reg_any(src)?,
2328                        cond,
2329                    }));
2330                }
2331                None => match kind {
2332                    CmovKind::EqZero => {
2333                        if let Some(src) = cast_reg_non_zero(src)? {
2334                            emit(InstExt::Basic(BasicInst::MoveReg { dst, src }));
2335                        } else {
2336                            emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm: 0 }));
2337                        }
2338                    }
2339                    CmovKind::NotEqZero => {
2340                        emit(InstExt::nop());
2341                    }
2342                },
2343            };
2344
2345            Ok(())
2346        }
2347        Inst::LoadReserved32 { dst, src, .. } => {
2348            let Some(dst) = cast_reg_non_zero(dst)? else {
2349                return Err(ProgramFromElfError::other(
2350                    "found an atomic load with a zero register as the destination",
2351                ));
2352            };
2353
2354            let Some(src) = cast_reg_non_zero(src)? else {
2355                return Err(ProgramFromElfError::other(
2356                    "found an atomic load with a zero register as the source",
2357                ));
2358            };
2359
2360            emit(InstExt::Basic(BasicInst::LoadIndirect {
2361                kind: LoadKind::I32,
2362                dst,
2363                base: src,
2364                offset: 0,
2365            }));
2366
2367            Ok(())
2368        }
2369        Inst::LoadReserved64 { dst, src, .. } if rv64 => {
2370            let Some(dst) = cast_reg_non_zero(dst)? else {
2371                return Err(ProgramFromElfError::other(
2372                    "found an atomic load with a zero register as the destination",
2373                ));
2374            };
2375
2376            let Some(src) = cast_reg_non_zero(src)? else {
2377                return Err(ProgramFromElfError::other(
2378                    "found an atomic load with a zero register as the source",
2379                ));
2380            };
2381
2382            emit(InstExt::Basic(BasicInst::LoadIndirect {
2383                kind: LoadKind::U64,
2384                dst,
2385                base: src,
2386                offset: 0,
2387            }));
2388
2389            Ok(())
2390        }
2391        Inst::StoreConditional32 { src, addr, dst, .. } => {
2392            let Some(addr) = cast_reg_non_zero(addr)? else {
2393                return Err(ProgramFromElfError::other(
2394                    "found an atomic store with a zero register as the address",
2395                ));
2396            };
2397
2398            let src = cast_reg_any(src)?;
2399            emit(InstExt::Basic(BasicInst::StoreIndirect {
2400                kind: StoreKind::U32,
2401                src,
2402                base: addr,
2403                offset: 0,
2404            }));
2405
2406            if let Some(dst) = cast_reg_non_zero(dst)? {
2407                // The store always succeeds, so write zero here.
2408                emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm: 0 }));
2409            }
2410
2411            Ok(())
2412        }
2413        Inst::StoreConditional64 { src, addr, dst, .. } if rv64 => {
2414            let Some(addr) = cast_reg_non_zero(addr)? else {
2415                return Err(ProgramFromElfError::other(
2416                    "found an atomic store with a zero register as the address",
2417                ));
2418            };
2419
2420            let src = cast_reg_any(src)?;
2421            emit(InstExt::Basic(BasicInst::StoreIndirect {
2422                kind: StoreKind::U64,
2423                src,
2424                base: addr,
2425                offset: 0,
2426            }));
2427
2428            if let Some(dst) = cast_reg_non_zero(dst)? {
2429                // The store always succeeds, so write zero here.
2430                emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm: 0 }));
2431            }
2432
2433            Ok(())
2434        }
2435        Inst::LoadReserved64 { .. } | Inst::StoreConditional64 { .. } => {
2436            unreachable!("64-bit instruction in a 32-bit program: {instruction:?}");
2437        }
2438        Inst::Atomic {
2439            kind,
2440            dst: old_value,
2441            addr,
2442            src: operand,
2443            ..
2444        } => {
2445            let Some(addr) = cast_reg_non_zero(addr)? else {
2446                return Err(ProgramFromElfError::other(
2447                    "found an atomic operation with a zero register as the address",
2448                ));
2449            };
2450
2451            let is_64_bit = match kind {
2452                AtomicKind::Swap32
2453                | AtomicKind::Add32
2454                | AtomicKind::And32
2455                | AtomicKind::Or32
2456                | AtomicKind::Xor32
2457                | AtomicKind::MaxSigned32
2458                | AtomicKind::MinSigned32
2459                | AtomicKind::MaxUnsigned32
2460                | AtomicKind::MinUnsigned32 => false,
2461
2462                AtomicKind::Swap64
2463                | AtomicKind::Add64
2464                | AtomicKind::MaxSigned64
2465                | AtomicKind::MinSigned64
2466                | AtomicKind::MaxUnsigned64
2467                | AtomicKind::MinUnsigned64
2468                | AtomicKind::And64
2469                | AtomicKind::Or64
2470                | AtomicKind::Xor64 => true,
2471            };
2472
2473            let mut operand = cast_reg_non_zero(operand)?;
2474            if rv64 && !is_64_bit {
2475                // Zero-extend the operand to ignore any bits that might be there.
2476                if let Some(src) = operand {
2477                    emit(InstExt::Basic(BasicInst::AnyAny {
2478                        kind: AnyAnyKind::ShiftLogicalLeft64,
2479                        dst: Reg::E3,
2480                        src1: RegImm::Reg(src),
2481                        src2: RegImm::Imm(32),
2482                    }));
2483                    emit(InstExt::Basic(BasicInst::AnyAny {
2484                        kind: AnyAnyKind::ShiftArithmeticRight64,
2485                        dst: Reg::E3,
2486                        src1: RegImm::Reg(Reg::E3),
2487                        src2: RegImm::Imm(32),
2488                    }));
2489                }
2490                operand = Some(Reg::E3);
2491            }
2492            let operand_regimm = operand.map_or(RegImm::Imm(0), RegImm::Reg);
2493            let (old_value, new_value, output) = match cast_reg_non_zero(old_value)? {
2494                None => (Reg::E0, Reg::E0, None),
2495                Some(old_value) if old_value == addr || Some(old_value) == operand => (Reg::E0, Reg::E1, Some(old_value)),
2496                Some(old_value) => (old_value, Reg::E0, None),
2497            };
2498
2499            emit(InstExt::Basic(BasicInst::LoadIndirect {
2500                kind: if is_64_bit { LoadKind::U64 } else { LoadKind::I32 },
2501                dst: old_value,
2502                base: addr,
2503                offset: 0,
2504            }));
2505
2506            match kind {
2507                AtomicKind::Swap64 => {
2508                    emit(InstExt::Basic(BasicInst::AnyAny {
2509                        kind: AnyAnyKind::Add64,
2510                        dst: new_value,
2511                        src1: operand_regimm,
2512                        src2: RegImm::Imm(0),
2513                    }));
2514                }
2515                AtomicKind::Swap32 => {
2516                    emit(InstExt::Basic(BasicInst::AnyAny {
2517                        kind: AnyAnyKind::Add32,
2518                        dst: new_value,
2519                        src1: operand_regimm,
2520                        src2: RegImm::Imm(0),
2521                    }));
2522                }
2523                AtomicKind::Add64 => {
2524                    emit(InstExt::Basic(BasicInst::AnyAny {
2525                        kind: AnyAnyKind::Add64,
2526                        dst: new_value,
2527                        src1: old_value.into(),
2528                        src2: operand_regimm,
2529                    }));
2530                }
2531                AtomicKind::Add32 => {
2532                    emit(InstExt::Basic(BasicInst::AnyAny {
2533                        kind: AnyAnyKind::Add32,
2534                        dst: new_value,
2535                        src1: old_value.into(),
2536                        src2: operand_regimm,
2537                    }));
2538                }
2539                AtomicKind::And64 => {
2540                    emit(InstExt::Basic(BasicInst::AnyAny {
2541                        kind: AnyAnyKind::And64,
2542                        dst: new_value,
2543                        src1: old_value.into(),
2544                        src2: operand_regimm,
2545                    }));
2546                }
2547                AtomicKind::And32 => {
2548                    emit(InstExt::Basic(BasicInst::AnyAny {
2549                        kind: AnyAnyKind::And32,
2550                        dst: new_value,
2551                        src1: old_value.into(),
2552                        src2: operand_regimm,
2553                    }));
2554                }
2555                AtomicKind::Or64 => {
2556                    emit(InstExt::Basic(BasicInst::AnyAny {
2557                        kind: AnyAnyKind::Or64,
2558                        dst: new_value,
2559                        src1: old_value.into(),
2560                        src2: operand_regimm,
2561                    }));
2562                }
2563                AtomicKind::Or32 => {
2564                    emit(InstExt::Basic(BasicInst::AnyAny {
2565                        kind: AnyAnyKind::Or32,
2566                        dst: new_value,
2567                        src1: old_value.into(),
2568                        src2: operand_regimm,
2569                    }));
2570                }
2571                AtomicKind::Xor64 => {
2572                    emit(InstExt::Basic(BasicInst::AnyAny {
2573                        kind: AnyAnyKind::Xor64,
2574                        dst: new_value,
2575                        src1: old_value.into(),
2576                        src2: operand_regimm,
2577                    }));
2578                }
2579                AtomicKind::Xor32 => {
2580                    emit(InstExt::Basic(BasicInst::AnyAny {
2581                        kind: AnyAnyKind::Xor32,
2582                        dst: new_value,
2583                        src1: old_value.into(),
2584                        src2: operand_regimm,
2585                    }));
2586                }
2587                AtomicKind::MaxSigned32 => {
2588                    emit_minmax(MinMax::MaxSigned, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2589                }
2590                AtomicKind::MinSigned32 => {
2591                    emit_minmax(MinMax::MinSigned, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2592                }
2593                AtomicKind::MaxUnsigned32 => {
2594                    emit_minmax(MinMax::MaxUnsigned, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2595                }
2596                AtomicKind::MinUnsigned32 => {
2597                    emit_minmax(MinMax::MinUnsigned, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2598                }
2599                AtomicKind::MaxSigned64 => {
2600                    emit_minmax(MinMax::MaxSigned64, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2601                }
2602                AtomicKind::MinSigned64 => {
2603                    emit_minmax(MinMax::MinSigned64, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2604                }
2605                AtomicKind::MaxUnsigned64 => {
2606                    emit_minmax(MinMax::MaxUnsigned64, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2607                }
2608                AtomicKind::MinUnsigned64 => {
2609                    emit_minmax(MinMax::MinUnsigned64, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2610                }
2611            }
2612
2613            emit(InstExt::Basic(BasicInst::StoreIndirect {
2614                kind: if is_64_bit { StoreKind::U64 } else { StoreKind::U32 },
2615                src: new_value.into(),
2616                base: addr,
2617                offset: 0,
2618            }));
2619
2620            if let Some(output) = output {
2621                emit(InstExt::Basic(BasicInst::MoveReg {
2622                    dst: output,
2623                    src: old_value,
2624                }));
2625            }
2626
2627            Ok(())
2628        }
2629    }
2630}
2631
2632/// Read `n` bytes in `text` at `relative_offset` where `n` is
2633/// the length of the instruction at `relative_offset`.
2634///
2635/// # Panics
2636/// - Valid RISC-V instructions can be 2 or 4 bytes. Misaligned
2637///   `relative_offset` are considered an internal error.
2638/// - `relative_offset` is expected to be inbounds.
2639///
2640/// # Returns
2641/// The instruction length and the raw instruction.
2642fn read_instruction_bytes(text: &[u8], relative_offset: usize) -> (u64, u32) {
2643    assert!(
2644        relative_offset % VM_CODE_ADDRESS_ALIGNMENT as usize == 0,
2645        "internal error: misaligned instruction read: 0x{relative_offset:08x}"
2646    );
2647
2648    if Inst::is_compressed(text[relative_offset]) {
2649        (2, u32::from(u16::from_le_bytes([text[relative_offset], text[relative_offset + 1]])))
2650    } else {
2651        (
2652            4,
2653            u32::from_le_bytes([
2654                text[relative_offset],
2655                text[relative_offset + 1],
2656                text[relative_offset + 2],
2657                text[relative_offset + 3],
2658            ]),
2659        )
2660    }
2661}
2662
// `func3` selector values distinguishing the custom PolkaVM instructions
// encoded in the RISC-V `CUSTOM_0` opcode space; matched against the
// unpacked instruction fields in `parse_code_section`.
const FUNC3_ECALLI: u32 = 0b000;
const FUNC3_SBRK: u32 = 0b001;
const FUNC3_MEMSET: u32 = 0b010;
const FUNC3_HEAP_BASE: u32 = 0b011;
2667
/// Parses an executable section's machine code into the linker's intermediate
/// representation, appending one or more `(Source, InstExt)` pairs per decoded
/// instruction to `output`.
///
/// Besides ordinary RISC-V instructions this also recognizes the custom
/// PolkaVM instructions encoded in the `CUSTOM_0` opcode space (`ecalli`,
/// `sbrk`, `memset` and "load heap base").
///
/// * `relocations` - used to resolve the pointer-to-metadata embedded after
///   each `ecalli` instruction.
/// * `imports` / `metadata_to_nth_import` - accumulate deduplicated imports.
/// * `instruction_overrides` - pre-resolved replacement instructions (keyed by
///   location) which take precedence over plain decoding; entries are consumed
///   (removed) as they're used.
#[allow(clippy::too_many_arguments)]
fn parse_code_section<H>(
    elf: &Elf<H>,
    section: &Section,
    decoder_config: &DecoderConfig,
    relocations: &BTreeMap<SectionTarget, RelocationKind>,
    imports: &mut Vec<Import>,
    metadata_to_nth_import: &mut HashMap<ExternMetadata, usize>,
    instruction_overrides: &mut HashMap<SectionTarget, InstExt<SectionTarget, SectionTarget>>,
    output: &mut Vec<(Source, InstExt<SectionTarget, SectionTarget>)>,
) -> Result<(), ProgramFromElfError>
where
    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
{
    let section_index = section.index();
    let section_name = section.name();
    let text = &section.data();

    // The smallest valid instruction is 2 bytes, so the section size must be even.
    if text.len() % VM_CODE_ADDRESS_ALIGNMENT as usize != 0 {
        return Err(ProgramFromElfError::other(format!(
            "size of section '{section_name}' is not divisible by 2"
        )));
    }

    output.reserve(text.len() / 4);
    let mut relative_offset = 0;
    while relative_offset < text.len() {
        let current_location = SectionTarget {
            section_index: section.index(),
            offset: relative_offset.try_into().expect("overflow"),
        };

        let (inst_size, raw_inst) = read_instruction_bytes(text, relative_offset);

        // `ecalli`: 4-byte custom instruction followed by an inline pointer to the import's metadata.
        if crate::riscv::R(raw_inst).unpack() == (crate::riscv::OPCODE_CUSTOM_0, FUNC3_ECALLI, 0, RReg::Zero, RReg::Zero, RReg::Zero) {
            let initial_offset = relative_offset as u64;
            let pointer_size = if elf.is_64() { 8 } else { 4 };

            // so (on 32-bit): 4 (ecalli) + 4 (pointer) = 8
            if relative_offset + pointer_size + 4 > text.len() {
                return Err(ProgramFromElfError::other("truncated ecalli instruction"));
            }

            // The metadata pointer lives right after the instruction; its actual
            // value is only available through the relocation attached to it.
            let target_location = current_location.add(4);
            relative_offset += 4 + pointer_size;

            let Some(relocation) = relocations.get(&target_location) else {
                return Err(ProgramFromElfError::other(format!(
                    "found an external call without a relocation for a pointer to metadata at {target_location}"
                )));
            };

            // Expect a pointer-sized absolute relocation matching the ELF's bitness.
            let metadata_location = match relocation {
                RelocationKind::Abs {
                    target,
                    size: RelocationSize::U64,
                } if elf.is_64() => target,
                RelocationKind::Abs {
                    target,
                    size: RelocationSize::U32,
                } if !elf.is_64() => target,
                _ => {
                    return Err(ProgramFromElfError::other(format!(
                        "found an external call with an unexpected relocation at {target_location}: {relocation:?}"
                    )));
                }
            };

            let metadata = parse_extern_metadata(elf, relocations, *metadata_location)?;

            // The same import can be inlined in multiple places, so deduplicate those here.
            let nth_import = match metadata_to_nth_import.entry(metadata) {
                std::collections::hash_map::Entry::Vacant(entry) => {
                    let nth_import = imports.len();
                    imports.push(Import {
                        metadata: entry.key().clone(),
                    });
                    entry.insert(nth_import);
                    nth_import
                }
                std::collections::hash_map::Entry::Occupied(entry) => *entry.get(),
            };

            output.push((
                Source {
                    section_index,
                    offset_range: AddressRange::from(initial_offset..relative_offset as u64),
                },
                InstExt::Basic(BasicInst::Ecalli { nth_import }),
            ));

            continue;
        }

        // `sbrk dst, size`: both registers must be non-zero for the instruction to be meaningful.
        if let (crate::riscv::OPCODE_CUSTOM_0, FUNC3_SBRK, 0, dst, size, RReg::Zero) = crate::riscv::R(raw_inst).unpack() {
            let Some(dst) = cast_reg_non_zero(dst)? else {
                return Err(ProgramFromElfError::other(
                    "found an 'sbrk' instruction with the zero register as the destination",
                ));
            };

            let Some(size) = cast_reg_non_zero(size)? else {
                return Err(ProgramFromElfError::other(
                    "found an 'sbrk' instruction with the zero register as the size",
                ));
            };

            output.push((
                Source {
                    section_index,
                    offset_range: (relative_offset as u64..relative_offset as u64 + inst_size).into(),
                },
                InstExt::Basic(BasicInst::Sbrk { dst, size }),
            ));

            relative_offset += inst_size as usize;
            continue;
        }

        // `memset`: takes no register operands.
        if let (crate::riscv::OPCODE_CUSTOM_0, FUNC3_MEMSET, 0, RReg::Zero, RReg::Zero, RReg::Zero) = crate::riscv::R(raw_inst).unpack() {
            output.push((
                Source {
                    section_index,
                    offset_range: (relative_offset as u64..relative_offset as u64 + inst_size).into(),
                },
                InstExt::Basic(BasicInst::Memset),
            ));

            relative_offset += inst_size as usize;
            continue;
        }

        // "load heap base": degrades to a NOP if the destination is the zero register.
        if let (crate::riscv::OPCODE_CUSTOM_0, FUNC3_HEAP_BASE, 0, dst, RReg::Zero, RReg::Zero) = crate::riscv::R(raw_inst).unpack() {
            output.push((
                Source {
                    section_index,
                    offset_range: (relative_offset as u64..relative_offset as u64 + inst_size).into(),
                },
                match cast_reg_non_zero(dst)? {
                    Some(dst) => InstExt::Basic(BasicInst::LoadHeapBase { dst }),
                    None => InstExt::Basic(BasicInst::Nop),
                },
            ));

            relative_offset += inst_size as usize;
            continue;
        }

        let source = Source {
            section_index,
            offset_range: AddressRange::from(relative_offset as u64..relative_offset as u64 + inst_size),
        };

        relative_offset += inst_size as usize;

        let Some(original_inst) = Inst::decode(decoder_config, raw_inst) else {
            return Err(ProgramFromElfErrorKind::UnsupportedInstruction {
                section: section.name().into(),
                offset: current_location.offset,
                instruction: raw_inst,
            }
            .into());
        };

        if let Some(inst) = instruction_overrides.remove(&current_location) {
            output.push((source, inst));
        } else {
            // For some reason (compiler bug?) *very rarely* we have those AUIPC instructions
            // without any relocation attached to them, so let's deal with them traditionally.
            if let Inst::AddUpperImmediateToPc {
                dst: base_upper,
                value: value_upper,
            } = original_inst
            {
                if relative_offset < text.len() {
                    let (next_inst_size, next_inst) = read_instruction_bytes(text, relative_offset);
                    let next_inst = Inst::decode(decoder_config, next_inst);

                    // Fuse an `auipc` + `jalr` pair into a direct call when the
                    // intermediate register is entirely consumed by the pair.
                    if let Some(Inst::JumpAndLinkRegister { dst: ra_dst, base, value }) = next_inst {
                        if base == ra_dst && base == base_upper {
                            if let Some(ra) = cast_reg_non_zero(ra_dst)? {
                                // NOTE(review): `next_inst_size` is used here to recover the AUIPC's
                                // own PC, and `inst_size` below to skip over the JALR; the two are
                                // only interchangeable when both instructions are 4 bytes — confirm
                                // a compressed `c.jalr` can't reach this path.
                                let offset = (relative_offset as i32 - next_inst_size as i32)
                                    .wrapping_add(value)
                                    .wrapping_add(value_upper as i32);
                                if offset >= 0 && offset < section.data().len() as i32 {
                                    output.push((
                                        source,
                                        InstExt::Control(ControlInst::Call {
                                            ra,
                                            target: SectionTarget {
                                                section_index,
                                                offset: u64::from(cast(offset).to_unsigned()),
                                            },
                                            target_return: current_location.add(inst_size + next_inst_size),
                                        }),
                                    ));

                                    relative_offset += inst_size as usize;
                                    continue;
                                }
                            }
                        }
                    }
                }
            }

            let original_length = output.len();
            convert_instruction(elf, section, current_location, original_inst, inst_size, elf.is_64(), |inst| {
                output.push((source, inst));
            })?;

            // We need to always emit at least one instruction (even if it's a NOP) to handle potential jumps.
            assert_ne!(
                output.len(),
                original_length,
                "internal error: no instructions were emitted for instruction {original_inst:?} in section {section_name}"
            );
        }
    }

    Ok(())
}
2890
/// Splits a flat instruction stream into basic blocks.
///
/// A block ends either at a control-flow instruction, or just before any
/// location that can be jumped to (per `jump_targets`) — in the latter case an
/// explicit fallthrough `Jump` is synthesized. Instructions which cannot be
/// reached (no live block start and not a jump target) are dropped.
///
/// # Errors
/// Fails if the stream doesn't end with a control-flow instruction, or if a
/// branch falls through into a different section.
fn split_code_into_basic_blocks<H>(
    elf: &Elf<H>,
    #[allow(unused_variables)] section_to_function_name: &BTreeMap<SectionTarget, &str>,
    jump_targets: &HashSet<SectionTarget>,
    instructions: Vec<(Source, InstExt<SectionTarget, SectionTarget>)>,
) -> Result<Vec<BasicBlock<SectionTarget, SectionTarget>>, ProgramFromElfError>
where
    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
{
    #[cfg(test)]
    let _ = elf;

    let mut blocks: Vec<BasicBlock<SectionTarget, SectionTarget>> = Vec::new();
    // Instructions accumulated for the block currently being built.
    let mut current_block: Vec<(SourceStack, BasicInst<SectionTarget>)> = Vec::new();
    // `Some((section, offset))` while inside a live (reachable) block.
    let mut block_start_opt = None;
    let mut last_source_in_block = None;
    #[cfg(not(test))]
    let mut current_symbol = "";
    for (source, op) in instructions {
        // TODO: This panics because we use a dummy ELF in tests; fix it.
        #[cfg(not(test))]
        {
            if let Some(name) = section_to_function_name.get(&source.begin()) {
                current_symbol = name;
            }
            log::trace!(
                "Instruction at {source} (0x{:x}) \"{current_symbol}\": {op:?}",
                elf.section_by_index(source.section_index).original_address() + source.offset_range.start
            );
        }

        if let Some(last_source_in_block) = last_source_in_block {
            // Handle the case where we've emitted multiple instructions from a single RISC-V instruction.
            if source == last_source_in_block {
                let InstExt::Basic(instruction) = op else { unreachable!() };
                current_block.push((source.into(), instruction));
                continue;
            }
        }

        assert!(source.offset_range.start < source.offset_range.end);

        let is_jump_target = jump_targets.contains(&source.begin());
        let (block_section, block_start) = if !is_jump_target {
            // Make sure nothing wants to jump into the middle of this instruction.
            assert!((source.offset_range.start..source.offset_range.end)
                .step_by(2)
                .skip(1)
                .all(|offset| !jump_targets.contains(&SectionTarget {
                    section_index: source.section_index,
                    offset
                })));

            if let Some((block_section, block_start)) = block_start_opt {
                // We're in a block that's reachable by a jump.
                (block_section, block_start)
            } else {
                // Nothing can possibly jump here, so just skip this instruction.
                log::trace!("Skipping dead instruction at {}: {:?}", source.begin(), op);
                continue;
            }
        } else {
            // Control flow can jump to this instruction.
            if let Some((block_section, block_start)) = block_start_opt.take() {
                // End the current basic block to prevent a jump into the middle of it.
                if !current_block.is_empty() {
                    let block_index = BlockTarget::from_raw(blocks.len());
                    let block_source = Source {
                        section_index: block_section,
                        offset_range: (block_start..source.offset_range.start).into(),
                    };

                    let last_instruction_source = current_block.last().unwrap().0.as_slice()[0];
                    assert_eq!(last_instruction_source.section_index, block_section);

                    // The synthesized fallthrough jump is attributed to the span from
                    // the last instruction's start up to the jump target.
                    let end_of_block_source = Source {
                        section_index: block_section,
                        offset_range: (last_instruction_source.offset_range.start..source.offset_range.start).into(),
                    };

                    assert!(block_source.offset_range.start < block_source.offset_range.end);
                    assert!(end_of_block_source.offset_range.start < end_of_block_source.offset_range.end);

                    log::trace!("Emitting block (due to a potential jump): {}", block_source.begin());
                    blocks.push(BasicBlock::new(
                        block_index,
                        block_source,
                        core::mem::take(&mut current_block),
                        EndOfBlock {
                            source: end_of_block_source.into(),
                            instruction: ControlInst::Jump { target: source.begin() },
                        },
                    ));
                }
            }

            // This instruction begins a fresh block.
            block_start_opt = Some((source.section_index, source.offset_range.start));
            (source.section_index, source.offset_range.start)
        };

        match op {
            InstExt::Control(instruction) => {
                last_source_in_block = None;
                block_start_opt = None;

                let block_index = BlockTarget::from_raw(blocks.len());
                let block_source = Source {
                    section_index: block_section,
                    offset_range: (block_start..source.offset_range.end).into(),
                };

                log::trace!("Emitting block (due to a control instruction): {}", block_source.begin());
                blocks.push(BasicBlock::new(
                    block_index,
                    block_source,
                    core::mem::take(&mut current_block),
                    EndOfBlock {
                        source: source.into(),
                        instruction,
                    },
                ));

                // A branch's not-taken side falls through, so the next
                // instruction implicitly starts a new live block.
                if let ControlInst::Branch { target_false, .. } = instruction {
                    if !cfg!(test) {
                        if source.section_index != target_false.section_index {
                            return Err(ProgramFromElfError::other("found a branch with a fallthrough to another section"));
                        }
                        assert_eq!(source.offset_range.end, target_false.offset);
                    }
                    block_start_opt = Some((block_section, source.offset_range.end));
                }
            }
            InstExt::Basic(instruction) => {
                last_source_in_block = Some(source);
                current_block.push((source.into(), instruction));
            }
        }
    }

    if !current_block.is_empty() {
        return Err(ProgramFromElfError::other(
            "code doesn't end with a control-flow affecting instruction",
        ));
    }

    Ok(blocks)
}
3038
3039fn build_section_to_block_map(
3040    blocks: &[BasicBlock<SectionTarget, SectionTarget>],
3041) -> Result<HashMap<SectionTarget, BlockTarget>, ProgramFromElfError> {
3042    let mut section_to_block = HashMap::new();
3043    for (block_index, block) in blocks.iter().enumerate() {
3044        let section_target = block.source.begin();
3045        let block_target = BlockTarget::from_raw(block_index);
3046        if section_to_block.insert(section_target, block_target).is_some() {
3047            return Err(ProgramFromElfError::other("found two or more basic blocks with the same location"));
3048        }
3049    }
3050
3051    Ok(section_to_block)
3052}
3053
3054fn resolve_basic_block_references(
3055    data_sections_set: &HashSet<SectionIndex>,
3056    section_to_block: &HashMap<SectionTarget, BlockTarget>,
3057    blocks: &[BasicBlock<SectionTarget, SectionTarget>],
3058) -> Result<Vec<BasicBlock<AnyTarget, BlockTarget>>, ProgramFromElfError> {
3059    let mut output = Vec::with_capacity(blocks.len());
3060    for block in blocks {
3061        let mut ops = Vec::with_capacity(block.ops.len());
3062        for (source, op) in &block.ops {
3063            let map = |target: SectionTarget| {
3064                if data_sections_set.contains(&target.section_index) {
3065                    Ok(AnyTarget::Data(target))
3066                } else if let Some(&target) = section_to_block.get(&target) {
3067                    Ok(AnyTarget::Code(target))
3068                } else {
3069                    return Err(ProgramFromElfError::other(format!(
3070                        "found basic instruction which doesn't point to a data section nor resolve to any basic block: {source:?}, {op:?}",
3071                    )));
3072                }
3073            };
3074
3075            let op = op.map_target(map)?;
3076            ops.push((source.clone(), op));
3077        }
3078
3079        let Ok(next) = block
3080            .next
3081            .clone()
3082            .map_target(|section_target| section_to_block.get(&section_target).copied().ok_or(()))
3083        else {
3084            return Err(ProgramFromElfError::other(format!(
3085                "found control instruction at the end of block at {block_source} whose target doesn't resolve to any basic block: {next:?}",
3086                block_source = block.source,
3087                next = block.next.instruction,
3088            )));
3089        };
3090
3091        output.push(BasicBlock::new(block.target, block.source, ops, next));
3092    }
3093
3094    Ok(output)
3095}
3096
/// Performs a mark-and-sweep garbage collection pass over the reachability graph.
///
/// Marks everything transitively reachable from the always-reachable/exported
/// code blocks and data sections, then prunes every unmarked entry (and any
/// edges pointing at it) from the graph.
///
/// Returns `true` if anything was removed, `false` if the graph was already
/// fully reachable.
fn garbage_collect_reachability(all_blocks: &[BasicBlock<AnyTarget, BlockTarget>], reachability_graph: &mut ReachabilityGraph) -> bool {
    // Seed the mark phase with the roots: everything that is always reachable or exported.
    let mut queue_code = VecSet::new();
    let mut queue_data = VecSet::new();
    for (block_target, reachability) in &reachability_graph.for_code {
        if reachability.always_reachable_or_exported() {
            queue_code.push(*block_target);
        }
    }

    for (data_target, reachability) in &reachability_graph.for_data {
        if reachability.always_reachable_or_exported() {
            queue_data.push(*data_target);
        }
    }

    // Mark phase: flood-fill through code-to-code, code-to-data and
    // data-to-code/data edges until a fixed point is reached.
    while !queue_code.is_empty() || !queue_data.is_empty() {
        while let Some(block_target) = queue_code.pop_unique() {
            each_reference(&all_blocks[block_target.index()], |ext| match ext {
                ExtRef::Jump(target) | ExtRef::Address(target) => queue_code.push(target),
                ExtRef::DataAddress(target) => queue_data.push(target),
            });
        }

        while let Some(data_target) = queue_data.pop_unique() {
            if let Some(list) = reachability_graph.code_references_in_data_section.get(&data_target) {
                for &target in list {
                    queue_code.push(target);
                }
            }

            if let Some(list) = reachability_graph.data_references_in_data_section.get(&data_target) {
                for &target in list {
                    queue_data.push(target);
                }
            }
        }
    }

    // If everything was marked there's nothing to sweep.
    let set_code = queue_code.into_set();
    let set_data = queue_data.into_set();
    if set_code.len() == reachability_graph.for_code.len() && set_data.len() == reachability_graph.for_data.len() {
        return false;
    }

    // Sweep phase: drop unmarked code entries and purge dangling back-edges
    // from the entries which survive.
    log::debug!(
        "Code reachability garbage collection: {} -> {}",
        reachability_graph.for_code.len(),
        set_code.len()
    );
    reachability_graph.for_code.retain(|block_target, reachability| {
        reachability.reachable_from.retain(|inner_key| set_code.contains(inner_key));
        reachability.address_taken_in.retain(|inner_key| set_code.contains(inner_key));
        reachability.referenced_by_data.retain(|inner_key| set_data.contains(inner_key));
        if !set_code.contains(block_target) {
            assert!(!reachability.always_reachable);
            log::trace!("  Garbage collected: {block_target:?}");
            false
        } else {
            true
        }
    });

    assert_eq!(reachability_graph.for_code.len(), set_code.len());

    log::debug!(
        "Data reachability garbage collection: {} -> {}",
        reachability_graph.for_data.len(),
        set_data.len()
    );
    reachability_graph.for_data.retain(|data_target, reachability| {
        assert!(reachability.reachable_from.is_empty());
        reachability.address_taken_in.retain(|inner_key| set_code.contains(inner_key));
        reachability.referenced_by_data.retain(|inner_key| set_data.contains(inner_key));
        if !set_data.contains(data_target) {
            assert!(!reachability.always_reachable);
            log::trace!("  Garbage collected: {data_target:?}");
            false
        } else {
            true
        }
    });

    // Drop the per-data-section reference lists for swept sections; surviving
    // lists must only mention surviving entries.
    reachability_graph.code_references_in_data_section.retain(|data_target, list| {
        if !set_data.contains(data_target) {
            false
        } else {
            assert!(list.iter().all(|block_target| set_code.contains(block_target)));
            true
        }
    });

    reachability_graph.data_references_in_data_section.retain(|data_target, list| {
        if !set_data.contains(data_target) {
            false
        } else {
            assert!(list.iter().all(|next_data_target| set_data.contains(next_data_target)));
            true
        }
    });

    assert_eq!(reachability_graph.for_data.len(), set_data.len());
    true
}
3200
/// Removes the already-unreachable code block `current` from the reachability graph.
///
/// Every target referenced by the block loses one incoming edge. Targets which become
/// unreachable as a result are pushed onto `queue_code` / `queue_data` so the caller can
/// remove them too; code targets which stay reachable are pushed onto `optimize_queue`
/// (if provided), since losing a predecessor may open up new optimization opportunities.
fn remove_unreachable_code_impl(
    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
    reachability_graph: &mut ReachabilityGraph,
    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
    queue_code: &mut VecSet<BlockTarget>,
    queue_data: &mut VecSet<SectionIndex>,
    current: BlockTarget,
) {
    // The caller is responsible for only calling this on blocks which are already unreachable.
    assert!(reachability_graph.for_code.get(&current).unwrap().is_unreachable());
    log::trace!("Removing {current:?} from the graph...");

    each_reference(&all_blocks[current.index()], |ext| match ext {
        // `current` jumped to `target`; unlink that edge.
        ExtRef::Jump(target) => {
            log::trace!("{target:?} is not reachable from {current:?} anymore");
            let reachability = reachability_graph.for_code.get_mut(&target).unwrap();
            reachability.reachable_from.remove(&current);
            if reachability.is_unreachable() {
                log::trace!("{target:?} is now unreachable!");
                queue_code.push(target)
            } else if let Some(ref mut optimize_queue) = optimize_queue {
                optimize_queue.push(target);
            }
        }
        // `current` took the address of the code block `target`; unlink that edge.
        ExtRef::Address(target) => {
            log::trace!("{target:?}'s address is not taken in {current:?} anymore");
            let reachability = reachability_graph.for_code.get_mut(&target).unwrap();
            reachability.address_taken_in.remove(&current);
            if reachability.is_unreachable() {
                log::trace!("{target:?} is now unreachable!");
                queue_code.push(target)
            } else if let Some(ref mut optimize_queue) = optimize_queue {
                optimize_queue.push(target);
            }
        }
        // `current` took the address of the data section `target`; unlink that edge.
        // Note: data targets are never pushed onto the optimize queue; only code is optimized.
        ExtRef::DataAddress(target) => {
            log::trace!("{target:?}'s address is not taken in {current:?} anymore");
            let reachability = reachability_graph.for_data.get_mut(&target).unwrap();
            reachability.address_taken_in.remove(&current);
            if reachability.is_unreachable() {
                log::trace!("{target:?} is now unreachable!");
                queue_data.push(target);
            }
        }
    });

    reachability_graph.for_code.remove(&current);
}
3248
/// Removes the already-unreachable data section `current` from the reachability graph.
///
/// Every code block and data section referenced by this section loses one incoming edge;
/// anything which becomes unreachable as a result is pushed onto `queue_code` /
/// `queue_data` for the caller to process. Code targets which stay reachable are pushed
/// onto `optimize_queue` (if provided).
fn remove_unreachable_data_impl(
    reachability_graph: &mut ReachabilityGraph,
    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
    queue_code: &mut VecSet<BlockTarget>,
    queue_data: &mut VecSet<SectionIndex>,
    current: SectionIndex,
) {
    // The caller is responsible for only calling this on sections which are already unreachable.
    assert!(reachability_graph.for_data.get(&current).unwrap().is_unreachable());
    log::trace!("Removing {current:?} from the graph...");

    // Detach this section's outgoing reference lists before walking them.
    let code_refs = reachability_graph.code_references_in_data_section.remove(&current);
    let data_refs = reachability_graph.data_references_in_data_section.remove(&current);

    // Unlink every code block this section referenced.
    if let Some(list) = code_refs {
        for target in list {
            log::trace!("{target:?} is not reachable from {current:?} anymore");
            let reachability = reachability_graph.for_code.get_mut(&target).unwrap();
            reachability.referenced_by_data.remove(&current);
            if reachability.is_unreachable() {
                log::trace!("{target:?} is now unreachable!");
                queue_code.push(target)
            } else if let Some(ref mut optimize_queue) = optimize_queue {
                optimize_queue.push(target);
            }
        }
    }

    // Unlink every data section this section referenced.
    if let Some(list) = data_refs {
        for target in list {
            log::trace!("{target:?} is not reachable from {current:?} anymore");
            let reachability = reachability_graph.for_data.get_mut(&target).unwrap();
            reachability.referenced_by_data.remove(&current);
            if reachability.is_unreachable() {
                log::trace!("{target:?} is now unreachable!");
                queue_data.push(target)
            }
        }
    }

    reachability_graph.for_data.remove(&current);
}
3290
3291fn remove_code_if_globally_unreachable(
3292    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
3293    reachability_graph: &mut ReachabilityGraph,
3294    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
3295    block_target: BlockTarget,
3296) {
3297    let Some(reachability) = reachability_graph.for_code.get(&block_target) else {
3298        return;
3299    };
3300    if !reachability.is_unreachable() {
3301        return;
3302    }
3303
3304    // The inner block is now globally unreachable.
3305    let mut queue_code = VecSet::new();
3306    let mut queue_data = VecSet::new();
3307    remove_unreachable_code_impl(
3308        all_blocks,
3309        reachability_graph,
3310        optimize_queue.as_deref_mut(),
3311        &mut queue_code,
3312        &mut queue_data,
3313        block_target,
3314    );
3315
3316    // If there are other dependencies which are now unreachable then remove them too.
3317    while !queue_code.is_empty() || !queue_data.is_empty() {
3318        while let Some(next) = queue_code.pop_unique() {
3319            remove_unreachable_code_impl(
3320                all_blocks,
3321                reachability_graph,
3322                optimize_queue.as_deref_mut(),
3323                &mut queue_code,
3324                &mut queue_data,
3325                next,
3326            );
3327        }
3328
3329        while let Some(next) = queue_data.pop_unique() {
3330            remove_unreachable_data_impl(
3331                reachability_graph,
3332                optimize_queue.as_deref_mut(),
3333                &mut queue_code,
3334                &mut queue_data,
3335                next,
3336            );
3337        }
3338    }
3339}
3340
3341fn remove_if_data_is_globally_unreachable(
3342    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
3343    reachability_graph: &mut ReachabilityGraph,
3344    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
3345    data_target: SectionIndex,
3346) {
3347    let Some(reachability) = reachability_graph.for_data.get(&data_target) else {
3348        return;
3349    };
3350    if !reachability.is_unreachable() {
3351        return;
3352    }
3353
3354    let mut queue_code = VecSet::new();
3355    let mut queue_data = VecSet::new();
3356    remove_unreachable_data_impl(
3357        reachability_graph,
3358        optimize_queue.as_deref_mut(),
3359        &mut queue_code,
3360        &mut queue_data,
3361        data_target,
3362    );
3363
3364    // If there are other dependencies which are now unreachable then remove them too.
3365    while !queue_code.is_empty() || !queue_data.is_empty() {
3366        while let Some(next) = queue_code.pop_unique() {
3367            remove_unreachable_code_impl(
3368                all_blocks,
3369                reachability_graph,
3370                optimize_queue.as_deref_mut(),
3371                &mut queue_code,
3372                &mut queue_data,
3373                next,
3374            );
3375        }
3376
3377        while let Some(next) = queue_data.pop_unique() {
3378            remove_unreachable_data_impl(
3379                reachability_graph,
3380                optimize_queue.as_deref_mut(),
3381                &mut queue_code,
3382                &mut queue_data,
3383                next,
3384            );
3385        }
3386    }
3387}
3388
3389fn add_to_optimize_queue(
3390    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
3391    reachability_graph: &ReachabilityGraph,
3392    optimize_queue: &mut VecSet<BlockTarget>,
3393    block_target: BlockTarget,
3394) {
3395    let Some(reachability) = reachability_graph.for_code.get(&block_target) else {
3396        return;
3397    };
3398    if reachability.is_unreachable() {
3399        return;
3400    }
3401
3402    optimize_queue.push(block_target);
3403
3404    for &previous in &reachability.reachable_from {
3405        optimize_queue.push(previous);
3406    }
3407
3408    for &previous in &reachability.address_taken_in {
3409        optimize_queue.push(previous);
3410    }
3411
3412    for &next in all_blocks[block_target.index()].next.instruction.targets().into_iter().flatten() {
3413        optimize_queue.push(next);
3414    }
3415
3416    each_reference(&all_blocks[block_target.index()], |ext| match ext {
3417        ExtRef::Jump(target) => optimize_queue.push(target),
3418        ExtRef::Address(target) => optimize_queue.push(target),
3419        ExtRef::DataAddress(..) => {}
3420    });
3421}
3422
3423fn perform_nop_elimination(all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>], current: BlockTarget) {
3424    all_blocks[current.index()].ops.retain(|(_, instruction)| !instruction.is_nop());
3425}
3426
/// Attempts a single inlining-related rewrite of the block `current`, keeping the
/// reachability graph consistent. Returns `true` if anything was changed.
///
/// Three rewrites are attempted:
///   1. if `current` is empty and just jumps elsewhere, its exports are forwarded to
///      the jump target so `current` itself can be garbage collected,
///   2. the target of a direct jump is inlined into `current` when the heuristic in
///      `should_inline` allows it,
///   3. a branch to an empty block which immediately jumps elsewhere is rethreaded to
///      jump there directly.
///
/// `inline_history` records (outer, inner) pairs which were already handled so the
/// same pair is never processed twice.
#[deny(clippy::as_conversions)]
fn perform_inlining(
    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
    reachability_graph: &mut ReachabilityGraph,
    exports: &mut [Export],
    optimize_queue: Option<&mut VecSet<BlockTarget>>,
    inline_history: &mut HashSet<(BlockTarget, BlockTarget)>,
    inline_threshold: usize,
    current: BlockTarget,
) -> bool {
    /// A block which unconditionally jumps to itself.
    fn is_infinite_loop(all_blocks: &[BasicBlock<AnyTarget, BlockTarget>], current: BlockTarget) -> bool {
        all_blocks[current.index()].next.instruction == ControlInst::Jump { target: current }
    }

    /// Splices the body and terminator of `inner` onto the end of `outer` (which must
    /// end with a direct jump to `inner`), updating the reachability graph.
    fn inline(
        all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
        reachability_graph: &mut ReachabilityGraph,
        mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
        outer: BlockTarget,
        inner: BlockTarget,
    ) {
        log::trace!("Inlining {inner:?} into {outer:?}...");
        log::trace!("  {outer:?} will now end with: {:?}", all_blocks[inner.index()].next.instruction);

        // Both blocks (and their neighborhoods) may benefit from re-optimization afterwards.
        if let Some(ref mut optimize_queue) = optimize_queue {
            add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, outer);
            add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, inner);
        }

        // Inlining into ourselves doesn't make sense.
        assert_ne!(outer, inner);

        // No infinite loops.
        assert!(!is_infinite_loop(all_blocks, inner));

        // Make sure this block actually goes to the block we're inlining.
        assert_eq!(all_blocks[outer.index()].next.instruction, ControlInst::Jump { target: inner });

        // The inner block is not reachable from here anymore.
        // NOTE: This needs to be done *before* adding the references below,
        //       as the inner block might be an infinite loop.
        reachability_graph.for_code.get_mut(&inner).unwrap().reachable_from.remove(&outer);

        // Everything which the inner block accesses will be reachable from here, so update reachability.
        each_reference(&all_blocks[inner.index()], |ext| match ext {
            ExtRef::Jump(target) => {
                reachability_graph.for_code.entry(target).or_default().reachable_from.insert(outer);
            }
            ExtRef::Address(target) => {
                reachability_graph
                    .for_code
                    .entry(target)
                    .or_default()
                    .address_taken_in
                    .insert(outer);
            }
            ExtRef::DataAddress(target) => {
                reachability_graph
                    .for_data
                    .entry(target)
                    .or_default()
                    .address_taken_in
                    .insert(outer);
            }
        });

        // Remove it from the graph if it's globally unreachable now.
        remove_code_if_globally_unreachable(all_blocks, reachability_graph, optimize_queue, inner);

        // Copy the inner block's instructions over, overlaying the outer block's
        // source info on top of each instruction's own source info.
        let outer_source = all_blocks[outer.index()].next.source.clone();
        let inner_source = all_blocks[inner.index()].next.source.clone();
        let inner_code: Vec<_> = all_blocks[inner.index()]
            .ops
            .iter()
            .map(|(inner_source, op)| (outer_source.overlay_on_top_of(inner_source), *op))
            .collect();

        all_blocks[outer.index()].ops.extend(inner_code);
        all_blocks[outer.index()].next.source.overlay_on_top_of_inplace(&inner_source);
        all_blocks[outer.index()].next.instruction = all_blocks[inner.index()].next.instruction;
    }

    /// Heuristic deciding whether `target` should be inlined into `current`.
    fn should_inline(
        all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
        reachability_graph: &ReachabilityGraph,
        current: BlockTarget,
        target: BlockTarget,
        inline_threshold: usize,
    ) -> bool {
        // Don't inline if it's an infinite loop.
        if target == current || is_infinite_loop(all_blocks, target) {
            return false;
        }

        if let Some(fallthrough_target) = all_blocks[target.index()].next.instruction.fallthrough_target() {
            if fallthrough_target.index() == target.index() + 1 {
                // Do not inline if we'd need to inject a new fallthrough basic block.
                return false;
            }
        }

        // Inline if the target block is small enough.
        if all_blocks[target.index()].ops.len() <= inline_threshold {
            return true;
        }

        // Inline if the target block is only reachable from here.
        if let Some(reachability) = reachability_graph.for_code.get(&target) {
            if reachability.is_only_reachable_from(current) {
                return true;
            }
        }

        false
    }

    if !reachability_graph.is_code_reachable(current) {
        return false;
    }

    let block = &all_blocks[current.index()];
    match block.next.instruction {
        ControlInst::Jump { target } => {
            // If this block is empty and just jumps elsewhere, forward its exports
            // to the jump target so that this block can be garbage collected.
            if all_blocks[current.index()].ops.is_empty() && inline_history.insert((current, target)) {
                let reachability = reachability_graph.for_code.get_mut(&current).unwrap();
                if !reachability.exports.is_empty() {
                    let export_indexes = core::mem::take(&mut reachability.exports);
                    for &export_index in &export_indexes {
                        exports[export_index].location = all_blocks[target.index()].source.begin();
                    }
                    reachability_graph.for_code.get_mut(&target).unwrap().exports.extend(export_indexes);
                    remove_code_if_globally_unreachable(all_blocks, reachability_graph, optimize_queue, current);
                    return true;
                }
            }

            if should_inline(all_blocks, reachability_graph, current, target, inline_threshold) && inline_history.insert((current, target))
            {
                inline(all_blocks, reachability_graph, optimize_queue, current, target);
                return true;
            }
        }
        ControlInst::Branch {
            kind,
            src1,
            src2,
            target_true,
            target_false,
        } => {
            if let ControlInst::Jump { target } = all_blocks[target_true.index()].next.instruction {
                if target != target_true && all_blocks[target_true.index()].ops.is_empty() {
                    // We're branching to another block which immediately jumps somewhere else.
                    // So skip the middle-man and just jump where we want to go directly.
                    assert!(reachability_graph
                        .for_code
                        .get_mut(&target_true)
                        .unwrap()
                        .reachable_from
                        .remove(&current));

                    reachability_graph.for_code.get_mut(&target).unwrap().reachable_from.insert(current);
                    all_blocks[current.index()].next.instruction = ControlInst::Branch {
                        kind,
                        src1,
                        src2,
                        target_true: target,
                        target_false,
                    };

                    // The middle-man might have been kept alive only by us.
                    remove_code_if_globally_unreachable(all_blocks, reachability_graph, optimize_queue, target_true);
                    return true;
                }
            }
        }
        // Calls are expected to have been lowered away before this pass; hitting one is a bug.
        ControlInst::Call { .. } => unreachable!(),
        _ => {}
    }

    false
}
3607
3608fn gather_references(block: &BasicBlock<AnyTarget, BlockTarget>) -> BTreeSet<ExtRef> {
3609    let mut references = BTreeSet::new();
3610    each_reference(block, |ext| {
3611        references.insert(ext);
3612    });
3613    references
3614}
3615
/// Updates the reachability graph after the code of `block_target` was modified.
///
/// `old_references` must be the set of references the block had *before* the
/// modification (see `gather_references`). Edges which disappeared are unlinked,
/// edges which appeared are linked, and any target which became globally
/// unreachable as a result is removed from the graph entirely.
fn update_references(
    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
    reachability_graph: &mut ReachabilityGraph,
    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
    block_target: BlockTarget,
    mut old_references: BTreeSet<ExtRef>,
) {
    let mut new_references = gather_references(&all_blocks[block_target.index()]);
    // Strip the references common to both sets. Afterwards `old_references` contains
    // only the edges which were removed and `new_references` only those which were added.
    new_references.retain(|ext| !old_references.remove(ext));

    // Unlink the removed edges.
    for ext in &old_references {
        match ext {
            ExtRef::Jump(target) => {
                log::trace!("{target:?} is not reachable from {block_target:?} anymore");
                reachability_graph
                    .for_code
                    .get_mut(target)
                    .unwrap()
                    .reachable_from
                    .remove(&block_target);
            }
            ExtRef::Address(target) => {
                log::trace!("{target:?}'s address is not taken in {block_target:?} anymore");
                reachability_graph
                    .for_code
                    .get_mut(target)
                    .unwrap()
                    .address_taken_in
                    .remove(&block_target);
            }
            ExtRef::DataAddress(target) => {
                log::trace!("{target:?}'s address is not taken in {block_target:?} anymore");
                reachability_graph
                    .for_data
                    .get_mut(target)
                    .unwrap()
                    .address_taken_in
                    .remove(&block_target);
            }
        }
    }

    // Link the added edges.
    for ext in &new_references {
        match ext {
            ExtRef::Jump(target) => {
                log::trace!("{target:?} is reachable from {block_target:?}");
                reachability_graph
                    .for_code
                    .get_mut(target)
                    .unwrap()
                    .reachable_from
                    .insert(block_target);
            }
            ExtRef::Address(target) => {
                log::trace!("{target:?}'s address is taken in {block_target:?}");
                reachability_graph
                    .for_code
                    .get_mut(target)
                    .unwrap()
                    .address_taken_in
                    .insert(block_target);
            }
            ExtRef::DataAddress(target) => {
                log::trace!("{target:?}'s address is taken in {block_target:?}");
                reachability_graph
                    .for_data
                    .get_mut(target)
                    .unwrap()
                    .address_taken_in
                    .insert(block_target);
            }
        }
    }

    // Garbage-collect any affected target which lost its last reference.
    for ext in old_references.into_iter().chain(new_references.into_iter()) {
        match ext {
            ExtRef::Jump(target) => {
                remove_code_if_globally_unreachable(all_blocks, reachability_graph, optimize_queue.as_deref_mut(), target);
            }
            ExtRef::Address(target) => {
                remove_code_if_globally_unreachable(all_blocks, reachability_graph, optimize_queue.as_deref_mut(), target);
            }
            ExtRef::DataAddress(target) => {
                remove_if_data_is_globally_unreachable(all_blocks, reachability_graph, optimize_queue.as_deref_mut(), target);
            }
        }
    }
}
3704
/// Performs dead code elimination on a single basic block using a backwards
/// register-liveness scan, removing instructions which have no side effects
/// and whose results are never used. Returns `true` if anything was removed.
///
/// `registers_needed_for_block` caches, per block, the set of registers the
/// block needs on entry; when this block's entry set changes, its direct
/// jump-predecessors are re-queued (if `optimize_queue` is provided) so the
/// analysis can converge to a fixed point over repeated calls.
#[deny(clippy::as_conversions)]
fn perform_dead_code_elimination(
    config: &Config,
    imports: &[Import],
    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
    registers_needed_for_block: &mut [RegMask],
    reachability_graph: &mut ReachabilityGraph,
    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
    block_target: BlockTarget,
) -> bool {
    /// Scans the block's instructions backwards, starting from `registers_needed`
    /// (the registers live after the block's terminator), and strips dead
    /// instructions. Returns the set of registers the block needs on entry.
    #[allow(clippy::too_many_arguments)]
    fn perform_dead_code_elimination_on_block(
        config: &Config,
        imports: &[Import],
        all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
        reachability_graph: &mut ReachabilityGraph,
        mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
        modified: &mut bool,
        mut registers_needed: RegMask,
        block_target: BlockTarget,
    ) -> RegMask {
        // Seed the liveness set from the terminator: its destinations are dead
        // before it runs, its sources are live.
        let next_instruction = &all_blocks[block_target.index()].next.instruction;
        registers_needed.remove(next_instruction.dst_mask());
        registers_needed.insert(next_instruction.src_mask());

        let mut dead_code = Vec::new();
        for (nth_instruction, (_, op)) in all_blocks[block_target.index()].ops.iter().enumerate().rev() {
            let dst_mask = op.dst_mask(imports);
            if !op.has_side_effects(config) && (dst_mask & registers_needed) == RegMask::empty() {
                // This instruction has no side effects and its result is not used; it's dead.
                dead_code.push(nth_instruction);
                continue;
            }

            // If the register was overwritten it means it wasn't needed later.
            registers_needed.remove(dst_mask);
            // ...unless it was used as a source.
            registers_needed.insert(op.src_mask(imports));
        }

        if dead_code.is_empty() {
            return registers_needed;
        }

        *modified = true;
        if let Some(ref mut optimize_queue) = optimize_queue {
            add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, block_target);
        }

        // Snapshot the block's references before mutating it, so the graph can be fixed up afterwards.
        let references = gather_references(&all_blocks[block_target.index()]);
        for nth_instruction in dead_code {
            log::trace!(
                "Removing dead instruction in {}: {:?}",
                all_blocks[block_target.index()].ops[nth_instruction].0,
                all_blocks[block_target.index()].ops[nth_instruction].1
            );

            // Replace it with a NOP.
            all_blocks[block_target.index()].ops[nth_instruction].1 = BasicInst::Nop;
        }

        // Now actually drop all of the NOPs in one pass (keeps the saved indices valid above).
        all_blocks[block_target.index()]
            .ops
            .retain(|(_, instruction)| !instruction.is_nop());

        update_references(all_blocks, reachability_graph, optimize_queue, block_target, references);
        registers_needed
    }

    if !reachability_graph.is_code_reachable(block_target) {
        return false;
    }

    // Gather the direct predecessors which jump straight into this block; they will
    // need to be re-analyzed if this block's entry liveness changes.
    let mut previous_blocks = Vec::new();
    for &previous_block in &reachability_graph.for_code.get(&block_target).unwrap().reachable_from {
        if previous_block == block_target {
            continue;
        }

        let ControlInst::Jump { target } = all_blocks[previous_block.index()].next.instruction else {
            continue;
        };
        if target == block_target {
            previous_blocks.push(previous_block);
        }
    }

    let registers_needed_for_next_block = match all_blocks[block_target.index()].next.instruction {
        // If it's going to trap then it's not going to need any of the register values.
        ControlInst::Unimplemented => RegMask::empty(),
        // If it's a jump then we'll need whatever registers the jump target needs.
        ControlInst::Jump { target } => registers_needed_for_block[target.index()],
        ControlInst::Branch {
            target_true, target_false, ..
        } => registers_needed_for_block[target_true.index()] | registers_needed_for_block[target_false.index()],
        // ...otherwise assume it'll need all of them.
        ControlInst::Call { .. } => unreachable!(),
        ControlInst::CallIndirect { .. } | ControlInst::JumpIndirect { .. } => RegMask::all(),
    };

    let mut modified = false;
    let registers_needed_for_this_block = perform_dead_code_elimination_on_block(
        config,
        imports,
        all_blocks,
        reachability_graph,
        optimize_queue.as_deref_mut(),
        &mut modified,
        registers_needed_for_next_block,
        block_target,
    );

    // If this block's entry liveness changed then re-queue its predecessors,
    // since they might now contain newly-dead instructions.
    if registers_needed_for_block[block_target.index()] != registers_needed_for_this_block {
        registers_needed_for_block[block_target.index()] = registers_needed_for_this_block;
        if let Some(ref mut optimize_queue) = optimize_queue {
            for previous_block in previous_blocks {
                add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, previous_block);
            }
        }
    }

    modified
}
3828
/// Two-operand operation kinds where, per the name, each operand can be "anything"
/// (register or immediate). The `*32`/`*64` suffix is the operation width;
/// the `*AndSignExtend` variants presumably sign-extend a 32-bit result to 64 bits
/// (RV64 "W"-style semantics) — TODO confirm against the RISC-V lowering.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum AnyAnyKind {
    Add32,
    Add32AndSignExtend,
    Add64,
    Sub32,
    Sub32AndSignExtend,
    Sub64,
    And32,
    And64,
    Or32,
    Or64,
    Xor32,
    Xor64,
    SetLessThanUnsigned32,
    SetLessThanUnsigned64,
    SetLessThanSigned32,
    SetLessThanSigned64,
    ShiftLogicalLeft32,
    ShiftLogicalLeft32AndSignExtend,
    ShiftLogicalLeft64,
    ShiftLogicalRight32,
    ShiftLogicalRight32AndSignExtend,
    ShiftLogicalRight64,
    ShiftArithmeticRight32,
    ShiftArithmeticRight32AndSignExtend,
    ShiftArithmeticRight64,
    Mul32,
    Mul32AndSignExtend,
    Mul64,
    RotateRight32,
    RotateRight32AndSignExtend,
    RotateRight64,
}
3863
/// Unary operation kinds taking a single source register
/// (bit counting, byte reversal, and narrow-integer extension).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum RegKind {
    CountLeadingZeroBits32,
    CountLeadingZeroBits64,
    CountSetBits32,
    CountSetBits64,
    CountTrailingZeroBits32,
    CountTrailingZeroBits64,
    ReverseByte,
    SignExtend8,
    SignExtend16,
    ZeroExtend16,
}
3877
/// Two-operand operation kinds where, per the name, both operands are registers
/// (no immediate forms). Covers the multiply-upper/divide/remainder family and
/// the bit-manipulation ops.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum RegRegKind {
    MulUpperSignedSigned32,
    MulUpperSignedSigned64,
    MulUpperUnsignedUnsigned32,
    MulUpperUnsignedUnsigned64,
    MulUpperSignedUnsigned32,
    MulUpperSignedUnsigned64,
    Div32,
    Div32AndSignExtend,
    Div64,
    DivUnsigned32,
    DivUnsigned32AndSignExtend,
    DivUnsigned64,
    Rem32,
    Rem32AndSignExtend,
    Rem64,
    RemUnsigned32,
    RemUnsigned32AndSignExtend,
    RemUnsigned64,

    AndInverted,
    OrInverted,
    Xnor,
    Maximum,
    MaximumUnsigned,
    Minimum,
    MinimumUnsigned,
    RotateLeft32,
    RotateLeft32AndSignExtend,
    RotateLeft64,
}
3910
/// A unified, internal operation kind covering every `AnyAnyKind` and `RegRegKind`
/// (see the `From` conversions for those types) plus additional comparison kinds
/// (`Eq*`, `NotEq*`, `SetGreaterOrEqual*`) which have no direct counterpart there.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
enum OperationKind {
    Add32,
    Add32AndSignExtend,
    Add64,
    Sub32,
    Sub32AndSignExtend,
    Sub64,
    And32,
    And64,
    Or32,
    Or64,
    Xor32,
    Xor64,
    SetLessThanUnsigned32,
    SetLessThanUnsigned64,
    SetLessThanSigned32,
    SetLessThanSigned64,
    ShiftLogicalLeft32,
    ShiftLogicalLeft32AndSignExtend,
    ShiftLogicalLeft64,
    ShiftLogicalRight32,
    ShiftLogicalRight32AndSignExtend,
    ShiftLogicalRight64,
    ShiftArithmeticRight32,
    ShiftArithmeticRight32AndSignExtend,
    ShiftArithmeticRight64,

    Mul32,
    Mul32AndSignExtend,
    Mul64,
    MulUpperSignedSigned32,
    MulUpperSignedSigned64,
    MulUpperSignedUnsigned32,
    MulUpperSignedUnsigned64,
    MulUpperUnsignedUnsigned32,
    MulUpperUnsignedUnsigned64,
    Div32,
    Div32AndSignExtend,
    Div64,
    DivUnsigned32,
    DivUnsigned32AndSignExtend,
    DivUnsigned64,
    Rem32,
    Rem32AndSignExtend,
    Rem64,
    RemUnsigned32,
    RemUnsigned32AndSignExtend,
    RemUnsigned64,

    // Comparison kinds with no `AnyAnyKind`/`RegRegKind` counterpart.
    Eq32,
    Eq64,
    NotEq32,
    NotEq64,
    SetGreaterOrEqualSigned32,
    SetGreaterOrEqualSigned64,
    SetGreaterOrEqualUnsigned32,
    SetGreaterOrEqualUnsigned64,

    AndInverted,
    OrInverted,
    Xnor,
    Maximum,
    MaximumUnsigned,
    Minimum,
    MinimumUnsigned,
    RotateLeft32,
    RotateLeft32AndSignExtend,
    RotateLeft64,
    RotateRight32,
    RotateRight32AndSignExtend,
    RotateRight64,
}
3984
3985impl From<AnyAnyKind> for OperationKind {
3986    fn from(kind: AnyAnyKind) -> Self {
3987        match kind {
3988            AnyAnyKind::Add32 => Self::Add32,
3989            AnyAnyKind::Add32AndSignExtend => Self::Add32AndSignExtend,
3990            AnyAnyKind::Add64 => Self::Add64,
3991            AnyAnyKind::Sub32 => Self::Sub32,
3992            AnyAnyKind::Sub32AndSignExtend => Self::Sub32AndSignExtend,
3993            AnyAnyKind::Sub64 => Self::Sub64,
3994            AnyAnyKind::And32 => Self::And32,
3995            AnyAnyKind::And64 => Self::And64,
3996            AnyAnyKind::Or32 => Self::Or32,
3997            AnyAnyKind::Or64 => Self::Or64,
3998            AnyAnyKind::Xor32 => Self::Xor32,
3999            AnyAnyKind::Xor64 => Self::Xor64,
4000            AnyAnyKind::SetLessThanUnsigned32 => Self::SetLessThanUnsigned32,
4001            AnyAnyKind::SetLessThanUnsigned64 => Self::SetLessThanUnsigned64,
4002            AnyAnyKind::SetLessThanSigned32 => Self::SetLessThanSigned32,
4003            AnyAnyKind::SetLessThanSigned64 => Self::SetLessThanSigned64,
4004            AnyAnyKind::ShiftLogicalLeft32 => Self::ShiftLogicalLeft32,
4005            AnyAnyKind::ShiftLogicalLeft32AndSignExtend => Self::ShiftLogicalLeft32AndSignExtend,
4006            AnyAnyKind::ShiftLogicalLeft64 => Self::ShiftLogicalLeft64,
4007            AnyAnyKind::ShiftLogicalRight32 => Self::ShiftLogicalRight32,
4008            AnyAnyKind::ShiftLogicalRight32AndSignExtend => Self::ShiftLogicalRight32AndSignExtend,
4009            AnyAnyKind::ShiftLogicalRight64 => Self::ShiftLogicalRight64,
4010            AnyAnyKind::ShiftArithmeticRight32 => Self::ShiftArithmeticRight32,
4011            AnyAnyKind::ShiftArithmeticRight32AndSignExtend => Self::ShiftArithmeticRight32AndSignExtend,
4012            AnyAnyKind::ShiftArithmeticRight64 => Self::ShiftArithmeticRight64,
4013            AnyAnyKind::Mul32 => Self::Mul32,
4014            AnyAnyKind::Mul32AndSignExtend => Self::Mul32AndSignExtend,
4015            AnyAnyKind::Mul64 => Self::Mul64,
4016            AnyAnyKind::RotateRight32 => Self::RotateRight32,
4017            AnyAnyKind::RotateRight32AndSignExtend => Self::RotateRight32AndSignExtend,
4018            AnyAnyKind::RotateRight64 => Self::RotateRight64,
4019        }
4020    }
4021}
4022
// Maps a reg-reg instruction kind onto the generic operation used by the
// constant evaluator. This is a pure 1:1 renaming of variants; no semantics
// change here.
impl From<RegRegKind> for OperationKind {
    fn from(kind: RegRegKind) -> Self {
        match kind {
            RegRegKind::MulUpperSignedSigned32 => Self::MulUpperSignedSigned32,
            RegRegKind::MulUpperSignedSigned64 => Self::MulUpperSignedSigned64,
            RegRegKind::MulUpperUnsignedUnsigned32 => Self::MulUpperUnsignedUnsigned32,
            RegRegKind::MulUpperUnsignedUnsigned64 => Self::MulUpperUnsignedUnsigned64,
            RegRegKind::MulUpperSignedUnsigned32 => Self::MulUpperSignedUnsigned32,
            RegRegKind::MulUpperSignedUnsigned64 => Self::MulUpperSignedUnsigned64,
            RegRegKind::Div32 => Self::Div32,
            RegRegKind::Div32AndSignExtend => Self::Div32AndSignExtend,
            RegRegKind::Div64 => Self::Div64,
            RegRegKind::DivUnsigned32 => Self::DivUnsigned32,
            RegRegKind::DivUnsigned32AndSignExtend => Self::DivUnsigned32AndSignExtend,
            RegRegKind::DivUnsigned64 => Self::DivUnsigned64,
            RegRegKind::Rem32 => Self::Rem32,
            RegRegKind::Rem32AndSignExtend => Self::Rem32AndSignExtend,
            RegRegKind::Rem64 => Self::Rem64,
            RegRegKind::RemUnsigned32 => Self::RemUnsigned32,
            RegRegKind::RemUnsigned32AndSignExtend => Self::RemUnsigned32AndSignExtend,
            RegRegKind::RemUnsigned64 => Self::RemUnsigned64,
            RegRegKind::AndInverted => Self::AndInverted,
            RegRegKind::OrInverted => Self::OrInverted,
            RegRegKind::Xnor => Self::Xnor,
            RegRegKind::Maximum => Self::Maximum,
            RegRegKind::MaximumUnsigned => Self::MaximumUnsigned,
            RegRegKind::Minimum => Self::Minimum,
            RegRegKind::MinimumUnsigned => Self::MinimumUnsigned,
            RegRegKind::RotateLeft32 => Self::RotateLeft32,
            RegRegKind::RotateLeft32AndSignExtend => Self::RotateLeft32AndSignExtend,
            RegRegKind::RotateLeft64 => Self::RotateLeft64,
        }
    }
}
4057
// Maps a branch condition onto the comparison operation which computes it as
// a 0/1 value; `OperationKind::apply` is then used to constant-fold branches
// (see `simplify_control_instruction`, which expects a boolean result).
impl From<BranchKind> for OperationKind {
    fn from(kind: BranchKind) -> Self {
        match kind {
            BranchKind::Eq32 => Self::Eq32,
            BranchKind::Eq64 => Self::Eq64,
            BranchKind::NotEq32 => Self::NotEq32,
            BranchKind::NotEq64 => Self::NotEq64,
            BranchKind::LessSigned32 => Self::SetLessThanSigned32,
            BranchKind::LessSigned64 => Self::SetLessThanSigned64,
            BranchKind::GreaterOrEqualSigned32 => Self::SetGreaterOrEqualSigned32,
            BranchKind::GreaterOrEqualSigned64 => Self::SetGreaterOrEqualSigned64,
            BranchKind::LessUnsigned32 => Self::SetLessThanUnsigned32,
            BranchKind::LessUnsigned64 => Self::SetLessThanUnsigned64,
            BranchKind::GreaterOrEqualUnsigned32 => Self::SetGreaterOrEqualUnsigned32,
            BranchKind::GreaterOrEqualUnsigned64 => Self::SetGreaterOrEqualUnsigned64,
        }
    }
}
4076
4077impl OperationKind {
4078    #[rustfmt::skip]
4079    fn apply_const(self, lhs: i64, rhs: i64) -> i64 {
4080        use polkavm_common::operation::*;
4081        macro_rules! op32 {
4082            (|$lhs:ident, $rhs:ident| $e:expr) => {{
4083                let $lhs: i32 = lhs.try_into().expect("operand overflow");
4084                let $rhs: i32 = rhs.try_into().expect("operand overflow");
4085                let out: i32 = $e;
4086                cast(out).to_i64_sign_extend()
4087            }};
4088        }
4089
4090        macro_rules! op32_on_64 {
4091            (|$lhs:ident, $rhs:ident| $e:expr) => {{
4092                let $lhs: u64 = cast($lhs).to_unsigned();
4093                let $lhs: u32 = cast($lhs).truncate_to_u32();
4094                let $lhs: i32 = cast($lhs).to_signed();
4095                let $rhs: u64 = cast($rhs).to_unsigned();
4096                let $rhs: u32 = cast($rhs).truncate_to_u32();
4097                let $rhs: i32 = cast($rhs).to_signed();
4098                let out: i32 = $e;
4099                cast(out).to_i64_sign_extend()
4100            }};
4101        }
4102
4103        match self {
4104            Self::Add32 => {
4105                op32!(|lhs, rhs| lhs.wrapping_add(rhs))
4106            }
4107            Self::Add32AndSignExtend => {
4108                op32_on_64!(|lhs, rhs| lhs.wrapping_add(rhs))
4109            }
4110            Self::Add64 => {
4111                lhs.wrapping_add(rhs)
4112            },
4113            Self::And32 => {
4114                op32!(|lhs, rhs| lhs & rhs)
4115            }
4116            Self::And64 => {
4117                lhs & rhs
4118            },
4119            Self::Div32 => {
4120                op32!(|lhs, rhs| div(lhs, rhs))
4121            }
4122            Self::Div32AndSignExtend => {
4123                op32_on_64!(|lhs, rhs| div(lhs, rhs))
4124            }
4125            Self::Div64 => {
4126                div64(lhs, rhs)
4127            },
4128            Self::DivUnsigned32 => {
4129                op32!(|lhs, rhs| cast(divu(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed())
4130            }
4131            Self::DivUnsigned32AndSignExtend => {
4132                op32_on_64!(|lhs, rhs| cast(divu(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed())
4133            }
4134            Self::DivUnsigned64 => {
4135                cast(divu64(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed()
4136            },
4137            Self::Eq32 => {
4138                op32!(|lhs, rhs| i32::from(lhs == rhs))
4139            }
4140            Self::Eq64 => {
4141                i64::from(lhs == rhs)
4142            },
4143            Self::Mul32 => {
4144                op32!(|lhs, rhs| lhs.wrapping_mul(rhs))
4145            }
4146            Self::Mul32AndSignExtend => {
4147                op32_on_64!(|lhs, rhs| lhs.wrapping_mul(rhs))
4148            }
4149            Self::Mul64 => {
4150                lhs.wrapping_mul(rhs)
4151            },
4152            Self::MulUpperSignedSigned32 => {
4153                op32!(|lhs, rhs| mulh(lhs, rhs))
4154            },
4155            Self::MulUpperSignedSigned64 => {
4156                mulh64(lhs, rhs)
4157            },
4158            Self::MulUpperSignedUnsigned32 => {
4159                op32!(|lhs, rhs| mulhsu(lhs, cast(rhs).to_unsigned()))
4160            },
4161            Self::MulUpperSignedUnsigned64 => {
4162                mulhsu64(lhs, cast(rhs).to_unsigned())
4163            },
4164            Self::MulUpperUnsignedUnsigned32 => {
4165                op32!(|lhs, rhs| cast(mulhu(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed())
4166            },
4167            Self::MulUpperUnsignedUnsigned64 => {
4168                cast(mulhu64(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed()
4169            },
4170            Self::NotEq32 => {
4171                op32!(|lhs, rhs| i32::from(lhs != rhs))
4172            },
4173            Self::NotEq64 => {
4174                i64::from(lhs != rhs)
4175            },
4176            Self::Or32 => {
4177                op32!(|lhs, rhs| lhs | rhs)
4178            },
4179            Self::Or64 => {
4180                lhs | rhs
4181            },
4182            Self::Rem32 => {
4183                op32!(|lhs, rhs| rem(lhs, rhs))
4184            },
4185            Self::Rem32AndSignExtend => {
4186                op32_on_64!(|lhs, rhs| rem(lhs, rhs))
4187            },
4188            Self::Rem64 => {
4189                rem64(lhs, rhs)
4190            },
4191            Self::RemUnsigned32 => {
4192                op32!(|lhs, rhs| cast(remu(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed())
4193            },
4194            Self::RemUnsigned32AndSignExtend => {
4195                op32_on_64!(|lhs, rhs| cast(remu(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed())
4196            }
4197            Self::RemUnsigned64 => {
4198                remu64(cast(lhs).to_unsigned(), cast(rhs).to_unsigned()) as i64
4199            },
4200            Self::SetGreaterOrEqualSigned32 => {
4201                op32!(|lhs, rhs| i32::from(lhs >= rhs))
4202            },
4203            Self::SetGreaterOrEqualSigned64 => {
4204                i64::from(lhs >= rhs)
4205            },
4206            Self::SetGreaterOrEqualUnsigned32 => {
4207                op32!(|lhs, rhs| i32::from(cast(lhs).to_unsigned() >= cast(rhs).to_unsigned()))
4208            },
4209            Self::SetGreaterOrEqualUnsigned64 => {
4210                i64::from(cast(lhs).to_unsigned() >= cast(rhs).to_unsigned())
4211            },
4212            Self::SetLessThanSigned32 => {
4213                op32!(|lhs, rhs| i32::from(lhs < rhs))
4214            },
4215            Self::SetLessThanSigned64 => {
4216                i64::from(lhs < rhs)
4217            },
4218            Self::SetLessThanUnsigned32 => {
4219                op32!(|lhs, rhs| i32::from(cast(lhs).to_unsigned() < cast(rhs).to_unsigned()))
4220            },
4221            Self::SetLessThanUnsigned64 => {
4222                i64::from((lhs as u64) < (rhs as u64))
4223            },
4224            Self::ShiftArithmeticRight32 => {
4225                op32!(|lhs, rhs| lhs.wrapping_shr(cast(rhs).to_unsigned()))
4226            },
4227            Self::ShiftArithmeticRight32AndSignExtend => {
4228                op32_on_64!(|lhs, rhs| lhs.wrapping_shr(cast(rhs).to_unsigned()))
4229            },
4230            Self::ShiftArithmeticRight64 => {
4231                let rhs = cast(rhs).to_unsigned();
4232                let rhs = cast(rhs).truncate_to_u32();
4233                lhs.wrapping_shr(rhs)
4234            },
4235            Self::ShiftLogicalLeft32 => {
4236                op32!(|lhs, rhs| lhs.wrapping_shl(cast(rhs).to_unsigned()))
4237            },
4238            Self::ShiftLogicalLeft32AndSignExtend => {
4239                op32_on_64!(|lhs, rhs| lhs.wrapping_shl(cast(rhs).to_unsigned()))
4240            },
4241            Self::ShiftLogicalLeft64 => {
4242                let rhs = cast(rhs).to_unsigned();
4243                let rhs = cast(rhs).truncate_to_u32();
4244                (lhs as u64).wrapping_shl(rhs) as i64
4245            },
4246            Self::ShiftLogicalRight32 => {
4247                op32!(|lhs, rhs| cast(cast(lhs).to_unsigned().wrapping_shr(cast(rhs).to_unsigned())).to_signed())
4248            },
4249            Self::ShiftLogicalRight32AndSignExtend => {
4250                op32_on_64!(|lhs, rhs| cast(cast(lhs).to_unsigned().wrapping_shr(cast(rhs).to_unsigned())).to_signed())
4251            }
4252            Self::ShiftLogicalRight64 => {
4253                (lhs as u64).wrapping_shr(rhs as u32) as i64
4254            },
4255            Self::Sub32 => {
4256                op32!(|lhs, rhs| lhs.wrapping_sub(rhs))
4257            },
4258            Self::Sub32AndSignExtend => {
4259                op32_on_64!(|lhs, rhs| lhs.wrapping_sub(rhs))
4260            },
4261            Self::Sub64 => {
4262                lhs.wrapping_sub(rhs)
4263            },
4264            Self::Xor32 => {
4265                op32!(|lhs, rhs| lhs ^ rhs)
4266            },
4267            Self::Xor64 => {
4268                lhs ^ rhs
4269            },
4270            //
4271            // Zbb instructions
4272            //
4273            Self::AndInverted => lhs & (!rhs),
4274            Self::OrInverted => lhs | (!rhs),
4275            Self::Xnor => !(lhs ^ rhs),
4276            Self::Maximum => lhs.max(rhs),
4277            Self::MaximumUnsigned => (lhs as u64).max(rhs as u64) as i64,
4278            Self::Minimum => lhs.min(rhs),
4279            Self::MinimumUnsigned => (lhs as u64).min(rhs as u64) as i64,
4280            Self::RotateLeft32 => {
4281                op32!(|lhs, rhs| lhs.rotate_left(rhs as u32))
4282            },
4283            Self::RotateLeft32AndSignExtend => {
4284                op32_on_64!(|lhs, rhs| lhs.rotate_left(rhs as u32))
4285            },
4286            Self::RotateLeft64 => {
4287                let rhs = cast(rhs).to_unsigned();
4288                let rhs = cast(rhs).truncate_to_u32();
4289                lhs.rotate_left(rhs)
4290            },
4291            Self::RotateRight32 => {
4292                op32!(|lhs, rhs| lhs.rotate_right(rhs as u32))
4293            },
4294            Self::RotateRight32AndSignExtend => {
4295                op32_on_64!(|lhs, rhs| lhs.rotate_right(rhs as u32))
4296            },
4297            Self::RotateRight64 => {
4298                let rhs = cast(rhs).to_unsigned();
4299                let rhs = cast(rhs).truncate_to_u32();
4300                lhs.rotate_right(rhs)
4301            },
4302        }
4303    }
4304
4305    fn apply<H>(self, elf: &Elf<H>, lhs: RegValue, rhs: RegValue) -> Option<RegValue>
4306    where
4307        H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
4308    {
4309        use OperationKind as O;
4310        use RegValue::Constant as C;
4311
4312        #[rustfmt::skip]
4313        let value = match (self, lhs, rhs) {
4314            (_, C(lhs), C(rhs)) => {
4315                C(self.apply_const(lhs, rhs))
4316            },
4317            (O::Add32, RegValue::DataAddress(lhs), C(rhs)) => {
4318                let offset = cast(cast(lhs.offset).to_signed().wrapping_add(rhs)).to_unsigned();
4319                if offset <= elf.section_by_index(lhs.section_index).size() {
4320                    RegValue::DataAddress(SectionTarget {
4321                        section_index: lhs.section_index,
4322                        offset,
4323                    })
4324                } else {
4325                    return None;
4326                }
4327            },
4328            (O::Sub32, RegValue::DataAddress(lhs), C(rhs)) => {
4329                let offset = cast(lhs.offset).to_signed().wrapping_sub(rhs);
4330                if offset >= 0 {
4331                    RegValue::DataAddress(SectionTarget {
4332                        section_index: lhs.section_index,
4333                        offset: cast(offset).to_unsigned(),
4334                    })
4335                } else {
4336                    return None;
4337                }
4338            },
4339
4340            // (x == x) = 1
4341            (O::Eq32,                   lhs, rhs) if lhs == rhs => C(1),
4342            (O::Eq64,                   lhs, rhs) if lhs == rhs => C(1),
4343            // (x != x) = 0
4344            (O::NotEq32,                lhs, rhs) if lhs == rhs => C(0),
4345            (O::NotEq64,                lhs, rhs) if lhs == rhs => C(0),
4346            // x & x = x
4347            (O::And32,                  lhs, rhs) if lhs == rhs => lhs,
4348            (O::And64,                  lhs, rhs) if lhs == rhs => lhs,
4349            // x | x = x
4350            (O::Or32,                   lhs, rhs) if lhs == rhs => lhs,
4351            (O::Or64,                   lhs, rhs) if lhs == rhs => lhs,
4352
4353            // x + 0 = x
4354            (O::Add32,                  lhs, C(0)) => lhs,
4355            (O::Add64,                  lhs, C(0)) => lhs,
4356            // 0 + x = x
4357            (O::Add32,                  C(0), rhs) => rhs,
4358            (O::Add64,                  C(0), rhs) => rhs,
4359            // x | 0 = x
4360            (O::Or32,                   lhs, C(0)) => lhs,
4361            (O::Or64,                   lhs, C(0)) => lhs,
4362            // 0 | x = x
4363            (O::Or32,                   C(0), rhs) => rhs,
4364            (O::Or64,                   C(0), rhs) => rhs,
4365            // x ^ 0 = x
4366            (O::Xor32,                  lhs, C(0)) => lhs,
4367            (O::Xor64,                  lhs, C(0)) => lhs,
4368            // 0 ^ x = x
4369            (O::Xor32,                  C(0), rhs) => rhs,
4370            (O::Xor64,                  C(0), rhs) => rhs,
4371
4372            // x - 0 = x
4373            (O::Sub32,                  lhs, C(0)) => lhs,
4374            (O::Sub64,                  lhs, C(0)) => lhs,
4375            // x << 0 = x
4376            (O::ShiftLogicalLeft32,     lhs, C(0)) => lhs,
4377            (O::ShiftLogicalLeft64,     lhs, C(0)) => lhs,
4378            // x >> 0 = x
4379            (O::ShiftLogicalRight32,    lhs, C(0)) => lhs,
4380            (O::ShiftLogicalRight64,    lhs, C(0)) => lhs,
4381            // x >> 0 = x
4382            (O::ShiftArithmeticRight32, lhs, C(0)) => lhs,
4383            (O::ShiftArithmeticRight64, lhs, C(0)) => lhs,
4384            // x % 0 = x
4385            (O::Rem32,                          lhs, C(0)) => lhs,
4386            (O::Rem64,                          lhs, C(0)) => lhs,
4387            (O::RemUnsigned32,                  lhs, C(0)) => lhs,
4388            (O::RemUnsigned64,                  lhs, C(0)) => lhs,
4389            (O::Rem32AndSignExtend,             lhs, C(0)) => lhs,
4390            (O::RemUnsigned32AndSignExtend,     lhs, C(0)) => lhs,
4391            // 0 % x = 0
4392            (O::Rem32,                          C(0), _) => C(0),
4393            (O::Rem64,                          C(0), _) => C(0),
4394            (O::RemUnsigned32,                  C(0), _) => C(0),
4395            (O::RemUnsigned64,                  C(0), _) => C(0),
4396            (O::Rem32AndSignExtend,             C(0), _) => C(0),
4397            (O::RemUnsigned32AndSignExtend,     C(0), _) => C(0),
4398
4399            // x & 0 = 0
4400            (O::And32,                    _, C(0)) => C(0),
4401            (O::And64,                    _, C(0)) => C(0),
4402            // 0 & x = 0
4403            (O::And32,                    C(0), _) => C(0),
4404            (O::And64,                    C(0), _) => C(0),
4405            // x * 0 = 0
4406            (O::Mul32,                    _, C(0)) => C(0),
4407            (O::Mul64,                    _, C(0)) => C(0),
4408            (O::MulUpperSignedSigned32,   _, C(0)) => C(0),
4409            (O::MulUpperSignedSigned64,   _, C(0)) => C(0),
4410            (O::MulUpperSignedUnsigned32, _, C(0)) => C(0),
4411            (O::MulUpperSignedUnsigned64, _, C(0)) => C(0),
4412            (O::MulUpperUnsignedUnsigned32, _, C(0)) => C(0),
4413            (O::MulUpperUnsignedUnsigned64, _, C(0)) => C(0),
4414            // 0 * x = 0
4415            (O::Mul32,                    C(0), _) => C(0),
4416            (O::Mul64,                    C(0), _) => C(0),
4417            (O::MulUpperSignedSigned32,   C(0), _) => C(0),
4418            (O::MulUpperSignedSigned64,   C(0), _) => C(0),
4419            (O::MulUpperSignedUnsigned32, C(0), _) => C(0),
4420            (O::MulUpperSignedUnsigned64, C(0), _) => C(0),
4421            (O::MulUpperUnsignedUnsigned32, C(0), _) => C(0),
4422            (O::MulUpperUnsignedUnsigned64, C(0), _) => C(0),
4423
4424            // x / 0 = -1
4425            (O::Div32,                          _, C(0)) => C(-1),
4426            (O::Div64,                          _, C(0)) => C(-1),
4427            (O::DivUnsigned32,                  _, C(0)) => C(-1),
4428            (O::DivUnsigned64,                  _, C(0)) => C(-1),
4429            (O::Div32AndSignExtend,             _, C(0)) => C(-1),
4430            (O::DivUnsigned32AndSignExtend,     _, C(0)) => C(-1),
4431
4432            // 0 / x = 0
4433            (O::Div32,                          C(0), _) => C(0),
4434            (O::Div64,                          C(0), _) => C(0),
4435            (O::DivUnsigned32,                  C(0), _) => C(0),
4436            (O::DivUnsigned64,                  C(0), _) => C(0),
4437            (O::Div32AndSignExtend,             C(0), _) => C(0),
4438            (O::DivUnsigned32AndSignExtend,     C(0), _) => C(0),
4439
4440            // (x & ~0) = x
4441            (O::AndInverted,              lhs, C(0)) => lhs,
4442            // (0 & ~x) = 0
4443            (O::AndInverted,              C(0), _) => C(0),
4444
4445            // (x | ~0) = -1
4446            (O::OrInverted,               _, C(0)) => C(-1),
4447
4448            // unsigned_max(0, x) = x
4449            (O::MaximumUnsigned,          C(0), rhs) => rhs,
4450            (O::MaximumUnsigned,          lhs, C(0)) => lhs,
4451
4452            // unsigned min(0, x) = 0
4453            (O::MinimumUnsigned,          C(0), _) => C(0),
4454            (O::MinimumUnsigned,          _, C(0)) => C(0),
4455
4456            // x <<r 0 = x
4457            (O::RotateLeft32,             lhs, C(0)) => lhs,
4458            (O::RotateLeft32,             C(0), _) => C(0),
4459            (O::RotateLeft64,             lhs, C(0)) => lhs,
4460            (O::RotateLeft64,             C(0), _) => C(0),
4461
4462            // x >>r 0 = x
4463            (O::RotateRight32,            lhs, C(0)) => lhs,
4464            (O::RotateRight32,            C(0), _) => C(0),
4465            (O::RotateRight64,            lhs, C(0)) => lhs,
4466            (O::RotateRight64,            C(0), _) => C(0),
4467
4468            // (0 <<r 0) or (0 >>r 0) = 0
4469            (O::RotateLeft32AndSignExtend,  C(0), _) => C(0),
4470            (O::RotateRight32AndSignExtend, C(0), _) => C(0),
4471
4472            _ => return None,
4473        };
4474
4475        Some(value)
4476    }
4477}
4478
/// An abstract value tracked for a single register during constant
/// propagation and instruction simplification.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
enum RegValue {
    // Symbolic: the contents of `reg` on entry to `source_block`
    // (constructed by `BlockRegs::new_input`).
    InputReg {
        reg: Reg,
        source_block: BlockTarget,
        // Mask of bits which can possibly be set in this value; used e.g.
        // to prove that an `add` can be replaced with an `or`.
        bits_used: u64,
    },
    // A statically known address of a basic block.
    CodeAddress(BlockTarget),
    // A statically known address into a data section.
    DataAddress(SectionTarget),
    // A compile-time constant.
    Constant(i64),
    // Symbolic: the contents of `reg` on exit from `source_block`
    // (constructed by `BlockRegs::new_output`).
    OutputReg {
        reg: Reg,
        source_block: BlockTarget,
        bits_used: u64,
    },
    // A value which is not statically known.
    Unknown {
        // NOTE(review): presumably distinguishes distinct unknown values
        // from one another so they don't compare equal — confirm at use site.
        unique: u64,
        bits_used: u64,
    },
}
4499
4500impl RegValue {
4501    fn to_instruction(self, dst: Reg, is_rv64: bool) -> Option<BasicInst<AnyTarget>> {
4502        match self {
4503            RegValue::CodeAddress(target) => Some(BasicInst::LoadAddress {
4504                dst,
4505                target: AnyTarget::Code(target),
4506            }),
4507            RegValue::DataAddress(target) => Some(BasicInst::LoadAddress {
4508                dst,
4509                target: AnyTarget::Data(target),
4510            }),
4511            RegValue::Constant(imm) => {
4512                if let Ok(imm) = i32::try_from(imm) {
4513                    Some(BasicInst::LoadImmediate { dst, imm })
4514                } else {
4515                    assert!(is_rv64, "64-bit register value on 32-bit target");
4516                    Some(BasicInst::LoadImmediate64 { dst, imm })
4517                }
4518            }
4519            _ => None,
4520        }
4521    }
4522
4523    fn bits_used(self) -> u64 {
4524        match self {
4525            RegValue::CodeAddress(..) | RegValue::DataAddress(..) => u64::from(u32::MAX),
4526            RegValue::Constant(value) => value as u64,
4527            RegValue::Unknown { bits_used, .. } | RegValue::InputReg { bits_used, .. } | RegValue::OutputReg { bits_used, .. } => bits_used,
4528        }
4529    }
4530}
4531
/// The abstract state of every VM register at a particular program point.
#[derive(Clone, PartialEq, Eq)]
struct BlockRegs {
    // Whether we're targeting a 32-bit or a 64-bit VM; determines the
    // default `bits_used` mask for freshly created symbolic values.
    bitness: Bitness,
    // One abstract value per register, indexed by `Reg::to_usize`.
    regs: [RegValue; Reg::ALL.len()],
}
4537
4538#[deny(clippy::as_conversions)]
4539impl BlockRegs {
4540    fn new_input(bitness: Bitness, source_block: BlockTarget) -> Self {
4541        BlockRegs {
4542            bitness,
4543            regs: Reg::ALL.map(|reg| RegValue::InputReg {
4544                reg,
4545                source_block,
4546                bits_used: bitness.bits_used_mask(),
4547            }),
4548        }
4549    }
4550
4551    fn new_output(bitness: Bitness, source_block: BlockTarget) -> Self {
4552        BlockRegs {
4553            bitness,
4554            regs: Reg::ALL.map(|reg| RegValue::OutputReg {
4555                reg,
4556                source_block,
4557                bits_used: bitness.bits_used_mask(),
4558            }),
4559        }
4560    }
4561
4562    fn get_reg(&self, reg: impl Into<RegImm>) -> RegValue {
4563        match reg.into() {
4564            RegImm::Imm(imm) => RegValue::Constant(cast(imm).to_i64_sign_extend()),
4565            RegImm::Reg(reg) => self.regs[reg.to_usize()],
4566        }
4567    }
4568
4569    fn set_reg(&mut self, reg: Reg, value: RegValue) {
4570        self.regs[reg.to_usize()] = value;
4571    }
4572
4573    fn simplify_control_instruction<H>(
4574        &self,
4575        elf: &Elf<H>,
4576        instruction: ControlInst<BlockTarget>,
4577    ) -> Option<(Option<BasicInst<AnyTarget>>, ControlInst<BlockTarget>)>
4578    where
4579        H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
4580    {
4581        match instruction {
4582            ControlInst::JumpIndirect { base, offset: 0 } => {
4583                if let RegValue::CodeAddress(target) = self.get_reg(base) {
4584                    return Some((None, ControlInst::Jump { target }));
4585                }
4586            }
4587            ControlInst::Branch {
4588                kind,
4589                src1,
4590                src2,
4591                target_true,
4592                target_false,
4593            } => {
4594                if target_true == target_false {
4595                    return Some((None, ControlInst::Jump { target: target_true }));
4596                }
4597
4598                let src1_value = self.get_reg(src1);
4599                let src2_value = self.get_reg(src2);
4600                if let Some(value) = OperationKind::from(kind).apply(elf, src1_value, src2_value) {
4601                    match value {
4602                        RegValue::Constant(0) => {
4603                            return Some((None, ControlInst::Jump { target: target_false }));
4604                        }
4605                        RegValue::Constant(1) => {
4606                            return Some((None, ControlInst::Jump { target: target_true }));
4607                        }
4608                        _ => unreachable!("internal error: constant evaluation of branch operands returned a non-boolean value"),
4609                    }
4610                }
4611
4612                if let RegImm::Reg(_) = src1 {
4613                    if let RegValue::Constant(src1_value) = src1_value {
4614                        if let Ok(src1_value) = src1_value.try_into() {
4615                            return Some((
4616                                None,
4617                                ControlInst::Branch {
4618                                    kind,
4619                                    src1: RegImm::Imm(src1_value),
4620                                    src2,
4621                                    target_true,
4622                                    target_false,
4623                                },
4624                            ));
4625                        }
4626                    }
4627                }
4628
4629                if let RegImm::Reg(_) = src2 {
4630                    if let RegValue::Constant(src2_value) = src2_value {
4631                        if let Ok(src2_value) = src2_value.try_into() {
4632                            return Some((
4633                                None,
4634                                ControlInst::Branch {
4635                                    kind,
4636                                    src1,
4637                                    src2: RegImm::Imm(src2_value),
4638                                    target_true,
4639                                    target_false,
4640                                },
4641                            ));
4642                        }
4643                    }
4644                }
4645            }
4646            ControlInst::CallIndirect {
4647                ra,
4648                base,
4649                offset: 0,
4650                target_return,
4651            } => {
4652                if let RegValue::CodeAddress(target) = self.get_reg(base) {
4653                    let instruction_1 = BasicInst::LoadAddress {
4654                        dst: ra,
4655                        target: AnyTarget::Code(target_return),
4656                    };
4657                    let instruction_2 = ControlInst::Jump { target };
4658                    return Some((Some(instruction_1), instruction_2));
4659                }
4660            }
4661            _ => {}
4662        }
4663
4664        None
4665    }
4666
4667    fn simplify_instruction<H>(&self, elf: &Elf<H>, instruction: BasicInst<AnyTarget>) -> Option<BasicInst<AnyTarget>>
4668    where
4669        H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
4670    {
4671        let is_rv64 = self.bitness == Bitness::B64;
4672
4673        match instruction {
4674            BasicInst::RegReg { kind, dst, src1, src2 } => {
4675                let src1_value = self.get_reg(src1);
4676                let src2_value = self.get_reg(src2);
4677                if let Some(value) = OperationKind::from(kind).apply(elf, src1_value, src2_value) {
4678                    if let Some(new_instruction) = value.to_instruction(dst, is_rv64) {
4679                        if new_instruction != instruction {
4680                            return Some(new_instruction);
4681                        }
4682                    }
4683                }
4684            }
4685            BasicInst::AnyAny { kind, dst, src1, src2 } => {
4686                let src1_value = self.get_reg(src1);
4687                let src2_value = self.get_reg(src2);
4688                if let Some(value) = OperationKind::from(kind).apply(elf, src1_value, src2_value) {
4689                    if let Some(new_instruction) = value.to_instruction(dst, is_rv64) {
4690                        if new_instruction != instruction {
4691                            return Some(new_instruction);
4692                        }
4693                    }
4694                }
4695
4696                if let RegImm::Reg(_) = src1 {
4697                    if let RegValue::Constant(src1_value) = src1_value {
4698                        if let Ok(src1_value) = src1_value.try_into() {
4699                            return Some(BasicInst::AnyAny {
4700                                kind,
4701                                dst,
4702                                src1: RegImm::Imm(src1_value),
4703                                src2,
4704                            });
4705                        }
4706                    }
4707                }
4708
4709                if let RegImm::Reg(_) = src2 {
4710                    if let RegValue::Constant(src2_value) = src2_value {
4711                        if let Ok(src2_value) = src2_value.try_into() {
4712                            return Some(BasicInst::AnyAny {
4713                                kind,
4714                                dst,
4715                                src1,
4716                                src2: RegImm::Imm(src2_value),
4717                            });
4718                        }
4719                    }
4720                }
4721
4722                if matches!(kind, AnyAnyKind::Add32 | AnyAnyKind::Add64) {
4723                    if src1_value == RegValue::Constant(0) {
4724                        if let RegImm::Reg(src) = src2 {
4725                            return Some(BasicInst::MoveReg { dst, src });
4726                        }
4727                    } else if src2_value == RegValue::Constant(0) {
4728                        if let RegImm::Reg(src) = src1 {
4729                            return Some(BasicInst::MoveReg { dst, src });
4730                        }
4731                    }
4732                }
4733
4734                if matches!(kind, AnyAnyKind::Add32 | AnyAnyKind::Add64)
4735                    && src1_value != RegValue::Constant(0)
4736                    && src2_value != RegValue::Constant(0)
4737                    && (src1_value.bits_used() & src2_value.bits_used()) == 0
4738                {
4739                    // Replace an `add` with an `or` if it's safe to do so.
4740                    //
4741                    // Curiously LLVM's RISC-V backend doesn't do this even though its AMD64 backend does.
4742                    return Some(BasicInst::AnyAny {
4743                        kind: match kind {
4744                            AnyAnyKind::Add32 => AnyAnyKind::Or32,
4745                            AnyAnyKind::Add64 => AnyAnyKind::Or64,
4746                            _ => unreachable!(),
4747                        },
4748                        dst,
4749                        src1,
4750                        src2,
4751                    });
4752                }
4753            }
4754            BasicInst::Cmov {
4755                kind,
4756                dst,
4757                src: RegImm::Reg(src),
4758                cond,
4759            } => {
4760                if let RegValue::Constant(src_value) = self.get_reg(src) {
4761                    if let Ok(src_value) = src_value.try_into() {
4762                        return Some(BasicInst::Cmov {
4763                            kind,
4764                            dst,
4765                            src: RegImm::Imm(src_value),
4766                            cond,
4767                        });
4768                    }
4769                }
4770            }
4771            BasicInst::LoadIndirect { kind, dst, base, offset } => {
4772                if let RegValue::DataAddress(base) = self.get_reg(base) {
4773                    return Some(BasicInst::LoadAbsolute {
4774                        kind,
4775                        dst,
4776                        target: base.map_offset_i64(|base| base.wrapping_add(cast(offset).to_i64_sign_extend())),
4777                    });
4778                }
4779            }
4780            BasicInst::LoadAddressIndirect { dst, target } => {
4781                return Some(BasicInst::LoadAddress { dst, target });
4782            }
4783            BasicInst::StoreIndirect { kind, src, base, offset } => {
4784                if let RegValue::DataAddress(base) = self.get_reg(base) {
4785                    return Some(BasicInst::StoreAbsolute {
4786                        kind,
4787                        src,
4788                        target: base.map_offset_i64(|base| base.wrapping_add(cast(offset).to_i64_sign_extend())),
4789                    });
4790                }
4791
4792                if let RegImm::Reg(src) = src {
4793                    if let RegValue::Constant(src_value) = self.get_reg(src) {
4794                        if let Ok(src_value) = src_value.try_into() {
4795                            return Some(BasicInst::StoreIndirect {
4796                                kind,
4797                                src: RegImm::Imm(src_value),
4798                                base,
4799                                offset,
4800                            });
4801                        }
4802                    }
4803                }
4804            }
4805            BasicInst::StoreAbsolute {
4806                kind,
4807                src: RegImm::Reg(src),
4808                target,
4809            } => {
4810                if let RegValue::Constant(src_value) = self.get_reg(src) {
4811                    if let Ok(src_value) = src_value.try_into() {
4812                        return Some(BasicInst::StoreAbsolute {
4813                            kind,
4814                            src: RegImm::Imm(src_value),
4815                            target,
4816                        });
4817                    }
4818                }
4819            }
4820            BasicInst::MoveReg { dst, src } => {
4821                if dst == src {
4822                    return Some(BasicInst::Nop);
4823                }
4824            }
4825            _ => {}
4826        }
4827
4828        None
4829    }
4830
4831    fn set_reg_unknown(&mut self, dst: Reg, unknown_counter: &mut u64, bits_used: u64) {
4832        let bits_used_masked = bits_used & self.bitness.bits_used_mask();
4833        if bits_used_masked == 0 {
4834            self.set_reg(dst, RegValue::Constant(0));
4835            return;
4836        }
4837
4838        self.set_reg(
4839            dst,
4840            RegValue::Unknown {
4841                unique: *unknown_counter,
4842                bits_used: bits_used_masked,
4843            },
4844        );
4845        *unknown_counter += 1;
4846    }
4847
4848    fn set_reg_from_control_instruction(&mut self, imports: &[Import], unknown_counter: &mut u64, instruction: ControlInst<BlockTarget>) {
4849        #[allow(clippy::single_match)]
4850        match instruction {
4851            ControlInst::CallIndirect { ra, target_return, .. } => {
4852                let implicit_instruction = BasicInst::LoadAddress {
4853                    dst: ra,
4854                    target: AnyTarget::Code(target_return),
4855                };
4856                self.set_reg_from_instruction(imports, unknown_counter, implicit_instruction);
4857            }
4858            _ => {}
4859        }
4860    }
4861
    /// Abstract-interpretation transfer function: updates the tracked register
    /// state with the effect of executing `instruction`.
    ///
    /// Destinations with a statically-known result are set to a concrete
    /// `Constant`/`CodeAddress`/`DataAddress`; everything else becomes a fresh
    /// `Unknown` value carrying a conservative mask of possibly-set bits.
    ///
    /// Note: arm order matters here — the `x + 0`/`x | 0` identity arms must
    /// stay before the general `Add`/`Or` arms.
    fn set_reg_from_instruction(&mut self, imports: &[Import], unknown_counter: &mut u64, instruction: BasicInst<AnyTarget>) {
        match instruction {
            BasicInst::LoadImmediate { dst, imm } => {
                // 32-bit immediates are sign-extended to 64 bits.
                self.set_reg(dst, RegValue::Constant(cast(imm).to_i64_sign_extend()));
            }
            BasicInst::LoadImmediate64 { dst, imm } => {
                self.set_reg(dst, RegValue::Constant(imm));
            }
            // Loading the address of a code target produces a symbolic code address.
            BasicInst::LoadAddress {
                dst,
                target: AnyTarget::Code(target),
            }
            | BasicInst::LoadAddressIndirect {
                dst,
                target: AnyTarget::Code(target),
            } => {
                self.set_reg(dst, RegValue::CodeAddress(target));
            }
            // Loading the address of a data target produces a symbolic data address.
            BasicInst::LoadAddress {
                dst,
                target: AnyTarget::Data(target),
            }
            | BasicInst::LoadAddressIndirect {
                dst,
                target: AnyTarget::Data(target),
            } => {
                self.set_reg(dst, RegValue::DataAddress(target));
            }
            BasicInst::MoveReg { dst, src } => {
                // A move copies whatever is known about the source register.
                self.set_reg(dst, self.get_reg(src));
            }
            // `x + 0` and `x | 0` are identities, so the source's value is preserved.
            BasicInst::AnyAny {
                kind: AnyAnyKind::Add32 | AnyAnyKind::Add64 | AnyAnyKind::Or32 | AnyAnyKind::Or64,
                dst,
                src1,
                src2: RegImm::Imm(0),
            } => {
                self.set_reg(dst, self.get_reg(src1));
            }
            BasicInst::AnyAny {
                kind: AnyAnyKind::Add32 | AnyAnyKind::Add64 | AnyAnyKind::Or32 | AnyAnyKind::Or64,
                dst,
                src1: RegImm::Imm(0),
                src2,
            } => {
                self.set_reg(dst, self.get_reg(src2));
            }
            BasicInst::AnyAny {
                kind: AnyAnyKind::Add32 | AnyAnyKind::Add64,
                dst,
                src1,
                src2,
            } => {
                let src1_value = self.get_reg(src1);
                let src2_value = self.get_reg(src2);
                // Addition can set any bit used by either source and, through a
                // carry, the position directly above any such bit.
                let bits_used =
                    src1_value.bits_used() | src2_value.bits_used() | (src1_value.bits_used() << 1) | (src2_value.bits_used() << 1);

                self.set_reg_unknown(dst, unknown_counter, bits_used);
            }
            BasicInst::AnyAny {
                kind: AnyAnyKind::And32 | AnyAnyKind::And64,
                dst,
                src1,
                src2,
            } => {
                let src1_value = self.get_reg(src1);
                let src2_value = self.get_reg(src2);
                // AND can only keep bits which may be set in *both* sources.
                let bits_used = src1_value.bits_used() & src2_value.bits_used();
                self.set_reg_unknown(dst, unknown_counter, bits_used);
            }
            BasicInst::AnyAny {
                kind: AnyAnyKind::Or32 | AnyAnyKind::Or64,
                dst,
                src1,
                src2,
            } => {
                let src1_value = self.get_reg(src1);
                let src2_value = self.get_reg(src2);
                // OR can set any bit which may be set in *either* source.
                let bits_used = src1_value.bits_used() | src2_value.bits_used();
                self.set_reg_unknown(dst, unknown_counter, bits_used);
            }
            BasicInst::AnyAny {
                kind: AnyAnyKind::ShiftLogicalRight32,
                dst,
                src1,
                src2: RegImm::Imm(src2),
            } => {
                let src1_value = self.get_reg(src1);
                // A logical right shift by a constant shifts the used-bits mask down.
                // NOTE(review): on RV64 the 32-bit shift variants sign-extend their
                // result, so a mask with bit 31 set may imply bits 32..=63 as well —
                // confirm this arm cannot be reached in a case where that matters.
                let bits_used = src1_value.bits_used() >> src2;
                self.set_reg_unknown(dst, unknown_counter, bits_used);
            }
            BasicInst::AnyAny {
                kind: AnyAnyKind::ShiftLogicalLeft32,
                dst,
                src1,
                src2: RegImm::Imm(src2),
            } => {
                let src1_value = self.get_reg(src1);
                // A logical left shift by a constant shifts the used-bits mask up.
                // NOTE(review): same RV64 sign-extension caveat as the right-shift arm above.
                let bits_used = src1_value.bits_used() << src2;
                self.set_reg_unknown(dst, unknown_counter, bits_used);
            }
            BasicInst::AnyAny {
                kind:
                    AnyAnyKind::SetLessThanSigned32
                    | AnyAnyKind::SetLessThanSigned64
                    | AnyAnyKind::SetLessThanUnsigned32
                    | AnyAnyKind::SetLessThanUnsigned64,
                dst,
                ..
            } => {
                // Comparisons produce 0 or 1, so only the lowest bit can ever be set.
                self.set_reg_unknown(dst, unknown_counter, 1);
            }
            // Zero-extending loads can only set as many low bits as the load is wide.
            BasicInst::LoadAbsolute {
                kind: LoadKind::U8, dst, ..
            }
            | BasicInst::LoadIndirect {
                kind: LoadKind::U8, dst, ..
            } => {
                self.set_reg_unknown(dst, unknown_counter, u64::from(u8::MAX));
            }
            BasicInst::LoadAbsolute {
                kind: LoadKind::U16, dst, ..
            }
            | BasicInst::LoadIndirect {
                kind: LoadKind::U16, dst, ..
            } => {
                self.set_reg_unknown(dst, unknown_counter, u64::from(u16::MAX));
            }
            BasicInst::LoadAbsolute {
                kind: LoadKind::U32, dst, ..
            }
            | BasicInst::LoadIndirect {
                kind: LoadKind::U32, dst, ..
            } => {
                self.set_reg_unknown(dst, unknown_counter, u64::from(u32::MAX));
            }
            _ => {
                // Conservative fallback: every register the instruction may write
                // becomes a fresh, fully-unknown value.
                for dst in instruction.dst_mask(imports) {
                    self.set_reg_unknown(dst, unknown_counter, self.bitness.bits_used_mask());
                }
            }
        }
    }
5006}
5007
/// Performs constant propagation and instruction simplification on a single basic block.
///
/// The block's instructions are symbolically executed over the abstract register
/// state; instructions with known operands are rewritten in place, loads from
/// read-only sections are folded into immediates, and the terminating control
/// instruction is simplified. Returns `true` if anything changed (including
/// just the recorded input/output register state for the block).
#[allow(clippy::too_many_arguments)]
fn perform_constant_propagation<H>(
    imports: &[Import],
    elf: &Elf<H>,
    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
    input_regs_for_block: &mut [BlockRegs],
    output_regs_for_block: &mut [BlockRegs],
    unknown_counter: &mut u64,
    reachability_graph: &mut ReachabilityGraph,
    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
    current: BlockTarget,
) -> bool
where
    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
{
    let is_rv64 = elf.is_64();

    // Unreachable blocks are never touched.
    let Some(reachability) = reachability_graph.for_code.get(&current) else {
        return false;
    };

    if reachability.is_unreachable() {
        return false;
    }

    let mut modified = false;
    // If the block is only reachable from a small, statically-known set of
    // predecessors, merge their output register states: any register whose
    // value all predecessors agree on can be assumed known on entry.
    if !reachability.is_dynamically_reachable()
        && !reachability.always_reachable_or_exported()
        && !reachability.reachable_from.is_empty()
        && reachability.reachable_from.len() < 64
    {
        for reg in Reg::ALL {
            let mut common_value_opt = None;
            for &source in &reachability.reachable_from {
                let value = output_regs_for_block[source.index()].get_reg(reg);
                if let Some(common_value) = common_value_opt {
                    if common_value == value {
                        continue;
                    }

                    // The predecessors disagree; this register's input stays unknown.
                    common_value_opt = None;
                    break;
                } else {
                    common_value_opt = Some(value);
                }
            }

            if let Some(value) = common_value_opt {
                let old_value = input_regs_for_block[current.index()].get_reg(reg);
                if value != old_value {
                    input_regs_for_block[current.index()].set_reg(reg, value);
                    modified = true;
                }
            }
        }
    }

    // Symbolically execute the block, simplifying each instruction as we go.
    let mut regs = input_regs_for_block[current.index()].clone();
    let mut references = BTreeSet::new();
    let mut modified_this_block = false;
    for nth_instruction in 0..all_blocks[current.index()].ops.len() {
        let mut instruction = all_blocks[current.index()].ops[nth_instruction].1;
        if instruction.is_nop() {
            continue;
        }

        // Keep simplifying until a fixed point is reached for this instruction.
        while let Some(new_instruction) = regs.simplify_instruction(elf, instruction) {
            log::trace!("Simplifying instruction in {}", all_blocks[current.index()].ops[nth_instruction].0);
            for reg in instruction.src_mask(imports) {
                log::trace!("  {reg:?} = {:?}", regs.get_reg(reg));
            }
            log::trace!("     {instruction:?}");
            log::trace!("  -> {new_instruction:?}");

            // Snapshot the block's outgoing references once, before the first
            // modification, so they can be diffed against the new ones later.
            if !modified_this_block {
                references = gather_references(&all_blocks[current.index()]);
                modified_this_block = true;
                modified = true;
            }

            instruction = new_instruction;
            all_blocks[current.index()].ops[nth_instruction].1 = new_instruction;
        }

        // Loads from allocated, read-only sections can be folded into immediates.
        if let BasicInst::LoadAbsolute { kind, dst, target } = instruction {
            let section = elf.section_by_index(target.section_index);
            if section.is_allocated() && !section.is_writable() {
                // NOTE(review): `target.offset as usize + N` could overflow usize on
                // a 32-bit host; presumably offsets were validated earlier — confirm.
                let value = match kind {
                    LoadKind::U64 => section
                        .data()
                        .get(target.offset as usize..target.offset as usize + 8)
                        .map(|xs| u64::from_le_bytes([xs[0], xs[1], xs[2], xs[3], xs[4], xs[5], xs[6], xs[7]]))
                        .map(|x| cast(x).to_signed()),
                    LoadKind::U32 => section
                        .data()
                        .get(target.offset as usize..target.offset as usize + 4)
                        .map(|xs| u32::from_le_bytes([xs[0], xs[1], xs[2], xs[3]]))
                        .map(|x| cast(x).to_u64())
                        .map(|x| cast(x).to_signed()),
                    LoadKind::I32 => section
                        .data()
                        .get(target.offset as usize..target.offset as usize + 4)
                        .map(|xs| i32::from_le_bytes([xs[0], xs[1], xs[2], xs[3]]))
                        .map(|x| cast(x).to_i64_sign_extend()),
                    LoadKind::U16 => section
                        .data()
                        .get(target.offset as usize..target.offset as usize + 2)
                        .map(|xs| u16::from_le_bytes([xs[0], xs[1]]))
                        .map(|x| cast(x).to_u64())
                        .map(|x| cast(x).to_signed()),
                    LoadKind::I16 => section
                        .data()
                        .get(target.offset as usize..target.offset as usize + 2)
                        .map(|xs| i16::from_le_bytes([xs[0], xs[1]]))
                        .map(|x| cast(x).to_i64_sign_extend()),
                    LoadKind::I8 => section
                        .data()
                        .get(target.offset as usize)
                        .map(|&x| cast(x).to_signed())
                        .map(|x| cast(x).to_i64_sign_extend()),
                    LoadKind::U8 => section
                        .data()
                        .get(target.offset as usize)
                        .copied()
                        .map(|x| cast(x).to_u64())
                        .map(|x| cast(x).to_signed()),
                };

                if let Some(imm) = value {
                    if !modified_this_block {
                        references = gather_references(&all_blocks[current.index()]);
                        modified_this_block = true;
                        modified = true;
                    }

                    // Prefer the narrower 32-bit load-immediate when the value fits.
                    if let Ok(imm) = i32::try_from(imm) {
                        instruction = BasicInst::LoadImmediate { dst, imm };
                    } else if is_rv64 {
                        instruction = BasicInst::LoadImmediate64 { dst, imm };
                    } else {
                        unreachable!("load immediate overflow in 32-bit");
                    }

                    all_blocks[current.index()].ops[nth_instruction].1 = instruction;
                }
            }
        }

        regs.set_reg_from_instruction(imports, unknown_counter, instruction);
    }

    // Try to simplify the block's terminating control instruction; this can
    // also produce an extra basic instruction which is appended to the block.
    if let Some((extra_instruction, new_instruction)) = regs.simplify_control_instruction(elf, all_blocks[current.index()].next.instruction)
    {
        log::trace!("Simplifying end of {current:?}");
        log::trace!("     {:?}", all_blocks[current.index()].next.instruction);
        if let Some(ref extra_instruction) = extra_instruction {
            log::trace!("  -> {extra_instruction:?}");
        }
        log::trace!("  -> {new_instruction:?}");

        if !modified_this_block {
            references = gather_references(&all_blocks[current.index()]);
            modified_this_block = true;
            modified = true;
        }

        if let Some(extra_instruction) = extra_instruction {
            regs.set_reg_from_instruction(imports, unknown_counter, extra_instruction);

            all_blocks[current.index()]
                .ops
                .push((all_blocks[current.index()].next.source.clone(), extra_instruction));
        }
        all_blocks[current.index()].next.instruction = new_instruction;
    }

    regs.set_reg_from_control_instruction(imports, unknown_counter, all_blocks[current.index()].next.instruction);

    // Anything still unknown at the end of the block becomes an `OutputReg`,
    // so that successors can refer to this block's outputs symbolically.
    for reg in Reg::ALL {
        if let RegValue::Unknown { bits_used, .. } = regs.get_reg(reg) {
            regs.set_reg(
                reg,
                RegValue::OutputReg {
                    reg,
                    source_block: current,
                    bits_used,
                },
            )
        }
    }

    let output_regs_modified = output_regs_for_block[current.index()] != regs;
    if output_regs_modified {
        output_regs_for_block[current.index()] = regs.clone();
        modified = true;
    }

    // If the block's instructions changed then its outgoing references may have
    // changed too, so the reachability graph has to be updated accordingly.
    if modified_this_block {
        update_references(all_blocks, reachability_graph, optimize_queue.as_deref_mut(), current, references);
        if reachability_graph.is_code_reachable(current) {
            if let Some(ref mut optimize_queue) = optimize_queue {
                add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, current);
            }
        }
    }

    // A changed output state can enable further simplification in the direct
    // successors, so queue them up for re-optimization.
    if let Some(ref mut optimize_queue) = optimize_queue {
        if output_regs_modified {
            match all_blocks[current.index()].next.instruction {
                ControlInst::Jump { target } => add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, target),
                ControlInst::Branch {
                    target_true, target_false, ..
                } => {
                    add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, target_true);
                    add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, target_false);
                }
                ControlInst::Call { .. } => unreachable!(),
                _ => {}
            }
        }
    }

    modified
}
5232
5233fn perform_load_address_and_jump_fusion(all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>], reachability_graph: &ReachabilityGraph) {
5234    let used_blocks: Vec<_> = (0..all_blocks.len())
5235        .map(BlockTarget::from_raw)
5236        .filter(|&block_target| reachability_graph.is_code_reachable(block_target))
5237        .collect();
5238
5239    for window in used_blocks.windows(2) {
5240        let (current, next) = (window[0], window[1]);
5241        let Some(&(
5242            _,
5243            BasicInst::LoadAddress {
5244                dst,
5245                target: AnyTarget::Code(target_return),
5246            },
5247        )) = all_blocks[current.index()].ops.last()
5248        else {
5249            continue;
5250        };
5251
5252        if target_return != next {
5253            continue;
5254        }
5255
5256        all_blocks[current.index()].next.instruction = match all_blocks[current.index()].next.instruction {
5257            ControlInst::Jump { target } => ControlInst::Call {
5258                target,
5259                target_return,
5260                ra: dst,
5261            },
5262            ControlInst::JumpIndirect { base, offset } if dst != base => ControlInst::CallIndirect {
5263                base,
5264                offset,
5265                target_return,
5266                ra: dst,
5267            },
5268            _ => {
5269                continue;
5270            }
5271        };
5272
5273        all_blocks[current.index()].ops.pop();
5274    }
5275}
5276
5277#[deny(clippy::as_conversions)]
5278fn optimize_program<H>(
5279    config: &Config,
5280    elf: &Elf<H>,
5281    imports: &[Import],
5282    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
5283    reachability_graph: &mut ReachabilityGraph,
5284    exports: &mut [Export],
5285) where
5286    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
5287{
5288    let bitness = if elf.is_64() { Bitness::B64 } else { Bitness::B32 };
5289
5290    let mut optimize_queue = VecSet::new();
5291    for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
5292        if !reachability_graph.is_code_reachable(current) {
5293            all_blocks[current.index()].ops.clear();
5294            all_blocks[current.index()].next.instruction = ControlInst::Unimplemented;
5295            continue;
5296        }
5297
5298        perform_nop_elimination(all_blocks, current);
5299
5300        let block = &mut all_blocks[current.index()];
5301        block.next.instruction = match block.next.instruction {
5302            ControlInst::Call { ra, target, target_return } => {
5303                block.ops.push((
5304                    block.next.source.clone(),
5305                    BasicInst::LoadAddress {
5306                        dst: ra,
5307                        target: AnyTarget::Code(target_return),
5308                    },
5309                ));
5310                ControlInst::Jump { target }
5311            }
5312            ControlInst::CallIndirect {
5313                ra,
5314                target_return,
5315                base,
5316                offset,
5317            } if ra != base => {
5318                block.ops.push((
5319                    block.next.source.clone(),
5320                    BasicInst::LoadAddress {
5321                        dst: ra,
5322                        target: AnyTarget::Code(target_return),
5323                    },
5324                ));
5325                ControlInst::JumpIndirect { base, offset }
5326            }
5327            instruction => instruction,
5328        };
5329
5330        optimize_queue.push(current);
5331    }
5332
5333    let mut unknown_counter = 0;
5334    let mut input_regs_for_block = Vec::with_capacity(all_blocks.len());
5335    let mut output_regs_for_block = Vec::with_capacity(all_blocks.len());
5336    for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
5337        input_regs_for_block.push(BlockRegs::new_input(bitness, current));
5338        output_regs_for_block.push(BlockRegs::new_output(bitness, current));
5339    }
5340
5341    let mut registers_needed_for_block = Vec::with_capacity(all_blocks.len());
5342    for _ in 0..all_blocks.len() {
5343        registers_needed_for_block.push(RegMask::all())
5344    }
5345
5346    let mut count_inline = 0;
5347    let mut count_dce = 0;
5348    let mut count_cp = 0;
5349
5350    let mut inline_history: HashSet<(BlockTarget, BlockTarget)> = HashSet::new(); // Necessary to prevent infinite loops.
5351    macro_rules! run_optimizations {
5352        ($current:expr, $optimize_queue:expr) => {{
5353            let mut modified = false;
5354            if reachability_graph.is_code_reachable($current) {
5355                perform_nop_elimination(all_blocks, $current);
5356
5357                if perform_inlining(
5358                    all_blocks,
5359                    reachability_graph,
5360                    exports,
5361                    $optimize_queue,
5362                    &mut inline_history,
5363                    config.inline_threshold,
5364                    $current,
5365                ) {
5366                    count_inline += 1;
5367                    modified |= true;
5368                }
5369
5370                if perform_dead_code_elimination(
5371                    config,
5372                    imports,
5373                    all_blocks,
5374                    &mut registers_needed_for_block,
5375                    reachability_graph,
5376                    $optimize_queue,
5377                    $current,
5378                ) {
5379                    count_dce += 1;
5380                    modified |= true;
5381                }
5382
5383                if perform_constant_propagation(
5384                    imports,
5385                    elf,
5386                    all_blocks,
5387                    &mut input_regs_for_block,
5388                    &mut output_regs_for_block,
5389                    &mut unknown_counter,
5390                    reachability_graph,
5391                    $optimize_queue,
5392                    $current,
5393                ) {
5394                    count_cp += 1;
5395                    modified |= true;
5396                }
5397            }
5398
5399            modified
5400        }};
5401    }
5402
5403    for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
5404        run_optimizations!(current, None);
5405    }
5406
5407    garbage_collect_reachability(all_blocks, reachability_graph);
5408
5409    let timestamp = std::time::Instant::now();
5410    let mut opt_iteration_count = 0;
5411    while let Some(current) = optimize_queue.pop_non_unique() {
5412        loop {
5413            if !run_optimizations!(current, Some(&mut optimize_queue)) {
5414                break;
5415            }
5416        }
5417        opt_iteration_count += 1;
5418    }
5419
5420    log::debug!(
5421        "Optimizing the program took {opt_iteration_count} iteration(s) and {}ms",
5422        timestamp.elapsed().as_millis()
5423    );
5424    log::debug!("             Inlinining: {count_inline}");
5425    log::debug!("  Dead code elimination: {count_dce}");
5426    log::debug!("   Constant propagation: {count_cp}");
5427    garbage_collect_reachability(all_blocks, reachability_graph);
5428
5429    inline_history.clear();
5430    count_inline = 0;
5431    count_dce = 0;
5432    count_cp = 0;
5433
5434    let timestamp = std::time::Instant::now();
5435    let mut opt_brute_force_iterations = 0;
5436    let mut modified = true;
5437    while modified {
5438        opt_brute_force_iterations += 1;
5439        modified = false;
5440        for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
5441            modified |= run_optimizations!(current, Some(&mut optimize_queue));
5442        }
5443
5444        while let Some(current) = optimize_queue.pop_non_unique() {
5445            loop {
5446                if !run_optimizations!(current, Some(&mut optimize_queue)) {
5447                    break;
5448                }
5449            }
5450        }
5451
5452        if modified {
5453            garbage_collect_reachability(all_blocks, reachability_graph);
5454        }
5455    }
5456
5457    perform_load_address_and_jump_fusion(all_blocks, reachability_graph);
5458
5459    log::debug!(
5460        "Optimizing the program took {} brute force iteration(s) and {} ms",
5461        opt_brute_force_iterations - 1,
5462        timestamp.elapsed().as_millis()
5463    );
5464    log::debug!("             Inlinining: {count_inline}");
5465    log::debug!("  Dead code elimination: {count_dce}");
5466    log::debug!("   Constant propagation: {count_cp}");
5467}
5468
5469#[cfg(test)]
5470mod test {
5471    use super::*;
5472    use polkavm::Reg;
5473
    /// A test helper for constructing a small program out of individual
    /// instructions and exports, mimicking what is normally extracted from an ELF.
    struct ProgramBuilder {
        // The section reserved for data (index 0; see `new`).
        data_section: SectionIndex,
        // The section that `push` currently appends instructions to.
        current_section: SectionIndex,
        // The next index to be handed out by `add_section`.
        next_free_section: SectionIndex,
        // Byte offset at which the next instruction of each section will be placed.
        next_offset_for_section: HashMap<SectionIndex, u64>,
        // All instructions pushed so far, with their source locations.
        instructions: Vec<(Source, InstExt<SectionTarget, SectionTarget>)>,
        // Entry points exported from the program.
        exports: Vec<Export>,
    }
5482
    /// A built test program: its disassembly plus an instantiated VM to run it.
    struct TestProgram {
        // Human-readable disassembly of the final program blob.
        disassembly: String,
        // A raw PolkaVM instance ready to execute the program.
        instance: polkavm::RawInstance,
    }
5487
5488    impl ProgramBuilder {
5489        fn new() -> Self {
5490            ProgramBuilder {
5491                data_section: SectionIndex::new(0),
5492                current_section: SectionIndex::new(1),
5493                next_free_section: SectionIndex::new(1),
5494                next_offset_for_section: HashMap::default(),
5495                instructions: Vec::new(),
5496                exports: Vec::new(),
5497            }
5498        }
5499
5500        fn from_assembly(assembly: &str) -> Self {
5501            let mut b = Self::new();
5502            b.append_assembly(assembly);
5503            b
5504        }
5505
5506        fn add_export(&mut self, name: impl AsRef<[u8]>, input_regs: u8, output_regs: u8, location: SectionTarget) {
5507            self.exports.push(Export {
5508                location,
5509                metadata: ExternMetadata {
5510                    index: None,
5511                    symbol: name.as_ref().to_owned(),
5512                    input_regs,
5513                    output_regs,
5514                },
5515            })
5516        }
5517
5518        fn add_section(&mut self) -> SectionTarget {
5519            let index = self.next_free_section;
5520            self.next_offset_for_section.insert(index, 0);
5521            self.next_free_section = SectionIndex::new(index.raw() + 1);
5522            SectionTarget {
5523                section_index: index,
5524                offset: 0,
5525            }
5526        }
5527
5528        fn switch_section(&mut self, section_index: impl Into<SectionIndex>) {
5529            self.current_section = section_index.into();
5530        }
5531
5532        fn current_source(&self) -> Source {
5533            let next_offset = self.next_offset_for_section.get(&self.current_section).copied().unwrap_or(0);
5534            Source {
5535                section_index: self.current_section,
5536                offset_range: (next_offset..next_offset + 4).into(),
5537            }
5538        }
5539
5540        fn push(&mut self, inst: impl Into<InstExt<SectionTarget, SectionTarget>>) -> SectionTarget {
5541            let source = self.current_source();
5542            *self.next_offset_for_section.get_mut(&self.current_section).unwrap() += 4;
5543            self.instructions.push((source, inst.into()));
5544            source.begin()
5545        }
5546
5547        fn append_assembly(&mut self, assembly: &str) {
5548            let raw_blob = polkavm_common::assembler::assemble(assembly).unwrap();
5549            let blob = ProgramBlob::parse(raw_blob.into()).unwrap();
5550            let mut program_counter_to_section_target = HashMap::new();
5551            let mut program_counter_to_instruction_index = HashMap::new();
5552            let mut in_new_block = true;
5553            for instruction in blob.instructions(Bitness::B32) {
5554                if in_new_block {
5555                    let block = self.add_section();
5556                    self.switch_section(block);
5557                    program_counter_to_section_target.insert(instruction.offset, block);
5558                    in_new_block = false;
5559                }
5560
5561                program_counter_to_instruction_index.insert(instruction.offset, self.instructions.len());
5562                self.push(BasicInst::Nop);
5563
5564                if instruction.kind.starts_new_basic_block() {
5565                    in_new_block = true;
5566                }
5567            }
5568
5569            for instruction in blob.instructions(Bitness::B32) {
5570                let out = &mut self.instructions[*program_counter_to_instruction_index.get(&instruction.offset).unwrap()].1;
5571                match instruction.kind {
5572                    Instruction::fallthrough => {
5573                        let target = *program_counter_to_section_target.get(&instruction.next_offset).unwrap();
5574                        *out = ControlInst::Jump { target }.into();
5575                    }
5576                    Instruction::jump(target) => {
5577                        let target = *program_counter_to_section_target.get(&polkavm::ProgramCounter(target)).unwrap();
5578                        *out = ControlInst::Jump { target }.into();
5579                    }
5580                    Instruction::load_imm(dst, imm) => {
5581                        *out = BasicInst::LoadImmediate {
5582                            dst: dst.into(),
5583                            imm: cast(imm).to_signed(),
5584                        }
5585                        .into();
5586                    }
5587                    Instruction::add_imm_32(dst, src, imm) => {
5588                        *out = BasicInst::AnyAny {
5589                            kind: AnyAnyKind::Add32,
5590                            dst: dst.into(),
5591                            src1: src.into(),
5592                            src2: cast(imm).to_signed().into(),
5593                        }
5594                        .into();
5595                    }
5596                    Instruction::add_32(dst, src1, src2) => {
5597                        *out = BasicInst::AnyAny {
5598                            kind: AnyAnyKind::Add32,
5599                            dst: dst.into(),
5600                            src1: src1.into(),
5601                            src2: src2.into(),
5602                        }
5603                        .into();
5604                    }
5605                    Instruction::branch_less_unsigned_imm(src1, src2, target) | Instruction::branch_eq_imm(src1, src2, target) => {
5606                        let target_true = *program_counter_to_section_target.get(&polkavm::ProgramCounter(target)).unwrap();
5607                        let target_false = *program_counter_to_section_target.get(&instruction.next_offset).unwrap();
5608                        *out = ControlInst::Branch {
5609                            kind: match instruction.kind {
5610                                Instruction::branch_less_unsigned_imm(..) => BranchKind::LessUnsigned32,
5611                                Instruction::branch_eq_imm(..) => BranchKind::Eq32,
5612                                _ => unreachable!(),
5613                            },
5614                            src1: src1.into(),
5615                            src2: cast(src2).to_signed().into(),
5616                            target_true,
5617                            target_false,
5618                        }
5619                        .into();
5620                    }
5621                    Instruction::jump_indirect(base, 0) => {
5622                        *out = ControlInst::JumpIndirect {
5623                            base: base.into(),
5624                            offset: 0,
5625                        }
5626                        .into();
5627                    }
5628                    Instruction::trap => {
5629                        *out = ControlInst::Unimplemented.into();
5630                    }
5631                    Instruction::store_u32(src, address) => {
5632                        *out = BasicInst::StoreAbsolute {
5633                            kind: StoreKind::U32,
5634                            src: src.into(),
5635                            target: SectionTarget {
5636                                section_index: self.data_section,
5637                                offset: u64::from(address),
5638                            },
5639                        }
5640                        .into();
5641                    }
5642                    Instruction::store_indirect_u32(src, base, offset) => {
5643                        *out = BasicInst::StoreIndirect {
5644                            kind: StoreKind::U32,
5645                            src: src.into(),
5646                            base: base.into(),
5647                            offset: cast(offset).to_signed(),
5648                        }
5649                        .into();
5650                    }
5651                    _ => unimplemented!("{instruction:?}"),
5652                }
5653            }
5654
5655            for export in blob.exports() {
5656                let input_regs = 1;
5657                let output_regs = 1;
5658                let target = program_counter_to_section_target.get(&export.program_counter()).unwrap();
5659                self.add_export(export.symbol().as_bytes(), input_regs, output_regs, *target);
5660            }
5661        }
5662
5663        fn build(&self, config: Config) -> TestProgram {
5664            let elf: Elf<object::elf::FileHeader32<object::endian::LittleEndian>> = Elf::default();
5665            let data_sections_set: HashSet<_> = core::iter::once(self.data_section).collect();
5666            let code_sections_set: HashSet<_> = self.next_offset_for_section.keys().copied().collect();
5667            let relocations = BTreeMap::default();
5668            let imports = [];
5669            let mut exports = self.exports.clone();
5670
5671            // TODO: Refactor the main code so that we don't have to copy-paste this here.
5672            let all_jump_targets = harvest_all_jump_targets(
5673                &elf,
5674                &data_sections_set,
5675                &code_sections_set,
5676                &self.instructions,
5677                &relocations,
5678                &exports,
5679            )
5680            .unwrap();
5681
5682            let all_blocks = split_code_into_basic_blocks(&elf, &Default::default(), &all_jump_targets, self.instructions.clone()).unwrap();
5683            let mut section_to_block = build_section_to_block_map(&all_blocks).unwrap();
5684            let mut all_blocks = resolve_basic_block_references(&data_sections_set, &section_to_block, &all_blocks).unwrap();
5685            let mut reachability_graph =
5686                calculate_reachability(&section_to_block, &all_blocks, &data_sections_set, &exports, &relocations).unwrap();
5687            if matches!(config.opt_level, OptLevel::O2) {
5688                optimize_program(&config, &elf, &imports, &mut all_blocks, &mut reachability_graph, &mut exports);
5689            }
5690            let mut used_blocks = collect_used_blocks(&all_blocks, &reachability_graph);
5691
5692            if matches!(config.opt_level, OptLevel::O2) {
5693                used_blocks = add_missing_fallthrough_blocks(&mut all_blocks, &mut reachability_graph, used_blocks);
5694                merge_consecutive_fallthrough_blocks(&mut all_blocks, &mut reachability_graph, &mut section_to_block, &mut used_blocks);
5695                replace_immediates_with_registers(&mut all_blocks, &imports, &used_blocks);
5696            }
5697
5698            let expected_reachability_graph =
5699                calculate_reachability(&section_to_block, &all_blocks, &data_sections_set, &exports, &relocations).unwrap();
5700            assert!(reachability_graph == expected_reachability_graph);
5701
5702            let used_imports = HashSet::new();
5703            let mut base_address_for_section = HashMap::new();
5704            base_address_for_section.insert(self.data_section, 0);
5705            let section_got = self.next_free_section;
5706            let target_to_got_offset = HashMap::new();
5707
5708            let (jump_table, jump_target_for_block) = build_jump_table(all_blocks.len(), &used_blocks, &reachability_graph);
5709            let code = emit_code(
5710                &Default::default(),
5711                &imports,
5712                &base_address_for_section,
5713                section_got,
5714                &target_to_got_offset,
5715                &all_blocks,
5716                &used_blocks,
5717                &used_imports,
5718                &jump_target_for_block,
5719                true,
5720                false,
5721                0,
5722            )
5723            .unwrap();
5724
5725            let mut builder = ProgramBlobBuilder::new();
5726
5727            let mut export_count = 0;
5728            for current in used_blocks {
5729                for &export_index in &reachability_graph.for_code.get(&current).unwrap().exports {
5730                    let export = &exports[export_index];
5731                    let jump_target = jump_target_for_block[current.index()]
5732                        .expect("internal error: export metadata points to a block without a jump target assigned");
5733
5734                    builder.add_export_by_basic_block(jump_target.static_target, &export.metadata.symbol);
5735                    export_count += 1;
5736                }
5737            }
5738            assert_eq!(export_count, exports.len());
5739
5740            let mut raw_code = Vec::with_capacity(code.len());
5741            for (_, inst) in code {
5742                raw_code.push(inst);
5743            }
5744
5745            builder.set_code(&raw_code, &jump_table);
5746            builder.set_rw_data_size(1);
5747
5748            let blob = ProgramBlob::parse(builder.to_vec().into()).unwrap();
5749            let mut disassembler = polkavm_disassembler::Disassembler::new(&blob, polkavm_disassembler::DisassemblyFormat::Guest).unwrap();
5750            disassembler.emit_header(false);
5751            disassembler.show_offsets(false);
5752            let mut buf = Vec::new();
5753            disassembler.disassemble_into(&mut buf).unwrap();
5754            let disassembly = String::from_utf8(buf).unwrap();
5755
5756            let mut config = polkavm::Config::from_env().unwrap();
5757            config.set_backend(Some(polkavm::BackendKind::Interpreter));
5758            let engine = polkavm::Engine::new(&config).unwrap();
5759            let mut module_config = polkavm::ModuleConfig::default();
5760            module_config.set_gas_metering(Some(polkavm::GasMeteringKind::Sync));
5761            let module = polkavm::Module::from_blob(&engine, &module_config, blob).unwrap();
5762            let mut instance = module.instantiate().unwrap();
5763            instance.set_gas(10000);
5764            instance.set_reg(polkavm::Reg::RA, polkavm::RETURN_TO_HOST);
5765            let pc = module.exports().find(|export| export.symbol() == "main").unwrap().program_counter();
5766            instance.set_next_program_counter(pc);
5767
5768            TestProgram { disassembly, instance }
5769        }
5770
5771        fn test_optimize(
5772            &self,
5773            mut run: impl FnMut(&mut polkavm::RawInstance),
5774            mut check: impl FnMut(&mut polkavm::RawInstance, &mut polkavm::RawInstance),
5775            expected_disassembly: &str,
5776        ) {
5777            let mut unopt = self.build(Config {
5778                opt_level: OptLevel::O0,
5779                ..Config::default()
5780            });
5781            let mut opt = self.build(Config {
5782                opt_level: OptLevel::O2,
5783                ..Config::default()
5784            });
5785
5786            log::info!("Unoptimized disassembly:\n{}", unopt.disassembly);
5787            log::info!("Optimized disassembly:\n{}", opt.disassembly);
5788
5789            run(&mut unopt.instance);
5790            run(&mut opt.instance);
5791
5792            check(&mut opt.instance, &mut unopt.instance);
5793
5794            fn normalize(s: &str) -> String {
5795                let mut out = String::new();
5796                for line in s.trim().lines() {
5797                    if !line.trim().starts_with('@') {
5798                        out.push_str("    ");
5799                    }
5800                    out.push_str(line.trim());
5801                    out.push('\n');
5802                }
5803                out
5804            }
5805
5806            let is_todo = expected_disassembly.trim() == "TODO";
5807            let actual_normalized = normalize(&opt.disassembly);
5808            let expected_normalized = normalize(expected_disassembly);
5809            if actual_normalized != expected_normalized && !is_todo {
5810                use core::fmt::Write;
5811                let mut output_actual = String::new();
5812                let mut output_expected = String::new();
5813                for diff in diff::lines(&actual_normalized, &expected_normalized) {
5814                    match diff {
5815                        diff::Result::Left(line) => {
5816                            writeln!(&mut output_actual, "{}", yansi::Paint::red(line)).unwrap();
5817                        }
5818                        diff::Result::Both(line, _) => {
5819                            writeln!(&mut output_actual, "{}", line).unwrap();
5820                            writeln!(&mut output_expected, "{}", line).unwrap();
5821                        }
5822                        diff::Result::Right(line) => {
5823                            writeln!(&mut output_expected, "{}", line).unwrap();
5824                        }
5825                    }
5826                }
5827
5828                {
5829                    use std::io::Write;
5830                    let stderr = std::io::stderr();
5831                    let mut stderr = stderr.lock();
5832
5833                    writeln!(&mut stderr, "Optimization test failed!\n").unwrap();
5834                    writeln!(&mut stderr, "Expected optimized:").unwrap();
5835                    writeln!(&mut stderr, "{output_expected}").unwrap();
5836                    writeln!(&mut stderr, "Actual optimized:").unwrap();
5837                    writeln!(&mut stderr, "{output_actual}").unwrap();
5838                }
5839
5840                panic!("optimized program is not what we've expected")
5841            }
5842
5843            if is_todo {
5844                todo!();
5845            }
5846        }
5847
5848        fn test_optimize_oneshot(
5849            assembly: &str,
5850            expected_disassembly: &str,
5851            run: impl FnMut(&mut polkavm::RawInstance),
5852            check: impl FnMut(&mut polkavm::RawInstance, &mut polkavm::RawInstance),
5853        ) {
5854            let _ = env_logger::try_init();
5855            let b = ProgramBuilder::from_assembly(assembly);
5856            b.test_optimize(run, check, expected_disassembly);
5857        }
5858    }
5859
5860    fn expect_finished(i: &mut polkavm::RawInstance) {
5861        assert!(matches!(i.run().unwrap(), polkavm::InterruptKind::Finished));
5862    }
5863
5864    fn expect_regs(regs: impl IntoIterator<Item = (Reg, u64)> + Clone) -> impl FnMut(&mut polkavm::RawInstance, &mut polkavm::RawInstance) {
5865        move |a: &mut polkavm::RawInstance, b: &mut polkavm::RawInstance| {
5866            for (reg, value) in regs.clone() {
5867                assert_eq!(b.reg(reg), value);
5868                assert_eq!(a.reg(reg), b.reg(reg));
5869            }
5870        }
5871    }
5872
5873    #[test]
5874    fn test_optimize_01_empty_block_elimination() {
5875        ProgramBuilder::test_optimize_oneshot(
5876            "
5877            pub @main:
5878                jump @loop
5879            @before_loop:
5880                jump @loop
5881            @loop:
5882                i32 a0 = a0 + 0x1
5883                jump @before_loop if a0 <u 10
5884                ret
5885            ",
5886            "
5887            @0 [export #0: 'main']
5888                a0 = a0 + 0x1
5889                jump @0 if a0 <u 10
5890            @1
5891                ret
5892            ",
5893            expect_finished,
5894            expect_regs([(Reg::A0, 10)]),
5895        )
5896    }
5897
5898    #[test]
5899    fn test_optimize_02_simple_constant_propagation() {
5900        ProgramBuilder::test_optimize_oneshot(
5901            "
5902            pub @main:
5903                a1 = 0
5904                i32 a1 = a1 + 1
5905            @loop:
5906                i32 a0 = a0 + a1
5907                jump @loop if a0 <u 10
5908                ret
5909            ",
5910            "
5911            @0 [export #0: 'main']
5912                a1 = 0x1
5913                fallthrough
5914            @1
5915                a0 = a0 + a1
5916                jump @1 if a0 <u 10
5917            @2
5918                ret
5919            ",
5920            expect_finished,
5921            expect_regs([(Reg::A0, 10), (Reg::A1, 1)]),
5922        )
5923    }
5924
5925    #[test]
5926    fn test_optimize_03_simple_dead_code_elimination() {
5927        ProgramBuilder::test_optimize_oneshot(
5928            "
5929            pub @main:
5930                i32 a1 = a1 + 100
5931                a1 = 8
5932                i32 a2 = a2 + 0
5933                i32 a0 = a0 + 1
5934                jump @main if a0 <u 10
5935                ret
5936            ",
5937            "
5938            @0 [export #0: 'main']
5939                a1 = 0x8
5940                a0 = a0 + 0x1
5941                jump @0 if a0 <u 10
5942            @1
5943                ret
5944            ",
5945            expect_finished,
5946            expect_regs([(Reg::A0, 10), (Reg::A1, 8)]),
5947        )
5948    }
5949}
5950
5951fn collect_used_blocks(all_blocks: &[BasicBlock<AnyTarget, BlockTarget>], reachability_graph: &ReachabilityGraph) -> Vec<BlockTarget> {
5952    let mut used_blocks = Vec::new();
5953    for block in all_blocks {
5954        if !reachability_graph.is_code_reachable(block.target) {
5955            continue;
5956        }
5957
5958        used_blocks.push(block.target);
5959    }
5960
5961    used_blocks
5962}
5963
/// For every used block which cannot simply fall through to the next block in
/// the emission order, appends a trampoline block which makes the fallthrough
/// edge explicit, and returns the new emission order including the trampolines.
fn add_missing_fallthrough_blocks(
    all_blocks: &mut Vec<BasicBlock<AnyTarget, BlockTarget>>,
    reachability_graph: &mut ReachabilityGraph,
    used_blocks: Vec<BlockTarget>,
) -> Vec<BlockTarget> {
    let mut new_used_blocks = Vec::new();
    let can_fallthrough_to_next_block = calculate_whether_can_fallthrough(all_blocks, &used_blocks);
    for current in used_blocks {
        new_used_blocks.push(current);
        // Blocks that already fall through to their successor need no fixup.
        if can_fallthrough_to_next_block.contains(&current) {
            continue;
        }

        // Only blocks whose terminator actually has a fallthrough edge matter.
        let Some(target) = all_blocks[current.index()].next.instruction.fallthrough_target_mut().copied() else {
            continue;
        };

        // If the fallthrough target is a different, empty block whose own
        // terminator has no fallthrough edge, we can copy that terminator
        // into the trampoline instead of jumping to the target.
        let inline_target = target != current
            && all_blocks[target.index()].ops.is_empty()
            && all_blocks[target.index()].next.instruction.fallthrough_target_mut().is_none();

        // Append the trampoline: either a clone of the target's terminator
        // (when inlinable) or a plain jump to the original fallthrough target.
        let new_block_index = BlockTarget::from_raw(all_blocks.len());
        all_blocks.push(BasicBlock {
            target: new_block_index,
            source: all_blocks[current.index()].source,
            ops: Default::default(),
            next: if inline_target {
                all_blocks[target.index()].next.clone()
            } else {
                EndOfBlock {
                    source: all_blocks[current.index()].next.source.clone(),
                    instruction: ControlInst::Jump { target },
                }
            },
        });

        new_used_blocks.push(new_block_index);

        // Temporarily pin the new block as always-reachable while its
        // outgoing references are registered — presumably so the update
        // doesn't treat it as dead mid-flight; the pin is dropped right after.
        reachability_graph
            .for_code
            .entry(new_block_index)
            .or_insert(Reachability::default())
            .always_reachable = true;
        update_references(all_blocks, reachability_graph, None, new_block_index, Default::default());
        reachability_graph.for_code.get_mut(&new_block_index).unwrap().always_reachable = false;

        // Redirect the current block's fallthrough edge to the trampoline and
        // refresh its reference bookkeeping.
        let references = gather_references(&all_blocks[current.index()]);
        *all_blocks[current.index()].next.instruction.fallthrough_target_mut().unwrap() = new_block_index;
        update_references(all_blocks, reachability_graph, None, current, references);
    }

    new_used_blocks
}
6017
/// Removes empty blocks which do nothing but jump to the block immediately
/// following them in the emission order, rewriting every code and data
/// reference to point at the successor and pruning the removed blocks from
/// `used_blocks` and the reachability graph.
fn merge_consecutive_fallthrough_blocks(
    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
    reachability_graph: &mut ReachabilityGraph,
    section_to_block: &mut HashMap<SectionTarget, BlockTarget>,
    used_blocks: &mut Vec<BlockTarget>,
) {
    // Merging needs a (current, next) pair, so nothing to do for < 2 blocks.
    if used_blocks.len() < 2 {
        return;
    }

    // Blocks merged away in this pass; verified unreachable at the end.
    let mut removed = HashSet::new();
    for nth_block in 0..used_blocks.len() - 1 {
        let current = used_blocks[nth_block];
        let next = used_blocks[nth_block + 1];

        // Find blocks which are empty...
        if !all_blocks[current.index()].ops.is_empty() {
            continue;
        }

        // ...and which immediately jump somewhere else.
        {
            let ControlInst::Jump { target } = all_blocks[current.index()].next.instruction else {
                continue;
            };
            if target != next {
                continue;
            }
        }

        // Blocks pinned by an export (or otherwise always reachable) must
        // keep their identity, so they cannot be merged away.
        let current_reachability = reachability_graph.for_code.get_mut(&current).unwrap();
        if current_reachability.always_reachable_or_exported() {
            continue;
        }

        removed.insert(current);

        // Gather all other basic blocks which reference this block.
        let referenced_by_code: BTreeSet<BlockTarget> = current_reachability
            .reachable_from
            .iter()
            .copied()
            .chain(current_reachability.address_taken_in.iter().copied())
            .collect();

        // Replace code references to this block.
        for dep in referenced_by_code {
            let references = gather_references(&all_blocks[dep.index()]);
            // Retarget references inside the block's body...
            for (_, op) in &mut all_blocks[dep.index()].ops {
                *op = op
                    .map_target(|target| {
                        Ok::<_, ()>(if target == AnyTarget::Code(current) {
                            AnyTarget::Code(next)
                        } else {
                            target
                        })
                    })
                    .unwrap();
            }

            // ...and in its terminator.
            all_blocks[dep.index()].next.instruction = all_blocks[dep.index()]
                .next
                .instruction
                .map_target(|target| Ok::<_, ()>(if target == current { next } else { target }))
                .unwrap();

            update_references(all_blocks, reachability_graph, None, dep, references);
        }

        // Remove it from the graph if it's globally unreachable now.
        remove_code_if_globally_unreachable(all_blocks, reachability_graph, None, current);

        // If the block is already gone from the graph there are no data
        // references left to migrate.
        let Some(current_reachability) = reachability_graph.for_code.get_mut(&current) else {
            continue;
        };

        if !current_reachability.referenced_by_data.is_empty() {
            // Find all section targets which correspond to this block...
            let section_targets: Vec<SectionTarget> = section_to_block
                .iter()
                .filter(|&(_, block_target)| *block_target == current)
                .map(|(section_target, _)| *section_target)
                .collect();

            // ...then make them to point to the new block.
            for section_target in section_targets {
                section_to_block.insert(section_target, next);
            }

            // Grab all of the data sections which reference the current block.
            let referenced_by_data = core::mem::take(&mut current_reachability.referenced_by_data);

            // Mark the next block as referenced by all of the data sections which reference the current block.
            reachability_graph
                .for_code
                .get_mut(&next)
                .unwrap()
                .referenced_by_data
                .extend(referenced_by_data.iter().copied());

            // Mark the data sections as NOT referencing the current block, and make them reference the next block.
            for section_index in &referenced_by_data {
                if let Some(list) = reachability_graph.code_references_in_data_section.get_mut(section_index) {
                    list.retain(|&target| target != current);
                    list.push(next);
                    list.sort_unstable();
                    list.dedup();
                }
            }
        }

        // Try the removal again now that the data references are migrated.
        remove_code_if_globally_unreachable(all_blocks, reachability_graph, None, current);
    }

    // Every merged-away block must have become unreachable.
    for &current in &removed {
        assert!(
            !reachability_graph.is_code_reachable(current),
            "block {current:?} still reachable: {:#?}",
            reachability_graph.for_code.get(&current)
        );
    }

    used_blocks.retain(|current| !removed.contains(current));
}
6142
6143fn spill_fake_registers(
6144    section_regspill: SectionIndex,
6145    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
6146    reachability_graph: &mut ReachabilityGraph,
6147    imports: &[Import],
6148    used_blocks: &[BlockTarget],
6149    regspill_size: &mut usize,
6150    is_rv64: bool,
6151) {
6152    struct RegAllocBlock<'a> {
6153        instructions: &'a [Vec<regalloc2::Operand>],
6154        num_vregs: usize,
6155    }
6156
6157    impl<'a> regalloc2::Function for RegAllocBlock<'a> {
6158        fn num_insts(&self) -> usize {
6159            self.instructions.len()
6160        }
6161
6162        fn num_blocks(&self) -> usize {
6163            1
6164        }
6165
6166        fn entry_block(&self) -> regalloc2::Block {
6167            regalloc2::Block(0)
6168        }
6169
6170        fn block_insns(&self, _block: regalloc2::Block) -> regalloc2::InstRange {
6171            regalloc2::InstRange::forward(regalloc2::Inst(0), regalloc2::Inst(self.instructions.len() as u32))
6172        }
6173
6174        fn block_succs(&self, _block: regalloc2::Block) -> &[regalloc2::Block] {
6175            &[]
6176        }
6177
6178        fn block_preds(&self, _block: regalloc2::Block) -> &[regalloc2::Block] {
6179            &[]
6180        }
6181
6182        fn block_params(&self, _block: regalloc2::Block) -> &[regalloc2::VReg] {
6183            &[]
6184        }
6185
6186        fn is_ret(&self, insn: regalloc2::Inst) -> bool {
6187            insn.0 as usize + 1 == self.instructions.len()
6188        }
6189
6190        fn is_branch(&self, _insn: regalloc2::Inst) -> bool {
6191            false
6192        }
6193
6194        fn branch_blockparams(&self, _block: regalloc2::Block, _insn: regalloc2::Inst, _succ_idx: usize) -> &[regalloc2::VReg] {
6195            unimplemented!();
6196        }
6197
6198        fn inst_operands(&self, insn: regalloc2::Inst) -> &[regalloc2::Operand] {
6199            &self.instructions[insn.0 as usize]
6200        }
6201
6202        fn inst_clobbers(&self, _insn: regalloc2::Inst) -> regalloc2::PRegSet {
6203            regalloc2::PRegSet::empty()
6204        }
6205
6206        fn num_vregs(&self) -> usize {
6207            self.num_vregs
6208        }
6209
6210        fn spillslot_size(&self, _regclass: regalloc2::RegClass) -> usize {
6211            1
6212        }
6213    }
6214
6215    let fake_mask = RegMask::fake();
6216    for current in used_blocks {
6217        let block = &mut all_blocks[current.index()];
6218        let Some(start_at) = block
6219            .ops
6220            .iter()
6221            .position(|(_, instruction)| !((instruction.src_mask(imports) | instruction.dst_mask(imports)) & fake_mask).is_empty())
6222        else {
6223            continue;
6224        };
6225
6226        let end_at = {
6227            let mut end_at = start_at + 1;
6228            for index in start_at..block.ops.len() {
6229                let instruction = block.ops[index].1;
6230                if !((instruction.src_mask(imports) | instruction.dst_mask(imports)) & fake_mask).is_empty() {
6231                    end_at = index + 1;
6232                }
6233            }
6234            end_at
6235        };
6236
6237        // This block uses one or more "fake" registers which are not supported by the VM.
6238        //
6239        // So we have to spill those register into memory and modify the block in such a way
6240        // that it only uses "real" registers natively supported by the VM.
6241        //
6242        // This is not going to be particularily pretty nor very fast at run time, but it is done only as the last restort.
6243
6244        let mut counter = 0;
6245        let mut reg_to_value_index: [usize; Reg::ALL.len()] = Default::default();
6246        let mut instructions = Vec::new();
6247
6248        let mut prologue = Vec::new();
6249        for reg in RegMask::all() {
6250            let value_index = counter;
6251            counter += 1;
6252            reg_to_value_index[reg as usize] = value_index;
6253            prologue.push(regalloc2::Operand::new(
6254                regalloc2::VReg::new(value_index, regalloc2::RegClass::Int),
6255                regalloc2::OperandConstraint::FixedReg(regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int)),
6256                regalloc2::OperandKind::Def,
6257                regalloc2::OperandPos::Late,
6258            ));
6259        }
6260
6261        instructions.push(prologue);
6262
6263        for nth_instruction in start_at..end_at {
6264            let (_, instruction) = &block.ops[nth_instruction];
6265            let mut operands = Vec::new();
6266
6267            for (reg, kind) in instruction.operands(imports) {
6268                match kind {
6269                    OpKind::Write => {
6270                        let value_index = counter;
6271                        counter += 1;
6272                        reg_to_value_index[reg as usize] = value_index;
6273                        operands.push(regalloc2::Operand::new(
6274                            regalloc2::VReg::new(value_index, regalloc2::RegClass::Int),
6275                            if reg.fake_register_index().is_none() {
6276                                regalloc2::OperandConstraint::FixedReg(regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int))
6277                            } else {
6278                                regalloc2::OperandConstraint::Reg
6279                            },
6280                            regalloc2::OperandKind::Def,
6281                            regalloc2::OperandPos::Late,
6282                        ));
6283                    }
6284                    OpKind::Read => {
6285                        let value_index = reg_to_value_index[reg as usize];
6286                        operands.push(regalloc2::Operand::new(
6287                            regalloc2::VReg::new(value_index, regalloc2::RegClass::Int),
6288                            if reg.fake_register_index().is_none() {
6289                                regalloc2::OperandConstraint::FixedReg(regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int))
6290                            } else {
6291                                regalloc2::OperandConstraint::Reg
6292                            },
6293                            regalloc2::OperandKind::Use,
6294                            regalloc2::OperandPos::Early,
6295                        ));
6296                    }
6297                    OpKind::ReadWrite => {
6298                        let value_index_read = reg_to_value_index[reg as usize];
6299                        operands.push(regalloc2::Operand::new(
6300                            regalloc2::VReg::new(value_index_read, regalloc2::RegClass::Int),
6301                            if reg.fake_register_index().is_none() {
6302                                regalloc2::OperandConstraint::FixedReg(regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int))
6303                            } else {
6304                                regalloc2::OperandConstraint::Reg
6305                            },
6306                            regalloc2::OperandKind::Use,
6307                            regalloc2::OperandPos::Early,
6308                        ));
6309
6310                        let value_index_write = counter;
6311                        counter += 1;
6312
6313                        reg_to_value_index[reg as usize] = value_index_write;
6314                        operands.push(regalloc2::Operand::new(
6315                            regalloc2::VReg::new(value_index_write, regalloc2::RegClass::Int),
6316                            regalloc2::OperandConstraint::Reuse(operands.len() - 1),
6317                            regalloc2::OperandKind::Def,
6318                            regalloc2::OperandPos::Late,
6319                        ));
6320                    }
6321                }
6322            }
6323
6324            instructions.push(operands);
6325        }
6326
6327        let mut epilogue = Vec::new();
6328        for reg in RegMask::all() & !RegMask::fake() {
6329            let value_index = reg_to_value_index[reg as usize];
6330            epilogue.push(regalloc2::Operand::new(
6331                regalloc2::VReg::new(value_index, regalloc2::RegClass::Int),
6332                regalloc2::OperandConstraint::FixedReg(regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int)),
6333                regalloc2::OperandKind::Use,
6334                regalloc2::OperandPos::Early,
6335            ));
6336        }
6337
6338        instructions.push(epilogue);
6339
6340        let alloc_block = RegAllocBlock {
6341            instructions: &instructions,
6342            num_vregs: counter,
6343        };
6344
6345        let env = regalloc2::MachineEnv {
6346            preferred_regs_by_class: [
6347                [Reg::T0, Reg::T1, Reg::T2]
6348                    .map(|reg| regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int))
6349                    .into(),
6350                vec![],
6351                vec![],
6352            ],
6353            non_preferred_regs_by_class: [
6354                [Reg::S0, Reg::S1]
6355                    .map(|reg| regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int))
6356                    .into(),
6357                vec![],
6358                vec![],
6359            ],
6360            scratch_by_class: [None, None, None],
6361            fixed_stack_slots: vec![],
6362        };
6363
6364        let opts = regalloc2::RegallocOptions {
6365            validate_ssa: true,
6366            ..regalloc2::RegallocOptions::default()
6367        };
6368
6369        let output = match regalloc2::run(&alloc_block, &env, &opts) {
6370            Ok(output) => output,
6371            Err(regalloc2::RegAllocError::SSA(vreg, inst)) => {
6372                let nth_instruction: isize = inst.index() as isize - 1 + start_at as isize;
6373                let instruction = block.ops.get(nth_instruction as usize).map(|(_, instruction)| instruction);
6374                panic!("internal error: register allocation failed because of invalid SSA for {vreg} for instruction {instruction:?}");
6375            }
6376            Err(error) => {
6377                panic!("internal error: register allocation failed: {error}")
6378            }
6379        };
6380
6381        let mut buffer = Vec::new();
6382        let mut edits = output.edits.into_iter().peekable();
6383        for nth_instruction in start_at..=end_at {
6384            while let Some((next_edit_at, edit)) = edits.peek() {
6385                let target_nth_instruction: isize = next_edit_at.inst().index() as isize - 1 + start_at as isize;
6386                if target_nth_instruction < 0
6387                    || target_nth_instruction > nth_instruction as isize
6388                    || (target_nth_instruction == nth_instruction as isize && next_edit_at.pos() == regalloc2::InstPosition::After)
6389                {
6390                    break;
6391                }
6392
6393                let target_nth_instruction = target_nth_instruction as usize;
6394                let regalloc2::Edit::Move { from: src, to: dst } = edit.clone();
6395
6396                // Advance the iterator so that we can use `continue` later.
6397                edits.next();
6398
6399                let reg_size = if is_rv64 { 8 } else { 4 };
6400                let src_reg = src.as_reg();
6401                let dst_reg = dst.as_reg();
6402                let new_instruction = match (dst_reg, src_reg) {
6403                    (Some(dst_reg), None) => {
6404                        let dst_reg = Reg::from_usize(dst_reg.hw_enc()).unwrap();
6405                        let src_slot = src.as_stack().unwrap();
6406                        let offset = src_slot.index() * reg_size;
6407                        *regspill_size = core::cmp::max(*regspill_size, offset + reg_size);
6408                        BasicInst::LoadAbsolute {
6409                            kind: if is_rv64 { LoadKind::U64 } else { LoadKind::I32 },
6410                            dst: dst_reg,
6411                            target: SectionTarget {
6412                                section_index: section_regspill,
6413                                offset: cast(offset).to_u64(),
6414                            },
6415                        }
6416                    }
6417                    (None, Some(src_reg)) => {
6418                        let src_reg = Reg::from_usize(src_reg.hw_enc()).unwrap();
6419                        let dst_slot = dst.as_stack().unwrap();
6420                        let offset = dst_slot.index() * reg_size;
6421                        *regspill_size = core::cmp::max(*regspill_size, offset + reg_size);
6422                        BasicInst::StoreAbsolute {
6423                            kind: if is_rv64 { StoreKind::U64 } else { StoreKind::U32 },
6424                            src: src_reg.into(),
6425                            target: SectionTarget {
6426                                section_index: section_regspill,
6427                                offset: cast(offset).to_u64(),
6428                            },
6429                        }
6430                    }
6431                    (Some(dst_reg), Some(src_reg)) => {
6432                        let dst_reg = Reg::from_usize(dst_reg.hw_enc()).unwrap();
6433                        let src_reg = Reg::from_usize(src_reg.hw_enc()).unwrap();
6434                        if src_reg == dst_reg {
6435                            continue;
6436                        }
6437
6438                        BasicInst::MoveReg {
6439                            dst: dst_reg,
6440                            src: src_reg,
6441                        }
6442                    }
6443                    // Won't be emitted according to `regalloc2` docs.
6444                    (None, None) => unreachable!(),
6445                };
6446
6447                log::trace!("Injected:\n     {new_instruction:?}");
6448
6449                let source = block.ops.get(target_nth_instruction).or(block.ops.last()).unwrap().0.clone();
6450                buffer.push((source, new_instruction));
6451            }
6452
6453            if nth_instruction == end_at {
6454                assert!(edits.next().is_none());
6455                break;
6456            }
6457
6458            let (source, instruction) = &block.ops[nth_instruction];
6459            let mut alloc_index = output.inst_alloc_offsets[nth_instruction - start_at + 1];
6460            let new_instruction = instruction
6461                .map_register(|reg, _| {
6462                    let alloc = &output.allocs[alloc_index as usize];
6463                    alloc_index += 1;
6464
6465                    assert_eq!(alloc.kind(), regalloc2::AllocationKind::Reg);
6466                    let allocated_reg = Reg::from_usize(alloc.as_reg().unwrap().hw_enc() as usize).unwrap();
6467                    if reg.fake_register_index().is_none() {
6468                        assert_eq!(reg, allocated_reg);
6469                    } else {
6470                        assert_ne!(reg, allocated_reg);
6471                        assert!(allocated_reg.fake_register_index().is_none());
6472                    }
6473
6474                    allocated_reg
6475                })
6476                .unwrap_or(*instruction);
6477
6478            if *instruction == new_instruction {
6479                log::trace!("Unmodified:\n     {instruction:?}");
6480            } else {
6481                log::trace!("Replaced:\n     {instruction:?}\n  -> {new_instruction:?}");
6482            }
6483
6484            buffer.push((source.clone(), new_instruction));
6485        }
6486
6487        assert!(edits.next().is_none());
6488
6489        reachability_graph
6490            .for_data
6491            .entry(section_regspill)
6492            .or_default()
6493            .address_taken_in
6494            .insert(*current);
6495
6496        block.ops.splice(start_at..end_at, buffer);
6497    }
6498
6499    for current in used_blocks {
6500        if all_blocks[current.index()]
6501            .ops
6502            .iter()
6503            .any(|(_, instruction)| !((instruction.src_mask(imports) | instruction.dst_mask(imports)) & fake_mask).is_empty())
6504        {
6505            panic!("internal error: not all fake registers were removed")
6506        }
6507    }
6508}
6509
// Replaces immediate operands with registers which are already known to hold
// the exact same value, reducing the number of immediates that have to be
// encoded. The tracking is purely local to each basic block.
#[deny(clippy::as_conversions)]
fn replace_immediates_with_registers(
    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
    imports: &[Import],
    used_blocks: &[BlockTarget],
) {
    // Maps a known immediate value to the set of registers currently holding it.
    let mut imm_to_reg: HashMap<i64, RegMask> = HashMap::new();
    for block_target in used_blocks {
        // Reverse map: for each register, the immediate it's known to hold (if any).
        let mut reg_to_imm: [Option<i64>; Reg::ALL.len()] = [None; Reg::ALL.len()];
        // Knowledge doesn't carry across block boundaries.
        imm_to_reg.clear();

        // If there already exists a register which contains a given immediate value
        // then there's no point in duplicating it here again; just use that register.
        macro_rules! replace {
            ($src:ident) => {
                if let RegImm::Imm(imm) = $src {
                    let imm = cast(*imm).to_i64_sign_extend();
                    // Zero is excluded since the zero immediate is trivially encodable.
                    if imm != 0 {
                        let mask = imm_to_reg.get(&imm).copied().unwrap_or(RegMask::empty());
                        if let Some(reg) = mask.into_iter().next() {
                            *$src = RegImm::Reg(reg);
                        }
                    }
                }
            };
        }

        for (_, op) in &mut all_blocks[block_target.index()].ops {
            match op {
                BasicInst::LoadImmediate { dst, imm } => {
                    // `dst` is overwritten: forget whatever immediate it used to hold...
                    if let Some(old_imm) = reg_to_imm[dst.to_usize()].take() {
                        imm_to_reg.get_mut(&old_imm).unwrap().remove(*dst);
                    }

                    // ...and record the new one in both maps.
                    let imm = cast(*imm).to_i64_sign_extend();
                    imm_to_reg.entry(imm).or_insert(RegMask::empty()).insert(*dst);
                    reg_to_imm[dst.to_usize()] = Some(imm);
                    // Skip the generic dst invalidation below; the maps are already up to date.
                    continue;
                }
                BasicInst::LoadImmediate64 { dst, imm } => {
                    // Same as above, but the immediate is already a full 64-bit value.
                    if let Some(old_imm) = reg_to_imm[dst.to_usize()].take() {
                        imm_to_reg.get_mut(&old_imm).unwrap().remove(*dst);
                    }

                    imm_to_reg.entry(*imm).or_insert(RegMask::empty()).insert(*dst);
                    reg_to_imm[dst.to_usize()] = Some(*imm);
                    continue;
                }
                BasicInst::AnyAny {
                    kind,
                    ref mut src1,
                    ref mut src2,
                    ..
                } => {
                    replace!(src1);
                    // NOTE(review): `src2` of shifts/rotates (the shift amount) is
                    // deliberately kept as an immediate — presumably because a
                    // register operand isn't equivalent/beneficial there; confirm
                    // against the instruction encoding before changing this.
                    if !matches!(
                        kind,
                        AnyAnyKind::ShiftLogicalLeft32
                            | AnyAnyKind::ShiftLogicalRight32
                            | AnyAnyKind::ShiftArithmeticRight32
                            | AnyAnyKind::ShiftLogicalLeft64
                            | AnyAnyKind::ShiftLogicalRight64
                            | AnyAnyKind::ShiftArithmeticRight64
                            | AnyAnyKind::ShiftLogicalLeft32AndSignExtend
                            | AnyAnyKind::ShiftLogicalRight32AndSignExtend
                            | AnyAnyKind::ShiftArithmeticRight32AndSignExtend
                            | AnyAnyKind::RotateRight32
                            | AnyAnyKind::RotateRight32AndSignExtend
                            | AnyAnyKind::RotateRight64
                    ) {
                        replace!(src2);
                    }
                }
                BasicInst::StoreAbsolute { src, .. } => {
                    replace!(src);
                }
                BasicInst::StoreIndirect { src, .. } => {
                    replace!(src);
                }
                BasicInst::Cmov { src, .. } => {
                    replace!(src);
                }
                _ => {}
            }

            // Any register written by this instruction no longer holds its
            // previously tracked immediate.
            for reg in op.dst_mask(imports) {
                if let Some(imm) = reg_to_imm[reg.to_usize()].take() {
                    imm_to_reg.get_mut(&imm).unwrap().remove(reg);
                }
            }
        }

        // Also replace immediates in the block terminator's comparison operands.
        if let ControlInst::Branch {
            ref mut src1,
            ref mut src2,
            ..
        } = all_blocks[block_target.index()].next.instruction
        {
            replace!(src1);
            replace!(src2);
        }
    }
}
6613
6614fn harvest_all_jump_targets<H>(
6615    elf: &Elf<H>,
6616    data_sections_set: &HashSet<SectionIndex>,
6617    code_sections_set: &HashSet<SectionIndex>,
6618    instructions: &[(Source, InstExt<SectionTarget, SectionTarget>)],
6619    relocations: &BTreeMap<SectionTarget, RelocationKind>,
6620    exports: &[Export],
6621) -> Result<HashSet<SectionTarget>, ProgramFromElfError>
6622where
6623    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
6624{
6625    let mut all_jump_targets = HashSet::new();
6626    for (_, instruction) in instructions {
6627        match instruction {
6628            InstExt::Basic(instruction) => {
6629                let (data_target, code_or_data_target) = instruction.target();
6630                if let Some(target) = data_target {
6631                    if !data_sections_set.contains(&target.section_index) {
6632                        return Err(ProgramFromElfError::other(
6633                            "found basic instruction which refers to a non-data section",
6634                        ));
6635                    }
6636                }
6637
6638                if let Some(target) = code_or_data_target {
6639                    if code_sections_set.contains(&target.section_index) {
6640                        if all_jump_targets.insert(target) {
6641                            log::trace!("Adding jump target: {target} (referenced indirectly by code)");
6642                        }
6643                    } else if !data_sections_set.contains(&target.section_index) {
6644                        return Err(ProgramFromElfError::other(
6645                            "found basic instruction which refers to neither a data nor a text section",
6646                        ));
6647                    }
6648                }
6649            }
6650            InstExt::Control(instruction) => {
6651                for target in instruction.targets().into_iter().flatten() {
6652                    if !code_sections_set.contains(&target.section_index) {
6653                        return Err(ProgramFromElfError::other(
6654                            "found control instruction which refers to a non-text section",
6655                        ));
6656                    }
6657
6658                    if all_jump_targets.insert(*target) {
6659                        log::trace!("Adding jump target: {target} (referenced by a control instruction)");
6660                    }
6661                }
6662            }
6663        }
6664    }
6665
6666    for (source_location, relocation) in relocations {
6667        if !data_sections_set.contains(&source_location.section_index) {
6668            continue;
6669        }
6670
6671        for target in relocation.targets().into_iter().flatten() {
6672            #[allow(clippy::collapsible_if)]
6673            if code_sections_set.contains(&target.section_index) {
6674                if all_jump_targets.insert(target) {
6675                    log::trace!(
6676                        "Adding jump target: {target} (referenced by relocation from {source_location} in '{}')",
6677                        elf.section_by_index(source_location.section_index).name()
6678                    );
6679                }
6680            }
6681        }
6682    }
6683
6684    for export in exports {
6685        let target = export.location;
6686        if !code_sections_set.contains(&target.section_index) {
6687            return Err(ProgramFromElfError::other("export points to a non-code section"));
6688        }
6689
6690        if all_jump_targets.insert(target) {
6691            log::trace!("Adding jump target: {target} (referenced by export)");
6692        }
6693    }
6694
6695    Ok(all_jump_targets)
6696}
6697
/// A FIFO queue which also remembers every value ever pushed into it,
/// so that each unique value gets enqueued at most once.
struct VecSet<T> {
    vec: VecDeque<T>,
    set: HashSet<T>,
}

impl<T> VecSet<T> {
    /// Creates an empty queue.
    fn new() -> Self {
        VecSet {
            vec: VecDeque::new(),
            set: HashSet::new(),
        }
    }

    /// Pops the next value while keeping it in the "seen" set,
    /// so the same value can never be enqueued again.
    fn pop_unique(&mut self) -> Option<T> {
        self.vec.pop_front()
    }

    /// Pops the next value and forgets that it was ever seen,
    /// allowing it to be pushed (and processed) again later.
    fn pop_non_unique(&mut self) -> Option<T>
    where
        T: core::hash::Hash + Eq,
    {
        // Popping from the front instead of the back cuts down on the time
        // the optimizer takes for the Westend runtime from ~53s down to ~2.6s
        match self.vec.pop_front() {
            Some(value) => {
                self.set.remove(&value);
                Some(value)
            }
            None => None,
        }
    }

    /// Enqueues a value, unless it is already tracked in the set.
    fn push(&mut self, value: T)
    where
        T: core::hash::Hash + Eq + Clone,
    {
        let is_new = self.set.insert(value.clone());
        if is_new {
            self.vec.push_back(value);
        }
    }

    /// Returns whether there are no more values waiting in the queue.
    fn is_empty(&self) -> bool {
        self.vec.is_empty()
    }

    /// Consumes the queue and returns the set of currently tracked values.
    fn into_set(self) -> HashSet<T> {
        self.set
    }
}
6743
// Tracks, for every basic block and data section, the reasons it is reachable.
#[derive(PartialEq, Eq, Debug, Default)]
struct ReachabilityGraph {
    // Reachability info for each reachable basic block.
    for_code: BTreeMap<BlockTarget, Reachability>,
    // Reachability info for each reachable data section.
    for_data: BTreeMap<SectionIndex, Reachability>,
    // For each data section: the basic blocks referenced by its relocations.
    code_references_in_data_section: BTreeMap<SectionIndex, Vec<BlockTarget>>,
    // For each data section: the other data sections referenced by its relocations.
    data_references_in_data_section: BTreeMap<SectionIndex, Vec<SectionIndex>>,
}
6751
6752impl ReachabilityGraph {
6753    fn is_code_reachable(&self, block_target: BlockTarget) -> bool {
6754        if let Some(reachability) = self.for_code.get(&block_target) {
6755            assert!(
6756                !reachability.is_unreachable(),
6757                "Block {block_target:?} is unreachable and yet it wasn't removed from the graph!"
6758            );
6759            true
6760        } else {
6761            false
6762        }
6763    }
6764
6765    fn is_data_section_reachable(&self, section_index: SectionIndex) -> bool {
6766        if let Some(reachability) = self.for_data.get(&section_index) {
6767            assert!(!reachability.is_unreachable());
6768            true
6769        } else {
6770            false
6771        }
6772    }
6773
6774    fn mark_data_section_reachable(&mut self, section_index: SectionIndex) {
6775        self.for_data.entry(section_index).or_default().always_reachable = true;
6776    }
6777}
6778
// Records every reason a given basic block or data section is reachable;
// an entity for which all of these are empty/false is dead.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Default)]
struct Reachability {
    // Blocks whose control instructions jump directly to this one.
    reachable_from: BTreeSet<BlockTarget>,
    // Blocks in which the address of this entity is taken.
    address_taken_in: BTreeSet<BlockTarget>,
    // Data sections whose relocations reference this entity.
    referenced_by_data: BTreeSet<SectionIndex>,
    // Forced reachable regardless of references (e.g. via `mark_data_section_reachable`).
    always_reachable: bool,
    // Forced *indirectly* reachable regardless of references.
    always_dynamically_reachable: bool,
    // Indices into the exports list of the exports which point at this entity.
    exports: Vec<usize>,
}
6788
6789impl Reachability {
6790    fn is_only_reachable_from(&self, block_target: BlockTarget) -> bool {
6791        !self.always_reachable
6792            && !self.always_dynamically_reachable
6793            && self.referenced_by_data.is_empty()
6794            && self.address_taken_in.is_empty()
6795            && self.reachable_from.len() == 1
6796            && self.reachable_from.contains(&block_target)
6797            && self.exports.is_empty()
6798    }
6799
6800    fn is_unreachable(&self) -> bool {
6801        self.reachable_from.is_empty()
6802            && self.address_taken_in.is_empty()
6803            && self.referenced_by_data.is_empty()
6804            && !self.always_reachable
6805            && !self.always_dynamically_reachable
6806            && self.exports.is_empty()
6807    }
6808
6809    fn is_dynamically_reachable(&self) -> bool {
6810        !self.address_taken_in.is_empty() || !self.referenced_by_data.is_empty() || self.always_dynamically_reachable
6811    }
6812
6813    fn always_reachable_or_exported(&self) -> bool {
6814        self.always_reachable || !self.exports.is_empty()
6815    }
6816}
6817
// A single outgoing reference made by a basic block.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum ExtRef {
    // The address of the given block is taken.
    Address(BlockTarget),
    // The given block is directly jumped to.
    Jump(BlockTarget),
    // The address of the given data section is taken.
    DataAddress(SectionIndex),
}
6824
6825fn each_reference_for_basic_instruction(instruction: &BasicInst<AnyTarget>, mut cb: impl FnMut(ExtRef)) {
6826    let (data_target, code_or_data_target) = instruction.target();
6827    if let Some(target) = data_target {
6828        cb(ExtRef::DataAddress(target.section_index));
6829    }
6830
6831    if let Some(target) = code_or_data_target {
6832        match target {
6833            AnyTarget::Code(target) => {
6834                cb(ExtRef::Address(target));
6835            }
6836            AnyTarget::Data(target) => {
6837                cb(ExtRef::DataAddress(target.section_index));
6838            }
6839        }
6840    }
6841}
6842
6843fn each_reference_for_control_instruction(instruction: &ControlInst<BlockTarget>, mut cb: impl FnMut(ExtRef)) {
6844    match *instruction {
6845        ControlInst::Jump { target } => {
6846            cb(ExtRef::Jump(target));
6847        }
6848        ControlInst::Call { target, target_return, .. } => {
6849            cb(ExtRef::Jump(target));
6850            cb(ExtRef::Address(target_return));
6851        }
6852        ControlInst::CallIndirect { target_return, .. } => {
6853            cb(ExtRef::Address(target_return));
6854        }
6855        ControlInst::Branch {
6856            target_true, target_false, ..
6857        } => {
6858            cb(ExtRef::Jump(target_true));
6859            cb(ExtRef::Jump(target_false));
6860        }
6861        ControlInst::JumpIndirect { .. } | ControlInst::Unimplemented => {}
6862    }
6863}
6864
6865fn each_reference(block: &BasicBlock<AnyTarget, BlockTarget>, mut cb: impl FnMut(ExtRef)) {
6866    for (_, instruction) in &block.ops {
6867        each_reference_for_basic_instruction(instruction, &mut cb);
6868    }
6869
6870    each_reference_for_control_instruction(&block.next.instruction, cb);
6871}
6872
6873fn calculate_reachability(
6874    section_to_block: &HashMap<SectionTarget, BlockTarget>,
6875    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
6876    data_sections_set: &HashSet<SectionIndex>,
6877    exports: &[Export],
6878    relocations: &BTreeMap<SectionTarget, RelocationKind>,
6879) -> Result<ReachabilityGraph, ProgramFromElfError> {
6880    let mut graph = ReachabilityGraph::default();
6881    let mut data_queue: VecSet<SectionTarget> = VecSet::new();
6882    let mut block_queue: VecSet<BlockTarget> = VecSet::new();
6883    let mut section_queue: VecSet<SectionIndex> = VecSet::new();
6884    let mut relocations_per_section: HashMap<SectionIndex, Vec<&RelocationKind>> = HashMap::new();
6885    for (relocation_location, relocation) in relocations.iter() {
6886        relocations_per_section
6887            .entry(relocation_location.section_index)
6888            .or_insert_with(Vec::new)
6889            .push(relocation);
6890    }
6891
6892    for (export_index, export) in exports.iter().enumerate() {
6893        let Some(&block_target) = section_to_block.get(&export.location) else {
6894            return Err(ProgramFromElfError::other("export points to a non-block"));
6895        };
6896
6897        graph.for_code.entry(block_target).or_default().exports.push(export_index);
6898        block_queue.push(block_target);
6899    }
6900
6901    while !block_queue.is_empty() || !data_queue.is_empty() {
6902        while let Some(current_block) = block_queue.pop_unique() {
6903            each_reference(&all_blocks[current_block.index()], |ext| match ext {
6904                ExtRef::Jump(target) => {
6905                    graph.for_code.entry(target).or_default().reachable_from.insert(current_block);
6906                    block_queue.push(target);
6907                }
6908                ExtRef::Address(target) => {
6909                    graph.for_code.entry(target).or_default().address_taken_in.insert(current_block);
6910                    block_queue.push(target)
6911                }
6912                ExtRef::DataAddress(target) => {
6913                    graph.for_data.entry(target).or_default().address_taken_in.insert(current_block);
6914                    section_queue.push(target)
6915                }
6916            });
6917        }
6918
6919        while let Some(target) = data_queue.pop_unique() {
6920            assert!(!section_to_block.contains_key(&target));
6921            assert!(data_sections_set.contains(&target.section_index));
6922            section_queue.push(target.section_index);
6923        }
6924
6925        while let Some(section_index) = section_queue.pop_unique() {
6926            let Some(local_relocations) = relocations_per_section.get(&section_index) else {
6927                continue;
6928            };
6929            for relocation in local_relocations {
6930                for relocation_target in relocation.targets().into_iter().flatten() {
6931                    if let Some(&block_target) = section_to_block.get(&relocation_target) {
6932                        graph
6933                            .code_references_in_data_section
6934                            .entry(section_index)
6935                            .or_default()
6936                            .push(block_target);
6937
6938                        graph
6939                            .for_code
6940                            .entry(block_target)
6941                            .or_default()
6942                            .referenced_by_data
6943                            .insert(section_index);
6944
6945                        block_queue.push(block_target);
6946                    } else {
6947                        graph
6948                            .data_references_in_data_section
6949                            .entry(section_index)
6950                            .or_default()
6951                            .push(relocation_target.section_index);
6952
6953                        graph
6954                            .for_data
6955                            .entry(relocation_target.section_index)
6956                            .or_default()
6957                            .referenced_by_data
6958                            .insert(section_index);
6959
6960                        data_queue.push(relocation_target);
6961                    }
6962                }
6963            }
6964        }
6965    }
6966
6967    for list in graph.code_references_in_data_section.values_mut() {
6968        list.sort_unstable();
6969        list.dedup();
6970    }
6971
6972    for list in graph.data_references_in_data_section.values_mut() {
6973        list.sort_unstable();
6974        list.dedup();
6975    }
6976
6977    for reachability in graph.for_code.values() {
6978        assert!(!reachability.is_unreachable());
6979    }
6980
6981    for reachability in graph.for_data.values() {
6982        assert!(!reachability.is_unreachable());
6983    }
6984
6985    assert_eq!(block_queue.set.len(), graph.for_code.len());
6986    Ok(graph)
6987}
6988
// A set of `Reg`s, represented as a bitmask with one bit per register
// (bit `n` corresponds to `Reg::ALL[n]`).
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Default)]
struct RegMask(u32);
6991
6992impl core::fmt::Debug for RegMask {
6993    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
6994        fmt.write_str("(")?;
6995        let mut is_first = true;
6996        for (nth, reg) in Reg::ALL.iter().enumerate() {
6997            if self.0 & (1 << nth) != 0 {
6998                if is_first {
6999                    is_first = false;
7000                } else {
7001                    fmt.write_str("|")?;
7002                }
7003                fmt.write_str(reg.name())?;
7004            }
7005        }
7006        fmt.write_str(")")?;
7007        Ok(())
7008    }
7009}
7010
/// Iterator over the registers contained in a [`RegMask`].
struct RegMaskIter {
    // Remaining mask bits; bit 0 always corresponds to `remaining[0]`.
    mask: u32,
    // Registers not yet visited, in `Reg::ALL` order.
    remaining: &'static [Reg],
}
7015
7016impl Iterator for RegMaskIter {
7017    type Item = Reg;
7018    fn next(&mut self) -> Option<Self::Item> {
7019        loop {
7020            let reg = *self.remaining.get(0)?;
7021            let is_set = (self.mask & 1) != 0;
7022            self.remaining = &self.remaining[1..];
7023            self.mask >>= 1;
7024
7025            if is_set {
7026                return Some(reg);
7027            }
7028        }
7029    }
7030}
7031
impl IntoIterator for RegMask {
    type Item = Reg;
    type IntoIter = RegMaskIter;

    /// Iterates over every register whose bit is set, in `Reg::ALL` order.
    fn into_iter(self) -> Self::IntoIter {
        RegMaskIter {
            mask: self.0,
            remaining: &Reg::ALL,
        }
    }
}
7043
7044impl RegMask {
7045    fn all() -> Self {
7046        RegMask((1 << Reg::ALL.len()) - 1)
7047    }
7048
7049    fn fake() -> Self {
7050        let mut mask = RegMask(0);
7051        for reg in Reg::FAKE {
7052            mask.insert(reg);
7053        }
7054        mask
7055    }
7056
7057    fn empty() -> Self {
7058        RegMask(0)
7059    }
7060
7061    fn is_empty(self) -> bool {
7062        self == Self::empty()
7063    }
7064
7065    fn remove(&mut self, mask: impl Into<RegMask>) {
7066        *self &= !mask.into();
7067    }
7068
7069    fn insert(&mut self, mask: impl Into<RegMask>) {
7070        *self |= mask.into();
7071    }
7072}
7073
impl From<Reg> for RegMask {
    fn from(reg: Reg) -> Self {
        // A single register maps to a single bit, indexed by its discriminant.
        RegMask(1 << (reg as usize))
    }
}
7079
impl From<RegImm> for RegMask {
    fn from(rm: RegImm) -> Self {
        // An immediate operand references no registers, so it maps to an
        // empty mask.
        match rm {
            RegImm::Reg(reg) => reg.into(),
            RegImm::Imm(_) => Self::empty(),
        }
    }
}
7088
impl core::ops::Not for RegMask {
    type Output = Self;
    // Bitwise complement; note that this also sets bits above
    // `Reg::ALL.len()`, which no register corresponds to.
    fn not(self) -> Self {
        RegMask(!self.0)
    }
}
7095
impl core::ops::BitAnd for RegMask {
    type Output = Self;
    // Intersection of two register masks.
    fn bitand(self, rhs: RegMask) -> Self {
        RegMask(self.0 & rhs.0)
    }
}
7102
impl core::ops::BitAnd<Reg> for RegMask {
    type Output = Self;
    // Intersection with the single-register mask of `rhs`.
    fn bitand(self, rhs: Reg) -> Self {
        self & RegMask::from(rhs)
    }
}
7109
impl core::ops::BitAndAssign for RegMask {
    // In-place intersection with another mask.
    fn bitand_assign(&mut self, rhs: RegMask) {
        self.0 &= rhs.0;
    }
}
7115
impl core::ops::BitAndAssign<Reg> for RegMask {
    // In-place intersection with the single-register mask of `rhs`.
    fn bitand_assign(&mut self, rhs: Reg) {
        self.bitand_assign(RegMask::from(rhs));
    }
}
7121
impl core::ops::BitOr for RegMask {
    type Output = Self;
    // Union of two register masks.
    fn bitor(self, rhs: RegMask) -> Self {
        RegMask(self.0 | rhs.0)
    }
}
7128
impl core::ops::BitOr<Reg> for RegMask {
    type Output = Self;
    // Union with the single-register mask of `rhs`.
    fn bitor(self, rhs: Reg) -> Self {
        self | RegMask::from(rhs)
    }
}
7135
impl core::ops::BitOrAssign for RegMask {
    // In-place union with another mask.
    fn bitor_assign(&mut self, rhs: RegMask) {
        self.0 |= rhs.0;
    }
}
7141
impl core::ops::BitOrAssign<Reg> for RegMask {
    // In-place union with the single-register mask of `rhs`.
    fn bitor_assign(&mut self, rhs: Reg) {
        self.bitor_assign(RegMask::from(rhs));
    }
}
7147
#[test]
fn test_all_regs_indexes() {
    // `RegMask` relies on each register's discriminant matching its
    // position within `Reg::ALL`; verify that invariant.
    assert!(Reg::ALL.iter().enumerate().all(|(index, &reg)| index == reg as usize));
}
7154
/// Jump-table slots assigned to a basic block (see `build_jump_table`).
#[derive(Copy, Clone)]
struct JumpTarget {
    // Position of the block within the list of used blocks; used for
    // direct (static) jumps.
    static_target: u32,
    // One-based index into the emitted jump table, present only when the
    // block is dynamically reachable (i.e. can be jumped to indirectly).
    dynamic_target: Option<u32>,
}
7160
7161fn build_jump_table(
7162    total_block_count: usize,
7163    used_blocks: &[BlockTarget],
7164    reachability_graph: &ReachabilityGraph,
7165) -> (Vec<u32>, Vec<Option<JumpTarget>>) {
7166    let mut jump_target_for_block: Vec<Option<JumpTarget>> = Vec::new();
7167    jump_target_for_block.resize(total_block_count, None);
7168
7169    let mut jump_table = Vec::new();
7170    for (static_target, current) in used_blocks.iter().enumerate() {
7171        let reachability = reachability_graph.for_code.get(current).unwrap();
7172        assert!(!reachability.is_unreachable());
7173
7174        let dynamic_target = if reachability.is_dynamically_reachable() {
7175            let dynamic_target: u32 = (jump_table.len() + 1).try_into().expect("jump table index overflow");
7176            jump_table.push(static_target.try_into().expect("jump table index overflow"));
7177            Some(dynamic_target)
7178        } else {
7179            None
7180        };
7181
7182        jump_target_for_block[current.index()] = Some(JumpTarget {
7183            static_target: static_target.try_into().expect("jump table index overflow"),
7184            dynamic_target,
7185        });
7186    }
7187
7188    (jump_table, jump_target_for_block)
7189}
7190
7191fn calculate_whether_can_fallthrough(
7192    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
7193    used_blocks: &[BlockTarget],
7194) -> HashSet<BlockTarget> {
7195    let mut can_fallthrough_to_next_block: HashSet<BlockTarget> = HashSet::new();
7196    for window in used_blocks.windows(2) {
7197        match all_blocks[window[0].index()].next.instruction {
7198            ControlInst::Jump { target }
7199            | ControlInst::Branch { target_false: target, .. }
7200            | ControlInst::Call { target_return: target, .. }
7201            | ControlInst::CallIndirect { target_return: target, .. } => {
7202                if target == window[1] {
7203                    can_fallthrough_to_next_block.insert(window[0]);
7204                }
7205            }
7206
7207            ControlInst::JumpIndirect { .. } | ControlInst::Unimplemented => {}
7208        }
7209    }
7210
7211    can_fallthrough_to_next_block
7212}
7213
7214#[allow(clippy::too_many_arguments)]
7215fn emit_code(
7216    section_to_function_name: &BTreeMap<SectionTarget, &str>,
7217    imports: &[Import],
7218    base_address_for_section: &HashMap<SectionIndex, u64>,
7219    section_got: SectionIndex,
7220    target_to_got_offset: &HashMap<AnyTarget, u64>,
7221    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
7222    used_blocks: &[BlockTarget],
7223    used_imports: &HashSet<usize>,
7224    jump_target_for_block: &[Option<JumpTarget>],
7225    is_optimized: bool,
7226    is_rv64: bool,
7227    heap_base: u32,
7228) -> Result<Vec<(SourceStack, Instruction)>, ProgramFromElfError> {
7229    use polkavm_common::program::Reg as PReg;
7230    fn conv_reg(reg: Reg) -> polkavm_common::program::RawReg {
7231        match reg {
7232            Reg::RA => PReg::RA,
7233            Reg::SP => PReg::SP,
7234            Reg::T0 => PReg::T0,
7235            Reg::T1 => PReg::T1,
7236            Reg::T2 => PReg::T2,
7237            Reg::S0 => PReg::S0,
7238            Reg::S1 => PReg::S1,
7239            Reg::A0 => PReg::A0,
7240            Reg::A1 => PReg::A1,
7241            Reg::A2 => PReg::A2,
7242            Reg::A3 => PReg::A3,
7243            Reg::A4 => PReg::A4,
7244            Reg::A5 => PReg::A5,
7245            Reg::E0 | Reg::E1 | Reg::E2 | Reg::E3 => {
7246                unreachable!("internal error: temporary register was not spilled into memory");
7247            }
7248        }
7249        .into()
7250    }
7251
7252    let can_fallthrough_to_next_block = calculate_whether_can_fallthrough(all_blocks, used_blocks);
7253    let get_data_address = |source: &SourceStack, target: SectionTarget| -> Result<u32, ProgramFromElfError> {
7254        if let Some(&base_address) = base_address_for_section.get(&target.section_index) {
7255            let Some(address) = base_address.checked_add(target.offset) else {
7256                return Err(ProgramFromElfError::other(format!(
7257                    "address overflow when relocating instruction in {}",
7258                    source.display(section_to_function_name)
7259                )));
7260            };
7261
7262            let Ok(address) = address.try_into() else {
7263                return Err(ProgramFromElfError::other("address overflow when casting"));
7264            };
7265
7266            Ok(address)
7267        } else {
7268            Err(ProgramFromElfError::other("internal error: section with no base address"))
7269        }
7270    };
7271
7272    let get_jump_target = |target: BlockTarget| -> Result<JumpTarget, ProgramFromElfError> {
7273        let Some(jump_target) = jump_target_for_block[target.index()] else {
7274            return Err(ProgramFromElfError::other("out of range jump target"));
7275        };
7276
7277        Ok(jump_target)
7278    };
7279
7280    let mut basic_block_delimited = true;
7281    let mut code: Vec<(SourceStack, Instruction)> = Vec::new();
7282    for block_target in used_blocks {
7283        let block = &all_blocks[block_target.index()];
7284
7285        if !basic_block_delimited {
7286            basic_block_delimited = true;
7287            code.push((
7288                Source {
7289                    section_index: block.source.section_index,
7290                    offset_range: (block.source.offset_range.start..block.source.offset_range.start + 4).into(),
7291                }
7292                .into(),
7293                Instruction::fallthrough,
7294            ));
7295        }
7296
7297        macro_rules! codegen {
7298            (
7299                args = $args:tt,
7300                kind = $kind:expr,
7301
7302                {
7303                    $($p:pat => $inst:ident,)+
7304                }
7305            ) => {
7306                match $kind {
7307                    $(
7308                        $p => Instruction::$inst $args
7309                    ),+
7310                }
7311            }
7312        }
7313
7314        for (source, op) in &block.ops {
7315            let op = match *op {
7316                BasicInst::LoadImmediate { dst, imm } => Instruction::load_imm(conv_reg(dst), cast(imm).to_unsigned()),
7317                BasicInst::LoadImmediate64 { dst, imm } => {
7318                    if !is_rv64 {
7319                        unreachable!("internal error: load_imm64 found when processing 32-bit binary")
7320                    } else {
7321                        Instruction::load_imm64(conv_reg(dst), cast(imm).to_unsigned())
7322                    }
7323                }
7324                BasicInst::LoadHeapBase { dst } => Instruction::load_imm(conv_reg(dst), heap_base),
7325                BasicInst::LoadAbsolute { kind, dst, target } => {
7326                    codegen! {
7327                        args = (conv_reg(dst), get_data_address(source, target)?),
7328                        kind = kind,
7329                        {
7330                            LoadKind::I8 => load_i8,
7331                            LoadKind::I16 => load_i16,
7332                            LoadKind::I32 => load_i32,
7333                            LoadKind::U8 => load_u8,
7334                            LoadKind::U16 => load_u16,
7335                            LoadKind::U32 => load_u32,
7336                            LoadKind::U64 => load_u64,
7337                        }
7338                    }
7339                }
7340                BasicInst::StoreAbsolute { kind, src, target } => {
7341                    let target = get_data_address(source, target)?;
7342                    match src {
7343                        RegImm::Reg(src) => {
7344                            codegen! {
7345                                args = (conv_reg(src), target),
7346                                kind = kind,
7347                                {
7348                                    StoreKind::U64 => store_u64,
7349                                    StoreKind::U32 => store_u32,
7350                                    StoreKind::U16 => store_u16,
7351                                    StoreKind::U8 => store_u8,
7352                                }
7353                            }
7354                        }
7355                        RegImm::Imm(value) => {
7356                            codegen! {
7357                                args = (target, cast(value).to_unsigned()),
7358                                kind = kind,
7359                                {
7360                                    StoreKind::U64 => store_imm_u64,
7361                                    StoreKind::U32 => store_imm_u32,
7362                                    StoreKind::U16 => store_imm_u16,
7363                                    StoreKind::U8 => store_imm_u8,
7364                                }
7365                            }
7366                        }
7367                    }
7368                }
7369                BasicInst::LoadIndirect { kind, dst, base, offset } => {
7370                    codegen! {
7371                        args = (conv_reg(dst), conv_reg(base), cast(offset).to_unsigned()),
7372                        kind = kind,
7373                        {
7374                            LoadKind::I8 => load_indirect_i8,
7375                            LoadKind::I16 => load_indirect_i16,
7376                            LoadKind::I32 => load_indirect_i32,
7377                            LoadKind::U8 => load_indirect_u8,
7378                            LoadKind::U16 => load_indirect_u16,
7379                            LoadKind::U32 => load_indirect_u32,
7380                            LoadKind::U64 => load_indirect_u64,
7381                        }
7382                    }
7383                }
7384                BasicInst::StoreIndirect { kind, src, base, offset } => match src {
7385                    RegImm::Reg(src) => {
7386                        codegen! {
7387                            args = (conv_reg(src), conv_reg(base), cast(offset).to_unsigned()),
7388                            kind = kind,
7389                            {
7390                                StoreKind::U64 => store_indirect_u64,
7391                                StoreKind::U32 => store_indirect_u32,
7392                                StoreKind::U16 => store_indirect_u16,
7393                                StoreKind::U8 => store_indirect_u8,
7394                            }
7395                        }
7396                    }
7397                    RegImm::Imm(value) => {
7398                        codegen! {
7399                            args = (conv_reg(base), cast(offset).to_unsigned(), cast(value).to_unsigned()),
7400                            kind = kind,
7401                            {
7402                                StoreKind::U64 => store_imm_indirect_u64,
7403                                StoreKind::U32 => store_imm_indirect_u32,
7404                                StoreKind::U16 => store_imm_indirect_u16,
7405                                StoreKind::U8 => store_imm_indirect_u8,
7406                            }
7407                        }
7408                    }
7409                },
7410                BasicInst::LoadAddress { dst, target } => {
7411                    let value = match target {
7412                        AnyTarget::Code(target) => {
7413                            let value = get_jump_target(target)?.dynamic_target.expect("missing jump target for address");
7414                            let Some(value) = value.checked_mul(VM_CODE_ADDRESS_ALIGNMENT) else {
7415                                return Err(ProgramFromElfError::other("overflow when emitting an address load"));
7416                            };
7417                            value
7418                        }
7419                        AnyTarget::Data(target) => get_data_address(source, target)?,
7420                    };
7421
7422                    Instruction::load_imm(conv_reg(dst), value)
7423                }
7424                BasicInst::LoadAddressIndirect { dst, target } => {
7425                    let Some(&offset) = target_to_got_offset.get(&target) else {
7426                        return Err(ProgramFromElfError::other(
7427                            "indirect address load without a corresponding GOT entry",
7428                        ));
7429                    };
7430
7431                    let target = SectionTarget {
7432                        section_index: section_got,
7433                        offset,
7434                    };
7435
7436                    let value = get_data_address(source, target)?;
7437                    if is_rv64 {
7438                        Instruction::load_u64(conv_reg(dst), value)
7439                    } else {
7440                        Instruction::load_i32(conv_reg(dst), value)
7441                    }
7442                }
7443                BasicInst::Reg { kind, dst, src } => {
7444                    codegen! {
7445                        args = (conv_reg(dst), conv_reg(src)),
7446                        kind = kind,
7447                        {
7448                            RegKind::CountLeadingZeroBits32 => count_leading_zero_bits_32,
7449                            RegKind::CountLeadingZeroBits64 => count_leading_zero_bits_64,
7450                            RegKind::CountSetBits32 => count_set_bits_32,
7451                            RegKind::CountSetBits64 => count_set_bits_64,
7452                            RegKind::CountTrailingZeroBits32 => count_trailing_zero_bits_32,
7453                            RegKind::CountTrailingZeroBits64 => count_trailing_zero_bits_64,
7454                            RegKind::ReverseByte => reverse_byte,
7455                            RegKind::SignExtend8 => sign_extend_8,
7456                            RegKind::SignExtend16 => sign_extend_16,
7457                            RegKind::ZeroExtend16 => zero_extend_16,
7458                        }
7459                    }
7460                }
7461                BasicInst::RegReg { kind, dst, src1, src2 } => {
7462                    use RegRegKind as K;
7463                    codegen! {
7464                        args = (conv_reg(dst), conv_reg(src1), conv_reg(src2)),
7465                        kind = kind,
7466                        {
7467                            K::MulUpperSignedSigned32 => mul_upper_signed_signed,
7468                            K::MulUpperSignedSigned64 => mul_upper_signed_signed,
7469                            K::MulUpperUnsignedUnsigned32 => mul_upper_unsigned_unsigned,
7470                            K::MulUpperUnsignedUnsigned64 => mul_upper_unsigned_unsigned,
7471                            K::MulUpperSignedUnsigned32 => mul_upper_signed_unsigned,
7472                            K::MulUpperSignedUnsigned64 => mul_upper_signed_unsigned,
7473                            K::Div32 => div_signed_32,
7474                            K::Div32AndSignExtend => div_signed_32,
7475                            K::Div64 => div_signed_64,
7476                            K::DivUnsigned32 => div_unsigned_32,
7477                            K::DivUnsigned32AndSignExtend => div_unsigned_32,
7478                            K::DivUnsigned64 => div_unsigned_64,
7479                            K::Rem32 => rem_signed_32,
7480                            K::Rem32AndSignExtend => rem_signed_32,
7481                            K::Rem64 => rem_signed_64,
7482                            K::RemUnsigned32 => rem_unsigned_32,
7483                            K::RemUnsigned32AndSignExtend => rem_unsigned_32,
7484                            K::RemUnsigned64 => rem_unsigned_64,
7485                            K::AndInverted => and_inverted,
7486                            K::OrInverted => or_inverted,
7487                            K::Xnor => xnor,
7488                            K::Maximum => maximum,
7489                            K::MaximumUnsigned => maximum_unsigned,
7490                            K::Minimum => minimum,
7491                            K::MinimumUnsigned => minimum_unsigned,
7492                            K::RotateLeft32 => rotate_left_32,
7493                            K::RotateLeft32AndSignExtend => rotate_left_32,
7494                            K::RotateLeft64 => rotate_left_64,
7495                        }
7496                    }
7497                }
7498                BasicInst::MoveReg { dst, src } => Instruction::move_reg(conv_reg(dst), conv_reg(src)),
7499                BasicInst::AnyAny { kind, dst, src1, src2 } => {
7500                    use AnyAnyKind as K;
7501                    use Instruction as I;
7502                    let dst = conv_reg(dst);
7503                    match (src1, src2) {
7504                        (RegImm::Reg(src1), RegImm::Reg(src2)) => {
7505                            codegen! {
7506                                args = (dst, conv_reg(src1), conv_reg(src2)),
7507                                kind = kind,
7508                                {
7509                                    K::Add32 => add_32,
7510                                    K::Add32AndSignExtend => add_32,
7511                                    K::Add64 => add_64,
7512                                    K::Sub32 => sub_32,
7513                                    K::Sub32AndSignExtend => sub_32,
7514                                    K::Sub64 => sub_64,
7515                                    K::ShiftLogicalLeft32 => shift_logical_left_32,
7516                                    K::ShiftLogicalLeft32AndSignExtend => shift_logical_left_32,
7517                                    K::ShiftLogicalLeft64 => shift_logical_left_64,
7518                                    K::SetLessThanSigned32 => set_less_than_signed,
7519                                    K::SetLessThanSigned64 => set_less_than_signed,
7520                                    K::SetLessThanUnsigned32 => set_less_than_unsigned,
7521                                    K::SetLessThanUnsigned64 => set_less_than_unsigned,
7522                                    K::Xor32 => xor,
7523                                    K::Xor64 => xor,
7524                                    K::ShiftLogicalRight32 => shift_logical_right_32,
7525                                    K::ShiftLogicalRight32AndSignExtend => shift_logical_right_32,
7526                                    K::ShiftLogicalRight64 => shift_logical_right_64,
7527                                    K::ShiftArithmeticRight32 => shift_arithmetic_right_32,
7528                                    K::ShiftArithmeticRight32AndSignExtend => shift_arithmetic_right_32,
7529                                    K::ShiftArithmeticRight64 => shift_arithmetic_right_64,
7530                                    K::Or32 => or,
7531                                    K::Or64 => or,
7532                                    K::And32 => and,
7533                                    K::And64 => and,
7534                                    K::Mul32 => mul_32,
7535                                    K::Mul32AndSignExtend => mul_32,
7536                                    K::Mul64 => mul_64,
7537                                    K::RotateRight32 => rotate_right_32,
7538                                    K::RotateRight32AndSignExtend => rotate_right_32,
7539                                    K::RotateRight64 => rotate_right_64,
7540                                }
7541                            }
7542                        }
7543                        (RegImm::Reg(src1), RegImm::Imm(src2)) => {
7544                            let src1 = conv_reg(src1);
7545                            let src2 = cast(src2).to_unsigned();
7546                            match kind {
7547                                K::Add32 => I::add_imm_32(dst, src1, src2),
7548                                K::Add32AndSignExtend => I::add_imm_32(dst, src1, src2),
7549                                K::Add64 => I::add_imm_64(dst, src1, src2),
7550                                K::Sub32 => I::add_imm_32(dst, src1, cast(-cast(src2).to_signed()).to_unsigned()),
7551                                K::Sub32AndSignExtend => I::add_imm_32(dst, src1, cast(-cast(src2).to_signed()).to_unsigned()),
7552                                K::Sub64 => I::add_imm_64(dst, src1, cast(-cast(src2).to_signed()).to_unsigned()),
7553                                K::ShiftLogicalLeft32 => I::shift_logical_left_imm_32(dst, src1, src2),
7554                                K::ShiftLogicalLeft32AndSignExtend => I::shift_logical_left_imm_32(dst, src1, src2),
7555                                K::ShiftLogicalLeft64 => I::shift_logical_left_imm_64(dst, src1, src2),
7556                                K::SetLessThanSigned32 => I::set_less_than_signed_imm(dst, src1, src2),
7557                                K::SetLessThanSigned64 => I::set_less_than_signed_imm(dst, src1, src2),
7558                                K::SetLessThanUnsigned32 => I::set_less_than_unsigned_imm(dst, src1, src2),
7559                                K::SetLessThanUnsigned64 => I::set_less_than_unsigned_imm(dst, src1, src2),
7560                                K::Xor32 | K::Xor64 => I::xor_imm(dst, src1, src2),
7561                                K::ShiftLogicalRight32 => I::shift_logical_right_imm_32(dst, src1, src2),
7562                                K::ShiftLogicalRight32AndSignExtend => I::shift_logical_right_imm_32(dst, src1, src2),
7563                                K::ShiftLogicalRight64 => I::shift_logical_right_imm_64(dst, src1, src2),
7564                                K::ShiftArithmeticRight32 => I::shift_arithmetic_right_imm_32(dst, src1, src2),
7565                                K::ShiftArithmeticRight32AndSignExtend => I::shift_arithmetic_right_imm_32(dst, src1, src2),
7566                                K::ShiftArithmeticRight64 => I::shift_arithmetic_right_imm_64(dst, src1, src2),
7567                                K::Or32 | K::Or64 => I::or_imm(dst, src1, src2),
7568                                K::And32 | K::And64 => I::and_imm(dst, src1, src2),
7569                                K::Mul32 => I::mul_imm_32(dst, src1, src2),
7570                                K::Mul32AndSignExtend => I::mul_imm_32(dst, src1, src2),
7571                                K::Mul64 => I::mul_imm_64(dst, src1, src2),
7572                                K::RotateRight32 => I::rotate_right_imm_32(dst, src1, src2),
7573                                K::RotateRight32AndSignExtend => I::rotate_right_imm_32(dst, src1, src2),
7574                                K::RotateRight64 => I::rotate_right_imm_64(dst, src1, src2),
7575                            }
7576                        }
7577                        (RegImm::Imm(src1), RegImm::Reg(src2)) => {
7578                            let src1 = cast(src1).to_unsigned();
7579                            let src2 = conv_reg(src2);
7580                            match kind {
7581                                K::Add32 => I::add_imm_32(dst, src2, src1),
7582                                K::Add32AndSignExtend => I::add_imm_32(dst, src2, src1),
7583                                K::Add64 => I::add_imm_64(dst, src2, src1),
7584                                K::Xor32 | K::Xor64 => I::xor_imm(dst, src2, src1),
7585                                K::Or32 | K::Or64 => I::or_imm(dst, src2, src1),
7586                                K::And32 | K::And64 => I::and_imm(dst, src2, src1),
7587                                K::Mul32 => I::mul_imm_32(dst, src2, src1),
7588                                K::Mul32AndSignExtend => I::mul_imm_32(dst, src2, src1),
7589                                K::Mul64 => I::mul_imm_64(dst, src2, src1),
7590
7591                                K::Sub32 => I::negate_and_add_imm_32(dst, src2, src1),
7592                                K::Sub32AndSignExtend => I::negate_and_add_imm_32(dst, src2, src1),
7593                                K::Sub64 => I::negate_and_add_imm_64(dst, src2, src1),
7594                                K::ShiftLogicalLeft32 => I::shift_logical_left_imm_alt_32(dst, src2, src1),
7595                                K::ShiftLogicalLeft32AndSignExtend => I::shift_logical_left_imm_alt_32(dst, src2, src1),
7596                                K::ShiftLogicalLeft64 => I::shift_logical_left_imm_alt_64(dst, src2, src1),
7597                                K::SetLessThanSigned32 => I::set_greater_than_signed_imm(dst, src2, src1),
7598                                K::SetLessThanSigned64 => I::set_greater_than_signed_imm(dst, src2, src1),
7599                                K::SetLessThanUnsigned32 => I::set_greater_than_unsigned_imm(dst, src2, src1),
7600                                K::SetLessThanUnsigned64 => I::set_greater_than_unsigned_imm(dst, src2, src1),
7601                                K::ShiftLogicalRight32 => I::shift_logical_right_imm_alt_32(dst, src2, src1),
7602                                K::ShiftLogicalRight32AndSignExtend => I::shift_logical_right_imm_alt_32(dst, src2, src1),
7603                                K::ShiftLogicalRight64 => I::shift_logical_right_imm_alt_64(dst, src2, src1),
7604                                K::ShiftArithmeticRight32 => I::shift_arithmetic_right_imm_alt_32(dst, src2, src1),
7605                                K::ShiftArithmeticRight32AndSignExtend => I::shift_arithmetic_right_imm_alt_32(dst, src2, src1),
7606                                K::ShiftArithmeticRight64 => I::shift_arithmetic_right_imm_alt_64(dst, src2, src1),
7607
7608                                K::RotateRight32 => I::rotate_right_imm_alt_32(dst, src2, src1),
7609                                K::RotateRight32AndSignExtend => I::rotate_right_imm_alt_32(dst, src2, src1),
7610                                K::RotateRight64 => I::rotate_right_imm_alt_64(dst, src2, src1),
7611                            }
7612                        }
7613                        (RegImm::Imm(src1), RegImm::Imm(src2)) => {
7614                            if is_optimized {
7615                                unreachable!("internal error: instruction with only constant operands: {op:?}")
7616                            } else {
7617                                let imm: u32 = OperationKind::from(kind)
7618                                    .apply_const(cast(src1).to_i64_sign_extend(), cast(src2).to_i64_sign_extend())
7619                                    .try_into()
7620                                    .expect("load immediate overflow");
7621                                I::load_imm(dst, imm)
7622                            }
7623                        }
7624                    }
7625                }
7626                BasicInst::Cmov { kind, dst, src, cond } => match src {
7627                    RegImm::Reg(src) => {
7628                        codegen! {
7629                            args = (conv_reg(dst), conv_reg(src), conv_reg(cond)),
7630                            kind = kind,
7631                            {
7632                                CmovKind::EqZero => cmov_if_zero,
7633                                CmovKind::NotEqZero => cmov_if_not_zero,
7634                            }
7635                        }
7636                    }
7637                    RegImm::Imm(imm) => {
7638                        codegen! {
7639                            args = (conv_reg(dst), conv_reg(cond), cast(imm).to_unsigned()),
7640                            kind = kind,
7641                            {
7642                                CmovKind::EqZero => cmov_if_zero_imm,
7643                                CmovKind::NotEqZero => cmov_if_not_zero_imm,
7644                            }
7645                        }
7646                    }
7647                },
7648                BasicInst::Ecalli { nth_import } => {
7649                    assert!(used_imports.contains(&nth_import));
7650                    let import = &imports[nth_import];
7651                    Instruction::ecalli(import.metadata.index.expect("internal error: no index was assigned to an ecall"))
7652                }
7653                BasicInst::Sbrk { dst, size } => Instruction::sbrk(conv_reg(dst), conv_reg(size)),
7654                BasicInst::Memset => Instruction::memset,
7655                BasicInst::Nop => unreachable!("internal error: a nop instruction was not removed"),
7656            };
7657
7658            code.push((source.clone(), op));
7659        }
7660
7661        fn unconditional_jump(target: JumpTarget) -> Instruction {
7662            Instruction::jump(target.static_target)
7663        }
7664
7665        match block.next.instruction {
7666            ControlInst::Jump { target } => {
7667                let target = get_jump_target(target)?;
7668                if can_fallthrough_to_next_block.contains(block_target) {
7669                    assert!(basic_block_delimited);
7670                    basic_block_delimited = false;
7671                } else {
7672                    code.push((block.next.source.clone(), unconditional_jump(target)));
7673                }
7674            }
7675            ControlInst::Call { ra, target, target_return } => {
7676                assert!(can_fallthrough_to_next_block.contains(block_target));
7677
7678                let target = get_jump_target(target)?;
7679                let target_return = get_jump_target(target_return)?
7680                    .dynamic_target
7681                    .expect("missing jump target for address");
7682                let Some(target_return) = target_return.checked_mul(VM_CODE_ADDRESS_ALIGNMENT) else {
7683                    return Err(ProgramFromElfError::other("overflow when emitting an indirect call"));
7684                };
7685
7686                code.push((
7687                    block.next.source.clone(),
7688                    Instruction::load_imm_and_jump(conv_reg(ra), target_return, target.static_target),
7689                ));
7690            }
7691            ControlInst::JumpIndirect { base, offset } => {
7692                if offset != 0 {
7693                    return Err(ProgramFromElfError::other(
7694                        "found an indirect jump with a non-zero offset - this would most likely result in a broken program; aborting",
7695                    ));
7696                }
7697
7698                let Ok(offset) = offset.try_into() else {
7699                    unreachable!("internal error: indirect jump with an out-of-range offset");
7700                };
7701
7702                code.push((block.next.source.clone(), Instruction::jump_indirect(conv_reg(base), offset)));
7703            }
7704            ControlInst::CallIndirect {
7705                ra,
7706                base,
7707                offset,
7708                target_return,
7709            } => {
7710                if offset != 0 {
7711                    return Err(ProgramFromElfError::other(
7712                        "found an indirect call with a non-zero offset - this would most likely result in a broken program; aborting",
7713                    ));
7714                }
7715
7716                assert!(can_fallthrough_to_next_block.contains(block_target));
7717
7718                let target_return = get_jump_target(target_return)?
7719                    .dynamic_target
7720                    .expect("missing jump target for address");
7721                let Some(target_return) = target_return.checked_mul(VM_CODE_ADDRESS_ALIGNMENT) else {
7722                    return Err(ProgramFromElfError::other("overflow when emitting an indirect call"));
7723                };
7724
7725                let Ok(offset) = offset.try_into() else {
7726                    unreachable!("internal error: indirect call with an out-of-range offset");
7727                };
7728
7729                code.push((
7730                    block.next.source.clone(),
7731                    Instruction::load_imm_and_jump_indirect(conv_reg(ra), conv_reg(base), target_return, offset),
7732                ));
7733            }
7734            ControlInst::Branch {
7735                kind,
7736                src1,
7737                src2,
7738                target_true,
7739                target_false,
7740            } => {
7741                assert!(can_fallthrough_to_next_block.contains(block_target));
7742
7743                let target_true = get_jump_target(target_true)?;
7744                get_jump_target(target_false)?;
7745
7746                let instruction = match (src1, src2) {
7747                    (RegImm::Reg(src1), RegImm::Reg(src2)) => {
7748                        codegen! {
7749                            args = (conv_reg(src1), conv_reg(src2), target_true.static_target),
7750                            kind = kind,
7751                            {
7752                                BranchKind::Eq32 | BranchKind::Eq64 => branch_eq,
7753                                BranchKind::NotEq32 | BranchKind::NotEq64 => branch_not_eq,
7754                                BranchKind::GreaterOrEqualUnsigned32 | BranchKind::GreaterOrEqualUnsigned64 => branch_greater_or_equal_unsigned,
7755                                BranchKind::GreaterOrEqualSigned32 | BranchKind::GreaterOrEqualSigned64 => branch_greater_or_equal_signed,
7756                                BranchKind::LessSigned32 | BranchKind::LessSigned64 => branch_less_signed,
7757                                BranchKind::LessUnsigned32 | BranchKind::LessUnsigned64 => branch_less_unsigned,
7758                            }
7759                        }
7760                    }
7761                    (RegImm::Imm(src1), RegImm::Reg(src2)) => {
7762                        codegen! {
7763                            args = (conv_reg(src2), cast(src1).to_unsigned(), target_true.static_target),
7764                            kind = kind,
7765                            {
7766                                BranchKind::Eq32 | BranchKind::Eq64 => branch_eq_imm,
7767                                BranchKind::NotEq32 | BranchKind::NotEq64 => branch_not_eq_imm,
7768                                BranchKind::GreaterOrEqualUnsigned32 | BranchKind::GreaterOrEqualUnsigned64 => branch_less_or_equal_unsigned_imm,
7769                                BranchKind::GreaterOrEqualSigned32 | BranchKind::GreaterOrEqualSigned64 => branch_less_or_equal_signed_imm,
7770                                BranchKind::LessSigned32 | BranchKind::LessSigned64 => branch_greater_signed_imm,
7771                                BranchKind::LessUnsigned32 | BranchKind::LessUnsigned64 => branch_greater_unsigned_imm,
7772                            }
7773                        }
7774                    }
7775                    (RegImm::Reg(src1), RegImm::Imm(src2)) => {
7776                        codegen! {
7777                            args = (conv_reg(src1), cast(src2).to_unsigned(), target_true.static_target),
7778                            kind = kind,
7779                            {
7780                                BranchKind::Eq32 | BranchKind::Eq64 => branch_eq_imm,
7781                                BranchKind::NotEq32 | BranchKind::NotEq64 => branch_not_eq_imm,
7782                                BranchKind::LessSigned32 | BranchKind::LessSigned64 => branch_less_signed_imm,
7783                                BranchKind::LessUnsigned32 | BranchKind::LessUnsigned64 => branch_less_unsigned_imm,
7784                                BranchKind::GreaterOrEqualSigned32 | BranchKind::GreaterOrEqualSigned64 => branch_greater_or_equal_signed_imm,
7785                                BranchKind::GreaterOrEqualUnsigned32 | BranchKind::GreaterOrEqualUnsigned64 => branch_greater_or_equal_unsigned_imm,
7786                            }
7787                        }
7788                    }
7789                    (RegImm::Imm(src1), RegImm::Imm(src2)) => {
7790                        if is_optimized {
7791                            unreachable!("internal error: branch with only constant operands")
7792                        } else {
7793                            match OperationKind::from(kind).apply_const(cast(src1).to_i64_sign_extend(), cast(src2).to_i64_sign_extend()) {
7794                                1 => unconditional_jump(target_true),
7795                                0 => {
7796                                    assert!(can_fallthrough_to_next_block.contains(block_target));
7797                                    Instruction::fallthrough
7798                                }
7799                                _ => unreachable!(),
7800                            }
7801                        }
7802                    }
7803                };
7804
7805                code.push((block.next.source.clone(), instruction));
7806            }
7807            ControlInst::Unimplemented => {
7808                code.push((block.next.source.clone(), Instruction::trap));
7809            }
7810        }
7811    }
7812
7813    Ok(code)
7814}
7815
/// Whether the program being linked is 32-bit or 64-bit.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
enum Bitness {
    /// 32-bit: 4-byte pointers, decoded with the 32-bit instruction set.
    B32,
    /// 64-bit: 8-byte pointers, decoded with the 64-bit instruction set.
    B64,
}
7821
7822impl Bitness {
7823    fn bits_used_mask(self) -> u64 {
7824        match self {
7825            Bitness::B32 => u64::from(u32::MAX),
7826            Bitness::B64 => u64::MAX,
7827        }
7828    }
7829}
7830
7831impl InstructionSet for Bitness {
7832    fn opcode_from_u8(self, byte: u8) -> Option<Opcode> {
7833        match self {
7834            Bitness::B32 => polkavm_common::program::ISA32_V1.opcode_from_u8(byte),
7835            Bitness::B64 => polkavm_common::program::ISA64_V1.opcode_from_u8(byte),
7836        }
7837    }
7838}
7839
7840impl From<Bitness> for u64 {
7841    fn from(value: Bitness) -> Self {
7842        match value {
7843            Bitness::B32 => 4,
7844            Bitness::B64 => 8,
7845        }
7846    }
7847}
7848
7849impl From<Bitness> for RelocationSize {
7850    fn from(value: Bitness) -> Self {
7851        match value {
7852            Bitness::B32 => RelocationSize::U32,
7853            Bitness::B64 => RelocationSize::U64,
7854        }
7855    }
7856}
7857
/// The width, in bytes, of the value written by a relocation.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) enum RelocationSize {
    U8,
    U16,
    U32,
    U64,
}
7865
/// The encoding used to store the value of an `Offset` relocation.
#[derive(Copy, Clone, Debug)]
pub(crate) enum SizeRelocationSize {
    /// A 6-bit value packed into a single byte (from R_RISCV_SET6/SUB6 pairs).
    SixBits,
    /// A variable-length ULEB128 value (from R_RISCV_SET_ULEB128/SUB_ULEB128 pairs).
    Uleb128,
    /// A plain fixed-width little-endian value.
    Generic(RelocationSize),
}
7872
/// A relocation harvested from the ELF file, to be applied to section data.
#[derive(Copy, Clone, Debug)]
pub(crate) enum RelocationKind {
    /// Write the absolute address of `target` as a `size`-wide value.
    Abs {
        target: SectionTarget,
        size: RelocationSize,
    },
    /// Write a 32-bit jump table entry: the code address `target_code`
    /// expressed relative to `target_base` (built from an ADD/SUB pair where
    /// the added target lives in a code section and the subtracted one does not).
    JumpTable {
        target_code: SectionTarget,
        target_base: SectionTarget,
    },
    /// Write the distance from `origin` to `target`, encoded as `size`.
    Offset {
        origin: SectionTarget,
        target: SectionTarget,
        size: SizeRelocationSize,
    },
}
7889
7890impl RelocationKind {
7891    fn targets(&self) -> [Option<SectionTarget>; 2] {
7892        match self {
7893            RelocationKind::Abs { target, .. } => [Some(*target), None],
7894            RelocationKind::Offset { origin, target, .. } => [Some(*origin), Some(*target)],
7895            RelocationKind::JumpTable { target_code, target_base } => [Some(*target_code), Some(*target_base)],
7896        }
7897    }
7898}
7899
/// Collects the relocations of a data (non-code) `section` into `relocations`,
/// keyed by the location within the section that they patch.
///
/// Raw ELF relocations are first grouped by the offset they apply to and then
/// matched as whole groups, because several RISC-V relocation types
/// (ADD/SUB pairs, SET6/SUB6, SET_ULEB128/SUB_ULEB128) only make sense in
/// combination with each other.
fn harvest_data_relocations<H>(
    elf: &Elf<H>,
    code_sections_set: &HashSet<SectionIndex>,
    section: &Section,
    relocations: &mut BTreeMap<SectionTarget, RelocationKind>,
) -> Result<(), ProgramFromElfError>
where
    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
{
    // Whether a paired relocation adds to or subtracts from the final value.
    #[derive(Debug)]
    enum MutOp {
        Add,
        Sub,
    }

    // A single raw relocation, before being combined with the other
    // relocations that share its offset.
    #[derive(Debug)]
    enum Kind {
        Set(RelocationKind),
        Mut(MutOp, RelocationSize, SectionTarget),

        Set6 { target: SectionTarget },
        Sub6 { target: SectionTarget },

        SetUleb128 { target: SectionTarget },
        SubUleb128 { target: SectionTarget },
    }

    if section.relocations().next().is_none() {
        return Ok(());
    }

    let section_name = section.name();
    log::trace!("Harvesting data relocations from section: {}", section_name);

    // First pass: translate each ELF relocation into a `Kind` and group them
    // by the section-relative offset they patch.
    let mut for_address = BTreeMap::new();
    for (absolute_address, relocation) in section.relocations() {
        let Some(relative_address) = absolute_address.checked_sub(section.original_address()) else {
            return Err(ProgramFromElfError::other("invalid data relocation offset"));
        };

        if relocation.has_implicit_addend() {
            // AFAIK these should never be emitted for RISC-V.
            return Err(ProgramFromElfError::other(format!("unsupported relocation: {:?}", relocation)));
        }

        let Some(target) = get_relocation_target(elf, &relocation)? else {
            continue;
        };

        // Special case: within '.polkavm_exports' an R_RISCV_PCREL_HI20 is
        // treated as a pointer-sized absolute relocation to the target.
        if relocation.flags()
            == (object::RelocationFlags::Elf {
                r_type: object::elf::R_RISCV_PCREL_HI20,
            })
            && section_name == ".polkavm_exports"
        {
            relocations.insert(
                SectionTarget {
                    section_index: section.index(),
                    offset: relative_address,
                },
                RelocationKind::Abs {
                    target,
                    size: if elf.is_64() { RelocationSize::U64 } else { RelocationSize::U32 },
                },
            );

            continue;
        }

        let (relocation_name, kind) = match (relocation.kind(), relocation.flags()) {
            (object::RelocationKind::Absolute, _)
                if relocation.encoding() == object::RelocationEncoding::Generic && relocation.size() == 32 =>
            {
                (
                    "R_RISCV_32",
                    Kind::Set(RelocationKind::Abs {
                        target,
                        size: RelocationSize::U32,
                    }),
                )
            }
            (object::RelocationKind::Absolute, _)
                if relocation.encoding() == object::RelocationEncoding::Generic && relocation.size() == 64 =>
            {
                (
                    "R_RISCV_64",
                    Kind::Set(RelocationKind::Abs {
                        target,
                        size: RelocationSize::U64,
                    }),
                )
            }

            (_, object::RelocationFlags::Elf { r_type: reloc_kind }) => match reloc_kind {
                object::elf::R_RISCV_SET6 => ("R_RISCV_SET6", Kind::Set6 { target }),
                object::elf::R_RISCV_SUB6 => ("R_RISCV_SUB6", Kind::Sub6 { target }),
                object::elf::R_RISCV_SET8 => (
                    "R_RISCV_SET8",
                    Kind::Set(RelocationKind::Abs {
                        target,
                        size: RelocationSize::U8,
                    }),
                ),
                object::elf::R_RISCV_SET16 => (
                    "R_RISCV_SET16",
                    Kind::Set(RelocationKind::Abs {
                        target,
                        size: RelocationSize::U16,
                    }),
                ),
                object::elf::R_RISCV_ADD8 => ("R_RISCV_ADD8", Kind::Mut(MutOp::Add, RelocationSize::U8, target)),
                object::elf::R_RISCV_SUB8 => ("R_RISCV_SUB8", Kind::Mut(MutOp::Sub, RelocationSize::U8, target)),
                object::elf::R_RISCV_ADD16 => ("R_RISCV_ADD16", Kind::Mut(MutOp::Add, RelocationSize::U16, target)),
                object::elf::R_RISCV_SUB16 => ("R_RISCV_SUB16", Kind::Mut(MutOp::Sub, RelocationSize::U16, target)),
                object::elf::R_RISCV_ADD32 => ("R_RISCV_ADD32", Kind::Mut(MutOp::Add, RelocationSize::U32, target)),
                object::elf::R_RISCV_ADD64 => ("R_RISCV_ADD64", Kind::Mut(MutOp::Add, RelocationSize::U64, target)),
                object::elf::R_RISCV_SUB32 => ("R_RISCV_SUB32", Kind::Mut(MutOp::Sub, RelocationSize::U32, target)),
                object::elf::R_RISCV_SUB64 => ("R_RISCV_SUB64", Kind::Mut(MutOp::Sub, RelocationSize::U64, target)),
                object::elf::R_RISCV_SET_ULEB128 => ("R_RISCV_SET_ULEB128", Kind::SetUleb128 { target }),
                object::elf::R_RISCV_SUB_ULEB128 => ("R_RISCV_SUB_ULEB128", Kind::SubUleb128 { target }),

                _ => {
                    return Err(ProgramFromElfError::other(format!(
                        "unsupported relocation in data section '{section_name}': {relocation:?}"
                    )))
                }
            },
            _ => {
                return Err(ProgramFromElfError::other(format!(
                    "unsupported relocation in data section '{section_name}': {relocation:?}"
                )))
            }
        };

        log::trace!("  {relocation_name}: {section_name}[0x{relative_address:x}] (0x{absolute_address:x}): -> {target}");
        for_address
            .entry(relative_address)
            .or_insert_with(Vec::new)
            .push((relocation_name, kind));
    }

    // Second pass: combine the relocations sharing an offset into a single
    // `RelocationKind` each; any group not matching a known shape is an error.
    for (relative_address, list) in for_address {
        let current_location = SectionTarget {
            section_index: section.index(),
            offset: relative_address,
        };

        struct ErrorToken; // To make sure we don't forget a `continue` anywhere.
        let _: ErrorToken = match &*list {
            // A lone set/absolute relocation passes through unchanged.
            [(_, Kind::Set(kind))] => {
                relocations.insert(current_location, *kind);
                continue;
            }
            // ADD/SUB pair where the added target is in a code section and the
            // subtracted one is not: a jump table entry.
            [(_, Kind::Mut(MutOp::Add, size_1, target_1)), (_, Kind::Mut(MutOp::Sub, size_2, target_2))]
                if size_1 == size_2
                    && matches!(*size_1, RelocationSize::U32 | RelocationSize::U64)
                    && code_sections_set.contains(&target_1.section_index)
                    && !code_sections_set.contains(&target_2.section_index) =>
            {
                if *size_1 == RelocationSize::U64 {
                    // We could support this, but I'm not sure if anything ever emits this,
                    // so let's return an error for now until somebody complains.
                    return Err(ProgramFromElfError::other(
                        "internal error: found 64-bit jump table relocation; please report this",
                    ));
                }

                relocations.insert(
                    current_location,
                    RelocationKind::JumpTable {
                        target_code: *target_1,
                        target_base: *target_2,
                    },
                );
                continue;
            }
            // A generic ADD/SUB pair of matching size: a plain offset.
            [(_, Kind::Mut(MutOp::Add, size_1, target_1)), (_, Kind::Mut(MutOp::Sub, size_2, target_2))] if size_1 == size_2 => {
                relocations.insert(
                    current_location,
                    RelocationKind::Offset {
                        origin: *target_2,
                        target: *target_1,
                        size: SizeRelocationSize::Generic(*size_1),
                    },
                );
                continue;
            }
            // A SET followed by a SUB of matching size: also a plain offset.
            [(
                _,
                Kind::Set(RelocationKind::Abs {
                    target: target_1,
                    size: size_1,
                }),
            ), (_, Kind::Mut(MutOp::Sub, size_2, target_2))]
                if size_1 == size_2 =>
            {
                relocations.insert(
                    current_location,
                    RelocationKind::Offset {
                        origin: *target_2,
                        target: *target_1,
                        size: SizeRelocationSize::Generic(*size_1),
                    },
                );
                continue;
            }
            // SET6/SUB6 pair: a 6-bit offset.
            [(_, Kind::Set6 { target: target_1 }), (_, Kind::Sub6 { target: target_2 })] => {
                relocations.insert(
                    current_location,
                    RelocationKind::Offset {
                        origin: *target_2,
                        target: *target_1,
                        size: SizeRelocationSize::SixBits,
                    },
                );
                continue;
            }
            // SET_ULEB128/SUB_ULEB128 pair: a ULEB128-encoded offset.
            [(_, Kind::SetUleb128 { target: target_1 }), (_, Kind::SubUleb128 { target: target_2 })] => {
                relocations.insert(
                    current_location,
                    RelocationKind::Offset {
                        origin: *target_2,
                        target: *target_1,
                        size: SizeRelocationSize::Uleb128,
                    },
                );
                continue;
            }
            _ => ErrorToken,
        };

        return Err(ProgramFromElfError::other(format!(
            "unsupported relocations for '{section_name}'[{relative_address:x}] (0x{absolute_address:08x}): {list}",
            absolute_address = section.original_address() + relative_address,
            list = SectionTarget::make_human_readable_in_debug_string(elf, &format!("{list:?}")),
        )));
    }

    Ok(())
}
8140
8141fn read_u32(data: &[u8], relative_address: u64) -> Result<u32, ProgramFromElfError> {
8142    let target_range = relative_address as usize..relative_address as usize + 4;
8143    let value = data
8144        .get(target_range)
8145        .ok_or(ProgramFromElfError::other("out of range relocation"))?;
8146    Ok(u32::from_le_bytes([value[0], value[1], value[2], value[3]]))
8147}
8148
8149fn read_u16(data: &[u8], relative_address: u64) -> Result<u16, ProgramFromElfError> {
8150    let target_range = relative_address as usize..relative_address as usize + 2;
8151    let value = data
8152        .get(target_range)
8153        .ok_or(ProgramFromElfError::other("out of range relocation"))?;
8154    Ok(u16::from_le_bytes([value[0], value[1]]))
8155}
8156
8157fn read_u8(data: &[u8], relative_address: u64) -> Result<u8, ProgramFromElfError> {
8158    data.get(relative_address as usize)
8159        .ok_or(ProgramFromElfError::other("out of range relocation"))
8160        .copied()
8161}
8162
8163/// ULEB128 encode `value` and overwrite the existing value at `data_offset`, keeping the length.
8164///
8165/// See the [ELF ABI spec] and [LLD implementation] for reference.
8166///
8167/// [ELF ABI spec]: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/fbf3cbbac00ef1860ae60302a9afedb98fd31109/riscv-elf.adoc#uleb128-note
8168/// [LLD implementation]: https://github.com/llvm/llvm-project/blob/release/18.x/lld/ELF/Target.h#L310
8169fn overwrite_uleb128(data: &mut [u8], mut data_offset: usize, mut value: u64) -> Result<(), ProgramFromElfError> {
8170    loop {
8171        let Some(byte) = data.get_mut(data_offset) else {
8172            return Err(ProgramFromElfError::other("ULEB128 relocation target offset out of bounds"));
8173        };
8174        data_offset += 1;
8175
8176        if *byte & 0x80 != 0 {
8177            *byte = 0x80 | (value as u8 & 0x7f);
8178            value >>= 7;
8179        } else {
8180            *byte = value as u8;
8181            return if value > 0x80 {
8182                Err(ProgramFromElfError::other("ULEB128 relocation overflow"))
8183            } else {
8184                Ok(())
8185            };
8186        }
8187    }
8188}
8189
#[test]
fn test_overwrite_uleb128() {
    // 624485 is the classic ULEB128 example value; it encodes as [0xE5, 0x8E, 0x26].
    // The buffer's continuation bits define a 3-byte slot to be overwritten.
    let mut data = vec![0x80u8, 0x80, 0x00];
    overwrite_uleb128(&mut data, 0, 624485).unwrap();
    assert_eq!(data, [0xE5, 0x8E, 0x26]);
}
8200
8201fn write_u64(data: &mut [u8], relative_address: u64, value: u64) -> Result<(), ProgramFromElfError> {
8202    let value = value.to_le_bytes();
8203    data[relative_address as usize + 7] = value[7];
8204    data[relative_address as usize + 6] = value[6];
8205    data[relative_address as usize + 5] = value[5];
8206    data[relative_address as usize + 4] = value[4];
8207    data[relative_address as usize + 3] = value[3];
8208    data[relative_address as usize + 2] = value[2];
8209    data[relative_address as usize + 1] = value[1];
8210    data[relative_address as usize] = value[0];
8211    Ok(())
8212}
8213
8214fn write_u32(data: &mut [u8], relative_address: u64, value: u32) -> Result<(), ProgramFromElfError> {
8215    let value = value.to_le_bytes();
8216    data[relative_address as usize + 3] = value[3];
8217    data[relative_address as usize + 2] = value[2];
8218    data[relative_address as usize + 1] = value[1];
8219    data[relative_address as usize] = value[0];
8220    Ok(())
8221}
8222
8223fn write_u16(data: &mut [u8], relative_address: u64, value: u16) -> Result<(), ProgramFromElfError> {
8224    let value = value.to_le_bytes();
8225    data[relative_address as usize + 1] = value[1];
8226    data[relative_address as usize] = value[0];
8227    Ok(())
8228}
8229
8230fn harvest_code_relocations<H>(
8231    elf: &Elf<H>,
8232    section: &Section,
8233    decoder_config: &DecoderConfig,
8234    instruction_overrides: &mut HashMap<SectionTarget, InstExt<SectionTarget, SectionTarget>>,
8235    data_relocations: &mut BTreeMap<SectionTarget, RelocationKind>,
8236) -> Result<(), ProgramFromElfError>
8237where
8238    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
8239{
8240    fn jump_or_call<T>(ra: RReg, target: T, target_return: T) -> Result<ControlInst<T>, ProgramFromElfError> {
8241        if let Some(ra) = cast_reg_non_zero(ra)? {
8242            Ok(ControlInst::Call { ra, target, target_return })
8243        } else {
8244            Ok(ControlInst::Jump { target })
8245        }
8246    }
8247
8248    #[derive(Copy, Clone)]
8249    enum HiRelocKind {
8250        PcRel,
8251        Got,
8252    }
8253
8254    impl core::fmt::Display for HiRelocKind {
8255        fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
8256            match self {
8257                HiRelocKind::PcRel => fmt.write_str("R_RISCV_PCREL_HI20"),
8258                HiRelocKind::Got => fmt.write_str("R_RISCV_GOT_HI20"),
8259            }
8260        }
8261    }
8262
8263    #[derive(Default)]
8264    struct RelocPairs {
8265        reloc_pcrel_hi20: BTreeMap<u64, (HiRelocKind, SectionTarget)>,
8266        reloc_pcrel_lo12: BTreeMap<u64, (&'static str, u64)>,
8267    }
8268
8269    if section.relocations().next().is_none() {
8270        return Ok(());
8271    }
8272
8273    let mut pcrel_relocations = RelocPairs::default();
8274
8275    let section_name = section.name();
8276    log::trace!("Harvesting code relocations from section: {}", section_name);
8277
8278    let section_data = section.data();
8279    for (absolute_address, relocation) in section.relocations() {
8280        let Some(relative_address) = absolute_address.checked_sub(section.original_address()) else {
8281            return Err(ProgramFromElfError::other("invalid code relocation offset"));
8282        };
8283
8284        if relocation.has_implicit_addend() {
8285            // AFAIK these should never be emitted for RISC-V.
8286            return Err(ProgramFromElfError::other(format!(
8287                "unsupported relocation in section '{section_name}': {relocation:?}"
8288            )));
8289        }
8290
8291        let current_location = SectionTarget {
8292            section_index: section.index(),
8293            offset: relative_address,
8294        };
8295
8296        let relative_address = current_location.offset;
8297        let Some(target) = get_relocation_target(elf, &relocation)? else {
8298            continue;
8299        };
8300
8301        match (relocation.kind(), relocation.flags()) {
8302            (object::RelocationKind::Absolute, _)
8303                if relocation.encoding() == object::RelocationEncoding::Generic && relocation.size() == 32 =>
8304            {
8305                data_relocations.insert(
8306                    current_location,
8307                    RelocationKind::Abs {
8308                        target,
8309                        size: RelocationSize::U32,
8310                    },
8311                );
8312            }
8313            (object::RelocationKind::Absolute, _)
8314                if relocation.encoding() == object::RelocationEncoding::Generic && relocation.size() == 64 =>
8315            {
8316                data_relocations.insert(
8317                    current_location,
8318                    RelocationKind::Abs {
8319                        target,
8320                        size: RelocationSize::U64,
8321                    },
8322                );
8323            }
8324            (_, object::RelocationFlags::Elf { r_type: reloc_kind }) => {
8325                // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/releases
8326                match reloc_kind {
8327                    object::elf::R_RISCV_CALL_PLT => {
8328                        // This relocation is for a pair of instructions, namely AUIPC + JALR, where we're allowed to delete the AUIPC if it's unnecessary.
8329                        let Some(xs) = section_data.get(current_location.offset as usize..current_location.offset as usize + 8) else {
8330                            return Err(ProgramFromElfError::other("invalid R_RISCV_CALL_PLT relocation"));
8331                        };
8332
8333                        let hi_inst_raw = u32::from_le_bytes([xs[0], xs[1], xs[2], xs[3]]);
8334                        let Some(hi_inst) = Inst::decode(decoder_config, hi_inst_raw) else {
8335                            return Err(ProgramFromElfError::other(format!(
8336                                "R_RISCV_CALL_PLT for an unsupported instruction (1st): 0x{hi_inst_raw:08}"
8337                            )));
8338                        };
8339
8340                        let lo_inst_raw = u32::from_le_bytes([xs[4], xs[5], xs[6], xs[7]]);
8341                        let Some(lo_inst) = Inst::decode(decoder_config, lo_inst_raw) else {
8342                            return Err(ProgramFromElfError::other(format!(
8343                                "R_RISCV_CALL_PLT for an unsupported instruction (2nd): 0x{lo_inst_raw:08}"
8344                            )));
8345                        };
8346
8347                        let Inst::AddUpperImmediateToPc { dst: hi_reg, value: _ } = hi_inst else {
8348                            return Err(ProgramFromElfError::other(format!(
8349                                "R_RISCV_CALL_PLT for an unsupported instruction (1st): 0x{hi_inst_raw:08} ({hi_inst:?})"
8350                            )));
8351                        };
8352
8353                        let Inst::JumpAndLinkRegister {
8354                            dst: lo_dst,
8355                            base: lo_reg,
8356                            value: _,
8357                        } = lo_inst
8358                        else {
8359                            return Err(ProgramFromElfError::other(format!(
8360                                "R_RISCV_CALL_PLT for an unsupported instruction (2nd): 0x{lo_inst_raw:08} ({lo_inst:?})"
8361                            )));
8362                        };
8363
8364                        if hi_reg != lo_reg {
8365                            return Err(ProgramFromElfError::other(
8366                                "R_RISCV_CALL_PLT for a pair of instructions with different destination registers",
8367                            ));
8368                        }
8369
8370                        let target_return = current_location.add(8);
8371                        instruction_overrides.insert(current_location, InstExt::nop());
8372                        instruction_overrides.insert(
8373                            current_location.add(4),
8374                            InstExt::Control(jump_or_call(lo_dst, target, target_return)?),
8375                        );
8376
8377                        log::trace!(
8378                            "  R_RISCV_CALL_PLT: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
8379                            section.name(),
8380                            target
8381                        );
8382                    }
8383                    object::elf::R_RISCV_PCREL_HI20 => {
8384                        if let Some(raw_inst) = section_data
8385                            .get((relative_address as usize).wrapping_sub(4)..)
8386                            .and_then(|slice| slice.get(..4))
8387                        {
8388                            let raw_inst = u32::from_le_bytes([raw_inst[0], raw_inst[1], raw_inst[2], raw_inst[3]]);
8389                            if crate::riscv::R(raw_inst).unpack()
8390                                == (crate::riscv::OPCODE_CUSTOM_0, FUNC3_ECALLI, 0, RReg::Zero, RReg::Zero, RReg::Zero)
8391                            {
8392                                data_relocations.insert(
8393                                    current_location,
8394                                    RelocationKind::Abs {
8395                                        target,
8396                                        size: if elf.is_64() { RelocationSize::U64 } else { RelocationSize::U32 },
8397                                    },
8398                                );
8399                                continue;
8400                            }
8401                        }
8402
8403                        // This relocation is for an AUIPC.
8404                        pcrel_relocations
8405                            .reloc_pcrel_hi20
8406                            .insert(relative_address, (HiRelocKind::PcRel, target));
8407                        log::trace!(
8408                            "  R_RISCV_PCREL_HI20: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
8409                            section.name(),
8410                            target
8411                        );
8412                    }
8413                    object::elf::R_RISCV_GOT_HI20 => {
8414                        pcrel_relocations
8415                            .reloc_pcrel_hi20
8416                            .insert(relative_address, (HiRelocKind::Got, target));
8417                        log::trace!(
8418                            "  R_RISCV_GOT_HI20: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
8419                            section.name(),
8420                            target
8421                        );
8422                    }
8423                    object::elf::R_RISCV_PCREL_LO12_I => {
8424                        if target.section_index != section.index() {
8425                            return Err(ProgramFromElfError::other(
8426                                "R_RISCV_PCREL_LO12_I relocation points to a different section",
8427                            ));
8428                        }
8429
8430                        pcrel_relocations
8431                            .reloc_pcrel_lo12
8432                            .insert(relative_address, ("R_RISCV_PCREL_LO12_I", target.offset));
8433                        log::trace!(
8434                            "  R_RISCV_PCREL_LO12_I: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
8435                            section.name(),
8436                            target
8437                        );
8438                    }
8439                    object::elf::R_RISCV_PCREL_LO12_S => {
8440                        if target.section_index != section.index() {
8441                            return Err(ProgramFromElfError::other(
8442                                "R_RISCV_PCREL_LO12_I relocation points to a different section",
8443                            ));
8444                        }
8445
8446                        pcrel_relocations
8447                            .reloc_pcrel_lo12
8448                            .insert(relative_address, ("R_RISCV_PCREL_LO12_S", target.offset));
8449                        log::trace!(
8450                            "  R_RISCV_PCREL_LO12_S: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
8451                            section.name(),
8452                            target
8453                        );
8454                    }
8455                    object::elf::R_RISCV_JAL => {
8456                        let inst_raw = read_u32(section_data, relative_address)?;
8457                        let Some(inst) = Inst::decode(decoder_config, inst_raw) else {
8458                            return Err(ProgramFromElfError::other(format!(
8459                                "R_RISCV_JAL for an unsupported instruction: 0x{inst_raw:08}"
8460                            )));
8461                        };
8462
8463                        let Inst::JumpAndLink { dst, .. } = inst else {
8464                            return Err(ProgramFromElfError::other(format!(
8465                                "R_RISCV_JAL for an unsupported instruction: 0x{inst_raw:08} ({inst:?})"
8466                            )));
8467                        };
8468
8469                        let target_return = current_location.add(4);
8470                        instruction_overrides.insert(current_location, InstExt::Control(jump_or_call(dst, target, target_return)?));
8471
8472                        log::trace!(
8473                            "  R_RISCV_JAL: {}[0x{relative_address:x}] (0x{absolute_address:x} -> {}",
8474                            section.name(),
8475                            target
8476                        );
8477                    }
8478                    object::elf::R_RISCV_BRANCH => {
8479                        let inst_raw = read_u32(section_data, relative_address)?;
8480                        let Some(inst) = Inst::decode(decoder_config, inst_raw) else {
8481                            return Err(ProgramFromElfError::other(format!(
8482                                "R_RISCV_BRANCH for an unsupported instruction: 0x{inst_raw:08}"
8483                            )));
8484                        };
8485
8486                        let Inst::Branch { kind, src1, src2, .. } = inst else {
8487                            return Err(ProgramFromElfError::other(format!(
8488                                "R_RISCV_BRANCH for an unsupported instruction: 0x{inst_raw:08} ({inst:?})"
8489                            )));
8490                        };
8491
8492                        let target_false = current_location.add(4);
8493                        instruction_overrides.insert(
8494                            current_location,
8495                            InstExt::Control(ControlInst::Branch {
8496                                kind,
8497                                src1: cast_reg_any(src1)?,
8498                                src2: cast_reg_any(src2)?,
8499                                target_true: target,
8500                                target_false,
8501                            }),
8502                        );
8503
8504                        log::trace!(
8505                            "  R_RISCV_BRANCH: {}[0x{relative_address:x}] (0x{absolute_address:x} -> {}",
8506                            section.name(),
8507                            target
8508                        );
8509                    }
8510                    object::elf::R_RISCV_HI20 => {
8511                        // This relocation is for a LUI.
8512                        let inst_raw = read_u32(section_data, relative_address)?;
8513                        let Some(inst) = Inst::decode(decoder_config, inst_raw) else {
8514                            return Err(ProgramFromElfError::other(format!(
8515                                "R_RISCV_HI20 for an unsupported instruction: 0x{inst_raw:08}"
8516                            )));
8517                        };
8518
8519                        let Inst::LoadUpperImmediate { dst, value: _ } = inst else {
8520                            return Err(ProgramFromElfError::other(format!(
8521                                "R_RISCV_HI20 for an unsupported instruction: 0x{inst_raw:08} ({inst:?})"
8522                            )));
8523                        };
8524
8525                        let Some(dst) = cast_reg_non_zero(dst)? else {
8526                            return Err(ProgramFromElfError::other("R_RISCV_HI20 with a zero destination register"));
8527                        };
8528
8529                        instruction_overrides.insert(current_location, InstExt::Basic(BasicInst::LoadAddress { dst, target }));
8530
8531                        log::trace!(
8532                            "  R_RISCV_HI20: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
8533                            section.name(),
8534                            target
8535                        );
8536
8537                        continue;
8538                    }
8539                    object::elf::R_RISCV_LO12_I => {
8540                        let inst_raw = read_u32(section_data, relative_address)?;
8541                        let Some(inst) = Inst::decode(decoder_config, inst_raw) else {
8542                            return Err(ProgramFromElfError::other(format!(
8543                                "R_RISCV_LO12_I for an unsupported instruction: 0x{inst_raw:08}"
8544                            )));
8545                        };
8546
8547                        let new_instruction = match inst {
8548                            Inst::RegImm {
8549                                kind: RegImmKind::Add32,
8550                                dst,
8551                                src: _,
8552                                imm: _,
8553                            } => {
8554                                if let Some(dst) = cast_reg_non_zero(dst)? {
8555                                    InstExt::Basic(BasicInst::LoadAddress { dst, target })
8556                                } else {
8557                                    InstExt::nop()
8558                                }
8559                            }
8560                            Inst::RegImm {
8561                                kind: RegImmKind::Add64,
8562                                dst,
8563                                src: _,
8564                                imm: _,
8565                            } => {
8566                                if let Some(dst) = cast_reg_non_zero(dst)? {
8567                                    InstExt::Basic(BasicInst::LoadAddress { dst, target })
8568                                } else {
8569                                    InstExt::nop()
8570                                }
8571                            }
8572                            Inst::Load {
8573                                kind,
8574                                dst,
8575                                base: _,
8576                                offset: _,
8577                            } => {
8578                                if let Some(dst) = cast_reg_non_zero(dst)? {
8579                                    InstExt::Basic(BasicInst::LoadAbsolute { kind, dst, target })
8580                                } else {
8581                                    InstExt::nop()
8582                                }
8583                            }
8584                            _ => {
8585                                return Err(ProgramFromElfError::other(format!(
8586                                    "R_RISCV_LO12_I for an unsupported instruction: 0x{inst_raw:08} ({inst:?}) (at {loc})",
8587                                    loc = current_location.fmt_human_readable(elf),
8588                                )));
8589                            }
8590                        };
8591
8592                        instruction_overrides.insert(current_location, new_instruction);
8593
8594                        log::trace!(
8595                            "  R_RISCV_LO12_I: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
8596                            section.name(),
8597                            target
8598                        );
8599                    }
8600                    object::elf::R_RISCV_LO12_S => {
8601                        let inst_raw = read_u32(section_data, relative_address)?;
8602                        let Some(inst) = Inst::decode(decoder_config, inst_raw) else {
8603                            return Err(ProgramFromElfError::other(format!(
8604                                "R_RISCV_LO12_S for an unsupported instruction: 0x{inst_raw:08}"
8605                            )));
8606                        };
8607
8608                        let new_instruction = match inst {
8609                            Inst::Store {
8610                                kind,
8611                                src,
8612                                base: _,
8613                                offset: _,
8614                            } => InstExt::Basic(BasicInst::StoreAbsolute {
8615                                kind,
8616                                src: cast_reg_any(src)?,
8617                                target,
8618                            }),
8619                            _ => {
8620                                return Err(ProgramFromElfError::other(format!(
8621                                    "R_RISCV_LO12_S for an unsupported instruction: 0x{inst_raw:08} ({inst:?}) (at {loc})",
8622                                    loc = current_location.fmt_human_readable(elf),
8623                                )));
8624                            }
8625                        };
8626
8627                        instruction_overrides.insert(current_location, new_instruction);
8628
8629                        log::trace!(
8630                            "  R_RISCV_LO12_S: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
8631                            section.name(),
8632                            target
8633                        );
8634                    }
8635                    object::elf::R_RISCV_RVC_JUMP => {
8636                        let inst_raw = read_u16(section_data, relative_address)?;
8637                        let Some(inst) = Inst::decode(decoder_config, inst_raw.into()) else {
8638                            return Err(ProgramFromElfError::other(format!(
8639                                "R_RISCV_RVC_JUMP for an unsupported instruction: 0x{inst_raw:04}"
8640                            )));
8641                        };
8642
8643                        let (Inst::JumpAndLink { dst, .. } | Inst::JumpAndLinkRegister { dst, .. }) = inst else {
8644                            return Err(ProgramFromElfError::other(format!(
8645                                "R_RISCV_RVC_JUMP for an unsupported instruction: 0x{inst_raw:04} ({inst:?})"
8646                            )));
8647                        };
8648
8649                        let target_return = current_location.add(2);
8650                        instruction_overrides.insert(current_location, InstExt::Control(jump_or_call(dst, target, target_return)?));
8651
8652                        log::trace!(
8653                            "  R_RISCV_RVC_JUMP: {}[0x{relative_address:x}] (0x{absolute_address:x} -> {}",
8654                            section.name(),
8655                            target
8656                        );
8657                    }
8658                    object::elf::R_RISCV_RVC_BRANCH => {
8659                        let inst_raw = read_u16(section_data, relative_address)?;
8660                        let Some(inst) = Inst::decode(decoder_config, inst_raw.into()) else {
8661                            return Err(ProgramFromElfError::other(format!(
8662                                "R_RISCV_RVC_BRANCH for an unsupported instruction: 0x{inst_raw:04}"
8663                            )));
8664                        };
8665
8666                        let Inst::Branch { kind, src1, src2, .. } = inst else {
8667                            return Err(ProgramFromElfError::other(format!(
8668                                "R_RISCV_BRANCH for an unsupported instruction: 0x{inst_raw:04} ({inst:?})"
8669                            )));
8670                        };
8671
8672                        let target_false = current_location.add(2);
8673                        instruction_overrides.insert(
8674                            current_location,
8675                            InstExt::Control(ControlInst::Branch {
8676                                kind,
8677                                src1: cast_reg_any(src1)?,
8678                                src2: cast_reg_any(src2)?,
8679                                target_true: target,
8680                                target_false,
8681                            }),
8682                        );
8683
8684                        log::trace!(
8685                            "  R_RISCV_RVC_BRANCH: {}[0x{relative_address:x}] (0x{absolute_address:x} -> {}",
8686                            section.name(),
8687                            target
8688                        );
8689                    }
8690                    object::elf::R_RISCV_RELAX => {}
8691                    _ => {
8692                        return Err(ProgramFromElfError::other(format!(
8693                            "unsupported relocation type in section '{}': 0x{:08x}",
8694                            section.name(),
8695                            reloc_kind
8696                        )));
8697                    }
8698                }
8699            }
8700            _ => {
8701                return Err(ProgramFromElfError::other(format!(
8702                    "unsupported relocation in code section '{}': {:?}",
8703                    section.name(),
8704                    relocation
8705                )))
8706            }
8707        }
8708    }
8709
8710    for (relative_lo, (lo_rel_name, relative_hi)) in pcrel_relocations.reloc_pcrel_lo12 {
8711        let lo_inst_raw = &section_data[relative_lo as usize..][..4];
8712        let lo_inst_raw = u32::from_le_bytes([lo_inst_raw[0], lo_inst_raw[1], lo_inst_raw[2], lo_inst_raw[3]]);
8713        let lo_inst = Inst::decode(decoder_config, lo_inst_raw);
8714        let hi_inst_raw = &section_data[relative_hi as usize..][..4];
8715        let hi_inst_raw = u32::from_le_bytes([hi_inst_raw[0], hi_inst_raw[1], hi_inst_raw[2], hi_inst_raw[3]]);
8716        let hi_inst = Inst::decode(decoder_config, hi_inst_raw);
8717
8718        let Some((hi_kind, target)) = pcrel_relocations.reloc_pcrel_hi20.get(&relative_hi).copied() else {
8719            return Err(ProgramFromElfError::other(format!("{lo_rel_name} relocation at '{section_name}'0x{relative_lo:x} targets '{section_name}'0x{relative_hi:x} which doesn't have a R_RISCV_PCREL_HI20 or R_RISCV_GOT_HI20 relocation")));
8720        };
8721
8722        let Some(hi_inst) = hi_inst else {
8723            return Err(ProgramFromElfError::other(format!(
8724                "{hi_kind} relocation for an unsupported instruction at '{section_name}'0x{relative_hi:x}: 0x{hi_inst_raw:08x}"
8725            )));
8726        };
8727
8728        let Inst::AddUpperImmediateToPc { dst: hi_reg, .. } = hi_inst else {
8729            return Err(ProgramFromElfError::other(format!(
8730                "{hi_kind} relocation for an unsupported instruction at '{section_name}'[0x{relative_hi:x}]: {hi_inst:?}"
8731            )));
8732        };
8733
8734        let Some(lo_inst) = lo_inst else {
8735            return Err(ProgramFromElfError::other(format!(
8736                "{lo_rel_name} relocation for an unsupported instruction: 0x{lo_inst_raw:08x}"
8737            )));
8738        };
8739
8740        let (lo_reg, new_instruction) = if matches!(hi_kind, HiRelocKind::Got) {
8741            // For these relocations the target address points to the symbol that the code wants to reference,
8742            // but the actual address that's in the code shouldn't point to the symbol directly, but to a place
8743            // where the symbol's address can be found.
8744
8745            match lo_inst {
8746                Inst::Load {
8747                    kind: LoadKind::U64,
8748                    base,
8749                    dst,
8750                    ..
8751                } if elf.is_64() => {
8752                    let Some(dst) = cast_reg_non_zero(dst)? else {
8753                        return Err(ProgramFromElfError::other(format!(
8754                            "{lo_rel_name} with a zero destination register: 0x{lo_inst_raw:08x} in {section_name}[0x{relative_lo:08x}]"
8755                        )));
8756                    };
8757
8758                    (base, InstExt::Basic(BasicInst::LoadAddressIndirect { dst, target }))
8759                }
8760                Inst::Load {
8761                    kind: LoadKind::I32,
8762                    base,
8763                    dst,
8764                    ..
8765                } => {
8766                    let Some(dst) = cast_reg_non_zero(dst)? else {
8767                        return Err(ProgramFromElfError::other(format!(
8768                            "{lo_rel_name} with a zero destination register: 0x{lo_inst_raw:08x} in {section_name}[0x{relative_lo:08x}]"
8769                        )));
8770                    };
8771
8772                    (base, InstExt::Basic(BasicInst::LoadAddressIndirect { dst, target }))
8773                }
8774                _ => {
8775                    return Err(ProgramFromElfError::other(format!(
8776                        "{lo_rel_name} relocation (with {hi_kind} as the upper relocation) for an unsupported instruction: {lo_inst:?}"
8777                    )));
8778                }
8779            }
8780        } else {
8781            match lo_inst {
8782                Inst::RegImm {
8783                    kind: RegImmKind::Add32,
8784                    src,
8785                    dst,
8786                    ..
8787                } if !elf.is_64() => {
8788                    let Some(dst) = cast_reg_non_zero(dst)? else {
8789                        return Err(ProgramFromElfError::other(format!(
8790                            "{lo_rel_name} with a zero destination register: 0x{lo_inst_raw:08x} in {section_name}[0x{relative_lo:08x}]"
8791                        )));
8792                    };
8793
8794                    (src, InstExt::Basic(BasicInst::LoadAddress { dst, target }))
8795                }
8796                Inst::RegImm {
8797                    kind: RegImmKind::Add64,
8798                    src,
8799                    dst,
8800                    ..
8801                } if elf.is_64() => {
8802                    let Some(dst) = cast_reg_non_zero(dst)? else {
8803                        return Err(ProgramFromElfError::other(format!(
8804                            "{lo_rel_name} with a zero destination register: 0x{lo_inst_raw:08x} in {section_name}[0x{relative_lo:08x}]"
8805                        )));
8806                    };
8807
8808                    (src, InstExt::Basic(BasicInst::LoadAddress { dst, target }))
8809                }
8810                Inst::Load { kind, base, dst, .. } => {
8811                    if let Some(dst) = cast_reg_non_zero(dst)? {
8812                        (base, InstExt::Basic(BasicInst::LoadAbsolute { kind, dst, target }))
8813                    } else {
8814                        (base, InstExt::nop())
8815                    }
8816                }
8817                Inst::Store { kind, base, src, .. } => (
8818                    base,
8819                    InstExt::Basic(BasicInst::StoreAbsolute {
8820                        kind,
8821                        src: cast_reg_any(src)?,
8822                        target,
8823                    }),
8824                ),
8825                _ => {
8826                    return Err(ProgramFromElfError::other(format!(
8827                        "{lo_rel_name} relocation (with {hi_kind} as the upper relocation) for an unsupported instruction: {lo_inst:?}"
8828                    )));
8829                }
8830            }
8831        };
8832
8833        if lo_reg != hi_reg {
8834            // NOTE: These *can* apparently be sometimes different, so it's not an error if this happens.
8835            //
8836            // I've seen a case where the whole thing looked roughly like this:
8837            //
8838            //   auipc   a1,0x2057        # HI
8839            //   sw      a1,4(sp)         # Stash the HI part on the stack
8840            //   lw      a1,-460(a1)      # LO (1)
8841            //   ... a bunch of code ...
8842            //   lw      a2,4(sp)         # Reload the HI port from the stack (note different register)
8843            //   sw      a0,-460(a2)      # LO (2)
8844            log::trace!(
8845                "{lo_rel_name} + {hi_kind} relocation pair in '{section_name}' [+0x{relative_lo:x}, +0x{relative_hi:x}] uses different destination registers ({lo_reg:?} and {hi_reg:?})",
8846            );
8847        }
8848
8849        let location_hi = SectionTarget {
8850            section_index: section.index(),
8851            offset: relative_hi,
8852        };
8853        let location_lo = SectionTarget {
8854            section_index: section.index(),
8855            offset: relative_lo,
8856        };
8857
8858        // Since we support full length immediates just turn the upper instructions into a NOP.
8859        instruction_overrides.insert(location_hi, InstExt::nop());
8860        instruction_overrides.insert(location_lo, new_instruction);
8861    }
8862
8863    Ok(())
8864}
8865
8866fn parse_function_symbols<H>(elf: &Elf<H>) -> Result<Vec<(Source, String)>, ProgramFromElfError>
8867where
8868    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
8869{
8870    let mut functions = Vec::new();
8871    for sym in elf.symbols() {
8872        match sym.kind() {
8873            object::elf::STT_FUNC => {
8874                if sym.is_undefined() {
8875                    continue;
8876                }
8877
8878                let (section, offset) = sym.section_and_offset()?;
8879                let Some(name) = sym.name() else { continue };
8880
8881                if name.is_empty() {
8882                    continue;
8883                }
8884
8885                let source = Source {
8886                    section_index: section.index(),
8887                    offset_range: (offset..offset + sym.size()).into(),
8888                };
8889
8890                functions.push((source, name.to_owned()));
8891            }
8892            object::elf::STT_NOTYPE | object::elf::STT_OBJECT | object::elf::STT_SECTION | object::elf::STT_FILE => {}
8893            kind => return Err(ProgramFromElfError::other(format!("unsupported symbol type: {}", kind))),
8894        }
8895    }
8896
8897    functions.sort_unstable_by_key(|(source, _)| *source);
8898    functions.dedup_by_key(|(source, _)| *source);
8899
8900    Ok(functions)
8901}
8902
/// Optimization level applied when relinking the program.
///
/// `set_optimize(false)` selects [`OptLevel::O0`] and `set_optimize(true)`
/// selects [`OptLevel::O2`], which is also the default.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum OptLevel {
    /// Optimizations disabled.
    O0,
    /// An intermediate optimization level (between `O0` and `O2`).
    O1,
    /// Full optimizations; the default.
    O2,
}
8909
/// Configuration for [`program_from_elf`].
///
/// Construct with [`Config::default`] and adjust through the `set_*` builder
/// methods; all fields are private.
pub struct Config {
    // When true the output is stripped — presumably of debug info/symbols;
    // the exact effect is applied elsewhere in the pipeline (TODO confirm).
    strip: bool,
    // How aggressively to optimize; defaults to `OptLevel::O2`.
    opt_level: OptLevel,
    // Threshold controlling inlining decisions; defaults to 2.
    // (The unit — e.g. instruction count — is not visible in this file section.)
    inline_threshold: usize,
    // Whether loads deemed unnecessary may be elided; defaults to true.
    elide_unnecessary_loads: bool,
    // Entries of the dispatch table, as raw byte strings; empty by default.
    dispatch_table: Vec<Vec<u8>>,
    // Lower bound on the program's stack size, in bytes;
    // defaults to `VM_MIN_PAGE_SIZE * 2`.
    min_stack_size: u32,
}
8918
8919impl Default for Config {
8920    fn default() -> Self {
8921        Config {
8922            strip: false,
8923            opt_level: OptLevel::O2,
8924            inline_threshold: 2,
8925            elide_unnecessary_loads: true,
8926            dispatch_table: Vec::new(),
8927            min_stack_size: VM_MIN_PAGE_SIZE * 2,
8928        }
8929    }
8930}
8931
8932impl Config {
8933    pub fn set_strip(&mut self, value: bool) -> &mut Self {
8934        self.strip = value;
8935        self
8936    }
8937
8938    pub fn set_optimize(&mut self, value: bool) -> &mut Self {
8939        self.opt_level = if value { OptLevel::O2 } else { OptLevel::O0 };
8940        self
8941    }
8942
8943    pub fn set_opt_level(&mut self, value: OptLevel) -> &mut Self {
8944        self.opt_level = value;
8945        self
8946    }
8947
8948    pub fn set_inline_threshold(&mut self, value: usize) -> &mut Self {
8949        self.inline_threshold = value;
8950        self
8951    }
8952
8953    pub fn set_elide_unnecessary_loads(&mut self, value: bool) -> &mut Self {
8954        self.elide_unnecessary_loads = value;
8955        self
8956    }
8957
8958    pub fn set_dispatch_table(&mut self, dispatch_table: Vec<Vec<u8>>) -> &mut Self {
8959        self.dispatch_table = dispatch_table;
8960        self
8961    }
8962
8963    pub fn set_min_stack_size(&mut self, value: u32) -> &mut Self {
8964        self.min_stack_size = value;
8965        self
8966    }
8967}
8968
/// Converts a RISC-V ELF binary into a PolkaVM program blob.
///
/// First tries to parse `data` as a 32-bit little-endian ELF. If that parse
/// fails specifically because the header class is unsupported, it retries as
/// a 64-bit little-endian ELF; any other parse error is propagated unchanged.
pub fn program_from_elf(config: Config, data: &[u8]) -> Result<Vec<u8>, ProgramFromElfError> {
    match Elf::<object::elf::FileHeader32<object::endian::LittleEndian>>::parse(data) {
        Ok(elf) => program_from_elf_internal(config, elf),
        // NOTE(review): this detects a 64-bit ELF by comparing the error's
        // *message text* (presumably produced by the `object` crate) rather
        // than a structured error code; it will silently stop falling back if
        // that message is ever reworded upstream — worth confirming/hardening.
        Err(ProgramFromElfError(ProgramFromElfErrorKind::FailedToParseElf(e))) if e.to_string() == "Unsupported ELF header" => {
            let elf = Elf::<object::elf::FileHeader64<object::endian::LittleEndian>>::parse(data)?;
            program_from_elf_internal(config, elf)
        }
        Err(e) => Err(e),
    }
}
8979
8980fn program_from_elf_internal<H>(config: Config, mut elf: Elf<H>) -> Result<Vec<u8>, ProgramFromElfError>
8981where
8982    H: object::read::elf::FileHeader<Endian = object::LittleEndian>,
8983{
8984    let is_rv64 = elf.is_64();
8985    let bitness = if is_rv64 { Bitness::B64 } else { Bitness::B32 };
8986
8987    if elf.section_by_name(".got").next().is_none() {
8988        elf.add_empty_data_section(".got");
8989    }
8990
8991    let mut decoder_config = DecoderConfig::new_32bit();
8992    decoder_config.set_rv64(elf.is_64());
8993
8994    let mut sections_ro_data = Vec::new();
8995    let mut sections_rw_data = Vec::new();
8996    let mut sections_bss = Vec::new();
8997    let mut sections_code = Vec::new();
8998    let mut sections_metadata = Vec::new();
8999    let mut sections_exports = Vec::new();
9000    let mut sections_min_stack_size = Vec::new();
9001    let mut sections_other = Vec::new();
9002
9003    let mut section_map = RangeMap::new();
9004
9005    log::trace!("ELF sections:");
9006    for section in elf.sections() {
9007        let name = section.name();
9008        let is_writable = section.is_writable();
9009        let kind = section.elf_section_type();
9010
9011        log::trace!(
9012            " {}: 0x{:08x}..0x{:08x}: {} [ty={}] ({} bytes)",
9013            section.index(),
9014            section.original_address(),
9015            section.original_address() + section.size(),
9016            name,
9017            kind,
9018            section.size()
9019        );
9020
9021        if section.is_allocated() && section.original_address() != 0 {
9022            section_map.insert(
9023                section.original_address()..section.original_address() + section.size(),
9024                section.index(),
9025            );
9026        }
9027
9028        if name == ".rodata"
9029            || name.starts_with(".rodata.")
9030            || name.starts_with(".srodata.")
9031            || name == ".data.rel.ro"
9032            || name.starts_with(".data.rel.ro.")
9033            || name == ".got"
9034            || name == ".got.plt"
9035            || name == ".relro_padding"
9036        {
9037            if name == ".rodata" && is_writable {
9038                return Err(ProgramFromElfError::other(format!(
9039                    "expected section '{name}' to be read-only, yet it is writable"
9040                )));
9041            }
9042
9043            sections_ro_data.push(section.index());
9044        } else if name == ".data" || name.starts_with(".data.") || name == ".sdata" || name.starts_with(".sdata.") {
9045            if !is_writable {
9046                return Err(ProgramFromElfError::other(format!(
9047                    "expected section '{name}' to be writable, yet it is read-only"
9048                )));
9049            }
9050
9051            sections_rw_data.push(section.index());
9052        } else if name == ".bss" || name.starts_with(".bss.") || name == ".sbss" || name.starts_with(".sbss.") {
9053            if !is_writable {
9054                return Err(ProgramFromElfError::other(format!(
9055                    "expected section '{name}' to be writable, yet it is read-only"
9056                )));
9057            }
9058
9059            sections_bss.push(section.index());
9060        } else if name == ".text" || name.starts_with(".text.") || (section.is_allocated() && section.is_executable()) {
9061            if is_writable {
9062                return Err(ProgramFromElfError::other(format!(
9063                    "expected section '{name}' to be read-only, yet it is writable"
9064                )));
9065            }
9066
9067            sections_code.push(section.index());
9068        } else if name == ".polkavm_metadata" {
9069            sections_metadata.push(section.index());
9070        } else if name == ".polkavm_exports" {
9071            sections_exports.push(section.index());
9072        } else if name == ".polkavm_min_stack_size" {
9073            sections_min_stack_size.push(section.index());
9074        } else if name == ".eh_frame" || name == ".got" || name == ".dynsym" || name == ".dynstr" || name == ".dynamic" {
9075            continue;
9076        } else if section.is_allocated() {
9077            if matches!(
9078                kind,
9079                object::elf::SHT_HASH
9080                    | object::elf::SHT_GNU_HASH
9081                    | object::elf::SHT_DYNSYM
9082                    | object::elf::SHT_STRTAB
9083                    | object::elf::SHT_RELA
9084            ) {
9085                continue;
9086            }
9087
9088            // We're supposed to load this section into memory at runtime, but we don't know what it is.
9089            return Err(ProgramFromElfErrorKind::UnsupportedSection(name.to_owned()).into());
9090        } else {
9091            sections_other.push(section.index());
9092        }
9093    }
9094
9095    if sections_code.is_empty() {
9096        return Err(ProgramFromElfError::other(
9097            "the program contains no code (linking empty programs is not supported!)",
9098        ));
9099    }
9100
9101    let section_regspill = elf.add_empty_data_section(".regspill");
9102    sections_rw_data.insert(0, section_regspill);
9103
9104    let code_sections_set: HashSet<SectionIndex> = sections_code.iter().copied().collect();
9105    let data_sections = sections_ro_data
9106        .iter()
9107        .chain(sections_rw_data.iter())
9108        .chain(sections_bss.iter()) // Shouldn't need relocations, but just in case.
9109        .chain(sections_other.iter())
9110        .chain(sections_metadata.iter())
9111        .chain(sections_exports.iter())
9112        .copied();
9113
9114    let mut relocations = BTreeMap::new();
9115    for section_index in data_sections {
9116        let section = elf.section_by_index(section_index);
9117        harvest_data_relocations(&elf, &code_sections_set, section, &mut relocations)?;
9118    }
9119
9120    let mut instruction_overrides = HashMap::new();
9121    for &section_index in &sections_code {
9122        let section = elf.section_by_index(section_index);
9123        harvest_code_relocations(&elf, section, &decoder_config, &mut instruction_overrides, &mut relocations)?;
9124    }
9125
9126    let exports = sections_exports
9127        .iter()
9128        .map(|&section_index| {
9129            let section = elf.section_by_index(section_index);
9130            extract_exports(&elf, &relocations, section)
9131        })
9132        .collect::<Result<Vec<_>, _>>()?;
9133    let mut exports: Vec<_> = exports.into_iter().flatten().collect();
9134
9135    let mut instructions = Vec::new();
9136    let mut imports = Vec::new();
9137    let mut metadata_to_nth_import = HashMap::new();
9138
9139    for &section_index in &sections_code {
9140        let section = elf.section_by_index(section_index);
9141        let initial_instruction_count = instructions.len();
9142        parse_code_section(
9143            &elf,
9144            section,
9145            &decoder_config,
9146            &relocations,
9147            &mut imports,
9148            &mut metadata_to_nth_import,
9149            &mut instruction_overrides,
9150            &mut instructions,
9151        )?;
9152
9153        if instructions.len() > initial_instruction_count {
9154            // Sometimes a section ends with a `call`, which (considering sections can be reordered) would put
9155            // the return address out of bounds of the section, so let's inject an `unimp` here to make sure this doesn't happen.
9156            //
9157            // If it ends up being unnecessary the optimizer will remove it anyway.
9158            let last_source = instructions.last().unwrap().0;
9159            let source = Source {
9160                section_index: last_source.section_index,
9161                offset_range: (last_source.offset_range.end..last_source.offset_range.end + 4).into(),
9162            };
9163            instructions.push((source, InstExt::Control(ControlInst::Unimplemented)));
9164        }
9165    }
9166
9167    if !instruction_overrides.is_empty() {
9168        return Err(ProgramFromElfError::other("internal error: instruction overrides map is not empty"));
9169    }
9170
9171    core::mem::drop(instruction_overrides);
9172
9173    assert!(instructions
9174        .iter()
9175        .all(|(source, _)| source.offset_range.start < source.offset_range.end));
9176
9177    {
9178        let strip_relocations_for_sections: HashSet<_> =
9179            sections_metadata.iter().copied().chain(sections_exports.iter().copied()).collect();
9180
9181        relocations.retain(|relocation_target, _| !strip_relocations_for_sections.contains(&relocation_target.section_index));
9182    }
9183
9184    let data_sections_set: HashSet<SectionIndex> = sections_ro_data
9185        .iter()
9186        .chain(sections_rw_data.iter())
9187        .chain(sections_bss.iter()) // Shouldn't need relocations, but just in case.
9188        .copied()
9189        .collect();
9190
9191    let section_to_function_name = elf.section_to_function_name();
9192    let all_jump_targets = harvest_all_jump_targets(&elf, &data_sections_set, &code_sections_set, &instructions, &relocations, &exports)?;
9193    let all_blocks = split_code_into_basic_blocks(&elf, &section_to_function_name, &all_jump_targets, instructions)?;
9194    for block in &all_blocks {
9195        for source in block.next.source.as_slice() {
9196            assert!(source.offset_range.start < source.offset_range.end);
9197        }
9198    }
9199
9200    let mut section_to_block = build_section_to_block_map(&all_blocks)?;
9201    let mut all_blocks = resolve_basic_block_references(&data_sections_set, &section_to_block, &all_blocks)?;
9202    let mut reachability_graph;
9203    let mut used_blocks;
9204
9205    let mut regspill_size = 0;
9206    if matches!(config.opt_level, OptLevel::O1 | OptLevel::O2) {
9207        reachability_graph = calculate_reachability(&section_to_block, &all_blocks, &data_sections_set, &exports, &relocations)?;
9208        if matches!(config.opt_level, OptLevel::O2) {
9209            optimize_program(&config, &elf, &imports, &mut all_blocks, &mut reachability_graph, &mut exports);
9210        } else {
9211            for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
9212                perform_nop_elimination(&mut all_blocks, current);
9213            }
9214        }
9215        used_blocks = collect_used_blocks(&all_blocks, &reachability_graph);
9216        spill_fake_registers(
9217            section_regspill,
9218            &mut all_blocks,
9219            &mut reachability_graph,
9220            &imports,
9221            &used_blocks,
9222            &mut regspill_size,
9223            is_rv64,
9224        );
9225        used_blocks = add_missing_fallthrough_blocks(&mut all_blocks, &mut reachability_graph, used_blocks);
9226        merge_consecutive_fallthrough_blocks(&mut all_blocks, &mut reachability_graph, &mut section_to_block, &mut used_blocks);
9227        if matches!(config.opt_level, OptLevel::O2) {
9228            replace_immediates_with_registers(&mut all_blocks, &imports, &used_blocks);
9229        }
9230
9231        let expected_reachability_graph =
9232            calculate_reachability(&section_to_block, &all_blocks, &data_sections_set, &exports, &relocations)?;
9233        if reachability_graph != expected_reachability_graph {
9234            if std::env::var("POLKAVM_LINKER_DUMP_REACHABILITY_GRAPH")
9235                .map(|value| value == "1")
9236                .unwrap_or(false)
9237            {
9238                let _ = std::fs::write("/tmp/reachability_graph_actual.txt", format!("{reachability_graph:#?}"));
9239                let _ = std::fs::write("/tmp/reachability_graph_expected.txt", format!("{expected_reachability_graph:#?}"));
9240            }
9241            panic!("internal error: inconsistent reachability after optimization; this is a bug, please report it!");
9242        }
9243    } else {
9244        for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
9245            perform_nop_elimination(&mut all_blocks, current);
9246        }
9247
9248        reachability_graph = ReachabilityGraph::default();
9249        for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
9250            let reachability = reachability_graph.for_code.entry(current).or_default();
9251
9252            reachability.always_reachable = true;
9253            reachability.always_dynamically_reachable = true;
9254        }
9255
9256        for &section_index in sections_ro_data.iter().chain(sections_rw_data.iter()) {
9257            let reachability = reachability_graph.for_data.entry(section_index).or_default();
9258
9259            reachability.always_reachable = true;
9260            reachability.always_dynamically_reachable = true;
9261        }
9262
9263        for (export_index, export) in exports.iter().enumerate() {
9264            let Some(&block_target) = section_to_block.get(&export.location) else {
9265                return Err(ProgramFromElfError::other("export points to a non-block"));
9266            };
9267
9268            reachability_graph
9269                .for_code
9270                .entry(block_target)
9271                .or_default()
9272                .exports
9273                .push(export_index);
9274        }
9275
9276        used_blocks = (0..all_blocks.len()).map(BlockTarget::from_raw).collect();
9277        spill_fake_registers(
9278            section_regspill,
9279            &mut all_blocks,
9280            &mut reachability_graph,
9281            &imports,
9282            &used_blocks,
9283            &mut regspill_size,
9284            is_rv64,
9285        );
9286    }
9287
9288    elf.extend_section_to_at_least(section_regspill, regspill_size);
9289
9290    for &section_index in &sections_other {
9291        if reachability_graph.is_data_section_reachable(section_index) {
9292            return Err(ProgramFromElfError::other(format!(
9293                "unsupported section used in program graph: '{name}'",
9294                name = elf.section_by_index(section_index).name(),
9295            )));
9296        }
9297    }
9298
9299    log::debug!("Exports found: {}", exports.len());
9300
9301    {
9302        let mut count_dynamic = 0;
9303        for reachability in reachability_graph.for_code.values() {
9304            if reachability.is_dynamically_reachable() {
9305                count_dynamic += 1;
9306            }
9307        }
9308        log::debug!(
9309            "Blocks used: {}/{} ({} dynamically reachable, {} statically reachable)",
9310            reachability_graph.for_code.len(),
9311            all_blocks.len(),
9312            count_dynamic,
9313            reachability_graph.for_code.len() - count_dynamic
9314        );
9315    }
9316
9317    let section_got = elf.add_empty_data_section(".got");
9318    sections_ro_data.push(section_got);
9319    reachability_graph.mark_data_section_reachable(section_got);
9320
9321    let mut target_to_got_offset: HashMap<AnyTarget, u64> = HashMap::new();
9322    let mut got_size = 0;
9323
9324    let mut used_imports = HashSet::new();
9325    for block in &all_blocks {
9326        if !reachability_graph.is_code_reachable(block.target) {
9327            continue;
9328        }
9329
9330        for (_, instruction) in &block.ops {
9331            match instruction {
9332                BasicInst::LoadAddressIndirect { target, .. } => {
9333                    if target_to_got_offset.contains_key(target) {
9334                        continue;
9335                    }
9336
9337                    let offset = target_to_got_offset.len() as u64 * u64::from(bitness);
9338                    target_to_got_offset.insert(*target, offset);
9339                    got_size = offset + u64::from(bitness);
9340
9341                    let target = match target {
9342                        AnyTarget::Data(target) => *target,
9343                        AnyTarget::Code(target) => all_blocks[target.index()].source.begin(),
9344                    };
9345
9346                    relocations.insert(
9347                        SectionTarget {
9348                            section_index: section_got,
9349                            offset,
9350                        },
9351                        RelocationKind::Abs {
9352                            target,
9353                            size: bitness.into(),
9354                        },
9355                    );
9356                }
9357                BasicInst::Ecalli { nth_import } => {
9358                    used_imports.insert(*nth_import);
9359                }
9360                _ => {}
9361            }
9362        }
9363    }
9364
9365    elf.extend_section_to_at_least(section_got, got_size.try_into().expect("overflow"));
9366    check_imports_and_assign_indexes(&mut imports, &used_imports)?;
9367
9368    let mut base_address_for_section = HashMap::new();
9369    let sections_ro_data: Vec<_> = sections_ro_data
9370        .into_iter()
9371        .filter(|section_index| reachability_graph.is_data_section_reachable(*section_index))
9372        .collect();
9373
9374    let sections_rw_data: Vec<_> = sections_rw_data
9375        .into_iter()
9376        .filter(|section_index| reachability_graph.is_data_section_reachable(*section_index))
9377        .collect();
9378
9379    let memory_config = extract_memory_config(
9380        &elf,
9381        &sections_ro_data,
9382        &sections_rw_data,
9383        &sections_bss,
9384        &sections_min_stack_size,
9385        &mut base_address_for_section,
9386        config.min_stack_size,
9387    )?;
9388
9389    log::trace!("Memory configuration: {:#?}", memory_config);
9390
9391    let (jump_table, jump_target_for_block) = build_jump_table(all_blocks.len(), &used_blocks, &reachability_graph);
9392    let code = emit_code(
9393        &section_to_function_name,
9394        &imports,
9395        &base_address_for_section,
9396        section_got,
9397        &target_to_got_offset,
9398        &all_blocks,
9399        &used_blocks,
9400        &used_imports,
9401        &jump_target_for_block,
9402        matches!(config.opt_level, OptLevel::O2),
9403        is_rv64,
9404        memory_config.heap_base,
9405    )?;
9406
9407    {
9408        // Assign dummy base addresses to all other sections.
9409        //
9410        // This is mostly used for debug info.
9411        for &section_index in &sections_other {
9412            let address = elf.section_by_index(section_index).original_address();
9413            assert!(!reachability_graph.is_data_section_reachable(section_index));
9414            assert!(base_address_for_section.insert(section_index, address).is_none());
9415        }
9416    }
9417
9418    for (&relocation_target, &relocation) in &relocations {
9419        let section = elf.section_by_index(relocation_target.section_index);
9420        if !reachability_graph.is_data_section_reachable(relocation_target.section_index) {
9421            continue;
9422        }
9423
9424        log::trace!(
9425            "Applying relocation to '{}'[0x{:x}] {relocation_target}: {:?}",
9426            section.name(),
9427            relocation_target.offset,
9428            relocation
9429        );
9430
9431        fn write_generic(size: RelocationSize, data: &mut [u8], relative_address: u64, value: u64) -> Result<(), ProgramFromElfError> {
9432            match size {
9433                RelocationSize::U64 => write_u64(data, relative_address, value),
9434                RelocationSize::U32 => {
9435                    let Ok(value) = u32::try_from(value) else {
9436                        return Err(ProgramFromElfError::other(
9437                            "overflow when applying relocations: value doesn't fit in an u32",
9438                        ));
9439                    };
9440
9441                    write_u32(data, relative_address, value)
9442                }
9443                RelocationSize::U16 => {
9444                    let Ok(value) = u16::try_from(value) else {
9445                        return Err(ProgramFromElfError::other(
9446                            "overflow when applying relocations: value doesn't fit in an u16",
9447                        ));
9448                    };
9449
9450                    write_u16(data, relative_address, value)
9451                }
9452                RelocationSize::U8 => {
9453                    let Ok(value) = u8::try_from(value) else {
9454                        return Err(ProgramFromElfError::other(
9455                            "overflow when applying relocations: value doesn't fit in an u8",
9456                        ));
9457                    };
9458
9459                    data[relative_address as usize] = value;
9460                    Ok(())
9461                }
9462            }
9463        }
9464
9465        match relocation {
9466            RelocationKind::Offset { origin, target, size } => {
9467                // These relocations should only be used in debug info sections and RO data sections.
9468                if reachability_graph.is_data_section_reachable(section.index()) && !matches!(size, SizeRelocationSize::Generic(..)) {
9469                    return Err(ProgramFromElfError::other(format!(
9470                        "relocation was not expected in section '{name}': {relocation:?}",
9471                        name = section.name(),
9472                    )));
9473                }
9474
9475                let Some(&origin_section_address) = base_address_for_section.get(&origin.section_index) else {
9476                    return Err(ProgramFromElfError::other(format!(
9477                        "internal error: relocation in '{name}' ({relocation_target}) refers to an origin section that doesn't have a base address assigned: origin = '{origin_name}' ({origin}), target = '{target_name}' ({target}), size = {size:?}",
9478                        name = section.name(),
9479                        origin_name = elf.section_by_index(origin.section_index).name(),
9480                        target_name = elf.section_by_index(target.section_index).name(),
9481                    )));
9482                };
9483
9484                let Some(&target_section_address) = base_address_for_section.get(&target.section_index) else {
9485                    return Err(ProgramFromElfError::other(format!(
9486                        "internal error: relocation in '{name}' ({relocation_target}) refers to a target section that doesn't have a base address assigned: origin = '{origin_name}' ({origin}), target = '{target_name}' ({target}), size = {size:?}",
9487                        name = section.name(),
9488                        origin_name = elf.section_by_index(origin.section_index).name(),
9489                        target_name = elf.section_by_index(target.section_index).name(),
9490                    )));
9491                };
9492
9493                let range = origin_section_address.wrapping_add(origin.offset)..target_section_address.wrapping_add(target.offset);
9494                let data = elf.section_data_mut(relocation_target.section_index);
9495                let mut value = range.end.wrapping_sub(range.start);
9496                match size {
9497                    SizeRelocationSize::Uleb128 => {
9498                        overwrite_uleb128(data, relocation_target.offset as usize, value)?;
9499                    }
9500                    SizeRelocationSize::SixBits => {
9501                        let mask = 0b00111111;
9502                        if value > mask {
9503                            return Err(ProgramFromElfError::other("six bit relocation overflow"));
9504                        }
9505
9506                        let output = (u64::from(read_u8(data, relocation_target.offset)?) & (!mask)) | (value & mask);
9507                        data[relocation_target.offset as usize] = output as u8;
9508                    }
9509                    SizeRelocationSize::Generic(size) => {
9510                        if range.end < range.start {
9511                            match size {
9512                                RelocationSize::U8 => {
9513                                    if let Ok(new_value) = cast(value).to_signed().try_into() {
9514                                        let new_value: i8 = new_value;
9515                                        value = cast(cast(new_value).to_unsigned()).to_u64();
9516                                    }
9517                                }
9518                                RelocationSize::U16 => {
9519                                    if let Ok(new_value) = cast(value).to_signed().try_into() {
9520                                        let new_value: i16 = new_value;
9521                                        value = cast(cast(new_value).to_unsigned()).to_u64();
9522                                    }
9523                                }
9524                                RelocationSize::U32 => {
9525                                    if let Ok(new_value) = cast(value).to_signed().try_into() {
9526                                        let new_value: i32 = new_value;
9527                                        value = cast(cast(new_value).to_unsigned()).to_u64();
9528                                    }
9529                                }
9530                                RelocationSize::U64 => {}
9531                            }
9532                        }
9533
9534                        write_generic(size, data, relocation_target.offset, value)?;
9535                    }
9536                }
9537            }
9538            RelocationKind::Abs { target, size } => {
9539                if let Some(&block_target) = section_to_block.get(&target) {
9540                    let Some(jump_target) = jump_target_for_block[block_target.index()] else {
9541                        if !reachability_graph.is_data_section_reachable(relocation_target.section_index) {
9542                            // Most likely debug info for something that was stripped out.
9543                            let data = elf.section_data_mut(relocation_target.section_index);
9544                            write_generic(size, data, relocation_target.offset, 0)?;
9545                            continue;
9546                        }
9547
9548                        return Err(ProgramFromElfError::other(format!(
9549                            "absolute relocation in section '{location_name}' targets section '{target_name}'[0x{target_offset:x}] which has no associated basic block",
9550                            location_name = elf.section_by_index(relocation_target.section_index).name(),
9551                            target_name = elf.section_by_index(target.section_index).name(),
9552                            target_offset = target.offset,
9553                        )));
9554                    };
9555
9556                    let jump_target = jump_target.dynamic_target.expect("missing jump target for address");
9557                    let Some(jump_target) = jump_target.checked_mul(VM_CODE_ADDRESS_ALIGNMENT) else {
9558                        return Err(ProgramFromElfError::other("overflow when applying a jump target relocation"));
9559                    };
9560
9561                    let data = elf.section_data_mut(relocation_target.section_index);
9562                    write_generic(size, data, relocation_target.offset, jump_target.into())?;
9563                } else {
9564                    let Some(section_base) = base_address_for_section.get(&target.section_index) else {
9565                        if !reachability_graph.is_data_section_reachable(relocation_target.section_index) {
9566                            let data = elf.section_data_mut(relocation_target.section_index);
9567                            write_generic(size, data, relocation_target.offset, 0)?;
9568                            continue;
9569                        }
9570
9571                        return Err(ProgramFromElfError::other(format!(
9572                            "absolute relocation in section '{location_name}' targets section '{target_name}'[0x{target_offset:x}] which has no relocated base address assigned",
9573                            location_name = elf.section_by_index(relocation_target.section_index).name(),
9574                            target_name = elf.section_by_index(target.section_index).name(),
9575                            target_offset = target.offset,
9576                        )));
9577                    };
9578
9579                    let Some(value) = section_base.checked_add(target.offset) else {
9580                        return Err(ProgramFromElfError::other("overflow when applying an absolute relocation"));
9581                    };
9582
9583                    let data = elf.section_data_mut(relocation_target.section_index);
9584                    write_generic(size, data, relocation_target.offset, value)?;
9585                }
9586            }
9587            RelocationKind::JumpTable { target_code, target_base } => {
9588                let Some(&block_target) = section_to_block.get(&target_code) else {
9589                    return Err(ProgramFromElfError::other(
9590                        "jump table relocation doesn't refers to a start of a basic block",
9591                    ));
9592                };
9593
9594                let Some(jump_target) = jump_target_for_block[block_target.index()] else {
9595                    return Err(ProgramFromElfError::other(
9596                        "no jump target for block was found when applying a jump table relocation",
9597                    ));
9598                };
9599
9600                let Some(section_base) = base_address_for_section.get(&target_base.section_index) else {
9601                    return Err(ProgramFromElfError::other(
9602                        "no base address for section when applying a jump table relocation",
9603                    ));
9604                };
9605
9606                let Some(base_address) = section_base.checked_add(target_base.offset) else {
9607                    return Err(ProgramFromElfError::other(
9608                        "overflow when applying a jump table relocation: section base and offset cannot be added together",
9609                    ));
9610                };
9611
9612                let Ok(base_address) = u32::try_from(base_address) else {
9613                    return Err(ProgramFromElfError::other(
9614                        "overflow when applying a jump table relocation: base address doesn't fit in a u32",
9615                    ));
9616                };
9617
9618                let jump_target = jump_target.dynamic_target.expect("missing jump target for address");
9619                let Some(jump_target) = jump_target.checked_mul(VM_CODE_ADDRESS_ALIGNMENT) else {
9620                    return Err(ProgramFromElfError::other(
9621                        "overflow when applying a jump table relocation: jump target is too big",
9622                    ));
9623                };
9624
9625                let value = jump_target.wrapping_sub(base_address);
9626                let data = elf.section_data_mut(relocation_target.section_index);
9627                write_u32(data, relocation_target.offset, value)?;
9628            }
9629        }
9630    }
9631
9632    let mut location_map: HashMap<SectionTarget, Arc<[Location]>> = HashMap::new();
9633    if !config.strip {
9634        let mut string_cache = crate::utils::StringCache::default();
9635        let dwarf_info = crate::dwarf::load_dwarf(&mut string_cache, &elf, &relocations, &section_map)?;
9636        location_map = dwarf_info.location_map;
9637
9638        // If there is no DWARF info present try to use the symbol table as a fallback.
9639        for (source, name) in parse_function_symbols(&elf)? {
9640            if location_map.contains_key(&source.begin()) {
9641                continue;
9642            }
9643
9644            let (namespace, function_name) = split_function_name(&name);
9645            let namespace = if namespace.is_empty() {
9646                None
9647            } else {
9648                Some(string_cache.dedup(&namespace))
9649            };
9650
9651            let location = Location {
9652                kind: FrameKind::Enter,
9653                namespace,
9654                function_name: Some(string_cache.dedup(&function_name)),
9655                source_code_location: None,
9656            };
9657
9658            let location_stack: Arc<[Location]> = vec![location].into();
9659            for target in source.iter() {
9660                location_map.insert(target, Arc::clone(&location_stack));
9661            }
9662        }
9663    }
9664
9665    log::trace!("Instruction count: {}", code.len());
9666
9667    let mut builder = if elf.is_64() {
9668        ProgramBlobBuilder::new_64bit()
9669    } else {
9670        ProgramBlobBuilder::new()
9671    };
9672
9673    builder.set_ro_data_size(memory_config.ro_data_size);
9674    builder.set_rw_data_size(memory_config.rw_data_size);
9675    builder.set_stack_size(memory_config.min_stack_size);
9676
9677    let [ro_data, rw_data] = {
9678        [memory_config.ro_data, memory_config.rw_data].map(|ranges| {
9679            let mut buffer = Vec::new();
9680            for range in ranges {
9681                match range {
9682                    DataRef::Section { section_index, range } => {
9683                        let slice = &elf.section_by_index(section_index).data()[range];
9684                        buffer.extend_from_slice(slice);
9685                    }
9686                    DataRef::Padding(bytes) => {
9687                        let new_size = buffer.len() + bytes;
9688                        buffer.resize(new_size, 0);
9689                    }
9690                }
9691            }
9692            buffer
9693        })
9694    };
9695
9696    builder.set_ro_data(ro_data);
9697    builder.set_rw_data(rw_data);
9698
9699    {
9700        let mut sorted_imports = imports.clone();
9701        sorted_imports.sort_by(|a, b| {
9702            a.metadata
9703                .index
9704                .cmp(&b.metadata.index)
9705                .then_with(|| a.metadata.symbol.cmp(&b.metadata.symbol))
9706        });
9707
9708        let mut next_index = 0;
9709        for import in sorted_imports {
9710            let Some(index) = import.index else {
9711                continue;
9712            };
9713
9714            assert_eq!(index, next_index);
9715            next_index += 1;
9716
9717            builder.add_import(&import.metadata.symbol);
9718        }
9719    }
9720
9721    let mut export_count = 0;
9722    for current in used_blocks {
9723        for &export_index in &reachability_graph.for_code.get(&current).unwrap().exports {
9724            let export = &exports[export_index];
9725            let jump_target = jump_target_for_block[current.index()]
9726                .expect("internal error: export metadata points to a block without a jump target assigned");
9727
9728            builder.add_export_by_basic_block(jump_target.static_target, &export.metadata.symbol);
9729            export_count += 1;
9730        }
9731    }
9732    assert_eq!(export_count, exports.len());
9733
9734    let mut locations_for_instruction: Vec<Option<Arc<[Location]>>> = Vec::with_capacity(code.len());
9735    let mut raw_code = Vec::with_capacity(code.len());
9736
9737    for (nth_inst, (source_stack, inst)) in code.into_iter().enumerate() {
9738        raw_code.push(inst);
9739
9740        let mut function_name = None;
9741        if !config.strip {
9742            // Two or more addresses can point to the same instruction (e.g. in case of macro op fusion).
9743            // Two or more instructions can also have the same address (e.g. in case of jump targets).
9744
9745            // TODO: Use a smallvec.
9746            let mut list = Vec::new();
9747            for source in source_stack.as_slice() {
9748                for offset in (source.offset_range.start..source.offset_range.end).step_by(2) {
9749                    let target = SectionTarget {
9750                        section_index: source.section_index,
9751                        offset,
9752                    };
9753
9754                    if let Some(locations) = location_map.get(&target) {
9755                        if let Some(last) = list.last() {
9756                            if locations == last {
9757                                // If we inlined a basic block from the same function do not repeat the same location.
9758                                break;
9759                            }
9760                        } else {
9761                            function_name = locations[0].function_name.as_deref();
9762                        }
9763
9764                        list.push(Arc::clone(locations));
9765                        break;
9766                    }
9767                }
9768
9769                if list.is_empty() {
9770                    // If the toplevel source doesn't have a location don't try the lower ones.
9771                    break;
9772                }
9773            }
9774
9775            if list.is_empty() {
9776                locations_for_instruction.push(None);
9777            } else if list.len() == 1 {
9778                locations_for_instruction.push(list.into_iter().next())
9779            } else {
9780                let mut new_list = Vec::new();
9781                for sublist in list {
9782                    new_list.extend(sublist.iter().cloned());
9783                }
9784
9785                locations_for_instruction.push(Some(new_list.into()));
9786            }
9787        }
9788
9789        log::trace!(
9790            "Code: 0x{source_address:x} [{function_name}] -> {source_stack} -> #{nth_inst}: {inst}",
9791            source_address = {
9792                elf.section_by_index(source_stack.top().section_index)
9793                    .original_address()
9794                    .wrapping_add(source_stack.top().offset_range.start)
9795            },
9796            function_name = function_name.unwrap_or("")
9797        );
9798    }
9799
9800    for symbol in config.dispatch_table {
9801        builder.add_dispatch_table_entry(symbol);
9802    }
9803
9804    builder.set_code(&raw_code, &jump_table);
9805
9806    let mut offsets = Vec::new();
9807    if !config.strip {
9808        let blob = ProgramBlob::parse(builder.to_vec().into())?;
9809        offsets = blob
9810            .instructions(bitness)
9811            .map(|instruction| (instruction.offset, instruction.next_offset))
9812            .collect();
9813        assert_eq!(offsets.len(), locations_for_instruction.len());
9814
9815        emit_debug_info(&mut builder, &locations_for_instruction, &offsets);
9816    }
9817
9818    let raw_blob = builder.to_vec();
9819
9820    log::debug!("Built a program of {} bytes", raw_blob.len());
9821    let blob = ProgramBlob::parse(raw_blob[..].into())?;
9822
9823    // Sanity check that our debug info was properly emitted and can be parsed.
9824    if cfg!(debug_assertions) && !config.strip {
9825        'outer: for (nth_instruction, locations) in locations_for_instruction.iter().enumerate() {
9826            let (program_counter, _) = offsets[nth_instruction];
9827            let line_program = blob.get_debug_line_program_at(program_counter).unwrap();
9828            let Some(locations) = locations else {
9829                assert!(line_program.is_none());
9830                continue;
9831            };
9832
9833            let mut line_program = line_program.unwrap();
9834            while let Some(region_info) = line_program.run().unwrap() {
9835                if !region_info.instruction_range().contains(&program_counter) {
9836                    continue;
9837                }
9838
9839                assert!(region_info.frames().len() <= locations.len());
9840                for (actual, expected) in region_info.frames().zip(locations.iter()) {
9841                    assert_eq!(actual.kind(), expected.kind);
9842                    assert_eq!(actual.namespace().unwrap(), expected.namespace.as_deref());
9843                    assert_eq!(actual.function_name_without_namespace().unwrap(), expected.function_name.as_deref());
9844                    assert_eq!(
9845                        actual.path().unwrap().map(Cow::Borrowed),
9846                        expected
9847                            .source_code_location
9848                            .as_ref()
9849                            .map(|location| simplify_path(location.path()))
9850                    );
9851                    assert_eq!(
9852                        actual.line(),
9853                        expected
9854                            .source_code_location
9855                            .as_ref()
9856                            .and_then(|location| location.line())
9857                            .and_then(|line| if line != 0 { Some(line) } else { None })
9858                    );
9859                    assert_eq!(
9860                        actual.column(),
9861                        expected
9862                            .source_code_location
9863                            .as_ref()
9864                            .and_then(|location| location.column())
9865                            .and_then(|column| if column != 0 { Some(column) } else { None })
9866                    );
9867                }
9868
9869                continue 'outer;
9870            }
9871
9872            panic!("internal error: region not found for instruction");
9873        }
9874    }
9875
9876    Ok(raw_blob)
9877}
9878
/// Shortens a path for display: a `/home/<user>` prefix is collapsed into `~`.
/// Any path that does not match this shape is returned unchanged (borrowed).
fn simplify_path(path: &str) -> Cow<str> {
    // TODO: Sanitize macOS and Windows paths.
    match path.strip_prefix("/home/") {
        // Only rewrite if there is a path component after the user name;
        // `'/'` is ASCII, so the byte index is a valid slice boundary.
        Some(rest) => match rest.find('/') {
            Some(slash) => Cow::Owned(format!("~{}", &rest[slash..])),
            None => Cow::Borrowed(path),
        },
        None => Cow::Borrowed(path),
    }
}
9889
/// Serializes per-instruction debug locations into the three optional debug
/// custom sections (strings, line programs, line program ranges) and adds
/// them to `builder`.
///
/// `locations_for_instruction` holds one optional location stack per
/// instruction, and `offsets` holds the matching `(start, end)` program
/// counters; the two slices are indexed in lockstep.
fn emit_debug_info(
    builder: &mut ProgramBlobBuilder,
    locations_for_instruction: &[Option<Arc<[Location]>>],
    offsets: &[(ProgramCounter, ProgramCounter)],
) {
    // Deduplicating builder for the debug strings section. Each unique string
    // is stored once as a varint-encoded length followed by its raw bytes,
    // and is referenced by its byte offset within the section.
    #[derive(Default)]
    struct DebugStringsBuilder<'a> {
        map: HashMap<Cow<'a, str>, u32>,
        section: Vec<u8>,
        // Once set, appending a *new* string is an internal error; all strings
        // are expected to have been registered during the group-building scan.
        write_protected: bool,
    }

    impl<'a> DebugStringsBuilder<'a> {
        // Returns the section offset of `s`, appending it first if unseen.
        fn dedup_cow(&mut self, s: Cow<'a, str>) -> u32 {
            if let Some(offset) = self.map.get(&s) {
                return *offset;
            }

            assert!(!self.write_protected);

            let offset = self.section.len();
            let mut buffer = [0xff_u8; varint::MAX_VARINT_LENGTH];
            let length = varint::write_varint(s.len().try_into().expect("debug string length overflow"), &mut buffer);
            self.section.extend_from_slice(&buffer[..length]);
            self.section.extend_from_slice(s.as_bytes());
            let offset: u32 = offset.try_into().expect("debug string section length overflow");
            self.map.insert(s, offset);
            offset
        }

        fn dedup(&mut self, s: &'a str) -> u32 {
            self.dedup_cow(s.into())
        }
    }

    let mut dbg_strings = DebugStringsBuilder::default();
    // Fallback string id used below for absent namespace/function-name/path.
    let empty_string_id = dbg_strings.dedup("");

    // A maximal run of consecutive instructions sharing the same
    // (namespace, function name, path); one line program is emitted per group.
    struct Group<'a> {
        namespace: Option<Arc<str>>,
        function_name: Option<Cow<'a, str>>,
        path: Option<Cow<'a, str>>,
        instruction_position: usize,
        instruction_count: usize,
        program_counter_start: ProgramCounter,
        program_counter_end: ProgramCounter,
    }

    impl<'a> Group<'a> {
        // Identity used to merge adjacent instructions into the same group.
        fn key(&self) -> (Option<&str>, Option<&str>, Option<&str>) {
            (self.namespace.as_deref(), self.function_name.as_deref(), self.path.as_deref())
        }
    }

    let mut groups: Vec<Group> = Vec::new();
    for (instruction_position, locations) in locations_for_instruction.iter().enumerate() {
        let group = if let Some(locations) = locations {
            // Pre-register every string in the dedup map so that later, after
            // `write_protected` is set, lookups during serialization always hit.
            for location in locations.iter() {
                if let Some(ref namespace) = location.namespace {
                    dbg_strings.dedup(namespace);
                }

                if let Some(ref name) = location.function_name {
                    dbg_strings.dedup(name);
                }

                if let Some(ref location) = location.source_code_location {
                    dbg_strings.dedup_cow(simplify_path(location.path()));
                }
            }

            // The topmost frame determines the group's identity.
            let location = &locations[0];
            Group {
                namespace: location.namespace.clone(),
                function_name: location.function_name.clone(),
                path: location.source_code_location.as_ref().map(|target| simplify_path(target.path())),
                instruction_position,
                instruction_count: 1,
                program_counter_start: offsets[instruction_position].0,
                program_counter_end: offsets[instruction_position].1,
            }
        } else {
            // Instruction without any location info still occupies a group so
            // that adjacent located instructions are not merged across it.
            Group {
                namespace: None,
                function_name: None,
                path: None,
                instruction_position,
                instruction_count: 1,
                program_counter_start: offsets[instruction_position].0,
                program_counter_end: offsets[instruction_position].1,
            }
        };

        // Merge into the previous group if the identity matches; groups are
        // always built from contiguous instruction runs (asserted below).
        if let Some(last_group) = groups.last_mut() {
            if last_group.key() == group.key() {
                assert_eq!(last_group.instruction_position + last_group.instruction_count, instruction_position);
                last_group.instruction_count += 1;
                last_group.program_counter_end = group.program_counter_end;
                continue;
            }
        }

        groups.push(group);
    }

    // Groups with neither a function name nor a path carry no useful debug
    // info, so no line program is emitted for them.
    groups.retain(|group| group.function_name.is_some() || group.path.is_some());

    log::trace!("Location groups: {}", groups.len());
    // From here on every string must already be in the dedup map.
    dbg_strings.write_protected = true;

    let mut section_line_programs = Vec::new();
    let mut info_offsets = Vec::with_capacity(groups.len());
    {
        let mut writer = Writer::new(&mut section_line_programs);
        let writer = &mut writer;

        // Per-group info offsets are relative to the start of the section,
        // which begins with a single version byte.
        let offset_base = writer.len();
        writer.push_byte(program::VERSION_DEBUG_LINE_PROGRAM_V1);
        for group in &groups {
            let info_offset: u32 = (writer.len() - offset_base).try_into().expect("function info offset overflow");
            info_offsets.push(info_offset);

            // Shadow copy of one stack frame's last-emitted state; used to
            // delta-encode so that only changed fields emit mutation ops.
            #[derive(Default)]
            struct LineProgramFrame {
                kind: Option<FrameKind>,
                namespace: Option<Arc<str>>,
                function_name: Option<Arc<str>>,
                path: Option<Arc<str>>,
                line: Option<u32>,
                column: Option<u32>,
            }

            #[derive(Default)]
            struct LineProgramState {
                stack: Vec<LineProgramFrame>,
                stack_depth: usize,
                // Frame index that subsequent Set* ops mutate.
                mutation_depth: usize,

                // Amount queued via `finish_instruction` since the last flush
                // (accumulated from per-instruction program counter deltas).
                queued_count: u32,
            }

            impl LineProgramState {
                // Emits a single- or multi-count "finish" op covering
                // everything queued so far, if anything is queued.
                fn flush_if_any_are_queued(&mut self, writer: &mut Writer) {
                    if self.queued_count == 0 {
                        return;
                    }

                    if self.queued_count == 1 {
                        writer.push_byte(LineProgramOp::FinishInstruction as u8);
                    } else {
                        writer.push_byte(LineProgramOp::FinishMultipleInstructions as u8);
                        writer.push_varint(self.queued_count);
                    }

                    self.queued_count = 0;
                }

                // Selects which frame the following Set* ops apply to.
                // Must flush first so queued finishes keep the old depth.
                fn set_mutation_depth(&mut self, writer: &mut Writer, depth: usize) {
                    self.flush_if_any_are_queued(writer);

                    if depth == self.mutation_depth {
                        return;
                    }

                    writer.push_byte(LineProgramOp::SetMutationDepth as u8);
                    writer.push_varint(depth as u32);
                    self.mutation_depth = depth;
                }

                fn set_stack_depth(&mut self, writer: &mut Writer, depth: usize) {
                    if self.stack_depth == depth {
                        return;
                    }

                    // Grow the shadow stack to match the requested depth.
                    while depth > self.stack.len() {
                        self.stack.push(LineProgramFrame::default());
                    }

                    self.flush_if_any_are_queued(writer);

                    writer.push_byte(LineProgramOp::SetStackDepth as u8);
                    writer.push_varint(depth as u32);
                    self.stack_depth = depth;
                }

                // Queues the just-emitted instruction; if the next instruction's
                // stack depth differs by exactly one, emits a fused
                // finish+increment/decrement op. Larger depth jumps are left
                // queued and handled later via `set_stack_depth`.
                fn finish_instruction(&mut self, writer: &mut Writer, next_depth: usize, instruction_length: u32) {
                    self.queued_count += instruction_length;

                    enum Direction {
                        GoDown,
                        GoUp,
                    }

                    let dir = if next_depth == self.stack_depth + 1 {
                        Direction::GoDown
                    } else if next_depth + 1 == self.stack_depth {
                        Direction::GoUp
                    } else {
                        return;
                    };

                    while next_depth > self.stack.len() {
                        self.stack.push(LineProgramFrame::default());
                    }

                    match (self.queued_count == 1, dir) {
                        (true, Direction::GoDown) => {
                            writer.push_byte(LineProgramOp::FinishInstructionAndIncrementStackDepth as u8);
                        }
                        (false, Direction::GoDown) => {
                            writer.push_byte(LineProgramOp::FinishMultipleInstructionsAndIncrementStackDepth as u8);
                            writer.push_varint(self.queued_count);
                        }
                        (true, Direction::GoUp) => {
                            writer.push_byte(LineProgramOp::FinishInstructionAndDecrementStackDepth as u8);
                        }
                        (false, Direction::GoUp) => {
                            writer.push_byte(LineProgramOp::FinishMultipleInstructionsAndDecrementStackDepth as u8);
                            writer.push_varint(self.queued_count);
                        }
                    }

                    self.stack_depth = next_depth;
                    self.queued_count = 0;
                }
            }

            let mut state = LineProgramState::default();
            for nth_instruction in group.instruction_position..group.instruction_position + group.instruction_count {
                // Every instruction in a retained group has locations: groups
                // whose topmost frame had no name/path were filtered out above.
                let locations = locations_for_instruction[nth_instruction].as_ref().unwrap();
                state.set_stack_depth(writer, locations.len());

                for (depth, location) in locations.iter().enumerate() {
                    let new_path = location
                        .source_code_location
                        .as_ref()
                        .map(|location| simplify_path(location.path()));
                    let new_line = location.source_code_location.as_ref().and_then(|location| location.line());
                    let new_column = location.source_code_location.as_ref().and_then(|location| location.column());

                    // Delta-compare each field against the shadow frame; only
                    // changed fields emit mutation ops below.
                    let changed_kind = state.stack[depth].kind != Some(location.kind);
                    let changed_namespace = state.stack[depth].namespace != location.namespace;
                    let changed_function_name = state.stack[depth].function_name != location.function_name;
                    let changed_path = state.stack[depth].path.as_deref().map(Cow::Borrowed) != new_path;
                    let changed_line = state.stack[depth].line != new_line;
                    let changed_column = state.stack[depth].column != new_column;

                    if changed_kind {
                        state.set_mutation_depth(writer, depth);
                        state.stack[depth].kind = Some(location.kind);
                        let kind = match location.kind {
                            FrameKind::Enter => LineProgramOp::SetKindEnter,
                            FrameKind::Call => LineProgramOp::SetKindCall,
                            FrameKind::Line => LineProgramOp::SetKindLine,
                        };
                        writer.push_byte(kind as u8);
                    }

                    if changed_namespace {
                        state.set_mutation_depth(writer, depth);
                        writer.push_byte(LineProgramOp::SetNamespace as u8);
                        state.stack[depth].namespace = location.namespace.clone();

                        // Strings were pre-registered, so this is a pure lookup.
                        let namespace_offset = location
                            .namespace
                            .as_ref()
                            .map_or(empty_string_id, |string| dbg_strings.dedup(string));
                        writer.push_varint(namespace_offset);
                    }

                    if changed_function_name {
                        state.set_mutation_depth(writer, depth);
                        writer.push_byte(LineProgramOp::SetFunctionName as u8);
                        state.stack[depth].function_name = location.function_name.clone();

                        let function_name_offset = location
                            .function_name
                            .as_ref()
                            .map_or(empty_string_id, |string| dbg_strings.dedup(string));
                        writer.push_varint(function_name_offset);
                    }

                    if changed_path {
                        state.set_mutation_depth(writer, depth);
                        writer.push_byte(LineProgramOp::SetPath as u8);
                        // Store the simplified path in the shadow frame, reusing
                        // the original Arc when simplification borrowed it as-is.
                        state.stack[depth].path =
                            location
                                .source_code_location
                                .as_ref()
                                .map(|location| match simplify_path(location.path()) {
                                    Cow::Borrowed(_) => Arc::clone(location.path()),
                                    Cow::Owned(path) => path.into(),
                                });

                        let path_offset = location
                            .source_code_location
                            .as_ref()
                            .map_or(empty_string_id, |location| dbg_strings.dedup_cow(simplify_path(location.path())));
                        writer.push_varint(path_offset);
                    }

                    if changed_line {
                        state.set_mutation_depth(writer, depth);
                        // Line numbers are delta-encoded: +1 and small deltas get
                        // compact ops, everything else falls back to SetLine.
                        match (state.stack[depth].line, new_line) {
                            (Some(old_value), Some(new_value)) if old_value + 1 == new_value => {
                                writer.push_byte(LineProgramOp::IncrementLine as u8);
                            }
                            (Some(old_value), Some(new_value)) if new_value > old_value => {
                                writer.push_byte(LineProgramOp::AddLine as u8);
                                writer.push_varint(new_value - old_value);
                            }
                            (Some(old_value), Some(new_value)) if new_value < old_value => {
                                writer.push_byte(LineProgramOp::SubLine as u8);
                                writer.push_varint(old_value - new_value);
                            }
                            _ => {
                                writer.push_byte(LineProgramOp::SetLine as u8);
                                writer.push_varint(new_line.unwrap_or(0));
                            }
                        }
                        state.stack[depth].line = new_line;
                    }

                    if changed_column {
                        state.set_mutation_depth(writer, depth);
                        writer.push_byte(LineProgramOp::SetColumn as u8);
                        state.stack[depth].column = new_column;
                        writer.push_varint(new_column.unwrap_or(0));
                    }
                }

                // Depth of the next instruction's location stack (0 past the
                // end or when it has no locations); drives fused finish ops.
                let next_depth = locations_for_instruction
                    .get(nth_instruction + 1)
                    .and_then(|next_locations| next_locations.as_ref().map(|xs| xs.len()))
                    .unwrap_or(0);
                state.finish_instruction(writer, next_depth, (offsets[nth_instruction].1).0 - (offsets[nth_instruction].0).0);
            }

            state.flush_if_any_are_queued(writer);
            writer.push_byte(LineProgramOp::FinishProgram as u8);
        }
    }

    assert_eq!(info_offsets.len(), groups.len());

    // Ranges section: one (start pc, end pc, info offset) triplet per group,
    // mapping program counter ranges to their line program.
    let mut section_line_program_ranges = Vec::new();
    {
        let mut writer = Writer::new(&mut section_line_program_ranges);
        for (group, info_offset) in groups.iter().zip(info_offsets.into_iter()) {
            writer.push_u32(group.program_counter_start.0);
            writer.push_u32(group.program_counter_end.0);
            writer.push_u32(info_offset);
        }
    }

    builder.add_custom_section(program::SECTION_OPT_DEBUG_STRINGS, dbg_strings.section);
    builder.add_custom_section(program::SECTION_OPT_DEBUG_LINE_PROGRAMS, section_line_programs);
    builder.add_custom_section(program::SECTION_OPT_DEBUG_LINE_PROGRAM_RANGES, section_line_program_ranges);
}